{ "00058354250940674c4117796a8f50b492d7879e": { "authors": [ { "ids": [ "40458202" ], "name": "Cristobal Camarero" }, { "ids": [ "10678545" ], "name": "Carmen Mart\u00ednez" }, { "ids": [ "1762103" ], "name": "Ram\u00f3n Beivide" } ], "doi": "10.1109/HPCA.2017.26", "doiUrl": "https://doi.org/10.1109/HPCA.2017.26", "entities": [ "Assistive technology", "Clos network", "Data center", "Deadlock", "Equal-cost multi-path routing", "Experiment", "Fat tree", "Fault tolerance", "Jellyfish.com", "Multipath routing", "Randomness", "Requirement", "Routing", "Synthetic data" ], "id": "00058354250940674c4117796a8f50b492d7879e", "inCitations": [], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "193-204", "journalVolume": "", "outCitations": [ "8eda1f2f57c8fe644975f68c9b7cdb591ecf40d5", "d801135f1085b2b76cc3bd0fb42af943236bbf06", "47512d99d1a2971f013561dcec1190afa05df703", "0dd57dbc7e47ed7e27affd8d289585005d4d62a5", "5b5cb4970038d25423e70b23baa85fc97686d35d", "5203210d18c94f01169bd50afcebf70cd3284898", "057a8310124ef6565fbd13ae1ec1412b96dedae8", "1d912b67ba7cda4d341d834c1c6de96db01888fc", "552263c8b3e6b23c29a54820f2ebbf9c4ab80804", "943cf22e168a86fec0381ca380474c1da39e509c", "06d003643499015f1f1e15d30b24585d8cf82d45", "1ea6460b290976ec92bbf503c4568d76730a18c4", "17be90cbff777e45ec4732bed7250566861cf40c", "0736d68aad2c198a8f6dda851c27bd180421c2aa", "c321e064fe8ece4d45ae5a76ca032417a98b347a", "663e064469ad91e6bda345d216504b4c868f537b", "42e5e97272ad8728749f861ed7a920707e698778", "4b9618b059e1383ec7ea011fc41f40f5c759bb89", "d3ffe3303a3735bb34b18cd2b8e771d6d8ee908d", "f57ac7f53438b2877022125bac957fda2bb2a97b", "053e39720366429b6a4b76270993b27e7fe2cece", "5b29cdd10c434fb02eef2fadb0a405c7d09c41f0", "11a4744f7f0883c4232a9f5aaca8b9d29bfaa762", "5f8991828def57d2f0cda942566afff56740d150", "0f8b04cb89e455ceadf0c88fd5dd9f9a7f338ba9", "413e390f3e36f11cfc3de7e62b370cf530830857", "491eef9f7adada860abbd274e008e7acb964ef8b", 
"cf2dc0cee8e250956ab1c093612d486abb8795ac", "39f5f9b234659888eaec18d816499996a5d0ccce", "5885d3525c1789aaa3aacc1740a3a6b51376f1b8", "18c15c7c6ab7813cfd4f2b68ffe6ecfe86388d61", "13ab4cd5ec4710672ecf26ffa34a795d27cf0003", "14b82ab954a85cb8b336e86cf536c5701ca722e9" ], "paperAbstract": "In datacenter networks, big scale, high performance and fault-tolerance, low-cost, and graceful expandability are pursued features. Recently, random regular networks, as the Jellyfish, have been proposed for satisfying these stringent requirements. However, their completely unstructured design entails several drawbacks. As a related alternative, in this paper we propose Random Folded Clos (RFC) networks. They constitute a compromise between total randomness and maintaining some topological structure. As it will be shown, RFCs preserve important properties of Clos networks that provide a straightforward deadlock-free equal-cost multi-path routing and enough randomness to gracefully expanding. These networks are minutely compared, in topological and cost terms, against fat-trees, orthogonal fat-trees and random regular graphs. Also, experiments are carried out to simulate their performance under synthetic traffics that emulate common loads in datacenters. It is shown that RFCs constitute an interesting alternative to currently deployed networks since they appropriately balance all the important design requirements. Moreover, they do that at much lower cost than the fat-tree, their natural competitor. 
Being able up to connect the same number of compute nodes, saving up to 95% of the cost, and giving similar performance.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.26" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/00058354250940674c4117796a8f50b492d7879e", "sources": [ "DBLP" ], "title": "Random Folded Clos Topologies for Datacenter Networks", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "000f18a98b1a305d5ff07972b2a63849f9b26908": { "authors": [ { "ids": [ "2519125" ], "name": "Taewook Oh" }, { "ids": [ "1888818" ], "name": "Stephen R. Beard" }, { "ids": [ "2744545" ], "name": "Nick P. Johnson" }, { "ids": [ "1840375" ], "name": "Sergiy Popovych" }, { "ids": [ "1722513" ], "name": "David I. August" } ], "doi": "10.1109/PACT.2017.28", "doiUrl": "https://doi.org/10.1109/PACT.2017.28", "entities": [ "Automatic parallelization", "Computational science", "Just-in-time compilation", "Linear algebra", "Open-source software", "Parallel computing", "Partial evaluation", "Programmer", "Scripting language", "Speculative execution", "Speedup" ], "id": "000f18a98b1a305d5ff07972b2a63849f9b26908", "inCitations": [ "a075f3a381ee3856fda63b006e8f841d003fb354", "305826910778ccbb0c39d830e90913a1fcef6c57" ], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "356-369", "journalVolume": "", "outCitations": [ "22e775c629ad9d1e1cf6842eae60c5b9b96ce309", "6074c1108997e0c1f97dc3c199323a162ffe978d", "304fe263b042cedd3309422292e487f59cf39ffa", "a22080f1a7f54317b24d4176b8810446665dd8d1", "183298f38c2a16c4534db3de7c6e5c320b861b75", "da1df0946c1a0b2be7a265e9158c441c028715ad", "41d290943f39d23949f88a2ca52f1ebfc62a7090", "6a2814c9876ca97c99d9edcb204475dfd0bbc2bc", "9d9d7bc940e17a11bf4738deb3bdff6d5aef2ee2", "73538f17cbeb45fd0822cb971375ad03c515dbe7", "b7efe971a34a0f2482e0b2520ffb31062dcdde62", 
"20b2421005a95fe747a3f186ada61525a361bfaa", "a23e6569cafc467cde325c59f69f0d5f3838fce1", "0d4889a452a9948944f628c3b8147ab2cb9e70f9", "f0f4757aa2f923a349e8357e73850a78e9b80fee", "08c10397976e566fb27d984e42a44a75c350d0da", "40cb40b7812e019c1051e3a457a8643400b81d51", "0425f1e7e8651b5ba3c9e2eb98a3c50a07146972", "808b90dadea6426924374633c8c49f78175f04a8", "a19a7c5e45125a570dcbac018184669b8cab2789", "09ed565e84057123c15ab12b885c235d1f241aed", "619fe8f80908a6001c96b606c39137d4ad48802c", "d1c2ecf0fea4c430633389553f40189d0a23b3a0", "11e947e6509ecd98c905b44747b3978ac06c2900", "09cc66777e889e7ba26c5e8265cf4f62e841fd9d", "131ca2d600bc720343c1a735729a9b4521aed2d2", "0d281938d3ff2377541704cab6ba1c4408420733", "4c41353ff2b2acc45a0fd3b35c841f442d11591e", "4c372083685ab8b8cfc6ff984310c6d078580897", "6662ad36d052739f19a40a230e9e8afc26d88bc2", "35c89b2ad35ff57c7006a65a84d05df1f00affbe", "0856f6f40b889dba559f19654834114e9f469760", "93bb36b2fa886a808919a93ad13092790a4cca8f", "d67f67e2a8d2caf5ff04f315c21611571f7779c6", "5231091fd9fe75115bedf967fa8ed95810ae6ae3", "2b5d290bc646fb86ed31340c32017f83ffcb5d33", "58b00f733f75f0dd4fa5236263b5e1a64c5161d7", "16a74ec035f5cb660e839abf1ac076bea6469989", "5c6c460e58b72651a60d880c42d7e14b5daf206c", "2e88cb3f95da0342be347ac3903d50a9c5c95cf3", "448f4144a1d818754d91d0821ece830501ae6f9f", "6a2edd2c10e6daaea7ffdc4b0a58f8ad9527ca49", "159437777bba0139a6b4d6bde460b9201d284500", "280f49d0bbcc23780d6452f0aae6851f61b012bf", "2804ca11158d8d5a85d6e4dfd7b226dc1f203403", "5d00483952d303b9cfd9b9acfc8fd1173d5058c6", "c85f605689fff6599d55f6f057264b2eb068ea5a", "2194c3460ab71f3826db00b045b2ae590c753319", "0560fc4924bbbe7e920122dc25c1ecfc3e59e374", "28461538e59946bdb9c629629f1afdbfd7afb5aa", "08a3b7e1aef12dbd2500e65946b98d57b400dbf0", "72682890677496da1a98f2d4ce9396ad13997e07" ], "paperAbstract": "Computational scientists are typically not expert programmers, and thus work in easy to use dynamic languages. 
However, they have very high performance requirements, due to their large datasets and experimental setups. Thus, the performance required for computational science must be extracted from dynamic languages in a manner that is transparent to the programmer. Current approaches to optimize and parallelize dynamic languages, such as just-in-time compilation and highly optimized interpreters, require a huge amount of implementation effort and are typically only effective for a single language. However, scientists in different fields use different languages, depending upon their needs.This paper presents techniques to enable automatic extraction of parallelism within scripts that are universally applicable across multiple different dynamic scripting languages. The key insight is that combining a script with its interpreter, through program specialization techniques, will embed any parallelism within the script into the combined program that can then be extracted via automatic parallelization techniques. Additionally, this paper presents several enhancements to existing speculative automatic parallelization techniques to handle the dependence patterns created by the specialization process. A prototype of the proposed technique, called Partial Evaluation with Parallelization (PEP), is evaluated against two open-source script interpreters with 6 input linear algebra kernel scripts each. 
The resulting geomean speedup of 5.10× on a 24-core machine shows the potential of the generalized approach in automatic extraction of parallelism in dynamic scripting languages.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.28", "http://liberty.princeton.edu/Publications/pact17_pep.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/000f18a98b1a305d5ff07972b2a63849f9b26908", "sources": [ "DBLP" ], "title": "A Generalized Framework for Automatic Scripting Language Parallelization", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "002fe9a7c5f0522279974faa4eeadc70838eb862": { "authors": [ { "ids": [ "40163311" ], "name": "Qi Zeng" }, { "ids": [ "1759383" ], "name": "Jih-Kwon Peir" } ], "doi": "10.1109/IPDPS.2017.103", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.103", "entities": [ "Byte", "Cache (computing)", "Hamming distance", "Magnetoresistive random-access memory", "Multi-core processor", "Non-volatile memory", "Overhead (computing)", "Performance Evaluation", "Pseudo-LRU", "Random-access memory", "Spectral leakage", "Volatility" ], "id": "002fe9a7c5f0522279974faa4eeadc70838eb862", "inCitations": [ "fbb6409694d23f2322738cc9247735c55959626a" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "92-101", "journalVolume": "", "outCitations": [ "bbd677f51628791eb44d64fb9744ea0e610c357b", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "69f7a1499a9b24caeaa586de5ff9737c04fd0b89", "17a0a008a276daee5ce7a38b60cac964dea57da9", "05015f9db9c040d76d026deb4dd2f82ce275cd91", "5f852bfcf28e6a84723567bd40f247c0a8e7638e", "a2f3bb40653499eeb33babacf69579b5ea9d20e1", "df1ed68dba0407cf2d93736af8cfd2dc5cf86918", "2960c89331eb7afa86584792e2e11dbf6a125820", "61d16d80cd5e7f79f25785a462ee752d24e3b414", "5a893d8cab79cf43a1d225f5beaae54cbae13235", "12bc20a1963859e9f76afb4b308b90ded1cff1fe", 
"71c2deb5c3b4b0fd1ed68bdda534ec7ea76e845b", "7e2a21fb9f63c91c2974ca3d6c74d8c1ee89c228", "a1e4f4ae16c5a18896fe1718acfe56a26aeca620", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "45f92febffdc46540a3cae433a7b4ef48c029a50", "7ef0940a5e093a7c8c3c7d243bbbbf513b3c3192", "40b65be7d6e7cae7d530910220182df914103a04", "0b5e5a2516a49997feea686c434580d9058fd1aa", "3871446c86963903b087c1616bb1a0887a63f234", "dd4f901d0e692a4cc17741fc3479a661432b2824", "4654615ee9187ee1bb784feb6175a47a726d813d", "1600c3ed12301b06a1107a68c2de84fb3582a918", "7cd29ed1da71593bfb79b553ba6c5ee39ccf7a7b", "40eb2f5a97298da40838388700b097f82adff167", "5d999f4a5567e6f4a54e46bbcd6006f75ab0cbac", "d1f4ff21631dc8ac85dd39516e22d5e187cd9d5e", "afd4a9332cb43854b513ebba6ff17a79c388824b" ], "paperAbstract": "Spin-Transfer Torque Magnetoresistive Random-Access Memory (STT-MRAM) is a promising memory technology, which has high density, fast read speed, low leakage power, and non-volatility, and is suitable for multi-core on-chip last-level caches. However, the high write energy and latency, as well as less-than-desirable write endurance of STT-MRAM remain challenges. This paper proposes a new encoded content-aware cache replacement policy to reduce the total switch bits for write, lower the write energy, and improve write endurance. Instead of replacing the LRU block under the conventional pseudo-LRU replacement policy, we select a replacement block near the LRU position, which has the most similar content to the missed block. The selected replacement block can reduce the switch bits without damaging the cache performance. To avoid fetching and comparing the entire block contents, we present a novel content encoding method to encode 64-byte block using just 8 bits, each bit represents 8-byte content. The encoded bit is determined by the presence of a dominant bit value in the 8 bytes. We measure the content similarity using the Hamming distance between the encoded bits of the missed block and the replaced block. 
Performance evaluation demonstrates that the proposed simple content encoding method is effective with an average of 20.5% reduction in total switch bits, which results in improvement on write endurance and less write energy consumption. These improvements are accomplished with low overhead and minimum impact on the cache performance.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.103" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/002fe9a7c5f0522279974faa4eeadc70838eb862", "sources": [ "DBLP" ], "title": "Content-Aware Non-Volatile Cache Replacement", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "003269cbab91ddf9f86833883fafd6ddaa61b038": { "authors": [ { "ids": [ "3451064" ], "name": "Saumay Dublish" }, { "ids": [ "2164782" ], "name": "Vijay Nagarajan" }, { "ids": [ "14984639" ], "name": "Nigel Topham" } ], "doi": "10.1109/ISPASS.2017.7975295", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975295", "entities": [ "Baseline (configuration management)", "CPU cache", "Computer memory", "Dynamic random-access memory", "Graphics processing unit", "High Bandwidth Memory", "Imperative programming", "Memory bandwidth", "Memory hierarchy", "Speedup", "Synergy" ], "id": "003269cbab91ddf9f86833883fafd6ddaa61b038", "inCitations": [], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "239-248", "journalVolume": "", "outCitations": [ "36e46139ac2d2f3242cfe49469ce09403b5df852", "35fc951ff2b2bb9784391f3352282980e4c8137e", "e0857c644b1059323d15ef9d45ffe86f4f3b6a09", "6c86a995c3454d888713e66948c0d09b1451f0c2", "1c919013b4b7270927e0a4e5213909bd05e89891", "1087bbef784e7daecaf13b58bc1480d6dee4929b", "434fa04db769935ae61bbcf4d9faa602b9a8c730", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "4377307d51b459b89e768dc17cd532983766ba9e", "0edf4ef1b8e09e4abc994f7d450bc090262e2c9b", "2d6f002477015469075954c6748a1a85af352c94", 
"28cc7453c5f3f9ecb9415e631b0829ec9af8a4c3", "015298cd0df643ad7e3915e97ac14453b183d5df", "7ea15c138cc72588fa376ff819f4bb8ca0b324da", "d9b47764db442dc1bc1dad1570c85367002afe4a", "70c4ef7c1aad74d0fbe362ce4260e94f99fc4aee", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "3b2925fe06b3658950e14241e87b979c4d91a4ef" ], "paperAbstract": "GPUs are often limited by off-chip memory bandwidth. With the advent of general-purpose computing on GPUs, a cache hierarchy has been introduced to filter the bandwidth demand to the off-chip memory. However, the cache hierarchy presents its own bandwidth limitations in sustaining such high levels of memory traffic. In this paper, we characterize the bandwidth bottlenecks present across the memory hierarchy in GPUs for generalpurpose applications. We quantify the stalls throughout the memory hierarchy and identify the architectural parameters that play a pivotal role in leading to a congested memory system. We explore the architectural design space to mitigate the bandwidth bottlenecks and show that performance improvement achieved by mitigating the bandwidth bottleneck in the cache hierarchy can exceed the speedup obtained by a memory system with a baseline cache hierarchy and High Bandwidth Memory (HBM) DRAM. We also show that addressing the bandwidth bottleneck in isolation at specific levels can be sub-optimal and can even be counter-productive. Therefore, we show that it is imperative to resolve the bandwidth bottlenecks synergistically across different levels of the memory hierarchy. With the insights developed in this paper, we perform a cost-benefit analysis and identify costeffective configurations of the memory hierarchy that effectively mitigate the bandwidth bottlenecks. 
We show that our final configuration achieves a performance improvement of 29% on average with a minimal area overhead of 1.6%.", "pdfUrls": [ "http://www.research.ed.ac.uk/portal/files/33199116/PID4694747.pdf", "http://homepages.inf.ed.ac.uk/s1433370/papers/ispass2017/ispass2017-dublish-slides.pdf", "https://doi.org/10.1109/ISPASS.2017.7975295", "http://homepages.inf.ed.ac.uk/vnagaraj/papers/ispass17.pdf", "https://www.research.ed.ac.uk/portal/files/33199116/PID4694747.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/003269cbab91ddf9f86833883fafd6ddaa61b038", "sources": [ "DBLP" ], "title": "Evaluating and mitigating bandwidth bottlenecks across the memory hierarchy in GPUs", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "00388d4a976d40a182b2ad37e05f65b1def0afd1": { "authors": [ { "ids": [ "2651748" ], "name": "Gabriele Tolomei" }, { "ids": [ "1753531" ], "name": "Fabrizio Silvestri" }, { "ids": [ "33897206" ], "name": "Andrew Haines" }, { "ids": [ "1684032" ], "name": "Mounia Lalmas" } ], "doi": "10.1145/3097983.3098039", "doiUrl": "https://doi.org/10.1145/3097983.3098039", "entities": [ "Algorithm", "Black box", "Display resolution", "Ensemble learning", "Feature engineering", "Feature vector", "Online advertising", "Random forest", "Recommender system", "Tweaking" ], "id": "00388d4a976d40a182b2ad37e05f65b1def0afd1", "inCitations": [ "162dc19e3795062292cc78b2f226b2f4bc5245ec", "382f1ebe6009e580949d5513bc298cb253a1eeda" ], "journalName": "", "journalPages": "465-474", "journalVolume": "", "outCitations": [ "564985430ff2fbc3a9daa9c2af8997b7f5046da8", "47d5d824ea75254d5cd789141f702b1289d028de", "0f64ccff1c1c8ca3a2cd08245305f68a23249c2a", "6b17827b6e2563f10b53b8b37c95f5af5415c556", "48caac2f65bce47f6d27400ae4f60d8395cec2f3", "c4806efffa95a727006d2d6284240f2c181f75ab", "db5458476f3fd850d9b4c947e3aff04bf8ba3edc", "90e962c7980c790e5b3ba9d511e13f19b47b622f", 
"5636dca44384240ce9aff2b10b78458cd3c2f450", "8010d66631512f32df94ab7a34b98a53adab962d", "b093ef7c1bfe6e2beecc20523bb1c65e686b44ad", "83bfdd6a2b28106b9fb66e52832c45f08b828541", "33b3d9864a0f776afdc2d57453a4acc0a9f2519e", "318acfafaa66c5b1f1fe93caaa5c435fb637db9d", "75e0a740fb375524a9d0fc40a79f2c2442e9aaf1", "25045b29dd0cfa7abe493fdf1dcf0b488f014065", "2f991be8d35e4c1a45bfb0d646673b1ef5239a1f", "cfb2297032401b7cfb2cfed02ee8f957dce68506", "5f14436636346cfc6d7d4b8145af3d1920fd677e", "57243e5f22f8224817c4b89fbca1a7b86c4fa42e", "ae565efa4dc48b03edc2ddcbaeccf8a71267bf59", "e350acaf6673d240ad2772652f3328246e735342", "9f47b1ac404bcdd327bedaf34e8a72f127cf5e00" ], "paperAbstract": "Machine-learned models are often described as \"black boxes\". In many real-world applications however, models may have to sacrifice predictive power in favour of human-interpretability. When this is the case, feature engineering becomes a crucial task, which requires significant and time-consuming human effort. Whilst some features are inherently static, representing properties that cannot be influenced (e.g., the age of an individual), others capture characteristics that could be adjusted (e.g., the daily amount of carbohydrates taken). Nonetheless, once a model is learned from the data, each prediction it makes on new instances is irreversible - assuming every instance to be a static point located in the chosen feature space. There are many circumstances however where it is important to understand (i) why a model outputs a certain prediction on a given instance, (ii) which adjustable features of that instance should be modified, and finally (iii) how to alter such a prediction when the mutated instance is input back to the model.\n In this paper, we present a technique that exploits the internals of a tree-based ensemble classifier to offer recommendations for transforming true negative instances into positively predicted ones. 
We demonstrate the validity of our approach using an online advertising application. First, we design a Random Forest classifier that effectively separates between two types of ads: low (negative) and high (positive) quality ads (instances). Then, we introduce an algorithm that provides recommendations that aim to transform a low quality ad (negative instance) into a high quality one (positive instance). Finally, we evaluate our approach on a subset of the active inventory of a large ad network, Yahoo Gemini.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098039" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/00388d4a976d40a182b2ad37e05f65b1def0afd1", "sources": [ "DBLP" ], "title": "Interpretable Predictions of Tree-based Ensembles via Actionable Feature Tweaking", "venue": "KDD", "year": 2017 }, "003ee7658359e89867ae41397ccd8490e86f7a9c": { "authors": [ { "ids": [ "2098053" ], "name": "Trinabh Gupta" }, { "ids": [ "3215063" ], "name": "Henrique Fingler" }, { "ids": [ "2445753" ], "name": "Lorenzo Alvisi" }, { "ids": [ "1756078" ], "name": "Michael Walfish" } ], "doi": "10.1145/3098822.3098835", "doiUrl": "https://doi.org/10.1145/3098822.3098835", "entities": [ "Anti-spam techniques", "Cryptographic protocol", "Email", "Email encryption", "Encryption", "End-to-end encryption", "Plaintext", "Privacy" ], "id": "003ee7658359e89867ae41397ccd8490e86f7a9c", "inCitations": [ "6793a8e3ce9f56f19a381b85af6e22b405b88be9" ], "journalName": "", "journalPages": "169-182", "journalVolume": "", "outCitations": [ "2e72e467bc4445b697cc825666a341a8ed83b3ab", "6d6bd93c620885cb5ddd5abfac19efffac132cd5", "04ce064505b1635583fa0d9cc07cac7e9ea993cc", "9e487eea9772ba8742c5deee3f6b214ebfb79811", "37ca60b5a286107985567d90e31b96bfa1251cbb", "3bec43ace9f6834464bfc1af8e67f9616f2dc757", "23ec68ed03b485b645478a3f6905615617d905a6", "1b96e0effa98ffe22a179bc40b54be49ca10593f", "028ca1ee0709304970cd5ac306ca700c2be1d925", "4a5cda63e76498f30a0fcb3fa59e9731c9902de9", 
"522a16a41c33f8cb0f4a8bf51c9f3cd13cd2f05e", "24e6cf0796237f21c780a3f0c996817f57b3a1bd", "4cee8edc1acbbdf5accff10bc1b13370f2c5745e", "15509fdb7fc7bf065fcdf776b38cf3d72d10c113", "daf30b656afb032f5da2b9799b34f841dd2f443d", "ab4cce1ba0939f6944743e46c77f56e7d991ab9f", "127adf86474103b6f05afcc5bceda45bb5e34a8a", "10ab1b48b2a55ec9e2920a5397febd84906a7769", "19e6f3ea035d33590a8598be68ac82e6f00ce518", "c8619ca909a9ad1cc6cd6ef6089614ca3897e22d", "9509f45ebc129bd68ea94d55d90fee410afb8143", "49e78901c274b04a320b2edf6f6a76bc5e8ae9f7", "3b03935dfc89c0cad63e05976c21fef6c9fb4190", "469243cc7d80ea0c34702a9652ebb2ab9dad3e7f", "39bd1f1f75ca061985833f7f1d339ace60047f45", "895410b9eaf6693217580c1f279ebee33c5d19a6", "3f40a5b0bcf4401c3f8efdbb539deec2763ad916", "5e95a469a4bf7a079a517aae76d66b35fa6d84b6", "69dc0fe412f974a595abe6d7052d8fdf2304ba3b", "6d7c4a3ced42bc72a0025d329b04a3cfa14e8f0d", "2905a5c4da8c9a0970f078a211742316ef0ab77d", "51c16613dc8673ff1f8137badfa39d9891ef6cd0", "685b3d2aacda1059fdeb6c7c7723f4b3dac94ce5", "354e28d805afb6891274783a2ea54025df12de0f", "15964bef0c5a10420ccf44f4e02f4905aa9d85d0", "2fb3c68ac20704fcda5b6ec91a3e166ec41f6c13", "db0f82a419f89cda64fcbec2c58137862cd04475", "21b61061ca2711e2dc66dd799ecd6b9d6dcced36", "2e918c9ceae2422090951000e40445f04dce818d", "789708cfa812dd79e5ee0b071979c49b367ab983", "3a15ad9855bdbaafb687f9b2d9f4b06a068ccabd", "b42acbbaf7068828fd8581b58f1df4632a192ee6", "6b9acc1374fe9e5e2e02cfba055bed357468a1b8", "6ae39779f1273170f8a990d9558d3a248d6907b5", "18ac49f65f7794f40eb855b7ba0b084a6a5ac156", "0b9286a010bee710e74362a35f96dd1c6fee0fdb", "19c3736da5116e0e80a64db35afe421663c4b4a8", "74b57a54fb755d13082c1598756db7cec9866d8b", "7844a1f0effa4e1cf75c8f90894437e9c6f2fe1f", "5a26d6ac6a8e97f0917da3f02c0245f0be6a47ae", "61a297247f899995789dc6e32bcf3972502374b8", "c66b6aa41812e00facea7b5de249b9670c602fd3", "0e0427aedfed65c8dd688c094b181feacf4eaab4", "57f0a7ef8a11f191ff84e825f9153c254a29b427", "65a9d492c7ba31df76e86ca0c46d1c4500741385", 
"1efbc4c3c66f1dded4cfc626982d2e40a1c5b87b", "124005cd9102541f62c731ad9b2035ad35b244eb", "32bb922480593e1856d7223f5abecd0c15d69c6f", "24c9b0b05c5e957e255b854f947472f9181772a4", "e1ecc225690f79d1d51202d6772d3c2e0d0aea2a", "c1d36203276052765afbc8d9cd822ba5d0384627", "4beef78e9b21611a59237b63d512014e47f32d5e", "41bcaa2df214711891a78624e26286fda212a970", "00bbbf3af78f80651e9f955209ee72711fa5d412", "111df2d8bfae2a06d593317c6aa60f90f9a3e0a4", "2cf3fd84f30e5cae30dd46a3d7ecc0d63583b1a6", "1d279a4281d7f05ce8aecb083d5f4ea2e317c66b", "08026d939ac1f30951ff7f4f7c335bf3fef47be4", "2abe6b9ea1b13653b7384e9c8ef14b0d87e20cfc", "107444d65c858555bdb4a93eeeb7b3622a2af1c6", "f0569efef9069572a2958b59dbf43ba01fe2cfae", "45f6957cab31e802934cc761380c1a4a37c66208", "16dcd9dd1a947dc3fe4207d17f84e7e1da2cb236", "2d2581b990fd8b2df020cea5a6392b15f771bf0a", "4ea466a79c3fbdfce4d5916481a484aa3e22860b", "8a7374b98a9d94b8c01e996e72340f86a4327869", "b259bc404180d24e958859edeee1966a1a62f11a", "40d68c0011958b9a990c9df65414fcf4fd539c72", "4ce2e5da422caff5ac6d5164132618de8eead6a9", "1d75b7e7aac3e54984a8d8c70478482a3fd3067d", "47dd6b9d9cedbe2526ad22a01ca4fea1025e07d1", "a09dcece804c6cd11fd3f0025dda7d327121ae67", "595a00f0975b5d5c28d904ddba1ae5a493316573", "39a80339844a27e63a2b82ae4f8eb964da787172", "39a696a77f221ee139fdce8438a0b12224bf67f3", "6a74a8573cb1bd15c5f4fa4e047613d2340e61b9", "1d64f2ed0cec2950245154180dd106fb0c5669bc", "1fc0d824cd3ced24290806c62fae5aa13c961d79" ], "paperAbstract": "Emails today are often encrypted, but only between mail servers---the vast majority of emails are exposed in plaintext to the mail servers that handle them. While better than no encryption, this arrangement leaves open the possibility of attacks, privacy violations, and other disclosures. Publicly, email providers have stated that default end-to-end encryption would conflict with essential functions (spam filtering, etc.), because the latter requires analyzing email text. 
The goal of this paper is to demonstrate that there is no conflict. We do so by designing, implementing, and evaluating Pretzel. Starting from a cryptographic protocol that enables two parties to jointly perform a classification task without revealing their inputs to each other, Pretzel refines and adapts this protocol to the email context. Our experimental evaluation of a prototype demonstrates that email can be encrypted end-to-end and providers can compute over it, at tolerable cost: clients must devote some storage and processing, and provider overhead is roughly 5x versus the status quo.", "pdfUrls": [ "http://arxiv.org/abs/1612.04265", "http://www.cs.nyu.edu/~mwalfish/papers/pretzel-sigcomm17.pdf", "https://arxiv.org/pdf/1612.04265v3.pdf", "https://arxiv.org/pdf/1612.04265v2.pdf", "http://doi.acm.org/10.1145/3098822.3098835", "https://arxiv.org/pdf/1612.04265v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/003ee7658359e89867ae41397ccd8490e86f7a9c", "sources": [ "DBLP" ], "title": "Pretzel: Email encryption and provider-supplied functions are compatible", "venue": "SIGCOMM", "year": 2017 }, "004c2345477eda977f12b4485ac24a9e41557439": { "authors": [ { "ids": [ "2089711" ], "name": "Aasheesh Kolli" }, { "ids": [ "2077856" ], "name": "Vaibhav Gogte" }, { "ids": [ "2303964" ], "name": "Ali G. Saidi" }, { "ids": [ "2298231" ], "name": "Stephan Diestelhorst" }, { "ids": [ "37845066" ], "name": "Peter M. Chen" }, { "ids": [ "1678884" ], "name": "Satish Narayanasamy" }, { "ids": [ "3334450" ], "name": "Thomas F. 
Wenisch" } ], "doi": "10.1145/3079856.3080229", "doiUrl": "https://doi.org/10.1145/3079856.3080229", "entities": [ "3D XPoint", "Atomicity (database systems)", "Baseline (configuration management)", "Byte", "Byte addressing", "C++11", "Compiler", "Consistency model", "Data structure", "Industry Standard Architecture", "Persistent data structure", "Programmer", "Schedule (computer science)", "Serializability" ], "id": "004c2345477eda977f12b4485ac24a9e41557439", "inCitations": [ "41ea95cc4dca373bf324555b897760054ec4a76e", "3b6dddd6109b9ff9b339677931ca29d568efdbd9" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "481-493", "journalVolume": "", "outCitations": [ "129f11028220d87525b37b4605a2c04eb26f3e73", "24724ad8962a9e04eb496fddaefe9708f6960601", "42c70d64890726f60556caf3eec3f06e85642dd9", "19710fa0e64f36616e112c8a7b4e99ba4cb43c74", "10419c6f4aa50a36ed0b103c9ddb9aec45f133fe", "15c80ec5104e98d6f84b5ed348ba0276c0739862", "242cbdc5966fd14ba4a00815ac301fb278d8f544", "3af216f371069b57c0dca5448384d052fb490fb4", "512a8925693d5f4b8e4cfde32bcd3c846a14b71e", "1bb29cdeab20f4f5d739aacbb403e3751ca15f3b", "85398d5f19157c91bf00da3d36210e72d57887e4", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "94783d113951822195d4ba44599a8fcbdef9d4bf", "05a1357946de5eca42a477b7b268db4944219a2e", "33dcafd805a3b44fd64270028633032ff0bb6fac", "164a2d44033f7003565892a6f10ac86703d6ca7f", "209c2347a28bc0af9f8ace63ebbdf056729f41dc", "747ad718761b7d848a12e4f3a82aa0f46117a815", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "57c823b3b07b98233394bf15cfbbaed6a84809df", "157b439116e0dfb349f175d51c3793489355e08c", "05bd926844ffa89f668237a6836825c59d6377e9", "d4c5e14e27b45266532c74f6aa0d51a1a4280e7c", "bf5497e15f22233cbc2a4d0c3cc2c36f26738701", "d04957ae69caf43707b13fa833e50119724688f1", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "03b6a916498fa8591201a2de5f22344609b1e457", "2bf9bc1bb7386e8a1e0e172ff7b27a805584e3d1", 
"05c56f4abc527fbf384ad011dc9c0a613955641a", "16653666b0005f91060a3e402566659749b84313", "5bc06f8a33370f46f52f1d0282e5f91057a7192b", "47b851237f240831abee3971bca6bb8d2a121eb1", "823116269044ab4c713373c66c7da3fcb495b459", "425c117685a681c6c6de55e2928dc87066b53fbb", "0a6c15f75b0b52ea345caffabacd4c3f382b59a4", "33817456b5263fab036210ff1245dcc96f863101", "2fc8a439f0f73462f875870c403c90ea1fb99e41", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "67c64f4e676e1996cca7fd0ec50e453d6c698814", "9aa0d7253574e50fe3a190ccd924433f048997dd", "0a92088c1cf7463ed5d347d2624976e0126ffced", "2cc69da629e857dbd7facbcf808a64b10e9db9a7", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "0edf4ef1b8e09e4abc994f7d450bc090262e2c9b", "885c666fbcfd1a10c613496d7a041d01b99c7a39", "0ab0bc631b29f75118dbcb655df783e9a299d9f0", "7b2a7ed9ebd0a3c80d186959bced7cf46b02d6f6", "0645f0f88e9a3cd6e9b1d0c21bc24666a7377666", "3ede1909bf70d6e4bca46302f474083517b081a3", "071686697917fd56ae8ace0c4d6bfcf3bef5700a", "9376a2d69d06e39fd6fd27c9ce2f0817cc1dd4ef", "23773ffc679a8d9ebfd73810dec3e6fe6aa278ab", "0204f40221260d00c5ee63646560a40dcd7d97d1", "277862a906af8489a1d98add2f6516a0e5df1bb1", "1b9e9bfdc66140d2eb192e4ac8aca9281f0239b8" ], "paperAbstract": "The commercial release of byte-addressable persistent memories, such as Intel/Micron 3D XPoint memory, is imminent. Ongoing research has sought mechanisms to allow programmers to implement recoverable data structures in these new main memories. Ensuring recoverability requires programmer control of the order of persistent stores; recent work proposes persistency models as an extension to memory consistency to specify such ordering. Prior work has considered persistency models at the abstraction of the instruction set architecture. 
Instead, we argue for extending the language-level memory model to provide guarantees on the order of persistent writes.\n We explore a taxonomy of guarantees a language-level persistency model might provide, considering both atomicity and ordering constraints on groups of persistent stores. Then, we propose and evaluate Acquire-Release Persistency (ARP), a language-level persistency model for C++11. We describe how to compile code written for ARP to a state-of-the-art ISA-level persistency model. We then consider enhancements to the ISA-level persistency model that can distinguish memory consistency constraints required for proper synchronization but unnecessary for correct recovery. With these optimizations, we show that ARP increases performance by up to 33.2% (19.8% avg.) over coding directly to the baseline ISA-level persistency model for a suite of persistent-write-intensive workloads.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080229", "https://web.eecs.umich.edu/~pmchen/papers/kolli17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/004c2345477eda977f12b4485ac24a9e41557439", "sources": [ "DBLP" ], "title": "Language-level persistency", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "005b005eba413972ff1be3f102e229e730e051b0": { "authors": [ { "ids": [ "2290932" ], "name": "Fredrik Kjolstad" }, { "ids": [ "2853477" ], "name": "Shoaib Kamil" }, { "ids": [ "29920289" ], "name": "Stephen Chou" }, { "ids": [ "2636191" ], "name": "David Lugato" }, { "ids": [ "1709150" ], "name": "Saman P. 
Amarasinghe" } ], "doi": "10.1145/3133901", "doiUrl": "https://doi.org/10.1145/3133901", "entities": [ "C++", "Compiler", "Library (computing)", "Machine learning", "Sparse matrix" ], "id": "005b005eba413972ff1be3f102e229e730e051b0", "inCitations": [ "d4df38e1c0e4821388946f26a9ee51d3f2b82bed", "6360c75a753a0a29c4cd194f11b0f939b78e0f0a", "1731c32f9e644d2ecd4e08395351f01ad25ad579", "3c6cb1d654509bd43af5f70ebb9ebc06827bfa29", "904d4c36306db05e902cc2c0050bee9579a34f68", "db12b1acdf950527ee8eccbdaa99ee9dcf5c1274", "585cec9677e5cdb04e882cb47cc491c54ecbeb80", "2465db685bd5694dc00e8ea9d80612aa2ebf7708", "4cd112d3707eaefddd796204e9f1b64676682ea0", "0d461ea21db16fed87e53bbef72201708c5f6b7e", "05f057c34e0fcd05d0067ed504d55671bd9c967f", "135614db311313e4b12fc2cfec11c1231441f034" ], "journalName": "PACMPL", "journalPages": "77:1-77:29", "journalVolume": "1", "outCitations": [ "a3b049e2bd96d92bdef3f262b16bcb77140132f5", "00fc93840f3e3d5421c9b273f70a7410b8961c5a", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "2891da32932a45f8b14bb95f7e26b5ae9677f430", "27f3bb5ef854c0b0e559fb382114ba24891514b0", "a7f8ec0ecd2d5b07aee99c1707fdf1d7ff99ae12", "529e1deae0a67b0f8d92fbb256adddced491ea40", "12f1a2a510a4e86ecd75c8081a78620c71822f99", "3563be7789459d88bec67844f4dcdf22703eed7b", "231f97057e1efed073c20ccdf3aa3c5aaf063ffb", "70da6a5e4b90253cafa917b9824efe034717f47b", "14fde290a2f08ff1fc7260f717eef16a20cdd994", "31af4b8793e93fd35e89569ccd663ae8777f0072", "065f10e40b17fce3a3735f50bd04e2a00fe6c583", "2ed09ce69ec5e46e55c44e894aed20022bc97772", "3ac79c19099bebc11dfef2dea017d1e1a159fe7c", "62b996c8b0845277f1b8a1459ecae454c054cd7c", "00f581aca4dd370615fa0ea99e730d6dd42fe347", "0b2522ed3cf20c6e9844dd0cb72481041006e97c", "9689ba2d4673a39cb9bdfb9802660d6acf427704", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "4902e83433eb32fa3b18eb820b76894ecbd71702", "3edef062698ab35fbe4cc5a5ffce633e09f8b6f2", "892d5068d8200b6d8d7654c1cbe01883cbcb8488", "53a225f2843e8544ca9c615ecfcc5fad26083e49", 
"4fd74b807b47a5975e9b0ab354bfd780e0d921d2", "0072eb224991ada6fc8a4e2d3465e4a51c0b26bc", "5672ce28f2927b81b01303e4926643c55a4c8133", "196006af4361d7a94c55885a7370599aeff21119", "9bcb7ff601bb96f0de52e460007d1dfeaf0cb5c8", "f4dff66ba8f2338d118f379f2eff1410feb57ce6", "5f491a183c71b0322b16e4f5dc69538c50db79e0", "2489dfa220df07f9a94f4f4fa0f8ec1b2e695c61", "f17c253c37225094130aa58ab29c1493a59f9432", "99a1520bc334c111ff84619a1ac376f009d0d3bf", "430fa1802c54cae3bed1b978fe1c645c35087286" ], "paperAbstract": "Tensor algebra is a powerful tool with applications in machine learning, data analytics, engineering and the physical sciences. Tensors are often sparse and compound operations must frequently be computed in a single kernel for performance and to save memory. Programmers are left to write kernels for every operation of interest, with different mixes of dense and sparse tensors in different formats. The combinations are infinite, which makes it impossible to manually implement and optimize them all. This paper introduces the first compiler technique to automatically generate kernels for any compound tensor algebra operation on dense and sparse tensors. The technique is implemented in a C++ library called taco. 
Its performance is competitive with best-in-class hand-optimized kernels in popular libraries, while supporting far more tensor operations.", "pdfUrls": [ "http://people.csail.mit.edu/fred/tensor-compiler-preprint.pdf", "http://doi.acm.org/10.1145/3133901", "http://dspace.mit.edu/bitstream/handle/1721.1/107013/MIT-CSAIL-TR-2017-003.pdf?sequence=1", "http://groups.csail.mit.edu/commit/papers/2017/kjolstad-oopsla17-tensor-compiler.pdf", "http://people.csail.mit.edu/fred/tensor-compiler-techreport.pdf", "http://people.csail.mit.edu/fred/tensor-compiler.pdf", "http://groups.csail.mit.edu/commit/papers/2017/tensor-compiler-techreport.pdf", "http://dspace.mit.edu/bitstream/handle/1721.1/107013/tensor-compiler.pdf?sequence=5", "http://groups.csail.mit.edu/commit/papers/2017/kjolstad-oopsla17-taco.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/005b005eba413972ff1be3f102e229e730e051b0", "sources": [ "DBLP" ], "title": "The tensor algebra compiler", "venue": "PACMPL", "year": 2017 }, "00949bff493a83be184650c80e94a74b9e238b52": { "authors": [ { "ids": [ "1894033" ], "name": "Sizhuo Zhang" }, { "ids": [ "2287352" ], "name": "Muralidaran Vijayaraghavan" }, { "ids": [ "3285866" ], "name": "Arvind" } ], "doi": "10.1109/PACT.2017.29", "doiUrl": "https://doi.org/10.1109/PACT.2017.29", "entities": [ "Cache (computing)", "Cache coherence", "Central processing unit", "Circular definition", "Memory coherence", "Memory model (programming)", "Open-source software", "Operational definition", "Out-of-order execution", "RISC-V", "Time Sharing Option" ], "id": "00949bff493a83be184650c80e94a74b9e238b52", "inCitations": [], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "288-302", "journalVolume": "", "outCitations": [ "69e6fb41751ebf0a6b99522a2fabcd3879e8cf2b", "17e4e843676868b7dd5dcacea945141d7a8e17ee", "0ed62848d5c9e01f692c0c0b3851848ac7bb0764", 
"d4c5e14e27b45266532c74f6aa0d51a1a4280e7c", "62bd72d7a4160bd1a35191c51137d11cfbe30cf7", "05a518c3b1a6f5c15d77f8829368677a263ff15d", "33dcafd805a3b44fd64270028633032ff0bb6fac", "4bb640b092cbbf55ed4d1de8edb79ba8a79b0ebd", "3082d9ff0a7356b7414a5c6f0521e43dbcb9b2f8", "180189c3e8b0f783a8df6a1887a94a5e3f82148b", "9b117e3188bc7e7aba69d532165c0cceccc78f04", "a28f4c45ad72a50f56f7f9df13762c739230b646", "16dc592aa326ecd1f8d46ca7e3485a7311af3dba", "857726e6c21504e66569e3d61ed6b8710e44db4a", "13210f969b8b4d1d12d63a9a8361028a6be498bc", "0c10529346c4d2d5d4462636a0b3a0dd9fb8d25c", "bbac864f6815762a57ad18bdc3e6c456b7140947", "47b8b9cd7b064619e6bf25cde95e7af4bf5bc197", "ad913bd3d95fc9e5f6888974e04726eb441a6fc6", "413d938109026fb513083a3b3f1c616da005639c", "9a95cb1f79a8078e47dfb17f695952a6bea92fb5", "044aa72dd3879d4164094c3c8d32e9a1ba2a4f2e", "10f1faeec4ee2158b8535b249a20de5419998153", "1ea33a0ba2ded13492a4afa6817f953eede0e037", "2cea911044b0b9dc2cee2e2b04915b9aab22f86f", "e2cd72273908651ea11f9cb45d0dd5d755ca3bd0", "49f0f6c03f6eec08fe4426706609413fa5fa6f17", "3eae0271717f6b4d65024abf04e5d98aef41d748", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "3972fee7dbef2c2c1fc695d175faf4a56dcb382b", "362e9b5afe5934a9d8046d758c17c5bada0652b3", "5eef609f21fc9327e551ab40425f7f1715c3e200", "3a850f54e6dea4728aaa6a71ba222b7d612cd2b1", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "ae8ee52b076263e1108ac35714bf15c6dd514f11", "5ae07f575fb2feb1e03d08af9fe29fafe0a306d7", "0f0046ae34181e08594ad9be7b5bfffdbaeda177", "51b172edac5e4f60321db6127bd04d9a8931ca1a", "34d2db88f259d69022e7492225301ffd6e0f55c0", "4d1e3d20531b7118c50b137715b69926d990d7c6", "071686697917fd56ae8ace0c4d6bfcf3bef5700a", "1476bc7362e02995a8869ed6d3703e740284f450", "bb6cedd67b26fce1f0d8eacb0357658c6831586d", "1b9e9bfdc66140d2eb192e4ac8aca9281f0239b8", "3a66a682ee36cde0738824b152a51df2ccbb80fd", "520f2bb3565ab01a28c35f5c7e506bbbef71ed79", "1aac5c5e6dda36e455bed8af80dd1fd6bb31321e", "3e033205357becbb70e0b697134a5fe6fa17da43", 
"54fe429f0292ad691daaf923e6bf477788892b3b" ], "paperAbstract": "The memory model for RISC-V, a newly developed open source ISA, has not been finalized yet and thus, offers an opportunity to evaluate existing memory models. We believe RISC-V should not adopt the memory models of POWER or ARM, because their axiomatic and operational definitions are too complicated. We propose two new weak memory models: WMM and WMM-S, which balance definitional simplicity and implementation flexibility differently. Both allow all instruction reorderings except overtaking of loads by a store. We show that this restriction has little impact on performance and it considerably simplifies operational definitions. It also rules out the out-of-thin-air problem that plagues many definitions. WMM is simple (it is similar to the Alpha memory model), but it disallows behaviors arising due to shared store buffers and shared write-through caches (which are seen in POWER processors). WMM-S, on the other hand, is more complex and allows these behaviors. We give the operational definitions of both models using Instantaneous Instruction Execution (I2E), which has been used in the definitions of SC and TSO. 
We also show how both models can be implemented using conventional cache-coherent memory systems and out-of-order processors, and encompasses the behaviors of most known optimizations.", "pdfUrls": [ "https://arxiv.org/pdf/1707.05923v1.pdf", "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.29", "http://arxiv.org/abs/1707.05923" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/00949bff493a83be184650c80e94a74b9e238b52", "sources": [ "DBLP" ], "title": "Weak Memory Models: Balancing Definitional Simplicity and Implementation Flexibility", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "00be8c5ef2a75a205172d0f2bfb24caabecbdefd": { "authors": [ { "ids": [ "3153082" ], "name": "Aosen Wang" }, { "ids": [ "27247854" ], "name": "Lizhong Chen" }, { "ids": [ "2164973" ], "name": "Wenyao Xu" } ], "doi": "10.1145/3079856.3080219", "doiUrl": "https://doi.org/10.1145/3079856.3080219", "entities": [ "Analytical Engine", "Computer", "Data aggregation", "Sensor", "Sensor node", "Wearable computer", "Wearable technology" ], "id": "00be8c5ef2a75a205172d0f2bfb24caabecbdefd", "inCitations": [ "0691177be59cca292fb488970f3130ffdb22bd85", "3224f122d199bb24633ff37b398f273b0ff96289" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "69-80", "journalVolume": "", "outCitations": [ "11d0b69ea185c0c0918e02a78c06014b3b8a6162", "3b3f4b97ad8b0b07e5aee1edb27b52f1dc720d98", "1de9458e673c2be4483160f2ba2de478698cf059", "798548b6995f327440100e0d7382ff2652c17c6f", "24d9a4b498f67057b758f6501b5f7a792aafa4b2", "86551e015cbf7bc1721674844b7e3ff7cd7e1190", "61b1908e796cdc036a028c787cace553de45816f", "a7588e230f43a349c4d0df5ee45964701a213109", "58e4491dc48d46f4f47362686e09e6319c01edc0", "f48bf09555a616592a40b47d13b3956e21d620e8", "76329cc092b5fdc1b201594cab2788613947a852", "38febe6b89738ca05095f50f68709e985e14829a", 
"ed32d6368e1f46b5642dfc1dade1a3dcfd5ab990", "3be80a8c6d7fc0219ff0044a93401ca4f563f74e", "8f6c58c7d343ea2c07f85b5bc403eed6ab72dce2", "d74203416e555d79304f454a5fd4c99a8db95253", "7ce87672ebcd5476d43e705d9c84e4b97adb4b1d", "e88ede2e4eeb7c312722bfae9cbe72a866d56a6e", "352a8957005dc5519b15ed1870751ec494d66395", "34bb46fc412a206c7f08a59e9a4fa876955448ab", "4a310610600bf55085fe883b75a9b141ac4fdae1", "3c11b4e74086db34430d5381031319cae83ce17a", "5d5816bf02ea39ab02415e168d6145e5244dc889", "00f71e2ed7cabaca57c5cd86bae4e76350ed1f70", "b5b9b82be4154c7a3af17c7551284052903cb55f", "2d87e9ac2ead16d3a59f1df3ecf3a5d095ccf3f0", "059bdf296170b030fa9cb3c80efd202472b2f350", "d8d0d01453c67a52dff2afc7304f4658640a9f99", "e2682f2a2752cba7a05fd3db1cb43731c1afb002", "8c59a872972c6a71938f094ae0e27682e165dae4", "f09274db650ad5acddcde3912f61ee7f7cb82303", "13f6d32c34e97e746a470d71882a3cfd5d304c6d", "2eed7e2f120d0f9a2b778b97e684101745997f9a", "2a96a8edb4433861c619a2ce278a1c6f6465b453", "37f1c546fd9dc23f077c98997e5d7784e39b7183", "71278785aeaa9fb41ff0c08182721be2fd058ae1", "4cd0a064ca8ac519215c82b8d2a5d96e5b72bbcd", "af4eec9f6258940a90a86d3b450176f439249e17", "9d08d7ecef81303c0c45172ece892027063b5209", "3ecc44a209cf712043dc7733e438c9b77efbc2c5", "67f9d12c1c90dcc31a6c970dc8da3146acf520dd", "00af79ca91bf0a7e7202efcf610e05b63eee6c9b" ], "paperAbstract": "Wearable computing systems have spurred many opportunities to continuously monitor human bodies with sensors worn on or implanted in the body. These emerging platforms have started to revolutionize many fields, including healthcare and wellness applications, particularly when integrated with intelligent analytic capabilities. However, a significant challenge that computer architects are facing is how to embed sophisticated analytic capabilities in wearable computers in an energy-efficient way while not compromising system performance. In this paper, we present XPro, a novel cross-end analytic engine architecture for wearable computing systems. 
The proposed cross-end architecture is able to realize a generic classification design across wearable sensors and a data aggregator with high energy-efficiency. To facilitate the practical use of XPro, we also develop an Automatic XPro Generator that formally generates XPro instances according to specific design constraints. As a proof of concept, we study the design and implementation of XPro with six different health applications. Evaluation results show that, compared with state-of-the-art methods, XPro can increase the battery life of the sensor node by 1.6-2.4X while at the same time reducing system delay by 15.6-60.8% for wearable computing systems.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080219", "https://www.cse.buffalo.edu//~wenyaoxu/papers/conference/xu-isca2017.pdf", "http://web.engr.oregonstate.edu/~chenliz/publications/2017_ISCA_XProf.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/00be8c5ef2a75a205172d0f2bfb24caabecbdefd", "sources": [ "DBLP" ], "title": "XPro: A cross-end processing architecture for data analytics in wearables", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "00c8e8242f03a1cdf1b9a71632f42d58cddf3814": { "authors": [ { "ids": [ "30307951" ], "name": "Crefeda Faviola Rodrigues" }, { "ids": [ "39609950" ], "name": "Graham D. 
Riley" }, { "ids": [ "1706226" ], "name": "Mikel Luj\u00e1n" } ], "doi": "10.1109/IISWC.2017.8167764", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167764", "entities": [ "Adobe Streamline", "Artificial neural network", "Computer vision", "Convolutional neural network", "Deep learning", "Graphics processing unit", "Maxwell (microarchitecture)", "Mobile device", "Power semiconductor device", "Server (computing)", "Tegra" ], "id": "00c8e8242f03a1cdf1b9a71632f42d58cddf3814", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "114-115", "journalVolume": "", "outCitations": [ "a1543975098f8ec14f4402f761eefb473100beee", "3ac1df952ffb63abb4231a4410f6f8375ccdfe79", "305806d53240aa523168d5aa59d902fb0c9a1581", "52d2a6110e3bc2215d0347a04c421fb094044557", "c3bd0b86c74a4464173073b1f36fd12d2637c7a8", "3296a866a88f6be8f9354695cc7a098596f04253", "1c454ae4e1bbc600791f3a4796fdb6b1ee2ca016", "0b99d677883883584d9a328f6f2d54738363997a", "9f1f065bf08cd90431cc051267a708f56436cd82", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "adfaf01773c8af859faa5a9f40fb3aa9770a8aa7", "81b7dcaef4a53daab41658a4d1e97972d04b3384", "5d6fca1c2dc1bb30b2bfcc131ec6e35a16374df8" ], "paperAbstract": "Energy-use is a key concern when migrating current deep learning applications onto low power heterogeneous devices such as a mobile device. This is because deep neural networks are typically designed and trained on high-end GPUs or servers and require additional processing steps to deploy them on low power devices. Such steps include the use of compression techniques to scale down the network size or the provision of efficient device-specific software implementations. Migration is further aggravated by the lack of tools and the inability to measure power and performance accurately and consistently across devices. 
We present a novel evaluation framework for measuring energy and performance for deep neural networks using ARMs Streamline Performance Analyser integrated with standard deep learning frameworks such as Caffe and CuDNNv5. We apply the framework to study the execution behaviour of SqueezeNet on the Maxwell GPU of the NVidia Jetson TX1, on an image classification task (also known as inference) and demonstrate the ability to measure energy of specific layers of the neural network.", "pdfUrls": [ "https://arxiv.org/pdf/1803.11151v1.pdf", "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167764" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/00c8e8242f03a1cdf1b9a71632f42d58cddf3814", "sources": [ "DBLP" ], "title": "Fine-grained energy profiling for deep convolutional neural networks on the Jetson TX1", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "00caa4dea9216bec01b465f8a69d0e1becc07b7a": { "authors": [ { "ids": [ "2598683" ], "name": "Thanumalayan Sankaranarayana Pillai" }, { "ids": [ "31817919" ], "name": "Ramnatthan Alagappan" }, { "ids": [ "2170646" ], "name": "Lanyue Lu" }, { "ids": [ "2002462" ], "name": "Vijay Chidambaram" }, { "ids": [ "1743175" ], "name": "Andrea C. Arpaci-Dusseau" }, { "ids": [ "1703415" ], "name": "Remzi H. 
Arpaci-Dusseau" } ], "doi": "10.1145/3119897", "doiUrl": "https://doi.org/10.1145/3119897", "entities": [ "Benchmark (computing)", "Best, worst and average case", "Correctness (computer science)", "Crash (computing)", "Eventual consistency", "Linux", "Linux", "Scheduling (computing)" ], "id": "00caa4dea9216bec01b465f8a69d0e1becc07b7a", "inCitations": [ "0f4386d4a521e36cb15252b4e908a948a65252ef", "a377d5f506a411c5d95361188c0b7f500fc2ca09", "47f645013589f0c3babc505ee846711605f46226", "4e731dfc4eee0006865d131b384f46b29965f42e", "e97372229adcf4c015fcf43b3dcf3b51ddc48f2e", "ade874e837a2a6b9ce67fad0c5dce6f4e3c68d11", "ad42b4773cd461ba58bda07e1b7b0ff24c4ddba4", "8d555af4ad0bcb45ac5ce62374fbd23ea429121f", "41da20c0fb04dd4769f3772e392362acd893af57", "347e1352fb903b40dce606a1e581e9d601bc289c" ], "journalName": "TOS", "journalPages": "19:1-19:29", "journalVolume": "13", "outCitations": [ "885c666fbcfd1a10c613496d7a041d01b99c7a39", "6e90c995cc9caa0f7d9d68d536f5e16e9bcbbcd6", "05a1357946de5eca42a477b7b268db4944219a2e", "aed6e488244198d8bd9b882c8a53fff619666e7e", "13c27125584651329f66461981cbb20fa63e9023", "8c0573ba5f6aeb5a6391132ef26d613c045e6e1c", "39e3d058a5987cb643e000bce555676d71be1c80", "120c8504b4290920309165d48bb032f2c724a161", "243c522b56809292f1f50117a9915053d32bf4fb", "08e7d789b23d616c4c04432cf14b1836a73bbb6f", "0420266f84cc95d6b7a8100e601f67d1118d4965", "1cbaf27b55717e503284cfe339438c98da3a9867", "10f1faeec4ee2158b8535b249a20de5419998153", "3bbcce40cc2b9c848cb98e7ea8cd03a483aaca6c", "23ee1c97c4a1229618bf6a614b02f33dc678fe6b", "833da56175762daf644fe42b230917367264208c", "68a9005a5ec10daece36ca5ecb9cad7be44770b1", "2be26e8aa238ac37a80e08303f128d8014bb9f3b", "2b1f67102166434c404e5f0bcd6e3da1c6837363", "14cb2d4f902544862076519d9e424d071612a15e", "bee0a31573c37a5808a0af25d39de98e06c385d8", "265d18ced11e2e64d98afa97b0e86965e68101f7", "045a975c1753724b3a0780673ee92b37b9827be6", "acca916dcf29e548a8f3bd53b05acd18380b0f03", "47b78e7eb12859a141aed6a28a4e301eb0352629", 
"075f51b0aeaac7ebed18d5fbf67e64a14c8943f1", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "7e4ecfc13aba74db770378e640d5fbcce7fd3d2e", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "00918f711d847f9934b606b9a1d6622ca24fc3ec", "128c3e04314e6fca8deed005d74a3d1ba36ad293", "6c2bc356d3abc932d2a15068728261bef5aae69d", "34ef9c71821bd3ed7fa52c9178e1ee272fedb803", "088e3e939ad234b6fdd0e321290fb26937dc2553", "199ac28b6bc68bf05c77645ffae7640df114bca5", "765e6f4feeb1f7d59d2b3c011e2e38814a958afa", "036b85d48048b47180058034bde97ae633ba8c28", "09c0d62190aedb53e820695ccbe98d90f877cc46", "274e495824827f5a9dc1ba3ab62620445e6b3d4b", "071686697917fd56ae8ace0c4d6bfcf3bef5700a", "29357ed9c2b0b6e76dda247bbe90aa1dd39089aa", "26f820aa9e782f5d6ba8bcb272a31c32094dfd59", "25a83ec7cc04a5bf22061b78164c9d09a4de21a5" ], "paperAbstract": "Recent research has shown that applications often incorrectly implement crash consistency. We present the Crash-Consistent File System (ccfs), a file system that improves the correctness of application-level crash consistency protocols while maintaining high performance. A key idea in ccfs is the abstraction of a stream. Within a stream, updates are committed in program order, improving correctness; across streams, there are no ordering restrictions, enabling scheduling flexibility and high performance. We empirically demonstrate that applications running atop ccfs achieve high levels of crash consistency. Further, we show that ccfs performance under standard file-system benchmarks is excellent, in the worst case on par with the highest performing modes of Linux ext4, and in some cases notably better. 
Overall, we demonstrate that both application correctness and high performance can be realized in a modern file system.", "pdfUrls": [ "http://research.cs.wisc.edu/wind/Publications/ccfs-tos17.pdf", "http://research.cs.wisc.edu/adsl/Publications/fast17-pillai.pdf", "https://www.snia.org/sites/default/files/SDC/2017/presentations/etc/Pillai_Thanu_Application_Crash_Consistency_and_Performance_with_CCFS.pdf", "http://www.cs.utexas.edu/~vijay/papers/ccfs-fast17-slides.pdf", "http://research.cs.wisc.edu/adsl/Publications/fast17-thanu-slides.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17_pillai.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_pillai.pdf", "http://www.cs.utexas.edu/~vijay/papers/fast17-c2fs.pdf", "http://doi.acm.org/10.1145/3119897", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/pillai", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_pillai.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17_pillai.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/pillai" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/00caa4dea9216bec01b465f8a69d0e1becc07b7a", "sources": [ "DBLP" ], "title": "Application Crash Consistency and Performance with CCFS", "venue": "FAST", "year": 2017 }, "00cc482570d739e7b733f45b6f8f1836b24056bd": { "authors": [ { "ids": [ "1720084" ], "name": "Vivek Seshadri" }, { "ids": [ "15895903" ], "name": "Donghyuk Lee" }, { "ids": [ "1786530" ], "name": "Thomas Mullins" }, { "ids": [ "40016363" ], "name": "Hasan Hassan" }, { "ids": [ "2675748" ], "name": "Amirali Boroumand" }, { "ids": [ "2512816" ], "name": "Jeremie Kim" }, { "ids": [ "2366265" ], "name": "Michael A. Kozuch" }, { "ids": [ "1734461" ], "name": "Onur Mutlu" }, { "ids": [ "1974678" ], "name": "Phillip B. Gibbons" }, { "ids": [ "1761585" ], "name": "Todd C. 
Mowry" } ], "doi": "10.1145/3123939.3124544", "doiUrl": "https://doi.org/10.1145/3123939.3124544", "entities": [ "AMBIT", "Amplifier", "Baseline (configuration management)", "Bit array", "Bitmap", "Bitmap index", "Bitwise operation", "Central processing unit", "Cube", "Data-intensive computing", "Database", "Dynamic random-access memory", "Field-programmable gate array", "Graphics processing unit", "Hybrid Memory Cube", "In-memory database", "Memory bandwidth", "Memory bus", "Power inverter", "SIMD", "Sense amplifier", "Simulation", "Throughput", "Web search engine" ], "id": "00cc482570d739e7b733f45b6f8f1836b24056bd", "inCitations": [ "6b6a5f2127b5ffbccd54d4823a9ca3a73969f3d1", "ecf5efd5fe18860b42a1abd198e94a868dbf944c", "651ae380b5d500c613770dbf55c175c52576d7da", "983e87929eeb3f77c2ddb02d17d6efe978c80667", "7b8c6b2e7652620c037ff4732bc6c7b4ae88da6c", "2976932bec7334a150e1bb6916b7564bdaa864ea", "0c4fe1f1a8043e8f4175b21faca1b72bff8033e6", "1ebdf99bf03787a10d1c37bc9f93e89116e29bd6" ], "journalName": "", "journalPages": "273-287", "journalVolume": "", "outCitations": [ "430b1fa7fd090f65d063d32911820671288f23e3", "12b9e924175e4df2cc559ffec5cf9df171453146", "51db58e17d061db3e03bf43ebec5c9cd6569259f", "3c761857787b3efe5e65b25bd94c737bf2cd7632", "e762b1b654798cec0fb9d6000c7f7c777ac0689f", "0437e781bf22d47f3a13cca1e27eca6ae91d3f41", "2394c6644efa856f0da160a0f0031d74cd3b5000", "21bda5f42e92f535c29012746915f6dd06adb97a", "468035263afa59095614f26a62e0217da4a1aeed", "a3f41b800e5e3d7a3fecc303cf9edd570d15e5a2", "34fa41ccb6e548612886623916d502fce17fd3a8", "85398d5f19157c91bf00da3d36210e72d57887e4", "10b014e882764f5800ecdcbaba1fa08795d0c54d", "3c8722737ef9f37b7a1da6ab81b54224a3c64f72", "99d80987446ecc7fb546826e7bccebb2fdc5fa12", "1c32ad0a42109fab826eb3054df7cfc33b424125", "70a0f96171d25e910f7a598e9c9a5b9128699f5d", "72530e9ecc814155608e39ed4e0db7ca3ca7da5e", "3edacab130540193df4aba07cd07366ffd3600de", "054e4a6966d54eb9fd207cf0484214201f46424a", 
"15a7853875bf29a84b2d4e475029afaa032ccb76", "174a3d4ad2caba68b55fd3ee863b9471e3786f21", "43d66433875cb5c4eee68c8575f7be9108682c4b", "28055aaeb478fd09f5a042408cd6b63cbf707d1e", "ca0babdd9daba55708e6c83af12bf2872a76987d", "43355917bdfeecc08c64acfcbc2ea7ddbd1a806b", "790bbacb2bfe8830bbf03fecc2d7091c316bb3a8", "160631daa8ecea247014401f5429deb49883d395", "a6ca37aeeef5911e4f36b904088479bea999cc81", "a5bd15d203c6aa740aba16776b422db010e66b58", "85f5b05e7eba438f1fc4bc6e21cc1af00c424fbc", "0a908373dd5e87446ba85db0e590b3e3004e04f7", "024157990c0257c454beae3915f83ce5b088d767", "357e97b04375f09e9f4cfd45c69ecd9d7f0a15e1", "24b300b2395e4d0b0d2c8b9797fc9f8e735a58ef", "06902cb95ede2c305db4000852014f276b25c082", "38790cc347ee3a6e783689cdfae51a14570f5776", "15509136b2c799bb86e8bd4f5c802f4f5311aee7", "1dec8f5106d11047aaaf126121110cbf890f17c3", "094b881edab3f5833c4ff2f38d4ed207af141bcd", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "9d02bfc7bfdfe9b580e1464b1336cb295222dc30", "e9af96fbbacb4268c3c5ff974cc44990b12294e5", "2671b7aa02d4e1c837ec6f8bbdbba2e355fbf954", "bf70d60fc8d1de5fa53e8220a014fe463de4b7e5", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "3593269a4bf87a7d0f7aba639a50bc74cb288fb1", "012d556d67acedc6898930b4c93f54b87aabf5ee", "4e225fafd104abbf05a1bd0780d53c6763408b18", "7edb887ed7f15203eccb614095af001ea74bcfb6", "347b3b154b9283c97908fc8bea42225ee4bbfc8c", "096ee9c89d43fd03a602aed3a37fdf43dc8e60ae", "8ca2b3fcf20694f18edde393cd9a5cf4f8c3759b", "071564baef078867847fc54a3a0b50dd22d29d62", "4678cdcf7e57c1563379ac7cc344254f01ace572", "0c6f81e60514edbc6a936a5f8593838f14658653", "42f174df3876256dd5606bb61b366116e9943beb", "8b04ea524cb6ced72868c120a00c4679d84be006", "5906fc1d9cc56d31b9373cdb868cb90aa613d90d", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "02cd807277f9da21daa9ead698348215a9bed094", "135c49e5543ce41ec8274b270b2ac25e015cabd9", "5e41307a2f2850f164ad0175f372799ce61e0bf9", "14cd0daeed8c12db40be03dfd56e446fcc10f32a", "51a10bc2d3966dfcf82060e9c94fa7436e98023e", 
"179f80848143cf109fa6aebae6c3844da03b062c", "0015d8b6ec47ec2bc4bc0564a11e2f98a3971650", "1ef38c80b1bc4352ce0df0ef7c05249fb64bf78d", "0693ff4b3a8d1452b897a876d3ffe6b2074e98e4", "3c89345bb88a440096f7a057c28857cc4baf3695", "069eafae5ee9df25ff5c457bb636f73b98d8f6e9", "1e2c74686d4337113008d148b258e89414e35e20", "2263e2beda96efcff8ff19efaefcb85f5136aeca", "9341125876271d46cc25f86dac93f25acb343e8d", "2fa80c8342dcb349f1d91c102a76400c86dfb042", "5dfbdcedb7bcb8644b816bab2cc3d3fadd36775b", "4390f4a06a036b8f04cbb4fe7611fa5af9492797", "3d92d4a2e886e22b0d4346c74bcac2faa80ea58a", "5baaeed2b180d8b9886eca113ae0c86196c8bdaf", "b6a8f2d4f99277f1b7bf3b7f08c61abec4687eb5", "87f3d7730e190c695c84683830a702cc7dd6e296", "447f492235719d7c2b061b95d818f928d6cbdac5", "a0280c69589951383ea0dbcd06f11bc4c595eff1" ], "paperAbstract": "Many important applications trigger bulk bitwise operations, i.e., bitwise operations on large bit vectors. In fact, recent works design techniques that exploit fast bulk bitwise operations to accelerate databases (bitmap indices, BitWeaving) and web search (BitFunnel). Unfortunately, in existing architectures, the throughput of bulk bitwise operations is limited by the memory bandwidth available to the processing unit (e.g., CPU, GPU, FPGA, processing-in-memory).\n To overcome this bottleneck, we propose Ambit, an Accelerator-in-Memory for bulk bitwise operations. Unlike prior works, Ambit exploits the analog operation of DRAM technology to perform bitwise operations completely inside DRAM, thereby exploiting the full internal DRAM bandwidth. Ambit consists of two components. First, simultaneous activation of three DRAM rows that share the same set of sense amplifiers enables the system to perform bitwise AND and OR operations. Second, with modest changes to the sense amplifier, the system can use the inverters present inside the sense amplifier to perform bitwise NOT operations. 
With these two components, Ambit can perform any bulk bitwise operation efficiently inside DRAM. Ambit largely exploits existing DRAM structure, and hence incurs low cost on top of commodity DRAM designs (1% of DRAM chip area). Importantly, Ambit uses the modern DRAM interface without any changes, and therefore it can be directly plugged onto the memory bus.\n Our extensive circuit simulations show that Ambit works as expected even in the presence of significant process variation. Averaged across seven bulk bitwise operations, Ambit improves performance by 32X and reduces energy consumption by 35X compared to state-of-the-art systems. When integrated with Hybrid Memory Cube (HMC), a 3D-stacked DRAM with a logic layer, Ambit improves performance of bulk bitwise operations by 9.7X compared to processing in the logic layer of the HMC. Ambit improves the performance of three real-world data-intensive applications, 1) database bitmap indices, 2) BitWeaving, a technique to accelerate database scans, and 3) bit-vector-based implementation of sets, by 3X-7X compared to a state-of-the-art baseline using SIMD optimizations. We describe four other applications that can benefit from Ambit, including a recent technique proposed to speed up web search. 
We believe that large performance and energy improvements provided by Ambit can enable other applications to use bulk bitwise operations.", "pdfUrls": [ "https://people.inf.ethz.ch/omutlu/pub/ambit-bulk-bitwise-dram_micro17.pdf", "http://doi.acm.org/10.1145/3123939.3124544", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/09/MICRO-50_347.pdf", "http://www.pdl.cmu.edu/PDL-FTP/NVM/ambit-bulk-bitwise-dram_micro17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/00cc482570d739e7b733f45b6f8f1836b24056bd", "sources": [ "DBLP" ], "title": "Ambit: in-memory accelerator for bulk bitwise operations using commodity DRAM technology", "venue": "MICRO", "year": 2017 }, "00d3d1554166ab1dd91089111dabac7ca456f5be": { "authors": [ { "ids": [ "2599242" ], "name": "Kay Ousterhout" }, { "ids": [ "20978225" ], "name": "Christopher Canel" }, { "ids": [ "1699297" ], "name": "Sylvia Ratnasamy" }, { "ids": [ "1753148" ], "name": "Scott Shenker" } ], "doi": "10.1145/3132747.3132766", "doiUrl": "https://doi.org/10.1145/3132747.3132766", "entities": [ "Apache Spark", "Internet bottleneck", "Jumpstart Our Business Startups Act", "Systems architecture" ], "id": "00d3d1554166ab1dd91089111dabac7ca456f5be", "inCitations": [ "284b7631a9961f69eae1e0bac49438aee34edaa0", "40dca29aea76ae426791e4c6bf0e24f3ae88e318", "372a2383891257520ad6dea816d3f14ddff8f003", "83aaf61e91053745e667427d2132527b8a05ef8a" ], "journalName": "", "journalPages": "184-200", "journalVolume": "", "outCitations": [ "c5cc6243f070d80f5edef24608694c39195e2d1a", "88fd5ae53854a26b9edb2eb42ce6dfdd6e186ea5", "2f88dcf1e9abaa0b0f8c63820548c98b2da61220", "70ad150169f19d782ac992cbb3da3e7906cb7c66", "28a9dca6faeead651539c700bef413203b2b876e", "9da1d06e9afe37b3692a102022f561e2b6b25eaf", "332f77fd05703c1607e3b57884ad31fb1fad0104", "43776b15c034076a36b7143d58af8e04715e41d0", "0162a3f7c5bd29af364fd946db139df1ffa825c4", "4426abea067d858926f1178ab53dd357fa90f495", 
"223159ae070f0b6c270d618f02c5a00e0248022b", "19b304df6f13798a0745eeaf8f4573b202a43e5f", "2988e34168fa91398fa397baf823af2063893e9c", "0558c94a094158ecd64f0d5014d3d9668054fb97", "6d0b0155303bccf7e2395f0745fcabe3d4474e61", "0541d5338adc48276b3b8cd3a141d799e2d40150", "20400945c87f75acbad70f1f9ccfe94f556d2d02", "26deee037b221bd05ed34461819f5c067b745445", "463bec3d0298e96e3702e071e241e3898f76eff2", "f5b31911d960e136e5912a126dcbf6ef819edcf9", "0254e7809ea94c30adedd5e853bdd0014b6521c9", "3aed29136db8f1e5c6a89fc22d3ae4b4926a3555", "53aabc0ab7bdb22c4bb5b508a4db2fc4a2387060", "133eacaf0ad25b8364cb4510007d9363298e8adf", "274a6c951c4aa82e6ef6b9f63c11f0ef66722c20", "0aeb77fb41dc8e863e054fcffea7b8b3011515ce" ], "paperAbstract": "In today's data analytics frameworks, many users struggle to reason about the performance of their workloads. Without an understanding of what factors are most important to performance, users can't determine what configuration parameters to set and what hardware to use to optimize runtime. This paper explores a system architecture designed to make it easy for users to reason about performance bottlenecks. Rather than breaking jobs into tasks that pipeline many resources, as in today's frameworks, we propose breaking jobs into monotasks: units of work that each use a single resource. We demonstrate that explicitly separating the use of different resources simplifies reasoning about performance without sacrificing performance. Monotasks provide job completion times within 9% of Apache Spark for typical scenarios, and lead to a model for job completion time that predicts runtime under different hardware and software configurations with at most 28% error. 
Furthermore, separating the use of different resources allows for new optimizations to improve performance.", "pdfUrls": [ "http://kayousterhout.org/publications/sosp17-final183.pdf", "http://kayousterhout.org/talks/2017_10_29_SOSP_Monotasks.pdf", "http://doi.acm.org/10.1145/3132747.3132766" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/00d3d1554166ab1dd91089111dabac7ca456f5be", "sources": [ "DBLP" ], "title": "Monotasks: Architecting for Performance Clarity in Data Analytics Frameworks", "venue": "SOSP", "year": 2017 }, "00e534fdc29233aef6e44d5b716043d226b7b882": { "authors": [ { "ids": [ "2462977" ], "name": "Kwangsung Oh" }, { "ids": [ "1770097" ], "name": "Abhishek Chandra" }, { "ids": [ "1750436" ], "name": "Jon B. Weissman" } ], "doi": "10.1145/3078468.3078485", "doiUrl": "https://doi.org/10.1145/3078468.3078485", "entities": [ "Amazon Web Services", "Cloud computing", "Cloud storage", "Clustered file system", "Computer data storage", "Consistency model", "Data center", "Fault tolerance", "Integer programming", "Linear programming", "Memory hierarchy", "Microsoft Azure", "Service-level agreement", "Thesaurus Linguae Latinae" ], "id": "00e534fdc29233aef6e44d5b716043d226b7b882", "inCitations": [], "journalName": "", "journalPages": "12:1-12:11", "journalVolume": "", "outCitations": [ "73f512de77dad7d0abe8076a856727021b9493d3", "2f6af58c7905fb8367652fe62fbb1f6ec7e28be0", "0a4110fda21f0de29824ead1df591d2c5e1da8d0", "41c43d0a579339ceaaaa5e95b514e8a955389569", "0a625d2ee9465b0d8e4319f1e187349861f4d2cd", "7c4cf4515091593106242f169dac0dd2208f9d8b", "24c9ad0d66f6a05ad41563a7dade60bff6f59106", "1e987ea60c476bbabbb306e2e795bfb81ecc97aa", "3bd6bc388dea99b023c6695bd287eac8f5d28c0a", "1b10ad7ee2ce30703d769ea7abf42938195973a5", "a6a8313f30420c60e7eaa9f34ea5a41833695af1", "4a3e73c756ac6fe62e9d728a85000ffe892892e1", "83389bacf62e6c8513482395838caf7d01339a6b", "5316ceeadacd161386e1ece5d117b24a3a9344e0", "418e5e5e58cd9cafe802d8b679651f66160d3728", 
"12481927d7d78e6f231c24a708406943fa3f863d", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "87b94c2f86b9e8838bf15276fcfe9be0fd293588", "0558c94a094158ecd64f0d5014d3d9668054fb97", "9aa0d7253574e50fe3a190ccd924433f048997dd", "1d5de7a7ed362ecd596ac9ed5b85bf19d5c08ef5" ], "paperAbstract": "Exploiting the cloud storage hierarchy both within and across data-centers of different cloud providers empowers Internet applications to choose data centers (DCs) and storage services based on storage needs. However, using multiple storage services across multiple data centers brings a complex data placement problem that depends on a large number of factors including, e.g., desired goals, storage and network characteristics, and pricing policies. In addition, dynamics e.g., changing user locations and access patterns, make it impossible to determine the best data placement statically. In this paper, we present TripS, a lightweight system that considers both data center locations and storage tiers to determine the data placement for geo-distributed storage systems. Such systems make use of TripS by providing inputs including SLA, consistency model, fault tolerance, latency information, and cost information. With given inputs, TripS models and solves the data placement problem using mixed integer linear programming (MILP) to determine data placement. In addition, to adapt quickly to dynamics, we introduce the notion of Target Locale List (TLL), a pro-active approach to avoid expensive re-evaluation of the optimal placement. The TripS prototype is running on Wiera, a policy driven geo-distributed storage system, to show how a storage system can easily utilize TripS for data placement. We evaluate TripS/Wiera on multiple data centers of AWS and Azure. 
The results show that TripS/Wiera can reduce cost 14.96% ∼ 98.1% based on workloads in comparison with other works' approaches and can handle both short- and long-term dynamics to avoid SLA violations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078468.3078485", "https://www-users.cs.umn.edu/~ohxxx222/slides/TripS_SYSTOR2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/00e534fdc29233aef6e44d5b716043d226b7b882", "sources": [ "DBLP" ], "title": "TripS: automated multi-tiered data placement in a geo-distributed cloud environment", "venue": "SYSTOR", "year": 2017 }, "00fdad565f3bb86294580fc01664bdbe862f1b06": { "authors": [ { "ids": [ "1710103" ], "name": "Pedro Silva" }, { "ids": [ "37434914" ], "name": "Christian P\u00e9rez" } ], "doi": "10.1007/978-3-319-64203-1_27", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_27", "entities": [ "Heuristic", "Software as a service" ], "id": "00fdad565f3bb86294580fc01664bdbe862f1b06", "inCitations": [], "journalName": "", "journalPages": "372-384", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_27" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/00fdad565f3bb86294580fc01664bdbe862f1b06", "sources": [ "DBLP" ], "title": "An Efficient Communication Aware Heuristic for Multiple Cloud Application Placement", "venue": "Euro-Par", "year": 2017 }, "011f7f9ba9e6f9bc7f05994271725bc0fc9c3b94": { "authors": [ { "ids": [ "38737063" ], "name": "Dong Deng" }, { "ids": [ "34568734" ], "name": "Raul Castro Fernandez" }, { "ids": [ "2034349" ], "name": "Ziawasch Abedjan" }, { "ids": [ "39996718" ], "name": "Sibo Wang" }, { "ids": [ "1695715" ], "name": "Michael Stonebraker" }, { "ids": [ "1740095" ], "name": "Ahmed K. Elmagarmid" }, { "ids": [ "1743316" ], "name": "Ihab F. 
Ilyas" }, { "ids": [ "2033016" ], "name": "Samuel Madden" }, { "ids": [ "2168047" ], "name": "Mourad Ouzzani" }, { "ids": [ "8669763" ], "name": "Nan Tang" } ], "doi": "", "doiUrl": "", "entities": [ "Big data", "Computation", "Data science", "Database", "End-to-end principle", "Experience", "Federation (information technology)", "Grunt", "Linkage (software)", "Merck Index", "Sputter cleaning", "Workflow engine" ], "id": "011f7f9ba9e6f9bc7f05994271725bc0fc9c3b94", "inCitations": [ "56a1414b337d46e2683c66c777760a4a62af29ee", "57e979025374da67fae37fbb81bbadecee68cc08", "1af3118f0d70e2e04b42498d53a1893385689bd6", "9dce39920dd4d6d62bff9e8632751f8e2d39eb20", "7f8a1ba888fc4ce551530914d68f23ac54ce265f", "b904c2cbe34598bf52f82a8da8b2b02fefd791c5", "da182e52b0c8a95d97bf3088561db466e86247f5", "1b4f5bb49dc95340a66c75e1c4c719f0f96439c8", "7c11b349296003d6406c10c96aa223cfa8f5f542", "3073eda62f8391db0e695acb69bcb8c68b34c7b4", "ddfd6fec5f784b9b59a24937844b7fef61c46ba1", "315cd76e4a34b8fe27e20345abcd4fc27c7ee1ab" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "8584bc6fb5fe616d338d5ae3d20d4848572b5578", "4cf40a18955e5b79eb256666ee571fafd599b76d", "9a641e3730fab824e5ff988107794cb0b54943fc", "6306fbfa8fcde8e98a677cd4a833b8c76c613974", "31a816f4fef768f29772a003e534b1378611bfe6", "18c021c9cce95ed5615a060f590b8388b604e7c5", "5178810be2cc19348ae358920dbd33e93ff2d813", "73f31354cc9058ddc2e47a1c585b753e1592c1bf", "09cacb2d068d605e6f8148b173524094a41670d5", "2762f8b22fa9513a73f6d73205450e144fab3045", "0964ac250b81a2caa85dd172527f07a9ffc8230b", "0f1b67d1545299b8ccec4b28afb735ff045e5c1e", "3f1e54ed3bd801766e1897d53a9fc962524dd3c2", "0c687d5f26d78f2ba5e66e47af6db721c639f907", "156b07d3a2d8d744385f1e09ea49a04b09c612a5", "0f5c9968fe2cdb0f52c55b2d5b3dec7accf91306", "c4221a899528798105ca94e509027e7210a87d6b", "1f990d98dcc3941f01bd6bb5405fbda37e00dd6a", "19f10c75265a43829cf00e619224ab3e481c4fad" ], "paperAbstract": "In many organizations, it is often challenging 
for users to find relevant data for specific tasks, since the data is usually scattered across the enterprise and often inconsistent. In fact, data scientists routinely report that the majority of their effort is spent finding, cleaning, integrating, and accessing data of interest to a task at hand. In order to decrease the \u201cgrunt work\u201d needed to facilitate the analysis of data \u201cin the wild\u201d, we present DATA CIVILIZER, an end-to-end big data management system. DATA CIVILIZER has a linkage graph computation module to build a linkage graph for the data and a data discovery module which utilizes the linkage graph to help identify data that is relevant to user tasks. It also uses the linkage graph to discover possible join paths that can then be used in a query. For the actual query execution, we use a polystore DBMS, which federates query processing across disparate systems. In addition, DATA CIVILIZER integrates data cleaning operations into query processing. Because different users need to invoke the above tasks in different orders, DATA CIVILIZER embeds a workflow engine which enables the arbitrary composition of different modules, as well as the handling of data updates. 
We have deployed our preliminary DATA CIVILIZER system in two institutions, MIT and Merck and describe initial positive experiences that show the system shortens the time and effort required to find, prepare, and analyze data.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p44-deng-cidr17.pdf", "http://da.qcri.org/ntang/pubs/cidr2017.pdf", "http://cs.brown.edu/courses/cs227/papers/data-civilizer.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/04d5/069f0db8ec5b637c4091c598838096800970.pdf", "s2Url": "https://semanticscholar.org/paper/011f7f9ba9e6f9bc7f05994271725bc0fc9c3b94", "sources": [ "DBLP" ], "title": "The Data Civilizer System", "venue": "CIDR", "year": 2017 }, "013ab8df817d07f52167163ce85519d64b85390e": { "authors": [ { "ids": [ "1767573" ], "name": "Nishanth Chandran" }, { "ids": [ "1727286" ], "name": "Juan A. Garay" }, { "ids": [ "1773836" ], "name": "Payman Mohassel" }, { "ids": [ "8210582" ], "name": "Satyanarayana Vusirikala" } ], "doi": "10.1145/3133956.3134100", "doiUrl": "https://doi.org/10.1145/3133956.3134100", "entities": [ "Mathematical optimization", "Secure state", "Software deployment", "Symmetric-key algorithm" ], "id": "013ab8df817d07f52167163ce85519d64b85390e", "inCitations": [], "journalName": "", "journalPages": "277-294", "journalVolume": "", "outCitations": [ "23ec68ed03b485b645478a3f6905615617d905a6", "a2a8b5cc914c653730a251cf1a0b3452dac322b3", "01ae736135f6aa1ec765ffbd6d1d2c991acb2b35", "31100ccd0867d6d5338612a62b2cde11be75f1b8", "36250592849fc8dc50b3b5df0a72a8b072ce34e4", "0ff204bf8854f258f181a249e2dceb1633f910d9", "a853e0842d74fa3ff146f45ea7f2ed52dac08d1a", "47b8fd6ee8b07bd14de3c91df515b11180121de9", "2f9c590bb2df7fe3e4caffaaa709fa6840d02d62", "04948723dec0e6724777ee56f0d10168cce44921", "15c76f461543c44a8b9d8b32b2bbd18c595aea52", "8128c7e13e69c29880b11ed675ecb108e879059a", "0affd3f06d26de268d81c288454dd7880e518f9e", "1c07a74467c912602b33f28e90abd6eeaa60af6d", "5161aa950ec876026dfc24b4cbf69ae1e552c0e6", 
"2eb315952f6a2e342b19cf95287c8a0b1f2c36fa", "13e622fca1a6b52aa85898e260f9455e4ba0d94b", "5efa700b61efac0b571da693e06d0af085f7344c", "33148623fc14ea5735e73dd716d030ab17118299", "3ff4a7bcfa42348102cd49f6bf33c8ca85c94472", "37d41c44e034a282820f698bb70cf15c2083a9ab", "13ca5e20283085e1c2854325665bd7fd6497a62c", "0e0427aedfed65c8dd688c094b181feacf4eaab4", "411e4ecb35e5385ed0c88a36f0b2821c42af8f70", "3f40a5b0bcf4401c3f8efdbb539deec2763ad916", "ab6d858715ea8ecc664e3d41bec87269368e15a4", "05dfe536310bc0176ad23cc40fdc8e501811f4be", "1eb0b401e7dbd8a4e638243713b39fffc991fe9f", "444630ced6bda572461744423ff420106472d5e3", "01dede4ec077d7495e7dacdac8b584678aca5fc7", "bb63c68855d42c95623ed9362d0853ea1d4cc858", "18f5d7663632c92c84f89151823dff2120ae43cf", "c3b6d1b083f132d6f40f354fce32453410b6f942", "69dc0fe412f974a595abe6d7052d8fdf2304ba3b", "19c3736da5116e0e80a64db35afe421663c4b4a8", "42333e3f231bbfe508f6da6bad2feff9ae223113", "01ab267578a0f3286425f4b261c7ce6dcec8e407", "7767c71b09ad63fbc9892d3deb1c07292c9cfbf5", "e00b72a00f591353e9f13c0127f7006ec9528557", "61883fbd35396888924520e109355e912337d2b8", "88915d3d45829e9b929e3c5019dda47985a13b7d", "3cb55d539b232e309f4a5974148ec6f22afb5888", "13e5ca27f887c2be2795cdb335201c4c247c60f3", "b57aec9b611817d5272c8f97ec8211ecd33dca6d", "e5302edfa2fa077525008333fcb56d9c2f3451ef", "218bbd0efffc2ee63edffb8c5220f06155e23578", "a797a0346e106e0d1d1d2db778aa509031c7bf8c", "7dd5a9a774b96ef8f551ded6418fe8adf28e8952" ], "paperAbstract": "While the feasibility of constant-round and actively secure MPC has been known for over two decades, the last few years have witnessed a flurry of designs and implementations that make its deployment a palpable reality. 
To our knowledge, however, existing concretely efficient MPC constructions are only for up to three parties.\n In this paper we design and implement a new actively secure 5PC protocol tolerating two corruptions that requires 8 rounds of interaction, only uses fast symmetric-key operations, and incurs 60% less communication than the passively secure state-of-the-art solution from the work of Ben-Efraim, Lindell, and Omri [CCS 2016]. For example, securely evaluating the AES circuit when the parties are in different regions of the U.S. and Europe only takes 1.8s which is 2.6x faster than the passively secure 5PC in the same environment.\n Instrumental for our efficiency gains (less interaction, only symmetric key primitives) is a new 4-party primitive we call Attested OT, which in addition to Sender and Receiver involves two additional \"assistant parties\" who will attest to the respective inputs of both parties, and which might be of broader applicability in practically relevant MPC scenarios. 
Finally, we also show how to generalize our construction to n parties with similar efficiency properties where the corruption threshold is t ≈ √n, and propose a combinatorial problem which, if solved optimally, can yield even better corruption thresholds for the same cost.", "pdfUrls": [ "https://eprint.iacr.org/2017/519.pdf", "http://eprint.iacr.org/2017/519", "http://doi.acm.org/10.1145/3133956.3134100", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/08/ccs2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/013ab8df817d07f52167163ce85519d64b85390e", "sources": [ "DBLP" ], "title": "Efficient, Constant-Round and Actively Secure MPC: Beyond the Three-Party Case", "venue": "CCS", "year": 2017 }, "014b09f5b1872a7aa70ec233c2746ee1cb93f7cd": { "authors": [ { "ids": [ "39645110" ], "name": "Qi Alfred Chen" }, { "ids": [ "1898170" ], "name": "Matthew Thomas" }, { "ids": [ "2471594" ], "name": "Eric Osterweil" }, { "ids": [ "2861481" ], "name": "Yulong Cao" }, { "ids": [ "40048308" ], "name": "Jie You" }, { "ids": [ "3895596" ], "name": "Zhuoqing Morley Mao" } ], "doi": "10.1145/3133956.3134084", "doiUrl": "https://doi.org/10.1145/3133956.3134084", "entities": [ "Client (computing)", "Client-side", "Code injection", "Collision attack", "Collision problem", "Credential", "Feasible region", "Intranet", "Malware", "Man-in-the-middle attack", "Name collision", "namespaces" ], "id": "014b09f5b1872a7aa70ec233c2746ee1cb93f7cd", "inCitations": [], "journalName": "", "journalPages": "941-956", "journalVolume": "", "outCitations": [ "48fc8f1aa0b6d1e4266b8017820ff8770fb67b6f", "69349684bf61888dc9fe5ff679ff1c7572d2d535", "0228d21869d7d1e6d1acdaf7d7086d9e7d1327a0", "e6305e00746f75401fde3f4719f037a9fd183d7c", "1149fee645180babc05c2565ee86e63402ead90b", "3dfca21820fb74935b2145be8d37f9dcb1adf2f5", "1a0ca2cf71616fa492ebe611f1d83261d2ecf052", "5f60d66221f466ac806828ce068dd24c18b5901e", "223cfa8cab6f00a9d37af94e87454e82b28fa19b", 
"7a1890d288fe3e2dae1add14c12f7b5428686ff7", "11c5f57419fd0e64b0feb78f7d42c1cb7508c31f", "2df89af8e95047fa4d0b035366144e5d73a4e368" ], "paperAbstract": "The recent unprecedented delegation of new generic top-level domains (gTLDs) has exacerbated an existing, but fallow, problem called name collisions. One concrete exploit of such problem was discovered recently, which targets internal namespaces and enables Man in the Middle (MitM) attacks against end-user devices from anywhere on the Internet. Analysis of the underlying problem shows that it is not specific to any single service protocol, but little attention has been paid to understand the vulnerability status and the defense solution space at the service level. In this paper, we perform the first systematic study of the robustness of internal network services under name collision attacks.\n We first perform a measure study and uncover a wide spectrum of services affected by the name collision problem. We then collect their client implementations and systematically analyze their vulnerability status under name collision attacks using dynamic analysis. Out of the 48 identified exposed services, we find that nearly all (45) of them expose vulnerabilities in popular clients. To demonstrate the severity, we construct exploits and find a set of new name collision attacks with severe security implications including MitM attacks, internal or personal document leakage, malicious code injection, and credential theft. We analyze the causes, and find that the name collision problem broadly breaks common security assumptions made in today's service client software. 
Leveraging the insights from our analysis, we propose multiple service software level solutions, which enables the victim services to actively defend against name collision attacks.", "pdfUrls": [ "https://acmccs.github.io/papers/p941-chenA.pdf", "http://web.eecs.umich.edu/~alfchen/alfred_ccs17.pdf", "http://doi.acm.org/10.1145/3133956.3134084" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/014b09f5b1872a7aa70ec233c2746ee1cb93f7cd", "sources": [ "DBLP" ], "title": "Client-side Name Collision Vulnerability in the New gTLD Era: A Systematic Study", "venue": "CCS", "year": 2017 }, "014d28ef6ad36b22c1a4edb43c1b34bc7981b2e3": { "authors": [ { "ids": [ "2541035" ], "name": "Gunjae Koo" }, { "ids": [ "3235717" ], "name": "Yunho Oh" }, { "ids": [ "2957310" ], "name": "Won Woo Ro" }, { "ids": [ "1789661" ], "name": "Murali Annavaram" } ], "doi": "10.1145/3079856.3080239", "doiUrl": "https://doi.org/10.1145/3079856.3080239", "entities": [ "Baseline (configuration management)", "CPU cache", "Cache (computing)", "Graphics processing unit", "Locality of reference", "Simulation", "Stream (computing)", "WARP (information security)" ], "id": "014d28ef6ad36b22c1a4edb43c1b34bc7981b2e3", "inCitations": [ "fa21c85107516c7f0a341de27856d7ffe4a6c5d9", "b20230c61d5db7863ba6a12fc18da85be6a35a60" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "307-319", "journalVolume": "", "outCitations": [ "4308295a2eaef30be423520918ad224dc2f3ffe2", "0d7ab4003220cc847ae2b5fbf32bfa901da8edde", "01079a4f0bcac90e8977cbcee2ec50b98d408310", "b37cdf43ff9c85693e335c04086003819a7aa4f9", "8e0bace83c69e81cf7c68ce007347c4775204cd0", "28cc7453c5f3f9ecb9415e631b0829ec9af8a4c3", "2dc38b527e91f8cfee6f6c7ba4d079087c293471", "7ea15c138cc72588fa376ff819f4bb8ca0b324da", "00156e79606084497789662dfaf59c3b54a10722", "6170a341e38990ac3c3df35f557e149746c9e099", "f08a5e7a23b44c37a22e011e31843aeeae0ed4e6", 
"5670a2391d0c085be2ff5c704cae8e76a80a15fb", "3d50c803cc715e51d263f5a42b06858be9466c0f", "792aa5a81ac1d344de450ec59eec339aa0e508aa", "0e0fb6a3ccbd9da9dc216913ef77d346515936c6", "60a1389c827f9f706c9dc1639e2584f0f3de878e", "d410f8128bd4efa6adc886259e5d9de4cd7587bc", "67bf737ceccf387cdd05c379487da8301f55e93d", "6f4dfb66fea49a55ad7f2e2312728aa68d9313e3", "30a6f5a8c2d61421fcef53f66a5c450cd561d378", "5d79e0c5e4b531f26de469688668c50f8c1069b2", "1087bbef784e7daecaf13b58bc1480d6dee4929b", "43260df86b2aaa20824d73eff48e0b49162689cb", "03d832219a7cf933db0ef1f686fec730c09acd55", "3ce662e1663456ce2a5b5d240112721c0d0a4582", "0717371b254df3e466a11d1965c2c9541a43b7a3", "3364bc50921a9566d61ef8cb73baa82341725e4b", "0612811b3ed9fc7ef8300e65cb70360613dab01d", "7bee024cfab6e16be7c57e2ddbe13618d2a2968c", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "0ee3a956a67b0d679bf485d60e75abdbdb5d50e7", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "2d6f002477015469075954c6748a1a85af352c94", "b6659b2af4789a1daaff6310161d850a840fe3d7", "180b2d793d5a79466d773db05b7652695a8d4671" ], "paperAbstract": "Long latency of memory operation is a prominent performance bottleneck in graphics processing units (GPUs). The small data cache that must be shared across dozens of warps (a collection of threads) creates significant cache contention and premature data eviction. Prior works have recognized this problem and proposed warp throttling which reduces the number of active warps contending for cache space. In this paper we discover that individual load instructions in a warp exhibit four different types of data locality behavior: (1) data brought by a warp load instruction is used only once, which is classified as streaming data (2) data brought by a warp load is reused multiple times within the same warp, called intra-warp locality (3) data brought by a warp is reused multiple times but across different warps, called inter-warp locality (4) and some data exhibit both a mix of intra- and inter-warp locality. 
Furthermore, each load instruction exhibits consistently the same locality type across all warps within a GPU kernel. Based on this discovery we argue that cache management must be done using per-load locality type information, rather than applying warp-wide cache management policies. We propose Access Pattern-aware Cache Management (APCM), which dynamically detects the locality type of each load instruction by monitoring the accesses from one exemplary warp. APCM then uses the detected locality type to selectively apply cache bypassing and cache pinning of data based on load locality characterization. Using an extensive set of simulations we show that APCM improves performance of GPUs by 34% for cache sensitive applications while saving 27% of energy consumption over baseline GPU.", "pdfUrls": [ "http://www-scf.usc.edu/~gunjaeko/pubs/Gunjae_ISCA17.pdf", "http://doi.acm.org/10.1145/3079856.3080239" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/014d28ef6ad36b22c1a4edb43c1b34bc7981b2e3", "sources": [ "DBLP" ], "title": "Access pattern-aware cache management for improving data utilization in GPU", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "016bb661e767d8fa2491743d289b11cfc41e3efb": { "authors": [ { "ids": [ "4260658" ], "name": "David Lie" }, { "ids": [ "2286904" ], "name": "Petros Maniatis" } ], "doi": "10.1145/3102980.3102996", "doiUrl": "https://doi.org/10.1145/3102980.3102996", "entities": [ "Client-side", "Cloud computing", "Display resolution", "GLIMMER", "Personally identifiable information", "Quality of service", "Trust (emotion)", "Trust metric", "User-generated content" ], "id": "016bb661e767d8fa2491743d289b11cfc41e3efb", "inCitations": [ "0646a88dfd7e7ce7233041eaad62076ccc55624c" ], "journalName": "", "journalPages": "94-99", "journalVolume": "", "outCitations": [ "3ffc94c7066b4856b1cfae99ab66cd20310e41dd", "7a6ca8144dbf3331e8ad34c4024670c6ef4ec9be", 
"038343c387ed6e39c8d8eee21fee1fef8fe55f72", "07f0e56d1c37c213cd5c617dbfba5a0549629a19", "ad13de073252eaf17e437e68d644fac7826edc8a", "0d3c49f0d6743b03615bfcf546b5d015d32d4035", "5ff155a684fdae3da603e615095084567dcfc3ea", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "70a8963eda8fe4567ad434fdf4fe93fa4da10b46", "6a363d25c6b09b3d48d59ec3683341fedbd030c6", "3f9ff793f462a36b5d9847f25d5a8c413d48389d", "226c420178be541d6a061334f3f9760cc683653c", "01fde8698110cf46ff48a17c65f2658dab4c323c", "326bb49d3ae9e1e1551028200916192e50004105", "2a09faed33a1c58bf2f1e827b326bbdc656fd363", "02bc27c39eaaa6b85d336be81b15ca19f112a950", "0a289fd7b14345822b1acda6d82750b15d59663e", "0cf200311921b4a9232a284691ce92b91a05885b", "561269a24f2f2a06409109723a8ab93a01696efc", "30f52a79ff53f8969ffcba19013b4a43e629875f" ], "paperAbstract": "Users today enjoy access to a wealth of services that rely on user-contributed data, such as recommendation services, prediction services, and services that help classify and interpret data. The quality of such services inescapably relies on trustworthy contributions from users. However, validating the trustworthiness of contributions may rely on privacy-sensitive contextual data about the user, such as a user's location or usage habits, creating a conflict between privacy and trust: users benefit from a higher-quality service that identifies and removes illegitimate user contributions, but, at the same time, they may be reluctant to let the service access their private information to achieve this high quality.\n We argue that this conflict can be resolved with a pragmatic Glimmer of Trust, which allows services to validate user contributions in a trustworthy way without forfeiting user privacy. 
We describe how trustworthy hardware such as Intel's SGX can be used on the client-side---in contrast to much recent work exploring SGX in cloud services---to realize the Glimmer architecture, and demonstrate how this realization is able to resolve the tension between privacy and trust in a variety of cases.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3102996", "https://arxiv.org/pdf/1702.07436v1.pdf", "http://www.eecg.toronto.edu/~lie/papers/lie-glimmer-hotos2017.pdf", "http://arxiv.org/abs/1702.07436", "https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46128.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/016bb661e767d8fa2491743d289b11cfc41e3efb", "sources": [ "DBLP" ], "title": "Glimmers: Resolving the Privacy/Trust Quagmire", "venue": "HotOS", "year": 2017 }, "016d1541f81655d3c193aafcfb3e9fab64dba2b3": { "authors": [ { "ids": [ "39782005" ], "name": "Nirvan Tyagi" }, { "ids": [ "1682067" ], "name": "Yossi Gilad" }, { "ids": [ "39946997" ], "name": "Derek Leung" }, { "ids": [ "1901948" ], "name": "Matei Zaharia" }, { "ids": [ "1789973" ], "name": "Nickolai Zeldovich" } ], "doi": "10.1145/3132747.3132783", "doiUrl": "https://doi.org/10.1145/3132747.3132783", "entities": [ "Differential privacy", "Distributed computing", "Encryption", "Information privacy", "Inter-process communication", "Limiter", "Mix network", "Observable", "Provable prime", "Routing", "Scalability", "Tor Messenger", "Traffic analysis" ], "id": "016d1541f81655d3c193aafcfb3e9fab64dba2b3", "inCitations": [ "4c18ad6a3819d0de1d8df2f6ba323b175f985a3c" ], "journalName": "", "journalPages": "423-440", "journalVolume": "", "outCitations": [ "34a9eba074b1439d972541ffcffe70d90bac02aa", "2bc6a80519543859c1c150d132804d8fd69a9d8c", "732bf9b3acce10677bc9409edea8864018d46319", "357af3dd66a8ee994f17c890422fda1b618586d3", "78e2d6b7a671d8e53f207adff088833fd7606e13", "557d8b988bca3d0033189723d11102e04c0c67c0", 
"9d2c1271f1219522d13f150c2b04123bef300dd9", "33bcd8da1f6dc589cde6415434175548fd527ef7", "3cc86ff94309bb58b2125eea173b23ab89f26a3b", "108747579aef6bf029623639a86070feaf5cad41", "03a9f96a5e95587ab319fb3bddb931ee84fb648d", "56dc0aacffd9dc8e9931daa719f78a69b57cfb48", "bac41b59697da3ca5c80ca08f2bbbc97a3576248", "8d69c06d48b618a090dd19185aea7a13def894a5", "b1fa37ec7cf8c76ed30961a86019bb78073f6287", "90f26c0f0d04b9c4999b454c35ee1c7603ca9e4b", "a089defc1eea22b4d3afaeccf031ae110d7af459", "6e8cf181b6e4d759f0416665a3a9f62ad37b316c", "0efa9ee4557c8b0cc8f0d329a0dab34c53fd55f2", "9b2c3acc1806ccfdbae67bc0a353692f0ed31091", "b532099ff8b67049f292cd62700dca37fc2be623", "2fc986fd942797c0bcbebf01f464b375f1dd464d", "4c18ad6a3819d0de1d8df2f6ba323b175f985a3c", "60d6ac52ef063d01cea47601e9b9bde1e3148440", "02dad9c51e3a2e2117ffc41d624de4a090271d1f", "18b1c62d6c7fa0e619f0c13172d8852b3d5a71fe", "1c4906cff8621cc2f240001975d8d956767060f2", "7f14bca3b6f51d4cfe8084798c2808c08b0214d6", "2949851ab9827fdd334ecc3b392296df2aacaf92", "9fcb7035836d98d112e011a7e0d93a4ec8c444d7", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "406a37d8ccb6cb1355b7aeded65e50fc00b2977c", "20ef4778cd48f946bfa63ffb18332199fd3f2ad5", "3c09b40d5ef5c5558174423b43cb5699e3c26107", "3047154c1f8f3f1829180ce7e5bd1e4639689339", "345947186f190649c582204776071ac9a62e8d67", "1996b2011357fc54f023df344f50d120388daac4", "03e4f73474351a62abc9abf2fb17ec6277bb064e", "8750c0b8094957003fd7f681f9ef8af47b86a99d", "d124c709294733d4273de63755ae29b3ed7fbb00", "5b566b58184e302e1bd364903010fcc55a226fd3" ], "paperAbstract": "Private communication over the Internet remains a challenging problem. Even if messages are encrypted, it is hard to deliver them without revealing metadata about which pairs of users are communicating. Scalable anonymity systems, such as Tor, are susceptible to traffic analysis attacks that leak metadata. 
In contrast, the largest-scale systems with metadata privacy require passing all messages through a small number of providers, requiring a high operational cost for each provider and limiting their deployability in practice.\n This paper presents Stadium, a point-to-point messaging system that provides metadata and data privacy while scaling its work efficiently across hundreds of low-cost providers operated by different organizations. Much like Vuvuzela, the current largest-scale metadata-private system, Stadium achieves its provable guarantees through differential privacy and the addition of noisy cover traffic. The key challenge in Stadium is limiting the information revealed from the many observable traffic links of a highly distributed system, without requiring an overwhelming amount of noise. To solve this challenge, Stadium introduces techniques for distributed noise generation and differentially private routing as well as a verifiable parallel mixnet design where the servers collaboratively check that others follow the protocol. 
We show that Stadium can scale to support 4x more users than Vuvuzela using servers that cost an order of magnitude less to operate than Vuvuzela nodes.", "pdfUrls": [ "http://doi.acm.org/10.1145/3132747.3132783", "http://people.csail.mit.edu/nickolai/papers/tyagi-stadium.pdf", "https://people.csail.mit.edu/nickolai/papers/tyagi-stadium-eprint.pdf", "http://people.csail.mit.edu/nickolai/papers/tyagi-stadium-eprint.pdf", "http://eprint.iacr.org/2016/943.pdf", "http://eprint.iacr.org/2016/943", "https://www.cs.cornell.edu/~tyagi/papers/stadium.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/016d1541f81655d3c193aafcfb3e9fab64dba2b3", "sources": [ "DBLP" ], "title": "Stadium: A Distributed Metadata-Private Messaging System", "venue": "SOSP", "year": 2016 }, "017424c0bd3f3208e109998c54db1d294022fe80": { "authors": [ { "ids": [ "2650184" ], "name": "Feng Chen" }, { "ids": [ "2787368" ], "name": "Baojian Zhou" }, { "ids": [ "34265351" ], "name": "Adil Alim" }, { "ids": [ "39164176" ], "name": "Liang Zhao" } ], "doi": "10.1109/ICDM.2017.13", "doiUrl": "https://doi.org/10.1109/ICDM.2017.13", "entities": [ "Algorithm", "Coherence (physics)", "Feature selection", "Generic programming", "Heuristic", "Matching pursuit", "Need for Speed: Hot Pursuit", "Network theory", "Sparse matrix", "SuicideGirls", "Time complexity" ], "id": "017424c0bd3f3208e109998c54db1d294022fe80", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "41-50", "journalVolume": "", "outCitations": [ "6dc112431fe6db74149d5885f08f896f60d0b0d8", "95121dc15cc145631137123ec2c0a2d7a82c14f2", "c1bea7d028c68212554b163f90d5d78065b2e6f1", "1f41bb748ef6041631b7e282fc42db0b31d9c8ec", "1c4163fd169b66b671ce0125d356bfe25af9636c", "5e3fd8ae6061d8067ad1cb5b12a22ceac3187852", "378b33222bfce367e7adb849066e7adae5d9a59c", "816d7bff3a6e9a07d2e569d61c7df5dc204176d7", "76bb50343855980bd451bc16ca2d028e7e70fe10", 
"2fb36ecf864f40e84f2e50a5152107e16a03fb21", "1f142bdabb47238532425ead592cba0537c76b37", "1326a25a6961a25af5fe49ddff20c98b961985c5", "1d6320a672b866444737880cee8a980f5cca6864", "1b348075d02cc532b1a01955e21ba3062e769113", "0a814be5d4918e0e536bb98d7c01a8f693777a6e", "6968d5994081068d3eea1f1e1f81f0482145bb5c", "94f56da3968c15afc2bf0a7e2a738be3876d7f3c", "00568ab7c7ee96bcf1c5d2ceba1471404c7be2b2", "67e57fc602e324a7269370345ce03bd3e38384b5", "b981dc00f4b49f16663f4ce64675db7b9a096606", "3783fd271a4fa5b65894743c0a6b19a02b268120", "02e17c55b4c73929a769e99ff9b542ba35bbe1a3", "4909317c14d7098edb31c2e34c2a812a39a47105", "8d25720bcc12220c0e0cf5a0168de2c0ad6ef6e8", "d1be7b6901ce22847843af4a3874fa1b2cfdfb0c", "025e224cf0f12f772c7efba4f7c6b769a2bf298b", "344dad92401fd4800b85643de323750e1e65117a", "56faf302eb810957b7be3b556539be93e2dc9ff0", "0b476d58458fb1b00a43ea0de4af01fd7655ce90", "b3523fdf49fe6c841d7174f66abdd8161c14c794", "4948b089ac744e41a262aa98e89f655b74f47193", "99ac4b8d3c8790a50a468d8268cff00651cb65b6", "83188372a74c6b93e50f2e54a0bd2a29bd97e64c", "a6693adb1f6e15060396fed9e53189266097ee35", "e0d2861a9022667a93a8a0573d44f238f7c3a027", "f85c2d9d19e181ee6a18cffd753dd478337fce68", "78278b8e06729dbebfa060ed4f40788cb9213ba4", "e0c77c067380cea9e8841cc04a8d3ffd3147db4c" ], "paperAbstract": "Detection of interesting (e.g., coherent or anomalous) clusters has been studied extensively on plain or univariate networks, with various applications. Recently, algorithms have been extended to networks with multiple attributes for each node in the real-world. In a multi-attributed network, often, a cluster of nodes is only interesting for a subset (subspace) of attributes, and this type of clusters is called subspace clusters. However, in the current literature, few methods are capable of detecting subspace clusters, which involves concurrent feature selection and network cluster detection. These relevant methods are mostly heuristic-driven and customized for specific application scenarios. 
In this work, we present a generic and theoretical framework for detection of interesting subspace clusters in large multi-attributed networks. Specifically, we propose a subspace graph-structured matching pursuit algorithm, namely, SG-Pursuit, to address a broad class of such problems for different score functions (e.g., coherence or anomalous functions) and topology constraints (e.g., connected subgraphs and dense subgraphs). We prove that our algorithm 1) runs in nearly-linear time on the network size and the total number of attributes and 2) enjoys rigorous guarantees (geometrical convergence rate and tight error bound) analogous to those of the state-of-the-art algorithms for sparse feature selection problems and subgraph detection problems. As a case study, we specialize SG-Pursuit to optimize a number of well-known score functions for two typical tasks, including detection of coherent dense and anomalous connected subspace clusters in real-world networks. Empirical evidence demonstrates that our proposed generic algorithm SG-Pursuit is superior over state-of-the-art methods that are designed specifically for these two tasks.", "pdfUrls": [ "https://arxiv.org/pdf/1709.05246v1.pdf", "http://arxiv.org/abs/1709.05246", "https://dac.cs.vt.edu/wp-content/uploads/2017/11/a-generic-framework.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.13" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/017424c0bd3f3208e109998c54db1d294022fe80", "sources": [ "DBLP" ], "title": "A Generic Framework for Interesting Subspace Cluster Detection in Multi-attributed Networks", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "017c91abd8adf741a4e8b06daf93a964fc57a820": { "authors": [ { "ids": [ "24471056" ], "name": "Hyun Wook Baek" }, { "ids": [ "1746708" ], "name": "Abhinav Srivastava" }, { "ids": [ "2358499" ], "name": "Jacobus E. 
van der Merwe" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud computing", "FUJITSU Cloud IaaS Trusted Public S5", "Layer (electronics)", "Service abstraction" ], "id": "017c91abd8adf741a4e8b06daf93a964fc57a820", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "268-273", "journalVolume": "", "outCitations": [ "2dd86a08595948d29568e5de245a0a9b59b28c7b", "0c4c6eacefe36063d2d564184273a32ca815a958", "6b81d4b1ef1c5bfce830e798d230561d5608acd9", "765ee60756583f27322f1316da40696ae72812ac", "daef64fb7bcdf9bc6eb999c91b8699b926edb50b", "808fadaaa7d7091e95809f419959917bb6ce4a6d", "1f367238213ea0fff128fced7e768f19de08ee93", "6fdf88d5463b64fc8fad6881a56c44517348da67", "7189769129ded261fc00b1fc66f4461f7d48c97d", "283ae048d4bd7603bbf7bdae059580079439e1b8", "53aabc0ab7bdb22c4bb5b508a4db2fc4a2387060", "10da8673314188dd6ab1f16f73c05358771dd8cf", "7e02c5b0e83cd6bba4c94c458bdb7079e97c36cd", "4e2833f3fc24c37bd416ad59ebc914701d6eedb9", "625150cb2523db4af61281895290b95a946fbea2" ], "paperAbstract": "Troubleshooting in an infrastructure-as-a-Service (IaaS) cloud platform is an inherently difficult task because it is a multi-player as well as multi-layer environment where tenant and provider effectively share administrative duties. To address these concerns, we present our work on CloudSight in which cloud providers allow tenants greater system-wide visibility through a transparency-as-a-service abstraction. We present the design, implementation, and evaluation of CloudSight in the OpenStack cloud platform. 
We also develop two example applications that make use of the CloudSight abstraction and use the applications to explore real cloud problems.", "pdfUrls": [ "http://www.cs.utah.edu/~baekhw/assets/demo_cloudsight.pdf", "http://dl.acm.org/citation.cfm?id=3101150", "http://www.flux.utah.edu/download?uid=260", "http://www2.cs.utah.edu/~baekhw/cloudsight.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/017c91abd8adf741a4e8b06daf93a964fc57a820", "sources": [ "DBLP" ], "title": "CloudSight: A Tenant-Oriented Transparency Framework for Cross-Layer Cloud Troubleshooting", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "01814d50f0a7699f264c21d986ccd1f390b101b2": { "authors": [ { "ids": [ "1680465" ], "name": "Rui Zhang" }, { "ids": [ "1757885" ], "name": "Natalie Stanley" }, { "ids": [ "38199747" ], "name": "Christopher Griggs" }, { "ids": [ "3388635" ], "name": "Andrew Chi" }, { "ids": [ "2480778" ], "name": "Cynthia Sturton" } ], "doi": "10.1145/3037697.3037734", "doiUrl": "https://doi.org/10.1145/3037697.3037734", "entities": [ "Machine learning", "Open-source software", "OpenRISC 1200", "Toolchain", "Vulnerability (computing)" ], "id": "01814d50f0a7699f264c21d986ccd1f390b101b2", "inCitations": [ "49e88c6bcaea88ddeccd6fb19fee950137819d3e" ], "journalName": "", "journalPages": "541-554", "journalVolume": "", "outCitations": [ "3c008a42e6b9d627c5a8feac0757403dbff959a3", "774d50f55cf9268435a2147e92e025c9309e2947", "b6b07bb0ffd85a090814580de1291ce39182a467", "d3ca4145bd84d983c6732a354bdb4800ae47cea1", "b8719183f3579e6f0bdf2d98ee500097a28cb9cf", "217dd588fc53f4dbe51210145f1b9b2ffe92fbd0", "9f0c016bb12e1567a1d3a460493957ae135a0d40", "6465ad714d44d18e50adb9f69b36c24ad6ba83ce", "551ac1a6b959a911209846c7f9a0c07c69c2bb7b", "457f7e9363c673e54200378119b533115285209d", "216b99ebe093c1b363654baf662f592df305d295", "61d15445ca86bad719ed5d829b984408223c3578", 
"3a00c02ea51911170a263f4a75959754b7da66e5", "19c3578a68605eb06a6dd41c927d56d12b47af45", "46217f372a75dddc2254fdbc6b9418ba3554e453", "12789fd5b47542937d1b83ef8b99bdb9c7a70dec", "49f8e6bc2679d0a7ec4a1e52d2956ff336211bfb", "889c2c6d08861a078094c724709f8ccac3a86cd1", "7f84a6906b4aa6734e2e5f9b7cb83786e65de637", "a5f62c892c2ff4cb283f66c27b655dd0ae14eca3", "0003ce240eb8c05cee9c56c54e16c0e3b84390dd", "a282bfe3f936372a6de642dc0396b0fd6e44576b", "65b6079988ec29ef3c6d62daf88b0f9e2ceee14c", "34533947ac8274a4757716df754ffe4ff992fb1a", "6481022b05642727f8425bde6a0717a90065dbe7", "0c80eb8588fac0a763a15e1b7a33c6d885ce80a4" ], "paperAbstract": "We present a methodology for identifying security critical properties for use in the dynamic verification of a processor. Such verification has been shown to be an effective way to prevent exploits of vulnerabilities in the processor, given a meaningful set of security properties. We use known processor errata to establish an initial set of security-critical invariants of the processor. We then use machine learning to infer an additional set of invariants that are not tied to any particular, known vulnerability, yet are critical to security.\n We build a tool chain implementing the approach and evaluate it for the open-source OR1200 RISC processor. 
We find that our tool can identify 19 (86.4%) of the 22 manually crafted security-critical properties from prior work and generates 3 new security properties not covered in prior work.", "pdfUrls": [ "https://cs.unc.edu/~csturton/SCIFinder/files/ASPLOS17Talk.pdf", "http://doi.acm.org/10.1145/3037697.3037734", "http://cs.unc.edu/~csturton/SCIFinder/files/ASPLOS2017.pdf", "http://cs.unc.edu/~csturton/papers/ASPLOS2017Zhang.pdf", "http://cs.unc.edu/~csturton/SCIFinder/files/Lightening-7B-RuiZhang.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/01814d50f0a7699f264c21d986ccd1f390b101b2", "sources": [ "DBLP" ], "title": "Identifying Security Critical Properties for the Dynamic Verification of a Processor", "venue": "ASPLOS", "year": 2017 }, "01cabaadb1da4f5fa0fba62bca31f7d70a9ab939": { "authors": [ { "ids": [ "35930993" ], "name": "Tian Tan" }, { "ids": [ "30461535" ], "name": "Yue Li" }, { "ids": [ "38726687" ], "name": "Jingling Xue" } ], "doi": "10.1145/3062341.3062360", "doiUrl": "https://doi.org/10.1145/3062341.3062360", "entities": [ "Automaton", "Call graph", "Java", "Pointer analysis" ], "id": "01cabaadb1da4f5fa0fba62bca31f7d70a9ab939", "inCitations": [ "ce992c5be70243c83a5faaeea3f314ebd36302a9", "4defad6c060c69d346f54b8912a2dbe3a8efd79a", "cf4b581f3622fc177bdcc8d1c936034e531b09c0", "ba7e09e838fe013b8d8789e4f0313133752b0b6e", "fb6bf892d3373df6c7d7fea5af40f0a61788b1ed", "830f6be24ab13dcbc4154bd52469fbb85ff25f0e" ], "journalName": "", "journalPages": "278-291", "journalVolume": "", "outCitations": [ "a38f20ccaf6369feadb2341109f1857848adfe8b", "041fb17a8de187528529990e43f14280d420002f", "fc1de916fa550c35e57ae8ccbc3874f509db0ca7", "0a980373963ba017fc320148ac4e1bf1259d4ef5", "8469007f1d62cac7d7188dbe6b67b24d40d46ca4", "5a2ec85f87d518ff6b6f57aeac1c43bdb35729c8", "0e7f5980e4083c12011be3783bd23e788e6b2ad2", "5abd03095061c25ab7c4fab6b33a6ceb999c78e3", "75e47286fe208102a63775c1e05dc61a5da607d3", "9ea49abc003a832776df864a92838b3b51f3e55e", 
"027eb436c35c7e293e7ebc565163cb54c05fe2e9", "94d0e8b118efecb9f5b2056d84bf22253a2fb63c", "3db4291a1a629876516bb06ae798a98475fb0148", "87ad5767017a8487196257b4ae93a52765f429bf", "3e6d92ed139f19418c74cce6697fbd2de609138e", "b7efe971a34a0f2482e0b2520ffb31062dcdde62", "85ff7e25c39d216bd50ac6eb89e335ca7aef43f4", "e8af823e0b25acfc8e41b59805e17d9d1b126990", "6c1fa64a6b5d3565fc4ab9ca97bf530069ce2669", "068e9f8dd77ecfcc2019fdf3123d163b159fe4eb", "a578530c785b14f54918720ee4acb672ffe3986e", "1f32cece629d41929e6913f3b445b93bf2c168ac", "22999e5ace5fe1ff41bcea18e997a48d1af108e4", "17f58c906c6f453fc10b1d7e4db0e545b70e27d1", "30f7824cce02499632d2b04f154bbe70d6ce3118", "8bfd64fe8f9192a8b3c801c7d91fd46cabfc5319", "187768583aa8fd7dfe64cc88cb2aa831b6b531db", "724f15daaf81ef1cd7a9419bc69f59bba19cbe88", "80a36a56472c4929ea9daf59516f4502320d4764", "75724ed033e8d7978feb14d5e78fb7fa58bb5ad0", "153d144f411f7054b0c4bbd6b829a3d8c2b2df31", "1753d3e97fdbe7799b9625cb873b77eef506a608", "44daa1fde25be30d21c4a1a32b7af314c9890af8", "454b06a17e2f6656e65935cb9d36dcf2b2044bf5", "0ab515c25b8cd689fce64c5c52f8fddb10abba52", "107ef0cf32ea69f8dd3e8939ac6829524726c2e8", "3803066162073c027179103dd18de3e9ae378d45", "042396ba29d59a083366154c29aab7a28dccac37", "5cb216302bdebaec708f705f83b317eeccf73753", "5e567cda5999a6dd4e5da4bb30b9033f8d5687c4", "80af0dfde58a4f1e4f7ff35fa2c882a4ab3bbad2", "31181e73befea410e25de462eccd0e74ba8fea0b", "5aed9231774c7742431d79c22de749c79f7e56e2", "3597bcdb6f9eb154abb80c15368d67ef169bfacf", "03aacfe8d36a673ecc379d3b76e7df1245a8d9e5" ], "paperAbstract": "Mainstream points-to analysis techniques for object-oriented languages rely predominantly on the allocation-site abstraction to model heap objects. We present MAHJONG, a novel heap abstraction that is specifically developed to address the needs of an important class of type-dependent clients, such as call graph construction, devirtualization and may-fail casting. 
By merging equivalent automata representing type-consistent objects that are created by the allocation-site abstraction, MAHJONG enables an allocation-site-based points-to analysis to run significantly faster while achieving nearly the same precision for type-dependent clients. \n MAHJONG is simple conceptually, efficient, and drops easily on any allocation-site-based points-to analysis. We demonstrate its effectiveness by discussing some insights on why it is a better alternative of the allocation-site abstraction for type-dependent clients and evaluating it extensively on 12 large real-world Java programs with five context-sensitive points-to analyses and three widely used type-dependent clients. MAHJONG is expected to provide significant benefits for many program analyses where call graphs are required.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062360", "http://www.cse.unsw.edu.au/~tiantan/papers/pldi2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/01cabaadb1da4f5fa0fba62bca31f7d70a9ab939", "sources": [ "DBLP" ], "title": "Efficient and precise points-to analysis: modeling the heap by merging equivalent automata", "venue": "PLDI", "year": 2017 }, "01d83aa4653f1eefff1ea0f017b30423ff4f818f": { "authors": [ { "ids": [ "2861707" ], "name": "Hamid Reza Faragardi" }, { "ids": [ "2905669" ], "name": "Hossein Fotouhi" }, { "ids": [ "2191832" ], "name": "Thomas Nolte" }, { "ids": [ "2627161" ], "name": "Rahim Rahmani" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.77", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.77", "entities": [ "Algorithm", "Benchmark (computing)", "Experiment", "GRASP", "Greedy randomized adaptive search procedure", "Internet of things", "Limiter", "Max", "Reconfigurability", "Requirement", "Routing", "Sensor", "Sensor node", "Software deployment", "Software-defined networking" ], "id": "01d83aa4653f1eefff1ea0f017b30423ff4f818f", "inCitations": [], "journalName": "2017 IEEE 19th International 
Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "594-602", "journalVolume": "", "outCitations": [ "72a5b2cd900321fe6e0f826dc0e80443971b920e", "56637368f9e82fdd3fecb2a534b22c799320cfa5", "19a3606af56b6d2d9a8fc4e922a47550275ec24f", "0de1204aacb9382651aa569d141820d35a1290f3", "57ec950e812cbd6b3e00846bb32d2d6a806bd8b6", "428940fa3f81f8d415c26661de797a77d8af4d43", "3965bf4d76c1c989781a14a39252d05254822554", "b0ddeebce9c975a1c1a8576362686ca45eb31316", "7f66746a864d531abdf13f77ce2826f923d76537", "7e6a98281b69809a6dad94cafdda20422d1f08a5", "1154502149a4a00f2d8403e4aabff62e904d7b6b", "6a6bb873d4131120c5b4f72251426d94e12a72f4", "8abf04223c6162d07d3d02be48478aa7e5aa82bb", "6159c68bc6202d99c64a4a9330b1738b50dc4a02", "bb792c7de9abb8bf96f4c9e499799e797c245a96", "fdb53266b101ad1b4d106720f5ff98fa8df9acf5", "a66f79d3e5b9667392bde0d4aebe5b5db1040dba", "a4955a9a10031cbbf1caa5986d08a00cc022c571" ], "paperAbstract": "Internet of Things (IoT), one of the key elements of a smart factory, is dubbed as Industrial IoT (IIoT). Software defined networking is a technique that benefits network management in IIoT applications by providing network reconfigurability. In this way, controllers are integrated within the network to advertise routing rules dynamically based on network and link changes. We consider controllers within Wireless Sensor Networks (WSNs) for IIoT applications in such a way to provide reliability and timeliness. Network reliability is addressed for the case of node failure by considering multiple sinks and multiple controllers. Real-time requirements are implicitly applied by limiting the number of hops (maximum path-length) between sensors and sinks/controllers, and by confining the maximum workload on each sink/controller. 
Deployment planning of sinks should ensure that when a sink or controller fails, the network is still connected. In this paper, we target the challenge of placement of multiple sinks and controllers, while ensuring that each sensor node is covered by multiple sinks (k sinks) and multiple controllers (k′ controllers). We evaluate the proposed algorithm using the benchmark GRASP-MSP through extensive experiments, and show that our approach outperforms the benchmark by lowering the total deployment cost by up to 24%. The reduction of the total deployment cost is fulfilled not only as the result of decreasing the number of required sinks and controllers but also selecting cost-effective sinks/controllers among all candidate sinks/controllers.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.77" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/01d83aa4653f1eefff1ea0f017b30423ff4f818f", "sources": [ "DBLP" ], "title": "A Cost Efficient Design of a Multi-sink Multi-controller WSN in a Smart Factory", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "01d8f75b6382c7534a67637249122de28a780ce9": { "authors": [ { "ids": [ "38566150" ], "name": "Michael Wei" }, { "ids": [ "37531541" ], "name": "Amy Tai" }, { "ids": [ "1692790" ], "name": "Christopher J. Rossbach" }, { "ids": [ "1804661" ], "name": "Ittai Abraham" }, { "ids": [ "9773986" ], "name": "Maithem Munshed" }, { "ids": [ "35081828" ], "name": "Medhavi Dhawan" }, { "ids": [ "5170084" ], "name": "Jim Stabile" }, { "ids": [ "1753945" ], "name": "Udi Wieder" }, { "ids": [ "21841246" ], "name": "Scott Fritchie" }, { "ids": [ "1760342" ], "name": "Steven Swanson" }, { "ids": [ "3122063" ], "name": "Michael J. 
Freedman" }, { "ids": [ "1767467" ], "name": "Dahlia Malkhi" } ], "doi": "", "doiUrl": "", "entities": [ "Data store", "Finite-state machine", "NoSQL", "State machine replication", "Strong consistency" ], "id": "01d8f75b6382c7534a67637249122de28a780ce9", "inCitations": [ "340d6db56d94623ac090599cf9ea5287370607ef", "33f95f238e12e1790ad880ec40cf6c63ea4a70dc", "797dad570f82414592a87ed7ebdce44f9801e8df" ], "journalName": "", "journalPages": "35-49", "journalVolume": "", "outCitations": [ "1a133e61010294b0cd77fa851dfeea7292e49439", "e2c6297a9ad5118dc4a6a0dab6a2af2b83545e3d", "e4c1d1ad684535bf835475aafb8fcfe5d23b0a93", "206b20f225fc655dfac733b6f0bd8077ed86215e", "039f09d49bc408db9e0e8429e6bd92be49c5f72e", "02ebdcf8200135ec0433e12e4ef2459ac740370b", "29a05cde1994548e2e9487822248c679626c6241", "d12d1289d2384c2ce642f01855637b9f0519e189", "9748241beb02ef1e2d0e6dc877c04b354033a838", "088e3e939ad234b6fdd0e321290fb26937dc2553", "5ea7103a1c39de9f96fefe5b02fd9306ae439c9f", "517e239f97f50079bc557cccf1a6b56aa5736d30", "a1c5e3904aa14e42e9ffa6f5903229245f1fa067", "9aa0d7253574e50fe3a190ccd924433f048997dd", "7062268b78dff4a8819fe3f1e89c6b5344f715a5", "49af572ef8f7ea89db06d5e7b66e9369c22d7607", "13f7c5807452ae602046582a385c0fb544ec5de1", "d0b6d2075a653d60452b6df0fced4ee0ae093dd2", "be1815b0102d79b62a14ae39867e3ecc8146cfe9", "7afa08d7c1c6c8758ee1227437c69463d5441d09", "3702d6e0c78050f3261fdbf0eb1aefbac59fb8cf", "11d23a3e7b03ef9679bb4cd47c631118f56f67e3", "5ba9e730afd256ed1138fb563e59c214c6ec9259", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "068e59b88a1230d709d99c83a45d3a5b91260810", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "13d6c568c770ff5a070072e720fb34b0037cdab8", "9f948448e7a5f0cc94cd53656410face8b31b18a", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "3784fcc2a0789ee1f0d26a34822b8138895e3340", "07d847f310d5fa9138f461f0a25c5e0024f1c4af" ], "paperAbstract": "This paper presents vCorfu, a strongly consistent cloudscale object store built over a shared log. 
vCorfu augments the traditional replication scheme of a shared log to provide fast reads and leverages a new technique, composable state machine replication, to compose large state machines from smaller ones, enabling state machine replication to be used efficiently in huge data stores. We show that vCorfu outperforms Cassandra, a popular state-of-the-art NoSQL store, while providing strong consistency (opacity, read-own-writes), efficient transactions, and global snapshots at cloud scale.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-wei-michael.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/wei-michael", "http://www.cs.princeton.edu/~mfreed/docs/vcorfu-nsdi17.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-wei-michael.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0247/2fb494ab2e19aa0756bed34b9c6ea73baf4b.pdf", "s2Url": "https://semanticscholar.org/paper/01d8f75b6382c7534a67637249122de28a780ce9", "sources": [ "DBLP" ], "title": "vCorfu: A Cloud-Scale Object Store on a Shared Log", "venue": "NSDI", "year": 2017 }, "01de7e8a5ec0c5de8c34ea2fc91d82b9db1c2715": { "authors": [ { "ids": [ "1720537" ], "name": "Slawomir Hanczewski" }, { "ids": [ "6589337" ], "name": "Maciej Stasiak" }, { "ids": [ "2310025" ], "name": "Joanna Weissenberg" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.40", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.40", "entities": [ "FIFO (computing and electronics)", "Queueing theory" ], "id": "01de7e8a5ec0c5de8c34ea2fc91d82b9db1c2715", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "303-308", "journalVolume": "", "outCitations": [ "198b7c85c45bffba2801e9cf874196dbc9337ebb", 
"d25c5750dcd469c4b3e51e6009b0b0d3618be883", "01eda83bebb9f9d152ff313c5a8e33acef259300", "61a8ee5e0efb32d91b6d3fb462f772f88ec346a4", "79e70288eb694f59b89d6a3d8427f9aac12ef6eb", "235aaa6eeb29aded47741abf50a4a150d1fb6d34", "ed9a7ccd585f3cd6d4c21c4189f912c34bdbbff3", "842c69d58a95392dfb1ba4d4e72da5ac43fa0f7a", "bb19cbdc3bf25c00811d80e4268a730f08e775d5", "e8a82f419e80b94e033517dff5c9a9df8875333b", "06583b13ce4e59a72e8c7efc32fc818f906583a4", "50768e5420c733ca07ab6b696dfcf504d61d6c18", "03003cf918171f6c8f4ed42d7903099cccd1dac6", "fa1c3643b60551314f93ab7bd1d1aec16db4b69a", "4e3dda4201f8dc26b5ef66efe28e9eb624a91c18", "27f98925d8eb3447824b7eeea9258dfbdf877925", "c48e489147cb15b486addfdc9013092f4205caf8" ], "paperAbstract": "This paper presents a model of a queueing system with multi-service adaptive traffic. This type of low-latency traffic is one of the most dominant type of traffic in modern packet networks. The idea and operation of the model is discussed with the example of a queueing system with the FIFO service discipline.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.40" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/01de7e8a5ec0c5de8c34ea2fc91d82b9db1c2715", "sources": [ "DBLP" ], "title": "The Model of the Queuing System with Adaptive Traffic", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "01e1cba36e403c5c620d49e3eda73b29fcfc616b": { "authors": [ { "ids": [ "1893193" ], "name": "Aws Albarghouthi" }, { "ids": [ "1806462" ], "name": "Loris D'Antoni" }, { "ids": [ "31638968" ], "name": "Samuel Drews" }, { "ids": [ "34894873" ], "name": "Aditya V. 
Nori" } ], "doi": "10.1145/3133904", "doiUrl": "https://doi.org/10.1145/3133904", "entities": [ "Fairness measure", "Imperative programming", "Model checking", "Program analysis" ], "id": "01e1cba36e403c5c620d49e3eda73b29fcfc616b", "inCitations": [ "3f9a733ff17f080a15eab97dd26697652363d933" ], "journalName": "PACMPL", "journalPages": "80:1-80:30", "journalVolume": "1", "outCitations": [ "1459e3e4242c2590c6875976de4859ea2a58bf6c", "2f3edee1d3459096ba1de54450fca4d8406d1ed1", "d80c777a0bbb948c58059d6862aaa28203d68551", "7cecdf2eee3d4ca7191730ea923a24d8d52acc68", "ce77c7ed66ea25d51ee09839ed6d38ab5f815857", "a92d721f22001ecbca87655c5963f048f7d5e013", "24799da8a19cc41225024310ce9a9655a548516c", "1c406afc440c357764a4e686f571f52becaaed80", "4087b4bd8841691b84aa546f16129a49ae2b5c20", "9809537f4fa24532f45a6f94fd5a41a46217cd46", "20dbc7f13253f47f31a2d86217fa38a1fcb03a21", "c3eba5fcba83f9637e83c1ad8be15944f22b15c1", "a6d2d771e8d6dd16a7da3479dbd0619f2400438c", "15581e7eca3f1cfe02b148fe307e55f87eafbfc4", "4556f3f9463166aa3e27b2bec798c0ca7316bd65", "59684cf4f60456f5eea2991a0d7f90095f37a657", "a538b05ebb01a40323997629e171c91aa28b8e2f", "153f586c3b4f3047900f9f5b5ddf61a37309d698", "89a193c4e9f80122c8b7ae083db4749c65e600fe", "204dc0986b512a95a66632556d10c3c162caf7b7", "04282d68a4c3cbeaa5adbc9a62d2a756bdf679f6", "73062e44e8a4b3d80c0a98e009c9604dc90d3911", "4adc02e7c7e265bed0d4ba374ac513181bb043b0", "d000c039374bf46905720d5b140f65761b00f51f", "2bb7e029b50af446a494cccfb32c482e66fe2365", "164347c68dc8f901b7bd56890f3beefe0a407111", "6d6809d45bbfba2d5cd086bda6cc2efb34cb79ff", "2c536e9401daeede3408f9f614a10b16d2ea77ac", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "0b384a6e32db24b4d217b71d5fa18c38f7332788", "0da7cc27b817a702148b0f0f23aa5f0ad626b502" ], "paperAbstract": "With the range and sensitivity of algorithmic decisions expanding at a break-neck speed, it is imperative that we aggressively investigate fairness and bias in decision-making programs. 
First, we show that a number of recently proposed formal definitions of fairness can be encoded as probabilistic program properties. Second, with the goal of enabling rigorous reasoning about fairness, we design a novel technique for verifying probabilistic properties that admits a wide class of decision-making programs. Third, we present FairSquare, the first verification tool for automatically certifying that a program meets a given fairness property. We evaluate FairSquare on a range of decision-making programs. Our evaluation demonstrates FairSquare’s ability to verify fairness for a range of different programs, which we show are out-of-reach for state-of-the-art program analysis techniques.", "pdfUrls": [ "http://pages.cs.wisc.edu/~aws/papers/oopsla17.pdf", "http://pages.cs.wisc.edu/~sdrews/slides/oopsla17slides.pdf", "http://pages.cs.wisc.edu/~sdrews/papers/oopsla17.pdf", "http://pages.cs.wisc.edu/~loris/papers/oopsla17.pdf", "http://doi.acm.org/10.1145/3133904" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/01e1cba36e403c5c620d49e3eda73b29fcfc616b", "sources": [ "DBLP" ], "title": "FairSquare: probabilistic verification of program fairness", "venue": "PACMPL", "year": 2017 }, "01f42436042ddaa48998c87109cbe46cad6e7e52": { "authors": [ { "ids": [ "3315868" ], "name": "Prathmesh Kallurkar" }, { "ids": [ "2550384" ], "name": "Smruti R. 
Sarangi" } ], "doi": "10.1145/3123939.3123984", "doiUrl": "https://doi.org/10.1145/3123939.3123984", "entities": [ "Bloom filter", "Cache (computing)", "Database server", "Forth", "Interrupt handler", "Kilobyte", "Network switch", "Operating system", "Scheduling (computing)", "System call", "Web server", "Windows Task Scheduler" ], "id": "01f42436042ddaa48998c87109cbe46cad6e7e52", "inCitations": [ "622a11843f129452d0c9daeb87a076d02fd2a0f0" ], "journalName": "", "journalPages": "612-624", "journalVolume": "", "outCitations": [ "1106d3a383899c13c9a63f293ad78c02631ee5ce", "0abc3e83ccd6e685f8d0299f24f03ae28f4c2459", "2960c89331eb7afa86584792e2e11dbf6a125820", "7ff303e7c450aee82b6fff5cc64be54e5604da01", "0e2ee93bb53d93684d5276a07a582c574770ab53", "48536fdbbc79ddf163901c7e63bb70b6f64802e0", "298cc4031c95a634371fa9cfc4fe2f09e579493d", "5fda732aae5f0d845c8ff2e72f144f3d69e362d9", "10fede77f843e9eb5ef1768a17543013616d9243", "158ebe313a72857c5534a313f3ec0e413593b732", "b8bfce11df38955685c09f408ca3f7828af2a0c1", "109df0e8e5969ddf01e073143e83599228a1163f", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "907e4972815c0fcd484d335a9c3fd4cccc9a081e", "7932a4597cec5149c575aa2303fe8f12241e4320", "29c324788b83463aa707784210edbca894694f20", "fae207eff574ee2994bc70437954ccb0b139ec7b", "0852a44c86db434e9b51c67704636791e9940487", "093d1d23500d65e99c7d0cd5569ce0f0d4c37076", "9acfb20d3e1ef5ad6a3c0361d88f6839fec99fac", "286b5b80bc76dbb63094a85951bb8e8895ee9f14", "09bd66ed15985caa6b0bf1d54a36b508141ed128", "2612541a89857949bc512b6fb2ad7f0c153cb97c", "00a739b660486ec6468ea53f15fddc84df8b6631", "43644b8cd34a759e5cda4953c57dba0bb3e25805", "16aecf8b1a0a97fac8681741febe434be2dc0b28", "8cfa975a656838356dc4b211b6c2186bc2601a05", "3593269a4bf87a7d0f7aba639a50bc74cb288fb1", "44135fd9b5e38cf29e41d675f9eac670455ed860", "0beb2535d25abcc2101ca7dd5502f610e2a553ab" ], "paperAbstract": "The execution of workloads such as web servers and database servers typically switches back and forth between different tasks 
such as user applications, system call handlers, and interrupt handlers. The combined size of the instruction footprints of such tasks typically exceeds that of the i-cache (16--32 KB). This causes a lot of i-cache misses and thereby reduces the application's performance. Hence, we propose SchedTask, a hardware-assisted task scheduler that improves the performance of such workloads by executing tasks with similar instruction footprints on the same core. We start by decomposing the combined execution of the OS and the applications into sequences of instructions called SuperFunctions. We propose a scheme to determine the amount of overlap between the instruction footprints of different SuperFunctions by using Bloom filters. We then use a hierarchical scheduler to execute SuperFunctions with similar instruction footprints on the same core. For a suite of 8 popular OS-intensive workloads, we report an increase in the application's performance of up to 29 percentage points (mean: 11.4 percentage points) over state of the art scheduling techniques.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123984", "http://www.cse.iitd.ernet.in/~srsarangi/files/papers/schedtask.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/01f42436042ddaa48998c87109cbe46cad6e7e52", "sources": [ "DBLP" ], "title": "Schedtask: a hardware-assisted task scheduler", "venue": "MICRO", "year": 2017 }, "01f660b239de2a7d250315053170ee792a91f4b5": { "authors": [ { "ids": [ "2026805" ], "name": "Robert V. Lim" }, { "ids": [ "1763308" ], "name": "Boyana Norris" }, { "ids": [ "1687994" ], "name": "Allen D. 
Malony" } ], "doi": "10.1109/ICPP.2017.61", "doiUrl": "https://doi.org/10.1109/ICPP.2017.61", "entities": [ "Auto-Tune", "CUDA", "Compiler", "Experiment", "Graphics processing unit", "Programmer", "Static program analysis" ], "id": "01f660b239de2a7d250315053170ee792a91f4b5", "inCitations": [ "e2bbb648b6312fafce1cfcc72edd90a56ac7ab24" ], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "523-532", "journalVolume": "", "outCitations": [ "478ca7603036efcf3b6a02a6540d6a84351ef23d", "10d3e0f0648d0a5cfaebb3044ea7b14a52e54466", "3020f7f8381227c90ac58466ec116f470d0b63ec", "05d64be0e237c447ebee3ede50106ee4177d6daa", "1e375b7bd9b02336371dbbb06bee4a94b2a93fc8", "1f2ff98f9413bb36c641e9edcfa79f7b33eeb80a", "8c0b0e80514f70b4eef3a274315168c7a5a66335", "3e5c959b0371e95efb45f7e375801ddba23aa7bb", "12f9926c247e27f136efdbff0c76f36af75a9291", "04d54c2219b750371eb4c2f234c6069c1b40971a", "c4ec5dc7d68d858e141113feca9921c632b3b2d5", "38dad8ef8d98aa81c9072c905ce851d33916bfca", "1ac19f434c742202451da7c44591c52ad3f9e9fd", "035c542402de661b544603d84b7ec45bada14e7f", "17056314e26434c4e71cf8f30da8926bb858651f", "3218bbfd89deae4134d6c6d7f8f3ceb5c3a361f7" ], "paperAbstract": "Optimizing the performance of GPU kernels is challenging for both human programmers and code generators. For example, CUDA programmers must set thread and block parameters for a kernel, but might not have the intuition to make a good choice. Similarly, compilers can generate working code, but may miss tuning opportunities by not targeting GPU models or performing code transformations. Although empirical autotuning addresses some of these challenges, it requires extensive experimentation and search for optimal code variants. This research presents an approach for tuning CUDA kernels based on static analysis that considers fine-grained code structure and the specific GPU architecture features. 
Notably, our approach does not require any program runs in order to discover near-optimal parameter settings. We demonstrate the applicability of our approach in enabling code autotuners such as Orio to produce competitive code variants comparable with empirical-based methods, without the high cost of experiments.", "pdfUrls": [ "http://arxiv.org/abs/1701.08547", "https://arxiv.org/pdf/1701.08547v1.pdf", "https://arxiv.org/pdf/1701.08547v2.pdf", "https://arxiv.org/pdf/1701.08547v3.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.61" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/01f660b239de2a7d250315053170ee792a91f4b5", "sources": [ "DBLP" ], "title": "Autotuning GPU Kernels via Static and Predictive Analysis", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "0201403268e3aaa552165dfc53a86534151c3dd9": { "authors": [ { "ids": [ "2143124" ], "name": "Sajal Dash" }, { "ids": [ "3447404" ], "name": "Anshuman Verma" }, { "ids": [ "1796013" ], "name": "Chris North" }, { "ids": [ "1688860" ], "name": "Wu-chun Feng" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.2", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.2", "entities": [ "Algorithm", "Approximation algorithm", "Data pre-processing", "Dimensionality reduction", "GLIMMER", "Interactive visualization", "Multidimensional scaling", "Real-time data" ], "id": "0201403268e3aaa552165dfc53a86534151c3dd9", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "10-17", "journalVolume": "", "outCitations": [ "4f7cfc54caa5db27347f7278d1482de8e761cfc0", "e47ec3e910c2ba7a33ed1188e88245e54e2b0e1c", "2b5d8135750899518322491ffd60f12a4789681d", "755e4ad5468747b31b9d6994885b17ad957dc9d7", 
"a75dfa6a77e42f77836e771a679e5902fb43edea", "00c4db9c5b8b8bb2285a4649de75ba3580cd0e35", "54c5239b7a293fd4882f043978027a8676a16a26", "520a10cd9bc944ad5ab14bff46578251ac40828d", "04d0daf15e6bd3b8b20d96513698d327584ace04", "47b48ec0877ee09f7c30678dfc128d4c5504db74", "594d2e123ecb8ec0bc781aec467007d65ab5464d" ], "paperAbstract": "Projecting a high-dimensional dataset onto a lower dimensional space can improve the efficiency of knowledge discovery and facilitate real-time data analysis. One technique for dimension reduction, weighted multi-dimensional scaling (WMDS), approximately preserves pairwise weighted distances during the transformation; but its O(f(n)d) algorithm impedes real-time performance on large datasets. Thus, we present CLARET, our fast and portable parallel WMDS tool that combines algorithmic concepts adapted and extended from the stochastic force-based MDS (SF-MDS) and Glimmer. To further improve Claret's performance for real-time data analysis, we propose a preprocessing step that computes approximate weighted Euclidean distances by combining a novel data mapping called stretching and Johnson Lindestrauss' lemma in O(log d) time in place of the original O(d) time. This preprocessing step reduces the complexity of WMDS from O(f(n)d) to O(f(n) log d), which for large d is a significant computational gain. Finally, we present a case study of Claret by integrating it into an interactive visualization tool called V2PI to facilitate real-time analytics. 
To ensure the quality of the projections, we propose a geometric shape matching-based alignment process and a quality metric.", "pdfUrls": [ "http://synergy.cs.vt.edu/pubs/papers/dash-claret-hpcc17.pdf", "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.2" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0201403268e3aaa552165dfc53a86534151c3dd9", "sources": [ "DBLP" ], "title": "Portable Parallel Design of Weighted Multi-Dimensional Scaling for Real-Time Data Analysis", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "020af9e8d35b7f6ca563397a8e82778dfa7dac7b": { "authors": [ { "ids": [ "1706290" ], "name": "Wen Xu" }, { "ids": [ "1909974" ], "name": "Sanidhya Kashyap" }, { "ids": [ "7761504" ], "name": "Changwoo Min" }, { "ids": [ "3254849" ], "name": "Taesoo Kim" } ], "doi": "10.1145/3133956.3134046", "doiUrl": "https://doi.org/10.1145/3133956.3134046", "entities": [ "Cloud computing", "Data center", "Design pattern", "Fork (system call)", "Iteration", "Multi-core processor", "Operating system", "Run time (program lifecycle phase)", "Scalability", "Software bug", "Software design pattern", "Software testing", "System call", "Test suite", "Throughput", "Vulnerability (computing)", "american fuzzy lop" ], "id": "020af9e8d35b7f6ca563397a8e82778dfa7dac7b", "inCitations": [], "journalName": "", "journalPages": "2313-2328", "journalVolume": "", "outCitations": [ "3cae67dde8b20aa58ebd12def02c7fa8ad844de4", "6f9058b5175aee958e330527aeb55074702dbfd4", "117025a430aaa984dd260bea97531da221b634a4", "274e7e576534b3e091f09e801cce807f5fd221c1", "08b5b8270713bbbc0b5b1a31c8625b0bf87d674d", "080b1f2c8316dad80d8c385dfcb82335a64a4d29", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "32c9b3a4990fdb26da19a4e0817936369a5e91bc", 
"0a0bf9e017e05d58b85e793e58148d2946259a74", "de71e2359995087b4ce7d46e4eb718c341c70ee0", "093d1d23500d65e99c7d0cd5569ce0f0d4c37076", "01f21f3aacb36a425aa9213a10ccc543a11659ab", "08832863bc3f041222f381c8ae143f8a66449059", "479bf197a3a9c5f6f7b06e64eefa0a90fa0c8b41", "abf1157c2043274a8d580151db1d4ef5be2c892e", "5cfc936d12bbd8a0f100687b12b20e406215f30a", "34e8a2787d737b050afff384ff0befd31c95e3a9", "1d5ca5dda6526012738276f3e58cd752a30b4652", "158ebe313a72857c5534a313f3ec0e413593b732", "5556995fb630c47805bbba560287ea59ce357fa1", "42685a7f175b44c3365d20f41853e18c7998e2b7", "36800d797c927b1be9437a789eaa30e90d0b7c87" ], "paperAbstract": "Fuzzing is a software testing technique that finds bugs by repeatedly injecting mutated inputs to a target program. Known to be a highly practical approach, fuzzing is gaining more popularity than ever before. Current research on fuzzing has focused on producing an input that is more likely to trigger a vulnerability.\n In this paper, we tackle another way to improve the performance of fuzzing, which is to shorten the execution time of each iteration. We observe that AFL, a state-of-the-art fuzzer, slows down by 24x because of file system contention and the scalability of fork() system call when it runs on 120 cores in parallel. Other fuzzers are expected to suffer from the same scalability bottlenecks in that they follow a similar design pattern. To improve the fuzzing performance, we design and implement three new operating primitives specialized for fuzzing that solve these performance bottlenecks and achieve scalable performance on multi-core machines. Our experiment shows that the proposed primitives speed up AFL and LibFuzzer by 6.1 to 28.9x and 1.1 to 735.7x, respectively, on the overall number of executions per second when targeting Google's fuzzer test suite with 120 cores. In addition, the primitives improve AFL's throughput up to 7.7x with 30 cores, which is a more common setting in data centers. 
Our fuzzer-agnostic primitives can be easily applied to any fuzzer with fundamental performance improvement and directly benefit large-scale fuzzing and cloud-based fuzzing services.", "pdfUrls": [ "http://iisp.gatech.edu/sites/default/files/images/designing_new_operating_primitives_to_improve_fuzzing_performance_vt.pdf", "http://doi.acm.org/10.1145/3133956.3134046", "https://taesoo.kim/pubs/2017/xu:os-fuzz.pdf", "https://taesoo.kim/pubs/2017/xu:os-fuzz-slides.pdf", "http://iisp.gatech.edu/sites/default/files/images/wen-ccs2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/020af9e8d35b7f6ca563397a8e82778dfa7dac7b", "sources": [ "DBLP" ], "title": "Designing New Operating Primitives to Improve Fuzzing Performance", "venue": "CCS", "year": 2017 }, "022086e6eebbc7bd7210a5f1217577f384f343a7": { "authors": [ { "ids": [ "39483735" ], "name": "Renqin Cai" }, { "ids": [ "1735804" ], "name": "Chi Wang" }, { "ids": [ "31825390" ], "name": "Hongning Wang" } ], "doi": "10.1145/3077136.3080781", "doiUrl": "https://doi.org/10.1145/3077136.3080781", "entities": [ "Document", "Multinomial logistic regression", "Speech repetition", "Text corpus", "Topic model", "User-generated content" ], "id": "022086e6eebbc7bd7210a5f1217577f384f343a7", "inCitations": [], "journalName": "", "journalPages": "365-374", "journalVolume": "", "outCitations": [ "71aaf59727081880d2833fb76d8f862048ce75dc", "12082a08377b7051360ef8be5a788adb2c024e98", "51fec0515b11cabc2be4832eab43ca5f6ff387d1", "ff02973613c2339f2dfcc95fe3c41cc72f0ca377", "9bf021126f96dbbf2c3968765003aaba0c826144", "339817cf189cae4e91b39fb0c3284a72d5e81198", "952c44bc56e54f64d0fe8247a3a2bd11c2188c61", "87d907a114409755ecd3c6886585de26a4e17ffe", "d15c57cb30c38da115e7ca31f2c6e3e5f1815ce0", "a0628fc3e3324ec8906b09154cfff8b6b664fa7d", "263f103fd2bfbbd6aeb392c6519d3f590e647c0a", "dc6b0e6949f806d69f63445985ee8a7a1c551fff", "a5dc3e018bc45fc707a56ed1bacc08d3cd648b0e", "0ef311acf523d4d0e2cc5f747a6508af2c89c5f7", 
"ef738810711edea9e2aa0aedb9c2eb1470661bf5", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "479effb344518acdc0fc301af393a75cd8bec40b", "3d74d241af53d85cbc74f73a785fddd675f0e644", "3dffe882c5447a5b34b25c4892e20bd21a3637da", "40b7f6cdfbafb9586f77f3796c5343346bd4cac9", "4ae54170d96423730f12f2f3d30a8820e7250d5c", "01f3290d6f3dee5978a53d9d2362f44daebc4008", "495548a800509c65a7bf54b2ddde0f8e44ca84d4", "5c6de157e19b49ff007f24c04c1f24d91addb6ba" ], "paperAbstract": "One important way for people to make their voice heard is to comment on the articles they have read online, such as news reports and each other's posts. The user-generated comments together with the commented documents form a unique correspondence structure. Properly modeling the dependency in such data is thus vital for one to obtain accurate insight of people's opinions and attention.\n In this work, we develop a Commented Correspondence Topic Model to model correspondence in commented text data. We focus on two levels of correspondence. First, to capture topic-level correspondence, we treat the topic assignments in commented documents as the prior to their comments' topic proportions. This captures the thematic dependency between commented documents and their comments. Second, to capture word-level correspondence, we utilize the Dirichlet compound multinomial distribution to model topics. This captures the word repetition patterns within the commented data. 
By integrating these two aspects, our model demonstrated encouraging performance in capturing the correspondence sturcture, which provides improved results in modeling user-generated content, spam comment detection, and sentence-based comment retrieval compared with state-of-the-art topic model solutions for correspondence modeling.", "pdfUrls": [ "http://www.cs.virginia.edu/~hw5x/paper/p365-cai.pdf", "http://doi.acm.org/10.1145/3077136.3080781", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/05/SIGIR17CCTM.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/022086e6eebbc7bd7210a5f1217577f384f343a7", "sources": [ "DBLP" ], "title": "Accounting for the Correspondence in Commented Data", "venue": "SIGIR", "year": 2017 }, "022c3fa7794ba53e2d275a7e985f48cd0ee0ed7e": { "authors": [ { "ids": [ "1687456" ], "name": "Jan Hidders" }, { "ids": [ "1717880" ], "name": "Jan Paredaens" }, { "ids": [ "1713880" ], "name": "Jan Van den Bussche" } ], "doi": "10.1145/3034786.3056106", "doiUrl": "https://doi.org/10.1145/3034786.3056106", "entities": [ "Datalog", "Deep learning", "JSON", "Logical framework", "Recursion", "Set packing", "Tree structure" ], "id": "022c3fa7794ba53e2d275a7e985f48cd0ee0ed7e", "inCitations": [ "9312e5efa0dcef1445d45a41771f12e2a8dc6715", "239869c5679418fe6f35eac3cff5c64dc6fc8c57" ], "journalName": "", "journalPages": "137-149", "journalVolume": "", "outCitations": [ "879fdd5dd812357b029c595358d5eb2757bea179", "17ee04f6e12e12509a39d203dbb43aa8e83bc526", "79b3f8183223aea85dd28a2beb6599252f063f64", "5465b7b7bf99660cc3a79af3502d84c3f9d8da1a", "a97b77c8c3f9e8247c497d1b4c27c958a15f3a62", "8f3d2d6a42785a9d7dc03cba48474bfb623d97e3", "1526b9c49baf1c7d50a2820f7306f655e81fcfb9", "610aaf556dd3504496cbec0b8d59c0eff19b5ccd", "c80a30f0312038c1c1aa238c6662d06f036fae05", "bec5e2f2f1cda3dd1cb89622b7f00f8cba070dd0", "8547d1c310b67b0a412de45942c1bae2b4645d22", "0442845100020f8d2e3970a3180ce918908d57ae", 
"407748e97d8d3878535f6371ad324708915bf6d9", "ceab83f2eed3c6dc3f2771db782e023303cb4141", "bbfd6c5365cf74c953b2c2b451c4798b7618f3c4", "0503d5a2d98320c3741b6afedd2cb1e048e6a018", "7f0e528b8195fab236aac9267f3ae2a79fd2730e", "65298a45b07dbe81bd7ff297b647688e3322e3b8", "9c4ac903a419752d0d2948baeb6d3ecad9c67df5", "512bb4d36cb0ed50890d03b690cb03b789fbbfbb", "4e7547b3d31b4f9b4b73a958cbe4d1b774d3ba94", "eb80b83eea907144c111af9d1058c99b6403edeb", "80cbe343ca73d75647d14149b6c466fac0741654", "602c518edb66ccbfca2af91850dc2e764fed51a7", "4b82bcc621c01fc06ae5159051702f5a9ab56975", "084d297399d6bd9bad1f090933f261d858a31b88", "172e126a6d0d5760e6802467cd8b1b68f3edc749", "b27ab73f60c242123e01f95f0d75ba2c2c4ab39d" ], "paperAbstract": "We propose a logical framework, based on Datalog, to study the foundations of querying JSON data. The main feature of our approach, which we call J-Logic, is the emphasis on paths. Paths are sequences of keys and are used to access the tree structure of nested JSON objects. J-Logic also features \"packing\" as a means to generate a new key from a path or subpath. J-Logic with recursion is computationally complete, but many queries can be expressed without recursion, such as deep equality. We give a necessary condition for queries to be expressible without recursion. Most of our results focus on the deterministic nature of JSON objects as partial functions from keys to values. Predicates defined by J-Logic programs may not properly describe objects, however. Nevertheless we show that every object-to-object transformation in J-Logic can be defined using only objects in intermediate results. Moreover we show that it is decidable whether a positive, nonrecursive J-Logic program always returns an object when given objects as inputs. Regarding packing, we show that packing is unnecessary if the output does not require new keys. 
Finally, we show the decidability of query containment for positive, nonrecursive J-Logic programs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3034786.3056106" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/022c3fa7794ba53e2d275a7e985f48cd0ee0ed7e", "sources": [ "DBLP" ], "title": "J-Logic: Logical Foundations for JSON Querying", "venue": "PODS", "year": 2017 }, "026884b69b3d44c60312d339012e9733d4d631a7": { "authors": [ { "ids": [ "35064164" ], "name": "Talia Ringer" }, { "ids": [ "8319903" ], "name": "Dan Grossman" }, { "ids": [ "2832255" ], "name": "Daniel Schwartz-Narbonne" }, { "ids": [ "1797515" ], "name": "Serdar Tasiran" } ], "doi": "10.1145/3133915", "doiUrl": "https://doi.org/10.1145/3133915", "entities": [ "Constraint programming", "Domain-specific language", "Programmer", "Random testing", "Recursion", "Software bug", "String (computer science)", "String generation", "Test automation" ], "id": "026884b69b3d44c60312d339012e9733d4d631a7", "inCitations": [], "journalName": "PACMPL", "journalPages": "91:1-91:24", "journalVolume": "1", "outCitations": [ "120fcc709955b62fa70807147909be6fa93d9a20", "1f7e5e582663868ed2f6763f98066ca278177a61", "34db94bb4e84c9b8ab8a06da82932c24bebdf127", "4f789439fe5a121e6f47453d8a95ec733baca537", "decb7c40e12fb4e20f04b2b514704575e4481ff8", "8ea3d1cf91d2e5fc6f2e5220500e52f4ed9e6689", "2652e1188e0826572adfd5759d85faa9f39b914e", "05f0c383c785f168da8e80c903517ec5fdf71d41", "d99c7937923450664a819cdd2efee7ba698000a4", "6106f4d972cfd2123621694908442c2eb705cc11", "a05e223169ab022f800bf9f2664847919844cab9", "155759e41a8e5b145c78fcc6f53bb60423b5a9cf", "9171b5d4349fa8a73f846343bdfd978034ba0207", "1eb890680e4b451117d05c7223cded3fb13812ea", "464ae8ff0a6cba171fe596d6088969129fb907c4", "216b98bb3d9221d5f5d261864975612e4d0faaa6" ], "paperAbstract": "Developing a small but useful set of inputs for tests is challenging. 
We show that a domain-specific language backed by a constraint solver can help the programmer with this process. The solver can generate a set of test inputs and guarantee that each input is different from other inputs in a way that is useful for testing. \nThis paper presents Iorek: a tool that empowers the programmer with the ability to express to any SMT solver what it means for inputs to be different. The core of Iorek is a rich language for constraining the set of inputs, which includes a novel bounded enumeration mechanism that makes it easy to define and encode a flexible notion of difference over a recursive structure. We demonstrate the flexibility of this mechanism for generating strings. \nWe use Iorek to test real services and find that it is effective at finding bugs. We also build Iorek into a random testing tool and show that it increases coverage.", "pdfUrls": [ "https://homes.cs.washington.edu/~djg/papers/iorekpaper.pdf", "http://doi.acm.org/10.1145/3133915" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/026884b69b3d44c60312d339012e9733d4d631a7", "sources": [ "DBLP" ], "title": "A solver-aided language for test input generation", "venue": "PACMPL", "year": 2017 }, "02700e7e0cdc291e55af704530e181e0da668c1e": { "authors": [ { "ids": [ "40026298" ], "name": "Xin Liang" }, { "ids": [ "2506582" ], "name": "Jieyang Chen" }, { "ids": [ "3058378" ], "name": "Dingwen Tao" }, { "ids": [ "3450082" ], "name": "Sihuan Li" }, { "ids": [ "39559311" ], "name": "Panruo Wu" }, { "ids": [ "30299336" ], "name": "Hongbo Li" }, { "ids": [ "9547335" ], "name": "Kaiming Ouyang" }, { "ids": [ "28300153" ], "name": "Yuanlai Liu" }, { "ids": [ "2495855" ], "name": "Fengguang Song" }, { "ids": [ "1756221" ], "name": "Zizhong Chen" } ], "doi": "10.1145/3126908.3126915", "doiUrl": "https://doi.org/10.1145/3126908.3126915", "entities": [ "Algorithm", "Computation", "FFTW", "Fast Fourier transform", "Fastest", "Fault coverage", "Fault tolerance", 
"Numerical stability", "Overhead (computing)", "Soft error" ], "id": "02700e7e0cdc291e55af704530e181e0da668c1e", "inCitations": [], "journalName": "", "journalPages": "30:1-30:12", "journalVolume": "", "outCitations": [ "f8e9b050c93af6dea582563f61b6460b590bc3af", "729d8aef78a6166e2df15903e9ba3d6ff366417d", "38884b89254d5ac26cc437dfdd1c5512d0cdf9bd", "42452be4c840abd3a4a0fa49c4b8d4aeeb3f2f6e", "5d5ac7167bc5f834173aa4c63821916a1bdf487a", "1ad1ff28c41c036aed259bd4af1e5c1c42cdc5c7", "14a0ac48029d823a5aa7e81228af5237395ca2d8", "79c0062e0eae09d6715054fe7fc46d4164443aba", "91181ab9d0faf27a996a37fe266c725a0eacea67", "b295a2e3667b52b900950417c2e9b58b01938f34", "1c20521112e3bf937e756a28061ad4887f4ad720", "14e5bbf94dba58ead368cceab1541cff7cbb0170", "39ef5d362200126497b2f74c33338383dcc9589c", "450f66cd38a37201759384b33493798d2a82b9f6", "4ed3a2853506e9ac40d0907f6597da492995b17f", "02e1fe87b8c30a9d32647c088f97520afa8de181", "ba6f0ca75f965bc2eca7b4e3850b7394f7f60c3e", "18992850afed53b60ce696e20374a1e1b3d9da22", "87a8a7d48205aa864b1269cedf6a39925a3c24f8", "7027eb880ee4d5a1f71bfc861bb36ae980f781fc", "920ce1f5e69d34c531ad14bb77b524c100d4a8c2", "6b8c72e71697d1b680765e227232574ed289eb34", "4832d118efdf3d9399027bb90082eff1bd8b4abc", "36480300b1e382c062b78c6bd610d1879efd950e", "3e99a917b9a4e89497541bbc3bb72079054644c6", "4d931c6f2b099283552982bb745e5974a67fd8f0", "73c9a5beceea745330d7e9d952d13233389c453d", "62ffeb055fb0499a38732bc33193a2ef3b4e1523", "5c0e8af36e20b8ea213561e8c3d706b4e2f2cc8d", "ced080a28e76531421c030ddc524780f3236af6f", "414b0fb3e72689d2148798e15f4df35b5baa62fd", "ff9e73f9fb2d87b8e65772e447bbf93c5aa0b3e3", "a2f99528a2dd954f38f6e0bd42b686c165f23403", "8909b00de4f855eeafc2a09082ee340493818e2d", "60e90ece1fb2ce7bffedfa2ea4321162c5d9311b" ], "paperAbstract": "While many algorithm-based fault tolerance (ABFT) schemes have been proposed to detect soft errors offline in the fast Fourier transform (FFT) after computation finishes, none of the existing ABFT schemes detect soft 
errors online before the computation finishes. This paper presents an online ABFT scheme for FFT so that soft errors can be detected online and the corrupted computation can be terminated in a much more timely manner. We also extend our scheme to tolerate both arithmetic errors and memory errors, develop strategies to reduce its fault tolerance overhead and improve its numerical stability and fault coverage, and finally incorporate it into the widely used FFTW library - one of the today's fastest FFT software implementations. Experimental results demonstrate that: (1) the proposed online ABFT scheme introduces much lower overhead than the existing offline ABFT schemes; (2) it detects errors in a much more timely manner; and (3) it also has higher numerical stability and better fault coverage.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126915" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/02700e7e0cdc291e55af704530e181e0da668c1e", "sources": [ "DBLP" ], "title": "Correcting soft errors online in fast fourier transform", "venue": "SC", "year": 2017 }, "027158a5050a09a0f66188372e2eb1584215fac5": { "authors": [ { "ids": [ "3460027" ], "name": "Yuanshun Yao" }, { "ids": [ "3359577" ], "name": "Zhujun Xiao" }, { "ids": [ "2081795" ], "name": "Bolun Wang" }, { "ids": [ "34824488" ], "name": "Bimal Viswanath" }, { "ids": [ "2704852" ], "name": "Haitao Zheng" }, { "ids": [ "1972108" ], "name": "Ben Y. 
Zhao" } ], "doi": "10.1145/3131365.3131372", "doiUrl": "https://doi.org/10.1145/3131365.3131372", "entities": [ "Approximation algorithm", "Centralisation", "Feature selection", "Library", "Machine learning", "Network analysis (electrical circuits)", "Server-side", "Turnkey", "User interface" ], "id": "027158a5050a09a0f66188372e2eb1584215fac5", "inCitations": [ "83aaf61e91053745e667427d2132527b8a05ef8a" ], "journalName": "", "journalPages": "384-397", "journalVolume": "", "outCitations": [ "1375c722eee6e58041f9e295042d42e43ac3428c", "595a00f0975b5d5c28d904ddba1ae5a493316573", "1fa82596e6e14a082db1413f746605d513e6245e", "9d42bac176ed5e213afec867f4c04dfe8c201adc", "9b9607b78ca1896738ec1fbf0633032bc74fbec2", "2e8f90db603b3a1c2845cc59435d6886fe15abf2", "acbd73b27937fb53077e89398a8e422c35221779", "217135d666e8349ba6d7312a37bd1dd166c098ec", "171ddad7a5ed834ff7313bb614de2f44924ebeb4", "2a918ea62cf910c6f9548b8baf6b53f34ba879c3", "011d53f4255899b654d3bd53089fa3eff7cdac08", "381f43ee885b78ec2b2264c915135e19b7dde8b6", "929bb4e2e474d7bb4338a82cc708c7b9794567da", "9f2aefc3821853e963beda011ed770f740385b77", "36df8d7a778489d565715f273b82ea82acb71b2f", "21874a977ce807b7d93ace4220b31b2ace6d0d91", "d91ae788cdf5cf6191ca23b5c38d8dc988503886", "1d4d86edb87d775739786664db6ac3830a009e0e", "3b70bb6c268ccd190cc487fede8dce7b076469f9", "22bc7549801fd359f932bbdc11b8ca24b87baadf", "04e8dac83e50b0b4a32c9878ddba04446fc6a3f4", "27db63ab642d9c27601a9311d65b63e2d2d26744", "0468532d28499ef38287afd7557dd62b802ee85b", "7fe7e80bf59a112386211b38ef2ea0b71ae76345", "e25a07384dee2ce73e8426b7f3bff4a38eb7bf5b", "210b3ccdc5d43ff218f894695a6ee8f1ff71a32f", "0a248f46e08497f632cb80e0c362ed45e7f317c6", "3653266b5427295bdd54d6a22bf4caaa8c0b6961", "4087af703566b50037ea3a68e2514cdd53282e8d", "38171ef0443ef60c78a861838eccd24f004c22b2", "7b550425afe75edbfe7058ea4075cbf7a82c98a8", "2f9bb353e06dd0cafa7e287f9b9415c22878645a", "3342e60cb8647838fae0765e02fbb77a01df030e", "199e2a48f36b56f011ba4542721dc47e1b9078aa", 
"03d88407c702b6dffaae48b3d55ee716bcaffb8d", "3fd276e42268111373cc5d669b4d8175c3fe2420", "0e67bac2937f5f53f310564efa547efd82c0371d", "d44bb000915594cba0129315d855c28497738319", "d62813d30199431a44d0b72b41aa1b8ed76117e5", "1848bf446496df8bce6222d322422fab4e23e94e", "46f45109ba90ce7ac6068e9c27097949dc3e1c4d", "04f04c43ed1ed5bfa0706ed087277ef83de7e175", "24f95cc73758d870706fe8ab590d477b9dd2b791", "02df5e428a759091ffb9b3eb3f6542e0efab79b0", "22ba0d428dc3935bb466ef5ae6414473b86327b0", "1ed982c3846d8ceed3a4bb105bb2dcd5b147ace3", "052b1d8ce63b07fec3de9dbb583772d860b7c769", "6f325db30d5b039727715df8ee7f9f37e845c927", "17c73776942309d8c406df5be6f7dcc17dd90410", "12e433f6ee70d3037e6eec58ff3f0e61b3d65fa1", "26f58c28de7469dc6b6846d37953bbbe3f4fc0e9", "37092f9c79cb6220fd6f01882e9e3c1b41d0750e", "1c732afc3c057e1ef8cefd22097a14d60d779322", "89b1b3f9af27fed6ebf9bd8afec6f1cfc7c155de", "5881189bdcba6907f2e7f7dbb3143ffbab8c5e90", "ae2d852535803d8b9fb7e9f8aeed0c674b801e15", "117d089d8eea767e72d5bb800ba4d6e0e15dad93", "ecffa2745a78d1cffbea3d6b9af3d74131be704f", "29b88ce334514dcd88efd83476678fb3b42a7bf9", "d8c71b0710e725cad12e5fc44f5230213f075e46", "2c47bd8bd699914e3535292b17ba46542800845c", "334ade0d31485c59cca29018d6baeef8ccf20f05", "48caac2f65bce47f6d27400ae4f60d8395cec2f3", "47aa3758c0ac35bfb2a3d2bbeff1e0ac28e623c2", "9be7e7579fbec5d45e3e6ea1c4465258225a183d", "29d880dfd7f39b1a91d5f6a66e2a3170b8f62703", "0363d348cbfba2be71ff95cebcdc9119d4a0183d", "260368e4b7ddef442bb5c197078e200b3c0ab7b1", "ea57e9e2d557fa6e944b69bbe4420ef61c122e4c", "1542da2cf7c4925917d922fc6b317a962afb5dba", "2ec2352d4009f3953d3322c8b7aaa9f6c8777043", "02bc27c39eaaa6b85d336be81b15ca19f112a950" ], "paperAbstract": "Machine learning classifiers are basic research tools used in numerous types of network analysis and modeling. 
To reduce the need for domain expertise and costs of running local ML classifiers, network researchers can instead rely on centralized Machine Learning as a Service (MLaaS) platforms.\n In this paper, we evaluate the effectiveness of MLaaS systems ranging from fully-automated, turnkey systems to fully-customizable systems, and find that with more user control comes greater risk. Good decisions produce even higher performance, and poor decisions result in harsher performance penalties. We also find that server side optimizations help fully-automated systems outperform default settings on competitors, but still lag far behind well-tuned MLaaS systems which compare favorably to standalone ML libraries. Finally, we find classifier choice is the dominating factor in determining model performance, and that users can approximate the performance of an optimal classifier choice by experimenting with a small subset of random classifiers. While network researchers should approach MLaaS systems with caution, they can achieve results comparable to standalone classifiers if they have sufficient insight into key decisions like classifiers and feature selection.", "pdfUrls": [ "http://doi.acm.org/10.1145/3131365.3131372", "https://conferences.sigcomm.org/imc/2017/slides/imc-pdf.pdf", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final51.pdf", "http://www.cs.ucsb.edu/~bolunwang/docs/mlaas-imc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/027158a5050a09a0f66188372e2eb1584215fac5", "sources": [ "DBLP" ], "title": "Complexity vs. 
performance: empirical analysis of machine learning as a service", "venue": "IMC", "year": 2017 }, "027188ede13c9f29bea0710921f7c341f045a75b": { "authors": [ { "ids": [ "2061911" ], "name": "Juan Jos\u00e9 Fumero" }, { "ids": [ "1795890" ], "name": "Michel Steuwer" }, { "ids": [ "6503919" ], "name": "Lukas Stadler" }, { "ids": [ "3224333" ], "name": "Christophe Dubach" } ], "doi": "10.1145/3050748.3050761", "doiUrl": "https://doi.org/10.1145/3050748.3050761", "entities": [ "Big data", "Central processing unit", "Computation", "Graphics processing unit", "Heterogeneous computing", "High- and low-level", "Just-in-time compilation", "Library (computing)", "Low-level programming language", "Manycore processor", "OpenCL API", "Partial evaluation", "Profiling (information science)", "Programmer", "Programming language", "R language", "Run time (program lifecycle phase)", "Uptime" ], "id": "027188ede13c9f29bea0710921f7c341f045a75b", "inCitations": [ "fc43b21e3582dee88e364a6dff2441ad366c43f5", "55c143f5b991501a09a644ab0f39c05951ae4754" ], "journalName": "", "journalPages": "60-73", "journalVolume": "", "outCitations": [ "d4defe055ddaf9d84e453598ad75529709c64b70", "5231091fd9fe75115bedf967fa8ed95810ae6ae3", "3e129f0194279d49056c737e0caa97af25a5f1aa", "642da427e4fc7a0d62e239c561cc28821c341d50", "a1be5e4b91ee22100ac946c007d71266a4399502", "8d19166630b77df25624ba64cfa5dbdc6cd9aba8", "7e007883306b2d0b8da57ed608f5441dcc30a3e2", "9ad48bd155815ad662e10e1228557d9ec9846828", "1850ceb5376a4a14a7d77031789ef3ccb4f87e93", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "4a088b3ef14d19448e77008f852f2e9805ffc1ea", "2fa4722b81b63c4973ecc7a327f4c827f34d2c5e", "1f7ad334ee1b933fcd2917f97a1b2eb97c8e44c2", "a3e88aa2505c1f4e7f176b1afa467c60fd30bdac", "74a271cc13ccbdb1842f56ddb6faa144046e84d3", "5d5e1b35dcfbf52299c327baab696568ba0e1d15", "c17ac40f0fb475c810c70a52b3dd6535454eabf4", "0f58afdae0b5d40a599d685c81c83f33586c671a", "26b612d9c0f3c1b88394ebf299a450e73594b5dc", 
"53e2b31ad6fea91655ecbe64fe66968b934d0160", "7521513abd7acae00b3fd89001da47019606cf38" ], "paperAbstract": "Computer systems are increasingly featuring powerful parallel devices with the advent of many-core CPUs and GPUs. This offers the opportunity to solve computationally-intensive problems at a fraction of the time traditional CPUs need. However, exploiting heterogeneous hardware requires the use of low-level programming language approaches such as OpenCL, which is incredibly challenging, even for advanced programmers.\n On the application side, interpreted dynamic languages are increasingly becoming popular in many domains due to their simplicity, expressiveness and flexibility. However, this creates a wide gap between the high-level abstractions offered to programmers and the low-level hardware-specific interface. Currently, programmers must rely on high performance libraries or they are forced to write parts of their application in a low-level language like OpenCL. Ideally, nonexpert programmers should be able to exploit heterogeneous hardware directly from their interpreted dynamic languages.\n In this paper, we present a technique to transparently and automatically offload computations from interpreted dynamic languages to heterogeneous devices. Using just-in-time compilation, we automatically generate OpenCL code at runtime which is specialized to the actual observed data types using profiling information. We demonstrate our technique using R, which is a popular interpreted dynamic language predominately used in big data analytic. Our experimental results show the execution on a GPU yields speedups of over 150x compared to the sequential FastR implementation and the obtained performance is competitive with manually written GPU code. 
We also show that when taking into account start-up time, large speedups are achievable, even when the applications run for as little as a few seconds.", "pdfUrls": [ "http://www.research.ed.ac.uk/portal/files/33009999/vee.pdf", "http://doi.acm.org/10.1145/3050748.3050761", "http://eprints.gla.ac.uk/146598/7/146598.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/027188ede13c9f29bea0710921f7c341f045a75b", "sources": [ "DBLP" ], "title": "Just-In-Time GPU Compilation for Interpreted Languages with Partial Evaluation", "venue": "VEE", "year": 2017 }, "02904a446013931f3f3cd972c177dfcf841d6e16": { "authors": [ { "ids": [ "1914988" ], "name": "Yi Su" }, { "ids": [ "1718546" ], "name": "Dan Feng" }, { "ids": [ "40172713" ], "name": "Yu Hua" }, { "ids": [ "7947698" ], "name": "Zhan Shi" } ], "doi": "10.1109/ICPP.2017.33", "doiUrl": "https://doi.org/10.1109/ICPP.2017.33", "entities": [ "Baseline (configuration management)", "Cloud computing", "Computer data storage", "Event-driven programming", "Experience", "Object storage", "Programming model", "Service-level agreement" ], "id": "02904a446013931f3f3cd972c177dfcf841d6e16", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "241-250", "journalVolume": "", "outCitations": [ "1c7d0f188a8033d8a14ab3ae30662f7e85fa65b6", "00919d778377ec8e4e037d8ebafc76c9de52db4b", "045943438dd45f25f0127d97ed9116b3b05914a7", "806cb3df830e5f58fad7460904c898065cd9357f", "329eec581160cf7c3f651196bae9e702c1e8647b", "5ae87dedd95dd5dda85012e1a8f6ebdfb7e575d0", "3168681722207c86827e596860115a2977ce761f", "61c2571f6029aba65ec6288881211797c27d5ecc", "19488ad6103c678a7f5d7a5a149cdbac4663a366", "c35c524070b829b2e34dc3b952d950e400181430", "5b97d28248f13ce6ae1a565e6ea06415def1c4c7", "848fa1f48ad9d3edb24b05667f15cfc633eb8f69", "e2576ac7fad7a371b0db58c2837a887869f797bc", "1b3e102739030bd2bdfbd3a02eabba81419ccb8f", "00fad2ef73cf6841c88b8f76957d382ab9cc88f4", 
"8060b97fb8c05ada26afd31e237aa1b3dba4dd39", "938dd91b211c46e91ae309154fa810f4bef933d2", "db540eec873ede30f0db6377fe4bf799c17a4fc5", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "9c0f244c4fd365f64d15755ed54b92e6dc2d99c6", "396514fb219879a4a18762cddfae2a6a607f439f", "55b5f88ba09e4f2f53aec5418835f2a6498cd289" ], "paperAbstract": "As a fundamental cloud service for modern Web applications, the cloud object storage system stores and retrieves millions or even billions of read-heavy data objects. Serving for a massive amount of requests each day makes the response latency be a vital component of user experiences. Due to the lack of suitable understanding on the response latency distribution, current practice is to use overprovision resources to meet Service Level Agreement (SLA). Hence we build a performance model for the cloud object storage system to predict the percentiles of requests meeting SLA (response latency requirement), in the context of complicated disk operations and event-driven programming model. Furthermore, we find that the waiting time for being accept()-ed at storage servers may introduce significant delay. And we quantify the impacts on system response latency, due to requests waiting for being accept()-ed. 
In a variety of scenarios, our model reduces the prediction errors by up to 73% compared to baseline models, and the prediction error of our model is 4.44% on average.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.33" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/02904a446013931f3f3cd972c177dfcf841d6e16", "sources": [ "DBLP" ], "title": "Predicting Response Latency Percentiles for Cloud Object Storage Systems", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "02b045984643113792882d30f28c2245cfbcf0e6": { "authors": [ { "ids": [ "1773557" ], "name": "Omer Subasi" }, { "ids": [ "3139819" ], "name": "Gulay Yalcin" }, { "ids": [ "1777868" ], "name": "Ferad Zyulkyarov" }, { "ids": [ "3309458" ], "name": "Osman S. Unsal" }, { "ids": [ "1699563" ], "name": "Jes\u00fas Labarta" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Distributed computing", "Fail-stop", "Fault tolerance" ], "id": "02b045984643113792882d30f28c2245cfbcf0e6", "inCitations": [ "8abe342812ee5025755d680977383e4bdf8d6703", "24f827feddc105dad5659e32ca15ef91ac3b8061", "a07d0a5f997161adb395a2ed718bf62d4d3106cd" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "452-457", "journalVolume": "", "outCitations": [ "4b2e1b0edbaa3d6d6bf059b89108cadfcbdf5c7b", "39ef5d362200126497b2f74c33338383dcc9589c", "4b434f94fafc3ffc76e0c440897ccd222eaa38ac", "ddfe7c78115c3a610c0ad64691791ce463162282", "01e499b6cf6b89babe390503e30e20d6628ddc39", "7c62c7e0b4e026f6b5b027735c99cbf033789ba9", "18fe996c6f43a8f301cd842507045b679ba3506a", "1d500b9788ad9608ec3c584c0a22059bbdb0ac9a", "b35585217d340b78f7a7c1fe7079429ac36fc229", "243c2d9ce406d82ec24d69e7c473fb99392ebdb6", "01d62cd850496455ce1616500f491690effa5c98", "88412b002ee39eb121d93c0a2c11ddbb658e9d6b", "983eb4473a7de0f7497a5047941c0808fdaf18a8", "f1eaaccfc03e06c9ea5ec99162c7a6a118eea155", 
"0eacd1b47786f740b723d906d46e160f143c0378", "5ee6d6523a8e7b0fae7539503854a8d3659f126c", "a5de222d68cbd3ab5dd509744e3a63f2073d734b" ], "paperAbstract": "Fail-stop errors and Silent Data Corruptions (SDCs) are the most common failure modes for High Performance Computing (HPC) applications. There are studies that address fail-stop errors and studies that address SDCs. However few studies address both types of errors together. In this paper we propose a software-based selective replication technique for HPC applications for both fail-stop errors and SDCs. Since complete replication of applications can be costly in terms of resources, we develop a runtime-based technique for selective replication. Selective replication provides an opportunity to meet HPC reliability targets while decreasing resource costs. Our technique is low-overhead, automatic and completely transparent to the user.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101173", "http://upcommons.upc.edu/bitstream/handle/2117/107497/Designing+and+Modelling.pdf;jsessionid=6D34CF994F2443B75097CD6E43D70142?sequence=3" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/02b045984643113792882d30f28c2245cfbcf0e6", "sources": [ "DBLP" ], "title": "Designing and Modelling Selective Replication for Fault-Tolerant HPC Applications", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "02d5ee5a61cd68cf47ad71a4dd9ae889149a2553": { "authors": [ { "ids": [ "1968460" ], "name": "Min Du" }, { "ids": [ "3245752" ], "name": "Feifei Li" }, { "ids": [ "2615075" ], "name": "Guineng Zheng" }, { "ids": [ "3052879" ], "name": "Vivek Srikumar" } ], "doi": "10.1145/3133956.3134015", "doiUrl": "https://doi.org/10.1145/3133956.3134015", "entities": [ "Anomaly detection", "Artificial neural network", "Data mining", "Deep learning", "Failure rate", "Information source", "Long short-term memory", "Natural language", "Network model", "Software bug" 
], "id": "02d5ee5a61cd68cf47ad71a4dd9ae889149a2553", "inCitations": [ "7aaa159e2b762e94da000a1515c2b1cc9b6afa50" ], "journalName": "", "journalPages": "1285-1298", "journalVolume": "", "outCitations": [ "188e1d54d2c73b0b83e543d9183ec4c413625622", "b4622086651fcc6e9b4bf87d918668f7579d5954", "06fd7d924d499fbc62ccbcc2e458fb6c187bcf6f", "27211ed68a7a00f1df0121fa1890a1b2acdd1a88", "19d78d8c072b60294792c523742a8609accf3890", "8de8a4b5193200d332aa4c86956bffbdac758194", "49e8721bd4821eff0f147d73bea970f2de3aab8a", "5ba262cc2173e4201df2406cde8c9e1078db7841", "0da0ce23fccc1b2b84d633526a34bc4b6f2c5679", "154728875d4668065ca6ba9fa2f5d2a1bcfc4a6e", "12d4c92f0a3a70538ed609bf6f7b603e44d11abd", "996ce6a529c3d7652a304ca05bf9d32d3db44e95", "68bcc7b233ca3ef6b1f6d751c3325e301ebaf2ee", "8681fa540b786a1858a3d429c0fbcaf3aeeb52ee", "87bb5be2e9336938450e340e3e24e20f2ef79adf", "c368e5f5d6390ecd6b431f3b535c707ea8b21993", "68aeb92287ede815e480c8becc6385f99f20b29a", "3edfc29b8a4f4fb1e245087cd1c59498f2255fe8", "6e558f2929ba8d95d55adad44ba89e62762270a0", "9826daa08e5e4d73a1878fd3383e37472064f23f", "067bd9d975b132dc668013895a5e4298623feebd", "233b43f13c17fc5c45d6dd67a46a18d5e7d95d57", "852094207ef6083d807a5215028e46e50685acbb", "46166919007b237d1fafb93adb5dd6d288bac84d", "095aed5a23cb1c807bbc9ffa40d1ae82c6685d43", "14c5c9f9bb24cb433e156ba8a30a879d84ed49d8", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", "4aa9f5150b46320f534de4747a2dd0cd7f3fe292", "03d88407c702b6dffaae48b3d55ee716bcaffb8d", "47a87c2cbdd928bb081974d308b3d9cf678d257e", "2c1ed7e32a85d72fb270ebd07a45641acfba02a9", "7260363c8b9a3e9d8f0b560c67cc49619bf06e56", "95193d2c016f1ee266b1dbf714678ce6bb1bb2ea", "350a4b5cecfffb6a0e88c349b84e56df8829da44", "2ca6f673bfb1e218e8a97763becf2a4a5cf195ae", "43cf32ba6dad06247bfd6d4869c523e364e43eb3" ], "paperAbstract": "Anomaly detection is a critical step towards building a secure and trustworthy system. 
The primary purpose of a system log is to record system states and significant events at various critical points to help debug system failures and perform root cause analysis. Such log data is universally available in nearly all computer systems. Log data is an important and valuable resource for understanding system status and performance issues; therefore, the various system logs are naturally excellent source of information for online monitoring and anomaly detection. We propose DeepLog, a deep neural network model utilizing Long Short-Term Memory (LSTM), to model a system log as a natural language sequence. This allows DeepLog to automatically learn log patterns from normal execution, and detect anomalies when log patterns deviate from the model trained from log data under normal execution. In addition, we demonstrate how to incrementally update the DeepLog model in an online fashion so that it can adapt to new log patterns over time. Furthermore, DeepLog constructs workflows from the underlying system log so that once an anomaly is detected, users can diagnose the detected anomaly and perform root cause analysis effectively. 
Extensive experimental evaluations over large log data have shown that DeepLog has outperformed other existing log-based anomaly detection methods based on traditional data mining methodologies.", "pdfUrls": [ "http://www.cs.utah.edu/~lifeifei/papers/dl_ccs.pdf", "http://doi.acm.org/10.1145/3133956.3134015", "http://www.flux.utah.edu/download?uid=261", "http://www.cs.utah.edu/~mind/papers/deepLog_poster.pdf", "https://people.engr.ncsu.edu/gjin2/Classes/591/Spring2018/deepLog.pdf", "http://www.cs.utah.edu/~mind/papers/deepLog.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/02d5ee5a61cd68cf47ad71a4dd9ae889149a2553", "sources": [ "DBLP" ], "title": "DeepLog: Anomaly Detection and Diagnosis from System Logs through Deep Learning", "venue": "CCS", "year": 2017 }, "02e0f4e418638f3edfa037b1aed46c432ab3ac4f": { "authors": [ { "ids": [ "3339930" ], "name": "Maomeng Su" }, { "ids": [ "4408986" ], "name": "Mingxing Zhang" }, { "ids": [ "1680073" ], "name": "Kang Chen" }, { "ids": [ "1850900" ], "name": "Zhenyu Guo" }, { "ids": [ "1725574" ], "name": "Yongwei Wu" } ], "doi": "10.1145/3064176.3064189", "doiUrl": "https://doi.org/10.1145/3064176.3064189", "entities": [ "Attribute\u2013value pair", "Data center", "Data structure", "Direct memory access", "In-memory database", "Key-value database", "Programming paradigm", "Remote direct memory access", "Remote procedure call", "Request for proposal", "Server (computing)", "USB flash drive" ], "id": "02e0f4e418638f3edfa037b1aed46c432ab3ac4f", "inCitations": [ "2dc3d8536a8fa4660c6e842ef17c2d1553fceea6" ], "journalName": "", "journalPages": "1-15", "journalVolume": "", "outCitations": [ "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "1dd353938063795c06ef21d8b0b3ef3b45a2fdc1", "1594118f2696b573f08510cf837f3b37db87face", "514a5c15e8cf3f681febecad954a4508d9189c99", "3cc2336cb701ab40273d0b5603064a70a209b4c6", "4ab775b9811a8b9f0ff24fa06b535986149e51e3", "d842578a10648c9cb1a7e87bd1f8de30246d5a51", 
"28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "ef9d9821df55442f039b128bb5cef2b41ab2cadc", "5f948207acb92e6f4e09aa5f5a2cf7cdf2d80ba5", "29a1148d75878671dc3663bf480e33d7bd91597d", "898634f0e693cb521ad2dd4a7432c11381e6df60", "0401a8c1feeb489f3fa011fe50e00e91a8fd7903", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "8318fa48ed23f9e8b9909385d3560f029c623171", "3702d6e0c78050f3261fdbf0eb1aefbac59fb8cf", "66ede69aec0e37e0851464076e1719cd8036998e", "b4087345c63a7b2412eeb31066b5e4bceadbbcb2", "0276440f721b17ff77165f2b1ed24e029b9a2432", "10ca6fc3a9adf282073defda372355bfd668b31e", "daf0cd0076b388712ea12ec4105572997fc50cdf", "433207f45ac2c9dbe3876ab53af28ed569e94da9", "12078fd9bee79fd2e9fae055c4cc33db382272af", "2ab305079385594badd4233ebb9512d52ecaccfb", "205cf007cf77bbf81e55b74635017087585f7b7c", "184687c92c6890743a663a7cdb0216d04f8e9fbf", "01094798b20e96e1d029d6874577167f2214c7b6", "9aa0d7253574e50fe3a190ccd924433f048997dd", "21474d50689bb4b4af6399c4bae2cb612f382713", "742c641506ac9efc3281af2effb31f2fb31b2dd4" ], "paperAbstract": "Remote Direct Memory Access (RDMA) has been widely deployed in modern data centers. However, existing usages of RDMA lead to a dilemma between performance and redesign cost. They either directly replace socket-based send/receive primitives with the corresponding RDMA counterpart (server-reply), which only achieves moderate performance improvement; or push performance further by using one-sided RDMA operations to totally bypass the server (server-bypass), at the cost of redesigning the software.\n In this paper, we introduce two interesting observations about RDMA. First, RDMA has asymmetric performance characteristics, which can be used to improve server-reply's performance. Second, the performance of server-bypass is not as good as expected in many cases, because more rounds of RDMA may be needed if the server is totally bypassed. 
We therefore introduce a new RDMA paradigm called Remote Fetching Paradigm (RFP). Although RFP requires users to set several parameters to achieve the best performance, it supports the legacy RPC interfaces and hence avoids the need of redesigning application-specific data structures. Moreover, with proper parameters, it can achieve even higher IOPS than that of the previous paradigms.\n We have designed and implemented an in-memory key-value store based on RFP to evaluate its effectiveness. Experimental results show that RFP improves performance by 1.6×~4× compared with both server-reply and server-bypass paradigms.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064189" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/02e0f4e418638f3edfa037b1aed46c432ab3ac4f", "sources": [ "DBLP" ], "title": "RFP: When RPC is Faster than Server-Bypass with RDMA", "venue": "EuroSys", "year": 2017 }, "02e770fe56cc33834c8e81e35ed39074471997f7": { "authors": [ { "ids": [ "2142614" ], "name": "Xinyu Wang" }, { "ids": [ "1714075" ], "name": "Isil Dillig" }, { "ids": [ "37493633" ], "name": "Rishabh Singh" } ], "doi": "10.1145/3133886", "doiUrl": "https://doi.org/10.1145/3133886", "entities": [ "Algorithm", "Computation", "Data science", "Database", "Digital subscriber line", "Domain-specific language", "Fault tree analysis", "Missing data", "Relational database", "Sketch", "Spreadsheet", "Table (information)", "Tree automaton", "Version space learning" ], "id": "02e770fe56cc33834c8e81e35ed39074471997f7", "inCitations": [ "791714728fefcb067fb6b56c7f4de093d536cf00", "33de4502da805dd10769d2412fd04ba5ad7867f7" ], "journalName": "PACMPL", "journalPages": "62:1-62:26", "journalVolume": "1", "outCitations": [ "54ed970d56ce4343a3d3fa29fb6080572255f26d", "2eda52d7a1723df6eee46d69496fd576e5787575", "426a2eb44a8f947edf9a92288e80fd0d6b515de2", "05c8103e1b77437875a4c69c6258be988ab2946b", "74da2a29ffef4636e581a777dcddaa44e2bf069f", 
"49d5f1340aad43d48bbb3b9df58eb5a250a57396", "020e287d79d0d96abc5026b9af4a4f8820fc0b1d", "00c08861cfb438d5ff209dfadc2d839641cd3ca9", "208e7934d900055b43b8b60e4a807ac00674ec4a", "67d18339ed72b7fc2152cb42b63362b570c11946", "157bc8409146eb82230637dce86d19829ee45a83", "011a0f193a4ad6e118abd5a36f705618071891ba" ], "paperAbstract": "In application domains that store data in a tabular format, a common task is to fill the values of some cells using values stored in other cells. For instance, such data completion tasks arise in the context of missing value imputation in data science and derived data computation in spreadsheets and relational databases. Unfortunately, end-users and data scientists typically struggle with many data completion tasks that require non-trivial programming expertise. This paper presents a synthesis technique for automating data completion tasks using programming-by-example (PBE) and a very lightweight sketching approach. Given a formula sketch (e.g., <pre>AVG</pre>(<pre>?</pre>1, <pre>?</pre>2)) and a few input-output examples for each hole, our technique synthesizes a program to automate the desired data completion task. Towards this goal, we propose a domain-specific language (DSL) that combines spatial and relational reasoning over tabular data and a novel synthesis algorithm that can generate DSL programs that are consistent with the input-output examples. The key technical novelty of our approach is a new version space learning algorithm that is based on finite tree automata (FTA). The use of FTAs in the learning algorithm leads to a more compact representation that allows more sharing between programs that are consistent with the examples. We have implemented the proposed approach in a tool called DACE and evaluate it on 84 benchmarks taken from online help forums. 
We also illustrate the advantages of our approach by comparing our technique against two existing synthesizers, namely Prose and Sketch.", "pdfUrls": [ "http://www.cs.utexas.edu/~xwang/pubs/oopsla17.pdf", "http://doi.acm.org/10.1145/3133886", "https://www.cs.utexas.edu/~xwang/pubs/oopsla17.pdf", "http://arxiv.org/abs/1707.01469", "http://export.arxiv.org/pdf/1707.01469", "https://arxiv.org/pdf/1707.01469v1.pdf", "http://www.cs.utexas.edu/users/isil/dace.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/02e770fe56cc33834c8e81e35ed39074471997f7", "sources": [ "DBLP" ], "title": "Synthesis of data completion scripts using finite tree automata", "venue": "PACMPL", "year": 2017 }, "02f036c820c1432a254895088a0d01abd4605449": { "authors": [ { "ids": [ "4762647" ], "name": "James Larisch" }, { "ids": [ "2450059" ], "name": "David R. Choffnes" }, { "ids": [ "36147319" ], "name": "Dave Levin" }, { "ids": [ "1711252" ], "name": "Bruce M. Maggs" }, { "ids": [ "1729928" ], "name": "Alan Mislove" }, { "ids": [ "35497150" ], "name": "Christo Wilson" } ], "doi": "10.1109/SP.2017.17", "doiUrl": "https://doi.org/10.1109/SP.2017.17", "entities": [ "Byte", "Certificate Transparency", "Client-side", "Data structure", "Download", "Firefox", "OCSP stapling", "Online Certificate Status Protocol", "Poor posture", "Public key infrastructure", "Server (computing)", "Server-side", "Transport Layer Security" ], "id": "02f036c820c1432a254895088a0d01abd4605449", "inCitations": [ "67b1bfd459a70990d2894dfd115e2633e927bb59", "36e81b745b1122de2440be3a25920860f8287147" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "539-556", "journalVolume": "", "outCitations": [ "15ca5943844e12f676555a53e9faeeb80f4738e4", "201b0a185dda51629d7b6fdef3b380a0beaba455", "1688c9bb957395bf7ac05098537c736cfd076382", "3593269a4bf87a7d0f7aba639a50bc74cb288fb1", "08fabacc44f1f7d3b968fa41e52e350a24e02abc", "46cd7e1d4231e47873f3eb4e26ab73187deb5437", 
"3586b5f06ad646441406c8a9706c869067f54fa4", "1eea9f527d7902748b14b807e7d544d933734ce6", "bf7b79513e5ca9e037964c9fd0b9a63e50ea4833", "1ee169e1161fbaaea334bd99759015cebe506764", "806c8e1cb853e38dc90ea592b3c2e62f844069aa", "6e4480275887464a483cf85ada0fff26514b1313", "3d049eb62dd331b066df3cd455287ec487a745bb", "0641830054d30adf5c115adc0fd369f3ecdc6d73", "08e9542de3cbfe791bf86a0dee6ba5e83bc29ea7", "06c87865bc8f19df60db5c37e504146b0735255a", "ee9001b8649ecc3e731018214d852e532d2bd5bd", "563239b0eaa3aa7003e8e8e66ba3e789f7cee265", "f0cfdd16edb45182ab227400d75b6f736aafb0a0", "3a2f37d3648592ffb42155c28f71894ad61937fe", "828cc4f5f736e2d5ef555ef052e2a99f754e401a", "23567eb140757d026cb3f5d25419386b52a5623b", "2050a16b0d5272b49fa03c8c4d32cacb08cc800a", "a58e5388358da913ede1ac7ca0807c66fb871f00", "1113664b038d0390b061afb80ee214b09a207fc9", "43e632540fa490c2352a03546a20d53850953626", "2bebf168f3eab1dadc44106977fc5c0f8703967d", "1f13b5ad5c07daee56537aad44c4cd0fc3ea5bd5", "487fb1ea0bbf53bdee927c7ceb8319095d37c42c", "48fc8f1aa0b6d1e4266b8017820ff8770fb67b6f", "6c86e80f902b264cb3e0bb7088490ba2c3df2106", "8bb584dd12dd82b9041b819b8f25633eadf1c5d5", "00f220ea0951673d80137215fa3fd0f005a02789", "3837b80bc3ec48e71ca44a5d6a7b97b5fc3136a7", "1f0665485f7fbc06675c981866efab2c4ccbcdd4", "bdfd34769911b3fb40eadf71bfb34a0ec98fe160", "5ad285efff1151af53ccae1ce9c836bf2b9d8e49", "582d62de1236dbb7ed5416941f818e88bd10b059", "4c30ca01698083bb6afcecfb2f99ec995705498a", "588c404fa3f64c58facb178ca957ed4697aa622c", "182f32790af12408a5656e59356e4ce6873e2066", "b409f82579d9775ce51ae4cfe93b0abe69612565", "208ed7512ea84f22a004920ea0b4c475bc836abc", "54e08c0ee320cc8e20d3517dc29276974eb2a26c", "8cdbab26fa0dee8f165b6680e59e8966679fd068", "f79063dbcfc3e5e1d50c006d64eb0c94264e63e2", "27a8f66219047eb41900f12bd5813b4f52b829e1", "bb52ff840b1b6e2144268e57c72118a49460d6f4", "197f0b31f4088c7a7301e4e3079b43be2eae3dc3", "943718810d6f0b21406bebebe26b498c9ca97e01", "3591be0ccd08c80c0048ebaa0e7005556f49cf5e", 
"185d057d3bce4ea115c4fbe39da65a43b1cc1a0c", "0563bbbf980fffcd0091dc429c157e874bd5c542", "6c5395868a818c6f414c653a30376461240bd366", "4e33e9d1aa91aa78819ec9700d1024e0f0cdef6c" ], "paperAbstract": "Currently, no major browser fully checks for TLS/SSL certificate revocations. This is largely due to the fact that the deployed mechanisms for disseminating revocations (CRLs, OCSP, OCSP Stapling, CRLSet, and OneCRL) are each either incomplete, insecure, inefficient, slow to update, not private, or some combination thereof. In this paper, we present CRLite, an efficient and easily-deployable system for proactively pushing all TLS certificate revocations to browsers. CRLite servers aggregate revocation information for all known, valid TLS certificates on the web, and store them in a space-efficient filter cascade data structure. Browsers periodically download and use this data to check for revocations of observed certificates in real-time. CRLite does not require any additional trust beyond the existing PKI, and it allows clients to adopt a fail-closed security posture even in the face of network errors or attacks that make revocation information temporarily unavailable. We present a prototype of name that processes TLS certificates gathered by Rapid7, the University of Michigan, and Google's Certificate Transparency on the server-side, with a Firefox extension on the client-side. Comparing CRLite to an idealized browser that performs correct CRL/OCSP checking, we show that CRLite reduces latency and eliminates privacy concerns. Moreover, CRLite has low bandwidth costs: it can represent all certificates with an initial download of 10 MB (less than 1 byte per revocation) followed by daily updates of 580 KB on average. 
Taken together, our results demonstrate that complete TLS/SSL revocation checking is within reach for all clients.", "pdfUrls": [ "http://www.ccs.neu.edu/home/cbw/static/pdf/larisch-oakland17.pdf", "https://obj.umiacs.umd.edu/papers_for_stories/crlite_oakland17.pdf", "https://doi.org/10.1109/SP.2017.17" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/02f036c820c1432a254895088a0d01abd4605449", "sources": [ "DBLP" ], "title": "CRLite: A Scalable System for Pushing All TLS Revocations to All Browsers", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "02f34709c626b3076c9fac5c0a4f9cf8d7ecdeb5": { "authors": [ { "ids": [ "31251390" ], "name": "Maxime C. Cohen" }, { "ids": [ "38694262" ], "name": "Philipp W. Keller" }, { "ids": [ "1728881" ], "name": "Vahab S. Mirrokni" }, { "ids": [ "1724391" ], "name": "Morteza Zadimoghaddam" } ], "doi": "10.1145/3078505.3078530", "doiUrl": "https://doi.org/10.1145/3078505.3078530", "entities": [ "Algorithm", "Bin packing problem", "Cloud computing", "Data center", "Experiment", "Jumpstart Our Business Startups Act", "Memory overcommitment", "Online algorithm", "Requirement", "Risk aversion", "Risk management", "Schedule (project management)", "Scheduling (computing)", "Set packing", "Submodular set function" ], "id": "02f34709c626b3076c9fac5c0a4f9cf8d7ecdeb5", "inCitations": [ "35b347c57e8765ae46b0ccb0fd0639214d0f7c0e" ], "journalName": "", "journalPages": "7", "journalVolume": "", "outCitations": [ "c4db59cfebfb97d119fa6f96fc251d4cb1bb25db", "81966368cc3f2aed2581e726590d7a52cec3bc96", "cc34846754a3cc95384bac736e91e35ae7f0d374", "6d8610a87f190985f508372f2f981aeddbbebc4d", "2fc51f5f8dfa03d736cbd6960a08615522e19eaa", "172fc53432c0b1b8bd3d0da1be6f9363f27cfaa9", "8d56d4bc69a8c562434b9a129542bb79e9d6f1d6", "ffb9ba598fab24edf5e143901e246804137114b2", "610f5e505a94544edf774d28f89558670bcc2318", "61b370c44b85cb06a36219343471e180e20a235e", "09f6c6f9d630774e707b5ffe060158cc7def4b89", 
"0cebc93d088750083be23d59a7f10ce9e0f9324c", "1fa5edc36a127ccf496a8d2e5189b2c22e1693d4", "05705bf8ab08bde69a2918bf69cf794eb1948124" ], "paperAbstract": "This paper considers a traditional problem of resource allocation, scheduling jobs on machines. One such recent application is cloud computing, where jobs arrive in an online fashion with capacity requirements and need to be immediately scheduled on physical machines in data centers. It is often observed that the requested capacities are not fully utilized, hence offering an opportunity to employ an overcommitment policy, i.e., selling resources beyond capacity. Setting the right overcommitment level can induce a significant cost reduction for the cloud provider, while only inducing a very low risk of violating capacity constraints. We introduce and study a model that quantifies the value of overcommitment by modeling the problem as a bin packing with chance constraints. We then propose an alternative formulation that transforms each chance constraint into a submodular function. We show that our model captures the risk pooling effect and can guide scheduling and overcommitment decisions. We also develop a family of online algorithms that are intuitive, easy to implement and provide a constant factor guarantee from optimal. Finally, we calibrate our model using realistic workload data, and test our approach in a practical setting. 
Our analysis and experiments illustrate the benefit of overcommitment in cloud services, and suggest a cost reduction of 1.5% to 17% depending on the provider's risk tolerance.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078505.3078530", "http://arxiv.org/abs/1705.09335", "https://arxiv.org/pdf/1705.09335v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/02f34709c626b3076c9fac5c0a4f9cf8d7ecdeb5", "sources": [ "DBLP" ], "title": "Overcommitment in Cloud Services Bin packing with Chance Constraints", "venue": "SIGMETRICS", "year": 2017 }, "02fd71524e15de535d5e7aa6f79743352cac3992": { "authors": [ { "ids": [ "2784940" ], "name": "Tim Ruffing" }, { "ids": [ "2970940" ], "name": "Pedro Moreno-Sanchez" }, { "ids": [ "1828965" ], "name": "Aniket Kate" } ], "doi": "", "doiUrl": "", "entities": [ "Best, worst and average case", "Bitcoin", "Communications protocol", "Crypto-anarchism", "De-anonymization", "Dining cryptographers problem", "Formal verification", "Malware", "P2P caching", "Peer-to-peer", "Pseudonymity" ], "id": "02fd71524e15de535d5e7aa6f79743352cac3992", "inCitations": [ "9d5d3fa5bc48de89c398042236b85566f433eb5c", "4e12417ac7138f4e898cfbe35c7fd7c4e5e13b45", "aad038f478194173c181782e187a9370b95d0180", "01e1d63eb9f23163458f0364dc92377c9a17b466", "b5d8f9196dcf75fb4736b83c7bff228f83353757", "24f036498862dba97036df9c26de066c75e843c2", "6c39f2c252d095ab9d4a398fa66706c901387683", "f234f428eb552b94435683e7e784e805c201d309", "5f0db49bc309c6f82dec7368a1adb9bde4363b1c" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "824", "journalVolume": "2016", "outCitations": [ "9d2c1271f1219522d13f150c2b04123bef300dd9", "17a3c5593f159d3180b256e18498bb11eb14b9e8", "10336cdef674893f41bf4824d44c4156be5e9ca2", "6747887cc328764781b21b543cd11b953efe7519", "c8e275e627757d3c090afd9db367be000e401d01", "139cfb65d375bba4ca59acc19efb0b7ac99247dc", "4123e9fecfc01c3cf32fb3d59ed1566ee0856874", "c27762257f068fdbb2ad34e8f787d8af13fac7d1", 
"557d8b988bca3d0033189723d11102e04c0c67c0", "331be37d467f4e630cea0ea689697945698caab9", "1993c3ee54425aa9fa7486c82aeb56d22f77b14f", "049e2c54fe8a35cd941937ba592e07bbc2dda591", "03cee43dcbb978f663db5dd3e658e0e0f4dacfbe", "5ae4e852d333564923e1b6caf6b009729df6ca6a", "bac41b59697da3ca5c80ca08f2bbbc97a3576248", "b32836684d504afc8e33dcc41d77336e51e27fc5", "244ddba27efef35bac9b01d5b1780922f5f33ec4", "1ed7234e9de7b8e3a6e8078e70ae8cec0020c06b", "7f17ee37b9cc8dbf5de6363c863d9e3c49768400", "0871f3b37652edfa2dfb4689512eaf0b4ecff889", "73041923fcdc4141d9269a5df16f24a587070d31", "3624eaf7c2f05612429ed1579cc82102c11f4d65", "06f1f0da373de40493a819bc8eedbfe2e9edec39", "0706225eeac0f855b19c365313db61252ecde0d7", "19bab496d5d7f60d3e5b9217739b9cf7fedaf44b", "02dad9c51e3a2e2117ffc41d624de4a090271d1f", "c773c64ab52f702ae0aaba8c35b72dde471ea04a", "a9267bc516da5940740d95664c562b6fa14d4b34", "15167da8d35184d062b988b5a6807e0fa72cd77f", "14829636fee5a1cf8dee9737849a8e2bdaf9a91f", "09af9108cb5c196d5c15a6f3d26e604434203bea", "2949851ab9827fdd334ecc3b392296df2aacaf92", "1d1d1d72f226ffec5468dc9cb28ea804cd7864fa", "05f84e7e8f53e39d84a9e41c799ef56c35b217ab", "02573d72bff8dd5f7dc68334ffb5a337a474d839", "12b66f7180072dd8d5ac1c935b12df381d71ad81", "67af8bf83dc4354d1513b6f60b13df60f694c5b3" ], "paperAbstract": "Starting with Dining Cryptographers networks (DC-net), several peer-to-peer (P2P) anonymous communication protocols have been proposed. Despite their strong anonymity guarantees none of those has been employed in practice so far: Most fail to simultaneously handle the crucial problems of slot collisions and malicious peers, while the remaining ones handle those with a significant increased latency (communication rounds) linear in the number of participating peers in the best case, and quadratic in the worst case. 
We conceptualize these P2P anonymous communication protocols as P2P mixing, and present a novel P2P mixing protocol, DiceMix, that only requires constant (i.e., four) communication rounds in the best case, and 4 + 2f rounds in the worst case of f malicious peers. As every individual malicious peer can prevent a protocol run from success by omitting his messages, we find DiceMix with its worst-case linear-round complexity to be an optimal P2P mixing solution. On the application side, we find DiceMix to be an ideal privacy-enhancing primitive for crypto-currencies such as Bitcoin. The public verifiability of their pseudonymous transactions through publicly available ledgers (or blockchains) makes these systems highly vulnerable to a variety of linkability and deanonymization attacks. DiceMix can allow pseudonymous users to make their transactions unlinkable to each other in a manner fully compatible with the existing systems. We demonstrate the efficiency of DiceMix with a proof-of-concept implementation. In our evaluation, DiceMix requires less than 8 seconds to mix 50 messages (160 bits, i.e., Bitcoin addresses), while the best protocol in the literature requires almost 3 minutes in a very similar setting. As a representative example, we use DiceMix to define a protocol for creating unlinkable Bitcoin transactions. Finally, we discover a generic attack on P2P mixing protocols that exploits the implicit unfairness of a protocol with a dishonest majority to break anonymity. Our attack uses the attacker\u2019s real-world ability to omit some communication from an honest peer to deanonymize her input message. 
We also discuss how this attack is resolved in our application to crypto-currencies by employing uncorrelated input messages across different protocol runs.", "pdfUrls": [ "http://eprint.iacr.org/2016/824", "https://www.internetsociety.org/sites/default/files/ndss2017_01-4_Ruffing_paper.pdf", "http://crypsys.mmci.uni-saarland.de/projects/FastDC/draft-paper.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/p2p-mixing-and-unlinkable-bitcoin-transactions/", "http://diyhpl.us/~bryan/papers2/bitcoin/P2P%20mixing%20and%20unlinkable%20p2p%20transactions%20-%20Anonymity%20of%20the%20people,%20by%20the%20people,%20and%20for%20the%20people%20-%202016.pdf", "http://eprint.iacr.org/2016/824.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/c95c/c001a8b93059d24c0ea4f458acb583e43b21.pdf", "s2Url": "https://semanticscholar.org/paper/02fd71524e15de535d5e7aa6f79743352cac3992", "sources": [ "DBLP" ], "title": "P2P Mixing and Unlinkable Bitcoin Transactions", "venue": "NDSS", "year": 2016 }, "02ff04aca88c6c27ea24fe727e7c66d1482d46ec": { "authors": [ { "ids": [ "30521811" ], "name": "Hosein Mohammadi Makrani" }, { "ids": [ "1747542" ], "name": "Houman Homayoun" } ], "doi": "10.1109/IISWC.2017.8167763", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167763", "entities": [ "Apache Hadoop", "Big data", "Commodity computing", "Computational resource", "Computer data storage", "Dynamic random-access memory", "Machine learning", "Message Passing Interface", "SPARK", "Server (computing)" ], "id": "02ff04aca88c6c27ea24fe727e7c66d1482d46ec", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "112-113", "journalVolume": "", "outCitations": [ "6b1146e51b2c4d5b8433d4d9c6dbf87c6c484196", "1c437e8220d4122476d3a1ea0ca2debc4871aa76" ], "paperAbstract": "Emerging big data frameworks requires computational resources and memory subsystems that can naturally scale to manage massive amounts of diverse 
data. Given the large size and heterogeneity of the data, it is currently unclear whether big data frameworks such as Hadoop, Spark, and MPI will require high performance and large capacity memory to cope with this change and exactly what role main memory subsystems will play; particularly in terms of energy efficiency. The primary purpose of this study is to answer these questions through empirical analysis of different memory configurations available on commodity hardware and to assess the impact of these configurations on the performance and power of these well-established frameworks. Our results reveal that while for Hadoop there is no major demand for high-end DRAM, Spark and MPI iterative tasks (e.g. machine learning) are benefiting from a high-end DRAM; in particular high frequency and large numbers of channels. Among the configurable parameters, our results indicate that increasing the number of DRAM channels reduces DRAM power and improves the energy-efficiency across all three frameworks.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167763", "http://ece.gmu.edu/~hhomayou/files/iiswc2017-2017-Hosein-1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/02ff04aca88c6c27ea24fe727e7c66d1482d46ec", "sources": [ "DBLP" ], "title": "Memory requirements of hadoop, spark, and MPI based big data applications on commodity server class architectures", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "031048fe96d408fb5b0a245ebf2e313b51e4c90a": { "authors": [ { "ids": [ "3284944" ], "name": "Stratos Dimopoulos" }, { "ids": [ "1713536" ], "name": "Chandra Krintz" }, { "ids": [ "1682591" ], "name": "Richard Wolski" } ], "doi": "10.1109/CLUSTER.2017.52", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.52", "entities": [ "Apache Hadoop", "Big data", "Enterprise resource planning", "Fair-share scheduling", "Fairness measure", "MapleStory", "Resource contention", 
"Simulation", "Trace-based simulation" ], "id": "031048fe96d408fb5b0a245ebf2e313b51e4c90a", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "233-244", "journalVolume": "", "outCitations": [ "20244961dbba619d38e9115dfc63ebd90676d224", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "3000e77ed7282d9fb27216f3e862a3769119d89e", "8dd9dca858d1c21e495aeb5135e6ff7b9c18f37c", "02953ffd49d51b2d8a00520e42d852c241c3ddd1", "a43dfb040d60d0df3dbe66a52b920e05a1ac3083", "85c058c445cbabe1dab281ec8792e8f154fa2e61", "29571c68067f483deb833c4eaa70f9f78cd9470f", "0b5105bbe6635b55d8a0677071b44e4000f2f6d4", "9da1d06e9afe37b3692a102022f561e2b6b25eaf", "980773ca869fc17562e4fbcf4202a8f21893b114", "0b95a8628f90a78909447c4cfee2dce7cb92dd52", "e995899e0b4c8d3ad46af9083c120daeac4110dc", "f42e1c4556034d8955a101079e514ef7b72481d7", "8ae47a914915a4a9071662eaf504ddc9bc0c3194", "2a7d3b967a356c2a42f729048b0d3511b0005351", "114019f734b28125525756cc15810cc23dc1297a", "08f13e484e7e51831ec13076d14570ced91a50fb", "0d868efa67bf06b1f784d60769c082fd9a58893e", "09d7a6126120458d3988676d4f0a1ffada7d0a55", "afa2aa72e82ea97f4c98e0d6adb26b9be3dbf5df", "207ea0115bf4388d11f0ab4ddbfd9fd00de5e8d1", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "2988e34168fa91398fa397baf823af2063893e9c", "1591168514e17936199d57f5724a960315988b58", "c1baed1ddf0be06370d52a2a6d7014faf0226e60", "43f68c6b38dae2b6f5403362ecd96a5183c3aeab", "287f2a6b29574197551d80c69455b51b7a7d3c9a", "090599a2caf4591c87699ad850c75554cd712937", "830840a01bd03b0f7812a10bece54d5682c9714b", "5341bdf934b3a99af6685b5564af8e03d1b780a7", "2b5e2bf1d05cf358e5d9b5c85a96a74b145dc5a3", "090bbaf22ba20bf032e38770a5379e25d52a1bd4", "4d22a82681bd58e959ed2f3544bba7495701b7f2", "6344369772fe18c032944d7a317b87588308fd3d", "2997435fe9f0e646e6a37d9783b520b9cdbdd38b", "f207de5d870ab3851df841def8b962ddb428f865", "488e3c0b5935c0f82b3044962f0d32fba3eb4a8c", "7c7a10ed39d58ce65711465ef10e89b16df6e35d", 
"5f9305096c8bdb47b146a6a2ec0c9569513d8a16", "bbc2a698c2fb2b76e256cc51a9d7c37765ab51b6", "0ea4380ff8bb30e6bd5fd888268d6f8f38229fb7", "676e50a4d2141ae66a0d2aafcf79c8c989fcce33", "998df69751b93679888449d3327c2e47acbaaaa3", "45e2dd9fe949025ff7f82d888e5be8693dbd317d", "6fddb6690a6aaca1e42fbac7ef9b9fb18ac31590", "184014795c3c2bbf23f3959f6d8b1ab8bc03aea8" ], "paperAbstract": "In this paper, we present Justice, a fair-share deadline-aware resource allocator for big data cluster managers. In resource constrained environments, where resource contention introduces significant execution delays, Justice outperforms the popular existing fair-share allocator that is implemented as part of Mesos and YARN. Justice uses deadline information supplied with each job and historical job execution logs to implement admission control. It automatically adapts to changing workload conditions to assign enough resources for each job to meet its deadline "just in time." We use trace-based simulation of production YARN workloads to evaluate Justice under different deadline formulations. 
We compare Justice to the existing fair-share allocation policy deployed on cluster managers like YARN and Mesos and find that in resource-constrained settings, Justice improves fairness, satisfies significantly more deadlines, and utilizes resources more efficiently.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.52", "http://www.cs.ucsb.edu/~ckrintz/papers/cluster17.pdf", "http://cs.ucsb.edu/~stratos/documents/justice.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/031048fe96d408fb5b0a245ebf2e313b51e4c90a", "sources": [ "DBLP" ], "title": "Justice: A Deadline-Aware, Fair-Share Resource Allocator for Implementing Multi-Analytics", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "0327efc1d7628328c44b81f452f808d4fe05d955": { "authors": [ { "ids": [ "3393459" ], "name": "Gal Yehuda" }, { "ids": [ "33438590" ], "name": "Daniel Keren" }, { "ids": [ "2379080" ], "name": "Islam Akaria" } ], "doi": "10.1109/IPDPS.2017.123", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.123", "entities": [ "Algorithm", "Centralisation", "Graph property", "Linear function (calculus)", "Local variable", "Mathematical optimization", "Nonlinear system", "Online and offline", "Spectral clustering", "Vertex (geometry)" ], "id": "0327efc1d7628328c44b81f452f808d4fe05d955", "inCitations": [ "782079770e60b2ef266dcf3de861a81f97baa985" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "2-11", "journalVolume": "", "outCitations": [ "4716de48821d4669a08f97e94493c5253c907e0f", "86fb6d3152a9849444f2301c91ddce5b97ce611b", "4f8298d9932393b0fdd73576715c0818b4083292", "24259d92169e287c55abd3bd6cc5b2da50a88c4b", "3df7d6f56bcf9d4741409b439b418f4217cdcd2c", "41d7a4cb6c804945a7c6a0976a3dd85b9fe37677", "3ea5e18d3da3c72212aeccca74e28a2c8d9449cc", "121e53ccc22cbf6aa453a221fcde294b1fcffe60", "4ebc5082dc41cf6fdc80533d44dfc5db35ffa94f", 
"7c6d51677ffff060ac04e0a61ce2cf9cb2437709", "011c8ce97e4481e92e3e6cdb989247a8881a7f2f", "63d567b512fca70f84aef4a59bc0e2aafaaebb56", "0967bd75632d959541ee4afef35a5ef37c805cc7", "aa6ad058dffedcaa0b614b23a7508562a4652855", "01169e6900a3bb555d45b55ba674fc3b342d31c9", "015d2bee5968ceecdbec6cb4a9328ad04c9efe6c", "6d248d20660602f34b87b2e9a597dbc3be06cd3a", "59dd507247fd03a93437288b015d55e337807247" ], "paperAbstract": "The following is a very common question in numerous theoretical and application-related domains: given a graph G, does it satisfy some given property? For example, is G connected? Is its diameter smaller than a given threshold? Is its average degree larger than a certain threshold? Traditionally, algorithms to quickly answer such questions were developed for static and centralized graphs (i.e. G is stored in a central server and the list of its vertices and edges is static and quickly accessible). Later, as dictated by practical considerations, a great deal of attention was given to on-line algorithms for dynamic graphs (where vertices and edges can be added and deleted); the focus of research was to quickly decide whether the new graph still satisfies the given property. Today, a more difficult version of this problem, referred to as the distributed monitoring problem, is becoming increasingly important: large graphs are not only dynamic, but also distributed, that is, G is partitioned between a few servers, none of which "sees" G in its entirety. The question is how to define local conditions, such that as long as they hold on the local graphs, it is guaranteed that the desired property holds for the global G. Such local conditions are crucial for avoiding a huge communication overhead. While defining local conditions for linear properties (e.g. average degree) is relatively easy, they are considerably more difficult to derive for non-linear functions over graphs. 
We propose a solution and a general definition of solution optimality, and demonstrate how to apply it to two important graph properties – the spectral gap and the number of triangles. We also define an absolute lower bound on the communication overhead for distributed monitoring, and compare our algorithm to it, with excellent results. Last but not least, performance improves as the graph becomes larger and denser – that is, when distributing it is more important.", "pdfUrls": [ "http://www.cs.haifa.ac.il/~dkeren/ipdps17.pdf", "http://www.weizmann.ac.il/math/printpdf/seminar/monitoring-properties-large-distributed-dynamic-graphs", "https://doi.org/10.1109/IPDPS.2017.123" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0327efc1d7628328c44b81f452f808d4fe05d955", "sources": [ "DBLP" ], "title": "Monitoring Properties of Large, Distributed, Dynamic Graphs", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "0330b64dff62e2d537f3bbb5b5a882194066a481": { "authors": [ { "ids": [ "1736081" ], "name": "Amit Sabne" }, { "ids": [ "33727877" ], "name": "Xiao Wang" }, { "ids": [ "3039737" ], "name": "Sherman J. Kisner" }, { "ids": [ "1745655" ], "name": "Charles A. Bouman" }, { "ids": [ "1682337" ], "name": "Anand Raghunathan" }, { "ids": [ "1697585" ], "name": "Samuel P. 
Midkiff" } ], "doi": "10.1145/3018743.3018765", "doiUrl": "https://doi.org/10.1145/3018743.3018765", "entities": [ "Algorithm", "Central processing unit", "Coordinate descent", "Gradient descent", "Graphics processing unit", "Image quality", "Iterative reconstruction", "Medical imaging", "Multi-core processor", "Parallel computing", "Speedup", "Test case", "Tomography" ], "id": "0330b64dff62e2d537f3bbb5b5a882194066a481", "inCitations": [ "15ac663677324fa0c352efd11f00efa5b488658c", "2e67dfdb5435a7c903ade7a7bad50cba09968048", "c82033a0c15c1c64852c63a2379d50ddc8554aaa" ], "journalName": "", "journalPages": "207-220", "journalVolume": "", "outCitations": [ "2c1a16f01b85bfd397676db1128a664e42400861", "773f5a0299f0f772fd698f4a54030827ba9b2c2d", "c6dc80c832d6760f8a5d6a01678474e534c0258b", "1333fc35045fd7897e9fa8d9d29a395c6230fda3", "0eb7a0f1d9f6ca2627294b52d1ef601363885262", "23234ac0c65caa0eb8fb300d3b7e19ccfffc323c", "6feac7b4129a7e507aa0204080f6b5bc4ef5896b", "0059cfac9c5b7811866f0729d0917b7478148fc5", "87660a2538ffb49960e12a07cb09e85acbbab35e", "00f38163ddfddb5ad34c1db1711dd6845a4de855", "a68be42bd90645ef53d14ba0ccfb95176db2c258", "637643b4eeab8feaffbca9f00936f96ccf1838ac", "0389a414c5d0ef50e06fe0c15f6102f374ce1b04", "aac43d8c33362f6de537d7b7f87191a64efd37d4", "90eb72b10cd337af115b84014a0933d0760f3d1f", "5c3fe5e8439287ea6c0695207a20b16fb85a3290", "4d650e2a74dea2f99683d058ee5f09ca951a5661", "cc260dc356514090eb82becd5c3cce3fbd9a5306", "ba09ff8f71930605287fc478ea47864c818e42e1", "1e4abfa6c323bb7ad7a62f94f088104e515cd1be", "79b33b94477515a1c005ac2ce3169c4dec142938", "4bc9181d19257301c57d280d440061df4092fe15", "0c77cac3243697964103a35686ea2379137bc5e9", "1afaa6fa1906292e0bf6fcedf27f04722d2d86e3", "0f922c6d696a26f7d4eb1e508afcc7982cdb2c4d", "5d205292cfe87909f7a50419a18652f0d813f4b7", "0555761dbfdcb65fb60386f5d715046c737e42f7", "4aba22b139d432b394f4beaa8b0ddae991bbee8f", "1e100b3e990b23a33c2dfb76e4fa044b2055e946", "9ebb76086d51702204828ba01b3164a4ccefef38", 
"04f0a6e8f86e5e5c2af236e1460109414f45c6e7", "378096ff5a43e294489efdcb191d3aeee566ad5d", "7a7f08c789f3d1e6359cae01d30e90ce18429c0b", "707c82501c0ad8356ccc5d6f3fbf03c9b1b92f4b" ], "paperAbstract": "Computed Tomography (CT) Image Reconstruction is an important technique used in a variety of domains, including medical imaging, electron microscopy, non-destructive testing and transportation security. Model-based Iterative Reconstruction (MBIR) using Iterative Coordinate Descent (ICD) is a CT algorithm that produces state-of-the-art results in terms of image quality. However, MBIR is highly computationally intensive and challenging to parallelize, and has traditionally been viewed as impractical in applications where reconstruction time is critical. We present the first GPU-based algorithm for ICD-based MBIR. The algorithm leverages the recently-proposed concept of SuperVoxels, and efficiently exploits the three levels of parallelism available in MBIR to better utilize the GPU hardware resources. We also explore data layout transformations to obtain more coalesced accesses and several GPU-specific optimizations for MBIR that boost performance. Across a suite of 3200 test cases, our GPU implementation obtains a geometric mean speedup of 4.43X over a state-of-the-art multi-core implementation on a 16-core iso-power CPU.", "pdfUrls": [ "https://engineering.purdue.edu/~bouman/publications/orig-pdf/PPoPP-2017.pdf", "http://dl.acm.org/citation.cfm?id=3018765" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0330b64dff62e2d537f3bbb5b5a882194066a481", "sources": [ "DBLP" ], "title": "Model-based Iterative CT Image Reconstruction on GPUs", "venue": "PPOPP", "year": 2017 }, "033492cf9e4fdd36380065d7e6f31817ba561e57": { "authors": [ { "ids": [ "9545854" ], "name": "Chathuri Gunawardhana" }, { "ids": [ "2248998" ], "name": "Manuel Bravo" }, { "ids": [ "1741342" ], "name": "Lu\u00eds E. T. 
Rodrigues" } ], "doi": "", "doiUrl": "", "entities": [ "Concurrency (computer science)", "Kinetic Void", "Limiter", "Microsequencer", "Requirement", "Riak", "Serialization", "Throughput", "Windows Update" ], "id": "033492cf9e4fdd36380065d7e6f31817ba561e57", "inCitations": [ "efb351341158c8cb92ea6f479021c05e8e2e6120", "8fe5d6bb93d046c4c1ff3a075225b8acf147584f", "c1447c4c07721e4e444aaa7ad5bb6a661c742bd2" ], "journalName": "", "journalPages": "83-95", "journalVolume": "", "outCitations": [ "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "d12d1289d2384c2ce642f01855637b9f0519e189", "740ee3de6f8ca734797d7a808c956e303f4a5730", "200adc5e9ca486f6919bc194415cec28e986df2d", "55bef5db971deed1358bcb2b375d6832b9ba6a1b", "34d269619576cd827b9842581755c06dac344b16", "71c0dd6bd1dd57716b6797043e9f09b951c88a22", "1eb6ffee1f322412d9d76190fc76b3dcc6546cee", "e9af96fbbacb4268c3c5ff974cc44990b12294e5", "ed2e39973435a4b53da760ad9837237ddce2eda5" ], "paperAbstract": "In this paper we propose a novel approach to manage the throughput vs latency tradeoff that emerges when managing updates in geo-replicated systems. Our approach consists in allowing full concurrency when processing local updates and using a deferred local serialisation procedure before shipping updates to remote datacenters. This strategy allows to implement inexpensive mechanisms to ensure system consistency requirements while avoiding intrusive effects on update operations, a major performance limitation of previous systems. We have implemented our approach as a variant of Riak KV. Our extensive evaluation shows that we outperform sequencer-based approaches by almost an order of magnitude in the maximum achievable throughput. 
Furthermore, unlike previous sequencer-free solutions, our approach reaches nearly optimal remote update visibility latencies without limiting throughput.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-gunawardhana.pdf", "http://www.gsd.inesc-id.pt/~ler/reports/cgunawardhanamsc.pdf", "http://arxiv.org/abs/1702.01786", "https://arxiv.org/pdf/1702.01786v1.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/gunawardhana", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_rodrigues.pdf", "http://www.gsd.inesc-id.pt/~ler/reports/cgunawardhanaea.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0334/92cf9e4fdd36380065d7e6f31817ba561e57.pdf", "s2Url": "https://semanticscholar.org/paper/033492cf9e4fdd36380065d7e6f31817ba561e57", "sources": [ "DBLP" ], "title": "Unobtrusive Deferred Update Stabilization for Efficient Geo-Replication", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "033a16554eca0961d0f7002d0ef9515a01b18896": { "authors": [ { "ids": [ "1745614" ], "name": "Liu Yang" }, { "ids": [ "1728602" ], "name": "Susan T. Dumais" }, { "ids": [ "2142374" ], "name": "Paul N. 
Bennett" }, { "ids": [ "1977489" ], "name": "Ahmed Hassan Awadallah" } ], "doi": "10.1145/3077136.3080782", "doiUrl": "https://doi.org/10.1145/3077136.3080782", "entities": [ "Archive", "Baseline (configuration management)", "Dyadic transformation", "Email", "Experiment", "Gmail", "Next-generation network", "Organizational behavior", "Personally identifiable information" ], "id": "033a16554eca0961d0f7002d0ef9515a01b18896", "inCitations": [ "9fed5aad11e9bd45a02b05168284a22d79f06b62", "35fe602bc3e47e1d96e2a51bda7ed7228831952f", "c557fd527e77d65cd4e59f8c312fd59bfe5e6c58", "9bc6b6402ac136d97c49344cf5eaf0f8f64771f5", "ab565ded025579f450b1688923b21feef8fb8570" ], "journalName": "", "journalPages": "235-244", "journalVolume": "", "outCitations": [ "c32e8187d7a575432eee831294b5e2f67962d441", "c7ba25074ab7b03f304d11aa9810341925922b4b" ], "paperAbstract": "Email is still among the most popular online activities. People spend a significant amount of time sending, reading and responding to email in order to communicate with others, manage tasks and archive personal information. Most previous research on email is based on either relatively small data samples from user surveys and interviews, or on consumer email accounts such as those from Yahoo! Mail or Gmail. Much less has been published on how people interact with enterprise email even though it contains less automatically generated commercial email and involves more organizational behavior than is evident in personal accounts. In this paper, we extend previous work on predicting email reply behavior by looking at enterprise settings and considering more than dyadic communications. We characterize the influence of various factors such as email content and metadata, historical interaction features and temporal features on email reply behavior. We also develop models to predict whether a recipient will reply to an email and how long it will take to do so. 
Experiments with the publicly-available Avocado email collection show that our methods outperform all baselines with large gains. We also analyze the importance of different features on reply behavior predictions. Our findings provide new insights about how people interact with enterprise email and have implications for the design of the next generation of email clients.", "pdfUrls": [ "http://maroo.cs.umass.edu/pub/web/getpdf.php?id=1270", "http://www.cs.cmu.edu/~pbennett/papers/SIGIR17-EmailReply-yang-et-al.pdf", "http://doi.acm.org/10.1145/3077136.3080782", "http://ciir-publications.cs.umass.edu/pub/web/getpdf.php?id=1270" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/033a16554eca0961d0f7002d0ef9515a01b18896", "sources": [ "DBLP" ], "title": "Characterizing and Predicting Enterprise Email Reply Behavior", "venue": "SIGIR", "year": 2017 }, "0343ae9ab99d0cbd719baf0d2cc1b82425f3664a": { "authors": [ { "ids": [ "2906275" ], "name": "Berkin Ilbeyi" }, { "ids": [ "27019192" ], "name": "Carl Friedrich Bolz-Tereick" }, { "ids": [ "3206189" ], "name": "Christopher Batten" } ], "doi": "10.1109/IISWC.2017.8167760", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167760", "entities": [ "Benchmark (computing)", "Compiled language", "Compiler", "Dynamic programming", "High- and low-level", "High-level programming language", "Interpreter (computing)", "Just-in-time compilation", "Meta-process modeling", "Microarchitecture", "Programming language", "Python", "Racket", "Virtual machine" ], "id": "0343ae9ab99d0cbd719baf0d2cc1b82425f3664a", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "97-107", "journalVolume": "", "outCitations": [ "add350d0c5605c98d285b87493fc77c1d68281df", "3c3f311b4877a0aa49800c10d71ecd136de941c9", "7baa3d0ba24ed8b8e7b91a512fe7fc8fda25ae07", "04a15cb22777cb22048502204c83175253fb15a0", "1d4c0211549a8fe259a273da88c63e8f00fef463", 
"0d281938d3ff2377541704cab6ba1c4408420733", "0ff7e33a637f0a228501f8c29880e7e8d84a31e8", "6662d518878d3eee218462ee4d8b389c64e1b6f7", "0c03c044f10c2d165468afeb0cb5718953c315ac", "5d5e1b35dcfbf52299c327baab696568ba0e1d15", "b73572f4bce2a14e9f9c023766dab7feec2d5f6c", "5878301fb9bcd3e6ca30e644670955bf07696607", "40b491d7b820783a79cfaa77f15b9400c72e54a7", "7fddc2242e5e96eefc9502e150971564b84f66da", "7521513abd7acae00b3fd89001da47019606cf38", "cf058a45de72a537d0588d38a3fe4ff6244d5e7a", "2f28a633862812674aae9366ba603f0e40b439c2", "3b0569a1bbac66166a8b9d724c6c6fc190951298", "a3f3d0f41d0f914f0a7edaccb3d80cc69388cb59", "3bc180e00cb21933223785f70abc5509852dfa00", "1e52c603d66a7604ade572c6525a4eb73aec0e3f", "53e2b31ad6fea91655ecbe64fe66968b934d0160", "1f8549b87e2b0a16e5785a9c4013f28bc4e9ef35", "ea8acbcecd906e840c2f03246cd686b3e88fd318", "d32d4ff33b1b2665d6081194eb6acdc3c7dd6891", "104a9057b97b50d053a01e7a36c0de46480a1948", "2e942fb549eff96fda39cb1bef44b7eca3f4fcf1", "333ea43ab30ae453d6bd847360cd475275e0acbf", "160ad871b437c95e2f5d89b649a8392ad711cf8c", "0653e2ed9f683868cb4539eb8718551242834f6b", "f8b29de9130a9c7e0d2373b57abcb37f0efe3c7a", "38a3f26f2981ae9e7532c37f7bf32dd07e9f0323" ], "paperAbstract": "Dynamic programming languages are becoming increasingly popular, and this motivates the need for just-in-time (JIT) compilation to close the productivity/performance gap. Unfortunately, developing custom JIT-optimizing virtual machines (VMs) requires significant effort. Recent work has shown the promiseofmeta-JITframeworks, which abstract the language definition from the VM internals. Meta-JITs can enable automatic generation of high-performance JIT-optimizing VMs from high-level language specifications. This paper provides a detailed workload characterization of meta-tracing JITs for two different dynamic programming languages: Python and Racket. 
We propose a new cross-layer methodology, and then we use this methodology to characterize a diverse selection of benchmarks at the application, framework, interpreter, JIT-intermediate-representation, and microarchitecture level. Our work is able to provide initial answers to important questions about meta-tracing JITs including the potential performance improvement over optimized interpreters, the source of various overheads, and the continued performance gap between JIT-compiled code and statically compiled languages.", "pdfUrls": [ "http://www.csl.cornell.edu/~cbatten/pdfs/ilbeyi-mtwc-iiswc2017.pdf", "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167760" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0343ae9ab99d0cbd719baf0d2cc1b82425f3664a", "sources": [ "DBLP" ], "title": "Cross-layer workload characterization of meta-tracing JIT VMs", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "0359692cf53e0b484f1c2e7048c15cc7f3b5f605": { "authors": [ { "ids": [ "39109639" ], "name": "Victor B. F. Gomes" }, { "ids": [ "3272157" ], "name": "Martin Kleppmann" }, { "ids": [ "34791251" ], "name": "Dominic P. Mulligan" }, { "ids": [ "2619693" ], "name": "Alastair R. 
Beresford" } ], "doi": "10.1145/3133933", "doiUrl": "https://doi.org/10.1145/3133933", "entities": [ "Algorithm", "Computer", "Computer Networks (journal)", "Conflict-free replicated data type", "Correctness (computer science)", "Distributed computing", "Dynamic array", "Eventual consistency", "Formal system", "HOL (proof assistant)", "Increment and decrement operators", "Interactive proof system", "Isabelle", "Network model", "Proof assistant", "Replication (computing)", "Verification and validation", "XACML" ], "id": "0359692cf53e0b484f1c2e7048c15cc7f3b5f605", "inCitations": [], "journalName": "PACMPL", "journalPages": "109:1-109:28", "journalVolume": "1", "outCitations": [ "d4b133c946a9105dde49c820a09216dcce8f1130", "a3bea602dc20a1aa20ed1d8ed46325750d6f2b7e", "7f26c079f1bcddfcb5f6a87ae1f8bc055520404a", "32581fc444da1dc63c457eff347915bb177d6f09", "322fe70408d33e72403d67f1aee6ac2772793390", "2f9a41495f7eb27f22b43ccf24901a2b39c2d9b9", "23346a18e78062e586cab22195819eb0f18ffc66", "a1109cb113db04d9fd0eb2ec3c5daefb0d5d9df3", "16c35dcacb9f1517769dac709e7a1ca4d80dcb46", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "078a991394551a1881d11beef0351887bb8ddb1d", "17243e42e572c1a823eb0a7c9c3b871eb2136137", "172514edf345a2aa62143e4a6e99f49eb795ba88", "06e04a7a24100dbc0f72f22bc8e6dde4b2a27d8b", "8470ae40470235604f40382aea4747275a6f6eef", "7e8348f39f019817d4cf5389111974bc0e31245f", "047a91395070abb5c75de446883aa18c52eb3274", "2478af7c3aa8dd0f2c22bb3ca136ef892931ff75", "03864c9e57d8975efeea20094b561bc37df229f3", "0b86da5fba3491f5c07f671097d548aecb6f8776", "4d4f8e9f721f191b76cfa6c6a9b1e5b5739b0526", "0dff42b9d6b3444c3405cdb93a83345ff2f8831d", "7536e9d7d8976093bb92445fac782e9bfaca7191", "0ea5ac1eb04bcf16a8856d886be45ec90044a4c3", "b66b109a3e63c548a1c8285b7262f7d48e87a90e", "b35a1bb41a364b1b39722b533c39776a978c646a", "838ea5698990157359168d4b97c1bc3b053269fb", "f82c25d2b79a37664c9c6fc92a446572f4e407cf", "369c52d8214b73a86b1e3f31d287823ea91884d6", 
"45b07b0a3d4f1dc7f1be523889097be072d5f0ec", "6a313d12c90b01efae531e70f8d0cd1d1e8565ae", "028aefb5dc972111e95cb126df67791545895ae1", "dc2d652bc6ba61a7e90a7921e1e4f813af8c3814", "1a8a74c22941f90df6350aed258867a196753f1d", "afe0c966797f9d362b9b77482061e1a90fd9e075", "0478670e9c2be6a2fb7f017e94e21579ab7a570d", "1d9538531ee7457e356e089004f9f570eb84cc0d", "a81ffabb32f6ce1f70d25c625da27d3138412c3a", "3e8396d977df0996a4461fe7477bc5661a2058a7", "7b3533216d5064660458d3754a18fc69f8fbeba0", "54666a6fefb37de6146b58ae732abcf4a2351975", "d8df9824072e8c0368fff82c85c61498870465ca", "f8ab904126cd5f0e1f407069f9b6e522d4156007", "639a13b45b39be8ecabe2d49040a1f502384913e", "372736b3d94837e91fd0b19369e5104873b6d77d", "a2e51e20bf78f26524b112b5c8420aca42cfaeab", "272b90fb84bcffc5b1634d048fda9e355f15e6f5", "6f164cc777efdf08748c96e5be185f69a8f01cd8", "1f6226cad38c791e91fa1bff905c66e7d421847a", "fe2cfc28b4872fd057669f28b9e8a5d8f4d8b704", "0804ed47a40fbe6deb5ce93efe551086695ae393", "1936d49c95cf6454c159262da89851cdf5f9588d", "6983eec030b05bd4825ede9049094f351d880b92", "efc501b37b9993945f395abc816befceb569a434" ], "paperAbstract": "Data replication is used in distributed systems to maintain up-to-date copies of shared data across multiple computers in a network. However, despite decades of research, algorithms for achieving consistency in replicated systems are still poorly understood. Indeed, many published algorithms have later been shown to be incorrect, even some that were accompanied by supposed mechanised proofs of correctness. In this work, we focus on the correctness of Conflict-free Replicated Data Types (CRDTs), a class of algorithm that provides strong eventual consistency guarantees for replicated data. We develop a modular and reusable framework in the Isabelle/HOL interactive proof assistant for verifying the correctness of CRDT algorithms. 
We avoid correctness issues that have dogged previous mechanised proofs in this area by including a network model in our formalisation, and proving that our theorems hold in all possible network behaviours. Our axiomatic network model is a standard abstraction that accurately reflects the behaviour of real-world computer networks. Moreover, we identify an abstract convergence theorem, a property of order relations, which provides a formal definition of strong eventual consistency. We then obtain the first machine-checked correctness theorems for three concrete CRDTs: the Replicated Growable Array, the Observed-Remove Set, and an Increment-Decrement Counter. We find that our framework is highly reusable, developing proofs of correctness for the latter two CRDTs in a few hours and with relatively little CRDT-specific code.", "pdfUrls": [ "https://www.cl.cam.ac.uk/research/dtg/www/files/publications/public/mk428/oopsla.pdf", "https://arxiv.org/pdf/1707.01747v3.pdf", "http://martin.kleppmann.com/papers/crdt-isabelle-oopsla17.pdf", "http://dominic-mulligan.co.uk/wp-content/uploads/2017/10/oopsla.pdf", "https://arxiv.org/pdf/1707.01747v1.pdf", "https://arxiv.org/pdf/1707.01747v2.pdf", "http://arxiv.org/abs/1707.01747", "http://doi.acm.org/10.1145/3133933" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0359692cf53e0b484f1c2e7048c15cc7f3b5f605", "sources": [ "DBLP" ], "title": "Verifying strong eventual consistency in distributed systems", "venue": "PACMPL", "year": 2017 }, "036a89c1652d47ccdde91a5296de7c83042dbac5": { "authors": [ { "ids": [ "32434924" ], "name": "Gunnar Hartung" }, { "ids": [ "32299356" ], "name": "Max Hoffmann" }, { "ids": [ "40330664" ], "name": "Matthias Nagel" }, { "ids": [ "35135100" ], "name": "Andy Rupp" } ], "doi": "10.1145/3133956.3134071", "doiUrl": "https://doi.org/10.1145/3133956.3134071", "entities": [ "Authentication and Key Agreement (protocol)", "Circular definition", "Cryptography", "Hall effect", "Privacy", 
"Reputation system", "Smartphone", "Universal instantiation", "Usability" ], "id": "036a89c1652d47ccdde91a5296de7c83042dbac5", "inCitations": [], "journalName": "", "journalPages": "1925-1942", "journalVolume": "", "outCitations": [ "02dc2a93a48d38deae9f1369d5b33ce98af2a3f2", "135e48980bc7c942ae8b2c73f16e5f0d892d140a", "2fb12422f6caa8ef7acf6e0ad5ac4d300cd01dfe", "0326080fe13d3c41037434f89868a69e11d15580", "22a0fdd14ec069733d15ef7caf7145150e7c32b5", "887eae50cef6fd228c59d47b80887b35782a9a2a", "2a7ca2ed7ff59689af8bd9d9b93b6a9974413c9b", "2e84d658c77869341a5b439f216476607c3f8fea", "0d526d3ed49943b302bbbe6747dd3484c7d706af", "76ff3f01a258575d70cd8bf7646e0daf87c4ebf5", "0aeb21de164e5c4567bfaa7f787fff8b42670429", "38cc07e87baf5600a3301f37f4cdef423c30eb45", "259424f10f76729aa5bd18977f8b92a44a09b308", "dd85a3178c93cdad7102b524f15355a9cc928b00", "2b26cf1eab78d9f9c687073af36769e72aa3cc8b", "09645ab4f68291faf6a79107cdcb6da7f5a9a159", "1c4a5b630bc4e13bd0714132c9d22923c81d5108", "06bcae889904556263b47847080531a848febd73", "83c6e0c91a5e1ca8becb32fdd9bdd95997495535", "06a1d8fe505a4ee460e24ae3cf2e279e905cc9b0", "8405ddb312b09905dfe1a7f54f2d846cc34c9abb", "14ee6a52b24d2f6160865871284421a2fbcbb497", "32cc3fd437950a098d6e93ae755fc6571554a955", "1144078fe05a113c02d068962be9d17d0f2b9e53", "751ada5a83c0dc6e5dfc527fff0786b1ffda8a67" ], "paperAbstract": "Black-box accumulation (BBA) has recently been introduced as a building-block for a variety of user-centric protocols such as loyalty, refund, and incentive systems. Loosely speaking, this building block may be viewed as a cryptographic \"piggy bank\" that allows a user to collect points (aka incentives, coins, etc.) in an anonymous and unlinkable way. 
A piggy bank may be \"robbed\" at some point by a user, letting her spend the collected points, thereby only revealing the total amount inside the piggy bank and its unique serial number.\n In this paper we present BBA+, a definitional framework extending the BBA model in multiple ways: (1) We support offline systems in the sense that there does not need to be a permanent connection to a serial number database to check whether a presented piggy bank has already been robbed. (2) We enforce the collection of \"negative points\" which users may not voluntarily collect, as this is, for example, needed in pre-payment or reputation systems. (3) The security property formalized for BBA+ schemes is stronger and more natural than for BBA: Essentially, we demand that the amount claimed to be inside a piggy bank must be exactly the amount legitimately collected with this piggy bank. As piggy bank transactions need to be unlinkable at the same time, defining this property is highly non-trivial. (4) We also define a stronger form of privacy, namely forward and backward privacy. Besides the framework, we show how to construct a BBA+ system from cryptographic building blocks and present the promising results of a smartphone-based prototypical implementation. They show that our current instantiation may already be useable in practice, allowing to run transactions within a second---while we have not exhausted the potential for optimizations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134071", "https://homepage.ruhr-uni-bochum.de/andy.rupp/papers/bbap_ccs17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/036a89c1652d47ccdde91a5296de7c83042dbac5", "sources": [ "DBLP" ], "title": "BBA+: Improving the Security and Applicability of Privacy-Preserving Point Collection", "venue": "CCS", "year": 2017 }, "037070f1e362b008254f45467c861db0b7406b04": { "authors": [ { "ids": [ "17853037" ], "name": "Ryan G. 
Scott" }, { "ids": [ "8517341" ], "name": "Omar S. Navarro Leija" }, { "ids": [ "1739688" ], "name": "Joseph Devietti" }, { "ids": [ "31778078" ], "name": "Ryan Newton" } ], "doi": "10.1145/3133897", "doiUrl": "https://doi.org/10.1145/3133897", "entities": [ "Batch processing", "Binary file", "Bioinformatics", "Bioinformatics", "Central processing unit", "Entry point", "Haskell", "Monad (functional programming)", "Parallel computing", "Pipeline (computing)", "Process (computing)", "Sandbox (computer security)", "Scheduling (computing)", "Shared memory", "Software build", "Software system", "System call", "Type system", "Xojo" ], "id": "037070f1e362b008254f45467c861db0b7406b04", "inCitations": [], "journalName": "PACMPL", "journalPages": "73:1-73:26", "journalVolume": "1", "outCitations": [ "452b7f1eb4899fb83d6bc21a180643c4433684bb", "1f33e83905ee40dfeeacd6c04f64c1af71c2b7fb", "1d15930cd9e4ececf22fd96bf9ba52f12dc0665e", "3b62c1f19254820c75dd0011f038d7aae04b3414", "a49c9057fc3912d3e9bea3d6e2cd39e57561cec3", "97a236836489b48b76b0fe455dcbe6978c8e5a4a", "0fb659af82f2277c8a62ac888f4bfd01570e5470", "217fa6474533b7ca0981aaa8600543afb308ab66", "771b52e7c7d0a4ac8b8ee0cdeed209d1c4114480", "7e40209617935569a12a104c354eabf029a3b537", "8723e38978fe1e48c9c219cd6e9bd88d5cd237a8", "5ac46b7c320aabe83eacb1a91c055939c1941dac", "146a0ef96c41beb15182e7e3f48e8d7c25d70b62", "233087525268aa2353b7feb77054a7d5905042c7", "3313e04736e4245abf018fc0799002031fcb7764", "0a8d3007ce2fbd15ee15e7c4440526ad326adcb6", "be7536d9baaef7ccdbff845f8e98c136b4c80bb3", "7021ff3efab9bdfeda591f78e42f54b1482fb39e", "ab2d1fd9a27039cb4dcdf91422ca54e6ca38dbaf", "50edb17bb311757206a60801a25dd56ca2b342dd", "feb5db279d43f6affb474398f96bb5c910aa2340", "498bb6d4bbff79b97695cd65b03a787bf8c4388a", "23affb01412312341fb336943756800c0bf2468c", "2a85b683073c2c8b762079c52a0d54392b243afb", "11fb91cf78700428342aa3ed6636f655bb97ca33", "8a0af8ae748210ef571d074362b552af571e6d33", "127b35b01f4d1186a0707aed4fdd50eb00ae2ea2", 
"03ad81f6276792a78312471429fc9495b89a1ffc", "1d9e276dc901978f5e0bc6f6d9898b5777d1b86a", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "023e3bcd1c1d374f894836dc7dce688bdb406817", "13f6ddd72bcf62dcc13cf4515be29d48948b9693", "0065c8c9bf4961d637a69e26a8045074929a8cd3", "63c46be541fa2f18022d00c3cf15eb5342b00b01", "0d69f96cf1927ea2993608f839752de0314c0347", "55edf8d36576d63851d8f5739e8d0b6b094fe5cf", "060c491cff220d6ad73b6bb9e09e261ca508d7e0", "fcae2fcef595059529ebe553431ab41b44062ae4", "49f3ef5baf15bc044d79c96e3ef19ecf952169ac", "b44a4cfd880ecd47978fda1738479179651304f8", "ca80639f3309251ab3b626da37cc2d700430e1e5", "02f1d9f5ec925dee3f5d483585f6beab0660aa0f", "0db9636ace0830b8b5e86b031a7a86d621446bd9", "7f66a291f885617b1c975c0cae7ad0eb978f2aa5", "f4e10c197040252beeabcd3393c81062e60e7475", "08a7ace62570baad1cb40807c71e7347508ebabf" ], "paperAbstract": "Achieving determinism on real software systems remains difficult. Even a batch-processing job, whose task is to map input bits to output bits, risks nondeterminism from thread scheduling, system calls, CPU instructions, and leakage of environmental information such as date or CPU model. In this work, we present a system for achieving low-overhead deterministic execution of batch-processing programs that read and write the file system—turning them into pure functions on files. \n We allow multi-process executions where a permissions system prevents races on the file system. Process separation enables different processes to enforce permissions and enforce determinism using distinct mechanisms. Our prototype, DetFlow, allows a statically-typed coordinator process to use shared-memory parallelism, as well as invoking process-trees of sandboxed legacy binaries. DetFlow currently implements the coordinator as a Haskell program with a restricted I/O type for its main function: a new monad we call DetIO. 
Legacy binaries launched by the coordinator run concurrently, but internally each process schedules threads sequentially, allowing dynamic determinism-enforcement with predictably low overhead. \n We evaluate DetFlow by applying it to bioinformatics data pipelines and software build systems. DetFlow enables determinizing these data-processing workflows by porting a small amount of code to become a statically-typed coordinator. This hybrid approach of static and dynamic determinism enforcement permits freedom where possible but restrictions where necessary.", "pdfUrls": [ "http://ryanglscott.github.io/talk-slides/mcdpbp-wonks.pdf", "http://doi.acm.org/10.1145/3133897", "http://ryanglscott.github.io/talk-slides/mcdpbp-oopsla.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/037070f1e362b008254f45467c861db0b7406b04", "sources": [ "DBLP" ], "title": "Monadic composition for deterministic, parallel batch processing", "venue": "PACMPL", "year": 2017 }, "0378a60ebb240f3de23e763ebabe4daef3f02fe8": { "authors": [ { "ids": [ "2871106" ], "name": "Xiaoen Ju" }, { "ids": [ "2704919" ], "name": "Hani Jamjoom" }, { "ids": [ "1730051" ], "name": "Kang G. 
Shin" } ], "doi": "10.1145/3084446", "doiUrl": "https://doi.org/10.1145/3084446", "entities": [ "Abstraction layer", "Algorithm", "Computation", "Graph (abstract data type)", "Graph state", "Speedup", "Unified Extensible Firmware Interface", "Vertex (graph theory)", "Vertex separator" ], "id": "0378a60ebb240f3de23e763ebabe4daef3f02fe8", "inCitations": [ "d1407cc4a56515e135187a62f934e1156642e568" ], "journalName": "", "journalPages": "5", "journalVolume": "", "outCitations": [ "1521d39088b203ddac981d10d214f463449ae95b", "b1cd460c000d5e9a37f2f4732428d3cf16ff9ffd", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "1e8c283cedbbceb2a56bf962bc0a86fd40f1cea6", "2138776f89bccc9362b239a6d33018ca2a847960", "7b44ab10de53f89890580f0f68717e92ff3225cf", "0521af8f07ba4105a6eebbebd0057a2159c83e2c", "1156f60e40548096df49528b1342bb3e88b0f378", "0e3253e5ca318e70d4968a45f8d41f88dbffd9e3", "0ad8e89091eed09217e66adc98136126addc2619", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "141004dee9e799b40bfaf50b4a72618613137250", "4e4d17113a179174d2711a7b07c6fba0c4fe1c05", "3726c60552263e648c6856679e672de2e1c110e5", "6de3915df2b9927a78f213629f3bcb052ec21e8b", "9359fa64a59105e93dd6ca9f5aa35e0d9f9055be", "423befa4222b5b54cf63f0879e99243b0e5139b0", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "6a888f3dd0a17b0241be61daa378ba6caffa6617", "2b9e6181502369199bd89691a27f89bdbaac36e4", "09031aa6d6743bebebc695955cd77c032cd9192f", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "ef9d9821df55442f039b128bb5cef2b41ab2cadc", "272550f6745acba4da9a10ab29ba738cb2c19d3b", "4dc578364f357b993b5554b9181c90c84aa6b4d1", "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", "047565a5b15fbebc78e0bc7d8ca823237dac9de2", "0546fa6622b8b8db8527be777a692d88c5c037b0", "3486aeaf540c48952120fe853d672af984f40a6a", "0706356c9ab6014d6b04577d38289ea8328291a5", "87f931f4d8aad3b71b8261703bbcfa18c1293181", "04ab17e6944d8d3abf113cdf5495701c6c358448", "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "95141db914fb1074304d4075d61ca79c037b771f", 
"0f014693b25d9846025219b88f8ca480fac68b0a", "22e98d48c4cb573adec6fa875d18d14955113456" ], "paperAbstract": "Despite their widespread adoption, large-scale graph processing systems do not fully decouple computation and communication, often yielding suboptimal performance. Locally-sufficient computation-computation that relies only on the graph state local to a computing host-can mitigate the effects of this coupling. In this paper, we present Compute-Sync-Merge (CSM), a new programming abstraction that achieves efficient locally-sufficient computation. CSM enforces local sufficiency at the programming abstraction level and enables the activation of vertex-centric computation on all vertex replicas, thus supporting vertex-cut partitioning. We demonstrate the simplicity of expressing several fundamental graph algorithms in CSM. Hieroglyph-our implementation of a graph processing system with CSM support-outperforms state of the art by up to 53x, with a median speedup of 3.5x and an average speedup of 6x across a wide range of datasets.", "pdfUrls": [ "http://doi.acm.org/10.1145/3084446", "https://kabru.eecs.umich.edu/wordpress/wp-content/uploads/hieroglyph_sigmetrics17.pdf", "http://doi.acm.org/10.1145/3078505.3078589" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0378a60ebb240f3de23e763ebabe4daef3f02fe8", "sources": [ "DBLP" ], "title": "Hieroglyph: Locally-Sufficient Graph Processing via Compute-Sync-Merge", "venue": "SIGMETRICS", "year": 2017 }, "0387a5caf19b887046473cc7dd317da0ea1378a6": { "authors": [ { "ids": [ "28130536" ], "name": "Martin B\u00e4ttig" }, { "ids": [ "1735078" ], "name": "Thomas R. 
Gross" } ], "doi": "10.1145/3018743.3018747", "doiUrl": "https://doi.org/10.1145/3018743.3018747", "entities": [ "Benchmark (computing)", "Concurrency (computer science)", "Concurrency control", "DACAPO", "Database transaction", "Lock (computer science)", "Naivety", "Overhead (computing)", "Programmer", "Programming language", "Secure by design", "Shared memory", "Side effect (computer science)", "Smart Battery", "Software transactional memory", "Static program analysis", "Transactional memory" ], "id": "0387a5caf19b887046473cc7dd317da0ea1378a6", "inCitations": [ "e45dea6588d1de0a23618e019031e67eedeeee26" ], "journalName": "", "journalPages": "299-312", "journalVolume": "", "outCitations": [ "4b328006a699106fa809cc610b799a2d03bc77a4", "6756d3e0669430fa6e006754aecb46084818d6b6", "23b0d41c138979705494222035cd07fc95a5faf8", "2bf4940710deb2571e93b1c922e8e7452e854afd", "6850ed761f8c2c796019b6359b6190fe6b2d2b42", "d2268a9ae1607965c0cd6a85da6194e630cb9496", "e6961c43facdd83f3efa7bc77bfcb76ac2b1553a", "280b2b0bc4172756001ceda89cd102ba4afbbfe3", "171695dfdb42ea09ea3207f0f5fd11985c02e671", "a10fde829f83a5e5c101de85dd78e330c9c33d1b", "100671bc2d560d42d85f75bdf5deb124cb79d2e2", "5987b948677c5528a061890f4df507c85a5a97b5", "2295c028bb44dfdbf185876d04a0d37fa14a89d8", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "2ac3c4537be12b52f9e60d140ccf5621dc43cb75", "0065c8c9bf4961d637a69e26a8045074929a8cd3", "0f1042350e2c97117620d9f5182f94262f1f5ac0", "ab3f531f3c6e4920c9ba4b437d997c0ce797f5b0", "00aee6e171c40e54ec7eb2240192820871ca5ded", "13e8df0a6afd8565ef644c87f4cb94aa183bc113", "7e40209617935569a12a104c354eabf029a3b537", "a7b5b9d048572577c82461ce3c9330f1875bfaf9", "09ed565e84057123c15ab12b885c235d1f241aed", "34a97a016e6c419eb4b1005a7306d45a775a407b", "4adadc82e4f6db798164438ca655d0fc0252cf17", "76057a3c7b489290afd4a4dccf09b623502619fd", "13f7c5807452ae602046582a385c0fb544ec5de1", "f1e8792d102b260c0b6e2808d416df286121c574", "264cd229ac4bbdf655d5e7b44563bf84bd846364", 
"46e61ad29ab20618fb551afbc00ebb8eb4e9be21", "25883553e5315e32194614676f11bb012db6dafd", "6f705b791b4b951a273f0c3ced886a52daa8f5aa", "027eb436c35c7e293e7ebc565163cb54c05fe2e9" ], "paperAbstract": "We explore a programming approach for concurrency that synchronizes all accesses to shared memory by default. Synchronization takes place by ensuring that all program code runs inside atomic sections even if the program code has external side effects. Threads are mapped to atomic sections that a programmer must explicitly split to increase concurrency.\n A naive implementation of this approach incurs a large amount of overhead. We show how to reduce this overhead to make the approach suitable for realistic application programs on existing hardware. We present an implementation technique based on a special-purpose software transactional memory system. To reduce the overhead, the technique exploits properties of managed, object-oriented programming languages as well as intraprocedural static analyses and uses field-level granularity locking in combination with transactional I/O to provide good scaling properties.\n We implemented the synchronized-by-default (SBD) approach for the Java language and evaluate its performance for six programs from the DaCapo benchmark suite. The evaluation shows that, compared to explicit synchronization, the SBD approach has an overhead between 0.4% and 102% depending on the benchmark and the number of threads, with a mean (geom.) 
of 23.9%.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018747" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0387a5caf19b887046473cc7dd317da0ea1378a6", "sources": [ "DBLP" ], "title": "Synchronized-by-Default Concurrency for Shared-Memory Systems", "venue": "PPOPP", "year": 2017 }, "038ec03e66ec8ed2593a4a7481b64e8f2bf1e9df": { "authors": [ { "ids": [ "3029699" ], "name": "Chaoshun Zuo" }, { "ids": [ "40097032" ], "name": "Qingchuan Zhao" }, { "ids": [ "34472423" ], "name": "Zhiqiang Lin" } ], "doi": "10.1145/3133956.3134089", "doiUrl": "https://doi.org/10.1145/3133956.3134089", "entities": [ "Access control", "Authentication", "Authorization", "Best practice", "E-services", "Login", "Mobile app", "Online service provider", "Server (computing)", "Traffic analysis", "User profile", "Vulnerability (computing)" ], "id": "038ec03e66ec8ed2593a4a7481b64e8f2bf1e9df", "inCitations": [ "1961c82250cf02079c34d3f4b990ae8f81c06e15" ], "journalName": "", "journalPages": "799-813", "journalVolume": "", "outCitations": [ "9b8b6ad7c3bbbdec2cb41d95fc8262138607abe2", "23cbb38a3da69c710ea630c417db7e8256ff183a", "129570333e7631456c70354113a43fe6eb193329", "101245256e1a36736045ac9010b0cb5c058ea04f", "587358f81630929d7c03da065cb0804756fc3b6d", "17138b471f2dade960cd3969db0c08b623b33797", "6c57b758334576abb98c703eb013ddb36888fa7f", "9d4ae199975439acb943b3635c7fdb0c6a382910", "81a502b52485e52713ccab6d260f15871c2acdcb", "2be20b92cd27c47c11ad5b8ecf451db84cd768ad", "13e185c42cf59a3ca4db0e47564d17b8f5801a3e", "32bd7b680830b3e168795ccfe650ceeb0edf7878", "2f35c2bf57242f5a755ac82635605100c14319da", "2c5a5a2ab4f7b63523981ac790399c3ef2f08014", "479949999394d7db736d7088a746e5159bee5894", "3c7cf150b5fbbdce6d937eccc1ab05aeb77d0566", "1370b7ec6cb56b0ff25f512bd673acbab214708c", "c285f9f186a68f30dba6274166419a72a810421e", "94d0e8b118efecb9f5b2056d84bf22253a2fb63c", "0b7f62a2ac217e035e0cd9cb73d2de4fb6135af5", "0fd2467de521b52805eea902edc9587c87818276", 
"16b2d6f76febe56ac1fed2a9dc266b2409bfb7ed", "56c6ed3ea8eaaa12052636ec37b283d6c797bcbf", "590b1fda209259f3502018bb2dfc4b80191c842e", "51e53b7148cf7387d90f3048f14f721367e283f5", "a73f2dab1e9caae57bbbffe551dcefdf00e43f3e", "482e01ba5d29de96842c3e3daebcbad29945e4c0", "845bdb47a01b5dcb42e3f0a8b57a672e49c813a2", "30af8702c6c9f69a64d176d61784b4d313eb3e26", "03f628dbb91c226011fa11a964025177da7824ad", "2e61fc82bcbdeaa0f8778d51c166e904c04ed34e", "1a7160058a87a2a7dedd2f6e95f25892ec4f3d35", "d5f8c9b83c05e258f1a6b5e2ec3e7687bd5348f0", "4333b14ecaf8cd2b6fcb7f1a8c881ccc104c3778", "313274e8a6ad34c7c24b35979ccea03ce145bfd1", "06edea3041bf20833b8c71396c46357247dc08d8", "1c126c0ddc80c1fa177adb9ef32bdf84e0306846" ], "paperAbstract": "When accessing online private resources (e.g., user profiles, photos, shopping carts) from a client (e.g., a desktop web-browser or a mobile app), the service providers must implement proper access control, which typically involves both authentication and authorization. However, not all of the service providers follow the best practice, resulting in various access control vulnerabilities. To understand such a threat in a large scale, and identify the vulnerable access control implementations in online services, this paper introduces AuthScope, a tool that is able to automatically execute a mobile app and pinpoint the vulnerable access control implementations, particularly the vulnerable authorizations, in the corresponding online service. The key idea is to use differential traffic analysis to recognize the protocol fields and then automatically substitute the fields and observe the server response. One of the key challenges for a large scale study lies in how to obtain the post-authentication request-and-response messages for a given app. We have thus developed a targeted dynamic activity explorer to perform an in-context analysis and drive the app execution to automatically log in the service. 
We have tested AuthScope with 4,838 popular mobile apps from Google Play, and identified 597 0-day vulnerable authorizations that map to 306 apps.", "pdfUrls": [ "http://www.utdallas.edu/~zxl111930/file/CCS17a.pdf", "http://www.utdallas.edu/~zhiqiang.lin/file/CCS17a-slides.pdf", "http://doi.acm.org/10.1145/3133956.3134089" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/038ec03e66ec8ed2593a4a7481b64e8f2bf1e9df", "sources": [ "DBLP" ], "title": "AUTHSCOPE: Towards Automatic Discovery of Vulnerable Authorizations in Online Services", "venue": "CCS", "year": 2017 }, "03a6e24e5423b4edbd1684779c60f3f70b57a7bb": { "authors": [ { "ids": [ "1709007" ], "name": "Meng Jin" }, { "ids": [ "1683855" ], "name": "Yuan He" }, { "ids": [ "1747903" ], "name": "Xin Meng" }, { "ids": [ "4050884" ], "name": "Yilun Zheng" }, { "ids": [ "2068791" ], "name": "Dingyi Fang" }, { "ids": [ "2466164" ], "name": "Xiaojiang Chen" } ], "doi": "10.1145/3117811.3117828", "doiUrl": "https://doi.org/10.1145/3117811.3117828", "entities": [ "Data rate units", "Denial-of-service attack", "Experiment", "Flip graph", "Graphical model", "Mathematical optimization", "Throughput" ], "id": "03a6e24e5423b4edbd1684779c60f3f70b57a7bb", "inCitations": [], "journalName": "", "journalPages": "275-287", "journalVolume": "", "outCitations": [ "760e8ea2cbe5750db0aee37ca36925955e8bef79", "05fe031e53dd8990e7076a91277cb2b74e22b811", "498d2ed40427eeb78799fa96ac0f5a58c6648d05", "5a0e84b72d161ce978bba66bfb0e337b80ea1708", "05eca7b08c495020d499716fb90a37ba0715f7ff", "644e1494176c0ff33fba8f745087a56cafa1ecaf", "03ca2b2494aa4977bfef1d30d314490feb68760c", "1413b78c713429ea00dbd70a49e0d2e606a82be9", "2b3aabf4173e515a6e9bbc3410cd5dd9c87549ba", "1c581311f18c251a5c39ac195e6265e52d639bfe", "0847978726acaaa27aca91f2053350b818f1fe53", "82802e411495bbad77fa2415c6d4633dde180764", "3c3a0a0ef5ff52d9de1e45b88e9228d0972d2b45", "2d12b6189a0681b933f9a96b8ab14daac2bcfd73", "015ce3f823dac9e78ab3ff1f63e67e5a00145ac6", 
"0c9b68449b6241478ba38c2af220b393db86e206", "1879bf3d2e843155056344a8f6a6cd27b10e0668", "9c84d7af3db4fc90e872556c936953aae48ea1a1", "61276c0d646510e4665246ff7504d98d257cbbc2", "a361f606d62e0be40df91b143f7f7086d0b249d4", "4618dc76f77c3d09510f0530f1805dd7702f5b5c" ], "paperAbstract": "With parallel decoding for backscatter communication, tags are allowed to transmit concurrently and more efficiently. Existing parallel decoding mechanisms, however, assume that signals of the tags are highly stable, and hence may not perform optimally in the naturally dynamic backscatter systems. This paper introduces FlipTracer, a practical system that achieves highly reliable parallel decoding even in hostile channel conditions. FlipTracer is designed with a key insight: although the collided signal is time-varying and irregular, transitions between signals' combined states follow highly stable probabilities, which offers important clues for identifying the collided signals, and provides us with an opportunity to decode the collided signals without relying on stable signals. Motivated by this observation, we propose a graphical model, called one-flip-graph (OFG), to capture the transition pattern of collided signals, and design a reliable approach to construct the OFG in a manner robust to the diversity in backscatter systems. Then FlipTracer can resolve the collided signals by tracking the OFG. We have implemented FlipTracer and evaluated its performance with extensive experiments across a wide variety of scenarios. 
Our experimental results have shown that FlipTracer achieves a maximum aggregated throughput that approaches 2 Mbps, which is 6x higher than the state-of-the-art.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117828" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/03a6e24e5423b4edbd1684779c60f3f70b57a7bb", "sources": [ "DBLP" ], "title": "FlipTracer: Practical Parallel Decoding for Backscatter Communication", "venue": "MobiCom", "year": 2017 }, "03a7ecb0a3f1eff43ef7db66991ca37c7189d81c": { "authors": [ { "ids": [ "1898809" ], "name": "Shaizeen Aga" }, { "ids": [ "1678884" ], "name": "Satish Narayanasamy" } ], "doi": "10.1145/3079856.3080232", "doiUrl": "https://doi.org/10.1145/3079856.3080232", "entities": [ "Cryptographic primitive", "Cryptography", "Encryption", "Memory bandwidth", "Memory bus", "Merkle tree", "Oblivious ram", "Overhead (computing)", "Processor design", "Random-access memory", "Replay attack", "Side-channel attack", "Timing channel" ], "id": "03a7ecb0a3f1eff43ef7db66991ca37c7189d81c", "inCitations": [ "9d0c7a61c47d0db3181408ffdde5f140a5e07c0f", "56ad278ca41d14386d558f259f6a8b98ae6e86d1", "fcf8efb59680ef79bcca894947aa46578d2bbd8c", "d9a6cfad15c6268b30da1f5b45f720b96ead1805", "0d8952e0a65caf480228ede7e632201d5420e7b7", "a6994ee043e174871983386d6a78a3f3be6c09da" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "94-106", "journalVolume": "", "outCitations": [ "5c40cdb6386021d68288e7158e1330ad3b8c223e", "8c47b8884af7798ece246b9c561a35c5425aae2d", "44d886f89cdbd4fdf5dd25d83b2d37deb7541bf7", "8f1247646e29e07dddbec698f281d06cee87acbe", "2b004e0484f4940fca341fa97ecb8ac94fe780a5", "2200640161a8fe6ce3a03c7bad586e890f10679f", "67f881ebc6df47f140dbf99308f1846851a9b434", "487e6a85d55c3adbffcd3ce8032b150e90a25bf0", "07b0b5d59ef09f33a40f30d3a2dec880029a5002", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "00ab25c6582d543932fccbb0f15fe93445f95d61", 
"24706cbf8c48414ed66db6fbf223c47452f7cbdc", "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "4ce02fb69245a84d3ffceae20e596dcf0497508d", "06bf84f98e7dd39be8d96eb67bafbf56d61bc715", "56ad278ca41d14386d558f259f6a8b98ae6e86d1", "72dfed87d3549369ae93dbbbfd371f88c4e344c8", "3c4e907c07944cd55e800b4e55918adf8cb2a683", "1d08bb92568d98319634fe2409a9eab085d68b60", "2835808d700c88459ff21ce31ba3c4ef02778ddb", "078b855c40fefabd766a09f23280c59feef21634", "352a8957005dc5519b15ed1870751ec494d66395", "114a4222c53f1a6879f1a77f1bae2fc0f8f55348", "104eb3716d8b2d7e6d69805357d5b7fb87caff3e", "570466e82fce883daef38047ff694c084fdcadec", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "8a41c198449d0f30de5427fe753c6b10bbb7255d", "5481a090d11f655a3b240dbcdb4f2133f4028c14", "0eed5ca9fa62cf2008fa6e1bb0d729e510363a9a", "a6cc2def07a1880a81003449e0f0f901da597b18", "07272e31fb957e026a6bc36d55e412de26843c7f", "6f45ab51391f1f0010ad54c06a47abaf208a2396", "3b03935dfc89c0cad63e05976c21fef6c9fb4190", "2b3cdf37bff57e29fb5aecc136603f16c855366b", "8b04ea524cb6ced72868c120a00c4679d84be006", "201213b124452451cd6f4f06bb94523aa861a60c", "43dcd30e653b6a66efe18b78a9eed9c3bdeaaf23", "a52945840b980adfef34466cb4186c7cda3b61e6", "da42f505e5d9bf6e932adfe24677f12457da22f3", "19218913ef99ba9acd2491d8bab1d154cb375fa3", "92eaba06af12761b5c64b84e6028d21cd05af9dd", "3ca369fa2cadb403db7ac5e75deefd9acbb10723", "0a679d9d08231b2856fe648e6b331d8e6e46a1fa", "5e4e0daea223658f8c96d7728bd32398680ebef3", "4e8505919eb22265f107ebbeeee3fa78bf6d893a", "21ddf1f7ab7e2cd2ae07073bf3238ce46314bac9", "8f91eafaab7bd4803e0d064280a86df693674011", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "2065450d96aca38c79cad5172b58660765533650", "0be09034895068cd359e93e0fbf4f61d6189974c" ], "paperAbstract": "A practically feasible low-overhead hardware design that provides strong defenses against memory bus side channel remains elusive. 
This paper observes that smart memory, memory with compute capability and a packetized interface, can dramatically simplify this problem. InvisiMem expands the trust base to include the logic layer in the smart memory to implement cryptographic primitives, which aid in addressing several memory bus side channel vulnerabilities efficiently. This allows the secure host processor to send encrypted addresses over the untrusted memory bus, and thereby eliminates the need for expensive address obfuscation techniques based on Oblivious RAM (ORAM). In addition, smart memory enables efficient solutions for ensuring freshness without using expensive Merkle trees, and mitigates memory bus timing channel using constant heart-beat packets. We demonstrate that InvisiMem designs have one to two orders of magnitude of lower overheads for performance, space, energy, and memory bandwidth, compared to prior solutions.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080232" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/03a7ecb0a3f1eff43ef7db66991ca37c7189d81c", "sources": [ "DBLP" ], "title": "InvisiMem: Smart memory defenses for memory bus side channel", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "03a97821f3f77490f1c775501762985f10cd7be8": { "authors": [ { "ids": [ "1933752" ], "name": "Christoffer Dall" }, { "ids": [ "2175344" ], "name": "Shih-Wei Li" }, { "ids": [ "1700208" ], "name": "Jason Nieh" } ], "doi": "", "doiUrl": "", "entities": [ "ARM architecture", "Architecture of Windows NT", "Hypervisor", "Kernel (operating system)", "Linux", "Linux", "Multiplexing", "Operating system", "Protection ring", "Virtual machine", "X86", "X86 virtualization", "z/OS" ], "id": "03a97821f3f77490f1c775501762985f10cd7be8", "inCitations": [ "a027a5a5d021f8236678d730c74a71ab43ffebd3" ], "journalName": "", "journalPages": "221-233", "journalVolume": "", "outCitations": [ 
"905b27d6fe624a28fd6fdd04cf7139333e052030", "067c7857753e21e7317b556c86e30be60aa7cac0", "611cce8f8236c1de04c3217f4341c9e03cd8a1eb", "0852a44c86db434e9b51c67704636791e9940487", "71a2d8c473f13d0c664f751db97e81128281b1eb", "7de2ed992aae322333c14e4ffad5b347f7a7016a", "5016dedcbc51faec5f0aa0b5303a4e96c6e669de", "07ebe9df86f0e6eb19fcdd03bbe9dd7f64ff887f", "44d666999ca078e0fce6b5f2642a1c3e72ac87a1", "0e003ed084cf22fff2cbb2d1a1b57894b7415a0c", "423455ad8afb9b2534c0954a5e61c95bea611801", "0b0422b5864ca1a25d6af274bad11c1b2fef1ed5", "5e7567dc5c9922527e7ce5e4fd62981488a09829", "73bded14fb8c3b4bd5c2ae554d704d3ad3ff907e", "5b09fc2403507383e4000139ab845c67cf549675", "17f1ff82aca7a592a8815e8169b6e2210bf6ae7a" ], "paperAbstract": "Modern hypervisor designs for both ARM and x86 virtualization rely on running an operating system kernel, the hypervisor OS kernel, to support hypervisor functionality. While x86 hypervisors effectively leverage architectural support to run the kernel, existing ARM hypervisors map poorly to the virtualization features of the ARM architecture, resulting in worse performance. We identify the key reason for this problem is the need to multiplex kernel mode state between the hypervisor and virtual machines, which each run their own kernel. To address this problem, we take a fundamentally different approach to hypervisor design that runs the hypervisor together with its OS kernel in a separate CPU mode from kernel mode. Using this approach, we redesign KVM/ARM to leverage a separate ARM CPU mode for running both the hypervisor and its OS kernel. We show what changes are required in Linux to implement this on current ARM hardware as well as how newer ARM architectural support can be used to support this approach without any changes to Linux other than to KVM/ARM itself. We show that our redesign and optimizations can result in an order of magnitude performance improvement for KVM/ARM, and can provide faster performance than x86 on key hypervisor operations. 
As a result, many aspects of our design have been successfully merged into mainline Linux.", "pdfUrls": [ "http://www.cs.columbia.edu/~cdall/pubs/atc17-dall.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/dall", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_dall.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-dall.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/03a9/7821f3f77490f1c775501762985f10cd7be8.pdf", "s2Url": "https://semanticscholar.org/paper/03a97821f3f77490f1c775501762985f10cd7be8", "sources": [ "DBLP" ], "title": "Optimizing the Design and Implementation of the Linux ARM Hypervisor", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "03b95c1c6859ce4f792ac6995137a6cfab60670c": { "authors": [ { "ids": [ "2388951" ], "name": "Rajiv Nishtala" }, { "ids": [ "2410518" ], "name": "Paul M. Carpenter" }, { "ids": [ "1771770" ], "name": "Vinicius Petrucci" }, { "ids": [ "1767107" ], "name": "Xavier Martorell" } ], "doi": "10.1109/HPCA.2017.13", "doiUrl": "https://doi.org/10.1109/HPCA.2017.13", "entities": [ "64-bit computing", "ARM architecture", "ARM big.LITTLE", "Data center", "Dynamic frequency scaling", "Dynamic voltage scaling", "Experiment", "Frequency scaling", "Heuristic", "Memcached", "Quality of service", "Reinforcement learning", "Task manager" ], "id": "03b95c1c6859ce4f792ac6995137a6cfab60670c", "inCitations": [ "b6263576b4477fe3b5a86b1f18ec0949b8f52517" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "409-420", "journalVolume": "", "outCitations": [ "0831a5baf38c9b3d43c755319a602b15fc01c52d", "41b380539d15a733e78c2b29388ffa8bef4bb370", "08632fe2b934ed15d3499e7321282c81adc2c390", "3000e77ed7282d9fb27216f3e862a3769119d89e", "00919d778377ec8e4e037d8ebafc76c9de52db4b", "da94b4bf79fcb32a3e24da9b152c1fd7efb199f5", "72ee099a3b228972d6abba328ce9100892daf151", 
"2c9b77a063a3459ed8f3be0c0066724a38e225e5", "362d884ff43d8c7cd6bce184944cfc04cdd57c18", "15dc663b6761d53e90415427d5a24cce1e0e38da", "590bd345ef4b8f274af3363a52b7d8f518cdc08a", "27f8ac77b89986f7a24f929b200b6a358b8f7d01", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "0d683085618e654a173b3590c4d2b431569cbfb6", "053825c0a1c111e76c18f28b6d8ae13b414f3bed", "2db9bcd369e59837278be7e6ffb4c4a96b24fc35", "17f820491ffb223d553a9efb73933abfd3db67c1", "345803a2146d4906f3f3808841e8af136e5f38e8", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "7a978f2902460e732c50c36a171deb11733df1fc", "0a8cfe6bf63530d9ee402a6a6e1a7666008e43b7", "45ee540d3b9b16ed9b5ad6ee034f3779b9561a73", "2ea6e3243c9aa5d9910cf44c4f0e18002bf01638", "0a80e3dce25d865e9fdf69da4d09cc8ac3398ff4", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "88dfee10842bbfd2ebc74980ab64c1cac5753883", "2d8450ec56926a34fe1c25180f0b303c5cc67f2d", "27c66ba59c76e737f863ba05b7099ad5788af836", "23265cad4d3f6dd2db1d0f5e58286f3ea98175af", "f125b540d7453eb58d38f933588f4b80c80959f2", "1d035d1d445f5ae5457db98af49ce1b87be1ebd5", "30c5b89ef93b564781b9a7b8f03be0056d926876", "54754cbd5011c059af8358b162ffd9ffbcb51f39", "1ecd36058e48734213c81728f42ff798a2c52833", "3c0bc4e9d30719269b0048d4f36752ab964145dd", "269c24a4aad9be622b609a0860f5df80688c2f93", "0e7148699994155cf8afae0ed943812fbb4f4b7f", "31fb2b92f92968fcd60112f86b2201e874cfba19", "167c651a235cf567ee8ca19b8d0e4d2f19e01b42", "64e7cbb46ecb25f8c00fc58b5b4ab4e4091369b6" ], "paperAbstract": "In 2013, U. S. data centers accounted for 2.2% of the country's total electricity consumption, a figure that is projected to increase rapidly over the next decade. Many important workloads are interactive, and they demand strict levels of quality-of-service (QoS) to meet user expectations, making it challenging to reduce power consumption due to increasing performance demands. 
This paper introduces Hipster, a technique that combines heuristics and reinforcement learning to manage latency-critical workloads. Hipster's goal is to improve resource efficiency in data centers while respecting the QoS of the latency-critical workloads. Hipster achieves its goal by exploring heterogeneous multi-cores and dynamic voltage and frequency scaling (DVFS). To improve data center utilization and make best usage of the available resources, Hipster can dynamically assign remaining cores to batch workloads without violating the QoS constraints for the latency-critical workloads. We perform experiments using a 64-bit ARM big.LITTLE platform, and show that, compared to prior work, Hipster improves the QoS guarantee for Web-Search from 80% to 96%, and for Memcached from 92% to 99%, while reducing the energy consumption by up to 18%.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.13", "http://upcommons.upc.edu/bitstream/handle/2117/105074/Hipster+Hybrid+Task+Manager+for+Latency-Critical+Cloud+Workloads.pdf;jsessionid=B0D19B7F4661035B229E059813DA7945?sequence=1" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/03b95c1c6859ce4f792ac6995137a6cfab60670c", "sources": [ "DBLP" ], "title": "Hipster: Hybrid Task Manager for Latency-Critical Cloud Workloads", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "03c7a20c919dd3b6996124a96b199b0b2836d462": { "authors": [ { "ids": [ "12898292" ], "name": "Thang Cao" }, { "ids": [ "1730584" ], "name": "Wei Huang" }, { "ids": [ "40498718" ], "name": "Yuan He" }, { "ids": [ "1683736" ], "name": "Masaaki Kondo" } ], "doi": "10.1109/IPDPS.2017.19", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.19", "entities": [ "Computer cooling", "Java HotSpot Virtual Machine", "Job scheduler", "Jumpstart Our Business Startups Act", "Location awareness", "Next-generation network", "Scheduling (computing)", "Simulation", "Supercomputer", 
"Throughput" ], "id": "03c7a20c919dd3b6996124a96b199b0b2836d462", "inCitations": [ "d206e9a132b7eb00840b47da84e2960b720065e3" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "728-737", "journalVolume": "", "outCitations": [ "9ba533adf4776c0a708d2f5a2431ce2ab35bf915", "3583241ac041f60b845395b13ce0d90caf41f49f", "bb58f3858c937d6769ea8a3b6fc02e04a6521e82", "239e046347d5075b3eeef5439050e9f2ca760b7b", "075cac76a487db1c11751f340ada8cd59e1e2017", "8303554a48d900acf0a432fe06e48d48c5962601", "7d21404a90d7bf9b75c140bc0b6546551bd91979", "4c059a8900d24058c9cb27b85df96cc430a79970", "30a001817f503fa9aa46b01e6dbde887e94cfc3d", "208a5e499a2836effd9d15c2ff867cf5697796ac", "494c4c60ab265415d29fd378583e1e295f20bcfe", "6318fbbde6eb3cc0175b7fb1856b7dd116b8b710", "2ca6b56fd65b4fa486d754af55e19771f56a3b60", "3b43f4fca2fcfd7f351ccd78076032b312b52221", "35e646293776581b01700c1d2d5ac4885a9d56b9" ], "paperAbstract": "Limited power budget is becoming one of the most crucial challenges in developing supercomputer systems. Hardware overprovisioning which installs a larger number of nodes beyond the limitations of the power constraint is an attractive way to design next generation supercomputers. In air cooled HPC centers, about half of the total power is consumed by cooling facilities. Reducing cooling power and effectively utilizing power resource for computing nodes are important challenges. It is known that the cooling power depends on the hotspot temperature of the node inlets. Therefore, if we minimize the hotspot temperature, performance efficiency of the HPC system will be increased. One of the ways to reduce the hotspot temperature is to allocate power-hungry jobs to compute nodes whose effect on the hotspot temperature is small. It can be accomplished by optimizing job-to-node mapping in the job scheduler. 
In this paper, we propose a cooling and node location-aware job scheduling strategy which tries to optimize job-to-node mapping while improving the total system throughput under the constraint of total system (compute nodes and cooling facilities) power consumption. Experimental results with the job scheduling simulation show that our scheduling scheme achieves 1.49X higher total system throughput than the conventional scheme.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.19" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/03c7a20c919dd3b6996124a96b199b0b2836d462", "sources": [ "DBLP" ], "title": "Cooling-Aware Job Scheduling and Node Allocation for Overprovisioned HPC Systems", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "03db8959fce80db50c3ed7f7c3b30ff750cdc870": { "authors": [ { "ids": [ "1689181" ], "name": "Yong Li" }, { "ids": [ "2674237" ], "name": "Sven Sch\u00e4ge" } ], "doi": "10.1145/3133956.3134006", "doiUrl": "https://doi.org/10.1145/3133956.3134006", "entities": [ "Cognitive dimensions of notations", "Cryptographic hash function", "Cryptographic primitive", "Cryptography", "Denial-of-service attack", "Key exchange", "Standard-definition television" ], "id": "03db8959fce80db50c3ed7f7c3b30ff750cdc870", "inCitations": [], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "818", "journalVolume": "2017", "outCitations": [ "f2bfed32c28b25076ceaa4a8897f050932ea9c88", "2eab52ed3761f8946a3adc368c9de3560743ede9", "6bc308af54fe8c71993d08c9d796947eb2cfc6f2", "1a79a3efec3d4a177bae7326dd75e33cf362120d", "3b84eb981b8940147a2fdad8a95ffa92b1ed8674", "41cce5971ffad66f1c6dd7353e0b5a5763f80b0e", "3a5352992cf04969557477bebecdaeaf23d5b730", "851cd7f2924bf302e93c7aebfed0469954704cd2", "e143f19ae0423e82564afb41ecf51bf8dc17cc1f", "683c8f5c60916751bb23f159c86c1f2d4170e43f", "5426706f5c9ec33d4df9fabce473d4aaaa175e67", "9682341a91f0ea73f3dd9b3548c1e113d7a7f61d", 
"27c4eefbdc3ecee1238807ee454a3bcfbdc3b694", "d030862b5ab53b3fad21e1f48733b78d4a6e35b2", "1668d381aafb2c23ebe84c3e91517f6af369d5ae", "021fe0d3dd74fdd2db57c2af510d99ddf7a59d10", "3cb2009ebfa2c83ab0e26bd8c0e8e7855e535a08", "48cfd40ed728f6c33d177e670c51b436225500f6", "ab39ac59408ad25186d0c1854ba7cf0e0ca69c36", "0a45dc987c8265115bedb2fccf574f38e04d4ca6", "74e0e22cdefb11a49dac487c3151dcc19deca0cb", "14b6054c19faa224e40b4987eaac7518f8f76955", "0439891cdb1eb280b83c910e27b3a3be9192e902", "1471e355a9ed0857687bda13e7694d5353f9715e", "b42ae750455f7ced510cdfc82d6b2694a065ac3a", "233e3eb23bb3e882d221474e992afe944f1787d5", "380d4e7d272f1682d699cb3058094a56b9e76b66", "2d8a132fd622b6b8e46507911f7ab24cbd37e667", "2abb585d8263d6048d04fc00ed2782c79722b50c", "bde332de7397463a2c641c9983eead2267a2143c", "54113d65b26940b290c1fe3f6324e012b3ae77d6", "ade75bd84df51da989efef5a5cd20f9c0fa18b74", "4de5bb864c8f7dc3896ec2d0a9991a65b7bec831", "5b269f67ca847ab27392063f6959917a1f22560c", "d0078077f100e27516af114e33a9d375fcfeee9a", "43c046c3f3b78bec2b528d45b3ded4bb0046d426", "de9dc621472717950511cee953f6a94736f3c177", "517d4d45013e3b040cec89ba1cffbd4a7eb0122d", "c3ebbc2e3ac42b5bdf97f67a22ba4b656593a0a4", "4b2dab634a6af740eb41e792cea3d04c1a3542b1", "a28ec078e8414d6bb509cc90db7a5c5f47a97c46", "2e149d969293bef42eb113af644c898c8531dc06", "fe022d3fe9366a495ebd73cce3a3e9a214c157a3", "4e055184f0d2362bd649880146e942ef41fb47f9" ], "paperAbstract": "An essential cornerstone of the definition of security for key exchange protocols is the notion of partnering. The de-facto standard definition of partnering is that of (partial) matching conversations (MC), which essentially states that two processes are partnered if every message sent by the first is actually received by the second and vice versa. We show that proving security under MC-based definitions is error-prone. To this end, we introduce no-match attacks, a new class of attacks that renders many existing security proofs invalid. 
We show that no-match attacks are often hard to avoid in MC-based security definitions without a) modifications of the original protocol or b) resorting to the use of cryptographic primitives with special properties. Finally, we show several ways to thwart no-match attacks. Most notably and as one of our major contributions, we provide a conceptually new definition of partnering that circumvents the problems of a MC-based partnering notion while preserving all its advantages. Our new notion of partnering not only makes security definitions for key exchange model practice much more closely. In contrast to many other security notions of key exchange it also adheres to the high standards of good cryptographic definitions: it is general, supports cryptographic intuition, allows for efficient falsification, and provides a fundamental composition property that MC-based notions lack.", "pdfUrls": [ "http://eprint.iacr.org/2017/818", "http://doi.acm.org/10.1145/3133956.3134006", "https://eprint.iacr.org/2017/818.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/03db8959fce80db50c3ed7f7c3b30ff750cdc870", "sources": [ "DBLP" ], "title": "No-Match Attacks and Robust Partnering Definitions - Defining Trivial Attacks for Security Protocols is Not Trivial", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "03df9cfd3eb3c4f8a4230c8ee389ccd74dcb8ac9": { "authors": [ { "ids": [ "2580938" ], "name": "Ivy Bo Peng" }, { "ids": [ "1695375" ], "name": "Roberto Gioiosa" }, { "ids": [ "1746771" ], "name": "Gokcen Kestor" }, { "ids": [ "1758463" ], "name": "Erwin Laure" }, { "ids": [ "2279799" ], "name": "Stefano Markidis" } ], "doi": "10.1109/ICPP.2017.9", "doiUrl": "https://doi.org/10.1109/ICPP.2017.9", "entities": [ "Admissible numbering", "Cray XC40", "Dataflow", "Dataflow programming", "Decoupling (electronics)", "Memory-mapped I/O", "Parallel computing", "Performance Evaluation", "Pipeline (computing)", "Programming paradigm", "Scalability", 
"Speedup", "Supercomputer" ], "id": "03df9cfd3eb3c4f8a4230c8ee389ccd74dcb8ac9", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "1-10", "journalVolume": "", "outCitations": [ "14e0d2bdfb3fca202b3fc0e19a12d3082f81b931", "4721ad0db596f3f78ddb31b4305ddbde35f8f181", "cbf8acf187297b22bf189cd057b9495f90bd973b", "721c5be47c923d9c0303a3eefd3d42a57e0add03", "0606b9b2bedb67039eb1615f8ef5b13f42f8339a", "050b6a5f0e650a12223c27fb133eb5e398df8480", "843851b2f836537d627cb318416c688e88613339", "d42a29e6977c28f7bf23d63b00c48f2e9100403e", "1614a2964f0d655e840399e3d458a8836b700ad9", "275d613b52965edcb20483a5d3fc6a5122d6e4fb", "27f942d376a3e25f46ddd236b3deef1653cf737e", "0541d5338adc48276b3b8cd3a141d799e2d40150", "d157f24150fa5ffe5a065418331cf8951dbe8910", "0c205f91402984905e1bcf5f05f973c5588c1325", "cb11f346bb41adf8c5d89d91ad106beb3091ffb1", "08937c92f31895e16af48de1c7d18eeceef11f6f", "5e50ffa96fa021c85ccedb1bb8b84b59ee268de8", "52e684dbec39ec87883cb1e16b8e1ca15cbcacdc", "71a4bacd57f43f927f0b9dc07b9e53b0f166fe67", "34a86b7b24d4a93dbb249fc05f0b7c0f48f90aff", "94ff8cd9e59ec747bdad91835f089a33819c0cb5", "1f2ff98f9413bb36c641e9edcfa79f7b33eeb80a" ], "paperAbstract": "Production-quality parallel applications are often a mixture of diverse operations, such as computation- and communication-intensive, regular and irregular, tightly coupled and loosely linked operations. In conventional construction of parallel applications, each process performs all the operations, which might result inefficient and seriously limit scalability, especially at large scale. We propose a decoupling strategy to improve the scalability of applications running on large-scale systems. Our strategy separates application operations onto groups of processes and enables a dataflow processing paradigm among the groups. 
This mechanism is effective in reducing the impact of load imbalance and increases the parallel efficiency by pipelining multiple operations. We provide a proof-of-concept implementation using MPI, the de-facto programming system on current supercomputers. We demonstrate the effectiveness of this strategy by decoupling the reduce, particle communication, halo exchange and I/O operations in a set of scientific and data-analytics applications. A performance evaluation on 8,192 processes of a Cray XC40 supercomputer shows that the proposed approach can achieve up to 4x performance improvement.", "pdfUrls": [ "https://arxiv.org/pdf/1708.01304v1.pdf", "http://arxiv.org/abs/1708.01304", "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.9" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/03df9cfd3eb3c4f8a4230c8ee389ccd74dcb8ac9", "sources": [ "DBLP" ], "title": "Preparing HPC Applications for the Exascale Era: A Decoupling Strategy", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "03e82961baf648be80bfe8406660377b0b1feca7": { "authors": [ { "ids": [ "1775574" ], "name": "Mario Alviano" }, { "ids": [ "1786782" ], "name": "Michael Morak" }, { "ids": [ "1771740" ], "name": "Andreas Pieris" } ], "doi": "10.1145/3034786.3034794", "doiUrl": "https://doi.org/10.1145/3034786.3034794", "entities": [ "Analysis of algorithms", "Chase (algorithm)", "Directed acyclic graph", "Existential quantification", "Logic programming", "Negation as failure", "Operational semantics", "Polynomial", "Polynomial hierarchy", "Query language", "Skolem normal form", "Stable model semantics", "Tuple-generating dependency" ], "id": "03e82961baf648be80bfe8406660377b0b1feca7", "inCitations": [ "9ddcca5760ad3885ea3053df78519494c04eee2f" ], "journalName": "", "journalPages": "377-388", "journalVolume": "", "outCitations": [ "09c1d67941c6f59e9d31f5a1cdbbc2538c572992", "a59c55fb92e4563392c9a8231d08626a95ed3980", 
"9377d3c17d7b76294928b5249f8fa065ed19ea06", "5351ee12cf6b2f13c4baee30ab9137b1977c00f9", "18679988622767cefb123d63219b42e4bd1e4b9b", "d23fafd9586a759729e134134fd311fef0dbdde9", "4de92b8695ef75d438344b60a73fea0ffa56de1a", "5fddfb7234b8e6ea4fb17c3efb70bc3eb31ac098", "b49a3aa915ac2d21262406bbe1d3c0071fe39372", "304c5b3b9bb0bb1791a17bef37aebf46e238bcba", "07952e90071960db663d05513b9490b9df9f98f3", "4cdf3930fabf148fae7b82a9676bd03660372023", "10c54b46651cb25bf57ff20d779c98eb8d393130", "5e6f3a3b472d7abf4783c1714f44a14395190571", "f7011491210da1400d46e04f9704dc4f9f9568bc", "08fce40b11fc943d41a8d06f164d0c99a3f6fd18", "0d491b3378ceebddaf6f76b123d2a103a342e88d", "af21def975d12de6bf3d92570f487235c89966e5", "466a0b7cffa32a62ca3fe8db8fd65f363a3a6463", "21df57c55c00d44b8ab235c230d58b17a6637466", "205b5fb66c8d38420595ab0769c3e00a1910486b", "88d4caf8a32673af4d810e17f9fd912cb4687867", "100883322e86a09a7b66c9e291e44fce2074c126", "0637102d5c918e2496c7607155c896321a3218ca", "930f85b466412d3447dd84e1b9db0f5eb7f2b733", "1ccbe32f1f4de198c204b9786809e079f500fc5d", "2bc7bac7f7cdf20816758fd794909176cc97ed92", "65298a45b07dbe81bd7ff297b647688e3322e3b8", "c7d39887aa9283e61ffaa5827244525dee598c96", "2e29ef2217a27c088f390579bfe0d3cf2878fe1b" ], "paperAbstract": "Normal tuple-generating dependencies (NTGDs) are TGDs enriched with default negation, a.k.a. negation as failure. Query answering under NTGDs, where negation is interpreted according to the stable model semantics, is an intriguing new problem that gave rise to flourishing research activity in the database and KR communities. So far, all the existing works that investigate this problem, except for one recent paper that adopts an operational semantics based on the chase, follow the so-called logic programming (LP) approach. 
According to the LP approach, the existentially quantified variables are first eliminated via Skolemization, which leads to a normal logic program, and then the standard stable model semantics for normal logic programs is applied. However, as we discuss in the paper, Skolemization is not appropriate in the presence of default negation since it fails to capture the intended meaning of NTGDs, while the operational semantics mentioned above fails to overcome the limitations of the LP approach. This reveals the need to adopt an alternative approach to stable model semantics that is directly applicable to NTGDs with existentially quantified variables. We propose such an approach based on a recent characterization of stable models in terms of second-order logic, which indeed overcomes the limitations of the LP approach. We then perform an in-depth complexity analysis of query answering under prominent classes of NTGDs based on the main decidability paradigms for TGDs, namely weak-acyclicity, guardedness and stickiness. 
Interestingly, weakly-acyclic NTGDs give rise to robust and highly expressive query languages that allow us to solve in a declarative way problems in the second level of the polynomial hierarchy.", "pdfUrls": [ "http://www.research.ed.ac.uk/portal/files/31832886/PODS_17_1.pdf", "http://doi.acm.org/10.1145/3034786.3034794" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/03e82961baf648be80bfe8406660377b0b1feca7", "sources": [ "DBLP" ], "title": "Stable Model Semantics for Tuple-Generating Dependencies Revisited", "venue": "PODS", "year": 2017 }, "03e89626cbb864fb1243b4ee8b4037020a9250eb": { "authors": [ { "ids": [ "2661057" ], "name": "Andrew Ferraiuolo" }, { "ids": [ "1919355" ], "name": "Andrew Baumann" }, { "ids": [ "3095589" ], "name": "Chris Hawblitzel" }, { "ids": [ "2291514" ], "name": "Bryan Parno" } ], "doi": "10.1145/3132747.3132782", "doiUrl": "https://doi.org/10.1145/3132747.3132782", "entities": [ "ARM architecture", "Address space", "Assembly language", "Concurrent computing", "Confidentiality", "Correctness (computer science)", "Encryption", "High- and low-level", "Komodo Edit", "Microcode", "Microkernel", "Operating system", "Protection ring", "Software deployment", "User space", "X86" ], "id": "03e89626cbb864fb1243b4ee8b4037020a9250eb", "inCitations": [ "8569785f80712b5787e12b86a3870a28c0182b2c", "85f1cbe69b6dc7a78b04fe62d8ce821714326652", "55eab01bb2d10a2fdc38fa4a7403c0c96f66e5cd", "788b9e288c8db9decbbb2668fdee3737e386e143" ], "journalName": "", "journalPages": "287-305", "journalVolume": "", "outCitations": [ "07fa3cf4e7be333b8a862c8859e36ea4ff42a8e0", "30f52a79ff53f8969ffcba19013b4a43e629875f", "78b872aa7453aeaa8803d1fef9f110387ee23420", "69b7456f3d47fed3745239b5f67996a0b9a1a5c9", "4c891cc807e701ba31a378a1e672d26bbac22cdc", "7a5cf32d06c3b2e4f27bee372a53bdc2e8fcfbce", "4ce02fb69245a84d3ffceae20e596dcf0497508d", "05b85222bd229a7fc6774fc687ffddacf6445780", "194f7d8647009dea5f4867ae27d340c84c46f51b", 
"d296252ddf0e2c6b7422008d703843c1863bd15b", "42e5216f57b17bb7dba13a2b73e36b4c057a6c96", "24706cbf8c48414ed66db6fbf223c47452f7cbdc", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "0a35c32ebc233556d11c2038f5a4362f2c40b2a9", "1fb49ae43195232f0b3d1c9d534a5aa03bdd8f26", "4a21b985d8c33977876968359de7d361ba55e208", "06bf84f98e7dd39be8d96eb67bafbf56d61bc715", "07f0e56d1c37c213cd5c617dbfba5a0549629a19", "693770a65bf0183c9bca10e5fde5e3848bbbb40c", "3133c223a3ae8a740dee4a47363231d3c3160b16", "109c2e2a5d61c22b7c00c543c18a5252da130c3d", "9dcb7c2ba8d55629c854f0bf89c759ad656a9088", "104eb3716d8b2d7e6d69805357d5b7fb87caff3e", "eeb5ec8d23124c4b352aa4168cb03f87f9480c92", "3367eaf02789f5dcf741318fcc18c0dea8fcbb76", "3c4e907c07944cd55e800b4e55918adf8cb2a683", "30ba0dd406a6f22e2ff30a0bfd7d1377e672c1ba", "41c2c11acde144ccf62cb6eff30731195d22775b", "01fde8698110cf46ff48a17c65f2658dab4c323c", "5693c2a2c52f4905638559b2fc2b76c975806175", "9396371baa0f755a6e766c12eb102a97a3bc5562", "36222f8eb2ccf21ca345e15186cea64506581543", "7c5d25c73dddcb8ad07c7d29f3a2d97437a3123c", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "04c9a788aaafe449680e85f90a65540913e13275", "3457738844208e1b4aeb2f1a0971ec39216e3000", "477bbcb5655a9c64893207bb49032e87c06a05f2", "1bd2d9fb62832737735d011154834b7c80c7e50a", "0c10529346c4d2d5d4462636a0b3a0dd9fb8d25c", "0d3c49f0d6743b03615bfcf546b5d015d32d4035", "b4b7f7a6c668be9d966567a2de9a50eb83986fd5", "15aaa56f06eca80760943e47f1781591209f2860", "8e3270ef5a0afc293021f8d594979ed059e29d5e", "2b3cdf37bff57e29fb5aecc136603f16c855366b", "08c2649dee7ba1ab46106425a854ca3af869c2f0", "2b004e0484f4940fca341fa97ecb8ac94fe780a5", "0cff564ecbf954a61327a944f605019ae38a0da5", "4271680ae4d95b130426e165ad9e9d9b81d938cd", "3ffc94c7066b4856b1cfae99ab66cd20310e41dd", "17886b4911ffd50d7e02a574caad34a286458b3a", "7aaede70f5efcb1542a80707c1f0f8b01955a7d2", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "04d6f78e14a92fa72bcefc206c24b2df7b27e5e6", "c5dc96463e5ad4378277550f95aa86ee070d93ef", 
"b0ac2616034f56ab1469afb935b55fe7e37f8f41", "080c336698f5d7a15169e5ad98fa62a0bbf6085c", "2817df10c4ffe29482928cb97b8ee89d8560b4cd", "082a3d4886a28e046c92796f86dd7ec7f7e77d25", "3f6f619fea4e9241d9fa5d39be4e985757e571de", "0a289fd7b14345822b1acda6d82750b15d59663e", "c9d48b2ad9ae24d1028a6d7be22e66d867788a3c", "c75f723c4a55bd30ee64f7c0d65560011c7b2f95", "6c725d2a7e88515c5f7c877936f90b0184c4fe8f", "2b6df21137f30d25494bb58521a6062f93e915f8", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824" ], "paperAbstract": "Intel SGX promises powerful security: an arbitrary number of user-mode enclaves protected against physical attacks and privileged software adversaries. However, to achieve this, Intel extended the x86 architecture with an isolation mechanism approaching the complexity of an OS microkernel, implemented by an inscrutable mix of silicon and microcode. While hardware-based security can offer performance and features that are difficult or impossible to achieve in pure software, hardware-only solutions are difficult to update, either to patch security flaws or introduce new features.\n Komodo illustrates an alternative approach to attested, on-demand, user-mode, concurrent isolated execution. We decouple the core hardware mechanisms such as memory encryption, address-space isolation and attestation from the management thereof, which Komodo delegates to a privileged software monitor that in turn implements enclaves. The monitor's correctness is ensured by a machine-checkable proof of both functional correctness and high-level security properties of enclave integrity and confidentiality. We show that the approach is practical and performant with a concrete implementation of a prototype in verified assembly code on ARM TrustZone. 
Our ultimate goal is to achieve security equivalent to or better than SGX while enabling deployment of new enclave features independently of CPU upgrades.\n The Komodo specification, prototype implementation, and proofs are available at https://github.com/Microsoft/Komodo.", "pdfUrls": [ "http://doi.acm.org/10.1145/3132747.3132782", "https://www.sigops.org/sosp/sosp17/slides/komodo-sosp17-slides.pdf", "https://people.ece.cornell.edu/af433/pdf/ferraiuolo-sosp-17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/03e89626cbb864fb1243b4ee8b4037020a9250eb", "sources": [ "DBLP" ], "title": "Komodo: Using verification to disentangle secure-enclave hardware from software", "venue": "SOSP", "year": 2017 }, "03ff19babea1e67d2aba21e996af3695bbc87d0e": { "authors": [ { "ids": [ "2874809" ], "name": "George M. Slota" }, { "ids": [ "1750699" ], "name": "Sivasankaran Rajamanickam" }, { "ids": [ "1782490" ], "name": "Karen D. Devine" }, { "ids": [ "2421239" ], "name": "Kamesh Madduri" } ], "doi": "10.1109/IPDPS.2017.95", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.95", "entities": [ "Computation", "Display resolution", "Distributed memory", "End-to-end principle", "Run time (program lifecycle phase)", "Scalability", "Sparse matrix", "Time complexity" ], "id": "03ff19babea1e67d2aba21e996af3695bbc87d0e", "inCitations": [ "13e388ab3495d313ae6838b26e8d34517a67e698", "771610413f3654b8e4f38aab4dd970a481c7196f" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "646-655", "journalVolume": "", "outCitations": [ "4dc578364f357b993b5554b9181c90c84aa6b4d1", "c0e5583357f80b884a033346aa6580c149378803", "420a0e5fc398f197bca3dfe40291a82b2c65655a", "1a907f453ad42e68247c3dc3ea9f88e157fd0235", "190983c95e49a0fbde080d8a92bca0270e0eb968", "4714cd9a2c38a4590ca6802a076009a09e49f7e9", "4410f0c48f982f960a54500df7bd88e4cab88927", "0d06de003e8ca949b3b39f9a51750c050addb997", 
"65250c893b60e86360352d239842e6c37967b2fb", "16c83e37be3a423a8eefaa483e7a4cffe8cd3a70", "2512c72dcf83e13cc2c5543ff310dc75652f4bad", "0be9827857bfd79a00a9b1e64d59e8c34534362c", "9e6a3bc9a1bd5fe34a989c7d7d718db50970a31c", "58ba34f71bafffcd120112f97a55cebb656b8bab", "141e35263ab810983c90d47ad62eb4fab5e51717", "141004dee9e799b40bfaf50b4a72618613137250", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "052715e9292df2bb62e95616ac6486fba7cbf72f", "2d1d0ee6e21c288d96577b24656cd3398082f857", "1521d39088b203ddac981d10d214f463449ae95b", "08c64461c57f2bcc9cb63003d5acf613943fb705", "0371f9e3efbcd4829b5ffbff585155746ef05284", "70ef942ceda757cea71ee2c53f7deb4f09b0df7b", "01c1f0e97ce5c74c714dc7aa43cb064f45cc3b04", "c3fbbd9c1fc5e53c6a9e3fe27e1bfce4755c8ef3", "cf507e064919656fbbf5174257de81a90e8bfcf5", "1f0612de1f191abadf250b78cd78f884203cca5e", "7f1d9bda4324068355e23f5b1413b2e068903407", "1ad8410d0ded269af4a0116d8b38842a7549f0ae", "308002cca6afdfd4f751a382357b027dd94d2de4", "64a513b60ad89c4eee81a186e53c8d5c8773acac" ], "paperAbstract": "We introduce XtraPuLP, a new distributed-memory graph partitioner designed to process trillion-edge graphs. XtraPuLP is based on the scalable label propagation community detection technique, which has been demonstrated as a viable means to produce high quality partitions with minimal computation time. On a collection of large sparse graphs, we show that XtraPuLP partitioning quality is comparable to state-of-the-art partitioning methods. We also demonstrate that XtraPuLP can produce partitions of real-world graphs with billion+ vertices in minutes. 
Further, we show that using XtraPuLP partitions for distributed-memory graph analytics leads to significant end-to-end execution time reduction.", "pdfUrls": [ "http://www.sandia.gov/~srajama/publications/PuLP-IPDPS17.pdf", "https://arxiv.org/pdf/1610.07220v1.pdf", "https://doi.org/10.1109/IPDPS.2017.95", "http://arxiv.org/abs/1610.07220" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/03ff19babea1e67d2aba21e996af3695bbc87d0e", "sources": [ "DBLP" ], "title": "Partitioning Trillion-Edge Graphs in Minutes", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "040e9e2ed720af8822080a6c3826172ad72e0c3e": { "authors": [ { "ids": [ "39131579" ], "name": "Bin Dong" }, { "ids": [ "1773743" ], "name": "Kesheng Wu" }, { "ids": [ "1749233" ], "name": "Surendra Byna" }, { "ids": [ "1685638" ], "name": "Jialin Liu" }, { "ids": [ "3083209" ], "name": "Weijie Zhao" }, { "ids": [ "2289824" ], "name": "Florin Rusu" } ], "doi": "10.1145/3078597.3078599", "doiUrl": "https://doi.org/10.1145/3078597.3078599", "entities": [ "Big data", "Computation", "Data access", "Database", "Locality of reference", "Programmer", "SciDB", "Time series", "Universal Disk Format", "User-defined function", "rasdaman" ], "id": "040e9e2ed720af8822080a6c3826172ad72e0c3e", "inCitations": [], "journalName": "", "journalPages": "53-64", "journalVolume": "", "outCitations": [ "62ea7fbdc3349f4fe8f12f098f1ce4a746faa5db", "d827cdb49d3abb23405ee03e070c5a42c07d28ea", "0b99088a6579e29776382978899748fee98ca589", "a2ff5117ccd1eb3e42c6a606b8cecb4358d3ec84", "24679ccb0586642553a21e9fcd8aa5a57f97cabe", "a296d522b6b02ee1829fd35e0f9dc7f13fb07ac4", "6955de9b4c6554a0d8a1ce8c7c06bfad7b4d1918", "1e7be30c6b2cd522083183913a0ca820a036342c", "3512434abce0c19fa3eff0126000279b3d1cb059", "06c28afd00805ff76f25e89ada90fe8ef66a3a45", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "7ec028ace29244cb74c105327a7e4177a34aa6bd", 
"80fe4d2f309f2d4d9026fe2fdd53ee74f4fd25cd", "058224ac7b9bc0a0b82e62257656c7a6df62219e", "267ed9139427b2d30b17cfc91880b04fb910983a", "d21d261eca404e60fb3edd2a489694ca4cbda537", "3c4776e5f96ebe8a6de1a855f523a28c687eb994", "2900ebddc2dfb1e4bb7d7eac7384d7f4512b2b9a", "32455f131e1eb9fa88db2e991be69b59e73f525c", "153703ab30c7cb56a49718991f6bc450f0c2273f", "99b8440905bd77d3ea46b47479ae967e33eb05ee", "9d62a4187b126111aef25a10e6691df9ca66835f", "3851430bb53b09f78880bb3480f835c22ca81a94", "0541d5338adc48276b3b8cd3a141d799e2d40150", "4ac29283ea51b3987caeaa165fc2e2366cf17738", "05d21a0984a92310131917ed22c255ff29a93b6c", "32c1ece816a8b5efa08e3ddd339345f88326be28", "9ee76efb171dbc1264ab4b22933e3deedfd7fde8", "7982bdb498c5efeafddd2ffaf9810a7f0712f162", "2e0e331f92982ee5693d315821655c3583983c6c", "11e512701c7a2a5cd48d5435e8d42f292161ceca", "9d67f52469f24f65f661e3842774fc4b7f5cb77b", "eea2ea1d4a7ae945046986f5674f437e0187e184", "6474100a17b82d028e7131e8e0769cbc4e110914", "b1ff5308c6c6da7317f8d7649f884be701a72a0b" ], "paperAbstract": "User-Defined Functions (UDF) allow application programmers to specify analysis operations on data, while leaving the data management tasks to the system. This general approach enables numerous custom analysis functions and is at the heart of the modern Big Data systems. Even though the UDF mechanism can theoretically support arbitrary operations, a wide variety of common operations -- such as computing the moving average of a time series, the vorticity of a fluid flow, etc., -- are hard to express and slow to execute. Since these operations are traditionally performed on multi-dimensional arrays, we propose to extend the expressiveness of structural locality for supporting UDF operations on arrays. We further propose an in situ UDF mechanism, called ArrayUDF, to implement the structural locality. 
ArrayUDF allows users to define computations on adjacent array cells without the use of join operations and executes the UDF directly on arrays stored in data files without requiring to load their content into a data management system. Additionally, we present a thorough theoretical analysis of the data access cost to exploit the structural locality, which enables ArrayUDF to automatically select the best array partitioning strategy for a given UDF operation. In a series of performance evaluations on large scientific datasets, we have observed that -- using the generic UDF interface -- ArrayUDF consistently outperforms Spark, SciDB, and RasDaMan.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078597.3078599", "http://crd.lbl.gov/assets/Uploads/hpdc02.pdf", "http://faculty.ucmerced.edu/frusu/Papers/Conference/2017-hpdc-array-udf.pdf", "http://faculty.ucmerced.edu/frusu/Talks/2017-06-hpdc-array-udf.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/040e9e2ed720af8822080a6c3826172ad72e0c3e", "sources": [ "DBLP" ], "title": "ArrayUDF: User-Defined Scientific Data Analysis on Arrays", "venue": "HPDC", "year": 2017 }, "0418905a962864523b9d0283e5b1dfa940038cfe": { "authors": [ { "ids": [ "26388047" ], "name": "Willian Barreiros" }, { "ids": [ "2711977" ], "name": "George Teodoro" }, { "ids": [ "1753288" ], "name": "Tahsin M. Kur\u00e7" }, { "ids": [ "1711386" ], "name": "Jun Kong" }, { "ids": [ "1771683" ], "name": "Alba Cristina Magalhaes Alves de Melo" }, { "ids": [ "1735710" ], "name": "Joel H. 
Saltz" } ], "doi": "10.1109/CLUSTER.2017.28", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.28", "entities": [ "Algorithm", "Anatomic Node", "Central processing unit", "Computation", "Hybrid system", "Image analysis", "Image resolution", "Image segmentation", "Quantitation", "Reuse (action)", "Socket Device Component", "Speedup", "Value (ethics)", "algorithm", "cellular targeting" ], "id": "0418905a962864523b9d0283e5b1dfa940038cfe", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "25-35", "journalVolume": "", "outCitations": [ "8630c01e55b4025ad30f857c0218f392facb8f21", "791c8b4e7538cfd542e20dd1a27d0a78b33bed6f", "b9d5572f70e5b3c4287b17ba23c223e9515d3714", "6b9f7f1e8a602ff83126d087c5a08aa9c8c12f16", "3fddfe82fbd1866cccd9eb6f3577533521bfe0b0", "b10145f7fc3d07e43607abc2a148e58d24ced543", "b6fdeb0c962a7e0b24480b20a044cb925f15f077", "0f5de500a6bfdd7c0c7ff40a9717af3a56fdefc2", "09045f55ef18cfb6bb97934857bc5906f0f14c70", "8fc52ce413863e5b9d78f884912858cd8a1f4ad9", "2566acc500a8f013610d306bea7a8f548930dfed", "03daf2d17337f000538d9d4727fa49d52bdb922c", "7b0569980ca59e6b7d5c1f9dea97464640149b84", "061e80ca3bc302b1f5031d0065e563423dafb12e", "bf9cdf51852562e5f09a3ddbd6c93b12abbc152a", "0a756312d6a6dfcf0a9e27f91affce6412833a9f", "d119a886aa6a2062038567b6f840f843930e1f1f" ], "paperAbstract": "We investigate efficient sensitivity analysis (SA) of algorithms that segment and classify image features in a large dataset of high-resolution images. Algorithm SA is the process of evaluating variations of methods and parameter values to quantify differences in the output. A SA can be very compute demanding because it requires re-processing the input dataset several times with different parameters to assess variations in output. In this work, we introduce strategies to efficiently speed up SA via runtime optimizations targeting distributed hybrid systems and reuse of computations from runs with different parameters. 
We evaluate our approach using a cancer image analysis workflow on a hybrid cluster with 256 nodes, each with an Intel Phi and a dual socket CPU. The SA attained a parallel efficiency of over 90% on 256 nodes. The cooperative execution using the CPUs and the Phi available in each node with smart task assignment strategies resulted in an additional speedup of about 2×. Finally, multi-level computation reuse lead to an additional speedup of up to 2.46× on the parallel version. The level of performance attained with the proposed optimizations will allow the use of SA in large-scale studies.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.28" ], "pmid": "29081725v1", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0418905a962864523b9d0283e5b1dfa940038cfe", "sources": [ "Medline", "DBLP" ], "title": "Parallel and Efficient Sensitivity Analysis of Microscopy Image Segmentation Workflows in Hybrid Systems", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "041b0d961c61265ba5529787d6c53ac2e9ec4b89": { "authors": [ { "ids": [ "1828907" ], "name": "He Sun" }, { "ids": [ "2838505" ], "name": "Luca Zanetti" } ], "doi": "10.1145/3087556.3087569", "doiUrl": "https://doi.org/10.1145/3087556.3087569", "entities": [ "Algorithm", "Algorithm design", "Big data", "Cluster analysis", "Computational problem", "Convex function", "Convex optimization", "Data mining", "Distributed algorithm", "Distributed computing", "Load balancing (computing)", "Machine learning", "Mathematical optimization", "P (complexity)", "Social network", "Stochastic process" ], "id": "041b0d961c61265ba5529787d6c53ac2e9ec4b89", "inCitations": [ "96b584f90af556c51440d7d27dbd078c8984bc8d" ], "journalName": "", "journalPages": "163-171", "journalVolume": "", "outCitations": [ "cc9a34806690d2278c06c2242ed761f0e9592dd2", "aedc1fbd8902e6a6a7abb09b6d53c36ff8f47497", "2d115ae720872e61bdb55dd18343b83f24c415a6", 
"3b98975b82441a4ed2d2a1404e2767afff38502f", "dd174e56a7fb404369cbc8bccfa0de6328749297", "24259d92169e287c55abd3bd6cc5b2da50a88c4b", "bbc0895ebeb90fa3f48a2a26a71ad15fd44caf8e", "d1d462ae220a79480347070a3dbab8863bfc58be", "13dff6a28d24e4fe443161fcb7d96b68a085a3d4", "173c7a0ed002d1d694380469e251515a2b516888", "13f008360c48e279afbaa9335155a4ea54b9da31", "45e8546d3272ee05329ec8c9eedeb8952a8e879b", "4c256085f2b76aa5554e8fc47fa8c42cf076a428", "0f8dd25d69691636b6d3206572271d292e9a37e6", "37030e618f7caa7a8c3fec3454fb0d43915002a4", "6f8c546b574ff16a800d202d51900cc1e56e4e94", "097b0fc37ebe626414835e990ed0c7af9b31796c", "66549f785d13a44171fcc21899802325e7d923cd", "08e666907929fd29dae3fd52e66143b0a9e45ce1", "49f2214fa494c034106e050ab6d140cc6d215c15", "1102b9cd97b8b368e47c44d687f1000997757d36", "64ad3e977678d7034a66fe89df5ce60ab764cf99", "0aaaab026edf97470d3401d4e2390c508f828209", "24d484a36928362ce1c61331870ca0bef6f96480", "1c7e0a76c8a5d2b14fa73d639af6ad79da6cf059", "7423137dd23b0044698fe9f3554fea8a6beb776a", "0197a437cebc4a759fc3b9d577622f2475f548af", "34f084c531faf6131f0540a41cfd81ae7e7830aa", "95b251202e1f5ff7d7f41dda553a38e395ecf555", "042ee2af87fabc6ce71035d887b8e866815c10bd", "5928dd51e1d7d940d528ffc0455cab8248c551bc", "18f3f55f67d6b487e369be8ae1362cc621b9fe6c" ], "paperAbstract": "Graph clustering is a fundamental computational problem with a number of applications in algorithm design, machine learning, data mining, and analysis of social networks. Over the past decades, researchers have proposed a number of algorithmic design methods for graph clustering. However, most of these methods are based on complicated spectral techniques or convex optimisation, and cannot be applied directly for clustering many networks that occur in practice, whose information is often collected on different sites. Designing a simple and distributed clustering algorithm is of great interest, and has wide applications for processing big datasets. 
In this paper we present a simple and distributed algorithm for graph clustering: for a wide class of graphs that are characterised by a strong cluster-structure, our algorithm finishes in a poly-logarithmic number of rounds, and recovers a partition of the graph close to an optimal partition. The main component of our algorithm is an application of the random matching model of load balancing, which is a fundamental protocol in distributed computing and has been extensively studied in the past 20 years. Hence, our result highlights an intrinsic and interesting connection between graph clustering and load balancing.\n At a technical level, we present a purely algebraic result characterising the early behaviours of load balancing processes for graphs exhibiting a cluster-structure. We believe that this result can be further applied to analyse other gossip processes, such as rumour spreading and averaging processes.", "pdfUrls": [ "http://seis.bris.ac.uk/~hs15417/SPAA17.pdf", "https://arxiv.org/pdf/1607.04984v1.pdf", "http://arxiv.org/pdf/1607.04984v1.pdf", "http://arxiv.org/abs/1607.04984", "https://arxiv.org/pdf/1607.04984v2.pdf", "http://doi.acm.org/10.1145/3087556.3087569" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/041b0d961c61265ba5529787d6c53ac2e9ec4b89", "sources": [ "DBLP" ], "title": "Distributed Graph Clustering by Load Balancing", "venue": "SPAA", "year": 2017 }, "04376a241d021461eb55b6a8a1391679a73cfa6e": { "authors": [ { "ids": [ "35067898" ], "name": "Sepehr Assadi" } ], "doi": "10.1145/3034786.3056116", "doiUrl": "https://doi.org/10.1145/3034786.3056116", "entities": [ "Algorithm", "Approximation", "Approximation algorithm", "Computation", "Computational complexity theory", "DSPACE", "Independent set (graph theory)", "Maximum coverage problem", "Set cover problem", "Time complexity" ], "id": "04376a241d021461eb55b6a8a1391679a73cfa6e", "inCitations": [ "265422784efe15311b28116c16c82a4d27dc0d79", 
"3ab135880738545f69e03028746fd7715ce8a036", "2b248b8eff815a1061459bcd34e50c105642ee64", "7e0695d65ad3aedaa30bb7aaf28edc432ac711e7", "59975447afa0b00f7336139ae300964715cf92ca" ], "journalName": "", "journalPages": "321-335", "journalVolume": "", "outCitations": [ "2c74cac7f8171f9e6aec986c12b38025359c105a", "1e0c2bbba98c3a6970eb88f3250a328e6893be66", "379ef18377d803d87859314c0e110cdf64f2ea73", "02f7b61d3d557da6de3c26178530179492e8f574", "88e222dc82e4323b4515908e72b2bcbd678e51f8", "d921036a6cb7e340b019afa557a19bc65586a1ad", "5811dbdd522970f2393d54d3fcea9e4b856f8fbc", "033df0fa41a945d135406e911eeb97d34c325e9d", "39d1814c2820ec53c24362e34ff1e000c6982822", "0df3980457291b7425e37eb686a6fd7b3eb94abe", "30a0bd3e7446fe2f42d3c8f46fc6c49b7637135e", "202a3630d9bea2a61a7b026bf395993c0f637caa", "ab2c3eacd12d9d37b2cc9d05c8580e7b3b78d63c", "7e4cb3ca74b9e0d83cb53340d4ead2331cc8328c", "b50e429252a5c3135977000c67f977ba222a8c59", "69b6a42ad7068962363687c038c6ae2e0760867a", "06e7ec1b1a018225fb632c1b7d029b74151b4730", "072baa4481eb75e73187b40b7c87a93db2c76659", "4d080825ac4bfcca1804abc5fcb6404dd1a2ae94", "28dd94325f1fe6a8ea5787abe3bbdf7dfea71259", "07917775e85ed02803f405b2fbf9d57a240e156e", "bf2e2f7fe2c4cf965127229574d20681ebcc3d0e", "215aad1520ec1b087ab2ba4043f5e0ecc32e7482", "16b816afe64f4525c0ba4a7a803655d50c05f706", "04ee1c7ed1b22ce513ce2672b89eb3b2ea371258", "1ddc56eca40a5730cedabb26815053c1ddab0832", "2ca183874427200496f8e642b96f6e90c8321a4f", "59975447afa0b00f7336139ae300964715cf92ca", "0371825bda41874cfa458055c93c132f0da3e04e", "c82276f525ab37cda42025714134b496cdad9988", "6094392d07d36c086a988493686b73ebca39169b", "67762ce2c7a85dbf55cdf86fedee2610229d91eb", "0b2ab31ecb350ad53db236622c9e8d5ff180be37", "ec411455675508e1751d45aa9e20dd72e3d61faa", "46b51960a073a759e1d55b41c75b6bb3e5273be8", "bed65c0c0a54d28508efb64ae79fb2537bb9f6f8", "1de6abd76b02bdc6e42a0a2d24fffcd967318e54", "1719b6402f6ad8014959018832f564ee3835efe8", "5e493ab3e938d34aeca99d463463d58a863ee97e", 
"054c5374c74d33dc3a65c3140315eafeb3d62604", "b7772296ffeef6897b7be4e36af296d920a7acf4", "9bd101ad8faba5ff0ff1f625be773ce0acb697fc", "159a7a03ccb0ef751db1870be1de4d26a02470f3", "bde7cc85837836fab6c1f946a6e77189ac9d9eed", "843f186c7f38f5d434f46ca55b6dea7d5f14deef", "9202a76c3b53d385f1b715d3a75e18c053232c32" ], "paperAbstract": "We study the classic set cover problem in the streaming model: the sets that comprise the instance are revealed one by one in a stream and the goal is to solve the problem by making one or few passes over the stream while maintaining a sublinear space o(mn) in the input size; here m denotes the number of the sets and n is the universe size. Notice that in this model, we are mainly concerned with the space requirement of the algorithms and hence do not restrict their computation time.\n Our main result is a resolution of the space-approximation tradeoff for the streaming set cover problem: we show that any α-approximation algorithm for the set cover problem requires Ω\u0303(mn^{1/α}) space, even if it is allowed polylog(n) passes over the stream, and even if the sets are arriving in a random order in the stream. This space-approximation tradeoff matches the best known bounds achieved by the recent algorithm of Har-Peled et.al. (PODS 2016) that requires only O(α) passes over the stream in an adversarial order, hence settling the space complexity of approximating the set cover problem in data streams in a quite robust manner. 
Additionally, our approach yields tight lower bounds for the space complexity of (1- ε)-approximating the streaming maximum coverage problem studied in several recent works.", "pdfUrls": [ "https://arxiv.org/pdf/1703.01847v1.pdf", "http://www.seas.upenn.edu/~sassadi/stuff/papers/multipass-streaming-setcover.pdf", "http://www.seas.upenn.edu/~sassadi/stuff/presentations/scmp-pods.pdf", "https://arxiv.org/pdf/1703.01847.pdf", "http://doi.acm.org/10.1145/3034786.3056116", "http://arxiv.org/abs/1703.01847" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/04376a241d021461eb55b6a8a1391679a73cfa6e", "sources": [ "DBLP" ], "title": "Tight Space-Approximation Tradeoff for the Multi-Pass Streaming Set Cover Problem", "venue": "PODS", "year": 2017 }, "04379477b31622586b3a632a5ac528c664f88d7a": { "authors": [ { "ids": [ "1708510" ], "name": "Juan G\u00f3mez-Luna" }, { "ids": [ "3077499" ], "name": "Izzat El Hajj" }, { "ids": [ "1709430" ], "name": "Li-Wen Chang" }, { "ids": [ "21242411" ], "name": "Victor Garcia-Flores" }, { "ids": [ "2192182" ], "name": "Simon Garcia De Gonzalo" }, { "ids": [ "2550847" ], "name": "Thomas B. Jablin" }, { "ids": [ "24636606" ], "name": "Antonio J. Pe\u00f1a" }, { "ids": [ "1723122" ], "name": "Wen-mei W. 
Hwu" } ], "doi": "10.1109/ISPASS.2017.7975269", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975269", "entities": [ "Benchmark (computing)", "C++ AMP", "CUDA", "Central processing unit", "Embodied energy", "Graphics processing unit", "Heterogeneous computing", "Memory coherence", "OpenCL API", "Programmer", "Programming language", "Task allocation and partitioning of social insects" ], "id": "04379477b31622586b3a632a5ac528c664f88d7a", "inCitations": [ "ce992c5be70243c83a5faaeea3f314ebd36302a9", "d55df4e557a56ea969a99ce9f1b3164bd21c0b1d", "d9fe07044fc80f6b84301d3d6fc088a3c6730242", "36ad8fb17b210f4a82ede242469d32ca07b44c7d", "46122831f2f1aea6b5f45025b8791ca29c239679", "bac4a0e99c98fac6fa231e9ed21e8f643674200a", "95288f5fff01bcd3fe03a090a65cadef2e87b06d" ], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "43-54", "journalVolume": "", "outCitations": [ "5e5da2a57395b0ca6888f1bbd7de5d27e33b5a81", "aa6e7660056a641440bdda478e38aace44bd29d1", "08104146873817cc35cbd96d7ca3e5169cb72296", "6ac0c44e4e56583914de316346977c8461716141", "00156e79606084497789662dfaf59c3b54a10722", "347a08cd9ada1cee83713d24ec84ed49ab121987", "54acbdadf00cc793eb3e0a2962746ae5a849c4ee", "b62430598576f433ed7b4c5c3d44000c236feab0", "ae5bd68d29d0b9b8a2a85cede82bd9ab8229e73d", "d410f8128bd4efa6adc886259e5d9de4cd7587bc", "9defbe70576ac91f13a9bd02e93cb86539f0bbd5", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "ab8405e56fc4dc9b3f7842e4fa0909fc61283e82", "26362bc38c0c3ffe27e3424ea82704678fe09eb0", "8314e43f15c26a5824333687e672a71afb415940", "681c5784c26ef6e5779ab4259eec6c351ea36631", "0ce8199bb276f6c540cfbfc3248bc2f7a1469819", "5db195c9157a8178c89e69d413d08c1725a11267", "5236710748ceb864a91e9fb4efac905114d8f1ef", "31864e13a9b3473ebb07b4f991f0ae3363517244", "f632d67c13a113fd468d910078b4be180f92127f", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "8a313a96288fa94b67b8a7aa690e2daecefabfe9", "4ad495b07abc0d7080c020dd563d9406e1753d65", 
"63af4355721f417bc405886f383af096fbfe51b2", "10a0ab781e94a75fdcbde819f3f4cddcab768bbd" ], "paperAbstract": "Heterogeneous system architectures are evolving towards tighter integration among devices, with emerging features such as shared virtual memory, memory coherence, and systemwide atomics. Languages, device architectures, system specifications, and applications are rapidly adapting to the challenges and opportunities of tightly integrated heterogeneous platforms. Programming languages such as OpenCL 2.0, CUDA 8.0, and C++ AMP allow programmers to exploit these architectures for productive collaboration between CPU and GPU threads. To evaluate these new architectures and programming languages, and to empower researchers to experiment with new ideas, a suite of benchmarks targeting these architectures with close CPU-GPU collaboration is needed. In this paper, we classify applications that target heterogeneous architectures into generic collaboration patterns including data partitioning, fine-grain task partitioning, and coarse-grain task partitioning. We present Chai, a new suite of 14 benchmarks that cover these patterns and exercise different features of heterogeneous architectures with varying intensity. Each benchmark in Chai has seven different implementations in different programming models such as OpenCL, C++ AMP, and CUDA, and with and without the use of the latest heterogeneous architecture features. 
We characterize the behavior of each benchmark with respect to varying input sizes and collaboration combinations, and evaluate the impact of using the emerging features of heterogeneous architectures on application performance.", "pdfUrls": [ "http://impact.crhc.illinois.edu/shared/Papers/chai-ispass17.pdf", "http://elhajj2.web.engr.illinois.edu/docs/paper-chai-ispass17.pdf", "https://doi.org/10.1109/ISPASS.2017.7975269" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/04379477b31622586b3a632a5ac528c664f88d7a", "sources": [ "DBLP" ], "title": "Chai: Collaborative heterogeneous applications for integrated-architectures", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "0443f873f3b3a12bdb7819d16e45ae99cd616e86": { "authors": [ { "ids": [ "3112463" ], "name": "Yang You" }, { "ids": [ "2238795" ], "name": "Aydin Bulu\u00e7" }, { "ids": [ "1700326" ], "name": "James Demmel" } ], "doi": "10.1145/3126908.3126912", "doiUrl": "https://doi.org/10.1145/3126908.3126912", "entities": [ "Algorithm", "Artificial neural network", "Avid Elastic Reality", "Central processing unit", "Deep learning", "Graphics processing unit", "Hardware acceleration", "ImageNet", "Knights", "Scalability", "Speedup", "System on a chip", "Xeon Phi" ], "id": "0443f873f3b3a12bdb7819d16e45ae99cd616e86", "inCitations": [ "a7e9f6c55c1118c9947c6ef63bddd11764b85d33", "d0556be65e8564ab8bb3e26b6a0146a62027bc40" ], "journalName": "", "journalPages": "9:1-9:12", "journalVolume": "", "outCitations": [ "838c9137e6fd807c871c80976b4f75c8c8bfcffc", "a058935fd019c2367fd32c16cd1ce6983a29aafb", "6435805ebe3abd7c02fae390edad37c1a5c7c5a6", "235fa2b1983eff9f13b27c620cda389359126bf4", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "193fa681987603bd5c672ff7344966625fcaf54a", "09f72f02083830c1881b86e6016e1fe3fe41f65f", "5d90f06bb70a0a3dced62413346235c02b1aa086", 
"b7cf49e30355633af2db19f35189410c8515e91f", "8ecc044d920df247fbd455b752fd7cc0f7363ad7", "160e1a787a3364a10ea89a9a8c04238cd468d1a4", "3f1c1427b175140e7f725a155096a4e73c1b8509", "01fcae344d2edb715bcc63a40b6052c0331741bd", "38211dc39e41273c0007889202c69f841e02248a", "0760550d3830230a05191766c635cec80a676b7e", "60f22ad725f041fff81d6371242485bbe5c3ebb6", "3439a127e45fb763881f03ef3ec735a1db0e0ccc", "0af203b0112a8564c730a596fe5cf35556537e2e", "0b99d677883883584d9a328f6f2d54738363997a", "7a4092f170a3ed058a64f3156248d9c4e32c4d48", "09b8120cbc52e7df46122e8e608146289fddbdfa", "061356704ec86334dbbc073985375fe13cd39088" ], "paperAbstract": "Training neural networks has become a big bottleneck. For example, training ImageNet dataset on one Nvidia K20 GPU needs 21 days. To speed up the training process, the current deep learning systems heavily rely on the hardware accelerators. However, these accelerators have limited on-chip memory compared with CPUs.\n We use both self-host Intel Knights Landing (KNL) clusters and multi-GPU clusters as our target platforms. From the algorithm aspect, we focus on Elastic Averaging SGD (EASGD) to design algorithms for HPC clusters.\n We redesign four efficient algorithms for HPC systems to improve EASGD's poor scaling on clusters. Async EASGD, Async MEASGD, and Hogwild EASGD are faster than existing counter-part methods (Async SGD, Async MSGD, and Hogwild SGD) in all comparisons. Sync EASGD achieves 5.3X speedup over original EASGD on the same platform. 
We achieve 91.5% weak scaling efficiency on 4253 KNL cores, which is higher than the state-of-the-art implementation.", "pdfUrls": [ "https://people.eecs.berkeley.edu/~youyang/publications/sc2017.pdf", "http://doi.acm.org/10.1145/3126908.3126912", "https://people.eecs.berkeley.edu/~youyang/publications/imagenet_minutes.pdf", "https://arxiv.org/pdf/1708.02983v1.pdf", "http://gauss.cs.ucsb.edu/~aydin/sc2017_deep_learning.pdf", "http://arxiv.org/abs/1708.02983" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0443f873f3b3a12bdb7819d16e45ae99cd616e86", "sources": [ "DBLP" ], "title": "Scaling deep learning on GPU and knights landing clusters", "venue": "SC", "year": 2017 }, "044d4f949759b602332d7d2408fb003108422e21": { "authors": [ { "ids": [ "39531773" ], "name": "Yi Cao" }, { "ids": [ "3371087" ], "name": "Javad Nejati" }, { "ids": [ "9223428" ], "name": "Muhammad Wajahat" }, { "ids": [ "2187214" ], "name": "Aruna Balasubramanian" }, { "ids": [ "2044504" ], "name": "Anshul Gandhi" } ], "doi": "10.1145/3084443", "doiUrl": "https://doi.org/10.1145/3084443", "entities": [ "Experiment", "High- and low-level", "Mobile device", "Program optimization", "Web page" ], "id": "044d4f949759b602332d7d2408fb003108422e21", "inCitations": [ "0457c76af0aa3d0586e3fdd6ece7ea6fda65b7da" ], "journalName": "", "journalPages": "68", "journalVolume": "", "outCitations": [ "cf64cdc889a4edaf641a307aa2b11d89d4d10a09", "1733b454eb643f4c534e81f6089a85e63cfd2629", "143481d55d9f9d25e53f06a6afaf15feb7430c62", "0495641c590874be9e09c3743d0d15c536cd3f4e", "086699da0528ed47463cea3108851bd3dc5ba715", "5430dfef92fb67ec887e365208477226f0cddb10", "0b6ea07d2d7ea0f95969f9e223d362c2e6aa79b4", "1e126cee4c1bddbfdd4e36bf91b8b1c2fe8d44c2", "1cadb267720b8723fa417840003ac51ec56d7aa5", "8d78b035469b2c0c8238c2b4c85460b04aa6d4ef", "06d4deb6e116578eb3ce6c2c228ee99cad3718da", "1aaea3bf77dfa69605cf7d243fc6a8255d11aae9", "111cc5261a6034612ca543bc3c15b9bf25cb2ec3", 
"45f43abc49a8a60e6b43ddbda5af9fc6c88d663d", "84fdccb41f31247dfb86aadba6f2b4d75538767f", "0b369ac8bd9e0c618e4ea3568ebaa944f460c454", "16d0a8ee484f4a34e1cdcda8a0c2453e2e962ada", "0507b04c131f2244524fda97cd1707af5760216e", "2cac6e84d3d7fed13ec9a5d39fd2bd6e75423578", "20d0b7473429464fc2f9bfd59d513d63c844551c", "17fd49a20b1fd914ec6dde6c835edc852826dede", "430cd2b1c08aa86bb4aef152ee2ca764c5342c3e", "b4074a1b276afb37d103b934773071ff176a1b9d", "24dcf23f4aeb146b1323b8e9f559f17f6282fdd7" ], "paperAbstract": "Modeling the energy consumption of applications on mobile devices is an important topic that has received much attention in recent years. However, there has been very little research on modeling the energy consumption of the mobile Web. This is primarily due to the short-lived yet complex page load process that makes it infeasible to rely on coarse-grained resource monitoring for accurate power estimation.\n We present RECON, a modeling approach that accurately estimates the energy consumption of any Web page load and deconstructs it into the energy contributions of individual page load activities. Our key intuition is to leverage low-level application semantics in addition to coarse-grained resource utilizations for modeling the page load energy consumption. By exploiting fine-grained information about the individual activities that make up the page load, RECON enables fast and accurate energy estimations without requiring complex models. Experiments across 80 Web pages and under four different optimizations show that RECON can estimate the energy consumption for a Web page load with an average error of less than 7%. 
Importantly, RECON helps to analyze and explain the energy effects of an optimization on the individual components of Web page loads.", "pdfUrls": [ "https://davycao.github.io/sigm17_paper16.pdf", "http://www3.cs.stonybrook.edu/~anshul/sigm17_recon.pdf", "http://doi.acm.org/10.1145/3084443", "http://doi.acm.org/10.1145/3078505.3078587", "http://netsys.cs.stonybrook.edu/sites/netsys.cs.stonybrook.edu/files/sigm17_paper16_0.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/044d4f949759b602332d7d2408fb003108422e21", "sources": [ "DBLP" ], "title": "Deconstructing the Energy Consumption of the Mobile Page Load", "venue": "SIGMETRICS", "year": 2017 }, "044ef9a2b3f12a36cf4c01ff45b57fe6b414f2d9": { "authors": [ { "ids": [ "24553949" ], "name": "Dongyu Meng" }, { "ids": [ "1721849" ], "name": "Hao Chen" } ], "doi": "10.1145/3133956.3134057", "doiUrl": "https://doi.org/10.1145/3133956.3134057", "entities": [ "Approximation algorithm", "Artificial neural network", "Cryptography", "Deep learning", "Randomness" ], "id": "044ef9a2b3f12a36cf4c01ff45b57fe6b414f2d9", "inCitations": [ "69092affc3461a38eb05cf7982f104eb30b0492c", "15285d8ae6d2fef3dfecaeacbf5a246bfc7b3137", "5a5e6395ea614e392089516b9e68caa8fabab4e2", "a91fd02ed2231ead51078e3e1f055d8be7828d02", "a0c90e89d81469d5ab9ed93af5a020a94fa05188", "7b5e12f7784f8d5cecd3f2bd73c35860de2b21f8", "f16d1c0dbff12aa9c05feae542cca7878e625b51", "dd215b777c1c251b61ebee99592250f44073d4c0", "6b327af674145a34597986ec60f2a49cff7ed155", "ec2df1a2b46279bfd658746c9ab0dcdcbca3177c", "5ce1cdd95b3977e66a5c22fb6cab577a8a65597d", "d9716a34853188061ee5365d84677bfae635229d", "1e77822d88d1064317d0e5d229b536820cc8df81", "66d5ec7a71a8b92d0c9563edda94ca62d39f96be", "57126589b3fe62c35a36a2646dac3045d095ecf5", "4a6025ac9fa969846ab0ee32a6d8792734383105", "0939b060ba4832420a7be317806768fc40f13cc3", "956272153ce970d99d182d99919c7c471cf48166", "8e4808e71c9b9f852dc9558d7ef41566639137f3", "ea0eaaece0f4e0c3760d87850f65fb42df980c3c", 
"a2d19828c435a48aaa0b9c2a08112f6a023b2df9", "1f70bbe8099daea2adccf4e9120e453fa935eefd", "21dc8ebc3b8373c233e66031dead3ed5a0024a5f", "70f646b21115a896300d2ae5a1decf0cce5cdb82", "04d2fe52b97ad769974650b76e47fb50842fed8f", "9bec5e3292a6ca7cea5fb37a7f6719b1149b2bb0", "23f97a0a6ce0c54b024213e200315be1ba391932", "1c71e653f86b06eb7d5b1d92694f34e6f57173de", "533892babde5b8390b3a02336c9a6a293378eb1d", "3d0a8a8e01625fd2c668364c1ee31f3dc9098f39", "29176632807b17bf3da444713763b4b2b568306c", "29f712156b5c216fe00c2ec8fa115bdfcce6bbf3", "7e17e21c3e48e5432c38d6a9f635f9708357d273", "83a8235a231540e07743d67c9f127c2bee4389ae", "f820c05024b75959199dcfba59ec6cfc7f162994", "039df729edbc7c20085fda50599241ea626d20f0", "19c53c3ddf90c6ede05a6ac670083e238ba4589f", "48185257697b84c4ebfb137b44cf2e2bce182174" ], "journalName": "", "journalPages": "135-147", "journalVolume": "", "outCitations": [ "83bfdd6a2b28106b9fb66e52832c45f08b828541", "24529bffa95f07c01ccf6f02eb4dc9d859430159", "8f92b4ea04758df2acfb49bd46a4cde923c3ddcb", "0e03189871cd303b3438743f90232514dfa7885e", "5d90f06bb70a0a3dced62413346235c02b1aa086", "4115569538e2d71aa96389b01aa5ca1b8b30f8dd", "74fc396d0b8ec548d600395182f12c9b06cc84e9", "9b618fa0cd834f7c4122c8e53539085e06922f8c", "0f84a81f431b18a78bd97f59ed4b9d8eda390970", "0e34fa5ec5476ea801021fb082fd3089a62f0aff", "55b3410a9025f7547d68d59ea899ff391d555953", "f2c20cb6ebd2ad704c5bcae4eb8b942d3c62f8e0", "1beea702eecb426474794c43faf1364463ab0ec0", "20f28af7a5f14c994b5c62315f215d95939de18a", "7ab0f0da686cd4094fd96f5a30e0b6072525fd09", "46f74231b9afeb0c290d6d550043c55045284e5f", "16aa01ca0834a924c25faad5d8bfef3fd1acfcfe", "04ee77ef1143af8b19f71c63b8c5b077c5387855", "15b4017d6f295accd02adc04494e854c9cf4434d", "046a1302079f56b94c81457bf7fd21c3417a9f72", "49e77b981a0813460e2da2760ff72c522ae49871", "010719cd94f8fea13b78f998d220499e6174e9c7", "e2b7f37cd97a7907b1b8a41138721ed06a0b76cd", "7cdf6882ac4562b680cbf679dea5d60e110ce771", "35734e8724559fb0d494e5cba6a28ad7a3d5dd4d", 
"31868290adf1c000c611dfc966b514d5a34e8d23", "1439e05971a053c2368e6dee6d484b43c833d43c", "0e3cc46583217ec81e87045a4f9ae3478a008227", "169e0f340ed880b0c2d288bc8f3c8753fe7b0cfb", "01fcae344d2edb715bcc63a40b6052c0331741bd", "3b2bf65ebee91249d1045709200a51d157b0176e" ], "paperAbstract": "Deep learning has shown impressive performance on hard perceptual problems. However, researchers found deep learning systems to be vulnerable to small, specially crafted perturbations that are imperceptible to humans. Such perturbations cause deep learning systems to mis-classify adversarial examples, with potentially disastrous consequences where safety or security is crucial. Prior defenses against adversarial examples either targeted specific attacks or were shown to be ineffective.\n We propose MagNet, a framework for defending neural network classifiers against adversarial examples. MagNet neither modifies the protected classifier nor requires knowledge of the process for generating adversarial examples. MagNet includes one or more separate detector networks and a reformer network. The detector networks learn to differentiate between normal and adversarial examples by approximating the manifold of normal examples. Since they assume no specific process for generating adversarial examples, they generalize well. The reformer network moves adversarial examples towards the manifold of normal examples, which is effective for correctly classifying adversarial examples with small perturbation. We discuss the intrinsic difficulties in defending against whitebox attack and propose a mechanism to defend against graybox attack. Inspired by the use of randomness in cryptography, we use diversity to strengthen MagNet. 
We show empirically that MagNet is effective against the most advanced state-of-the-art attacks in blackbox and graybox scenarios without sacrificing false positive rate on normal examples.", "pdfUrls": [ "https://arxiv.org/pdf/1705.09064v1.pdf", "http://arxiv.org/abs/1705.09064", "http://doi.acm.org/10.1145/3133956.3134057", "http://web.cs.ucdavis.edu/~hchen/paper/meng2017.pdf", "https://arxiv.org/pdf/1705.09064v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/044ef9a2b3f12a36cf4c01ff45b57fe6b414f2d9", "sources": [ "DBLP" ], "title": "MagNet: A Two-Pronged Defense against Adversarial Examples", "venue": "CCS", "year": 2017 }, "0450987faf2baf11df986a6bf6d477c6ce4e9d93": { "authors": [ { "ids": [ "11019105" ], "name": "Katherine Q. Ye" }, { "ids": [ "38436509" ], "name": "Matthew Green" }, { "ids": [ "24010074" ], "name": "Naphat Sanguansin" }, { "ids": [ "2722085" ], "name": "Lennart Beringer" }, { "ids": [ "2059700" ], "name": "Adam Petcher" }, { "ids": [ "1804502" ], "name": "Andrew W. 
Appel" } ], "doi": "10.1145/3133956.3133974", "doiUrl": "https://doi.org/10.1145/3133956.3133974", "entities": [ "Compiler", "Compiler correctness", "Coq (software)", "Correctness (computer science)", "Cryptography", "Dual EC DRBG", "End-to-end principle", "Formal verification", "Functional specification", "Hash-based message authentication code", "Machine code", "Proof assistant", "Pseudorandom number generator", "Pseudorandomness" ], "id": "0450987faf2baf11df986a6bf6d477c6ce4e9d93", "inCitations": [ "18307d7fea0fed1067a5704f9aa13c93541e0142" ], "journalName": "", "journalPages": "2007-2020", "journalVolume": "", "outCitations": [ "614f3b72660eed2ce7b62970fa73ba8eae4d278b", "2638a939cd8f4bbdd927dbe8a277569c0d202e93", "49565dd40c89680fdf9d6958f721eabcdfb89c22", "5513593daa8b8a52c5808590f0975e4c80c5c71a", "043a2dc8bddc2af1b03b320c1b9aef1f7ca01568", "2977e30243c4a93462cdb466d97abff4bcd638d2", "3c338bb3dcc10b7c840b4dbf3ad32e8256313ee3", "280250adda984a6464eddf98beac56f8e302fe07", "92ed7e6a20e5c91191f424b9ac9e129b621612d3", "0e8816a796a977f7729099bc21c5473c7c582ff3", "3dbde4c3ebeb5c52aeee28b98e80e405b2a5ebb0", "04402122e2fb065ed1280000981f7626496f0afb", "57adf20f0fa575a43609937c8f1a695a444a0ae0", "615168555150d80752a1c195229642acbe6fb3d9", "57f0a7ef8a11f191ff84e825f9153c254a29b427", "db0d1f3ed4a418e126d0281d5b3aadd1fd45c982", "1038b389bb18f87faed387364a696b01f60d5e7e", "400251fab502adf5a8ecdf6e5ba7d522bfe5cf1a", "265f7bdcb3e6bb6d32146113e3929e2365bbd5af", "afb91cb334aa5892e1ae567e016ba9de63738575", "11bce56b9d86954f627209bd2ca3786f66a35fee", "e9022113d566e75fa2c1f86f5e72d33361d45bf9", "136e9214b3637a84b9accb32f7b6176f047c403a" ], "paperAbstract": "We have formalized the functional specification of HMAC-DRBG (NIST 800-90A), and we have proved its cryptographic security-that its output is pseudorandom--using a hybrid game-based proof. We have also proved that the mbedTLS implementation (C program) correctly implements this functional specification. 
That proof composes with an existing C compiler correctness proof to guarantee, end-to-end, that the machine language program gives strong pseudorandomness. All proofs (hybrid games, C program verification, compiler, and their composition) are machine-checked in the Coq proof assistant. Our proofs are modular: the hybrid game proof holds on any implementation of HMAC-DRBG that satisfies our functional specification. Therefore, our functional specification can serve as a high-assurance reference.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133974", "https://www.cs.cmu.edu/~kqy/resources/HMAC_DRBG_CCS17.pdf", "https://arxiv.org/pdf/1708.08542v1.pdf", "http://www.cs.princeton.edu/~appel/papers/verified-hmac-drbg.pdf", "https://www.cs.cmu.edu/~kqy/resources/Verified-HMAC-DRBG.pdf", "http://arxiv.org/abs/1708.08542" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0450987faf2baf11df986a6bf6d477c6ce4e9d93", "sources": [ "DBLP" ], "title": "Verified Correctness and Security of mbedTLS HMAC-DRBG", "venue": "CCS", "year": 2017 }, "045ed48ce9ab08cd8d89995ec6f61655be37f827": { "authors": [ { "ids": [ "3175379" ], "name": "Friedrich Steimann" }, { "ids": [ "30468313" ], "name": "Marcus Frenkel" }, { "ids": [ "1733990" ], "name": "Markus V\u00f6lter" } ], "doi": "10.1145/3136014.3136034", "doiUrl": "https://doi.org/10.1145/3136014.3136034", "entities": [ "Conformance testing", "Metamodeling", "Programmer", "Structure editor", "Whole Earth 'Lectronic Link" ], "id": "045ed48ce9ab08cd8d89995ec6f61655be37f827", "inCitations": [], "journalName": "", "journalPages": "79-90", "journalVolume": "", "outCitations": [ "188426f3339b555dda2740ae59a1b9f8a0af17c8", "07616ac5872d581b93a82bbcfce1760812fbe506", "83c349c4f10fedd52179e6c55b5ffb97c125ab39", "3fa29135a4382a9f2378dc1cb75aa34f84113116", "52baaf6d23c305fffaff4a82f4943c0395a36e05", "486639cf03fc0e08fc876e992647501fca611890", "1e688be9f4554aa981fe3db9e2a66388b05bd167", 
"895cc0b69c42290d1164cc25403e7c4e70db23c8", "c19e228c99443d508d4d799dbeec7b056441bf18", "c957b66fc39e88546fc08c6cfae782c6c7cb6796", "0083d6b5c9d4b18c452b453cda36c00bbb985252", "0467a3b0e4afca0712b42f6e96cd879e2b274522", "a7f2702767e16b03a860d50f19fefd709c695f80", "b82d1190197f0f7f513974808ed6913714c3dd80", "4b697347a0c3bb777507afb2f16dde238e71df10", "df473e1cbce6bbd51e2a0ba88fdafd7b1270b54c", "c4e62cf795d5c5e34ba5e1ed3f511d74e9161fcb", "b60f8b0b67a321defb3ac511bbfd8afb53b929f7", "8ea209607f0febfd98ae4050fad1c6a15f04f923", "2793a6772afdfeac0c80f3f2c1834c930dcd4abd", "37db876b401b5578765e376be6bed962ca4e63ba", "034cb3e63c4a9a47e450137ded08da1d301e81cf", "7423cca7880932040c6d1c72c8524159510becda", "df88dc427082e216a632f7dff9813a1c94acbe5c", "54a18c597d5d72536f396673ebf5d4fc7649e671", "60c56908f5aeca7b641344390446cfc580a89dca", "b5558429b89d35dda86f0e5546d3b204a4b364ff", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "460ea25534755bce9517f615446efc9f6c508359" ], "paperAbstract": "While contemporary projectional editors make sure that the edited programs conform to the programming language\u2019s metamodel, they do not enforce that they are also well-formed, that is, that they obey the well-formedness rules defined for the language. We show how, based on a constraint-based capture of well-formedness, projectional editors can be empowered to enforce well-formedness in much the same way they enforce conformance with the metamodel. The resulting robust edits may be more complex than ordinary, well-formedness breaking edits, and hence may require more user involvement; yet, maintaining well-formedness at all times ensures that necessary corrections of a program are linked to the edit that necessitated them, and that the projectional editor\u2019s services are never compromised by inconsistent programs. 
Robust projectional editing is not a straitjacket, however: If a programmer prefers to work without it, its constraint-based capture of well-formedness will still catch all introduced errors \u2014 unlike many other editor services, well-formedness checking and robust editing are based on the same implementation, and are hence guaranteed to behave consistently.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136034" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/045ed48ce9ab08cd8d89995ec6f61655be37f827", "sources": [ "DBLP" ], "title": "Robust projectional editing", "venue": "SLE", "year": 2017 }, "0460cd7935008dff7178a69b96bff952110bb6ad": { "authors": [ { "ids": [ "1775185" ], "name": "Kubilay Atasu" }, { "ids": [ "3023587" ], "name": "Thomas P. Parnell" }, { "ids": [ "3372321" ], "name": "Celestine D\u00fcnner" }, { "ids": [ "1699100" ], "name": "Michail Vlachos" }, { "ids": [ "2004009" ], "name": "Haralampos Pozidis" } ], "doi": "10.1109/ICPP.2017.46", "doiUrl": "https://doi.org/10.1109/ICPP.2017.46", "entities": [ "Algorithm", "Analysis of algorithms", "Central processing unit", "Computer cluster", "Graphics processing unit", "Load balancing (computing)", "Machine learning", "Non-negative matrix factorization", "Recommender system" ], "id": "0460cd7935008dff7178a69b96bff952110bb6ad", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "372-381", "journalVolume": "", "outCitations": [ "21613f3a8ed001065023064befcaf7447268b45d", "8c6d84f60c953eecafa20b5989b3f697ac10cbf9", "ce4e206cbe1aaef0c2381d2fc62ab147c42c40db", "6676fbd502bf19fbe751eafafd25be7370216e7e", "8e39f9ec05e058e727ece3067abb541f65c6b11e", "9aa88a8a354f1d322e242376d27d0474e50252f8", "ae89b64bb83848d6deb54bb5f91161afa6e5c935", "672b341f7373feafc02ae3d8b3421d2777e32be1", "c7e8886244d505714897375b2146cebcf72863c8", "9eea2012abde8f692982e85236c24b2aba29e73b", 
"0c07d26f82f3c84371bfd18f8327ce0a2d00da81", "5d06f630188a5ec9c05c4961eddbf9f24e2e6916", "ddce2f41414d35592dda0d12ea33bfac29fe983f", "87f8d6d13c30b5abeb260f8375f325bcb7dd965f", "876014931b26abf9b87a911d394d25beab674bbe", "327fb5ed6d2057c98c485e3c7ff7c55e87095e50", "6b3e40a330650eaf1fbcdd15b6ef8d4acc49b245", "f0ce902c99bf26197c86921f0d2b5effab83748e", "749521b5fc9791c242ac3acb26d0db64499ec2fe", "2f85322125e793057933cd21e7e7ba238fbd8154" ], "paperAbstract": "Recommender systems are becoming the crystal ball of the Internet because they can anticipate what the users may want, even before the users know they want it. However, the machine-learning algorithms typically involved in the training of such systems can be computationally expensive, and often may require several days for retraining. Here, we present a distributed approach for load-balancing the training of a recommender system based on state-of-art non-negative matrix factorization principles. The approach can exploit the presence of a cluster of mixed CPUs and GPUs, and results in a 466-fold performance improvement compared with the serial CPU implementation, and a 15-fold performance improvement compared with the best previously reported results for the popular Netflix data set.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.46" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0460cd7935008dff7178a69b96bff952110bb6ad", "sources": [ "DBLP" ], "title": "High-Performance Recommender System Training Using Co-Clustering on CPU/GPU Clusters", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "0467df50312601c78569a82da5a39344351da983": { "authors": [ { "ids": [ "31606525" ], "name": "Yu Shi" }, { "ids": [ "19277495" ], "name": "Po-Wei Chan" }, { "ids": [ "39371343" ], "name": "Honglei Zhuang" }, { "ids": [ "2286096" ], "name": "Huan Gui" }, { "ids": [ "1722175" ], "name": "Jiawei Han" } ], "doi": "10.1145/3097983.3097990", 
"doiUrl": "https://doi.org/10.1145/3097983.3097990", "entities": [ "Algorithm", "Experiment", "Generative model", "PowerPC Reference Platform", "Programming paradigm", "Relevance", "Text mining", "Type system" ], "id": "0467df50312601c78569a82da5a39344351da983", "inCitations": [ "873bb1d992e55afca552e27d9c58afd329220c7f" ], "journalName": "", "journalPages": "425-434", "journalVolume": "", "outCitations": [ "221b59a2a6bc19302bac89abaf42c531f4dc4cf8", "054ba27fe5cc6085d20ea2707de886db6865dbed", "40150a1f67a5ebfa78a9ac99f998b39fa34bc9ba", "1c96cdb6bc0029b8ca4cd578aca5e939b359e578", "86979d8b19914284d376e1981319b762702797b4", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "d822f61bc8b6a6625e939a2445ba74180f53c829", "13c40b32b9f35c8d24a5c00ec16a88382aaf07fe", "71413d837edfb2e6d7f4685476158bcc70cf9d9d", "2cbe0ba73d02aabbeefedf841203219796a551b7", "1970a644bc8a9fa7340f04785f8b19e9d33778e1", "63d440eb606c7aa4ee3c7fcd94d65af3f5c92c96", "7f2968ecdf3bb966fbbb605a4f24733c22937fab", "1871ea4cf23441d0297c99d9115f664a6ba0efda", "5abf1c0ff7dc9157aedd9dfa021f8d3dcc647d9b", "20dc1890ca65e01856f31edf10126c2ad67e9d04", "02e0bc77460469aefec5bd794ee6c4efc15e6adb", "a4e3b552fda3d6dab61ef6ddb75944bfd38248ab", "65f8e3d819786754fecc6085ee5ded94c7c0b142", "009dbf3187862352aac542bf7d61e27bce6b27f5", "1c8c9a7713395e9a176c42e49bc80574a013f89f", "0d1e2c2ae657895c7532ed26e3f09f140ad84afb" ], "paperAbstract": "As a powerful representation paradigm for networked and multi-typed data, the heterogeneous information network (HIN) is ubiquitous. Meanwhile, defining proper relevance measures has always been a fundamental problem and of great pragmatic importance for network mining tasks. Inspired by our probabilistic interpretation of existing path-based relevance measures, we propose to study HIN relevance from a probabilistic perspective. 
We also identify, from real-world data, and propose to model cross-meta-path synergy, which is a characteristic important for defining path-based HIN relevance and has not been modeled by existing methods. A generative model is established to derive a novel path-based relevance measure, which is data-driven and tailored for each HIN. We develop an inference algorithm to find the maximum a posteriori (MAP) estimate of the model parameters, which entails non-trivial tricks. Experiments on two real-world datasets demonstrate the effectiveness of the proposed model and relevance measure.", "pdfUrls": [ "http://yushi2.web.engr.illinois.edu/kdd17.pdf", "https://arxiv.org/pdf/1706.01177v1.pdf", "http://shichuan.org/hin/topic/Similarity%20Measure/2017.%20KDD2017%20PReP%20Path-Based%20Relevance%20from%20a%20Probabilistic%20Perspective%20in%20Heterogeneous%20Information%20Networks.pdf", "http://yushi2.web.engr.illinois.edu/kdd17_slides.pdf", "http://arxiv.org/abs/1706.01177", "http://hanj.cs.illinois.edu/pdf/kdd17_yshi.pdf", "http://doi.acm.org/10.1145/3097983.3097990" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0467df50312601c78569a82da5a39344351da983", "sources": [ "DBLP" ], "title": "PReP: Path-Based Relevance from a Probabilistic Perspective in Heterogeneous Information Networks", "venue": "KDD", "year": 2017 }, "046a2418b45d8e4db8b42bc55f36f2f9331645b1": { "authors": [ { "ids": [ "39322919" ], "name": "Ravindra Babu Ganapathi" }, { "ids": [ "27099044" ], "name": "Aravind Gopalakrishnan" }, { "ids": [ "27021998" ], "name": "Russell W. 
McGuire" } ], "doi": "10.1109/HOTI.2017.12", "doiUrl": "https://doi.org/10.1109/HOTI.2017.12", "entities": [ "Algorithm", "Experiment", "Input/output", "Memory-mapped I/O", "Message Passing Interface", "Network switch", "Networking hardware", "Omni-Path", "Operating system", "PCI Express", "Partitioned global address space", "Processor affinity", "Programmer", "Programming model", "Selection algorithm", "Telephone number" ], "id": "046a2418b45d8e4db8b42bc55f36f2f9331645b1", "inCitations": [], "journalName": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "journalPages": "80-86", "journalVolume": "", "outCitations": [ "7605fe626c3598ee68fefaf1f4e1d21fcd2cb3d4", "bb65ddf274ef42e05da09bfb080a97d035876cff", "4f7d60be860184511a71f2e6475a3947adf45372", "5f464ae0f3faf0e80b3653a49011bd5ba233edfd", "adb98e9965ac4303b7457af34b2f352df359bdc1", "00a0ce824021313ee63c72e7bf05a4c708233cb0", "4f08b68a94563a247f092effbdd46281f82a6b9a", "9e4d3d6b74affd06329a7f72d647016868312728", "6a4b87b53654f1a63323c9ec294bcd9eb18e1bbc", "637074c9c400cddb4797e6ca353c35edd83f4c38", "7421d28428e041c271fe6370c331353f4a3fa974" ], "paperAbstract": "High Performance Computing(HPC) applications are highly optimized to maximize allocated resources for the job such as compute resources, memory and storage. Optimal performance for MPI applications requires the best possible affinity across all the allocated resources. Typically, setting process affinity to compute resources is well defined, i.e MPI processes on a compute node have processor affinity set for one to one mapping between MPI processes and the physical processing cores. Several well defined methods exist to efficiently map MPI processes to a compute node. With the growing complexity of HPC systems, platforms are designed with complex compute and I/O subsystems. Capacity of I/O devices attached to a node are expanded with PCIe switches resulting in large numbers of PCIe endpoint devices. 
With a lot of heterogeneity in systems, applications programmers are forced to think harder about affinitizing processes as it affects performance based on not only compute but also NUMA placement of IO devices. Mapping of process to processor cores and the closest IO device(s) is not straightforward. While operating systems do a reasonable job of trying to keep a process physically located near the processor core(s) and memory, they lack the application developer's knowledge of process workflow and optimal IO resource allocation when more than one IO device is connected to the compute node. In this paper we look at ways to assuage the problems of affinity choices by abstracting the device selection algorithm from MPI application layer. MPI continues to be the dominant programming model for HPC and hence our focus in this paper is limited to providing a solution for MPI based applications. Our solution can be extended to other HPC programming models such as Partitioned Global Address Space(PGAS) or a hybrid MPI and PGAS based applications. We propose a solution to solve NUMA effects at the MPI runtime level independent of MPI applications. Our experiments are conducted on a two node system where each node consists of two socket Intel® Xeon® servers, attached with up to four Intel® Omni-Path fabric devices connected over PCIe. 
The performance benefits seen by MPI applications by affinitizing MPI processes with best possible network device is evident from the results where we notice up to 40% improvement in uni-directional bandwidth, 48% bi-directional bandwidth, 32% improvement in latency measurements and finally up to 40% improvement in message rate.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HOTI.2017.12" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/046a2418b45d8e4db8b42bc55f36f2f9331645b1", "sources": [ "DBLP" ], "title": "MPI Process and Network Device Affinitization for Optimal HPC Application Performance", "venue": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "year": 2017 }, "049a1ac1c022f7b800b185d313239ccb9b60bfed": { "authors": [ { "ids": [ "3448436" ], "name": "Stephen Roberts" }, { "ids": [ "40431102" ], "name": "Steven A. Wright" }, { "ids": [ "3450002" ], "name": "Suhaib A. Fahmy" }, { "ids": [ "1690561" ], "name": "Stephen A. 
Jarvis" } ], "doi": "10.1007/978-3-319-58667-0_22", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_22", "entities": [ "Computational science", "Experiment", "Limiter", "Mathematical optimization", "Program optimization", "Requirement" ], "id": "049a1ac1c022f7b800b185d313239ccb9b60bfed", "inCitations": [], "journalName": "", "journalPages": "413-430", "journalVolume": "", "outCitations": [ "429d28998216da5648f40248bf4bc9e508edd2fd", "08db6c20a034bfdb119e2eb3a049cccccb7e1fc0", "260d0adfad93dfd02c7a945dee48c60f8fb938e1", "8c12c4ff57e1992d1e3a926a2e75d3d4d9279c96", "3d044d4f708b8803ca2323ede66ba5f303ac1fba", "54cb190135885898a5fd780253ff14a821ff71cf", "b1479a44735a4d93a99c3c1572acc6b752046c04", "56e4263251aa8d1888ca5840e0bf187af043f49c", "3fdd691621d41ddabdd225878536e4c75223971d", "b83c318632affb34e58dbe847b26f491baf6653b", "28e34059176c36934de116e138dd53cf4ee1dff0", "db365cd0e6c42278fd091a1f3b710e443c2555a4", "9d5b5e0e1547f172c3f0d75a78aa7d2894590520", "02ebdcf8200135ec0433e12e4ef2459ac740370b", "60f41423b70cd2ae454ae5802ad4e4927aeb9d6f", "751ee769767a70b6dc1ac2dc57e8957d28308686", "f4a91972bf1a05b195bce06a24dc33960bff1151", "52d4b916fe76401dc0477e4a00528103cdae4625", "6e0607664ee56a9c3404b9b5a570665b4d26a3a0", "e17e7bffaa7bdda0dcdec8eb4d200a13e4e156a4" ], "paperAbstract": "Energy consumption is rapidly becoming a limiting factor in scientific computing. As a result, hardware manufacturers increasingly prioritise energy efficiency in their processor designs. Performance engineers are also beginning to explore software optimisation and hardware/software co-design as a means to reduce energy consumption. Energy efficiency metrics developed by the hardware community are often re-purposed to guide these software optimisation efforts. In this paper we argue that established metrics, and in particular those in the Energy Delay Product (Et) family, are unsuitable for energy-aware software optimisation. 
A good metric should provide meaningful values for a single experiment, allow fair comparison between experiments, and drive optimisation in a sensible direction. We show that Et metrics are unable to fulfil these basic requirements and present suitable alternatives for guiding energy-aware software optimisation. We finish with a practical demonstration of the utility of our proposed metrics.", "pdfUrls": [ "http://wrap.warwick.ac.uk/87287/13/WRAP-metrics-energy-aware-software-optimisation-Roberts-2017.pdf", "https://doi.org/10.1007/978-3-319-58667-0_22", "http://wrap.warwick.ac.uk/87287/7/WRAP_Wright_paper.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/dc80/59bb1d5aa7745b86cd7e3441d60c516b0b86.pdf", "s2Url": "https://semanticscholar.org/paper/049a1ac1c022f7b800b185d313239ccb9b60bfed", "sources": [ "DBLP" ], "title": "Metrics for Energy-Aware Software Optimisation", "venue": "ISC", "year": 2017 }, "04a1566de1fab63e4d5f4c3de2444189b2fffe3c": { "authors": [ { "ids": [ "34825854" ], "name": "Haitao Yuan" }, { "ids": [ "2344116" ], "name": "Jing Bi" }, { "ids": [ "2424969" ], "name": "Jia Zhang" }, { "ids": [ "2700163" ], "name": "Wei Tan" }, { "ids": [ "1746012" ], "name": "Keman Huang" } ], "doi": "10.1109/CLOUD.2017.12", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.12", "entities": [ "Data center", "Expectation\u2013maximization algorithm", "OpenVMS", "Revenue sharing", "Routing", "Scheduling (computing)", "Software-defined networking", "Virtual machine" ], "id": "04a1566de1fab63e4d5f4c3de2444189b2fffe3c", "inCitations": [ "625115a88a1676e8319ec38ad309b1cd4646829f" ], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "18-25", "journalVolume": "", "outCitations": [ "5617533e3b9b0602ca20f5eafdf8168ad149f328", "4329fab4771dd4cf50694804d4bafca8f40dbbab", "49fc32e00629c062279d8347a6189fb751fe0b11", "2aea898e1ffa6704561dca05c5bf90b29d0d2b7c", "e3a0e3cc6ec566981abe53bf91347a503cd7090a", 
"2ec4022eb4b34617e5281331d90a242e60a2a28b", "411f70a6a3bf0ad6efc3c06546de41aaada4aec4", "bca2c862d05a00c471221da78e44a759499f4f79", "d47bca3f9f8c86506ba842533990c2d6f2b91658", "1f3497afec11f21b60b615513353157b2891b76b", "27109339ffd9b93cce0f885ab6cd26014fd79f1c", "5020fc196002bc22b90d14459fa331ae321f3c8b", "0394a93e0bae94671ca18d9ecc35e4c250309bf4", "016ca0cee16190907a911d874e98fbc6dfa5a36c", "21616ede4fbcf03484d653901d0a051a05cde223", "e90cf0903a00a575dbb46089186b06ae4af3699d", "9ce279778579fc150e4ddc72ea30706f1bb38a94", "eb66fa5d216adb3a1eb59395386a83ddf00d1212", "cc2b975d42bfffa7f534d2a9ad574214064c90da", "2a628e4a9c5f78bc6dcdf16514353336547846cc", "134aff4476875a386cefa81c57b18e9897ab3e3f", "1c4887952889c95973e017409f1d39587636532a" ], "paperAbstract": "Nowadays many companies and organizations choose to deploy their applications in data centers to leverage resource sharing. The increase in tasks of multiple applications, however, makes it challenging for a data center provider to maximize its revenue by intelligently scheduling tasks in software-defined networking (SDN)-enabled data centers. Existing SDN controllers only reduce network latency while ignoring virtual machine (VM) latency, thus may lead to revenue loss. In the context of SDN-enabled data centers, this paper presents a workload-aware revenue maximization (WARM) approach to maximize the revenue from a data center provider's perspective. The core idea is to jointly consider the optimal combination of VMs and routing paths for tasks of each application. 
Comparing with state-of-the-art methods, the experimental results show that WARM yields the best schedules that not only increase the revenue but also reduce the round-trip time of tasks of all applications.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.12" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/04a1566de1fab63e4d5f4c3de2444189b2fffe3c", "sources": [ "DBLP" ], "title": "Workload-Aware Revenue Maximization in SDN-Enabled Data Center", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "04aed47b71fd3a07a9a9d04ec9c9429a242299a6": { "authors": [ { "ids": [ "2950127" ], "name": "Xi He" }, { "ids": [ "2357165" ], "name": "Ashwin Machanavajjhala" }, { "ids": [ "24440615" ], "name": "Cheryl J. Flynn" }, { "ids": [ "1704011" ], "name": "Divesh Srivastava" } ], "doi": "10.1145/3133956.3134030", "doiUrl": "https://doi.org/10.1145/3133956.3134030", "entities": [ "Algorithm", "Angular defect", "Computation", "Database", "Differential privacy", "Distributed Proofreaders", "End-to-end encryption", "End-to-end principle", "Linkage (software)", "Pattern Recognition Letters", "Privacy", "Secure multi-party computation", "Virtual private network" ], "id": "04aed47b71fd3a07a9a9d04ec9c9429a242299a6", "inCitations": [], "journalName": "", "journalPages": "1389-1406", "journalVolume": "", "outCitations": [ "332c3587b73140517993d478db923357e2144531", "b5065bb03ee4ccba956488ac49431db915e0d9e4", "684cbdc64df41f30e0f6ba4f9b442285519f605b", "36a3904938efc22427af7baf1a5655a75c35afd3", "38ccbd4097a16f062f8dbd4095e4873a95f387f5", "33ac1d455b62b96f189579b99bd734f987598b38", "185c811f94c6526c50dcf3da0aff78fe032a27f7", "0761ae9a1884c8e2a168845e155e114c0fe828a8", "02b8e2b9301f83005f0b284fec7ee2468ffc2cba", "3713b16c8ef53f6acf2374a9e73e46b057e365f0", "1dabd2515b0f87e077c3d78979b5d57eb5ebfc84", "09378d09d4026c21c8c80f291f4afa3bcb4956ff", "03c1711090d76cc9163e238686786a71c028377e", 
"8392fa13e5013073b617e947b0229bf1734990ac", "d1b82ba92fa74343d3c743cd1d12411195f52a1d", "9007444b29df5383aec43e6a68322d2ae26bd216", "0dcaa37bbdd620f25ce459529796d00d912c242c", "cf64ed742ab694d8a0ebed6c96a6f8709b9e8705", "663f4f32b643376f36b5bcbee65cc32cb9f11de4", "323174074f4353a7b9d6a92eb45959ef862f347c", "4f553ee2246dd617d89c487f260d77388177e1c4", "283ecc8622694c070fa53aee7a1c37dadc603f8d", "913a6223e266297d34bbf63d08e6e7ed9f01de5b", "2b926454d20a57d57befcec245917d1614a4c3d2", "6223684e14778e4d7948e994d2169ebf38e0a95f", "fe38ee9944077cd14a0f6f1813af2d3d4b59ce43", "a9d07270be6e48448ef17b348f3455d76ea1d68f", "1c799eca7983c62f7815ac5f41787b3e552567b6", "b1a25851dac53b6b0bc3238564a2bea5b0f57bf6", "9407fda128b185bdb0ced615ad8107381b831071", "b532099ff8b67049f292cd62700dca37fc2be623" ], "paperAbstract": "Private record linkage (PRL) is the problem of identifying pairs of records that are similar as per an input matching rule from databases held by two parties that do not trust one another. We identify three key desiderata that a PRL solution must ensure: (1) perfect precision and high recall of matching pairs, (2) a proof of end-to-end privacy, and (3) communication and computational costs that scale subquadratically in the number of input records. We show that all of the existing solutions for PRL -- including secure 2-party computation (S2PC), and their variants that use non-private or differentially private (DP) blocking to ensure subquadratic cost -- violate at least one of the three desiderata. In particular, S2PC techniques guarantee end-to-end privacy but have either low recall or quadratic cost. In contrast, no end-to-end privacy guarantee has been formalized for solutions that achieve subquadratic cost. 
This is true even for solutions that compose DP and S2PC: DP does not permit the release of any exact information about the databases, while S2PC algorithms for PRL allow the release of matching records.\n In light of this deficiency, we propose a novel privacy model, called output constrained differential privacy, that shares the strong privacy protection of DP, but allows for the truthful release of the output of a certain function applied to the data. We apply this to PRL, and show that protocols satisfying this privacy model permit the disclosure of the true matching records, but their execution is insensitive to the presence or absence of a single non-matching record. We find that prior work that combine DP and S2PC techniques even fail to satisfy this end-to-end privacy model. Hence, we develop novel protocols that provably achieve this end-to-end privacy guarantee, together with the other two desiderata of PRL. Our empirical evaluation also shows that our protocols obtain high recall, scale near linearly in the size of the input databases and the output set of matching pairs, and have communication and computational costs that are at least 2 orders of magnitude smaller than S2PC baselines.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134030", "https://arxiv.org/pdf/1702.00535v1.pdf", "http://www.research.att.com/export/sites/att_labs/techdocs/TD_101822.pdf", "https://arxiv.org/pdf/1702.00535v3.pdf", "https://arxiv.org/pdf/1702.00535v4.pdf", "https://arxiv.org/pdf/1702.00535v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/04aed47b71fd3a07a9a9d04ec9c9429a242299a6", "sources": [ "DBLP" ], "title": "Composing Differential Privacy and Secure Computation: A Case Study on Scaling Private Record Linkage", "venue": "CCS", "year": 2017 }, "04cceeb42618da0ebc534afba74ddc366885e82a": { "authors": [ { "ids": [ "1679961" ], "name": "Chao Li" }, { "ids": [ "1724566" ], "name": "Balaji Palanisamy" } ], "doi": "10.1109/CLOUD.2017.13", 
"doiUrl": "https://doi.org/10.1109/CLOUD.2017.13", "entities": [ "Adversary (cryptography)", "Big data", "Centralisation", "Cloud storage", "Computer data storage", "Denial-of-service attack", "Discrete Hartley transform", "Distributed computing", "Distributed hash table", "Emergence", "Encryption", "Hash table", "Information privacy", "Key (cryptography)", "Routing" ], "id": "04cceeb42618da0ebc534afba74ddc366885e82a", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "26-33", "journalVolume": "", "outCitations": [ "8125fa009f3eeba20464704a7324f92cfa3b83e4", "74879c098d285c5a5b08789bd737b2991fbea178", "8145d731f22373de07c0268a8f90689fbcabf3ae", "35516916cd8840566acc05d0226f711bee1b563b", "7769c3c2a3f03486d621125dede17b8b8adf397c", "0a76f32d90a4ed3036482511d351b6db12a34083", "c3651e31b54d74d593141e870f272d28a26da597", "035b44ba1dcfb780df810176059df8b027dbc922", "962777a87e23ad5135bd24d013d2109504e16fcf", "5f680c8ac6a80f02488237c949e95b7fc35cc8ef", "3ea6c9ef1884e8724b909b20aa5219406873d312", "2e42da64d50df21803ed7424041316675669dc9e", "abb9f5f32eafc414688e02f29abbf353c975992f", "25e5d5a046afa5fcde7be23d087ae69f4b438e13", "10473de8f36c463c48152fcd0e09d7e20d00e671", "6b47f482784022b5c3cd2aefde6d433d32f43746", "025ce0e02392fc24e9b15ad5444b67cd705a945d", "03e4f73474351a62abc9abf2fb17ec6277bb064e", "0368d2445d3ee4205ee73da933cb8b810a89091c", "4b49d374c9306b929743e7d213c28cd47fc2d4fc", "2280274c05d578b24205b1af0aebfa552d51c132" ], "paperAbstract": "In the age of Big Data, advances in distributed technologies and cloud storage services provide highly efficient and cost-effective solutions to large scale data storage and management. Supporting self-emerging data using clouds is a challenging problem. While straight-forward centralized approaches provide a basic solution to the problem, unfortunately they are limited to a single point of trust. 
Supporting attack-resilient timed release of encrypted data stored in clouds requires new mechanisms for self emergence of data encryption keys that enables encrypted data to become accessible at a future point in time. Prior to the release time, the encryption key remains undiscovered and unavailable in a secure distributed system, making the private data unavailable. In this paper, we propose Emerge, a self-emerging timed data release protocol for securely hiding data encryption keys of private encrypted data in a large-scale Distributed Hash Table (DHT) network that makes the data available and accessible only at the defined release time. We develop a suite of erasure-coding-based routing path construction schemes for securely storing and routing encryption keys in DHT networks that protect an adversary from inferring the encryption key prior to the release time (release-ahead attack) or from destroying the key altogether (drop attack). Through extensive experimental evaluation, we demonstrate that the proposed schemes are resilient to both release-ahead attack and drop attack as well as to attacks that arise due to traditional churn issues in DHT networks.", "pdfUrls": [ "http://d-scholarship.pitt.edu/32727/1/cloud-2017.pdf", "https://doi.org/10.1109/CLOUD.2017.13", "http://www.sis.pitt.edu/bpalan/papers/Emerge-cloud-2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/04cceeb42618da0ebc534afba74ddc366885e82a", "sources": [ "DBLP" ], "title": "Emerge: Self-Emerging Data Release Using Cloud Data Storage", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "04ce88ca257c62db651478d02afbfd04187b568d": { "authors": [ { "ids": [ "3224069" ], "name": "Xenofon Foukas" }, { "ids": [ "1712068" ], "name": "Mahesh K. Marina" }, { "ids": [ "1758370" ], "name": "Kimon P. 
Kontovasilis" } ], "doi": "10.1145/3117811.3117831", "doiUrl": "https://doi.org/10.1145/3117811.3117831", "entities": [ "Access network", "End-to-end principle", "Network architecture", "Over-the-top content", "Radio access network", "Requirement", "Software deployment" ], "id": "04ce88ca257c62db651478d02afbfd04187b568d", "inCitations": [ "1ca46f1ea039cd4167f8cce3de0e1f1a2af042e4" ], "journalName": "", "journalPages": "127-140", "journalVolume": "", "outCitations": [ "7c2e35c0298de36336f1533c8ce737c9a6e92b66", "23732086a0f61758e2d0a83cc1f08fa8940b9794", "57cfb44be82575569275dc58e887acbca4ad7fa8", "25e6612d7700c76a460ba3bfc55e463a11393acd", "a1d6a88bdceb70107c0be1a9599aa8190530af53", "8f078271d8bd6b9ff21818de0dc3b4294e5fac12", "98061392ea59145c415deafab37400f3e9ebac15", "151af2a0704c13a429db67ed7d9020c84ae89cba", "0b77241145e21a9ef804b9198372521100044cfd", "00a328fbab90c024f30c2237e6761ea801872750", "ea13ed9b14c2d49d1b32db9e1d4807bd6316c7ee", "a7ec60be77f1e06e73d3f2c3b0165de5ce97e3e7", "917365d6d19ab2495c2af068a49c1c3c73a117f3", "6800646e8de9b08e6a2174a927b50bb0e28fbb76", "0955cb0fde62c786985001e95d8de7b84ced604f", "f643179599c0b9ca8b817ff9c475cf4166821cc2", "3ec22dfebf1312740e2c59ce8cb8270627b0544c", "62fc9a3972bc3e82a4e75248105570446d30d64f", "3525a3688eef9dec048f2e15b7ac495abe15f208", "0d4c17bfb68d4d16364b3992ecaff0966affbe19", "4f28b348747d133b638e257c8215f8f9be5b2434", "3574657705475722b6c398c266805f758268778b", "24e13c33e8ac68f6eae9784052e8e1ee70feff98", "692f891e2dda71ab47a331f8ec2b0bafa9e5854d", "83f45dc4c48e9307dbceb80a187039300166f8d8", "643be8f5591c6cbdfc6828eb960ce1fab332d75b", "4888eae6bc2f50fea1cd2b6c5a5dc5fda9b49d0e", "64f3a81fff495ac336dccdd63136d451852eb1c9", "2486d901a041a8a3048d66ab4bf505cf19244a2e", "32e033c46c731d41cd24082618491d65ad5840d8", "58b5a4db6d88c355909fa251e63e05efad3b8b7e", "da71ca4877de496ed243e5b84373e2a426275e1b", "7954790a779742daf992e533cdaec23e7ffdf60a", "c924617a9feabc742bbd5f66bf2273b413e7c15c", 
"8ea67ce6e59151bfea56fff377b88fa7258641cd", "28f2ae875b37ce38d5e6c7209e6a705a39a53a47", "02c76f7d61f1ff47609a19f46aec3e6d0c8a9425", "58d92176f017f99f0ae623e34fa9303af77cd70b", "b941a2011b95d09e7858c2d910216cd9ac010f76", "236c0e3ee64a4dafcfc99a2cc4d9450698dfbf0a", "d3934ebdf84561799b869173607ef547089b67c7", "05f66ed2940454c397a279bcb80732f641e94d96", "859af9a05d8392507d44f13523a867b17c416377", "2f97728a8d357a3e4c3dd3bd7eeb68757e913604", "053dafac667c8c36e88cf141ca9a63695f6637c1" ], "paperAbstract": "Emerging 5G mobile networks are envisioned to become multi-service environments, enabling the dynamic deployment of services with a diverse set of performance requirements, accommodating the needs of mobile network operators, verticals and over-the-top (OTT) service providers. Virtualizing the mobile network in a flexible way is of paramount importance for a cost-effective realization of this vision. While virtualization has been extensively studied in the case of the mobile core, virtualizing the radio access network (RAN) is still at its infancy. In this paper, we present Orion, a novel RAN slicing system that enables the dynamic on-the-fly virtualization of base stations, the flexible customization of slices to meet their respective service needs and which can be used in an end-to-end network slicing setting. Orion guarantees the functional and performance isolation of slices, while allowing for the efficient use of RAN resources among them. We present a concrete prototype implementation of Orion for LTE, with experimental results, considering alternative RAN slicing approaches, indicating its efficiency and highlighting its isolation capabilities. 
We also present an extension to Orion for accommodating the needs of OTT providers.", "pdfUrls": [ "http://www.research.ed.ac.uk/portal/files/42138372/orion_final_version_2.pdf", "http://doi.acm.org/10.1145/3117811.3117831" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/04ce88ca257c62db651478d02afbfd04187b568d", "sources": [ "DBLP" ], "title": "Orion: RAN Slicing for a Flexible and Cost-Effective Multi-Service Mobile Network Architecture", "venue": "MobiCom", "year": 2017 }, "04dbddadc9b5e947a18cedd5e757829d958483ad": { "authors": [ { "ids": [ "36816914" ], "name": "Peter Snyder" }, { "ids": [ "32873584" ], "name": "Periwinkle Doerfler" }, { "ids": [ "3110399" ], "name": "Chris Kanich" }, { "ids": [ "1703426" ], "name": "Damon McCoy" } ], "doi": "10.1145/3131365.3131385", "doiUrl": "https://doi.org/10.1145/3131365.3131385", "entities": [ "4chan", "Cyberstalking", "Doxing", "Information sensitivity", "Social network" ], "id": "04dbddadc9b5e947a18cedd5e757829d958483ad", "inCitations": [ "3b28b1fc9109e4d50535f332dfd42a2b1b296e70" ], "journalName": "", "journalPages": "432-444", "journalVolume": "", "outCitations": [ "09b54aa33fbc7b81b1c2b3c573de3c40c4db46e4", "713ae236445593ba194ad77a40042e0553179b4a", "4a7204431900338877c738c8f56b10a71a52e064", "039cfab52407e5bb3f3e0b16dfd99d7a72479d12", "14ddd0288f1b29bbb5b6a0166bd6a12cca7bec20", "e39b586e561b36a3b71fa3d9ee7cb15c35d84203", "9a4b30f220486992319e026c9a2f56b51956a922", "9ae976f82e11f7ecc3fd9f8d17a2744582cb22e8", "0f279b97e9e318b6db58da8da66a565505a0fab6", "12a3a703aa37d79ba296a83669257a25fcc86bf5", "716d50c5e55cbb1f9b3b3932ccef9ad4346f922e", "f53790142d5fa509591f29458d61642e29185b80", "ab0aab05dc001e75961f7ca138fd0be335be3223", "87e5931dee6988d95e950837c27a5d59c2536b40", "2aab45ffcd28f3945f2b3bda34887ccdd14adfc3", "52c3248151e1d1bee68eb1d9507bf4edcffff0bb", "4d1aaca480d3bef07f5a6686bfc8af6c3065baec", "b9895c47cc273d2cd78c9f6320da937497ad0351", "c9948f7213167d65db79b60381d01ea71d438f94" ], 
"paperAbstract": "Doxing is online abuse where a malicious party harms another by releasing identifying or sensitive information. Motivations for doxing include personal, competitive, and political reasons, and web users of all ages, genders and internet experience have been targeted. Existing research on doxing is primarily qualitative. This work improves our understanding of doxing by being the first to take a quantitative approach. We do so by designing and deploying a tool which can detect dox files and measure the frequency, content, targets, and effects of doxing on popular dox-posting sites.\n This work analyzes over 1.7 million text files posted to pastebin.com, 4chan.org and 8ch.net, sites frequently used to share doxes online, over a combined period of approximately thirteen weeks. Notable findings in this work include that approximately 0.3% of shared files are doxes, that online social networking accounts mentioned in these dox files are more likely to close than typical accounts, that justice and revenge are the most often cited motivations for doxing, and that dox files target males more frequently than females.\n We also find that recent anti-abuse efforts by social networks have reduced how frequently these doxing victims closed or restricted their accounts after being attacked. 
We also propose mitigation steps, such a service that can inform people when their accounts have been shared in a dox file, or law enforcement notification tools to inform authorities when individuals are at heightened risk of abuse.", "pdfUrls": [ "http://doi.acm.org/10.1145/3131365.3131385", "https://conferences.sigcomm.org/imc/2017/slides/IMC%202017.pdf", "https://www.cs.uic.edu/~psnyder/static/papers/fifteen-minutes.pdf", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final109.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/04dbddadc9b5e947a18cedd5e757829d958483ad", "sources": [ "DBLP" ], "title": "Fifteen minutes of unwanted fame: detecting and characterizing doxing", "venue": "IMC", "year": 2017 }, "04f0650ab41af3aad8c0476a9b06826c55356b99": { "authors": [ { "ids": [ "40361678" ], "name": "Sidharth Kumar" }, { "ids": [ "1732339" ], "name": "Duong Hoang" }, { "ids": [ "8808472" ], "name": "Steve Petruzza" }, { "ids": [ "1870103" ], "name": "John Edwards" }, { "ids": [ "1685087" ], "name": "Valerio Pascucci" } ], "doi": "10.1109/HiPC.2017.00034", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00034", "entities": [ "Data aggregation", "Domain model", "Fastest", "Hierarchical database model", "Internet bottleneck", "Network congestion", "Overhead projector", "Simulation", "Span and div", "Speedup", "Supercomputer" ], "id": "04f0650ab41af3aad8c0476a9b06826c55356b99", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "223-232", "journalVolume": "", "outCitations": [ "176d712d084112b2e65e385e8220e4679c24f28a", "05e0dd9ba23f99acf5537b51f3a3263d3febe6dc", "2da4ab6c02d97fe47b589ddd450a5c41f2b47bb9", "18ff47d4024f9ba7fdb7c21c6c49ffbe9a6ed99f", "0b1d26613dc0bd12da5c3f9d637d5e8571621395", "57eb0364d4c545a077ec7d66a067b0426962dda2", "34e64b546a37df201ecb29cad0248df029a71adb", "409ed5839cf6d0ba246d91f82d1ac33cbe600c27", 
"dcba56ee1fa047e1c983336ecb4099dab46cd749", "bd3d50ea47c6073d1dcd0582e49e01c3df702b23", "59a902d3a87001aaf091752773e8b4679651499c", "8de993e56cc95df26171741d8f33ae3d83f3261d", "058224ac7b9bc0a0b82e62257656c7a6df62219e", "2680e43fff9b16200106702e0c5165685312d52d", "25d5f7757ebd0b7a5cde7bf64c83ad0020318f39", "547014986afdf86ced23cdcce4583ee04f464160", "c750b9288ed25777e5b7129139e01c143177324c", "a7c58954468de7113ac1e1588a3efa6683add7f3", "4468833a27f2641d15eee8335bc4263abc6d26ea", "36033cc275f927d2835bce6d19ec727fdb1a2fb4", "155f59e40f1ad2467e004dfcb4bb9ccf5522d1d1", "1f1ddb2c47b45653f759e69fabdbf21aab7656f9", "1103f46b77bed0f597e7289ee54073d5190853a9", "919f9b4c78e1af2ba4c343ec504bc6193709ed77", "c0c56908d343d52669e1aee072dd611681dc831f", "12e7574576be81bcc9827754ec1593ed3e75d14a" ], "paperAbstract": "Hierarchical data representations have been shown to be effective tools for coping with large-scale scientific data. Writing hierarchical data on supercomputers, however, is challenging as it often involves all-to-one communication during aggregation of low-resolution data which tends to span the entire network domain, resulting in several bottlenecks. We introduce the concept of indexing templates, which succinctly describe data organization and can be used to alter movement of data in beneficial ways. We present two techniques, domain partitioning and localized aggregation, that leverage indexing templates to alleviate congestion and synchronization overheads during data aggregation. 
We report experimental results that show significant I/O speedup using our proposed schemes on two of today's fastest supercomputers, Mira and Shaheen II, using the Uintah and S3D simulation frameworks.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00034", "http://www.sci.utah.edu/publications/Kum2017a/HiPC_2017_IEEE.pdf", "http://www.cs.utah.edu/~sikumar/papers/HiPC_2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/04f0650ab41af3aad8c0476a9b06826c55356b99", "sources": [ "DBLP" ], "title": "Reducing Network Congestion and Synchronization Overhead During Aggregation of Hierarchical Data", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "0518cb9b83f94125cc3e7e617dad996ec139ed53": { "authors": [ { "ids": [ "32051146" ], "name": "Amit A. Levy" }, { "ids": [ "4290484" ], "name": "Bradford Campbell" }, { "ids": [ "2260383" ], "name": "Branden Ghena" }, { "ids": [ "2234849" ], "name": "Daniel B. 
Giffin" }, { "ids": [ "3302895" ], "name": "Pat Pannuto" }, { "ids": [ "1735731" ], "name": "Prabal Dutta" }, { "ids": [ "1721681" ], "name": "Philip Levis" } ], "doi": "10.1145/3132747.3132786", "doiUrl": "https://doi.org/10.1145/3132747.3132786", "entities": [ "Computer multitasking", "Concurrency (computer science)", "Dependability", "Embedded system", "Fault detection and isolation", "Low-power broadcasting", "Memory management", "Memory protection", "Microcontroller", "Operating system", "Programming language", "Rust", "Type safety" ], "id": "0518cb9b83f94125cc3e7e617dad996ec139ed53", "inCitations": [ "090448627c52f2586816277cd97fc2c13b1e07c6", "5bb6dfc59e7206f9845ea7bf8ae3985a71b35318", "08002ccb91de1dc63dbfaa8c34f25cfffd68f6bc" ], "journalName": "", "journalPages": "234-251", "journalVolume": "", "outCitations": [ "139448e4bcf8e4a4c2563e2efc97af36e1753ee8", "2074c9bb69a75b6c83e3b9f842d444c6cf4da3e5", "16a455aeacd14529bee92b0c197619fa2d173151", "0d3453d2de7ff2acdd0f0b841f138228553edb6d", "2530079d98f216a88dd5d91be12a48c6e39d143e", "06a5e486828fc79018f3d2889d6475aeb2692523", "332e1f6b86760a02e17c0c98abc5b89bae9088a6", "a98d7af52fe5ba44a5c8b0a3dceb95109bc4c339", "57e6335af738f8051746c0d6af58bc0afb008c04", "11c6a5905966c437055dcf7f11ae80401a18d0dd", "642da8b5b22adac15a2613d4f813c0d2637e93d9", "0ce5f33c20d686f414b0d91665a73d2e5b1fbc78", "5d5190477e22977b3c286558bc5fe3a27ab375d3", "adc1da0f52501b60a28e79c9233e9bd23f308c24", "9b1485630ffaaa543acff16741343437cdaae08a", "066add40724f1022011ef4e17a39c7d66c88397c", "4f5b7cd8d57a314851f5daa395b6eb178ad582ed", "77f69dd05370d7829c7aeb8457df9b58751d9d80", "50eba68089cf51323d95631c2f59ff916848863f", "43fb7b102ea54ce51b6fcd42005698ae1399e25e", "15aaa56f06eca80760943e47f1781591209f2860", "b6741f7862be64a5435d2625ea46f0508b9d3fee", "94ac53e5e7e4eeda742c1df3c46c1edec9bea4a4", "0f091484790fc7a4807c3bf4d6019db63d1d4097", "33b85ea9b4fb28ac893167c29529d62d355c06a5", "09ccf9a6f0d890a156731db6899225eadea1df5d", 
"28ece685d21306453e257bd39d8d94bc142e81c5", "5e105f04394d59fd29f62d0c8f303011ce63805b", "58683a098aee33ed6755d1fc4b950127ddee969d", "2d09016fa20fbaf3bb2a419a93c14d4363bc4db3", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "0f04a0b658f00f329687d8ba94d9fca25269b4b7", "8af2c2056a8cbd6eba90cd4e4f9911a19d03d4cc" ], "paperAbstract": "Low-power microcontrollers lack some of the hardware features and memory resources that enable multiprogrammable systems. Accordingly, microcontroller-based operating systems have not provided important features like fault isolation, dynamic memory allocation, and flexible concurrency. However, an emerging class of embedded applications are software platforms, rather than single purpose devices, and need these multiprogramming features. Tock, a new operating system for low-power platforms, takes advantage of limited hardware-protection mechanisms as well as the type-safety features of the Rust programming language to provide a multiprogramming environment for microcontrollers. Tock isolates software faults, provides memory protection, and efficiently manages memory for dynamic application workloads written in any language. 
It achieves this while retaining the dependability requirements of long-running applications.", "pdfUrls": [ "https://lab11.eecs.umich.edu/content/pubs/levy17multiprogramming.pdf", "http://amitlevy.com/papers/tock-sosp2017.pdf", "https://sing.stanford.edu/site/publications/levy17-tock.pdf", "http://doi.acm.org/10.1145/3132747.3132786", "http://web.eecs.umich.edu/~prabal/pubs/papers/levy17tock.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0518cb9b83f94125cc3e7e617dad996ec139ed53", "sources": [ "DBLP" ], "title": "Multiprogramming a 64kB Computer Safely and Efficiently", "venue": "SOSP", "year": 2017 }, "051961468e3e7a3855eaff8ac9ec35e0235a4a38": { "authors": [ { "ids": [ "36816914" ], "name": "Peter Snyder" }, { "ids": [ "38044867" ], "name": "Cynthia Taylor" }, { "ids": [ "3110399" ], "name": "Chris Kanich" } ], "doi": "10.1145/3133956.3133966", "doiUrl": "https://doi.org/10.1145/3133956.3133966", "entities": [ "Application programming interface", "Browser extension", "Browser security", "Hypermedia", "Privacy", "Source lines of code", "Web API", "Web application" ], "id": "051961468e3e7a3855eaff8ac9ec35e0235a4a38", "inCitations": [ "e766cb4ebdaaadb6e1d4c9022bedbc4100f91506" ], "journalName": "", "journalPages": "179-194", "journalVolume": "", "outCitations": [ "40d41c8c94e71d76ad84bc2a7154800cb2693fdd", "89d1633f0019ff2d561132a29fa5a9ab549fa8bd", "38528fada34bad059cbbe1e424f12497ba6f8bb8", "9b5f6b2b56a698a8d56dd3d7847d0821daf18bca", "c274145195b73ea6b6f57f7bc4a88460dbacf045", "18c48a28a0d97496651e8c966b5dbc3983a15b28", "284f2732a9ce5507d171a0821f6c2e7264021ed8", "01dbc5466cce6abd567cc5b34a481f5c438fb15a", "fe2f4faec5cf209ae7d8a73100db9cce46ce53d4", "9a3c791067911d17a79918b1b0b5826beaeb2fe1", "b3dc76e3478f97b2c5bced80f4ebaa587f146b53", "aeb3fc01c0c834b9d64c3cf5c3a1e0e499326dbb", "282df29b34f3fde19480c39daf7b44bf703b4649", "9a2934caacf51e28030b9c60cfd4671ddeb4128e", "a329abfdd35fb908cbf35d2a26327f704a4f7a17", 
"66a6b8b5086454d2f511089ed3c157075239eb7d", "268be1a8339965aa7cfaa5fe113ed34fe1b7be16", "48b5dd4b43e403a17c3a94688efa666b554b8882", "0d939c3826455ca42310a92d5c00a956c4630b0e", "8db5fd6c8b016d3dfc3d2e8761ceb65e14cd2405", "598848aaa4aa40bb6b7ab51490821a173cf18800", "a155264f143aafd380f40fd0167c9b7960f64ea2", "06cd648c3b90aaa66305af9a41714aa5ded54dd8", "31c4320abb49b83f68b09ce355df708a3c3be363", "18c1a15663d568e865a639980c846f37708ebb09", "482fcc1057c6ed9ea21f71c990088eeb092ec243", "0c0b65e6f9ff235b62b1dec87ab905c54fc13d96", "3188dc28042effbd519005ec18c07e7afa51c975", "0d2f693901fba451ede4d388724b0e3f57029cd3" ], "paperAbstract": "Modern web browsers have accrued an incredibly broad set of features since being invented for hypermedia dissemination in 1990. Many of these features benefit users by enabling new types of web applications. However, some features also bring risk to users' privacy and security, whether through implementation error, unexpected composition, or unintended use. Currently there is no general methodology for weighing these costs and benefits. Restricting access to only the features which are necessary for delivering desired functionality on a given website would allow users to enforce the principle of lease privilege on use of the myriad APIs present in the modern web browser.\n However, security benefits gained by increasing restrictions must be balanced against the risk of breaking existing websites. This work addresses this problem with a methodology for weighing the costs and benefits of giving websites default access to each browser feature. We model the benefit as the number of websites that require the feature for some user-visible benefit, and the cost as the number of CVEs, lines of code, and academic attacks related to the functionality. We then apply this methodology to 74 Web API standards implemented in modern browsers. 
We find that allowing websites default access to large parts of the Web API poses significant security and privacy risks, with little corresponding benefit.\n We also introduce a configurable browser extension that allows users to selectively restrict access to low-benefit, high-risk features on a per site basis. We evaluated our extension with two hardened browser configurations, and found that blocking 15 of the 74 standards avoids 52.0% of code paths related to previous CVEs, and 50.0% of implementation code identified by our metric, without affecting the functionality of 94.7% of measured websites.", "pdfUrls": [ "https://arxiv.org/pdf/1708.08510v1.pdf", "https://csaw.engineering.nyu.edu/application/files/9915/0825/7322/CSAW17_paper_42.pdf", "https://www.cs.uic.edu/~psnyder/static/papers/most-websites-don-t-need-to-vibrate.pdf", "https://arxiv.org/pdf/1708.08510v2.pdf", "http://arxiv.org/abs/1708.08510", "https://www.cs.uic.edu/~cynthiat/pubs/ccs17.pdf", "http://doi.acm.org/10.1145/3133956.3133966" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/051961468e3e7a3855eaff8ac9ec35e0235a4a38", "sources": [ "DBLP" ], "title": "Most Websites Don't Need to Vibrate: A Cost-Benefit Approach to Improving Browser Security", "venue": "CCS", "year": 2017 }, "05313e1290182beb7e06f6f527d144cf70e77cdb": { "authors": [ { "ids": [ "2185243" ], "name": "Mohammad Noormohammadpour" }, { "ids": [ "1756733" ], "name": "Cauligi S. 
Raghavendra" }, { "ids": [ "40282073" ], "name": "Sriram Rao" }, { "ids": [ "1741860" ], "name": "Srikanth Kandula" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Cloud computing", "Data center", "Experiment", "Load balancing (computing)", "Overhead (computing)", "Point-to-multipoint communication", "Simulation", "Telecommunications link", "The Coroner's Toolkit" ], "id": "05313e1290182beb7e06f6f527d144cf70e77cdb", "inCitations": [ "9bbd5be2829e49b1fac7f034baf7499cb069db95", "c73eb48e66fe4541bc16e9d75e4966d111acd830", "63bb7501faa99154efee231efeb294f3deccf70e", "890dae1eda8b9ba83912611128286cd762e8955e", "983153b0d5883ea42eb18ba5fe29b7fdc2418bfd" ], "journalName": "CoRR", "journalPages": "", "journalVolume": "abs/1707.02096", "outCitations": [ "c92eb7d492ce6b4e471c33a2b2cf7ce9f30e4b55", "82cb824eb340c7b6e9230af4c2a22093393fea29", "2a628e4a9c5f78bc6dcdf16514353336547846cc", "c2825bd36ccb5e231baad9fe329a299c12cea8e4", "d40fc1b77a453ed004a7ea0d0f4f31f1263165ca", "0e482d54f234766f0792707479dd8719f86cb17e", "098cb3139059c6c8b51da998a5df585d6552c475", "18f3787b4eaf0ca00ede2e783ba043b250116a41", "2de63b0c867b290d4f7217459c968aa98e5ad39d", "6428d17dc46e10e4e0458d606c4ba6b26106dd3b", "1edb070e3530f1a02ecd76f6621f7719d13b2109", "626434a07a56c0a127d122e8fb6b7c0d17f1c608", "8d8b8e90077f9906dff0e760dc51394863e462a5", "b49121b0834ba418a1926e91d85e29040a481f45", "190abe965d98de2e9dcd26e501fce2516acd8bab", "3470547c5d91da6e51e30626d3fc35c9bbc4d1e0", "6bb153f0decfe3ca5e4d13a4fe8472837d750788", "610ca25419e47a3e1b088e944277acadc2ecf6b5", "3d76026cdced10c764453d6b8f0a32fd074d1995", "233c7b2aa05ed9b5e18302bad6bf2425766a51f1", "085abcc5a0ec77b2560c1a34391401d06489e059", "0aabcfbbd125ca095a08292aeb56a6d281648615", "1c2122e6e140301f5d9e56f8bae476105bc01fcb", "aebe75efbdade65e22f05b6b8c2386af8fc2b8ff", "4e046da90c233bcfc128921f65b7bd27df226330", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "7b1701e8d3d8636b7c9e1dd5d1b48e3ace62af5d", "9ec3c21756d88abd6ec4b2b50cf2f529564ebcb0", 
"4687fdf3c77ef00700fdf1399f7dd81bfe87ef97", "2e8a322666a89adf83e8e0e7cbc5142fba5e7b01", "065465ac37607a347186ea50873fc63d17cd2c79", "2053f512ab4fd5e5f0f08e3fbf64927a844ee2a5", "068e59b88a1230d709d99c83a45d3a5b91260810", "140e387b9268681e1379ecf4c5a6e21c96da8e5d", "908f7931de8768786d9ef7d64f5a8156860709dd", "22dba54ce93c528bb4d8ebeef7f0fcc9e9ae2e05", "4592090c7283a8e49ceddcdb0f9d87c1be1056c1", "65503e174262d82c8a03278fa576da23a4bcdf2b", "2e3cc2d55770aac26d3ce0cb6ddd96dbbcfec4cc", "1cafaac11664e48bd121695ac1be06b0930d00a5" ], "paperAbstract": "Using multiple datacenters allows for higher availability, load balancing and reduced latency to customers of cloud services. To distribute multiple copies of data, cloud providers depend on inter-datacenter WANs that ought to be used efficiently considering their limited capacity and the ever-increasing data demands. In this paper, we focus on applications that transfer objects from one datacenter to several datacenters over dedicated inter-datacenter networks. We present DCCast, a centralized Point to Multi-Point (P2MP) algorithm that uses forwarding trees to efficiently deliver an object from a source datacenter to required destination datacenters. With low computational overhead, DCCast selects forwarding trees that minimize bandwidth usage and balance load across all links. 
With simulation experiments on Google\u2019s GScale network, we show that DCCast can reduce total bandwidth usage and tail Transfer Completion Times (TCT) by up to 50% compared to delivering the same objects via independent point-to-point (P2P) transfers.", "pdfUrls": [ "https://arxiv.org/pdf/1707.02096v1.pdf", "http://arxiv.org/abs/1707.02096", "https://www.usenix.org/conference/hotcloud17/program/presentation/noormohammadpour", "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-noormohammadpour.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0531/3e1290182beb7e06f6f527d144cf70e77cdb.pdf", "s2Url": "https://semanticscholar.org/paper/05313e1290182beb7e06f6f527d144cf70e77cdb", "sources": [ "DBLP" ], "title": "DCCast: Efficient Point to Multipoint Transfers Across Datacenters", "venue": "HotCloud", "year": 2017 }, "053dc612683a45783efd672f7a6803cce07372ef": { "authors": [ { "ids": [ "2038661" ], "name": "Bogdan Ghit" }, { "ids": [ "1776848" ], "name": "Dick H. J. 
Epema" } ], "doi": "10.1145/3078597.3078600", "doiUrl": "https://doi.org/10.1145/3078597.3078600", "entities": [ "Apache Hadoop", "Application checkpointing", "Failure rate", "Fault tolerance", "In-memory database", "Jumpstart Our Business Startups Act", "Load (computing)", "Simulation", "Spark" ], "id": "053dc612683a45783efd672f7a6803cce07372ef", "inCitations": [ "3105cd78fb5f9c62ccf0346e061579e2bcd130c6" ], "journalName": "", "journalPages": "105-116", "journalVolume": "", "outCitations": [ "18baeaa09ce028fb3044a89430a4939f270bc480", "0c83169bf4ebb29979bfe47708cb6b79b6e28755", "9c378565a0b510890b474df039caab1f2d58bded", "2997dfa7fea0c32c9438b7576c4509923fe8d457", "2870353565e86f26a3f1459a4d063467b609933a", "4bf59b4d21968de33020e78cd8f20306eac2c247", "f2f3f15dbf10cc68503713cfc77d13f274019d54", "045a50ec31973fee15ff967f18e016fae77fd1f3", "0d3533cc0aa6feed97c294a37cd06cd887d354d2", "5568df48a03cd16e286025c812f1912a7d1c1766", "aabf5c0907f2eaeed488127dcd5fd1b4149cae02", "12635bdd3bd32f09c85a9070977a281fcb32ff61", "06230d13e276bd871a378ca932a41b5cff94e29f", "3aa0e9578b2d8a482e4720f4a5ff08f78b487a52", "168b8cbbbacd234f23b70b952ef58b5b56e67529", "6906ffdd7d5448ed52f3a28c9092739e76c79691", "fb35b5bc1e02de4d9b31176c39247ee9ad6c3290", "3e257f01e3ee71545d824a1615c35659525b856a", "981058ba0c417be6377823bed3b204e6a85a61e6", "7e74ea151efcdcfecffdbeaec0728f9ac1f80389" ], "paperAbstract": "Providing fault-tolerance is of major importance for data analytics frameworks such as Hadoop and Spark, which are typically deployed in large clusters that are known to experience high failures rates. Unexpected events such as compute node failures are in particular an important challenge for in-memory data analytics frameworks, as the widely adopted approach to deal with them is to recompute work already done. Recomputing lost work, however, requires allocation of extra resource to re-execute tasks, thus increasing the job runtimes. 
To address this problem, we design a checkpointing system called Panda that is tailored to the intrinsic characteristics of data analytics frameworks. In particular, Panda employs fine-grained checkpointing at the level of task outputs and dynamically identifies tasks that are worthwhile to be checkpointed rather than be recomputed. As has been abundantly shown, tasks of data analytics jobs may have very variable runtimes and output sizes. These properties form the basis of three checkpointing policies which we incorporate into Panda.\n We first empirically evaluate Panda on a multicluster system with single data analytics applications under space-correlated failures, and find that Panda is close to the performance of a fail-free execution in unmodified Spark for a large range of concurrent failures. Then we perform simulations of complete workloads, mimicking the size and operation of a Google cluster, and show that Panda provides significant improvements in the average job runtime for wide ranges of the failure rate and system load.", "pdfUrls": [ "https://pure.tudelft.nl/portal/files/29539984/HPDC2017_Ghit_Epema.pdf", "http://pure.tudelft.nl/ws/files/29539984/HPDC2017_Ghit_Epema.pdf", "http://doi.acm.org/10.1145/3078597.3078600" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/053dc612683a45783efd672f7a6803cce07372ef", "sources": [ "DBLP" ], "title": "Better Safe than Sorry: Grappling with Failures of In-Memory Data Analytics Frameworks", "venue": "HPDC", "year": 2017 }, "053f06b15e59aaec4cbb4ae56694590f0206ed12": { "authors": [ { "ids": [ "1736809" ], "name": "Kartik Nayak" }, { "ids": [ "2012099" ], "name": "Christopher W. Fletcher" }, { "ids": [ "35584790" ], "name": "Ling Ren" }, { "ids": [ "1767573" ], "name": "Nishanth Chandran" }, { "ids": [ "1685076" ], "name": "Satyanarayana V. 
Lokam" }, { "ids": [ "1726246" ], "name": "Elaine Shi" }, { "ids": [ "1707396" ], "name": "Vipul Goyal" } ], "doi": "", "doiUrl": "", "entities": [ "Benchmark (computing)", "Black box", "Computer simulation", "Context switch", "Cryptography", "Field-programmable gate array", "Gate array", "Hop", "Instruction scheduling", "Obfuscation (software)", "Oblivious ram", "Processor design", "Provable security", "Random-access memory", "Scheduling (computing)", "Scratchpad memory", "Simulation" ], "id": "053f06b15e59aaec4cbb4ae56694590f0206ed12", "inCitations": [ "2c74b71b0ef24c20fc959c7bd82fa82097187327", "50ba271c1e0ddd814b6e79348a8963c788d9ddf9", "53f7a3697e3e5c620f5413b77e86488d7bf089a9", "4da6fa9a83e74d42e04a78d1da73716f64b21578", "7557e87f01563f1c37b771d6bce82ae69fa27343", "50f7cdbd2d99641dbbb6bb706e37b011508af28a", "210fadeda1d9f109e0fd333a2afdd1509f0f5d51" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "3181b9ce21265bbf8175314714e1535f75b3d80f", "24706cbf8c48414ed66db6fbf223c47452f7cbdc", "3ca369fa2cadb403db7ac5e75deefd9acbb10723", "98cccb17fbefc01a6310574f25e591ab9d2586e2", "51e2b3d61d5af53ef9d8f3e5ae98d20bf9d4b084", "6d6bd93c620885cb5ddd5abfac19efffac132cd5", "ec9f42a034a35a8fb7b3be212dab1dad947b47d9", "3c4e907c07944cd55e800b4e55918adf8cb2a683", "8a41c198449d0f30de5427fe753c6b10bbb7255d", "21cbabb34e3004823005e8181044c65b20519d06", "20b63210954f7c5a70664f301dcd7196856ccfa7", "d7980c5ee4614b258f7326b05ccd5efa5cf391d8", "05c49820bb35d0b8d7a2168a9124e506a0334b57", "4c7b9c8cd1057ff24cf3e14c3d8de22ecf49762f", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "12b06d1555b07926b5691aabd6308ef3b452f53a", "19f7caf88ba1e30eb85bdab58b092e46b1a054c0", "0101445aec81d2dec8562a83e656ac6ccd633ee2", "565ed53f4a40a98b18a389a3790a7fe62a525f58", "304c2b0b6e8e1cc746117a14257dbc024d5135e9", "42333e3f231bbfe508f6da6bad2feff9ae223113", "9af882e9b6002731eb93110e654dd413e43887c8", "2f9a90ce5c67e7601f5110f212d81176137517b7", 
"0e7c0199bbb4533e8f074d914a45351d80e5cb55", "2fe30c45b16da9cdbdbb4462c857a68f2f4dd54d", "0541d5338adc48276b3b8cd3a141d799e2d40150", "8a37efc82e54353d387cfb073f9379c053988aef", "37e5f1f2415fbb7a5b24d9493ad5ac1086c4bd30", "7622200b9459a8c0e25e74ce7316c2402862e919", "5d0d384f9ba8b8e2fe0785be3e888c481114f811", "92eaba06af12761b5c64b84e6028d21cd05af9dd", "386c5e8f9e2f289c5c1df458e9043c04475cfdc5", "4eb80db02471e09bc70baa83720fc21604cc6f58", "724787ea1a4fedd69e961b8ec1c352f4b77bb1b0", "0003c342fd0b3e48a483901bd3b731b974fc1f37", "b10cb04fd45f968d29ce0bdc17c4d29d12e05b67", "076e9f5d5b3e813b0cfa5dd3e47f1b8591136bf2", "3e565182551bf91cddd6676f1292fc1c601019e9", "078b855c40fefabd766a09f23280c59feef21634", "cdbef43013d6849806ff9be354eb51f7e42dcd74", "8f1247646e29e07dddbec698f281d06cee87acbe", "2b004e0484f4940fca341fa97ecb8ac94fe780a5", "fd78630a003dbb1a40d438d7326593e79a87ad95", "07ceee8590b93f1d0d19a163c597dc844c313f40", "2065450d96aca38c79cad5172b58660765533650", "1debdeea67b3e0825ea4bec811f299563645a24f", "9875abbef7a859c5a276dca9274e2a296d1998de", "059045328e385abafd145593b0d8067ea4e2ec99", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "7ebde6e808ddb9f29287f26718da4b4fd159f4bf" ], "paperAbstract": "Program obfuscation is a central primitive in cryptography, and has important real-world applications in protecting software from IP theft. However, well known results from the cryptographic literature have shown that software only virtual black box (VBB) obfuscation of general programs is impossible. In this paper we propose HOP, a system (with matching theoretic analysis) that achieves simulation-secure obfuscation for RAM programs, using secure hardware to circumvent previous impossibility results. To the best of our knowledge, HOP is the first implementation of a provably secure VBB obfuscation scheme in any model under any assumptions. HOP trusts only a hardware single-chip processor. 
We present a theoretical model for our complete hardware design and prove its security in the UC framework. Our goal is both provable security and practicality. To this end, our theoretic analysis accounts for all optimizations used in our practical design, including the use of a hardware Oblivious RAM (ORAM), hardware scratchpad memories, instruction scheduling techniques and context switching. We then detail a prototype hardware implementation of HOP. The complete design requires 72% of the area of a V7485t Field Programmable Gate Array (FPGA) chip. Evaluated on a variety of benchmarks, HOP achieves an overhead of 8\u00d7 \u223c 76\u00d7 relative to an insecure system. Compared to all prior (not implemented) work that strives to achieve obfuscation, HOP improves performance by more than three orders of magnitude. We view this as an important step towards deploying obfuscation technology in practice.", "pdfUrls": [ "https://www.internetsociety.org/sites/default/files/ndss2017_07-4_Nayak_paper.pdf", "http://www.cs.umd.edu/~kartik/papers/10_hop.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/hop-hardware-makes-obfuscation-practical/", "http://dimacs.rutgers.edu/Workshops/RAM/Slides/nayak.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2016/11/hop-slides.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/fc2b/a97178e665c25b99381aac8bc6249a4ceed0.pdf", "s2Url": "https://semanticscholar.org/paper/053f06b15e59aaec4cbb4ae56694590f0206ed12", "sources": [ "DBLP" ], "title": "HOP: Hardware makes Obfuscation Practical", "venue": "NDSS", "year": 2017 }, "0550ea9c4fe35fe005cdbcf8b63ae18ae310960d": { "authors": [ { "ids": [ "3607439" ], "name": "Liming Dong" }, { "ids": [ "21618914" ], "name": "Weidong Liu" }, { "ids": [ "23213997" ], "name": "Renchuan Li" }, { "ids": [ "1690784" ], "name": "Tiejun Zhang" }, { "ids": [ "1752014" ], "name": "Weiguo Zhao" } ], "doi": "10.1007/978-3-319-64203-1_22", "doiUrl": 
"https://doi.org/10.1007/978-3-319-64203-1_22", "entities": [ "Algorithm", "Branch and bound", "IBM Tivoli Storage Productivity Center", "Medoid", "Parallel database", "Partition (database)", "Plan", "Relational database management system", "SQL" ], "id": "0550ea9c4fe35fe005cdbcf8b63ae18ae310960d", "inCitations": [], "journalName": "", "journalPages": "303-316", "journalVolume": "", "outCitations": [ "1e557937f418accc13f9c5edb33a3d48259d80e5", "71c1a0fc681a7a62cdd1c6a533e5f581e2287781", "f010eef368d69d6ef80c473012aa83b49e7ee0e8", "9a11bbaf9af5ce7988386e6da8d6d3acb587f5ef", "d1c21c34936f587779c216ed79ca33883845caa1", "461cad26f0d3d2e76405e02791d3797c723b4d73", "8de7bef0ebfa65889fbb4751d09017d63a9cd3d9", "229467e56c6093cb1f5927f8ffeddd51ac012934", "006b89abc356c1c3bf2dfa35f47c0601c39dce38", "9fc1d0e4da751a09b49f5b0f7e61eb71d587c20f", "39ac2e0fc4ec63753306f99e71e0f38133e58ead", "347920406c9a9a3846adf485e2b864d4523a0652", "3112be4824d8e385d62e6c54a7da497d7b25e8ac" ], "paperAbstract": "In parallel database systems, data is partitioned and replicated across multiple independent nodes to improve system performance and increase robustness. In current practice of database partitioning design, all replicas are uniformly partitioned, however, different statements may prefer contradictory partitioning plans, so a single plan cannot achieve the overall optimal performance for the workload. In this paper, we propose a novel approach of replica-aware data partitioning design to address the contradictions. According to the access graph of SQL statements, we use the k -medoids algorithm to classify workload into statement clusters, then we use the branch-and-bound algorithm to search for the optimal partitioning plan for each cluster. Finally, we organize replicas with these plans, and route statements to their preferred replicas. We use TPC-E, TPC-H and National College and University Enrollment System (NACUES) to evaluate our approach. 
The evaluation results demonstrate that our approach improves system performance by up to 4x over the current practice of partitioning design.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_22" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0550ea9c4fe35fe005cdbcf8b63ae18ae310960d", "sources": [ "DBLP" ], "title": "Replica-Aware Partitioning Design in Parallel Database Systems", "venue": "Euro-Par", "year": 2017 }, "055359bf3807f067db7b3518540b719759df2388": { "authors": [ { "ids": [ "1690745" ], "name": "Johannes Hofmann" }, { "ids": [ "1694080" ], "name": "Georg Hager" }, { "ids": [ "1708441" ], "name": "Gerhard Wellein" }, { "ids": [ "1798887" ], "name": "Dietmar Fey" } ], "doi": "10.1007/978-3-319-58667-0_16", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_16", "entities": [ "Benchmark (computing)", "Broadwell (microarchitecture)", "CPU cache", "Clock rate", "Graph500", "HPCG benchmark", "Haswell (microarchitecture)", "Ivy Bridge (microarchitecture)", "Memory hierarchy", "Sandy Bridge", "Throughput", "Uncore", "snoop" ], "id": "055359bf3807f067db7b3518540b719759df2388", "inCitations": [ "43c75b376612800639bd9b23797690be44add6f6" ], "journalName": "", "journalPages": "294-314", "journalVolume": "", "outCitations": [ "044c1f0bcda1af4a5eb98074e46d847507a8384f", "8ca9b31b957a8bf45b27d9caeb93b91437d50571", "55512fc0be51166c06fbde0eda8c1e4cdccd298c", "67cf1189c859d66bac309f9438df434fb651f97a", "7c0c02245f6704a00800a43ecf7d87f0e977ff7e", "9849a9a60d05c79e5cb757ef784982744ebab679", "0f9080d297fc22dcf24dfd8ffcd3de5cea04c689", "7caf696ceedc1d47c1cb54b1f0fcbf6c67a44613", "2636777505e35452269dce101a6e4bc3577bccef", "a62f4f774b8d473211fc5fdd2d54841107d5940a", "d8d7a9c16c49b4456ef304f71ff91ee6e3039be6", "aba0621a287a1aa2161d577ba81281864f3fcf3d", "377175d109126aea51714e8ef0e4324d28eb6fcc", "c793f23187645bfbc7424645e8b5e306f354807a", "7f864cfdde0a6f92e90ac53a73079f4bea884d85", "561dcca4267f105e7308751ee73a9273810f8079", 
"092217c2267f6e0673590aa151d811e579ff7760" ], "paperAbstract": "This paper presents a survey of architectural features among four generations of Intel server processors (Sandy Bridge, Ivy Bridge, Haswell, and Broadwell) with a focus on performance with floating point workloads. Starting on the core level and going down the memory hierarchy we cover instruction throughput for floating-point instructions, L1 cache, address generation capabilities, core clock speed and its limitations, L2 and L3 cache bandwidth and latency, the impact of Cluster on Die (CoD) and cache snoop modes, and the Uncore clock speed. Using microbenchmarks we study the influence of these factors on code performance. This insight can then serve as input for analytic performance models. We show that the energy efficiency of the LINPACK and HPCG benchmarks can be improved considerably by tuning the Uncore clock speed without sacrificing performance, and that the Graph500 benchmark performance may profit from a suitable choice of cache snoop mode settings.", "pdfUrls": [ "https://arxiv.org/pdf/1702.07554v1.pdf", "http://arxiv.org/abs/1702.07554", "https://doi.org/10.1007/978-3-319-58667-0_16" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0553/59bf3807f067db7b3518540b719759df2388.pdf", "s2Url": "https://semanticscholar.org/paper/055359bf3807f067db7b3518540b719759df2388", "sources": [ "DBLP" ], "title": "An Analysis of Core- and Chip-Level Architectural Features in Four Generations of Intel Server Processors", "venue": "ISC", "year": 2017 }, "055c7900c4ccaa621ebed2d946510849b98ad6f1": { "authors": [ { "ids": [ "2484837" ], "name": "Debadatta Mishra" }, { "ids": [ "9516817" ], "name": "Prashanth" }, { "ids": [ "1749860" ], "name": "Purushottam Kulkarni" } ], "doi": "10.1145/3135974.3135992", "doiUrl": "https://doi.org/10.1145/3135974.3135992", "entities": [ "Algorithm", "Best, worst and average case", "Cache (computing)", "Centralisation", "Experiment", "Floor and ceiling functions", 
"Holism", "Hypervisor", "Memory management", "Non-volatile memory", "Operating system", "P2P caching", "Provisioning", "Requirement", "Solid-state drive", "Virtual machine", "Volatile memory", "Web container" ], "id": "055c7900c4ccaa621ebed2d946510849b98ad6f1", "inCitations": [], "journalName": "", "journalPages": "235-247", "journalVolume": "", "outCitations": [ "0935bb723e4071ccd4c2334d3b6d728faa111d11", "0b43a722d2ca43752750e4976f3056a006990143", "6111f1a9ab657910f5a11a95de117b3c5181565a", "a3de178c43b990b5755be4d640a7525f97ce2f33", "f8aa33900f552f8112d6186d78bc845d2dfc0007", "5fe4eb1749a823469950456a123c77530e33ad73", "0ba9924ac38a425a9484dbc0a50cb71858ce416d", "9aa0d7253574e50fe3a190ccd924433f048997dd", "4fbdef00d80ed26ee01e4624c92465d4bea38aac", "86337138bb6dfabef8e1d45ec3c4e30d64c3ce36", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "11bbc477d14d1c945f203f1a83a530856a89d28f", "d55d9c3caff8131bc01468bc73b274da07c237f6", "28b212a7c0354aa8b866b9459aa64eac12c2b370", "876fe387dedd14c364bf9e41fcdc25c6dfc1ddc3", "170a81df3ff2076fe9a3f2fdee0755a7310c2c41", "07042865b10297ca4fc9164829d6330db2f60b4c", "4b7b45aa74d84f5b86ef3d8bc8bf460602e97d38", "24dc8d1de7e78ab100d2d83cbdf1390ddb9234c9", "03bf5d2bc45794e241f53aecf8880c26c712933d", "38fd918de20b0613cb07de8794fe6713d48f86d4", "1b6262f0533c202c1f140e60053ee3c72f216687", "984ab13ce54a8deadf0f30d00ee7b7951852da60", "2ee01ab9aca4163d391bd29c2123d9be44b0e986", "13a6d31c1cbefb36b6ceacd99f058bc96b8a4673", "43cf61960c85339deeeeeb2b75cdf9595565afa8", "3574657705475722b6c398c266805f758268778b", "47419c7d160fd05f9be712b876c292cb6241228d", "0abc3e83ccd6e685f8d0299f24f03ae28f4c2459", "3a03957218eda9094858087538e9668ab0db503b" ], "paperAbstract": "Derivative clouds, light weight application containers provisioned in virtual machines, are becoming viable and cost-effective options for infrastructure and software-based services. 
Ubiquitous dynamic memory management techniques in virtualized systems are centralized at the hypervisor and are ineffective in nested derivative cloud setups. In this paper, we highlight the challenges in management of memory resources in derivative cloud systems. Hypervisor caching, an enabler of centralized disk cache management, provides flexible memory or non-volatile memory management at the hypervisor to improve the resource usage efficiency and performance of applications. Existing hypervisor caching solutions have limited effectiveness in nested setups due to their nesting agnostic design, centralized management model and lack of holistic view of memory management. We propose DoubleDecker, a decentralized disk caching framework, realized through guest OS and hypervisor cooperation, with support for efficient memory management in derivative clouds. The DoubleDecker hypervisor caching framework, an integral part of our proposed solution, provides interfaces for differentiated cache partitioning and management in nested setups and is equipped to handle both memory and SSD based caching stores. We demonstrate the flexibility of DoubleDecker to handle dynamic and changing memory provisioning requirements and its capability to simultaneously provision memory across multiple levels. Such multi-level configurations cannot be explored by centralized designs and are a key feature of DoubleDecker. Our experimentation with DoubleDecker demonstrates that application performance can be consistently improved due to the flexible policy framework for disk caching. 
With our setup, we report an average performance improvement of 4x and a maximum of 11x.", "pdfUrls": [ "http://doi.acm.org/10.1145/3135974.3135992" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/055c7900c4ccaa621ebed2d946510849b98ad6f1", "sources": [ "DBLP" ], "title": "DoubleDecker: a cooperative disk caching framework for derivative clouds", "venue": "Middleware", "year": 2017 }, "05759b8b70b51f3fe0c302bf29c4f2d5315bcae5": { "authors": [ { "ids": [ "2910441" ], "name": "Aleksandar Prokopec" } ], "doi": "10.1007/978-3-319-64203-1_13", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_13", "entities": [ "Speculative execution" ], "id": "05759b8b70b51f3fe0c302bf29c4f2d5315bcae5", "inCitations": [], "journalName": "", "journalPages": "177-191", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_13" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/05759b8b70b51f3fe0c302bf29c4f2d5315bcae5", "sources": [ "DBLP" ], "title": "Accelerating by Idling: How Speculative Delays Improve Performance of Message-Oriented Systems", "venue": "Euro-Par", "year": 2017 }, "0590d539e980c1b9dc33abb5c97e68cb6c39c3f9": { "authors": [ { "ids": [ "1782674" ], "name": "Bo Mao" }, { "ids": [ "9280383" ], "name": "Hong Jiang" }, { "ids": [ "8175008" ], "name": "Suzhen Wu" }, { "ids": [ "3154422" ], "name": "Yaodong Yang" }, { "ids": [ "1769287" ], "name": "Zaifa Xi" } ], "doi": "10.1109/IPDPS.2017.64", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.64", "entities": [ "Algorithm", "Data compression", "Error detection and correction", "Experiment", "Flash memory", "Memory-mapped I/O" ], "id": "0590d539e980c1b9dc33abb5c97e68cb6c39c3f9", "inCitations": [ "e087ed99f88d00c32cff2f9c3b7a8788594aec0d" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1109-1118", "journalVolume": "", "outCitations": [ 
"15bb9232bc05e27c114d12811a67330863958b9e", "d4e153d0ff33cb15cd6c13570599c6c36cc78db5", "31ee28ad7207eb9e3f558488786a888a42bbb907", "b220199029253cda0744b3b39a876ca007a5f12b", "e8524d6388505eede28665e36cd1b5da811ab50e", "33b36e79f7e82907b656177d31069a36efa6e6a9", "6f6252aaa0fc4bc9e35d6e7b4691a99ba49206a7", "27cc332571aa00e892d7e094a3ee7b9e44b12c75", "1029c647d5a3906bc1cada451bffea7e6da72ee3", "009d8914ca7ca1ec459f6c35a772f85c602eb052", "0c279813f1dba545c50c237f69b89c6496117015", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", "0e216e95f17f64ff18cd50463dd8ec023aa08248", "32e43b28f78908314a77c23edd3f089363c2d2b2", "12d6da762b2a5d512d383f3b587bd30c23c3df97", "92bc53a3a28a2cc02e02d959c439c80fce1846f1", "d616765a381d0a43996a5b3ed33ae57e4f126918", "4749be82e7320b4c8b2f31ca38c4a939027cf1e4", "189aa2eaae5502e63ccca293d2c3dc1de1bfc8a1", "424a0f460b4f261b386787bdec37a2b01347a930", "75e74a0f013e9028c69df3addc0d161ef35d0c51", "9f83ef5f08ffcfc56ddd8ca67f7efd99aadfc94a", "403e4f2aa66e789ea8e01dc9b8b96d9fcdab4ae1", "e087ed99f88d00c32cff2f9c3b7a8788594aec0d", "581ddd1d36483f1c6fa66292bf85bf0eeff2efb6", "1820a34042d6371a9e20484b0c63b698eb522a6c", "0918495eb01aa8f6a3700fb37e5b781492d66920", "c07810339203898aeb485611e615f02f2fd22443", "70ce10f47aafa0994627a9575565b5c98af58d98", "1d05f17a575a4536f53645a099474cddf96c3c63", "d4e5801efdfb30ac9ca93096995fcb32c06f4e29", "070fe0f510d9c8335528b7103ea7fd81b62e4695" ], "paperAbstract": "Data compression has become a commodity feature for space efficiency and reliability in flash-based storage systems by reducing write traffic and space capacity demand. However, it introduces noticeable processing overheads on the critical I/O path, which degrades the system performance significantly. 
Existing data compression schemes for flash-based storage systems use fixed compression algorithms for all the incoming write data, failing to recognize and exploit the significant diversity in compressibility and access patterns of data and missing an opportunity to improve the system performance, the space efficiency or both. To achieve a reasonable trade-off between these two important design objectives, in this paper we introduce an Elastic Data Compression scheme, called EDC, which exploits the data compressibility and access intensity characteristics by judiciously matching data of different compressibility with different compression algorithms while leveraging the access idleness. Specifically, for compressible data blocks EDC exploits the compression diversity of the workload, and employs algorithms of higher compression rate in periods of lower system utilization and algorithms of lower compression rate in periods of higher system utilization. For non-compressible (or very lowly compressible) data blocks, it will write them through to the flash storage directly without any compression. The experiments conducted on our lightweight prototype implementation of the EDC system show that EDC saves storage space by up to 38.7%, with an average of 33.7%. 
In addition, it significantly outperforms the fixed compression schemes in the I/O performance measure by up to 61.4%, with an average of 36.7%.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.64" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0590d539e980c1b9dc33abb5c97e68cb6c39c3f9", "sources": [ "DBLP" ], "title": "Elastic Data Compression with Improved Performance and Space Efficiency for Flash-Based Storage Systems", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "05a1b746b03b729aa9c0679d6657a96382843159": { "authors": [ { "ids": [ "3108945" ], "name": "Haoyu Zhang" }, { "ids": [ "35428741" ], "name": "Qin Zhang" } ], "doi": "10.1145/3097983.3098003", "doiUrl": "https://doi.org/10.1145/3097983.3098003", "entities": [ "Algorithm", "Bioinformatics", "Bioinformatics", "Collaborative filtering", "Edit distance", "Experiment", "Natural language processing", "Relational database management system", "String (computer science)" ], "id": "05a1b746b03b729aa9c0679d6657a96382843159", "inCitations": [ "0b47e24b7aa12b2ec65abf76b70984d9836c3635", "bb3707a6ffe8e0dd7208930bdcc4230bf95cebb9", "712ee1f295cc473d5126abe9c6221986f25116c7", "37188150ca9d4698a67803039d3e3d95923ed7f9", "947729c7e627bc1e0109896b91effcaeab112867" ], "journalName": "", "journalPages": "585-594", "journalVolume": "", "outCitations": [ "1822530792a1f6ca526ecee49505387a3421a4bc", "998c23f747271c297f8c6a8acd645ff5a9f8d880", "124f6f240ba622cd74a9a0ea554ec2a5011eaadf", "4d99e622a1166960723c3874cc62e0cee9e7d4ba", "1b3c2bf25ddc8c70d8d5058df6c7fe35a644855d", "c9f76e451e460bc3bc3e5b3d02ecc88e6c361790", "703c8586a6aa49f58266112321a1b03716059a10", "8f3f63773aa801b2cdcfa2e5699e3abb9aed7443", "50c9a75513a4da3446642d6e6a397081c97baac2", "a07c0717d3f259ee8a5d33a3b70a331d7a807d7d", "1df4d83bd44ac0baaeb5dcec2f9b7834afe39d14", "051546f9d417c9ec82f71abead211be679703bc0", "b1e867a9efc1c294e201c8434d0dac32c30131c9", 
"7d66ff645cd7a0f0b655860d07e9e89308384e29", "3ea209b0486b2b17543b7a7fa17189768ac99612", "3ac024ec80dc0efd56b6ed5ef3caf5aaf4312f7b", "42e84c4c1ec0bed9c1436ff1b5d6ecfe07981615", "01ce408d53d1fd6660ad11d6980a28c4892e1fc9", "a5f782739f4647100edab7158b3ea9f1ee9c84a4", "152ae230ea49aba046aaa1dcefd7f7e4be0185b5", "530f4487992599b3598bd4bb45d74de8436fc3fc", "92ce82b6047040004a8484e20aab4fb88c9d50ab", "1c799eca7983c62f7815ac5f41787b3e552567b6", "00af4d7a9de6f01b9b4e468bd8d63c4d5da6bebd", "cbd45b97b5332e4b955cd54f090baed9d2ec5a72", "24c48b97725d84246f6dbd39c055648a305e1df4", "1eeb85014348bd1d52c7dfdb71c93e73af180ba1" ], "paperAbstract": "We study the problem of edit similarity joins, where given a set of strings and a threshold value K, we want to output all pairs of strings whose edit distances are at most K. Edit similarity join is a fundamental problem in data cleaning/integration, bioinformatics, collaborative filtering and natural language processing, and has been identified as a primitive operator for database systems. This problem has been studied extensively in the literature. However, we have observed that all the existing algorithms fall short on long strings and large distance thresholds.\n In this paper we propose an algorithm named EmbedJoin which scales very well with string length and distance threshold. Our algorithm is built on the recent advance of metric embeddings for edit distance, and is very different from all of the previous approaches. 
We demonstrate via an extensive set of experiments that EmbedJoin significantly outperforms the previous best algorithms on long strings and large distance thresholds.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098003", "https://arxiv.org/pdf/1702.00093v2.pdf", "https://arxiv.org/pdf/1702.00093v3.pdf", "https://arxiv.org/pdf/1702.00093v1.pdf", "http://arxiv.org/abs/1702.00093" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/05a1b746b03b729aa9c0679d6657a96382843159", "sources": [ "DBLP" ], "title": "EmbedJoin: Efficient Edit Similarity Joins via Embeddings", "venue": "KDD", "year": 2017 }, "05a1bad1ef2341339e18d636d78594226d4ee8e6": { "authors": [ { "ids": [ "36937479" ], "name": "Jian Huang" }, { "ids": [ "1783539" ], "name": "Anirudh Badam" }, { "ids": [ "9725581" ], "name": "Laura Caulfield" }, { "ids": [ "39496676" ], "name": "Suman Nath" }, { "ids": [ "1690586" ], "name": "Sudipta Sengupta" }, { "ids": [ "39807362" ], "name": "Bikash Sharma" }, { "ids": [ "1740036" ], "name": "Moinuddin K. 
Qureshi" } ], "doi": "", "doiUrl": "", "entities": [ "Die (integrated circuit)", "Experiment", "Flash memory", "Multitenancy", "Parallel computing", "Solid-state drive" ], "id": "05a1bad1ef2341339e18d636d78594226d4ee8e6", "inCitations": [ "2971948a9229ed61604778b76e03d5a31328a7cb", "65c43d1b70985054907e08fddb4a9907244b0801", "40dc09f5fbd3776c3f34adedc7a4718307ace0d6", "226ca798b529c13605a2aa7fe75d58f4188f850a", "40f196e21a289394c4354961116587b8accba45e", "0da73832dee2c9b3d4c0d039d8e714e6ff098e40", "55318fe320d8217fdc0e1359f04ac79844222c8e", "1d08d231ec66645ec56d2210c1a7c6b44c6ff041" ], "journalName": "", "journalPages": "375-390", "journalVolume": "", "outCitations": [ "27cb0c2229299a82cf767d19dcc68aa1e5f0f233", "84127db83f5ce3a3f92f2f114a10a65a4a342b06", "40f04909aaa24b09569863aa71e76fe3d284cdb0", "26a88fcaf621270af5f5786fdb2df376a2bc00aa", "088e3e939ad234b6fdd0e321290fb26937dc2553", "01438abf044c42f90de0591e08fe33461908c6cd", "438c51040ee6ccf9198e52d105c47e75d615b29c", "3c0bc4e9d30719269b0048d4f36752ab964145dd", "13d6c568c770ff5a070072e720fb34b0037cdab8", "0e5c646909bb762da0cd325e084655c12445578f", "33438b1148a84d6e5bf2cad70bf7754d546ca5d7", "dc7f57cf92f8aa87c33853a724a3fa19c7ba12ce", "490d862480cf30949dce90e832aa292c498ac768", "4ba4613eab33cddc53bec9e14e50d03fa66270ca", "81b761ea5c679b452f4a78fa176b8e2d608e77ac", "72722e7602138e3896e5576d3f3ef730e7b7c4b4", "2e46f9074bd81ea4ec29ecec7e0231c16fb2e8db", "7f713eeef50a87ec595c64832fdaf25ffa38b5bd", "b45e1f16cf2b6f735013e9f279e45bf8b7a8d5db", "03543f75c4fe0c49f81af789a1c7293ff0e4e107", "5909192b374eac0cda4df7c986ebc997cdcd6002", "726099036bb32c3fbaf1650d5900eeaa2ecc8fd9", "2269889c9085ff518ee9e7f5b2f92e4599dd3ff2", "38a9120f780602521af9744e31d80ef5cd9593a7", "1820a34042d6371a9e20484b0c63b698eb522a6c", "3cf9039fa2fc01f711870e33d868669caf5c4df4", "389b618c42c0d5d32a569b9cbaa02a7ff77c6be6", "65a2cb8a02795015b398856327bdccc36214cdc6", "6d44790b6d952eff28f302998e8121f90786e3ff", "131e1e1d163a0f49881d7b5ac092892093391015", 
"13b925352e4ee3066a6d38ef9f16efdfa967cabb", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", "61977858b3eea4f5a6d81393301e7298ade7a2d8", "19ffc4f5129ed9d39f498f4eb901024c514263c7", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "9aa0d7253574e50fe3a190ccd924433f048997dd", "5c06564087db9e53a72ef1eb5865696b0dddd8ca", "8969f883979ac45fe24cecde39c15ddc4bd756d3", "151fe4cd7d0c788b3e362636d5c31a4c13f90a9a", "26e72340c47b7348e1b1de285f89dd96cc925b27", "8c9a91b774fcc126db7ce7c67bd97d1d16143932", "d67adb456a315aee244babf4f20e318cc14d13f3", "7fb6b53bdc81f06fd34d5d9c2dd00f6e38cfd98b", "4251f331db37a1c2c16c2e0c4daa729074c99110", "0da73832dee2c9b3d4c0d039d8e714e6ff098e40", "dbcdb4c402756b2b5ac910b9eb17ddb412290d16", "061944ca83bb46fac511394dca642f7af2d2858a", "d137b83c3e43d4953cc389cb0a50619cc7be5319", "05961fc1d02ca30653dd0b4c906113db796df941" ], "paperAbstract": "A longstanding goal of SSD virtualization has been to provide performance isolation between multiple tenants sharing the device. Virtualizing SSDs, however, has traditionally been a challenge because of the fundamental tussle between resource isolation and the lifetime of the device \u2013 existing SSDs aim to uniformly age all the regions of flash and this hurts isolation. We propose utilizing flash parallelism to improve isolation between virtual SSDs by running them on dedicated channels and dies. Furthermore, we offer a complete solution by also managing the wear. We propose allowing the wear of different channels and dies to diverge at fine time granularities in favor of isolation and adjusting that imbalance at a coarse time granularity in a principled manner. 
Our experiments show that the new SSD wears uniformly while the 99th percentile latencies of storage operations in a variety of multi-tenant settings are reduced by up to 3.1x compared to software isolated virtual SSDs.", "pdfUrls": [ "http://www.cc.gatech.edu/grads/j/jhuang95/papers/flashblox-fast17.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17_huang.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_huang.pdf", "http://www.cc.gatech.edu/~jhuang95/papers/flashblox-fast17.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/huang", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_huang.pdf", "http://www.cc.gatech.edu/grads/j/jhuang95/papers/fast17_slides_huang.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17_huang.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4a85/7d84a1f410b5264683a3d2d1d959d2085e44.pdf", "s2Url": "https://semanticscholar.org/paper/05a1bad1ef2341339e18d636d78594226d4ee8e6", "sources": [ "DBLP" ], "title": "FlashBlox: Achieving Both Performance Isolation and Uniform Lifetime for Virtualized SSDs", "venue": "FAST", "year": 2017 }, "05a1dfc881760b4dd7a059b21afa753f198459ea": { "authors": [ { "ids": [ "1699746" ], "name": "Jie Liu" }, { "ids": [ "1685323" ], "name": "Xin Li" }, { "ids": [ "1682058" ], "name": "Hao Zhang" }, { "ids": [ "1859741" ], "name": "Chengcheng Liu" }, { "ids": [ "40651849" ], "name": "Lei Dou" }, { "ids": [ "39350835" ], "name": "Lei Ju" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.32", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.32", "entities": [ "Artificial neural network", "Automatic number plate recognition", "Convolutional neural network", "Distortion", "Pixel", "TensorFlow", "Test set", "Video game localization" ], "id": "05a1dfc881760b4dd7a059b21afa753f198459ea", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High 
Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "246-253", "journalVolume": "", "outCitations": [], "paperAbstract": "Automatic number plate recognition (ANPR) is a significant part in intelligent traffic system. At present, there are many traditional approaches that have achieved a rather high accuracy to solve this problem, almost all of which are separated into three steps of localization, segmentation and recognition. However, these approaches, especially in segmentation progress, are limited to some specific conditions including light intensity, orientations, rotation and distortion angle of plates, etc. In this paper, distinct from traditional approaches, a network including a convolutional neural network(CNN) that operates directly on the image pixels is employed as a substitute of the integration of segmentation and recognition. The network works on the TensorFlow framework. Evaluation of this training network is characters' recognition accuracy on a test set of 796 number plate pictures. In result, we achieve a 88.61% accuracy with a training set of only 7396 photographs that are expanded from 3041 different number plate pictures, which is a relatively high accuracy, especially for a deep CNN that usually needs a great number of samples. 
We also demonstrate one possible way to enrich data set and make a test using a even simpler network, which results in a 90.07% accuracy on a test set.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.32" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/05a1dfc881760b4dd7a059b21afa753f198459ea", "sources": [ "DBLP" ], "title": "An Implementation of Number Plate Recognition without Segmentation Using Convolutional Neural Network", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "05acdf395981f7c04957b0f7583d34b6b172b883": { "authors": [ { "ids": [ "28262989" ], "name": "Th\u00e9ophile Terraz" }, { "ids": [ "21555557" ], "name": "Alejandro Rib\u00e9s" }, { "ids": [ "2316659" ], "name": "Yvan Fournier" }, { "ids": [ "3148002" ], "name": "Bertrand Iooss" }, { "ids": [ "2583571" ], "name": "Bruno Raffin" } ], "doi": "10.1145/3126908.3126922", "doiUrl": "https://doi.org/10.1145/3126908.3126922", "entities": [ "Fault tolerance", "Hexahedron", "High-resolution scheme", "Image resolution", "Numerical analysis", "Server (computing)", "Simulation", "Supercomputer", "Terabyte" ], "id": "05acdf395981f7c04957b0f7583d34b6b172b883", "inCitations": [], "journalName": "", "journalPages": "61:1-61:14", "journalVolume": "", "outCitations": [ "f7a8d8df3251d28561791cd83ebdef00c771af19", "1a69fd58d883049e24ec734529ad5caf9f850620", "3519add893934bac5cf334d0719d953746136513", "a03d6ee4ea70eb7feaa65ab046ffc2232d76b0f0", "1e52da6571efc3fbc979afb5a07e44d381b730e7", "c46427857446a7534ae883e89cc4b3d0044dde59", "d225032c36cfae444c010427af88026bf85e5253", "159e4774e3254d944c6463f10de07fc60ae81a11", "d164091af9c60edee0bda14a828b6797145a8062", "0011e3ac148971c8df1fe560c70692a5261375d2", "4224374796da64e17fce96033d4cd42240d80eaf", 
"6fa9035d3c8b450071bae8dbb6d2c1d3f829e16a", "eb399f6ae21f78e231d8d98c231194891b2bc5a9", "04856a5a8c24b7e259730bb2096c31cd2929cd08", "5b5dfbfffeade87035fca8fadca1a7f27f8a72fe", "c73a30210e1a777bb176382a86ec70e822ea98c0", "4fe2bf624e18d71d87ae36824606c42c64446562", "41c97a6b41aefc6b0e0a3c702db080fd5aeef6f5", "eb11fa122dd73d516ce29172720575d3d41ed9d0", "a6606bb5fbe5815bc1d740a60334d0b2b189167f", "55582978748505a665b044e61995d701c2139902", "15c0356be4fa9566269b912278aca5a2d10d6d16" ], "paperAbstract": "Global sensitivity analysis is an important step for analyzing and validating numerical simulations. One classical approach consists in computing statistics on the outputs from well-chosen multiple simulation runs. Simulation results are stored to disk and statistics are computed postmortem. Even if supercomputers enable to run large studies, scientists are constrained to run low resolution simulations with a limited number of probes to keep the amount of intermediate storage manageable. In this paper we propose a file avoiding, adaptive, fault tolerant and elastic framework that enables high resolution global sensitivity analysis at large scale. Our approach combines iterative statistics and in transit processing to compute Sobol' indices without any intermediate storage. Statistics are updated on-the-fly as soon as the in transit parallel server receives results from one of the running simulations. 
For one experiment, we computed the Sobol' indices on 10M hexahedra and 100 timesteps, running 8000 parallel simulations executed in 1h27 on up to 28672 cores, avoiding 48TB of file storage.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126922" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/05acdf395981f7c04957b0f7583d34b6b172b883", "sources": [ "DBLP" ], "title": "Melissa: large scale in transit sensitivity analysis avoiding intermediate files", "venue": "SC", "year": 2017 }, "05b073c44188946aeb9c410c1447262cbdf77b6d": { "authors": [ { "ids": [ "1773836" ], "name": "Payman Mohassel" }, { "ids": [ "3092404" ], "name": "Yupeng Zhang" } ], "doi": "10.1109/SP.2017.12", "doiUrl": "https://doi.org/10.1109/SP.2017.12", "entities": [ "Artificial neural network", "C++", "Computation", "Experiment", "Gradient", "Gradient descent", "Image processing", "Information privacy", "Linear function (calculus)", "Logistic regression", "Machine learning", "Nonlinear system", "Optical character recognition", "Predictive modelling", "Privacy", "Secure two-party computation", "Server (computing)", "Sigmoid function", "Softmax function", "Stochastic gradient descent", "Two-phase commit protocol" ], "id": "05b073c44188946aeb9c410c1447262cbdf77b6d", "inCitations": [ "5603325eee0f5d70176860d8cc77a9a9c89289a7", "f954cf9bc02645778421a2423af5278126d757fb", "2c4cc18223fec4b06cb8ea50dae1e6b2ebce0971", "3a40f10445a6ad415ac7dc6968a7295dc384eb0e", "7b24bea661e4ab8fddd5e2c76d307ffa6e0a4aa5", "0faf801e0511cfce8953b4766523c771d156cdb4", "a55c8e5fa3c937414b458af2072ff195e9882e14", "4b09e01cb21b26d4120077301b359d88aa206b28", "44a97f4eaaefaf5338f8aed2913d5debb2459f7e", "8e3f04c9936949d13b9b1157857e66dd291c45d5", "a46fdcae60e683b9fbca3a76530b00f69ad0aa82", "74c279b0dc37df611e2ad165ce5735913cb8ad72", "08e2db0c2f79b7a807747f19707ab3e96d3541a0", "29b14b6f0aee8cb3ea6da4a5b08a21aaa868bba1", "530a4ab0308bc98995ffd64207135ca0ae36db7f", "eec0bc4c3fddbaf78feb0872a195fb3aeb01010e", 
"6cefb70f4668ee6c0bf0c18ea36fd49dd60e8365", "d75fddd91657543260e1e839f81a71bb7f8485be", "0257f30d01eaf774681f6266edd9c38973ac99e1", "e479d7cb1b2622b42282d0b7ac0f6a35cba02ca6" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "19-38", "journalVolume": "", "outCitations": [ "588972fccb475cfaafdbb6efeef592eacadbe5f0", "0eefa33a1ad9118ba91a2e4a88e555b453a952f1", "6a3ab165f52ff39959e527990ee629a4d7dbd16d", "31100ccd0867d6d5338612a62b2cde11be75f1b8", "15fc5f92da22ecb1761be6adccd7c858288c40ab", "8fa56ecfb46b8dadf8a4dd063d15da5b975c83f1", "0166c8b5c6445043b94fc7b62d145d0c3c8b6483", "37bbe6d64cb4ff9ad546bfa36b0512f580bc6bf8", "63569d8b7e3171174fce91443e10fededbad4ac7", "27e9745fc94ccf6039dd1804cbb99760544fc59b", "316d5642b39ba001efc8949cb87ed83eba1def95", "61a297247f899995789dc6e32bcf3972502374b8", "42333e3f231bbfe508f6da6bad2feff9ae223113", "11ccb00bd3ff98e3f46a51cca059241c70954d4f", "6154ce8c02375184f7928e41c4fae532500f7175", "b14dea76cafede81c6ff5478d4221fce3aec9284", "20b5b5c25e2b56693b38fe7f69caddca78872085", "19e6f3ea035d33590a8598be68ac82e6f00ce518", "6db00c7de3c9c13b7fd1c0078eefdbe506d054cd", "6764845d03fa22e5dc51234d07ae0b2901cdfd25", "8c13af62501337bd4281d2b9498590feadfa368b", "0d8abe14b8f8c166f97165f03424af2193bf41c6", "18b7880edc5dead10795105ae600ca19ba15f8c5", "5de068c94fbe9976a7017ce0451c05941a2fe70a", "7ffe3790234f977caee2f4850ad2c33734d24827", "a09dcece804c6cd11fd3f0025dda7d327121ae67", "12893121d4a467d5dae188b8cc8e3a67e4c69750", "23ec68ed03b485b645478a3f6905615617d905a6", "032d59d75b26872d40081fb40d7a81c894455d91", "d24c81f1e2904ba6ec3f341161865ef93247855b", "0e0427aedfed65c8dd688c094b181feacf4eaab4", "012b8a941e96594783fb10d3a785e91f13384413", "326bb49d3ae9e1e1551028200916192e50004105", "362246709de205ec0ac5b34e07306839c38d5a3a" ], "paperAbstract": "Machine learning is widely used in practice to produce predictive models for applications such as image processing, speech and text recognition. 
These models are more accurate when trained on large amount of data collected from different sources. However, the massive data collection raises privacy concerns. In this paper, we present new and efficient protocols for privacy preserving machine learning for linear regression, logistic regression and neural network training using the stochastic gradient descent method. Our protocols fall in the two-server model where data owners distribute their private data among two non-colluding servers who train various models on the joint data using secure two-party computation (2PC). We develop new techniques to support secure arithmetic operations on shared decimal numbers, and propose MPC-friendly alternatives to non-linear functions such as sigmoid and softmax that are superior to prior work. We implement our system in C++. Our experiments validate that our protocols are several orders of magnitude faster than the state of the art implementations for privacy preserving linear and logistic regressions, and scale to millions of data samples with thousands of features. 
We also implement the first privacy preserving system for training neural networks.", "pdfUrls": [ "http://eprint.iacr.org/2017/396", "https://eprint.iacr.org/2017/396.pdf", "https://doi.org/10.1109/SP.2017.12", "https://obj.umiacs.umd.edu/papers_for_stories/SecureML_Zhang.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/05b073c44188946aeb9c410c1447262cbdf77b6d", "sources": [ "DBLP" ], "title": "SecureML: A System for Scalable Privacy-Preserving Machine Learning", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "05b493cac86ef358ee4990429aaa1095a1315054": { "authors": [ { "ids": [ "2919642" ], "name": "Maciej Besta" }, { "ids": [ "19322066" ], "name": "Florian Marending" }, { "ids": [ "2880213" ], "name": "Edgar Solomonik" }, { "ids": [ "1713648" ], "name": "Torsten Hoefler" } ], "doi": "10.1109/IPDPS.2017.93", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.93", "entities": [ "512-bit", "64-bit computing", "Algorithm", "Automatic vectorization", "Breadth-first search", "Central processing unit", "Graph (abstract data type)", "Graph500", "Graphics processing unit", "Haswell (microarchitecture)", "Knights", "List of algorithms", "Load balancing (computing)", "Manycore processor", "Multi-core processor", "Nvidia Tesla", "SIMD", "Sparse matrix", "Xeon Phi" ], "id": "05b493cac86ef358ee4990429aaa1095a1315054", "inCitations": [ "232e641a8b5f550c436af6336ee63e1cd771e073", "37050d37c793f4eee3874840fa60a58ca03c3fb0" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "32-41", "journalVolume": "", "outCitations": [ "af6d5dd24498e0ce9aa7cbee8a7f6356079f5dfa", "0a791a760dd883342c8b8456a3e7cb75fb996ef4", "3e426349f0cf3a65b502be05ebca23e693ec03fd", "2984638090457cf02d82715d9834314448efa878", "0a7bcfcb0ddc167de4b456504600806e18690d02", "259e93de2f10d395a1bdfb2dc6da72b6a3998572", "175d795f44037ef60dd9df341701cd5fdc449f1f", 
"4c77e5650e2328390995f3219ec44a4efd803b84", "0624ec3adb8d9f785935746534d4041c2e0802dc", "40eb1f990ac292b14b56ea06e61d9aeb9bfa28c3", "ae18b99bfa8940f7a17b7f77eb7177d953a5d9f5", "947c6bf534ccd620044f77c3bd6068f633b421fb", "5b975248796c2ee3f65b2f4430fd3be4d7e6191e", "0c9a56eb4f45d3969943e8cff74593e9c6c5f549", "3983fe131eb3902f9923f35060c56546bbdc951e", "47a6a274c648aeb5ff02eb09aff7ea310eae122e", "5a3cd8c65ffcc25bef346174d1f0bc3f83c5cbbb", "1156f60e40548096df49528b1342bb3e88b0f378", "477a2e92d2fd2ca56fd989d42de58248f1ce04ae", "3ef02548615246e74b88808af811f1557b57fa75", "ce8190de5cac2b583667079502c130888783303f", "141e35263ab810983c90d47ad62eb4fab5e51717", "189f76a7501666386809bd280ffe2f0c3acd7cb0", "a5aad5abb32f6b15f31b92312bb3b0f7b6470977", "31181e73befea410e25de462eccd0e74ba8fea0b", "17ad1361dfabc1c50b506813d0f5d54df159fc36", "87ee99d4cc4e0601cbb519f6ddbac85772bdc49e", "d7f449c199ce86d3b8039899caabb31b54ced7f2" ], "paperAbstract": "Vectorization and GPUs will profoundly change graph processing. Traditional graph algorithms tuned for 32- or 64-bit based memory accesses will be inefficient on architectures with 512-bit wide (or larger) instruction units that are already present in the Intel Knights Landing (KNL) manycore CPU. Anticipating this shift, we propose SlimSell: a vectorizable graph representation to accelerate Breadth-First Search (BFS) based on sparse-matrix dense-vector (SpMV) products. SlimSell extends and combines the state-of-the-art SIMD-friendly Sell-C-σ matrix storage format with tropical, real, boolean, and sel-max semiring operations. The resulting design reduces the necessary storage (by up to 50%) and thus pressure on the memory subsystem. We augment SlimSell with the SlimWork and SlimChunk schemes that reduce the amount of work and improve load balance, further accelerating BFS. We evaluate all the schemes on Intel Haswell multicore CPUs, the state-of-the-art Intel Xeon Phi KNL manycore CPUs, and NVIDIA Tesla GPUs. 
Our experiments indicate which semiring offers highest speedups for BFS and illustrate that SlimSell accelerates a tuned Graph500 BFS code by up to 33%. This work shows that vectorization can secure high-performance in BFS based on SpMV products; the proposed principles and designs can be extended to other graph algorithms.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.93", "https://htor.inf.ethz.ch/publications/img/slimsell.pdf", "https://people.csail.mit.edu/jshun/6886-s18/papers/BMSH17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/05b493cac86ef358ee4990429aaa1095a1315054", "sources": [ "DBLP" ], "title": "SlimSell: A Vectorizable Graph Representation for Breadth-First Search", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "05b68f826366be34c52b7ab69e740d6845768080": { "authors": [ { "ids": [ "2395665" ], "name": "Dixin Tang" }, { "ids": [ "1786139" ], "name": "Hao Jiang" }, { "ids": [ "1787375" ], "name": "Aaron J. 
Elmore" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Central processing unit", "Concurrency (computer science)", "Concurrency control", "Database", "In-memory database", "Load balancing (computing)", "Multi-core processor", "Project Looking Glass" ], "id": "05b68f826366be34c52b7ab69e740d6845768080", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "98cca67dfd0320d56030dd6637a733436d2b521e", "13875088254a585cd0b050f3bc27c1af9ada690f", "35f751e46799e3a91425267819f40dce273abec1", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "22c74d8be071084ce8812af19548e7bf2bf0c8b6", "28ecdc50beb098d9176d992fed80eb2bac5963a4", "3abca96006f8a6c014635b6a111368f459110e83", "a53e550b1c9282dc79ae920c12b62358bdb6e193", "0acc31039de608f2ac51f59b6848a48d50c919a5", "92c661404330dd1bc9ad9b6cdfc25ebd782999aa", "9aa0d7253574e50fe3a190ccd924433f048997dd", "96d197be2253f5c853edce37b59c186915160ce0", "0c5656c5ffe5fb092791deff10e919b209bb8004", "095a3cee30d64d3a6f22caadd58c45c5cd0b83e9", "19cadcb4e7439bc525c604771ab4872ec93a5b53", "412a9e54bbb31e12d008a9579994e009c5b40b46", "56f6aec0132e56769e2036bbeff791dfa137d107", "040d45e995ab920588607ebc6977ea19dc781923", "1e557937f418accc13f9c5edb33a3d48259d80e5", "10eb9cfb2cea0d6a256e436becd8f0f5494dc5a0", "3ae8993ebc28dd9b99d415d04d2b766dc99212d9" ], "paperAbstract": "Use of transactional multicore main-memory databases is growing due to dramatic increases in memory size and CPU cores available for a single machine. To leverage these resources, recent concurrency control protocols have been proposed for main-memory databases, but are largely optimized for specific workloads. Due to shifting and unknown access patterns, workloads may change and one specific algorithm cannot dynamically fit all varied workloads. Thus, it is desirable to choose the right concurrency control protocol for a given workload. 
To address this issue we present adaptive concurrency control (ACC), that dynamically clusters data and chooses the optimal concurrency control protocol for each cluster. ACC addresses three key challenges: i) how to cluster data to minimize cross-cluster access and maintain load-balancing, ii) how to model workloads and perform protocol selection accordingly, and iii) how to support mixed concurrency control protocols running simultaneously. In this paper, we outline these challenges and present preliminary results.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p63-tang-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/05b6/8f826366be34c52b7ab69e740d6845768080.pdf", "s2Url": "https://semanticscholar.org/paper/05b68f826366be34c52b7ab69e740d6845768080", "sources": [ "DBLP" ], "title": "Adaptive Concurrency Control: Despite the Looking Glass, One Concurrency Control Does Not Fit All", "venue": "CIDR", "year": 2017 }, "05bcf2245c8ee80fdf8d0e1d3e85bbe68fcf11a0": { "authors": [ { "ids": [ "19170117" ], "name": "Amrita Mazumdar" }, { "ids": [ "34862953" ], "name": "Thierry Moreau" }, { "ids": [ "1732259" ], "name": "Sung Kim" }, { "ids": [ "37270394" ], "name": "Meghan Cowan" }, { "ids": [ "1698528" ], "name": "Armin Alaghi" }, { "ids": [ "1717411" ], "name": "Luis Ceze" }, { "ids": [ "1723213" ], "name": "Mark Oskin" }, { "ids": [ "11816328" ], "name": "Visvesh Sathe" } ], "doi": "10.1109/IISWC.2017.8167775", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167775", "entities": [ "Authentication", "Central processing unit", "Cloud computing", "Computation", "Computer vision", "Data rate units", "Field-programmable gate array", "Graphics processing unit", "Image processing", "Image sensor", "Low-power broadcasting", "Microprocessor", "Program optimization", "Radio-frequency identification", "Real-time computing", "System on a chip", "Time complexity", "Virtual camera system", "Virtual reality" ], "id": "05bcf2245c8ee80fdf8d0e1d3e85bbe68fcf11a0", 
"inCitations": [ "7b9339d3b359310ddbaf6caae13d3a65f657bf04", "d1be7f6de75dbe350d8d45bb0997e294fd58a985" ], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "177-186", "journalVolume": "", "outCitations": [ "3aee096770f4b2e3a9e2c7110f088b453b6d98ab", "26e0521cc30b47bc6eefa14afd92fa756b223c12", "19bf895220e4d3d23488b34074a47bebc04589a8", "2fd1c99edbb3d22cec4adc9ba9319cfc2360e903", "59485d6bccefcbc09dcbb4235e977ef0a9c801e3", "4a2d7bf9937793a648a43c93029353ade10e64da", "14ce7635ff18318e7094417d0f92acbec6669f1c", "e5b301ee349ba8e96ea6c71782295c4f06be6c31", "006662a19c6383e8ee15616c90be206cd08867f0", "154898f34460e95aef932bec5615bbd995824cad", "370b5757a5379b15e30d619e4d3fb9e8e13f3256", "0959ba9874c9225cef08de110be7300715a2b792", "71bd0af2eb95061d43acb61d32ae72038b36c821", "039071c7dda82fa03a8cddc14a8a86871f502037", "ca71db3905d3fb2d970bcdaaa79993058560f9f7", "609e71d1d648f6e0795913ceab2153a5b35b80bc", "0028eb8a82cfabb162243852040aa39d3edb7a14", "127dd6bd7d1284c2b28403075515747299df6d53", "df3d657eb009bcd0ea1f199d7dd53a3a700619bb", "28f1f0292fc2821cfcfadca59c91fba4c262e829", "6ebc80909632da6f41c499d7d50c4c8a757605dc", "961a5d5750f18e91e28a767b3cb234a77aac8305", "3ba74755c530347f14ec8261996dd9eae896e383", "04fa47f1d3983bacfea1e3c838cf868f9b73dc58", "29c7d959f322f7006186e3c344ddb9e41e3a8d1f", "30bda171168f229749e39ca2f9c3fbfdc33003a8", "9e691bad16f46f89c9b379f7ba0c6b6492d1ae66", "4f4ba5c28b5f7b519979264f42a3336e363bd910", "58e4491dc48d46f4f47362686e09e6319c01edc0", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "13b4c25333158f630025b2b2db72efa102f9cf46", "3d49bd7f7c2eff701b4df211f53a9cae694cb572", "0cde0252eb8ce6d34b514043979393babf86f2a1" ], "paperAbstract": "Cameras are the defacto sensor. 
The growing demand for real-time and low-power computer vision, coupled with trends towards high-efficiency heterogeneous systems, has given rise to a wide range of image processing acceleration techniques at the camera node and in the cloud. In this paper, we characterize two novel camera systems that use acceleration techniques to push the extremes of energy and performance scaling, and explore the computation-communication tradeoffs in their design. The first case study targets a camera system designed to detect and authenticate individual faces, running solely on energy harvested from RFID readers. We design a multi-accelerator SoC design operating in the sub-mW range, and evaluate it with real-world workloads to show performance and energy efficiency improvements over a general purpose microprocessor. The second camera system supports a 16-camera rig processing over 32 Gb/s of data to produce real-time 3D-360° virtual reality video. We design a multi-FPGA processing pipeline that outperforms CPU and GPU configurations by up to 10× in computation time, producing panoramic stereo video directly from the camera rig at 30 frames per second. 
We find that an early data reduction step, either before complex processing or offloading, is the most critical optimization for in-camera systems.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167775", "https://arxiv.org/pdf/1706.03864v2.pdf", "https://homes.cs.washington.edu/~amrita/papers/iiswc17.pdf", "http://psylab.ee.washington.edu/documents/near_sensor_camera_arxiv.pdf", "http://arxiv.org/abs/1706.03864", "https://arxiv.org/pdf/1706.03864v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/05bcf2245c8ee80fdf8d0e1d3e85bbe68fcf11a0", "sources": [ "DBLP" ], "title": "Exploring computation-communication tradeoffs in camera systems", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "05bd926844ffa89f668237a6836825c59d6377e9": { "authors": [ { "ids": [ "1984554" ], "name": "Arpit Joshi" }, { "ids": [ "2164782" ], "name": "Vijay Nagarajan" }, { "ids": [ "1699540" ], "name": "Stratis Viglas" }, { "ids": [ "1687142" ], "name": "Marcelo Cintra" } ], "doi": "10.1109/HPCA.2017.50", "doiUrl": "https://doi.org/10.1109/HPCA.2017.50", "entities": [ "Atom", "Atom", "Baseline (configuration management)", "Byte", "Byte addressing", "Critical path method", "Durability (database systems)", "IBM Tivoli Storage Productivity Center", "Non-volatile memory", "Redo log", "Undo", "Volatile memory" ], "id": "05bd926844ffa89f668237a6836825c59d6377e9", "inCitations": [ "19eba1cfdecdd9a918f22880bc3599ca461c6454", "81e4324b8047463961692d38af9b0da881fe44e2", "41ea95cc4dca373bf324555b897760054ec4a76e", "20f1081cf001f716037e20d9cff147f5ac50632a", "004c2345477eda977f12b4485ac24a9e41557439", "5716db825bbd2c39836a2d6fa22e7f313fc12ccf", "aa0fb8802532106dcb78c62065258b8e4683ec94" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "361-372", "journalVolume": "", "outCitations": [ "05a1357946de5eca42a477b7b268db4944219a2e", 
"16653666b0005f91060a3e402566659749b84313", "885c666fbcfd1a10c613496d7a041d01b99c7a39", "15c80ec5104e98d6f84b5ed348ba0276c0739862", "9376a2d69d06e39fd6fd27c9ce2f0817cc1dd4ef", "3af216f371069b57c0dca5448384d052fb490fb4", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "10419c6f4aa50a36ed0b103c9ddb9aec45f133fe", "34a97a016e6c419eb4b1005a7306d45a775a407b", "47b851237f240831abee3971bca6bb8d2a121eb1", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "7a9abc36f336750f4c0679f0b4ef87c9dc12133c", "0645f0f88e9a3cd6e9b1d0c21bc24666a7377666", "5a49c1c694028fd6ca7acc6af601c0f54efa0700", "3ede1909bf70d6e4bca46302f474083517b081a3", "0204f40221260d00c5ee63646560a40dcd7d97d1", "42c70d64890726f60556caf3eec3f06e85642dd9", "2ef08ccb970632bb8ada93ea70078eac54ce92d3", "2b625353fe50e219412e18b6d50b5d8de0538a60", "578667cbc39c6bfc1c89fe6a54506643c3b097f8", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "94783d113951822195d4ba44599a8fcbdef9d4bf", "39e3d058a5987cb643e000bce555676d71be1c80", "823116269044ab4c713373c66c7da3fcb495b459", "2621b8f63247ea5af03f4ea0e83c3b528238c4a1", "2b26821287fa20ca9924326e08c4041880171ebf", "4bad51c7685254155733ee8def6a1294378aa1af", "2e663c1047ff14ddc2416229459922757a20edfb", "277862a906af8489a1d98add2f6516a0e5df1bb1", "5fbdf47c120d1c23a7715f5a5fec3d67b616ba99", "57c823b3b07b98233394bf15cfbbaed6a84809df", "a7592cb0c6f59211a2b48c3ed5c65a27a3f5cf12" ], "paperAbstract": "Non-volatile memory (NVM) is emerging as a fast byte-addressable alternative for storing persistent data. Ensuring atomic durability in NVM requires logging. Existing techniques have proposed software logging either by using streaming stores for an undo log, or, by relying on the combination of clflush and mfence for a redo log. These techniques are suboptimal because they waste precious execution cycles to implement logging, which is fundamentally a data movement operation. 
We propose ATOM, a hardware log manager based on undo logging that performs the logging operation out of the critical path. We present the design principles behind ATOM and two techniques to optimize its performance. Our results show that ATOM achieves an improvement of 27% to 33% for micro-benchmarks and 60% for TPC-C over a baseline undo log design.", "pdfUrls": [ "https://doi.org/10.1109/HPCA.2017.50", "http://www.research.ed.ac.uk/portal/files/29957991/hpca17a.pdf", "http://homepages.inf.ed.ac.uk/vnagaraj/papers/hpca17a.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/05bd926844ffa89f668237a6836825c59d6377e9", "sources": [ "DBLP" ], "title": "ATOM: Atomic Durability in Non-volatile Memory through Hardware Logging", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "05be45c608ec0c6b6f2700188e28f55f1ea910a4": { "authors": [ { "ids": [ "1867677" ], "name": "Stuart Byma" }, { "ids": [ "12888874" ], "name": "Sam Whitlock" }, { "ids": [ "20647943" ], "name": "Laura Flueratoru" }, { "ids": [ "32324156" ], "name": "Ethan Tseng" }, { "ids": [ "1700331" ], "name": "Christoforos E. Kozyrakis" }, { "ids": [ "1678618" ], "name": "Edouard Bugnion" }, { "ids": [ "1752633" ], "name": "James R. 
Larus" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Analysis of algorithms", "Biobank", "Bioinformatics", "Bioinformatics", "Computation", "Computer data storage", "Distributed computing", "End-to-end principle", "High-throughput computing", "Monolithic kernel", "Mozilla Persona", "Scalability", "Sequence alignment", "Server (computing)", "Software system", "Sorting", "Throughput", "Whole genome sequencing" ], "id": "05be45c608ec0c6b6f2700188e28f55f1ea910a4", "inCitations": [ "d7c2187f04a950b9588a3189aef73e40f6509b8c" ], "journalName": "", "journalPages": "153-165", "journalVolume": "", "outCitations": [ "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "0b72a5e4bec54e9f0a4d77db5b484d27886b49fe", "9c465b7d37024f6afe8a7063590c38fb69ec815c", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "25f017efd2905c6d0c6a92f2dfe19113ee42938e", "2228b4208c5ea6754df6edcae805038f3e47857c", "4ee0bf51012960c9aa55a2f3f913b22d0fd9a8ed", "2aff3d6ad3cd61928a0c0c66eff3270ff1c112b0", "cddfb34a35924b2958950deac3a6075f450e4519", "3ee0eec0553ca719f602b777f2b600a7aa5c31aa", "4cad8f2a31b3c72742a761fe90a372d4a4717ebf", "eaa398d58b7712a9bbc25b177a93624a7029ed29", "3a587f10ac804442c7236fcc63615a5f73930a2f", "3b3d2dd633f46b9040d726bc63e12df4ba2cb14c", "3979cf5a013063e98ad0caf2e7110c2686cf1640", "29f68a9512a16c6db526fb166a6433be72ad005c", "196514ca53f505dec7a8a2b446fc599e8de3f0cc", "2765bde8275938f89e9418d85befbbf03fcbb5fe", "04d8fd856dfff162a6e52e89f7967e378d8889f5", "2da760f90c3d2bf6598becdde9063093f488548c", "76e02e51eb7e529f5665356dc9a914946e247453", "5c8146845a1aac387ba4377ba6198d6b1c0626a3", "584856e3e85c7c02a8b9c1acdd0f961b7ee10a14", "04ecc752b775f934ca04a09e9bbc67bbb5f31c27", "40c5441aad96b366996e6af163ca9473a19bb9ad", "0d77bb6ef2bb6d165f58bf0251bf3d7cf29f1491", "652854487b655289fce13f88b7d1569e09b242fe", "b7ec915ecd1260433529153405c1b68692573217", "3605a786a96ad9e392c484e7eb7b036063ae8d0c", "6fd3aa67f07e5df43f5079ada6997b88e6c904ff", "27174cf4ffd83e2044549df0f2872608b73a6ef6" ], 
"paperAbstract": "Next-generation genome sequencing technology has reached a point at which it is becoming cost-effective to sequence all patients. Biobanks and researchers are faced with an oncoming deluge of genomic data, whose processing requires new and scalable bioinformatics architectures and systems. Processing raw genetic sequence data is computationally expensive and datasets are large. Current software systems can require many hours to process a single genome and generally run only on a single computer. Common file formats are monolithic and row-oriented, a barrier to distributed computation. To address these challenges, we built Persona, a cluster-scale, high-throughput bioinformatics framework. Persona currently supports paired-read alignment, sorting, and duplicate marking using well-known algorithms and techniques. Persona can significantly reduce end-to-end processing times for bioinformatics computations. A new Aggregate Genomic Data (AGD) format unifies sample data and analysis results, while enabling efficient distributed computation and I/O. In a case study on sequence alignment, Persona sustains 1.353 gigabases aligned per second with 101 base pair reads on a 32-node cluster and can align a full genome in \u223c16.7 seconds using the SNAP algorithm. Our results demonstrate that: (1) alignment computation with Persona scales linearly across servers with no measurable completion-time imbalance and negligible framework overheads; (2) on a single server, sorting with Persona and AGD is up to 2.3\u00d7 faster than commonly used tools, while duplicate marking is 3\u00d7 faster; (3) with AGD, a 7 node COTS network storage system can service up to 60 alignment compute nodes; (4) server cost dominates for a balanced system running Persona, while long-term data storage dwarfs the cost of computation. \u2217EPFL \u2020U. 
Politehnica of Bucharest (work done during EPFL internship) \u2021Carnegie Mellon University (work done during EPFL internship) \u00a7Stanford University", "pdfUrls": [ "https://www.usenix.org/conference/atc17/technical-sessions/presentation/byma", "https://infoscience.epfl.ch/record/229429/files/atc17-byma_1.pdf", "https://infoscience.epfl.ch/record/229429/files/paper.pdf", "https://icservices.epfl.ch/edic/down.asp?ID=2767&pid=2659", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_byma.pdf", "http://csl.stanford.edu/~christos/publications/2017.persona.atc.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8753/d965c13633918ac0451dee25cb6bca981bf1.pdf", "s2Url": "https://semanticscholar.org/paper/05be45c608ec0c6b6f2700188e28f55f1ea910a4", "sources": [ "DBLP" ], "title": "Persona: A High-Performance Bioinformatics Framework", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "05c9330f261ed3f5aecbca28004206d9a029656d": { "authors": [ { "ids": [ "3032988" ], "name": "Ariful Azad" }, { "ids": [ "2238795" ], "name": "Aydin Bulu\u00e7" } ], "doi": "10.1109/IPDPS.2017.76", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.76", "entities": [ "Algorithm", "Breadth-first search", "Column (database)", "Data structure", "Independent set (graph theory)", "Ivy Bridge (microarchitecture)", "Manycore processor", "Maximal independent set", "Multi-core processor", "Multiplication algorithm", "Shared memory", "Sparse matrix", "Speedup", "The Matrix", "Thread (computing)" ], "id": "05c9330f261ed3f5aecbca28004206d9a029656d", "inCitations": [ "79ad275569d313354c203623eb321817542de819", "652640d1226131fbeb66aba6eab681196c2d5222" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "688-697", "journalVolume": "", "outCitations": [ "f7f4136512d2d40ba455f161e64a31cdb099b9ae", "09b11dd581fd9d00c3a55d4a49f83660bd7c3d9a", "2d7bf91ca184def17e15bf515532651fd5fe5f01", 
"7ff0fa0958783397fa8db7125205bd6ee65b4c01", "5f491a183c71b0322b16e4f5dc69538c50db79e0", "0f16f6f478b5c788dce466eb50e36c612273c36e", "3153364f8255458ac808a800bc54989000caa94f", "05af27768c4bf59dc2cc8d6b681bc5438523e587", "66549f785d13a44171fcc21899802325e7d923cd", "4c6c7694fcd56da6a2f75e4437eb86b1462f464b", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "5a3cd8c65ffcc25bef346174d1f0bc3f83c5cbbb", "0a791a760dd883342c8b8456a3e7cb75fb996ef4", "1b348075d02cc532b1a01955e21ba3062e769113", "24c9b0b05c5e957e255b854f947472f9181772a4", "ac9caa876f6f17dd7802447e061f4809f9f4731f", "3edef062698ab35fbe4cc5a5ffce633e09f8b6f2", "3983fe131eb3902f9923f35060c56546bbdc951e", "7655853a1346aa3299663c6073c322e324e60a4e", "b513711621e81d0abd042e0877ca751581a993f5" ], "paperAbstract": "We design and develop a work-efficient multithreaded algorithm for sparse matrix-sparse vector multiplication (SpMSpV) where the matrix, the input vector, and the output vector are all sparse. SpMSpV is an important primitive in the emerging GraphBLAS standard and is the workhorse of many graph algorithms including breadth-first search, bipartite graph matching, and maximal independent set. As thread counts increase, existing multithreaded SpMSpV algorithms can spend more time accessing the sparse matrix data structure than doing arithmetic. Our shared-memory parallel SpMSpV algorithm is work efficient in the sense that its total work is proportional to the number of arithmetic operations required. The key insight is to avoid each thread individually scan the list of matrix columns. Our algorithm is simple to implement and operates on existing column-based sparse matrix formats. It performs well on diverse matrices and vectors with heterogeneous sparsity patterns. A high-performance implementation of the algorithm attains up to 15x speedup on a 24-core Intel Ivy Bridge processor and up to 49x speedup on a 64-core Intel KNL manycore processor. 
In contrast to implementations of existing algorithms, the performance of our algorithm is sustained on a variety of different input types include matrices representing scale-free and high-diameter graphs.", "pdfUrls": [ "https://crd.lbl.gov/assets/Uploads/SpMSpV-ipdps17.pdf", "http://arxiv.org/abs/1610.07902", "https://doi.org/10.1109/IPDPS.2017.76", "http://crd.lbl.gov/assets/Uploads/SpMSpV-ipdps17.pdf", "https://arxiv.org/pdf/1610.07902v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/05c9330f261ed3f5aecbca28004206d9a029656d", "sources": [ "DBLP" ], "title": "A Work-Efficient Parallel Sparse Matrix-Sparse Vector Multiplication Algorithm", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "05d6a284a55c07434325f8554e67741860e38c30": { "authors": [ { "ids": [ "37224490" ], "name": "Bhushan Jain" }, { "ids": [ "2104538" ], "name": "Chia-che Tsai" }, { "ids": [ "1755646" ], "name": "Donald E. Porter" } ], "doi": "10.1145/3102980.3102991", "doiUrl": "https://doi.org/10.1145/3102980.3102991", "entities": [ "Application security", "Common Vulnerabilities and Exposures", "Machine learning", "Open-source software", "Software development process", "Source lines of code", "Static program analysis", "Trusted Computing", "Trusted computing base", "Unified Model", "Usability", "Vulnerability (computing)" ], "id": "05d6a284a55c07434325f8554e67741860e38c30", "inCitations": [ "334ec6e57110ece9f482f9ec2e85412b0be8072a" ], "journalName": "", "journalPages": "62-68", "journalVolume": "", "outCitations": [ "0be708acdf4fccbc706fd3f03bc74fad783b28a4", "57f406f355daae96c32f5deae0f9381c53147dac", "3366770807828bc508cc34ecbc07ff0406ee48c9", "36222f8eb2ccf21ca345e15186cea64506581543", "10b9a084eca0003b91bb4c7ca59cbd0139ba0131", "23bfb1a07e2fe2abeb4af58e360ac6278269c31b", "ae2f1716884e061fd0bcd06f59e1c27234677da0", "1544a52f31e1475af848ff59a5fccabff56f3355", "97e05df49bdfdfc05bdb39eefc900d6393e1d61f", 
"8206a7e3e4b0c02b3a53c7a08b944de413a58413", "268be1a8339965aa7cfaa5fe113ed34fe1b7be16", "a60d00ba42a4bed7adb3dc40cd1c32cbaffda5df", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "883a595fd76cb4dc0509a1005040286b31610059", "2dbf2b8182af61a07c2448579a0753b5b5783c8e", "03b1932785190d0fce2e3fc0384b7bd6f5efbc5c", "67ef51e20cdfa7b257c0ece9f44a75058e65a00a", "a799a3ba98a6d0706cf0345026c4ce0cc6076562", "2d0571bfecc007dfa96befea14d5a2cc94860f18", "46507bbd01d73196f848a81ea6166146e32aa42f", "14b750a0fd5a13f7494e4abf9b97718ff558f508", "41b099fde4c6d38bef7329c7e0f3beceac065202", "17886b4911ffd50d7e02a574caad34a286458b3a", "16c5e4b3dbb699b47523a5882ed7a3b6adac962b", "bb7eaa951de5ff3106d4380a6a6b79efa4574bac", "0baa8313c5fa39d9e81a4c7fed7e9e2118f2f08e", "87558eedbbf4753d2cc3ead7b0a164ce684073b2", "65f4d0bf8e2cced14603bd46f67c16f65e985b50", "2f187265b02511b92cd6ea28c6b689a6bc1ea573", "3f93a67887191069cf67034ef904f781ec2d4fe6", "b33d4c821b7c38a256cc970cbbd837157477554f", "71691ee2dbe001d599334e5389d80dd32c44a74e", "181ab946b3370034a2bc3bfa7953a7e907515631", "ec993a8dc344970f1c8b992df2fdd74a91148025", "1b12eb42a9e04af626c7ed266b2e299d7f6f96a3", "9b9d17da57e83272a53292850b5e956643a94a4d", "a1bbfcd8f2ca07d671cd940b5971efad1064d9fc", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "55edf8d36576d63851d8f5739e8d0b6b094fe5cf", "b929be2068e9bc81701b9065b16401c77bebea48", "5fbf739032dd548c1ff189e7333f05e215906a1b", "01fde8698110cf46ff48a17c65f2658dab4c323c", "91f81ebaffe183cf413423e0c189567321e6f517", "04bc01bbcfa93f72f7ea958911de3aedd7320936", "37071d0a1ff7efbd6c5eb303f8f9f105f1449bbc" ], "paperAbstract": "Nearly all modern software has security flaws---either known or unknown by the users. However, metrics for evaluating software security (or lack thereof) are noisy at best. Common evaluation methods include counting the past vulnerabilities of the program, or comparing the size of the Trusted Computing Base (TCB), measured in lines of code (LoC) or binary size. 
Other than deleting large swaths of code from project, it is difficult to assess whether a code change decreased the likelihood of a future security vulnerability. Developers need a practical, constructive way of evaluating security.\n This position paper argues that we actually have all the tools needed to design a better, empirical method of security evaluation. We discuss related work that estimates the severity and vulnerability of certain attack vectors based on code properties that can be determined via static analysis. This paper proposes a grand, unified model that can predict the risk and severity of vulnerabilities in a program. Our prediction model uses machine learning to correlate these code features of open-source applications with the history of vulnerabilities reported in the CVE (Common Vulnerabilities and Exposures) database. Based on this model, one can incorporate an analysis into the standard development cycle that predicts whether the code is becoming more or less prone to vulnerabilities.", "pdfUrls": [ "http://cs.unc.edu/~porter/pubs/hotos17-final38.pdf", "http://doi.acm.org/10.1145/3102980.3102991", "http://www.chiachetsai.com/files/hotos17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/05d6a284a55c07434325f8554e67741860e38c30", "sources": [ "DBLP" ], "title": "A Clairvoyant Approach to Evaluating Software (In)Security", "venue": "HotOS", "year": 2017 }, "05dacaee4f019fc54bd08de950bdbe97bda377ee": { "authors": [ { "ids": [ "2546078" ], "name": "Noah Wolfe" }, { "ids": [ "2383364" ], "name": "Misbah Mubarak" }, { "ids": [ "1812494" ], "name": "Nikhil Jain" }, { "ids": [ "2300557" ], "name": "Jens Domke" }, { "ids": [ "1823585" ], "name": "Abhinav Bhatele" }, { "ids": [ "1759102" ], "name": "Christopher D. Carothers" }, { "ids": [ "40211322" ], "name": "Robert B. 
Ross" } ], "doi": "", "doiUrl": "", "entities": [ "Fat tree", "Network analysis (electrical circuits)", "Network model", "Profiling (computer programming)", "Routing", "Scalability", "Simulation", "Tree network" ], "id": "05dacaee4f019fc54bd08de950bdbe97bda377ee", "inCitations": [ "867155b2db9e329794ad1ebb69f709a6756ce496" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "258-261", "journalVolume": "", "outCitations": [ "0a9c8fef61634e392f9de6f34361cc1c690f7a00", "7e06d6922e32d30bd6f7e86ae660ed7bf2e99fd2", "1fe8c9894a79f22d2edfeea0020995e714f83c38", "121b1445260daa67c5da3b94e41e8304fc81ef60", "10f3fa67bcb56322427d12f81abf49ed10198247", "296efa02ff08ad3533b1d37c79f0d8a8a963eefb", "5203210d18c94f01169bd50afcebf70cd3284898", "8cf9e252c8314e26f20b619acb6392d52abac647", "251544e7c508771ab34cb2d6b97800960cde1f1e", "a86eb622eaaae24053a158a857624470af790bb6", "491eef9f7adada860abbd274e008e7acb964ef8b", "c39c26d510c1a965c5f132edc989a598ca92b700" ], "paperAbstract": "Among the low-diameter, high-radix networks being deployed in next-generation HPC systems, dual-rail fat-tree networks are a promising approach. Adding additional injection connections (rails) to one or more network planes allows multi-rail fat-tree networks to alleviate communication bottlenecks. These multi-rail networks necessitate new design considerations, such as routing choices, job placements, and scalability of rails. We extend our fat-tree network model in the CODES parallel simulation framework to support multi-rail and multi-plane configurations in addition to different types of static routing, resulting in a powerful research vehicle for fat-tree network analysis. 
Our detailed packet-level simulations use communication traces from real applications to make performance predictions and to evaluate the impact of single- and multi-rail networks in conjunction with schemes for injection rail selection and intraplane routing.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101147" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/05dacaee4f019fc54bd08de950bdbe97bda377ee", "sources": [ "DBLP" ], "title": "Preliminary Performance Analysis of Multi-rail Fat-Tree Networks", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "05f44cddc0884c5ae7ce6502a247c502d63c922f": { "authors": [ { "ids": [ "2731972" ], "name": "Fumin Shen" }, { "ids": [ "40501686" ], "name": "Yadong Mu" }, { "ids": [ "1708973" ], "name": "Yang Yang" }, { "ids": [ "1722649" ], "name": "Wei Liu" }, { "ids": [ "1746030" ], "name": "Li Liu" }, { "ids": [ "2346105" ], "name": "Jingkuan Song" }, { "ids": [ "1724393" ], "name": "Heng Tao Shen" } ], "doi": "10.1145/3077136.3080767", "doiUrl": "https://doi.org/10.1145/3077136.3080767", "entities": [ "BQP", "Benchmark (computing)", "Binary code", "Computer vision", "Experiment", "Hamming space", "Hash function", "Linear programming", "Local optimum", "Loss function", "Optimization problem", "Program optimization", "Quadratic programming", "Software deployment", "Test set", "Time complexity" ], "id": "05f44cddc0884c5ae7ce6502a247c502d63c922f", "inCitations": [ "26f753a7d8304922dff1f1b52f8f5fc30451497a", "2411270f111a160c9289d56132651c896a5738f6", "b4ef86ba93ff26a73eb1069caa6789a2d5eb43f0", "9478a6df6e845d88f660e5b141aeab7d12f0a8ba" ], "journalName": "", "journalPages": "595-604", "journalVolume": "", "outCitations": [ "3e69456017e04b9a0ee915e815216d314383068c", "1721d05db7f9ea7eee6730150676563f65a8fb38", "1d9b302a5a004e279b984f35d01190cb59658c50", "91fd133adf2bd15ab814351b3a9e9f13f2951e38", "0756d1e7ed9e0d20f0c6e7cfbebfc7153db8d3a1", 
"5a26ec6568152731ce1667a426307ebccff5a50e", "0413a801810739b1c7ed8211607dc1c7eb5ed7b8", "310b203a7754959df711056a617634bc10ed1d9a", "07f64c359a48f865f6d0dcc767425f1e2e0beb96", "52c89ca39a9fcad716e1e43c0bd4e40101c15d64", "7458f8bfffecb1baf72e32590a1da5ca8ba923d5", "4fdaea2654512d2dfe4fa9b1a6673668c686fe65", "348605f50474c955c91c344f701a13989e81ff57", "24c9b0b05c5e957e255b854f947472f9181772a4", "877d083b2a3a75cc1bb25f770a9c5684bf5f6f44", "0e730f8ed7cfdeb458cab36e8495e2cc0ee6d6a6", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "52c0876b25a5721c4c6930d94d5308f0779734ec", "383a58de852715c8544abe60fa64d29fb7ea5688", "5ff8c048a042d37a99a9f9c5acb0d82972a45ad0", "2e72c8aa2acf362ab98667a2e2c49f6b52b656ee", "0cd032a93890d61b9bd187119abee0d6aeb899f7", "2ad9a338d81340b7b02510e7f9e390f9202ca72d", "0bc288f4e7097cc05ed8709149aa75c740e5de3c", "1ddacefa549de21f734f43016115ce7d54ab3d94", "6184ddbe780cb934f036b04dd1d28226b6bcbcce", "47f54a7953b3d167c6d94f2e8e035c2e798b3f18", "12f15d23e6e3a5b89d5872961a66106cc316f347", "236a1facf2bbd96608763363390f7acff9dd764a", "1c0c670162391106a78e601fe9ed83c814d604d4", "03488b77ec21c586a151249289f517a8e954bc30", "227db8b8d02040ca0d8853d52716be06d87c40b7", "031854648e0688c1bfc991e7597e54947928fb74", "02657848c16f9571e5f0369658023007575eac58", "313c782f18bb01933668dce56003553b49d1fc44", "0ffbef76c6ac74d6e14878484c86b60f38d016dc", "17a41aa18b8987ed87d6fc19b87d36faf8a4240c", "1379ad7fe27fa07419b7f6956af754bdb6d49558", "1c799eca7983c62f7815ac5f41787b3e552567b6", "7573ff84d71de19fe7d387bb4a6de73cb28402f4", "16a651ad4a558d428c18fa92094433de89dbd7fc", "1dac961cbf1d5a01ccd09d5a3668abc3c5a1edec", "8c4fae5e494fe27d44138e790e632bbff8a68ba6", "026050f71175d235f3f91ca0e99e994c00f9b5a6", "61181e71ca1b899b5fdaaac24daac2463b3e6c96", "0e72a9c87c320093b77f941e95abcb93e7dc1f08", "df112f4e83dd6ce1898237e0aabdb790b6b51597", "03fcaa855332fdd11d5b9ac8f369aa904347d577", "38211dc39e41273c0007889202c69f841e02248a", "d062a4995298ba64e8c8061afacf069edee0c879", 
"2a88541448be2eb1b953ac2c0c54da240b47dd8a", "12d0c11d546d91e776a170898ebf3a38c010695c", "954ec02492cecbf8a4fc1c8e37179fae613dbfc9" ], "paperAbstract": "This paper proposes a generic formulation that significantly expedites the training and deployment of image classification models, particularly under the scenarios of many image categories and high feature dimensions. As the core idea, our method represents both the images and learned classifiers using binary hash codes, which are simultaneously learned from the training data. Classifying an image thereby reduces to retrieving its nearest class codes in the Hamming space. Specifically, we formulate multiclass image classification as an optimization problem over binary variables. The optimization alternatingly proceeds over the binary classifiers and image hash codes. Profiting from the special property of binary codes, we show that the sub-problems can be efficiently solved through either a binary quadratic program (BQP) or a linear program. In particular, for attacking the BQP problem, we propose a novel bit-flipping procedure which enjoys high efficacy and a local optimality guarantee. Our formulation supports a large family of empirical loss functions and is, in specific, instantiated by exponential and linear losses. Comprehensive evaluations are conducted on several representative image benchmarks. 
The experiments consistently exhibit reduced computational and memory complexities of model training and deployment, without sacrificing classification accuracy.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080767", "http://www.ee.columbia.edu/~wliu/SIGIR17_binarizing.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/05f44cddc0884c5ae7ce6502a247c502d63c922f", "sources": [ "DBLP" ], "title": "Classification by Retrieval: Binarizing Data and Classifiers", "venue": "SIGIR", "year": 2017 }, "06018aa08f70a2c10d51e51976eaf4c560918bb9": { "authors": [ { "ids": [ "7777279" ], "name": "Yunfei Ma" }, { "ids": [ "3979901" ], "name": "Nicholas Selby" }, { "ids": [ "1761544" ], "name": "Fadel Adib" } ], "doi": "10.1145/3098822.3098847", "doiUrl": "https://doi.org/10.1145/3098822.3098847", "entities": [ "Algorithm", "Duplex (telecommunications)", "Location estimation in sensor networks", "Printed circuit board", "Radio frequency", "Radio-frequency identification", "Relay", "Sensor", "Unmanned aerial vehicle", "Video game localization" ], "id": "06018aa08f70a2c10d51e51976eaf4c560918bb9", "inCitations": [ "0d023aa7b708a02ebeb7853565c9d0f607932ae7", "d1e0fb65317c729b8c2b72461ba6f8b2d3728c42", "076cefb7b5de0ad5b6ab53402f238e97359de39c", "af8ae2a2a2d74ae63c599865beaaf54d85c69acc", "98cb0ce9ca0cc29f2468f7da50c75666fd09483f" ], "journalName": "", "journalPages": "335-347", "journalVolume": "", "outCitations": [ "14ba7b31b92233766089dfae54b53e339822f3cc", "29e9cd18af650b7e448dea668121a1d98afd3c46", "e895a0c2b989221a665868331eafbca5967436b7", "8317f40c569af2b5bb0aefbb6b07d6a991c1204e", "16ccb8d307d3f33ebb395b32db23279b409f1228", "4b2f3372baef782618daf54e59782f251c58b97d", "151831fc041a3fc19ed56bacdd8bf330d2a93eeb", "ecfa0452164fc39e6b1d63b7e298e1b74582ef8e", "ff19edf8a6eb77821a8e58ac51b62c619b538614", "291bc654cc9622d7fb71bf6507ae927ebc684153", "0c9b68449b6241478ba38c2af220b393db86e206", "04ce31e09475303e84473b4b29613204da92c9c1", 
"ceb862f41396546d5d7df3b6db194e9a45aeb34e", "7a8704e143cd93edee5ee4fd7077cdc0c0469e5c", "05fe031e53dd8990e7076a91277cb2b74e22b811", "24d96f44682195f9901dcbdd9506ac1cc1a19879", "0a5cb101a28848dfec9955ad15e2ed754d3f0bb4", "2b2d03f8b96aa1e306fb941e0318d403efbde4be", "101aaa6b7a3ebd049412265a43f8aed414f44db1", "2b91d684bced5c95172a6e847355a37969a3c9e3", "15f4d8eca1d25f6ec7fbfaa939e5e70bb4abbbcd", "e5edfbdf645a3dbcdaf7d9fcbf350c67fbbadae5", "cd068e50d8590dfea9acf829d713aef194c6ba5d", "1682daf380ecd745adc378422a3ff58eb4141b0d", "052b36fd8bde6035c11eb316c3f9a3665c0110f0", "381d605d38e372c4f3d9306aeb781f7204c29385", "5a0e84b72d161ce978bba66bfb0e337b80ea1708", "201432738e66b70616c1a442880648b79b56ca01", "d98598f0c210730856852ba0c80a061c9a227ad1", "82802e411495bbad77fa2415c6d4633dde180764", "6d8c069ea2b6b8c71638ec40799244f7e3d3284b", "af2b7426c907ba9baed2e63c1c90ed6dd7249720" ], "paperAbstract": "Battery-free sensors, such as RFIDs, are annually attached to billions of items including pharmaceutical drugs, clothes, and manufacturing parts. The fundamental challenge with battery-free sensors is that they are only reliable at short distances of tens of centimeters to few meters. As a result, today's systems for communicating with and localizing battery-free sensors are crippled by the limited range.\n To overcome this challenge, this paper presents RFly, a system that leverages drones as relays for battery-free networks. RFly delivers two key innovations. It introduces the first full-duplex relay for battery-free networks. The relay can seamlessly integrate with a deployed RFID infrastructure, and it preserves phase and timing characteristics of the forwarded packets. RFly also develops the first RF-localization algorithm that can operate through a mobile relay.\n We built a hardware prototype of RFly's relay into a custom PCB circuit and mounted it on a Parrot Bebop drone. 
Our experimental evaluation demonstrates that RFly enables communication with commercial RFIDs at over 50 m. Moreover, its through-relay localization algorithm has a median accuracy of 19 centimeters. These results demonstrate that RFly provides powerful primitives for communication and localization in battery-free networks.", "pdfUrls": [ "http://www.mit.edu/~fadel/papers/RFly-paper.pdf", "http://doi.acm.org/10.1145/3098822.3098847" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/06018aa08f70a2c10d51e51976eaf4c560918bb9", "sources": [ "DBLP" ], "title": "Drone Relays for Battery-Free Networks", "venue": "SIGCOMM", "year": 2017 }, "0631f1cdd062b257fdf8ca51728aa44f216edb64": { "authors": [ { "ids": [ "2390252" ], "name": "Tingzhe Zhou" }, { "ids": [ "3448074" ], "name": "Pantea Zardoshti" }, { "ids": [ "1687335" ], "name": "Michael F. Spear" } ], "doi": "10.1109/ICPP.2017.17", "doiUrl": "https://doi.org/10.1109/ICPP.2017.17", "entities": [ "Compiler", "Critical section", "Data compression", "Encoder", "Experience", "Experiment", "GNU Compiler Collection", "Manifest (transportation)", "Programmer", "Scalability", "Transactional memory" ], "id": "0631f1cdd062b257fdf8ca51728aa44f216edb64", "inCitations": [ "4499838e0f3b5756e8d50970dd1c502409eafed1" ], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "81-90", "journalVolume": "", "outCitations": [ "2bf4940710deb2571e93b1c922e8e7452e854afd", "a16d87f2fa0712593f0af25a5ef802775ddd3baf", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "167d2cfd31948e72243a5f442544c0d4b1f826b9", "ab3f531f3c6e4920c9ba4b437d997c0ce797f5b0", "22d3b78476f5b6c002b71b335a65f132b2a63069", "f4d2ec012d2484ba693c63a009f5dd66dafe9b4b", "917392fb11729b5b522d1ce5a00d3f23f4594e3c", "92b052d441f22dd073cbe235b58a96dc78bb48ff", "b087c5133649dbf01dfa56805a92b14f153ba3bb", "2900690eb3132a4d1536226d629727de41f38a66", "06fdfba1bb58bc7b43213594e6030935c8df4103", 
"429e313d33a82bf086b69d47eee735450cbeb4ae", "6b1099588fbd2be693c5a235f5a20e7bcb1bc4a7", "7f11c7b0dda506d532e069ccd3f323c6c3155a1d", "861fbac82ae5ec0ea654d0d95ce4d48de62419ea", "05a518c3b1a6f5c15d77f8829368677a263ff15d", "25883553e5315e32194614676f11bb012db6dafd", "14e980a3d6d5a4617ed56e9bac91f3ea5cf1654c", "3150e68dccebd9d8e371143270f6bc3942b7d69c", "57fe2b6acccbba91df0847442d2634ffef7ccfb4", "6f5e4f6a4b31c886d5fdb1e0da1237fbc7b7a3a5", "842340ba3dbf81ddb65b652c22d60ece5be2e05e", "1fffb35160cf06ddbbfa3dad44fc293ad9b29b87" ], "paperAbstract": "Transactional Memory (TM) promises both to provide a scalable mechanism for synchronization in concurrent programs, and to offer ease-of-use benefits to programmers. The most straightforward use of TM in real-world programs is in the form of Transactional Lock Elision (TLE). In TLE, critical sections are attempted as transactions, with a fall-back to a lock if conflicts manifest. Thus TLE expects to improve scalability, but not ease of programming. Still, until TLE can deliver performance improvements, transactional styles of programming are unlikely to gain popularity.In this paper, we describe our experiences employing TLE in two real-world programs: the PBZip2 file compression tool, and the x265 video encoder/decoder. We discuss the obstacles we encountered, propose solutions to those obstacles, and introduce open challenges. 
In experiments using the GCC compiler's hardware and software support for TM, we observe that both are able to outperform the original lock-based code, potentially heralding the readiness of TM to be used more broadly for TLE, if not for truly transactional styles of programming.", "pdfUrls": [ "http://transact2017.cse.lehigh.edu/zhou.pdf", "http://transact2017.cse.lehigh.edu/slides_zhou.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.17" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0631f1cdd062b257fdf8ca51728aa44f216edb64", "sources": [ "DBLP" ], "title": "Practical Experience with Transactional Lock Elision", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "063d724a6f7a376f5c276cc2c7113c68c33fc1c1": { "authors": [ { "ids": [ "12762343" ], "name": "Benjamin Fuller" }, { "ids": [ "33860798" ], "name": "Mayank Varia" }, { "ids": [ "2529763" ], "name": "Arkady Yerukhimovich" }, { "ids": [ "2374854" ], "name": "Emily Shen" }, { "ids": [ "2984519" ], "name": "Ariel Hamlin" }, { "ids": [ "1867298" ], "name": "Vijay Gadepally" }, { "ids": [ "1783649" ], "name": "Richard Shay" }, { "ids": [ "2377837" ], "name": "John Darby Mitchell" }, { "ids": [ "1939551" ], "name": "Robert K. 
Cunningham" } ], "doi": "10.1109/SP.2017.10", "doiUrl": "https://doi.org/10.1109/SP.2017.10", "entities": [ "Computer security", "Cryptography", "Database", "NewSQL", "NoSQL", "Open-source software", "Performance Evaluation", "SQL", "Usability" ], "id": "063d724a6f7a376f5c276cc2c7113c68c33fc1c1", "inCitations": [ "80621d09c3d3dd896c7e2bff083b9e702dc2ed29", "f81bf1e2fd52b978af73e4b567528a66de2e319f", "46e837585af419dc79a949fcb1cfa46a8621f9ff", "53f18a9a84c41ff532302166f4456856f3711830", "ed84133ca8ef37a273d4b187202f55c6618b953e", "f39796b6656cac1e9ddf9e4758dec9d6a8aab8d1", "27d04d11402484260517af073172724e0b6eecaf", "b7a64553739fa597268a5f4b912837aced813ab8", "00e99b1ac9e395068f90e413fcbb96f2112c1293" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "172-191", "journalVolume": "", "outCitations": [ "40438276d4744cd4ac13140ccea28ea157098075", "0e84d3a84b41c06ae8f8413fe7bd37c2d7f37c2f", "00e6e8a8dc5c4100584af8175e24616a8a5efab1", "49e72b668dcde9fe57a8ed60e6890a5622733f19", "18264d2218cd7e45bd459750a0c946023a6b845c", "85770543edf9b69a7a1e06551582906ed8cd24e9", "35b7492ff025d4b9412508504c97d8545c8d8a3f", "4527bf2b990a91a276cffd1cd65253f827247cfe", "9b5890065c9ff71b1279250c1c809bb782c31c60", "1f6cbdee0cd99b74ab2a8ffb381265286a11ea90", "4f888052bc790d2e6825ce34b494cce5112d9609", "3ae6e3f385f075c2b7b6958122c1e30fb1b54b0e", "63e6d48a08035cf111ecf09f965514dcbb2c841e", "52e1811ed88f9eed814e3a208efd1bafffd6a598", "cf3fbac4277172f24d6a8f7e2b2beb39c8ea14cf", "bcb49a06e4fb7ea831257e146073d84234f4d238", "73f31354cc9058ddc2e47a1c585b753e1592c1bf", "6e5c20d5c50aa92857a5880285674f4dee27fd96", "13868fa5a86ebde021a1c91415fb9bb718c4a804", "4af77753e00973f339fd93a27e4131047018e79c", "225c357ee5490febc4fe9ca002fbf08b29adec46", "732ae647aa75acd7b7349679a4746c0539370122", "70d2a37d5af527dfc345691e2f978f6e46dc4efe", "06fb8f5e59bac8b014e7ab70851b3568ea5a0a46", "7a9afe050785e0c51498e2978359a84307d7e368", "3181b9ce21265bbf8175314714e1535f75b3d80f", 
"18e704e31d06f955f39955cd4c785c4731e5fbd7", "6db00c7de3c9c13b7fd1c0078eefdbe506d054cd", "7ee7c3ddc35e084a03decda196866c64c359dfd6", "b274d13fce445dc10f7e0d40620b8c96ebc01f44", "7ec028ace29244cb74c105327a7e4177a34aa6bd", "db5647a233f5657913e669ce11d02aafb3fb8fa1", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "5ad1dd1aa78ba772c969aa01ee5e8ee0d255ce3d", "b4ed18399f7690c469f973c28ee1b0f61991572d", "46527c14457cf84d1cf26487d6b4c31f4825db71", "18a5f443299784479e78d9e77f175af57cb2fa2b", "02beed2e1350a0d0b01bb9622081cb93a965a716", "669a754df3cffa8f52bbfad60c44f8ae8aa83183", "2ce053e6d742031ef2da8b83d6ee5d8876b9dc2e", "09598c6fa85bb64b22816cfaef54e682cb3f3a6a", "0b3c3941b97876b1034dd341cf527d297c257000", "abaa88edcdb30b66a6adb3b098fcd4e082e45231", "75cbe27efe1c8b255102f641feba1871176c6c20", "9ea1bbb1d3302aa9504e71ca42e1c19c09e310e0", "5623dec3a4fe0e6c45f3422d1840bc463cbad3c9", "0df6726c1d83b1e0d6c6580a1e2594519590e38e", "11ef7c142295aeb1a28a0e714c91fc8d610c3047", "083ce3238ea0f3b94f92c850767354475195f678", "1c799eca7983c62f7815ac5f41787b3e552567b6", "4cee38d9d088cf021bc5f5b9fda6764feeb1806a", "dd186d6826a0bc007fd02bafed6861f99b2f4ef1", "a344060ddde7b86e8f2105ed8b96a54954ecc57c", "4b510ec66f9ca8a3248427367e1c627e663ecad2", "fdae98ed41d0324e7484d5e1a0a24e0aab56233f", "2ef1d7024bb95d908857f4113c1880b18d4d96f2", "685c70f73abff7ce4c16282cfa5a36478ef4cb8b", "2898cd36fcaaf2e1dc44f555cc4685545ebc5178", "47564fdfc63a1a36102b8b6c74f978bbc5190c5a", "19c3736da5116e0e80a64db35afe421663c4b4a8", "3d8e636e82339b83feda41b689ef92b462d9420d", "a560e4a8264280ff5c4246d502beb351e564dea2", "8a37efc82e54353d387cfb073f9379c053988aef", "019d8ba2274b5555bb71baebf76af35de23ef988", "ae2bb2dd98ce5db50703005e3c6c7b43045621ca", "1da3e391252ea1a346744a6dade6983ec5c5babf", "080ed793c12d97436ae29851b5e34c54c07e3816", "1cb9aa0116af7d9e61ffabfa951153e9f4e43779", "140a563a1ef271b7dcd0675225cb543d92636f6c", "0227e83202440c13c4c2b97b49ef7c64dfbd52c3", "076e9f5d5b3e813b0cfa5dd3e47f1b8591136bf2", 
"35704ca4fbf9a0af598877f1b0cfa84603e26287", "99e7d185f1e70474806c9dedfc576e8525bb5c92", "a94205aed0148ae6d00986aef009e5e05d046f43", "1360a9e2fcd1504effe81f54bbd20ab5b5a07685", "8c9982a59457d448ac899f8e70e277e69f5d7942", "90569b27c21f400cd818a58005fecd9f2033048a", "41d16b9f8bc2e3af15067d9227deaef88c81a28d", "629a62c681a03ab7875d0883a986978b280e76d3", "1fb4a73e144257d2d5ad7990db319720a960b07a", "45ba5e720039754ccd07b936714f64f8b6355e5a", "c484e351445232ea526c8d73b84bc529ffcddee1", "92eaba06af12761b5c64b84e6028d21cd05af9dd", "be2f737bd30976386b069f6edc61371dcda9fec8", "3efacce012135fa5ea952339b101761e2bb56bfb", "56d320acfad7f6e8060acb77191c179844fab3cb", "8f3f63773aa801b2cdcfa2e5699e3abb9aed7443", "ab288ba0ecf4027d5eec90d8debbb06dde0076e3", "1939f908a8b47e16617bbba22d08e97ad3eadba8", "3864cfb41db27452cefe3b1f64f05623690201ab", "42435ffebeae0b09810c61b661c88cb51e36c4c0", "6871b95c14dccca7636b498b5d363a743c5288e6", "b07fb9ae940ddaaa690de67cee2029e4373fdbc4", "8776c004a351e23be9ef7a4d214da4fc93260484", "4646e3be0ab8ef61846c4ef954677376d0f880fb", "3661adc4d70f140e86957b2dff527a5676079adb", "17e20f8493523809f0189476d52d3b2ed13e0e3f", "10130d16b8ceb9aea868c416df56e929a0631cdc", "57a40440be2c8f38ebde172cddca3925d2827c62", "ad0c881078b2cd3d69b5cc2ef63bcdb72070298e", "26c4c1dd27fdb449fe0267eac595930766917878", "b748b52dc7e9de9c0d96ba31faaf4ea87fc28892", "0e0427aedfed65c8dd688c094b181feacf4eaab4", "a3072a044575760e0a6fd20e353f3f44d86f692d", "961487973d4b33f96406fddbfcf1235dc587571f", "d878fb5a7d1ea14649f590de5ebb806d1414f0b6", "3cb49b5c2614d5d54fc0827567de9dc7d8dfb8f3", "1c730d368ee9b381907a95bc3638cffbc0968bcd", "0b6d88342563acaf5f7ac34bec19cfdef6c77eff", "9543618825efde94e081aa4820f4852fc973963d", "14dc5effd28d22cf7fc8aa6a1be8ae2d37859891", "1cf87af22b3b4dd0ff1144d861e0573121d8de2e", "2e72e6c022dd33115304ecfcb6dad7ea609534a4", "85a3c518ae3f0d77a2a16e3a45761be2c8517b19", "965299efb158bace13e71ab11c6d547d6234d009", "3d989af52b72e9ccc7bcc215c0f25a4fb62aee12", 
"20b63210954f7c5a70664f301dcd7196856ccfa7", "1ab81ae077d6944fbff279a7a8a38df48f75eadf", "a0835c336ccc0e2f6f7cde1ba9c214996a70f1f3", "0832933a67612dc18e0f70cb0ed949fef1a830a5", "94bb86c99e93b8ae04eed9a48d4fa138439f7ef0", "057337e87c84640a3ad32ad499908e83696c0147", "3cbf6df60d91d4f2422827c46ec4f85fb45bbeb7" ], "paperAbstract": "Protected database search systems cryptographically isolate the roles of reading from, writing to, and administering the database. This separation limits unnecessary administrator access and protects data in the case of system breaches. Since protected search was introduced in 2000, the area has grown rapidly, systems are offered by academia, start-ups, and established companies. However, there is no best protected search system or set of techniques. Design of such systems is a balancing act between security, functionality, performance, and usability. This challenge is made more difficult by ongoing database specialization, as some users will want the functionality of SQL, NoSQL, or NewSQL databases. This database evolution will continue, and the protected search community should be able to quickly provide functionality consistent with newly invented databases. At the same time, the community must accurately and clearly characterize the tradeoffs between different approaches. To address these challenges, we provide the following contributions:1) An identification of the important primitive operations across database paradigms. We find there are a small number of base operations that can be used and combined to support a large number of database paradigms.2) An evaluation of the current state of protected search systems in implementing these base operations. This evaluation describes the main approaches and tradeoffs for each base operation. 
Furthermore, it puts protected search in the context of unprotected search, identifying key gaps in functionality.3) An analysis of attacks against protected search for different base queries.4) A roadmap and tools for transforming a protected search system into a protected database, including an open-source performance evaluation platform and initial user opinions of protected search.", "pdfUrls": [ "https://arxiv.org/pdf/1703.02014v1.pdf", "https://doi.org/10.1109/SP.2017.10", "https://arxiv.org/pdf/1703.02014v2.pdf", "http://arxiv.org/abs/1703.02014" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/063d724a6f7a376f5c276cc2c7113c68c33fc1c1", "sources": [ "DBLP" ], "title": "SoK: Cryptographically Protected Database Search", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "0646a88dfd7e7ce7233041eaad62076ccc55624c": { "authors": [ { "ids": [ "3243432" ], "name": "Andrea Bittau" }, { "ids": [ "1758110" ], "name": "\u00dalfar Erlingsson" }, { "ids": [ "2286904" ], "name": "Petros Maniatis" }, { "ids": [ "1761190" ], "name": "Ilya Mironov" }, { "ids": [ "1806005" ], "name": "Ananth Raghunathan" }, { "ids": [ "4260658" ], "name": "David Lie" }, { "ids": [ "2830166" ], "name": "Mitch Rudominer" }, { "ids": [ "35798085" ], "name": "Ushasree Kode" }, { "ids": [ "2521134" ], "name": "Julien Tinn\u00e9s" }, { "ids": [ "11435780" ], "name": "Bernhard Seefeld" } ], "doi": "10.1145/3132747.3132769", "doiUrl": "https://doi.org/10.1145/3132747.3132769", "entities": [ "Algorithm", "Blinding (cryptography)", "Cryptography", "Differential privacy", "Enthusiast System Architecture", "Exception handling", "Experience", "Experiment", "Pipeline (computing)", "Privacy", "Randomness", "Scalability", "Secret sharing", "Software deployment", "Systems architecture", "User (computing)" ], "id": "0646a88dfd7e7ce7233041eaad62076ccc55624c", "inCitations": [ "ed7682b9ab6c19d87e0275cf2823b2d824b13e40" ], "journalName": "", "journalPages": 
"441-459", "journalVolume": "", "outCitations": [ "491bbfcc4d5b8d322b312fb18bbc5d9f7bc5b2d4", "1c7a9933cfad8dfcc4dd0e2e4f2100dba3c34a08", "1bb07c114cb447552d36a95445cc207f496d85aa", "312fcd1ab4e5187ad5f79701d9abd730cd6d5642", "3b2849c55fe6fd719cc298be03292a93ce78d107", "3dfce4601c3f413605399267b3314b90dc4b3362", "09de90384bacfdd82e4503dc155ab6868f953eb3", "4fbf4fd303d969606edc6f1cb42642ab2d11ce14", "5523832d7e841d5aa3336b7d0ae4c14d784fbed8", "78e2d6b7a671d8e53f207adff088833fd7606e13", "2e8b9a7a085a8bc18783e76b776c6e780116efd8", "06eaabcdf0c1f578c2442b3e7a0858a8dc5679c8", "795d820bad517834441a78117effaf661fc58933", "0dca2b86c0bffff6b47ce03cb1b01545d8d2cbb2", "2c9f073340ab55613f0e25e444bbd09b7851aa23", "2e037e25c0af281f6b699d238e79aa7074e9fe06", "3ca369fa2cadb403db7ac5e75deefd9acbb10723", "64028c85cd7b7e42f208e29734028572d7735c61", "35516916cd8840566acc05d0226f711bee1b563b", "3d2af27adb6fe7751b91248a5b4da60e032bf4f8", "9a5c5e7f30c1db4aa91b55829e8fe1669213f65e", "2087f336b0eb38fc60a24a32ec0821fc8fe2b2aa", "61d968e9c2e067398e8ba325dc14170b686e68e4", "70fda5147aedd42c64143a464117b5ffde18a2e4", "8e48f3892878edc64a312b1bc20299508522d16f", "2173406c4ca5fff0de66e8cbed4cb01ca959cb31", "415012ec86c7a6acebd34bf7eb02eff46dd96e68", "1c2d161c5bb15efd73311d0a3223aee773d38cca", "0b52c0dddb4b37abfd6fb3657c81342777ff62bc", "0d7ecfcbca1e8884944073c82886cef76fc53adf", "2106bb4ef13e35ad7640376b0ba3b6261b82fe22", "34bdd36330946cf9b377d274bdaaa7dc41888aa2", "28a6e6ceb0a92de7a49048d094321af5fab227a0", "03b01daccd140e7d65358f31f8c4472d18573a5a", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "5a51a18a63fc57cd9ef206bcfdb303933c2bcfb9", "11a651253f8603c01ed29c00c76673a67bd291c7", "02dad9c51e3a2e2117ffc41d624de4a090271d1f", "31445e3bd3672ed743c4a089cc0db4f23357f0f2", "1026527f60f4df0c523dc4b4b07a06274f1f0517", "4df9835710f8a684854da04dd68cd472ca214f12", "b532099ff8b67049f292cd62700dca37fc2be623", "249d2e15cfcd531e3f91d561877d5b23d31ec2e8", "39fbcdd2253f7749fc5b8a91db2eca71f618887c", 
"9771e382794af067f7360f1cac7b6d2a1e6dd1c4", "9209c11ec4a63c5e6b4c967e49e6fc9ae3e169f2", "0a84bbe4dd47d99bc77010931b7daffecf8c1a11", "12da6c20e0743e9434894359c387f6a72e7e91f0", "11ccb00bd3ff98e3f46a51cca059241c70954d4f", "5c15b11610d7c3ee8d6d99846c276795c072eec3", "ebc5a18b1b043144d848e2e2c2563dc71b7bf815", "19db199fd25aa604618d13e80cf317f0858d5604", "232bb5913f12cdcf8419a3e44d06a5d6fffe2c9b", "45267521d36920a49ee5cc64e6f5e50bd5f029e3", "4342a1109fef5aaaf438de2ffcb82d5a71a8ab95", "d4312997dd7ade9cf411a0997e36c8c289e6ab68", "016bb661e767d8fa2491743d289b11cfc41e3efb", "2833e9958db9721550f2dab609ef7124875dc12a", "0f2d0df8d3fdc8db93d776dbc565fda4b8a3b7a1", "07a5809436a9ade7bee9fdc9a970c23263a580d0", "4954fa180728932959997a4768411ff9136aac81", "5ff155a684fdae3da603e615095084567dcfc3ea", "1265c6dad37e184955a97ef5fd018bf45d2aca88", "16e85d76e57739da3082ca9dd4868b240c0b3c86", "1e2a5126486820abea0cdaccf996c975b9103443", "17fac85921a6538161b30665f55991f7c7e0f940", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824" ], "paperAbstract": "The large-scale monitoring of computer users' software activities has become commonplace, e.g., for application telemetry, error reporting, or demographic profiling. This paper describes a principled systems architecture---Encode, Shuffle, Analyze (ESA)---for performing such monitoring with high utility while also protecting user privacy. The ESA design, and its Prochlo implementation, are informed by our practical experiences with an existing, large deployment of privacy-preserving software monitoring.\n With ESA, the privacy of monitored users' data is guaranteed by its processing in a three-step pipeline. First, the data is encoded to control scope, granularity, and randomness. Second, the encoded data is collected in batches subject to a randomized threshold, and blindly shuffled, to break linkability and to ensure that individual data items get \"lost in the crowd\" of the batch. 
Third, the anonymous, shuffled data is analyzed by a specific analysis engine that further prevents statistical inference attacks on analysis results.\n ESA extends existing best-practice methods for sensitive-data analytics, by using cryptography and statistical techniques to make explicit how data is elided and reduced in precision, how only common-enough, anonymous data is analyzed, and how this is done for only specific, permitted purposes. As a result, ESA remains compatible with the established workflows of traditional database analysis.\n Strong privacy guarantees, including differential privacy, can be established at each processing step to defend against malice or compromise at one or more of those steps. Prochlo develops new techniques to harden those steps, including the Stash Shuffle, a novel scalable and efficient oblivious-shuffling algorithm based on Intel's SGX, and new applications of cryptographic secret sharing and blinding. We describe ESA and Prochlo, as well as experiments that validate their ability to balance utility and privacy.", "pdfUrls": [ "http://arxiv.org/abs/1710.00901", "http://www.eecg.toronto.edu/~lie/papers/prochlo_sosp2017.pdf", "https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46411.pdf", "https://arxiv.org/pdf/1710.00901v1.pdf", "http://doi.acm.org/10.1145/3132747.3132769" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0646a88dfd7e7ce7233041eaad62076ccc55624c", "sources": [ "DBLP" ], "title": "Prochlo: Strong Privacy for Analytics in the Crowd", "venue": "SOSP", "year": 2017 }, "06479f0b9b71e8c0744ec7291ab0867ab9ec5059": { "authors": [ { "ids": [ "2522763" ], "name": "Jorge Albericio" }, { "ids": [ "39885275" ], "name": "Alberto Delmas" }, { "ids": [ "39891957" ], "name": "Patrick Judd" }, { "ids": [ "15384887" ], "name": "Sayeh Sharify" }, { "ids": [ "37606908" ], "name": "Gerard O'Leary" }, { "ids": [ "2655550" ], "name": "Roman Genov" }, { "ids": [ "1782536" ], 
"name": "Andreas Moshovos" } ], "doi": "10.1145/3123939.3123982", "doiUrl": "https://doi.org/10.1145/3123939.3123982", "entities": [ "16-bit", "8-bit", "Artificial neural network", "Bitap algorithm", "Computation", "Convolutional neural network", "Data parallelism", "Deep learning", "Degree of parallelism", "Fixed-point arithmetic", "Neural Networks", "Parallel computing" ], "id": "06479f0b9b71e8c0744ec7291ab0867ab9ec5059", "inCitations": [ "9a4e0f2ad4c854de475748f519f3c6340f5b9412", "2512a6ced085503c399ee512ecaeb88606081261", "381b008b49d04c1bd5ff00649521fa028b9d3ea8", "8d321487858b3c5c9fe1720629bf3f0f354a0e31", "8033f293c894eae64c9f379dee2192bfe4f7883a", "dfbddd14c1b517c5a9478961534e765b0eac513f" ], "journalName": "", "journalPages": "382-394", "journalVolume": "", "outCitations": [ "37e6d234ed6caf7abcc489a30c9c3c6bf1ad74a1", "68837728232463651283edbb7ef0c93b2f502b2b", "dbf33161c1dc9c94dba25b233b2376c8f95c9bd2", "c3013c2068b41cb1235707e097ec797592510e28", "2bb28f6105ca30b2bc1ba91578234ecb12e788a6", "49b4094f2c313a92da4461572c0bef80b0d7d649", "0a3ad4a0ec19926128e307e7ec178fd7288b5a37", "5bfecd14937da569eabec0afea710db846d3899b", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "55bc52bbec8972d62874bcbe169dac573b57d1df", "e277762804aa4615b2258fbd367d91326c00b90e", "62124e3cb35d9d34159d2d4c673c0f7d04cfa533", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "3d8cbd72a4effa7ee4ad20f9644719819343ab7a", "02c78232075ac431834e3442dcb2954d4e708def", "9f1f065bf08cd90431cc051267a708f56436cd82" ], "paperAbstract": "Deep Neural Networks expose a high degree of parallelism, making them amenable to highly data parallel architectures. However, data-parallel architectures often accept inefficiency in individual computations for the sake of overall efficiency. We show that on average, activation values of convolutional layers during inference in modern Deep Convolutional Neural Networks (CNNs) contain 92% zero bits. 
Processing these zero bits entails ineffectual computations that could be skipped. We propose Pragmatic (PRA), a massively data-parallel architecture that eliminates most of the ineffectual computations on-the-fly, improving performance and energy efficiency compared to state-of-the-art high-performance accelerators [5]. The idea behind PRA is deceptively simple: use serial-parallel shift-and-add multiplication while skipping the zero bits of the serial input. However, a straightforward implementation based on shift-and-add multiplication yields unacceptable area, power and memory access overheads compared to a conventional bit-parallel design. PRA incorporates a set of design decisions to yield a practical, area and energy efficient design.\n Measurements demonstrate that for convolutional layers, PRA is 4.31X faster than DaDianNao [5] (DaDN) using a 16-bit fixed-point representation. While PRA requires 1.68X more area than DaDN, the performance gains yield a 1.70X increase in energy efficiency in a 65nm technology. With 8-bit quantized activations, PRA is 2.25X faster and 1.31X more energy efficient than an 8-bit version of DaDN.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123982", "https://openreview.net/pdf?id=ryeF7mVFl", "https://arxiv.org/pdf/1610.06920v1.pdf", "https://arxiv.org/pdf/1610.06920.pdf", "http://arxiv.org/abs/1610.06920", "https://openreview.net/pdf?id=By14kuqxx" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/06479f0b9b71e8c0744ec7291ab0867ab9ec5059", "sources": [ "DBLP" ], "title": "Bit-pragmatic deep neural network computing", "venue": "MICRO", "year": 2017 }, "0665e1a0ca3fb7dec851eeaf830ed277fab26572": { "authors": [ { "ids": [ "1800764" ], "name": "Yan Zheng" }, { "ids": [ "1795436" ], "name": "Jeff M. 
Phillips" } ], "doi": "10.1145/3097983.3098000", "doiUrl": "https://doi.org/10.1145/3097983.3098000", "entities": [ "Best, worst and average case", "Data compression", "Data point", "Kernel (operating system)", "Smoothing", "Time complexity", "Time series" ], "id": "0665e1a0ca3fb7dec851eeaf830ed277fab26572", "inCitations": [ "15a122e1f5efc7727a4610c1313b7b1217d4ec93" ], "journalName": "", "journalPages": "645-654", "journalVolume": "", "outCitations": [ "3c29f6a47c955382ccbc26f258123fcce627a00b", "663bdbb58506774f70366c03e4e974fc7085548b", "746941d4bb73b64238c2f16a048220e410017fbd", "1592fe924114866c1ac559bae33ea789930daa98", "03ef1b0c3ca5ad5eac8d379b59d150aed59294b6", "225f78ae8a44723c136646044fd5c5d7f1d3d15a", "1942364cc6b0399bf099f57bd0d322b2f5c0544b", "a36b028d024bf358c4af1a5e1dc3ca0aed23b553", "0fe0aa0a80b8eeb2ea9005e2d96951e3bf4f3f59", "ba3c88fecf39ff7db405deb2fbd298685e8f9b70", "044c1f31a27014301b5c879406275b70d62f320a", "115313a919c93fe8741f4a2431324f3b67036189", "3ff7d797f59971b1df94d14ac8bc931b136f10f2", "499ffc99eeee63ceff6fc33f732b590e4d3352b9", "3818cad77f4fab71163ec5f741d2d142f10926df", "12bafd25fa3a9d1552480555f7fcdb7e7fd8b7e9", "1c2dc6f0cb5a808924838e823b1cbca0eca21799", "e3cb014f1663b7a4236081c0c8d0667f4730b171", "1cdd3c62172b7598cd090e349d38e9644734edfd", "7f750853c849dbdf08a17fcd91ab077fb6d8a791" ], "paperAbstract": "Kernel regression is an essential and ubiquitous tool for non-parametric data analysis, particularly popular among time series and spatial data. However, the central operation which is performed many times, evaluating a kernel on the data set, takes linear time. This is impractical for modern large data sets.\n In this paper we describe coresets for kernel regression: compressed data sets which can be used as proxy for the original data and have provably bounded worst case error. 
The size of the coresets are independent of the raw number of data points; rather they only depend on the error guarantee, and in some cases the size of domain and amount of smoothing. We evaluate our methods on very large time series and spatial data, and demonstrate that they incur negligible error, can be constructed extremely efficiently, and allow for great computational gains.", "pdfUrls": [ "https://arxiv.org/pdf/1702.03644v2.pdf", "http://arxiv.org/abs/1702.03644", "http://doi.acm.org/10.1145/3097983.3098000", "https://arxiv.org/pdf/1702.03644v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0665e1a0ca3fb7dec851eeaf830ed277fab26572", "sources": [ "DBLP" ], "title": "Coresets for Kernel Regression", "venue": "KDD", "year": 2017 }, "066ad1afcb6344a8b65d9249d694f5a2605247b4": { "authors": [ { "ids": [ "2056741" ], "name": "Doowon Lee" }, { "ids": [ "1683260" ], "name": "Valeria Bertacco" } ], "doi": "10.1145/3079856.3080235", "doiUrl": "https://doi.org/10.1145/3079856.3080235", "entities": [ "Bare machine", "Baseline (configuration management)", "Computation", "Computer architecture simulator", "Consistency model", "Execution pattern", "Memory ordering", "Model checking", "Multi-core processor", "Requirement", "Simulation", "Software bug", "Software verification and validation", "Sorting", "System Simulation", "Thread (computing)", "Topological sorting", "Type signature" ], "id": "066ad1afcb6344a8b65d9249d694f5a2605247b4", "inCitations": [], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "201-213", "journalVolume": "", "outCitations": [ "4bb640b092cbbf55ed4d1de8edb79ba8a79b0ebd", "12a233efbdd874afdeb8a1e6fe71c4ccff758175", "4bc8a08d77ca193f4506c617f626d0a07afd2f89", "30dac5d73a5aebc5dcb4671ee4d915267a6b78ac", "ad913bd3d95fc9e5f6888974e04726eb441a6fc6", "370d546ab1ce3988194cbf835ee09e73e3733b41", "6f5907ec67ed7266a34a8094a22bae322eb2aae4", 
"428449baa75b4af7987109b9fcb942c9f0b6295d", "66c4e96ccc884601b0eb79fb680f83fdfd0c05cf", "4d5099d75d8aa8f1328bccb1f0b8578f35b42526", "7f80ae3a81d063083b049b91cd0299f09bbb4696", "447563c219ca8241334a69d2f2caf5b855277f1a", "31181e73befea410e25de462eccd0e74ba8fea0b", "24366914b06dfddff8d343a7f93b89820d525d75", "837be6dc51f0ccf8135ebaea8a48afc3faf5b14b", "6308c1a4bc95986c594fbab8abed87a780c59e6a", "4292384b0b798feea238c7f0437d88476e342771", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "36f5c34fd648301984a68b638dc9cd726a108853", "5d8223b9caf90736f4ca75750290a1a25f66b7a8", "5cb1012da36f41f0d56777d4ad0ba4d5f42390c2", "f0ca54ebf208c7ef592b2ccf4e8961ec5524633c", "10f1faeec4ee2158b8535b249a20de5419998153", "aeea9a3480fd211bb44558353c1751dbf1df3f19", "d4c5e14e27b45266532c74f6aa0d51a1a4280e7c", "86fb520c6596ae0780ba6c24541a7304c53f3891", "15bb80d7f7d5a76ba0955e2a1a7b79852ca89509", "3eae0271717f6b4d65024abf04e5d98aef41d748", "a28f4c45ad72a50f56f7f9df13762c739230b646", "3a850f54e6dea4728aaa6a71ba222b7d612cd2b1", "1811222cd3f4116d586bf752745c7500230983cb", "15c8550942ee0191bb34d177d7e653b2f3cb6eff", "62bd72d7a4160bd1a35191c51137d11cfbe30cf7", "957272cdbd0a44a44dc7d1e91eb2fa5bcf85e6e0", "1984a63dac62e537ae2bdb7372355e5891e0e05b", "2beebe471d23ca56cc07a254d74065574ff809e4", "27b94d947c4b094f482e9689412e1f753b52a62f" ], "paperAbstract": "This work presents a minimally-intrusive, high-performance, post-silicon validation framework for validating memory consistency in multi-core systems. Our framework generates constrained-random tests that are instrumented with observability-enhancing code for memory consistency verification. For each test, we generate a set of compact signatures reflecting the memory-ordering patterns observed over many executions of the test, with each of the signatures corresponding to a unique memory-ordering pattern. 
We then leverage an efficient and novel analysis to quickly determine if the observed execution patterns represented by each unique signature abide by the memory consistency model. Our analysis derives its efficiency by exploiting the structural similarities among the patterns observed.\n We evaluated our framework, MTraceCheck, on two platforms: an x86-based desktop and an ARM-based SoC platform, both running multi-threaded test programs in a bare-metal environment. We show that MTraceCheck reduces the perturbation introduced by the memory-ordering monitoring activity by 93% on average, compared to a baseline register flushing approach that saves the register's state after each load operation. We also reduce the computation requirements of our consistency checking analysis by 81% on average, compared to a conventional topological sorting solution. We finally demonstrate the effectiveness of MTraceCheck on buggy designs, by evaluating multiple case studies where it successfully exposes subtle bugs in a full-system simulation environment.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080235", "http://web.eecs.umich.edu/~valeria/research/publications/MTrace-ISCA17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/066ad1afcb6344a8b65d9249d694f5a2605247b4", "sources": [ "DBLP" ], "title": "MTraceCheck: Validating non-deterministic behavior of memory consistency models in post-silicon validation", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "066e1cd75a4f37a3c58089e24ccf43eb5adf1f19": { "authors": [ { "ids": [ "39433682" ], "name": "Hung T. Nguyen" }, { "ids": [ "9398355" ], "name": "Tri P. Nguyen" }, { "ids": [ "17903381" ], "name": "Tam N. Vu" }, { "ids": [ "1745290" ], "name": "Thang N. 
Dinh" } ], "doi": "10.1145/3084457", "doiUrl": "https://doi.org/10.1145/3084457", "entities": [ "Biological network", "Estimation theory", "Experiment", "Ground truth", "Influence line", "Information", "Sampling (signal processing)", "Shoe size" ], "id": "066e1cd75a4f37a3c58089e24ccf43eb5adf1f19", "inCitations": [ "5b7452ed5791f3b4497a5cb0fd95699dcaa14119", "e2b7b9bd2c9fb7baae1f7e5de4b994229d53031a", "25113be728f6126f31683bd02460c8a72bd3e270" ], "journalName": "", "journalPages": "63", "journalVolume": "", "outCitations": [ "b790f9ae49cdee5354eacfc7897ab9752acd5e2a", "23d85a0008429845870780c6db3640c05165acaf", "6a5ae0e083ab69153ce395874c8dddcd830dfcfd", "8c1fa3949409eb65017a4625a7351039f72ebf04", "4bb0f607c1f6be38ca720ad6913577a778cc2f15" ], "paperAbstract": "Estimating cascade size and nodes' influence is a fundamental task in social, technological, and biological networks. Yet this task is extremely challenging due to the sheer size and the structural heterogeneity of networks. We investigate a new influence measure, termed outward influence (OI), defined as the (expected) number of nodes that a subset of nodes S will activate, excluding the nodes in S. Thus, OI equals, the de facto standard measure, influence spread of S minus |S|. OI is not only more informative for nodes with small influence, but also, critical in designing new effective sampling and statistical estimation methods.\n Based on OI, we propose SIEA/SOIEA, novel methods to estimate influence spread/outward influence at scale and with rigorous theoretical guarantees. The proposed methods are built on two novel components 1) IICP an important sampling method for outward influence; and 2) RSA, a robust mean estimation method that minimize the number of samples through analyzing variance and range of random variables. Compared to the state-of-the art for influence estimation, SIEA is Ω(log4 n) times faster in theory and up to several orders of magnitude faster in practice. 
For the first time, influence of nodes in the networks of billions of edges can be estimated with high accuracy within a few minutes. Our comprehensive experiments on real-world networks also give evidence against the popular practice of using a fixed number, e.g. 10K or 20K, of samples to compute the \"ground truth\" for influence spread.", "pdfUrls": [ "http://doi.acm.org/10.1145/3084457", "https://arxiv.org/pdf/1704.04794v1.pdf", "http://arxiv.org/abs/1704.04794", "http://doi.acm.org/10.1145/3078505.3078526" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/066e1cd75a4f37a3c58089e24ccf43eb5adf1f19", "sources": [ "DBLP" ], "title": "Outward Influence and Cascade Size Estimation in Billion-scale Networks", "venue": "SIGMETRICS", "year": 2017 }, "0670ef42e5c9f28d547ba0dfb816fdb99ca2992f": { "authors": [ { "ids": [ "2172728" ], "name": "Luke Valenta" }, { "ids": [ "2406885" ], "name": "David Adrian" }, { "ids": [ "34880513" ], "name": "Antonio Sanso" }, { "ids": [ "39974279" ], "name": "Shaanan Cohney" }, { "ids": [ "2188415" ], "name": "Joshua Fried" }, { "ids": [ "36420322" ], "name": "Marcella Hastings" }, { "ids": [ "2349976" ], "name": "J. 
Alex Halderman" }, { "ids": [ "2842650" ], "name": "Nadia Heninger" } ], "doi": "", "doiUrl": "", "entities": [ "Cryptography", "Diffie\u2013Hellman key exchange", "Directory System Agent", "HTTPS", "Internet Key Exchange", "Key escrow", "Key exchange", "Library", "Load balancing (computing)", "Open-source software", "OpenSSL", "Opportunistic TLS", "Server (computing)" ], "id": "0670ef42e5c9f28d547ba0dfb816fdb99ca2992f", "inCitations": [ "00e363001d11bc47b6e347d7dd2db6addfdf7858", "12a969cd25b06ba88eb2ce92903b1d7c8f959f1b", "12de15f0e283f748af4f1820c9a3d4361c026561", "baa58c6a4976094a0186b2c58411a4fa3537f777" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "995", "journalVolume": "2016", "outCitations": [ "b2a7da3aafc4787d3e1370fa9609e381ea296722", "fb46335b5a7b4cad0fd1935b97f90ebc443ad8e4", "201b0a185dda51629d7b6fdef3b380a0beaba455", "2b6ce083906634e3c3b084e4c9139fb58f082df6", "48fc8f1aa0b6d1e4266b8017820ff8770fb67b6f", "082d2b922818331e2994aeebaaccb776cfa09145", "5635e383c4a9edb01c35e07e83196ab0ba85f129", "a56bb06de510bd9f947372dea98b3dede408ceb2", "4c165fb087b4861141577a07571c46fbd2324a69", "19e5adf691d70deff696dfd27a521009cb1cf437", "d21f261bf5a9d7333337031a3fa206eaf0c6082c", "2dbcc7077a01981679007eceac6c6659a1c18200", "6074680689aa03260d46a27ee969a2ce95680b30", "372e528fb9de9f062496af4530ea3e2ec5df02a1", "0f17ca31699de87aaa09dd31205b146bc472c861", "8169647e744faf5f08de3d5af69a22acf9532563", "271be477fda5bb096706bbb2615240dd3282f6db", "14aec370592b692a7341a77ed18471fd39db8a4a", "72ef5bb2f8459b397942ab66e196d32db4fdb80a", "8353b7ce17536d7c4c4d11e379781fff4f4c45a6", "fd1d864a95d7231eaf133b00a1757ee5d0bf0e07", "cf5dfdbb0b8d1c673726178f37f499b77fcc7f03", "d6d0f49fe811a32341a055eb914c528879398904", "06e8ec86953a2cce5b604bf03ef6d677a3d85f8a", "444a738feecb3f7b911886e7b5ec0d75afd12b6b", "4e33e9d1aa91aa78819ec9700d1024e0f0cdef6c", "455e4078715f19a2a97e61b4acc6670bc73a3e8c", "179962ca03a85002b7bd45d9f2cae9fd32b5bfca", 
"32e6b1eeebaec6dbc380a284dca837d20ef281a5", "834e9ad34048740f59826d3be75d5635fc7eb252", "1020f10f733cba8562db77fcceef47145118b8bc", "3b03935dfc89c0cad63e05976c21fef6c9fb4190", "418cffbc1313eab9a4650b00161bb4b8897a2569" ], "paperAbstract": "Several recent standards, including NIST SP 800-56A and RFC 5114, advocate the use of \u201cDSA\u201d parameters for Diffie-Hellman key exchange. While it is possible to use such parameters securely, additional validation checks are necessary to prevent well-known and potentially devastating attacks. In this paper, we observe that many Diffie-Hellman implementations do not properly validate key exchange inputs. Combined with other protocol properties and implementation choices, this can radically decrease security. We measure the prevalence of these parameter choices in the wild for HTTPS, POP3S, SMTP with STARTTLS, SSH, IKEv1, and IKEv2, finding millions of hosts using DSA and other non-\u201csafe\u201d primes for Diffie-Hellman key exchange, many of them in combination with potentially vulnerable behaviors. We examine over 20 open-source cryptographic libraries and applications and observe that until January 2016, not a single one validated subgroup orders by default. 
We found feasible full or partial key recovery vulnerabilities in OpenSSL, the Exim mail server, the Unbound DNS client, and Amazon\u2019s load balancer, as well as susceptibility to weaker attacks in many other applications.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/measuring-small-subgroup-attacks-against-diffie-hellman/", "https://jhalderm.com/pub/papers/subgroup-ndss16.pdf", "http://www.cis.upenn.edu/~lukev/files/subgroup-slides.pdf", "https://eprint.iacr.org/2016/995.pdf", "http://eprint.iacr.org/2016/995.pdf", "http://eprint.iacr.org/2016/995" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0896/cf42b12df1917e67dbc82060752e91462c1c.pdf", "s2Url": "https://semanticscholar.org/paper/0670ef42e5c9f28d547ba0dfb816fdb99ca2992f", "sources": [ "DBLP" ], "title": "Measuring small subgroup attacks against Diffie-Hellman", "venue": "NDSS", "year": 2016 }, "067b78da4dc2309510d4a74d4606cc1a46426581": { "authors": [ { "ids": [ "1722767" ], "name": "Peng Wang" }, { "ids": [ "1726699" ], "name": "Di Wang" }, { "ids": [ "1762600" ], "name": "Adam Chlipala" } ], "doi": "10.1145/3133903", "doiUrl": "https://doi.org/10.1145/3133903", "entities": [ "Algorithm", "Amortized analysis", "Analysis of algorithms", "Best, worst and average case", "Broadcast automation", "Coq (software)", "Data structure", "Functional programming", "Heuristic", "Invariant (computer science)", "Master theorem", "Merge sort", "Pattern matching", "Programmer", "Recurrence plot", "Red\u2013black tree", "Shortest path problem", "Time complexity", "Type safety", "Type system", "Usability" ], "id": "067b78da4dc2309510d4a74d4606cc1a46426581", "inCitations": [ "2d92f591b555e101ce083a73a6cbbfe68e3016d7", "785b84dd4f9506dc4fe76f47d75e76bec9b8f4f7", "0379110eef88d0721e6a66f8211474fb4906e16b" ], "journalName": "PACMPL", "journalPages": "79:1-79:26", "journalVolume": "1", "outCitations": [ "13ded9c00933c12d4f81c4a754b8b44d19940956", 
"a7f3a72a82a4f7864e1a8e6c1b0183d3b3249f20", "330047eafc086b07eb1cb69030e59288f824748d", "16c8ad89060897e39803a175470d7990bd40cd2b", "31181e73befea410e25de462eccd0e74ba8fea0b", "c8f6a8f081f49325eb97600eca05620887092d2c", "231e6a4fd7922c6adaaa48b2d02f7878e88c4048", "a3be177176ffdc02dbb0317ecef4ce41f8840bb7", "2e5122e23ee5f354cea7189fd2900c1aaf290fbb", "15aee0a8260bcf2cfc9334409bf6c1c450851ff6", "3782f4660041d22d3d3b737384d0144771f4ef3b", "0a638d50ab62a14e0ddeb0afe97333e6156670c3", "1663f8a95a41a66502a2c902c7bc3520e3287bbf", "5b98bbbb92b7bf6f6d5945defead158c045bf252", "6c725d2a7e88515c5f7c877936f90b0184c4fe8f", "5ebd59de39d5e79328d84903f47be4c2f5efccb9", "736768fe05e6d114f9d0d2b10ba4a04db6c5ba75", "f78cde3866f6cdcb1a88a8598ae3c4ce48bda0bc", "1b31c65d8b5023dabcdd18fd57241488834c7206", "646c91f328a129fe7f4c1b182b05dd501a271a06", "129b6328e2d2d951d6ea49bafd0fd77b312df60c", "a638ddb00cdb5d2fcc9616c7e254eaaa790d48ef", "43de5136309e262007d3f14893959af69749caf8", "0e639ae7d0caae09489f7fbfb6f4739d96f626e8", "990893b26bf52167b806c23dd18f8d2632e0fa01", "7154a072fe7b7bb416d9492f95101f691e4d6d6c", "327c5c7540a17718e77bc7bd8be3db12f684f7f2", "67105715d44cfc13db798804d08f8dac7f079090", "a4d1265e3b7473e73ab168b8fa06d185733f853c", "3ccced52d24cad8b3d9b4f69cd2e3d4872e447d9", "36eac00175dcea2b33cf998e8a2b5bca2b567ba0", "0d4d6e9ea9aa77e9c9e822b1c85a4d497021e3fb", "12cb1b261106fe238505c0772e8826a294fa3546", "75068766c0a09523504d14be8aac8a029ad097ef", "4cd8465d8f323daa7ead10c8d19801a2d234c0da", "ac881b07060bc0563721b80c5bc220168f1ddba3", "0f5bd2edf5b1ce8815e34f6090d726c35d9331d5", "dcca6b40661c280294b526756b9aa67857c1eee2", "0a39f18c6ce7b5a63b20a122c24ddcf2ae8a3ce4", "5a6682af0ad2eb0e08e6f52c0101119c603b663c", "56595635124dd4170fcb9ca606d3881876eab30f", "48a0fb31fbc7440bd0d92d4f9a5378e09018e20f", "25e7e119e82a332c1787f3a9e9b91a560f74b163", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "20f3fcd714230fbcb88661ba0f623d9e6217a717", "39d85ddbf6c9aad76689fd02306dcc7583f5b094", 
"a5ade56a2f37f3f5f5b956b0c5546de9a3428537", "164b11b1f9f8432db88424b1e4f9ba6e09e5c894", "9cb5955e01d7bfdf6595e7414c80f53ac452cd2c" ], "paperAbstract": "We present TiML (Timed ML), an ML-like functional language with time-complexity annotations in types. It uses indexed types to express sizes of data structures and upper bounds on running time of functions; and refinement kinds to constrain these indices, expressing data-structure invariants and pre/post-conditions. Indexed types are flexible enough that TiML avoids a built-in notion of \u201csize,\u201d and the programmer can choose to index user-defined datatypes in any way that helps her analysis. TiML\u2019s distinguishing characteristic is supporting highly automated time-bound verification applicable to data structures with nontrivial invariants. The programmer provides type annotations, and the typechecker generates verification conditions that are discharged by an SMT solver. Type and index inference are supported to lower annotation burden, and, furthermore, big-O complexity can be inferred from recurrences generated during typechecking by a recurrence solver based on heuristic pattern matching (e.g. using the Master Theorem to handle divide-and-conquer-like recurrences). We have evaluated TiML\u2019s usability by implementing a broad suite of case-study modules, demonstrating that TiML, though lacking full automation and theoretical completeness, is versatile enough to verify worst-case and/or amortized complexities for algorithms and data structures like classic list operations, merge sort, Dijkstra\u2019s shortest-path algorithm, red-black trees, Braun trees, functional queues, and dynamic tables with bounds like m n logn. The learning curve and annotation burden are reasonable, as we argue with empirical results on our case studies. 
We formalized TiML\u2019s type-soundness proof in Coq.", "pdfUrls": [ "http://people.csail.mit.edu/wangpeng/timl.pdf", "http://doi.acm.org/10.1145/3133903", "http://adam.chlipala.net/papers/TimlOOPSLA17/TimlOOPSLA17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/067b78da4dc2309510d4a74d4606cc1a46426581", "sources": [ "DBLP" ], "title": "TiML: a functional language for practical complexity analysis with invariants", "venue": "PACMPL", "year": 2017 }, "06885ce9502460e231c240bae121554cfd5209ee": { "authors": [ { "ids": [ "2218875" ], "name": "Jian Guo" }, { "ids": [ "1812563" ], "name": "Fangming Liu" }, { "ids": [ "1685072" ], "name": "Tao Wang" }, { "ids": [ "1723366" ], "name": "John C. S. Lui" } ], "doi": "", "doiUrl": "", "entities": [ "Central processing unit", "Cloud computing", "Data center", "Experiment", "Fairness measure", "Network performance", "OpenVMS", "Scheduling (computing)", "Simulation", "Software bug", "Testbed" ], "id": "06885ce9502460e231c240bae121554cfd5209ee", "inCitations": [ "9cf2db35591b832d78d112ad6e1746c635d1a6ea", "0c3913eb8a3628cc8c9100468da6a4242c345d7d" ], "journalName": "", "journalPages": "69-81", "journalVolume": "", "outCitations": [ "3b988049dd8f62f772281e90196bbd793700c86b", "1d2bdecbc025f3c28118e5b06aac890603c73526", "33de35074ba1e72e57f2fe5e347899652d6295c2", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "0e25351745b6b5cb364fbdf73d327f3c71474d66", "438110dc02f39f221896847a4d0e24f88e130598", "057d04a074eaa7162594800abdd80320ea172874", "47d5357957cabb610131db1b228e58b70860ee8d", "544b0ba4ae011fe26c3f207a7c6f9d6de04468ae", "27c04dce51362fcc7531acbe74823a7f0a4e48bf", "132f00de21cee656d00ad6779f1926070ad59544", "18cdc2ce6d040228cf1c385dfb9b8373fca64298", "22630a79f1c50603c1356f6ac9dc8524a18d4061", "058f6752d85a517aae298586fdf117acdd7560ea", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "52ece1e929758e9d282e818e8e9985f88570f2dd", 
"19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "4f86fa28602d9503a8575c5b31082284abc8415c", "1862c29bc091a17e43952046811b5d129dda87c7", "0b7301fe4766447af960f9a2c06ccde042538e9c", "282c6a3b573051e3e799d73cfc623ccbd68bcd6a", "41fca6c199464c983cb6384ae65c83eb7522fb46", "056f1d66700d33f5e95de5cb571deb28a1706aef", "0a856697f40bac32d4243320d95c8e614e82c7d1", "0aa4cacf6a60125961f1dac4afca63a8dcf706f9", "663e064469ad91e6bda345d216504b4c868f537b", "1927de3ddaddbc3bf53257cc5ee6e8ba127819a0", "238dd4c308c1ee6ef3809fdf15fdc87be74bdbc8", "231ba17921ebd80e95771e28dfb5082e169d5a53", "0be133617dfb5fe8fe35cf7cdfb7c2f0c3e672cd", "9b3cd6d6ae6b071e69d4e2510d73384258811ef6", "908f7931de8768786d9ef7d64f5a8156860709dd", "8c9a91b774fcc126db7ce7c67bd97d1d16143932" ], "paperAbstract": "Current IaaS clouds provide performance guarantee on CPU and memory but no quantitative network performance for VM instances. Our measurements from three production IaaS clouds show that for the VMs with same CPU and memory, or similar pricing, the difference in bandwidth performance can be as much as 16\u00d7, which reveals a severe price-performance anomaly due to a lack of pricing for bandwidth guarantee. Considering the low network utilization in cloud-scale datacenters, we address this by presenting SoftBW, a system that enables pricing bandwidth with over commitment on bandwidth guarantee. SoftBW leverages usage-based charging to guarantee price-performance consistency among tenants, and implements a fulfillment based scheduling to provide bandwidth/fairness guarantee under bandwidth over commitment. 
Both testbed experiments and large-scale simulation results validate SoftBW\u2019s ability of providing efficient bandwidth guarantee, and show that by using bandwidth over commitment, SoftBW increases 3.9\u00d7 network utilization while incurring less than 5% guarantee failure.", "pdfUrls": [ "https://www.usenix.org/conference/atc17/technical-sessions/presentation/guo-jian", "http://www.cs.cuhk.hk/~cslui/PUBLICATION/ATC-17-a.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-guo.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_guo.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/9466/35010bb9a0a2ff2792d535343694901b1c29.pdf", "s2Url": "https://semanticscholar.org/paper/06885ce9502460e231c240bae121554cfd5209ee", "sources": [ "DBLP" ], "title": "Pricing Intra-Datacenter Networks with Over-Committed Bandwidth Guarantee", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "069794b44b81c8b0651c8ea39594a91cd6081142": { "authors": [ { "ids": [ "3409732" ], "name": "Ariel Eizenberg" }, { "ids": [ "3199368" ], "name": "Yuanfeng Peng" }, { "ids": [ "17785358" ], "name": "Toma Pigli" }, { "ids": [ "2686490" ], "name": "William Mansky" }, { "ids": [ "1739688" ], "name": "Joseph Devietti" } ], "doi": "10.1145/3062341.3062342", "doiUrl": "https://doi.org/10.1145/3062341.3062342", "entities": [ "CUDA", "Concurrency (computer science)", "Correctness (computer science)", "Graphics processing unit", "High- and low-level", "Parallel computing", "Software bug" ], "id": "069794b44b81c8b0651c8ea39594a91cd6081142", "inCitations": [ "3c874433b330676b693ffe45fedd9d2d10b0b767" ], "journalName": "", "journalPages": "126-140", "journalVolume": "", "outCitations": [ "d9fe45f6ee750d8aad7e79302554497bebe8a92d", "109c4450b7fdbf5c760bc8ee5c28bec3d1186c0e", "59857e2857df6d69a12e3cbaa720648b5c299159", "00c3b08c4e1dbfa080b6d3c422fa0da0131a743c", "d2949f47e0b007d632aae21407edf5e5760413fb", 
"01d271d173eaa7b20d187b0938e70ab58493ff6a", "2927360763d20dbd3678b83d4df22891d86b2aeb", "1799668a2d5a08f48c1e768c4cde957d93c7bfc1", "182c0524aa353b6f4f4cb75a88ff3f5fc3bd86e0", "62bd72d7a4160bd1a35191c51137d11cfbe30cf7", "019bdc1544839acf14f49971f6b3661dd6b497c7", "4308295a2eaef30be423520918ad224dc2f3ffe2", "289c67f9976d72b6d9aeb64a4e9743538bf9b4be", "8747dabeaeda342fbac4ebff628c574be4c53826", "01293cc2b2bda3c38c7095d2ea1813fcb0611a3e", "47cdefebd5534d1d8c5d0f8061b482dbcd656e63", "0958a63d9c6238b38377f076b487c413bc8642c1", "0a11aaef4a109166db76ad2cbfaa78b240548354", "c2c42d4d5c8a02b6d39ffea8cdba44a0453dd12f", "00156e79606084497789662dfaf59c3b54a10722", "86ed165adcfd254b511ff1bbb912cad65d45f0d6", "015f9639075216603db029d2273d17f429916a2e", "5009b9a830ce61ba97305f04c87ae35deac66b67", "2f2daefa4a6d1c722dcaf7110f8c29f779435d99", "8eef8d67441a0a6d83c98d8b4ff3250a4f59e0e1", "7b93d3e42a7498e4de67a76b8f6861875fa74d79", "2346439ece014d5e3ce1564adc2a7ca098a37c8e", "430f66819f758f6a84aaac4b5f516f9ee4861482", "1769e6ed89d2314bb25e681f1e006b3585cb4754", "25010bbdf127101e1fd5adea5e15f45765b87b0f", "66bf50846bf7713306ed8274fd9702fab4616dbb", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "0a44e8cd34a110ec4ed7221b0431694172eadda8", "f4721973c7b55d091c86e1f13b44726987f8732a", "5b9f54be658fe5e42448bbcf3a33fff9532cc0b1", "8143a43afeb2d50ba46ee98a41db8f4430c1f5c8", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "10ba04904f12e44cd0569cb86aa6e97e47939e23", "a23ee6da0dc85cee92f03de621096bb79b692d35", "4aa993db77b888a02084a542a929b1a81a8d03f6" ], "paperAbstract": "GPU programming models enable and encourage massively parallel programming with over a million threads, requiring extreme parallelism to achieve good performance. Massive parallelism brings significant correctness challenges by increasing the possibility for bugs as the number of thread interleavings balloons. Conventional dynamic safety analyses struggle to run at this scale. 
\n We present BARRACUDA, a concurrency bug detector for GPU programs written in Nvidia’s CUDA language. BARRACUDA handles a wider range of parallelism constructs than previous work, including branch operations, low-level atomics and memory fences, which allows BARRACUDA to detect new classes of concurrency bugs. BARRACUDA operates at the binary level for increased compatibility with existing code, leveraging a new binary instrumentation framework that is extensible to other dynamic analyses. BARRACUDA incorporates a number of novel optimizations that are crucial for scaling concurrency bug detection to over a million threads.", "pdfUrls": [ "http://www.cs.princeton.edu/~wmansky/barracuda.pdf", "http://doi.acm.org/10.1145/3062341.3062342" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/069794b44b81c8b0651c8ea39594a91cd6081142", "sources": [ "DBLP" ], "title": "BARRACUDA: binary-level analysis of runtime RAces in CUDA programs", "venue": "PLDI", "year": 2017 }, "06b5973d2fde715a6c5970ebac373a4009dd4963": { "authors": [ { "ids": [ "2615278" ], "name": "Pavlos Katsogridakis" }, { "ids": [ "2740159" ], "name": "Sofia Papagiannaki" }, { "ids": [ "2385564" ], "name": "Polyvios Pratikakis" } ], "doi": "10.1007/978-3-319-64203-1_21", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_21", "entities": [ "Algorithm", "Apache Spark", "Computation", "Distributed Interactive Simulation", "Fault tolerance", "Hierarchical and recursive queries in SQL", "MapReduce", "Programmer", "Programming model", "Recursion", "Scalability", "Scheduling (computing)", "Synchronization (computer science)" ], "id": "06b5973d2fde715a6c5970ebac373a4009dd4963", "inCitations": [], "journalName": "", "journalPages": "289-302", "journalVolume": "", "outCitations": [ "756d1f8f07a83f3cfc0edaa81493a9f109628e1b", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "fce7fd98928ab9bf3e4e919e108c48fc1040f569", "0608d9937c074520cdc93cc444cc1c77039c5332", 
"2f88dcf1e9abaa0b0f8c63820548c98b2da61220", "243230d5b623f79c22750b42447e902ab07a2db9", "31b27a3b4ff89993eb92e8b1353edead8d5f2520", "0a12a179bebdf4bb69d692a1127795b3f536270b", "89aed5a5da5c7e34f8ff0bc11d5704ec49f266d9", "0f1042350e2c97117620d9f5182f94262f1f5ac0", "0e23117148029fbef47d1eed869c7952546e53aa", "661b19ff987b9ed9d9252324d4a72ab1fbd588ae" ], "paperAbstract": "MapReduce environments offer great scalability by restricting the programming model to only map and reduce operators. This abstraction simplifies many difficult problems occurring in generic distributed computations like fault tolerance and synchronization, hiding them from the programmer. There are, however, algorithms that cannot be easily or efficiently expressed in MapReduce, such as recursive functions. In this paper we extend the Apache Spark runtime so that it can support recursive queries. We also introduce a new parallel and more lightweight scheduling mechanism, ideal for scheduling a very large set of tiny tasks. 
We implemented the aforementioned scheduler and found that it simplifies the code for recursive computation and can perform up to 2.1\u00d7 faster than the default Spark scheduler.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_21" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/06b5973d2fde715a6c5970ebac373a4009dd4963", "sources": [ "DBLP" ], "title": "Execution of Recursive Queries in Apache Spark", "venue": "Euro-Par", "year": 2017 }, "06c4652c6333baca654be06c26da9f940ed5b53b": { "authors": [ { "ids": [ "1723309" ], "name": "Yue Wang" }, { "ids": [ "2687810" ], "name": "Yeye He" } ], "doi": "10.1145/3035918.3064010", "doiUrl": "https://doi.org/10.1145/3035918.3064010", "entities": [ "Column (database)", "Display resolution", "Functional dependency", "Software repository", "Spreadsheet", "Text corpus" ], "id": "06c4652c6333baca654be06c26da9f940ed5b53b", "inCitations": [ "1a691ae847155cfb1908711664dd14f7f3c1f84c", "0da82e643ffb634681154e73b5087adb655bad8c" ], "journalName": "", "journalPages": "1117-1132", "journalVolume": "", "outCitations": [ "1b189d721adbf1d2bab93b7ed6ce826e188b0b99", "06f31a1a8be734c6dadc4b6b7074c51ffd1f1109", "7de3221dafddc78a38cafe8d50d0929fe6994b03", "3965680854b5d503c1b4ba079ec010c5d3ebe1ef", "45595d87dbe94105595de857bced011a20137999", "8b052b4278020fb96354c6977988953e09eff05a", "283bd5f985d27a2790b79479f2907f0065c4eb47", "ab50b1801ec9585642f320e12b1488599bff030e", "2a01665351592b6c113510c6015b9d365475bbb4", "5a3c095e943c4444ad70a5ceb20cefa3f1ab9d54", "952dd3540e2d1ccee59bc0971c897166acb5048e", "0df69e1454c0c3a4c90de32bccc3771acb5fbc67", "022e367b68958d6818e6f6b688970caec7155ea6", "3b76b68c44e4d9f875e2aaa95eae689bbc67396c", "33eec6aac58cfa7743c2ad2a7996a5a345cf2610", "b2ec74c72d99b755325dc470dec2949d69cd4d57", "2454c84c71d282b95bc99d05adda914361905ffe", "530f4487992599b3598bd4bb45d74de8436fc3fc", "46e286af9798472955ec2258e464dc7e3ff97936", "5afa5c75d5b11ddecaee5594e502ae2c04ee4f2b", 
"bbcf76a84ee10348442ccb50ccdbfb288ede5cbb", "6e0b2b32aa3eed696aa868386d485321a63ccebb", "67c42a30220edacf848e58c29a624234a0c37a6e", "55d176b92d5740d039e1c8ebbad025d460de9ae0", "e4350656463922d21177251458d25cef68a6763d", "17ffd84480267785c6a9987211a8a86a58cea1a9", "1f990d98dcc3941f01bd6bb5405fbda37e00dd6a", "372942ff6ede5a66b0600a852c9902fb8cdd5fa1", "6350f382e814d4b2f888f5a2a8bd6dd0e9362d81", "ce18a622d6cfc38ff739f850f01a750d33587fd4", "1b4474be934290872e9c03fc084d940e9a51a360", "00a3f6924f90fcd77e6e7e6534b957a75d0ced07", "5eafdf2477441c1da7cee5f2a6982b1af9abc6f5", "1976c9eeccc7115d18a04f1e7fb5145db6b96002" ], "paperAbstract": "Mapping relationships, such as (country, country-code) or (company, stock-ticker), are versatile data assets for an array of applications in data cleaning and data integration like auto-correction and auto-join. However, today there are no good repositories of mapping tables that can enable these intelligent applications.\n Given a corpus of tables such as web tables or spreadsheet tables, we observe that values of these mappings often exist in pairs of columns in same tables. Motivated by their broad applicability, we study the problem of synthesizing mapping relationships using a large table corpus. Our synthesis process leverages compatibility of tables based on co-occurrence statistics, as well as constraints such as functional dependency. 
Experiment results using web tables and enterprise spreadsheets suggest that the proposed approach can produce high quality mappings.", "pdfUrls": [ "https://arxiv.org/pdf/1705.09276v2.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/03/mapping-synthesis.pdf", "https://arxiv.org/pdf/1705.09276v1.pdf", "http://doi.acm.org/10.1145/3035918.3064010", "http://arxiv.org/abs/1705.09276" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/06c4652c6333baca654be06c26da9f940ed5b53b", "sources": [ "DBLP" ], "title": "Synthesizing Mapping Relationships Using Table Corpus", "venue": "SIGMOD Conference", "year": 2017 }, "06e97b3ea02af7274d659a420f94bc5da5e6d541": { "authors": [ { "ids": [ "38568238" ], "name": "Jayesh Gaur" }, { "ids": [ "6226754" ], "name": "Mainak Chaudhuri" }, { "ids": [ "2256065" ], "name": "Pradeep Ramachandran" }, { "ids": [ "1706165" ], "name": "Sreenivas Subramoney" } ], "doi": "10.1109/HPCA.2017.46", "doiUrl": "https://doi.org/10.1109/HPCA.2017.46", "entities": [ "Algorithm", "Best, worst and average case", "Byte", "Cache (computing)", "Central processing unit", "Computer data storage", "Die (integrated circuit)", "Double data rate", "Dynamic random-access memory", "EDRAM", "High Bandwidth Memory", "ICL Distributed Array Processor", "Memory hierarchy", "Random-access memory", "Simulation", "Static random-access memory" ], "id": "06e97b3ea02af7274d659a420f94bc5da5e6d541", "inCitations": [ "88824f4400bf03caed2f99879e68f3543b214c92", "bac4a0e99c98fac6fa231e9ed21e8f643674200a" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "13-24", "journalVolume": "", "outCitations": [ "0cbc3b849eb23d23654c882c70cb65b19f99c011", "ffae769267e77e8b025c13e7a9b4f2c559e7593e", "02ffe89d82a5b727f8c8d259474dd972230b0f98", "43260df86b2aaa20824d73eff48e0b49162689cb", "786e1b83380c8953dc65de35d2df9bf495755d08", "417ab9b8b003982222017ef585e19680366609f3", 
"39e83bc7d1dd445a879c4ed7a50cb787103d1c4f", "2046f7c54470e7617269cc954aab877a4691c241", "1495c420afc2e26b649e3254840e111dfc928b0d", "18633256bb17ba0744518479c0752ca87f0d03c6", "36de396ee9d1c9991e44c01be35e5206d79c3328", "5bf1fdb6dc950537962eafe888259272eed67737", "22f00894ce6f678a9a701ab0b010d05a2d38e4bf", "95390b3c703ae99dfe98758f5f008a2a90bb0bb5", "0891f725910e268c00ea0ba84f08c268db91fe4f", "70d44cb49dc7aa31d90e2c9c6100b1a249e42136", "3216ab441ef92aedededd7c72dcacc866423ce69", "22b4811bb8265e84d53c62a842cac10dda15f6af", "0dc38d3afb68f617e23eced7ce2994a0a82feb11", "c7341dcd5e6c71b149edf808331ab1e8b37cd03b", "cb111d5c350431b53a9a217cefb5d1701ef46e46", "8009b1c8cc4af8d3d4b792ac32926487a428172e", "094b881edab3f5833c4ff2f38d4ed207af141bcd", "98ab001452b8392bb0d0b2677cfb91281bad7708", "234049a484dee54d3f9555fe7f50805e783ec432", "f01a27fe52fd71e853ad823e38835bbf8e7269e7", "48fc9415566ca5bc80e6519d50f4d78fe43a383b", "3000b16ee204ffed4c602ed6f93fc7a692850b6e", "1a5ce35fc5ad575c2c9e5f692bf263082d656f80", "745d50eb6b74b191191ce93c6ef1ec9760ce0cb0", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "90851f7e712bc8a2a201c0609fdf53520779d1f8", "1154b2fd6fb913b02eb6f64f5287a6b75a506e64", "c07ebd47e86f0ece88b28c57d79ed7544f5a30f0", "8383b7f6f4f9556e522f735a0fd7b8c9e11e613b", "92e9e22f2201ee11f244cde9f37fc8954bc3a6a0", "8007305d525a0802f09002b7a5bca2bb3f23ed7d", "1e1b0f0411e35a7d8edacc3cd555e8a347674ef1", "11b0e5ee27e7fdf989632f157ff204fc8962918c", "513e0a61ad2a55fc5f87606cfa55e3590f80ecaf" ], "paperAbstract": "The memory wall continues to be a major performance bottleneck. While small on-die caches have been effective so far in hiding this bottleneck, the ever-increasing footprint of modern applications renders such caches ineffective. Recent advances in memory technologies like embedded DRAM (eDRAM) and High Bandwidth Memory (HBM) have enabled the integration of large memories on the CPU package as an additional source of bandwidth other than the DDR main memory. 
Because of limited capacity, these memories are typically implemented as a memory-side cache. Driven by traditional wisdom, many of the optimizations that target improving system performance have been tried to maximize the hit rate of the memory-side cache. A higher hit rate enables better utilization of the cache, and is therefore believed to result in higher performance. In this paper, we challenge this traditional wisdom and present DAP, a Dynamic Access Partitioning algorithm that sacrifices cache hit rates to exploit under-utilized bandwidth available at main memory. DAP achieves a near-optimal bandwidth partitioning between the memory-side cache and main memory by using a light-weight learning mechanism that needs just sixteen bytes of additional hardware. Simulation results show a 13% average performance gain when DAP is implemented on top of a die-stacked memory-side DRAM cache. We also show that DAP delivers large performance benefits across different implementations, bandwidth points, and capacity points of the memory-side cache, making it a valuable addition to any current or future systems based on multiple heterogeneous bandwidth sources beyond the on-chip SRAM cache hierarchy.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.46", "http://www.cse.iitk.ac.in/users/mainakc/pub/hpca2017dap.pdf", "http://www.cse.iitk.ac.in/users/mainakc/pub/hpca2017dap.pdf/", "https://www.cse.iitk.ac.in/users/mainakc/pub/hpca2017dap.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/06e97b3ea02af7274d659a420f94bc5da5e6d541", "sources": [ "DBLP" ], "title": "Near-Optimal Access Partitioning for Memory Hierarchies with Multiple Heterogeneous Bandwidth Sources", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "06fd348d9388abfd880d3f207e3664e3180857cb": { "authors": [ { "ids": [ "2993481" ], "name": "Srikanth Sundaresan" }, { "ids": [ "1771164" ], "name": "Mark 
Allman" }, { "ids": [ "1718655" ], "name": "Amogh Dhamdhere" }, { "ids": [ "10179735" ], "name": "Kimberly C. Claffy" } ], "doi": "10.1145/3131365.3131381", "doiUrl": "https://doi.org/10.1145/3131365.3131381", "entities": [ "Experiment", "Interconnection", "Last mile", "Maxima and minima", "Multitier architecture", "Network congestion", "Network switch", "TCP congestion control", "Throughput", "Type signature" ], "id": "06fd348d9388abfd880d3f207e3664e3180857cb", "inCitations": [ "4c49270fefd4d359e3ee76e1a6bccb94283d51ff" ], "journalName": "", "journalPages": "64-77", "journalVolume": "", "outCitations": [ "83e16961840d660945295918b96e840829fbd984", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "bf0da8f3a47fffcba82cf9e4f57c43521b8effd3", "605543e3bad4dcc65fdc711fa16c0a22d4ddfd95", "3318f54d21edb825cb223e2fd88754c61b362e4c", "7b49ef639ecc802b7ccb2933a8eda5ddb21c8be1", "05a800a0cd33048d6e5ec59efc2532fa86071755", "b3afefa8e89adda9112724015142e99daeabf9e9", "4c49270fefd4d359e3ee76e1a6bccb94283d51ff", "7043703fe7d4efc25764e03cb4bea46ceb0ab353", "5c8ee2b6d8ff485bb9ce1ef525693a2e728b36c3", "5d4445bfa05fc47466dbbc950717a181613e87a0", "21a378262cb3402ab1f4b11cb20a4687c28bc052", "20b7cb4c512556ce597ce99cf11712a2b9409262", "6d2337411e0bdf7fe19a0089cdc1f8a754b11305", "2be5a7ddb866e22bacb7982a9a73188cf5564f8d", "097ca8b402d3eb1ee125396dc2e36b1d7713a5ea", "25ded9f81378f6b85daf5a70c85bbadfb84ebc3d", "a01117f34d8f692e948f20080a3080096ec144ae", "d09aa6f9d2df3e20441f80914947e6aae60a016b", "7a0e7065d521e31e74fc367597db41b62b19a789", "16eedc8ccfda1a7e213097ada3c234829488add5", "11945a1b14a40206e3e74956c1911d10207dcfe9", "04a32d03a605e7b27c29faebb6a5079689c04bc8" ], "paperAbstract": "We develop and validate Internet path measurement techniques to distinguish congestion experienced when a flow self-induces congestion in the path from when a flow is affected by an already congested path. 
One application of this technique is for speed tests, when the user is affected by congestion either in the last mile or in an interconnect link. This difference is important because in the latter case, the user is constrained by their service plan (i.e., what they are paying for), and in the former case, they are constrained by forces outside of their control. We exploit TCP congestion control dynamics to distinguish these cases for Internet paths that are predominantly TCP traffic. In TCP terms, we re-articulate the question: was a TCP flow bottlenecked by an already congested (possibly interconnect) link, or did it induce congestion in an otherwise idle (possibly a last-mile) link?\n TCP congestion control affects the round-trip time (RTT) of packets within the flow (i.e., the flow RTT): an endpoint sends packets at higher throughput, increasing the occupancy of the bottleneck buffer, thereby increasing the RTT of packets in the flow. We show that two simple, statistical metrics derived from the flow RTT during the slow start period---its coefficient of variation, and the normalized difference between the maximum and minimum RTT---can robustly identify which type of congestion the flow encounters. We use extensive controlled experiments to demonstrate that our technique works with up to 90% accuracy. We also evaluate our techniques using two unique real-world datasets of TCP throughput measurements using Measurement Lab data and the Ark platform. We find up to 99% accuracy in detecting self-induced congestion, and up to 85% accuracy in detecting external congestion. 
Our results can benefit regulators of interconnection markets, content providers trying to improve customer service, and users trying to understand whether poor performance is something they can fix by upgrading their service tier.", "pdfUrls": [ "http://www.icir.org/mallman/pubs/SDAC17/SDAC17.pdf", "http://doi.acm.org/10.1145/3131365.3131381", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final99.pdf", "http://www.caida.org/publications/papers/2017/tcp_congestion_signatures/tcp_congestion_signatures.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/06fd348d9388abfd880d3f207e3664e3180857cb", "sources": [ "DBLP" ], "title": "TCP congestion signatures", "venue": "IMC", "year": 2017 }, "070771bdc55490cdcdadc63f815faf0cf23224fb": { "authors": [ { "ids": [ "34710867" ], "name": "Lun Liu" }, { "ids": [ "3105241" ], "name": "Todd D. Millstein" }, { "ids": [ "1702346" ], "name": "Madan Musuvathi" } ], "doi": "10.1145/3133873", "doiUrl": "https://doi.org/10.1145/3133873", "entities": [ "A* search algorithm", "Apache Spark", "Baseline (configuration management)", "Benchmark (computing)", "Big data", "Compiler", "Consistency model", "Java", "Java HotSpot Virtual Machine", "Java virtual machine", "Library", "Machine learning", "Memory model (programming)", "Optimizing compiler", "Overhead projector", "Programmer", "Programming language", "Sequential consistency", "Server (computing)", "Server-side", "Shared memory", "Thread (computing)", "X86" ], "id": "070771bdc55490cdcdadc63f815faf0cf23224fb", "inCitations": [], "journalName": "PACMPL", "journalPages": "49:1-49:25", "journalVolume": "1", "outCitations": [ "3302da00188f85f87710d62a743c98d7fbc1e437", "3d802a3254a1a532f080bc8e713d970ea8796db5", "5eef609f21fc9327e551ab40425f7f1715c3e200", "4e624272a61a228bcf9565b0e48e86ae3936db80", "129aea79d23b3295999332bf336c4aa8804ebfc5", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "1820869030fca660212fb7a6449b6ad1aa99d9db", 
"da68320330b849d7a549db022a2400a7c6a711cf", "13bc2fe34d03c49cfcb80a814c046b8ad8895deb", "1066cce77abb53eea67bfcc1d2dee8e7f4e3ebcf", "33817456b5263fab036210ff1245dcc96f863101", "5e51e70eb2e423988cf73262d9cb3adf72f5b6f1", "05a518c3b1a6f5c15d77f8829368677a263ff15d", "3eae0271717f6b4d65024abf04e5d98aef41d748", "3784b73a1f392160523400ec0309191c0a96d86f", "e2cd72273908651ea11f9cb45d0dd5d755ca3bd0", "362e9b5afe5934a9d8046d758c17c5bada0652b3", "66dada0d684ddc29a3156adfd3b2e97be5c44943", "3a66a682ee36cde0738824b152a51df2ccbb80fd", "833dd2477b9f783434121f9d07a91349fad4d5d4", "254fe5dec3f90810a89ea02ae66e8f1d60b5054a", "2e3058c0c279a5ff59b7fd65b7f73fab2fe0d0b3", "52eee82594982f3c6ba7f0385a78002868fa30cb", "0f1042350e2c97117620d9f5182f94262f1f5ac0", "0ed62848d5c9e01f692c0c0b3851848ac7bb0764", "26fff9101c4499511d44e140570983d2a655e6be", "0eee11504314fb4c13da25773edfc3ae061e8fc2", "09cb251072ef19e125ec5d94de5777584af68db5", "62bd72d7a4160bd1a35191c51137d11cfbe30cf7", "bbb9c3119edd9daa414fd8f2df5072587bfa3462", "1b9e9bfdc66140d2eb192e4ac8aca9281f0239b8", "19580e4beb903595082ced092c0bc5ba0a2e7bac", "0aff06e25dd081211e39771f2aeb41aff7b2fcd6", "00a9ba0063d34ec56792849a67ef57b4601becbb", "3371781698dbd3d3e78477af7528530024b828f8", "2701419224edb78e2f34f1470115be290097ba8b", "012f8e43e7973c8fad3c9a48b4dd7be773c770d1", "2cdeba58517d6b3e0dc3e9a4998c141f08f9f11c", "67c64f4e676e1996cca7fd0ec50e453d6c698814", "4a3f0c1b983315c863dd6f4820dc147b50ab6109", "c11db6ccd0c480c47c97d3c3beadb1a90f0a8e2b", "9a95cb1f79a8078e47dfb17f695952a6bea92fb5" ], "paperAbstract": "A *memory consistency model* (or simply *memory model*) defines the possible values that a shared-memory read may return in a multithreaded programming language. Choosing a memory model involves an inherent performance-programmability tradeoff. 
The Java language has adopted a *relaxed* (or *weak*) memory model that is designed to admit most traditional compiler optimizations and obviate the need for hardware fences on most shared-memory accesses. The downside, however, is that programmers are exposed to a complex and unintuitive semantics and must carefully declare certain variables as `volatile` in order to enforce program orderings that are necessary for proper behavior. \n This paper proposes a simpler and stronger memory model for Java through a conceptually small change: *every* variable has `volatile` semantics by default, but the language allows a programmer to tag certain variables, methods, or classes as `relaxed` and provides the current Java semantics for these portions of code. This *volatile-by-default* semantics provides *sequential consistency* (SC) for all programs by default. At the same time, expert programmers retain the freedom to build performance-critical libraries that violate the SC semantics. \n At the outset, it is unclear if the `volatile`-by-default semantics is practical for Java, given the cost of memory fences on today's hardware platforms. The core contribution of this paper is to demonstrate, through comprehensive empirical evaluation, that the `volatile`-by-default semantics is arguably acceptable for a predominant use case for Java today -- server-side applications running on Intel x86 architectures. We present VBD-HotSpot, a modification to Oracle's widely used HotSpot JVM that implements the `volatile`-by-default semantics for x86. To our knowledge VBD-HotSpot is the first implementation of SC for Java in the context of a modern JVM. VBD-HotSpot incurs an average overhead versus the baseline HotSpot JVM of 28% for the Da Capo benchmarks, which is significant though perhaps less than commonly assumed. 
Further, VBD-HotSpot incurs average overheads of 12% and 19% respectively on standard benchmark suites for big-data analytics and machine learning in the widely used Spark framework.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133873", "http://web.cs.ucla.edu/~todd/research/oopsla17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/070771bdc55490cdcdadc63f815faf0cf23224fb", "sources": [ "DBLP" ], "title": "A volatile-by-default JVM for server applications", "venue": "PACMPL", "year": 2017 }, "071341873bf6755131dae4347a09996b29852c90": { "authors": [ { "ids": [ "24624948" ], "name": "Charalampos Stylianopoulos" }, { "ids": [ "2088512" ], "name": "Magnus Almgren" }, { "ids": [ "2626539" ], "name": "Olaf Landsiedel" }, { "ids": [ "1752071" ], "name": "Marina Papatriantafilou" } ], "doi": "10.1109/ICPP.2017.56", "doiUrl": "https://doi.org/10.1109/ICPP.2017.56", "entities": [ "Aho\u2013Corasick algorithm", "Algorithm", "Automatic vectorization", "Central processing unit", "Cloud computing", "Firewall (computing)", "Haswell (microarchitecture)", "Intrusion detection system", "Laptop", "Locality of reference", "Network security", "Network traffic control", "Pattern matching", "SIMD", "Speedup", "Time complexity", "Web server", "Xeon Phi" ], "id": "071341873bf6755131dae4347a09996b29852c90", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "472-482", "journalVolume": "", "outCitations": [ "7f864cfdde0a6f92e90ac53a73079f4bea884d85", "3d940be9caba441c327991febf19bed1d896cc89", "f311412463c33223947df56ae04644e8c68cdd5d", "b85df0212d624cbcf52108969ba722fe5d24cb2e", "80527e7595530951081494d1b98f3f13da3033a2", "d059a2ade9d34a97846ec63f30caf80014088582", "7ad9b11b446d29006ed857b0f13323f6875d601b", "3f82fb3d54b2baaa3b18ae1e0953b16760641b20", "24251f02c34f32b1dd96572a1d984c4463a26a10", "846fcf30dc75f04886092891e754791e9704f69f", "784c88bfd72ae75e21942e1bc74bd840e25fb2aa", 
"0cff369abc1673194ea1e61999ad6c8cd1c8bc30", "eb7539d4a06a027dd8cbabe8f28190de60b28629", "1e5027ff533d31513b667cec06f6a650882e1ee0", "363d109c3f00026f9ef904dd8cc3c935ee463b65", "6a5e0414a01c19da4a48ac4018b5687d782ad25c", "d985c93917cd0a145451ec2c02c9e25d988ac368", "b4bf6a0a782450383bca2b91eac34a64a083acb5", "210da4f92fe3a29656165d895d44c71aad3f1b79", "c0b438eee7bd423606da9335229602b9c77c10d4", "752a2efb4cc8a40e86a442b45e8f675f9ff8d224", "5b6705672ecd3281fd1736bfa93f1c153f4c86c0", "3547ac839d02f6efe3f6f76a8289738a22528442" ], "paperAbstract": "Pattern matching is a key building block of Intrusion Detection Systems and firewalls, which are deployed nowadays on commodity systems from laptops to massive web servers in the cloud. In fact, pattern matching is one of their most computationally intensive parts and a bottleneck to their performance. In Network Intrusion Detection, for example, pattern matching algorithms handle thousands of patterns and contribute to more than 70% of the total running time of the system. In this paper, we introduce efficient algorithmic designs for multiple pattern matching which (a) ensure cache locality and (b) utilize modern SIMD instructions. We first identify properties of pattern matching that make it fit for vectorization and show how to use them in the algorithmic design. Second, we build on an earlier, cache-aware algorithmic design and we show how cache-locality combined with SIMD gather instructions, introduced in 2013 to Intel's family of processors, can be applied to pattern matching. We evaluate our algorithmic design with open data sets of real-world network traffic: Our results on two different platforms, Haswell and Xeon-Phi, show a speedup of 1.8x and 3.6x, respectively, over Direct Filter Classification (DFC), a recently proposed algorithm by Choi et al. 
for pattern matching exploiting cache locality, and a speedup of more than 2.3x over Aho-Corasick, a widely used algorithm in today's Intrusion Detection Systems.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.56" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/071341873bf6755131dae4347a09996b29852c90", "sources": [ "DBLP" ], "title": "Multiple Pattern Matching for Network Security Applications: Acceleration through Vectorization", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "0713ca8723993973640da3ad3c68074547ebd673": { "authors": [ { "ids": [ "4367659" ], "name": "Karthik Rao" }, { "ids": [ "1715001" ], "name": "Jun Wang" }, { "ids": [ "2933334" ], "name": "Sudhakar Yalamanchili" }, { "ids": [ "2279524" ], "name": "Yorai Wardi" }, { "ids": [ "3210669" ], "name": "Handong Ye" } ], "doi": "10.1109/HPCA.2017.32", "doiUrl": "https://doi.org/10.1109/HPCA.2017.32", "entities": [ "Algorithm", "Android", "Best, worst and average case", "Central processing unit", "Control theory", "Dynamic voltage scaling", "Memory bandwidth", "Mobile device", "Operating system", "Power management", "Smartphone", "System configuration" ], "id": "0713ca8723993973640da3ad3c68074547ebd673", "inCitations": [ "a447e9372fbe9abaeb88a8d83856e81c0a0fd343", "a7a971a51e10a0f5cdc2d2ee4e1d5c735f6d86c2" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "169-180", "journalVolume": "", "outCitations": [ "d320abd5c9d10abdb40ee24925ce757df7ae807d", "04a8986ea5df3d6c29fb21627ac1f51ccf68eb15", "01debf23d55fcd72ff2d78f980c5c73a79b90102", "35fb4a067532c90b030bb19a94857f891dff28d5", "9442ed36516eac3af26bf515c659e4cacf999dee", "6252dda1eda881986d91ef19f5f1f46750679ba6", "9d5862f2251d6ad666fe8582bbc5d1fdf18c1a45", "1fe0f151c9e693431d15193e6f47ad81d8345dc2", "53d2bf89576e95387a1004842722bd721d675c18", "8f577a032b08c61ee5ce858062bfa051c8194b5f", 
"2fbf9ab58d81b2a45a5c52803e02c1a2c18add3d", "7cb133f451aa5f00a21ff19941d1417ba77fa6d3", "24208da401b10445ae684c20ef1bbc4428eb131d", "d01a01e0ff9f730517231f9d2aad201e14080795", "03385e04bf3df318ee9a94237e6b5e96b8663a0d", "15860f9f774f19f245f016d9cf479222e4f9a6ba", "f08f79290a79800969a33ab209bd20931160557a", "961c1840153791e10376630d6a428300389e98de", "649db1efb9f7415f6c0f1247bcb3734ad62e4442", "387eb8909b5527dd2513cfdd2f376a3a1f2973b3" ], "paperAbstract": "Energy management is a key issue for mobile devices. On current Android devices, power management relies heavily on OS modules known as governors. These modules are created for various hardware components, including the CPU, to support DVFS. They implement algorithms that attempt to balance performance and power consumption. In this paper we make the observation that the existing governors are (1) general-purpose by nature (2) focused on power reduction and (3) are not energy-optimal for many applications. We thus establish the need for an application-specific approach that could overcome these drawbacks and provide higher energy efficiency for suitable applications. We also show that existing methods manage power and performance in an independent and isolated fashion and that co-ordinated control of multiple components can save more energy. In addition, we note that on mobile devices, energy savings cannot be achieved at the expense of performance. Consequently, we propose a solution that minimizes energy consumption of specific applications while maintaining a user-specified performance target. Our solution consists of two stages: (1) offline profiling and (2) online controlling. Utilizing the offline profiling data of the target application, our control theory based online controller dynamically selects the optimal system configuration (in this paper, combination of CPU frequency and memory bandwidth) for the application, while it is running. 
Our energy management solution is tested on a Nexus 6 smartphone with 6 real-world applications. We achieve 4 - 31% better energy than default governors with a worst case performance loss of", "pdfUrls": [ "http://casl.gatech.edu/wp-content/uploads/2016/11/enopt.pdf", "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.32" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0713ca8723993973640da3ad3c68074547ebd673", "sources": [ "DBLP" ], "title": "Application-Specific Performance-Aware Energy Optimization on Android Mobile Devices", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "071564baef078867847fc54a3a0b50dd22d29d62": { "authors": [ { "ids": [ "40016363" ], "name": "Hasan Hassan" }, { "ids": [ "1920997" ], "name": "Nandita Vijaykumar" }, { "ids": [ "2781428" ], "name": "Samira Manabi Khan" }, { "ids": [ "33801185" ], "name": "Saugata Ghose" }, { "ids": [ "23008160" ], "name": "Kevin K. Chang" }, { "ids": [ "3257164" ], "name": "Gennady Pekhimenko" }, { "ids": [ "15895903" ], "name": "Donghyuk Lee" }, { "ids": [ "1741511" ], "name": "Oguz Ergin" }, { "ids": [ "1734461" ], "name": "Onur Mutlu" } ], "doi": "10.1109/HPCA.2017.62", "doiUrl": "https://doi.org/10.1109/HPCA.2017.62", "entities": [ "Computer data storage", "Data integrity", "Double data rate", "Dynamic random-access memory", "Field-programmable gate array", "High- and low-level", "High-level programming language", "Memory controller", "Memory module", "Non-volatile memory", "Observable", "Open-source software", "Systems design", "Utility", "Volatile memory" ], "id": "071564baef078867847fc54a3a0b50dd22d29d62", "inCitations": [ "60aa9510638d4d9739ebfc3a0042187988482346", "00cc482570d739e7b733f45b6f8f1836b24056bd", "2976932bec7334a150e1bb6916b7564bdaa864ea", "0c4fe1f1a8043e8f4175b21faca1b72bff8033e6", "24b300b2395e4d0b0d2c8b9797fc9f8e735a58ef", "42f7ade4ab1ee6941da178b53712bb7ef7822815", 
"1f80d8bdf5a0a1787a36ccfc4929f71d14a94e57", "b06b556169d8b55d6d8058164dd599c67c50c430", "6ff7fd341b0a4ab4d919f8ce3b35d447668e80ae", "983e87929eeb3f77c2ddb02d17d6efe978c80667", "1ebdf99bf03787a10d1c37bc9f93e89116e29bd6", "0f41b9c0900b1c17b63d3d59bd4c334f7cf736af", "5c478e5c774eb3cf71e446e2c9eb2166ca032b28", "0b393cab00401cb971cf71970e00c2767f881f75", "2aa997522d212ab74163b986be211ffc7f3e9e34", "447f492235719d7c2b061b95d818f928d6cbdac5", "2cfa2068d49fc0e9cc5d96bc498c63e782f7478f", "042855085a52934e5599e02555071bb222f6a000" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "241-252", "journalVolume": "", "outCitations": [ "77f826132cf09ac91ea9c859387a8d52221a019a", "8d71fb5efe95801b31d65366ff1ce8c01525e493", "403966143ae4ded89f519214124761d667821a11", "e762b1b654798cec0fb9d6000c7f7c777ac0689f", "1da48d8173e34eb7825870248c4c12b6bbe7d9c1", "081c32609be4adcf16fe6f3bd6ae35ce2622edaf", "170fc81c89a7fa5541d078b8400529fdea94af18", "831b348bbabaf2fbab1700de982440de11bedf72", "710b3d324b07197a705683af18fc417ef712d042", "1be96030c042ff6b5bbe05bf0fd86f5f9a4d27dc", "e0a4d1dbd9d459f3613be9da56243d72c40e152e", "35b92289fe9c19e5baf462ffe91bdd2d25768b00", "3420b736c4a182cb72c90f3649d8475e196d0401", "03eaf3a6b6db01bdb749e8c3a097a0198c61b976", "5a04b332441e2ff025313bfd303383e13050a274", "06d5e64635ff941d08cf833706554c493deb7acb", "fae8a785260ac5c34be82fca92a4abef4c30d655", "7815c4243d581d0f96d0dac2c6e90e01d1ce94a3", "517c5cd1dbafb3cfa0eea4fc78d0b5cd085209b2", "eef1f3f44d249a37c8382f77ed9770b62e8bc158", "9341125876271d46cc25f86dac93f25acb343e8d", "0ffbd9cd0fe4fa005fc9b6eea24ecf9bff67c806", "3f310c82a8a6f5a6a02841d4c5484873cb9530c6", "1c32ad0a42109fab826eb3054df7cfc33b424125", "37b5850e3e75a3462f3991491ca26674925f233b", "07e01e6ea72ef3e0cca2bf3316de05546285c8b6", "084037d504c95c1af6fb1398179f8495618b72d7", "33cb4013c7cc36a173e7fb4e541133056e8e43cf", "44d886f89cdbd4fdf5dd25d83b2d37deb7541bf7", 
"2612541a89857949bc512b6fb2ad7f0c153cb97c", "174a3d4ad2caba68b55fd3ee863b9471e3786f21", "8b4682a90b39d0b95d92098be48f05687cb23086", "3c89345bb88a440096f7a057c28857cc4baf3695", "0eacd1b47786f740b723d906d46e160f143c0378", "bb117349638a1d63be1b105bba0e152bd6c031f8", "ab6888a1b024d109c768f81b49c77b585efc975a", "8e53bdcc9f1d059a0649c97a05a9e8bd2c25698b", "1114ef6ef315a23755740545ee46c5af0cf1e02c", "ddc3e4501691c41bda5d927628f5f4abb2cfeb7f", "31c299532c42106b71e909c2fc0fc7472c39ce90", "fd840d5275cac98d64e7778a1b9173b937a77386", "26e72340c47b7348e1b1de285f89dd96cc925b27", "3f82aa1373e823ec622b3021fff9df4a82230267", "0edf4ef1b8e09e4abc994f7d450bc090262e2c9b", "67acde1c1a93644a45e92b42a6467a558235861a", "a56683f144d7498e1fc5b34a9314c138221d71c5", "f0409a67cbc7457693c4787892076d94c4ec3c6c", "76e29695c7c119d869d3b87886a611261a98e4a4", "2fa80c8342dcb349f1d91c102a76400c86dfb042", "1a8c7439080c2e5d42bf173c4db084713e5f05b7", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "238de3a78baa66b590c077ae95b33570b0ee2fc2", "663fee48f41849a47a89ce014876c745851c8a06", "468035263afa59095614f26a62e0217da4a1aeed", "dc2e2b794a784782d7d9860f1358aa107f71c1bf", "63e8d74a3243df7f05f2a12ad537b211767f2adc", "012d556d67acedc6898930b4c93f54b87aabf5ee", "6902867509928c0e5c19aff3e62e1def3a19d581", "0d89743bbb517a2534b3a4a7a8e9e4f04610c7fa", "1db11a76fa33ca81970aa345fe4bc150ae846ce0", "c2d21dc070bec49f3efcb71e4edf73770faa2fef", "94468d080421c4ec3141625a6c573b42d3b01261", "76d791a34301b60f4c6c081b091fb7bdc2971435", "0494a1ab6f0dd764fb9039772818b8f269ed70b4", "35235e03fd84d273235abbe71357b9b9dea77e3d", "61ea230d0e757ff46d3a381e79691bd54b92a503", "2621b8f63247ea5af03f4ea0e83c3b528238c4a1", "9cea2a7caea7a77dec3b6f493518a6b26375723c", "45d8dfe5b5dce66bd73c7688d91327f280915314", "434fa04db769935ae61bbcf4d9faa602b9a8c730", "096ee9c89d43fd03a602aed3a37fdf43dc8e60ae", "84564d347d505467dd628e56319bc037b0a1ec28", "3ef44315caebb6169b7ffbf9886cd782193ab40e", "8c581854139c628a8c16e36bf48dc5b65d3e26d0", 
"356955d0f190829b7481b8dc39c5f90dfac1b652" ], "paperAbstract": "DRAM is the primary technology used for main memory in modern systems. Unfortunately, as DRAM scales down to smaller technology nodes, it faces key challenges in both data integrity and latency, which strongly affects overall system reliability and performance. To develop reliable and high-performance DRAM-based main memory in future systems, it is critical to characterize, understand, and analyze various aspects (e.g., reliability, latency) of existing DRAM chips. To enable this, there is a strong need for a publicly-available DRAM testing infrastructure that can flexibly and efficiently test DRAM chips in a manner accessible to both software and hardware developers. This paper develops the first such infrastructure, SoftMC (Soft Memory Controller), an FPGA-based testing platform that can control and test memory modules designed for the commonly-used DDR (Double Data Rate) interface. SoftMC has two key properties: (i) it provides flexibility to thoroughly control memory behavior or to implement a wide range of mechanisms using DDR commands, and (ii) it is easy to use as it provides a simple and intuitive high-level programming interface for users, completely hiding the low-level details of the FPGA. We demonstrate the capability, flexibility, and programming ease of SoftMC with two example use cases. First, we implement a test that characterizes the retention time of DRAM cells. Experimental results we obtain using SoftMC are consistent with the findings of prior studies on retention time in modern DRAM, which serves as a validation of our infrastructure. Second, we validate two recently-proposed mechanisms, which rely on accessing recently-refreshed or recently-accessed DRAM cells faster than other DRAM cells. 
Using our infrastructure, we show that the expected latency reduction effect of these mechanisms is not observable in existing DRAM chips, which demonstrates the usefulness of SoftMC in testing new ideas on existing memory modules. We discuss several other use cases of SoftMC, including the ability to characterize emerging non-volatile memory modules that obey the DDR standard. We hope that our open-source release of SoftMC fills a gap in the space of publicly-available experimental memory testing infrastructures and inspires new studies, ideas, and methodologies in memory system design.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.62", "http://www.pdl.cmu.edu/PDL-FTP/NVM/17hpca_softmc.pdf", "http://www.ece.cmu.edu/~safari/pubs/softMC_hpca17-lightning-talk.pdf", "http://www.ece.cmu.edu/~safari/pubs/softMC_hpca17-talk.pdf", "https://people.inf.ethz.ch/omutlu/pub/softMC_hpca17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/071564baef078867847fc54a3a0b50dd22d29d62", "sources": [ "DBLP" ], "title": "SoftMC: A Flexible and Practical Open-Source Infrastructure for Enabling Experimental DRAM Studies", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "071ed5d381c0ab3a600f0ca8b5829c973ea3990a": { "authors": [ { "ids": [ "1727449" ], "name": "Vu Nguyen" }, { "ids": [ "8409193" ], "name": "Sunil Gupta" }, { "ids": [ "2867032" ], "name": "Santu Rana" }, { "ids": [ "40144382" ], "name": "Cheng Li" }, { "ids": [ "1679520" ], "name": "Svetha Venkatesh" } ], "doi": "10.1109/ICDM.2017.44", "doiUrl": "https://doi.org/10.1109/ICDM.2017.44", "entities": [ "Address space", "Algorithm", "Bayesian optimization", "Benchmark (computing)", "Black box", "Experiment", "Global optimization", "Hyper-threading", "Information management", "Machine learning", "Mathematical optimization", "Maxima and minima", "Procedural parameter", "Program optimization", "Regret (decision 
theory)", "Theory", "Value (ethics)" ], "id": "071ed5d381c0ab3a600f0ca8b5829c973ea3990a", "inCitations": [ "0edbac9c47df0cea7f3fee7409421ee8a9a97420" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "347-356", "journalVolume": "", "outCitations": [ "5520b482b2c92ddad4907152217948a30cb459e2", "30423f985355d74295546f1d14ed2ddd33cdef99", "fe0749b9f46b4b6aa0fcfe05e0fef95f61d1cb85", "552c4a314cdab45336801a685872a4b45c7d1caa", "b39c891a23d920ab1aa10b6fc8978b127c95c8bd", "217135d666e8349ba6d7312a37bd1dd166c098ec", "25ca0946d0318570a758d9a6e4e4dd260fca126a", "1592fe924114866c1ac559bae33ea789930daa98", "6a6a20bcae8fe52d50c86457e71a2b9ae88d6c7c", "cd5a26b89f0799db1cbc1dff5607cb6815739fe7", "034c8c60a10d09a0b28ca929a9349cb3c0466b8b", "26f58c28de7469dc6b6846d37953bbbe3f4fc0e9", "210b3ccdc5d43ff218f894695a6ee8f1ff71a32f", "7e0c75efced4a572c369e88e2aec69c1c8d4687b", "46ecf53aad58de50fa85714f9a175a609ffb7ebb", "bcfca73fd9a210f9a4c78a0e0ca7e045c5495250", "9346ee1c7d8eb41301b733311270aa9ee73d0e6d", "d859d016c42ea2e8b941002c1468bfc0eb1f02b2", "4f9a756ceecc75f1f339dcaa7e590e6aef6f2244", "0c839972087ca6e798f10cfd4368d461b872d6ac", "1a7fd7b566697c9b69e64b27b68db4384314d925", "30c4b5432dded3ce170f58d96e8935d538c58b98", "1903b4448f1d87dbe3b6a5ef6089944a68a06ffd", "5ba6dcdbf846abb56bf9c8a060d98875ae70dbc8", "3706bba3ed884524e9cf8c63ecbf9f0d615f7004" ], "paperAbstract": "Bayesian optimization (BO) has recently emerged as a powerful and flexible tool for hyper-parameter tuning and more generally for the efficient global optimization of expensive black-box functions. Systems implementing BO has successfully solved difficult problems in automatic design choices and machine learning hyper-parameters tunings. Many recent advances in the methodologies and theories underlying Bayesian optimization have extended the framework to new applications and provided greater insights into the behavior of these algorithms. 
Still, these established techniques always require a user-defined space to perform optimization. This pre-defined space specifies the ranges of hyper-parameter values. In many situations, however, it can be difficult to prescribe such spaces, as a prior knowledge is often unavailable. Setting these regions arbitrarily can lead to inefficient optimization - if a space is too large, we can miss the optimum with a limited budget, on the other hand, if a space is too small, it may not contain the optimum point that we want to get. The unknown search space problem is intractable to solve in practice. Therefore, in this paper, we narrow down to consider specifically the setting of \"weakly specified\" search space for Bayesian optimization. By weakly specified space, we mean that the pre-defined space is placed at a sufficiently good region so that the optimization can expand and reach to the optimum. However, this pre-defined space need not include the global optimum. We tackle this problem by proposing the filtering expansion strategy for Bayesian optimization. Our approach starts from the initial region and gradually expands the search space. We develop an efficient algorithm for this strategy and derive its regret bound. 
These theoretical results are complemented by an extensive set of experiments on benchmark functions and two real-world applications which demonstrate the benefits of our proposed approach.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.44" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/071ed5d381c0ab3a600f0ca8b5829c973ea3990a", "sources": [ "DBLP" ], "title": "Bayesian Optimization in Weakly Specified Search Space", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "0734438fb61b5a1558b42e9156f800bb49aa6e26": { "authors": [ { "ids": [ "34608926" ], "name": "Sharath Chandrashekhara" }, { "ids": [ "3102980" ], "name": "Taeyeon Ki" }, { "ids": [ "2654406" ], "name": "Kyungho Jeon" }, { "ids": [ "3119495" ], "name": "Karthik Dantu" }, { "ids": [ "1691861" ], "name": "Steven Y. Ko" } ], "doi": "10.1145/3117811.3117822", "doiUrl": "https://doi.org/10.1145/3117811.3117822", "entities": [ "Android", "Database", "End-to-end principle", "Mobile operating system" ], "id": "0734438fb61b5a1558b42e9156f800bb49aa6e26", "inCitations": [], "journalName": "", "journalPages": "396-408", "journalVolume": "", "outCitations": [ "7dbce0de554c2adbc28d7ba1d927c9f1cc8b184a", "2d1addf9bc1c37214d1656cd400f3f344e82ac33", "bc53135d3296ce4bb907238f423e89d2a59c7c71", "5e97ff426b5d0a415fa380a6c9645d2537aa6cd2", "3f834b98ea5cf9a24beb13e48018bcbf846c4e20", "186e91a2c251e55a787e96dbb7a5d06cb4c81517", "2ab731e0263229327d43a4e716ac6d7f0473a56d", "4e412c4a9c216312a59e95114330b06f6ba14592", "0f6e3c5ad255b43c867f8f63f5ea7aec67da075a", "0ea6716514909256eeedb4849c5009b9e237f763", "60ab9884f187e7395011d78fcb142f5614fe0d5e", "9061a3802910b71cf5d840473d7b9989649af94a", "642e0646013dadd1f8f49f88901a109cdb6f2984", "5619c61ac036cfcde641ff6e1ce882f0e00f5acb", "027eb436c35c7e293e7ebc565163cb54c05fe2e9", "aa7458464f3e0ad3cbfeed72fea08dd93ef79649", "3207fdb643e19e0602757241c303e9fc21953b49", "0d366f3522bcf503b9f0fea8a5d009ba3ecddf39", 
"2c9b6f1a420ecd9e54b7467efd17f203690ef07e", "a6a8313f30420c60e7eaa9f34ea5a41833695af1", "407a55ea947f5f430e8def26c5f4183db0f53c3a" ], "paperAbstract": "In this paper, we design a pluggable data management solution for modern mobile platforms (e.g., Android). Our goal is to allow data management mechanisms and policies to be implemented independently of core app logic. Our design allows a user to install data management solutions as apps, install multiple such solutions on a single device, and choose a suitable solution each for one or more apps. It allows app developers to focus their effort on app logic and helps the developers of data management solutions to achieve wider deployability. It also gives increased control of data management to end users and allows them to use different solutions for different apps. We present a prototype implementation of our design called BlueMountain, and implement several data management solutions for file and database management to demonstrate the utility and ease of using our design. 
We perform detailed microbenchmarks as well as end-to-end measurements for files and databases to demonstrate the performance overhead incurred by our implementation.", "pdfUrls": [ "https://nsr.cse.buffalo.edu/wp-content/uploads/2017/12/bluemountain-mobicom17.pdf", "http://doi.acm.org/10.1145/3117811.3117822" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0734438fb61b5a1558b42e9156f800bb49aa6e26", "sources": [ "DBLP" ], "title": "BlueMountain: An Architecture for Customized Data Management on Mobile Systems", "venue": "MobiCom", "year": 2017 }, "075420f809aa1a9e5a56016ca4ae9d8cdb78b213": { "authors": [ { "ids": [ "2219393" ], "name": "Wilson Lian" }, { "ids": [ "1786752" ], "name": "Hovav Shacham" }, { "ids": [ "1727599" ], "name": "Stefan Savage" } ], "doi": "", "doiUrl": "", "entities": [ "Address space", "Byte", "Compiler", "Diversification (finance)", "Executable", "JIT spraying", "JavaScript", "Just-in-time compilation", "Unified Framework", "X86", "X86-64" ], "id": "075420f809aa1a9e5a56016ca4ae9d8cdb78b213", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "96628792f9e88e95bb788ba0e128c001c320490c", "0fc7f3a21359665c456853e3fe09c9a5c4a24f37", "480d4a756381f7aec1ffda84a3d7f1ef2695252a", "2f4002755b309cdb91e18116b8028005497d8400", "f7d02a1b86772f0ce8cbb3a6a7424b3ce5f367e4", "f479c0578156255ce176e75bb13051fbb0f25b98", "569393ee0bbba78af3241e544c347b2e98a1275d", "3738a8045c001c8ffd245e72b0d68382fba27a48", "1bb2363ddfec8e12f5408ce6b1538d74570bd865", "67b752aaef2133ec0cda47b2a2c1856f0f2f266f", "3875d1d1b623af0d640528efc9e581bc91338e35", "36ef68442d55cee50fd35283617d3e77ecca6784", "4e12a563998733080cf02240ce8fdd3292c14044", "f4666ceedb6f0590dc4031ca2558c1ed3e8ffbcc", "65950dfc50eb482d9df1ae11050a9f76fcddbc61", "77cab38bef9d1b8b7bcfd3bb382c9d55541465e1", "01ee7db67463b53143e0b2c126363ed6d3c8a532", "26de23713ac23ed7a952cf56faa8bd23f8fd6575", "1677bf5c635ef0e81b6c6cdfce30727f83959132" ], 
"paperAbstract": "JIT spraying allows an attacker to subvert a Just-In-Time compiler, introducing instruction sequences useful to the attacker into executable regions of the victim program\u2019s address space as a side effect of compiling seemingly innocuous code in a safe language like JavaScript. We present new JIT spraying attacks against Google\u2019s V8 and Mozilla\u2019s SpiderMonkey JavaScript engines on ARM. The V8 attack is the first JIT spraying attack not to rely on instruction decoding ambiguity, and the SpiderMonkey attack uses the first ARM payload that executes unintended instructions derived from intended instruction bytes without resynchronizing to the intended instruction stream. We review the JIT spraying defenses proposed in the literature and their currently-deployed implementations and conclude that the current state of JIT spraying mitigation, which prioritizes low performance overhead, leaves many exploitable attacker options unchecked. We perform an empirical evaluation of mitigations with low but non-zero overhead in a unified framework and find that full, robust defense implementations of diversification defenses can effectively mitigate JIT spraying attacks in the literature as well as our new attacks with a combined average overhead of 4.56% on x86-64 and 4.88% on ARM32.", "pdfUrls": [ "http://cseweb.ucsd.edu/~savage/papers/NDSS2017.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/call-arms-understanding-costs-and-benefits-jit-spraying-mitigations/" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0754/20f809aa1a9e5a56016ca4ae9d8cdb78b213.pdf", "s2Url": "https://semanticscholar.org/paper/075420f809aa1a9e5a56016ca4ae9d8cdb78b213", "sources": [ "DBLP" ], "title": "A Call to ARMs: Understanding the Costs and Benefits of JIT Spraying Mitigations", "venue": "NDSS", "year": 2017 }, "075547a518e15af7e334fe66a845da4862d79af4": { "authors": [ { "ids": [ "1684480" ], "name": "Kartik Gopalan" }, { "ids": [ "24694633" 
], "name": "Rohith Kugve" }, { "ids": [ "7314140" ], "name": "Hardik Bagdi" }, { "ids": [ "2973015" ], "name": "Yaohui Hu" }, { "ids": [ "30904419" ], "name": "Dan Williams" }, { "ids": [ "2606437" ], "name": "Nilton Bila" } ], "doi": "", "doiUrl": "", "entities": [ "Central processing unit", "Cloud computing", "Disk mirroring", "Ecosystem", "FUJITSU Cloud IaaS Trusted Public S5", "Hypervisor", "Introspection", "Management system", "Operating system", "Span and div", "Virtual machine", "X86 virtualization" ], "id": "075547a518e15af7e334fe66a845da4862d79af4", "inCitations": [], "journalName": "", "journalPages": "235-249", "journalVolume": "", "outCitations": [ "46b9d88a665a94f7bd0fd88d4d99ca71891ad182", "423455ad8afb9b2534c0954a5e61c95bea611801", "1ef9c15256e66f020c339df27c5d0fe5ff758aaf", "0e851f49432767888b6ef4421beb268b9f2fc057", "86013daaae16572bceb755e65ee5fa2fdfb63848", "8cf946e26dda4b335850195f661d78518a6870ca", "a52defad31f7cc297559159c872ee54f1d94b300", "080b1f2c8316dad80d8c385dfcb82335a64a4d29", "3574657705475722b6c398c266805f758268778b", "12d6a1bc055f40b5ac7b35a6b560483dfd5fb909", "972bf3fbd8c7c6bc35d29a383f3805cca5ddd583", "3b0045277fd4cd8134439ea29a6361dc8a63c2a6", "5bc690391cb140731f88c8a68b4dee6dacd7097d", "6c2a4fd3bae2ddae3f23558985de58dd7673378e", "30d9132ef7845b8fb4e53d9ad982363700746928", "85d555f7ce19740b4fc656ff797623c6e1513018", "8204d8fef85ddd8c32e7b470c244a50910836263", "6c0562ffc00ba7a4d2734ac039ffd181afe2008d", "2f2cdd7b0c98b5e43b61272d2ac3ebb5cd29041d", "7911a30950d0bd1e6fab291df51803b453b851be", "c1761031e49fb73b185a73ddc8ba61c234fce646", "454dd673096a64d5ed41e4afe246ff4059a40a1a", "37d6841e5a5b11c8f187234ce2d1ee5ee2a888b2", "27f071ccbea5a4940dcc585ba4cfa9258bf2bcdf", "6111f1a9ab657910f5a11a95de117b3c5181565a", "17f1ff82aca7a592a8815e8169b6e2210bf6ae7a", "0abc3e83ccd6e685f8d0299f24f03ae28f4c2459", "170a81df3ff2076fe9a3f2fdee0755a7310c2c41", "43f6fbd92f450aab99fd58fde5e7c861898b33ae", "3946a5c410ba1980d932cc6d2987b4d935e038d5", 
"60f135af3eb5253394f4ff944062a1b9e6a0c564", "9d0dfe6c2ffb2c1ff0715010688b213dfc1d0e9f", "47dc52eeb7bf6efb46c550201cc8d52af71cc1a3", "07aca048b6dbc583fed7434890a213b68dd4e0f1", "1251fe24e96d5c12f868bf4584351c0ee03d55ec", "24748ef2b88e6df370b5dccfb75cba47e132f92d", "b2d12095065899f6fb0970b56d6a9f5ea5af1a2e", "32a7c5e10a09b5532d56af50ef2f60d9776cc56a", "69ccb255d3747bbebbd031d8c21cb870e5bf3b53", "ad4a6346ef0da6704d2017ae48839644de92c9ba", "4295e39f3e631fdd099481acb24b1e1fef772c9b", "5edb4dd1952a63707f1ff73db5e507c21bb962f8", "4ab4a666f5e5ed34ac219a9fdc2f70bd1cab0922" ], "paperAbstract": "Public cloud software marketplaces already offer users a wealth of choice in operating systems, database management systems, financial software, and virtual networking, all deployable and configurable at the click of a button. Unfortunately, this level of customization has not extended to emerging hypervisor-level services, partly because traditional virtual machines (VMs) are fully controlled by only one hypervisor at a time. Currently, a VM in a cloud platform cannot concurrently use hypervisorlevel services from multiple third-parties in a compartmentalized manner. We propose the notion of a multihypervisor VM, which is an unmodified guest that can simultaneously use services from multiple coresident, but isolated, hypervisors. We present a new virtualization architecture, called Span virtualization, that leverages nesting to allow multiple hypervisors to concurrently control a guest\u2019s memory, virtual CPU, and I/O resources. 
Our prototype of Span virtualization on the KVM/QEMU platform enables a guest to use services such as introspection, network monitoring, guest mirroring, and hypervisor refresh, with performance comparable to traditional nested VMs.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-gopalan.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_gopalan.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/gopalan" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4d22/eb245bbee568195c5e84a32c41284da0f5fc.pdf", "s2Url": "https://semanticscholar.org/paper/075547a518e15af7e334fe66a845da4862d79af4", "sources": [ "DBLP" ], "title": "Multi-Hypervisor Virtual Machines: Enabling an Ecosystem of Hypervisor-level Services", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "075ce944583b93b2dc7c2b3bbe53485780dfc7e2": { "authors": [ { "ids": [ "3359075" ], "name": "Ewnetu Bayuh Lakew" }, { "ids": [ "1805880" ], "name": "Alessandro Vittorio Papadopoulos" }, { "ids": [ "2753088" ], "name": "Martina Maggio" }, { "ids": [ "2748720" ], "name": "Cristian Klein" }, { "ids": [ "1685517" ], "name": "Erik Elmroth" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud computing", "Control theory", "Elasticity (cloud computing)", "Experiment", "Interactivity", "LXC", "Linux", "Megabyte", "Provisioning", "Throughput" ], "id": "075ce944583b93b2dc7c2b3bbe53485780dfc7e2", "inCitations": [ "a8ccae500af3dbcfb2d2ce3701bea479a46e2556", "bf1122c2881e6b48be951c930e61fb882c1cfa9d", "5446eb3a622f5d14d4202395e2960892407ab00a" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "589-598", "journalVolume": "", "outCitations": [ "42c9ffb48c907631d1471de313b3870a330a5182", "9e98d529d158e2230d722f497fbc36373eaa8583", "3c0bc4e9d30719269b0048d4f36752ab964145dd", "0a96ed079dfa8768c4aba0226dd3e014a4f61f2c", 
"ee981f0d7de58726989cdc14ac1b71f754adc621", "6f5d96874b919df9e884a165a21859b860f2a5fd", "2e72178091b2ca445f46200dcba71a53417b69eb", "85dfe3c3053506f7602c410cfa97cc1595cd6143", "5e8b0bce8fcc140c124241e0ac89121a13b40f92", "48cd46d91c45f45b073d2d38a5fea45dbc3f7f1e", "363db948ce2da7d84e3e9ee85e0182d0632bb33a", "2af510fa15b09b6f2247691b73955fb1885797e4", "605f6a93cc650c37dcb00c27da4f5026724523bc", "725b58aa6b81490b9db8e9ed2d4a72c1d0fb366f", "04c0c3d9f08c77de9ba448d64825c4f556c2de99", "4beed7056c232d88ae0f5294034a710e06802eb4", "eeaac9219d99c0dee5f194f598d1abe278cc9c92", "6ec43f357fb62d16c853e1848bc02bd5aea98676", "067c7857753e21e7317b556c86e30be60aa7cac0", "0d2ccae6d37b9e4053fc476887d1565de58e5924", "c2bc2e165fe6af3de5de600af57cb0b301ce0c0f", "d3ee0f817b1909d3373c95d5133c485d15e0b77d", "277f20ddc0e9fa593753ef2778110508372c597f", "379d51fbe02b562a31719a1005a5c520508348c5", "a4aa78a726fb277a94f08f301a7153b2ee5e4e92", "0ec59b7fae15a7caa3256ba31b21802d455618c2", "fa3b9aeaddbf900c6df1c6979e10dd3330e757da" ], "paperAbstract": "Applications hosted in the cloud have become indispensable in several contexts, with their performance often being key to business operation and their running costs needing to be minimized. To minimize running costs, most modern virtualization technologies such as Linux Containers, Xen, and KVM offer powerful resource control primitives for individual provisioning – that enable adding or removing of fraction of cores and/or megabytes of memory for as short as few seconds. Despite the technology being ready, there is a lack of proper techniques for fine-grained resource allocation, because there is an inherent challenge in determining the correct composition of resources an application needs, with varying workload, to ensure deterministic performance. 
This paper presents a control-based approach for the management of multiple resources, accounting for the resource consumption, together with the application performance, enabling fine-grained vertical elasticity. The control strategy ensures that the application meets the target performance indicators, consuming as less resources as possible. We carried out an extensive set of experiments using different applications – interactive with response-time requirements, as well as noninteractive with throughput desires – by varying the workload mixes of each application over time. The results demonstrate that our solution precisely provides guaranteed performance while at the same time avoiding both resource over-and underprovisioning.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101192" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/075ce944583b93b2dc7c2b3bbe53485780dfc7e2", "sources": [ "DBLP" ], "title": "KPI-Agnostic Control for Fine-Grained Vertical Elasticity", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "075dfcf2d75023c51fdab755d747a3f9437883c1": { "authors": [ { "ids": [ "1682591" ], "name": "Richard Wolski" }, { "ids": [ "1793260" ], "name": "John Brevik" }, { "ids": [ "36319017" ], "name": "Ryan Chard" }, { "ids": [ "3091414" ], "name": "Kyle Chard" } ], "doi": "10.1145/3126908.3126953", "doiUrl": "https://doi.org/10.1145/3126908.3126953", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "F-Spot", "Multitier architecture", "Reduced cost", "Service-level agreement", "Signal trace", "Virtual machine" ], "id": "075dfcf2d75023c51fdab755d747a3f9437883c1", "inCitations": [ "c57b90ab1ca73dd52ce1b1161e68f5feb652d694", "ca7c6532d9174b15bb783a09c4c95d669e4fffd5", "3f99bb743fa9576f8da7d168f3858dd0acf35e79" ], "journalName": "", "journalPages": "18:1-18:11", "journalVolume": "", "outCitations": [ "72d521432ec40e1c855f45f1cf88c1b77edfbb36", 
"0935bb723e4071ccd4c2334d3b6d728faa111d11", "48a6b370460dc8e6ce9c5a45eb39cf1fb654f1f3", "0287a0c19b29b2497fd860b568dbb89cdf1a4813", "4e44046bfb459c5f627ef141786773e2c4591de4", "d608a95490b02839fdf71a412aab46ad20a70596", "05be0db01d70bcce9530b462ab2368f9e15127d9", "754eaf7f37708105f94b9798a30e177cd4c58f98", "3cd6ad03a55a0450f34043bc5091cb9a6827255f", "71de39ceaaa0efecc2c84ce8fe0af8ceb5ed79e7", "bbbdaa8d70f767956358f365cebe80206ee20a4d", "12c28dd5ea0b2d0269a67a43c2eb0b1207b2b889", "9bea73e953bd714354829b4f0eda55f9eb1fa37f", "4f86fa28602d9503a8575c5b31082284abc8415c", "1da8852aa591d82f6dab3d93c8aba923e69a45d4", "0364d9b50978071565a1abc6206daaa0b6178899", "94859f850f345629c23526e1155aa9deb1852491", "616ac1d7764f3586b26e482818b0070bd85b1288", "51694f797b0d4a7e03b1e9a6587a9d4976e92297", "b46bf5cfc2f721557c28d2809d6704dce1af1abc", "031a2c591a08b89bf06de1d8277fe8fd3c1705ae" ], "paperAbstract": "In this paper we propose DrAFTS - a methodology for implementing probabilistic guarantees of instance reliability in the Amazon Spot tier. Amazon offers \"unreliable\" virtual machine instances (ones that may be terminated at any time) at a potentially large discount relative to \"reliable\" On-demand and Reserved instances. Our method predicts the \"bid values\" that users can specify to provision Spot instances which ensure at least a fixed duration of execution with a given probability. We illustrate the method and test its validity using Spot pricing data post facto, both randomly and using real-world workload traces. We also test the efficacy of the method experimentally by using it to launch Spot instances and then observing the instance termination rate. 
Our results indicate that it is possible to obtain the same level of reliability from unreliable instances that the Amazon service level agreement guarantees for reliable instances with a greatly reduced cost.", "pdfUrls": [ "http://www.cs.ucsb.edu/~rich/publications/drafts-sc17.pdf", "http://www.cs.ucsb.edu/sites/cs.ucsb.edu/files/docs/reports/master.pdf", "http://cs.ucsb.edu/sites/cs.ucsb.edu/files/docs/reports/master.pdf", "https://www.cs.ucsb.edu/sites/cs.ucsb.edu/files/docs/reports/master.pdf", "http://doi.acm.org/10.1145/3126908.3126953" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/075dfcf2d75023c51fdab755d747a3f9437883c1", "sources": [ "DBLP" ], "title": "Probabilistic guarantees of execution duration for Amazon spot instances", "venue": "SC", "year": 2017 }, "07901da32c40a5bb25c8a9a6137c261d7465eb8d": { "authors": [ { "ids": [ "1693687" ], "name": "Umut A. Acar" }, { "ids": [ "2595046" ], "name": "Naama Ben-David" }, { "ids": [ "1847386" ], "name": "Mike Rainey" } ], "doi": "10.1145/3018743.3018762", "doiUrl": "https://doi.org/10.1145/3018743.3018762", "entities": [ "Amortized analysis", "Concurrency (computer science)", "Concurrent data structure", "Data structure", "Directed acyclic graph", "Parallel computing", "Programming language", "Programming paradigm", "Series-parallel graph", "Shared memory", "Zero" ], "id": "07901da32c40a5bb25c8a9a6137c261d7465eb8d", "inCitations": [ "f8b4eca178b95047647db1fef0af9023c0ee4bc0" ], "journalName": "", "journalPages": "75-88", "journalVolume": "", "outCitations": [ "617697d13cd88ae2a7a950f8ebcd11a0795930d4", "4e8dd370cf9b6d6f6179cfb8cf2e529814e193c9", "11a5cdac00df51114a77fafb62c72ac57f52e8c0", "04f83137f43c6caba920b6455639f26b48656231", "09ed565e84057123c15ab12b885c235d1f241aed", "035941ac151141dbfb4303ee6bd941e885082207", "59d45fd3582e73b1328fd0490b84896bb7a9d7c1", "1372e033396fe1a5aa12a1b148c5015a2e09d1d6", "b022f7981d06e24c9ac03e954a4ab21746e7086b", 
"5b0206dd59d8a70788c11933d8409938b4ea3fd0", "4fcfa58bda82134cdf2982ea12e653da6b553f89", "1c808644694dd6e287018491ca14a060fab1a6bd", "1d1c68d07c4738e321a3db24fede081e95baff2c", "bee25de126b12f3f9dc64b1da804a88831eb11b3", "32d99461d75b42ff53f56c8fa562af92c0ebaddc", "045a975c1753724b3a0780673ee92b37b9827be6", "429e313d33a82bf086b69d47eee735450cbeb4ae", "111b6d6a518ea292e022e407962d2b5bcdf38206", "17ccb526085ec88fa2d35d8c8d7dc246b9d1bbe3", "101f10b90ce859135868668478fbde5882c87458", "3f2c23ead76447e44b404c16bd82f3948ba557dd", "5069f6267707df50e3578afaa8dfa9c15f3c3b07", "4aaf53b5b57dc748ee87b4e025b13b9e48b979ec", "0e41c57137ed2e59f2a11ec1b26a50f691f33310", "251ef42ebdbde077b154085339128d0b59292c2f", "327cbb1da2652b430a52171d510cf72235b890b6", "0f7a65a644408f102e85df9d5c49e970b9e369c7", "415e5008232116e6869caf29c349a2dfe390264e", "cba77292e7f1f271fff1bd28238728f4f18dd13e", "21b5d84fcca50eb0af296c6c0f901c9c3a4c6d9d", "0e422bd90c8be636358d4eb75f05276b361d19d4", "1dff33cb24cf30be232d02bc48ebdf200480d2f3", "3ee0eec0553ca719f602b777f2b600a7aa5c31aa", "58c1e11d6ee2e36f80625231ad384448769d7e27", "0fb659af82f2277c8a62ac888f4bfd01570e5470", "0d8f77b74460f5abcb8e7a885b677a000a2656be", "52aad68b6a150c5db537ef64c23e07d8abd58cc1", "0cc3bfa44a22d615f09da5e4f9d99e4c4ad019b8", "1f0a871a4c9abf418c4a5e2a66db9f78da669dca" ], "paperAbstract": "Over the past two decades, many concurrent data structures have been designed and implemented. Nearly all such work analyzes concurrent data structures empirically, omitting asymptotic bounds on their efficiency, partly because of the complexity of the analysis needed, and partly because of the difficulty of obtaining relevant asymptotic bounds: when the analysis takes into account important practical factors, such as contention, it is difficult or even impossible to prove desirable bounds.\n In this paper, we show that considering structured concurrency or relaxed concurrency models can enable establishing strong bounds, also for contention. 
To this end, we first present a dynamic relaxed counter data structure that indicates the non-zero status of the counter. Our data structure extends a recently proposed data structure, called SNZI, allowing our structure to grow dynamically in response to the increasing degree of concurrency in the system.\n Using the dynamic SNZI data structure, we then present a concurrent data structure for series-parallel directed acyclic graphs (sp-dags), a key data structure widely used in the implementation of modern parallel programming languages. The key component of sp-dags is an in-counter data structure that is an instance of our dynamic SNZI. We analyze the efficiency of our concurrent sp-dags and in-counter data structures under nested-parallel computing paradigm. This paradigm offers a structured model for concurrency. Under this model, we prove that our data structures require amortized (1) shared memory steps, including contention. We present an implementation and an experimental evaluation that suggests that the sp-dags data structure is practical and can perform well in practice.", "pdfUrls": [ "http://www.cs.cmu.edu/~nbendavi/ppopp_2017_conf.pdf", "http://reports-archive.adm.cs.cmu.edu/anon/2016/CMU-CS-16-133.pdf", "http://gallium.inria.fr/~rainey/dynsnzi.pdf", "http://www.cs.cmu.edu/~nbendavi/ppopp_2017_slides.pdf", "http://dl.acm.org/citation.cfm?id=3018762", "https://hal.inria.fr/hal-01416531/document" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/07901da32c40a5bb25c8a9a6137c261d7465eb8d", "sources": [ "DBLP" ], "title": "Contention in Structured Concurrency: Provably Efficient Dynamic Non-Zero Indicators for Nested Parallelism", "venue": "PPOPP", "year": 2017 }, "07917775e85ed02803f405b2fbf9d57a240e156e": { "authors": [ { "ids": [ "1949001" ], "name": "MohammadHossein Bateni" }, { "ids": [ "2266873" ], "name": "Hossein Esfandiari" }, { "ids": [ "1728881" ], "name": "Vahab S. 
Mirrokni" } ], "doi": "10.1145/3087556.3087585", "doiUrl": "https://doi.org/10.1145/3087556.3087585", "entities": [ "Algorithm", "Approximation algorithm", "Black box", "DSPACE", "Human factors and ergonomics", "Set cover problem", "Streaming algorithm" ], "id": "07917775e85ed02803f405b2fbf9d57a240e156e", "inCitations": [ "7e0695d65ad3aedaa30bb7aaf28edc432ac711e7", "42c04620d98623d49d39bce3f5f9363ea1769daa", "3ab135880738545f69e03028746fd7715ce8a036", "02f34f9d891ec0561439008028a4059db52f3aac", "04376a241d021461eb55b6a8a1391679a73cfa6e", "bb82ab4c6bf3da4dbf448a0c036cf2eaf3889964", "265422784efe15311b28116c16c82a4d27dc0d79" ], "journalName": "", "journalPages": "13-23", "journalVolume": "", "outCitations": [ "30fc67dfcc25ab3ce1642cb3b4f114940414dee8", "9089101cd4306c380c0b57d7205822411b323c38", "0df3980457291b7425e37eb686a6fd7b3eb94abe", "0e767d7d9e146682a26df27df12d3843de128b7a", "6518035089d0c87b925c6262bbf5b949d3bb3fff", "cb12d6f0460bd9e3a0c28e6ff3c974763265e02f", "6484a2de45d7e16ed3a2f4a302e9d45c980a8920", "202a3630d9bea2a61a7b026bf395993c0f637caa", "bde7cc85837836fab6c1f946a6e77189ac9d9eed", "02f34f9d891ec0561439008028a4059db52f3aac", "5be4a65faa4e7fe077ad13f4b0cdbbe68222c49c", "107344a6a049d629465e9d38f00222872043a2b1", "69b6a42ad7068962363687c038c6ae2e0760867a", "9a141890c4ea436ff9ad8ac5ba7aea1c9832f33e", "46b51960a073a759e1d55b41c75b6bb3e5273be8", "abbdb6177b4408c5885a569dc24e6361f91cf169", "07de950bb54b9572b1b41aff9f026f7b00a61ccd", "1d415725ca9c5a551158355b8b497ddda48ba80d", "632c6bfaa258aae7958bd192b2a00db8a33cc516", "84dca4b6095bc07376a1f1f1d9a6e2d73aa4d01d", "5e493ab3e938d34aeca99d463463d58a863ee97e", "01881280bd3a673abc6da8ab5c7fa2f215e7476b", "054c5374c74d33dc3a65c3140315eafeb3d62604", "b9e43395663f74c581982e9ca97a0d7057a0008c", "9202a76c3b53d385f1b715d3a75e18c053232c32", "3d25eb8241345f86101fda145d95d89c27844fd1", "839d54efd65172cfb24ba700bab51b9e74f59caa", "06e7ec1b1a018225fb632c1b7d029b74151b4730", "4bf72d8636c129142b77f6241f75074e720e6a1c", 
"379ef18377d803d87859314c0e110cdf64f2ea73", "82afdb9f6b3441bbe4e8a5d6e1d0f5a647748d68", "9bd101ad8faba5ff0ff1f625be773ce0acb697fc", "7e4cb3ca74b9e0d83cb53340d4ead2331cc8328c", "159a7a03ccb0ef751db1870be1de4d26a02470f3", "28dd94325f1fe6a8ea5787abe3bbdf7dfea71259", "4d080825ac4bfcca1804abc5fcb6404dd1a2ae94", "03e66d2e5f428454037f865400691652941ba9f1", "069b6245aa2573ac0c5c6e57b62d52d8c8234984", "99abc13b552c78387d8e21475ed74a38c9547905", "052af1757c410fa8b65bf95339c6e4142d723d61", "c9bb3728b1e2afe09def0733caffcb99a68baef3" ], "paperAbstract": "Maximum coverage and minimum set cover problems---here collectively called coverage problems---have been studied extensively in streaming models. However, previous research not only achieves suboptimal approximation factors and space complexities but also study a restricted set-arrival model which makes an explicit or implicit assumption on oracle access to the sets, ignoring the complexity of reading and storing the whole set at once. In this paper, we address the above shortcomings and present algorithms with improved approximation factor and improved space complexity, and prove that our results are almost tight. Moreover, unlike most of the previous work, our results hold in a more general edge-arrival model.\n More specifically, consider an instance with n sets, together covering m elements. Information arrives in the form of \"edges\" from sets to elements (denoting membership) in arbitrary order.
  1. We present (almost) optimal approximation algorithms for maximum coverage and minimum set cover problems in the streaming model with an (almost) optimal space complexity of Õ(n); i.e., the space is independent of the size of the sets or the size of the ground set of elements. These results not only improve the best known algorithms for the set-arrival model, but also are the first such algorithms for the more powerful edge-arrival model.
  2. In order to achieve the above results, we introduce a new general sketching technique for coverage functions: One can apply this sketching scheme to convert an α-approximation algorithm for a coverage problem to a (1-ε)α-approximation algorithm for the same problem in streaming model.
  3. We show the significance of our sketching technique by ruling out the possibility of solving coverage problems via accessing (as a black box) a (1 ± ε)-approximate oracle (e.g., a sketch function) that estimates the coverage function on any subfamily of the sets. Finally, we show that our streaming algorithms achieve an almost optimal space complexity.
", "pdfUrls": [ "https://arxiv.org/pdf/1610.08096v1.pdf", "http://doi.acm.org/10.1145/3087556.3087585", "https://arxiv.org/pdf/1610.08096v2.pdf", "http://arxiv.org/abs/1610.08096" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/07917775e85ed02803f405b2fbf9d57a240e156e", "sources": [ "DBLP" ], "title": "Almost Optimal Streaming Algorithms for Coverage Problems", "venue": "SPAA", "year": 2017 }, "07987d8ef619891f8b9c41f7ac7519a62ad785f5": { "authors": [ { "ids": [ "20437926" ], "name": "Hyoukjun Kwon" }, { "ids": [ "2526541" ], "name": "Tushar Krishna" } ], "doi": "10.1109/ISPASS.2017.7975291", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975291", "entities": [ "Algorithm", "ChIP-on-chip", "Experience", "Interconnection", "Microarchitecture", "Moore's law", "Network on a chip", "Open-source hardware", "Open-source software", "Pipeline (computing)", "Plug and play", "Program optimization", "Router (computing)", "Routing", "SMART", "Scalability", "Semiconductor intellectual property core", "System on a chip", "SystemVerilog", "Verilog" ], "id": "07987d8ef619891f8b9c41f7ac7519a62ad785f5", "inCitations": [ "a6998e429556aa790adc7cf8c6223e1b5bc458f4" ], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "195-204", "journalVolume": "", "outCitations": [ "651534f5bd9d3c3ec4baba3461f03af264e1bbef", "0c10529346c4d2d5d4462636a0b3a0dd9fb8d25c", "0a569200aeb678b4420cb1025ee9dc6097f60567", "46aac855784e894242488bcfc1f0779503042f8b", "0979d67140ac96174f65be430012ae8607095310", "4bad51c7685254155733ee8def6a1294378aa1af", "92fd40952fb7561127e325cfb66398fc70c22ff7", "1653bbcf0428c33af34daa8c3feb55924dbc3fa4", "5ced6a0aab1350ef1dba574e1faa05a726d9517e", "654db98cf76ecc6bd2bd5d63952fff1d6365ad4f", "26d6362895ac0c28bec4f68106fe2df6a3d27e9c", "e71b943c495c4a097c5074fbb6b7120ebcfa0be0", "7062d41ef2e30ba2b8d91dfaa1dee3cce137ad96", "5c9730d0db498289a9d1897590faf0185e75857e", 
"6f4d58486b1c6d710586b1d182ddad7d09a8da11", "16840a49fb3c1e4ef144a1939707fa405008093d", "20e4eadb02bd2307ed1e47f9defce1cfb6c3b53f", "2cba84a71e7a7949ccdc238fd3ef6b039066d793", "002cca80b9deaa3c1e9d46ae0bfdc9fd79079907", "32c8c7949a6efa2c114e482c830321428ee58d70", "329756f2d29829e1b2e713360016995855d0ea26", "02c78232075ac431834e3442dcb2954d4e708def", "54ee30efe09b9e817ce9ec83d46553e4bded58ed", "2c2e32267c43161f80241a2e1ba21d1f0f871dd4", "30e913c6bd6fd0bee790b6237d4c7a958a779d29", "0548ab587826c85cd55f1586f8b6fad807bdc24f", "17f7a86c811c3dca1a65723ef180d3e01196f6fd", "20e29444a28a763a45f9d9860ec4cd210ea5f084" ], "paperAbstract": "The chip industry faces two key challenges today — the impending end of Moore's Law and the rising costs of chip design and verification (millions of dollars today). Heterogeneous IPs — cores and domain-specific accelerators — are a promising answer to the first challenge, enabling performance and energy benefits no longer provided by technology scaling. IP-reuse with plug-and-play designs can help with the second challenge, amortizing NRE costs tremendously. A key requirement in a heterogeneous IP-based plug-and-play SoC environment is an interconnection fabric to connect these IPs together. This fabric needs to be scalable — low latency, low energy and low area — and yet be flexible/parametrizable for use across designs. The key scalability challenge in any Network-on-Chip (NoC) today is that the latency increases proportional to the number of hops. In this work, we present a NoC generator called OpenSMART, which generates low-latency NoCs based on SMART1. SMART is a recently proposed NoC microarchitecture that enables multihop on-chip traversals within a single cycle, removing the dependence of latency on hops. SMART leverages wire delay of the underlying repeated wires, and augments each router with the ability to request and setup bypass paths. 
OpenSMART takes SMART from a NoC optimization to a design methodology for SoCs, enabling users to generate verified RTL for a class of userspecified network configurations, such as network size, topology, routing algorithm, number of VCs/buffers, router pipeline stages, and so on. OpenSMART also provides the ability to generate any heterogeneous topology with low and high-radix routers and optimized single-stage pipelines, leveraging fast logic delays in technology nodes today. OpenSMART v1.0 comes with both Bluespec System Verilog and Chisel implementations, and this paper also presents a case study of our experiences with both languages. OpenSMART is available for download2 and is going to be a key addition to the emerging open-source hardware movement, providing a glue for interconnecting existing and emerging IPs.", "pdfUrls": [ "http://synergy.ece.gatech.edu/wp-content/uploads/sites/332/2017/03/OpenSMART_ISPASS17.pdf", "https://doi.org/10.1109/ISPASS.2017.7975291" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/07987d8ef619891f8b9c41f7ac7519a62ad785f5", "sources": [ "DBLP" ], "title": "OpenSMART: Single-cycle multi-hop NoC generator in BSV and Chisel", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "07a6c453adc7a5844180a10d111f1cc3ebfc7717": { "authors": [ { "ids": [ "40264133" ], "name": "Rajdeep Das" }, { "ids": [ "3144640" ], "name": "Nimantha Thushan Baranasuriya" }, { "ids": [ "1799406" ], "name": "Venkata N. 
Padmanabhan" }, { "ids": [ "3116781" ], "name": "Christoffer R\u00f8dbro" }, { "ids": [ "3758909" ], "name": "Seth Gilbert" } ], "doi": "10.1145/3143361.3143390", "doiUrl": "https://doi.org/10.1145/3143361.3143390", "entities": [ "A/B testing", "Experiment", "Network congestion", "Randomness" ], "id": "07a6c453adc7a5844180a10d111f1cc3ebfc7717", "inCitations": [], "journalName": "", "journalPages": "376-388", "journalVolume": "", "outCitations": [ "3b59a3d653fe3ed2892be57cbf89ce4258e4e209", "114e68ae77738097ba690499dccffe817da1b839", "0ad508e318ae0ee6e6b7ea0adc6c9d9aa9446320", "21144c114bce1a756c5b7be939f4fa7b944eb88d", "0a974f9a517409cf80c1f38e5d805c2c3da35f6a", "de17cf40a4db13315c631c597959ae26f691f2fa", "43262a1017e57333175e5866d4f4b6e590cca5f2", "0bd667494d9f0c499e09c728616bb44029428e82", "68393ea2a7281e02f0c4ee4dd9fccc6bae3d9370", "57e29b3745253addac7d3f429d5cedb39e483813", "27644a68d3a0dd999b040ec47f08560bbce71773", "43649ab7ecbfa60f4acdc4f0729fc0286767b05a", "805d0da469da6ba7571ee75732ab66202aaea9e0", "22b33f57e3ffed02849874c6da46b770d9a40cae", "680c819b14aab08e41b969c6eab5b42c27ef08be", "5e40fc1fd8f6b94b9ab2309109af607ddaf702b4", "6c1246163dc263b588031dc743b4bcb37a6aeb76", "2f85f20a076cb91dcdf4b3e5b16886ee9b6b3543", "3c6c577509204e2755775ba848aa0de4e8687520", "3f200c41618d0c3d75c4cd287b4730aadcf596f7", "4b9b84a255ff86d6f8a67797109d8d319fea9f5b", "0d971b64abf9d8e345f54802510303c63a276b50", "0867c495ea653f3d64493e0da0682adc3953d75a", "a4e5a44176487f6559bbdb3ba033d9de777208ad", "094aca6103f4079521e6a596d099ed37f7d2b498", "15e176fc33eff28d9379a689dbd90211841eb1b0", "3e5e2ebade6a4374e97a0e046a5efdb69fb5eab2", "12571980fbd4a7785e16f193d4cc47bbef816139", "69fcfa33bfc16e43ee748af73daf349d778a197c", "0f10f9e89f942336ca08c4a8db907bee52c38eb3", "16eedc8ccfda1a7e213097ada3c234829488add5", "44945c691c544f62e6374a5484b144936570517b" ], "paperAbstract": "Bandwidth adaptation for real-time streaming applications is typically designed to be conservative, since pushing for 
higher bandwidth could be counterproductive if it means an increased latency. However, such bandwidth adaptation operates based on the \"symptoms\" of congestion (e.g., increased delay) without knowing the underlying cause (self-congestion vs. cross-traffic). In this paper, we consider this problem in the context of Wi-Fi networks and introduce a novel technique, Ping-Pair, to measure and attribute congestion. We have integrated Ping-Pair into the popular Skype audio-video conferencing application to enable improved bandwidth adaptation dubbed Kwikr, using which we have conducted controlled experiments and also randomized A/B tests in a production setting.", "pdfUrls": [ "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/09/PingPair-CoNEXT2017.pdf", "http://doi.acm.org/10.1145/3143361.3143390" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/07a6c453adc7a5844180a10d111f1cc3ebfc7717", "sources": [ "DBLP" ], "title": "Informed Bandwidth Adaptation in Wi-Fi Networks using Ping-Pair", "venue": "CoNEXT", "year": 2017 }, "07b381e7f6bba30abbc344963c3010d145f30e65": { "authors": [ { "ids": [ "40157196" ], "name": "Devarshi Ghoshal" }, { "ids": [ "1792683" ], "name": "Lavanya Ramakrishnan" } ], "doi": "10.1145/3078597.3078611", "doiUrl": "https://doi.org/10.1145/3078597.3078611", "entities": [ "Computer data storage", "Data-intensive computing", "Dataspaces", "Definition", "Experiment", "Hierarchical storage management", "Hoc (programming language)", "Memory hierarchy", "Memory-mapped I/O", "Scalability", "Simulation", "Software architecture", "Synthetic data" ], "id": "07b381e7f6bba30abbc344963c3010d145f30e65", "inCitations": [ "fb67b3bf033286acf2ed4fd663b4e432de8b7c7a" ], "journalName": "", "journalPages": "41-52", "journalVolume": "", "outCitations": [ "4b9130b8336f1e5a1b638480a7d833a81da6347f", "1d5de7a7ed362ecd596ac9ed5b85bf19d5c08ef5", "57e9d60fd6ab7c0b0f7fd2e19533a333ea911194", "0edb8ac77c0769a92882e4654b8dd45fa092bd82", 
"cf0fae9de7f69bcc1fcbd99ff64af5af43428432", "0fbed4dbe1ee87479c12330df89e4ef8540f0156", "dc1cb1c641f246d28361c88126daf38b1a319071", "77755328781bd3d9132f94c2b104b6c92ccc8ce0", "48b17ca36c42e20872a3a03da8881fd48f0d2d09", "a5c9bfcaf7b52edee6a94f58337b4a0e33575cd3", "a03d6ee4ea70eb7feaa65ab046ffc2232d76b0f0", "57d791eb2cd5fe8ed9cfe8a7167f7a4439e3b11e", "56b2c3f855fb59bfdb97bd311dca551a7f2d11fa", "64d4f6759b32697e6cbebf901624c93c0a0c1744", "0558c94a094158ecd64f0d5014d3d9668054fb97", "67c115d3a8ae3af0164e97f46c8d18abb56d8ed9", "7717cb7fbbf26557238c2ef847d0a48def176d0b", "02d3739f3d1af8a529fb60366c854b4e207e6e75", "205863642accdb592667fdc7e851335f09705341", "59ba9f62728b6231f982ea3b59f9ba7422182f28", "4dd05ff4d7d69878a10cfe95a86213d83b66081d", "5a39aa58f0c63d0c043034b554fc80716afad9ca", "03f026aa9917eda9bbb8794ca019001e64a1f0ea", "4908fe53a91465eaf95b21c4ca4f05378b90dcc4", "867ad29b3392965e40ede63f3cdbfdedac1c781b", "35202b030bab316139ebe039146061a2eb501576", "a3ad3b859c630b1153276dda4439f3cc835afd54", "7eea9d5a4048b46733ea85bd5b169e09944c1a10" ], "paperAbstract": "Scientific workflows are increasingly used in High Performance Computing (HPC) environments to manage complex simulation and analyses, often consuming and generating large amounts of data. However, workflow tools have limited support for managing the input, output and intermediate data. The data elements of a workflow are often managed by the user through scripts or other ad-hoc mechanisms. Technology advances for future HPC systems is redefining the memory and storage subsystem by introducing additional tiers to improve the I/O performance of data-intensive applications. These architectural changes introduce additional complexities to managing data for scientific workflows. Thus, we need to manage the scientific workflow data across the tiered storage system on HPC machines. 
In this paper, we present the design and implementation of MaDaTS (Managing Data on Tiered Storage for Scientific Workflows), a software architecture that manages data for scientific workflows. We introduce Virtual Data Space (VDS), an abstraction of the data in a workflow that hides the complexities of the underlying storage system while allowing users to control data management strategies. We evaluate the data management strategies with real scientific and synthetic workflows, and demonstrate the capabilities of MaDaTS. Our experiments demonstrate the flexibility, performance and scalability gains of MaDaTS as compared to the traditional approach of managing data in scientific workflows.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078597.3078611" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/07b381e7f6bba30abbc344963c3010d145f30e65", "sources": [ "DBLP" ], "title": "MaDaTS: Managing Data on Tiered Storage for Scientific Workflows", "venue": "HPDC", "year": 2017 }, "07baaa2dad2fccb816073383ed32e46bab91c961": { "authors": [ { "ids": [ "2880213" ], "name": "Edgar Solomonik" }, { "ids": [ "1737715" ], "name": "Grey Ballard" }, { "ids": [ "1700326" ], "name": "James Demmel" }, { "ids": [ "1713648" ], "name": "Torsten Hoefler" } ], "doi": "10.1145/3087556.3087561", "doiUrl": "https://doi.org/10.1145/3087556.3087561", "entities": [ "Algorithm", "Bulk synchronous parallel", "Central processing unit", "Computation", "Inter-process communication", "Parallel algorithm", "QR decomposition" ], "id": "07baaa2dad2fccb816073383ed32e46bab91c961", "inCitations": [ "41875eaea7cb58024b1bd46f9d9df80d19208e6b", "4e4e3c65b6e66e76dc0396ba61beffd0338f21af", "7ce2b2403f96e5cb0da2c3d2d7e0ce5231f353b3" ], "journalName": "", "journalPages": "111-121", "journalVolume": "", "outCitations": [ "70f1197b71aab1617e3c12cd61ee9977bd475c57", "a2b6876b05d434f3f349f239fefe4517abd33ea7", "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", "76d97e8cae2f5a2d660c294eb2a34faa493175a3", 
"b582d4a005c3288858eb3910e9233edb35323f49", "0b66d1a2303abc265e807188aa18681c971a4e19", "1aa8ad634d1879af9b5ac34b44ecc3de8debd276", "3e58c1263047ccc126ca0c06dcb150ff9d172512", "c29f7d257249a7ca1f769889458377c1a46e6b86", "bf980d3cc50ae14ce104207882ee1fbbadf7a5f1", "077ef5b142ac146a7013296e1f3f643c3fc8fc84", "081300c4313163cd8f4a520494beeb90791fbb2e", "00935127596629d7943d87d25adef9369a12814d", "4242147d4d83e8e9a09e793c733feee57557d37d", "04373d13bedbf3c4276a8b3b86311a1bff99db75", "ec1c01a3702aa5a32e8054769b7e6e16148b5609", "1fcaa630c6ae6a7d28ead7d8906f3bf682ccb680", "84ba025c6b28617241274699dccd9e5308fba766", "3e69317455f7db9b1325239c6f6f52cbe29a5491", "ad5e1e6c5b48f7f2cdafe306fbcac55b0be755f0", "364259d1ed0aa3ed40f3de21d8636a300dc88bad", "c846f0a7570ca3f5762328b665b821e5b251fd71", "6c24fa80068fa70f3328e7a1eeec9b8642a450f8", "f6430121b2af7d55b090a1c260570630e6cf1f41", "04d69fd2ad166a3305a0a0b373356945397dc2d0", "dd33bb8de5c88ffd5d71f550c003b0a6ec6440a2", "21e64df8919e85ee0f3238682160d5dd6ac012ab", "0de8fbe5e01d4045cb1fa15fef44cab94deb9b36", "03880f1d3faedb37aa51deab3b70a98b939dba28", "f5ad4d559762b0d51a3e7cbc4a170dc9bbaa40f5", "b7bb051c2376345f5c5e80f165b15f2f2e68ecc9", "2049b8090f539defa8facd7413a35ae3fac00fb8", "253402be4173c31f09b74007c3024518fa1c06fe", "8a269f794c54b62d81ba76d23aaa4bdf12301ec8" ], "paperAbstract": "Many large-scale scientific computations require eigenvalue solvers in a scaling regime where efficiency is limited by data movement. We introduce a parallel algorithm for computing the eigenvalues of a dense symmetric matrix, which performs asymptotically less communication than previously known approaches. We provide analysis in the Bulk Synchronous Parallel (BSP) model with additional consideration for communication between a local memory and cache. 
Given sufficient memory to store c copies of the symmetric matrix, our algorithm requires \\Theta(\\sqrt{c}) less interprocessor communication than previously known algorithms, for any c\\leq p^{1/3} when using p processors. The algorithm first reduces the dense symmetric matrix to a banded matrix with the same eigenvalues. Subsequently, the algorithm employs successive reduction to O(\\log p) thinner banded matrices. We employ two new parallel algorithms that achieve lower communication costs for the full-to-band and band-to-band reductions. Both of these algorithms leverage a novel QR factorization algorithm for rectangular matrices.", "pdfUrls": [ "http://arxiv.org/pdf/1604.03703v1.pdf", "http://doi.acm.org/10.1145/3087556.3087561", "http://arxiv.org/abs/1604.03703", "http://solomon2.cs.illinois.edu/talks/spaa-jul-2017.pdf", "http://solomon2.cs.illinois.edu/talks/householder-vtech-jun-2017.pdf", "https://arxiv.org/pdf/1604.03703v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/07baaa2dad2fccb816073383ed32e46bab91c961", "sources": [ "DBLP" ], "title": "A Communication-Avoiding Parallel Algorithm for the Symmetric Eigenvalue Problem", "venue": "SPAA", "year": 2017 }, "07c6107c3e38c49729799054ca24586c905caa67": { "authors": [ { "ids": [ "1950072" ], "name": "Linpeng Tang" }, { "ids": [ "1708020" ], "name": "Qi Huang" }, { "ids": [ "39754100" ], "name": "Amit Puntambekar" }, { "ids": [ "3295331" ], "name": "Ymir Vigfusson" }, { "ids": [ "2665531" ], "name": "Wyatt Lloyd" }, { "ids": [ "39463614" ], "name": "Kai Li" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Heuristic", "Overhead (computing)", "Overhead projector", "Scalability", "Streaming media" ], "id": "07c6107c3e38c49729799054ca24586c905caa67", "inCitations": [ "193342874858249aed4796cee35a8bec1b70e236", "8451e889fe6bad691fcee03213a9c3790a5f66ee", "957e98a2084f6c2d22694aadd22f57070b5d7e23" ], "journalName": "", "journalPages": "111-123", "journalVolume": "", 
"outCitations": [ "3ea8a292ac995a44d64e080209164a66619ea3d2", "0a5882fc7600383eb9d6cc119942f48a70f896ad", "1c7d0f188a8033d8a14ab3ae30662f7e85fa65b6", "037c6a50a4a7cae1998d944ae2991c986731912a", "3c1eb617625560e8ed24c846f7e767fc3900e988", "206fc8c3c2277402cacedb9581014cfaf6aca084", "033190c9caee87309724308aea87d675c2efa070", "19c3fcffda8e6e5870b3a533c483bca024501ab5", "82783e26dcc371ddf311084b095382f7058ddba9", "0edebf74c00dad0ea93a7ce8fc412f0c02c478c8", "e0508499b4cf5794d5aeaf717e7ad9541e9c2bba", "0860bc34aac8a304674aa4c205ff46e6dbc93295", "1073c29baf6d9cfc7b7b028024b58ed070696da3", "c53dd57bbbe164058bdcefa820e21da420479d47", "396514fb219879a4a18762cddfae2a6a607f439f", "4b155455a56bb0fb229d23e8ddcafdae263c9f65", "5bf660501e1c3fbc933ac490eef07275e328fe3f", "119a67a62b0e9351e4348d234d9eae4c84f366a3", "6b6a5fe05f19bd4322a43b0f688b854db40a2fdd", "3168681722207c86827e596860115a2977ce761f", "1d287f70305f3127ed9901e3efcbbef1ab2cea1f", "bb5e43dd30a3a60df42652d56781568f5cd0a99d", "3b13533495ec04b7b263d9bdf82372959c9d87e6", "7068187c2c0f3804a9adcb399131da56ad8fcde5", "275f66e845043217d5c37328b5e71a178302469f", "c08206b44dd1f0ea54bd073e4effaf2e4483169b", "65fd142f37c315cdf892184f8fb21281b88f6269", "09088515673d1d44919bd654c9829c7ae7822170", "56893647902b4ab971fd092ce78687675b6942a7", "39ac2e0fc4ec63753306f99e71e0f38133e58ead", "3ff93ff1ccbfce995067a4617d73ea30115318b6", "5caff120fcd0310acaba9408e2d995e936fa2d4e", "eacf1cf7a6d605e5686c145692231d40a4ba72b2", "2ddf948e97eb79b3fba3940e05454ccbe64d01a5", "8558857f33b4a7df03d682f83b055890043c0fa0" ], "paperAbstract": "Streaming video algorithms dynamically select between different versions of a video to deliver the highest quality version that can be viewed without buffering over the client\u2019s connection. To improve the quality for viewers, the backing video service can generate more and/or better versions, but at a significant computational overhead. 
Processing all videos uploaded to Facebook in the most intensive way would require a prohibitively large cluster. Facebook\u2019s video popularity distribution is highly skewed, however, with analysis on sampled videos showing 1% of them accounting for 83% of the total watch time by users. Thus, if we can predict the future popularity of videos, we can focus the intensive processing on those videos that improve the quality of the most watch time. To address this challenge, we designed Chess, the first popularity prediction algorithm that is both scalable and accurate. Chess is scalable because, unlike the state-of-the-art approaches, it requires only constant space per video, enabling it to handle Facebook\u2019s video workload. Chess is accurate because it delivers superior predictions using a combination of historical access patterns with social signals in a unified online learning framework. We have built a video prediction service, ChessVPS, using our new algorithm that can handle Facebook\u2019s workload with only four machines. 
We find that re-encoding popular videos predicted by ChessVPS enables a higher percentage of total user watch time to benefit from intensive encoding, with less overhead than a recent production heuristic, e.g., 80% of watch time with one-third as much overhead.", "pdfUrls": [ "https://www.usenix.org/conference/atc17/technical-sessions/presentation/tang", "http://www.cs.princeton.edu/~wlloyd/papers/chess-atc17-talk-public.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-tang.pdf", "http://www.cs.princeton.edu/~linpengt/papers/chess-atc17.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_tang.pdf", "http://www.cs.princeton.edu/~wlloyd/papers/chess-atc17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/f942/d66e6f2f4d618c9017965aeceb865b55c51a.pdf", "s2Url": "https://semanticscholar.org/paper/07c6107c3e38c49729799054ca24586c905caa67", "sources": [ "DBLP" ], "title": "Popularity Prediction of Facebook Videos for Higher Quality Streaming", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "07d40084599302c18ef4498ac18e90162098b146": { "authors": [ { "ids": [ "1961095" ], "name": "Amy Ousterhout" }, { "ids": [ "21653817" ], "name": "Jonathan Perry" }, { "ids": [ "1712771" ], "name": "Hari Balakrishnan" }, { "ids": [ "9759285" ], "name": "Petr Lapukhov" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "C++", "Centralisation", "Chipset", "Convex hull", "Emulator", "Experiment", "Explicit Congestion Notification", "Forwarding plane", "Gigabit", "High- and low-level", "High-level programming language", "Multi-core processor", "Operating system", "Router (computing)", "TRAVERSE", "Throughput" ], "id": "07d40084599302c18ef4498ac18e90162098b146", "inCitations": [ "b74d2874646d36b36eee5c836adf6b29d9173425" ], "journalName": "", "journalPages": "438-451", "journalVolume": "", "outCitations": [ "3b988049dd8f62f772281e90196bbd793700c86b", "0433bb657317ac22f7c66d71dfd14c8ead607d73", 
"1aafc7066e52f18dee78103822da24a5d85da93c", "39300a6bb64f813bd233343b840cb169d8d0527f", "4a098868891474ba73ea338e6f56033d1827d216", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "de17cf40a4db13315c631c597959ae26f691f2fa", "14c84514d25336223473290fe7c13ad66a68ef64", "0b4e7e7a67778cebc846dfe848302dff57bf613d", "3dde3fec553b8d24a85d7059a3cc629ab33f7578", "06db78ece7ba41bccab5df77240541e32cffd623", "00afe47832bc165dcc3744e33550252d997286c3", "0586f39a5280d49e62b49838c229dcb37d105994", "2fa434d3310c478f589d8ff9f2ac89e995bf9ab9", "00ddc85d502aa4bdc45a3b8b9099fad75938b50a", "2077579d62fc090d4ddf45f107ffae0468936165", "122229239aeba1eb4f1623adb40f1845c582a520", "274b913658674eb107f9edc0714937fd5f1fcbcf", "531957a3e9e47f1993e99bab2391cd828393e2d2", "764d7de61421968d6b477f0c055d72dcb0893544", "62ab98f05621ed6bc174337d1cbf7065350776f3", "ad308f3480ad25452398ea39edb5e9ebeb3454f7", "22bb3ad75755e046bfd051d41ad7f8c1dbed2e2d", "69766466cb19395064bc556af78b19cd2583041d", "025652412d507a8cf98ecacd8a44d32ce28995e1", "19d97ac569fb27bc5fc71deca04c36a64278c99d", "9edfe7c6166d08eaf0b7dd865537e2c1c0ed082a", "11040f24714857941c569df70b21c4c8655e074a", "4e4f4b3d04f1e1ec2b3a18318d9a42886b10ad25", "0e2249e3b0cd1fa9a7e0eee847b58be1cf2ec707", "1dea114401f53d188fea7f528ff3068db80e71b6", "25f855c968af75e4617f25c71aee3cedec1dedaf", "035e903cc8946617cf1b3b69d9093f7052caa71c", "4d9adb7a49d468800f9315ef971fdedb4b607b7c", "3e7fd5ac3fc1ab19c985c97a0614e4109fa91583", "aad6cb88b6802a11b921b0dccc429caec7bf163b", "034b937edbff280dfdd7b2e98639655fd3587402", "3963ee1d7f31625cdc7db4489e7970c6b4d32324", "177d039a925fcf384ba868d65b6449746726b127", "61aa09bc1a3eae17480645c90b06a18cbd62d9af", "089b10645ee63cd9c5bb4ab661141dd813408e15", "0b630b9afeb3d69645845c6cc124cb08d3d85a62", "094aca6103f4079521e6a596d099ed37f7d2b498", "2b73665da8fa582422997321d832803b6cbe9078", "0a59166593f0a2fa260f16fd853299c9f0863fdf", "940563def60eb2e29d1cddf20c5576cd93bbd421", "114e68ae77738097ba690499dccffe817da1b839" ], "paperAbstract": 
"Flexplane enables users to program data plane algorithms and conduct experiments that run real application traffic over them at hardware line rates. Flexplane explores an intermediate point in the design space between past work on software routers and emerging work on programmable hardware chipsets. Like software routers, Flexplane enables users to express resource management schemes in a high-level language (C++), but unlike software routers, Flexplane runs at close to hardware line rates. To achieve these two goals, a centralized emulator faithfully emulates, in real-time on a multi-core machine, the desired data plane algorithms with very succinct representations of the original packets. Real packets traverse the network when notified by the emulator, sharing the same fate and relative delays as their emulated counterparts. Flexplane accurately predicts the behavior of several network schemes such as RED and DCTCP, sustains aggregate throughput of up to 760 Gbits/s on a 10-core machine (\u21e1 20\u21e5 faster than software routers), and enables experiments with real-world operating systems and applications (e.g., Spark) running on diverse network schemes at line rate, including those such as HULL and pFabric that are not available in hardware today.", "pdfUrls": [ "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-ousterhout.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-ousterhout.pdf", "http://inat.lcs.mit.edu/papers/Flexplane-NSDI-2017.pdf", "http://wind.lcs.mit.edu/papers/Flexplane-NSDI-2017.pdf", "http://people.csail.mit.edu/aousterh/papers/flexplane_nsdi17.pdf", "http://nms.lcs.mit.edu/papers/Flexplane-NSDI-2017.pdf", "http://nms.csail.mit.edu/papers/Flexplane-NSDI-2017.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/ousterhout" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8613/dda8d2703249fbdb13bde1c4a68ea6a52ada.pdf", "s2Url": 
"https://semanticscholar.org/paper/07d40084599302c18ef4498ac18e90162098b146", "sources": [ "DBLP" ], "title": "Flexplane: An Experimentation Platform for Resource Management in Datacenters", "venue": "NSDI", "year": 2017 }, "07d639436125ee204202cb0b34a081cde31e5214": { "authors": [ { "ids": [ "2512621" ], "name": "Hongzi Mao" }, { "ids": [ "2846332" ], "name": "Ravi Netravali" }, { "ids": [ "2587719" ], "name": "Mohammad Alizadeh" } ], "doi": "10.1145/3098822.3098843", "doiUrl": "https://doi.org/10.1145/3098822.3098843", "entities": [ "Algorithm", "Artificial neural network", "Client-side", "Deployment environment", "Experiment", "Network model", "Reinforcement learning", "Streaming media" ], "id": "07d639436125ee204202cb0b34a081cde31e5214", "inCitations": [ "9d6c90fffb8c5df8a842a9ec4a6fbc50c288ffd8", "38f79fad9d6c54f1f491745517ac8973cfd57d44", "e8645f896e5b1f6bbad665a86af8437978dd58c8", "3fb13b740e5112ff5622cf720837a3dcf34d0f59", "4f529a014523fc1a54c03f781d49309f847bbfea", "d8b2706be5b6bb9f20dd22e903104da3720a30ef", "6cae9ad284a73471a8ed9e483b1673a60d61d946", "7dc1b9de7e1be394648542c01e4d2e4df6ef4890", "7c4a7c37102f5cf97137c27181dcb7f24d3f32ef", "b6d48757202bf1afe537d438b2993cef69a83c01" ], "journalName": "", "journalPages": "197-210", "journalVolume": "", "outCitations": [ "2d8450ec56926a34fe1c25180f0b303c5cc67f2d", "0af8dc481a9130e443f512e5db14d4ceda7bd3b3", "f33f82a7d0b5533c66b448ac1e659b71073ea087", "1ab7aa767e1779c87d822325859e47fe2986e6b2", "add7b8b65355d5408a1ffb93a94b0ae688806bc4", "160315c07d18fe785aff07f50c9e44319a0af0cb", "2758d7e4d03b5b61651b9c09468723e167492969", "2b4373169bab1114d5bde52c269d392b74225fb9", "162da047573285a9527c3ece7679639537dbe97c", "2c15efe06e3c4ad9d92263235731823a6ff25ac2", "02160860e66ac561530f7339ad343781a055147b", "4954fa180728932959997a4768411ff9136aac81", "48289b7d57e43bb5a001c334cf96694e933f7001", "1740eb993cc8ca81f1e46ddaadce1f917e8000b5", "0c4867f11c9758014d591381d8b397a1d38b04a7", "6b6a5fe05f19bd4322a43b0f688b854db40a2fdd", 
"22bd3a35b9550bc5b570a0beee5648eb9033be3b", "650759045a1a28a977f42b219fcbe12394c296f5", "091578afc211f9c871c1af90b06cdcb8dadd37fd", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "aade3f6465effed89841bdc9e30291e2952958ea", "7d597067088ffdd474c1d7e4e72bf967e16a4bd1", "6f226a1578ae646ca6fc414a08b399568ebec0b4", "9f1f065bf08cd90431cc051267a708f56436cd82", "049c6e5736313374c6e594c34b9be89a3a09dced", "ea9d2a2b4ce11aaf85136840c65f3bc9c03ab649", "1a50e63ebf99d47e69175e16db9c621f9c558e1a", "88dfee10842bbfd2ebc74980ab64c1cac5753883", "13d05af402b54c310237d4deba45e94d80c76d4b", "ad1c26f2fa0a59bd4c33ee2bb65e0c6f4847eb72", "5bf660501e1c3fbc933ac490eef07275e328fe3f", "0233f4e12db890dac1d06b5593c3ae7205d721a6", "065e1b5f59ccd2526117be9ec98c2df9e4172bea", "56893647902b4ab971fd092ce78687675b6942a7", "846b97f0dff3be0235cbc698cd861fa5281d6806", "3017bb41f18096e34eea94329834f6f8b9372be8", "41758f25a78f4223fefb7ac00cc70a9e6ba949af", "2ddf948e97eb79b3fba3940e05454ccbe64d01a5", "5e279a183435995cbafb09d87365c0e5c9103235" ], "paperAbstract": "Client-side video players employ adaptive bitrate (ABR) algorithms to optimize user quality of experience (QoE). Despite the abundance of recently proposed schemes, state-of-the-art ABR algorithms suffer from a key limitation: they use fixed control rules based on simplified or inaccurate models of the deployment environment. As a result, existing schemes inevitably fail to achieve optimal performance across a broad set of network conditions and QoE objectives.\n We propose Pensieve, a system that generates ABR algorithms using reinforcement learning (RL). Pensieve trains a neural network model that selects bitrates for future video chunks based on observations collected by client video players. Pensieve does not rely on pre-programmed models or assumptions about the environment. Instead, it learns to make ABR decisions solely through observations of the resulting performance of past decisions. 
As a result, Pensieve automatically learns ABR algorithms that adapt to a wide range of environments and QoE metrics. We compare Pensieve to state-of-the-art ABR algorithms using trace-driven and real world experiments spanning a wide variety of network conditions, QoE metrics, and video properties. In all considered scenarios, Pensieve outperforms the best state-of-the-art scheme, with improvements in average QoE of 12%--25%. Pensieve also generalizes well, outperforming existing schemes even on networks for which it was not explicitly trained.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098843", "http://web.mit.edu/pensieve/content/pensieve-tech-report.pdf", "http://people.csail.mit.edu/alizadeh/papers/pensieve-sigcomm17.pdf", "http://web.mit.edu/ravinet/www/pensieve.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/07d639436125ee204202cb0b34a081cde31e5214", "sources": [ "DBLP" ], "title": "Neural Adaptive Video Streaming with Pensieve", "venue": "SIGCOMM", "year": 2017 }, "07db449c676fa6eb65890b9b3c87e47e13fb2ceb": { "authors": [ { "ids": [ "1682058" ], "name": "Hao Zhang" }, { "ids": [ "34418171" ], "name": "Zeyu Zheng" }, { "ids": [ "1704538" ], "name": "Shizhen Xu" }, { "ids": [ "1727493" ], "name": "Wei Dai" }, { "ids": [ "1707357" ], "name": "Qirong Ho" }, { "ids": [ "40250403" ], "name": "Xiaodan Liang" }, { "ids": [ "2749311" ], "name": "Zhiting Hu" }, { "ids": [ "1766143" ], "name": "Jinliang Wei" }, { "ids": [ "40526720" ], "name": "Pengtao Xie" }, { "ids": [ "1752601" ], "name": "Eric P. 
Xing" } ], "doi": "", "doiUrl": "", "entities": [ "Byte", "Central processing unit", "Computation", "Computer vision", "Deep learning", "GPU cluster", "Graphics processing unit", "Open-source software", "TensorFlow", "Throughput" ], "id": "07db449c676fa6eb65890b9b3c87e47e13fb2ceb", "inCitations": [ "fa0412fc819fce2468a65b65a2820247c2776760", "68ab2dbddc0eded5711bec34f54f362a9a861ae7", "d0556be65e8564ab8bb3e26b6a0146a62027bc40", "48231ac69e8d17ce08a2868b27d1a9b08f99be83", "466f9f9c4a63c0fbc337637a1619e3411ea14c59", "42fab5ddea4fec117b7688aa7f18ecaf392975f8", "3d80f420b87bf16eabac6142275e71bf48aa61a5", "5a8cd841f59a68c948c7aa05359c7df32dbc8d5c", "1ae0e2ee356839f83756617040e1868984df6c0c", "5d29283e0f977cdacff94dca4df5d9c6dc7ac847" ], "journalName": "", "journalPages": "181-193", "journalVolume": "", "outCitations": [ "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "235fa2b1983eff9f13b27c620cda389359126bf4", "09f72f02083830c1881b86e6016e1fe3fe41f65f", "32192d744d86e7cde73f0c9aa773214f88619a9e", "8dc550724c004daaa5a12ddf02ab06b54303eb7b", "5d90f06bb70a0a3dced62413346235c02b1aa086", "60f22ad725f041fff81d6371242485bbe5c3ebb6", "3439a127e45fb763881f03ef3ec735a1db0e0ccc", "3769644c21140977abf85317e1c75780075fbcd4", "12c09be5b8a5775440b0785e3f31aff1f7c4316c", "47ae44f066a4e98a9f59c7e7eb94babb011bd5a8", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "6772164c3dd4ff6e71ba58c5c4c22fa092b9fe55", "2b3113b7fda6414548e88fc664f3be96d5209830", "8866824806018b89fe373c01474daa3744c9db7d", "01fcae344d2edb715bcc63a40b6052c0331741bd", "bb2b45a0e650ca87590cfa3df93066eecf4e54f6", "31bd159b02068fbe1994b7a5e9d7b91adab0d142", "061356704ec86334dbbc073985375fe13cd39088", "3f1c1427b175140e7f725a155096a4e73c1b8509", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "0626908dd710b91aece1a81f4ca0635f23fc47f3", "080aebd2cc1019f17e78496354c37195560b0697", "12078fd9bee79fd2e9fae055c4cc33db382272af", "5c36d8bb0815fd4ff5daa8351df4a7e2d1b32934", "4afa6c2eb552ceef0e396fbfe449932492873034", 
"90efc90cfeab53f1bc7495609771e91671560489", "4954fa180728932959997a4768411ff9136aac81" ], "paperAbstract": "Deep learning models can take weeks to train on a single GPU-equipped machine, necessitating scaling out DL training to a GPU-cluster. However, current distributed DL implementations can scale poorly due to substantial parameter synchronization over the network, because the high throughput of GPUs allows more data batches to be processed per unit time than CPUs, leading to more frequent network synchronization. We present Poseidon, an efficient communication architecture for distributed DL on GPUs. Poseidon exploits the layered model structures in DL programs to overlap communication and computation, reducing bursty network communication. Moreover, Poseidon uses a hybrid communication scheme that optimizes the number of bytes required to synchronize each layer, according to layer properties and the number of machines. We show that Poseidon is applicable to different DL frameworks by plugging Poseidon into Caffe and TensorFlow. We show that Poseidon enables Caffe and TensorFlow to achieve 15.5x speed-up on 16 single-GPU machines, even with limited bandwidth (10GbE) and the challenging VGG19-22K network for image classification. 
Moreover, Poseidon-enabled TensorFlow achieves 31.5x speed-up with 32 single-GPU machines on Inception-V3, a 50% improvement over the open-source TensorFlow (20x speed-up).", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-zhang.pdf", "https://arxiv.org/pdf/1706.03292v1.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/zhang", "http://arxiv.org/abs/1706.03292", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_zhang_hao.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/c371/45669be8e7f14f4cdd5ddc3935ea03a54673.pdf", "s2Url": "https://semanticscholar.org/paper/07db449c676fa6eb65890b9b3c87e47e13fb2ceb", "sources": [ "DBLP" ], "title": "Poseidon: An Efficient Communication Architecture for Distributed Deep Learning on GPU Clusters", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "07db4d7b141081644b9cbb3e6d1f34c1bc80db24": { "authors": [ { "ids": [ "3408002" ], "name": "Maria Apostolaki" }, { "ids": [ "1782535" ], "name": "Aviv Zohar" }, { "ids": [ "2562998" ], "name": "Laurent Vanbever" } ], "doi": "10.1109/SP.2017.29", "doiUrl": "https://doi.org/10.1109/SP.2017.29", "entities": [ "Autonomous system (Internet)", "Bitcoin", "Border Gateway Protocol", "Centralisation", "Countermeasure (computer)", "Cryptocurrency", "Double-spending", "Multihoming", "Routing", "Telephone number" ], "id": "07db4d7b141081644b9cbb3e6d1f34c1bc80db24", "inCitations": [ "d4275c9c9a2a715d2459f5121b59bdcd25146f31", "1969453d7960eaca8cfbd642877925f5f5028ce5", "7c4ffeb9635ff7023b782721ae7de72b65d7fd84", "8c383de05682e4c96dd4407b5b99bf163063f8e3", "c2039c2cf4e0c1222a1a2f964c73858093e47890", "bdd7454cdbf10b58cdbdf81c751ebf075126bfb4", "57981075a1d050e11ceb9c57957c1165dfe7ee76", "58c0217720b83732daa5040512e1b621c491ecd8", "3ef1beabe87c75fe1a395040d0f75a9d75eab385", "4248943bb3ca08c999c1d6322aa9a1d3616b1193", "14f419e9f3b15d87426a57616d00f3783eda5b64" ], "journalName": "2017 IEEE 
Symposium on Security and Privacy (SP)", "journalPages": "375-392", "journalVolume": "", "outCitations": [ "23208b2f513af5c3d7b40f0826ed367da77396c9", "34e4016ad6362dc1f85c4a755b2b47c4795fbe4d", "2751d9c57f5e962166153cd328dd3e0919b78003", "686886ae930d50f469ccdab43c33cd88acad0f00", "2b384bfee51e0897d4b1d98c4e9a815fd7560d33", "35fe18606529d82ce3fc90961dd6813c92713b3c", "3624eaf7c2f05612429ed1579cc82102c11f4d65", "356e0eece5bbd700ea3c388af8ea3e088baf7c6e", "2077579d62fc090d4ddf45f107ffae0468936165", "2f7bb6613154e1b3580c0114bf2cfb3c8ceb477e", "226cfb67d2d8eba835f2ec695fe28b78b556a19f", "1ccef9fa75e519daa10618fe9f2d7a46a34a7040", "40a98bed1d10248d30e86304315df07280dad93e", "084da7c90567476907522d91d22a8a8a6f818447", "10ca7363cfd3c07c76ab183dfd4e007b73bb60c3", "0469044c24ac4897fcffc3c7db6a2d9c7fd08848", "b783208e05649f9f636d7f9f89ad93c820493a78", "b1bd89d8c0fdf7beef57add2068cae20ac1b7bfe", "12d854f326b43232d906eb323db5d282786acb9d", "4715401473dca02ebaa5bdd4d4003705ed91c380", "1d4abd83093f1343ee1f5b8ffb3c5999e3754c90", "476b20ee2c8d0da7211afb245fd30ea39464f5e9", "5e86853f533c88a1996455d955a2e20ac47b3878", "b4925bd986101dbc87dc9c4661326b2a357e8fa3", "1bd47b4fcb605960e87a2890e5e724694f63d059", "728b60c04afb5b87853b59265e49f430dbf631db", "566333376dd1af014555b4cf82cda42c22501013", "4f7d8cf8857ccc0f4a47ba03ec56f25fe5a0af57", "23b961a2c7aeb2824a0ae429916f107e4cecc4f9", "603457793cf9a6056f6597420c838210342a3d2b", "306a2933e91fdf8971bd160c5bbe365e48ec7fdd", "2465de3a5786e5f786879678bc180a5d67e20b8d" ], "paperAbstract": "As the most successful cryptocurrency to date, Bitcoin constitutes a target of choice for attackers. While many attack vectors have already been uncovered, one important vector has been left out though: attacking the currency via the Internet routing infrastructure itself. 
Indeed, by manipulating routing advertisements (BGP hijacks) or by naturally intercepting traffic, Autonomous Systems (ASes) can intercept and manipulate a large fraction of Bitcoin traffic. This paper presents the first taxonomy of routing attacks and their impact on Bitcoin, considering both small-scale attacks, targeting individual nodes, and large-scale attacks, targeting the network as a whole. While challenging, we show that two key properties make routing attacks practical: (i) the efficiency of routing manipulation; and (ii) the significant centralization of Bitcoin in terms of mining and routing. Specifically, we find that any network attacker can hijack few (<100) BGP prefixes to isolate ∼50% of the mining power—even when considering that mining pools are heavily multi-homed. We also show that on-path network attackers can considerably slow down block propagation by interfering with few key Bitcoin messages. We demonstrate the feasibility of each attack against the deployed Bitcoin software. We also quantify their effectiveness on the current Bitcoin topology using data collected from a Bitcoin supernode combined with BGP routing data. The potential damage to Bitcoin is worrying. By isolating parts of the network or delaying block propagation, attackers can cause a significant amount of mining power to be wasted, leading to revenue losses and enabling a wide range of exploits such as double spending. 
To prevent such effects in practice, we provide both short and long-term countermeasures, some of which can be deployed immediately.", "pdfUrls": [ "https://arxiv.org/pdf/1605.07524v1.pdf", "http://arxiv.org/pdf/1605.07524v1.pdf", "http://diyhpl.us/~bryan/papers2/bitcoin/Hijacking%20Bitcoin:%20Large-scale%20Network%20Attacks%20on%20Cryptocurrencies%20-%202016.pdf", "http://vanbever.eu/pdfs/vanbever_bitcoin_arxiv_2016.pdf", "https://arxiv.org/pdf/1605.07524v2.pdf", "http://diyhpl.us/~bryan/papers2/bitcoin/Hijacking%20Bitcoin:%20Routing%20attacks%20on%20cryptocurrencies%20-%202017.pdf", "https://doi.org/10.1109/SP.2017.29" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/07db4d7b141081644b9cbb3e6d1f34c1bc80db24", "sources": [ "DBLP" ], "title": "Hijacking Bitcoin: Routing Attacks on Cryptocurrencies", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "0822500c8f7246f5abb6a57836e946d564a1f5ea": { "authors": [ { "ids": [ "31960069" ], "name": "Seyyed Ahmad Javadi" }, { "ids": [ "2044504" ], "name": "Anshul Gandhi" } ], "doi": "10.1109/ICAC.2017.17", "doiUrl": "https://doi.org/10.1109/ICAC.2017.17", "entities": [ "Agile software development", "Amazon Web Services", "Cloud computing", "Colocation centre", "Interference (communication)", "Load balancing (computing)", "OpenVMS", "Requirement", "Scheduling (computing)", "Web application" ], "id": "0822500c8f7246f5abb6a57836e946d564a1f5ea", "inCitations": [ "796b61d401037a9c5c91d7f0d3bd62697acbb208", "344be5b5d99f420e29bd80ea288ff0f573d8cbe3" ], "journalName": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "journalPages": "135-144", "journalVolume": "", "outCitations": [ "01fbd0dd3229278905b9a124aaf20280c64f56bc", "5848da5058fed3b97bfd801ca19e5265f489abfe", "3c0bc4e9d30719269b0048d4f36752ab964145dd", "277f20ddc0e9fa593753ef2778110508372c597f", "46678493e77e5176d73d14393668247003e8c63d", "6d8c9fcce8177d6f8d122d653c7d32d7624d6714", 
"1ff2a26cf246fc7c390e907426fb2bce8026bb38", "440f3e59fde1fde9868bc4a0e8fa9132050ce89c", "443b8c56d7300f61b825d1dbafe06afdda23c3e1", "090030e0d1aa117008e9e9fa4abdee0a95455f4a", "f24b702c16849ba88da1df30cc3f1e126a487ede", "1ecd36058e48734213c81728f42ff798a2c52833", "3000e77ed7282d9fb27216f3e862a3769119d89e", "5233d7195acccd2681f20b9f60e9f12ec1cbba70", "9c96514250c4a35deba5ae3ffb93e9731fe23a79", "a1c324efaa2bc070ff7d44a3a81f6eaf98e115c1", "1eb845e672abc3e172725639eece560c3cd5ec2a", "490d862480cf30949dce90e832aa292c498ac768", "18ec39ea5f91ff19ec516816da9228dbc5bdc6b1", "5e1a1b6f70ceadc4a7ca3378e9fced99db711dee", "6b31216d77f6ef706f5cc5d11933b65bd71ae043", "20ca2f3f5f80181ed0db3c8dcbac98b55727b451", "11da3b399626aa7a65a27a2f16451421b2843b56", "4c767b5bef5f2d6d22ed2b342852cb3743326e29", "316486bada6023816c785c0d4eb401658737be3f", "47db5a1f12720c47d24b3748801cac0ca61a8e92", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "6c5bb5148a9ba089cb9ba694fab05b52ef2663f0" ], "paperAbstract": "Many online application services are now provided by cloud-deployed VM clusters. Although economical, VMs in the cloud are prone to interference due to contention for physical resources among colocated users. Worse, this interference is dynamic and unpredictable. Current provider-centric solutions are application-oblivious and are thus not always aware of the user's SLO requirements or application bottlenecks. Further, such solutions rely on VM scheduling and migration, approaches that are not agile enough to mitigate volatile interference.This paper presents DIAL, an interference-aware load balancer that can be employed by cloud users without requiring any assistance from the provider. DIAL addresses timevarying interference by dynamically shifting load away from compromised VMs without violating the application's tail latency SLOs. The key idea behind DIAL is to infer the demand for contended resources on the physical hosts, which is otherwise hidden from users. 
Estimates of the colocated load are then used to drive the load distribution for the application VMs. Our experimental results on OpenStack and AWS clouds show that DIAL can reduce application tail latencies by as much as 70% and 48% compared to interference-oblivious and existing interference-aware load balancers, respectively.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICAC.2017.17", "http://www3.cs.stonybrook.edu/~anshul/icac17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0822500c8f7246f5abb6a57836e946d564a1f5ea", "sources": [ "DBLP" ], "title": "DIAL: Reducing Tail Latencies for Cloud Applications via Dynamic Interference-aware Load Balancing", "venue": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "year": 2017 }, "082b402e28248a2a6bf8cf45dff08cdef3b71fe6": { "authors": [ { "ids": [ "39447323" ], "name": "Mustafa Emre Acer" }, { "ids": [ "12016397" ], "name": "Emily Stark" }, { "ids": [ "2838694" ], "name": "Adrienne Porter Felt" }, { "ids": [ "2200198" ], "name": "Sascha Fahl" }, { "ids": [ "3453215" ], "name": "Radhika Bhargava" }, { "ids": [ "40073324" ], "name": "Bhanu Dev" }, { "ids": [ "39690668" ], "name": "Matt Braithwaite" }, { "ids": [ "26933016" ], "name": "Ryan Sleevi" }, { "ids": [ "2230201" ], "name": "Parisa Tabriz" } ], "doi": "10.1145/3133956.3134007", "doiUrl": "https://doi.org/10.1145/3133956.3134007", "entities": [ "Client-side", "Google Chrome", "HTTPS", "Server (computing)" ], "id": "082b402e28248a2a6bf8cf45dff08cdef3b71fe6", "inCitations": [ "4ff86de963c9bd29c92adca8205db2162e1c6745", "32187449ad863fa01597b1a857ab5dc8677769cc", "4886df0636bf95696b0a01cb7a21a480c8562bcb" ], "journalName": "", "journalPages": "1407-1420", "journalVolume": "", "outCitations": [ "cf5dfdbb0b8d1c673726178f37f499b77fcc7f03", "15921484ef80b0dfb6629f6fae7b5c9b8c8877e7", "729979881bc84e3c49c382ac93d3b7b61cdc529c", "400961e2b11d8bcf5a1f56d04db24283c14a3f72", 
"6c5395868a818c6f414c653a30376461240bd366", "08fabacc44f1f7d3b968fa41e52e350a24e02abc", "133eea63e0a9702207dc14fdd72740d402f5748b", "07045bed2f4d3efc649f3665e84feb895f27fede", "92e7b2f90ca3020c68b93557d1f7b25da3736ebd", "4655c716f39a981830adf334769e6926e74212a6", "a41762f34955a57710a2582653f883cb14795b29", "e130fee5c7e0957c1c50f25203b795dd46f64e1b", "39ac27363c06ade948e0cc3e7797523122a19085" ], "paperAbstract": "HTTPS error warnings are supposed to alert browser users to network attacks. Unfortunately, a wide range of non-attack circumstances trigger hundreds of millions of spurious browser warnings per month. Spurious warnings frustrate users, hinder the widespread adoption of HTTPS, and undermine trust in browser warnings. We investigate the root causes of HTTPS error warnings in the field, with the goal of resolving benign errors.\n We study a sample of over 300 million errors that Google Chrome users encountered in the course of normal browsing. After manually reviewing more than 2,000 error reports, we developed automated rules to classify the top causes of HTTPS error warnings. We are able to automatically diagnose the root causes of two-thirds of error reports. To our surprise, we find that more than half of errors are caused by client-side or network issues instead of server misconfigurations. Based on these findings, we implemented more actionable warnings and other browser changes to address client-side error causes. 
We further propose solutions for other classes of root causes.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134007", "https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46359.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/082b402e28248a2a6bf8cf45dff08cdef3b71fe6", "sources": [ "DBLP" ], "title": "Where the Wild Warnings Are: Root Causes of Chrome HTTPS Certificate Errors", "venue": "CCS", "year": 2017 }, "083ce125d8187fae52cfedfeaf01c3678d18ffd3": { "authors": [ { "ids": [ "5295950" ], "name": "Tarique Siddiqui" }, { "ids": [ "40585544" ], "name": "John Lee" }, { "ids": [ "2671663" ], "name": "Albert Kim" }, { "ids": [ "34265480" ], "name": "Edward Xue" }, { "ids": [ "10280425" ], "name": "Xiaofo Yu" }, { "ids": [ "7523578" ], "name": "Sean Zou" }, { "ids": [ "1912029" ], "name": "Lijin Guo" }, { "ids": [ "3382974" ], "name": "Changfeng Liu" }, { "ids": [ "4017475" ], "name": "Chaoran Wang" }, { "ids": [ "1680270" ], "name": "Karrie Karahalios" }, { "ids": [ "1801540" ], "name": "Aditya G. 
Parameswaran" } ], "doi": "", "doiUrl": "", "entities": [ "Data science", "Interaction", "Programmer" ], "id": "083ce125d8187fae52cfedfeaf01c3678d18ffd3", "inCitations": [ "ecf884085552b15ec49119a8cd2ac36708d39e06", "c7dbf823311ea7e06b85db2f3125b6c5ceb20975", "d6c649b8b74e69fc8be86aac640a52e7c71476a8" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "9d54c62a4d30bf38d5215024ef02aa2cf7aeb1d2", "4724a2274b957939e960e1ea4cdfb5a319ff3d63", "e9209c51a0456acc0b0564212b97f1f6ca429b6a", "04700bcc5abfad47d11b67a2d9901f01c8f0adf5", "0762f92a81f6196cb89839fdb80601345aaf5322", "2ef606258486d6c32fd0b9ca54244273c21331b9", "15d0d28a2f35ea4f8b350f49ee4c480762146ba8", "1d1608441c3c1b6106bd022727011975aa9ff30f", "713f54f167deb2dc59be154cbf858a9b94e23a84", "0cc96cb1c0ad3799e829e7109c88b1a5f14bcfdc", "3b8a4cc6bb32b50b29943ceb7248f318e589cd79", "5b7929dc88665927aefb2b231f78abce23a74433", "2962a76dc7ffb53e7fa8a6a251b07be206c86476", "4a283fe7de108d476ba8cab69acbbed907e5c4d8", "6c2e2b05e8fd6ab7a9de1ca2c88f77e1368e2c13", "418f49d4a4b58f8aa7ba610ee474420fec4f4a71", "0d22b6cab566e5dc5bdab95742ee51c8981d5422", "dc39c68a00e38f2993b450eb01c96e1d032ab850", "5a2bfaa724ba37134eb55c29644f8576c3d64c96", "5a2d20868eddd4312c19c0144f2d651dc9141163", "bd6b9bf3f19ecc79393451ca00ae8e7904e5c758", "0fe24e7cf12b20d135ab3b27f3bd29ffe3cbd10e", "4e3c1f3904d4b5404a03b6101370841f7c4798d5", "73a1116905643fad65c242c9f43e6b7fcc6b3aad", "2e8a511a67ccfbc1fd6129bef0ad46cc919e8b2a", "7387f65208daccc345f1480b4023b266f85e420b", "494574f2a629076daf33c24fb32d9324ba44144c", "318dcb851398c2543d8c82b16c8fb4e33470196e", "0479b7e8c433e3f18a2b6c5dedd328f0229c1566", "21b9065d0596a033a15c01b227a8d933885d77a2", "4c81e08c4ab02d0c7efbf4aea68265fa87d21f62", "5d37dbcead67858f972056555745041250bb1b6a", "266907e673944ff0bd4486fe6f8b4f14a3c8c9f6" ], "paperAbstract": "Data exploration and analysis, especially for non-programmers, remains a tedious and frustrating process of trial-and-error\u2014data 
scientists spend many hours poring through visualizations in the hope of finding those that match desired patterns. We demonstrate zenvisage, an interactive data exploration system tailored towards \u201cfast-forwarding\u201d to desired trends, patterns, or insights, without much effort from the user. zenvisage\u2019s interface supports simple drag-and-drop and sketch-based interactions as specification mechanisms for the exploration need, as well as an intuitive data exploration language called ZQL for more complex needs. zenvisage is being developed in collaboration with ad analysts, battery scientists, and genomic data analysts, and will be demonstrated on similar datasets.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p43-siddiqui-cidr17.pdf", "http://data-people.cs.illinois.edu/papers/zenvisage-cidr.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/083c/e125d8187fae52cfedfeaf01c3678d18ffd3.pdf", "s2Url": "https://semanticscholar.org/paper/083ce125d8187fae52cfedfeaf01c3678d18ffd3", "sources": [ "DBLP" ], "title": "Fast-Forwarding to Desired Visualizations with Zenvisage", "venue": "CIDR", "year": 2017 }, "0866accf3169ba838b77fbe8600ab0a0146e89d9": { "authors": [ { "ids": [ "2071127" ], "name": "Joseph Izraelevitz" }, { "ids": [ "2794865" ], "name": "Lingxiang Xiang" }, { "ids": [ "19882154" ], "name": "Michael L. 
Scott" } ], "doi": "10.1109/PACT.2017.16", "doiUrl": "https://doi.org/10.1109/PACT.2017.16", "entities": [ "Branch predictor", "HTML", "Programmer", "Side effect (computer science)", "Transactional memory" ], "id": "0866accf3169ba838b77fbe8600ab0a0146e89d9", "inCitations": [], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "79-90", "journalVolume": "", "outCitations": [ "1d9889486e2e4e553e73f7154bb73bcb3e2024c8", "0fca03c476d869660dec04fb83f54161767a4ba7", "0fc3098d4413dd75ef750c8dddf6cbe87ea9d8d7", "1e5343aedb2117e9a211f471edd553af45e450f7", "e02ec8cd92c8e687b9e343868b07e0898302c72f", "51e8b3cfca99f4477f7267ea633adb72a442288a", "9cf4ca5d05db678f40b09d044203ab940fc62bb5", "7b1a8647dd6482adee6b774d694e5c85b6d5a9cf", "2f7ae6b41a97bf7dc705b4a4bd42ec37a8dc1d87", "14d5de21fd760893f9195e257b5f7e01919733e1", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "3e77a77247734dc918a5723573e1158eee1955f9", "1e365e63d5001819f11eb14e84057e8b85b4b138", "46e61ad29ab20618fb551afbc00ebb8eb4e9be21", "36165d72079a03f10e420dbca85e661e8b811534", "2ae2684f120dab4c319e30d33b33e7adf384810a", "222fafc8afce5219bd3e40c6784b6f3502e42c32", "3db65842289ba31794d504ba11ddcb8f4440f241", "3370784dacf9df1e54384190dad40b817520ba3a", "3150e68dccebd9d8e371143270f6bc3942b7d69c", "339a15240ff850110e211bb141e282a4626e7b91", "ab12cef09635b578d1c6479a2a693de8a75be2c7", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "3150af98e61952c09f70e53a3f84911291d8f440", "18084bc614fec70d920a899d39cdb32255007863", "faa97a8689d9204d4ca11e9c1188414bd0bd4bbe", "206d4ebdf93ae6c9b530efc94fa408ccca2b402f", "056aea9d5e4961533ea849f05478856a09fb367d", "40cb40b7812e019c1051e3a457a8643400b81d51", "22839816fbd337d77b81a7f3c6430324e057c250", "5798c7f1114d8db20627a8aa994ae77e005eb623" ], "paperAbstract": "Several research groups have noted that hardware transactional memory (HTM), even in the case of aborts, can have the side effect of warming up the branch 
predictor and caches, thereby accelerating subsequent execution. We propose to employ this side effect deliberately, in cases where execution must wait for action in another thread. In doing so, we allow \"warm-up\" transactions to observe inconsistent state. We must therefore ensure that they never accidentally commit. To that end, we propose that the hardware allow the program to specify, at the start of a transaction, that it should in all cases abort, even if it (accidentally) executes a commit instruction. We discuss several scenarios in which always-abort HTM (AAHTM) can be useful, and present lock and barrier implementations that employ it. We demonstrate the value of these implementations on several real-world applications, obtaining performance improvements of up to 2.5x with almost no programmer effort.", "pdfUrls": [ "http://www.cs.rochester.edu/u/jhi1/papers/2017-transact-aahtm.pdf", "http://ftp.cs.rochester.edu/u/jhi1/papers/2017-transact-aahtm.pdf", "http://cs.rochester.edu/u/jhi1/papers/2017-transact-aahtm-slides", "http://ftp.cs.rochester.edu/u/jhi1/papers/2017-pact-aahtm-slides", "http://cs.rochester.edu/u/jhi1/papers/2017-transact-aahtm.pdf", "http://ftp.cs.rochester.edu/u/jhi1/papers/2017-pact-aahtm.pdf", "http://www.cs.rochester.edu/u/jhi1/papers/2017-transact-aahtm-slides", "http://ftp.cs.rochester.edu/u/scott/papers/2017_TRANSACT_AAHTM.pdf", "http://cs.rochester.edu/u/scott/papers/2017_TRANSACT_AAHTM.pdf", "http://ftp.cs.rochester.edu/u/jhi1/papers/2017-transact-aahtm-slides", "http://transact2017.cse.lehigh.edu/izraelevitz.pdf", "http://www.cs.rochester.edu/u/scott/papers/2017_TRANSACT_AAHTM.pdf", "http://cs.rochester.edu/u/scott/papers/2017_PACT_AAHTM.pdf", "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.16" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0866accf3169ba838b77fbe8600ab0a0146e89d9", "sources": [ "DBLP" ], "title": "Performance Improvement via Always-Abort HTM", "venue": "2017 26th International 
Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "0869331a48cec49977a56ae489035c615a269fba": { "authors": [ { "ids": [ "2832718" ], "name": "Phani Vadrevu" }, { "ids": [ "20492581" ], "name": "Jienan Liu" }, { "ids": [ "2485552" ], "name": "Bo Li" }, { "ids": [ "2946091" ], "name": "Babak Rahbarinia" }, { "ids": [ "1766676" ], "name": "Kyu Hyung Lee" }, { "ids": [ "2822260" ], "name": "Roberto Perdisci" } ], "doi": "", "doiUrl": "", "entities": [ "Android", "Chromium (web browser)", "Clickjacking", "High availability", "Phishing", "Screenshot", "Snapshot (computer storage)", "Social engineering (security)", "Usability testing", "Web page" ], "id": "0869331a48cec49977a56ae489035c615a269fba", "inCitations": [ "7cb306cc212dc887e322ec072855b5908b5f207d" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "dcc55085c1e6d69a0c5c368809742bffb6f3ba0e", "b0d2e7a274bf7f778f0cd22486ead79d58c859ac", "8b1282536760055a5e524bbeada1e3ee5ad89ac9", "2152f9f91e798c23715fdce699b6a8f0f8d43170", "2a97c9568f6bb789244d8ef463b8169433169edd", "ccf00a9032632250f8c76a25b688a697038b6ee3", "d72656b705fafbc7d0842587c97dedd7a5cd84be", "0be628988f8ee0beaad57d68cbae8b635f0b555c", "0ca4dbd0201c3ce423c5cd64d5cdb420c966b847", "5eea7073acfa8b946204ff681aca192571a1d6c2", "12b6f48f64700b829a7bb5591ff6b733fb510755", "51a23e9565756c71febb28f90a738ed4e604668d", "1cbef02d99729cd8195466a89a2cb79cb2253dff", "0d14221e3bbb1a58f115a7c7301dc4d4048be13f", "16e6dae28af1df278e1793d1fee022a4fefd260e", "32a7c5e10a09b5532d56af50ef2f60d9776cc56a", "12d6cf6346f6d693b6dc3b88d176a8a7b192355c", "3c0a4576cae8da360e5f25a1cd3dba8264ea6a3a", "007733c1deac2fb8fb8175f3c99ecc4e5a5484cb", "4578afb3d3108a9064f2299b47f2f32cb94926ee", "a370b90f96f92f5b1d2e0a3725d50bf7f4b1d5ec", "1f973b2799544c6c30981d821f6db29ba5fb237e", "16e85d76e57739da3082ca9dd4868b240c0b3c86", "928d2e2b03a7771966673047db1417b20f905bfc", "1c126c0ddc80c1fa177adb9ef32bdf84e0306846", 
"8133926b933e82d29766c042764ae5cac935f830", "1a7160058a87a2a7dedd2f6e95f25892ec4f3d35", "6445d5fa98fee9b66a08501b97e64bc8bfca19a8", "013a2d879fcc30e581b9f503dcc03360afbb6b15" ], "paperAbstract": "In this paper, we present ChromePic, a web browser equipped with a novel forensic engine that aims to greatly enhance the browser\u2019s logging capabilities. ChromePic\u2019s main goal is to enable a fine-grained post-mortem reconstruction and trace-back of web attacks without incurring the high overhead of record-andreplay systems. In particular, we aim to enable the reconstruction of attacks that target users and have a significant visual component, such as social engineering and phishing attacks. To this end, ChromePic records a detailed snapshot of the state of a web page, including a screenshot of how the page is rendered and a \u201cdeep\u201d DOM snapshot, at every significant interaction between the user and the page. If an attack is later suspected, these finegrained logs can be used to reconstruct the attack and trace back the sequence of steps the user followed to reach the attack page. We develop ChromePic by implementing several careful modifications and optimizations to the Chromium code base, to minimize overhead and make always-on logging practical. We then demonstrate that ChromePic can successfully capture and aid the reconstruction of attacks on users. Our evaluation includes the analysis of an in-the-wild social engineering download attack on Android, a phishing attack, and two different clickjacking attacks, as well as a user study aimed at accurately measuring the overhead introduced by our forensic engine. 
The experimental results show that browsing snapshots can be logged very efficiently, making the logging events practically unnoticeable to users.", "pdfUrls": [ "http://www.phanivadrevu.com/papers/chromepic.pdf", "http://cobweb.cs.uga.edu/~kyuhlee/publications/ndss17_2.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/enabling-reconstruction-attacks-users-efficient-browsing-snapshots/", "http://cobweb.cs.uga.edu/~vadrevu/papers/chromepic.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/db3b/a695d49a84073e173d7308f0e01f7dc09361.pdf", "s2Url": "https://semanticscholar.org/paper/0869331a48cec49977a56ae489035c615a269fba", "sources": [ "DBLP" ], "title": "Enabling Reconstruction of Attacks on Users via Efficient Browsing Snapshots", "venue": "NDSS", "year": 2017 }, "087184c7ee33a6497ed5130615d1731559685f05": { "authors": [ { "ids": [ "3014657" ], "name": "Vignesh Adhinarayanan" }, { "ids": [ "1688860" ], "name": "Wu-chun Feng" }, { "ids": [ "2549894" ], "name": "David H. Rogers" }, { "ids": [ "1787080" ], "name": "James P. 
Ahrens" }, { "ids": [ "1726678" ], "name": "Scott Pakin" } ], "doi": "10.1109/IPDPS.2017.113", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.113", "entities": [ "Experiment", "IBM WebSphere eXtreme Scale", "Simulation", "Soft systems methodology", "Supercomputer" ], "id": "087184c7ee33a6497ed5130615d1731559685f05", "inCitations": [ "b3c69039c5f9884bbaa5b76b29622e2956c6ba3f" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "978-987", "journalVolume": "", "outCitations": [ "81a3ea77ab4f2944cfd5d507dcf87f0599c079a1", "0edb8ac77c0769a92882e4654b8dd45fa092bd82", "3d257eb00ab02d54fb4005233ae9b460faf22fcf", "429d28998216da5648f40248bf4bc9e508edd2fd", "4908fe53a91465eaf95b21c4ca4f05378b90dcc4", "20d6a8a39ebecd21bc8a4df53f248356d38ea6d9", "2566acc500a8f013610d306bea7a8f548930dfed", "009342aa77a56c46a475fa85e66506219f271526", "2073266dfb3f034d55cd5a3fca62d230832afd43", "6b37f1d8c7afc971c96c78afa99ec9daef357e85", "ba49274c5be932188dad93f88b2ca75ea9acd411", "9c44e61f4762618dd78ce8355065b94235b84ae2", "5cf4b2a8552277b75f6329ef162891dd4d890830", "37344f50b98c5e8f7115d2b45de7565ee68c66d0", "cc329b309624abd3f67d37a3f7012a313947031f", "1a931c30bf6bb5c0dcc2ae4dd740cd2a68281a11", "262c123c6325d7b4c2edc904d6e85d352ab266b9", "3ffa2f769ada734c92d0638efe2a0a7febf375bd", "44da4713fcf0a4ee7a8323737e678b3faec42d2e", "5c0bf29f29f4d02bc25d94227526103db5e2e69b", "8616bc588ec3fda14f6a559ce4e637f5b72af490", "294a76c28db636cb9eb6c9b7fd9f46b96c246bb8", "70e1cef1129793954694e0f4519441284448d938", "8bc835b7005b705b65c1506b32ddb907d6167b56", "211ab1a3a036970fe532c4b10a503545f426ae56" ], "paperAbstract": "Plans for exascale computing have identified power and energy as looming problems for simulations running at that scale. 
In particular, writing to disk all the data generated by these simulations is becoming prohibitively expensive due to the energy consumption of the supercomputer while it idles waiting for data to be written to permanent storage. In addition, the power cost of data movement is also steadily increasing. A solution to this problem is to write only a small fraction of the data generated while still maintaining the cognitive fidelity of the visualization. With domain scientists increasingly amenable towards adopting an in-situ framework that can identify and extract valuable data from extremely large simulation results and write them to permanent storage as compact images, a large-scale simulation will commit to disk a reduced dataset of data extracts that will be much smaller than the raw results, resulting in a savings in both power and energy. The goal of this paper is two-fold: (i) to understand the role of in-situ techniques in combating power and energy issues of extreme-scale visualization and (ii) to create a model for performance, power, energy, and storage to facilitate what-if analysis. Our experiments on a specially instrumented, dedicated 150-node cluster show that while it is difficult to achieve power savings in practice using in-situ techniques, applications can achieve significant energy savings due to shorter write times for in-situ visualization. 
We present a characterization of power and energy for in-situ visualization; an application-aware, architecture-specific methodology for modeling and analysis of such in-situ workflows; and results that uncover indirect power savings in visualization workflows for high-performance computing (HPC).", "pdfUrls": [ "http://synergy.cs.vt.edu/pubs/papers/adhinarayanan-characterizing-ipdps17.pdf", "https://doi.org/10.1109/IPDPS.2017.113" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/087184c7ee33a6497ed5130615d1731559685f05", "sources": [ "DBLP" ], "title": "Characterizing and Modeling Power and Energy for Extreme-Scale In-Situ Visualization", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "087767aa5daedbc7bd14289a92945d3eced61c79": { "authors": [ { "ids": [ "32105700" ], "name": "Aravinda Prasad" }, { "ids": [ "1818818" ], "name": "K. Gopinath" }, { "ids": [ "1817241" ], "name": "Paul E. McKenney" } ], "doi": "", "doiUrl": "", "entities": [ "Benchmark (computing)", "Central processing unit", "IP fragmentation", "Lock (computer science)", "Memory footprint", "Paging", "Preemption (computing)", "Read-copy-update", "Virtual machine" ], "id": "087767aa5daedbc7bd14289a92945d3eced61c79", "inCitations": [], "journalName": "", "journalPages": "265-270", "journalVolume": "", "outCitations": [ "a7f42e2fb075e35cf1f4d61cf2e9d300a34b6a0e", "008f16f7af27244b507659be26bebb8bb07aded3", "f0bcaaa7918b1b895a82897fa93ba911787e6fe1", "09cef59336519ce93d15841bc2756a79ce13477d", "371c5cc544d758eacd08ee2e9333b1b94f26689e", "21e51da40ab080ca2b71ad36094e2b686008b6cc", "6a285b0a2243223ee6905692d79b4a8d39f5af5e", "3574657705475722b6c398c266805f758268778b", "d2b00c9737af15c148a6a871cba38b1e6f286da7", "2d40c4fe4c076abe1ceb058a0f63d01159d485c5", "ca6e70cca64c928872a8cd137515d72708b58a69", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "7c833a334df551456885bd8b55d63753afac1fae", 
"d5368bf4e182aedd6875ef4f81c54b2fb70131ca" ], "paperAbstract": "When synchronization primitives such as locking and read-copy update (RCU) execute within virtual machines (VMs), preemption can cause multi-second latency spikes, increasing peak memory footprint and fragmentation inside VMs, which in turn may trigger swapping or VM ballooning. The resulting CPU utilization and memory footprint increases can negate the serverconsolidation benefits of virtualization. Although preemption of lock holders in VMs has been well-studied, the corresponding solutions do not apply to RCU due to its exceedingly lightweight read-side primitives. This paper presents the first evaluation of RCU-reader preemption in a virtualized environment. Our evaluation shows 50% increase in the peak memory footprint and 155% increase in fragmentation for a microbenchmark, 23.71% increase in average kernel CPU utilization, 2.9\u00d7 increase in the CPU time to compute a grace period and 2.18\u00d7 increase in the average grace period duration for the Postmark benchmark.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-prasad.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_prasad.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/prasad" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0877/67aa5daedbc7bd14289a92945d3eced61c79.pdf", "s2Url": "https://semanticscholar.org/paper/087767aa5daedbc7bd14289a92945d3eced61c79", "sources": [ "DBLP" ], "title": "The RCU-Reader Preemption Problem in VMs", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "0878137beb62b817af269c01039770745510c362": { "authors": [ { "ids": [ "2129120" ], "name": "Changyu Dong" }, { "ids": [ "3057294" ], "name": "Yilei Wang" }, { "ids": [ "24074620" ], "name": "Amjad Aldweesh" }, { "ids": [ "3036364" ], "name": "Patrick McCorry" }, { "ids": [ "39896769" ], "name": "Aad van Moorsel" } ], "doi": 
"10.1145/3133956.3134032", "doiUrl": "https://doi.org/10.1145/3133956.3134032", "entities": [ "Cloud computing", "Computation", "Correctness (computer science)", "Cryptographic protocol", "Cryptography", "Ethereum", "Formal verification", "Game theory", "On-premises software", "Plaintext", "Rational Unified Process", "Rationality", "Smart contract", "Solidity", "Trust metric" ], "id": "0878137beb62b817af269c01039770745510c362", "inCitations": [ "91480c9cacdd0d630698fb1e30e7e8582e17751e" ], "journalName": "", "journalPages": "211-227", "journalVolume": "", "outCitations": [ "296eaf328fb74c62c1704c5ac8a1b5d75243b100", "1ce06b82943a525839a402f366c8cac07cda8114", "8c5e81a2badc7ed7c03914a8c12773084a96155a", "0d5f7a1825bae713cebd66d121d5b01e31d8adab", "45f6f2425f3e21abea774fa6261eb6c457efd140", "ac760a88e1cc87823df032e46d5aad4ae43663f4", "14cafb1eb6c2c4bcc9cf340d6c5ef496d296c4d5", "1eb12d6de68847d7cadb36b4ef00a6e52de65554", "2812bc89d7f136dc11d33db4c65dbbb2b4f652b4", "8ee11249c02c0a86cd7a759cfdf3ea2705b80d04", "0df07fc5f4e09f5c9030efecf359d989d81fa36d", "450f4ccfb97f4e7e12d56c1482c6196fc754a9f6", "f407af624b127af27e00e9321dbb4c0f31ed4a78", "019b5b8e54b10860d39dac8f449c9d3db173527b", "4df26879184de245c0f35cc99bde6165e853c943", "4f246dd7f2ba3764245d8a16c3048adf0cc68b1d", "1436afbd9120cf8f7be2ba329fae0a0f6093e407", "0a9cc699ed94b4729de3255d76305b91b6a56aaa", "7c842ec1c1e065961c3799e92cd4a48b40080607", "0e43a1ba5afce2d87fed72e0a34e3e4fbf1cefa5", "444efc8ac45b096a723c765fb13addaffa45468e", "6b3515e9ed9d5af9952e6af01cdb8784014cd3c9", "d12d1289d2384c2ce642f01855637b9f0519e189", "9e38f65689de68019fb8a1fc4ffb00f7caac0dd4", "4b10e6a2efb5d54f31ff6d010d0a7e3c5394a729", "03dc3e7ddfe10dd7e0617a78b1dee1f593b97f87", "66818f952327b9145d3c7f6ef392240f06767755", "b6af621db98a3440cdf8745eec873a1bc7e349bc", "3f7df8d63076f2917b695c905c503745edf31055", "33853565b4dcad38b9b79091a48d3f40409f06d7", "3e0080a34eca4eabb9b371c2b3c369dc4dc90112", "48326c5da8fd277cc32e1440b544793c397e41d6", 
"241afc601cb41d336b5e66a8a5a2cf35c6512882" ], "paperAbstract": "Cloud computing has become an irreversible trend. Together comes the pressing need for verifiability, to assure the client the correctness of computation outsourced to the cloud. Existing verifiable computation techniques all have a high overhead, thus if being deployed in the clouds, would render cloud computing more expensive than the on-premises counterpart. To achieve verifiability at a reasonable cost, we leverage game theory and propose a smart contract based solution. In a nutshell, a client lets two clouds compute the same task, and uses smart contracts to stimulate tension, betrayal and distrust between the clouds, so that rational clouds will not collude and cheat. In the absence of collusion, verification of correctness can be done easily by crosschecking the results from the two clouds. We provide a formal analysis of the games induced by the contracts, and prove that the contracts will be effective under certain reasonable assumptions. By resorting to game theory and smart contracts, we are able to avoid heavy cryptographic protocols. The client only needs to pay two clouds to compute in the clear, and a small transaction fee to use the smart contracts. 
We also conducted a feasibility study that involves implementing the contracts in Solidity and running them on the official Ethereum network.", "pdfUrls": [ "https://arxiv.org/pdf/1708.01171v1.pdf", "https://arxiv.org/pdf/1708.01171v3.pdf", "https://arxiv.org/pdf/1708.01171v2.pdf", "https://arxiv.org/pdf/1708.01171v4.pdf", "http://arxiv.org/abs/1708.01171", "http://doi.acm.org/10.1145/3133956.3134032" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0878137beb62b817af269c01039770745510c362", "sources": [ "DBLP" ], "title": "Betrayal, Distrust, and Rationality: Smart Counter-Collusion Contracts for Verifiable Cloud Computing", "venue": "CCS", "year": 2017 }, "087f4fdb7d4473616a9cf764b5924a9b46f2c152": { "authors": [ { "ids": [ "32697990" ], "name": "Kejiao Li" }, { "ids": [ "1703309" ], "name": "Hui Li" }, { "ids": [ "3050021" ], "name": "Hanxu Hou" }, { "ids": [ "31652926" ], "name": "Kedan Li" }, { "ids": [ "1775463" ], "name": "Yongle Chen" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.61", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.61", "entities": [], "id": "087f4fdb7d4473616a9cf764b5924a9b46f2c152", "inCitations": [], "journalName": "", "journalPages": "466-473", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.61" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/087f4fdb7d4473616a9cf764b5924a9b46f2c152", "sources": [ "DBLP" ], "title": "Proof of Vote: A High-Performance Consensus Protocol Based on Vote Mechanism & Consortium Blockchain", "venue": "HPCC/SmartCity/DSS", "year": 2017 }, "0892fc02ed972ec5005c73fbe47db2d64db52628": { "authors": [ { "ids": [ "34996452" ], "name": "Flavio Chierichetti" }, { "ids": [ "1683442" ], "name": "Ravi Kumar" }, { "ids": [ "1717692" ], "name": "Bo Pang" } ], "doi": "10.1145/3077136.3080821", "doiUrl": "https://doi.org/10.1145/3077136.3080821", "entities": [ "Concave function", 
"Generative model", "Text corpus", "Three Laws of Robotics", "Word lists by frequency", "Zipf's law" ], "id": "0892fc02ed972ec5005c73fbe47db2d64db52628", "inCitations": [], "journalName": "", "journalPages": "385-394", "journalVolume": "", "outCitations": [ "1c6e4fe69a8b7792b051c997a484f55c54527b23", "3eeff93605ae2cb8d1dc524e14cab0d1ac03c79b", "369ba5f0db56b7e7e617f35cc97776a5aa1259af", "7174d52caddfb442da6da04e3319b2a5fd349c39", "ad3a8dd835606b89dd180531769bf89775df2121", "2ef2211ea04fd2455b5bf51254ccefe1e356ebf2", "e50a316f97c9a405aa000d883a633bd5707f1a34", "7817312251fa9a45b433997ea344580c1e528aef", "a28aa08f8e946baeadc59b1023bd2ff5c5e0daab", "87d907a114409755ecd3c6886585de26a4e17ffe", "34874d1ca5e84eb91005d5c0363175c550e94945", "86b04584ec470e2fa44b29f6fd39d8d8c0d6ddb9", "a4a8c4e6bc5a7036d61bbc1a9eb0b4acccfab756", "2d3db65e12c332a8250265fdbd6055d69a98688e", "a1517ac30a1cc829b0c7cee18590fc1df93e5f9d", "0e8d3f68c0a0eb9dab241c63aab319dbf596e697", "b50e429252a5c3135977000c67f977ba222a8c59", "4e2dd51cb8a46e5af5ec4c2821ab086658a3dab9", "0096e0c231fa0c208cde2f8253343525bc442269", "ac9379ac1885dcbbc833214e8591c790ae60e7c8", "0b51c9c01afb3c43854d33107571321ed3ffab67", "17136a8244ed56dae22d232f4b1b8309d306b973", "16dde76c2ecc94c34004c640f603a8920c2fe1be", "5eabaf7f36cbd6be74c2e78e91f0457f358ba14c", "ef52801ac0e8a330c11788dad6960ea9f7966b94", "bad935dd6f10d038e462bea42b5d5f9dd0ffcf19", "040678daf6a49a88345ee0c680fccfd134f24d4b", "8486d3c6fc783bb2eb078d655fbb94f6b8efaa82", "3726ffd29d3e3c73cd1a925215901c34d664d87a", "78f43b29c83a10c50eb02f3043ee18b0308c536c", "02457efddaf1dadf64e2553393214baedcba8949", "27b92df79c638fc74bc4e8b2bd27d272d512c399", "629cc8247557ecd86af6773e1ab30baabe063440" ], "paperAbstract": "About eight decades ago, Zipf postulated that the word frequency distribution of languages is a power law, i.e., it is a straight line on a log-log plot. Over the years, this phenomenon has been documented and studied extensively. 
For many corpora, however, the empirical distribution barely resembles a power law: when plotted on a log-log scale, the distribution is concave and appears to be composed of two differently sloped straight lines joined by a smooth curve. A simple generative model is proposed to capture this phenomenon. The word frequency distributions produced by this model are shown to match the observations both analytically and empirically.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080821" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0892fc02ed972ec5005c73fbe47db2d64db52628", "sources": [ "DBLP" ], "title": "On the Power Laws of Language: Word Frequency Distributions", "venue": "SIGIR", "year": 2017 }, "0898c1143b70eefc9abed1869b2ecee2a5a49b40": { "authors": [ { "ids": [ "1731161" ], "name": "Waleed Reda" }, { "ids": [ "1709876" ], "name": "Marco Canini" }, { "ids": [ "8399335" ], "name": "P. Lalith Suresh" }, { "ids": [ "1736741" ], "name": "Dejan Kostic" }, { "ids": [ "34626057" ], "name": "Sean Braithwaite" } ], "doi": "10.1145/3064176.3064209", "doiUrl": "https://doi.org/10.1145/3064176.3064209", "entities": [ "Algorithm", "Amazon Web Services", "Apache Cassandra", "Attribute\u2013value pair", "Data store", "Experiment", "Round-robin scheduling", "Scheduling (computing)", "Simulation" ], "id": "0898c1143b70eefc9abed1869b2ecee2a5a49b40", "inCitations": [ "c1447c4c07721e4e444aaa7ad5bb6a661c742bd2", "2dc3d8536a8fa4660c6e842ef17c2d1553fceea6" ], "journalName": "", "journalPages": "95-110", "journalVolume": "", "outCitations": [ "37a1e8411669e29cf8fbf48ec920c97c0066ac7e", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "1b6cb42480d298cd25e8546d25d930ae44729855", "17bef89290a3054ecfdd68d2fe5be8066d59b856", "0538e05e1ced11b91cda5d1aed88a73969def882", "231ba17921ebd80e95771e28dfb5082e169d5a53", "09f0751d7452cd0480d572171593d07996325fcb", "065465ac37607a347186ea50873fc63d17cd2c79", "2a01193b14343f28be114cd39fbe73bceffa327f", 
"38f1fe2e3d6a29a5cb68cf4d7512b947d9d8240c", "1ae7993c0c2d795b243354de48dab80bf2000356", "178b92c9d7438aa44949a4f5441e83f8a9de3ccb", "4cfd229dfaf647e7e2833012c7c5d688e1a0ecce", "3872ccb40b44e0a7af38a1e6fdeb35754dfce9d2", "544b0ba4ae011fe26c3f207a7c6f9d6de04468ae", "6e669e90a34c4179f9364406d8a7a7f855745086", "519273ac1a017f42ec849477185475a7bc431d7c", "78335c3b8c71fe387572f45ca241421a63a2d8aa", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "0d2f3f6abd86368a2eb9a6d0b37d1299ec5939a6", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "1adb7a6ec9563b33523e4ef27af7357d0bbc4355", "0faff4fa4347d5369956dbdbea410869fc399bfd", "3338173866c3c85338a5ac26560d5392108c8eac", "9aa0d7253574e50fe3a190ccd924433f048997dd", "30b378dd68b36db5e5f99f61a29cec0f0523c55c", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "62a68d15bbfef566170fc610183eb7ebf8313dce", "090030e0d1aa117008e9e9fa4abdee0a95455f4a", "4feb45a9b30438632b5d2d20c3bbcf0c0df99c95", "830ee8d87a3f2ef969d34c1dc7224d1b3dca6c1b", "230239fb61d7a6996ac9552706363323b34735f2", "e457b81e15db815ec3dfcd18e0a2838962db2be4", "5bd8ad07f0b900dc476ca75ab0838082abceb8e3", "234e6be0d4238f76b3ac038ee422be39f391c625", "02d9013e5d370fb79ff1569a59190e18515fa3cd", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "0a3300d149a0f45623e5cde4f9114b9773b0054c", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "118da4d571ee02b4f31b5c4c078857472e77ba1e", "0d923afc5ca379e7a488f3a0eefd5767bb2a191d", "13b26d008210fffeb8a77c9e90f1ff837523c536", "35ab564ef135073f9f24098e36b425b921927cb9", "00aa614734a26a19b09a0a3bdee2adc77bdac5e4" ], "paperAbstract": "We tackle the problem of reducing tail latencies in distributed key-value stores, such as the popular Cassandra database. We focus on workloads of multiget requests, which batch together access to several data elements and parallelize read operations across the data store machines. We first analyze a production trace of a real system and quantify the skew due to multiget sizes, key popularity, and other factors. 
We then proceed to identify opportunities for reduction of tail latencies by recognizing the composition of aggregate requests and by carefully scheduling bottleneck operations that can otherwise create excessive queues. We design and implement a system called Rein, which reduces latency via inter-multiget scheduling using low overhead techniques. We extensively evaluate Rein via experiments in Amazon Web Services (AWS) and simulations. Our scheduling algorithms reduce the median, 95th, and 99th percentile latencies by factors of 1.5, 1.5, and 1.9, respectively.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064209", "http://kth.diva-portal.org/smash/get/diva2:1085916/FULLTEXT01.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0898c1143b70eefc9abed1869b2ecee2a5a49b40", "sources": [ "DBLP" ], "title": "Rein: Taming Tail Latency in Key-Value Stores via Multiget Scheduling", "venue": "EuroSys", "year": 2017 }, "08a76016f0864089ea6d5cdf535382fe3d8c97fd": { "authors": [ { "ids": [ "3224854" ], "name": "Abdul Wasay" }, { "ids": [ "10704708" ], "name": "Xinding Wei" }, { "ids": [ "3242478" ], "name": "Niv Dayan" }, { "ids": [ "2203901" ], "name": "Stratos Idreos" } ], "doi": "10.1145/3035918.3064051", "doiUrl": "https://doi.org/10.1145/3035918.3064051", "entities": [ "Algorithm", "Data science", "Data structure", "Domain-specific modeling", "Exploratory testing", "In-memory database", "Library (computing)", "Machine learning", "Motorola Canopy", "Statistical model" ], "id": "08a76016f0864089ea6d5cdf535382fe3d8c97fd", "inCitations": [ "38a96a0585e6d4c5f9fe5d326fd639bb289e69f8", "b61bd7521b65077eac8381e71facc397060c60b0" ], "journalName": "", "journalPages": "557-572", "journalVolume": "", "outCitations": [ "aceb72aeefa7e6a752cfa25f75b57c1af4aa0d37", "bd0de8fb2d9c1e1a6420f5842927d20216b271af", "96ff658c6fd8237b789ff89ed485cf9503d59cb8", "5208060771fd213eefd827e3e1260b939f1aed6d", "418f49d4a4b58f8aa7ba610ee474420fec4f4a71", 
"65c46b04244c194aafe5cff074e824b4aad081ce", "8388bfc21e16e69bdda4b66db395a3618816c76d", "3f419db6f66c32bbb7ea887b139abd4e088a0405", "a70e02b6e42b908cdbc53bc6cecb532cf72d4d4a", "2340c3dc69abd0b767d3c5dfd5de2d153399f776", "44662438b9659fc7a48d32eae112dd9bcdef9ee3", "6f1f5ab2eb6965e3fd12c21d4ed0fd76b3c12f85", "8af01e6cb7375ff671ed6efd8576253ab6e12d04", "a24d39e7c504a5705e4a480f99c1461992931934", "1317f7e3d1de6ffd0888303ca95d9c8c6bae2af3", "0d1346ced4b1b04765573433bfad5c328fd4e734", "dc39c68a00e38f2993b450eb01c96e1d032ab850", "156060edfc6b4b00871e9f9f83a994c5f97ad470", "56974d490966a9d4f5c28f8be37fc34a08256388", "1536706ebf24447ba6a45e5829c8bf036edf7b76", "50b36b1b72056b9dc2814cd2c5d1a4146bebdd53", "03a666995669ebb66f3a8e7cd3b6e0f07a0f8d6a", "b6eb482777d1686f6ea2b0e2d05cb0ec9431a6b3", "0b22e6cab2d101003b315c7b50ce0dc6060e37e0", "299613bc57efd6e4bb590878220ecdd1d222d7b1", "e321ac7f3872d07dbe8c6e634c96a4c28fb7a7aa", "1fe660134bf98e6d5b8408a831b0fbc262979697", "3493b2232449635aff50fc17e03163cb4b66f1b5", "4de9d0429992392dce8508ba7800e11c03e42277", "31d2c9e85d395b0dcb48123d03b1b33440d389fb", "1c8e9f9fa4a030ef03d6b5f912a8ff1bc1cb2c47", "78c5118f50ce4369809133cfb9496d2f97917c1e", "588f2b2f5fa6d08ef729a64a45a1e15dfd90dd6e", "3e140ea76fe8bbdcd0aa5702294ca4371376ba1b", "0c0cfae57d32de295292fff6e67b2a22001bde23", "541ddea3fa1cf2e22099f014101da492d12788b1", "366c4ab50da396991b28c6fb58996ac7ff1775e8", "0403ef9fd23e2a10069ac979ea56791aa1f0e580", "62a8ec1501872ae45d7607ac83f08f89b0486a92", "1f9e5cf7648c50ed764edcf0b8a6e32ad3aa5a96", "845067be5ce40ff2e82257b834c6dff02e2d193c", "23cedbae2800fecf6a3784faeda064f7f4ea6126", "b63eaa5aab7788f783bc0f25c94c4eca7f19313b", "11f5a2cfda10410dc581c6c09d08ef669238e978", "42fee5b7c0d96f93172ac64bfef5a888874f3ab6", "a6078ad365012c1f527c192894a9184daab8e597", "406a30efc72f227c2c3289f3046cce44a0ff4a9c", "efa5558bddd68abe4adc81adbbef6f739e648392", "2321a150c84d771d81fd81759757795dcda25750", "ad2ed0db597b4c87d1c6443e525d55d8c09656ee", 
"dc638336827ebf5f6f2d4dd055bc2e6b9bffe2f0", "17e01776d5734c27c5305ff3af6e904cfbf136f9", "5eab0265b76dbcfaa1402a6e20acc12eff721a26", "168de446821c0de9ebfa6d2145ca69837bd92671", "3963ff791608bd2ee2621a00fa035efdf3d5048a", "45790ddf3309753f47689da6ad2d874e2362c7f3", "008b18ee86a04bf65e461c9d77f56bcbc28c3788", "9c2a576de3e2dc9bc670f5bf8aeb7d9ec738d9ce", "a5dba464aa71d5048408fc8ac9d734a284f1cd54", "00aa614734a26a19b09a0a3bdee2adc77bdac5e4", "0b9f263a7fe1a6a6f1db60068b72cba8f7510345", "39ce3db4516cd6e7d7f8e580a494c7a665a6a16a", "65e0af21793f0dc748a1755b736db4fbeb9bb4e8", "7a278ee0578f194700cadc3811cdda4ec751f88a", "81957286d692030cc7f1572debbb046edf11a43b", "43aac4922fc82c1e8062d4d22b670701b93d980a" ], "paperAbstract": "During exploratory statistical analysis, data scientists repeatedly compute statistics on data sets to infer knowledge. Moreover, statistics form the building blocks of core machine learning classification and filtering algorithms. Modern data systems, software libraries, and domain-specific tools provide support to compute statistics but lack a cohesive framework for storing, organizing, and reusing them. This creates a significant problem for exploratory statistical analysis as data grows: Despite existing overlap in exploratory workloads (which are repetitive in nature), statistics are always computed from scratch. This leads to repeated data movement and recomputation, hindering interactive data exploration.\n We address this challenge in Data Canopy, where descriptive and dependence statistics are synthesized from a library of basic aggregates. These basic aggregates are stored within an in-memory data structure, and are reused for overlapping data parts and for various statistical measures. What this means for exploratory statistical analysis is that repeated requests to compute different statistics do not trigger a full pass over the data.
We discuss in detail the basic design elements in Data Canopy, which address multiple challenges: (1) How to decompose statistics into basic aggregates for maximal reuse? (2) How to represent, store, maintain, and access these basic aggregates? (3) Under different scenarios, which basic aggregates to maintain? (4) How to tune Data Canopy in a hardware conscious way for maximum performance and how to maintain good performance as data grows and memory pressure increases?\n We demonstrate experimentally that Data Canopy results in an average speed-up of at least 10x after just 100 exploratory queries when compared with state-of-the-art systems used for exploratory statistical analysis.", "pdfUrls": [ "http://daslab.seas.harvard.edu/classes/cs165/doc/guest_lectures/DC_CS165.pdf", "https://stratos.seas.harvard.edu/files/stratos/files/datacanopy.pdf", "http://doi.acm.org/10.1145/3035918.3064051", "http://daslab.seas.harvard.edu/data-canopy/doc/DC_SIGMOD.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/08a76016f0864089ea6d5cdf535382fe3d8c97fd", "sources": [ "DBLP" ], "title": "Data Canopy: Accelerating Exploratory Statistical Analysis", "venue": "SIGMOD Conference", "year": 2017 }, "08ca316390729e26376836efb653e872f8184ec5": { "authors": [ { "ids": [ "32563772" ], "name": "Jose Picado" }, { "ids": [ "3192362" ], "name": "Arash Termehchy" }, { "ids": [ "1791751" ], "name": "Alan Fern" }, { "ids": [ "3454984" ], "name": "Parisa Ataei" } ], "doi": "10.1145/3035918.3035923", "doiUrl": "https://doi.org/10.1145/3035918.3035923", "entities": [ "Algorithm", "Castor", "Data dependency", "Data quality", "Database", "Machine learning", "Relational database", "Usability" ], "id": "08ca316390729e26376836efb653e872f8184ec5", "inCitations": [ "006b99c44bedd6bb9b0ebe27aea95f3a31f54c97", "45e171871f642a6058b6558c385045b372b29131", "0bbcce9821119777c0a2aa8302feb56dd979736a", "05456a4ad95fa596781f3598bed9d18f7eb56310", 
"1798c565cb393a461bbb87a243b7997926baa1c4" ], "journalName": "", "journalPages": "929-944", "journalVolume": "", "outCitations": [ "3474820cd8a15bfa3d38a3cf7ff71881c5d5786f", "4735fc168eb72ef96817384aa5d40d5b6016d713", "4ce09dee71cc7cb146751e68f12d18fa240dcfea", "2749cb94f92170f79d0e8ad266605a871767f38a", "554f3b32b956035fbfabba730c6f0300d6955dce", "0f6fb65ab0ac654a1f8be07d134c8957a692a83e", "43fc585a0d15a47e4b5a96559fd6fc13853cb21f", "7f26b3e2fa06a193fcbf9243c1f9afcd5cbb9019", "a97b77c8c3f9e8247c497d1b4c27c958a15f3a62", "11d702bdd25dfdf368d9028693ea00dac25c8851", "350f0f6e4a3db23897741b378da3c4f458625404" ], "paperAbstract": "Learning novel relations from relational databases is an important problem with many applications. Relational learning algorithms learn the definition of a new relation in terms of existing relations in the database. Nevertheless, the same database may be represented under different schemas for various reasons, such as data quality, efficiency and usability. The output of current relational learning algorithms tends to vary quite substantially over the choice of schema. This variation complicates their off-the-shelf application. We introduce and formalize the property of schema independence of relational learning algorithms, and study both the theoretical and empirical dependence of existing algorithms on the common class of (de) composition schema transformations. We show that current algorithms are not schema independent. 
We propose Castor, a relational learning algorithm that achieves schema independence by leveraging data dependencies.", "pdfUrls": [ "http://web.engr.oregonstate.edu/~termehca/SchemaIndep-LearningSys-15", "http://arxiv.org/pdf/1508.03846v1.pdf", "https://arxiv.org/pdf/1508.03846v1.pdf", "http://web.engr.oregonstate.edu/~termehca/SchemaIndepLearning-2015.pdf", "http://arxiv.org/abs/1508.03846", "http://josepicado.com/papers/Castor_SIGMOD2017.pdf", "http://doi.acm.org/10.1145/3035918.3035923", "https://arxiv.org/pdf/1508.03846v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/08ca316390729e26376836efb653e872f8184ec5", "sources": [ "DBLP" ], "title": "Schema Independent Relational Learning", "venue": "SIGMOD Conference", "year": 2017 }, "08d0d3e3ecb7914e8b29edbca4ab70f39c3822b7": { "authors": [ { "ids": [ "5755609" ], "name": "Yixin Sun" }, { "ids": [ "12996095" ], "name": "Anne Edmundson" }, { "ids": [ "1800154" ], "name": "Nick Feamster" }, { "ids": [ "1908294" ], "name": "Mung Chiang" }, { "ids": [ "2030711" ], "name": "Prateek Mittal" } ], "doi": "10.1109/SP.2017.34", "doiUrl": "https://doi.org/10.1109/SP.2017.34", "entities": [ "Algorithm", "Border Gateway Protocol", "Denial-of-service attack", "Man-in-the-middle attack", "RAPTOR", "Relay", "Routing", "Selection algorithm", "Simulation", "Tor Messenger" ], "id": "08d0d3e3ecb7914e8b29edbca4ab70f39c3822b7", "inCitations": [ "3584c519259cc4bfdb910c8d2d8b6caf9d60a7d4", "8c383de05682e4c96dd4407b5b99bf163063f8e3", "0a529ce5e1b00fdd9c7392b94e96f78f513d3e8a" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "977-992", "journalVolume": "", "outCitations": [ "3958b667b5016aec92b728278726f7341ac99ca7", "144144d06ceb6df35eeefc3b8ee63653fe4f2479", "051a89c2f5fd4c54f654e2016b3c31841805b23a", "0c553b53925c4c37f951908f0c1c7a7874a28ad1", "23208b2f513af5c3d7b40f0826ed367da77396c9", "96b3aa0f8bbb060f38e714b5533ca362cd87e1ec", 
"2751d9c57f5e962166153cd328dd3e0919b78003", "3b13ddfca849418deed7035085b2276e2c8fd6e0", "0f1a01fde1ac0507b48c369c7f4f837bb3c628f6", "044c01d55411994fdc6f6cf0544d6a1c2e1f75a1", "34e4016ad6362dc1f85c4a755b2b47c4795fbe4d", "40048481e1bc23e56b7507575f31cc5255a02e9f", "0469044c24ac4897fcffc3c7db6a2d9c7fd08848", "20f38a5d49473d999e3bafd25c9808c3f564154b", "119b69cb10b5df1a4dd1b2c52bc21460a533d49d", "108747579aef6bf029623639a86070feaf5cad41", "8853ca8425684a83ebb2feae1409563a5fbef7c8", "395867fa1708ad9ef3572ab9be0b34b203707be3", "566333376dd1af014555b4cf82cda42c22501013", "356e0eece5bbd700ea3c388af8ea3e088baf7c6e", "31f530f8d0ecb37d2d376313bc4d0dd371bc34de", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "476b20ee2c8d0da7211afb245fd30ea39464f5e9", "3712247f13b89f7c2caaee04503738d3913538aa", "05bb5174ee7de801b6e1a55086455b0341ff5649" ], "paperAbstract": "Tor is vulnerable to network-level adversaries who can observe both ends of the communication to deanonymize users. Recent work has shown that Tor is susceptible to the previously unknown active BGP routing attacks, called RAPTOR attacks, which expose Tor users to more network-level adversaries. In this paper, we aim to mitigate and detect such active routing attacks against Tor. First, we present a new measurement study on the resilience of the Tor network to active BGP prefix attacks. We show that ASes with high Tor bandwidth can be less resilient to attacks than other ASes. Second, we present a new Tor guard relay selection algorithm that incorporates resilience of relays into consideration to proactively mitigate such attacks. We show that the algorithm successfully improves the security for Tor clients by up to 36% on average (up to 166% for certain clients). Finally, we build a live BGP monitoring system that can detect routing anomalies on the Tor network in real time by performing an AS origin check and novel detection analytics. 
Our monitoring system successfully detects simulated attacks that are modeled after multiple known attack types as well as a real-world hijack attack (performed by us), while having low false positive rates.", "pdfUrls": [ "https://arxiv.org/pdf/1704.00843v2.pdf", "https://doi.org/10.1109/SP.2017.34", "https://arxiv.org/pdf/1704.00843v1.pdf", "http://www.princeton.edu/~pmittal/publications/counter-raptor-sp17", "http://arxiv.org/abs/1704.00843", "https://users.cs.fiu.edu/~carbunar/teaching/cen5079/cen5079.2017/presentations/counter.pdf", "https://www.ieee-security.org/TC/SP2017/papers/533.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/08d0d3e3ecb7914e8b29edbca4ab70f39c3822b7", "sources": [ "DBLP" ], "title": "Counter-RAPTOR: Safeguarding Tor Against Active Routing Attacks", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "08d0d71c779661eade1f4d857378ca819f46c434": { "authors": [ { "ids": [ "40333356" ], "name": "Daniel Castro" }, { "ids": [ "3366116" ], "name": "Paolo Romano" }, { "ids": [ "33215555" ], "name": "Diego Didona" }, { "ids": [ "1711100" ], "name": "Willy Zwaenepoel" } ], "doi": "10.1109/MASCOTS.2017.29", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.29", "entities": [ "Concurrency (computer science)", "Concurrency control", "Experiment", "HTML", "Haswell (microarchitecture)", "Transactional memory", "White box (computer hardware)" ], "id": "08d0d71c779661eade1f4d857378ca819f46c434", "inCitations": [], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "221-231", "journalVolume": "", "outCitations": [ "0847d4d9d86226cad99fa6ad2f12e39bce0c8bb7", "57fe2b6acccbba91df0847442d2634ffef7ccfb4", "f5f75fd78f4be3dd7cfddd1b46f754befe85391a", "680f3123e40a08feea348dab2e6c27aa7f2710ff", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "2bd3b5532712052d85b0207bd42795e0f31bdfae", 
"46ec0c7cbef89e31f878b8e9716a6c1c1cdedd29", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "1812302d4bdaadee8a258f967449047d552529a9", "2ae2684f120dab4c319e30d33b33e7adf384810a", "200609035711763e162096cc010cc3e00895c6c6", "253d779cc8939c4f5e2d50158bc76586c743417d", "4518d1127a018c51c8c01eef92cb87208bb22e7b", "f9d497feb13e784f53d430ddc958b51694c32fa2", "faa97a8689d9204d4ca11e9c1188414bd0bd4bbe", "ab3f531f3c6e4920c9ba4b437d997c0ce797f5b0", "848d63aa53a263ca2206214bcbb3a83d50fa1346", "250244b7c65435a8aca822ccf072096dd75ea8c9", "0c4867f11c9758014d591381d8b397a1d38b04a7", "5ea7103a1c39de9f96fefe5b02fd9306ae439c9f", "152d43f84ee0e070bf58094b3a43afbf90e6c84e", "69f42b62cfdf80904c385b8009d4e18abaa18123", "74463aee2d9d084e10c0cb4ee72a80eb6a641803", "51225f24b4bfb922bc9ed9738566de0b3cae5393", "22839816fbd337d77b81a7f3c6430324e057c250", "d77063325129544c41b1422918c28e89e4fd5c10", "c3f91c57bbccc8a93b384ee475c7f69a92c2a178", "86c83cf81f2df8f6dc4c05843bb2412415e4655b", "0b1c3b2554a683bab2db9100ed53a7de545e7b98" ], "paperAbstract": "This paper investigates the problem of deriving a white box performance model of Hardware Transactional Memory (HTM) systems. The proposed model targets TSX, a popular implementation of HTM integrated in Intel processors starting with the Haswell family in 2013. An inherent difficulty with building white-box models of commercially available HTM systems is that their internals are either vaguely documented or undisclosed by their manufacturers. We tackle this challenge by designing a set of experiments that allow us to shed light on the internal mechanisms used in TSX to manage conflicts among transactions and to track their readsets and writesets. We exploit the information inferred from this experimental study to build an analytical model of TSX focused on capturing the impact on performance of two key mechanisms: the concurrency control scheme and the management of transactional meta-data in the processor's caches.
We validate the proposed model by means of an extensive experimental study encompassing a broad range of workloads executed on a real system.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.29", "https://infoscience.epfl.ch/record/229444/files/mascots.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/08d0d71c779661eade1f4d857378ca819f46c434", "sources": [ "DBLP" ], "title": "An Analytical Model of Hardware Transactional Memory", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "091fedd0f07244c568e2c654c7e2baf87c70d670": { "authors": [ { "ids": [ "1840075" ], "name": "Johannes Kiesel" }, { "ids": [ "1804677" ], "name": "Benno Stein" }, { "ids": [ "1790347" ], "name": "Stefan Lucks" } ], "doi": "", "doiUrl": "", "entities": [ "Character encoding", "Concatenation", "Dictionary", "Password", "Password cracking", "Password strength", "Terabyte", "Text corpus" ], "id": "091fedd0f07244c568e2c654c7e2baf87c70d670", "inCitations": [ "0b196550a65c6ebb17104cdc631286bc741baf18", "7de0082c4b0393b805da177bec981728b15aa8c2", "d8adb849ac1a0e1aeea9db4add37ac1aa7787cd9" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "0924358924ea433ce5ba5b4dc9fc769ed7afacf6", "486020d807340c26d0b7155a6948383a01ffff43", "8bba0a42134ea5df896cdde59e62bbd90e9f4dd4", "b3b4881b947d40fab82144b4e229f74098651027", "956cbbf33ba15071efb11a54005f252d442700d2", "2bc39ce3dd4edd31a5ea840c3d1cb231cfd95298", "1d944ba5648c72bc7686237044609305f07da1e4", "3d46dbb0da1b4e0ee5b45c46525d9459fb94222d", "4a5186e6046577174a52bc614187106fbce7e19b", "f5952c4c57ab69023b05df04fab115bf07c37612", "492184989035226d1207801e3e5b9dff8b5f6427", "9f3b99761b501d5ffca3a557c7cf40254130fbb4", "63b426203b61464dc8ecb4322720bd7f95f0f5a5", "901e3ac1bcd3dd9af3f00b976c54b8e82397c7de", "6b807c320732137d5ba77d76f4fe421edc9e5cc3", 
"299a008d8221b572c25ccf581e723ababb364fbb", "04645e17a1acb783a2ffb2b9b201624c76d52ae2", "8811bf77f0dbbd2802413a3229208b0bf69970ca", "26a2a2f683b6e6d93c510a2f8065870c54b05f05", "13e9d7a36a74ea2f5ea1e2d8d13124ae39a992e1", "096d7d2e9b3fbc37f1c4e75b1896ae3797950ef9", "1b7c8e492112cc80d058b858f038a3bfde9bb2d3", "50a249dc89f9c5e8fecf335eebce692df2a09e20", "38612e346fdf3158c32c16058f7e8820a8f0325e", "7fbe6f46787accb1c4bb9302995e374ce146e1a2", "0c4867f11c9758014d591381d8b397a1d38b04a7", "da2695f7ba0b56feccd9f4c3c2bad61c9881921a", "f32c07c0c06e2359ae9514e9d25978c6946bf6c1" ], "paperAbstract": "How to choose a strong but still easily memorable password? An often recommended advice is to memorize a random sentence (the mnemonic) and to concatenate the words\u2019 initials: a so-called mnemonic password. The paper in hand analyzes the effectiveness of this advice\u2014in terms of the obtained password strength\u2014and sheds light on various related aspects. While it is infeasible to obtain a sufficiently large sample of human-chosen mnemonics, the password strength depends only on the distribution of certain character probabilities. We provide several pieces of evidence that these character probabilities are approximately the same for human-chosen mnemonics and sentences from a web crawl and exploit this connection for our analyses. The presented analyses are independent of cracking software, avoid privacy concerns, and allow full control over the details of how passwords are generated from sentences. 
In particular, the paper introduces the following original research contributions: (1) construction of one of the largest corpora of human-chosen mnemonics, (2) construction of two web sentence corpora from the 27.3 TB ClueWeb12 web crawl, (3) demonstration of the suitability of web sentences as substitutes for mnemonics in password strength analyses, (4) improved estimation of password probabilities by position-dependent language models, and (5) analysis of the obtained password strength using web sentence samples of different sentence complexity and using 18 generation rules for mnemonic password construction. Our findings include both expected and less expected results, among others: mnemonic passwords from lowercase letters only provide comparable strength to mnemonic passwords that exploit the 7-bit visible ASCII character set, less complex mnemonics reduce password strength in offline scenarios by less than expected, and longer mnemonic passwords provide more security in an offline but not necessarily in an online scenario. 
When compared to passwords generated by uniform sampling from a dictionary, distributions of mnemonic passwords can reach the same strength against offline attacks with less characters.", "pdfUrls": [ "http://www.internetsociety.org/sites/default/files/ndss2017_03a-4_kiesel.pdf.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/large-scale-analysis-mnemonic-password-advice/", "https://www.uni-weimar.de/medien/webis/publications/papers/stein_2017a.pdf", "https://www.internetsociety.org/sites/default/files/ndss2017_03A-4_Kiesel_paper.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/e0d2/ab3a7e0856ae083910488be2e7b09292a9b0.pdf", "s2Url": "https://semanticscholar.org/paper/091fedd0f07244c568e2c654c7e2baf87c70d670", "sources": [ "DBLP" ], "title": "A Large-scale Analysis of the Mnemonic Password Advice", "venue": "NDSS", "year": 2017 }, "093b8adeb29ee4c17b3528c1b9791f275630c8f2": { "authors": [ { "ids": [ "21653817" ], "name": "Jonathan Perry" }, { "ids": [ "1712771" ], "name": "Hari Balakrishnan" }, { "ids": [ "1777471" ], "name": "Devavrat Shah" } ], "doi": "", "doiUrl": "", "entities": [ "Benchmark (computing)", "Centralisation", "Communication endpoint", "Cubic function", "Data aggregation", "Data center", "Expectation\u2013maximization algorithm", "Explicit Congestion Notification", "Linux", "Network congestion", "Network packet", "Network utility", "Software deployment" ], "id": "093b8adeb29ee4c17b3528c1b9791f275630c8f2", "inCitations": [ "ed94b0106b125a786698865e99bb889bf1f75552", "959cdc393f0d9c5e04dfce997d68cbe410abce68", "2c713ea0f3dacc2ce4189891a57c69aec0707c52", "948a9f289bef8c94eedb30ec530423d02e1ff991", "293ca58169024b0f40ae3342200737767321f6b1", "9bbd5be2829e49b1fac7f034baf7499cb069db95" ], "journalName": "", "journalPages": "421-435", "journalVolume": "", "outCitations": [ "0831a5baf38c9b3d43c755319a602b15fc01c52d", "0509b4554cf388bd091e5151851a25ae8c1f3826", "b3493a8ec43df1861c5d7037ce57bcff7d343dfc", 
"114e68ae77738097ba690499dccffe817da1b839", "4a098868891474ba73ea338e6f56033d1827d216", "7ad0e1e0f2b3ae40cc9e1075faea5407169a15a2", "094aca6103f4079521e6a596d099ed37f7d2b498", "327a02b19a60319cc35be860ad0259a5c1aef920", "c3c262b8e56536d14826926b69af59eaefc29bc2", "4687fdf3c77ef00700fdf1399f7dd81bfe87ef97", "2f85f20a076cb91dcdf4b3e5b16886ee9b6b3543", "3b988049dd8f62f772281e90196bbd793700c86b", "cbc67813565acd9e9772d08583a0f197f81b09b7", "15e176fc33eff28d9379a689dbd90211841eb1b0", "12e531184ab28a0ff925a8c7d63fe774fe2dbf21", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "058f6752d85a517aae298586fdf117acdd7560ea", "37e882e696162cbce0d2a4f4f022162a418a58dd", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "1dc62b09b964b8faeecbc03270f7d7a5f2fee733", "132f00de21cee656d00ad6779f1926070ad59544", "3f200c41618d0c3d75c4cd287b4730aadcf596f7", "1abd8490033d3a8e5f832fc34e0d4fa69775adf1", "2ce9340c3d7bcdb79f8174c45a920617d7451c2c", "3e7fd5ac3fc1ab19c985c97a0614e4109fa91583", "0b7301fe4766447af960f9a2c06ccde042538e9c", "81176da076bb31862cec89c84b5ffb92a622e6ee", "13cf338de9edf78be449df692477762cfa9b6f0e", "2a628e4a9c5f78bc6dcdf16514353336547846cc", "231ba17921ebd80e95771e28dfb5082e169d5a53", "0a974f9a517409cf80c1f38e5d805c2c3da35f6a", "61aa09bc1a3eae17480645c90b06a18cbd62d9af", "de17cf40a4db13315c631c597959ae26f691f2fa", "122229239aeba1eb4f1623adb40f1845c582a520", "9dc60d74e8eb2213d43e507d04a84ac85ab833c4", "2d4906884bc5309f1539195ff5b181d41a15ff60", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "12f86f055dd6c866b3795e1fe35931159337a26e", "2de63b0c867b290d4f7217459c968aa98e5ad39d", "764d7de61421968d6b477f0c055d72dcb0893544", "65da29a03c8905cbc0614612d1632864336c4786" ], "paperAbstract": "Rapid convergence to a desired allocation of network resources to endpoint traffic is a difficult problem. 
The reason is that congestion control decisions are distributed across the endpoints, which vary their offered load in response to changes in application demand and network feedback on a packet-by-packet basis. We propose a different approach for datacenter networks, flowlet control, in which congestion control decisions are made at the granularity of a flowlet, not a packet. With flowlet control, allocations have to change only when flowlets arrive or leave. We have implemented this idea in a system called Flowtune using a centralized allocator that receives flowlet start and end notifications from endpoints. The allocator computes optimal rates using a new, fast method for network utility maximization, and updates endpoint congestion-control parameters. Experiments show that Flowtune outperforms DCTCP, pFabric, sfqCoDel, and XCP on tail packet delays in various settings, converging to optimal rates within a few packets rather than over several RTTs. Benchmarks on an EC2 deployment show a fairer rate allocation than Linux\u2019s Cubic. 
A data aggregation benchmark shows 1.61\u00d7 lower p95 coflow completion time.", "pdfUrls": [ "http://nms.csail.mit.edu/papers/flowtune-nsdi17.pdf", "http://dspace.mit.edu/bitstream/handle/1721.1/103920/MIT-CSAIL-TR-2016-011.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_perry.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_perry.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/perry", "http://dspace.mit.edu/bitstream/handle/1721.1/103920/MIT-CSAIL-TR-2016-011.pdf?sequence=1", "http://wind.lcs.mit.edu/papers/flowtune-nsdi17.pdf", "http://inat.lcs.mit.edu/papers/flowtune-nsdi17.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-perry.pdf", "http://nms.lcs.mit.edu/papers/flowtune-nsdi17.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-perry.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8dd2/371b1291c0311967e4f1330f53da4b3d503f.pdf", "s2Url": "https://semanticscholar.org/paper/093b8adeb29ee4c17b3528c1b9791f275630c8f2", "sources": [ "DBLP" ], "title": "Flowtune: Flowlet Control for Datacenter Networks", "venue": "NSDI", "year": 2017 }, "093e773f5bb6f6f19a93abcea315491e730472c1": { "authors": [ { "ids": [ "1705489" ], "name": "Xin Wang" }, { "ids": [ "1721878" ], "name": "Richard T. B. 
Ma" }, { "ids": [ "7869811" ], "name": "Yinlong Xu" } ], "doi": "10.1145/3084444", "doiUrl": "https://doi.org/10.1145/3084444", "entities": [ "Elasticity (cloud computing)", "Experience", "Internet access", "Network congestion", "Throughput" ], "id": "093e773f5bb6f6f19a93abcea315491e730472c1", "inCitations": [ "aecee3e9a71c68d82cc2402ad2380025d795f8bf", "fd6c4b016dd760421cb2f6bef5d01406352dfeff", "60c88a28fdcffe8129796104ea62b8495ec420d0" ], "journalName": "", "journalPages": "61", "journalVolume": "", "outCitations": [ "11c2241c19a666f470d3566cc16c4a59a47dd6dd", "0cd618357733ee04f7561c63498c9f8065dd8698", "2f0f2dbdad54579669c8a3961634f9bc74132e85", "b22466da2e75f3dc8946bf334f904c2a4fdcc8b9", "1369e96daf374146944d6f026f48c2587748d6f3", "1c7c28b0fb051b17d89326a9f68c676c0104c424", "bc45fb6f46dcfc77563cdcf3c020cb8ca89579a5", "21a9aea6099dab1537747e8d58fcdcd21903be63", "65ca19a01de827420e1bea9448ef3e81824ffe2e", "42b436d9ee870159fa8244ceaa6e77a292c3d346", "ab646a61b29761fd933717792d13137880700b20", "096c10cec05bc6b5ee6eafdb340b8ac11f692f4a", "45373d2a7ef4e10e71efce2a8a2c896e2610a33c", "8a61d5c6030be0d2a1f19feba33e923d9fc9923f", "94e22a1bad48c13402a76ae052fa2543645d50ab", "13bf13f019632a4edb967635e72e3e140f89e90e", "4f3224a0593d4cee3dc54bb331bfb03ecdf8dc4a", "6c7830769f2183d83110b4237cddd39c82c9001f", "36da6ed30be8dfeb016bdbf3e5e21e932a22b8d8", "5eae830442c129596ab7d9f5316f1aad2e8c178f", "00750ae60d95e26bf80e20b2944236b7b2ff8d0e" ], "paperAbstract": "Traditionally, Internet Access Providers (APs) only charge end-users for Internet access services; however, to recoup infrastructure costs and increase revenues, some APs have recently adopted two-sided pricing schemes under which both end-users and content providers are charged. Meanwhile, with the rapid growth of traffic, network congestion could seriously degrade user experiences and influence providers' utility. 
To optimize profit and social welfare, APs and regulators need to design appropriate pricing strategies and regulatory policies that take the effects of network congestion into consideration. In this paper, we model two-sided networks under which users' traffic demands are influenced by exogenous pricing and endogenous congestion parameters and derive the system congestion under an equilibrium. We characterize the structures and sensitivities of profit- and welfare-optimal two-sided pricing schemes and reveal that 1) the elasticity of system throughput plays a crucial role in determining the structures of optimal pricing, 2) the changes of optimal pricing under varying AP's capacity and users' congestion sensitivity are largely driven by the type of data traffic, e.g., text or video, and 3) APs and regulators will be incentivized to shift from one-sided to two-sided pricing when APs' capacities and user demand for video traffic grow. Our results can help APs design optimal two-sided pricing and guide regulators to legislate desirable policies.", "pdfUrls": [ "http://arxiv.org/abs/1704.03641", "http://doi.acm.org/10.1145/3084444", "https://arxiv.org/pdf/1704.03641v2.pdf", "http://doi.acm.org/10.1145/3078505.3078588", "https://arxiv.org/pdf/1704.03641v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/093e773f5bb6f6f19a93abcea315491e730472c1", "sources": [ "DBLP" ], "title": "On Optimal Two-Sided Pricing of Congested Networks", "venue": "SIGMETRICS", "year": 2017 }, "094484c9751fc774bbf6573abcd09eac944b71f5": { "authors": [ { "ids": [ "1727689" ], "name": "Xiaolan Wang" }, { "ids": [ "2283085" ], "name": "Alexandra Meliou" }, { "ids": [ "39708150" ], "name": "Eugene Wu" } ], "doi": "10.1145/3035918.3035925", "doiUrl": "https://doi.org/10.1145/3035918.3035925", "entities": [ "Benchmark (computing)", "Correctness (computer science)", "Program optimization", "Software bug", "Sputter cleaning", "Synthetic data" ], "id": 
"094484c9751fc774bbf6573abcd09eac944b71f5", "inCitations": [ "a7bd440acfb8fd22602121a3837221ea87c2dee3", "63ce13f365ee6c20c2bf2be20a32d62b497430e8", "42f9a32d53402c5ad5a4560d606047295fad993e", "2548d37b108168ef1dc302cc35361285bc04c42a" ], "journalName": "", "journalPages": "1369-1384", "journalVolume": "", "outCitations": [ "1df05b37ab38851a7537f5a7d1cc31d60ab819dd", "6a97303b92477d95d1e6acf7b443ebe19a6beb60", "52032bb1547b859bc4e74f671a74869173c7b1c4", "8848c8135905f2c53b6da06a8a7c0a57bb5dbcc6", "3ac54dc219ae82788fa77648c498fb61725789b0", "02d1828d908ed6c3f5b989e870e125309e280345", "31a816f4fef768f29772a003e534b1378611bfe6", "33710ad6e62312090d4f7a3b57624bd290fdc4fd", "7254253b9ba200277e7bea7c05f896f363e61a1f", "c3fecbd5bbb1c34714d4359fe8012633b2c5d8e6", "383478370162b80f1ef24ccc9c95e156fc808077", "7da0f773ead3c04e5f9753044d96d61d302d2eb5", "c921482cb0048738284aa66e531b83f64ce46bf5", "17a23aaab0a713b7863ada44eca0c252a243c6b1", "38526532f66d5bcd7b47cea0ed9642b9b232d50f", "a7a911acf0fa7f82d7fecdda5f78d8d5879b424b", "1b4f194321f8f3219306a040c0d6d8c70dca1dcb", "1b9fb7623ee4a717664bc145c7e03722b9a63e24", "74e86f325ddfb08f2f79b488ebfac2a43fd5e529", "157852dd14a9b518bf62fa6511be0f02f5d04a79", "2077cc18da002721390a23392ce4a25d19c3e2a2", "4ace5e30053b8b8f49d5825c8653439eb4126c05", "4607f09a348c87f95aedd7711b24d8bf614fe58c", "091d2640414ff9565ff58bbf71ad290f1d30b750", "494608a6837147f8168e2bd371a4bb5ac0a54513", "632eca15ed20f87490c60a6005c4c58f06bee61b", "35251f1b97fde8c3ac540d12c5cb8a13547e26ab", "dad7338a7290a17c5c2c0101060fe3c1f3f2402e", "3d984db20061882f74118b43109ce26f7fcda048", "6306fbfa8fcde8e98a677cd4a833b8c76c613974", "486b80ee409dc0e4173834b88b25069443c82714", "5b7929dc88665927aefb2b231f78abce23a74433", "006336b4082bbba1ab1e5e2e6c633a44971a7dc3", "2548d37b108168ef1dc302cc35361285bc04c42a", "1aa60b5ae893cd93a221bf71b6b264f5aa5ca6b8", "65821014abe934029310cb10d4e329645acd4817", "b458a5626b3186d98660323142c6eb595a392d01", "269fe303510ed842933d33bf91cf32c70791653a", 
"10551c91b4d36d1009b23b4d2b88a9e1733fe029", "9af26df2edd37eca936ba261746d239242b488b6", "6cb98c3fc86b5067af79b524abccef4e8de7f633", "04b67bda49edadf2dc618004582a10d2dc66b6a3", "9c44466293383746fb4ced3fc09bc00e67a6f38e", "4d7f9b2ad8cc25a6c5f67d3611e8bc8a1b85ae67", "19f10c75265a43829cf00e619224ab3e481c4fad", "85339ee3bd042d6118ea39316a9eb4aa9eb76854", "1126ceee34acd741396c493c84d8b6072a18bfd7", "083633a33bf5afc0b4481f050d0a89ee0a399876", "0f1b67d1545299b8ccec4b28afb735ff045e5c1e", "1ef6d442bd3928c54b162d3cb3c65f04b54c8975", "2024c8eb42f6a64944e21636fb2ba725aff490c8", "922560afb3e0d4eed9e8c0c012d5bcc44e702e72", "6046770d1c3e08edfdd39bdb57fccaca84f5139c", "1fe41b1240a0eddec736b675e914b4858a955876", "4c015244665ec9ca821eb78b82481490cff73a31", "c1054c1124303eca1152eafe84eef46c850af551", "227a4163f6702aadc0978d339f1cc2e966577f6a", "c4221a899528798105ca94e509027e7210a87d6b", "4f056ccacce99ec26ec05a988f144804c7af8b00", "239ffe260e5a13c43d7131200a891194e94ff767", "84368db88954915e2903fdcd796a5f62d2dc4da6", "0ce67c89b4d7829580c16290a292ebee15507fa5", "aadebe8230edfdb8d9e2f52f7d8c325f159e68dc", "8f4fbec6053b15603e7ba750fde132bbefd81b5e", "2a8969bf0a02a2fa28e3784fd6025828859a7f67", "c665421671d2d8282936113b283941fa7066e957", "1746a1f92f97e4d15bbcbca627b8e21ef001adf4" ], "paperAbstract": "Data-driven applications rely on the correctness of their data to function properly and effectively. Errors in data can be incredibly costly and disruptive, leading to loss of revenue, incorrect conclusions, and misguided policy decisions. While data cleaning tools can purge datasets of many errors before the data is used, applications and users interacting with the data can introduce new errors. Subsequent valid updates can obscure these errors and propagate them through the dataset causing more discrepancies. 
Even when some of these discrepancies are discovered, they are often corrected superficially, on a case-by-case basis, further obscuring the true underlying cause, and making detection of the remaining errors harder.\n In this paper, we propose QFix, a framework that derives explanations and repairs for discrepancies in relational data, by analyzing the effect of queries that operated on the data and identifying potential mistakes in those queries. QFix is flexible, handling scenarios where only a subset of the true discrepancies is known, and robust to different types of update workloads. We make four important contributions: (a) we formalize the problem of diagnosing the causes of data errors based on the queries that operated on and introduced errors to a dataset; (b) we develop exact methods for deriving diagnoses and fixes for identified errors using state-of-the-art tools; (c) we present several optimization techniques that improve our basic approach without compromising accuracy, and (d) we leverage a tradeoff between accuracy and performance to scale diagnosis to large datasets and query logs, while achieving near-optimal results. 
We demonstrate the effectiveness of QFix through extensive evaluation over benchmark and synthetic data.", "pdfUrls": [ "http://arxiv.org/pdf/1601.07539v2.pdf", "http://arxiv.org/pdf/1601.07539v1.pdf", "http://doi.acm.org/10.1145/3035918.3035925", "https://queryfix.github.io/files/papers/qfix-sigmod17.pdf", "http://people.cs.umass.edu/~ameli/projects/queryProvenance/papers/WangMW2017.pdf", "http://arxiv.org/abs/1601.07539", "https://people.cs.umass.edu/~xlwang/qfix-paper.pdf", "http://sirrice.github.io/files/papers/qfix-sigmod17.pdf", "https://arxiv.org/pdf/1601.07539v1.pdf", "https://arxiv.org/pdf/1601.07539v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/094484c9751fc774bbf6573abcd09eac944b71f5", "sources": [ "DBLP" ], "title": "QFix: Diagnosing Errors through Query Histories", "venue": "SIGMOD Conference", "year": 2017 }, "0954987dcb3d3e18ac9c66eeede5986c598d99a6": { "authors": [ { "ids": [ "7225179" ], "name": "Felicitas Hetzelt" }, { "ids": [ "7238406" ], "name": "Robert Buhren" } ], "doi": "10.1145/3050748.3050763", "doiUrl": "https://doi.org/10.1145/3050748.3050763", "entities": [ "Cloud computing", "Component-based software engineering", "Encryption", "Hypervisor", "Processor register", "Protection mechanism", "Replay attack", "Virtual machine" ], "id": "0954987dcb3d3e18ac9c66eeede5986c598d99a6", "inCitations": [ "377712ef264d63c97b341fb782037d063018305e" ], "journalName": "", "journalPages": "129-142", "journalVolume": "", "outCitations": [ "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "0a35c32ebc233556d11c2038f5a4362f2c40b2a9", "07272e31fb957e026a6bc36d55e412de26843c7f", "79e14bf42535966db5e056af6d8a0e0ec1f522b2", "d7558f7bb921df3a84f2b5f540699506232b214b", "0f0eb41a392b4b3baec93fe2f02d284b3ba8b60c", "1fb49ae43195232f0b3d1c9d534a5aa03bdd8f26", "85d555f7ce19740b4fc656ff797623c6e1513018", "46bc4d7c5605e8468f4355335416e15f0d7e4dcd", "05f70f429a7bf38efa9e457fd486cb862bd495be", "06bf84f98e7dd39be8d96eb67bafbf56d61bc715", 
"1b80ae882afb809686f20765e4a42a5b99aa55de", "2fcdec58c1c0028e07c4823cf082fd6d3abc05dc" ], "paperAbstract": "Cloud computing has become indispensable in today's computer landscape. The flexibility it offers for customers as well as for providers has become a crucial factor for large parts of the computer industry. Virtualization is the key technology that allows for sharing of hardware resources among different customers. The controlling software component, called hypervisor, provides a virtualized view of the computer resources and ensures separation of different guest virtual machines. However, this important cornerstone of cloud computing is not necessarily trustworthy or bug-free. To mitigate this threat AMD introduced Secure Encrypted Virtualization, short SEV, which transparently encrypts a virtual machines memory.\n In this paper we analyse to what extend the proposed features can resist a malicious hypervisor and discuss the tradeoffs imposed by additional protection mechanisms. To do so, we developed a model of SEV's security capabilities based on the available documentation as actual silicon implementations are not yet on the market.\n We found that the first proposed version of SEV is not up to the task owing to three design shortcomings. First the virtual machine control block is not encrypted and handled directly by the hypervisor, allowing it to bypass VM memory encryption by executing conveniently chosen gadgets. Secondly, the general purpose registers are not encrypted upon vmexit, leaking potentially sensitive data. 
Finally, the control over the nested pagetables allows a malicious hypervisor to closely monitor the execution state of a VM and attack it with memory replay attacks.", "pdfUrls": [ "https://arxiv.org/pdf/1612.01119v2.pdf", "http://doi.acm.org/10.1145/3050748.3050763", "https://arxiv.org/pdf/1612.01119v1.pdf", "http://arxiv.org/abs/1612.01119" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0954987dcb3d3e18ac9c66eeede5986c598d99a6", "sources": [ "DBLP" ], "title": "Security Analysis of Encrypted Virtual Machines", "venue": "VEE", "year": 2017 }, "095b83cd48d1a8a4ff7a45cc8f21e018498374d5": { "authors": [ { "ids": [ "1724260" ], "name": "Xiang Chen" }, { "ids": [ "3021207" ], "name": "Bowei Chen" }, { "ids": [ "1744045" ], "name": "Mohan S. Kankanhalli" } ], "doi": "10.1145/3077136.3080802", "doiUrl": "https://doi.org/10.1145/3077136.3080802", "entities": [ "Algorithm", "Auction algorithm", "Page view", "Real-time bidding", "Relevance", "Web banner", "Web page" ], "id": "095b83cd48d1a8a4ff7a45cc8f21e018498374d5", "inCitations": [], "journalName": "", "journalPages": "205-214", "journalVolume": "", "outCitations": [ "ae7d5d50fc1d11aff59f910c3b37d7ee6415ee51", "1a2931a234e41034f78321dba216828eb8feddff", "165dd9fda3e6ab9976b45f01425acfbee4fea995", "2092f0f1adaee50ed247216a1cfab7016add37f5", "6d55be7a6fcbf760f9c32548afddb313606598c8", "1134280b51cb3a24be47ee98be9044414f74b718", "81e8916b9d4eeb47c2e96c823b36d032b244842f", "a4f241e3133126ca8b96b8b670c1f02ce36d4264", "0862546afcd8cd40af6e149eda82e832d2b9868d", "8fbe262bf0e83766f6985e25e2af851ea5cec66b", "2605f7dd3e8a225b28f1ddafe6cfa0d466ec6adc", "970628d3fcbae58a787c5a50523e93ecf8d212e7", "0fe69a715a936ba44d3fd8b0a25b969838d09044", "2286315e1787519c14617a85fe633b8080b30ea9", "2834899017d814886679f378858ba52d34ee6ddd", "0df68ccc83e7b879282c4a89100661fb2194a207", "13ef0fdc29f0c5440e5d3f0a23cafc7e7c31052f", "2ed346d7bc09b6151f6a2b1910a6817c822d72db", "18f09f14a6e16f8945f58ffcf4ff29028aef2e1c", 
"1b4523aa07fc0dab565a45c55009de963053942c", "2cd6252dbe666b7c17957e396f31df5c766c426a", "7338df1b84f9817cc3da000361fb675e39a47b5b", "2ef4ec21347c457dcedef334d7a54e5cb0545ca9", "8c087fe26314a8f9327fb9382e2e6aa7d99f9bda", "5ccf2ede4bb17bac118a6bb47a0583f3c55e68d2", "bfc214ce7ab5b101425e5cabd631176bb427adff", "1cea62d9d9436fe78ed83676a7102a46a7f5446b", "4d76d5b6f7636be9bd8a3a28ab61ec5a0e8ba949", "89b2facea4c73aea089d37b0782d7e9156145be5", "4e937d0372eb356ab64b47c6e140ad1dc729a0d6", "6fb5142dc60256ad0cea7051be23fb94eb27ccc0", "4f92339b2624fae5b4dfbaa85cad326f380af83a", "59e8a0371a8cdaf24cdc3e3c0f5e7e6f2d6a1251", "53993c7fabf631cbd8a44ab3e42c6bdf784db456", "2cc88f337d7bed4de6902ae58d6d05a40fd9ca56" ], "paperAbstract": "Displaying banner advertisements (in short, ads) on webpages has usually been discussed as an Internet economics topic where a publisher uses auction models to sell an online user's page view to advertisers and the one with the highest bid can have her ad displayed to the user. This is also called real-time bidding (RTB) and the ad displaying process ensures that the publisher's benefit is maximized or there is an equilibrium in ad auctions. However, the benefits of the other two stakeholders - the advertiser and the user - have been rarely discussed. In this paper, we propose a two-stage computational framework that selects a banner ad based on the optimized trade-offs among all stakeholders. The first stage is still auction based and the second stage re-ranks ads by considering the benefits of all stakeholders. Our metric variables are: the publisher's revenue, the advertiser's utility, the ad memorability, the ad click-through rate (CTR), the contextual relevance, and the visual saliency. To the best of our knowledge, this is the first work that optimizes trade-offs among all stakeholders in RTB by incorporating multimedia metrics. An algorithm is also proposed to determine the optimal weights of the metric variables. 
We use both ad auction datasets and multimedia datasets to validate the proposed framework. Our experimental results show that the publisher can significantly improve the other stakeholders' benefits by slightly reducing her revenue in the short-term. In the long run, advertisers and users will be more engaged, the increased demand of advertising and the increased supply of page views can then boost the publisher's revenue.", "pdfUrls": [ "https://arxiv.org/pdf/1705.10642v2.pdf", "https://arxiv.org/pdf/1705.10642v1.pdf", "http://doi.acm.org/10.1145/3077136.3080802", "https://arxiv.org/pdf/1705.10642v3.pdf", "http://arxiv.org/abs/1705.10642" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/095b83cd48d1a8a4ff7a45cc8f21e018498374d5", "sources": [ "DBLP" ], "title": "Optimizing Trade-offs Among Stakeholders in Real-Time Bidding by Incorporating Multimedia Metrics", "venue": "SIGIR", "year": 2017 }, "0977dd7a1092e8a417ed22b9ca26dacee99e0347": { "authors": [ { "ids": [ "25635969" ], "name": "Saket Sathe" }, { "ids": [ "1682418" ], "name": "Charu C. 
Aggarwal" } ], "doi": "10.1145/3097983.3098046", "doiUrl": "https://doi.org/10.1145/3097983.3098046", "entities": [ "Computation", "Data mining", "Random forest", "Similarity measure", "Value (ethics)" ], "id": "0977dd7a1092e8a417ed22b9ca26dacee99e0347", "inCitations": [ "71ea4bdcd1210829fd66fcf62d3bb80ded0a8cde", "390173a5889695097385fe6833359e79ac486e16" ], "journalName": "", "journalPages": "395-403", "journalVolume": "", "outCitations": [ "5af0e69ff389f3355bf0d95570dd2791449200c3", "1ad15c08556c8f8e3739703857ea01077ce738c5", "46b09a6095d02a0f239fb890c13dba7fbceef0bc", "c5fc615e184da7ceffbdcda4288c23019f4b8fd6", "d6a452ce55cc8cb5202e6b0a20ce4154036380e9", "24e6cf0796237f21c780a3f0c996817f57b3a1bd", "4fba6cf1fec9888feb4477da6d2985194a188d9c", "c2a77c8771051b389fee220c0ead3d5a6e8024e1", "205ee06a3f3fff0c64d6849e940d8559e7c227b4", "0dddeae55bf055d810fc2cd6ec99d548fef484bb", "1860f84062d513be68d7e6db67e27ddd3f2f5319" ], "paperAbstract": "Random forests are among the most successful methods used in data mining because of their extraordinary accuracy and effectiveness. However, their use is primarily limited to multidimensional data because they sample features from the original data set. In this paper, we propose a method for extending random forests to work with any arbitrary set of data objects, as long as similarities can be computed among the data objects. Furthermore, since it is understood that similarity computation between all O(n^2) pairs of n objects might be expensive, our method computes only a very small fraction of the O(n^2) pairwise similarities between objects to construct the forests. Our results show that the proposed similarity forest approach is very efficient and accurate on a wide variety of data sets. Therefore, this paper significantly extends the applicability of random forest methods to arbitrary data domains. Furthermore, the approach even outperforms traditional random forests on multidimensional data. 
We show that similarity forests are robust to the noisy similarity values that are ubiquitous in real-world applications. In many practical settings, the similarity values between objects are incompletely specified because of the difficulty in collecting such values. Similarity forests can be used in such cases with straightforward modifications.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098046" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0977dd7a1092e8a417ed22b9ca26dacee99e0347", "sources": [ "DBLP" ], "title": "Similarity Forests", "venue": "KDD", "year": 2017 }, "09944142043338de285575751861728d4212fd1d": { "authors": [ { "ids": [ "3364200" ], "name": "Daichi Fujiki" }, { "ids": [ "2527905" ], "name": "Kiyo Ishii" }, { "ids": [ "1888437" ], "name": "Ikki Fujiwara" }, { "ids": [ "2567723" ], "name": "Hiroki Matsutani" }, { "ids": [ "34575333" ], "name": "Hideharu Amano" }, { "ids": [ "1707417" ], "name": "Henri Casanova" }, { "ids": [ "2876310" ], "name": "Michihiro Koibuchi" } ], "doi": "10.1109/HPCA.2017.38", "doiUrl": "https://doi.org/10.1109/HPCA.2017.38", "entities": [ "Central processing unit", "Cognitive dimensions of notations", "Data integrity", "Error detection and correction", "Error-tolerant design", "Forward error correction", "Interconnection", "Modulation", "Network performance", "Numerical analysis", "Random-access memory", "Round-off error", "Scalability", "Simulation", "Soft error" ], "id": "09944142043338de285575751861728d4212fd1d", "inCitations": [ "73371e0dd70b45d93cb50f27812fb5d971d34ff5", "781cf9b4d17f89ad4b971d2a1655421378149e2d", "00af0de94a9fa3e9df8f841b788d2ad67ec2fc09" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "469-480", "journalVolume": "", "outCitations": [ "ab724df417d8913f053d01aa8e10b3267f0ab7d3", "741a04ef3a0c3953a3d37726bf4d6170eaa68a55", "1666f986614a398742baa0d671d425258591b6e4", 
"44a7bb35c10f94af012507f6d8eb6e4593d1536e", "654e303e59b75876d53b5184e3096805791f7c77", "526cd12becb5c1c8c866abfc106cdbd211735e3c", "5a830ad18ff1a45c197570065b65d212818eaef6", "8837556c3bfbe669a09ee78ae119b39bfdfe0b39", "0736d68aad2c198a8f6dda851c27bd180421c2aa", "e8ed59001a58b311f9c3eab69240e72db6d06ee6", "97c3054425e135126f01c9b3cac3614f462db0f5", "1aa3f20a24588a2dba26a9e8e8842ba48a9826c6", "eacf3f864eb9033e15e87efbfac9f5937a0f8c24", "7b4631da09b505bd71b069d52116cbcea52398a7", "48fbdfc80933817ab328a3fb841dca4c1f54553e", "87a8a7d48205aa864b1269cedf6a39925a3c24f8", "528628b4d20e6a98130ac12083a8c879aa31c7ad", "4c9501268cb3d1a9a5d5739b8cabd83bd3d56e1d", "d5e0a717bbf1c715b10d2d41490a498075ebd9a7", "52137476895005f26098678a9af934f93071b416", "5f8991828def57d2f0cda942566afff56740d150", "1ce039bfb9a45847d176b98d0904430f1938d105", "1ad1ff28c41c036aed259bd4af1e5c1c42cdc5c7", "0dd57dbc7e47ed7e27affd8d289585005d4d62a5", "9e2dca06b0ea81c82aa749f9bc7bad220247ebe6", "79c0062e0eae09d6715054fe7fc46d4164443aba", "719488e9d048f782f7420a81914d046531dbe7d1", "04706cfdae1e666fa0e7a8c2c23d179712a3d792", "5c0e8af36e20b8ea213561e8c3d706b4e2f2cc8d", "3e99a917b9a4e89497541bbc3bb72079054644c6", "983d5cf52ccc1cd4e8338c6b7c4ee24168a6e807", "4cc707dddfbdc1a5bd8e8f34d7bc1f43ac273c14", "18fe996c6f43a8f301cd842507045b679ba3506a", "320150a4d7d95c5627264ecc59aad7441c8fda80", "471618f89fc88b094d15bc684b9fe2cfd018a799", "4335d05354fd5c5179ea59de58946441f9f1b36b", "a19563b4014919c405964cea5271bebe918ad265", "4abdbda4f0a56d0be1ecca3fa58baeb13c8329bb" ], "paperAbstract": "Computational applications are subject to various kinds of numerical errors, ranging from deterministic round-off errors to soft errors caused by non-deterministic bit flips, which do not lead to application failure but corrupt application results. Non-deterministic bit flips are typically mitigated in hardware using various error correcting codes (ECC). 
But in practice, due to performance and cost concerns, these techniques do not guarantee error-free execution. On large-scale computing platforms, soft errors occur with non-negligible probability in RAM and on the CPU, and it has become clear that applications must tolerate them. For some applications, this tolerance is intrinsic as result quality can remain acceptable even in the presence of soft errors (e.g., data analysis applications, multimedia applications). Tolerance can also be built into the application, resolving data corruptions in software during application execution. By contrast, today's optical networks hold on to a rigid error-free standard, which imposes limits on network performance scalability. In this work we propose high-bandwidth, low-latency approximate networks with the following three features: (1) Optical links that exploit multi-level quadrature amplitude modulation (QAM) for achieving high bandwidth, (2) Avoidance of forward error correction (FEC), which makes optical link error-prone but affords lower latency, and (3) The use of symbol mapping coding between bit sequence and QAM to ensure data integrity that is sufficient for practical soft-error-tolerant applications. Discrete-event simulation results for application benchmarks show that approx networks achieve speedups up to 2.94 when compared to conventional networks.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.38" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/09944142043338de285575751861728d4212fd1d", "sources": [ "DBLP" ], "title": "High-Bandwidth Low-Latency Approximate Interconnection Networks", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "09b6a3fcb9a7e76d8b5041b4b8f4fb39058889ba": { "authors": [ { "ids": [ "2938740" ], "name": "Marios Pomonis" }, { "ids": [ "2880350" ], "name": "Theofilos Petsios" }, { "ids": [ "1720824" ], "name": "Angelos D. 
Keromytis" }, { "ids": [ "1782812" ], "name": "Michalis Polychronakis" }, { "ids": [ "1806308" ], "name": "Vasileios P. Kemerlis" } ], "doi": "10.1145/3064176.3064216", "doiUrl": "https://doi.org/10.1145/3064176.3064216", "entities": [ "Central processing unit", "Code reuse", "Diversification (finance)", "GNU Compiler Collection", "Hardening (computing)", "Hypervisor", "Just-in-time compilation", "Kernel (operating system)", "Linux", "Memory corruption", "Memory protection", "Memory safety", "Plug-in (computing)", "Privilege escalation", "Software deployment", "User space", "X86", "X86-64", "XOR" ], "id": "09b6a3fcb9a7e76d8b5041b4b8f4fb39058889ba", "inCitations": [ "723931de6d91a965bc2fa24ac649291c9f1a4639", "325390173841d52f7a2791ba6b0e32ad80bf2630", "565d52ee2df2e8bc9ae5e05b416c9aaa596cbac4" ], "journalName": "", "journalPages": "420-436", "journalVolume": "", "outCitations": [ "255bdcb05805c97d973081b59bc61c649263ceae", "316eb469e6b4ca848d1b68f4a52650f880d495c6", "61504aa2d0cde80429f1c3a7809d0e084e184172", "3fa27974cade47e98993b98798f73594b902583b", "4b9426a328fb4e379e2fab57fc72a91c5618ae94", "10a59e595461de43e3183c99a380e6a35ae264bd", "50117210fbb7a8ce28ba503280f3c65fdc029ec7", "b58a85e46d365e47ce937ccc09d60fbcd0fc22d4", "0d0154d589205cc519607fbb142ecefe0f96aef0", "704e2027ecdaa9561b75a854b585336c16cea89f", "77cab38bef9d1b8b7bcfd3bb382c9d55541465e1", "0e039df712774fcea67f214d9b5780c1dc250747", "e89f097651f2bf25ceac9c644c754f8c94a42240", "0ed6a4c8fa2c74ca2e470df6e205797371523edc", "0abc3e83ccd6e685f8d0299f24f03ae28f4c2459", "157153c5c5b3fb42bd4de60d2cd8380e7038a82b", "61071f5483b56f48634757a13d585c206fa28840", "1e90de11a62e926a72b10776dd08d488e5ed865f", "0988a425689f6f3700e797f4a2c18f73692573c3", "acf32e644db8c3ac54834d294bba4cf46551480a", "483266ca3a33ac23b4b1459873de08f2284d5f7a", "116eaac2e498bc2c9bea10ea838309dcf143d764", "08c3e50a2913da51ed3cdafdcfdfb488e8fa83c3", "6458f4c0c029b038ebd1d7f61005a010ac250892", "2947959aa2cfc45719fac7a54812614d1fa8707f", 
"1251fe24e96d5c12f868bf4584351c0ee03d55ec", "69fcc8b9cb7867c0606ba165d29ea5790c8e752d", "8a6d19bea6f04e2bf2277c7ccd61becdf2bb48e7", "6e40435eaf84ac3dd2c48b8e81c0dcd5f0db1a12", "53396c842bc8a94575470fab3acb4aef91c5073d", "196d341cdfb85f1a1d2e431fc40f34604c30bb59", "39a59c25e8fcdbb6304e70c86e263539af25e62d", "26dd91f31cc34c5bec6eaf1da1af0bd45c1f96db", "03f827395a17beb941241dbd72322705bdf79791", "11496e28e6dd3f1ab7d2f054c46d7587988dfd86", "567fbe38b1e63d3e718527b3ea9918440dd703ad", "412c0be5520e80ab6f8c3662477a28e3b9ccb943", "49d50f1d0bb2a03388f3cb60f9f1ca01974d371c", "3c48458d8f77b04c9e69b61f5beddf770e61ec04", "67b086caacc543b7d30b2f006f77a315bc9572e0", "28236cff92291fd6380bb82875675cbaeb9575d5", "6f5635fb17ff8a27b08104de6d0322c32e943a9f", "71da01051534d46fb3becd0a7506b64db56efc7a", "ac9186b8f64f7c5bb76b1becdf0bcc3bd8b93dbb", "6a8f65381a627a2db6c756a7185d9106f0acefec", "48a8b9bd6df63b13093cd67ee61911e7a485be57", "3251ddc15c1891f36a1c912179781da972851443", "2b004e0484f4940fca341fa97ecb8ac94fe780a5", "7b2cf50a197888a3eb273d0ef056e93c581aa272", "c465cddd1a514d75d5e21775a4ae972d90e81902", "3c7e73f92beb3eebc0ea1ad48235f4fd4bd6ee53", "63eca2d9de958abab6a20f0696789ccb6f1b8aa1", "01ee7db67463b53143e0b2c126363ed6d3c8a532", "07848afa4b5f2f09cd2fe28a6cdc25c642da61cd", "39040e2b60fcb01dfed8d638f2cb66218cfdb144", "70e8eb54e20c504b9a3c52f5a5ea4d8fbb3d985c", "15f0aeddfe3f8d2a62793318cef48e203ab3b037", "9b2585f7248c8b5a22e9c816506e01060213ca85", "f0ac31c2248ef8eb597448395da6f79227ffe916", "3e1ff9416ec1c2f94d0ddbeb6171eea4c336d663", "686150e2179840ed40a0166cba6c5d507f3aa49c", "0639adf09bea98bef72d77c3671ccd8ef32ff542", "1de5ae8534fc76323e4d926e10dc0fc76a28a361", "36f4666c5c294548d2a9a536ed44e926172639f3", "080c336698f5d7a15169e5ad98fa62a0bbf6085c", "919be1ff612f673606ab258651b800412576619f", "0653e2ed9f683868cb4539eb8718551242834f6b", "387e571981a8ee2bd49b1f30563e3a3a215e3b65", "de5bd35339e5692002a77145d8b861940429ad77", "48a8e9d8a41009eb6b7733b139eb5eff30d72776", 
"638297d9b5c8e0e83ca5acfbf1325196ea0bbb3c", "6588630854bf12b190ae8f95ed8763f7cdb945f6", "216e02adde586cbc73cb0f242d580b9c5506ac86", "1e37625f382709b06f72e5c3c41aba1328ff66dc", "592be7266ac5e1a423703242a5f976bdf05627af", "74572d07252e2f0b60b16abb931c46e819e2b448", "6a7a3033f2e9adc294633667c01689c5bed167f2" ], "paperAbstract": "The abundance of memory corruption and disclosure vulnerabilities in kernel code necessitates the deployment of hardening techniques to prevent privilege escalation attacks. As more strict memory isolation mechanisms between the kernel and user space, like Intel's SMEP, become commonplace, attackers increasingly rely on code reuse techniques to exploit kernel vulnerabilities. Contrary to similar attacks in more restrictive settings, such as web browsers, in kernel exploitation, non-privileged local adversaries have great flexibility in abusing memory disclosure vulnerabilities to dynamically discover, or infer, the location of certain code snippets and construct code-reuse payloads. Recent studies have shown that the coupling of code diversification with the enforcement of a \"read XOR execute\" (R^X) memory safety policy is an effective defense against the exploitation of userland software, but so far this approach has not been applied for the protection of the kernel itself.\n In this paper, we fill this gap by presenting kR^X: a kernel hardening scheme based on execute-only memory and code diversification. We study a previously unexplored point in the design space, where a hypervisor or a super-privileged component is not required. Implemented mostly as a set of GCC plugins, kR^X is readily applicable to the x86-64 Linux kernel and can benefit from hardware support (e.g., MPX on modern Intel CPUs) to optimize performance. 
In full protection mode, kR^X incurs a low runtime overhead of 4.04%, which drops to 2.32% when MPX is available.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064216", "http://nsl.cs.columbia.edu/papers/2017/krx.eurosys17.pdf", "http://www.nsl.cs.columbia.edu/papers/2017/krx.eurosys17.pdf", "https://cs.brown.edu/~vpk/papers/krx.eurosys17.pdf", "http://www.cs.columbia.edu/~theofilos/files/slides/krx.pdf", "http://www3.cs.stonybrook.edu/~mikepo/papers/krx.eurosys17.pdf", "http://cs.brown.edu/~vpk/papers/krx.eurosys17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/09b6a3fcb9a7e76d8b5041b4b8f4fb39058889ba", "sources": [ "DBLP" ], "title": "kR^X: Comprehensive Kernel Protection against Just-In-Time Code Reuse", "venue": "EuroSys", "year": 2017 }, "09da9a22e89c5e3a2e6e9f1995fc6cd2b7e92a0b": { "authors": [ { "ids": [ "33524114" ], "name": "Zi Yan" }, { "ids": [ "40086591" ], "name": "J\u00e1n Vesel\u00fd" }, { "ids": [ "28981429" ], "name": "Guilherme Cox" }, { "ids": [ "21975961" ], "name": "Abhishek Bhattacharjee" } ], "doi": "10.1145/3079856.3080211", "doiUrl": "https://doi.org/10.1145/3079856.3080211", "entities": [ "Cache coherence", "Operating system", "Page table", "Page view", "Physical address" ], "id": "09da9a22e89c5e3a2e6e9f1995fc6cd2b7e92a0b", "inCitations": [ "085fcd6daf90838781c264b1d892aff625aa53c1", "044f5a9c7b571f42cb47c7bc82a2aeb9752002f1", "eec3759577ba749915dca6aff1de6ad7f0bac741" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "430-443", "journalVolume": "", "outCitations": [ "71456379e8ed03e612cf870031a3e0473d446686", "6bf62a3aec49e5d128ae024a17628893898fcd8f", "343a384d5476ead9496f96559aba5ad09e95e01e", "caacd536fa218ef5218021506ebc041e3f460064", "daeff61502115efc4b9ee81607a8e5489215ea88", "07a63423cc46ec67ff18f707379b77ebdfbc1eb9", "bd1b8a27acb66ade711f08e59bb11f50593f3f21", "0e2a26def9432e19fb96dcf8956d7e786da0d814", 
"b0cd27efc4c73578e7fbabebfca173e00ac73574", "4e7a9d7d8d3ed4a9e02043554567949d6a000b7d", "60f9d8874d8679b94896160bd3a8bf4b02d8b883", "671958087f3c24e7b025019476be8918302270e2", "05a55820da0430f3b7e68f54bcb2cb6427c8cf28", "5d3c30ae77994e16fb2ee486cff96d4c52bccfd9", "1ddd08b8610ffe95cb85d2aab2ff08c2722c9772", "bd5cf72e2843ee2b749a5f790d1332c508ec9c0c", "69652a30c1badf6a4c21006f3ec8da3de976cbaf", "2637871fde2a6393e8c63c924721bdc6303a2e70", "2fee80acb6f7b4172622e0f40d350339ca4e3dc9", "1c2aed008f7bcb769d0e6f8109434c766fb22bde", "4b82766a16aa951020e43d6f70b5cf097a6b353c", "28f5de9effd9bf21220e81984710489b78608cc6", "0027600a3ff1d431ee44fcb8b149388ec7bb3ac8", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "d875686d4b910315859db0bc477875cc8d1c1acd", "533d720a8542b707c316d39cf5beeb58738af86d", "c7d09a304c5de8050c4b97566d05d50048961ef5", "10f1faeec4ee2158b8535b249a20de5419998153", "19554445f1f3ea7b54be06a74a0d0840ade02be5", "088598cf41bf76d2ec0c52c14dcdf4f8bbdfc028", "68635c5d25ec3001548b14cf267391b16ad5aa28", "09d97251a2932b6a3c1c2009f820d55f281433b9", "211f2beaaf36bb6a920a63dbbef6842cb1d22468", "48215f3287f7d527dc63c5b504ca8397d3bdef4e", "6565cc1520fcaf69205a2c5d4d9a1065e7c6bd5b", "4c689148ee5e9d6d116f6babbfab21bf2116802e", "4cbd7800701981b58b4c1f6d53c7a66a5fb15633", "59ca42e1911be417863d0f7068b89e1e59189cc9", "54a15f2c25bec4274d5bb423dbc5002426a506a3", "89b11dc5ec54d088be960e305aa442ff565fbfd9", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "a58445c48c3402305e92ff7cb7eaa9641a56ca6f", "174a3d4ad2caba68b55fd3ee863b9471e3786f21", "1c860ede7e14e4cd9210cbad8d0c8619673f87ca", "71a2d8c473f13d0c664f751db97e81128281b1eb", "44f779d08d629e915ee7cbe1c0f4f17e9f6a626f", "0653e2ed9f683868cb4539eb8718551242834f6b", "3142f44d2ab6153f9eb263f78fb6e09411c482ec", "6ac87f5ef30f787e5dacd59e97d7804e99a7366e", "2a660e81e6501ec3489d962fe87448ecf277237f", "242cbdc5966fd14ba4a00815ac301fb278d8f544", "b872e246d77ec5692a05a5ca0aa35168e202b3e8", "1d68841b2b139b5f437652ad37780eea577bb15b", 
"3e74ae88cdaa33bf89136800258bde97ab397ec9", "33196b69eeec351efd5178eae5da92979bdc6fd7", "5fff6219bc66df34ef8dbdf00ae6848f69583883", "5389fccd8e6679331eb4042d34f53ca8af3b9f5e", "3415d0e437f2ecddee7a8e2efa9010d22c211a68", "5ece19ddc8abc5454426deece280d0750972c2da", "378782a827933059f9f91e6e29aac84bd0857828", "294ad206a120a519cfd99294c8b5e004dcc06abf" ], "paperAbstract": "To improve system performance, operating systems (OSes) often undertake activities that require modification of virtual-to-physical address translations. For example, the OS may migrate data between physical pages to manage heterogeneous memory devices. We refer to such activities as page remappings. Unfortunately, page remappings are expensive. We show that a big part of this cost arises from address translation coherence, particularly on systems employing virtualization. In response, we propose hardware translation invalidation and coherence or HATRIC, a readily implementable hardware mechanism to piggyback translation coherence atop existing cache coherence protocols. We perform detailed studies using KVM-based virtualization, showing that HATRIC achieves up to 30% performance and 10% energy benefits, for per-CPU area overheads of 0.2%. 
We also quantify HATRIC's benefits on systems running Xen and find up to 33% performance improvements.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080211", "https://arxiv.org/pdf/1701.07517v2.pdf", "http://paul.rutgers.edu/~jv356/pub/ziyan-isca17.pdf", "http://arxiv.org/abs/1701.07517", "https://arxiv.org/pdf/1701.07517.pdf", "https://arxiv.org/pdf/1701.07517v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/09da9a22e89c5e3a2e6e9f1995fc6cd2b7e92a0b", "sources": [ "DBLP" ], "title": "Hardware translation coherence for virtualized systems", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "09dcfd29a20858cc11866778afdf53da4cb6459b": { "authors": [ { "ids": [ "38152002" ], "name": "Asif R. Khan" }, { "ids": [ "1695250" ], "name": "Hector Garcia-Molina" } ], "doi": "10.1145/3035918.3064055", "doiUrl": "https://doi.org/10.1145/3035918.3064055", "entities": [ "Amazon Mechanical Turk", "Crowdsourcing", "The Turk" ], "id": "09dcfd29a20858cc11866778afdf53da4cb6459b", "inCitations": [ "0bcb4ab04e1ebabbf67a38bd7b20b8c660018add" ], "journalName": "", "journalPages": "1447-1462", "journalVolume": "", "outCitations": [ "6c3c36fbc2cf24baf2301e80da57ed68cab97cd6", "761115e9b9564f7ad1f5ca8fe531fabcad7dfbea", "371fc532e70dc5a72c49eb3503ae1f707f38ea7d", "04f39720b9b20f8ab990228ae3fe4f473e750fe3", "6953420c593842697dd09bc2cf7ffbbaf67a6e8e", "53ced1bf79a5ff0db025fee76a67a33f37e4286d", "1ed3aea4af6986e5f5fc9fc4d596681bf10a9e67", "7ac5425b837a5e889847afdaf04c0241b9b0b4ba", "6ac7262028d905ad97bbabce37c610e5d84e4c6f", "8d4594b4d4827f44b57863376d54536112b7aaca", "ba24bd33caaa38127664f86d1bb486ac508b8a47", "060bdc422872e375bbef9b1cd82fbf0f936d4691", "74b42722aab4eea20f86a9fc6229e55db618e568", "0cfec22c458e7ce41709dc07cc1408f9184bba7a", "0aaa7e6b5fcd526f9e217dfa0133d338cca297d5", "7783fd2984ac139194d21c10bd83b4c9764826a3", "8a0b267493ac9510e47ceb4bcebb6d202b2f89a5", 
"7bbdb1803788a0e0cf8b814ed12a8f87e544b6ec", "b5e3beb791cc17cdaf131d5cca6ceb796226d832", "7e402f75d09b2de259e14e07cdb88d40e3128e7c", "22f516acc61967369ec29d4121c7b517d5f60e08", "02df9340e08e1e5085f1257bb6ddda4cd225e1d3", "f5d9c0182d8578f7c0a99ad9bdd4ff62e5f7c68d", "40c6e880da4fa9d3266e2ce60342b20a1eca106e", "743a4a699ef01786a7321291ff8ca94b7bde7780", "71b76a4de9b1364be2905278b69e0912c5f70b3a", "1456c5b5676f1353f0e2f794eef995bc10759682", "ddff18418de90e64e2a56580ac2507a4322c59e9", "307192581628f00315df9e088dca7cafc8f4f38e", "79f9e562470ea00d1bba08fd1fba8cc46d96f211", "151673abe01271dc3fc37725c02e95e7970f3bed", "0c93701fdffd928cf1725e1ca3286bd031bcfeef", "88cd4becf3587a8378e450a99ded801fbdb264e1", "a84f8b5f6fc5af05564353e2fa5d456e34059aa0", "ae1a0331abf71a21205d4c4971db30ab59f5f473", "cb035d8eb21dd6641234c1120fff570cf93bac76", "526ece284aacc3ab8e3d4e839a9512dbbd27867b", "83b5d51ab2f0c526dbe640f3b3d9b3b257658dc8", "1a7651308dbbdf91ccf07886b016b5fa8678fd27" ], "paperAbstract": "In this paper, we present CrowdDQS, a system that uses the most recent set of crowdsourced voting evidence to dynamically issue questions to workers on Amazon Mechanical Turk (AMT). CrowdDQS posts all questions to AMT in a single batch, but delays the decision of the exact question to issue a worker until the last moment, concentrating votes on uncertain questions to maximize accuracy. Unlike previous works, CrowdDQS also (1) optionally can decide when it is more beneficial to issue gold standard questions with known answers than to solicit new votes (both can help us estimate worker accuracy, but gold standard questions provide a less noisy estimate of worker accuracy at the expense of not obtaining new votes), (2) estimates worker accuracies in real-time even with limited evidence (with or without gold standard questions), and (3) infers the distribution of worker skill levels to actively block poor workers. 
We deploy our system live on AMT to over 1000 crowdworkers, and find that CrowdDQS can accurately answer questions using up to 6x fewer votes than standard approaches. We also find there are many non-obvious practical challenges involved in deploying such a system seamlessly to crowdworkers, and discuss techniques to overcome these challenges.", "pdfUrls": [ "http://ilpubs.stanford.edu:8090/1152/1/CrowdDQS_032117.pdf", "http://doi.acm.org/10.1145/3035918.3064055", "http://ilpubs.stanford.edu:8090/1148/2/CrowdDQS_11112016.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/09dcfd29a20858cc11866778afdf53da4cb6459b", "sources": [ "DBLP" ], "title": "CrowdDQS: Dynamic Question Selection in Crowdsourcing Systems", "venue": "SIGMOD Conference", "year": 2017 }, "09f67349e0779c565ab36b49b3aec09b8ab024b3": { "authors": [ { "ids": [ "2496920" ], "name": "Claude Fachkha" }, { "ids": [ "3223999" ], "name": "Elias Bou-Harb" }, { "ids": [ "2597530" ], "name": "Anastasis Keliris" }, { "ids": [ "1719861" ], "name": "Nasir D. 
Memon" }, { "ids": [ "1710123" ], "name": "Mustaque Ahamad" } ], "doi": "", "doiUrl": "", "entities": [ "Address space", "BACnet", "Cyber-physical system", "DNP3", "Holism", "Modbus", "Network telescope", "Statistical model", "Time series", "Undocumented feature" ], "id": "09f67349e0779c565ab36b49b3aec09b8ab024b3", "inCitations": [ "4d9f12b198bfc9911b60d485b13e863b29985313", "81d71d2cfe50acb8cc8d523add2e69de420f7e6f" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "3f5e13e951b58c1725250cb60afc27f08d8bf02c", "334ca522814ed02d73c62a114ea2026847b81c90", "a6a57d60eb5c4e22a727cb8b9dbfbeb88ba351b0", "6514dd694c795ceca0dbf31729e16b9e45031829", "050430cfb42d564f37e42618b66892dac6588e22", "05d268ae5975c4452aaad80de6cac9ec2210613f", "0363de2ed14c7df55a88493ccad122d2bd0a1302", "140b0e1731516581d07b61c9fcdfe42c7e261c15", "62a02408ed8c49401e7d87c7725bd703c928d202", "5b629db1000c1836908f7ab32f5165d33c4d2578", "6042c6a57fada82340ecb359647e16f0150c6e93", "1d5053c61d429fab245809199c2006c39c5c6be7", "1b5c9d1fbef10f8fac349ad2037a011630fcb9ce", "24e9c86f6aba84a1a4b1e266cd60f689a6d0f98e", "cd9fdb1ffb358a5e4a69c6de50f4a75968b75dd4", "b857ed131a7b7cfe5811d8e8dbc551b0f93bab74", "3c69ed9cfb2cdea79f08f55c91e47a9b1f083e8d", "5d2a3d98ca91573d9186d637abdaa6738584983a", "81a9bb0db4573f243ae980447c6eed0275d49d35", "8992a36af8549dbed9a9866695b520ff32a576cd", "94439e8b58651f2e137c47b76fa69244442f1b7e", "551b9048d0d86aab734817d1655bae96e2c87c53", "4f47a044ec8ec2b2e08e7d10013ce6d01b54c942", "0f7f345574b0f0c81e2fd31f1381e7c9c9b6fa96", "0be25a561378eeda3a41e86411fb83273f55b1dd", "1813a88797677e684f51d72e349dae10674c087f", "80a965eca262831c7a064ed7eee91b118f8e84ce", "0112891050537d4f587529c396c8b9855796d182", "a95a3d54a039b14ef074804d38f53e039e65edd7", "3325cdfb66a03aad5a6d0b19840f6bdb713d0e7a", "94917e2c031ea8abd8eb4986009d470598c561d8", "4df88162559e8144f87b5f51f20d2199a55065f1", "03227f9c3be6dd2e0202d97ca3a9522d3d58719c", "18f355d7ef4aa9f82bf5c00f84e46714efa5fd77", 
"420ea8443b80ec456ed4beb88f4296254fe45107", "b0b36e0800de1bcf64e577aa1300e1973877b2c1", "dbb32debfe2993f9a4ad17cff7cb73be278655de", "0434d85ae9f151bd3e5e0aa9a58a47842987360d", "10c06a20200ebd51b6533536fb0b7ff824e37542", "e79afeb77812e77c8ceea88eaae00caeed12b213", "099f225edc2afc33a43ccdc231fa242d6f6c1f94", "0516cd097791bcada1e199c4e7b3d3b126f80471", "b50198b3550a0a2ebf96874962b8ecf63132804d", "17bb76f79a0aca5abc36096bcb36c2611c0d1d71", "d23c357b502f75908a85ce498936753d989a1aa4", "52012bd4deee9009ce397bb557342138b9ff0952", "051da9a3595d63de898482840cc668c2cd23f277", "0bf505602a87024369210279890aff77058fcdd2", "a1352af5cdf57823a772efb81b93307db709c128", "46af1c67d5310c2d713e1687dc99f7c4d965a834", "8ed5e272c9df14c9fd960dfd442b8b3e4f10602d", "2fc92fe53d0523b7495d1641cb5ce906bd7c6724", "2a6f6c2df88928ff3ffdcd4a276a28c1ff3e4e7b", "1ad9015c60ec25bc771f7a76c0aa6317026ef7ab", "14b36683a23ffbe19d351dfcfc24a155c81cc8e5", "2241e019db35c1cbca57e151a50a9bd9cfecf53c", "171abf2044e45051650ca40495b04300ef7c5d0f", "d23e8633e369015ec8f64dd7793b27685bef0095", "268a656d930fd8dad5e42a0e0d093eff17d0b40f", "6dbcb713cffe29425b67ea36e82b93cd9a883705", "363d109c3f00026f9ef904dd8cc3c935ee463b65", "1c482680fd3cc83bf647352de9daae69e089ad9b" ], "paperAbstract": "Although the security of Cyber-Physical Systems (CPS) has been recently receiving significant attention from the research community, undoubtedly, there still exists a substantial lack of a comprehensive and a holistic understanding of attackers\u2019 malicious strategies, aims and intentions. To this end, this paper uniquely exploits passive monitoring and analysis of a newly deployed network telescope IP address space in a first attempt ever to build broad notions of real CPS maliciousness. Specifically, we approach this problem by inferring, investigating, characterizing and reporting large-scale probing activities that specifically target more than 20 diverse, heavily employed CPS protocols. 
To permit such analysis, we initially devise and evaluate a novel probabilistic model that aims at filtering noise that is embedded in network telescope traffic. Subsequently, we generate amalgamated statistics, inferences and insights characterizing such inferred scanning activities in terms of their probe types, the distribution of their sources and their packets\u2019 headers, among numerous others, in addition to examining and visualizing the co-occurrence patterns of such events. Further, we propose and empirically evaluate an innovative hybrid approach rooted in time-series analysis and context triggered piecewise hashing to infer, characterize and cluster orchestrated and well-coordinated probing activities targeting CPS protocols, which are generated from Internet-scale unsolicited sources. Our analysis and evaluations, which draw upon extensive network telescope data observed over a recent one month period, demonstrate a staggering 33 thousand probes towards ample of CPS protocols, the lack of interest in UDP-based CPS services, and the prevalence of probes towards the ICCP and Modbus protocols. Additionally, we infer a considerable 74% of CPS probes that were persistent throughout the entire analyzed period targeting prominent protocols such as DNP3 and BACnet. Further, we uncover close to 9 thousand large-scale, stealthy, previously undocumented orchestrated probing events targeting a number of such CPS protocols. We validate the various outcomes through cross-validations against publicly available threat repositories. 
We concur that the devised approaches, techniques, and methods provide a solid first step towards better comprehending real CPS unsolicited objectives and intents.", "pdfUrls": [ "http://iisp.gatech.edu/sites/default/files/images/ahamad_ndss_17_internet_scale_probing_of_cps.pdf", "https://csaw.engineering.nyu.edu/application/files/6315/0851/8339/CSAW17_paper_149.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/internet-scale-probing-cps-inference-characterization-and-orchestration-analysis/" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0d17/372688aed300d41c4fb0eb0c6b131f7e2058.pdf", "s2Url": "https://semanticscholar.org/paper/09f67349e0779c565ab36b49b3aec09b8ab024b3", "sources": [ "DBLP" ], "title": "Internet-scale Probing of CPS: Inference, Characterization and Orchestration Analysis", "venue": "NDSS", "year": 2017 }, "0a029cb87a988ae5ad6ddd68df4e71cb44abdc98": { "authors": [ { "ids": [ "5494188" ], "name": "Magnus Madsen" }, { "ids": [ "3284421" ], "name": "Ondrej Lhot\u00e1k" }, { "ids": [ "1746015" ], "name": "Frank Tip" } ], "doi": "10.1145/3133910", "doiUrl": "https://doi.org/10.1145/3133910", "entities": [ "Asynchronous I/O", "Asynchrony (computer programming)", "Blocking (computing)", "Cognitive dimensions of notations", "Computation", "Correctness (computer science)", "Debugging", "ECMAScript", "Exception handling", "HTTP Strict Transport Security", "Hypertext Transfer Protocol", "Input/output", "JavaScript", "Non-blocking algorithm", "Programmer", "Stack Overflow", "Web application" ], "id": "0a029cb87a988ae5ad6ddd68df4e71cb44abdc98", "inCitations": [], "journalName": "PACMPL", "journalPages": "86:1-86:24", "journalVolume": "1", "outCitations": [ "2466407dddb5b1a15f1885721390ac24953fa39a", "c03512277e95b7055b2fb13b662916d0ebd74cfc", "0b6975dfee824f53f54281afe5755620c4ee9e92", "2ae2e8e62befd7603c66c3bd1f10d2fa23e0ffe6", "938430097d30aba246c907503d7c3d22cfe23428", "cb4160990391c9069ef08a262468d21171beae91", 
"31c4320abb49b83f68b09ce355df708a3c3be363", "0f30462958b56c285f37876e62f1b4543c2c3c58", "103f2107e7e66788684e51ae69bd3bf67abf5e4d", "30cd035c738768c752f29199482d24ec7e3e45b3", "53dd6297bfc0489800d78fb7163be8039b47df78", "03bb63660c3935ad2ec011a7f9e868587063f89c", "10ba04904f12e44cd0569cb86aa6e97e47939e23", "6dc866a556ece66293d7d2e93ad7d714c03bbd05", "2133a71f4ac920792abb5dbee89db1add32a511a", "477d0e9e02968be9a9d728e34ecf71aaa409216f", "517fee3acd23e57112e0f66c5ce3dd8d034d3538", "35f53e1071db2b40146ea4bc9029dfbd82993f16", "683e445871ac1168768d8ce58ac830af918b0f40", "a74d2672e0f1bb05b321e60fffab0c003693dcef" ], "paperAbstract": "In JavaScript programs, asynchrony arises in situations such as web-based user-interfaces, communicating with servers through HTTP requests, and non-blocking I/O. Event-based programming is the most popular approach for managing asynchrony, but suffers from problems such as lost events and event races, and results in code that is hard to understand and debug. Recently, ECMAScript 6 has added support for promises, an alternative mechanism for managing asynchrony that enables programmers to chain asynchronous computations while supporting proper error handling. However, promises are complex and error-prone in their own right, so programmers would benefit from techniques that can reason about the correctness of promise-based code. \nSince the ECMAScript 6 specification is informal and intended for implementers of JavaScript engines, it does not provide a suitable basis for formal reasoning. This paper presents \u03bbp, a core calculus that captures the essence of ECMAScript 6 promises. Based on \u03bbp, we introduce the promise graph, a program representation that can assist programmers with debugging of promise-based code. 
We then report on a case study in which we investigate how the promise graph can be helpful for debugging errors related to promises in code fragments posted to the StackOverflow website.", "pdfUrls": [ "https://plg.uwaterloo.ca/~mmadsen/papers/oopsla17/paper.pdf", "http://doi.acm.org/10.1145/3133910", "http://www.franktip.org/pubs/oopsla2017promises.pdf", "https://plg.uwaterloo.ca/~olhotak/pubs/oopsla17b.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0a029cb87a988ae5ad6ddd68df4e71cb44abdc98", "sources": [ "DBLP" ], "title": "A model for reasoning about JavaScript promises", "venue": "PACMPL", "year": 2017 }, "0a14b9ba6b3178c8a04cc3b618f101ab8951eb27": { "authors": [ { "ids": [ "2070759" ], "name": "Paul Pearce" }, { "ids": [ "2534561" ], "name": "Roya Ensafi" }, { "ids": [ "13154862" ], "name": "Frank Li" }, { "ids": [ "1800154" ], "name": "Nick Feamster" }, { "ids": [ "1744800" ], "name": "Vern Paxson" } ], "doi": "10.1109/SP.2017.55", "doiUrl": "https://doi.org/10.1109/SP.2017.55", "entities": [ "Denial-of-service attack", "IP address spoofing", "Internet", "Onset (audio)", "Pervasive informatics", "Reachability", "Scalability" ], "id": "0a14b9ba6b3178c8a04cc3b618f101ab8951eb27", "inCitations": [ "41f234a5b1e43984515aefd0ad9a8b02782466e9", "e7432c18f29b268c530a6cd57a0d935d282ab851", "2b646e8cd3a94239d3f473062900bde801808272", "ccad1f05fbba99278f9cf65454961c8c43ea5506", "e97353eca255c4d9e8bc9d75b782ac573b53bf4f", "035d1ab0cc209b3650ca120f6c83845141cd137e", "6bc343c1c08791dc5bea33a393ed8ea0bc5d5c26" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "427-443", "journalVolume": "", "outCitations": [ "16e2e232449755bad0ab22c3cf51f511cdb5cd3a", "1a29dd17b602bbbb487285fc6aa5fa2bb9bf8649", "294bdb204d649cae0823760095dffdd32f6c837e", "791382f7dc39154ec39ea249493d5f653b739df4", "40c10518adb442896a4d4fdcd3a7b127fd38c672", "35c69dce589285144a5dac042a322f17bb54cec8", 
"62a13d8ce14fe16365862828122e08c944aa1c65", "099d12674f378461b54c4472d36f0c867502f338", "b79b67422e106352aabfbeb1451071be1ae6ca1f", "754d3aa641d9da8e50796c3c4015fa064f10c1ba", "fa4726dfbbb48530d849fecf7ba8c95552cd5d48", "9ba0f20ce687eaa817e14653b970fadea2898d50", "52a67d797e331815894314e81614d80b9febea5b", "0112891050537d4f587529c396c8b9855796d182", "4049b2ef1f8d66d01cf0fcb99635d8aa9f78f20e", "7b102e4c57feaa1cb802b58f0f9ff2c934a82db7", "4e33e9d1aa91aa78819ec9700d1024e0f0cdef6c", "d9253f52689802a4134af811d72c79593f3c59f8", "325590347ad5962830f9fe01346931086354e166", "397fc541620130e1aa26cbff6f4d61b6d9ecf787", "b4196f07bc3dedde11e21cc45d0931e452f598c5", "5fc52f24b81b49b6b8d0f9a8acdb00f7d1e2d6e2", "2a4cedd359ae6c0d8287a12a6a5dd8522ff614c7", "1085045094f17ccdc8c4b25d28a257af98a0e38b", "49cd1030cd8f98b6ec7545750c78d580ca80a43d", "c82e79459a998f9048aa8d902142a0d6e1c69651", "33e3657e4f0ea6a47d126f33a6e529817d92fc5d", "1ff9b151019648eaea901ee3c2b795e921358b21", "20423c7282aa2b940758ae5f0af71b795b40cf51", "807c486f37a114bc726ca9457b3c25139313f9c1", "2bf377008d579c84aaa61db527a123ad72a0a8b1", "22c55552488207ee224f22a840115026403e6b60", "9a1f87dc27c6ae4605ec82cfd42fec872c89408b", "4bc403451dacc6cae14d1f22ef13f7ddd07055b5" ], "paperAbstract": "Anecdotes, news reports, and policy briefings collectively suggest that Internet censorship practices are pervasive. The scale and diversity of Internet censorship practices makes it difficult to precisely monitor where, when, and how censorship occurs, as well as what is censored. The potential risks in performing the measurements make this problem even more challenging. As a result, many accounts of censorship begin—and end—with anecdotes or short-term studies from only a handful of vantage points. We seek to instead continuously monitor information about Internet reachability, to capture the onset or termination of censorship across regions and ISPs. 
To achieve this goal, we introduce Augur, a method and accompanying system that utilizes TCP/IP side channels to measure reachability between two Internet locations without directly controlling a measurement vantage point at either location. Using these side channels, coupled with techniques to ensure safety by not implicating individual users, we develop scalable, statistically robust methods to infer network-layer filtering, and implement a corresponding system capable of performing continuous monitoring of global censorship. We validate our measurements of Internet-wide disruption in nearly 180 countries over 17 days against sites known to be frequently blocked, we also identify the countries where connectivity disruption is most prevalent.", "pdfUrls": [ "https://www.cs.princeton.edu/~rensafi/papers/Pearce-Ensafi-Augur.pdf", "https://people.eecs.berkeley.edu/~pearce/papers/augur_oakland_2017.pdf", "http://www.icir.org/vern/papers/oakland_2017_augur.pdf", "https://www.cs.princeton.edu/~rensafi/papers/Augur-Oakland17.pdf", "https://people.eecs.berkeley.edu/~frankli/papers/augur_oakland_2017.pdf", "https://doi.org/10.1109/SP.2017.55" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0a14b9ba6b3178c8a04cc3b618f101ab8951eb27", "sources": [ "DBLP" ], "title": "Augur: Internet-Wide Detection of Connectivity Disruptions", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "0a34b4e799ee212768af6b50adb2f1854bd98c82": { "authors": [ { "ids": [ "9919012" ], "name": "Lechanceux Luhunu" }, { "ids": [ "2873716" ], "name": "Eugene Syriani" } ], "doi": "10.1145/3136014.3136021", "doiUrl": "https://doi.org/10.1145/3136014.3136021", "entities": [ "Acceleo", "Code generation (compiler)", "EGL (OpenGL)", "Metamodeling", "Microsoft Jet Database Engine", "Model transformation", "Model-driven engineering", "Persistence (computer science)", "Serial communication", "Socket AM2", "Software documentation", "Velocity" ], "id": 
"0a34b4e799ee212768af6b50adb2f1854bd98c82", "inCitations": [ "fdea9d38287cb0d5d3b3bf77a6c7b738ff7625cf" ], "journalName": "", "journalPages": "206-216", "journalVolume": "", "outCitations": [ "6fe3feeb20df3f9f31d1f7cfda8e23173a760a96", "7ecaca8db190608dc4482999e19b1593cc6ad4e5", "1c22f6393cc547c60230e5e84f4ff33107290605", "7a7474f0405f9bd1e5207b4994ddc7b25e6f0815", "8a10703b8f0ec86131f48f22bf85aba10659f80b", "e4bc2da18ebf1b7f212ea16c2de4510da9317bf5", "ebeb5026cc5c6cf496441887f8b5bd0e36ff987b", "185ff2ac221bc91cc1c17c8204c794117d9f82a7", "6e21b66e4afb9fd90df1b8cba5ca41b35cc4f1c9", "35ecbcdc1b98e121b0a2cc48856b095d808505e9", "31ca5b60f90e5d9be6387a01ef79930aacbe3ae5", "c5d695c94426a18694cb365e6ff2fb7896f0c8ee" ], "paperAbstract": "A critical step in model-driven engineering (MDE) is the automatic synthesis of a textual artifact from models. This is a very useful model transformation to generate application code, to serialize the model in persistent storage, generate documentation or reports. Among the various model-to-text (M2T) paradigms, template-based code generation is the most popular in MDE. This is supported by over 70 different tools, whether they are model-based (e.g., Acceleo, EGL) or code-based (JET, Velocity). To help developers in their difficult choice of selecting the M2T tool, we compare the expressiveness power and performance of the nine most popular tools spanning the different technological approaches. We evaluate the expressiveness based on common metamodel patterns and evaluate the performance on a range of models that conform to a metamodel composed by the combination of these patterns. The results show that MDE-based tools are more expressive, but that code-based tools are more performant. 
Xtend2 offers the best compromise between the expressiveness and the performance.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136021" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0a34b4e799ee212768af6b50adb2f1854bd98c82", "sources": [ "DBLP" ], "title": "Comparison of the expressiveness and performance of template-based code generation tools", "venue": "SLE", "year": 2017 }, "0a36d8472e77a036351006e4732834b50434b541": { "authors": [ { "ids": [ "3089873" ], "name": "Ryan Beckett" }, { "ids": [ "38981616" ], "name": "Ratul Mahajan" }, { "ids": [ "3105241" ], "name": "Todd D. Millstein" }, { "ids": [ "1695132" ], "name": "Jitendra Padhye" }, { "ids": [ "1832958" ], "name": "David Walker" } ], "doi": "10.1145/3062341.3062367", "doiUrl": "https://doi.org/10.1145/3062341.3062367", "entities": [ "Abstract type", "Algorithm", "Border Gateway Protocol", "Correctness (computer science)", "Evolving networks", "Experiment", "Fault tolerance", "High- and low-level", "Network topology", "Routing", "Universal instantiation" ], "id": "0a36d8472e77a036351006e4732834b50434b541", "inCitations": [ "40257a8618b120681d29f758304c6aa2827093d4", "659fb2739513bf565c7225ef8b9468a836675260", "4706fc1a66d70dbc63f1f782cc3e1cc6e56be3e6" ], "journalName": "", "journalPages": "437-451", "journalVolume": "", "outCitations": [ "17059e939aa051d7db57f4af959b2af21fa3dd18", "08a572c06bdaa78d85a287111832d188e8e07f0b", "211b998175c29dd62e7f854301ed88bcb03a1fa5", "1d912b67ba7cda4d341d834c1c6de96db01888fc", "0dd046fd2f1ba04690c1f41be83326cbf6c4897b", "58099048c6dd8b6a7dcfac0855deb546e50024b3", "6c10b6e6e098dfc52e48023e2db0709ce140fbc6", "119af470b90b725c847c4b1fd25aea9a6c5b2b57", "c24809e301b30cb1dcc1da4ee14e4e1f87dd742b", "0355a7b4c66e42b73fa3d0d7198ce68b2dbcd5be", "42e5e97272ad8728749f861ed7a920707e698778", "7e0ffb6ee0c188193e37e65d935e36cc2811ca41", "9fb35946a52007b708851f42b10b429e0611cb4d", "663e064469ad91e6bda345d216504b4c868f537b", 
"4f21cbaa02e89b7aed6c405232ca9b804ca748cb", "58f692e9b03cb973355aab46bb6f867239aeb513", "19114b7a2f5243a47e80590cc11a2d8ec5b96308", "2f5e593d29a5eb8b3f7c65e4e5c740b792933757", "273de61c65c39e0e55942ea166a473e63ddaa02c", "058f6752d85a517aae298586fdf117acdd7560ea", "24e10a0f77ef92eb86d26108ebe725178bc0bf94", "20f38a5d49473d999e3bafd25c9808c3f564154b", "016ca0cee16190907a911d874e98fbc6dfa5a36c", "3651da2bc6e5b79d39f583e47b9f998a41e98794", "6f4617c67263cb4e9a185a9a35781a9c4b3cc9d4", "3c903855e111dc5a2bbd38e64f4a199f14fc29fd", "157629dc2a9d2c2c7696e021ff8c1d5a6e7b4197", "0f8b04cb89e455ceadf0c88fd5dd9f9a7f338ba9", "6bf2daa2760a46b6d53c0d0a9776331e8a6d024c" ], "paperAbstract": "We develop Propane/AT, a system to synthesize provably-correct BGP (border gateway protocol) configurations for large, evolving networks from high-level specifications of topology, routing policy, and fault-tolerance requirements. Propane/AT is based on new abstractions for capturing parameterized network topologies and their evolution, and algorithms to analyze the impact of topology and routing policy on fault tolerance. Our algorithms operate entirely on abstract topologies. We prove that the properties established by our analyses hold for every concrete instantiation of the given abstract topology. Propane/AT also guarantees that only incremental changes to existing device configurations are required when the network evolves to add or remove devices and links. 
Our experiments with real-world topologies and policies show that our abstractions and algorithms are effective, and that, for large networks, Propane/AT synthesizes configurations two orders of magnitude faster than systems that operate on concrete topologies.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062367", "http://www.cs.princeton.edu/~dpw/papers/propane-at-pldi17.pdf", "http://web.cs.ucla.edu/~todd/research/pldi17.pdf", "https://ratul.org/papers/pldi2017-propaneat.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0a36d8472e77a036351006e4732834b50434b541", "sources": [ "DBLP" ], "title": "Network configuration synthesis with abstract topologies", "venue": "PLDI", "year": 2017 }, "0a3b20d8feea9d4017d0c5123bc37a2cf7f463d0": { "authors": [ { "ids": [ "8481999" ], "name": "Hongyu Miao" }, { "ids": [ "3303304" ], "name": "Heejin Park" }, { "ids": [ "2421588" ], "name": "Myeongjae Jeon" }, { "ids": [ "3257164" ], "name": "Gennady Pekhimenko" }, { "ids": [ "1766093" ], "name": "Kathryn S. 
McKinley" }, { "ids": [ "1774176" ], "name": "Felix Xiaozhu Lin" } ], "doi": "", "doiUrl": "", "entities": [ "Cascading Style Sheets", "Concurrency (computer science)", "Data structure", "Distributed computing", "Epoch (reference date)", "Locality of reference", "Memory hierarchy", "Multi-core processor", "Parallel computing", "Stream processing", "Throughput" ], "id": "0a3b20d8feea9d4017d0c5123bc37a2cf7f463d0", "inCitations": [ "be867d2468032757460d7085f8ef00e479c5d82e", "194e35844f896d610a5dbf4a802a04a8ec7e4fd7" ], "journalName": "", "journalPages": "617-629", "journalVolume": "", "outCitations": [ "78c83644fbfa65e69137f57e22fa3b53f225a5d5", "6b1146e51b2c4d5b8433d4d9c6dbf87c6c484196", "3ede9f94ee174dae45f3e95cf3a45df2dbe05307", "0ef88610f4382a9c6e5c8020ab6ccf3dde0fb3eb", "4b65024cd376067156a5ac967899a7748fa31f6f", "9e074f3d1c0e6212282818c8fb98cc35fe03f4d0", "8375f40706943a50094acf909849a6bc611fe5e9", "1ab74d44982409beeca21efb2dbcb97a5c7de4b2", "178b92c9d7438aa44949a4f5441e83f8a9de3ccb", "5208060771fd213eefd827e3e1260b939f1aed6d", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "65da29a03c8905cbc0614612d1632864336c4786", "0608d9937c074520cdc93cc444cc1c77039c5332", "4f90ed641debc15b2ab27c868280cad41101b318", "13125ac8226fe3fc297d4880012fd3531f1305bd", "7e7b6249b598d9a4c63394e3a2efd008268ae851", "11e4d4d00c7b1e3aa9fcf3c490b635df98827dd9", "67a77d3242a357571541d39c26d305c5dda89445", "463bec3d0298e96e3702e071e241e3898f76eff2", "5850ead375e41daceeca0efd7d02b7f6e70578c1" ], "paperAbstract": "Stream analytics on real-time events has an insatiable demand for throughput and latency. Its performance on a single machine is central to meeting this demand, even in a distributed system. This paper presents a novel stream processing engine called StreamBox that exploits the parallelism and memory hierarchy of modern multicore hardware. StreamBox executes a pipeline of transforms over records that may arrive out-of-order. 
As records arrive, it groups the records into ordered epochs delineated by watermarks. A watermark guarantees no subsequent record\u2019s event timestamp will precede it. Our contribution is to produce and manage abundant parallelism by generalizing out-of-order record processing within each epoch to out-of-order epoch processing and by dynamically prioritizing epochs to optimize latency. We introduce a data structure called cascading containers, which dynamically manages concurrency and dependences among epochs in the transform pipeline. StreamBox creates sequential memory layout of records in epochs and steers them to optimize NUMA locality. On a 56-core machine, StreamBox processes records up to 38 GB/sec (38M Records/sec) with 50 ms latency.", "pdfUrls": [ "http://www.cs.toronto.edu/~pekhimenko/Papers/StreamBox.pdf", "https://engineering.purdue.edu/~miaoh/papers/atc17.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-miao.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_miao_0.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/miao", "http://www.cs.utexas.edu/users/mckinley/papers/stream-box-act-2017.pdf", "https://wiki.itap.purdue.edu/download/temp/pdfexport-20170906-060917-1749-14385/CES-computerengineeringgraduateseminars-060917-1749-14386.pdf?contentType=application/pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/d3c6/58cab74307d8a355b29a1af09cc146ac76f5.pdf", "s2Url": "https://semanticscholar.org/paper/0a3b20d8feea9d4017d0c5123bc37a2cf7f463d0", "sources": [ "DBLP" ], "title": "StreamBox: Modern Stream Processing on a Multicore Machine", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "0a3ce42de0e5c126309ca3dfaf61ca1d259af5d7": { "authors": [ { "ids": [ "2131598" ], "name": "Gaoyang Guan" }, { "ids": [ "2303622" ], "name": "Wei Dong" }, { "ids": [ "1755448" ], "name": "Yi Gao" }, { "ids": [ "27025144" ], "name": "Kaibo Fu" }, { "ids": [ 
"6592040" ], "name": "Zhihao Cheng" } ], "doi": "10.1145/3117811.3117825", "doiUrl": "https://doi.org/10.1145/3117811.3117825", "entities": [ "Executable", "Holism", "Internet of things", "Overhead (computing)", "Read-only memory", "Run time (program lifecycle phase)", "Source lines of code", "Top-down and bottom-up design" ], "id": "0a3ce42de0e5c126309ca3dfaf61ca1d259af5d7", "inCitations": [], "journalName": "", "journalPages": "383-395", "journalVolume": "", "outCitations": [ "3a1db3c1940bfdd4af47f1b675d75161598452c1", "20b7f73e5e9277f9818965701271fc987e486b6f", "5e792dc103134999ab13f03f488141bf977b9d42", "8182e2082f59ce12ead5c8a26b0981007449ef4c", "0651805aa6b2c4e1187d1d6faa6459eb1777ce14", "aa2f2effbb5f0bf58f9b71be8f55eecde8521292", "a9d3c73e189331ddda706806d9fe3d70be0309ce", "c53d1372c0e7a11b29549a5ddb6065454f7d36f1", "e6f167b496397f2724bd7c693c6b56250fb8265e", "ef332d0a3437a5f5f5db126c69f7b03c9f1b98dd", "0fde5e44f95150d869047c2b7268bbdd3b4ba1f1", "d47778683f252e0a19975a48306ba75b279b304f", "e46b1bcd2636cd495db96e73c8425c70827e38f8", "4383ac7d42480c9547587025908522c291436665", "330e5970507a1a62047ed737abd24e88207724d4", "2406a204ff2c02253033b9e224080516821cba30", "47ec965bb342169db604b90f0e589285ec0cd4d7", "304c025473e876625dc828a78f40aae3f93ac761", "2074c9bb69a75b6c83e3b9f842d444c6cf4da3e5", "84f25ddd053e414f239b91552410dab0adbaedad", "4196cfaf354cd0cabab206bb932c135a173bf1b9", "6f7816c4310c2ecc198010ff930e6a982eb245ba" ], "paperAbstract": "Rapid development is essential for IoT (Internet of Things) application developers to obtain first-mover advantages and reduce the development cost. In this paper, we present TinyLink, a holistic system for rapid development of IoT applications. The key idea of TinyLink is to use a top-down approach for designing both the hardware and the software of IoT applications. 
Developers write the application code in a C-like language to specify the key logic of their applications, without dealing with the details of the specific hardware components. Taking the application code as input, TinyLink automatically generates the hardware configuration as well as the binary program executable on the target hardware platform. TinyLink provides unified APIs for applications to interact with the underlying hardware components. We implement TinyLink and evaluate its performance using real-world IoT applications. Results show that: (1) TinyLink achieves rapid development of IoT applications, reducing 52.58% of lines of code in average compared with traditional approaches; (2) TinyLink searches a much larger design space and thus can generate a superior solution for the hardware configuration, compared with the state-of-the-art approach; (3) TinyLink incurs acceptable overhead in terms of execution time and program memory.", "pdfUrls": [ "http://www.emnets.org/dongw/pub/MobiCom17-TinyLink.pdf", "http://doi.acm.org/10.1145/3117811.3117825" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0a3ce42de0e5c126309ca3dfaf61ca1d259af5d7", "sources": [ "DBLP" ], "title": "TinyLink: A Holistic System for Rapid Development of IoT Applications", "venue": "MobiCom", "year": 2017 }, "0a55dab49eac9856e68bb62ebc50b98f2d0d7dc4": { "authors": [ { "ids": [ "1798480" ], "name": "Calin Iorgulescu" }, { "ids": [ "1772480" ], "name": "Florin Dinu" }, { "ids": [ "10195630" ], "name": "Aunn Raza" }, { "ids": [ "23665691" ], "name": "Wajih Ul Hassan" }, { "ids": [ "1711100" ], "name": "Willy Zwaenepoel" } ], "doi": "", "doiUrl": "", "entities": [ "Apache Hadoop", "Data parallelism", "Elasticity (cloud computing)", "PageRank", "Priority queue", "SPARK", "Scheduling (computing)", "Simulation" ], "id": "0a55dab49eac9856e68bb62ebc50b98f2d0d7dc4", "inCitations": [ "a1bfd25126692416e7cddc619470da9030a5dba5" ], "journalName": "", "journalPages": "97-109", 
"journalVolume": "", "outCitations": [ "a43dfb040d60d0df3dbe66a52b920e05a1ac3083", "3a043714354fe498752b45e4cf429dbae0fb2558", "3e257f01e3ee71545d824a1615c35659525b856a", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "3000e77ed7282d9fb27216f3e862a3769119d89e", "090599a2caf4591c87699ad850c75554cd712937", "029068a33f6e9f9ba0ddfe5498a67e4c0d349d2f", "423549d9e46a26474bc9554530c84c244481d881", "28a9dca6faeead651539c700bef413203b2b876e", "bbc2a698c2fb2b76e256cc51a9d7c37765ab51b6", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "b6571efa4483aa00d23bbcd36930c4877548ba38", "2988e34168fa91398fa397baf823af2063893e9c", "04fe7d8276178be18afd6c17e399e8df4ab693c7", "d7704bb76d8f2598c13f338f0991ca39646afd3c", "57c2192c290fd944d6623853b695a255d06b28f8", "08f13e484e7e51831ec13076d14570ced91a50fb", "379bb3fab4073075ea955a5096196e960b909403", "0d868efa67bf06b1f784d60769c082fd9a58893e", "47b627916586fa7e0ba64f0fcdb80a5037d66dc7", "0558c94a094158ecd64f0d5014d3d9668054fb97", "43776b15c034076a36b7143d58af8e04715e41d0", "0bf963bd1fea6b6efdbfb1e829f1db562e367c11" ], "paperAbstract": "Understanding the performance of data-parallel workloads when resource-constrained has significant practical importance but unfortunately has received only limited attention. This paper identifies, quantifies and demonstrates memory elasticity, an intrinsic property of dataparallel tasks. Memory elasticity allows tasks to run with significantly less memory that they would ideally want while only paying a moderate performance penalty. For example, we find that given as little as 10% of ideal memory, PageRank and NutchIndexing Hadoop reducers become only 1.2x/1.75x and 1.08x slower. We show that memory elasticity is prevalent in the Hadoop, Spark, Tez and Flink frameworks. We also show that memory elasticity is predictable in nature by building simple models for Hadoop and extending them to Tez and Spark. 
To demonstrate the potential benefits of leveraging memory elasticity, this paper further explores its application to cluster scheduling. In this setting, we observe that the resource vs. time trade-off enabled by memory elasticity becomes a task queuing time vs task runtime trade-off. Tasks may complete faster when scheduled with less memory because their waiting time is reduced. We show that a scheduler can turn this task-level tradeoff into improved job completion time and cluster-wide memory utilization. We have integrated memory elasticity into Apache YARN. We show gains of up to 60% in average job completion time on a 50-node Hadoop cluster. Extensive simulations show similar improvements over a large number of scenarios.", "pdfUrls": [ "https://arxiv.org/pdf/1702.04323v1.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-iorgulescu.pdf", "https://infoscience.epfl.ch/record/227471/files/atc17-paper152.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/iorgulescu", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_iorgulescu.pdf", "https://infoscience.epfl.ch/record/225642/files/ms.pdf", "http://arxiv.org/abs/1702.04323" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0a55/dab49eac9856e68bb62ebc50b98f2d0d7dc4.pdf", "s2Url": "https://semanticscholar.org/paper/0a55dab49eac9856e68bb62ebc50b98f2d0d7dc4", "sources": [ "DBLP" ], "title": "Don't cry over spilled records: Memory elasticity of data-parallel applications and its application to cluster scheduling", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "0a5e40e7f9b754882d2096598e0080ee2b40b2f1": { "authors": [ { "ids": [ "8491987" ], "name": "Zhiyi Wang" }, { "ids": [ "3329563" ], "name": "Shimin Chen" } ], "doi": "10.1145/3035918.3035956", "doiUrl": "https://doi.org/10.1145/3035918.3035956", "entities": [ "Algorithm", "Baseline (configuration management)", "Data model", "Data structure", "Geocaching", 
"JSON", "Missing data", "MongoDB", "PostgreSQL", "Program optimization", "Protocol Buffers", "Relational database management system" ], "id": "0a5e40e7f9b754882d2096598e0080ee2b40b2f1", "inCitations": [ "add330e246b1f4886c94e73cd53e1e95cae9dee3" ], "journalName": "", "journalPages": "883-896", "journalVolume": "", "outCitations": [ "3a134bc11a5805bcf45fdcb88a91321a1b1b63c3", "2cfbdfcf3f590cbd7a6c9c4299eb42569e77697c", "0c8288f4a91bdf129491370cd3919959207b2dcb", "1156f60e40548096df49528b1342bb3e88b0f378", "03363ed04e9d4d2e8c9348551815e80615969611", "cd4b958bf9dda5f44fbb457f7bf0eca96d6563e7", "43715cdc52b75ffaaff701852deafc4736a89081", "85ee4bc37077c3809fc220a94994bef6a8bc4391", "22725ce33c721a6e75052ac5a39e567a8b363719", "33f9d19cdb5f3df4c5c36237290449d6dc0f8746", "8dbe22a1987448c3fb6470de2fe7e5976a62824b", "235864238ef9cea6a0bea7e31fbbc712dfadfb5e", "55e4ae822a9ec7ac4e07dae07877392b7045ae92", "13f6bbc21047ce4eba895c9ed308d947eca8fbee", "7400fd9304f1b4b7bfb9f4150adc5e1745afca9c", "0997037e940df06ed7a6d19f7501579aab01e829", "1455be30b0bb364ffc4c35d2cf1ee05cfb32595f", "0558c94a094158ecd64f0d5014d3d9668054fb97" ], "paperAbstract": "Tree-structured data formats, such as JSON and Protocol Buffers, are capable of expressing sophisticated data types, including nested, repeated, and missing values. While such expressing power contributes to their popularity in real-world applications, it presents a significant challenge for systems supporting tree-structured data. Existing systems have focused on general-purpose solutions either extending RDBMSs or designing native systems. However, the general-purpose approach often results in sophisticated data structures and algorithms, which may not reflect and optimize for the actual structure patterns in the real world.\n In this paper, we aim to better understand tree-structured data types in real uses and optimize for the common patterns. We present an in-depth study of five types of real-world use cases of tree-structured data. 
We find that a majority of the root-to-leaf paths in the tree structures are simple, containing up to one repeated node. Given this insight, we design and implement Steed, a native analytical database system for tree-structured data. Steed implements the baseline general-purpose support for storing and querying data in both row and column layouts. Then we enhance the baseline design with a set of optimizations to simplify and improve the processing of simple paths. Experimental evaluation shows that our optimization improves the baseline by a factor of up to 1.74x. Compared to three representative state-of-the-art systems (i.e. PostgreSQL, MongoDB, and Hive+Parquet), Steed achieves orders of magnitude better performance in both cold cache and hot cache scenarios.", "pdfUrls": [ "http://www.cs.cmu.edu/~chensm/papers/steed-sigmod17.pdf", "http://doi.acm.org/10.1145/3035918.3035956" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0a5e40e7f9b754882d2096598e0080ee2b40b2f1", "sources": [ "DBLP" ], "title": "Exploiting Common Patterns for Tree-Structured Data", "venue": "SIGMOD Conference", "year": 2017 }, "0a5ff7336879c99513dca6fce6ef44984ebf3f55": { "authors": [ { "ids": [ "3283132" ], "name": "Daniel Crankshaw" }, { "ids": [ "1705489" ], "name": "Xin Wang" }, { "ids": [ "8383835" ], "name": "Giulio Zhou" }, { "ids": [ "1712149" ], "name": "Michael J. 
Franklin" }, { "ids": [ "30503077" ], "name": "Joseph Gonzalez" }, { "ids": [ "1716557" ], "name": "Ion Stoica" } ], "doi": "", "doiUrl": "", "entities": [ "Benchmark (computing)", "Cache (computing)", "Clipper", "Machine learning", "Model selection", "Software deployment", "TensorFlow", "Throughput" ], "id": "0a5ff7336879c99513dca6fce6ef44984ebf3f55", "inCitations": [ "d4db99796a41f9cfa9e7918619c3f9a8c9209d37", "391a6a423e06b0767e9fc9df4f43c5533c0ab662", "bc773c95cf3ac690d74163948e4df9055fa264f8", "acaa6136ce98b086a7bcf5bed71ac34165939a08", "08b2b8bb4fdbf613209bd24813d051eb13d164d8", "093c3b389384812ea16f1ad18ce6c5f43c4f7106", "c6b9fb2b5bf87d87550e2dee0d45cda00d6a3373", "0154103e091dea574c39f3c89d52ccfefc06af6c", "1f06cd0ba8ade1716c5526202d54fab7019c5092", "66dd732e588bac4580342da21302c36270d615c5", "9fec3836b3e9554b5dc065498eb2214762a5f69b", "1bd049c431a3b763cfba63963435f6c91465cb35", "9f631cd564b38d87eb33de12657b2b7de69119a7", "9ca155165434e4dfd0832e4b325c88381dc603de", "081fdeea36d4b56a71e87b5b0de191aa368261c8", "a70ba22645eba9891e8cac8d08e36cc3d09e242b", "4853a26200889f033c0f509abf0f91d8cafba55b" ], "journalName": "", "journalPages": "613-627", "journalVolume": "", "outCitations": [ "c69f555adb1814e85d315387e99dc4a5a05f5a3e", "17ab106e2ce0657e1face65ac910eec3683674f5", "6074c1108997e0c1f97dc3c199323a162ffe978d", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "0641c61c2709ea41536cf78bcc6316fb4951b5ab", "805d0da469da6ba7571ee75732ab66202aaea9e0", "3b2697d76f035304bfeb57f6a682224c87645065", "26bc9195c6343e4d7f434dd65b4ad67efe2be27a", "046b7f6b48e4d9fcf173dea0a0802d7e87b383e1", "0154103e091dea574c39f3c89d52ccfefc06af6c", "17f70b9d1fcf3b31948ffa578ac89399751fe73d", "87e782af17ee32570ec30c1fdc2b97f33b3053f6", "7717b438da4ec3ca4247ff7abf6dd603e91fe41d", "0c4867f11c9758014d591381d8b397a1d38b04a7", "043afbd936c95d0e33c4a391365893bd4102f1a7", "e7d53f538f5239739d1f943c81d17e4a167c65c6", "3784b73a1f392160523400ec0309191c0a96d86f", 
"0ad8e89091eed09217e66adc98136126addc2619", "26e17f6b62a7caec660b3356d49e879e6e0eeabc", "0122e063ca5f0f9fb9d144d44d41421503252010", "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "09f72f02083830c1881b86e6016e1fe3fe41f65f", "4619a9ee024cbbf49947ef50eb2fc0f3d90f9180", "073daaf4f6fa972d3bdee3c4e4510d21dc934dfb", "01fcae344d2edb715bcc63a40b6052c0331741bd", "3a33d36257a40d180bef5385c8586fb618fc1161", "3c513e3f47c87da19a12cc65fb809eab671bf7ee", "080aebd2cc1019f17e78496354c37195560b0697", "6f54a7933235ced5684e3bff18f7e5dc40510018", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "1eb131a34fbb508a9dd8b646950c65901d6f1a5b", "e98b7a0cc384dc0eb8769557922f7dd01c9f3f24", "09a503095db2d68b439e48d67481399198ed0e5b", "91907feaf8dfb0bda37781cf1c17a961d50960b7", "0558c94a094158ecd64f0d5014d3d9668054fb97", "74fc396d0b8ec548d600395182f12c9b06cc84e9", "0a3300d149a0f45623e5cde4f9114b9773b0054c", "0b99d677883883584d9a328f6f2d54738363997a", "65fd142f37c315cdf892184f8fb21281b88f6269", "0626908dd710b91aece1a81f4ca0635f23fc47f3", "061356704ec86334dbbc073985375fe13cd39088", "5649e65179894dc8dc641fcb9aa10dee2b2ba7c6", "1ff88585ce3fd5fdaab6573722d4874641359951" ], "paperAbstract": "Machine learning is being deployed in a growing number of applications which demand real-time, accurate, and robust predictions under heavy query load. However, most machine learning frameworks and systems only address model training and not deployment. In this paper, we introduce Clipper, a general-purpose low-latency prediction serving system. Interposing between end-user applications and a wide range of machine learning frameworks, Clipper introduces a modular architecture to simplify model deployment across frameworks and applications. Furthermore, by introducing caching, batching, and adaptive model selection techniques, Clipper reduces prediction latency and improves prediction throughput, accuracy, and robustness without modifying the underlying machine learning frameworks. 
We evaluate Clipper on four common machine learning benchmark datasets and demonstrate its ability to meet the latency, accuracy, and throughput demands of online serving applications. Finally, we compare Clipper to the Tensorflow Serving system and demonstrate that we are able to achieve comparable throughput and latency while enabling model composition and online learning to improve accuracy and render more robust predictions.", "pdfUrls": [ "https://rise.cs.berkeley.edu/wp-content/uploads/2017/02/clipper_final.pdf", "https://arxiv.org/pdf/1612.03079v2.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-crankshaw.pdf", "https://arxiv.org/pdf/1612.03079v1.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/crankshaw", "http://arxiv.org/abs/1612.03079", "https://arxiv.org/pdf/1612.03079.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-crankshaw.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0a5f/f7336879c99513dca6fce6ef44984ebf3f55.pdf", "s2Url": "https://semanticscholar.org/paper/0a5ff7336879c99513dca6fce6ef44984ebf3f55", "sources": [ "DBLP" ], "title": "Clipper: A Low-Latency Online Prediction Serving System", "venue": "NSDI", "year": 2017 }, "0a6d2eeaa0b75b5a21583fbd75637a1541f30487": { "authors": [ { "ids": [ "3198911" ], "name": "Adam Roegiest" }, { "ids": [ "40379164" ], "name": "Luchen Tan" }, { "ids": [ "37752900" ], "name": "Jimmy Lin" } ], "doi": "10.1145/3077136.3080808", "doiUrl": "https://doi.org/10.1145/3077136.3080808", "entities": [ "Hoc (programming language)", "Information needs", "Mobile device", "Push technology", "Social media", "Software deployment", "Usability testing" ], "id": "0a6d2eeaa0b75b5a21583fbd75637a1541f30487", "inCitations": [ "83cd8de03862e9158491d67508c191511670ff5f", "6faeacaa803a86996a4b49af71f3a3615b46afc5", "470d2203dbfe0d9b7b88e1da73d4fda8869a4b11", "163f206e49ad92590c233bb52cdbb285abcdb3da", "5e913bc0ae97d94b8fe099b950e0421b0f2e199d", 
"21721417b24ffe4518f3f0e71f7ea287bf46822b" ], "journalName": "", "journalPages": "415-424", "journalVolume": "", "outCitations": [ "5199dab867277dc1b63193f7f8a3086c8c31c412", "56cc933f7ce1ff7421fb69cd2d50a51c0fc1dbc0", "25015dd8bd52fb85f8bc2ef2f432736750eac63d", "4ca3a87a6c5999dfd98a09720e14732346add88f", "0806321b65562ea4e5ef037be5fd9c754fc23298", "7e43ec7ef23547dbb042768de7b77a714fc351ac", "2858655a06c9e4a17b93c73e7d4951e43a236c47", "2167054da02b0bb2dddfccfa2c60866858478da2", "b0f78fab8f70deaa0b64466b3fe77e2863d02908", "712f45af0a205bcdabd9605c9af7d307dd34d493", "99c7fcf5b89857c3d24e6ab5b6ad82febdd6a8de", "2e36d49444ecb20d0905482a873c9386ef3ec9f6", "ab0463d8aef96697ad0e4d2c86fd1b41ffd4633d", "e561b69d5da73d8ba4af611e5bfeb3783f75c6c9", "2dcfbab048b397b42e955982b0fff94e4f64620d", "46e01ee940b09077ea582cd9b689b7a2bcacfd57", "1e0eb72167944b5ed65a51b5fd4205e740513f5e", "8fa336307bd4a4f80337469e5826c0b04161a125", "a4b954f18bb8fd3d1d02acab6e9c250f569986f2", "860f07c970acb58fa093c6b961be05b39ab7f0fa", "8a9f5e61c6c977cdacd85d9a7b14153026619505", "81873a26fd9e0e40de7163067ce959e47e052bb4", "2b1a28417592fa3091e261401d0f93c7d08597f8", "5b638d89dfcb061edd9949866a00d5117a5a0191", "2876dd349607801d42610252209c14910dee98d8", "6ba99ee6761815d870e06e2964e8bb8178793ebf", "46fddc2ebc3f6758edbad54c584170429c8d1a4e" ], "paperAbstract": "Real-time push notification systems monitor continuous document streams such as social media posts and alert users to relevant content directly on their mobile devices. We describe a user study of such systems in the context of the TREC 2016 Real-Time Summarization Track, where system updates are immediately delivered as push notifications to the mobile devices of a cohort of users. Our study represents, to our knowledge, the first deployment of an interleaved evaluation framework for prospective information needs, and also provides an opportunity to examine user behavior in a realistic setting. 
Results of our online in-situ evaluation are correlated against the results a more traditional post-hoc batch evaluation. We observe substantial correlations between many online and batch evaluation metrics, especially for those that share the same basic design (e.g., are utility-based). For some metrics, we observe little correlation, but are able to identify the volume of messages that a system pushes as one major source of differences.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080808", "https://cs.uwaterloo.ca/~jimmylin/publications/Roegiest_etal_SIGIR2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0a6d2eeaa0b75b5a21583fbd75637a1541f30487", "sources": [ "DBLP" ], "title": "Online In-Situ Interleaved Evaluation of Real-Time Push Notification Systems", "venue": "SIGIR", "year": 2017 }, "0a6fcd364bdbc911974ef9e775839e239d512e18": { "authors": [ { "ids": [ "34109206" ], "name": "Brent Stephens" }, { "ids": [ "3294190" ], "name": "Arjun Singhvi" }, { "ids": [ "1713535" ], "name": "Aditya Akella" }, { "ids": [ "9833675" ], "name": "Michael M. 
Swift" } ], "doi": "", "doiUrl": "", "entities": [ "Central processing unit", "Fairness measure", "Linux", "Linux", "Network interface controller", "Operating system", "Overselling", "Protocol stack", "Scheduling (computing)", "Stock and flow", "Throughput", "Titan" ], "id": "0a6fcd364bdbc911974ef9e775839e239d512e18", "inCitations": [], "journalName": "", "journalPages": "431-444", "journalVolume": "", "outCitations": [ "2de63b0c867b290d4f7217459c968aa98e5ad39d", "2e3cc2d55770aac26d3ce0cb6ddd96dbbcfec4cc", "77a3133097ff59bae0b6ac8fae418a58b585dacb", "022e4c238f9cf85b9d8142725c6a2adbdcca2094", "424bc408f86bada47693a2fb45369cdfaf5fbdb4", "55c5a2c1f2f6f66646baf64405464357fa7df8a0", "063e13ef8dbe06b69ecd07988898c102532a7458", "47d5357957cabb610131db1b228e58b70860ee8d", "025652412d507a8cf98ecacd8a44d32ce28995e1", "1aafc7066e52f18dee78103822da24a5d85da93c", "0d3f85933b6355789588476e491683532c68a906", "39300a6bb64f813bd233343b840cb169d8d0527f", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "65da29a03c8905cbc0614612d1632864336c4786", "132f00de21cee656d00ad6779f1926070ad59544", "0baf1bef6ee3bcb0b385a4ac303dcf0b406c64f4", "0e2249e3b0cd1fa9a7e0eee847b58be1cf2ec707", "90fe816f5af055871f63a77282d4a4849e0764d3", "3b988049dd8f62f772281e90196bbd793700c86b", "0a165c5f6c3dfbff61fb15c21f973b10f236ba2b", "7245d979a7ddd7079ea2bfb8b6f8ef4494ba1150", "5f28bf666498d5800e015f12318930ce03cd5587", "12d3952369fd92db86d4a3fff0a6fa8de3509095", "42d1b52254873ecd0f36eb7342f95dbad9c50187" ], "paperAbstract": "The performance of an OS\u2019s networking stack can be measured by its achieved throughput, CPU utilization, latency, and per-flow fairness. To be able to drive increasing line-rates at 10Gbps and beyond, modern OS networking stacks rely on a number of important hardware and software optimizations, including but not limited to using multiple transmit and receive queues and segmentation offloading. 
Unfortunately, we have observed that these optimizations lead to substantial flowlevel unfairness. We describe Titan, an extension to the Linux networking stack that systematically addresses unfairness arising in different operating conditions. Across both fine and coarse timescales and when NIC queues are undersubscribed and oversubscribed, we find that the Titan can reduce unfairness by 58% or more when compared with the best performing Linux configuration. We also find that improving fairness can lead to a reduction in tail flow completion times for flows in an all-to-all shuffle in a cluster of servers.", "pdfUrls": [ "https://www.usenix.org/conference/atc17/technical-sessions/presentation/stephens", "https://minds.wisconsin.edu/bitstream/handle/1793/75739/TR1840.pdf?sequence=1", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_stephens.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-stephens.pdf", "http://pages.cs.wisc.edu/~brentstephens/docs/titan.atc17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/936c/aa56e89e17363d1d56b0631ac22f11fc3e8d.pdf", "s2Url": "https://semanticscholar.org/paper/0a6fcd364bdbc911974ef9e775839e239d512e18", "sources": [ "DBLP" ], "title": "Titan: Fair Packet Scheduling for Commodity Multiqueue NICs", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "0a71dda39e97c9a4b8c4c88a135b9cda5bae588b": { "authors": [ { "ids": [ "2722186" ], "name": "Daiping Liu" }, { "ids": [ "1709780" ], "name": "Zhou Li" }, { "ids": [ "2146838" ], "name": "Kun Du" }, { "ids": [ "1699802" ], "name": "Haining Wang" }, { "ids": [ "33762719" ], "name": "Baojun Liu" }, { "ids": [ "39999006" ], "name": "Hai-Xin Duan" } ], "doi": "10.1145/3133956.3134049", "doiUrl": "https://doi.org/10.1145/3133956.3134049", "entities": [ "Algorithm", "Apex (geometry)", "Bootstrapping (statistics)", "Coat of arms", "Exploit kit", "Open security", "Phishing", "Rotten Tomatoes", "Spawn (computing)", 
"VirusTotal" ], "id": "0a71dda39e97c9a4b8c4c88a135b9cda5bae588b", "inCitations": [], "journalName": "", "journalPages": "537-552", "journalVolume": "", "outCitations": [ "05faf07051c6d4d18d2537e2ca00a66273a831ec", "121d9c737a887f7f6a03ffbc5be87b26bdad17f8", "523b3c576021827efbc54be8794953a216e77225", "11bd3633e2647a205b78f71a7a583f81cbe33b39", "0a964c5ac7e19cbdc820fd4ee101a5263385733d", "1418c58ccf2f85461dfff22f1a7ac0ce27e44f7b", "2912aa192bacb60c3764e7855a91ac8fb497093d", "2ff9c10a0a8f43306f3a0492f8d6eca744d4e7c7", "aa5b74993af58febc566cd067b94425e495b1a17", "8188a3bc844d1ea707fa476edadfc99d4c38e3e3", "7e9305eef5b3cd02c5d9e3768a5acf3c5654c8b3", "090aca02c7faf914f8cab563065fca43f8aa6118", "040f9477107816b0d0234ea49cb350befd4ce54c", "34722d89e5f33203cf1cc1465350b8696bbf3a9e", "297c46edcc9730e9177e28d7824c1cccba655fb5", "416787e6a008ee539f2f5d157863f1cd36419e7d", "6c5d03568e012a95c5a663309c8c21ff1e07e53f", "01e1cbc042ba2c0b6fa73e41cf0a5ba2e972d0cf", "49a8f9e8ed7dbd8382dbd30aa81321281cd54c07", "69349684bf61888dc9fe5ff679ff1c7572d2d535", "63b94d37f0826f65c2bc7ecc22c10da91efa6520", "06240b5f5a928d49b53e848bcbaa4bf7c6beab35", "b0d2e7a274bf7f778f0cd22486ead79d58c859ac", "653eddac5447381e9a7e221498973296e8eb732c", "3032182c47b75d9c1d16877815dab8f8637631a2", "22a78f31395e79cb6c99c3cedd248ecd6568b7f7", "0bb1bb38263368784df02ca4546fba3ea12c7c0e", "6e9f6466a125587b4eeeb3845a5d8afe8b1e902c", "0bec9e9fee4bee287ed2ea1ef9059b573fb0b711", "0238bfd6a96479a45715423abaaedaa78a2e8b8a", "ba9af0bf228cedfad61daa481a71ed433076ab8d", "88a0a5f7dab7ca987d17d3bbdb4e49d3fbf439bc", "0796bb6c803e4256d7bdc0885c6a26d058da3319", "16b4a0d02f844e27a30dd722811442dd8e6a580d", "219a11f8034f5d1a60a5afee946f9df386b71b4d", "9509f45ebc129bd68ea94d55d90fee410afb8143", "6db3835143fb2c337449717005e551ef11c58fbb", "531ef3ad7b9e6d46b453547cf40182832eed2b0f", "8714e68bb306f89b3fbce3307833405b6a632487", "519a022f6103a68331402f499a9bc9447ef70995", "353bc95ea6d720d867489e3d4cf5c9427531c7c7", 
"2830246be09bbd376cd7f2ed9ae150110dcb08cf", "e769c99f63f526ec88e74d9683bdbc12a6e55359", "6a06f87d82975b873b3cd6130a60a26c1d0b181c" ], "paperAbstract": "Domain names have been exploited for illicit online activities for decades. In the past, miscreants mostly registered new domains for their attacks. However, the domains registered for malicious purposes can be deterred by existing reputation and blacklisting systems. In response to the arms race, miscreants have recently adopted a new strategy, called domain shadowing, to build their attack infrastructures. Specifically, instead of registering new domains, miscreants are beginning to compromise legitimate ones and spawn malicious subdomains under them. This has rendered almost all existing countermeasures ineffective and fragile because subdomains inherit the trust of their apex domains, and attackers can virtually spawn an infinite number of shadowed domains.\n In this paper, we conduct the first study to understand and detect this emerging threat. Bootstrapped with a set of manually confirmed shadowed domains, we identify a set of novel features that uniquely characterize domain shadowing by analyzing the deviation from their apex domains and the correlation among different apex domains. Building upon these features, we train a classifier and apply it to detect shadowed domains on the daily feeds of VirusTotal, a large open security scanning service. Our study highlights domain shadowing as an increasingly rampant threat. Moreover, while previously confirmed domain shadowing campaigns are exclusively involved in exploit kits, we reveal that they are also widely exploited for phishing attacks. 
Finally, we observe that instead of algorithmically generating subdomain names, several domain shadowing cases exploit the wildcard DNS records.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134049" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0a71dda39e97c9a4b8c4c88a135b9cda5bae588b", "sources": [ "DBLP" ], "title": "Don't Let One Rotten Apple Spoil the Whole Barrel: Towards Automated Detection of Shadowed Domains", "venue": "CCS", "year": 2017 }, "0a85b3afc89958583642b7fd39b37e745a053190": { "authors": [ { "ids": [ "3071783" ], "name": "Cheng Tan" }, { "ids": [ "2490788" ], "name": "Lingfan Yu" }, { "ids": [ "1956975" ], "name": "Joshua B. Leners" }, { "ids": [ "1756078" ], "name": "Michael Walfish" } ], "doi": "10.1145/3132747.3132760", "doiUrl": "https://doi.org/10.1145/3132747.3132760", "entities": [ "Client\u2013server model", "Data deduplication", "Overhead (computing)", "PHP", "Server (computing)", "Speedup", "Web application" ], "id": "0a85b3afc89958583642b7fd39b37e745a053190", "inCitations": [], "journalName": "", "journalPages": "546-564", "journalVolume": "", "outCitations": [ "79e14bf42535966db5e056af6d8a0e0ec1f522b2", "3ffc94c7066b4856b1cfae99ab66cd20310e41dd", "32a7c5e10a09b5532d56af50ef2f60d9776cc56a", "045943438dd45f25f0127d97ed9116b3b05914a7", "1b5e73314491f5ecbd4db9d211ca81be28e2eeb4", "0821b7efb6a47783d8bf9a62291b24d94bbaaf31", "8eb2b3334c2226460e234f411b33ccc3819d3acb", "72657b0428f9b8f705546eb5a9147203a534d8f6", "0336a45cb84a4838c2b6a81ba08adb9a473dcd98", "062008493d48ac414b45e3d989266d0574c1b3e5", "5644dd91f6e5ce01229dd94e9db9e2fba321da6e", "05568422beb1fe145094506371cda1f70ea04d25", "33460a38e8503e37a112d6179736980ced0e3148", "07f0e56d1c37c213cd5c617dbfba5a0549629a19", "30f52a79ff53f8969ffcba19013b4a43e629875f", "6ddd789f2aa0c3710ffe2c8b652a6f027ffc1bef", "133eba30fbd96f0551d692c76f4c851d4d2f9f27", "11e3bc2798fdd8e1d7a6979c3c4043147e0e3c2d", "d3a95ea430c0e33b044e9212a4857942e49621d5", 
"041b6be90c7abb4d1db99b1a56074ff5bc816392", "0a35c32ebc233556d11c2038f5a4362f2c40b2a9", "0b5f3e446fff1c1674c6ca568f6a933631cd36fd", "1a33c542b064f95eb6fc9b0003e80ff4f1b9289a", "8c43726d7229a6dea25d3f510ca4b8e681117b60", "25b4b94a9fc4fed86a5fb18d0bf1c2e681946e18", "38bea412ab3bc9cd5f73d6c2209252f6a1f14aad", "7b685dba8e41ad793dc107957c44a842fdee9ea4", "977d014244451a0182d5ca915dd0748f269321de", "d3630df8179b704a516b6ed01cfcad24ae50ade7", "1d9cf87fa6d6175a2c1543afff263113657765f6", "044b95211345359657b819a7a68b3c146baac802", "05bd2c6e82a96e7bb3d7d7262f953fc53ead3d1a", "040f3b7db46a8d6cc0e9f58a807a29d9e8e13e3b", "17886b4911ffd50d7e02a574caad34a286458b3a", "114801eccb5eb0831fd1848f351a138253a42f15", "30ef5ee760f7257951d887cdcec1f87d04a1ee2b", "643a5ea2791f56ed58dcf50141301216de10bb9d", "e7ab23d011e5183db78cfea48e303210f6e57e2e", "09cb251072ef19e125ec5d94de5777584af68db5", "4096f239b93dfee8fe033db2846a334db9c1f524", "72880d15db2282512e5d3f0a3796b397d68cc7db", "29543bb7c680dde79f374e73930ca68833e2fb37", "48326c5da8fd277cc32e1440b544793c397e41d6", "25d4d5aa3c0ea9b4b1084bc50fefb05bf3f6b2e1", "ab0d8f966a6fd16865b9a459ccb5383bf58e70a3", "05f70f429a7bf38efa9e457fd486cb862bd495be", "0e578433d4e8bb2a571c87a2d22816074902f009", "266edad9a7cb024fd6a4128488de1485bfb664d8", "07c746c119b1d18e6580840b2166721e07b4433d", "f2ac9d6670fead4edf219ccf9534c1068cb8008f", "71b6a40dd2195c4e6624c8bcaf1b93f8b8f1cd89", "85d555f7ce19740b4fc656ff797623c6e1513018", "4a3f0c1b983315c863dd6f4820dc147b50ab6109", "3ff0fbebd3e0a2aa43e43963231131b9bd55336f", "764d2086f1ad5dd617c666c227708309d969fa03", "493c3746f082ec20b45053c6c5f850149c571360", "3d5f3bedca7828899b81448e9c33717dd55c36c2", "201f43d5b419070dc25cb173793a8227a075d32a", "42142c121b2dbe48d55e81c2ce198a5639645030", "22a5eeb8608b35e371b7544a54fabeadca8866e3", "31181e73befea410e25de462eccd0e74ba8fea0b", "5d87b9a20825c62d6dbe76f95a094ba953077588", "9117e118c233c882658c0b7e6f6afb0fc1985f18", "17f19d9ec093ef82a10f1276fc53c10d4667836d", 
"0b7e6c5d49b7681fa7426b86040072e3b36a2223", "177ba72da171d8c741a08c75162d820c501a4f4c", "40b77a383e6254c1416c3582e98b9c9527bad691", "1d1fc81989ed1d26ba4cf6c3025ffaa740023a5d", "2c0fba0ac99f9e73de7552a1e82ac5bc528c0864", "30f08de285bc168f6326ecabc384c2e415adcb37", "47b7f413e553f8534b584c51a7cc7903b98d3c48", "0270a2b35f745f2ed17fbbac950e8086ee9aa1d6", "0a289fd7b14345822b1acda6d82750b15d59663e", "8932c6d523f9f5aba76993c71c0089ba15155a7c", "ca4114da5e6885e907ccf094f2f469dd23f6c816", "0b77f2155333fe403218d891433534bb4bbdc3b4", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "20d1c524fd1557e76a687ffab57f96364beb6fe7" ], "paperAbstract": "You put a program on a concurrent server, but you don't trust the server; later, you get a trace of the actual requests that the server received from its clients and the responses that it delivered. You separately get logs from the server; these are untrusted. How can you use the logs to efficiently verify that the responses were derived from running the program on the requests? This is the Efficient Server Audit Problem, which abstracts real-world scenarios, including running a web application on an untrusted provider. We give a solution based on several new techniques, including simultaneous replay and efficient verification of concurrent executions. We implement the solution for PHP web applications. 
For several applications, our verifier achieves 5.6-10.9x speedup versus simply re-executing, with <10% overhead for the server.", "pdfUrls": [ "http://arxiv.org/abs/1709.08501", "http://doi.acm.org/10.1145/3132747.3132760", "https://arxiv.org/pdf/1709.08501v1.pdf", "http://www.cs.nyu.edu/~mwalfish/papers/ssco-sosp17.pdf", "https://cs.nyu.edu/~mwalfish/papers/ssco-sosp17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0a85b3afc89958583642b7fd39b37e745a053190", "sources": [ "DBLP" ], "title": "The Efficient Server Audit Problem, Deduplicated Re-execution, and the Web", "venue": "SOSP", "year": 2017 }, "0a896b93947d36632c13799ccd873006d4debfe5": { "authors": [ { "ids": [ "1758590" ], "name": "Andr\u00e9 Hernich" }, { "ids": [ "1707504" ], "name": "Carsten Lutz" }, { "ids": [ "2339999" ], "name": "Fabio Papacchini" }, { "ids": [ "1681559" ], "name": "Frank Wolter" } ], "doi": "10.1145/3034786.3056108", "doiUrl": "https://doi.org/10.1145/3034786.3056108", "entities": [ "Co-NP", "Complexity of constraint satisfaction", "Conjunctive query", "Constraint satisfaction", "Constraint satisfaction problem", "Datalog", "First-order logic", "Grammatical Framework", "Guarded logic", "NP-intermediate", "Olami\u2013Feder\u2013Christensen model", "Ontology (information science)", "P (complexity)" ], "id": "0a896b93947d36632c13799ccd873006d4debfe5", "inCitations": [ "25dbb2685ddc6ef918d00cab27174b37bde78b59", "28fdee2de3c2f8b504a4e069ae8de90c934c8d77" ], "journalName": "", "journalPages": "185-199", "journalVolume": "", "outCitations": [ "35d4d9c6dcd60996921f824c0d5993cf97bc2484", "49cdddf8d1eb9cf7050484c4c59f2128d7b24edc", "300300759be356a3a355b21bcc312d87280d93d2", "0946163c1464c18b52d8f7783e0b984cd18b4655", "6da47cb5e895a6c121eeb0225ed1f1e50507b9d1", "096ff58c6217431b38d0f235b2a68c76a43d9525", "37302f7c873301a6546a59bf80b9eb1e716dbd92", "7e08593366278f426c4cf90a16435daf1a934174", "d627789407f9a263ac7b40b73ad085e5efda81f8", 
"65298a45b07dbe81bd7ff297b647688e3322e3b8", "0b5591b570f035cfcf00fd3774c63900f1da84ce", "466a0b7cffa32a62ca3fe8db8fd65f363a3a6463", "f2ae19076b26df32d7df3566860befc188fc6abe", "c8a25014ee31212477a456c5243947976c8a728e", "5aca5717abfcb3fd6d344e0c62943660c16b252c", "b9a02a63671c04f5f0aa6327965c161b681afbf9", "684592ca024de5f84e3f96fc522043c2ca47b9df", "022917442e2f6406d759e523c691c9aa95a98ed7", "5ed326378ae15321234c60d41ea172acf260d988", "61f0c4e91dba45e337feb5fc46853140c87267a3", "2057029e758fd42f882a1108b8d8d269603a9439", "2bc7bac7f7cdf20816758fd794909176cc97ed92", "686a97c0806a8590f58733afe3dedec555534041", "a1bb7f499cc059e89d1a024594d7ed5ab4f5a051", "353e9cd76026d1b5abf1631a5bcf74fbf111b2d5", "0d07b93cb2ee53cdfa27b116102f38c8e8b921f7", "4ea6563b80acd071c9acc0b7665d495f950ed519", "65783c300d71a4cb5cf34662f9e5a7119af3d2e2", "08d1aea39a9687bf9d13c0b21b767df16b97bfeb", "af21def975d12de6bf3d92570f487235c89966e5", "1ed8636cdd2892833a367a3ffb6a43cf0a8d7d76", "2132f2419f0e38d57199a5046885b04196fc0155", "2c6a4af88d24e8a1acc2ceccc06edcb4fc03cf5e", "02557d37a9f129fbe23a46b7a00a90baf7909234", "13efc5ab07db0c41c0cfc25018ef4d8a5a10c3d9", "407748e97d8d3878535f6371ad324708915bf6d9", "157df6d28bea0f3b3d053268185170d9e97baa70", "01cefd68769a905baacdf5afd82ed0f3234eeb74", "4f097062a1864f5f64592d79539ddb3a67aadd0f", "4cdf3930fabf148fae7b82a9676bd03660372023", "4ca743cd9095700f1ad11703d6ffcc1b98c4b2c1", "d4e517728690a77751358bc1bfabc559c297d62d", "7cc81c5a0baf0593d32d5418293ec1aa26efec7f", "1ebd1c8076531dbee5eba19250506f5225b07ef8", "a7018d72473daadce315a6da5d4801c12143a4e2", "21b9d5df4174cfed85a51c87e8848f9095fe3214" ], "paperAbstract": "We study the complexity of ontology-mediated querying when ontologies are formulated in the guarded fragment of first-order logic (GF). Our general aim is to classify the data complexity on the level of ontologies where query evaluation w.r.t. 
an ontology O is considered to be in PTime if all (unions of conjunctive) queries can be evaluated in PTime w.r.t. O and coNP-hard if at least one query is coNP-hard w.r.t. O. We identify several large and relevant fragments of GF that enjoy a dichotomy between PTime and coNP, some of them additionally admitting a form of counting. In fact, almost all ontologies in the BioPortal repository fall into these fragments or can easily be rewritten to do so. We then establish a variation of Ladner's Theorem on the existence of NP-intermediate problems and use this result to show that for other fragments, there is provably no such dichotomy. Again for other fragments (such as full GF), establishing a dichotomy implies the Feder-Vardi conjecture on the complexity of constraint satisfaction problems. We also link these results to Datalog-rewritability and study the decidability of whether a given ontology enjoys PTime query evaluation, presenting both positive and negative results.", "pdfUrls": [ "http://www.informatik.uni-bremen.de/tdki/research/papers/2017/HLPW-PODS17.pdf", "http://doi.acm.org/10.1145/3034786.3056108" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0a896b93947d36632c13799ccd873006d4debfe5", "sources": [ "DBLP" ], "title": "Dichotomies in Ontology-Mediated Querying with the Guarded Fragment", "venue": "PODS", "year": 2017 }, "0a92b75415121f5f9fed192c97b48959451a9072": { "authors": [ { "ids": [ "3407310" ], "name": "J\u00f6rg Thalheim" }, { "ids": [ "38278812" ], "name": "Antonio Rodrigues" }, { "ids": [ "2282441" ], "name": "Istemi Ekin Akkus" }, { "ids": [ "3025359" ], "name": "Pramod Bhatotia" }, { "ids": [ "2042672" ], "name": "Ruichuan Chen" }, { "ids": [ "34824488" ], "name": "Bimal Viswanath" }, { "ids": [ "39458536" ], "name": "Lei Jiao" }, { "ids": [ "2314032" ], "name": "Christof Fetzer" } ], "doi": "10.1145/3135974.3135977", "doiUrl": "https://doi.org/10.1145/3135974.3135977", "entities": [ "Autoscaling", "Causality", 
"Cloud computing", "Dependability", "Dimensionality reduction", "Distributed computing", "Extractor (mathematics)", "LaTeX", "Microservices", "Trust metric" ], "id": "0a92b75415121f5f9fed192c97b48959451a9072", "inCitations": [ "330de12472ed98642e1ed28944ff94d3d6eee8de" ], "journalName": "", "journalPages": "14-27", "journalVolume": "", "outCitations": [ "4e9218e0e4d71ab08670342a57636ca0adf3d092", "78f853271fe69da617d5a14a1e54cbae6a982a50", "114f664a25c411b708c0f0058c3f455b5a1f3e9b", "1d204d774c134dd7df97a6a83e12387efd0c7a01", "375c3b4753410b642cd404b9625ecedde043bd1b", "0456a5c3b2001465d05e84ce6786ef200184de65", "8b0bc865a9b04c1995a6105005a46c68729a1d75", "37763b7744161c415181ec71fd7e6290ef582a4f", "21b9065d0596a033a15c01b227a8d933885d77a2", "330de12472ed98642e1ed28944ff94d3d6eee8de", "91713f09aef58aea7b5a319156bdb92400cc2e24", "5e0f649a6471e33df1ce968c909bfd7946637823", "df96114c34c1cb9aa8c1237ad710adfab3c269b6", "52ecb220e8a6e67fe0382f64a9f676de5fce32d3", "003d5a65de0ac72daaf105ded903cb3eb88585b3", "9e94390e67fa2c44188634f6a4e8195b1eb309c8", "04c724bad0963d1e6e7a3743be08c08810402582", "09ed9cb47e09f56608bdbe6dffaa527c8ca0cd73", "5e53ed57e4b41d1258dd4b7e1feba7425b76e747", "5ec6157896c053600625da7a5da6f0451ed9c12e", "f3dec4cb3741bf3b88ab547e28fb8b37e371d72f", "4a42f1599d7e2d1a5f74651f4ba21386f9afdb31", "0e4af03d7379603014d1bdab34cd4801f37f8e4a", "9cba7563c2a1390a4c3283f64134ac732b465077", "3b034ee536cbf8c0152c8eae29b74a821d958976", "6c0b659d8af32ae70b944c0e3215a2e0740aafa4", "0aeb77fb41dc8e863e054fcffea7b8b3011515ce", "3d5f3bedca7828899b81448e9c33717dd55c36c2", "6409e64aed68fcc9e3fdc35b87dd168eeb440d32", "f16841e022038e94a59f7e0a82002102b78d79a4", "a63715af54c3161843c1f24325f96f0b780b7055", "3af15292037d1fa634662f9acec89e89d0e21656", "35c15d7dfa1fddd2c0292146412ebbec704e8be9", "2498644b120efc708e253b3ae9fbb5abba062d50", "1c6805d6029f56b49041f7578ad4a412a0327953", "b2630bb1ecfdc53a63947391d6914356db26fe9b", "140d1164ec4f7e1099894517def9569650134bed", 
"f060942169f56e0aa8f3253047fac49b7c8eff2d", "58caa96376740ea56cabafb64dfdcc5d2c49a861", "765ee60756583f27322f1316da40696ae72812ac", "10e7b82d45b77cef4807bc21dfed345800826664", "808fadaaa7d7091e95809f419959917bb6ce4a6d", "372907cf1b8affb89cc791512c09cc0f19e2e7df", "10da8673314188dd6ab1f16f73c05358771dd8cf", "0ec59b7fae15a7caa3256ba31b21802d455618c2", "ec78f31c4d43c8de4ccd66a73778ff0913375a96", "15dc663b6761d53e90415427d5a24cce1e0e38da", "31972cf84ce91094718947037da3debb220e9fb9", "2fa833359523abef0a86aa15a05d0c63893a4033", "9e819f5cd459026bcbe97ad42db0647b1dc7c6a9" ], "paperAbstract": "Major cloud computing operators provide powerful monitoring tools to understand the current (and prior) state of the distributed systems deployed in their infrastructure. While such tools provide a detailed monitoring mechanism at scale, they also pose a significant challenge for the application developers/operators to transform the huge space of monitored metrics into useful insights. These insights are essential to build effective management tools for improving the efficiency, resiliency, and dependability of distributed systems.\n This paper reports on our experience with building and deploying Sieve---a platform to derive actionable insights from monitored metrics in distributed systems. Sieve builds on two core components: a metrics reduction framework, and a metrics dependency extractor. More specifically, Sieve first reduces the dimensionality of metrics by automatically filtering out unimportant metrics by observing their signal over time. Afterwards, Sieve infers metrics dependencies between distributed components of the system using a predictive-causality model by testing for Granger Causality.\n We implemented Sieve as a generic platform and deployed it for two microservices-based distributed systems: OpenStack and Share-Latex. 
Our experience shows that (1) Sieve can reduce the number of metrics by at least an order of magnitude (10 -- 100×), while preserving the statistical equivalence to the total number of monitored metrics; (2) Sieve can dramatically improve existing monitoring infrastructures by reducing the associated overheads over the entire system stack (CPU---80%, storage---90%, and network---50%); (3) Lastly, Sieve can be effective to support a wide-range of workflows in distributed systems---we showcase two such workflows: Orchestration of autoscaling, and Root Cause Analysis (RCA).", "pdfUrls": [ "https://iakkus.github.io/papers/2017-middleware-thalheim.pdf", "http://doi.acm.org/10.1145/3135974.3135977", "http://ix.cs.uoregon.edu/~jiao/papers/middleware17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0a92b75415121f5f9fed192c97b48959451a9072", "sources": [ "DBLP" ], "title": "Sieve: actionable insights from monitored metrics in distributed systems", "venue": "Middleware", "year": 2017 }, "0aa2910baf6d3bc6735a63ad8b648c3b3fdbad8d": { "authors": [ { "ids": [ "2880213" ], "name": "Edgar Solomonik" }, { "ids": [ "2919642" ], "name": "Maciej Besta" }, { "ids": [ "34920674" ], "name": "Flavio Vella" }, { "ids": [ "1713648" ], "name": "Torsten Hoefler" } ], "doi": "10.1145/3126908.3126971", "doiUrl": "https://doi.org/10.1145/3126908.3126971", "entities": [ "Algorithm", "BCJR algorithm", "Betweenness centrality", "Centrality", "Correctness (computer science)", "Graph theory", "Matrix multiplication", "Multiplication algorithm", "Naruto Shippuden: Clash of Ninja Revolution 3", "Shortest path problem", "Sparse matrix" ], "id": "0aa2910baf6d3bc6735a63ad8b648c3b3fdbad8d", "inCitations": [], "journalName": "", "journalPages": "47:1-47:14", "journalVolume": "", "outCitations": [ "7c5fd7827a851c781ca1fba8916d6bb425179504", "43f01d002dab4377b944e0fd42ea6ec02587758e", "ef62e60b81317a24dbeb8ded6dc4a8ed89b776a8", "32462e94db16133265d3c9abde8418d13937eed9", 
"b888ce28cbbdd2f7173bc0c7a284d92d6b2007b0", "253402be4173c31f09b74007c3024518fa1c06fe", "a3b9dcc5d35f622f279f5fdeaac8d6d4b38a1b0c", "ae18b99bfa8940f7a17b7f77eb7177d953a5d9f5", "9702d4b5c5528f7b06b427160a9a19c5bbbfad9f", "44836e86d596e0c7f608ac2b4102c6d436311452", "04373d13bedbf3c4276a8b3b86311a1bff99db75", "45ec2f139038925ac9badac55182e908a65d0377", "1aa8ad634d1879af9b5ac34b44ecc3de8debd276", "5f8991828def57d2f0cda942566afff56740d150", "069ff37d81bc6c48d3a9c75af6ab0a07341cc3fc", "08e1da876945d170c527b0c6e836335cdf15fc59", "12bbaed16616256b48a12f36a9951ebe00c2481b", "70279bddb1fb7f5a032a7856640f8dbc7b08edf9", "9d2c8cc8f431dbfd852882df627479226347542f", "0425f1e7e8651b5ba3c9e2eb98a3c50a07146972", "24264bbaa34bdb3234a5b6947bc30329f63851fa", "b7bb051c2376345f5c5e80f165b15f2f2e68ecc9", "1ef8c8c815b7268d7f7d4fe76af78aaa8df3e6da", "080da07e23a2d3b2ecf05fea82959aea365ecdbb", "31699c35f42e4d9d108b4c595f9cea9655f5022e", "84ba025c6b28617241274699dccd9e5308fba766", "01c1f0e97ce5c74c714dc7aa43cb064f45cc3b04", "8c51cd04be72721166c44f65279f9b9bebf423d1", "10ba8c0a0c507019291fcca384ae30628410325c", "3e58c1263047ccc126ca0c06dcb150ff9d172512", "bf980d3cc50ae14ce104207882ee1fbbadf7a5f1", "87ee99d4cc4e0601cbb519f6ddbac85772bdc49e", "ad5e1e6c5b48f7f2cdafe306fbcac55b0be755f0", "1186c4a90fb212bdd466159c3a9d45a83189088f", "5c72341e401827a3c0c06b169cfda57b3f316a89", "7a8308159dc543c5945d1b448b9b080fd3aa52e0", "5a3cd8c65ffcc25bef346174d1f0bc3f83c5cbbb", "3fb3f861b51a3f3c2f240a3a311be83cb762ca1d", "8f2ae41f85720e86b5f420d0e55b8d913f808804", "b582d4a005c3288858eb3910e9233edb35323f49" ], "paperAbstract": "Betweenness centrality (BC) is a crucial graph problem that measures the significance of a vertex by the number of shortest paths leading through it. 
We propose Maximal Frontier Betweenness Centrality (MFBC): a succinct BC algorithm based on novel sparse matrix multiplication routines that performs a factor of p^{1/3} less communication on p processors than the best known alternatives, for graphs with n vertices and average degree k = n/p^{2/3}. We formulate, implement, and prove the correctness of MFBC for weighted graphs by leveraging monoids instead of semirings, which enables a surprisingly succinct formulation. MFBC scales well for both extremely sparse and relatively dense graphs. It automatically searches a space of distributed data decompositions and sparse matrix multiplication algorithms for the most advantageous configuration. The MFBC implementation outperforms the well-known CombBLAS library by up to 8x and shows more robust performance. Our design methodology is readily extensible to other graph problems.", "pdfUrls": [ "https://arxiv.org/pdf/1609.07008v1.pdf", "http://arxiv.org/pdf/1609.07008v1.pdf", "http://doi.acm.org/10.1145/3126908.3126971", "http://solomon2.web.engr.illinois.edu/talks/sc-nov-2017.pdf", "https://arxiv.org/pdf/1609.07008v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0aa2910baf6d3bc6735a63ad8b648c3b3fdbad8d", "sources": [ "DBLP" ], "title": "Scaling betweenness centrality using communication-efficient sparse matrix multiplication", "venue": "SC", "year": 2017 }, "0aa588e5e7a6253e548e55bf35c77128ee55963d": { "authors": [ { "ids": [ "3393711" ], "name": "Thomas H\u00e4ner" }, { "ids": [ "3393324" ], "name": "Damian S. 
Steiger" } ], "doi": "10.1145/3126908.3126947", "doiUrl": "https://doi.org/10.1145/3126908.3126947", "entities": [ "Algorithm", "Automatic programming", "Benchmark (computing)", "Central processing unit", "Code generation (compiler)", "Compute kernel", "Computer", "Electronic circuit simulation", "Manycore processor", "Petabyte", "Program optimization", "Quantum", "Quantum circuit", "Quantum computing", "Quantum mechanics", "Requirement", "Scheduling (computing)", "Simulation", "Supercomputer", "Supremacy: Your Will Be Done" ], "id": "0aa588e5e7a6253e548e55bf35c77128ee55963d", "inCitations": [ "522671f4b9d0a17a2a9f151bddc5ef8495c7e7bf", "e553affcdb26b54101c33f553d9399dea8b76945", "d2f57b0f5926731ed17731faf07dfc68b10d87ea", "7fc32298214e8f5656f761b0c621ede631960fcb", "f5a4748362be169a09d12f5dbaaeced561bfe5cc", "a5a84bfee235464ea3ba7f960548f804dcd0015a" ], "journalName": "", "journalPages": "33:1-33:10", "journalVolume": "", "outCitations": [ "ad099359e2a032b425337df27047fabc873e6e3d", "d023ca8e0583d31b7adfafbd60b6422c19c07ab5", "156de73e263732c16f0c0a52f15f110ba1c23a8c", "5f8991828def57d2f0cda942566afff56740d150", "52c82934fc0911133382dc90ec586d24b8f10aac", "38c5b64b3864d8355ef7d1d8cbf66229cd323b19", "0274da2ee9a87ec8eebc86324cb18cb844741115", "07c970b778197e4938e6651ea7732612274f26bc", "931ecf5db28dfedf713d69e585162e08f060ccd4", "0ff40629863fae7e7824c5e55dd0382644a970ed", "09d2ef139e0e9a3912501bbc273ccc1dbe4f4322", "b4ac17c649bc8dc4653ce6c114d4eeae3d6ed66f", "02dc8507c3b52953851d93393c96863aec2970ff", "0c969a06d7096cc81fc547f46b4baa6029e17d1c", "33ef53f7bfa7d777df6989810fd3ab2b0e987aca", "9a128bab702e5378df5f8524a6e876aa6cce44d8", "9d30381c49afa033eacc04fb68975762eb7bafab" ], "paperAbstract": "Near-term quantum computers will soon reach sizes that are challenging to directly simulate, even when employing the most powerful supercomputers. 
Yet, the ability to simulate these early devices using classical computers is crucial for calibration, validation, and benchmarking. In order to make use of the full potential of systems featuring multi- and many-core processors, we use automatic code generation and optimization of compute kernels, which also enables performance portability. We apply a scheduling algorithm to quantum supremacy circuits in order to reduce the required communication and simulate a 45-qubit circuit on the Cori II supercomputer using 8,192 nodes and 0.5 petabytes of memory. To our knowledge, this constitutes the largest quantum circuit simulation to this date. Our highly-tuned kernels in combination with the reduced communication requirements allow an improvement in time-to-solution over state-of-the-art simulations by more than an order of magnitude at every scale.", "pdfUrls": [ "https://arxiv.org/pdf/1704.01127v1.pdf", "https://arxiv.org/pdf/1704.01127v2.pdf", "http://arxiv.org/abs/1704.01127", "http://doi.acm.org/10.1145/3126908.3126947" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0aa588e5e7a6253e548e55bf35c77128ee55963d", "sources": [ "DBLP" ], "title": "0.5 Petabyte Simulation of a 45-qubit Quantum Circuit", "venue": "SC", "year": 2017 }, "0aa8488958c2ef93bf147ffb17b5e9727482d41f": { "authors": [ { "ids": [ "2793898" ], "name": "Jo\u00ebl Alwen" }, { "ids": [ "35785880" ], "name": "Jeremiah Blocki" }, { "ids": [ "40610655" ], "name": "Benjamin Harsha" } ], "doi": "10.1145/3133956.3134031", "doiUrl": "https://doi.org/10.1145/3133956.3134031", "entities": [ "Adversary (cryptography)", "Algorithm", "Amortized analysis", "Argon2", "Block cipher mode of operation", "Central processing unit", "Cryptography", "Data dictionary", "Depth perception", "Depth-first search", "Dictionary attack", "Directed acyclic graph", "Directed graph", "Heuristic", "Key derivation function", "Online and offline", "Password", "Provable security", "Side-channel attack", 
"Throughput", "Time complexity" ], "id": "0aa8488958c2ef93bf147ffb17b5e9727482d41f", "inCitations": [ "98be4c64a0a8e64d743633f3b75c1a31223af691", "671e039c3ba333393c02877e1ff06e6ad778ea95", "0f9c24045c9622e4647d15a0f3169f5761cdbf84", "5b2145357d490c78d7ca090626c2ab481ca88d2c", "56bb7cab4c72b9d5cc00d53a9514cf8e85e251b9" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "443", "journalVolume": "2017", "outCitations": [ "d8ad6ba58158088e7b5bafac8c5fa911c22c3076", "6aa583bbe2fa0b0e776a403544134ac52c1964db", "236ca8e8cf2797415af4260869a8681502119730", "2cb5bf5e932187e6e856313ff0b306a3fe135091", "efe94851c83d86591d272bb55176a7ba94730673", "47ac4bc3e1dca647e3fd69305f63686287c207c6", "08efa1b83fe9a6dfe8e4436f595ec98ddd1dc02d", "18ac2601032a6a0c4b367f95061a6c6fab3218ec", "6d5c67cbc0bf3aefb7cac6d17c26b79e237a7ade", "4f5253c5acb2e4aff3c59e401458e098833842b5", "6b0fbe6e70a771671b8007b308ef5eee592fd216", "2aede4259e63290663cd5971241153fca7e46c8f", "a250e97827c7e2abd615cd9a6bf933f60f234978", "6d21cb9e2ee76fa72b8ce22e9e7d24c30fcc6049", "1055858b1a9e9cd7885eca37f6259d10270865ca", "7b412025ccec85b51f604a9bca3fed5f3d00771f", "8e02819e3182df4b529ee0db15f1e09410dbea4e", "0c48f0365c816d54cbebc47edbe71132d09b6436", "f00a229642df5a29e0012c03a640d0878e1b0648", "42a9adeaf02521e350a80c29f89f9fda5429f81f", "f49f8e135695937bfe03e467e215177eec79d7dc", "21ccaabe3e50bdd7a8b4a2c08a654dbad95ee4fa", "5ce3c67588462e053a621ed49c79fc58037de7ab", "01ddb2881ee1e912ca52c5d59248e451d9827519", "a7315f8f263567e5234dcde02899a8111876d679", "fe7c1437c4da50f9fd9cfadc8a1ac6c59ea3ffe5", "0001f92cf8e343bcb10f24498cee635159e5c08f", "3ab11c3360c5a14b2854378d6f79140d815681d4", "3c8d9c1f059efe403d39b40bdf46b9d881db7ff1", "bfe9062fe7324dbe1ca914b0bad6c2aa6d83c2c1", "d02ecbe31041d38be4febad484994a2c04bd9014", "5b2145357d490c78d7ca090626c2ab481ca88d2c" ], "paperAbstract": "A memory-hard function (MHF) ƒn with parameter n can be computed in sequential time and space n. 
Simultaneously, a high amortized parallel area-time complexity (aAT) is incurred per evaluation. In practice, MHFs are used to limit the rate at which an adversary (using a custom computational device) can evaluate a security sensitive function that still occasionally needs to be evaluated by honest users (using an off-the-shelf general purpose device). The most prevalent examples of such sensitive functions are Key Derivation Functions (KDFs) and password hashing algorithms where rate limits help mitigate off-line dictionary attacks. As the honest users' inputs to these functions are often (low-entropy) passwords special attention is given to a class of side-channel resistant MHFs called iMHFs.\n Essentially all iMHFs can be viewed as some mode of operation (making n calls to some round function) given by a directed acyclic graph (DAG) with very low indegree. Recently, a combinatorial property of a DAG has been identified (called \"depth-robustness\") which results in good provable security for an iMHF based on that DAG. Depth-robust DAGs have also proven useful in other cryptographic applications. Unfortunately, up till now, all known very depth-robust DAGs are impractically complicated and little is known about their exact (i.e. non-asymptotic) depth-robustness both in theory and in practice.\n In this work we build and analyze (both formally and empirically) several exceedingly simple and efficient to navigate practical DAGs for use in iMHFs and other applications. For each DAG we: \n Along the way we also improve the best known empirical attacks on the aAT of Argon2i by implementing and testing several heuristic versions of a (hitherto purely theoretical) depth-reduction attack. Finally, we demonstrate practicality of our constructions by modifying the Argon2i code base to use one of the new high aAT DAGs. 
Experimental benchmarks on a standard off-the-shelf CPU show that the new modifications do not adversely affect the impressive throughput of Argon2i (despite seemingly enjoying significantly higher aAT).", "pdfUrls": [ "https://eprint.iacr.org/2017/443.pdf", "https://www.cs.purdue.edu/homes/bharsha/papers/practicalgraphs.pdf", "http://eprint.iacr.org/2017/443", "http://doi.acm.org/10.1145/3133956.3134031" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0aa8488958c2ef93bf147ffb17b5e9727482d41f", "sources": [ "DBLP" ], "title": "Practical Graphs for Optimal Side-Channel Resistant Memory-Hard Functions", "venue": "CCS", "year": 2017 }, "0aa9b26b407a36ed62d19c8c1c1c6a26d75991af": { "authors": [ { "ids": [ "3249571" ], "name": "Tiffany Bao" }, { "ids": [ "8199136" ], "name": "Ruoyu Wang" }, { "ids": [ "3260361" ], "name": "Yan Shoshitaishvili" }, { "ids": [ "2764539" ], "name": "David Brumley" } ], "doi": "10.1109/SP.2017.67", "doiUrl": "https://doi.org/10.1109/SP.2017.67", "entities": [ "Exploit (computer security)", "Shellcode", "Test case", "The 100", "Vulnerability (computing)" ], "id": "0aa9b26b407a36ed62d19c8c1c1c6a26d75991af", "inCitations": [ "3ae14011bcf698f7a61777b22d54569fa766ac46", "2cf43b8bc82f063e257bf21c92e5b038eacd34d3" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "824-839", "journalVolume": "", "outCitations": [ "0c5de0e5cb46e862b933c6bd543cc15695506034", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "351c028723083d8c85af93f9dff9abe6b390aafd", "26127796667203f4e015cc1f47072c24f3952356", "afa1b511c12ef6b20cc23f095561c9423fe645bf", "165a738d4e36843bc077e1fee1b3248cddf70717", "7d5e165a55d62750e9ad69bb317c764a2e4e12fc", "27c11b3d113b3fe063d5eaf8e4fb22188a9c99b9", "1c6628281d4617c743868a54e2fb41345d2e2ad1", "479bf197a3a9c5f6f7b06e64eefa0a90fa0c8b41", "0653e2ed9f683868cb4539eb8718551242834f6b", "32c9b3a4990fdb26da19a4e0817936369a5e91bc", "560457dbf3387bd934a4572f5a87211abadcd5fe", 
"4cd63e0701177f04e377fa9f0857c5b0fa10b07e", "1dc5aebebf425c1c225daef502e2d966771968fe", "20017e874c15f17e305d71168985c621467162e7", "463c55b8b745f1491176922ae7d1effd2d6b2b56", "03c85aa6d213f56cbd3602d9eaf2cc72de9f9a7e", "11f7876aa83d79c90c7ddb49b01186c80f6777b5", "660ad810c69affa189f567e76ff83af682228703", "0f7c7a16274c6f0bc30913182af488a88f105f63", "005d92543a3ebf303d2b8e16c7c6a32d52c6618f" ], "paperAbstract": "Developing a remote exploit is not easy. It requires a comprehensive understanding of a vulnerability and delicate techniques to bypass defense mechanisms. As a result, attackers may prefer to reuse an existing exploit and make necessary changes over developing a new exploit from scratch. One such adaptation is the replacement of the original shellcode (i.e., the attacker-injected code that is executed as the final step of the exploit) in the original exploit with a replacement shellcode, resulting in a modified exploit that carries out the actions desired by the attacker as opposed to the original exploit author. We call this a shellcode transplant. Current automated shellcode placement methods are insufficient because they over-constrain the replacement shellcode, and so cannot be used to achieve shellcode transplant. For example, these systems consider the shellcode as an integrated memory chunk and require that the execution path of the modified exploit must be same as the original one. To resolve these issues, we present ShellSwap, a system that uses symbolic tracing, with a combination of shellcode layout remediation and path kneading to achieve shellcode transplant. We evaluated the ShellSwap system on a combination of 20 exploits and 5 pieces of shellcode that are independently developed and different from the original exploit. 
Among the 100 test cases, our system successfully generated 88% of the exploits.", "pdfUrls": [ "https://users.ece.cmu.edu/~youzhib/paper/bao2017.pdf", "https://users.cs.fiu.edu/~carbunar/teaching/cen5079/cen5079.2017/presentations/shellcode.pdf", "https://doi.org/10.1109/SP.2017.67" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0aa9b26b407a36ed62d19c8c1c1c6a26d75991af", "sources": [ "DBLP" ], "title": "Your Exploit is Mine: Automatic Shellcode Transplant for Remote Exploits", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "0acc3b8712c183f3b4ae3cb194a5803ea4a5a187": { "authors": [ { "ids": [ "3393481" ], "name": "Nofar Carmeli" }, { "ids": [ "2831260" ], "name": "Batya Kenig" }, { "ids": [ "1679226" ], "name": "Benny Kimelfeld" } ], "doi": "10.1145/3034786.3056109", "doiUrl": "https://doi.org/10.1145/3034786.3056109", "entities": [ "Algorithm", "Anytime algorithm", "Experiment", "Polynomial", "Time complexity", "Tree decomposition" ], "id": "0acc3b8712c183f3b4ae3cb194a5803ea4a5a187", "inCitations": [ "e5724c98020636809508508ec35f8bb14df9dc5e", "059940fec3b7348e5728d22113b1c6853847aa69" ], "journalName": "", "journalPages": "273-287", "journalVolume": "", "outCitations": [ "1ffc977d82798cfab971e4abdb46ae7b707c57c0", "05a9a562a215e855fa4f3f324eb219313f5f5dec", "8157707e331a21b7aa1c00cf7e64c0e36a0367cd", "0da1b4f93ac3f7ebc5e70fae547e2a85581c6745", "846726e0c5b3e5d74fe5292bbf4866e4b93cd4e2", "f57fe866f8b059bd9fbb372d706e484368cc2883", "1b1c21d0749f34d0acf8c3aca0b3bc25ba6a0b4f", "ef4ff3c0f2d686832458cd629709927441476019", "b05011bcb117b104ca0efe5b13bf011d73c1977a", "257a11168aa90ae86b57afa336c453f9abd2a49e", "6d5d6156d37705e63fac2e10948d25f6b8f10109", "68cec9a204178c15072c84d8ca090179e5c17450", "980a8e1b59a782dde9fb4f1e84be61a74d80f332", "d1605abcbae458feddd57b8037f4dfbf91e20171", "3c1f77be28d487cedb197100556b55613cf532f8", "ec80574bde11fc789b83247e70e40c765b48386b", "bf6af5a2d45964c7daf45a22976d6f2ffe205c16", 
"2e4a2236ecfe43630c3988a3fb953be09ed54b7a", "072f30530dbec74eff91243b08587cd2df14ab77", "3717b9b45aa7dd3b9379147e7db8cfd87390aff5", "e4af685b3b228e09f77789f054eaf90351a5aed7", "b17f651273c629e615523bb0003d9d42b80175ca", "5d0d098dd284b74504d27af7c5e0e5c50f1b7fd4", "07a0a39ff4447ce9f0d2b06978d7b00a6d9fab15", "eab7b3ccd519323bb29b0ac4011001936deb06c6", "13b77bed4038262f2dedc3bfba8a1905e1e8dd2b", "1fafe0013162e7e7154950ac710d18d8c2971295", "3da04674c4fedc0bc72f3eac2d4fbdd7cea9ecf4", "85f260779afce6a7586eddb7ca9648f8a5bdf106", "a894334d4af1318ef2f5c6c06cf88f614ff3550b", "01be0018674ecb8a7cc3a8a43745e0fd8a518dfc" ], "paperAbstract": "We present an algorithm that enumerates all the minimal triangulations of a graph in incremental polynomial time. Consequently, we get an algorithm for enumerating all the proper tree decompositions, in incremental polynomial time, where ``proper'' means that the tree decomposition cannot be improved by removing or splitting a bag. The algorithm can incorporate any method for (ordinary, single result) triangulation or tree decomposition, and can serve as an anytime algorithm to improve such a method. We describe an extensive experimental study of an implementation on real data from different fields. 
Our experiments show that the algorithm improves upon central quality measures over the underlying tree decompositions, and is able to produce a large number of high-quality decompositions.", "pdfUrls": [ "http://batyak.cswp.cs.technion.ac.il/wp-content/uploads/sites/81/2017/07/tdenum.pdf", "http://doi.acm.org/10.1145/3034786.3056109" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0acc3b8712c183f3b4ae3cb194a5803ea4a5a187", "sources": [ "DBLP" ], "title": "Efficiently Enumerating Minimal Triangulations", "venue": "PODS", "year": 2017 }, "0ad4c28fcb7fb8497d2f23f5c7b53bc4245de23f": { "authors": [ { "ids": [ "1717408" ], "name": "Pin-Yu Chen" }, { "ids": [ "3008832" ], "name": "Lingfei Wu" } ], "doi": "10.1109/ICDM.2017.14", "doiUrl": "https://doi.org/10.1109/ICDM.2017.14", "entities": [ "Algorithm", "Baseline (configuration management)", "Cluster analysis", "Computational complexity theory", "Experiment", "Gen 5 Fibre Channel", "Google Cloud Messaging", "Laplacian matrix", "Loss function", "Network model", "Optimization problem", "Server-Gated Cryptography", "Supervised learning", "Unified Framework" ], "id": "0ad4c28fcb7fb8497d2f23f5c7b53bc4245de23f", "inCitations": [ "a489121c3526ed17ed5a5bfe5833dc7a50150187" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "51-60", "journalVolume": "", "outCitations": [ "7612f0793f77886d4753cdcbb86d247001b53991", "0daa4eb86f82ac4b9468b576a32b10093fcf5f05", "482ba5aaa40cd86f6d38043ee8fb5f083a149d56", "5a92ed2081d75861dbda2237658bd373cd78955d", "3d05f7f9c49c0e772fa8aa90ae5c036f6fa44ee4", "8ae8324f6caa0d262e02ad6a8eb0da0f5879f32c", "694a51fe29fc82d6d4e36d1df04c08619003574c", "fb25566db4ff4d676dc4aec99a0122966e3e90b0", "064c243409355338dc29a92a10a6be096efe81ac", "0f78dd86620bb0a3baeda293c286a20375b703a5", "1c827550c718cef74a679c2b5e8b3296e1f1b3d9", "43ce3d38404fe8ba29197ccec5ffa647ea2ad9eb", "8c6d8523df8f9236e035abc3c048c9d7d7d517d4", 
"134dcfc4f3a69dff3bf89d4db4c688eb2394dc65", "84cc4348e183d24a719228bb1ac0913b8949d396", "5a0ffcb58a756f528c628a1ed70bc509d0d251d7", "0b0b7ca21806b1b3ae24794a6c12d1accf91f301", "08bb7f39a03898e0e9da7354c267d6442b33e538", "4ce777b863f7656a928d2d7831247a699213f264", "1437415df29d3927c7851c7a0db0edd4a472d6e1", "10b2d30886dda3d271fb546e6e895e0ab8e45cb4", "05064b678d3bb00397f897125da0f6168c8a5290", "08d3fe8b5e5d34df1586fc7fe0f380896e421d12", "1d70f0d7bd782c65273bc689b6ada8723e52d7a3", "44c21bd0ece5abd586264f1caa114fc2e6b47d63", "030473349ba2530932de029140fcb6bc07eb9a49", "14f2bc1234ed1418790262f56488dc4447c78bc8", "141e35263ab810983c90d47ad62eb4fab5e51717", "71d7f4cb4823a6440492aec0320b8bcd20ab5a7a", "825315415eba86846605512c31d8adaf173e6f8d", "2353f12ef06b5f11a43b8369d7dcc176164e01c1", "996263c3ddbb50f0198354827445abd214f83030", "b6ef0685afefc9fc0f70d6ee8191e61252177a34", "1356b1daebf1114a2a0f3e6dfee606bdc06e4fc2", "95b251202e1f5ff7d7f41dda553a38e395ecf555", "6abb292510ec6dfdb7a845d316c2d61539670de8", "0250b5c0f7a414dec8c7a0aa7be20c9637eeb6ec", "b099b967d5ae9d175a5cff0fdaf17e17cac2fef0", "6780823d309a4a96fd4fcab53544bb8724bf461b", "0829eec26e669d457d48e339278eff51dbb74f41", "13d9f43bc8a8a08b79ffc3ae0fce2ca45774427d", "e0d2861a9022667a93a8a0573d44f238f7c3a027", "0eec83ae4f33be0e0f1d9434047687d9fdd439d6", "212f6ab53a9ed5695855c2d84360d420350d8eec", "2a91c8ff11a828209f10714cfc46fd929a51e9dc", "3553dd5a903e9422b8c13520c44bf6bdefa5846d", "b84db2e3d3e38c02b635dd45c330e5cb57537f4b", "254fcf4b2faeb6b9c06d3770c23f33a5ea114fea", "3c9ab313dbe31863075f212d794db364a06eb988", "f5c2dd63afdca9e0562cb5525381dd338a48df46", "1339e83c31a0ff6175bbd5ef04e0bfdc4ca52c83", "1ed8886fb8a9ba6b57b8742d0e5b76a32daa8fad", "285654e9e4f6e3880153433b62ec0b6d761fbd30", "92163d1bee0f7a2e10f8abefcdf2cf6b520cf836", "94aa54a13632793b68a6a05420f14408c2661cc0", "245ca2580a4a08cf1cff85da02a1213f4d7891e1", "529a53e4e2d2457a8a8a5507cddef17953d3949e", "0c400ad35f5e7563275015cd3fdf78db95b563c8", 
"ff10df016ce4bf17cb5c55ea2237353a195534b9", "064566f30ff4c52e177bbe97001ba48d2b3b3d90", "d6c652c8e762daeaeeb0fa00abf960a6ba078aae", "1411c3d92aaed82484bff27a25dc7cb68492d0a4", "3e89303ae511d43c2397fd6f7bb070aa87b7f504" ], "paperAbstract": "The methodology of community detection can be divided into two principles: imposing a network model on a given graph, or optimizing a designed objective function. The former provides guarantees on theoretical detectability but falls short when the graph is inconsistent with the underlying model. The latter is model-free but fails to provide quality assurance for the detected communities. In this paper, we propose a novel unified framework to combine the advantages of these two principles. The presented method, SGC-GEN, not only considers the detection error caused by the corresponding model mismatch to a given graph, but also yields a theoretical guarantee on community detectability by analyzing Spectral Graph Clustering (SGC) under GENerative community models (GCMs). SGC-GEN incorporates the predictability on correct community detection with a measure of community fitness to GCMs. It resembles the formulation of supervised learning problems by enabling various community detection loss functions and model mismatch metrics. We further establish a theoretical condition for correct community detection using the normalized graph Laplacian matrix under a GCM, which provides a novel data-driven loss function for SGC-GEN. In addition, we present an effective algorithm to implement SGC-GEN, and show that the computational complexity of SGC-GEN is comparable to the baseline methods. 
Our experiments on 18 real-world datasets demonstrate that SGC-GEN possesses superior and robust performance compared to 6 baseline methods under 7 representative clustering metrics.", "pdfUrls": [ "http://arxiv.org/abs/1709.04594", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.14", "https://arxiv.org/pdf/1709.04594v2.pdf", "https://arxiv.org/pdf/1709.04594v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0ad4c28fcb7fb8497d2f23f5c7b53bc4245de23f", "sources": [ "DBLP" ], "title": "Revisiting Spectral Graph Clustering with Generative Community Models", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "0afb5d62cdde8056f6ba9ce6eed91c4cbc36c84e": { "authors": [ { "ids": [ "2541035" ], "name": "Gunjae Koo" }, { "ids": [ "3175184" ], "name": "Kiran Kumar Matam" }, { "ids": [ "20637338" ], "name": "Te I" }, { "ids": [ "27046843" ], "name": "H. V. Krishna Giri Narra" }, { "ids": [ "1723081" ], "name": "Jing Li" }, { "ids": [ "2653363" ], "name": "Hung-Wei Tseng" }, { "ids": [ "1760342" ], "name": "Steven Swanson" }, { "ids": [ "1789661" ], "name": "Murali Annavaram" } ], "doi": "10.1145/3123939.3124553", "doiUrl": "https://doi.org/10.1145/3123939.3124553", "entities": [ "Application programming interface", "Computation", "Computer programming", "Data center", "Data-intensive computing", "Design space exploration", "Embedded system", "Flash file system", "Flash memory controller", "Garbage collection (computer science)", "Non-volatile memory", "Overhead (computing)", "Solid-state drive", "Solid-state electronics", "Volatile memory", "Wear leveling" ], "id": "0afb5d62cdde8056f6ba9ce6eed91c4cbc36c84e", "inCitations": [ "b20230c61d5db7863ba6a12fc18da85be6a35a60", "7b8c6b2e7652620c037ff4732bc6c7b4ae88da6c" ], "journalName": "", "journalPages": "219-231", "journalVolume": "", "outCitations": [ "0e5c646909bb762da0cd325e084655c12445578f", "07c3c9f1be15b52e259309b2e0a9ca71fa7dc76f", 
"3aff5fb3d1e23dfc0c45989f71b4aa99b3a5784b", "6de2f02cfcc10d514431953a623898bfa61c1580", "2e5132493276714e4cce3b2f64d60da4e47210cb", "a17b22ade9f5fbf9729e923128f5bfb969cb93d1", "a9313ee6d77860955fdf80032bf3035bf18054b8", "0cbadd3a63ddc748cc30a16a706eda77a4deab8c", "ae9e3a647a53362b820f42d4a7d57dba992f15a0", "5906fc1d9cc56d31b9373cdb868cb90aa613d90d", "f6c62e96e2cec8ea1f73047d4692aafd73dd9dc5", "b582a20eab086d443a35e466113081e16dc56c5b", "2e4090f083744f803aedf4fac0b1f78075e335ef", "726099036bb32c3fbaf1650d5900eeaa2ecc8fd9", "998c23f747271c297f8c6a8acd645ff5a9f8d880", "2ad5141d5c729334809fb5c92f4d94ee7dd204d3", "0b5de3cf4ac3069dc9a7ee5b4d745e908c218536", "4678cdcf7e57c1563379ac7cc344254f01ace572", "7347e3a04e3e05ab771fc82a2d7169b8df723b2e", "9d2c683bf5227eda7110d8a746ccc971dbad4c0d", "0903d6b3b5a26fea2cb7b4956f66365d71c78549", "35c6f2f41aa3ea043c3ce58e074ac4c2ac76bddd", "dbe73781be3fcba36bb85b491789a53003e3292f", "3a10fad57f186e8da3b912ac96e8cfa853734417", "1eb9dc6955b0de81a078c9d6fa937c33f1f04545", "138a9c2a9579435cd8cb0f24e7ec135821074557", "107579e715ac89da27593217238b9e0540dbb564", "11ceeea43c970abede5aa95b4bfce621138a0bed", "b6a75410d9aed0032486664f4afa7a8eaa4c4c70", "f7f82dfa5554db94d10b5cd2365c82fb3f24944d" ], "paperAbstract": "Modern data center solid state drives (SSDs) integrate multiple general-purpose embedded cores to manage flash translation layer, garbage collection, wear-leveling, and etc., to improve the performance and the reliability of SSDs. As the performance of these cores steadily improves there are opportunities to repurpose these cores to perform application driven computations on stored data, with the aim of reducing the communication between the host processor and the SSD. Reducing host-SSD bandwidth demand cuts down the I/O time which is a bottleneck for many applications operating on large data sets. 
However, the embedded core performance is still significantly lower than the host processor, as generally wimpy embedded cores are used within SSD for cost effective reasons. So there is a trade-off between the computation overhead associated with near SSD processing and the reduction in communication overhead to the host system.\n In this work, we design a set of application programming interfaces (APIs) that can be used by the host application to offload a data intensive task to the SSD processor. We describe how these APIs can be implemented by simple modifications to the existing Non-Volatile Memory Express (NVMe) command interface between the host and the SSD processor. We then quantify the computation versus communication tradeoffs for near storage computing using applications from two important domains, namely data analytics and data integration. Using a fully functional SSD evaluation platform we perform design space exploration of our proposed approach by varying the bandwidth and computation capabilities of the SSD processor. 
We evaluate static and dynamic approaches for dividing the work between the host and SSD processor, and show that our design may improve the performance by up to 20% when compared to processing at the host processor only, and 6X when compared to processing at the SSD processor only.", "pdfUrls": [ "http://www-scf.usc.edu/~gunjaeko/pubs/Gunjae_MICRO17.pdf", "http://doi.acm.org/10.1145/3123939.3124553", "https://people.engr.ncsu.edu/htseng3/papers/MICRO2017_Summarizer.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0afb5d62cdde8056f6ba9ce6eed91c4cbc36c84e", "sources": [ "DBLP" ], "title": "Summarizer: trading communication with computing near storage", "venue": "MICRO", "year": 2017 }, "0b0061a140678e50fed0fca0204f398cdd4ffe82": { "authors": [ { "ids": [ "2322572" ], "name": "Andrey Rodchenko" }, { "ids": [ "1963560" ], "name": "Christos Kotselidis" }, { "ids": [ "2476643" ], "name": "Andy Nisbet" }, { "ids": [ "40079054" ], "name": "Antoniu Pop" }, { "ids": [ "1706226" ], "name": "Mikel Luj\u00e1n" } ], "doi": "10.1109/ISPASS.2017.7975286", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975286", "entities": [ "Address space", "CPU cache", "Java", "Maxine", "Microarchitecture", "Morphing", "Pointer (computer programming)", "Program optimization", "Programming language", "Server (computing)", "Simulation", "Tagged pointer", "Timing closure", "X86", "X86-64", "z/VM" ], "id": "0b0061a140678e50fed0fca0204f398cdd4ffe82", "inCitations": [ "d440bdc8a46e57c7c0922ef6dfef68dd9fb6ae65", "653e56eafd7001abd35f96cb317d790a835248f2", "28004163c4d4646b66079d6810d3159dd1106999" ], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "141-152", "journalVolume": "", "outCitations": [ "03e53dddc865bf688fe313a94ad186a4d96bffe0", "71b1b77a43d92d80041723af9d4f6cad12e4580f", "3a54e9d683c172acf9d2a503754f1c68b7daf611", "337875a573821125831380402e4dab844fcd602e", 
"27047b4af85c2eecf581fd22185c7c7afcb0bf01", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "dc6e7f9d811c5d04cb781685550978a3e0565e08", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "4ff8d44338a2be192db0303b04c65214fdee0fb9", "42691b019d068f18ebdb33c2be65c17956fae071", "ef8a012cbc169282564421aaea8cfd0c4ec09f28", "b86698fe07205757729aa5ba80506596a312fdde", "d167b5c8b21c642662000417f313798d375ff38e", "7e007883306b2d0b8da57ed608f5441dcc30a3e2", "2266e34950bfcbc2af57618748ab4d7bcead8ad9", "0653e2ed9f683868cb4539eb8718551242834f6b", "01ac84ffb4b7f575ea0705181795f4fd2368f519", "0e2ee93bb53d93684d5276a07a582c574770ab53", "69b5a2d53840677b9855cb22bb1b98da041e6733", "4ba1b2c06f1f7e250c3fa32a8de6ba73502ded93", "22dd6d79baf65f26364e42b3d5389b8ca3b139bc", "589556b4bdfe6c30b89de322c6c5382484bd6ef6", "7ef0940a5e093a7c8c3c7d243bbbbf513b3c3192", "f37b93278eebe4c3871779a57b4d9a9d351483c9", "2bdb17f6f75e5fb8dc49de7cfdbdd891cc3c83a9", "23b564bfb4e3f84e9676247f90781d04cd8b6c71", "338e0e131b48d214e9995e85237a539b44d03367", "216f2ee11f9be97d297979a3010aac4529248359", "68c8af8f831d8596222c226ea19f8a2c7bbf3699", "0c41bb4d1b3c049e42c12a5054eac206f35da859", "5c40cdb6386021d68288e7158e1330ad3b8c223e", "6db21e22b4b9db4ea1e193777aa957ccabd9bbbb", "8a81748e4e9476575a2a7c9e353d4f8f133cd786", "04bf1edc2605a653d20471bf58da31beda5d81e6", "00a9ba0063d34ec56792849a67ef57b4601becbb", "49942300422a671155c9f58d744fc401ce13ef2e", "17bff708b1b6791db2dec8621a417c17aa79448d", "34e41ebc64b786e20efc490363aaeb5fa508866b", "14b664dd04703d2333bce3cb35641f90a3a8a2ce", "35a331c7be9808df2e0086c2bf17d9b65b518aae", "87bf9c81ed472f79e067d59db244f7c8870735d6", "3bf23f74bf33ed52f7c28587fab315610b27221a", "2daec4dd31465529ddaa02451e18d1bb380c811c", "49dc03814c171c08331fe9f1afc34a54951ae8e0", "44da7e36d0116834407e7255fc21889fe963eb94" ], "paperAbstract": "Managed applications, written in programming languages such as Java, C# and others, represent a significant share of workloads in the mobile, desktop, and server domains. 
Microarchitectural timing simulation of such workloads is useful for characterization and performance analysis, of both hardware and software, as well as for research and development of novel hardware extensions. This paper introduces MaxSim, a simulation platform based on the Maxine VM, the ZSim simulator, and the McPAT modeling framework. MaxSim is able to simulate fast and accurately managed workloads running on top of Maxine VM and its capabilities are showcased with novel simulation techniques for: 1) low-intrusive microarchitectural profiling via pointer tagging on the x86-64 platforms, 2) modeling of hardware extensions related, but not limited to, tagged pointers, and 3) modeling of complex software changes via address-space morphing. Low-intrusive microarchitectural profiling is achieved by utilizing tagged pointers to collect type- and allocation-site-related hardware events. Furthermore, MaxSim allows, through a novel technique called address space morphing, the easy modeling of complex object layout transformations. Finally, through the codesigned capabilities of MaxSim, novel hardware extensions can be implemented and evaluated. We showcase MaxSim's capabilities by simulating the whole set of the DaCapo-9.12-bach benchmarks in less than a day while performing an up-to-date microarchitectural power and performance characterization. Furthermore, we demonstrate a hardware/software co-designed optimization that performs dynamic load elimination for array length retrieval achieving up to 14% L1 data cache loads reduction and up to 4% dynamic energy reduction. 
MaxSim is available at https://github.com/arodchen/MaxSim released as free software.", "pdfUrls": [ "http://apt.cs.manchester.ac.uk/people/arodchenko/MaxSim_A_Simulation_Platform_for_Managed_Applications_Slides.pdf", "https://doi.org/10.1109/ISPASS.2017.7975286" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0b0061a140678e50fed0fca0204f398cdd4ffe82", "sources": [ "DBLP" ], "title": "MaxSim: A simulation platform for managed applications", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "0b1866b187148fed570809773111549684a0e68b": { "authors": [ { "ids": [ "2444195" ], "name": "David Hallac" }, { "ids": [ "3159449" ], "name": "Youngsuk Park" }, { "ids": [ "1872152" ], "name": "Stephen P. Boyd" }, { "ids": [ "1702139" ], "name": "Jure Leskovec" } ], "doi": "10.1145/3097983.3098037", "doiUrl": "https://doi.org/10.1145/3097983.3098037", "entities": [ "Algorithm", "Analysis of algorithms", "Augmented Lagrangian method", "Entity", "Graphical user interface", "Interdependence", "Lasso", "Message passing", "Scalability", "Sparse matrix", "Streaming algorithm", "Synthetic data", "Time series", "Time-varying network" ], "id": "0b1866b187148fed570809773111549684a0e68b", "inCitations": [ "2dc3ec722948c08987127647ae34a502cabaa6db", "aa2b708d7526ce6fade9ebf78bd80522e0b8d007", "c5eff85849a58a1ea0886c32cd5f26e2ac3ac15e", "015c5dce975f0e683e5c1ca60cb8f12dcafe61a4" ], "journalName": "", "journalPages": "205-213", "journalVolume": "", "outCitations": [ "4b641ecbea6703404fa5a3254d3530bec52c857c", "0af803c078f337b19a6b32b28b168654271dc8bf", "c3708f2ee0cf0e701c8733744cf13614520e14ea", "25c760c11c7803b2aefd6b6ae36f15908f76b544", "01175f0720d955e030ae74690706a1f14ba23278", "25318220bc77e91a37f544f8b1a7b41051ff03ff", "4f8751c3ec68fb524eed972e090f573311a2dd3e", "8e58a5fe84863c5c83190215db1c570d36fcc4d4", "04f39720b9b20f8ab990228ae3fe4f473e750fe3", "9c018f1b0aabe84ba61dfc969e273f2a9653c6ec", 
"0a8cae43856f9c5a08b79b2153dcbbd46698c9c2", "0dcc7a2c1e3403ab71aae7340aeec1c3450b2ee7", "44c263fd8fb026158229b5a1d2f51532edc7419a", "1810f41515c346ab932efc5c6a73f9e52bff9235", "2c863569e7e179ae660cd7b4ab2541a24b8f619a", "1b5b0862c402c0e8a186552d1487804991b33c84", "32983eb361d7cc5d5c5e4f583cb16951b5e84077", "7cc528bda7296d89f032aa634e0aef7670928a31", "c92420f001e023c693db762758f9590571256e35", "2561bdc461e2188a09366ac70ef84452f9a133b6", "20ee888ca9c3bae784f266405b4d6395c6f32955", "42b6ea0bbc174ce1ecb116aff6280559545dd6db", "603233a11e93193a3d4fc8b9cbfb2e2c1ebf8f9a" ], "paperAbstract": "Many important problems can be modeled as a system of interconnected entities, where each entity is recording time-dependent observations or measurements. In order to spot trends, detect anomalies, and interpret the temporal dynamics of such data, it is essential to understand the relationships between the different entities and how these relationships evolve over time. In this paper, we introduce the time-varying graphical lasso (TVGL), a method of inferring time-varying networks from raw time series data. We cast the problem in terms of estimating a sparse time-varying inverse covariance matrix, which reveals a dynamic network of interdependencies between the entities. Since dynamic network inference is a computationally expensive task, we derive a scalable message-passing algorithm based on the Alternating Direction Method of Multipliers (ADMM) to solve this problem in an efficient way. We also discuss several extensions, including a streaming algorithm to update the model and incorporate new observations in real time. 
Finally, we evaluate our TVGL algorithm on both real and synthetic datasets, obtaining interpretable results and outperforming state-of-the-art baselines in terms of both accuracy and scalability.", "pdfUrls": [ "https://arxiv.org/pdf/1703.01958v2.pdf", "http://cs.stanford.edu/people/jure/pubs/tvgl-kdd17.pdf", "http://web.stanford.edu/~hallac/TVGL.pdf", "http://arxiv.org/abs/1703.01958", "https://web.stanford.edu/~hallac/TVGL.pdf", "http://doi.acm.org/10.1145/3097983.3098037", "https://arxiv.org/pdf/1703.01958v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0b1866b187148fed570809773111549684a0e68b", "sources": [ "DBLP" ], "title": "Network Inference via the Time-Varying Graphical Lasso", "venue": "KDD", "year": 2017 }, "0b23e4be50e710dd9d339fc64f025ba89cc002d3": { "authors": [ { "ids": [ "33601888" ], "name": "Marcus H\u00e4hnel" }, { "ids": [ "5998467" ], "name": "Weidong Cui" }, { "ids": [ "3798388" ], "name": "Marcus Peinado" } ], "doi": "", "doiUrl": "", "entities": [ "Cache (computing)", "Haven (graph theory)", "Information leakage", "Interrupt", "Operating system", "Page fault", "Side-channel attack", "Task Control Block", "Timer", "Trusted Computing", "Valkyria Chronicles III", "libjpeg" ], "id": "0b23e4be50e710dd9d339fc64f025ba89cc002d3", "inCitations": [ "680fa994443080d43e7452f1137b339d5f74cc03", "e41440cff90683629228b308a94e48c7af11ca36", "0a1c5482215f99df9d4f12bcfd88324a356d7b10", "60a72da351f9e706b1417c5ce531aa661f314456", "33ae35cc24ef4303979b479671c2065256e1b3a7", "b643e5ce4ad3d2f674fc3f13e89bdaabf75fa066", "2087f336b0eb38fc60a24a32ec0821fc8fe2b2aa", "22fd7ae4bcdce1e12909315b48440b719f984f19", "2a7056e53f29bc73471048a77b0c55ea4e92b897", "e232abbe03c7685de30948e9ea1e2b9c1670464a", "c34ef3a056db7b11db6ea5a72b442e0743643aa0", "0c0994b08b790dc467b892d538321f9dbd10a3c2", "85741fa2a0fb1060c138b1b11a0906381661fbcb", "b053033ad436cd404bb0eb2e75b3aac83b70d62c", "38a54f9bbbfc46599770a28999365144a273783f", 
"8569785f80712b5787e12b86a3870a28c0182b2c" ], "journalName": "", "journalPages": "299-312", "journalVolume": "", "outCitations": [ "79e14bf42535966db5e056af6d8a0e0ec1f522b2", "0dada24bcee09e1f3d556ecf81f628ecaf4659d1", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "171c25d7c2b1fd87c32951cceb480f8525cd37dc", "452c803f91ab670bf36403ed5412875b13ae9e94", "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "934e8d76376f6c78a8b89ef2304f01a8e7099401", "7272e04a354d3bc54b1a8119e868090996d3bf1d", "8e3270ef5a0afc293021f8d594979ed059e29d5e", "008ff29ee4dbee79028e1017d6459347ad8f45d6", "4d624b942a58818f8d425460638cb4b65ed84e1c", "08dcda95dcbd1ebe906a1c24507b7814759462aa", "30909df12b1b01760ae4c5406e15f302a6524446", "26f3cbecbc636984d57e52191c1d87c9377aff6f", "0183d8c6623aaf106a27db72ecec9bb9704ab98c", "77a1532cb64eab28162a0277cde52b4b7eceda49", "06f16d9430d5f6213cf5399b167a3d989c3ff798", "05f70f429a7bf38efa9e457fd486cb862bd495be", "659bc8a947a4ea64b2ca38b7fa5a27233de2c1f8", "0541d5338adc48276b3b8cd3a141d799e2d40150", "41c2c11acde144ccf62cb6eff30731195d22775b", "bd79772a58dd4bf040ac9f9c1946614b6a51cc4c", "1f932f0d49a4c56d9718e8506d6177c6a6848831", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "352e74019d86163d73618f03429ae452ab429629", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "47e98574f432ec6a7a224b807a0bbd08a2a66148", "4ad23e9e5745e8f3ee19317c54844b58d93513df", "b0ac2616034f56ab1469afb935b55fe7e37f8f41", "52c2c050af5b32d4929b4b193967a3675d03aea0", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "6b6fae57882fd193461fca64654107068ce9fd9a" ], "paperAbstract": "Feature-rich mass-market operating systems have large trusted computing bases (TCBs) and a long history of vulnerabilities. Systems like Overshadow, InkTag or Haven attempt to remove the operating system (OS) from the TCB of applications while retaining its functionality. 
However, the untrusted OS\u2019s control of most physical resources puts it in a much better position to launch sidechannel attacks than traditional unprivileged side-channel attackers. Initial attacks focused on the page-fault channel, demonstrating significant information leakage for three legacy applications. We present two new side channels for an untrusted OS which use timer interrupts and cache misses to achieve higher temporal and spatial resolution than the page-fault channel. We leverage the untrusted OS\u2019s control over hardware to reduce noise in the side channels to enable successful attacks in just a single run of the target. We demonstrate that our side channels enable attacks against new SGX applications such as VC3 that were designed not to trust the OS. We also show a new attack against libjpeg that extracts images with two orders of magnitude more information than the page-fault channel attack.", "pdfUrls": [ "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/06/atc17-final230.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/hahnel", "https://www.usenix.org/system/files/conference/atc17/atc17-hahnel.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/40ce/4693719490abc0d7407117e826e79d58b554.pdf", "s2Url": "https://semanticscholar.org/paper/0b23e4be50e710dd9d339fc64f025ba89cc002d3", "sources": [ "DBLP" ], "title": "High-Resolution Side Channels for Untrusted Operating Systems", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "0b23f0ee49ed34443ca62cb085c0addd75eac6a9": { "authors": [ { "ids": [ "2726787" ], "name": "Raja R. 
Sambasivan" }, { "ids": [ "3302142" ], "name": "David Tran-Lam" }, { "ids": [ "1713535" ], "name": "Aditya Akella" }, { "ids": [ "1728086" ], "name": "Peter Steenkiste" } ], "doi": "10.1145/3098822.3098857", "doiUrl": "https://doi.org/10.1145/3098822.3098857", "entities": [ "Border Gateway Protocol", "Classless Inter-Domain Routing", "Communications protocol", "Inter-domain", "Routing" ], "id": "0b23f0ee49ed34443ca62cb085c0addd75eac6a9", "inCitations": [ "4aa4861d4205ef0885d5c5d04fe80617d3c099da", "d977c9a1b18191c924bc9e5529eb6fb25b75489d" ], "journalName": "", "journalPages": "474-487", "journalVolume": "", "outCitations": [ "2cec9dffa8021694ffe1ee9c894a05fdca4c73a2", "18e7dde371cf17ca4089a4a59660483e70160b09", "53071744cdb7ba41261b8c8edb13f7cdcd6a1552", "2a628e4a9c5f78bc6dcdf16514353336547846cc", "9e9e1af5f2694638c0a62e650139583d7b5fa0e5", "4d85527f672e00c75cdb6087ebcf2ca10738b598", "1942aff3bb24d4ff9c1e8688b1104a767f0bc346", "314765c7259dabccbf72722875e3e7ddd1bc5558", "8f34653a764cb5f9c77f5635a66561380af54a81", "597ac27962cf8ab84a51103aa69c4a565d3edb6f", "0e3bf64d8d3cf9b3ccdb396ce826eb229982186e", "3cd2f248e9c03bde2c2203a7e0cffdb8754926cb", "500a36498671d0be6108d7124a55277ffebad25b", "7ed7b849499f2fa53ca98986b5116ce3b883de01", "3fb3ff8702ea89ff771184e4c5f197b162265512", "89dfaab876ef69d08c7332d560f3b4664cacb9fb", "5958b3d8f2c9b59c53086b76287986ae18b3a1e4", "23208b2f513af5c3d7b40f0826ed367da77396c9", "6f1ea0772a28703a61aa8024469ae8b7d4ac8284", "6fbdc6d514983314a0f895aff61482d3e4139085", "0ec3464b4a0a6e3d92f209b601afccef3cf5df65", "0fa37b92444d8fcbef150470226e216bce15e3a8", "8114db39b6ff4a4d7db34af2e67ceed0804ecf73", "0155faf33da0d2184bcf18717c6ccac7e3c2527e", "4687fdf3c77ef00700fdf1399f7dd81bfe87ef97", "a6b0085a59d8f42461814d03fc6e8c9904a9c5ee", "cef7980dd77da2c62796fdeb89bd2e45c1f4f2e4", "d1ecfb7ec04fc3b3f93d9a5bf01b12eb38238285", "7e8454ef803d1f3749003dd3e7be1794f2e12332", "306a2933e91fdf8971bd160c5bbe365e48ec7fdd", "04322dde94a71eeb610fad4522647fc7e4f53126", 
"81bc7f6f997051ff7f7500f3a75886747208d6b5", "a431cb670e3b3e7279c4f4081a672e970655b3c3", "63780e4e68ee8d6eed19b6d2753798e593afe4d4", "1efb338eb19fc175e59c8c5d5245ce9c4f5af3a3", "2d7ae6c154416c9da813a8a4ea229bea6d814c6a", "12c45ef6c4c62284a6de39ea33df44e0b0b944af", "001b5b131479eccbfb79caa0900a303f7b42e8a9", "34f91f94fe0bcc53a7ff798b5182ca3082505546", "016aa9d353ebe23c740e16da9178bf02e84a42f4", "58d47b5d0ad1ca17a66cc4b98001f6a9fa713943", "640194688ca1a3059b05ce462b446e575955109d", "03a6909f6cffe4021d37cf875ada566a716fc2e7" ], "paperAbstract": "The Internet's inter-domain routing infrastructure, provided today by BGP, is extremely rigid and does not facilitate the introduction of new inter-domain routing protocols. This rigidity has made it incredibly difficult to widely deploy critical fixes to BGP. It has also depressed ASes' ability to sell value-added services or replace BGP entirely with a more sophisticated protocol. Even if operators undertook the significant effort needed to fix or replace BGP, it is likely the next protocol will be just as difficult to change or evolve. To help, this paper identifies two features needed in the routing infrastructure (i.e., within any inter-domain routing protocol) to facilitate evolution to new protocols. 
To understand their utility, it presents D-BGP, a version of BGP that incorporates them.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098857", "http://reports-archive.adm.cs.cmu.edu/anon/2016/CMU-CS-16-117.pdf", "http://pages.cs.wisc.edu/~akella/papers/dbgp-sigcomm.pdf", "http://conferences.sigcomm.org/sigcomm/2017/files/program/ts-11-2-D-BGP.pdf", "http://www.cs.cmu.edu/~rajas/Publications/sambasivan_evolve_techreport16.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0b23f0ee49ed34443ca62cb085c0addd75eac6a9", "sources": [ "DBLP" ], "title": "Bootstrapping evolvability for inter-domain routing with D-BGP", "venue": "SIGCOMM", "year": 2017 }, "0b360c6c2cf6933b690b55ae00672c552a31c0c6": { "authors": [ { "ids": [ "3224522" ], "name": "Yongming Shen" }, { "ids": [ "1843152" ], "name": "Michael Ferdman" }, { "ids": [ "2719024" ], "name": "Peter A. Milder" } ], "doi": "10.1145/3079856.3080221", "doiUrl": "https://doi.org/10.1145/3079856.3080221", "entities": [ "Artificial neural network", "Central processing unit", "Convolutional neural network", "Field-programmable gate array", "Machine learning", "Throughput" ], "id": "0b360c6c2cf6933b690b55ae00672c552a31c0c6", "inCitations": [ "b4871945bc9d8e6fe69d283651ecbddc87c30a2c", "42412b3242e60209d66764fff759f7d1cec9c717", "ddc339ae2c367880fb9b6e89ffa0606b94456fbf", "65b041bbffd533dbada19d036fb0a74e43b59b2a", "4636d53cc1548f2cd7a185c8ae5fe2320b0502da", "381b008b49d04c1bd5ff00649521fa028b9d3ea8", "3c6121a1b3193424710960c0e8e15e6c3be9e3d7", "039bce345036757f4b7feb224a8a41d29f9b653a", "7b9339d3b359310ddbaf6caae13d3a65f657bf04", "52514fcc2bcb2d14e3c503c004d3f3a779e90aef", "a3ebabb680e54ad395235b8e25b7d6cd1c1afd72", "0f3056a84ea59a9b976163f28002401fa88ba80f" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "535-547", "journalVolume": "", "outCitations": [ "5c1d6a32abdeab7a8a1f50c0ccaccbe2f11ea6e2", 
"2ffc74bec88d8762a613256589891ff323123e99", "56828bb7ad555eed8d43e6d3eba4ee39e862defe", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "766b21e4984729a17d2d826691affa25f855a38b", "269c24a4aad9be622b609a0860f5df80688c2f93", "02c78232075ac431834e3442dcb2954d4e708def", "b4eac8295c90dbfb7d8d22ba560e025621287c58", "49b4094f2c313a92da4461572c0bef80b0d7d649", "714544b7cf35a3b8bdc12fb1967624a38f257a42", "29c7d959f322f7006186e3c344ddb9e41e3a8d1f", "0934508c768ff8ba9744678ad92e51dfdbd5f122", "4f5dd051a907027f1f00797f5a7b7a6d7f33a1e5", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "1a07186bc10592f0330655519ad91652125cd907", "3ac3cf82ff8f43db2e467f792d22d57d068f7ee0", "e1c4e2fa071046569a05e9cfdf13496d094025dd", "1908d2c198edfea455059167ed5ca7b533372b31", "c382406fd8db2744b2a609837395e5da05e1d2ed", "58462eed4e22d34d35d31be2b902b4eb18a231e0", "437b11128948f92e1139c555cf1326922ee36b39", "0b99d677883883584d9a328f6f2d54738363997a", "71bd0af2eb95061d43acb61d32ae72038b36c821", "4f40ea0248653d4ffb6ef4857cd23f0f713d8c69", "357e97b04375f09e9f4cfd45c69ecd9d7f0a15e1", "399acab2bee6eccbfffe4a2ce688b6b1075e9c5e", "2e11ca64a35299b612f92ee0782a6ad3c2878f18", "65b041bbffd533dbada19d036fb0a74e43b59b2a", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "061356704ec86334dbbc073985375fe13cd39088", "52d2a6110e3bc2215d0347a04c421fb094044557" ], "paperAbstract": "Convolutional neural networks (CNNs) are revolutionizing machine learning, but they present significant computational challenges. Recently, many FPGA-based accelerators have been proposed to improve the performance and efficiency of CNNs. Current approaches construct a single processor that computes the CNN layers one at a time; the processor is optimized to maximize the throughput at which the collection of layers is computed. 
However, this approach leads to inefficient designs because the same processor structure is used to compute CNN layers of radically varying dimensions.\n We present a new CNN accelerator paradigm and an accompanying automated design methodology that partitions the available FPGA resources into multiple processors, each of which is tailored for a different subset of the CNN convolutional layers. Using the same FPGA resources as a single large processor, multiple smaller specialized processors increase computational efficiency and lead to a higher overall throughput. Our design methodology achieves 3.8x higher throughput than the state-of-the-art approach on evaluating the popular AlexNet CNN on a Xilinx Virtex-7 FPGA. For the more recent SqueezeNet and GoogLeNet, the speedups are 2.2x and 2.0x.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080221", "http://compas.cs.stonybrook.edu/~mferdman/downloads.php/ISCA17_Maximizing_CNN_Accelerator_Efficiency_Through_Resource_Partitioning.pdf", "https://arxiv.org/pdf/1607.00064v1.pdf", "http://arxiv.org/pdf/1607.00064v1.pdf", "http://arxiv.org/abs/1607.00064" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0b360c6c2cf6933b690b55ae00672c552a31c0c6", "sources": [ "DBLP" ], "title": "Maximizing CNN accelerator efficiency through resource partitioning", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "0b549912e5f111c7c60eadda634ef4484427b684": { "authors": [ { "ids": [ "2880350" ], "name": "Theofilos Petsios" }, { "ids": [ "8283564" ], "name": "Adrian Tang" }, { "ids": [ "1807433" ], "name": "Salvatore J. Stolfo" }, { "ids": [ "1720824" ], "name": "Angelos D. 
Keromytis" }, { "ids": [ "39400201" ], "name": "Suman Jana" } ], "doi": "10.1109/SP.2017.27", "doiUrl": "https://doi.org/10.1109/SP.2017.27", "entities": [ "Black box", "Clam AntiVirus", "Code coverage", "Crash (computing)", "Cross-reference", "Evasion (network security)", "Failure rate", "Library", "Man-in-the-middle attack", "Memory corruption", "Parsing", "Portable Document Format", "Software bug", "Software testing", "Transport Layer Security", "Vulnerability (computing)", "X.509", "american fuzzy lop" ], "id": "0b549912e5f111c7c60eadda634ef4484427b684", "inCitations": [ "f194bddbe2d4d4ce5d2e54e7cee01a30aa3e11ce", "1ac30950aaab6297d2a7cb2d7a55ba6b9b2a100f", "157810ccca1ab1acb815ca9c77afcd9040ecdd16" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "615-632", "journalVolume": "", "outCitations": [ "051da9a3595d63de898482840cc668c2cd23f277", "72eb18b6c1faf83c876031138a75b1cfa2856c0a", "13702681da7d276b2891c961f842c4f08dee82a6", "2c13dcfdc5ea2d355a46fe326c371038a00ba7f5", "29146d2b80bc8c56f7e18efe8d2c92354254e947", "fc881e8d0432ea8e4dd5fda4979243cac5e4b9e3", "10ab79b91e5e61c75f609aa05810538c13be34ba", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "bc44df77508e02b5d2cb0edbef3dfa87625e8a33", "95baae72c5fcca4038339c350556dd6143d9a263", "088a546d751651088e8a7050cb4a20239653012f", "2a5c00109e15c68e8d8f479a2657c2889cc69cc5", "1e110877904379b5906220a6f04c300456864cf2", "0b5b42425deb371d8dc60ac9b090c7232702370a", "4313c500616a89a306bf6b7fa9517e64a3f19165", "629e4eb69b02f1025aee7a8242e47db3a048b234", "4dc1810abaf0415fbf0a40185892856da4308302", "0adfaffb6e236d886c6eda579b28f5a8530019c6", "0f5029d3e21e188d0ed7b24764f72ec1a2f4910b", "479bf197a3a9c5f6f7b06e64eefa0a90fa0c8b41", "de71e2359995087b4ce7d46e4eb718c341c70ee0", "2097c344eea1d77682aab16db502d559f34e5150", "5e4fa9397c18062b970910f8ee168d3297cf098f", "120c819da02fcb312986ac492f723ef9ea3223b5", "0a123a92ebfc1efa07ddfdc1d30e390d3ef4341e", "5556995fb630c47805bbba560287ea59ce357fa1", 
"03086e02b706e6955735ed15603b1015334bc095", "5ab3f57e013dacfbdac65ee562dfdbcb7e3a5f9c", "55b7d2e425c5e813ed51780845398ef5d246a4d3", "208ed7512ea84f22a004920ea0b4c475bc836abc", "e5e1327ef05b629e5015631b562716ea2e024d1f", "14a881960158ae4672cd110a21b613a6512ddd99", "6ea63d09993b9a268689790ea8d25bc36345497e", "2859e75f421621ff776d55e533dc6ee7cb4b0a92", "48fc8f1aa0b6d1e4266b8017820ff8770fb67b6f", "247943eb67439626745b4385326d389c9855a37b", "ab80995142dd27d2b4834de7d58344bb0d30a8b1", "0ab393affe9d674ef790be14fdfade368f3e5989", "45f6957cab31e802934cc761380c1a4a37c66208", "02838cb6982e67992ae54fa616162b16ce5110c6", "341d33498388711a5303c5f51433b3d5739a21d2", "869a42035afad5dc0bf48408a4471982f21f78d1" ], "paperAbstract": "Differential testing uses similar programs as cross-referencing oracles to find semantic bugs that do not exhibit explicit erroneous behaviors like crashes or assertion failures. Unfortunately, existing differential testing tools are domain-specific and inefficient, requiring large numbers of test inputs to find a single bug. In this paper, we address these issues by designing and implementing NEZHA, an efficient input-format-agnostic differential testing framework. The key insight behind NEZHA's design is that current tools generate inputs by simply borrowing techniques designed for finding crash or memory corruption bugs in individual programs (e.g., maximizing code coverage). By contrast, NEZHA exploits the behavioral asymmetries between multiple test programs to focus on inputs that are more likely to trigger semantic bugs. We introduce the notion of δ-diversity, which summarizes the observed asymmetries between the behaviors of multiple test applications. Based on δ-diversity, we design two efficient domain-independent input generation mechanisms for differential testing, one gray-box and one black-box. 
We demonstrate that both of these input generation schemes are significantly more efficient than existing tools at finding semantic bugs in real-world, complex software. NEZHA's average rate of finding differences is 52 times and 27 times higher than that of Frankencerts and Mucerts, two popular domain-specific differential testing tools that check SSL/TLS certificate validation implementations, respectively. Moreover, performing differential testing with NEZHA results in 6 times more semantic bugs per tested input, compared to adapting state-of-the-art general-purpose fuzzers like American Fuzzy Lop (AFL) to differential testing by running them on individual test programs for input generation. NEZHA discovered 778 unique, previously unknown discrepancies across a wide variety of applications (ELF and XZ parsers, PDF viewers and SSL/TLS libraries), many of which constitute previously unknown critical security vulnerabilities. In particular, we found two critical evasion attacks against ClamAV, allowing arbitrary malicious ELF/XZ files to evade detection. The discrepancies NEZHA found in the X.509 certificate validation implementations of the tested SSL/TLS libraries range from mishandling certain types of KeyUsage extensions, to incorrect acceptance of specially crafted expired certificates, enabling man-in-the-middle attacks. 
All of our reported vulnerabilities have been confirmed and fixed within a week from the date of reporting.", "pdfUrls": [ "http://nsl.cs.columbia.edu/papers/2017/nezha.oakland17.pdf", "http://ids.cs.columbia.edu/sites/default/files/oakland17_nezha.pdf", "http://www.cs.columbia.edu/~theofilos/files/slides/nezha.pdf", "http://0x0atang.github.io/files/nezha17_slides.pdf", "https://doi.org/10.1109/SP.2017.27", "http://www.nsl.cs.columbia.edu/papers/2017/nezha.oakland17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0b549912e5f111c7c60eadda634ef4484427b684", "sources": [ "DBLP" ], "title": "NEZHA: Efficient Domain-Independent Differential Testing", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "0b5c1b6c8f40926aa04c34591f5453a3f61619ec": { "authors": [ { "ids": [ "2905543" ], "name": "David Naylor" }, { "ids": [ "2357317" ], "name": "Richard Li" }, { "ids": [ "2903620" ], "name": "Christos Gkantsidis" }, { "ids": [ "31978478" ], "name": "Thomas Karagiannis" }, { "ids": [ "1728086" ], "name": "Peter Steenkiste" } ], "doi": "10.1145/3143361.3143383", "doiUrl": "https://doi.org/10.1145/3143361.3143383", "entities": [ "Antivirus software", "Communications protocol", "Encryption", "Hypertext Transfer Protocol", "Legacy system", "List of HTTP status codes", "Middlebox", "Outsourcing", "Proxy server", "Requirement", "Secure communication", "Syntax highlighting", "Transport Layer Security" ], "id": "0b5c1b6c8f40926aa04c34591f5453a3f61619ec", "inCitations": [], "journalName": "", "journalPages": "88-100", "journalVolume": "", "outCitations": [ "17f19d9ec093ef82a10f1276fc53c10d4667836d", "07f0e56d1c37c213cd5c617dbfba5a0549629a19", "133eea63e0a9702207dc14fdd72740d402f5748b", "cf5dfdbb0b8d1c673726178f37f499b77fcc7f03", "336b4f3099b8f629adc20a69aba15257e53539f9", "36bb67d8fba0c85f2495449a9926018827368df5", "72880d15db2282512e5d3f0a3796b397d68cc7db", "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", 
"228c9ef44ed51f4152a5655be54c8d679c54bb01", "2d968ef0c5ad0cc6718e2f8b40ce7f4c323dbbdd", "1e3822536527f98c53b716c26fa05da5bf729f17", "6a74a8573cb1bd15c5f4fa4e047613d2340e61b9", "39ac27363c06ade948e0cc3e7797523122a19085", "488f4ea444bb21d8e37845dac576277df50fa40b", "0507b04c131f2244524fda97cd1707af5760216e", "5b2092b54860f134f78b2ec884c910750def71e6", "452c803f91ab670bf36403ed5412875b13ae9e94", "5f71698f71d31dd64f41706c1abf46c46f9d9140" ], "paperAbstract": "Internet communication today typically involves intermediary middleboxes like caches, compression proxies, or virus scanners. Unfortunately, as encryption becomes more widespread, these middleboxes become blind and we lose their security, functionality, and performance benefits. Despite initial efforts in both industry and academia, we remain unsure how to integrate middleboxes into secure sessions---it is not even clear how to define \"secure\" in this multi-entity context.\n In this paper, we first describe a design space for secure multi-entity communication protocols, highlighting tradeoffs between mutually incompatible properties. We then target real-world requirements unmet by existing protocols, like outsourcing middleboxes to untrusted infrastructure and supporting legacy clients. We propose a security definition and present Middlebox TLS (mbTLS), a protocol that provides it (in part by using Intel SGX to protect middleboxes from untrusted hardware). 
We show that mbTLS is deployable today and introduces little overhead, and we describe our experience building a simple mbTLS HTTP proxy.", "pdfUrls": [ "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/07/tr-2.pdf", "https://davidtnaylor.com/mbTLS.pdf", "https://www.microsoft.com/en-us/research/uploads/prod/2018/01/securecomm_conext17.pdf", "http://doi.acm.org/10.1145/3143361.3143383" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0b5c1b6c8f40926aa04c34591f5453a3f61619ec", "sources": [ "DBLP" ], "title": "And Then There Were More: Secure Communication for More Than Two Parties", "venue": "CoNEXT", "year": 2017 }, "0b6092ca9e92399b5509d8225d87c7672f884613": { "authors": [ { "ids": [ "1781880" ], "name": "Falko Bause" }, { "ids": [ "37560854" ], "name": "Peter Buchholz" }, { "ids": [ "39510165" ], "name": "Johannes May" } ], "doi": "10.1145/3030207.3030215", "doiUrl": "https://doi.org/10.1145/3030207.3030215", "entities": [ "Best, worst and average case", "Graphical user interface", "Quality of service", "Service-level agreement", "Service-oriented architecture", "Value (ethics)" ], "id": "0b6092ca9e92399b5509d8225d87c7672f884613", "inCitations": [ "5e663223888921b030b3d3158b3c8666137fe3c0", "059a8d2d8b6e0de34b15b5aeb2817f3472c0ceb2" ], "journalName": "", "journalPages": "233-244", "journalVolume": "", "outCitations": [ "7fc297fc099fdf02e9598d65f2fbc38d323ccbc9", "1d4f7989999941572074e91ac3c9202cde6771af", "22d2f4b46238776b47cdef14c6e913bbfe1e0b73", "243d43190ae2a7eceb9f33d3c953581b196bde82", "5b02cf69f2f9efe0cb61c922974748d10d1506af", "7b600167106de00b28221b0a67986cee0b9432f0", "29c36c973dafa330da9311ea2cccce588bed5107", "3c71fc21c52deefe639d51c39ba7be897b35b6f0", "e6a64156aa5e25ffe6dcd0ebd679495febd54ac4", "430b44723ec4e8ac653db8d843962aee4ccc9efe", "c216a6994478fa4957224b02297fb667b46393b5", "160b7ea649dd128cc21375557b5b5574acb1345a", "147e2ed4fdc01493a301b00f52b06046160cb88a", 
"d661f65ab7be534a9d63c05883fcb4030b71b13f", "1066bf67cc98f73e2111e7a7d222b92c1693e879", "5e663223888921b030b3d3158b3c8666137fe3c0", "fa2fad4993a43f8ebd503ddedb70822294588b7a", "8f249c3b2c66146b9ac92d254103b69c746f9b88", "059a8d2d8b6e0de34b15b5aeb2817f3472c0ceb2", "0e8e2898b2212a8322046e5f744c1691ef656e69", "0b235a8f9f6498633b5fcb04eafee68a49192f5a" ], "paperAbstract": "Quantitative aspects of modern IT systems are often specified by service level agreements (SLAs) which relate the maximal load of a system with guaranteed bounds for response times and delays. These quantities are specified for single services which are combined in a service oriented architecture (SOA) to composed services offered to potential users or other service providers. To derive SLAs for composed services and to plan the required capacity to guarantee SLAs, appropriate methods and tools have to be used that compute results based on information given in SLAs. In this paper it is argued that most available approaches are not sufficient to analyze systems based on SLA information. A new method and a tool are presented that support the efficient calculation of bounds for delays in composed systems based on bounds for the load and the delay of the individual components which are specified in the SLAs of the components. Furthermore, the presented tool can be used to generate bounds for the required processing capacity which a provider has to provide in order to guarantee the quality of service defined in the SLAs.\n The presented approach is in some sense a counterpart to mean value analysis for queueing networks but rather than mean values, worst case bounds for different quantities like response times or departure processes are computed. 
Analysis is based on min/+ algebra but the mathematical approach is hidden from the user by a graphical interface allowing a simple graphical specification and result representation for networks of composed services.", "pdfUrls": [ "https://research.spec.org/icpe_proceedings/2017/proceedings/p233.pdf", "http://doi.acm.org/10.1145/3030207.3030215" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0b6092ca9e92399b5509d8225d87c7672f884613", "sources": [ "DBLP" ], "title": "A Tool Supporting the Analytical Evaluation of Service Level Agreements", "venue": "ICPE", "year": 2017 }, "0b6187649cc8af54897e2f46122edf5e814eb3cc": { "authors": [ { "ids": [ "10736096" ], "name": "Ajeya Naithani" }, { "ids": [ "2070187" ], "name": "Stijn Eyerman" }, { "ids": [ "1717133" ], "name": "Lieven Eeckhout" } ], "doi": "10.1109/HPCA.2017.12", "doiUrl": "https://doi.org/10.1109/HPCA.2017.12", "entities": [ "Byte", "Central processing unit", "Computer multitasking", "Multi-core processor", "Reliability engineering", "Scheduling (computing)" ], "id": "0b6187649cc8af54897e2f46122edf5e814eb3cc", "inCitations": [ "88824f4400bf03caed2f99879e68f3543b214c92", "8abe342812ee5025755d680977383e4bdf8d6703", "7cdf63e05545333f10f69317383a3a88c6e29d03" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "397-408", "journalVolume": "", "outCitations": [ "01e499b6cf6b89babe390503e30e20d6628ddc39", "7848ea4660a27f6d05f8f5dfc33cfa55f999e975", "7ce25a0852e2345be1a1bd02b8eb4cefb9d47073", "68073f621072d793e95b9562bf9a9245415d5a96", "023abef0f3f56cda13bcb5adeb28dd4c7241c261", "3ed84f2fbdc4dc6450919ec5b017e66440a5833c", "1ba9756e5bca19f2753d3afe255021f8a7387c62", "3640fd02d3a62c22aaee643aaa8083a3b37325b7", "0c8919aeb81fa8b7851585ea4190570873efa628", "17c5972f45561c705bce6d8e0174d94cc8ad4adf", "5037ba6bcd3b391a8cad4abeae7b6a39ca850c72", "167c651a235cf567ee8ca19b8d0e4d2f19e01b42", "e5dc8be9b4678ae1f91764494acc96299cf44009", 
"7ef0940a5e093a7c8c3c7d243bbbbf513b3c3192", "35e3643eb7060f30ef408c4910fc6448eecde6e6", "352a8957005dc5519b15ed1870751ec494d66395", "109df0e8e5969ddf01e073143e83599228a1163f", "0fa1ab218ff80ee7782848e207a5b912ae8775f3", "2640471efddd30a2855a2a4d76fde3459d36cdf6", "7569c5e5f9d8c81e4dfd6ce2c044d4ce0dab07fd", "06325345f9ffef958d9d7c704b28e6cbb3021b8c", "480a952f7d24cf6d3ccda62439424eea6a8fd469", "19679a294ff67a8e8ad49f72c240cce61eed3b6d", "68bdda25848b214652cae830f066e4d4b82df7c7", "07ccd8f48c5067fff6f50e6654ff410195ff0a91", "30690e66ed3a2f7989a389d0f0189c49e6483888", "84c31932d221afbd7d50f55e16900664b1027a1a" ], "paperAbstract": "Reliability to soft errors is an increasingly important issue as technology continues to shrink. In this paper, we show that applications exhibit different reliability characteristics on big, high-performance cores versus small, power-efficient cores, and that there is significant opportunity to improve system reliability through reliability-aware scheduling on heterogeneous multicore processors. We monitor the reliability characteristics of all running applications, and dynamically schedule applications to the different core types in a heterogeneous multicore to maximize system reliability. Reliability-aware scheduling improves reliability by 25.4% on average (and up to 60.2%) compared to performance-optimized scheduling on a heterogeneous multicore processor with two big cores and two small cores, while de-grading performance by 6.3% only. We also introduce a novel system-level reliability metric for multiprogram workloads on (heterogeneous) multicores. We further show that our reliability-aware scheduler is robust across core count, number of big and small cores, and their frequency settings. 
The hardware cost in support of our reliability-aware scheduler is limited to 296 bytes per core.", "pdfUrls": [ "http://www.eecs.umich.edu/courses/eecs573/slides/03_ReliabilityAwareSchedulingOnHeterogeneousMulticoreProcessors.pdf", "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.12" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0b6187649cc8af54897e2f46122edf5e814eb3cc", "sources": [ "DBLP" ], "title": "Reliability-Aware Scheduling on Heterogeneous Multicore Processors", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "0b643dc29fa5aca25d78d831f95009872f83cb26": { "authors": [ { "ids": [ "10379496" ], "name": "Drew Zagieboylo" }, { "ids": [ "2112306" ], "name": "Kazi A. Zaman" } ], "doi": "10.1145/3030207.3044529", "doiUrl": "https://doi.org/10.1145/3030207.3044529", "entities": [ "Best, worst and average case", "Central processing unit", "Memory footprint", "Multitenancy", "NoSQL", "Persistence (computer science)", "Provisioning", "Representational state transfer", "SQL", "Scalability", "Throughput", "User experience" ], "id": "0b643dc29fa5aca25d78d831f95009872f83cb26", "inCitations": [], "journalName": "", "journalPages": "201-212", "journalVolume": "", "outCitations": [ "01b96f0a1b5f8a0eb69d6a00081e7a5da20ba273", "f2f120074ed458e00d01d8ed390fea10541bf6e1", "58dbe0ea2befd1fb287582a507545a43cd4e0966" ], "paperAbstract": "Video game crash events are characterized primarily by large media payloads and by highly bursty traffic patterns, with hundreds of thousands or millions of reports being issued in only a few minutes. These events are invaluable in quickly responding to game breaking issues that directly impact user experience. 
Even the slightest delay in capturing, processing and reporting these events can lead to user abandonment and significant financial cost.\n A traditional standalone RESTful service, backed by a vertically scaled SQL database is neither a reliable nor cost-effective solution to this problem. An architecture that decouples capture and persistence and uses a horizontally scalable NoSQL database is not only easier to provision, but also uses fewer cpu and memory resources to provide the same end to end latency and throughput.\n By replacing our RESTful implementation with one that takes advantage both of the aforementioned design and multi-tenant provisioning, we have reduced our dedicated cpu footprint by 63% and memory footprint by 59%. Additionally, we have decreased our data loss during spikes to essentially 0, maintained sub-second persistence latency and improved query latency in the average case by 54% with only a 3% sacrifice for worst case queries.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3044529" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0b643dc29fa5aca25d78d831f95009872f83cb26", "sources": [ "DBLP" ], "title": "Cost-Efficient and Reliable Reporting of Highly Bursty Video Game Crash Data", "venue": "ICPE", "year": 2017 }, "0b6ab61a4872ee95ea90ff8bd82d5b70d39d1072": { "authors": [ { "ids": [ "2240460" ], "name": "Zhihao Jia" }, { "ids": [ "2402978" ], "name": "Sean Treichler" }, { "ids": [ "3057817" ], "name": "Galen M. Shipman" }, { "ids": [ "1756761" ], "name": "Michael Bauer" }, { "ids": [ "34313805" ], "name": "Noah Watkins" }, { "ids": [ "3198700" ], "name": "Carlos Maltzahn" }, { "ids": [ "34694816" ], "name": "Patrick S. 
McCormick" }, { "ids": [ "4689402" ], "name": "Alexander Aiken" } ], "doi": "10.1109/HiPC.2017.00043", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00043", "entities": [ "Application checkpointing", "Computation", "Correctness (computer science)", "Fortran", "Legion (software)", "Memory-mapped I/O", "Out-of-core algorithm", "Programming model", "Requirement", "Simulation", "Snapshot (computer storage)", "Supercomputer" ], "id": "0b6ab61a4872ee95ea90ff8bd82d5b70d39d1072", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "307-316", "journalVolume": "", "outCitations": [ "2da4ab6c02d97fe47b589ddd450a5c41f2b47bb9", "223e592891817714daffabed1104477b6ed8ca5d", "09c5293b647fca40fde28ac6c38737f07e873e41", "644fc1f4f104401bd3d57fa5c7a2ce5896b5e75a", "932713096787f57fb406612dd82a1696d6b769be", "262c123c6325d7b4c2edc904d6e85d352ab266b9", "5e50ffa96fa021c85ccedb1bb8b84b59ee268de8", "94e09f8f9a88ffebcacfb298fa737eed0debaab4", "069ec88e2d30784746ab2224bc096e494c745382", "071686697917fd56ae8ace0c4d6bfcf3bef5700a", "2042b469be68653afcb2b7b38490c16369b4501a", "0558c94a094158ecd64f0d5014d3d9668054fb97", "4e63fd64ac90778b8c431e37f54b5cfd01e0f379", "058224ac7b9bc0a0b82e62257656c7a6df62219e", "0541d5338adc48276b3b8cd3a141d799e2d40150" ], "paperAbstract": "Accessing external resources (e.g., loading input data, checkpointing snapshots, and out-of-core processing) can have a significant impact on the performance of supercomputer applications. However, no existing programming systems for high-performance computing directly manage and optimize these external accesses. As a result, users must explicitly manage external accesses alongside their computation at the application level, which can result in both correctness and performance issues. We address this limitation by introducing Iris, a task-based programming model with semantics for external resources. 
Iris allows applications to describe their access requirements to external resources and the relationship of those accesses to the computation. Iris incorporates external I/O into a deferred execution model, reschedules external I/O to overlap I/O with computation, and reduces external I/O when possible. We evaluate Iris on three microbenchmarks representative of important workloads in HPC and a full combustion simulation, S3D. We demonstrate that the Iris implementation of S3D reduces the external I/O overhead by up to 20\u00d7, compared to the Legion and the Fortran implementations.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00043", "http://legion.stanford.edu/pdfs/hipc2017.pdf", "http://theory.stanford.edu/~aiken/publications/papers/hipc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0b6ab61a4872ee95ea90ff8bd82d5b70d39d1072", "sources": [ "DBLP" ], "title": "Integrating External Resources with a Task-Based Programming Model", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "0b6ccddbd98fe274f9fb475565c1633518499549": { "authors": [ { "ids": [ "2154609" ], "name": "Alan Kuhnle" }, { "ids": [ "9597746" ], "name": "Victoria G. Crawford" }, { "ids": [ "1698253" ], "name": "My T. 
Thai" } ], "doi": "10.1109/ICDM.2017.33", "doiUrl": "https://doi.org/10.1109/ICDM.2017.33", "entities": [ "Algorithm", "Approximation algorithm", "Best, worst and average case", "Cluster analysis", "Relevance" ], "id": "0b6ccddbd98fe274f9fb475565c1633518499549", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "237-246", "journalVolume": "", "outCitations": [ "94101e55cdb37d065f9ededcf847349f88c62e2b", "85b2662fabf0e9ec2fc70dc4eef4723ef8537ea0", "3a6a59ba247e95b327e66a19a4d7908a7c2baaa2", "512d245b4b5e09eb6ec0503dd802e9fb4a92456f", "00fe03d70ecb67abf1199830b0454b072461ae3e", "00da9fc73fbab385f32b9f30cb9287aa43674c1e", "8592a96108ba72e53e46ac4428e86c144dd14382", "1471511609f7185544703f0e22777a64c6681f38", "297d9ac4ef83195a2dd1c537ce4d1497f42fc77e", "2a6887a127cc5e778b7027ed105e0622b18b3b81", "bb4535937dd3ba4e2415f1f99a8795fec5f53714", "0371f9e3efbcd4829b5ffbff585155746ef05284", "1e337ee8732f69d83f7d67f8500fee59066158ae", "6ea8894ef9edf31ed83e925a5650a0a8f0b79b76", "904b74aceb3cc14e1b7f65eccf7d91651508f3b5", "00a57850e14320bb41d58696cc409151466b98b2" ], "paperAbstract": "Motivated by the relevance of clustering or transitivity to a variety of network applications, we study the Triangle Interdiction Problem (TIP), which is to find a minimum-size set of edges that intersects all triangles of a network. As existing approximation algorithms for this NP-hard problem either do not scale well to massive networks or have poor solution quality, we formulate two algorithms, TARL and DART, with worst-case guarantees 5/2 and 3 with respect to optimal, respectively. Furthermore, DART is able to efficiently maintain its worst-case guarantee under dynamic edge insertion and removal to the network. 
In our comprehensive experimental evaluation, we demonstrate that DART is able to run on networks with billions of triangles within 2 hours and is able to dynamically update its solution in microseconds.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.33" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0b6ccddbd98fe274f9fb475565c1633518499549", "sources": [ "DBLP" ], "title": "Scalable and Adaptive Algorithms for the Triangle Interdiction Problem on Billion-Scale Networks", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "0b7cf947d6d9788c06a04b9c18ec03da8cf5b9d3": { "authors": [ { "ids": [ "2864417" ], "name": "Yonghwi Kwon" }, { "ids": [ "2617324" ], "name": "Brendan Saltaformaggio" }, { "ids": [ "3440899" ], "name": "I Luk Kim" }, { "ids": [ "1766676" ], "name": "Kyu Hyung Lee" }, { "ids": [ "35593490" ], "name": "Xiangyu Zhang" }, { "ids": [ "2696404" ], "name": "Dongyan Xu" } ], "doi": "", "doiUrl": "", "entities": [ "A(2)C", "Malware", "Mutation", "Mutation Abnormality", "POST (HTTP)", "Programs - Publication Format", "Reside", "SQL injection", "State space" ], "id": "0b7cf947d6d9788c06a04b9c18ec03da8cf5b9d3", "inCitations": [ "3181b251ca9f8af4ed41c49ef6f5b03bd95eef77", "6809b7c476b7340bba70a99ad92b1dbb883d75b5", "65c6bda16861410915c4b50d2540c9d058a1bb57" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "01b5b648af61ddb382da638a299fae2315b25192", "933dce04cfe0f8e21c5b3d3eaf4595891e695b4f" ], "paperAbstract": "Malicious payload injection attacks have been a serious threat to software for decades. Unfortunately, protection against these attacks remains challenging due to the ever increasing diversity and sophistication of payload injection and triggering mechanisms used by adversaries. In this paper, we develop A2C, a system that provides general protection against payload injection attacks. 
A2C is based on the observation that payloads are highly fragile and thus any mutation would likely break their functionalities. Therefore, A2C mutates inputs from untrusted sources. Malicious payloads that reside in these inputs are hence mutated and broken. To assure that the program continues to function correctly when benign inputs are provided, A2C divides the state space into exploitable and post-exploitable sub-spaces, where the latter is much larger than the former, and decodes the mutated values only when they are transmitted from the former to the latter. A2C does not rely on any knowledge of malicious payloads or their injection and triggering mechanisms. Hence, its protection is general. We evaluate A2C with 30 realworld applications, including apache on a real-world work-load, and our results show that A2C effectively prevents a variety of payload injection attacks on these programs with reasonably low overhead (6.94%).", "pdfUrls": [ "https://cyfi.ece.gatech.edu/publications/NDSS_17.pdf", "https://www.cs.purdue.edu/homes/kwon58/data/a2c_ndss17.pdf", "https://www.cs.purdue.edu/homes/bsaltafo/pubs/NDSS_17.pdf", "https://www.cs.purdue.edu/homes/kwon58/data/a2c_ndss17_slides.pdf", "http://friends.cs.purdue.edu/pubs/NDSS17.pdf", "http://cobweb.cs.uga.edu/~kyuhlee/publications/ndss17_1.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0b7c/f947d6d9788c06a04b9c18ec03da8cf5b9d3.pdf", "s2Url": "https://semanticscholar.org/paper/0b7cf947d6d9788c06a04b9c18ec03da8cf5b9d3", "sources": [], "title": "A2C: Self Destructing Exploit Executions via Input Perturbation", "venue": "", "year": 2016 }, "0b86ab09a837281b2403fbd24b7f3aea3512dac5": { "authors": [ { "ids": [ "1727978" ], "name": "Junchen Jiang" }, { "ids": [ "1770094" ], "name": "Shijie Sun" }, { "ids": [ "1732751" ], "name": "Vyas Sekar" }, { "ids": [ "39280957" ], "name": "Hui Zhang" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Classless Inter-Domain Routing", "Interaction", 
"Machine learning", "Mathematical optimization", "Program optimization", "Relay" ], "id": "0b86ab09a837281b2403fbd24b7f3aea3512dac5", "inCitations": [ "7059afc68674fa925c48bb43284aab829f23a058", "e961d8ea7a52b1a0c04aec159fa2d5d68ccf1e8c", "53cc6bf305539b4bd8829df42996e0eb12512434", "c541f3eb52c85fde5175d3e1e2e024e6dad9299f", "0bdb6e2cb1d8960ecf754fd4d28ea11714178fdf", "4f529a014523fc1a54c03f781d49309f847bbfea", "cd8d1f75d152d3982e567c3483bc2bfd46a6cfb0", "7c4a7c37102f5cf97137c27181dcb7f24d3f32ef" ], "journalName": "", "journalPages": "393-406", "journalVolume": "", "outCitations": [ "26c2fc621d06f4b85cf145b04c963d4f2d59ccc0", "73f512de77dad7d0abe8076a856727021b9493d3", "6a97303b92477d95d1e6acf7b443ebe19a6beb60", "39b575b35f2be0dfe42763737a696ec4de218950", "0154103e091dea574c39f3c89d52ccfefc06af6c", "0f21eea1b6c1cfe89a3ef0a120ec62b9f7c3fda7", "411eb6534d39a37ed43443ba1d2e168c73171330", "11aad3a9bd17be8bd73dbde5f084ca7b623096f3", "3bd6bc388dea99b023c6695bd287eac8f5d28c0a", "1a50e63ebf99d47e69175e16db9c621f9c558e1a", "257105d4299bec5eb639c510bd0b24f808bb102d", "aa027c65df541aa926da13c19f36e72773068af2", "02d1105bec3877ed8cd2d28f76b67ae8ba3f2331", "65bcc96576f30e7c976fa5d27b4e3e8ab9d70874", "09bdc85bea8970b7d77250ffa7b8788ce1f38d53", "8a66bf39ce825bd4fd1d4428f97d56d1e145dada", "3ff93ff1ccbfce995067a4617d73ea30115318b6", "4cf28bd50dbc329901552c249580ce1e34f66cba", "2d27cf59a420f46fc890ca501caabb53e1f57964", "002efcf9f0b58af153556b84395a37f6171195da", "6f54a7933235ced5684e3bff18f7e5dc40510018", "56893647902b4ab971fd092ce78687675b6942a7", "03955172eeb34b27be2ae82c129c6bc2fa7178c9", "2e8a322666a89adf83e8e0e7cbc5142fba5e7b01", "2b02f1b0a887d912bd3362472689ccff118faf1e", "65fd142f37c315cdf892184f8fb21281b88f6269", "1771487ec938af4b26a8f830fb0faee25c4843ad", "6b6a5fe05f19bd4322a43b0f688b854db40a2fdd", "35dde872db190b3bc990ba94eed2a7f9d95c0126", "22bd3a35b9550bc5b570a0beee5648eb9033be3b", "1b3c86ad6c149941750d97bd72b6b0122c1d8b5e", "88e271cfc3ca9d4db09a0f29d4a2568246cab0ef" ], 
"paperAbstract": "Content providers are increasingly using data-driven mechanisms to optimize quality of experience (QoE). Many existing approaches formulate this process as a prediction problem of learning optimal decisions (e.g., server, bitrate, relay) based on observed QoE of recent sessions. While prediction-based mechanisms have shown promising QoE improvements, they are necessarily incomplete as they: (1) suffer from many known biases (e.g., incomplete visibility) and (2) cannot respond to sudden changes (e.g., load changes). Drawing a parallel from machine learning, we argue that data-driven QoE optimization should instead be cast as a real-time exploration and exploitation (E2) process rather than as a prediction problem. Adopting E2 in network applications, however, introduces key architectural (e.g., how to update decisions in real time with fresh data) and algorithmic (e.g., capturing complex interactions between session features vs. QoE) challenges. We present Pytheas, a framework which addresses these challenges using a group-based E2 mechanism. The insight is that application sessions sharing the same features (e.g., IP prefix, location) can be grouped so that we can run E2 algorithms at a per-group granularity. This naturally captures the complex interactions and is amenable to realtime control with fresh measurements. 
Using an endto-end implementation and a proof-of-concept deployment in CloudLab, we show that Pytheas improves video QoE over a state-of-the-art prediction-based system by up to 31% on average and 78% on 90th percentile of persession QoE.", "pdfUrls": [ "https://www.cs.cmu.edu/~junchenj/pytheas.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-jiang_0.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-jiang_0.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/jiang" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0b86/ab09a837281b2403fbd24b7f3aea3512dac5.pdf", "s2Url": "https://semanticscholar.org/paper/0b86ab09a837281b2403fbd24b7f3aea3512dac5", "sources": [ "DBLP" ], "title": "Pytheas: Enabling Data-Driven Quality of Experience Optimization Using Group-Based Exploration-Exploitation", "venue": "NSDI", "year": 2017 }, "0b877aed79939b2ba81b6dc58ce8544c6b532bcb": { "authors": [ { "ids": [ "1971184" ], "name": "Ana Klimovic" }, { "ids": [ "2655459" ], "name": "Heiner Litz" }, { "ids": [ "1700331" ], "name": "Christoforos E. 
Kozyrakis" } ], "doi": "10.1145/3037697.3037732", "doiUrl": "https://doi.org/10.1145/3037697.3037732", "entities": [ "Algorithm", "Data center", "Direct memory access", "IP address spoofing", "Quality of service", "Reflex (building design software)", "Requirement", "Scheduling (computing)", "Service-level agreement", "Throughput" ], "id": "0b877aed79939b2ba81b6dc58ce8544c6b532bcb", "inCitations": [ "14c9c04973ca9bd1cecf0892a9b90a54aa930098", "7818619eb25c7c1bb470a5b5572fa0371de721bc", "5483feb2b08312c7ed39336c0b3e68a39be8e016", "c206dd5b90104df0fd12a2c1f3fb0f913ee08c0b", "5451e8f4e616600f6061a95b435d3d3b75f4c69a", "e5c8095c2a87b474cb76e0d1d7dc9a8a37f3a80e", "1d08d231ec66645ec56d2210c1a7c6b44c6ff041", "ae3d5033213ea2f022b806a88e28c74aa2a9a670", "874c4958275c645e1261f92e42c3fbefd777ec7d", "65fef0cd15b565055f21cf8c489cae39dd569220" ], "journalName": "", "journalPages": "345-359", "journalVolume": "", "outCitations": [ "57579a5708af144e71de6d41711dab7adac325f8", "be4c6170ee4fd72ff5c8fc92e3d6ba5cba774cf6", "3fbba3719b3e07084cbc85daf2a1a094c9335b6d", "0d07c2ae7c8158b35cf58b19865d9f5efc7be5d6", "20e3bc47629b29a1b02dccd7c2b63beb7a7bd36e", "87269b265465eda145f1b12ee7761b76738d1ad4", "048a09d7c8713dc2533c1e31ac3f224868293461", "18a5f443299784479e78d9e77f175af57cb2fa2b", "0c4393689eb4c16ed490f93a6452befc1703a29e", "111e2d5634cb30d5d841cdb22563f9b371fb5f54", "2ddb902a1a995c26210b631fc33bf897dba76189", "2a9d09d8e2390c92cdaa5c8b98d6dd4cb394f638", "0d62acaefd4c4f41fd5814c8d9267d7798de9284", "0abe5211e209b272890ba6820a33b72e938b0b3b", "1d2871c56d07a35e6709d535fbbb2df6b434962a", "8273e1895ae217a2e2f8654d5cbd4cddaaa32306", "68daa59a032b2260c4e1727cb4e654ce32778a68", "09bd66ed15985caa6b0bf1d54a36b508141ed128", "478f51822252e4221c920bbf9d30a0b0491045ec", "7b420218b4e797dcc6ca96f6c6c3ec29a9688c07", "5cde06240acc288c986a10ee39f17ea28c9ef05c", "0b2aa4a87a6c253472e801080614da0dab47cfc2", "557cea900e954ef122b98ac2122fdde5ddbb3002", "3767bfac1eb78148bb61aca159f45dd4cdb588ff", 
"30419d0e0fcaebbfbcdb88f702bd01306d14fb15", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "1820a34042d6371a9e20484b0c63b698eb522a6c", "0c575d220d8bc125d6a2290984c8e2b87011631d", "102bf68c4227380dab0567d8f17fb720d3421525", "08632fe2b934ed15d3499e7321282c81adc2c390", "65a2cb8a02795015b398856327bdccc36214cdc6", "3e7fd5ac3fc1ab19c985c97a0614e4109fa91583", "0a8e0a9f0ae9910d5ecf165071559a2c4191a098", "61977858b3eea4f5a6d81393301e7298ade7a2d8", "69fde6e922375d9cc01c25f2b6b2161591812230", "03543f75c4fe0c49f81af789a1c7293ff0e4e107", "2c11d5117a8b97ef2ef268e5fb38e8c5ffb1c58c", "87064d58ef49df1b47c4ac74258fda1aecab2b68", "04f94f8d48badcdcaab93c28a60414c7b1ffd274", "0b2c84be9e9f97f2464ad9d09be5f4c37edda47e", "13d6c568c770ff5a070072e720fb34b0037cdab8", "9bcc0099f0d34c391ca1a3c5220cb0b3b33c4183", "118da4d571ee02b4f31b5c4c078857472e77ba1e", "13b26d008210fffeb8a77c9e90f1ff837523c536", "5909192b374eac0cda4df7c986ebc997cdcd6002", "5f3f9223c5c9f896be099bc177929febad508407", "04e7e499e89364fcb71954656539d6a53f9c991d", "132f00de21cee656d00ad6779f1926070ad59544", "169a08383bcb0577e6b7d4d1445359383fe07fc7", "431706e738f8c317bc76959a122b5bcbe2defd29", "7129b305ce45f83127e928e8510da9fae0783905" ], "paperAbstract": "Remote access to NVMe Flash enables flexible scaling and high utilization of Flash capacity and IOPS within a datacenter. However, existing systems for remote Flash access either introduce significant performance overheads or fail to isolate the multiple remote clients sharing each Flash device. We present ReFlex, a software-based system for remote Flash access, that provides nearly identical performance to accessing local Flash. ReFlex uses a dataplane kernel to closely integrate networking and storage processing to achieve low latency and high throughput at low resource requirements. Specifically, ReFlex can serve up to 850K IOPS per core over TCP/IP networking, while adding 21us over direct access to local Flash. 
ReFlex uses a QoS scheduler that can enforce tail latency and throughput service-level objectives (SLOs) for thousands of remote clients. We show that ReFlex allows applications to use remote Flash while maintaining their original performance with local Flash.", "pdfUrls": [ "http://csl.stanford.edu/~christos/publications/2017.reflex.asplos.pdf", "https://web.stanford.edu/~anakli/pdf/reflex.pdf", "http://nvmw.ucsd.edu/nvmw18-program/unzip/current/nvmw2018-final35.pdf", "http://doi.acm.org/10.1145/3037697.3037732" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0b877aed79939b2ba81b6dc58ce8544c6b532bcb", "sources": [ "DBLP" ], "title": "ReFlex: Remote Flash \u2248 Local Flash", "venue": "ASPLOS", "year": 2017 }, "0b87b0ec9db31afc0b92e5c4fc12c3e1e3467d9c": { "authors": [ { "ids": [ "31682870" ], "name": "Abhiram Balasubramanian" }, { "ids": [ "32878777" ], "name": "Marek S. Baranowski" }, { "ids": [ "4698435" ], "name": "Anton Burtsev" }, { "ids": [ "3169583" ], "name": "Aurojit Panda" }, { "ids": [ "2050160" ], "name": "Zvonimir Rakamaric" }, { "ids": [ "3150866" ], "name": "Leonid Ryzhyk" } ], "doi": "10.1145/3139645.3139660", "doiUrl": "https://doi.org/10.1145/3139645.3139660", "entities": [ "Application checkpointing", "DBpedia", "Data-flow analysis", "Fault detection and isolation", "Garbage collection (computer science)", "Information flow (information theory)", "Overhead (computing)", "Programming language", "Rust", "Sandbox (computer security)", "Substructural type system", "System programming", "System programming language", "Systems theory", "Type system", "Zero-copy" ], "id": "0b87b0ec9db31afc0b92e5c4fc12c3e1e3467d9c", "inCitations": [ "e41440cff90683629228b308a94e48c7af11ca36", "5d5190477e22977b3c286558bc5fe3a27ab375d3", "16090ea0f2aabd3d890e2eafaf461c39a872b766" ], "journalName": "Operating Systems Review", "journalPages": "94-99", "journalVolume": "51", "outCitations": [ "67cf1189c859d66bac309f9438df434fb651f97a", 
"0433bb657317ac22f7c66d71dfd14c8ead607d73", "0df445ca53975d93f27c9def03e964d3113a4607", "029d525dd48347fa4b8a48dbf4b41b4b37199a6a", "2fd3879781c80076c0401d094a3899de1aebf92f", "17650831f1900b849fd1914d02337e1d006aea0c", "2bf4940710deb2571e93b1c922e8e7452e854afd", "0804ed47a40fbe6deb5ce93efe551086695ae393", "7277301fdfd711bcb556d7823c2f7d548e490f2f", "8188d1381f8c77f7df0117fd0dab1919693c1295", "0065c8c9bf4961d637a69e26a8045074929a8cd3", "6756d3e0669430fa6e006754aecb46084818d6b6", "067c7857753e21e7317b556c86e30be60aa7cac0", "cba77292e7f1f271fff1bd28238728f4f18dd13e", "33b85ea9b4fb28ac893167c29529d62d355c06a5", "4728bda27d89d524f0751ef0dddb5da0bffe0826", "820eb6eb399964af8f00bf2227ec07762dbaf840", "1dbb22145ca2c9d5e9c811619d709ecf7b062859", "01ee7db67463b53143e0b2c126363ed6d3c8a532", "06567663b31f7b8cf1de3d5f2ca6c79422ef60c8", "161e0dde59d8046e6e19e234431199036afd060b", "129359a872783b7c3a82c2c9dbef75df2956d2d3", "0478cf7e8f0262aad69c2c375bd151fc0e8bbf1e", "49d50f1d0bb2a03388f3cb60f9f1ca01974d371c", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "2b3772ad2481650cabd39fac50f8bc99ec4709f8", "9f2233c08bb2457ab0c6491d136a6e7897648393", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "a287b3191f8688e15fa840fe1723397bedfdf41e", "9117c75f62162b0bcf8e1ab91b7e25e0acc919a8", "077d6198039dea3f60ef9f3d3ef9f128fd3edf71", "99f7e02ae90cd27cc410c2f8ba8b2cf2f9f0278a", "34ea8ffd27f9bbb7fef494cfca41bed5f356be56", "0db9636ace0830b8b5e86b031a7a86d621446bd9" ], "paperAbstract": "Rust is a new system programming language that offers a practical and safe alternative to C. Rust is unique in that it enforces safety without runtime overhead, most importantly, without the overhead of garbage collection. While zero-cost safety is remarkable on its own, we argue that the superpowers of Rust go beyond safety. 
In particular, Rust's linear type system enables capabilities that cannot be implemented efficiently in traditional languages, both safe and unsafe, and that dramatically improve security and reliability of system software. We show three examples of such capabilities: zero-copy software fault isolation, efficient static information flow analysis, and automatic checkpointing. While these capabilities have been in the spotlight of systems research for a long time, their practical use is hindered by high cost and complexity. We argue that with the adoption of Rust these mechanisms will become commoditized.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3103006", "http://www.ics.uci.edu/~aburtsev/doc/crust-hotos17.pdf", "http://doi.acm.org/10.1145/3139645.3139660" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0b87b0ec9db31afc0b92e5c4fc12c3e1e3467d9c", "sources": [ "DBLP" ], "title": "System Programming in Rust: Beyond Safety", "venue": "HotOS", "year": 2017 }, "0b978f224b8520c8e3d9b2eb55431262fcb16c05": { "authors": [ { "ids": [ "27746441" ], "name": "Mat\u00fas Nemec" }, { "ids": [ "2456438" ], "name": "Marek S\u00fds" }, { "ids": [ "1804733" ], "name": "Petr Svenda" }, { "ids": [ "2720291" ], "name": "Dusan Klinec" }, { "ids": [ "1805863" ], "name": "Vashek Matyas" } ], "doi": "10.1145/3133956.3133969", "doiUrl": "https://doi.org/10.1145/3133956.3133969", "entities": [ "Authentication", "Average-case complexity", "Best, worst and average case", "Central processing unit", "Coppersmith's attack", "Cryptography", "Digital identity", "Entropy (information theory)", "FIPS 140", "FIPS 140-2", "Fingerprint", "Key generation", "Laptop", "Modulus robot", "Parallel algorithm", "Pretty Good Privacy", "Public key fingerprint", "Random number generation", "Trusted Computing", "Trusted Platform Module" ], "id": "0b978f224b8520c8e3d9b2eb55431262fcb16c05", "inCitations": [ "e2295305261c65fc85a9aec00a662259a183eb49" ], "journalName": "", 
"journalPages": "1631-1648", "journalVolume": "", "outCitations": [ "1f527fed31971e07093695c128c10b4f3c20d109", "664c10bdb3f0239b81ea34005eb3436e86d87565", "1e9eb8716d46839b62dc0171ba8b7d020de8f97d", "b6695347a10114b2697081446c7c83c2a421e33b", "b4f88871e64da5e7832b42e47b02621b7d7392db", "462afd7cc36165b3dd3d4605f74cbdb31edd7262", "8e82b861f1ad68d7bb55a5e0938cf5c93d3355b5", "d21f261bf5a9d7333337031a3fa206eaf0c6082c", "2dbcc7077a01981679007eceac6c6659a1c18200", "c04139f61c9a820dd9e59dd90f685a95810005bc", "80c886fd6bcdd4605b22a42da8b21bc864ca357c", "e3e49df9da4ee17c8ee37e7104177331bb14af99", "3c338bb3dcc10b7c840b4dbf3ad32e8256313ee3", "0e36d086e2f1d8b65423ef42c15a161347d1f5d9", "8f0a271fd78ab1ee1ff1e84b62d26869ee469df1", "0fb069a3b79552f0976476dde14931ab864242e4", "4f93999383e7fdcfb0de8423789c98389aef8aef", "61f18efffce84bf387ee5e431049b5e587a6ebbf", "1f27e4b81d694baa47aa104b66ed63387b4e3fea", "f066706e868e21e78c49ace256368a7672e80ec5", "576bb017fceb5ed3ad2e96c0e9fc78ac2fcb47e0", "70fce7bd4a0c23d742687c84ba4d1ec203742d2e", "47579074866f0199ce3a7993f6e5609a2c4c2000", "899a4fdc048102471875e24f7fecb3fb8998d754", "82c874949964d858457666de3f2d38a9781645c8", "ccfafaf2ccf0e3607829d449b8cf3c444e85405d", "1b9fa65c6dc9c18171437a61cdd92f5b63a47738", "519567268db1f5b9875bc70c9f43bf2e25d1f499", "c246547bef08f6364e31ceef4907d0047157f52c", "043a2dc8bddc2af1b03b320c1b9aef1f7ca01568", "3142eb86ee0fddedb344b94302a33197b92976d0", "7316e4252800c86157c4e4cd16a8110341c97348", "95a032938444fe3e1ff6989ca9a026bf02a12ec7", "85fd685efb93cfc880b9b72977feed3a026176be", "7c89fecabdb0ff4b5aae56149ea39520b781637e", "582302da008255ff515f05c3242f750878725745", "1f4d827da9b19e83b4fe750d04be54ded9c3a21c", "d0c6841e9cf40ea3714b35eb3b709bc46ce050b0" ], "paperAbstract": "We report on our discovery of an algorithmic flaw in the construction of primes for RSA key generation in a widely-used library of a major manufacturer of cryptographic hardware. 
The primes generated by the library suffer from a significant loss of entropy. We propose a practical factorization method for various key lengths including 1024 and 2048 bits. Our method requires no additional information except for the value of the public modulus and does not depend on a weak or a faulty random number generator. We devised an extension of Coppersmith's factorization attack utilizing an alternative form of the primes in question. The library in question is found in NIST FIPS 140-2 and CC~EAL~5+ certified devices used for a wide range of real-world applications, including identity cards, passports, Trusted Platform Modules, PGP and tokens for authentication or software signing. As the relevant library code was introduced in 2012 at the latest (and probably earlier), the impacted devices are now widespread. Tens of thousands of such keys were directly identified, many with significant impacts, especially for electronic identity documents, software signing, Trusted Computing and PGP. We estimate the number of affected devices to be in the order of at least tens of millions.\n The worst cases for the factorization of 1024 and 2048-bit keys are less than 3 CPU-months and 100 CPU-years on single core of common recent CPUs, respectively, while the expected time is half of that of the worst case. The attack can be parallelized on multiple CPUs. 
Worse still, all susceptible keys contain a strong fingerprint that is verifiable in microseconds on an ordinary laptop -- meaning that all vulnerable keys can be quickly identified, even in very large datasets.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133969", "https://crocs.fi.muni.cz/_media/public/papers/nemec_roca_ccs17_preprint.pdf", "https://acmccs.github.io/papers/p1631-nemecA.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0b978f224b8520c8e3d9b2eb55431262fcb16c05", "sources": [ "DBLP" ], "title": "The Return of Coppersmith's Attack: Practical Factorization of Widely Used RSA Moduli", "venue": "CCS", "year": 2017 }, "0ba344ff884030607fef6281e304e5bfe2be0e4f": { "authors": [ { "ids": [ "2730174" ], "name": "Syeda Khairunnesa Samantha" }, { "ids": [ "2909676" ], "name": "Hoan Anh Nguyen" }, { "ids": [ "2327697" ], "name": "Tien N. Nguyen" }, { "ids": [ "2241193" ], "name": "Hridesh Rajan" } ], "doi": "10.1145/3133907", "doiUrl": "https://doi.org/10.1145/3133907", "entities": [ "Application programming interface", "Computer programming", "Control flow", "Curiously recurring template pattern", "Library", "Library (computing)", "Multi-level cell", "Postcondition", "Precondition", "Programmer", "Software development", "Source lines of code", "Sparse matrix", "Text corpus" ], "id": "0ba344ff884030607fef6281e304e5bfe2be0e4f", "inCitations": [], "journalName": "PACMPL", "journalPages": "83:1-83:29", "journalVolume": "1", "outCitations": [ "3b22b7fb09fc50cb3c0d0fc422531e3f3d4c8a3f", "79bc52f283abeb8052e12e2c2f6cde317be4cec3", "0f2ef9e0555a582fdee13d4a3c219f13a2bbae3f", "6c8914a2920ba5f7a346b4742fea0633293f7420", "507a9155493acfe213fdd22a01ddaa9d50d26895", "14b750a0fd5a13f7494e4abf9b97718ff558f508", "a123f759ee93b9065758b9361d6a8ed9d6f01ed8", "dc8beee4bcab9ff7810e7077dd30e5a12156525a", "13a94e9847ceb7c55d38bd6567a6252f23caa406", "53c96fead0dc9307809c57e428d60665483ada9a", "2681cbdd353046557d5cb84616013bcbf054515b", 
"31a55ab4fb0f5b83a1b2f9c85f9590c24ab37ce3", "8ac6be0e3ea62e9819d5a25da645f2d350474693", "89dccd66067fc13356acd0c1ff503209be47c3c6", "5fbf739032dd548c1ff189e7333f05e215906a1b", "6052fd0845bb74b00c594d2edccf533d91adeaeb", "1c6dcc84016679fe36f5664662bc6b49169900af", "33aeeab47f547da8ff4b95c2328826a8d92f260a", "27c628db1275435a03e939220679417668406367", "79b3cdbca83235716e3e790e4de7c30f1a50d734", "34ff0423915c4513a2997e5b91d8043445223bf8", "c0eb24448d41a7fa8fb944e8367b9edd215b27f3", "1005d20be543e69311f34046352353410a8ae211", "1b80bbe6491d0b5ca6efc286317eb5d6fd618f9a", "194ab7a0e0cc7a31577603d30ebe6b2cb1f279bd", "691d93d4ecb1a7f1d8a24204a052531aefd2fd7f", "25fc0dd6b910e9025a076c56a985648eec42c4ae", "d8e338c18e425cba83644ba03a516079c3630e5e", "25a7c0a270d165eef3401d71a6d484bf26a9d364", "6d9cf1c4dbb9669ed57a52ee6e8f6e4f3e180a19", "67943b9ea82c304c728cbd16c2daeb6e54170405", "3e36696fc62604d6e29dea19fbbc9f576106ac80", "2d24755319f645f15ea69ddc2fa17e25d20f4833", "25bf72da526c7b04c3e4c3adca5fc82d99dd6960", "aae8e02007c9940e86d2d764918450cdf44c3f9d", "194322a0cc42f90f34c44b8e627b1d2cd94dcca4", "72e5294d5e3ce8155bfb1bb94b0bd8c3632025e8", "3e0a38f02fc1003ac996d030846aa6fdc4f9ab00", "3c542446426831e1e36a3a3271873f18d1346bda" ], "paperAbstract": "Frameworks and libraries provide application programming interfaces (APIs) that serve as building blocks in modern software development. As APIs present the opportunity of increased productivity, it also calls for correct use to avoid buggy code. The usage-based specification mining technique has shown great promise in solving this problem through a data-driven approach. These techniques leverage the use of the API in large corpora to understand the recurring usages of the APIs and infer behavioral specifications (preconditions and postconditions) from such usages. A challenge for such technique is thus inference in the presence of insufficient usages, in terms of both frequency and richness. 
We refer to this as a \"sparse usage problem.\" This paper presents the first technique to solve the sparse usage problem in usage-based precondition mining. Our key insight is to leverage implicit beliefs to overcome sparse usage. An implicit belief (IB) is the knowledge implicitly derived from the fact about the code. An IB about a program is known implicitly to a programmer via the language's constructs and semantics, and thus not explicitly written or specified in the code. The technical underpinnings of our new precondition mining approach include a technique to analyze the data and control flow in the program leading to API calls to infer preconditions that are implicitly present in the code corpus, a catalog of 35 code elements in total that can be used to derive implicit beliefs from a program, and empirical evaluation of all of these ideas. We have analyzed over 350 millions lines of code and 7 libraries that suffer from the sparse usage problem. Our approach realizes 6 implicit beliefs and we have observed that adding single-level context sensitivity can further improve the result of usage based precondition mining. The result shows that we achieve overall 60% in precision and 69% in recall and the accuracy is relatively improved by 32% in precision and 78% in recall compared to base usage-based mining approach for these libraries.", "pdfUrls": [ "http://design.cs.iastate.edu/papers/OOPSLA-17/implicit_belief.pdf", "http://doi.acm.org/10.1145/3133907" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0ba344ff884030607fef6281e304e5bfe2be0e4f", "sources": [ "DBLP" ], "title": "Exploiting implicit beliefs to resolve sparse usage problem in usage-based specification mining", "venue": "PACMPL", "year": 2017 }, "0bd77e0753180016a35491a181c2b59ec2a143e8": { "authors": [ { "ids": [ "3271200" ], "name": "Benjamin Greschbach" }, { "ids": [ "1791678" ], "name": "Tobias Pulls" }, { "ids": [ "3469156" ], "name": "Laura M. 
Roberts" }, { "ids": [ "39700643" ], "name": "Phillip Winter" }, { "ids": [ "1800154" ], "name": "Nick Feamster" } ], "doi": "", "doiUrl": "", "entities": [ "Adversary (cryptography)", "Client (computing)", "Correlation attack", "Denial-of-service attack", "Relay", "Resolver One", "TRAVERSE", "Tor Messenger", "Traceroute" ], "id": "0bd77e0753180016a35491a181c2b59ec2a143e8", "inCitations": [ "e2309bfc222190aa066342f654b5bc7457503924" ], "journalName": "CoRR", "journalPages": "", "journalVolume": "abs/1609.08187", "outCitations": [ "4c775488cbb7557b1e20d43d3fb7c6c286eeb7d2", "357af3dd66a8ee994f17c890422fda1b618586d3", "067c6f27a67976a7e67f567e5568996d6ca14962", "133e0e83dc6877c6d417431e875cd57876153893", "0ae31e412826cf1dfb45c85b14df33f1863b5011", "096a2026fa46abd43143e8ae3dc0ce6414310cf9", "356e0eece5bbd700ea3c388af8ea3e088baf7c6e", "0662c6e20750068e03711b950ee4b730f61ef342", "31f530f8d0ecb37d2d376313bc4d0dd371bc34de", "2452c0b6563b95c743e70b91782af73f4aba6826", "108747579aef6bf029623639a86070feaf5cad41", "395867fa1708ad9ef3572ab9be0b34b203707be3", "8750c0b8094957003fd7f681f9ef8af47b86a99d", "2d2ff1db0079fc0a47a37d41be43c0c9a435e4bb", "02d0a3def2294d3d9c1ac73465b3c88bf9c61b2e", "0f1a01fde1ac0507b48c369c7f4f837bb3c628f6", "566333376dd1af014555b4cf82cda42c22501013", "f218f284294d893fe9f15bdad5cb98c10887fc97", "2b5e90bc36ded540baf24cdc8a5bb8e66032bc08", "3830d1be16e45466147598acaf4ce65628046758", "2e4b61ca5ff7af8743e4365edeb40cd87df15c5a", "1708eba3482a2bc755f405ef9446914f82a321ad", "1cd929e6cd9c62697de1a78102600d9accbb3a0e", "3977096a0c292e3af2f408927fd095db5a9df9e4", "7b102e4c57feaa1cb802b58f0f9ff2c934a82db7", "69a3527f4e2d301536cbe28e02d3789bcdc66c11", "0be8170df4c1ea1cf8312ae5ed326665224d5d9c", "2633619177fcb13211008c6f8b952933afc01cde", "18b1c62d6c7fa0e619f0c13172d8852b3d5a71fe", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "058ada9d78c689e03ea898008e2bcebf69910d4f", "0469044c24ac4897fcffc3c7db6a2d9c7fd08848", "4458751fda28db4b489b5626e2e9cc965f3c379a" ], 
"paperAbstract": "Previous attacks that link the sender and receiver of traffic in the Tor network (\u201ccorrelation attacks\u201d) have generally relied on analyzing traffic from TCP connections. The TCP connections of a typical client application, however, are often accompanied by DNS requests and responses. This additional traffic presents more opportunities for correlation attacks. This paper quantifies how DNS traffic can make Tor users more vulnerable to correlation attacks. We investigate how incorporating DNS traffic can make existing correlation attacks more powerful and how DNS lookups can leak information to third parties about anonymous communication. We (i) develop a method to identify the DNS resolvers of Tor exit relays; (ii) develop a new set of correlation attacks (DefecTor attacks) that incorporate DNS traffic to improve precision; (iii) analyze the Internet-scale effects of these new attacks on Tor users; and (iv) develop improved methods to evaluate correlation attacks. First, we find that there exist adversaries who can mount DefecTor attacks: for example, Google\u2019s DNS resolver observes almost 40% of all DNS requests exiting the Tor network. We also find that DNS requests often traverse ASes that the corresponding TCP connections do not transit, enabling additional ASes to gain information about Tor users\u2019 traffic. We then show that an adversary who can mount a DefecTor attack can often determine the website that a Tor user is visiting with perfect precision, particularly for less popular websites where the set of DNS names associated with that website may be unique to the site. 
We also use the Tor Path Simulator (TorPS) in combination with traceroute data from vantage points co-located with Tor exit relays to estimate the power of AS-level adversaries who might mount DefecTor attacks in practice.", "pdfUrls": [ "http://www.cs.princeton.edu/~pwinter/pdf/greschbach2016a.pdf", "https://www.freehaven.net/anonbib/cache/dnstor-ndss2017.pdf", "https://www.cs.princeton.edu/~pwinter/pdf/greschbach2016a.pdf", "http://www.internetsociety.org/sites/default/files/ndss2017_06B-2_Greschbach_paper.pdf", "http://arxiv.org/pdf/1609.08187v1.pdf", "http://arxiv.org/abs/1609.08187", "https://www.cs.princeton.edu/~pwinter/pdf/greschbach2016a-slides.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/e-effect-dns-tors-anonymity/", "https://arxiv.org/pdf/1609.08187v1.pdf", "https://arxiv.org/pdf/1609.08187v2.pdf", "http://www.cs.princeton.edu/~pwinter/pdf/greschbach2016a-slides.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/95e6/037b826c6e55c9231e20c8114c02b80c3897.pdf", "s2Url": "https://semanticscholar.org/paper/0bd77e0753180016a35491a181c2b59ec2a143e8", "sources": [ "DBLP" ], "title": "The Effect of DNS on Tor's Anonymity", "venue": "NDSS", "year": 2017 }, "0c02f8b36629b43e3825ab856b0a09e188e7355a": { "authors": [ { "ids": [ "1734704" ], "name": "Jingjing Wang" }, { "ids": [ "1718134" ], "name": "Magdalena Balazinska" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Failure rate", "Garbage collection (computer science)", "Java", "Memory management", "Scheduling (computing)", "Static variable", "Virtual machine" ], "id": "0c02f8b36629b43e3825ab856b0a09e188e7355a", "inCitations": [], "journalName": "", "journalPages": "745-758", "journalVolume": "", "outCitations": [ "33e1561a3c9c76fd20c1b6bfd73036769e2d7f67", "16faadbd3b7b51fb0b8a9ce88e2971344f641773", "33244b59fe506331926b3a33e348209ac456c532", "1a16c05b5e002bcb117d892fe4101d58ad8ac6c9", "67f49884d9418bdf4e68796ab4c77be951835e67", 
"4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "75cfd8ba815e41c475e0b89999352775d1759bcd", "18e14796f2da19e33dee2e455d18bf48600ef9a8", "5dd36ed50668b5cc1c95ce6cf83b1b9b21a5f560", "04f70629736c61ff2b658826df71e68c2c955b5e", "6d8c9fcce8177d6f8d122d653c7d32d7624d6714", "1d27d04e8cef4d32cb4e022c9f493a40a019f59f", "5bcc56aade8f79f7c1a33c5cae234150a8d2336b", "8f72d4446c9c840d0d7fc8bd538132db228727a8", "415ce8daaf2a0f6fb9cd5ed7884ae721cbcd38a5", "205ab133e78fa6f212fa185a5b57c7c0788657f6", "9117c75f62162b0bcf8e1ab91b7e25e0acc919a8", "a5d2c3d057029233704017ccbf9fb7f5d165c522", "0558c94a094158ecd64f0d5014d3d9668054fb97", "3d7fffd53c3d1ae8902cc27529a4f8f1d0f89872", "52d81096f46be0e75f85e0b7eeda65640c281630", "7a75c886b043e7c3f77829412774de27648f384a", "1d905342c3d78dd3236863382ae7bae0482d3055", "ead572634c6f7253bf187a3e9a7dc87ae2e34258", "042fdf16f6b9dd28b282dfceee5e8278ab7b6289", "72f91b486b8b867e5825d82db1cca5a5172f376b", "5fe3090971c9fb42ca0bdb67141040017d9a3f8e", "3431f528cff68be8ec874b8b06cc17f87861f2b5", "4352716287822b04a001f94e470ab42cc49b7b49", "e7d556643722cdf7e31c9e73f004b7fef17a024a", "00e6e8a8dc5c4100584af8175e24616a8a5efab1", "6973083bca583e26a0d8e7709ce7b9888cf3ee69", "0608d9937c074520cdc93cc444cc1c77039c5332", "2cdcddb08ae6060e94cba6c9b2b58b87324e686f", "68f6f7aca213a3f14075491f3daaf805c5a5b741", "78f1801d45179d31e20bacaa9eeca96d7494a654", "beaaf1cad62e5b3b9a6692935902ee2b3004d203", "cc1cabae3da49e4710f3f7e7b0f9cac778ebad4a", "0b9c6fe7beb3971b27aff8c5aa5e133de74316a4", "bf82f0b0cf448b18fec979d25368c6cd9c04ce0c", "888764f05a60d770cfc0b49944308fd92ed45ee5", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0" ], "paperAbstract": "We develop an approach for the automatic and elastic management of memory in shared clusters executing data analytics applications. 
Our approach, called ElasticMem, comprises a technique for dynamically changing memory limits in Java virtual machines, models to predict memory usage and garbage collection cost, and a scheduling algorithm that dynamically reallocates memory between applications. Experiments with our prototype implementation show that our approach outperforms static memory allocation leading to fewer query failures when memory is scarce, up to 80% lower garbage collection overheads, and up to 30% lower query times when memory is abundant.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-wang.pdf", "http://homes.cs.washington.edu/~magda/papers/wang-usenix17.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/wang", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_wang.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0c02/f8b36629b43e3825ab856b0a09e188e7355a.pdf", "s2Url": "https://semanticscholar.org/paper/0c02f8b36629b43e3825ab856b0a09e188e7355a", "sources": [ "DBLP" ], "title": "Elastic Memory Management for Cloud Data Analytics", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "0c03c044f10c2d165468afeb0cb5718953c315ac": { "authors": [ { "ids": [ "1750058" ], "name": "Jiho Choi" }, { "ids": [ "2130036" ], "name": "Thomas Shull" }, { "ids": [ "3106639" ], "name": "Mar\u00eda Jes\u00fas Garzar\u00e1n" }, { "ids": [ "1695950" ], "name": "Josep Torrellas" } ], "doi": "10.1145/3079856.3080237", "doiUrl": "https://doi.org/10.1145/3079856.3080237", "entities": [ "Baseline (configuration management)", "Browser speed test", "Compiler", "Goto", "JavaScript", "Just-in-time compilation", "Keyboard shortcut", "Optimizing compiler", "Overhead (computing)", "Program optimization", "Programmer", "Run time (program lifecycle phase)", "SGI Octane", "Scripting language", "Simulation" ], "id": "0c03c044f10c2d165468afeb0cb5718953c315ac", "inCitations": [ 
"0343ae9ab99d0cbd719baf0d2cc1b82425f3664a", "ce992c5be70243c83a5faaeea3f314ebd36302a9" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "494-506", "journalVolume": "", "outCitations": [ "4f59431e4c4516f89c2ece1ca7edbcd9fa745427", "6b2866fe86c31a6ce23446c6daeafb09a5ba0243", "6454f7d955f7eb577971875d0b7dfeb8630cc4ff", "a11f3714fed5bbf595f8045efdd1ec2dc71dfb8d", "a3f3d0f41d0f914f0a7edaccb3d80cc69388cb59", "6abd933f15bb853aebd129e313474865b53a5fed", "99e4d7f26140f2b31b440882e1684600a62b042c", "7ef0940a5e093a7c8c3c7d243bbbbf513b3c3192", "18bd7fc45fb824e7efa0b062e1e8f58c3d64cd02", "4ebdde61b162d46d4ed7fb5646052f37833f7ae2", "0653e2ed9f683868cb4539eb8718551242834f6b", "3593269a4bf87a7d0f7aba639a50bc74cb288fb1", "b86cdf445376b282d7b9f5fd149ef2138e19e2bd", "21daa3125d02b839cf4cae9ab7ab6d2e0346ee4f", "bc42584c1d74f96d2e03dfcc487af642527a62fe", "43fc4a33f6401e5eb1c8727caac681019c04ac28", "004428345b7977c032174ecf7fbac72fee7af718", "2f4002755b309cdb91e18116b8028005497d8400", "5284651b0025be9de74064ba52b9e245aa022d62", "58b00f733f75f0dd4fa5236263b5e1a64c5161d7" ], "paperAbstract": "The same flexibility that makes dynamic scripting languages appealing to programmers is also the primary cause of their low performance. To access objects of potentially different types, the compiler creates a dispatcher with a series of if statements, each performing a comparison to a type and a jump to a handler. This induces major overhead in instructions executed and branches mispredicted.\n This paper proposes architectural support to significantly improve the efficiency of accesses to objects. The idea is to modify the instruction that calls the dispatcher so that, under most conditions, it skips most of the branches and instructions needed to reach the correct handler, and sometimes even the execution of the handler itself. Our novel architecture, called ShortCut, performs two levels of optimization. 
Its Plain design transforms the call to the dispatcher into a call to the correct handler --- bypassing the whole dispatcher execution. Its Aggressive design transforms the call to the dispatcher into a simple load or store --- bypassing the execution of both dispatcher and handler. We implement the ShortCut software in the state-of-the-art Google V8 JIT compiler, and the ShortCut hardware in a simulator. We evaluate ShortCut with the Octane and SunSpider JavaScript application suites. Plain ShortCut reduces the average execution time of the applications by 30% running under the baseline compiler, and by 11% running under the maximum level of compiler optimization. Aggressive ShortCut performs only slightly better.", "pdfUrls": [ "http://iacoma.cs.uiuc.edu/iacoma-papers/isca17_1.pdf", "http://doi.acm.org/10.1145/3079856.3080237", "http://iacoma.cs.uiuc.edu/iacoma-papers/PRES/present_isca17_1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0c03c044f10c2d165468afeb0cb5718953c315ac", "sources": [ "DBLP" ], "title": "ShortCut: Architectural support for fast object access in scripting languages", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "0c09107ebcab25a5410cb00fa3458f7344d77f3b": { "authors": [ { "ids": [ "1868315" ], "name": "X. Fu" }, { "ids": [ "23599789" ], "name": "M. A. Rol" }, { "ids": [ "6965178" ], "name": "C. C. Bultink" }, { "ids": [ "3201515" ], "name": "J. van Someren" }, { "ids": [ "3007719" ], "name": "Nader Khammassi" }, { "ids": [ "2628316" ], "name": "Imran Ashraf" }, { "ids": [ "19320414" ], "name": "R. F. L. Vermeulen" }, { "ids": [ "21516547" ], "name": "J. C. de Sterke" }, { "ids": [ "2101054" ], "name": "W. J. Vlothuizen" }, { "ids": [ "34796948" ], "name": "R. N. Schouten" }, { "ids": [ "3203810" ], "name": "Carmen G. Almud\u00e9ver" }, { "ids": [ "7265530" ], "name": "L. 
DiCarlo" }, { "ids": [ "1737836" ], "name": "Koen Bertels" } ], "doi": "10.1145/3123939.3123952", "doiUrl": "https://doi.org/10.1145/3123939.3123952", "entities": [ "Algorithm", "Central processing unit", "Code word", "Compiler", "Computer", "Computer engineering", "High- and low-level", "High-level programming language", "Integer factorization", "Microarchitecture", "Microcode", "Programming language", "Quantum", "Quantum algorithm", "Quantum computing", "Quantum mechanics", "Quantum system", "Qubit", "Transmon" ], "id": "0c09107ebcab25a5410cb00fa3458f7344d77f3b", "inCitations": [], "journalName": "", "journalPages": "813-825", "journalVolume": "", "outCitations": [ "5a371e1abdb57746411f3ddd456de262f612be11", "88331df302fa2b13d6f1dc99ada50d0003b8c404", "f7099b1a06deeea35531889fad74796eaee8dadb", "3253fb3dcfc20293906fa476f0788874d8c46d73", "2e546ff4977a87adddf32dff6532479f742f3817", "1de3978475aa0ba00ef1436d77e86a7f8c82e50c", "f7f752acd1042bf6098f8ce8e90f4dd331564e4a", "51ab7eef98f029a28330e2536938a5d494222d25", "792bd6f4832235e28b2eaef20f80f29f35c35aa8", "6ee8092d37e6ef1d982c695d95e0ae5fda0f5849", "02d90ab465536454e1cb7f325da8bf23569391d0", "e450796c2ccbcdb47b39bc594b3d0efdfe7f1537", "3cecc6af8cd0266a57c810461163932b9ddbfed0", "009fa33b58f7f3be361c987f3f86ecaf506907cc", "0c79078c46dd7424f77470d34b2708abfc03bccd", "150b07cc949afdf4be5774bf1b26c7c9b1b24366", "a6b75e7cbbff92124ad5a255e44b670990ce9e77", "c20e0489fd3a4c3ac9ee1f769a08a7fbc99fb9f0", "13c2de3fd55e54c674186583a9e48b745d5e3070", "3d36daeb53998c4c6b79addf6d39ad3b0d09b0a6", "02dc8507c3b52953851d93393c96863aec2970ff", "2c7abc2165c507ed8ccdaeb997180e5b9a3824ab", "3888c49c7e1f12d89cb330e3ddf01a80bbc15696", "d460ee23f222613b5a0e895b1cfc24c47a7d881b", "c2616dfb07d070eadf87324ab1fad456d305a12e", "ddd6258a1781179fabeca3d81ad645ab883d303a", "3d90cd3625a85431ece7589a261cd2ce0cba742e", "250d7f66ba77ba540f92db9120a4ea97ac7e1189", "2b0b0f969fdbd968e0dfa7130fbe916f6afc9afd", "067c4edcc88b700c3d17f5e8d86f257a8da1daee", 
"2987bcb8bcc16bf7e5506ad6b2919f14c1dcf6ef", "472025e00666282f9cd00581a09040b0e03ecfc6", "a160322cea30e47b9ac95f36e13c9d180e94be84", "b4ac17c649bc8dc4653ce6c114d4eeae3d6ed66f", "67bf4efa1b9b13c87e44adebd46ac8750120e527", "26b7c4232872cc2327029b5354a41fde703f8e02", "7fdb7a726c4d051f7c448cc43ddd320e3318222f", "84e0ff378c8018546144ef877c8e4939ef18ddf4", "6322979bfee2012343a93df514dff2430ba0182e", "140046dea646060d23eda0c483d64d97f1f5bf52", "064bc7761f83cd79458330320d87a1279cb35167", "eaa5af67af88aa155ed4ef3a76a716303ecd332d" ], "paperAbstract": "Quantum computers promise to solve certain problems that are intractable for classical computers, such as factoring large numbers and simulating quantum systems. To date, research in quantum computer engineering has focused primarily at opposite ends of the required system stack: devising high-level programming languages and compilers to describe and optimize quantum algorithms, and building reliable low-level quantum hardware. Relatively little attention has been given to using the compiler output to fully control the operations on experimental quantum processors. Bridging this gap, we propose and build a prototype of a flexible control microarchitecture supporting quantum-classical mixed code for a superconducting quantum processor. The microarchitecture is based on three core elements: (i) a codeword-based event control scheme, (ii) queue-based precise event timing control, and (iii) a flexible multilevel instruction decoding mechanism for control. We design a set of quantum microinstructions that allows flexible control of quantum operations with precise timing. 
We demonstrate the microarchitecture and microinstruction set by performing a standard gate-characterization experiment on a transmon qubit.", "pdfUrls": [ "https://arxiv.org/pdf/1708.07677v1.pdf", "http://arxiv.org/abs/1708.07677", "http://doi.acm.org/10.1145/3123939.3123952", "http://pure.tudelft.nl/ws/files/34684227/p813_fu.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0c09107ebcab25a5410cb00fa3458f7344d77f3b", "sources": [ "DBLP" ], "title": "An experimental microarchitecture for a superconducting quantum processor", "venue": "MICRO", "year": 2017 }, "0c0994b08b790dc467b892d538321f9dbd10a3c2": { "authors": [ { "ids": [ "6152731" ], "name": "Wenhao Wang" }, { "ids": [ "2932786" ], "name": "Guoxing Chen" }, { "ids": [ "2625573" ], "name": "Xiaorui Pan" }, { "ids": [ "39939156" ], "name": "Yinqian Zhang" }, { "ids": [ "34989133" ], "name": "XiaoFeng Wang" }, { "ids": [ "3094927" ], "name": "Vincent Bindschaedler" }, { "ids": [ "40145425" ], "name": "Haixu Tang" }, { "ids": [ "1785347" ], "name": "Carl A. 
Gunter" } ], "doi": "10.1145/3133956.3134038", "doiUrl": "https://doi.org/10.1145/3133956.3134038", "entities": [ "Adversary (cryptography)", "DBpedia", "Denial-of-service attack", "Differential fault analysis", "Dynamic random-access memory", "Key (cryptography)", "Leaky bucket", "Memory management", "Page fault", "Secure channel", "Side-channel attack", "Threat model", "Translation lookaside buffer" ], "id": "0c0994b08b790dc467b892d538321f9dbd10a3c2", "inCitations": [ "33ae35cc24ef4303979b479671c2065256e1b3a7", "6f8fe3cbacb8436615e886b6188e2e62fd1a5b3c", "ccfc6c76716e0a48cd5ccb5202b48017cb951604", "32657d005f4501fecefbab7276f6aad32afb766c", "38a54f9bbbfc46599770a28999365144a273783f", "680fa994443080d43e7452f1137b339d5f74cc03", "ed84133ca8ef37a273d4b187202f55c6618b953e", "6db9824d4667b22310c51fe638403238f873e9f2", "b3f2a11d45757e675be123d55ec0eb192bcca990", "2106bb4ef13e35ad7640376b0ba3b6261b82fe22", "2a7056e53f29bc73471048a77b0c55ea4e92b897", "2359529a1f95c62a9432753f7b8028590f32e51c", "e41440cff90683629228b308a94e48c7af11ca36" ], "journalName": "", "journalPages": "2421-2434", "journalVolume": "", "outCitations": [ "4ff758a11c8062cf4e0f98cbd62429caf78297f5", "2087f336b0eb38fc60a24a32ec0821fc8fe2b2aa", "659bc8a947a4ea64b2ca38b7fa5a27233de2c1f8", "77a1532cb64eab28162a0277cde52b4b7eceda49", "104eb3716d8b2d7e6d69805357d5b7fb87caff3e", "1d26d405ddc1c72e3ffd76506a1286071ad67197", "7cbbab21c6f6fb54a968005afa21468b825f1d1e", "25b24d6cc547c80e3a1ad037082694a5f6a2b8a9", "907e4972815c0fcd484d335a9c3fd4cccc9a081e", "8372016fe38121358163c20f88e28fc0267e30b1", "135d6f50f43dc278d20026352f0051ac368ce315", "7272e04a354d3bc54b1a8119e868090996d3bf1d", "027c0969d21de0d52af6c8c7e8d63f12245382ae", "87d49f253a0e623e0255afa06d63e9b5a9fb09d0", "1114ef6ef315a23755740545ee46c5af0cf1e02c", "3001d2504e1dbc547d12d05f3ed1a671d125c4ce", "d296252ddf0e2c6b7422008d703843c1863bd15b", "1368d8dc507419a9999b2469da5c10befc47834f", "0b23e4be50e710dd9d339fc64f025ba89cc002d3", 
"1b033fb2c56f09bf91b5134b3a1cff1eb0d1a7ba", "3ca5880e4fe23ec2ee8025ff6c121ebb5348c6fc", "3aa7fa1563467801db724b046df439dc33de2407", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "4d624b942a58818f8d425460638cb4b65ed84e1c", "0dada24bcee09e1f3d556ecf81f628ecaf4659d1", "482fcc1057c6ed9ea21f71c990088eeb092ec243", "b56bec459de1a4875520775b46979c226cbbeb9c", "70fb3cea8335aefdf849597e9d9dd7512d722d88", "cef9aef9b73c94eacc55670f3aa8f70329cd4bc6", "8e3270ef5a0afc293021f8d594979ed059e29d5e", "0b70652541cb408152c468eaea7b114dc65beab1", "47e98574f432ec6a7a224b807a0bbd08a2a66148", "b0ac2616034f56ab1469afb935b55fe7e37f8f41", "52c2c050af5b32d4929b4b193967a3675d03aea0", "7f489d9801e0674f4436beb34ea8b8695050d5fe", "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "0ea83243e4b53c5e82b878fe71f92bcc56d36dd5", "94f43879426de678696dd3616fd1559d13c8ad78", "1f932f0d49a4c56d9718e8506d6177c6a6848831" ], "paperAbstract": "Side-channel risks of Intel SGX have recently attracted great attention. Under the spotlight is the newly discovered page-fault attack, in which an OS-level adversary induces page faults to observe the page-level access patterns of a protected process running in an SGX enclave. With almost all proposed defense focusing on this attack, little is known about whether such efforts indeed raise the bar for the adversary, whether a simple variation of the attack renders all protection ineffective, not to mention an in-depth understanding of other attack surfaces in the SGX system. In the paper, we report the first step toward systematic analyses of side-channel threats that SGX faces, focusing on the risks associated with its memory management. Our research identifies 8 potential attack vectors, ranging from TLB to DRAM modules. 
More importantly, we highlight the common misunderstandings about SGX memory side channels, demonstrating that high frequent AEXs can be avoided when recovering EdDSA secret key through a new page channel and fine-grained monitoring of enclave programs (at the level of 64B) can be done through combining both cache and cross-enclave DRAM channels. Our findings reveal the gap between the ongoing security research on SGX and its side-channel weaknesses, redefine the side-channel threat model for secure enclaves, and can provoke a discussion on when to use such a system and how to use it securely.", "pdfUrls": [ "https://export.arxiv.org/pdf/1705.07289", "http://doi.acm.org/10.1145/3133956.3134038", "https://www.informatics.indiana.edu/xw7/papers/p2421-wang.pdf", "https://arxiv.org/pdf/1705.07289v2.pdf", "https://arxiv.org/pdf/1705.07289v1.pdf", "http://arxiv.org/abs/1705.07289" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0c0994b08b790dc467b892d538321f9dbd10a3c2", "sources": [ "DBLP" ], "title": "Leaky Cauldron on the Dark Land: Understanding Memory Side-Channel Hazards in SGX", "venue": "CCS", "year": 2017 }, "0c2670b22448bfc1d1d537619d9041b6db7a45cb": { "authors": [ { "ids": [ "35640685" ], "name": "Naveen Kr. Sharma" }, { "ids": [ "6164629" ], "name": "Antoine Kaufmann" }, { "ids": [ "1748580" ], "name": "Thomas E. 
Anderson" }, { "ids": [ "1689594" ], "name": "Arvind Krishnamurthy" }, { "ids": [ "2240839" ], "name": "Jacob Nelson" }, { "ids": [ "2126015" ], "name": "Simon Peter" } ], "doi": "", "doiUrl": "", "entities": [ "Approximation", "Communications protocol", "Computation", "Load balancing (computing)", "Network congestion", "Network switch", "Packet switching", "Routing" ], "id": "0c2670b22448bfc1d1d537619d9041b6db7a45cb", "inCitations": [ "39010bdc7ed8d67cc5f1b7868a74885f87d1bcfe", "b5e0b49ae23980a58eb1aec555bb1a1cfba64ca3", "571a253f7c5ed3517657ce8a49c25f0ebccc3d79", "a0081a90edd415be337fa851ddbf1485bbeea6c6", "046b41fae099cf2e5c459da615cb401cc9bd3246", "176d9770d290a878a7d5efbfcfe37cb401749375", "43f610fcb2becae60483f1e2366c19b953780dfa", "0d573d5f27504e51727b8c1f2be2f206e6a9cc18" ], "journalName": "", "journalPages": "67-82", "journalVolume": "", "outCitations": [ "194786ec31e5b515b073e1ddded8ea8e9454eb7e", "84e0660e922da41223b9723bef60f5350a98d427", "1aafc7066e52f18dee78103822da24a5d85da93c", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "06beeda7be321eb0a294af55b7689d22d77a5b2b", "ec2d30a6be8ee4c9927fc59f702c9b5ff4206de9", "00afe47832bc165dcc3744e33550252d997286c3", "7a278ee0578f194700cadc3811cdda4ec751f88a", "1447be3d899115a834874e585256360911036a4d", "094aca6103f4079521e6a596d099ed37f7d2b498", "3b988049dd8f62f772281e90196bbd793700c86b", "363d109c3f00026f9ef904dd8cc3c935ee463b65", "11040f24714857941c569df70b21c4c8655e074a", "0847dfb07af6685f96b885a93c2cd4dfb4c3d1a9", "b3493a8ec43df1861c5d7037ce57bcff7d343dfc", "4b69324981ac45543fd60bd2e2cf487a76ed57ff", "12d3952369fd92db86d4a3fff0a6fa8de3509095", "034b937edbff280dfdd7b2e98639655fd3587402", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "1d1af146f23af1b1bb39b1f750c6e10b35f62b97", "544b0ba4ae011fe26c3f207a7c6f9d6de04468ae", "238dd4c308c1ee6ef3809fdf15fdc87be74bdbc8", "14c84514d25336223473290fe7c13ad66a68ef64", "5b999d36d5230eca01532b357c7cf338a5e0d641", "2e4ab1140b454fc6dacf4d23d3663aa34c741577", 
"025652412d507a8cf98ecacd8a44d32ce28995e1", "2986f9db238c57b638d54248c4ed1fcb5e4f459f", "61aa09bc1a3eae17480645c90b06a18cbd62d9af" ], "paperAbstract": "Recent hardware switch architectures make it feasible to perform flexible packet processing inside the network. This allows operators to configure switches to parse and process custom packet headers using flexible match+action tables in order to exercise control over how packets are processed and routed. However, flexible switches have limited state, support limited types of operations, and limit per-packet computation in order to be able to operate at line rate. Our work addresses these limitations by providing a set of general building blocks that mask these limitations using approximation techniques and thereby enabling the implementation of realistic network protocols. In particular, we use these building blocks to tackle the network resource allocation problem within datacenters and realize approximate variants of congestion control and load balancing protocols, such as XCP, RCP, and CONGA, that require explicit support from the network. Our evaluations show that these approximations are accurate and that they do not exceed the hardware resource limits associated with these flexible switches. We demonstrate their feasibility by implementing RCP with the production Cavium CNX880xx switch. 
This implementation provides significantly faster and lower-variance flow completion times compared with TCP.", "pdfUrls": [ "https://homes.cs.washington.edu/~antoinek/documents/17nsdi_flexswitch.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-sharma.pdf", "http://homes.cs.washington.edu/~arvind/papers/flexswitch.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/sharma", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-sharma.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_sharma.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_sharma.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/eb4a/6bd071b9a43d3aae85f215896d77e80b8bb9.pdf", "s2Url": "https://semanticscholar.org/paper/0c2670b22448bfc1d1d537619d9041b6db7a45cb", "sources": [ "DBLP" ], "title": "Evaluating the Power of Flexible Packet Processing for Network Resource Allocation", "venue": "NSDI", "year": 2017 }, "0c2cbc336b113a81933dce0e45736316d647eabe": { "authors": [ { "ids": [ "1808458" ], "name": "Shai Halevi" }, { "ids": [ "1719765" ], "name": "Tzipora Halevi" }, { "ids": [ "1741494" ], "name": "Victor Shoup" }, { "ids": [ "1788681" ], "name": "Noah Stephens-Davidowitz" } ], "doi": "10.1145/3133956.3133976", "doiUrl": "https://doi.org/10.1145/3133956.3133976", "entities": [ "Algorithm", "Binary decision diagram", "Cryptography", "Finite-state machine", "Lattice-based cryptography", "Nibble", "Obfuscation (software)", "Product bundling", "Sampling (signal processing)" ], "id": "0c2cbc336b113a81933dce0e45736316d647eabe", "inCitations": [ "3ec89e757f3485d44cd594428e4128a049b63950", "aa9d095fbc98cbc2c451bb4ed27b4d17f77b85f6", "06a4a18ff3f3f2604e72ee926a223d513aeffec0", "2ab193c6177900cb1b9b294bdc16bb782bd10838", "7880a1226a6530178d8b9b96b0e0d4d7dd1cb5bc", "a1a4af9e2d0f115a84a23887b8bb63537ce91bc7", "e1fc98f7072ef3b469bcd74cc6a8484a50a5c9b6", 
"7557e87f01563f1c37b771d6bce82ae69fa27343" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "104", "journalVolume": "2017", "outCitations": [ "7821ac7391c398aa94af65e3be9d01ee7cbfb1c2", "7f6b3bed8ffc51cf61f0499b4c82fc07ee895f93", "bd201871f22d48bbd85ba3c1ca4b51feb1b96358", "8f8cf4a3f24c22ba1f9e6a810ceb8b183c328e06", "4a94ad7d52370542090be52da30cc819fb403d47", "020ff6532a58ee426217cf4e75ed64af4658bd6a", "6d6bd93c620885cb5ddd5abfac19efffac132cd5", "2f7c97e82641e4f0f8c7b508b75af4952b0cc07d", "8c9f8dbb6af53f850f43db29f81197b1ed1ea93a", "0003c342fd0b3e48a483901bd3b731b974fc1f37", "0d209716908bfb494848f69a34264490e491ea22", "0821852bfff889566392e7adc19817488d11860b", "0677d17466f47dc8ef5fb89221ff3007c6196c33", "212fe709b564f26c63875c2621cd543149108cf7", "8a37efc82e54353d387cfb073f9379c053988aef", "05ef2ed1c6fcdcaa07a45a0ba924f08b0060a171", "34e8182107338c8d6d16fa3d7d5d3abfb3472754" ], "paperAbstract": "We implemented (a simplified version of) the branching-program obfuscator due to Gentry et al. (GGH15), which is itself a variation of the first obfuscation candidate by Garg et al. (GGHRSW13). To keep within the realm of feasibility, we had to give up on some aspects of the construction, specifically the \"multiplicative bundling\" factors that protect against mixed-input attacks. Hence our implementation can only support read-once branching programs.\n To be able to handle anything more than just toy problems, we developed a host of algorithmic and code-level optimizations. These include new variants of discrete Gaussian sampler and lattice trapdoor sampler, efficient matrix-manipulation routines, and many tradeoffs. We expect that these optimizations will find other uses in lattice-based cryptography beyond just obfuscation.\n Our implementation is the first obfuscation attempt using the GGH15 graded encoding scheme, offering performance advantages over other graded encoding methods when obfuscating finite-state machines with many states. 
In our most demanding setting, we were able to obfuscate programs with input length of 20 nibbles (80 bits) and over 100 states, which seems out of reach for prior implementations. Although further optimizations are surely possible, we do not expect any implementation of current schemes to be able to handle much larger parameters.", "pdfUrls": [ "http://eprint.iacr.org/2017/104", "http://doi.acm.org/10.1145/3133956.3133976", "http://www.shoup.net/papers/obfus.pdf", "https://eprint.iacr.org/2017/104.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0c2cbc336b113a81933dce0e45736316d647eabe", "sources": [ "DBLP" ], "title": "Implementing BP-Obfuscation Using Graph-Induced Encoding", "venue": "CCS", "year": 2017 }, "0c3b1da050089cf8c701fa0cb4ddc18566d715e6": { "authors": [ { "ids": [ "1802824" ], "name": "David Dice" } ], "doi": "10.1145/3064176.3064203", "doiUrl": "https://doi.org/10.1145/3064176.3064203", "entities": [ "Best, worst and average case", "Cache (computing)", "Central processing unit", "Concurrency (computer science)", "Fairness measure", "Lock (computer science)", "Manifest (transportation)", "Memory management", "Paging", "Pipeline (computing)", "Resultant", "Scalability", "Test-and-set", "Thread (computing)", "Throughput" ], "id": "0c3b1da050089cf8c701fa0cb4ddc18566d715e6", "inCitations": [ "376f2db9939d49811b32090c5ed03deb3bbadd0f", "117025a430aaa984dd260bea97531da221b634a4" ], "journalName": "", "journalPages": "314-327", "journalVolume": "", "outCitations": [ "d15abf417614b88be323577ce6695501bffd3eef", "8291ac440fb905ed9406ae8ff4d753635fa59a8d", "1d07b7d897cfa631aa67f482af9514b1306803c9", "2840a0ad64a0474cd93c19c065bbe405706510fd", "003190fa9fa36f4e1ce44383e515d0db1316d38c", "475212b08f58461e2468a0af1a247763275cadc1", "2819b4f9ccae4ba8b9b7b9cf6b81081c41d4adc4", "ab8d97380703ae1d1c061e4ff3758ca23648c536", "7257c62ba8e82f074e0082e1e0b2d4cf50dd2544", "264cd229ac4bbdf655d5e7b44563bf84bd846364", 
"cbef17a9a8c612424a42b83ebbea80ff1416c8f7", "6994d3b13e540784cfeca050b9713478a55aa864", "a3021aba46ea0b09bac5a6f9f1e5449b13da9c05", "2bc3d228716c8992fe5b65ac5953ba71dc05359b", "32e3df95dfca9d8e5a5b2af6cf291a38dc1f6c8e", "cf08496286767749c30b0a2b7a9ecced0faf6378", "bdb01ae36c185ef9a04e0562c29fc65471ea1363", "703aa01d432d0979780c2a6102bfb64f70ab1bcf", "1f1ca237ca06a05c36d1fa1877593bcf508258a9", "67cd6d7da59f4326b2fcb7aaada3f27c42e20743", "21d83b9fffef935fae7e473c44e804e801687faf", "1a08d9112464b1c1db949c948e33f556ceb537c8", "7b1a8647dd6482adee6b774d694e5c85b6d5a9cf", "6c96a74a1785843dead22eb00764f787bf4bfd92", "13875088254a585cd0b050f3bc27c1af9ada690f", "09ad3377c3e24320420b465e6ec70c23da47dd4d", "38628d26d4f624378f4303b61ae93c5d34d007c3", "34d33c19d0e893415b570ebdeea993db5b7af509", "13f50b4c784d037c43c7bb6ef5391ad8009c8863", "f10108a28ccca7df20518b20ce204aa74a35350f", "abf1157c2043274a8d580151db1d4ef5be2c892e", "f786e89ccbeb6c09a9521421d188f3ee18f7c9f8", "8b4682a90b39d0b95d92098be48f05687cb23086", "25c4dcffc6bc69b0885587aff9acb9f2dd949c07", "b6d87ec4b6990f0b797a5f41ff970c6b520cf79a", "6e0685417130e68b705dd63809a78ef6d5f05974", "1381b7f3aebb605ebf00b7ff9158fae8d55b4e6e", "5b3a9980f3512cb0ceb63093479beadeceddc2be", "9bd0f0527d8d2f751c622ec14388017224f4810b", "cf9f8ffb32fb47f9c1e6581c8fc32f92f22a4451", "2c968749e04fc16908fc543e4468f945a5c695da", "3e77a77247734dc918a5723573e1158eee1955f9", "390e82956e4c065df5a0474d62096a1e13e873b2", "60a45695845e3f1e5dd8d7a886b23fff89c295bc", "2f925c9b58c384d80e1febfc646900d584dcf515", "57cf29529977cc5407497aba2f9032e01a12c1a9", "32ad6f8b3afa3dd02cbfca0f1a1ba3f6af07f0ff", "afc4931dd371130c3d4c6d6dbfda881140847af1", "67330fa1b6ca0adc5b59f81aa63a65831aa2d15a", "1d4a3452d5246bf23938969ac5f76adbb1aa4303", "0254e7809ea94c30adedd5e853bdd0014b6521c9", "068820fc1c1807cb7482f9d0b8e87389eedb636a", "b895182cbadcd488cf9e64eb2f3f9b399efc73fe", "c8567e36908340a707734783fb58c1cd56b422c7", "18bdea3282d4f22c75b3d2677d7f546af8fd73ac" ], "paperAbstract": 
"Applications running in modern multithreaded environments are sometimes overthreaded. The excess threads do not improve performance, and in fact may act to degrade performance via scalability collapse, which can manifest even when there are fewer ready threads than available cores. Often, such software also has highly contended locks. We leverage the existence of such locks by modifying the lock admission policy so as to intentionally limit the number of distinct threads circulating over the lock in a given period. Specifically, if there are more threads circulating than are necessary to keep the lock saturated (continuously held), our approach will selectively cull and passivate some of those excess threads. We borrow the concept of swapping from the field of memory management and impose concurrency restriction (CR) if a lock suffers from contention. The resultant admission order is unfair over the short term but we explicitly provide long-term fairness by periodically shifting threads between the set of passivated threads and those actively circulating. Our approach is palliative, but is often effective at avoiding or reducing scalability collapse, and in the worst case does no harm. Specifically, throughput is either unaffected or improved, and unfairness is bounded, relative to common test-and-set locks which allow unbounded bypass and starvation. 
By reducing competition for shared resources, such as pipelines, processors and caches, concurrency restriction may also reduce overall resource consumption and improve the overall load carrying capacity of a system.", "pdfUrls": [ "http://arxiv.org/abs/1511.06035", "http://arxiv.org/pdf/1511.06035v2.pdf", "https://arxiv.org/pdf/1511.06035v4.pdf", "https://arxiv.org/pdf/1511.06035v1.pdf", "https://arxiv.org/pdf/1511.06035v5.pdf", "https://arxiv.org/pdf/1511.06035v3.pdf", "https://arxiv.org/pdf/1511.06035v7.pdf", "http://arxiv.org/pdf/1511.06035v4.pdf", "https://arxiv.org/pdf/1511.06035v6.pdf", "http://arxiv.org/pdf/1511.06035v3.pdf", "http://arxiv.org/pdf/1511.06035v1.pdf", "http://doi.acm.org/10.1145/3064176.3064203", "https://arxiv.org/pdf/1511.06035v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0c3b1da050089cf8c701fa0cb4ddc18566d715e6", "sources": [ "DBLP" ], "title": "Malthusian Locks", "venue": "EuroSys", "year": 2017 }, "0c556f5a05f857d13ea7f56cb9ebb78166a0454b": { "authors": [ { "ids": [ "8857102" ], "name": "Janis Kalofolias" }, { "ids": [ "1717148" ], "name": "Mario Boley" }, { "ids": [ "3183025" ], "name": "Jilles Vreeken" } ], "doi": "10.1109/ICDM.2017.29", "doiUrl": "https://doi.org/10.1109/ICDM.2017.29", "entities": [ "Algorithm", "Branch and bound", "Data mining", "Loss function", "Optimization problem", "Population" ], "id": "0c556f5a05f857d13ea7f56cb9ebb78166a0454b", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "197-206", "journalVolume": "", "outCitations": [ "33cb597dde84766b1cdd6f35c964d83c9a718b39", "182fd6e5a3fe4b0470f88547340c1c2774ca2b4e", "3db7e560344a67805bfe602b9aa7a3007317aec0", "831ecd69b4970902ea3b2fc00a27192e3a8638a5", "9e1bd7894db71b9b6352b089aec6f6154b01ab8c", "00b0dedeb3947d670492ac2f93ac00d641836649", "d70d05a4357a909cb1fb20dcbec2c2aee31b0028", "8c18aac3a5207bb98ce80d106f449128fdc6cd55", "9610111f3029bc79b47e92d4f8746b2d44c7406d", 
"2a2ec5ba621090cc081fdcaf6c1b29041d3fa50d", "3d0a325d82bb116070c6753eb5899eb44d0fca47", "bbbbaadcf3045d318868dfa55de60591d149e115", "295680676a3f985db7679ce3a4ef4d01e64c36cf", "5afd7ec160d9093d19147e5ec5085efda3d3e857", "4556f3f9463166aa3e27b2bec798c0ca7316bd65", "3b69d693c62919015c8fadb0957a636fb35e89df", "439cb4aca522b408e8d63567fc9fdf510ca0548c", "23b1080a395a0d714e48a884857de72939ac9f3c", "4cd2517e7fb42979bdfe6e5e3cacc929626c8c36", "1cea485bbaaf2465a971220d0072c1743a2feaec", "09f6d750eb89da1ca03c324b2ff54da59dc54cd2", "9e8b2d42ab824d23724aa2f957bdd33ef3f12e64", "827c08769370f95a8c13cde2b04238f48581c947", "779a8f97ae5720c0ad351ca29944283a0c2a99ad" ], "paperAbstract": "Subgroup discovery is a local pattern mining technique to find interpretable descriptions of sub-populations that stand out on a given target variable. That is, these sub-populations are exceptional with regard to the global distribution. In this paper we argue that in many applications, such as scientific discovery, subgroups are only useful if they are additionally representative of the global distribution with regard to a control variable. That is, when the distribution of this control variable is the same, or almost the same, as over the whole data. We formalise this objective function and give an efficient algorithm to compute its tight optimistic estimator for the case of a numeric target and a binary control variable. This enables us to use the branch-and-bound framework to efficiently discover the top-k subgroups that are both exceptional as well as representative. 
Experimental evaluation on a wide range of datasets shows that with this algorithm we discover meaningful representative patterns and are up to orders of magnitude faster in terms of node evaluations as well as time.", "pdfUrls": [ "http://arxiv.org/abs/1709.07941", "https://arxiv.org/pdf/1709.07941v1.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.29", "http://pubman.mpdl.mpg.de/pubman/item/escidoc:2488423:1/component/escidoc:2488422/arXiv:1709.07941.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0c556f5a05f857d13ea7f56cb9ebb78166a0454b", "sources": [ "DBLP" ], "title": "Efficiently Discovering Locally Exceptional Yet Globally Representative Subgroups", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "0c56c62813329bbf616f21eca675f7df796d025b": { "authors": [ { "ids": [ "3341167" ], "name": "Yulong Ao" }, { "ids": [ "39408447" ], "name": "Chao Yang" }, { "ids": [ "3303339" ], "name": "Xinliang Wang" }, { "ids": [ "1712301" ], "name": "Wei Xue" }, { "ids": [ "1711877" ], "name": "Haohuan Fu" }, { "ids": [ "1702887" ], "name": "Fangfang Liu" }, { "ids": [ "38512584" ], "name": "Lin Gan" }, { "ids": [ "1724368" ], "name": "Ping Xu" }, { "ids": [ "33955807" ], "name": "Wenjing Ma" } ], "doi": "10.1109/IPDPS.2017.9", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.9", "entities": [ "Algorithm", "Computation", "Double-precision floating-point format", "Experiment", "FLOPS", "Full scale", "IBM WebSphere eXtreme Scale", "Inter-process communication", "Locality of reference", "Manycore processor", "Memory bound function", "Parallel computing", "Program optimization", "Scalability", "Simulation", "Speedup", "Stencil (numerical analysis)", "Stration", "Sunway", "Sunway TaihuLight", "Supercomputer", "Throughput" ], "id": "0c56c62813329bbf616f21eca675f7df796d025b", "inCitations": [ "e45dea6588d1de0a23618e019031e67eedeeee26" ], "journalName": "2017 IEEE International Parallel and Distributed 
Processing Symposium (IPDPS)", "journalPages": "535-544", "journalVolume": "", "outCitations": [ "b76269bf962989ce271bef7ea863ff4adf9c9de6", "019ebe0205a759f8dab80b617f9f8ccd179c5c62", "3db50ba6e67fe4df04352c4203dfeb86690158a5", "0ebb8ef3ef660ea8484202e74e2e3df7b3c59cc6", "34b44a9e55184b48c94a15f29f052941b342e8bf", "0692f43523ebd6394a4ee76e3224f3c01cc2c4eb", "9c7da99a0a046dcd1b736b19bf694137dec5e1bc", "de3679256c7a30fbeb461086fceaf763e209d9fc", "57f635f67fa7a1f742bb1c1f1da3e400c954440f", "def34f422d6930bd23d5c58de78be98804e44e97", "fe7bd2137955540edc81e84c5051ae32daf1703d", "0382955dcc73511c3ae9b5327e0213272a1b4152", "7c458085fababbaafcbb96c86e0b6482556b841f", "1deeb53a514d9a54ad690626c5199bb0d117f9a2", "14a477cf712ad5647180e6233dd0638c6c269fdd", "eedb46c68a9c71ccb38de3933e5f7e1dd9a789c7", "092217c2267f6e0673590aa151d811e579ff7760", "3230131b14559a11c8ee9ab9beccf725dfb437de", "7b69e7c3dd0ede0eacb2c42c82559367c8f194d4", "27d2ac18ef4504df1460460c9711e69d166cc11e", "273d591af0bdcbefe37d7dd9150e2f612ca7121d", "2d21ca41ebdfb2b4e7a145e36dc8321386627e94", "db9a6214180872b8f85c08947da4cd653f2e481e", "11f2a5d947f5899d6060009462feb6888a07fb1c", "791370da29ba96d355c2fad1ecd06b8e709f8755", "408e61c117816833cdd807b5d8c9258f1c2022ab", "d40be7ee92f0281425a482f9837132fa5e34bfe1", "1ac8bf57669da9afce7a19d9a09dca0d6d4a9784", "17c44c4654cc53e792c3aafe3b01df9829fe8e90", "0b47e159ed9a3e5db1adc135620e7526d93abd87", "009e4da527a3518c29c95970efb79733a67979fb", "b8932adc9d9a80de33f891c3e94277b01d100c97" ], "paperAbstract": "Stencil computation arises from a broad set of scientific and engineering applications and often plays a critical role in the performance of extreme-scale simulations. Due to the memory bound nature, it is a challenging task to optimize stencil computation kernels on modern supercomputers with relatively high computing throughput whilst relatively low data-moving capability. 
This work serves as a demonstration on the details of the algorithms, implementations and optimizations of a real-world stencil computation in 3D nonhydrostatic atmospheric modeling on the newly announced Sunway TaihuLight supercomputer. At the algorithm level, we present a computation-communication overlapping technique to reduce the inter-process communication overhead, a locality-aware blocking method to fully exploit on-chip parallelism with enhanced data locality, and a collaborative data accessing scheme for sharing data among different threads. In addition, a variety of effective hardware specific implementation and optimization strategies on both the process- and thread-level, from the fine-grained data management to the data layout transformation, are developed to further improve the performance. Our experiments demonstrate that a single-process many-core speedup of as high as 170x can be achieved by using the proposed algorithm and optimization strategies. The code scales well to millions of cores in terms of strong scalability. 
And for the weak-scaling tests, the code can scale in a nearly ideal way to the full system scale of more than 10 million cores, sustaining 25.96 PFLOPS in double precision, which is 20% of the peak performance.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.9" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0c56c62813329bbf616f21eca675f7df796d025b", "sources": [ "DBLP" ], "title": "26 PFLOPS Stencil Computations for Atmospheric Modeling on Sunway TaihuLight", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "0c627783393b80ba4f9deaee0d87a743926e2df0": { "authors": [ { "ids": [ "1726907" ], "name": "Konstantin Avrachenkov" }, { "ids": [ "1776100" ], "name": "Jasper Goseling" }, { "ids": [ "1911706" ], "name": "Berksan Serbetci" } ], "doi": "10.1145/3084465", "doiUrl": "https://doi.org/10.1145/3084465", "entities": [ "Algorithm", "Cache (computing)", "Iteration", "Maxima and minima", "Nash equilibrium", "Numerical analysis", "Polynomial", "Simulated annealing", "Simulation", "Synthetic data" ], "id": "0c627783393b80ba4f9deaee0d87a743926e2df0", "inCitations": [], "journalName": "POMACS", "journalPages": "27:1-27:25", "journalVolume": "1", "outCitations": [ "381622b5ccd94b9d19debf0878038514d8919141", "2d3db65e12c332a8250265fdbd6055d69a98688e", "48517bd9a131f88eba941e34efe07866bf982c14", "01d669c7856ff7a951e88eb9f83bcc47d66475e3", "38d8d755e2d6ae927467f5f50c9fc1903eeeaf00", "dd1a4b2ca3ca5071e6016408a412306e8a82d7c4", "0b6e205fbbd179291c3ed198db6f403ca74624a1", "1f7cd6323a9befcbf427c48f5354afaa8c07e820", "4ce8bc485df9ac987f18d99c7af1d95f9cbea6b2", "d513cc6d469f3dcd858fde136a68fd4fea7b756c", "c5d954d13c1c620d78ebaba9afa120733e90ed09", "4ef750a115b108f233623e6da2ab17cd266d8c59", "0f6dc2439cd189f4a7bbd8d42649d718cd3e6456", "13a4ac8f6000812c712702c73683ffc1e3e7c577", "4e1ce62215f1fb989b80d324fae00b9f76ec2d34", "07bfa7253727961ea6f612624ba7f5290955dc9e", 
"df511a5d9d12bff681438e2dbe2ecef70268c9c9", "40a0cfc394048f51b7a13efa752020663cb448c9", "8651440f3f6e1e0d8a29564c0135673e9dd13829", "82b7e3aa8ed2dd5a7fa8f5563c2348a2c7d25004", "fa2a489c0f5d5cdc3e3792cc2883d858edea3a9f", "292c10dc62a35274d17a8893c6b7d6659cdde27e", "32e683a70c491d4a44cae0d1a3b7d66513e62019", "876dfc883acad11e785d0b226ce16c9814a01c87", "4cc296ed3762e8ca6c4629ceb2c3d69aaeba4c54", "589d034c3c2eaec53c18000df3c23d84b005eaf6", "06fc81197afdc225537785ee9a68d0bd06fc58e2", "50dadd75fcf38109b0668056380572f51a1418f0", "075321f87a446af752efa991acc30819377c7788", "78b4704ba12938c1c45aaccc5e3c8dc0297aeca6", "4fd524607a14043ede779ef8b65f1b6b795cf583", "1630de708ba9f90492e7e7554fcfa885cb0ba314", "03157e88a2ffbd94eaa8d4bab0f3234c92e46c19", "87083c3099e66bb30e4506baa586fefecc30bf74", "4ba8452582c2b4d79697e0baec18b3df78fc1487", "d3da4dca9e8901dd7ad56d47617b2aeb75f23081", "42064d006d7a0d59bb4b7ebbe0040318909ab67f", "200e53624f61afc1cf1bd4a0340e6b21ed9858f6", "b694374929ce829ee6e0545b0d17ccc56a3b6fbd" ], "paperAbstract": "We consider caching in cellular networks in which each base station is equipped with a cache that can store a limited number of files. The popularity of the files is known and the goal is to place files in the caches such that the probability that a user at an arbitrary location in the plane will find the file that she requires in one of the covering caches is maximized.\n We develop distributed asynchronous algorithms for deciding which contents to store in which cache. Such cooperative algorithms require communication only between caches with overlapping coverage areas and can operate in asynchronous manner. The development of the algorithms is principally based on an observation that the problem can be viewed as a potential game. Our basic algorithm is derived from the best response dynamics. 
We demonstrate that the complexity of each best response step is independent of the number of files, linear in the cache capacity and linear in the maximum number of base stations that cover a certain area. Then, we show that the overall algorithm complexity for a discrete cache placement is polynomial in both network size and catalog size. In practical examples, the algorithm converges in just a few iterations. Also, in most cases of interest, the basic algorithm finds the best Nash equilibrium corresponding to the global optimum. We provide two extensions of our basic algorithm based on stochastic and deterministic simulated annealing which find the global optimum.\n Finally, we demonstrate the hit probability evolution on real and synthetic networks numerically and show that our distributed caching algorithm performs significantly better than storing the most popular content, probabilistic content placement policy and Multi-LRU caching policies.", "pdfUrls": [ "http://doi.acm.org/10.1145/3084465", "https://arxiv.org/pdf/1704.04465v2.pdf", "http://arxiv.org/abs/1704.04465", "https://arxiv.org/pdf/1704.04465v3.pdf", "https://arxiv.org/pdf/1704.04465v1.pdf", "https://arxiv.org/pdf/1704.04465v4.pdf", "http://doi.acm.org/10.1145/3078505.3078534" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0c627783393b80ba4f9deaee0d87a743926e2df0", "sources": [ "DBLP" ], "title": "A Low-Complexity Approach to Distributed Cooperative Caching with Geographic Constraints", "venue": "SIGMETRICS", "year": 2017 }, "0c6f81e60514edbc6a936a5f8593838f14658653": { "authors": [ { "ids": [ "1870110" ], "name": "Arun Subramaniyan" }, { "ids": [ "40040123" ], "name": "Reetuparna Das" } ], "doi": "10.1145/3079856.3080207", "doiUrl": "https://doi.org/10.1145/3079856.3080207", "entities": [ "Alphabet (formal languages)", "Automata theory", "Automaton", "Central processing unit", "Computation", "Connected component (graph theory)", "Context switch", "Data breach", 
"Deterministic finite automaton", "Finite-state machine", "Graphics processing unit", "Intrusion detection system", "Limiter", "Malware", "Motif", "Multi-core processor", "Non-deterministic Turing machine", "Nondeterministic finite automaton", "Parallel computing", "Speedup", "Time complexity", "Von Neumann architecture", "Xeon Phi" ], "id": "0c6f81e60514edbc6a936a5f8593838f14658653", "inCitations": [ "620eac63b314969fa88a0e3be1d3d682e5266f3d", "00cc482570d739e7b733f45b6f8f1836b24056bd" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "600-612", "journalVolume": "", "outCitations": [ "164bb40bac988ed0b90fe44366cd98c307e57b4b", "38adc6ce214ad89ad6a0c47b489608a0fbeedaaf", "90a72bdeac51650e2711ffd785a152c89c97dc4b", "5e2a75065a3e80a04d4aa9cc0987d1d51232961b", "0c621bbd45b8a5e0cdac61ac960bc7cc2bdaa0c5", "9fac7fd52435a4f4658308d75d833d337bc90aa6", "41338a3f79028c0a24f7786d6e9a01ce8d2e90a4", "2baee3037fa953e69b4b96045b5a3053bd191841", "3547ac839d02f6efe3f6f76a8289738a22528442", "1291dc27b5e569bfeae7c9d114eed350b31cb8b7", "947ab3927c41622a8bdafe3327158cd16eba21fd", "27b1d02ab9edf212682fdfc7f8478aab471e6183", "b21f51f327678430a218766d917051bee08d8eb8", "64b6ef88dcb71ca50e2b3b63330da9726f982503", "984273d01be0d66506ccf7d6bd5d260dfe8d9f30", "0f384b45ee96f84a20783fbbe0c11c942ba1073b", "6408891eea6af794dadd16f35485bc1c73473adc", "f6699544f79aaac77821bd4f6e6907d3abca5372", "289431393a4d2db657bb2d5109f60602b26013a2", "3a6fc87533e77528cc6d48aa7b0bdd74412c5095", "8d0bb67313c489aa90116c0c7df367a6ce46616d", "93c25da1b96dba6a83defeb05ebd5bd3c66feb87", "330b46ce848047b13fadc7a63c01abfe02fd4d8b", "20cee308639acf53d090ff7c7d639eb64fdca8ad", "2272e0efd155fb0862f251e168fd47e1ec9363ad", "3b6711bd158a375267999ac095b8c1a76d9dc464", "716b3455c4df7b8cfaade6801adf4e8538279ebd", "149ad380837451a3903dafbb13f6de3815547852", "3710d16919bf3a0bc7c3c5647d377ab449964ff9", "1b68aa68c70af87fc3b712ff7a4a9aa289bf23bf", 
"3f8a4239c14e7210d356e23503f6418e687b1b86", "44da8226d8aedc3e57432c33fe1a2795eb6442e5", "80527e7595530951081494d1b98f3f13da3033a2" ], "paperAbstract": "Finite State Machines (FSM) are widely used computation models for many application domains. These embarrassingly sequential applications with irregular memory access patterns perform poorly on conventional von-Neumann architectures. The Micron Automata Processor (AP) is an in-situ memory-based computational architecture that accelerates non-deterministic finite automata (NFA) processing in hardware. However, each FSM on the AP is processed sequentially, limiting potential speedups.\n In this paper, we explore the FSM parallelization problem in the context of the AP. Extending classical parallelization techniques to NFAs executing on AP is non-trivial because of high state-transition tracking overheads and exponential computation complexity. We present the associated challenges and propose solutions that leverage both the unique properties of the NFAs (connected components, input symbol ranges, convergence, common parent states) and unique features in the AP (support for simultaneous transitions, low-overhead flow switching, state vector cache) to realize parallel NFA execution on the AP.\n We evaluate our techniques against several important benchmarks including NFAs used for network intrusion detection, malware detection, text processing, protein motif searching, DNA sequencing, and data analytics. Our proposed parallelization scheme demonstrates significant speedup (25.5x on average) compared to sequential execution on AP. 
Prior work has already shown that sequential execution on AP is at least an order of magnitude better than GPUs, multi-core processors and Xeon Phi accelerator.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080207" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0c6f81e60514edbc6a936a5f8593838f14658653", "sources": [ "DBLP" ], "title": "Parallel automata processor", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "0c862eb2eb56d0a28cd6f4ac7684ca624b7a6169": { "authors": [ { "ids": [ "32129691" ], "name": "Djillali Boukhelef" }, { "ids": [ "3006590" ], "name": "Kamel Boukhalfa" }, { "ids": [ "2537219" ], "name": "Jalil Boukhobza" }, { "ids": [ "3142864" ], "name": "Hamza Ouarnoughi" }, { "ids": [ "31618580" ], "name": "Laurent Lemarchand" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud database", "Hard disk drive", "Heuristic", "Hoc (programming language)", "IP Multimedia Subsystem", "Optimization problem", "Program optimization", "Scalability", "Service-level agreement", "Solid-state drive" ], "id": "0c862eb2eb56d0a28cd6f4ac7684ca624b7a6169", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "659-664", "journalVolume": "", "outCitations": [ "63459ab18f191118e3c6c8e3ff825111b44ee84b", "5ee2fc47f1ed11c3c75e2620c5b93b1533784969", "542263ab157f526f6e7733ad7c600c0c813ab031", "f51674bb4796dc81d5ff9ee397b22b3aa0b8609a", "b261a1650567fd849f7efd71be56fc06a284527a", "3358850706a8ad2eb8489bb7790e8bbd3a5b6dba", "5e60215a881748f6c70c5be63b0589715e7c9af1", "5918fa52f47d9c9e06442c097f263caceac46e48", "b0fee4b84d4dbe2dfdd9d5e97fcaa703fba15212", "79fc8f5ac2e98842ab8b78d2d46b6e2714dc7ea3", "664172bce3d8f49742aede5dc160be9cc84892e7", "c71368751db5749977d0e211a02f19a72cce7f42", "1fb1f6818c6cfe0220b7ff702089c607b043f2b1", "119a5ad19a679db71b51b8793281652a00c3826d", 
"e8ba35c94e9e3768b8b28943c8f48de5c151d812", "7a886d2895fe7ddb2a1b88e22f90b181fb088f98", "620d7d2d07f75b2a7ab533eaae1f4aeb7c0cf7fa", "41ed0a8078727d3cf9009779b3eaddef7d872d53", "02e56828951df7fbc42853071af175aad103517d", "66f1b955cc92fa0726181e2450b0f55632d1fa56", "1b84952361a10515ebb75f7c987021223b48b8df", "1d5de7a7ed362ecd596ac9ed5b85bf19d5c08ef5", "6fa1a0602397319ec86817b65ca670d4ee5d5e63" ], "paperAbstract": "Solid State Drives (SSD) are integrated together with Hard Disk Drives (HDD) in Hybrid Storage Systems (HSS) for Cloud environment. When it comes to storing data, some placement strategies are used to find the best location (SSD or HDD). These strategies should minimize the cost of data placement while satisfying Service Level Objectives (SLO). This paper presents two Cost based Object Placement Strategies (COPS) for DBaaS objects in HSS: a Genetic based approach (G-COPS) and an ad-hoc Heuristic approach (H-COPS) based on incremental optimization. While G-COPS proved to be closer to the optimal solution in case of small instances, H-COPS showed a better scalability as it approached the exact solution even for large instances (by 10% in average). In addition, H-COPS showed small execution times (few seconds) even for large instances which makes it a good candidate to be used in runtime. 
Both H-COPS and G-COPS performed better than state-of-the-art solutions as they satisfied SLOs while reducing the overall cost by more than 40% for problems of small and large instances.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101203" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0c862eb2eb56d0a28cd6f4ac7684ca624b7a6169", "sources": [ "DBLP" ], "title": "COPS: Cost Based Object Placement Strategies on Hybrid Storage System for DBaaS Cloud", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "0c9603eb7214879b8a999af8b043627172ea3619": { "authors": [ { "ids": [ "9273722" ], "name": "Animesh Chhotaray" }, { "ids": [ "3405521" ], "name": "Adib Nahiyan" }, { "ids": [ "8644086" ], "name": "Thomas Shrimpton" }, { "ids": [ "2925373" ], "name": "Domenic Forte" }, { "ids": [ "1741893" ], "name": "Mark Mohammad Tehranipoor" } ], "doi": "10.1145/3133956.3134040", "doiUrl": "https://doi.org/10.1145/3133956.3134040", "entities": [ "Black box", "Block cipher", "Cipher", "Ciphertext", "Cryptocurrency", "Cryptography", "Denial-of-service attack", "Encryption", "Key (cryptography)", "Plaintext", "Syntax error", "System on a chip", "Trojan horse (computing)" ], "id": "0c9603eb7214879b8a999af8b043627172ea3619", "inCitations": [], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "828", "journalVolume": "2017", "outCitations": [ "512e08451eb0d805c77b86e5821560f3b7dec565", "e45e88da9a0ed26a98cbd7b2ce77465e245a2b9f", "7eed7154d0590c1e6eb64d4ae7935a526deac4a4", "143e8af9af7148ff5ef16bd7fda69975ffba15a7", "582302da008255ff515f05c3242f750878725745", "6d3753abbd8550d1e6864edc8b83c226b09685b5", "12ea59b3cf579882a52d9057fce3f9fc5703e387", "339a72b66db278bfffed48545344b75ac9730afd", "0c1a82f12c109d838d84c1cabdb9075b0dec1ce3", "718bb58e58163190b11d36a447e767b85c0b3708", "94206be34b27903edd8e6c35efde0820750c525c", "5447285a45d9833ce34d936068271f46509ab390", 
"7c473db8d503b5107292a98426e6c94fec857c65", "4d9d3df6f23afef5605ae45b46deb30ffe13894f", "691bb92ffd229e2cce7c42c1ede818915afa73ee", "129e9edc6afaf9f250a057d47c49526d82d7cfaa", "c496d22187548aae624315cda41cc14f5aa459c6", "3b194021b6dc691ab1867f93026b1516638b0b89", "5608eec85d4de48d7516e14adbbeb500b9ca5e5e", "7189c5e1aab8239a1016f1ec14df4af30047eb59", "b6f5251a67c5cf8539c0213c387a583cdcefd493", "9f95eb7ce7ce190c7c8e6fca26de1a283f7007b1", "178a7528c37be8f9975c71c046df60dece79085d", "46b2b9f10c52e83b57e60a224696296551f317ea", "a44d8ca755f1bb1feefb1777c625118fe99c854d", "2664f2413bf1e19388babbb8bc41c55d6b42ae28", "38b5e9528b2f978d1316ab2485c0e1803f4ec9b8", "442f60939cc7a5fd6e0e7470b49cd4f7cbae855a", "c7f88fd8161fdb03860cd5766d7f5165517afc5f" ], "paperAbstract": "We provide an analysis of IEEE standard P1735, which describes methods for encrypting electronic-design intellectual property (IP), as well as the management of access rights for such IP. We find a surprising number of cryptographic mistakes in the standard. In the most egregious cases, these mistakes enable attack vectors that allow us to recover the entire underlying plaintext IP. Some of these attack vectors are well-known, e.g. padding-oracle attacks. Others are new, and are made possible by the need to support the typical uses of the underlying IP; in particular, the need for commercial system-on-chip (SoC) tools to synthesize multiple pieces of IP into a fully specified chip design and to provide syntax errors. We exploit these mistakes in a variety of ways, leveraging a commercial SoC tool as a black-box oracle.\n In addition to being able to recover entire plaintext IP, we show how to produce standard-compliant ciphertexts of IP that have been modified to include targeted hardware Trojans. For example, IP that correctly implements the AES block cipher on all but one (arbitrary) plaintext that induces the block cipher to return the secret key. 
We outline a number of other attacks that the standard allows, including on the cryptographic mechanism for IP licensing. Unfortunately, we show that obvious \"quick fixes\" to the standard (and the tools that support it) do not stop all of our attacks. This suggests that the standard requires a significant overhaul, and that IP-authors using P1735 encryption should consider themselves at risk.", "pdfUrls": [ "https://eprint.iacr.org/2017/828.pdf", "http://doi.acm.org/10.1145/3133956.3134040", "http://eprint.iacr.org/2017/828" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0c9603eb7214879b8a999af8b043627172ea3619", "sources": [ "DBLP" ], "title": "Standardizing Bad Cryptographic Practice - A Teardown of the IEEE Standard for Protecting Electronic-design Intellectual Property", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "0c9fbe0ffae8c874fa826efa6ae3650a151afbff": { "authors": [ { "ids": [ "2442733" ], "name": "Mingkai Dong" }, { "ids": [ "1716528" ], "name": "Haibo Chen" } ], "doi": "", "doiUrl": "", "entities": [ "Atomicity (database systems)", "Byte", "Byte addressing", "CPU cache", "Correctness (computer science)", "Critical path method", "Data General Nova", "Data structure", "Linux", "Linux", "Memory bus", "Non-volatile memory", "POSIX", "Page cache", "Pointer (computer programming)", "Throughput", "Volatile memory" ], "id": "0c9fbe0ffae8c874fa826efa6ae3650a151afbff", "inCitations": [ "db57257e6b051e0f97d35209cc5aee0909cde1f1", "67ffec9c10d9594eb9af4afe25b1f0b0bce5f85d", "4994eb0dfa2d15d7b5013563d018e8c16b71b039" ], "journalName": "", "journalPages": "719-731", "journalVolume": "", "outCitations": [ "10419c6f4aa50a36ed0b103c9ddb9aec45f133fe", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "00eca22210d053222332434f4db54ef9a45ce7bc", "088e3e939ad234b6fdd0e321290fb26937dc2553", "23ee1c97c4a1229618bf6a614b02f33dc678fe6b", "05a1357946de5eca42a477b7b268db4944219a2e", "ba54471bd4c8684b5ae08a80e994a7b5f786e901", 
"400ae82ab2fc2c814033c65854229ecefbddbf67", "885c666fbcfd1a10c613496d7a041d01b99c7a39", "24724ad8962a9e04eb496fddaefe9708f6960601", "0420266f84cc95d6b7a8100e601f67d1118d4965", "129f11028220d87525b37b4605a2c04eb26f3e73", "76791c6c0ee7e24d870515b897175ea5600d35b0", "2ce3726fa2bb2ae42880241dfa3baba50d29043c", "237158050b9498f441419a087cdd9aa9eeef283e", "2c84daae142c5b0f4ca6a6772ca7e8cac7d7afca", "2e3bda19a2ed88e8a6e5cc415e27da551653ff1d", "09c0d62190aedb53e820695ccbe98d90f877cc46", "57c823b3b07b98233394bf15cfbbaed6a84809df", "d0b6d2075a653d60452b6df0fced4ee0ae093dd2", "4228d3fee9d28b3b3a7dc8e9585d02652109029f", "a04678ad8398a2579c249fff4b59bfbcfdd7e25b", "47b78e7eb12859a141aed6a28a4e301eb0352629", "42512431ca7fffdbc80eb7280d093efcead3d48d", "265d18ced11e2e64d98afa97b0e86965e68101f7", "9183cde02e4306828089fb8adae74736a9df3ceb", "27611a1896feb8817eb9cebca344d9736916c3bb", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "7e4ecfc13aba74db770378e640d5fbcce7fd3d2e", "14cb2d4f902544862076519d9e424d071612a15e", "0a6ac384b28e0c947bbd8763272b45fc01b67e21", "0204f40221260d00c5ee63646560a40dcd7d97d1", "5c06564087db9e53a72ef1eb5865696b0dddd8ca", "0623076ac6d3c109e180e776f5fb524dcb1bd9a9", "26f820aa9e782f5d6ba8bcb272a31c32094dfd59", "765e6f4feeb1f7d59d2b3c011e2e38814a958afa", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2" ], "paperAbstract": "Fast, byte-addressable NVM promises near cache latency and near memory bus throughput for file system operations. However, unanticipated cache line eviction may lead to disordered metadata update and thus existing NVM file systems (NVMFS) use synchronous cache flushes to ensure consistency, which extends critical path latency. In this paper, we revisit soft updates, an intriguing idea that eliminates most synchronous metadata updates through delayed writes and dependency tracking, in the context of NVMFS. 
We show that on one hand byteaddressability of NVM significantly simplifies dependency tracking and enforcement by allowing better directory organization and closely matching the per-pointer dependency tracking of soft updates. On the other hand, per-cache-line failure atomicity of NVM cannot ensure the correctness of soft updates, which relies on block write atomicity; page cache, which is necessary for dual views in soft updates, becomes inefficient due to double writes and duplicated metadata. To guarantee the correctness and consistency without synchronous cache flushes and page cache, we propose pointer-based dual views, which shares most data structures but uses different pointers in different views, to allow delayed persistency and eliminate file system checking after a crash. In this way, our system, namely SoupFS1, significantly shortens the critical path latency by delaying almost all synchronous cache flushes. We have implemented SoupFS as a POSIX-compliant file system for Linux and evaluated it against state-of-the-art NVMFS like PMFS and NOVA. 
Performance results show that SoupFS can have notably lower latency and modestly higher throughput compared to existing NVMFS.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-dong.pdf", "http://nvmw.ucsd.edu/nvmw18-program/unzip/current/nvmw2018-final27.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/dong", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_dong.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0c9f/be0ffae8c874fa826efa6ae3650a151afbff.pdf", "s2Url": "https://semanticscholar.org/paper/0c9fbe0ffae8c874fa826efa6ae3650a151afbff", "sources": [ "DBLP" ], "title": "Soft Updates Made Simple and Fast on Non-volatile Memory", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "0ca6a879ad1075869c96ac50b9cd7eaf15dfc666": { "authors": [ { "ids": [ "38516568" ], "name": "Pablo Fuentes" }, { "ids": [ "1879134" ], "name": "Enrique Vallejo" }, { "ids": [ "1762103" ], "name": "Ram\u00f3n Beivide" }, { "ids": [ "1794299" ], "name": "Cyriel Minkenberg" }, { "ids": [ "1741016" ], "name": "Mateo Valero" } ], "doi": "10.1109/IPDPS.2017.110", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.110", "entities": [ "Blocking (computing)", "Deadlock", "Lossless compression", "Network congestion", "Routing", "Simulation", "Stock and flow", "Throughput", "Virtual channel" ], "id": "0ca6a879ad1075869c96ac50b9cd7eaf15dfc666", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "842-854", "journalVolume": "", "outCitations": [ "a4e9124a08dcce799cd7b34655bb0d089f47b7df", "eeaf14c39d463ca11a6070450fe47bed91a0ae8f", "438584c2ea63887ad6b227ad5d6743aa8ab0b443", "b10ff7d0f23149d0b442a38964c737374c2fa4ff", "b1209f6ac85768f7fc4bd0159fa390200e3207d6", "1b27049bc69641a328a9b3a28f9b6bb8b9afd5f5", "943cf22e168a86fec0381ca380474c1da39e509c", "97900426468a277603579c1410134a2fef509cf3", 
"5f8991828def57d2f0cda942566afff56740d150", "0736d68aad2c198a8f6dda851c27bd180421c2aa", "d488b45aaa4ceb4e994562199088c46fdb735925", "194a6fc5de629e4f55c00d0720b8279ac8b494de", "6e2ae987e8c0efc3bb92df4adfa728dc57f983f7", "854fc26b3fe9fb5d4e643be81df063d45415a993", "6f73820a6ef96c21ac6ae6f82d42b5b187b34138", "38bcb4ec26cf6755c4b2d0b74257ad17a4b99642", "8eb1c6d8d479192a22d3b2d3f351083a243e6011", "7a7158c463a87f7cd7a13a782645f15e13d649a1", "4110d5ad162fbf43a3418f28b4d46609c2a147be", "8b0a16e9ab419a2096dbd55d5326607cbc385025", "1eeea239e84fa6901e74b6e8552ecec7dd800b11", "9c4b6c885bfc6038cdac56763663880e0f2624e6", "013a0623848119ad6082bc5f8893e4814ab07ea8", "565d6f6518e8b74b608b9e37d4c550f62d2909c8", "6bad177eb5fc0fd7ea223149cec4a76d8567479a", "4190d69147e34441ab4e4fa4fb4247eea092aa8e", "15f1312866a40e516f0e7f128864013ef6eb2df8", "5885d3525c1789aaa3aacc1740a3a6b51376f1b8", "a15bc58fa496b6cca937713723f19f45380fc2fe", "022a0317d5bf2b38847b03f7c9bc3bfa35950199", "4654de106f5fd7caf1aab17468fad46a525c9da2", "c080810ae3dff3bf47305e56328418cdfab83592", "2d086787132666be7d425c5534132b0956c30435", "8ad197d6b10ddd3df3a1fd1c3f71773a83cfb9ac", "42e5e97272ad8728749f861ed7a920707e698778", "40bb02f20fd846424a065fc06c45ae237d8ec13e", "2f5e593d29a5eb8b3f7c65e4e5c740b792933757", "9d7d1fd7be593ca61a334f9b18eb8ba8e0149450", "2b8f7ce8460e7e183de754b09cfc0e624476d7f2", "528628b4d20e6a98130ac12083a8c879aa31c7ad" ], "paperAbstract": "Deadlock avoidance mechanisms for lossless lowdistance networks typically increase the order of virtual channel (VC) index with each hop. This restricts the number of buffer resources depending on the routing mechanism and limits performance due to an inefficient use. Dynamic buffer organizations increase implementation complexity and only provide small gains in this context because a significant amount of buffering needs to be allocated statically to avoid congestion. 
We introduce FlexVC, a simple buffer management mechanism which permits a more flexible use of VCs. It combines statically partitioned buffers, opportunistic routing and a relaxed distancebased deadlock avoidance policy. FlexVC mitigates Head-of-Line blocking and reduces up to 50% the memory requirements. Simulation results in a Dragonfly network show congestion reduction and up to 37.8% throughput improvement, outperforming more complex dynamic approaches. FlexVC merges different flows of traffic in the same buffers, which in some cases makes more difficult to identify the traffic pattern in order to support nonminimal adaptive routing. An alternative denoted FlexVCminCred improves congestion sensing for adaptive routing by tracking separately packets routed minimally and nonminimally, rising throughput up to 20.4% with 25% savings in buffer area.", "pdfUrls": [ "http://upcommons.upc.edu/bitstream/handle/2117/107647/FlexVC+Flexible+Virtual+Channel+Management+in.pdf;jsessionid=D92E427C1FA21BE4080A4FAF055A4EE6?sequence=3", "https://doi.org/10.1109/IPDPS.2017.110", "http://personales.unican.es/fuentesp/refs/IPDPS2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0ca6a879ad1075869c96ac50b9cd7eaf15dfc666", "sources": [ "DBLP" ], "title": "FlexVC: Flexible Virtual Channel Management in Low-Diameter Networks", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "0ca87ce21f3feeef9e25bc2714d6a64ebaf875ee": { "authors": [ { "ids": [ "1733803" ], "name": "Reid Priedhorsky" }, { "ids": [ "28330125" ], "name": "Tim Randles" } ], "doi": "10.1145/3126908.3126925", "doiUrl": "https://doi.org/10.1145/3126908.3126925", "entities": [ "Daemon (computing)", "Docker", "Information needs", "Linux", "Linux", "Requirement", "Source lines of code", "namespaces" ], "id": "0ca87ce21f3feeef9e25bc2714d6a64ebaf875ee", "inCitations": [ "59fda98bb870dd76000a5d5d83e010941b91558f", 
"279231718b1bddb2a873a56b0f830980d7182872", "990ba6028c1abfb118eccd528504c8d09cb1be23", "bb730419a3042f91d6293452e6ddfebe3f287456", "0351f22135e61de52250654b1f8b277cd8c7a173", "d27d6bd6b5e859ae4d78b68a6815388d4c48a383" ], "journalName": "", "journalPages": "36:1-36:10", "journalVolume": "", "outCitations": [ "3b7e2038ec22cf637df70c833d473b0f3b43713a", "e398b8a77b849d1a4133b97b0b150b7392b7218d", "4a7872f0177e00c1c3621be90b72299fc3498474", "59ba9f62728b6231f982ea3b59f9ba7422182f28", "b1451d4c4958687de6742b99fe8cfea7834d3c4f", "ac5acee1867d2e2bdc762e116bc2694f02e1f9aa", "1c8d06510ad449ad24fbdd164f8008cc730cab47", "0914d1fa86a1a5eeb16dcea904cc226fb010e508", "77d97e17c7129a810d14fb8dfd17fa4ca07e18bc" ], "paperAbstract": "Supercomputing centers are seeing increasing demand for user-defined software stacks (UDSS), instead of or in addition to the stack provided by the center. These UDSS support user needs such as complex dependencies or build requirements, externally required configurations, portability, and consistency. The challenge for centers is to provide these services in a usable manner while minimizing the risks: security, support burden, missing functionality, and performance. We present Charliecloud, which uses the Linux user and mount namespaces to run industry-standard Docker containers with no privileged operations or daemons on center resources. Our simple approach avoids most security risks while maintaining access to the performance and functionality already on offer, doing so in just 800 lines of code. 
Charliecloud promises to bring an industry-standard UDSS user workflow to existing, minimally altered HPC resources.", "pdfUrls": [ "http://permalink.lanl.gov/object/tr?what=info:lanl-repo/lareport/LA-UR-16-22370", "http://doi.acm.org/10.1145/3126908.3126925" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0ca87ce21f3feeef9e25bc2714d6a64ebaf875ee", "sources": [ "DBLP" ], "title": "Charliecloud: unprivileged containers for user-defined software stacks in HPC", "venue": "SC", "year": 2017 }, "0cb1d325cef6ebcd5671478e5447320e91dea623": { "authors": [ { "ids": [ "2996979" ], "name": "Ewa Syta" }, { "ids": [ "31329226" ], "name": "Philipp Jovanovic" }, { "ids": [ "3374069" ], "name": "Eleftherios Kokoris-Kogias" }, { "ids": [ "3370481" ], "name": "Nicolas Gailly" }, { "ids": [ "20492140" ], "name": "Linus Gasser" }, { "ids": [ "3381036" ], "name": "Ismail Khoffi" }, { "ids": [ "1756673" ], "name": "Michael J. Fischer" }, { "ids": [ "4920811" ], "name": "Bryan Ford" } ], "doi": "10.1109/SP.2017.45", "doiUrl": "https://doi.org/10.1109/SP.2017.45", "entities": [ "Adversary (cryptography)", "Byzantine fault tolerance", "Experiment", "Formal verification", "Pigeonhole sort", "Randomness", "Scalability", "Secret sharing", "Selection bias", "Shard (database architecture)" ], "id": "0cb1d325cef6ebcd5671478e5447320e91dea623", "inCitations": [ "8a9fa1dc63ebe8d49b73a5b6d5ffb42f2ff930f4", "77d484b0194698366ba118e28287896829cf6dfe", "2d63a476723c6d783238083e6b6dee0aacc07525", "6e54b2a8aea4f4843185e974a396b21d35373da0", "19a90a5f759bf7c7de7df13590246c926c65895c", "71c5bc722f575665878dc3ca47953f384426899a", "9b64250206f8bf8bd71cd48e246ac45b9cbede67", "1d7d97012605bebdfa4f451fcbe493e7c95f6eb3", "318d0aca7efa3a7e833a1993cec125a07fb7f9b1", "bdd7454cdbf10b58cdbdf81c751ebf075126bfb4", "3ccd8850b72c35cfa727df2d617b93983871c2cb", "919e32847097416aada92dff7c8274cd9ca55582", "7d5344570d8e95b1ba3e7694829b071793210234", "e46050fc062630f663aa581d0523e7bacd47e86a" ], 
"journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "444-460", "journalVolume": "", "outCitations": [ "9d2c1271f1219522d13f150c2b04123bef300dd9", "09af9108cb5c196d5c15a6f3d26e604434203bea", "1a7a9097cb214e9f47690c69f2cb64ade8ae1717", "0cb88d5c6ed83552606ac1b2e477d5890b37be7a", "2f0a8d6744199556d0af5c1cc26ab99695c38a71", "2459b15dcd7c8d383980c0a118c0983d4ec010d5", "9de473e5833ffb9f0461b8e5c3adccc1985a02c4", "33888a32f1e2c90abdf6f802564c666cf50f5390", "a513c22df84d752391f050fa8e004ba2630409d4", "32d21ccc21a807627fcb21ea829d1acdab23be12", "98b0adee3fee4158651be4e7db11f8bb2577f8e2", "22a7482b5d466f483caf4b7aa5bb66c2b9d881d1", "8f3282f4141f3a096f821a19aeeaf0f9f6c491f6", "f31411fd1fb0078f8e3e277f1df6a462ce7ce6a4", "75d83792b880757a09e9a72978cc29beb57c4ad5", "462afd7cc36165b3dd3d4605f74cbdb31edd7262", "1be9599ad4fe106a1511e675a1684df0563b56a6", "12b52d74455d9de2a346c21338fe9aa4a5309a49", "203f9b92407c1f591f505049e0c941e5bffd1c2b", "2116bafe033c8a31d039ab11dc35ffa24dacc4ea", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "16d64925a863e34d1f339af7b92faed3f76a27e9", "6cf83f8acee567e3910faf03cef35c3cc145e1bd", "a2f1f2149878f68163781da8e439564ee65f9137", "b0e15fb07405c677ae004b8834f50c3d6d91a5fb", "3d049eb62dd331b066df3cd455287ec487a745bb", "9dde09e003f8200400567b0c7be7c60679daaa3c", "78e2d6b7a671d8e53f207adff088833fd7606e13", "415535e942a0f658314497f03413ded9d4685540", "31445e3bd3672ed743c4a089cc0db4f23357f0f2", "90f26c0f0d04b9c4999b454c35ee1c7603ca9e4b", "0ec7c564681c17570a0c4116c6b173c6bc0127eb", "742bf7ff7e39a8ea4fe62a7af1a1242a75ff09b4", "4701faa37e2c5ba5d1810b9b39db28bbcf4a230e", "3cc86ff94309bb58b2125eea173b23ab89f26a3b", "bf0d3e8eac1466d5c8d3e142cd7d9fb3f2799517", "0587b0708e48cf9fff42ff6f62edc04dfcf34bb7", "0098c0407d9ade8c2e37a5f65cc1121b6d55cf94", "d0c6841e9cf40ea3714b35eb3b709bc46ce050b0", "990770cb56bfb9cb08b3d90711a35f6f84693c4c", "8d69c06d48b618a090dd19185aea7a13def894a5", "20f5f8733134d87041b95b742d613051a1fb3fdb", 
"3713a2a7d4ad74e121dfee045f3ce5dc08c89e2a", "406a37d8ccb6cb1355b7aeded65e50fc00b2977c", "483ccd93de1e0e9b789a6fdac79554d80397c9de", "40a98bed1d10248d30e86304315df07280dad93e", "ebae9c7d91ea8b6a987642040a2142cc5ea67f7d", "37f4b4feabbcc37b3a9229fb35608f90fdb495d4", "8ffe05afdc78528b7ed5bc3669515b6d3655bc4c", "280250adda984a6464eddf98beac56f8e302fe07", "0b0e8149908240f26b53f50f1610db936dfabcfa", "054b0a3ff27511c6c2afd3f6ace4c2284b8bc858" ], "paperAbstract": "Bias-resistant public randomness is a critical component in many (distributed) protocols. Generating public randomness is hard, however, because active adversaries may behave dishonestly to bias public random choices toward their advantage. Existing solutions do not scale to hundreds or thousands of participants, as is needed in many decentralized systems. We propose two large-scale distributed protocols, RandHound and RandHerd, which provide publicly-verifiable, unpredictable, and unbiasable randomness against Byzantine adversaries. RandHound relies on an untrusted client to divide a set of randomness servers into groups for scalability, and it depends on the pigeonhole principle to ensure output integrity, even for non-random, adversarial group choices. RandHerd implements an efficient, decentralized randomness beacon. RandHerd is structurally similar to a BFT protocol, but uses RandHound in a one-time setup to arrange participants into verifiably unbiased random secret-sharing groups, which then repeatedly produce random output at predefined intervals. Our prototype demonstrates that RandHound and RandHerd achieve good performance across hundreds of participants while retaining a low failure probability by properly selecting protocol parameters, such as a group size and secret-sharing threshold. For example, when sharding 512 nodes into groups of 32, our experiments show that RandHound can produce fresh random output after 240 seconds. 
RandHerd, after a setup phase of 260 seconds, is able to generate fresh random output in intervals of approximately 6 seconds. For this configuration, both protocols operate at a failure probability of at most 0.08% against a Byzantine adversary.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.45", "https://infoscience.epfl.ch/record/230355/files/sp17-final.pdf", "http://eprint.iacr.org/2016/1067.pdf", "https://eprint.iacr.org/2016/1067.pdf", "http://eprint.iacr.org/2016/1067", "https://suri.epfl.ch/talks/randomness-syta.pdf", "https://icservices.epfl.ch/edic/down.asp?ID=2781&pid=2673", "https://zerobyte.io/talks/2017-05-23-randomness-ieeesp.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0cb1d325cef6ebcd5671478e5447320e91dea623", "sources": [ "DBLP" ], "title": "Scalable Bias-Resistant Distributed Randomness", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2016 }, "0cb7f47f628f5012918d91ae05a3727f4772af6b": { "authors": [ { "ids": [ "39240703" ], "name": "Aaron Johnson" }, { "ids": [ "40308136" ], "name": "Rob Jansen" }, { "ids": [ "1737906" ], "name": "Aaron D. Jaggard" }, { "ids": [ "1751820" ], "name": "Joan Feigenbaum" }, { "ids": [ "3213341" ], "name": "Paul F. 
Syverson" } ], "doi": "", "doiUrl": "", "entities": [ "Adversary (cryptography)", "Algorithm", "Autonomous car", "Autonomous system (Internet)", "De-anonymization", "High- and low-level", "Relay", "Routing", "Selection algorithm", "Simulation", "Tor Messenger", "Traffic analysis", "Trust metric" ], "id": "0cb7f47f628f5012918d91ae05a3727f4772af6b", "inCitations": [ "5348e6e2f9913319a05a514300cd0e20f48ae499", "3584c519259cc4bfdb910c8d2d8b6caf9d60a7d4", "43ee0b576739bdd81ab097d26e1ba308aa830775", "43478bc84578c3470371adba196bc9fcc8e1f0e3" ], "journalName": "CoRR", "journalPages": "", "journalVolume": "abs/1511.05453", "outCitations": [ "1648be4fa6a517cdad08c6299e20869427242d02", "1ca1f10b05f9dc3a4306a5c06aa660f8c5d80792", "8853ca8425684a83ebb2feae1409563a5fbef7c8", "5009219dac4519d08ef0788ddfbd6eb4f7163c93", "56e9649bb6c33dce327b808ebd1ab7236099e110", "48a068fc680131e6c0f215ee41318086a5befec1", "566333376dd1af014555b4cf82cda42c22501013", "8750c0b8094957003fd7f681f9ef8af47b86a99d", "04269ca9938b4d99658a3527cde3eff2b502f269", "0f1a01fde1ac0507b48c369c7f4f837bb3c628f6", "82e1c4a88a87b1a40a3ac5aa3358324da0aae897", "356e0eece5bbd700ea3c388af8ea3e088baf7c6e", "21f47e1d9078d12de1bd06341619923e8b9d85bb", "e372a3b15c5f080630d958dd56ffb7e4e8e2a4a0", "aa2e4066ed9c81ba9f444b6559644232a205a138", "0469044c24ac4897fcffc3c7db6a2d9c7fd08848", "d3763c4e052d0ac80ee3a7895cb86182f683b887", "20413c2b4e9aad658d4957ebcec435a637b11ddc", "108747579aef6bf029623639a86070feaf5cad41", "31f530f8d0ecb37d2d376313bc4d0dd371bc34de", "476b20ee2c8d0da7211afb245fd30ea39464f5e9", "3b13ddfca849418deed7035085b2276e2c8fd6e0", "266681c25e3d67e0322249ad0a2f23f45f8f6c28", "6a014bd00e784f2b0d5fb8789971a685d682f8c2", "05bb5174ee7de801b6e1a55086455b0341ff5649", "5f5f5fe8533b106a8d1e29dcbdf8ae7000882f3b" ], "paperAbstract": "Tor users are vulnerable to deanonymization by an adversary that can observe some Tor relays or some parts of the network. 
We demonstrate that previous network-aware path-selection algorithms that propose to solve this problem are vulnerable to attacks across multiple Tor connections. We suggest that users use trust to choose the paths through Tor that are less likely to be observed, where trust is flexibly modeled as a probability distribution on the location of the user\u2019s adversaries, and we present the Trust-Aware Path Selection algorithm for Tor that helps users avoid traffic-analysis attacks while still choosing paths that could have been selected by many other users. We evaluate this algorithm in two settings using a high-level map of Internet routing: (i) users try to avoid a single global adversary that has an independent chance to control each Autonomous System organization, Internet Exchange Point organization, and Tor relay family, and (ii) users try to avoid deanonymization by any single country. We also examine the performance of Trust-Aware Path Selection using the Shadow network simulator.", "pdfUrls": [ "https://arxiv.org/pdf/1511.05453v2.pdf", "https://arxiv.org/pdf/1511.05453v1.pdf", "https://arxiv.org/pdf/1511.05453v3.pdf", "https://www.nrl.navy.mil/itd/chacs/sites/www.nrl.navy.mil.itd.chacs/files/pdfs/16-1231-4380.pdf", "http://arxiv.org/pdf/1511.05453v1.pdf", "http://arxiv.org/abs/1511.05453", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/avoding-man-wire-improving-tors-security-trust-aware-path-selection/", "https://www.internetsociety.org/sites/default/files/ndss2017_06B-3_Johnson_paper.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/3b75/4f7daa7496b99b9bd42c3dbdd45e1d4c6581.pdf", "s2Url": "https://semanticscholar.org/paper/0cb7f47f628f5012918d91ae05a3727f4772af6b", "sources": [ "DBLP" ], "title": "Avoiding The Man on the Wire: Improving Tor's Security with Trust-Aware Path Selection", "venue": "NDSS", "year": 2017 }, "0ccb8f05b8550c6059c9dd0be4ae171576daab0f": { "authors": [ { "ids": [ "33828705" ], "name": "Kevin Hsieh" }, { 
"ids": [ "3459901" ], "name": "Aaron Harlap" }, { "ids": [ "1920997" ], "name": "Nandita Vijaykumar" }, { "ids": [ "9758493" ], "name": "Dimitris Konomis" }, { "ids": [ "1707164" ], "name": "Gregory R. Ganger" }, { "ids": [ "1974678" ], "name": "Phillip B. Gibbons" }, { "ids": [ "1734461" ], "name": "Onur Mutlu" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Amazon Elastic Compute Cloud (EC2)", "Centralisation", "Correctness (computer science)", "Data center", "Emulator", "Experiment", "Gaia hypothesis", "Machine learning", "Speedup", "Synchronization model" ], "id": "0ccb8f05b8550c6059c9dd0be4ae171576daab0f", "inCitations": [ "fa0412fc819fce2468a65b65a2820247c2776760", "02533c75f2fe991638c963bd107aa1d28b916b6e", "d0556be65e8564ab8bb3e26b6a0146a62027bc40", "6cae9ad284a73471a8ed9e483b1673a60d61d946", "09276906df8cd0942d114dcfbf350a31c7205a5f", "3f99bb743fa9576f8da7d168f3858dd0acf35e79", "ecf5efd5fe18860b42a1abd198e94a868dbf944c", "011b55926aecfd0baf33eb467bea5a3f0bcc2d28", "381b008b49d04c1bd5ff00649521fa028b9d3ea8", "e188b85218c7e31c8073bfaad73287059ff57c50" ], "journalName": "", "journalPages": "629-647", "journalVolume": "", "outCitations": [ "b293405e9b3cfac8c58083b38bdc85d18dd0c187", "0144941d255dad89d3d90c2d131a15cc01df9829", "122a631fb23d06e86bbf0d3d1905e3496539234e", "1a034af839d43be34dcf492112ebd0870be230d5", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "725abc1a03355d8928d2c60898ef76f652454f01", "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", "a8e8f3c8d4418c8d62e306538c9c1292635e9d27", "2d83ba2d43306e3c0587ef16f327d59bf4888dc3", "411eb6534d39a37ed43443ba1d2e168c73171330", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "187d3d8109e51b5e2c4941048b0fd6cf1d464370", "04ca5de59edbdd49a9c0502c58331524d220bc8c", "1e987ea60c476bbabbb306e2e795bfb81ecc97aa", "8aa09720221bdeef43e150fc7f6896f71600fb86", "0346de4027e75bc194811be80421c6e403495c7a", "2a628e4a9c5f78bc6dcdf16514353336547846cc", "bb2b45a0e650ca87590cfa3df93066eecf4e54f6", 
"09396d113a7f5ce282574ff8aa02bf93003bee03", "0558c94a094158ecd64f0d5014d3d9668054fb97", "2645a136bb1f0af81a526f04a1c9eb2b28dccb1b", "0b99d677883883584d9a328f6f2d54738363997a", "215aa495b4c860a1e6d87f2c36f34da464376cc4", "34b8809c214db18544ce93674bf85fce0e8b3330", "12325eaa502bc78762e628c1eecf1181841a75a7", "0607c1e5ed0fa225a9f2d20ae7ae526397adf96b", "0546fa6622b8b8db8527be777a692d88c5c037b0", "098cb3139059c6c8b51da998a5df585d6552c475", "45c11575829c295731535e04bfcbe1ea6c33343a", "0e9bac6a2b51e93e73f7f5045d4252972db10b5a", "17f70b9d1fcf3b31948ffa578ac89399751fe73d", "0ad8e89091eed09217e66adc98136126addc2619", "0122e063ca5f0f9fb9d144d44d41421503252010", "0788cda105da9853627d3e1ec8d01e01f7239c30", "510ac232c61e2b1fea3bf107dca4cde3663bc852", "3b2697d76f035304bfeb57f6a682224c87645065", "0a68c6226e04180671a474c73fa0a2b4a154d129", "052b1d8ce63b07fec3de9dbb583772d860b7c769", "4aa4069693bee00d1b0759ca3df35e59284e9845", "2a894be44d07a963c28893cc6f45d29fbfa872f7", "fcc9a75d0cd12cfce81fcd22b22867f76b258e0c", "5aaa65fbe2abe27afb237b2f40909d686b14b1ee", "2d66dac85b819503ce6d311d37770e31bdf36692", "3bb6d5834bfb355553588e382ac5f9fa8a8d831d", "1e638c29489e006c6227aa2bb84d88d9f2597468", "62f991612959e6fff29128b1d1a54596f6504029", "2b3113b7fda6414548e88fc664f3be96d5209830", "1637ac4fed83b8309df2de07fbeb8b2511bb1170", "28b95582aea29cd700a99cc4ddf8cc5a91270446", "043afbd936c95d0e33c4a391365893bd4102f1a7", "4d2069788d8041e50f0107db060e14b196747602", "5c36d8bb0815fd4ff5daa8351df4a7e2d1b32934", "044e2d7359411351afe78726e2a44b01284e905d", "3c029e72f5c75c8dd87a6acd43d05f23407e39cf", "2746a16b1cddf914f25acd9835ee4e6ea9ab44db", "7717b438da4ec3ca4247ff7abf6dd603e91fe41d", "4f1b0ecec49193d3621199db0763baa8722f99f1", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "6721de68fe57a4e01901450e1dfd381f065cc580", "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "75a6bdbeaf768e40a0e3dff8d589af7746e68f72", "3f339760068da1db481c670490aa146975f892d6", "26f1b2bf3f13707e6be671a10c5a1f057bce2515", 
"154e4aaf1da9e5ed6ebf5e216adbaaa85324d39c", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "73f512de77dad7d0abe8076a856727021b9493d3", "71fca39ad4de8f844aa547683bcbae6eab92ecb0", "09dc889030f6da66664bd7d14af4cda00d857833", "31bd159b02068fbe1994b7a5e9d7b91adab0d142", "3784b73a1f392160523400ec0309191c0a96d86f", "3bd6bc388dea99b023c6695bd287eac8f5d28c0a", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "a6a8313f30420c60e7eaa9f34ea5a41833695af1" ], "paperAbstract": "Machine learning (ML) is widely used to derive useful information from large-scale data (such as user activities, pictures, and videos) generated at increasingly rapid rates, all over the world. Unfortunately, it is infeasible to move all this globally-generated data to a centralized data center before running an ML algorithm over it\u2014moving large amounts of raw data over wide-area networks (WANs) can be extremely slow, and is also subject to the constraints of privacy and data sovereignty laws. This motivates the need for a geo-distributed ML system spanning multiple data centers. Unfortunately, communicating over WANs can significantly degrade ML system performance (by as much as 53.7\u00d7 in our study) because the communication overwhelms the limited WAN bandwidth. Our goal in this work is to develop a geo-distributed ML system that (1) employs an intelligent communication mechanism over WANs to efficiently utilize the scarce WAN bandwidth, while retaining the accuracy and correctness guarantees of an ML algorithm; and (2) is generic and flexible enough to run a wide range of ML algorithms, without requiring any changes to the algorithms. To this end, we introduce a new, general geo-distributed ML system, Gaia, that decouples the communication within a data center from the communication between data centers, enabling different communication and consistency models for each. 
We present a new ML synchronization model, Approximate Synchronous Parallel (ASP), whose key idea is to dynamically eliminate insignificant communication between data centers while still guaranteeing the correctness of ML algorithms. Our experiments on our prototypes of Gaia running across 11 Amazon EC2 global regions and on a cluster that emulates EC2 WAN bandwidth show that Gaia provides 1.8\u201353.5\u00d7 speedup over two state-of-the-art distributed ML systems, and is within 0.94\u20131.40\u00d7 of the speed of running the same ML algorithm on machines on a local area network (LAN).", "pdfUrls": [ "http://www.ece.cmu.edu/~safari/pubs/gaia-geo-distributed-ML-at-LAN-speed_nsdi17.pdf", "https://users.ece.cmu.edu/~aharlap/papers/nsdi-2017.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_hsieh.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-hsieh.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/hsieh", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-hsieh.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_hsieh.pdf", "http://www.pdl.cmu.edu/PDL-FTP/BigLearning/gaia.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/2db5/306c3679642bfbffc684de986cff80baf4c7.pdf", "s2Url": "https://semanticscholar.org/paper/0ccb8f05b8550c6059c9dd0be4ae171576daab0f", "sources": [ "DBLP" ], "title": "Gaia: Geo-Distributed Machine Learning Approaching LAN Speeds", "venue": "NSDI", "year": 2017 }, "0cd2dd864b17f17daba6096fcdccc7cec0a68e52": { "authors": [ { "ids": [ "1783053" ], "name": "Tim S\u00fc\u00df" }, { "ids": [ "2107074" ], "name": "Lars Nagel" }, { "ids": [ "26383371" ], "name": "Marc-Andre Vef" }, { "ids": [ "1726087" ], "name": "Andr\u00e9 Brinkmann" }, { "ids": [ "17631382" ], "name": "Dustin Feld" }, { "ids": [ "1767942" ], "name": "Thomas Soddemann" } ], "doi": "10.1109/CLUSTER.2017.32", "doiUrl": 
"https://doi.org/10.1109/CLUSTER.2017.32", "entities": [ "Automatic parallelization", "Central processing unit", "Compiler", "GNU Compiler Collection", "Infinite loop", "Parallel computing", "Polyhedron", "Programming language", "The C Programming Language", "Toolchain" ], "id": "0cd2dd864b17f17daba6096fcdccc7cec0a68e52", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "552-556", "journalVolume": "", "outCitations": [ "4baccc40c32cffbbe7d7235e72c23ec9c4ad22c7", "1bb4d630e8288968c3b0193691a53fe36987352a", "f93860914f723d0d48c9fd6bef515d75c3c9715e", "16de6f9e2bf6ee1068dbca8c9e5446295c904315", "61c6377d5437abb0eed21c8cadcceffd4b8c49a5", "f0f4757aa2f923a349e8357e73850a78e9b80fee", "a0c7dc8201c0813203b2960a90a82bacff236717", "60d4b2c4d9630e4905748e7d3565a013d2304906", "0f4855901a89813cf39104293f086214200e5421", "0606b9b2bedb67039eb1615f8ef5b13f42f8339a", "9bf03c62f747d78fd01ee917725d4c411c021d12", "151d002425a85b04eb76f1b485461020a04cefc4", "4d603c30e3db8f8a0a17c1fec57b9ad1fa957c1d", "f2335cee748590d17864c261cba791390a947f2e", "23affb01412312341fb336943756800c0bf2468c", "18e87fb45e3d501577724c4ed6a9f6d2f753c8e1", "afe199df9fcc5e3c4f39b380da74659b2c106ee9", "0125bdc8819ac8951d811efaa0914fc7ecb83a78", "6efa98d44b48a44394dbc90b9c67abc4b11c738c", "91e413ece54c911087295f1d9c06397e961e361f", "d6f883b74472da070df99475e41add97be23b901", "32ef8d891edde06cc01357fa5c4d1ab7fe631720" ], "paperAbstract": "The need for parallel task execution has been steadily growing in recent years since manufacturers mainly improve processor performance by scaling the number of installed cores instead of the frequency of processors. To make use of this potential, an essential technique to increase the parallelism of a program is to parallelize loops. 
However, a main restriction of available tools for automatic loop parallelization is that the loops often have to be 'polyhedral' and that it is, e.g., not allowed to call functions from within the loops. In this paper, we present a seemingly simple extension to the C programming language which marks functions without side-effects. These functions can then basically be ignored when checking the parallelization opportunities for polyhedral loops. We extended the GCC compiler toolchain accordingly and evaluated several real-world applications showing that our extension helps to identify additional parallelization chances and, thus, to significantly enhance the performance of applications.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.32" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0cd2dd864b17f17daba6096fcdccc7cec0a68e52", "sources": [ "DBLP" ], "title": "Pure Functions in C: A Small Keyword for Automatic Parallelization", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "0cdb019ac6744475e73c41bedb9df7da53d59a8e": { "authors": [ { "ids": [ "2189162" ], "name": "Ari B. Hayes" }, { "ids": [ "2635740" ], "name": "Lingda Li" }, { "ids": [ "2058766" ], "name": "Mohammad Hedayati" }, { "ids": [ "8516279" ], "name": "Jia-Huan He" }, { "ids": [ "31790133" ], "name": "Eddy Z. 
Zhang" }, { "ids": [ "1736894" ], "name": "Kai Shen" } ], "doi": "", "doiUrl": "", "entities": [ "Central processing unit", "Data-flow analysis", "Deep learning", "Encryption", "Graphics processing unit", "Image processing", "Information flow (information theory)", "Information leakage", "Malware", "Register file", "Run time (program lifecycle phase)", "Static library", "Taint checking", "Tracking system", "Vehicle tracking system" ], "id": "0cdb019ac6744475e73c41bedb9df7da53d59a8e", "inCitations": [ "892f83ce3116be08be29fc5797343711ef8239c0" ], "journalName": "", "journalPages": "209-220", "journalVolume": "", "outCitations": [ "3a998058f7e784836364ac3d93818619d5998239", "282c9fe995f5b5f94df6c4920d897f48dad55e99", "5578045657a90d2db6ac86bb4afbe38c035fc6a5", "0871c83e6f912796339ee03790eb52a7ac21b6e6", "0b1fe1af18e2fda99eedf9e5cf3ac18826e585c1", "0fd17f6f5b2b48b518a7c72f42835b82b545b944", "26127796667203f4e015cc1f47072c24f3952356", "454b134e0ad83921cbe13f4e4332c79b93aa7612", "9842688e1936c660821b71abff35d29fd666c4fc", "7a0cdc6a29b230908df2c54e584af62a7eed8d52", "7132859e2843f7adb82ec89daf0eb2bdb1da590b", "0d992d6949553e5bc1d159f169a2fe201842d53e", "1ac9528121cc31e0e6946793d11bd4e10f692787", "023f23c300804754753cb11db51fb7f582556ab7", "9ff3c58d60625aa7da9151e79ff5009ae863802f", "2f2d69b165b8dbda97a70137fbe43ad80573b949", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "0f9b2e598ee1ddde4fd5a2f3008a6983367cc22c", "25010bbdf127101e1fd5adea5e15f45765b87b0f", "2140cf164116b9c6b21ed44028902fdc3f6578e1", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "6debd9d773c7aca19f18f3b4640c45f8ae12b254", "262d29b8400657b5cda4414470313f5ad679e6db", "0b0917c1080b1cae8a80c41d38f903aa55baea70", "31eb576209fd4a548ab8bb83f74e5a7732a45e52" ], "paperAbstract": "Dynamic tainting tracks the influence of certain inputs (taint sources) through execution and it is a powerful tool for information flow analysis and security. Taint tracking has primarily targeted CPU program executions. 
Motivated by recent recognition of information leaking in GPU memory and GPU-resident malware, this paper presents the first design and prototype implementation of a taint tracking system on GPUs. Our design combines a static binary instrumentation with dynamic tainting at runtime. We present new performance optimizations by exploiting unique GPU characteristics\u2014a large portion of instructions on GPU runtime parameters and constant memory can be safely eliminated from taint tracking; large GPU register file allows fast maintenance of a hot portion of the taint map. Experiments show that these techniques improved the GPU taint tracking performance by 5 to 20 times for a range of image processing, data encryption, and deep learning applications. We further demonstrate that GPU taint tracking can enable zeroing sensitive data to minimize information leaking as well as identifying and countering GPU-resident malware.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_hayes.pdf", "https://www.cs.rutgers.edu/~zz124/atc17.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/hayes", "https://www.usenix.org/system/files/conference/atc17/atc17-hayes.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0cdb/019ac6744475e73c41bedb9df7da53d59a8e.pdf", "s2Url": "https://semanticscholar.org/paper/0cdb019ac6744475e73c41bedb9df7da53d59a8e", "sources": [ "DBLP" ], "title": "GPU Taint Tracking", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "0cf19e8178bd4248e7c5b5ca87fb1babf2619651": { "authors": [ { "ids": [ "2504006" ], "name": "Hari Cherupalli" }, { "ids": [ "3187702" ], "name": "Henry Duwe" }, { "ids": [ "7982124" ], "name": "Weidong Ye" }, { "ids": [ "8153371" ], "name": "Rakesh Kumar" }, { "ids": [ "1813088" ], "name": "John Sartori" } ], "doi": "10.1145/3079856.3080247", "doiUrl": "https://doi.org/10.1145/3079856.3080247", "entities": [ "Amortized analysis", "Bespoke", 
"Central processing unit", "Low-power broadcasting", "Microcontroller", "Microprocessor", "Printed electronics", "Printing", "Processor design", "Slack variable", "Wearable computer" ], "id": "0cf19e8178bd4248e7c5b5ca87fb1babf2619651", "inCitations": [ "6b2d1124170b848576640bb5516813527f368fcf", "322f5bca345b899b9c6d5cd138208dcaaeb48648" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "41-54", "journalVolume": "", "outCitations": [ "87553eec31dba4d8d55faa22237aff5941e8fc09", "151cb281d6486dcc231b6da3d32304e2b08b2fe0", "259730135cb6222dd64f298357efff5a7038ff90", "4165376a7aadf6a1c1acddd6fc236047b7becff1", "de0fda9c2b025d21b9e31eb2c10d9ac129a05fed", "db718b5a94b97e29dd1798738c4ce9621dfa537a", "7e7dd129e74727c2d5ea68613bdba22b0dfc7ae5", "1087e2e1244665c9574ab5914ae13c6c88bcc096", "41236387e01eacb63cefad6318dc48fc60e9829e", "e557136e4f5ea24658f0388aeb5767be896840be", "05ac8c6477c306c395433a6035706d265c9c961d", "033614852cfe29708ddebf6cb3f846582f5dd7ba", "11443efe465ad544f478524da6c66c085b16e28b", "7248d9df516209aa2c737bce3532994a5ac8a41a", "2fd2ae274d0c03fc47b90fd7c490c8fafea6450d", "e4429b2205c2187b25fc66f4d27fb02c4b72b63b", "b8f85d2b5886e9e07cf5e8f3dda2c0b731c260f2", "1abb651f5eb33d6a0c3c234c4c8a7dc2e9e47506", "03b2e534532e9558e560df0bed74976b8f48c1a5", "9458915e0b7e9abfd9f9c24e35b036505c899a8b", "9d4bc7c0569cb548b2a1e319948c8f91061abb49", "6fbb7db25a5a3a2788e2b16a5e54abc1ac36aa76", "3c11b4e74086db34430d5381031319cae83ce17a", "6228ed86d5ad276f47da6d491fd9072716e6ebf8", "7498e51ffce80639e96cfb3ab66b5558e595d07b", "0d01b42384dd92c400052a05e3d24cebaecd4056", "13ad5b5fdd3ba3d74fa96a12450b726696fbfe77", "2605c673c58f11c73166ffee54d1ae5950b532df", "5208576ac552a2bf2840558ec67f418370f5ff9d", "2188cc9ad6376f4d9877fccc1365505fd144f9fc", "5324cd42d9f04ff75037ce9cbd8e34fb278e15e4", "04a0485bec9dff9b8391bfa1372a71df2f79ad77", "14dde854414e143e737953b491e715f6b48bbebe", 
"0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "58c6ac5e3c98ad6cf476c2c99374ed07bfc1ac17", "1166d00fe5319beba9d8d713f9fdf0c4d80744b4", "063700ef01aad15a1981553fde02e8d162a553e7", "8bc590fd8d61603b0c7e6475f933f66f49bec0fc", "d724b43c9bbefc5c5f45503be77b378cd7c05114", "84b4a8b2f2fe58c57223dcb1e839d3106c820380", "c1144e92df404551e2db421b5b2dfb68b59135af", "2ddb5176006689ffbc7ba6f58f4c0eccfb3168d8", "2d172dfedc08654c47f6335550efa4d5a2d78f45", "675d53d75788b4cc580e3e90c5ef91d29454a295", "0a34381478b6576fd16623b3234f922a49b941b4" ], "paperAbstract": "A large number of emerging applications such as implantables, wearables, printed electronics, and IoT have ultra-low area and power constraints. These applications rely on ultra-low-power general purpose microcontrollers and microprocessors, making them the most abundant type of processor produced and used today. While general purpose processors have several advantages, such as amortized development cost across many applications, they are significantly over-provisioned for many area- and power-constrained systems, which tend to run only one or a small number of applications over their lifetime. In this paper, we make a case for bespoke processor design, an automated approach that tailors a general purpose processor IP to a target application by removing all gates from the design that can never be used by the application. Since removed gates are never used by an application, bespoke processors can achieve significantly lower area and power than their general purpose counterparts without any performance degradation. Also, gate removal can expose additional timing slack that can be exploited to increase area and power savings or performance of a bespoke design. 
Bespoke processor design reduces area and power by 62% and 50%, on average, while exploiting exposed timing slack improves average power savings to 65%.", "pdfUrls": [ "http://rakeshk.crhc.illinois.edu/isca_17_cam.pdf", "http://doi.acm.org/10.1145/3079856.3080247", "http://people.ece.umn.edu/users/jsartori/papers/isca17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0cf19e8178bd4248e7c5b5ca87fb1babf2619651", "sources": [ "DBLP" ], "title": "Bespoke processors for applications with ultra-low area and power constraints", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "0cf98c174f2d72891e48015ac4ebbc39a88b52e5": { "authors": [ { "ids": [ "1979057" ], "name": "Timothy Bourke" }, { "ids": [ "40638307" ], "name": "L\u00e9lio Brun" }, { "ids": [ "3111774" ], "name": "Pierre-\u00c9variste Dagand" }, { "ids": [ "1792338" ], "name": "Xavier Leroy" }, { "ids": [ "1680325" ], "name": "Marc Pouzet" }, { "ids": [ "2474210" ], "name": "Lionel Rieg" } ], "doi": "10.1145/3062341.3062358", "doiUrl": "https://doi.org/10.1145/3062341.3062358", "entities": [ "Assembly language", "CompCert", "Compiler", "Computation", "Control flow", "Dataflow", "Dataflow programming", "Diagram", "Embedded system", "Formal verification", "Imperative programming", "List of version control software", "Lustre", "Lustre (programming language)", "Program optimization", "Proof assistant", "Simulink" ], "id": "0cf98c174f2d72891e48015ac4ebbc39a88b52e5", "inCitations": [ "9e73899796e6ec39706174060b29436261a26bf6", "efbcf7cb491b62c78cca01efa8208bbd6cbb320f" ], "journalName": "", "journalPages": "586-601", "journalVolume": "", "outCitations": [ "3ffdfc1c1ed705f2f86a641a6d7621dac2096eab", "03056c3dc21c7bcce25a42381902311e9c925698", "40de668f54d8d307547a8877ffcd02bd1bab54a2", "2c0786752ca32b12c35862b9558107411003347c", "12ddfb2013c4f9d363105fdb97b56d6b1bf86d09", "0293190815bf2484efb06bf2249aa008c5f2ff63", 
"2867ae0cbd1f1fd394aca5f19ef62342d07eedac", "be91313fc4f44865a997bd2080985c104e163653", "2f5ec1b1e0da526f6d75bd1311267238dc3131a5", "223c6446055b81de0e9284fba140441389ac93e7", "4921fee4cd1509ff82341bf4538ebf6795f62ec8", "00287d8275bce540f5356f3b693c4fe4f3407d19", "35daaa0a92183cfa2328886900d3e931e81581a6", "b667b225a9bfa82ff5d01f0da492cdf62222c9b0", "fad209d423163d91fb2278f6a9939b3b6db0eafc", "f031d860b37b6c5280b743f6bb1e49b2a85d2f46", "8b2ec4c72c3e8bc4962a7ed9021a61d4358d1ce2", "605ce5fe2f5b1e478a2528f1c514b77fc2607577", "2777e5991511d4bf14310dd8e43fbe47edda6293", "e7620104d4d424f4cf7fe653076849ed92610cb5", "21e4ddf30b97adfda9288096a02074ff35de3b83", "673fdbfd9bc0e0540e51fced2ae1d0198743751d", "cd14bffcea4165b8bda586a79c328267099f70d6", "135a2c8ba39c4ed7fa223f0fd56e309e23d69c1f", "de6bb273be76b641e4a234a8832d90e23c372fb1", "f65896cd662edf7211362fc5cfe09e3c86d67731", "5a326be3e068a0e8a06c5fbc3debe917f147e649", "03b878ba4d1265c164a544ce6d022157fbaf10b4", "38b7e9721cc3e326580465deaf0f0028b92afe6a", "6a34b28a5103a1fd8791d28efb131aa94b7bb1bc", "745b3f15a0d525e8f79e9b0e98dd9fba2f5074ea", "ded5d352b1b4666759ff50530f74d0106105ff04", "47981a95074cbea3675b76b6fb1f12f856d872e9", "430ce88f430d22d131ca3f753dd576c61f7cced3", "7772b2e9a5a50984db0ce26b27111db92c3e7bc6", "172cff2625c8b05c5cfe3015e1de8c38099ed2cd", "0f3744290e7423d82ec36e2cf0bffa124ef3a32e", "087d641dfe61c8495dbab8e73051868c868e12a1", "d42a29e6977c28f7bf23d63b00c48f2e9100403e", "0a2a9fe04fe1c53e07d894993976b67c41b5a38b", "482a089de508bdba64bfa12924f8e2eea3fd652a", "81e259719a2bfa5a90283300a80138a95648ca0f", "4d21abe302e71b57ed44b21136c5faf6f70b9ca5", "2b8eccb151c1c9536a2cabd245601465fae3f551", "b8754816d0806e7b0fde5f105a96a280c24efdfc", "12feae93145fe4a6658b95121bcc902e2d98a5d0", "38d87376137cc92f22feb534b694add5df8e11b7", "04944a304e1f0920e5273988bb55cdec271b2556", "a78b31a7953f73ca32b5993de163820290a76e13", "05f92f213d42093f7275422f0e55c2d5ee5b4f4f", "b2d589cde150f8d868532a9d80cd334b4492f928", 
"2cbc143b70069e42a83bea7dcd6008903bb5ef03" ], "paperAbstract": "The correct compilation of block diagram languages like Lustre, Scade, and a discrete subset of Simulink is important since they are used to program critical embedded control software. We describe the specification and verification in an Interactive Theorem Prover of a compilation chain that treats the key aspects of Lustre: sampling, nodes, and delays. Building on CompCert, we show that repeated execution of the generated assembly code faithfully implements the dataflow semantics of source programs. \n We resolve two key technical challenges. The first is the change from a synchronous dataflow semantics, where programs manipulate streams of values, to an imperative one, where computations manipulate memory sequentially. The second is the verified compilation of an imperative language with encapsulated state to C code where the state is realized by nested records. We also treat a standard control optimization that eliminates unnecessary conditional statements.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062358", "http://www.di.ens.fr/~pouzet/bib/pldi2017.pdf", "https://www.di.ens.fr/~brun/publications/pldi-2017.pdf", "http://gallium.inria.fr/~xleroy/publi/velus-pldi17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0cf98c174f2d72891e48015ac4ebbc39a88b52e5", "sources": [ "DBLP" ], "title": "A formally verified compiler for Lustre", "venue": "PLDI", "year": 2017 }, "0d09a33fc88ffb35ef35b84d104c1cadc5802cb1": { "authors": [ { "ids": [ "18112138" ], "name": "Matthew D. Sinclair" }, { "ids": [ "10187815" ], "name": "Johnathan Alsop" }, { "ids": [ "3196444" ], "name": "Sarita V. 
Adve" } ], "doi": "10.1145/3079856.3080206", "doiUrl": "https://doi.org/10.1145/3079856.3080206", "entities": [ "Central processing unit", "Consistency model", "Graphics processing unit", "Programmer", "Run time (program lifecycle phase)", "Sequential consistency" ], "id": "0d09a33fc88ffb35ef35b84d104c1cadc5802cb1", "inCitations": [ "46122831f2f1aea6b5f45025b8791ca29c239679" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "161-174", "journalVolume": "", "outCitations": [ "5ae07f575fb2feb1e03d08af9fe29fafe0a306d7", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "f359d33a1c09d2f626217e21f722508968c7057b", "5b2103ce053a4e0e3685920fac0248533e8b0718", "49d5b173aa52c762982b9654378f5eec21d77fcf", "13bc2fe34d03c49cfcb80a814c046b8ad8895deb", "68985eedbd76262d85c80c9d2c1b1a92148d6379", "a36cbffc24608143c6a69da550620139dcc8128f", "358e6e95c7359b87bdca56391332e1612e5ebb23", "352a8957005dc5519b15ed1870751ec494d66395", "74947ab07ed99ea9b7038f8984e9910a87586f57", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "3eae0271717f6b4d65024abf04e5d98aef41d748", "2900690eb3132a4d1536226d629727de41f38a66", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "0335bf6957ecb92f709fc79c72c4237939f32c9e", "fa15e80d71f831ed1a3f11d5b94c88b8f098a17c", "556e8d42e0864beb7246bae2651d56833154f88c", "19aab49210282cc19ec4fec06bed029a06497bf8", "2d6f002477015469075954c6748a1a85af352c94", "5eef609f21fc9327e551ab40425f7f1715c3e200", "987adbbb4b5baff729cf3907d7f05a86e8651849", "2b1415ed444d67e78439e63baa73c05d7246f8f2", "231f10c230f97cf08b2ecbbafad5064262b9d102", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "206cf736da91aef15bb598e097f6c233c3af2b17", "7ea15c138cc72588fa376ff819f4bb8ca0b324da", "4bce8e7c13331dbffa05d6cfc086efd04e0317a9", "5576e2cbe8b32ce08ad99eb7f29ce77abb3f2d3d", "071686697917fd56ae8ace0c4d6bfcf3bef5700a", "4bad51c7685254155733ee8def6a1294378aa1af", "58da996efd7320d1e484263c97c930c8979c474f", "59857e2857df6d69a12e3cbaa720648b5c299159", 
"8747dabeaeda342fbac4ebff628c574be4c53826", "ac35455b128baf4e280f2571160c242b67b3f85e", "5d279a21f65eef2bf5027d0cf1e56f2d740b314e", "0d69c5f9f205037a1234a7c4cd3658e076d267bf", "8d91b5a1de4fb877829b4b29702a5efd7d9e470f", "0c6e3ce37880d5766e6b340513bbfb5738737e5f", "2cea911044b0b9dc2cee2e2b04915b9aab22f86f", "05a518c3b1a6f5c15d77f8829368677a263ff15d", "00c3b08c4e1dbfa080b6d3c422fa0da0131a743c", "d410f8128bd4efa6adc886259e5d9de4cd7587bc", "3371781698dbd3d3e78477af7528530024b828f8", "520f2bb3565ab01a28c35f5c7e506bbbef71ed79", "1aac5c5e6dda36e455bed8af80dd1fd6bb31321e", "2a88cb605d1fbc7dfa15aae9041c69bf03be85a1", "4e2f89e15cf80cc36dc9f211899480f22b5dcb32" ], "paperAbstract": "An unambiguous and easy-to-understand memory consistency model is crucial for ensuring correct synchronization and guiding future design of heterogeneous systems. In a widely adopted approach, the memory model guarantees sequential consistency (SC) as long as programmers obey certain rules. The popular data-race-free-0 (DRF0) model exemplifies this SC-centric approach by requiring programmers to avoid data races. Recent industry models, however, have extended such SC-centric models to incorporate relaxed atomics. These extensions can improve performance, but are difficult to specify formally and use correctly. This work addresses the impact of relaxed atomics on consistency models for heterogeneous systems in two ways. First, we introduce a new model, Data-Race-Free-Relaxed (DRFrlx), that extends DRF0 to provide SC-centric semantics for the common use cases of relaxed atomics. Second, we evaluate the performance of relaxed atomics in CPU-GPU systems for these use cases. 
We find mixed results -- for most cases, relaxed atomics provide only a small benefit in execution time, but for some cases, they help significantly (e.g., up to 51% for DRFrlx over DRF0).", "pdfUrls": [ "http://rsim.cs.illinois.edu/Pubs/17-ISCA-RAts.pdf", "http://rsim.cs.illinois.edu/Talks/17-isca-sinclair-rats-pitch.pdf", "http://doi.acm.org/10.1145/3079856.3080206", "http://rsim.cs.illinois.edu/Talks/17-isca-sinclair-rats.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0d09a33fc88ffb35ef35b84d104c1cadc5802cb1", "sources": [ "DBLP" ], "title": "Chasing Away RAts: Semantics and evaluation for relaxed atomics on heterogeneous systems", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "0d0b89fa3caa4b403a5dca4e6ea02cba82e8d293": { "authors": [ { "ids": [ "1801923" ], "name": "Irina Calciu" }, { "ids": [ "30721371" ], "name": "Siddhartha Sen" }, { "ids": [ "2590030" ], "name": "Mahesh Balakrishnan" }, { "ids": [ "2138789" ], "name": "Marcos K. 
Aguilera" } ], "doi": "10.1145/3037697.3037721", "doiUrl": "https://doi.org/10.1145/3037697.3037721", "entities": [ "Algorithm", "Computer data storage", "Concurrency (computer science)", "Concurrent data structure", "Data structure", "Distributed computing", "In-memory database", "Linearizability", "Noise reduction", "Non-blocking algorithm", "Non-uniform memory access", "Programmer", "Redis", "Shared memory", "Software bug", "Uniform memory access" ], "id": "0d0b89fa3caa4b403a5dca4e6ea02cba82e8d293", "inCitations": [ "a6ca37aeeef5911e4f36b904088479bea999cc81", "a46a66cf04959f0e66fe24d32a63632aff5c5ef9", "cb2a018979184f87692d423322e367cc42a215d2" ], "journalName": "", "journalPages": "207-221", "journalVolume": "", "outCitations": [ "413ebb1202367f3d389988e53ae4a584ddf574d6", "36e1b02a66ed928ef13e3a2ba6852e90a8713036", "363b85f61630ebdc1194a59816ad950bf305c40a", "045729ec838ecc50be166fe4511506ac4a08226d", "92f30431f89e0c1e9c44e362db24e90aa5d6f57a", "47f6c873364d8ba0228230465323a7e545435c27", "df51574fa8c5f92d09c46f2c6ccd815a26746df5", "0acea38881e694cfb89d2f8e30c5ddb64a5c6fa4", "4a418603a5820524987bf82085dcc162fb7f9f2c", "784dd6dbdf59896a42f134ab0bede3329030380c", "158ebe313a72857c5534a313f3ec0e413593b732", "045a975c1753724b3a0780673ee92b37b9827be6", "691793d665e8d86f95cafc619961fe97ce85a1ec", "42142c121b2dbe48d55e81c2ce198a5639645030", "07fb0b41b575e3e32e60c52fae30ab56a763d414", "264cd229ac4bbdf655d5e7b44563bf84bd846364", "1de5ac65303d92b91f6b822ef992a9717c5c2d2e", "463bec3d0298e96e3702e071e241e3898f76eff2", "34d69cfa80f46b61e38386a5e7301869f5d085ca", "21e51da40ab080ca2b71ad36094e2b686008b6cc", "0b82470bb9cd233bff6228d3d1b484024b9f9c3b", "1b50fca5a5a8ab867779422da818e90266b3727e", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "7730f057ceda3ab75d50ebf0ee10942938b4f8df", "06001968f5a93a1bb94f7573edffb9a7126ff009", "04f020a4ab2134db6f9e98eadf216d94d440414a", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "c2450948d6049003b660018c98caa92f52c64eb5", 
"4ab775b9811a8b9f0ff24fa06b535986149e51e3", "1697056663684522a89fbc838ac03512122f0ea8", "d12d1289d2384c2ce642f01855637b9f0519e189", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "30df50d77ef9478a2848626dfe3bf65f3c991991", "29a1148d75878671dc3663bf480e33d7bd91597d", "2925f77705061ef35bb8b37f4c7cd324da344d1b", "cbe1e69cab8e56f8b93e6ff2dfbe86adea693c13", "6f9058b5175aee958e330527aeb55074702dbfd4", "0b9a4c5ca3530089edcd7e9ac2c718d2317718e3", "2b8de17d3a163489ef7d0814c9033a853b0725cf", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "517e239f97f50079bc557cccf1a6b56aa5736d30", "514a5c15e8cf3f681febecad954a4508d9189c99", "042f443418ff2ff98a1dccbf49df9fa258dab707", "09ed565e84057123c15ab12b885c235d1f241aed", "1cb0679ae82be093268747da0f634281ea6a41df", "54f5ef35bf00f18f742a71b49bd6831322c3a1c5", "8f92ae9d629ff3c9cc2f7821c05b750c7e24688b", "04067c73b6eeb60f6c2d18fce49f9c595adebd11", "51b67af3659501a8f6e6ca5c13e45b3b2b814cab", "4e3304e77dd2fecea4086e132981d1470434cf65", "9872bf81d8559bfb5fcf4dc65674afba98dec470", "c6d00dff21d0d4ca277bb2f0a0f5c3ceba5108b6", "03416be8097852a54dd3e309434e5a0806824646", "9aa0d7253574e50fe3a190ccd924433f048997dd" ], "paperAbstract": "High-performance servers are Non-Uniform Memory Access (NUMA) machines. To fully leverage these machines, programmers need efficient concurrent data structures that are aware of the NUMA performance artifacts. We propose Node Replication (NR), a black-box approach to obtaining such data structures. NR takes an arbitrary sequential data structure and automatically transforms it into a NUMA-aware concurrent data structure satisfying linearizability. Using NR requires no expertise in concurrent data structure design, and the result is free of concurrency bugs. NR draws ideas from two disciplines: shared-memory algorithms and distributed systems. Briefly, NR implements a NUMA-aware shared log, and then uses the log to replicate data structures consistently across NUMA nodes. 
NR is best suited for contended data structures, where it can outperform lock-free algorithms by 3.1x, and lock-based solutions by 30x. To show the benefits of NR to a real application, we apply NR to the data structures of Redis, an in-memory storage system. The result outperforms other methods by up to 14x. The cost of NR is additional memory for its log and replicas.", "pdfUrls": [ "http://cs.brown.edu/people/irina/slides/asplos2017_slides.pdf", "https://cs.brown.edu/people/irina/papers/asplos2017-final.pdf", "https://doi.org/10.4230/LIPIcs.DISC.2017.45", "https://cs.brown.edu/people/irina/slides/asplos2017_slides.pdf", "http://cs.brown.edu/people/irina/papers/asplos2017-final.pdf", "http://doi.acm.org/10.1145/3037697.3037721" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0d0b89fa3caa4b403a5dca4e6ea02cba82e8d293", "sources": [ "DBLP" ], "title": "Black-box Concurrent Data Structures for NUMA Architectures", "venue": "ASPLOS", "year": 2017 }, "0d1199cec94a61b379ccb90d3b0238a1b31ddade": { "authors": [ { "ids": [ "1699192" ], "name": "Angela Bonifati" }, { "ids": [ "10757249" ], "name": "Ugo Comignani" }, { "ids": [ "1718479" ], "name": "Emmanuel Coquery" }, { "ids": [ "1721642" ], "name": "Romuald Thion" } ], "doi": "10.1145/3035918.3064028", "doiUrl": "https://doi.org/10.1145/3035918.3064028", "entities": [ "Data curation", "Interaction", "Refinement (computing)", "Requirement" ], "id": "0d1199cec94a61b379ccb90d3b0238a1b31ddade", "inCitations": [ "69f53559815f4645ec3e358b40e611ad3ad36a8c", "b8a0b3852627b083ba9922702838cd09782b09d1" ], "journalName": "", "journalPages": "667-682", "journalVolume": "", "outCitations": [ "13cbb8aa7d957987f76fcd3cc0ccb8ae623fdcb4", "585681b8a7f941ab6bf5b34fa6437fc75f38966b" ], "paperAbstract": "While schema mapping specification is a cumbersome task for data curation specialists, it becomes unfeasible for non-expert users, who are unacquainted with the semantics and languages of the involved 
transformations.\n In this paper, we present an interactive framework for schema mapping specification suited for non-expert users. The underlying key intuition is to leverage a few exemplar tuples to infer the underlying mappings and iterate the inference process via simple user interactions under the form of boolean queries on the validity of the initial exemplar tuples. The approaches available so far are mainly assuming pairs of complete universal data examples, which can be solely provided by data curation experts, or are limited to poorly expressive mappings.\n We present several exploration strategies of the space of all possible mappings that satisfy arbitrary user exemplar tuples. Along the exploration, we challenge the user to retain the mappings that fit the user's requirements at best and to dynamically prune the exploration space, thus reducing the number of user interactions. We prove that after the refinement process, the obtained mappings are correct. We present an extensive experimental analysis devoted to measure the feasibility of our interactive mapping strategies and the inherent quality of the obtained mappings.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064028", "https://liris.cnrs.fr/~ucomigna/files/poster-sigmod17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0d1199cec94a61b379ccb90d3b0238a1b31ddade", "sources": [ "DBLP" ], "title": "Interactive Mapping Specification with Exemplar Tuples", "venue": "SIGMOD Conference", "year": 2017 }, "0d57c1222fd2bbf9842fff83ade3efa8b978c690": { "authors": [ { "ids": [ "1863675" ], "name": "Neville Grech" }, { "ids": [ "2726878" ], "name": "Yannis Smaragdakis" } ], "doi": "10.1145/3133926", "doiUrl": "https://doi.org/10.1145/3133926", "entities": [ "Algorithm", "Android", "Application programming interface", "Application security", "Data-flow analysis", "Facebook Messenger", "High- and low-level", "Information flow", "Information flow (information theory)", 
"Initial condition", "Java", "Pointer analysis", "Principal component analysis", "Program analysis", "Sanitization (classified information)", "Static program analysis", "Taint checking" ], "id": "0d57c1222fd2bbf9842fff83ade3efa8b978c690", "inCitations": [ "2638b231d41f04df54b5383b7e18f832522459d3" ], "journalName": "PACMPL", "journalPages": "102:1-102:28", "journalVolume": "1", "outCitations": [ "080f1f7a903ba3d77f0f21a3a89bd2db0d958e46", "8469007f1d62cac7d7188dbe6b67b24d40d46ca4", "2554485ffdb8473262ce0cfde401cfdc5b85f3fe", "0fda9bbccd6908637e2ead1cef69f091bfda75d4", "0e7f5980e4083c12011be3783bd23e788e6b2ad2", "1e688be9f4554aa981fe3db9e2a66388b05bd167", "6c1fa64a6b5d3565fc4ab9ca97bf530069ce2669", "94d0e8b118efecb9f5b2056d84bf22253a2fb63c", "e8af823e0b25acfc8e41b59805e17d9d1b126990", "28f538ccabcc9edbbf27d0f0a027031615936e42", "75a15fa4d5cb039170b494d5b3d63001d335199c", "03aacfe8d36a673ecc379d3b76e7df1245a8d9e5", "141727ec87084aea5e0d5bffaa63b4fc2d9bf478", "3597bcdb6f9eb154abb80c15368d67ef169bfacf", "29f56a7f34879033bc6ecf52e03099fb55277e0d", "5fbf739032dd548c1ff189e7333f05e215906a1b", "a6526df1d9b18fd3542fad7fdd95e93a5edce909", "042396ba29d59a083366154c29aab7a28dccac37", "00303793a087dda1f7a7cf42e2f32082a991354a", "80af0dfde58a4f1e4f7ff35fa2c882a4ab3bbad2", "54943f66a73914cb84609cd25660f0f192db6459", "a8d7d8ccec9865c0fb4dfce74bdffdd09e3689c0", "a578530c785b14f54918720ee4acb672ffe3986e", "66c6e8f672a04d2b885a2fc4d43088b1abc18bae" ], "paperAbstract": "Static information-flow analysis (especially taint-analysis) is a key technique in software security, computing where sensitive or untrusted data can propagate in a program. Points-to analysis is a fundamental static program analysis, computing what abstract objects a program expression may point to. In this work, we propose a deep unification of information-flow and points-to analysis. 
We observe that information-flow analysis is not a mere high-level client of points-to information, but it is indeed identical to points-to analysis on artificial abstract objects that represent different information sources. The very same algorithm can compute, simultaneously, two interlinked but separate results (points-to and information-flow values) with changes only to its initial conditions. \nThe benefits of such a unification are manifold. We can use existing points-to analysis implementations, with virtually no modification (only minor additions of extra logic for sanitization) to compute information flow concepts, such as value tainting. The algorithmic enhancements of points-to analysis (e.g., different flavors of context sensitivity) can be applied transparently to information-flow analysis. Heavy engineering work on points-to analysis (e.g., handling of the reflection API for Java) applies to information-flow analysis without extra effort. We demonstrate the benefits in a realistic implementation that leverages the Doop points-to analysis framework (including its context-sensitivity and reflection analysis features) to provide an information-flow analysis with excellent precision (over 91%) and recall (over 99%) for standard Java information-flow benchmarks. 
\nThe analysis comfortably scales to large, real-world Android applications, analyzing the Facebook Messenger app with more than 55K classes in under 7 hours.", "pdfUrls": [ "http://yanniss.github.io/ptaint-oopsla17.pdf", "http://yanniss.github.io/ptaint-oopsla17-prelim.pdf", "http://doi.acm.org/10.1145/3133926" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0d57c1222fd2bbf9842fff83ade3efa8b978c690", "sources": [ "DBLP" ], "title": "P/Taint: unified points-to and taint analysis", "venue": "PACMPL", "year": 2017 }, "0d5caaeb3f0a03bd396c556b9f422781b39dc5bb": { "authors": [ { "ids": [ "8817631" ], "name": "Ios Kotsogiannis" }, { "ids": [ "2357165" ], "name": "Ashwin Machanavajjhala" }, { "ids": [ "2103203" ], "name": "Michael Hay" }, { "ids": [ "1729605" ], "name": "Gerome Miklau" } ], "doi": "10.1145/3035918.3035945", "doiUrl": "https://doi.org/10.1145/3035918.3035945", "entities": [ "Algorithm", "Algorithm Selection", "Data dependency", "Differential privacy", "End-to-end principle", "Metaheuristic", "Naive Bayes classifier", "Naivety", "PYTHIA", "Range query (data structures)" ], "id": "0d5caaeb3f0a03bd396c556b9f422781b39dc5bb", "inCitations": [ "15c34de38e69935fd8c4b5147708d88a5f3c8552", "0c5eb2bcc294db127645324e26f6ddbe25b38db1", "25b6e649699a4121cd629828f8f7595d58a6fd0f", "531c3177ad2804ea1358da5bc68737fca3bc9a71", "ec250679861ba116b89cecd27a52364eb818ba73" ], "journalName": "", "journalPages": "1323-1337", "journalVolume": "", "outCitations": [ "47f8fc00b01f76c652da39173be4f5b4eb62757b", "0e12b1a390a8e6108cc8500acfadcb1f3409e535", "2dbb9570eca417eb62f349e3322366c730e4357b", "009d284fe935b5f421d24321073097a0cd34e21f", "1cacac4f0ea9fdff3cd88c151c94115a9fddcf33", "04ce064505b1635583fa0d9cc07cac7e9ea993cc", "167343175b3a8965585b45707f6b8a217ec327ad", "1050926bf7fc494f56f332bfbc0dee494b2ab8ff", "34bdd36330946cf9b377d274bdaaa7dc41888aa2", "63b88452574095639ef9a1f692eef3c1ec386b0a", "0fba3766c7d613da8f35a2872f728c0c9e081092", 
"7d093d143a149b3bcefd1727b05305a5c42f5580", "0b9286a010bee710e74362a35f96dd1c6fee0fdb", "1cd8cd2bcdae06d72c7da16091f1c525221a58e8", "12b5772dcd12ff9f2f7ec553a09c6f3cdd2091bb", "56c56187cdaa03372298fb6ad1dc51dba7b3499b", "17fac85921a6538161b30665f55991f7c7e0f940", "0ab8aa62424d49562964f7fe5119fc1a16104538", "188b3b5ffc060110a650eebe33ee911a8e93f553", "9509f45ebc129bd68ea94d55d90fee410afb8143", "ca89b599a814440ecf4ee36039d28ead6484e767", "0ce4e33a66a354b431fd9ca0cad65f2e528c1f11" ], "paperAbstract": "Differential privacy has emerged as a preferred standard for ensuring privacy in analysis tasks on sensitive datasets. Recent algorithms have allowed for significantly lower error by adapting to properties of the input data. These so-called data-dependent algorithms have different error rates for different inputs. There is now a complex and growing landscape of algorithms without a clear winner that can offer low error over all datasets. As a result, the best possible error rates are not attainable in practice, because the data curator cannot know which algorithm to select prior to actually running the algorithm.\n We address this challenge by proposing a novel meta-algorithm designed to relieve the data curator of the burden of algorithm selection. It works by learning (from non-sensitive data) the association between dataset properties and the best-performing algorithm. The meta-algorithm is deployed by first testing the input for low-sensitivity properties and then using the results to select a good algorithm. The result is an end-to-end differentially private system: Pythia, which we show offers improvements over using any single algorithm alone. 
We empirically demonstrate the benefit of Pythia for the tasks of releasing histograms, answering 1- and 2-dimensional range queries, as well as for constructing private Naive Bayes classifiers.", "pdfUrls": [ "https://users.cs.duke.edu/~iosk/pubs/pythia-SIGMOD2017.pdf", "http://people.cs.umass.edu/~miklau/assets/pubs/dp/Ios17Pythia.pdf", "http://doi.acm.org/10.1145/3035918.3035945" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0d5caaeb3f0a03bd396c556b9f422781b39dc5bb", "sources": [ "DBLP" ], "title": "Pythia: Data Dependent Differentially Private Algorithm Selection", "venue": "SIGMOD Conference", "year": 2017 }, "0d8609d9ded6c15f627dd54250a6845a2b3a2ec8": { "authors": [ { "ids": [ "1751411" ], "name": "Virendra J. Marathe" }, { "ids": [ "1745942" ], "name": "Margo I. Seltzer" }, { "ids": [ "1907610" ], "name": "Steve Byan" }, { "ids": [ "1695968" ], "name": "Timothy L. Harris" } ], "doi": "", "doiUrl": "", "entities": [ "Byte", "Dynamic random-access memory", "Experience", "Key-value database", "Legacy code", "Memcached", "Multitier architecture", "Persistence (computer science)", "Persistent memory" ], "id": "0d8609d9ded6c15f627dd54250a6845a2b3a2ec8", "inCitations": [ "c8d937e3abc6c78b0d7358a1231280904428d946", "e3a66a2e79c2a5f7a52bf4e8089bf522fdc04abe" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "85280912d6234acfa2f239d3c41d706662c63d8e", "33918269fc2cfc235ae68ef11934b9dc375eaa39", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "8bfadfde21e1385c7dbceccd54d124fc437b3721", "3af216f371069b57c0dca5448384d052fb490fb4", "71c2deb5c3b4b0fd1ed68bdda534ec7ea76e845b", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "157352f39ed5ba9a571dd4c27b48786ac8be90cf", "9aa0d7253574e50fe3a190ccd924433f048997dd", "0322c80c8b69d4efb256482165da7e230752ef43", "47b851237f240831abee3971bca6bb8d2a121eb1", "339632faa043d4697570fc4fe48a52d007c3cf06", "99723365fc9fe6960201bf9d246a90ccbb6396fa", 
"09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "57c823b3b07b98233394bf15cfbbaed6a84809df", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "94783d113951822195d4ba44599a8fcbdef9d4bf", "3438bc1d97adf57c8f0283ab52d9085cd0e27268", "000c82d2682b1117359db3db4e738b600b211c78" ], "paperAbstract": "We report our experience building and evaluating pmemcached, a version of memcached ported to byteaddressable persistent memory. Persistent memory is expected to not only improve overall performance of applications\u2019 persistence tier, but also vastly reduce the \u201cwarm up\u201d time needed for applications after a restart. We decided to test this hypothesis on memcached, a popular key-value store. We took the extreme view of persisting memcached\u2019s entire state, resulting in a virtually instantaneous warm up phase. Since memcached is already optimized for DRAM, we expected our port to be a straightforward engineering effort. However, the effort turned out to be surprisingly complex during which we encountered several non-trivial problems that challenged the boundaries of memcached\u2019s architecture. 
We detail these experiences and corresponding lessons learned.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-marathe-060717.pdf", "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-marathe.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/hotstorage17_slides_marathe.pdf", "https://www.usenix.org/conference/hotstorage17/program/presentation/marathe" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4e89/3400c15cb2a11b1b8d1d7a7f0f21a5995360.pdf", "s2Url": "https://semanticscholar.org/paper/0d8609d9ded6c15f627dd54250a6845a2b3a2ec8", "sources": [ "DBLP" ], "title": "Persistent Memcached: Bringing Legacy Code to Byte-Addressable Persistent Memory", "venue": "HotStorage", "year": 2017 }, "0d939c3826455ca42310a92d5c00a956c4630b0e": { "authors": [ { "ids": [ "2589502" ], "name": "Ben Gras" }, { "ids": [ "2072347" ], "name": "Kaveh Razavi" }, { "ids": [ "2194658" ], "name": "Erik Bosman" }, { "ids": [ "33900578" ], "name": "Herbert Box" }, { "ids": [ "1744275" ], "name": "Cristiano Giuffrida" } ], "doi": "", "doiUrl": "", "entities": [ "ARM architecture", "Address space", "Address space layout randomization", "CPU cache", "Cache (computing)", "Central processing unit", "Countermeasure (computer)", "High- and low-level", "JavaScript", "Linux", "Memory corruption", "Memory management", "Memory management unit", "Operating system", "Page table", "Requirement", "Sandbox (computer security)", "Vulnerability (computing)" ], "id": "0d939c3826455ca42310a92d5c00a956c4630b0e", "inCitations": [ "051961468e3e7a3855eaff8ac9ec35e0235a4a38", "1aea7174b6e709d09570738cdfe4720b841e0398", "79473986fe994d4aeb9d662e0b8e572758a4511b", "c3407b18b527c1bce4188f9309b1e03e3e10ccc5", "6ff08854494ec866510cbb23fb0e18c1f977007e", "0d47d32743e29f20249b076af0b4091bae630ec1", "b532c625457aede2f11ef0eae40de38e4b5a8ab6", "151caa8e687fbdeeef71723ca4eabbc07d6fa272", 
"2a7056e53f29bc73471048a77b0c55ea4e92b897", "4b1635be240c4f2d3fa01a416e61726d660839e8", "73519ef57ae48827a27398659df04a08095aa701", "9afe4b008fa2e867d49369ddbb2f073368d14f5a", "5394541bffc1715962c9e1e7f5bbeb85a5322d68", "65c302fc5eedfb33824ef18879eb53cc0327ea41", "570552045c4d26d37e87aaa35fe79f8b412974f8", "e766cb4ebdaaadb6e1d4c9022bedbc4100f91506", "b181777c520457db8a0c555b970cdb349d22e559", "2106bb4ef13e35ad7640376b0ba3b6261b82fe22", "48b48faab8baa3f6877a9fe08540697c0c0b52f3", "b49d8c0d1d6dea24b41b39b58cf276c2f078fa1c", "831950908fcc713595a6b64b80c0aea60072da24" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "2d4ef2f1ceeaba4acc46dec6c48dc18deb9ddb5f" ], "paperAbstract": "Address space layout randomization (ASLR) is an important first line of defense against memory corruption attacks and a building block for many modern countermeasures. Existing attacks against ASLR rely on software vulnerabilities and/or on repeated (and detectable) memory probing. In this paper, we show that neither is a hard requirement and that ASLR is fundamentally insecure on modern cachebased architectures, making ASLR and caching conflicting requirements (ASLR\u2295Cache, or simply AnC). To support this claim, we describe a new EVICT+TIME cache attack on the virtual address translation performed by the memory management unit (MMU) of modern processors. Our AnC attack relies on the property that the MMU\u2019s page-table walks result in caching page-table pages in the shared last-level cache (LLC). As a result, an attacker can derandomize virtual addresses of a victim\u2019s code and data by locating the cache lines that store the page-table entries used for address translation. Relying only on basic memory accesses allows AnC to be implemented in JavaScript without any specific instructions or software features. 
We show our JavaScript implementation can break code and heap ASLR in two major browsers running on the latest Linux operating system with 28 bits of entropy in 150 seconds. We further verify that the AnC attack is applicable to every modern architecture that we tried, including Intel, ARM and AMD. Mitigating this attack without naively disabling caches is hard, since it targets the low-level operations of the MMU. We conclude that ASLR is fundamentally flawed in sandboxed environments such as JavaScript and future defenses should not rely on randomized virtual addresses as a building block.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/aslrcache-practical-cache-attacks-mmu/", "https://down.dsg.cs.tcd.ie/cs7053/materials/anc_ndss17.pdf", "http://www.cs.vu.nl/~giuffrida/papers/anc-ndss-2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0d93/9c3826455ca42310a92d5c00a956c4630b0e.pdf", "s2Url": "https://semanticscholar.org/paper/0d939c3826455ca42310a92d5c00a956c4630b0e", "sources": [ "DBLP" ], "title": "ASLR on the Line: Practical Cache Attacks on the MMU", "venue": "NDSS", "year": 2017 }, "0d9ce32eb9ca08750bf03d01ac290e260bc4d127": { "authors": [ { "ids": [ "7235469" ], "name": "Nirmal Prajapati" }, { "ids": [ "33776892" ], "name": "Waruna Ranasinghe" }, { "ids": [ "1747659" ], "name": "Sanjay V. 
Rajopadhye" }, { "ids": [ "2709194" ], "name": "Rumen Andonov" }, { "ids": [ "2882782" ], "name": "Hristo Djidjev" }, { "ids": [ "1756389" ], "name": "Tobias Grosser" } ], "doi": "10.1145/3018743.3018744", "doiUrl": "https://doi.org/10.1145/3018743.3018744", "entities": [ "Compiler", "Computation", "Data-intensive computing", "General-purpose computing on graphics processing units", "Mean squared error", "Memory map", "Parallel computing", "Run time (program lifecycle phase)", "Self-tuning", "Simple set", "Stencil code", "Tiling window manager" ], "id": "0d9ce32eb9ca08750bf03d01ac290e260bc4d127", "inCitations": [ "86de1e4181293760c9a16e223ad0dd2bf7056b10", "c951d7feaf6746132b0c0529cf7999f2e4450432", "334a7df472934478731c92f15052f451a1b0747e" ], "journalName": "", "journalPages": "163-177", "journalVolume": "", "outCitations": [ "0e12eb94aab5d64d08baacf0df36a4b7ed054c46", "06b25d569ee60fc4f973a468b091e1b0c0dc8cde", "9a7e961710b01b3e78b32f9696881fd57730439a", "1262176518bb210bd46f120d3782f1677af180cd", "b6b9d3ba92727fe9d031725a2f1e4d91032024b8", "a08bfd0a5187b34c0462d8982acca9f4860f0096", "0fb7ff960d8f65448b3c6e85613ae8334f93d93e", "8c27ed23bf974cfb87e93143ffc0556d1e04fb55", "f4dff66ba8f2338d118f379f2eff1410feb57ce6", "2fa3ca4c08365b0eb987c3abb0b4ad08971153f5", "0b9aaee517e0cabb274f5d7cfb01d8f58e51d76e", "16de6f9e2bf6ee1068dbca8c9e5446295c904315", "6d732e9edd7d06d70ce4e4f22cfb66aa243911d0", "4961ed921db492418491635c9127802fc3b0b44a", "38f5bd7a79437b97df9a174e10e89ab25d2578aa", "482f5e72c0a245a285ef198861e191fae73de481", "ab1644fddcdff1577d0ecbf5dd2c7c9a289ae3f6", "dae60807ef1e6fd61a2362c8187b733b08121e1e", "1253bd15364f5217bf55ded63f95dacff4f12264", "90c8a482ff463c02575888193dc06661d3dfdd98", "1deeb53a514d9a54ad690626c5199bb0d117f9a2", "03880f1d3faedb37aa51deab3b70a98b939dba28", "618fa1c14e3c6008eb047002311d21cae412eebc", "01729490bfe1ac39333772773ce42895b13a7348", "034ac8d8421866427c830764c4a10b0889a853db", "2c2fa29dfbbab106f1b94cdfdc67939d3355bd30", 
"019ebe0205a759f8dab80b617f9f8ccd179c5c62", "791370da29ba96d355c2fad1ecd06b8e709f8755", "4d603c30e3db8f8a0a17c1fec57b9ad1fa957c1d", "267d498cdf011cf320d4250f366df3e026ac8294", "a695fdd90865835234ac5e062c1d1a6ae1f34632", "8dc2184214ee39b31e2c0d623842b66c0141984b", "2f7fa291bdc6a2f8c7994cf1896868f057a6b0ca", "53aa3fe2ea1237a7f5f305ed1c2d6f5a1915bda4", "782d8591afd432a9b2bfe21553a4158a39cb9d1f", "493123382a634df9976191338a05445342d3e9c6", "1bec73cbc0ffb8eb32d6da63895f5319b7409386", "3087eeb39c88b1fc9bdc72812930451fc98cedec", "3c31999730ef19007df71909f1ae5223825e0ec9", "19e6866714631cf6104d2ba6e72ff8e7a074df51", "def34f422d6930bd23d5c58de78be98804e44e97", "0fc8566746dd67dcc30daa12ecf35460ee500833", "45bee4755852adc0c888c458645f83520bdf5f00", "4a2d7bf9937793a648a43c93029353ade10e64da", "36b832bbad91bb74e1808be466e5cc1b98f0d326", "30d69fe1a8f9c4c9fd4e0648411bdcad6e395e7e", "57f635f67fa7a1f742bb1c1f1da3e400c954440f", "26c86262a4f9bd7d26c3cf143908c55acbc35fb7" ], "paperAbstract": "Stencil computations are an important class of compute and data intensive programs that occur widely in scientific and engineeringapplications. A number of tools use sophisticated tiling, parallelization, and memory mapping strategies, and generate code that relies on vendor-supplied compilers. This code has a number of parameters, such as tile sizes, that are then tuned via empirical exploration. % such as auto-tuning.\n We develop a model that guides such a choice. Our model is a simple set of analytical functions that predict the execution time of the generated code. 
It is deliberately optimistic, since %we seek to model only the effect of %tile sizes and, moreover, the optimistic assumptions are intended to enable we are targeting modeling and parameter selections yielding highly tuned codes.\n We experimentally validate the model on a number of 2D and 3D stencil codes, and show that the root mean square error in the execution time is less than 10\\% for the subset of the codes that achieve performance within 20\\% of the best. Furthermore, %by analytically exploring the model predictions with a simple %script, based on using our model, we are able to predict tile sizes that achieve a further improvement of 9\\% on average.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018744" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0d9ce32eb9ca08750bf03d01ac290e260bc4d127", "sources": [ "DBLP" ], "title": "Simple, Accurate, Analytical Time Modeling and Optimal Tile Size Selection for GPGPU Stencils", "venue": "PPOPP", "year": 2017 }, "0da73832dee2c9b3d4c0d039d8e714e6ff098e40": { "authors": [ { "ids": [ "1869737" ], "name": "Matias Bj\u00f8rling" }, { "ids": [ "1763726" ], "name": "Javier Gonz\u00e1lez" }, { "ids": [ "1748177" ], "name": "Philippe Bonnet" } ], "doi": "", "doiUrl": "", "entities": [ "Database engine", "High- and low-level", "Input/output", "Linux", "Linux", "Open-channel SSD", "Parallel computing", "Principle of abstraction", "Solid-state drive", "Spatial variability" ], "id": "0da73832dee2c9b3d4c0d039d8e714e6ff098e40", "inCitations": [ "44c216b53c1f5a7091618c6b7ba9a32a35323dad", "a906694ac9739f9cb8547b494dddd006b5365a92", "0bd0819ff873acc26567ebf9a9cb73fea59d6cdd", "40f196e21a289394c4354961116587b8accba45e", "05a1bad1ef2341339e18d636d78594226d4ee8e6", "1858ed4ca900d9afd06d0b8a8430d0dda8f957bc", "ad42b4773cd461ba58bda07e1b7b0ff24c4ddba4", "3de30c8dafc720bf066e5e3a005d16212dd31149", "b8c87f3c5411557e7a21008bbb5db7485f98dbd0", "0f4386d4a521e36cb15252b4e908a948a65252ef", 
"19c0716139727ab7377bc73f394c9b99c86db16b", "4d1a62de587f05084e85a4168f960af1e48b9697", "c49feb5f91c8ba846eb2e90edf1b01c62a25c8d5", "4fa7ade25b7bd22ea3357da0516833a318cc72fc", "1d08d231ec66645ec56d2210c1a7c6b44c6ff041", "ac924589f32d23c0eebe2173a77e1cc732f351b9", "0ae03e097cd936f564a60017b864beeb12635b09", "614bdb9fce7c3088050520fc769376722eebe8e2", "226ca798b529c13605a2aa7fe75d58f4188f850a" ], "journalName": "", "journalPages": "359-374", "journalVolume": "", "outCitations": [ "9b90568faad1fd394737b79503571b7f5f0b2f4b", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "0cd52827be79113f01e2408411e95fa371c52728", "2dd70a51d787c1cb6e7854e32bd00c9050bfcac5", "d67adb456a315aee244babf4f20e318cc14d13f3", "0bba65fd5ac1db9a3293e9ebcfba092cf4ae58ee", "26a88fcaf621270af5f5786fdb2df376a2bc00aa", "33438b1148a84d6e5bf2cad70bf7754d546ca5d7", "2e46f9074bd81ea4ec29ecec7e0231c16fb2e8db", "8c06fb59a79b3b47dff8588302d8e6514a7f7a4a", "05a1bad1ef2341339e18d636d78594226d4ee8e6", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", "61977858b3eea4f5a6d81393301e7298ade7a2d8", "72722e7602138e3896e5576d3f3ef730e7b7c4b4", "eb6d964a0d7d51533dc27e7905c309e13a0fea54", "84564d347d505467dd628e56319bc037b0a1ec28", "d58cc242fd70227cff98376a914e0b42b1b79db8", "356955d0f190829b7481b8dc39c5f90dfac1b652", "389b618c42c0d5d32a569b9cbaa02a7ff77c6be6", "726099036bb32c3fbaf1650d5900eeaa2ecc8fd9", "1820a34042d6371a9e20484b0c63b698eb522a6c", "70ce10f47aafa0994627a9575565b5c98af58d98", "131e1e1d163a0f49881d7b5ac092892093391015", "30419d0e0fcaebbfbcdb88f702bd01306d14fb15", "acd5b766ce2f210a0351059d2dc29977f5d8abf0", "1be96030c042ff6b5bbe05bf0fd86f5f9a4d27dc", "491715b493fd7780da43249d9f29f4b00b2d535d", "5a04b332441e2ff025313bfd303383e13050a274", "719aeeaff7353058a152b4eb3ff77a193624a481", "05dd6cb44124b8a210ac391f15ec25e68918ef22", "199ac28b6bc68bf05c77645ffae7640df114bca5", "08ca74190711c5cc1f0e51a4db805187a5c85a56", "5aafae8b45a5dc17f589ad6cc74510f657d60cd6", "27cb0c2229299a82cf767d19dcc68aa1e5f0f233", 
"12484231f130f2c3d3d8c3bec33ed2830f100b11", "663798bc529bb73f2b3ca8640bb4fcbd83ce5c31", "800f30aa6b7107b085727ebbb98eacf944657b55", "5271d6693ba950c389921ccc21110664f25a83db" ], "paperAbstract": "As Solid-State Drives (SSDs) become commonplace in data-centers and storage arrays, there is a growing demand for predictable latency. Traditional SSDs, serving block I/Os, fail to meet this demand. They offer a high-level of abstraction at the cost of unpredictable performance and suboptimal resource utilization. We propose that SSD management trade-offs should be handled through Open-Channel SSDs, a new class of SSDs, that give hosts control over their internals. We present our experience building LightNVM, the Linux Open-Channel SSD subsystem. We introduce a new Physical Page Address I/O interface that exposes SSD parallelism and storage media characteristics. LightNVM integrates into traditional storage stacks, while also enabling storage engines to take advantage of the new I/O interface. Our experimental results demonstrate that LightNVM has modest host overhead, that it can be tuned to limit read latency variability and that it can be customized to achieve predictable I/O latencies.", "pdfUrls": [ "https://www.usenix.org/conference/fast17/technical-sessions/presentation/bjorling", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_bjorling.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17-bjorling.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17-bjorling.pdf", "http://nvmw.ucsd.edu/nvmw18-program/unzip/current/nvmw2018-final62.pdf", "http://platformlab.stanford.edu/Seminar%20Talks/Matias_Bj_rling___Javier_Gonzales.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_bjorling.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0da7/3832dee2c9b3d4c0d039d8e714e6ff098e40.pdf", "s2Url": 
"https://semanticscholar.org/paper/0da73832dee2c9b3d4c0d039d8e714e6ff098e40", "sources": [ "DBLP" ], "title": "LightNVM: The Linux Open-Channel SSD Subsystem", "venue": "FAST", "year": 2017 }, "0db289e3a13a457c3600a61a1a24784e23d81840": { "authors": [ { "ids": [ "2181199" ], "name": "Arka Rai Choudhuri" }, { "ids": [ "38436509" ], "name": "Matthew Green" }, { "ids": [ "1753595" ], "name": "Abhishek Jain" }, { "ids": [ "3452395" ], "name": "Gabriel Kaptchuk" }, { "ids": [ "2679804" ], "name": "Ian Miers" } ], "doi": "10.1145/3133956.3134092", "doiUrl": "https://doi.org/10.1145/3133956.3134092", "entities": [ "Certificate Transparency", "Computation", "Encryption", "Fairness measure", "Hardware restriction", "Secure multi-party computation" ], "id": "0db289e3a13a457c3600a61a1a24784e23d81840", "inCitations": [], "journalName": "", "journalPages": "719-728", "journalVolume": "", "outCitations": [ "90f836fc73b7e7e0b89a7dcfd6b9bfcb98890762", "1cc513e49e4a6672676763dd0c0a8d352a49ece5", "202b3e185b8c370160ecea9f99409c854b63549c", "4a29060831fdf21fb61ed6a8bfba062945a3d5e4", "ab39ac59408ad25186d0c1854ba7cf0e0ca69c36", "5fcb9c0d1052aa2b3252924be369c07f5113ccd3", "4d0c1efd1e397d7b26a89bb5bc25565af026e603", "723455474fc1e953bc7fad1169d52f178584af4e", "0aced25b2fca1cf1061edfd6dc688fa1829929b2", "0462cb34483ffe57d3b43e3d1ef10ea9ff7f422d", "cbef8f2802cc2b0bc60c66cfa3f465d47e9e86c4", "a153a6d4e793c9c8fe85d90e512a95605d0747cb", "0a7ac809f44823efb63d715e1e008d073310ee81", "452124bceed09e813ab5fc7f68bd5ffe970a823d", "1c14549f7ba7d6a000d79a7d12255eb11113e6fa", "febd8e6cb5d6dbef48492fcfe05388f3c888084a", "4701faa37e2c5ba5d1810b9b39db28bbcf4a230e", "00fc269c340357a2f12ff9901954d66f78cc105c", "7ffad11c1916d3e2d0e6e3ccb4699490b2bb3b9a", "5989535096f4af55d9442569975423b0168a81de", "f7e61d0a7e72b31ff38d79d654fb132f06d7e4ab", "5b298ac2c0735142b1b365ef2edfb3aa03f9eb01", "d79bad93aa04746a1d32344e16e5b1327b6a51c7", "00c7edb35648f7c7dca5f73ebafc39ab07209768", 
"14cafb1eb6c2c4bcc9cf340d6c5ef496d296c4d5", "72d21259eaacf42c397b191ea99a0d07b9ac6d18", "042789e36b2856874db425b58994ea480e983e87", "20be11764b7668d4c9bd6998abc2340a00cf5170", "122770d0c40e3dc339b7e149dd3c38856cb39d79", "2f7c97e82641e4f0f8c7b508b75af4952b0cc07d", "f20aa9cd4360af73d3d7d7ba5a1b7257f73fafa8", "19c3736da5116e0e80a64db35afe421663c4b4a8", "000bca48bd73bf8336937a638b2c758955d4b04c", "6c15a1a25d4d103ed251d82c95ff4f0b38866a06", "7e35ce39632b91f21b110601f7b8bb04b2c957b6", "f0cc73be34d8d02c9ca0577fed6e981403e594d6", "33853565b4dcad38b9b79091a48d3f40409f06d7", "edb90d14217b3e5ca9b5e1190b23112026b60237", "9e759181f4f3ff0883f3cf663cd7d3f5444f8ab2", "38edb4e1366d76e1260703bf2dc9c832e86b1dd2", "b0794750449323d6b635f97f6a40bfc29f10b299", "f2c4398e489bed6cd2ac00492c762f6b112aa7bc", "3bd9e5872c28921d45146caa2c541457d977dc39", "0677d17466f47dc8ef5fb89221ff3007c6196c33", "0ab7bd43a18cb6419407334ea2a439a3667ef7e3", "08f8fbc075c0c0822115315bcffa54964b0599a7", "d987feebe58c6e315cca4249dc63c1c576b452cf", "92f1b71eb2ed519a1ec3daaa7be367bac0295421", "d5c3d39d364078c1cbf2eed206a3cccf9bbb33a8", "0d5f7a1825bae713cebd66d121d5b01e31d8adab", "2424ef60c204af84ecbb5daf6862c19c180aea0a", "33148623fc14ea5735e73dd716d030ab17118299", "68b8390a6a2ef18c156f612986b3e8dca9714004" ], "paperAbstract": "Secure multiparty computation allows mutually distrusting parties to compute a function on their private inputs such that nothing but the function output is revealed. Achieving fairness --- that all parties learn the output or no one does -- is a long studied problem with known impossibility results in the standard model if a majority of parties are dishonest. We present a new model for achieving fairness in MPC against dishonest majority by using public bulletin boards implemented via existing infrastructure such as blockchains or Google's certificate transparency logs. 
We present both theoretical and practical constructions using either witness encryption or trusted hardware (such as Intel SGX). Unlike previous works that either penalize an aborting party or achieve weaker notions such as $\\Delta$-fairness, we achieve complete fairness using existing infrastructure.", "pdfUrls": [ "https://eprint.iacr.org/2017/1091.pdf", "http://doi.acm.org/10.1145/3133956.3134092", "http://eprint.iacr.org/2017/1091", "http://cs.jhu.edu/~imiers/pdfs/fairness.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0db289e3a13a457c3600a61a1a24784e23d81840", "sources": [ "DBLP" ], "title": "Fairness in an Unfair World: Fair Multiparty Computation from public Bulletin Boards", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "0db63083b01483674cade02e1966824d4f9e483c": { "authors": [ { "ids": [ "1922545" ], "name": "Shervin Hajiamini" }, { "ids": [ "1765372" ], "name": "Behrooz Shirazi" }, { "ids": [ "33360863" ], "name": "Chris Cain" }, { "ids": [ "40454458" ], "name": "Hongbo Dong" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.64", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.64", "entities": [ "Algorithm", "Dynamic voltage scaling", "Frequency scaling", "Integer programming", "Job shop scheduling", "Linear programming", "Makespan", "Multi-core processor", "Optimization problem", "Program optimization", "Run time (program lifecycle phase)", "Scheduling (computing)", "Symmetric multiprocessing" ], "id": "0db63083b01483674cade02e1966824d4f9e483c", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "490-497", "journalVolume": "", "outCitations": [ "9c154444ab777e83db6d3093af52261c1ff15154", "31e4e1bb57bd04bc02d7f7110e66122a215d7bfa", "72a57529869316ce74568de9458dda9ab54c1700", 
"a9a744a382bcc0fb2d8c643c5ea7ba8926cb77c1", "abcdac2fa21a005809f0ab2391a722c2a205dd11", "bbbe1ce1a11cc28250fe0106bab44b915bb81a8e", "031d644fbe3455768776e84f126b7b6d79da0f86", "4bf882a6964e1469654aa6db6622476a8ccaf045", "a268cfd17743e03bafacae479c793a7f3c1b3e61", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "422554854e439fd99d4f07c80f304d7d91a6943d", "352a8957005dc5519b15ed1870751ec494d66395", "2db9bcd369e59837278be7e6ffb4c4a96b24fc35", "642a0be3980dc7b54c09b61332639426ffad70a9", "15860f9f774f19f245f016d9cf479222e4f9a6ba", "3cf2bcab12160c888d6ed17124cb54628a9eea4f", "7477ba88a6ecb151c61e755cf608736367c6f4d3", "37bd5728e359535460dd7bf03bc265848f8f0249", "0a80e3dce25d865e9fdf69da4d09cc8ac3398ff4", "642f72cdee8f3e9a5275e47cad844e1c54b57b83", "00ccbe8b4e5691d3ef9aa190d9b9cdf85b2266a0", "b2abc27230a97177f7dd660035d56d9fbfca87fa", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "35c8505c3090902d943dfdf616be07f29a876149", "83712692555c62db3b2bb91ca33f25fb71826b4f", "4e78cb65f6f7f65e10d3b2848249f4f727a88f6a", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "67a869fd3acf787899a86a7626dd0128316a502d" ], "paperAbstract": "Energy efficiency is considered a challenging problem in modern multicore systems. Partitioning the cores into multiple voltage and frequency islands (VFI) provides a compromise between simple global Dynamic Voltage Frequency Scaling (DVFS) and fine-grain per-core, per-task DVFS. This paper formulates the optimization problem of scheduling tasks statically on multiple VFIs as a Mixed Integer Linear Programming (MILP) such that for a given energy budget, the program execution time (makespan) is minimized. Our proposed solution consists of two steps. In the first step, we use an Integer Linear Programming (ILP)-based algorithm, from our previous work, to assign per-core fine-grain dynamic Voltage/Frequency (V/F) levels to each task in a task set (program) to minimize the makespan for a given energy budget. 
In the second step, which is the focus of this paper, we use the MILP framework to schedule this task set, with the given V/F levels provided in step one, on the islands of a VFI-enabled multicore system to again minimize the makespan subject to (1) the energy budget and (2) the task set's precedence (dependency) constraints. Together with the solutions obtained by MILP, a round-robin algorithm is used to compare these two methodologies to ILP that provides the best solution. Our experimental results show that across all the benchmarks considered, the MILP-based and round-robin makespan solutions are on average 1.2 and 2.28 times slower than the ILP-based makespan solutions, respectively.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.64" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0db63083b01483674cade02e1966824d4f9e483c", "sources": [ "DBLP" ], "title": "Optimal Energy-Aware Scheduling in VFI-enabled Multicore Systems", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "0de06bf7929ce2190e04ea9e41980cff85c24ed4": { "authors": [ { "ids": [ "4353261" ], "name": "Hung Dang" }, { "ids": [ "34647008" ], "name": "Yue Huang" }, { "ids": [ "1781271" ], "name": "Ee-Chien Chang" } ], "doi": "10.1145/3133956.3133978", "doiUrl": "https://doi.org/10.1145/3133956.3133978", "entities": [ "Adversary (cryptography)", "Denial-of-service attack", "Evasion (network security)", "Malware", "Morphing", "Portable Document Format" ], "id": "0de06bf7929ce2190e04ea9e41980cff85c24ed4", "inCitations": [ "28c8b8714a1c072e49d0ffed7efa2e54f423b185", "1bc646bde191b28b16bee8043a226195c82b5d8e", "9da084097986f30897f3722febc70631fe2251d7", "19cf04f2f9727989cc802ec40fe4b61a8223fe5c", "9bec5e3292a6ca7cea5fb37a7f6719b1149b2bb0" ], "journalName": "", 
"journalPages": "119-133", "journalVolume": "", "outCitations": [ "7cdf1c29cb63423c9638dd4f5620956b3fe80d11", "9682341a91f0ea73f3dd9b3548c1e113d7a7f61d", "4850442c8db48500ada13a060c4d1584a575de81", "595a00f0975b5d5c28d904ddba1ae5a493316573", "1740eb993cc8ca81f1e46ddaadce1f917e8000b5", "66c75a5833b96e03884387d96e798b6b6c6498c2", "029181c7dab10fb756821ff7efcfa32cca282ef4", "b90dd2f366988d9bb76399d4137c1768fe460c8f", "39a651ace163e7741bc98e266201afe83ad63219", "14ce7635ff18318e7094417d0f92acbec6669f1c", "bc0eda5093cdfceeba46988bc9b3c7925764c1fe", "88a88808808bd9cbd95c120f0b3fef94ea888e44", "35734e8724559fb0d494e5cba6a28ad7a3d5dd4d", "4f6487d61ba6c2afa44be0e870599bb292e27638", "716e1fea03d45e61e94c49853e999eb70b275a04", "5e4fa9397c18062b970910f8ee168d3297cf098f", "7f57e9939560562727344c1c987416285ef76cda", "289e3e6b84982eb65aea8e3a64f2f6916c98e87e", "45f6957cab31e802934cc761380c1a4a37c66208", "22bc7549801fd359f932bbdc11b8ca24b87baadf", "3ba179bceb9692d4d21109d0b87b120195761148", "3c8bf504ddc7db1829466b6e9da5251025dd48f1", "0992ef63a94c4b9dfc05f96b3a144c1e7237c539", "1b90ee5c846aafe7feb38b439a3e8fa212757899", "1780f4fc05d87356e923a75a8ab3ff4ce79b9fb0", "2cf3fd84f30e5cae30dd46a3d7ecc0d63583b1a6" ], "paperAbstract": "Learning-based systems have been shown to be vulnerable to evasion through adversarial data manipulation. These attacks have been studied under assumptions that the adversary has certain knowledge of either the target model internals, its training dataset or at least classification scores it assigns to input samples. In this paper, we investigate a much more constrained and realistic attack scenario wherein the target classifier is minimally exposed to the adversary, revealing only its final classification decision (e.g., reject or accept an input sample). Moreover, the adversary can only manipulate malicious samples using a blackbox morpher. That is, the adversary has to evade the targeted classifier by morphing malicious samples \"in the dark\". 
We present a scoring mechanism that can assign a real-value score which reflects evasion progress to each sample based on the limited information available. Leveraging on such scoring mechanism, we propose an evasion method -- EvadeHC -- and evaluate it against two PDF malware detectors, namely PDFRate and Hidost. The experimental evaluation demonstrates that the proposed evasion attacks are effective, attaining 100% evasion rate on the evaluation dataset. Interestingly, EvadeHC outperforms the known classifier evasion techniques that operate based on classification scores output by the classifiers. Although our evaluations are conducted on PDF malware classifiers, the proposed approaches are domain agnostic and are of wider application to other learning-based systems.", "pdfUrls": [ "https://arxiv.org/pdf/1705.07535v3.pdf", "https://arxiv.org/pdf/1705.07535v2.pdf", "http://www.comp.nus.edu.sg/~changec/publications/2017_CCS_EvasionInTheDark_Slides.pdf", "http://www.comp.nus.edu.sg/~hungdang/papers/EvadeHC.pdf", "http://doi.acm.org/10.1145/3133956.3133978", "https://arxiv.org/pdf/1705.07535v1.pdf", "https://acmccs.github.io/papers/p119-dangA.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0de06bf7929ce2190e04ea9e41980cff85c24ed4", "sources": [ "DBLP" ], "title": "Evading Classifiers by Morphing in the Dark", "venue": "CCS", "year": 2017 }, "0e02aaad36501e7930d126512e8f4a5c1c50323a": { "authors": [ { "ids": [ "2576333" ], "name": "Yipeng Huang" }, { "ids": [ "2219669" ], "name": "Ning Guo" }, { "ids": [ "2045759" ], "name": "Mingoo Seok" }, { "ids": [ "3246033" ], "name": "Yannis P. Tsividis" }, { "ids": [ "2161149" ], "name": "Kyle T. 
Mandli" }, { "ids": [ "1738240" ], "name": "Simha Sethumadhavan" } ], "doi": "10.1145/3123939.3124550", "doiUrl": "https://doi.org/10.1145/3123939.3124550", "entities": [ "Analog-to-digital converter", "Baseline (configuration management)", "Computation", "Computer", "Computer architecture", "Embedded system", "Graphics processing unit", "Model of computation", "Nonlinear system", "Supercomputer" ], "id": "0e02aaad36501e7930d126512e8f4a5c1c50323a", "inCitations": [], "journalName": "", "journalPages": "665-678", "journalVolume": "", "outCitations": [ "a213b244778e310bc4b27cbd021f964258b4c7a7", "2f46240e23c698a5aa735c2f1d6e21ba6f6e878c", "20ea0b6f17797962690caa2688f26bcc88aa0ec9", "ef098e0154da4b210a6ee11b84ca30bd3e445ac6", "2e6107926b33fae805509286912a83e6947cd6f0", "132f9123d155a61f816e34cc2598ae00e03d63a9", "3520e748e6f91b4f3d1a522775936238a733368a", "3bf8f8df338ba6cd9c10d1abdbbaf1987611ce97", "26f2d0249608e84b0c9d79a93d2f19c312b80f65", "9d893aec03f5cdb03e7a544c7336dd3eb2f2d54c", "55020020393b92f37629e063cab66ee2f60521a7", "b4024358c2fc4d46d9d5e542ad3e212b0d6ca16c", "8c42c6be2875fa34895feb99162bce499d459a2a", "41c445a27d1d3f28c6f5d019e69908c261c188c7", "ccce1abe109e4710ec729be835b6995c536dd6e6", "35202c61eae2719585f63898ab85e9427e1b221a", "0892a4477f02af4a9b47e456627497435e5d8159", "025e652e87d246c132277a32000d3efd8f5ccebd", "5fa4efa6c9a439172f83f0618e05e9e4ad9852b7", "54cff4a359e716d97d00ff642bfde75102117f5d", "05082179bc5d9803c49d05aa4c41a272b07d26d9", "f59ab2d004f31a20de5c0c4062f2f6f51a2cbf9e", "2de051acd0bf2908b4e7bcf4a986640c3d296035", "062347e9cc253c4f7859fef74e1a8f846d5d2079", "68c261bb88499a0ea8e6939d45f87abdbc4417fc" ], "paperAbstract": "We tackle the important problem class of solving nonlinear partial differential equations. 
While nonlinear PDEs are typically solved in high-performance supercomputers, they are increasingly used in graphics and embedded systems, where efficiency is important.\n We use a hybrid analog-digital computer architecture to solve nonlinear PDEs that draws on the strengths of each model of computation and avoids their weaknesses. A weakness of digital methods for solving nonlinear PDEs is they may not converge unless a good initial guess is used to seed the solution. A weakness of analog is it cannot produce high accuracy results. In our hybrid method we seed the digital solver with a high-quality guess from the analog side.\n With a physically prototyped analog accelerator, we use this hybrid analog-digital method to solve the two-dimensional viscous Burgers' equation ---an important and representative PDE. For large grid sizes and nonlinear problem parameters, the hybrid method reduces the solution time by 5.7×, and reduces energy consumption by 11.6×, compared to a baseline solver running on a GPU.", "pdfUrls": [ "http://yipenghuang.com/wp-content/uploads/2017/11/micro50_7A-1_lightning.pdf", "http://doi.acm.org/10.1145/3123939.3124550", "http://yipenghuang.com/wp-content/uploads/2017/11/micro50_7A-1_v6.pdf", "http://www.cs.columbia.edu/~simha/preprint_micro17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0e02aaad36501e7930d126512e8f4a5c1c50323a", "sources": [ "DBLP" ], "title": "Hybrid analog-digital solution of nonlinear partial differential equations", "venue": "MICRO", "year": 2017 }, "0e03189871cd303b3438743f90232514dfa7885e": { "authors": [ { "ids": [ "2483738" ], "name": "Nicholas Carlini" }, { "ids": [ "4019963" ], "name": "David A. 
Wagner" } ], "doi": "10.1109/SP.2017.49", "doiUrl": "https://doi.org/10.1109/SP.2017.49", "entities": [ "Algorithm", "Artificial neural network", "Benchmark (computing)", "Defensive programming", "Machine learning", "Neural Networks" ], "id": "0e03189871cd303b3438743f90232514dfa7885e", "inCitations": [ "679f1322ae8c37bb67ca6bdea2ac6ac91f290d70", "5ce1cdd95b3977e66a5c22fb6cab577a8a65597d", "1c16dd87e39472f94c8510ddc7d6954dc6981bcd", "116c1176505f1ff8d55a6f4ea50f11dfcf37bd29", "044ef9a2b3f12a36cf4c01ff45b57fe6b414f2d9", "d1a9dec7c3d77985a3aed2c3238f65c988056642", "3f706be0c11eca0f0ee6b61da700aa8c066bbcc2", "1f9370a2ebf0d99188c23bbc9b61b338ac22846d", "3ce11ca731f7838ec6225e3f9388bbc3ccb2983a", "039df729edbc7c20085fda50599241ea626d20f0", "2b85b35a76b6c26a3fd8e661aed768d4c8782b06", "5317dca5a6f96cbad7451332d92061e8241d861b", "9bec5e3292a6ca7cea5fb37a7f6719b1149b2bb0", "fdede788af28db590b39d8185e272caf40ea835a", "4a6025ac9fa969846ab0ee32a6d8792734383105", "7b5e12f7784f8d5cecd3f2bd73c35860de2b21f8", "13094dc5bfaa3d176a0551e59619e6818fa59210", "c76490d34b9e5cf8b4cacfce9ae848b11fcd5bfc", "bee68f8950f392359f415ee63d92e6323a416b36", "14006f116b8d1ac3fb45f2095be12f7685176a2a", "19e6af20cedce8e3f8dc566032df883b101bdb96", "20bb637f67098d0f7630148cfc7c4d87310f6955", "23f97a0a6ce0c54b024213e200315be1ba391932", "405b6ff2ea2ec9a7c7d6b18ac951dc778892ffcf", "9123089825cad35ab5a9bc45452d67fb722be529", "831fbef657cc5e1bbf298ce6aad6b62f00a5b5d9", "406c17deef0b7ac7675845bb311c0adaf9202404", "2ba29b7c43f0d822ebf11e24b5f51a1dccf45903", "3d07040ea8f18b2472a18d862059d4f5dbfe6566", "398342181a3109fc24e567855211dc1428e65cd4", "58d190282ed59639b16e726a3237938b53976077", "087c8cf9e42d5f0d80002b381371f4dd2e0bdcd7", "1d0672e0c9ca0e3b15785746f20657c94bfa1bb8", "22ced17bb531703d3d96076b7c46c0a94691bb4d", "5bf93a7e9565af14e6f54819c660a9261816051a", "29176632807b17bf3da444713763b4b2b568306c", "69092affc3461a38eb05cf7982f104eb30b0492c", "73ab260c777b39f10d620cb93a7e49722165b26f", 
"219762f87743fd2740642f49b0b6e0e240f76985", "dd4d247b1b80cf28a7c6893697c99d19d36aabf2", "7a8ee83bf0c5d0e4b021cf1e64a93850daf9bd97", "f9a378543a8e3f6312152ebd3e6804c14be6d891", "437292c9b1f084a1c8c18db3e9204c8e2a9c00bd", "3476198004655a6d67c72259d8367eeebda73b2b", "1beea702eecb426474794c43faf1364463ab0ec0", "7781ce5bb1b53533d2060aefaf8ddb95a6c77316", "83a8235a231540e07743d67c9f127c2bee4389ae", "0a77313fa10a864e14f538c73d417d7b4d6f320e", "1d6e99556c987acef9fa03fff85f314b4ee8a74b", "c9ac183b932ab24256241d089e12d46daa2d8f12", "17d786aad51247107819d1f26c7f5bbcd0504603", "3758d7401ab1b835642bce811c0df6cb73fea20a", "638b7157cfff9937f0b98f631755cc05bda245d5", "15b4017d6f295accd02adc04494e854c9cf4434d", "581b0ca77a9560086fb90e883ccec1fb3a9dcdfc", "3cde0cea6d562761d0df848901fedc2f0f84a426", "98d98a37f9f11bd069baed4543e14c6ff701b1f1", "cd9a80dfb4b51840e6d69859a50c406da0c811cd", "b74c39dc11ed0fa62c0b4e4e4428267da413f589", "19c53c3ddf90c6ede05a6ac670083e238ba4589f", "3d0a8a8e01625fd2c668364c1ee31f3dc9098f39", "6e094447fccaa7594802c7f09b8ccc3dd24da137", "6096fe05f84f6913ead43529cb67ad678f772c2f", "759deb18024f38be0df71216b6e9010cf9729d1b", "7a6c9d71376311cdcfdd270121fbe6665881a7f8", "65ec7ffc0b33095479fd11bc4a01636fb6f2e97a", "15285d8ae6d2fef3dfecaeacbf5a246bfc7b3137", "00ff024e0800db79a8f7c661b47357fc98e6c0af", "5096ad277bbeea661dbc7ad9ceff79c41499dd14", "7916d7db7682b85b34ca1139b3781826340ffeca", "4fe1f34b078237996ff07f1c789659a584c4916c", "ea0eaaece0f4e0c3760d87850f65fb42df980c3c", "f1d49cd7244347cd1d7117fb931e580bf1789416", "686862f77abb9b4bc908a0e3d4a512a4a8732f8f", "7e6d508d7e9d3876311a5a399c30371c00e3548f", "2f0f8aa941fb64016a08a96eb38b4546b30a953d", "4b8f3fbcff1d32713ed4e3b5f6959c345fc82632", "3b36f0537390185553c767a25daca5b169e216a2", "29f712156b5c216fe00c2ec8fa115bdfcce6bbf3", "3ba76b1830a219399a6aed2d1c765b7c086da139", "b1369e4785dd0b23f89ca76f45468049c8667863", "136dee73f203df2f4831994bf4f0c0a4ad2e764e", "8cabab2e5bbc41734b7c55e33e87679ac43734c2", 
"170839d31da1f85b66bd08e055213bac126b2a22", "f621237cf2dbb4c94d70d75bc7f2ff48e2a327a9", "4115569538e2d71aa96389b01aa5ca1b8b30f8dd", "17f054bdd81032dcd1aa4e578b4b134c3d3dd889", "64042f1dba6b2fa1e85ed953057d67c5052e1106", "abf38db4775bec89c950013030d8eda56a89d32a", "9512857bf71969944a0b5387c6fad05e6170637e", "0b88fe8123b2e8059379bc43799745f3f7e97b5e", "71f62bec86cc0e1623d5ede64f1507f3c018230a", "72448a01870af329c8fe4123d0200c9091acddca", "f0e13128c14a405b084bd68275d461763889d052", "cd8313e6398b73b79040a4041a115f73efe34093", "0c379fa640e35c3b40b0930f8f9d5e8dbfe733b6", "0b1d353fdc864c9d32f57d6f1fc5073d07d9bd36", "40329662ab9f4c85b87967e42fb43bfca4ef1960", "22684e1fcecd742c246c50788095c591a23d1f5b", "a4111806db049fb8043e166a4567b843a0d38b76", "3f6f0114328df8b666cda25ac15188ba68b143cd", "6a49f5bc43686fa18cde261a1f298bd00216c6f8", "2fd8622bc357218f109374ce4bb25253cd700632", "b1cb101a38986f08a4fab90be02458ea6a5cfa4b", "f6a9cc2d748d9b662e7390e197b658e421a0a5cc", "1a0efa30f418f4909c14189edc7fa258b9bd908a", "48a1e2fca50e060ddabc9bc10f4fd2ee1b29066a", "a91fd02ed2231ead51078e3e1f055d8be7828d02", "f820c05024b75959199dcfba59ec6cfc7f162994", "6bc565939f5ff4d96cbfe502dd5fa539098d309a", "417b553d3863111cd1be284297e0924a07a9690b", "8c56911a73dfdf47dc79f727a6b6bbef3175b9f1", "48185257697b84c4ebfb137b44cf2e2bce182174", "1d65848c563b2c3a7f0153551c1b39e0e5c2d776", "f16d1c0dbff12aa9c05feae542cca7878e625b51", "6080a2066f3dab693db190f92133a4e1fd4d70a9", "5b5bab838ad9ef33232839cd67c3c85dec4de070", "50c0c4ecd55d7a194616fe9c8a28ffe66961c2d5", "1180a22488c285ada5b05d67a7fa7dd6b70b03a5", "b6e557f31ce8aca4408d117fe5ba72385fc8993d", "da6682635670b5b482d5b1f38e5882df2dea2758", "2b5e0b133c5a4b954e9a67e9e96a74d83f42f430", "abc136123ffdb1db6996735933a5c79771785544", "dd4af8debc5c777e6556021b0dad976719c737f6", "66d5ec7a71a8b92d0c9563edda94ca62d39f96be", "16f1ca5344ad3dc907c6214371e3d7c8ab1b07d1", "8f1dc314680ab0e2b780c093546395b499bb2b67", "350e50ad35934fd23cff6f027fd819871cd517c2", 
"614b543ddb648b3ca763def133c26d04ae2211c9", "6f61d15a31d6d051aeee3bf6d1482d332e68ebfe", "e34dd1ef0d4f382fbd4c6514e4dbc660538c0971", "acdb935d9f319750fc5bd54b393d5f76b21348b2", "c16375406e802bcef0f2de7e83a36866946e4fbe", "507cb37140affd3ccc6c1758e7d26a177f95c323", "0d3c439f89feaa8cc037220c75b298c64385251a", "1e77822d88d1064317d0e5d229b536820cc8df81", "7c883c6df6341e165287172b26bc41afff8f8ca1", "60f869d07a3ec9bf1395ba1288e822b9a7744f34", "02f7c16433dc046a815898f14d5e348176032d27", "dd215b777c1c251b61ebee99592250f44073d4c0", "7e17e21c3e48e5432c38d6a9f635f9708357d273", "c2e71feef89ffc06bfeae874d4368da50b2809c5", "8ca3cc2fff426f6b50546acbc8aa6b42246083c6", "e2078762c023dbddbcd02f17ef81d951633c023a", "0380ce7ef64c8e0aebf71cbecb130cf58a3a3367", "cf18432bb77bf41377c477b5aaab9abd0f1f306c", "1d8592a0202e16508061359440da42b520ccb53c", "f6bbc5d68546a455f7c09628c79a8e1cfc8e0147", "77a2dc85614422642afefe932c519a097d2d85e1", "b96c024ecc80f7cba4a9ca7cc7c546b0ada044ba", "543183b8f62ffacfb9c30e066dbd7ea7c9584bf7", "06696f14fbadd98365697fde0992d922e05d383e", "4deba3f41da26653f7764813af261b85d8d88b78", "1f003a9ed5a7032a78b42d122bee6755f17fc363", "0235c5db5f0fae76b32b35178179497c7cdff23f", "55cc1b26e3c718f04288811ac11fc0945c105eac", "99c920590861f54081e9a9613cfa57a4b1353909", "8e4808e71c9b9f852dc9558d7ef41566639137f3", "5563d7e6e5ee659bf26fa25e04d38c8d1a56e204", "57126589b3fe62c35a36a2646dac3045d095ecf5", "956272153ce970d99d182d99919c7c471cf48166", "6a6ae773b813f17b5bfdb97bfd3e59be5fd8a48e", "c79e0a859ec05b263a927aa8ceda5ddda661a27d", "680135b9d916ca9502c09fa5a2b3bb9710b9e387", "9da084097986f30897f3722febc70631fe2251d7", "a0c90e89d81469d5ab9ed93af5a020a94fa05188", "d658ee1f44669f4b8e572f3f1943983d7036c57d", "08c588465b7d801ad912ef3e9107fa511ea0e403", "1f70bbe8099daea2adccf4e9120e453fa935eefd", "c5f71486c16add42c3394edb41b8c064b0123824", "1d6bc45c31c17f5091eec3def813cc2cd26d811e", "a5756e5338521b42d95cf770ace182a5694c473f", "6b327af674145a34597986ec60f2a49cff7ed155", 
"49a5d855f91c6ec6d1724a200d33e92c41f73480", "1bd207db64b5a8a232861f36f20222a58b4983ef", "541c238a830698dc9a50aac220fe130bdb550752", "582ecf1e446f365d138998804fc3a434cd45a568", "21dc8ebc3b8373c233e66031dead3ed5a0024a5f", "04d2fe52b97ad769974650b76e47fb50842fed8f", "1c71e653f86b06eb7d5b1d92694f34e6f57173de", "831bdd1a8fdde9a042e959c78534c86671e79515", "1db3d317c45f5b3ad4a83fb64cd97e26f6057591", "32326e1d8cfb0e25c61d7139817316956cbb896e", "c37711212d217df50ad3490e67d8b36c17168877", "533892babde5b8390b3a02336c9a6a293378eb1d", "70f646b21115a896300d2ae5a1decf0cce5cdb82", "edce5208900a2702eee5b2452781909a4c8aa77a", "0cb0fb5fcc28d84932ac18a1541ceaec02406851", "c897893ad1fef253934be14d7266a5a02c40dee2", "4a8c332b09bb99333a8bce6a4640a20c1352aa63", "917c97c4d59bb756c466b63864f8a65844a2fdef", "1c72f9147124df2b29e172d7a31f459c47be443b", "f09db8a42d713774483f023b31e0ee96361823f4", "ec2df1a2b46279bfd658746c9ab0dcdcbca3177c", "b3e04836a8f1a1efda32d15296ff9435ab8afd86", "ae6a59df62e9bcd8ac1214cd3642477950d9d4fb", "d9716a34853188061ee5365d84677bfae635229d", "723e6b654459db5f99a531ec54363bc7f25878ba", "3d834d562a21421af9f06442c732de16ddd6dd0e", "6d6a791ba039b94f2898f867032ef73740c86c45", "2289742adfed1e321aa5397614a22591f4f216a1", "2b677cb0cdc2ec8174da3cd21410436fb04451f5", "5944e6b53fcf52d37ee72dccab84a006403c08f7", "0bf7cf1ef83f9ff4cc1b839a74a4833480998d87", "15e61bcdfea5d0a98496f444bf521aefeefad7cb", "6f78d7798cb5407bc998870618cc769383ef51eb", "a2d19828c435a48aaa0b9c2a08112f6a023b2df9", "f2cf24b7107f8b8061c9b0f28a716e246f3ea5ea", "3bb4418fc88303fc225e98f3da7e31b4dc4cfc41", "0e6613c4d17a4854bf5753561a54b607e3453f0b" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "39-57", "journalVolume": "", "outCitations": [ "169e0f340ed880b0c2d288bc8f3c8753fe7b0cfb", "35d659691fa0ae84733235b7cbce3bde4b510288", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "402da07a0ac4645e26370ff5ac8ab3540257a8ab", "35ee0d59bf0e38e73c87ba9c0feead0ed164c193", 
"24529bffa95f07c01ccf6f02eb4dc9d859430159", "2d8450ec56926a34fe1c25180f0b303c5cc67f2d", "01fcae344d2edb715bcc63a40b6052c0331741bd", "272216c1f097706721096669d85b2843c23fa77d", "53852d69c008f9ebfb05939b4eb7c1f3279437e6", "49e77b981a0813460e2da2760ff72c522ae49871", "1e90219c8f23912a214e509b56c2945da8947756", "0e3cc46583217ec81e87045a4f9ae3478a008227", "38211dc39e41273c0007889202c69f841e02248a", "0f84a81f431b18a78bd97f59ed4b9d8eda390970", "8d350984ea3ab8bf196cf687ae4076253a3d30ff", "0ff9ea8409c932baf3c0302c89ede79add1431aa", "367f2c63a6f6a10b3b64b8729d601e69337ee3cc", "178631e0f0e624b1607c7a7a2507ed30d4e83a42", "74fc396d0b8ec548d600395182f12c9b06cc84e9", "5ce030f1650145a103527e883e7a9d9a25c45547", "04e2f7fe988a6eb3b79cb1a3d03d66e3a0035ae4", "55dda8f230566867acbfaa7bdd08fd8c7b8721ed", "cfd6dc6cf0b4c42d26e6b07b74bb544ae5effa0f", "010719cd94f8fea13b78f998d220499e6174e9c7", "6bdcedb895256357a6bc8ffef5a0790697403372", "9b618fa0cd834f7c4122c8e53539085e06922f8c", "0626908dd710b91aece1a81f4ca0635f23fc47f3", "0e78074a081f2d3a35fbb6f74ba9b7e27e64757b", "0bad381b84f48b28abc1a98f05993c8eb5be747d", "22410d40cc64428cbcd1028bf962dc41eb8a4ea8", "1740eb993cc8ca81f1e46ddaadce1f917e8000b5", "20f28af7a5f14c994b5c62315f215d95939de18a", "38418928d6d842fe6edadc809f384278d793d610", "35734e8724559fb0d494e5cba6a28ad7a3d5dd4d", "31868290adf1c000c611dfc966b514d5a34e8d23", "4d49385ab9e7285318a480e2bfd3982796df07b7", "83bfdd6a2b28106b9fb66e52832c45f08b828541" ], "paperAbstract": "Neural networks provide state-of-the-art results for most machine learning tasks. Unfortunately, neural networks are vulnerable to adversarial examples: given an input x and any target classification t, it is possible to find a new input x' that is similar to x but classified as t. This makes it difficult to apply neural networks in security-critical areas. 
Defensive distillation is a recently proposed approach that can take an arbitrary neural network, and increase its robustness, reducing the success rate of current attacks' ability to find adversarial examples from 95% to 0.5%. In this paper, we demonstrate that defensive distillation does not significantly increase the robustness of neural networks by introducing three new attack algorithms that are successful on both distilled and undistilled neural networks with 100% probability. Our attacks are tailored to three distance metrics used previously in the literature, and when compared to previous adversarial example generation algorithms, our attacks are often much more effective (and never worse). Furthermore, we propose using high-confidence adversarial examples in a simple transferability test we show can also be used to break defensive distillation. We hope our attacks will be used as a benchmark in future defense attempts to create neural networks that resist adversarial examples.", "pdfUrls": [ "http://www.umiacs.umd.edu/~tdumitra/courses/ENEE657/Fall17/papers//Carlini17.pdf", "https://doi.org/10.1109/SP.2017.49", "http://arxiv.org/pdf/1608.04644v1.pdf", "https://arxiv.org/pdf/1608.04644v1.pdf", "https://arxiv.org/pdf/1608.04644v2.pdf", "http://arxiv.org/abs/1608.04644", "https://www.ieee-security.org/TC/SP2017/papers/518.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0e03189871cd303b3438743f90232514dfa7885e", "sources": [ "DBLP" ], "title": "Towards Evaluating the Robustness of Neural Networks", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "0e0f7fa2de3f757a51e747399d93c570249e72ac": { "authors": [ { "ids": [ "3089873" ], "name": "Ryan Beckett" }, { "ids": [ "37131028" ], "name": "Aarti Gupta" }, { "ids": [ "38981616" ], "name": "Ratul Mahajan" }, { "ids": [ "1832958" ], "name": "David Walker" } ], "doi": "10.1145/3098822.3098834", "doiUrl": "https://doi.org/10.1145/3098822.3098834", "entities": 
[ "Border Gateway Protocol", "Cloud computing", "Constraint logic programming", "Fault tolerance", "Interaction", "Load balancing (computing)", "Network theory", "Reachability", "Routing", "Synthetic data", "Vulnerability (computing)", "Well-formed formula" ], "id": "0e0f7fa2de3f757a51e747399d93c570249e72ac", "inCitations": [ "c60960f93f657205b0f6247e00c79c97203e9a51", "04e214856e0b24f32bffb209ed803b0ecf546e0a", "659fb2739513bf565c7225ef8b9468a836675260", "8b69d6ecb3b0faab61fba7760e8bdcbc21de48ec", "13e7fc7a16889060740fefe6ff864a6c182e8240", "b74d2874646d36b36eee5c836adf6b29d9173425", "728cb61e78d55bc5039ed78920b20259a1135cb3", "8145aed599257bdf3b49715148974b337762fc93", "88b46e17199bfaa4cf65498bcaeced5284279b97" ], "journalName": "", "journalPages": "155-168", "journalVolume": "", "outCitations": [ "4f21cbaa02e89b7aed6c405232ca9b804ca748cb", "0355a7b4c66e42b73fa3d0d7198ce68b2dbcd5be", "c24809e301b30cb1dcc1da4ee14e4e1f87dd742b", "17059e939aa051d7db57f4af959b2af21fa3dd18", "022d24118024bb533c83376e60a6147c8aefcd0f", "19114b7a2f5243a47e80590cc11a2d8ec5b96308", "273de61c65c39e0e55942ea166a473e63ddaa02c", "24e10a0f77ef92eb86d26108ebe725178bc0bf94", "2f708dc8de91f08dba286a5e582e58c6f98e7f2c", "119af470b90b725c847c4b1fd25aea9a6c5b2b57", "4c28645e6f959aa2c5ebb6cd78e9b62f22e6fd30", "1b2ca813312e8cd69fcacf9b9692fb4fca8aa1fe", "e5f17a31e60dc67f84f905041b1f49b54317f21e", "016ca0cee16190907a911d874e98fbc6dfa5a36c", "0145518f09c67070799d7a599d1ce00d7ae452cd", "44b3b0573fdf6fc9889dbb3badf134dd092bb2d1", "8b007f9bb907620e2dabda17ae09d8d239e7ff44", "211b998175c29dd62e7f854301ed88bcb03a1fa5", "1b93d79cc923adb402cbdbd9d7eedf88426ec6b4", "36f396b52f93fa52742ce5052a40c1c90ea726e3", "6bf2daa2760a46b6d53c0d0a9776331e8a6d024c", "8f1339e356f8f0f8f088fd80f9fbda7316c64805", "4ad98b30cbee2fc78e50a3d4d149f0847f093956", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "9e778ebe9a4b0955b802f7e46323dcffc6174f94", "1b2a56eaa107cb5ca31be45885084feec1953316", "ad806d9c69ab834d814865958cd3ded4df4f12f9" 
], "paperAbstract": "We present Minesweeper, a tool to verify that a network satisfies a wide range of intended properties such as reachability or isolation among nodes, waypointing, black holes, bounded path length, load-balancing, functional equivalence of two routers, and fault-tolerance. Minesweeper translates network configuration files into a logical formula that captures the stable states to which the network forwarding will converge as a result of interactions between routing protocols such as OSPF, BGP and static routes. It then combines the formula with constraints that describe the intended property. If the combined formula is satisfiable, there exists a stable state of the network in which the property does not hold. Otherwise, no stable state (if any) violates the property. We used Minesweeper to check four properties of 152 real networks from a large cloud provider. We found 120 violations, some of which are potentially serious security vulnerabilities. We also evaluated Minesweeper on synthetic benchmarks, and found that it can verify rich properties for networks with hundreds of routers in under five minutes. This performance is due to a suite of model-slicing and hoisting optimizations that we developed, which reduce runtime by over 460x for large networks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098834", "http://scholar.princeton.edu/sites/default/files/rbeckett/files/paper.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/08/NetConfig_SIGCOMM-2017.pdf", "https://www.cs.princeton.edu/~dpw/papers/minesweeper-sigcomm17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0e0f7fa2de3f757a51e747399d93c570249e72ac", "sources": [ "DBLP" ], "title": "A General Approach to Network Configuration Verification", "venue": "SIGCOMM", "year": 2017 }, "0e10e1485f71c9936afa6b40544466045b7bcc55": { "authors": [ { "ids": [ "37413060" ], "name": "Pedro A. R. S. 
Costa" }, { "ids": [ "2084010" ], "name": "Fernando M. V. Ramos" }, { "ids": [ "40003160" ], "name": "Miguel Correia" } ], "doi": "", "doiUrl": "", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Apache Hadoop", "Baseline (configuration management)", "Cloud computing", "Computation", "Correctness (computer science)", "MapReduce", "Scalability" ], "id": "0e10e1485f71c9936afa6b40544466045b7bcc55", "inCitations": [ "ab8194494e1257c939c2bce0761157a10abbe327" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "421-430", "journalVolume": "", "outCitations": [ "282f88f3e89813bba06aa0b23987955b987e9af3", "3487f9de0d3a81f0de7f45fefba714a67903b7f0", "2000c8bc2f5bbf1f2a579726d84368d911a20bb0", "4f86fa28602d9503a8575c5b31082284abc8415c", "517c5cd1dbafb3cfa0eea4fc78d0b5cd085209b2", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "332f77fd05703c1607e3b57884ad31fb1fad0104", "0293e8c58298d73d5864a35ba1af3ae063e7353c", "6713ee6b1c9ba14e525f12958898e99eeb3003bc", "2cdcddb08ae6060e94cba6c9b2b58b87324e686f", "b510fc913adc1bc248329bd26f0e7656adfb0e7c", "d7cc546ca2a893a85de7804e4165d8d9fcbba2a8", "d8d9ab6a72c532cda3905410c704f294f1e5db0f", "0b72a5e4bec54e9f0a4d77db5b484d27886b49fe", "01c2f978c5a22c61e7c2d096c24768f52846c82d", "4520f74dbf413fe6b6480d0f243ee75fba1167a8", "41e71c53ca2a7be0ba90919af8f3049d957e665e", "3562a80722270e427497ea501dea77483b9cb367", "4d8826f4bc8281224ff5dc1779d1d6d21a0366d6", "6902867509928c0e5c19aff3e62e1def3a19d581", "4dc9b6f2a9b44b06b4f4df6a011e77e392d67310", "10ef554cb3f3b3345a86eba975c004cf02691a47", "5396b4833350e232bca368687f0c3cdf7682467f", "090030e0d1aa117008e9e9fa4abdee0a95455f4a", "0541d5338adc48276b3b8cd3a141d799e2d40150", "1868e368a747f03e2042ed1e94980a43aa5c3070", "230c55dbfd2ce934acb0c86b4c8addb51ed03bc6", "9a16a82bd087a90fb20b317f433258eb5eee9569" ], "paperAbstract": "MapReduce is a framework for processing large data sets much used in the context of cloud computing. 
MapReduce implementations like Hadoop can tolerate crashes and file corruptions, but not arbitrary faults. Unfortunately, there is evidence that arbitrary faults do occur and can affect the correctness of MapReduce job executions. Furthermore, many outages of major cloud offerings have been reported, raising concerns about the dependence on a single cloud. In this paper we propose a novel execution system that allows to scale out MapReduce computations to a cloud-of-clouds and tolerate arbitrary faults, malicious faults, and cloud outages. Our system, Chrysaor, is based on a fine-grained replication scheme that tolerates faults at the task level. Our solution has three important properties: it tolerates the above-mentioned classes of faults at reasonable cost, it requires minimal modifications to the users' applications, and it does not involve changes to the Hadoop source code. We performed an extensive evaluation of our system in Amazon EC2, showing that our fine-grained solution is efficient in terms of computation by recovering only faulty tasks. 
This is achieved without incurring a significant penalty for the baseline case (i.e., without faults) in most workloads.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101170", "http://www.gsd.inesc-id.pt/~mpc/pubs/chrysaor-fine-grained-final-ccgrid2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0e10e1485f71c9936afa6b40544466045b7bcc55", "sources": [ "DBLP" ], "title": "Chrysaor: Fine-Grained, Fault-Tolerant Cloud-of-Clouds MapReduce", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "0e26086a5c3f3610382f092cf488744130310e52": { "authors": [ { "ids": [ "2900492" ], "name": "Anselm Busse" }, { "ids": [ "1794629" ], "name": "Reinhardt Karnapke" }, { "ids": [ "1680898" ], "name": "Helge Parzyjegla" } ], "doi": "10.1145/3078468.3078475", "doiUrl": "https://doi.org/10.1145/3078468.3078475", "entities": [ "Address space layout randomization", "Call graph", "High- and low-level", "Region of interest", "Scheduling (computing)", "Side effect (computer science)", "Simulation", "Software development", "Traceability" ], "id": "0e26086a5c3f3610382f092cf488744130310e52", "inCitations": [], "journalName": "", "journalPages": "8:1-8:6", "journalVolume": "", "outCitations": [ "1aa63f896e11d4875a94becd0966de18d0cb3c06", "526ad0efab298489164587fa1ba3ba838bd38ee3", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "19c64da92a8c67c14ddd9cced0d1d0b9ff6b39d1" ], "paperAbstract": "Tracing and profiling low-level kernel functions (e.g. as found in the process scheduler) is a challenging task, though, necessary in both research and production in order to acquire detailed insights and achieve peak performance. 
Several kernel functions are known to be not traceable because of architectural limitations, whereas tracking other functions causes side effects and skews profiling results.\n In this paper, we present a novel, simulation-based approach to analyze the behavior and performance of kernel functions. Kernel code is executed on a simulated hardware platform avoiding the bias caused by collecting the tracing data within the system under observation. From the flat call trace generated by the simulator, we reconstruct the entire call graph and enrich it with detailed profiling statistics. Specifying regions of interest enables developers to systematically explore the system behavior and identify performance bottlenecks. As case study, we analyze the process scheduler of the Linux kernel. We are interested in quantifying the synchronization overhead caused by a growing number of CPU cores in a custom, semi-partitioned scheduler design. Conventional tracing methods were not able to obtain measurements with the required accuracy and granularity.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078468.3078475" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0e26086a5c3f3610382f092cf488744130310e52", "sources": [ "DBLP" ], "title": "Simulation-based tracing and profiling for system software development", "venue": "SYSTOR", "year": 2017 }, "0e2baeca09fe10960c3ce9f1d27f11825fa5dba9": { "authors": [ { "ids": [ "1801197" ], "name": "Christopher De Sa" }, { "ids": [ "14465313" ], "name": "Matthew Feldman" }, { "ids": [ "1803218" ], "name": "Christopher R\u00e9" }, { "ids": [ "1746638" ], "name": "Kunle Olukotun" } ], "doi": "10.1145/3079856.3080248", "doiUrl": "https://doi.org/10.1145/3079856.3080248", "entities": [ "Algorithm", "CPU (central processing unit of computer system)", "Cache", "Central processing unit", "Computation", "Conceptualization (information science)", "Field-programmable gate array", "Gradient", "Gradient", "Gradient descent", "Machine 
learning", "Numerical analysis", "Stochastic gradient descent", "Throughput", "algorithm" ], "id": "0e2baeca09fe10960c3ce9f1d27f11825fa5dba9", "inCitations": [ "4543cb3a066cac7dca6c3547fe56004370dc6653", "381b008b49d04c1bd5ff00649521fa028b9d3ea8", "05233cf6194ddee6427f0bb76cb8749cc220d2bb", "2512a6ced085503c399ee512ecaeb88606081261", "fa099d45212e0b0e6486b6bd378e6f4fd06c222d", "1fa40d8d84cb7a86033395eb9d5070549f41fbba", "2d0410efd86c335c9a45fee4d754614fca4d8547", "b06c83951a9c41a6019a7528ada826f18308be29", "13fa2471ba3da92e473d95c30503b35c96fdb7c5", "65dfe597c74b8e3f8aea6182f8c979bbefa9b005", "acaaf84fbe159a1d730c14e8f110bfd9d46a3d3b", "8ad3c6fa5725885d739daa214bc1dc77e8e2695a" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "561-574", "journalVolume": "", "outCitations": [ "62f41341d9ba292877e9e299d6eb70b5435ee8c8", "00bbbf3af78f80651e9f955209ee72711fa5d412", "b46e127f15b2c1cdc71600d08019f6944fd40434", "3ae4e53bcaa8f949184dfd6118a85c79c01053e7", "db97a0bfcaac723fd468d85b912739d44d167859", "46f74231b9afeb0c290d6d550043c55045284e5f", "52eaf7415eceb8e569173479790e4d1e860b5fc2", "5984d89301db24fe9bf6d45679a996b3f54ec857", "55bc52bbec8972d62874bcbe169dac573b57d1df", "441334210410f1ccc06de90aef2e97896c230d56", "3ad743e436bb8749b7f750e4b316550a9d124bac", "092217c2267f6e0673590aa151d811e579ff7760", "24251f02c34f32b1dd96572a1d984c4463a26a10", "baf4d05f5ae09ba57fdc8d949288c447d44a1495", "0790c77c1eaf2368b55c6a0def09a43690eeb848", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "a514e1eabb627778dedf00409cc6741cd2e51fe5", "1dbc1238409549ae6872a744b7b2ff1da5822053", "d0d2e2924e7258092af15581f90760bfda25f825", "01cca1fc2784f4b0f164ca5703ce793d0042649a", "3439a127e45fb763881f03ef3ec735a1db0e0ccc", "0a4b55a4ba0b60b3862d2d797e3aac0b2cde24a8", "1792758cc7e555ff31a8b8222ac89f409d79a06a", "3dff11679346f5344af1018cad57fa14cc349f2f", "052b1d8ce63b07fec3de9dbb583772d860b7c769", "065e808aa05fe23de00ab4510d1607ddff04c232", 
"eaa8211f1410869469a06b40e82cfb56ba991857", "b7cf49e30355633af2db19f35189410c8515e91f", "098d5792ffa43e9885f9fc644ffdd7b6a59b0922", "4e171856b5eac3a2bf7ebc1c243d9937b55a09bc", "3bf23f74bf33ed52f7c28587fab315610b27221a", "01fcae344d2edb715bcc63a40b6052c0331741bd", "235fa2b1983eff9f13b27c620cda389359126bf4", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "9ac8ba7b5d78c8a8764a3064e59a0017b440991d", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "8caec7f48fbf19a086e3cf67fce16cf77dff9488", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "162d958ff885f1462aeda91cd72582323fd6a1f4", "3b2697d76f035304bfeb57f6a682224c87645065", "043afbd936c95d0e33c4a391365893bd4102f1a7", "31dac06366b5d964aca7ce12c567369173db8e3a", "244f7c483b1035144f04f7c58f20c3f806d3f1b3", "466d8335086c3527f96d9252debf22bac3355f8a", "c7652bad78d1ea4f75f3e593336580fad67d52d1", "5d90f06bb70a0a3dced62413346235c02b1aa086", "73eeeb984a15bb83fac23d276c973312b4bf6dd1" ], "paperAbstract": "Stochastic gradient descent (SGD) is one of the most popular numerical algorithms used in machine learning and other domains. Since this is likely to continue for the foreseeable future, it is important to study techniques that can make it run fast on parallel hardware. In this paper, we provide the first analysis of a technique called Buck-wild! that uses both asynchronous execution and low-precision computation. We introduce the DMGC model, the first conceptualization of the parameter space that exists when implementing low-precision SGD, and show that it provides a way to both classify these algorithms and model their performance. We leverage this insight to propose and analyze techniques to improve the speed of low-precision SGD. First, we propose software optimizations that can increase throughput on existing CPUs by up to 11X. Second, we propose architectural changes, including a new cache technique we call an obstinate cache, that increase throughput beyond the limits of current-generation hardware. 
We also implement and analyze low-precision SGD on the FPGA, which is a promising alternative to the CPU for future SGD systems.", "pdfUrls": [ "http://stanford.edu/~cdesa/papers/isca2017_buckwild.pdf", "http://doi.acm.org/10.1145/3079856.3080248", "http://stanford.edu/~cdesa/papers/isca_buckwild_submission.pdf" ], "pmid": "29391770v1", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0e2baeca09fe10960c3ce9f1d27f11825fa5dba9", "sources": [ "DBLP", "Medline" ], "title": "Understanding and optimizing asynchronous low-precision stochastic gradient descent", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "0e35aaa88a68decc92c3e9322851718bf2139bce": { "authors": [ { "ids": [ "1839879" ], "name": "Janos Szurdi" }, { "ids": [ "2637728" ], "name": "Nicolas Christin" } ], "doi": "10.1145/3131365.3131399", "doiUrl": "https://doi.org/10.1145/3131365.3131399", "entities": [ "ASEA IRB", "Credential", "Document", "Email", "Entity", "Information sensitivity", "Personally identifiable information", "Spamming", "Span and div", "Typosquatting", "Virtual Instrument Software Architecture" ], "id": "0e35aaa88a68decc92c3e9322851718bf2139bce", "inCitations": [], "journalName": "", "journalPages": "419-431", "journalVolume": "", "outCitations": [ "876dae4ee0323da2ae685160e9192cdf605e7299", "8ad5a8e1eacc0f7bc2fc237d8fde5b04a788df47", "d2767a9010ae11b10be2589f4f433174e7aab6e5", "0f743c006c2b5040906ee3729a487684b056a187", "22a78f31395e79cb6c99c3cedd248ecd6568b7f7", "b19c2aa855c247461d9caa48ea94b0b39d650001", "ba9af0bf228cedfad61daa481a71ed433076ab8d", "26bf3484e0cd96a3452c9e601d2d19141001ee46", "649468352e70532e80f68d362bf85fae8277bf22", "16bb76a01c6bdcdc7065ec1a8f356d2baa4db29b", "0a964c5ac7e19cbdc820fd4ee101a5263385733d", "2a9e564d6ed5c689fd5afeb14ed4de76b6da2246", "353bc95ea6d720d867489e3d4cf5c9427531c7c7", "f522211ed9336920b49f65d3bfa83b125b2c66e9", "c551c6e002ac4a6633476ffd8d97d90336d6c668" ], "paperAbstract": 
"While website domain typosquatting is highly annoying for legitimate domain operators, research has found that it relatively rarely presents a great risk to individual users. However, any application (e.g., email, ftp,...) relying on the domain name system for name resolution is equally vulnerable to domain typosquatting, and consequences may be more dire than with website typosquatting.\n This paper presents the first in-depth measurement study of email typosquatting. Working in concert with our IRB, we registered 76 typosquatting domain names to study a wide variety of user mistakes, while minimizing the amount of personal information exposed to us. In the span of over seven months, we received millions of emails at our registered domains. While most of these emails are spam, we infer, from our measurements, that every year, three of our domains should receive approximately 3,585 \"legitimate\" emails meant for somebody else. Worse, we find, by examining a small sample of all emails, that these emails may contain sensitive information (e.g., visa documents or medical records).\n We then project from our measurements that 1,211 typosquatting domains registered by unknown entities receive in the vicinity of 800,000 emails a year. Furthermore, we find that millions of registered typosquatting domains have MX records pointing to only a handful of mail servers. 
However, a second experiment in which we send \"honey emails\" to typosquatting domains only shows very limited evidence of attempts at credential theft (despite some emails being read), meaning that the threat, for now, appears to remain theoretical.", "pdfUrls": [ "http://doi.acm.org/10.1145/3131365.3131399", "https://conferences.sigcomm.org/imc/2017/slides/imc17-presentation-toshare.pdf", "http://www.andrew.cmu.edu/user/nicolasc/publications/Szurdi-IMC17.pdf", "http://www.andrew.cmu.edu/user/nicolasc/publications/Szurdi-IMC17-appendix.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0e35aaa88a68decc92c3e9322851718bf2139bce", "sources": [ "DBLP" ], "title": "Email typosquatting", "venue": "IMC", "year": 2017 }, "0e39f2545eabda68cf197ab0a7df163a69e78186": { "authors": [ { "ids": [ "10146361" ], "name": "Sandeep S. Kulkarni" }, { "ids": [ "1688346" ], "name": "Nitin H. Vaidya" } ], "doi": "10.1145/3087801.3087818", "doiUrl": "https://doi.org/10.1145/3087801.3087818", "entities": [ "Algorithm", "Binary logarithm", "Causality", "Computation", "Like button", "Online algorithm", "REPLAY (software)", "Star network", "Vector clock" ], "id": "0e39f2545eabda68cf197ab0a7df163a69e78186", "inCitations": [ "7faeab79cc8414bd032199e0a03c81c3554a79d3" ], "journalName": "", "journalPages": "263-272", "journalVolume": "", "outCitations": [ "223b9e0e1bf2d696458ca0fb7aabb1bb0ea0b639", "8cec8266b4652a1f476a13948a490a43e9e58ba8", "69f9c12bc7d32019f074cfae70e0cdcc9107ad14", "01293cc2b2bda3c38c7095d2ea1813fcb0611a3e", "05a618847e4f08e5bca29dff732757779722b2e0", "7b3533216d5064660458d3754a18fc69f8fbeba0", "8d25dcc6204f2dad9a4b487aa87d60625ef19c71", "54b221c08f2964803cfe5cda092a1a174f368e8f", "ef64751837bf9abce354511666cc9087e6589abd", "1b49f9b1185848548d86d230ff63c318eee744dc", "3029deea2124e2ed5a158b25123f95390747a2de", "929ada99343bf73a1fc0cf734a1a190dcb8595aa", "1230b86d3a777a669bb357bcc40051cea071c6dd", "1f6acadf60a361ae9b03be3634ff0cb6c3c0b3e6", 
"75f2f29e66715cfd3bcdc2775e899b956db73a8c", "86ed165adcfd254b511ff1bbb912cad65d45f0d6", "d605276ebdf41305ef3e4b65acd3e8f631ce6c78", "cdee1c49685a1e66b040b6c8381ce6e85f643f3a", "9cd9321b82d573447f08d84e9a8ca31c46fd6b8e", "297aeeb388366697573a03d7ea352f527de730ee", "343f316548d06246885050350c3d9ef532f1e293", "740ee3de6f8ca734797d7a808c956e303f4a5730", "3dbb0beee26501a93522230a094cb359eb121c70", "c82e5f800eb3a6fca0eda4aae39cdccf8aafd388" ], "paperAbstract": "Practical algorithms for determining causality by assigning timestamps to events have focused on online algorithms, where a permanent timestamp is assigned to an event as soon as it is created. We address the problem of reducing size of the timestamp by utilizing the underlying topology (which is often not fully connected since not all processes talk to each other) and deferring the assignment of a timestamp to an event for a suitably chosen period of time after the event occurs. Specifically, we focus on inline timestamps, which are a generalization of offline timestamps that are assigned after the computation terminates. We show that for a graph with vertex cover VC, it is possible to assign inline timestamps which contains only 2|VC|+2 elements. In particular, for a system with n processes and K events per process, the size of a timestamp for any event is at most log2 n + (2|VC|+1) log2(K+1) bits. By contrast, if online timestamps are desired, then even for a star network, vector timestamp of length n (for the case of integer elements) or n \u2212 1 (for the case of real-valued elements) is required. 
Moreover, in addition to being efficient, the inline timestamps developed can be used to solve typical problems such as predicate detection, replay, recovery that are solved with vector clocks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087818" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0e39f2545eabda68cf197ab0a7df163a69e78186", "sources": [ "DBLP" ], "title": "Effectiveness of Delaying Timestamp Computation", "venue": "PODC", "year": 2017 }, "0e80a25b12641468da87a5417fa128020a4dd2ef": { "authors": [ { "ids": [ "2638884" ], "name": "Yukihiro Tagami" } ], "doi": "10.1145/3097983.3097987", "doiUrl": "https://doi.org/10.1145/3097983.3097987", "entities": [ "Approximation algorithm", "Association rule learning", "Graph embedding", "Multi-label classification", "Nearest neighbor search", "Web analytics", "Web page" ], "id": "0e80a25b12641468da87a5417fa128020a4dd2ef", "inCitations": [ "29d1790c665f3c48af99888d4b8e339202e25aa2", "84c45d32971431a8c2b2e0955dbc7fb65a2330cc" ], "journalName": "", "journalPages": "455-464", "journalVolume": "", "outCitations": [ "041bb7570ddde1b9d699a99ff99ab916f4116abf", "1b7ac6c0bfc15c42f65cc6532cfa58df0bcf2f9c", "2305715410186e78dc5720f4c0e097616eec8921", "1211d3c950e2d1cc983d7a37fff1ea5062d54284", "2061a689341d7562ccd81e630038ec6fa4f310ac", "9f40ec54a840c77b7c4dcaa830c77e737adf57da", "5b9534442f91a87022427b74bca9fd95dd045383", "0ce46b5c8db8720582373ffa36fde3e40f4037ec", "bdc6acc8d11b9ef1e8f0fe2f0f41ce7b6f6a100a", "7bdf20d18b5a9411d729a0736c6a3a9a4b52bf4f", "4f5ef93300aafc04960b17de5641deeba83973d3", "8724631b1b16469fb57df1568d41d1039067c717", "3ec234373af61716d2bb291be74f8327847d34b1", "09849ca4b8159ff69721ebb2f25a81025188937e", "103ac7f316bf8cdad3133b4ce2bbd28d091e7974", "13261a8ab8f3518df5c6852ca96ae01a6f1d02ce", "8d44310637cd1443f6292ce0ec419b4c1154822b", "0cf6fe9e975a5496e9edd53818ae5c18a2a7e66b", "9ba53f53a55f22626e9496f2ddb7e58266640c37", "b14875d1e1850121d8720c39f853af5f455ecc44", 
"44f4cd28486c4730fbfb262f099cb5df30637211", "1ae3915647d701f155b5a92a5dfab2d9b274277c", "1e9f93ae9e860599d262dd581da453e1a351546f", "f8a0e4186efccf4a2861e769db3ad150e4e25ca6", "05aba481e8a221df5d8775a3bb749001e7f2525e", "181b88106fa6dfca0d95dfa567747924529631d7", "11d5f6b0eae5b0cc3e839fd5d5761453c8bd0042" ], "paperAbstract": "Extreme multi-label classification methods have been widely used in Web-scale classification tasks such as Web page tagging and product recommendation. In this paper, we present a novel graph embedding method called \"AnnexML\". At the training step, AnnexML constructs a k-nearest neighbor graph of label vectors and attempts to reproduce the graph structure in the embedding space. The prediction is efficiently performed by using an approximate nearest neighbor search method that efficiently explores the learned k-nearest neighbor graph in the embedding space. We conducted evaluations on several large-scale real-world data sets and compared our method with recent state-of-the-art methods. Experimental results show that our AnnexML can significantly improve prediction accuracy, especially on data sets that have larger a label space. In addition, AnnexML improves the trade-off between prediction time and accuracy. 
At the same level of accuracy, the prediction time of AnnexML was up to 58 times faster than that of SLEEC, which is a state-of-the-art embedding-based method.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3097987" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0e80a25b12641468da87a5417fa128020a4dd2ef", "sources": [ "DBLP" ], "title": "AnnexML: Approximate Nearest Neighbor Search for Extreme Multi-label Classification", "venue": "KDD", "year": 2017 }, "0e8e838a2a13724b90354c159ddf427daae95a42": { "authors": [ { "ids": [ "9918399" ], "name": "David Korczynski" }, { "ids": [ "1975143" ], "name": "Heng Yin" } ], "doi": "10.1145/3133956.3134099", "doiUrl": "https://doi.org/10.1145/3133956.3134099", "entities": [ "Code injection", "Code reuse", "Countermeasure (computer)", "Data-flow analysis", "Malware", "Operating system", "Synthetic data" ], "id": "0e8e838a2a13724b90354c159ddf427daae95a42", "inCitations": [], "journalName": "", "journalPages": "1691-1708", "journalVolume": "", "outCitations": [ "53fb53444672bb6a2325efd7a48dd2dc7b6ac374", "023f23c300804754753cb11db51fb7f582556ab7", "697bcaae27143c11d98a526a6d855f713d6ccbfa", "07848afa4b5f2f09cd2fe28a6cdc25c642da61cd", "21ed4f52274e284a85f8280283fe2140b6304027", "41289566ac0176dced2312f813328ad4c0552618", "43178a653e60f20ebecdd8ac70e0f64c71e90f0a", "348b0049b0c7b3f7e74b77cca30213cb7e550360", "39e10792fc3df90fe287576400e39c2bb5006539", "70696431430bab0d406cb23f503af5841961ba76", "13ef3abeb4dee6e3d720386308da5e6723cc0dd3", "3946a5c410ba1980d932cc6d2987b4d935e038d5", "898ffbd983a4dc9770dd58f44974683835500990", "22ad05a7f8ea5c7aa1d04fa33a8b691325aff2cc", "4a250eef9f4897c47d28547b9e88327fb23dc7e9", "9fec4eef26542aa95d7bf1cd6a72b1ed319944cc", "45c3d6c670f9e7977bb99b396a7b4f714ef26628", "2b567737b5567f8e9f13b17f4f3583c6eea1e212", "5425dac9b3b70fe86edf846a7feb5beffa082ca6", "6e40435eaf84ac3dd2c48b8e81c0dcd5f0db1a12", "129ed742b496b23efdf745aaf0c48958ef64d2c6", 
"28d1465ed7e378d4cf778f58fe4c4eaf33652251", "1f0e9613b1d47bbe8ba5b32a57e89b81ec02aba8", "03f827395a17beb941241dbd72322705bdf79791", "1a4c7185626d0f2acebf7f05a29fa2073a2fa841", "31631de15e8c4790abc7a9f2104d686aacf40f10", "2960c89331eb7afa86584792e2e11dbf6a125820" ], "paperAbstract": "Defending against malware involves analysing large amounts of suspicious samples. To deal with such quantities we rely heavily on automatic approaches to determine whether a sample is malicious or not. Unfortunately, complete and precise automatic analysis of malware is far from an easy task. This is because malware is often designed to contain several techniques and countermeasures specifically to hinder analysis. One of these techniques is for the malware to propagate through the operating system so as to execute in the context of benign processes. The malware does this by writing memory to a given process and then proceeds to have this memory execute. In some cases these propagations are trivial to capture because they rely on well-known techniques. However, in the cases where malware deploys novel code injection techniques, rely on code-reuse attacks and potentially deploy dynamically generated code, the problem of capturing a complete and precise view of the malware execution is non-trivial.\n In this paper we present a unified approach to tracing malware propagations inside the host in the context of code injections and code-reuse attacks. We also present, to the knowledge of the authors, the first approach to identifying dynamically generated code based on information-flow analysis. We implement our techniques in a system called Tartarus and match Tartarus with both synthetic applications and real-world malware. 
We compare Tartarus to previous works and show that our techniques substantially improve the precision for collecting malware execution traces, and that our approach can capture intrinsic characteristics of novel code injection techniques.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134099", "http://www.cs.ucr.edu/~heng/pubs/Tartarus-ccs17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0e8e838a2a13724b90354c159ddf427daae95a42", "sources": [ "DBLP" ], "title": "Capturing Malware Propagations with Code Injections and Code-Reuse Attacks", "venue": "CCS", "year": 2017 }, "0e95d0ea7264a802451bd4deb52379721d388668": { "authors": [ { "ids": [ "3226635" ], "name": "Mostafa Dehghani" }, { "ids": [ "2499986" ], "name": "Hamed Zamani" }, { "ids": [ "3091861" ], "name": "Aliaksei Severyn" }, { "ids": [ "1753628" ], "name": "Jaap Kamps" }, { "ids": [ "1704390" ], "name": "W. Bruce Croft" } ], "doi": "10.1145/3077136.3080832", "doiUrl": "https://doi.org/10.1145/3077136.3080832", "entities": [ "Artificial neural network", "Clickstream", "Computer vision", "Deep learning", "Experiment", "Feedforward neural network", "Information retrieval", "Natural language processing", "Okapi BM25", "Ranking (information retrieval)", "Sparse matrix", "Supervised learning", "Unsupervised learning", "Word embedding" ], "id": "0e95d0ea7264a802451bd4deb52379721d388668", "inCitations": [ "3c8063179345af107834671bfc5453b940ce07d7", "a339a2a3732d2a5dbfada7e7b6f0da453a989546", "22b56535aa94b133038aa9be05aa0e3af5037003", "f53d7c0b8a44a4d179a005ce98500456d53d1d63", "5d05e9307bf273b88a37bcd965faf492d7f7cbec", "432b36c1bec275c2778c66f9897f9e02f7d8b579", "f4b635da887e963b9caa05d4978a23866638796a", "98472179ed79d8d67362e61b4582d686755c9d3b", "575c8cbf97b0b5fb99cb359570f2ecb438b77036", "0acf2f684c0d0e2e3aba215dffab84b2cc175c31", "111867082baba045e654f3925b58329b03fa0dc5", "19e014c46a037e13ca9b2cc5d42247ddc503178c", "e8113b84ec2fcb7b4c36265ca133aa0b4bbe5c54", 
"d0193ac0342155461876ed8d752426aea2be19f3", "95e3662d5b58dcb76751473859993dc095b4d0b1", "448cba015c7d1111cc76a6944cc6a6efe2040a55", "06e7ba45e4def91c280360791031f2b24d9898e5", "9d5cf3567ffc462083df263873c176341b3ceea8", "33fd02f1485c562449650f8edbe5aac892476dca", "02d5f505fde945c4b3c1ba38830c69cfe644791b", "6f8fc12004fa068c424369793fd39426e772b07d" ], "journalName": "", "journalPages": "65-74", "journalVolume": "", "outCitations": [ "8478c0f46dd30ef7f4052145983d6d315c2e1f17", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "a339a2a3732d2a5dbfada7e7b6f0da453a989546", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "89e1de88f3f25a2581b4a0caaeb995c5a5b13005", "38612e346fdf3158c32c16058f7e8820a8f0325e", "0825788b9b5a18e3dfea5b0af123b5e939a4f564", "997dc5d9a058753f034422afe7bd0cc0b8ad808b", "17ce7734904a6162caa24e13a9454c1239924744", "bc69383a7d46cbaf80b5b5ef902a3dccf23df696", "82ff2dce7215b17128ff07752d221028e97f5a66", "2bdffe03bff8373445eda7e1e9439b7300a6f2fd", "3c7e73926d379288f0a3ab50b672de02ca47c5c6", "137a9ba6deee9ee6beda4cd2b0ccd93f7088b047", "1b9483efbac9894715344bf1ece48d65519223bb", "234dc7984668f72ee4917755915040386be4eb1f", "206d63c57430071a5b6efa261ff55122d0eed829", "2e42bf8747363161851dc04b85aedb1ada50daaf", "7161eb8d3b1cb01769a36528f9c6bddd663545a9", "6de2b1058c5b717878cce4e7e50d3a372cc4aaa6", "272216c1f097706721096669d85b2843c23fa77d", "bb1d6215f0cfd84b5efc7173247b016ade4c976e", "111867082baba045e654f3925b58329b03fa0dc5", "214adc2dfdc2160cdf5be54001daf2b2304a03b3", "35546ad33a3d877063a6a5e938c8d9e8a34badb9", "4aba54ea82bf99ed4690d45051f1b25d8b9554b5", "d48edf9e81653f4c3da716b037b0b50d54c5b034", "5b9534442f91a87022427b74bca9fd95dd045383", "3832b5bbb8d751da15cdfa466f85cfc684b16580", "8490234d79b47e459824dcf87c1e288211a3c964", "36ea668bb7617b9c1e6e98aebe96a0aaf90b569e", "0c5621753692ecffdf0b1d787ffa0d73b5172618", "73978536540aba479358f024d8e1af5094920d75", "1510cf4b8abea80b9f352325ca4c132887de21a0", "34f25a8704614163c4095b3ee2fc969b60de4698", 
"753a2ec1ed2f543c6734ae1cf574d698acd0abcc", "400f6f4304b1c12efb22acf7e80a1784015cb23a" ], "paperAbstract": "Despite the impressive improvements achieved by unsupervised deep neural networks in computer vision and NLP tasks, such improvements have not yet been observed in ranking for information retrieval. The reason may be the complexity of the ranking problem, as it is not obvious how to learn from queries and documents when no supervised signal is available. Hence, in this paper, we propose to train a neural ranking model using weak supervision, where labels are obtained automatically without human annotators or any external resources (e.g., click data). To this aim, we use the output of an unsupervised ranking model, such as BM25, as a weak supervision signal. We further train a set of simple yet effective ranking models based on feed-forward neural networks. We study their effectiveness under various learning scenarios (point-wise and pair-wise models) and using different input representations (i.e., from encoding query-document pairs into dense/sparse vectors to using word embedding representation). We train our networks using tens of millions of training instances and evaluate it on two standard collections: a homogeneous news collection (Robust) and a heterogeneous large-scale web collection (ClueWeb). Our experiments indicate that employing proper objective functions and letting the networks to learn the input representation based on weakly supervised data leads to impressive performance, with over 13% and 35% MAP improvements over the BM25 model on the Robust and the ClueWeb collections. 
Our findings also suggest that supervised neural ranking models can greatly benefit from pre-training on large amounts of weakly labeled data that can be easily obtained from unsupervised IR models.", "pdfUrls": [ "https://arxiv.org/pdf/1704.08803v2.pdf", "http://ciir-publications.cs.umass.edu/pub/web/getpdf.php?id=1266", "http://cseweb.ucsd.edu/classes/fa17/cse291-b/reading/Neural%20Ranking%20Models%20with%20Weak%20Supervision.pdf", "http://arxiv.org/abs/1704.08803", "http://doi.acm.org/10.1145/3077136.3080832", "https://arxiv.org/pdf/1704.08803v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0e95d0ea7264a802451bd4deb52379721d388668", "sources": [ "DBLP" ], "title": "Neural Ranking Models with Weak Supervision", "venue": "SIGIR", "year": 2017 }, "0ea7fa66bf25e143498f15a076f3637aa5a5fbff": { "authors": [ { "ids": [ "19307916" ], "name": "Zhangkai Zhang" }, { "ids": [ "40172576" ], "name": "Xuhua Ding" }, { "ids": [ "1702391" ], "name": "Gene Tsudik" }, { "ids": [ "3166275" ], "name": "Jinhua Cui" }, { "ids": [ "1707275" ], "name": "Zhoujun Li" } ], "doi": "10.1145/3133956.3134094", "doiUrl": "https://doi.org/10.1145/3133956.3134094", "entities": [ "Computer security", "List of Code Lyoko episodes", "Location-based game", "Social presence theory", "Usability" ], "id": "0ea7fa66bf25e143498f15a076f3637aa5a5fbff", "inCitations": [], "journalName": "", "journalPages": "89-102", "journalVolume": "", "outCitations": [ "100ebdc07a14c85b5986d3adffa34b047b5be7a4", "468ea5727e8c2d64b32781544929cfd9054d311d", "764d2086f1ad5dd617c666c227708309d969fa03", "c63cf9caecbcfb01dc8013005e233dbd8f1ed2ca", "327be1bf8d8dff85cdfaf15159b6d760ef29717d", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "43c2eca1452dfc5c47cc091cdb4b03296d67fb08", "130bb6b3864729b771f68c326168e8e1aa2c7b51", "191982e2946a9bd1d5719eafbf129f6e50b91f0c", "42752f0808e34ca7fa9446a5eff5dfa15fe32f05", "3d400472b30906b1cbef37f3746c70f45ae74058", 
"080c336698f5d7a15169e5ad98fa62a0bbf6085c", "6c0562ffc00ba7a4d2734ac039ffd181afe2008d", "040f3b7db46a8d6cc0e9f58a807a29d9e8e13e3b", "7a6ca8144dbf3331e8ad34c4024670c6ef4ec9be", "162677cf8b2bd9bc2cfacbc7b83c5d150cbb82ab", "58156d27f80ee450ba43651a780ebd829b70c363", "b1e275bd3b09697233d450895dbcd6c2c8898f76", "136cf1976d2c91760e9ca766902cd1afefadcf01", "0cb5ddd2115d4777b112dd9e1381d667b8d16210", "576408a1805fbf7237a782ef504b36245258e216", "30f52a79ff53f8969ffcba19013b4a43e629875f", "0a35c32ebc233556d11c2038f5a4362f2c40b2a9", "39d4af99edc754d829afaf5c1e02ea17f5a93fc2", "05f70f429a7bf38efa9e457fd486cb862bd495be" ], "paperAbstract": "Many popular modern processors include an important hardware security feature in the form of a DRTM (Dynamic Root of Trust for Measurement) that helps bootstrap trust and resists software attacks. However, despite substantial body of prior research on trust establishment, security of DRTM was treated without involvement of the human user, who represents a vital missing link. The basic challenge is: how can a human user determine whether an expected DRTM is currently active on her device?\n In this paper, we define the notion of \"presence attestation\", which is based on mandatory, though minimal, user participation. We present three concrete presence attestation schemes: sight-based, location-based and scene-based. They vary in terms of security and usability features, and are suitable for different application contexts. 
After analyzing their security, we assess their usability and performance based on prototype implementations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134094" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0ea7fa66bf25e143498f15a076f3637aa5a5fbff", "sources": [ "DBLP" ], "title": "Presence Attestation: The Missing Link in Dynamic Trust Bootstrapping", "venue": "CCS", "year": 2017 }, "0ebded786ff34a7c41c02f2b4494875de6e0c306": { "authors": [ { "ids": [ "34574021" ], "name": "Mathias Gibbens" }, { "ids": [ "2672452" ], "name": "Chris Gniady" }, { "ids": [ "33114639" ], "name": "Lei Ye" }, { "ids": [ "1727135" ], "name": "Beichuan Zhang" } ], "doi": "10.1145/3084439", "doiUrl": "https://doi.org/10.1145/3084439", "entities": [ "Apache Hadoop", "Data center", "Data-intensive computing", "Digital distribution", "Distributed computing", "MapReduce", "Network traffic control" ], "id": "0ebded786ff34a7c41c02f2b4494875de6e0c306", "inCitations": [], "journalName": "POMACS", "journalPages": "2:1-2:21", "journalVolume": "1", "outCitations": [ "21854a5fb77a45f411865652a63663bb9ff3cde9", "018bdcad38c90963212795b9b43e9f50c7e34760", "43d4b77ca6f9a29993bce5bade90aa2b8e4d2cac", "18e7dde371cf17ca4089a4a59660483e70160b09", "4c44cbcea788cc024b29ddf178249ee1c367464a", "8900b40061708168197c034c9e16af6031e28235", "46851aa618427fcbf2b1edd21213a1bd6f66defd", "663e064469ad91e6bda345d216504b4c868f537b", "4687fdf3c77ef00700fdf1399f7dd81bfe87ef97", "02cbdb070f157598d243bac4e231e5d497d07ba4", "556b2b3c68209c8f009cdc09198c1d25260cc411", "74664eee9e23524b32ebe2f0236982f214488920" ], "paperAbstract": "The Named Data Networking (NDN) architecture retrieves content by names rather than connecting to specific hosts. It provides benefits such as highly efficient and resilient content distribution, which fit well to data-intensive distributed computing. 
This paper presents and discusses our experience in modifying Apache Hadoop, a popular MapReduce framework, to operate on an NDN network. Through this first-of-its-kind implementation process, we demonstrate the feasibility of running an existing, large, and complex piece of distributed software commonly seen in data centers over NDN. We show advantages such as simplified network code and reduced network traffic which are beneficial in a data center environment. There are also challenges faced by NDN, that are being addressed by the community, which can be magnified under data center traffic. Through detailed evaluation, we show a reduction of 16% for overall data transmission between Hadoop nodes while writing data with default replication settings. Preliminary results also show promise for in-network caching of repeated reads in distributed applications. We also show that overall performance is currently slower under NDN, and we identify challenges and opportunities for further NDN improvements.", "pdfUrls": [ "http://doi.acm.org/10.1145/3084439", "http://doi.acm.org/10.1145/3078505.3078508", "http://www2.cs.arizona.edu/~gniady/papers/sigmetrics2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0ebded786ff34a7c41c02f2b4494875de6e0c306", "sources": [ "DBLP" ], "title": "Hadoop on Named Data Networking: Experience and Results", "venue": "SIGMETRICS", "year": 2017 }, "0ec3cffef7fead78ecb3763a60c6269316d1219c": { "authors": [ { "ids": [ "30463363" ], "name": "Terry Penner" }, { "ids": [ "1732914" ], "name": "Mina Guirguis" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud computing", "Experiment", "MTD-f", "Memory Technology Device", "Multi-armed bandit", "Simulation" ], "id": "0ec3cffef7fead78ecb3763a60c6269316d1219c", "inCitations": [ "ea3c118f549319a888904d0d5b7d70ff87efa8dc" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "411-420", "journalVolume": 
"", "outCitations": [ "82c1d0dbb14e9333a60a8e95f5a1183864fb6031", "2fc84ea4ffbee661ce90c5804101887abe8268a8", "336cc6866437a424448db5c358f03e3561274c29", "030dc3aa9c7324ec071e02067fbff28359605e92", "22760b8cc5c7916a3d8b33508f7349372e63e6bb", "62968a53cd7b65dfb67c9e98d4fe86301ef961d3", "c17ca55a1b269dc162f8e14ce3980c0e8f9b15e8", "002c2fefff584dfcf0e42dfb50281cd7f3595d61", "1328f20392004f3911b2640782ddb4f0b3c67a60", "2cc7cf26ca0e537fc5e384b7481584cec6fc13f9", "4218463a5406d0ede576311e09200ceffb20423e", "a970352e2a6c3c998c4e483e2d78c4b3643c7809", "329f6f0caa82be0fa95543721b4addeeae2e6890", "1b3c86ad6c149941750d97bd72b6b0122c1d8b5e", "eccfe28733877a295dc0c068d4b33bd5836797cc", "617b625d90ac6ea9cd32d8cd0e7a1abafc60ea27", "4dd87fa846dc344dcce5fb12de283a0d51dfe140", "1f1946887c8be228864ee23374bc0b427abf0884", "42352baeba582a5cf0e18887c12660ac4fc37434" ], "paperAbstract": "Security and privacy in cloud computing are critical components for various organizations that depend on the cloud in their daily operations. Customers' data and the organizations' proprietary information have been subject to various attacks in the past. In this paper, we develop a set of Moving Target Defense (MTD) strategies that randomize the location of the Virtual Machines (VMs) to harden the cloud against a class of Multi-Armed Bandit (MAB) policy-based attacks. These attack policies capture the behavior of adversaries that seek to explore the allocation of VMs in the cloud and exploit the ones that provide the highest rewards (e.g., access to critical datasets, ability to observe credit card transactions, etc). We assess through simulation experiments the performance of our MTD strategies, showing that they can make MAB policy-based attacks no more effective than random attack policies. Additionally, we show the effects of critical parameters – such as discount factors, the time between randomizing the locations of the VMs and variance in the rewards obtained – on the performance of our defenses. 
We validate our results through simulations and a real OpenStack system implementation in our lab to assess migration times and down times under different system loads.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101169", "http://cs.txstate.edu/~mg65/research/papers/ccgrid17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0ec3cffef7fead78ecb3763a60c6269316d1219c", "sources": [ "DBLP" ], "title": "Combating the Bandits in the Cloud: A Moving Target Defense Approach", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "0ec62e028c2088f1aef01f2e65d167faf1d3569e": { "authors": [ { "ids": [ "39786262" ], "name": "Prasanna Venkatesh Rengasamy" }, { "ids": [ "1706667" ], "name": "Haibo Zhang" }, { "ids": [ "2456813" ], "name": "Nachiappan Chidambaram Nachiappan" }, { "ids": [ "1896477" ], "name": "Shulin Zhao" }, { "ids": [ "1743609" ], "name": "Anand Sivasubramaniam" }, { "ids": [ "1717494" ], "name": "Mahmut T. Kandemir" }, { "ids": [ "8948708" ], "name": "Chita R. 
Das" } ], "doi": "10.1109/IISWC.2017.8167776", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167776", "entities": [ "Android", "Application programming interface", "Central processing unit", "Computation", "Handheld game console", "Hardware acceleration", "Mobile device", "Opcode", "Operand", "Speedup" ], "id": "0ec62e028c2088f1aef01f2e65d167faf1d3569e", "inCitations": [ "651ae380b5d500c613770dbf55c175c52576d7da", "2d1b2392585b09297dd79a14ca3fb853133d64e3", "884e104c13102e1353e85a6a91e41d3cff2c80f5" ], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "187-196", "journalVolume": "", "outCitations": [ "b827f9a11cbfe4444737bb0f29a9ec6c3be6fced", "55f2f237f6264e688057d5de4bae543e11c62746", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "1914bdccc0b3c87fb92b47cf035acb6a2df84dbe", "489bb42f015332d92f6275b7e6a0d7979409ecbd", "8a07b8b84cd4df7e4e20dadc5b5e914fe0e11162", "00ab25c6582d543932fccbb0f15fe93445f95d61", "22352e9cc1561ccb3b4704f36852002fd8bcffcb", "85f5b05e7eba438f1fc4bc6e21cc1af00c424fbc", "be1d0cad67b7fc413cbb272c19f94a5d519f17c5", "6d0ea468a737ef666d6a9da0ee1c2f1cff3f43b3", "29c25d9aa0a55a4cd4564563d5ba3968af3523c9", "542882c44ee2f331e56b5d860d4bab62a1eb0a17", "2405ab404bdef20ff89d956fea63675af3f64feb", "0502eaea10b67788d74e4f4a635f1723bb29e7db", "44995c586069b20469c6ceff0dc11d9ad8cdce22", "884e104c13102e1353e85a6a91e41d3cff2c80f5", "0789af812af3aebac3853dc2745f3847d503fa02", "3186aead0cac0a94a8bf909a5023eae7afa8426b", "2960c89331eb7afa86584792e2e11dbf6a125820", "0a2af2773ca4fcbd22cd7580d29ac7739bcf028c", "b0f70ce823cc91b6a5fe3297b98f5fdad4796bab", "8cfa975a656838356dc4b211b6c2186bc2601a05", "0501505416977d9d97e4df1f1e296c15362033aa", "24cba2623f7d9387f74eda81b76cd281486aa540", "ebee86a031d0c05e68c25719a034ffaf287e542d", "264c8bcc515dc4f79893cd6b8cf552efb6c4aea9", "d609ae840b3ead89300e9e2d2cc8b8cc8772a6d8", "c90699f7aa669ac3f9817b22512c16ef8fb01490", "0846f5a52f223ee1677671acd3da98be01166edb", 
"2497b8117b15f5133d61fc1cb132114bd27c558d", "320a6faa396f27f6f83b22ded48944ffd574fa1e", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "3421f7eccb98a877883f8e3cc68b711f5c784fec", "4eacca1289de2e6cacf1f172c1836aa3f3ca6290", "41ae935202af42cbe2ebc50da546849b5f53121d", "20d0b7473429464fc2f9bfd59d513d63c844551c", "8d93985b5fe6468a9514fde7690870fb60c5b9d3", "07f3b8cfd59624acf80e16794bd3f2bc69acd8e7", "27059058f633f93c3fdc6ccedd7c0903b56f8402", "6f4d58486b1c6d710586b1d182ddad7d09a8da11" ], "paperAbstract": "Current handhelds incorporate a variety of acceler-ators/IPs for improving their performance and energy efficiency. While these IPs are extremely useful for accelerating parts of a computation, the CPU still expends a significant amount of time and energy in the overall execution. Coarse grain customized hardware of Android APIs and methods, though widely useful, is also not an option due to the high hardware costs. Instead, we propose a fine-grain sequence of instructions, called a Load-to-Store (LOST) sequence, for hardware customization. A LOST sequence starts with a load and ends with a store, including dependent instructions in between. Unlike prior approaches to customization, a LOST sequence is defined based on a sequence of opcodes rather than a sequence of PC addresses or operands. We identify such commonly occurring LOST sequences within and across several popular apps and propose a design to integrate these customized hardware sequences as macro functional units into the CPU data-path. 
Detailed evaluation shows that such customized LOST sequences can provide an average of 25% CPU speedup, or 12% speedup for the entire system.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167776", "http://www.cse.psu.edu/hpcl/docs/2017_IISWC_Prasanna.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0ec62e028c2088f1aef01f2e65d167faf1d3569e", "sources": [ "DBLP" ], "title": "Characterizing diverse handheld apps for customized hardware acceleration", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "0ed9aad8b08f052b6ef96a2e2c0fc0e1b5450e78": { "authors": [ { "ids": [ "25530637" ], "name": "Cristobal A. Navarro" }, { "ids": [ "34896107" ], "name": "Raimundo Vega" }, { "ids": [ "1691646" ], "name": "Benjamin Bustos" }, { "ids": [ "2266518" ], "name": "Nancy Hitschfeld-Kahler" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.56", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.56", "entities": [ "Fractal", "Graphics processing unit", "Hausdorff dimension", "Minimum bounding box", "Sierpinski triangle", "Speedup", "Time complexity" ], "id": "0ed9aad8b08f052b6ef96a2e2c0fc0e1b5450e78", "inCitations": [], "journalName": "", "journalPages": "427-433", "journalVolume": "", "outCitations": [ "8be038d946d7f0991651a8972e6acbf43e194a78", "65d0d1804504b488c97837cb36c0237d9c3dd9b5", "67b2918c1804a76552002aa2ea6f35e0722d3b8f", "7418b4e800da0800b1b8cc7307414a8e60bdf16c", "20f344558d22f2fc10df9e5455fcaefa4842d582", "9d72b4341f812bb9e3e9de098b53076721f9d5cd", "8bb8c6754cede532bc98dec94eea3d3550bf7109", "2e2fc5cc8ebdf828df558d5ad30172155da7987a", "b613769edfaf1b673a5ad2b36fbfdda021d4e7b7", "a8cb014bc9fac3fba629f09fa85c5cedf6259d76", "559ee17e688a7440296ec4194d351559616b6c81", "22d09b761dd1265a072d87dba58fd295c7e8fac5", "0989b891d54547253c0964fcc042fb7c8b66f1f0", "4101482421f14913ab90c7054c93634551f13817", "2cbb9fa88a88ad662c364f932df89b483b0acc1d", 
"693f679b0780544220449537f2555b0f138451fe", "133c176b649618b1f6bc13ec6783647c87bf9935", "a57b0c777e1e04ced17991efa43d9aba1ff7b55e", "6bf1cde01c74429a39d7073bc4839eb24b7e48d1", "01eb452d6592d94824920e435c173ad2f0c716af", "596c4f92b44a59dbff0135a87c4ba883bdf90d49" ], "paperAbstract": "This work studies the problem of GPU thread mapping for a Sierpi\u0144ski gasket fractal embedded in a discrete Euclidean space of n \u00d7 n. A block-space map \u03bb : Z2E 7\u2192 Z 2 F is proposed, from Euclidean parallel space E to embedded fractal space F, that maps in O(log 2 log 2 (n)) time and uses no more than O(n) threads with H \u2248 1.58... being the Hausdorff dimension, making it parallel space efficient. When compared to a bounding-box map, \u03bb(\u03c9) offers a sub-exponential improvement in parallel space and a monotonically increasing speedup once n > n0. Experimental performance tests show that in practice \u03bb(\u03c9) can produce performance improvement at any block-size once n > n0 = 2 , reaching approximately 10\u00d7 of speedup for n = 2 under optimal block configurations. 
Keywords\u2014GPU computing; thread mapping; block-space fractal domains; Sierpinski gasket;", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.56", "https://arxiv.org/pdf/1706.04552v1.pdf", "http://arxiv.org/abs/1706.04552" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/0ed9/aad8b08f052b6ef96a2e2c0fc0e1b5450e78.pdf", "s2Url": "https://semanticscholar.org/paper/0ed9aad8b08f052b6ef96a2e2c0fc0e1b5450e78", "sources": [ "DBLP" ], "title": "Block-Space GPU Mapping for Embedded Sierpi\u0144ski Gasket Fractals", "venue": "HPCC/SmartCity/DSS", "year": 2017 }, "0ef2a3c23a7e5304fe943b3ec8c404a76911153c": { "authors": [ { "ids": [ "2438650" ], "name": "Nadime Francis" }, { "ids": [ "1681226" ], "name": "Leonid Libkin" } ], "doi": "10.1145/3034786.3056113", "doiUrl": "https://doi.org/10.1145/3034786.3056113", "entities": [ "Data model", "Database", "Graph database", "Null (SQL)", "Real life", "Recursion", "Recursive definition", "Undecidable problem" ], "id": "0ef2a3c23a7e5304fe943b3ec8c404a76911153c", "inCitations": [], "journalName": "", "journalPages": "389-401", "journalVolume": "", "outCitations": [ "98d892d5d2bc87413a35313d5b6c5977bea407ff", "0dc015847cec6a11b8071b5eaad5188edad2614a", "03601a58040f238e57a83023dd315c062c59b3a8", "5c0f927be0cbb34f8ea0cc5cfe9ce09625a96330", "7238976ef7450263deba5c473d53dbd6394ed0f1", "0b0bb84c6c6e1159d4fb0e98953d30325b2e0f97", "3dfeb940f27adf6b24515a8e9ec22b17eef0ab16", "fcd286b15eedf6f8a84e0a114ac0400d770d7549", "048c70cbd7ad8def131f8147a8c4d580076b3da9", "0465fb139ce90b89ecc02102df31bf4ca8dada5b", "32d33e439abb565aa9304b8b541312ab7fc19eb1", "12883e6f199ccf1ca534dfec88235315124787cc", "7001c94c6fc98d1e027a572840215e3f4485ed46", "40c24d27ea16bc36d296fac476a295450d9602a2", "2bc7bac7f7cdf20816758fd794909176cc97ed92", "05a72f700e774922dd14dfadbe6b33c0f2127565", "5addde4627f8f82cdc0d58546d687d2364ed84b6", "428637b206f73533462581ebffa038b1907d3643", "a97b77c8c3f9e8247c497d1b4c27c958a15f3a62", 
"013e22ad53e5d188a49bee63ac2b35e08f0aae10", "14205ef519e40ee3f63baaaae25bb81984df3710", "95c6db930dff30937b85c9666a096e7d7e71d5b0", "7ee88564daa3c98f230e2c563158f764751e8008", "6f7f51d8818c251689f53e3474037a477b39e64a", "89bb85feef6886292d9f800c4ae069bcae140ea0", "2f68b0b8eedf821f6ce79875d42fe258e7e3742f", "4949f1caaf36b540f5b65f28d787bdfdaef30bf7", "90b3220904d48fc7cc9fe4560ca589893098df98", "e59e0f6cd9ce6fb9484619abf57c511ce70eb46c", "2c6a4af88d24e8a1acc2ceccc06edcb4fc03cf5e", "f5117084ca43e888fb3e17ab0f0e684cced0f8fd", "41770c3168a8918474603d96b634035677043980", "931f080660baa43c78b4f7eef1961830bb044539" ], "paperAbstract": "Schema mappings are a fundamental concept in data integration and exchange, and they have been thoroughly studied in different data models. For graph data, however, mappings have been studied in a restricted context that, unlike real-life graph databases, completely disregards the data they store. Our main goal is to understand query answering under graph schema mappings - in particular, in exchange and integration of graph data - for graph databases that mix graph structure with data. We show that adding data querying alters the picture in a significant way.\n As the model, we use data graphs: a theoretical abstraction of property graphs employed by graph database implementations. We start by showing a very strong negative result: using the simplest form of nontrivial navigation in mappings makes answering even simple queries that mix navigation and data undecidable. This result suggests that for the purposes of integration and exchange, schema mappings ought to exclude recursively defined navigation over target data. For such mappings and analogs of regular path queries that take data into account, query answering becomes decidable, although intractable. 
To restore tractability without imposing further restrictions on queries, we propose a new approach based on the use of null values that resemble usual nulls of relational DBMSs, as opposed to marked nulls one typically uses in integration and exchange tasks. If one moves away from path queries and considers more complex patterns, query answering becomes undecidable again, even for the simplest possible mappings.", "pdfUrls": [ "https://neukraksukpidd.files.wordpress.com/2017/06/schema-mappings-for-data-graphs.pdf", "http://doi.acm.org/10.1145/3034786.3056113", "http://homepages.inf.ed.ac.uk/libkin/papers/pods17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0ef2a3c23a7e5304fe943b3ec8c404a76911153c", "sources": [ "DBLP" ], "title": "Schema Mappings for Data Graphs", "venue": "PODS", "year": 2017 }, "0f021ec0202875cbad964ac94d7990bd441962c7": { "authors": [ { "ids": [ "28921194" ], "name": "Austin Murdock" }, { "ids": [ "13154862" ], "name": "Frank Li" }, { "ids": [ "28962949" ], "name": "Paul Bramsen" }, { "ids": [ "3137133" ], "name": "Zakir Durumeric" }, { "ids": [ "1744800" ], "name": "Vern Paxson" } ], "doi": "10.1145/3131365.3131405", "doiUrl": "https://doi.org/10.1145/3131365.3131405", "entities": [ "3D scanner", "Address space", "Algorithm", "Aliasing", "Brute-force attack", "IP aliasing", "Scalability" ], "id": "0f021ec0202875cbad964ac94d7990bd441962c7", "inCitations": [ "1d633d9c9a402f94dcb7bcfe43b3206d9c626e93" ], "journalName": "", "journalPages": "242-253", "journalVolume": "", "outCitations": [ "d65615e16e66645a080175263c487755b3ea559f", "a14065f8d63acb7a839d99a4298ddc961e286a47", "4ec4ca5624cae0264fd9d3ffaa23753c51ad6c4b", "11e15245ed4f9bef57e99bcbddf6e3811be2c336", "201b0a185dda51629d7b6fdef3b380a0beaba455", "27b36b4f60a34b45af8924944a08ddbb51e5cf11", "49a8f9e8ed7dbd8382dbd30aa81321281cd54c07", "79bd38c9e0d04ac58f23da6e3ce12b241db1260f", "9da30179621536c00037d55d8ad4b29dc625daf5", 
"49cd31f0181b8c1a8e722679464fa13cc058f547", "4d6fb20251d8f4ae199720f37282c736073af527", "1f4d79745c230f1f01a0046d844b1d233ee3f17c", "b58b2942547b72377d7bf60c42cee552ed8a1980", "1450ef74c07f9aa38563c0e40ff69e146c1c5ce0", "83404fbce17628ef4002c8814ea6a05fd8915942", "50004edd385ca73ba931d567bcc19954638bdca1", "4e33e9d1aa91aa78819ec9700d1024e0f0cdef6c", "28055aaeb478fd09f5a042408cd6b63cbf707d1e", "084facafd018e4af6f907ceb59458045622f6971", "2578bb886dcd386d47c39c8e567e15010134e2b6" ], "paperAbstract": "Fast IPv4 scanning has enabled researchers to answer a wealth of new security and measurement questions. However, while increased network speeds and computational power have enabled comprehensive scans of the IPv4 address space, a brute-force approach does not scale to IPv6. Systems are limited to scanning a small fraction of the IPv6 address space and require an algorithmic approach to determine a small set of candidate addresses to probe. In this paper, we first explore the considerations that guide designing such algorithms. We introduce a new approach that identifies dense address space regions from a set of known \"seed\" addresses and generates a set of candidates to scan. We compare our algorithm 6Gen against Entropy/IP---the current state of the art---finding that we can recover between 1--8 times as many addresses for the five candidate datasets considered in the prior work. However, during our analysis, we uncover widespread IP aliasing in IPv6 networks. 
We discuss its effect on target generation and explore preliminary approaches for detecting aliased regions.", "pdfUrls": [ "http://doi.acm.org/10.1145/3131365.3131405", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final245.pdf", "http://www.icir.org/vern/papers/ipv6-scanning-imc17.pdf", "https://conferences.sigcomm.org/imc/2017/slides/6Gen_imc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0f021ec0202875cbad964ac94d7990bd441962c7", "sources": [ "DBLP" ], "title": "Target generation for internet-wide IPv6 scanning", "venue": "IMC", "year": 2017 }, "0f0e89417b2c9f8dae2d9ca969ce50ed784bfdc9": { "authors": [ { "ids": [ "40238765" ], "name": "Joe DeBlasio" }, { "ids": [ "1727599" ], "name": "Stefan Savage" }, { "ids": [ "1739245" ], "name": "Geoffrey M. Voelker" }, { "ids": [ "2199298" ], "name": "Alex C. Snoeren" } ], "doi": "10.1145/3131365.3131391", "doiUrl": "https://doi.org/10.1145/3131365.3131391", "entities": [ "Credential", "Email", "Open Source Tripwire", "Password", "Password strength", "Plaintext", "TOP500", "Value (ethics)" ], "id": "0f0e89417b2c9f8dae2d9ca969ce50ed784bfdc9", "inCitations": [], "journalName": "", "journalPages": "341-354", "journalVolume": "", "outCitations": [ "b645f19ed52b4315a82bf3564b8db5ce230cd49e", "4fcb4f03afc8f7d780929afbf9584bb7e9ced6f3", "30b9e1ac96067556eba30d07bb0f8185ea402596", "b19c2aa855c247461d9caa48ea94b0b39d650001", "38a96255f9e6a38d5ad570849c7a45079ca06bcf", "2946337452f2dc8b2942e81252c2ab0629a4b4e5", "1721632b420d8522b8581d44a8bdb8fa1ffaeb4a", "4ae160584dbb8474fb9309f140a8da0449b3b686", "86ed2800221fa44a58e9ddd28488d6544fd7a608", "17eceec10a0f5f3a3b2ce99309009bfb2e9ef389", "3b532950ded354ff3d657f8061aec210e9059da7", "2aa7a84cb3b33ce97f5b215eaf2ad792f8bbdb3e", "07c8dc37b1061784f3b55cf3ca5d2bc735e1693c", "a03986f4f3a8739d71b1d3269c1a2259fbaef89b", "9db1ca86b92cb5e0a21263de77e3e266b71637af", "199f3b8eef2bd7df237f3c8516fa01854381992d", "053982a9c7c0a16c9b080f800013b945d1135069" ], 
"paperAbstract": "Password reuse has been long understood as a problem: credentials stolen from one site may be leveraged to gain access to another site for which they share a password. Indeed, it is broadly understood that attackers exploit this fact and routinely leverage credentials extracted from a site they have breached to access high-value accounts at other sites (e.g., email accounts). However, as a consequence of such acts, this same phenomena of password reuse attacks can be harnessed to indirectly infer site compromises---even those that would otherwise be unknown. In this paper we describe such a measurement technique, in which unique honey accounts are registered with individual third-party websites, and thus access to an email account provides indirect evidence of credentials theft at the corresponding website. We describe a prototype system, called Tripwire, that implements this technique using an automated Web account registration system combined with email account access data from a major email provider. 
In a pilot study monitoring more than 2,300 sites over a year, we have detected 19 site compromises, including what appears to be a plaintext password compromise at an Alexa top-500 site with more than 45 million active users.", "pdfUrls": [ "http://cseweb.ucsd.edu/~snoeren/papers/tripwire-imc17.pdf", "http://www.sysnet.ucsd.edu/~voelker/pubs/tripwire-imc17.pdf", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final166.pdf", "http://doi.acm.org/10.1145/3131365.3131391" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0f0e89417b2c9f8dae2d9ca969ce50ed784bfdc9", "sources": [ "DBLP" ], "title": "Tripwire: inferring internet site compromise", "venue": "IMC", "year": 2017 }, "0f14ea8b52e648c74eed0c638fbb30c2a57b8c60": { "authors": [ { "ids": [ "1808867" ], "name": "Ori Lahav" }, { "ids": [ "2762830" ], "name": "Viktor Vafeiadis" }, { "ids": [ "2243785" ], "name": "Jeehoon Kang" }, { "ids": [ "1777044" ], "name": "Chung-Kil Hur" }, { "ids": [ "2710559" ], "name": "Derek Dreyer" } ], "doi": "10.1145/3062341.3062352", "doiUrl": "https://doi.org/10.1145/3062341.3062352", "entities": [ "C++", "C++11", "Correctness (computer science)", "Load-link/store-conditional", "Power Architecture", "Sequential consistency", "Weak consistency" ], "id": "0f14ea8b52e648c74eed0c638fbb30c2a57b8c60", "inCitations": [ "0f26a8a75800c42cc473f467e0590728be3e5d91", "11db20e34c62414014f5387a2adbefbbb1e51e61", "2cea911044b0b9dc2cee2e2b04915b9aab22f86f", "deba49c12c039fbd667277207dbaa812fba2dece", "efbc11c574d7a036316b027eadbfa5686263f152", "01fafe33a34b0ba9915c9d8a13927bc9b57309df", "1c8a51d75dc13aa8646d62ef695a1d238996447f", "0956eb481bacadfff91177c31f9750bea772d259", "053d30806e9b629383b38db0ffc383c3ff28339d", "3c142ad4ca5ed211a606450801d54b3b30d687e9", "3cdb85ba7223ec85c99f7f1f4c345d0c03432b4b", "056b4b9f8f3b3a700c0f6af518ad4c36c13357cc", "8a424ab4a90bc2b2888c4f2e32f912f6f5282b7b", "da60333caaed5bd194257945e6b4947533f3f685", 
"c165c956d86b780bb4c1cb4bf078828ae459caa2" ], "journalName": "", "journalPages": "618-632", "journalVolume": "", "outCitations": [ "5eef609f21fc9327e551ab40425f7f1715c3e200", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "de47178b1458510dc12e0595cc9fda383effc998", "5527ef36d64fc703a0a195688deb10faec6224c9", "206cf736da91aef15bb598e097f6c233c3af2b17", "13210f969b8b4d1d12d63a9a8361028a6be498bc", "0c6e3ce37880d5766e6b340513bbfb5738737e5f", "987adbbb4b5baff729cf3907d7f05a86e8651849", "19aab49210282cc19ec4fec06bed029a06497bf8", "74947ab07ed99ea9b7038f8984e9910a87586f57", "061a294940579506fcb89999370eba8b8799346a", "0f0046ae34181e08594ad9be7b5bfffdbaeda177", "1aac5c5e6dda36e455bed8af80dd1fd6bb31321e", "34d2db88f259d69022e7492225301ffd6e0f55c0", "2814d43ef6c8811d6844e3125dd3d4c87c2e226a", "62bd72d7a4160bd1a35191c51137d11cfbe30cf7", "3c142ad4ca5ed211a606450801d54b3b30d687e9", "2cea911044b0b9dc2cee2e2b04915b9aab22f86f", "4a3f0c1b983315c863dd6f4820dc147b50ab6109", "55ca1942c563219a16b95e8f3b4bc0437e01fc5e", "ac35455b128baf4e280f2571160c242b67b3f85e", "35d9e23f4f0767873fcd38a995709a39c23afe41", "0a89fafea6184b469511ba73735d451da92c18fa", "9c0ec4b2cda00b6d5ea52e6b01a068fdc45b8dff", "e2cd72273908651ea11f9cb45d0dd5d755ca3bd0", "5ae07f575fb2feb1e03d08af9fe29fafe0a306d7", "4d1e3d20531b7118c50b137715b69926d990d7c6" ], "paperAbstract": "The C/C++11 memory model defines the semantics of concurrent memory accesses in C/C++, and in particular supports racy \"atomic\" accesses at a range of different consistency levels, from very weak consistency (\"relaxed\") to strong, sequential consistency (\"SC\"). Unfortunately, as we observe in this paper, the semantics of SC atomic accesses in C/C++11, as well as in all proposed strengthenings of the semantics, is flawed, in that (contrary to previously published results) both suggested compilation schemes to the Power architecture are unsound. 
We propose a model, called RC11 (for Repaired C11), with a better semantics for SC accesses that restores the soundness of the compilation schemes to Power, maintains the DRF-SC guarantee, and provides stronger, more useful, guarantees to SC fences. In addition, we formally prove, for the first time, the correctness of the proposed stronger compilation schemes to Power that preserve load-to-store ordering and avoid \"out-of-thin-air\" reads.", "pdfUrls": [ "http://plv.mpi-sws.org/scfix/full.pdf", "https://people.mpi-sws.org/~dreyer/papers/scfix/paper.pdf", "http://doi.acm.org/10.1145/3062341.3062352", "http://sf.snu.ac.kr/gil.hur/publications/scfix.pdf", "https://www.mpi-sws.org/tr/2016-011.pdf", "http://plv.mpi-sws.org/scfix/paper.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0f14ea8b52e648c74eed0c638fbb30c2a57b8c60", "sources": [ "DBLP" ], "title": "Repairing sequential consistency in C/C++11", "venue": "PLDI", "year": 2017 }, "0f1afa64525d8a03bed39fe200fb9baa40173cc5": { "authors": [ { "ids": [ "3261085" ], "name": "Steffen Bondorf" }, { "ids": [ "3366278" ], "name": "Paul Nikolaus" }, { "ids": [ "1738878" ], "name": "Jens B. 
Schmitt" } ], "doi": "10.1145/3084453", "doiUrl": "https://doi.org/10.1145/3084453", "entities": [ "Algorithm", "Algorithmic efficiency", "Best, worst and average case", "Computation", "Direct numerical control", "End-to-end principle", "Experiment", "Heuristic", "Network calculus", "Numerical analysis", "Program optimization" ], "id": "0f1afa64525d8a03bed39fe200fb9baa40173cc5", "inCitations": [ "9ad0b90c8ff9d1b68c4aca1ab52c92bc57dbb7d3", "8f26ac1c645f6d91a1ef1c92e868a277d88730c0", "afc48baa9eff83e82da5d2af0cb4e768bfd8d724", "d3e49bcbe6a7dc15aca40e95e6760b03ae0b9850", "dc1b785419fd6c0307b6d10dbfff087ae368086d" ], "journalName": "", "journalPages": "65", "journalVolume": "", "outCitations": [], "paperAbstract": "Networks are integral parts of modern safety-critical systems and certification demands the provision of guarantees for data transmissions. Deterministic Network Calculus (DNC) can compute a worst-case bound on a data flow's end-to-end delay. Accuracy of DNC results has been improved steadily, resulting in two DNC branches: the classical algebraic analysis and the more recent optimization-based analysis. The optimization-based branch provides a theoretical solution for tight bounds. Its computational cost grows, however, (possibly super-)exponentially with the network size. Consequently, a heuristic optimization formulation trading accuracy against computational costs was proposed. In this article, we challenge optimization-based DNC with a new algebraic DNC algorithm. We show that:
  1. no current optimization formulation scales well with the network size and
  2. algebraic DNC can be considerably improved in both aspects, accuracy and computational cost.
\n To that end, we contribute a novel DNC algorithm that transfers the optimization's search for best attainable delay bounds to algebraic DNC. It achieves a high degree of accuracy and our novel efficiency improvements reduce the cost of the analysis dramatically. In extensive numerical experiments, we observe that our delay bounds deviate from the optimization-based ones by only 1.142% on average while computation times simultaneously decrease by several orders of magnitude.", "pdfUrls": [ "https://arxiv.org/pdf/1603.02094v2.pdf", "http://arxiv.org/pdf/1603.02094v1.pdf", "https://arxiv.org/pdf/1603.02094v3.pdf", "http://arxiv.org/pdf/1603.02094.pdf", "http://doi.acm.org/10.1145/3084453", "https://disco.informatik.uni-kl.de/discofiles/publicationsfiles/BNS17-1.pdf", "https://disco.cs.uni-kl.de/discofiles/publicationsfiles/BNS17.pdf", "https://arxiv.org/pdf/1603.02094v1.pdf", "http://doi.acm.org/10.1145/3078505.3078594" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0f1afa64525d8a03bed39fe200fb9baa40173cc5", "sources": [ "DBLP" ], "title": "Quality and Cost of Deterministic Network Calculus: Design and Evaluation of an Accurate and Fast Analysis", "venue": "SIGMETRICS", "year": 2017 }, "0f2a463eece4960dc62da2df58a224ae85e1473b": { "authors": [ { "ids": [ "40362329" ], "name": "Jianguo Wang" }, { "ids": [ "40648346" ], "name": "Chunbin Lin" }, { "ids": [ "1786049" ], "name": "Yannis Papakonstantinou" }, { "ids": [ "1760342" ], "name": "Steven Swanson" } ], "doi": "10.1145/3035918.3064007", "doiUrl": "https://doi.org/10.1145/3035918.3064007", "entities": [ "Algorithm", "Bitmap", "Database", "Experiment", "Information retrieval", "Inverted index", "Real life", "Synthetic data", "Zipf's law" ], "id": "0f2a463eece4960dc62da2df58a224ae85e1473b", "inCitations": [ "a5081939c059add7ed0754cee736e1f272916de3" ], "journalName": "", "journalPages": "993-1008", "journalVolume": "", "outCitations": [ "62c7aa24f09320320a309c4298e045faf25ac7cd", 
"ce97bda189755032f19e092dbbf7740707f2ae86", "0437e781bf22d47f3a13cca1e27eca6ae91d3f41", "0447f4b8b695f76b2ce768cc236068dd89841767", "409f067439ecbc2da37a03c9609034946393608e", "3f807d32f1033ddb76ae52527bbc5c3a72ef1e0e", "392d804a0fe5ad68f721857cf5cbe01963f3d8a4", "5d37dbcead67858f972056555745041250bb1b6a", "fc3272302461b74217662085a8a05a5e500dbf05", "1feb21ac7d86ed235f9989aea1131c6008061de1", "2c5b8766a1dae62b86ba38013253ab8673f6ec44", "6f4cc8b9561b5afb401bf07834e746d6ac28d141", "8f5dce9df50316a41578e0689b41b8ab69a0ff09", "55ce0391cae3d7663a26bc6bb1a1e5618b8b9475", "8ce69d30bca576f7230782a15df55b231ecd6cc3", "037f2f2a9b6d3438d87b2ae3f38f53be8c47f44b", "3cd9a873ba0f7fb754feb2cd7da567a31290b6e5", "11e7e1001f96fd537f1f24cf211cd8c170773970", "9233979387c07fb9dd32b91612804c33ce53b8b8", "20548990990c447ab54a3ecba82af2b5443a01d6", "e6bc99c79d2187a7a5f99dfc5e92b623f6bfc050", "ad585974e2d05758cd36e2ab2e346f38b4de4a0f", "180477d8f809745da689372715c225ef5d3c098d", "02d9013e5d370fb79ff1569a59190e18515fa3cd", "66713fbcb8d5e48a9eb6425bd7fdbb53751e60b1", "51f53b98ccc60bf255ab653a11b1573ed3c5d815", "e4c158621b855d30292d36558d19f90af28d7978", "040678daf6a49a88345ee0c680fccfd134f24d4b", "d2be22e50067b6ac13ccbafb3f7ece2a96988c9d", "313e8120c31fda6877ea426d8a3be9bcf1b6e088", "7531ab50766c71d05e7d8bd5039bab1e82623e97", "5df0ced15b18103c9517805c4c0447e2af72e217", "c83dd0b8ecbae77b799d61c8c40889d1d0b555a6", "3a387279bb9dd664dd29d65cda95c3fddad823e8", "0e7148699994155cf8afae0ed943812fbb4f4b7f", "3c513e3f47c87da19a12cc65fb809eab671bf7ee", "19ae1215523450e717acb3d510bced0b88499233", "924276d95d1bdaf087beb0ccf699443b9bf855ec", "46341278ed333089ba99bcc45eb87f61c32f63f5" ], "paperAbstract": "Bitmap compression has been studied extensively in the database area and many efficient compression schemes were proposed, e.g., BBC, WAH, EWAH, and Roaring. 
Inverted list compression is also a well-studied topic in the information retrieval community and many inverted list compression algorithms were developed as well, e.g., VB, PforDelta, GroupVB, Simple8b, and SIMDPforDelta. We observe that they essentially solve the same problem, i.e., how to store a collection of sorted integers with as few as possible bits and support query processing as fast as possible. Due to historical reasons, bitmap compression and inverted list compression were developed as two separated lines of research in the database area and information retrieval area. Thus, a natural question is: Which one is better between bitmap compression and inverted list compression?\n To answer the question, we present the first comprehensive experimental study to compare a series of 9 bitmap compression methods and 12 inverted list compression methods. We compare these 21 algorithms on synthetic datasets with different distributions (uniform, zipf, and markov) as well as 8 real-life datasets in terms of the space overhead, decompression time, intersection time, and union time. Based on the results, we provide many lessons and guidelines that can be used for practitioners to decide which technique to adopt in future systems and also for researchers to develop new algorithms.", "pdfUrls": [ "http://db.ucsd.edu/wp-content/uploads/2017/03/sidm338-wangA.pdf", "http://doi.acm.org/10.1145/3035918.3064007" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0f2a463eece4960dc62da2df58a224ae85e1473b", "sources": [ "DBLP" ], "title": "An Experimental Study of Bitmap Compression vs. 
Inverted List Compression", "venue": "SIGMOD Conference", "year": 2017 }, "0f5239630861698b0b206eaf1adda737012aa636": { "authors": [ { "ids": [ "29730207" ], "name": "Thanh-Chung Dao" }, { "ids": [ "1783820" ], "name": "Shigeru Chiba" } ], "doi": "10.1007/978-3-319-64203-1_32", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_32", "entities": [ "Apache Hadoop", "Computation", "Experiment", "In-memory database", "MapReduce", "Memcached", "Message Passing Interface", "Software deployment", "Supercomputer" ], "id": "0f5239630861698b0b206eaf1adda737012aa636", "inCitations": [], "journalName": "", "journalPages": "442-454", "journalVolume": "", "outCitations": [ "69884f09be947c43e1029bb3ddc95db5edc2a03d", "a8b429845ac951b0fe6cdb071ae862c7d305e36e", "0558c94a094158ecd64f0d5014d3d9668054fb97", "31e083d62c82309fbb7f80f2ff5ee198a7d53f8c", "2f47c7304aa5008911db59bef5c0fd3d3e212088", "914f287d6e83ac8e525d4c0e643cee6a1dce6fb4", "70bd563d00fcb402eb7d9f251bea544ecb08f213", "8ae9e0957f05ec79802f678abeaca97da18853b6", "2cdcddb08ae6060e94cba6c9b2b58b87324e686f", "0276440f721b17ff77165f2b1ed24e029b9a2432", "f6a23e41d4805e7ffa63a7ee01148c40a4b59496" ], "paperAbstract": "This paper reports our experiments to compare various deployment strategies of memcached-like in-memory storage for Hadoop on supercomputers, where each node often does not have a local disk but shares a slow central disk. For the experiments, we developed our own memcached-like file system, named SEMem, for Hadoop. Since SEMem was designed for supercomputers, it uses MPI for communication. 
SEMem is configurable to adopt various deployment strategies and our experiments revealed that a good deployment strategy was allocating some nodes that work only for in-memory storage but do not directly perform map-reduce computation.", "pdfUrls": [ "https://www.csg.ci.i.u-tokyo.ac.jp/paper/chung-europar17-slide.pdf", "https://doi.org/10.1007/978-3-319-64203-1_32", "https://www.csg.ci.i.u-tokyo.ac.jp/paper/chung-europar17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/c507/5f8d4cbce8c0e6cf8b93270bb7c5d4dc861d.pdf", "s2Url": "https://semanticscholar.org/paper/0f5239630861698b0b206eaf1adda737012aa636", "sources": [ "DBLP" ], "title": "SEMem: Deployment of MPI-Based In-Memory Storage for Hadoop on Supercomputers", "venue": "Euro-Par", "year": 2017 }, "0f645729155a85c2cde050c349c60792185c546f": { "authors": [ { "ids": [ "2605768" ], "name": "Edith Cohen" } ], "doi": "10.1145/3097983.3098020", "doiUrl": "https://doi.org/10.1145/3097983.3098020", "entities": [ "Approximation algorithm", "Approximation error", "Computation", "Concave function", "Distributed computing", "HyperLogLog", "Streaming media" ], "id": "0f645729155a85c2cde050c349c60792185c546f", "inCitations": [ "0f96356dd6b47683cc5dce18e7c1842f1a33af9d" ], "journalName": "", "journalPages": "105-114", "journalVolume": "", "outCitations": [ "1f55cc9ae9ea3d2ebb23c4427175f01829a4105e", "06f950d897ba590e20392035770d54ff6dcafc2b", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "3256ea37a85284116976edb6d8b578186f76720c", "e50a316f97c9a405aa000d883a633bd5707f1a34", "635be627d2d01996267b30d3263b309e409b36c6", "114bed8564dfe17089007744e8891a8d7bd47ceb", "04ee1c7ed1b22ce513ce2672b89eb3b2ea371258", "02acc390a765e098d3448451d8e24d60f3972722", "cf53d0fa1150a213d0e85eca126571c71765f771", "b36c153be410c0d937d7583de557c0375506d15a", "bd3102fa60685380af4c0d9da94237978d40b5bc", "269a701af1aba00837838030c9a862be26acd1da", "42b1400e01b976b8e9bfa7b772ec41338dc32ed3", "d495a2203b9cb84366f0b387d6ed4b09ac2d04ea", 
"0825788b9b5a18e3dfea5b0af123b5e939a4f564", "e18d00748d3c6c1f07415a6f4b751bee1d8091c1", "112b7fd113ec98e0aae7e8f8364eaaa1893231c1", "777df8324470704f0a2907e7db22e7b14218550f", "05256a303d9c6debd850da2c1c0dbab3a679ba90" ], "paperAbstract": "One of the most common statistics computed over data elements is the number of distinct keys. A thread of research pioneered by Flajolet and Martin three decades ago culminated in the design of optimal approximate counting sketches, which have size that is double logarithmic in the number of distinct keys and provide estimates with a small relative error. Moreover, the sketches are composable, and thus suitable for streamed, parallel, or distributed computation.\n We consider here all statistics of the frequency distribution of keys, where a contribution of a key to the aggregate is concave and grows (sub)linearly with its frequency. These fundamental aggregations are very common in text, graphs, and logs analysis and include logarithms, low frequency moments, and cap statistics.\n We design composable sketches of double-logarithmic size for all concave sublinear statistics. Our design combines theoretical optimality and practical simplicity. In a nutshell, we specify tailored mapping functions of data elements to output elements so that our target statistics on the data elements is approximated by the (max-) distinct statistics of the output elements, which can be approximated using off-the-shelf sketches. 
Our key insight is relating these target statistics to the complement Laplace transform of the input frequencies.", "pdfUrls": [ "http://arxiv.org/pdf/1607.06517v2.pdf", "https://arxiv.org/pdf/1607.06517v3.pdf", "https://arxiv.org/pdf/1607.06517v5.pdf", "https://arxiv.org/pdf/1607.06517v1.pdf", "https://arxiv.org/pdf/1607.06517v4.pdf", "http://doi.acm.org/10.1145/3097983.3098020", "https://arxiv.org/pdf/1607.06517v2.pdf", "http://arxiv.org/pdf/1607.06517v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0f645729155a85c2cde050c349c60792185c546f", "sources": [ "DBLP" ], "title": "HyperLogLog Hyperextended: Sketches for Concave Sublinear Frequency Statistics", "venue": "KDD", "year": 2017 }, "0f6c10fe990f43e7b0efd29e8724ee6fd0b4aede": { "authors": [ { "ids": [ "2201690" ], "name": "Hamidreza Jahanjou" }, { "ids": [ "1779560" ], "name": "Erez Kantor" }, { "ids": [ "1696669" ], "name": "Rajmohan Rajaraman" } ], "doi": "10.1145/3087556.3087567", "doiUrl": "https://doi.org/10.1145/3087556.3087567", "entities": [ "Algorithm", "Approximation", "Approximation algorithm", "Asymptotically optimal algorithm", "Computation", "Data center", "Heuristic", "Linear programming", "Network topology", "Order of approximation", "Polynomial", "Scheduling (computing)", "Stock and flow", "Time complexity" ], "id": "0f6c10fe990f43e7b0efd29e8724ee6fd0b4aede", "inCitations": [ "ab889b0132ad40e44e8dfe338390a65324780a3b" ], "journalName": "", "journalPages": "45-54", "journalVolume": "", "outCitations": [ "bd87cc38abc992be2d154a522729cddfa90dc4fc", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "d7622866a173517e9a624e78b52ce637a431e3cb", "00ddc85d502aa4bdc45a3b8b9099fad75938b50a", "5bc734ab02ec4567471247a2994d3500d5380dce", "0541d5338adc48276b3b8cd3a141d799e2d40150", "b58a1ceb4a8fe36aed5734e4114bad6b5fc4bb7c", "1cafaac11664e48bd121695ac1be06b0930d00a5", "231ba17921ebd80e95771e28dfb5082e169d5a53", "c91946fcdd4e8cb8ff255213f978963503b07411", 
"1904d6df4434df0abb304a24a582fdd662f8e7be", "485ef908ae922559c530edb75ba71ea26ffba2ad", "332f77fd05703c1607e3b57884ad31fb1fad0104", "37525b2c3cc16a2fe166708a4f7081b949b1888e", "01acb4d6bfc7b289a7a94ee0835eca83d1c2744c", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "4c4ba80e57e1969d5409a0224af338cde3018ba3", "2e0057911766d411b7a342c8bae2d6e3d29c47cd", "cdc0120a097a5d1fe56b54f822fe7c9510a655d1" ], "paperAbstract": "Many modern datacenter applications involve large-scale computations composed of multiple data flows that need to be completed over a shared set of distributed resources. Such a computation completes when all of its flows complete. A useful abstraction for modeling such scenarios is a coflow, which is a collection of flows (e.g., tasks, packets, data transmissions) that all share the same performance goal. In this paper, we present the first approximation algorithms for scheduling coflows over general network topologies with the objective of minimizing total weighted completion time. We consider two different models for coflows based on the nature of individual flows: circuits, and packets. We design constant-factor polynomial-time approximation algorithms for scheduling packet-based coflows with or without given flow paths, and circuit-based coflows with given flow paths. Furthermore, we give an O(log n/log log n)-approximation polynomial time algorithm for scheduling circuit-based coflows without given flow paths (here n is the number of network edges).\n We obtain our results by developing a general framework for coflow schedules, based on interval-indexed linear programs, which may extend to other coflow models and objective functions and may also yield improved approximation bounds for specific network scenarios. 
We also present an experimental evaluation of our approach for circuit-based coflows that show a performance improvement of at least %22 on average over competing heuristics.", "pdfUrls": [ "http://arxiv.org/abs/1606.06183", "https://arxiv.org/pdf/1606.06183v3.pdf", "http://arxiv.org/pdf/1606.06183v1.pdf", "https://arxiv.org/pdf/1606.06183v4.pdf", "http://arxiv.org/pdf/1606.06183v2.pdf", "https://arxiv.org/pdf/1606.06183v1.pdf", "http://doi.acm.org/10.1145/3087556.3087567", "https://arxiv.org/pdf/1606.06183v2.pdf", "https://arxiv.org/pdf/1606.06183v5.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0f6c10fe990f43e7b0efd29e8724ee6fd0b4aede", "sources": [ "DBLP" ], "title": "Asymptotically Optimal Approximation Algorithms for Coflow Scheduling", "venue": "SPAA", "year": 2017 }, "0f6edbf18ee7630b351647739d37803be006ec14": { "authors": [ { "ids": [ "20165375" ], "name": "Pulkit A. Misra" }, { "ids": [ "2495308" ], "name": "I\u00f1igo Goiri" }, { "ids": [ "21188143" ], "name": "Jason Kace" }, { "ids": [ "2118138" ], "name": "Ricardo Bianchini" } ], "doi": "", "doiUrl": "", "entities": [ "Apache Hadoop", "Batch processing", "Data center", "Durability (database systems)", "Interference (communication)", "Requirement", "Scalability", "Server (computing)", "Simulation", "Software deployment", "Usability" ], "id": "0f6edbf18ee7630b351647739d37803be006ec14", "inCitations": [], "journalName": "", "journalPages": "799-811", "journalVolume": "", "outCitations": [ "01b54e85d5b02ad6af205106739a409a105fee93", "3a043714354fe498752b45e4cf429dbae0fb2558", "3c0bc4e9d30719269b0048d4f36752ab964145dd", "2da760f90c3d2bf6598becdde9063093f488548c", "08632fe2b934ed15d3499e7321282c81adc2c390", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "90f437bc382915c072cc0f2e7145336630fdadb8", "396514fb219879a4a18762cddfae2a6a607f439f", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "4dfdd7cd8abbd68675ea19c5902e5a7d14709799", "3d3f22ee1797b0e086da07e00d0f59b1aca08bf3", 
"1cfee3e6bad11c5c92cd06065064c474a00e2412", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "4af63ed343df388b6353b6fc77c7137d27822bf4", "8969f883979ac45fe24cecde39c15ddc4bd756d3", "5044a5212e14924e5d39d4e63679bd3a8b550ebf", "638c917d981915bc7a00bb0941cdd38111df51de" ], "paperAbstract": "Datacenters can use distributed file systems to store data for batch processing on the same servers that run latencycritical services. Taking advantage of this storage capacity involves minimizing interference with the co-located services, while implementing user-friendly, efficient, and scalable file system access. Unfortunately, current systems fail one or more of these requirements, and must be manually partitioned across independent subclusters. Thus, in this paper, we introduce techniques for automatically and transparently scaling such file systems to entire resource-harvesting datacenters. We create a layer of software in front of the existing metadata managers, assign servers to subclusters to minimize interference and data movement, and smartly migrate data across subclusters in the background. We implement our techniques in HDFS, and evaluate them using simulation of 10 production datacenters and a real 4k-server deployment. Our results show that our techniques produce high file access performance, and high data durability and availability, while migrating a limited amount of data. 
We recently deployed our system onto 30k servers in Bing\u2019s datacenters, and discuss lessons from this deployment.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-misra.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/misra", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/06/paper-2.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_misra.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/edcb/a29fdebb76fe89ac7e5ba7c9af36f9b82890.pdf", "s2Url": "https://semanticscholar.org/paper/0f6edbf18ee7630b351647739d37803be006ec14", "sources": [ "DBLP" ], "title": "Scaling Distributed File Systems in Resource-Harvesting Datacenters", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "0f7f5679615effcc4c9b98cf2deb17c30744a6d7": { "authors": [ { "ids": [ "10430740" ], "name": "Leonardo Filipe Rodrigues Ribeiro" }, { "ids": [ "10767922" ], "name": "Pedro H. P. Saverese" }, { "ids": [ "2041244" ], "name": "Daniel R. 
Figueiredo" } ], "doi": "10.1145/3097983.3098061", "doiUrl": "https://doi.org/10.1145/3097983.3098061", "entities": [ "Experiment", "Numerical analysis", "Numerical linear algebra" ], "id": "0f7f5679615effcc4c9b98cf2deb17c30744a6d7", "inCitations": [ "25166ab3d394c06e75e6f17652932f65bc149a35", "df6bd4a73f33150aedf18db417753e1b0ac495aa", "fb76adeff0309ff4c8de4d0b413a8e3a637774d0", "3a241cdba54dfe035f005080964060bdd63d1500", "de76c69bcc81493b7e4008a0a8527fad895bf713", "cd9b25a3223300aa4c70050b19f6052e09c0be73", "b491b54fefb01e23c57c9470b702e4e5da98c704", "2d93b1396bc816b3681ae17961b8b31894555b3f", "93dd2d3ed46ef9e5edb103c439ec6e06da383109", "19db86e8598dacd340bf44a272458e4ecf3be467", "790d34fe5bbb768ca5d17bf7188fefd95783a703", "3915a22bdcd4f371e9a70fa4742fb7b5237f5388", "f6c565315ca36120ae656c27f46b18874e0f3206", "ffeacdfd523df26d5aa7b3c7317db76f19871222", "eed1d7303988334fe1afd581b9f37a1d2389cd57", "4d086243ac132523aa287100f5490739cc97c6f9", "6236d317f1242cab4d42c8d9f11746dae2f0d92c", "50b0425a2008bc3a11eab5bb9ddb59464a7a9f80", "e663a64b3c6fb232703dd4490fb203f1df3ef365", "5854a866716cedf321e13a8158d87ad55ed3bbb2", "7410c07c7e227ff27d720ad48394c1e198f1a0e4", "958ae18d84737749d30cb51c8f6ad764773d9f19", "ad7c7a35f42493a268ffd22e53b17b1e1f4dd15d", "873bb1d992e55afca552e27d9c58afd329220c7f", "fb1c5ad17b97fa51e8c77fb68ac5d92ca8cbfc8f" ], "journalName": "", "journalPages": "385-394", "journalVolume": "", "outCitations": [ "1a37f07606d60df365d74752857e8ce909f700b3", "cece7c3047859db7cae3474b665ddbf39ab0073f", "e02f59cf876cb40233573ff78a1609f969d301cc", "5420175febf6d9d94e6d4967cf06a6a74e5b3f12", "c0c807b59e6497fe07de537d9eb11fdbd442ecf6", "8c81ddd9c1a6f0d44851c0d2f7f6c4d28ba38382", "49866e773a2776ad4f09904d47e8da1a390d893d", "0834e74304b547c9354b6d7da6fa78ef47a48fa8", "7e1874986cf6433fabf96fff93ef42b60bdc49f8", "199369d8eaff23e00c106ef2ddc4181696600c1f", "1c059493904b2244d2280b8b4c0c7d3ca115be73", "5f4c05ba08fac9cde40235ebd4eb9abc6ed2d712", 
"4afa6c2eb552ceef0e396fbfe449932492873034", "1871ea4cf23441d0297c99d9115f664a6ba0efda", "265ca585e5126e8fed7bec0a2bc71809c8447a1b", "1a67622ca58aa851afe36ad6c6e78f9fb9d691d2", "2bf3fb9d50a39ad727f248ffd4b14f07a2d48e4a", "30a569fcf7a455035abe9ad6f78d5858171e2695", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "43404ed61c4d67aa52a77f8e5f5c722c66af0d86", "907adadbf84399075b09086d97987eb4b1a38ef6", "05a20cde15e172fc82f32774dd0cf4fe5827cad2", "46f0d44599188dadf831a3c0e486b2f0391d0dec" ], "paperAbstract": "Structural identity is a concept of symmetry in which network nodes are identified according to the network structure and their relationship to other nodes. Structural identity has been studied in theory and practice over the past decades, but only recently has it been addressed with representational learning techniques. This work presents struc2vec, a novel and flexible framework for learning latent representations for the structural identity of nodes. struc2vec uses a hierarchy to measure node similarity at different scales, and constructs a multilayer graph to encode structural similarities and generate structural context for nodes. Numerical experiments indicate that state-of-the-art techniques for learning node representations fail in capturing stronger notions of structural identity, while struc2vec exhibits much superior performance in this task, as it overcomes limitations of prior approaches. 
As a consequence, numerical experiments indicate that struc2vec improves performance on classification tasks that depend more on structural identity.", "pdfUrls": [ "http://arxiv.org/abs/1704.03165", "https://arxiv.org/pdf/1704.03165v3.pdf", "http://www.land.ufrj.br/~leo/struc2vec-slides.pdf", "https://arxiv.org/pdf/1704.03165v1.pdf", "https://arxiv.org/pdf/1704.03165v2.pdf", "http://doi.acm.org/10.1145/3097983.3098061", "http://shichuan.org/hin/topic/Embedding/2017.%20KDD%20struc2vec%20Learning%20Node%20Representations%20from%20Structural%20Identity.pdf", "https://pdfs.semanticscholar.org/a734/e55941d0970c2f8a9f8eeba195c4ce1a3e90.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0f7f5679615effcc4c9b98cf2deb17c30744a6d7", "sources": [ "DBLP" ], "title": "struc2vec: Learning Node Representations from Structural Identity", "venue": "KDD", "year": 2017 }, "0fd85ea4f3701f6baebffcaab39b858b7142b0dc": { "authors": [ { "ids": [ "2840235" ], "name": "Matthew Curtis-Maury" }, { "ids": [ "2042885" ], "name": "Ram Kesavan" }, { "ids": [ "32638647" ], "name": "Mrinal K. 
Bhattacharjee" } ], "doi": "10.1109/ICPP.2017.35", "doiUrl": "https://doi.org/10.1109/ICPP.2017.35", "entities": [ "Computer data storage", "Core Storage", "Dirty data", "Manycore processor", "NetApp filer", "Operating system", "Parallel computing", "Persistence (computer science)", "Requirement", "Scalability", "Thread (computing)", "Throughput" ], "id": "0fd85ea4f3701f6baebffcaab39b858b7142b0dc", "inCitations": [ "8ee82c0bd80e86c55b56414a602d53164d4fb5c0", "ad897b9261a39cdae6e8b0fdcd755e6001e004bc" ], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "261-270", "journalVolume": "", "outCitations": [ "12a0046a1197ae63c3d616c74e367dc583cef196", "7ff303e7c450aee82b6fff5cc64be54e5604da01", "08b5b8270713bbbc0b5b1a31c8625b0bf87d674d", "7ef137faca4da278382ccdcb90da8fcd19faca36", "2c84daae142c5b0f4ca6a6772ca7e8cac7d7afca", "152a527b4dfb4ef3c43a24d6590f15d9c4507b29", "045729ec838ecc50be166fe4511506ac4a08226d", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "4ef1fcc896885d383442b2aff92c2109cd0da9be", "120c8504b4290920309165d48bb032f2c724a161", "c9ef82a4ad0b1b33296cea86fb2ec7558cf798fb", "cee31c740be97c45ef02c22b69ae19cc8897a1f2", "7062268b78dff4a8819fe3f1e89c6b5344f715a5", "4eebd5fa091ab716224c6129f81a8d83320ac065", "1645af0ebea3336d50e7140000c5adbc9bc24833", "012ab4527d6aee2387c243d304c624f3b9cf03f3", "f49c01b87b378ce26a6c6c16e79ee6ad17672877", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "20a108587321823ca9cdd93ac84fc316a0400630", "158ebe313a72857c5534a313f3ec0e413593b732", "d52dc593148fe2399f9274e08e641934a965de6f", "088e3e939ad234b6fdd0e321290fb26937dc2553" ], "paperAbstract": "Enterprise storage systems must scale to increasing core counts to meet stringent performance requirements. Both the NetApp® Data ONTAP® storage operating system and its WAFL® file system have been incrementally parallelized over the years, but some components remain single-threaded. 
The WAFL write allocator, which is responsible for assigning blocks on persistent storage to dirty data in a way that maximizes write throughput to the storage media, is single-threaded and has become a major scalability bottleneck. This paper presents a new write allocation architecture, White Alligator, for the WAFL file system that scales performance on many cores. We also place the new architecture in the context of the historical parallelization of WAFL and discuss the architectural decisions that have facilitated this parallelism. The resulting system demonstrates increased scalability that results in throughput gains of up to 274% on a many-core storage system.", "pdfUrls": [ "https://atg.netapp.com/wp-content/uploads/2017/08/sw-WAFL.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.35" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0fd85ea4f3701f6baebffcaab39b858b7142b0dc", "sources": [ "DBLP" ], "title": "Scalable Write Allocation in the WAFL File System", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "0fe510a34db5421b4cd762cf207138c20f2af1cf": { "authors": [ { "ids": [ "2087483" ], "name": "Alexey Ilyushkin" }, { "ids": [ "2415854" ], "name": "Ahmed Ali-Eldin" }, { "ids": [ "2527226" ], "name": "Nikolas Roman Herbst" }, { "ids": [ "1805880" ], "name": "Alessandro Vittorio Papadopoulos" }, { "ids": [ "2038661" ], "name": "Bogdan Ghit" }, { "ids": [ "1776848" ], "name": "Dick H. J. 
Epema" }, { "ids": [ "1760940" ], "name": "Alexandru Iosup" } ], "doi": "10.1145/3030207.3030214", "doiUrl": "https://doi.org/10.1145/3030207.3030214", "entities": [ "Autoscaling", "Cloud computing", "Elasticity (cloud computing)", "Performance Evaluation", "Provisioning", "Quality of service", "Scheduling (computing)", "Software as a service" ], "id": "0fe510a34db5421b4cd762cf207138c20f2af1cf", "inCitations": [ "83175ee2e7e6a0f609722d302b6b9ca3fcc8ac16", "bcc389897fcadbaa222b664fe44bfcf87e01576d", "61dfe13f073b1b0ae7c5d202d8346b0cf565cf87", "61a1afa693442d829072114910b1775a8e4ceefa", "685ec2c57f9076afc8832434e0b3dfd297f49715", "bdbd57ea557992f7f054b37b7d6af7a93d6b1a9f", "32908e3612bc167ff3bcb36388c2772965858b53", "752fc519fe07d4c33d235ec2436516861e0c0be8", "a880c4cf443affd98ba3dd0a52027c26b1a77983", "eeaaf554d145b0d3086b9aaf4139291b1565ed4d", "55fe03e02986586cfaa79962501ee8a421e4da00" ], "journalName": "", "journalPages": "75-86", "journalVolume": "", "outCitations": [ "55b5f88ba09e4f2f53aec5418835f2a6498cd289", "49b9edf9b222d858d7cfb92c820dbd63a4f5df04", "813e836347456fb2d61c8feb6a71b0a4165c5c6d", "0600ce64e1283df3e75da6447320d3ca5ed16a79", "108437fd06da184ef8bd79bb676c5db8fa8a8401", "1d84ed02bc65400393723c37a70cc68fe39fbef4", "0a679469a275d81b1851d6293476cffc3855a76f", "a12f10e9eee218f3b50c3a93f52d33f945a369e5", "0b29121949897c275b76ea25a1f50b4868a152f0", "2c233a56e159792b5f25e4ff73391696675d00b6", "c808e997f6c32237ce0cb5a0f0eacf72c46e4611", "e51fdcace8037f2092333bb4c3da283b88de5571", "3000e77ed7282d9fb27216f3e862a3769119d89e", "764a381d0162eb9efd1083c2fb646718f881d6a6", "bc46cc852b4a240a81a10e3b00f20d1e06139915", "02dbaa17e5fafd081add4841cb78a549ab33531f", "94dc5570d2250eb624802664f08d0f5db2695d0d", "e31d26f25c60f81696ddefda81ae18ba95b16168", "64104ca3fe4552ce424c24ff28cd8678ec19b519", "4c3860437575aea5e479dc323a7edb1c2291ecee", "7251ed0fbf99b44712d438e84dd94a50e4407d3c", "1324f1d5b20f08cac775f10089a788767c56d5a9", "8b872fc1eae681f073559defc55ef42548d2d8f9", 
"e4a2becb7d0c58dedfcad218d3764f32b640a78f", "238c8481ff5b5cc52a272848d401a0219a2adbcc", "c1c71d0b6c0f2705e0e407f6823c928f83d67f73", "83ce161c60a85f7124ea2227fc3891bb626f40bd", "1db3e9e9e716d7c4f5b46a2666f6ebcf26c7e9de", "c49bbaf1a8a2f4ecefe496591197707aecbd12f8", "1cff7e796a4bf0c22765acfefc15b17091367243", "942ecc61675d81724823b893df0f1c9418b52d90", "7539420de7235248cc167d43afb4dcabaf52310d", "6168919f450a8ed906051f2562abbfe51aa4d97d", "6ab41c61ad176700e5ffd70c20be4e7f4b676b03", "5ec6157896c053600625da7a5da6f0451ed9c12e", "d60afcfd02d0dcf2d4c31ea6fa73e42076f94b55" ], "paperAbstract": "Simplifying the task of resource management and scheduling for customers, while still delivering complex Quality-of-Service (QoS), is key to cloud computing. Many autoscaling policies have been proposed in the past decade to decide on behalf of cloud customers when and how to provision resources to a cloud application utilizing cloud elasticity features. However, in prior work, when a new policy is proposed, it is seldom compared to the state-of-the-art, and is often compared only to static provisioning using a predefined QoS target. This reduces the ability of cloud customers and of cloud operators to choose and deploy an autoscaling policy. In our work, we conduct an experimental performance evaluation of autoscaling policies, using as application model workflows, a commonly used formalism for automating resource management for applications with well-defined yet complex structure. We present a detailed comparative study of general state-of-the-art autoscaling policies, along with two new workflow-specific policies. To understand the performance differences between the 7 policies, we conduct various forms of pairwise and group comparisons. We report both individual and aggregated metrics. 
Our results highlight the trade-offs between the suggested policies, and thus enable a better understanding of the current state-of-the-art.", "pdfUrls": [ "http://www.ds.ewi.tudelft.nl/fileadmin/pds/homepages/ilyushkin/papers/autoscaling-icpe17.pdf", "https://pure.tudelft.nl/portal/files/38016400/ICPE2017_ASIlyushkin.pdf", "http://pure.tudelft.nl/ws/files/38016400/ICPE2017_ASIlyushkin.pdf", "http://www.ds.ewi.tudelft.nl/~iosup/Articles/SPEC-ComparingAutoScalers17ICPE_bpa-nominee.pdf", "http://doi.acm.org/10.1145/3030207.3030214", "https://research.spec.org/icpe_proceedings/2017/proceedings/p75.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0fe510a34db5421b4cd762cf207138c20f2af1cf", "sources": [ "DBLP" ], "title": "An Experimental Performance Evaluation of Autoscaling Policies for Complex Workflows", "venue": "ICPE", "year": 2017 }, "0ff1d5159bcc590b7445af79342f98d785553119": { "authors": [ { "ids": [ "1750155" ], "name": "Susanne Albers" } ], "doi": "10.1145/3087556.3087560", "doiUrl": "https://doi.org/10.1145/3087556.3087560", "entities": [ "Algorithm", "Approximation algorithm", "Central processing unit", "Combinatorial optimization", "Computation", "Data center", "Low-power broadcasting", "Minimum-cost flow problem", "Multi-core processor", "Multiprocessing", "Optimization problem", "Program optimization", "Quantum state", "Rounding", "Server (computing)", "Sleep mode", "State transition table", "Time complexity" ], "id": "0ff1d5159bcc590b7445af79342f98d785553119", "inCitations": [], "journalName": "", "journalPages": "35-44", "journalVolume": "", "outCitations": [ "0f44833eb9047158221e7b3128cde1347b58ccd6", "1283a56a9b0c937b687d2018d358e3f0f42d0119", "1739b4ae505fdad0150f4f230a379b5c33b4aa49", "1ec1ea120bc50c39c39af6cba3adbbefba5b6fac", "3e12b4955d54a46cd52aed9123ba11ce9853aae7", "f5fed3c82ae151f6e04af84f25c38c31a21e39f3", "aeead2f05880f714c03e836dec0795e730cb6ebb", "213056a9ebe2e4d0b7dbfd8a3ab8ac50e6a10467", 
"001d53a455047ea13a2979f9ea59a4d29a09546c", "1c0965cec620b0e1de103a9c74694296bc20c609", "b102990fb59de006182ff090d837ddabfc2b4584", "33da06260d2c420f568793aab04d4677d220e791", "33ceeeb891405bda29b49b2613a1977843411ba1", "11fc07793bc4d44b01049bee0c631a1df04f6e24", "381c7853690a0fee6f00d2608a7779737f1365f9", "15417b01ca082c09ec07163cadf92c479c45dd67" ], "paperAbstract": "We formulate and study an optimization problem that arises in the energy management of data centers and, more generally, multiprocessor environments. Data centers host a large number of heterogeneous servers. Each server has an active state and several standby/sleep states with individual power consumption rates. The demand for computing capacity varies over time. Idle servers may be transitioned to low-power modes so as to rightsize the pool of active servers. The goal is to find a state transition schedule for the servers that minimizes the total energy consumed. On a small scale the same problem arises in multi-core architectures with heterogeneous processors on a chip. One has to determine active and idle periods for the cores so as to guarantee a certain service and minimize the consumed energy.\n For this power/capacity management problem, we develop two main results. We use the terminology of the data center setting. First, we investigate the scenario that each server has two states, i.e. an active state and a sleep state. We show that an optimal solution, minimizing energy consumption, can be computed in polynomial time by a combinatorial algorithm. The algorithm resorts to a single-commodity min-cost flow computation. Second, we study the general scenario that each server has an active state and multiple standby/sleep states. We devise a \\tau-approximation algorithm that relies on a two-commodity min-cost flow computation. Here \\tau is the number of different server types. A data center has a large collection of machines but only a relatively small number of different server architectures. 
Moreover, in the optimization one can assign servers with comparable energy consumption to the same class. Technically, both of our algorithms involve non-trivial flow modification procedures. In particular, given a fractional two-commodity flow, our algorithm executes advanced rounding and flow packing routines.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087556.3087560" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0ff1d5159bcc590b7445af79342f98d785553119", "sources": [ "DBLP" ], "title": "On Energy Conservation in Data Centers", "venue": "SPAA", "year": 2017 }, "0ff3ac10e29f2d3aa37bb57f461f5cb0902d4801": { "authors": [ { "ids": [ "14445515" ], "name": "Srdjan Matic" }, { "ids": [ "2130458" ], "name": "Carmela Troncoso" }, { "ids": [ "40008021" ], "name": "Juan Caballero" } ], "doi": "", "doiUrl": "", "entities": [ "Cluster analysis", "Information privacy", "Internet", "Proxy server", "Relay", "Router (computing)", "Software deployment", "Tor Messenger" ], "id": "0ff3ac10e29f2d3aa37bb57f461f5cb0902d4801", "inCitations": [ "ce13fee9714c236989e9b04cb3788cd7d74d0618" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "00a9446982911cbd96a127f70976d39ecaaaf306", "271aa89b46e1a6fe3722c6fbf6ec268a7ce7973e", "49cd1030cd8f98b6ec7545750c78d580ca80a43d", "1708eba3482a2bc755f405ef9446914f82a321ad", "281789e224b09970cd25ee988a6f6f898c629bb8", "4c7a5b7d4067721079789156f4fa692934885334", "208448ed57cb0ff70866cb3828b06610c3ff25fd", "0be8170df4c1ea1cf8312ae5ed326665224d5d9c", "2dbcc7077a01981679007eceac6c6659a1c18200", "791382f7dc39154ec39ea249493d5f653b739df4", "5d121afe922953dd88407d7cc49bdfb4b1f42be5", "4ed253eb8200983537d80d9b158a9559e9c3e09f", "11a4ab490c76018c0c01b39d03db7e14f191d086", "4e33e9d1aa91aa78819ec9700d1024e0f0cdef6c", "1e7ddb41095be915ec28bc85d14305df3d02a445", "04c2551de11d386ce6416c26f12bd9b428ac9d7e", "2e6c55bc293681e65ba6741b28f99002809734be", "9e414908a7d478d2cc6406810df8eee7eef5cf24", 
"1ff9b151019648eaea901ee3c2b795e921358b21", "14b0aee53705c9eda1bff2aebfc2c04eee9a3ee2", "0028bd64693541fe1433143e6e307697878c24a0", "6de027919d09e7b4445367ed7e162bccca8df06f" ], "paperAbstract": "Bridges are onion routers in the Tor Network whose IP addresses are not public. So far, no global security analysis of Tor bridges has been performed. Leveraging public data sources, and two known Tor issues, we perform the first systematic study on the security of the Tor bridges infrastructure. Our study covers both the public infrastructure available to all Tor users, and the previously unreported private infrastructure, comprising private nodes for the exclusive use of those who know their existence. Our analysis of the public infrastructure is twofold. First, we examine the security implications of the public data in the CollecTor service, identifying several pieces of data that may be detrimental for the security of bridges. Then, we measure security relevant properties of public bridges. Our results show that the 55% of public bridges that carry clients are vulnerable to aggressive blocking; that 90% of bridge clients use default bridges that are trivial to identify; that the concurrent deployment of Pluggable Transports in bridges reduces the security of the most secure transports; and that running non-Tor services in the same host as a bridge may harm its anonymity. To study the private infrastructure, we use an approach to discover 694 private bridges on the Internet and a novel technique to track bridges across IP changes. We are first to measure the size of the private bridge population (35% discovered bridges are private) and to report existence of infrastructures that use private proxies to forward traffic to backend bridges or relays. We use a novel clustering approach to analyze the different infrastructures using proxies and bridges, examining its hosting and security properties. 
We provide an extensive discussion on the security implications of our findings.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/dissecting-tor-bridges-security-evaluation-their-private-and-public-infrastructures/", "https://software.imdea.org/~carmela.troncoso/papers/Matic_NDSS17.pdf", "https://www.freehaven.net/anonbib/cache/dissecting-ndss2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/6b2b/84b35fb38e6c5b344254e8316e562a578953.pdf", "s2Url": "https://semanticscholar.org/paper/0ff3ac10e29f2d3aa37bb57f461f5cb0902d4801", "sources": [ "DBLP" ], "title": "Dissecting Tor Bridges: A Security Evaluation of their Private and Public Infrastructures", "venue": "NDSS", "year": 2017 }, "0ff8ae30a0815431d86df54da62d6305f4e7e22b": { "authors": [ { "ids": [ "19272100" ], "name": "Jonas Traub" }, { "ids": [ "2629601" ], "name": "Sebastian Bre\u00df" }, { "ids": [ "1731210" ], "name": "Tilmann Rabl" }, { "ids": [ "1680579" ], "name": "Asterios Katsifodimos" }, { "ids": [ "1733290" ], "name": "Volker Markl" } ], "doi": "10.1145/3127479.3131621", "doiUrl": "https://doi.org/10.1145/3127479.3131621", "entities": [ "Adaptive sampling", "Experiment", "Internet of things", "Maxima and minima", "Sampling (signal processing)", "Sensor", "Smart meter" ], "id": "0ff8ae30a0815431d86df54da62d6305f4e7e22b", "inCitations": [ "b21fb765a522e5abe3cb766d910594ff5b670813" ], "journalName": "", "journalPages": "586-597", "journalVolume": "", "outCitations": [ "745890490eda5cd56c303eb72aa001d78c08a82f", "d535735a860b7977d3c8c01633b213bb2ecff05d", "1160e7bf54e72fa10c769c15a9d319d4798b7b1b", "4f17119eaa541f64f6ae9be2a0b6e30de70fe421", "6edf1c691ad47ae189e325fcc4f6291d4e369269", "a94b88377a7034642cd54cb907f2be1a120165c1", "6d5c0db32150a470d2a3aeb75b77cd992faa4e0c", "1f094495d9be422e5fb14ea8a0122c30e264f131", "059eb34d95c73e37dca8e35b0ac5a2fb0142f3ee", "5ccc79c16d830f0c9f6fd464e6e18a033c5db51b", "69884f09be947c43e1029bb3ddc95db5edc2a03d", 
"6da6570ee13c04c9294581c290a793290b01f5cf", "28e39484d8658aed9f7f7b0ddccbdd65b0e78113", "8a994429d0ae5323fdae98f12bc94f234fed0e83", "5bcc5ffe048bf8c449897a4ba17b4e902e7848d7", "1012d6ab15f06ddb3a8a40a950394ab31f2e8db3", "1fcc527c54e692ab6db69a8a6b5f5ee9118e0dd6", "44007fee0d1c8a9115e9d7d3ec41af89b608b942", "0541d5338adc48276b3b8cd3a141d799e2d40150", "43a2e1fb5237c2704534942054e8f3245531dbbd", "4446b5aeb13a09729d1d52c7bdcce46a9723787b", "36b48d5ccdbf000a069e0c36db3d2b0508ee98f1", "6747d2e1ff640896b192e87b2a9cfb0e735b40cf", "32f400bb002bc72d53f1fd6ce41b957019626dc7", "1a1bb6ed97659c57469d51b5e60695376b56e79f", "2cf59e5b73377e3f3a40c6d9bdbb680fd10dd9e8", "ae551d49e07e257ae78531371739eacdf9a5fe88", "60b0434daae3cba7210f34cae79f02aa0ade12d1", "a512812f72321eb4b198cc11cbd6755bfa71aa1c", "32a6b330914ca4839822132b82ab9b02d8402db2", "0cbc08b2e318133653448214d2b4fbbd7f812136", "10f3630e92aab74d050ff4ecd47181befe97d274", "79db61293bfb597e5f5d3319251a2f374833d1fa", "48af4f06c972e4698b8dca3468a08be8868b1531", "4783c303f45d78323e1206c962ce6fcea57e724d", "652991f2bbf4b5133b0bea9a4b870ac4ec929e54", "2455f6f44b4a19cc2ff05df78df6ff13616e7122", "080a514703b7858ce7469a8c29d48327afbf4fc5", "42782aec0d2180324fac0202cd65ce55f5f14740", "fffd15dbfe4c2e0fd9093cd8327d25758224f18d", "5c6dcf91f1d4bd013bd926ea8bb5ea1dc8682b40" ], "paperAbstract": "Real-time sensor data enables diverse applications such as smart metering, traffic monitoring, and sport analysis. In the Internet of Things, billions of sensor nodes form a sensor cloud and offer data streams to analysis systems. However, it is impossible to transfer all available data with maximal frequencies to all applications. Therefore, we need to tailor data streams to the demand of applications.\n We contribute a technique that optimizes communication costs while maintaining the desired accuracy. Our technique schedules reads across huge amounts of sensors based on the data-demands of a huge amount of concurrent queries. 
We introduce user-defined sampling functions that define the data-demand of queries and facilitate various adaptive sampling techniques, which decrease the amount of transferred data. Moreover, we share sensor reads and data transfers among queries. Our experiments with real-world data show that our approach saves up to 87% in data transmissions.", "pdfUrls": [ "http://www.user.tu-berlin.de/powibol/assets/publications/traub-optimized-on-demand-data-streaming-from-sensor-nodes.pdf", "http://www.user.tu-berlin.de/powibol/assets/posters/ondemandstreamingposter.pdf", "http://doi.acm.org/10.1145/3127479.3131621" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/0ff8ae30a0815431d86df54da62d6305f4e7e22b", "sources": [ "DBLP" ], "title": "Optimized on-demand data streaming from sensor nodes", "venue": "SoCC", "year": 2017 }, "101ffff36113f50ca9b46c91fe7b5aece268ecbf": { "authors": [ { "ids": [ "2039788" ], "name": "Haoran Huang" }, { "ids": [ "39686335" ], "name": "Qi Zhang" }, { "ids": [ "10819933" ], "name": "Jindou Wu" }, { "ids": [ "1790227" ], "name": "Xuanjing Huang" } ], "doi": "10.1145/3077136.3080791", "doiUrl": "https://doi.org/10.1145/3077136.3080791", "entities": [ "Artificial neural network", "Convolutional neural network", "Deep learning", "F1 score", "Join (SQL)", "Memory architecture", "Social media" ], "id": "101ffff36113f50ca9b46c91fe7b5aece268ecbf", "inCitations": [], "journalName": "", "journalPages": "733-742", "journalVolume": "", "outCitations": [ "6ec53eed9e751da619f77c95bc0833ff79268a36", "113a0a371f8572bc8f8adab9ba8d126be8eebaa1", "da9a9d01dbe123536ca3227f5a458f278cd20eda", "e48c3431004231432902e591fc5186c62b816bc8", "07f3f736d90125cb2b04e7408782af411c67dd5a", "2a280a11143da9040801193040e0700f79b3bba0", "1650ffcae990c1179904beaa6e4f809751477305", "1b29786b7e43dda1a4d6ee93f520a2960b1e3126", "64b459fa81c4b7f2f49795d45c5250719621a4fc", "3bba67ba34427212d8526d155a47972effd998e1", "10ebd5c40277ecba4ed45d3dc12f9f1226720523", 
"77f5ff10a69d2d77d9a4f4c054b5b80229813c7e", "0e384290899348ec38448ea88d76edef0b3c36d1", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "37ee3ad4ce95e49f9b7d84328ce0652d9e54c071", "870af09e822775d164bf5cee38eaa6775c96eddc", "600191af13e7ae80885fa67ebd0a637d0e87abb5", "4f1fc7a3e14003e196d2e4261035b299e448fa87", "119c375a81588c5576644a56705a6b7a987592db", "22ae02d81c21cb90b0de071550cfb99e6a623e62", "5f47dff76174f0a0cda429e2ed0e733b4026ddea", "a584211768d49f80192f13b8ed2fda9c058dec34", "6a556d42f0c094af30630747bd99060d7a5ade8f", "494e7134a63404397815bf4bc3b7ea5b0f00dc69", "49b2a1b9606c0ccb95a36895760fc91b8b830266", "5b9534442f91a87022427b74bca9fd95dd045383", "452f7411af7d471dd3ba84c2b06b2aaffc38cdb9", "78c853aff9291eddfca6591b4e0256bee9c1c945", "04ee77ef1143af8b19f71c63b8c5b077c5387855", "4aba54ea82bf99ed4690d45051f1b25d8b9554b5", "1ee46c3b71ebe336d0b278de9093cfca7af7390b", "34f25a8704614163c4095b3ee2fc969b60de4698" ], "paperAbstract": "Every day, social media users send millions of microblogs on every imaginable topics. If we could predict which topics a user will join in the future, it would be easy to determine what topics will become popular and what kinds of users a topic may attract. It also can be of great interest for many applications. In this study, we investigate the problem of predicting whether a user will join a topic based on his posting history. We introduce a novel deep convolutional neural network with external neural memory and attention mechanism to perform this problem. User's posting history and topics were modeled with an external neural memory architecture. The convolutional neural network based matching methods were used to construct the relations between users and topics. Final decisions were made based on these matching results. To train and evaluate the proposed method, we collected a large-scale dataset from Twitter. The experimental results demonstrated that the proposed method could perform significantly better than other methods. 
Comparing to the state-of-the-art deep neural networks, our approach achieves a relative improvement of 18.2\\% in F1-score and 28.9\\% in MAP@10.", "pdfUrls": [ "http://jkx.fudan.edu.cn/~qzhang/paper/sigir2017topics.pdf", "http://doi.acm.org/10.1145/3077136.3080791" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/101ffff36113f50ca9b46c91fe7b5aece268ecbf", "sources": [ "DBLP" ], "title": "Predicting Which Topics You Will Join in the Future on Social Media", "venue": "SIGIR", "year": 2017 }, "10209181940b0af5edf534a6408f4bdfd661685f": { "authors": [ { "ids": [ "1976501" ], "name": "Hailong Yang" }, { "ids": [ "1708641" ], "name": "Quan Chen" }, { "ids": [ "38716473" ], "name": "Moeiz Riaz" }, { "ids": [ "1698860" ], "name": "Zhongzhi Luan" }, { "ids": [ "2235128" ], "name": "Lingjia Tang" }, { "ids": [ "3348715" ], "name": "Jason Mars" } ], "doi": "10.1145/3079856.3080224", "doiUrl": "https://doi.org/10.1145/3079856.3080224", "entities": [ "End-to-end encryption", "Natural language processing", "Responsiveness", "Sirius" ], "id": "10209181940b0af5edf534a6408f4bdfd661685f", "inCitations": [], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "133-146", "journalVolume": "", "outCitations": [ "092a1cf971fb8359d3293004c6f1de82f05f3afb", "3fa72c96b7548a3207a69f1e55e039fc5a314696", "da94b4bf79fcb32a3e24da9b152c1fd7efb199f5", "2538e3eb24d26f31482c479d95d2e26c0e79b990", "251ea4c57e71bb951ff6f9fe0ff63897a298402f", "4fa46a635102877d118f28e158662e563b263611", "0125e9060834b8f23f43461dd468e23f98eeb8a5", "590bd345ef4b8f274af3363a52b7d8f518cdc08a", "98fe37b292f6ec70181015f3ca384cc52cad02a5", "61aa09bc1a3eae17480645c90b06a18cbd62d9af", "6e669e90a34c4179f9364406d8a7a7f855745086", "269c24a4aad9be622b609a0860f5df80688c2f93", "1a4f15385f40d8ae503a29c4d70c5a908cf492d8", "330e5970507a1a62047ed737abd24e88207724d4", "18e9a7eea9c714c24152b9c6dd5cd12fb2c4b495", 
"ae70d1a24fc2a21d0b9b395d753ef81244d041f3", "1d035d1d445f5ae5457db98af49ce1b87be1ebd5", "490020c0d4fa1eb85fe353add5713e49f08c628d", "30c5b89ef93b564781b9a7b8f03be0056d926876", "110c6e59991e2e9abe674f24c3a19c19488f034d", "00919d778377ec8e4e037d8ebafc76c9de52db4b", "0e7148699994155cf8afae0ed943812fbb4f4b7f", "1a07186bc10592f0330655519ad91652125cd907", "5175688633b7c22fdd0b1bec4f042c30d1650a15", "0d62acaefd4c4f41fd5814c8d9267d7798de9284", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "0f44833eb9047158221e7b3128cde1347b58ccd6", "e72714c051aae9d4e32002d2b406a6a5f7d58d5f", "d63e4cada8347686372d63a3d00afa89a1515a31", "4aec1ba26ca8a42a041dc0ac6a43dc91f1b6abc1", "04ba23e362786deee7af52b1987d956bb764ca7e", "a6a1b2a62676ccaa714c53f20cf4c6bf629e0102", "7529f2e9238ab5e5af3b8f2619b37995133dc9c4", "63405f39127cf05488bb40f96ac3bf1ccef41757", "08632fe2b934ed15d3499e7321282c81adc2c390", "76129c74a6a2223169d0525ca7857547eada36c8", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "0b408eeb286e7c75b2e87b5611cb4312e02eff61", "3000e77ed7282d9fb27216f3e862a3769119d89e", "7a978f2902460e732c50c36a171deb11733df1fc", "d7f02bb039b720fadb2af71572665af96c1195b2", "0c7d7b4c546e38a4097a97bf1d16a60012916758", "0dc46b312269b8cf3e80e6398db45f0088034431", "1d204d774c134dd7df97a6a83e12387efd0c7a01", "3c0bc4e9d30719269b0048d4f36752ab964145dd", "01efff2d9fb655d7bc6532581857f8dfa27cd790", "1d4ac1fd1706a1ab3d93d8fa481d12332068fc30", "02b141ddc423469afde9c99cf76028095ef28127", "10ce2a90601895e96ed9c5d8b0c1e69c07f721fa", "0d683085618e654a173b3590c4d2b431569cbfb6", "003d5a65de0ac72daaf105ded903cb3eb88585b3", "7692addeac2ffdcec2aa342cc8faa19221a8502d", "04a8d271bc4384dbfbb417bfb625feb01cb44666", "49c7b11728fd191db440a77776eda6b505904c27" ], "paperAbstract": "Modern user facing applications consist of multiple processing stages with a number of service instances in each stage. The latency profile of these multi-stage applications is intrinsically variable, making it challenging to provide satisfactory responsiveness. 
Given a limited power budget, improving the end-to-end latency requires intelligently boosting the bottleneck service across stages using multiple boosting techniques. However, prior work fail to acknowledge the multi-stage nature of user-facing applications and perform poorly in improving responsiveness on power constrained CMP, as they are unable to accurately identify bottleneck service and apply the boosting techniques adaptively.\n In this paper, we present PowerChief, a runtime framework that 1) provides joint design of service and query to monitor the latency statistics across service stages and accurately identifies the bottleneck service during runtime; 2) adaptively chooses the boosting technique to accelerate the bottleneck service with improved responsiveness; 3) dynamically reallocates the constrained power budget across service stages to accommodate the chosen boosting technique. Evaluated with real world multi-stage applications, PowerChief improves the average latency by 20.3x and 32.4x (99% tail latency by 13.3x and 19.4x) for Sirius and Natural Language Processing applications respectively compared to stage-agnostic power allocation. 
In addition, for the given QoS target, PowerChief reduces the power consumption of Sirius and Web Search applications by 23% and 33% respectively over prior work.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080224" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/10209181940b0af5edf534a6408f4bdfd661685f", "sources": [ "DBLP" ], "title": "PowerChief: Intelligent power allocation for multi-stage applications to improve responsiveness on power constrained CMP", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "1031f70e9155dd956e04c01ebed5ef060dc722ef": { "authors": [ { "ids": [ "31429628" ], "name": "Geoffrey Sawaya" }, { "ids": [ "33856916" ], "name": "Michael Bentley" }, { "ids": [ "1818419" ], "name": "Ian Briggs" }, { "ids": [ "1724910" ], "name": "Ganesh Gopalakrishnan" }, { "ids": [ "1689086" ], "name": "Dong H. Ahn" } ], "doi": "10.1109/IISWC.2017.8167780", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167780", "entities": [ "Best practice", "CFLAGS", "Compiler", "Experiment", "Litmus", "SQL", "Spatial variability", "Test automation" ], "id": "1031f70e9155dd956e04c01ebed5ef060dc722ef", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "229-238", "journalVolume": "", "outCitations": [ "063890953f066f5a2cffe95d951b03e2b5def08e", "ae3c5bb3e4b8274404b96b484d03c326d3d389cd", "e7e644649d1f771c63a9f07c520c24413e1e84d2", "0d8126da975a2bbebed2e47a55706777b01b026e", "d7cc9f48cae5658473790ea7230b54391c075e09", "84a676d3a92d6252c5bc49ed64b25bb86aefa0d0", "4ee21ecd6093bf49f00563e291caa2574ee4d5c4", "14ff6c9d0fb8f4aea4fe3058d3b12658dc47f87d", "79d318f39c589f16a6a05d728871edb0544b466c", "3ca411b1508970faa9ed9994fec5cc76fc803425", "3518cb6489d65d0f3bf33462afc7fa1bb3859ddc", "3169437b0581731f94c4a5ef9bbb69b8c8f9bb42", "4371aeb435458cfc423e1f73e65fd8ccd966dc22" ], "paperAbstract": "Understanding the extent to 
which computational results can change across platforms, compilers, and compiler flags can go a long way toward supporting reproducible experiments. In this work, we offer the first automated testing aid called FLiT (Floating-point Litmus Tester) that can show how much these results can vary for any user-given collection of computational kernels. Our approach is to take a collection of these kernels, disperse them across a collection of compute nodes (each with a different architecture), have them compiled and run, and bring the results to a central SQL database for deeper analysis. Properly conducting these activities requires a careful selection (or design) of these kernels, input generation methods for them, and the ability to interpret the results in meaningful ways. The results in this paper are meant to inform two different communities: (a) those interested in seeking higher performance by considering “IEEE unsafe” optimizations, but then want to understand how much result variability to expect, and (b) those interested in standardizing compiler flags and their meanings, so that one may safely port code across generations of compilers and architectures. By releasing FLiT, we have also opened up the possibility of all HPC developers using it as a common resource as well as contributing back interesting test kernels as well as best practices, thus extending the floating-point result-consistency workload we contribute. 
This is the first such workload and result-consistency tester underlying floating-point reproducibility of which we are aware.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167780", "http://www.cs.utah.edu/~mbentley/papers/paper-iiswc2017-flit.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1031f70e9155dd956e04c01ebed5ef060dc722ef", "sources": [ "DBLP" ], "title": "FLiT: Cross-platform floating-point result-consistency tester and workload", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "103baca878b17a15d148a684c0b0152e78591be1": { "authors": [ { "ids": [ "2494078" ], "name": "Andreas Sembrant" }, { "ids": [ "1759749" ], "name": "Erik Hagersten" }, { "ids": [ "1780873" ], "name": "David Black-Schaffer" } ], "doi": "10.1109/HPCA.2017.25", "doiUrl": "https://doi.org/10.1109/HPCA.2017.25", "entities": [ "CPU cache", "Corner case", "Data hierarchy", "Electronic data processing", "Indirection", "Mobile processor", "Server (computing)" ], "id": "103baca878b17a15d148a684c0b0152e78591be1", "inCitations": [ "fcb5c8fafc187216df2fc74ce8e831d3f022ff05" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "133-144", "journalVolume": "", "outCitations": [ "0b4a5d7fa7bd113c309a89bf858e085e95ab55a3", "6acd75781396e5dedcf2f06a7131ba7f3153bfb5", "579cf876bc66b2311f163c5a6d57079df505da54", "3e81ab06cc2bd359b8c95ef20cae50cf6924b488", "39e83bc7d1dd445a879c4ed7a50cb787103d1c4f", "196fa3b023966f39094b23927bc40d357c5d0d97", "2ab1b98b642e341006d18ebce41359e95373422f", "2253d6559ae9793b5cfa6e409d1d9de50dafa29a", "328ba999c9cd0c25b59aee706d05a1302bd12e89", "7adc7940bc250bab39c7823fbb1ef3f86fe0625a", "c7341dcd5e6c71b149edf808331ab1e8b37cd03b", "0c7defe10c372f1a8a24ec4f840a5bc1870eea3e", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "1072d1f9eab9f69fab598f9d47ad323473b45ce3", "1e1b0f0411e35a7d8edacc3cd555e8a347674ef1", 
"ed20a5a4fc56c771f2d1d78f3730e3afc495b1f2", "b116227c9782509c1d5a667da3632deb4356727a", "3364bc50921a9566d61ef8cb73baa82341725e4b", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "a27d410ecd2b6c4b4c9d4587344f9dd64dae0fff", "335fae312d0c93ab16a3dbdf57f8912f3768149d", "2ca16a4b561ce75be6639d24bc2d93eeb8df86bc", "29f766723ca752138855500084ced04503bfc9c8", "35c348a3663de6387a45dc58b2c85092d247818a", "67b214b2bcfce35806e19c7894559539ac58761f", "53691325ee4a42e0b0cf3e9d463a0bc71f447c99", "105340930d1ab53a4ce663bc4601e120021e0567", "34f7312e5dd0c209a372b642ab8881a2e7dbcb4f", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "352a8957005dc5519b15ed1870751ec494d66395", "51b6b6391534bfceb58b2c42a1e6825fd96dc948", "165528cdf9c76edd98729c142faf50fbd6cfc69e", "031e84123c684741da3f10c1ace816731eca1ec1", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "5e1b47fbb0dab0902e25d8b85dde7c665404318d", "17110231cc5f98e3b24447ecbdccdede7af99fa3", "be47b4f2b2a1e85923a8c574880b4b35013cd712", "26eb0c85908fe173b4cc2c5e7838c51b49c60597", "0717371b254df3e466a11d1965c2c9541a43b7a3", "02d6e756adba10726e108c05ae72c06440aacd74", "7f55f54a6817cd8adbae81b69af96d9b15de5fdb" ], "paperAbstract": "Today's caches tightly couple data with metadata (Address Tags) at the cache line granularity. The co-location of data and its identifying metadata means that they require multiple approaches to locate data (associative way searches and level-by-level searches), evict data (coherent writebacks buffers and associative level-by-level searches) and keep data coherent (directory indirections and associative level-by-level searches). This results in complex implementations with many corner cases, increased latency and energy, and limited flexibility for data optimizations. We propose splitting the metadata and data into two separate structures: a metadata hierarchy and a data hierarchy. Themetadata hierarchy tracks the location of the data in the data hierarchy. 
This allows us to easily apply many different optimizations to the data hierarchy, including smart data placement, dynamic coherence, and direct accesses. The new split cache hierarchy, Direct-to-Master (D2M), provides a unified mechanism for cache searching, eviction, and coherence, that eliminates level-by-level data movement and searches, associative cache address tags comparisons and about 90% of the indirections through a central directory. Optimizations such as moving LLC slices to the near-side of the network and private/shared data classification can easily be built on top of D2M to further improve its efficiency. This approach delivers a 54% improvement in cache hierarchy EDP vs. a mobile processor and 40% vs. a server processor, reduces network traffic by an average of 70%, reduces the L1 miss latency by 30% and is especially effective for workloads with high cache pressure.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.25" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/103baca878b17a15d148a684c0b0152e78591be1", "sources": [ "DBLP" ], "title": "A Split Cache Hierarchy for Enabling Data-Oriented Optimizations", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "10427d3ef0fe1779441c50d70dd526c9f3d99bc2": { "authors": [ { "ids": [ "6299439" ], "name": "Guy L. 
Steele" }, { "ids": [ "1755724" ], "name": "Jean-Baptiste Tristan" } ], "doi": "10.1145/3018743.3018757", "doiUrl": "https://doi.org/10.1145/3018743.3018757", "entities": [ "Binary search algorithm", "CUDA", "Double-precision floating-point format", "GeForce 700 series", "Graphics processing unit", "Latent Dirichlet allocation", "Machine learning", "Mixture model", "On the fly", "SIMD", "Sampling (signal processing)", "Single-precision floating-point format", "Topic model" ], "id": "10427d3ef0fe1779441c50d70dd526c9f3d99bc2", "inCitations": [], "journalName": "", "journalPages": "341-355", "journalVolume": "", "outCitations": [ "514514e3f6150d1f36a7820fc5da5a17953d62f7", "ffcb7146dce1aebf47a910b51a873cfec897d602", "5471c335066921844e3f47dd738eba41a5402e35", "2b0f017e5aa968fd13b4a9ba2c2d37d94be6041e", "7f6a25d37d09fe790b9d93ef96666e938ac8ed57", "215aa495b4c860a1e6d87f2c36f34da464376cc4", "2161ec2fcb848c29e3173f229111e83c4762e69d", "79c9bfe65a473a7f7d96ab536162d7eb101576f4", "a83a16010825d9ba9d57baf5a5ff52347b684d6c", "6c5fd8e060613cd6eec652dd4ede5a9b0b2e9840", "2120d48a6f2cc6950116c516b8d32b33575e4944", "97937a747a8174332e1b4c5d61138036fa4e5f77", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "35b02e70a267dacd862fd4a1da86548ab2de347c", "094cbfa06f8374b49b84524a466a63d34c9ef34f", "01f3290d6f3dee5978a53d9d2362f44daebc4008", "d1ee962c7c663380b57d80a7169d6e6710e96418", "6c0a7b47f223b60e45e7f4cb55412b23edb4e43f", "f2fb090234c24a3b1e0c6178727eddd141af4e30", "74d502b677abdc7c15a6925c3262cda0794cc3c9", "f65bcde1fcf82e05388b31de80cba10bf65acc07", "1b804dd0bdee7ceab1160610e2e63189df4f2218", "769fb8055fbe0997ef8d9dab6c9abf37489c6575", "0c4867f11c9758014d591381d8b397a1d38b04a7", "8524c4053411e2d4e029fc2da2016d83379bf676", "c5e8b04a00f8d5dea248375c9b5e60abcecf808b", "38b42b64eca378f056356881005771d54b9cb0f3" ], "paperAbstract": "We describe a SIMD technique for drawing values from multiple discrete distributions, such as sampling from the random variables of a mixture model, that 
avoids computing a complete table of partial sums of the relative probabilities. A table of alternate (\"butterfly-patterned\") form is faster to compute, making better use of coalesced memory accesses; from this table, complete partial sums are computed on the fly during a binary search. Measurements using CUDA 7.5 on an NVIDIA Titan Black GPU show that this technique makes an entire machine-learning application that uses a Latent Dirichlet Allocation topic model with 1024 topics about 13% faster (when using single-precision floating-point data) or about 35% faster (when using double-precision floating-point data) than doing a straightforward matrix transposition after using coalesced accesses.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018757", "https://jtristan.github.io/papers/butterfly.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/10427d3ef0fe1779441c50d70dd526c9f3d99bc2", "sources": [ "DBLP" ], "title": "Using Butterfly-Patterned Partial Sums to Draw from Discrete Distributions", "venue": "PPOPP", "year": 2017 }, "104937eb97b25c72dd4444c5accc94a4f0c7d722": { "authors": [ { "ids": [ "1710797" ], "name": "Yongle Zhang" }, { "ids": [ "39059758" ], "name": "Serguei Makarov" }, { "ids": [ "3095428" ], "name": "Xiang Ren" }, { "ids": [ "2261519" ], "name": "David Lion" }, { "ids": [ "2042324" ], "name": "Ding Yuan" } ], "doi": "10.1145/3132747.3132768", "doiUrl": "https://doi.org/10.1145/3132747.3132768", "entities": [ "Data dependency", "Distributed computing", "Failure rate", "Integrated development environment", "Programmer", "Simulation", "Symbolic execution" ], "id": "104937eb97b25c72dd4444c5accc94a4f0c7d722", "inCitations": [], "journalName": "", "journalPages": "19-33", "journalVolume": "", "outCitations": [ "4daf1e0aeca142a23f816bd73daf2f86ab2c5c52", "0205b55e4fcf5710a97d5d561efa66c82e39f0f7", "60430eadf9f9e2eab06d4bcebb26f5cc9e32fddf", "2c45424945d8b50b3de631db92b5667538d7535d", 
"2c7143fc52996117bf14aa70ee384c3c49635b95", "9f87fdf3f3f1e2a48c6c21629457cdb3b1873c7d", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "1d15930cd9e4ececf22fd96bf9ba52f12dc0665e", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "2a0d9931c1e794ffe96c4b6c37507e96a3c4cafc", "32a7c5e10a09b5532d56af50ef2f60d9776cc56a", "03d88407c702b6dffaae48b3d55ee716bcaffb8d", "1e4874f3443d191a4f7f3ba63a04a264bd00e364", "7e411405e8fbf47a6b585bbb310ff03cd4eb51ce", "0e578433d4e8bb2a571c87a2d22816074902f009", "05a618847e4f08e5bca29dff732757779722b2e0", "7260363c8b9a3e9d8f0b560c67cc49619bf06e56", "114801eccb5eb0831fd1848f351a138253a42f15", "113772329678792fc2a3a8cb9322c164547f88a0", "51211c46f6587f74ef805c4d8e5b908e2ccfa0a0", "959cfe05045e1c7e80406209244d3346061ca4e6", "49e8721bd4821eff0f147d73bea970f2de3aab8a", "3edfc29b8a4f4fb1e245087cd1c59498f2255fe8", "1a11995cca0eb239a7b95d23b4a42c6a634fcf41", "8932c6d523f9f5aba76993c71c0089ba15155a7c", "28227157274b1d2fa8f2b67ba459de24c15fd765" ], "paperAbstract": "Complex and unforeseen failures in distributed systems must be diagnosed and replicated in a development environment so that developers can understand the underlying problem and verify the resolution. System logs often form the only source of diagnostic information, and developers reconstruct a failure using manual guesswork. This is an unpredictable and time-consuming process which can lead to costly service outages while a failure is repaired.\n This paper describes Pensieve, a tool capable of reconstructing near-minimal failure reproduction steps from log files and system bytecode, without human involvement. 
Unlike existing solutions that use symbolic execution to search for the entire path leading to the failure, Pensieve is based on the Partial Trace Observation, which states that programmers do not simulate the entire execution to understand the failure, but follow a combination of control and data dependencies to reconstruct a simplified trace that only contains events that are likely to be relevant to the failure. Pensieve follows a set of carefully designed rules to infer a chain of causally dependent events leading to the failure symptom while aggressively skipping unrelated code paths to avoid the path-explosion overheads of symbolic execution models.", "pdfUrls": [ "http://doi.acm.org/10.1145/3132747.3132768", "http://www.eecg.toronto.edu/~yuan/papers/pensieve-sosp17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/104937eb97b25c72dd4444c5accc94a4f0c7d722", "sources": [ "DBLP" ], "title": "Pensieve: Non-Intrusive Failure Reproduction for Distributed Systems using the Event Chaining Approach", "venue": "SOSP", "year": 2017 }, "105a350a77a19c2a1fa8f36b550823fe34e32ac5": { "authors": [ { "ids": [ "2028830" ], "name": "Fabian Muehlboeck" }, { "ids": [ "6331083" ], "name": "Ross Tate" } ], "doi": "10.1145/3133880", "doiUrl": "https://doi.org/10.1145/3133880", "entities": [ "Gradual typing", "Type system", "Typing" ], "id": "105a350a77a19c2a1fa8f36b550823fe34e32ac5", "inCitations": [ "0ea251e1a45734c346f6d30e9679e1ec07e7db03", "3c77e744c44291b05ea7634251cfd764f3f1d383" ], "journalName": "PACMPL", "journalPages": "56:1-56:30", "journalVolume": "1", "outCitations": [ "95176ae43fa8435bac03055e61d1a91cc481b9c6", "45d488e020cdfb0e5bcc3766bad3d3ab58353db2", "04ef73b5a50e9a3ce3460909dcde9bec9b589006" ], "paperAbstract": "Recent research has identified significant performance hurdles that sound gradual typing needs to overcome. 
These performance hurdles stem from the fact that the run-time checks gradual type systems insert into code can cause significant overhead. We propose that designing a type system for a gradually typed language hand in hand with its implementation from scratch is a possible way around these and several other hurdles on the way to efficient sound gradual typing. Such a design process also highlights the type-system restrictions required for efficient composition with gradual typing. We formalize the core of a nominal object-oriented language that fulfills a variety of desirable properties for gradually typed languages, and present evidence that an implementation of this language suffers minimal overhead even in adversarial benchmarks identified in earlier work.", "pdfUrls": [ "http://www.cs.cornell.edu/~ross/publications/nomalive/nomalive-oopsla17-tr.pdf", "http://www.cs.cornell.edu/~fabianm/papers/nomalive-oopsla17.pdf", "http://www.cs.cornell.edu/~ross/publications/nomalive/nomalive-oopsla17.pdf", "http://doi.acm.org/10.1145/3133880", "http://www.cs.cornell.edu/~fabianm/papers/nomalive-oopsla17-tr.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/105a350a77a19c2a1fa8f36b550823fe34e32ac5", "sources": [ "DBLP" ], "title": "Sound gradual typing is nominally alive and well", "venue": "PACMPL", "year": 2017 }, "105c194b40f73c3f07068b15c813b01721375d9d": { "authors": [ { "ids": [ "17826426" ], "name": "Alexandros Evangelidis" }, { "ids": [ "1899439" ], "name": "David Parker" }, { "ids": [ "1804288" ], "name": "Rami Bahsoon" } ], "doi": "", "doiUrl": "", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Autoscaling", "Cloud computing", "Dynamic voltage scaling", "Formal verification", "Logic programming", "Quality of service", "Scalability", "Software as a service", "Utility" ], "id": "105c194b40f73c3f07068b15c813b01721375d9d", "inCitations": [ "752fc519fe07d4c33d235ec2436516861e0c0be8" ], "journalName": "2017 17th IEEE/ACM International 
Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "355-364", "journalVolume": "", "outCitations": [ "a0a8a015da201afe3d4dd863d92a90b6822fb7fc", "5b9631561a89a3e071d8ec386a616a120220bfd9", "350dcb19a719ef01a4c683d25db0d11759f91b49", "85481df6f74808e0258179296f5df7fcf559d19b", "41da0c64d5884eed285c91e2938350160b5a6ccd", "1db3e9e9e716d7c4f5b46a2666f6ebcf26c7e9de", "4dbe55cdf630bd364a2ab19768c23f8545292c83", "ce8fd5625cd3416df598d6c71eb3c6460a6efe0c", "745782902e97be8fbacd1e05d283f11104e2fec6", "2bf6d2deda139349f035813a09f190aa05a45530", "3cdb00dc66e7e13965f8ecee9c754ff3facf25a0", "b4b34c7224fd81c8c1e6f03335b8eda407fe7824", "e3a4a341c72f6d32698ae0846b331b2242014092", "3b7c5da3a3888be5818159f31fb50d1e382efa26", "caeabc98e9b701aee79cd053cc1d0070e68d3a9e", "2df67134a8dd6410681eae06f719c16e351f2f92", "9e06775b0ea48c5bc196d0464782900f0209a9b1", "163e6d3d6d0f85624cdc700159a83b386f532e06" ], "paperAbstract": "Auto-scaling, a key property of cloud computing, allows application owners to acquire and release resources on demand. However, the shared environment, along with the exponentially large configuration space of available parameters, makes configuration of auto-scaling policies a challenging task. In particular, it is difficult to quantify, a priori, the impact of a policy on Quality of Service (QoS) provision. To address this problem, we propose a novel approach based on performance modelling and formal verification to produce performance guarantees on particular rule-based auto-scaling policies. We demonstrate the usefulness and efficiency of our model through a detailed validation process on the Amazon EC2 cloud, using two types of load patterns. 
Our experimental results show that it can be very effective in helping a cloud application owner configure an auto-scaling policy in order to minimise the QoS violations.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101161", "http://www.prismmodelchecker.org/papers/fgcs-autoscale.pdf", "http://www.prismmodelchecker.org/papers/ccgrid17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/105c194b40f73c3f07068b15c813b01721375d9d", "sources": [ "DBLP" ], "title": "Performance Modelling and Verification of Cloud-Based Auto-Scaling Policies", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "107d4303790bb03303bc7130f6b604b1fd2340f2": { "authors": [ { "ids": [ "39718339" ], "name": "Mojgan Khaledi" }, { "ids": [ "39503001" ], "name": "Mehrdad Khaledi" }, { "ids": [ "26654051" ], "name": "Shamik Sarkar" }, { "ids": [ "1685027" ], "name": "Sneha Kumar Kasera" }, { "ids": [ "1700521" ], "name": "Neal Patwari" }, { "ids": [ "40345904" ], "name": "Kurt Derr" }, { "ids": [ "38405343" ], "name": "Samuel Ramirez" } ], "doi": "10.1145/3117811.3117845", "doiUrl": "https://doi.org/10.1145/3117811.3117845", "entities": [ "Crowdsourcing", "Experiment", "Participatory sensing", "Sampling (signal processing)", "Smart city", "The Current", "Transmitter", "Video game localization" ], "id": "107d4303790bb03303bc7130f6b604b1fd2340f2", "inCitations": [ "0b12cf98c3232c03afcabc00921c5b8fd5302907" ], "journalName": "", "journalPages": "235-247", "journalVolume": "", "outCitations": [ "4ac53a9341ae34d4651eff34e729e91806ab0c44", "0fd785de3b5e1c1aa5f398f7120cd524e43cc863", "73bf837c14c36fd4d9e5e15ea8ef49ec5dc2f079", "b87c995c64cf7a0b0c803014c81e49a3f2468638", "45183fbbd1a330d000ed4b5aa014171558e19391", "22b26297e0cc5df3efdba54a45714e4e27b59e17", "3cd14e60d65dd25b59e61f70affe7269206ef5a8", "9fbf7bb9f8bd898cfb2f2164c269518359ef5f18", "b5ce64ca4a787d46425479810e4eb005ccc54e0f", 
"33ad799fedfcbd7f311ca13421f8e46cb6bba405", "a65b4adbda5d5e6f603958f29f66ecee4897b43c", "0a3efe8d76efda96a8c03f72ecaadac89a1c057a", "3deeb0b2ac1c4abafde7b9a18e31ec9d4a35f419", "08082acc06dcd925ab80c8a5e47228aa2ac4832b", "42213381a41e55e4979871304f2d7be6d962bfba", "5e37c6da7c68c43f36c53bc8018bbc2d03635aff", "aefe1c2f7b64edf624267c46e9a43be4ece2a045", "349d502a483ff0995cdc18197dadc7f0774b205d", "10530a0f77894ae009cbfe154380ba9a24fdcad7", "43a2183ac92a3ac892a0084d98bfdce2b5dd519b", "b60491375f1385fda3c50e91aceafd67cd83cc24", "2d084e4dda9dc7778451e18234fb5964bcb76db8", "0185de029010361932a914ff5230bda02116ecee", "16ccb8d307d3f33ebb395b32db23279b409f1228", "2bf3d1a821f7a1e7c6e63a3784b4823d3e678830", "3a7c78ba03583ed6d83410c06113ed2439f1593f", "7e4cb3ca74b9e0d83cb53340d4ead2331cc8328c", "95e34d951f520d5f187f4262febe65031730e1b5", "1b2a8ae588ee7c01d8a855fd1cb1789bf4876990", "ba5a4078a4e58dcf2a3184fff07493e490bd6c37", "69c09e2de52499111f8eb9714ec861cbdcb5d036" ], "paperAbstract": "The current mechanisms for locating spectrum offenders are time consuming, human-intensive, and expensive. In this paper, we propose a novel approach to locate spectrum offenders using crowdsourcing. In such a participatory sensing system, privacy and bandwidth concerns preclude distributed sensing devices from reporting raw signal samples to a central agency; instead, devices would be limited to measurements of received power. However, this limitation enables a smart attacker to evade localization by simultaneously transmitting from multiple infected devices. Existing localization methods are insufficient or incapable of locating multiple sources when the powers from each source cannot be separated at the receivers. In this paper, we first propose a simple and efficient method that simultaneously locates multiple transmitters using the received power measurements from the selected devices. Second, we build sampling approaches to select sensing devices required for localization. 
Next, we enhance our sampling to also take into account incentives for participation in crowdsourcing. We experimentally evaluate our localization framework under a variety of settings and find that we are able to localize multiple sources transmitting simultaneously with reasonably high accuracy in a timely manner.", "pdfUrls": [ "http://ansr.cs.utah.edu/assets/MobiCom17.pdf", "http://doi.acm.org/10.1145/3117811.3117845" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/107d4303790bb03303bc7130f6b604b1fd2340f2", "sources": [ "DBLP" ], "title": "Simultaneous Power-Based Localization of Transmitters for Crowdsourced Spectrum Monitoring", "venue": "MobiCom", "year": 2017 }, "109408ba8dc33adaf94f9bc4f40200bfd16341c1": { "authors": [ { "ids": [ "31117574" ], "name": "Mahmoud Abo Khamis" }, { "ids": [ "1692185" ], "name": "Hung Q. Ngo" }, { "ids": [ "9246931" ], "name": "Dan Suciu" } ], "doi": "10.1145/3034786.3056105", "doiUrl": "https://doi.org/10.1145/3034786.3056105", "entities": [ "Algorithm", "Black box", "Conjunctive grammar", "Conjunctive query", "Database theory", "Datalog", "Disjunctive normal form", "Functional dependency", "Information theory", "Polynomial", "Submodular set function" ], "id": "109408ba8dc33adaf94f9bc4f40200bfd16341c1", "inCitations": [ "6c62d7178854bc6e33d01dff5986a19d57ea42cb", "001ab484e44bf365eb2c9532e21a42f9cbcbe5f2", "88231080fbb3f0475d6e82cd15b90134f56ade37" ], "journalName": "", "journalPages": "429-444", "journalVolume": "", "outCitations": [ "31181e73befea410e25de462eccd0e74ba8fea0b", "23e2f84886b98cf04f483fd5f80029c518270462", "1ffc977d82798cfab971e4abdb46ae7b707c57c0", "4e1f27620a5b287d269a84ea2a3253340a41b105", "97fe4f19b3074756214e3d7cc03f5b40a2a48cf2", "fb0aed07aedabb52489e7b2cf3766b00691d7225", "021764d0804445e0869c57314c069d07c874cb4b", "07a0a39ff4447ce9f0d2b06978d7b00a6d9fab15", "262fa6737764ae18e441ec6400d9fb58b2821c00", "193b024e7cd973d001f214851775e5b496986b6b", 
"1c918798bbfd2caa2335c5cd9f15c08e56ed2cde", "c96e4840b0d2419e135035bb9c02c8a1031af4e7", "25c51377a34aeb18b51ea7de1f9632eaa09f591e", "9545a9eff37c79f7590e074365a80dd7e49ce036", "2b5c8b0848dc6ec0593a8ee1a293c80848253519", "c2ae1b0acd281a2fff041f3098e7aef9a2f5c794", "0664878d500f58e003f839e06204d0d1bfe85c7c", "2e95819c590da31df4627657cfef6504111ad59c", "3dad3d035648bcc7f7855486a3dd8ee8c1e3ae97", "b7d8d2891d83f023bcd1d30f75540c8b89f24575", "76c5a201f117700aa52af5242826827fbba552f1", "c5b1883a96d64b2ed7e6470155d11ab105fbdc06", "3ce2d233cee585ecff73729836918ba87195c18f", "1adb361e20ca38f1e358e969fdd9e62d6b63598b", "415fc39efcf05915976e2d73336c38fe3e240fed", "f6c33bf427061e4c7aa5c7260721743c18798756", "465f02fb10fff812a5270e1f8027f8511045477e", "44254a445525585a87cab60bb7e5991ec0c43824", "6925e7f4d52ede44937294d86b5579380df32b0d", "eaed7286bba82a3adc56dc17623d82cebe4b34c6", "0e8933300a20f3d799dc9f19e352967f41d8efcc", "3fa839f333d6063f90aa4ee35b98a5e8d7045742", "fc45cd4563ce8ab02a5fe5facb22ea69864d9ef0", "354ada397f82295958068b029238ef9b3c92b351", "ef9ecdfa98eba6827ba0140981fd0c259a72c877", "006ca7d3571497c73062b67ab1ab20a4b09b0972" ], "paperAbstract": "Recent works on bounding the output size of a conjunctive query with functional dependencies and degree bounds have shown a deep connection between fundamental questions in information theory and database theory. We prove analogous output bounds for disjunctive datalog rules, and answer several open questions regarding the tightness and looseness of these bounds along the way. The bounds are intimately related to Shannon-type information inequalities. We devise the notion of a \"proof sequence\" of a specific class of Shannon-type information inequalities called \"Shannon flow inequalities\". We then show how a proof sequence can be used as symbolic instructions to guide an algorithm called PANDA, which answers disjunctive datalog rules within the size bound predicted. 
We show that PANDA can be used as a black-box to devise algorithms matching precisely the fractional hypertree width and the submodular width runtimes for aggregate and conjunctive queries with functional dependencies and degree bounds.\n Our results improve upon known results in three ways. First, our bounds and algorithms are for the much more general class of disjunctive datalog rules, of which conjunctive queries are a special case. Second, the runtime of PANDA matches precisely the submodular width bound, while the previous algorithm by Marx has a runtime that is polynomial in this bound. Third, our bounds and algorithms work for queries with input cardinality bounds, functional dependencies, and degree bounds.\n Overall, our results showed a deep connection between three seemingly unrelated lines of research; and, our results on proof sequences for Shannon flow inequalities might be of independent interest.", "pdfUrls": [ "https://arxiv.org/pdf/1612.02503v3.pdf", "https://www.cse.buffalo.edu//~hungngo/papers/panda-slides.pdf", "https://arxiv.org/pdf/1612.02503v4.pdf", "https://arxiv.org/pdf/1612.02503v1.pdf", "http://arxiv.org/abs/1612.02503", "http://homes.cs.washington.edu/~suciu/pods055-abo-khamis.pdf", "http://doi.acm.org/10.1145/3034786.3056105", "https://arxiv.org/pdf/1612.02503v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/109408ba8dc33adaf94f9bc4f40200bfd16341c1", "sources": [ "DBLP" ], "title": "What Do Shannon-type Inequalities, Submodular Width, and Disjunctive Datalog Have to Do with One Another?", "venue": "PODS", "year": 2017 }, "109ad34d70e35de4544a993ea2d71585a3d4cc6f": { "authors": [ { "ids": [ "13360306" ], "name": "Weiming Feng" }, { "ids": [ "40477245" ], "name": "Yuxin Sun" }, { "ids": [ "9489786" ], "name": "Yitong Yin" } ], "doi": "10.1145/3087801.3087815", "doiUrl": "https://doi.org/10.1145/3087801.3087815", "entities": [ "Algorithm", "Computation", "Degree (graph theory)", "Distributed algorithm", 
"Distributed computing", "Email", "Independent set (graph theory)", "Markov chain", "Markov random field", "Sampling (signal processing)" ], "id": "109ad34d70e35de4544a993ea2d71585a3d4cc6f", "inCitations": [ "c5ad27362fcae17706c007f47403d8dc18c54b81", "07071ec014edf63549c45c0112df1e010b4114ce", "69d0be25a6ebfd2370addae708126062f348f990", "a9a5799c5cb496a05586218f88758076ed25cc32" ], "journalName": "", "journalPages": "121-130", "journalVolume": "", "outCitations": [ "e18dfb9ed0095a642f8e9f821b41d6e74a5fa867", "3f084898e62b5824cf70100b91a63f1c2450a467", "95404888a8c8122591f1a61229eb0ad035779050", "25186afb27fd7d50b2f6b0e03487b6020e1e439e", "ada572052717e8e38a908bba4364084c205b27d5", "1cd3258a492f4dc8c5dac478b96be626d3103888", "309ca9947b79acf9f079bd1f478ab44789d4fc33", "0128da111996b4e89bbe549ccb8da27293431367", "09ebfc62b0a6a92150dede1cb7183de5b466d0da", "56622335ab668bd14403154a2589414c610472ce", "ac0c0c186420a9ea00258473226de6877760bcfa", "234dcf7148677d3844115d0029ff5c27b7105706", "72fcbb83432ba984e42447c28a1e192fde4582f6", "4476807e765a84f47b1b18e9cdc252e1dbebbe86", "efc1d6449aa2a7997d8cdaa700f5020af13b8ac7", "f562d5471143e525b83e24859e0422fa21215ba3", "0b10645fb62b02a253fe16975a1973468eb0d897", "f2c4f4a13e38b906543f4a60278c9ab3641e9c02", "9b91eaec9b1e1c15fce9e669105eba426ed6c771", "8caec7f48fbf19a086e3cf67fce16cf77dff9488", "31f667ddd9286d25b3bf22958270ffe1a4f98692", "19d7de5468c1a3992c0a498baae0cdfa5b27dbc5", "5cede579e4a03a60c630fd13e646507960d5d691", "3b02fa0413c118e8d8503177b7e37d4784546cf8", "2429e5a9a4d094d759c2d86f6506361bb4c907b1", "107bbea30cd16b7feb068eff9be6de6b30ed2aff", "17b27b1b6948c9dda5cc3d9f6e9a848ce99ab500", "145c3ca2ea0faebcdc42de8fa24dc57ecdca341d", "1a7b218579f87ab8c8c90e67610d5996c6260d81", "38b42b64eca378f056356881005771d54b9cb0f3", "13a31c5f957a8f1efca7ac6e5bf9dda7baf5e85c", "01f2dc9dbc45ec3179f825fd86c9e6487cc9b52f", "1f912ac1e1f8a9bfda1cf7664648a74b6559a407", "07071ec014edf63549c45c0112df1e010b4114ce", 
"620c3c09285a767c45ef39e6bb990c9467d40c32", "441334210410f1ccc06de90aef2e97896c230d56", "1b7142d759fddfc03e8735e1048e45c3cec73326", "9d24509639aa94d8116647766268be291654308c", "3b1cc719afb60324b499173668b0677b75a0a383", "44039a59510de5cfb055285acbf9273143acabc7", "d5aa5c2e5add29abd1023587c2e93a97bc2a6c8e", "26e8617b008604e6a710f1b7ee4428d95330678b", "1597409a71b61c1ab4b2230b80759e0cc83037c4", "03212f1bff727da9ffbda17a690fe77a108879c2", "874605f532a38828b2f1e0ed7b336d24744dc5cc", "0a4a34b9344b46596b2198560c7152178fe708cd", "45225c945bb5563e9d5849febad30aa329d4f2cf", "622adc72ff5a41dfa9887096beaa957710a41a65", "3f9d3cbdae5e192f77de4ac1407006b48d91263b", "a90d6525284983a978dc59b0437bcd9ce05cf561", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "b20315173380938fcf514b22e86a52abbba78b16" ], "paperAbstract": "The local computation of Linial [FOCS\u201987] and Naor and Stockmeyer [STOC\u201993] concerns with the question of whether a locally definable distributed computing problem can be solved locally: more specifically, for a given local CSP whether a CSP solution can be constructed by a distributed algorithm using local information. In this paper, we consider the problem of sampling a uniform CSP solution by distributed algorithms, and ask whether a locally definable joint distribution can be sampled from locally. More broadly, we consider sampling from Gibbs distributions induced by weighted local CSPs, especially the Markov random fields (MRFs), in the LOCAL model. We give two Markov chain based distributed algorithms which we believe to represent two fundamental approaches for sampling from Gibbs distributions via distributed algorithms. 
The first algorithm generically parallelizes the single-site sequential Markov chain by iteratively updating a random independent set of variables in parallel, and achieves an O(\u2206 log n) time upper bound in the LOCAL model, where \u2206 is the maximum degree, when the Dobrushin\u2019s condition for the Gibbs distribution is satisfied. The second algorithm is a novel parallel Markov chain which proposes to update all variables simultaneously yet still guarantees to converge correctly with no bias. It surprisingly parallelizes an intrinsically sequential process: stabilizing to a joint distribution with massive local dependencies, and may achieve an optimal O(log n) time upper bound independent of the maximum degree \u2206 under a stronger mixing condition. We also show a strong \u03a9(diam) lower bound for sampling: in particular for sampling independent set in graphs with maximum degree \u2206 \u2265 6. Independent sets are trivial to construct locally and the sampling lower bound holds even when every node is aware of the entire graph. This gives a strong separation between sampling and constructing locally checkable labelings. Department of Computer Science and Technology, Nanjing University. Emails: fengwm@smail.nju.edu.cn and sunyuxinxa@126.com State Key Laboratory for Novel Software Technology, Nanjing University. Email: yinyt@nju.edu.cn. Research supported by the National Science Foundation of China under Grant No. 61672275 and No. 
61272081.", "pdfUrls": [ "https://arxiv.org/pdf/1702.00142v1.pdf", "https://arxiv.org/pdf/1702.00142v2.pdf", "http://doi.acm.org/10.1145/3087801.3087815", "http://arxiv.org/abs/1702.00142" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4394/5a381a582e36236916ff06ae375d7bbea74a.pdf", "s2Url": "https://semanticscholar.org/paper/109ad34d70e35de4544a993ea2d71585a3d4cc6f", "sources": [ "DBLP" ], "title": "What Can be Sampled Locally?", "venue": "PODC", "year": 2017 }, "109f26c285d48ba8f7b5e259364fecef0b3273f6": { "authors": [ { "ids": [ "2359017" ], "name": "Neha Agarwal" }, { "ids": [ "3334450" ], "name": "Thomas F. Wenisch" } ], "doi": "10.1145/3037697.3037706", "doiUrl": "https://doi.org/10.1145/3037697.3037706", "entities": [ "Approximation algorithm", "Cloud computing", "Computer data storage", "Dynamic random-access memory", "Limiter", "Linux", "Observable", "Page (computer memory)", "Paging", "Second Level Address Translation" ], "id": "109f26c285d48ba8f7b5e259364fecef0b3273f6", "inCitations": [ "e423c74455db069e6a5cc21f68954081ad22a36c", "2561d914980ab90d0e92fa045cbdc24867fe132c", "15388b06b42d9a61a1d083bc3bf140ef40f066fa", "92229ef2d0bfdcba2fdf2bf265ae6d37d0b34e9f", "1cf5e11f8230c9badb8e963c070ecca2c1bda709", "cb14f2138d6de2f459841430aa52a2b6f2d3fc90", "ae39cff83d4850476855c06d02a8dc80ae55ad42", "24c0c34675eb35e300244c6ff682155a34a2e3d5", "8219bf467b82208a98aa7b45e67f35ed740b979f" ], "journalName": "", "journalPages": "631-644", "journalVolume": "", "outCitations": [ "8e8e622d5fab4c1d2a5bc7783db84e62cc570f9a", "34e2b75fd5717029fc9da92dd6160eb6e2d19ad9", "0b43a722d2ca43752750e4976f3056a006990143", "40c5050e470fa0890e85487e4679197e07a91c09", "1ddd08b8610ffe95cb85d2aab2ff08c2722c9772", "a725204b6d9981f818a88b68ac7498a6261f7dea", "755a8808e3ae70539ea8214318d371ca69a4ee83", "1dcaf21ff8e70d9a5dd85c8a8bd6ead7201fa08a", "1bed30d161683d279780aee34619f94a860fa973", "40718dab3e261c2456c3576d15dd0105f1e2e4e2", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", 
"33e64874996ac6d163e4e5a97e28b617de7cc0f5", "2e5c003b2f2ad29a0f079309bc37e11da05458e6", "9aa0d7253574e50fe3a190ccd924433f048997dd", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "00dca7217305a31dcf5108eb7ecf862dd4827823", "2fc8a439f0f73462f875870c403c90ea1fb99e41", "0d776c6b3d19e76a24c8c77bf33a5276294710b9", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "0a6c15f75b0b52ea345caffabacd4c3f382b59a4", "10551c91b4d36d1009b23b4d2b88a9e1733fe029", "35c3882db9e1b2bdf838122787968679595f61de", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "2a660e81e6501ec3489d962fe87448ecf277237f", "0653e2ed9f683868cb4539eb8718551242834f6b", "18e9cd28be46edec0f3ecd39b78b8b7434db85d6", "8b10b13fb495101d1e4eb768907cff05e3bd9315" ], "paperAbstract": "The advent of new memory technologies that are denser and cheaper than commodity DRAM has renewed interest in two-tiered main memory schemes. Infrequently accessed application data can be stored in such memories to achieve significant memory cost savings. Past research on two-tiered main memory has assumed a 4KB page size. However, 2MB huge pages are performance critical in cloud applications with large memory footprints, especially in virtualized cloud environments, where nested paging drastically increases the cost of 4KB page management. We present Thermostat, an application-transparent huge-page-aware mechanism to place pages in a dual-technology hybrid memory system while achieving both the cost advantages of two-tiered memory and performance advantages of transparent huge pages. We present an online page classification mechanism that accurately classifies both 4KB and 2MB pages as hot or cold while incurring no observable performance overhead across several representative cloud applications. We implement Thermostat in Linux kernel version 4.5 and evaluate its effectiveness on representative cloud computing workloads running under KVM virtualization. 
We emulate slow memory with performance characteristics approximating near-future high-density memory technology and show that Thermostat migrates up to 50% of application footprint to slow memory while limiting performance degradation to 3%, thereby reducing memory cost up to 30%.", "pdfUrls": [ "https://web.eecs.umich.edu/~twenisch/papers/asplos17.pdf", "http://doi.acm.org/10.1145/3037697.3037706" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/109f26c285d48ba8f7b5e259364fecef0b3273f6", "sources": [ "DBLP" ], "title": "Thermostat: Application-transparent Page Management for Two-tiered Main Memory", "venue": "ASPLOS", "year": 2017 }, "10a823e88a8902b8edb37c6c0c96b5e35c8438c5": { "authors": [ { "ids": [ "2613669" ], "name": "Nir Shavit" } ], "doi": "10.1145/3018743.3018766", "doiUrl": "https://doi.org/10.1145/3018743.3018766", "entities": [ "Algorithm", "Central processing unit", "Connectome", "Connectomics", "Graphics processing unit", "High-throughput computing", "Image processing", "Machine learning", "MapReduce", "Mass storage", "Multi-core processor", "Parallel computing", "Performance engineering", "Scalability", "Terabyte", "Throughput" ], "id": "10a823e88a8902b8edb37c6c0c96b5e35c8438c5", "inCitations": [ "1d05c64694a18b7559641bf88d8d65047387cdfe", "ab77fa0fcdb5882b9cc992a4d870bc1ebf69cf5d", "a2479a1223fd44077e0685460b650c60086318cd", "0caff1a71d1c04003466972eacef599e08e2d1f7", "25428fd1f4f82c964e81be6257b32b60dc154ff9" ], "journalName": "", "journalPages": "211", "journalVolume": "", "outCitations": [ "3e77a77247734dc918a5723573e1158eee1955f9", "25428fd1f4f82c964e81be6257b32b60dc154ff9", "0c0a8e184ce33d3937317855fe4cc4e7b9a1041f", "31181e73befea410e25de462eccd0e74ba8fea0b", "13c8ba6f7d408784db5cbc3a056eb8c2ad7fd68a", "ff042ecf3de689b087cf60bfb177508813f7d3ec", "abd6ae3adf41c893c74268246abe9ac94564651c", "087337fdad69caaab8ebd8ae68a731c5bf2e8b14", "6453adf73e02bccf16e6478936f42a76c418ab02", 
"24281c886cd9339fe2fc5881faf5ed72b731a03e", "0bb21a76b5604211927cbc3c18f64437adbd834a", "07045f87709d0b7b998794e9fa912c0aba912281", "cc2b790265c30d6f7d31ebff3009d298236f66da", "01ded38376ad23fe97c3816fdea40b2bfe133f15", "932ed9910569b5ca9f6507d2536ef1608f92cff2", "0c6c30e3052fcc01aa5ee38252d77f75322d7b3f", "0558c94a094158ecd64f0d5014d3d9668054fb97", "30416bc0760f463fa90bd8a92a388fb6710fe589", "80c593a0668f4eb157a525831b7daad3bdb44381", "d246a4ef683a986d5ad636514ca4dc77f624b68c", "0606b9b2bedb67039eb1615f8ef5b13f42f8339a", "8b7f2d26647ac7383b79ef0b5b99390ed3210ccb", "084bf93f7d90debbb666d4ba10bf635560805c2d", "0541d5338adc48276b3b8cd3a141d799e2d40150", "c33ef60880d83232064f010ff3a9d0a5f20fe0eb", "12078fd9bee79fd2e9fae055c4cc33db382272af", "0df37799cedef8c3625cc554aee51e65cbcedd51", "5c36d8bb0815fd4ff5daa8351df4a7e2d1b32934", "2c968749e04fc16908fc543e4468f945a5c695da", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "053a9e3bba8b14b446d198595e0884e1737619df", "273d591af0bdcbefe37d7dd9150e2f612ca7121d", "dd8622231b97aa0fbf78a116618bebad57b0d10d", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "eca7c0f1f5f33434f62970cb9a99cdcea1a0ab2e", "420c46d7cafcb841309f02ad04cf51cb1f190a48", "043afbd936c95d0e33c4a391365893bd4102f1a7", "8cbd0f398d2aa586e446b1048bfc34b6de80c9bb", "c9176349d02468494069e015ccd5e5a47b63ed55", "59f2b20561effddff6f61c3cd6cfe4ed661768d7", "154728875d4668065ca6ba9fa2f5d2a1bcfc4a6e", "0003b4b4c35f0384596b6f129f7dda8449cdc79f", "7b1a8647dd6482adee6b774d694e5c85b6d5a9cf", "755fb4790e80e992346e87b4271da518163cc5e7", "0e0d499ecb3947b024ff8b211fcfcab9c4c272ce", "09193e19b59fc8f05bee9d6efbfb1607ca5b6501", "20752d876a849adfe71e8d97d5721ef2eb22ba78", "890d4725fa6e63806e76e89b93c6d0d6429a0d6b", "2c4648cc7db416c7597f0032b06b2b58b46c2948", "5b9dea07606d4f18bc7aaad7a5b6784fc2425afd", "17ccb526085ec88fa2d35d8c8d7dc246b9d1bbe3", "61cfa93c7e47ebcf3645ed41109fc123b63fcbb7", "37754b2911876dfda802a577ec43b2dba04b4a15", "37288fe76a864233214b6a5e4cee29923e9c36b9", 
"02045ecfc9748771cf52180497278fa9d26e7505" ], "paperAbstract": "The current design trend in large scale machine learning is to use distributed clusters of CPUs and GPUs with MapReduce-style programming. Some have been led to believe that this type of horizontal scaling can reduce or even eliminate the need for traditional algorithm development, careful parallelization, and performance engineering. This paper is a case study showing the contrary: that the benefits of algorithms, parallelization, and performance engineering, can sometimes be so vast that it is possible to solve \"cluster-scale\" problems on a single commodity multicore machine.\n Connectomics is an emerging area of neurobiology that uses cutting edge machine learning and image processing to extract brain connectivity graphs from electron microscopy images. It has long been assumed that the processing of connectomics data will require mass storage, farms of CPU/GPUs, and will take months (if not years) of processing time. We present a high-throughput connectomics-on-demand system that runs on a multicore machine with less than 100 cores and extracts connectomes at the terabyte per hour pace of modern electron microscopes.", "pdfUrls": [ "http://doi.acm.org/10.1145/2935764.2935825", "http://people.csail.mit.edu/amatveev/Connectomics_Pipeline_PPoPP2017.pdf", "http://people.csail.mit.edu/yaronm/PPoPP17_Matveev_Meirovitch.pdf", "http://dl.acm.org/citation.cfm?id=3018766" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/10a823e88a8902b8edb37c6c0c96b5e35c8438c5", "sources": [ "DBLP" ], "title": "A Multicore Path to Connectomics-on-Demand", "venue": "SPAA", "year": 2016 }, "10b141d7bf96e970ec6bca0a134c15ba712a06d1": { "authors": [ { "ids": [ "20329824" ], "name": "Giorgio Lucarelli" }, { "ids": [ "2998910" ], "name": "Fernando Machado Mendonca" }, { "ids": [ "1733901" ], "name": "Denis Trystram" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Central processing unit", 
"Generic programming", "Online and offline", "Parallel computing", "Scheduling (computing)" ], "id": "10b141d7bf96e970ec6bca0a134c15ba712a06d1", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "140-149", "journalVolume": "", "outCitations": [ "2e3ff7ffeecb29608b1eaf01bacde8f4a2f0b29f", "884b6dd840fc7a19b3917821bd9d7e9455ad9a3e", "1e8233a8c8271c3278f1b84bed368145c0034a35", "0a0432f604705963d15f299af02b242df7752dd9", "b56daafeb36e1c19180f401924a6f9009940efe9", "f02e1823cc1f80b129125ceb94af5f62f862b791", "4c8a2e0d68a2c68b2425d71c7fd50b15293831be", "26e359615d28782d90609dee0bcb5e45a8d3934e", "be4c1dd0e8afe5ac839bba41db32b9035fa64d5f", "62a75fe31462ec1ad899aaa29b41bf654fce8799", "0649ad65055d7062cb097b05f2ecc7e105bb411d", "509009e2d24e24f836a2f54156698b6a2d5aaeec" ], "paperAbstract": "We present a new method for scheduling independent tasks on a parallel machine composed of identical processors. This problem has been studied extensively for a long time with many variants. We are interested here in designing a generic algorithm in the on-line non-preemptive setting whose performance is good for various objectives. The basic idea of this algorithm is to detect some problematic tasks that are responsible for the delay of other shorter tasks. Then the former tasks are redirected to be executed in a dedicated part of the machine. 
We show through an extensive experimental campaign that this method is effective and in most cases is closer to some standard lower bounds than the base-line method for the problem.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101131" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/10b141d7bf96e970ec6bca0a134c15ba712a06d1", "sources": [ "DBLP" ], "title": "A New On-line Method for Scheduling Independent Tasks", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "10bfd5aed1410b88c5c1b5212f450a1994fc5afe": { "authors": [ { "ids": [ "30291549" ], "name": "Murad Kaplan" }, { "ids": [ "9762395" ], "name": "Azzam Alsudais" }, { "ids": [ "1750746" ], "name": "Eric Keller" }, { "ids": [ "1678308" ], "name": "Franck Le" } ], "doi": "", "doiUrl": "", "entities": [ "Computer cluster", "DPDK / dpdk.org", "Data store", "Docker", "Failover", "Firewall (computing)", "Load balancing (computing)", "Multidimensional scaling", "Network address", "Network address translation", "Network function virtualization", "OpenFlow", "Pipeline (computing)", "Software deployment", "Stateful firewall", "Stateless protocol", "Throughput", "Transfer function" ], "id": "10bfd5aed1410b88c5c1b5212f450a1994fc5afe", "inCitations": [ "6cfe9dc89d0fd4778ec9a42af1a39ed99f605211", "4fde15a77dd74bb4d1cb287074f1fd1a3fbbc2e0", "7a757b03f4b1868d5d8c27c4f6328e69f3d02edc", "7e4bc8c54dc01bb8019455a119e3d3666b3162b8", "54fe62cc4ceeefa56437ee8562ca37c38fdeb16e", "83a31c52bed8d3845201acb7a5b4603212b9e8b6", "3253860b8f1398b19db673ec99253d65db862adf", "841dd77064cd38a749c550f85ee1336733eee300", "a355edbb24d406761407e2728218d2192f2c1fcf", "91a9516b55d4bcd0180c80254d327c222f59bdd3", "2e7c1a2953e737ea43237c313751d3e5c5f73250" ], "journalName": "", "journalPages": "97-112", "journalVolume": "", "outCitations": [ "0433bb657317ac22f7c66d71dfd14c8ead607d73", "04f6a5dc6c2aac0586f8f1e83b434ea96fffcd66", 
"028378b395dc2a11e8ccc3d994df228340fd9697", "336b4f3099b8f629adc20a69aba15257e53539f9", "3547ac839d02f6efe3f6f76a8289738a22528442", "17650831f1900b849fd1914d02337e1d006aea0c", "1901753d2664473a3758a1c4413421611d7f17bc", "102090e6e2363e094439a41ef0439dfac5da0126", "514a5c15e8cf3f681febecad954a4508d9189c99", "07add9c98a979e732cfa215c901adb1975f3f43a", "6e4d333d5e53ee2dd71c8483e5aef59bd5f7f596", "46a38fb1e26c9957111d154c6fc2c39101ff0ba1", "4eb9a7c9984ab9ba2970d6b6342360cb3262a3fd", "67b9afba33c809257beb33629db9a0bb02611eb6", "08ddde0eaf4925704222135788f79fe293c5894d", "4859d7eb90e7a662536cb0ae8272898239cf396b", "55e4c1c02a7499cc99082ceaaf13d32af46ce845", "8e8e622d5fab4c1d2a5bc7783db84e62cc570f9a", "3dde3fec553b8d24a85d7059a3cc629ab33f7578", "00dca7217305a31dcf5108eb7ecf862dd4827823", "2077579d62fc090d4ddf45f107ffae0468936165", "163247e7ed8db43c9529d85c384d8843e22a136b", "73966d417bdfe0fd2f1bfd82e7dddf51ccbda961", "2c7e254cfea97f0b10320d723aa906f3f5c9bed6", "03d0421c30df23b719cfe1dd6b486472aed43f52", "16d95a30bf189a0fe37af03eee9cc8af49709cd2", "6276fdf6db1c4b23cc21191e2e8f14fb51606b5b", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "077d6198039dea3f60ef9f3d3ef9f128fd3edf71" ], "paperAbstract": "In this paper we present Stateless Network Functions, a new architecture for network functions virtualization, where we decouple the existing design of network functions into a stateless processing component along with a data store layer. In breaking the tight coupling, we enable a more elastic and resilient network function infrastructure. Our StatelessNF processing instances are architected around efficient pipelines utilizing DPDK for high performance network I/O, packaged as Docker containers for easy deployment, and a data store interface optimized based on the expected request patterns to efficiently access a RAMCloud-based data store. 
A network-wide orchestrator monitors the instances for load and failure, manages instances to scale and provide resilience, and leverages an OpenFlow-based network to direct traffic to instances. We implemented three example network functions (network address translator, firewall, and load balancer). Our evaluation shows (i) we are able to reach a throughput of 10Gbit/sec, with an added latency overhead of between 100\u03bcs and 500\u03bcs, (ii) we are able to have a failover which does not disrupt ongoing traffic, and (iii) when scaling out and scaling in we are able to match the ideal performance.", "pdfUrls": [ "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/kablan", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_kablan_0.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-kablan.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-kablan.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_kablan_0.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_kablan.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/f380/e932eef3f785b01aa3c16fe85bf64f9aea1c.pdf", "s2Url": "https://semanticscholar.org/paper/10bfd5aed1410b88c5c1b5212f450a1994fc5afe", "sources": [ "DBLP" ], "title": "Stateless Network Functions: Breaking the Tight Coupling of State and Processing", "venue": "NSDI", "year": 2017 }, "1105f0bafa12ade41bbb22b4ba47f445f4a6982c": { "authors": [ { "ids": [ "2686270" ], "name": "Gaurav Pandey" }, { "ids": [ "2440174" ], "name": "Ambedkar Dukkipati" } ], "doi": "10.1109/ICDM.2017.46", "doiUrl": "https://doi.org/10.1109/ICDM.2017.46", "entities": [ "Discriminative model", "Encoder", "Feature learning", "Matrix regularization", "One-hot", "Overfitting", "Semi-supervised learning", "Supervised learning", "Unsupervised learning" ], "id": "1105f0bafa12ade41bbb22b4ba47f445f4a6982c", 
"inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "367-376", "journalVolume": "", "outCitations": [ "284b18d7196f608448ca3d9496bf220b1dfffcf5", "154728875d4668065ca6ba9fa2f5d2a1bcfc4a6e", "187480101af3fb195993da1e2c17d917df24eb23", "245b46b10b46e765bcc36ee10df402d1d541a9ec", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", "6c11626ae08706e6185fceff0a6d05e4bfd6bd06", "64fd6b7139be4b8b104a9aed768deaf62f71f5c0", "2bf973df141a57270457187d0d2c070a0f43b55d", "408e8eecc14c5cc60bbdfc486ba7a7fc97031788", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "5fe391303e703b87b672afbaa7d17dc4f4b01ceb", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "3dd2f70f48588e9bb89f1e5eec7f0d8750dd920a", "1c77cadb41e8eee056cba2ced825950195e54ce4", "543f21d81bbea89f901dfcc01f4e332a9af6682d", "838c9137e6fd807c871c80976b4f75c8c8bfcffc", "25534dbae461e383f4900d268e81306eac3398dc", "46aca9fd693cda49f7f02d575efaee0977f078c7", "35734e8724559fb0d494e5cba6a28ad7a3d5dd4d", "0f88de2ae3dc2ec1371d1e9f675b9670902b289f", "58513e5043c8a8fb61dbe83ab58225e7f60575af", "3362aa263d0a3296cb05101a4ebe576957e6c364", "007e86cb55f0ba0415a7764a1e9f9566c1e8784b", "087337fdad69caaab8ebd8ae68a731c5bf2e8b14", "8b11230bb90f9f98d7a791f13df438efc8dd29cd", "2e86402b354516d0a8392f75430156d629ca6281", "162d958ff885f1462aeda91cd72582323fd6a1f4", "357776cd7ee889af954f0dfdbaee71477c09ac18", "6de2b1058c5b717878cce4e7e50d3a372cc4aaa6", "59c2f171f95941b5e36fc56fcbc6fa4d66b5fb55", "178631e0f0e624b1607c7a7a2507ed30d4e83a42", "245414e768c3b8c8288ac0651604a36b1a44a446", "146f6f6ed688c905fb6e346ad02332efd5464616" ], "paperAbstract": "In recent years, deep discriminative models have achieved extraordinary performance on supervised learning tasks, significantly outperforming their generative counterparts. However, their success relies on the presence of a large amount of labeled data. How can one use the same discriminative models for learning useful features in the absence of labels? 
We address this question in this paper, by jointly modeling the distribution of data and latent features in a manner that explicitly assigns zero probability to unobserved data. Rather than maximizing the marginal probability of observed data, we maximize the joint probability of the data and the latent features using a two step EM-like procedure. To prevent the model from overfitting to our initial selection of latent features, we use adversarial regularization. Depending on the task, we allow the latent features to be one-hot or real-valued vectors, and define a suitable prior on the features. For instance, one-hot features correspond to class labels, and are directly used for unsupervised and semi-supervised classification task, whereas real-valued feature vectors are fed as input to simple classifiers for auxiliary supervised discrimination tasks. The proposed model, which we dub discriminative encoder (or DisCoder), is flexible in the type of latent features that it can capture. The proposed model achieves state-of-the-art performance on several challenging tasks. Qualitative visualization of the latent features shows that the features learnt by the DisCoder are indeed meaningful.", "pdfUrls": [ "https://arxiv.org/pdf/1709.00672v1.pdf", "http://arxiv.org/abs/1709.00672", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.46" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1105f0bafa12ade41bbb22b4ba47f445f4a6982c", "sources": [ "DBLP" ], "title": "Unsupervised Feature Learning with Discriminative Encoder", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "111867082baba045e654f3925b58329b03fa0dc5": { "authors": [ { "ids": [ "2499986" ], "name": "Hamed Zamani" }, { "ids": [ "1704390" ], "name": "W. 
Bruce Croft" } ], "doi": "10.1145/3077136.3080831", "doiUrl": "https://doi.org/10.1145/3077136.3080831", "entities": [ "Algorithm", "Data mining", "Experiment", "Foreach loop", "Information retrieval", "Natural language processing", "Query expansion", "Relevance", "Semantic similarity", "Text corpus", "Unsupervised learning", "Vocabulary", "Web query classification", "Word embedding", "Word2vec" ], "id": "111867082baba045e654f3925b58329b03fa0dc5", "inCitations": [ "bc72e44040d57e0b56ea0c648c19226d43989af9", "7b54c5bcd4f79e06b441dc650feb2cc581cd1f1e", "432b36c1bec275c2778c66f9897f9e02f7d8b579", "0acf2f684c0d0e2e3aba215dffab84b2cc175c31", "e8113b84ec2fcb7b4c36265ca133aa0b4bbe5c54", "53722f84720c66e58106b630b89c7fba20184a87", "575c8cbf97b0b5fb99cb359570f2ecb438b77036", "a339a2a3732d2a5dbfada7e7b6f0da453a989546", "3c8063179345af107834671bfc5453b940ce07d7", "0e95d0ea7264a802451bd4deb52379721d388668", "6b24792298d47409cdf23012f593d49e2be0d4f3", "2c1d769fcee7dd19f993ad6673d40394461b0250" ], "journalName": "", "journalPages": "505-514", "journalVolume": "", "outCitations": [ "762b63d2eb86f8fd0de98a08561b77527ae8f165", "1005645c05585c2042e3410daeed638b55e2474d", "1df176123ada3c3aebba3d9fcec55386091b0d13", "8784f4a87e6c4140206ce794b643b9394dad6aad", "03a8cb23b78ae1e8662b226d96e4a0ac2bf5d3fd", "158f9e4e385645d2db3949483789cc84ceb41c3c", "5c45ebaf7ca2832048bec89e8fc579ca8caf3b8d", "93fd10b662f61673e6a95316c9646ef7be1dfcb6", "82ff2dce7215b17128ff07752d221028e97f5a66", "cc592527b85fa1f502db56dfb01c3cb9f4c40d09", "33151c9905102c47d431f59fc9a5a7667960507a", "206d63c57430071a5b6efa261ff55122d0eed829", "2871f115e7a11c903258491c75d4171fac679344", "8b40b159c2316dbea297a301a9c561b1d9873c4a", "2e42bf8747363161851dc04b85aedb1ada50daaf", "f0ec25cb2e7a17fed5eae185a5b779c1c0704719", "0e95d0ea7264a802451bd4deb52379721d388668", "328b00f1baaf08dedba3a788b4ce0a4b26003f18", "0825788b9b5a18e3dfea5b0af123b5e939a4f564", "69ab8fe2bdc2b1ea63d86c7fd64142e5d3ed88ec", 
"87ecd35731a9bf712862f748064bd81ec132ed5c", "15781aa355315f5b088fb58a2447ccc426f7b01b", "8490234d79b47e459824dcf87c1e288211a3c964", "9eb67ca57fecc691853636507e2b852de3f56fac", "3832b5bbb8d751da15cdfa466f85cfc684b16580", "88e5cd43d6fb9abc87a943b023d092fdff74c9c4", "2b3e1b56736c3e4081e7d5fc5993d35aaf27fe18", "052b1d8ce63b07fec3de9dbb583772d860b7c769", "32db1a815e2ba6a6e5e2c3a2cce135135347782b", "517a461a8839733e34c9025154de3d6275543642", "7c1cdcbdd30163f3d7fd9789e42c4a37eb2f7f04", "bfea4d58717c83c67ac3f9eab855d15c59754757", "3257cfb4bb041efe583a3c80e4491419d8852275", "1ed45f7a115d6191dab18ab1d687077851f33462", "38612e346fdf3158c32c16058f7e8820a8f0325e", "4c9fafa3b1bed97bb00b8bc68db39a9ad48490f1", "214adc2dfdc2160cdf5be54001daf2b2304a03b3", "a339a2a3732d2a5dbfada7e7b6f0da453a989546", "2ed164a624809c4dc339f973a7e12c8dc847da47" ], "paperAbstract": "Learning a high-dimensional dense representation for vocabulary terms, also known as a word embedding, has recently attracted much attention in natural language processing and information retrieval tasks. The embedding vectors are typically learned based on term proximity in a large corpus. This means that the objective in well-known word embedding algorithms, e.g., word2vec, is to accurately predict adjacent word(s) for a given word or context. However, this objective is not necessarily equivalent to the goal of many information retrieval (IR) tasks. The primary objective in various IR tasks is to capture relevance instead of term proximity, syntactic, or even semantic similarity. This is the motivation for developing unsupervised relevance-based word embedding models that learn word representations based on query-document relevance information. In this paper, we propose two learning models with different objective functions; one learns a relevance distribution over the vocabulary set for each query, and the other classifies each term as belonging to the relevant or non-relevant class for each query. 
To train our models, we used over six million unique queries and the top ranked documents retrieved in response to each query, which are assumed to be relevant to the query. We extrinsically evaluate our learned word representation models using two IR tasks: query expansion and query classification. Both query expansion experiments on four TREC collections and query classification experiments on the KDD Cup 2005 dataset suggest that the relevance-based word embedding models significantly outperform state-of-the-art proximity-based embedding models, such as word2vec and GloVe.", "pdfUrls": [ "https://arxiv.org/pdf/1705.03556v1.pdf", "https://arxiv.org/pdf/1705.03556v2.pdf", "http://maroo.cs.umass.edu/pub/web/getpdf.php?id=1267", "http://doi.acm.org/10.1145/3077136.3080831", "http://arxiv.org/abs/1705.03556" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/111867082baba045e654f3925b58329b03fa0dc5", "sources": [ "DBLP" ], "title": "Relevance-based Word Embedding", "venue": "SIGIR", "year": 2017 }, "111e36157f3704f4c14fdf7126a5694537a29b08": { "authors": [ { "ids": [ "2422012" ], "name": "Goran Doychev" }, { "ids": [ "3147299" ], "name": "Boris K\u00f6pf" } ], "doi": "10.1145/3062341.3062388", "doiUrl": "https://doi.org/10.1145/3062341.3062388", "entities": [ "Bit-level parallelism", "Cache (computing)", "Central processing unit", "Cryptography", "Executable", "Interaction", "Key (cryptography)", "Memory management", "Modular exponentiation", "Personally identifiable information", "Run time (program lifecycle phase)" ], "id": "111e36157f3704f4c14fdf7126a5694537a29b08", "inCitations": [ "97579fb9cdb51141e25ba852f0ada16543097a05", "3a232a826e37bbf26fdc06cd569b1ef1b221e40b", "305d9eff8ac6200fbc53cb495df096e6e86e81eb", "9f17f4067bd97f061c5f4683795c7509277dec7e" ], "journalName": "", "journalPages": "406-421", "journalVolume": "", "outCitations": [ "0dada24bcee09e1f3d556ecf81f628ecaf4659d1", "434c9d54e2b942ac2543c5596c1b33f6b83e60a8", 
"310dd68bc8b327d260020c0248ca8e9ffb53df0b", "3001d2504e1dbc547d12d05f3ed1a671d125c4ce", "47e98574f432ec6a7a224b807a0bbd08a2a66148", "25b24d6cc547c80e3a1ad037082694a5f6a2b8a9", "4d67f221c595dbfc448e49b1b6e6bf9bfed40f7b", "321679c5fd1f624cfc332953408fed924484cc09", "6421c677dfc71fe0fada74b4adae27417cd50d00", "30d2f7030094603fa241080d77b89722672afcce", "37179bfc4836890a32950ea2fb74795823284362", "817eb7690c05d2f0caf1ed2faeb5b10c28bd3836", "52c2c050af5b32d4929b4b193967a3675d03aea0", "1f932f0d49a4c56d9718e8506d6177c6a6848831", "3671af9d7655977e573bd123f93470f978ea7a62", "13fadf9e3fc927e9e7df14132feecc1899c68d63", "2afec7f7defb45e3b238bcd556ba6c399c401fe9", "3cca51b3acdc79784083b29047c48e1c3e3bb4c3", "615168555150d80752a1c195229642acbe6fb3d9", "4d624b942a58818f8d425460638cb4b65ed84e1c", "451ce08a5335b00cda49877ba1335e95a91c5af7", "9263789ba999bb726c9c7fdf0bbc77844ee03272", "06a1d8fe505a4ee460e24ae3cf2e279e905cc9b0", "54483771c5897c23b7380adcd1e3dee2b8962489", "683b1f19926adda043f42565c51640378dc2bd9a", "18a41be780aa642c84a2c6850397fab147e0ac27", "008ff29ee4dbee79028e1017d6459347ad8f45d6", "d296252ddf0e2c6b7422008d703843c1863bd15b", "09a3c26409032279393ee78fb46ca73b94ad3bab", "06f16d9430d5f6213cf5399b167a3d989c3ff798", "32e16ae384e03d76b74be2e04fcf5ac5007fc155", "bd79772a58dd4bf040ac9f9c1946614b6a51cc4c", "034ce3cb41350f5298c1044f77ef044fc7d2fa66", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "26f3cbecbc636984d57e52191c1d87c9377aff6f", "3273fb537a835b2a759b80eca7d0e3732ff25cb9", "7dc95fbf0293b00b877423403ff4d5581eb5f102", "40929bca481e33273d79d4f2d73e0f00a861381b", "352e74019d86163d73618f03429ae452ab429629", "3e91b04e6e98a9bdb72cc9acb8db550f94b58006", "4207ebe6f2656c1a40149ec446ca99885ce5b2ad", "5fbf739032dd548c1ff189e7333f05e215906a1b", "d32d4ff33b1b2665d6081194eb6acdc3c7dd6891", "c0c14c16813f0083b9e3bf602746a8be1270996a" ], "paperAbstract": "CPU caches introduce variations into the execution time of programs that can be exploited by adversaries to recover private 
information about users or cryptographic keys. \n Establishing the security of countermeasures against this threat often requires intricate reasoning about the interactions of program code, memory layout, and hardware architecture and has so far only been done for restricted cases. \n In this paper we devise novel techniques that provide support for bit-level and arithmetic reasoning about memory accesses in the presence of dynamic memory allocation. These techniques enable us to perform the first rigorous analysis of widely deployed software countermeasures against cache attacks on modular exponentiation, based on executable code.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062388", "https://arxiv.org/pdf/1603.02187v2.pdf", "http://arxiv.org/abs/1603.02187", "http://arxiv.org/pdf/1603.02187v1.pdf", "https://arxiv.org/pdf/1603.02187v3.pdf", "https://arxiv.org/pdf/1603.02187v1.pdf", "http://software.imdea.org/~bkoepf/papers/pldi17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/111e36157f3704f4c14fdf7126a5694537a29b08", "sources": [ "DBLP" ], "title": "Rigorous analysis of software countermeasures against cache attacks", "venue": "PLDI", "year": 2017 }, "1130a13b74e11b99d5233dce7f157d54cfea4ed1": { "authors": [ { "ids": [ "1773557" ], "name": "Omer Subasi" }, { "ids": [ "1746771" ], "name": "Gokcen Kestor" }, { "ids": [ "1679176" ], "name": "Sriram Krishnamoorthy" } ], "doi": "10.1109/CLUSTER.2017.127", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.127", "entities": [ "Application checkpointing", "Data compression", "Fail-stop", "Failure rate", "Input/output", "Operating system", "Simulation", "Time complexity" ], "id": "1130a13b74e11b99d5233dce7f157d54cfea4ed1", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "464-474", "journalVolume": "", "outCitations": [ "11a79ddb0feddaecb1b2933491d11a0fab150ceb", "7e3fdbbad04a39b2f44436287668f1a682e26ab0", 
"7b2c34f8aa3183cff50871a8d16c85eefd0089a3", "8d417d46b5d76bb308802fd7a34127d0f0354da3", "34a86b7b24d4a93dbb249fc05f0b7c0f48f90aff", "186e56c99b6392919e10734e8e9c174892663131", "ce9fd32b48148c824ad8ab53386027cb0cf007ee", "02f413ed4b2141ea1e95f6daaa59761b6614cfdb", "168b8cbbbacd234f23b70b952ef58b5b56e67529", "01ab0f0d7a9574a5c052b364c749a9726b47f6a4", "53628588d0559a2f2b70ac260d511028902e0e0c", "06230d13e276bd871a378ca932a41b5cff94e29f", "60b38e8653374454ec44433900b44094080d0ca4", "6f4646f37530aef99d903ff0da5f1a4659c45cac", "01d62cd850496455ce1616500f491690effa5c98", "c7a2bbed2cbec1dd6e7ef26e93bde5713225310f", "05b1aeb6e8020c5d31e30cd4613ead87a4fb9b3f", "74ce9ab22a2b957de1e27fea1fd97ecc76ee1d77", "96d860caedf7731e2f598a768e85d04e26753868", "2657302160775f8766964d013efe242836693f3e", "3410b2df875d13f0f6bae2394170784a53b32323", "6a139103526a5068a6517f5986c702d9d0dca5c1", "30a22f2254f4874c6f0ae5a219138064a8495fa9", "ef0790e8c83d12692a7acb563369d3a1fd6dfea4" ], "paperAbstract": "Checkpoint/restart has been widely used to cope with fail-stop errors. The checkpointing frequency is most often optimized by assuming an exponential failure distribution. However, field studies show that most often failures do not follow a constant failure rate exponential distribution. Therefore, the optimal checkpointing frequency should be computed and tuned considering the different distributions that failures follow. Moreover, due to operating system and input/output jitter and hybrid solutions that combine checkpointing with other techniques, such as data compression, checkpointing time can no longer be assumed constant. Thus, time varying checkpointing time should be accounted for to realistically model the application execution. In this study, we develop a mathematical theory and model to optimize the checkpointing frequency with respect to arbitrary failure distributions while capturing time-dependent non-constant checkpointing time. 
We show that we can provide closed-form formulas for important failure distributions in most cases. By instantiating our model, we study and analyze 10 important failure distributions to obtain the optimal checkpointing frequency for these distributions. Experimental evaluation shows that our model is highly accurate and deviates from the simulations less than 1% on average.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.127" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1130a13b74e11b99d5233dce7f157d54cfea4ed1", "sources": [ "DBLP" ], "title": "Toward a General Theory of Optimal Checkpoint Placement", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "1138145ea2a489b67084b066b965629f5562b6d8": { "authors": [ { "ids": [ "2237748" ], "name": "Samer Al-Kiswany" }, { "ids": [ "2806362" ], "name": "Suli Yang" }, { "ids": [ "1743175" ], "name": "Andrea C. Arpaci-Dusseau" }, { "ids": [ "1703415" ], "name": "Remzi H. 
Arpaci-Dusseau" } ], "doi": "10.1145/3078597.3078612", "doiUrl": "https://doi.org/10.1145/3078597.3078612", "entities": [ "Attribute\u2013value pair", "Computer data storage", "Load balancing (computing)", "Multicast", "Replication (computing)", "Routing", "Scalability", "Software-defined networking", "Systems design", "Value (ethics)" ], "id": "1138145ea2a489b67084b066b965629f5562b6d8", "inCitations": [], "journalName": "", "journalPages": "29-40", "journalVolume": "", "outCitations": [ "155ca30ef360d66af571eee47c7f60f300e154db", "b682167e6bd92e1307f58f56c86613329e8ab79a", "0ab860d9226709aad6842e12b7a394089e396f35", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "130d640b53a1d6700b67a4ea4256071ae18e0ee8", "0541d5338adc48276b3b8cd3a141d799e2d40150", "9aa0d7253574e50fe3a190ccd924433f048997dd", "0b5c26697d7fe2fd90f337934de63dc973195dfa", "01f3885dc0ca9ffabf787d9b825bcb4ff4ada06a", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "29a05cde1994548e2e9487822248c679626c6241", "3fc93257ac94aa8d6505c19077058e68622345b6", "514a5c15e8cf3f681febecad954a4508d9189c99", "4329fab4771dd4cf50694804d4bafca8f40dbbab", "2bef12742683926a29888fda5798ac32d12a30fd", "aaba89dc882c46cf0ff3e18ab663792a964f2272", "1c8195cadc7ad4a8b59b16fe77574dd6d160d7d2", "24c6e70c583daed1852637ec42d4589556ac59d3", "ac138994a057a7dc5683be734502643e4802f2bd", "01667cd68d259d08807e266e07c4f8eb1c81d2af", "2da760f90c3d2bf6598becdde9063093f488548c", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "76eea8436996c7e9c8f7ad3dac34a12865edab24", "75032b6df2f3988d2cb6988c73cbc01c7b9e80cb", "3439120d0052525992e34bd200e7f3985c10b1bc", "1d99b7749a9311d2db24a3d84728e444eff23e4b" ], "paperAbstract": "We present NICE, a key-value storage system design that leverages new software-defined network capabilities to build cluster-based network-efficient storage system. 
NICE presents novel techniques to co-design network routing and multicast with storage replication, consistency, and load balancing to achieve higher efficiency, performance, and scalability. We implement the NICEKV prototype. NICEKV follows the NICE approach in designing four essential network-centric storage mechanisms: request routing, replication, consistency, and load balancing. Our evaluation shows that the proposed approach brings significant performance gains compared to the current key-value systems design: up to 7× put/get performance improvement, up to 2× reduction in network load, 3× to 9× load reduction on the storage nodes, and the elimination of scalability bottlenecks present in current designs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078597.3078612", "http://research.cs.wisc.edu/adsl/Publications/nice-hpdc17.pdf", "https://cs.uwaterloo.ca/~alkiswan/papers/NICE-HPDC17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1138145ea2a489b67084b066b965629f5562b6d8", "sources": [ "DBLP" ], "title": "NICE: Network-Integrated Cluster-Efficient Storage", "venue": "HPDC", "year": 2017 }, "11433bebdbb138fce1d40ef014efc252c53c08bc": { "authors": [ { "ids": [ "1709102" ], "name": "Tanakorn Leesatapornwongsa" }, { "ids": [ "22409029" ], "name": "Cesar A. Stuardo" }, { "ids": [ "3197683" ], "name": "Riza O. Suminto" }, { "ids": [ "40455668" ], "name": "Huan Ke" }, { "ids": [ "2235631" ], "name": "Jeffrey F. Lukman" }, { "ids": [ "1738725" ], "name": "Haryadi S. 
Gunawi" } ], "doi": "10.1145/3102980.3102985", "doiUrl": "https://doi.org/10.1145/3102980.3102985", "entities": [ "Debugging", "Distributed computing", "Scalability", "Software bug" ], "id": "11433bebdbb138fce1d40ef014efc252c53c08bc", "inCitations": [ "262c16d1bdd8d0ccef77bd66648144d584a24477" ], "journalName": "", "journalPages": "24-29", "journalVolume": "", "outCitations": [ "0ce898bf3f3e4af56492e9135c7c85e3917e20e8", "7e4d3ca41adc598e4a8b71df2d5c040ccb59be87", "51dae50f2cf78af88e8d064671b91e6e059ca7a6", "0706225eeac0f855b19c365313db61252ecde0d7", "ad8c8feae36e649d885af3df3d427a3ea40651c2", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "32d5d07713c7f91c9577d5c09f40bb688ae3a282", "0aeb77fb41dc8e863e054fcffea7b8b3011515ce", "9dff0982bd58ef37d2c183d0c3a8818b91bc7e58", "0a3300d149a0f45623e5cde4f9114b9773b0054c", "91ec7ef1b6ffeba0a2b19f00501f2f7e52a76077", "11a68b5de90fc3f0b56f1acdfe688b91eff1b1ba", "10e0397e08f37ee5d3ba1f9e24ac9eb313c784f4", "db263a07853ba5092112291be30f9b803c732676", "05a436f059c3897c3509dc059903364eff4a79af", "36222f8eb2ccf21ca345e15186cea64506581543", "56998b637705900121f2f02a8c153cb099c7ba49", "070c3a8c3ce10277424f23c01a54b377478ee59c", "1d204d774c134dd7df97a6a83e12387efd0c7a01", "086820e40dc8046c30a8751394df167bec047fe1", "1d70fb525d138699177a5cebee29c324de783e4b", "2a5d9ff64cdde1cb9a2019d0fc2d2491e4ef6cf4", "566707209e3ace646b3b0cb1a3bc7d7215b1ec55", "7cccd0a78fd074eaf3a515d99f1284dc77c7d13b", "2a0d9931c1e794ffe96c4b6c37507e96a3c4cafc", "2f6f80948f913ea3b3ae82d441f61aab34618536" ], "paperAbstract": "We highlight the problem of scalability bugs, a new class of bugs that appear in \"cloud-scale\" distributed systems. Scalability bugs are latent bugs that are cluster-scale dependent, whose symptoms typically surface in large-scale deployments, but not in small or medium-scale deployments. 
The standard practice to test large distributed systems is to deploy them on a large number of machines (\"real-scale testing\"), which is difficult and expensive. New methods are needed to reduce developers' burdens in finding, reproducing, and debugging scalability bugs. We propose \"scale check,\" an approach that helps developers find and replay scalability bugs at real scales, but do so only on one machine and still achieve a high accuracy (i.e., similar observed behaviors as if the nodes are deployed in real-scale testing).", "pdfUrls": [ "https://press3.mcs.anl.gov/ccusers2017/files/2017/09/suminto-scalability-ccusers17.pdf", "http://ucare.cs.uchicago.edu/pdf/hotos17-scalabilityBugs.pdf", "http://doi.acm.org/10.1145/3102980.3102985" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/11433bebdbb138fce1d40ef014efc252c53c08bc", "sources": [ "DBLP" ], "title": "Scalability Bugs: When 100-Node Testing is Not Enough", "venue": "HotOS", "year": 2017 }, "117025a430aaa984dd260bea97531da221b634a4": { "authors": [ { "ids": [ "1909974" ], "name": "Sanidhya Kashyap" }, { "ids": [ "7761504" ], "name": "Changwoo Min" }, { "ids": [ "3254849" ], "name": "Taesoo Kim" } ], "doi": "", "doiUrl": "", "entities": [ "Blocking (computing)", "CPU cache", "Data structure", "Linux", "Lock (computer science)", "Mutual exclusion", "Non-uniform memory access", "Operating system", "Read-write memory", "Scalability", "Scheduling (computing)", "Shard (database architecture)", "Spinlock", "Synchronization (computer science)" ], "id": "117025a430aaa984dd260bea97531da221b634a4", "inCitations": [ "020af9e8d35b7f6ca563397a8e82778dfa7dac7b" ], "journalName": "", "journalPages": "603-615", "journalVolume": "", "outCitations": [ "0c3b1da050089cf8c701fa0cb4ddc18566d715e6", "9ded1893cef3521922a5fa5dbf28003031474aac", "eca7c0f1f5f33434f62970cb9a99cdcea1a0ab2e", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "3e77a77247734dc918a5723573e1158eee1955f9", 
"2f925c9b58c384d80e1febfc646900d584dcf515", "25c4dcffc6bc69b0885587aff9acb9f2dd949c07", "b6d87ec4b6990f0b797a5f41ff970c6b520cf79a", "49b73fc335cb97c0b52b283ce236a4d4eb2b99c9", "1963455d66a7fa9755216fd15ee47a2ad3d86827", "5cec4c7d82137333ea7f0166a26d04bba589c7da", "6c96a74a1785843dead22eb00764f787bf4bfd92", "6db9bd41b294a7b45792b8f4ac8864f5d178f35e", "093d1d23500d65e99c7d0cd5569ce0f0d4c37076", "57eaf0036c74895a5e965915c6544041623719e0", "afc4931dd371130c3d4c6d6dbfda881140847af1", "7b1a8647dd6482adee6b774d694e5c85b6d5a9cf", "9bd0f0527d8d2f751c622ec14388017224f4810b", "34d33c19d0e893415b570ebdeea993db5b7af509", "abf1157c2043274a8d580151db1d4ef5be2c892e", "2c968749e04fc16908fc543e4468f945a5c695da", "08b5b8270713bbbc0b5b1a31c8625b0bf87d674d", "d82fde923093716dba6a723b984f7f4e57e503f8" ], "paperAbstract": "Application scalability is a critical aspect to efficiently use NUMA machines with many cores. To achieve that, various techniques ranging from task placement to data sharding are used in practice. However, from the perspective of an operating system, these techniques often do not work as expected because various subsystems in the OS interact and share data structures among themselves, resulting in scalability bottlenecks. Although current OSes attempt to tackle this problem by introducing a wide range of synchronization primitives such as spinlock and mutex, the widely used synchronization mechanisms are not designed to handle both under- and over-subscribed scenarios in a scalable fashion. In particular, the current blocking synchronization primitives that are designed to address both scenarios are NUMA oblivious, meaning that they suffer from cache-line contention in an undersubscribed situation, and even worse, inherently spur long scheduler intervention, which leads to sub-optimal performance in an over-subscribed situation. 
In this work, we present several design choices to implement scalable blocking synchronization primitives that can address both under- and over-subscribed scenarios. Such design decisions include memory-efficient NUMA-aware locks (favorable for deployment) and scheduling-aware, scalable parking and wake-up strategies. To validate our design choices, we implement two new blocking synchronization primitives, which are variants of mutex and read-write semaphore in the Linux kernel. Our evaluation shows that these locks can scale real-world applications by 1.2\u20131.6\u00d7 and some of the file system operations up to 4.7\u00d7 in both under- and over-subscribed scenarios. Moreover, they use 1.5\u201310\u00d7 less memory than the state-of-the-art NUMA-aware locks on a 120-core machine.", "pdfUrls": [ "https://sslab.gtisc.gatech.edu/assets/papers/2017/kashyap:cst-slides.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/kashyap", "https://sslab.gtisc.gatech.edu/assets/papers/2017/kashyap:cst.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-kashyap.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/ad5e/8e4f66139f85231b837988601d89aacde189.pdf", "s2Url": "https://semanticscholar.org/paper/117025a430aaa984dd260bea97531da221b634a4", "sources": [ "DBLP" ], "title": "Scalable NUMA-aware Blocking Synchronization Primitives", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "1171eecf13da9beb57b06a3c6e8a323e89b8e1ff": { "authors": [ { "ids": [ "1999972" ], "name": "Rachata Ausavarungnirun" }, { "ids": [ "34348348" ], "name": "Joshua Landgraf" }, { "ids": [ "33769839" ], "name": "Vance Miller" }, { "ids": [ "33801185" ], "name": "Saugata Ghose" }, { "ids": [ "34726949" ], "name": "Jayneel Gandhi" }, { "ids": [ "1692790" ], "name": "Christopher J. 
Rossbach" }, { "ids": [ "1734461" ], "name": "Onur Mutlu" } ], "doi": "10.1145/3123939.3123975", "doiUrl": "https://doi.org/10.1145/3123939.3123975", "entities": [ "Address space", "CAS latency", "CCIR System I", "Central processing unit", "Computer data storage", "General-purpose computing on graphics processing units", "Graphics processing unit", "High-level programming language", "IBM System i", "In-place algorithm", "Industry Standard Architecture", "Manual memory management", "Memory management", "Memory protection", "Overhead (computing)", "PCI Express", "Page (computer memory)", "Page table", "Paging", "Q-Bus", "Synergy", "Task parallelism", "Translation lookaside buffer" ], "id": "1171eecf13da9beb57b06a3c6e8a323e89b8e1ff", "inCitations": [ "0c4fe1f1a8043e8f4175b21faca1b72bff8033e6", "0581754e392d4a648f6a7b7665e3561df8627157", "ecf5efd5fe18860b42a1abd198e94a868dbf944c" ], "journalName": "", "journalPages": "136-150", "journalVolume": "", "outCitations": [ "0571492ae2aa6df23ebbfc9f6e12ce6c0eb38845", "1eeb50d5f7937f65a910203ae61430ff8b969012", "2aa997522d212ab74163b986be211ffc7f3e9e34", "2263e2beda96efcff8ff19efaefcb85f5136aeca", "483876864b9624483252f1124312c91d0f772437", "c3da4e02b8a474d6f094f1e5ac435049e0fe3e49", "48215f3287f7d527dc63c5b504ca8397d3bdef4e", "533d720a8542b707c316d39cf5beeb58738af86d", "3ef44315caebb6169b7ffbf9886cd782193ab40e", "174b4cb435c87e421c973ce59ccf5b06e09aa8af", "73dd5dde28119e41dd0f0a07275b7f722c4619d2", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "33196b69eeec351efd5178eae5da92979bdc6fd7", "054e4a6966d54eb9fd207cf0484214201f46424a", "2e33c9a0e0ba542693610f36cabee4d8291f8e09", "9c001d2546b07f4325dfa32d46f602bdf56ec474", "0524b5c458a3eeda6b3e70fb26ac8f9431de5f93", "6ed152e43e15d69f63e2ea5d16e719ff81572c85", "5d3c30ae77994e16fb2ee486cff96d4c52bccfd9", "68073f621072d793e95b9562bf9a9245415d5a96", "aa3859a68bbee9bb68036e24d0239369788a8604", "53fbfdf34e08d419d6708bffeb32537ecd3b271f", "102fd9c66b2a5f71a4a3890bdb48a813d0650eaf", 
"28cc7453c5f3f9ecb9415e631b0829ec9af8a4c3", "2394c6644efa856f0da160a0f0031d74cd3b5000", "21bda5f42e92f535c29012746915f6dd06adb97a", "0036adadc90e4826b2f7fc157752eea459070c32", "16d04ef2dfec414ca5cddace341e3961d03df579", "2037e142f3b45da72d5c99c0c0de2bb506d4a829", "ab82581f2225072865c1bf49c0044b05e5afca30", "00156e79606084497789662dfaf59c3b54a10722", "05c56f4abc527fbf384ad011dc9c0a613955641a", "128dad9a25e99affd9c87101c4c2e1ac9988df61", "211f2beaaf36bb6a920a63dbbef6842cb1d22468", "6bf62a3aec49e5d128ae024a17628893898fcd8f", "feb02327bc9d6a086f83deddaccaac4616899a5a", "caacd536fa218ef5218021506ebc041e3f460064", "07a63423cc46ec67ff18f707379b77ebdfbc1eb9", "188c0013d5f79072ee97f8a48190cbe54b2009b1", "0d075dae4e4ca9cabef40f9bec4c953ccfc31113", "73e8627ae91003e19183b17ad7b24923c20aafa3", "472392b93150be7bb0132511d71d686770c2c79b", "5cdf290c839ba8753876bf255ed8c99fb4ba1299", "8314d58a250867e083838d177a40946039903e7b", "6ac87f5ef30f787e5dacd59e97d7804e99a7366e", "671958087f3c24e7b025019476be8918302270e2", "4308295a2eaef30be423520918ad224dc2f3ffe2", "25011a77c8478ed154721775d6284db8b268368c", "61d13a9a4a6cb66e2d5fcf4f75d97570dca8f3fe", "45fcaf11eaf31228a218a24663067dab509a1031", "1c2aed008f7bcb769d0e6f8109434c766fb22bde", "9e5ab6456ce2bd6be5f63c757134b8b3720d1785", "0d0c47a7e8b63e72b93787a2f8afb1c9905ac4b0", "26e72340c47b7348e1b1de285f89dd96cc925b27", "343a384d5476ead9496f96559aba5ad09e95e01e", "0a934c1fa360491bebaa6fb4d0348179b9713b2d", "40ccd404abbc52c306442fc7c396e50021d764e7", "35c3882db9e1b2bdf838122787968679595f61de", "19de90c933c20849c85d5428c8a643210b97ec83", "0027600a3ff1d431ee44fcb8b149388ec7bb3ac8", "434fa04db769935ae61bbcf4d9faa602b9a8c730", "2f74bb6831df639e9e7cba61e719ed11ab0728c2", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "05a55820da0430f3b7e68f54bcb2cb6427c8cf28", "1f1a1f0cd075cef63083c8ec15321021dbff2cfc", "85398d5f19157c91bf00da3d36210e72d57887e4", "242cbdc5966fd14ba4a00815ac301fb278d8f544", "2d6f002477015469075954c6748a1a85af352c94", 
"28c552da5dc505fe23644cfddf7daaf06c355e45", "1c860ede7e14e4cd9210cbad8d0c8619673f87ca", "1bed30d161683d279780aee34619f94a860fa973", "44f779d08d629e915ee7cbe1c0f4f17e9f6a626f", "0edf4ef1b8e09e4abc994f7d450bc090262e2c9b", "f3e9db1605922044ca4506dbee54841caf821a0a", "3aa3795ddeb410db291b4fe10f11e52264885e75", "50de0f6a952131dfe562c5b3836e5d934b39b939", "5ece19ddc8abc5454426deece280d0750972c2da", "89f4842ef627eb667691b5329e1eaac9bd66a0bf", "26512755e7f78e10390b409ed4de3378aba2bac8", "2ad29134da93304e72dd047ca99ec6cfef2b4990", "1d55a4505bebb74ab47ec2365a660fda39c40d14" ], "paperAbstract": "Contemporary discrete GPUs support rich memory management features such as virtual memory and demand paging. These features simplify GPU programming by providing a virtual address space abstraction similar to CPUs and eliminating manual memory management, but they introduce high performance overheads during (1) address translation and (2) page faults. A GPU relies on high degrees of thread-level parallelism (TLP) to hide memory latency. Address translation can undermine TLP, as a single miss in the translation lookaside buffer (TLB) invokes an expensive serialized page table walk that often stalls multiple threads. Demand paging can also undermine TLP, as multiple threads often stall while they wait for an expensive data transfer over the system I/O (e.g., PCIe) bus when the GPU demands a page.\n In modern GPUs, we face a trade-off on how the page size used for memory management affects address translation and demand paging. The address translation overhead is lower when we employ a larger page size (e.g., 2MB large pages, compared with conventional 4KB base pages), which increases TLB coverage and thus reduces TLB misses. Conversely, the demand paging overhead is lower when we employ a smaller page size, which decreases the system I/O bus transfer latency. 
Support for multiple page sizes can help relax the page size trade-off so that address translation and demand paging optimizations work together synergistically. However, existing page coalescing (i.e., merging base pages into a large page) and splintering (i.e., splitting a large page into base pages) policies require costly base page migrations that undermine the benefits multiple page sizes provide. In this paper, we observe that GPGPU applications present an opportunity to support multiple page sizes without costly data migration, as the applications perform most of their memory allocation en masse (i.e., they allocate a large number of base pages at once). We show that this en masse allocation allows us to create intelligent memory allocation policies which ensure that base pages that are contiguous in virtual memory are allocated to contiguous physical memory pages. As a result, coalescing and splintering operations no longer need to migrate base pages.\n We introduce Mosaic, a GPU memory manager that provides application-transparent support for multiple page sizes. Mosaic uses base pages to transfer data over the system I/O bus, and allocates physical memory in a way that (1) preserves base page contiguity and (2) ensures that a large page frame contains pages from only a single memory protection domain. We take advantage of this allocation strategy to design a novel in-place page size selection mechanism that avoids data migration. This mechanism allows the TLB to use large pages, reducing address translation overhead. During data transfer, this mechanism enables the GPU to transfer only the base pages that are needed by the application over the system I/O bus, keeping demand paging overhead low. Our evaluations show that Mosaic reduces address translation overheads while efficiently achieving the benefits of demand paging, compared to a contemporary GPU that uses only a 4KB page size. 
Relative to a state-of-the-art GPU memory manager, Mosaic improves the performance of homogeneous and heterogeneous multi-application workloads by 55.5% and 29.7% on average, respectively, coming within 6.8% and 15.4% of the performance of an ideal TLB where all TLB requests are hits.", "pdfUrls": [ "http://pages.cs.wisc.edu/~jayneel/papers/micro17_mosaic.pdf", "http://doi.acm.org/10.1145/3123939.3123975", "http://www.pdl.cmu.edu/PDL-FTP/NVM/17micro_mosaic.pdf", "https://people.inf.ethz.ch/omutlu/pub/mosaic-application-transparent-multiple-page-sizes-for-GPUs_micro17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1171eecf13da9beb57b06a3c6e8a323e89b8e1ff", "sources": [ "DBLP" ], "title": "Mosaic: a GPU memory manager with application-transparent support for multiple page sizes", "venue": "MICRO", "year": 2017 }, "1172aeb1c26e5a7226ecea213f5517bc1a1a4677": { "authors": [ { "ids": [ "17311929" ], "name": "Vicent Sanz Marco" }, { "ids": [ "28801862" ], "name": "Ben Taylor" }, { "ids": [ "1719750" ], "name": "Barry Porter" }, { "ids": [ "40514580" ], "name": "Zheng Wang" } ], "doi": "10.1145/3135974.3135984", "doiUrl": "https://doi.org/10.1145/3135974.3135984", "entities": [ "Apache Spark", "Batch processing", "Big data", "Branch predictor", "Central processing unit", "Data center", "FITS", "Jumpstart Our Business Startups Act", "Memory model (programming)", "Multi-core processor", "Online and offline", "Run time (program lifecycle phase)", "Server (computing)", "Throughput" ], "id": "1172aeb1c26e5a7226ecea213f5517bc1a1a4677", "inCitations": [ "fa51827854e9d58e22cf55a9d1a4ae6870f081a7" ], "journalName": "", "journalPages": "95-108", "journalVolume": "", "outCitations": [ "1ce00e33eef166aed0b2bf88a83bdc876e6ee609", "4318a7c3bfdbd34efb5df805a55eee6da20c60f1", "5dc3c465ef293f0ddc355196be1b7eead3dd588f", "092a1cf971fb8359d3293004c6f1de82f05f3afb", "228c64750a11823a712f7414711e3b073b861c28", "0e4b886ba6c47faa4c38fc33316ca9e6124eb37f", 
"37355bae823c553f2db5311c1c6aaeab3b53675e", "1c79346e409f764ddef3c2d15bab2bb7d5f24f20", "2f4b9ac4a0694f0b1681348334befba0bfe9d897", "56bf17fb68d1b32838066997a888325d72eea83d", "1f7048ce71e8eddd6b6643fad3477571f567cf4e", "14b2882599d56a68aab4e8bd3dea4664a40aaaf9", "0380c641bf8ffe814e3e48c6964438bfd40e3480", "043b307af412fc7f9005822e6dabbe4f9d983472", "af85d1ff59fca88d1bf118acfac417627de955f3", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "3000e77ed7282d9fb27216f3e862a3769119d89e", "064f0793b2b7af8e8fccbf62bf39976dc4ff5b7a", "0bde2adacbdb9a66ba3103e3f128a9d6f3ee032e", "60f068dea641df784a379411c57aa8f2b23d1a98", "06c15f48f0f71cb034936cee635bec0fc4992594", "28a9dca6faeead651539c700bef413203b2b876e", "5f6d5608cf1b3071c938a2271637fc555bf53231", "5f3f9223c5c9f896be099bc177929febad508407", "02fe9b425b78a0211ccfaa2710f949fa2a769406", "146139716c9e8ec4f57475b9673171761ac34074", "54f3331b575b2d451c2d716f86496cada23d596d", "a776115d6567d38ed345c8c93fb23c7ff335cb1a", "94fc1aa5d1ad0be589e74fed4357d757c3cfeaed", "280f49d0bbcc23780d6452f0aae6851f61b012bf", "43776b15c034076a36b7143d58af8e04715e41d0", "1041d3f00afb5f5a53196813ceb2ebfab6d0a6ee", "7e955610a48f0a314251df3fa6a4a0430c3e0beb", "0bf963bd1fea6b6efdbfb1e829f1db562e367c11", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "37601bb6e655f2392ba1ca2086da0d1e03e19edc", "50f033f43478f949b7794c6a569d484b5705cfbd", "30e159925f150fd42e5b519820b7b6a02206e58f", "3605b9befd5f1b53019b8edb3b3d227901e76c89", "0cd29f73c2a6b073812e16c34182ace1de98b91a", "bb628d1c557f4c2d34549e67af9e724fc4753816", "9ee6209432316baf6776838917e06bca4d874747", "9700c76ee9dd6360d8fa366f983a7e7e5a1e9b9a", "7a978f2902460e732c50c36a171deb11733df1fc", "02d77effda6a81bbb3e15d95b2acfb6542d5b607", "1455be30b0bb364ffc4c35d2cf1ee05cfb32595f", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "18c18cc0118afd086ea7259bec1d6157a105a5c8", "d0d2e2924e7258092af15581f90760bfda25f825", "381ce8b53187acf678cd47a189439098f56d75e0", "031f7b86cd2dcc219b93b4d23169024680b2aa30", 
"24281c886cd9339fe2fc5881faf5ed72b731a03e", "26e32867cf2face66b528d446b6c84075b64e43d", "667ba06f64d3687d290a899deb2fdf7dabe7fd1c", "06545f48a6b25a3cafd76e514b2310254972888b", "0c6c30e3052fcc01aa5ee38252d77f75322d7b3f", "a9c1fa73f2c830f1f1d8526042a357a697dcfab4", "9c378565a0b510890b474df039caab1f2d58bded", "0ecad2b630fce029c1b7b577ed56e18fbba001ce" ], "paperAbstract": "Data analytic applications built upon big data processing frameworks such as Apache Spark are an important class of applications. Many of these applications are not latency-sensitive and thus can run as batch jobs in data centers. By running multiple applications on a computing host, task co-location can significantly improve the server utilization and system throughput. However, effective task co-location is a non-trivial task, as it requires an understanding of the computing resource requirement of the co-running applications, in order to determine what tasks, and how many of them, can be co-located. State-of-the-art co-location schemes either require the user to supply the resource demands which are often far beyond what is needed; or use a one-size-fits-all function to estimate the requirement, which, unfortunately, is unlikely to capture the diverse behaviors of applications.\n In this paper, we present a mixture-of-experts approach to model the memory behavior of Spark applications. We achieve this by learning, off-line, a range of specialized memory models on a range of typical applications; we then determine at runtime which of the memory models, or experts, best describes the memory behavior of the target application. We show that by accurately estimating the resource level that is needed, a co-location scheme can effectively determine how many applications can be co-located on the same host to improve the system throughput, by taking into consideration the memory and CPU requirements of co-running application tasks. 
Our technique is applied to a set of representative data analytic applications built upon the Apache Spark framework. We evaluated our approach for system throughput and average normalized turnaround time on a multi-core cluster. Our approach achieves over 83.9% of the performance delivered using an ideal memory predictor. We obtain, on average, 8.69x improvement on system throughput and a 49% reduction on turnaround time over executing application tasks in isolation, which translates to a 1.28x and 1.68x improvement over a state-of-the-art co-location scheme for system throughput and turnaround time respectively.", "pdfUrls": [ "http://doi.acm.org/10.1145/3135974.3135984", "http://eprints.lancs.ac.uk/87581/4/middleware17.pdf", "https://arxiv.org/pdf/1710.00610v1.pdf", "http://arxiv.org/abs/1710.00610" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1172aeb1c26e5a7226ecea213f5517bc1a1a4677", "sources": [ "DBLP" ], "title": "Improving spark application throughput via memory aware task co-location: a mixture of experts approach", "venue": "Middleware", "year": 2017 }, "1188cabcb873910161587c0b0592a069c2dd6166": { "authors": [ { "ids": [ "1806110" ], "name": "Shiping Chen" }, { "ids": [ "32067673" ], "name": "You Zhou" }, { "ids": [ "38125149" ], "name": "Shigang Chen" } ], "doi": "10.1109/CLOUD.2017.29", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.29", "entities": [ "Bandwidth management", "Centralisation", "Cloud computing", "Counter (digital)", "Data center", "Network traffic control", "Requirement", "Software-defined networking", "Stock and flow", "Telephone exchange", "Virtual machine" ], "id": "1188cabcb873910161587c0b0592a069c2dd6166", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "163-170", "journalVolume": "", "outCitations": [ "3dde3fec553b8d24a85d7059a3cc629ab33f7578", "f9496eb065cc63b7735dc760c17d15fedd6f8992", "7a278ee0578f194700cadc3811cdda4ec751f88a", 
"2024310bdbb768baea08da6e9b4a00eeb8d75cc6", "81762f0a249edb584fdf6193497c7778e48e5f5b", "5fec11e738e3e666dca2db0e4766a4e9626ebb64", "a15161f791f848454672b6be2203756f3374acfb", "2bac875425b84096a0084cf8024d5e66b521297a", "760dcaff03ac4facd699074d7ea55b8540139688", "f4b861d11b7c541e0e9b653501d11c19d4fca6c4", "282c6a3b573051e3e799d73cfc623ccbd68bcd6a", "3967126afbca6a722d7257cd671fe5e4979358a5" ], "paperAbstract": "Software-defined datacenters combine centralized resource management, software-defined networking, and virtualized infrastructure to meet diverse requirements of cloud computing. To fully realizing their capability in traffic engineering and flow-based bandwidth management, it is critical for the switches to measure network traffic for both individual flows between virtual machines and aggregate flows between clusters of physical or virtual machines. This paper proposes a novel hierarchical traffic measurement scheme for software-defined datacenter networks. It measures both aggregate flows and individual flows that are organized in a hierarchy with an arbitrary number of levels. The measurement is performed based on a new concept of hierarchical virtual counter arrays, which record each packet only once by updating a single counter, yet the sizes of all flows that the packet belongs to will be properly updated. 
We demonstrate that the new measurement scheme not only supports hierarchical traffic measurement with accuracy, but does so with memory efficiency, using a fewer number of counters than the number of flows.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.29" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1188cabcb873910161587c0b0592a069c2dd6166", "sources": [ "DBLP" ], "title": "Efficient Hierarchical Traffic Measurement in Software-Defined Datacenter Networks", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "1190febbdff90a6ded3832507a9dd17f5898b9c4": { "authors": [ { "ids": [ "1900699" ], "name": "George Michelogiannakis" }, { "ids": [ "2277880" ], "name": "Khaled Z. Ibrahim" }, { "ids": [ "1746446" ], "name": "John Shalf" }, { "ids": [ "3312441" ], "name": "Jeremiah J. Wilke" }, { "ids": [ "30477448" ], "name": "Samuel Knight" }, { "ids": [ "1778389" ], "name": "Joseph P. Kenny" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Byte", "Computation", "FLOPS", "Fat tree", "Heuristic", "High- and low-level", "Network topology", "Procurement", "Scalability", "Simulation", "Tree network" ], "id": "1190febbdff90a6ded3832507a9dd17f5898b9c4", "inCitations": [ "34f7dcf1f75a398da5ab6bdf62145accb64c8971" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "228-237", "journalVolume": "", "outCitations": [ "4b81b43bc42852730fd97a699764ecf5e648fb3c", "3c3fcd7a025f50bf598de03d41bc0fd00660f11f", "630b514e68c0de62fa3dca5a45e3131f1515c90c", "7d1a0dc43fa64aad6bbab3faed6eb820ca4642a9", "01e9cc3ac7805e043e1effac588cd5dab9d1480d", "e1055f373c566b7dc3bd204c051289a7a7ef6b66", "b5c3d39ce7149caf869a6a1a1e1b47a8fe9b91f0", "ff71759a3efa271670c1e7820873df872b4ca3b9", "bb65ddf274ef42e05da09bfb080a97d035876cff", "132eb0d09a6b1b18974bec326b6aa45b363f06a0", "5f8991828def57d2f0cda942566afff56740d150", 
"00a0ce824021313ee63c72e7bf05a4c708233cb0", "9a2915942682453ef24d50c09a154b2398172f39", "31cfefc79d64ede4c13f231b8b30ebfd45666d3d", "238c07107a2a4ff72a4f225d8aad8e34decfa716", "1318207254101caf48df5827ce276d37ac20de40", "2a0c9f8248aad793810dfc2ec2bc21a8ebdca6f4", "7ec74c4725e8b69db335ac6112d1b798e30aaf91", "22c74d8be071084ce8812af19548e7bf2bf0c8b6", "761b2e26efba7512704ffb768bf101a8025c257f", "4f08b68a94563a247f092effbdd46281f82a6b9a", "30b90ca62f5daad23308574b9e9beae0f225cfe7", "429d28998216da5648f40248bf4bc9e508edd2fd", "00275e6d134aea5602b048fc587cf33d6860d6a9", "256774b46b3265ae950ea3717e5a2d0c51ab2b55", "58d5e76b2c8404efafd2c48b8983cae2d7ad419a", "663e064469ad91e6bda345d216504b4c868f537b", "029920d4fe0dcd2677f9944b5ed62314a21ca4ec", "58232f5111cfd9d1ea6809324e9711be1b06b5b4", "a873f4e4bb24922d1748a8b3dedb464087b866c3", "18162120b3c1a48d8b85bf4a207c13b65a0eb712", "b148119329dbcedc01ecafb47218939e1f50d47a", "aeabbc5e83f8e2df70a15555c5f251d66a62d9cb", "917fc743f23a795cf86d65da9f20b3f67dbbb7dd", "9073fa56ea32f8ef48676ff8c7cc47c3c991d664", "061f972cdf3281f6e9f63dd67b0db11ef313579e", "2f5e593d29a5eb8b3f7c65e4e5c740b792933757", "42e5e97272ad8728749f861ed7a920707e698778", "9fe7cd5e5ea6e3d4e54dca2ead453629b28a80eb", "ed2b5f9a2ca37e55052eafbf5abc166245cf7995", "c96d3e9fef5f39b4fc720f4d4d4da13b82472c5b", "f57ac7f53438b2877022125bac957fda2bb2a97b", "96915117da9f9ae3578beffb83c198aa8456263f", "63121ae5e4bca65774a9ac89223a2e44a687bd30", "9bb6ee03d15def91dd6d99e6cf0dfbf503964a5a", "521638b43bf96d1b957b4f344d7d7be6e9f59533", "317b05a4e53fa3e7150bcc5bd65e2bdd6502b0ec" ], "paperAbstract": "The power and procurement cost of bandwidth in system-wide networks has forced a steady drop in the byte/flop ratio. This trend of computation becoming faster relative to the network is expected to hold. 
In this paper, we explore how cost-oriented task placement enables reducing the cost of system-wide networks by enabling high performance even on tapered topologies where more bandwidth is provisioned at lower levels. We describe APHiD, an efficient hierarchical placement algorithm that uses new techniques to improve the quality of heuristic solutions and reduces the demand on high-level, expensive bandwidth in hierarchical topologies. We apply APHiD to a tapered fat-tree, demonstrating that APHiD maintains application scalability even for severely tapered network configurations. Using simulation, we show that for tapered networks APHiD improves performance by more than 50% over random placement and even 15% in some cases over costlier, state-of-the-art placement algorithms.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101144", "https://pubarchive.lbl.gov/islandora/object/ir:1007126/datastream/PDF/download/citation.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1190febbdff90a6ded3832507a9dd17f5898b9c4", "sources": [ "DBLP" ], "title": "APHiD: Hierarchical Task Placement to Enable a Tapered Fat Tree Topology for Lower Power and Cost in HPC Networks", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "119386d04e88206a15a77a8f45fb27fc740aff65": { "authors": [ { "ids": [ "2177995" ], "name": "Sunimal Rathnayake" }, { "ids": [ "32919563" ], "name": "Dumitrel Loghin" }, { "ids": [ "1805512" ], "name": "Yong Meng Teo" } ], "doi": "10.1109/ICPP.2017.43", "doiUrl": "https://doi.org/10.1109/ICPP.2017.43", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Cloud computing", "Elasticity (cloud computing)", "Gradient", "Pareto efficiency", "Run time (program lifecycle phase)", "Scalability", "Simulation" ], "id": "119386d04e88206a15a77a8f45fb27fc740aff65", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": 
"342-351", "journalVolume": "", "outCitations": [ "24251f02c34f32b1dd96572a1d984c4463a26a10", "1d84ed02bc65400393723c37a70cc68fe39fbef4", "3ebcf69ea430e1397328ab2351cd1e85c6edd0cd", "31b433f8314c833ca870d841862aec0a86c6818e", "04e24be4c25539e4b4fa0498c85b3a3e2d026c02", "b76d259d4cfb68cc143cd1109138eca0d8ac8ce9", "aa5ae099220737abdd2bf6cf82861b61ba2e440d", "7536df9bbbef09ec5ddc4d67a954cdebb1bcaa34", "089c89f54c5dd0d2a873fbfc19183667d3be5b66", "5b02cf69f2f9efe0cb61c922974748d10d1506af", "a810c1766fb3f4c6c9beda22b77e7d69cafd36bd", "4e4348913b3198ae51b784db893938ae3afecaf5", "d73421f5d46e12a121942d42463b0133bbd7a433", "3218bbfd89deae4134d6c6d7f8f3ceb5c3a361f7", "a4ba539642998a6b9893dde2af8aee55c15b9f39", "813e836347456fb2d61c8feb6a71b0a4165c5c6d", "5c2fd3ada3458afb64213e3ec978bce68c96f2bd", "39879b7becd8ab4662008f8da92538fe368f939c", "0ec59b7fae15a7caa3256ba31b21802d455618c2", "916ddf79bfc624503032d2e9e9219816d909ef64", "4ecd4b8851721eaf04626c721167d73fe0b0dd0e" ], "paperAbstract": "Clouds offer great flexibility for scaling applications due to the wide spectrum of resources with different cost-performance, inherent resource elasticity and pay-peruse charging. However, determining cost-time-efficient cloud configurations to execute a given application in the large resource configuration space remains a key challenge. The growing importance of elastic applications for which the accuracy is a function of resource consumption introduces new opportunities to exploit resource elasticity on clouds. In this paper, we introduce CELIA, a measurement-driven analytical modeling approach to determine cost-time-optimal cloud resource configurations to execute a given elastic application with a time deadline and a cost budget. We evaluate CELIA with three representative elastic applications on more than ten million configurations consisting of Amazon EC2 resource types with different cost-performance. 
Using CELIA, we show that multiple cost-time Pareto-optimal configurations exist among feasible cloud configurations that execute an elastic application within a time deadline and cost budget. These Pareto-optimal configurations exhibit up to 30% cost savings for an elastic application representing n-body simulation. We investigate the impact of fixed-time scaling on the cost of executing elastic applications on cloud. We show that cost gradient with respect to resource demand is smaller when cloud resources with better cost-performance are used. Furthermore, we show that the relative increase in cost is always smaller compared to the relative reduction of execution time deadline. For example, tightening the execution time deadline by two-thirds incurs only 40% increase in cost for the n-body simulation application.", "pdfUrls": [ "http://www.comp.nus.edu.sg/~sunimalr/papers/2017-ICPP-CELIA-paper.pdf", "http://www.comp.nus.edu.sg/~teoym/pub/17/2017-ICPP-CELIA.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.43" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/119386d04e88206a15a77a8f45fb27fc740aff65", "sources": [ "DBLP" ], "title": "CELIA: Cost-Time Performance of Elastic Applications on Cloud", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "119e2f92baa9d2079f32f29089b29f4aa2ecaae7": { "authors": [ { "ids": [ "1717283" ], "name": "Abolfazl Asudeh" }, { "ids": [ "3038200" ], "name": "Azade Nazi" }, { "ids": [ "36856144" ], "name": "Nan Zhang" }, { "ids": [ "1690440" ], "name": "Gautam Das" } ], "doi": "10.1145/3035918.3035932", "doiUrl": "https://doi.org/10.1145/3035918.3035932", "entities": [ "Algorithm", "Approximation algorithm", "Computation", "Convex hull", "Database", "Experiment", "Linear function", "Linear function (calculus)", "Maxima", "Ranking (information retrieval)", "Regret (decision theory)", "Scalability", "Synthetic data", "Time complexity" ], "id": 
"119e2f92baa9d2079f32f29089b29f4aa2ecaae7", "inCitations": [ "23323ba9634f395d01e01a1b3f196e5e14e5d6b1", "4d77b6b7a8b7954a99d54280cc7f3c84a85c29e8" ], "journalName": "", "journalPages": "821-834", "journalVolume": "", "outCitations": [ "5dbe84872b42cb6167d3b601fdf68b4eb2d7f5d9", "0f7e7d9add82a37b44764bd706b60fddd97b13fd", "627948936c3c3d99ff539adf3a6e7cb59dd223b7", "23be8286c280f45599625fda69cd708d68453e04", "bfffeceec24b343526a0b2125edcaae1642dcd2e", "1a5fe34a83e3b6f08b628b7af2d915876ba13819", "04ff502fe2cd481a151230fb890d7ae9a093f561", "4b70aada3dbbf70608a9461afa586f76f271c5a2", "23ba8478c076752e21d57d43f82e32ac13b69a6b", "6156864e6fd0836726fc7e7ce1d5bdcff112fe27", "0d1537ac7c69142c02547bbccd5a8bdd5693edfd", "279c6d90821d46703fe6c17daa9c64064cc2044a", "561b0881fb83c7182bca4aec70bd287ea0f5be28", "383aea82514e1d148ec6dd81796ae5a64d50725d", "6cac85a7bc11b9cab6cf971ff0bc4628b380ee2f", "9cc857b1e9c4a8ad6b3b0c774a13bff9e5d01c77", "10927ae8a906b482330a95b411285e2e4e407ca6", "4e0f71061c47f45fd1281875db38902540f59c95", "7e4cb3ca74b9e0d83cb53340d4ead2331cc8328c", "3a456b2933cb09bc192b390defe3e479e85ba595", "959258cb7ff636fee908e6f6877388081ca706b6", "17d3d9783e4d2793fbf6ee18a490034d99534253", "114fd5089776a0562cf4e8276049cc11222fe51f", "2569420c9ed2ea744216351b803f68f9cceb5ce6", "ebb133a407796154622c625e54c57ead21e322da" ], "paperAbstract": "Finding the maxima of a database based on a user preference, especially when the ranking function is a linear combination of the attributes, has been the subject of recent research. A critical observation is that the em convex hull is the subset of tuples that can be used to find the maxima of any linear function. However, in real world applications the convex hull can be a significant portion of the database, and thus its performance is greatly reduced. 
Thus, computing a subset limited to $r$ tuples that minimizes the regret ratio (a measure of the user's dissatisfaction with the result from the limited set versus the one from the entire database) is of interest.\n In this paper, we make several fundamental theoretical as well as practical advances in developing such a compact set. In the case of two dimensional databases, we develop an optimal linearithmic time algorithm by leveraging the ordering of skyline tuples. In the case of higher dimensions, the problem is known to be NP-complete. As one of our main results of this paper, we develop an approximation algorithm that runs in linearithmic time and guarantees a regret ratio, within any arbitrarily small user-controllable distance from the optimal regret ratio. The comprehensive set of experiments on both synthetic and publicly available real datasets confirm the efficiency, quality of output, and scalability of our proposed algorithms.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035932", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/09/EfficientRegretRatio.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/119e2f92baa9d2079f32f29089b29f4aa2ecaae7", "sources": [ "DBLP" ], "title": "Efficient Computation of Regret-ratio Minimizing Set: A Compact Maxima Representative", "venue": "SIGMOD Conference", "year": 2017 }, "11c57b6e63184c35fd1999a19c7969170f038973": { "authors": [ { "ids": [ "1734058" ], "name": "Hong Zhang" }, { "ids": [ "1723513" ], "name": "Hai Huang" }, { "ids": [ "9467302" ], "name": "Liqiang Wang" } ], "doi": "10.1109/IPDPS.2017.100", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.100", "entities": [ "Apache Hadoop", "Apache Hive", "Backward compatibility", "Cloud computing", "Commodity computing", "Computational complexity theory", "Experiment", "Jumpstart Our Business Startups Act", "Microsoft Azure", "Query language" ], "id": "11c57b6e63184c35fd1999a19c7969170f038973", "inCitations": [ 
"3ebb5abf41521032df5ace422a3fe696ea5f87ef", "7222cea092f37acac75488e425a029758677ef71", "0a87b1b2e089d8cf80b4b26cbb191f4648937732", "411c3c55361f2287b1a66cc52a93df1e7ed94863" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "459-468", "journalVolume": "", "outCitations": [ "23de558a10458c1be3062412f134500605eada98", "86c8f8c0cad85b189feade4b31f36d56ebd9f6c8", "47947ed7d4c12855b1b5a4c4ec3123528761d64b", "3aed29136db8f1e5c6a89fc22d3ae4b4926a3555", "93e67a1f3f0371114055de4db489385ea133ebc3", "706cd2c450fb7054c92916b300513a266a207652", "0541d5338adc48276b3b8cd3a141d799e2d40150", "25233d201be5af4e1e8926d742af678ca5938223", "797d93472c6aed26056de317c4a4cae0fd6e65aa", "0fa5455a3241fca461be6c14d0f296c394cadd85", "090030e0d1aa117008e9e9fa4abdee0a95455f4a", "7c3e88b0c762065bd0d974cb3d67a1e61479f647", "7207036cfb1e5e0aa74756e395dfc9dd94e46af0", "e20b3b988310a436d38a2fc310dbcc3b12f5a54d", "d24bbf5ad9b068aa476d30e3bb898c2e99942744", "089c89f54c5dd0d2a873fbfc19183667d3be5b66", "2f79404e566af175f03e94827383c3d7b43f4e31", "029068a33f6e9f9ba0ddfe5498a67e4c0d349d2f", "2988e34168fa91398fa397baf823af2063893e9c", "16139ba6fa6ad2828c20abdf5d9f34687836f932" ], "paperAbstract": "Data have been generated and collected at an accelerating pace. Hadoop has made analyzing large scale data much simpler to developers/analysts using commodity hardware. Interestingly, it has been shown that most Hadoop jobs have small input size and do not run for long time. For example, higher level query languages, such as Hive and Pig, would handle a complex query by breaking it into smaller adhoc ones. Although Hadoop is designed for handling complex queries with large data sets, we found that it is highly inefficient to operate at small scale data, despite a new Uber mode was introduced specifically to handle jobs with small input size. 
In this paper, we propose an optimized Hadoop extension called MRapid, which significantly speeds up the execution of short jobs. It is completely backward compatible to Hadoop, and imposes negligible overhead. Our experiments on Microsoft Azure public cloud show that MRapid can improve performance by up to 88% compared to the original Hadoop.", "pdfUrls": [ "http://www.cs.ucf.edu/~lwang/papers/IPDPS2017.pdf", "https://doi.org/10.1109/IPDPS.2017.100" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/11c57b6e63184c35fd1999a19c7969170f038973", "sources": [ "DBLP" ], "title": "MRapid: An Efficient Short Job Optimizer on Hadoop", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "11dd2c169928711e46b84920fd3ef20c5b0f813b": { "authors": [ { "ids": [ "3141371" ], "name": "Sylvan Clebsch" }, { "ids": [ "3346872" ], "name": "Juliana Franco" }, { "ids": [ "1758168" ], "name": "Sophia Drossopoulou" }, { "ids": [ "25635802" ], "name": "Albert Mingkun Yang" }, { "ids": [ "1756657" ], "name": "Tobias Wrigstad" }, { "ids": [ "1729644" ], "name": "Jan Vitek" } ], "doi": "10.1145/3133896", "doiUrl": "https://doi.org/10.1145/3133896", "entities": [ "Actor model", "Concurrent computing", "FreeType", "Garbage collection (computer science)", "Immutable object", "Message passing", "ORCA", "Programming language", "Pseudocode", "Race condition", "Scalability", "Type system", "Zero-copy" ], "id": "11dd2c169928711e46b84920fd3ef20c5b0f813b", "inCitations": [ "0bbb4fa1e9fc1abd476c3ce6c263521b7466c460", "829a2dfa16642a2967cc0edded1a52db2a1911ca" ], "journalName": "PACMPL", "journalPages": "72:1-72:28", "journalVolume": "1", "outCitations": [ "3a19a01929ce6945815142cbbf3e044829cec297", "61e55f6192c69cfab59411dec198d18db648ffa7", "471c946ff9e6a57f5367426d3d90342d7e5fe785", "3b45b71c66f7fe7f5160c253761619278efc9e17", "056f0abc818b697b48049a015ebc646d5f246010", "208aee805004d575035284b1c232209e9fa26b0e", 
"43393a561914f05be312a1dff5a757cbc384d1a1", "62f0aaa7146794f52d10a71c4ef28d64f7c77670", "bade6bdd6d10ece21b8b6828a54123f34ba28183", "0647391355016a20c70fe41839a3ff501f7c70eb", "65a4f56ce35d80eef26da64a666a2d60ff928462", "9e16933cb68789b8334f4dc5b01edba15f3e90f3", "22f151e4ea96bb05b5619494ff266891832a54eb", "320a3daa763dec229ca19fe4ec017ecae83b4011", "093cc9c91192ec3332793d2f27fe4b46ce55bb4d", "301221c5baa4192ed2ad6f4c6e9cf0f91430feb2", "0ac1b0ff1cdb2ee84d5634226950c7ceed1ec8c1", "cc840c21a958f1327efcadb2739d6ad5e933d832", "16c84bbabf5977dcb742791baea24a6f513505dd", "8a33c47c2a3f0e46dbb30f5203b6a1c6d8fefd8f", "c40b12bec98b1f70d596e4b1ef9c1efa9ecc532a", "2a4b5d93785b3432c1a8cc8a150c6c884c462d83", "02a3bc1fab286d792dcd00b13fa2cd5ee0b2d1be", "3f24c7f2fa782a96715820e7be0b6b6f79a30831" ], "paperAbstract": "ORCA is a concurrent and parallel garbage collector for actor programs, which does not require any STW steps, or synchronization mechanisms, and that has been designed to support zero-copy message passing and sharing of mutable data. ORCA is part of a runtime for actor-based languages, which was co-designed with the Pony programming language, and in particular, with its data race free type system. By co-designing an actor language with its runtime, it was possible to exploit certain language properties in order to optimize performance of garbage collection. Namely, ORCA relies on the guarantees of absence of race conditions in order to avoid read/write barriers, and it leverages the actor message passing, for synchronization among actors. \n In this paper we briefly describe Pony and its type system. We use pseudo-code in order to introduce how ORCA allocates and deallocates objects, how it shares mutable data without requiring barriers upon data mutation, and how can immutability be used to further optimize garbage collection. 
Moreover, we discuss the advantages of co-designing an actor language with its runtime, and we demonstrate that ORCA can be implemented in a performant and scalable way through a set of micro-benchmarks, including a comparison with other well-known collectors.", "pdfUrls": [ "http://janvitek.org/pubs/oopsla17a.pdf", "http://www.diva-portal.org/smash/get/diva2:1160319/FULLTEXT01.pdf", "http://doi.acm.org/10.1145/3133896" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/11dd2c169928711e46b84920fd3ef20c5b0f813b", "sources": [ "DBLP" ], "title": "Orca: GC and type system co-design for actor languages", "venue": "PACMPL", "year": 2017 }, "11f2a7da97ddc0c45b63c2b9a5b08e287c779381": { "authors": [ { "ids": [ "2314055" ], "name": "Deepak K. Tosh" }, { "ids": [ "1719516" ], "name": "Sachin Shetty" }, { "ids": [ "5868759" ], "name": "Xueping Liang" }, { "ids": [ "1769233" ], "name": "Charles A. Kamhoua" }, { "ids": [ "1723424" ], "name": "Kevin A. Kwiat" }, { "ids": [ "8829863" ], "name": "Laurent Njilla" } ], "doi": "", "doiUrl": "", "entities": [ "Bitcoin", "Cloud computing", "Consensus (computer science)", "Cryptography", "Digital asset", "Distributed computing", "Greedy algorithm", "Immutable object", "Peer-to-peer", "Rogue", "Simulation", "Tamper resistance" ], "id": "11f2a7da97ddc0c45b63c2b9a5b08e287c779381", "inCitations": [ "f3548c62a1c8b39d3379e5b230c62de2740a1731", "be19f18007845e0b4d10a9b6f63acf67a8e7b70e", "10b9d7206f16f27408e9e3472de576b12c6ea464", "178738930dc750ef8cf70f1dc7fbab6edca0d184", "f317a3f472c491d04e53845d43234e138450a522", "1fe0f64ab3695071eae144ebacf8b273e359614c", "cf1dcc9d8ae1655b1717c531c7d74d4b2e853750", "1752b34d9e8fd906052a59022e9cad5af37dee73" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "458-467", "journalVolume": "", "outCitations": [ "be19f18007845e0b4d10a9b6f63acf67a8e7b70e", "1d4abd83093f1343ee1f5b8ffb3c5999e3754c90", 
"25a17e483599215949cb3961fd945f6867d3bcae", "c27762257f068fdbb2ad34e8f787d8af13fac7d1", "48326c5da8fd277cc32e1440b544793c397e41d6", "1a466f3195490a0f5325c994f4ba14e02eae55b9", "b4925bd986101dbc87dc9c4661326b2a357e8fa3", "00f763e99bd9d1aa45350536d480e05851a055eb", "12d854f326b43232d906eb323db5d282786acb9d", "48b1a1b0db3dafbb4a82c2cdf8e1580dc73dfa1f", "226cfb67d2d8eba835f2ec695fe28b78b556a19f", "d827a9dfd11722249a375aa478ef2c849079f94c" ], "paperAbstract": "The blockchain technology has emerged as an attractive solution to address performance and security issues in distributed systems. Blockchain's public and distributed peer-to-peer ledger capability benefits cloud computing services which require functions such as, assured data provenance, auditing, management of digital assets, and distributed consensus. Blockchain's underlying consensus mechanism allows to build a tamper-proof environment, where transactions on any digital assets are verified by set of authentic participants or miners. With use of strong cryptographic methods, blocks of transactions are chained together to enable immutability on the records. However, achieving consensus demands computational power from the miners in exchange of handsome reward. Therefore, greedy miners always try to exploit the system by augmenting their mining power. In this paper, we first discuss blockchain's capability in providing assured data provenance in cloud and present vulnerabilities in blockchain cloud. We model the block withholding (BWH) attack in a blockchain cloud considering distinct pool reward mechanisms. 
BWH attack provides rogue miner ample resources in the blockchain cloud for disrupting honest miners' mining efforts, which was verified through simulations.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101175" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/11f2a7da97ddc0c45b63c2b9a5b08e287c779381", "sources": [ "DBLP" ], "title": "Security Implications of Blockchain Cloud with Analysis of Block Withholding Attack", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "11f5a54b9dfcaed90f69899d1bc9a85e54ce167f": { "authors": [ { "ids": [ "1679363" ], "name": "Sudipto Guha" }, { "ids": [ "2496686" ], "name": "Yi Li" }, { "ids": [ "35428741" ], "name": "Qin Zhang" } ], "doi": "10.1145/3087556.3087568", "doiUrl": "https://doi.org/10.1145/3087556.3087568", "entities": [ "Algorithm", "Algorithm design", "Cluster analysis", "Computational complexity theory", "Data point", "Database", "Defense Distributed", "Distributed algorithm", "K-means clustering", "Metric k-center", "Pervasive informatics", "Time complexity", "Uncertain data" ], "id": "11f5a54b9dfcaed90f69899d1bc9a85e54ce167f", "inCitations": [ "7df5efc8036f4c4a281346ca929cc81db39a091e", "712ee1f295cc473d5126abe9c6221986f25116c7", "7e0695d65ad3aedaa30bb7aaf28edc432ac711e7" ], "journalName": "", "journalPages": "143-152", "journalVolume": "", "outCitations": [ "0b490334ae4d06330bcfecbf7184b7c458a1f6e9", "6890040ee813e041d4777f921c743fa86db8b1a6", "88c30a1d7135af95490186c5b5824abba5398868", "04ac298d0516535289950588dfe850bbe8c68d7e", "20736a265f1fa739f63fede6b5b696cdcc229d77", "3b1d3572df1b1941c311cc4d4212c7da59f81d6f", "1add794ba468395711fa3e1c5899e7675ba27783", "1c36811b51717620b22cf7314bd52df39cb4f00e", "40649998fce80b714c49b66cf3f93d48bbc0ef04", "05e77cb441c5bfddca223b1170faf61b2e20315a", "1cdd3c62172b7598cd090e349d38e9644734edfd", "2a1db0387c25d0b701b641541470c8d1d5e0b356", "3c749cb52c322fb8436b9a80617c37f4da16b4fe", 
"121d996c4bb4bfd15cd857c72b42e176eb5db7d4", "093f1eed67998b5820c411bf711a8106bbce0ba7", "42bab63a1c36227b7b200a4af5682f366cac23d9", "32a24c1a89584cc7d1a19b62e5b4fc3605966696", "0d946bf624ec4b3b7b238a19214f7fed25acbc5e", "1fd6bacb90edf0f951e89a54bd54294db821c268" ], "paperAbstract": "Recent years have witnessed an increasing popularity of algorithm design for distributed data, largely due to the fact that massive datasets are often collected and stored in different locations. In the distributed setting communication typically dominates the query processing time. Thus it becomes crucial to design communication efficient algorithms for queries on distributed data. Simultaneously, it has been widely recognized that partial optimizations, where we are allowed to disregard a small part of the data, provide us significantly better solutions. The motivation for disregarded points often arise from noise and other phenomena that are pervasive in large data scenarios.\n In this paper we focus on partial clustering problems, k-center, k-median and k-means, in the distributed model, and provide algorithms with communication sublinear of the input size. As a consequence we develop the first algorithms for the partial k-median and means objectives that run in subquadratic running time. 
We also initiate the study of distributed algorithms for clustering uncertain data, where each data point can possibly fall into multiple locations under certain probability distribution.", "pdfUrls": [ "http://homes.soic.indiana.edu/qzhangcs/papers/dist-outliers-full.pdf", "http://arxiv.org/abs/1703.01539", "https://arxiv.org/pdf/1703.01539v1.pdf", "https://arxiv.org/pdf/1703.01539v3.pdf", "http://doi.acm.org/10.1145/3087556.3087568", "http://homes.soic.indiana.edu/qzhangcs/papers/spaa17-outlier.pdf", "https://arxiv.org/pdf/1703.01539v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/11f5a54b9dfcaed90f69899d1bc9a85e54ce167f", "sources": [ "DBLP" ], "title": "Distributed Partial Clustering", "venue": "SPAA", "year": 2017 }, "1210f11452d21c5bdc9261642b274b6cde0d7f55": { "authors": [ { "ids": [ "7280145" ], "name": "Fuliang Li" }, { "ids": [ "1780398" ], "name": "Jiannong Cao" }, { "ids": [ "1692648" ], "name": "Xingwei Wang" }, { "ids": [ "2302799" ], "name": "Yinchu Sun" }, { "ids": [ "3404790" ], "name": "Yuvraj Sahni" } ], "doi": "10.1109/CLOUD.2017.25", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.25", "entities": [ "Centralisation", "Cloud computing", "End-to-end principle", "Quality of service", "Routing", "Scheduling (computing)", "Software-defined networking", "Stock and flow", "Throughput" ], "id": "1210f11452d21c5bdc9261642b274b6cde0d7f55", "inCitations": [ "b3a31c4b6c951890ec59af0e3425fab5c901b343" ], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "130-137", "journalVolume": "", "outCitations": [ "7c5c33ba70397f1f40670bf8ab68d1562520a43c", "9aad0d11deaa0dcfc3a8af9bdbb9a7b0b74dc486", "c91da75958eacb59b5726c38e0c2268ea0e704d7", "2f3af20917549e007701f93d125e1ee00f6fa436", "26ac80c9d6a3385b9dd5b68d9e281a3ebb95d7bf", "67a7f077e3a90fe20abf0bc105f3f00dcbf927fa", "33a9956fde4b5c18093b045837735f3c240091fe", "00cf4b13a1bd202ccebe1e7bd0587f11e98ec3d6", 
"06845215adcdfcff316effc272ab6f7bf764f71a", "04b13d3c4c6480c2c78f9f80f6c9f1ff729e1a99", "3d9bf12bd10b0a3898c8780b69c34c4bf0530364", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "31cf19d1a5663313c19c32ace9936770d2efc99c", "55c5a2c1f2f6f66646baf64405464357fa7df8a0", "2885c2ba669ea8338900ef827e527aaea9d7d279", "8498624bbc3c907716c3f0e586f0c0c2deb1583d", "53bf890ddba4d6433e868c0a73a529243c23591c", "dd6dafc2c3a7cfd66ff6c1e95bc47e1264381b8e", "a87695adf8e3b534b86b9a52f8346c17266514b8", "5630345175e180b2dbe9f201a9dbbc2ea6ca6ac8" ], "paperAbstract": "Due to the centralized control, network-wide monitoring and flow-level scheduling of Software-Defined-Networking (SDN), it can be utilized to achieve Quality of Service (QoS) for cloud applications and services, such as voice over IP, video conference and online games, etc. However, most existing approaches stay at the QoS framework design and test level, while few works focus on studying the basic QoS techniques supported by SDN. In this paper, we enable SDN with QoS guaranteed abilities, which could provide end-to-end QoS routing for each cloud user service. First of all, we implement an application identification technique on SDN controller to determine required QoS levels for each application type. Then, we implement a queue scheduling technique on SDN switch. It queues the application flows into different queues and schedules the flows out of the queues with different priorities. At last, we evaluate the effectiveness of the proposed SDN-based QoS technique through an experimental analysis. Results show that when the output interface has sufficiently available bandwidth, the delay can be reduced by 28% on average. 
In addition, for the application flow with the highest priority, our methods can reduce 99.99% delay and increase 90.17% throughput on average when the output interface utilization approaches to the maximum bandwidth limitation.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.25" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1210f11452d21c5bdc9261642b274b6cde0d7f55", "sources": [ "DBLP" ], "title": "Enabling Software Defined Networking with QoS Guarantee for Cloud Applications", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "122cb33c617ce2760f8b26dfd85857aa6c5dd691": { "authors": [ { "ids": [ "2283122" ], "name": "Hamid Arabnejad" }, { "ids": [ "8516149" ], "name": "Claus Pahl" }, { "ids": [ "31948108" ], "name": "Pooyan Jamshidi" }, { "ids": [ "14011576" ], "name": "Giovani Estrada" } ], "doi": "", "doiUrl": "", "entities": [ "Autoscaling", "Cloud computing", "FMRIB Software Library (FSL)", "FUJITSU Cloud IaaS Trusted Public S5", "Facebook Query Language", "Fuzzy control system", "Fuzzy logic", "Q-learning", "Reinforcement learning", "Response time (technology)", "Run time (program lifecycle phase)", "Scalability", "Service-level agreement", "State-Action-Reward-State-Action", "Virtual machine" ], "id": "122cb33c617ce2760f8b26dfd85857aa6c5dd691", "inCitations": [ "ff2f3a792779fb34e63fef6cbe9144d7665c6a66", "26e365f6535e28aeb61890be5a4603a6272ad0b9" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "64-73", "journalVolume": "", "outCitations": [ "0b56c5c990051e879d341671d85408fbf519c7c8", "15dc663b6761d53e90415427d5a24cce1e0e38da", "9e98d529d158e2230d722f497fbc36373eaa8583", "61b19706a0ebd627f90bce3c1afc6f2df556c93f", "88dfee10842bbfd2ebc74980ab64c1cac5753883", "ceb387818cadb80c514a60950f9e7a3202c8a518", "4c9b5b3ec35b92357936efe9401110e37e2e046c", "2a329bfb7906e722a23e593a30a116584ff83ea9", 
"cc02acc00ba07898c55040a6846aace71cfa75bd", "942ecc61675d81724823b893df0f1c9418b52d90", "66f81c8007a3d77ed40aba93ef62ca3d5d7b9cb7", "bb0b3db2085a7e85a378c0c0a4d9ca9a2969a6eb", "261597e8e95b6f3508f1aa00b93b2966fcb09924", "5ec6157896c053600625da7a5da6f0451ed9c12e", "4cbed84055a5b20dd2938822b8b8f9de6a490fd4", "caeabc98e9b701aee79cd053cc1d0070e68d3a9e", "285a5ef2b749705f396df2b607af6506a344300c", "3b7c5da3a3888be5818159f31fb50d1e382efa26", "05ea86d312ed4a19ad282ad28838e8e87b6ce156", "30922a3953ff740486bfd01461cc1f0c5185c39c", "360f91607287ffeb48f34f34dff78272fe159cf7", "a818086b1d93615d1e6bac0ed69fb68c07beee1b", "0ec59b7fae15a7caa3256ba31b21802d455618c2", "bc46cc852b4a240a81a10e3b00f20d1e06139915", "9e94390e67fa2c44188634f6a4e8195b1eb309c8", "8260d12f5212a9a654486d86e5467d8a9e9a5bb5" ], "paperAbstract": "A goal of cloud service management is to design self-adaptable auto-scaler to react to workload fluctuations and changing the resources assigned. The key problem is how and when to add/remove resources in order to meet agreed service-level agreements. Reducing application cost and guaranteeing service-level agreements (SLAs) are two critical factors of dynamic controller design. In this paper, we compare two dynamic learning strategies based on a fuzzy logic system, which learns and modifies fuzzy scaling rules at runtime. A self-adaptive fuzzy logic controller is combined with two reinforcement learning (RL) approaches: (i) Fuzzy SARSA learning FSL and (ii) Fuzzy Q-learning FQL. As an off-policy approach, Q-learning learns independent of the policy currently followed, whereas SARSA as an on-policy always incorporates the actual agent's behavior and leads to faster learning. Both approaches are implemented and compared in their advantages and disadvantages, here in the OpenStack cloud platform. 
We demonstrate that both auto-scaling approaches can handle various load traffic situations, sudden and periodic, and delivering resources on demand while reducing operating costs and preventing SLA violations. The experimental results demonstrate that FSL and FQL have acceptable performance in terms of adjusted number of virtual machine targeted to optimize SLA compliance and response time.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101121", "https://arxiv.org/pdf/1705.07114v1.pdf", "https://export.arxiv.org/pdf/1705.07114", "https://pdfs.semanticscholar.org/122c/b33c617ce2760f8b26dfd85857aa6c5dd691.pdf", "http://arxiv.org/abs/1705.07114" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/122cb33c617ce2760f8b26dfd85857aa6c5dd691", "sources": [ "DBLP" ], "title": "A Comparison of Reinforcement Learning Techniques for Fuzzy Cloud Auto-Scaling", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "1239eeb350a9d6c90006c8fc4f6e70adf0643ab2": { "authors": [ { "ids": [ "3441800" ], "name": "Pierre-Louis Guhur" }, { "ids": [ "35084316" ], "name": "Emil M. 
Constantinescu" }, { "ids": [ "1955022" ], "name": "Debojyoti Ghosh" }, { "ids": [ "2284463" ], "name": "Tom Peterka" }, { "ids": [ "1721552" ], "name": "Franck Cappello" } ], "doi": "10.1109/CLUSTER.2017.13", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.13", "entities": [ "Approximation error", "Complex dynamics", "Error detection and correction", "Experiment", "Instability", "Numerical integration", "Overhead (computing)", "PETSc", "Protection mechanism", "Rejection sampling", "Smart Data Compression", "Supercomputer" ], "id": "1239eeb350a9d6c90006c8fc4f6e70adf0643ab2", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "592-602", "journalVolume": "", "outCitations": [ "7027eb880ee4d5a1f71bfc861bb36ae980f781fc", "fdb03db0c062964fa9fe1beff69efe39a5e07e2d", "2208278a005a638e598ddad9f5876073ced238a8", "531969063ddef6632e7e5caab72420dc75809ff9", "6c5494521e2e88306c8881098c35237f477d10e8", "7edb887ed7f15203eccb614095af001ea74bcfb6", "24760588276cca66402775b92604bcc061084491", "05bdfe9795a9212641fb636425eaa55a4330113a", "d672b95b7809a10de53ccd960e04ddc4463795a3", "627a32d5f5c0f848e8d6b522fa101f82f856d7b8", "dd286cdefbca8f6e435298f058ca413d131f53b0", "85f9179c37cc0df6a08f66e6fbf6c5bd651fe618", "5c0e8af36e20b8ea213561e8c3d706b4e2f2cc8d", "18fe996c6f43a8f301cd842507045b679ba3506a", "8d8ae22042e7b189a4c37e39e5ea4f74e6210860", "95caf0ab8e5613d4db4cea36545a44a5a39815b6", "32d23ce43877aa8cd385a8e01f366329dd015a5c", "04d04a9a2942b8f13e405b9feafe854dca7952a7", "31f4bdde3501a9d52499668bf67f548220afbb79", "01d62cd850496455ce1616500f491690effa5c98", "f73620d1a4b1b03244256a37b852a4fd0acab1e4", "4b8eaa76c2d517eaf9df9823a7cb77c7ee4e8725", "947c449755567fb4560e543e966788cebb7cc4cc", "62b996c8b0845277f1b8a1459ecae454c054cd7c", "983d5cf52ccc1cd4e8338c6b7c4ee24168a6e807", "3eb59f28fbe7f2e3a61f54a5bf1a35cbe22bf1f8" ], "paperAbstract": "Scientific computing requires trust in results. 
In high-performance computing, trust is impeded by silent data corruption (SDC), in other words corruption that remains unnoticed. Numerical integration solvers are especially sensitive to SDCs because an SDC introduced in a certain step affects all the following steps. SDCs can even cause the solver to become unstable. Adaptive solvers can change the step size, by comparing an estimation of the approximation error with an user-defined tolerance. If the estimation exceeds the tolerance, the step is rejected and recomputed. Adaptive solvers have an inherent resilience, because some SDCs might have no consequences on the accuracy of the results, and some SDCs might push the approximation error beyond the tolerance. Our first contribution shows that the rejection mechanism is not reliable enough to reject all SDCs that affect the results' accuracy, because the estimation is also corrupted. We therefore provide another protection mechanism: at the end of each step, a second error estimation is employed to increase the redundancy. Because of the complex dynamics, the choice of the second estimate is difficult: two methods are explored. We evaluated them in HyPar and PETSc, on a cluster of 4,096 cores. We injected SDCs that are large enough to affect the trust or the convergence of the solvers. The new approach can detect 99% of the SDCs, reducing by more than 10 times the number of undetected SDCs. 
Compared with replication, a classic SDC detector, our protection mechanism reduces the memory overhead by more than 2 times and the computational overhead by more than 20 times in our experiments.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.13" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1239eeb350a9d6c90006c8fc4f6e70adf0643ab2", "sources": [ "DBLP" ], "title": "Detection of Silent Data Corruption in Adaptive Numerical Integration Solvers", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "12418c86dee2ce9b95b0bf46d40fe66c762d0410": { "authors": [ { "ids": [ "2629017" ], "name": "Yiyang Chang" }, { "ids": [ "32782897" ], "name": "Sanjay G. Rao" }, { "ids": [ "1749225" ], "name": "Mohit Tawarmalani" } ], "doi": "", "doiUrl": "", "entities": [ "Best, worst and average case", "Failure rate", "Multiprotocol Label Switching", "Network performance", "Network topology", "Routing", "Time complexity" ], "id": "12418c86dee2ce9b95b0bf46d40fe66c762d0410", "inCitations": [ "356f393023478396bdfb84409a97dd8aaf9e12f6" ], "journalName": "", "journalPages": "347-362", "journalVolume": "", "outCitations": [ "442a21840c5e54856e3f78c61a0ba48125e486b4", "2b91ee9218f5a26e655544ad0ff4694fa30692d7", "3e8ee62447587d3a875dc2f77a6df4dfda9be2de", "7ed8dd92f4a174b630836700cf12d0adebd5c708", "58099048c6dd8b6a7dcfac0855deb546e50024b3", "159f53ab0a7360ae4595d338111c5cc3a4e7c316", "945cf68786a81f2ba5d694c2a620260ac10d7f07", "19114b7a2f5243a47e80590cc11a2d8ec5b96308", "152d4269babc64a75fe7ba1abe68c517844f6bec", "1b11d4b0b04e0eb061029b57e1a1c436193f13b1", "6ae27ff737c6379298edf6aa069df1e2565feb7c", "766e33d87a1719d877ffdeb9250aa53629a13177", "2a78b294405635b93089806246f33204cc332985", "6d6030e70859795569baceaba58abb17ec62cca8", "2fdfb9098803f9b7d523d9bf67dca8b53cd28cf9", "0b78e4ffc95c53549fad9ee5740dd6bcc3c5a07e", "16d95a30bf189a0fe37af03eee9cc8af49709cd2", 
"016ca0cee16190907a911d874e98fbc6dfa5a36c", "cda625e14bc5d265b9a411400ec4203a97c528e6", "4d03b65744ce64166457436b24d6db23d3c3e493", "54eb6bc34c46eef4521d8cac273101432d1fdd01", "2a628e4a9c5f78bc6dcdf16514353336547846cc", "7b26ef4490794a4e98faa47aa3c10484239a5e5e", "16ac1193b1f28c7157a7c4a6be12b4b63bb18fbc", "0597e39c6b3df734f3dee2be3d8f7773eb5fd2b2", "1a6bac41dd71e7ac50581e413125b263c6a53a1d", "153d3de36f985349963b0bff1f0e4a9c63d7521f", "7bcc53f1baf3358517a602d856192faea9442c91", "5996fc1dd83cf4cb70723f168622791178776bca", "0dc44c3dbaeec29ed5e9b9dd196ff637b5def17d", "a6bdd76880dcdf67512b6450457ec804a029300d", "1dd35aa72f87396ac339399fc53aa251bdaf4e83", "2b4d720c6cb9bcd7c4fd8e1b615e6184ea43d633", "8cba7283df5e44f5e4c60753f0c59a3f0b303b7b", "232e9d2ac80f62500075aa687f55ce21da5f1c90", "06688b20889484482c1703bec9c34eaa1e09e9ab", "3c903855e111dc5a2bbd38e64f4a199f14fc29fd", "274946a974bc2bbbfe89c7f6fd3751396f295625", "4d6916eca48349f87ec1509207177c1c82e61746", "1dc62b09b964b8faeecbc03270f7d7a5f2fee733", "2730c3ec2908d852e73a62f93302e0e8bfe5510e", "4687fdf3c77ef00700fdf1399f7dd81bfe87ef97" ], "paperAbstract": "A key challenge confronting wide-area network architects is validating that their network designs provide assurable performance in the face of variable traffic demands and failures. Validation is hard because of the exponential, and possibly non-enumerable, set of scenarios that must be considered. Current theoretical tools provide overly conservative bounds on network performance since to remain tractable, they do not adequately model the flexible routing strategies that networks employ in practice to adapt to failures and changing traffic demands. In this paper, we develop an optimizationtheoretic framework to derive the worst-case network performance across scenarios of interest by modeling flexible routing adaptation strategies. 
We present an approach to tackling the resulting intractable problems, which can achieve tighter bounds on network performance than current techniques. While our framework is general, we focus on bounding worst-case link utilizations, and case studies involving topology design, and MPLS tunnels, chosen both for their practical importance and to illustrate key aspects of our framework. Evaluations over real network topologies and traffic data show the promise of the approach.", "pdfUrls": [ "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_chang.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/chang", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-chang.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-chang.pdf", "https://engineering.purdue.edu/~isl/papers/nsdi17-final59.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_chang.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/5ecc/82e93bbe370cda324eb89f1e65d9cc8496dd.pdf", "s2Url": "https://semanticscholar.org/paper/12418c86dee2ce9b95b0bf46d40fe66c762d0410", "sources": [ "DBLP" ], "title": "Robust Validation of Network Designs under Uncertain Demands and Failures", "venue": "NSDI", "year": 2017 }, "124d11baf0059f0e385a4ef7646c60f047723f55": { "authors": [ { "ids": [ "2831260" ], "name": "Batya Kenig" }, { "ids": [ "1679226" ], "name": "Benny Kimelfeld" }, { "ids": [ "23625026" ], "name": "Haoyue Ping" }, { "ids": [ "1682824" ], "name": "Julia Stoyanovich" } ], "doi": "10.1145/3034786.3056111", "doiUrl": "https://doi.org/10.1145/3034786.3056111", "entities": [ "Conjunctive query", "Database", "Insertion sort", "Mallows's Cp", "Polynomial", "Probabilistic database", "Relational database" ], "id": "124d11baf0059f0e385a4ef7646c60f047723f55", "inCitations": [ "42c22509f8e7fd651859b0760693cf983a369654" ], "journalName": "", "journalPages": "21-36", "journalVolume": "", 
"outCitations": [ "7da8928779347989b9e8041ae85bf715a45708ad", "220ae239fcf8243fc66b02db760dc14895e97741", "67a2952f8437defdee65bf8ae6bb8865d53cdc1c", "9ec9b1de2ea7743b9fafa16e0b387526f1610604", "72bb6887e437942f829822cd9090f63672dd3441", "2c6dcf068ec73d937af1c99dbf342283c57a85a1", "4a20823dd4ce6003e31f7d4e0649fe8c719926f2", "56c52bb9720a0ba992e1b51be6d5325b556d1ca4", "5a4f6663ab426f01b4015cfa29585f585078cca8", "837c9537384d63e276ea92b49ce51d1f55613bf6", "11975b9ebafcdbe0d0925894d29b195c069bf418", "1fec8c049166400e0bd5613ac201d29cd5f3871a", "21283b318cb79ad8b6cf25b65b4eabfa21411043", "461a6ae49b899b3ceb41454ceea2500beda1efb8", "b3b6bab273bfd1f72368d8378fa25afb5e053ce4", "7faea0b51eb326b311ed93549ca6b5ac96cc58f1", "0841d99e2adf28bb1805c5f4e3c9df4d0e358b38", "05f169c2c61ee021fec666b989bab749498ffc22", "4ca743cd9095700f1ad11703d6ffcc1b98c4b2c1", "3eb5af157fca0b6705c2ee2f335316f14781c0f3", "53f39494a861e05561682d0e875c516bb0a9d9a9", "283f3f514278a462e330fa8f408c6a45b720c84c", "6cc7b7970d344bf4c2c40d5b337d1d02a87e4ed3", "a7e55cf24bb33a61fc4670316f7aa18589857752", "029f6a7e6079aa56c1bbe42d886a658befd05381", "f963a360ea0e2a9f6f7acc0c2eaf0eb66bd69701", "189b2505b215c709b7698595fb9cdc1a478141a2", "abc870699b13414bce0487342874e2ad7214783b", "03d9e06a8bbf15edf1e59664456ad95ba6ef6ad1" ], "paperAbstract": "We propose a novel framework wherein probabilistic preferences can be naturally represented and analyzed in a probabilistic relational database. The framework augments the relational schema with a special type of a relation symbol---a preference symbol. A deterministic instance of this symbol holds a collection of binary relations. Abstractly, the probabilistic variant is a probability space over databases of the augmented form (i.e., probabilistic database). Effectively, each instance of a preference symbol can be represented as a collection of parametric preference distributions such as Mallows. 
We establish positive and negative complexity results for evaluating Conjunctive Queries (CQs) over databases where preferences are represented in the Repeated Insertion Model (RIM), Mallows being a special case. We show how CQ evaluation reduces to a novel inference problem (of independent interest) over RIM, and devise a solver with polynomial data complexity.", "pdfUrls": [ "http://doi.acm.org/10.1145/3034786.3056111", "http://batyak.cswp.cs.technion.ac.il/wp-content/uploads/sites/81/2017/07/ppd.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/124d11baf0059f0e385a4ef7646c60f047723f55", "sources": [ "DBLP" ], "title": "Querying Probabilistic Preferences in Databases", "venue": "PODS", "year": 2017 }, "1263810e3ef778ff0a838a6da3ccdc3088f2ad03": { "authors": [ { "ids": [ "1690710" ], "name": "Nageswara S. V. Rao" }, { "ids": [ "40197222" ], "name": "Qiang Liu" }, { "ids": [ "2495929" ], "name": "Satyabrata Sen" }, { "ids": [ "1705427" ], "name": "Donald F. Towsley" }, { "ids": [ "2545838" ], "name": "Gayane Vardoyan" }, { "ids": [ "9297377" ], "name": "Rajkumar Kettimuthu" }, { "ids": [ "1698701" ], "name": "Ian T. 
Foster" } ], "doi": "10.1145/3078597.3078615", "doiUrl": "https://doi.org/10.1145/3078597.3078615", "entities": [ "Concave function", "Data rate units", "Lyapunov fractal", "Network congestion", "Round-trip engineering", "Signal trace", "Stock and flow", "Supercomputer", "TCP congestion control", "Throughput" ], "id": "1263810e3ef778ff0a838a6da3ccdc3088f2ad03", "inCitations": [ "5b3ab4198f5d30502c3a99a4261a526f4813013e", "19e3d97fb829a6a9c15caef345803b40731550e8" ], "journalName": "", "journalPages": "193-204", "journalVolume": "", "outCitations": [ "c7aa8436c6c9536d53f2fcd24e79795ce8c6ea12", "4e63fd64ac90778b8c431e37f54b5cfd01e0f379", "85666f8a6bd98bb977dc6423a6cf54831d6d58c3", "11e67f6e9a7dacf236cf855230a0b7d51c61bf32", "4952512cf0cb86860ba50f534ff88a9bdc194d4f", "5f971eb8b630bc8735be00a3348ecf02aae7438f", "572dd2d5d75227bb878430c9375b9be92cc7e6e9", "2d4906884bc5309f1539195ff5b181d41a15ff60", "8f2dddee9576996c911dc2e24b8acb480ae2c9bc", "fdf9c71255a0e9cf6b417f0accc265391058c51b", "1f453bfdfa2c2889cfcf21f647041314f7b69e04", "2d3722e140631aeb4e2cca2c003f8763a15100c6", "981c44bc03f38d4e6272fb0bacd65e39251c1072", "0156817d29acdb78b193e9dbbe7d41983df511ad", "ba823e1ec87cc7e41d558c3682f75dae8a01fae9" ], "paperAbstract": "ide-area data transfers in high-performance computing infrastructures are increasingly being carried over dynamically provisioned dedicated network connections that provide high capacities with no competing traffic. We present extensive TCP throughput measurements and time traces over a suite of physical and emulated 10 Gbps connections with 0-366 ms round-trip times (RTTs). Contrary to the general expectation, they show significant statistical and temporal variations, in addition to the overall dependencies on the congestion control mechanism, buffer size, and the number of parallel streams. 
We analyze several throughput profiles that have highly desirable concave regions wherein the throughput decreases slowly with RTTs, in stark contrast to the convex profiles predicted by various TCP analytical models. We present a generic throughput model that abstracts the ramp-up and sustainment phases of TCP flows, which provides insights into qualitative trends observed in measurements across TCP variants: (i) slow-start followed by well-sustained throughput leads to concave regions; (ii) large buffers and multiple parallel streams expand the concave regions in addition to improving the throughput; and (iii) stable throughput dynamics, indicated by a smoother Poincare map and smaller Lyapunov exponents, lead to wider concave regions. These measurements and analytical results together enable us to select a TCP variant and its parameters for a given connection to achieve high throughput with statistical guarantees.", "pdfUrls": [ "http://www.mcs.anl.gov/~kettimut/publications/hpdc17-TCP.pdf", "http://gaia.cs.umass.edu/networks/papers/RaoTCP_hpdc2017.pdf", "http://doi.acm.org/10.1145/3078597.3078615" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1263810e3ef778ff0a838a6da3ccdc3088f2ad03", "sources": [ "DBLP" ], "title": "TCP Throughput Profiles Using Measurements over Dedicated Connections", "venue": "HPDC", "year": 2017 }, "12b365394435a9ebaa9d47ac013495254a428457": { "authors": [ { "ids": [ "36188726" ], "name": "Ji Hu" }, { "ids": [ "3429624" ], "name": "Zidong Yang" }, { "ids": [ "39596641" ], "name": "Yuanchao Shu" }, { "ids": [ "40756835" ], "name": "Peng Cheng" }, { "ids": [ "1729225" ], "name": "Jiming Chen" } ], "doi": "10.1109/ICDM.2017.26", "doiUrl": "https://doi.org/10.1109/ICDM.2017.26", "entities": [ "Balanced line", "Self-balancing binary search tree" ], "id": "12b365394435a9ebaa9d47ac013495254a428457", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "167-176", 
"journalVolume": "", "outCitations": [ "8e8056c3022280c034d124cc10e851a59ccf5128", "40e0588779c473cf56a09d2b5bb0af00a8cdb8f0", "04534be9a1ca7a5c3749727e1f92b2affadf2595", "0ea85b8265cf9f604d1ead53a6ef89b674d619ac", "e9fa7e93d2f4e958cde16c6cf9d5bc966f8e7ffc", "3b2933796af110f6bef7acd2dec2c6492005ab76", "38bcfdac82c014a0d431c8910685f821a29ef18d", "63c7f13da6e5a6045b572a25df5b54d3b97adbdd", "b60fc9a53a7666ec1577d18a1aa9995a40834dd7", "f5fcd43a9feb70ef079c1bf38da97f55c45bf8e0", "16ba61b1845a26a6520180567d18e0f4e3d8f5f0", "d477939a2f578757ef94714d537b81321642d642", "d728122190c4f464dda7e8e6f892f24c6b3b844d", "a73bf6882d6363cade5015f051340a01bd56e3a5", "83803e0e073f1b010aba71355ea38a4dbc36874e", "c2a1f8b4f98befd2749acd1260313a7317c987b1", "25c405ceab2ecafa2247068b66c719302c525a67", "33aec385446f13116fb6da43363d19afa110e375", "7b324ee8fc5282ca8488561521f12c2532cf066e" ], "paperAbstract": "Rapid development of bike-sharing systems has brought people enormous convenience during the past decade. On the other hand, high transport flexibility comes with dynamic distribution of shared bikes, leading to an unbalanced bike usage and growing maintenance cost. In this paper, we consider to rebalance bicycle utilization by means of directing users to different stations. For the first time, we devise a trip advisor that recommends bike check-in and check-out stations with joint consideration of service quality and bicycle utilization. From historical data, we firstly identify that biased bike usage is rooted from circumscribed bicycle circulation among few active stations. Therefore, with defined station activeness, we optimize the bike circulation by leading users to shift bikes between highly active stations and inactive ones. We extensively evaluate the performance of our design through real-world datasets. 
Evaluation results show that the percentage of frequent used bikes decreases by 33.6% on usage number and 28.6% on usage time.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.26" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/12b365394435a9ebaa9d47ac013495254a428457", "sources": [ "DBLP" ], "title": "Data-Driven Utilization-Aware Trip Advisor for Bike-Sharing Systems", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "12db88fcf7cfee093c64e4e7737458e694a38181": { "authors": [ { "ids": [ "10028309" ], "name": "Ilya Lesokhin" }, { "ids": [ "1923973" ], "name": "Haggai Eran" }, { "ids": [ "2975688" ], "name": "Shachar Raindel" }, { "ids": [ "13044283" ], "name": "Guy Shapiro" }, { "ids": [ "9999375" ], "name": "Sagi Grimberg" }, { "ids": [ "38691238" ], "name": "Liran Liss" }, { "ids": [ "2777548" ], "name": "Muli Ben-Yehuda" }, { "ids": [ "35002050" ], "name": "Nadav Amit" }, { "ids": [ "3188958" ], "name": "Dan Tsafrir" } ], "doi": "10.1145/3037697.3037710", "doiUrl": "https://doi.org/10.1145/3037697.3037710", "entities": [ "Benchmark (computing)", "Computer data storage", "Direct Connect (protocol)", "Direct memory access", "Download", "InfiniBand", "Legacy system", "Memory overcommitment", "Multitenancy", "Network interface controller", "Page fault", "Paging", "Programming model" ], "id": "12db88fcf7cfee093c64e4e7737458e694a38181", "inCitations": [ "a4710ac80826e48a410b1b9da80c2ca0f4a6a357", "5016dedcbc51faec5f0aa0b5303a4e96c6e669de", "5451e8f4e616600f6061a95b435d3d3b75f4c69a", "1b1dda022e899b2d922adf330c96a8c9f7ad2abe", "5d37014068ec3113d9b403556c1fdf861bec0162" ], "journalName": "", "journalPages": "449-466", "journalVolume": "", "outCitations": [ "20a63e9826480867d7c70be89e18e3952d00310a", "46a38fb1e26c9957111d154c6fc2c39101ff0ba1", "02fdca5fdba792e4f2c70b8b637abe4824343800", "1b6262f0533c202c1f140e60053ee3c72f216687", "081eef996224ea63b8d583c2b66d3f4651323b75", 
"05acc8f56dfa5216e96e1c6121dd64a6fe58fce4", "3a043714354fe498752b45e4cf429dbae0fb2558", "2387968d22927a79d5d601107e70220763fc6e12", "078a3183d87878156f172cb220f77bda0e1f9aed", "193fa9d86a063b3c4c6d56f0ffbdc1964279b9bb", "34d7bec4a25a27ec452f04228ff654bae1f90209", "67f49884d9418bdf4e68796ab4c77be951835e67", "9090142233801801411a28b30c653aae5408182a", "2636930ca4f2005371b903fc7670c2ca2d3c01b3", "6fb1ff36c0a4383bb804e33164cded2dc29b8b7b", "63ccaad290fa68953f20cd2c4647e5f2488e5ea6", "0abc3e83ccd6e685f8d0299f24f03ae28f4c2459", "22288dd563429acc2fb9846788845a5bfa4fcfe7", "ad4a6346ef0da6704d2017ae48839644de92c9ba", "dcedcc233c1f113760e3c7e46128e04fd02901be", "5110c4c092d86c52e8f8eaf5045249d821955b31", "5b0a769900c3817e911d6f24032cb00ffa173388", "433207f45ac2c9dbe3876ab53af28ed569e94da9", "87064d58ef49df1b47c4ac74258fda1aecab2b68", "68a1e13c3bfdecb59a9f37c27ad04967ded44687", "66ede69aec0e37e0851464076e1719cd8036998e", "225603198cc415d363db8a8a2bd30b0df3c963b1", "301e4245bd4f7aae0e760ad2c0b1ea6e54ffb80f", "7932a4597cec5149c575aa2303fe8f12241e4320", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "70b34be92d117fe5044fae3dcc9dc13c4ffda79b", "f4cb0ddb31cd1be6cd56d9d339429ef970ed29e3", "1f33d86eddd1b1e4919e7867a56a86351c917eae", "1fb435233a3cd8e5af1b82cd1f098b9e940097cf", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "77f429051c7e069f7eb5e56d12d4391f24eb5aab", "23f139c4f77c0baf7dc75107b2ecb1bc10fe0784", "caacd536fa218ef5218021506ebc041e3f460064", "daeff61502115efc4b9ee81607a8e5489215ea88", "7b40f864cbaeffe0ba87e51da4f945a31543642d", "092677c39262f72e15a56ab504417cf52247d971", "0d3f85933b6355789588476e491683532c68a906", "445728ecb0eabed9f7433b0c96bd36d53cb312c9", "2c7e254cfea97f0b10320d723aa906f3f5c9bed6", "17ded30902e29792b7caa1574278c6503b757ef1", "082146a8a918f4b64b2ccf32c47d2512821964bf", "17266c7296a381def5b4fd0c333b47c7b009194b", "3dfd3dfaac573c90e0eae54630881a2b412ee402", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "5706f5d4404fd41a2bad05eaf8962118ec928c90", 
"4b69324981ac45543fd60bd2e2cf487a76ed57ff", "9b545ee58ca474cccc8b0c9e6afaac047ea13cc8", "25e6d62400d5d458e23060bdb3a50d718b1d396b", "122eda0ff026311fbb6e8d589262da0b9821c19f", "1dd353938063795c06ef21d8b0b3ef3b45a2fdc1", "448766dba933d5d327d11ed3d0fa29de6aa72ca3", "6c0562ffc00ba7a4d2734ac039ffd181afe2008d", "022e4c238f9cf85b9d8142725c6a2adbdcca2094", "29d9c1724230cbdfa635bf7ee969b813a843a2fd", "7129b305ce45f83127e928e8510da9fae0783905", "09fca67472a49f94c54fdd3f652ad586d5ab361b", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "25a973aa67a796233c2b988eae3ae02645216e8f", "4c2be7d70e8e521e6e845dfe1a4dfc22f60af7b7", "20093f8513154d469653dde042952172cb9ef822", "4f604eb3b4d92bbd7563a853674392abe0e5f4d9", "971563f7acd4250bbafdb5e90160dcd4dc6110e9" ], "paperAbstract": "Direct network I/O allows network controllers (NICs) to expose multiple instances of themselves, to be used by untrusted software without a trusted intermediary. Direct I/O thus frees researchers from legacy software, fueling studies that innovate in multitenant setups. Such studies, however, overwhelmingly ignore one serious problem: direct memory accesses (DMAs) of NICs disallow page faults, forcing systems to either pin entire address spaces to physical memory and thereby hinder memory utilization, or resort to APIs that pin/unpin memory buffers before/after they are DMAed, which complicates the programming model and hampers performance.\n We solve this problem by designing and implementing page fault support for InfiniBand and Ethernet NICs. A main challenge we tackle---unique to NICs---is handling receive DMAs that trigger page faults, leaving the NIC without memory to store the incoming data. We demonstrate that our solution provides all the benefits associated with \"regular\" virtual memory, notably (1) a simpler programming model that rids users from the need to pin, and (2) the ability to employ all the canonical memory optimizations, such as memory overcommitment and demand-paging based on actual use. 
We show that, as a result, benchmark performance improves by up to 1.9x.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037710", "http://www.cs.technion.ac.il/~dan/papers/npf-asplos-2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/12db88fcf7cfee093c64e4e7737458e694a38181", "sources": [ "DBLP" ], "title": "Page Fault Support for Network Controllers", "venue": "ASPLOS", "year": 2017 }, "12de15f0e283f748af4f1820c9a3d4361c026561": { "authors": [ { "ids": [ "7430032" ], "name": "Kristen Dorey" }, { "ids": [ "8383370" ], "name": "Nicholas Chang-Fong" }, { "ids": [ "2111768" ], "name": "Aleksander Essex" } ], "doi": "", "doiUrl": "", "entities": [ "Backdoor (computing)", "Cryptosystem", "Diffie\u2013Hellman key exchange", "Discrete logarithm", "Elliptic curve cryptography", "Integer factorization", "Modulus robot", "Opportunistic TLS", "Transport Layer Security" ], "id": "12de15f0e283f748af4f1820c9a3d4361c026561", "inCitations": [ "12a969cd25b06ba88eb2ce92903b1d7c8f959f1b", "00e363001d11bc47b6e347d7dd2db6addfdf7858" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "02b97d9a3fda357165257aa8bd3031743a5540d2", "fb46335b5a7b4cad0fd1935b97f90ebc443ad8e4", "a335f26d2cc5f254f039e418b0cf49d9961dbb0c", "333eb970ad0b91b2fb51a548b5d47551e647d481", "04fec8e39d83b4c5cc4fcfeeac5847ecf0134263", "574333697bcb88521d69d60190285d242726c895", "1107bc83167c79951dc2daac32dd5b5c2a6551f7", "cf5dfdbb0b8d1c673726178f37f499b77fcc7f03", "372e528fb9de9f062496af4530ea3e2ec5df02a1", "0f17ca31699de87aaa09dd31205b146bc472c861", "1be9599ad4fe106a1511e675a1684df0563b56a6", "5635e383c4a9edb01c35e07e83196ab0ba85f129", "e89c72d64fc056d4e52713229a0c8f6b17f4918e", "2dbcc7077a01981679007eceac6c6659a1c18200", "16687b6e4b5105f13311bcc902ff283d17d1e77d", "40860f2db7516f09836ef5bbd65288a4e0957af7", "b2a7da3aafc4787d3e1370fa9609e381ea296722", "271be477fda5bb096706bbb2615240dd3282f6db", "444a738feecb3f7b911886e7b5ec0d75afd12b6b", 
"19e5adf691d70deff696dfd27a521009cb1cf437", "834e9ad34048740f59826d3be75d5635fc7eb252", "26aa192869c6174a36b936a1f826d45023291657", "2b6ce083906634e3c3b084e4c9139fb58f082df6", "0670ef42e5c9f28d547ba0dfb816fdb99ca2992f", "db9955a0c061e33b09cd9726271d624e16dc3789", "fd1d864a95d7231eaf133b00a1757ee5d0bf0e07" ], "paperAbstract": "Software implementations of discrete logarithm based cryptosystems over finite fields typically make the assumption that any domain parameters they encounter define cyclic groups for which the discrete logarithm problem is assumed to be hard. In this paper we explore this trust assumption and examine situations where it may not be justified. In particular we focus on groups for which the order is unknown and not easily determined, and explore the scenario in which the modulus is trapdoored to make computing discrete logarithms efficient for an entity with knowledge of the trapdoor, while simultaneously leaving its very existence as matter of speculation to everyone else. We conducted an investigation of discrete logarithm domain parameters in use across the Internet and discovered a multitude of instances of groups of unknown order in use in TLS and STARTTLS spanning numerous countries, organizations, and implementations. 
Although our disclosures resulted in a number of organizations taking down their suspicious parameters, none were able or willing to rule out the possibility that their parameters were trapdoors, and obtaining conclusive evidence in each case could be as hard as factoring an RSA modulus, highlighting a key feature of this attack method\u2014deniability.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/indiscreet-logs-persistent-diffie-hellman-backdoors-tls/", "http://www.internetsociety.org/sites/default/files/ndss2017-04a_2-dorey_slides.pdf", "http://www.internetsociety.org/sites/default/files/ndss2017_04A-2_Dorey_paper.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/7fc8/4b4cd8c05402c15a7aeb6d7ca7a4c9c88786.pdf", "s2Url": "https://semanticscholar.org/paper/12de15f0e283f748af4f1820c9a3d4361c026561", "sources": [ "DBLP" ], "title": "Indiscreet Logs: Diffie-Hellman Backdoors in TLS", "venue": "NDSS", "year": 2017 }, "12f10f72e6aa43b37aa1289773f453e4af93516d": { "authors": [ { "ids": [ "2197142" ], "name": "Lei Yang" }, { "ids": [ "3084576" ], "name": "Qiongzheng Lin" }, { "ids": [ "3444521" ], "name": "Chunhui Duan" }, { "ids": [ "22260366" ], "name": "Zhenlin An" } ], "doi": "10.1145/3117811.3117835", "doiUrl": "https://doi.org/10.1145/3117811.3117835", "entities": [ "Hash function", "Heterogeneous Element Processor", "Radio-frequency identification", "Software deployment" ], "id": "12f10f72e6aa43b37aa1289773f453e4af93516d", "inCitations": [ "b588932e43a18c6c0bc03e9fa44c81b68a3415fc" ], "journalName": "", "journalPages": "301-314", "journalVolume": "", "outCitations": [ "07fb5d863329934ebf870f82e01db5d5d90e9918", "1c940efb1e0b966c9c0eb7b335f5759e2d1b74f0", "3676e0e2e1c9af2c98c3570911137f7a0bd3cc4d", "0dd96161deab7c342998b20adcf2c33331776530", "d2224306691566e2b363180240e0cc2aa06e20be", "2fba447e504606e90908966d5a5f4c1de972a7ba", "5cfd9c061954fca5e10d38bc39def68119a50308", "91b9389bae769e2cf1934ca7189217604a613ac9", 
"0108d5f081f8f5bea6e999b23e3f10fa4bdfe7bf", "8a85bc083ce3c69bbf84b5e0bac3dc251bf56338", "040832e416d54fb6b2b04c6d899175f92cd4d9f0", "53af9bc69e58060b0f04333e35eafdf24a177ca8", "2b3aabf4173e515a6e9bbc3410cd5dd9c87549ba", "0ee4ad1d06effe28544ef0a144c36384ad418df7", "bb1951f41f0b9eb4e5c84597723e03bdff1d71ab", "7fd4bfa6204a745824c787e6f86d5f9884b3fef0", "c60ca8b3fb7e70825e9f6fa1bbbb8cdf301cdd61", "0e165b89e365b6c9535794d9f05fd65bc68997c3", "32246f9efa11dcf46d1c7c8bb0ad2421d74e4666", "038afe4f12c873329f02aca9173924ff7bed8ca4", "5af30240e6350300b1ae7bae21c7a0bdc32bfc11", "14432b0419bf6f73ffa0ec2376136926325bd2db", "19b840e5c520885c2ee72a4ce40fa243cf7141af", "42831e63c56287b1d13dcf3c39d7707abcd4732c", "12c12e4bb10e1bd417ccc7ffa19d67a00aee0e85", "797d7b48cba59b2791ed971d2fb4fd8eb5adb61d", "3cdaac82651444b9aed5862df610415c323ca54b", "72ea6d850305766ef7eeb61f5bc02781899f1333", "e94d5f7d2093570cd3292e3d710b9b52151f965a", "9057e80082b3e999c28560955f471fd904a0002a", "05e3db5fd51efb9370a58041ad11939f7e05b350", "82802e411495bbad77fa2415c6d4633dde180764", "1c3c12ec3c28cb2682d5400fe5ba48b4a9c40aef", "0c639f235e146a0834b8fdb455e82285e5113cc2", "ac02b965b763510822d80533f9a9bd8df35645b0", "a066968793ea29266ca094bc957c0dc3fdcccdbf", "a61b858eacb8ce549bed1feda96e1027baa1add5", "d571ff888cc5f8c18df2565cfa543531665fc169", "614ca466b454ef07ef88e9f1a9aa1f46ea724012", "12d3f923da31336190a9eee7dfc65095229aa683", "02931a0d70758a5fdf559f03e1084bae60b73438", "026b660541694b768f4eee959d4dec5f9bf68cdd", "540ac698963add83d4e47d52edc54c84160eddd1", "1ab2445a2b0bc4b59c3cf2b853ac3ba828e6b14b", "0194df730df8fe82f30bb29dfeebd5d8dd1da3d9", "95d6c7d3d41f0a459b11e654e5376033fc3a3cc9", "7fca481caa31976a3c07989a295e804c1eeb9c76", "235920bae03d9a1086be9b96d0bd6085e71a18cc", "ec1dd03abb276061c2aba0aace599610ac2eb7a7", "c50b174f27928cdffc8c69bdd067a77a2d8ded7f", "48059d47eb27e58f40de3c1e6509b6ed3508b953", "339888b357e780c6e80fc135ec48a14c3b524f7d", "75519616967f8137753012a3bb3fe87de7656f1c", 
"1eb8d4cce3b9b5211aa18c322c9681d91aa0e90c", "14bfbc0abc246ea1f95b42aa465858d5450f9854", "8d25ed1b12cd567db171607b92b3bba4bb08a1a1", "4200ba887cb2178b0d39af0dd970bba9d15f06ae", "26a5afbd5855e992e8b9ecfb8169bd67c6a69568", "ae9dc6aaa2dc1853634bb77e54377e6ca77c54eb" ], "paperAbstract": "Deployment of billions of Commercial Off-The-Shelf (COTS) RFID tags has drawn much of the attention of the research community because of the performance gaps of current systems. In particular, hash-enabled protocol (HEP) is one of the most thoroughly studied topics in the past decade. HEPs are designed for a wide spectrum of notable applications (e.g., missing detection) without need to collect all tags. HEPs assume that each tag contains a hash function, such that a tag can select a random but predicable time slot to reply with a one-bit presence signal that shows its existence. However, the hash function has never been implemented in COTS tags in reality, which makes HEPs a 10-year untouchable mirage. This work designs and implements a group of analog on-tag hash primitives (called Tash) for COTS Gen2-compatible RFID systems, which moves prior HEPs forward from theory to practice. In particular, we design three types of hash primitives, namely, tash function, tash table function and tash operator. All of these hash primitives are implemented through selective reading, which is a fundamental and mandatory functionality specified in Gen2 protocol, without any hardware modification and fabrication. We further apply our hash primitives in two typical HEP applications (i.e., cardinality estimation and missing detection) to show the feasibility and effectiveness of Tash. Results from our prototype, which is composed of one ImpinJ reader and 3,000 Alien tags, demonstrate that the new design lowers 60% of the communication overhead in the air. 
The tash operator can additionally introduce an overhead drop of 29.7%.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117835", "https://arxiv.org/pdf/1707.08883v1.pdf", "http://arxiv.org/abs/1707.08883" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/12f10f72e6aa43b37aa1289773f453e4af93516d", "sources": [ "DBLP" ], "title": "Analog On-Tag Hashing: Towards Selective Reading as Hash Primitives in Gen2 RFID Systems", "venue": "MobiCom", "year": 2017 }, "130299b5d0ec696208ad351b6c70559a30b2fc1b": { "authors": [ { "ids": [ "11135785" ], "name": "Giulio Ermanno Pibiri" }, { "ids": [ "1797933" ], "name": "Rossano Venturini" } ], "doi": "10.1145/3077136.3080798", "doiUrl": "https://doi.org/10.1145/3077136.3080798", "entities": [ "Data structure", "Database", "Graphic art software", "Information retrieval", "Lossless compression", "Machine learning", "N-gram", "Natural language", "Natural language processing", "Sparse matrix", "Trie" ], "id": "130299b5d0ec696208ad351b6c70559a30b2fc1b", "inCitations": [], "journalName": "", "journalPages": "615-624", "journalVolume": "", "outCitations": [ "1560272f4a258c3e690fd3703d3132913af91467", "31a44b74c2cb1b0584c5939b8224eefa15346cf2", "3593269a4bf87a7d0f7aba639a50bc74cb288fb1", "d2044ca37a948fc34ea1f3f87e9090ec8bda4a33", "734200894aed64a4ceea11c62ae303d7749efd96", "27b92df79c638fc74bc4e8b2bd27d272d512c399", "ffb0b99ee9f988f68946dc96717d154b171d1336", "4c9fafa3b1bed97bb00b8bc68db39a9ad48490f1", "5bc7536e2d5c5b6da9c09650d5b229e75aef723a", "096d7d2e9b3fbc37f1c4e75b1896ae3797950ef9", "28b9a81cebf76696be07692f8b66e7b9ebbe111f", "7b11c263dde88960c4ab27c267d5e100b269bf53", "8ce69d30bca576f7230782a15df55b231ecd6cc3", "3b2baa6b588d892800d9ba37b7b4582e137c5f91", "18deb1448ba48bb3931ff16e65bcd2303dfe05df", "d4ace73d54ffca372d79136c9811728704a5e74d", "1375ffa728c1c7ae2b471fc2443d8342cfea84d1", "523182a9a66f96241d4bab0b92cc92e9cbda22c7", "20548990990c447ab54a3ecba82af2b5443a01d6", 
"1627704c73fab3573bc38ab99f158822b02464b6", "2bb1df67e235015d867bc2d3fdbf12028976a299", "69b833b00efb680a5296dceea3106083427a9969", "8b11f891f98ad7cc2ef94c0febdba1a57ef8a995", "160ec59f2eb9def5cca00f6c1bf198d13fb8b6a7", "5e644f5673efe090c2fa413c53e8ed6488a67fe5", "6ba48400a8740e67ae58bac28ebf81d96bc67fbd", "618f32ac4ee6ca111d385813f8ce84a17eae61ea", "4c211c9a7527840337042ba6ca82158b1aa84d14", "7ad19fa4baf1031aee118d0ffd00b2c031d60ccf" ], "paperAbstract": "The efficient indexing of large and sparse N-gram datasets is crucial in several applications in Information Retrieval, Natural Language Processing and Machine Learning. Because of the stringent efficiency requirements, dealing with billions of N-grams poses the challenge of introducing a compressed representation that preserves the query processing speed.\n In this paper we study the problem of reducing the space required by the representation of such datasets, maintaining the capability of looking up for a given N-gram within micro seconds. For this purpose we describe compressed, exact and lossless data structures that achieve, at the same time, high space reductions and no time degradation with respect to state-of-the-art software packages. In particular, we present a trie data structure in which each word following a context of fixed length k, i.e., its preceding k words, is encoded as an integer whose value is proportional to the number of words that follow such context. Since the number of words following a given context is typically very small in natural languages, we are able to lower the space of representation to compression levels that were never achieved before. 
Despite the significant savings in space, we show that our technique introduces a negligible penalty at query time.", "pdfUrls": [ "http://pages.di.unipi.it/pibiri/papers/IIR17.pdf", "http://doi.acm.org/10.1145/3077136.3080798", "http://pages.di.unipi.it/pibiri/slides/SIGIR17-slides.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/130299b5d0ec696208ad351b6c70559a30b2fc1b", "sources": [ "DBLP" ], "title": "Efficient Data Structures for Massive N-Gram Datasets", "venue": "SIGIR", "year": 2017 }, "130d2175a34112e3e69c8a40d9ea1a0df5d88417": { "authors": [ { "ids": [ "1743078" ], "name": "Yi Ding" }, { "ids": [ "2039481" ], "name": "Chenghao Liu" }, { "ids": [ "1714894" ], "name": "Peilin Zhao" }, { "ids": [ "1741126" ], "name": "Steven C. H. Hoi" } ], "doi": "10.1109/ICDM.2017.18", "doiUrl": "https://doi.org/10.1109/ICDM.2017.18", "entities": [ "Algorithm", "Approximation algorithm", "Basis function", "Computational complexity theory", "Expectation\u2013maximization algorithm", "Feature vector", "Gradient", "Gradient descent", "Image gradient", "Kernel (operating system)", "Kernel method", "Linear separability", "Scalability", "Surrogates", "Test set" ], "id": "130d2175a34112e3e69c8a40d9ea1a0df5d88417", "inCitations": [ "0ef2abc9a42759a94a2c0ae87e1faa2fcfa77d68" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "91-100", "journalVolume": "", "outCitations": [ "12fa4a3ee546ba8eeb0b88b06bcb571d65d91cc4", "2688969848b75336891b1e58a2b6ee433343867c", "fc8cda36a0972e7de1ac3a7bcb81dc32da79bee4", "a035cccf0f57b505229f0e5db1a1dfedde062d43", "27e7ebe8b2030ec557278cf388e7e9feaa6cc19e", "0538e399046c74d95124c715760aa51ab4716dce", "179153e6ae011cd0ba4c0f52085863a0a5a3d6e0", "43216287965da7bfbe56b2e50a552cf0ac3144f0", "d25a6d88c0d6a5b56adb4197f1d6c8e722b588b7", "3da14fdd50e2cfdfb10b005425b176688569efc4", "1bd43bc8ec308a1ef3e9496868d8c6baa02e4f5d", "cbc4aa545290536e2a10726ed7d7673226ca00d9", 
"f68969eac758998c5bceedcd6f73499d23c62f07", "7de9e052e084704cdaf19cefd26a340dc548f5fa", "1ef475f169e5086404ddcae3195350a56f006c79", "c4fb8b86f5659bc59f705e683b5d6b31d3af74b9", "4e171856b5eac3a2bf7ebc1c243d9937b55a09bc", "b6fff8b8ea77f157913986e7af53951d9fc1128e", "08743d09f3ec33ab1f188d4c5f8f5550c312ace0", "a5404caefcf1ffc111e57f1313d95fd46a05caa0", "e17234ec0af7ec8e31dfdfdf8dcd5c083ced9577", "3864e796107d1ed2046eccb999c4b406414deee6", "da01f7fcc5c7eeba75bc09a41fdd946e65210090", "0d15202f2de79a5cdc4ba1ff005c3aef27aea2b0", "7af09246bae1d2d9abada79f441ba25858c69ef9", "1b20afbd2d2a349737ed3dc246e44bbdba203190", "3e73e107e0f65e5ca81fac14fadeee74cdbaab55", "87e10591db57b2688dcb9c77412c94a4aa01af88", "f88d1533a41199f29eae764595a4d9b9bcf521c6", "e429c37a15cfc4b8cf4e98c919c36087b460bfe7", "1fbfa8b590ce4679367d73cb8e4f2d169ae5c624", "17d2f027221d60cda373ecf15b03706c9e60269b", "175c1bb60ee46dac56d942ef8c7339977b4ebb0e", "0cb64395be7941da205f0a8f27629ce0e28b1ffa" ], "paperAbstract": "Learning to optimize AUC performance for classifying label imbalanced data in online scenarios has been extensively studied in recent years. Most of the existing work has attempted to address the problem directly in the original feature space, which may not suitable for non-linearly separable datasets. To solve this issue, some kernel-based learning methods are proposed for non-linearly separable datasets. However, such kernel approaches have been shown to be inefficient and failed to scale well on large scale datasets in practice. Taking this cue, in this work, we explore the use of scalable kernel-based learning techniques as surrogates to existing approaches: random Fourier features and Nyström method, for tackling the problem and bring insights to the differences between the two methods based on their online performance. 
In contrast to the conventional kernel-based learning methods which suffer from high computational complexity of the kernel matrix, our proposed approaches elevate this issue with linear features that approximate the kernel function/matrix. Specifically, two different surrogate kernel-based learning models are presented for addressing the online AUC maximization task: (i) the Fourier Online AUC Maximization (FOAM) algorithm that samples the basis functions from a data-independent distribution to approximate the kernel functions; and (ii) the Nyström Online AUC Maximization (NOAM) algorithm that samples a subset of instances from the training data to approximate the kernel matrix by a low rank matrix. Another novelty of the present work is the proposed mini-batch Online Gradient Descent method for model updating to control the noise and reduce the variance of gradients. We provide theoretical analyses for the two proposed algorithms. Empirical studies on commonly used large scale datasets show that the proposed algorithms outperformed existing state-of-the-art methods in terms of both AUC performance and computational efficiency.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.18" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/130d2175a34112e3e69c8a40d9ea1a0df5d88417", "sources": [ "DBLP" ], "title": "Large Scale Kernel Methods for Online AUC Maximization", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "1315ea1bf8f92bc1773556f727af0f85605cb677": { "authors": [ { "ids": [ "1695375" ], "name": "Roberto Gioiosa" }, { "ids": [ "2606269" ], "name": "Antonino Tumeo" }, { "ids": [ "1700855" ], "name": "Jian Yin" }, { "ids": [ "2616994" ], "name": "Thomas Warfel" }, { "ids": [ "1774041" ], "name": "David J. 
Haglin" }, { "ids": [ "2706560" ], "name": "Santiago Betel\u00fa" } ], "doi": "10.1109/IPDPS.2017.121", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.121", "entities": [ "Barrier (computer science)", "Benchmark (computing)", "Breadth-first search", "Fast Fourier transform", "Finite difference", "Giga-updates per second", "InfiniBand", "Interconnection", "Network architecture", "Simulation", "Vortex" ], "id": "1315ea1bf8f92bc1773556f727af0f85605cb677", "inCitations": [ "b11068b7dda0b4470132b868c841cb2fbd5ca879" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "409-418", "journalVolume": "", "outCitations": [ "a470a01cedc4df4d7719f41e174dda7c26a9ddb3", "2ee85e62099a2393366e33e794796afe62be75e0", "46798a562160a628681f4b69121d45dd1105937d", "33715194bf741fe17d6f6b9559af694907c26d2a", "5f8991828def57d2f0cda942566afff56740d150", "c4dd4dff3b7e03a372eaee00dba074f3ebf4a4dc", "6bad177eb5fc0fd7ea223149cec4a76d8567479a", "3de8fa07b2513240ed6d678ac1a3634f4ab52398", "143504cf0794163b60b93fb17cf61c885d7fd73c", "02115b80df6978eaded262ea58accb1e1d1364ed", "a9d590711b56bb2e66f98814802f1429c20ee863", "6891e0c7ded97a62df6fdc3c2553c07de2822b13", "33299bbc74d62c9d83f714f0753fc0f2ecadc645", "ff06c045b4d5de3c52504c96f43fe296070a8482", "01c1ba712c47112ac1d7970b280064132932c492", "304a6bac0bbdf16cb3d32853d891748ffd663ae4", "cbf8acf187297b22bf189cd057b9495f90bd973b", "57748a6246541a2ceb71de18be3a24165753e0a8", "848dd6c85c200325778dea4d2f80b30bef2efc12", "2b8184d84bf668ed213e3f0fe1081c2fcf08e14d", "c3fbbd9c1fc5e53c6a9e3fe27e1bfce4755c8ef3" ], "paperAbstract": "Emerging applications for data analytics and knowledge discovery typically have irregular or unpredictable communication patterns that do not scale well on parallel systems designed for traditional bulk-synchronous HPC applications. 
New network architectures that focus on minimizing (short) message latencies, rather than maximizing (large) transfer bandwidths, are emerging as possible alternatives to better support those applications with irregular communication patterns. We explore a system based upon one such novel network architecture, the Data Vortex interconnection network, and examine how this system performs by running benchmark code written for the Data Vortex network, as well as a reference MPI-over- Infiniband implementation, on the same cluster. Simple communication primitives (ping-pong and barrier synchronization), a few common communication kernels (distributed 1D Fast Fourier Transform, breadth-first search, Giga-Updates Per Second) and three prototype applications (a proxy application for simulating neutron transport-”SNAP”, a finite difference simulation for computing incompressible fluid flow, and an implementation of the heat equation) were all implemented for both network models. The results were compared and analyzed to determine what characteristics make an application a good candidate for porting to a Data Vortex system, and to what extent applications could potentially benefit from this new architecture.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.121" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1315ea1bf8f92bc1773556f727af0f85605cb677", "sources": [ "DBLP" ], "title": "Exploring DataVortex Systems for Irregular Applications", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "1317e1a203c9241ddfa0202acb91662b14ca2d7b": { "authors": [ { "ids": [ "9353174" ], "name": "Benny Van Houdt" } ], "doi": "10.1109/MASCOTS.2017.10", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.10", "entities": [ "Algorithm", "Approximation", "Belief propagation", "Experiment", "Fixed point (mathematics)", "Free energy perturbation", "Free energy principle", "Gradient", "Numerical analysis", "Recursion", 
"Serializability", "Throughput" ], "id": "1317e1a203c9241ddfa0202acb91662b14ca2d7b", "inCitations": [ "67dbe487b2dee1501bba8cbceaabfdb0491350f0" ], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "121-131", "journalVolume": "", "outCitations": [ "f26da30b639ac79f3b74b00b00bea6e527a4bf7c", "2f9c27b2106835778bc9398de70365941f67a224", "455bb755e2dfade1901bf106afe85003e3cf694f", "59d410a89840932e5dbb269755ca29185002bc0f", "edd01707a38d2bfba8f9f37390f0cacba6d45bfb", "ff1826c7472c3eed2a929b703562fa4e08cc38a0", "ec328497984c50691d9a96201263516f1f09483c", "1ce51581e805acf0f53b3ffa0bbe86e8627811ec", "ebcf9ade9d53405f66be609baff7a94b2d6e1000", "25d27452f533420669b3db3af35859f38315955f", "95f3fd7019807b8972d65c0117e323d04c3a0657", "1c8003c27d0022f241b42a1d5ca12b85e44726e6", "41789272d6ed561fac405fb80d36c280d7f3a96b", "0beae75f3483b60633e546f0eb4a64c8ecb5a23f", "f2df4630b1bae444fd8cef9a9081f9ec5685ac3d", "2561d1fcefc801cbd4c4061ec16f3a51fc563abc", "40da05d49731b3ce3ba0db8adbfa68421ec347ba", "162e6b7ad50ca8de6d6a26428ea0a06f20e37b72", "476ac3f04a66c86fa8c45ed04c2d851774c45956", "95f692d4daa5c56a3f45732626524f6510380283", "08c370eb9ba13bfb836349e7f3ea428be4697818" ], "paperAbstract": "In this paper we study how to estimate the back-off rates in an idealized CSMA network consisting of n links to achieve a given throughput vector using free energy approximations. More specifically, we introduce the class of region-based free energy approximations with clique belief and present a closed form expression for the back-off rates based on the zero gradient points of the free energy approximation (in terms of the conflict graph, target throughput vector and counting numbers).Next we introduce the size k_max clique free energy approximation as a special case and derive an explicit expression for the counting numbers, as well as a recursion tocompute the back-off rates. 
We subsequently show that the size k_max clique approximation coincides with a Kikuchi free energy approximation and prove that it is exact on chordal conflict graphs when k_max = n. As a by-product these results provide us with an explicit expression of a fixed point of the inverse generalized belief propagation algorithm for CSMA networks.Using numerical experiments we compare the accuracy of the novel approximation method with existing methods.", "pdfUrls": [ "http://arxiv.org/abs/1703.10500", "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.10", "https://arxiv.org/pdf/1703.10500v1.pdf", "https://arxiv.org/pdf/1703.10500v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1317e1a203c9241ddfa0202acb91662b14ca2d7b", "sources": [ "DBLP" ], "title": "Free Energy Approximations for CSMA Networks", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "13387166efd4f6d66b9ab19828855090586b16fd": { "authors": [ { "ids": [ "25440110" ], "name": "Sabela Ramos" }, { "ids": [ "1713648" ], "name": "Torsten Hoefler" } ], "doi": "10.1109/IPDPS.2017.30", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.30", "entities": [ "Algorithm", "Bitonic sorter", "Central processing unit", "Complex system", "Dynamic random-access memory", "Manycore processor", "Mesh networking", "Message Passing Interface", "Multi-core processor", "OpenMP", "Program optimization", "Xeon Phi" ], "id": "13387166efd4f6d66b9ab19828855090586b16fd", "inCitations": [ "53ee65bfc69cd55d81196537086137ef8efb2108", "48e6c7035b35a3ee8b8c2e430c158bfd7102a2fe" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "297-306", "journalVolume": "", "outCitations": [ "03514ed26ee815192ec46eb23a148e0b7b897775", "23177452df15b652dd54a59324502b92c99687a7", "68bb9a5919e5ae1b45c5923259ceeb8f67acb2ae", 
"645d9e7e5e3c5496f11e0e303dc4cc1395109773", "cf0591e00458d5d7ca20fbc82da70f783f57bfd6", "d2c16be2bf76ac16d3ffd0fc2aff8a1c6e3c3dc8", "32d355a7a20f92ccda0608f83d7456870231c570", "14a4369f0fd45b3ae2323dd71eac8980b1556f0d", "888d4ade3a7552ebafe997988a82cdd16128961e", "034c374b2d973a3ae6e5d80f8ba88b59e5215aca", "3f750233c3e20da134b4427eb6645f877ac0a503", "2ae65bebbd8c7f943811eb3417b1004870a88483", "67cf1189c859d66bac309f9438df434fb651f97a", "f3325ace129dec914966f9894d9f412e5e04bdc2" ], "paperAbstract": "Increasingly complex memory systems and onchip interconnects are developed to mitigate the data movement bottlenecks in manycore processors. One example of such a complex system is the Xeon Phi KNL CPU with three different types of memory, fifteen memory configuration options, and a complex on-chip mesh network connecting up to 72 cores. Users require a detailed understanding of the performance characteristics of the different options to utilize the system efficiently. Unfortunately, peak performance is rarely achievable and achievable performance is hardly documented. We address this with capability models of the memory subsystem, derived by systematic measurements, to guide users to navigate the complex optimization space. As a case study, we provide an extensive model of all memory configuration options for Xeon Phi KNL. We demonstrate how our capability model can be used to automatically derive new close-to-optimal algorithms for various communication functions yielding improvements 5x and 24x over Intel’s tuned OpenMP and MPI implementations, respectively. Furthermore, we demonstrate how to use the models to assess how efficiently a bitonic sort application utilizes the memory resources. 
Interestingly, our capability models predict and explain that the high bandwidthMCDRAM does not improve the bitonic sort performance over DRAM.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.30" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/13387166efd4f6d66b9ab19828855090586b16fd", "sources": [ "DBLP" ], "title": "Capability Models for Manycore Memory Systems: A Case-Study with Xeon Phi KNL", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "1339ff7f37047f7e8517295b8542d99788e16497": { "authors": [ { "ids": [ "3543872" ], "name": "Zhenhua Li" }, { "ids": [ "33830008" ], "name": "Yuanyuan Yang" } ], "doi": "10.1109/IPDPS.2017.50", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.50", "entities": [ "Algorithm", "Average path length", "Data center", "Fault tolerance", "High availability", "Performance per watt", "Routing", "Server (computing)", "Simulation", "Throughput" ], "id": "1339ff7f37047f7e8517295b8542d99788e16497", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "718-727", "journalVolume": "", "outCitations": [ "57c1cb35aeff251a4ebc3118598ff4e60cca96c0", "46e3fb8bacbb39a7db86c30a8adda227496f0283", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "92c873750a26240e416f315442c0efd257f0cd5d", "ed2db031b9a02147777b7d3ad72da2e0cae19e10", "640af017aa8d11f9f31480155c8d5d1a0d8865d7", "1d912b67ba7cda4d341d834c1c6de96db01888fc", "c068615b084aceef73e11628486ac2bfdab5fc26", "2629b70da4e456b54e96f1d1d2703bd518239415", "0f8b04cb89e455ceadf0c88fd5dd9f9a7f338ba9", "61aa0f67a22fb72abc41241c358e5f0e2b565def", "84b24b6d01c1b9c624d2e31fd839994353e6a243", "70cfadda2de05949e8908e0ba35aa18f29928cd4", "b72bf8d50a699f1edccc68bf8e7d80921d88b7a2", "11a4744f7f0883c4232a9f5aaca8b9d29bfaa762", "0541d5338adc48276b3b8cd3a141d799e2d40150", "402405280ef60ffc35b3ce6c0845805b787bbb87", "5fa66ec92e8b0a1a33fae62ddbdab154b938a992", 
"663e064469ad91e6bda345d216504b4c868f537b", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "3365897178130e985acf671d6564f5dd6fa0ea1c", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "70afebbe36ffacc827bf995257f01cad7a1160bc" ], "paperAbstract": "Designing a cost-effective network for data centers that can deliver sufficient bandwidth and provide high availability has drawn tremendous attentions recently. In this paper, we propose a novel server-centric network structure called RCube, which is energy efficient and can deploy a redundancy scheme to improve the availability of data centers. Moreover, RCube shares many good properties with BCube, a well known server-centric network structure, yet its network size can be adjusted more conveniently. We also present a routing algorithm to find paths in RCube and an algorithm to build multiple parallel paths between any pair of source and destination servers. In addition, we theoretically analyze the power efficiency of the network and availability of RCube under server failure. 
Our comprehensive simulations demonstrate that RCube provides higher availability and flexibility to make trade-off among many factors, such as power consumption and aggregate throughput, than BCube, while delivering similar performance to BCube in many critical metrics, such as average path length, path distribution and graceful degradation, which makes RCube a very promising empirical structure for an enterprise data center network product.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.50" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1339ff7f37047f7e8517295b8542d99788e16497", "sources": [ "DBLP" ], "title": "RCube: A Power Efficient and Highly Available Network for Data Centers", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "133e90b28a98025587234656714c6298604b8d9c": { "authors": [ { "ids": [ "2045309" ], "name": "Ming-Hsien Tsai" }, { "ids": [ "35194343" ], "name": "Bow-Yaw Wang" }, { "ids": [ "1739175" ], "name": "Bo-Yin Yang" } ], "doi": "10.1145/3133956.3134076", "doiUrl": "https://doi.org/10.1145/3133956.3134076", "entities": [ "Algebraic specification", "Assembly language", "Computation", "Computer algebra system", "Coq (software)", "Cryptography", "Cryptosystem", "Diffie\u2013Hellman key exchange", "F-algebra", "High- and low-level", "Key exchange", "Montgomery modular multiplication", "OpenSSH", "Proof assistant", "Symbolic computation", "Verification and validation" ], "id": "133e90b28a98025587234656714c6298604b8d9c", "inCitations": [], "journalName": "", "journalPages": "1973-1987", "journalVolume": "", "outCitations": [ "a4b1ebe5e56c1b19a73cd6de79ea805cb39bb4ca", "05c34e5fc12aadcbb309b36dc9f0ed309fd2dd50", "614f3b72660eed2ce7b62970fa73ba8eae4d278b", "0a977b239999f3c1e956dbecc933f434426c2a82", "237e9b0add47fc7660c0e6443d7918a904439f7f", "194f7d8647009dea5f4867ae27d340c84c46f51b", "20b8d907ce8ec3642ad874462587a9fd81a50dd0", 
"a60d00ba42a4bed7adb3dc40cd1c32cbaffda5df", "6bf3ec49afd260020727bab31d79bef80db4e1ec", "4b05daafd34919ef9857eb67c418c3f9a5dadedf", "23dadf25f3efacbc9c66f69093d656ad5b003529", "2e37af19b69f12699279e3dc754cfac681555d1f", "afb91cb334aa5892e1ae567e016ba9de63738575", "9f50019aa8161577e4fc62f79da41083ba03f70b", "0a36a523494c3c966f0a6e716c7ef851fcda4762", "7dd6bb3de7e797d8ff95325a78848fcea7414c39", "4bf466cd9a89f728777943ae91661bdff3b25c06", "0b70652541cb408152c468eaea7b114dc65beab1", "ebda35dd27f830c91fb989c5dd769bb981996175", "08ebb5535c4304a44d5518578aa9ef7486e88bb3", "3cc86ff94309bb58b2125eea173b23ab89f26a3b" ], "paperAbstract": "Mathematical constructs are necessary for computation on the underlying algebraic structures of cryptosystems. They are often written in assembly language and optimized manually for efficiency. We develop a certified technique to verify low-level mathematical constructs in X25519, the default elliptic curve Diffie-Hellman key exchange protocol used in OpenSSH. Our technique translates an algebraic specification of mathematical constructs into an algebraic problem. The algebraic problem in turn is solved by the computer algebra system Singular. The proof assistant Coq certifies the translation and solution to algebraic problems. Specifications about output ranges and potential program overflows are translated to SMT problems and verified by SMT solvers. 
We report our case studies on verifying arithmetic computation over a large finite field and the Montgomery Ladderstep, a crucial loop in X25519.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134076" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/133e90b28a98025587234656714c6298604b8d9c", "sources": [ "DBLP" ], "title": "Certified Verification of Algebraic Properties on Low-Level Mathematical Constructs in Cryptographic Programs", "venue": "CCS", "year": 2017 }, "133eba30fbd96f0551d692c76f4c851d4d2f9f27": { "authors": [ { "ids": [ "39203989" ], "name": "Scott Ames" }, { "ids": [ "3228981" ], "name": "Carmit Hazay" }, { "ids": [ "1688856" ], "name": "Yuval Ishai" }, { "ids": [ "3297014" ], "name": "Muthuramakrishnan Venkitasubramaniam" } ], "doi": "10.1145/3133956.3134104", "doiUrl": "https://doi.org/10.1145/3133956.3134104", "entities": [ "Amortized analysis", "Collision resistance", "Communication complexity", "Computation", "Cryptography", "Hash function", "Integrated circuit", "Interactivity", "Public-key cryptography", "Random oracle", "Reed\u2013Solomon error correction", "SHA-2", "Secure multi-party computation", "Symposium on Theory of Computing", "Time complexity", "Verification and validation", "Zero-knowledge proof" ], "id": "133eba30fbd96f0551d692c76f4c851d4d2f9f27", "inCitations": [ "e2882f7927b19df6bb8d429cea1c1ce54fd91931", "e0957a2d15cad958cc3cc90ae791f369ca5f426a", "80621d09c3d3dd896c7e2bff083b9e702dc2ed29", "6db9824d4667b22310c51fe638403238f873e9f2", "032cd72078d8ed4795a71f5b27036e8888c39742", "2f7b4ee46d284664fd1a4a679d1e610e2954ca8b", "0a85b3afc89958583642b7fd39b37e745a053190", "38a8ed0a65c8581e3b1d42ef32a7ab37cc9f98fc" ], "journalName": "", "journalPages": "2087-2104", "journalVolume": "", "outCitations": [ "b0f2557d50632081a7942812a05b130747daaff6", "1475c9eb499df6fbf9c318bad974ba8ef34fbe87", "1a33c542b064f95eb6fc9b0003e80ff4f1b9289a", "a705360b3c21cb1e68f9714495518339881c66b3", 
"3934442c534faa289fe9f74558e4ba992201e125", "3ff0fbebd3e0a2aa43e43963231131b9bd55336f", "4a252c982d125f2ff63db059468a3c4ce05c437e", "8c43726d7229a6dea25d3f510ca4b8e681117b60", "16666593472d5924b2bac14fdbd180773741f5bf", "011d714a361b8ceb925c18e4a214e22aa5f899d8", "9c9dc6e15bedebf021415fa28e1c4571b6ac0083", "1b5e73314491f5ecbd4db9d211ca81be28e2eeb4", "6a03239a737bd3e923878043d51f56508ff11b13", "5644dd91f6e5ce01229dd94e9db9e2fba321da6e", "98ebc0c61e702daf58043aab2abd4a0d10df7240", "07c746c119b1d18e6580840b2166721e07b4433d", "3a5bc6c6c312fdb8c65cb797535f6462947e7181", "b26de63ff444be172e48c05aae7dd01e1e975c91", "e2882f7927b19df6bb8d429cea1c1ce54fd91931", "6f6cdd94b1d44e27ee0692745562bf4a05a84ca4", "7e32e068c7471c5dfd139c8a563e4644dd3d54eb", "5ef35be3b424a585106334563122c8e7c6d50272", "129db5ec39a453ea53c94ad529cf13dccafe4167", "d2e77cc7379a29f610d83f6ba2e2b058db17b2da", "9c13417ebe8148addde423f408b27e022061cde5", "90f26c0f0d04b9c4999b454c35ee1c7603ca9e4b", "b7d4ca273925545f044e12c5426fbf58ef240911", "3624eaf7c2f05612429ed1579cc82102c11f4d65", "0dd2f64d714d1e4b79bf3da64eb08f4f2ff531f2", "01e83a7ff59354527a01f440129e71aadbe2eade", "66818f952327b9145d3c7f6ef392240f06767755", "a12f684154a4e41c0d2066aee90c80426ae458d6", "5712204c77fdf6fa62e93c183b6c23751d21aa86", "6cba388f07e05f39aebd6da74d860584d642cd4e", "0f8e6351bbf5eb1c6f6b1f8a4de2a56c2027c9c5", "19de1229db1c2e62367a3d1459e24848064dfd02", "2e8ccf0dc75d889dead5ea67e6752fe3f59fd7bf", "447d889d722d171748bd2ff79328bfe822d05f97", "1a59302e8c3e39c6fb7249af7346c2d5158e03e1", "4dc3dc96281fee8c676578546c98c295219aa7fc", "27463b98f47a73de3b77549985aae80c344ddf69" ], "paperAbstract": "We design and implement a simple zero-knowledge argument protocol for NP whose communication complexity is proportional to the square-root of the verification circuit size. The protocol can be based on any collision-resistant hash function. 
Alternatively, it can be made non-interactive in the random oracle model, yielding concretely efficient zk-SNARKs that do not require a trusted setup or public-key cryptography.\n Our protocol is attractive not only for very large verification circuits but also for moderately large circuits that arise in applications. For instance, for verifying a SHA-256 preimage in zero-knowledge with 2-40 soundness error, the communication complexity is roughly 44KB (or less than 34KB under a plausible conjecture), the prover running time is 140 ms, and the verifier running time is 62 ms. This proof is roughly 4 times shorter than a similar proof of ZKB++ (Chase et al., CCS 2017), an optimized variant of ZKBoo (Giacomelli et al., USENIX 2016).\n The communication complexity of our protocol is independent of the circuit structure and depends only on the number of gates. For 2-40 soundness error, the communication becomes smaller than the circuit size for circuits containing roughly 3 million gates or more. Our efficiency advantages become even bigger in an amortized setting, where several instances need to be proven simultaneously.\n Our zero-knowledge protocol is obtained by applying an optimized version of the general transformation of Ishai et al. (STOC 2007) to a variant of the protocol for secure multiparty computation of Damgard and Ishai (Crypto 2006). 
It can be viewed as a simple zero-knowledge interactive PCP based on \"interleaved\" Reed-Solomon codes.", "pdfUrls": [ "https://acmccs.github.io/papers/p2087-amesA.pdf", "http://doi.acm.org/10.1145/3133956.3134104" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/133eba30fbd96f0551d692c76f4c851d4d2f9f27", "sources": [ "DBLP" ], "title": "Ligero: Lightweight Sublinear Arguments Without a Trusted Setup", "venue": "CCS", "year": 2017 }, "133ed10cb6896bf3aa54a76e0c8f4609285ce4d0": { "authors": [ { "ids": [ "2056880" ], "name": "Tianlong Yu" }, { "ids": [ "1833376" ], "name": "Seyed Kaveh Fayaz" }, { "ids": [ "2441497" ], "name": "Michael P. Collins" }, { "ids": [ "1732751" ], "name": "Vyas Sekar" }, { "ids": [ "1730191" ], "name": "Srinivasan Seshan" } ], "doi": "", "doiUrl": "", "entities": [ "Bro", "Computer security", "Interference (communication)", "Network function virtualization", "Network security", "Poor posture", "Scalability", "Snort", "Software-defined networking" ], "id": "133ed10cb6896bf3aa54a76e0c8f4609285ce4d0", "inCitations": [ "1c9cdca8ae0e6aeae000c46cf12b2bacebc6d4e1", "117e558a09e0957eb97a5b3945e7059031684c26", "28dd40f411fc91cd9d5b72e6bd8d07de4b36c0ea" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "7ed8dd92f4a174b630836700cf12d0adebd5c708", "981ba7b03695c6df049fb4edf43c2803d4dd535d", "2c7e254cfea97f0b10320d723aa906f3f5c9bed6", "0d94e1cdf3e3c1b8e86c9d887b5c232e54d01c12", "2bac875425b84096a0084cf8024d5e66b521297a", "2b6f22d6d2b01c5df1d3949cfdd9740e2e899146", "06c9511a3a29f1afa3971b1885ad56b5a890dbdc", "0ec186528f2aeb28889c7716bead275c03008fb3", "1ceb6bc425c586641a10012db4bdd6cad328a58f", "36bb67d8fba0c85f2495449a9926018827368df5", "20cee308639acf53d090ff7c7d639eb64fdca8ad", "0e74750ccdd195fa55369a68c7cc0e354ffee9d8", "49437c58b7482b480c8f81dd692cdc676de4a181", "3d95c58c595130caa94cdc1fcdb0f9b9f12cb805", "07ca726af9c235573654b85e8d478bd7303aa62f", "2baa50ceffb972260c877567a5dd513dc79fca21", 
"2077579d62fc090d4ddf45f107ffae0468936165", "363d109c3f00026f9ef904dd8cc3c935ee463b65", "b84322d9f2eefe24c03832b9a701d344f58b869c", "94546cd187a5818811f6efec14c1360ad41cdd9d", "336b4f3099b8f629adc20a69aba15257e53539f9", "5692a5398e92ec43703145d512eef4d06a2a2fc8", "0300ba32e9d71891ea250643db80b6f67dc3f8f9", "43abbae09aff7bb1bb3447f1ada08ac50e39924e", "005718daa82f7880b5b1f3b01e0e8f6aad2b5751", "59b7d9c1b8179e325784f73acd12a5d0e1a4fe6d", "7f45e511f53c80c31346a1ef01f1d14293044b0f", "16c5e4b3dbb699b47523a5882ed7a3b6adac962b", "6a0b8045e37b1679ebea2a8c88b1453c519c8fc8", "19114b7a2f5243a47e80590cc11a2d8ec5b96308", "004a046ad0db05084f103e8b1e3e9fb43b2f9386", "000c6c5a899088792d12ef3fea23654fd1021e0a", "1b54f6f1d93b0a409a6c58e8445a471be9c80603", "516cf9cc4d0886e1cd91832230a2d7645426a3ec", "1bd829a1c7c3652b9b413bb864c94844ce48fcd9", "316abee19b57af2d85bf09b56350e29fa426412f", "3d3abf7b60d6e762d635c3b997d48ddb1bc76eb6", "b85b61ef16d84fedd5e90714c5df51b0c0f10348" ], "paperAbstract": "Despite soaring investments in IT infrastructure, the state of operational network security continues to be abysmal. We argue that this is because existing enterprise security approaches fundamentally lack precision in one or more dimensions: (1) isolation to ensure that the enforcement mechanism does not induce interference across different principals; (2) context to customize policies for different devices; and (3) agility to rapidly change the security posture in response to events. To address these shortcomings, we present PSI, a new enterprise network security architecture that addresses these pain points. PSI enables fine-grained and dynamic security postures for different network devices. These are implemented in isolated enclaves and thus provides precise instrumentation on these above dimensions by construction. To this end, PSI leverages recent advances in software-defined networking (SDN) and network functions virtualization (NFV). 
We design expressive policy abstractions and scalable orchestration mechanisms to implement the security postures. We implement PSI using an industry-grade SDN controller (OpenDaylight) and integrate several commonly used enforcement tools (e.g., Snort, Bro, Squid). We show that PSI is scalable and is an enabler for new detection and prevention capabilities that would be difficult to realize with existing solutions.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/psi-precise-security-instrumentation-enterprise-networks/", "http://www.cs.duke.edu/courses/spring17/compsci590.7/Papers/ndss17_psi.pdf", "http://wp.internetsociety.org/ndss/wp-content/uploads/sites/25/2017/09/ndss201702A-2YuPaper.pdf", "http://www.andrew.cmu.edu/user/sfayazba/psi_ndss17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/ebe5/45148b161dd15a2971ba49d9e75c26990466.pdf", "s2Url": "https://semanticscholar.org/paper/133ed10cb6896bf3aa54a76e0c8f4609285ce4d0", "sources": [ "DBLP" ], "title": "PSI: Precise Security Instrumentation for Enterprise Networks", "venue": "NDSS", "year": 2017 }, "134506e98aa78c6e2efb1b93e08e25b61c25d8a0": { "authors": [ { "ids": [ "39650803" ], "name": "Caleb Horst" }, { "ids": [ "2872966" ], "name": "Ryo Kikuchi" }, { "ids": [ "2650377" ], "name": "Keita Xagawa" } ], "doi": "10.1145/3035918.3035948", "doiUrl": "https://doi.org/10.1145/3035918.3035948", "entities": [ "Adversary model", "Ciphertext", "Ciphertext-only attack", "Cryptanalysis", "Dummy variable (statistics)", "Encryption", "Key (cryptography)", "Known-plaintext attack", "Linear algebra", "Plaintext", "Repetitive strain", "Scheme", "Security parameter" ], "id": "134506e98aa78c6e2efb1b93e08e25b61c25d8a0", "inCitations": [ "b2fcd2497eee0f6795dcf083c2dd6fd903b583ea", "0bd8f0ab2ade3cbb560dca22c2f5dfd203f4cfd3" ], "journalName": "", "journalPages": "1069-1084", "journalVolume": "", "outCitations": [ "0677d17466f47dc8ef5fb89221ff3007c6196c33", 
"197107b7ec65a623b59987cf7243921908068751", "9506214130b9682d887869089f5e28d363991441", "9e759181f4f3ff0883f3cf663cd7d3f5444f8ab2", "7cc6a150b1ba5b40c8e5aca6e94c817d5bebbc5c", "57d1cbb6501866a3dfeb1a34b29e1a12d528db39", "965299efb158bace13e71ab11c6d547d6234d009", "614ec837ec0f615c053fc97e9f1c1ceb8787926a", "62ed6d4155c9872314d7b67416401344b442fb8f", "c09241b21c87b0f3125f398254b0146281b578ae", "b07fb9ae940ddaaa690de67cee2029e4373fdbc4", "8776c004a351e23be9ef7a4d214da4fc93260484", "010ab443478bbfbecb03be9c250a49ae3b19b4d5", "18e704e31d06f955f39955cd4c785c4731e5fbd7", "0832933a67612dc18e0f70cb0ed949fef1a830a5", "0227e83202440c13c4c2b97b49ef7c64dfbd52c3", "98cccb17fbefc01a6310574f25e591ab9d2586e2", "39e042b4a1ffa5818ccb4783008bab297145b697", "4beef78e9b21611a59237b63d512014e47f32d5e", "50dd8acd7bd2e8daa96724138180de9152c01ec3", "212fe709b564f26c63875c2621cd543149108cf7", "234e14d7509ef2a14e829e1c08648f84462ad4ff", "f36a8d4ea696a0d192a43c04158bae65bb472fb7", "4a0c0f2fb44306730e6b106bd885b8def515daf7", "33736e956a5c4703fb5f215bd3ad686eeeedf2de", "0d202a739d29b1896b48510413009c0ffbbfc2ff", "0aa4f7b8b5638731b474ede0a3c268c549092664", "9b19a14e3c6e0a94a8a0e201e18c26c1c7c3105c", "2cbe59fd40166a5cdab925bf6cd58d9d0d447d9c", "2fca67c297ee39d8522ff25e464dbee8ebba66f5", "2004123b3b9698abe916116910b2c46a712a5585", "2f7c97e82641e4f0f8c7b508b75af4952b0cc07d", "19f6680d750de9ceb0f88e1c801fb5014b094106", "46527c14457cf84d1cf26487d6b4c31f4825db71", "4af77753e00973f339fd93a27e4131047018e79c", "55aafd6b8e9816203006c7e5c37e2e2fb8451113", "3ed4d5df2a97d5d097e1fd837a8568eec0eb6d31" ], "paperAbstract": "Comparable Encryption proposed by Furukawa (ESORICS 2013, CANS 2014) is a variant of order-preserving encryption (OPE) and order-revealing encryption (ORE); we cannot compare a ciphertext of v and another ciphertext of v', but we can compare a ciphertext of v and a token of b and compare a token of $b$ and another token of b'. 
Comparable encryption allows us to implement range and point queries while keeping the order of v's as secret as possible.\n Recently, Karras, Malhotra, Bhatt, Nikitin, Antyukhov, and Idreos independently re-define comparable encryption and propose two schemes, a basic one and an \"ambiguous\" one, based on linear algebra~(SIGMOD 2016). The basic scheme is just comparable encryption. To hide the order revealed by tokens, they also proposed an ambiguous scheme where each ciphertext has two interpretations v and vdummy. In the context of an indexed database, this means that every encryption has two places in the database corresponding to the two interpretations, masking the correct placement in the database unless the dummy value is detectable. They assessed that their basic scheme (and ambiguous scheme upon the basic scheme) is secure against known-plaintext attacks; the adversary will require O(ℓ) plaintext-ciphertext pairs to recover secret key, where ℓ is a security parameter.\n This paper cryptanalyzes their comparable encryption schemes by using simple linear algebra. We show that a few tokens and a few plaintext-ciphertext pairs instead of O(ℓ) pairs allow us to mount several attacks efficiently. 
Our attacks are summarized as follows: ", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035948" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/134506e98aa78c6e2efb1b93e08e25b61c25d8a0", "sources": [ "DBLP" ], "title": "Cryptanalysis of Comparable Encryption in SIGMOD'16", "venue": "SIGMOD Conference", "year": 2017 }, "13467a8aff8ad6a2a7e5c242cb567e0e1cd6f6ec": { "authors": [ { "ids": [ "39509913" ], "name": "Ruben Mayer" }, { "ids": [ "26404336" ], "name": "Ahmad Slo" }, { "ids": [ "2572995" ], "name": "Muhammad Adnan Tariq" }, { "ids": [ "1700118" ], "name": "Kurt Rothermel" }, { "ids": [ "2931665" ], "name": "Manuel Gr\u00e4ber" }, { "ids": [ "1751741" ], "name": "Umakishore Ramachandran" } ], "doi": "10.1145/3135974.3135983", "doiUrl": "https://doi.org/10.1145/3135974.3135983", "entities": [ "Complex event processing", "Data parallelism", "Microsoft Windows", "Parallel computing", "Pattern recognition", "Scalability", "Speculative execution", "Throughput" ], "id": "13467a8aff8ad6a2a7e5c242cb567e0e1cd6f6ec", "inCitations": [], "journalName": "", "journalPages": "161-173", "journalVolume": "", "outCitations": [ "1c29623450879667f381463a6b06b4fc8088da3d", "78c83644fbfa65e69137f57e22fa3b53f225a5d5", "57dbda987a11b2ea83808638d35230306ba1120e", "37c9a357b512c9e373fd12359fcff525fdbf5d4d", "aa1862904bf5c17cc270089ea43fef4e6232d4ed", "1fad99d25da3a4894915941752c6fb50dcd2d8ae", "a512812f72321eb4b198cc11cbd6755bfa71aa1c", "010cf83f39ebbbd42080e1491884ac6e9fe4d9aa", "63115442310908b876aa1e81d877813ebee8b247", "4f17119eaa541f64f6ae9be2a0b6e30de70fe421", "36257015011dce35e3cc046b3b5d87e9b13feda4", "42490a37f9e284ba4d368cf5a41f2ea6c26b0ee1", "3779782a7bc5cd743e4029eba985e9a7c9bc3a00", "07b53fd59a063418695ac30365fe967045d63980", "26f7a5b6fc985c10e9271f95522213ce281622e2", "9b3d79afd171746ff4be7e4aa862f35677405dc2", "fc35ca097ae95ebc209a7059193c928333efcfe5", "a1042ef8a386ab815da4337e03adc20b337369ba", "987fad21690fe8df838327e1641a912c9033082d", 
"6bc617bfa7b43e5aec1c175c574e5f45301a28fc", "e0b3d5095ca65792b0ae77417c66578c0253d1aa", "8e4b845da0ad9d106a0a7c46a9335b972ef2b187", "91f77353a566141da69e39eb1f5b3e58d59702cf", "19aaac65a6ae9a3828ec0aed603c8fb307f39adc", "026b514e5214d8be0610ec6a024e49d738175c29", "2bc73c30bd107cc1a57c7e13982fc27a5c6aa579", "9f56431f1feb18750ac139cb403498e64a72f62d", "2ba18490467704d0937493206c995c238ac7dae2", "5ebcf0eebc36f9d355debb54816b81a9f4134673", "de8fd399ace478e21816fe0d5b2b1a151073fed6", "0c504d0c8319802c9f63eeea0d7b437cded2f4ef", "17f45696ee140a6d35ec13efe54d2041a8e04f7c", "616672b785fd6f8fc1c4a8e7ff20c39307dde8d5" ], "paperAbstract": "Distributed Complex Event Processing (DCEP) is a paradigm to infer the occurrence of complex situations in the surrounding world from basic events like sensor readings. In doing so, DCEP operators detect event patterns on their incoming event streams. To yield high operator throughput, data parallelization frameworks divide the incoming event streams of an operator into overlapping windows that are processed in parallel by a number of operator instances. In doing so, the basic assumption is that the different windows can be processed independently from each other. However, consumption policies enforce that events can only be part of one pattern instance; then, they are consumed, i.e., removed from further pattern detection. That implies that the constituent events of a pattern instance detected in one window are excluded from all other windows as well, which breaks the data parallelism between different windows. In this paper, we tackle this problem by means of speculation: Based on the likelihood of an event's consumption in a window, subsequent windows may speculatively suppress that event. We propose the SPECTRE framework for speculative processing of multiple dependent windows in parallel. 
Our evaluations show an up to linear scalability of SPECTRE with the number of CPU cores.", "pdfUrls": [ "https://arxiv.org/pdf/1709.01821v1.pdf", "http://doi.acm.org/10.1145/3135974.3135983", "http://arxiv.org/abs/1709.01821" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/13467a8aff8ad6a2a7e5c242cb567e0e1cd6f6ec", "sources": [ "DBLP" ], "title": "SPECTRE: supporting consumption policies in window-based parallel complex event processing", "venue": "Middleware", "year": 2017 }, "1358c52e9a3edc88c8bf41a9191a2321956710f1": { "authors": [ { "ids": [ "1698602" ], "name": "Yu Chen" }, { "ids": [ "1693515" ], "name": "Mohammed J. Zaki" } ], "doi": "10.1145/3097983.3098017", "doiUrl": "https://doi.org/10.1145/3097983.3098017", "entities": [ "Autoencoder", "Calculus of variations", "Document classification", "Downstream (software development)", "Experiment", "KDE Applications", "Neuron", "Noise reduction", "Sparse matrix", "Text corpus" ], "id": "1358c52e9a3edc88c8bf41a9191a2321956710f1", "inCitations": [ "3f8a63906ca762f84df099d68f7a228564ef05bb" ], "journalName": "", "journalPages": "85-94", "journalVolume": "", "outCitations": [ "762b63d2eb86f8fd0de98a08561b77527ae8f165", "195d0a8233a7a46329c742eaff56c276f847fadc", "5043022bf0d72b1769038be0cd8ff38865aaf192", "1489455ce20b097d9cb2f8c24de696aa8a725d1d", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "bd49da4c8b259c90999be2bde7a05bab96353d9d", "01a7995cf0b1c89ec6322cc2b734f70e6b18e222", "100c730003033151c0f78ed1aab23df3e9bd5283", "50e74b25959466273e62948f5073192e2f3717e0", "5c4f72418a66a170b4bcf55463df03c51a43d9b2", "e2b7f37cd97a7907b1b8a41138721ed06a0b76cd", "0f88de2ae3dc2ec1371d1e9f675b9670902b289f", "167e1359943b96b9e92ee73db1df69a1f65d731d", "bff8252c3d7a2557e8a4bbbc94079d23c7c8d9fd", "20fe32e4ac65f59e6f1442522f58c26d2849f500", "1f656b9c686c1e5db2a4d41f1ce7e270965def3e", "2abe6b9ea1b13653b7384e9c8ef14b0d87e20cfc", "9208ecbd7244040ba6ee59a067b527c8b095fe0a", 
"b94043a133e3d07ed0b1cfc036829e619ea0ba22", "54c32d432fb624152da7736543f2685840860a57", "1145859ba17172d517cdffe2a5f00a16366c5765", "be29cc9cd74fd7d260b4571a4b72518accae5127", "373f76633cc1f6c7a421e31c989842021a52fca4", "208e4469a378a0f7b55086553ec3fbf0db5d52fc", "31e362dee2355e9fef8b8b5dbb14dc74abebb80e", "83a6cacc126d85c45605797406262677c256a6af", "213d7af7107fa4921eb0adea82c9f711fd105232", "0a7fb47217e6d0e3b80159bc4f9e02a50ea1f391", "5aef8a6992fbd56b3309f6b56337d124951bb71d", "55ef10672b69aaa4db60faeb3776d85f7af1a5f9", "357776cd7ee889af954f0dfdbaee71477c09ac18", "4064696e69b0268003879c0bcae6527d3b786b85", "8729441d734782c3ed532a7d2d9611b438c0a09a", "398dee13b3aaaefdf14c78cc1e00dcf265795fd3", "5bdb704fad81541b2831293a209e99e1f4ea0a85", "0825788b9b5a18e3dfea5b0af123b5e939a4f564", "0e8d3f68c0a0eb9dab241c63aab319dbf596e697", "1510cf4b8abea80b9f352325ca4c132887de21a0", "d1b78d136e9e6be0aeb814027f0f3fd843606155" ], "paperAbstract": "Autoencoders have been successful in learning meaningful representations from image datasets. However, their performance on text datasets has not been widely studied. Traditional autoencoders tend to learn possibly trivial representations of text documents due to their confoundin properties such as high-dimensionality, sparsity and power-law word distributions. In this paper, we propose a novel k-competitive autoencoder, called KATE, for text documents. Due to the competition between the neurons in the hidden layer, each neuron becomes specialized in recognizing specific data patterns, and overall the model can learn meaningful representations of textual data. A comprehensive set of experiments show that KATE can learn better representations than traditional autoencoders including denoising, contractive, variational, and k-sparse autoencoders. 
Our model also outperforms deep generative models, probabilistic topic models, and even word representation models (e.g., Word2Vec) in terms of several downstream tasks such as document classification, regression, and retrieval.", "pdfUrls": [ "https://arxiv.org/pdf/1705.02033v2.pdf", "http://www.cs.rpi.edu/~zaki/PaperDir/SIGKDD17.pdf", "http://doi.acm.org/10.1145/3097983.3098017", "http://arxiv.org/abs/1705.02033", "https://arxiv.org/pdf/1705.02033v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1358c52e9a3edc88c8bf41a9191a2321956710f1", "sources": [ "DBLP" ], "title": "KATE: K-Competitive Autoencoder for Text", "venue": "KDD", "year": 2017 }, "135c49e5543ce41ec8274b270b2ac25e015cabd9": { "authors": [ { "ids": [ "1898809" ], "name": "Shaizeen Aga" }, { "ids": [ "2934986" ], "name": "Supreet Jeloka" }, { "ids": [ "1870110" ], "name": "Arun Subramaniyan" }, { "ids": [ "1678884" ], "name": "Satish Narayanasamy" }, { "ids": [ "1687117" ], "name": "David Blaauw" }, { "ids": [ "40040123" ], "name": "Reetuparna Das" } ], "doi": "10.1109/HPCA.2017.21", "doiUrl": "https://doi.org/10.1109/HPCA.2017.21", "entities": [ "Application checkpointing", "CPU cache", "Computation", "Cryptography", "Database", "In-memory database", "In-place algorithm", "Kernel (operating system)", "Locality of reference", "Operand", "Static random-access memory", "Throughput" ], "id": "135c49e5543ce41ec8274b270b2ac25e015cabd9", "inCitations": [ "d66d840d8dec196a0bb6c279dc088b23f96b1a44", "b6d93d63915d867f6aadea3c0f7ce206407a4b5f", "0c4fe1f1a8043e8f4175b21faca1b72bff8033e6", "4431efadb482fbc2619e9dbd50d5ce707efa6396", "d9a6cfad15c6268b30da1f5b45f720b96ead1805", "e629e31f7fcbb8e0197f5f8d3d24f48f09d2d278", "00cc482570d739e7b733f45b6f8f1836b24056bd", "9d7b70f7b69bf3f7309273a68d09964611715a77" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "481-492", "journalVolume": "", "outCitations": [
"c7ae87b4e5952560362e24274a3e9f4e78a666f6", "028f56e788bacbbdbb70cca474e0f0382b9b73aa", "2394c6644efa856f0da160a0f0031d74cd3b5000", "58cde17e911d8eda9fe91c6e446fc2f5f1030acc", "73edc0858aac5cf61b72473145b01612c0fd416b", "52206ebb2b53a7cfebfd6025fff6c3621b0b0809", "0015d8b6ec47ec2bc4bc0564a11e2f98a3971650", "0693ff4b3a8d1452b897a876d3ffe6b2074e98e4", "500b80adc7e25dfffa9a05d25bdffce81b1b0031", "a2ffe8428e96dbf89d3a0d80d6e3495656da28ed", "352a8957005dc5519b15ed1870751ec494d66395", "245babd157fa6f26696e3ed790847aecb665d51c", "14b664dd04703d2333bce3cb35641f90a3a8a2ce", "99d80987446ecc7fb546826e7bccebb2fdc5fa12", "2b88cc9988d70a950b3eb50e99c8f0a6722210b3", "110c050c6c992d2b956f7b47d717810ac5c91bdc", "42524667961442587a9eac9b6612d8eb7690f0e6", "72a2076a56fc310b92c985a97ce2a4a785729b19", "5baaeed2b180d8b9886eca113ae0c86196c8bdaf", "0c41bb4d1b3c049e42c12a5054eac206f35da859", "e419e3ba88e01ae42f2bf2387e34faee9af03bbf", "8e6681c2307b9b875ea580b89b94b405aa63e78e", "11b6aa6c5532024f4b721fe8eea14e69cc914aae", "3eae0271717f6b4d65024abf04e5d98aef41d748", "1e4d081c6fa2103ccd0b9d977d98dffaff3a6f3c", "853107ffd335c5151f42d193d10cc6a7c475d919" ], "paperAbstract": "This paper presents the Compute Cache architecture that enables in-place computation in caches. Compute Caches uses emerging bit-line SRAM circuit technology to repurpose existing cache elements and transforms them into active very large vector computational units. Also, it significantly reduces the overheads in moving data between different levels in the cache hierarchy. Solutions to satisfy new constraints imposed by Compute Caches such as operand locality are discussed. Also discussed are simple solutions to problems in integrating them into a conventional cache hierarchy while preserving properties such as coherence, consistency, and reliability.
Compute Caches increase performance by 1.9× and reduce energy by 2.4× for a suite of data-centric applications, including text and database query processing, cryptographic kernels, and in-memory checkpointing. Applications with larger fraction of Compute Cache operations could benefit even more, as our micro-benchmarks indicate (54× throughput, 9× dynamic energy savings).", "pdfUrls": [ "http://blaauw.engin.umich.edu/wp-content/uploads/sites/342/2018/03/Aga-Compute-Caches.pdf", "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.21" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/135c49e5543ce41ec8274b270b2ac25e015cabd9", "sources": [ "DBLP" ], "title": "Compute Caches", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "137c11e359096580b08fd4f80a5e80c784d7bcd4": { "authors": [ { "ids": [ "2881873" ], "name": "Caiwen Ding" }, { "ids": [ "39090782" ], "name": "Siyu Liao" }, { "ids": [ "1698242" ], "name": "Yanzhi Wang" }, { "ids": [ "38315753" ], "name": "Zhe Li" }, { "ids": [ "1680152" ], "name": "Ning Liu" }, { "ids": [ "10716503" ], "name": "Youwei Zhuo" }, { "ids": [ "1722340" ], "name": "Chao Wang" }, { "ids": [ "2064331" ], "name": "Xuehai Qian" }, { "ids": [ "2062027" ], "name": "Yu Bai" }, { "ids": [ "9347641" ], "name": "Geng Yuan" }, { "ids": [ "1967275" ], "name": "Xiaolong Ma" }, { "ids": [ "2760404" ], "name": "Yipeng Zhang" }, { "ids": [ "26791708" ], "name": "Jian Tang" }, { "ids": [ "1862322" ], "name": "Qinru Qiu" }, { "ids": [ "1723145" ], "name": "Xue Lin" }, { "ids": [ "1765175" ], "name": "Bo Yuan" } ], "doi": "10.1145/3123939.3124552", "doiUrl": "https://doi.org/10.1145/3123939.3124552", "entities": [ "Application-specific integrated circuit", "Artificial neural network", "Circulant matrix", "Computational complexity theory", "Deep learning", "Embedded system", "Fast Fourier transform", "Field-programmable gate array", "Inference engine", "Multiplication
algorithm", "Network architecture", "Pipeline (computing)", "Scalability", "Throughput" ], "id": "137c11e359096580b08fd4f80a5e80c784d7bcd4", "inCitations": [ "fc1add5d9403d307c4844a6e4fb820c8a69d9582", "71508ce4651048d6284359938eb5cfffd7b6cb5b", "7b9339d3b359310ddbaf6caae13d3a65f657bf04", "23a9e21613071bd8d1453722bac96a7d587d6d8b", "eb5ec200117014882a9c52a183652ab57f0c462e", "381b008b49d04c1bd5ff00649521fa028b9d3ea8", "b872dc3db68dea56f434a1a6d73665eb7c842e48", "de58743ea346bc5f69dbedecf440f87e5710f3d4", "540793c4a7dc34c4df3958bbb2330463afcad39e", "11acddb8e29c586d028b5ce2a3d5eb410fa6caa0" ], "journalName": "", "journalPages": "395-408", "journalVolume": "", "outCitations": [ "2d83ba2d43306e3c0587ef16f327d59bf4888dc3", "14ce7635ff18318e7094417d0f92acbec6669f1c", "dbdcf3e19600cc3ea964ab8fd9122d3a6242c483", "c2d4150a1200a055793bf4479456c3441a7d3652", "d2a1e3b286422da23df692ed39d3301d642a4b24", "40faa4b9a95f42e8ae1dff96ee2059eb90e3b039", "b0c7c3988910d048062b168d02a2516853250b04", "108961c7366e36825ffed94ac9eab603e05b6bc6", "021fc345d40d3e6332cd2ef276e2eaa5e71102e4", "d67de58011d0c403682a55471f5adec702acdf0c", "42dec19543930bffec09ab74441440fdec4c94b2", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "c2c10045880d31dc011fb2ff2935f910f9fcd182", "1a07186bc10592f0330655519ad91652125cd907", "5fd26b0954a723f1fa22aa0a9fadcb4de498884b", "211a125c77da70a958d1dc9f70ecc29b9a69f796", "02c78232075ac431834e3442dcb2954d4e708def", "08a7b4b45fe99dc3ff26ace8813bc7a024f5cbea", "1409c14b65f087efb231ccd0513977450eab52e9", "9f1f065bf08cd90431cc051267a708f56436cd82", "ef8536a209928c1596dfa1918fd853e5aaefc4bb", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "235fa2b1983eff9f13b27c620cda389359126bf4", "b4eac8295c90dbfb7d8d22ba560e025621287c58", "4c2fedecddcae64514ad99b7301ad6e04654f10d", "02227c94dd41fe0b439e050d377b0beb5d427cda", "18efe550bf70d8c3a2d100b8ced9217e2fe67bb7", "01fcae344d2edb715bcc63a40b6052c0331741bd", "2bb28f6105ca30b2bc1ba91578234ecb12e788a6", 
"06ce77e4abea63948580340be25d7f2a80369e5a", "a7621b4ec18719b08f3a2a444b6d37a2e20227b7", "206d4ebdf93ae6c9b530efc94fa408ccca2b402f", "38211dc39e41273c0007889202c69f841e02248a", "26d4711763df78580adbe7c775f80b2240cc6af3", "5ed7b5ebb4048240d04cdac46df0481a9cf057c2", "c382406fd8db2744b2a609837395e5da05e1d2ed", "0eb329b3291944ee358fcbad6d26a2e111addd6b", "126df9f24e29feee6e49e135da102fbbd9154a48", "398c296d0cc7f9d180f84969f8937e6d3a413796", "235328f8bc8b62e04918f9b4f6afe3c64cfdb63d", "1ff9a37d766e3a4f39757f5e1b235a42dacf18ff", "34b4027791e8c397db3279259804ab06aa21db40", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "1b2c6fb85e7e776b533fda63f92fabb04ed5e887", "1a5ad04c3365a8435317044d5e1c14071a92b4b2", "46494c6c0e2dd1dca8cd39d40a9681c7d5d6ba62", "5d90f06bb70a0a3dced62413346235c02b1aa086", "49d5c37170fcd55f9bb8edd7c43cea3e24d10bb5", "132e3d3b5cfc2f59db6ed69ac1eac4a1ee6dca71", "12806c298e01083a79db77927530367d85939907", "04105898efe96c7f2d876e6bcb9e19afd3e23635", "60f22ad725f041fff81d6371242485bbe5c3ebb6", "5bfecd14937da569eabec0afea710db846d3899b", "5bb73fa5157e58923732b26fecafecde91fb6cf9", "061356704ec86334dbbc073985375fe13cd39088", "4002ce8cc6dd3dc5e3ed1909584105294f96358b", "029ec829961dc97fddf8e69934b4eec0e45b5b0c", "46f74231b9afeb0c290d6d550043c55045284e5f", "40f85cbe67ce1ce89009985e9caed648dd08c12e", "b8242c9d0fb77a125c30d7b92e2e34a468c0d393", "16b3bc986e22e2eb8a8594c096fa79e1e55105e2", "6201fb6d59a909959edfb661f52470c04799b0e7", "052f7909a029b5783c02d0f7c2149b99bee7555f", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "d48fede5d00c55e115346dd0837cab4feb0ba7f5", "272e0074fdaa959005dfaba1fdf8bf5d6444ce1a", "3b2491ddeeaa7beae4d311b217c292a9e16112cf", "df4cbdbac85ebb122c821671100ca9391fe46eac", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "162d958ff885f1462aeda91cd72582323fd6a1f4" ], "paperAbstract": "Large-scale deep neural networks (DNNs) are both compute and memory intensive. 
As the size of DNNs continues to grow, it is critical to improve the energy efficiency and performance while maintaining accuracy. For DNNs, the model size is an important factor affecting performance, scalability and energy efficiency. Weight pruning achieves good compression ratios but suffers from three drawbacks: 1) the irregular network structure after pruning, which affects performance and throughput; 2) the increased training complexity; and 3) the lack of rigorous guarantee of compression ratio and inference accuracy.\n To overcome these limitations, this paper proposes CirCNN, a principled approach to represent weights and process neural networks using block-circulant matrices. CirCNN utilizes the Fast Fourier Transform (FFT)-based fast multiplication, simultaneously reducing the computational complexity (both in inference and training) from O(n^2) to O(n log n) and the storage complexity from O(n^2) to O(n), with negligible accuracy loss. Compared to other approaches, CirCNN is distinct due to its mathematical rigor: the DNNs based on CirCNN can converge to the same \"effectiveness\" as DNNs without compression. We propose the CirCNN architecture, a universal DNN inference engine that can be implemented in various hardware/software platforms with configurable network architecture (e.g., layer type, size, scales, etc.). In CirCNN architecture: 1) Due to the recursive property, FFT can be used as the key computing kernel, which ensures universal and small-footprint implementations. 2) The compressed but regular network structure avoids the pitfalls of the network pruning and facilitates high performance and throughput with highly pipelined and parallel design. To demonstrate the performance and energy efficiency, we test CirCNN in FPGA, ASIC and embedded processors. Our results show that CirCNN architecture achieves very high energy efficiency and performance with a small hardware footprint.
Based on the FPGA implementation and ASIC synthesis results, CirCNN achieves 6 - 102X energy efficiency improvements compared with the best state-of-the-art results.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3124552", "http://alchem.usc.edu/portal/static/download/CirCNN_MICRO.pdf", "https://arxiv.org/pdf/1708.08917v1.pdf", "http://arxiv.org/abs/1708.08917" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/137c11e359096580b08fd4f80a5e80c784d7bcd4", "sources": [ "DBLP" ], "title": "CirCNN: accelerating and compressing deep neural networks using block-circulant weight matrices", "venue": "MICRO", "year": 2017 }, "137f027dff643b2a5f7f64234ec1359924fb6ebb": { "authors": [ { "ids": [ "1965417" ], "name": "Xiaolong Xie" }, { "ids": [ "2700163" ], "name": "Wei Tan" }, { "ids": [ "3343613" ], "name": "Liana L. Fong" }, { "ids": [ "1689247" ], "name": "Yun Liang" } ], "doi": "10.1145/3078597.3078602", "doiUrl": "https://doi.org/10.1145/3078597.3078602", "entities": [ "Algorithm", "Analysis of algorithms", "Big data", "Central processing unit", "Computation", "Computational complexity theory", "Data parallelism", "Gradient", "Gradient descent", "Graphics processing unit", "Information privacy", "Machine learning", "Manycore processor", "Parallel computing", "Stochastic gradient descent" ], "id": "137f027dff643b2a5f7f64234ec1359924fb6ebb", "inCitations": [ "0a465c15bebccd1500718548b18800fd3c463ed0", "05233cf6194ddee6427f0bb76cb8749cc220d2bb", "dd417bac758a05a6d2f26b0a6125108c5e281eff" ], "journalName": "", "journalPages": "79-92", "journalVolume": "", "outCitations": [ "4d338ca44be5c771a29b954a8eb8e916e4a8507a", "07cd9f122bf94075df0537e0e4cafa40cea2d146", "f08a5e7a23b44c37a22e011e31843aeeae0ed4e6", "5670a2391d0c085be2ff5c704cae8e76a80a15fb", "4e8e3e40a25fba903f40246705c3beb3c122f523", "3784b73a1f392160523400ec0309191c0a96d86f", "0ef82bbfdec840663026dc2fa9e3db111add7efa", "0e9bac6a2b51e93e73f7f5045d4252972db10b5a", 
"05aba481e8a221df5d8775a3bb749001e7f2525e", "119bbe5f9ce908a2b65f24daad50f45d93a2e28c", "0513bee8e71db046b6183b014b91ce3052b0e357", "b46e127f15b2c1cdc71600d08019f6944fd40434", "282bf4894a0c15ffd1f01c06df3716fe588f2d46", "d4e58e7c95d66f810252af630e74adbdbaf38da7", "1eeb50d5f7937f65a910203ae61430ff8b969012", "07787b9601c713695215a82d80a2be2bc6cab8f3", "4266effa841802ce224c8670a416546f5315f825", "6dfb7cba3ab7d5090b88cf23865c76742473e381", "1d43ad43caa2e8364e21bd7628c42b5d1f3501f6", "4954fa180728932959997a4768411ff9136aac81", "0c1d559b1d48fb706f8b73d69e951273fc0ed93b", "471271dfcd33ceb2553b4bd3b3431983fd6ec888", "092217c2267f6e0673590aa151d811e579ff7760", "155997067af4570041162a4b95e4ce7621e0d022", "0122e063ca5f0f9fb9d144d44d41421503252010", "0cad8fd5fad523343f225bab0453cc60ba2f0bd6", "1b6c14e7c835a9557b78aeef3ba723084b9e6a4c", "f24c311701019a2928da3b2b49d7aac5b85d83f8", "0825788b9b5a18e3dfea5b0af123b5e939a4f564", "174b4cb435c87e421c973ce59ccf5b06e09aa8af", "3ae4e53bcaa8f949184dfd6118a85c79c01053e7", "9aa88a8a354f1d322e242376d27d0474e50252f8", "35b1443da32b27178ef4a3b4b9f539b742592f6b", "08df5c4b333837a324c39c299e87fe6609409260", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "1d0f25989452abbbc8feaf00a034ff110fc4b350", "0036adadc90e4826b2f7fc157752eea459070c32", "0c07d26f82f3c84371bfd18f8327ce0a2d00da81", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "080aebd2cc1019f17e78496354c37195560b0697", "64335c55eb06c6cc5372f91b6feaf192550e36fd", "0e0fb6a3ccbd9da9dc216913ef77d346515936c6", "084ac0e0aaf541a924163f9ac21810d74fb5d875", "02cbcc134dd322f13b5911be32b7d0c42837fae8", "2ec490f40efb00fa0a3b3a63306069d64630c340", "0ef7d9e618cbb507d69f8ebcdc60b8a1f3135bff", "119faad9498e2265b88190c932c74e31ecf6f959", "61202eb74184c0d75276954c93ce774c72f8035d", "d75eaae86525546d11acd0852fc467c9251e08ab", "8ca9bdbbceec273265c3e0228e247fe5f01871c5", "29e71c50af32836ac2eb5481f9f422489935a580", "3cacfad4443b31024fe836c06bdebb5d85c29fbb", "0144941d255dad89d3d90c2d131a15cc01df9829", 
"a058935fd019c2367fd32c16cd1ce6983a29aafb", "235ff7f15798f793af75fe40747e284fe48e8cde", "5a445856add9aaf891ee651e87da24587411c9d5", "ddce2f41414d35592dda0d12ea33bfac29fe983f", "d28a21316307e71a720ca1a2e6ba57ba87629fcd", "148e202fe4075f0f047240b927ee5c7e436b2341" ], "paperAbstract": "Stochastic gradient descent (SGD) is widely used by many machine learning algorithms. It is efficient for big data ap- plications due to its low algorithmic complexity. SGD is inherently serial and its parallelization is not trivial. How to parallelize SGD on many-core architectures (e.g. GPUs) for high efficiency is a big challenge. In this paper, we present cuMF_SGD, a parallelized SGD solution for matrix factorization on GPUs. We first design high-performance GPU computation kernels that accelerate individual SGD updates by exploiting model parallelism. We then design efficient schemes that parallelize SGD updates by exploiting data parallelism. Finally, we scale cuMF SGD to large data sets that cannot fit into one GPU's memory. 
Evaluations on three public data sets show that cuMF_SGD outperforms existing solutions, including a 64-node CPU system, by a large margin using only one GPU card.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078597.3078602" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/137f027dff643b2a5f7f64234ec1359924fb6ebb", "sources": [ "DBLP" ], "title": "CuMF_SGD: Parallelized Stochastic Gradient Descent for Matrix Factorization on GPUs", "venue": "HPDC", "year": 2017 }, "1380a3d1e5d48c8c11b5df2e0f9487f9c70ccb88": { "authors": [ { "ids": [ "2249089" ], "name": "Dmitry Petrashko" }, { "ids": [ "3284421" ], "name": "Ondrej Lhot\u00e1k" }, { "ids": [ "1795826" ], "name": "Martin Odersky" } ], "doi": "10.1145/3062341.3062346", "doiUrl": "https://doi.org/10.1145/3062341.3062346", "entities": [ "Compiler", "Intermediate representation", "Performance Evaluation", "Scala", "Time complexity" ], "id": "1380a3d1e5d48c8c11b5df2e0f9487f9c70ccb88", "inCitations": [ "35d8727f5e726177d8f12a4955524804c5d531cf" ], "journalName": "", "journalPages": "201-216", "journalVolume": "", "outCitations": [ "451ead65a7260f50e4fa083f5e6c43182812fa46", "64d2a65a7d559f9b05570fb0fea8bb4cccd83ae2", "fd68bcc41917ed0a72bbe1947bca91fe269cfe04", "476952721ead5b7ea2fa2ff2d4f39d5440fc9144", "26108abd43132aba1a2941f7278b81b23485860b", "e5d0a599b9b7c4345ae051dd3281e84d930edffe", "b87a3dee7ff0e8bd36508bb5f9db956d07cb7a27", "80939be20f813f0681bc377bfc1601df98ca2784", "ed39562cc6153117e079a409efbe00483affa40f", "17907e18f11b5ab7ae266e87008acead6d1943d8", "0b61a17906637ece5a9c5e7e3e6de93378209706", "61b0892e3457e3c2f7ef64c885a0e522e8961eb0", "5f4da1df76b8878bf8358ec24d6592a8008d2b0d", "eef0e0820ca3ef8cee957c89373527e8a73dcaaf", "6ccbf47ad15e52f7dbdaaa2b471b1847b5b475f4", "6d2712a243246434750317f1e2f05d3e31f2d717", "849ae30f02a14f7a06ea3801b1469bff13319c29", "67dc83a15c020b84403f1b6b52140965f11e4588", "e543c2c0a3d898ba48ba0f0d6930a242e2444e54", "f244331cd7c806de0138b7cd7417b6e9537ea247",
"67f311151efe765e58c46d2548ef2594422fb393" ], "paperAbstract": "Production compilers commonly perform dozens of transformations on an intermediate representation. Running those transformations in separate passes harms performance. One approach to recover performance is to combine transformations by hand in order to reduce number of passes. Such an approach harms modularity, and thus makes it hard to maintain and evolve a compiler over the long term, and makes reasoning about performance harder. This paper describes a methodology that allows a compiler writer to define multiple transformations separately, but fuse them into a single traversal of the intermediate representation when the compiler runs. This approach has been implemented in a compiler for the Scala language. Our performance evaluation indicates that this approach reduces the running time of tree transformations by 35% and shows that this is due to improved cache friendliness. At the same time, the approach improves total memory consumption by reducing the object tenuring rate by 50%. This approach enables compiler writers to write transformations that are both modular and fast at the same time.", "pdfUrls": [ "http://plg.uwaterloo.ca/~olhotak/pubs/pldi17b.pdf", "http://doi.acm.org/10.1145/3062341.3062346", "https://infoscience.epfl.ch/record/228518/files/paper.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1380a3d1e5d48c8c11b5df2e0f9487f9c70ccb88", "sources": [ "DBLP" ], "title": "Miniphases: compilation using modular and efficient tree transformations", "venue": "PLDI", "year": 2017 }, "13c6f202ce5ef51cf79f7f90ffb24897f9daf1bc": { "authors": [ { "ids": [ "22511249" ], "name": "Carlo Di Giulio" }, { "ids": [ "2493219" ], "name": "Read Sprabery" }, { "ids": [ "1769233" ], "name": "Charles A. Kamhoua" }, { "ids": [ "1723424" ], "name": "Kevin A. Kwiat" }, { "ids": [ "1687256" ], "name": "Roy H. Campbell" }, { "ids": [ "32621916" ], "name": "Masooda N. 
Bashir" } ], "doi": "10.1109/CLOUD.2017.16", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.16", "entities": [ "Cloud computing", "Cloud computing security", "FedRAMP", "ISO/IEC 27001:2013", "Information assurance", "Interactive evolutionary computation", "Relevance", "Requirement" ], "id": "13c6f202ce5ef51cf79f7f90ffb24897f9daf1bc", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "50-57", "journalVolume": "", "outCitations": [ "17fc4b59817417cc968d2466b9f0cd44e6dcd6ce", "76b2c2661e699259a5b23d86373f9dd12e3250ea", "aa971eebdec4881ef490db2520e89e995e8696ab", "8753b5537342bd612af714703d9cb4a86bc71128", "972458f60cb44adb3f5ad1534b10cc59895f735a", "95e6d469f2ac3c12e9e368f06872916e2f6c1305", "b67dc496b84010aa4f2a0f909fb3cb4d36ba78a0", "e80f2032df4a1257ad350d73259a05f89c7123c0", "5ea0bad30d6df3cb29b57ce305a5f57b09484a78", "25b24d6cc547c80e3a1ad037082694a5f6a2b8a9", "9b194ae029c6711eb133716722a42ecffdb572d3", "52c2c050af5b32d4929b4b193967a3675d03aea0", "768b6372cb91b89eb9ceaa53c4e6365bc6f72816", "9f1fd4b1c912ff7226bcfa0ebde09a3951837a3f", "8372016fe38121358163c20f88e28fc0267e30b1", "d185265c926739a30c1c45976c44b4f0cdf11c37", "7f7137820048e0a1611e180d483754240da588c4", "438054be692ca71e824131d081c10e22c48794dc", "b488764b42b99ea5a47284c3245101e177c3bdbe" ], "paperAbstract": "The increasing relevance of information assurance in cloud computing has forced governments and stakeholders to turn their attention to Information Technology (IT) security certifications and standards. The introduction of new frameworks such as FedRAMP in the US and C5 in Germany is aimed to raise the level of protection against threats and vulnerabilities unique to cloud computing. However, our in-depth and systematic analyses reveals that these new standards do not bring a radical change in the realm of certifications. 
Results also show that the newly developed standards share much of their basis with older, more consolidated standards such as the ISO/IEC 27001 and hence the need for determining the added value. In this study, we provide an overview of ISO/IEC 27001, C5, and FedRAMP while examining their completeness and adequacy in addressing current threats to cloud assurance. We question the level of protection they offer by comparing these three certifications alongside each other. We identify weaknesses in the three frameworks and highlight necessary improvements to meet the security requirements indispensable in relation to the current threat landscape.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.16", "http://assured-cloud-computing.illinois.edu/files/2018/01/Cloud-Standards-in-Comparison-Are-New-Security-Frameworks-Improving-Cloud-Security.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/13c6f202ce5ef51cf79f7f90ffb24897f9daf1bc", "sources": [ "DBLP" ], "title": "Cloud Standards in Comparison: Are New Security Frameworks Improving Cloud Security?", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "13d1da7777ac8908f1eed5c8f6fdb019f241104a": { "authors": [ { "ids": [ "37812412" ], "name": "Ethan Cecchetti" }, { "ids": [ "1732157" ], "name": "Andrew C.
Myers" }, { "ids": [ "2208884" ], "name": "Owen Arden" } ], "doi": "10.1145/3133956.3134054", "doiUrl": "https://doi.org/10.1145/3133956.3134054", "entities": [ "Authorization", "Compiler", "Confidentiality", "Declassification", "Downgrade", "End-to-end principle", "Haskell", "Information flow", "Information flow (information theory)", "Malleability (cryptography)", "Non-interference (security)", "Programming language", "Requirement", "Security type system", "Semantic security", "The Glorious Glasgow Haskell Compilation System", "Type system" ], "id": "13d1da7777ac8908f1eed5c8f6fdb019f241104a", "inCitations": [], "journalName": "", "journalPages": "1875-1891", "journalVolume": "", "outCitations": [ "7230f80b75c774b878c70de2290ceea5d624d353", "481b2235481c598470a8e18bee0b67cfbe8e444e", "02de9e1fac573240757b98f2f2a0971693c4238d", "4ea1a23b31a0c3c6c63edb6d5e22943f3a214739", "599b0b00439c07121f314cc08b07cf74d9e50cca", "37358a79bdcdee2c8f227e9b599e2d742284b07e", "046c311cd974a454207c0199adbde18a395ee39c", "008a307e2531033fbd4435cb57972f195bd40e58", "97e91f222b575c5d1a8ccb07594a38402552a27e", "4a0420c0b6da9de5a13b0a4b1dcbfd25ed6f2a64", "a88565c1105ae30c4b0ac14876dcbc82d6916feb", "442aad5465423f556183f16ea2bf8438a6ff9daa", "565e0fb8862ffa3be1a7185e62f22d431d0bd5a5", "069bb1e2ffb2c1298d013a966d2d7e2c8ffb8709", "6db178ae20979e4a1c45c8993efd3f44a2bf3df2", "0025870ef15a8f2858ff4186329d4bde316e9e01", "bf6136bfdf454a6926f63b75d2be62ea687a3e11", "0b84fb0ec9739e04f9b0fcbe040718d9f735200f", "2c6533d714d8dc4d3f7faf418db93c38df642fea", "53a1f0afdec13f51133a2c7940ad13d7d70a0d39", "3725e79119ec188e35f755e55dc9e0e366ac4634", "5f2b22b77559ddb4f3734459d1ff66c58d22df12", "3201bf85bee9995aafa569c47669db463551e6cc", "be7536d9baaef7ccdbff845f8e98c136b4c80bb3", "22c2c3b6fc0e437da7ca05e31d7afd056e4f0e48", "125268a25397dd17fb3c7dbd4018114a972e4acb", "284e63d8c1aba2f31dbfd3e0af58ce66c9649c63", "274537bd5a77326d44bae3f99da8908a7f57c3f3", "2b6df21137f30d25494bb58521a6062f93e915f8", 
"29e45dd3c4a986f2b2d8036994e693ec332752d8", "109ee1e1d4f4a9595a78d1edb38862cdfc3b08bf", "05f9e468a2e0d9b6274e1750436df75f3fd67461", "3738060221a508ba4b3ae8df4eae043a726058d3", "4b23b5946ae3027c7b13a3fa20102641596def40", "151f2e8807fbcfb83e71e329b8367b7eaf33f4af", "75dabcc0ad68c5cd24f727414ff465ee204ed407", "c19c45531a5f6c114e9da1dbfea8aedc31c31e0d", "23400d3b5efed8ab1015d9aed496dabad707c991", "2eff25247a836717a706f137ca1e0f6488d2f1c5", "1d01390129ce2738a3529e442dac8d2c51fe499f", "03ad81f6276792a78312471429fc9495b89a1ffc" ], "paperAbstract": "Noninterference is a popular semantic security condition because it offers strong end-to-end guarantees, it is inherently compositional, and it can be enforced using a simple security type system. Unfortunately, it is too restrictive for real systems. Mechanisms for downgrading information are needed to capture real-world security requirements, but downgrading eliminates the strong compositional security guarantees of noninterference.\n We introduce nonmalleable information flow, a new formal security condition that generalizes noninterference to permit controlled downgrading of both confidentiality and integrity. While previous work on robust declassification prevents adversaries from exploiting the downgrading of confidentiality, our key insight is transparent endorsement, a mechanism for downgrading integrity while defending against adversarial exploitation. Robust declassification appeared to break the duality of confidentiality and integrity by making confidentiality depend on integrity, but transparent endorsement makes integrity depend on confidentiality, restoring this duality. We show how to extend a security-typed programming language with transparent endorsement and prove that this static type system enforces nonmalleable information flow, a new security property that subsumes robust declassification and transparent endorsement. 
Finally, we describe an implementation of this type system in the context of Flame, a flow-limited authorization plugin for the Glasgow Haskell Compiler.", "pdfUrls": [ "https://users.soe.ucsc.edu/~owen/publications/pdfs/nmifc_ccs17.pdf", "http://www.cs.cornell.edu/~ethan/papers/nmifc.pdf", "http://doi.acm.org/10.1145/3133956.3134054", "http://www.cs.cornell.edu/andru/papers/nmifc/nmifc.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/13d1da7777ac8908f1eed5c8f6fdb019f241104a", "sources": [ "DBLP" ], "title": "Nonmalleable Information Flow Control", "venue": "CCS", "year": 2017 }, "13e82aa82b2b1b4e3b9e3f92081447f9ab07273b": { "authors": [ { "ids": [ "1735704" ], "name": "David Elsweiler" }, { "ids": [ "1752070" ], "name": "Christoph Trattner" }, { "ids": [ "3014661" ], "name": "Morgan Harvey" } ], "doi": "10.1145/3077136.3080826", "doiUrl": "https://doi.org/10.1145/3077136.3080826", "entities": [ "Consciousness", "High- and low-level", "Machine learning", "Online food ordering", "Predictive modelling", "Usability testing" ], "id": "13e82aa82b2b1b4e3b9e3f92081447f9ab07273b", "inCitations": [ "4f673332dbc6cda8c353c5b002c642ddbe621aa6", "90023358eb95964d7050aa0876e2183c9b1005b7", "383b694d9d7662b908afd3aef533098fe4087930", "1bc76186e2811a9b41be51a1ba1c3bbdbca7a789", "ba2b4f9272753995ba8d3a6f3ab93974ab16b9b2" ], "journalName": "", "journalPages": "575-584", "journalVolume": "", "outCitations": [ "ab1591e29a1c1609456a6705d34d19af8d3066e0", "fd4a5a29423aa53f3f931687f97907c57164a3ad", "f45ebbb76546d8f88a6633dc3e15aa1993020de7", "7e7343a5608fff1c68c5259db0c77b9193f1546d", "45935574f3b30fc6936abaa70cf403dab35d1b7b", "800df20e950ead3409928c5926c06a943e38d598", "5b17709765b0a32583a66f3b7d66db40862104e8", "33154afe16d4fa46f60af7d174da5f242eca00dd", "e63fd7ce499618116c7f89f2cc9cc5c33175cdb6", "724543c1c5eedff8077c0590f60c5027f4cf0ef7", "66e3dbe3ed9af13ad2d604d3e593412d8ab2d7da", "3f594a9ff1ef209749a999e37e6f7d812ab869c8", 
"29ebe465751c50c754226027f1b5c0d1588c01e7", "ec8a1d77b90daec91093c9ced62a1a0617d2d117", "2f278d1dab0f6e3939c747a2fc2a4cecdfc912b9", "66a2fca65fa8e3f1836ee46d2ac5f59e1980ee6d", "2d26bae27a4fcebbb0d3a56bc7c571b60ecd86fe", "f48978e6874440f766fc4e2b4734c50081d6b407", "86ec379e71c0a23117ce92859aa0e7a1919ca575", "7d35110932250f5d07fc3a38ed84027a1d824ae7", "157e2ae212303d9b9602983cd5fb033b473bd2a8", "40c50afef355a7792f266630b0cc5368a23c1a82", "d53cb267ca498ae1b057b97509a7e359cfdc8050", "100432a0b0bd32ddfebf7476c9b308b8f8409f62", "e2b5e40197afefc1a1fe54fff9b522ebe7e9c47e", "02ab3724a3b444d6be0cc386c2b7ca2d82075bcc" ], "paperAbstract": "By incorporating healthiness into the food recommendation / ranking process we have the potential to improve the eating habits of a growing number of people who use the Internet as a source of food inspiration. In this paper, using insights gained from various data sources, we explore the feasibility of substituting meals that would typically be recommended to users with similar, healthier dishes. First, by analysing a recipe collection sourced from Allrecipes.com, we quantify the potential for finding replacement recipes, which are comparable but have different nutritional characteristics and are nevertheless highly rated by users. Building on this, we present two controlled user studies (n=107, n=111) investigating how people perceive and select recipes. We show participants are unable to reliably identify which recipe contains most fat due to their answers being biased by lack of information, misleading cues and limited nutritional knowledge on their part. By applying machine learning techniques to predict the preferred recipes, good performance can be achieved using low-level image features and recipe meta-data as predictors. Despite not being able to consciously determine which of two recipes contains most fat, on average, participants select the recipe with the most fat as their preference. 
The importance of image features reveals that recipe choices are often visually driven. A final user study (n=138) investigates to what extent the predictive models can be used to select recipe replacements such that users can be \"nudged\" towards choosing healthier recipes. Our findings have important implications for online food systems.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080826" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/13e82aa82b2b1b4e3b9e3f92081447f9ab07273b", "sources": [ "DBLP" ], "title": "Exploiting Food Choice Biases for Healthier Recipe Recommendation", "venue": "SIGIR", "year": 2017 }, "1418c58ccf2f85461dfff22f1a7ac0ce27e44f7b": { "authors": [ { "ids": [ "2861579" ], "name": "Sumayah A. Alrwais" }, { "ids": [ "39591879" ], "name": "Xiaojing Liao" }, { "ids": [ "33479991" ], "name": "Xianghang Mi" }, { "ids": [ "1722767" ], "name": "Peng Wang" }, { "ids": [ "1739993" ], "name": "Xiaofeng Wang" }, { "ids": [ "39845983" ], "name": "Feng Qian" }, { "ids": [ "2474340" ], "name": "Raheem A.
Beyah" }, { "ids": [ "1703426" ], "name": "Damon McCoy" } ], "doi": "10.1109/SP.2017.32", "doiUrl": "https://doi.org/10.1109/SP.2017.32", "entities": [ "Address space", "Agile software development", "Autonomous system (Internet)", "Bulletproof hosting", "Cloud computing", "Ecosystem", "Statistical classification", "Strategic management", "Terabyte", "Underground" ], "id": "1418c58ccf2f85461dfff22f1a7ac0ce27e44f7b", "inCitations": [ "0a71dda39e97c9a4b8c4c88a135b9cda5bae588b", "061c38987c85b28d072fd0c2f94089b5992135cd", "287750d6ec07d5867be4bf489dba7bba14d51fe4", "71f375aa056ab0b63c4ee678ba2e51d54f745d35" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "805-823", "journalVolume": "", "outCitations": [ "7886e2c04665a77088bbe64512dbe62b4e71fc67", "022d706692916789172fec132e04afea4ca0753c", "365390ced4790d136292829b7b6a2eec61b57eeb", "38070061ca053c36c352bad794c6a07b11a7f410", "a7a624b5d5118f57fa8f582c29efb2f119a8bc0a", "77e3b3ef75b33e20039f5c56e25436974bf70c15", "24e6cf0796237f21c780a3f0c996817f57b3a1bd", "13d4c2f76a7c1a4d0a71204e1d5d263a3f5a7986", "fe716e3207efe82886726f7012a6c3e5b977a3fa", "2ff9c10a0a8f43306f3a0492f8d6eca744d4e7c7", "2242e52c2d3c4a90cfe546a6610ae0067afaed99", "20435852a429bdfc5e716ee0ba91e9caec33128b", "0224d4ed379f82dd0aade68f40410967dec6c6ff", "439f1c4b36cb4aef38be8661e2c270fee9f921f3" ], "paperAbstract": "BulletProof Hosting (BPH) services provide criminal actors with technical infrastructure that is resilient to complaints of illicit activities, which serves as a basic building block for streamlining numerous types of attacks. Anecdotal reports have highlighted an emerging trend of these BPH services reselling infrastructure from lower end service providers (hosting ISPs, cloud hosting, and CDNs) instead of from monolithic BPH providers. 
This has rendered many of the prior methods of detecting BPH less effective, since instead of the infrastructure being highly concentrated within a few malicious Autonomous Systems (ASes) it is now agile and dispersed across a larger set of providers that have a mixture of benign and malicious clients. In this paper, we present the first systematic study on this new trend of BPH services. By collecting and analyzing a large amount of data (25 snapshots of the entire Whois IPv4 address space, 1.5 TB of passive DNS data, and longitudinal data from several blacklist feeds), we are able to identify a set of new features that uniquely characterizes BPH on sub-allocations and that are costly to evade. Based upon these features, we train a classifier for detecting malicious sub-allocated network blocks, achieving a 98% recall and 1.5% false discovery rates according to our evaluation. Using a conservatively trained version of our classifier, we scan the whole IPv4 address space and detect 39K malicious network blocks. This allows us to perform a large-scale study of the BPH service ecosystem, which sheds light on this underground business strategy, including patterns of network blocks being recycled and malicious clients being migrated to different network blocks, in an effort to evade IP address based blacklisting. 
Our study highlights the trend of agile BPH services and points to potential methods of detecting and mitigating this emerging threat.", "pdfUrls": [ "https://www.cs.indiana.edu/~fengqian/paper/bulletproof_sp17.pdf", "http://damonmccoy.com/papers/alrwais2017under.pdf", "https://doi.org/10.1109/SP.2017.32", "http://iisp.gatech.edu/sites/default/files/images/under_the_shadow_of_sunshine.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1418c58ccf2f85461dfff22f1a7ac0ce27e44f7b", "sources": [ "DBLP" ], "title": "Under the Shadow of Sunshine: Understanding and Detecting Bulletproof Hosting on Legitimate Service Provider Networks", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "14206f58f77c80fa6ffd7ca76326a8df03e7588a": { "authors": [ { "ids": [ "2166431" ], "name": "Primal Wijesekera" }, { "ids": [ "2606009" ], "name": "Arjun Baokar" }, { "ids": [ "3407577" ], "name": "Lynn Tsai" }, { "ids": [ "2405771" ], "name": "Joel Reardon" }, { "ids": [ "2645852" ], "name": "Serge Egelman" }, { "ids": [ "4019963" ], "name": "David A. 
Wagner" }, { "ids": [ "1756107" ], "name": "Konstantin Beznosov" } ], "doi": "10.1109/SP.2017.51", "doiUrl": "https://doi.org/10.1109/SP.2017.51", "entities": [ "Field research", "Mobile operating system", "Operating system", "Privacy", "Smartphone", "Word error rate" ], "id": "14206f58f77c80fa6ffd7ca76326a8df03e7588a", "inCitations": [ "8b33223ee24dd81fa635d8c6ff488b6373b74113", "0339584e6c0b073e2f62383a7a76d448766143f1", "02498b709402f20642bf857466c7f6742435f030", "982327dd1cfd20bda2344068c82dd95155134c5a", "4ff86de963c9bd29c92adca8205db2162e1c6745" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "1077-1093", "journalVolume": "", "outCitations": [ "332e0791d27ee5cc569f0b593e91f2f011ba5679", "0c3b061dff00c9689970ab5b7fc53366cb4e4cf4", "078e7c470ecfbecaf557c9c29fe1cb679a52f937", "14a23ef72ceb106e4c95df6e396c6291c11a87c9", "29be05f17c8906d70659fe1110758a59d39d2a08", "8ffc32565380d35bcc68f175cb803918b56517a7", "03a613951421cf67237d5278d6bf3702a26da9aa", "0415f6b12202799e0a661138ba2d93e0a3c1ecfc", "1b9fae8255fda28e4adeb96a36f8e907e8aac6f9", "35716646b2ce09a1dd0dd584b5adc7242df0beff", "177356d35e6d2db31760d487614ecaf856f398bd", "30bf2252a83fbd566a7c7011cd56d82c32832aef", "29227faf3a23511acacc814a61077f5b24db9576", "43e2e4963ed8774c8849b682d8630731887bf86d", "9140a579de7da753a095356b5da24804af28e8f7", "53bf5903d05289f476e02107f17c655481ba8e39", "39a651ace163e7741bc98e266201afe83ad63219", "19f983e765a090d25f0ac4fa46873211f5718627", "42ba90a7b1a82f2572b1533caefb7cffb746b6ec", "761d83c872ef314acf2915d41a7f49bbdeeec5ee", "c3e3aaa6b186478a97135f96f7e5254a399f7e26", "2200f562feb2e7500a91c3a9dcc4cf40f05c50de", "0a4f96deacc1991ff6a8e8dac9e43963d0ddb485", "0bc0c1b27dc9431a92dfbc538c4b866b06a097f3", "1256d1a498f69c50a575020996b46938cb1d3208", "8c9c760b2079de26cf2aea31d128ff9054a3e6b1", "27a7497a46c9597b35d2120c224061423ff3f311", "0e916511718ab23bd3428cd5c264c7486efdf595", "6f006a3895dd8fb24f83235a67f2fe72418aa800", 
"4699893ea1226cbd1ed8433d329a9cc9a7de6d3c", "1f211c3198caf2e5ca0c972652424103c4fff6ab", "6b04fba371a8e50a9c9b158a1645f4d6425d5cff", "230863a379c8f0ee1d6fc2d36c0f74374ea53ae6", "2fe33f4b7c75d9e29bf80e7bdd719205cfafc3c9", "263519c5a43fbf981da5ba873062219c50fdf56d", "7204c8ee25517c5d15e32d7d9242e36002afcb37", "324fcfd190f3a755ee25955e39be4f94072073cc", "023f23c300804754753cb11db51fb7f582556ab7", "50749186978deefcae8e528dcf4b3c5b7d9e5ba3" ], "paperAbstract": "Current smartphone operating systems regulate application permissions by prompting users on an ask-on-first-use basis. Prior research has shown that this method is ineffective because it fails to account for context: the circumstances under which an application first requests access to data may be vastly different than the circumstances under which it subsequently requests access. We performed a longitudinal 131-person field study to analyze the contextuality behind user privacy decisions to regulate access to sensitive resources. We built a classifier to make privacy decisions on the user's behalf by detecting when context has changed and, when necessary, inferring privacy preferences based on the user's past decisions and behavior. Our goal is to automatically grant appropriate resource requests without further user intervention, deny inappropriate requests, and only prompt the user when the system is uncertain of the user's preferences. 
We show that our approach can accurately predict users' privacy decisions 96.8% of the time, which is a four-fold reduction in error rate compared to current systems.", "pdfUrls": [ "https://blues.cs.berkeley.edu/wp-content/uploads/2018/01/sp17-contextual-integrity.pdf", "https://doi.org/10.1109/SP.2017.51", "http://lersse-dl.ece.ubc.ca/record/318/files/wijesekera_oakland2017.pdf?subformat=pdfa", "http://lersse-dl.ece.ubc.ca/record/318/files/wijesekera_oakland2017.pdf", "https://www.ftc.gov/system/files/documents/public_comments/2016/09/00018-129026.pdf", "http://arxiv.org/abs/1703.02090", "https://arxiv.org/pdf/1703.02090v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/14206f58f77c80fa6ffd7ca76326a8df03e7588a", "sources": [ "DBLP" ], "title": "The Feasibility of Dynamically Granted Permissions: Aligning Mobile Privacy with User Preferences", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "142a51f57f3066efbb52d84ba1c43b068dc585b9": { "authors": [ { "ids": [ "2991150" ], "name": "Ngai Meng Kou" }, { "ids": [ "1698571" ], "name": "Yan Li" }, { "ids": [ "39049654" ], "name": "Hao Wang" }, { "ids": [ "1713638" ], "name": "Leong Hou U" }, { "ids": [ "1735422" ], "name": "Zhiguo Gong" } ], "doi": "10.1145/3035918.3035953", "doiUrl": "https://doi.org/10.1145/3035918.3035953", "entities": [ "Crowdsourcing", "Database", "Estimation theory", "Experiment", "Money" ], "id": "142a51f57f3066efbb52d84ba1c43b068dc585b9", "inCitations": [ "4d96002263faa311fa61d7fefda7361061356f26" ], "journalName": "", "journalPages": "1415-1430", "journalVolume": "", "outCitations": [ "0ee10b695a90d7699b0518436d9bac53410c2d0b", "3a5066bcd59f81228876b6bf7d5410c63a82f173", "7e402f75d09b2de259e14e07cdb88d40e3128e7c", "8b58ef72e680bfbceeae3baaa01487ac00aead48", "d8e320d7bacb68a13c90f8913cbf0c38fe4f80e0", "32be67f8fb5b21554b950fb49f3c81098d5d23c7", "146e33f6ac7ee643af0a6a10f78a5273e6dfad86", "162c68e07814704109122d61771c1ce067e95b86", 
"49ceba7f32b3d440f20b7b35d4c7462016666ef9", "5611660a5f09be040e2a1c5c942fac88bd1e5d0e", "8490234d79b47e459824dcf87c1e288211a3c964", "2220feec76a17e509a58abf8c742ea9b7866a99e", "4033104e3a37324df023fec7e95d852e962617de", "a96c51a903d244b6006a2999269f5e9931e25403", "01bc8e0e0048255596ec56498fc6b80e4340d244", "5f2c3d94ef509af1fc49dbecf5c69f2663dbc827", "35c71faec5506d8fe79bb997f6e9e3743dc436a6", "16c36a0ab390553d77877ee634607899e4eececf", "eb82d3035849cd23578096462ba419b53198a556", "1b264b6547cda91a7d1599b875669c7853974ad9", "199dcbb1e5287eedb458c867b171cc83c06b0d2a", "37ae295ba271939ca67a6d2ddc60a256829df9cf", "08f51a9138458f667f0c00d40b6a820c451c7d36", "2ae7aa1572cb39539b9b4c558f2bd7836cae151c", "5ee09c89bdcf9b1aa7b62e78549f17a6774417a7", "8a0b267493ac9510e47ceb4bcebb6d202b2f89a5", "f585d28de2c8bf0ab1f149d1cf9becb8e77b2af0", "58ce51c7e7875403904741390e1c81079f97441a", "a21398c57a2859b492602d854cf9ccfb36ba9541", "64edd2c5c41e856695e1dcb950c0512c3a87edec" ], "paperAbstract": "Crowdsourced query processing is an emerging processing technique that tackles computationally challenging problems by human intelligence. The basic idea is to decompose a computationally challenging problem into a set of human friendly microtasks (e.g., pairwise comparisons) that are distributed to and answered by the crowd. The solution of the problem is then computed (e.g., by aggregation) based on the crowdsourced answers to the microtasks. In this work, we attempt to revisit the crowdsourced processing of the top-k queries, aiming at (1) securing the quality of crowdsourced comparisons by a certain confidence level and (2) minimizing the total monetary cost. To secure the quality of each paired comparison, we employ two statistical tools, Student's t-distribution estimation and Stein's estimation, to estimate the confidence interval of the underlying mean value, which is then used to draw a conclusion to the comparison. 
Based on the pairwise comparison process, we attempt to minimize the monetary cost of the top-k processing within a Select-Partition-Rank framework. Our experiments, conducted on four real datasets, demonstrate that our stochastic method outperforms other existing top-k processing techniques by a visible difference.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035953" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/142a51f57f3066efbb52d84ba1c43b068dc585b9", "sources": [ "DBLP" ], "title": "Crowdsourced Top-k Queries by Confidence-Aware Pairwise Judgments", "venue": "SIGMOD Conference", "year": 2017 }, "144bc3accd625ed750593db4043d3e7334a659e5": { "authors": [ { "ids": [ "35625667" ], "name": "Joanna Asia Biega" }, { "ids": [ "40075164" ], "name": "Rishiraj Saha Roy" }, { "ids": [ "1751591" ], "name": "Gerhard Weikum" } ], "doi": "10.1145/3077136.3080830", "doiUrl": "https://doi.org/10.1145/3077136.3080830", "entities": [ "Antivirus software", "Experiment", "Human\u2013computer interaction", "Interaction", "Online service provider", "Personalization", "Privacy", "Recommender system", "User profile", "Web search engine" ], "id": "144bc3accd625ed750593db4043d3e7334a659e5", "inCitations": [], "journalName": "", "journalPages": "675-684", "journalVolume": "", "outCitations": [ "3957270267c2bba0ac00ab3c4461f0c47cfd95c3", "6a5aaba6eb1ca88f2937df276f13025db4823205" ], "paperAbstract": "Online service providers gather vast amounts of data to build user profiles. Such profiles improve service quality through personalization, but may also intrude on user privacy and incur discrimination risks. In this work, we propose a framework which leverages solidarity in a large community to scramble user interaction histories. While this is beneficial for anti-profiling, the potential downside is that individual user utility, in terms of the quality of search results or recommendations, may severely degrade. 
To reconcile privacy and user utility and control their trade-off, we develop quantitative models for these dimensions and effective strategies for assigning user interactions to Mediator Accounts. We demonstrate the viability of our framework by experiments in two different application areas (search and recommender systems), using two large datasets.", "pdfUrls": [ "http://people.mpi-inf.mpg.de/~jbiega/papers/privacy_solidarity_sigir2017.pdf", "http://people.mpi-inf.mpg.de/~rsaharo/sigir17preprint_jabrsrgw.pdf", "http://doi.acm.org/10.1145/3077136.3080830" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/144bc3accd625ed750593db4043d3e7334a659e5", "sources": [ "DBLP" ], "title": "Privacy through Solidarity: A User-Utility-Preserving Framework to Counter Profiling", "venue": "SIGIR", "year": 2017 }, "1452e50e9bb6452fede80d3365104ba0da6e4058": { "authors": [ { "ids": [ "3443131" ], "name": "Omer Y. Adam" }, { "ids": [ "1746054" ], "name": "Young Choon Lee" }, { "ids": [ "9392149" ], "name": "Albert Y. 
Zomaya" } ], "doi": "", "doiUrl": "", "entities": [ "Hypervisor", "Program optimization", "Python", "Quality of service", "Service-level agreement" ], "id": "1452e50e9bb6452fede80d3365104ba0da6e4058", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "110-119", "journalVolume": "", "outCitations": [ "0a4ba0904a7ca0f3ca4670ccba5cff4ac240afea", "011759094878369b86c8caceb87c089d0812faf2", "0b56c5c990051e879d341671d85408fbf519c7c8", "49987abca947745b97ce67fcb63534bae913b176", "067c7857753e21e7317b556c86e30be60aa7cac0", "8306554d07fdad9eca50096e2c22c543c577291c", "1bfaed6a3ae32c39f5b9a61fa528a8d23ff48801", "20926778c2725c7ed99efb124abba6e73c26c0fd", "64dded1957103cd4de7ad8ae090b92cc92aa6e54", "dbcdb4c402756b2b5ac910b9eb17ddb412290d16", "58cb5bac840de03a2de6a08019949996fbb91d6f", "0bcf41ad2fed07d0e1bca8eb15c082d5cf498ee0", "9e98d529d158e2230d722f497fbc36373eaa8583", "0a96ed079dfa8768c4aba0226dd3e014a4f61f2c", "2e72178091b2ca445f46200dcba71a53417b69eb", "4581948531998d5e5f23c131081ea0cdd9066bfe" ], "paperAbstract": "Consolidating applications of conflicting service level objectives (SLOs) to share virtualized resources in cloud datacenters requires efficient resource management to ensure overall high Quality-of-Service (QoS). Applications of different performance targets often exhibit different resource demands. Thus, it is not trivial to translate individual application SLOs to corresponding resource shares in a shared virtualized environment to meet performance targets. In this paper, we present CtrlCloud, a performance-aware resource controlling system, that adaptively allocates resources, with a resource-share controller and an allocation optimization model. 
The controller automatically adapts resource demands based on performance deviations, while the optimization model resolves conflicts in resource demands from multiple co-located applications based on their ongoing performance achieved. We implement a proof-of-concept prototype of CtrlCloud in Python on top of Xen hypervisor. Our experimental results indicate that CtrlCloud can optimize allocations of CPU resources across multiple applications to maintain the 95th percentile latency within predefined SLO targets. CtrlCloud also provides QoS differentiation and yet fulfilling of CPU share demands from applications is maximized given resource availability. We further compare CtrlCloud against two other resource allocation methods commonly used in current clouds. CtrlCloud improves resource utilization by allocating resource shares optimal to 'actual needs' as it employs share-performance online modeling.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101127" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1452e50e9bb6452fede80d3365104ba0da6e4058", "sources": [ "DBLP" ], "title": "CtrlCloud: Performance-Aware Adaptive Control for Shared Resources in Clouds", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "1453a41861ae423b8375e29529be3ef88e6751e2": { "authors": [ { "ids": [ "39256010" ], "name": "Edd Barrett" }, { "ids": [ "27019192" ], "name": "Carl Friedrich Bolz-Tereick" }, { "ids": [ "39610563" ], "name": "Rebecca Killick" }, { "ids": [ "34265412" ], "name": "Sarah Mount" }, { "ids": [ "1679001" ], "name": "Laurence Tratt" } ], "doi": "10.1145/3133876", "doiUrl": "https://doi.org/10.1145/3133876", "entities": [ "Compiler", "Dynamic compilation", "Just-in-time compilation", "Machine code", "Steady state", "Virtual machine" ], "id": "1453a41861ae423b8375e29529be3ef88e6751e2", "inCitations": [ "bf5fdf89e305ea7fc36efd71fbde19bb48d08d3d", 
"7952cec7c8a805ad9396100c0e6075775c4f9247", "f7b03863a9e16adfe1de13cef55f5fdea12a7bd9", "bbd8f8e111a738dbb4cd9c9fecdf411e4d4a33da", "16d0e0e40fedb965b9f47f66a87b6ade510faba5", "8b69691eef39fcdc49b4eb6a628bd90090a13621", "48b311805241c9055aac08e545a592a05639ee07", "ee0b3bad0aa922146b57e82048f3a2b5dd12f991" ], "journalName": "PACMPL", "journalPages": "52:1-52:27", "journalVolume": "1", "outCitations": [ "687bbce9a25c6e3617383049eb316e888af29911", "40b491d7b820783a79cfaa77f15b9400c72e54a7", "f72be76683ad0b6076e840fc2469dfc2d4741d22", "1144370d5899f9826c37db7dc1a63711262f6be1", "75a739c1dd74835c519e9b701cd1e60e38fc0b27", "160ad871b437c95e2f5d89b649a8392ad711cf8c", "26d04cad772da026fffe5659e0bb59c94d70d874" ], "paperAbstract": "Virtual Machines (VMs) with Just-In-Time (JIT) compilers are traditionally thought to execute programs in two phases: the initial warmup phase determines which parts of a program would most benefit from dynamic compilation, before JIT compiling those parts into machine code; subsequently the program is said to be at a steady state of peak performance. Measurement methodologies almost always discard data collected during the warmup phase such that reported measurements focus entirely on peak performance. We introduce a fully automated statistical approach, based on changepoint analysis, which allows us to determine if a program has reached a steady state and, if so, whether that represents peak performance or not. Using this, we show that even when run in the most controlled of circumstances, small, deterministic, widely studied microbenchmarks often fail to reach a steady state of peak performance on a variety of common VMs. Repeating our experiment on 3 different machines, we found that at most 43.5% of symmetric encryption schemes. At CCS'15, Ateniese, Magri, and Venturi extended this model to allow the attackers to work in a fully-adaptive and continuous fashion and proposed subversion attacks against digital signature schemes. 
Both papers also showed the impossibility of ASAs in cases where the cryptographic tools are deterministic. Also at CCS'15, Bellare, Jaeger, and Kane strengthened the original model and proposed a universal ASA against sufficiently random encryption schemes. In this paper we analyze ASAs from the perspective of steganography - the well known concept of hiding the presence of secret messages in legal communications. While a close connection between ASAs and steganography is known, this lacks a rigorous treatment. We consider the common computational model for secret-key steganography and prove that successful ASAs correspond to secure stegosystems on certain channels and vice versa. This formal proof allows us to conclude that ASAs are stegosystems and to \"rediscover\" several results concerning ASAs known in the steganographic literature.", "pdfUrls": [ "https://arxiv.org/pdf/1708.06199v2.pdf", "http://arxiv.org/abs/1708.06199", "http://doi.acm.org/10.1145/3133956.3133981", "https://arxiv.org/pdf/1708.06199v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/14c2ea2fade3324e72bdad6af5bdf5999826248e", "sources": [ "DBLP" ], "title": "Algorithm Substitution Attacks from a Steganographic Perspective", "venue": "CCS", "year": 2017 }, "14d341667c071ad3a4c8729e39c8d38b36e690e5": { "authors": [ { "ids": [ "27094060" ], "name": "Jose Rocher-Gonzalez" }, { "ids": [ "2749895" ], "name": "Jes\u00fas Escudero-Sahuquillo" }, { "ids": [ "34712227" ], "name": "Pedro Javier Garc\u00eda" }, { "ids": [ "1761901" ], "name": "Francisco J. 
Quiles" } ], "doi": "10.1109/HOTI.2017.16", "doiUrl": "https://doi.org/10.1109/HOTI.2017.16", "entities": [ "Algorithm", "Deterministic routing", "Experiment", "Fat tree", "Hogging and sagging", "Interconnection", "Network congestion", "Network performance", "Network switch", "Network topology", "Network traffic control", "Routing", "Simulation", "Stock and flow", "Turing machine equivalents" ], "id": "14d341667c071ad3a4c8729e39c8d38b36e690e5", "inCitations": [ "9a72f27c8d96492dc54e240864b83a42352afdb1" ], "journalName": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "journalPages": "65-72", "journalVolume": "", "outCitations": [ "0aa2d7b5ce401f25651fce2d420380f76774ce63", "4b4ff870f731bf298479392694a3c17225a83768", "a86eb622eaaae24053a158a857624470af790bb6", "3d15d0b71d91ed863be3ecb552e0908a235d4f45", "506027ee1a7754aaeb3f6c5dfbd94a1a34179095", "c22cd78260126ea8e0183c23aeb9a2ec928658e3", "f57ac7f53438b2877022125bac957fda2bb2a97b", "565be173ed71b3273ec37312dd4010ae07cf80f2", "f6402c54f14fe3a399f90577cb0a477a3d5c978d", "552e9a91551e1e7ff284c21c46a4b7d5dea69bfb", "62c583a4b52f7b8a4b03581046786cb9791e003e", "18a8ab664b3ee23504c302640e5792202bafe401", "4a686a5abe526efbaf89dec6ce36fa7ada6e3c15", "54faa156e51c1910e4a1f84227ff4fbd4b1958b4", "a7e0263fc30a8eb68cae023ab086b7967e89c5a3", "3a4cc571b546a72b990e30b9309ccce79230a98b", "0fd211c5275a59827e3eabcf663a79e5f90c241f", "c080810ae3dff3bf47305e56328418cdfab83592", "73f254cd3b49a970593300cd4cad4e5e6c1e8318", "00b00ff9bc08868bc29fad61ba8b849ecd0261f9", "24ac696e598717e4752af563c9a477b22c29fefd", "1958f7bde16e8a57fd0667ca12bdcf50fd7104fc", "622ccdaec9b892ac8eae9792445b1d256a64a5bb" ], "paperAbstract": "In High-Performance Computing (HPC) systems, the design of the interconnection network is crucial. Indeed, the network topology, the switch architecture and the routing scheme determine the network performance and ultimately the system one. 
As the number of endnodes in HPC systems grows, and the supported applications become increasingly demanding for communication, the use of techniques to deal with network congestion and its negative effects gains importance. For that purpose, routing schemes such as adaptive or oblivious try to balance the network traffic in order to prevent and/or eliminate congestion. On the other hand, there are deterministic routing schemes that balance the number of paths per link with the aim of reducing the head-of-line blocking derived from congestion situations. Furthermore, other techniques to deal with congestion are based on queuing schemes. This approach is based on storing separately different packet flows at the ports buffers, so that the head-of-line blocking and/or buffer-hogging are reduced. Existing queuing schemes use different policies to separate flows, and they can be implemented in different ways. However, most queuing schemes are often used and designed assuming that the network is configured with deterministic routing, while actually they could be combined also with adaptive or oblivious routing.This paper analyzes the behavior of different queuing schemes under different routing algorithms: deterministic, adaptive or oblivious. We focus on fat-tree networks, configured with the most common routing algorithms of each type suitable for that topology. In order to evaluate these configurations, we have run simulation experiments modeling large fat-trees built from switches with radices available in the market, and supporting several queuing schemes. The experiments results show how different the performance of the queuing schemes may be when combined with either deterministic or oblivious/adaptive routing. 
Indeed, from these results we can conclude that some combinations of queuing schemes and routings are counterproductive.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HOTI.2017.16" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/14d341667c071ad3a4c8729e39c8d38b36e690e5", "sources": [ "DBLP" ], "title": "On the Impact of Routing Algorithms in the Effectiveness of Queuing Schemes in High-Performance Interconnection Networks", "venue": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "year": 2017 }, "14e6b6a9745261166d8a1c16b481e336e812f924": { "authors": [ { "ids": [ "39076993" ], "name": "James Wagner" }, { "ids": [ "6850164" ], "name": "Alexander Rasin" }, { "ids": [ "40654705" ], "name": "Tanu Malik" }, { "ids": [ "40369971" ], "name": "Karen Hart" }, { "ids": [ "7939230" ], "name": "Hugo Jehle" }, { "ids": [ "39168571" ], "name": "Jonathan Grier" } ], "doi": "", "doiUrl": "", "entities": [ "Apriori algorithm", "Cybercrime", "Database", "Database activity monitoring", "Database forensics", "Experiment", "Information sensitivity" ], "id": "14e6b6a9745261166d8a1c16b481e336e812f924", "inCitations": [ "9960845c3289c6047704b6ff268301c07a0c19e4", "3881d52fbd43098655f15f501a8de55550776f1e", "19ef4cdb4d1187d546b860646b1184645f2dfe8d" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "0d67c26a9e113b8c4af978db3354fc0d0fc015d1", "0ad160b7f96cf0306e481b6055e1c260f33596e4", "dca94b7a6ed075d26d8804f2fb9fbfb2f9fd2bde", "180477d8f809745da689372715c225ef5d3c098d", "42a587b98d30aab0884035261536612bc93a0f7e", "fe094a0592ecc610efb7eafd1049b561da6d2efb", "5b40fda490cca073e47f16a36d22c0b6e71893a2", "e27211b54b7a7a999835726e628728618575c6af", "40f33d9b4872a7463c4e9a17ef601e0b1d0f972d", "c6679987a9b5e8672bec243caaf7b3f4ed7fca55" ], "paperAbstract": "The increasing use of databases in the storage of critical and sensitive information in many organizations has lead to an increase in the rate 
at which databases are exploited in computer crimes. While there are several techniques and tools available for database forensics, they mostly assume apriori database preparation, such as relying on tamper-detection software to be in place or use of detailed logging. Investigators, alternatively, need forensic tools and techniques that work on poorly-configured databases and make no assumptions about the extent of damage in a database. In this paper, we present DBCarver, a tool for reconstructing database content from a database image without using any log or system metadata. The tool uses page carving to reconstruct both query-able data and non-queryable data (deleted data). We describe how the two kinds of data can be combined to enable a variety of forensic analysis questions hitherto unavailable to forensic investigators. We show the generality and efficiency of our tool across several databases through a set of robust experiments. CCS Concepts \u2022Security and privacy \u2192 Information accountability and usage control; Database activity monitoring;", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p128-wagner-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/14e6/b6a9745261166d8a1c16b481e336e812f924.pdf", "s2Url": "https://semanticscholar.org/paper/14e6b6a9745261166d8a1c16b481e336e812f924", "sources": [ "DBLP" ], "title": "Database Forensic Analysis with DBCarver", "venue": "CIDR", "year": 2017 }, "14ea23fe5d2ed2bc7d93e9f3e60e5d18ca209568": { "authors": [ { "ids": [ "3164813" ], "name": "Yu-Ping Liu" }, { "ids": [ "1685169" ], "name": "Ding-Yong Hong" }, { "ids": [ "1726584" ], "name": "Jan-Jan Wu" }, { "ids": [ "2584163" ], "name": "Sheng-Yu Fu" }, { "ids": [ "1741913" ], "name": "Wei-Chung Hsu" } ], "doi": "10.1109/PACT.2017.15", "doiUrl": "https://doi.org/10.1109/PACT.2017.15", "entities": [ "ARM architecture", "AVX-512", "Advanced Vector Extensions", "Binary translation", "Parallel computing", "Performance per watt", "Register 
allocation", "SIMD", "Superword Level Parallelism", "X86" ], "id": "14ea23fe5d2ed2bc7d93e9f3e60e5d18ca209568", "inCitations": [], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "343-355", "journalVolume": "", "outCitations": [ "8705177ae97c80d29d6c976efc7624edc69112ea", "0a5033c0b2bb2421f8c46e196fb0fb1464a636b6", "10bd3ed1e85e59956382c8634f13088be058e93c", "0ac6b674dbd4c3f552ee8b2b79a4179ee330b785", "0a65844b2e318305c7031eb53cb306efe7763d22", "2194c3460ab71f3826db00b045b2ae590c753319", "0e65407ea4bea5f92860752c3056a82b7ed58cd1", "4aebded56ed41b8cfdeac42f25dc6437155bd5e7", "3ad8e1308849167d96355b9b1906994eb92283a3", "46dd6b8867a08bf8796963c937ccd3b09744f38e", "15bccb4ffd4f2f44fa0fae5cdbe85afc362855f6", "5afe81cf448c928d23fc0cf9e385c3febe92fda0", "0653e2ed9f683868cb4539eb8718551242834f6b", "2c0d13841a1c0b94ca7730a5bcbe443bd780c151", "2960c89331eb7afa86584792e2e11dbf6a125820", "6d12aea56165acf3715e2c82b5f560e48359366d", "09dbf94357b21ad14d2897282703ee99ae06a35e", "5eea7073acfa8b946204ff681aca192571a1d6c2", "6430084d77aaddbc692612b42654e73040b93a7b", "0cfd44531f917a1819346fc053e8f8662e3635bf", "58f1d8e4c3588fd4bb7d58276ef14bafa603aaa9", "0fd1f536dff3991738cfacb1acedf1a8e695cc9e", "5af4d8a14510af9ffa9832d7a4bb9a07b99f7ebb", "2549f9b455f75ebaaa3736208e319847140b705e", "039449e900b28ae63ef515063e59642fe501aed7", "06c0f336042ac299430728be10dd63bd57680081", "6b135a7d4fb3e2d205956c2f60ffd2695c2363c7", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "35e843d7d4b35d640e274b123b78ffeb919a4638", "b6d4a02cc699f081821ee4b84765e56c716953ce", "48b22e82dba733f04a5af3408ecab872ba38f0c3", "49c04aea5a4038c911e3e0733371940b9bc7e74c", "88b42246806890ea88d2d64621c2ac828541be90", "64285179b8bdf7861aef8719a4aa49704aa16912", "a5ff2ef2177a466b8b9c8dec93d6b2d60d7a17a4", "60d607c26019503cc6521b45aed3bff2044c2cf8", "0856f6f40b889dba559f19654834114e9f469760", "d72023b94bd41915d163e13f3be7bc386280de29", 
"497e498e6031de6ebb567ffde85243237e91a387" ], "paperAbstract": "Processor manufacturers have adopted SIMD for decades because of its superior performance and power efficiency. The configurations of SIMD registers (i.e., the number and width) have evolved and diverged rapidly through various ISA extensions on different architectures. However, migrating legacy or proprietary applications optimized for one guest ISA to another host ISA that has fewer but longer SIMD registers through binary translation raises the issues of asymmetric SIMD register configurations. To date, these issues have been overlooked. As a result, only a small fraction of the potential performance gain is realized due to underutilization of the host's SIMD parallelism and register capacity.In this paper, we present a novel dynamic binary translation technique called spill-aware SLP (saSLP), which combines short ARMv8 NEON instructions and registers in the guest binary loops to fully utilize the x86 AVX host's parallelism as well as minimize register spilling. Our experiment results show that saSLP improves the performance by 1.6X (2.3X) across a number of benchmarks, and reduces spilling by 97% (99%) for ARMv8 NEON to x86 AVX2 (AVX-512) translation.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.15" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/14ea23fe5d2ed2bc7d93e9f3e60e5d18ca209568", "sources": [ "DBLP" ], "title": "Exploiting Asymmetric SIMD Register Configurations in ARM-to-x86 Dynamic Binary Translation", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "151b71d7365328e929d5dbe75529a73d4de700c0": { "authors": [ { "ids": [ "2609688" ], "name": "Haopeng Liu" }, { "ids": [ "3238718" ], "name": "Guangpu Li" }, { "ids": [ "2235631" ], "name": "Jeffrey F. 
Lukman" }, { "ids": [ "35204651" ], "name": "Jiaxin Li" }, { "ids": [ "1682650" ], "name": "Shan Lu" }, { "ids": [ "1738725" ], "name": "Haryadi S. Gunawi" }, { "ids": [ "1740747" ], "name": "Chen Tian" } ], "doi": "10.1145/3037697.3037735", "doiUrl": "https://doi.org/10.1145/3037697.3037735", "entities": [ "Apache Cassandra", "Apache HBase", "Apache Hadoop", "Big data", "Cloud computing", "Computation", "Concurrency (computer science)", "Distributed computing", "Manifest (transportation)", "MapReduce", "Open-source software", "Software bug", "State space" ], "id": "151b71d7365328e929d5dbe75529a73d4de700c0", "inCitations": [ "334cb2afe4d4ab0ca9c731a12da1e3112cfea699", "5802c2ecb6e2449d9d6ddb3cac902f7cb10eaa10" ], "journalName": "", "journalPages": "677-691", "journalVolume": "", "outCitations": [ "10ba04904f12e44cd0569cb86aa6e97e47939e23", "4af63ed343df388b6353b6fc77c7137d27822bf4", "003d5a65de0ac72daaf105ded903cb3eb88585b3", "80092f707ef876b38912a3880f593b817b0aa4b5", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "24e6f34e499634393416ea09c1aadd37ec9e8542", "1a780029267dfe0500e35b555b812e1d3adb953d", "91ec7ef1b6ffeba0a2b19f00501f2f7e52a76077", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "3be816a633ee79b9d734920faee820226c12a5b5", "0f327b5b4018fad294a744186177afea520be496", "bed6d0e530f20332c284a463c754ce1d304aca38", "182c0524aa353b6f4f4cb75a88ff3f5fc3bd86e0", "15cae3ea228f9f8f5ba4d48a45e6d75814671fa9", "67c2588c22bd4177f14f864a2091b6fbff34cfdd", "172e53475249525093594009251e7c4f60795b88", "8d0eec21e784142926120c2fdd80dd092e2dabf1", "36222f8eb2ccf21ca345e15186cea64506581543", "08cd9a0124faf900e5c155fa853efbc20600784b", "62f7869436b0719fa676717d6e945d48416a8bb7", "5a8aafae38aeabd5533a271ab02396529b7b41c9", "15490c0643b1f35a49022526c879a2f273e4ab64", "3da2613398a04ddbf83f7a57d28e6bf445d18a72", "0608d9937c074520cdc93cc444cc1c77039c5332", "5b9f54be658fe5e42448bbcf3a33fff9532cc0b1", "808fadaaa7d7091e95809f419959917bb6ce4a6d", "0541d5338adc48276b3b8cd3a141d799e2d40150", 
"63e56fa27e99e7eeead2a6106c6cbfcd38168712", "155ca30ef360d66af571eee47c7f60f300e154db", "1ee37e813203018a4f2124e7a87c9430bc5c3fb1", "51dae50f2cf78af88e8d064671b91e6e059ca7a6", "082e054aa9997ab58638eaca4531a328106d67d1", "2346439ece014d5e3ce1564adc2a7ca098a37c8e", "59250c7388caba98bd4adc2f1969fbec5500ed6a", "1e3182738045e85b289a90f2f6f53565f0d6f9ca", "18a5f443299784479e78d9e77f175af57cb2fa2b", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "00c181b8b64e824fbe0172339f1e4560b557fab5", "1003cd805c87467b9a3e8e1dc75f1ceafc390161", "406ee6ce01dbc906ad07a3c89a60c7d8b2252a9a", "2bb4ddb06f45bc7d7196e7047e182c89d737be63", "05a618847e4f08e5bca29dff732757779722b2e0", "10e0397e08f37ee5d3ba1f9e24ac9eb313c784f4", "a60d00ba42a4bed7adb3dc40cd1c32cbaffda5df", "1371911e38975aebd3b0016b0c6137b43569afb9", "b7138e89931107fa6ff32143faea2d2f29bd40ff", "8352d79bde4ee4ee739d72508ba7079d9b1f83a2", "328a3437da451e77c02bdc9b660c720c4d5f67ed", "31f492d522bef8e9d2428e71319f5c5d4b5bf830" ], "paperAbstract": "In big data and cloud computing era, reliability of distributed systems is extremely important. Unfortunately, distributed concurrency bugs, referred to as DCbugs, widely exist. They hide in the large state space of distributed cloud systems and manifest non-deterministically depending on the timing of distributed computation and communication. Effective techniques to detect DCbugs are desired. This paper presents a pilot solution, DCatch, in the world of DCbug detection. DCatch predicts DCbugs by analyzing correct execution of distributed systems. To build DCatch, we design a set of happens-before rules that model a wide variety of communication and concurrency mechanisms in real-world distributed cloud systems. We then build runtime tracing and trace analysis tools to effectively identify concurrent conflicting memory accesses in these systems. Finally, we design tools to help prune false positives and trigger DCbugs. 
We have evaluated DCatch on four representative open-source distributed cloud systems, Cassandra, Hadoop MapReduce, HBase, and ZooKeeper. By monitoring correct execution of seven workloads on these systems, DCatch reports 32 DCbugs, with 20 of them being truly harmful.", "pdfUrls": [ "http://people.cs.uchicago.edu/~shanlu/paper/asplos17-preprint.pdf", "http://doi.acm.org/10.1145/3037697.3037735" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/151b71d7365328e929d5dbe75529a73d4de700c0", "sources": [ "DBLP" ], "title": "DCatch: Automatically Detecting Distributed Concurrency Bugs in Cloud Systems", "venue": "ASPLOS", "year": 2017 }, "151caa8e687fbdeeef71723ca4eabbc07d6fa272": { "authors": [ { "ids": [ "3432110" ], "name": "Koen Koning" }, { "ids": [ "1683647" ], "name": "Xi Chen" }, { "ids": [ "3053948" ], "name": "Herbert Bos" }, { "ids": [ "1744275" ], "name": "Cristiano Giuffrida" }, { "ids": [ "1729961" ], "name": "Elias Athanasopoulos" } ], "doi": "10.1145/3064176.3064217", "doiUrl": "https://doi.org/10.1145/3064176.3064217", "entities": [ "32-bit", "64-bit computing", "Address space", "Bounds checking", "Commodity computing", "Intel MPX", "Isolation (database systems)", "Memory protection", "X86", "X86-64" ], "id": "151caa8e687fbdeeef71723ca4eabbc07d6fa272", "inCitations": [ "9afe4b008fa2e867d49369ddbb2f073368d14f5a", "01dd689a40202b2b27cc7a7687fcbc2f55b63e85" ], "journalName": "", "journalPages": "437-452", "journalVolume": "", "outCitations": [ "17f19d9ec093ef82a10f1276fc53c10d4667836d", "4c35de159c4e01a5b5cb37e5e892468aa03da476", "201b0a185dda51629d7b6fdef3b380a0beaba455", "79e14bf42535966db5e056af6d8a0e0ec1f522b2", "0e55379d27454c5d9d72e4ba4b3752007b9f886f", "23e8236644775fd5d8ff5536ba06b960e19f904b", "0ed6a4c8fa2c74ca2e470df6e205797371523edc", "49d50f1d0bb2a03388f3cb60f9f1ca01974d371c", "3e1ff9416ec1c2f94d0ddbeb6171eea4c336d663", "4fe68f0468d4e6a47cd7f747f49c307dea87cd89", "104eb3716d8b2d7e6d69805357d5b7fb87caff3e", 
"70b4132f2be9a588f86687d319a159cdcf71ad95", "422c2d83a959df1f7c3e99b8a2c77772d8b2e7c3", "080c336698f5d7a15169e5ad98fa62a0bbf6085c", "3fa27974cade47e98993b98798f73594b902583b", "ab2177167b09f9be086d44188b845fc9b5458d66", "4b9426a328fb4e379e2fab57fc72a91c5618ae94", "2e8da51c545cbe8e62a3751a5a2b9a3beca00b43", "79473986fe994d4aeb9d662e0b8e572758a4511b", "aa61c251722835979ee008b8237f68d6695ad70a", "01ee7db67463b53143e0b2c126363ed6d3c8a532", "0ba9924ac38a425a9484dbc0a50cb71858ce416d", "216e02adde586cbc73cb0f242d580b9c5506ac86", "0db59f09437b7b90376f011f5150ed976ac66231", "acf32e644db8c3ac54834d294bba4cf46551480a", "30f52a79ff53f8969ffcba19013b4a43e629875f", "53396c842bc8a94575470fab3acb4aef91c5073d", "4cd63e0701177f04e377fa9f0857c5b0fa10b07e", "39a59c25e8fcdbb6304e70c86e263539af25e62d", "3633b598e9a8318f716f829aef5c6258bae9eb6a", "07f9aa35346107785ad6b6cd07537b24fa7bf0d2", "11496e28e6dd3f1ab7d2f054c46d7587988dfd86", "6f5635fb17ff8a27b08104de6d0322c32e943a9f", "30e76f32c323adb0ff340760380fe5a08505b641", "0988a425689f6f3700e797f4a2c18f73692573c3", "f8eb4724ff1241a728786d30f6cf1bbb9f413e74", "565a174a24e7f47dcd7a21f57cabc252b5692a0f", "71da01051534d46fb3becd0a7506b64db56efc7a", "217742089058db1572042a0cebfcecdec8ce215e", "3001d2504e1dbc547d12d05f3ed1a671d125c4ce", "70e8eb54e20c504b9a3c52f5a5ea4d8fbb3d985c", "592be7266ac5e1a423703242a5f976bdf05627af", "5aa4d6f28c803e5bd05d39794e12c759a60aa6a2", "0653e2ed9f683868cb4539eb8718551242834f6b", "6a8f65381a627a2db6c756a7185d9106f0acefec", "129359a872783b7c3a82c2c9dbef75df2956d2d3", "74572d07252e2f0b60b16abb931c46e819e2b448", "8a6d19bea6f04e2bf2277c7ccd61becdf2bb48e7", "617588d9255cd9be96a0adbc1be69809941de09b", "6588630854bf12b190ae8f95ed8763f7cdb945f6", "2947959aa2cfc45719fac7a54812614d1fa8707f", "d4914de7dbb5080d5c83004cab22df9100fb37d0", "ecb76005e478bf794369d163df3c5f7f9b04bc46", "48a8e9d8a41009eb6b7733b139eb5eff30d72776", "03f827395a17beb941241dbd72322705bdf79791", "3013fc25ace9eca344cb936124a42171d72b95ec", 
"0657eb7e069c2c2c7cae6636704e0f7fb3bcd9fc", "638297d9b5c8e0e83ca5acfbf1325196ea0bbb3c", "10a59e595461de43e3183c99a380e6a35ae264bd", "0d844dec1c0e7b56c178fbb09945001ba00e0d05", "d3cf8d77c8c3f1d57ef0133df89f144f3dd63d26", "57f891b7213282bd58dc61230919fb531b0e4fde", "1fa355cabcaa6650603098c41a3a439fbed718a1", "0d939c3826455ca42310a92d5c00a956c4630b0e", "2a9d09d8e2390c92cdaa5c8b98d6dd4cb394f638", "698e9afaead2fedf5c2008bc0bd29184d438c71d", "50117210fbb7a8ce28ba503280f3c65fdc029ec7", "0a35c32ebc233556d11c2038f5a4362f2c40b2a9" ], "paperAbstract": "As modern 64-bit x86 processors no longer support the segmentation capabilities of their 32-bit predecessors, most research projects assume that strong in-process memory isolation is no longer an affordable option. Instead of strong, deterministic isolation, new defense systems therefore rely on the probabilistic pseudo-isolation provided by randomization to \"hide\" sensitive (or safe) regions. However, recent attacks have shown that such protection is insufficient; attackers can leak these safe regions in a variety of ways.\n In this paper, we revisit isolation for x86-64 and argue that hardware features enabling efficient deterministic isolation do exist. We first present a comprehensive study on commodity hardware features that can be repurposed to isolate safe regions in the same address space (e.g., Intel MPX and MPK). We then introduce MemSentry, a framework to harden modern defense systems with commodity hardware features instead of information hiding. 
Our results show that some hardware features are more effective than others in hardening such defenses in each scenario and that features originally conceived for other purposes (e.g., Intel MPX for bounds checking) are surprisingly efficient at isolating safe regions compared to their software equivalent (i.e., SFI).", "pdfUrls": [ "http://www.cs.vu.nl/~giuffrida/papers/memsentry_eurosys17.pdf", "http://doi.acm.org/10.1145/3064176.3064217", "https://www.cs.ucy.ac.cy/~eliasathan/papers/eurosys17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/151caa8e687fbdeeef71723ca4eabbc07d6fa272", "sources": [ "DBLP" ], "title": "No Need to Hide: Protecting Safe Regions on Commodity Hardware", "venue": "EuroSys", "year": 2017 }, "15388b06b42d9a61a1d083bc3bf140ef40f066fa": { "authors": [ { "ids": [ "1678662" ], "name": "Yang Li" }, { "ids": [ "33801185" ], "name": "Saugata Ghose" }, { "ids": [ "1689391" ], "name": "Jongmoo Choi" }, { "ids": [ "1894402" ], "name": "Jin Sun" }, { "ids": [ "1685600" ], "name": "Hui Wang" }, { "ids": [ "1734461" ], "name": "Onur Mutlu" } ], "doi": "10.1109/CLUSTER.2017.130", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.130", "entities": [ "Computer data storage", "Data-intensive computing", "Dynamic random-access memory", "Locality of reference", "Memory hierarchy", "Memory management", "Memory-level parallelism", "Non-volatile memory", "Parallel computing", "Scalability", "Volatile memory" ], "id": "15388b06b42d9a61a1d083bc3bf140ef40f066fa", "inCitations": [ "1cf5e11f8230c9badb8e963c070ecca2c1bda709", "15aa9bdac48ab6c3b1c223a676240b3cbbd4c3d6", "0b393cab00401cb971cf71970e00c2767f881f75", "2561d914980ab90d0e92fa045cbdc24867fe132c" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "152-165", "journalVolume": "", "outCitations": [ "3c761857787b3efe5e65b25bd94c737bf2cd7632", "007394c2bae389cf43e46db4567dafe206355c25", "46eea309204f088ef9dda197d8273465a641f60c", 
"ab6888a1b024d109c768f81b49c77b585efc975a", "b308718d1ede5272ce1eeab1ea3cf5a5e57422df", "6d2465be30dbcbf9b76509eed81cd5f32c4f8618", "096ee9c89d43fd03a602aed3a37fdf43dc8e60ae", "4f70e1583f5d31d29ceed2998c52b2bf6c01e2ec", "0314667b05134952a7c0997a245b12dc7190f83b", "9efc374f6aef60429750859204aa15fadbee291f", "3c89345bb88a440096f7a057c28857cc4baf3695", "07a1373065dcb4305f6eb91418c5e8b06b7d9433", "468035263afa59095614f26a62e0217da4a1aeed", "0552af5b205a1dca3a22a062580651f41708ee05", "3ef44315caebb6169b7ffbf9886cd782193ab40e", "dc2e2b794a784782d7d9860f1358aa107f71c1bf", "74fcc3a4806da111405f057dc84de39f8fed15d7", "496a76da3380e45b187f32a7d55ad773a923ffad", "86b83278e895f53d95159fe8147e427c32ee9ef5", "03a4ead5f929d7a8b661a4626d3c868cff0ed225", "1d68e6f94aa8e10ebfdd785843c50427d1e820c4", "05c56f4abc527fbf384ad011dc9c0a613955641a", "0970e4835df451fe4793d22070ecf34f518e5cfe", "40718dab3e261c2456c3576d15dd0105f1e2e4e2", "8af127739fd9cac1a56b2466f643d25307fe9eb6", "094b881edab3f5833c4ff2f38d4ed207af141bcd", "1b938edfde3b3b04c13599c2db87c72b7962f383", "1223b0c6c944eef6f907f3e59ed0ac23aecf6d2c", "870403ceaadbe9579b1841baa39c1ac2d03fef3e", "6f45e84202ee1678772899d3473a0b5d5ee4d886", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "0dc38d3afb68f617e23eced7ce2994a0a82feb11", "e4aed18e1b965f90a86b57a787c52af44a8c20a4", "65edeedb41696f66634627984573885e0bf6f55e", "eea1d3484c4f817bddcfd729e374ff7776eee89e", "15aa9bdac48ab6c3b1c223a676240b3cbbd4c3d6", "57f84612b664dde818ca651e137981baa1e237d9", "108c840d5d1847948a2de0250490a327ae069ee6", "012d556d67acedc6898930b4c93f54b87aabf5ee", "18633256bb17ba0744518479c0752ca87f0d03c6", "4acbc54d2494f67badd1b084f1696d1b43336534", "6c61473130ccb2009717a28962096d146fbde038", "30bb582c2c09abc7eb9dda7d9f80804eeb89f9d7", "1dcaf21ff8e70d9a5dd85c8a8bd6ead7201fa08a", "968f21d66c841a7e564271bae73893dcce162531", "4390f4a06a036b8f04cbb4fe7611fa5af9492797", "0d2b7c2421475c7f054a2c1bba9a12b434de47c4", "24df43f92741fdc7d8a3ecb87d856b8486baf041", 
"1b62e2f7ac7a8d3c13773dfa23d05f8ab82b1f23", "8301c813277cc59b47a84d25dc1e307eee8ce310", "1824677a301280f6e8278a9bd256174131476369", "2621b8f63247ea5af03f4ea0e83c3b528238c4a1", "2996f72197c69044f90f203274a0135f8dc6e157", "1c15910d27ee940f71bd1d9a5c25c0230e3025fb", "40ccd404abbc52c306442fc7c396e50021d764e7", "50de0f6a952131dfe562c5b3836e5d934b39b939", "03b6a916498fa8591201a2de5f22344609b1e457", "27b09d430e0c1ecec6593596754f9a29661c21c0", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "8291ac440fb905ed9406ae8ff4d753635fa59a8d", "68073f621072d793e95b9562bf9a9245415d5a96", "426527a6ce80455948ac453b09dfa7d8001bd7c4", "38f7a05fc9a9aa10165550372e5c18c8643db9f8", "ab82581f2225072865c1bf49c0044b05e5afca30", "8c34cdd2bab66623d2831004fbd1fa1cdf8a0366", "45ce4be870f0a5be7b45b064726696dacd83c786", "1f80d8bdf5a0a1787a36ccfc4929f71d14a94e57", "705a129de84bcf24b4039150c2fc2be1c24cc24a", "fae8a785260ac5c34be82fca92a4abef4c30d655", "2092d64f8d99ab8cc5b353bbc3dddf4186bcb461", "2fee80acb6f7b4172622e0f40d350339ca4e3dc9", "5a113bf27c8f3602e56c78291a9e61800b9ec212", "0fca03c476d869660dec04fb83f54161767a4ba7", "2f919f99bf5b6d5667968c318b62d7335814ceff", "33e64874996ac6d163e4e5a97e28b617de7cc0f5", "11d50c97cca6eb9469aeab501c7ce8f9655e08b5", "9aa0d7253574e50fe3a190ccd924433f048997dd", "26e72340c47b7348e1b1de285f89dd96cc925b27", "0b885bb186445ee0c50277d990eca18c53fef09b", "8c3b449ed5e0e32e1e1934176265cec8dbc2bb4f", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "2dc59e60b34b3863e4eb381b17384105fe523cec", "8009b1c8cc4af8d3d4b792ac32926487a428172e", "109f26c285d48ba8f7b5e259364fecef0b3273f6", "89b11dc5ec54d088be960e305aa442ff565fbfd9", "85398d5f19157c91bf00da3d36210e72d57887e4", "d98df13995c1098756e24a49e39872db09f3a537", "434fa04db769935ae61bbcf4d9faa602b9a8c730", "64723f2309639eba4d321becc250e4373592144c", "0494a1ab6f0dd764fb9039772818b8f269ed70b4", "447f492235719d7c2b061b95d818f928d6cbdac5", "0edf4ef1b8e09e4abc994f7d450bc090262e2c9b", "2394c6644efa856f0da160a0f0031d74cd3b5000", 
"80b7dc0de1c9c4fd16b37e2cdcf3965745ce253e", "62a75fe31462ec1ad899aaa29b41bf654fce8799", "36de396ee9d1c9991e44c01be35e5206d79c3328", "e762b1b654798cec0fb9d6000c7f7c777ac0689f", "37b5850e3e75a3462f3991491ca26674925f233b", "1c32ad0a42109fab826eb3054df7cfc33b424125", "2cfa2068d49fc0e9cc5d96bc498c63e782f7478f", "44d886f89cdbd4fdf5dd25d83b2d37deb7541bf7", "0eacd1b47786f740b723d906d46e160f143c0378", "15e63d368aa803c73b8f5d1315a51ebd7ceea3c3", "e8f37a91c2c341c2ba3d082be7d602142784d0a1", "54a15f2c25bec4274d5bb423dbc5002426a506a3", "6902867509928c0e5c19aff3e62e1def3a19d581", "12203385fbe8e26aefa1d82c9effaacb44f27a98", "42f174df3876256dd5606bb61b366116e9943beb", "4c689148ee5e9d6d116f6babbfab21bf2116802e", "7738859976be5d1e4fdeec73a847ccb509138290", "2263e2beda96efcff8ff19efaefcb85f5136aeca", "c3da4e02b8a474d6f094f1e5ac435049e0fe3e49", "9341125876271d46cc25f86dac93f25acb343e8d", "a6aef44e203a16abe0b6354cf9e4856f211e8bde", "084037d504c95c1af6fb1398179f8495618b72d7", "96a0c614362aa4c4a9caf63359ac7cd8742c4539", "4b82766a16aa951020e43d6f70b5cf097a6b353c", "2a660e81e6501ec3489d962fe87448ecf277237f", "0653e2ed9f683868cb4539eb8718551242834f6b" ], "paperAbstract": "While the memory footprints of cloud and HPC applications continue to increase, fundamental issues with DRAM scaling are likely to prevent traditional main memory systems, composed of monolithic DRAM, from greatly growing in capacity. Hybrid memory systems can mitigate the scaling limitations of monolithic DRAM by pairing together multiple memory technologies (e.g., different types of DRAM, or DRAM and non-volatile memory) at the same level of the memory hierarchy. The goal of a hybrid main memory is to combine the different advantages of the multiple memory types in a cost-effective manner while avoiding the disadvantages of each technology. Memory pages are placed in and migrated between the different memories within a hybrid memory system, based on the properties of each page. 
It is important to make intelligent page management (i.e., placement and migration) decisions, as they can significantly affect system performance.In this paper, we propose utility-based hybrid memory management (UH-MEM), a new page management mechanism for various hybrid memories, that systematically estimates the utility (i.e., the system performance benefit) of migrating a page between different memory types, and uses this information to guide data placement. UH-MEM operates in two steps. First, it estimates how much a single application would benefit from migrating one of its pages to a different type of memory, by comprehensively considering access frequency, row buffer locality, and memory-level parallelism. Second, it translates the estimated benefit of a single application to an estimate of the overall system performance benefit from such a migration.We evaluate the effectiveness of UH-MEM with various types of hybrid memories, and show that it significantly improves system performance on each of these hybrid memories. 
For a memory system with DRAM and non-volatile memory, UH-MEM improves performance by 14% on average (and up to 26%) compared to the best of three evaluated state-of-the-art mechanisms across a large number of data-intensive workloads.", "pdfUrls": [ "http://www.andrew.cmu.edu/user/yangli1/UHMEM.pdf", "http://www.pdl.cmu.edu/PDL-FTP/NVM/17cluster_uhmem.pdf", "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.130" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/15388b06b42d9a61a1d083bc3bf140ef40f066fa", "sources": [ "DBLP" ], "title": "Utility-Based Hybrid Memory Management", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "154c5aa62805ae6556df409570b88f397a0cee6a": { "authors": [ { "ids": [ "34610942" ], "name": "Patrick Reisert" }, { "ids": [ "1835480" ], "name": "Alexandru Calotoiu" }, { "ids": [ "2060408" ], "name": "Sergei Shudler" }, { "ids": [ "1684034" ], "name": "Felix Wolf" } ], "doi": "10.1007/978-3-319-64203-1_8", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_8", "entities": [ "SEER-SEM" ], "id": "154c5aa62805ae6556df409570b88f397a0cee6a", "inCitations": [], "journalName": "", "journalPages": "106-118", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_8" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/154c5aa62805ae6556df409570b88f397a0cee6a", "sources": [ "DBLP" ], "title": "Following the Blind Seer - Creating Better Performance Models Using Less Information", "venue": "Euro-Par", "year": 2017 }, "1552f34f65ff71157275a23af374063a79e9a5d1": { "authors": [ { "ids": [ "1856427" ], "name": "Sunwoo Lee" }, { "ids": [ "3371309" ], "name": "Dipendra Jha" }, { "ids": [ "1725914" ], "name": "Ankit Agrawal" }, { "ids": [ "1686646" ], "name": "Alok N. 
Choudhary" }, { "ids": [ "1847672" ], "name": "Wei-keng Liao" } ], "doi": "10.1109/HiPC.2017.00030", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00030", "entities": [ "Backpropagation", "Channel (communications)", "Computation", "Convolutional neural network", "Data dependency", "Distributed memory", "ImageNet", "Inter-process communication", "Parallel computing", "Scalability" ], "id": "1552f34f65ff71157275a23af374063a79e9a5d1", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "183-192", "journalVolume": "", "outCitations": [ "1cfc1cce7ec6c199a2e43f7c312c398820778e6c", "471271dfcd33ceb2553b4bd3b3431983fd6ec888", "0122e063ca5f0f9fb9d144d44d41421503252010", "053912e76e50c9f923a1fc1c173f1365776060cc", "bd56b6ce19558b90b53216fbd3944fcdcd02b661", "126df9f24e29feee6e49e135da102fbbd9154a48", "64bad9c3e4d8ed38c16b0086da865ccd574e836a", "31b97e979ce9d8f908d8aadc769f83409cb2a085", "ec941934e8389addb8620f911de9c83825fac9be", "3f1c1427b175140e7f725a155096a4e73c1b8509", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "2cc157afda51873c30b195fff56e917b9c06b853", "58c45859350b7e9fc2dc6676e318e8f526073f5f", "0061b398627b9e31d6eb87ba7641edcd4e187981", "25555721e652ec45e461c77dbb1fd5f267014eaf", "326d9631dee9542c41d843b806fa7698be6f1994", "5c36d8bb0815fd4ff5daa8351df4a7e2d1b32934", "043afbd936c95d0e33c4a391365893bd4102f1a7", "31dac06366b5d964aca7ce12c567369173db8e3a", "398dee13b3aaaefdf14c78cc1e00dcf265795fd3", "0e32e9a888493971d9db62058952733cdfadd3ee", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "061356704ec86334dbbc073985375fe13cd39088", "464d94b3dc9a109dd64008a41a00181830f285aa" ], "paperAbstract": "Training Convolutional Neural Network (CNN) is a computationally intensive task whose parallelization has become critical in order to complete the training in an acceptable time. However, there are two obstacles to developing a scalable parallel CNN in a distributed-memory computing environment. 
One is the high degree of data dependency exhibited in the model parameters across every two adjacent minibatches and the other is the large amount of data to be transferred across the communication channel. In this paper, we present a parallelization strategy that maximizes the overlap of inter-process communication with the computation. The overlapping is achieved by using a thread per compute node to initiate communication after the gradients are available. The output data of backpropagation stage is generated at each model layer, and the communication for the data can run concurrently with the computation of other layers. To study the effectiveness of the overlapping and its impact on the scalability, we evaluated various model architectures and hyperparameter settings. When training VGG-A model using ImageNet data sets, we achieve speedups of 62.97\u00d7 and 77.97\u00d7 on 128 compute nodes using mini-batch sizes of 256 and 512, respectively.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00030", "http://cucis.ece.northwestern.edu/publications/pdf/LJA17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1552f34f65ff71157275a23af374063a79e9a5d1", "sources": [ "DBLP" ], "title": "Parallel Deep Convolutional Neural Network Training by Exploiting the Overlapping of Computation and Communication", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "1553ae06a47263ad3689211aaeb97eaf0a7a9bfc": { "authors": [ { "ids": [ "2844539" ], "name": "Inci M. Baytas" }, { "ids": [ "2892704" ], "name": "Cao Xiao" }, { "ids": [ "1702286" ], "name": "Xi Zhang" }, { "ids": [ "1682816" ], "name": "Fei Wang" }, { "ids": [ "6680444" ], "name": "Anil K. 
Jain" }, { "ids": [ "5426025" ], "name": "Jiayu Zhou" } ], "doi": "10.1145/3097983.3097997", "doiUrl": "https://doi.org/10.1145/3097983.3097997", "entities": [ "Autoencoder", "Encoder", "Experiment", "Synthetic data" ], "id": "1553ae06a47263ad3689211aaeb97eaf0a7a9bfc", "inCitations": [ "f0e13128c14a405b084bd68275d461763889d052", "7562ca10e3e9fd466be3712830d63996adb74727" ], "journalName": "", "journalPages": "65-74", "journalVolume": "", "outCitations": [ "0f418b9985804a710d27bf97ac6356d6eb62d470", "025720574ef67672c44ba9e7065a83a5d6075c36", "2637a7a4aac59680ad60dc3b032634b3a119f998", "702b37564e2203717fcf9e463686004ae50b98ba", "895f2b13d00d6997387dce4bda2aade196a4cf10", "4f83431bb49eabc649040401b6a51573323eeaa8", "1a1e6b61834e5ab0ec60a0be89b7e5a4b7160081", "bf494f7c293aa217a97a3548169d1057813a967b", "eba36ac75bf22edf9a1bfd33244d459c75b98305", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", "178631e0f0e624b1607c7a7a2507ed30d4e83a42", "040678daf6a49a88345ee0c680fccfd134f24d4b", "a70e02b6e42b908cdbc53bc6cecb532cf72d4d4a", "086a4cc6bf83f21b2182dbbb9e67ab3b3931a119", "4b3643e5436a8b8430361e021a3c863765bab3fb", "695df73f2f4bca1e406622e8734e720332e4013b", "28407386a4a9d5b6de2c906d4bb6245962cba300", "0b3a0710031be11b2ef50437c7d9eb52c91d6a33", "a87bc818f7409ac97c8719aa8fae2c40d214ebbc", "0b544dfe355a5070b60986319a3f51fb45d1348e", "013cd20c0eaffb9cab80875a43086e0c3224fe20", "5537020cf8cc2f3e6123c6f85ecd3e90aa745ee1", "853be19d40fc9849b47bee7572661a8511f44872", "32c970882cae81f7e8ae1499acdb78ee389c7014", "53e28e1c1650133f1e78cb6d985ecf13530319e3", "eb3ea1d29f9b292a50fb55714f329ce677b2eed9", "8d9440495bb79a454a9a4cf121074979f94c7b31", "117488b4d440a12c0fddc7bc342dc2458c82bbd4", "a5e719dd5464af9b695a6ec4ce802f66b670f6f2", "01d4f6d69012919113e750391022f6182b1dbac2" ], "paperAbstract": "In the study of various diseases, heterogeneity among patients usually leads to different progression patterns and may require different types of therapeutic intervention. 
Therefore, it is important to study patient subtyping, which is grouping of patients into disease characterizing subtypes. Subtyping from complex patient data is challenging because of the information heterogeneity and temporal dynamics. Long-Short Term Memory (LSTM) has been successfully used in many domains for processing sequential data, and recently applied for analyzing longitudinal patient records. The LSTM units are designed to handle data with constant elapsed times between consecutive elements of a sequence. Given that time lapse between successive elements in patient records can vary from days to months, the design of traditional LSTM may lead to suboptimal performance. In this paper, we propose a novel LSTM unit called Time-Aware LSTM (T-LSTM) to handle irregular time intervals in longitudinal patient records. We learn a subspace decomposition of the cell memory which enables time decay to discount the memory content according to the elapsed time. We propose a patient subtyping model that leverages the proposed T-LSTM in an auto-encoder to learn a powerful single representation for sequential records of patients, which are then used to cluster patients into clinical subtypes. Experiments on synthetic and real world datasets show that the proposed T-LSTM architecture captures the underlying structures in the sequences with time irregularities.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3097997", "http://biometrics.cse.msu.edu/Publications/MachineLearning/Baytasetal_PatientSubtypingViaTimeAwareLSTMNetworks.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1553ae06a47263ad3689211aaeb97eaf0a7a9bfc", "sources": [ "DBLP" ], "title": "Patient Subtyping via Time-Aware LSTM Networks", "venue": "KDD", "year": 2017 }, "1570141f259442faa0e6a9546be3ca631258d3b8": { "authors": [ { "ids": [ "3126255" ], "name": "David Formby" }, { "ids": [ "1783299" ], "name": "Anwar Elwalid" }, { "ids": [ "2474340" ], "name": "Raheem A. 
Beyah" } ], "doi": "10.1145/3084456", "doiUrl": "https://doi.org/10.1145/3084456", "entities": [ "Control system", "Critical infrastructure protection", "Network security", "Operating system", "Real-time operating system", "Relevance", "Simulation" ], "id": "1570141f259442faa0e6a9546be3ca631258d3b8", "inCitations": [], "journalName": "", "journalPages": "66", "journalVolume": "", "outCitations": [ "e67423f04c71baab8a00ada701d7964b6ed53c54", "9d2ce83141cea20c72d890a2f15d223ff3101359", "1d940e5b26f56ec93d91d6d61a29f88c5e61507e", "0a974f9a517409cf80c1f38e5d805c2c3da35f6a", "3e5a6e6a2779c4ab1f15ff36611ebaa8d54508e8", "6de80f14aa1aa717e3eafd73c269c50b2a7de390", "0214879c323661dd84a2fa1c52ce9c3514b40cfd", "8d7e97b2c084107b979521b82e56348e6a5ecdaa", "9941079c57622e4ca30f40d5a1c94f6c1f2f6020", "7c61cb31b75048378db11c39338406e21c5ec020", "aaa1a1aab201658f10637f570ad84216b8fe07bc", "9d026664e2a34dc9d69cdda6519094b3bec19ad2", "258d477e618527cb81f60d64a490c6d668673ccc", "d05657b77e0d1aa3856b9d1b0d89302dfa149682" ], "paperAbstract": "The modern world is becoming increasingly dependent on computing and communication technology to function, but unfortunately its application and impact on areas such as critical infrastructure and industrial control system (ICS) networks remains to be thoroughly studied. Significant research has been conducted to address the myriad security concerns in these areas, but they are virtually all based on artificial testbeds or simulations designed on assumptions about their behavior either from knowledge of traditional IT networking or from basic principles of ICS operation. In this work, we provide the most detailed characterization of an example ICS to date in order to determine if these common assumptions hold true. A live power distribution substation is observed over the course of two and a half years to measure its behavior and evolution over time. 
Then, a horizontal study is conducted that compared this behavior with three other substations from the same company. Although most predictions were found to be correct, some unexpected behavior was observed that highlights the fundamental differences between ICS and IT networks including round trip times dominated by processing speed as opposed to network delay, several well known TCP features being largely irrelevant, and surprisingly large jitter from devices running real-time operating systems. The impact of these observations is discussed in terms of generality to other embedded networks, network security applications, and the suitability of the TCP protocol for this environment.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078505.3078525", "http://doi.acm.org/10.1145/3084456" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1570141f259442faa0e6a9546be3ca631258d3b8", "sources": [ "DBLP" ], "title": "A Case Study in Power Substation Network Dynamics", "venue": "SIGMETRICS", "year": 2017 }, "1574575034358cc08e96b4bd5f0145286490ea49": { "authors": [ { "ids": [ "38072728" ], "name": "John Kloosterman" }, { "ids": [ "38688871" ], "name": "Jonathan Beaumont" }, { "ids": [ "27059624" ], "name": "D. Anoushe Jamshidi" }, { "ids": [ "16556433" ], "name": "Jonathan Bailey" }, { "ids": [ "1751516" ], "name": "Trevor N. Mudge" }, { "ids": [ "1721289" ], "name": "Scott A. 
Mahlke" } ], "doi": "10.1145/3123939.3123974", "doiUrl": "https://doi.org/10.1145/3123939.3123974", "entities": [ "Best, worst and average case", "Compiler", "Computation", "Deployment environment", "Graphics", "Graphics processing unit", "Multithreading (computer architecture)", "Operand", "Register file", "Run time (program lifecycle phase)", "Thread (computing)" ], "id": "1574575034358cc08e96b4bd5f0145286490ea49", "inCitations": [ "3325110b9d6bc05f084579688bb34fb99b5aa122" ], "journalName": "", "journalPages": "151-164", "journalVolume": "", "outCitations": [ "3d50c803cc715e51d263f5a42b06858be9466c0f", "28cc7453c5f3f9ecb9415e631b0829ec9af8a4c3", "5f4388e15af381d4c48b6376180a94db10dae0cd", "8ac1fa880e379cc37820751dacd3f58e918f7e61", "04ec5964a08a2ad62a30fea1fb9eff1e484a4524", "7132859e2843f7adb82ec89daf0eb2bdb1da590b", "284c7fde4bbaf19dd345e3b37d98085d7bfb9a4f", "7ea15c138cc72588fa376ff819f4bb8ca0b324da", "0036adadc90e4826b2f7fc157752eea459070c32", "0d394c72f9d769dfa021796a29fc142db573aec7", "ae62e0f802e44131f0fc7af965a55005bcdda4ac", "0d0c47a7e8b63e72b93787a2f8afb1c9905ac4b0", "1eeb50d5f7937f65a910203ae61430ff8b969012", "71b23cd5b4ab8fad012a8eca8e9f58fe4de153f7", "0063e4afef59c2342733b2895e231a4e8e88c516", "f26de3e8203d9ceb8b32e9246e6fb11209b58754", "ab6832a068fc86e646135724583de21f6ada0597", "025c084d6a73e05e5335cdbedd8e9a0fee832d94", "7bddb166337ba38d3c45ef84aa778bc6b38470ad", "914473e8239f5a4704c2d5f7fba520235a091638", "d4da44003341ee045b7f79b3517fb42d97278aed", "36e46139ac2d2f3242cfe49469ce09403b5df852", "10443d5d4f0e5048df514e581a9f364954158d00", "d65fa4cb118527ee236e25805fb7acd3b7f46d8d", "e45fef49532d56d0020e02f712a99fda0e3a0ab0", "009b98f3ec7c28f3c87acdcc60fb7a5f90e24f9c", "1ea52b210481b098ed59628067daa3f07143941f", "3e44b1d7811859e167b135ac58b6d90adc241a51", "32c8c7949a6efa2c114e482c830321428ee58d70", "2462a0d54e75de30fec9a9659df16316e45cdc7d", "1c0578cfc58b683017da938ef53275a7b88507cb", "d48200b88ce806c1fb0ebb96fd681caef3032bb3", 
"957106995cbed2214bc404e24e32ef9d249f5615", "1f1a1f0cd075cef63083c8ec15321021dbff2cfc", "0dcce77ec3dbbe511e9c53c284cb7f12cbc245e0", "2e33c9a0e0ba542693610f36cabee4d8291f8e09", "013685904132e312942c5364a0f4f677a4606dce", "4946ad542eacf5066d8268a43347e9f788d58f9c", "50ba1a6cf5000057a40234dd9fef3ca271f5ae2f", "205ac66123aa8ee38d487e02d6218e8f1ff4078c", "6a3f32f7d16677652746c98d50c94da907254cf0", "61d13a9a4a6cb66e2d5fcf4f75d97570dca8f3fe", "3b1eade1a6f4b3daeca29c7d38b67409456fe2ab", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "0345b41c8a708048a9f1d27cea06b867b52eead7", "64a14780ac4e588632a3c754566f6b20e1696204", "6635cd62124e589bc56667b31cc295db2fbd22a2", "bfa696236c766973328bdfe3f7fd3ffd7ac9a607", "ae6dc96dbe93fb8729e064f99021d983c4a7c4b0", "31eb576209fd4a548ab8bb83f74e5a7732a45e52", "96b4b72d1098674750c4a406c93efe43e036568b", "174ce50523cb31c0f23d28d0662c8bfa745fc6eb", "8bf6e8f95e8a22ecb8cca53190670568ac6b5082", "64335c55eb06c6cc5372f91b6feaf192550e36fd", "0ca2b92a4f992b35683c7fffcd49b4c883772a29", "4845474141b68b3b36e614b69c3682d064bc9a57", "2d680892a7318ab7eff879054ea7ab6aeeb51fe9", "2d6f002477015469075954c6748a1a85af352c94", "40bb6830ec715b52e0fcbe7566f489dd2d0c25fe", "26512755e7f78e10390b409ed4de3378aba2bac8", "1d55a4505bebb74ab47ec2365a660fda39c40d14", "67bf737ceccf387cdd05c379487da8301f55e93d", "bda279b24583449e3a6d6dd35195478ee016c74a", "9a334c377686d8abe7711abcef58775ee02c0487", "d6bc55d06044c8dd82dfa96134a422b88d404aa0", "220ed6024de6be5ebe1a9ccd2b5da9c803acabef", "c3da4e02b8a474d6f094f1e5ac435049e0fe3e49", "8af1a4b65bd380b8775c15e2a9323ae5184bc8dd" ], "paperAbstract": "The register file is one of the largest and most power-hungry structures in a Graphics Processing Unit (GPU), because massive multithreading requires all the register state for every active thread to be available. 
Previous approaches to making register accesses more efficient have optimized how registers are stored, but they must keep all values for active threads in a large, high-bandwidth structure. If operand storage is to be reduced further, there will not be enough capacity for every live value to be stored at the same time. Our insight is that computation graphs can be sliced into regions and operand storage can be allocated to these regions as they are encountered at run time, allowing a small operand staging unit to replace the register file. Most operand values have a short lifetime that is contained in one region, so their value does not need to persist in the staging unit past the end of that region. The small number of longer-lived operands can be stored in lower-bandwidth global memory, but the hardware must anticipate their use to fetch them early enough to avoid stalls. In RegLess, hardware uses compiler annotations to anticipate warps' operand usage at run time, allowing the register file to be replaced with an operand staging unit 25% of the size, saving 75% of register file energy and 11% of total GPU energy with no average performance loss.", "pdfUrls": [ "http://cccp.eecs.umich.edu/papers/jklooste-micro17.pdf", "http://doi.acm.org/10.1145/3123939.3123974" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1574575034358cc08e96b4bd5f0145286490ea49", "sources": [ "DBLP" ], "title": "Regless: just-in-time operand staging for GPUs", "venue": "MICRO", "year": 2017 }, "157810ccca1ab1acb815ca9c77afcd9040ecdd16": { "authors": [ { "ids": [ "40428350" ], "name": "Kexin Pei" }, { "ids": [ "3139121" ], "name": "Yinzhi Cao" }, { "ids": [ "1824216" ], "name": "Junfeng Yang" }, { "ids": [ "39400201" ], "name": "Suman Jana" } ], "doi": "10.1145/3132747.3132785", "doiUrl": "https://doi.org/10.1145/3132747.3132785", "entities": [ "Autonomous car", "Biological neuron model", "Corner case", "Correctness (computer science)", "Cross-reference", "Deep 
learning", "Gradient", "ImageNet", "Laptop", "Malware", "Neuron", "Optimization problem", "Program optimization", "Random oracle", "Ruby on Rails", "White-box testing" ], "id": "157810ccca1ab1acb815ca9c77afcd9040ecdd16", "inCitations": [ "c478c74ee7e52019bda6d3043efea8fb328745a4", "60f869d07a3ec9bf1395ba1288e822b9a7744f34", "5bf93a7e9565af14e6f54819c660a9261816051a", "9bec5e3292a6ca7cea5fb37a7f6719b1149b2bb0", "1c72f9147124df2b29e172d7a31f459c47be443b", "0939b060ba4832420a7be317806768fc40f13cc3", "5ce1cdd95b3977e66a5c22fb6cab577a8a65597d", "06b7787ad0f8bffcff3d45048a92b3e5dd4fe955", "581b0ca77a9560086fb90e883ccec1fb3a9dcdfc", "5563d7e6e5ee659bf26fa25e04d38c8d1a56e204", "3e080db4b655905fafb0407cf6e1201db5d5769d", "381b008b49d04c1bd5ff00649521fa028b9d3ea8", "9a3f11017ac3d745fb04699c50d4717a97e82deb", "417b553d3863111cd1be284297e0924a07a9690b" ], "journalName": "", "journalPages": "1-18", "journalVolume": "", "outCitations": [ "03086e02b706e6955735ed15603b1015334bc095", "38502c84f76aaebc436317fb1ec086c66b158d40", "208ed7512ea84f22a004920ea0b4c475bc836abc", "0178c79f4721887c084c670c3502b340caf2d339", "664ec878de4b7170712baae4a7821fc2602bba25", "34f25a8704614163c4095b3ee2fc969b60de4698", "0e3cc46583217ec81e87045a4f9ae3478a008227", "1827de6fa9c9c1b3d647a9d707042e89cf94abf0", "1bbb6384076ff6aeb0d1a2c499a6026959671a9c", "0b2cbe47a9bdea2898bce630165ec04a304aed53", "fc881e8d0432ea8e4dd5fda4979243cac5e4b9e3", "05457985ed22d043e561043951753f959017ee90", "83bfdd6a2b28106b9fb66e52832c45f08b828541", "e5e1327ef05b629e5015631b562716ea2e024d1f", "5d73cb400be99ad966a87bee04a4d43954b4beb3", "01fcae344d2edb715bcc63a40b6052c0331741bd", "4954fa180728932959997a4768411ff9136aac81", "7f57e9939560562727344c1c987416285ef76cda", "a538b05ebb01a40323997629e171c91aa28b8e2f", "35734e8724559fb0d494e5cba6a28ad7a3d5dd4d", "4c9cec89a2c9c8173ee53ab4cda2c021421eb7a5", "8c4c723a74fe479c2b8af7d911817377dd6d85c9", "4958a6898f5b1926e40ea76c1e2a940802a777b3", "b90dd2f366988d9bb76399d4137c1768fe460c8f", 
"4eb6f9ffbd0e22d0943f52f18c32cb91b972a2a1", "38211dc39e41273c0007889202c69f841e02248a", "49e77b981a0813460e2da2760ff72c522ae49871", "45f6957cab31e802934cc761380c1a4a37c66208", "9b618fa0cd834f7c4122c8e53539085e06922f8c", "16cd50316e41cbb1d9dfeafeb524b31654cef37a", "0c940529269c8fde4495954168a8c4285fe452fa", "061356704ec86334dbbc073985375fe13cd39088", "0cf4b41ef5371bed73214bb67a0e12829f0df220", "5e4fa9397c18062b970910f8ee168d3297cf098f", "4a2983614b469a2f334bd467f2e8a874d58d19c7", "3ee096aff93ab9a2374cdde06973db1996331d86", "2859e75f421621ff776d55e533dc6ee7cb4b0a92", "62ae695dcfa7657e55e796b5fb735625bb2e9584", "376b078694f0c183e4832900debda4dfed021a9a", "02bc27c39eaaa6b85d336be81b15ca19f112a950", "162d958ff885f1462aeda91cd72582323fd6a1f4", "0b549912e5f111c7c60eadda634ef4484427b684", "332e0791d27ee5cc569f0b593e91f2f011ba5679", "2f2ade8c4944a96a44e6f70ef403b80b058d1725", "5c62dcd5540d7cf8a600f4a49598b63428fd6908", "595a00f0975b5d5c28d904ddba1ae5a493316573", "1740eb993cc8ca81f1e46ddaadce1f917e8000b5", "0a7196fcadeb009d5582b02ce4aa59546f6036e4", "052b1d8ce63b07fec3de9dbb583772d860b7c769" ], "paperAbstract": "Deep learning (DL) systems are increasingly deployed in safety- and security-critical domains including self-driving cars and malware detection, where the correctness and predictability of a system's behavior for corner case inputs are of great importance. Existing DL testing depends heavily on manually labeled data and therefore often fails to expose erroneous behaviors for rare inputs.\n We design, implement, and evaluate DeepXplore, the first whitebox framework for systematically testing real-world DL systems. First, we introduce neuron coverage for systematically measuring the parts of a DL system exercised by test inputs. Next, we leverage multiple DL systems with similar functionality as cross-referencing oracles to avoid manual checking. 
Finally, we demonstrate how finding inputs for DL systems that both trigger many differential behaviors and achieve high neuron coverage can be represented as a joint optimization problem and solved efficiently using gradient-based search techniques.\n DeepXplore efficiently finds thousands of incorrect corner case behaviors (e.g., self-driving cars crashing into guard rails and malware masquerading as benign software) in state-of-the-art DL models with thousands of neurons trained on five popular datasets including ImageNet and Udacity self-driving challenge data. For all tested DL models, on average, DeepXplore generated one test input demonstrating incorrect behavior within one second while running only on a commodity laptop. We further show that the test inputs generated by DeepXplore can also be used to retrain the corresponding DL model to improve the model's accuracy by up to 3%.", "pdfUrls": [ "https://arxiv.org/pdf/1705.06640v4.pdf", "http://www.cs.columbia.edu/~junfeng/papers/deepxplore-sosp17.pdf", "https://arxiv.org/pdf/1705.06640v1.pdf", "http://doi.acm.org/10.1145/3132747.3132785", "http://www.srl.inf.ethz.ch/riai2017/deepxplore.pdf", "https://arxiv.org/pdf/1705.06640v2.pdf", "https://arxiv.org/pdf/1705.06640v3.pdf", "http://arxiv.org/abs/1705.06640", "http://www.cs.columbia.edu/~suman/docs/deepxplore.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/157810ccca1ab1acb815ca9c77afcd9040ecdd16", "sources": [ "DBLP" ], "title": "DeepXplore: Automated Whitebox Testing of Deep Learning Systems", "venue": "SOSP", "year": 2017 }, "157b439116e0dfb349f175d51c3793489355e08c": { "authors": [ { "ids": [ "2563777" ], "name": "Sanketh Nalli" }, { "ids": [ "3263582" ], "name": "Swapnil Haria" }, { "ids": [ "1736221" ], "name": "Mark D. Hill" }, { "ids": [ "9833675" ], "name": "Michael M. 
Swift" }, { "ids": [ "31411469" ], "name": "Haris Volos" }, { "ids": [ "2812707" ], "name": "Kimberly Keeton" } ], "doi": "10.1145/3037697.3037730", "doiUrl": "https://doi.org/10.1145/3037697.3037730", "entities": [ "Benchmark (computing)", "Byte", "Durability (database systems)", "Dynamic random-access memory", "Epoch (reference date)", "High- and low-level", "Industry Standard Architecture", "Non-volatile memory", "Persistence (computer science)", "Persistent memory", "Processor design", "Systems theory", "Volatile memory" ], "id": "157b439116e0dfb349f175d51c3793489355e08c", "inCitations": [ "aa0fb8802532106dcb78c62065258b8e4683ec94", "e3a66a2e79c2a5f7a52bf4e8089bf522fdc04abe", "41ea95cc4dca373bf324555b897760054ec4a76e", "db57257e6b051e0f97d35209cc5aee0909cde1f1", "0671b93f7856891f3ee85acd54ccf0c825c30ef4", "1f50188cbbe2fbc0e649765e8e431badf1877a88", "a6b0d5a4b19d9a8e133e2fb30b40a6b9eae7283d", "004c2345477eda977f12b4485ac24a9e41557439", "e423c74455db069e6a5cc21f68954081ad22a36c", "67ffec9c10d9594eb9af4afe25b1f0b0bce5f85d" ], "journalName": "", "journalPages": "135-148", "journalVolume": "", "outCitations": [ "10419c6f4aa50a36ed0b103c9ddb9aec45f133fe", "bf5497e15f22233cbc2a4d0c3cc2c36f26738701", "05a1357946de5eca42a477b7b268db4944219a2e", "7f112df5b4cf41ef54ac3352aca4aec31033c8aa", "2d45779437516ee55e5f9f4e7a7d8803fa795443", "a7592cb0c6f59211a2b48c3ed5c65a27a3f5cf12", "0948c0acfb779e551e5c2420081eab206f57f396", "33e64874996ac6d163e4e5a97e28b617de7cc0f5", "157352f39ed5ba9a571dd4c27b48786ac8be90cf", "42c70d64890726f60556caf3eec3f06e85642dd9", "16653666b0005f91060a3e402566659749b84313", "265d18ced11e2e64d98afa97b0e86965e68101f7", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "57c823b3b07b98233394bf15cfbbaed6a84809df", "40cb40b7812e019c1051e3a457a8643400b81d51", "9376a2d69d06e39fd6fd27c9ce2f0817cc1dd4ef", "0204f40221260d00c5ee63646560a40dcd7d97d1", "10d8afea57c8f159c4eb2664a40c8fb859acefef", 
"412a9e54bbb31e12d008a9579994e009c5b40b46", "94783d113951822195d4ba44599a8fcbdef9d4bf", "823116269044ab4c713373c66c7da3fcb495b459", "b8735a449f0a1f1889c6b744061360aa85afaa6b", "277862a906af8489a1d98add2f6516a0e5df1bb1" ], "paperAbstract": "Emerging non-volatile memory (NVM) technologies promise durability with read and write latencies comparable to volatile memory (DRAM). We define Persistent Memory (PM) as NVM accessed with byte addressability at low latency via normal memory instructions. Persistent-memory applications ensure the consistency of persistent data by inserting ordering points between writes to PM allowing the construction of higher-level transaction mechanisms. An epoch is a set of writes to PM between ordering points.\n To put systems research in PM on a firmer footing, we developed and analyzed a PM benchmark suite called WHISPER (Wisconsin-HP Labs Suite for Persistence) that comprises ten PM applications we gathered to cover all current interfaces to PM. A quantitative analysis reveals several insights: (a) only 4% of writes in PM-aware applications are to PM and the rest are to volatile memory, (b) software transactions are often implemented with 5 to 50 ordering points (c) 75% of epochs update exactly one 64B cache line, (d) 80% of epochs from the same thread depend on previous epochs from the same thread, while few epochs depend on epochs from other threads.\n Based on our analysis, we propose the Hands-off Persistence System (HOPS) to track updates to PM in hardware. Current hardware design requires applications to force data to PM as each epoch ends. 
HOPS provides high-level ISA primitives for applications to express durability and ordering constraints separately and enforces them automatically, while achieving 24.3% better performance over current approaches to persistence.", "pdfUrls": [ "http://pages.cs.wisc.edu/~sankey/ASPLOS_main.pdf", "http://research.cs.wisc.edu/multifacet/papers/asplos17_whisper.pdf", "http://doi.acm.org/10.1145/3037697.3037730" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/157b439116e0dfb349f175d51c3793489355e08c", "sources": [ "DBLP" ], "title": "An Analysis of Persistent Memory Use with WHISPER", "venue": "ASPLOS", "year": 2017 }, "159d2d0eb9553b405fbbfb01deabc8f7f4ce51aa": { "authors": [ { "ids": [ "2276789" ], "name": "Dileep Kini" }, { "ids": [ "2374083" ], "name": "Umang Mathur" }, { "ids": [ "1729490" ], "name": "Mahesh Viswanathan" } ], "doi": "10.1145/3062341.3062374", "doiUrl": "https://doi.org/10.1145/3062341.3062374", "entities": [ "Algorithm", "Causality", "Happened-before", "Maximal set", "Programmer", "Time complexity" ], "id": "159d2d0eb9553b405fbbfb01deabc8f7f4ce51aa", "inCitations": [], "journalName": "", "journalPages": "157-170", "journalVolume": "", "outCitations": [ "0b84e66c171085b9665c2fbca4718bc9f888a4d0", "7c73b0c0e8a822401077f373d8d1ac5a8eb38507", "0d9c39200e541ce7c5a2f3cfa54302c2c9bc631a", "05a618847e4f08e5bca29dff732757779722b2e0", "01f8d20e8543a26df46a53a7cc726d3bd1a91a1b", "44a851e09e72741944ea01f855e5dac3ebbc4568", "fc990d3630ea9f6fd7481ae0afd137a7f2753f2c", "406ee6ce01dbc906ad07a3c89a60c7d8b2252a9a", "182c0524aa353b6f4f4cb75a88ff3f5fc3bd86e0", "02a5b676f25b7bf05d7c64278900e5eac21efd1a", "454b06a17e2f6656e65935cb9d36dcf2b2044bf5", "0cd328c7e014bb0326b4a7744df5d267ebf040cb", "0180ce1c6e98fd66362b46bd945c503bf7372aa8", "5bc7a761cb77abe5aa964191d501385198b7f79d", "0958a63d9c6238b38377f076b487c413bc8642c1", "86adb0dd8f7a2dbd971aaa8ea79f30e81e434fda", "15cae3ea228f9f8f5ba4d48a45e6d75814671fa9", 
"01293cc2b2bda3c38c7095d2ea1813fcb0611a3e", "44808fd8f2ffd19bb266708b8de835c28f5b8596", "16a04050353b741974c7d0448e8b0149831bfdc0", "8c2b2fb1d4c44d1e1b63be4e5ef3bbb8d37dbfb5", "29aadfb658c589b3c4bc8214f9ab8ec619626b25", "2346439ece014d5e3ce1564adc2a7ca098a37c8e", "217fa6474533b7ca0981aaa8600543afb308ab66", "c4943f23c6fccf43253bbbcb7e16050b90b3e1be", "06802fa2a7c4658fad4ee2ed7ccc0561e8414c3c", "51cdebb39c53fadec02e3d4e4bcb48120dda067b", "8b28b02af1ba77fff5b08d6dea87ba8b043b479d", "108c4a1b24068e5ec1febc986d0a037755b07e94", "9f77ae41ffdb832dc0ca95cca5acb7b285ee949e", "89dce5f511b06739cbd1e2e2300775106bfe1d17", "657a2241eda4e13e1847ab2e3b2f0b18896c9c40", "43a5d5111b7c60ddf3489fbfd2b0bf33daaf9450", "a45adba59080ad625e3005c669345c3a96ad3e18", "8c1c096bfa1a1905439acb88b01e432d9a142994", "6b5eeb5a017de5758e9773b52b0292cfc987ce3d", "0fc3b585f417c57c6491e2bbe6285f37823474e7", "00a9ba0063d34ec56792849a67ef57b4601becbb", "4edd5bfd9d9e846ccfb1d1830fb5fdfa3ab2efce", "8b44b7a9849307acc217772c7240ceb87883bee3", "0a44e8cd34a110ec4ed7221b0431694172eadda8", "5d882e0b90ac280c10b0734f47b8fafebb353f15", "059697e0824d06a43321a9f9d7450da9cc4dc0a8" ], "paperAbstract": "Writing reliable concurrent software remains a huge challenge for today's programmers. Programmers rarely reason about their code by explicitly considering different possible inter-leavings of its execution. We consider the problem of detecting data races from individual executions in a sound manner. The classical approach to solving this problem has been to use Lamport's happens-before (HB) relation. Until now HB remains the only approach that runs in linear time. Previous efforts in improving over HB such as causally-precedes (CP) and maximal causal models fall short due to the fact that they are not implementable efficiently and hence have to compromise on their race detecting ability by limiting their techniques to bounded sized fragments of the execution. 
We present a new relation weak-causally-precedes (WCP) that is provably better than CP in terms of being able to detect more races, while still remaining sound. Moreover, it admits a linear time algorithm which works on the entire execution without having to fragment it.", "pdfUrls": [ "http://arxiv.org/abs/1704.02432", "http://doi.acm.org/10.1145/3062341.3062374", "https://arxiv.org/pdf/1704.02432v3.pdf", "https://arxiv.org/pdf/1704.02432v2.pdf", "https://arxiv.org/pdf/1704.02432v1.pdf", "https://pdfs.semanticscholar.org/159d/2d0eb9553b405fbbfb01deabc8f7f4ce51aa.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/159d2d0eb9553b405fbbfb01deabc8f7f4ce51aa", "sources": [ "DBLP" ], "title": "Dynamic race prediction in linear time", "venue": "PLDI", "year": 2017 }, "15a7853875bf29a84b2d4e475029afaa032ccb76": { "authors": [ { "ids": [ "38691006" ], "name": "Bob Goodwin" }, { "ids": [ "22279087" ], "name": "Michael Hopcroft" }, { "ids": [ "5609719" ], "name": "Dan Luu" }, { "ids": [ "2010831" ], "name": "Alex Clemmer" }, { "ids": [ "22174631" ], "name": "Mihaela Curmei" }, { "ids": [ "1767761" ], "name": "Sameh Elnikety" }, { "ids": [ "1772774" ], "name": "Yuxiong He" } ], "doi": "10.1145/3077136.3080789", "doiUrl": "https://doi.org/10.1145/3077136.3080789", "entities": [ "Algorithm", "Antivirus software", "Cloud computing", "Digital signature", "Inverted index", "Production system (computer science)", "Type signature", "Usability", "Web search engine" ], "id": "15a7853875bf29a84b2d4e475029afaa032ccb76", "inCitations": [ "24436b296237167f7dab67535766d7e159c142c4", "00cc482570d739e7b733f45b6f8f1836b24056bd", "8a7b52dd3bf98996bdcafad45b5549ff0f199424", "1872c5f4bbd4e8c233363d70bc64c55196039bd2" ], "journalName": "", "journalPages": "605-614", "journalVolume": "", "outCitations": [ "00539f3da855c8f36660f5121c280e5860fc63bb", "2bb8062ce4d951c0768a38fc7501a5e457828c54", "3cf0822f63e51be5343028bad7ee72a5882ef7de", 
"dfc38ca4358c24b313f90063fad62bfb15f57a29", "8ce69d30bca576f7230782a15df55b231ecd6cc3", "a9b8622e103e4b2abc6238d35e8c098efc5f98cf", "3593269a4bf87a7d0f7aba639a50bc74cb288fb1", "125670273768bf82405e0195e62a8d87aa6ba888", "2e48ca7c3786c175c1b7cab0dc95e660f38fc2f4", "1744faef11d8777ad69361e67a6ddb0874e7a9f3", "45876841fd4068c7a526b4df0118a3e9eb036daf", "3e248cb8a7e1f96271ecbd63d23484ec6e31099e", "66713fbcb8d5e48a9eb6425bd7fdbb53751e60b1", "4306478b5205fb34e7a1036db4b714f51e419e45", "42bab238258f8c862684945e30a37aca076decb4", "5e154e473f08af50c0cafff11b2a92fcd2e1aace", "e9db1dd1e09e6afc67d1c71dad5ad6210c04afb9", "111ecf2c567b817e1169ae5f763b9da90bf40b10", "e9fa7e93d2f4e958cde16c6cf9d5bc966f8e7ffc", "d7397451eeb2e3335a93a071db620b9a0d4fd776", "0a717685158723040c56b42d67fc8988fab9fc73", "2946f03fa14f29c54f2bd1b0c6b767bc887489b3", "51f53b98ccc60bf255ab653a11b1573ed3c5d815", "3519db1d03529bae4acded60cd7d7608df2d53cb", "9336c4feb0b86f7dc8fef1faa08d8a4bcf25eb81" ], "paperAbstract": "Since the mid-90s there has been a widely-held belief that signature files are inferior to inverted files for text indexing. In recent years the Bing search engine has developed and deployed an index based on bit-sliced signatures. This index, known as BitFunnel, replaced an existing production system based on an inverted index. The driving factor behind the shift away from the inverted index was operational cost savings. This paper describes algorithmic innovations and changes in the cloud computing landscape that led us to reconsider and eventually field a technology that was once considered unusable. The BitFunnel algorithm directly addresses four fundamental limitations in bit-sliced block signatures. At the same time, our mapping of the algorithm onto a cluster offers opportunities to avoid other costs associated with signatures. 
We show these innovations yield a significant efficiency gain versus classic bit-sliced signatures and then compare BitFunnel with Partitioned Elias-Fano Indexes, MG4J, and Lucene.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080789" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/15a7853875bf29a84b2d4e475029afaa032ccb76", "sources": [ "DBLP" ], "title": "BitFunnel: Revisiting Signatures for Search", "venue": "SIGIR", "year": 2017 }, "15a80c22e9d71397ec36ab98c9c9fd7338149ad5": { "authors": [ { "ids": [ "3294666" ], "name": "Milind Chabbi" }, { "ids": [ "30563484" ], "name": "Abdelhalim Amer" }, { "ids": [ "2989804" ], "name": "Shasha Wen" }, { "ids": [ "1785951" ], "name": "Xu Liu" } ], "doi": "10.1145/3018743.3018768", "doiUrl": "https://doi.org/10.1145/3018743.3018768", "entities": [ "Fairness measure", "Lavasoft Ad-Aware", "Locality of reference", "Lock (computer science)", "Message Passing Interface", "Music Construction Set", "Non-uniform memory access", "Scalability", "Throughput", "Timeout (computing)", "Uniform memory access" ], "id": "15a80c22e9d71397ec36ab98c9c9fd7338149ad5", "inCitations": [ "132d1c096d74c53960a7511dafb886c73158a7ec", "597dbb0191f9ffa4f57d3773e35f4bfa91808a5f", "47b4884871be8c08c33e35438d69732b57f5129f" ], "journalName": "", "journalPages": "61-74", "journalVolume": "", "outCitations": [ "5cec4c7d82137333ea7f0166a26d04bba589c7da", "9bd0f0527d8d2f751c622ec14388017224f4810b", "f20bb59b95d2eb95013d386cde3f8969ffd7f0b7", "afc4931dd371130c3d4c6d6dbfda881140847af1", "08ef37c98fc96e273ea6759a25b98f636bde0b75", "3e77a77247734dc918a5723573e1158eee1955f9", "0fb659af82f2277c8a62ac888f4bfd01570e5470", "0213e124eff26719ea7253d4f6a2532fe148a0b0", "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", "7b1a8647dd6482adee6b774d694e5c85b6d5a9cf", "1963455d66a7fa9755216fd15ee47a2ad3d86827", "1008e1b5d8d30a7d2f3a3113521e840c58d7b4ac", "45b75209399f5b66ae462a7a6e0c2a521ff2da8b", "1040dae314611e2fd0a14aed1f1b017869175b1c", 
"9f00eca1a99f35e87b14749d58997d727c813407", "f5de3c70bd135695defa22992a0423a70dc90c59", "21e05bb43446475a2034a62dc8c67dfa368ea0d4", "264cd229ac4bbdf655d5e7b44563bf84bd846364", "8b05280f21297c235917137a81f773b8819aa8fe" ], "paperAbstract": "The popularity of Non-Uniform Memory Access (NUMA) architectures has led to numerous locality-preserving hierarchical lock designs, such as HCLH, HMCS, and cohort locks. Locality-preserving locks trade fairness for higher throughput. Hence, some instances of acquisitions can incur long latencies, which may be intolerable for certain applications. Few locks admit a waiting thread to abandon its protocol on a timeout. State-of-the-art abortable locks are not fully locality aware, introduce high overheads, and unsuitable for frequent aborts. Enhancing locality-aware locks with lightweight timeout capability is critical for their adoption. In this paper, we design and evaluate the HMCS-T lock, a Hierarchical MCS (HMCS) lock variant that admits a timeout. HMCS-T maintains the locality benefits of HMCS while ensuring aborts to be lightweight. HMCS-T offers the progress guarantee missing in most abortable queuing locks. Our evaluations show that HMCS-T offers the timeout feature at a moderate overhead over its HMCS analog. HMCS-T, used in an MPI runtime lock, mitigated the poor scalability of an MPI+OpenMP BFS code and resulted in 4.3x superior scaling.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018768" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/15a80c22e9d71397ec36ab98c9c9fd7338149ad5", "sources": [ "DBLP" ], "title": "An Efficient Abortable-locking Protocol for Multi-level NUMA Systems", "venue": "PPOPP", "year": 2017 }, "15aa9bdac48ab6c3b1c223a676240b3cbbd4c3d6": { "authors": [ { "ids": [ "3046907" ], "name": "Xiangyao Yu" }, { "ids": [ "1841252" ], "name": "Christopher J. 
Hughes" }, { "ids": [ "1759942" ], "name": "Nadathur Satish" }, { "ids": [ "1734461" ], "name": "Onur Mutlu" }, { "ids": [ "1695217" ], "name": "Srinivas Devadas" } ], "doi": "10.1145/3123939.3124555", "doiUrl": "https://doi.org/10.1145/3123939.3124555", "entities": [ "Banshee", "Cache (computing)", "Cache coherence", "Design closure", "Dynamic random-access memory", "Lookup table", "Memory bandwidth", "Page table", "Spectral efficiency", "Translation lookaside buffer" ], "id": "15aa9bdac48ab6c3b1c223a676240b3cbbd4c3d6", "inCitations": [ "15388b06b42d9a61a1d083bc3bf140ef40f066fa", "60aa9510638d4d9739ebfc3a0042187988482346", "0581754e392d4a648f6a7b7665e3561df8627157", "0b393cab00401cb971cf71970e00c2767f881f75" ], "journalName": "", "journalPages": "1-14", "journalVolume": "", "outCitations": [ "3b621e9a6b99f32caa518116cb400035d1deed29", "0cbc3b849eb23d23654c882c70cb65b19f99c011", "5d999f4a5567e6f4a54e46bbcd6006f75ab0cbac", "2046f7c54470e7617269cc954aab877a4691c241", "22b4811bb8265e84d53c62a842cac10dda15f6af", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "1b62e2f7ac7a8d3c13773dfa23d05f8ab82b1f23", "c7341dcd5e6c71b149edf808331ab1e8b37cd03b", "4c689148ee5e9d6d116f6babbfab21bf2116802e", "2092d64f8d99ab8cc5b353bbc3dddf4186bcb461", "28552ecf4eaedb3461edca97304b29082b02fbab", "2cfa2068d49fc0e9cc5d96bc498c63e782f7478f", "8007305d525a0802f09002b7a5bca2bb3f23ed7d", "11b0e5ee27e7fdf989632f157ff204fc8962918c", "2263e2beda96efcff8ff19efaefcb85f5136aeca", "1e63acf596fafe0e7099e99767e1bcac0b7600cb", "c07ebd47e86f0ece88b28c57d79ed7544f5a30f0", "468035263afa59095614f26a62e0217da4a1aeed", "3c89345bb88a440096f7a057c28857cc4baf3695", "f3325ace129dec914966f9894d9f412e5e04bdc2", "18633256bb17ba0744518479c0752ca87f0d03c6", "d47ab6dc259a57124cb9b86493147bbc04162dbd", "492cda7c95d4dc3dc99386cf78beb17bed9dbefa", "35c3882db9e1b2bdf838122787968679595f61de", "1a5ce35fc5ad575c2c9e5f692bf263082d656f80", "0693ff4b3a8d1452b897a876d3ffe6b2074e98e4", "2a660e81e6501ec3489d962fe87448ecf277237f", 
"3000b16ee204ffed4c602ed6f93fc7a692850b6e", "738b1253c656db5c82aad1838867ed7ab629677d", "1c32ad0a42109fab826eb3054df7cfc33b424125", "9c48179c07963a9fad69a359362c0aee87f9fe18", "0eacd1b47786f740b723d906d46e160f143c0378", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "0dc38d3afb68f617e23eced7ce2994a0a82feb11", "15388b06b42d9a61a1d083bc3bf140ef40f066fa", "46eea309204f088ef9dda197d8273465a641f60c", "98ab001452b8392bb0d0b2677cfb91281bad7708", "3bf23f74bf33ed52f7c28587fab315610b27221a", "89b11dc5ec54d088be960e305aa442ff565fbfd9", "1dfa741bd668d0493d390292e5081439872c22da", "6f45e84202ee1678772899d3473a0b5d5ee4d886", "3ac3cf82ff8f43db2e467f792d22d57d068f7ee0", "8009b1c8cc4af8d3d4b792ac32926487a428172e", "59ca42e1911be417863d0f7068b89e1e59189cc9", "36de396ee9d1c9991e44c01be35e5206d79c3328", "3edacab130540193df4aba07cd07366ffd3600de" ], "paperAbstract": "Placing the DRAM in the same package as a processor enables several times higher memory bandwidth than conventional off-package DRAM. Yet, the latency of in-package DRAM is not appreciably lower than that of off-package DRAM. A promising use of in-package DRAM is as a large cache. Unfortunately, most previous DRAM cache designs optimize mainly for cache hit latency and do not consider bandwidth efficiency as a first-class design constraint. Hence, as we show in this paper, these designs are suboptimal for use with in-package DRAM.\n We propose a new DRAM cache design, Banshee, that optimizes for both in-package and off-package DRAM bandwidth efficiency without degrading access latency. Banshee is based on two key ideas. First, it eliminates the tag lookup overhead by tracking the contents of the DRAM cache using TLBs and page table entries, which is efficiently enabled by a new lightweight TLB coherence protocol we introduce. Second, it reduces unnecessary DRAM cache replacement traffic with a new bandwidth-aware frequency-based replacement policy. 
Our evaluations show that Banshee significantly improves performance (15% on average) and reduces DRAM traffic (35.8% on average) over the best-previous latency-optimized DRAM cache design.", "pdfUrls": [ "http://people.csail.mit.edu/yxy/pubs/banshee.pdf", "http://doi.acm.org/10.1145/3123939.3124555", "http://people.csail.mit.edu/devadas/pubs/banshee.pdf", "https://arxiv.org/pdf/1704.02677v1.pdf", "http://arxiv.org/abs/1704.02677", "https://people.inf.ethz.ch/omutlu/pub/banshee-bandwidth-efficient-DRAM-cache_micro17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/15aa9bdac48ab6c3b1c223a676240b3cbbd4c3d6", "sources": [ "DBLP" ], "title": "Banshee: bandwidth-efficient DRAM caching via software/hardware cooperation", "venue": "MICRO", "year": 2017 }, "15ad36fe02346e6fd1224bc41cf1842002526ed2": { "authors": [ { "ids": [ "39717121" ], "name": "Ahmed E. Helal" }, { "ids": [ "1688860" ], "name": "Wu-chun Feng" }, { "ids": [ "39916306" ], "name": "Changhee Jung" }, { "ids": [ "1935184" ], "name": "Yasser Y. 
Hanafy" } ], "doi": "10.1109/IISWC.2017.8167754", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167754", "entities": [ "Adobe Streamline", "Compute kernel", "Loss function", "Open-source software", "Oracle Database", "Parallel computing", "Performance prediction", "Runtime system", "Speedup" ], "id": "15ad36fe02346e6fd1224bc41cf1842002526ed2", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "32-42", "journalVolume": "", "outCitations": [ "fb4d1597e6fb1c8c9e1e8888fe4bbc8530c977cd", "982b17f6441a51d53b7a7ce5093260604975fe00", "a4f3faf40a34c08db5381329012bdd0b9c8b374f", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "2fee9034f208596eefe51cc66acb98a99f6500dd", "13b9a0f935d11105e7cbebe75187568372a9c34b", "2c2fa29dfbbab106f1b94cdfdc67939d3355bd30", "3e1bfe26ae904c6949111aab499e8ebbb8344cf8", "c0d4c5713dba6472edb651334d25c3a5aa78d6f3", "5732268aea93dc3aa6b6f4a2db57a609b9714417", "07f3965cb6906c6da87c41ecbf2afb4b9ef2e856", "4a58e3066f12bb86d7aef2776e9d8a2a4e4daf3e", "273d591af0bdcbefe37d7dd9150e2f612ca7121d", "3c48458d8f77b04c9e69b61f5beddf770e61ec04", "13fe6b22febed9e51dd5b63e8b360a625b9e9a88", "e1316c4e823a61a0c820e15f752d64eb62e1883e", "1400901fca7695d180a44d1f0f49f6830e0ceeeb", "87cd4d592fa384c9c4b95791b89591489f7240e8", "755e4ad5468747b31b9d6994885b17ad957dc9d7", "33f2baa7ca8b940e7606f030639c7953247685c1", "602dcccc2bf6af1ca84355d530ff1e0a79391217", "7926d0b9dfc36c13910a1850cd91a7db862f0014", "2abe6e7ba6393d84e987702f52b42681bf041e4f", "d8a67b13e2d4051dd7a451232314a5d778a1b047", "07f0f2be6fc670034eaf62e21863bc72891f4950", "70898287bad8556efaf7daaa206fb7ad175469e3", "23177452df15b652dd54a59324502b92c99687a7", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "e240f361ee577a1662b719a23c6117ecad3d307c", "048db2a71844b96963709b0f254d5a1a5afa6384", "42d91e6210fca1f9cee495f7181b6642f0237559", "79818ab3f303ec9ba2838b63de1dbb48dc6924d2", 
"021d7ca29cea1a55e5095e17ddb658e54e054793", "8a21e393f1b6ac5690e4fa9f0152a57e77dda62c" ], "paperAbstract": "Porting sequential applications to heterogeneous HPC systems requires extensive software and hardware expertise to estimate the potential speedup and to efficiently use the available compute resources in such systems. To streamline this daunting process, researchers have proposed several “black-box” performance prediction approaches that rely on the performance of a training set of parallel applications. However, due to the lack of a diverse set of applications along with their optimized parallel implementations for each architecture type, the predicted speedup by these approaches is not the speedup upper-bound, and even worse it can be misleading, if the reference parallel implementations are not equally-optimized for every target architecture. This paper presents AutoMatch, an automated framework for matching of compute kernels to heterogeneous HPC architectures. AutoMatch uses hybrid (static and dynamic) analysis to find the best dependency-preserving parallel schedule of a given sequential code. The resulting operations schedule serves as a basis to construct a cost function of the optimized parallel execution of the sequential code on heterogeneous HPC nodes. Since such a cost function informs the user and runtime system about the relative execution cost across the different hardware devices within HPC nodes, AutoMatch enables efficient runtime workload distribution that simultaneously utilizes all the available devices in performance-proportional way. For a set of open-source HPC applications with different characteristics, AutoMatch turns out to be very effective, identifying the speedup upper-bound of sequential applications and how close the parallel implementation is to the best parallel performance across five different HPC architectures. 
Furthermore, AutoMatch's workload distribution scheme achieves approximately 90% of the performance of a profiling-driven oracle.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167754", "http://synergy.cs.vt.edu/pubs/papers/helal-automatch-iiswc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/15ad36fe02346e6fd1224bc41cf1842002526ed2", "sources": [ "DBLP" ], "title": "AutoMatch: An automated framework for relative performance estimation and workload distribution on heterogeneous HPC systems", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "15b3f5027614363581cd42cd2bd411aebcc983f0": { "authors": [ { "ids": [ "1719407" ], "name": "Bernd Finkbeiner" }, { "ids": [ "40028895" ], "name": "Christian M\u00fcller" }, { "ids": [ "1750380" ], "name": "Helmut Seidl" }, { "ids": [ "1759056" ], "name": "Eugen Zalinescu" } ], "doi": "10.1145/3133956.3134080", "doiUrl": "https://doi.org/10.1145/3133956.3134080", "entities": [ "Benchmark (computing)", "EasyChair", "Information flow (information theory)", "Linear temporal logic", "Multi-agent system", "Temporal logic", "Verification and validation", "Web application" ], "id": "15b3f5027614363581cd42cd2bd411aebcc983f0", "inCitations": [ "8249958af0287ede7cd9d16f2112d910c338c67c" ], "journalName": "", "journalPages": "633-645", "journalVolume": "", "outCitations": [ "d02ce0df0c3ee6b365f19bf4075f7cb6de073985", "bfd9f180da6d3ce68f97f28abe7b685e961ec34e", "507b5fe36714eb6aa8acd96d1eef14212eddb82b", "03f02ace66cd558fa4dd465bc988edbe6cee8d5e", "2d1023701b9a8fe1b2a4d26d1fecabdb5a53c9dc", "84bac5587ceb09bc8cba1bbb290f4cded000baad", "63a9331649d18280f0b08e0b1e464ca9ef0017d1", "2c12af7c56f54619213f4bce0597065c41adc7cb", "62f24cfe92a6b23e746bb57fe04312f91fe09c65", "8dbc653d8194c257dfa198b427523191b3865464", "acea96bb15573b4a7b70c529a70028b07f2f23b7", "1b2ca813312e8cd69fcacf9b9692fb4fca8aa1fe", "d93e945154e77fe1be5b6c30cf8f3f37a4f143f5", 
"42642a94bf0166cd243773d99098817fbde08067", "2b6df21137f30d25494bb58521a6062f93e915f8", "54a8486eece5347cfd8f1fa4bd445c961981310b", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "3cf9e90ddc3d274b87b28a9504adac18a3d004d7", "496b54ab0c9a06da08f21b08163edd1db4f8741a", "b9871c6902e0ef8b8b370574dc764b04ff247e64", "53a1f0afdec13f51133a2c7940ad13d7d70a0d39", "8491582f5fee20a3e9f2867d8e0f23a738a33d22", "b12793b0fbf78c15fb04923a2d89858805f4dc3a", "6bfd75d8f8e9ce9deaa159d845d644e77abecbc6", "a8280939a9d4eb0e5a9ff3b8c7545846613bbdc4", "4d4299bfd0ef670b2f913103b853f6394ed026a7", "1b552ae8c54ab08210f1f56bde77ca1b6b36a4ba", "7a96671dbae659462f3a4f3183bb798c89934fa7" ], "paperAbstract": "We consider the automatic verification of information flow security policies of web-based workflows, such as conference submission systems like EasyChair. Our workflow description language allows for loops, non-deterministic choice, and an unbounded number of participating agents. The information flow policies are specified in a temporal logic for hyperproperties. We show that the verification problem can be reduced to the satisfiability of a formula of first-order linear-time temporal logic, and provide decidability results for relevant classes of workflows and specifications. We report on experimental results obtained with an implementation of our approach on a series of benchmarks.", "pdfUrls": [ "http://arxiv.org/abs/1708.09013", "https://arxiv.org/pdf/1708.09013v1.pdf", "https://acmccs.github.io/papers/p633-finkbeinerA.pdf", "http://doi.acm.org/10.1145/3133956.3134080" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/15b3f5027614363581cd42cd2bd411aebcc983f0", "sources": [ "DBLP" ], "title": "Verifying Security Policies in Multi-agent Workflows with Loops", "venue": "CCS", "year": 2017 }, "15dafb010476a45afde19ee780f1816930672cb9": { "authors": [ { "ids": [ "16637283" ], "name": "Dominic A. 
Orchard" }, { "ids": [ "2260424" ], "name": "Mistral Contrastin" }, { "ids": [ "2956402" ], "name": "Matthew Danish" }, { "ids": [ "1805120" ], "name": "Andrew C. Rice" } ], "doi": "10.1145/3133899", "doiUrl": "https://doi.org/10.1145/3133899", "entities": [ "Array programming", "Combinatory logic", "Computation", "Computational science", "Correctness (computer science)", "High- and low-level", "Numerical analysis", "Off-by-one error", "Programming idiom", "Software bug", "Source lines of code", "Specification language", "Static program analysis", "Text corpus", "Verification and validation" ], "id": "15dafb010476a45afde19ee780f1816930672cb9", "inCitations": [ "1e7ac464e2a45c22adc44fe529043d4346697e0a" ], "journalName": "PACMPL", "journalPages": "75:1-75:30", "journalVolume": "1", "outCitations": [ "cbffe7fe142d983413a2b54bf800951b2e0a8c3c", "b50cb0301d3ab2369fd90673d24a97d1cbec8404", "239be79bf7c4521f3ceeca7bb01fda983905d1f6", "580a3b20fdd5b2d971d5c47704f68ecb9a86cab3", "1eea0a35869c16130cbd7341c7c0f3db1ebe22c3", "85c8658ec44a66dce7ae2c83a43fcdcd9b513329" ], "paperAbstract": "Arrays computations are at the core of numerical modelling and computational science applications. However, low-level manipulation of array indices is a source of program error. Many practitioners are aware of the need to ensure program correctness, yet very few of the techniques from the programming research community are applied by scientists. We aim to change that by providing targetted lightweight verification techniques for scientific code. We focus on the all too common mistake of array offset errors as a generalisation of off-by-one errors. Firstly, we report on a code analysis study on eleven real-world computational science code base, identifying common idioms of array usage and their spatial properties. This provides much needed data on array programming idioms common in scientific code. 
From this data, we designed a lightweight declarative specification language capturing the majority of array access patterns via a small set of combinators. We detail a semantic model, and the design and implementation of a verification tool for our specification language, which both checks and infers specifications. We evaluate our tool on our corpus of scientific code. Using the inference mode, we found roughly 87,000 targets for specification across roughly 1.1 million lines of code, showing that the vast majority of array computations read from arrays in a pattern with a simple, regular, static shape. We also studied the commit logs of one of our corpus packages, finding past bug fixes for which our specification system distinguishes the change and thus could have been applied to detect such bugs.", "pdfUrls": [ "https://www.repository.cam.ac.uk/bitstream/handle/1810/267779/oopsla-proof.pdf?sequence=5", "http://doi.acm.org/10.1145/3133899" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/15dafb010476a45afde19ee780f1816930672cb9", "sources": [ "DBLP" ], "title": "Verifying spatial properties of array computations", "venue": "PACMPL", "year": 2017 }, "15e3d493cffef853e478d188245a2b6eff4bd6c6": { "authors": [ { "ids": [ "1689575" ], "name": "Shumo Chu" }, { "ids": [ "2560192" ], "name": "Konstantin Weitz" }, { "ids": [ "39866566" ], "name": "Alvin Cheung" }, { "ids": [ "9246931" ], "name": "Dan Suciu" } ], "doi": "10.1145/3062341.3062348", "doiUrl": "https://doi.org/10.1145/3062341.3062348", "entities": [ "Conjunctive query", "Coq (software)", "Decision problem", "Denotational semantics", "Homotopy type theory", "Query language", "Query optimization", "Query string", "Relational database", "Rewrite (programming)", "Rewriting", "SQL", "Scientific literature", "Source lines of code", "Type theory", "Undecidable problem" ], "id": "15e3d493cffef853e478d188245a2b6eff4bd6c6", "inCitations": [ "2b740ddd333370f404a755759d38870f6fd9f7eb", 
"6e4682ba700c6926b49aa7b6215ab8d9afdd7b7f", "1dfc6a048fa9d7d8f6c4e9928c3dfe7332963850", "8c7cf9c759dcca3195dea6e27c2e25ee9a05671c", "a82f6eeaa9051cab2dcc17a264a5d759b08ec461", "71cebf34099632371f1088d43a824a2c97a28fad", "debd9e33f34b367008357c91c2c70cb85cfc532c" ], "journalName": "", "journalPages": "510-524", "journalVolume": "", "outCitations": [ "51a179eec117ec2105023c3e906433a7301140fd", "07bb2064663f36ec202bac943c62a0861a4f1bde", "1039106e89312c92efdd8a40968b810d1f5fe285", "16a455aeacd14529bee92b0c197619fa2d173151", "605ce5fe2f5b1e478a2528f1c514b77fc2607577", "bfd9f180da6d3ce68f97f28abe7b685e961ec34e", "019d8ba2274b5555bb71baebf76af35de23ef988", "6a048dc38250ffce49c5e6a5040b4c91ca05e83d", "17ee04f6e12e12509a39d203dbb43aa8e83bc526", "036e20936fc1e452509c0b64196a0e937ab733be", "3e60ef911adea3dbc771a07d8c388d47a2cf0d8d", "2ae24b7c4d91e1aa7d887bc9c3b2614145e1dc22", "0f30a4f371f8047362be15f8f1dc4061ac50bdb4", "17886b4911ffd50d7e02a574caad34a286458b3a", "4e60a72a0b58f62b405ab5eb43b184f5fff77710", "7c798b835099d95d8975e85d7fc38cc71a9ebb95", "2654f4716462718a37d6447c29cf47f45e45a3be", "ad585974e2d05758cd36e2ab2e346f38b4de4a0f", "0c8e7d740eb65402f7dc87da399730a15df75b33", "15aaa56f06eca80760943e47f1781591209f2860", "a99a6ba9f985db9519aed18566e8c15481898e57", "128985b85556c30ad405863f2a34340049957616", "5a8aafae38aeabd5533a271ab02396529b7b41c9", "4015eafeea8ec3a900984230ebb39688d417de2d", "6d96e42a887648034e4d6388c580bbdfb93557e2", "aea4d3b166b6fca9e97f799fdd752c8c458515df", "4a92de6121b7d49b7c4393ca1208bac5dd19aea1", "a97b77c8c3f9e8247c497d1b4c27c958a15f3a62", "8434d69ace5d5484cbce4e590bc62d17b467d4dd", "01e6cac15b2c3690436543008e6d1eaa23da266b", "848e654090c52ec7c629632510754d0c8b853e31", "0a9bcde0cc1e1051f4bb2311f1483237598bf0cb", "1f982643a36d56f986c2e75e57f9f3354aa94d27", "0bc3f74da97f976e1f94deff106860d39c477be3", "1fe41b1240a0eddec736b675e914b4858a955876", "fac319a34a9a1b93cb772d4cdb42cdb8741f2edc", "82f729361d8ea2e7bb98e15f6813fd3748961211", 
"317a14e620281bf74066fa22be313334459b2540", "0f642a292c967ad0a251ce1b87d063775f1974fa", "c83e247bad2f5bb48659153027cbe1a92935fca0", "001ab484e44bf365eb2c9532e21a42f9cbcbe5f2", "bff784498bffa84e0600361c2e864b852d85029d", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "11cbbb9f48fcaeb4950defe38ed861171ced309b", "45328c93843397173c284771c38ae107b3e91302", "a6b6c52f5b5b0068ce1387c8445c43e2c3441a6e", "360ecdfc79850873162ee4185bed8f334da30031", "088d9ce683308302ebe67f99c0dd2b0204c6084f" ], "paperAbstract": "Every database system contains a query optimizer that performs query rewrites. Unfortunately, developing query optimizers remains a highly challenging task. Part of the challenges comes from the intricacies and rich features of query languages, which makes reasoning about rewrite rules difficult. In this paper, we propose a machine-checkable denotational semantics for SQL, the de facto language for relational database, for rigorously validating rewrite rules. Unlike previously proposed semantics that are either non-mechanized or only cover a small amount of SQL language features, our semantics covers all major features of SQL, including bags, correlated subqueries, aggregation, and indexes. Our mechanized semantics, called HoTT SQL, is based on K-Relations and homotopy type theory, where we denote relations as mathematical functions from tuples to univalent types. We have implemented HoTTSQL in Coq, which takes only fewer than 300 lines of code and have proved a wide range of SQL rewrite rules, including those from database research literature (e.g., magic set rewrites) and real-world query optimizers (e.g., subquery elimination). Several of these rewrite rules have never been previously proven correct. 
In addition, while query equivalence is generally undecidable, we have implemented an automated decision procedure using HoTTSQL for conjunctive queries: a well studied decidable fragment of SQL that encompasses many real-world queries.", "pdfUrls": [ "http://arxiv.org/pdf/1607.04822v1.pdf", "http://arxiv.org/pdf/1607.04822v2.pdf", "https://arxiv.org/pdf/1607.04822v2.pdf", "https://homes.cs.washington.edu/~chushumo/files/cosette_pldi17.pdf", "http://arxiv.org/abs/1607.04822", "https://homes.cs.washington.edu/~chushumo/files/cosette_pldi_full.pdf", "http://doi.acm.org/10.1145/3062341.3062348", "https://arxiv.org/pdf/1607.04822v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/15e3d493cffef853e478d188245a2b6eff4bd6c6", "sources": [ "DBLP" ], "title": "HoTTSQL: proving query rewrites with univalent SQL semantics", "venue": "PLDI", "year": 2017 }, "15e660fdc5eff2accfaa22bf1fd77a92328a8772": { "authors": [ { "ids": [ "3112463" ], "name": "Yang You" }, { "ids": [ "1700326" ], "name": "James Demmel" } ], "doi": "10.1109/ICPP.2017.54", "doiUrl": "https://doi.org/10.1109/ICPP.2017.54", "entities": [ "Algorithm", "Artificial neural network", "Benchmark (computing)", "Central processing unit", "Computation", "Computer data storage", "Data mining", "Deep learning", "Distributed computing", "Iteration", "Machine learning", "Megabyte", "Memory bandwidth", "Parallel computing", "Self-tuning", "Speedup" ], "id": "15e660fdc5eff2accfaa22bf1fd77a92328a8772", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "452-461", "journalVolume": "", "outCitations": [ "0f16f6f478b5c788dce466eb50e36c612273c36e", "84bd007b64d8f2d99d9bbde3a232f55da00283bc", "838c9137e6fd807c871c80976b4f75c8c8bfcffc", "490f3679089cec9b848b49c11e841b41fda9df27", "bfcd164984c53926133bcf1351415af8ddc44a45", "722d51a68f0c6d1f92fd2cd9a9ffcc7eb8f07a5a", "5d90f06bb70a0a3dced62413346235c02b1aa086", 
"c90922c865809713ff46edd7d2b9b0a786710539", "3b79c240fc93ef9f46ed80f0c51b488bbc78951d", "b14dea76cafede81c6ff5478d4221fce3aec9284", "a51baa5d6db6b76a12b4410a271e613610398b13", "d1fa8485ad749d51e7470d801bc1931706597601", "160e1a787a3364a10ea89a9a8c04238cd468d1a4", "3abfa83501c8649a57a5ef7b0e2337173e5538ef", "2635f61333900a6b4cd9b5db5d4c3bc31363b2ff", "5099224aea8b3d3e4c4bf2490c6255ffb9ff2c59", "773c63566918332d79ff711dc6c4c9dcda2895fd", "5aaf311172b9778d78f6904fbe40124c63463b57", "7a4092f170a3ed058a64f3156248d9c4e32c4d48", "0300febd0bea385dda1fbc94b8962658f71a7589", "1722a68fa20b4ee54eb1bdbe913a654a93a2516f", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "fedce627f3fc84543b19a65efc71a67db573dd6d", "4e088d1c5bc436f1f84997906223e5f24e1df28c", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "8ecc044d920df247fbd455b752fd7cc0f7363ad7" ], "paperAbstract": "Machine Learning (ML) approaches are widelyused classification/regression methods for data mining applications. However, the time-consuming training process greatly limits the efficiency of ML approaches. We use the example of SVM (traditional ML algorithm) and DNN (state-of-the-art ML algorithm) to illustrate the idea in this paper. For SVM, a major performance bottleneck of current tools is that they use a unified data storage format because the data formats can have a significant influence on the complexity of storage and computation, memory bandwidth, and the efficiency of parallel processing. To address the problem above, we study the factors influencing the algorithm’s performance and conduct auto-tuning to speed up SVM training. DNN training is even slower than SVM. For example, using a 8-core CPUs to train AlexNet model by CIFAR-10 dataset costs 8.2 hours. CIFAR-10 is only 170 MB, which is not efficient for distributed processing. Moreover, due to the algorithm limitation, only a small batch of data can be processed at each iteration. 
We focus on finding the right algorithmic parameters and using auto-tuning techniques to make the algorithm run faster. For SVM training, our implementation achieves 1:7..16:3 speedup (6:8 on average) against the non-adaptive case (using the worst data format) for various datasets. For DNN training on CIFAR-10 dataset, we reduce the time from 8.2 hours to only roughly 1 minute. We use the benchmark of dollars per speedup to help the users to select the right deep learning hardware.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.54", "https://people.eecs.berkeley.edu/~youyang/publications/icpp2017.pdf", "https://people.eecs.berkeley.edu/~youyang/publications/icpp2017slide.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/15e660fdc5eff2accfaa22bf1fd77a92328a8772", "sources": [ "DBLP" ], "title": "Runtime Data Layout Scheduling for Machine Learning Dataset", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "15f211eaafc6ce421a511a413613e1d2683879d2": { "authors": [ { "ids": [ "1787012" ], "name": "Viktor Leis" }, { "ids": [ "39865427" ], "name": "Bernharde Radke" }, { "ids": [ "1772311" ], "name": "Andrey Gubichev" }, { "ids": [ "1740464" ], "name": "Alfons Kemper" }, { "ids": [ "1706846" ], "name": "Thomas Neumann" } ], "doi": "", "doiUrl": "", "entities": [ "Database", "Plan", "Relational database management system", "Sampling (signal processing)" ], "id": "15f211eaafc6ce421a511a413613e1d2683879d2", "inCitations": [ "1f0d8aba1e68be021f1a83b8dcd1cca21b46683c", "39b62c7fc926127d11f6d60d78066ef9d9564a55", "35ecfe4f0b22a1918656861aa5e6c6b676fe58a2", "6f10e7cc100865feec746294e90e8d9ca2322059", "104ec5771ab9cd92d705432ca05c8a1735bc3a69" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "2df1e7f01621a21061c5ef44e9cca7f0cfdbb332", "e5c452fce1c2eb8690ae77d9f730a2699d736cf7", "070f03f5e8b016628903c56a0011e810d0914d73", 
"968d8ebed42c840e28f3bc27d929da61b525fe7c", "7a0ed0f1589dc811fea206482dc5d73538b89885", "cf08482fc32e5b2e5f5bb1262d5c324a010c276b", "3533be32b3fc8747e5e83dd5c94f1e482f822d46", "833495624ddf0fa3d47b2433fc6f80c732441f29", "4fea90ad226b109495a79468b166d409c8c60d3b", "31181e73befea410e25de462eccd0e74ba8fea0b", "415ce8daaf2a0f6fb9cd5ed7884ae721cbcd38a5", "14023b8086f53f29376fe6351cf5c68aac7b24f1", "888764f05a60d770cfc0b49944308fd92ed45ee5", "616e890edb3f71909bc30a6fcb5db7df01feae90", "463bec3d0298e96e3702e071e241e3898f76eff2", "5f9d84444231fb6f87f48be9928723a32e23e5ce", "0ec0ce9b04a869f7bf1a3fa5b6089da61f86f8eb", "167a96b9e8f6a9bb60255f9456f83ed8cd9e8f77", "ca0b88a7f00fb55448436abe01a725df7517f060", "5fae053c6426434a02b637707d804e100597025e", "0669f3076269e3cd3a13061a90c865a5b8d56c5c", "eabf7c8c6f215cfc676f2f43540dee629611ea21", "260d7674e292361dbd298acec3b21d2b2deb47ec", "2ccef008f1da82f0886df8c63385bb249778ace9", "24cad9705ccc13fce969021a708ca703db3ae31c", "6abf5107efc723c655956f027b4a67565b048799", "866e522d855e0b496696c34e4d65ce427cf41729", "66a6e8434ef51986cdf7669af526f9914c35d3a9", "226cd58acbd72254581cd76cc7412ea8a775e8cb", "55d1be99a198634ffdf5b8e7db2ca62abb8ad96a" ], "paperAbstract": "After four decades of research, today\u2019s database systems still suffer from poor query execution plans. Bad plans are usually caused by poor cardinality estimates, which have been called the \u201cAchilles Heel\u201d of modern query optimizers. In this work we propose indexbased join sampling, a novel cardinality estimation technique for main-memory databases that relies on sampling and existing index structures to obtain accurate estimates. Results on a real-world data set show that this approach significantly improves estimation as well as overall plan quality. The additional sampling effort is quite low and can be configured to match the desired application profile. 
The technique can be easily integrated into most systems.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p9-leis-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/15f2/11eaafc6ce421a511a413613e1d2683879d2.pdf", "s2Url": "https://semanticscholar.org/paper/15f211eaafc6ce421a511a413613e1d2683879d2", "sources": [ "DBLP" ], "title": "Cardinality Estimation Done Right: Index-Based Join Sampling", "venue": "CIDR", "year": 2017 }, "1600f441dc8c795f07cf1ee35c1e9deb788e482e": { "authors": [ { "ids": [ "1720735" ], "name": "Chao Zhang" }, { "ids": [ "3126738" ], "name": "Keyang Zhang" }, { "ids": [ "34284012" ], "name": "Quan Yuan" }, { "ids": [ "3180064" ], "name": "Fangbo Tao" }, { "ids": [ "1763785" ], "name": "Luming Zhang" }, { "ids": [ "3168604" ], "name": "Tim Hanratty" }, { "ids": [ "1722175" ], "name": "Jiawei Han" } ], "doi": "10.1145/3077136.3080814", "doiUrl": "https://doi.org/10.1145/3077136.3080814", "entities": [ "Experiment", "Generalized TTL security mechanism", "Multimodal interaction", "On the fly", "React", "Semi-supervised learning", "Social media" ], "id": "1600f441dc8c795f07cf1ee35c1e9deb788e482e", "inCitations": [ "fe717901d195fcd496363bce6df4086f094cfc8f", "afd74c7dfd99fafc41c83670bc4529b51912a675", "db5f682f80fbd8f65ff84374e21686d502231af0", "736e8deabcae7e2f9eb6c41a1bfae1b5270a8dbd" ], "journalName": "", "journalPages": "245-254", "journalVolume": "", "outCitations": [ "3cea3baf9469411d11399496a9590e18b954cbe4", "0ae76a541ff54e7e1007e14284e8e3f9c9a99935", "2910b8702ce197a683b17764e893155b1373f919", "087ab67119b7caf129e93d8daa170a7c12a2a8f6", "27208c88f07a1ffe97760c12be08fad3ab68fee2", "a47d80a21e6ce24bb5b3f6e8b0c72680875904b1", "75f4cde0545a45d5ee72c339c6b0d96b0a64da68", "21a0f88ba4c4481bb31f683376bbdc6c87986b02", "04034c4e773160209114c6f95d3f1e9f4aa7ee92", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "a32866e1a3fb2d01e5e09de16a2b8bd96c4efd4a", "b1153f6a0ddc806526467a6dc7e9cc7b1a5b018e", 
"7894683e9f0108245d43c3de91a3426e52e0d27f", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "0d7443bb67ea2d2ec9ba511a544e74425dde7fcd", "378ccfe2f96626570b501624459ca20f4de4d5aa", "84b6f189513f0be9db02353a677e99506491f18a", "1b255cda9c92a5fbaba319aeb4b4ec532693c2a4", "0e730f8ed7cfdeb458cab36e8495e2cc0ee6d6a6", "0de8e6311773e3ad14f934460c001016a19daf76", "b5c13bda74db14b90d5a40da5d4155c7e1e473eb", "c5b28cae82b14417f1250e58bb241367248e827d", "bf828c16d0690c8310cc34f1950f697b0c8c6945", "404fceb167371ead0246b09bf721e7270f947973", "87d907a114409755ecd3c6886585de26a4e17ffe", "1a67622ca58aa851afe36ad6c6e78f9fb9d691d2", "8451c2812a1476d3e13f2a509139322cc0adb1a2", "823262c42414bfaba9a0cea736e1c77c7cea7837", "88dcb174402f481137a45d6e6fcc6dfdd0a511b3", "0834e74304b547c9354b6d7da6fa78ef47a48fa8", "39a93f7ea3e4a2fa0d46f045472f3acded81f094", "06501b7ea604a8b8ffff402ee492955e6892daad", "06584de7f82107ee61504fc7e5156c258c33f18f", "33efd3ecffca21efaf9d1469b7dc3d2a72a0a05e", "38bbe72359887fd60d3330c0df2f595dbbae52d9", "49b2a1b9606c0ccb95a36895760fc91b8b830266", "89af9e25c507eb316618a7c6020ae572cd3d5e8a", "597ffe1508467faece14c8201882bb4d4f0507dc", "3f1f762b2b6750f2feb1ca2e021b2a3e798774a1", "3d32af40ace7d12679a6ef29718984643013aa58", "17a41aa18b8987ed87d6fc19b87d36faf8a4240c" ], "paperAbstract": "Spatiotemporalactivity modeling is an important task for applications like tour recommendation and place search. The recently developed geographical topic models have demonstrated compelling results in using geo-tagged social media (GTSM) for spatiotemporal activity modeling. Nevertheless, they all operate in batch and cannot dynamically accommodate the latest information in the GTSM stream to reveal up-to-date spatiotemporal activities. We propose ReAct, a method that processes continuous GTSM streams and obtains recency-aware spatiotemporal activity models on the fly. 
Distinguished from existing topic-based methods, ReAct embeds all the regions, hours, and keywords into the same latent space to capture their correlations. To generate high-quality embeddings, it adopts a novel semi-supervised multimodal embedding paradigm that leverages the activity category information to guide the embedding process. Furthermore, as new records arrive continuously, it employs strategies to effectively incorporate the new information while preserving the knowledge encoded in previous embeddings. Our experiments on the geo-tagged tweet streams in two major cities have shown that ReAct significantly outperforms existing methods for location and activity retrieval tasks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080814" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1600f441dc8c795f07cf1ee35c1e9deb788e482e", "sources": [ "DBLP" ], "title": "ReAct: Online Multimodal Embedding for Recency-Aware Spatiotemporal Activity Modeling", "venue": "SIGIR", "year": 2017 }, "1603d035494395812a78f2cde1b9e1db8b2cc5a2": { "authors": [ { "ids": [ "3264651" ], "name": "Chenyan Xiong" }, { "ids": [ "1686498" ], "name": "James P. 
Callan" }, { "ids": [ "2211939" ], "name": "Tie-Yan Liu" } ], "doi": "10.1145/3077136.3080768", "doiUrl": "https://doi.org/10.1145/3077136.3080768", "entities": [ "Backpropagation", "Entity", "Hoc (programming language)", "Interaction", "Knowledge Graph", "Knowledge base", "Learning to rank", "Ranking (information retrieval)" ], "id": "1603d035494395812a78f2cde1b9e1db8b2cc5a2", "inCitations": [ "432b36c1bec275c2778c66f9897f9e02f7d8b579", "9543239fbbc68909de74f9ffc35e5bc72c70b299", "75d8185300ec42837ba18799f936f25df7470082", "464a8916d7aea7195da3fc6c2fa2c7167084e581", "d51361359ab69ba1891ce08c97c48d5e0a666085", "7ed3ee44908eb760997ad627bae3e35e359478f3" ], "journalName": "", "journalPages": "763-772", "journalVolume": "", "outCitations": [ "57c56d38c82054413fba6f2e60abdb4764042380", "9aad05bebcd8743ea0648ca684ac6bfcdd35b764", "2d72febb59898604ee5faea88d4b942ae89ee0f0", "7161eb8d3b1cb01769a36528f9c6bddd663545a9", "994afdf0db0cb0456f4f76468380822c2f532726", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "1976c9eeccc7115d18a04f1e7fb5145db6b96002", "275ee0b2a3d9388933b48855a215129ccb5561d6", "36ea668bb7617b9c1e6e98aebe96a0aaf90b569e", "3734d31548756bb315e5b613cd6e9245c00b1430", "2a280a11143da9040801193040e0700f79b3bba0", "845af2c9c3d2dc7f9ecdc18a669188b8584a4d67", "08b4efb97d98f4e66d8872899661f31ea58664da", "4caa943ab1ceebf9e105148f702344e7fad344a9", "4c967da691861cd41cb9b24d8f085257bdca8e69", "04b52c8230c3f9f4f4032b06458069d81c8f07b2", "4fa0d9c4c3d17458085ee255b7a4b7c325d59e32", "617279b61a0a2b1e60d73dd799068dfdb7de15cd", "7902365152dd93d2b200e3adb14a97169dc78e89", "2865b91766d38ca4391a88dcebdedf78bf09139d", "11e1e395431c73a3a961b0c5f30b0227f98873b0", "8ecc044d920df247fbd455b752fd7cc0f7363ad7", "9b087350cfa1d07bc97329ddbf73b8cd0dc616ab" ], "paperAbstract": "This paper presents a word-entity duet framework for utilizing knowledge bases in ad-hoc retrieval. In this work, the query and documents are modeled by word-based representations and entity-based representations. 
Ranking features are generated by the interactions between the two representations, incorporating information from the word space, the entity space, and the cross-space connections through the knowledge graph. To handle the uncertainties from the automatically constructed entity representations, an attention-based ranking model AttR-Duet is developed. With back-propagation from ranking labels, the model learns simultaneously how to demote noisy entities and how to rank documents with the word-entity duet. Evaluation results on TREC Web Track ad-hoc task demonstrate that all of the four-way interactions in the duet are useful, the attention mechanism successfully steers the model away from noisy entities, and together they significantly outperform both word-based and entity-based learning to rank systems.", "pdfUrls": [ "http://ai2-website.s3.amazonaws.com/publications/Word-Entity-Duet-Representation-for-Document-Ranking.pdf", "http://doi.acm.org/10.1145/3077136.3080768", "https://arxiv.org/pdf/1706.06636v1.pdf", "http://www.cs.cmu.edu/~callan/Papers/sigir17-Chenyan-Xiong-b.pdf", "http://arxiv.org/abs/1706.06636" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1603d035494395812a78f2cde1b9e1db8b2cc5a2", "sources": [ "DBLP" ], "title": "Word-Entity Duet Representations for Document Ranking", "venue": "SIGIR", "year": 2017 }, "1624bfc7d972303f9c5faf7913e7f2d7494562c2": { "authors": [ { "ids": [ "3430743" ], "name": "Zhaofan Qiu" }, { "ids": [ "3202968" ], "name": "Yingwei Pan" }, { "ids": [ "2053452" ], "name": "Ting Yao" }, { "ids": [ "1724211" ], "name": "Tao Mei" } ], "doi": "10.1145/3077136.3080842", "doiUrl": "https://doi.org/10.1145/3077136.3080842", "entities": [ "Adversary (cryptography)", "Artificial neural network", "COMEFROM", "Convolution", "Convolutional neural network", "Display resolution", "End-to-end principle", "Experiment", "Generative adversarial networks", "Hash function", "Image retrieval", "Nearest neighbor search", 
"Semi-supervised learning", "Synthetic data", "Test set", "Triplet state" ], "id": "1624bfc7d972303f9c5faf7913e7f2d7494562c2", "inCitations": [ "19be4580df2e76b70a39af6e749bf189e1ca3975" ], "journalName": "", "journalPages": "225-234", "journalVolume": "", "outCitations": [ "0cf6d01642ae8d0b415095c4b57e400775cfbee0", "794d506a2ad89b89ae741773011bc1de3b62bc51", "eb0c64244dcf238a2cbf479ab2fdc9047fc80bc5", "39e019e4d1863bcf809a1cdb3fa366f770277cb2", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "0823b293d13a5efaf9c3f37109a4a6018d05d074", "095db71a7a377c62695fa2b3e995e83474e59040", "5287d8fef49b80b8d500583c07e935c7f9798933", "27c64de4abf0a162366761f38b59961e888f7ec0", "1c8206df8d5ffaa8c4d3598718bf361556a6d651", "87a99f1e27c8f1a13f5c1971f4c38b0fa59456ef", "2c258eec8e4da9e65018f116b237f7e2e0b2ad17", "0ec905e3d2751674dcfde4f8d9882c88eb07a1ff", "454cb7c8f4e9c777b1062e71cf3bef717b941657", "12d0c11d546d91e776a170898ebf3a38c010695c", "0cd032a93890d61b9bd187119abee0d6aeb899f7", "219b7b157f2a559ecdffe21c2a0edf5285931298", "1c799eca7983c62f7815ac5f41787b3e552567b6", "ba753286b9e2f32c5d5a7df08571262e257d2e53", "213d7af7107fa4921eb0adea82c9f711fd105232", "061356704ec86334dbbc073985375fe13cd39088", "478815622d22d85b0ade98c59b6ac78c3fb1ac21", "1710c43395bb4fb0417854e87d0b171a849f4e7c", "56c05bd2779fa3738c71d82e272d4ebd1d8bfa42", "35756f711a97166df11202ebe46820a36704ae77", "6184ddbe780cb934f036b04dd1d28226b6bcbcce" ], "paperAbstract": "Hashing has been a widely-adopted technique for nearest neighbor search in large-scale image retrieval tasks. Recent research has shown that leveraging supervised information can lead to high quality hashing. However, the cost of annotating data is often an obstacle when applying supervised hashing to a new domain. Moreover, the results can suffer from the robustness problem as the data at training and test stage may come from different distributions. 
This paper studies the exploration of generating synthetic data through semi-supervised generative adversarial networks (GANs), which leverages largely unlabeled and limited labeled training data to produce highly compelling data with intrinsic invariance and global coherence, for better understanding statistical structures of natural data. We demonstrate that the above two limitations can be well mitigated by applying the synthetic data for hashing. Specifically, a novel deep semantic hashing with GANs (DSH-GANs) is presented, which mainly consists of four components: a deep convolution neural networks (CNN) for learning image representations, an adversary stream to distinguish synthetic images from real ones, a hash stream for encoding image representations to hash codes and a classification stream. The whole architecture is trained end-to-end by jointly optimizing three losses, i.e., adversarial loss to correct label of synthetic or real for each sample, triplet ranking loss to preserve the relative similarity ordering in the input real-synthetic triplets and classification loss to classify each sample accurately. Extensive experiments conducted on both CIFAR-10 and NUS-WIDE image benchmarks validate the capability of exploiting synthetic images for hashing. 
Our framework also achieves superior results when compared to state-of-the-art deep hash models.", "pdfUrls": [ "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/06/fp475-qiuA.pdf", "http://doi.acm.org/10.1145/3077136.3080842" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1624bfc7d972303f9c5faf7913e7f2d7494562c2", "sources": [ "DBLP" ], "title": "Deep Semantic Hashing with Generative Adversarial Networks", "venue": "SIGIR", "year": 2017 }, "1625054c364a02597f8528c226ad0160ffe299c6": { "authors": [ { "ids": [ "34973108" ], "name": "Patrick Flick" }, { "ids": [ "1740375" ], "name": "Srinivas Aluru" } ], "doi": "10.1109/IPDPS.2017.62", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.62", "entities": [ "Algorithm", "All nearest smaller values", "Artificial neural network", "Best, worst and average case", "Computational biology", "Data structure", "Distributed memory", "Information retrieval", "Natural language processing", "Parallel algorithm", "Shared memory", "Suffix tree", "Time complexity", "Worst-case complexity" ], "id": "1625054c364a02597f8528c226ad0160ffe299c6", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "12-21", "journalVolume": "", "outCitations": [ "44fdf50be9c3248b12f83072bd3db6c7914825a4", "643b57f4b0421a9efb3bf7397eb24822a2a7b882", "a5e766075757d85252b281bf13916fe80d43c32e", "a7705a6819179daa39bb717775548328f04e05a8", "84e8e291f307c3db9f4aa9da639a88ffd200f017", "c87671177ebdc6b8f955a82d1e3e60dbaef22b3f", "51f4bb5673ccddbf4bf27b02e2b16b77a175ac94", "d206440c08d9d27b7506d282317ba72a3196ca48", "a6e02957b814b301c3ac1ffdb5a3c36e3426b3d7", "4c67449de5ad4510ea68ad615ab90af613d18bab", "798cacfb9a8ca00806a876fcc6397124406a2234", "c8e2d72db05ad9ec096058d7a5ed4bcdbf37ec8a", "52283dd94ccf049d83f11b8af416be8488886df3", "4094d9add285c020f7b7639dace66eaabc9d7394", "8dfc1a49894632a27a88490db18441180a215fe2", 
"73921604081880cf903eab568341ebbd1525713d", "377c084a1c6ce806292f27856029b1ef19408b88" ], "paperAbstract": "A Suffix tree is a fundamental and versatile string data structure that is frequently used in important application areas such as text processing, information retrieval, and computational biology. Sequentially, the construction of suffix trees takes linear time, and optimal parallel algorithms exist only for the PRAM model. Recent works mostly target low core-count shared-memory implementations but achieve suboptimal complexity, and prior distributed-memory parallel algorithms have quadratic worst-case complexity. Suffix trees can be constructed from suffix and longest common prefix (LCP) arrays by solving the All-Nearest-Smaller-Values(ANSV) problem. In this paper, we formulate a more generalized version of the ANSV problem, and present a distributed-memory parallel algorithm for solving it in O(n/p +p) time. Our algorithm minimizes the overall and per-node communication volume. Building on this, we present a parallel algorithm for constructing a distributed representation of suffix trees, yielding both superior theoretical complexity and better practical performance compared to previous distributed-memory algorithms. 
We demonstrate the construction of the suffix tree for the human genome given its suffix and LCP arrays in under 2 seconds on 1024 Intel Xeon cores.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.62" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1625054c364a02597f8528c226ad0160ffe299c6", "sources": [ "DBLP" ], "title": "Parallel Construction of Suffix Trees and the All-Nearest-Smaller-Values Problem", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "162be49582b29ed18775f089810fb8cdc2ed6808": { "authors": [ { "ids": [ "39145389" ], "name": "Ashraf Mahgoub" }, { "ids": [ "35668056" ], "name": "Paul Wood" }, { "ids": [ "33785423" ], "name": "Sachandhan Ganesh" }, { "ids": [ "34054120" ], "name": "Subrata Mitra" }, { "ids": [ "34742961" ], "name": "Wolfgang Gerlach" }, { "ids": [ "2565819" ], "name": "Travis Harrison" }, { "ids": [ "30506753" ], "name": "Folker Meyer" }, { "ids": [ "1732163" ], "name": "Ananth Grama" }, { "ids": [ "1679009" ], "name": "Saurabh Bagchi" }, { "ids": [ "2228303" ], "name": "Somali Chaterji" } ], "doi": "10.1145/3135974.3135991", "doiUrl": "https://doi.org/10.1145/3135974.3135991", "entities": [ "Algorithm", "Apache Cassandra", "Artificial neural network", "Big data", "Brute-force search", "Configuration management", "Data model", "Database", "Genetic algorithm", "Metagenomics", "Middleware", "NoSQL", "Semi-structured data", "Service control point", "Surrogate model", "Throughput" ], "id": "162be49582b29ed18775f089810fb8cdc2ed6808", "inCitations": [], "journalName": "", "journalPages": "28-40", "journalVolume": "", "outCitations": [ "c7ef1e1c9ec1e8ed17b6b634b6963b37838b9614", "274a6c951c4aa82e6ef6b9f63c11f0ef66722c20", "d24359f702ff56d53dbecf6197b5af76bedbe8d0", "8913c7f26429b0e69ca4ad4ba07f7ec5c5567659", "8e14f79db4b0d7109ad4db46771ed8673cd265b6", "81ae1752547df73e8b83315515e11967035b8db3", "9b707fda4ef927f92d9ecb86dea82bd1ede59d49", 
"d7a86ea11c7683a46c324935f82a3065327974ef", "7fed1be51db2c780cd5504f519a5ab9bd484ac56", "3fbc9316a792974ba103be76702a6ce5c8d33f2d", "384b6bb8d2ea993ff3cb092ff41bb1c4d5308720", "4bfc9a7abd7caf5fb6ab35e3c0e7a7d96adaa1b8", "1b2457906994b5942b0ecc6e0ca38e2e3b2450c7", "9aa0d7253574e50fe3a190ccd924433f048997dd", "9c422cea3027bcb20f48ec94873edf2e071d3dd3", "24cece61e2128780072bc58f90b8ba47f624bc27", "51d6588ff7c1994f035a5a3be8d2e8ca62b78f22", "8162d4f3bfce2055c9a53c267af66103c3bfd167", "1f1f47da8fff8da53589d7eab36d6bae32b2c3d2", "6f2270c81885e2f5b3b6bc86f0b2099af9c55534", "841dbff787e715ffb5b4c6b5a9841e0a2da0f1a0", "13d98a5621e1249032a7ddc3a167dc15841a8ada", "6b9334e099770bb5e26e408d2bb238cd613afd71", "b93dd6f80d7685c21025aaaa27e972a2af6287d0", "40fecfef456c760912685b372151732b38e69d6e", "054383d121fef561a3d83882e5f2bae53bbe1a66", "18a5f443299784479e78d9e77f175af57cb2fa2b", "a6d3ceea74deba7ba762ec051492d79794933e98", "86c83cf81f2df8f6dc4c05843bb2412415e4655b", "1ccaac0fdcc5ab37a45d0cc616feeaa67a3d4ca1", "3a57e1242fdd3de7604f5b4fe21a8cd152dfce4f", "bc3f207f50ff335be514f66d3c3031b4040fabc2", "d82ea66f0de4d97abc16b6807acf9ae6c9061e77" ], "paperAbstract": "High performance computing (HPC) applications, such as metagenomics and other big data systems, need to store and analyze huge volumes of semi-structured data. Such applications often rely on NoSQL-based datastores, and optimizing these databases is a challenging endeavor, with over 50 configuration parameters in Cassandra alone. As the application executes, database workloads can change rapidly from read-heavy to write-heavy ones, and a system tuned with a read-optimized configuration becomes suboptimal when the workload becomes write-heavy.\n In this paper, we present a method and a system for optimizing NoSQL configurations for Cassandra and ScyllaDB when running HPC and metagenomics workloads. First, we identify the significance of configuration parameters using ANOVA. 
Next, we apply neural networks using the most significant parameters and their workload-dependent mapping to predict database throughput, as a surrogate model. Then, we optimize the configuration using genetic algorithms on the surrogate to maximize the workload-dependent performance. Using the proposed methodology in our system (Rafiki), we can predict the throughput for unseen workloads and configuration values with an error of 7.5% for Cassandra and 6.9-7.8% for ScyllaDB. Searching the configuration spaces using the trained surrogate models, we achieve performance improvements of 41% for Cassandra and 9% for ScyllaDB over the default configuration with respect to a read-heavy workload, and also significant improvement for mixed workloads. In terms of searching speed, Rafiki, using only 1/10000-th of the searching time of exhaustive search, reaches within 15% and 9.5% of the theoretically best achievable performances for Cassandra and ScyllaDB, respectively---supporting optimizations for highly dynamic workloads.", "pdfUrls": [ "https://engineering.purdue.edu/dcsl/presentations1/2017/final_middleware17_rafiki_ashraf.pdf", "https://engineering.purdue.edu/dcsl/publications/papers/2017/final_rafiki_middleware17_cameraready.pdf", "http://doi.acm.org/10.1145/3135974.3135991" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/162be49582b29ed18775f089810fb8cdc2ed6808", "sources": [ "DBLP" ], "title": "Rafiki: a middleware for parameter tuning of NoSQL datastores for dynamic metagenomics workloads", "venue": "Middleware", "year": 2017 }, "163b1068e22a1e4588dc7429a893c19ece344ef5": { "authors": [ { "ids": [ "40456757" ], "name": "Md E. Haque" }, { "ids": [ "1772774" ], "name": "Yuxiong He" }, { "ids": [ "1767761" ], "name": "Sameh Elnikety" }, { "ids": [ "1678732" ], "name": "Thu D. Nguyen" }, { "ids": [ "2118138" ], "name": "Ricardo Bianchini" }, { "ids": [ "1766093" ], "name": "Kathryn S. 
McKinley" } ], "doi": "10.1145/3123939.3123956", "doiUrl": "https://doi.org/10.1145/3123939.3123956", "entities": [ "Central processing unit", "Clock rate", "Control theory", "Dynamic frequency scaling", "Dynamic voltage scaling", "Frequency scaling", "Multi-core processor", "Operational amplifier", "Provisioning", "Scheduling (computing)", "Throughput" ], "id": "163b1068e22a1e4588dc7429a893c19ece344ef5", "inCitations": [ "2dc3d8536a8fa4660c6e842ef17c2d1553fceea6" ], "journalName": "", "journalPages": "625-638", "journalVolume": "", "outCitations": [ "1d3feee229f4bea3c8676b7ba560c46f233bc850", "c92cf7db5375c39e10aac25a0ad24185124cd848", "46742c000a65f676c00ec4e33d19d535a1c29dd7", "1a481ad18cbbc671e44679852067b763c66504b3", "07ccd8f48c5067fff6f50e6654ff410195ff0a91", "0d683085618e654a173b3590c4d2b431569cbfb6", "606f69c26c919d2b52272798c6350897879f5cfa", "6e669e90a34c4179f9364406d8a7a7f855745086", "d2fe3f26505c106cb2f61c86ba0a2dc316b0868f", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "c03998fba80568419db43e7e9fd78f8f7d1798dc", "08632fe2b934ed15d3499e7321282c81adc2c390", "4ffd50725b9cdff4ab0f13c9182cf3fdb671e76c", "5f36eab0d5936eefa6ace0d795312d3f1b879ddc", "e3fa998bede8f5db8d71349c7a0d53ad0aa4f7f7", "c7d468a29c281bf084d5a33774bd579b74ec66df", "18e9a7eea9c714c24152b9c6dd5cd12fb2c4b495", "00ab6bb0df7fd605038d64eb5798b31481a39dd0", "8f8a07137d8b015fb8d3fed6ab0294c05a5a3401", "167c651a235cf567ee8ca19b8d0e4d2f19e01b42", "3fd85d5f5217b7df40e8fd6a8ef7d285fc4bb7e8", "7f4b805160cfcbd546c70c5e781b64b85c2b4850", "e45246e686b5e15133fcfbe2c0eb61ac4a90c35d", "362d884ff43d8c7cd6bce184944cfc04cdd57c18", "3424f1f9679833a2f2b0d35dd3cbf97548ec0258", "1b1ea9f3f15f5160b77aa2177e7fdeb6eeed911a", "02d9013e5d370fb79ff1569a59190e18515fa3cd", "5a9c74d872efbadf4fae3390d604b6437e7f7ac8", "1d035d1d445f5ae5457db98af49ce1b87be1ebd5", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "27f8ac77b89986f7a24f929b200b6a358b8f7d01", "1ed676aab20ee65f27718f6162ab589f319de1a4", 
"480a952f7d24cf6d3ccda62439424eea6a8fd469", "72b7fea9e527f18182ba44281b44de790e32d155", "41690be86b39c55a26ea056261513ddd726d6601", "352a8957005dc5519b15ed1870751ec494d66395", "56c726104fa2df0d20bf0d99a4592242716f2e28", "109df0e8e5969ddf01e073143e83599228a1163f", "00651cb1e9f1f5127f34bd0184b3c83a0693783d", "53a5485c197ec44f6fb9f0308cac7716e50d1584", "07f3b8cfd59624acf80e16794bd3f2bc69acd8e7", "6f090d59bde17b7604985acf38e26785e794bcc0", "4aec1ba26ca8a42a041dc0ac6a43dc91f1b6abc1", "2c9b77a063a3459ed8f3be0c0066724a38e225e5" ], "paperAbstract": "Interactive service providers have strict requirements on high-percentile (tail) latency to meet user expectations. If providers meet tail latency targets with less energy, they increase profits, because energy is a significant operating expense. Unfortunately, optimizing tail latency and energy are typically conflicting goals. Our work resolves this conflict by exploiting servers with per-core Dynamic Voltage and Frequency Scaling (DVFS) and Asymmetric Multicore Processors (AMPs). We introduce the Adaptive Slow-to-Fast scheduling framework, which matches the heterogeneity of the workload --- a mix of short and long requests --- to the heterogeneity of the hardware --- cores running at different speeds. The scheduler prioritizes long requests to faster cores by exploiting the insight that long requests reveal themselves. We use control theory to design threshold-based scheduling policies that use individual request progress, load, competition, and latency targets to optimize performance and energy. We configure our framework to optimize Energy Efficiency for a given Tail Latency (EETL) for both DVFS and AMP. In this framework, each request self-schedules, starting on a slow core and then migrating itself to faster cores. At high load, when a desired AMP core speed s is not available for a request but a faster core is, the longest request on an s core type migrates early to make room for the other request. 
Compared to per-core DVFS systems, EETL for AMPs delivers the same tail latency, reduces energy by 18% to 50%, and improves capacity (throughput) by 32% to 82%. We demonstrate that our framework effectively exploits dynamic DVFS and static AMP heterogeneity to reduce provisioning and operational costs for interactive services.", "pdfUrls": [ "http://www.cs.utexas.edu/users/mckinley/papers/fof-micro-2017.pdf", "http://doi.acm.org/10.1145/3123939.3123956" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/163b1068e22a1e4588dc7429a893c19ece344ef5", "sources": [ "DBLP" ], "title": "Exploiting heterogeneity for tail latency and energy efficiency", "venue": "MICRO", "year": 2017 }, "16666c536b04035b013c718bd91aad3594b4b894": { "authors": [ { "ids": [ "3257188" ], "name": "Javier Picorel" }, { "ids": [ "2217840" ], "name": "Djordje Jevdjic" }, { "ids": [ "1701364" ], "name": "Babak Falsafi" } ], "doi": "10.1109/PACT.2017.56", "doiUrl": "https://doi.org/10.1109/PACT.2017.56", "entities": [ "Data-intensive computing", "Gigabyte", "Limiter", "MPU-401", "Memory address", "Memory management (operating systems)", "Microprocessor", "Page (computer memory)", "Page table", "Serialization" ], "id": "16666c536b04035b013c718bd91aad3594b4b894", "inCitations": [], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "303-317", "journalVolume": "", "outCitations": [ "0ec0e80ebb61ddf97dc26cea65e5013b6de998b1", "9c001d2546b07f4325dfa32d46f602bdf56ec474", "bf70d60fc8d1de5fa53e8220a014fe463de4b7e5", "07c3c9f1be15b52e259309b2e0a9ca71fa7dc76f", "33196b69eeec351efd5178eae5da92979bdc6fd7", "30c7ccf9860911b37bb6bf17248e2d84afc4b4ba", "1c2aed008f7bcb769d0e6f8109434c766fb22bde", "7b943438994370dc4903ce28e359ff98fc23027f", "0f44833eb9047158221e7b3128cde1347b58ccd6", "1a5ce35fc5ad575c2c9e5f692bf263082d656f80", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "1bed30d161683d279780aee34619f94a860fa973", 
"4678cdcf7e57c1563379ac7cc344254f01ace572", "44f779d08d629e915ee7cbe1c0f4f17e9f6a626f", "9e2dca06b0ea81c82aa749f9bc7bad220247ebe6", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "27ee92f60f650feda893a853d4e552a1e9dc2979", "61d13a9a4a6cb66e2d5fcf4f75d97570dca8f3fe", "0693ff4b3a8d1452b897a876d3ffe6b2074e98e4", "2a526bdacd04ea0d6d31cd0e62ed0a772f7d57c3", "3f63a2362b1fabc83194d10d6b5a0b2a56c1799b", "40f85cbe67ce1ce89009985e9caed648dd08c12e", "5ece19ddc8abc5454426deece280d0750972c2da", "d9043a6c844905687ac72054d83d7680a82ece9d", "3c0bc4e9d30719269b0048d4f36752ab964145dd", "6017dd9b32e3c58b8a85c44971d6d82a1e3560cc", "1b3bcc24a88caf011201e1ed2a0f94f36a415d50", "2bb0f3f198b6f05454a2b4f8b6d3c3ed1c559371", "01a337488e77c2fec3037cf7432b7ac10e39b45c", "89f4842ef627eb667691b5329e1eaac9bd66a0bf", "0fdc029342552729b72e1245a22b60ac96020124", "5e41307a2f2850f164ad0175f372799ce61e0bf9", "2e5ef3e461eaccf533aaae000ef847ae581d4363", "06902cb95ede2c305db4000852014f276b25c082", "30db3e0e6add0c2c699e863e56eb8b5e89b10951", "1930a2ecb0cd7439647cfe3e79d4bbad198f7697", "0653e2ed9f683868cb4539eb8718551242834f6b", "2037e142f3b45da72d5c99c0c0de2bb506d4a829", "f29dac2e26273532c81c933f091c7a60b9480f94", "6ac87f5ef30f787e5dacd59e97d7804e99a7366e", "83d8f3fe2468adba47386a90e56e72373f757744", "1b1ea9f3f15f5160b77aa2177e7fdeb6eeed911a", "e4aed18e1b965f90a86b57a787c52af44a8c20a4", "72dfed87d3549369ae93dbbbfd371f88c4e344c8", "19de90c933c20849c85d5428c8a643210b97ec83", "02d9013e5d370fb79ff1569a59190e18515fa3cd", "6068cdb7a834f47a38ffcbfa4128e70ed3e385eb", "28af524636137424ad574afa38463b4771e6f006", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "343a384d5476ead9496f96559aba5ad09e95e01e", "42c29d08d9a7bcf5b481300cb06974d6067de5be", "ade263c815bf995916a2fa56c89f70efacf6ec5f", "7a978f2902460e732c50c36a171deb11733df1fc", "369702e8b28a410f7cf4347a1c26e2ea2e0a6a79", "d63e4cada8347686372d63a3d00afa89a1515a31", "1cb0679ae82be093268747da0f634281ea6a41df", "59ca42e1911be417863d0f7068b89e1e59189cc9", 
"73115d3d434d81ba79f125a853a993598c602d26", "04dcd8acdc16e42463e783ea5bc8283607ccee3f", "4e8505919eb22265f107ebbeeee3fa78bf6d893a", "01a7c93e6b5d65b9f8e9b9db8b556964dcf9bf1f", "39ac2e0fc4ec63753306f99e71e0f38133e58ead", "07add9c98a979e732cfa215c901adb1975f3f43a", "07a63423cc46ec67ff18f707379b77ebdfbc1eb9", "48215f3287f7d527dc63c5b504ca8397d3bdef4e", "08d800d993514c09e1bcf48f244e40346abdc361", "776846c6a922e3a9ae25d03e66dda5bba772f576" ], "paperAbstract": "Memory and logic integration on the same chip is becoming increasingly cost effective, creating the opportunity to offload data-intensive functionality to processing units placed inside memory chips. The introduction of memory-side processing units (MPUs) into conventional systems faces virtual memory as the first big showstopper: without efficient hardware support for address translation MPUs have highly limited applicability. Unfortunately, conventional translation mechanisms fall short of providing fast translations as contemporary memories exceed the reach of TLBs, making expensive page walks common.In this paper, we are the first to show that the historically important flexibility to map any virtual page to any page frame is unnecessary in today's servers. We find that while limiting the associativity of the virtual-to-physical mapping incurs no penalty, it can break the translate-then-fetch serialization if combined with careful data placement in the MPU's memory, allowing for translation and data fetch to proceed independently and in parallel. We propose the Distributed Inverted Page Table (DIPTA), a near-memory structure in which the smallest memory partition keeps the translation information for its data share, ensuring that the translation completes together with the data fetch. 
DIPTA completely eliminates the performance overhead of translation, achieving speedups of up to 3.81x and 2.13x over conventional translation using 4KB and 1GB pages respectively.", "pdfUrls": [ "https://arxiv.org/pdf/1612.00445v1.pdf", "https://arxiv.org/pdf/1612.00445v2.pdf", "https://infoscience.epfl.ch/record/230437/files/EPFL_TH7875.pdf?version=1", "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.56", "http://arxiv.org/abs/1612.00445", "https://arxiv.org/pdf/1612.00445.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/16666c536b04035b013c718bd91aad3594b4b894", "sources": [ "DBLP" ], "title": "Near-Memory Address Translation", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "166f30c1eee33b4fd4aebee01b0117cfaeddd29c": { "authors": [ { "ids": [ "1785569" ], "name": "Michael Elkin" } ], "doi": "10.1145/3087801.3087823", "doiUrl": "https://doi.org/10.1145/3087801.3087823", "entities": [ "Algorithm", "Deterministic algorithm", "Distributed minimum spanning tree", "Ink Serialized Format", "Line graph", "Minimum spanning tree", "Polylogarithmic function", "Randomized algorithm", "Spanning tree", "Time complexity" ], "id": "166f30c1eee33b4fd4aebee01b0117cfaeddd29c", "inCitations": [ "63f85cffda35c59ba2ce43901e634ceabe6a1c25", "dc6eac81c50ab561185844f59f6face43a59e876", "da5ffa5b037a39f8c599630c12ecbdda46213725", "2af2f01d056dd2c95182046ce2e7ee342b8182c6", "4a0963249a012d38ae8169bd2f552a7a59121105", "4f3369304f33baa8a4ab755de21a7d1d0d1f11ac", "57e0902f333023c1a242753577cedb0457d96fa3", "a789b21da70602d929f81a57c2653237be5848b6" ], "journalName": "", "journalPages": "157-163", "journalVolume": "", "outCitations": [ "fdbc86c139599783a8defd00989df1c39c605602", "ae51cb991ea1820115cc2e42dbe1b2468bcaff05", "8f7c839a3b7e9716db320748572e98b3fb01fa2b", "42941c1ce2a460828ceb80781da1b9300e7833a4", "037fa2c0495830857ed0e93ca05af754723c4165", 
"fd63237a0b1914ea4ff25f95083cc301b40cab05", "0b291c329178573c7c78a653ed3282a5350cae58", "73076dc931d4d337519f0a6f12afde63396490f4", "1000a23696290bb7f777c751933a91772ded4a07", "46b1390a0464a6f9dd73a34bc2688b839980c5fc", "7c8b19add74d0d94b6b2199b80b30c181d8c01c9", "6776a89b92b845ac591a945795ae1ff7d1540b1b", "a73740f7dfec1cb431b373d78b6a69b9776840b5", "2fce8527c81099869cc21c6d236b1ca72be6693b", "1565669b4e162eb0e8790613364bee17a3048ab6", "611c97494c9deabe1a5326ff4df47bd145457866", "44039a59510de5cfb055285acbf9273143acabc7", "cdfd5de78df6a2b97b05001de962c7112c736a51", "23a19cc949fb43f5420c7b1ab7ea5be2187b3bc9", "45e0b544421fec82f71e196db60258f0f71b7786", "17d3d569ff60ef353c50dc498f8fe31bc2884615", "1685f6fb2f162ffd995b4aafcaf96b5cc5bf08fc", "d49b5c88f0775329445f4be7054936e8cf382d1d", "d7d1bfd55973cef5fd93d4398cf997e584d75e69", "087b3230a146589bdab5065b97adcaf5b90a72c7", "0d34d16f8ed9395184952402b25d06f75106e03c", "68c5797320e4272e4d0a96440eba5558be3923e7" ], "paperAbstract": "Distributed minimum spanning tree (MST) problem is one of the most central and fundamental problems in distributed graph algorithms. Garay et al. [GKP98, KP98] devised an algorithm with running time O(D + \u221a n \u00b7 log\u2217 n), where D is the hop-diameter of the input nvertex m-edge graph, and with message complexity O(m+n). Peleg and Rubinovich [PR99] showed that the running time of the algorithm of [KP98] is essentially tight, and asked if one can achieve near-optimal running time together with near-optimal message complexity. In a recent breakthrough, Pandurangan et al. [PRS16] answered this question in the affirmative, and devised a randomized algorithm with time \u00d5(D + \u221a n) and message complexity \u00d5(m). They asked if such a simultaneous timeand message-optimality can be achieved by a deterministic algorithm. 
In this paper, building upon the work of [PRS16], we answer this question in the affirmative, and devise a deterministic algorithm that computes MST in time O((D + \u221a n) \u00b7 log n), using O(m \u00b7 logn + n logn \u00b7 log\u2217 n) messages. The polylogarithmic factors in the time and message complexities of our algorithm are significantly smaller than the respective factors in the result of [PRS16]. Also, our algorithm and its analysis are very simple and self-contained, as opposed to rather complicated previous sublinear-time algorithms [GKP98, KP98, Elk04b, PRS16]. \u2217This research was supported by the ISF grant No. (724/15).", "pdfUrls": [ "https://arxiv.org/pdf/1703.02411.pdf", "http://arxiv.org/abs/1703.02411", "https://arxiv.org/pdf/1703.02411v1.pdf", "http://doi.acm.org/10.1145/3087801.3087823" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/166f/30c1eee33b4fd4aebee01b0117cfaeddd29c.pdf", "s2Url": "https://semanticscholar.org/paper/166f30c1eee33b4fd4aebee01b0117cfaeddd29c", "sources": [ "DBLP" ], "title": "A Simple Deterministic Distributed MST Algorithm, with Near-Optimal Time and Message Complexities", "venue": "PODC", "year": 2017 }, "168ff6d977c0e2267223f189cfa70555851def54": { "authors": [ { "ids": [ "2891738" ], "name": "Rogers Jeffrey Leo John" }, { "ids": [ "2406599" ], "name": "Navneet Potti" }, { "ids": [ "2042232" ], "name": "Jignesh M. 
Patel" } ], "doi": "", "doiUrl": "", "entities": [ "AVA Radio Company", "Controlled natural language", "Data science", "Interface (Java)", "Natural language", "Pipeline (computing)" ], "id": "168ff6d977c0e2267223f189cfa70555851def54", "inCitations": [ "350203890dc2ea7535e876666e33eb7ec9323bef", "45cc47c1beaad4e08a85c7dfc69cb10913f824ed", "4853a26200889f033c0f509abf0f91d8cafba55b" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "02a698470f476de722e78b20d0ddd92d3781bbfc", "2c6807215be611dbefe9c21ed439a9004268da0b", "4954fa180728932959997a4768411ff9136aac81", "0558c94a094158ecd64f0d5014d3d9668054fb97", "01e1fa7924b3eb76b73f1828c93805f3ba028bae", "378adc4d44939c1f93befa704273c41164972d63", "7c7ba9df3ab69f0f3502a7a873c031c8244187ee", "8ba86f29ef51b4e786e652449ae449fa68a625ee", "364fb0677a5d7083e56c0e38629a78cb94836f53", "47c27cd2c37ba331ced0b24fba43fd917d5f6d19", "973c9c46452c300d475c86245cee6cdda6a59d8b", "0a267d927cfae039cf0a9c995a59ded563344eb6", "695825823c33f54d2b2959e7855ecb6bfa9c0b49", "f106f7c38e7225f797261cb0fc3755ece2acbbd2", "4168865e09d4aa9204b114d1dcdec70dd2d1f784", "9fb9b09550dc6afcbe8b5ed05c9ced1282408112", "ac67d5f9c89d8d72fbd074f94079608220348f3f", "7e6c844fd558159b02e241d4ad03adef91ba3461", "ef10af10c87ab39e82865da94b77be64973a6de7", "412a0bb5a3baa91b62053d82c562bc172df0439f", "0544001c907a6f3a577626aee9fd2c14984d3fe3", "0d01a06340f03dd4ff1bb16ac462a58402253ebb", "60ab16c55c2353f89c59b0967ee3af123140a2b9" ], "paperAbstract": "Enterprises increasingly employ a wide array of tools and processes to make data-driven decisions. However, there are large inefficiencies in the enterprise-wide workflow that stem from the fact that business workflows are expressed in natural language but the actual computational workflow has to be manually translated into computational programs. In this paper, we present an initial approach to bridge this gap by targeting the data science component of enterprise workflows. 
In many cases, this component is the slowest part of the overall enterprise process, and focusing on it allows us to take an initial step in solving the larger enterprise-wide productivity problem. In this initial approach, we propose using a chatbot to allow a data scientist to assemble data analytics pipelines. A crucial insight is that while precise interpretation of general natural language continues to be challenging, controlled natural language methods are starting to become practical as natural interfaces in complex decision-making domains. In addition, we recognize that data science workflow components are often templatized. Putting these two insights together, we develop a practical system, called Ava, that uses (controlled) natural language to program data science workflows. We have an initial proof-of-concept that demonstrates the potential of our approach.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p87-john-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/168f/f6d977c0e2267223f189cfa70555851def54.pdf", "s2Url": "https://semanticscholar.org/paper/168ff6d977c0e2267223f189cfa70555851def54", "sources": [ "DBLP" ], "title": "Ava: From Data to Insights Through Conversations", "venue": "CIDR", "year": 2017 }, "16a0ff94b980d0ad9cd5ae3b9a3769c6f358842a": { "authors": [ { "ids": [ "3098251" ], "name": "Muhan Zhang" }, { "ids": [ "9527255" ], "name": "Yixin Chen" } ], "doi": "10.1145/3097983.3097996", "doiUrl": "https://doi.org/10.1145/3097983.3097996", "entities": [ "Adjacency matrix", "Algorithm", "Artificial neural network", "Experiment" ], "id": "16a0ff94b980d0ad9cd5ae3b9a3769c6f358842a", "inCitations": [ "df6bd4a73f33150aedf18db417753e1b0ac495aa", "4d086243ac132523aa287100f5490739cc97c6f9", "733f49039d1ce077a116d03633e3338125f33196" ], "journalName": "", "journalPages": "575-583", "journalVolume": "", "outCitations": [ "8cd9aa720a3a2f9dcb52ad9eb1bf258a80ce0648", "6074c1108997e0c1f97dc3c199323a162ffe978d", 
"26d51c7b5d666abda8b45eb1c942bbc3e0ea05f2", "1c1876754e0f1f67d652505ccfd37531488ec848", "e56f6386cf966339920a679680084e9d655eaa9a", "6abb292510ec6dfdb7a845d316c2d61539670de8", "114ea414b025a68d641efad9b74295a5625b9e7e", "5c69418969d8d5a286306f1087108dcceb50d39e", "5262fe8369992259be27165ccd09d1d31c7a4def", "1c2cae04e0e00dcec3fe6840b2daa00ae9e8a0a3", "668ea7577f7205fbdd067e5df6a1722a976cf1b8", "64ea1937b118779757b21d13fef5bf5bddec7ed2", "3219fbcbe0f84e371b3266beb7678db567bd7c5a", "9aa88a8a354f1d322e242376d27d0474e50252f8", "1efdad6f91e830fd64306e4625f74191b05ef9c4", "0250b5c0f7a414dec8c7a0aa7be20c9637eeb6ec", "386de1c345b6ed30b3410edadb5f06a2dcb64b76", "7e1874986cf6433fabf96fff93ef42b60bdc49f8", "009dbf3187862352aac542bf7d61e27bce6b27f5", "e50f4d3316d13841c287dcdf5479d7820d593571", "15973b0bfdbe84d1cb6e2a35fc857a1d125a3923", "e9fa7e93d2f4e958cde16c6cf9d5bc966f8e7ffc", "1164ec0b8d2bd8e95b9fc07e9669ff9d4d379c64", "76c361552181f3798a3fae7485a22a333af85047", "1f0612de1f191abadf250b78cd78f884203cca5e", "39348c10c90be968357e2a6b65d5e0e479307735", "5987ac0ae8e468a402e3c2513e7779af1ab19591", "52152dac5c7320a4818b48140bfcd396e4e965b7", "2f84d432e46ed1253764e238e3038c9c791790e7" ], "paperAbstract": "In this paper, we propose a next-generation link prediction method, Weisfeiler-Lehman Neural Machine (WLNM), which learns topological features in the form of graph patterns that promote the formation of links. WLNM has unmatched advantages including higher performance than state-of-the-art methods and universal applicability over various kinds of networks. WLNM extracts an enclosing subgraph of each target link and encodes the subgraph as an adjacency matrix. The key novelty of the encoding comes from a fast hashing-based Weisfeiler-Lehman (WL) algorithm that labels the vertices according to their structural roles in the subgraph while preserving the subgraph's intrinsic directionality. 
After that, a neural network is trained on these adjacency matrices to learn a predictive model. Compared with traditional link prediction methods, WLNM does not assume a particular link formation mechanism (such as common neighbors), but learns this mechanism from the graph itself. We conduct comprehensive experiments to show that WLNM not only outperforms a great number of state-of-the-art link prediction methods, but also consistently performs well across networks with different characteristics.", "pdfUrls": [ "http://www.cse.wustl.edu/~muhan/papers/KDD_2017.pdf", "http://doi.acm.org/10.1145/3097983.3097996" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/16a0ff94b980d0ad9cd5ae3b9a3769c6f358842a", "sources": [ "DBLP" ], "title": "Weisfeiler-Lehman Neural Machine for Link Prediction", "venue": "KDD", "year": 2017 }, "16b21c28b2de6c1f7786a6fde8eaa2e55f2f35d4": { "authors": [ { "ids": [ "2912682" ], "name": "Bilge Acun" }, { "ids": [ "2306354" ], "name": "Eun Kyung Lee" }, { "ids": [ "1792526" ], "name": "Yoonho Park" }, { "ids": [ "1731961" ], "name": "Laxmikant V. 
Kal\u00e9" } ], "doi": "10.1109/HiPC.2017.00020", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00020", "entities": [ "Air cooling", "Algorithm", "Artificial neural network", "Computer cooling", "Computer fan control", "Data center", "Load balancing (computing)", "Preemption (computing)", "Server (computing)" ], "id": "16b21c28b2de6c1f7786a6fde8eaa2e55f2f35d4", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "94-103", "journalVolume": "", "outCitations": [ "e0140bbf28f231ccddb639b299ba515f1f5f0e15", "14bd3627a85b658ea1b8450039df7fe0fb57379e", "b73e812786d3a92fa084dc7979caf7375b5b347d", "23a96fc4fd2d494a3ddf6d2438bef3ba768bc157", "025b0273eb6ccd57e6a949fe44225ca5d8041cf9", "916ceefae4b11dadc3ee754ce590381c568c90de", "098796d824396aed767fe4b7913c68eb6f7d9234", "1a84722c3b8ecba1b22c8ff249d61a231819c916", "6e5c8c274850d5e7e1a4d01a13b4d3d96d037227", "0c36ce0ea8ec9070edef08d833e2bd18ff919b20", "6c688369a155c72bcfb154e5b4d219bb09dcbf1b", "28defd15c54f603a5415406f6be771f8366ec6be", "c78f7a2e573d189c76d91958204cfe96248f97f9", "90b757b41aac8e5d5cecbde427e894b94bd6020a", "4cc39152b31cf39ff7ae65dff5b7badfd7c05d92", "1e8233a8c8271c3278f1b84bed368145c0034a35", "869efd7e0d1d00cc59e94089a29d492bda8b35bb", "ae70d1a24fc2a21d0b9b395d753ef81244d041f3", "4da726d463a9baaa7eac989544673d4bb6e1de74", "943588e0df950203d281bfd0c2f9c43673dfef26", "1621cedc045f66217bed460cf23c32d54b0af210", "81ae3970c9eaae058770420e7ebb31f97af498b5", "1585eaffcf9c9836eb1607e279e43ce2793e59a0" ], "paperAbstract": "Increasing scale of data centers and the density of server nodes pose significant challenges in producing power and energy efficient cooling infrastructures. Current fan based air cooling systems have significant inefficiencies in their operation causing oscillations in fan power consumption and temperature variations among cores. 
In this paper, we identify the cause of these problems and propose proactive cooling mechanisms to mitigate the power peaks and temperature variations. An accurate temperature prediction model lies behind the basis of our solutions. We use a neural network-based modeling approach for predicting core temperatures of different workloads, under different core frequencies, fan speed levels, and ambient temperature. The model provides guidance for our proactive cooling mechanisms. We propose a preemptive and decoupled fan control mechanism that can remove the power peaks in fan power consumption and reduce the maximum cooling power by 53.3% on average as well as energy consumption by 22.4%. Moreover, through our decoupled fan control method and thermal-aware load balancing algorithm, we show that temperature variations in large scale platforms can be reduced from 25 C to 2 C, making cooling systems more efficient with negligible performance overhead.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00020" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/16b21c28b2de6c1f7786a6fde8eaa2e55f2f35d4", "sources": [ "DBLP" ], "title": "Support for Power Efficient Proactive Cooling Mechanisms", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "16b4a0d02f844e27a30dd722811442dd8e6a580d": { "authors": [ { "ids": [ "38515566" ], "name": "Chaz Lever" }, { "ids": [ "3048941" ], "name": "Platon Kotzias" }, { "ids": [ "1706558" ], "name": "Davide Balzarotti" }, { "ids": [ "40008021" ], "name": "Juan Caballero" }, { "ids": [ "2805955" ], "name": "Manos Antonakakis" } ], "doi": "10.1109/SP.2017.59", "doiUrl": "https://doi.org/10.1109/SP.2017.59", "entities": [ "Computer security incident management", "Digital footprint", "Malware", "Malware analysis", "Network traffic control", "Potentially unwanted program", "Signal processing", "Threat (computer)", "Trace theory" ], "id": 
"16b4a0d02f844e27a30dd722811442dd8e6a580d", "inCitations": [ "80efa366c256e7a4ecbbe10e308f275d17be5a3c", "061c38987c85b28d072fd0c2f94089b5992135cd", "7b9f4e98fef61f5caa63dd04a554945e43860d56", "191cef5d1d84b81c8eb77119c7029fa74d23d9bc", "c284db823f7fd79444c6cf378949201479311789", "0a71dda39e97c9a4b8c4c88a135b9cda5bae588b", "15c76d5aade357cbe4c3c76adb00d478f50d7a65" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "788-804", "journalVolume": "", "outCitations": [ "022d706692916789172fec132e04afea4ca0753c", "21066aefc6f4ce68c889336a476600248c5a8fc0", "261360b612878b3e249b5357cef159385024e637", "2ec814320c8ee12cd002d9b5f40facbeffd05ecb", "2b44ed62bfcca19c448d5a53da723664133063c7", "10005c68cd81341b543a761e7cb7f4730127f10a", "192adf536648ad635ecb3768b943895579bcc7fe", "35347e426812f44addc5885ba54c9d48ca14fb72", "5626647d1718dfaf9c3d604e2e99b43b64813f92", "01e1cbc042ba2c0b6fa73e41cf0a5ba2e972d0cf", "7ddee6f70300cf6bc5860e6aa35ea6d79ed9cb6e", "225bfbba17ab76e807fabb87c95b71472738e3ba", "6e9f6466a125587b4eeeb3845a5d8afe8b1e902c", "35060a869ea38caf58426ea0c6aa65e59021f12a", "03fca0c36e12df46ed7b83f6ab010296a1b1e4ff", "b0d2e7a274bf7f778f0cd22486ead79d58c859ac", "28013fc6979d483895e2998d05a7373807290d3e", "6ef3b5a0af396e42711aab8ffbfb1728c2cb95b2", "129ed742b496b23efdf745aaf0c48958ef64d2c6", "12f4f695491536ef4a3b9b1f9addb92b0cd2195b", "2e6dd802446d9c83bc40d058cffb78b1c506658d", "0224d4ed379f82dd0aade68f40410967dec6c6ff", "69349684bf61888dc9fe5ff679ff1c7572d2d535", "39b24b34ee1b0071f3fade608f3b2d9fa41fd050", "0bb1bb38263368784df02ca4546fba3ea12c7c0e", "519a022f6103a68331402f499a9bc9447ef70995", "4c6afa1af19482d9f4b18ad0093ec02c04177e70", "143cd817835243e873f82f28367c8866f779187d", "f74804eaf20b71da1ad2ebbbb429595c133459c8", "98b24163592c4d3e95a44ad6b8c567cbed593c4f", "31e618da461317bcce5799de7ba1b2ef0d5488b2", "6c5d03568e012a95c5a663309c8c21ff1e07e53f", "eddfa6492ad90f2ce6b6f8b5905e1eb23fd91a2a", "884a1fed267162e8659fe9ee5b8a9c161d407c50", 
"31478e07f1599d9f9adba8d598bcaa54455e9015", "0db0a7e1ec45634d27e655a5004bd76472a48a4e", "2265f0d8b0d6220829a9be834b2ed80d0da91370", "0c7c58bf7addb18b432ef36dc0e662624d7f3e4b", "4855dbfc2b5e2e00dabce672ba7d2716c3d77347", "023555bdd427c20fb77a795c3a15e77ca885142d", "41b4e8176fa66a7fdd88da4e038fc5a5108e827c", "0be628988f8ee0beaad57d68cbae8b635f0b555c", "3e60f205a6b96dc709462e2f3c485a66ee3d2f78", "0796bb6c803e4256d7bdc0885c6a26d058da3319", "092cb804373c3104ce9ef6a2b2237ec32534674e", "1ec6bb8cae8696bab6b01074e811c3350bf2ac53", "ade81073e3d2cf8de418c64bb32bab6365f8d7e0", "2220e36c5ecf94a7e763c97842c6d25025aef2ac", "03b254698d26c38f1d0ef6cad739e5d49dc31f9b", "9c8a062237f5e63c9378008ae34f9cd0e59256d1", "58719ae054885e86b3edcfb52057dabc4ae20067", "5bca35087d0b524944279217218cb161db1e1ea5", "2152f9f91e798c23715fdce699b6a8f0f8d43170", "000c557e5bcb9def56479751f06fc8eec3f8acda", "11efa6998c2cfd3de59cf0ec0321a9e17418915d" ], "paperAbstract": "Both the operational and academic security communities have used dynamic analysis sandboxes to execute malware samples for roughly a decade. Network information derived from dynamic analysis is frequently used for threat detection, network policy, and incident response. Despite these common and important use cases, the efficacy of the network detection signal derived from such analysis has yet to be studied in depth. This paper seeks to address this gap by analyzing the network communications of 26.8 million samples that were collected over a period of five years. Using several malware and network datasets, our large scale study makes three core contributions. (1) We show that dynamic analysis traces should be carefully curated and provide a rigorous methodology that analysts can use to remove potential noise from such traces. (2) We show that Internet miscreants are increasingly using potentially unwanted programs (PUPs) that rely on a surprisingly stable DNS and IP infrastructure. 
This indicates that the security community is in need of better protections against such threats, and network policies may provide a solid foundation for such protections. (3) Finally, we see that, for the vast majority of malware samples, network traffic provides the earliest indicator of infection—several weeks and often months before the malware sample is discovered. Therefore, network defenders should rely on automated malware analysis to extract indicators of compromise and not to build early detection systems.", "pdfUrls": [ "http://www.eurecom.fr/en/publication/5177/download/sec-publi-5177.pdf", "https://software.imdea.org/~juanca/papers/lustrum_oakland17.pdf", "http://www.iisp.gatech.edu/sites/default/files/documents/a_lustrum_of_malware_network_communication_-_evolution_and_insights.pdf", "http://astrolavos.gatech.edu/articles/sp17-candia.pdf", "https://doi.org/10.1109/SP.2017.59", "http://iisp.gatech.edu/sites/default/files/documents/ieeessp17_lever.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/16b4a0d02f844e27a30dd722811442dd8e6a580d", "sources": [ "DBLP" ], "title": "A Lustrum of Malware Network Communication: Evolution and Insights", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "16d0de1e3d3c8e6020a0a3bd3accabfd6a937497": { "authors": [ { "ids": [ "27802227" ], "name": "Priyank Faldu" }, { "ids": [ "2309941" ], "name": "Boris Grot" } ], "doi": "10.1109/PACT.2017.32", "doiUrl": "https://doi.org/10.1109/PACT.2017.32", "entities": [ "Branch predictor", "Central processing unit", "Dynamic voltage scaling", "Manycore processor", "Moore's law", "Multi-core processor", "Spatial variability", "String metric", "Transistor" ], "id": "16d0de1e3d3c8e6020a0a3bd3accabfd6a937497", "inCitations": [], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "180-193", "journalVolume": "", "outCitations": [ 
"7bb72a9437a1ddb7e0eced6f243b8f9e66438f28", "17d72a792b35fa9e65499b3e8f9c15a1f6f8b7b6", "08237b5a7862d65185977e3dac0f81e616188add", "2dc59e60b34b3863e4eb381b17384105fe523cec", "43260df86b2aaa20824d73eff48e0b49162689cb", "06125169a21ef17641d7199544417b21c378eede", "313b6d6a2fe071869507ba7530aef10c91aefe11", "2ab1b98b642e341006d18ebce41359e95373422f", "057ecc6780a2b2cb533884167962654451e4960b", "294273a4a63a4d06d3dbd2880598a9cd64b3087f", "25e0dcb0e7b3446fbf16c48e9a6a4ad36f645f3b", "bfd1b422fa359ed49811e6b49fd9cdc443d07c22", "4a58e3066f12bb86d7aef2776e9d8a2a4e4daf3e", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "0717371b254df3e466a11d1965c2c9541a43b7a3", "a169ca8993abcfa03eb22c50a4227983d740b31a", "8671317d25f917af263b457612f959823d5c86b1", "7779c10dfa1f84953016b6292844815c5faf84f5", "2790284b6a16790d03b0cb5ed46bc6b0fecde1eb", "77d4fb23ce0b5499016f2c162a5430d04f976542", "12ecd8079ae103ccdf9c8b5a5fb2e2844b7a25da", "1728de7b027e827dfc67ceb3b0e23b841a4b1538", "792aa5a81ac1d344de450ec59eec339aa0e508aa", "55043afbb87e38627778a323dfdc35a55357e47d", "6d5099039729d930841c21893c5585a194d90a79", "3c02593a7f5ea8a22583c507c20a65c0244d385e", "4408b7049f9241920ff8dcb5ad387e5358a75694", "f29dac2e26273532c81c933f091c7a60b9480f94" ], "paperAbstract": "The looming breakdown of Moore's Law and the end of voltage scaling are ushering a new era where neither transistors nor the energy to operate them is free. This calls for a new regime in computer systems, one in which every transistor counts. Caches are essential for processor performance and represent the bulk of modern processor's transistor budget. To get more performance out of the cache hierarchy, future processors will rely on effective cache management policies.This paper identifies variability in generational behavior of cache blocks as a key challenge for cache management policies that aim to identify dead blocks as early and as accurately as possible to maximize cache efficiency. 
We show that existing management policies are limited by the metrics they use to identify dead blocks, leading to low coverage and/or low accuracy in the face of variability. In response, we introduce a new metric – Live Distance – that uses the stack distance to learn the temporal reuse characteristics of cache blocks, thus enabling a dead block predictor that is robust to variability in generational behavior. Based on the reuse characteristics of an application's cache blocks, our predictor – Leeway – classifies application's behavior as streaming-oriented or reuse-oriented and dynamically selects an appropriate cache management policy. By leveraging live distance for LLC management, Leeway outperforms state-of-the-art approaches on single- and multi-core SPEC and manycore CloudSuite workloads.", "pdfUrls": [ "http://www.research.ed.ac.uk/portal/files/40016712/LEEWAY_PACT17_1.pdf", "http://homepages.inf.ed.ac.uk/bgrot/pubs/LEEWAY_PACT17.pdf", "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.32" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/16d0de1e3d3c8e6020a0a3bd3accabfd6a937497", "sources": [ "DBLP" ], "title": "Leeway: Addressing Variability in Dead-Block Prediction for Last-Level Caches", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "16d377084c97f771efd193b1339f537f7fce42c8": { "authors": [ { "ids": [ "2732284" ], "name": "Thomas Holterbach" }, { "ids": [ "2601982" ], "name": "Stefano Vissicchio" }, { "ids": [ "2054092" ], "name": "Alberto Dainotti" }, { "ids": [ "2562998" ], "name": "Laurent Vanbever" } ], "doi": "10.1145/3098822.3098856", "doiUrl": "https://doi.org/10.1145/3098822.3098856", "entities": [ "Border Gateway Protocol", "Downtime", "Experiment", "Fast reroute", "Forwarding plane", "Internet", "Router (computing)", "Routing", "Swift (programming language)" ], "id": "16d377084c97f771efd193b1339f537f7fce42c8", "inCitations": [ 
"9de9928a0b1c6d0e20c6b8f36d7ac777253a083f" ], "journalName": "", "journalPages": "460-473", "journalVolume": "", "outCitations": [ "8e17b845ad75afd3587f43a3080182307901f1d3", "122995724822f98b00a837a51700c12911d9887e", "6410b6cc29af234544f7706194aba20d6c4c90ae", "22e70e8e68f5f5247ae167ab1fe56bed636f50c9", "448bb6fbed4121290eba57ed228fadfea598a38f", "4ee6a9f360be506ba5ff9c67d0a8c22fd985ff70", "58f692e9b03cb973355aab46bb6f867239aeb513", "73a6eb2ae5e9aa37babb95748c4d8ecee7efaf22", "3651da2bc6e5b79d39f583e47b9f998a41e98794", "7e1a0c6a37712da6ec69965d7f30505f6069d88f", "20a531fb5b8b7d978f8f24c18c51ff58c949b60d", "0077495598b473eed7551ab41b1f55a435a518d5", "612acaa301933c4662862ae9503c48848ab953c8", "493825ee0009a84591dfd493b5fde1f3d5807c83", "260850ce2353c25d9ab125f9820b3185087cac4a", "252b9ae134790148c054d99c46aa68c91d1cfd79", "0ca78b58ac1971cfeb4e908fb964b8510eacfecd", "0fa37b92444d8fcbef150470226e216bce15e3a8", "031923dea69e3d40397b03c0485799e03326a73a", "4c1fed855bcbada64ea98bc3221841204109345e", "1b95f4118db5132c9d41089e98055f22a1cad117", "42a3551cc2c694caee90f69db6ac8a6c4f95ed6a", "04322dde94a71eeb610fad4522647fc7e4f53126", "1bec0ad7d9c3d62b70261d8358fa85df833c5724", "0c2ba696b642623439dded9b07e0bb9541f679a0", "57f821bb8eba8e9df8eea21f9eeafe44ef311235", "1e4da813c29a65f19f6e9432cb4efe8b7d45ac1d", "049b960b46db868a6d2dce2bb0deb3c2e6ad608b", "09b4dae698495e8229171a64cd78b23f106de653", "19e4c40941a3767afd51f200db85c4289f189e24", "6e65af632213bc129bbcb66b8ab39b6f82c6712e", "395867fa1708ad9ef3572ab9be0b34b203707be3", "94635a872ea9adb3665ece2101815ca58a26c18d", "99a297647eaa430d0587774f065bd542b859885e", "d3769235cbadf281396ff679f057ec172114d116", "36d8fbb7827343b11f54070bd055f7132d7b7273", "1942aff3bb24d4ff9c1e8688b1104a767f0bc346", "7b3d20bd219b38d9f27830003f4b881b61181146", "2d1d82c89f4d5464d4e20e9694918941c030cb40", "17d5a76bfaeb193ded5e2d700e6d096320bd4dc3", "77fe6d73575545fb30c61e38d37a089b9c01c09d", "55ef72fe52990f491ab939b91d75b7899a66180f", 
"82c72c48ff0dae1732a440fc4a8bce2c46911a0c", "5953b322d85d77db31ed4628e6fe8b20a8e8e07e", "6c0ed2f09a5d961bea5a2750958d400863537e3e", "3eae61ce8b04ef30155617d15a569510bc3da79f", "21e549445b8f4aa91e2550a83745e2143d7f2784", "3007bc296d5ab703e6e08d2a4bfb5e1c3e1a3a56", "6245ced0adb67151ea0f456d14cf002efc4e5c38" ], "paperAbstract": "Network operators often face the problem of remote outages in transit networks leading to significant (sometimes on the order of minutes) downtimes. The issue is that BGP, the Internet routing protocol, often converges slowly upon such outages, as large bursts of messages have to be processed and propagated router by router.\n In this paper, we present SWIFT, a fast-reroute framework which enables routers to restore connectivity in few seconds upon remote outages. SWIFT is based on two novel techniques. First, SWIFT deals with slow outage notification by predicting the overall extent of a remote failure out of few control-plane (BGP) messages. The key insight is that significant inference speed can be gained at the price of some accuracy. Second, SWIFT introduces a new data-plane encoding scheme, which enables quick and flexible update of the affected forwarding entries. SWIFT is deployable on existing devices, without modifying BGP.\n We present a complete implementation of SWIFT and demonstrate that it is both fast and accurate. 
In our experiments with real BGP traces, SWIFT predicts the extent of a remote outage in few seconds with an accuracy of ~90% and can restore connectivity for 99% of the affected destinations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098856", "https://swift.ethz.ch/files/swift_technical_report.pdf", "http://www.tik.ee.ethz.ch/file/09ac4af69dfded369c05287a1858cef0/p460-Holterbach.pdf", "http://www.caida.org/publications/papers/2017/swift/swift.pdf", "http://www0.cs.ucl.ac.uk/staff/S.Vissicchio/papers/Swift_sigcomm17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/16d377084c97f771efd193b1339f537f7fce42c8", "sources": [ "DBLP" ], "title": "SWIFT: Predictive Fast Reroute", "venue": "SIGCOMM", "year": 2017 }, "16d7ab5f2956503d9c23f9963901b4003e5c5776": { "authors": [ { "ids": [ "33277789" ], "name": "Wenjie Lu" }, { "ids": [ "2296139" ], "name": "Shohei Kawasaki" }, { "ids": [ "1733719" ], "name": "Jun Sakuma" } ], "doi": "", "doiUrl": "", "entities": [ "Categorical variable", "Cloud computing", "Computation", "Contingency table", "Encryption", "Experiment", "Homomorphic encryption", "Level of measurement", "Numerical analysis", "Ordinal data", "Outsourcing", "Principal component analysis", "Zero suppression" ], "id": "16d7ab5f2956503d9c23f9963901b4003e5c5776", "inCitations": [ "8e3f04c9936949d13b9b1157857e66dd291c45d5", "3b1f4a26d47267b21019448515bb11e453862611", "f9cef80925958f02831523cdb7bd3c84f218e934", "900d3a5fa12d0894d593f65393605ffca8af66d9" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "1163", "journalVolume": "2016", "outCitations": [ "7f26fb5d83f2393aa8baf6a4db74458f8729fcb5", "8d63532f254c77549b40e232611948d36c85150b", "6d6bd93c620885cb5ddd5abfac19efffac132cd5", "40d68c0011958b9a990c9df65414fcf4fd539c72", "3181b9ce21265bbf8175314714e1535f75b3d80f", "9e487eea9772ba8742c5deee3f6b214ebfb79811", "031878496a4a53ce1be6023661151b1ba7dd0869", "362246709de205ec0ac5b34e07306839c38d5a3a", 
"39bd1f1f75ca061985833f7f1d339ace60047f45", "0471807906db31b7f477caa31a255a651e1e26fd", "5b56ba4835be8c54865c3e5eab71fa6f076f2534", "0abe7f77433cf5908bfe2b79aa91af881da83858", "2813c3351838e036f52bcaa94eb7203bf4d9e7d3", "68224fb1dc69dfc391eb330d9dc77e46c4165842", "03c5ab0f31220b29d3b8eb60637a7a8140fd87fc", "31ccbbafadb1383aa31d8376a9a56cce16667d68", "5acc4f114311c2b285abdc1c18403859a6fafd72", "2b70b11bff4f08c0ea412b4ad3d73299bda7d624", "46527c14457cf84d1cf26487d6b4c31f4825db71", "829220b82ebad22259a89c043798e7b8a683e51c", "97cfeb3db6d6c3ce2dc5beef6fca95eeac07a2e1", "fe38ee9944077cd14a0f6f1813af2d3d4b59ce43", "8ff537ace7ff475a8f3b07c66ee0fe6ce7b00712", "b5d834003442221794cb1376cd1390eedc37c049", "004d14a2bf5cee0f5a32f084934933735ef13fb3" ], "paperAbstract": "In recent years, there has been a growing trend towards outsourcing of computational tasks with the development of cloud services. The Gentry\u2019s pioneering work of fully homomorphic encryption (FHE) and successive works have opened a new vista for secure and practical cloud computing. In this paper, we consider performing statistical analysis on encrypted data. To improve the efficiency of the computations, we take advantage of the batched computation based on the Chinese-Remainder-Theorem. We propose two building blocks that work with FHE: a novel batch greater-than primitive, and matrix primitive for encrypted matrices. With these building blocks, we construct secure procedures and protocols for different types of statistics including the histogram (count), contingency table (with cell suppression) for categorical data; k-percentile for ordinal data; and principal component analysis and linear regression for numerical data. To demonstrate the effectiveness of our methods, we ran experiments in five real datasets. 
For instance, we can compute a contingency table with more than 50 cells from 4000 of data in just 5 minutes, and we can train a linear regression model with more than 40k of data and dimension as high as 6 within 15 minutes. We show that the FHE is not as slow as commonly believed and it becomes feasible to perform a broad range of statistical analysis on thousands of encrypted data.", "pdfUrls": [ "http://eprint.iacr.org/2016/1163.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/using-fully-homomorphic-encryption-statistical-analysis-categorical-ordinal-and-numerical-data/", "http://eprint.iacr.org/2016/1163" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/16d7/ab5f2956503d9c23f9963901b4003e5c5776.pdf", "s2Url": "https://semanticscholar.org/paper/16d7ab5f2956503d9c23f9963901b4003e5c5776", "sources": [ "DBLP" ], "title": "Using Fully Homomorphic Encryption for Statistical Analysis of Categorical, Ordinal and Numerical Data", "venue": "NDSS", "year": 2016 }, "17108842b3ccfa88ad436949549d33e6076e943e": { "authors": [ { "ids": [ "1735891" ], "name": "Karine Altisen" }, { "ids": [ "27396610" ], "name": "Ajoy K. Datta" }, { "ids": [ "2996049" ], "name": "St\u00e9phane Devismes" }, { "ids": [ "38435293" ], "name": "Ana\u00efs Durand" }, { "ids": [ "2355852" ], "name": "Lawrence L. 
Larmore" } ], "doi": "10.1109/IPDPS.2017.23", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.23", "entities": [ "Algorithm", "Divergence (computer science)", "Leader election" ], "id": "17108842b3ccfa88ad436949549d33e6076e943e", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "182-191", "journalVolume": "", "outCitations": [ "4ad3d4c280431689ef3a78fd6757c99d93a45664", "7b892aef86782f5f63a064b2edb0a21629753e71", "2f1f27c95a68d5a3f1fb93b8c924bc06c02d936a", "b8031ff197d3e3894f3484318d65aced7054138d", "89e03249c2e8e0bc1948f27758da2fd139c7e950", "28d40408e35cbbc6ca18df5d767a80be9da23e6a", "2ca1408dedeb67edd181ea7aa8988d51c732e4d0", "eb9bfe0367605b8221e0266029719891a0ad9807", "a10691f71990e7f3f486ff9f28b43e7ac6d46934" ], "paperAbstract": "We study (deterministic) leader election in unidirectional rings of homonym processes that have no a priori knowledge on the number of processes. In this context, we show that there is no algorithm that solves process-terminating leader election for the class of asymmetric labeled rings. In particular, there is no process-terminating leader election algorithm in rings in which at least one label is unique. However, we show that process-terminating leader election is possible for the subclass of asymmetric rings, where multiplicity is bounded. 
We confirm this positive results by proposing two algorithms, which achieve the classical trade-off between time and space.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.23", "http://www-verimag.imag.fr/~adurand/docs/slidesIPDPS17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/17108842b3ccfa88ad436949549d33e6076e943e", "sources": [ "DBLP" ], "title": "Leader Election in Asymmetric Labeled Unidirectional Rings", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "172bdccfd9803f00d22577e45314981db2a25365": { "authors": [ { "ids": [ "2955257" ], "name": "Mathias Jacquelin" }, { "ids": [ "39220346" ], "name": "Wibe A. de Jong" }, { "ids": [ "2418932" ], "name": "Eric J. Bylaska" } ], "doi": "10.1109/IPDPS.2017.26", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.26", "entities": [ "Clock rate", "Computer performance", "Density functional theory", "Interaction", "Knights", "Kohn\u2013Sham equations", "Lagrange multiplier", "Manycore processor", "Molecular dynamics", "Multi-core processor", "NWChem", "OpenMP", "Parallel computing", "Program optimization", "Pseudopotential", "Roofline model", "Scalability", "Simulation", "Task parallelism", "Test case", "Xeon Phi" ], "id": "172bdccfd9803f00d22577e45314981db2a25365", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "234-243", "journalVolume": "", "outCitations": [ "8a269f794c54b62d81ba76d23aaa4bdf12301ec8", "da2e894dc3f6ee40af4bcfd00f770cc0ba003003", "9bcc6eb32e5886cdad4522fe9f7403dda184c241", "11f6e08713de786eeaed409f578c0c1642783847", "6d9557d2e9d20052f22e837a27af75596db433a0", "c5aeb5d078cfa7d676b967cdaa4ff96b8a53f2f0", "d84db7a5314c2248db3a72905b2d284f15264dca", "99a1520bc334c111ff84619a1ac376f009d0d3bf", "7b78ccf4b0eb944d4c20ff98016d4d95c7c24f76", "14a4369f0fd45b3ae2323dd71eac8980b1556f0d", "092217c2267f6e0673590aa151d811e579ff7760" ], 
"paperAbstract": "The Ab Initio Molecular Dynamics (AIMD) method allows scientists to treat the dynamics of molecular and condensed phase systems while retaining a first-principles-based description of their interactions. This extremely important method has tremendous computational requirements, because the electronic Schrodinger equation, approximated using Kohn-Sham Density Functional Theory (DFT), is solved at every time step. With the advent of manycore architectures, application developers have a significant amount of processing power within each compute node that can only be exploited through massive parallelism. A compute intensive application such as AIMD forms a good candidate to leverage this processing power. In this paper, we focus on adding thread level parallelism to the plane wave DFT methodology implemented in NWChem. Through a careful optimization of tall-skinny matrix products, which are at the heart of the Lagrange Multiplier and non-local pseudopotential kernels, as well as 3D FFTs, our OpenMP implementation delivers excellent strong scaling on the latest Intel Knights Landing (KNL) processor. We assess the efficiency of our Lagrange multipliers kernels by building a Roofline model of the platform, and verify that our implementation is close to the roofline for various problem sizes. 
Finally, we present strong scaling results on the complete AIMD simulation for a 64 water molecules test case, that scales up to all 68 cores of the Knights Landing processor.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.26" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/172bdccfd9803f00d22577e45314981db2a25365", "sources": [ "DBLP" ], "title": "Towards Highly scalable Ab Initio Molecular Dynamics (AIMD) Simulations on the Intel Knights Landing Manycore Processor", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "174d569fbaabcd296246090c9994750312a3edd2": { "authors": [ { "ids": [ "1845933" ], "name": "Yandong Wang" }, { "ids": [ "1712838" ], "name": "Li Zhang" }, { "ids": [ "2758173" ], "name": "Yufei Ren" }, { "ids": [ "1726357" ], "name": "Wei Zhang" } ], "doi": "10.1109/MASCOTS.2017.34", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.34", "entities": [ "Computation", "Deep learning", "Graphics processing unit", "Nexus S", "Program optimization", "Scalability", "Theano (software)", "Torch" ], "id": "174d569fbaabcd296246090c9994750312a3edd2", "inCitations": [], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "12-21", "journalVolume": "", "outCitations": [ "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "3b2697d76f035304bfeb57f6a682224c87645065", "6dc61f37ecc552413606d8c89ffbc46ec98ed887", "60f22ad725f041fff81d6371242485bbe5c3ebb6", "39f63dbdce9207b87878290c0e3983e84cfcecd9" ], "paperAbstract": "Demand is mounting in the industry for scalable GPU-based deep learning systems. 
Unfortunately, existing training applications built atop popular deep learning frameworks, including Caffe, Theano, and Torch, etc, are incapable of conducting distributed GPU training over large-scale clusters.To remedy such a situation, this paper presents Nexus, a platform that allows existing deep learning frameworks to easily scale out to multiple machines without sacrificing model accuracy. Nexus leverages recently proposed distributed parameter management architecture to orchestrate distributed training by a large number of learners spread across the cluster. Through characterizing the run-time behavior of existing single-node based applications, Nexus is equipped with a suite of optimization schemes, including hierarchical and hybrid parameter aggregation, enhanced network and computation layer, and quality-guided communication adjustment, etc, to strengthen the communication channels and resource utilization. Empirical evaluations with a diverse set of deep learning applications demonstrate that Nexus is easy to integrate and can deliver efficient distributed training services to major deep learning frameworks. 
In addition, Nexus's optimization schemes are highly effective to shorten the training time with targeted accuracy bounds.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.34" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/174d569fbaabcd296246090c9994750312a3edd2", "sources": [ "DBLP" ], "title": "Nexus: Bringing Efficient and Scalable Training to Deep Learning Frameworks", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "1776dc8987a210b2d78d9e0004546ab7cb57a0e2": { "authors": [ { "ids": [ "32817044" ], "name": "Samyam Rajbhandari" }, { "ids": [ "1769780" ], "name": "Fabrice Rastello" }, { "ids": [ "2010968" ], "name": "Karol Kowalski" }, { "ids": [ "1679176" ], "name": "Sriram Krishnamoorthy" }, { "ids": [ "1750948" ], "name": "P. Sadayappan" } ], "doi": "10.1145/3018743.3018771", "doiUrl": "https://doi.org/10.1145/3018743.3018771", "entities": [ "Basis (linear algebra)", "Computation", "Computational chemistry", "Computer data storage", "For loop", "Loop optimization", "NWChem", "Parallel computing", "Quantum mechanics", "Software suite", "Tiling window manager", "Transformation matrix" ], "id": "1776dc8987a210b2d78d9e0004546ab7cb57a0e2", "inCitations": [], "journalName": "", "journalPages": "327-340", "journalVolume": "", "outCitations": [ "665df81d95969cf8763f65dea6ef1c81992f948a", "31285b44630a42bd89545dab2b5bf6635e04accf", "0aee2a03dbda966488faea11b853824d02b44503", "3900598e45f2b5fea25d10242a4e33da9696b214", "8a329ee1e058295d51da149c9e61be957dacff4b", "226c35219310f058426679835a2f6c8fb6fd15ca", "5e85cb3ea6b8b8c4d954f0bd83fc7e96d0305878", "3e69317455f7db9b1325239c6f6f52cbe29a5491", "3d68d9abc28d1b333f6caccca7f78deff39dec18" ], "paperAbstract": "The four-index integral transform is a fundamental and computationally demanding calculation used in many computational chemistry suites such as NWChem. 
It transforms a four-dimensional tensor from one basis to another. This transformation is most efficiently implemented as a sequence of four tensor contractions that each contract a four- dimensional tensor with a two-dimensional transformation matrix. Differing degrees of permutation symmetry in the intermediate and final tensors in the sequence of contractions cause intermediate tensors to be much larger than the final tensor and limit the number of electronic states in the modeled systems.\n Loop fusion, in conjunction with tiling, can be very effective in reducing the total space requirement, as well as data movement. However, the large number of possible choices for loop fusion and tiling, and data/computation distribution across a parallel system, make it challenging to develop an optimized parallel implementation for the four-index integral transform. We develop a novel approach to address this problem, using lower bounds modeling of data movement complexity. We establish relationships between available aggregate physical memory in a parallel computer system and ineffective fusion configurations, enabling their pruning and consequent identification of effective choices and a characterization of optimality criteria. 
This work has resulted in the development of a significantly improved implementation of the four-index transform that enables higher performance and the ability to model larger electronic systems than the current implementation in the NWChem quantum chemistry software suite.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018771" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1776dc8987a210b2d78d9e0004546ab7cb57a0e2", "sources": [ "DBLP" ], "title": "Optimizing the Four-Index Integral Transform Using Data Movement Lower Bounds Analysis", "venue": "PPOPP", "year": 2017 }, "17878e05d92fe05e156a6063f0a187a9e4e152cf": { "authors": [ { "ids": [ "9107538" ], "name": "Rodrigo Bruno" }, { "ids": [ "1689747" ], "name": "Paulo Ferreira" } ], "doi": "10.1145/3135974.3135986", "doiUrl": "https://doi.org/10.1145/3135974.3135986", "entities": [ "Big data", "Credit card fraud", "Garbage collection (computer science)", "Java HotSpot Virtual Machine", "Java virtual machine", "Memory management", "Object lifetime", "OpenJDK", "Programmer", "Requirement", "Service-level agreement", "Throughput", "Virtual machine" ], "id": "17878e05d92fe05e156a6063f0a187a9e4e152cf", "inCitations": [], "journalName": "", "journalPages": "147-160", "journalVolume": "", "outCitations": [ "839ef90d70e4779a4b6795b06f314294ea539add", "363c01593f54d2ab08b2580bca0362976ef5dcf4", "1eaf923356a7418bd8e7a9e1caac7b0724ba3030", "6973083bca583e26a0d8e7709ce7b9888cf3ee69", "0bf0a5ba7045e7faab3546da103f0d69a5e91e72", "30345844de1c6969537ae2ac180ac0743b081e7f", "1b6f2c8f350b9d9dca6e4e466a06502f51beafde", "0144aaa4251d55ff1db1b601257345e1b021d9cc", "43fb7b102ea54ce51b6fcd42005698ae1399e25e", "2ee8ced15dfc4053f09aec9ead02aac9bb6b5b2a", "3aa3cd43b48dc4377b4324462e6059e0a6ec571a", "0706356c9ab6014d6b04577d38289ea8328291a5", "01bedc250da327dc4b48c1886c7b69ddf0d1657a", "17ef1db8cd489341bdfaf25217c59b996cb9cfb2", "519810f2bc7760e7873675d2b4ddadc51cf64d6e", 
"1b33dff471644f309392049c2791bca9a7f3b19c", "2e62e3c536a5b60a47a5d6dbfbfccf9e7219fc59", "026cbae9f7d28cedd477fe349ba71d41d645ee7a", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "43393a561914f05be312a1dff5a757cbc384d1a1", "188c0013d5f79072ee97f8a48190cbe54b2009b1", "65a4f56ce35d80eef26da64a666a2d60ff928462", "127f9d5326351d92621b47b55a896cfd51654970", "1a6bf474b323f69550a15f31720265021f6f3492", "3570a31771bfa02467a6e14ad6fd1c0d982ed16b", "9225ce5b4359748953cb1de088da5b8a63397490", "737d041822cca60a341e4058ba2bac803fe0eed0", "5d4d9437c26c5dd095782b6812100e586f212e30", "2da08b8490a903cd9825867d25888a97ce27880f", "75cfd8ba815e41c475e0b89999352775d1759bcd", "e8ea2f9c7aac76014e9103728f67334bd68010f9", "3def91b90f5a12880cbe08f512bcf26bf5491ff9", "28c3b2e9cd7bead2f908871f3f5f6f9a5d914c27", "0910f0f65f0b4d309bf846d685f44163b2ae7837", "e65347dd70c6d3eeb1496dba08ab84eed26a3679", "00a9ba0063d34ec56792849a67ef57b4601becbb", "5128bfa2e95e165b3e70d0bc5062f190909079d2", "27d0e15faaf9cf8d00f57b9cd31f2d9c3f8407ff", "82173783467d0e1b770380b321b3ce30a7f6eb86", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "228c64750a11823a712f7414711e3b073b861c28", "69b5a2d53840677b9855cb22bb1b98da041e6733", "04f70629736c61ff2b658826df71e68c2c955b5e", "1563c06f78a62616991c93c1b08788eb74b82e42", "bcf1078b2d6bedd84e33fc020576c0f127b41483", "18e87b2286fbb4c27088343130207b731c9386fa", "19580e4beb903595082ced092c0bc5ba0a2e7bac" ], "paperAbstract": "Big Data applications suffer from unpredictable and unacceptably high pause times due to bad memory management (Garbage Collection, GC) decisions. This is a problem for all applications but it is even more important for applications with low pause time requirements such as credit-card fraud detection or targeted website advertisement systems, which can easily fail to comply with Service Level Agreements due to long GC cycles (during which the application is stopped). 
This problem has been previously identified and is related to Big Data applications keeping in memory (for a long period of time, from the GC's perspective) massive amounts of data objects.\n Memory management approaches have been proposed to reduce the GC pause time by allocating objects with similar lifetimes close to each other. However, they either do not provide a general solution for all types of Big Data applications (thus only solving the problem for a specific set of applications), and/or require programmer effort and knowledge to change/annotate the application code.\n This paper proposes POLM2, a profiler that automatically: i) estimates application allocation profiles based on execution records, and ii) instruments application bytecode to help the GC taking advantage of the profiling information. Thus, no programmer effort is required to change the source code to allocate objects according to their lifetimes. POLM2 is implemented for the OpenJDK HotSpot Java Virtual Machine 8 and uses NG2C, a recently proposed GC which supports multi-generational pretenuring. Results show that POLM2 is able to: i) achieve pauses as low as NG2C (which requires manual source code modification), and ii) significantly reduce application pauses by up to 80% when compared to G1 (default collector in OpenJDK). POLM2 does not negatively impact neither application throughput nor memory utilization.", "pdfUrls": [ "http://www.gsd.inesc-id.pt/~rbruno/publications/rbruno-middleware17.pdf", "http://doi.acm.org/10.1145/3135974.3135986" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/17878e05d92fe05e156a6063f0a187a9e4e152cf", "sources": [ "DBLP" ], "title": "POLM2: automatic profiling for object lifetime-aware memory management for hotspot big data applications", "venue": "Middleware", "year": 2017 }, "1798e6ac7becadaa6fbfec78e3e363ee2f99636e": { "authors": [ { "ids": [ "2621619" ], "name": "Eddie Q. 
Yan" }, { "ids": [ "3189426" ], "name": "Kaiyuan Zhang" }, { "ids": [ "31825486" ], "name": "Xi Wang" }, { "ids": [ "1718508" ], "name": "Karin Strauss" }, { "ids": [ "1717411" ], "name": "Luis Ceze" } ], "doi": "", "doiUrl": "", "entities": [ "Bandwidth (signal processing)", "Baseline (configuration management)", "JPEG", "Peak signal-to-noise ratio", "Social media" ], "id": "1798e6ac7becadaa6fbfec78e3e363ee2f99636e", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "455652342384754f98e947180700220b38fcc7ca", "396514fb219879a4a18762cddfae2a6a607f439f", "0a5882fc7600383eb9d6cc119942f48a70f896ad", "9b3573139330aaa917957f90c40bfe13aa814f76", "339db82712834ee2698c71f481dcf775fbca0d12", "0c3c065791a64556de32b13e918688fb01102198", "12f81c7fe69bda67cadd185c54cf02f45cf388cc", "50543dd00e1e63ed4caabd7f5f877034dc0fc33b", "a1aa6b7ca526cc0b896502566bac375c9e60295b", "1c7d0f188a8033d8a14ab3ae30662f7e85fa65b6", "e711b68e3fe1964aeedde33b38cb43c5a7fee743", "d8a99802f0606063a7b55be4e898f2a0ab8f5264" ], "paperAbstract": "Modern image storage services, especially those associated with social media services, host massive collections of images. These images are often replicated at many different resolutions to support different devices and contexts, incurring substantial capacity overheads. One approach to alleviate these overheads is to resize them at request time. However, this approach can be inefficient, as reading full-size source images for resizing uses more bandwidth than reading pre-resized images. We propose repurposing the progressive JPEG standard and customizing the organization of image data to reduce the bandwidth overheads of dynamic resizing. We show that at a PSNR of 32 dB, dynamic resizing with progressive JPEG provides 2.5\u00d7 read data savings over baseline JPEG, and that progressive JPEG with customized encode parameters can further improve these savings (up to 5.8\u00d7 over the baseline). 
Finally, we characterize the decode overheads of progressive JPEG to assess the feasibility of directly decoding progressive JPEG images on energy-limited devices. Our approach does not require modifications to current JPEG software stacks.", "pdfUrls": [ "https://www.usenix.org/conference/hotstorage17/program/presentation/yan", "https://www.usenix.org/sites/default/files/conference/protected-files/hotstorage17_slides_yan.pdf", "https://locore.cs.washington.edu/papers/yan-shoebox.pdf", "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-yan.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/ea24/bf5e8af8ddf7f83c6a5eb3ae7623db992054.pdf", "s2Url": "https://semanticscholar.org/paper/1798e6ac7becadaa6fbfec78e3e363ee2f99636e", "sources": [ "DBLP" ], "title": "Customizing Progressive JPEG for Efficient Image Storage", "venue": "HotStorage", "year": 2017 }, "179d03589dcfd6dac2d0e68f867b1b7c49cc6620": { "authors": [ { "ids": [ "1737740" ], "name": "Giuliano Casale" } ], "doi": "10.1145/3084445", "doiUrl": "https://doi.org/10.1145/3084445", "entities": [ "Approximation algorithm", "Closed system", "Distributed computing", "Enterprise software", "Information-theoretic security", "Monte Carlo method", "Multiclass classification", "Numerical integration", "Parallel computing", "Program optimization", "Sampling (signal processing)" ], "id": "179d03589dcfd6dac2d0e68f867b1b7c49cc6620", "inCitations": [ "782caf239f0555756c2ad5f5f4cacbcbe578787f" ], "journalName": "", "journalPages": "64", "journalVolume": "", "outCitations": [ "75a637a5f889a5b3218215a168c8a25f9a97cf39", "f412f7dd2b0dcadd4cafbcb30badd2164224f6d0", "30a13879770a420ab1499de4ee06aef54d7ee256", "07287e0ef785ba37d3ab507510688dfdda23528c", "52c55bbfa2cdeb88b949897b190876afc37d120a", "7a691b13519a1da1fef54a12cc03c2f379420164", "e06310faeb4bec542042fb7fdd9837158d145ac3", "44053d094fc7a9d423be8fa2c4767bdbc231fe2f", "341c42eb01c0e51c2a367b5804566f78b327a6ac", 
"4bd5ea9fbd030ac1d2021240792871b0aa951730", "fb408f6acb0c695fddd31960fcebf9b277673530", "a52835449e126a18d48a415c647b24caba90fc90", "2ff31d097ac55c5b4b16255cb4f36c6a0112ddd6", "01ae1e659c1013a19e9b05af69e77ac804494b69", "b5dd5f6821bda0039163a3f6c85b915734779705", "7d2e0e01089d98fa14ea1cbd1e308ecca836c2a4", "0a679469a275d81b1851d6293476cffc3855a76f", "de739003eaa0fb10b8cab7415f1e780f57dc559c", "5679356420d02f734e4ef9ca815514cf73d3e8fa", "393bcc47dca64a5c4570ec9045535ddf9c96a3c4", "fe5e1904c88055db1839a47c246d98913ec91b02", "b87baa0dd0f0b1788b91f3dfdfb197e2e29c7454", "6427a6d22ddf28986ac054c4d17a08bb0042d9b1", "b2afed913bea9e6840ed873d2c512915423fa72a", "0acb05c48040d01dd6d2b4f4eac3b8800ce76fcd", "57165ca5f5e088ab2afa45a9e60ce24cbd6f0f5a", "1388c5dca9d8b41a49d24cddcf192af9ace3ea27", "8361434f7f9dda7f8d044607ace14fd0fab5de2f", "3ecde367b1dcb682449fe8ab72a30a80567d2475", "87951e35e1ad5959f912514ffcc7835689d84bcc", "d1c83611f3939b8bb2a917e156b5e8280785a5b4" ], "paperAbstract": "Recent years have seen a rapid growth of interest in exploiting monitoring data collected from enterprise applications for automated management and performance analysis. In spite of this trend, even simple performance inference problems involving queueing theoretic formulas often incur computational bottlenecks, for example upon computing likelihoods in models of batch systems. Motivated by this issue, we revisit the solution of multiclass closed queueing networks, which are popular models used to describe batch and distributed applications with parallelism constraints. We first prove that the normalizing constant of the equilibrium state probabilities of a closed model can be reformulated exactly as a multidimensional integral over the unit simplex. This gives as a by-product novel explicit expressions for the multiclass normalizing constant. We then derive a method based on cubature rules to efficiently evaluate the proposed integral form in small and medium-sized models. 
For large models, we propose novel asymptotic expansions and Monte Carlo sampling methods to efficiently and accurately approximate normalizing constants and likelihoods. We illustrate the resulting accuracy gains in problems involving optimization-based inference.", "pdfUrls": [ "http://doi.acm.org/10.1145/3084445", "http://doi.acm.org/10.1145/3078505.3078514" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/179d03589dcfd6dac2d0e68f867b1b7c49cc6620", "sources": [ "DBLP" ], "title": "Accelerating Performance Inference over Closed Systems by Asymptotic Methods", "venue": "SIGMETRICS", "year": 2017 }, "179d59854178accdb617e327c1e79636ea781e5a": { "authors": [ { "ids": [ "19269471" ], "name": "Iulian Brumar" }, { "ids": [ "2020430" ], "name": "Marc Casas" }, { "ids": [ "2703643" ], "name": "Miquel Moret\u00f3" }, { "ids": [ "1741016" ], "name": "Mateo Valero" }, { "ids": [ "1754655" ], "name": "Gurindar S. Sohi" } ], "doi": "10.1109/IPDPS.2017.49", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.49", "entities": [ "Adaptive algorithm", "Algorithm", "Approximate computing", "Compiler", "Computation", "Correctness (computer science)", "Linear algebra", "Machine learning", "Memoization", "Run time (program lifecycle phase)", "Runtime system", "Speedup", "Static program analysis", "Stencil (numerical analysis)" ], "id": "179d59854178accdb617e327c1e79636ea781e5a", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1140-1150", "journalVolume": "", "outCitations": [ "68837728232463651283edbb7ef0c93b2f502b2b", "38e4f247e907bda2275fd6139bd4fb651f915360", "1f2a00758fc38d764b05adb76110500870610bc8", "0110c80228683bc32879efb1b2f3931421e52eb6", "4b434f94fafc3ffc76e0c440897ccd222eaa38ac", "16f425a8c6d42a09c16fa074d3b0d7a87fd9348e", "180189c3e8b0f783a8df6a1887a94a5e3f82148b", "3186aead0cac0a94a8bf909a5023eae7afa8426b", "1a8bfc3d9361dc23544c7bc81f4a5d88497a7b50", 
"cfd34380711f505e58289a524e6d154dc44355a1", "237a086708ccae0686c7d1995e0a7017650c5740", "15b275f0421c606f5903532e9964b140cbb2f878", "4f105edc6d373f41b998871962189ab9b2adb601", "785f69fbf3ca670bc082f1e669b9b433100a0596", "f6e5e70860080a69e232d14a98bf20128957b9b5", "1b015bee767db7c4aba13e0320b8fb93a0817445", "1105aa77a66a3dbaa6916c57eff1f161c51affc0", "9591a06a102a2c80159f6734753b96d23aae4b50" ], "paperAbstract": "Redundant computations appear during the execution of real programs. Multiple factors contribute to these unnecessary computations, such as repetitive inputs and patterns, calling functions with the same parameters or bad programming habits. Compilers minimize non useful code with static analysis. However, redundant execution might be dynamic and there are no current approaches to reduce these inefficiencies. Additionally, many algorithms can be computed with different levels of accuracy. Approximate computing exploits this fact to reduce execution time at the cost of slightly less accurate results. In this case, expert developers determine the desired tradeoff between performance and accuracy for each application. In this paper, we present Approximate Task Memoization (ATM), a novel approach in the runtime system that transparently exploits both dynamic redundancy and approximation at the task granularity of a parallel application. Memoization of previous task executions allows predicting the results of future tasks without having to execute them and without losing accuracy. To further increase performance improvements, the runtime system can memoize similar tasks, which leads to task approximate computing. By defining how to measure task similarity and correctness, we present an adaptive algorithm in the runtime system that automatically decides if task approximation is beneficial or not. 
When evaluated on a real 8-core processor with applications from different domains (financial analysis, stencil-computation, machine-learning and linear-algebra), ATM achieves a 1.4x average speedup when only applying memoization techniques. When adding task approximation, ATM achieves a 2.5x average speedup with an average 0.7% accuracy loss (maximum of 3.2%).", "pdfUrls": [ "http://upcommons.upc.edu/bitstream/handle/2117/107646/ATM+Approximate+Task+Memoization+in+the+Runtime+System.pdf;jsessionid=3B78BDEF7E686ABD5AFC1057A0F20D28?sequence=3", "https://doi.org/10.1109/IPDPS.2017.49" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/179d59854178accdb617e327c1e79636ea781e5a", "sources": [ "DBLP" ], "title": "ATM: Approximate Task Memoization in the Runtime System", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "17b1e371e58d0899154ad82e444873bdf3bb0176": { "authors": [ { "ids": [ "35158410" ], "name": "Kanika Sood" }, { "ids": [ "1763308" ], "name": "Boyana Norris" }, { "ids": [ "2047797" ], "name": "Elizabeth R. 
Jessup" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.4", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.4", "entities": [ "Algorithm", "Computation", "Computer simulation", "Homology modeling", "Krylov subspace", "Linear system", "Newton", "Newton\u2013Cotes formulas", "Numerical analysis", "PETSc", "Preconditioner", "Scalability", "Simulation", "Sparse matrix" ], "id": "17b1e371e58d0899154ad82e444873bdf3bb0176", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "26-33", "journalVolume": "", "outCitations": [ "b380f13381abc38ffd3883393898cd42c98676d6", "17c2a54a44cbb6d44c708d7976a514f1e420104a", "4a5b4b5dc513c5aafef5b18b5fe7d2bd885c97fb", "b8381e24014d8ec3b77b1c53a7d0ccdf5cf2e77c", "e16a06d92e4c1b7efd967c77b3441b8a23865234", "454bdeefe3dc1ef7054ca06be23203ca0b34f78a", "27d2ac18ef4504df1460460c9711e69d166cc11e", "9fa95b063a3e16a8b3666daee4a3787bd640261d", "4ad0595b7af6c4041bfd89ccf74d94f154a39762", "0354aff91dd843e01e396fbd635129bea73977b4", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "25bf3ab2cc2e5fa46526b14fbd996027a3df3de5", "62b996c8b0845277f1b8a1459ecae454c054cd7c" ], "paperAbstract": "Many scientific and engineering computations rely on the scalable solution of large sparse linear systems. Preconditioned Krylov methods are widely used and offer many algorithmic choices whose performance varies depending on the characteristics of the linear system. In previous work, we have shown that the performance of different Krylov methods at small scales can be modeled using a small number of features based on structural and numerical properties of the input linear system. In this paper, we focus on comparing the scalability of parallel Krylov methods given different input properties without requiring extensive empirical measurements. 
We consider the PETSc implementations of Newton-Krylov methods to produce scalability rankings based on our new comparative modeling approach. The model-based ranking is validated by comparison with empirical results on a numerical simulation of driven fluid flow in a cavity.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.4", "https://ix.cs.uoregon.edu/~kanikas/PetscUserMeeting" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/17b1e371e58d0899154ad82e444873bdf3bb0176", "sources": [ "DBLP" ], "title": "Comparative Performance Modeling of Parallel Preconditioned Krylov Methods", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "17b2426a7d87fa13f59370454b1c1a411fedd472": { "authors": [ { "ids": [ "3229837" ], "name": "Sergey Legtchenko" }, { "ids": [ "39280615" ], "name": "Hugh Williams" }, { "ids": [ "2072347" ], "name": "Kaveh Razavi" }, { "ids": [ "2793107" ], "name": "Austin Donnelly" }, { "ids": [ "2825393" ], "name": "Richard Black" }, { "ids": [ "3808546" ], "name": "Andrew Douglas" }, { "ids": [ "2024014" ], "name": "Nathanael Cheriere" }, { "ids": [ "3345777" ], "name": "Daniel Fryer" }, { "ids": [ "31659063" ], "name": "Kai Mast" }, { "ids": [ "2366298" ], "name": "Angela Demke Brown" }, { "ids": [ "1971184" ], "name": "Ana Klimovic" }, { "ids": [ "21238468" ], "name": "Andy Slowey" }, { "ids": [ "1710239" ], "name": "Antony I. T. 
Rowstron" } ], "doi": "", "doiUrl": "", "entities": [ "Data center", "Hard disk drive", "Mass storage", "SAS", "Serial ATA" ], "id": "17b2426a7d87fa13f59370454b1c1a411fedd472", "inCitations": [ "c206dd5b90104df0fd12a2c1f3fb0f913ee08c0b" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "8e8e622d5fab4c1d2a5bc7783db84e62cc570f9a", "006cd63664db53494cc61a44d5c6ebc668dc4b6a", "04ccdfc19e675b4b8439ebecddab093ab7c605fa", "0081c1fcb079f87147a68565764b59923c918d9c", "00dca7217305a31dcf5108eb7ecf862dd4827823", "048a09d7c8713dc2533c1e31ac3f224868293461", "75032b6df2f3988d2cb6988c73cbc01c7b9e80cb", "066ebaf1c399d5f2bf54b72b5189ec0577423d55", "028378b395dc2a11e8ccc3d994df228340fd9697", "0c3c065791a64556de32b13e918688fb01102198" ], "paperAbstract": "Disaggregation of resources in the data center, especially at the rack-scale, offers the opportunity to use valuable resources more efficiently. It is common that mass storage racks in large-scale clouds are filled with servers with Hard Disk Drives (HDDs) attached directly to each of them, either using SATA or SAS depending on the number of HDDs. What does disaggregated storage mean for these racks? We define four categories of in-rack disaggregation: complete, dynamic elastic, failure, and configuration disaggregation. 
We explore the benefits and impact of these design points by building a highly flexible research storage fabric, that allows us to build example systems that embody the four designs.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-legtchenko.pdf", "https://www.usenix.org/conference/hotstorage17/program/presentation/legtchenko" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/17b2/426a7d87fa13f59370454b1c1a411fedd472.pdf", "s2Url": "https://semanticscholar.org/paper/17b2426a7d87fa13f59370454b1c1a411fedd472", "sources": [ "DBLP" ], "title": "Understanding Rack-Scale Disaggregated Storage", "venue": "HotStorage", "year": 2017 }, "17c3b0eb0c0cdc146cae9e5b12d3b57a4346558c": { "authors": [ { "ids": [ "40174652" ], "name": "Xia Li" }, { "ids": [ "2723840" ], "name": "Lingming Zhang" } ], "doi": "10.1145/3133916", "doiUrl": "https://doi.org/10.1145/3133916", "entities": [ "A Library for Support Vector Machines", "Benchmark (computing)", "Cognitive dimensions of notations", "Debugging", "Fault tolerance", "Internationalization and localization", "Learning to rank", "Software bug", "Video game localization" ], "id": "17c3b0eb0c0cdc146cae9e5b12d3b57a4346558c", "inCitations": [ "edb99573eabfd4d1f2546348833de409a439f085", "8919c81a9cb65abd364ad19a1d2894f125fdf37e", "a7c1d98864c672c8008fed62ec6c47a5f5c07514" ], "journalName": "PACMPL", "journalPages": "92:1-92:30", "journalVolume": "1", "outCitations": [ "3a562f03e628c32e35094884825a81844fb5c2ec", "38cd1cbcb68e99adc8dc1e2f1f588c8e7a263130", "3a6e90f11b4c7639f5b053d03fbf0f75cb808c0b", "0a84bbe4dd47d99bc77010931b7daffecf8c1a11", "48886ea4ee14f0151f186207e1b9ad1d947e83ef", "0f16f6f478b5c788dce466eb50e36c612273c36e", "48852c8241f8e1be64f419aca5fe6150bab89e85", "15a78815326b696a48dde0bdf56c933035a3c189", "4ed39d2773b0b0818e8c37fabe1894cf63d55772", "18797ef47a86e7535fe7e7441080695ea16a3173", "684be9e9bd41d148158c64ba811c08f66b58092a", "096079bfdd75c2baedbc049efd8b433a383650bd", 
"46ee08c31bb99d515f6e571dc2f9cb8585c71165", "4e177c25f97220d33dcc222485d951c817a77750", "5b4c8f105c39c033f5aad47ca2da3003ed6d9ee4", "3e43c50d959bb2a4d5f2b01982242e38c7c217b7", "f189f55077d0fe9e8d0b9586ffb3b6f33682b844", "0f0a2d31604d74c08ac278fbed48efa86e14293a", "69420bcf6dc36820df8934ffb1730bb103b25321", "12b50a334cf9f633bcd4f6db18db9b0002510ccb", "664ba5d8062ba9e5b09694cd73308cf9bf8e74a8", "7a48c5ccfee6bac04b678463dbe72738c3e8da06", "a84c931dfc30b4e52a492e6c970b2a90613601d6", "dd4d09fbd164ee9d6ad1aec53563803581eb1550", "41153ea83e44cc920dff0169b4d42664d2a5bc9d", "19d29a59ef1b472102c540b178415489e0353b37", "05607111cf79330d56164a10d351dbf94e2cfa44", "6b07dd22a109afd5e8b71b17449457b38858a870", "bd149fb90ee497cd2625f1fecf278a28281226ec", "018ffbd39a5ddf102d36ea649f778f878ce79aeb", "ba1243c047e03fed7826f868fdfd680dde5a1f76", "01c1795d00e1ad4a69c5dd3498a38715ca898833", "76af9a30ddcaebeeb8513a0766a75df90bd25ead", "946db868d0aaa97f579e115af787d0e8164127fe", "4c89a105a8638688b516f3d3e00efe396511ed7b", "2fccf2f5d21734f0766876e2853174d681dc3a97", "02deafef1a1f58061418c92bca99e75ffb70c7ae", "6629fa5caa0c8ebf2734e156b08d6bdbc513deb1", "3791a9a2056fc78d1908b58e6da52a33045ccd71", "0bc25a572260f0b726ff6681d264c5684a462c24", "37e2106bebd02f4ac9c410941fde7f358279e4a4", "bc6d070eb61c6590acbecb6fbfee922557b5d8dd", "204c3fc33bf49e2adaec8dea55b88390dad4b03b", "bbb69bd318054c489bed0dc2f39cabe2d93f98c4", "36d9943fd8f99d7b19c08ce945f05cb76fa04511", "c4244e6fb5b93dad41afcfda0401dd4e8fbaacb0", "1e66ab12901114529d7b68a4b75d20f769e409b8", "05db9c21148a3d0fdc4e184841ca2011834d91f6", "748136b6dcaac11f2f40acbf663ff53b35e80035", "3ec1a36e9e12a85d02adaa8ed682ee04f73ae332", "042fa7eb5edb9ea729ec5f339c78446a5a1ec36f", "32778a2eb4c74cd437e922ac1eb6a1477dfcb925", "0f31ce5e231057392e88c9751aea6e1f06d67b98", "1604de41ea8bc82aac0502daa309d3fed3f8495e", "ec3cd709d1d46df82d2cae24388ffe7fca938af3" ], "paperAbstract": "Localizing failure-inducing code is essential for software debugging. 
Manual fault localization can be quite tedious, error-prone, and time-consuming. Therefore, a huge body of research efforts have been dedicated to automated fault localization. Spectrum-based fault localization, the most intensively studied fault localization approach based on test execution information, may have limited effectiveness, since a code element executed by a failed tests may not necessarily have impact on the test outcome and cause the test failure. To bridge the gap, mutation-based fault localization has been proposed to transform the programs under test to check the impact of each code element for better fault localization. However, there are limited studies on the effectiveness of mutation-based fault localization on sufficient number of real bugs. In this paper, we perform an extensive study to compare mutation-based fault localization techniques with various state-of-the-art spectrum-based fault localization techniques on 357 real bugs from the Defects4J benchmark suite. The study results firstly demonstrate the effectiveness of mutation-based fault localization, as well as revealing a number of guidelines for further improving mutation-based fault localization. Based on the learnt guidelines, we further transform test outputs/messages and test code to obtain various mutation information. Then, we propose TraPT, an automated Learning-to-Rank technique to fully explore the obtained mutation information for effective fault localization.
The experimental results show that TraPT localizes 65.12% and 94.52% more bugs within Top-1 than state-of-the-art mutation and spectrum based techniques when using the default setting of LIBSVM.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133916", "http://www.utdallas.edu/~lxz144130/publications/oopsla2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/17c3b0eb0c0cdc146cae9e5b12d3b57a4346558c", "sources": [ "DBLP" ], "title": "Transforming programs and tests in tandem for fault localization", "venue": "PACMPL", "year": 2017 }, "17d77d5e2db5b9aaf54b8240f829b1d4f077df29": { "authors": [ { "ids": [ "38668659" ], "name": "Vicente A. B. Sanchez" }, { "ids": [ "17804514" ], "name": "Wonbae Kim" }, { "ids": [ "3289855" ], "name": "Youngmoon Eom" }, { "ids": [ "26393736" ], "name": "Kibeom Jin" }, { "ids": [ "3461436" ], "name": "Moohyeon Nam" }, { "ids": [ "2951933" ], "name": "Deukyeon Hwang" }, { "ids": [ "1687878" ], "name": "Jik-Soo Kim" }, { "ids": [ "1739708" ], "name": "Beomseok Nam" } ], "doi": "10.1109/CLUSTER.2017.12", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.12", "entities": [ "Apache Hadoop", "Attribute\u2013value pair", "Cache (computing)", "Consistent hashing", "Distributed hash table", "Distributed memory", "Fair queuing", "Hit (Internet)", "In-memory database", "Job scheduler", "Key-value database", "Load balancing (computing)", "Locality of reference", "Look and feel", "MapReduce", "Scheduling (computing)", "USB flash drive" ], "id": "17d77d5e2db5b9aaf54b8240f829b1d4f077df29", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "322-332", "journalVolume": "", "outCitations": [ "876186bfd05bdd01c1f2ea288c532b16b8a0694f", "6a21158317711c9b349c68cd7e7b2a92efbbb074", "b4d8da39f041d1f16fd106792df5d92e136af187", "c131f2b65169e3162e2d6430019bad81c7919ed5", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "2f47c7304aa5008911db59bef5c0fd3d3e212088", 
"3af2153f2c6825fd3106aa9efd56db8eef311767", "62a7c092e607640273f69cae1372d0677bad2615", "0368d2445d3ee4205ee73da933cb8b810a89091c", "1f50075cd3100832c9b82c4d78259d833a4f3288", "04fe7d8276178be18afd6c17e399e8df4ab693c7", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "3b75c563febb906bce3349c760def9337070946d", "c737aa8b2c916fe1f13a6fd4e847fa45da1e5434", "04a804e9720c67c16715ba96288821af92166a45", "35e8655b2c8845d607fc14ca12a42311dc30c379", "33316a454cf8eb7bf78e7d4fdc9525c945d9a118", "09dbd5e0e3b4ed956a0dfcacb2a2d007fb8e3d17", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "389965aeae8a46b725267b3bf025440609f67012", "661b19ff987b9ed9d9252324d4a72ab1fbd588ae", "7ebb32dfffe74228b3877f3c825ca191eb5e7469", "0558c94a094158ecd64f0d5014d3d9668054fb97", "332f77fd05703c1607e3b57884ad31fb1fad0104", "0a12a179bebdf4bb69d692a1127795b3f536270b", "52bfb3aa30ec06784d839ab431287a657d0d7907", "47947ed7d4c12855b1b5a4c4ec3123528761d64b", "036d544defb7f8e6297bd4c57a3b430d04a269e8", "0541d5338adc48276b3b8cd3a141d799e2d40150", "277fdd6dbd792fd41e401b13e0fd897bfd911378", "16bdb244d50b0892535c6c8be4c4ec7e25a43de6", "5599ed5b57958b32889c9f4f6c9261941ce2e79f", "8dd808bd68d1c46e3678dce30ecc4791d71f9ee1", "37601bb6e655f2392ba1ca2086da0d1e03e19edc", "f19870a1b4847ca61beed722d557a50189479d27" ], "paperAbstract": "We present EclipseMR, a novel MapReduce framework prototype that efficiently utilizes a large distributed memory in cluster environments. EclipseMR consists of double-layered consistent hash rings - a decentralized DHT-based file system and an in-memory key-value store that employs consistent hashing. 
The in-memory key-value store in EclipseMR is designed not only to cache local data but also remote data as well so that globally popular data can be distributed across cluster servers and found by consistent hashing. In order to leverage large distributed memories and increase the cache hit ratio, we propose a locality-aware fair (LAF) job scheduler that works as the load balancer for the distributed in-memory caches. Based on hash keys, the LAF job scheduler predicts which servers have reusable data, and assigns tasks to the servers so that they can be reused. The LAF job scheduler makes its best efforts to strike a balance between data locality and load balance, which often conflict with each other. We evaluate EclipseMR by quantifying the performance effect of each component using several representative MapReduce applications and show EclipseMR is faster than Hadoop and Spark by a large margin for various applications.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.12" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/17d77d5e2db5b9aaf54b8240f829b1d4f077df29", "sources": [ "DBLP" ], "title": "EclipseMR: Distributed and Parallel Task Processing with Consistent Hashing", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "17dbf20b43563d1d39163108a0eded2cddffa03f": { "authors": [ { "ids": [ "2312500" ], "name": "Harry Wagstaff" }, { "ids": [ "1879368" ], "name": "Bruno Bodin" }, { "ids": [ "2856040" ], "name": "Tom Spink" }, { "ids": [ "2644436" ], "name": "Bj\u00f6rn Franke" } ], "doi": "10.1109/ISPASS.2017.7975293", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975293", "entities": [ "ARM architecture", "Backward compatibility", "Bare machine", "Benchmark (computing)", "Binary translation", "Compiler", "Emulator", "Exception handling", "Performance Evaluation", "SPECfp", "Simulation", "X86" ], "id": "17dbf20b43563d1d39163108a0eded2cddffa03f", "inCitations": [
"417514911cccd2804189cb29aa2acca33e6d5229" ], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "217-226", "journalVolume": "", "outCitations": [ "03e53dddc865bf688fe313a94ad186a4d96bffe0", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "453856ac671fa66f3ba520ffef0a6f1b1c253fce", "633172e29129cec3074e724f710756bc41814300", "4e4bedf3bb9369a01e6bb7ecfe6ca32bf089d5e8", "55ff3f1297d4ec72c556cb3a4fd5f5fc77096fdf", "05e19cccbfa8265194834219105cfd2a46a91ba8", "27dc47606d3632d8b7f3263d710a7c61d7ad6087", "fa8e9614381ed39e6b978c691aa81e5365984a20", "0b0422b5864ca1a25d6af274bad11c1b2fef1ed5", "b9d6620afdd7486acb0080890ef5dcbd090aa535", "c446abdfb233d0456d2820578ec5a1bef8474eeb", "46c69da1206fbe5de929a540793d4b188c912e4d", "2c993ae67f162b2465d49c89e4fadef201d3d2bb", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "85c02db93ffe61bed3887602f6b7bc9bf7a6c797", "0dea8362e54b2a6ad06aa1cf3aa09dcc60eef847", "d167b5c8b21c642662000417f313798d375ff38e", "8538a62978fb50c3d1cb3388821c79b721be2177", "6ac43f486d48f280296b102685d9ab6709f31c06", "796d962f67d5d82bd4c874e29d6c845140a82da7", "09a5136de811f4abdf8a92086c0e4c14b6e5b0cb", "71978728170fff7b66d34f452881066a5ff9bb4c", "3c48458d8f77b04c9e69b61f5beddf770e61ec04", "3a8b0e0d1efcb5acab8a6dd56876b1f5fd349046", "3f18c189cba9eaa04d61f5b099bd6c055dbfd7ed", "2960c89331eb7afa86584792e2e11dbf6a125820", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "775e31576b5a3d3df5333b1fba4aa3fb814929a2" ], "paperAbstract": "Full-system simulators are increasingly finding their way into the consumer space for the purposes of backwards compatibility and hardware emulation (e.g. for games consoles). For such compute-intensive applications simulation performance is paramount. 
In this paper we argue that existing benchmark suites such as SPEC CPU2006, originally designed for architecture and compiler performance evaluation, are not well suited for the identification of performance bottlenecks in full-system simulators. While their large, complex workloads provide an indication as to the performance of the simulator on ‘real-world’ workloads, this does not give any indication of why a particular simulator might run an application faster or slower than another. In this paper we present SimBench, an extensive suite of targeted micro-benchmarks designed to run bare-metal on a full-system simulator. SimBench exercises dynamic binary translation (DBT) performance, interrupt and exception handling, memory access performance, I/O and other performance-sensitive areas. SimBench is cross-platform benchmarking framework and can be retargeted to new architectures with minimal effort. For several simulators, including QEMU, Gem5 and SimIt-ARM, and targeting ARM and Intel x86 architectures, we demonstrate that SimBench is capable of accurately pinpointing and explaining real-world performance anomalies, which are largely obfuscated by existing application-oriented benchmarks.", "pdfUrls": [ "http://www.research.ed.ac.uk/portal/files/31304186/main_17.pdf", "https://www.research.ed.ac.uk/portal/files/31304186/main_17.pdf", "https://doi.org/10.1109/ISPASS.2017.7975293" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/17dbf20b43563d1d39163108a0eded2cddffa03f", "sources": [ "DBLP" ], "title": "SimBench: A portable benchmarking methodology for full-system simulators", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "17e814bcc7e0b5d03245fe4975faa1b7f9e3dea9": { "authors": [ { "ids": [ "2556651" ], "name": "Dali Zhu" }, { "ids": [ "11949974" ], "name": "Wenjing Rong" }, { "ids": [ "1699240" ], "name": "Di Wu" }, { "ids": [ "38217269" ], "name": "Na Pang" } ],
"doi": "10.1109/HPCC-SmartCity-DSS.2017.53", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.53", "entities": [ "Authentication", "Cryptography", "Electronic Product Code", "Hash function", "Hypertext Transfer Protocol", "Mutual authentication", "Radio-frequency identification", "Requirement", "Transport Layer Security" ], "id": "17e814bcc7e0b5d03245fe4975faa1b7f9e3dea9", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "404-411", "journalVolume": "", "outCitations": [ "4bf93680814354b76c0c7a19044d7bcb6fbbefd2", "704794c5d5f061ae01eb62e3d07eac953e475ccd", "2a643756d43a0c27201db1b769aa7df22111c72f", "2c350761446acf5ff0bbec6ffccb4e047296ccd1", "0dfef183848d5c7dabda0a589123a8911614db85", "3c74d6701f963627f7758446ee2182157a17da0a", "c64846236b01136d500bee8f117a34a9afce07f2", "249b83a23baa8d2960c59856020e55fd779baa54", "b2e2bdde8df1a563387e71bdd6e5679f6ba6f212", "07119728ce6987ef0dc6ce8e4937a89926adf6bf", "a46b5506c47541f028ac59a4d0c275d82cca2f50", "04a95071242ebcad53fc08b1638027129d8e1ecb", "94ce93920fdb5fc2459eae2c8fa6269488d49d09", "0914d543ec3ec5093379ea56a22b2363367c52d7", "1912d9ae8dceeac349424d7ad95bb489910dd2b0", "283548b32775ff49b82d66ff413f63b557056569", "954721963ecd59e04b2afdbaf8717667b218e630", "7ad7695e1df56abfda570f148d30767334df45a0", "c2d58f4308f1e7e816aa8370608ccfd4d682444d", "f387d3eceb1b2a99e20ab3b7e1a4d5246ef1e3e5", "e266c481797dc5862c72993dabf0c68e661f0a11", "31e55fe42a3fe07155eed3707bea5d64b6dc4d8f", "e40d28cd2c57bb363787d04089d69f1ea591cb90", "46c3d7a9dc5f729f8685ebd4464d2454d5421b9f", "2e7ec98405cd6f06b4469f777945b3c915601d8b", "0cf3b2b24e5f44386f7434e7522d9fe93d49b2ab" ], "paperAbstract": "In a RFID enabled system, tagged products is ordinarily required to be transferred from an owner to another, which makes 
the ownership of corresponding tags change for several times. To overcome problem of temporary ownership transfer, a lightweight anonymous group ownership transfer protocol is proposed in this paper. It involves ownership transfer and ownership recovery for group tags, which are all based upon mutual authentication. Compared with most of previous protocols using hash functions and cryptographic suites, our scheme only utilizes simple operations, which are completely compliant with EPC Class-1 Generation-2 standard. Through our scheme, the window problem in multi-owner environment is first solved. Security analysis shows that our protocol can provide security and privacy preservation. It is also demonstrated that it resists various attacks. Performance comparisons indicate that our scheme can meet necessary security requirements and achieve better performance.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.53" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/17e814bcc7e0b5d03245fe4975faa1b7f9e3dea9", "sources": [ "DBLP" ], "title": "Lightweight Anonymous RFID Group Ownership Transfer Protocol in Multi-owner Environment", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "17ed47ae223da909afc9f7213f08146534b45ae9": { "authors": [ { "ids": [ "40005837" ], "name": "Sahil Suneja" }, { "ids": [ "6100194" ], "name": "Ricardo Koller" }, { "ids": [ "1765914" ], "name": "Canturk Isci" }, { "ids": [ "1879216" ], "name": "Eyal de Lara" }, { "ids": [ "40552834" ], "name": "Ali B. 
Hashemi" }, { "ids": [ "39231157" ], "name": "Arnamoy Bhattacharyya" }, { "ids": [ "1734109" ], "name": "Cristiana Amza" } ], "doi": "10.1145/3050748.3050766", "doiUrl": "https://doi.org/10.1145/3050748.3050766", "entities": [ "Broadcast automation", "Code injection", "DevOps", "KVM switch", "Live CD", "Machine code", "Memory footprint", "Software bug", "System monitoring" ], "id": "17ed47ae223da909afc9f7213f08146534b45ae9", "inCitations": [], "journalName": "", "journalPages": "97-111", "journalVolume": "", "outCitations": [ "8cbb73828527b1965e1cfb6a104f5ce7cf1ce3ce", "41094fdc5f833c85c488b3fb2bade1ecde006efe", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "5cd50a34d1418de1f8c57bc447e1407d561a42e1", "595bdc4d40c7696b829f8f2e45645fe0bb6154ef", "e093cdd49561d4739f300fd05c0eb09ec719376a", "62d6d99866697d5efdbba3df89051f9c96082567", "68dfa99bf31b85ce964edc2b7deb241af3c87527", "893e818f5cc7a160befd613131717fd2fd0f2ef8", "b634f4f4ff14c87952ba2713faad763753d34684", "3611a72405a106d8fc4682ab7dd32b8f241f3690", "5075192e0e25af961420412fed1f848282ae313e", "32a7c5e10a09b5532d56af50ef2f60d9776cc56a", "f054c1489516b19704398b343ddbd41f3aa2c4e0", "1dc8960ee89252ba82d881b17211542017e4c597", "4ab4a666f5e5ed34ac219a9fdc2f70bd1cab0922", "07a849ef5c6efe194bf37027280eca63252cf21e", "4efe82c7ff97fb8e2b36a57d7431ddf8ac9382e4", "30d9132ef7845b8fb4e53d9ad982363700746928", "ca81be25da20dc29cc0a6deca1f6b285d7cf0975", "27f071ccbea5a4940dcc585ba4cfa9258bf2bcdf", "09d4c0b113ec0174bca0d4b0632814094a5cf14f", "1a16c05b5e002bcb117d892fe4101d58ad8ac6c9", "b3f7942b994d65cd72556487b45c7367bc9546ad", "1c676ab6b80c76533c6d92758dd96768f51cb6a9", "02ed0ec3bb95776b5c06e2784810b501c4d3f053", "174f8fdd45609c8962ffb9f997898128e44afeda", "9aa0d7253574e50fe3a190ccd924433f048997dd", "5d841fce8946ad9cbf6960827fa402b3551cc4c3", "6a1249f6eeed047ace81012c7578fe42f26b776f", "33623a9fec52e01e92c6ba1ae4d67b01f0c76fe5", "5fbf3dc6055f79a56cff1b2d3caf614081afcdba", "65192f3d0ffb066a4c47a09fc11fdfad47dd192e", 
"3cda09fdc91d7f85a138a4d56848a3a0708df76f", "340569da290505865cb2ba79a4201c7028d4d66a", "2df18f420ed0669c1bec74038d2ad068bf0831a2", "1c7e1a0bde89990a9173664d3ff6931542741226", "441cf0fe8091d09207374a9d96723419091345ab", "9bc637db1e3c0dbcde1b44139a08d50737aca116", "4650259fb4aadb376fd5994f9ab9dd07a4f83511", "b8b3b0b974f76abdf7d5be2cba019cecaedb8e17", "5752a746cd143d30b22d837e1077fa9c971860fb", "78bae2bd40431d495895cf60c126ee54fdcdf743", "24748ef2b88e6df370b5dccfb75cba47e132f92d", "63759a83f74f7637ed16c6b9de362b9349ee3d31", "1b43f9272d8806feae6460a02a5296f10efcfeee", "948ad0102d49842e7a785140a67cdf28c0c4567e", "02f28656a748c351bf92c76c5a0a31c2d1d9c45d", "2d5d6fceb2d20df7f9ed324f82afe73688dd9ed4", "1ce158e7922ad56e6b065026f50f12c94eb786f2", "bf0da9dadafe58af41801f7097d51c9442c79148", "3ec4b8237c86c4cccee8b4002dfabecb20d1c511", "04c43bcb8b233052a08a6a42f57398c6ce91e234", "b06c7df9404cf6d87b5d552808450b8c226deab9", "0187493c5cbd9b8bcf2019b8521082aea6db83f1" ], "paperAbstract": "With DevOps automation and an everything-as-code approach to lifecycle management for cloud-native applications, challenges emerge from an operational visibility and control perspective. Once a VM is deployed in production it typically becomes a hands-off entity in terms of restrictions towards inspecting or tuning it, for the fear of negatively impacting its operation. We present CIVIC (Cloning and Injection based VM Inspection for Cloud), a new mechanism that enables safe inspection of unmodified production VMs on-the-fly. CIVIC restricts all impact and side-effects of inspection or analysis operations inside a live clone of the production VM. New functionality over the replicated VM state is introduced using code injection. In this paper, we describe the design and implementation of our solution over KVM/QEMU. 
We demonstrate four of its use-cases-(i) safe reuse of system monitoring agents, (ii) impact-heavy problem diagnostics and troubleshooting, (iii) attaching an intrusive anomaly detector to a live service, and (iv) live tuning of a webserver's configuration parameters. Our evaluation shows CIVIC is nimble and lightweight in terms of memory footprint as well as clone activation time (6.5s), and has a low impact on the original VM (< 10%).", "pdfUrls": [ "http://doi.acm.org/10.1145/3050748.3050766", "http://www.eecg.toronto.edu/~amza/papers/vee17-paper.pdf", "http://sysweb.cs.toronto.edu/publication_files/0000/0305/Civic_VEEFinalVersion_1_.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/17ed47ae223da909afc9f7213f08146534b45ae9", "sources": [ "DBLP" ], "title": "Safe Inspection of Live Virtual Machines", "venue": "VEE", "year": 2017 }, "1804e67480b9c46b377d0a102e26d3a9a1a9139b": { "authors": [ { "ids": [ "1778464" ], "name": "Prateek Sharma" }, { "ids": [ "1697572" ], "name": "David E. Irwin" }, { "ids": [ "1705052" ], "name": "Prashant J. 
Shenoy" } ], "doi": "10.1145/3084442", "doiUrl": "https://doi.org/10.1145/3084442", "entities": [ "BOINC", "Computational science", "Electronic data processing", "Experiment", "Failure rate", "Message Passing Interface", "SPARK", "Server (computing)" ], "id": "1804e67480b9c46b377d0a102e26d3a9a1a9139b", "inCitations": [ "84601d54f18d74082e3c99f86cbc9fd24c4a40b3", "2d40bd8a14b429142e487282761a2a6b95d6b96b", "21d2fe357a178d36a50398b05e0046b7b500b109", "613cdadb56592f704349bb25a359ebecd8fd9e0f", "4ce8ad1513e84cb464efa68827119295530ebaa4" ], "journalName": "", "journalPages": "59", "journalVolume": "", "outCitations": [ "6e1ceacdaa0c979cf52af5f478cc2a9891e2b6a0", "66b015024d89c87fc9ad9ab9d23417602c4dc8ae", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "48b22e82dba733f04a5af3408ecab872ba38f0c3", "3d90fde9ced995e1ad3ffb9de26e3b45e90ad1fa", "e70ca4cd5560a2e81795564bde278f6334b16de6", "3a043714354fe498752b45e4cf429dbae0fb2558", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "21c24920914a8d781ffb43d08ba8f0d916968007", "3000e77ed7282d9fb27216f3e862a3769119d89e", "a072ef6581c007cc7eddbbae1fb5b61a3a15970c", "0935bb723e4071ccd4c2334d3b6d728faa111d11", "080b1f2c8316dad80d8c385dfcb82335a64a4d29", "26424829a41d47557fd81456d5245bbfa874b3f6", "23ddae93514a47b56dcbeed80e67fab62e8b5ec9", "0f9215aaf5a8376461ff3ec504a53172ce827647", "8730033f32fbcca2c82559fa0c218143c707d7f7", "274a6c951c4aa82e6ef6b9f63c11f0ef66722c20", "7f9d43c76bb5c077aabf1ef6ef20d35763e34291", "0d9b90af172613d0d6af3b3352a1d351a7a09b5a", "0558c94a094158ecd64f0d5014d3d9668054fb97", "4581948531998d5e5f23c131081ea0cdd9066bfe", "51694f797b0d4a7e03b1e9a6587a9d4976e92297", "280863f80b6401bc6d65839ecb3dc7a0febdfa09", "12635bdd3bd32f09c85a9070977a281fcb32ff61", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "2988e34168fa91398fa397baf823af2063893e9c", "11310368999afdce94bca4316eea38216b2446c5", "5b36826135f9641f8e511c9c6224f5225aed9b1d", "70e38d47b83261e257bae61dc39ffbf391b30591", "31b27a3b4ff89993eb92e8b1353edead8d5f2520", 
"05be0db01d70bcce9530b462ab2368f9e15127d9", "835916e7ad1231d5aa2985340b0ee543cadbb5b6", "c59a5b60f276506b02cf8bc51095fa5f021fb6f6", "118c97ed0ff45bcbda0040d2acb8615a13c2d5fb", "96d40ea825ee21617b24732ad956f9b7307ea254", "1e2815b20142064cf06e1318b8d0bb32f6e7d70c", "48ec1b8cb2209276e02f7443323a59a944502e3e", "0dc346b58cfb5007b7b31d14a80ab9692049767f", "f060942169f56e0aa8f3253047fac49b7c8eff2d", "7e74ea151efcdcfecffdbeaec0728f9ac1f80389", "19d8f8af2d774bda8b95bbeacf62c85811c37b3c", "985b0a763d3ae1aa2cd2752167f85ce079cfebb9", "d119f3f1a8b053892b9d4964e6394bc368520068", "b6571efa4483aa00d23bbcd36930c4877548ba38" ], "paperAbstract": "Cloud providers have begun to offer their surplus capacity in the form of low-cost transient servers, which can be revoked unilaterally at any time. While the low cost of transient servers makes them attractive for a wide range of applications, such as data processing and scientific computing, failures due to server revocation can severely degrade application performance. Since different transient server types offer different cost and availability tradeoffs, we present the notion of server portfolios that is based on financial portfolio modeling. Server portfolios enable construction of an \"optimal\" mix of severs to meet an application's sensitivity to cost and revocation risk. We implement model-driven portfolios in a system called ExoSphere, and show how diverse applications can use portfolios and application-specific policies to gracefully handle transient servers. We show that ExoSphere enables widely-used parallel applications such as Spark, MPI, and BOINC to be made transiency-aware with modest effort. 
Our experiments show that allowing the applications to use suitable transiency-aware policies, ExoSphere is able to achieve 80% cost savings when compared to on-demand servers and greatly reduces revocation risk compared to existing approaches.", "pdfUrls": [ "https://people.cs.umass.edu/~prateeks/papers/exosphere.pdf", "http://lass.cs.umass.edu/papers/pdf/exosphere-sigmetrics17.pdf", "http://arxiv.org/abs/1704.08738", "http://doi.acm.org/10.1145/3084442", "https://arxiv.org/pdf/1704.08738v1.pdf", "http://www.ecs.umass.edu/~irwin/exosphere.pdf", "http://doi.acm.org/10.1145/3078505.3078511" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1804e67480b9c46b377d0a102e26d3a9a1a9139b", "sources": [ "DBLP" ], "title": "Portfolio-driven Resource Management for Transient Cloud Servers", "venue": "SIGMETRICS", "year": 2017 }, "181b6916432d48489bad0692b0c4600d964684c0": { "authors": [ { "ids": [ "2569302" ], "name": "Michael Blondin" }, { "ids": [ "1722435" ], "name": "Javier Esparza" }, { "ids": [ "40264192" ], "name": "Stefan Jaax" }, { "ids": [ "2254368" ], "name": "Philipp J. 
Meyer" } ], "doi": "10.1145/3087801.3087816", "doiUrl": "https://doi.org/10.1145/3087801.3087816", "entities": [ "Algorithm", "Computation", "EXPSPACE", "Finite-state machine", "Petri net", "Primitive recursive function", "Reachability problem", "Recursion", "Whole Earth 'Lectronic Link" ], "id": "181b6916432d48489bad0692b0c4600d964684c0", "inCitations": [ "76759fa11225ea5d2cf3c8a7bf149ff9dfc727f7" ], "journalName": "", "journalPages": "423-430", "journalVolume": "", "outCitations": [ "1511e9339b5eba2146613b79d43c5e35accc6242", "728d8a86d19d4d32fc8f6594b7f62445e2e65c73", "20aa822b10e2d970944e2a9f3ad6dfe16ad37fb4", "026a0f721c6e95ca2db9e52df215ab1078b1e7fa", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "4866ced610bea69c2f7d3fb754acda959e6c9274", "7936c746aa7da921de5ade549b8f6bcbc548c806", "3ce2d233cee585ecff73729836918ba87195c18f", "15882cb74a1ac47c0c3e0fc26870d71746849600", "c6b5f4c10d6d769de971e02e33bac213f7e80b9b", "c1cb2c003ab53c6e0795ae7faf81ec2fb091a250", "f6993f4a347a3a46b190049a9a2f392f558ca926", "3b0d96ae2dedbe88ae13eaba040a080a1c769ecf", "4467eb5c15b07b4604e2590e560db8b07ab7e1bd", "0ce92742670438df936dab39ff9bf46db4430c10", "48b5adcdbca0aa2c7248a7af77b22b3126aae18e", "266d1fb8433a9faf8e4564fbfa8819cacffc31c4", "6adb714351970b96fe7e798a79226509c8b00c04", "1ec66e74810fe46faafd6743b2334e213e7ea29b", "7262fe7f6fb4ca26c0999c6a786f1f555238d7ec", "e36ab83038cbe6a235e487319684c02749794c6b" ], "paperAbstract": "Population protocols are a well establishedmodel of computation by anonymous, identical finite state agents. A protocol is well-specified if from every initial configuration, all fair executions of the protocol reach a common consensus. The central verification question for population protocols is the well-specification problem: deciding if a given protocol is well-specified. Esparza et al. 
have recently shown that this problem is decidable, but with very high complexity: it is at least as hard as the Petri net reachability problem, which is EXPSPACE-hard, and for which only algorithms of non-primitive recursive complexity are currently known. In this paper we introduce the class WS3 of well-specified strongly-silent protocols and we prove that it is suitable for automatic verification. More precisely, we show that WS3 has the same computational power as general well-specified protocols, and captures standard protocols from the literature. Moreover, we show that the membership problem forWS3 reduces to solving boolean combinations of linear constraints over N. This allowed us to develop the first software able to automatically prove well-specification for all of the infinitely many possible inputs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087816", "https://www7.in.tum.de/~blondin/papers/BEJM17.pdf", "https://arxiv.org/pdf/1703.04367v1.pdf", "https://www7.in.tum.de/~blondin/talks/oxford17.pdf", "http://arxiv.org/abs/1703.04367", "https://arxiv.org/pdf/1703.04367v2.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/43d1/8db3412550b5a7914cbf9f035f12f393b24a.pdf", "s2Url": "https://semanticscholar.org/paper/181b6916432d48489bad0692b0c4600d964684c0", "sources": [ "DBLP" ], "title": "Towards Efficient Verification of Population Protocols", "venue": "PODC", "year": 2017 }, "182cf3f5cfe4a0fa1a4f405bc8a14fe99d098752": { "authors": [ { "ids": [ "35511831" ], "name": "Weichen Qi" }, { "ids": [ "2951748" ], "name": "Yunchun Li" }, { "ids": [ "7678482" ], "name": "Honggang Zhou" }, { "ids": [ "1688012" ], "name": "Wei Li" }, { "ids": [ "1976501" ], "name": "Hailong Yang" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.33", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.33", "entities": [ "Big data", "Data mining", "Decision tree learning", "High- and low-level", "Machine learning", "Optimization problem", "Program optimization", 
"Scheduling (computing)", "Speculative execution", "Whole Earth 'Lectronic Link" ], "id": "182cf3f5cfe4a0fa1a4f405bc8a14fe99d098752", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "254-261", "journalVolume": "", "outCitations": [ "155342e33dc1e58fc82fff705b2fc52f22c8d5a6", "596b88f8e5febfcb9746b0742ccb367c4e2feab6", "438110dc02f39f221896847a4d0e24f88e130598", "91426876f5b6a49f21e5391b72395fd6b3e83e65", "090030e0d1aa117008e9e9fa4abdee0a95455f4a", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "5c0985b9b699d94c697a9a835e76d52ba916ca0b", "1a057fd874f7c1994618f1c7560c492d5f590cb1", "65ad9178eb3e3bb6ef01df6ced56e389e911376c", "4ebde35f15df8f882bf98d7bb5a5ccb6da6d1d2e", "4abe45828118c57f19b7f81e56b9fc75effdfecf", "3e257f01e3ee71545d824a1615c35659525b856a", "4eab97d0d1c75641671aa5b7761978322d904c5c", "7b368d1fcf149031e83ffc075409d08b3a1a7d3e", "0a12a179bebdf4bb69d692a1127795b3f536270b", "9ee6209432316baf6776838917e06bca4d874747", "73e7e3ae90d2872429b62be8f1d4ac78e33c1ec7", "230239fb61d7a6996ac9552706363323b34735f2" ], "paperAbstract": "Straggler task is commonly considered as the major bottleneck in parallel data processing. Previous work mainly focuses on the coarse-grained straggler detection and optimization such as speculative scheduling. However, fine-grained root-cause analysis of straggler tasks is rarely considered. In addition, existing work simply depends on empirical analysis, which lacks of useful guidance to performance optimization. In this paper, we propose a new methodology of fine-grained straggler root-cause analysis using machine learning. We collect raw metrics from Spark event log and hardware sampling tool, and refine them into high-level metrics for model learning. 
Then we present the root-cause analysis of stragglers through CART tree. A customized prune method is also applied to improve analysis accuracy. From the analysis, we derive several new findings beyond the well known causes of stragglers. Our work provides a new perspective on identifying and understanding the inefficiency in parallel data processing programs by applying machine learning techniques to fine-grained root-cause analysis of straggler tasks.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.33" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/182cf3f5cfe4a0fa1a4f405bc8a14fe99d098752", "sources": [ "DBLP" ], "title": "Data Mining Based Root-Cause Analysis of Performance Bottleneck for Big Data Workload", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "18307d7fea0fed1067a5704f9aa13c93541e0142": { "authors": [ { "ids": [ "1937142" ], "name": "Jos\u00e9 Bacelar Almeida" }, { "ids": [ "1722312" ], "name": "Manuel Barbosa" }, { "ids": [ "1737231" ], "name": "Gilles Barthe" }, { "ids": [ "3387178" ], "name": "Arthur Blot" }, { "ids": [ "1959667" ], "name": "Benjamin Gr\u00e9goire" }, { "ids": [ "3085897" ], "name": "Vincent Laporte" }, { "ids": [ "1736719" ], "name": "Tiago Oliveira" }, { "ids": [ "1752330" ], "name": "Hugo Pacheco" }, { "ids": [ "38290160" ], "name": "Benedikt Schmidt" }, { "ids": [ "3105882" ], "name": "Pierre-Yves Strub" } ], "doi": "10.1145/3133956.3134078", "doiUrl": "https://doi.org/10.1145/3133956.3134078", "entities": [ "Compiler", "Cryptographic hash function", "Cryptography", "Encryption software", "Formal verification", "Jasmin", "Memory safety", "Programming language", "Proof assistant", "Software verification and validation" ], "id": "18307d7fea0fed1067a5704f9aa13c93541e0142", "inCitations": 
[ "c47f1c156ea127aa985e715a61fc9f4b246a415a" ], "journalName": "", "journalPages": "1807-1823", "journalVolume": "", "outCitations": [ "0450987faf2baf11df986a6bf6d477c6ce4e9d93", "d4611529e2ac02c2a58cb526a566d68ae6fe330e", "194f7d8647009dea5f4867ae27d340c84c46f51b", "ac6a003a4e5d0cc12fd2ae8a57769b79283a7156", "4207ebe6f2656c1a40149ec446ca99885ce5b2ad", "3369e43abcb499eea4d208f2239df00551b8d2dd", "04402122e2fb065ed1280000981f7626496f0afb", "0a36a523494c3c966f0a6e716c7ef851fcda4762", "1aff1d96c7d0299c7d6edc2114186b38af911eb9", "4c8ad20e8d682d9956dad6a68d2e2a022773a959", "9c7218c8effa7691d507b08d4b222c403ce26c4a", "0356047e6f9a42ffe5cb7bc3e64b22205fdac918", "615168555150d80752a1c195229642acbe6fb3d9", "011f7da0095ac8c0d4477eeda2728e5f80a35767", "47a97d0c6d0ee4313fe8a0380c857e50fe93f7d4", "614f3b72660eed2ce7b62970fa73ba8eae4d278b", "9feb4b268fea8a7f9513dcc9db475f5ee9c7dfde", "2e37af19b69f12699279e3dc754cfac681555d1f", "db0d1f3ed4a418e126d0281d5b3aadd1fd45c982", "0b70652541cb408152c468eaea7b114dc65beab1", "2c0786752ca32b12c35862b9558107411003347c", "5eb62221dfa721bb9411bfa1256c7632c671b2e3", "3cc86ff94309bb58b2125eea173b23ab89f26a3b", "5503686edf7ab29785c51a7c4b10e9dbbf80c140", "430ce88f430d22d131ca3f753dd576c61f7cced3", "7eed7154d0590c1e6eb64d4ae7935a526deac4a4", "670cdcf84a0403cf15fc80dd042e1938847fdf29", "6c725d2a7e88515c5f7c877936f90b0184c4fe8f", "57adf20f0fa575a43609937c8f1a695a444a0ae0", "963e289a8d191a114a5b0204b1e1d37a506ab9a7", "3916407cd711828f206ff378d01b1ae526a6ee84" ], "paperAbstract": "Jasmin is a framework for developing high-speed and high-assurance cryptographic software. The framework is structured around the Jasmin programming language and its compiler. The language is designed for enhancing portability of programs and for simplifying verification tasks. The compiler is designed to achieve predictability and efficiency of the output code (currently limited to x64 platforms), and is formally verified in the Coq proof assistant. 
Using the supercop framework, we evaluate the Jasmin compiler on representative cryptographic routines and conclude that the code generated by the compiler is as efficient as fast, hand-crafted, implementations. Moreover, the framework includes highly automated tools for proving memory safety and constant-time security (for protecting against cache-based timing attacks). We also demonstrate the effectiveness of the verification tools on a large set of cryptographic routines.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134078", "https://hal-polytechnique.archives-ouvertes.fr/hal-01649140/document" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/18307d7fea0fed1067a5704f9aa13c93541e0142", "sources": [ "DBLP" ], "title": "Jasmin: High-Assurance and High-Speed Cryptography", "venue": "CCS", "year": 2017 }, "18344f8daf595f91f18fdd6d2b2dff99423087b6": { "authors": [ { "ids": [ "38504194" ], "name": "Luke Maurer" }, { "ids": [ "2065311" ], "name": "Paul Downen" }, { "ids": [ "1706685" ], "name": "Zena M. Ariola" }, { "ids": [ "35372552" ], "name": "Simon L. 
Peyton Jones" } ], "doi": "10.1145/3062341.3062380", "doiUrl": "https://doi.org/10.1145/3062341.3062380", "entities": [ "Compiler", "Continuation", "Haskell", "Intermediate representation", "Join (SQL)", "Join point", "The Glorious Glasgow Haskell Compilation System" ], "id": "18344f8daf595f91f18fdd6d2b2dff99423087b6", "inCitations": [ "acad7ce64bc015c6a2b3581e460b50d656d8126c", "6db61a6cdabb9036db9d1c6820fcbb52e13f153b", "08de23da3ed2240cce1b6f48e1096cfe806d90d2", "65e87447a357c0f2fccf3111ac0241aaec3f0a7f" ], "journalName": "", "journalPages": "482-494", "journalVolume": "", "outCitations": [ "46bd1887e7d15dba0ccfb410a37c6e6bec6f217c", "2de3e6d85b2e7cb41a6e87b3188b9101d147dd82", "73062e44e8a4b3d80c0a98e009c9604dc90d3911", "132aac31b7355eb3f3d110e7cca0c321e43541d6", "d5791235ad4439f48fcdd359d45d144ee6a23ac9", "13c1c1c4f8b7b87404a929e4e9b339008fd27138", "cfe34582dbc7ac0e48796494b4cc2d397c4ae5c2", "1e2c604e66c4439ad343b70d7ec0abedf72d006a", "d5b48ff78a806fe957eda9b951d726c8029cfd9b", "66da7820ffe84c54a6fdb9ac28e27f6346874a93", "43252ba020a456768700880f1e10eff3b30d4526", "984c126b595d1ce39500757ef85567b32b465250", "06e998884b6a31e0d09338377d78b52be402714b", "64d2a65a7d559f9b05570fb0fea8bb4cccd83ae2", "2f59689eb4801ca6bb8aaabad40abd333dda803a", "132a213cc5e8a1ca3be0cdeb2accd42511bde33f", "ac62767d3666b692dc55688e3650e6477d3bffa7", "1c7487d0879f29facd5f5644b250246b4cd11769", "98445cbe502ccc6b45953487aa926cdbe980a491", "917a644267e919e329b4f47665586beeece6b004", "0723583c40abf490571b6ce62afdac2ab28afd8e", "3ebb770bdd641a492f6159d7076ed3609ce7af47", "8a3a538bcdfbed630f6bf2fc3ee49fe740819602" ], "paperAbstract": "Many fields of study in compilers give rise to the concept of a join point—a place where different execution paths come together. Join points are often treated as functions or continuations, but we believe it is time to study them in their own right. 
We show that adding join points to a direct-style functional intermediate language is a simple but powerful change that allows new optimizations to be performed, including a significant improvement to list fusion. Finally, we report on recent work on adding join points to the intermediate language of the Glasgow Haskell Compiler.", "pdfUrls": [ "http://ix.cs.uoregon.edu/~pdownen/publications/pldi17_appendix.pdf", "http://doi.acm.org/10.1145/3062341.3062380", "https://www.microsoft.com/en-us/research/wp-content/uploads/2016/11/join-points-pldi17.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2016/11/compiling-without-continuations.pdf", "http://ix.cs.uoregon.edu/~pdownen/publications/pldi17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/18344f8daf595f91f18fdd6d2b2dff99423087b6", "sources": [ "DBLP" ], "title": "Compiling without continuations", "venue": "PLDI", "year": 2017 }, "186cf69df119c5640e49389e1ad23d96977f2659": { "authors": [ { "ids": [ "8704205" ], "name": "Zhaoshi Li" }, { "ids": [ "1743798" ], "name": "Leibo Liu" }, { "ids": [ "40381677" ], "name": "Yangdong Deng" }, { "ids": [ "3817476" ], "name": "Shouyi Yin" }, { "ids": [ "1711589" ], "name": "Yao Wang" }, { "ids": [ "1803672" ], "name": "Shaojun Wei" } ], "doi": "10.1145/3079856.3080228", "doiUrl": "https://doi.org/10.1145/3079856.3080228", "entities": [ "Central processing unit", "Compile time", "Compiler", "Computation", "Datapath", "Field-programmable gate array", "Hardware acceleration", "High- and low-level", "High-level programming language", "High-level synthesis", "Parallel computing", "Pipeline (computing)", "Programming language", "Reconfigurability", "Run time (program lifecycle phase)", "Scheduling (computing)", "Server (computing)" ], "id": "186cf69df119c5640e49389e1ad23d96977f2659", "inCitations": [ "5cf2f8dd9491b2cf0c42b0d2a6ccdae5c764f906" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture 
(ISCA)", "journalPages": "575-586", "journalVolume": "", "outCitations": [ "4390730c4765050e346433f1f51d821205f4616e", "0a37498405670953b560ed43c11a3bf85411c803", "808b90dadea6426924374633c8c49f78175f04a8", "a01d4a0f26ec9bba3e21f12f60489a6a20a8ae17", "3a1db3c1940bfdd4af47f1b675d75161598452c1", "663678293a25979efae27b5b9762c5c02bc9c72c", "c41810e0514c0b2b92ead025c260b27251ccc054", "47eb35b0fa9be86dfbf3adbcef89f8d98baf428b", "09ed565e84057123c15ab12b885c235d1f241aed", "2e5ef3e461eaccf533aaae000ef847ae581d4363", "348d535dba9f1b6a7674d178214fc33972a7b1e8", "5c6c460e58b72651a60d880c42d7e14b5daf206c", "3f63a2362b1fabc83194d10d6b5a0b2a56c1799b", "79661a02f206ffb6aa41dbb8a92e8a7963257c31", "6f54bd4092672067ef92c0db252b53224cda3daf", "907d2c011942a78bf6acff8e048f4185d53ff8f2", "b1d14e2b28759afd361d50e14744224b654e205e", "0c7465f733161ed2c9818da22d77c2cb518f8f58", "3dad0016ad3a93419be0aabc427e40a2b75c82a7", "ef9004969a4f163fa8e377cabf515520b6eb8b45", "b678685b2cd0e3acb714c03cf82df08ed0770873", "6e5932f3b5831bba13008f7498cdbbd9309ab71e", "1114495d7c94db54f13ae48e7e20efbde5b9bf15", "057722f2cef2cb6ae976f4da804f8504074815b5", "4518d1127a018c51c8c01eef92cb87208bb22e7b", "39024b37b7e49f0dcfba44e124bfa84103cb2f55", "8df62aad18d6de13331479666c3b5d6a32b0ba58", "07f3b8cfd59624acf80e16794bd3f2bc69acd8e7", "165cf5e471b32122ba3a38709873cecf9b1b9a58", "bc7f7f3e28aa404f9c1dd3e9a8a3ef20898bb96b", "6f929d617765327040dfb2f86940b17c93441a5e", "6c15928f2a1b8525d2aa4e078cfa62847ba422bf", "01cca1fc2784f4b0f164ca5703ce793d0042649a", "44f8ae933426f273ef106625977827aec264cb72", "a76c4f1f29f1f71e34db333280c6e6816e5d8746", "da15bfaf970968a4ecdb3fa3cb9f540558558691", "6756d3e0669430fa6e006754aecb46084818d6b6", "0425f1e7e8651b5ba3c9e2eb98a3c50a07146972", "3a5e93a329af9d5f801e80f792f1f1573ed4ac30", "74e6b114822b712c100c7ffd1b01f4fb1564bd28" ], "paperAbstract": "CPU-FPGA heterogeneous platforms offer a promising solution for high-performance and energy-efficient computing systems by providing specialized 
accelerators with post-silicon reconfigurability. To unleash the power of FPGA, however, the programmability gap has to be filled so that applications specified in high-level programming languages can be efficiently mapped and scheduled on FPGA. The above problem is even more challenging for irregular applications, in which the execution dependency can only be determined at run time. Thus over-serialized accelerators are generated from existing works that rely on compile time analysis to schedule the computation.\n In this work, we propose a comprehensive software-hardware co-design framework, which captures parallelism in irregular applications and aggressively schedules pipelined execution on reconfigurable platform. Based on an inherently parallel abstraction packaging parallelism for runtime schedule, our framework significantly differs from existing works that tend to schedule executions at compile time. An irregular application is formulated as a set of tasks with their dependencies specified as rules describing the conditions under which a subset of tasks can be executed concurrently. Then datapaths on FPGA will be generated by transforming applications in the formulation into task pipelines orchestrated by evaluating rules at runtime, which could exploit fine-grained pipeline parallelism as handcrafted accelerators do.\n An evaluation shows that this framework is able to produce datapath with its quality close to handcrafted designs. Experiments show that generated accelerators are dramatically more efficient than those created by current high-level synthesis tools. 
Meanwhile, accelerators generated for a set of irregular applications attain 0.5x~1.9x performance compared to equivalent software implementations we selected on a server-grade 10-core processor, with the memory subsystem remaining as the bottleneck.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080228" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/186cf69df119c5640e49389e1ad23d96977f2659", "sources": [ "DBLP" ], "title": "Aggressive pipelining of irregular applications on reconfigurable hardware", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "187a407c0ddcbb73e227b06c08d2f7d1374014a6": { "authors": [ { "ids": [ "1855827" ], "name": "Xiaoqing Luo" }, { "ids": [ "1684152" ], "name": "Frank Mueller" }, { "ids": [ "2797656" ], "name": "Philip H. Carns" }, { "ids": [ "38538207" ], "name": "Jonathan Jenkins" }, { "ids": [ "1692762" ], "name": "Robert Latham" }, { "ids": [ "40211322" ], "name": "Robert B. 
Ross" }, { "ids": [ "39683248" ], "name": "Shane Snyder" } ], "doi": "10.1109/IPDPS.2017.45", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.45", "entities": [ "Experiment", "Extrapolation", "Job control (Unix)", "Linux", "Lossless compression", "Mathematical model", "Memory-mapped I/O", "P system", "Signal trace", "Simulation", "Supercomputer", "Synthetic data" ], "id": "187a407c0ddcbb73e227b06c08d2f7d1374014a6", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "585-594", "journalVolume": "", "outCitations": [ "0d744408b775b228dc6ba5064ee769ee4299f6df", "6ecf1ef46e34ddcb3e385743fc07a80b860250e2", "59ba9f62728b6231f982ea3b59f9ba7422182f28", "59a7ce2144978a4cdb115760eb915f224136a8fc", "3fcc786e099ba07cb52744a80b895061b309df7f", "884fc7d1c8353a6ca2f0830a9f0f840a985afa7e", "48d2d23ca0707556e63f7182737612ed2ce8b14c", "b3a8bed5645b36606b29811c0a215eaaa017608d", "2a11832bb798de3315838c327bdcec6493cd2a5c", "9a7388b992631676c58fc06f891878e88e6d102c", "39c0b19b60b8e872230220ea8882488221f01941", "d6d6793a7049b810a0b1dbb4f6a4d517e69244d7", "6a4105c2e444bf4a164c498126bc35f45e497286", "483e6cb001353b145b8a0c73b52526c1fe1b0db7", "981f151192b553300daaab96d60c7cbe2cdbb1dd", "0fef8efee83bf73d50d29de247b1311d260547f3", "10b1552f5b7f7f95ebcc02779fed467ef2a812ca", "07afa1ea6934df5d325b07754f9eda290981735d", "0bb2a26a0adb2be2ee078df83409db27d76ea322", "8f16149eb792ef774487a0c008442cd5df72d9e3", "3a69f1592a65a85bab18a00481e98f95849d4d9d", "6e0039d62431ec95136f738c5020f6e3d3711168", "0c60a639dc9cd8014f685ec986c29bf55a10bb5a", "9edab79d681bae0071aa784328b0ce134d909c10", "aadb50ca39eb59321af14142e88c3fd293238b15" ], "paperAbstract": "Today’s rapid development of supercomputers has caused I/O performance to become a major performance bottleneck for many scientific applications. Trace analysis tools have thus become vital for diagnosing root causes of I/O problems. 
This work contributes an I/O tracing framework with (a) techniques to gather a set of lossless, elastic I/O trace files for small number of nodes, (b) a mathematical model to analyze trace data and extrapolate it to larger number of nodes, and (c) a replay engine for the extrapolated trace file to verify its accuracy. The traces can in principle be extrapolated even beyond the scale of presentday systems and provide a test if applications scale in terms of I/O. We conducted our experiments on three platforms: a commodity Linux cluster, an IBM BG/Q system, and a discrete event simulation of an IBM BG/P system. We investigate a combination of synthetic benchmarks on all platforms as well as a production scientific application on the BG/Q system. The extrapolated I/O trace replays closely resemble the I/O behavior of equivalent applications in all cases.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.45", "http://moss.csc.ncsu.edu/~mueller/ftp/pub/mueller/theses/luo-th.pdf", "http://moss.csc.ncsu.edu/~mueller/ftp/pub/mueller/papers/ipdps17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/187a407c0ddcbb73e227b06c08d2f7d1374014a6", "sources": [ "DBLP" ], "title": "ScalaIOExtrap: Elastic I/O Tracing and Extrapolation", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "1883eb486e44c4a61864f538d2f0e90dca8f45f9": { "authors": [ { "ids": [ "40485705" ], "name": "Bo Wu" }, { "ids": [ "1785951" ], "name": "Xu Liu" }, { "ids": [ "1718639" ], "name": "Xiaobo Zhou" }, { "ids": [ "39033094" ], "name": "Changjun Jiang" } ], "doi": "10.1145/3037697.3037742", "doiUrl": "https://doi.org/10.1145/3037697.3037742", "entities": [ "Cloud computing", "Computer multitasking", "Graphics processing unit", "Kernel (operating system)", "Kernel preemption", "Preemption (computing)", "Priority inversion", "Runtime system", "Scheduling (computing)", "Software system", "Speedup" ], "id": 
"1883eb486e44c4a61864f538d2f0e90dca8f45f9", "inCitations": [ "d881d1e75144ab9bbe5f543dea115932f09ffe0a", "01ea8c68a5809d0aab377a6f8fa1faf627bf4e12" ], "journalName": "", "journalPages": "483-496", "journalVolume": "", "outCitations": [ "ccdf0a236d184c67330290a0c43aab2c0bea17bf", "6db4621f95879008ca5ee5a3de327ac5c36f80e8", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "08632fe2b934ed15d3499e7321282c81adc2c390", "72b8219baf03b5a18653eadd5d724499d422ec29", "5cdb8b8e13f90ec7974565372a6b772faf6d611f", "10443d5d4f0e5048df514e581a9f364954158d00", "5fe43f4d79d2829ba062fe9cf1711ee60903dd24", "40bb6830ec715b52e0fcbe7566f489dd2d0c25fe", "3c28d5967db86e8f5e4c37d03518967c285a32bf", "064f38e5edef42cb5a37f2a350e4413e17132b11", "cffc3c4e332287ba5b67a20c0bf0fbcb8a6e5725", "be4c1dd0e8afe5ac839bba41db32b9035fa64d5f", "00156e79606084497789662dfaf59c3b54a10722", "ec97e16a9ee20c90e7c65fc6dd4ddcd0098c6b3e", "b04c9e851ae605592d693aa65f0d753b8af08feb", "3be74a71c59c0e5e925aa84090fc1b1988ea6095", "7a2804fe421e853ef59abeffa41060ffe700602d", "174b4cb435c87e421c973ce59ccf5b06e09aa8af", "5c9fa798a510b66a37c1b0852582fd7735ed088e", "0f283421a2ecd7d63d4b804ebba5fbf7fbdab197", "00f355ce566bb51dc70925217c62e437cc7e14e2", "96b4b72d1098674750c4a406c93efe43e036568b", "21e5ea3c252c84137efcb45cef1437bdcc15c773", "16e57efb869966c49ad37dd56508a3b60f0f2985", "1618f89bc0936ab14b8ec38905120d658014ed48", "c5b3f0caeba42a532a48adc80e6932c35bb26ac4", "035740197ba476892d6bc844436d39f3eedf4bb0", "68073f621072d793e95b9562bf9a9245415d5a96", "5dfb9848c10a13c6667109b2390e0dab14177c84", "39a6aa81ec3e20c1d500b99b560deb039c451b83", "cfeb833da2d3ca20adfc05a762b3f68cffa13416" ], "paperAbstract": "GPUs are widely adopted in HPC and cloud computing platforms to accelerate general-purpose workloads. 
However, modern GPUs do not support flexible preemption, leading to performance and priority inversion problems in multi-tasking environments.\n In this paper, we propose and develop FLEP, the first software system that enables flexible kernel preemption and kernel scheduling on commodity GPUs. The FLEP compilation engine transforms the GPU program into preemptable forms, which can be interrupted during execution and yield all or part of the streaming multi-processors (SMs) in the GPU. The FLEP runtime engine intercepts all kernel invocations and determines which kernels and how those kernels should be preempted and scheduled. Experimental results on two-kernel co-runs demonstrate up to 24.2X speedup for high-priority kernels and up to 27X improvement on normalized average turnaround time for kernels with the same priority. FLEP reduces the preemption latency by up to 41% compared to yielding the whole GPU when the waiting kernels only need several SMs. With all the benefits, FLEP only introduces 2.5% runtime overhead, which is substantially lower than the kernel slicing approach.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037742" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1883eb486e44c4a61864f538d2f0e90dca8f45f9", "sources": [ "DBLP" ], "title": "FLEP: Enabling Flexible and Efficient Preemption on GPUs", "venue": "ASPLOS", "year": 2017 }, "18c3380550d93ae6b4101d9b06fe2b37b803dce3": { "authors": [ { "ids": [ "3631757" ], "name": "Yao Zhang" }, { "ids": [ "2734582" ], "name": "Arvind Ramanathan" }, { "ids": [ "3271584" ], "name": "Anil Vullikanti" }, { "ids": [ "1956099" ], "name": "Laura L. Pullum" }, { "ids": [ "38013066" ], "name": "B. 
Aditya Prakash" } ], "doi": "10.1109/ICDM.2017.71", "doiUrl": "https://doi.org/10.1109/ICDM.2017.71", "entities": [ "Algorithm", "Approximation algorithm", "Experiment", "Preemption (computing)", "Propagation constant", "Sampling (signal processing)", "Scalability" ], "id": "18c3380550d93ae6b4101d9b06fe2b37b803dce3", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "615-624", "journalVolume": "", "outCitations": [ "31e26c99c3472a316127f6d82296cb3fad816a72", "36c0b81a2ef2505e5c1c763c1abc25cdd72903f2", "8f37dfe4ea497f63dce28e47581bc38d2e3ab26d", "2f98cee40a3bf83dc5ab089bbd229e9550c39179", "0dfc3797b46fc2c7e5847f7eb85927271dcdd1ed", "f941020a43772d05ccf6bfc57fc6042e417116db", "6c16ffc774a93285cc5d2eb0b49dc10140df9c74", "5a88d6e0560d1c9b32dd7639fb2164bfbe938c6c", "68233e8899bee928174f83d4f8d8979f72f45132", "64daaf62d95f5f1d30819540a2072efcaa096651", "0ca2615a3d3e1e9736dc5ea6236f69356398c06b", "4ffa885ec40e52386235b2c9cfc6dbde76a48f79", "0fe4d959d93173a42809f0a3dcf3e0c3814703cb", "b36c153be410c0d937d7583de557c0375506d15a", "2e0fdaf23e6de0d6787519f671bc198d47f3acb1", "2f1188e841a944d78373cf1eeaaf714234677e95", "07ecb03e6bf439319fa087ab79801595d8e7f331", "376ead26a0e0a87ea9a177fc683b0bedf161fbd9", "06ed109b9c92b9a87e81ec98fc75fbe0ef7ca86f", "bf00bd73b63566d123715d121f8c3da584810025", "a0285b273bd505ef19029fd60e2cdcce7928f73a", "595af06140aa95f12f79914b04467ab59e9c5edf", "d2eee77aee6ca8306f3124762864f1b7bfe54a24", "f52f343687d9ae3ddff9966a429e3cb1a4b3a5e5", "b790f9ae49cdee5354eacfc7897ab9752acd5e2a", "215aad1520ec1b087ab2ba4043f5e0ecc32e7482", "1413dbfbbae1b59d52656db3dc48a4ee278e082f", "7134b6f6a087a8d07aebc24edf80ed6f2954e5f8", "b9e43395663f74c581982e9ca97a0d7057a0008c" ], "paperAbstract": "Given a contact network and coarse-grained diagnostic information like electronic Healthcare Reimbursement Claims (eHRC) data, can we develop efficient intervention policies to control an epidemic? 
Immunization is an important problem in multiple areas especially epidemiology and public health. However, most existing studies focus on developing pre-emptive strategies assuming prior epidemiological models. In practice, disease spread is usually complicated, hence assuming an underlying model may deviate from true spreading patterns, leading to possibly inaccurate interventions. Additionally, the abundance of health care surveillance data (like eHRC) makes it possible to study data-driven strategies without too many restrictive assumptions. Hence, such an approach can help public-health experts take more practical decisions. In this paper, we take into account propagation log and contact networks for controlling propagation. We formulate the novel and challenging Data-Driven Immunization problem without assuming classical epidemiological models. To solve it, we first propose an efficient sampling approach to align surveillance data with contact networks, then develop an efficient algorithm with the provably approximate guarantee for immunization. Finally, we show the effectiveness and scalability of our methods via extensive experiments on multiple datasets, and conduct case studies on nation-wide real medical surveillance data.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.71", "http://people.cs.vt.edu/badityap/papers/dataimm-icdm17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/18c3380550d93ae6b4101d9b06fe2b37b803dce3", "sources": [ "DBLP" ], "title": "Data-Driven Immunization", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "18c5049806ab27e4a2a7ba6c388309843d6f90ab": { "authors": [ { "ids": [ "2416629" ], "name": "Yanick Fratantonio" }, { "ids": [ "3103967" ], "name": "Chenxiong Qian" }, { "ids": [ "1969375" ], "name": "Simon P. 
Chung" }, { "ids": [ "1738428" ], "name": "Wenke Lee" } ], "doi": "10.1109/SP.2017.39", "doiUrl": "https://doi.org/10.1109/SP.2017.39", "entities": [ "Accessibility", "Android", "Android software development", "Application programming interface", "Credential", "Denial-of-service attack", "Feedback", "Login", "Play Store", "Software development kit", "Usability testing", "User interface" ], "id": "18c5049806ab27e4a2a7ba6c388309843d6f90ab", "inCitations": [ "4a727288433c680afbfa12ac798d3c687b91501c", "add252c8eae6d8b62737fa02dba302ac6c7279a9", "23bdeecfdfc33b6cca27d470e0a607ccda17b5cc", "3b6d6ab93e9663940a18743f16719bd7ae505b87", "23c861be0e580204bf132a9a4c380ec23ce2b7e8", "72343f0551040a77e3251f490af60c33f4c26e63", "92e37b8e837005c775019bbdd4c3151b3ba96716", "1fb399bf4122b5b25ae7784ca73f9b1be6a91cde" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "1041-1057", "journalVolume": "", "outCitations": [ "0ee2efb0bacede2eaa4516795aa5150d20e3a266", "4301542658dd07a9775ea921282b20acd3ffc446", "4d3e8e7f7073b5795f34c291e21d338699571ecb", "699334161bf2d0ca394236a2a57755e1d8d92269", "cd1fdbc5bbe453339d26951a79e3ca533920a854", "8c9c760b2079de26cf2aea31d128ff9054a3e6b1", "28516ec511b31914e1b17e4a31ff128cb24bf41e", "50749186978deefcae8e528dcf4b3c5b7d9e5ba3", "27a7497a46c9597b35d2120c224061423ff3f311", "94b244c518f431f84d2e00317709c98771a91eca", "1a6218a174f5be297cfa4a16fd52a1b814ac4261", "2eee257c63ab21ee4c56577a288b2c751f0329ca", "fc49dafcc2a0fbfffef599f9d9bc3e8cf7a143d7", "29898e452f80ba09357a2fb716c7b14d75eb3bd6" ], "paperAbstract": "The effectiveness of the Android permission system fundamentally hinges on the user's correct understanding of the capabilities of the permissions being granted. 
In this paper, we show that both the end-users and the security community have significantly underestimated the dangerous capabilities granted by the SYSTEM_ALERT_WINDOW and the BIND_ACCESSIBILITY_SERVICE permissions: while it is known that these are security-sensitive permissions and they have been abused individually (e.g., in UI redressing attacks, accessibility attacks), previous attacks based on these permissions rely on vanishing side-channels to time the appearance of overlay UI, cannot respond properly to user input, or make the attacks literally visible. This work, instead, uncovers several design shortcomings of the Android platform and shows how an app with these two permissions can completely control the UI feedback loop and create devastating attacks. In particular, we demonstrate how such an app can launch a variety of stealthy, powerful attacks, ranging from stealing user's login credentials and security PIN, to the silent installation of a God-mode app with all permissions enabled, leaving the victim completely unsuspecting. To make things even worse, we note that when installing an app targeting a recent Android SDK, the list of its required permissions is not shown to the user and that these attacks can be carried out without needing to lure the user to knowingly enable any permission. In fact, the SYSTEM_ALERT_WINDOW permission is automatically granted for apps installed from the Play Store and our experiment shows that it is practical to lure users to unknowingly grant the BIND_ACCESSIBILITY_SERVICE permission by abusing capabilities from the SYSTEM_ALERT_WINDOW permission. We evaluated the practicality of these attacks by performing a user study: none of the 20 human subjects that took part of the experiment even suspected they had been attacked. We also found that it is straightforward to get a proof-of-concept app requiring both permissions accepted on the official store. We responsibly disclosed our findings to Google. 
Unfortunately, since these problems are related to design issues, these vulnerabilities are still unaddressed. We conclude the paper by proposing a novel defense mechanism, implemented as an extension to the current Android API, which would protect Android users and developers from the threats we uncovered.", "pdfUrls": [ "http://iisp.gatech.edu/sites/default/files/images/cloak_and_dagger_-_from_two_permissions_to_complete_control.pdf", "https://www.ieee-security.org/TC/SP2017/papers/117.pdf", "https://doi.org/10.1109/SP.2017.39", "http://iisp.gatech.edu/sites/default/files/documents/ieee_sp17_cloak_and_dagger_final.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/18c5049806ab27e4a2a7ba6c388309843d6f90ab", "sources": [ "DBLP" ], "title": "Cloak and Dagger: From Two Permissions to Complete Control of the UI Feedback Loop", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "18d617b1e5fd207c890fcfe8341a0193d81478d2": { "authors": [ { "ids": [ "3409894" ], "name": "Erik Derr" }, { "ids": [ "2214494" ], "name": "Sven Bugiel" }, { "ids": [ "2200198" ], "name": "Sascha Fahl" }, { "ids": [ "3224778" ], "name": "Yasemin Acar" }, { "ids": [ "1749517" ], "name": "Michael Backes" } ], "doi": "10.1145/3133956.3134059", "doiUrl": "https://doi.org/10.1145/3133956.3134059", "entities": [ "Android", "Application programming interface", "Attack surface", "Drop-in replacement", "Ecosystem", "Library", "Library (computing)", "Play Store", "Requirement", "Software versioning" ], "id": "18d617b1e5fd207c890fcfe8341a0193d81478d2", "inCitations": [], "journalName": "", "journalPages": "2187-2200", "journalVolume": "", "outCitations": [ "4d683de8c6b9450da69d31982edcb211ebd7333c", "a0b9645813181e0e3e048353f628ca32a9a461aa", "56a61eec079806d3eb32bb7d5c957aee3578083e", "46cbd8fabcb093272bb7bbe3f10bdd804c5cb7f5", "1e73c2fa2709d3210c09f19933e99b71905364ab", "7661ff0c1cdfeff8ec6344f56c512f34ee558dfa", 
"38ee72b61e2fdde3c2af9ef93b66b11593182605", "af1808db4997c3a90c0a1c228814cf337b0145b4", "987df05dc608fff4fbbc8f8df5b9612626162a5a", "2ec14bc3f03861e750f054727369dd0f9933eef6", "a411c30fa4acb68b309a21167554bd97632968fa", "2ed5557a5d86964444b911e236ce3f40bc32e930", "33f8f2e76d0190905c2bd3a2e611d28504fa4353", "130633f0653e6ad5766144299aa17938e7a5fca2", "771821636c87f9b338e20c35674116e1b99bade6", "50b556396ebc887461015b48ce89c572424bcedf", "0e5cbb048a6ad899ba59ef661e53ffd4ad6c29fd", "21f07abad970093f561ef60bcbbee3830643fe55", "1ccae25d620eb9fb8ac4dd323b199ee72b77774a", "4e145225ce8918b36b311c08571e3839214b0604" ], "paperAbstract": "Third-party libraries in Android apps have repeatedly been shown to be hazards to the users' privacy and an amplification of their host apps' attack surface. A particularly aggravating factor to this situation is that the libraries' version included in apps are very often outdated.\n This paper makes the first contribution towards solving the problem of library outdatedness on Android. First, we conduct a survey with 203 app developers from Google Play to retrieve first-hand information about their usage of libraries and requirements for more effective library updates. With a subsequent study of library providers' semantic versioning practices, we uncover that those providers are likely a contributing factor to the app developers' abstinence from library updates in order to avoid ostensible re-integration efforts and version incompatibilities. Further, we conduct a large-scale library updatability analysis of 1,264,118 apps to show that, based on the library API usage, 85.6% of the libraries could be upgraded by at least one version without modifying the app code, 48.2% even to the latest version. Particularly alarming are our findings that 97.8% out of 16,837 actively used library versions with a known security vulnerability could be easily fixed through a drop-in replacement of the vulnerable library with the fixed version. 
Based on these results, we conclude with a thorough discussion of solutions and actionable items for different actors in the app ecosystem to effectively remedy this situation.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134059" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/18d617b1e5fd207c890fcfe8341a0193d81478d2", "sources": [ "DBLP" ], "title": "Keep me Updated: An Empirical Study of Third-Party Library Updatability on Android", "venue": "CCS", "year": 2017 }, "18e7ac5664caece8f826e7251673fd3946e653d0": { "authors": [ { "ids": [ "3398057" ], "name": "Fahad Shaon" }, { "ids": [ "1741044" ], "name": "Murat Kantarcioglu" }, { "ids": [ "34472423" ], "name": "Zhiqiang Lin" }, { "ids": [ "1685603" ], "name": "Latifur Khan" } ], "doi": "10.1145/3133956.3134095", "doiUrl": "https://doi.org/10.1145/3133956.3134095", "entities": [ "Benchmark (computing)", "Big data", "Computation", "Data science", "Encryption", "Hardware restriction", "High-level programming language", "Information sensitivity", "MATLAB", "Machine learning", "MapReduce", "Memory-mapped I/O", "Python", "SQL", "Trusted Computing" ], "id": "18e7ac5664caece8f826e7251673fd3946e653d0", "inCitations": [ "287da0ab3c169c41433b0e5504161dfd1afbfa6c" ], "journalName": "", "journalPages": "1211-1228", "journalVolume": "", "outCitations": [ "1521d39088b203ddac981d10d214f463449ae95b", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "0832933a67612dc18e0f70cb0ed949fef1a830a5", "3254d32e65b993be1e164b77d40510597fdfd3df", "43c2eca1452dfc5c47cc091cdb4b03296d67fb08", "69fa620d332120263317fafc41298b2a3d9b67c7", "bae99f2ff6069184da7a05fd0d35188e17066b61", "20b63210954f7c5a70664f301dcd7196856ccfa7", "4beef78e9b21611a59237b63d512014e47f32d5e", "7779c10dfa1f84953016b6292844815c5faf84f5", "2813c3351838e036f52bcaa94eb7203bf4d9e7d3", "441f14c7ff945b61a95488b13268162adbc30974", "0a289fd7b14345822b1acda6d82750b15d59663e", "b0ac2616034f56ab1469afb935b55fe7e37f8f41", 
"01d1575116b8aaacde1fd0e164a932b1ceffa04d", "1bb07c114cb447552d36a95445cc207f496d85aa", "1bd2d9fb62832737735d011154834b7c80c7e50a", "fb7f27650f30aefa7786c874da4b7cc27a11b9df", "2b98beb92b3fd808571d42d3514ff916a0a17366", "74b6cd75916b3a6d1dbc0284582968d87c941db7", "c55469197b838ec68bfbf1c0602d180bd799355b", "1c2d161c5bb15efd73311d0a3223aee773d38cca", "4af77753e00973f339fd93a27e4131047018e79c", "f9fbcd303dc1948611022f24fcca29809cc24b4d", "7aaede70f5efcb1542a80707c1f0f8b01955a7d2", "eb82d3035849cd23578096462ba419b53198a556", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "25810015c2ff27803089069e393b2868343c9d98", "0e7c0199bbb4533e8f074d914a45351d80e5cb55", "55d8a79ec7ffcb95ded1531104b173a2995e45e6", "ad7f3da7a5d6a1e18cc5a176f18f52687b912fea", "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "961487973d4b33f96406fddbfcf1235dc587571f", "29cc0a8802126d4e97f28109763df26ab91c6531", "6c15a1a25d4d103ed251d82c95ff4f0b38866a06", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824" ], "paperAbstract": "Recently, using secure processors for trusted computing in cloud has attracted a lot of attention. Over the past few years, efficient and secure data analytic tools (e.g., map-reduce framework, machine learning models, and SQL querying) that can be executed over encrypted data using the trusted hardware have been developed. However, these prior efforts do not provide a simple, secure and high level language based framework that is suitable for enabling generic data analytics for non-security experts who do not have concepts such as \"oblivious execution\". In this paper, we thus provide such a framework that allows data scientists to perform the data analytic tasks with secure processors using a Python/Matlab-like high level language. 
Our framework automatically compiles programs written in our language to optimal execution code by managing issues such as optimal data block sizes for I/O, vectorized computations to simplify much of the data processing, and optimal ordering of operations for certain tasks. Furthermore, many language constructs such as if-statements are removed so that a non-expert user is less likely to create a piece of code that may reveal sensitive information while allowing oblivious data processing (i.e., hiding access patterns). Using these design choices, we provide guarantees for efficient and secure data analytics. We show that our framework can be used to run the existing big data benchmark queries over encrypted data using the Intel SGX efficiently. Our empirical results indicate that our proposed framework is orders of magnitude faster than the general oblivious execution alternatives.", "pdfUrls": [ "https://acmccs.github.io/papers/p1211-shaonA.pdf", "http://www.utdallas.edu/~zxl111930/file/CCS17b.pdf", "http://www.utdallas.edu/~zhiqiang.lin/file/CCS17b-slides.pdf", "http://doi.acm.org/10.1145/3133956.3134095" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/18e7ac5664caece8f826e7251673fd3946e653d0", "sources": [ "DBLP" ], "title": "SGX-BigMatrix: A Practical Encrypted Data Analytic Framework With Trusted Processors", "venue": "CCS", "year": 2017 }, "18ebb2bbae3dfe4ab3f592494638f33e5072c8f7": { "authors": [ { "ids": [ "20491792" ], "name": "Heehoon Kim" }, { "ids": [ "20928779" ], "name": "Hyoungwook Nam" }, { "ids": [ "34398371" ], "name": "Wookeun Jung" }, { "ids": [ "1719118" ], "name": "Jaejin Lee" } ], "doi": "10.1109/ISPASS.2017.7975270", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975270", "entities": [ "Algorithm", "Artificial neural network", "Convolution", "Convolutional neural network", "Coppersmith\u2013Winograd algorithm", "Deep learning", "Fast Fourier transform", "Graphics processing unit", "Profiling (computer 
programming)", "Program optimization", "Scalability", "TensorFlow", "Theano (software)", "Torch" ], "id": "18ebb2bbae3dfe4ab3f592494638f33e5072c8f7", "inCitations": [ "466f9f9c4a63c0fbc337637a1619e3411ea14c59" ], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "55-64", "journalVolume": "", "outCitations": [ "6074c1108997e0c1f97dc3c199323a162ffe978d", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "326d65827307862ddc3d39b84ebc662e83ff95b3", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "162d958ff885f1462aeda91cd72582323fd6a1f4", "0b99d677883883584d9a328f6f2d54738363997a", "2d8450ec56926a34fe1c25180f0b303c5cc67f2d", "0122e063ca5f0f9fb9d144d44d41421503252010", "3b2697d76f035304bfeb57f6a682224c87645065", "81b7dcaef4a53daab41658a4d1e97972d04b3384", "3439a127e45fb763881f03ef3ec735a1db0e0ccc", "071b16f25117fb6133480c6259227d54fc2a5ea0", "0ee1916a0cb2dc7d3add086b5f1092c3d4beb38a", "008154be54eefe4734b454c2841ac66877ac8db5", "061356704ec86334dbbc073985375fe13cd39088", "58c45859350b7e9fc2dc6676e318e8f526073f5f", "04f04c43ed1ed5bfa0706ed087277ef83de7e175", "2f7ad26514bce4df6c8ebc42c90383ef3a974df4", "342f6f3124b5336fa2c84f8546c88dd3ce0e6626", "9f1f065bf08cd90431cc051267a708f56436cd82", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "6745d914ef3889f03725af6e396eeaa717321e64", "6b570069f14c7588e066f7138e1f21af59d62e61", "99ef5dbd87c0796854e72acc9f52116cd8d79b46", "a7621b4ec18719b08f3a2a444b6d37a2e20227b7" ], "paperAbstract": "Thanks to modern deep learning frameworks that exploit GPUs, convolutional neural networks (CNNs) have been greatly successful in visual recognition tasks. In this paper, we analyze the GPU performance characteristics of five popular deep learning frameworks: Caffe, CNTK, TensorFlow, Theano, and Torch in the perspective of a representative CNN model, AlexNet. 
Based on the characteristics obtained, we suggest possible optimization methods to increase the efficiency of CNN models built by the frameworks. We also show the GPU performance characteristics of different convolution algorithms each of which uses one of GEMM, direct convolution, FFT, and the Winograd method. We also suggest criteria to choose convolution algorithms for GPUs and methods to build efficient CNN models on GPUs. Since scaling DNNs in a multi-GPU context becomes increasingly important, we also analyze the scalability of the CNN models built by the deep learning frameworks in the multi-GPU context and their overhead. The result indicates that we can increase the speed of training the AlexNet model up to 2X by just changing options provided by the frameworks.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975270" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/18ebb2bbae3dfe4ab3f592494638f33e5072c8f7", "sources": [ "DBLP" ], "title": "Performance analysis of CNN frameworks for GPUs", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "18ee712faa428760c3b2b914fc5a9285cefd1733": { "authors": [ { "ids": [ "39532649" ], "name": "Sergei Ivanov" }, { "ids": [ "22252594" ], "name": "Konstantinos Theocharidis" }, { "ids": [ "2454554" ], "name": "Manolis Terrovitis" }, { "ids": [ "1731655" ], "name": "Panagiotis Karras" } ], "doi": "10.1145/3077136.3080788", "doiUrl": "https://doi.org/10.1145/3077136.3080788", "entities": [ "Admissible heuristic", "Baseline (configuration management)", "Greedy algorithm", "Heuristic", "Meme", "NP-completeness", "Social network" ], "id": "18ee712faa428760c3b2b914fc5a9285cefd1733", "inCitations": [ "262a5a784eedb127490f44736844a33123b30fe0" ], "journalName": "", "journalPages": "565-574", "journalVolume": "", "outCitations": [ "9126e0f4244349c57a56686cbd7f5d6890f7982e", "9f1a08eed809b2cfc38e3e34cb1c9fd567c31abb", 
"83246561a6374698fa6d6527b494078891fc3c75", "36c0b81a2ef2505e5c1c763c1abc25cdd72903f2", "084419435227a5bbf54d47ba54e11739756510fd", "3a33073e111f690c0fc6e1fad90b6018fa84e793", "add9dbf427b6c1e5160df87b02620f6a55764f21", "480b1e3419d38363604016a2746f3f9b3bdc7de6", "c22b6f9e37e14dcd42c745d3935b42455b8b3c7a", "1b0e39ba26279ddabc1a72fb8541473c00a0282f", "d7f9c3253552e13f24c3b73bc055ef60388af57c", "4cd73382dc17561cd276f276c61d5ebf39bf69ad", "c3920c6c06a70b11cbec596b22f3462ab5cb9d0b", "1e63600b5906a6e18c2b9540155b9adb85c4d437", "391919c8a42afe6daa7ea6e32b20021f4e7dd27c", "063ac7f23c65bd97633d4f6bf4c31eb70879aa7c", "e4dc2a1f431bcaa0798a1fea658a9c491a465d3a", "0fedb3f3459bb591c3c2e30beb7dd0a9d3af3bcc", "ad2b00d59d322795a90c13c0171b544bdaf7658b", "72465eb427490619a5a625a45dda81c92a8cfe14", "1f559f2eb174d05a912b2ec39a48eadfd0160b74", "6780823d309a4a96fd4fcab53544bb8724bf461b", "abb152802d5b4686a394e221abe951187ea06158", "e9d426d80f4048aee2e19bd939e3fdb32de0768d", "50ef0cbcc318ec98c7d6132b9c539e9cf41db010", "53bd06b86a3e2c510089fdd820e2a07d4d1dabbd", "fa4e93dbcdd1e4f20db9aeb34699a414420b7fd9", "618974de0768c4097126cb0a11f48c552b5616f0" ], "paperAbstract": "How do we create content that will become viral in a whole network after we share it with friends or followers' Significant research activity has been dedicated to the problem of strategically selecting a seed set of initial adopters so as to maximize a meme's spread in a network. This line of work assumes that the success of such a campaign depends solely on the choice of a tunable seed set of adopters, while the way users perceive the propagated meme is fixed. 
Yet, in many real-world settings, the opposite holds: a meme's propagation depends on users' perceptions of its tunable characteristics, while the set of initiators is fixed.\n In this paper, we address the natural problem that arises in such circumstances: Suggest content, expressed as a limited set of attributes, for a creative promotion campaign that starts out from a given seed set of initiators, so as to maximize its expected spread over a social network. To our knowledge, no previous work addresses this problem. We find that the problem is NP-hard and inapproximable. As a tight approximation guarantee is not admissible, we design an efficient heuristic, Explore-Update, as well as a conventional Greedy solution. Our experimental evaluation demonstrates that Explore-Update selects near-optimal attribute sets with real data, achieves 30% higher spread than baselines, and runs an order of magnitude faster than the Greedy solution.", "pdfUrls": [ "http://vbn.aau.dk/files/259120945/crvsi.pdf", "http://doi.acm.org/10.1145/3077136.3080788" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/18ee712faa428760c3b2b914fc5a9285cefd1733", "sources": [ "DBLP" ], "title": "Content Recommendation for Viral Social Influence", "venue": "SIGIR", "year": 2017 }, "19155ab6b3f8c6fe61491ce7373af2937bb724ea": { "authors": [ { "ids": [ "2041092" ], "name": "Andrew J. Younge" }, { "ids": [ "1785427" ], "name": "Kevin T. Pedretti" }, { "ids": [ "3021644" ], "name": "Ryan E. Grant" }, { "ids": [ "2960403" ], "name": "Brian L. 
Gaines" }, { "ids": [ "1705033" ], "name": "Ron Brightwell" } ], "doi": "10.1109/CLUSTER.2017.92", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.92", "entities": [ "Algorithms for Recovery and Isolation Exploiting Semantics", "Apache Spark", "Assistive technology", "Big data", "Bridging (networking)", "Compute Node Linux", "Distributed computing", "Ecosystem", "Emulator", "Goodyear MPP", "Hypervisor", "IBM WebSphere eXtreme Scale", "Linux", "Network emulation", "Scalability", "Simulation", "Software ecosystem", "Stack (abstract data type)", "Supercomputer", "Testbed", "libvirt" ], "id": "19155ab6b3f8c6fe61491ce7373af2937bb724ea", "inCitations": [ "3a7bb4582ac10f4b905e11be0d7bbce18b97ca88", "bb730419a3042f91d6293452e6ddfebe3f287456" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "310-321", "journalVolume": "", "outCitations": [ "0882f38681cc3fe2567f326bcf98e2ba48db0374", "77d97e17c7129a810d14fb8dfd17fa4ca07e18bc", "067c7857753e21e7317b556c86e30be60aa7cac0", "1e2207b8f1b2ce2f09d2d8bfa5e01324a5d4effa", "16aed35e6b4649369e06956b987d5563a02fa13a", "48abc9afae81402aa64992cea7bfd3e5b7a9946d", "2ab305079385594badd4233ebb9512d52ecaccfb", "beb2db2079bbac605fe37d20c530756ebb032830", "0d6787f19c7a521784a38d31420dd8da7bd490ef", "3784b73a1f392160523400ec0309191c0a96d86f", "6cdc292cd1674a5348789e3e6ecee239ccc940c8", "515392cf93c50e1ba13f78bfe075b23a889e41c0", "88435a01f7623d35501769ce9614f59aeb026d4e", "26b45b0df49e63d612a3a8ab3c89fcf53a343111", "daeff61502115efc4b9ee81607a8e5489215ea88", "3a46c11ad7afed8defbb368e478dbf94c24f43a3", "8964497eef0b88462213f152a776d260388cff36", "fbdaaccb307bc084340edd4856574236f9150399", "9721372966553fac834a493a0328e5918671a04f", "e87cba48ff23979930a8b13ec5bedb3283d1f629", "e81bb2d36b5e42b6b95c88c1086b04d38315629b", "6a53a2c03479e6bd59b37ab141bcecd3d59a9e91", "24251f02c34f32b1dd96572a1d984c4463a26a10", "ce619391718745f65f7ee9c09a862898d65ef464", "45e2dd9fe949025ff7f82d888e5be8693dbd317d", 
"7f6aea7cff8f079b8a7a79fd8fc2a4286f24a8ef", "9c0b4daa08cf295de210757dbe3c4861b3618893", "b04391910d19d2d0c64b62d300927f527417414e", "1147568f37d522a92fb77cbcb565a7e4519bd2b6", "4e8b68b2285eb126a964a9221703d5db0e608ca0", "80135b0552efcf47a2dc9eccbc885d4904b89a83", "26b5fb803c43f1c650809049cbec1795cc4a6e94", "17f1ff82aca7a592a8815e8169b6e2210bf6ae7a", "4be228917846a218ba00d30b42d709a11b7a5311", "1b67c5212fc54a597fa77314f604d3d5ad20d5af", "0558c94a094158ecd64f0d5014d3d9668054fb97", "8821f922d35d377305941190bdea41a3ce841102" ], "paperAbstract": "While large-scale simulations have been the hallmark of the High Performance Computing (HPC) community for decades, Large Scale Data Analytics (LSDA) workloads are gaining attention within the scientific community not only as a processing component to large HPC simulations, but also as standalone scientific tools for knowledge discovery. With the path towards Exascale, new HPC runtime systems are also emerging in a way that differs from classical distributed computing models. However, system software for such capabilities on the latest extreme-scale DOE supercomputing needs to be enhanced to more appropriately support these types of emerging software ecosystems.In this paper, we propose the use of Virtual Clusters on advanced supercomputing resources to enable systems to support not only HPC workloads, but also emerging big data stacks. Specifically, we have deployed the KVM hypervisor within Cray's Compute Node Linux on a XC-series supercomputer testbed. We also use libvirt and QEMU to manage and provision VMs directly on compute nodes, leveraging Ethernet-over-Aries network emulation. To our knowledge, this is the first known use of KVM on a true MPP supercomputer. We investigate the overhead our solution using HPC benchmarks, both evaluating single-node performance as well as weak scaling of a 32-node virtual cluster. 
Overall, we find single node performance of our solution using KVM on a Cray is very efficient with near-native performance. However overhead increases by up to 20% as virtual cluster size increases, due to limitations of the Ethernet-over-Aries bridged network. Furthermore, we deploy Apache Spark with large data analysis workloads in a Virtual Cluster, effectively demonstrating how diverse software ecosystems can be supported by High Performance Virtual Clusters.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.92" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/19155ab6b3f8c6fe61491ce7373af2937bb724ea", "sources": [ "DBLP" ], "title": "Enabling Diverse Software Stacks on Supercomputers Using High Performance Virtual Clusters", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "191c5dcbf4b8cdc0b2dd423b84141cd9bf312217": { "authors": [ { "ids": [ "2686490" ], "name": "William Mansky" }, { "ids": [ "1804502" ], "name": "Andrew W. 
Appel" }, { "ids": [ "2371051" ], "name": "Aleksey Nogin" } ], "doi": "10.1145/3133911", "doiUrl": "https://doi.org/10.1145/3133911", "entities": [ "Communications protocol", "Coq (software)", "Correctness (computer science)", "Inter-process communication", "Liveness", "Non-blocking algorithm", "Parallel random-access machine", "Toolchain", "Verification and validation", "Write buffer" ], "id": "191c5dcbf4b8cdc0b2dd423b84141cd9bf312217", "inCitations": [ "052b689c67deee523916655f7d464a48e2ec5cc4", "3c874433b330676b693ffe45fedd9d2d10b0b767" ], "journalName": "PACMPL", "journalPages": "87:1-87:28", "journalVolume": "1", "outCitations": [ "35a77402e35c5fed4bae743e33cd68375f373d05", "d45eaee8b2e047306329e5dbfc954e6dd318ca1e", "3a2df802b68c1d1464d442cb1ec973ef93ce69a0", "219a164b4c9adea1262e7166d6ddcace1238d948", "57af923f720b261e7c88a86267997033759457d7", "9c3841de1c500d146c36aaf7f34b32f4db1988cd", "358f17d968420850dde44d6bef11b5e7e5db5b76", "a0a01f3515061d06dca3f991114b3c7f219279de", "987adbbb4b5baff729cf3907d7f05a86e8651849", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "256bdc6422830144641523773e158be09494b2b6", "1676007d4035795eba2c926f2200973f953bc0f6", "430ce88f430d22d131ca3f753dd576c61f7cced3", "36f697a128d80d164752c4a0c42aae2c66723342", "605ce5fe2f5b1e478a2528f1c514b77fc2607577", "a68dbb03f8a6405a9af3738209ff0223b7eda50f", "21e51da40ab080ca2b71ad36094e2b686008b6cc", "44a74c602f7a1410c33ddf4ac91d3cbe7534b18c" ], "paperAbstract": "We present a concurrent-read exclusive-write buffer system with strong correctness and security properties. Our motivating application for this system is the distribution of sensor values in a multicomponent vehicle-control system, where some components are unverified and possibly malicious, and other components are vehicle-control-critical and must be verified. 
Valid participants are guaranteed correct communication (i.e., the writer is always able to write to an unused buffer, and readers always read the most recently published value), while invalid readers or writers cannot compromise the correctness or liveness of valid participants. There is only one writer, all operations are wait-free, and there is no extra process or thread mediating communication. We prove the correctness of the system with valid participants by formally verifying a C implementation of the system in Coq, using the Verified Software Toolchain extended with an atomic exchange operation. The result is the first C-level mechanized verification of a nonblocking communication protocol.", "pdfUrls": [ "http://www.cs.princeton.edu/~wmansky/messaging.pdf", "http://doi.acm.org/10.1145/3133911" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/191c5dcbf4b8cdc0b2dd423b84141cd9bf312217", "sources": [ "DBLP" ], "title": "A verified messaging system", "venue": "PACMPL", "year": 2017 }, "193342874858249aed4796cee35a8bec1b70e236": { "authors": [ { "ids": [ "1708020" ], "name": "Qi Huang" }, { "ids": [ "35432574" ], "name": "Petchean Ang" }, { "ids": [ "2507366" ], "name": "Peter Knowles" }, { "ids": [ "2482422" ], "name": "Tomasz Nykiel" }, { "ids": [ "26964617" ], "name": "Iaroslav Tverdokhlib" }, { "ids": [ "26910300" ], "name": "Amit Yajurvedi" }, { "ids": [ "26937490" ], "name": "Paul Dapolito IV" }, { "ids": [ "27045165" ], "name": "Xifan Yan" }, { "ids": [ "5226966" ], "name": "Maxim Bykov" }, { "ids": [ "2474891" ], "name": "Chuen Liang" }, { "ids": [ "1815047" ], "name": "Mohit Talwar" }, { "ids": [ "2446332" ], "name": "Abhishek Mathur" }, { "ids": [ "1687593" ], "name": "Sachin Kulkarni" }, { "ids": [ "5006316" ], "name": "Matthew Burke" }, { "ids": [ "2665531" ], "name": "Wyatt Lloyd" } ], "doi": "10.1145/3132747.3132775", "doiUrl": "https://doi.org/10.1145/3132747.3132775", "entities": [ "Download", "Manufacturing execution 
system", "Programming model", "Requirement", "Streaming media", "Upload", "Video processing" ], "id": "193342874858249aed4796cee35a8bec1b70e236", "inCitations": [ "40dca29aea76ae426791e4c6bf0e24f3ae88e318", "957e98a2084f6c2d22694aadd22f57070b5d7e23" ], "journalName": "", "journalPages": "87-103", "journalVolume": "", "outCitations": [ "5208060771fd213eefd827e3e1260b939f1aed6d", "0a5882fc7600383eb9d6cc119942f48a70f896ad", "3168681722207c86827e596860115a2977ce761f", "09c5293b647fca40fde28ac6c38737f07e873e41", "022e936d46bf435f73faf9ca03a5a150eb90ce9b", "13c27125584651329f66461981cbb20fa63e9023", "1ec3c93bf22e22f76dcf978fba7764f3f0696a82", "78c83644fbfa65e69137f57e22fa3b53f225a5d5", "396514fb219879a4a18762cddfae2a6a607f439f", "332f77fd05703c1607e3b57884ad31fb1fad0104", "3ede9f94ee174dae45f3e95cf3a45df2dbe05307", "0608d9937c074520cdc93cc444cc1c77039c5332", "3d4d19a68a9ee57f0c4bb5f692b488ac4ce2bb8e", "e847c3ec130da57328db79a7fea794b07dbccdd9", "806df190aff153c1f1091fe3bba941b8a6c0db9c", "411eb6534d39a37ed43443ba1d2e168c73171330", "0558c94a094158ecd64f0d5014d3d9668054fb97", "0ef1dd03db41de69165075562a051021a186c230", "37601bb6e655f2392ba1ca2086da0d1e03e19edc", "624ab00ed715888d15e42246cb3c87fd13123082", "0ef88610f4382a9c6e5c8020ab6ccf3dde0fb3eb", "41e71c53ca2a7be0ba90919af8f3049d957e665e", "1c7d0f188a8033d8a14ab3ae30662f7e85fa65b6", "39ac2e0fc4ec63753306f99e71e0f38133e58ead", "0541d5338adc48276b3b8cd3a141d799e2d40150", "07c6107c3e38c49729799054ca24586c905caa67", "5e36511b8cab586d69047adfb03971380c17d427", "0a6103c182ede82da9e726b1b50d8a2383bf418a" ], "paperAbstract": "Videos are an increasingly utilized part of the experience of the billions of people that use Facebook. These videos must be uploaded and processed before they can be shared and downloaded. 
Uploading and processing videos at our scale, and across our many applications, brings three key requirements: low latency to support interactive applications; a flexible programming model for application developers that is simple to program, enables efficient processing, and improves reliability; and robustness to faults and overload. This paper describes the evolution from our initial monolithic encoding script (MES) system to our current Streaming Video Engine (SVE) that overcomes each of the challenges. SVE has been in production since the fall of 2015, provides lower latency than MES, supports many diverse video applications, and has proven to be reliable despite faults and overload.", "pdfUrls": [ "http://www.cs.princeton.edu/~wlloyd/papers/sve-sosp17.pdf", "http://www.cs.princeton.edu/~wlloyd/papers/sve-sosp17-talk-public.pdf", "http://doi.acm.org/10.1145/3132747.3132775" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/193342874858249aed4796cee35a8bec1b70e236", "sources": [ "DBLP" ], "title": "SVE: Distributed Video Processing at Facebook Scale", "venue": "SOSP", "year": 2017 }, "195f2d9d2e6282b31ffd320c2ceda9661d806927": { "authors": [ { "ids": [ "38436509" ], "name": "Matthew Green" }, { "ids": [ "2679804" ], "name": "Ian Miers" } ], "doi": "10.1145/3133956.3134093", "doiUrl": "https://doi.org/10.1145/3133956.3134093", "entities": [ "Bitcoin", "Micropayment", "Scalability", "Zcash" ], "id": "195f2d9d2e6282b31ffd320c2ceda9661d806927", "inCitations": [ "0aacfddf9cb22e661302ec77cc251ccafd5f8c71", "ac72566bbc7628255002a70ca5bec0874929eba4", "6db9824d4667b22310c51fe638403238f873e9f2", "7a2918f9f0192e9a83c46c1ee58742dd6bd98b87", "4e98a780beee9546e698bd393cf0936b32687de9", "dd0d40e7686d8040a978f749645da5b8fb39df8d", "52663a63d1aa3634cb004e44b5d7b1ea91cbe021", "5f80c78259e53442c59b870fb4f4727b67f540ff", "26ab9c27d995dadd553614045361ffb1afba9008", "20a6be5cfb29390059cc8a772f4fb9c775ef7242", "465ad8344d883f205b80f5f5bd51cd7cdc31fa75" ], 
"journalName": "", "journalPages": "473-489", "journalVolume": "", "outCitations": [ "0ee8b7fa83098e69fce0a26fdb1f623f6fb41dec", "10336cdef674893f41bf4824d44c4156be5e9ca2", "beeecc5cda7ef949e5bf00a6b4404bc58853c484", "0d5f7a1825bae713cebd66d121d5b01e31d8adab", "764e89025d68eda8010732285add5a4296f4e0ae", "0658394f2f6d0a4fcacdc92a33ce68c73bd4ebf3", "33853565b4dcad38b9b79091a48d3f40409f06d7", "19bab496d5d7f60d3e5b9217739b9cf7fedaf44b", "8c5e81a2badc7ed7c03914a8c12773084a96155a", "e67410d5ef6a064afd20d93650f39129d00f1a32", "93bae7155092c8ba1ae1c4ad9f30ae1b7c829dd7", "049e2c54fe8a35cd941937ba592e07bbc2dda591", "4a97ee94fe2646e0535377204b76864022ed395a", "011d714a361b8ceb925c18e4a214e22aa5f899d8", "090b43cddc8f68c2e157537e5ab4f2dcd94978ee", "14ee6a52b24d2f6160865871284421a2fbcbb497", "2b26cf1eab78d9f9c687073af36769e72aa3cc8b", "26ab9c27d995dadd553614045361ffb1afba9008", "0df07fc5f4e09f5c9030efecf359d989d81fa36d", "51b27a41ca1a33445a1041fcea84341fcf0b8c4c", "6238b6c8770314de535332ab61f6cb426643cf0c", "0cf21a6c9888ec25a43fa17edbe8a3cfb28238a3", "026848a58faaa17a393a50d703257cc2496659ad", "28fe42c5e8cc7cfd5a8f5394dd972196cff15fd1", "543a5619fe880ddbc85200075e4d13e2fcfe9b0d", "63231a5d55decb623aeb441b707cf2fb943d485b", "040e5e01d1d963ca70181e5857931c91b3b9c4d2", "11ef405a5ef00e402fe2f0d265f2fada864f02ad", "3624eaf7c2f05612429ed1579cc82102c11f4d65", "0a08a6f2017717baca895cd5cd78383df97c93d6", "2a531c6f67c2dc3be1fcfe5a536d71de8851c5cc" ], "paperAbstract": "Bitcoin owes its success to the fact that transactions are transparently recorded in the blockchain, a global public ledger that removes the need for trusted parties. Unfortunately, recording every transaction in the blockchain causes privacy, latency, and scalability issues. Building on recent proposals for \"micropayment channels\" --- two party associations that use the ledger only for dispute resolution --- we introduce techniques for constructing anonymous payment channels. 
Our proposals allow for secure, instantaneous and private payments that substantially reduce the storage burden on the payment network. Specifically, we introduce three channel proposals, including a technique that allows payments via untrusted intermediaries. We build a concrete implementation of our scheme and show that it can be deployed via a soft fork to existing anonymous currencies such as ZCash.", "pdfUrls": [ "http://eprint.iacr.org/2016/701.pdf", "http://doi.acm.org/10.1145/3133956.3134093", "http://cs.jhu.edu/~imiers/pdfs/bolt.pdf", "http://diyhpl.us/~bryan/papers2/bitcoin/Bolt:%20Anonymous%20payment%20channels%20for%20decentralized%20currencies%20-%202016.pdf", "http://eprint.iacr.org/2016/701", "https://acmccs.github.io/papers/p473-greenA.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/195f2d9d2e6282b31ffd320c2ceda9661d806927", "sources": [ "DBLP" ], "title": "Bolt: Anonymous Payment Channels for Decentralized Currencies", "venue": "CCS", "year": 2016 }, "1967c372ec0c483cecfc720b43ffc35ba690ead9": { "authors": [ { "ids": [ "2138705" ], "name": "Langshi Chen" }, { "ids": [ "40215595" ], "name": "Bo Peng" }, { "ids": [ "1745132" ], "name": "Bingjing Zhang" }, { "ids": [ "15460867" ], "name": "Tony Liu" }, { "ids": [ "3429259" ], "name": "Yiming Zou" }, { "ids": [ "40201553" ], "name": "Lei Jiang" }, { "ids": [ "1960393" ], "name": "Robert Henschel" }, { "ids": [ "1709841" ], "name": "Craig A. 
Stewart" }, { "ids": [ "40521852" ], "name": "Zhang Zhang" }, { "ids": [ "4646807" ], "name": "Emily McCallum" }, { "ids": [ "25100533" ], "name": "Zahniser Tom" }, { "ids": [ "31862326" ], "name": "Jon Omer" }, { "ids": [ "36199155" ], "name": "Judy Qiu" } ], "doi": "10.1109/CLOUD.2017.19", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.19", "entities": [ "Algorithm", "Apache Hadoop", "Big data", "Central processing unit", "Computation", "HARP", "Java", "Knights", "Machine learning", "Manycore processor", "MapReduce", "Memory bandwidth", "Microarchitecture", "Multi-core processor", "Plug-in (computing)", "Scalability", "Speedup", "Xeon Phi" ], "id": "1967c372ec0c483cecfc720b43ffc35ba690ead9", "inCitations": [ "0df834569840f5a16502d2ec5680db42d68874a2" ], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "82-89", "journalVolume": "", "outCitations": [ "3784b73a1f392160523400ec0309191c0a96d86f", "4e8e3e40a25fba903f40246705c3beb3c122f523", "0512c81c16f328ed7c01ea90dd971d6d2c1d5d83", "1087bbef784e7daecaf13b58bc1480d6dee4929b", "9aa88a8a354f1d322e242376d27d0474e50252f8", "37355bae823c553f2db5311c1c6aaeab3b53675e", "17f70b9d1fcf3b31948ffa578ac89399751fe73d", "0c1d559b1d48fb706f8b73d69e951273fc0ed93b", "90dc80e1844ff4d3e2b5f73d3b01264c7b19a218", "6dfb7cba3ab7d5090b88cf23865c76742473e381" ], "paperAbstract": "Data analytics is undergoing a revolution in many scientific domains, and demands cost-effective parallel data analysis techniques. Traditional Java-based Big Data processing tools like Hadoop MapReduce are designed for commodity CPUs. In contrast, emerging manycore processors like the Xeon Phi have an order of magnitude greater computation power and memory bandwidth. To harness their computing capabilities, we propose the Harp-DAAL framework. 
We show that enhanced versions of MapReduce can be replaced by Harp, a Hadoop plug-in, that offers useful data abstractions for both high-performance iterative computation and MPI-quality communication, as well as drive Intel's native DAAL library. We select a subset of three machine learning algorithms and implement them within Harp-DAAL. Our scalability benchmarks ran on Knights Landing (KNL) clusters and achieved up to 2.5 times speedup of performance over the HPC solution in NOMAD and 15 to 40 times speedup over Java-based solutions in Spark. We further quantify the workloads on single node KNL with a performance breakdown at the micro-architecture level.", "pdfUrls": [ "http://dsc.soic.indiana.edu/presentations/Qiu_Harp_DAAL_June_27_2017.pdf", "http://dsc.soic.indiana.edu/publications/2017CLOUDResearchTrack_12261.pdf", "https://doi.org/10.1109/CLOUD.2017.19" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1967c372ec0c483cecfc720b43ffc35ba690ead9", "sources": [ "DBLP" ], "title": "Benchmarking Harp-DAAL: High Performance Hadoop on KNL Clusters", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "196df26297e68c9aa03cb771d7008ce71ef43b19": { "authors": [ { "ids": [ "1686383" ], "name": "Haya Shulman" }, { "ids": [ "1803045" ], "name": "Michael Waidner" } ], "doi": "", "doiUrl": "", "entities": [ "Domain Name System Security Extensions" ], "id": "196df26297e68c9aa03cb771d7008ce71ef43b19", "inCitations": [ "1f10d74e409c46b50735729b6cd0c3b621bb62d5", "9c66d83a5700e0a6b0bdc1f48d8db5774ea6f6ac" ], "journalName": "", "journalPages": "131-144", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/shulman" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/196df26297e68c9aa03cb771d7008ce71ef43b19", "sources": [ "DBLP" ], "title": "One Key to Sign Them All Considered Vulnerable: 
Evaluation of DNSSEC in the Internet", "venue": "NSDI", "year": 2017 }, "19801df1bf64c24aa244af4686cc6829d5dcd48b": { "authors": [ { "ids": [ "2586552" ], "name": "Akash Jain" }, { "ids": [ "1696918" ], "name": "Rupesh Nasre" }, { "ids": [ "1723632" ], "name": "Balaraman Ravindran" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.19", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.19", "entities": [ "Algorithm", "Complex network", "Distributed firewall", "Heuristic", "Jaccard index", "Louvain Modularity", "Mutual information", "Non-maskable interrupt", "Recommender system", "Run time (program lifecycle phase)", "Scalability", "Scoring functions for docking", "Social network" ], "id": "19801df1bf64c24aa244af4686cc6829d5dcd48b", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "146-153", "journalVolume": "", "outCitations": [ "f9fd4f4945a623d348ebfc8da5d15c9a29fda722", "01c1f0e97ce5c74c714dc7aa43cb064f45cc3b04", "0e85096efe34ac24f5212ce34434a828cc00e5d2", "5b72cf570bfcc84cb03a9e310e680363373565cf", "70954d2477d08afa838e827459df0e3ca5882912", "b18f1c22922c2c20f85bd2380fe73292b3819e3d", "586414efa54ba9f4a7def0dc5322b7723f22c552", "0558c94a094158ecd64f0d5014d3d9668054fb97", "2d1d0ee6e21c288d96577b24656cd3398082f857", "1ed8886fb8a9ba6b57b8742d0e5b76a32daa8fad", "16c83e37be3a423a8eefaa483e7a4cffe8cd3a70", "d66d8d5ec4c6e1f8cbd9fad4e46dbf92726b56dc", "40a2a398862f5c62555ffaf6d8421dea9f1bbcd3", "977ff4a6e3730acd5e5dc6f7a45fb5ad8e26876d", "4cd7090ff070c5b5376046437577195ef311343b", "95b251202e1f5ff7d7f41dda553a38e395ecf555", "3471292314a029558d4f4577db383c9908e94ac7", "295b9a93d06eb3ba4f8218f1a325770f93be73cc", "0ad8e89091eed09217e66adc98136126addc2619", "1156f60e40548096df49528b1342bb3e88b0f378" ], "paperAbstract": "Community detection in 
complex networks has a wide range of applications such as detection of cyber-communities in social networks, recommendations based on the interest group, and estimating hidden features in a social network. In distributed frameworks, the primary focus has been scalability. However, the accuracy of the algorithm's output is also critical. We propose the first distributed community detection algorithm based on the state-of-the-art CEIL scoring function. Our algorithm, named DCEIL, is fast, scalable and maintains the quality of communities. DCEIL outperforms the existing state-of-the-art distributed Louvain algorithm by 180% on an average in Normalized Mutual Information (NMI) Index and 6.61% on an average in Jaccard Index metrics. DCEIL completes execution for 1 billion edges within 112 minutes and outperforms state-of-the-art distributed Louvain algorithm by 4.3 ×. DCEIL critically exploits three novel heuristics which address the existing issues with distributed community detection algorithms that have the hierarchical structure of CEIL or Louvain methods. 
Further, our proposed heuristics are generic as well as efficient, and we illustrate their efficacy by enhancing the accuracy of distributed Louvain algorithm by 22.91% on an average in Jaccard Index, and the average execution time by 1.68 × over popular datasets.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.19" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/19801df1bf64c24aa244af4686cc6829d5dcd48b", "sources": [ "DBLP" ], "title": "DCEIL: Distributed Community Detection with the CEIL Score", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "19aeb06b3ba5b454fc462254c178acdf233d955b": { "authors": [ { "ids": [ "34910126" ], "name": "Ben Fisch" }, { "ids": [ "2652991" ], "name": "Dhinakaran Vinayagamurthy" }, { "ids": [ "1752788" ], "name": "Dan Boneh" }, { "ids": [ "33439215" ], "name": "Sergey Gorbunov" } ], "doi": "10.1145/3133956.3134106", "doiUrl": "https://doi.org/10.1145/3133956.3134106", "entities": [ "Cryptography", "Encryption", "Functional encryption", "Plaintext", "Provable security" ], "id": "19aeb06b3ba5b454fc462254c178acdf233d955b", "inCitations": [ "f234f428eb552b94435683e7e784e805c201d309", "8b338e925ae623adbc4cf387d3dd7ccf839d66e6", "53f18a9a84c41ff532302166f4456856f3711830", "71365b1bd36e3923a35d708095d6f4fd6826b2e6", "b02c7f4f8ad474c05e8772b59ad57d63d2ad4bfa", "80621d09c3d3dd896c7e2bff083b9e702dc2ed29" ], "journalName": "", "journalPages": "765-782", "journalVolume": "", "outCitations": [ "17f19d9ec093ef82a10f1276fc53c10d4667836d", "7aaede70f5efcb1542a80707c1f0f8b01955a7d2", "25810015c2ff27803089069e393b2868343c9d98", "accf76de89308b2fc5f50d656e5d142557c1fc5d", "b8ee04a7e0896777cb727a3e75f04c522932634c", "2065450d96aca38c79cad5172b58660765533650", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", 
"2087f336b0eb38fc60a24a32ec0821fc8fe2b2aa", "07774640c97f039bc9df9f01621709df1b3ab2db", "43c2eca1452dfc5c47cc091cdb4b03296d67fb08", "7cbbab21c6f6fb54a968005afa21468b825f1d1e", "8a37efc82e54353d387cfb073f9379c053988aef", "39e042b4a1ffa5818ccb4783008bab297145b697", "01d317affb6b1d57d25d4f6b39b493e03226afc4", "0e7c0199bbb4533e8f074d914a45351d80e5cb55", "136a5e50af7f75aca1a28db1a337bacc2cbdde5c", "1b82a2619e4500b908bf89a8a140cd4631b87e52", "0a289fd7b14345822b1acda6d82750b15d59663e", "4a9a45f2a3263ba1690cd6263285239d44bd8eea", "b0ac2616034f56ab1469afb935b55fe7e37f8f41", "3507821c37bfe3ca4e3cdd446f1ea9d3fe839851", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "6c15a1a25d4d103ed251d82c95ff4f0b38866a06", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "72880d15db2282512e5d3f0a3796b397d68cc7db", "104eb3716d8b2d7e6d69805357d5b7fb87caff3e", "12b06d1555b07926b5691aabd6308ef3b452f53a", "42333e3f231bbfe508f6da6bad2feff9ae223113", "386c5e8f9e2f289c5c1df458e9043c04475cfdc5", "06bf84f98e7dd39be8d96eb67bafbf56d61bc715", "1b80ae882afb809686f20765e4a42a5b99aa55de", "0003c342fd0b3e48a483901bd3b731b974fc1f37" ], "paperAbstract": "Functional encryption (FE) is an extremely powerful cryptographic mechanism that lets an authorized entity compute on encrypted data, and learn the results in the clear. However, all current cryptographic instantiations for general FE are too impractical to be implemented. We construct IRON, a provably secure, and practical FE system using Intel's recent Software Guard Extensions (SGX). We show that IRON can be applied to complex functionalities, and even for simple functions, outperforms the best known cryptographic schemes. 
We argue security by modeling FE in the context of hardware elements, and prove that IRON satisfies the security model.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134106", "https://eprint.iacr.org/2016/1071.pdf", "https://acmccs.github.io/papers/p765-fischA.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/19aeb06b3ba5b454fc462254c178acdf233d955b", "sources": [ "DBLP" ], "title": "IRON: Functional Encryption using Intel SGX", "venue": "CCS", "year": 2017 }, "19b2bdef27eca52df623a86ec39e9fc64637f77b": { "authors": [ { "ids": [ "2898845" ], "name": "Nachshon Cohen" }, { "ids": [ "2084183" ], "name": "Arie Tal" }, { "ids": [ "2210090" ], "name": "Erez Petrank" } ], "doi": "10.1145/3018743.3018753", "doiUrl": "https://doi.org/10.1145/3018743.3018753", "entities": [ "Data structure", "Dynamic data", "Lock (computer science)", "Multi-core processor", "Overhead (computing)", "Search tree" ], "id": "19b2bdef27eca52df623a86ec39e9fc64637f77b", "inCitations": [], "journalName": "", "journalPages": "17-29", "journalVolume": "", "outCitations": [ "1e74d5dcce5724091e7408ce9cc55bd7d54a34a7", "15b9cea4970ca2bf6bde3f54269f75e1ebda8bb5", "1cb0679ae82be093268747da0f634281ea6a41df", "8085460933105498577e741a02185c0097e36711", "06e3991f0aa199fe6ff9334659f93e81b04c78fd", "38611b424808954be2c1375da1a873b1e2487ace", "b22d2d7234fa30a5cc22f2db253fa42616739d4e", "1c870bf4b34aa96f04da771c8064cd415463f7f9", "97c649dc68ad8818c7e2b7f75b9c164aa840f6a5", "78e47b768c784fcb15004bab48e24f80fdad579e", "567ea8bf671735f120faa615ea6313faaaf07084", "2651c9bd8848fbe8955325fed39b038be32b988e", "1e113c73209f601cf34bb64445422db2e6d9dc67", "686d18c410900725a51e7972aafe41287bdf51fc", "3795b4928d3ed0750071fbef8c0ac8d5f094c97a", "00b3ebd315991e5b5f4e6beec2e1488281368028", "500adfb955f443c9fb0b8a44a5a03887fa4e9729", "2900690eb3132a4d1536226d629727de41f38a66", "b32facdc3cdfedb0d5ecf65d8890cb8b584684d7", "22a3110123362412f91ae44c2b15e2234324f6fd", 
"07ef40275b9fe6dfe290d5ad61456666e49d2eb5", "d82fde923093716dba6a723b984f7f4e57e503f8", "2ea25f6ba6e9758e25514e3a6b89fe968bfd6707", "18757558dac21007e01f553b2817528c28734061", "12d362cbe5bb546a075784430ec2c420af1fe8cb", "217d408f60f749aab6705ff3056b8e77640f2948", "2c968749e04fc16908fc543e4468f945a5c695da", "17f6abba7bbf3b894a758b8aba69a41edde0adc2", "c62c98b1240c0d5102a7826dea40ab54e4895aa9", "ac35455b128baf4e280f2571160c242b67b3f85e", "58da996efd7320d1e484263c97c930c8979c474f", "98ea8b57c429e896a30005581d617e5d77ac7826", "1031dd14412b59a28527e005f078f470b5b04dc0", "942f2a6df29234c304b69129872835d60cf5e9e9", "6075f8bfc541841270f223de64b577e17a748b75", "6187dbba5499565feb38d65292578275933b86e8" ], "paperAbstract": "Data-structures can benefit from dynamic data layout modifications when the size or the shape of the data structure changes during the execution, or when different phases in the program execute different workloads. However, in a modern multi-core environment, layout modifications involve costly synchronization overhead. In this paper we propose a novel layout lock that incurs a negligible overhead for reads and a small overhead for updates of the data structure. We then demonstrate the benefits of layout changes and also the advantages of the layout lock as its supporting synchronization mechanism for two data structures. In particular, we propose a concurrent binary search tree, and a concurrent array set, that benefit from concurrent layout modifications using the proposed layout lock. 
Experience demonstrates performance advantages and integration simplicity.", "pdfUrls": [ "http://www.cs.technion.ac.il/~erez/Papers/ctp-ppopp17.pdf", "http://dl.acm.org/citation.cfm?id=3018753" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/19b2bdef27eca52df623a86ec39e9fc64637f77b", "sources": [ "DBLP" ], "title": "Layout Lock: A Scalable Locking Paradigm for Concurrent Data Layout Modifications", "venue": "PPOPP", "year": 2017 }, "19b4cabd5e6b03c41d62f86ae8ef99b9ffb69820": { "authors": [ { "ids": [ "36608964" ], "name": "Michael Sevilla" }, { "ids": [ "34313805" ], "name": "Noah Watkins" }, { "ids": [ "2210623" ], "name": "Ivo Jimenez" }, { "ids": [ "3064226" ], "name": "Peter Alvaro" }, { "ids": [ "38470159" ], "name": "Shel Finkelstein" }, { "ids": [ "40559862" ], "name": "Jeff LeFevre" }, { "ids": [ "3198700" ], "name": "Carlos Maltzahn" } ], "doi": "10.1145/3064176.3064208", "doiUrl": "https://doi.org/10.1145/3064176.3064208", "entities": [ "Computer data storage", "Hardening (computing)", "Load balancing (computing)", "Log-structured file system", "Program optimization", "Throughput", "User interface" ], "id": "19b4cabd5e6b03c41d62f86ae8ef99b9ffb69820", "inCitations": [ "44eda071f81de708fc849f08b4ace67549d3cb5c" ], "journalName": "", "journalPages": "175-190", "journalVolume": "", "outCitations": [ "0599ba259341963bf8abf2818c874713e570a039", "5f3f9223c5c9f896be099bc177929febad508407", "458902c0a4b5e9855c8a4be9eeb4cb4ce534b068", "080b1f2c8316dad80d8c385dfcb82335a64a4d29", "0f04a0b658f00f329687d8ba94d9fca25269b4b7", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "55e62284924ce63b0fcccf102a3551ae7396a145", "bce1901805ec6e07993cb248bd1a9279c1800971", "b20ea5b9e2eaea4541586c84ef45fc0b02e6e627", "4fbe8c8ace7546e3a10bfd8e151bc09a41fd3f9a", "04aae75ab8a040225024b6a96ab7cbb28ef74d0a", "8b1d8d46836a6d5eb4355315b64d85c128cbff27", "00c181b8b64e824fbe0172339f1e4560b557fab5", "0b6adc0dbc55076dc9c9a8931f4a4df58fd291b6", 
"a520c904b59a652625dd2bd815e6cd98c6c01ef8", "9d37f92a24245b65b82c8d03fec22c00e7ff024e", "3f3ed6abdc2f51021a66f4762999733048ce80ea", "4b0db76df9148e80806e7b45b13e85ee54cd5b6c", "01b769e1658d1b4d8176db247c63d8bae7bda41e", "756be6042073f40b71e86d3800de8ec11a6a48c2", "4af63ed343df388b6353b6fc77c7137d27822bf4", "60d0e6e2b45ad66ced53e336fb448646172eeb44", "111e2d5634cb30d5d841cdb22563f9b371fb5f54", "4687fdf3c77ef00700fdf1399f7dd81bfe87ef97", "2ad184901a2f3551df5d0406f244ae655ac8c4d2", "76eea8436996c7e9c8f7ad3dac34a12865edab24", "acf0f99dd4bbcbf7049b787e10cb538c620ed110", "4510f0e664e2f35b78248e88624ee972c5293818", "015f34fa2e0d280428d59483db354562187b0c35", "1c0692596a5fa4baa50418a43e84b89872439092", "41fca6c199464c983cb6384ae65c83eb7522fb46", "36a6fbb818f8bf395a180989b5ef2a3d89bc4543", "844e8fdec5ad8246e323d00b2f6b7705f3c17ae2", "1eb43fa1bba359f5f28d764e3d1449f15961e415", "65a2cb8a02795015b398856327bdccc36214cdc6", "a960d20d7fe82e28970b3b52cc42fe90e8aa6b93", "1023b268d9f8d399f76dd34da8719a10d6439017", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29" ], "paperAbstract": "Storage systems need to support high-performance for special-purpose data processing applications that run on an evolving storage device technology landscape. This puts tremendous pressure on storage systems to support rapid change both in terms of their interfaces and their performance. But adapting storage systems can be difficult because unprincipled changes might jeopardize years of code-hardening and performance optimization efforts that were necessary for users to entrust their data to the storage system. We introduce the programmable storage approach, which exposes internal services and abstractions of the storage stack as building blocks for higher-level services. We also build a prototype to explore how existing abstractions of common storage system services can be leveraged to adapt to the needs of new data processing systems and the increasing variety of storage devices. 
We illustrate the advantages and challenges of this approach by composing existing internal abstractions into two new higher-level services: a file system metadata load balancer and a high-performance distributed shared-log. The evaluation demonstrates that our services inherit desirable qualities of the back-end storage system, including the ability to balance load, efficiently propagate service metadata, recover from failure, and navigate trade-offs between latency and throughput using leases.", "pdfUrls": [ "https://users.soe.ucsc.edu/~msevilla/papers/sevilla-eurosys17.pdf", "https://www.soe.ucsc.edu/sites/default/files/technical-reports/UCSC-SOE-17-04.pdf", "http://doi.acm.org/10.1145/3064176.3064208" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/19b4cabd5e6b03c41d62f86ae8ef99b9ffb69820", "sources": [ "DBLP" ], "title": "Malacology: A Programmable Storage System", "venue": "EuroSys", "year": 2017 }, "19b7bb3902a9be620efb53f3ee5dc9e1267650e0": { "authors": [ { "ids": [ "1696783" ], "name": "Jan Camenisch" }, { "ids": [ "2192968" ], "name": "Manu Drijvers" }, { "ids": [ "1732175" ], "name": "Maria Dubovitskaya" } ], "doi": "10.1145/3133956.3134025", "doiUrl": "https://doi.org/10.1145/3133956.3134025", "entities": [ "Authentication", "Bitcoin", "Credential", "Digital credential", "Digital signature", "Information sensitivity", "Transaction authentication", "Universal instantiation" ], "id": "19b7bb3902a9be620efb53f3ee5dc9e1267650e0", "inCitations": [], "journalName": "", "journalPages": "683-699", "journalVolume": "", "outCitations": [ "9dde09e003f8200400567b0c7be7c60679daaa3c", "c11b9dba2097d18db45e82ec48e68eb160c694df", "4ff0de33c6c9e63055413f0ec3e8b75e1a23ac45", "e67410d5ef6a064afd20d93650f39129d00f1a32", "0a9ce8889505a151eea2515b7eec741a16fcee3a", "b56231a971677ffafe4d76844f01c3cb54e8504d", "11ef405a5ef00e402fe2f0d265f2fada864f02ad", "396a7b3289504052e115d65cf7a20ccb4e2c52ca", "90f26c0f0d04b9c4999b454c35ee1c7603ca9e4b", 
"4bdb9517632ca54617b0e22c13f311d36cdc4d21", "8c9ce2108cfd83aee973b492cbef052cf75f61c1", "32cc3fd437950a098d6e93ae755fc6571554a955", "5d907726fab423bd8d04aeb377771628050c9d25", "0642b270475d67bc40c2f1959497f672266858c6", "0df07fc5f4e09f5c9030efecf359d989d81fa36d", "8d69c06d48b618a090dd19185aea7a13def894a5", "1d8e009e8e5c1974336f15ac1776fdfc6520c700", "15fb0b5cd673b9fd771bbf4746b537590e4838ac", "1ca47e23b91970ce358fb543130c4bfcda86630d", "e1df1aae2ebf7c356177b7eb422be83877ebfde8", "ee69e3313ea96ecd84a64134cb135eff52d4bd3c", "559f7da97577ec8543f281b039aff9e3fe12615f", "55d5a8451f64544595fa32c8e32befe6daa299b9", "20d965226892aeef1f3454b10911f355c6c2f8fa", "bb63c68855d42c95623ed9362d0853ea1d4cc858", "ab705e602cfbcad8f1d9b302701ee70076a52630", "a9ca6a9079bcb5c513ebf63a029d7cdbb8245fa3", "44b49882e9a01466d850355f5adfdec1c4270e69", "51fb848987c83181d95360fdbb40edb6d90683d7", "42333e3f231bbfe508f6da6bad2feff9ae223113", "bb98db25a45e8ea3bbbc96bfaf65e314309f0a36" ], "paperAbstract": "Certification of keys and attributes is in practice typically realized by a hierarchy of issuers. Revealing the full chain of issuers for certificate verification, however, can be a privacy issue since it can leak sensitive information about the issuer's organizational structure or about the certificate owner. Delegatable anonymous credentials solve this problem and allow one to hide the full delegation (issuance) chain, providing privacy during both delegation and presentation of certificates. However, the existing delegatable credentials schemes are not efficient enough for practical use.\n In this paper, we present the first hierarchical (or delegatable) anonymous credential system that is practical. To this end, we provide a surprisingly simple ideal functionality for delegatable credentials and present a generic construction that we prove secure in the UC model. 
We then give a concrete instantiation using a recent pairing-based signature scheme by Groth and describe a number of optimizations and efficiency improvements that can be made when implementing our concrete scheme. The latter might be of independent interest for other pairing-based schemes as well. Finally, we report on an implementation of our scheme in the context of transaction authentication for blockchain, and provide concrete performance figures.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134025" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/19b7bb3902a9be620efb53f3ee5dc9e1267650e0", "sources": [ "DBLP" ], "title": "Practical UC-Secure Delegatable Credentials with Attributes and Their Application to Blockchain", "venue": "CCS", "year": 2017 }, "19c3a3a0e1ea3127c4961d070303c62c3755b72f": { "authors": [ { "ids": [ "1707287" ], "name": "Matthew J. Luckie" }, { "ids": [ "2151354" ], "name": "Robert Beverly" } ], "doi": "10.1145/3098822.3098858", "doiUrl": "https://doi.org/10.1145/3098822.3098858", "entities": [ "Border Gateway Protocol", "Causality", "Control plane", "Customer edge", "Downtime", "Failure rate", "Internet", "Internet backbone", "Missile guidance", "Multihoming", "ReBoot", "Reachability", "Reliability engineering", "Router (computing)", "Single point of failure" ], "id": "19c3a3a0e1ea3127c4961d070303c62c3755b72f", "inCitations": [], "journalName": "", "journalPages": "488-501", "journalVolume": "", "outCitations": [ "433d5241ba9809edb945ca11e3f468b762be7697", "2c3e8ad9c0b802f9b51492c4fe34bb18d443c0c1", "09fdc1e451e5960d9de2c0935f3c372b496a1e4c", "7bb4f53d3701ef7c5aeceb15ea2e69d86cf92169", "23ad7425efba9dd29b05821de46183c85d5c1350", "daf9db6926f65efbec840b409da28bcbf164146a", "4d6fb20251d8f4ae199720f37282c736073af527", "0573fdf68ea82cdeecacf50cba67c94fc3738ba5", "0bd835c021b7a7bd81df84b9d4065c756128665e", "716493caae63a0a2fe1a6310f4c3a924210410e2", "306a2933e91fdf8971bd160c5bbe365e48ec7fdd", 
"14a09e1fba9b74edba595260155ac7ebe9837e16", "38f0ec358c3f0952927370d314779a9ea7e0f34e", "54bdf79b699c26dab5047186d533ba10f1fe22a8", "1085045094f17ccdc8c4b25d28a257af98a0e38b", "358b5c94a80f3afad08bec0ad4ab0ed237ac8edf", "73a6eb2ae5e9aa37babb95748c4d8ecee7efaf22", "2dbcc7077a01981679007eceac6c6659a1c18200", "0a7151c200bf97973453ec05a28012cf03cf906b", "3dde335a66b174f224c479e93d23f1982a6c8a7e", "71fda542b243f32b3c9f75317905b1ea1ceacce9", "6c0ed2f09a5d961bea5a2750958d400863537e3e", "3a31f220f764d0761fd1ae29ed74296a93a6c417", "612acaa301933c4662862ae9503c48848ab953c8", "c05ea36991ebf4dd2cd15309a94e3e6fa2512533", "3007bc296d5ab703e6e08d2a4bfb5e1c3e1a3a56", "44fee6d785326c9e8111e931bac7f54e388129f8", "36cab0982e382002b2a72c2cffe81869d1f8d48a", "5053e01e98b654b8dd032f221af8a0439e6989b6", "02bec50aadc665dd3b87f68a51a700f69230d46c", "442a10cb8a50fd17fe28b85133a2a1d67039f468", "2f22cb5911d37a56f10f3cd991b566b65c144df2", "a5fa0a8b2f70ae5520bcfbdd0c3c6f47252fe669", "22e70e8e68f5f5247ae167ab1fe56bed636f50c9", "07bf42c44a0fa2b6422c7f983a4449fa75490158", "20a9f86cbf29c7f90bcb5ddaa379d3bfd2f99a1d", "1450ef74c07f9aa38563c0e40ff69e146c1c5ce0", "0243fcfd6f4d3c7dbfd0626cc71220ad6e4744d6", "6410b6cc29af234544f7706194aba20d6c4c90ae", "1164ec0b8d2bd8e95b9fc07e9669ff9d4d379c64", "36e3cca61206e1c5a226eda7ba663005b74c71da", "0fa37b92444d8fcbef150470226e216bce15e3a8" ], "paperAbstract": "We propose and evaluate a new metric for understanding the dependence of the AS-level Internet on individual routers. Whereas prior work uses large volumes of reachability probes to infer outages, we design an efficient active probing technique that directly and unambiguously reveals router restarts. We use our technique to survey 149,560 routers across the Internet for 2.5 years. 
59,175 of the surveyed routers (40%) experience at least one reboot, and we quantify the resulting impact of each router outage on global IPv4 and IPv6 BGP reachability.\n Our technique complements existing data and control plane outage analysis methods by providing a causal link from BGP reachability failures to the responsible router(s) and multi-homing configurations. While we found the Internet core to be largely robust, we identified specific routers that were single points of failure for the prefixes they advertised. In total, 2,385 routers -- 4.0% of the routers that restarted over the course of 2.5 years of probing -- were single points of failure for 3,396 IPv6 prefixes announced by 1,708 ASes. We inferred 59% of these routers were the customer-edge border router. 2,374 (70%) of the withdrawn prefixes were not covered by a less specific prefix, so 1,726 routers (2.9%) of those that restarted were single points of failure for at least one network. However, a covering route did not imply reachability during a router outage, as no previously-responsive address in a withdrawn more specific prefix responded during a one-week sample. We validate our reboot and single point of failure inference techniques with four networks, finding no false positive or false negative reboots, but find some false negatives in our single point of failure inferences.", "pdfUrls": [ "http://www.caida.org/publications/papers/2017/impact_router_outages_as/impact_router_outages_as.pdf", "http://doi.acm.org/10.1145/3098822.3098858", "http://www.caida.org/~mjl/pubs/spf.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/19c3a3a0e1ea3127c4961d070303c62c3755b72f", "sources": [ "DBLP" ], "title": "The Impact of Router Outages on the AS-level Internet", "venue": "SIGCOMM", "year": 2017 }, "19cf8aa19cc6ce4b13689275243c3e6d0da08cd0": { "authors": [ { "ids": [ "2043402" ], "name": "Shaileshh Bojja Venkatakrishnan" }, { "ids": [ "2188628" ], "name": "Giulia C. 
Fanti" }, { "ids": [ "1698842" ], "name": "Pramod Viswanath" } ], "doi": "10.1145/3084459", "doiUrl": "https://doi.org/10.1145/3084459", "entities": [ "Bitcoin", "Communications protocol", "Cryptocurrency", "De-anonymization", "Fallout", "Heuristic", "Peer-to-peer", "Peer-to-peer file sharing", "Plaintext", "Privacy", "Provable prime", "Transaction processing" ], "id": "19cf8aa19cc6ce4b13689275243c3e6d0da08cd0", "inCitations": [ "f41539e1d1dee746327b1273116b6057042fd935", "109864228a62da90454f4d41dfced7c274939f7f", "24f036498862dba97036df9c26de066c75e843c2", "c2039c2cf4e0c1222a1a2f964c73858093e47890" ], "journalName": "POMACS", "journalPages": "22:1-22:34", "journalVolume": "1", "outCitations": [ "69624f82c1fb3db2a70fcf658294c869ad4c1fdc", "09af9108cb5c196d5c15a6f3d26e604434203bea", "2d0bf053ce80f8864c293be6d469bfaf2e3e8363", "ef0b3ea6275731d6c2e984c3ec674b04a22fb041", "19bab496d5d7f60d3e5b9217739b9cf7fedaf44b", "5e5ead387baf1f9b8962733e994dff0cd7ab4d33", "6b20af22b0734757d9ead382b201a65f9dd637cc", "1313cab217185e4f00bf3baa8a3051b86432237c", "496b932a15efd27c98d2d3279cc7c1b551cd21cb", "29ba6f93c7ef7cb94ae725e5aee43b2c9c872e68", "20480eb94103d5ed4b5925a86430c872e4fabfdc", "188a6a7fd5fa8e14dc999a70896bda510c914f6b", "5e86853f533c88a1996455d955a2e20ac47b3878", "1652bc872f6dc07d8111de2d74fb9da2e1c7cd12", "eb314bddbc3c42302b6bbfc1d82ae642e18fa7da", "516e80e02d2191bf8c74b2dbe3f284312fab0dde", "557d8b988bca3d0033189723d11102e04c0c67c0", "0b22e134cfb5709888bef5dffc1a8d37f3cbfa35", "7693cafd6f29623f61d66f031cadd60b6ce827d7", "24a19ef2d8b5c0aef7f22793a268b3c16d339352", "00f763e99bd9d1aa45350536d480e05851a055eb", "c27762257f068fdbb2ad34e8f787d8af13fac7d1", "c9c74f18b974b0e3e4b44184e17aaefd05ee01d0", "049e2c54fe8a35cd941937ba592e07bbc2dda591", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "53d1f9f7c77ffb0af729c173c35f099550f27f6e", "efadebc17495d28b18f830d80d339c5198219f8a", "3eca189e1d19303a759eba87b978c0a2a4bc5ec1", "244ddba27efef35bac9b01d5b1780922f5f33ec4", 
"93bae7155092c8ba1ae1c4ad9f30ae1b7c829dd7", "7f17ee37b9cc8dbf5de6363c863d9e3c49768400", "12add45f5a0cc3d9a9a694d3a3802843438208e0", "12b66f7180072dd8d5ac1c935b12df381d71ad81", "15167da8d35184d062b988b5a6807e0fa72cd77f", "7be14a23d26a19786ed97807fa8bfbf11b299984", "bac41b59697da3ca5c80ca08f2bbbc97a3576248", "a7a65937247580668b7090410f975e629663489d", "234e6be0d4238f76b3ac038ee422be39f391c625", "3624eaf7c2f05612429ed1579cc82102c11f4d65", "878e258687f89f0b39c6aa019313ed0c3b00170f", "3583b52ce4c2759e087b89b367cb72f0c9902ffe", "bd3a995d9683655a1fed285f8edbb873b38afb13", "67af8bf83dc4354d1513b6f60b13df60f694c5b3", "003758d5924bd12fc5fee1edba9cc06f5fe65b51", "c113913227e2509cee4d9d8ea8334b7df5b02699", "5ae4e852d333564923e1b6caf6b009729df6ca6a" ], "paperAbstract": "Bitcoin and other cryptocurrencies have surged in popularity over the last decade. Although Bitcoin does not claim to provide anonymity for its users, it enjoys a public perception of being a privacy preserving financial system. In reality, cryptocurrencies publish users' entire transaction histories in plaintext, albeit under a pseudonym; this is required for transaction validation. Therefore, if a user's pseudonym can be linked to their human identity, the privacy fallout can be significant. Recently, researchers have demonstrated deanonymization attacks that exploit weaknesses in the Bitcoin network's peer-to-peer (P2P) networking protocols. In particular, the P2P network currently forwards content in a structured way that allows observers to deanonymize users. In this work, we redesign the P2P network from first principles with the goal of providing strong, provable anonymity guarantees. We propose a simple networking policy called Dandelion which provides quasi-optimal, network-wide anonymity, with minimal cost to the network's utility. 
We also discuss practical implementation challenges and propose heuristic solutions.", "pdfUrls": [ "http://doi.acm.org/10.1145/3084459", "http://diyhpl.us/~bryan/papers2/bitcoin/Dandelion:%20Redesigning%20the%20bitcoin%20network%20for%20anonymity%20-%202017.pdf", "https://arxiv.org/pdf/1701.04439v1.pdf", "http://doi.acm.org/10.1145/3078505.3078528", "http://arxiv.org/abs/1701.04439" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/19cf8aa19cc6ce4b13689275243c3e6d0da08cd0", "sources": [ "DBLP" ], "title": "Dandelion: Redesigning the Bitcoin Network for Anonymity", "venue": "SIGMETRICS", "year": 2017 }, "19d4a2340a887d61dfbf446a693af660a5432dcd": { "authors": [ { "ids": [ "2343790" ], "name": "Konstantinos Xirogiannopoulos" }, { "ids": [ "2313625" ], "name": "Amol Deshpande" } ], "doi": "10.1145/3035918.3035949", "doiUrl": "https://doi.org/10.1145/3035918.3035949", "entities": [ "Algorithm", "Algorithmic efficiency", "Data deduplication", "Data store", "Database", "Database schema", "Domain-specific language", "In-memory database", "Interconnection", "List of algorithms", "Memory footprint", "Relational database", "Synthetic data" ], "id": "19d4a2340a887d61dfbf446a693af660a5432dcd", "inCitations": [ "7b845167dd41349cafc9aab147822b9abcedeb2c", "034ba5fe35895db2b046633dc527115e29bd37a4" ], "journalName": "", "journalPages": "897-912", "journalVolume": "", "outCitations": [ "0ad8e89091eed09217e66adc98136126addc2619", "10aa9ee7caaf9381b6a0468ae899a9729824a6b7", "0c0800259bd40b1ac96cc437629c5ea0ad729f22", "165d9bd7e9c4a030b09cf21e35ea0bf96090d8cb", "3bb6d5834bfb355553588e382ac5f9fa8a8d831d", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "05d6e0185bcb48d396fe778ceedb2078e37e72ef", "78ad867eb6176d4e2f1cec4f7517f65d90a660f8", "1156f60e40548096df49528b1342bb3e88b0f378", "43ea93b01be7d3eed2641b9393c6438d19b825a0", "9f3583769b9226c59ca2469f230d2db0d6499647", "3486aeaf540c48952120fe853d672af984f40a6a", "5ee35281c2c5345e13890b7dcef3d17ee0506023", 
"a97b77c8c3f9e8247c497d1b4c27c958a15f3a62", "7f598b081df60565014cd943e4512710b682b734", "1452f20140dba52b928c9be5f385b5ac35537a2c", "47602a67f2f4ec905df9f0151e2da2366a90424c", "c8c4a820973364d5f39f49e24686154d504755bd", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "1fc5dc2fe308c9eadd15f1a1d18ed298d4d343ff", "1359d01962b882c95607a75aeafeb532188cb159", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "2138776f89bccc9362b239a6d33018ca2a847960", "05f7c10bac0581e13b75554197a4d6f0059f2481", "3ad21b253652d1778308b1531e5ba775084d150e", "49c820d33ef7f94e166a4e88ac052b49e1f54362", "ac5c1088151c8f9fbe9419415709c5c8b945a129", "4b573416043cf9cff42cbb7b753993c907a2be4a", "b2bf34c0c0007145a389e014b7ddaa3daa76f332" ], "paperAbstract": "Analyzing interconnection structures among underlying entities or objects in a dataset through the use of graph analytics can provide tremendous value in many application domains. However, graphs are not the primary representation choice for storing most data today, and in order to have access to these analyses, users are forced to manually extract data from their data stores, construct the requisite graphs, and then load them into some graph engine in order to execute their graph analysis task. Moreover, in many cases (especially when the graphs are dense), these graphs can be significantly larger than the initial input stored in the database, making it infeasible to construct or analyze such graphs in memory. In this paper we address both of these challenges by building a system that enables users to declaratively specify graph extraction tasks over a relational database schema and then execute graph algorithms on the extracted graphs. We propose a declarative domain specific language for this purpose, and pair it up with a novel condensed, in-memory representation that significantly reduces the memory footprint of these graphs, permitting analysis of larger-than-memory graphs. 
We present a general algorithm for creating such a condensed representation for a large class of graph extraction queries against arbitrary schemas. We observe that the condensed representation suffers from a duplication issue, that results in inaccuracies for most graph algorithms. We then present a suite of in-memory representations that handle this duplication in different ways and allow trading off the memory required and the computational cost for executing different graph algorithms. We also introduce several novel deduplication algorithms for removing this duplication in the graph, which are of independent interest for graph compression, and provide a comprehensive experimental evaluation over several real-world and synthetic datasets illustrating these trade-offs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035949", "https://arxiv.org/pdf/1701.07388.pdf", "https://arxiv.org/pdf/1701.07388v2.pdf", "https://webcourse.cs.technion.ac.il/236826/Winter2017-2018/ho/WCFiles/Extracting%20and%20Analyzing%20Hidden%20Graphs%20from%20Relational%20Databases%20(2).pdf?7277=", "http://cs.umd.edu/~kostasx/files/SIGMOD_Poster_final.pdf", "https://arxiv.org/pdf/1701.07388v1.pdf", "http://www.cs.umd.edu/sites/default/files/scholarly_papers/Xirogiannopoulos.pdf", "http://arxiv.org/abs/1701.07388", "https://www.cs.umd.edu/sites/default/files/scholarly_papers/Xirogiannopoulos.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/19d4a2340a887d61dfbf446a693af660a5432dcd", "sources": [ "DBLP" ], "title": "Extracting and Analyzing Hidden Graphs from Relational Databases", "venue": "SIGMOD Conference", "year": 2017 }, "19f3c85feddc4c65409cbb73941f63a98d39fec0": { "authors": [ { "ids": [ "34792930" ], "name": "Salman Niazi" }, { "ids": [ "37148494" ], "name": "Mahmoud Ismail" }, { "ids": [ "1694607" ], "name": "Seif Haridi" }, { "ids": [ "1684757" ], "name": "Jim Dowling" }, { "ids": [ "3422632" ], "name": "Steffen Grohsschmiedt" }, { "ids": [ 
"2984804" ], "name": "Mikael Ronstr\u00f6m" } ], "doi": "", "doiUrl": "", "entities": [ "Apache Hadoop", "Database", "Distributed File System (Microsoft)", "Downtime", "Experiment", "Failover", "In-memory database", "NewSQL", "Next-generation network", "Scalability", "Shared nothing architecture", "Throughput" ], "id": "19f3c85feddc4c65409cbb73941f63a98d39fec0", "inCitations": [ "b28ead3a0ceaa45394b713fd1c8efb0c4d5eb857", "9809bc2847bc9274564c6c3545561d920c5e44f3", "7de1da8eddd18cb58d1fdab18fc0c922d9a6ef27", "41e494275eb24b248bddc19c4f7185c9abba803d", "dbc4aeb409f4d10813290ddfc612d9a271925755" ], "journalName": "", "journalPages": "89-104", "journalVolume": "", "outCitations": [ "4af63ed343df388b6353b6fc77c7137d27822bf4", "a93d2871166991749ee865f8a1ceb7f3a7fe8bfc", "b0d7bfd07752108b53d885c2835004d49ca693c9", "e2c6297a9ad5118dc4a6a0dab6a2af2b83545e3d", "253d779cc8939c4f5e2d50158bc76586c743417d", "37617b02017b7912ad4d977ba420ab3fa232e445", "04b3aaf58a91557e15c8064660baa1cc5e8db14e", "3aaa142aeb475b0aaef21e3dcfb7951a97e0f19a", "059eb34d95c73e37dca8e35b0ac5a2fb0142f3ee", "3d3f22ee1797b0e086da07e00d0f59b1aca08bf3", "42142c121b2dbe48d55e81c2ce198a5639645030", "b7014a268c35e377366634d6b8370a8a7db285a5", "499b458ddd5d10dc4be158fd89aeed0e31b6b5cf", "d545d252271c228375f6b24dfa9ab3b3d5592357", "b054076afd228c71b95f019f63ab1f8b102fcd56", "5ff311923cd8f80057b2cfc15cf7ec3ac0a6fdbc", "0b35861df3b66533b0a188b411dcc4de6723a5cd", "18a5f443299784479e78d9e77f175af57cb2fa2b", "2da760f90c3d2bf6598becdde9063093f488548c", "26c4c1dd27fdb449fe0267eac595930766917878", "07d847f310d5fa9138f461f0a25c5e0024f1c4af", "0712c325155f8af65602a08cc448d1e453466a33", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "afa94b1bd30b3c6c17766117f8eb543dc9f1f7bb", "2d7d50dea29a30b82e75187f8b71579de93e53b9", "9748241beb02ef1e2d0e6dc877c04b354033a838", "2d60d3596490d9999d8433bf41405060779bc11d", "e9852418b28b3d1990ce787193ed1deb2cbc406a", "1cfee3e6bad11c5c92cd06065064c474a00e2412", 
"0f408cba7605f7a6fd65837dd1c7e6f193d181ef", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "b55aefc75061cca8d4697f7d789800a0f2d344a0", "e9aac4c3925553c6203293fa519fa1d9f56ef8fc", "5f3f9223c5c9f896be099bc177929febad508407", "bb294f18c25c877a453b14e80b40b56707753592", "0eca989d12fe222091ab92da85b812ed5537c06f", "41d2c530e0a44ab8b665caa5e17c8e0ca0c39726", "26c713c0775ac388370492c26e25d24d9e430e9a", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "4593ae644f04d76f582dedc4cc32d2acd33c9a93", "57efc2b9ba2a725af1d66cc43c472d0314190051", "2b25cfea56184fcad5b36f949a50238d4f810201", "87d47502bf40a4bfa7a0ded26c3efb2426250808", "2289754c17c95e53c982ca2f023af21dec824d29", "0973e45d3eeb9641d3de34d48f8d0432f1113dcf", "77733a36cb27577a664c1a657fbed51813202ebd", "b9fba8f9da216f4430180baea05239ddce648f3a", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "4fbe8c8ace7546e3a10bfd8e151bc09a41fd3f9a", "332f77fd05703c1607e3b57884ad31fb1fad0104", "6d3d5d4fbc6bc0fe419d9c6809a00f71a492c171", "152435c55c3d2900c90f106b6688dd3844372fdf", "0541d5338adc48276b3b8cd3a141d799e2d40150", "d5d868887ae05016e15d2bc050b037eeb0b9f111" ], "paperAbstract": "Recent improvements in both the performance and scalability of shared-nothing, transactional, in-memory NewSQL databases have reopened the research question of whether distributed metadata for hierarchical file systems can be managed using commodity databases. In this paper, we introduce HopsFS, a next generation distribution of the Hadoop Distributed File System (HDFS) that replaces HDFS\u2019 single node in-memory metadata service, with a distributed metadata service built on a NewSQL database. By removing the metadata bottleneck, HopsFS enables an order of magnitude larger and higher throughput clusters compared to HDFS. Metadata capacity has been increased to at least 37 times HDFS\u2019 capacity, and in experiments based on a workload trace from Spotify, we show that HopsFS supports 16 to 37 times the throughput of Apache HDFS. 
HopsFS also has lower latency for many concurrent clients, and no downtime during failover. Finally, as metadata is now stored in a commodity database, it can be safely extended and easily exported to external systems for online analysis and free-text search.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/fast17/fast17-niazi.pdf", "http://arxiv.org/abs/1606.01588", "https://arxiv.org/pdf/1606.01588v1.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17-niazi.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/niazi", "https://arxiv.org/pdf/1606.01588v2.pdf", "http://arxiv.org/pdf/1606.01588v1.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/5dfb/1e4e731a7c197cf5cefcd9f654a72f1690d0.pdf", "s2Url": "https://semanticscholar.org/paper/19f3c85feddc4c65409cbb73941f63a98d39fec0", "sources": [ "DBLP" ], "title": "HopsFS: Scaling Hierarchical File System Metadata Using NewSQL Databases", "venue": "FAST", "year": 2017 }, "1a0365567850837931d04126714ae6e2cbfc6270": { "authors": [ { "ids": [ "10607451" ], "name": "Jingzhou Liu" }, { "ids": [ "1702500" ], "name": "Wei-Cheng Chang" }, { "ids": [ "9287688" ], "name": "Yuexin Wu" }, { "ids": [ "35729970" ], "name": "Yiming Yang" } ], "doi": "10.1145/3077136.3080834", "doiUrl": "https://doi.org/10.1145/3077136.3080834", "entities": [ "Benchmark (computing)", "Convolutional neural network", "Deep learning", "Document classification", "Machine learning", "Multi-label classification", "Scalability", "Sparse matrix", "Wikipedia", "XMTC" ], "id": "1a0365567850837931d04126714ae6e2cbfc6270", "inCitations": [ "299bfd0ca5349f700f914fdab312e16aef429cdc", "75d26b9d706ae1737a8b7b99ada7233c4f00882c", "0deae63aba8adc4de4368454ad5c322092e1473d", "5c9cbb03d65d721d97abe26cfb9d7299b7e81346", "84c45d32971431a8c2b2e0955dbc7fb65a2330cc", "81631605111ea321bff600a17d3a2f59bb020fec" ], "journalName": "", "journalPages": "115-124", "journalVolume": "", "outCitations": [ 
"0825788b9b5a18e3dfea5b0af123b5e939a4f564", "398dee13b3aaaefdf14c78cc1e00dcf265795fd3", "5bf7afed6ca76e8250835be2278c62b3ddf48c32", "19ebc66d741950012dc659b6ac9089dd66c1676f", "2538e3eb24d26f31482c479d95d2e26c0e79b990", "5bdb704fad81541b2831293a209e99e1f4ea0a85", "2061a689341d7562ccd81e630038ec6fa4f310ac", "08197632189f8aec3a7b63894609cadbe5ed02a5", "53ced1bf79a5ff0db025fee76a67a33f37e4286d", "3b3b818a22e06720c4f88c4f1e762c866618da9e", "3946cfc939e924fe8c35bfacaa7aa643d9b89407", "9f3109501b16970761e7cba759787715d3ada242", "154728875d4668065ca6ba9fa2f5d2a1bcfc4a6e", "b58b4f365d0e8308b7cff81ada755720ff223ec2", "a1aa865c6585a70bf83dd8ddf156e8a14e8add81", "07f0bb4c4ce5ac6f65383c742e54c892a18e8555", "4343fd5edb47bf39ed0dca8085a22097584ed597", "1967ad3ac8a598adc6929e9e6b9682734f789427", "4a31ca27b987606ae353b300488068b5240633ee", "47b7e7875ec87c26d56552b05f31a2b649ee1d16", "4afa6c2eb552ceef0e396fbfe449932492873034", "854cdfa80420f1aa9fef20aaf1254cc26a007972", "92271c2c714bab056ee389bfbd5e71c3cbbfb15a", "47a87c2cbdd928bb081974d308b3d9cf678d257e", "572b7f31aee095262057fe17b181ac262eb94995", "57d774b8592b4b3f83f1304be43701ad8517e79a", "a8b5866e2a717bfc4186b7a93bddcf460957e6bd", "65ad0e876216ea034b7958f016456e32666bc5c6", "364da079f91a6cb385997be990af06e9ddf6e888", "8a59c729bbb33d773ad447af48ebe4cc9c35647e", "0b3b14a13ee9e3c7f454d375212c5adceb6ea8ff", "071b16f25117fb6133480c6259227d54fc2a5ea0", "0cf6fe9e975a5496e9edd53818ae5c18a2a7e66b", "1211d3c950e2d1cc983d7a37fff1ea5062d54284", "3b049d8cfea6c3bed377090e0e7fa677d282a361", "3bc7c7cf58c9692d799f971cc1c294ba59ed3867", "896b9c1551b7ffa347baed144582ec3b5d88f703", "3a982594d902ef89257cebedc7098a87afdc161d", "04f615ec7157725ff76ecc8bf4cf1303ac49294d", "eba36ac75bf22edf9a1bfd33244d459c75b98305", "cbe8f17f6a0069e613ddea3ca18476f6ec373309", "5a73a3988015a0e6418ead06e38c1d8fac201fc5", "2abe6b9ea1b13653b7384e9c8ef14b0d87e20cfc", "ca70480f908ec60438e91a914c1075b9954e7834", "bf0121f325cff44af03edb2c45c8ea4206693803" ], "paperAbstract": 
"Extreme multi-label text classification (XMTC) refers to the problem of assigning to each document its most relevant subset of class labels from an extremely large label collection, where the number of labels could reach hundreds of thousands or millions. The huge label space raises research challenges such as data sparsity and scalability. Significant progress has been made in recent years by the development of new machine learning methods, such as tree induction with large-margin partitions of the instance spaces and label-vector embedding in the target space. However, deep learning has not been explored for XMTC, despite its big successes in other related areas. This paper presents the first attempt at applying deep learning to XMTC, with a family of new Convolutional Neural Network (CNN) models which are tailored for multi-label classification in particular. With a comparative evaluation of 7 state-of-the-art methods on 6 benchmark datasets where the number of labels is up to 670,000, we show that the proposed CNN approach successfully scaled to the largest datasets, and consistently produced the best or the second best results on all the datasets. 
On the Wikipedia dataset with over 2 million documents and 500,000 labels in particular, it outperformed the second best method by 11.7%~15.3% in precision@K and by 11.5%~11.7% in NDCG@K for K = 1,3,5.", "pdfUrls": [ "http://nyc.lti.cs.cmu.edu/yiming/Publications/jliu-sigir17.pdf", "http://doi.acm.org/10.1145/3077136.3080834" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1a0365567850837931d04126714ae6e2cbfc6270", "sources": [ "DBLP" ], "title": "Deep Learning for Extreme Multi-label Text Classification", "venue": "SIGIR", "year": 2017 }, "1a0e59990e14d30665bd87030d9c895c7a650e71": { "authors": [ { "ids": [ "1736858" ], "name": "Grigory Fedyukovich" }, { "ids": [ "7745797" ], "name": "Maaz Bin Safeer Ahmad" }, { "ids": [ "1991345" ], "name": "Rastislav Bod\u00edk" } ], "doi": "10.1145/3062341.3062382", "doiUrl": "https://doi.org/10.1145/3062341.3062382", "entities": [ "Apache Hadoop", "Array processing", "C++", "Correctness (computer science)", "Excalibur: Morgana's Revenge", "Formal verification", "Parallel computing" ], "id": "1a0e59990e14d30665bd87030d9c895c7a650e71", "inCitations": [ "413c1a8a26de4de49c2b1204208ca5eb4386b5a6", "3d60feda349eccde4a36d2acb3b9f8ed2e799644", "0f37e37310dbeaa063589830159467c5a6f958a9" ], "journalName": "", "journalPages": "572-585", "journalVolume": "", "outCitations": [ "879734aa4da358358249be82b4a6b2051ad5aa09", "25af0e970cecbe879bbc3b9ff8f24475a9557ea1", "6f6e7035d2873ae3eb0f3aaa010e22cf2bed6b18", "bf176b6d05fdfdbc1711be2ed568db9cb0153ea4", "3793cd493c6b59bdb39593e370a542af84bf8a56", "0558c94a094158ecd64f0d5014d3d9668054fb97", "67d18339ed72b7fc2152cb42b63362b570c11946", "44a796b9a01c2adc6b7978359f3cdc10356e03ce", "099cfdca0f11eeaf46dd6457f33caff8e8fbcb41", "1e52a2e8535509ab0111c0c5d89a88d3bb10b34c", "5412468cac5613762699d107dd519da94541017c", "0de7254fb42f09a0bfbc191c71553ab8278636a9", "716b3455c4df7b8cfaade6801adf4e8538279ebd", "36ef8e3f51b57ee2ec3da311f1a7afa3a09c36a2", 
"c608fece067b98f739ccbc9ce0a434d41997bed9", "0282e990528c6a9b4aa92cc196f46257fb4ccee1", "1ef301c1b275091b6a50d620b41df4722f2108f0", "202e33581369f6050fc800ebc31615eb65649e78", "f5f7c64a1b60fa2992044828f1fd099118b05c33", "87de316ea08272afbda356b8d580385dd0d8382f", "2d692211c220f4b16eabb7639108fba88d00cf2f", "80527e7595530951081494d1b98f3f13da3033a2", "2311e71450d1c02703749dba3f96a9f842b80cf9", "49d5f1340aad43d48bbb3b9df58eb5a250a57396", "332f77fd05703c1607e3b57884ad31fb1fad0104", "5d0c6e456a9b4f858da875a7d758bc6134f643a7", "9e27e05d92801844c06d5296e370bd2b781ef150", "569a01f3506a46f2df1a1db1fc5fa638ed01334f", "0f37e37310dbeaa063589830159467c5a6f958a9", "0541d5338adc48276b3b8cd3a141d799e2d40150", "2cdcddb08ae6060e94cba6c9b2b58b87324e686f", "25929ecf00df179c51b95b7be250a5220d86d6f6", "99cc63730e3079ed58311a4ec88f4f0c891ed61d" ], "paperAbstract": "Parallelizing of software improves its effectiveness and productivity. To guarantee correctness, the parallel and serial versions of the same code must be formally verified to be equivalent. We present a novel approach, called GRASSP, that automatically synthesizes parallel single-pass array-processing programs by treating the given serial versions as specifications. Given arbitrary segmentation of the input array, GRASSP synthesizes a code to determine a new segmentation of the array that allows computing partial results for each segment and merging them. In contrast to other parallelizers, GRASSP gradually considers several parallelization scenarios and certifies the results using constrained Horn solving. For several classes of programs, we show that such parallelization can be performed efficiently. 
The C++ translations of the GRASSP solutions sped performance by up to 5X relative to serial code on an 8-thread machine and Hadoop translations by up to 10X on a 10-node Amazon EMR cluster.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062382", "http://grassp.uwplse.org/paper.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1a0e59990e14d30665bd87030d9c895c7a650e71", "sources": [ "DBLP" ], "title": "Gradual synthesis for static parallelization of single-pass array-processing programs", "venue": "PLDI", "year": 2017 }, "1a3ee7c6fa8105dad1bc7cc5c5a95e16871e8f50": { "authors": [ { "ids": [ "35169923" ], "name": "Seungsoo Lee" }, { "ids": [ "33472708" ], "name": "Changhoon Yoon" }, { "ids": [ "2807382" ], "name": "Chanhee Lee" }, { "ids": [ "2083381" ], "name": "Seungwon Shin" }, { "ids": [ "1780068" ], "name": "Vinod Yegneswaran" }, { "ids": [ "1800634" ], "name": "Phillip A. Porras" } ], "doi": "", "doiUrl": "", "entities": [ "Attack surface", "Emergence", "Hoc (programming language)", "Software-defined networking" ], "id": "1a3ee7c6fa8105dad1bc7cc5c5a95e16871e8f50", "inCitations": [ "4745fbc920784d8e46bd6020280f46581137493f", "3eb3bc03c120d49b472a09864176b99aa9ca7d8e", "e85f374b3772933c87816ec46a40bf5b55fc077f", "e3787b845b2474a7fbe22f7c39c4df3bc910d644", "29777b5a9b37afe38067ba9d37445954b7086157", "313f528c9e72c388dc6f527e5a1b429bd007ac5b", "a77f71105b9a0db2324989866fe828bca0096683", "fc92a3d4bc27ef30b9073450d52a2521edf5d33d", "82982506bd775c96d4e9fb776fc7e17294dcbf5c" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "2b6f22d6d2b01c5df1d3949cfdd9740e2e899146", "ba456853bdc38f2559809558e7ac11886b0b47af", "2a628e4a9c5f78bc6dcdf16514353336547846cc", "1bb402010f3aa859cc616b722acb51dfed5189a1", "2c13dcfdc5ea2d355a46fe326c371038a00ba7f5", "22a0d3d5ae316fc3627fe6820cd6a09663585034", "7822c4b1269c0f52e38485684abfa3c6b137902b", "a406cfb3e352fd1d5e4b2c6fe002828d36fa593a", 
"4c5d4902583313fea69033c6e528b95a42353da6", "271a00f8bb8674e66f7ec1005a0e6dc2f41dd4e0", "3967126afbca6a722d7257cd671fe5e4979358a5", "2afc8d9b3a0d17fb926a6a6dd05b1fb307130a27", "6ea63d09993b9a268689790ea8d25bc36345497e", "3d2aa904b9872736182e2776aa117f03d4a22281", "4c44cbcea788cc024b29ddf178249ee1c367464a", "73966d417bdfe0fd2f1bfd82e7dddf51ccbda961", "981ba7b03695c6df049fb4edf43c2803d4dd535d", "9b5869da492ca33db7a8ec17776ecdb4dac8d288", "168e8d8ff13846398a3cd2bfbcc86e34e2fed526", "4687fdf3c77ef00700fdf1399f7dd81bfe87ef97", "008cca8a521f23500ac6e934baad976e624d2dd7", "2a5c00109e15c68e8d8f479a2657c2889cc69cc5", "0cc9cee813d6b9b147cb9e1e2ee04ccca1d87bb3", "18f431e4494e0d3c72ffcf5c8c61b337e810b00e", "4fd92c8a4094ca4967b00181c29649ebdda47562", "55b7d2e425c5e813ed51780845398ef5d246a4d3", "01b14d8bb5ff99d7422a4c1a6f88a36e15afa703" ], "paperAbstract": "Developing a systematic understanding of the attack surface of emergent networks, such as software-defined networks (SDNs), is necessary and arguably the starting point toward making it more secure. Prior studies have largely relied on ad hoc empirical methods to evaluate the security of various SDN elements from different perspectives. However, they have stopped short of converging on a systematic methodology or developing automated systems to rigorously test for security flaws in SDNs. Thus, conducting security assessments of new SDN software remains a non-replicable and unregimented process. This paper makes the case for automating and standardizing the vulnerability identification process in SDNs. As a first step, we developed a security assessment framework, DELTA, that reinstantiates published SDN attacks in diverse test environments. Next, we enhanced our tool with a protocol-aware fuzzing module to automatically discover new vulnerabilities. 
In our evaluation, DELTA successfully reproduced 20 known attack scenarios across diverse SDN controller environments and discovered seven novel SDN application mislead attacks.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/delta-security-assessment-framework-software-defined-networks/", "http://www.csl.sri.com/users/vinod/papers/delta.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/1a3e/e7c6fa8105dad1bc7cc5c5a95e16871e8f50.pdf", "s2Url": "https://semanticscholar.org/paper/1a3ee7c6fa8105dad1bc7cc5c5a95e16871e8f50", "sources": [ "DBLP" ], "title": "DELTA: A Security Assessment Framework for Software-Defined Networks", "venue": "NDSS", "year": 2017 }, "1a4da3f3e4bf1777cad69ada2fcf335e4ccaa9f7": { "authors": [ { "ids": [ "17788118" ], "name": "Vineetha Kondameedi" }, { "ids": [ "3083298" ], "name": "Sathish S. Vadhiyar" } ], "doi": "", "doiUrl": "", "entities": [ "Batch processing", "Central processing unit", "Jumpstart Our Business Startups Act", "Linear programming", "Linear programming formulation", "Nonlinear programming", "Nonlinear system", "Requirement", "Run time (program lifecycle phase)", "Simulation", "Supercomputer" ], "id": "1a4da3f3e4bf1777cad69ada2fcf335e4ccaa9f7", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "90-99", "journalVolume": "", "outCitations": [ "48a6b370460dc8e6ce9c5a45eb39cf1fb654f1f3", "0eb1894a1b8d0188c9ea87d865de9b8c57732697", "2f5b1cf400eadd636114067fa995bfd4a9538ff6", "0784356b46a1345b352ab634bda835c07ff04af2", "0d550f13a70ad1a7ae73f2d5a9c58abd43c8ca30", "8e288c2a9efa3fb0bff35c5743c3675805823766", "c62a08a13b49967e6e35c5c48d97f3514ef0a3d9", "14b7127b55075d522b34a3b0fc305ef1ed7ded87", "d23d3027011f40ab3fde365c70f8d6a5a55772f7", "0c052f048fcb00eb4fde722e1026cf468d022727" ], "paperAbstract": "Supercomputers have batch queues to which parallel jobs with specific requirements are submitted. 
Commercial schedulers come with various configurable parameters for the queues which can be adjusted based on the requirements of the system. The employed configuration affects both system utilization and job response times. Often times, choosing an optimal configuration with good performance is not straightforward and requires good knowledge of the system behavior to various kinds of workloads. In this paper, we propose a dynamic scheme for setting queue configurations, namely, the number of queues, partitioning of the processor space and the mapping of the queues to the processor partitions, and the processor size and execution time limits corresponding to the queues based on the historical workload patterns. We use a novel non-linear programming formulation for partitioning and mapping of nodes to the queues for homogeneous HPC systems. We also propose a novel hybrid partitioned-nonpartitioned scheme for allocating processors to the jobs submitted to the queues. Our simulation results for a supercomputer system with 35,000+ CPU cores show that our hybrid scheme gives up to 74% reduction in queue waiting times and up to 12% higher utilizations than static queue configurations.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101125" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1a4da3f3e4bf1777cad69ada2fcf335e4ccaa9f7", "sources": [ "DBLP" ], "title": "Adaptive Hybrid Queue Configuration for Supercomputer Systems", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "1a548ed86bc13f4a09dddbc9fcd0e9e907ab189c": { "authors": [ { "ids": [ "2927967" ], "name": "Pengpeng Zhao" }, { "ids": [ "3462370" ], "name": "Xiefeng Xu" }, { "ids": [ "3215702" ], "name": "Yanchi Liu" }, { "ids": [ "3280672" ], "name": "Ziting Zhou" }, { "ids": [ "38671700" ], "name": "Kai Zheng" }, { "ids": [ "2858764" ], "name": "Victor S. 
Sheng" }, { "ids": [ "1707713" ], "name": "Hui Xiong" } ], "doi": "10.1109/ICDM.2017.75", "doiUrl": "https://doi.org/10.1109/ICDM.2017.75", "entities": [ "Point of interest", "Polynomial matrix", "Precision and recall", "Program optimization", "Social network" ], "id": "1a548ed86bc13f4a09dddbc9fcd0e9e907ab189c", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "655-664", "journalVolume": "", "outCitations": [ "3be2609d565f436611090bf5201f73543b3d0610", "08b3a741347cc6ad93ea0b9b07d2f31b8acf49a7", "0b77f1875bcb00eb4c320b0d062fce3ff9c54d43", "96032c720e30b09cbda76067471a180e31fce4a4", "6a08a92f1d990f89e87aec2af243227984b98980", "9916eef828b461051d4c0b090e82aef8d4855485", "50054b2cafde87fb71cfbfed966a7ddc0feada50", "a1dfec89e0842e3e1ffaa773da8b9ef53a89fb28", "1b69b48119f4d68ffe7000bbadefd14ba5ece168", "d2e7d37dac6b9eb313bd7918f162485111608bea", "184b7281a87ee16228b24716ca02b29519d52eb5", "4814852815557deda9e56c9e05c233545a20d62d", "10adbab10ea1063c599ce137d9e5e7f9ca33303f", "63aaf7c8af91a249ce1ad6aac2bc1bec0a5ed76c", "073daaf4f6fa972d3bdee3c4e4510d21dc934dfb", "1b233b82ba35c913e0ddcf19b11ec50d013a8149", "0a519e9c18c9f4d0dd7f278b7eace9e6a4ed5c99", "1c347f18b5bbbd30baf49e28d523da2d3af9b7cd", "579967a676efe3391d01e5141daf0289d336fd1d", "ad49936242755a6fdb8b75ce28c8ed6e66c3f31b", "ca5df41d6bad715dfcbee9badd08daae6a1f174b", "91ffac42e3416f0a0a542b2d981636b02271fdbb", "32e6fbc44a79bad68087c003a6e31f55b9584758", "1477586737c4e00bea36320968cf84ae997aa4b5", "48373cee0cad67c854b77e672a2e33ce769b336f", "696e9c3571f6c88f7d9958192d7ba36e77ec9f39", "968dc609150f1d933b1fe222d6ebb0e3823a4522", "010a3f35d8d245ad0dcc87fc5f598ed0cda31ebf", "1f2de093c64679c99437c3031ede4fd4e32c66cc", "25353bc78453da76e43e199a925ab54457e18ca5" ], "paperAbstract": "With the rapid development of location-based social networks, Point-of-Interest (POI) recommendation has played an important role in helping people discover attractive locations. 
However, existing POI recommendation methods assume a flat structure of POIs, which are better described in a hierarchical structure in reality. Furthermore, we discover that both users' content and spatial preferences exhibit hierarchical structures. To this end, in this paper, we propose a hierarchical geographical matrix factorization model (HGMF) to utilize the hierarchical structures of both users and POIs for POI recommendation. Specifically, we first describe the POI influence degrees over regions with two-dimensional normal distribution, and learn the influence areas of different layers of POIs as the input of HGMF. Then, we perform matrix factorization on user content preference matrix, user spatial preference matrix, and POIs characteristic matrix jointly with the modeling of implicit hierarchical structures. Moreover, a two-step optimization method is proposed to learn the implicit hierarchical structure and find the solution of HGMF efficiently. Finally, we evaluate HGMF on two large-scale real-world location-based social networks datasets. Our experimental results demonstrate that it outperforms the state-of-the-art methods in terms of precision and recall.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.75" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1a548ed86bc13f4a09dddbc9fcd0e9e907ab189c", "sources": [ "DBLP" ], "title": "Exploiting Hierarchical Structures for POI Recommendation", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "1a7fbe4366c028875756ebe180b9499e6a33a9da": { "authors": [ { "ids": [ "2383364" ], "name": "Misbah Mubarak" }, { "ids": [ "2797656" ], "name": "Philip H. Carns" }, { "ids": [ "38538207" ], "name": "Jonathan Jenkins" }, { "ids": [ "8549365" ], "name": "Jianping Kelvin Li" }, { "ids": [ "1812494" ], "name": "Nikhil Jain" }, { "ids": [ "39683248" ], "name": "Shane Snyder" }, { "ids": [ "40211322" ], "name": "Robert B. 
Ross" }, { "ids": [ "1759102" ], "name": "Christopher D. Carothers" }, { "ids": [ "1823585" ], "name": "Abhinav Bhatele" }, { "ids": [ "1707383" ], "name": "Kwan-Liu Ma" } ], "doi": "10.1109/CLUSTER.2017.25", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.25", "entities": [ "Best, worst and average case", "Computational science", "Data-intensive computing", "Image resolution", "Interference (communication)", "Network packet", "Network performance", "Network switch", "Network traffic control", "Routing", "Scheduling (computing)", "Simulation" ], "id": "1a7fbe4366c028875756ebe180b9499e6a33a9da", "inCitations": [ "9ddab4cbc6c64e33119f735986a30257c0cfe0cc" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "204-215", "journalVolume": "", "outCitations": [ "6e1ceacdaa0c979cf52af5f478cc2a9891e2b6a0", "0efcb98fab0c44bfc6a5403483b7cedca1278bcb", "251544e7c508771ab34cb2d6b97800960cde1f1e", "7e06d6922e32d30bd6f7e86ae660ed7bf2e99fd2", "10f3fa67bcb56322427d12f81abf49ed10198247", "59ba9f62728b6231f982ea3b59f9ba7422182f28", "18fbcb1de113f5d60c8e81566231a0ecea46f3fe", "67ddcb550dd8fe57e4567bd7ca78f6e841f25c4c", "c39c26d510c1a965c5f132edc989a598ca92b700", "9c4b6c885bfc6038cdac56763663880e0f2624e6", "8610331089fae62c109580e293bc896f6d87f0f7", "0736d68aad2c198a8f6dda851c27bd180421c2aa", "075f51b0aeaac7ebed18d5fbf67e64a14c8943f1", "050b6a5f0e650a12223c27fb133eb5e398df8480", "ba625fb8f294a5003a0880096695a92bc9bb843a", "0a9c8fef61634e392f9de6f34361cc1c690f7a00", "31cfefc79d64ede4c13f231b8b30ebfd45666d3d", "3ec4cf958f6ee00dc00aa14840c96268c4c3f9c9", "88bf414148412b88378d0bd1b7fa194946dba00a", "3037024ee9782764cfbe8e5c9c625e2edaaf83fd", "4110d5ad162fbf43a3418f28b4d46609c2a147be", "885f7657cd858f3c48707946083f2a9aa7ee7aee", "865e20c6f2a2b09c21a73792c746270acbb64f46", "fd3da0fbbe8d736506f43ecf1af20343c0a32301", "b040e7de49f4b4e8e9e007d7e4149d7ef277c609", "b460a8c552ba24afa9b05cf551f5f55db1985e56", 
"5f8991828def57d2f0cda942566afff56740d150", "145dcba6ff585990ff051e9e0dbd52296ebda6c6", "a5c9bfcaf7b52edee6a94f58337b4a0e33575cd3", "d5e9b36ec7e7d5f71aa406d2068f72b984342b0c", "8cf9e252c8314e26f20b619acb6392d52abac647" ], "paperAbstract": "HPC systems have shifted to burst buffer storage and high radix interconnect topologies in order to meet the challenges of large-scale, data-intensive scientific computing. Both of these technologies have been studied in detail independently, but the interaction between them is not well understood. I/O traffic and communication traffic from concurrently scheduled applications may interfere with each other in unexpected ways, and this behavior may vary considerably depending on resource allocation, scheduling, and routing policies.In this work, we analyze I/O and network traffic interference on burst-buffer-equipped dragonfly-based systems using the high-resolution packet-level simulations provided by the CODES storage and interconnect simulation framework. The analysis is performed using realistic I/O workload sizes, a variety of resource allocation and network routing strategies employed in production environments, and a dragonfly network configuration modeled after current vendor options. We analyze the impact of interference on both I/O and communication traffic.We observe that although average network packet latency is stable across a wide variety of configurations, the maximum network packet latency in the presence of concurrent I/O traffic is highly sensitive to subtle policy changes. Our simulations reveal a worst-case single packet latency of 4,700 times the average latency for sub-optimal configurations. While a topology-aware mapping of compute nodes to burst buffer storage nodes can minimize the variation in maximum packet latency, it can slow down the I/O traffic by creating contention on the burst buffer nodes. 
Overall, balancing I/O and network performance requires careful selection of routing, data placement, and job placement policies.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.25", "http://www.mcs.anl.gov/papers/P7080-0717.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1a7fbe4366c028875756ebe180b9499e6a33a9da", "sources": [ "DBLP" ], "title": "Quantifying I/O and Communication Traffic Interference on Dragonfly Networks Equipped with Burst Buffers", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "1a805d51dceb525493da058092c2450472084305": { "authors": [ { "ids": [ "39652968" ], "name": "Daniel A. G. de Oliveira" }, { "ids": [ "1766536" ], "name": "La\u00e9rcio Lima Pilla" }, { "ids": [ "2419140" ], "name": "Mauricio Hanzich" }, { "ids": [ "10688475" ], "name": "Vinicius Fratin" }, { "ids": [ "6714557" ], "name": "Fernando Fernandes" }, { "ids": [ "2746510" ], "name": "Caio B. Lunardi" }, { "ids": [ "1766749" ], "name": "Jos\u00e9 Mar\u00eda Cela" }, { "ids": [ "1728532" ], "name": "Philippe Olivier Alexandre Navaux" }, { "ids": [ "1752570" ], "name": "Luigi Carro" }, { "ids": [ "2290186" ], "name": "Paolo Rech" } ], "doi": "10.1109/HPCA.2017.41", "doiUrl": "https://doi.org/10.1109/HPCA.2017.41", "entities": [ "Algorithm", "Approximation error", "Intrusion detection system", "Locality of reference", "Principle of locality", "Self-organized criticality", "Xeon Phi" ], "id": "1a805d51dceb525493da058092c2450472084305", "inCitations": [ "26fef351a3a671a64b32b3c673a332e912cfed24", "d72db57aeadc0ffcb3225d1711dbf9160fd92969", "06e4905bef810f8a2f089974c5291fb7dd84be46" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "577-588", "journalVolume": "", "outCitations": [ "3b2c8bb9471bd52a40b72a61bfede076f4d414b5", "0c96de3e65dbba2016602dc26f88abe666580927", "19d686007a37f599b850bfbca391a5d7d869def8", 
"2ae34c190902632d9bec7918f661426e98639256", "6561b4c794beef42e5acfca58ecd88e97febee3c", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "092217c2267f6e0673590aa151d811e579ff7760", "5d1dbf3e809d5823a508c95360a18676fb349065", "44c758e72011a62471cbcf605a7cb2e10fa60820", "dce7e4a338d08444e0847517648f6394975b8e0b", "b3aa0590b54b4f23723a7986f94806bea77a2392", "2b05d38d0b689da9a6edacbdc4043811944599b4", "3582e99fa79e8edfc083b0495884696257e88098", "5c0e8af36e20b8ea213561e8c3d706b4e2f2cc8d", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "7f6c49645686f4814c01aca621341a0b244898b6", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "ac9d098a0504f6d35a731d748d0d33e03085eb6a", "44f8d8d66f1cac8822bdffab21fb348aa1c4e5e8", "c755f858361e90dcfc181c3d39295a06456ef00b", "ac516e531c0a70ac1890e7bafa360358aaa31aa6", "4d663445ecb6d530964ce4bc8f4ea9219103e6e1", "7b62ad9d447ebbb14d63869c9fd8d360f13edd59", "34981f10c629cbd396d1291a6c45a38e00eec009", "48b22e82dba733f04a5af3408ecab872ba38f0c3", "73edc0858aac5cf61b72473145b01612c0fd416b", "def34f422d6930bd23d5c58de78be98804e44e97", "2640471efddd30a2855a2a4d76fde3459d36cdf6", "01d62cd850496455ce1616500f491690effa5c98", "2255a818da9e190540b66161f3aceb7ac377ea08", "5037ba6bcd3b391a8cad4abeae7b6a39ca850c72" ], "paperAbstract": "In this paper, we evaluate the error criticality of radiation-induced errors on modern High-Performance Computing~(HPC) accelerators (Intel Xeon Phi and NVIDIA K40) through a dedicated set of metrics. We show that, as long as imprecise computing is concerned, the simple mismatch detection is not sufficient to evaluate and compare the radiation sensitivity of HPC devices and algorithms. Our analysis quantifies and qualifies radiation effects on applications' output correlating the number of corrupted elements with their spatial locality. Also, we provide the mean relative error (dataset-wise) to evaluate radiation-induced error magnitude. 
We apply the selected metrics to experimental results obtained in various radiation test campaigns for a total of more than 400 hours of beam time per device. The amount of data we gathered allows us to evaluate the error criticality of a representative set of algorithms from HPC suites. Additionally, based on the characteristics of the tested algorithms, we draw generic reliability conclusions for broader classes of codes. We show that arithmetic operations are less critical for the K40, while Xeon Phi is more reliable when executing particles interactions solved through Finite Difference Methods. Finally, iterative stencil operations seem the most reliable on both architectures.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.41" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1a805d51dceb525493da058092c2450472084305", "sources": [ "DBLP" ], "title": "Radiation-Induced Error Criticality in Modern HPC Parallel Accelerators", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "1aaacfb2aca6187da43a9aa80adfa05944c1f4b6": { "authors": [ { "ids": [ "1773504" ], "name": "James Davis" }, { "ids": [ "10438197" ], "name": "Arun Thekumparampil" }, { "ids": [ "2006849" ], "name": "Dongyoon Lee" } ], "doi": "10.1145/3064176.3064188", "doiUrl": "https://doi.org/10.1145/3064176.3064188", "entities": [ "Atomicity (database systems)", "Client-side", "Concurrency (computer science)", "Ecosystem", "Error detection and correction", "Event-driven architecture", "Event-driven programming", "Node.js", "Open-source software", "Server (computing)", "Server-side", "Software bug", "Software deployment", "Test automation", "Thread (computing)" ], "id": "1aaacfb2aca6187da43a9aa80adfa05944c1f4b6", "inCitations": [ "777db975964f08b514cd05249cb2c029da7bea02", "445c2a2cf3587945f8786eb7c794ff0448457994", "334cb2afe4d4ab0ca9c731a12da1e3112cfea699", "55c68ee887efba085745f372633df95e55b3481b", 
"c79c6b242ffee2499dc670dc428a942b11176045", "561d95be369566a0a1598fa1d7ddee9f27c088d4", "e712211bf8628d16d55ec7c6f99b03309e455d4f", "e98ed76e7abda4f04b12e570fb30ae76edf5a5d2" ], "journalName": "", "journalPages": "145-160", "journalVolume": "", "outCitations": [ "6e69ad3daf1d4dfe98b83a4ed448cfa0ff016102", "0ac8188e490de61e8405b611fdf962abd3026860", "08d1e2257d40d1ce78d4412b7f45487a6cba1c90", "b4d503f189cf10c55a8376a8ce5a0e9cb8ee48bb", "0b6975dfee824f53f54281afe5755620c4ee9e92", "02ed0ec3bb95776b5c06e2784810b501c4d3f053", "186e82657c803bf9f5f58be4d6ff17d1420dbbeb", "10ba04904f12e44cd0569cb86aa6e97e47939e23", "ce48a652ef299c9c25a1fd4f7f0e8622473d2e92", "87e782af17ee32570ec30c1fdc2b97f33b3053f6", "24e6f34e499634393416ea09c1aadd37ec9e8542", "18bd7fc45fb824e7efa0b062e1e8f58c3d64cd02", "47cdefebd5534d1d8c5d0f8061b482dbcd656e63", "be23ee790fd756cbc9d82ec3be7b05be089fe25a", "a74d2672e0f1bb05b321e60fffab0c003693dcef", "3903331813a494417acf595291029b4beba62737", "3a33dad8e9d12835fca95deec73e841096c8bec0", "3152ff9f63d45c6cef3a8739003cb559b6a424e0", "29e451fb5518c78380967b5514a7e8d4927a7545", "0e7319e23cffd76871cbd049f2fbb3b516abec61", "5be4c8ff3166d14ccedb4e8db00f8e09576008a9", "ff5e450280c73cd931373bc05e95941b32473195", "56b6af73e05de5b0a7990a608b98242c376e242c", "43c6d1d87e0287cd7c719cdbd90929911054a620", "476308dd1d3e76d80ad6d7971cafc5fbf9890f8b", "da245266502dc96125203895ec79bbe1821fff8e", "42b9f8c37b0f42cc6894f691a34de6ccb26bdfc7", "6db4d8beb058be614bc4cea69c688fd7fdba9089", "5b9f54be658fe5e42448bbcf3a33fff9532cc0b1", "3c8ca4af7cf3aecaab851727a14947f55f20590b", "03bb63660c3935ad2ec011a7f9e868587063f89c", "e21341d177ad2fd7fd2fff21be0582b38f1476b4", "15cae3ea228f9f8f5ba4d48a45e6d75814671fa9" ], "paperAbstract": "The importance of the Event-Driven Architecture (EDA) has never been greater. Web servers and the IoT alike have begun to adopt the EDA, and the popular server-side EDA framework, Node.js, boasts the world's largest package ecosystem. 
While multi-threaded programming has been well studied in the literature, concurrency bug characteristics and useful development tools remain largely unexplored for server-side EDA-based applications.\n We present the first (to the best of our knowledge) concurrency bug characteristic study of real world open-source event-driven applications, based in Node.js. Like multithreaded programs, event-driven programs are prone to concurrency bugs like atomicity violations and order violations. Our study shows the forms that atomicity violations and ordering violations take in the EDA context, and points out the limitations of existing concurrency error detection tools developed for client-side EDA applications.\n Based on our bug study, we propose Node.fz, a novel testing aid for server-side event-driven programs. Node.fz is a schedule fuzzing test tool for event-driven programs, embodied for server-side Node.js programs. Node.fz randomly perturbs the execution of a Node.js program, allowing Node.js developers to explore a variety of possible schedules. Thanks to its low overhead, Node.fz enables a developer to explore a broader \"schedule space\" with the same test time budget, ensuring that applications will be stable in a wide variety of deployment conditions. 
We show that Node.fz can expose known bugs much more frequently than vanilla Node.js, and that it can uncover new bugs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064188", "http://people.cs.vt.edu/~dongyoon/papers/EUROSYS-17-NodeFz.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1aaacfb2aca6187da43a9aa80adfa05944c1f4b6", "sources": [ "DBLP" ], "title": "Node.fz: Fuzzing the Server-Side Event-Driven Architecture", "venue": "EuroSys", "year": 2017 }, "1ab77fddfe3819303ef5b840de2db9099a334695": { "authors": [ { "ids": [ "40200699" ], "name": "Eyal Ronen" }, { "ids": [ "1706216" ], "name": "Adi Shamir" }, { "ids": [ "33072466" ], "name": "Achi-Or Weingarten" }, { "ids": [ "7572935" ], "name": "Colin O'Flynn" } ], "doi": "10.1109/MSP.2018.1331033", "doiUrl": "https://doi.org/10.1109/MSP.2018.1331033", "entities": [ "Firmware", "Hoc (programming language)", "Patch (computing)", "Smart city" ], "id": "1ab77fddfe3819303ef5b840de2db9099a334695", "inCitations": [ "9967124d10303d750c3eeedbf258acc47663e3e7", "2c9f461af0159f65ac642109d9cbd8d673e0acd7", "0ef83c32f2fcf09cdca0726132f66136c79f5e97", "a7b798c17595ad6e0558bcd5af5fbee1acafdb0a", "210a47978cf9d8a9751cde45f6f28d33bbbaa5e7", "2f9f26729ebeeb501ab2c2a985b437b113af78a5", "817f0a7e4797536e674f02d0069151125a0ac2b6", "a6630ee1a9eb6fc184c326530ee8eca1181aecbe", "5128275e5e9b1ef2e520fb50a5b52a857d12598d", "ad7bc6752f742c1cf979219160f62c275e441b68", "879a7fc87b9855b77786ac3bf5f97300a486df8b", "8b67679116f336b32d6d66c0da5dd6341ef38c9d", "87d6867c1f981447977f38a3e89e0b64c1ca0271", "d7ab842a069bd47d6cd580533821d31a3d23eaa3", "4419db72d945e0ddccdf8144ee8e6901af7686d8", "ff1c3ce91744b839a2d48574df5554b8f9c04c3f", "332631922d48edcacf36e9902605f1da88fcfa27", "96419ab77023b8f28033b90f011f981b4ab644a8", "423c2fd6d2b9811739130201d18e9be16f197098", "c9eb0e2d290fcc26daac33a0f957468b3e84959c", "301b4b2e511a5e053de43b82bd71b59e6af5402c", "34fd3b41d9c8f4b509a8b6c10492e74239997340", 
"23ee4369c4e47000ad352aee1823b0eb17fd3406", "56a89210d1ef08f6b1c27ad778ea3f47b5e61345", "459a1bc2a647f57d48789cc01844bfd101507180", "97a8bb0df083814ffddfed1c3fe589b0da7d069d", "bd83f6319c0fe69d326e76ef419b78e6367f57e3", "fe0982816ed0d6e1ffcda29812c17017ecc5cb28", "15897bcf6aa1c79b9d79f3a5abc2de2c7b9fa713", "ee538c0da7709c8d376749e74fc2451c70fb8d90", "3044d1d4bc2760b8a89f7fb9653ce1f68cc3f47e", "397023c57c2a9176da061f1f64153d258c02f429", "1db4494c55a645b51def07950fd308285597002a", "8267cc512d360f5859bec8d149bb4d017fa8519c", "ffd27efe73e3f4d3b878936a9cf2e6bd0152a378", "a3b9014b22d7de8b4734ba58ef4c2a7285fb82f1", "1a57e7ad113acec1d531b3b80b7128fc97ecc89a", "6d435125b348b0dd894be0673b4bd267e8a9f36e", "7c75557b78c56b577caca823b33a96588eaa373d", "4f8ef4460802062c781116f1af7e01debd671e7e", "90a209adc5bfb765ecb049ec8594e3fac0dc4de7" ], "journalName": "IEEE Security & Privacy", "journalPages": "54-62", "journalVolume": "16", "outCitations": [ "6f2b2af90d49942f1d75f070cf176811a7a05bd8", "ca140d1225a8a1815384c60e917398f767b66b05", "3e19e56bc7c8d36af43a9f03eb5575f1866662a9", "a7cad22a458ddab93fe0bfc07d1426476de9ccab", "44db0c2f729661e7b30af484a1ad5df4e70cb22a", "5b53f63c9e56a0f42bf39effaff9201f39617bc3", "9794b7afc3e798e53af18e686666da71210b70ad", "aa367f3512aa60b5d3ae494fae9206eaf8aa87aa", "1587b9c92008bf6f021b57d194577e817e2c10c1", "013fa70be97e4a68fd146f3e590b4932cd6ddfc4", "9075b38d4181d521d35d6cd9cf9b00ae35dacddb", "d5755c470d73cabdc4de4df7b4e94cd8412cb5ed", "03649419035882e1148cecb5e617aeff946ab23e", "e4696bc396d0e7424045c8b1e687a5588d5b8fbf", "e4049bdf9bd01d38dd6ca4132096986b4bed7c3e", "e28daf0228bb1c3f40adad4c4705e246549f3a28", "565ed53f4a40a98b18a389a3790a7fe62a525f58" ], "paperAbstract": "In this article, we describe a new type of attack on IoT devices, which exploits their ad hoc networking capabilities via the Zigbee wireless protocol, and thus cannot be monitored or stopped by standard Internet-based protective mechanisms. 
We developed and verified the attack using the Philips Hue smart lamps as a platform, by exploiting a major bug in the implementation of the Zigbee Light Link protocol, and a weakness in the firmware update process. By plugging in a single infected lamp anywhere in the city, an attacker can create a chain reaction in which a worm can jump from any lamp to all its physical neighbors, and thus stealthily infect the whole city if the density of smart lamps in it is high enough. This makes it possible to turn all the city’s smart lights on or off, to brick them, or to use them to disrupt nearby Wi-Fi transmissions.", "pdfUrls": [ "http://eprint.iacr.org/2016/1047", "http://doi.ieeecomputersociety.org/10.1109/MSP.2018.1331033", "https://doi.org/10.1109/SP.2017.14", "https://eprint.iacr.org/2016/1047.pdf", "http://eprint.iacr.org/2016/1047.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1ab77fddfe3819303ef5b840de2db9099a334695", "sources": [ "DBLP" ], "title": "IoT Goes Nuclear: Creating a Zigbee Chain Reaction", "venue": "IEEE Security & Privacy", "year": 2016 }, "1ac14f8d2d6e34cb2a218476fd0f1586e8df8c14": { "authors": [ { "ids": [ "34828068" ], "name": "Ashlee Edwards" }, { "ids": [ "34961870" ], "name": "Diane Kelly" } ], "doi": "10.1145/3077136.3080818", "doiUrl": "https://doi.org/10.1145/3077136.3080818", "entities": [ "Conductance (graph)", "Infinite impulse response", "Interaction", "Scrolling", "Search engine results page", "Web search engine" ], "id": "1ac14f8d2d6e34cb2a218476fd0f1586e8df8c14", "inCitations": [ "cfc794089407d809f02c3b954a474efbcf5f4e66", "fbe5f1587de705be23aacbeb3f31cd73c0da7e02" ], "journalName": "", "journalPages": "125-134", "journalVolume": "", "outCitations": [ "f97e8e2d4194ec2e83cc3a1d13936b31854ac178", "07facb9dd8b7aebe39decdc680c62333aacd5d39", "12fca40c31a0c1a6f223beb535ad40f1a192b5af", "19f6905b53795a50749fafcadd20cd2129963896", "21697f9e4ced5f10ade4d785a8be936ca6e888d2", 
"d2991874e0f6f704202a81d712e782218a34b57d", "a2e170eeb8372fa748735e0e242735a7da80cfce", "45ab0092adfb8dae55050b7db67d679e9a46b59f", "70bf86727af633e0dcad2a9b4259f2de94af8b3c", "77c68553831923ae277dcc3daa4505a7d84e83e2", "9a530d7eb0fbc4168f95fff5ea8acbc7adbba6b2", "1cf66b58bd5b299a1e92493b6fcddcae1c7eacc4", "c7dae7a965595bb41f51439beb9bf39cc6b8e832", "9280e3c6daca1c9762655e2a180029eefdd51363", "ed4cbb28e02b3520252fe8089374deffc15800a6", "d71163858400f09037a2842827cd425afec9f574", "59e8a0371a8cdaf24cdc3e3c0f5e7e6f2d6a1251", "738e5e73e67875a4a1d9a0f724ef560296618a27", "8402125904a087a41ef976e3ec2ffd9351bd3568", "5ba7592fe3310106543cffad644addeffc2ad2b3", "29d47e1015d90a3bcde7db84399e34651fb37af7", "182fe8940dc9675b382a4ae48764bfeadc1a1b1b", "7ee6e6a07685b6728dc9a14ca1185d3ef1c1b8fb", "bab71002a0b58bb2fabd6675e3fc61ed0fbdcc7a" ], "paperAbstract": "One of the primary ways researchers have characterized engagement during information search is by increases in search behaviors, such as queries and clicks. However, studies have shown that frustration is also characterized by increases in these same behaviors. This research examines the differences in the search behaviors and physiologies of people who are engaged or frustrated during search. A 2x2 within-subject laboratory experiment was conducted with 40 participants. Engagement was induced by manipulating task interest and frustration was induced by manipulating the quality of the search results. Participants' interactions and physiological responses were recorded, and after they searched, they evaluated their levels of engagement, frustration and stress. Participants reported significantly greater levels of engagement when completing tasks that interested them and significantly less engagement during searches with poor results quality. 
For all search behaviors measured, only two significant differences were found according to task interest: participants had more scrolls and longer query intervals when searching for interesting tasks, suggesting greater interaction with content. Significant differences were found for nine behaviors according to results quality, including queries issued, number of SERPs displayed and number of SERP clicks, suggesting these are potentially better indicators of frustration rather than engagement. When presented with poor quality results, participants had significantly higher heart rates than when presented with normal quality results. Finally, participants had lower heart rates and greater skin conductance responses when conducting interesting tasks than when conducting uninteresting tasks. This research provides insight into the differences in search behaviors and physiologies of participants when they are engaged versus frustrated and presents techniques that can be used by those wishing to induce engagement and frustration during laboratory IIR studies.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080818" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1ac14f8d2d6e34cb2a218476fd0f1586e8df8c14", "sources": [ "DBLP" ], "title": "Engaged or Frustrated?: Disambiguating Emotional State in Search", "venue": "SIGIR", "year": 2017 }, "1ac30950aaab6297d2a7cb2d7a55ba6b9b2a100f": { "authors": [ { "ids": [ "2880350" ], "name": "Theofilos Petsios" }, { "ids": [ "1739023" ], "name": "Jason Zhao" }, { "ids": [ "1720824" ], "name": "Angelos D. 
Keromytis" }, { "ids": [ "39400201" ], "name": "Suman Jana" } ], "doi": "10.1145/3133956.3134073", "doiUrl": "https://doi.org/10.1145/3133956.3134073", "entities": [ "Algorithm", "Analysis of algorithms", "Antivirus software", "Best, worst and average case", "Computational complexity theory", "Computational resource", "Denial-of-service attack", "Hash table", "Library (computing)", "PHP", "Parsing", "Regular expression", "Time complexity", "Web application", "Web application firewall", "Zig-zag in-line package", "bzip2" ], "id": "1ac30950aaab6297d2a7cb2d7a55ba6b9b2a100f", "inCitations": [ "b26c8b09bf05696e96bbb9578513730f3c63ec50" ], "journalName": "", "journalPages": "2155-2168", "journalVolume": "", "outCitations": [ "04b102d2e1d60eb58eb29b945d04c7ca1b10e07b", "95baae72c5fcca4038339c350556dd6143d9a263", "92530ca9a573d436043b5a66abd3141389849df5", "28304261637eb0cb535c698650dff038319a006d", "2a5c00109e15c68e8d8f479a2657c2889cc69cc5", "231e6a4fd7922c6adaaa48b2d02f7878e88c4048", "2ef531169a5593f1ec70806cce86b3838bf227b8", "502530dee6020382d0ac1fcc76bf3d5d8bc5596b", "d1dd3715e85e2d6cb5238b4930caccdd6bf78b8a", "7a70ec681ce44b8c7c12e7b91e75c16797a80ab1", "8bec3d859f38222e12c7b86b97a949adfdf6ee50", "55b7d2e425c5e813ed51780845398ef5d246a4d3", "62d052813db5211f2572d2d25b3e19825cf75bad", "1fcdfc4fd5a2b491edc40a8152454bdb2b6e7244", "c43cbe799ec8fbebc549873bc346aa74047e6d51", "b88cd5696a7d854a4978c85876e8afeb72c8e155", "52dd991350330d44012e8659cd4d1fbe796fa322", "6ea63d09993b9a268689790ea8d25bc36345497e", "1e8de10f1fb67d89f9f19ae9f9588be74d497a27", "032f1a16ad4cd815ca5cbf3dbfca2714007a1a2e", "0a8ecb2408d83f0ffc274a0258b13a7ca45c0698", "0b549912e5f111c7c60eadda634ef4484427b684", "1278fe7f0d379c5c9b417d711dd5e0dcc31ea256", "0653e2ed9f683868cb4539eb8718551242834f6b", "1690178ea64cf4bcce257893ae7825cff6a81c97", "2acb87fa3aed6f09773c53c9b34db221941e3627", "de71e2359995087b4ce7d46e4eb718c341c70ee0", "4461a740d26035ce2abd3ef292fbc0c2360a6c50", 
"5a7471998ee212f5c01b562f7d6c0f2b51117fae", "1557176b41ec6fbfb62c4dc1c78b061c94a8bcc8" ], "paperAbstract": "Algorithmic complexity vulnerabilities occur when the worst-case time/space complexity of an application is significantly higher than the respective average case for particular user-controlled inputs. When such conditions are met, an attacker can launch Denial-of-Service attacks against a vulnerable application by providing inputs that trigger the worst-case behavior. Such attacks have been known to have serious effects on production systems, take down entire websites, or lead to bypasses of Web Application Firewalls.\n Unfortunately, existing detection mechanisms for algorithmic complexity vulnerabilities are domain-specific and often require significant manual effort. In this paper, we design, implement, and evaluate SlowFuzz, a domain-independent framework for automatically finding algorithmic complexity vulnerabilities. SlowFuzz automatically finds inputs that trigger worst-case algorithmic behavior in the tested binary. SlowFuzz uses resource-usage-guided evolutionary search techniques to automatically find inputs that maximize computational resource utilization for a given application.\n We demonstrate that SlowFuzz successfully generates inputs that match the theoretical worst-case performance for several well-known algorithms. SlowFuzz was also able to generate a large number of inputs that trigger different algorithmic complexity vulnerabilities in real-world applications, including various zip parsers used in antivirus software, regular expression libraries used in Web Application Firewalls, as well as hash table implementations used in Web applications. 
In particular, SlowFuzz generated inputs that achieve 300-times slowdown in the decompression routine of the bzip utility, discovered regular expressions that exhibit matching times exponential in the input size, and also managed to automatically produce inputs that trigger a high number of collisions in PHP's default hashtable implementation.", "pdfUrls": [ "http://www1.cs.columbia.edu/~angelos/Papers/2017/ccs2017.pdf", "http://arxiv.org/abs/1708.08437", "https://arxiv.org/pdf/1708.08437v1.pdf", "http://doi.acm.org/10.1145/3133956.3134073", "http://www.cs.columbia.edu/~theofilos/files/slides/slowfuzz_ccs.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1ac30950aaab6297d2a7cb2d7a55ba6b9b2a100f", "sources": [ "DBLP" ], "title": "SlowFuzz: Automated Domain-Independent Detection of Algorithmic Complexity Vulnerabilities", "venue": "CCS", "year": 2017 }, "1ac425def5f0de754c0a738cc8a528eaf9ab3381": { "authors": [ { "ids": [ "1696818" ], "name": "Venkatesan T. Chakaravarthy" }, { "ids": [ "14673425" ], "name": "Jee W. Choi" }, { "ids": [ "34348330" ], "name": "Douglas J. 
Joseph" }, { "ids": [ "1740904" ], "name": "Xing Liu" }, { "ids": [ "39229329" ], "name": "Prakash Murali" }, { "ids": [ "1787471" ], "name": "Yogish Sabharwal" }, { "ids": [ "2178450" ], "name": "Dheeraj Sreedhar" } ], "doi": "10.1109/IPDPS.2017.86", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.86", "entities": [ "Benchmark (computing)", "Design of experiments", "Heuristic", "Iterator", "Matrix multiplication", "Time complexity", "Tucker decomposition" ], "id": "1ac425def5f0de754c0a738cc8a528eaf9ab3381", "inCitations": [ "0f0bcf003e7de278514dff084487873762b9ffb3", "7c3c5b282948121244d330651e36b05f31c382cb" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1038-1047", "journalVolume": "", "outCitations": [ "048bfc88b9f54512304433bb2eeb68a3172159a8", "53132a1619b13215bcd791cd6b850ff154f4f837", "280bbaa66095fd6f89999003b802700935fdf77c", "1322c225b4e05dc22bbff7c5b9f5464f3cb7754b", "66479c2251088dae51c228341c26164f21250593", "608109b7643145d3559c962041c76207a58a3b57", "62dd02837c65b9c90de8d80c493f23ce1116cb3d", "41cef633b01c5cae5c9dde2ccc06ffc15b93fb8f", "07ed71b436b9adf23f0f93c8e4533461b82e769a", "8526f7d58b58294521636d4709a08272e6f1f3c8", "44ccdebc83766fb6a2016fa58c3c3a337356b79b", "1d0f25989452abbbc8feaf00a034ff110fc4b350", "0072eb224991ada6fc8a4e2d3465e4a51c0b26bc" ], "paperAbstract": "The Tucker decomposition expresses a given tensor as the product of a small core tensor and a set of factor matrices. Our objective is to develop an efficient distributed implementation for the case of dense tensors. The implementation is based on the HOOI (Higher Order Orthogonal Iterator) procedure, wherein the tensor-times-matrix product forms the core routine. Prior work have proposed heuristics for reducing the computational load and communication volume incurred by the routine. We study the two metrics in a formal and systematic manner, and design strategies that are optimal under the two fundamental metrics. 
Our experimental evaluation on a large benchmark of tensors shows that the optimal strategies provide significant reduction in load and volume compared to prior heuristics, and provide up to 7x speed-up in the overall running time.", "pdfUrls": [ "https://arxiv.org/pdf/1707.05594v1.pdf", "http://arxiv.org/abs/1707.05594", "https://doi.org/10.1109/IPDPS.2017.86" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1ac425def5f0de754c0a738cc8a528eaf9ab3381", "sources": [ "DBLP" ], "title": "On Optimizing Distributed Tucker Decomposition for Dense Tensors", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "1ac7667a185cdcbda194e9852c27dcc169fbbabf": { "authors": [ { "ids": [ "1685240" ], "name": "Edward Bortnikov" }, { "ids": [ "2829412" ], "name": "Eshcar Hillel" }, { "ids": [ "1777373" ], "name": "Idit Keidar" }, { "ids": [ "36805537" ], "name": "Ivan Kelly" }, { "ids": [ "39388165" ], "name": "Matthieu Morel" }, { "ids": [ "9753168" ], "name": "Sameer Paranjpye" }, { "ids": [ "2196178" ], "name": "Francisco Perez-Sorrosal" }, { "ids": [ "40076877" ], "name": "Ohad Shacham" } ], "doi": "", "doiUrl": "", "entities": [ "ACID", "Apache HBase", "Attribute\u2013value pair", "Data access", "High availability", "Key-value database", "Open-source software", "Transaction processing", "Transactions per second", "Value (ethics)" ], "id": "1ac7667a185cdcbda194e9852c27dcc169fbbabf", "inCitations": [], "journalName": "", "journalPages": "167-180", "journalVolume": "", "outCitations": [ "9748241beb02ef1e2d0e6dc877c04b354033a838", "0599ba259341963bf8abf2818c874713e570a039", "43fe3ad9ce1c3dbe4f905068ae2adc7bcb7fc9fb", "8d1c0ae7bbe138bc19abf66ca918f46b244b1f5d", "4827cc74dba0c39172554cf0116eb111797f0d1b", "1220e4a011c46804d4369b5580dc7fb6e387af54", "624cb175af600b7749bce00c0932e2a10f72e564", "57efc2b9ba2a725af1d66cc43c472d0314190051", "208b6e0a2492275c22f0320879b8ed037c08330f", 
"068e59b88a1230d709d99c83a45d3a5b91260810", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "24d565a61917967d0fcaf66cb4d8be9fff5a34fc", "665a0bcd3e35453702e655f86683417581517b4e", "039f09d49bc408db9e0e8429e6bd92be49c5f72e", "3a8c90ab13adb55e3610a020c69f03d72dfae274", "4b9c92dc611a6e0861d11240a036b39dd9dd4f7b", "18a5f443299784479e78d9e77f175af57cb2fa2b", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "7cd99fed3b3b701af32bfdad561b0900fb510b7c", "29a05cde1994548e2e9487822248c679626c6241", "4593ae644f04d76f582dedc4cc32d2acd33c9a93", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "6e90c995cc9caa0f7d9d68d536f5e16e9bcbbcd6", "517e239f97f50079bc557cccf1a6b56aa5736d30", "095a3cee30d64d3a6f22caadd58c45c5cd0b83e9", "07d847f310d5fa9138f461f0a25c5e0024f1c4af" ], "paperAbstract": "We present Omid \u2013 a transaction processing service that powers web-scale production systems at Yahoo. Omid provides ACID transaction semantics on top of traditional key-value storage; its implementation over Apache HBase is open sourced as part of Apache Incubator. Omid can serve hundreds of thousands of transactions per second on standard mid-range hardware, while incurring minimal impact on the speed of data access in the underlying key-value store. 
Additionally, as expected from always-on production services, Omid is highly available.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_shacham.pdf", "http://webee.technion.ac.il/people/idish/ftp/Omid.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17-shacham.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_shacham.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/shacham", "https://www.usenix.org/system/files/conference/fast17/fast17-shacham.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/ef73/d2a1433187418ce044ac81fa41452f728f29.pdf", "s2Url": "https://semanticscholar.org/paper/1ac7667a185cdcbda194e9852c27dcc169fbbabf", "sources": [ "DBLP" ], "title": "Omid, Reloaded: Scalable and Highly-Available Transaction Processing", "venue": "FAST", "year": 2017 }, "1ac8ee3baa34f793398057798930cee85730005f": { "authors": [ { "ids": [ "2574478" ], "name": "Mathias L\u00e9cuyer" }, { "ids": [ "2035772" ], "name": "Riley Spahn" }, { "ids": [ "1972091" ], "name": "Roxana Geambasu" }, { "ids": [ "1734755" ], "name": "Tzu-Kuo Huang" }, { "ids": [ "30721371" ], "name": "Siddhartha Sen" } ], "doi": "10.1109/SP.2017.60", "doiUrl": "https://doi.org/10.1109/SP.2017.60", "entities": [ "Big data", "Data hub", "Data store", "Information privacy", "Machine learning", "Monetization", "Personalization", "Scalability", "Selectivity (electronic)", "Working set" ], "id": "1ac8ee3baa34f793398057798930cee85730005f", "inCitations": [ "1dd6a6516cd01deca84add75daa98fa51b8032cd" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "78-95", "journalVolume": "", "outCitations": [ "3528c682cf344fed9088d8f8511b086dee93a572", "6b74ec27d76ae42c2faa9211e2640141595838b6", "041bb7570ddde1b9d699a99ff99ab916f4116abf", "2e8b9a7a085a8bc18783e76b776c6e780116efd8", "7a278ee0578f194700cadc3811cdda4ec751f88a", 
"577542a0017657546c9d6f5e7c983306c54f7662", "7ed04e8ca2217ce1906864a6a64e6153afbab132", "360d4003511682c5f5b5f82f6befdda88ca3fa73", "88ee45a393aa33cbd9ed2735272fe8bc08ffd260", "9771e382794af067f7360f1cac7b6d2a1e6dd1c4", "2b02f1b0a887d912bd3362472689ccff118faf1e", "5264ae4ea4411426ddd91dc780c2892c3ff933d3", "43afc11883fb147ac37b4dc40bf6e7fa5fccf341", "1ad0ffeb6e69a5bc09ffa53712888b84a3b9df95", "17fac85921a6538161b30665f55991f7c7e0f940", "33d23e5fa7106bce2f6bbb8efafccf7712a6c2b4", "159a7a03ccb0ef751db1870be1de4d26a02470f3", "22a5eeb8608b35e371b7544a54fabeadca8866e3", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "5d06b0b24f42fa7f48b01267458baf5e6e7e9fc0", "02d1105bec3877ed8cd2d28f76b67ae8ba3f2331", "276ebc620a8976026bd2d03582b9ecfa3738d43c", "1026527f60f4df0c523dc4b4b07a06274f1f0517", "03b91a1e49805406f041ca3399be730729c62338", "0e9c50180730bfc455003cd6ed883b8020eedbb3", "cbc4aa545290536e2a10726ed7d7673226ca00d9", "6f54a7933235ced5684e3bff18f7e5dc40510018", "283ecc8622694c070fa53aee7a1c37dadc603f8d", "4beef78e9b21611a59237b63d512014e47f32d5e", "55a6e8855b5f5d109e1e609d8ea1cfac0b703491", "0c9ffe6bfabf2c1cb013855d913b6089c4918966", "14d73480e38599a6997a37d871353e92c71e9503", "24763030fb1e9813dad51d28bea9c5d1414f9cda", "1b65af0b2847cf6edb1461eda659f08be27bc76d", "046a1302079f56b94c81457bf7fd21c3417a9f72", "0b98d01ceccfb5781e2362783e2780a2c46c5b18", "049262f31708a51c1952eb3f7b301f5fc22907d0", "9c4b9a86c6beba0b88828e674ea809aee70641eb", "b532099ff8b67049f292cd62700dca37fc2be623", "5be36eb399ec5418a81fc10f062a7754db929727", "0154103e091dea574c39f3c89d52ccfefc06af6c", "e541c475457a731d7d434c4302867fc45af5876f", "0f7dcdd1736a29e02d7d33f0652547a24e6ebc3f", "374bf28854c23561976fc6a2b5abd4b9f7f117ba", "0f17c798597372ad819b4c87181b2bfd7ebf38bb", "34bdd36330946cf9b377d274bdaaa7dc41888aa2", "9aa88a8a354f1d322e242376d27d0474e50252f8", "040d9acab9003b9d50b2291cc6844b66b2a85d12" ], "paperAbstract": "Protecting vast quantities of data poses a daunting challenge for the growing number of 
organizations that collect, stockpile, and monetize it. The ability to distinguish data that is actually needed from data collected \"just in case\" would help these organizations to limit the latter's exposure to attack. A natural approach might be to monitor data use and retain only the working-set of in-use data in accessible storage, unused data can be evicted to a highly protected store. However, many of today's big data applications rely on machine learning (ML) workloads that are periodically retrained by accessing, and thus exposing to attack, the entire data store. Training set minimization methods, such as count featurization, are often used to limit the data needed to train ML workloads to improve performance or scalability. We present Pyramid, a limited-exposure data management system that builds upon count featurization to enhance data protection. As such, Pyramid uniquely introduces both the idea and proof-of-concept for leveraging training set minimization methods to instill rigor and selectivity into big data management. We integrated Pyramid into Spark Velox, a framework for ML-based targeting and personalization. We evaluate it on three applications and show that Pyramid approaches state-of-the-art models while training on less than 1% of the raw data.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.60", "http://mathias.lecuyer.me/assets/assets/oakland2017pyramid.pdf", "https://arxiv.org/pdf/1705.07512v1.pdf", "http://arxiv.org/abs/1705.07512" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1ac8ee3baa34f793398057798930cee85730005f", "sources": [ "DBLP" ], "title": "Pyramid: Enhancing Selectivity in Big Data Protection with Count Featurization", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "1acdab652c6e07bd56404071bfd98e552b146cce": { "authors": [ { "ids": [ "2041657" ], "name": "Yi Cui" }, { "ids": [ "2225575" ], "name": "Di Xiao" }, { "ids": [ "34983248" ], "name": "Daren B. H. 
Cline" }, { "ids": [ "1737609" ], "name": "Dmitri Loguinov" } ], "doi": "10.1109/ICDM.2017.15", "doiUrl": "https://doi.org/10.1109/ICDM.2017.15", "entities": [ "Algorithm", "Big data", "Programming Computable Functions", "Random-access memory" ], "id": "1acdab652c6e07bd56404071bfd98e552b146cce", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "61-70", "journalVolume": "", "outCitations": [ "3674351c6ffe855af65d6003ad042551e7b49a38", "7a03e14cb1beecccbc030391703224d7beb62e94", "cbce38ea34c90b8a53f35f10b2235e65fc5fe167", "00a57850e14320bb41d58696cc409151466b98b2", "6b6ae4ff053bcee2834b5e7718810cb5bc15c36c", "0d565ac2fcd83ad1753d576850aed03ebb3e35c0", "516f412a76911a13c9128aac827b52b27b98fad9", "1163b331215f934537ca6b78b8d77ceb1f0fc139", "7ce8d8a8f40b918acda0904eb3fb26369a105eed", "0f3fd2233b51ec5cbbb46451f1f76996d7493450", "5d3ad70f4f7817b73ecfef6065358df563dfab96", "0706356c9ab6014d6b04577d38289ea8328291a5", "0c5b579f824369e6367f7585c7dc12d8715bd10a", "10dac777afc83308fd10782bc2bc529469cb9ce9", "798d70db5b6e7fde1b0d51b34f6d03ad481addc4", "28c83441041f7defa682e2dae09655120fb6904e", "3c4194f25bda9d2ebdea8d91e8d7c13a5f8b485a", "110b55a017f52abfedca220036ea129d84b7cadc", "7c1a0f7054d496e71cc697202b84d8a5c652328a", "8cca529e651867e5ac2a30ceca4e661ef0900ef7", "f655d41dcec06ab99c6377c6d426e486141caaa5", "8c671a8bb36514ea82d0fc782553b6a1adb1fb5f", "0371f9e3efbcd4829b5ffbff585155746ef05284", "4e0df13191a558fb619d3fcad1d7bd2c3668f844", "7805de482edfbec3a736bb6b3d1bb5163435752d", "159911284f2754b4158d990e70d54a2ce05267af", "1e67acd36a28ca4898e2e653f246638669730571", "1c15bbbac6dd19da1e3aa9fc361b12de2897cbd4", "de07e796c8d53c6ecc25b95565688aa9a9b5a213", "44b2dd390f32a6a77d4e2416351df0fa08a323c1", "6ea8894ef9edf31ed83e925a5650a0a8f0b79b76", "fd6399432895349f6d8cf88121dacb194d9fef82", "a3d6136ac17442b7da61b48a14044194131bf22d", "4af605a89bd4e52565b2c99c9fa290f39b7880f8" ], "paperAbstract": "In the age of big data, many 
graph algorithms are now required to operate in external memory and deliver performance that does not significantly degrade with the scale of the problem. One particular area that frequently deals with graphs larger than RAM is triangle listing, where the algorithms must carefully piece together edges from multiple partitions to detect cycles. In recent literature, two competing proposals (i.e., Pagh and PCF) have emerged; however, neither one is universally better than the other. Since little is known about the I/O cost of PCF or how these methods compare to each other, we undertake an investigation into the properties of these algorithms, model their I/O cost, understand their shortcomings, and shed light on the conditions under which each method defeats the other. This insight leads us to develop a novel framework we call Trigon that surpasses the I/O performance of both previous techniques in all graphs and under all RAM conditions.", "pdfUrls": [ "http://irl.cs.tamu.edu/people/yi/papers/icdm2017-tr.pdf", "http://irl.cs.tamu.edu/people/yi/papers/icdm2017.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.15", "http://irl.cse.tamu.edu/people/yi/papers/icdm2017-ppt.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1acdab652c6e07bd56404071bfd98e552b146cce", "sources": [ "DBLP" ], "title": "Improving I/O Complexity of Triangle Enumeration", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "1ad760a5c0a3803816202888181555198b8ec87c": { "authors": [ { "ids": [ "3408435" ], "name": "Xubin Tan" }, { "ids": [ "31846152" ], "name": "Jaume Bosch" }, { "ids": [ "2023320" ], "name": "Miquel Vidal" }, { "ids": [ "32341249" ], "name": "Carlos \u00c1lvarez" }, { "ids": [ "2048264" ], "name": "Daniel Jim\u00e9nez-Gonz\u00e1lez" }, { "ids": [ "1744495" ], "name": "Eduard Ayguad\u00e9" }, { "ids": [ "1741016" ], "name": "Mateo Valero" } ], "doi": "10.1109/IPDPS.2017.48", "doiUrl": 
"https://doi.org/10.1109/IPDPS.2017.48", "entities": [ "ARM Cortex-A9", "ARM architecture", "Dataflow programming", "Deadlock", "Embedded system", "Field-programmable gate array", "Hardware acceleration", "Linux", "Linux", "Linux on embedded systems", "OpenMP", "Programming model", "Run time (program lifecycle phase)", "Scalability", "Speedup" ], "id": "1ad760a5c0a3803816202888181555198b8ec87c", "inCitations": [ "a17b7fa737461790dc17e033e84fa5f8344fe4e2" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "244-253", "journalVolume": "", "outCitations": [ "a6e8098671ccfc1147870db90e80360654cd92aa", "5f0a21670abe1634780d27f60228a83a6a8232ad", "87224645bdfb650d5e62a61ff1cbcf6fd5eaca10", "7d3c9eb93a673ba8fd543f857779091499cb01da", "5f2ff16892860418efd4946857b45d41712ece85", "637b73225d315c5c85d4b23b65db5215633ffc4f", "cfd34380711f505e58289a524e6d154dc44355a1", "2eadec89f39667a3bdb83d61b67e9e0aa2eb7ac4", "6fce4d4fef39e35ec6928286c5a5f1b8fda53718", "1abf2844268ef6fa9546bbe5dfcceb75415e3547", "0794a60523f9504ef9dee181659b6131b5c4afa5", "19ecf7778132143ec9c0324fae0aebf20c9a0217", "14179ef4a46282957a1c142447ebef81bdf1e7f3", "68d3f1c60997c48854bafc8204e3b1c9716c302d" ], "paperAbstract": "Task-based programming models such as OpenMP, IntelTBB and OmpSs offer the possibility of expressing dependences among tasks to drive their execution at runtime. Managing these dependences introduces noticeable overheads when targeting fine-grained tasks, diminishing the potential speedups or even introducing performance losses. To overcome this drawback, we present a general purpose hardware accelerator, Picos++, to manage the inter-task dependences efficiently in both time and energy. Our design also includes a novel nested task support. 
To this end, a new hardware/software co-design is presented to overcome the fact that nested tasks with dependences could result in system deadlocks due to the limited amount of resources in hardware task dependence managers. In this paper we describe a detailed implementation of this design and evaluate a parallel task-based programming model using Picos++ in a Linux embedded system with two ARM Cortex-A9 and a FPGA. The scalability and energy consumption of the real system implemented have been studied and compared against a software runtime. Even in a system limited to 2 threads, using Picos++ results in more than 1.8x speedup and 40% of energy savings in the most demanding parallelizations of real benchmarks. As a matter of fact, a hardware task dependence manager should be able to achieve much higher speedup and provide more energy savings with more threads.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.48" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1ad760a5c0a3803816202888181555198b8ec87c", "sources": [ "DBLP" ], "title": "General Purpose Task-Dependence Management Hardware for Task-Based Dataflow Programming Models", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "1ad892a08a58a9eb504e4dc438fc8d245726cd90": { "authors": [ { "ids": [ "2610952" ], "name": "Christian Nieke" }, { "ids": [ "1720266" ], "name": "Wolf-Tilo Balke" } ], "doi": "10.1109/CLOUD.2017.32", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.32", "entities": [ "Benchmark (computing)", "Commodity computing", "Computational science", "Data science", "Downtime" ], "id": "1ad892a08a58a9eb504e4dc438fc8d245726cd90", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "188-195", "journalVolume": "", "outCitations": [ "210c93f24ca612de728de72a23daeaa6224285e7", "d73421f5d46e12a121942d42463b0133bbd7a433", 
"71ad31bd506ea571f6c04a293ff298f42fa7b47c", "9edc3150a1cdf9f827abc98fc6f82a9966bdd290", "2ad958cbff0d5d8880e6de20ec83f9364ba1407a", "233ac1d58ed190430c17301cc4d08a36d03eecbd", "2c8d291ea66848efc6db14378ebd06cbfacac10f", "7b420218b4e797dcc6ca96f6c6c3ec29a9688c07", "231b2da6a3d29f2632d3c1ad1d3ec3f2fb6737d3", "978008c232892b68bf0c5c7ed83586badf38d66c", "14a3a7534a52b0908939374aecd804b9632ad133", "0c34e00dcd7f15126110b9d430306157a0aae769", "40528e881a5a896466970650a7c8d7a41b2004ff", "8c09e3d1c562efce432befe5b5726d5899b0f4ff", "270428e66887d07a7f55c1a0507fe2197b3c2304", "4b4aa57a81270238221d53b770122fe5a2ac345b", "11e44206984ce4186fd4b6181a5d902056e50e64" ], "paperAbstract": "Providers of computing services such as data science clouds need to maintain large hardware infrastructures often with thousands of nodes. Using commodity hardware leads to heterogeneous setups that differ significantly in individual nodes' performance, which must be understood to allow for accounting, strategic planning, and to identify problems and bottlenecks. Today's method of choice are active benchmarks, but they disturb normal operations and are too expensive to run continuously. They also struggle to be representative of an ever changing workload. We therefore design a passive benchmarking technique, which computes expressive and accurate performance metrics based on actual workloads. We prove the quality and performance benefits of our passive benchmark on a practical workload in one of the world's largest scientific computing infrastructures, the CERN Computing Center. 
In fact, our approach allows continuous benchmarking of the active system, while avoiding costs in terms of downtime and achieves prediction quality comparable to the state-of-the-art approach of active benchmarking.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.32" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1ad892a08a58a9eb504e4dc438fc8d245726cd90", "sources": [ "DBLP" ], "title": "Monitoring Performance in Large Scale Computing Clouds with Passive Benchmarking", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "1ae7ba9910b07b1c38353615dd3fa668f4554c68": { "authors": [ { "ids": [ "2667024" ], "name": "Haoqiong Bian" }, { "ids": [ "30563822" ], "name": "Ying Yan" }, { "ids": [ "2893500" ], "name": "Wenbo Tao" }, { "ids": [ "40119206" ], "name": "Liang Jeff Chen" }, { "ids": [ "1743832" ], "name": "Yueguo Chen" }, { "ids": [ "1688063" ], "name": "Xiaoyong Du" }, { "ids": [ "1715172" ], "name": "Thomas Moscibroda" } ], "doi": "10.1145/3035918.3035930", "doiUrl": "https://doi.org/10.1145/3035918.3035930", "entities": [ "Algorithm", "Analysis of algorithms", "Apache Hadoop", "Column (database)", "Column-oriented DBMS", "Data deduplication", "Hard disk drive performance characteristics", "Input/output", "Memory-mapped I/O", "Real life", "Requirement" ], "id": "1ae7ba9910b07b1c38353615dd3fa668f4554c68", "inCitations": [ "18e93539fe6163a0b56f3427fc562733f89449a6" ], "journalName": "", "journalPages": "299-314", "journalVolume": "", "outCitations": [ "0b99db47b233e2ddb743a82c9a5cc755c8aedb84", "4ec54afc661d9fe0caebafb80a3de3482de3b545", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "8db5d8f4bf055bbe64ccfe29c5fd778ef24ade5b", "21854a5fb77a45f411865652a63663bb9ff3cde9", "40c2e4e3a0f9d2fef4e4c9bed8fce7b624fadac0", "495e4ab43ffc0e5c11919c6ec42e48a4ce651327", "78a44566e76c0d702524d8ff4a99d3f505e739e4", "06fb8f5e59bac8b014e7ab70851b3568ea5a0a46", "09c1b69ab0fe1315b0d5e5e0b0853585c4a319b5", 
"4c0afc69b07e3bb2daa32ca628ee491cb0338f80", "024157990c0257c454beae3915f83ce5b088d767", "229467e56c6093cb1f5927f8ffeddd51ac012934", "0a0c026b2b6c04baaf1fa2933d5998519bc9c5fa", "3e4af9e1e3e64be2ef79bbf63daf4ef640183719", "207def18c67fa8024741b7ae3cdc655b57f2053f", "4c7bfa933c11c7a802c2fa9c1dc475dba36a2bd5", "a087b0fcc9439faca5e3a68f8c9a2a47f5c95cc2", "075f51b0aeaac7ebed18d5fbf67e64a14c8943f1", "045d927280938686695693a1e265654969670f50", "21d3abf5a2ccbeb0aa58ab950784491ed59567d4", "00aa614734a26a19b09a0a3bdee2adc77bdac5e4", "0109b8d4f75feed4ffbd4b5d555bac1e2d27815d", "03ff946dc6e4170ad4271b6e8ca26402ee957472", "9165824baf8b0dd3c4065cc1282f489185db95d8", "7459d07ca88484784a6c895aba0f8b019c971289", "1c27eafecd3d6f0008d74ffbe1e7c59a25869407", "1e557937f418accc13f9c5edb33a3d48259d80e5", "0b56f1f864a5949ab4ba06a6cb7ecc0c986b3f45", "9141bafcff1df2dbabf9a20671d2fa1bcb55aae5", "5046a718f92447642939f5c93414dc97225d726a", "0bc27c2354e6c86b0150662e45856dd4e446b2ed", "d7ac71ec88fc9e5a63f44b950e32d65eaf3b1c2f" ], "paperAbstract": "Modern data analytical tasks often witness very wide tables, from a few hundred columns to a few thousand. While it is commonly agreed that column stores are an appropriate data format for wide tables and analytical workloads, the physical order of columns has not been investigated. Column ordering plays a critical role in I/O performance, because in wide tables accessing the columns in a single horizontal partition may involve multiple disk seeks. An optimal column ordering will incur minimal cumulative disk seek costs for the set of queries applied to the data. In this paper, we aim to find such an optimal column layout to maximize I/O performance. Specifically, we study two problems for column stores on HDFS: column ordering and column duplication. 
Column ordering seeks an approximately optimal order of columns; column duplication complements column ordering in that some columns may be duplicated multiple times to reduce contention among the queries' diverse requirements on the column order. We consider an actual fine-grained cost model for column accesses and propose algorithms that take a query workload as input and output a column ordering strategy with or without storage redundancy that significantly improves the overall I/O performance. Experimental results over real-life data and production query workloads confirm the effectiveness of the proposed algorithms in diverse settings.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035930" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1ae7ba9910b07b1c38353615dd3fa668f4554c68", "sources": [ "DBLP" ], "title": "Wide Table Layout Optimization based on Column Ordering and Duplication", "venue": "SIGMOD Conference", "year": 2017 }, "1af571b9517911a099d42fd0baeb57ec0e9d7c33": { "authors": [ { "ids": [ "36713497" ], "name": "Gregor Richards" }, { "ids": [ "10176050" ], "name": "Ellen Arteca" }, { "ids": [ "39905505" ], "name": "Alexi Turcotte" } ], "doi": "10.1145/3133879", "doiUrl": "https://doi.org/10.1145/3133879", "entities": [ "Compile time", "Gradual typing", "JavaScript", "Program optimization", "Programmer", "Run time (program lifecycle phase)", "Speculative execution", "Type system", "Typing", "Value (ethics)", "Virtual machine" ], "id": "1af571b9517911a099d42fd0baeb57ec0e9d7c33", "inCitations": [ "3c77e744c44291b05ea7634251cfd764f3f1d383" ], "journalName": "PACMPL", "journalPages": "55:1-55:27", "journalVolume": "1", "outCitations": [ "00f84f51e7a595efbcc1696dd0025171ac27baee", "12d73635fa7936e41bec5544a53f990ae18fdbc7", "26ac3ad840d8d773eec2ab7fc60d441b34c6adc5", "34743cbbe51e9706369bce3816f8a21b954ce3be", "0ff7e33a637f0a228501f8c29880e7e8d84a31e8", "45d7a1f16860716e93ac56192e49d41522a4facc", 
"18fd4a392089c543ecff2cadc60c13a7d21b0efc", "1469b0cbb109c2a788a346dd0480070de8334dea", "713baa5c3ff9b3f2979889dc90430d73a680942e", "1b4df92d7f0d9393103cafbdbc512c52a90296b8", "4f67ca2e3a937df7305ef85f7ca487dda4a0052c", "9906d3bbb3061954c38914cebeccae6a80ef9c42", "0014188b4abf19cf34f6b4b2769528e856cd93c6", "0d281938d3ff2377541704cab6ba1c4408420733", "5f4599513bc71e6c8ef48408bc4e27afb4e76806", "554558b662909b628292e56f016549eaeacd2cc8", "d963fb7cd968666170361a4485df48c807bb85d0", "4e7c51bc9cdd81655912b0947a628c5d7f8c14ff", "073540ca0aaf15c28f9571707dd846b746247d7b", "313819eb2c191c185fb5d81d218d31dc54545680", "99e4d7f26140f2b31b440882e1684600a62b042c", "53e2b31ad6fea91655ecbe64fe66968b934d0160" ], "paperAbstract": "Programmers in dynamic languages wishing to constrain and understand the behavior of their programs may turn to gradually-typed languages, which allow types to be specified optionally and check values at the boundary between dynamic and static code. Unfortunately, the performance cost of these run-time checks can be severe, slowing down execution by at least 10x when checks are present. Modern virtual machines (VMs) for dynamic languages use speculative techniques to improve performance: If a particular value was seen once, it is likely that similar values will be seen in the future. They combine optimization-relevant properties of values into cacheable “shapes”, then use a single shape check to subsume checks for each property. Values with the same memory layout or the same field types have the same shape. This greatly reduces the amount of type checking that needs to be performed at run-time to execute dynamic code. While very valuable to the VM’s optimization, these checks do little to benefit the programmer aside from improving performance. 
We present in this paper a design for intrinsic object contracts, which makes the obligations of gradually-typed languages’ type checks an intrinsic part of object shapes, and thus can subsume run-time type checks into existing shape checks, eliminating redundant checks entirely. With an implementation on a VM for JavaScript used as a target for SafeTypeScript’s soundness guarantees, we demonstrate slowdown averaging 7% in fully-typed code relative to unchecked code, and no more than 45% in pessimal configurations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133879" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1af571b9517911a099d42fd0baeb57ec0e9d7c33", "sources": [ "DBLP" ], "title": "The VM already knew that: leveraging compile-time knowledge to optimize gradual typing", "venue": "PACMPL", "year": 2017 }, "1b1dda022e899b2d922adf330c96a8c9f7ad2abe": { "authors": [ { "ids": [ "2984487" ], "name": "Shin-Yeh Tsai" }, { "ids": [ "2290416" ], "name": "Yiying Zhang" } ], "doi": "10.1145/3132747.3132762", "doiUrl": "https://doi.org/10.1145/3132747.3132762", "entities": [ "Central processing unit", "Data center", "Distributed shared memory", "High- and low-level", "High-throughput computing", "Indirection", "Kernel (operating system)", "Linux", "MapReduce", "Remote direct memory access", "Scalability", "Throughput" ], "id": "1b1dda022e899b2d922adf330c96a8c9f7ad2abe", "inCitations": [ "7206aead5a341f361e6571d607f3c032e65e2f7e" ], "journalName": "", "journalPages": "306-324", "journalVolume": "", "outCitations": [ "21474d50689bb4b4af6399c4bae2cb612f382713", "034b6fd2064e591fae65483a8b35e35e1f42bd45", "a00d9aacfa1ec50ccdab3fc431f3fae01ee0b7e4", "60ddf74dd5b443c3bfb59fe876b42f9d6112c4fb", "1220e4a011c46804d4369b5580dc7fb6e387af54", "225603198cc415d363db8a8a2bd30b0df3c963b1", "7932a4597cec5149c575aa2303fe8f12241e4320", "024e39f4185e48a0a692663a0f26dc323de47fed", "eb82d3035849cd23578096462ba419b53198a556", "6d38e49cf1f121712f19805ec779905bc9507e58", 
"2a59eb5eacb88eb893a31fc8bdee2c4385e22d7a", "0d3f85933b6355789588476e491683532c68a906", "cbf02684c23380fb61b8a9ba0be1bb3373aa4931", "52eda0d59de5e944fe108395f9ba69a7e7584619", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "18a1236e7bbc9f8d7caca9ff51056aee38701ee3", "09bd66ed15985caa6b0bf1d54a36b508141ed128", "8c270f4a97b4aac0ab0b3cf48014f099b16bf8a2", "daf0cd0076b388712ea12ec4105572997fc50cdf", "7206aead5a341f361e6571d607f3c032e65e2f7e", "0027600a3ff1d431ee44fcb8b149388ec7bb3ac8", "205cf007cf77bbf81e55b74635017087585f7b7c", "0558c94a094158ecd64f0d5014d3d9668054fb97", "433207f45ac2c9dbe3876ab53af28ed569e94da9", "25a973aa67a796233c2b988eae3ae02645216e8f", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "0270c2056eb50b5d4597afa722c50abf21e67a82", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "2fcfd74636e564467766fd4bf344efa1f277fcfa", "3c99a311b2c9e50accbe2253251cca4e60bcd23a", "03416be8097852a54dd3e309434e5a0806824646", "43f0c099d44a68783a773f91cd03098a5252bf98", "0fcd76125f6f3ae75ae1e10785b7b58659136f6f", "122eda0ff026311fbb6e8d589262da0b9821c19f", "5dc4ac5ac578fae726adcc5776d2a277f09dd9b5", "160f7b7091e939365d22e21fb02db404635f7759", "c7d6ee693eb72e274aa8702ea579902996e4f3d5", "14390fd81841cc4bb3d3764042481fc0a0e89e7b", "514a5c15e8cf3f681febecad954a4508d9189c99", "3f8948a91ecb86e1dce17c876684d2e4b55c7569", "7129b305ce45f83127e928e8510da9fae0783905", "793f5e737284925a176f8ec82b3bb0d2178bb330", "0541d5338adc48276b3b8cd3a141d799e2d40150", "20960a5cec02eccebd7a14273c5521074f6fcb80", "12db88fcf7cfee093c64e4e7737458e694a38181", "01815b1f48f8cdd4e78260deaddf4bfe7af26f60", "0706356c9ab6014d6b04577d38289ea8328291a5", "893448e800e13eb022cc2b3c6328ff1a85bdbe87", "59f043d927b7effc02e351e86e027fd2c997851e", "2510fa746a2ac5a7af009eee14a922958c9e1f2a", "9c52b0297b54b6207bbb9ba70921a270ddc8f405", "1b6f6168fa67ec9141ac1494a7d0f28995d51d3b", "10fede77f843e9eb5ef1768a17543013616d9243", "49b73fc335cb97c0b52b283ce236a4d4eb2b99c9", "29a1148d75878671dc3663bf480e33d7bd91597d", 
"0ad8e89091eed09217e66adc98136126addc2619", "029e03cd045b1fcda76e4c469eedfa0470c79624", "2233d39a189ab2bdf8ad370e61bea79c5c3192dc" ], "paperAbstract": "Recently, there is an increasing interest in building data-center applications with RDMA because of its low-latency, high-throughput, and low-CPU-utilization benefits. However, RDMA is not readily suitable for datacenter applications. It lacks a flexible, high-level abstraction; its performance does not scale; and it does not provide resource sharing or flexible protection. Because of these issues, it is difficult to build RDMA-based applications and to exploit RDMA's performance benefits.\n To solve these issues, we built LITE, a Local Indirection TiEr for RDMA in the Linux kernel that virtualizes native RDMA into a flexible, high-level, easy-to-use abstraction and allows applications to safely share resources. Despite the widely-held belief that kernel bypassing is essential to RDMA's low-latency performance, we show that using a kernel-level indirection can achieve both flexibility and low-latency, scalable performance at the same time. To demonstrate the benefits of LITE, we developed several popular datacenter applications on LITE, including a graph engine, a MapReduce system, a Distributed Shared Memory system, and a distributed atomic logging system. These systems are easy to build and deliver good performance. 
For example, our implementation of PowerGraph uses only 20 lines of LITE code, while outperforming PowerGraph by 3.5x to 5.6x.", "pdfUrls": [ "https://engineering.purdue.edu/~yiying/LITE-sosp17.pdf", "https://www.sigops.org/sosp/sosp17/slides/lite-sosp17-slides.pdf", "http://doi.acm.org/10.1145/3132747.3132762" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1b1dda022e899b2d922adf330c96a8c9f7ad2abe", "sources": [ "DBLP" ], "title": "LITE Kernel RDMA Support for Datacenter Applications", "venue": "SOSP", "year": 2017 }, "1b243f086e5a7879310d3abd8a8bd651d34fd85f": { "authors": [ { "ids": [ "2527556" ], "name": "Ian En-Hsu Yen" }, { "ids": [ "2045821" ], "name": "Xiangru Huang" }, { "ids": [ "1727493" ], "name": "Wei Dai" }, { "ids": [ "2302443" ], "name": "Pradeep Ravikumar" }, { "ids": [ "1783667" ], "name": "Inderjit S. Dhillon" }, { "ids": [ "1752601" ], "name": "Eric P. Xing" } ], "doi": "10.1145/3097983.3098083", "doiUrl": "https://doi.org/10.1145/3097983.3098083", "entities": [ "Algorithm", "Benchmark (computing)", "Loss function", "Multi-label classification", "Multi-objective optimization", "Parallel computing", "Scalability", "Sparse matrix", "Tree structure" ], "id": "1b243f086e5a7879310d3abd8a8bd651d34fd85f", "inCitations": [ "0f6401093264806c0c45f719ce8b2ff0ee1b3efb", "638cbaa222eab1c3ac51fab11cf20ed6a5c3ebbc", "5c9cbb03d65d721d97abe26cfb9d7299b7e81346", "29e2d13358bacdb384c59c4aca15a9ea7b8b4685", "29d1790c665f3c48af99888d4b8e339202e25aa2" ], "journalName": "", "journalPages": "545-553", "journalVolume": "", "outCitations": [ "f88d1533a41199f29eae764595a4d9b9bcf521c6", "0389a414c5d0ef50e06fe0c15f6102f374ce1b04", "19ebc66d741950012dc659b6ac9089dd66c1676f", "2061a689341d7562ccd81e630038ec6fa4f310ac", "071769b25a2d3a2882809d928c3c644b5dd08e73", "1ae3915647d701f155b5a92a5dfab2d9b274277c", "0484f8a4ebe0af173f5c42e16db772321deb11b2", "2b3113b7fda6414548e88fc664f3be96d5209830", "245f3aa8423563b78367f7726399c8fa1841d7bc", 
"1211d3c950e2d1cc983d7a37fff1ea5062d54284", "47b7e7875ec87c26d56552b05f31a2b649ee1d16", "56436d67863a81fd52f670b7c9d77e8c6526a4c4", "2305715410186e78dc5720f4c0e097616eec8921", "46217f372a75dddc2254fdbc6b9418ba3554e453", "0ce46b5c8db8720582373ffa36fde3e40f4037ec", "6bfdcd1e6cb8b93cd571b98514990bb91fe7a4ea", "3ec234373af61716d2bb291be74f8327847d34b1", "5a26ec6568152731ce1667a426307ebccff5a50e", "d66ee4f4f46d24344ec1bec7624c56a2878d8db2", "43cce0f589896051adb56635dca9a803b67f73ad", "549554a6c16e8510598a7e3f3873df4571617942", "24c9b0b05c5e957e255b854f947472f9181772a4", "25642be46de0f2e74e0da81a14646f8bfcc9000a", "825315415eba86846605512c31d8adaf173e6f8d", "0cf6fe9e975a5496e9edd53818ae5c18a2a7e66b", "b14875d1e1850121d8720c39f853af5f455ecc44", "43014e1167790e42366042386e7fb9e052a6a27d", "3a982594d902ef89257cebedc7098a87afdc161d", "4e171856b5eac3a2bf7ebc1c243d9937b55a09bc", "bb2b45a0e650ca87590cfa3df93066eecf4e54f6", "3efcb97c1de1c87832a7a1d99e91801992a938ec" ], "paperAbstract": "Extreme Classification comprises multi-class or multi-label prediction where there is a large number of classes, and is increasingly relevant to many real-world applications such as text and image tagging. In this setting, standard classification methods, with complexity linear in the number of classes, become intractable, while enforcing structural constraints among classes (such as low-rank or tree-structure) to reduce complexity often sacrifices accuracy for efficiency. The recent PD-Sparse method addresses this via an algorithm that is sub-linear in the number of variables, by exploiting primal-dual sparsity inherent in a specific loss function, namely the max-margin loss. In this work, we extend PD-Sparse to be efficiently parallelized in large-scale distributed settings. 
By introducing separable loss functions, we can scale out the training, with network communication and space efficiency comparable to those in one-versus-all approaches while maintaining an overall complexity sub-linear in the number of classes. On several large-scale benchmarks our proposed method achieves accuracy competitive to the state-of-the-art while reducing the training time from days to tens of minutes compared with existing parallel or sparse methods on a cluster of 100 cores.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098083", "http://www.cs.utexas.edu/~xrhuang/publications/PPDSparse.pdf", "http://www.cs.cmu.edu/~eyan/publication/PPDSparse_Poster_KDD.pdf", "http://www.cs.utexas.edu/~inderjit/public_papers/ppdsparse_kdd17.pdf", "https://www.cs.cmu.edu/~eyan/publication/ParallelPDSparse.pdf", "http://www.cs.cmu.edu/~pradeepr/paperz/ppdsparse.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1b243f086e5a7879310d3abd8a8bd651d34fd85f", "sources": [ "DBLP" ], "title": "PPDsparse: A Parallel Primal-Dual Sparse Method for Extreme Classification", "venue": "KDD", "year": 2017 }, "1b28c0597b150d0ee43650bab6cb2128734b4e55": { "authors": [ { "ids": [ "2896218" ], "name": "Xiufeng Xie" }, { "ids": [ "1775391" ], "name": "Xinyu Zhang" } ], "doi": "10.1145/3143361.3143381", "doiUrl": "https://doi.org/10.1145/3143361.3143381", "entities": [ "Adaptive compression", "Algorithm", "Data compression", "Mobile device", "Network congestion", "Region of interest", "Responsiveness", "Streaming media", "User interface", "Videotelephony" ], "id": "1b28c0597b150d0ee43650bab6cb2128734b4e55", "inCitations": [], "journalName": "", "journalPages": "336-349", "journalVolume": "", "outCitations": [ "206fc8c3c2277402cacedb9581014cfaf6aca084", "243bce677d84b403b43f0d7a2613f09fb0169719", "9615ef6deaad93d0d0455a22c92a65e3274cd1a9", "a3b84fc01369f4f05d411a695ad236b9025449e3", "456a5ae2e7a58b3ccbd50ae9139c235ae1e9a5a7", 
"ab4b31afdf2fb0900ed780f850d74d65a1598e6e", "1f79775b58072a2ab484aad798aec0c9c7fa8605", "1f19e20d4fbf0ec8bb6fe94136f90e50b4715ad1", "d61b9b499c7e371edf7f8bb45fe7934e7d60ba2d", "56893647902b4ab971fd092ce78687675b6942a7", "05eab78d9697a4e2822cd8877a597345d216b14e", "1547d8c905a838da18564f5c12fc28ec4786b8d0", "642664fba20d43cb9685916ad474f79c8ddf03bd", "33478e2011b4521e725d0f2c4a24085ed72a8011", "5b4bab41421f0bf02ff36b035065dd922e6bfa82", "5bb40974a9742b590401ea68ad22e6d595dbedf4", "558c3d8d3971afe202b53cdde6c8bec74db69b9f", "7bd19f37bd85824d52ecdd9a4141c841508dcb24", "5bf660501e1c3fbc933ac490eef07275e328fe3f", "e7bb09876ce8eb2eb401e4c06cf2344633261cbd", "be98ee43b95025abd2f04cba56865577d3e8e57d", "c5085a39b3aaec33444d26c23b20861f8f974910", "2f85f20a076cb91dcdf4b3e5b16886ee9b6b3543", "13fdd67a7b4dfe7ef73060cf2e63e15d00f61149", "549a34558573df25364b22be3ec5bea5c184dd51", "c5b19b801355c668a712ba5760c3267831c9804c", "88b29800f8f84890a39add60d3d782fa6e37fbaf", "4335d05354fd5c5179ea59de58946441f9f1b36b", "d3e2d9772228bbbd1c36d8997870b4a0dc2ec01a", "73e5cc87f4d7487c3ba58ec2e55ea52ea1025b0a" ], "paperAbstract": "Panoramic or 360° video streaming has been supported by a wide range of content providers and mobile devices. Yet existing work primarily focused on streaming on-demand 360° videos stored on servers. In this paper, we examine a more challenging problem: Can we stream real-time interactive 360° videos across existing LTE cellular networks, so as to trigger new applications such as ubiquitous 360° video chat and panoramic outdoor experience sharing? To explore the feasibility and challenges underlying this vision, we design POI360, a portable interactive 360° video telephony system that jointly investigates both panoramic video compression and responsive video stream rate control. 
For the challenge that the legacy spatial compression algorithms for 360° video suffer from severe quality fluctuations as the user changes her region-of-interest (ROI), we design an adaptive compression scheme, which dynamically adjusts the compression strategy to stabilize the video quality within ROI under various user input and network condition. In addition, to meet the responsiveness requirement of panoramic video telephony, we leverage the diagnostic statistics on commodity phones to promptly detect cellular link congestion, hence significantly boosting the rate control responsiveness. Extensive field tests for our real-time POI360 prototype validate its effectiveness in enabling panoramic video telephony over the highly dynamic cellular networks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3143361.3143381", "http://xyzhang.ucsd.edu/papers/XXie_CoNEXT17_POI360.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1b28c0597b150d0ee43650bab6cb2128734b4e55", "sources": [ "DBLP" ], "title": "POI360: Panoramic Mobile Video Telephony over LTE Cellular Networks", "venue": "CoNEXT", "year": 2017 }, "1b3012864114e9a6d35de1c49591739f8512a669": { "authors": [ { "ids": [ "3422391" ], "name": "Najmeh Miramirkhani" }, { "ids": [ "19324496" ], "name": "Mahathi Priya Appini" }, { "ids": [ "1679711" ], "name": "Nick Nikiforakis" }, { "ids": [ "1782812" ], "name": "Michalis Polychronakis" } ], "doi": "10.1109/SP.2017.42", "doiUrl": "https://doi.org/10.1109/SP.2017.42", "entities": [ "Antivirus software", "BIOS", "Bare machine", "Decision tree", "Emulator", "Evasion (network security)", "Fidelity of quantum states", "Malware", "Malware analysis", "Mobile app", "Statistical model", "Threat (computer)", "Virtual machine" ], "id": "1b3012864114e9a6d35de1c49591739f8512a669", "inCitations": [ "34fc2fc898d121fe07271dec7829804dd59044ec", "6934305a246cc0b5776dcdea2030584eb7a0f274", "5db680f254053a515f0dd1eb811fd36746fc6fa9", 
"3dcc7e006239d2be52945846660d9408bb89bee2" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "1009-1024", "journalVolume": "", "outCitations": [ "56c478351657e0ec0106520c8cca82f93991ad61", "0eff95f6fd369d8f479b895c9e5e5c609d46efa1", "5626647d1718dfaf9c3d604e2e99b43b64813f92", "6fe297f65b9acf8080421bdaaec65321d72e38c8", "ccaa4e57c5bb4be7c981ae7dace179db7a94a94a", "02eabff9cab5cb8cea696b35bbd4888e0a52057c", "3946a5c410ba1980d932cc6d2987b4d935e038d5", "07fe0ddb6cef7ed8946d416c093452c1d0db0c34", "be6b283871ec6df396ff00bc2d844a9e4c056000", "1b90ee5c846aafe7feb38b439a3e8fa212757899", "011ab98701b398a68f54d435973301d299e5da64", "4bf69965b44af4dc4c7a665b64a4c1087b9ef668", "7a2254e0b0a3d72af03ea85cc6e74671e1838791", "151a94ddc1e26c94697eeb458d7a8849d8bf1522", "6b2ef620ca9363a4b996693c649fddf3c97a91c3", "11efa6998c2cfd3de59cf0ec0321a9e17418915d", "97cdcc50199a9c1f7f47deffee3fe869fd968220", "a72d98d5a478efa62383a63862fc07dba831c8a5", "93e390c7dd2f979fbd63e4c46977b791d92c6f41", "0238bfd6a96479a45715423abaaedaa78a2e8b8a", "09f4aa3a4483c3f790a58d5c987f03d49715788b", "2354c65ebd8bf1576c84b40d53585303fe2f1d75", "9e30324a423e051c6c19efc0b7fae82626becdc2", "35060a869ea38caf58426ea0c6aa65e59021f12a", "1e102df57ec826f0afee0dda578551e3da3b7289", "023555bdd427c20fb77a795c3a15e77ca885142d", "463dec0105456132f921f3075081e1fb824fb784", "862d2b3316f699e746931732f768b8a5bbfedbf7", "1917b60a4eaa731eed19e38f7981c36c9bcc61f5", "173f9ebbc0be6b591dfa72111aa78d3568e2db87", "abcf2ee479096377aead4940b447ced85dd8faeb" ], "paperAbstract": "Malware sandboxes, widely used by antivirus companies, mobile application marketplaces, threat detection appliances, and security researchers, face the challenge of environment-aware malware that alters its behavior once it detects that it is being executed on an analysis environment. 
Recent efforts attempt to deal with this problem mostly by ensuring that well-known properties of analysis environments are replaced with realistic values, and that any instrumentation artifacts remain hidden. For sandboxes implemented using virtual machines, this can be achieved by scrubbing vendor-specific drivers, processes, BIOS versions, and other VM-revealing indicators, while more sophisticated sandboxes move away from emulation-based and virtualization-based systems towards bare-metal hosts. We observe that as the fidelity and transparency of dynamic malware analysis systems improves, malware authors can resort to other system characteristics that are indicative of artificial environments. We present a novel class of sandbox evasion techniques that exploit the \"wear and tear\" that inevitably occurs on real systems as a result of normal use. By moving beyond how realistic a system looks like, to how realistic its past use looks like, malware can effectively evade even sandboxes that do not expose any instrumentation indicators, including bare-metal systems. We investigate the feasibility of this evasion strategy by conducting a large-scale study of wear-and-tear artifacts collected from real user devices and publicly available malware analysis services. The results of our evaluation are alarming: using simple decision trees derived from the analyzed data, malware can determine that a system is an artificial environment and not a real user device with an accuracy of 92.86%. 
As a step towards defending against wear-and-tear malware evasion, we develop statistical models that capture a system's age and degree of use, which can be used to aid sandbox operators in creating system images that exhibit a realistic wear-and-tear state.", "pdfUrls": [ "https://csaw.engineering.nyu.edu/application/files/9115/0825/7246/CSAW17_paper_8.pdf", "http://www3.cs.stonybrook.edu/~mikepo/papers/wearntear.sp17.pdf", "https://doi.org/10.1109/SP.2017.42" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1b3012864114e9a6d35de1c49591739f8512a669", "sources": [ "DBLP" ], "title": "Spotless Sandboxes: Evading Malware Analysis Systems Using Wear-and-Tear Artifacts", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "1b5826083df7cc82a879b9072a55e448480aa6da": { "authors": [ { "ids": [ "24628212" ], "name": "Harshvardhan Das" }, { "ids": [ "6138843" ], "name": "Subodh Kumar" } ], "doi": "10.1109/ICPP.2017.65", "doiUrl": "https://doi.org/10.1109/ICPP.2017.65", "entities": [ "Algorithm", "Computation", "Graph partition", "Heuristic", "Parallel algorithm", "Partition problem", "Recursion", "Subroutine" ], "id": "1b5826083df7cc82a879b9072a55e448480aa6da", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "563-570", "journalVolume": "", "outCitations": [ "34398c1f6b0113c1e3b0e645be6aa93e576584fb", "95b251202e1f5ff7d7f41dda553a38e395ecf555", "498ae20d5f8566c14ff378a80d0c391fc4006643", "acd10c290e7f6d83f0a808617e2a256b3ba168f1", "2b4ba58795f865c8f68fc18bccec6725e467a1d4", "87ea76767d9c0a6ee3b68c2d2dafa01ce5db3d4f", "7a5ace4f5f54702e8f6ad4e9d9b3a50657587d7b", "29ba261eed773065537297c0f0a10e6f31c4f209", "01e9cc3ac7805e043e1effac588cd5dab9d1480d", "fc801b6119fe0fe3fa3f7e6f79d439949224f7d0", "4ffb3ce035d378fa5ccb7f1ee77e9de689cb78a7", "8aacae19ccaddd7d8221a4c65806ed5de599dbd2", "f9e7891b1b8f584f5fb59f006c79145836638e67", 
"21fa4944b6cb4af47d0ef1baca2e3ca37628192c", "48733830145625430cc940e0ed3d77c6ddc04d62" ], "paperAbstract": "We propose a heuristic for parallel partitioning of graphs into equi-sized components. In particular, we identify a relationship between the graph partitioning problem (GPP) and the traveling salesman problem (TSP), and use that to reduce partitioning to TSP. Given that better performing heuristics are known for TSP than are for GPP, this reduction also leads to improved GPP heuristics. What is more, a good GPP solution can also be used to speed up computation of TSP. We first derive a good bi-partition from a cut of the TSP cycle in time proportional to the number of edges in the graph. We then continue this bi-partitioning recursively until the required number of partitions are left. Further, in order to speed up the computation of TSP, which we use as a subroutine, we perform an initial rough partitioning of the graph into K parts, compute TSP tours in each of these smaller partitions and then merge these local tours to solve the full TSP. We then use this full TSP solution to obtain the final partitioning in parallel. Our empirical analysis shows that for partition count k ≽ 32, our parallel algorithm gives a cut better in size than that of algorithms known for low cut-size (e.g., KaBaPE), and when time is of concern, it finishes in significantly less time with comparable cuts. 
We also show that our algorithm gives much smaller cuts in comparable time than those known for fast computation (e.g., PT-Scotch).", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.65" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1b5826083df7cc82a879b9072a55e448480aa6da", "sources": [ "DBLP" ], "title": "A Parallel TSP-Based Algorithm for Balanced Graph Partitioning", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "1b641441ded82bacf84cfb27986de81572ea5232": { "authors": [ { "ids": [ "3452475" ], "name": "Jiawei Jiang" }, { "ids": [ "1750622" ], "name": "Bin Cui" }, { "ids": [ "1776014" ], "name": "Ce Zhang" }, { "ids": [ "2697681" ], "name": "Lele Yu" } ], "doi": "10.1145/3035918.3035933", "doiUrl": "https://doi.org/10.1145/3035918.3035933", "entities": [ "Algorithm", "Gradient", "Gradient descent", "Iteration", "Machine learning", "SPARK", "Stochastic gradient descent", "Technological convergence", "TensorFlow" ], "id": "1b641441ded82bacf84cfb27986de81572ea5232", "inCitations": [ "48bbf730ec09dec82a7f86df3c200724ef2467fa", "0d561187be02ccf7905c0d2376796b5814e96a6c", "4571230343fef61fd2ebeee8ab9704b6bd0752e9", "f37f387ddba906fba0ae81d1b323b08cd4e1fe59", "26b827cd12b0ff1c52dbf6bea1c2286bf1788d29", "01ccc5b1487ad86632050e1c4cd546c6e140824f", "48231ac69e8d17ce08a2868b27d1a9b08f99be83", "a70ba22645eba9891e8cac8d08e36cc3d09e242b", "4461768c4453d056a602ae60e5d86e5f105c519d", "bd6cfcb0f3bd98d9fb9ebcb93c9389771563d96b", "d0556be65e8564ab8bb3e26b6a0146a62027bc40" ], "journalName": "", "journalPages": "463-478", "journalVolume": "", "outCitations": [ "70967767f355c34869029217bbaa0b2c32a193ec", "0144941d255dad89d3d90c2d131a15cc01df9829", "0122e063ca5f0f9fb9d144d44d41421503252010", "0e9bac6a2b51e93e73f7f5045d4252972db10b5a", "45619a2b7b41fea02345badf880530519d3d4c8f", "11930afbab8b7629a45d8a717e7a924bc19ff4af", "d26d2f73f1f0a1a2e0b606a581567c29fa90bdda", 
"9cea29601e72fd8e6ef8419aa31ddc103eceb7f8", "1e557937f418accc13f9c5edb33a3d48259d80e5", "49503ab6a1f0c86a33c599adebe0e10e69b48c3a", "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "121d9c737a887f7f6a03ffbc5be87b26bdad17f8", "3c029e72f5c75c8dd87a6acd43d05f23407e39cf", "2b3113b7fda6414548e88fc664f3be96d5209830", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "89fe2dc8fbea1876ad351ac413d1af9c4878b45c", "31f27864950a6c417cf996927b2d5558f70d2b14", "4baac77b6242eb4c7bdbc62720f9c26e6dd044f6", "31bd159b02068fbe1994b7a5e9d7b91adab0d142", "395d819e9df1f9dc92a6bb871d055a39ece74ba8", "34b8809c214db18544ce93674bf85fce0e8b3330", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "006b89abc356c1c3bf2dfa35f47c0601c39dce38", "043afbd936c95d0e33c4a391365893bd4102f1a7", "4c4f01846be3ecc79d030d577b9933ea64c4bf4f", "0790c77c1eaf2368b55c6a0def09a43690eeb848", "0558c94a094158ecd64f0d5014d3d9668054fb97", "419aebb6462f5b2021adb62385be1d778fcc685f", "0546fa6622b8b8db8527be777a692d88c5c037b0", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "ec941934e8389addb8620f911de9c83825fac9be", "347920406c9a9a3846adf485e2b864d4523a0652", "687c203c9d66f0b870fa4a48d02171c72824368d", "8a7e4164d954eb55617362dcb18ca1359b4b753b", "395ad865b5ab99e552f631fa713579c8fb6962e6", "04c73e4f13a19a2ce270a0aa391bd7842aa113ae", "244f7c483b1035144f04f7c58f20c3f806d3f1b3", "391a5f286f814d852dddcab1b2b68e5c1af6c79e", "44d4e4111ee7ba147c400a548d25108e92a4662e", "1156f60e40548096df49528b1342bb3e88b0f378", "b293405e9b3cfac8c58083b38bdc85d18dd0c187", "471271dfcd33ceb2553b4bd3b3431983fd6ec888", "04785a6c48ceeeaa0413c42cad206583868b863f", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "a058935fd019c2367fd32c16cd1ce6983a29aafb", "7717b438da4ec3ca4247ff7abf6dd603e91fe41d" ], "paperAbstract": "We study distributed machine learning in heterogeneous environments in this work. 
We first conduct a systematic study of existing systems running distributed stochastic gradient descent; we find that, although these systems work well in homogeneous environments, they can suffer performance degradation, sometimes up to 10x, in heterogeneous environments where stragglers are common because their synchronization protocols cannot fit a heterogeneous setting. Our first contribution is a heterogeneity-aware algorithm that uses a constant learning rate schedule for updates before adding them to the global parameter. This allows us to suppress stragglers' harm on robust convergence. As a further improvement, our second contribution is a more sophisticated learning rate schedule that takes into consideration the delayed information of each update. We theoretically prove the valid convergence of both approaches and implement a prototype system in the production cluster of our industrial partner Tencent Inc. We validate the performance of this prototype using a range of machine-learning workloads. Our prototype is 2-12x faster than other state-of-the-art systems, such as Spark, Petuum, and TensorFlow; and our proposed algorithm takes up to 6x fewer iterations to converge.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035933", "http://net.pku.edu.cn/~cuibin/Papers/2017%20sigmod.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1b641441ded82bacf84cfb27986de81572ea5232", "sources": [ "DBLP" ], "title": "Heterogeneity-aware Distributed Parameter Servers", "venue": "SIGMOD Conference", "year": 2017 }, "1b81fe43e208f7f05ee4438b4668f1dd9005e375": { "authors": [ { "ids": [ "1720841" ], "name": "Xiaodong Wang" }, { "ids": [ "1752664" ], "name": "Shuang Chen" }, { "ids": [ "5291030" ], "name": "Jeff Setter" }, { "ids": [ "40611011" ], "name": "Jos\u00e9 F. 
Mart\u00ednez" } ], "doi": "10.1109/HPCA.2017.65", "doiUrl": "https://doi.org/10.1109/HPCA.2017.65", "entities": [ "CPU cache", "Cache coloring", "Central processing unit", "Graph coloring", "Linux", "Linux", "Manycore processor", "Multi-core processor", "Operating system", "Scalability", "Server (computing)" ], "id": "1b81fe43e208f7f05ee4438b4668f1dd9005e375", "inCitations": [ "a11e842fdf25256a2ded132db0af76b49fdf6e73", "dae0a4ef50b347f145ed6de8f6c7fb94d350f937" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "121-132", "journalVolume": "", "outCitations": [ "3000e77ed7282d9fb27216f3e862a3769119d89e", "68073f621072d793e95b9562bf9a9245415d5a96", "2fae39f82be19b311d743d3525badd777228e3ed", "3c5acb5166390abad4813942d67d53027c61e2e6", "590bd345ef4b8f274af3363a52b7d8f518cdc08a", "333da42f4369dff8ca905ad21ea6ee2f5dc99d55", "3efab8db0d6e024bb9cfca5f6bd0cb63a8d9f162", "42c29d08d9a7bcf5b481300cb06974d6067de5be", "2beb38e6e831790082b0578bcddd1b9f73714822", "3364bc50921a9566d61ef8cb73baa82341725e4b", "48930aa2539b12d60352283dd4f91c845cf9b69c", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "023de873621fc0b2664284238a624752a850be7d", "b5d79474b58a07f74806ff82742136d87e549145", "3bec21f0f1954d31642537c02b33e280d7e12029", "00eb10b64b9cb7a7314fb48260ee1d66a91b5492", "398cc68e6df0cffb5b06da2ab39b004bec8ad9ab", "43bf4c7af676fcedc629c2563eb1e9708bbe0db2", "346b77ea4a666700a37c9fffa0505c0df2f9ea66", "4c078fd1f07b9e8b65e27ecd9790e9f4fc579af1", "014ba063a3721973ba6af6503232d4d21d1456bb", "306d589976b94d385fcaef6924edd6f4c7fd840e", "1401df37cc3fc78f26570d601fd123f17646b2d2", "4215fbbff39a0213888718549f215b124bd2e611", "b34d1811dd3ced02a97e4df5ece83506a526f52e", "08632fe2b934ed15d3499e7321282c81adc2c390", "863bcc82bd61f2199a4f2c4c6bc34a26c217c87a", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "115713b2175047e746c8e7cd22ee1b8255866d0f", "2269889c9085ff518ee9e7f5b2f92e4599dd3ff2", 
"211f2beaaf36bb6a920a63dbbef6842cb1d22468", "2e1ef90c5f86041b58047ca2a4819b566d2d753b" ], "paperAbstract": "Performance isolation is an important goal in server-class environments. Partitioning the last-level cache of a chip multiprocessor (CMP) across co-running applications has proven useful in this regard. Two popular approaches are (a) hardware support for way partitioning, or (b) operating system support for set partitioning through page coloring. Unfortunately, neither approach by itself is scalable beyond a handful of cores without incurring in significant performance overheads. We propose SWAP, a scalable and fine-grained cache management technique that seamlessly combines set and way partitioning. By cooperatively managing cache ways and sets, SWAP (\"Set and WAy Partitioning\") can successfully provide hundreds of fine-grained cache partitions for the manycore era. SWAP requires no additional hardware beyond way partitioning. In fact, SWAP can be readily implemented in existing commercial servers whose processors do support hardware way partitioning. 
In this paper, we prototype SWAP on a 48-core Cavium ThunderX platform running Linux, and we show average speedups over no cache partitioning that are twice as large as those attained with ThunderX's hardware way partitioning alone.", "pdfUrls": [ "http://www.csl.cornell.edu/~xiaodong/docs/SWAP.pdf", "https://sc2682cornell.github.io/pdf/SWAP.pdf", "https://sc2682cornell.github.io/poster/SWAP.pdf", "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.65" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1b81fe43e208f7f05ee4438b4668f1dd9005e375", "sources": [ "DBLP" ], "title": "SWAP: Effective Fine-Grain Management of Shared Last-Level Caches with Minimum Hardware Support", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "1b98c203e9bd26801267fced78ea33a1151fadbd": { "authors": [ { "ids": [ "2176759" ], "name": "Xianzheng Dou" }, { "ids": [ "37845066" ], "name": "Peter M. Chen" }, { "ids": [ "1693763" ], "name": "Jason Flinn" } ], "doi": "", "doiUrl": "", "entities": [ "Chunk (information)", "Cloud storage", "Computation", "Counterfeit consumer goods", "Data deduplication", "Delta encoding", "Version control" ], "id": "1b98c203e9bd26801267fced78ea33a1151fadbd", "inCitations": [], "journalName": "", "journalPages": "73-88", "journalVolume": "", "outCitations": [ "1161b9270ada3686352100946f75a6d215dbd07c", "61b8ade95787896bb16978586e14fdda63149006", "67972276329a51525048b9cd10c4649e03efb9b5", "029d525dd48347fa4b8a48dbf4b41b4b37199a6a", "d12d1289d2384c2ce642f01855637b9f0519e189", "459f06b4f27456617100ce4212af8b1dc589dfd2", "0e578433d4e8bb2a571c87a2d22816074902f009", "054658151ae048ac31140fc5bc32342a23c1e52b", "39e3d058a5987cb643e000bce555676d71be1c80", "5ae3566cd07a04e32f61ade2fe4dae98d766df8c", "336c18c068d37533654d96236dc48758f55fc818", "1a5464cda7cff01d5a0ebb49df17fa8c05882295", "6f8745fe003a9fcb782ec98b5a1671781cbd5771", "35339f6f2e99c04920f21883df1db8004436cdc7", 
"7840d62754c327c362d7b141e199dbc0f42c999a", "7aa0a3076808db60aab117179910213d932b2c0e", "195500f47236d16b8797fa5e0b0ac90b0e5aedd2", "044a9cb24e2863c6bcaaf39b7a210fbb11b381e9", "7b90149891786d6c34665ec2130628b16384eca7", "f19870a1b4847ca61beed722d557a50189479d27", "148a4703eafdb2b708b144e8e49544d3476c4844", "07d63fc651eda7771fd1962abbc5b5ad43a82d58", "2fbbf89a921e4aa19ee3bfe73d0b34a6ad764656", "8932c6d523f9f5aba76993c71c0089ba15155a7c", "2f8da4fea7268c6f846af7453d763d2ec2da6111", "182cb3740940f403ff6f311fa54c5c1c9d7edc3f", "a22e9c937345e5d679773223a44e7b7ea30c20f1", "114801eccb5eb0831fd1848f351a138253a42f15", "6d5cfe7723c61149d9cf905fe173268075b8c976", "49cd4b3efdc10090150523e14b36de7ec9ff8755" ], "paperAbstract": "Cloud-based storage provides reliability and ease-of-management. Unfortunately, it can also incur significant costs for both storing and communicating data, even after using techniques such as chunk-based deduplication and delta compression. The current trend of providing access to past versions of data exacerbates both costs. In this paper, we show that deterministic recomputation of data can substantially reduce the cost of cloud storage. Borrowing a well-known dualism from the fault-tolerance community, we note that any data can be equivalently represented by a log of the nondeterministic inputs needed to produce that data. We design a file system, called Knockoff, that selectively substitutes nondeterministic inputs for file data to reduce communication and storage costs. Knockoff compresses both data and computation logs: it uses chunk-based deduplication for file data and delta compression for logs of nondeterminism. 
In two studies, Knockoff reduces the average cost of sending files to the cloud without versioning by 21% and 24%; the relative benefit increases as versions are retained more frequently.", "pdfUrls": [ "https://www.usenix.org/conference/fast17/technical-sessions/presentation/dou", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_dou.pdf", "http://web.eecs.umich.edu/~jflinn/group/papers/login17.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17-dou.pdf", "https://web.eecs.umich.edu/~pmchen/papers/dou17.slides.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17-dou.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_dou.pdf", "http://web.eecs.umich.edu/~jflinn/group/papers/fast17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/1b98/c203e9bd26801267fced78ea33a1151fadbd.pdf", "s2Url": "https://semanticscholar.org/paper/1b98c203e9bd26801267fced78ea33a1151fadbd", "sources": [ "DBLP" ], "title": "Knockoff: Cheap Versions in the Cloud", "venue": "FAST", "year": 2017 }, "1badb9434ce76599937e74f41d6adfe369493768": { "authors": [ { "ids": [ "2292538" ], "name": "Jingwei Xu" }, { "ids": [ "39695715" ], "name": "Yuan Yao" }, { "ids": [ "8163721" ], "name": "Hanghang Tong" }, { "ids": [ "40492270" ], "name": "Xianping Tao" }, { "ids": [ "1731410" ], "name": "Jian Lu" } ], "doi": "10.1145/3097983.3098019", "doiUrl": "https://doi.org/10.1145/3097983.3098019", "entities": [ "Algorithm", "Optimization problem", "Program optimization", "Recommender system", "Scalability", "Uncertainty principle" ], "id": "1badb9434ce76599937e74f41d6adfe369493768", "inCitations": [], "journalName": "", "journalPages": "525-534", "journalVolume": "", "outCitations": [ "0739fad62026ca36f101a36f29d53630207a5748", "767ee7042a5f269bce42be4e38597a4004002793", "49b70ff3811980e7822d25a557b245ec249e66f0", "9aa88a8a354f1d322e242376d27d0474e50252f8", 
"58ceeb151558c1f322b9f6273b47e90e9c04e6b1", "0e9d0a83d6dce5cc93038f10daf617d7073f0f2f", "1c2cae04e0e00dcec3fe6840b2daa00ae9e8a0a3", "53ced1bf79a5ff0db025fee76a67a33f37e4286d", "8c5c728bb26c39f71272f4380f991e4446f781be", "92eb167f30ad59f6949667021760eb41078cf85c", "333b5c0f2b750f737b725496e2b75a6330f5cc54", "2947e8015a1362823ae12a79dfd05022d0d412d4", "ac936b3f3952662d6df50553b74ae7a0d174b128", "0eeb4ee1f6b889d6ac4d95ce3b42a24b52463537", "64c871cd7e0af7e1aeb94d98c6214eb8d8e8b989", "184b7281a87ee16228b24716ca02b29519d52eb5", "5475b630d681f2efdabecd0f814b97c4576cca47", "0aa2a4d259433016ebc899c496faea03c024c0bd", "0ace72127a00b51623e44ec368121a8ef676410f", "71fbbc1675780f2f945073f9d92c09b8d76f80f0", "0c0ff60e9d39c203929457d1ac3f840f8c8e9619", "38e024c97f0e55097919066b7233a5caa622a62f" ], "paperAbstract": "Latent factor models have become a prevalent method in recommender systems, to predict users' preference on items based on the historical user feedback. Most of the existing methods, explicitly or implicitly, are built upon the first-order rating distance principle, which aims to minimize the difference between the estimated and real ratings. In this paper, we generalize such first-order rating distance principle and propose a new latent factor model (HoORaYs) for recommender systems. The core idea of the proposed method is to explore high-order rating distance, which aims to minimize not only (i) the difference between the estimated and real ratings of the same (user, item) pair (i.e., the first-order rating distance), but also (ii) the difference between the estimated and real rating difference of the same user across different items (i.e., the second-order rating distance). We formulate it as a regularized optimization problem, and propose an effective and scalable algorithm to solve it. 
Our analysis from the geometry and Bayesian perspectives indicate that by exploring the high-order rating distance, it helps to reduce the variance of the estimator, which in turns leads to better generalization performance (e.g., smaller prediction error). We evaluate the proposed method on four real-world data sets, two with explicit user feedback and the other two with implicit user feedback. Experimental results show that the proposed method consistently outperforms the state-of-the-art methods in terms of the prediction accuracy.", "pdfUrls": [ "http://moon.nju.edu.cn/people/jingweixu/static/kdd2017.pdf", "http://doi.acm.org/10.1145/3097983.3098019" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1badb9434ce76599937e74f41d6adfe369493768", "sources": [ "DBLP" ], "title": "HoORaYs: High-order Optimization of Rating Distance for Recommender Systems", "venue": "KDD", "year": 2017 }, "1bb24c175f9fa082937c51e9b9b8ae651fd9b111": { "authors": [ { "ids": [ "34986234" ], "name": "Keval Vora" }, { "ids": [ "1689014" ], "name": "Rajiv Gupta" }, { "ids": [ "38394648" ], "name": "Guoqing Xu" } ], "doi": "10.1145/3037697.3037748", "doiUrl": "https://doi.org/10.1145/3037697.3037748", "entities": [ "Algorithm", "Approximation", "Approximation algorithm", "Computation", "Dynamic problem (algorithms)", "Experiment", "Incremental computing", "Maxima", "Maxima and minima", "Streaming algorithm", "Vertex (geometry)" ], "id": "1bb24c175f9fa082937c51e9b9b8ae651fd9b111", "inCitations": [ "3c831e81d29dd5ae62f20120793ba7aaabc257b3", "1f896f601fc53038d0bbc28fde31ff84b12d06d9", "3b57c7bcece47f2a3198e6adec38f712f2914be5" ], "journalName": "", "journalPages": "237-251", "journalVolume": "", "outCitations": [ "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "027485f716ca4f6d9ee2e189790d6560e37fcab2", "1f2dd7197fca02d37d53311ce3569a1d1de45578", "09031aa6d6743bebebc695955cd77c032cd9192f", "4dc578364f357b993b5554b9181c90c84aa6b4d1", 
"ef9d9821df55442f039b128bb5cef2b41ab2cadc", "12809396d9e314df0c8f8e7ec9691bb69571b80d", "0975baea2e5a34f75c06284ac355af7f2de2499b", "6de3915df2b9927a78f213629f3bcb052ec21e8b", "191b70c73d5d8836ecc2d448d18d28c5de861936", "6109c80c0314e458c426e63e2971221fd3108c91", "0ef1dd03db41de69165075562a051021a186c230", "44d4e4111ee7ba147c400a548d25108e92a4662e", "0608d9937c074520cdc93cc444cc1c77039c5332", "1753c2dc85cc40e0a2e8b4a405c1690eab066d8d", "62f41341d9ba292877e9e299d6eb70b5435ee8c8", "3486aeaf540c48952120fe853d672af984f40a6a", "9b5a12eb6ea3ade6d8218ccb7f4d8213c1531622", "0419262d403bb28871af32b7ba47de39470aaf20", "3c831e81d29dd5ae62f20120793ba7aaabc257b3", "36257015011dce35e3cc046b3b5d87e9b13feda4", "2209304ccc2b0501debd5e9a90ae739f9a30cdef", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "edd661ca12ef8ef988679f7b399f2f846ef01fbd", "124bf6c3f3b056d91644664c224d1386311fe27a", "495a5e45f464a587770575cd0865a96c5bcd8622", "0233a5735b6b5d12fb40aacfc0e7bf37fd3aa09b", "d5e1d6ac68b505a18336c9b5d4f44a10699bae23", "9da28672b71b658b8ea989e2bfc502582a79e079", "22a26f40877cbd7ce0fb6c8c94e061332469d071", "1ad8410d0ded269af4a0116d8b38842a7549f0ae", "1f33d840de99a81b2a9c4a9470f68fd52d9509f4", "3726c60552263e648c6856679e672de2e1c110e5", "35ffae4ccf5e7ac45162b4e50e6a7da71fc74bea", "5c6dcf91f1d4bd013bd926ea8bb5ea1dc8682b40", "201a5add33896165a50fb8111bf63175b84af3bf", "05aac0d89535e3dbfe756515a87505ad78b64ed4", "282bc59faefb734137d2ea978cb1eb5699e67c7c", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "6a888f3dd0a17b0241be61daa378ba6caffa6617", "105de19ab71db0a38bc0d734c8fd0efeba2faab7", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "0706356c9ab6014d6b04577d38289ea8328291a5", "8d1a4ae1d3a17edd5b16653f2f582f9952b71612", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "7aa775accfe15dc051f6716143b0bdd3e4bde621", "6f7cd29a3dfdcb2f6880a022e13054542020c5ce", "2060da9362efd14f9352dde99e7ef33c7c95aef4" ], "paperAbstract": "Continuous processing of a streaming graph maintains an approximate result of the 
iterative computation on a recent version of the graph. Upon a user query, the accurate result on the current graph can be quickly computed by feeding the approximate results to the iterative computation --- a form of incremental computation that corrects the (small amount of) error in the approximate result. Despite the effectiveness of this approach in processing growing graphs, it is generally not applicable when edge deletions are present --- existing approximations can lead to either incorrect results (e.g., monotonic computations terminate at an incorrect minima/maxima) or poor performance (e.g., with approximations, convergence takes longer than performing the computation from scratch).\n This paper presents KickStarter, a runtime technique that can trim the approximate values for a subset of vertices impacted by the deleted edges. The trimmed approximation is both safe and profitable, enabling the computation to produce correct results and converge quickly. KickStarter works for a class of monotonic graph algorithms and can be readily incorporated in any existing streaming graph system. 
Our experiments with four streaming algorithms on five large graphs demonstrate that trimming not only produces correct results but also accelerates these algorithms by 8.5--23.7x.", "pdfUrls": [ "http://www.cs.sfu.ca/~keval/contents/papers/kickstarter-asplos17.pdf", "http://www.ics.uci.edu/~guoqingx/papers/vora-asplos17.pdf", "https://people.csail.mit.edu/jshun/6886-s18/papers/KickStarter.pdf", "http://doi.acm.org/10.1145/3037697.3037748", "http://www.cs.sfu.ca/~keval/contents/talks/KickStarter-ASPLOS17.pdf", "http://www.cs.ucr.edu/~gupta/research/Publications/Comp/asplos17-kickstarter.pdf", "http://www.cs.ucr.edu/~kvora001/contents/papers/asplos17-kickstarter.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1bb24c175f9fa082937c51e9b9b8ae651fd9b111", "sources": [ "DBLP" ], "title": "KickStarter: Fast and Accurate Computations on Streaming Graphs via Trimmed Approximations", "venue": "ASPLOS", "year": 2017 }, "1bb6f7083b66b71a8c67622ef0e5541ae5ea8742": { "authors": [ { "ids": [ "1711589" ], "name": "Yao Wang" }, { "ids": [ "11772376" ], "name": "Benjamin Wu" }, { "ids": [ "2419859" ], "name": "G. 
Edward Suh" } ], "doi": "10.1109/HPCA.2017.27", "doiUrl": "https://doi.org/10.1109/HPCA.2017.27", "entities": [ "Information leakage", "Interleaved memory", "Memory controller", "Memory management", "Memory protection", "Scheduling (computing)", "Spectral leakage", "Timing channel" ], "id": "1bb6f7083b66b71a8c67622ef0e5541ae5ea8742", "inCitations": [ "fcf8efb59680ef79bcca894947aa46578d2bbd8c", "1696877c63b5d6d42ddcdf720250f87298b00d2c" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "301-312", "journalVolume": "", "outCitations": [ "3b7e821532a852d27eacd89bcaa869a6263eb144", "663d999090f35ed660b574804799c745b9737562", "d3216e8805687c458a82bea952ca4b1c6f4548b9", "26f3cbecbc636984d57e52191c1d87c9377aff6f", "52c2c050af5b32d4929b4b193967a3675d03aea0", "08dcda95dcbd1ebe906a1c24507b7814759462aa", "19218913ef99ba9acd2491d8bab1d154cb375fa3", "92eaba06af12761b5c64b84e6028d21cd05af9dd", "89de1d99430a6adc28b4b65da9c769d72253ff9c", "3bf23f74bf33ed52f7c28587fab315610b27221a", "045bbbea384e9d54be38dd207bf237d5208ea599", "2200640161a8fe6ce3a03c7bad586e890f10679f", "b2ca498540a6001dd23146d9c8805839f2a5f557", "c9c818f6572b7c9a87992fcfbdb4cea39a96514f", "1f932f0d49a4c56d9718e8506d6177c6a6848831", "0a679d9d08231b2856fe648e6b331d8e6e46a1fa", "5e7a7259528f032ae282347ff43a61c82bab5db1", "70c4e59351761ac1376a2dd71ed86af85ebc7bf3", "c0c14c16813f0083b9e3bf602746a8be1270996a", "07b0b5d59ef09f33a40f30d3a2dec880029a5002", "00ab25c6582d543932fccbb0f15fe93445f95d61", "352e74019d86163d73618f03429ae452ab429629" ], "paperAbstract": "This paper presents SecMC, a secure memory controller that provides efficient memory scheduling with a strong quantitative security guarantee against timing channel attacks. The first variant, named SecMC-NI, eliminates timing channels while allowing a tight memory schedule by interleaving memory requests that access different banks or ranks. 
Experimental results show that SecMC-NI significantly (45% on average) improves the performance of the best known scheme that does not rely on restricting memory placements. To further improve the performance, the paper proposes SecMC-Bound, which enables trading-off security for performance with a quantitative information theoretic bound on information leakage. The experimental results show that allowing small information leakage can yield significant performance improvements.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.27", "https://tsg.ece.cornell.edu/lib/exe/fetch.php?media=pubs:secmc-hpca17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1bb6f7083b66b71a8c67622ef0e5541ae5ea8742", "sources": [ "DBLP" ], "title": "Secure Dynamic Memory Scheduling Against Timing Channel Attacks", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "1bbde6b1d023373552d14636ae65c6f6b2990196": { "authors": [ { "ids": [ "2992779" ], "name": "Sebastian Zimmeck" }, { "ids": [ "2486512" ], "name": "Ziqi Wang" }, { "ids": [ "31959699" ], "name": "Lieyong Zou" }, { "ids": [ "37975434" ], "name": "Roger Iyengar" }, { "ids": [ "40219097" ], "name": "Bin Liu" }, { "ids": [ "34949741" ], "name": "Florian Schaub" }, { "ids": [ "31950200" ], "name": "Shomir Wilson" }, { "ids": [ "2464164" ], "name": "Norman M. Sadeh" }, { "ids": [ "1751667" ], "name": "Steven M. Bellovin" }, { "ids": [ "3435900" ], "name": "Joel R. 
Reidenberg" } ], "doi": "", "doiUrl": "", "entities": [ "Android", "App Store", "Machine learning", "Mobile app", "Privacy", "Privacy policy", "Requirement", "Scalability", "Static program analysis" ], "id": "1bbde6b1d023373552d14636ae65c6f6b2990196", "inCitations": [ "5f02970e26b22369895be8a20d9a6b7941475e56", "5d38e4eeb005597fd94febe481a7477731a8ab91", "814df888c94e2cd7fe4231e2a76d865c27475336", "d86e643618ae0acbb4ed654ffc5c234c92fc62b9", "f7021aad499f201b89efad3482a9cf01148c94b4", "7918bdc169ffb50615be3348a7b1165131720cee", "cac3e4f3b264e3e557d8919a4b1f946b10eddc77", "0136c2a25513dbcd8cc22741f9e548f631e60602" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "2fe33f4b7c75d9e29bf80e7bdd719205cfafc3c9", "0b7543518962a8b51854ec2f50aafecb516279f7", "2883a32fe32493c1519f404112cbdadd1fe90c7c", "9b8178825da4c94f7fbc071f0e9f07d417e7a822", "06e7f7823ae4b946624dee0edfaa39639be263fb", "ec14579225241e95ae691562a9cec3d86de1f054", "620a1323725708e2e5b38603f29b4e7541aa9042", "54ffff5550bb6f5a9512d4806d5b7c6867bed5a3", "9140a579de7da753a095356b5da24804af28e8f7", "a6624c872d8aa5fe14a706048cf258a440f0564a", "2a0fcf529ae209c944ba5465108037efd6e74ef2", "c6002e0ede09190166d8b7c17d7795d5520a226a", "0c9656e477364f4b8d6e5d22fcad85f9eb25c0b3", "9b21fb4b0048e53fa6219389a8fcc44e6b7a8b89", "4a8da50795fd4196f3aa28d1c095f84c0a40fe3d", "1a4143093141436dbac3801e6024bcb9ea376d50", "0fa3eabd538d777556f3e87399959d05cefa1f69", "3a9876912fd36b7650dfd6d045d1216a570b0601", "94d0e8b118efecb9f5b2056d84bf22253a2fb63c", "21df779224682a6820eb76c393600a50817fa19f", "57e682daf705a22e463b2c27cc467ea8aa5e9e01", "040678daf6a49a88345ee0c680fccfd134f24d4b", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "2439b418b815dc08cc15d08ed6af46fb4baacf8a", "3c00b08d9e23e6b767795bbba909112cdb2506f3", "7e4d1297c47996ccd4624c920647553b724a7933", "3fa96fe0722db15568fdf2b2e76e140e5ad2034a", "8f3bbd754dba5c3737953ab9fc32e49ee84e7e5e" ], "paperAbstract": "Mobile apps have to satisfy various privacy 
requirements. App publishers are often obligated to provide a privacy policy and notify users of their apps\u2019 privacy practices. But how can we tell whether an app behaves as its policy promises? In this study we introduce a scalable system to help analyze and predict Android apps\u2019 compliance with privacy requirements. Our system is not only intended for regulators and privacy activists, but also meant to assist app publishers and app store owners in their internal assessments of privacy requirement compliance. Our analysis of 17,991 free apps shows the viability of combining machine learning-based privacy policy analysis with static code analysis of apps. Results suggest that 71% of apps that lack a privacy policy should have one. Also, for 9,050 apps that have a policy, we find many instances of potential inconsistencies between what the app policy seems to state and what the code of the app appears to do. Our results suggest that as many as 41% of these apps could be collecting location information and 17% could be sharing such with third parties without disclosing so in their policies. 
Overall, it appears that each app exhibits a mean of 1.83 inconsistencies.", "pdfUrls": [ "https://www.ftc.gov/system/files/documents/public_comments/2016/09/00009-128900.pdf", "https://www.internetsociety.org/sites/default/files/ndss2017_05A-5_Zimmeck_paper.pdf", "http://sebastianzimmeck.de/zimmeckEtAlCompliance2017ShortPaper.pdf", "http://cups.cs.cmu.edu/privacy-day/2017/posters/App_Analysis.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/automated-analysis-privacy-requirements-mobile-apps/", "http://shomir.net/pdf/publications/plt_2016s.pdf", "https://privacyassistant.andrew.cmu.edu/files/Automatic%20Mobile%20App%20Policy%20Analysis%20-%20camera%20ready%20FINAL.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/41ca/e276fbedaa9f434714fe618ed889e5ab6dea.pdf", "s2Url": "https://semanticscholar.org/paper/1bbde6b1d023373552d14636ae65c6f6b2990196", "sources": [ "DBLP" ], "title": "Automated Analysis of Privacy Requirements for Mobile Apps", "venue": "NDSS", "year": 2017 }, "1bc029b715f29e95063f27dc36396093394a1e19": { "authors": [ { "ids": [ "3048886" ], "name": "Jake Wires" }, { "ids": [ "1709411" ], "name": "Andrew Warfield" } ], "doi": "", "doiUrl": "", "entities": [ "Computer data storage", "Constraint satisfaction", "Control plane", "Data center", "Scalability" ], "id": "1bc029b715f29e95063f27dc36396093394a1e19", "inCitations": [ "226ca798b529c13605a2aa7fe75d58f4188f850a" ], "journalName": "", "journalPages": "213-228", "journalVolume": "", "outCitations": [ "a205ec6ef5dba0ab862cb4d127737104aae5a476", "67210b23a16f222f83d680a98ead15a3d953a434", "45ffb71b454dcbd6a77537d81926eb3eca1c49dc", "4af63ed343df388b6353b6fc77c7137d27822bf4", "23a9e1f8cefc76b71f0cf5e1ccf5a6485c19cadf", "282c6a3b573051e3e799d73cfc623ccbd68bcd6a", "3358850706a8ad2eb8489bb7790e8bbd3a5b6dba", "2607daeaf7bc30c7ba532471c628e989797d0384", "5f3f9223c5c9f896be099bc177929febad508407", "82c0d142c975e1cf224af674129d69647ef4d892", 
"1c5de2067774468c623438106d4697b80ac043b8", "7447b123eaef1d84a0f94b485f039534ad98015c", "3d019723a6f8678b6adc901e8eae2076263d9089", "1ccf3f5ad209fd736847fb2aa3252920d59efd88", "514a5c15e8cf3f681febecad954a4508d9189c99", "1f1269db397595f5b5a08eb2e65022e9a8759648", "2f6af58c7905fb8367652fe62fbb1f6ec7e28be0", "8b1d8d46836a6d5eb4355315b64d85c128cbff27", "638c917d981915bc7a00bb0941cdd38111df51de", "3ede477c16e5df0964eac62b3d33311514a66410", "4452a2f4f4fd1df19777f4b0ff482403f7b5091e", "7a2274412948765bf872b765dafd8139e51000ff", "1430c094d6fb90d87f38c36a92cdbcf66b87f60a", "41f07dfa0045c89aac19f97d7a471bdd514b8998", "3fc93257ac94aa8d6505c19077058e68622345b6", "70f3161f62a4a43580eb47d2157e98e880738594", "66e7577cd919979c642319632f9435217b016d87", "4b1ed250f7c53808d1910b6f040b47e178eb2460", "0a4110fda21f0de29824ead1df591d2c5e1da8d0", "120f401bf43b93d03a37c8c1de332e8b1664c5d9", "130d640b53a1d6700b67a4ea4256071ae18e0ee8", "6bd528b41b3f56a5ec85d2118bc5c4a00ad9944e", "3bcc8272a88796ae5d86a3dded89f66034c1cef7", "58429cae7ff648359ab83566d17ab3d7dd6ae3fd", "0368d2445d3ee4205ee73da933cb8b810a89091c", "497d61a0b8a4b03c57fe1c1ba45118fdfd3dace4", "2da760f90c3d2bf6598becdde9063093f488548c", "0da64b4241476515abcd6995baf2252c313c18ee", "6a1df9dae902f3d377f9c85ba9732b8d2270bf2b", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "828a6aa6984e292305aad964b9d167f0dd7dc513", "2a2a03c864e53862a84c4555f218c2c92e039a5b", "87b94c2f86b9e8838bf15276fcfe9be0fd293588", "3c5cc0f17dc2f956dbc278f24433f57affe49dce", "478f51822252e4221c920bbf9d30a0b0491045ec", "0a587144ad09fb0a784515c89ddfb6b90e8c057e", "0b9e161974df2e5563090d2c5b623de57f2c744a", "1dc5c9675b1f9662deac7a9d5f4b38cd13f76dba" ], "paperAbstract": "This paper describes Mirador, a dynamic placement service implemented as part of an enterprise scale-out storage product. Mirador is able to encode multidimensional placement goals relating to the performance, failure response, and workload adaptation of the storage system. 
Using approaches from dynamic constraint satisfaction, Mirador migrates both data and client network connections in order to continuously adapt and improve the configuration of the storage system.", "pdfUrls": [ "http://www.usenix.org./system/files/conference/fast17/fast17-wires.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_wires.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17-wires.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_wires.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/wires" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/1bc0/29b715f29e95063f27dc36396093394a1e19.pdf", "s2Url": "https://semanticscholar.org/paper/1bc029b715f29e95063f27dc36396093394a1e19", "sources": [ "DBLP" ], "title": "Mirador: An Active Control Plane for Datacenter Storage", "venue": "FAST", "year": 2017 }, "1bd5866dd08522225b865019fc13f87f79c50404": { "authors": [ { "ids": [ "1678529" ], "name": "Shuo Zhang" }, { "ids": [ "1680484" ], "name": "Krisztian Balog" } ], "doi": "10.1145/3077136.3080796", "doiUrl": "https://doi.org/10.1145/3077136.3080796", "entities": [ "Column (database)", "Entity", "Knowledge base", "Large Hadron Collider", "Population", "Spreadsheet", "Table (database)" ], "id": "1bd5866dd08522225b865019fc13f87f79c50404", "inCitations": [ "7ed3ee44908eb760997ad627bae3e35e359478f3", "9daf8241eaf8185c83e4fb23d8a798b0467af97d", "37f54ef89c0744dba373397a34c07e6e4b9e852f" ], "journalName": "", "journalPages": "255-264", "journalVolume": "", "outCitations": [ "2b9cd09b949b7e69933d18ae408397e803987151", "1e8def0173f498303b0a252f16b7d4999385a32c", "73621c94263d4e42b1ee44a831d2cba480e4c895", "45e98175d9ceed16bdf1e7d2047b2738cbc0f4c2", "67357c10a866b0140c82c1a582d29cb28ca350a0", "1f990d98dcc3941f01bd6bb5405fbda37e00dd6a", "515b9903cb55e548b6732e953a1bd51f457c6353", "87ffc4a3dcfd52a136361a1c10302bfb2781c10b", 
"5a2c2277baa4d96db3e4c4f544652364055fb171", "513167c08db5139162710aad9b2c217b344df2c4", "05c858563e51d91d80741f4d3209b20dc19db942", "187c25de08261760cf48b4e9dabf308d2f7f15d9", "23eb5f05d8efd7399c27f46685067cb25c4dd197", "a38cd38da57c638574c00101edb04a78394f9aeb", "4586eae7af09bf653cee78d71ba25711ec3b3e5a", "3353af61c41db57afaca92e5990db39c9bd14f9f", "45b830bac3764a3c941f7b2bec905663ac9f24f0", "18524003036773156eb839faf677ee0549ea606d", "2132af47c82d5722b86747dfe3a9d7cf9e0bb4e1", "33b94ba1d8b02f05d42954025798210867a833b7", "00fce98c3fda59bcb84b6d0626fb3137d2fbb984", "7993679eb74516d684168cde702ab5555af8ead6", "967c904c9bb02c7989cde1ff9a2be133615440b0", "249e73a10351b9e9d2150762a287df0698d4f830", "5029918d406db6c4e008d5194891e3561c5734a7", "a75c2d26ca6a06cbee62a8d1dad5993356d96793", "00d37abb9ab0235c626d5eb1fd8bfc41092e8349", "34b39ed1879f11d8ab84b758f0b7ba292eabc279" ], "paperAbstract": "Tables are among the most powerful and practical tools for organizing and working with data. Our motivation is to equip spreadsheet programs with smart assistance capabilities. We concentrate on one particular family of tables, namely, tables with an entity focus. We introduce and focus on two specifc tasks: populating rows with additional instances (entities) and populating columns with new headings. We develop generative probabilistic models for both tasks. For estimating the components of these models, we consider a knowledge base as well as a large table corpus. Our experimental evaluation simulates the various stages of the user entering content into an actual table. 
A detailed analysis of the results shows that the models' components are complementary and that our methods outperform existing approaches from the literature.", "pdfUrls": [ "http://arxiv.org/abs/1708.08721", "http://doi.acm.org/10.1145/3077136.3080796", "https://arxiv.org/pdf/1708.08721v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1bd5866dd08522225b865019fc13f87f79c50404", "sources": [ "DBLP" ], "title": "EntiTables: Smart Assistance for Entity-Focused Tables", "venue": "SIGIR", "year": 2017 }, "1bdc70a32c03b4455277639d4f79dd5ecac2de34": { "authors": [ { "ids": [ "6916919" ], "name": "Louis Jenkins" }, { "ids": [ "2390252" ], "name": "Tingzhe Zhou" }, { "ids": [ "1687335" ], "name": "Michael F. Spear" } ], "doi": "10.1109/PACT.2017.45", "doiUrl": "https://doi.org/10.1109/PACT.2017.45", "entities": [], "id": "1bdc70a32c03b4455277639d4f79dd5ecac2de34", "inCitations": [], "journalName": "", "journalPages": "14-26", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.45" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1bdc70a32c03b4455277639d4f79dd5ecac2de34", "sources": [ "DBLP" ], "title": "Redesigning Go's Built-In Map to Support Concurrent Operations", "venue": "PACT", "year": 2017 }, "1be67e00b82ccb9dca746dc1c4758932a5a5ff5e": { "authors": [ { "ids": [ "3491192" ], "name": "Seunghee Shin" }, { "ids": [ "1694458" ], "name": "James Tuck" }, { "ids": [ "1717365" ], "name": "Yan Solihin" } ], "doi": "10.1145/3079856.3080240", "doiUrl": "https://doi.org/10.1145/3079856.3080240", "entities": [ "Application checkpointing", "Byte", "Computer data storage", "Computer memory", "Content-addressable memory", "Data structure", "Dynamic random-access memory", "Fail-safe", "Non-volatile memory", "Overhead (computing)", "Persistence (computer science)", "Programmer", "Run time (program lifecycle phase)", "Serialization", "Speculative 
execution", "Volatile memory", "Write-ahead logging" ], "id": "1be67e00b82ccb9dca746dc1c4758932a5a5ff5e", "inCitations": [ "e3a66a2e79c2a5f7a52bf4e8089bf522fdc04abe", "20f1081cf001f716037e20d9cff147f5ac50632a" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "175-186", "journalVolume": "", "outCitations": [ "885c666fbcfd1a10c613496d7a041d01b99c7a39", "69e6fb41751ebf0a6b99522a2fabcd3879e8cf2b", "0fc3098d4413dd75ef750c8dddf6cbe87ea9d8d7", "19710fa0e64f36616e112c8a7b4e99ba4cb43c74", "57c823b3b07b98233394bf15cfbbaed6a84809df", "362e9b5afe5934a9d8046d758c17c5bada0652b3", "ae8ee52b076263e1108ac35714bf15c6dd514f11", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "1f482f44497c17be0573d9dff14a30d87b0bf0ca", "d76913152aeff892dbb028785f98ee8c84bfd8e3", "1824677a301280f6e8278a9bd256174131476369", "fd840d5275cac98d64e7778a1b9173b937a77386", "314919c141024c71cb17d525ecd8016138335002", "0fca03c476d869660dec04fb83f54161767a4ba7", "0458db0462170871be0298bdaedc851f36ea83aa", "16dc592aa326ecd1f8d46ca7e3485a7311af3dba", "babcdcb895a1b078e7db2dc0389fa0d9d4d30225", "0fc61e18ad7b294cdda7710a5e5516d42ffcf46e", "4a0fbca310242c2908b252903e2ecc84a73ce447", "3e033205357becbb70e0b697134a5fe6fa17da43", "0863ad1f08b5fcf113c0b1d9b34d92fd38c89cc2", "2e3bda19a2ed88e8a6e5cc415e27da551653ff1d", "277862a906af8489a1d98add2f6516a0e5df1bb1", "1b9e9bfdc66140d2eb192e4ac8aca9281f0239b8", "5987b948677c5528a061890f4df507c85a5a97b5", "42c70d64890726f60556caf3eec3f06e85642dd9", "dc2e2b794a784782d7d9860f1358aa107f71c1bf", "5557b730f22e3e90272d477ecfa82013649086c8", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "14809fca6750caefcf091fae18258dd84ffa65f5", "82155a73552fe1daf752bda7567dce96a14219a8", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "0204f40221260d00c5ee63646560a40dcd7d97d1", "db97d135ddb8edec2dae6c10a830ac6e44045d94", "71cbd5b7858785e8946523ca59c051eb0f1347ba", "94783d113951822195d4ba44599a8fcbdef9d4bf", 
"39e3d058a5987cb643e000bce555676d71be1c80", "2621b8f63247ea5af03f4ea0e83c3b528238c4a1", "05a1357946de5eca42a477b7b268db4944219a2e", "33dcafd805a3b44fd64270028633032ff0bb6fac", "3da14037fc6e2c3dee2d6808bc2d7e933325d054", "35d357020f53e6aba43fe5c8a42c07ad87be745f", "9858251a88afc29fa9fdb8234d998dcdf182f144", "16653666b0005f91060a3e402566659749b84313" ], "paperAbstract": "Byte-addressable non-volatile memory technology is emerging as an alternative for DRAM for main memory. This new Non-Volatile Main Memory (NVMM) allows programmers to store important data in data structures in memory instead of serializing it to the file system, thereby providing a substantial performance boost. However, modern systems reorder memory operations and utilize volatile caches for better performance, making it difficult to ensure a consistent state in NVMM. Intel recently announced a new set of persistence instructions, clflushopt, clwb, and pcommit. These new instructions make it possible to implement fail-safe code on NVMM, but few workloads have been written or characterized using these new instructions.\n In this work, we describe how these instructions work and how they can be used to implement write-ahead logging based transactions. We implement several common data structures and kernels and evaluate the performance overhead incurred over traditional non-persistent implementations. In particular, we find that persistence instructions occur in clusters along with expensive fence operations, they have long latency, and they add a significant execution time overhead, on average by 20.3% over code with logging but without fence instructions to order persists.\n To deal with this overhead and alleviate the performance bottleneck, we propose to speculate past long latency persistency operations using checkpoint-based processing. 
Our speculative persistence architecture reduces the execution time overheads to only 3.6%.", "pdfUrls": [ "http://www.pdl.cs.cmu.edu/news/calendar/2017/08/ece-grad-seminar-solihin.pdf", "http://doi.acm.org/10.1145/3079856.3080240" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1be67e00b82ccb9dca746dc1c4758932a5a5ff5e", "sources": [ "DBLP" ], "title": "Hiding the long latency of persist barriers using speculative execution", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "1be98efed329c080561820a39919222eb9d52bf2": { "authors": [ { "ids": [ "3438230" ], "name": "Sarah Pearman" }, { "ids": [ "40427406" ], "name": "Jeremy Thomas" }, { "ids": [ "3599539" ], "name": "Pardis Emami Naeini" }, { "ids": [ "40458669" ], "name": "Hana Habib" }, { "ids": [ "38572260" ], "name": "Lujo Bauer" }, { "ids": [ "2637728" ], "name": "Nicolas Christin" }, { "ids": [ "1699751" ], "name": "Lorrie Faith Cranor" }, { "ids": [ "2645852" ], "name": "Serge Egelman" }, { "ids": [ "35229534" ], "name": "Alain Forget" } ], "doi": "10.1145/3133956.3133973", "doiUrl": "https://doi.org/10.1145/3133956.3133973", "entities": [ "Computer", "Habitat", "Information needs", "Information security", "Password", "Password management", "Password manager", "Password strength", "Web application" ], "id": "1be98efed329c080561820a39919222eb9d52bf2", "inCitations": [ "63577654a93dcb808bce734973365ab8e6a991d4", "0b196550a65c6ebb17104cdc631286bc741baf18", "a7d50dbaf04757678ba9f6555e585f65dbcc73aa" ], "journalName": "", "journalPages": "295-310", "journalVolume": "", "outCitations": [ "3d46dbb0da1b4e0ee5b45c46525d9459fb94222d", "0b83159ebfdd930695afb54c151cde23774dc642", "bc2e813256b9101d8d98f6a165d45ac0c4f4821c", "4fcb4f03afc8f7d780929afbf9584bb7e9ced6f3", "c141e141f8d5e5cfe3f54317948d42700d433e46", "8650b37db13b8352da0b31711edab817e63cd281", "254d75c7b813201c38a69c1d12339845522f0af0", 
"2c61a7a2ec8ac2178812fab42a222f35918f47ce", "c218cad1ff66042802862e41f0e305a902453439", "3443bb2ecbcff2f591267e0ec2923ce98cfe4ee6", "956cbbf33ba15071efb11a54005f252d442700d2", "0103b4a14ab5396b75c2eec39865e0cec66a8e19", "1d944ba5648c72bc7686237044609305f07da1e4", "2f8fe860cc8f3d60e0174f87d0099657e10155af", "01d6be82fc9a1e103a93432f949ead3e3e208882", "7ecef22d97a591a4b022ff22c05613c536030440", "744109e3c147d5276391014364ed8beb90b054b7", "a8b16f43341adc956022bca26d3727ab8148b857", "84ef22df183ef4428882aa3a93369f994e0bc3ce", "7ea4242f6d047cf7b31c19def5b6a0944fd88992", "8bba0a42134ea5df896cdde59e62bbd90e9f4dd4", "053982a9c7c0a16c9b080f800013b945d1135069", "168488dc2088dc5a48e7c85e7fd487145d161223", "418e058c0dd22b18994ebdba8bd4713bf92588f7", "0cb0a2b5dac0972fa6388b2f31f76c89455a10db", "48f1145733cbcb1e438271572267ddde5b463702", "e8b16e99dd0b01bd897e11d58ecf4f8085755335", "099ac9a2c85e5ce992371a19b478ee5283f3b264", "4abdfb34d3511608bbd830c0549d2a3c929414b2", "369d707c6bdf17645a322d0e2bc610798bc8c1b8", "53852d69c008f9ebfb05939b4eb7c1f3279437e6", "04645e17a1acb783a2ffb2b9b201624c76d52ae2", "88dfa31abf1474407f2132ed911f52ec59b49ec6", "1558b1d92f41eb01c49ba9c548a9a5adfb1aebae", "2e1bd6af57a5717e84b56109d6c618cdaba67cc7", "c950df133c430d0091e6d231c04c5fdf5030ddca", "46f158aaace8a5cd2ba99320b43f201278b169cc", "6193c241ac38d4c8c6a43ca72f0c6910fb4455d2", "da2695f7ba0b56feccd9f4c3c2bad61c9881921a", "50310949e3ca1a725a8f5e827ab511cd8dbddbaa" ], "paperAbstract": "Text passwords---a frequent vector for account compromise, yet still ubiquitous---have been studied for decades by researchers attempting to determine how to coerce users to create passwords that are hard for attackers to guess but still easy for users to type and memorize. Most studies examine one password or a small number of passwords per user, and studies often rely on passwords created solely for the purpose of the study or on passwords protecting low-value accounts. 
These limitations severely constrain our understanding of password security in practice, including the extent and nature of password reuse, password behaviors specific to categories of accounts (e.g., financial websites), and the effect of password managers and other privacy tools.\n In this paper we report on an in situ study of 154 participants over an average of 147 days each. Participants' computers were instrumented---with careful attention to privacy---to record detailed information about password characteristics and usage, as well as many other computing behaviors such as use of security and privacy web browser extensions. This data allows a more accurate analysis of password characteristics and behaviors across the full range of participants' web-based accounts. Examples of our findings are that the use of symbols and digits in passwords predicts increased likelihood of reuse, while increased password strength predicts decreased likelihood of reuse; that password reuse is more prevalent than previously believed, especially when partial reuse is taken into account; and that password managers may have no impact on password reuse or strength. We also observe that users can be grouped into a handful of behavioral clusters, representative of various password management strategies. 
Our findings suggest that once a user needs to manage a larger number of passwords, they cope by partially and exactly reusing passwords across most of their accounts.", "pdfUrls": [ "https://acmccs.github.io/papers/p295-pearmanA.pdf", "https://blues.cs.berkeley.edu/wp-content/uploads/2018/01/sbo-passwords-2017.pdf", "https://www.ece.cmu.edu/~lbauer/papers/2017/ccs2017-password-reuse.pdf", "http://doi.acm.org/10.1145/3133956.3133973", "https://cups.cs.cmu.edu/~aforget/Pearman2017_CCS_SBOPasswords.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1be98efed329c080561820a39919222eb9d52bf2", "sources": [ "DBLP" ], "title": "Let's Go in for a Closer Look: Observing Passwords in Their Natural Habitat", "venue": "CCS", "year": 2017 }, "1beba39a3cf29d44a8c023a5898e5b26e38942f7": { "authors": [ { "ids": [ "2661238" ], "name": "Massimo Marchiori" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.52", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.52", "entities": [ "Division by two", "Point of sale", "Stock and flow" ], "id": "1beba39a3cf29d44a8c023a5898e5b26e38942f7", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "396-403", "journalVolume": "", "outCitations": [ "9f1492af0dcdadfbdf4cf49b47df74ada63f7ad5", "e842d546e571579fcab591f8851d6e1ae4dc913e", "f136528dbe05f61ceb73d7e5c260e337f63ef627", "97ab1f9e53d8ab7a487b42494debbf2b48376598", "6480443d877b1971c47c0a5672d27dc405b81a52", "21d4e5dbf8b411d7e8226bf5493f2170cbc05a1a", "082cb3c324ffb9b096088eba595d2dc46dc624d9", "9465f28df0acea37a3858581241d8467cdd3f1ba" ], "paperAbstract": "The Shopping Mall is in many countries a focus point for the lives of the people. 
It is the place where people go and shop, and in a certain sense it has become the modern equivalent of a square. Along the years, this attraction center has followed a definite evolutionary path, becoming wider, bigger, looking to maximize the comfort of the customers: a better place, for better sales. In this paper, we analyze the shopping mall under a different perspective, and check if this evolutionary line is in fact the best possible one, or if instead the mall could be shaped differently. We propose a structural modification of the mall (halving), and analyze its impact on the efficiency (the attractiveness of the shops). Counterintuitively, we find that this structural modification leads to a significant increase in the overall mall performance, a result that impacts the recent evolution of the mall, and that suggests that the mall as we know it might be far from being the best one, and some radical changes could be in order.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.52" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1beba39a3cf29d44a8c023a5898e5b26e38942f7", "sources": [ "DBLP" ], "title": "The Paradox of the Shopping Mall: Costumers Flows and Market Efficiency", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "1c0d0655cd0c7d93daa5faede81876034ac7c3ba": { "authors": [ { "ids": [ "2739832" ], "name": "Ioakeim Perros" }, { "ids": [ "3000659" ], "name": "Evangelos E. Papalexakis" }, { "ids": [ "34410258" ], "name": "Fei Wang" }, { "ids": [ "1771649" ], "name": "Richard W. 
Vuduc" }, { "ids": [ "3378372" ], "name": "Elizabeth Searles" }, { "ids": [ "1749483" ], "name": "Michael Thompson" }, { "ids": [ "1738536" ], "name": "Jimeng Sun" } ], "doi": "10.1145/3097983.3098014", "doiUrl": "https://doi.org/10.1145/3097983.3098014", "entities": [ "Algorithm", "Baseline (configuration management)", "Data mining", "Exploratory testing", "Scalability", "Sparse matrix", "Spartan", "Synthetic data" ], "id": "1c0d0655cd0c7d93daa5faede81876034ac7c3ba", "inCitations": [ "3db2c19485542f9c55e57ef1ed5657e47e84a4eb", "e9961fbe3261032e6fe4aeae57c436eb586f6bbd", "665d11f27707ba52eaa065ed6a5cadec592229ea", "682e83ac98510037b89fb76e46cdace7cfc76d53" ], "journalName": "", "journalPages": "375-384", "journalVolume": "", "outCitations": [ "bced963ba7966b0566bafa909914b857079cb816", "07ed71b436b9adf23f0f93c8e4533461b82e769a", "638e08e94843bc7264d97945e961ad8027cfc6b3", "3c9d7f13cf54105dae995ed53df87479818cbebc", "38b389580d774ce513284e671ff3bbcef0258de2", "a6a53b783ec3e01f91696b6ec846e3aac15f4a3d", "0ccec79dd844ebd593e861320f1f8427899584b7", "3cb3a6c2694ebdc5a74607a71e5e5a39391adbc0", "00f4c552db89fdfddc956ecb728f4f404d5b5df1", "012f7cd2f8e10f480d98a585b1583a8db39e3ff4", "f2bdc868e33937a52c519bdf13e51a7afffbc03c", "2e8ab628bc9f256c11c898aa44f049143c74d05d", "231f97057e1efed073c20ccdf3aa3c5aaf063ffb", "348fddd3e469426b06fd1a4693a3f9b1e0bf30e6", "0a5aef2da6166c9b26ecc0a421f6bb5fd586ff97", "4b3643e5436a8b8430361e021a3c863765bab3fb", "0072eb224991ada6fc8a4e2d3465e4a51c0b26bc", "e9ba1e7f1a6cb2d66b7baf5c25cbc355a408838f", "96d122a51c124bc9b013a4504cef15d820cbf02a", "4f207372bc0d99bade54d5a97d814b249e4bebf0", "6ff715c841b0e7d0642649ecfb22a5c033414230", "0a180e1e8e5b07f4c361c62f7a1281d29f0248e4", "757cb62e3d1c0643c9f83bf57d45e427bd76e235", "5164f2ce39101171174e883d2ff402cb16af9212", "be1e8adcc39bd2a47d7b39267f1d78db64ff51b3", "d1ed2dfc6e8c16b44c5cd40d3a67f6f3b034d8e4", "1a513130558ff1127dfa5ea337bbe0995ac8ad21", "c5b28cae82b14417f1250e58bb241367248e827d", 
"53e28e1c1650133f1e78cb6d985ecf13530319e3" ], "paperAbstract": "In exploratory tensor mining, a common problem is how to analyze a set of variables across a set of subjects whose observations do not align naturally. For example, when modeling medical features across a set of patients, the number and duration of treatments may vary widely in time, meaning there is no meaningful way to align their clinical records across time points for analysis purposes. To handle such data, the state-of-the-art tensor model is the so-called PARAFAC2, which yields interpretable and robust output and can naturally handle sparse data. However, its main limitation up to now has been the lack of efficient algorithms that can handle large-scale datasets.\n In this work, we fill this gap by developing a scalable method to compute the PARAFAC2 decomposition of large and sparse datasets, called SPARTan. Our method exploits special structure within PARAFAC2, leading to a novel algorithmic reformulation that is both faster (in absolute time) and more memory-efficient than prior work. We evaluate SPARTan on both synthetic and real datasets, showing 22X performance gains over the best previous implementation and also handling larger problem instances for which the baseline fails. Furthermore, we are able to apply SPARTan to the mining of temporally-evolving phenotypes on data taken from real and medically complex pediatric patients. 
The clinical meaningfulness of the phenotypes identified in this process, as well as their temporal evolution over time for several patients, have been endorsed by clinical experts.", "pdfUrls": [ "https://arxiv.org/pdf/1703.04219v1.pdf", "http://www.cc.gatech.edu/~iperros3/pdf/kdd17.pdf", "http://doi.acm.org/10.1145/3097983.3098014", "https://www.cc.gatech.edu/~iperros3/pdf/kdd17.pdf", "http://arxiv.org/abs/1703.04219" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1c0d0655cd0c7d93daa5faede81876034ac7c3ba", "sources": [ "DBLP" ], "title": "SPARTan: Scalable PARAFAC2 for Large & Sparse Data", "venue": "KDD", "year": 2017 }, "1c32a74001b390d385f6d977391b6c86ea0741d0": { "authors": [ { "ids": [ "1788117" ], "name": "Xiaosheng Li" }, { "ids": [ "2301135" ], "name": "Jessica Lin" } ], "doi": "10.1109/ICDM.2017.37", "doiUrl": "https://doi.org/10.1109/ICDM.2017.37", "entities": [ "Analysis of algorithms", "Archive", "Benchmark (computing)", "Data mining", "Dynamic time warping", "Time complexity", "Time series", "Whole Earth 'Lectronic Link" ], "id": "1c32a74001b390d385f6d977391b6c86ea0741d0", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "277-286", "journalVolume": "", "outCitations": [ "244e27bf8c5a5277a733c5271fdeb306f98115d5", "8dc22a89325105346ef9342c150873b4eb6a47ac", "3e0a271d9d5e2e42ed1212cf9e8a220de11633e1", "1ef345edfaff4e5f22c82efb01052de7f7f9fc9c", "6332a2f503d0ca33dc6403c5693d53aa93840ad1", "0028af619198dea14559e28973053df5261d8231", "0f7385035524cf74244d274401246c9aecaa90d6", "bd7b99b148d4c9163f24480de222ec651f8c1b0b", "a9a530fd3dae3df42ff72216fe0a8c05d6e60602", "5ed965000e6692f9c8a214c0d76e59e674e4e8ae", "161f03edf0f97bb3d8376eaa80fec09ee7cc677d", "99630eab37fe1bb3f917003e1169c2a2c4e74550", "c50510c8ccf2a24e98db1ca3b1d34e41a759d286", "85714714c18b71ad604e80b96cd67a5fc95c389d", "2666bfb6787da71ee1ab7170668097a3b05c1c65", "1ac57524ba2d2a69c1bb6defed7352a06fd7050d", 
"6b6d357fb4ef19f2330596183ce00d2f3797740d", "0152977431c1d7c76c539906cac1aa596a933374", "259c54fb0244e44f97d4218871f62deb64fa66f3", "6aad7940023151500673b45689d9e4a14349873c", "27db63ab642d9c27601a9311d65b63e2d2d26744", "b436b033856bc873ea8378666e764174722824f3", "037b3d3ddfded992dd068730f4b5d7e7793a780d" ], "paperAbstract": "Time series classification has attracted much attention due to the ubiquity of time series. With the advance of technologies, the volume of available time series data becomes huge and the content is changing rapidly. This requires time series data mining methods to have low computational complexities. In this paper, we propose a parameter-free time series classification method that has a linear time complexity. The approach is evaluated on all the 85 datasets in the well-known UCR time series classification archive. The results show that the new method achieves better overall classification accuracy performance than the widely used benchmark, i.e. 1-nearest neighbor with dynamic time warping, while consuming orders of magnitude less running time. 
The proposed method is also applied on a large real-world bird sounds dataset to verify its effectiveness.", "pdfUrls": [ "https://cs.gmu.edu/~jessica/publications/BOPF_ICDM17.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.37" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1c32a74001b390d385f6d977391b6c86ea0741d0", "sources": [ "DBLP" ], "title": "Linear Time Complexity Time Series Classification with Bag-of-Pattern-Features", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "1c32b6ce44bd647090cf64e6e82c421c4c0c2f35": { "authors": [ { "ids": [ "36417289" ], "name": "Ada Lerner" }, { "ids": [ "1769675" ], "name": "Tadayoshi Kohno" }, { "ids": [ "3268360" ], "name": "Franziska Roesner" } ], "doi": "10.1145/3133956.3134042", "doiUrl": "https://doi.org/10.1145/3133956.3134042", "entities": [ "Archive", "Rewriting", "Vulnerability (computing)", "Wayback Machine", "Web archiving", "Web content" ], "id": "1c32b6ce44bd647090cf64e6e82c421c4c0c2f35", "inCitations": [ "ee1e8218fc8e0e591c96dc7a09768f41813e4e31", "8152e621012339cd950dd082ff711b6f1e325f1b", "6de85ece29a68d4226654f2ca40081daeb8977c4" ], "journalName": "", "journalPages": "1741-1755", "journalVolume": "", "outCitations": [ "3208feae829cba6bd319421fe1fea58962da8fd9", "db37a6c2668c82e80a765d92f272acba778903a4", "2f95e2ca11610cb334d8d777d7b0f0d5561e67bc", "0d2f693901fba451ede4d388724b0e3f57029cd3", "1bb336ebcd6c8fb2f25c2c93280b2cce9e5905c6", "5a032460c589a67e7c73b19c93aa591331758139", "05ad6c3ab7a0b1ab0c4fc3af9f1622cf6c0fa40e", "51b0ce84988e083d6253af098542f905e1fea0a8", "17eceec10a0f5f3a3b2ce99309009bfb2e9ef389", "5212ebce9982b9eda2ebdfade98cd9577be4c9ca", "07fe6891a148456e4e55d38b5f7667cbb5ee686e", "3cf93e3a355eb1d7bf87469782bbf467db3d84e8", "554245e0b95451d26ec7afd4afae9893031c051f", "268a89592d9669e32c99bc71a3a24cd12e0da24f" ], "paperAbstract": "The Internet Archive's Wayback Machine is the largest modern web archive, preserving web 
content since 1996. We discover and analyze several vulnerabilities in how the Wayback Machine archives data, and then leverage these vulnerabilities to create what are to our knowledge the first attacks against a user's view of the archived web. Our vulnerabilities are enabled by the unique interaction between the Wayback Machine's archives, other websites, and a user's browser, and attackers do not need to compromise the archives in order to compromise users' views of a stored page. We demonstrate the effectiveness of our attacks through proof-of-concept implementations. Then, we conduct a measurement study to quantify the prevalence of vulnerabilities in the archive. Finally, we explore defenses which might be deployed by archives, website publishers, and the users of archives, and present the prototype of a defense for clients of the Wayback Machine, ArchiveWatcher.", "pdfUrls": [ "http://www.franziroesner.com/pdf/Lerner-RewritingHistory-CCS17.pdf", "https://repository.wellesley.edu/cgi/viewcontent.cgi?article=1158&context=scholarship", "http://doi.acm.org/10.1145/3133956.3134042" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1c32b6ce44bd647090cf64e6e82c421c4c0c2f35", "sources": [ "DBLP" ], "title": "Rewriting History: Changing the Archived Web from the Present", "venue": "CCS", "year": 2017 }, "1c4226f53c87db52af3808731a99dec83b997ab4": { "authors": [ { "ids": [ "3472043" ], "name": "Hyunwook Baek" }, { "ids": [ "1751513" ], "name": "Cheng Jin" }, { "ids": [ "1791767" ], "name": "Guofei Jiang" }, { "ids": [ "2470254" ], "name": "Cristian Lumezanu" }, { "ids": [ "2358499" ], "name": "Jacobus E. 
van der Merwe" }, { "ids": [ "2501678" ], "name": "Ning Xia" }, { "ids": [ "37377995" ], "name": "Qiang Xu" } ], "doi": "10.1145/3127479.3129258", "doiUrl": "https://doi.org/10.1145/3127479.3129258", "entities": [ "Approximation error", "Data center", "Java HotSpot Virtual Machine", "Multitenancy", "Sampling (signal processing)", "Traffic exchange" ], "id": "1c4226f53c87db52af3808731a99dec83b997ab4", "inCitations": [], "journalName": "", "journalPages": "309-322", "journalVolume": "", "outCitations": [ "138856ad6b8b4cca92965aacb20961aaa4e34a92", "5f4188f380bd2b7c16773e0e6b69a004a072441b", "58a17426e5b999634c2c8df8767095ad1ded7a7d", "b35c49180b6895c20cf1b0f3fd44fc76d4f49319", "282c6a3b573051e3e799d73cfc623ccbd68bcd6a", "5e0a980d0cffc07dbff986b82a155f241dca3344", "399ecee43b56a4ae27d0d87ac42874116d05990a", "cfc82bce122399dae9868441c63b07f01127ea23", "098f2ebd339896a4eafc94495aecd54746fa2504", "2197cbec1c84d7fc8e42b95ba9a6bfe17f1b65ca", "69048c769ed75ae01679b40ca697a6ea4b378db8", "090599a2caf4591c87699ad850c75554cd712937", "66ca052a1977975db95c6c3d9fd881d87d893692", "4650259fb4aadb376fd5994f9ab9dd07a4f83511", "02df9340e08e1e5085f1257bb6ddda4cd225e1d3", "067bd9d975b132dc668013895a5e4298623feebd", "f427015146fbae7b93207ae63b578472b85cb03f", "e551ab67157ea166f94786bbe7c1fe9fcbeb2757", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "0f318aa5af40450af9ba2f50872bdf26741e510a", "a81823711db57bac90f135bece07e647aee51e5f", "55c5a2c1f2f6f66646baf64405464357fa7df8a0", "cd642576ce8502b533e229b537f9ffbe9254aef6", "0706225eeac0f855b19c365313db61252ecde0d7", "0cfe1b85e5f1d56e41a95c3b2fa274e9fe8b45d0", "0c484c5d9e0a13c2bf195c102e2908928c94d9a3", "58f692e9b03cb973355aab46bb6f867239aeb513", "0849e21a444d4a3bbea735a788628bea5543f900" ], "paperAbstract": "Network usage accountability is critical in helping operators and customers of multi-tenant data centers deal with concerns such as capacity planning, resource allocation, hotspot detection, link failure detection, and troubleshooting. 
However, the cost of measurements and instrumentation to achieve flow-level accountability is non-trivial. We propose Polygravity to determine tenant traffic usage via lightweight measurements in multi-tenant data centers. We adopt a tomogravity model widely used in ISP networks, and adapt it to a multi-tenant data center environment. By integrating datacenter-specific domain knowledge, sampling-based partial estimation and gravity-based internal sinks/sources estimation, Polygravity addresses two key challenges for adapting tomogravity to a data center environment: sparse traffic matrices and internal traffic sinks/sources. We conducted extensive evaluation of our approach using realistic data center workloads. Our results show that Polygravity can determine tenant IP flow usage with less than 1% average relative error for tenants with fine-grained domain knowledge. In addition, for tenants with coarse-grained domain knowledge and with partial host-based sampling, Polygravity reduces the relative error of sampling-based estimation by 1/3.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3129258", "http://www.cs.utah.edu/~baekhw/assets/polygravity.pdf", "http://www.flux.utah.edu/download?uid=262" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1c4226f53c87db52af3808731a99dec83b997ab4", "sources": [ "DBLP" ], "title": "Polygravity: traffic usage accountability via coarse-grained measurements in multi-tenant data centers", "venue": "SoCC", "year": 2017 }, "1c42383a51b2832d842df418fc1c35023b6bbd6d": { "authors": [ { "ids": [ "30521811" ], "name": "Hosein Mohammadi Makrani" }, { "ids": [ "1747542" ], "name": "Houman Homayoun" } ], "doi": "10.1109/IISWC.2017.8167751", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167751", "entities": [ "Big data", "Brute-force search", "Central processing unit", "Clock rate", "Cloud computing", "Data rate units", "Internet bottleneck", "Memory hierarchy", "Multi-core processor", "Requirement", "Run time 
(program lifecycle phase)", "Scalability", "Server (computing)", "System configuration" ], "id": "1c42383a51b2832d842df418fc1c35023b6bbd6d", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "2-11", "journalVolume": "", "outCitations": [ "053825c0a1c111e76c18f28b6d8ae13b414f3bed", "76116aa12334141b48abc71024613d4280724d39", "76cea12429afa30f17064e2f343597d10a5efb06", "4c94cf9c41aa971a2a01b6d5108299036d0d891d", "6b1146e51b2c4d5b8433d4d9c6dbf87c6c484196", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "1bed9dbc346fcc7c39ac42c8a3be089a76f4d11a", "c3f443d86a5f1b34c4cbcbf18ec6c08a37d5a649", "6ea670c7deabcf9f0a516a5b89049f1febfbbe38", "9d182554b9c2b20569763c0ff4cd18a97bce923c", "1bed30d161683d279780aee34619f94a860fa973", "78e009cc05a6a832106d5ca6802ce56bef6b247f", "243dcd92c3892f85f0177700a1801add9b32d5b2", "dc19eefd5e13601fa132aa2d8b2a4cdd12351a90", "09483e24a562eb423db73946cf20ab3b324be77e", "1c437e8220d4122476d3a1ea0ca2debc4871aa76", "110c050c6c992d2b956f7b47d717810ac5c91bdc", "00de82b9bbc7528ca6d089cb69f01a4fd3d64301", "581b0e29991ffd8396e2d91b9c53ad483e72d9b8", "41fca6c199464c983cb6384ae65c83eb7522fb46", "19d8f8af2d774bda8b95bbeacf62c85811c37b3c", "32aa639d91607c10520d163726dbafbcd79ea46e", "55b3e22b56599ed8520deb1d7cb9ac460f4fa6bb", "de0a9dc7edcb717711cc743165329791d6d5d0e2", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "03d6143cffcc8cf96abf78d23e74cb1083f54d1b", "5b9c21826a213857fbfe91f42ee423e0ad0af32b", "a751f6f93c46d72a4f8201a0a5df3432243a0991", "d63e4cada8347686372d63a3d00afa89a1515a31", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "00ecfb48c72709de7ea719e658b2a37301136cda", "a694f3583ca033904d9d3073d3693feac585a2bb", "4080cc361e61a3a1145b47c407e4fce88aa280f6", "358ce90c730bbd72c7a94c09ac05677ddfcca492", "9700c76ee9dd6360d8fa366f983a7e7e5a1e9b9a", "aa611707c5ea9d30eba199570e15a27fa422406c", "6d5823a8b19ec5e64ab3ab97fccd0b7ee070baec", "0c978f712efd21b0bdb38d5839063eed639d9e7d" ], 
"paperAbstract": "Scale-out infrastructure such as Cloud is built upon a large network of multi-core processors. Performance, power consumption, and capital cost of such infrastructure depend on the overall system configuration including number of processing cores, core frequency, memory hierarchy and capacity, number of memory channels, and memory data rate. Among these parameters, memory subsystem is known to be one of the performance bottlenecks, contributing significantly to the overall capital and operational cost of the server. Also, given the rise of Big Data and analytics applications, this could potentially pose an even bigger challenge to the performance of cloud applications and cost of cloud infrastructure. Hence it is important to understand the role of memory subsystem in cloud infrastructure and in particular for this emerging class of applications. Despite the increasing interest in recent years, little work has been done in understanding memory requirements trends and developing accurate and effective models to predict performance and cost of memory subsystem. Currently there is no well-defined methodology for selecting a memory configuration that reduces execution time and power consumption by considering the capital and operational cost of cloud. In this paper, through a comprehensive real-system empirical analysis of performance, we address these challenges by first characterizing diverse types of scale-out applications across a wide range of memory configuration parameters. The characterization helps to accurately capture applications' behavior and derive a model to predict their performance. Based on the developed predictive model, we propose MeNa, which is a methodology to maximize the performance/cost ratio of scale-out applications running in cloud environment. MeNa navigates memory and processor parameters to find the system configuration for a given application and a given budget, to maximum performance. 
Compared to brute force method, MeNa achieves more than 90% accuracy for identifying the right configuration parameters to maximize performance/cost ratio. Moreover, we show how MeNa can be effectively leveraged for server designers to find architectural insights or subscribers to allocate just enough budget to maximize performance of their applications in cloud", "pdfUrls": [ "http://ece.gmu.edu/~hhomayou/files/iiswc2017-2017-Hosein-2.pdf", "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167751" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1c42383a51b2832d842df418fc1c35023b6bbd6d", "sources": [ "DBLP" ], "title": "MeNa: A memory navigator for modern hardware in a scale-out environment", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "1c68a88cae1e486e14925637a3acb82164927ac5": { "authors": [ { "ids": [ "2030976" ], "name": "Adam Langley" }, { "ids": [ "32532680" ], "name": "Alistair Riddoch" }, { "ids": [ "33029528" ], "name": "Alyssa Wilk" }, { "ids": [ "1770954" ], "name": "Antonio Vicente" }, { "ids": [ "2645955" ], "name": "Charles Krasic" }, { "ids": [ "1729537" ], "name": "Dan Zhang" }, { "ids": [ "1752128" ], "name": "Fan Yang" }, { "ids": [ "22633977" ], "name": "Fedor Kouranov" }, { "ids": [ "2413810" ], "name": "Ian Swett" }, { "ids": [ "2250153" ], "name": "Janardhan R. 
Iyengar" }, { "ids": [ "40555475" ], "name": "Jeff Bailey" }, { "ids": [ "31935518" ], "name": "Jeremy Dorfman" }, { "ids": [ "3220319" ], "name": "Jim Roskind" }, { "ids": [ "34573933" ], "name": "Joanna Kulik" }, { "ids": [ "2499360" ], "name": "Patrik Westin" }, { "ids": [ "2407516" ], "name": "Raman Tenneti" }, { "ids": [ "3931866" ], "name": "Robbie Shade" }, { "ids": [ "2717420" ], "name": "Ryan Hamilton" }, { "ids": [ "12618889" ], "name": "Victor Vasiliev" }, { "ids": [ "5131817" ], "name": "Wan-Teh Chang" }, { "ids": [ "22640899" ], "name": "Zhongyi Shi" } ], "doi": "10.1145/3098822.3098842", "doiUrl": "https://doi.org/10.1145/3098822.3098842", "entities": [ "Ecosystem", "Encryption", "Experiment", "HTTPS", "Internet", "Iteration", "Multiplexing", "Software deployment", "Streaming media" ], "id": "1c68a88cae1e486e14925637a3acb82164927ac5", "inCitations": [ "23289d02c588bf3652b36bad86ebdd0159b93d8f", "c5c58af475d35c0b880f7a34145bd615cbd6ad7a", "29b29e1ec1491e1f91a1d967cf2324888d4176ac", "5e0077867ea32cfadaa8885206635348cf2f9b64", "715f202fc11670ea3063d0e55d61ca3777814cc2", "cd5fdc7ea21293acb52a7af34e01217d54b1c39e", "7cb20916c6f3cc7c552f2e4cf15801b9a60f4dab", "3a8e2eb20f987e518458386dcd44fd8d11df936d", "26f56d7a326bce3961d50bde268590f8c562d883", "f829bb1adffb7d4eed8918a6cf420e8dd43c6a97", "45cd38b6e3a1915c590edeb5af68b4f12e695fb6", "0457c76af0aa3d0586e3fdd6ece7ea6fda65b7da", "588c005210b2173131844086be5c9048c18a44c1", "f8b094e57d45c8767dd5a54e2baa7bbebb0d488b" ], "journalName": "", "journalPages": "183-196", "journalVolume": "", "outCitations": [ "9f95eb7ce7ce190c7c8e6fca26de1a283f7007b1", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "5bb41f53394645cfce0db1bbf2bfcf8f1e3d4c86", "46a38fb1e26c9957111d154c6fc2c39101ff0ba1", "0e180dbf4edbfd4360d379a68e649262341c35dd", "13d0784d6cb5f4ef4326c92a576525b3973f2c32", "3f200c41618d0c3d75c4cd287b4730aadcf596f7", "67e6d5e7478c4a7c44f4755f1a0ce160a768f94f", "33d97b125bc69c5b472f34eeeb17fbf4006c1335", 
"02adbca269b534eed78dfdb8e52b45b86894a406", "627c0d36688b2252ae3ca0b5f68ce97e341d338d", "78ac0eca5dbb865f24bc9669731eca492199f1e2", "3b2175e6ad4cd33da09d6346f04822345f60a385", "cf5dfdbb0b8d1c673726178f37f499b77fcc7f03", "f9844318b2ec3b1bf5399e54616dc95de3a33dc2", "4d1aa7547f0e8e9d76ba683983250b77aabd5b12", "2d4906884bc5309f1539195ff5b181d41a15ff60", "2d8a132fd622b6b8e46507911f7ab24cbd37e667", "25ded9f81378f6b85daf5a70c85bbadfb84ebc3d", "64a6dd2a598d5f012a54fe6ca06d4f7235f66626", "aebe75efbdade65e22f05b6b8c2386af8fc2b8ff", "c790c036ba874be05c0c9ee29187811070d67f28", "70439de84a56f242290362c6731f6d8bf9b98018", "29c324788b83463aa707784210edbca894694f20", "33b2464e3ae11b55f06685042fc63118fd34b83e", "14ea6606bd1ec5ad380734f8019c507d0851f18d", "b49d33054f525c24ffe1b9a19183fd34012d18bb", "0edd07551910c48f90fa07f7c5da50c8211fb994", "680c4a69a063c2b7c26df3a0ff717d7a61ad9e08", "16fb3aa11a32da5c3ef1e96c5408dcadca727d94", "295c704eaa9056a29604cde206ed0e79cf2a147d", "6fede3d74506ae4e81af2441a44e8aa8881ec6e2", "4da10dc04da8f59b018cb5f64d0b6c950b59309b", "93498ee275de0b3dd04bb43258c950291894ff69", "0f8ccb1bdba17f8c0843be0eb5fda2e8e2e32e95" ], "paperAbstract": "We present our experience with QUIC, an encrypted, multiplexed, and low-latency transport protocol designed from the ground up to improve transport performance for HTTPS traffic and to enable rapid deployment and continued evolution of transport mechanisms. QUIC has been globally deployed at Google on thousands of servers and is used to serve traffic to a range of clients including a widely-used web browser (Chrome) and a popular mobile video streaming app (YouTube). We estimate that 7% of Internet traffic is now QUIC. We describe our motivations for developing a new transport, the principles that guided our design, the Internet-scale process that we used to perform iterative experiments on QUIC, performance improvements seen by our various services, and our experience deploying QUIC globally. 
We also share lessons about transport design and the Internet ecosystem that we learned from our deployment.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098842", "https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46403.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1c68a88cae1e486e14925637a3acb82164927ac5", "sources": [ "DBLP" ], "title": "The QUIC Transport Protocol: Design and Internet-Scale Deployment", "venue": "SIGCOMM", "year": 2017 }, "1c69aace68cbd7648301b0337054e2c4bc6d4915": { "authors": [ { "ids": [ "20829764" ], "name": "Zhiyuan Ai" }, { "ids": [ "4408986" ], "name": "Mingxing Zhang" }, { "ids": [ "1725574" ], "name": "Yongwei Wu" }, { "ids": [ "2064331" ], "name": "Xuehai Qian" }, { "ids": [ "1680073" ], "name": "Kang Chen" }, { "ids": [ "2225511" ], "name": "Weimin Zheng" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Experiment", "Graph (abstract data type)", "Iteration", "Locality of reference", "Out-of-core algorithm", "Programming model", "Speedup" ], "id": "1c69aace68cbd7648301b0337054e2c4bc6d4915", "inCitations": [ "2f9b520b16e05de8e705e374ebea3a121d64ebc2", "3b57c7bcece47f2a3198e6adec38f712f2914be5", "0231ffa4b9b095efbf0f302898cd7abd7dd0b764" ], "journalName": "", "journalPages": "125-137", "journalVolume": "", "outCitations": [ "141b705b2fc28e4a56349996a019d29f1fddf63e", "2d834402cd31dc6cafa23ed098a13cde7082d6af", "2209304ccc2b0501debd5e9a90ae739f9a30cdef", "f7f4136512d2d40ba455f161e64a31cdb099b9ae", "3231d62bec8e8cc1d837e85893889855767c3b13", "2b9e6181502369199bd89691a27f89bdbaac36e4", "8e67d1085da29e5aa1e758751bfa5469ac07023e", "c9bb3728b1e2afe09def0733caffcb99a68baef3", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "0706356c9ab6014d6b04577d38289ea8328291a5", "1156f60e40548096df49528b1342bb3e88b0f378", "420a0e5fc398f197bca3dfe40291a82b2c65655a", "04311b15b444a0f75ea2bb74fca26cc1aefbf3c1", "520a5eac744506da883b1ca87205f70f2d163e98", 
"24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "0597d7259c0a61fa13f2f9aa852525d127a7fe16", "131ec93c0751b6cdeeff4a5d62a7e4810d06f0de", "8d1a4ae1d3a17edd5b16653f2f582f9952b71612", "11e4d4d00c7b1e3aa9fcf3c490b635df98827dd9", "159a7a03ccb0ef751db1870be1de4d26a02470f3", "6cab78d47b96eed942110673f5749d2f8c8b3c59", "2a2eb0e00483288a8b3d2b561dd98e013c5c0275", "3b874ce8d1fedd7f1f31a3c5ec495f4907b59da7", "0a8e0a9f0ae9910d5ecf165071559a2c4191a098", "1ef8c8c815b7268d7f7d4fe76af78aaa8df3e6da", "8ffd906129ad079e1df379a0be17d3f9f0d80b9c", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "22ca3a0e21139e24c64627c35490bebceef26d59", "12c725e60092b3e5b296066dd71eda704b427926", "b513711621e81d0abd042e0877ca751581a993f5", "95e0a5be5d39b5dc820f3d8a21bc94da022760df", "05370a6cc820ffe5393fcc948d7d600b5949a217", "f2c4f4a13e38b906543f4a60278c9ab3641e9c02", "26deee037b221bd05ed34461819f5c067b745445", "0ad8e89091eed09217e66adc98136126addc2619", "2ae3ac3f7463f838c38e6ca250ca294e813529f2" ], "paperAbstract": "The current primary concern of out-of-core graph processing systems is improving disk I/O locality, which leads to certain restrictions on their programming and execution models. Although improving the locality, these constraints also restrict the expressiveness. As a result, only sub-optimal algorithms are supported for many kinds of applications. When compared with the optimal algorithms, these supported algorithms typically incur sequential, but much larger, amount of disk I/O. In this paper, we explore a fundamentally different tradeoff: less total amount of I/O rather than better locality. We show that out-of-core graph processing systems uniquely provide the opportunities to lift the restrictions of the programming and execution model (e.g., process each loaded block at most once, neighborhood constraint) in a feasible manner, which enable efficient algorithms that require drastically less number of iterations. 
To demonstrate the ideas, we build CLIP, a novel out-ofcore graph processing system designed with the principle of \u201csqueezing out all the value of loaded data\u201d. With the more expressive programming model and more flexible execution, CLIP enables more efficient algorithms that require much less amount of total disk I/O. Our experiments show that the algorithms that can be only implemented in CLIP are much faster than the original disklocality-optimized algorithms in many real-world cases (up to tens or even thousands of times speedup).", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-ai.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/ai", "http://alchem.usc.edu/portal/static/download/atc2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/1c69/aace68cbd7648301b0337054e2c4bc6d4915.pdf", "s2Url": "https://semanticscholar.org/paper/1c69aace68cbd7648301b0337054e2c4bc6d4915", "sources": [ "DBLP" ], "title": "Squeezing out All the Value of Loaded Data: An Out-of-core Graph Processing System with Reduced Disk I/O", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "1c7c4a91e6fed536b35dc4ea6c8c781149563852": { "authors": [ { "ids": [ "1811289" ], "name": "Johanna Amann" }, { "ids": [ "37349739" ], "name": "Oliver Gasser" }, { "ids": [ "2615819" ], "name": "Quirin Scheitle" }, { "ids": [ "40636656" ], "name": "Lexi Brent" }, { "ids": [ "2902874" ], "name": "Georg Carle" }, { "ids": [ "16590481" ], "name": "Ralph Holz" } ], "doi": "10.1145/3131365.3131401", "doiUrl": "https://doi.org/10.1145/3131365.3131401", "entities": [ "Certificate Transparency", "DNS Certification Authority Authorization", "DNS-based Authentication of Named Entities", "Downgrade", "Ecosystem", "HTTP Public Key Pinning", "HTTP Strict Transport Security", "HTTPS", "Internet", "Man-in-the-middle attack", "Poor posture", "Public key infrastructure", "Software deployment", "Transport Layer Security" ], "id": 
"1c7c4a91e6fed536b35dc4ea6c8c781149563852", "inCitations": [ "32187449ad863fa01597b1a857ab5dc8677769cc", "ee75f05092768d372c72cb215006c551a5da7a8e" ], "journalName": "", "journalPages": "325-340", "journalVolume": "", "outCitations": [ "23eb53170c6de9ff5024db120eda200816fa803f", "13315d952a43c391bf4910271fc2582858e86e9e", "08026d939ac1f30951ff7f4f7c335bf3fef47be4", "06c87865bc8f19df60db5c37e504146b0735255a", "73209bd8fbd87e50a6d6396808fe08c33a64bd6a", "1fce7fc0be60d47a7b10644e38e378a6673e8bc3", "7a78dbb8ceb0200add67dbe6479685bbde7e77ab", "73a4b40d75a73cb692d7309ecc84086f6ad0ce7d", "8cdbab26fa0dee8f165b6680e59e8966679fd068", "6c5395868a818c6f414c653a30376461240bd366", "67f8f4962cd52ed81742e66991151d361cf7078a", "09f6e5f085965ccf8316c4450554b0c8db3d48cb", "1eea9f527d7902748b14b807e7d544d933734ce6", "54bdf79b699c26dab5047186d533ba10f1fe22a8", "46a38fb1e26c9957111d154c6fc2c39101ff0ba1", "8eb774f27838a092d7a8692e5b7837f4ef58a42b", "84037ccdd1b5eacb404153194792c026b692a63f", "3c338bb3dcc10b7c840b4dbf3ad32e8256313ee3", "8eed5a9e15c54026a670c66207f14d0393f3df6b", "1e4ce85958bdb47f1b31802b6517334e6e26db7b", "1f0665485f7fbc06675c981866efab2c4ccbcdd4", "76a25aa39186c3dc2a3c90acb52f75a9498c5012", "ee5c1eb9410e64ca4b58036dd6746a02d32c7874", "06c9511a3a29f1afa3971b1885ad56b5a890dbdc", "744159a1774cafade8c6726e33dd8741b7d27dd0", "0641830054d30adf5c115adc0fd369f3ecdc6d73", "4e33e9d1aa91aa78819ec9700d1024e0f0cdef6c", "1684b97e72f2ce8cb47cadaf09287564df78a01f", "9f4fe4aa24f49ba66c9d5f80733b76fcac158fd7", "78da946e9b1d1156ecf209044bff05e9b3e92ccf", "0e38d1e8e6e2fb986c6e8a15cae93d6ea87cf848", "01c5cf8ec8377d4b551d52e94873530c7413293a", "dfb0ad47be3046e0c8a973bc46df70883006dce1", "79bd38c9e0d04ac58f23da6e3ce12b241db1260f", "08e9542de3cbfe791bf86a0dee6ba5e83bc29ea7", "2b6ce083906634e3c3b084e4c9139fb58f082df6", "07045bed2f4d3efc649f3665e84feb895f27fede" ], "paperAbstract": "Driven by CA compromises and the risk of man-in-the-middle attacks, new security features have been added to TLS, 
HTTPS, and the web PKI over the past five years. These include Certificate Transparency (CT), for making the CA system auditable; HSTS and HPKP headers, to harden the HTTPS posture of a domain; the DNS-based extensions CAA and TLSA, for control over certificate issuance and pinning; and SCSV, for protocol downgrade protection.\n This paper presents the first large scale investigation of these improvements to the HTTPS ecosystem, explicitly accounting for their combined usage. In addition to collecting passive measurements at the Internet uplinks of large University networks on three continents, we perform the largest domain-based active Internet scan to date, covering 193M domains. Furthermore, we track the long-term deployment history of new TLS security features by leveraging passive observations dating back to 2012.\n We find that while deployment of new security features has picked up in general, only SCSV (49M domains) and CT (7M domains) have gained enough momentum to improve the overall security of HTTPS. Features with higher complexity, such as HPKP, are deployed scarcely and often incorrectly. Our empirical findings are placed in the context of risk, deployment effort, and benefit of these new technologies, and actionable steps for improvement are proposed. We cross-correlate use of features and find some techniques with significant correlation in deployment. 
We support reproducible research and publicly release data and code.", "pdfUrls": [ "http://ralphholz.science/publications/MissionAccomplishedHttpsSecurityAfterDiginotar.pdf", "http://www.icir.org/johanna/papers/imc17httpssecurity.pdf", "https://conferences.sigcomm.org/imc/2017/slides/imc17_1.pdf", "http://doi.acm.org/10.1145/3131365.3131401" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1c7c4a91e6fed536b35dc4ea6c8c781149563852", "sources": [ "DBLP" ], "title": "Mission accomplished?: HTTPS security after diginotar", "venue": "IMC", "year": 2017 }, "1c860ede7e14e4cd9210cbad8d0c8619673f87ca": { "authors": [ { "ids": [ "28981429" ], "name": "Guilherme Cox" }, { "ids": [ "21975961" ], "name": "Abhishek Bhattacharjee" } ], "doi": "10.1145/3037697.3037704", "doiUrl": "https://doi.org/10.1145/3037697.3037704", "entities": [ "CPU cache", "Central processing unit", "File Allocation Table", "Graphics processing unit", "MIX", "Memory protection", "Mix network", "Operating system", "Page (computer memory)", "Translation lookaside buffer" ], "id": "1c860ede7e14e4cd9210cbad8d0c8619673f87ca", "inCitations": [ "aea4c2741f5b173a03319d2aa9534b16045b70c9", "6ff08854494ec866510cbb23fb0e18c1f977007e", "65c302fc5eedfb33824ef18879eb53cc0327ea41", "8219bf467b82208a98aa7b45e67f35ed740b979f", "d1fcc29063f09305969a678313ddba7e9f4f6e9c", "5389fccd8e6679331eb4042d34f53ca8af3b9f5e", "1171eecf13da9beb57b06a3c6e8a323e89b8e1ff", "044f5a9c7b571f42cb47c7bc82a2aeb9752002f1", "0581754e392d4a648f6a7b7665e3561df8627157", "09da9a22e89c5e3a2e6e9f1995fc6cd2b7e92a0b", "9320d0a0b7735afb46adc4574443a580951f778e" ], "journalName": "", "journalPages": "435-448", "journalVolume": "", "outCitations": [ "5fff6219bc66df34ef8dbdf00ae6848f69583883", "5389fccd8e6679331eb4042d34f53ca8af3b9f5e", "5cdf290c839ba8753876bf255ed8c99fb4ba1299", "533d720a8542b707c316d39cf5beeb58738af86d", "5ece19ddc8abc5454426deece280d0750972c2da", "028c2ca1fddbb8919e7fa28e6cce7b3bce0f88f0", 
"35c3882db9e1b2bdf838122787968679595f61de", "89f4842ef627eb667691b5329e1eaac9bd66a0bf", "d875686d4b910315859db0bc477875cc8d1c1acd", "2bfdc5c3d3da06843ecc0b8d04e54acff419487a", "014ba063a3721973ba6af6503232d4d21d1456bb", "5d3c30ae77994e16fb2ee486cff96d4c52bccfd9", "9c001d2546b07f4325dfa32d46f602bdf56ec474", "1bed30d161683d279780aee34619f94a860fa973", "44f779d08d629e915ee7cbe1c0f4f17e9f6a626f", "f3e9db1605922044ca4506dbee54841caf821a0a", "0653e2ed9f683868cb4539eb8718551242834f6b", "45f9391ba46daa4b119063f4d5077a1b7f7fde75", "caacd536fa218ef5218021506ebc041e3f460064", "daeff61502115efc4b9ee81607a8e5489215ea88", "671958087f3c24e7b025019476be8918302270e2", "f4ed7fb35916bd0d36a53198384bb0ed2ff34c3f", "956886e5d439f35864bb9ea0ea89e29932330b2d", "bd1b8a27acb66ade711f08e59bb11f50593f3f21", "07a63423cc46ec67ff18f707379b77ebdfbc1eb9", "1ddd08b8610ffe95cb85d2aab2ff08c2722c9772", "33196b69eeec351efd5178eae5da92979bdc6fd7", "6ac87f5ef30f787e5dacd59e97d7804e99a7366e", "28af524636137424ad574afa38463b4771e6f006", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "262b171901ad2668a77897dcc7ddea8a2efe9e7c", "4bce8e7c13331dbffa05d6cfc086efd04e0317a9", "3415d0e437f2ecddee7a8e2efa9010d22c211a68", "2637871fde2a6393e8c63c924721bdc6303a2e70", "3364bc50921a9566d61ef8cb73baa82341725e4b", "1c2aed008f7bcb769d0e6f8109434c766fb22bde", "0a0bf9e017e05d58b85e793e58148d2946259a74", "2a59eb5eacb88eb893a31fc8bdee2c4385e22d7a", "2037e142f3b45da72d5c99c0c0de2bb506d4a829", "2a660e81e6501ec3489d962fe87448ecf277237f", "5f3e6fd1668e0732c91ff72061ece073224f1745", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "211f2beaaf36bb6a920a63dbbef6842cb1d22468" ], "paperAbstract": "Processors and operating systems (OSes) support multiple memory page sizes. Superpages increase Translation Lookaside Buffer (TLB) hits, while small pages provide fine-grained memory protection. Ideally, TLBs should perform well for any distribution of page sizes. 
In reality, set-associative TLBs -- used frequently for their energy efficiency compared to fully-associative TLBs -- cannot (easily) support multiple page sizes concurrently. Instead, commercial systems typically implement separate set-associative TLBs for different page sizes. This means that when superpages are allocated aggressively, TLB misses may, counter intuitively, increase even if entries for small pages remain unused (and vice-versa). We invent MIX TLBs, energy-frugal set-associative structures that concurrently support all page sizes by exploiting superpage allocation patterns. MIX TLBs boost the performance (often by 10-30%) of big-memory applications on native CPUs, virtualized CPUs, and GPUs. MIX TLBs are simple and require no OS or program changes.", "pdfUrls": [ "https://www.cs.rutgers.edu/~abhib/gcox-asplos17.pdf", "http://doi.acm.org/10.1145/3037697.3037704", "http://paul.rutgers.edu/~gc372/dw/gcox-asplos17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1c860ede7e14e4cd9210cbad8d0c8619673f87ca", "sources": [ "DBLP" ], "title": "Efficient Address Translation for Architectures with Multiple Page Sizes", "venue": "ASPLOS", "year": 2017 }, "1c9be5ee1aace2562220b1040a1133678d9a0b32": { "authors": [ { "ids": [ "32817044" ], "name": "Samyam Rajbhandari" }, { "ids": [ "1772774" ], "name": "Yuxiong He" }, { "ids": [ "2537545" ], "name": "Olatunji Ruwase" }, { "ids": [ "1701041" ], "name": "Michael Carbin" }, { "ids": [ "3191220" ], "name": "Trishul M. 
Chilimbi" } ], "doi": "10.1145/3037697.3037745", "doiUrl": "https://doi.org/10.1145/3037697.3037745", "entities": [ "Artificial neural network", "Central processing unit", "Computation", "Convolution", "Convolutional neural network", "Goodput", "Natural language", "Natural language processing", "Outline of object recognition", "Parallel computing", "Pattern recognition", "Program optimization", "Scalability", "Scheduling (computing)", "Sparse matrix", "Speech recognition" ], "id": "1c9be5ee1aace2562220b1040a1133678d9a0b32", "inCitations": [ "626f7c268b68a0955f9c7c6cfc2edff4d2e3291f" ], "journalName": "", "journalPages": "267-280", "journalVolume": "", "outCitations": [ "1262176518bb210bd46f120d3782f1677af180cd", "004eda59c0ffceb2417bee87c95539eae4bdf0cd", "132e3d3b5cfc2f59db6ed69ac1eac4a1ee6dca71", "14a477cf712ad5647180e6233dd0638c6c269fdd", "12e31accd9a9acc902706728c4c8258925366b3d", "34f25a8704614163c4095b3ee2fc969b60de4698", "209932cd2e3f5da071c4f6341a3b8b29cf50cc4a", "ba259c68719b8f43ead1d23c04e5389d12ea4dad", "2d83ba2d43306e3c0587ef16f327d59bf4888dc3", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "769d75e9cb010b76ba412d9654cf43c4edf15076", "2538e3eb24d26f31482c479d95d2e26c0e79b990", "021fc345d40d3e6332cd2ef276e2eaa5e71102e4", "80d800dfadbe2e6c7b2367d9229cc82912d55889", "24a59d6746fec5ff5ead37041c8d03bce3369a9a", "6074c1108997e0c1f97dc3c199323a162ffe978d", "944a5c99260e96926c3ebe363114e080e81bf8a0", "398c296d0cc7f9d180f84969f8937e6d3a413796", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "1bec73cbc0ffb8eb32d6da63895f5319b7409386", "0934508c768ff8ba9744678ad92e51dfdbd5f122", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "1a07186bc10592f0330655519ad91652125cd907", "0c6c30e3052fcc01aa5ee38252d77f75322d7b3f", "043afbd936c95d0e33c4a391365893bd4102f1a7", "02c39e76d12154df1ab6372687814fb7e20d3601", "81b7dcaef4a53daab41658a4d1e97972d04b3384", "3087eeb39c88b1fc9bdc72812930451fc98cedec", "4a2d7bf9937793a648a43c93029353ade10e64da", "60f22ad725f041fff81d6371242485bbe5c3ebb6", 
"f5f1beada9e269b2a7faed8dfe936919ac0c2397", "7b1cc19dec9289c66e7ab45e80e8c42273509ab6", "4788873f23fbfbca24744f0fa0d8e602c9403fba", "29c7d959f322f7006186e3c344ddb9e41e3a8d1f", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "061356704ec86334dbbc073985375fe13cd39088", "2ffc74bec88d8762a613256589891ff323123e99", "2f45a46072455fbc52f781efea98c669ad7a6658", "2cc157afda51873c30b195fff56e917b9c06b853", "a7621b4ec18719b08f3a2a444b6d37a2e20227b7", "019ebe0205a759f8dab80b617f9f8ccd179c5c62", "0122e063ca5f0f9fb9d144d44d41421503252010" ], "paperAbstract": "Convolutional Neural Networks (CNN) are a class of Ar- tificial Neural Networks (ANN) that are highly efficient at the pattern recognition tasks that underlie difficult AI prob- lems in a variety of domains, such as speech recognition, object recognition, and natural language processing. CNNs are, however, computationally intensive to train. This paper presents the first characterization of the per- formance optimization opportunities for training CNNs on CPUs. Our characterization includes insights based on the structure of the network itself (i.e., intrinsic arithmetic inten- sity of the convolution and its scalability under parallelism) as well as dynamic properties of its execution (i.e., sparsity of the computation).\n Given this characterization, we present an automatic framework called spg-CNN for optimizing CNN training on CPUs. 
It comprises of a computation scheduler for efficient parallel execution, and two code generators: one that opti- mizes for sparsity, and the other that optimizes for spatial reuse in convolutions.\n We evaluate spg-CNN using convolutions from a variety of real world benchmarks, and show that spg-CNN can train CNNs faster than state-of-the-art approaches by an order of magnitude.", "pdfUrls": [ "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/04/samyam-asplos17.pdf", "http://doi.acm.org/10.1145/3037697.3037745" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1c9be5ee1aace2562220b1040a1133678d9a0b32", "sources": [ "DBLP" ], "title": "Optimizing CNNs on Multicores for Scalability, Performance and Goodput", "venue": "ASPLOS", "year": 2017 }, "1cbada0c285c0cd1f884afa82af6a505b81b9a1b": { "authors": [ { "ids": [ "1857191" ], "name": "Rebekah Overdorf" }, { "ids": [ "36654005" ], "name": "Mark Ju\u00e1rez" }, { "ids": [ "3307413" ], "name": "Gunes Acar" }, { "ids": [ "1754991" ], "name": "Rachel Greenstadt" }, { "ids": [ "3091712" ], "name": "Claudia D\u00edaz" } ], "doi": "10.1145/3133956.3134005", "doiUrl": "https://doi.org/10.1145/3133956.3134005", "entities": [ "Best, worst and average case", "Canvas fingerprinting", "Digital footprint", "Fingerprint", "Information", "Spatial variability", "Tor Messenger" ], "id": "1cbada0c285c0cd1f884afa82af6a505b81b9a1b", "inCitations": [ "9a8a43e774851e9f9029fa91189ec85d4db06b28", "97219fe63c4e3c95b39b0b9f2956217f3dedda14" ], "journalName": "", "journalPages": "2021-2036", "journalVolume": "", "outCitations": [ "1a987c4fe65fa347a863dece665955ee7e01791b", "0ae31e412826cf1dfb45c85b14df33f1863b5011", "42fc84bb1b9e1445ea393ff2d1dda78d748276ea", "18b1c62d6c7fa0e619f0c13172d8852b3d5a71fe", "2452c0b6563b95c743e70b91782af73f4aba6826", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "39a651ace163e7741bc98e266201afe83ad63219", "067c6f27a67976a7e67f567e5568996d6ca14962", 
"377240154366ee44e56e172c2279b3a81cf2c50b", "3b00ce776aa36ae4cdcb9d96d219bad61d5d510e", "1ed8886fb8a9ba6b57b8742d0e5b76a32daa8fad", "096a2026fa46abd43143e8ae3dc0ce6414310cf9", "133e0e83dc6877c6d417431e875cd57876153893", "0662c6e20750068e03711b950ee4b730f61ef342", "d9ef20252f9d90295460953e8ab78667b66919ad", "2e4b61ca5ff7af8743e4365edeb40cd87df15c5a", "69a3527f4e2d301536cbe28e02d3789bcdc66c11", "2fc986fd942797c0bcbebf01f464b375f1dd464d", "af5582fb02dceaaf4b0fef84ea6bd8fb7ca14acc", "2b5e90bc36ded540baf24cdc8a5bb8e66032bc08", "065bbbf17109015ebdb6921f58af8f33dce90a3c", "ac8bf8fb2489950d0cd616def80e5c78afc3e9fa", "8750c0b8094957003fd7f681f9ef8af47b86a99d", "2d2ff1db0079fc0a47a37d41be43c0c9a435e4bb", "3212929ad5121464ac49741dd3462a5d469e668d" ], "paperAbstract": "Recent studies have shown that Tor onion (hidden) service websites are particularly vulnerable to website fingerprinting attacks due to their limited number and sensitive nature. In this work we present a multi-level feature analysis of onion site fingerprintability, considering three state-of-the-art website fingerprinting methods and 482 Tor onion services, making this the largest analysis of this kind completed on onion services to date.\n Prior studies typically report average performance results for a given website fingerprinting method or countermeasure. We investigate which sites are more or less vulnerable to fingerprinting and which features make them so. We find that there is a high variability in the rate at which sites are classified (and misclassified) by these attacks, implying that average performance figures may not be informative of the risks that website fingerprinting attacks pose to particular sites.\n We analyze the features exploited by the different website fingerprinting methods and discuss what makes onion service sites more or less easily identifiable, both in terms of their traffic traces as well as their webpage design. 
We study misclassifications to understand how onion services sites can be redesigned to be less vulnerable to website fingerprinting attacks. Our results also inform the design of website fingerprinting countermeasures and their evaluation considering disparate impact across sites.", "pdfUrls": [ "https://www.freehaven.net/anonbib/cache/unique-ccs2017.pdf", "http://doi.acm.org/10.1145/3133956.3134005", "https://homes.esat.kuleuven.be/~mjuarezm/index_files/pdf/vulnerability-onion-services.pdf", "https://users.cs.fiu.edu/~carbunar/teaching/cis5374/cis5374.2017/slides/onion.pdf", "https://arxiv.org/pdf/1708.08475v2.pdf", "http://arxiv.org/abs/1708.08475", "https://arxiv.org/pdf/1708.08475v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1cbada0c285c0cd1f884afa82af6a505b81b9a1b", "sources": [ "DBLP" ], "title": "How Unique is Your .onion?: An Analysis of the Fingerprintability of Tor Onion Services", "venue": "CCS", "year": 2017 }, "1ccd9611c8069cac328d62e73cc4a6200e26b2c6": { "authors": [ { "ids": [ "3014661" ], "name": "Morgan Harvey" }, { "ids": [ "9570205" ], "name": "Matthew Pointon" } ], "doi": "10.1145/3077136.3080770", "doiUrl": "https://doi.org/10.1145/3077136.3080770", "entities": [ "Mobile device", "Simulation", "USB On-The-Go", "User experience", "User interface" ], "id": "1ccd9611c8069cac328d62e73cc4a6200e26b2c6", "inCitations": [], "journalName": "", "journalPages": "155-164", "journalVolume": "", "outCitations": [ "31821f81c091d2deceed17206528223a8a5b8822", "4a516095f91364db4d0dea01e9c2576000dd7a02", "06b01345affa05eb5cdc77369fdbdf7f5da4a8fa", "162856303685ac7de423775bb4db4327d5fb689f", "cc0c631b5cd4f156992dd2500296ce85fd1dbadb", "b4f907cd4a4ace8ab17bc06472b003de9b2515fe", "449bbbf1bb3e39b119b264cb918dd4d82ccba013", "1784faff5fb5e821ddfc38138eb93691e2e20693", "8118cecf213ca462480d5d350eb1cbdd3dc13fdb", "465e3ec5d536ed718e513b5fd824ba154731a82b", "0353d56503f0418629f8306987d7a99dead41864", 
"3b87d215d12d68832b8c70dbc03122dff975f6ce", "95a2cf5575f56e49573bb138b2d26b1742dfe561", "92cb2ef1a0724bf8fa1a5d1ad890d5240fc17328", "da85b7f017a92a8faad06ac06874e38f450263f6", "7fe7ce5d78555825d084ab0f8cf6d8ae8d91f63a", "09c0211bf1458bd383cb984b577a9dce819d9bd5", "d88d23fd7dc6e07dc18e1fd7726fd3e3681ab675", "58a657e2b56d203185fa759bf7829cd7ba92764d", "0f49dae309c43413912eda0d7746d95846429cc6", "48d2b4ccbf7d64ad568a5acdfffcec3e41797021", "053601b6551263d43615760fb77e01ed7882c312", "4b81b7282f9cc928486ee3ad008f23fe76ed6353", "96bc08b0bd516c41a7872f83d31b6fb72bb0f4fa", "a67a2bc5e9f85fefd515c948691f5ecc03e560c1" ], "paperAbstract": "Smart phones and tablets are rapidly becoming our main method of accessing information and are frequently used to perform on-the-go search tasks. Mobile devices are commonly used in situations where attention must be divided, such as when walking down a street. Research suggests that this increases cognitive load and, therefore, may have an impact on performance. In this work we conducted a laboratory experiment with both device types in which we simulated everyday, common mobile situations that may cause fragmented attention, impact search performance and affect user perception.\n Our results showed that the fragmented attention induced by the simulated conditions significantly affected both participants' objective and perceived search performance, as well as how hurried they felt and how engaged they were in the tasks. Furthermore, the type of device used also impacted how users felt about the search tasks, how well they performed and the amount of time they spent engaged in the tasks. 
These novel insights provide useful information to inform the design of future interfaces for mobile search and give us a greater understanding of how context and device size affect search behaviour and user experience.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080770" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1ccd9611c8069cac328d62e73cc4a6200e26b2c6", "sources": [ "DBLP" ], "title": "Searching on the Go: The Effects of Fragmented Attention on Mobile Web Search Tasks", "venue": "SIGIR", "year": 2017 }, "1cd788be91ebaf4b6b176cc5259042cc5edb4de0": { "authors": [ { "ids": [ "19303672" ], "name": "Mathias V. Pedersen" }, { "ids": [ "1795666" ], "name": "Aslan Askarov" } ], "doi": "10.1109/SP.2017.64", "doiUrl": "https://doi.org/10.1109/SP.2017.64", "entities": [ "Byte", "Data center", "Garbage collection (computer science)", "Information sensitivity", "Memory management", "Observable", "Stock and flow", "Timing channel" ], "id": "1cd788be91ebaf4b6b176cc5259042cc5edb4de0", "inCitations": [ "f3b12dccf59fac9eec0eada15cabc3d71f18cdf1", "84720803c56b7f4800154fd005c59cd79581d1a8" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "693-709", "journalVolume": "", "outCitations": [ "600ce036648f6d92b2a4aa7ac5e20407afba7e9b", "451ce08a5335b00cda49877ba1335e95a91c5af7", "77330a6d1345f62b99ae720cf128b9c60e2674f1", "a931c0ea2348f3a8105c1257748da77bad0f4b57", "4e7334db18da606f0ddb85caab476a026337aa1e", "14f4f56ce50c5fbfe5f9908097ba28dbf4a5f561", "bca9de6bffae8bfa2f04775087b85358a2a97dcb", "11e3be3860525e61ada7830e770d73af5fb8fd56", "03f02ace66cd558fa4dd465bc988edbe6cee8d5e", "0904bcfc6bac02fccaaae307cfec2c7c30d5b620", "72951c3fece50517787674b6402f67f732a50adf", "8969dd230fd98ac8fc50c6deb0c14a7cb9917c73", "4d624b942a58818f8d425460638cb4b65ed84e1c", "2baf1c8a26ad2fe930ae79712eb08913107d77fe", "2c6533d714d8dc4d3f7faf418db93c38df642fea", "545476e378e77049052d0b2a7b43f35b9bfb93be", 
"2200640161a8fe6ce3a03c7bad586e890f10679f", "09f65a44f846a0f312c1cfafd70d1475e38b0407", "0c515587e546ea2bdf9ac77eaf0d8bc578954443", "43393a561914f05be312a1dff5a757cbc384d1a1", "2a377a64762c5097e7e9c1e6d52c057bf11c4b93", "7fac24102875c92450b676577961e375ce517487", "0025870ef15a8f2858ff4186329d4bde316e9e01", "3671af9d7655977e573bd123f93470f978ea7a62", "dcd854ab0cd80cb33f62215be0d25f22077716b0", "0a0af7a99fa3772a7ed654f74758276406eaf9ef", "03c506f2952aae528039e9108a35a13243dfcc8b", "527d6b304013472b61091b8460c5d883cd2f65f2", "0bfc1f3d7613bec3ce0daec72a7ccf44e9f13bb6", "182a81eaf31b1a76be592c0890182cacd4199be0", "0aa7acd8b1158b0f868ffec9face871424f39946", "2606815d751808aa25b47615e13d48e608fa8158", "3cc88f6e02b919b48ffb7395489379f55edcb74c", "737d041822cca60a341e4058ba2bac803fe0eed0", "0fda9bbccd6908637e2ead1cef69f091bfda75d4", "4ea1a23b31a0c3c6c63edb6d5e22943f3a214739", "519810f2bc7760e7873675d2b4ddadc51cf64d6e", "2b6df21137f30d25494bb58521a6062f93e915f8", "03ad81f6276792a78312471429fc9495b89a1ffc", "5f4268edf3d28aacfe928af6719cdd3082207a5e", "c19c45531a5f6c114e9da1dbfea8aedc31c31e0d", "972f64e13e596f94f33c95e76757c1bbd7f23d53" ], "paperAbstract": "This paper studies information flows via timing channels in the presence ofautomatic memory management. We construct a series of example attacks thatillustrate that garbage collectors form a shared resource that can be used toreliably leak sensitive information at a rate of up to 1 byte/sec on a contemporarygeneral-purpose computer. The createdchannel is also observable across a network connection in a datacenter-likesetting. 
We subsequently present a design of automatic memory management that isprovably resilient against such attacks.", "pdfUrls": [ "https://cseweb.ucsd.edu/~dstefan/cse291-winter18/papers/pedersen:trash-to-treasure.pdf", "https://doi.org/10.1109/SP.2017.64" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1cd788be91ebaf4b6b176cc5259042cc5edb4de0", "sources": [ "DBLP" ], "title": "From Trash to Treasure: Timing-Sensitive Garbage Collection", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "1cfb1c37e4923003a5a424b008cc326d620f4464": { "authors": [ { "ids": [ "2363659" ], "name": "Yifeng Gao" }, { "ids": [ "2301135" ], "name": "Jessica Lin" }, { "ids": [ "1737800" ], "name": "Huzefa Rangwala" } ], "doi": "10.1109/ICMLA.2016.0011", "doiUrl": "https://doi.org/10.1109/ICMLA.2016.0011", "entities": [ "Algorithm", "Approximation algorithm", "Belief revision", "Data compression", "Experiment", "Grammar induction", "Iteration", "Motif", "Noise reduction", "Time complexity", "Time series" ], "id": "1cfb1c37e4923003a5a424b008cc326d620f4464", "inCitations": [ "7e72d44157966054debb8057ebf5bc367afdd21e", "db85a72d78ed70b126eb0e928c9c0e4fbe143a83", "08dd7e93a335e9bc5b070e9ab1d5ed217dd279ea" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "111-116", "journalVolume": "", "outCitations": [ "0bfdd7fb60bb4959ac38fadca7dcfbf549dd5456", "6bd284db683537ed8b1550999767c4bd31a2037c", "9b60975837beb2800ec510c62035700e2aa754d9", "3e9d1af4391f977f891292ced772c9ed54bd34b2", "051a1daa2678d61eb35f680aee2fc1cf57daa2b7", "c2f62efc8216bbbef4b82d8b65d17670c20c4ead", "7f4c86f6eb1e2538156e4ddebff3ac018f835404", "6423f92c8a5ba292d72031fdad999cfb976e0477", "abf98027e59d9278ea81d4ca12df3eb1196be070", "244e27bf8c5a5277a733c5271fdeb306f98115d5", "38eef1a61eb07f1e13da453ddd0c5d4acc3424bb", "2409557812a3d26258949ba73a05031591f42bdc", "0204b90797eeb00e63561e7decd4b60606b5be1d", 
"41ed0d295d76b387bf1e4d13ee99a1511c338756", "02aca8223525caa99efc4b0e2810e450ee6776ba", "1be90a2f40d10acd17d5910eb21fb3b4a117d08b", "128677d6312e8cc249e6039db83f40e3fc71b5b2", "1a8692bf51b0d35314f097320c5f376b4b5762d9" ], "paperAbstract": "In recent years, finding repetitive similar patterns in time series has become a popular problem. These patterns are called time series motifs. Recent studies show that using grammar compression algorithms to find repeating patterns from the symbolized time series holds promise in discovering approximate motifs with variable length. However, grammar compression algorithms are traditionally designed for string compression. Therefore, existing work on grammar induction has not fully utilized much available information that can be used to enhance the performance of the algorithms. In this work, an iterative framework based on grammar induction is proposed. In each iteration, a revision operator called Noise Reduction Operator is applied to revise the symbolized time series string based on the rules returned from a base grammar induction algorithm. 
In our experiments, we show that the proposed work can find motifs of the same quality, with much faster running time compared to the state-of-the-art variable-length exact motif discovery algorithm in real world time series data.", "pdfUrls": [ "http://cs.gmu.edu/~jessica/publications/ItrSequitur_ICMLA16.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.20", "https://doi.org/10.1109/ICMLA.2016.0011" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1cfb1c37e4923003a5a424b008cc326d620f4464", "sources": [ "DBLP" ], "title": "IterativE Grammar-Based Framework for Discovering Variable-Length Time Series Motifs", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2016 }, "1d00ab27d0a359852660a1f6b61a93dac18d60a7": { "authors": [ { "ids": [ "3089958" ], "name": "Pavel Valov" }, { "ids": [ "2113555" ], "name": "Jean-Christophe Petkovich" }, { "ids": [ "3123889" ], "name": "Jianmei Guo" }, { "ids": [ "1733430" ], "name": "Sebastian Fischmeister" }, { "ids": [ "1726889" ], "name": "Krzysztof Czarnecki" } ], "doi": "10.1145/3030207.3030216", "doiUrl": "https://doi.org/10.1145/3030207.3030216", "entities": [ "Approximation error", "Computational complexity theory", "Feature selection", "Performance prediction", "Software system" ], "id": "1d00ab27d0a359852660a1f6b61a93dac18d60a7", "inCitations": [ "a060a4be76d6789113df800da0f20aa62ac99990", "24b5a87935a435182f902702676dbe8411a2ee82", "6851a96342efe8bebe0815d5391c1ce755fc5832", "26e3caeb87260e9e65924fc48645f51c90adaec4" ], "journalName": "", "journalPages": "39-50", "journalVolume": "", "outCitations": [ "79787cf7dbac7bfbefdad313d2671a4b02dc63fb", "187abd688e8ebd32b4d911f8c16655558d5ef229", "8b7247f0c2926651a74abaff8c819b5cfa576bff", "c7c090568375cfddcfee0701d9478a6285e1058d", "0c590a850fc9d87ca46d82b836b2cc1093876d23", "ee7583b318e6813ea1133cd0c574eae5112e1dec", "022c6f4d3048637ed87ab469613ccab01c6d56c2", "2551e3dab3164936bb43256e7d1f7ca0d07714ab", 
"66b3b797c60eec2cc2376356eff6ade2cf52ddbd", "8681e808a9ebd7f7f155590e75fb63563a8aae6e", "e831c1b8e16becad7c597db41c4663203c6a4b4b", "6b180123ab7ced1a81f4b65b5474fea0020dab5a", "81b0a75a0d29b2fd2c035ff6eae70c90b338bcd1", "7c5342920487b3652c5c17e6bcb88d31f6f95dbe", "68133a8fef39b4d7ceb60f9e6284255ac5b2ef1d" ], "paperAbstract": "Many software systems provide configuration options relevant to users, which are often called features. Features influence functional properties of software systems as well as non-functional ones, such as performance and memory consumption. Researchers have successfully demonstrated the correlation between feature selection and performance. However, the generality of these performance models across different hardware platforms has not yet been evaluated.\n We propose a technique for enhancing generality of performance models across different hardware environments using linear transformation. Empirical studies on three real-world software systems show that our approach is computationally efficient and can achieve high accuracy (less than 10% mean relative error) when predicting system performance across 23 different hardware platforms. 
Moreover, we investigate why the approach works by comparing performance distributions of systems and structure of performance models across different platforms.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030216" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1d00ab27d0a359852660a1f6b61a93dac18d60a7", "sources": [ "DBLP" ], "title": "Transferring Performance Prediction Models Across Different Hardware Platforms", "venue": "ICPE", "year": 2017 }, "1d1a33579f6baa1c96eb2ff129bc088b8161d0cf": { "authors": [ { "ids": [ "1726241" ], "name": "Chen Sun" }, { "ids": [ "1683085" ], "name": "Jun Bi" }, { "ids": [ "3248218" ], "name": "Zhilong Zheng" }, { "ids": [ "28825117" ], "name": "Heng Yu" }, { "ids": [ "7150554" ], "name": "Hongxin Hu" } ], "doi": "10.1145/3098822.3098826", "doiUrl": "https://doi.org/10.1145/3098822.3098826", "entities": [ "DPDK / dpdk.org", "LXC", "Linux", "Load balancing (computing)", "Network function virtualization", "Parallel algorithm", "Parallel computing", "Task parallelism", "Transfer function" ], "id": "1d1a33579f6baa1c96eb2ff129bc088b8161d0cf", "inCitations": [ "c770031f3067aca38a19af1428e68eb907120c72", "27e99bcb5dc44fd6aa55b3facbafc07777bc6899", "83a31c52bed8d3845201acb7a5b4603212b9e8b6", "5a8cd841f59a68c948c7aa05359c7df32dbc8d5c" ], "journalName": "", "journalPages": "43-56", "journalVolume": "", "outCitations": [ "42d1b52254873ecd0f36eb7342f95dbad9c50187", "7ed8dd92f4a174b630836700cf12d0adebd5c708", "bc3a52c29cb8755d0abab5b7d1f9c3e2dad2b38c", "2c7e254cfea97f0b10320d723aa906f3f5c9bed6", "035e903cc8946617cf1b3b69d9093f7052caa71c", "19114b7a2f5243a47e80590cc11a2d8ec5b96308", "22eaa2f2b7abc3604717ffff4461b5cfbbac285e", "60212872aa40b660de117ba751542988bcfc406d", "d8a67b13e2d4051dd7a451232314a5d778a1b047", "1e6b539b598042a530629fa77f0256fb535d945b", "08ddde0eaf4925704222135788f79fe293c5894d", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "dcfa676e5170048dc0f3481f765b7f8d3079edf5", 
"ee7abb1d62a0ba2d211086a69b36be8b4930b9c5", "39bfff0c76441e0af69f3d61915f2ad930bf6b27", "163247e7ed8db43c9529d85c384d8843e22a136b", "363d109c3f00026f9ef904dd8cc3c935ee463b65", "515d6eb9ed58632eeae14e440a47e569c6db4b9f", "156fa936f4c46972245c0720e30b11593e934574", "0541d5338adc48276b3b8cd3a141d799e2d40150", "8e226c40a8c056dc4c348eef256b711902e1d943", "8e4b7303ae1062b00e930403e70bb700f3ec990a", "cd642576ce8502b533e229b537f9ffbe9254aef6", "16d95a30bf189a0fe37af03eee9cc8af49709cd2", "336b4f3099b8f629adc20a69aba15257e53539f9", "7f45e511f53c80c31346a1ef01f1d14293044b0f", "5cb88831f543d30cc688fedc445d4e358ef73626", "94546cd187a5818811f6efec14c1360ad41cdd9d", "0433bb657317ac22f7c66d71dfd14c8ead607d73", "044ce2a427c65d53f3d8279339b8eb6f020121c7", "56a42c71388e80f68efd350afe38175510aaa915", "f240748a498b38c225ab78e54441d90f6e6a0d3e", "2f4f2b503aec309b4c28b60c9d1713a9ceb0a3b2", "1901da4a02c0c4bf7f62424c3eecd1f977c3f35a", "36bb67d8fba0c85f2495449a9926018827368df5", "3f40cb40cdd3a6593986cac1ec9c2fae28bec732", "cca9f7e4d9dcc0368934026f1fe2d6590870fa68", "4be228917846a218ba00d30b42d709a11b7a5311", "47d5357957cabb610131db1b228e58b70860ee8d", "089b10645ee63cd9c5bb4ab661141dd813408e15", "005718daa82f7880b5b1f3b01e0e8f6aad2b5751" ], "paperAbstract": "Software-based sequential service chains in Network Function Virtualization (NFV) could introduce significant performance overhead. Current acceleration efforts for NFV mainly target on optimizing each component of the sequential service chain. However, based on the statistics from real world enterprise networks, we observe that 53.8% network function (NF) pairs can work in parallel. In particular, 41.5% NF pairs can be parallelized without causing extra resource overhead. In this paper, we present NFP, a high performance framework, that innovatively enables network function parallelism to improve NFV performance. NFP consists of three logical components. 
First, NFP provides a policy specification scheme for operators to intuitively describe sequential or parallel NF chaining intents. Second, NFP orchestrator intelligently identifies NF dependency and automatically compiles the policies into high performance service graphs. Third, NFP infrastructure performs light-weight packet copying, distributed parallel packet delivery, and load-balanced merging of packet copies to support NF parallelism. We implement an NFP prototype based on DPDK in Linux containers. Our evaluation results show that NFP achieves significant latency reduction for real world service chains.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098826", "https://people.cs.clemson.edu/~hongxih/papers/SIGCOMM2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1d1a33579f6baa1c96eb2ff129bc088b8161d0cf", "sources": [ "DBLP" ], "title": "NFP: Enabling Network Function Parallelism in NFV", "venue": "SIGCOMM", "year": 2017 }, "1d1fc81989ed1d26ba4cf6c3025ffaa740023a5d": { "authors": [ { "ids": [ "3031766" ], "name": "Riad S. Wahby" }, { "ids": [ "39733059" ], "name": "Ye Ji" }, { "ids": [ "34989246" ], "name": "Andrew J. 
Blumberg" }, { "ids": [ "2208880" ], "name": "Abhi Shelat" }, { "ids": [ "32523323" ], "name": "Justin Thaler" }, { "ids": [ "1756078" ], "name": "Michael Walfish" }, { "ids": [ "2237524" ], "name": "Thomas Wies" } ], "doi": "10.1145/3133956.3133984", "doiUrl": "https://doi.org/10.1145/3133956.3133984", "entities": [ "Application-specific integrated circuit", "Computation", "Formal verification", "Interactive proof system", "Outsourcing", "Parallel computing", "Precomputation", "Stock and flow" ], "id": "1d1fc81989ed1d26ba4cf6c3025ffaa740023a5d", "inCitations": [ "0a85b3afc89958583642b7fd39b37e745a053190", "1f1e5d327e8f5afa8c052e0090c3e2fb3acf301c", "2f7b4ee46d284664fd1a4a679d1e610e2954ca8b", "17f02267f4deb9213e000fc2aeb09cd17db8993e" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "242", "journalVolume": "2017", "outCitations": [ "369e22379d90f85d775ba999bb3da1e0324eabee", "0b5f3e446fff1c1674c6ca568f6a933631cd36fd", "5644dd91f6e5ce01229dd94e9db9e2fba321da6e", "01e83a7ff59354527a01f440129e71aadbe2eade", "7ee3e64ef377c70d496fa870c35db737d8c0096a", "1a33c542b064f95eb6fc9b0003e80ff4f1b9289a", "977d014244451a0182d5ca915dd0748f269321de", "442ead150438b912f0bcb83bb9e6c63d35dc0c96", "2118643f485da9554360f42c16d0778c404b7398", "5d1a57dc5b7536011ed6859cf9d811b9209680e2", "97d4f14fe33a9d5be1c7745eebc2ce7517033d14", "f0f4757aa2f923a349e8357e73850a78e9b80fee", "1bab56c8ad874a900d385e9a94e31ca95b7191bf", "3ff0fbebd3e0a2aa43e43963231131b9bd55336f", "47ecf950ab4362cc704e9ad39e7ac52f24cc97e4", "9b7aa07f0ff31a601bb773897093c4c1ea98dc34", "35f2ee0596493100b94325632265116faaeea741", "242654ca9c67c28dd4dcc5ff4ec263cf9cb39469", "17d6efe2c48ba8b7e096ad0b34d88dd0946b6379", "32bd62191f501753b8307bef23758adb50d95627", "16666593472d5924b2bac14fdbd180773741f5bf", "1a59302e8c3e39c6fb7249af7346c2d5158e03e1", "53b67d4f8584decf5f86fc4ec9f0eee893cf6cbc", "42389207a73223b03f533e02e2524581cf3c2f00", "07c746c119b1d18e6580840b2166721e07b4433d", 
"9e38f65689de68019fb8a1fc4ffb00f7caac0dd4", "22476e875140a191e823aa3fa4709dff93a161ba", "1d9cf87fa6d6175a2c1543afff263113657765f6", "3a5bc6c6c312fdb8c65cb797535f6462947e7181", "5dd271ed2de406288e27cb57b1a9028b458d0578", "0b7e6c5d49b7681fa7426b86040072e3b36a2223", "364f621d4c9e97870cab0ad71de0b81040bc5495", "732ae647aa75acd7b7349679a4746c0539370122", "70870a970fc08b8a1870066117ce9d389b0ede7d", "4e5de3bee5ba3acb359a15091e4647ca28a89b4c", "011d714a361b8ceb925c18e4a214e22aa5f899d8", "2e8ccf0dc75d889dead5ea67e6752fe3f59fd7bf", "d38ce4112e450c54a5a48285c0707aa20579d487", "02a356366cc651e69df7c442deff9dcaa3cc8a5f", "48a6ed40923089d57af74f01fd35f92754d7537e", "9f50019aa8161577e4fc62f79da41083ba03f70b", "0ab575eb093cb7caef99fb8a5aede9e91a6808ad", "20d1c524fd1557e76a687ffab57f96364beb6fe7", "29543bb7c680dde79f374e73930ca68833e2fb37", "bb2ed760b7d923dd145508993b4eea76a038ea0e", "101bb77cb2c8b2bfcde41973ac3473db325d7e6d", "3624eaf7c2f05612429ed1579cc82102c11f4d65", "1bc3d4083b88ead61cec36734453460fa1253dd1", "d3a95ea430c0e33b044e9212a4857942e49621d5", "a40aedd4c9649e89f3807ee12730d7d0fefb9523", "4c3a78661fd920b4116afd0ad88247bbd00160ce", "15f841e403dc1706df05bd24447d1be51c9f8785", "8d31b4c231eec985473f681238c530bb410d9c78", "990bcf90f7ea605551b1b0c70c6d1ac6b88b6b4d", "201f43d5b419070dc25cb173793a8227a075d32a", "084c67fab5b80114dd8fd223149b2c64fe473cc0", "66f2bc0a618d85f0af0a32699c99f0a02b0053a1", "6a0529cc2ac81f8d2f0d4efa6f10345991158788", "ae2eecbe2d5a4365107dc2e4b8a2dcbd0b3938b7", "3cc86ff94309bb58b2125eea173b23ab89f26a3b", "1bfd50ede3bbb39337302848959fa5dc616db092", "8c43726d7229a6dea25d3f510ca4b8e681117b60", "b0f2557d50632081a7942812a05b130747daaff6", "fa017f045faa7d445a492dafda01fed8e3eded8b", "2ac9ff8d0e0ef59e4333b4dcf884902fec3f018f", "1b5e73314491f5ecbd4db9d211ca81be28e2eeb4", "46cd7e1d4231e47873f3eb4e26ab73187deb5437", "57e22fa53cb6cc3c535e4995f1bbddef40731b28", "212700938f4489daecc5ca5fc984dcbd236d5231", "b6c15c2eff5cad736c900ebf824be76460041c43", 
"91451ae2ca6879cfdd25ac266dccdc57e011d930", "4afe2dabcc4b26cd9b0f81377b30c3dec3718eb6", "ca5a6857cf81445e11a2ac7b642bf8d807571ed5", "02557d37a9f129fbe23a46b7a00a90baf7909234", "2a266546c2609f079529688de7acbe0213f47373", "5f1622916e69b5bf985b339191ab1d3dd3517038" ], "paperAbstract": "Systems for verifiable outsourcing incur costs for a prover, a verifier, and precomputation; outsourcing makes sense when the combination of these costs is cheaper than not outsourcing. Yet, when prior works impose quantitative thresholds to analyze whether outsourcing is justified, they generally ignore prover costs. Verifiable ASICs (VA)---in which the prover is a custom chip---is the other way around: its cost calculations ignore precomputation.\n This paper describes a new VA system, called Giraffe; charges Giraffe for all three costs; and identifies regimes where outsourcing is worthwhile. Giraffe's base is an interactive proof geared to data-parallel computation. Giraffe makes this protocol asymptotically optimal for the prover and improves the verifier's main bottleneck by almost 3x, both of which are of independent interest. Giraffe also develops a design template that produces hardware designs automatically for a wide range of parameters, introduces hardware primitives molded to the protocol's data flows, and incorporates program analyses that expand applicability. 
Giraffe wins even when outsourcing several tens of sub-computations, scales to 500x larger computations than prior work, and can profitably outsource parts of programs that are not worthwhile to outsource in full.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133984", "https://www.pepper-project.org/giraffe-ccs17.pdf", "http://eprint.iacr.org/2017/242", "https://www.pepper-project.org/giraffe-ccs17-talk.pdf", "https://eprint.iacr.org/2017/242.pdf", "http://www.cs.nyu.edu/~mwalfish/papers/giraffe-ccs17.pdf", "https://www.pepper-project.org/giraffe-dimacs_outsourcing17-talk.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1d1fc81989ed1d26ba4cf6c3025ffaa740023a5d", "sources": [ "DBLP" ], "title": "Full Accounting for Verifiable Outsourcing", "venue": "CCS", "year": 2017 }, "1d2f83d87984d31682d3e098f38570b1520977e2": { "authors": [ { "ids": [ "1974088" ], "name": "Yaniv David" }, { "ids": [ "2578198" ], "name": "Nimrod Partush" }, { "ids": [ "1743232" ], "name": "Eran Yahav" } ], "doi": "10.1145/3062341.3062387", "doiUrl": "https://doi.org/10.1145/3062341.3062387", "entities": [ "Binary file", "Compiler", "Program optimization", "Reverse engineering", "Scalability" ], "id": "1d2f83d87984d31682d3e098f38570b1520977e2", "inCitations": [ "963dd4f5e91ab50c5db2ec40472098eafd2f5e39" ], "journalName": "", "journalPages": "79-94", "journalVolume": "", "outCitations": [ "5fd9c8c91a67117e72731c1f33ca1527f2c59156", "7fa71e17142563013365daa8526a1323f123961a", "1618b1217fd43580390172f1c372a5aba91f5543", "10e6739668f5c81d0607d2068eaab77ef93991ed", "7e1569eebf13a4e906ce909a669e2a9ab1046124", "08e9332daaea5746c78433b9ce91a59ab61b7118", "09e5a2e7e048226a6d141c09ec61181507eca38d", "1bf9569aa108b6c19c8cc4fc15470cedddbd7ba9", "1e52a2e8535509ab0111c0c5d89a88d3bb10b34c", "384c49892315f9f7742b23c41777cf55bab73d74", "2dd6381baae25bab28454bdb716313010b05d61b", "0dc043122228447b6ea7218ae80aab01e21df140", "041e4a4661a28b45c1663070d1d3934cf30eb060", 
"0acdd72b339882f7db483486a85b8f82a91e4510", "e0764b9142c211ec094543c447612dfe79da2662", "4f53a6729b3127ed8d0b9040591afba4dc6046f2", "252e15047f3cb2cd2c26c21cf74fe2e7038a3f40", "c3a39721e079eb4baa3d286b738bf822007c20d7", "120c819da02fcb312986ac492f723ef9ea3223b5", "5e74f5ba5c7174e3ecf6ab2581a5e745bb69dd54", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "1be37ab7b64c78351e20952d4261033328ecd69c", "82bd162b04cef498dd2f4b6103c6e13107b7b782", "46e78409aa7441262492277a0d3c63728621adf5" ], "paperAbstract": "We present a scalable approach for establishing similarity between stripped binaries (with no debug information). The main challenge in binary similarity, is to establish similarity even when the code has been compiled using different compilers, with different optimization levels, or targeting different architectures. Overcoming this challenge, while avoiding false positives, is invaluable to the process of reverse engineering and the process of locating vulnerable code. \nWe present a technique that is scalable and precise, as it alleviates the need for heavyweight semantic comparison by performing out-of-context re-optimization of procedure fragments. It works by decomposing binary procedures to comparable fragments and transforming them to a canonical, normalized form using the compiler optimizer, which enables finding equivalent fragments through simple syntactic comparison. We use a statistical framework built by analyzing samples collected \u201cin the wild\u201d to generate a global context that quantifies the significance of each pair of fragments, and uses it to lift pairwise fragment equivalence to whole procedure similarity. \nWe have implemented our technique in a tool called <pre>GitZ</pre> and performed an extensive evaluation. 
We show that <pre>GitZ</pre> is able to perform millions of comparisons efficiently, and find similarity with high accuracy.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062387", "https://nimrodpar.github.io/assets/presentations/gitz-pldi17.pdf", "http://www.cs.technion.ac.il/~yanivd/pldi17/pldi17_GitZ.pdf", "https://nimrodpar.github.io/assets/publications/gitz.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1d2f83d87984d31682d3e098f38570b1520977e2", "sources": [ "DBLP" ], "title": "Similarity of binaries through re-optimization", "venue": "PLDI", "year": 2017 }, "1d35abb7acc67187de8f9cd4c3828a8236539a51": { "authors": [ { "ids": [ "1757003" ], "name": "Jiaqi Liu" }, { "ids": [ "1742495" ], "name": "Gagan Agrawal" } ], "doi": "", "doiUrl": "", "entities": [ "Fault tolerance" ], "id": "1d35abb7acc67187de8f9cd4c3828a8236539a51", "inCitations": [], "journalName": "", "journalPages": "304-313", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101155" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1d35abb7acc67187de8f9cd4c3828a8236539a51", "sources": [ "DBLP" ], "title": "Supporting Fault-Tolerance in Presence of In-Situ Analytics", "venue": "CCGrid", "year": 2017 }, "1d39c58f16e7b9b7eb382fdb342db85a8b957b4d": { "authors": [ { "ids": [ "1777848" ], "name": "Mingyu Gao" }, { "ids": [ "39792437" ], "name": "Jing Pu" }, { "ids": [ "3897285" ], "name": "Xuan Yang" }, { "ids": [ "1764167" ], "name": "Mark Horowitz" }, { "ids": [ "1700331" ], "name": "Christoforos E. 
Kozyrakis" } ], "doi": "10.1145/3037697.3037702", "doiUrl": "https://doi.org/10.1145/3037697.3037702", "entities": [ "Artificial neural network", "B-tree", "Brute-force search", "Computation", "Dataflow", "Deep learning", "Dynamic random-access memory", "Low-power broadcasting", "Parallel computing", "Scalability", "Scheduling (computing)", "Static random-access memory", "Tetris", "Tetris", "Throughput" ], "id": "1d39c58f16e7b9b7eb382fdb342db85a8b957b4d", "inCitations": [ "381b008b49d04c1bd5ff00649521fa028b9d3ea8", "aea3f0c82f4d9926ff9a71b279ee9152a463a221", "bf5a2bed3a98f6dc093460a6592a5d1b99a60ae5", "540746504cfe51a146762cbbca06cbc03229c778", "5e8e46557e42940274e548246680c785eb729db2", "bdbb9c01016ce513f6ac5c432d61f66da8708bce", "6a8fb5989b3fb290ac0a654895aad6ff8601c7ab", "285f2e210f2b1893487de7aacb39873d225d977e", "3efa068494a91a825b9744c1ee4b83663f363533", "5bcf27ab86be9fa376237d2d2bd8ebbf52982088", "305806d53240aa523168d5aa59d902fb0c9a1581", "82d9d4cae6cb82ad516371414cc53da6ec9708b0", "04984fc1683186a526917575c435733e9311ff6e", "381e7525bc8b9d47ae0343e471f5f1d5e6963bbe", "651ae380b5d500c613770dbf55c175c52576d7da", "2e5e824a5e98d505c49a62752566e871cd4de06d", "8033f293c894eae64c9f379dee2192bfe4f7883a" ], "journalName": "", "journalPages": "751-764", "journalVolume": "", "outCitations": [ "437b11128948f92e1139c555cf1326922ee36b39", "2ffc74bec88d8762a613256589891ff323123e99", "07c3c9f1be15b52e259309b2e0a9ca71fa7dc76f", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "21ec9704c69fd41ebee16b53e4d59e2424e70ea9", "49b4094f2c313a92da4461572c0bef80b0d7d649", "886f29f247fd49084fbf25fdd60049b47db4f4ea", "399acab2bee6eccbfffe4a2ce688b6b1075e9c5e", "4e8505919eb22265f107ebbeeee3fa78bf6d893a", "5baaeed2b180d8b9886eca113ae0c86196c8bdaf", "14b5e8ba23860f440ea83ed4770e662b2a111119", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "02c78232075ac431834e3442dcb2954d4e708def", "a52945840b980adfef34466cb4186c7cda3b61e6", "1e40d8b7ccac6afbfdf5c89f203f368735e051f9", 
"7c6c7a97488fdbb7c06f85c345b348183bf0a704", "dbaebcde717a6669b40ebbc598ddb434d8ca93e4", "3bf23f74bf33ed52f7c28587fab315610b27221a", "508afc857f74370ef99558d98ab45e347ae98c33", "0122e063ca5f0f9fb9d144d44d41421503252010", "2bb28f6105ca30b2bc1ba91578234ecb12e788a6", "06ce77e4abea63948580340be25d7f2a80369e5a", "93ec8d541655aa78e9ea982156ec7b468eff2816", "29c7d959f322f7006186e3c344ddb9e41e3a8d1f", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "8f89357b1768da284f1c746679f7acd44bd45a33", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "0b1c6f52c76b441cb2598f3b8ac132d921ec8274", "8b04ea524cb6ced72868c120a00c4679d84be006", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "3ac3cf82ff8f43db2e467f792d22d57d068f7ee0", "3e4f4582511f60c22f4762c64b22328a1ef42764", "357e97b04375f09e9f4cfd45c69ecd9d7f0a15e1", "061356704ec86334dbbc073985375fe13cd39088", "e2e017f56ed809d8b31c0b891d735b3b864f4cc4", "5646a51461b64be3e9511dff06ea5abef0a399d4", "01fcae344d2edb715bcc63a40b6052c0331741bd", "37e49c57dd4d0849380d177222db53e52ff21347", "2394c6644efa856f0da160a0f0031d74cd3b5000", "4954fa180728932959997a4768411ff9136aac81" ], "paperAbstract": "The high accuracy of deep neural networks (NNs) has led to the development of NN accelerators that improve performance by two orders of magnitude. However, scaling these accelerators for higher performance with increasingly larger NNs exacerbates the cost and energy overheads of their memory systems, including the on-chip SRAM buffers and the off-chip DRAM channels.\n This paper presents the hardware architecture and software scheduling and partitioning techniques for TETRIS, a scalable NN accelerator using 3D memory. First, we show that the high throughput and low energy characteristics of 3D memory allow us to rebalance the NN accelerator design, using more area for processing elements and less area for SRAM buffers. 
Second, we move portions of the NN computations close to the DRAM banks to decrease bandwidth pressure and increase performance and energy efficiency. Third, we show that despite the use of small SRAM buffers, the presence of 3D memory simplifies dataflow scheduling for NN computations. We present an analytical scheduling scheme that matches the efficiency of schedules derived through exhaustive search. Finally, we develop a hybrid partitioning scheme that parallelizes the NN computations over multiple accelerators. Overall, we show that TETRIS improves the performance by 4.1x and reduces the energy by 1.5x over NN accelerators with conventional, low-power DRAM memory systems.", "pdfUrls": [ "http://platformlab.stanford.edu/pdf/Mingyu_Gao.pdf", "http://doi.acm.org/10.1145/3037697.3037702", "http://csl.stanford.edu/~christos/publications/2017.tetris.asplos.pdf", "http://web.stanford.edu/group/mast/cgi-bin/drupal/system/files/tetris.asplos17.slides.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1d39c58f16e7b9b7eb382fdb342db85a8b957b4d", "sources": [ "DBLP" ], "title": "TETRIS: Scalable and Efficient Neural Network Acceleration with 3D Memory", "venue": "ASPLOS", "year": 2017 }, "1d404fa87e3b94323ebab6068f7b42d216035c55": { "authors": [ { "ids": [ "2381851" ], "name": "Wai Kay Leong" }, { "ids": [ "2416185" ], "name": "Zixiao Wang" }, { "ids": [ "2566029" ], "name": "Ben Leong" } ], "doi": "10.1145/3143361.3143378", "doiUrl": "https://doi.org/10.1145/3143361.3143378", "entities": [ "Algorithm", "Bandwidth-delay product", "Cellular organizational structure", "Dynamic dispatch", "Mobile phone", "Network congestion", "Software deployment", "TCP congestion control", "Telecommunications link", "Throughput", "Uncompressed video" ], "id": "1d404fa87e3b94323ebab6068f7b42d216035c55", "inCitations": [], "journalName": "", "journalPages": "167-179", "journalVolume": "", "outCitations": [ "20e6388b2dd76242b5482ac3d70fd386657587bc", 
"51564d3d6b1fef25498cbe7df23fab3b2b767f24", "0427e82f0f31fd98ee4564df85a25d5e6175fc31", "3f7421f8c5be8197555f38603ff3cefb983d7c1e", "20737e2eab205e968b8d361cbf49ae1c1c6426ca", "8123851b2cc261f59a07f09c554139435ff71646", "7b6e453e08717cfdcb66349ac184996e43ed85b3", "0f1181e2f58395f8f6d6f14707e4e44a489aaf3f", "ab4b31afdf2fb0900ed780f850d74d65a1598e6e", "47aa3758c0ac35bfb2a3d2bbeff1e0ac28e623c2", "2d4906884bc5309f1539195ff5b181d41a15ff60", "2bce0f3e815c471702fb9db657914b6169098c2b", "3f200c41618d0c3d75c4cd287b4730aadcf596f7", "2f85f20a076cb91dcdf4b3e5b16886ee9b6b3543", "41758f25a78f4223fefb7ac00cc70a9e6ba949af", "7dbf823faded34ccef8ab626a55790b6d041b3a8", "d61b9b499c7e371edf7f8bb45fe7934e7d60ba2d", "534c3ff417810166f358b9cf1096158a69153a12", "077b23a64c80039a9d36da0cab766262edc89af2", "b3493a8ec43df1861c5d7037ce57bcff7d343dfc", "aeb49176f30b43caf4205a6a11a16a860ba96c55", "4dc0668a57a8f871c701b25c6aa39b385f834595" ], "paperAbstract": "TCP does not work well in modern cellular networks because the current congestion-window-based (cwnd-based) congestion control mechanism intimately couples congestion control and packet dispatch, which provides TCP with only indirect control of the effective data rate. The throughput degradation arising from the cwnd-based mechanism is especially serious when the uplink is congested. We describe PropRate, a new rate-based TCP algorithm that directly regulates the packets in the bottleneck buffer to achieve a trade-off in terms of delay and throughput along a more efficient frontier than conventional cwnd-based TCP variants. To the best of our knowledge, PropRate is the first TCP algorithm that allows an application to set and achieve a target average latency, if the network conditions allow for it. Also, unlike the cwnd-based TCP mechanism, our new rate-based TCP mechanism is significantly more resilient to saturated uplinks in cellular networks. 
PropRate does not require modifications at the receiver and is amenable to practical deployment in the base stations and proxies in mobile cellular networks.", "pdfUrls": [ "http://www.comp.nus.edu.sg/~bleong/slides/conext17-proprate.pdf", "https://www.comp.nus.edu.sg/~bleong/publications/conext17-proprate.pdf", "http://doi.acm.org/10.1145/3143361.3143378" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1d404fa87e3b94323ebab6068f7b42d216035c55", "sources": [ "DBLP" ], "title": "TCP Congestion Control Beyond Bandwidth-Delay Product for Mobile Cellular Networks", "venue": "CoNEXT", "year": 2017 }, "1d4c0211549a8fe259a273da88c63e8f00fef463": { "authors": [ { "ids": [ "2575120" ], "name": "Thomas W\u00fcrthinger" }, { "ids": [ "1682576" ], "name": "Christian Wimmer" }, { "ids": [ "2381046" ], "name": "Christian Humer" }, { "ids": [ "2176460" ], "name": "Andreas W\u00f6\u00df" }, { "ids": [ "6503919" ], "name": "Lukas Stadler" }, { "ids": [ "40584321" ], "name": "Chris Seaton" }, { "ids": [ "2467762" ], "name": "Gilles Duboscq" }, { "ids": [ "37231580" ], "name": "Doug Simon" }, { "ids": [ "2465562" ], "name": "Matthias Grimmer" } ], "doi": "10.1145/3062341.3062381", "doiUrl": "https://doi.org/10.1145/3062341.3062381", "entities": [ "Benchmark (computing)", "Compiler", "GNU", "Interpreter (computing)", "JRuby", "JavaScript", "Partial evaluation", "Profiling (information science)", "R language", "Ruby", "Runtime system", "Speedup", "Virtual machine" ], "id": "1d4c0211549a8fe259a273da88c63e8f00fef463", "inCitations": [ "e02e952a655a041c62675d0bad155876b726ee4c", "fbbd9fec8f42fb1aa2608ecd5d757002e62d8609", "0343ae9ab99d0cbd719baf0d2cc1b82425f3664a", "4e7fd86c3e88282215ef6221f5b0cbe49c35ba4f", "bbd8f8e111a738dbb4cd9c9fecdf411e4d4a33da", "561d95be369566a0a1598fa1d7ddee9f27c088d4", "3af5f5bb670a5cf7b1124eefda1b6e3332d2b769", "8b69691eef39fcdc49b4eb6a628bd90090a13621", "fec520eb7d2383621f50b7d43374896a00fa61ad", 
"110f3bb10394755ce07562f815e0c63d9b407502", "c0f742c67da71d23db56ed58c99e83247075aa49" ], "journalName": "", "journalPages": "662-676", "journalVolume": "", "outCitations": [ "1529cb68031e1005eefbfb1e172bd98819476654", "190dcdb71a119ec830d6e7e6e01bb42c6c10c2f3", "74fe9eea58eda300ffcef2502a40ec460b988c71", "7f57c0dd4b230d3fb70e23f1ea25a265a65cfc79", "6ac4b7f4f78e0ae2c59f47a9573b849014ddf4ab", "7e007883306b2d0b8da57ed608f5441dcc30a3e2", "3a54e9d683c172acf9d2a503754f1c68b7daf611", "85dbdb61a133eecbde3cf635de9c84eaecf249fa", "a3e88aa2505c1f4e7f176b1afa467c60fd30bdac", "2194c3460ab71f3826db00b045b2ae590c753319", "e13f1d64f636de03f352965b21d0334acc4f132b", "84c4310019cc0d0544e979c690782b36b82cf912", "687bbce9a25c6e3617383049eb316e888af29911", "db2d0542c7791ee6f29a9f35e3181a186866f881", "23bd210a62e0eb576f73f4aa93acfc9188faa6d0", "45d7a1f16860716e93ac56192e49d41522a4facc", "16d0e0e40fedb965b9f47f66a87b6ade510faba5", "52eee82594982f3c6ba7f0385a78002868fa30cb", "160ad871b437c95e2f5d89b649a8392ad711cf8c", "6bc708a10badb1654c04e90fc607b8f9c6e26c4f", "8896767a006c945b136139c234c2e73530953cba", "76dc6c91d484043c83cf143b399dc245f0bffdba", "2f2f5492f8714f7aeab21fc43c075fd701b26320", "1d8264e0f1d1b4c4b4dc2835fea45d0773b7249d", "c77326a6823b347d25863b8b98f9aee62da91d33", "2bd21f051e9603473b3c06bb326fd3a8d6e6b4d6", "7521513abd7acae00b3fd89001da47019606cf38", "661a60d688063b07e834e750f6c1fd56efb1fc30", "1c728c90d8e5cc9c46a738efa918fea70d3352b0", "2a9cca1f1b1bf6bd9996823b3985788866f461b1", "5d5e1b35dcfbf52299c327baab696568ba0e1d15", "fc4d469148283238404946bd8badbfef3358057b", "8ad2a48cf422e19de430fb8b622539afe3cf2622", "0ff7e33a637f0a228501f8c29880e7e8d84a31e8", "1debdffc881e9bc6c6e786a439442db274077cbc", "0ecd70ee774526cb98cf05defd4cdf9bf6c4b236", "bc42584c1d74f96d2e03dfcc487af642527a62fe", "53e2b31ad6fea91655ecbe64fe66968b934d0160", "c17ac40f0fb475c810c70a52b3dd6535454eabf4", "32d9f2ba15aa08559fd2ad99b299b6d77e5f1567", "25353d86a3bd6990d4a5317d92bc0ba9be1b4ebb", 
"2da08b8490a903cd9825867d25888a97ce27880f", "4dba9547af0be58d1fb0a8a3d06b1ddd36084133", "69869c5d232dee9966526edbdb996603db42f87f", "5fd4d636ab37d88f5fb4c94fcbb60d440fe3f00d", "c7f53f655314d621b007a67c149251c14105e0fb", "d0620f07a89fe5a203b5fb14a9a756e74a78a873", "063b88417b20d4c1984e3994c11a2b6683afe254", "b6d6f6baa38e21d242e213ab1c235d89de3dc8fa" ], "paperAbstract": "Most high-performance dynamic language virtual machines duplicate language semantics in the interpreter, compiler, and runtime system. This violates the principle to not repeat yourself. In contrast, we define languages solely by writing an interpreter. The interpreter performs specializations, e.g., augments the interpreted program with type information and profiling information. Compiled code is derived automatically using partial evaluation while incorporating these specializations. This makes partial evaluation practical in the context of dynamic languages: It reduces the size of the compiled code while still compiling all parts of an operation that are relevant for a particular program. When a speculation fails, execution transfers back to the interpreter, the program re-specializes in the interpreter, and later partial evaluation again transforms the new state of the interpreter to compiled code. We evaluate our approach by comparing our implementations of JavaScript, Ruby, and R with best-in-class specialized production implementations. Our general-purpose compilation system is competitive with production systems even when they have been heavily optimized for the one language they support. 
For our set of benchmarks, our speedup relative to the V8 JavaScript VM is 0.83x, relative to JRuby is 3.8x, and relative to GNU R is 5x.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062381", "http://chrisseaton.com/rubytruffle/pldi17-truffle/pldi17-truffle.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1d4c0211549a8fe259a273da88c63e8f00fef463", "sources": [ "DBLP" ], "title": "Practical partial evaluation for high-performance dynamic language runtimes", "venue": "PLDI", "year": 2017 }, "1d99ce3375cc1c65c07d9fb358e1a101f1d2590a": { "authors": [ { "ids": [ "39631165" ], "name": "Cas J. F. Cremers" }, { "ids": [ "1928621" ], "name": "Marko Horvat" }, { "ids": [ "35272108" ], "name": "Jonathan Hoyland" }, { "ids": [ "31892832" ], "name": "Sam Scott" }, { "ids": [ "3447500" ], "name": "Thyla van der Merwe" } ], "doi": "10.1145/3133956.3134063", "doiUrl": "https://doi.org/10.1145/3133956.3134063", "entities": [ "Authentication", "End-to-end principle", "Internet", "Requirement", "Software release life cycle", "Strong authentication", "Tamarin", "Transport Layer Security" ], "id": "1d99ce3375cc1c65c07d9fb358e1a101f1d2590a", "inCitations": [ "393f25d2e4fd5db52a338dc6783ccc6e90f4ff46", "7f361f82ff6d3b751b4c9217dc4490abe5bed88b" ], "journalName": "", "journalPages": "1773-1788", "journalVolume": "", "outCitations": [ "4e5b6f53c080b9dddd63446cd525ae07de65720a", "02b97d9a3fda357165257aa8bd3031743a5540d2", "ac2a9d093fef9b31d50222b737cc3aa686a0888d", "0037875e7321eb65867ff47b0e22a080b84502da", "eb1f681ec7e216da3865c2d107d62196b477f792", "46b2b9f10c52e83b57e60a224696296551f317ea", "9f95eb7ce7ce190c7c8e6fca26de1a283f7007b1", "29d8c3f55b87a04fa732b2c0a97e3504700dcfd5", "644837b8b24a5ec51e1b761f3f17bcb8a5b922dd", "81b7f4c7c782a63f2cf6771d096ea9177f7ca4f7", "2977e30243c4a93462cdb466d97abff4bcd638d2", "cf5dfdbb0b8d1c673726178f37f499b77fcc7f03", "582302da008255ff515f05c3242f750878725745", "bde332de7397463a2c641c9983eead2267a2143c", 
"04fec8e39d83b4c5cc4fcfeeac5847ecf0134263", "59261f7ccf03b580ed39f96b8928bc965c24d520", "25d4a303577ede7bc5727892413f06a6fe25fd3e", "57774604456cffa77fcf57087bbede72a23994b6", "082d2b922818331e2994aeebaaccb776cfa09145", "338d4815de02be38990db8cff9f96ef8e6959c80", "40860f2db7516f09836ef5bbd65288a4e0957af7", "04bb092c83242cb708d2653bd537c99643e8386d", "de01032a3ef48de910f687ab96662232b9e21e2c", "7eed7154d0590c1e6eb64d4ae7935a526deac4a4", "54113d65b26940b290c1fe3f6324e012b3ae77d6", "23eb53170c6de9ff5024db120eda200816fa803f", "1e7768b135545d473bf4a857f2bbb374ae960dc5", "826fc54e994ce740f69a79cbdcc17181a3429234", "4e97b0de1c273fb83e26059d841a1df47a9f5872", "057d8ed3e2d24ae73e3466fecd0d8a80f4078f06", "738a58eff2dbc9bc76742289fbf9dc9cae3a1b1f", "11b9a10f849c1c7ab598ad1180fb3ad5ba1a7b0a", "1c125018da60c0527ceaee10d40c91e70468c41c", "7189c5e1aab8239a1016f1ec14df4af30047eb59", "d030862b5ab53b3fad21e1f48733b78d4a6e35b2" ], "paperAbstract": "The TLS protocol is intended to enable secure end-to-end communication over insecure networks, including the Internet. Unfortunately, this goal has been thwarted a number of times throughout the protocol's tumultuous lifetime, resulting in the need for a new version of the protocol, namely TLS 1.3. Over the past three years, in an unprecedented joint design effort with the academic community, the TLS Working Group has been working tirelessly to enhance the security of TLS.\n We further this effort by constructing the most comprehensive, faithful, and modular symbolic model of the TLS~1.3 draft 21 release candidate, and use the TAMARIN prover to verify the claimed TLS~1.3 security requirements, as laid out in draft 21 of the specification. In particular, our model covers all handshake modes of TLS 1.3.\n Our analysis reveals an unexpected behaviour, which we expect will inhibit strong authentication guarantees in some implementations of the protocol. 
In contrast to previous models, we provide a novel way of making the relation between the TLS specification and our model explicit: we provide a fully annotated version of the specification that clarifies what protocol elements we modelled, and precisely how we modelled these elements. We anticipate this model artifact to be of great benefit to the academic community and the TLS Working Group alike.", "pdfUrls": [ "https://acmccs.github.io/papers/p1773-cremersA.pdf", "http://doi.acm.org/10.1145/3133956.3134063" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1d99ce3375cc1c65c07d9fb358e1a101f1d2590a", "sources": [ "DBLP" ], "title": "A Comprehensive Symbolic Analysis of TLS 1.3", "venue": "CCS", "year": 2017 }, "1d9f3a389a58b3e0eab47d905639b0a7f726f8f5": { "authors": [ { "ids": [ "1787088" ], "name": "Juan Li" }, { "ids": [ "3044689" ], "name": "Zhengguo Chen" }, { "ids": [ "7711823" ], "name": "Zhiguang Chen" }, { "ids": [ "1730284" ], "name": "Nong Xiao" }, { "ids": [ "12217448" ], "name": "Fang Liu" } ], "doi": "10.1109/ASAP.2017.7995286", "doiUrl": "https://doi.org/10.1109/ASAP.2017.7995286", "entities": [ "Associative entity", "Attribute\u2013value pair", "Collision (computer science)", "Data center", "Dynamic random-access memory", "Emergence", "FTL: Faster Than Light", "Flash file system", "Flash memory", "Flash memory controller", "Gigabyte", "Hash function", "Kinetic Void", "Map", "Multi-level cell", "Program optimization", "Solid-state drive", "Terabyte" ], "id": "1d9f3a389a58b3e0eab47d905639b0a7f726f8f5", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "106-114", "journalVolume": "", "outCitations": [], "paperAbstract": "With the emergence of 3D TLC/QLC NAND flash, the capacity of flash-based SSD is 
growing rapidly, from hundreds of gigabytes to tens/hundreds of terabytes. Accordingly, the Flash Translation Layer (FTL) within such a large SSD is confronted with serious problems that have not ever appeared before. Traditional FTLs either adopt a coarse-grained mapping mechanism, thus facilitating the mapping table being kept in DRAM completely, or adopt a fine-grained mapping mechanism but only keep frequently-accessed mapping information in DRAM depending on the localities of workloads. We argue that both of the above policies are unsuitable for SSDs supplying ultra-large capacity. Firstly, large SSDs introduce so many more mapping entries than ever before that even the coarse-grained mapping mechanism cannot produce a compact enough mapping table to be kept in DRAM completely. Secondly, large SSDs tend to be deployed in data centers to serve IO requests from massive numbers of users under various application backgrounds, where these IO requests exhibit weaker spatial and temporal localities. As a result, the method that keeps frequently-accessed mapping information in DRAM is also impractical for large scale SSDs. In this paper, we propose a novel KV-FTL approach for large scale SSDs, which mostly maps logical addresses to physical addresses via a simple hash function while handling hash collision and out-of-place data update in the traditional manner, i.e., the mapping table. Our KV-FTL is able to accelerate address translation by avoiding loading the mapping table from flash memory to DRAM, thus improving performance, as well as to reduce the write-traffic introduced by the mapping table, thus extending the lifespan of SSDs. To the best of our knowledge, this is the first time the key-value principle has been applied to FTL design. 
Experimental results show that our KV-FTL extends SSD lifespan by a factor of up to 18.7% with an average of 13.6%; improves read performance ranging from 18.4% to 50.7% with an average of 39% with optimization, and in the case of extremely intensive requests, improves the overall performance for requests with an average of 47%.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.14", "http://doi.ieeecomputersociety.org/10.1109/ASAP.2017.7995286" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1d9f3a389a58b3e0eab47d905639b0a7f726f8f5", "sources": [ "DBLP" ], "title": "KV-FTL: A Novel Key-Value-Based FTL Scheme for Large Scale SSDs", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "1dabf2fccc14485d1db1986cfa45ec473da97bbf": { "authors": [ { "ids": [ "3041710" ], "name": "Qizhen Zhang" }, { "ids": [ "2777062" ], "name": "Hongzhi Chen" }, { "ids": [ "2127614" ], "name": "Da Yan" }, { "ids": [ "1717691" ], "name": "James Cheng" }, { "ids": [ "35206168" ], "name": "Boon Thau Loo" }, { "ids": [ "2249549" ], "name": "Purushotham Bangalore" } ], "doi": "10.1145/3127479.3128606", "doiUrl": "https://doi.org/10.1145/3127479.3128606", "entities": [ "Centralisation", "In-memory database", "Infographic", "Open-source software", "Out-of-core algorithm", "Shared nothing architecture" ], "id": "1dabf2fccc14485d1db1986cfa45ec473da97bbf", "inCitations": [], "journalName": "", "journalPages": "40-51", "journalVolume": "", "outCitations": [ "a59b2099e400dadf5f04d7cd34ce326f5f273c23", "1156f60e40548096df49528b1342bb3e88b0f378", "17a2d0feb8754ef81a945b9f5046c68605f59560", "ef9d9821df55442f039b128bb5cef2b41ab2cadc", "7f598b081df60565014cd943e4512710b682b734", "78ad867eb6176d4e2f1cec4f7517f65d90a660f8", "0ad8e89091eed09217e66adc98136126addc2619", 
"a77dbe50505a1ee09982c1a1576f09d554eb45c3", "c0bbb56b4428e9a83d067c07054946293b475fe9", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "1e6891d5de44bdbc9d76bde0ac0a1f74f4b03356", "1452f20140dba52b928c9be5f385b5ac35537a2c", "2138776f89bccc9362b239a6d33018ca2a847960", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "5bd9374195809c73157ba876f463ea7c4ec9abb5", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "8174aac0f597e4910cf31dc10ca0de4430a71c4a", "6de3915df2b9927a78f213629f3bcb052ec21e8b", "2b9e6181502369199bd89691a27f89bdbaac36e4", "977ff4a6e3730acd5e5dc6f7a45fb5ad8e26876d", "5f6d5608cf1b3071c938a2271637fc555bf53231", "3486aeaf540c48952120fe853d672af984f40a6a", "e9fa7e93d2f4e958cde16c6cf9d5bc966f8e7ffc", "94c20561f102ede79c461c6aeee74eb7a55d5eb4", "017eb2deb11f48ef7350873e81c19391aa61b8e3", "bf32a27be69f44d529b3c80bf73277e8cd2ef94a", "ef8d3a389410124d21dfda44295de8af786f5516", "87f931f4d8aad3b71b8261703bbcfa18c1293181", "74e6b114822b712c100c7ffd1b01f4fb1564bd28", "b513711621e81d0abd042e0877ca751581a993f5", "0f014693b25d9846025219b88f8ca480fac68b0a", "6a888f3dd0a17b0241be61daa378ba6caffa6617", "3726c60552263e648c6856679e672de2e1c110e5", "9359fa64a59105e93dd6ca9f5aa35e0d9f9055be", "26deee037b221bd05ed34461819f5c067b745445", "047565a5b15fbebc78e0bc7d8ca823237dac9de2", "272550f6745acba4da9a10ab29ba738cb2c19d3b", "ee947a4654479e4098142c0369de7698c2e1475d", "0c8ed7f86d881dffb82b24f718bece6cb0e5c76f", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "1359d01962b882c95607a75aeafeb532188cb159", "0608d9937c074520cdc93cc444cc1c77039c5332" ], "paperAbstract": "Graph analytics systems have gained significant popularity due to the prevalence of graph data. Many of these systems are designed to run in a shared-nothing architecture whereby a cluster of machines can process a large graph in parallel. In more recent proposals, others have argued that a single-machine system can achieve better performance and/or is more cost-effective. 
There is however no clear consensus which approach is better. In this paper, we classify existing graph analytics systems into four categories based on the architectural differences, i.e., processing infrastructure (centralized vs distributed), and memory consumption (in-memory vs out-of-core). We select eight open-source systems to cover all categories, and perform a comparative measurement study to compare their performance and cost characteristics across a spectrum of input data, applications, and hardware settings. Our results show that the best performing configuration can depend on the type of applications and input graphs, and there is no dominant winner across all categories. Based on our findings, we summarize the trends in performance and cost, and provide several insights that help to illuminate the performance and resource cost tradeoffs across different graph analytics systems and categories.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3128606", "http://www.cis.upenn.edu/~qizhen/socc17-zhang.pdf", "https://people.csail.mit.edu/jshun/6886-s18/papers/ZCYCLB17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1dabf2fccc14485d1db1986cfa45ec473da97bbf", "sources": [ "DBLP" ], "title": "Architectural implications on the performance and cost of graph analytics systems", "venue": "SoCC", "year": 2017 }, "1db445dc54ee1389a14d72ee628da61cd6c10428": { "authors": [ { "ids": [ "2289351" ], "name": "Mark Silberstein" } ], "doi": "10.1145/3102980.3102992", "doiUrl": "https://doi.org/10.1145/3102980.3102992", "entities": [ "Central processing unit", "Control plane", "Field-programmable gate array", "Graphics processing unit", "Memory-mapped I/O", "Operating system", "Span and div" ], "id": "1db445dc54ee1389a14d72ee628da61cd6c10428", "inCitations": [], "journalName": "", "journalPages": "69-75", "journalVolume": "", "outCitations": [ "43f0c099d44a68783a773f91cd03098a5252bf98", "0e191d726c2b15d54f0e4b5d50f6c0d2e426f3ad", 
"0d3f85933b6355789588476e491683532c68a906", "2e5132493276714e4cce3b2f64d60da4e47210cb", "1ea92529e75fe90ee1923b95d0fa8ad37ac1ed7c", "1eb9dc6955b0de81a078c9d6fa937c33f1f04545", "080b1f2c8316dad80d8c385dfcb82335a64a4d29", "0e5c646909bb762da0cd325e084655c12445578f", "600a6810334f46d9f44bec0d0a9927154ded60dd", "03b2e534532e9558e560df0bed74976b8f48c1a5", "0b1c6f52c76b441cb2598f3b8ac132d921ec8274", "054572f0a9cf49fa9757ce937d097de6200fe942", "28552ecf4eaedb3461edca97304b29082b02fbab", "a4b0f11334d33de4f41bfeebfc520fb5b034a31e", "225603198cc415d363db8a8a2bd30b0df3c963b1", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "0081c1fcb079f87147a68565764b59923c918d9c", "3dfd3dfaac573c90e0eae54630881a2b412ee402", "00f355ce566bb51dc70925217c62e437cc7e14e2", "3e6f5b5e8b7cb5408da8cd10d0cc625b00910291", "41cefe44d43a5e367db26611075f67a08914bddf", "22fd20f23c40ecb9044cae7ee58b76d39fcf45b6", "5cfc936d12bbd8a0f100687b12b20e406215f30a", "110c050c6c992d2b956f7b47d717810ac5c91bdc" ], "paperAbstract": "Future systemswill be omni-programmable: alongside CPUs, GPUs and FPGAs, theywill execute user code near-storage, near-network, near-memory, or on other Near-X accelerator Units, NXUs. This paper explores the design space ofOS support for omni-programmable systems, aiming to simplify the development of efficient applications that span multiple heterogeneous processors and near-data accelerators. OmniX is an accelerator-centric OS architecture that extends standard OS abstractions, such as task execution and I/O, into NXUs while maintaining a coherent viewof the systemamong all the processors. OmniX enables NXUs to directly invoke tasks and access I/O services among themselves, excluding the CPU from the performance-critical control plane operations. 
The host CPU serves as a controller - for protection, device configuration and monitoring.We discuss the hardware trends that motivate ourwork, outline OmniX design principles, and sketch the core implementation ideas while highlighting missing hardware features, in the hope of motivating hardware vendors to implement them soon.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3102992" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1db445dc54ee1389a14d72ee628da61cd6c10428", "sources": [ "DBLP" ], "title": "OmniX: an accelerator-centric OS for omni-programmable systems", "venue": "HotOS", "year": 2017 }, "1db611aa5b87c731daecdd2f798dd7fd183a62ae": { "authors": [ { "ids": [ "1815239" ], "name": "Huan Zhao" }, { "ids": [ "3259992" ], "name": "Quanming Yao" }, { "ids": [ "2099949" ], "name": "Jianda Li" }, { "ids": [ "1809614" ], "name": "Yangqiu Song" }, { "ids": [ "1692152" ], "name": "Dik Lun Lee" } ], "doi": "10.1145/3097983.3098063", "doiUrl": "https://doi.org/10.1145/3097983.3098063", "entities": [ "Algorithm", "FM broadcast band", "FM broadcasting", "Firefox", "High- and low-level", "Lasso", "Recommender system" ], "id": "1db611aa5b87c731daecdd2f798dd7fd183a62ae", "inCitations": [ "85de598fa1149dffff84efa3c16b4655099582d2", "f5dc0b1b736df15bbe91980e53af27f103f85ed4", "424f18612429618793f94fa1af914a4f962ac54f", "f205c31b2d6b7c3f4ff6045e7d5243b92716e2a3" ], "journalName": "", "journalPages": "635-644", "journalVolume": "", "outCitations": [ "188f4d9b9d580d0432056b760b3372ec83543d1d", "9e18015bffe5e5f0ed7240e7af7ed19a934ae32f", "823888e601885ea5339ffb1d1898015e67e2d1f6", "35a0d1d2ba7f52a66ba5c675467b71b6a56d81e4", "2cbe0ba73d02aabbeefedf841203219796a551b7", "a3d86c97ffc750dc98ad6717cbc63979926e97a2", "b89ca9130095774d0478c299fc681e8ccb09858f", "65f8e3d819786754fecc6085ee5ded94c7c0b142", "1c8c9a7713395e9a176c42e49bc80574a013f89f", "054ba27fe5cc6085d20ea2707de886db6865dbed", "1c96cdb6bc0029b8ca4cd578aca5e939b359e578", 
"9aa88a8a354f1d322e242376d27d0474e50252f8", "9fe5a7a24ff81ba2b6769e811b6ab47188a45242", "23682ad5e9977dc4c3485d937a80ec0199436b92", "36b33d88b16186d136f2258c433e7619b7c83421", "e50f4d3316d13841c287dcdf5479d7820d593571", "5099057404a78157e80135a6ba4f9db523ab9fa3", "8cfffe4f41b37c7325ba912f98254075602934c8", "0896bbb0c56d63631b72886ddd773d0d468d2132", "769da9576149d585d7a39c474d6f6962ba8c2dec", "016366507205724e5456000e79d2e7d2630ce76b", "c92420f001e023c693db762758f9590571256e35", "79001a9da7c9fa5e08fc0d32aad60e984f193a92", "2f3fe6e7b7387ac6b96b122fafb8b423219b7516", "4c3103164ae3d2e79c9e1d943d77b7dfdf609307", "046fb6aa2ee81a3139b5eb6c21768bf68631afdb", "1970a644bc8a9fa7340f04785f8b19e9d33778e1", "cf839be160797b63648db5f0a3c24309f6aa407f", "8d9574b5e44a8368403263a422d06fc5d2144a1e", "20dc1890ca65e01856f31edf10126c2ad67e9d04", "2b9cd09b949b7e69933d18ae408397e803987151", "53ced1bf79a5ff0db025fee76a67a33f37e4286d", "61745e0a984e8fd9c45f545c3c83f7b99b110505", "d2e7d37dac6b9eb313bd7918f162485111608bea", "064fb3a6f2666e17f6d411c0a731d56aae0a785e", "37316fd0e34484c3afc29e97cdae6c5ba16e34a9", "83a6cacc126d85c45605797406262677c256a6af", "787d56ec5569f1054f490dcf9a9fb4b87b7990e8", "13e650e1297dd7c037bb7b81743aa78aa5aea9b2", "055144bbcd5fe555bbb17fab5fc9bc78c3d5c2a7", "2275762a28582716db92df6d525ed2481c7d7f14", "64c871cd7e0af7e1aeb94d98c6214eb8d8e8b989" ], "paperAbstract": "Heterogeneous Information Network (HIN) is a natural and general representation of data in modern large commercial recommender systems which involve heterogeneous types of data. HIN based recommenders face two problems: how to represent the high-level semantics of recommendations and how to fuse the heterogeneous information to make recommendations. In this paper, we solve the two problems by first introducing the concept of meta-graph to HIN-based recommendation, and then solving the information fusion problem with a \"matrix factorization (MF) + factorization machine (FM)\" approach. 
For the similarities generated by each meta-graph, we perform standard MF to generate latent features for both users and items. With different meta-graph based features, we propose to use FM with Group lasso (FMG) to automatically learn from the observed ratings to effectively select useful meta-graph based features. Experimental results on two real-world datasets, Amazon and Yelp, show the effectiveness of our approach compared to state-of-the-art FM and other HIN-based recommendation algorithms.", "pdfUrls": [ "http://www.cse.ust.hk/~hzhaoaf/data/kdd17-slide.pdf", "http://shichuan.org/hin/topic/Recommendation/2017.%20KDD2017%20Meta-Graph%20Based%20Recommendation%20Fusion%20over%20HIN.pdf", "http://doi.acm.org/10.1145/3097983.3098063", "http://www.cse.ust.hk/~hzhaoaf/data/kdd17-paper.pdf", "http://www.cse.ust.hk/~yqsong/papers/2017-KDD-HINRec-FMG.pdf", "http://www.cse.ust.hk/~qyaoaa/papers/kdd2017paper.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1db611aa5b87c731daecdd2f798dd7fd183a62ae", "sources": [ "DBLP" ], "title": "Meta-Graph Based Recommendation Fusion over Heterogeneous Information Networks", "venue": "KDD", "year": 2017 }, "1ddd410257b00370c1fe58377f02f608ff16c3bf": { "authors": [ { "ids": [ "2715350" ], "name": "Ra\u00fal Gracia Tinedo" }, { "ids": [ "39637073" ], "name": "Josep Samp\u00e9" }, { "ids": [ "2013067" ], "name": "Edgar Zamora-G\u00f3mez" }, { "ids": [ "1749419" ], "name": "Marc S\u00e1nchez Artigas" }, { "ids": [ "1695568" ], "name": "Pedro Garc\u00eda L\u00f3pez" }, { "ids": [ "2888002" ], "name": "Yosef Moatti" }, { "ids": [ "3318516" ], "name": "Eran Rom" } ], "doi": "", "doiUrl": "", "entities": [ "Control plane", "Forwarding plane", "High- and low-level", "Multitenancy", "Pervasive informatics", "Requirement", "Scalability", "Software-defined storage", "Swift (programming language)" ], "id": "1ddd410257b00370c1fe58377f02f608ff16c3bf", "inCitations": [ "86991eb6eed3e12f5b3985340416302a2208bceb", 
"214f4f4f555b608e59314168b08ed9daa4087200", "0b53cea748bdb5a404ed6999c23eb097622f0f08" ], "journalName": "", "journalPages": "243-256", "journalVolume": "", "outCitations": [ "138a9c2a9579435cd8cb0f24e7ec135821074557", "044604ae6a0ba104f6ad5bc18a4f1dede23c17fe", "003b9858f46501081d8609ba9ad12a5b34deffb3", "27277971cb33674044917aacb4cc448b7084095f", "4288771fb413ed92b006fd46ddbee56132e0a21b", "be4c6170ee4fd72ff5c8fc92e3d6ba5cba774cf6", "030c5d1b06de23942fbfc2f5eb3572f8dad24a90", "2dcb7a97c2fe1184ed4ec2b83e984214a908d0a2", "7b13f7706ab5341fa87128edb9860464675347ba", "4b8fbe5e18af87ce47b728bf7b4e644c9de0c95e", "1ea81e7477051ba7769dc50a97f3b2b01d5ee9da", "111e2d5634cb30d5d841cdb22563f9b371fb5f54", "65a2cb8a02795015b398856327bdccc36214cdc6", "8a33c47c2a3f0e46dbb30f5203b6a1c6d8fefd8f", "294b42e48050655469f4579daea4cae9bcffd861", "5053d80a916aa6be5d1f2253a5f420954da7a3e4", "18e35895d1f38608f61cbd4a9ecee05a28c1cd0b", "62c8ebe08eff7fe23d949940b7802f6aa0eb81b5", "75e74a0f013e9028c69df3addc0d161ef35d0c51", "31ee28ad7207eb9e3f558488786a888a42bbb907", "dbcdb4c402756b2b5ac910b9eb17ddb412290d16", "c4415396bd0f182a01ac6f9cf9e14894e51d08e4", "28eb88b180674f43381ede3e9573689496cfd321", "941ee828449a815e3bee12a967691d18ebfc0780", "1d2871c56d07a35e6709d535fbbb2df6b434962a", "92e536c1789bf301f456b01590006c9a3eff6cd8", "0b2c84be9e9f97f2464ad9d09be5f4c37edda47e", "a3de178c43b990b5755be4d640a7525f97ce2f33", "11ceeea43c970abede5aa95b4bfce621138a0bed", "807df0de011be333fc1dd06ac58c426e8b3437ef", "18a5db040efe208ec2728a91096d3cf3640282a1", "396514fb219879a4a18762cddfae2a6a607f439f" ], "paperAbstract": "Object stores are becoming pervasive due to their scalability and simplicity. Their broad adoption, however, contrasts with their rigidity for handling heterogeneous workloads and applications with evolving requirements, which prevents the adaptation of the system to such varied needs. 
In this work, we present Crystal, the first Software-Defined Storage (SDS) architecture whose core objective is to efficiently support multi-tenancy in object stores. Crystal adds a filtering abstraction at the data plane and exposes it to the control plane to enable high-level policies at the tenant, container and object granularities. Crystal translates these policies into a set of distributed controllers that can orchestrate filters at the data plane based on real-time workload information. We demonstrate Crystal through two use cases on top of OpenStack Swift: One that proves its storage automation capabilities, and another that differentiates IO bandwidth in a multi-tenant scenario. We show that Crystal is an extensible platform to deploy new SDS services for object stores with small overhead.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/fast17/fast17-gracia-tinedo.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_tinedo.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/gracia-tinedo", "http://www.usenix.org./system/files/conference/fast17/fast17-gracia-tinedo.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_tinedo.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/1ddd/410257b00370c1fe58377f02f608ff16c3bf.pdf", "s2Url": "https://semanticscholar.org/paper/1ddd410257b00370c1fe58377f02f608ff16c3bf", "sources": [ "DBLP" ], "title": "Crystal: Software-Defined Storage for Multi-Tenant Object Stores", "venue": "FAST", "year": 2017 }, "1de6ac748387859f43bc15e15ff5380df05bae34": { "authors": [ { "ids": [ "34854131" ], "name": "Shaden Smith" }, { "ids": [ "1686843" ], "name": "Jongsoo Park" }, { "ids": [ "1681616" ], "name": "George Karypis" } ], "doi": "10.1109/IPDPS.2017.84", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.84", "entities": [ "Algorithm", "Collaborative product development", "Computation", "Computer security", 
"Data-intensive computing", "Dynamic random-access memory", "E-commerce", "High memory", "Knights", "Load balancing (computing)", "MCDRAM", "Manycore processor", "Memory bandwidth", "Multi-core processor", "Sparse matrix", "Speedup" ], "id": "1de6ac748387859f43bc15e15ff5380df05bae34", "inCitations": [ "7c3c5b282948121244d330651e36b05f31c382cb", "dab53f03682789b483822bc521204bfb39ee2458", "235d090c8549ff3b353103380313d70e33c47e4e", "232d892b423c24aaefcec9eb2ae211316be0f025" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1058-1067", "journalVolume": "", "outCitations": [ "47a6a274c648aeb5ff02eb09aff7ea310eae122e", "86159c2269566286a5e8f724deab749c9e2750b1", "08368dae4f102176b8e50a64ddde8a8150cde26e", "53ced1bf79a5ff0db025fee76a67a33f37e4286d", "41c4ccf14aa43d5694d69788894cbaa17f91f6ca", "1d0f25989452abbbc8feaf00a034ff110fc4b350", "38b389580d774ce513284e671ff3bbcef0258de2", "7d50b6883c38e34016a4841ec4ab2b92bfdfe3ad", "28552ecf4eaedb3461edca97304b29082b02fbab", "2e8ab628bc9f256c11c898aa44f049143c74d05d", "a9653a27052d666b7ed47524871dc9c3a9b92cc4", "31af4b8793e93fd35e89569ccd663ae8777f0072", "255aeb5c2a8eea15db08c617481ddbb35a41bfe4", "008a6e4b2763736d2c6363ee6b546b09c0022e53", "ac0a0828c17c040c065a9285264094ba2560497d", "2d03baec8ac1568e6813aa43d625d552524f977e", "53a225f2843e8544ca9c615ecfcc5fad26083e49", "00dbf46a7a4ba6222ac5d44c1a8c09f261e5693c", "231f97057e1efed073c20ccdf3aa3c5aaf063ffb", "00ca166ea4521f5cc3d23e74a1b1090386b6831f", "5a3c8589d63fcee5dd40ef43aea6ef38e2fda9a8", "d6c4c76076efecb15655274adc648af8a445ed3a", "2d8d293baed5060034326781b261ca5f6464be11" ], "paperAbstract": "HPC systems are increasingly used for data intensive computations which exhibit irregular memory accesses, non-uniform work distributions, large memory footprints, and high memory bandwidth demands. 
To address these challenging demands, HPC systems are turning to many-core architectures that feature a large number of energy-efficient cores backed by high-bandwidth memory. These features are exemplified in Intel's recent Knights Landing many-core processor (KNL), which typically has 68 cores and 16GB of on-package multi-channel DRAM (MCDRAM). This work investigates how the novel architectural features offered by KNL can be used in the context of decomposing sparse, unstructured tensors using the canonical polyadic decomposition (CPD). The CPD is used extensively to analyze large multi-way datasets arising in various areas including precision healthcare, cybersecurity, and e-commerce. Towards this end, we (i) develop problem decompositions for the CPD which are amenable to hundreds of concurrent threads while maintaining load balance and low synchronization costs; and (ii) explore the utilization of architectural features such as MCDRAM. Using one KNL processor, our algorithm achieves up to 1.8x speedup over a dual socket Intel Xeon system with 44 cores.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.84", "http://shaden.io/pdf/2017-Smith-KNL-slides.pdf", "http://shaden.io/pdf/2017-Smith-KNL.pdf", "https://www.cs.umn.edu/sites/cs.umn.edu/files/tech_reports/14-006_0.pdf", "http://glaros.dtc.umn.edu/gkhome/fetch/papers/shaden2017ipdps.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1de6ac748387859f43bc15e15ff5380df05bae34", "sources": [ "DBLP" ], "title": "Sparse Tensor Factorization on Many-Core Processors with High-Bandwidth Memory", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "1e0f15d5826680d17ae5356ff82f732baa8168e3": { "authors": [ { "ids": [ "1940437" ], "name": "Huiba Li" }, { "ids": [ "1908353" ], "name": "Yiming Zhang" }, { "ids": [ "1711631" ], "name": "Zhiming Zhang" }, { "ids": [ "1789188" ], "name": "Shengyun Liu" }, { "ids": [ "1718853" ], "name": 
"Dongsheng Li" }, { "ids": [ "1767061" ], "name": "Xiaohui Liu" }, { "ids": [ "2653582" ], "name": "Yuxing Peng" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud storage", "Delta encoding", "Disk image", "Durability (database systems)", "Erasure code", "Foreach loop", "Speculative execution" ], "id": "1e0f15d5826680d17ae5356ff82f732baa8168e3", "inCitations": [], "journalName": "", "journalPages": "581-587", "journalVolume": "", "outCitations": [ "0e69ee351252fd09a50e6baae53b4776009825ae", "152f85e4bd2853a458d1350bb64d4a6adca24832", "fdc7988ccc8850cccfd5b0dd75a336b9ea4759fa", "3168681722207c86827e596860115a2977ce761f", "514a5c15e8cf3f681febecad954a4508d9189c99", "07add9c98a979e732cfa215c901adb1975f3f43a", "05dc4814248843389e8d2557e2d1f0c45d494e10", "3b19249a576a6d0bd1d927062a3be47dd90e7237", "1cc9ebeab21d668c8fb197a2498380e95c6a65fb", "7ae26da9b7666812857883536870c315538f7f10", "35c2f7e0454adc0130c4279fce84a31701cebc67", "17b9c7fa7e420b427f9c443afcfa2304b1a54b1d", "088e3e939ad234b6fdd0e321290fb26937dc2553", "c6d01d9365d7b134ef2efed0063820d1b9be659a", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "8e5874938b2919a6331322eb2332cfaf3d103dcb", "58b628792d3eb22a034a871ed3cf373afe591928", "25e5d5a046afa5fcde7be23d087ae69f4b438e13", "5635733e78abf410539e11dde9ebd1dc65e12500", "45ae3be13288fbcf7ace9cf7266b45d54316a406", "3f4297d2e5ae196f30f9a18dcd8400dfca5d2e47", "18a5f443299784479e78d9e77f175af57cb2fa2b", "a99dbbf6fa6546a21d451c06a2a878b1791f0da6", "2da760f90c3d2bf6598becdde9063093f488548c", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "098d792d1783b5f6fc098203f71f21f5d053c653", "75032b6df2f3988d2cb6988c73cbc01c7b9e80cb", "b3f3376296b46d83984de5d8a1b6087710a0da1f", "361c6ee2571a20be19345e1dfdcb5ba4e9f1c196", "47b78e7eb12859a141aed6a28a4e301eb0352629" ], "paperAbstract": "Erasure coding (EC) has been widely used in cloud storage systems because it effectively reduces storage redundancy while providing the same level of durability. 
However, EC introduces significant overhead to small write operations which perform partial write to an entire EC group. This has been a major barrier for EC to be widely adopted in small-write-intensive systems such as virtual disk service. Parity logging (PL) appends parity changes to a journal to accelerate partial writes. However, since previous PL schemes have to perform a time-consuming write-after-read for each partial write, i.e., read the current value of the data and then compute and write the parity delta, their write performance is still much lower than that of replication-based storage. This paper presents PARIX, a speculative partial write scheme for fast parity logging. We transform the original formula of parity calculation, so as to use the data deltas (between the current/original data values), instead of the parity deltas, to calculate the parities during journal replay. For each partial write, this allows PARIX to speculatively log only the current value of the data. The original value is needed only once in a journal when performing the first write to the data. For a series of n partial writes to the same data, PARIX performs pure write (instead of write-after-read) for the last n\u22121 ones while only introducing a small penalty of an extra network RTT (round-trip time) to the first one. 
Evaluation results show that PARIX remarkably outperforms stateof-the-art PL schemes in partial write performance.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_li_1.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-li_huiba.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/li-huiba" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/22b5/089fbc2399e28f142cdfbcd7e5e35e257824.pdf", "s2Url": "https://semanticscholar.org/paper/1e0f15d5826680d17ae5356ff82f732baa8168e3", "sources": [ "DBLP" ], "title": "PARIX: Speculative Partial Writes in Erasure-Coded Systems", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "1e19568831a78a97c287f9848f7deca97e9574f2": { "authors": [ { "ids": [ "31452614" ], "name": "Ren Zhang" }, { "ids": [ "1718124" ], "name": "Bart Preneel" } ], "doi": "10.1145/3143361.3143389", "doiUrl": "https://doi.org/10.1145/3143361.3143389", "entities": [ "Backup", "Bitcoin", "Block size (cryptography)", "Cryptocurrency", "Throughput", "X86" ], "id": "1e19568831a78a97c287f9848f7deca97e9574f2", "inCitations": [ "f7b7573a727a2a922a244d697101eacaa7ba1d96" ], "journalName": "", "journalPages": "108-119", "journalVolume": "", "outCitations": [ "6fc9cd15134cdd282e25b8ea58b38240e96bfe90", "75d83792b880757a09e9a72978cc29beb57c4ad5", "7bf78054192d98e999edcdf08971a5eed42518d2", "35fe18606529d82ce3fc90961dd6813c92713b3c", "1d4abd83093f1343ee1f5b8ffb3c5999e3754c90", "fd6073c87dfa5ecf1aecdb5334c5d0c7e60d3c6f", "7bed3507a73099b5bb55be17fe3d436c82e39550", "6da1b216120a92debe1e6c3be6700ab8aaa38651", "6b766f6003886cd55ef7b2459ee9b404934aca31", "2f7bb6613154e1b3580c0114bf2cfb3c8ceb477e", "722c447bcf198b60279e4c1f447acb015f94e622", "4009f9edf4f3f3fa231f66bc24851631799589eb", "226cfb67d2d8eba835f2ec695fe28b78b556a19f", "bd3cf99171c5c18a8a087b5b13ce2acd489df694", "54a99d1ae66c525f9fdc0348317cca912f68b917", "70f84828183ba8116654a561fac630b6d646e397" ], 
"paperAbstract": "Bitcoin has not only attracted many users but also been considered as a technical breakthrough by academia. However, the expanding potential of Bitcoin is largely untapped due to its limited throughput. The Bitcoin community is now facing its biggest crisis in history as the community splits on how to increase the throughput. Among various proposals, Bitcoin Unlimited recently became the most popular candidate, as it allows miners to collectively decide the block size limit according to the real network capacity. However, the security of BU is heatedly debated and no consensus has been reached as the issue is discussed in different miner incentive models. In this paper, we systematically evaluate BU's security with three incentive models via testing the two major arguments of BU supporters: the block validity consensus is not necessary for BU's security; such consensus would emerge in BU out of economic incentives. Our results invalidate both arguments and therefore disprove BU's security claims. Our paper further contributes to the field by addressing the necessity of a prescribed block validity consensus for cryptocurrencies.", "pdfUrls": [ "https://eprint.iacr.org/2017/686.pdf", "http://doi.acm.org/10.1145/3143361.3143389", "http://diyhpl.us/~bryan/papers2/bitcoin/On%20the%20necessity%20of%20a%20prescribed%20block%20validity%20consensus:%20Analyzing%20bitcoin%20unlimited%20mining%20protocol%20-%202017.pdf", "http://eprint.iacr.org/2017/686" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1e19568831a78a97c287f9848f7deca97e9574f2", "sources": [ "DBLP" ], "title": "On the Necessity of a Prescribed Block Validity Consensus: Analyzing Bitcoin Unlimited Mining Protocol", "venue": "CoNEXT", "year": 2017 }, "1e1e2e271506c1793609d45040ea7356e4cd4a68": { "authors": [ { "ids": [ "36736387" ], "name": "Nikita Mishra" }, { "ids": [ "1739581" ], "name": "John D. 
Lafferty" }, { "ids": [ "40085771" ], "name": "Henry Hoffmann" } ], "doi": "10.1109/ICAC.2017.29", "doiUrl": "https://doi.org/10.1109/ICAC.2017.29", "entities": [ "Heuristic", "IPsec", "Interference (communication)", "Linux", "Machine learning", "Matrix regularization", "Scheduling (computing)", "Throughput", "X86" ], "id": "1e1e2e271506c1793609d45040ea7356e4cd4a68", "inCitations": [ "4e544e6db8a17252e0cd6da00401bba734ad64b8", "18a16984f7a2f0f400dc1fc345ef1065b439dc72", "aafd0a6cb89710e2bfbee835914d63ddfecebca1" ], "journalName": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "journalPages": "125-134", "journalVolume": "", "outCitations": [ "f08f79290a79800969a33ab209bd20931160557a", "3c0bc4e9d30719269b0048d4f36752ab964145dd", "0dae827e8948c9e9bbbfdc9c8be0bcce72c95f0d", "b8234539e7720153a1e36f0da19d6cf599f60ea8", "bf82f0b0cf448b18fec979d25368c6cd9c04ce0c", "3000e77ed7282d9fb27216f3e862a3769119d89e", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "9e18015bffe5e5f0ed7240e7af7ed19a934ae32f", "277f20ddc0e9fa593753ef2778110508372c597f", "3b8b93e8aeb9b79965fc8807242ef6c202638ba0", "d9e895e013e001ce9e975213dd843f8db1b5cf32", "2f69fc9dd59048b9096b35e31ab9be1c28a03ae1", "0597e47c764cff4257c7ca0500c6a6d866b28152", "1eb845e672abc3e172725639eece560c3cd5ec2a", "46217f372a75dddc2254fdbc6b9418ba3554e453", "8242f42f077b59ff239e8cab19b99d94c190c608", "590bd345ef4b8f274af3363a52b7d8f518cdc08a", "c022ff7d75107e0a8f944cbc138c8d87a2411f0c", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "1212e1615891b005df762b669f396b1dce8a78ee", "3da4f5d052b3e04346507cf16471984e74fe63cb", "9c96514250c4a35deba5ae3ffb93e9731fe23a79", "110c050c6c992d2b956f7b47d717810ac5c91bdc", "053d396415240493536e6003d789dd9c9376033d", "8c284352b49db7d49e6dad70eb1dd3ea7adcca63", "7a978f2902460e732c50c36a171deb11733df1fc", "1b65af0b2847cf6edb1461eda659f08be27bc76d", "0703011e71e46b7d28f0ac3c918e3389f2c97ff7", "c1c9ea7e5baae0d42deeb27d1be7cbee274d439b", "5233d7195acccd2681f20b9f60e9f12ec1cbba70", 
"4caa5ec3be365a341ea9f8dcaa8a4f9a7bebe304", "af3c967e1ba1730ef3a2943ba654ab39fb1967be", "17e5d9d2c587f35f037175a8039f5a372b1e4379", "2361474f4a3b524a2761a4eee6046f956ed3c430", "2f919f99bf5b6d5667968c318b62d7335814ceff" ], "paperAbstract": "Independent applications co-scheduled on the same hardware will interfere with one another, affecting performance in complicated ways. Predicting this interference is key to efficiently scheduling applications on shared hardware, but forming accurate predictions is difficult because there are many shared hardware features that could lead to the interference. In this paper we investigate machine learning approaches (specifically, regularization) to understand the relation between those hardware features and application interference. We propose ESP, a highly accurate and fast regularization technique for application interference prediction. To demonstrate this practicality, we implement ESP and integrate it into a scheduler for both single and multi-node Linux/x86 systems and compare the scheduling performance to state-of-the-art heuristics. We find that ESP-based schedulers increase throughput by 1.25-1.8× depending on the scheduling scenario. 
Additionally, we find that ESP's accurate predictions allow schedulers to avoid catastrophic decisions, which heuristic approaches fundamentally cannot detect.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICAC.2017.29", "http://people.cs.uchicago.edu/~hankhoffmann/mishra-icac2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1e1e2e271506c1793609d45040ea7356e4cd4a68", "sources": [ "DBLP" ], "title": "ESP: A Machine Learning Approach to Predicting Application Interference", "venue": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "year": 2017 }, "1e23b9e0a2f95521d72fe39c0e310450e782b264": { "authors": [ { "ids": [ "1792031" ], "name": "Tao Lu" }, { "ids": [ "7920553" ], "name": "Eric Suchyta" }, { "ids": [ "2193258" ], "name": "David Pugmire" }, { "ids": [ "39899324" ], "name": "Jong Choi" }, { "ids": [ "1736095" ], "name": "Scott Klasky" }, { "ids": [ "1727669" ], "name": "Qing Liu" }, { "ids": [ "1734819" ], "name": "Norbert Podhorszki" }, { "ids": [ "2942322" ], "name": "Mark Ainsworth" }, { "ids": [ "4003076" ], "name": "Matthew Wolf" } ], "doi": "10.1109/CLUSTER.2017.62", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.62", "entities": [ "Assistive technology", "Code refactoring", "Computer data storage", "Data model", "Decimation (signal processing)", "Delta encoding", "IBM WebSphere eXtreme Scale", "Multitier architecture", "Pervasive informatics", "Simulation", "Software ecosystem", "Supercomputer", "Triangulated irregular network" ], "id": "1e23b9e0a2f95521d72fe39c0e310450e782b264", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "58-69", "journalVolume": "", "outCitations": [ "55a022ab83a0f848f3e18693ac3a4ffe016f2704", "64d4f6759b32697e6cbebf901624c93c0a0c1744", "4908fe53a91465eaf95b21c4ca4f05378b90dcc4", "5066738eb7bbcd5f2e793488f7645f8ed946161b", "8097630668c8115f03b6d320a7b5cdc1f005066e", 
"2b0bce44b3840796d4ed578c43542ebf839d74af", "093fc19d440f33247e545ec6c047e0aa0afb0863", "05e0dd9ba23f99acf5537b51f3a3263d3febe6dc", "41c80e4077e7630688a8a511125c4662f37e6d34", "24b25dd17ee2396910f3df74481ee225d5d440bd", "1bbdc1c5ee0ac447472bc3f4de720ab885ff4c43", "4e63fd64ac90778b8c431e37f54b5cfd01e0f379", "2185f1a1e8736207c91ac00cbd34a08e8a8a0c3e", "478b9e77e2bd5b4fb4d8628e8fbafdb2d1a0d23d", "d2f8a260a7ae14fc162d9c7976c6c6b7e5f00a77", "0d98b995638e1aa0de2f4a66cb727b05fea99b89", "7e7dc60e3c684e0e626909f73551334b69ea8817", "57d0f3ef74f97c58661e705218d9f19e04e519a3", "509f4a29499687ff1f0e3af5a00c0149aee66448", "a113e9ef7b31d0b9131d905ee15f5556fdb0652c", "09b71bc8d83e2583319b5bd42838e6c4ffa0bd70", "009342aa77a56c46a475fa85e66506219f271526", "1d3a151a18ac5a479fa46d342e464751dd668d23", "4e055f0ce6220e6d75aa2c6d7de50455dea572ef", "63bcdebeb86065679aaaad1eea7173c633e748e2", "4432b1ef0b18015f3f20f09d8a80ee3dc6a3edab", "7777d299e7b4217fc4b80234994b5a68b3031199", "ea1db5a68cea156b11eadb3d2ddcb791e5991949", "5355bcc49732bc71674e872097257c95f9e9a3ac", "616c3d993911812577235adfe994fdfe74af8f8f", "15c7d3d5cfce46110a5aa5c6a482e359a96082b4", "721c5be47c923d9c0303a3eefd3d42a57e0add03", "88a32f0546fccb673225fd2fcc4d9918e7a42298", "d158b5f2c77b8127aebcfbfceabe0b818c6bdc7f", "04fa1a1d9298f7d56cab3f897def24057d48993f", "d004de96c6c1712e77a802534c339628e626945d" ], "paperAbstract": "Scientific simulations on high performance computing (HPC) platforms generate large quantities of data. To bridge the widening gap between compute and I/O, and enable data to be more efficiently stored and analyzed, simulation outputs need to be refactored, reduced, and appropriately mapped to storage tiers. However, a systematic solution to support these steps has been lacking on the current HPC software ecosystem. To that end, this paper develops Canopus, a progressive JPEGlike data management scheme for storing and analyzing big scientific data. 
It co-designs the data decimation, compression and data storage, taking the hardware characteristics of each storage tier into considerations. With reasonably low overhead, our approach refactors simulation data into a much smaller, reduced-accuracy base dataset, and a series of deltas that is used to augment the accuracy if needed. The base dataset and deltas are compressed and written to multiple storage tiers. Data saved on different tiers can then be selectively retrieved to restore the level of accuracy that satisfies data analytics. Thus, Canopus provides a paradigm shift towards elastic data analytics and enables end users to make trade-offs between analysis speed and accuracy on-the-fly. We evaluate the impact of Canopus on unstructured triangular meshes, a pervasive data model used by scientific modeling and simulations. In particular, we demonstrate the progressive data exploration of Canopus using the “blob detection” use case on the fusion simulation data.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.62" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1e23b9e0a2f95521d72fe39c0e310450e782b264", "sources": [ "DBLP" ], "title": "Canopus: A Paradigm Shift Towards Elastic Extreme-Scale Data Analytics on HPC Storage", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "1e557022b21ced558596fd37c26ce6a006e08bc8": { "authors": [ { "ids": [ "2061231" ], "name": "Caroline Trippel" }, { "ids": [ "2851729" ], "name": "Yatin A. 
Manerkar" }, { "ids": [ "3583740" ], "name": "Daniel Lustig" }, { "ids": [ "1790200" ], "name": "Michael Pellauer" }, { "ids": [ "1708269" ], "name": "Margaret Martonosi" } ], "doi": "10.1145/3037697.3037719", "doiUrl": "https://doi.org/10.1145/3037697.3037719", "entities": [ "C11 (C standard revision)", "Compiler", "High- and low-level", "High-level programming language", "Interaction", "Litmus", "Memory model (programming)", "Microarchitecture", "Multi-chip module", "Open-source software", "RISC-V", "Shared memory", "Software bug", "Solution stack", "Systems design", "Verification and validation" ], "id": "1e557022b21ced558596fd37c26ce6a006e08bc8", "inCitations": [ "d4d5a5baab3c0418447566724a4fd16c96e53517", "8d778809125172e79d5528de7dd4ab4af11e25d7", "deba49c12c039fbd667277207dbaa812fba2dece", "9c79e22df657e92d6d895ac424815ea750e6dc0c", "7cdf63e05545333f10f69317383a3a88c6e29d03" ], "journalName": "", "journalPages": "119-133", "journalVolume": "", "outCitations": [ "59857e2857df6d69a12e3cbaa720648b5c299159", "4292384b0b798feea238c7f0437d88476e342771", "93bd29a9d14d2a7facff9437caadd3b342416c67", "316fbad5e91f3102a7335dc4e4c854cc928d1a2a", "33dcafd805a3b44fd64270028633032ff0bb6fac", "d4c5e14e27b45266532c74f6aa0d51a1a4280e7c", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "5406b20a9efa2e082638e7064e86342dc48f99c1", "0c10529346c4d2d5d4462636a0b3a0dd9fb8d25c", "13210f969b8b4d1d12d63a9a8361028a6be498bc", "69e6fb41751ebf0a6b99522a2fabcd3879e8cf2b", "7c452c52ea03a4fac7578c8de13174334e8e0ee9", "1476bc7362e02995a8869ed6d3703e740284f450", "5d8223b9caf90736f4ca75750290a1a25f66b7a8", "0a89fafea6184b469511ba73735d451da92c18fa", "19aab49210282cc19ec4fec06bed029a06497bf8", "16dc592aa326ecd1f8d46ca7e3485a7311af3dba", "3a850f54e6dea4728aaa6a71ba222b7d612cd2b1", "47b8b9cd7b064619e6bf25cde95e7af4bf5bc197", "413d938109026fb513083a3b3f1c616da005639c", "51854f6133cd8d890beb8576e6f0b44a33916803", "3415d0e437f2ecddee7a8e2efa9010d22c211a68", "e2cd72273908651ea11f9cb45d0dd5d755ca3bd0", 
"1b9e9bfdc66140d2eb192e4ac8aca9281f0239b8", "3a66a682ee36cde0738824b152a51df2ccbb80fd", "34d2db88f259d69022e7492225301ffd6e0f55c0", "1ea33a0ba2ded13492a4afa6817f953eede0e037", "5ae07f575fb2feb1e03d08af9fe29fafe0a306d7", "ae8ee52b076263e1108ac35714bf15c6dd514f11", "ce597a5e8b76a54907d4d08d41c6579c4be0b664", "071686697917fd56ae8ace0c4d6bfcf3bef5700a", "0ed62848d5c9e01f692c0c0b3851848ac7bb0764", "3e033205357becbb70e0b697134a5fe6fa17da43", "99d4c73c44f1049bf1e31f4f11f561d67eb5524d", "987adbbb4b5baff729cf3907d7f05a86e8651849", "62bd72d7a4160bd1a35191c51137d11cfbe30cf7", "0f0046ae34181e08594ad9be7b5bfffdbaeda177", "263f588edb69272ccbf9f1b78a9625b914235f6d", "14d5edd85e4dccfa7457cae40cb33ef9eafdd68b", "1aac5c5e6dda36e455bed8af80dd1fd6bb31321e" ], "paperAbstract": "Memory consistency models (MCMs) which govern inter-module interactions in a shared memory system, are a significant, yet often under-appreciated, aspect of system design. MCMs are defined at the various layers of the hardware-software stack, requiring thoroughly verified specifications, compilers, and implementations at the interfaces between layers. Current verification techniques evaluate segments of the system stack in isolation, such as proving compiler mappings from a high-level language (HLL) to an ISA or proving validity of a microarchitectural implementation of an ISA.\n This paper makes a case for full-stack MCM verification and provides a toolflow, TriCheck, capable of verifying that the HLL, compiler, ISA, and implementation collectively uphold MCM requirements. The work showcases TriCheck's ability to evaluate a proposed ISA MCM in order to ensure that each layer and each mapping is correct and complete. Specifically, we apply TriCheck to the open source RISC-V ISA [55], seeking to verify accurate, efficient, and legal compilations from C11. We uncover under-specifications and potential inefficiencies in the current RISC-V ISA documentation and identify possible solutions for each. 
As an example, we find that a RISC-V-compliant microarchitecture allows 144 outcomes forbidden by C11 to be observed out of 1,701 litmus tests examined. Overall, this paper demonstrates the necessity of full-stack verification for detecting MCM-related bugs in the hardware-software stack.", "pdfUrls": [ "http://www.cs.princeton.edu/~ctrippel/ctrippel_ASPLOS17.pdf", "https://arxiv.org/pdf/1608.07547v1.pdf", "http://mrmgroup.cs.princeton.edu/papers/ctrippel_ASPLOS17.pdf", "http://arxiv.org/pdf/1608.07547v1.pdf", "https://arxiv.org/pdf/1608.07547v2.pdf", "http://doi.acm.org/10.1145/3037697.3037719", "http://www.cs.princeton.edu/~ctrippel/ctrippel_ASPLOS17_talk.pdf", "http://www.cs.princeton.edu/~manerkar/papers/ctrippel_ASPLOS17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1e557022b21ced558596fd37c26ce6a006e08bc8", "sources": [ "DBLP" ], "title": "TriCheck: Memory Model Verification at the Trisection of Software, Hardware, and ISA", "venue": "ASPLOS", "year": 2017 }, "1e5bc511844f06e02700ee5fb255d4883fbaa973": { "authors": [ { "ids": [ "30299336" ], "name": "Hongbo Li" }, { "ids": [ "1756221" ], "name": "Zizhong Chen" }, { "ids": [ "1689014" ], "name": "Rajiv Gupta" } ], "doi": "10.1145/3126908.3126938", "doiUrl": "https://doi.org/10.1145/3126908.3126938", "entities": [ "Batch processing", "Computation", "Hang (computing)", "Scalability", "Supercomputer", "Timeout (computing)" ], "id": "1e5bc511844f06e02700ee5fb255d4883fbaa973", "inCitations": [], "journalName": "", "journalPages": "63:1-63:12", "journalVolume": "", "outCitations": [ "3cb204dede3e9d3dcb15f78fc4b4797a14f3530e", "5ed94e246e5d9755a9917548c0d22156e0f3a935", "1cc7bf68d9a8b304de057d042cc2a05dbae1a5c5", "2d3f2fabbba38867725cd8d3f72371b6d429cf8b", "06b5ce2fc7e86f2b4ee61dee4ff564dfa576d2d9", "01d62cd850496455ce1616500f491690effa5c98", "64209e02ea7889f616b1f4e2d79231a044ecc21f", "1382b9e97f88050d65c3aa19842a76252cdf9621", "990ccab41f5d2c04357dd712e77b0eecd6e91846", 
"de9f9c4ce9caf6158c5aaabd4d65ec251521c6bb", "050b6a5f0e650a12223c27fb133eb5e398df8480", "d29a1e9e6b7cafb3a32b505129b4f984644fbbd2", "61d5dbdd3f959df99ef747247085b1f2df66d599", "3679eeccf323ff83d68ad539f055da8939d26d01", "56998b637705900121f2f02a8c153cb099c7ba49", "587a8cd3a42cfc5db7b62ab8aab17f264f276f77", "0c1ef9519c3dcd4a309650bc24e5d5f906e369ed", "9977226e4acfafbf9ab27836f5a130364305b795", "566707209e3ace646b3b0cb1a3bc7d7215b1ec55", "c2169aad32215a190ca94d24723c9c3cddc21b8f", "8dc210dfdeedf56250c9ba1e5b60e2a87c660582", "0dc139928763bd7378a09076b94861660ba1c695", "8e07693e2d6b1a9949f7d3b3e81060e69f4bb420", "5972f4e381ca98bce0851a6336d59c795ca4f3f4" ], "paperAbstract": "While program hangs on large parallel systems can be detected via the widely used timeout mechanism, it is difficult for the users to set the timeout - too small a timeout leads to high false alarm rates and too large a timeout wastes a vast amount of valuable computing resources. To address the above problems with hang detection, this paper presents ParaStack, an extremely lightweight tool to detect hangs in a timely manner with high accuracy, negligible overhead with great scalability, and without requiring the user to select a timeout value. For a detected hang, it provides direction for further analysis by telling users whether the hang is the result of an error in the computation phase or the communication phase. For a computation-error induced hang, our tool pinpoints the faulty process by excluding hundreds and thousands of other processes. We have adapted ParaStack to work with the Torque and Slurm parallel batch schedulers and validated its functionality and performance on Tianhe-2 and Stampede that are respectively the world's current 2nd and 12th fastest supercomputers. Experimental results demonstrate that ParaStack detects hangs in a timely manner at negligible overhead with over 99% accuracy. 
No false alarm is observed in correct runs taking 66 hours at scale of 256 processes and 39.7 hours at scale of 1024 processes. ParaStack accurately reports the faulty process for computation-error induced hangs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126938", "http://www.cs.ucr.edu/~gupta/research/Publications/Comp/parastack-SC2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1e5bc511844f06e02700ee5fb255d4883fbaa973", "sources": [ "DBLP" ], "title": "Parastack: efficient hang detection for MPI programs at large scale", "venue": "SC", "year": 2017 }, "1e5eed529ca7ec7f5fb6638c3be1053f000dd1f9": { "authors": [ { "ids": [ "4081852" ], "name": "Zhu" }, { "ids": [ "2879160" ], "name": "Y. Imamura" }, { "ids": [], "name": "M. Nikovski" }, { "ids": [ "40066036" ], "name": "D. N. Keogh" }, { "ids": [ "1740068" ], "name": "Yan Zhu" }, { "ids": [ "8886175" ], "name": "Makoto Imamura" }, { "ids": [ "2965906" ], "name": "Daniel Nikovski" }, { "ids": [ "1732516" ], "name": "Eamonn Keogh" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Broadway (microprocessor)", "Copyright", "Data mining", "Fees", "Laboratory", "Link analysis", "Scalability", "Sequence motif", "Thrombocytopenia", "Time series", "algorithm", "paragraphs" ], "id": "1e5eed529ca7ec7f5fb6638c3be1053f000dd1f9", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "1cb514143a7ef9580274ffd608313732ba898817", "3e0a271d9d5e2e42ed1212cf9e8a220de11633e1", "887a246dae18b8e9a65826bea88983e4f7bf5270", "35a330dacb4731830b3aa7b9c121a2f39b65c13b", "754e32da1c4c250486dc1c1790a5a0662bee5c21", "19bc271d83b38c616e6dc79e241f37f724c1837e", "61523cfe6f51859e00aa8ce320114c03151208fa", "683e249c72a1f87e82d7aeb24e64b725da2a7665", "b6eaebe1e2737751ebee1fbe9c91d4c2052edb6a", "b10e5d308003dbb7d45fcf9bd12d7868752007fa", "646ac6ed88afadda8fd9290285791b9317b526ee", "8e0ff4b8bbeac8f301e00494a39bd1b4a199fba1", 
"0772e7a5e0ce50ab5e0fb6c662c617b75d728562", "047fd01bf1d71197b6f1b05c8fbdca511e7875d3", "2b32dd5199db8c3a8a04d975c41c745149bd52ef", "64e6764ad439d64ba54597b940dbfb2cb8fc1257", "280f9cc6ee7679d02a7b8b58d08173628057f3ea", "a7006d4e77fd8ac76bf208a2de91de4f183a8984" ], "paperAbstract": "Since their introduction over a decade ago, time series motifs have become a fundamental tool for time series analytics, finding diverse uses in dozens of domains. In this work we introduce Time Series Chains, which are related to, but distinct from, time series motifs. Informally, time series chains are a temporally ordered set of subsequence patterns, such that each pattern is similar to the pattern that preceded it, but the first and last patterns are arbitrarily dissimilar. In the discrete space, this is similar to extracting the text chain \u201dhit, hot, dot, dog\u201d from a paragraph. The first and last words have nothing in common, yet they are connected by a chain of words with a small mutual difference. Time series chains can capture the evolution of systems, and help predict the future. As such, they potentially have implications for prognostics. In this work, we introduce a robust definition of time series chains, and a scalable algorithm that allows us to discover them in massive datasets. International Conference on Data Mining This work may not be copied or reproduced in whole or in part for any commercial purpose. Permission to copy in whole or in part without payment of fee is granted for nonprofit educational and research purposes provided that all such whole or partial copies include the following: a notice that such copying is by permission of Mitsubishi Electric Research Laboratories, Inc.; an acknowledgment of the authors and individual contributions to the work; and all applicable portions of the copyright notice. 
Copying, reproduction, or republishing for any other purpose shall require a license with payment of fee to Mitsubishi Electric Research Laboratories, Inc. All rights reserved. Copyright c \u00a9 Mitsubishi Electric Research Laboratories, Inc., 2017 201 Broadway, Cambridge, Massachusetts 02139 Matrix Profile VII: Time Series Chains: A New Primitive for Time Series Data Mining Yan Zhu, Makoto Imamura, Daniel Nikovski, Eamonn Keogh University of California, Riverside, yzhu015@ucr.edu, eamonn@cs.ucr.edu Tokai University, imamura@tsc.u-tokai.ac.jp Mitsubishi Electric Research Laboratories, nikovski@merl.com Abstract\u2014 Since their introduction over a decade ago, time series motifs have become a fundamental tool for time series analytics, finding diverse uses in dozens of domains. In this work we introduce Time Series Chains, which are related to, but distinct from, time series motifs. Informally, time series chains are a temporally ordered set of subsequence patterns, such that each pattern is similar to the pattern that preceded it, but the first and last patterns are arbitrarily dissimilar. In the discrete space, this is similar to extracting the text chain \u201chit, hot, dot, dog\u201d from a paragraph. The first and last words have nothing in common, yet they are connected by a chain of words with a small mutual difference. Time series chains can capture the evolution of systems, and help predict the future. As such, they potentially have implications for prognostics. In this work, we introduce a robust definition of time series chains, and a scalable algorithm that allows us to discover them in massive datasets. Since their introduction over a decade ago, time series motifs have become a fundamental tool for time series analytics, finding diverse uses in dozens of domains. In this work we introduce Time Series Chains, which are related to, but distinct from, time series motifs. 
Informally, time series chains are a temporally ordered set of subsequence patterns, such that each pattern is similar to the pattern that preceded it, but the first and last patterns are arbitrarily dissimilar. In the discrete space, this is similar to extracting the text chain \u201chit, hot, dot, dog\u201d from a paragraph. The first and last words have nothing in common, yet they are connected by a chain of words with a small mutual difference. Time series chains can capture the evolution of systems, and help predict the future. As such, they potentially have implications for prognostics. In this work, we introduce a robust definition of time series chains, and a scalable algorithm that allows us to discover them in massive datasets. Keywords\u2014 Time Series, Motifs, Prognostics, Link Analysis", "pdfUrls": [ "http://www.merl.com/publications/docs/TR2017-168.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/1e5e/ed529ca7ec7f5fb6638c3be1053f000dd1f9.pdf", "s2Url": "https://semanticscholar.org/paper/1e5eed529ca7ec7f5fb6638c3be1053f000dd1f9", "sources": [], "title": "Matrix Profile VII: Time Series Chains: A New Primitive for Time Series Data Mining", "venue": "" }, "1e6043554b903ff5c6d0f43eb0785a8a57090da9": { "authors": [ { "ids": [ "36197595" ], "name": "Md. 
Naim" }, { "ids": [ "2296505" ], "name": "Fredrik Manne" }, { "ids": [ "3285377" ], "name": "Mahantesh Halappanavar" }, { "ids": [ "2606269" ], "name": "Antonino Tumeo" } ], "doi": "10.1109/IPDPS.2017.16", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.16", "entities": [ "Algorithm", "Blue Gene", "Experiment", "Graphics processing unit", "Load balancing (computing)", "Louvain Modularity", "Multidimensional scaling", "Parallel computing", "Sequential algorithm", "Shared memory", "Supercomputer" ], "id": "1e6043554b903ff5c6d0f43eb0785a8a57090da9", "inCitations": [ "94d77111eee3b6c24f19f7397200cee33eb385d4" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "625-634", "journalVolume": "", "outCitations": [ "0af803edccda82003b909c630a074c3e1061b0ab", "0971a05d435ebc79711e1e74e029416b9d29b05d", "38d925c33a8433d36b9409b72d049d035fdd31fd", "20983b8a3ab571510697494d168f9685f5596245", "7e36674b63ab1c05579b26af6f30c6b0aa17e057", "95b251202e1f5ff7d7f41dda553a38e395ecf555", "f5ee4b5287bee836f1de23c76ab4e1fa0a58752d", "740f4097ca7011a542766f35f2e9bd8064ca30b5", "31181e73befea410e25de462eccd0e74ba8fea0b", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "453d73995c98d6677a77bf547fe569ab7b1b02a8", "9e704f358d979d28a091f19adb7aacdbf4b6d83f", "2145058a9c15c0e9468638a6b56891271526df1c", "3095db6f07b089ebfd07685e8a98b72445b9e73c", "73f91eceee057aedba214e27c6dab9d9b081deaf", "40a2a398862f5c62555ffaf6d8421dea9f1bbcd3", "769d75e9cb010b76ba412d9654cf43c4edf15076", "6c2a93d49e4082ab4b5ff4e8c67554654f79468c", "31b255ae31ea46bea5a9f9dad19c5cc1ca4c2db7", "f38510810eb9a34d10e729112541bda9d46e79d4", "cc37e7bacd29b26056163a79c411471d22bf8b0b", "de89a54c93e05c24e335c6e0a5f4855c5a06a73a", "5b72cf570bfcc84cb03a9e310e680363373565cf", "2440a3bce01e9a91f255d2d03447e5c1c53574da", "5e2fb453613a697f3aca6cea598d272c4c5536a5" ], "paperAbstract": "We present and evaluate a new GPU algorithm based on the Louvain method for community detection. 
Our algorithm is the first for this problem that parallelizes the access to individual edges. In this way we can fine tune the load balance when processing networks with nodes of highly varying degrees. This is achieved by scaling the number of threads assigned to each node according to its degree. Extensive experiments show that we obtain speedups up to a factor of 270 compared to the sequential algorithm. The algorithm consistently outperforms other recent shared memory implementationsand is only one order of magnitude slower than the current fastest parallel Louvain method running on a Blue Gene/Q supercomputer using more than 500K threads.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.16" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1e6043554b903ff5c6d0f43eb0785a8a57090da9", "sources": [ "DBLP" ], "title": "Community Detection on the GPU", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "1e65d7e0bb44e11c0f6ee667c60d734d6b7790fb": { "authors": [ { "ids": [ "1765867" ], "name": "Danilo Carastan-Santos" }, { "ids": [ "2699080" ], "name": "Raphael Y. 
de Camargo" } ], "doi": "10.1145/3126908.3126955", "doiUrl": "https://doi.org/10.1145/3126908.3126955", "entities": [ "Heuristic", "Hoc (programming language)", "Machine learning", "Nonlinear system", "Scheduling (computing)", "Signal trace", "Simulation", "Synthetic data" ], "id": "1e65d7e0bb44e11c0f6ee667c60d734d6b7790fb", "inCitations": [], "journalName": "", "journalPages": "32:1-32:13", "journalVolume": "", "outCitations": [ "6195f808e1b2013a81f221518238678053305a22", "b94d6bb4506dbb02244467f989b8aa1f06389988", "f73a1381888c55c91f6602d4d05c250848df4a86", "699a084a65b2cf43fb774f085b84a7e303c16651", "70313795e4a13853a34df581524828df8627b703", "9b122ef696acf3c7eefd20807c079bc8b650edd9", "f19d1557bc395000882051b66a05e807376f8f1b", "d9787e72b5541b071b7f5888ecb812b5cd666acf", "affa722dd7cd83c6cae2a28bc41101943bb60148", "07c8dc1238106ed94d5357b72e4bfebd256f162f", "8a41bf2ef9a77fa5d47c9b482eb38f9f6d636300", "249afcd2fb3d4f164d85cff66372709a7d2df3d0", "c3fecbd5bbb1c34714d4359fe8012633b2c5d8e6", "4694a29a9d27c3a48f0b55c7bea522dbb4465004", "702ba56d3f4aa529b8b1ccec4b47a0a81130d5fa", "89814b8649033825c30897e38dde2bfc657427f1", "496d45a56d2d64b5ea917728c3edfe7cee045979", "a12f10e9eee218f3b50c3a93f52d33f945a369e5", "985f1d87ef2f31ec194713f5a3f042997835e0f8", "b56daafeb36e1c19180f401924a6f9009940efe9", "59c6c3ce034e5dadbc378604c7294eccec4ec47f", "59d58ad3394c88c95378dbee33d75a554d207ab6", "a427493c9606477f6683fbe364c4ee16b518aa5e" ], "paperAbstract": "Dynamic scheduling of tasks in large-scale HPC platforms is normally accomplished using ad-hoc heuristics, based on task characteristics, combined with some backfilling strategy. Defining heuristics that work efficiently in different scenarios is a difficult task, specially when considering the large variety of task types and platform architectures. In this work, we present a methodology based on simulation and machine learning to obtain dynamic scheduling policies. 
Using simulations and a workload generation model, we can determine the characteristics of tasks that lead to a reduction in the mean slowdown of tasks in an execution queue. Modeling these characteristics using a nonlinear function and applying this function to select the next task to execute in a queue improved the mean task slowdown in synthetic workloads. When applied to real workload traces from highly different machines, these functions still resulted in performance improvements, attesting the generalization capability of the obtained heuristics.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126955" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1e65d7e0bb44e11c0f6ee667c60d734d6b7790fb", "sources": [ "DBLP" ], "title": "Obtaining dynamic scheduling policies with simulation and machine learning", "venue": "SC", "year": 2017 }, "1e72d333ad9d5b4c1b00603f1d930f051ca5807f": { "authors": [ { "ids": [ "14234252" ], "name": "Andrew Todd" }, { "ids": [ "29359708" ], "name": "Marziyeh Nourian" }, { "ids": [ "2948456" ], "name": "Michela Becchi" } ], "doi": "10.1109/HiPC.2017.00053", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00053", "entities": [ "Automata theory", "Data pre-processing", "Deterministic finite automaton", "Finite-state machine", "Graphics processing unit", "Hamming distance", "Levenshtein distance", "Nondeterministic finite automaton", "Preprocessor", "Shared memory", "Speedup", "Throughput" ], "id": "1e72d333ad9d5b4c1b00603f1d930f051ca5807f", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "408-418", "journalVolume": "", "outCitations": [ "0e80ae605bef1d485606755bf5bbacd8ce32812a", "20ee61caa108938c2252dbefafb926f3d481465b", "0db79db13acb0808e3915c771b1147cbc8c8387c", "3ca09297ea549605c99a96daf8bc50b23cc54efc", "984273d01be0d66506ccf7d6bd5d260dfe8d9f30", "0c7f839b4a5c7771cfeb628f70d37e10b32b8368", 
"d28a21316307e71a720ca1a2e6ba57ba87629fcd", "185e235f94124f6d91ebb839c7e6c7401693f58b", "1b68aa68c70af87fc3b712ff7a4a9aa289bf23bf", "ae2bc1599510755ee93eac29b7dc2c66c8bb19ad", "280c60bc4e0005fdb05949de2ef21acd1ba1111c", "124f0dbb0743049047fc767a27c31e6533c1c9ec", "1ef38c80b1bc4352ce0df0ef7c05249fb64bf78d", "1acb5de00b892ac464e5e49647abf1bb4a88ed2b", "7255d0044049aadcfc25c3692ceee29c8251143a", "f3e52f82393d70ec2d68962ea4542d919e2b1ab1", "3b8c6253f25595d705c3ac27ca7fbd9eaec17ab6", "6814013255038c2a4daf909239a4886e98ef559d", "0f58152f6827b8d3d5d9e1161dbc53fc1cf7363b", "3eb5756758ea22447b10db9b9a730c8f049f4e52", "5842db2526c078ee53b245cb7dfade724ef41f09" ], "paperAbstract": "Several applications from computational linguistic and genomics perform similarity analysis between sequences of characters based on Hamming and Levenshtein distance measures. Hamming and Levenshtein distance-based matching maps well onto non-deterministic finite automata (NFAs), which have been accelerated on GPUs. However, designed with the flexibility to support generic topologies, existing NFA engines have inefficiencies when processing fixed-topology NFAs. In this work we target this problem and propose two methods to improve the preprocessing and traversal performance of Levenshtein and Hamming distance NFAs. Our methods are based on the following observation: for these fixed-topologies, the transitions do not need to be stored in device memory, but they can be inferred from the reference string (i.e., the string to be matched against) and the NFA topology alone. Our first, basic implementation (implicit-active-sq) minimizes preprocessing by bypassing NFA construction and packing, but exhibits several traversal inefficiencies. 
Our optimized method (implicit-rearranged-sv) includes space and time optimizations for global memory and radically different shared memory access patterns within the kernel, while at the same time incurring only modest preprocessing overhead over the basic implementation. Our experimental evaluation shows that, on large NFAs consisting of several millions states, implicit-rearranged-sv outperforms traditional GPU engines both in terms of traversal throughput (3-22x speedup) and preprocessing time (856-12,237x speedup).", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00053" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1e72d333ad9d5b4c1b00603f1d930f051ca5807f", "sources": [ "DBLP" ], "title": "A Memory-Efficient GPU Method for Hamming and Levenshtein Distance Similarity", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "1e9eb8716d46839b62dc0171ba8b7d020de8f97d": { "authors": [ { "ids": [ "3039546" ], "name": "Vasilios Mavroudis" }, { "ids": [ "40036643" ], "name": "Andrea Cerulli" }, { "ids": [ "1804733" ], "name": "Petr Svenda" }, { "ids": [ "1706925" ], "name": "Daniel Cvrcek" }, { "ids": [ "2720291" ], "name": "Dusan Klinec" }, { "ids": [ "1722262" ], "name": "George Danezis" } ], "doi": "10.1145/3133956.3133961", "doiUrl": "https://doi.org/10.1145/3133956.3133961", "entities": [ "Backdoor (computing)", "Confidentiality", "Cryptography", "Electronic circuit", "Experiment", "Hardware security module", "Integrated circuit", "Overhead (computing)", "Public-key cryptography", "Random number generation", "Semiconductor industry", "Supply chain attack", "Tamper resistance", "Time complexity" ], "id": "1e9eb8716d46839b62dc0171ba8b7d020de8f97d", "inCitations": [ "0b978f224b8520c8e3d9b2eb55431262fcb16c05" ], "journalName": "", "journalPages": "1583-1600", "journalVolume": "", "outCitations": [ "091eed12e9a3267afb860a257ca79edc46c46ba5", 
"462afd7cc36165b3dd3d4605f74cbdb31edd7262", "3584805b098a93705bcc012ee6f71849de0d57e8", "1f0a9342f12a550d343fea0e681ab7f6fb2e319d", "b715b93d417de5f625202bc5d7d4f4a5829e3349", "b81d5149f4b9d959b00f56655aea21a78f0d2c3b", "1ba7664c2707a02f598513ce236b3648543fe4d1", "62fc2cb6e4874dbd84759e466f286b695098008e", "0402e10c33b53a91b4ea55db4e6307ab0bd09130", "b4d9a259db32a92c162ed44e89d3a0cfdacd3f67", "60682144f3554174654248c4dd46db46e5b1cd37", "14ef57d70a10373ddd7970e2a5d9789e41d1e97d", "2c90d8ea2b67584728f51b6efc93b87e398d1d93", "034d822344298558d7efc4c0224cbda5c4d3b41d", "d0c6841e9cf40ea3714b35eb3b709bc46ce050b0", "0118cc9db1b42b0216977d85909c6b474408a80d", "19ccf2570baf13546d12696fe67cc83a0e03f43e", "df6e09f1b8e51fd7f71e3b4ee824bee859b4a8d8", "31dda2430f221437b28869ef57e563599f8f6c4a", "8c5e81a2badc7ed7c03914a8c12773084a96155a", "cfa8c73c7e2ff1798bf7fee16ee1d011c1fac490", "00a4bb425d3991162c1b309ef8a2e02bf4af3eb4", "21ea386b1f4fb9b83ed9955d532efc9739a7d460", "d94ba52bd05fe12926836c3332e2fd895e78ed30", "04b61ac5b3447e53542e3a7070bd5d5310077da2", "1e9bda0ed356f0ded9a31fa1c59eb92319c20232", "7260d31f6c4e36c00ec0dfecf362882e98f0a57a", "085cdec32ef770cce3b919ad5c0e7edf92c0d300", "31759116005bdbe1240f026fde1d6d900bb6173e", "ca407f7dda96e95efde68394e7d73bfc48bcc22a", "0531200889b6fefda2db7c9ece671c85eba9d2ae", "718bb58e58163190b11d36a447e767b85c0b3708", "26eaa222afe327d5e705ae75f18583e4f60516d8", "b3baf3248365b4d11b2dafe0a0bd0598cb1b7bf3", "13cab010c7d25e38397382b567de0198f4f466de", "30544823b8924c29906013d7ec7c1c955d0351e8", "92d04c93d43d36cd9288a0acf94d7ee63d00014e", "00a184da929542719fb4e95536e4839447ba95d3", "d921838f47a636638cf1ca37a5f9c0226596a177", "90f26c0f0d04b9c4999b454c35ee1c7603ca9e4b", "b25b5d3dba5307b5c61d22b9fe373c7bd1f33385", "6837d2922507d789f828f1e8bc34768704025218", "bc44df77508e02b5d2cb0edbef3dfa87625e8a33", "06a1d8fe505a4ee460e24ae3cf2e279e905cc9b0", "11589128f74f596db627ba092d52023341ab7ccd", "15d76475ad3641fdf05460a06aa5a3d4b8062d45", 
"aef56eee16820c90be613b030017b3911f7c41e9", "93153529fad02364b61f53aa0b8991fe0906baa7", "67c70d0ebe1cef368c0ee47d38039e7614297837", "0e42df3278356643d63dcdd33b159ae265602c42", "321b8bfcda1091255bd2214b68c4e850de62e979", "2c9b8a9d9c64a95d549dd00ee5afe96273cbac83", "15c08a85e3c957b0f74708efd2cb3fa279a54fce", "043a2dc8bddc2af1b03b320c1b9aef1f7ca01568", "32d21ccc21a807627fcb21ea829d1acdab23be12", "d412e5ab35fd397931cef0f8202324308f44e545", "391b8519456685cc67865bc80a3df08c4c35bdc0", "3e78ee6b98f8cd2648d70e576b671c5e8479a1c0", "9dde09e003f8200400567b0c7be7c60679daaa3c", "8f57f0eb00848fe583015df4fac40b37525cb0d1", "5ad52212ef1960d2bed796bebb58e6c9fc462670", "b764477d1394f0d1f60f304ad036e85646369e4c", "31445e3bd3672ed743c4a089cc0db4f23357f0f2", "49565dd40c89680fdf9d6958f721eabcdfb89c22", "588c404fa3f64c58facb178ca957ed4697aa622c", "05ac3318c8b044625cc8181257d16b6f8be53650", "7f14bca3b6f51d4cfe8084798c2808c08b0214d6", "6920aefa7a3058c21e50a450d187f85bf3f8f9ce", "a44d8ca755f1bb1feefb1777c625118fe99c854d" ], "paperAbstract": "The semiconductor industry is fully globalized and integrated circuits (ICs) are commonly defined, designed and fabricated in different premises across the world. This reduces production costs, but also exposes ICs to supply chain attacks, where insiders introduce malicious circuitry into the final products. Additionally, despite extensive post-fabrication testing, it is not uncommon for ICs with subtle fabrication errors to make it into production systems. While many systems may be able to tolerate a few byzantine components, this is not the case for cryptographic hardware, storing and computing on confidential data. For this reason, many error and backdoor detection techniques have been proposed over the years. 
So far all attempts have been either quickly circumvented, or come with unrealistically high manufacturing costs and complexity.\n This paper proposes Myst, a practical high-assurance architecture, that uses commercial off-the-shelf (COTS) hardware, and provides strong security guarantees, even in the presence of multiple malicious or faulty components. The key idea is to combine protective-redundancy with modern threshold cryptographic techniques to build a system tolerant to hardware trojans and errors. To evaluate our design, we build a Hardware Security Module that provides the highest level of assurance possible with COTS components. Specifically, we employ more than a hundred COTS secure cryptocoprocessors, verified to FIPS140-2 Level 4 tamper-resistance standards, and use them to realize high-confidentiality random number generation, key derivation, public key decryption and signing. Our experiments show a reasonable computational overhead (less than 1% for both Decryption and Signing) and an exponential increase in backdoor-tolerance as more ICs are added.", "pdfUrls": [ "https://arxiv.org/pdf/1709.03817v2.pdf", "https://arxiv.org/pdf/1709.03817v1.pdf", "http://doi.acm.org/10.1145/3133956.3133961", "https://acmccs.github.io/papers/p1583-mavroudisA.pdf", "http://arxiv.org/abs/1709.03817" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1e9eb8716d46839b62dc0171ba8b7d020de8f97d", "sources": [ "DBLP" ], "title": "A Touch of Evil: High-Assurance Cryptographic Hardware from Untrusted Components", "venue": "CCS", "year": 2017 }, "1ea2fd80993c826a22de284fa8365bd34cb8cd92": { "authors": [ { "ids": [ "1699598" ], "name": "Sheng Di" }, { "ids": [ "3265548" ], "name": "Rinku Gupta" }, { "ids": [ "1699887" ], "name": "Marc Snir" }, { "ids": [ "17871303" ], "name": "Eric Pershey" }, { "ids": [ "1721552" ], "name": "Franck Cappello" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Causality", "Cluster analysis", "Correlation does not 
imply causation", "Network topology", "Precision and recall", "Probabilistic analysis of algorithms", "Supercomputer", "System administrator" ], "id": "1ea2fd80993c826a22de284fa8365bd34cb8cd92", "inCitations": [ "78522d5ab004d27241bc4e34e5cf96d0e5e2630b" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "442-451", "journalVolume": "", "outCitations": [ "3fcb48efe8489061deeea37ceafdabe115ca4789", "2132429c69c47310569ea03783d86d5626511427", "33765080cd39654f606710d111bcb9b92e0b1d5b", "01d62cd850496455ce1616500f491690effa5c98", "354e588a56cba078b2a98e5ce0a045ff291ab64a", "9bfa5527511bc61f03882097a30c6f7f68acc0c6", "3b2c8bb9471bd52a40b72a61bfede076f4d414b5", "73b343b073bbc0b9660b5c500f994a5e61f53132", "957b6dc2077d3973e123b1c3c1cd840b8a7b42a7", "2b6a81efa2093feea67b9a8557334d67e47c5a23", "3679eeccf323ff83d68ad539f055da8939d26d01", "723694247d5365b99b588c3b53a832112386917f", "2398278a25035cfeefa3dd4aba91b16d48f540ba", "e98e419ae00ea017ce8995b361ede586e899eccc" ], "paperAbstract": "Today's large-scale supercomputers are producing a huge amount of log data. Exploring various potential correlations of fatal events is crucial for understanding their causality and improving the working efficiency for system administrators. To this end, we developed a toolkit, named LogAider, that can reveal three types of potential correlations: across-field, spatial, and temporal. Across-field correlation refers to the statistical correlation across fields within a log or across multiple logs based on probabilistic analysis. For analyzing the spatial correlation of events, we developed a generic, easy-to-use visualizer that can view any events queried by users on a system machine graph. LogAider can also mine spatial correlations by an optimized K-meaning clustering algorithm over a Torus network topology. 
It is also able to disclose the temporal correlations (or error propagations) over a certain period inside a log or across multiple logs, based on an effective similarity analysis strategy. We assessed LogAider using the one-year reliability-availability-serviceability (RAS) log of Mira system (one of the world's most powerful supercomputers), as well as its job log. We find that LogAider very helpful for revealing the potential correlations of fatal system events and job events, with an accurate mining of across-field correlation with both precision and recall of 99.9-100%, as well as precise detection of temporal-correlation with a high similarity (up to 95%) to the ground-truth.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101172" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1ea2fd80993c826a22de284fa8365bd34cb8cd92", "sources": [ "DBLP" ], "title": "LOGAIDER: A Tool for Mining Potential Correlations of HPC Log Events", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "1eb3b01d6c74157f35e59ea751d5a61bcf76ff23": { "authors": [ { "ids": [ "1835783" ], "name": "Runhui Li" }, { "ids": [ "2163260" ], "name": "Xiaolu Li" }, { "ids": [ "33431705" ], "name": "Patrick P. C. 
Lee" }, { "ids": [ "1793192" ], "name": "Qun Huang" } ], "doi": "", "doiUrl": "", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Apache Hadoop", "Clustered file system", "HTTP pipelining", "Instruction pipelining", "Mean time to repair", "Middleware", "Open-source software", "Pipeline (computing)", "Testbed" ], "id": "1eb3b01d6c74157f35e59ea751d5a61bcf76ff23", "inCitations": [ "6ee7d638c08c272b7d9cc7a4ea3b9c651b8656be" ], "journalName": "", "journalPages": "567-579", "journalVolume": "", "outCitations": [ "130d811ffc2daff43c5203471b70f3eada0f57d1", "0d77bb6ef2bb6d165f58bf0251bf3d7cf29f1491", "1aefeed6a487431dc1c1137d8b18ad299b328d73", "3168681722207c86827e596860115a2977ce761f", "5676da9f4854aba9c0cab007c3e517cd81acf81d", "12fc9f6a930e660ff127acff2dcbb9729d8c1fbd", "2220feec76a17e509a58abf8c742ea9b7866a99e", "1748a4950413dbeab59c139b16cfb8ec99f21ff4", "24a342bf4226d0e4f6ab58b9812cc9b4bdd76976", "3f4297d2e5ae196f30f9a18dcd8400dfca5d2e47", "3b547d706d33c110f96bf1c0e805ab8cc82afdbf", "77f651d37c1d1fa7c69c8966680aec180e8f48dc", "48e45b328b9a3e934583797206cce547514e0676", "3db5c29024481b22c07ca76d3493183de9865575", "1ba77164225515320452654e7b665a4e01cafd2b", "5f3f9223c5c9f896be099bc177929febad508407", "0b8b33dde00f31129b6a6c149ed2e6986a110380", "514a5c15e8cf3f681febecad954a4508d9189c99", "58b628792d3eb22a034a871ed3cf373afe591928", "308e3605f9b7a3a7bb6c61a8ab0b90603735d945", "29f3f5918946bf0a4d75bf5244f993847d03e26c", "0541d5338adc48276b3b8cd3a141d799e2d40150", "4540a5f0debcd62d0ca418682af78febc54013c7", "a3e749e0120b7cd363f48b56a2e651c4ab9aa911", "75032b6df2f3988d2cb6988c73cbc01c7b9e80cb", "027733345e1d7df32de48c63cec756dd0ba4828d", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "b7014a268c35e377366634d6b8370a8a7db285a5", "03621901aaa2d77270478326b3a25350508a2b93", "1dc5fb4c388b254727965feadeab68b7bd774a68", "25e5d5a046afa5fcde7be23d087ae69f4b438e13", "5aa2e10bc34bca3a8a3c3688103163faf76e3ba8", "7ae26da9b7666812857883536870c315538f7f10" ], "paperAbstract": "We propose 
repair pipelining, a technique that speeds up the repair performance in general erasure-coded storage. By pipelining the repair of failed data in small-size units across storage nodes, repair pipelining reduces the repair time to approximately the same as the normal read time to the same amount of data in homogeneous environments. We further extend repair pipelining for heterogeneous environments. We implement a repair pipelining prototype called ECPipe and integrate it as a middleware system into two open-source distributed storage systems HDFS and QFS. Experiments on a local testbed and Amazon EC2 show that repair pipelining significantly improves the performance of both degraded reads and full-node recovery over existing repair techniques.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_li_0.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-li_runhui.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/li-runhui", "http://adslab.cse.cuhk.edu.hk/pubs/atc17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/3a52/a09f84a11cfa007afb4a7e6e86af828c35ba.pdf", "s2Url": "https://semanticscholar.org/paper/1eb3b01d6c74157f35e59ea751d5a61bcf76ff23", "sources": [ "DBLP" ], "title": "Repair Pipelining for Erasure-Coded Storage", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "1ebdf99bf03787a10d1c37bc9f93e89116e29bd6": { "authors": [ { "ids": [ "2781428" ], "name": "Samira Manabi Khan" }, { "ids": [ "37811136" ], "name": "Chris Wilkerson" }, { "ids": [ "1915826" ], "name": "Zhe Wang" }, { "ids": [ "1732082" ], "name": "Alaa R. 
Alameldeen" }, { "ids": [ "15895903" ], "name": "Donghyuk Lee" }, { "ids": [ "1734461" ], "name": "Onur Mutlu" } ], "doi": "10.1145/3123939.3123945", "doiUrl": "https://doi.org/10.1145/3123939.3123945", "entities": [ "Data dependency", "Dynamic random-access memory", "Failure rate", "File system permissions", "Forward error correction", "Pareto efficiency", "Refresh rate", "Single-core", "Software testing" ], "id": "1ebdf99bf03787a10d1c37bc9f93e89116e29bd6", "inCitations": [ "042855085a52934e5599e02555071bb222f6a000", "0b393cab00401cb971cf71970e00c2767f881f75", "2976932bec7334a150e1bb6916b7564bdaa864ea", "781cf9b4d17f89ad4b971d2a1655421378149e2d", "0c4fe1f1a8043e8f4175b21faca1b72bff8033e6" ], "journalName": "", "journalPages": "27-40", "journalVolume": "", "outCitations": [ "096ee9c89d43fd03a602aed3a37fdf43dc8e60ae", "b3d8160562b94eed15da0c1e854a6e7f78e2aa18", "04d4d8bd58e0c12fa6163d4f1b6004d380f9fadd", "2394c6644efa856f0da160a0f0031d74cd3b5000", "703c74b035ba667afeaa0d4287641bc87d2ea12f", "21bda5f42e92f535c29012746915f6dd06adb97a", "1e2eea981bd560de64d5ff717784da0802f0f353", "08f3113106e4b3f97af0586825631cf6442ad642", "517c5cd1dbafb3cfa0eea4fc78d0b5cd085209b2", "37b5850e3e75a3462f3991491ca26674925f233b", "32d40133459c318bc66aa781b6ce3c1921c0c13a", "60aa9510638d4d9739ebfc3a0042187988482346", "3f82aa1373e823ec622b3021fff9df4a82230267", "2cfa2068d49fc0e9cc5d96bc498c63e782f7478f", "44d886f89cdbd4fdf5dd25d83b2d37deb7541bf7", "468035263afa59095614f26a62e0217da4a1aeed", "588fd53a6cbdb2f2d7f2bd676944d7b5fdfafcb9", "04214d0a4c45f920433e0c5097a19de74d5a3563", "3c89345bb88a440096f7a057c28857cc4baf3695", "5dfbdcedb7bcb8644b816bab2cc3d3fadd36775b", "dc060372253f1bacdea2c785e6525f781fe8c039", "f101cbc12bbdd127dad401cc5d64f63a6f7a6a37", "5baaeed2b180d8b9886eca113ae0c86196c8bdaf", "6902867509928c0e5c19aff3e62e1def3a19d581", "2263e2beda96efcff8ff19efaefcb85f5136aeca", "9341125876271d46cc25f86dac93f25acb343e8d", "012d556d67acedc6898930b4c93f54b87aabf5ee", 
"61ea230d0e757ff46d3a381e79691bd54b92a503", "c8b6e390eb9cf0a3452decfff8461359315416cd", "0693ff4b3a8d1452b897a876d3ffe6b2074e98e4", "108c840d5d1847948a2de0250490a327ae069ee6", "793b84b732bf257cb842e316a246ccfa3b0ef6a2", "0653e2ed9f683868cb4539eb8718551242834f6b", "e762b1b654798cec0fb9d6000c7f7c777ac0689f", "35b92289fe9c19e5baf462ffe91bdd2d25768b00", "8a4f94a258f299085e2c153062ba850cb26fe1d2", "828c504705841dee0031e52bf9acb016fcec45de", "1c32ad0a42109fab826eb3054df7cfc33b424125", "36897d1d2661777913d492390c4ad9d004276308", "2d6cb831ecb36ce45f7b767d16c207e03733dfd6", "174a3d4ad2caba68b55fd3ee863b9471e3786f21", "0eacd1b47786f740b723d906d46e160f143c0378", "24b300b2395e4d0b0d2c8b9797fc9f8e735a58ef", "e4aed18e1b965f90a86b57a787c52af44a8c20a4", "94468d080421c4ec3141625a6c573b42d3b01261", "5e41307a2f2850f164ad0175f372799ce61e0bf9", "071564baef078867847fc54a3a0b50dd22d29d62", "07a66e0f2777bb0005384defd228d5aa0bd7f9a4", "76e29695c7c119d869d3b87886a611261a98e4a4", "447f492235719d7c2b061b95d818f928d6cbdac5", "5e60858a530a5da13bce10ce8b8740557b95e533", "7c62c7e0b4e026f6b5b027735c99cbf033789ba9", "472392b93150be7bb0132511d71d686770c2c79b", "30bb582c2c09abc7eb9dda7d9f80804eeb89f9d7", "073688e19290d53226404f1fb02d0d76a3906e5f", "ab6888a1b024d109c768f81b49c77b585efc975a", "e0a4d1dbd9d459f3613be9da56243d72c40e152e", "03d55467b20e662fbaa8416e853f57c93834a9fb", "03eaf3a6b6db01bdb749e8c3a097a0198c61b976", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "1f80d8bdf5a0a1787a36ccfc4929f71d14a94e57", "00cc482570d739e7b733f45b6f8f1836b24056bd", "0edf4ef1b8e09e4abc994f7d450bc090262e2c9b" ], "paperAbstract": "DRAM cells in close proximity can fail depending on the data content in neighboring cells. These failures are called data-dependent failures. Detecting and mitigating these failures online, while the system is running in the field, enables various optimizations that improve reliability, latency, and energy efficiency of the system. 
For example, a system can improve performance and energy efficiency by using a lower refresh rate for most cells and mitigate the failing cells using higher refresh rates or error correcting codes. All these system optimizations depend on accurately detecting every possible data-dependent failure that could occur with any content in DRAM. Unfortunately, detecting all data-dependent failures requires the knowledge of DRAM internals specific to each DRAM chip. As internal DRAM architecture is not exposed to the system, detecting data-dependent failures at the system-level is a major challenge.\n In this paper, we decouple the detection and mitigation of data-dependent failures from physical DRAM organization such that it is possible to detect failures without knowledge of DRAM internals. To this end, we propose MEMCON, a memory content-based detection and mitigation mechanism for data-dependent failures in DRAM. MEMCON does not detect every possible data-dependent failure. Instead, it detects and mitigates failures that occur only with the current content in memory while the programs are running in the system. Such a mechanism needs to detect failures whenever there is a write access that changes the content of memory. As detection of failure with a runtime testing has a high overhead, MEMCON selectively initiates a test on a write, only when the time between two consecutive writes to that page (i.e., write interval) is long enough to provide significant benefit by lowering the refresh rate during that interval. 
MEMCON builds upon a simple, practical mechanism that predicts the long write intervals based on our observation that the write intervals in real workloads follow a Pareto distribution: the longer a page remains idle after a write, the longer it is expected to remain idle.\n Our evaluation shows that compared to a system that uses an aggressive refresh rate, MEMCON reduces refresh operations by 65--74%, leading to a 10%/17%/40% (min) to 12%/22%/50% (max) performance improvement for a single-core and 10%/23%/52% (min) to 17%/29%/65% (max) performance improvement for a 4-core system using 8/16/32 Gb DRAM chips.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123945", "https://people.inf.ethz.ch/omutlu/pub/MEMCON-system-level-data-dependent-DRAM-failure-detection-mitigation_micro17.pdf", "http://www.pdl.cmu.edu/PDL-FTP/NVM/MEMCON-khan_micro17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1ebdf99bf03787a10d1c37bc9f93e89116e29bd6", "sources": [ "DBLP" ], "title": "Detecting and mitigating data-dependent DRAM failures by exploiting current memory content", "venue": "MICRO", "year": 2017 }, "1ed9721b58c6a6c9bcee61f6a8061b222ae453d3": { "authors": [ { "ids": [ "1734498" ], "name": "Xu Chen" }, { "ids": [ "1739818" ], "name": "Yongfeng Zhang" }, { "ids": [ "35531065" ], "name": "Qingyao Ai" }, { "ids": [ "2468306" ], "name": "Hongteng Xu" }, { "ids": [ "3063894" ], "name": "Junchi Yan" }, { "ids": [ "1685620" ], "name": "Zheng Qin" } ], "doi": "10.1145/3077136.3080776", "doiUrl": "https://doi.org/10.1145/3077136.3080776", "entities": [ "Digital video", "Information retrieval", "Key frame", "Modal logic", "Online and offline", "Personalization", "Recommender system", "Unified Framework", "User (computing)", "Video", "Video content analysis" ], "id": "1ed9721b58c6a6c9bcee61f6a8061b222ae453d3", "inCitations": [ "8c3ee0fcd5d7ade463996280c4a5a34dd696c4d6", "ebe7eda4fd10caa326d57b93e8a1e2a13e09a403", 
"6ba4e10d06d9842765a4350bf5abbd3dd095045c", "d05ee44f524e48a5e112fad0fc1ab4c20594d1d7", "659701d68f3e5b517f1e7cd40d42a1b214acf1ad", "43c0ff1070def3d98f548b7cbf523fdd4a83827a", "9f67c741738a101095400ed515fa98375ef3ed67", "5e3257540faa7bf220d0dda97085ceff18674f19" ], "journalName": "", "journalPages": "315-324", "journalVolume": "", "outCitations": [ "33ecac20d49374906afd6558551ce163063c5788", "97d502c820ea2b86fdf96dd0ffe9b65ad38cda0e", "dcf8ff046520de3adfe910b2ac567424765ab496", "ed333629005913610efa732183c4f725cae18aab", "6bf37516f3e77a160cd9405ec47d1bcc34b26e45", "23607916f4bfe2596eb27305dfd42e1770693f03", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", "17a41aa18b8987ed87d6fc19b87d36faf8a4240c", "154728875d4668065ca6ba9fa2f5d2a1bcfc4a6e", "83174a52f38c80427e237446ccda79e2a9170742", "7e9a943f6f0160fc868b77cdb95e35b53f455faa", "760948698540118031e590fbc884fcea209f9104", "3796b557ced6254ab8c96682b3357ef73176c159", "ced6d0b0257273850af38a3757151264beefdd73", "707758c3c0bf44adfb50dca6f8d7518d8efd797b", "4afa6c2eb552ceef0e396fbfe449932492873034", "fbb65cf4be985f975285ea4e30ec02600b2ed125", "66a352027677574b2c0b88176e3126fb3cad6195", "db6389e0ca49ec0e4686e40604e7489cb4c0729d", "12cd48792e7960cdf39e4506f407b26cf07cec29", "2eb32b1a4c5bf741632a9fd5f852253fd0d53def", "0ffdc4881700de1c6626c4ac2c527b7db4ed2b35", "09eb5f50134b4665461c0c982071e65a5100eebd", "75ea947c5480f937e14ae4c85a6af833163455cd", "40c4b64d69963876f68dc6d2739df615291a2943", "11da2d589485685f792a8ac79d4c2e589e5f77bd", "6493cc35bd2f425e1fc2c6d21872778ff2c57e7c", "11d1805cf44b1226d0e8db37922eed094c592023", "01fcae344d2edb715bcc63a40b6052c0331741bd", "cbbf566672757dc8237f9ddd019b330afbc789ac", "0e730f8ed7cfdeb458cab36e8495e2cc0ee6d6a6", "071b16f25117fb6133480c6259227d54fc2a5ea0", "1b41abbf9d3707a1a5c0fcf8e1f7734da0e61703", "64c871cd7e0af7e1aeb94d98c6214eb8d8e8b989", "a175c95d2a141b9879cbb0fe6786fa8ea5839896", "3eaf79589dbb9bce5a502e867a8f03917e52de26", "375c307f64f00174bc673aeec3bb78976dac5de0", 
"778990420feb931a7f4c9b3e2d41eb47e4ff5a53", "2bb2ba7c96d40e269fc6a2d5384c739ff9fa16eb", "3e00265b3ea8f90c90d6ea77197de7a0ddf7768c", "e0d2cddc34edd3db9c9a854c937575095acda756", "4541e6605c73c2499d145fb3c8621b91fddf3a78", "39afbfe64d83b17368948c6cb3567431580b2a29", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "1c2cae04e0e00dcec3fe6840b2daa00ae9e8a0a3", "0b544dfe355a5070b60986319a3f51fb45d1348e", "c961fc0a87f80178d00404a35565a870a1579fae", "452281534b33189057e108ec3b9189177ce4e49f", "24e2ac56c810f773bd4b2d03e7e9bc1a4519ed7a", "53ced1bf79a5ff0db025fee76a67a33f37e4286d", "53fbf1d6f78811762e79a9a7761807e553437299", "1e7d7f76b3a7b494122f40c22487e60a51a2d1be", "48b0a08bf96b8e28ee982867ce0a9f568b788a0a", "06b231ed33b173b8fc91aa7fdfc9644f2cb0d60d", "4eb69937ca078b66de5ec108990f092c289cee8d", "b9baf83cefdb2ce21d58b5b2bc91bac8a601ea98" ], "paperAbstract": "Key frames are playing a very important role for many video applications, such as on-line movie preview and video information retrieval. Although a number of key frame selection methods have been proposed in the past, existing technologies mainly focus on how to precisely summarize the video content, but seldom take the user preferences into consideration. However, in real scenarios, people may cast diverse interests on the contents even for the same video, and thus they may be attracted by quite different key frames, which makes the selection of key frames an inherently personalized process. In this paper, we propose and investigate the problem of personalized key frame recommendation to bridge the above gap. To do so, we make use of video images and user time-synchronized comments to design a novel key frame recommender that can simultaneously model visual and textual features in a unified framework. 
By user personalization based on her/his previously reviewed frames and posted comments, we are able to encode different user interests in a unified multi-modal space, and can thus select key frames in a personalized manner, which, to the best of our knowledge, is the first time in the research field of video content analysis. Experimental results show that our method performs better than its competitors on various measures.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080776", "http://cseweb.ucsd.edu/classes/fa17/cse291-b/reading/personalized-keyframe-sigir17%20Personalized%20Key%20Frame%20Recommendation.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1ed9721b58c6a6c9bcee61f6a8061b222ae453d3", "sources": [ "DBLP" ], "title": "Personalized Key Frame Recommendation", "venue": "SIGIR", "year": 2017 }, "1ee1c1f32a1d6165198c220d28e2a0b54b510ffa": { "authors": [ { "ids": [ "5597426" ], "name": "Mark Handley" }, { "ids": [ "1758591" ], "name": "Costin Raiciu" }, { "ids": [ "2015053" ], "name": "Alexandru Agache" }, { "ids": [ "2926206" ], "name": "Andrei Voinescu" }, { "ids": [ "31755529" ], "name": "Andrew W. 
Moore" }, { "ids": [ "2022218" ], "name": "Gianni Antichi" }, { "ids": [ "38358360" ], "name": "Marcin W\u00f3jcik" } ], "doi": "10.1145/3098822.3098825", "doiUrl": "https://doi.org/10.1145/3098822.3098825", "entities": [ "Clos network", "DPDK / dpdk.org", "Data center", "Linux", "Linux", "NetFPGA", "Network switch", "Simulation", "Throughput", "Transport Layer Security" ], "id": "1ee1c1f32a1d6165198c220d28e2a0b54b510ffa", "inCitations": [ "9bbd5be2829e49b1fac7f034baf7499cb069db95", "17cdd01291815ce50715bbe19fe953737b7f8ce9", "cd5fdc7ea21293acb52a7af34e01217d54b1c39e" ], "journalName": "", "journalPages": "29-42", "journalVolume": "", "outCitations": [ "0a974f9a517409cf80c1f38e5d805c2c3da35f6a", "3b988049dd8f62f772281e90196bbd793700c86b", "a948041d6278834988c813aec15bc60b5dd1f119", "3233ed7eb09d987ce2ae0dfcbdddefd54fcee288", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "47ae6c226dead9730fc2154996eb9f9e7cfb3ecb", "39300a6bb64f813bd233343b840cb169d8d0527f", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "49ad5c9b5b09e8dc248b52507ef5255eb689b240", "4973d22ad92fe2999f18cc57dd4a4cad81ba2cfe", "faab36006aeb182fec58becb60ee18ccbf096c2c", "5594c2ddde27f4262a53668ca9b09ad7a9453102", "60ddf74dd5b443c3bfb59fe876b42f9d6112c4fb", "27956602112b9240efb996e2afd60cc49c6fb393", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "f4cb0ddb31cd1be6cd56d9d339429ef970ed29e3", "3833d87a79612f37babd819e21bdf8b93828e2b5", "663e064469ad91e6bda345d216504b4c868f537b", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "2082a94be79311cfb3c73f02a88f005b38d1a424", "764d7de61421968d6b477f0c055d72dcb0893544", "025652412d507a8cf98ecacd8a44d32ce28995e1", "61aa09bc1a3eae17480645c90b06a18cbd62d9af", "122229239aeba1eb4f1623adb40f1845c582a520", "129567778989fab23b50812b3df30e899e2d6a4e", "0f8b04cb89e455ceadf0c88fd5dd9f9a7f338ba9", "11040f24714857941c569df70b21c4c8655e074a", "12d3952369fd92db86d4a3fff0a6fa8de3509095", "1376bd56c64639af4645625fd9755c83b2bf7cda", "20400945c87f75acbad70f1f9ccfe94f556d2d02", 
"058f6752d85a517aae298586fdf117acdd7560ea", "327a02b19a60319cc35be860ad0259a5c1aef920", "14b82ab954a85cb8b336e86cf536c5701ca722e9", "61dc587440e85438231197d52be697e25b3fecea", "07367703f587dbc3313cc613289c4330cebe5c8c" ], "paperAbstract": "Modern datacenter networks provide very high capacity via redundant Clos topologies and low switch latency, but transport protocols rarely deliver matching performance. We present NDP, a novel data-center transport architecture that achieves near-optimal completion times for short transfers and high flow throughput in a wide range of scenarios, including incast. NDP switch buffers are very shallow and when they fill the switches trim packets to headers and priority forward the headers. This gives receivers a full view of instantaneous demand from all senders, and is the basis for our novel, high-performance, multipath-aware transport protocol that can deal gracefully with massive incast events and prioritize traffic from different senders on RTT timescales. We implemented NDP in Linux hosts with DPDK, in a software switch, in a NetFPGA-based hardware switch, and in P4. 
We evaluate NDP's performance in our implementations and in large-scale simulations, simultaneously demonstrating support for very low-latency and high throughput.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098825", "http://cial.csie.ncku.edu.tw/presentation/st2017/pdf/Re-architecting%20datacenter%20networks%20and%20stacks%20for%20low%20latency%20and%20high%20performance.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1ee1c1f32a1d6165198c220d28e2a0b54b510ffa", "sources": [ "DBLP" ], "title": "Re-architecting datacenter networks and stacks for low latency and high performance", "venue": "SIGCOMM", "year": 2017 }, "1ee52dc91368b925a15bc2448d9e1ea4a1643dc4": { "authors": [ { "ids": [ "3113990" ], "name": "Sangwook Shane Hahn" }, { "ids": [ "1744734" ], "name": "Sungjin Lee" }, { "ids": [ "3455892" ], "name": "Cheng Ji" }, { "ids": [ "38849152" ], "name": "Li-Pin Chang" }, { "ids": [ "37768059" ], "name": "Inhyuk Yee" }, { "ids": [ "1761924" ], "name": "Liang Shi" }, { "ids": [ "2297316" ], "name": "Chun Jason Xue" }, { "ids": [ "2086951" ], "name": "Jihong Kim" } ], "doi": "", "doiUrl": "", "entities": [], "id": "1ee52dc91368b925a15bc2448d9e1ea4a1643dc4", "inCitations": [], "journalName": "", "journalPages": "759-771", "journalVolume": "", "outCitations": [ "b8ddec47f9fab1eddb5c9cacf703781dd5337b87", "13d6c568c770ff5a070072e720fb34b0037cdab8", "188fb4bbc148ebb6bf25aef5d9936cc1c1f82baf", "f94000fec2736f4afc61c94e288966f606af9182", "4cb16dace8dd09919883285739039d05d4c1707f", "1820a34042d6371a9e20484b0c63b698eb522a6c", "663798bc529bb73f2b3ca8640bb4fcbd83ce5c31", "b45e1f16cf2b6f735013e9f279e45bf8b7a8d5db", "b7437111bf04a803878ebacbc275ba3715bccb18", "c9b65f850e1d0e29aac1953beee3b7348414191a", "7a6987f6b0b47d8c6a39cccebb2d3c9566e45054", "3c4ae51452823afafabe8d33d51218d1d95c2795", "0251eb70294c89cfef9119e6b930b2c1641e0fa8", "5fbfa1a601dfce23fb0d550fc04a6acabdb5afef" ], "paperAbstract": "", "pdfUrls": [ 
"https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_hahn.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-hahn.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/hahn" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/a772/c9c76777281add3a131eda748d8c3d90078c.pdf", "s2Url": "https://semanticscholar.org/paper/1ee52dc91368b925a15bc2448d9e1ea4a1643dc4", "sources": [ "DBLP" ], "title": "Improving File System Performance of Mobile Storage Systems Using a Decoupled Defragmenter", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "1f06cd0ba8ade1716c5526202d54fab7019c5092": { "authors": [ { "ids": [ "2673657" ], "name": "Arpan Gujarati" }, { "ids": [ "1767761" ], "name": "Sameh Elnikety" }, { "ids": [ "1772774" ], "name": "Yuxiong He" }, { "ids": [ "1766093" ], "name": "Kathryn S. McKinley" }, { "ids": [ "2110867" ], "name": "Bj\u00f6rn B. Brandenburg" } ], "doi": "", "doiUrl": "", "entities": [ "Accident and Emergency department", "Autoscaling", "Azure A", "Inference", "Learning Disorders", "Machine learning", "Microsoft Azure", "Request - action", "Service-level agreement", "Web service", "Workload" ], "id": "1f06cd0ba8ade1716c5526202d54fab7019c5092", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "78f853271fe69da617d5a14a1e54cbae6a982a50", "ad73deea37cad9a9b945d929a86d82d781450345", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "3b14e5747b5fbc4055e721662f3b38e25648132d", "0a5ff7336879c99513dca6fce6ef44984ebf3f55", "32ad3827ca1e4b152943b197f54b177f582dd354", "438c51040ee6ccf9198e52d105c47e75d615b29c", "12aec629c71397938be7c5f23c0f59cc0784d808", "680c411f2455b7f2ea78a0473c67792703095141", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "5a8b7b33956e083f9600eed14cd3e80a01dee212", "4954fa180728932959997a4768411ff9136aac81", "0712c325155f8af65602a08cc448d1e453466a33", "594000e27b053226997de573c6f4d58a3d26d371", 
"2126a636ed4846f3ccdb73c416497bcf774a5220", "6168919f450a8ed906051f2562abbfe51aa4d97d", "09ed9cb47e09f56608bdbe6dffaa527c8ca0cd73", "6bc873b69b320195911a61073e689c564d91ba7b", "0ec59b7fae15a7caa3256ba31b21802d455618c2", "234e6be0d4238f76b3ac038ee422be39f391c625", "45fb0a4895ba6222d19af2e8dd2da4b2d7c1e3b7" ], "paperAbstract": "Developers use Machine Learning (ML) platforms to train ML models and then deploy these ML models as web services for inference (prediction). A key challenge for platform providers is to guarantee response-time Service Level Agreements (SLAs) for inference workloads while maximizing resource e\ufffdciency. Swayam is a fully distributed autoscaling framework that exploits characteristics of production ML inference workloads to deliver on the dual challenge of resource e\ufffdciency and SLA compliance. Our key contributions are (1) model-based autoscaling that takes into account SLAs and ML inference workload characteristics, (2) a distributed protocol that uses partial load information and prediction at frontends to provision new service instances, and (3) a backend self-decommissioning protocol for service instances. We evaluate Swayam on 15 popular services that were hosted on a productionML-as-a-service platform, for the following service-speci\ufffdc SLAs: for each service, at least 99% of requests must complete within the response-time threshold. Compared to a clairvoyant autoscaler that always satis\ufffdes the SLAs (i.e., even if there is a burst in the request rates), Swayam decreases resource utilization by up to 27%, while meeting the service-speci\ufffdc SLAs over 96% of the time during a three hour window. 
Microsoft Azure\u2019s Swayam-based framework was deployed in 2016 and has hosted over 100,000 services.", "pdfUrls": [ "http://www.cs.utexas.edu/users/mckinley/papers/swayam-middlewear-2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/1f06/cd0ba8ade1716c5526202d54fab7019c5092.pdf", "s2Url": "https://semanticscholar.org/paper/1f06cd0ba8ade1716c5526202d54fab7019c5092", "sources": [], "title": "Swayam: Distributed Autoscaling to Meet SLAs of Machine Learning Inference Services with Resource Efficiency", "venue": "", "year": 2017 }, "1f09b8bffe732f7a2b3ca02111a401da7ef190fa": { "authors": [ { "ids": [ "2559102" ], "name": "Aiman Fang" }, { "ids": [ "2425932" ], "name": "Aur\u00e9lien Cavelan" }, { "ids": [ "1735015" ], "name": "Yves Robert" }, { "ids": [ "1695232" ], "name": "Andrew A. Chien" } ], "doi": "10.1109/ICPP.2017.67", "doiUrl": "https://doi.org/10.1109/ICPP.2017.67", "entities": [ "Computation", "Concurrency (computer science)", "Error detection and correction", "IBM WebSphere eXtreme Scale", "Programming complexity", "Scalability", "State (computer science)", "Transistor", "Very-large-scale integration" ], "id": "1f09b8bffe732f7a2b3ca02111a401da7ef190fa", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "581-590", "journalVolume": "", "outCitations": [ "6e1ceacdaa0c979cf52af5f478cc2a9891e2b6a0", "17c44c4654cc53e792c3aafe3b01df9829fe8e90", "741a04ef3a0c3953a3d37726bf4d6170eaa68a55", "b305d41197a9a96c5622a7f13edbbb55dfefc43c", "4d931c6f2b099283552982bb745e5974a67fd8f0", "21b69e576b7ea9d5896ebebfeb370f64e037e298", "5c0e8af36e20b8ea213561e8c3d706b4e2f2cc8d", "b23f060a4574ff126e98b8fe13f8b508b9f82c1f", "0642df41e63e4f6223a6f4f9b9bb56c7dbebc34f", "453087bc7b5065d8995711dfd9a116655aa21013", "553cf4279c6d01e1da08a8e6511efb9c7354c5d0", "2657302160775f8766964d013efe242836693f3e", "a19563b4014919c405964cea5271bebe918ad265", "67ddcb550dd8fe57e4567bd7ca78f6e841f25c4c", 
"6d957f0e756c74d03e663fe141b6c8a445aec60c", "874bcfe99bbd4798115f3b29bd22d1e52707831a", "c1416bd9b71ee8c8b204e09ec2f51afc40433f76", "455d253c61379bce5626fba8ef9897d3ac1307dc", "983d5cf52ccc1cd4e8338c6b7c4ee24168a6e807", "054be89214b39f314af8b8d0e4430cbb04222dbe", "e49176e44f4eba4bff009cb45282a5369c7dfe05", "750fcd66bca7941f0950455f79b4b1b4e3821e5b", "5ec3e8fccdd20e223978fc35b88327af82eb4324", "01d62cd850496455ce1616500f491690effa5c98", "dbac8ae956fd16055dd652916ca3a531002af7ef", "7b70d4dbe727d4eb3c1f1ec7d5515eb1625b1d5f" ], "paperAbstract": "Projections and measurements of error rates in near-exascale and exascale systems suggest a dramatic growth, due to extreme scale (10^9 cores), concurrency, software complexity, and deep submicron transistor scaling. Such a growth makes resilience a critical concern, and may increase the incidence of errors that "escape", silently corrupting application state. Such errors can often be revealed by application software tests but with long latencies, and thus are known as latent errors. We explore how to efficiently recover from latent errors, with an approach called application-based focused recovery (ABFR). Specifically we present a case study of stencil computations, a widely useful computational structure, showing how ABFR focuses recovery effort where needed, using intelligent testing and pruning to reduce recovery effort, and enables recovery effort to be overlapped with application computation. We analyze and characterize the ABFR approach on stencils, creating a performance model parameterized by error rate and detection interval (latency). We compare projections from the model to experimental results with the Chombo stencil application, validating the model and showing that ABFR on stencil can achieve a significant reductions in error recovery cost (up to 400x) and recovery latency (up to 4x). 
Such reductions enable efficient execution at scale with high latent error rates.", "pdfUrls": [ "http://www.icl.utk.edu/files/publications/2017/icl-utk-963-2017.pdf", "http://people.cs.uchicago.edu/~aachien/lssg/research/gvr/ICPP2017.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.67" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1f09b8bffe732f7a2b3ca02111a401da7ef190fa", "sources": [ "DBLP" ], "title": "Resilience for Stencil Computations with Latent Errors", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "1f16b4f47f97a2c48f7f414f694e4954932937dd": { "authors": [ { "ids": [ "1999637" ], "name": "Doowon Kim" }, { "ids": [ "2457887" ], "name": "Bum Jun Kwon" }, { "ids": [ "3343194" ], "name": "Tudor Dumitras" } ], "doi": "10.1145/3133956.3133958", "doiUrl": "https://doi.org/10.1145/3133956.3133958", "entities": [ "Algorithm", "Antivirus software", "Binary file", "Code signing", "Digital signature", "Ecosystem", "Flaming (Internet)", "Malware", "Microsoft Windows", "Public key infrastructure", "Stuxnet", "Threat (computer)", "Threat model" ], "id": "1f16b4f47f97a2c48f7f414f694e4954932937dd", "inCitations": [], "journalName": "", "journalPages": "1435-1448", "journalVolume": "", "outCitations": [ "201b0a185dda51629d7b6fdef3b380a0beaba455", "066d553c5cfdb1fcc32cb7b036f943d0c290bb46", "5f56a4e8ca55facf14b35b2cebde6bae7fd1e356", "7a78dbb8ceb0200add67dbe6479685bbde7e77ab", "3591be0ccd08c80c0048ebaa0e7005556f49cf5e", "0641830054d30adf5c115adc0fd369f3ecdc6d73", "28013fc6979d483895e2998d05a7373807290d3e", "2dbcc7077a01981679007eceac6c6659a1c18200", "2152f9f91e798c23715fdce699b6a8f0f8d43170", "008d1c55a26eadea0c52350dd6a4ef0ace0a1c5c", "4187f06bcb0cae92ec422066d049dd7b157ff496", "f74804eaf20b71da1ad2ebbbb429595c133459c8", "b790b2f67c09bf5d4f957e4e1e55818b1801cb57", "8cdbab26fa0dee8f165b6680e59e8966679fd068", "1f0665485f7fbc06675c981866efab2c4ccbcdd4", 
"9f000d1bc7b213133b623a17a4ef3f34f85e7d7c", "0be628988f8ee0beaad57d68cbae8b635f0b555c", "39ac27363c06ade948e0cc3e7797523122a19085", "0774d20b08ba0ea6cc8f98ccf9caed5f337fcf22", "197f0b31f4088c7a7301e4e3079b43be2eae3dc3", "08fabacc44f1f7d3b968fa41e52e350a24e02abc", "6c5395868a818c6f414c653a30376461240bd366", "39b24b34ee1b0071f3fade608f3b2d9fa41fd050", "4e33e9d1aa91aa78819ec9700d1024e0f0cdef6c" ], "paperAbstract": "Digitally signed malware can bypass system protection mechanisms that install or launch only programs with valid signatures. It can also evade anti-virus programs, which often forego scanning signed binaries. Known from advanced threats such as Stuxnet and Flame, this type of abuse has not been measured systematically in the broader malware landscape. In particular, the methods, effectiveness window, and security implications of code-signing PKI abuse are not well understood. We propose a threat model that highlights three types of weaknesses in the code-signing PKI. We overcome challenges specific to code-signing measurements by introducing techniques for prioritizing the collection of code signing certificates that are likely abusive. We also introduce an algorithm for distinguishing among different types of threats. These techniques allow us to study threats that breach the trust encoded in the Windows code signing PKI. The threats include stealing the private keys associated with benign certificates and using them to sign malware or by impersonating legitimate companies that do not develop software and, hence, do not own code-signing certificates. 
Finally, we discuss the actionable implications of our findings and propose concrete steps for improving the security of the code-signing ecosystem.", "pdfUrls": [ "http://legacydirs.umiacs.umd.edu/~tdumitra/papers/CCS-2017.pdf", "http://doi.acm.org/10.1145/3133956.3133958", "https://obj.umiacs.umd.edu/papers_for_stories/kim_ACMCCS2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1f16b4f47f97a2c48f7f414f694e4954932937dd", "sources": [ "DBLP" ], "title": "Certified Malware: Measuring Breaches of Trust in the Windows Code-Signing PKI", "venue": "CCS", "year": 2017 }, "1f1f47da8fff8da53589d7eab36d6bae32b2c3d2": { "authors": [ { "ids": [ "40270373" ], "name": "Dana Van Aken" }, { "ids": [ "1774210" ], "name": "Andrew Pavlo" }, { "ids": [ "1736834" ], "name": "Geoffrey J. Gordon" }, { "ids": [ "2903546" ], "name": "Bohan Zhang" } ], "doi": "10.1145/3035918.3064029", "doiUrl": "https://doi.org/10.1145/3035918.3064029", "entities": [ "Cache (computing)", "Control knob", "Data-intensive computing", "Database", "Database tuning", "Machine learning", "Management system", "Relational database management system", "Supervised learning" ], "id": "1f1f47da8fff8da53589d7eab36d6bae32b2c3d2", "inCitations": [ "cb80292ff6fa871c917ab1fdaed7ef40d2c05a11", "178738930dc750ef8cf70f1dc7fbab6edca0d184", "d308092a5da30ef6687b6a26287f1e54ba4c5e10", "a55a685d254caeeb4f071062d5910734f8135057", "0bbbcf1376762a9e1a6289b75286f39dec625728", "502e33592ce72ccd5d68dae4acf3bb3c4d056e68", "4fcb1e0a25a5617ddee8174b48af80d88b4881f4", "ad64649f20cc20a2d1584cbc4b859d9fa9920538", "0fb3400d39c08b6dddbbfa8689711ca36a87afd0", "28e4ae18a652e7d67df3e3fa6f4703ae9ef930e9", "7254ad8940dc3ea502ef65fd9b71a9a2952daf81", "9d1ad91bff04bea0b8e6220c4da8244cd88d44b1", "5b5552aae4a3a6943aa9b4d1f1ea08e30c4065f8", "0bdb6e2cb1d8960ecf754fd4d28ea11714178fdf", "a060a4be76d6789113df800da0f20aa62ac99990", "8c7044398d1994b12a9bf7212e11398f59eaf446", "576f13a5f349ecc60e5e491395e8aa7a9c9f0c05", 
"162be49582b29ed18775f089810fb8cdc2ed6808", "e20549f3678d18f1788debdeacd0fa121220669e" ], "journalName": "", "journalPages": "1009-1024", "journalVolume": "", "outCitations": [ "71c1a0fc681a7a62cdd1c6a533e5f581e2287781", "73f585b1579c69d6b1e5c7c4a8226238e8448f94", "4fd6f219cd8347d96095e8b0b79372c47c8ca901", "153703ab30c7cb56a49718991f6bc450f0c2273f", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "a114adf98d48bbaf5c26b496803f448d85e87ff0", "1592fe924114866c1ac559bae33ea789930daa98", "162d50e6e2c000baf10148f761cc0929aad48ca2", "5dd36ed50668b5cc1c95ce6cf83b1b9b21a5f560", "55dfddfa4b2ed657a95600f617978aff2bb3c0ae", "4a66348c79300fc798db8fd45db84b39cc3da37f", "9c018f1b0aabe84ba61dfc969e273f2a9653c6ec", "4ca3c1040a4bc0d1ae200d26d1c18fbc6df0e95a", "36d858eb19bba43244b92f7faabfce47b13f2403", "0c40c39840e0eb577b67ec9f3f590d28fd30c683", "62be31097d51acb6530dd933a7f0ff8741019937", "347920406c9a9a3846adf485e2b864d4523a0652", "5f41cc7c081b294f684928c35a08626490ec4f8a", "723439f6dba7c6213772286a6d46bcd45265479f", "7df1de1c9663c2dfaefc1277a7d1cb3366b8c358", "e4984e24c407a629e7ad21315eb1e92449ee50cf", "0346de4027e75bc194811be80421c6e403495c7a", "0dafdc7debdcae528b2549489a03509cb4ecb9fe", "22584367753de3804867fe88530468c5984d86fa", "aeccdfbab928e994fe4a419130a4d24bea9571c8", "e9775d2e173f989c580df3fc967a905c336405c5", "9aa0d7253574e50fe3a190ccd924433f048997dd", "10551c91b4d36d1009b23b4d2b88a9e1733fe029", "36209da01ff06d4889f9472839a3373e1f211be3", "702879a978853613de4de67a72999b8cc6acc80a", "006b89abc356c1c3bf2dfa35f47c0601c39dce38", "40fecfef456c760912685b372151732b38e69d6e", "21c66366122ebde6c367b106e0067e10f3a1d4d4", "019902292dff81eae20f3e87970dd7a1151d9405", "502ff2f9220ebc8c3544e6c4a005e819429ab716", "8c6f43d8a0ea0b183a7c277f576b52ac46d5450e", "231a0787b6361fca082cefe580c41c74e230b255", "44662438b9659fc7a48d32eae112dd9bcdef9ee3", "1b2457906994b5942b0ecc6e0ca38e2e3b2450c7", "489996303d862cc86eb8010fb818d47eab75ed12", "1b65af0b2847cf6edb1461eda659f08be27bc76d", 
"30c4b5432dded3ce170f58d96e8935d538c58b98", "f4e62f813e8cb019c85504597a87a6f1cd3c2194", "64cc18eacc8b33ab5b7ee2f789ca409bbd7455d9", "1e557937f418accc13f9c5edb33a3d48259d80e5", "9f74a87a39cf4922b7b13e4b5386eb52025959ee", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "e0dbc2deeb87f9c17e7b2b298e0c8f4eb1bc3dcc", "c5faa674df8fe81d17ec2537b865045c20d79990" ], "paperAbstract": "Database management system (DBMS) configuration tuning is an essential aspect of any data-intensive application effort. But this is historically a difficult task because DBMSs have hundreds of configuration \"knobs\" that control everything in the system, such as the amount of memory to use for caches and how often data is written to storage. The problem with these knobs is that they are not standardized (i.e., two DBMSs use a different name for the same knob), not independent (i.e., changing one knob can impact others), and not universal (i.e., what works for one application may be sub-optimal for another). Worse, information about the effects of the knobs typically comes only from (expensive) experience.\n To overcome these challenges, we present an automated approach that leverages past experience and collects new information to tune DBMS configurations: we use a combination of supervised and unsupervised machine learning methods to (1) select the most impactful knobs, (2) map unseen database workloads to previous workloads from which we can transfer experience, and (3) recommend knob settings. We implemented our techniques in a new tool called OtterTune and tested it on two DBMSs. 
Our evaluation shows that OtterTune recommends configurations that are as good as or better than ones generated by existing tools or a human expert.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064029", "http://www.pdl.cmu.edu/PDL-FTP/Database/parameters.pdf", "http://www.cs.cmu.edu/afs/.cs.cmu.edu/Web/Posters/CSSpeakingSkills-DVAken17.pdf", "http://www.cl.cam.ac.uk/~ey204/teaching/ACS/R244_2017_2018/presentation/S7/Ioana_Otto.pdf", "http://www.pdl.cmu.edu/ftp/News/newsletter17.pdf", "http://db.cs.cmu.edu/papers/2017/p1009-van-aken.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1f1f47da8fff8da53589d7eab36d6bae32b2c3d2", "sources": [ "DBLP" ], "title": "Automatic Database Management System Tuning Through Large-scale Machine Learning", "venue": "SIGMOD Conference", "year": 2017 }, "1f2f1babd3624e9f09bec791e2d43f49279c1f09": { "authors": [ { "ids": [ "2777479" ], "name": "Sajjad Rizvi" }, { "ids": [ "1772379" ], "name": "Bernard Wong" }, { "ids": [ "1692842" ], "name": "Srinivasan Keshav" } ], "doi": "10.1145/3143361.3143394", "doiUrl": "https://doi.org/10.1145/3143361.3143394", "entities": [ "Attribute\u2013value pair", "Centralisation", "Consensus (computer science)", "Data center", "Distributed computing", "Entity", "Network topology", "Network traffic control", "Parallel computing", "Plug compatible", "Scalability", "Throughput" ], "id": "1f2f1babd3624e9f09bec791e2d43f49279c1f09", "inCitations": [], "journalName": "", "journalPages": "426-438", "journalVolume": "", "outCitations": [ "1a562dabbe5e17eac0060a87f855ac9f32f301bb", "38acf01a412d4bfeb810ab9fc5a7a1f1c8643c6b", "648e3ed128e6da4bd9364a952227f95ab72b03b5", "091a8b2a10483b9899c667862dcfa92fc130bb74", "00e3756119a91432622f6982b59ecd24a1340fbe", "0ded2712dcd13bada83065115c45465580936895", "b129f84262024128ee64300ab257744b0b5ed8fb", "3ef41da8f348dafc92d4f16aae31ae8e303958db", "59f9741a81db9840e69210d60349a912f03a4e7d", "9bf907a4844e9023ba1269f9c799a387c640347b", 
"056f77cdc09ce6a8bb28f632163758c16f681672", "12b2e6cd5e273102bedde005c1ff6de02fdbd0bb", "547ab6c764914f5ba75985dda1a2afbbc4d4dace", "b02c6b00bd5dbdbd951fddb00b906c82fa80f0b3", "1861776e08d4ce30ac63bd99b03501a80b98bf87", "4e0e26b2c421a3faf75bee48e9124b59914488a3", "4f5ef5dfe854a9c9f34af44d306534c9a1606b15", "58f692e9b03cb973355aab46bb6f867239aeb513", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "578c2b170aaab7ea88ea0cca472f123287c5b6b5", "11a4744f7f0883c4232a9f5aaca8b9d29bfaa762", "42a5af08a9cd396ff05540b4f928f58c2abb0cec", "01667cd68d259d08807e266e07c4f8eb1c81d2af", "066dda2494a69c0cd50e6a2b758cfd45facad84f", "663e064469ad91e6bda345d216504b4c868f537b", "155ca30ef360d66af571eee47c7f60f300e154db", "4af63ed343df388b6353b6fc77c7137d27822bf4", "07d847f310d5fa9138f461f0a25c5e0024f1c4af", "3c903855e111dc5a2bbd38e64f4a199f14fc29fd", "058f6752d85a517aae298586fdf117acdd7560ea", "036ebe81fc7bd9000c3edda83fa30bee03fedc1a", "157629dc2a9d2c2c7696e021ff8c1d5a6e7b4197", "3bc9d643f741343c6eef24363a20e2b483439149", "fc3fbb4c76448e8968f8a19f076d133b2e7a2849" ], "paperAbstract": "Achieving consensus among a set of distributed entities (or participants) is a fundamental problem at the heart of many distributed systems. A critical problem with most consensus protocols is that they do not scale well. As the number of participants trying to achieve consensus increases, increasing network traffic can quickly overwhelm the network from topology-oblivious broadcasts, or a central coordinator for centralized consensus protocols. Thus, either achieving strong consensus is restricted to a handful of participants, or developers must resort to weaker models of consensus.\n We propose Canopus, a highly-parallel consensus protocol that is 'plug-compatible' with ZooKeeper, which exploits modern data center network topology, parallelism, and consensus semantics to achieve scalability with respect to the number of participants and throughput (i.e., the number of key-value reads/writes per second). 
In our prototype implementation, compared to EPaxos and ZooKeeper, Canopus increases throughput by more than 4x and 16x respectively for read-heavy workloads.", "pdfUrls": [ "http://blizzard.cs.uwaterloo.ca/keshav/wiki/images/9/9f/Canopus.pdf", "https://cs.uwaterloo.ca/~bernard/S10_1.pdf", "https://cs.uwaterloo.ca/sites/ca.computer-science/files/uploads/files/cs-2017-08.1.pdf", "http://doi.acm.org/10.1145/3143361.3143394", "https://uwaterloo.ca/computer-science/sites/ca.computer-science/files/uploads/files/cs-2017-08.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1f2f1babd3624e9f09bec791e2d43f49279c1f09", "sources": [ "DBLP" ], "title": "Canopus: A Scalable and Massively Parallel Consensus Protocol", "venue": "CoNEXT", "year": 2017 }, "1f83e48319270c0a004d277bbb5156f1f477f98d": { "authors": [ { "ids": [ "3426485" ], "name": "Xueyuan Han" }, { "ids": [ "3176695" ], "name": "Thomas F. J.-M. Pasquier" }, { "ids": [ "2191192" ], "name": "Tanvi Ranjan" }, { "ids": [ "39922558" ], "name": "Mark Goldstein" }, { "ids": [ "1745942" ], "name": "Margo I. 
Seltzer" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Anomaly detection", "Directed acyclic graph", "Fault detection and isolation", "Fraps", "Platform as a service" ], "id": "1f83e48319270c0a004d277bbb5156f1f477f98d", "inCitations": [ "41bb8d014bae6069b274f919591263d557efdb2f" ], "journalName": "CoRR", "journalPages": "", "journalVolume": "abs/1711.11487", "outCitations": [ "6a47e4a3820f444f115941dbb45d9b1d587e9be7", "53aabc0ab7bdb22c4bb5b508a4db2fc4a2387060", "2f8bf79c762924808d09c730132979228661de1a", "0bacca0993a3f51649a6bb8dbb093fc8d8481ad4", "31f14626b7233ed3f8922408c663ce58e813f2d4", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "76f5484e9332947651dd6a0e49469993046ccf25", "d67630b667b2c4dcbd1a284dd1c459742f161fb1", "28cf3794cf80c30c6ce4d3478d381af677cf40f9", "1aa94aa0c5eba9fe773f70b1a9c47db401f5cb66", "7e1874986cf6433fabf96fff93ef42b60bdc49f8", "10e62304e9b278339de49afb8cd09614ca54303d", "f9c74b45203266abf92f2f40e4b268aaf3274d38", "4b1f691ecdf7b78a9dc176ba4913543e7eb62232", "9e471ab3fd7a86701106a7f274feec4a09bc6785", "01911a1e4c78d2562ee3999413a5008d845b1c22", "07083f18b90323abedf8932f733656391cad5e21", "59a8f2546b4348b7997c766cd726b2fb868d6b79", "0aeb77fb41dc8e863e054fcffea7b8b3011515ce", "114f664a25c411b708c0f0058c3f455b5a1f3e9b", "04c724bad0963d1e6e7a3743be08c08810402582", "097e908d46b57041acf02eb97bab4ce6298c85be", "35339f6f2e99c04920f21883df1db8004436cdc7", "4553901b0417c16bd0743cf09bd538466352dc83", "7fa39d4ec44a8c736107ea6da8e111bf943ae9e7", "111864cac232d8a9c170bd63069eb4af155a9f7b", "00aad3642752496f95fe8dbaad5e5ddeca2a0b58", "6022b054aa59bf7cfc7e319cf3f53229d9e95503" ], "paperAbstract": "We present FRAPpuccino (or FRAP), a provenance-based fault detection mechanism for Platform as a Service (PaaS) users, who run many instances of an application on a large cluster of machines. FRAP models, records, and analyzes the behavior of an application and its impact on the system as a directed acyclic provenance graph. 
It assumes that most instances behave normally and uses their behavior to construct a model of legitimate behavior. Given a model of legitimate behavior, FRAP uses a dynamic sliding window algorithm to compare a new instance\u2019s execution to that of the model. Any instance that does not conform to the model is identified as an anomaly. We present the FRAP prototype and experimental results showing that it can accurately detect application anomalies.", "pdfUrls": [ "http://arxiv.org/abs/1711.11487", "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-han.pdf", "https://www.usenix.org/conference/hotcloud17/program/presentation/han", "https://arxiv.org/pdf/1711.11487v1.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/d3d1/03db7c3686b9ef3b9f7bd5b90f1f9aaa6dac.pdf", "s2Url": "https://semanticscholar.org/paper/1f83e48319270c0a004d277bbb5156f1f477f98d", "sources": [ "DBLP" ], "title": "FRAPpuccino: Fault-detection through Runtime Analysis of Provenance", "venue": "HotCloud", "year": 2017 }, "1f855fe351176c61c3c4bcb215cc3808a3ffd2e3": { "authors": [ { "ids": [ "3236398" ], "name": "Abbas Razaghpanah" }, { "ids": [ "28931849" ], "name": "Arian Akhavan Niaki" }, { "ids": [ "2560825" ], "name": "Narseo Vallina-Rodriguez" }, { "ids": [ "2993481" ], "name": "Srikanth Sundaresan" }, { "ids": [ "1811289" ], "name": "Johanna Amann" }, { "ids": [ "36757962" ], "name": "Phillipa Gill" } ], "doi": "10.1145/3143361.3143400", "doiUrl": "https://doi.org/10.1145/3143361.3143400", "entities": [ "Android", "Ecosystem", "Fingerprint", "HTTP Public Key Pinning", "Library", "Mobile operating system", "Operating system", "Public key fingerprint", "Transport Layer Security", "Verification and validation", "Vulnerability (computing)" ], "id": "1f855fe351176c61c3c4bcb215cc3808a3ffd2e3", "inCitations": [ "52e76bc759f6da650dbfcb5ff4e7f052dcb13c3f", "f2b5963afef31e1c1b12c84bb3a9d1117916e673" ], "journalName": "", "journalPages": "350-362", "journalVolume": 
"", "outCitations": [ "3591be0ccd08c80c0048ebaa0e7005556f49cf5e", "48fc8f1aa0b6d1e4266b8017820ff8770fb67b6f", "201b0a185dda51629d7b6fdef3b380a0beaba455", "4655c716f39a981830adf334769e6926e74212a6", "f066706e868e21e78c49ace256368a7672e80ec5", "17138b471f2dade960cd3969db0c08b623b33797", "1370b7ec6cb56b0ff25f512bd673acbab214708c", "a3eec7c9315fc077930cdd13850a540a808931b8", "4f93999383e7fdcfb0de8423789c98389aef8aef", "08026d939ac1f30951ff7f4f7c335bf3fef47be4", "094cca7a7bbfa274975e58f32d392404871ca2e5", "1f0665485f7fbc06675c981866efab2c4ccbcdd4", "06c87865bc8f19df60db5c37e504146b0735255a", "13315d952a43c391bf4910271fc2582858e86e9e", "3c338bb3dcc10b7c840b4dbf3ad32e8256313ee3", "2dbcc7077a01981679007eceac6c6659a1c18200", "39ac27363c06ade948e0cc3e7797523122a19085", "4e33e9d1aa91aa78819ec9700d1024e0f0cdef6c", "2c9f073340ab55613f0e25e444bbd09b7851aa23", "828cc4f5f736e2d5ef555ef052e2a99f754e401a", "6e4480275887464a483cf85ada0fff26514b1313", "082d2b922818331e2994aeebaaccb776cfa09145", "08fabacc44f1f7d3b968fa41e52e350a24e02abc", "4d722cf0b47d6265ab23cdb91d3d24835352b5b0", "306824d39a66af5c75579bd8606fefd330b443e7", "3a2988775bc88de57a0641602e63be686f5817a0", "133eea63e0a9702207dc14fdd72740d402f5748b", "1a0ca2cf71616fa492ebe611f1d83261d2ecf052" ], "paperAbstract": "Transport Layer Security (TLS), has become the de-facto standard for secure Internet communication. When used correctly, it provides secure data transfer, but used incorrectly, it can leave users vulnerable to attacks while giving them a false sense of security. Numerous efforts have studied the adoption of TLS (and its predecessor, SSL) and its use in the desktop ecosystem, attacks, and vulnerabilities in both desktop clients and servers. However, there is a dearth of knowledge of how TLS is used in mobile platforms. In this paper we use data collected by Lumen, a mobile measurement platform, to analyze how 7,258 Android apps use TLS in the wild. 
We analyze and fingerprint handshake messages to characterize the TLS APIs and libraries that apps use, and also evaluate weaknesses. We see that about 84% of apps use default OS APIs for TLS. Many apps use third-party TLS libraries; in some cases they are forced to do so because of restricted Android capabilities. Our analysis shows that both approaches have limitations, and that improving TLS security in mobile is not straightforward. Apps that use their own TLS configurations may have vulnerabilities due to developer inexperience, but apps that use OS defaults are vulnerable to certain attacks if the OS is out of date, even if the apps themselves are up to date. We also study certificate verification, and see low prevalence of security measures such as certificate pinning, even among high-risk apps such as those providing financial services, though we did observe major third-party tracking and advertisement services deploying certificate pinning.", "pdfUrls": [ "http://abbas.rpanah.ir/publications/conext2017_tls_paper.pdf", "http://eprints.networks.imdea.org/1690/1/Studying_TLS_Usage_in_Android_Apps_2017_EN.pdf", "http://www.icir.org/johanna/papers/conext17android.pdf", "https://people.cs.umass.edu/~phillipa/papers/conext2017_tls_paper.pdf", "http://doi.acm.org/10.1145/3143361.3143400" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1f855fe351176c61c3c4bcb215cc3808a3ffd2e3", "sources": [ "DBLP" ], "title": "Studying TLS Usage in Android Apps", "venue": "CoNEXT", "year": 2017 }, "1f896f601fc53038d0bbc28fde31ff84b12d06d9": { "authors": [ { "ids": [ "7549792" ], "name": "Yunhao Zhang" }, { "ids": [ "40576767" ], "name": "Rong Chen" }, { "ids": [ "1716528" ], "name": "Haibo Chen" } ], "doi": "10.1145/3132747.3132777", "doiUrl": "https://doi.org/10.1145/3132747.3132777", "entities": [ "Computation", "Concurrency (computer science)", "Data model", "Linked data", "Locality of reference", "Persistence (computer science)", "Query plan", "Remote 
direct memory access", "SPARQL", "Snapshot (computer storage)", "State (computer science)", "Stateful firewall", "Stream (computing)", "Stream processing", "Streaming media", "Throughput" ], "id": "1f896f601fc53038d0bbc28fde31ff84b12d06d9", "inCitations": [ "372a2383891257520ad6dea816d3f14ddff8f003" ], "journalName": "", "journalPages": "614-630", "journalVolume": "", "outCitations": [ "1ec3c93bf22e22f76dcf978fba7764f3f0696a82", "122eda0ff026311fbb6e8d589262da0b9821c19f", "3b6dd340fb5442e0c31d73f40e241fdd73d42330", "1acadbf0c0616b4b9bd287ff8d9d164d96778589", "184aef65512c4812449258e1324bf30b9c1d2756", "75373ee7efaca72d4894bb2c86033a2cadeef655", "a063dfc74fccce8569f78f20e6f870f00b94862e", "036e006a9f2049d15c1533ac254dcfce2483a1f6", "26b4bb74dc87eefeba587fe2aceda0789174e476", "0608d9937c074520cdc93cc444cc1c77039c5332", "32ca6f94f8d1bc43edce6520b2b1891ca3541d08", "080ed793c12d97436ae29851b5e34c54c07e3816", "4ce25286205c62fffda7d685a916cf4508149245", "2a9a830aaac6857387369446f49eb18a0e4f2c75", "3ede9f94ee174dae45f3e95cf3a45df2dbe05307", "78c83644fbfa65e69137f57e22fa3b53f225a5d5", "2f7f5d0e989c74d6279e2620e10e8d0b0c021cb7", "ab2dd895c0d8e567071caf8704e6078d33cd6e22", "3df5b6b3b2b648f3d8224322e6a0f127850df017", "022e936d46bf435f73faf9ca03a5a150eb90ce9b", "1971425c8e95d75dca4fec126504d7fa6179c926", "e847c3ec130da57328db79a7fea794b07dbccdd9", "0ef1dd03db41de69165075562a051021a186c230", "1bb24c175f9fa082937c51e9b9b8ae651fd9b111", "26c0e76f89c5e1c12a3e9bc00e487bf0a8aa057f", "edd661ca12ef8ef988679f7b399f2f846ef01fbd", "3dfeb940f27adf6b24515a8e9ec22b17eef0ab16", "01e027c5c2e5375b88e9776c8d606675f51be827", "975a9eb18929faf31c653d3d6d923fc07c3806d2", "9e074f3d1c0e6212282818c8fb98cc35fe03f4d0", "282bc59faefb734137d2ea978cb1eb5699e67c7c", "0ef88610f4382a9c6e5c8020ab6ccf3dde0fb3eb", "0318ae2fd88c95cdd5dd3b8062042b14a559d372", "cc31b0318d769505d74e58d9d21603dbebff71ee", "739f18de865a83d048e0e9e0dcb3b224e32691be", "44d4e4111ee7ba147c400a548d25108e92a4662e", 
"893448e800e13eb022cc2b3c6328ff1a85bdbe87", "55d8efeaedfdf6db48b6e98b46c2a62876959ce9" ], "paperAbstract": "Applications like social networking, urban monitoring and market feed processing require stateful stream query: a query consults not only streaming data but also stored data to extract timely information; useful information from streaming data also needs to be continuously and consistently integrated into stored data to serve inflight and future queries. However, prior streaming systems either focus on stream computation, or are not stateful, or cannot provide low latency and high throughput to handle the fast-evolving linked data and increasing concurrency of queries.\n This paper presents Wukong+S, a distributed stream querying engine that provides sub-millisecond stateful query at millions of queries per-second over fast-evolving linked data. Wukong+S uses an integrated design that combines the stream processor and the persistent store with efficient state sharing, which avoids the cross-system cost and sub-optimal query plan in conventional composite designs (e.g., Storm/Heron+Wukong). Wukong+S uses a hybrid store to differentially manage timeless data and timing data accordingly and provides an efficient stream index with locality-aware partitioning to facilitate fast access to streaming data. Wukong+S further provides decentralized vector timestamps with bounded snapshot scalarization to scale with nodes and massive queries at efficient memory usage.\n We have designed Wukong+S conforming to the RDF data model and Continuous SPARQL (C-SPARQL) query interface and have implemented Wukong+S by extending a state-of-the-art static RDF store (namely Wukong). 
Evaluation on an 8-node RDMA-capable cluster using LSBench and CityBench shows that Wukong+S significantly outperforms existing system designs (e.g., CSPARQL-engine, Storm/Heron+Wukong, and Spark Streaming/Structured Streaming) for both latency and throughput, usually at the scale of orders of magnitude.", "pdfUrls": [ "http://doi.acm.org/10.1145/3132747.3132777", "https://people.csail.mit.edu/jshun/6886-s18/papers/p614-zhang.pdf", "http://www.systems.cs.cornell.edu/syslunch/fa17/wukong-s-sosp.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1f896f601fc53038d0bbc28fde31ff84b12d06d9", "sources": [ "DBLP" ], "title": "Sub-millisecond Stateful Stream Querying over Fast-evolving Linked Data", "venue": "SOSP", "year": 2017 }, "1f8be49d63c694ec71c2310309cd02a2d8dd457f": { "authors": [ { "ids": [ "3363560" ], "name": "NhatHai Phan" }, { "ids": [ "7916525" ], "name": "Xintao Wu" }, { "ids": [ "5064202" ], "name": "Han Hu" }, { "ids": [ "1721158" ], "name": "Dejing Dou" } ], "doi": "10.1109/ICDM.2017.48", "doiUrl": "https://doi.org/10.1109/ICDM.2017.48", "entities": [ "Artificial neural network", "Deep learning", "Differential privacy", "Experiment", "Loss function", "MNIST database" ], "id": "1f8be49d63c694ec71c2310309cd02a2d8dd457f", "inCitations": [ "bb7bf049f9bca00e9569a404ce37301a5db9e21d", "eec0bc4c3fddbaf78feb0872a195fb3aeb01010e", "e12d8bc67601fbc09a65f4fb4a6e85e9f3f986c7", "e1a06e07fd5838b3bd29524ecfec8acba338a868" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "385-394", "journalVolume": "", "outCitations": [ "16358a75a3a6561d042e6874d128d82f5b0bd4b3", "316d5642b39ba001efc8949cb87ed83eba1def95", "1ecfe23503600b7a6a6ed3dcce86542420e36a06", "17a273bbd4448083b01b5a9389b3c37f5425aac0", "83ef0b469a994b998d412d523e58256e7a151601", "4912c18161cf35a066a9e70b4e4ef45ff9d19035", "63b88452574095639ef9a1f692eef3c1ec386b0a", "8726139a30434175795fe924188bd5c6e0b0740d", 
"6154ce8c02375184f7928e41c4fae532500f7175", "8942804fe4e2425758ab68df4ff80a2cac1987b8", "64028c85cd7b7e42f208e29734028572d7735c61", "012b8a941e96594783fb10d3a785e91f13384413", "6b74ec27d76ae42c2faa9211e2640141595838b6", "4a4ba3a1fefa9b9551dcb0953fc8168e23b319be", "0fcaa5d69913b2601fb4fac3a16ba384e5f1883b", "56c56187cdaa03372298fb6ad1dc51dba7b3499b", "4062e487c042c5e7f2e8d45ac538e830965e3552", "326bb49d3ae9e1e1551028200916192e50004105", "472a63c41ef24257148d9cf4fd00aec70cf3add6", "1047c50bcd412d4cf2f735a33d427b0313be9d5e", "17fac85921a6538161b30665f55991f7c7e0f940", "0d67362a5630ec3b7562327acc278c1c996454b5", "132e3d3b5cfc2f59db6ed69ac1eac4a1ee6dca71", "5d90f06bb70a0a3dced62413346235c02b1aa086", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "162d958ff885f1462aeda91cd72582323fd6a1f4", "830e508a2dccc4fdd105bcd9f4a52addbbf1d1f4", "11ccb00bd3ff98e3f46a51cca059241c70954d4f", "28c41e35948d0aca47497cbdff1d9486004093e5", "000f2d99632d5d6c494bf9e1b179638e48433e99" ], "paperAbstract": "In this paper, we focus on developing a novel mechanism to preserve differential privacy in deep neural networks, such that: (1) The privacy budget consumption is totally independent of the number of training steps; (2) It has the ability to adaptively inject noise into features based on the contribution of each to the output; and (3) It could be applied in a variety of different deep neural networks. To achieve this, we figure out a way to perturb affine transformations of neurons, and loss functions used in deep neural networks. In addition, our mechanism intentionally adds "more noise" into features which are "less relevant" to the model output, and vice-versa. Our theoretical analysis further derives the sensitivities and error bounds of our mechanism. 
Rigorous experiments conducted on MNIST and CIFAR-10 datasets show that our mechanism is highly effective and outperforms existing solutions.", "pdfUrls": [ "http://arxiv.org/abs/1709.05750", "http://ix.cs.uoregon.edu/~dou/research/papers/icdm17.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.48", "https://arxiv.org/pdf/1709.05750v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1f8be49d63c694ec71c2310309cd02a2d8dd457f", "sources": [ "DBLP" ], "title": "Adaptive Laplace Mechanism: Differential Privacy Preservation in Deep Learning", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "1f93ee1306f9439eb7e82e638d0a48380d1bc0de": { "authors": [ { "ids": [ "2712026" ], "name": "Anurag Khandelwal" }, { "ids": [ "1867996" ], "name": "Zongheng Yang" }, { "ids": [ "10771670" ], "name": "Evan Ye" }, { "ids": [ "34292316" ], "name": "Rachit Agarwal" }, { "ids": [ "1716557" ], "name": "Ion Stoica" } ], "doi": "10.1145/3035918.3064012", "doiUrl": "https://doi.org/10.1145/3035918.3064012", "entities": [ "Application programming interface", "Computer data storage", "Data compression", "Distributed memory", "Facebook Graph Search", "Interactivity", "Neo4j", "Server (computing)", "TAO", "Terabyte", "Throughput", "Titan" ], "id": "1f93ee1306f9439eb7e82e638d0a48380d1bc0de", "inCitations": [ "306185d6b16f1ac9c770d2e2a80656cbdc1e9224" ], "journalName": "", "journalPages": "1149-1164", "journalVolume": "", "outCitations": [ "0d8fadb88666b1137e9e767b5c82d2a98f807f2d", "473fa1c5c66d4a51adbb64c263687d730fc6d217", "0e3253e5ca318e70d4968a45f8d41f88dbffd9e3", "0ad8e89091eed09217e66adc98136126addc2619", "45a916500ce98c8d018c13de4c1d5c53130e8a72", "15c7d3d5cfce46110a5aa5c6a482e359a96082b4", "1e557937f418accc13f9c5edb33a3d48259d80e5", "751d4061b9ec44b6824badd8384ce6022e377110", "ef8d3a389410124d21dfda44295de8af786f5516", "841e495ec20ab72170138b2f4fdf75bd17fd27bf", "1521d39088b203ddac981d10d214f463449ae95b", 
"c8e2d72db05ad9ec096058d7a5ed4bcdbf37ec8a", "4dc578364f357b993b5554b9181c90c84aa6b4d1", "32ed33a9bb1173cb9d9df9a3f8922a8c3cb6bddd", "3dff11679346f5344af1018cad57fa14cc349f2f", "dac57d16256af2f4913b72164a7bc3cb593937c1", "3486aeaf540c48952120fe853d672af984f40a6a", "19cadcb4e7439bc525c604771ab4872ec93a5b53", "1a289721c6570d757f33be50dc80ebb22e29bf16", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "bcbc4b1f04f81e2af00ad0a5b83b55f52d638c24", "0b9c6fe7beb3971b27aff8c5aa5e133de74316a4", "62bfdedb87d1fed25eb5aa1bc6ff546c70a0ba6a", "d1c21c34936f587779c216ed79ca33883845caa1", "02c7714e034a832ce25bf0bf563cf0a789ad7342", "2e1c33f55f80cb7e440435deb6f5fdf8bed95f47", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "26deee037b221bd05ed34461819f5c067b745445", "13f7df91eb208a387d18fbad192c6f0f834f0b82", "2414283ed14ebb0eec031bb75cd25fbad000687e", "39ac2e0fc4ec63753306f99e71e0f38133e58ead", "59ca22b032e7e1fb4467a3b2ad577cbf9d8f0ea8", "73dd7060a97f8ae5728ac2533926aee492400261", "b9c5678100693e00b59e58f3368f4797b9f11e77", "aac9932004ff0b07bcc40224c368f72553b424d2", "0cc2bb8fe99755aacbb76100a345322d164835ab" ], "paperAbstract": "We present ZipG, a distributed memory-efficient graph store for serving interactive graph queries. ZipG achieves memory efficiency by storing the input graph data using a compressed representation. What differentiates ZipG from other graph stores is its ability to execute a wide range of graph queries directly on this compressed representation. ZipG can thus execute a larger fraction of queries in main memory, achieving query interactivity. ZipG exposes a minimal API that is functionally rich enough to implement published functionalities from several industrial graph stores. We demonstrate this by implementing and evaluating graph queries from Facebook TAO, LinkBench, Graph Search and several other workloads on top of ZipG. 
On a single server with 244GB memory, ZipG executes tens of thousands of queries from these workloads for raw graph data over half a TB; this leads to an order of magnitude (sometimes as much as 23×) higher throughput than Neo4j and Titan. We get similar gains in distributed settings compared to Titan.", "pdfUrls": [ "https://people.eecs.berkeley.edu/~anuragk/papers/zipg.pdf", "http://www.cs.cornell.edu/~ragarwal/pubs/zipg.pdf", "http://doi.acm.org/10.1145/3035918.3064012" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1f93ee1306f9439eb7e82e638d0a48380d1bc0de", "sources": [ "DBLP" ], "title": "ZipG: A Memory-efficient Graph Store for Interactive Queries", "venue": "SIGMOD Conference", "year": 2017 }, "1fc70e23da32168bec5509cb9f07bf5784134b90": { "authors": [ { "ids": [ "1852063" ], "name": "Muhammad Yasin" }, { "ids": [ "32173503" ], "name": "Abhrajit Sengupta" }, { "ids": [ "26939526" ], "name": "Mohammed Thari Nabeel" }, { "ids": [ "11864529" ], "name": "Mohammed Ashraf" }, { "ids": [ "1977116" ], "name": "Jeyavijayan Rajendran" }, { "ids": [ "1732778" ], "name": "Ozgur Sinanoglu" } ], "doi": "10.1145/3133956.3133985", "doiUrl": "https://doi.org/10.1145/3133956.3133985", "entities": [ "ARM Cortex-M", "ARM architecture", "Computer-aided design", "Key (cryptography)", "Lock (computer science)", "Microprocessor", "Netlist", "Provable prime", "Provable security", "Reverse engineering", "STRIPS", "Simulation" ], "id": "1fc70e23da32168bec5509cb9f07bf5784134b90", "inCitations": [ "4de8eb830a45c1ec281ccbc7f80ad60f20dcd106" ], "journalName": "", "journalPages": "1601-1618", "journalVolume": "", "outCitations": [ "5c1eff420eae32dc9f572b7be36827ea31206e5b", "afe2e65015535acf0aee8b6599dd7fe5329409d2", "105c78d9c72f123037ed03752775ce930931fe88", "59f33df362ffd96da2465d423c676dfe5ce4a6d8", "16e0303d1005ea1f30a8409e89f35c639b2da2a5", "0df5a63537b2053c41f851526987bb2b8a2a75ea", "b3d4fee78b588a9008749d300054fa0e084a8671", 
"27f989a4994a6b76acae6fe63f992a0146ed1168", "3a55a3091d607f75cb6f4aee7f14672c11ffe5e8", "3637fc3c63d4aef66765f6b5aa61a035bd42992a", "cfd4d1e4ad870bd70c3b98c0224d5fd33746de7c", "918ed5c4f774ef33f2b6a87f376dd48a855d9cb7", "151ea06c9c0bbd21ddc823a46626ba662774eabb", "a58158409dcc9c2b1a00f36cb24e67166bdba041", "ebbe5d0df5ab72a842f16f21bb7217bdba295589", "050128198c7ffb4a3e47b971936e1bb543bbed50", "7f0a9aa28d3ab7382decb4b9b3c38e63ecfbcbc6", "1c1ee7b39616c52e96d91e243dc8996cfed11027", "9506ae8cf8203c84c2e9cbed31d08d995e17d8b2", "20cb36b391e07f594c51f74bad18689634e58f64", "6aa37f2d24d84425a4ef625126868502989d29a9", "52ab000f95a65b42013b1decfcd72b1d53f34cc1", "2c5d9d05c675d7b70d5956d246aa798ff94819e7", "f4c97bddf3704ab88473d79437e77565b44710ad", "051aa7d66fc3b25f9552edce35812719b3200d32", "8a0844c6f60dc0e8e5ab274f4ddbf5648ceccdad", "230630732ade1b050cf788ece7e96eda31784564", "5b5bebed9337b7a6025f55e0a74ed6739a7bc845", "0706b75e62afde63056582c6c553320d2c84eb20", "4d9d3df6f23afef5605ae45b46deb30ffe13894f", "05ee6b24038822f2c90f315bcc979c4744482370", "e28921b31e1958f22dc2a0a8021cbd77f2a83080", "1c1d93993c9460224292bbb473d3580982e15f06", "330b46ce848047b13fadc7a63c01abfe02fd4d8b", "ea2de6883474da3dacb8d0201412ece225edb2cc", "0fbc6fc05d18f5301f3bde09509a5ec2f96b0a9f", "2416483c1cc1d4bef52916657aa66409ceefdd29", "29e5dc303568b68a77c48bed174c8e3b740691eb", "c82a74c8e86850a7a40ab4605cef626c2125344b", "2ee002d8a39b5c92691b440866563a6aa193c90e", "0c36a84fb154a8eb8fe4ba73d0386b316d7bd3d4", "2906d831cc7594cdf3bf8ec722fa9c00e5dbd084", "e0711b31bed82c68d9edd100607b650b2c39ba4d", "421a50d074c3ca431a6a4986600cecc81b890caf", "a45b003a210242d9c5ff60bf270f760f39cbf775", "58a2f18792b5f19499b4d45219078dcb4a1d6e8b" ], "paperAbstract": "Logic locking has been conceived as a promising proactive defense strategy against intellectual property (IP) piracy, counterfeiting, hardware Trojans, reverse engineering, and overbuilding attacks. 
Yet, various attacks that use a working chip as an oracle have been launched on logic locking to successfully retrieve its secret key, undermining the defense of all existing locking techniques. In this paper, we propose stripped-functionality logic locking (SFLL), which strips some of the functionality of the design and hides it in the form of a secret key(s), thereby rendering on-chip implementation functionally different from the original one. When loaded onto an on-chip memory, the secret keys restore the original functionality of the design. Through security-aware synthesis that creates a controllable mismatch between the reverse-engineered netlist and original design, SFLL provides a quantifiable and provable resilience trade-off between all known and anticipated attacks. We demonstrate the application of SFLL to large designs (>100K gates) using a computer-aided design (CAD) framework that ensures attaining the desired security level at minimal implementation cost, 8%, 5%, and 0.5% for area, power, and delay, respectively. 
In addition to theoretical proofs and simulation confirmation of SFLL's security, we also report results from the silicon implementation of SFLL on an ARM Cortex-M0 microprocessor in 65nm technology.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133985" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1fc70e23da32168bec5509cb9f07bf5784134b90", "sources": [ "DBLP" ], "title": "Provably-Secure Logic Locking: From Theory To Practice", "venue": "CCS", "year": 2017 }, "1fd268150d8dba28fa6f1e85c359d9e94da2d09a": { "authors": [ { "ids": [ "2133038" ], "name": "Thomas Espitau" }, { "ids": [ "1740168" ], "name": "Pierre-Alain Fouque" }, { "ids": [ "2417391" ], "name": "Beno\u00eet G\u00e9rard" }, { "ids": [ "1678628" ], "name": "Mehdi Tibouchi" } ], "doi": "10.1145/3133956.3134028", "doiUrl": "https://doi.org/10.1145/3133956.3134028", "entities": [ "8-bit", "Algorithm", "Antivirus software", "Atmel AVR", "BLISS", "Bliss bibliographic classification", "Branch trace", "Central processing unit", "Denial-of-service attack", "Desktop computer", "Digital signature", "Embedded system", "Fault injection", "Field-programmable gate array", "Graham scan", "Integer programming", "Key (cryptography)", "Key escrow", "Lattice model (finance)", "Linear programming", "Linux", "Linux", "Microcontroller", "Open-source software", "Polynomial", "Rejection sampling", "Sampling (signal processing)", "Side-channel attack", "Sparse matrix", "Spectral leakage", "Virtual private network", "strongSwan" ], "id": "1fd268150d8dba28fa6f1e85c359d9e94da2d09a", "inCitations": [ "665987a00ec1129f5c4587aa5544c221eac2cd5e", "876c7c63cf560780308ba213e4ed155fd28e3bc5", "79011ae4ac801c73e2828d48fab387af3eaeea93" ], "journalName": "", "journalPages": "1857-1874", "journalVolume": "", "outCitations": [ "a862ea5789514edaf569e005513296d62de6bf5d", "696c7e456d94e1af6a2ebf127520023774e7cfac", "714f33035df0526c8a19599f2c33fc9d2a9e85cb", "288b7d34af670f8c0e7ef04a3e9d1f335c14fb90", 
"f6a9083f7e3f7d7da8251cd4f2ce091a76ad1e35", "3cdae223c942c0ce876e92890849ceaa85752a6c", "20b850f14db2ac3135c1117b2ff6909a8a60007c", "5233cfdd874ff3b32fcfb4a6fa305d89ec6d271c", "cc8b718dd568292aa15389e64ece74291417c405", "43fb8926f0fce498351c442cc49beab0e6f7151f", "2762c266378290594e1715104ef8e98a8cb60d35", "0d257d899b72297f898f97bf9b1bd55d6941de4d", "63e21447b7098b41c4ff16e7e2be257fbb24e49a", "0641ef5523b236645b76d241f78e41c7cef8fb30", "0da99da07bfac01a7daa38ac7742e6c539261e3b", "11136c9841bb33a7433e122e586065fccbb746a8", "70fb3cea8335aefdf849597e9d9dd7512d722d88", "69874e1b910d99f46321f0b6cf24962b2bf3d8d4", "8c9f8dbb6af53f850f43db29f81197b1ed1ea93a", "07c44a928a7addb54406fdf4bfffb018f3323bb7", "394257e099e25217b57e3527e3d00e9411ef1872", "4a3b8263ba0f8f8976499e7780e64decf2608412", "0c677be877b858ee5de54bf9f9d2727b54a550cd", "16d916b69aa99a32d999068a376a5b4e96bfbd07", "c6e346084b7f06929d67f1dac2ab7a3dde02912b", "0410879f3f768f18669fbe57d475c72aa201f913", "881c6b094287554cac51252bef45dae5bf3d7e2d", "0d575c0e0b490502d8c78069c99a71dd712a5379", "ddfe43861382265a40ebb14664c50febd714aa7d", "14301266cd4ec54974ba127ec64469e6d60864a3", "bd4af7ebace58b1f3ffb1c2068d4ba3dca8bd936", "44d886f89cdbd4fdf5dd25d83b2d37deb7541bf7", "55520e5f45a94a332097b750afd730ef57dd9168", "a4804007f3bb9ac62a406cad94302baadea48560", "3ad556181c59e22dd8138e43102b7daab7b1546f" ], "paperAbstract": "In this paper, we investigate the security of the BLISS lattice-based signature scheme, one of the most promising candidates for postquantum-secure signatures, against side-channel attacks. Several works have been devoted to its efficient implementation on various platforms, from desktop CPUs to microcontrollers and FPGAs, and more recent papers have also considered its security against certain types of physical attacks, notably fault injection and cache attacks. 
We turn to more traditional side-channel analysis, and describe several attacks that can yield a full key recovery.\n We first identify a serious source of leakage in the rejection sampling algorithm used during signature generation. Existing implementations of that rejection sampling step, which is essential for security, actually leak the \"relative norm\" of the secret key. We show how an extension of an algorithm due to Howgrave-Graham and Szydlo can be used to recover the key from that relative norm, at least when the absolute norm is easy to factor (which happens for a significant fraction of secret keys). We describe how this leakage can be exploited in practice both on an embedded device (an 8-bit AVR microcontroller) using electromagnetic analysis (EMA), and a desktop computer (recent Intel CPU running Linux) using branch tracing. The latter attack has been mounted against the open source VPN software strongSwan.\n We also show that other parts of the BLISS signing algorithm can leak secrets not just for a subset of secret keys, but for 100% of them. The BLISS Gaussian sampling algorithm in strongSwan is intrinsically variable time. This would be hard to exploit using a noisy source of leakage like EMA, but branch tracing allows to recover the entire randomness and hence the key: we show that a single execution of the strongSwan signature algorithm is actually sufficient for full key recovery. We also describe a more traditional side-channel attack on the sparse polynomial multiplications carried out in BLISS: classically, multiplications can be attacked using DPA; however, our target 8-bit AVR target implementation uses repeated shifted additions instead. 
Surprisingly, we manage to obtain a full key recovery in that setting using integer linear programming from a single EMA trace.", "pdfUrls": [ "http://eprint.iacr.org/2017/505", "https://acmccs.github.io/papers/p1857-espitauA.pdf", "https://eprint.iacr.org/2017/505.pdf", "http://eprint.iacr.org/2017/583", "http://doi.acm.org/10.1145/3133956.3134028" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1fd268150d8dba28fa6f1e85c359d9e94da2d09a", "sources": [ "DBLP" ], "title": "Side-Channel Attacks on BLISS Lattice-Based Signatures: Exploiting Branch Tracing against strongSwan and Electromagnetic Emanations in Microcontrollers", "venue": "CCS", "year": 2017 }, "1fe1033a508caa43dea180f4faa135c57d931752": { "authors": [ { "ids": [ "2688640" ], "name": "Raghu Prabhakar" }, { "ids": [ "1834695" ], "name": "Yaqi Zhang" }, { "ids": [ "2179980" ], "name": "David Koeplinger" }, { "ids": [ "14465313" ], "name": "Matthew Feldman" }, { "ids": [ "9232666" ], "name": "Tian Zhao" }, { "ids": [ "2543429" ], "name": "Stefan Hadjis" }, { "ids": [ "9182159" ], "name": "Ardavan Pedram" }, { "ids": [ "1700331" ], "name": "Christoforos E. 
Kozyrakis" }, { "ids": [ "1746638" ], "name": "Kunle Olukotun" } ], "doi": "10.1145/3079856.3080256", "doiUrl": "https://doi.org/10.1145/3079856.3080256", "entities": [ "Bit-level parallelism", "Burst mode (computing)", "Computer architecture simulator", "Control flow", "Dynamic random-access memory", "Field-programmable gate array", "Hardware acceleration", "High- and low-level", "Locality of reference", "Low-level programming language", "Maximum power transfer theorem", "Parallel computing", "Performance per watt", "Pipeline (computing)", "SIMD", "Scratchpad memory", "Simulation", "Sparse matrix", "Vectored I/O" ], "id": "1fe1033a508caa43dea180f4faa135c57d931752", "inCitations": [ "fa099d45212e0b0e6486b6bd378e6f4fd06c222d", "4805fa6c66509692ca51434463641ba5d7527341", "509cc745816d98de9921b999cfc35d210b5c9a2a", "33d2dcbefadda84578aa837c4b94526c2b8b03a4" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "389-402", "journalVolume": "", "outCitations": [ "d5c3933a7b6d627afba89182904da428ef91c9c5", "0bb11eb115407eed99483d1850d6ea41e675b3cf", "9e5ab6456ce2bd6be5f63c757134b8b3720d1785", "e66ab9039b49b6dd0ecce124a71f1044750107d2", "209932cd2e3f5da071c4f6341a3b8b29cf50cc4a", "01cca1fc2784f4b0f164ca5703ce793d0042649a", "2e5ef3e461eaccf533aaae000ef847ae581d4363", "9cca3bd06826da16e1fef4e455419d6dea8a07de", "67ef28e349541a2efe5523ec1712a4c149655157", "bdeee33049e713f5a4e9bb3c12c31d24158ac9df", "9a4a47f76126c8560b5fe0501c3d4725c4287b73", "0846f5a52f223ee1677671acd3da98be01166edb", "7ea15c138cc72588fa376ff819f4bb8ca0b324da", "6c15928f2a1b8525d2aa4e078cfa62847ba422bf", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "907d2c011942a78bf6acff8e048f4185d53ff8f2", "d589123c9665f52c1c06a0b3c80aa94c423a8908", "7c6c7a97488fdbb7c06f85c345b348183bf0a704", "02c78232075ac431834e3442dcb2954d4e708def", "aa15673806c08fdc0f7efbb0b03c901f7e4f8874", "28c552da5dc505fe23644cfddf7daaf06c355e45", 
"3df5013fac2cada7b807ff1f45f87bbaf4bacc18", "60cfe41fd68644fb19cba99babae694a2acacc17", "31df37de79c2e3fbfe7e1fe8abdc703dd37825e2", "2ad5cc2a8d8a8daa5a46b5e552619c335c310fe3", "0086b1d36b41ab83bfddc59d5da228090c4688d2", "8df62aad18d6de13331479666c3b5d6a32b0ba58", "165cf5e471b32122ba3a38709873cecf9b1b9a58", "0fa77c50e41b1e5766f27059cb404a2fd82fc33d", "0659411ebccf1abca4f9a3a5c8744b8bb153933e", "2a19d31a818be868bf39dcf258b36a86ef2ece3f", "31adf244a5da36629e769937bc2b7ec5fe00c458", "20e29444a28a763a45f9d9860ec4cd210ea5f084", "4a2d7bf9937793a648a43c93029353ade10e64da", "28477b0f88734ff547f6cd6804d651c299814d0e", "6fbb7db25a5a3a2788e2b16a5e54abc1ac36aa76", "269c24a4aad9be622b609a0860f5df80688c2f93", "00ab25c6582d543932fccbb0f15fe93445f95d61", "148d81592fc46eff9b9360aa294ffaf160b716fd", "93ec8d541655aa78e9ea982156ec7b468eff2816", "06ce77e4abea63948580340be25d7f2a80369e5a" ], "paperAbstract": "Reconfigurable architectures have gained popularity in recent years as they allow the design of energy-efficient accelerators. Fine-grain fabrics (e.g. FPGAs) have traditionally suffered from performance and power inefficiencies due to bit-level reconfigurable abstractions. Both fine-grain and coarse-grain architectures (e.g. CGRAs) traditionally require low level programming and suffer from long compilation times. We address both challenges with Plasticine, a new spatially reconfigurable architecture designed to efficiently execute applications composed of parallel patterns. Parallel patterns have emerged from recent research on parallel programming as powerful, high-level abstractions that can elegantly capture data locality, memory access patterns, and parallelism across a wide range of dense and sparse applications.\n We motivate Plasticine by first observing key application characteristics captured by parallel patterns that are amenable to hardware acceleration, such as hierarchical parallelism, data locality, memory access patterns, and control flow. 
Based on these observations, we architect Plasticine as a collection of Pattern Compute Units and Pattern Memory Units. Pattern Compute Units are multi-stage pipelines of reconfigurable SIMD functional units that can efficiently execute nested patterns. Data locality is exploited in Pattern Memory Units using banked scratchpad memories and configurable address decoders. Multiple on-chip address generators and scatter-gather engines make efficient use of DRAM bandwidth by supporting a large number of outstanding memory requests, memory coalescing, and burst mode for dense accesses. Plasticine has an area footprint of 113 mm2 in a 28nm process, and consumes a maximum power of 49 W at a 1 GHz clock. Using a cycle-accurate simulator, we demonstrate that Plasticine provides an improvement of up to 76.9x in performance-per-Watt over a conventional FPGA over a wide range of dense and sparse applications.", "pdfUrls": [ "http://platformlab.stanford.edu/Seminar%20Talks/retreat-2017/Raghu%20Prabhakar.pdf", "http://csl.stanford.edu/~christos/publications/2017.plasticine.isca.pdf", "http://doi.acm.org/10.1145/3079856.3080256" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1fe1033a508caa43dea180f4faa135c57d931752", "sources": [ "DBLP" ], "title": "Plasticine: A reconfigurable architecture for parallel patterns", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "1fe46b7df08ea13e52f86144de765f80e7567927": { "authors": [ { "ids": [ "31274102" ], "name": "Tsung Tai Yeh" }, { "ids": [ "1736081" ], "name": "Amit Sabne" }, { "ids": [ "1693799" ], "name": "Putt Sakdhnagool" }, { "ids": [ "1727592" ], "name": "Rudolf Eigenmann" }, { "ids": [ "32632026" ], "name": "Timothy G. 
Rogers" } ], "doi": "10.1145/3018743.3018754", "doiUrl": "https://doi.org/10.1145/3018743.3018754", "entities": [ "CUDA", "Central processing unit", "Daemon", "Daemon (computing)", "Graphics processing unit", "Image processing", "Kernel (operating system)", "Operating system", "POSIX Threads", "Pagoda (data structure)", "Parallel computing", "Runtime system", "Scheduling (computing)", "Spawn (computing)", "Speedup", "Thread (computing)", "Throughput" ], "id": "1fe46b7df08ea13e52f86144de765f80e7567927", "inCitations": [ "bb5364418ca7328bee7ddaa62e3a7f21030de920", "0581754e392d4a648f6a7b7665e3561df8627157", "d1fcc29063f09305969a678313ddba7e9f4f6e9c" ], "journalName": "", "journalPages": "221-234", "journalVolume": "", "outCitations": [ "33da45838d0b6c082cc71e603fd802bac4d56713", "c6b5a80cb67dbb084e4f0c6eca99a166dd22dbe7", "4b02f9ff133ed8a4ff80c00ba83a74e167fd86a4", "404b72fbf63ff8f3f15c26a88384a0d4d7bcfcd7", "43f0c099d44a68783a773f91cd03098a5252bf98", "2f88dcf1e9abaa0b0f8c63820548c98b2da61220", "0e191d726c2b15d54f0e4b5d50f6c0d2e426f3ad", "a87ad38b648cf2759150715f32723ae8a092142b", "5fe43f4d79d2829ba062fe9cf1711ee60903dd24", "127b35b01f4d1186a0707aed4fdd50eb00ae2ea2", "2e8cceb50163e4c32b6d14a420394ac2a6ef175c", "8d7ab91362fa1319d696a0dc538ca881352bda76", "06cb7c6601b7ee0d89cccd5311dcda9e5316e02d", "40bb6830ec715b52e0fcbe7566f489dd2d0c25fe", "1ec519276b4bfea6cdd05d3092928e8fe8190b35", "3244347baf3bee547145e69c70822db24bed848f", "3c28d5967db86e8f5e4c37d03518967c285a32bf", "559d122ef5c04a872812f8621df8f181e527b8bb", "39a6aa81ec3e20c1d500b99b560deb039c451b83", "cc8786711f75e957fbe81e798db07c2fefce644f", "78eb6fd7921612cd2c7ad16792c2b40c4033a877", "1af73449732360b4942193edd57d750d387a9718", "5bfb81407ab5102ba0369e86ca674eac081a4d0a", "26a40755a75e5a1d5f429061b8a88665548a668b", "03daf2d17337f000538d9d4727fa49d52bdb922c", "d2378cbfe444ca619aaf1de6e6240df5b2667912", "4cc504da30fd273e12f28bc0cf573ff37f829f89", "96b4b72d1098674750c4a406c93efe43e036568b", 
"064f38e5edef42cb5a37f2a350e4413e17132b11", "04717b51d7c7dd598d732fb8a293d69d45c0fa9f", "5c9fa798a510b66a37c1b0852582fd7735ed088e", "6757659aeba247db2a35691ee3b4c029e1a2dcf4", "16b5fa19661e1e26c1b967104948bf2f031a3612" ], "paperAbstract": "Massively multithreaded GPUs achieve high throughput by running thousands of threads in parallel. To fully utilize the hardware, workloads spawn work to the GPU in bulk by launching large tasks, where each task is a kernel that contains thousands of threads that occupy the entire GPU.\n GPUs face severe underutilization and their performance benefits vanish if the tasks are narrow, i.e., they contain < 500 threads. Latency-sensitive applications in network, signal, and image processing that generate a large number of tasks with relatively small inputs are examples of such limited parallelism.\n This paper presents Pagoda, a runtime system that virtualizes GPU resources, using an OS-like daemon kernel called MasterKernel. Tasks are spawned from the CPU onto Pagoda as they become available, and are scheduled by the MasterKernel at the warp granularity. 
Experimental results demonstrate that Pagoda achieves a geometric mean speedup of 5.70x over PThreads running on a 20-core CPU, 1.51x over CUDA-HyperQ, and 1.69x over GeMTC, the state-of-the-art runtime GPU task scheduling system.", "pdfUrls": [ "https://engineering.purdue.edu/tgrogers/papers/tsung.tai.ppopp.2017.pdf", "http://dl.acm.org/citation.cfm?id=3018754" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1fe46b7df08ea13e52f86144de765f80e7567927", "sources": [ "DBLP" ], "title": "Pagoda: Fine-Grained GPU Resource Virtualization for Narrow Tasks", "venue": "PPOPP", "year": 2017 }, "1ff2cdb23079207c54a81cdf363af1b166aa1f6d": { "authors": [ { "ids": [ "1825407" ], "name": "Fanny Pascual" }, { "ids": [ "1780581" ], "name": "Krzysztof Rzadca" } ], "doi": "10.1007/978-3-319-64203-1_15", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_15", "entities": [ "Algorithm", "Approximation algorithm", "Baseline (configuration management)", "Best, worst and average case", "Circuit minimization for Boolean functions", "Colocation centre", "Combinatorial optimization", "Data center", "Job shop scheduling", "Makespan", "Multiprocessor scheduling", "NP (complexity)", "Polynomial-time approximation scheme", "Program optimization", "Scheduling (computing)", "Simulation" ], "id": "1ff2cdb23079207c54a81cdf363af1b166aa1f6d", "inCitations": [], "journalName": "", "journalPages": "206-219", "journalVolume": "", "outCitations": [ "be54bc9eb4fdd8f9f5210874e78ec3c071f86a33", "3e257f01e3ee71545d824a1615c35659525b856a", "28517cf1faa64455c7f8814acd3d3d19b5176915", "b8234539e7720153a1e36f0da19d6cf599f60ea8", "7b6e453e08717cfdcb66349ac184996e43ed85b3", "3a043714354fe498752b45e4cf429dbae0fb2558", "1d5e81244451dc58a6e6d4c9d2b8fbff6f55e10b", "0cb4b930159a456cd3ab7e253e0cab5c5b28c8c4", "74dedf3bab9d64648b955f3d85ea79a20ca3960b", "33ea3afa698b923e2452a05354e543ecd0d65c5f", "59ab46bfd59cb43876e701389f256b93430e6273", "6f5d96874b919df9e884a165a21859b860f2a5fd", 
"c037edd22215b89c8d2924d4e3c81eb84fdadec7", "351ad1609d4e0c3f3f27e522893739cba48492ba", "4ed44e18a43877fd53068f984fc683e97ad8b991", "42e9ef6c62e621aac87b1208a7234a162216c03b", "a03cdf8f9b181957071638d6224aa8caa1da8624", "443b8c56d7300f61b825d1dbafe06afdda23c3e1", "26c5818349f8b79ed3b3ba3341c9ff0b14c28d2f", "49864efddfc0cb3cd805d06434befb5642d4ad46", "cbb298a8bf034b0c0c171b6799e6502702491d20", "1eb845e672abc3e172725639eece560c3cd5ec2a", "604aefc37b2f46d1c0b7a391f726d724a9137a5d" ], "paperAbstract": "In data centers, up to dozens of tasks are colocated on a single physical machine. Machines are used more efficiently, but tasks\u2019 performance deteriorates, as colocated tasks compete for shared resources. As tasks are heterogeneous (CPU-, memory, networkor disk-intensive), the resulting performance dependencies are complex. We explore a new combinatorial optimization model that uses two parameters of a task \u2014 its size and its type \u2014 to characterize how a task influences the performance of the other tasks allocated to the same machine. We study the egalitarian optimization goal: maximizing the worst-off performance. This problem generalizes the classic makespan minimization on multiple processors (P ||Cmax). We prove that polynomially-solvable variants of multiprocessor scheduling become NP-hard and hard to approximate when the number of types is not constant. We propose a PTAS and a series of fast approximation algorithms when the number of types is constant. By simulation on instances derived from a trace of one of Google clusters, we show that our algorithms that take into account types lead to lower costs compared with P ||Cmax baseline. The notion of type enables us to model degeneration of performance caused by colocation using standard combinatorial optimization methods. Types add a layer of additional complexity. 
However, our results \u2014 approximation algorithms and good average-case performance \u2014 show that types can be handled efficiently.", "pdfUrls": [ "https://arxiv.org/pdf/1610.07339v1.pdf", "https://arxiv.org/pdf/1610.07339v2.pdf", "http://arxiv.org/abs/1610.07339", "https://arxiv.org/pdf/1610.07339v3.pdf", "https://doi.org/10.1007/978-3-319-64203-1_15", "http://www-poleia.lip6.fr/~pascualf/doc/europar2017.pdf", "https://export.arxiv.org/pdf/1610.07339" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/1ff2/cdb23079207c54a81cdf363af1b166aa1f6d.pdf", "s2Url": "https://semanticscholar.org/paper/1ff2cdb23079207c54a81cdf363af1b166aa1f6d", "sources": [ "DBLP" ], "title": "Optimizing Egalitarian Performance in the Side-Effects Model of Colocation for Data Center Resource Management", "venue": "Euro-Par", "year": 2017 }, "1ffcea721646e89a8ee821ba9b3c81bca4b984fe": { "authors": [ { "ids": [ "1797601" ], "name": "Weizhong Qiang" }, { "ids": [ "1744732" ], "name": "Yong Cao" }, { "ids": [ "16208520" ], "name": "Weiqi Dai" }, { "ids": [ "2068865" ], "name": "Deqing Zou" }, { "ids": [ "2156156" ], "name": "Hai Jin" }, { "ids": [ "4381434" ], "name": "Benxi Liu" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.5", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.5", "entities": [ "Code reuse", "Coexist (image)", "Commodity computing", "Computer data storage", "Entry point", "Hardware virtualization", "Hardware-assisted virtualization", "Interface (Java)", "Library (computing)", "OpenSSL", "Performance Evaluation", "Type system" ], "id": "1ffcea721646e89a8ee821ba9b3c81bca4b984fe", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "34-41", "journalVolume": "", "outCitations": [ "49d50f1d0bb2a03388f3cb60f9f1ca01974d371c", 
"02e5b7aa2c920d6cd251e954a3dd314a174164a2", "0a35c32ebc233556d11c2038f5a4362f2c40b2a9", "1d5ca5dda6526012738276f3e58cd752a30b4652", "0ba9924ac38a425a9484dbc0a50cb71858ce416d", "40d58b4e500ec2342fff127ec544ac137b9ed931", "0e3ea3184e7d9966c574d119768bbf3ea396d8c1", "39a59c25e8fcdbb6304e70c86e263539af25e62d", "11496e28e6dd3f1ab7d2f054c46d7587988dfd86", "01ee7db67463b53143e0b2c126363ed6d3c8a532", "d937dd67265c2ac0ba5ffa8022323d37c2344188", "565919855788bfcc7fbaad3006fe0f42c735b333", "30f52a79ff53f8969ffcba19013b4a43e629875f", "ca763f5f4f65711d0b0147137de4a92b2bfd0fb2", "0e039df712774fcea67f214d9b5780c1dc250747" ], "paperAbstract": "A typical application normally includes the main program and some shared libraries. The main program can arbitrarily execute the code of those shared libraries due to the coexistence in the same memory space. This feature can be used by attackers to carry out code-reuse attacks. Meanwhile, the shared libraries are shared across multiple applications, which can help attackers to simplify the process of code-reuse attacks. Isolating shared library into a separate execution environment and restricting the access to this library is a promising countermeasure, while the existing isolation approaches need to either modify the main program, or break the shared feature of the library. In this paper, we present Libsec, an efficient and transparent approach to isolate shared libraries, without the need of source code of the main program or shared libraries. Libsec adopts commodity hardware virtualization extension to isolate shared libraries from the main program. Meanwhile, Libsec relies on static instrumentation and dynamic processing to provide interfaces for legitimate cross-domain invocations. 
By this way, Libsec can guarantee that the main program and shared libraries are executed in the corresponding execution environment respectively, while cross-domain invocation is only allowed via specific interfaces, thus preventing the main program from jumping directly to the shared library. We implement a prototype of Libsec in KVM. To demonstrate its effectiveness, we deploy it to some real-world applications and libraries, such as Nginx and OpenSSL libraries. Security evaluation shows that Libsec can prevent the attacker from utilizing the functions or instruction sequences of the shared library for code-reuse attack. Performance evaluation shows that the performance and space overhead caused by Libsec are appropriate.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.5" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/1ffcea721646e89a8ee821ba9b3c81bca4b984fe", "sources": [ "DBLP" ], "title": "Libsec: A Hardware Virtualization-Based Isolation for Shared Library", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "2010e5ce157eede9642da725b7f1d55da34b19a9": { "authors": [ { "ids": [ "15334167" ], "name": "Robert Bill" }, { "ids": [ "37942063" ], "name": "Patrick Neubauer" }, { "ids": [ "1690158" ], "name": "Manuel Wimmer" } ], "doi": "10.1145/3136014.3136037", "doiUrl": "https://doi.org/10.1145/3136014.3136037", "entities": [ "Aspect-oriented programming", "Aspect-oriented software development", "Cognitive dimensions of notations", "Interdependence", "Introspection", "Open-source software", "Security through obscurity", "Simultaneous editing", "Software repository" ], "id": "2010e5ce157eede9642da725b7f1d55da34b19a9", "inCitations": [], "journalName": "", "journalPages": "67-78", "journalVolume": "", "outCitations": [ 
"0539d2d8d2515091e9a6a2b23fd137d905285805", "167366978d3923de5b7cc0a4f41deec01c6473c0", "2cb3af0ac1ac84ee60183fa2a9f9b64db7dab308", "7fba36f6dd5daec3f69de12345fef20f90486a7c", "6cd8bb7192347eb7defaef456d9a81c05f6e8037", "76a0257d5fc6026dc15cc71b8ce97e7f6a618b6c", "20b8f47e76a2866d8c223cf66432edb8d4332c9b", "1b50986f1787ea334d64c5f848e1096b6b154df0", "3a69b2518b5c639d8035fb2ce8dc1f442cbcaa44", "265e121e7de432dcbeb281418137236d1950b7c3", "67d1a1de663a5c52e694cc6eda5601bbe84e3787", "c54d93d3973972f610d96ad668bb8f4354b54508", "84f1c273b50487bbbb6f1c02d8a1b62f9f92d278", "ff291488ac21515b6fd0ee36f6cb555133a4a6ac", "21866c792bbbda991b19cfafef0375ac37018663", "be6142b34ead5f1187fb744fcbcbde4763bde5c5", "b547f8e9b17121f93b98c034322609fbaa4a2082", "3278529b50498a0dac812e39e7cdc22e109c9094", "4ac6fae400b55a4651a3576af8d2eec8a278f4d6", "ce98ab59237de545b5486810b6870da84bf63ed1", "8cb147d7ebd97efc27448d5bd0c44268a87bfabd", "0414d0a85097dee507e9a3f18f3747363b94a16b", "1f9c422ac5f4cae59f3c6959b62f45c213d05666", "1eabcc0f1feb32a28480abcb64cdc89af149ea7a", "bea4ba6ab20de9b8332eb87836d5f90dfe75017f", "b7dbfb1a8b9c4f36f1c947227150766f6a4fbc3f", "5383f464d555acb5c2df2777d197ec7681a60bfd" ], "paperAbstract": "The maintenance of modern systems often requires developers to perform complex and error-prone cognitive tasks, which are caused by the obscurity, redundancy, and irrelevancy of code, distracting from essential maintenance tasks. Typical maintenance scenarios include multiple branches of code in repositories, which involves dealing with branch-interdependent changes, and aspects in aspect-oriented development, which requires in-depth knowledge of behavior-interdependent changes. Thus, merging branched files as well as validating the behavior of statically composed code requires developers to conduct exhaustive individual introspection. \n In this work we present VirtualEdit for associative, commutative, and invertible model composition. 
It allows simultaneous editing of multiple model versions or variants through dynamically derived virtual models. We implemented the approach in terms of an open-source framework that enables multi-version editing and aspect-orientation by selectively focusing on specific parts of code, which are significant for a particular engineering task. \n The VirtualEdit framework is evaluated based on its application to the most popular publicly available Xtext-based languages. Our results indicate that VirtualEdit can be applied to existing languages with reasonably low effort.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136037", "https://publik.tuwien.ac.at/files/publik_261415.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2010e5ce157eede9642da725b7f1d55da34b19a9", "sources": [ "DBLP" ], "title": "Virtual textual model composition for supporting versioning and aspect-orientation", "venue": "SLE", "year": 2017 }, "201f43d5b419070dc25cb173793a8227a075d32a": { "authors": [ { "ids": [ "3092404" ], "name": "Yupeng Zhang" }, { "ids": [ "2062558" ], "name": "Daniel Genkin" }, { "ids": [ "2620997" ], "name": "Jonathan Katz" }, { "ids": [ "1714848" ], "name": "Dimitrios Papadopoulos" }, { "ids": [ "1790969" ], "name": "Charalampos Papamanthou" } ], "doi": "10.1109/SP.2017.43", "doiUrl": "https://doi.org/10.1109/SP.2017.43", "entities": [ "Benchmark (computing)", "Bulldozer (microarchitecture)", "Cloud database", "Column (database)", "Computation", "Correctness (computer science)", "Cryptographic protocol", "Cryptography", "Database", "Deterministic automaton", "Formal verification", "High-level programming language", "IBM Tivoli Storage Productivity Center", "Interactive proof system", "Microsoft SQL Server", "Outsourcing", "Polylogarithmic function", "Polynomial", "Preprocessor", "Relational database management system", "SQL", "Server (computing)", "Snark (graph theory)", "Verification and validation" ], "id": 
"201f43d5b419070dc25cb173793a8227a075d32a", "inCitations": [ "2f7b4ee46d284664fd1a4a679d1e610e2954ca8b", "1d1fc81989ed1d26ba4cf6c3025ffaa740023a5d", "0a85b3afc89958583642b7fd39b37e745a053190", "0f93693986c2b796d6ce594e36a6efd1dea0b616", "6b07b2e62d0e79e41eeb28f7fb9a2381319e8349" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "863-880", "journalVolume": "", "outCitations": [ "3591593e51716f7b470f11315fc4e4f4bf3169dc", "b6af621db98a3440cdf8745eec873a1bc7e349bc", "1a33c542b064f95eb6fc9b0003e80ff4f1b9289a", "084c67fab5b80114dd8fd223149b2c64fe473cc0", "5d1a57dc5b7536011ed6859cf9d811b9209680e2", "c53734d29e8836f635db1ce814022aec6be721a5", "2a266546c2609f079529688de7acbe0213f47373", "242654ca9c67c28dd4dcc5ff4ec263cf9cb39469", "584b25a80ea5455c5ef628e42efe6255cdcdd719", "1475c9eb499df6fbf9c318bad974ba8ef34fbe87", "0b491803fa7697b2712d8a8c3c766ac39f8764cf", "20d1c524fd1557e76a687ffab57f96364beb6fe7", "07c746c119b1d18e6580840b2166721e07b4433d", "13c479acb6a0703d5577e89b3f08677e9cff017f", "732ae647aa75acd7b7349679a4746c0539370122", "1d9cf87fa6d6175a2c1543afff263113657765f6", "16d9372e86d6e35fdc84dddcf78f6474ac950e3d", "1bc3d4083b88ead61cec36734453460fa1253dd1", "66f2bc0a618d85f0af0a32699c99f0a02b0053a1", "32bd62191f501753b8307bef23758adb50d95627", "147f67a9aee83b9269ae0898d53606d51c16997d", "1cbe6c3e884e34807a870212e8584f8c3e550a3f", "0b7e6c5d49b7681fa7426b86040072e3b36a2223", "8c43726d7229a6dea25d3f510ca4b8e681117b60", "3d48f3bc44b34a4d5977cebef014ae126582515b", "101bb77cb2c8b2bfcde41973ac3473db325d7e6d", "887eae50cef6fd228c59d47b80887b35782a9a2a", "02a356366cc651e69df7c442deff9dcaa3cc8a5f", "4c3a78661fd920b4116afd0ad88247bbd00160ce", "7bda4012af62cd5d4a08da576b98de82be6587f0", "458dd2c5f46457184788675a23cd5f5e7f469125", "b3ffedd7c9473646fe94d8a14c92bd6554fe21bb", "1a59302e8c3e39c6fb7249af7346c2d5158e03e1", "53b67d4f8584decf5f86fc4ec9f0eee893cf6cbc", "1bab56c8ad874a900d385e9a94e31ca95b7191bf", "2aaeedb5e6988a2d3b268f3e16b369252edc8259", 
"01e83a7ff59354527a01f440129e71aadbe2eade", "b6c15c2eff5cad736c900ebf824be76460041c43", "29543bb7c680dde79f374e73930ca68833e2fb37", "2e2ba3ed517ab712b1c3ac399c167f72ad23ba04", "30c7878d8d0518d970c6a90375065e959d057fef", "0d67186a93678be3117f1cf760047f7d2019113a", "0afae9e394d29aa4f678514e711a43f769fc4f35", "509e797ae53674697ecaab51702ab1c6400396bb", "42d264105888ea0c8492db9f053988f519d187b9", "5a10c188802c8d115167838cfc875236943217f2", "35f2ee0596493100b94325632265116faaeea741", "c44a3fab266523844ef7dad435898207a6e1f981", "977d014244451a0182d5ca915dd0748f269321de", "9e38f65689de68019fb8a1fc4ffb00f7caac0dd4" ], "paperAbstract": "Cloud database systems such as Amazon RDS or Google Cloud SQLenable the outsourcing of a large database to a server who then responds to SQL queries. A natural problem here is to efficiently verify the correctness of responses returned by the (untrusted) server. In this paper we present vSQL, a novel cryptographic protocol for publicly verifiable SQL queries on dynamic databases. At a high level, our construction relies on two extensions of the CMT interactive-proof protocol [Cormode et al., 2012]: (i) supporting outsourced input via the use of a polynomial-delegation protocol with succinct proofs, and (ii) supporting auxiliary input (i.e., non-deterministic computation) efficiently. Compared to previous verifiable-computation systems based on interactive proofs, our construction has verification cost polylogarithmic in the auxiliary input (which for SQL queries can be as large as the database) rather than linear. In order to evaluate the performance and expressiveness of our scheme, we tested it on SQL queries based on the TPC-H benchmark on a database with 6 million rows and 13 columns. 
The server overhead in our scheme (which is typically the main bottleneck) is up to 120 times lower than previous approaches based on succinct arguments of knowledge (SNARKs), and moreover we avoid the need for query-dependent pre-processing which is required by optimized SNARK-based schemes. In our construction, the server/client time and the communication cost are comparable to, and sometimes smaller than, those of existing customized solutions which only support specific queries.", "pdfUrls": [ "https://eprint.iacr.org/2017/1145.pdf", "https://doi.org/10.1109/SP.2017.43", "https://www.cis.upenn.edu/~danielg3/papers/vsql.pdf", "http://eprint.iacr.org/2017/1145", "http://www.ece.umd.edu/~cpap/published/vSQl-oakland-17.pdf", "https://obj.umiacs.umd.edu/papers_for_stories/vSQL_Zhang.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/201f43d5b419070dc25cb173793a8227a075d32a", "sources": [ "DBLP" ], "title": "vSQL: Verifying Arbitrary SQL Queries over Dynamic Outsourced Databases", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "2026334918c7290588b232ceedc7dc416ae14d7b": { "authors": [ { "ids": [ "28877895" ], "name": "Emiliano Silvestri" }, { "ids": [ "7949965" ], "name": "Simone Economo" }, { "ids": [ "1886962" ], "name": "Pierangelo di Sanzo" }, { "ids": [ "39222438" ], "name": "Alessandro Pellegrini" }, { "ids": [ "1714807" ], "name": "Francesco Quaglia" } ], "doi": "", "doiUrl": "", "entities": [ "Benchmark (computing)", "Care-of address", "IBM Tivoli Storage Productivity Center", "In-memory database", "Open-source software", "Operating system", "Programming paradigm", "Relational database management system", "Software transactional memory", "Transactional interpretation", "Transactional memory" ], "id": "2026334918c7290588b232ceedc7dc416ae14d7b", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "294-303", 
"journalVolume": "", "outCitations": [ "57fe2b6acccbba91df0847442d2634ffef7ccfb4", "c8160b577871640613d6d3b2468fd1f05f2c82fb", "2bd3b5532712052d85b0207bd42795e0f31bdfae", "1d3ff17123bf148eab7a44bd6bebac358152781c", "2ae2684f120dab4c319e30d33b33e7adf384810a", "32111af17c0fa328f3afba5c436125a61026e1ea", "250244b7c65435a8aca822ccf072096dd75ea8c9", "d77063325129544c41b1422918c28e89e4fd5c10", "28b56e20e43f73e5291c4b0a76137ffcd8456fed", "d0aed743c956fc8da98bf958d9a101f94495334a", "6a5d6dc6dbc413a95f56fb97dee732659bb30e38", "9d1b33363d49540177be2efdbd61667f937a1d12", "76057a3c7b489290afd4a4dccf09b623502619fd", "57cf29529977cc5407497aba2f9032e01a12c1a9", "1a16975d1630756772b7d16e220236fe9a2830d3", "09ed565e84057123c15ab12b885c235d1f241aed", "44ea4f25e502f10246666634a16fd06091efe8fb", "ce48a652ef299c9c25a1fd4f7f0e8622473d2e92" ], "paperAbstract": "In state-of-the-art Software Transactional Memory (STM) systems, threads carry out the execution of transactions as non-interruptible tasks. Hence, a thread can react to the injection of a higher priority transactional task and take care of its processing only at the end of the currently executed transaction. In this article we pursue a paradigm shift where the execution of an in-memory transaction is carried out as a preemptable task, so that a thread can start processing a higher priority transactional task before finalizing its current transaction. We achieve this goal in an application-transparent manner, by only relying on Operating System facilities we include in our preemptive STM architecture. With our approach we are able to re-evaluate CPU assignment across transactions along a same thread every few tens of microseconds. This is mandatory for an effective priority-aware architecture given the typically finer-grain nature of in-memory transactions compared to their counterpart in database systems. We integrated our preemptive STM architecture with the TinySTM package, and released it as open source. 
We also provide the results of an experimental assessment of our proposal based on running a port of the TPC-C benchmark to the STM environment.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101154", "http://www.dis.uniroma1.it/~bibdis/index.php?Itemid=34&gid=112&option=com_docman&task=doc_download", "http://www.dis.uniroma1.it/~bibdis/index2.php?Itemid=34&gid=112&option=com_docman&task=doc_view", "http://wwwold.dis.uniroma1.it/~bibdis/index.php?Itemid=34&gid=112&option=com_docman&task=doc_download" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2026334918c7290588b232ceedc7dc416ae14d7b", "sources": [ "DBLP" ], "title": "Preemptive Software Transactional Memory", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "204ed869f69468d2c88ff64f67300d810f686c1a": { "authors": [ { "ids": [ "35302900" ], "name": "Dineshkumar Rajagopal" }, { "ids": [ "2858300" ], "name": "Daniele Tafani" }, { "ids": [ "3262245" ], "name": "Yiannis Georgiou" }, { "ids": [ "2247963" ], "name": "David Glesser" }, { "ids": [ "39096075" ], "name": "Michael Ott" } ], "doi": "10.1109/HiPC.2017.00025", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00025", "entities": [ "Algorithm", "Centralisation", "Experiment", "IP fragmentation", "Job scheduler", "Jumpstart Our Business Startups Act", "Open-source software", "Power supply", "Scheduling (computing)", "Slurm", "Synthetic data" ], "id": "204ed869f69468d2c88ff64f67300d810f686c1a", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "142-151", "journalVolume": "", "outCitations": [ "4dbc1467275f8a3152bab09b92fb42072cbbab23", "e61e70a4c6b79dcfdc5b1016952ca1e748285af1", "e0140bbf28f231ccddb639b299ba515f1f5f0e15", "62f7e49f77ca13b7690ce0106235063f6b0771d1", "20d6a8a39ebecd21bc8a4df53f248356d38ea6d9", "a9a6c4d79b8831f6d929ab6e31d681cc4b6982ee", 
"9404ac894cb14fede8846ce462b47a9e71afbc61", "22928113b4f63c326811baf36eea8392edddbb79", "0a2c06d968654aee6abf688b35f0f0191b9a6519", "8755fd59b74028b0bd45e9b5c355c64c0c70af04", "f103c1775462f4409ae15818cfa0a761e282d324", "4c6869e2f11121f23ce66439ea1aa7bbb95036ad", "061172417060dfe1ca5318cf30c65ce5ef1f2819", "30a12c7986d7c0de90d14557f80e46b38834ff76", "02d3d91f16330740cfb104f61f9aaf5a5dd6a69e", "bea77fd8eedb63f239dc01b907e717d2f43d1709", "693af15ad862fa302a37cd45a23cade64ed1aa36", "5341bdf934b3a99af6685b5564af8e03d1b780a7", "81c4e99059104b00adc14f6797758aff998c066d", "f6ab527a5919b48b66908954a3086947c5bffde6", "1e8233a8c8271c3278f1b84bed368145c0034a35", "78e009cc05a6a832106d5ca6802ce56bef6b247f", "4f86cc14eb05db64d8b037833c0b416ea1b138ee", "a7557a9fdd1572fd913fd0ed0fa927f220198942" ], "paperAbstract": "The ever-increasing energy demands of modern High Performance Computing (HPC) platforms is undeniably one of the most critical aspects for the future design and evolution of such systems. The capability of managing their energy consumption not only allows for significant reduction in electricity costs but is also a step forward on the road towards the exascale. Powercapping is a widely studied technique that contributes to address this challenge by instantaneously setting and maintaining a predefined power threshold (power cap) that cannot be exceeded. However, the lack of a centralized mechanism responsible for efficiently allocating the available power among resources and jobs may ultimately yield to fragmentation, low system utilization and increased user waiting times. Additionally, power cap violations can lead to high risk scenarios and/or increase operational costs. This paper proposes to prevent such issues with the introduction of the Enhanced Power Adaptive Scheduling (E-PAS) algorithm. 
The E-PAS algorithm combines scheduling and resource management mechanisms, correlating estimated and real power consumption data in order to optimize the resource utilization of the platform under a predefined power cap. The algorithm has been implemented in the widely used open-source resource and job management system SLURM and is planned to be pushed in a future mainstream version. Its effectiveness has been evaluated through real-scale experiments respectively on an ARM- and an Intel-based cluster of comparable size. All experiments have been performed using synthetic workloads from a set of mini-applications.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00025" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/204ed869f69468d2c88ff64f67300d810f686c1a", "sources": [ "DBLP" ], "title": "A Novel Approach for Job Scheduling Optimizations Under Power Cap for ARM and Intel HPC Systems", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "20614c320f0dca6144a3934c465d42f451c972f6": { "authors": [ { "ids": [ "1686211" ], "name": "Ming Liu" }, { "ids": [ "37086638" ], "name": "Liang Luo" }, { "ids": [ "2240839" ], "name": "Jacob Nelson" }, { "ids": [ "1717411" ], "name": "Luis Ceze" }, { "ids": [ "1689594" ], "name": "Arvind Krishnamurthy" }, { "ids": [ "14826944" ], "name": "Kishore Atreya" } ], "doi": "10.1145/3037697.3037731", "doiUrl": "https://doi.org/10.1145/3037697.3037731", "entities": [ "Attribute\u2013value pair", "Byte", "Cache (computing)", "Computation", "Data center", "Emergence", "Fat tree", "Key-value database", "Low-power broadcasting", "Middlebox", "Multipath propagation", "Network congestion", "Network switch", "Networking hardware", "On the fly", "Power supply", "Throughput" ], "id": "20614c320f0dca6144a3934c465d42f451c972f6", "inCitations": [ "4e595957047360ce23310150566f228d6fa4507e", "54272876afd773cf27c58c95d95fe31fe2eebeaa", 
"6b6a5f2127b5ffbccd54d4823a9ca3a73969f3d1", "726c2e6b8d7f97d9a3256fc08d17f6fe99cc1a7b", "51c78913dd6acb4c5667c71e188f1ddb3033b85c" ], "journalName": "", "journalPages": "795-809", "journalVolume": "", "outCitations": [ "596e57e4ad70c8856391edd3bda70be46bb075ed", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "30e94e24d67994c5a8e2f20f852a51d28a720de2", "8d67bac352dcd43c9c08f917ba8c4bebb444b55d", "0541d5338adc48276b3b8cd3a141d799e2d40150", "12a6890e863d2a2c628261974dde5264edc3b922", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "640af017aa8d11f9f31480155c8d5d1a0d8865d7", "2ce49170835370478fb07ee98b01dcbad6fca4c5", "0433bb657317ac22f7c66d71dfd14c8ead607d73", "225603198cc415d363db8a8a2bd30b0df3c963b1", "0fca220343f411c7dac67b1f5fc1bcf5790cc030", "058f6752d85a517aae298586fdf117acdd7560ea", "2d9d598c580f95889d24cbfc4a725d49dddfb0b9", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "0d3f85933b6355789588476e491683532c68a906", "68a9005a5ec10daece36ca5ecb9cad7be44770b1", "3dde3fec553b8d24a85d7059a3cc629ab33f7578", "9b67de50e9eb9a6087e1aebc2733166c96d67685", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "1376bd56c64639af4645625fd9755c83b2bf7cda", "594539939eba83d4b21f6acc1414dc39f2f4f85d", "35a8bd9f56806f203b7fa47831bb3dde174a06e7", "09c5293b647fca40fde28ac6c38737f07e873e41", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "25f855c968af75e4617f25c71aee3cedec1dedaf", "332f77fd05703c1607e3b57884ad31fb1fad0104", "24c6e70c583daed1852637ec42d4589556ac59d3", "091dc35df0ed7788d987245c8758d58f0d57c755", "043afbd936c95d0e33c4a391365893bd4102f1a7", "03416be8097852a54dd3e309434e5a0806824646", "9aa0d7253574e50fe3a190ccd924433f048997dd", "0847dfb07af6685f96b885a93c2cd4dfb4c3d1a9" ], "paperAbstract": "The emergence of programmable network devices and the increasing data traffic of datacenters motivate the idea of in-network computation. 
By offloading compute operations onto intermediate networking devices (e.g., switches, network accelerators, middleboxes), one can (1) serve network requests on the fly with low latency; (2) reduce datacenter traffic and mitigate network congestion; and (3) save energy by running servers in a low-power mode. However, since (1) existing switch technology doesn't provide general computing capabilities, and (2) commodity datacenter networks are complex (e.g., hierarchical fat-tree topologies, multipath communication), enabling in-network computation inside a datacenter is challenging.\n In this paper, as a step towards in-network computing, we present IncBricks, an in-network caching fabric with basic computing primitives. IncBricks is a hardware-software co-designed system that supports caching in the network using a programmable network middlebox. As a key-value store accelerator, our prototype lowers request latency by over 30% and doubles throughput for 1024 byte values in a common cluster configuration. 
Our results demonstrate the effectiveness of in-network computing and that efficient datacenter network request processing is possible if we carefully split the computation across the different programmable computing elements in a datacenter, including programmable switches, network accelerators, and end hosts.", "pdfUrls": [ "https://ready.cs.washington.edu/~luisceze/publications/incbricks-asplos17.pdf", "http://homes.cs.washington.edu/~arvind/papers/incbricks.pdf", "http://doi.acm.org/10.1145/3037697.3037731" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/20614c320f0dca6144a3934c465d42f451c972f6", "sources": [ "DBLP" ], "title": "IncBricks: Toward In-Network Computation with an In-Network Cache", "venue": "ASPLOS", "year": 2017 }, "20640acac8eb43a616f523ab652eedb4112107b8": { "authors": [ { "ids": [ "38393834" ], "name": "Carlos Vega" }, { "ids": [ "38607554" ], "name": "Jose Fernando Zazo" }, { "ids": [ "2932785" ], "name": "Hugo Meyer" }, { "ids": [ "1777868" ], "name": "Ferad Zyulkyarov" }, { "ids": [ "1765068" ], "name": "Sergio L\u00f3pez-Buedo" }, { "ids": [ "1714970" ], "name": "Javier Aracil" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.45", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.45", "entities": [ "Best, worst and average case", "Cost efficiency", "Data center", "High availability", "Network analysis (electrical circuits)", "Overhead (computing)", "Parallel computing", "Provisioning", "Scalability", "Server (computing)" ], "id": "20640acac8eb43a616f523ab652eedb4112107b8", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "340-347", "journalVolume": "", "outCitations": [ "53e82a56152612a11259b082c69583b7c8569c51", "21a0c328f428a1d4694246ed6c44ed472b74133a", 
"2e2f4a8f5a92a5bd5b21afe5200d31da2fb90a70", "44ff41c2076e6de583082892c1be8571707f60bd", "401d9e74491dae0d335fb2cd7684344398b3a7e8", "7e63611df04cfa8a5160edf8276ea63681eb9fb1", "298c343b898c6602cd0786ff84361d6c2e891e31", "d1fb58c07788ab8e2db198d357c5f708a94baadf", "5c852e0a3a1ca7beae37e120818c2b09bd68847b", "028378b395dc2a11e8ccc3d994df228340fd9697", "89d87c93b4c68b77743df6a0b679905343caf89d", "20093f8513154d469653dde042952172cb9ef822", "b7d4721fd8346e4015114a9f73057234cb9e351b", "8e8e622d5fab4c1d2a5bc7783db84e62cc570f9a", "1b00a623815ce80959b589c97838e35d12fd91ea", "0452dec33d21c759b49a7545e0b7848237df5f66", "84f25ddd053e414f239b91552410dab0adbaedad", "7bc4cccfae6345a10fcf7e2f3d654f8916f16a06", "d5d88f9154c43adea2d7dc9ba74d78a4a4754b0c" ], "paperAbstract": "Traditional data centers are designed with a rigid architecture of fit-for-purpose servers that provision resources beyond the average workload in order to deal with occasional peaks of data. Heterogeneous data centers are pushing towards more cost-efficient architectures with better resource provisioning. In this paper we study the feasibility of using disaggregated architectures for intensive data applications, in contrast to the monolithic approach of server-oriented architectures. Particularly, we have tested a proactive network analysis system in which the workload demands are highly variable. In the context of the dReDBox disaggregated architecture, the results show that the overhead caused by using remote memory resources is significant, between 66% and 80%, but we have also observed that the memory usage is one order of magnitude higher for the stress case with respect to average workloads. Therefore, dimensioning memory for the worst case in conventional systems will result in a notable waste of resources. Finally, we found that, for the selected use case, parallelism is limited by memory. 
Therefore, using a disaggregated architecture will allow for increased parallelism, which, at the same time, will mitigate the overhead caused by remote memory.", "pdfUrls": [ "https://arxiv.org/pdf/1709.06127v1.pdf", "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.45", "http://arxiv.org/abs/1709.06127", "http://arantxa.ii.uam.es/~jaracil/papers/hpcc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/20640acac8eb43a616f523ab652eedb4112107b8", "sources": [ "DBLP" ], "title": "Diluting the Scalability Boundaries: Exploring the Use of Disaggregated Architectures for High-Level Network Data Analysis", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "207dfadf2425436de23305575b2c0cd23bad6a07": { "authors": [ { "ids": [ "2201537" ], "name": "Chui-Hui Chiu" }, { "ids": [ "11009181" ], "name": "Dipak Kumar Singh" }, { "ids": [ "34660837" ], "name": "Qingyang Wang" }, { "ids": [ "1899648" ], "name": "Kisung Lee" }, { "ids": [ "2794114" ], "name": "Seung-Jong Park" } ], "doi": "10.1109/CLOUD.2017.36", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.36", "entities": [ "Cloud storage", "Computational complexity theory", "Data center", "Deployment environment", "Limiter", "Network switch", "OpenFlow", "Operating system", "Rate limiting", "Requirement", "Routing", "Scheduling (computing)", "Software deployment", "Software-defined networking", "Storage area network" ], "id": "207dfadf2425436de23305575b2c0cd23bad6a07", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "222-229", "journalVolume": "", "outCitations": [ "35ae9aa1593c8ee5f03ffc0efcc710c7cdb63367", "37525b2c3cc16a2fe166708a4f7081b949b1888e", "05b37f4722eddf503ca0d237aaeba14eb9ab9ea3", 
"1156f60e40548096df49528b1342bb3e88b0f378", "1cafaac11664e48bd121695ac1be06b0930d00a5", "0541d5338adc48276b3b8cd3a141d799e2d40150", "6ae990f7789715d4066b217fb95cd83fdd1b06ba", "8f6d44c87c7f5826c773312eb608a77878b0f1dc", "231ba17921ebd80e95771e28dfb5082e169d5a53", "0156817d29acdb78b193e9dbbe7d41983df511ad", "2e0057911766d411b7a342c8bae2d6e3d29c47cd", "44e8a3dc6bbcccaaee864e309b1d72c571874687", "663e064469ad91e6bda345d216504b4c868f537b" ], "paperAbstract": "Researches affirm that coflow scheduling/routing substantially shortens the average application inner communication time in data center networks(DCNs). The commonly desirable critical features of existing coflow scheduling/routing framework includes (1) coflow scheduling, (2) coflow routing, and (3) per-flow rate-limiting. However, to provide the 3 features, existing frameworks require customized computing frameworks, customized operating systems, or specific external commercial monitoring frameworks on software-defined networking(SDN) switches. These requirements defer or even prohibit the deployment of coflow scheduling/routing in production DCNs. In this paper, we design a coflow scheduling and routing framework, MinCOF which has minimal requirements on hosts and switches for cloud storage area networks(SANs) based on OpenFlow SDN. MinCOF accommodates all critical features of coflow scheduling/routing from previous works. The deployability in production environment is especially taken into consideration. The OpenFlow architecture is capable of processing the traffic load in a cloud SAN. Not necessary requirements for hosts from existing frameworks are migrated to the mature commodity OpenFlow 1.3 Switch and our coflow scheduler. Transfer applications on hosts only need slight enhancements on their existing connection establishment and progress reporting functions. 
Evaluations reveal that MinCOF decreases the average coflow completion time (CCT) by 12.94% compared to the latest OpenFlow-based coflow scheduling and routing framework.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.36", "http://csc.lsu.edu/~qywang/papers/MinCOF-CLOUD2017-CameraReady.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/207dfadf2425436de23305575b2c0cd23bad6a07", "sources": [ "DBLP" ], "title": "Minimal Coflow Routing and Scheduling in OpenFlow-Based Cloud Storage Area Networks", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "20948adf578a043667d6d6b2d51bd3954624f1e9": { "authors": [ { "ids": [ "1944074" ], "name": "Markus Lumpe" }, { "ids": [ "1750430" ], "name": "Mohan Baruwal Chhetri" }, { "ids": [ "2155028" ], "name": "Quoc Bao Vo" }, { "ids": [ "1722005" ], "name": "Ryszard Kowalczyk" } ], "doi": "", "doiUrl": "", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Coefficient", "F-Spot", "Strategic management", "Theil index", "Time complexity" ], "id": "20948adf578a043667d6d6b2d51bd3954624f1e9", "inCitations": [ "a5eee46eb54322912686dfc2fe2fae94b955e12e", "cd45e518b04b076b31d2762dac683898abf8e1d1" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "391-400", "journalVolume": "", "outCitations": [ "4f86fa28602d9503a8575c5b31082284abc8415c", "70e38d47b83261e257bae61dc39ffbf391b30591", "1da8852aa591d82f6dab3d93c8aba923e69a45d4", "db260995977209749a53297904091d05f0bcf289", "530b3179e8532e87520ccd0daebda3d81ef6319b", "5629ffd0234eee692e8283ecba82045cd8788065", "3a33424cd2ad63cc056a2d9a06b8794d78ba5214", "308589115917390b67a7781a985c6b972549f991", "4e44046bfb459c5f627ef141786773e2c4591de4", "4f739534a366799e170599d3ff3d65597f0118db", "42b9c18bba3ca3152099702d85ff30dd3cbab21e", "d608a95490b02839fdf71a412aab46ad20a70596", "05be0db01d70bcce9530b462ab2368f9e15127d9", 
"3d90fde9ced995e1ad3ffb9de26e3b45e90ad1fa" ], "paperAbstract": "Consumers can realize significant cost savings by procuring resources from computational spot markets such as Amazon Elastic Compute Cloud (EC2) Spot Instances. They can take advantage of the price differentials across time slots, regions, and instance types to minimize the total cost of running their applications on the cloud. However, Spot markets are inherently volatile and dynamic, as a consequence of which Spot prices change continuously. As such, prospective bidders can benefit from intelligent insights into the Spot market dynamics that can help them make more informed bidding decisions. To enable this, we propose a descriptive statistics approach for the analysis of Amazon EC2 Spot markets to detect typical pricing patterns including the presence of seasonal components, extremes and trends. We use three statistical measures - the Gini coefficient, the Theil index, and the exponential weighted moving average. We also devise a model for estimating minimum bids such that the Spot instances will run for specified durations with a probability greater than a set value based on different look back periods. 
Experimental results show that our estimation yields on average a bidding strategy that can reliably secure an instance at least 80% of the time at minimum target guarantee between 50% and 95%.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101166" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/20948adf578a043667d6d6b2d51bd3954624f1e9", "sources": [ "DBLP" ], "title": "On Estimating Minimum Bids for Amazon EC2 Spot Instances", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "20a108587321823ca9cdd93ac84fc316a0400630": { "authors": [ { "ids": [ "2042885" ], "name": "Ram Kesavan" }, { "ids": [ "39454579" ], "name": "Rohit Singh" }, { "ids": [ "9762022" ], "name": "Travis Grusecki" }, { "ids": [ "40604671" ], "name": "Yuvraj Patel" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Central processing unit", "Copy-on-write", "Data structure", "Snapshot (computer storage)" ], "id": "20a108587321823ca9cdd93ac84fc316a0400630", "inCitations": [ "0fd85ea4f3701f6baebffcaab39b858b7142b0dc", "8ee82c0bd80e86c55b56414a602d53164d4fb5c0", "67ffec9c10d9594eb9af4afe25b1f0b0bce5f85d", "8d555af4ad0bcb45ac5ce62374fbd23ea429121f", "ad897b9261a39cdae6e8b0fdcd755e6001e004bc", "556f01b6764f866d7bd4a2d955115ca72bd3413f" ], "journalName": "", "journalPages": "1-14", "journalVolume": "", "outCitations": [ "12a0046a1197ae63c3d616c74e367dc583cef196", "7062268b78dff4a8819fe3f1e89c6b5344f715a5", "06bd4d2d21624c7713d7f10ccb7df61bf6b9ee71", "3d1abb7432c9e52758f1bccc5b1e2e60dfbfe91f", "2c84daae142c5b0f4ca6a6772ca7e8cac7d7afca", "c9ef82a4ad0b1b33296cea86fb2ec7558cf798fb", "556f01b6764f866d7bd4a2d955115ca72bd3413f", "d0b6d2075a653d60452b6df0fced4ee0ae093dd2", "740b2ff66ea305ffc5369ecee4498941c39efaab", "4ef1fcc896885d383442b2aff92c2109cd0da9be", "012ab4527d6aee2387c243d304c624f3b9cf03f3", "088e3e939ad234b6fdd0e321290fb26937dc2553" ], "paperAbstract": "NetApp\u00ae WAFL\u00ae is a transactional file 
system that uses the copy-on-write mechanism to support fast write performance and efficient snapshot creation. However, copy-on-write increases the demand on the file system to find free blocks quickly; failure to do so may impede allocations for incoming writes. Efficiency is also important, because the task may consume CPU and other resources. In this paper, we describe the evolution (over more than a decade) of WAFL\u2019s algorithms and data structures for reclaiming space with minimal impact on the overall storage appliance performance.", "pdfUrls": [ "https://www.usenix.org/conference/fast17/technical-sessions/presentation/kesavan", "http://www.usenix.org./system/files/conference/fast17/fast17-kesavan.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17-kesavan.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/20a1/08587321823ca9cdd93ac84fc316a0400630.pdf", "s2Url": "https://semanticscholar.org/paper/20a108587321823ca9cdd93ac84fc316a0400630", "sources": [ "DBLP" ], "title": "Algorithms and Data Structures for Efficient Free Space Reclamation in WAFL", "venue": "FAST", "year": 2017 }, "20a71681be001a27b35389d028e0edd97007d70f": { "authors": [ { "ids": [ "33396960" ], "name": "Ted Kaminski" }, { "ids": [ "39433275" ], "name": "Lucas Kramer" }, { "ids": [ "26946268" ], "name": "Travis Carlson" }, { "ids": [ "2777290" ], "name": "Eric Van Wyk" } ], "doi": "10.1145/3138224", "doiUrl": "https://doi.org/10.1145/3138224", "entities": [ "Attribute grammar", "C11 (C standard revision)", "Code refactoring", "Composability", "Extensible programming", "Operator overloading", "Parse tree", "Programmer", "Programming language", "Termination analysis" ], "id": "20a71681be001a27b35389d028e0edd97007d70f", "inCitations": [ "448acb8fbfff8c9b1daa48c177797ccac5f51068" ], "journalName": "PACMPL", "journalPages": "98:1-98:29", "journalVolume": "1", "outCitations": [ "4938f22c570cefc70c10bef53f4c4b8301d7eb73", "3b642005c78aca2d811619f85b85a81d1c68c834", 
"035da7cc36b2071cd07e093151be743f56c97c34", "81ca7ac75b1ccdee0bd8a9e93b7d79bdc17a31a9", "d4cb976755ae225a736444b5e242e4285dce1fb3", "403e17c145103919a9d8248d6bbdd8ef3516777f", "759da1f189a542b38dbb1e751b6bc485fe01b82f", "a11f81fc27df87459f2533ad354184de9d04046c", "ee6fb0de50a618d5f0347732d2a0a7751f9a4473", "0b6ecc7aa29ec0c7439e9000cb42ba7c59a3e3a3", "50ad80a30fd476c58488f9a5bbb91147a9650074", "7692a19dd425903781e9ac29cbb715f23b3a9a80", "8c0f865cba4d4e298c623252dad8ac9cd77e50b1", "383aec58bdf09e4549c4df2c984214838c5cb7f6", "be6e5629fcd1cef23c5541bcb3570fde326db4bd", "47ea9018921a5235c952675b02f0508260f6214e", "3e8d1efb560012e6cbfe6015038e905162427e31", "eef0e0820ca3ef8cee957c89373527e8a73dcaaf", "0954212d0d60a1053de84760d96df2f5dea6c208", "435c5036b6f0244b97269896e7dee03ad8cc0eaa", "0fb659af82f2277c8a62ac888f4bfd01570e5470", "12a449686a13b2b1c0ecfe4492a76b4ae0fe36ef", "ce906365ddc9f040ecb9751683e4412bbe5995f5", "2f176f377081306c05e54db5a33270cb5cb1a1e3", "585706dc56e146c8fb42228fc5cbe1de0bb0a69d", "457e62e93d81b1aee73e543f1bc19b5fb4ca1416", "303b122551f37383a43acc1229f6e57dcde20f40", "72e40e4bef8906055fadc245c4773d520334f7f6", "0b61a17906637ece5a9c5e7e3e6de93378209706", "15de8dbafc40461c0ab264e60eee92133d108f22", "766bccd07ea3be7eaed961cc5e99e7c197bdb56a", "75388aabc9f9bfe89efae158554f1aa791990c81", "45414af0f8ea5472660da0d6c65fc3a5a927b974", "06d93235a1dff5d8c6ea85b8187ea78468d48e22", "1af286a37494250d70d7a8bd8cc1b229a572fdcb", "5fde4932e65c904588f197c2c0c72d9693b4c316", "17bd125578eac662eddb01e78c1dc08e4ef51cd3", "60dbcfaccba3a8ae7653d6dbe8e4148fa19609cc", "63366126e36348e1de0713138edf59f371212a9a", "1992efda26b659a9ec1e94d6aa9b2c64e5efd4a1", "6d5a3e0b92a121bbc85558601d4c6704e0f6aa88", "451ead65a7260f50e4fa083f5e6c43182812fa46", "23f9005fac3568c4af0c4beadc97a27ae18583f6", "00265211639e8647e0ce24e543ed9e4111f9f563", "062aed12baea7783302ba4d0cad113bbab878cc5", "829d7ace27e2a95184051bb667b79e203946972b", "0ed048fb273bd4563a1da91656ebb4976ea4fe9e", 
"d18e91ddfd00b2a04cdbbf800f25b3ce12e1c982", "1e4941a4f16c287248971fd8c96664ff2bf8d9ff", "0ba027d8f0e2ac77a57a02fb950da68795e9cf26" ], "paperAbstract": "This paper describes an extensible language framework, ableC, that allows programmers to import new, domain-specific, independently-developed language features into their programming language, in this case C. Most importantly, this framework ensures that the language extensions will automatically compose to form a working translator that does not terminate abnormally. This is possible due to two modular analyses that extension developers can apply to their language extension to check its composability. Specifically, these ensure that the composed concrete syntax specification is non-ambiguous and the composed attribute grammar specifying the semantics is well-defined. This assurance and the expressiveness of the supported extensions is a distinguishing characteristic of the approach. \n The paper describes a number of techniques for specifying a host language, in this case C at the C11 standard, to make it more amenable to language extension. These include techniques that make additional extensions pass these modular analyses, refactorings of the host language to support a wider range of extensions, and the addition of semantic extension points to support, for example, operator overloading and non-local code transformations.", "pdfUrls": [ "http://www-users.cs.umn.edu/~evw/pubs/kaminski17oopsla/kaminski17oopsla.pdf", "http://doi.acm.org/10.1145/3138224" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/20a71681be001a27b35389d028e0edd97007d70f", "sources": [ "DBLP" ], "title": "Reliable and automatic composition of language extensions to C: the ableC extensible language framework", "venue": "PACMPL", "year": 2017 }, "20d8cc3f8a81fddd8168ca696e9182543b33dd43": { "authors": [ { "ids": [ "2753806" ], "name": "Michael G. Gowanlock" }, { "ids": [ "18772848" ], "name": "Cody M. 
Rude" }, { "ids": [ "33571655" ], "name": "David M. Blair" }, { "ids": [ "10660562" ], "name": "Justin D. Li" }, { "ids": [ "1682906" ], "name": "Victor Pankratius" } ], "doi": "10.1109/IPDPS.2017.17", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.17", "entities": [ "Buffer overflow", "Central processing unit", "Cluster analysis", "Computer cluster", "Graphics processing unit", "Multi-core processor", "Redshift", "Sparse matrix", "Speedup", "Throughput", "Total electron content" ], "id": "20d8cc3f8a81fddd8168ca696e9182543b33dd43", "inCitations": [ "1965b8b0a25956488542510c759b0d6e128d1b90" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "832-841", "journalVolume": "", "outCitations": [ "30267d0fb3bcdc7a39b2cc8e4ccb6383c8b2892a", "8b54c5824b74aba6fddb1729210c3fa8501e42b5", "75a4860c9b3b2e95bc3a8056543e7560a1753f2b", "dbf36c7a3c5521b93aef699476ba37b3ca15bb61", "271f54ab5239b6a33d3b16bb99f1c16d1e8bb0c0", "92e96f85b5081ddab57923dde750e939faec9847", "cde4efcd58c9b39c8dd7cf0173643851bfedcbb9", "3448ff2614a49c16ad6be8b3e363c57e12762f24", "82fafb4c5dfb61901751f47b049d0c67ce64f803", "a644c02fc217c9fa49b7a91bd6f65def32196a2a", "44d3ffc5979aea32f76f137b6a40424e1437be2a", "13a375a84a6c414b85477a401541d3e28db1e11a", "63eac5f99c547d0ba3660464799d826a879b53fd", "30854a901a39404dbaacb1cb5363ab3c0a2e35e1", "bc36e6a50ca6fe42daf8041e7ec68abcdd8cc4fa", "be212f16400a7db90c14da51fd69600a124492db", "1daefd3a54681a127b54fb0fdba215ce790526f4", "1d3b776507f1c11bbcdcd1f8c0ea8c48df675904", "d94194381f7323c052891f9bbd40d0680ad01269", "0226adea5e4f5f739633a83d159ca989045eefe5", "9c5882ea02390e3ca93d04aeeb4ec440ae17ff50", "31dfabb8d1085ac468b60a83d32af2a558407c95", "560f35ad5e6512b5c26d43c275d3dfd7aabd8ca2", "12d49ecc6aa2bf20a850100cafe061a237a4874e", "0157f142bee7b462897424908cd6c73d84f225cc", "7005f2e50c4c9fcd679b8e6d2ddc5a5ae0c1bf15" ], "paperAbstract": "Large datasets in astronomy and geoscience often require clustering and 
visualizations of phenomena at different densities and scales in order to generate scientific insight. We examine the problem of maximizing clustering throughput for concurrent dataset clustering in spatial dimensions. We introduce a novel hybrid approach that uses GPUs in conjunction with multicore CPUs for algorithmic throughput optimizations. The key idea is to exploit the fast memory on the GPU for index searches and optimize I/O transfers in such a way that the low-bandwidth host-GPU bottleneck does not have a significant negative performance impact. To achieve this, we derive two distinct GPU kernels that exploit grid-based indexing schemes to improve clustering performance. To obviate limited GPU memory and enable large dataset clustering, our method is complemented by an efficient batching scheme for transfers between the host and GPU accelerator. This scheme is robust with respect to both sparse and dense data distributions and intelligently avoids buffer overflows that would otherwise degrade performance, all while minimizing the number of data transfers between the host and GPU. We evaluate our approaches on ionospheric total electron content datasets as well as intermediate-redshift galaxies from the Sloan Digital Sky Survey. 
Our hybrid approach yields a speedup of up to 50x over the sequential implementation on one of the experimental scenarios, which is respectable for I/O intensive clustering.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.17" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/20d8cc3f8a81fddd8168ca696e9182543b33dd43", "sources": [ "DBLP" ], "title": "Clustering Throughput Optimization on the GPU", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "20daa938784970d06a2608819a9dcb5ab3e4c807": { "authors": [ { "ids": [ "2028147" ], "name": "Petra Berenbrink" }, { "ids": [ "1704036" ], "name": "Peter Kling" }, { "ids": [ "3382597" ], "name": "Christopher Liaw" }, { "ids": [ "1685567" ], "name": "Abbas Mehrabian" } ], "doi": "10.1109/IPDPS.2017.52", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.52", "entities": [ "Average-case complexity", "Game theory", "Load balancing (computing)", "Local search (optimization)", "Randomness", "Recursive least squares filter", "Time complexity" ], "id": "20daa938784970d06a2608819a9dcb5ab3e4c807", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "192-201", "journalVolume": "", "outCitations": [ "dd174e56a7fb404369cbc8bccfa0de6328749297", "87bda1eb78b01db931a5f9d80a27c83e133324fd", "3b4d5b3e93aa6d5ea413c726c17057d8d1eafb2d", "0d17c9e94b32b8cb89b43ba48ff7b3f4f20d92d9", "5266ec918f9ef745272bf5cb3c1c1ce17a8806f5", "234e6be0d4238f76b3ac038ee422be39f391c625", "2717239458fc145da7bcb2fbbda58bcbb7ddb103", "be2ab6d0fa97703171f740440fd778db9472461a", "0cfb537793001c70f40998dfba24b54fdd8498da", "34b6bfce7b72641d296409881da1e3544fa2c4da", "1331d03fbf000e0e62f4d09e5264ad56564eab38", "5b35a69c7940add7019d019faffcb031558f90b0", "111e1652da9c6bd61f976c33473bbc516816b64d", "c3973fb8e3628c8646174e1d0cc2a9b86153e41a", "0567cbdf0aa9fc682d96025d1b14ee8f88b23cb3", 
"f2ea6704541b3a0ee66fee106a54533cb12e5f2f", "5994423145310fda56133da59aa3f210a5202771", "50e8b6a669cfa2bcaf1cdba9e97abb70a3d3a703" ], "paperAbstract": "We consider the following balls-into-bins process with n bins and m balls: Each ball is equipped with a mutually independent exponential clock of rate 1. Whenever a ball’s clock rings, the ball samples a random bin and moves there if the number of balls in the sampled bin is smaller than in its current bin. This simple process models a typical load balancing problem where users (balls) seek a selfish improvement of their assignment to resources (bins). From a game theoretic perspective, this is a randomized approach to the well-known KP model [1], while it is known as Randomized Local Search (RLS) in load balancing literature [2], [3]. Up to now, the best bound on the expected time to reach perfect balance was O((ln n)^2 + ln(n)⋅n^2/m) due to [3]. We improve this to an asymptotically tight O(ln(n) + n^2/m). Our analysis is based on the crucial observation that performing destructive moves (reversals of RLS moves) cannot decrease the balancing time. This allows us to simplify problem instances and to ignore “inconvenient moves” in the analysis.", "pdfUrls": [ "https://arxiv.org/pdf/1706.09997v1.pdf", "https://doi.org/10.1109/IPDPS.2017.52", "http://arxiv.org/abs/1706.09997" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/20daa938784970d06a2608819a9dcb5ab3e4c807", "sources": [ "DBLP" ], "title": "Tight Load Balancing Via Randomized Local Search", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "20e411de49c854b28a28d3ce37c5aea17a88436b": { "authors": [ { "ids": [ "2691450" ], "name": "Stefan K. Muller" }, { "ids": [ "1693687" ], "name": "Umut A. 
Acar" }, { "ids": [ "33486652" ], "name": "Robert Harper" } ], "doi": "10.1145/3062341.3062370", "doiUrl": "https://doi.org/10.1145/3062341.3062370", "entities": [ "APL", "Analysis of algorithms", "Artificial intelligence", "Computation", "Computational science", "Computer", "Cooperative multitasking", "Machine learning", "Multi-core processor", "Operational semantics", "Parallel computing", "Programming language", "Response time (technology)", "Scheduling (computing)", "Thread (computing)", "Threaded code", "Throughput" ], "id": "20e411de49c854b28a28d3ce37c5aea17a88436b", "inCitations": [ "7a70a0c8520557b8ac9982257c78eca127e7e6b1", "16330ecf4c33a49d0cfcb0af947250fc8cd34683" ], "journalName": "", "journalPages": "677-692", "journalVolume": "", "outCitations": [ "2366c1b8f494417e80e4b1af0865ed25aa55d025", "2dad943978ff169904299778a5d76d25bc3c990a", "11a5cdac00df51114a77fafb62c72ac57f52e8c0", "3ee0eec0553ca719f602b777f2b600a7aa5c31aa", "7d0a0c4068b66bb5ba8925a3520b3131c1079f9e", "01af5dd35e0b335e1b52dab3bc0d32a14c37b0c7", "20500fe3b879ba21490dd29e9bcefea16d07027a", "20e205ac71e14dee1e6a6ea5ea59a442e96d2d91", "153a854241d306d10f81fe11444e2d8643681a0b", "0e19b564e8e047f0bff5bc3c183f53fcebb0af87", "0fb659af82f2277c8a62ac888f4bfd01570e5470", "f9432f01bd0093d751f62a01a3cc3adb177ccfec", "0d8f77b74460f5abcb8e7a885b677a000a2656be", "0d5d0341ba414decaa876f9845a179e55fd1f8f5", "450e2b747e8fe625cd61fa1aea8a8e1de0a8b6a3", "a0d71af1d7b64ad2898179cb2850dc5005085d2c", "d61c8a4c4bc506e66c92b6d060b7fc2c722ca98a", "60dcbce6532b6271b2825de468618617ea4e4c5d", "d1482f9fefcc5da493ee3aa7c3d05e29bfe64991", "5bd8ad07f0b900dc476ca75ab0838082abceb8e3", "b25836d60f8598f823dc245b65f5b8653dad81f6", "52aad68b6a150c5db537ef64c23e07d8abd58cc1", "14c0c9ba3e69846db02a6a3df1ef8e99149aa978", "8354790afedcd610a4d8f11020c082af3b609997", "9bc6f4adc167a0c58c808f6eabcfe86590c7d7ef", "400eff773b126991347692305474bdec6c5fe7e6", "339f4370bd02b977fc0d61fca50cc6a0ea26a24b", 
"34eb5e5ded51738861b8b844a1dbfddd6881fa46", "33163aca2c83959e8a002c94c9d44a0ad06b1073", "101f10b90ce859135868668478fbde5882c87458", "158ebe313a72857c5534a313f3ec0e413593b732", "9d1beb4d2ca5c07965bb4e309864a9dcbae65fec", "c037edd22215b89c8d2924d4e3c81eb84fdadec7", "1dff33cb24cf30be232d02bc48ebdf200480d2f3", "51dd1408eadbb56b03d78947e706cf0ae2169732", "133c176b649618b1f6bc13ec6783647c87bf9935", "407b01859534c5498f15f2ce9a71d964be156d46", "457051715ed2bf31093b1dbb12d5e86c570b7944", "054da57f9e3aa9d1cfd6014f380fd52378e03a1a", "675724a2a93195682ecd2c9da7d71702e0da3ec3", "302f619bb9f6d5f121f5ed556213375bfe7617b0", "035f6525aee69661768dadf19113069c385d043b", "da10643553d82940302f95e6234f428b13679f7b", "0784356b46a1345b352ab634bda835c07ff04af2", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "74850144350c33e164f5270b85a02a3b48c8aaed", "f73ae2fd526b63d5b04aa02f2b23eb747b768607", "128b4088985fd3773bca191b34b7b0cafea3fd64", "3d1e89c91510bfc5e18fbe92b8ed6a8e0b52b436", "0a7eb03e1efaf1b1db42cf802cbb2fed262f3746", "5069f6267707df50e3578afaa8dfa9c15f3c3b07", "11d49f3b3e7b29c380248da76d5c9fcc141308b2", "8723e38978fe1e48c9c219cd6e9bd88d5cd237a8", "2c73fbadf85ffe168a380ab65ad3f6de98ee09a9", "ce48a652ef299c9c25a1fd4f7f0e8622473d2e92" ], "paperAbstract": "Competitive and cooperative threading are widely used abstractions in computing. In competitive threading, threads are scheduled preemptively with the goal of minimizing response time, usually of interactive applications. In cooperative threading, threads are scheduled non-preemptively with the goal of maximizing throughput or minimizing the completion time, usually in compute-intensive applications, e.g. scientific computing, machine learning and AI. \n Although both of these forms of threading rely on the same abstraction of a thread, they have, to date, remained largely separate forms of computing. 
Motivated by the recent increase in the mainstream use of multicore computers, we propose a threading model that aims to unify competitive and cooperative threading. To this end, we extend the classic graph-based cost model for cooperative threading to allow for competitive threading, and describe how such a cost model may be used in a programming language by presenting a language and a corresponding cost semantics. Finally, we show that the cost model and the semantics are realizable by presenting an operational semantics for the language that specifies the behavior of an implementation, as well as an implementation and a small empirical evaluation.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062370", "http://reports-archive.adm.cs.cmu.edu/anon/2017/CMU-CS-17-107.pdf", "http://www.cs.cmu.edu/~rwh/papers/resppar/pldi.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/20e411de49c854b28a28d3ce37c5aea17a88436b", "sources": [ "DBLP" ], "title": "Responsive parallel computation: bridging competitive and cooperative threading", "venue": "PLDI", "year": 2017 }, "20f1081cf001f716037e20d9cff147f5ac50632a": { "authors": [ { "ids": [ "3491192" ], "name": "Seunghee Shin" }, { "ids": [ "27081950" ], "name": "Satish Kumar Tirukkovalluri" }, { "ids": [ "1694458" ], "name": "James Tuck" }, { "ids": [ "1717365" ], "name": "Yan Solihin" } ], "doi": "10.1145/3123939.3124539", "doiUrl": "https://doi.org/10.1145/3123939.3124539", "entities": [ "3D XPoint", "Allocate-on-flush", "Atom", "Atom", "Computer architecture simulator", "Experiment", "Limiter", "Memory controller", "Memristor", "Non-volatile memory", "Phase-change memory", "Program optimization", "Proteus", "Requirement", "Simulation", "Volatile memory" ], "id": "20f1081cf001f716037e20d9cff147f5ac50632a", "inCitations": [ "41ea95cc4dca373bf324555b897760054ec4a76e" ], "journalName": "", "journalPages": "178-190", "journalVolume": "", "outCitations": [ 
"15c80ec5104e98d6f84b5ed348ba0276c0739862", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "3057d43346280804b1636de9a9c0f950d5cf12c1", "56ad278ca41d14386d558f259f6a8b98ae6e86d1", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "9858251a88afc29fa9fdb8234d998dcdf182f144", "fd840d5275cac98d64e7778a1b9173b937a77386", "3af216f371069b57c0dca5448384d052fb490fb4", "314919c141024c71cb17d525ecd8016138335002", "bf5497e15f22233cbc2a4d0c3cc2c36f26738701", "71cbd5b7858785e8946523ca59c051eb0f1347ba", "47b851237f240831abee3971bca6bb8d2a121eb1", "1be67e00b82ccb9dca746dc1c4758932a5a5ff5e", "82155a73552fe1daf752bda7567dce96a14219a8", "2e3bda19a2ed88e8a6e5cc415e27da551653ff1d", "94783d113951822195d4ba44599a8fcbdef9d4bf", "d26e3b4771a43822cfed79d44e3da7003e3e94db", "277862a906af8489a1d98add2f6516a0e5df1bb1", "40e2b6829eb5b4068e0918d15521020467530237", "57c823b3b07b98233394bf15cfbbaed6a84809df", "d76913152aeff892dbb028785f98ee8c84bfd8e3", "5a49c1c694028fd6ca7acc6af601c0f54efa0700", "2fc8a439f0f73462f875870c403c90ea1fb99e41", "885c666fbcfd1a10c613496d7a041d01b99c7a39", "b56681b12900336b202a6ed45719d71d5d844a25", "5bc06f8a33370f46f52f1d0282e5f91057a7192b", "03b6a916498fa8591201a2de5f22344609b1e457", "05bd926844ffa89f668237a6836825c59d6377e9", "2621b8f63247ea5af03f4ea0e83c3b528238c4a1", "578667cbc39c6bfc1c89fe6a54506643c3b097f8", "3ede1909bf70d6e4bca46302f474083517b081a3", "512a8925693d5f4b8e4cfde32bcd3c846a14b71e", "642dd27ce62d51b042e134b0d0aec2f2e7cc4d29", "dc2e2b794a784782d7d9860f1358aa107f71c1bf", "9376a2d69d06e39fd6fd27c9ce2f0817cc1dd4ef", "1f482f44497c17be0573d9dff14a30d87b0bf0ca", "0204f40221260d00c5ee63646560a40dcd7d97d1", "4f6fbe4484487e3983f673ff55bdec92f947311c", "db97d135ddb8edec2dae6c10a830ac6e44045d94", "16653666b0005f91060a3e402566659749b84313", "05a1357946de5eca42a477b7b268db4944219a2e", "3da14037fc6e2c3dee2d6808bc2d7e933325d054", "33e64874996ac6d163e4e5a97e28b617de7cc0f5", "42c70d64890726f60556caf3eec3f06e85642dd9" ], "paperAbstract": "Emerging non-volatile memory (NVM) 
technologies, such as phase-change memory, spin-transfer torque magnetic memory, memristor, and 3D Xpoint, are encouraging the development of new architectures that support the challenges of persistent programming. An important remaining challenge is dealing with the high logging overheads introduced by durable transactions.\n In this paper, we propose a new logging approach, Proteus for durable transactions that achieves the favorable characteristics of both prior software and hardware approaches. Like software, it has no hardware constraint limiting the number of transactions or logs available to it, and like hardware, it has very low overhead. Our approach introduces two new instructions: log-load creates a log entry by loading the original data, and log-flush writes the log entry into the log. We add hardware support, primarily within the core, to manage the execution of these instructions and critical ordering requirements between logging operations and updates to data. We also propose a novel optimization at the memory controller that is enabled by a persistent write pending queue in the memory controller. We drop log updates that have not yet written back to NVMM by the time a transaction is considered durable.\n We implemented our design on a cycle accurate simulator, MarssX86, and compared it against state-of-the-art hardware logging, ATOM [19], and a software only approach. Our experiments show that Proteus improves performance by 1.44--1.47× depending on configuration, on average, compared to a system without hardware logging and 9--11% faster than ATOM. A significant advantage of our approach is dropping writes to the log when they are not needed. 
On average, ATOM makes 3.4× more writes to memory than our design.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3124539", "http://nvmw.ucsd.edu/nvmw18-program/unzip/current/nvmw2018-final93.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/20f1081cf001f716037e20d9cff147f5ac50632a", "sources": [ "DBLP" ], "title": "Proteus: a flexible and fast software supported hardware logging approach for NVM", "venue": "MICRO", "year": 2017 }, "20fc7ec7834a055843ccb087c77656574a09bfb5": { "authors": [ { "ids": [ "1799329" ], "name": "Jee Ho Ryoo" }, { "ids": [ "2543676" ], "name": "Mitesh R. Meswani" }, { "ids": [ "2130920" ], "name": "Reena Panda" }, { "ids": [ "1703238" ], "name": "Lizy Kurian John" } ], "doi": "10.1145/2967938.2974060", "doiUrl": "https://doi.org/10.1145/2967938.2974060", "entities": [ "Address space", "Dynamic random-access memory", "FM broadcast band", "Flat memory model", "Hot swapping", "Interleaved memory", "Load balancing (computing)", "Locality of reference", "Lock (computer science)", "Paging", "Placement syntax", "Principle of locality" ], "id": "20fc7ec7834a055843ccb087c77656574a09bfb5", "inCitations": [ "92229ef2d0bfdcba2fdf2bf265ae6d37d0b34e9f", "24c0c34675eb35e300244c6ff682155a34a2e3d5" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "349-360", "journalVolume": "", "outCitations": [ "1a5ce35fc5ad575c2c9e5f692bf263082d656f80", "745d50eb6b74b191191ce93c6ef1ec9760ce0cb0", "b0cd27efc4c73578e7fbabebfca173e00ac73574", "54a15f2c25bec4274d5bb423dbc5002426a506a3", "1b62e2f7ac7a8d3c13773dfa23d05f8ab82b1f23", "5bf1fdb6dc950537962eafe888259272eed67737", "8007305d525a0802f09002b7a5bca2bb3f23ed7d", "054be29f5016aa668fce1a3eee1be40a2c001f46", "14b664dd04703d2333bce3cb35641f90a3a8a2ce", "90851f7e712bc8a2a201c0609fdf53520779d1f8", "1154b2fd6fb913b02eb6f64f5287a6b75a506e64", "8009b1c8cc4af8d3d4b792ac32926487a428172e", 
"c7341dcd5e6c71b149edf808331ab1e8b37cd03b", "1c32ad0a42109fab826eb3054df7cfc33b424125", "0dc38d3afb68f617e23eced7ce2994a0a82feb11", "4c689148ee5e9d6d116f6babbfab21bf2116802e", "417ab9b8b003982222017ef585e19680366609f3", "3b621e9a6b99f32caa518116cb400035d1deed29" ], "paperAbstract": "In this paper, we present a flat address space organization called SILC-FM that allows subblocks from two pages to co-exist in an interleaved fashion in die-stacked DRAM. Data movement at subblocked granularity consumes less bandwidth compared to migrating the entire large block and prevents fetching useless subblocks that may never get accessed. SILC-FM can get more spatial locality hits than CAMEO and PoM due to page-level operation and interleaving blocks respectively. The interleaved subblock placement improves performance by 55% on average over a static placement scheme without data migration. We also selectively lock hot blocks to prevent them from being involved in the hardware swapping operations. Additional features such as locking, associativity and bandwidth balancing improve performance by 11%, 8%, and 8% respectively, resulting in a total of 82% performance improvement over no migration static placement scheme. Compared to the best state-of-the-art scheme, SILC-FM gets performance improvement of 36%.", "pdfUrls": [ "http://ieeexplore.ieee.org/document/7756782/", "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.20" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/20fc7ec7834a055843ccb087c77656574a09bfb5", "sources": [ "DBLP" ], "title": "SILC-FM: Subblocked InterLeaved Cache-Like Flat Memory Organization", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2016 }, "21158c013e6c723ed1e9b9033a49cbda7b8b7dd9": { "authors": [ { "ids": [ "3114123" ], "name": "Gordon V. Cormack" }, { "ids": [ "2637161" ], "name": "Maura R. 
Grossman" } ], "doi": "10.1145/3077136.3080812", "doiUrl": "https://doi.org/10.1145/3077136.3080812", "entities": [ "Email", "Ground truth", "Relevance", "Relevance feedback", "Simulation" ], "id": "21158c013e6c723ed1e9b9033a49cbda7b8b7dd9", "inCitations": [ "647b16e4e1c5015e4703edb9b0de638233b3e36d", "8cbd4c1087c1a3582df8191d87081dd060fd1283", "0bf8b8cebedd7d14f95183106d6435adc405755c", "b4fafe513114e292a16eea3f5b34776cb4c92101" ], "journalName": "", "journalPages": "5-14", "journalVolume": "", "outCitations": [ "3880381d915ecce470bbfa11e2a3f8714e0e3e1d", "b4932dcb86b96d91cf5eaff44550e3d08a9f46ef", "f916329efbbf21466af8d9ed74d7aacd32da4dfc", "38612e346fdf3158c32c16058f7e8820a8f0325e", "162c68e07814704109122d61771c1ce067e95b86", "17963c800077aedd3802b3e97d45c286ba953ba4", "9dddbe4640d0707793480aac156cac9a70a1bb1d", "1f5a201026d7af07fdf3335548e0515ff8e62ab1", "e09cb99bcc5cd9f1cac35f4f470d4ab057a10cac", "d857f3c10634e1aaf2c59d7e506527f33d519f07", "11d6e220405bb7aca16b23ae80fabc896fd3e5ae", "bee7f0e0b4f0af816e14bcc56171913863584e18", "5cb0cd0486518c5474e25b800ee210016f98b1e9", "9176475ed7913575d7a28a661c80df39a88afff4", "4825fd2a83eda08abdcf6eb49c53fa9b14f23b1a", "8fa336307bd4a4f80337469e5826c0b04161a125", "6e7457ca29cc5b7e48065c3a718a09ab64873b3d", "053256ace3e4fc6108a887f8525a565fabe629e0", "039ca56524ad5a368e91bade6b56108da622a1d4", "3bcdf298e528a514d044ed959498b34a0c81b354", "59c9da928fbf10da0c46eef255f14aba2a8ed9a9", "3b460e5b77e7822ca584e8d9042ba72e7a2d7425", "b2e45c74c0eec205eb3d8977fd1390667601c0a7", "126240dedd75626fd736f0485d06f1f516517e54", "e49d662652885e9b71622713838c840cca9d33ed" ], "paperAbstract": "Technology-assisted review (\"TAR\") systems seek to achieve \"total recall\"; that is, to approach, as nearly as possible, the ideal of 100% recall and 100% precision, while minimizing human review effort. 
The literature reports that TAR methods using relevance feedback can achieve considerably greater than the 65% recall and 65% precision reported by Voorhees as the \"practical upper bound on retrieval performance... since that is the level at which humans agree with one another\" (Variations in Relevance Judgments and the Measurement of Retrieval Effectiveness, 2000). This work argues that in order to build - as well as to, evaluate - TAR systems that approach 100% recall and 100% precision, it is necessary to model human assessment, not as absolute ground truth, but as an indirect indicator of the amorphous property known as \"relevance.\" The choice of model impacts both the evaluation of system effectiveness, as well as the simulation of relevance feedback. Models are presented that better fit available data than the infallible ground-truth model. These models suggest ways to improve TAR-system effectiveness so that hybrid human-computer systems can improve on both the accuracy and efficiency of human review alone. This hypothesis is tested by simulating TAR using two datasets: the TREC 4 AdHoc collection, and a dataset consisting of 401,960 email messages that were manually reviewed and classified by a single individual, Roger, in his official capacity as Senior State Records Archivist. 
The results using the TREC 4 data show that TAR achieves higher recall and higher precision than the assessments by either of two independent NIST assessors, and blind adjudication of the email dataset, conducted by Roger, more than two years after his original review, shows that he could have achieved the same recall and better precision, while reviewing substantially fewer than 401,960 emails, had he employed TAR in place of exhaustive manual review.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080812" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/21158c013e6c723ed1e9b9033a49cbda7b8b7dd9", "sources": [ "DBLP" ], "title": "Navigating Imprecision in Relevance Assessments on the Road to Total Recall: Roger and Me", "venue": "SIGIR", "year": 2017 }, "21164c79302a3182064ca3cbedb248c3ebd463e0": { "authors": [ { "ids": [ "3445724" ], "name": "Jerry Ajay" }, { "ids": [ "37712692" ], "name": "Chen Song" }, { "ids": [ "32775344" ], "name": "Aditya Singh Rathore" }, { "ids": [ "1749211" ], "name": "Chi Zhou" }, { "ids": [ "2164973" ], "name": "Wenyao Xu" } ], "doi": "10.1145/3037697.3037752", "doiUrl": "https://doi.org/10.1145/3037697.3037752", "entities": [ "3D computer graphics", "3D printing", "Compiler", "Firmware", "Mathematical optimization", "Printer (computing)", "Printing", "Program optimization", "Semiconductor device fabrication" ], "id": "21164c79302a3182064ca3cbedb248c3ebd463e0", "inCitations": [ "a3132e4f33bdb1ada1ead58b2865e0dd7fdffb21" ], "journalName": "", "journalPages": "419-433", "journalVolume": "", "outCitations": [ "3212c1755c3b5416da0c6d08243b3f254953285e", "cd800900fbd9bc89a35ff597a7ddfe74aceefa55", "9ba533adf4776c0a708d2f5a2431ce2ab35bf915", "e0de0e97bed6fcafb4efeaef6649174973254351", "69524407c10868fa8c547e963a272910304f03a6", "8ea88a68ddcc75a2ff8aef8ae002c3b9807c355c", "34cdcf6af7feb25dacf3ebf1067f953f9cc0e704", "1e126cee4c1bddbfdd4e36bf91b8b1c2fe8d44c2", "0af7b9623e35555710f3a30177c5b0c61e4e30af", 
"086699da0528ed47463cea3108851bd3dc5ba715", "5d1614bc44b4665e72f301727f549f276b12fef8", "17f5bbe172a783b79026ae43d2635c6817a5ba8b", "3d5d16284305a5afcf5ee4c7ec4d4af3122d9da5", "7791f7d594dc8ef36ba6b748638ff29428a7e670", "17c541cbc0409579b1f400519a3d5ad079a21d8d", "a9b9a7c47e715fd96c77ab325b7a4ad3e734bce9", "2857fd5657b58701dc6545ae9a1871999b9fdf30", "b37414a58f22efc3dbc5b36ecd7524d2947f13d0", "1b7d4cd1b688ef4561295e81cedb2dc9402c7679", "b6b9ee1bfecc15143556fdf1933462e37c2ac0dc", "15e07d192d360652e68e38b4ba267e160f972390", "1b1ff7f94430f47d109d0deb6856c98d9df518e8", "139448e4bcf8e4a4c2563e2efc97af36e1753ee8", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "097f3272d63e506f8940c6716dd6699bddf548e2", "ec8c64a984eeae89e6276ba0c9cca54e07d03182", "ac7bb3fc9d47f1f8f836c7f7a69e8ca4f26169ca", "066aa52ed6a8d679a581e59773ea92fff315b27f", "48b18c093cdb9a46887c6f94b8bd369ed0465564", "0a80e3dce25d865e9fdf69da4d09cc8ac3398ff4", "70b35f39b9c69e2366d722339fe8e1e4beef0878", "2cac6e84d3d7fed13ec9a5d39fd2bd6e75423578" ], "paperAbstract": "As the next-generation manufacturing driven force, 3D printing technology is having a transformative effect on various industrial domains and has been widely applied in a broad spectrum of applications. It also progresses towards other versatile fields with portable battery-powered 3D printers working on a limited energy budget. While reducing manufacturing energy is an essential challenge in industrial sustainability and national economics, this growing trend motivates us to explore the energy consumption of the 3D printer for the purpose of energy efficiency. To this end, we perform an in-depth analysis of energy consumption in commercial, off-the-shelf 3D printers from an instruction-level perspective. We build an instruction-level energy model and an energy profiler to analyze the energy cost during the fabrication process. 
From the insights obtained by the energy profiler, we propose and implement a cross-layer energy optimization solution, called 3DGates, which spans the instruction-set, the compiler and the firmware. We evaluate 3DGates over 338 benchmarks on a 3D printer and achieve an overall energy reduction of 25%.", "pdfUrls": [ "https://jerryajay.com/wp-content/uploads/2017/06/p419-ajay-3.pdf", "http://doi.acm.org/10.1145/3037697.3037752", "http://www.cse.buffalo.edu/~wenyaoxu/papers/conference/xu-asplos2017.pdf", "https://www.cse.buffalo.edu//~wenyaoxu/papers/conference/xu-asplos2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/21164c79302a3182064ca3cbedb248c3ebd463e0", "sources": [ "DBLP" ], "title": "3DGates: An Instruction-Level Energy Analysis and Optimization of 3D Printers", "venue": "ASPLOS", "year": 2017 }, "214f4f4f555b608e59314168b08ed9daa4087200": { "authors": [ { "ids": [ "2490811" ], "name": "Yingjin Qian" }, { "ids": [ "6916241" ], "name": "Xi Li" }, { "ids": [ "28295813" ], "name": "Shuichi Ihara" }, { "ids": [ "3348637" ], "name": "Lingfang Zeng" }, { "ids": [ "39664211" ], "name": "J\u00fcrgen Kaiser" }, { "ids": [ "1783053" ], "name": "Tim S\u00fc\u00df" }, { "ids": [ "1726087" ], "name": "Andr\u00e9 Brinkmann" } ], "doi": "10.1145/3126908.3126932", "doiUrl": "https://doi.org/10.1145/3126908.3126932", "entities": [ "Algorithm", "Best-effort delivery", "Classful network", "Denial-of-service attack", "Goal-oriented Requirements Language", "Lustre", "Lustre (programming language)", "Object storage", "Quality of service", "Rate limiting", "Remote procedure call", "Scheduling (computing)", "Token bucket" ], "id": "214f4f4f555b608e59314168b08ed9daa4087200", "inCitations": [], "journalName": "", "journalPages": "6:1-6:12", "journalVolume": "", "outCitations": [ "26b99a8bceca2611c376978e40e9ee6e33381b59", "78ccfd7e44dfc4e5f97f7b5047799ef8134a9d66", "b9691c81824568fc61dfb0af56b366fc7a579847", 
"dbe73781be3fcba36bb85b491789a53003e3292f", "42f16de22db4c6be67d35507bad88322620a7dff", "3e7fd5ac3fc1ab19c985c97a0614e4109fa91583", "3572e2e04e187fd9f9b9fa051fd0651931a95b0e", "7f0d159f3a4ce88f524669e08e19c0bbec4f261d", "16d33d151da2a7b3160864a22f0324a5080301a1", "0abe5211e209b272890ba6820a33b72e938b0b3b", "1fa4666006fd54280c180012251004e9fdf95a2f", "35501b12a19824cf2f4cf48eb65ceb1445b28c0e", "0be9b098bf8b72e84b684aa8fc98672bef32513e", "8fc8391ebed818f54f1f17dfe0b8dad57db24446", "1ddd410257b00370c1fe58377f02f608ff16c3bf", "111e2d5634cb30d5d841cdb22563f9b371fb5f54", "29fa5399ec245d2b2d99b591ff37dd477e36cc0a", "0b2c84be9e9f97f2464ad9d09be5f4c37edda47e", "65a2cb8a02795015b398856327bdccc36214cdc6", "0ccb855087109fb09d2c2e2445fe1df53c91085e", "9093b918c0fa66c9f34f69a6c422f5ff7f340df8", "31fcd061632d15567dbcbf4f9c5f7b781141a88a", "1cace06116f34820aa0fdd6f0681f83c7be006b0", "4ac4d917b384d9c0f26cfbe66ce31d4ab03bb1d0" ], "paperAbstract": "HPC file systems today work in a best-effort manner where individual applications can flood the file system with requests, effectively leading to a denial of service for all other tasks. This paper presents a classful Token Bucket Filter (TBF) policy for the Lustre file system. The TBF enforces Remote Procedure Call (RPC) rate limitations based on (potentially complex) Quality of Service (QoS) rules. The QoS rules are enforced in Lustre's Object Storage Servers, where each request is assigned to an automatically created QoS class.\n The proposed QoS implementation for Lustre enables various features for each class including the support for high-priority and real-time requests even under heavy load and the utilization of spare bandwidth by less important tasks under light load. The framework also enables dependent rules to change a job's RPC rate even at very small timescales. 
Furthermore, we propose a Global Rate Limiting (GRL) algorithm to enforce system-wide RPC rate limitations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126932" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/214f4f4f555b608e59314168b08ed9daa4087200", "sources": [ "DBLP" ], "title": "A configurable rule based classful token bucket filter network request scheduler for the lustre file system", "venue": "SC", "year": 2017 }, "21501c56fbd11f0a5e3347feabbbe217d03fcdfe": { "authors": [ { "ids": [ "1741866" ], "name": "Hongliang Li" }, { "ids": [ "36416867" ], "name": "Jie Wu" }, { "ids": [ "1764407" ], "name": "Zhen Jiang" }, { "ids": [ "1737850" ], "name": "Xiang Li" }, { "ids": [ "2094025" ], "name": "Xiaohui Wei" } ], "doi": "10.1109/CLUSTER.2017.10", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.10", "entities": [ "Algorithm", "Backup", "Computational complexity theory", "Correctness (computer science)", "Experiment", "Failure rate", "Fault tolerance", "Heuristic", "Heuristic (computer science)", "Real-time computing", "Stream processing", "Streaming media" ], "id": "21501c56fbd11f0a5e3347feabbbe217d03fcdfe", "inCitations": [ "585aa0d32c0c5a510e04a33039b7af9a85dab93a" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "379-383", "journalVolume": "", "outCitations": [ "9e074f3d1c0e6212282818c8fb98cc35fe03f4d0", "63115442310908b876aa1e81d877813ebee8b247", "962d0f79f2a3adabef266375685e551844156130", "edd661ca12ef8ef988679f7b399f2f846ef01fbd", "04afd5f18d3080c57d4b304dfbd1818da9a02e8e", "5916526f6aed7da0ec6812729cf468cda5b9d49b", "036e006a9f2049d15c1533ac254dcfce2483a1f6", "e0b3d5095ca65792b0ae77417c66578c0253d1aa", "fbe47f7d7e8df21cbe39c1f65d25165195ecba54", "2495ac46de086e8b217e87400ab4b2e637d81dcf", "9d46900406ba1bfee140ce048350504ffb1fe7e5", "20eb6a33ebc85a551510447b73928148cec1dbeb", "478fbef8568a021c3d91c13128efa19ad719dd88", "0cded775165fd38f333e5b80ee233ea8d4405139", 
"e4fd518cd67e03ef263eb0ad6876c3578cd5bbf8", "bad84100cd1bffe83bd33212a79d5cbb7f4ffb12" ], "paperAbstract": "Stream processing applications continuously process large amounts of online streaming data in real-time or near real-time. They have strict latency constraints, but they are also vulnerable to failures. Failure recoveries may slow down the entire processing pipeline and break latency constraints. Upstream backup is one of the most widely applied fault-tolerant schemes for stream processing systems. It introduces complex backup dependencies to tasks, and increases the difficulty of controlling recovery latencies. Moreover, when dependent tasks are located on the same processor, they fail at the same time in processor-level failures, bringing extra recovery latencies that increase the impacts of failures. This paper presents a correlated failure effect model to describe the recovery latency of a stream topology in processor-level failures for an allocation plan. We introduce a Recovery-latency-aware Task Allocation Problem (RTAP) that seeks task allocation plans for stream topologies that will achieve guaranteed recovery latencies. We present a heuristic algorithm with a computational complexity of O(nlog^2n) to solve the problem. 
Extensive experiments were conducted to verify the correctness and effectiveness of our approach.", "pdfUrls": [ "https://cis.temple.edu/~jiewu/research/publications/Publication_files/Li_IPCCC_2017.pdf", "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.10" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/21501c56fbd11f0a5e3347feabbbe217d03fcdfe", "sources": [ "DBLP" ], "title": "Task Allocation for Stream Processing with Recovery Latency Guarantee", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "215b4c25ad34557644b1a177bd5aeac8b2e66bc6": { "authors": [ { "ids": [ "34739391" ], "name": "Paul Grubbs" }, { "ids": [ "1707461" ], "name": "Thomas Ristenpart" }, { "ids": [ "1723945" ], "name": "Vitaly Shmatikov" } ], "doi": "10.1145/3102980.3103007", "doiUrl": "https://doi.org/10.1145/3102980.3103007", "entities": [ "Database", "Encryption", "Provable security", "Snapshot (computer storage)", "Threat (computer)" ], "id": "215b4c25ad34557644b1a177bd5aeac8b2e66bc6", "inCitations": [ "7511fc23b094d20f3d4b1fd4883c1a5a7140db69", "ed84133ca8ef37a273d4b187202f55c6618b953e", "0a811fe6d15705573766947d135af4c416571fa7", "21a2ad337d1bbf53652959ff06e6de7a8072f2bc", "f39796b6656cac1e9ddf9e4758dec9d6a8aab8d1", "46e837585af419dc79a949fcb1cfa46a8621f9ff" ], "journalName": "", "journalPages": "162-168", "journalVolume": "", "outCitations": [ "0a7f6563c30dc276c4dec2c278dec086a91bee33", "0ad160b7f96cf0306e481b6055e1c260f33596e4", "484e521bcac4953b4d0ab982a7ab28e514c146e4", "4aab6d38e0d88d20114a658ca8688905dde98416", "c413a3f4bc3e02b2df471c017b44beb1ac91a6c2", "1402a2bf579db44ace810bbe2ea7287e35d90f8f", "fb7f27650f30aefa7786c874da4b7cc27a11b9df", "13868fa5a86ebde021a1c91415fb9bb718c4a804", "4af77753e00973f339fd93a27e4131047018e79c", "2ec1010b9270e55e722bbf540f81a7d310365f6a", "49e72b668dcde9fe57a8ed60e6890a5622733f19", "22a5eeb8608b35e371b7544a54fabeadca8866e3", "86682116997dc1b2f8da25f4315d6f3f4d458067", 
"b60656fea43ed3389594c167f1837afed862bb62", "00562cead3e4d35f03edd286b40581e8781bf339", "9611f849a05e5880ef90725e19eaa40d1805317d", "7cc6a150b1ba5b40c8e5aca6e94c817d5bebbc5c", "225c357ee5490febc4fe9ca002fbf08b29adec46", "14dc5effd28d22cf7fc8aa6a1be8ae2d37859891", "8a29510d57bed4b33bde2ea7b6beb8c8a1950b92", "1939f908a8b47e16617bbba22d08e97ad3eadba8", "1cb9aa0116af7d9e61ffabfa951153e9f4e43779", "1360a9e2fcd1504effe81f54bbd20ab5b5a07685", "90569b27c21f400cd818a58005fecd9f2033048a", "47564fdfc63a1a36102b8b6c74f978bbc5190c5a", "6a1df9dae902f3d377f9c85ba9732b8d2270bf2b", "02beed2e1350a0d0b01bb9622081cb93a965a716", "a10f35a6f3888977bdc1e360b01d17b5252bb67d", "1b2457906994b5942b0ecc6e0ca38e2e3b2450c7", "0227e83202440c13c4c2b97b49ef7c64dfbd52c3", "35dde872db190b3bc990ba94eed2a7f9d95c0126", "961487973d4b33f96406fddbfcf1235dc587571f", "d878fb5a7d1ea14649f590de5ebb806d1414f0b6", "683c8f5c60916751bb23f159c86c1f2d4170e43f", "2420f3bd82b9b9a4fc99fa1e3b79b4cb6d6c3fef", "3cbf6df60d91d4f2422827c46ec4f85fb45bbeb7" ], "paperAbstract": "Encrypted databases, a popular approach to protecting data from compromised database management systems (DBMS's), use abstract threat models that capture neither realistic databases, nor realistic attack scenarios. 
In particular, the \"snapshot attacker\" model used to support the security claims for many encrypted databases does not reflect the information about past queries available in any snapshot attack on an actual DBMS.\n We demonstrate how this gap between theory and reality causes encrypted databases to fail to achieve their \"provable security\" guarantees.", "pdfUrls": [ "https://eprint.iacr.org/2017/468.pdf", "http://www.cs.cornell.edu/~shmat/shmat_hotos17.pdf", "http://doi.acm.org/10.1145/3102980.3103007", "http://eprint.iacr.org/2017/468" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/215b4c25ad34557644b1a177bd5aeac8b2e66bc6", "sources": [ "DBLP" ], "title": "Why Your Encrypted Database Is Not Secure", "venue": "HotOS", "year": 2017 }, "21b2da379bffca3c051351e644c63bfb4d9859ce": { "authors": [ { "ids": [ "1729559" ], "name": "Mohammad A. Islam" }, { "ids": [ "1691652" ], "name": "Shaolei Ren" }, { "ids": [ "1706859" ], "name": "Adam Wierman" } ], "doi": "10.1145/3133956.3133994", "doiUrl": "https://doi.org/10.1145/3133956.3133994", "entities": [ "Channel (communications)", "Data center", "Kalman filter", "Maximum power transfer theorem", "Multitenancy", "Operator overloading", "Overselling", "Reflow soldering", "Side-channel attack" ], "id": "21b2da379bffca3c051351e644c63bfb4d9859ce", "inCitations": [ "39fe9262cfea3c16011b14fcd11062649e7956cc" ], "journalName": "", "journalPages": "1079-1094", "journalVolume": "", "outCitations": [ "26bc20f3fcce5484be6c3b2a66e58567b7174478", "4c059a8900d24058c9cb27b85df96cc430a79970", "25b24d6cc547c80e3a1ad037082694a5f6a2b8a9", "9ba533adf4776c0a708d2f5a2431ce2ab35bf915", "3bd6bc388dea99b023c6695bd287eac8f5d28c0a", "6956d9efde743bf7774e2089dbb75f8d3df8a5ce", "5a1bd2fe1724f5de0d6ab14959a5dd8600723d97", "4aee61ec4c87e48d9ddeb692fe515014ee265d68", "8aa09720221bdeef43e150fc7f6896f71600fb86", "0cf2cecac404e36220c6d257b31022c7b8475933", "ff661571ad1ba89537617d6876c5e876764471e7", 
"78e009cc05a6a832106d5ca6802ce56bef6b247f", "1f076c900897fd89d2c3c7e506c2f5386d5dc42a", "07c8dc1238106ed94d5357b72e4bfebd256f162f", "f17317e0049aeb2a88d1cbd0f9aae4f91a1366ff", "de7661a8df52b761d6f1cb73bcb4ad777939bfa7", "44dd35e0d8a8ca29400b2bf4a19a1c33c22ba425", "54754cbd5011c059af8358b162ffd9ffbcb51f39", "520d87e06ac5ed55ff5ca2d37430083de90606d2", "ce0b5fbe51893d025a09ed9f8d6057f5ff838076", "0dae1dc977b9943ed1216cf86853df3be2510b80", "08632fe2b934ed15d3499e7321282c81adc2c390", "8d48628223451c494346d0a2c9a0b47eb379a546", "80edb3beef6cc2528ec16ec3d84b6a78cb458dec", "6b8969b865ccfcbf52309e66e8bd084ab71e0d8e", "a6a8313f30420c60e7eaa9f34ea5a41833695af1", "565ed53f4a40a98b18a389a3790a7fe62a525f58", "8da67302117617938b35d1c03526eb69c96541ce", "371d64572d5e2f6af298a42d84aca5807cd19946", "19e841c3fad9ba176c5e3cffdd40107b525951d6", "8ea2b1904ffca328d982539c3eafd8113325c23a" ], "paperAbstract": "The power capacity of multi-tenant data centers is typically oversubscribed in order to increase the utilization of expensive power infrastructure. This practice can create dangerous situations and compromise data center availability if the designed power capacity is exceeded. This paper demonstrates that current safeguards are vulnerable to well-timed power attacks launched by malicious tenants (i.e., attackers). Further, we demonstrate that there is a physical side channel --- a thermal side channel due to hot air recirculation --- that contains information about the benign tenants' runtime power usage and can enable a malicious tenant to time power attacks effectively. In particular, we design a state-augmented Kalman filter to extract this information from the side channel and guide an attacker to use its maximum power at moments that coincide with the benign tenants' high power demand, thus overloading the shared power capacity. Our experimental results show that an attacker can capture 54% of all attack opportunities, significantly compromising the data center availability. 
Finally, we discuss a set of possible defense strategies to safeguard the data center infrastructure against power attacks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133994", "http://www.ece.ucr.edu/~sren/doc/slides/slides_ccs_2017.pdf", "https://acmccs.github.io/papers/p1079-islamA.pdf", "http://www.ece.ucr.edu/~sren/doc/paper/ccs_2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/21b2da379bffca3c051351e644c63bfb4d9859ce", "sources": [ "DBLP" ], "title": "Exploiting a Thermal Side Channel for Power Attacks in Multi-Tenant Data Centers", "venue": "CCS", "year": 2017 }, "21be843f22e313bac6d1dc19fc53535f9b413033": { "authors": [ { "ids": [ "1814141" ], "name": "Akihiro Tabuchi" }, { "ids": [ "2814225" ], "name": "Masahiro Nakao" }, { "ids": [ "2575510" ], "name": "Hitoshi Murai" }, { "ids": [ "2616076" ], "name": "Taisuke Boku" }, { "ids": [ "1744801" ], "name": "Mitsuhisa Sato" } ], "doi": "", "doiUrl": "", "entities": [ "Address space", "Assignment (computer science)", "Benchmark (computing)", "Cg (programming language)", "Compiler", "Distributed memory", "Fortran", "Graphics processing unit", "Message Passing Interface", "Model\u2013view\u2013controller", "NAS Parallel Benchmarks", "Non-blocking algorithm", "OpenACC", "Parallel computing", "Partitioned global address space", "Pin grid array", "Programmer", "Programming model", "View model", "Viewpoint", "Xeon Phi" ], "id": "21be843f22e313bac6d1dc19fc53535f9b413033", "inCitations": [ "33595f1ada823f49766b981b546214f60ad14b4f", "7c0c8026ea9a9887d37834284bbd35bd7cd3e606" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "625-634", "journalVolume": "", "outCitations": [ "49c866296354ab54c42d234645cf0700ff4a7315", "6cae6a77516f387a9e1fb7c886611b50372ba4a8", "da78156c507c5e6f100ba4b3c6dc5ca977e2bea0", "8263c17277c2238c18c98f72ee980247b9b68095", "0a73bb2ec3b4f828c7b3f1e57d0336420980bbaa", 
"5e6ffbc007952535deb2b6b0d1ee30733c6bf70d", "38c48d4a31ab050c6e750cdae21e00421172f694", "2217c00471b61eb98a99674bc10cb71df3b46406", "5048b1199db383beda869e742691c22ca15e1d56", "1cbe65e0a338601c108c696d754b4e146c004b8a", "50581f45db850ecef936b95174b134916b612d99", "3bda9a412d20fdd74ef96a845adfa800d2aa7d9a", "5d25b4a77268437aa669e272cc81b56ed184e0b6" ], "paperAbstract": "Clusters equipped with accelerators such as graphics processing unit (GPU) and Many Integrated Core (MIC) are widely used. For such clusters, programmers write programs for their applications by combining MPI with one of the available accelerator programming models. In particular, OpenACC enables programmers to develop their applications easily, but with lower productivity owing to complex MPI programming. XcalableACC (XACC) is a new programming model, which is an \"orthogonal\" integration of a partitioned global address space (PGAS) language XcalableMP (XMP) and OpenACC. While XMP enables distributed-memory programming on both global-view and local-view models, OpenACC allows operations to be offloaded to a set of accelerators. In the local-view model, programmers can describe communication with the coarray features adopted from Fortran 2008, and we extend them to communication between accelerators. We have designed and implemented an XACC compiler for NVIDIA GPU and evaluated its performance and productivity by using two benchmarks, Himeno benchmark and NAS Parallel Benchmarks CG (NPB-CG). The performance of the XACC version with the Himeno benchmark and NPB-CG are over 85% and 97% in the local-view model against the MPI+OpenACC version, respectively. Moreover, using non-blocking communication makes the performance of local-view version over 89% with the Himeno benchmark. 
From the viewpoint of productivity, the local-view model provides an intuitive form of array assignment statement for communication.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101198" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/21be843f22e313bac6d1dc19fc53535f9b413033", "sources": [ "DBLP" ], "title": "Implementation and Evaluation of One-Sided PGAS Communication in XcalableACC for Accelerated Clusters", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "21c24920914a8d781ffb43d08ba8f0d916968007": { "authors": [ { "ids": [ "3459901" ], "name": "Aaron Harlap" }, { "ids": [ "40044088" ], "name": "Alexey Tumanov" }, { "ids": [ "33984867" ], "name": "Andrew Chung" }, { "ids": [ "1707164" ], "name": "Gregory R. Ganger" }, { "ids": [ "1974678" ], "name": "Phillip B. Gibbons" } ], "doi": "10.1145/3064176.3064182", "doiUrl": "https://doi.org/10.1145/3064176.3064182", "entities": [ "Agile software development", "Backup", "Elasticity (cloud computing)", "Grid computing", "Machine learning", "Proteus", "Server (computing)" ], "id": "21c24920914a8d781ffb43d08ba8f0d916968007", "inCitations": [ "1804e67480b9c46b377d0a102e26d3a9a1a9139b", "13e388ab3495d313ae6838b26e8d34517a67e698", "537efae13f33ad932034b8ad1db72a83d3691473", "4ce8ad1513e84cb464efa68827119295530ebaa4", "21d2fe357a178d36a50398b05e0046b7b500b109", "d3a9745c6359de94008abb4db42b75cf9d30cc3e" ], "journalName": "", "journalPages": "589-604", "journalVolume": "", "outCitations": [ "3a043714354fe498752b45e4cf429dbae0fb2558", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "1637ac4fed83b8309df2de07fbeb8b2511bb1170", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "0f9215aaf5a8376461ff3ec504a53172ce827647", "17f70b9d1fcf3b31948ffa578ac89399751fe73d", "0e9bac6a2b51e93e73f7f5045d4252972db10b5a", "3c029e72f5c75c8dd87a6acd43d05f23407e39cf", "0ad8e89091eed09217e66adc98136126addc2619", 
"0c4867f11c9758014d591381d8b397a1d38b04a7", "12635bdd3bd32f09c85a9070977a281fcb32ff61", "51694f797b0d4a7e03b1e9a6587a9d4976e92297", "bbc2a698c2fb2b76e256cc51a9d7c37765ab51b6", "2b3113b7fda6414548e88fc664f3be96d5209830", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "0910af95b5d45964060d62162ebedba0dcc9a4c1", "080aebd2cc1019f17e78496354c37195560b0697", "043afbd936c95d0e33c4a391365893bd4102f1a7", "4f4ab1e37ce1cf6bd5bae36db01f2bfaf6aafc1d", "3dff11679346f5344af1018cad57fa14cc349f2f", "31bd159b02068fbe1994b7a5e9d7b91adab0d142", "4f86fa28602d9503a8575c5b31082284abc8415c", "94859f850f345629c23526e1155aa9deb1852491", "0546fa6622b8b8db8527be777a692d88c5c037b0", "215aa495b4c860a1e6d87f2c36f34da464376cc4", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "b293405e9b3cfac8c58083b38bdc85d18dd0c187", "0b7c1bcd0289058b5dfc0d3ff114972712bc7f1a", "05be0db01d70bcce9530b462ab2368f9e15127d9" ], "paperAbstract": "Many shared computing clusters allow users to utilize excess idle resources at lower cost or priority, with the proviso that some or all may be taken away at any time. But, exploiting such dynamic resource availability and the often fluctuating markets for them requires agile elasticity and effective acquisition strategies. Proteus aggressively exploits such transient revocable resources to do machine learning (ML) cheaper and/or faster. Its parameter server framework, AgileML, efficiently adapts to bulk additions and revocations of transient machines, through a novel 3-stage active-backup approach, with minimal use of more costly non-transient resources. Its BidBrain component adaptively allocates resources from multiple EC2 spot markets to minimize average cost per work as transient resource availability and cost change over time. 
Our evaluations show that Proteus reduces cost by 85% relative to non-transient pricing, and by 43% relative to previous approaches, while simultaneously reducing runtimes by up to 37%.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064182", "http://www.pdl.cmu.edu/PDL-FTP/BigLearning/Proteus.pdf", "http://www.cs.cmu.edu/~15719/lectures/719-s17-spot-proteus.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/21c24920914a8d781ffb43d08ba8f0d916968007", "sources": [ "DBLP" ], "title": "Proteus: agile ML elasticity through tiered reliability in dynamic resource markets", "venue": "EuroSys", "year": 2017 }, "21cc283d896411c5dbde9641daf52a88c8c455e5": { "authors": [ { "ids": [ "39941616" ], "name": "Soudeh Ghorbani" }, { "ids": [ "1758273" ], "name": "Brighten Godfrey" } ], "doi": "10.1145/3064176.3064201", "doiUrl": "https://doi.org/10.1145/3064176.3064201", "entities": [ "Baseline (configuration management)", "Causality", "Forwarding plane", "Naivety", "Programmer", "Scalability", "Software-defined networking", "Vector clock" ], "id": "21cc283d896411c5dbde9641daf52a88c8c455e5", "inCitations": [], "journalName": "", "journalPages": "32-47", "journalVolume": "", "outCitations": [ "3f0fe61d13447ebdc7222e568064532252fbb532", "6f4617c67263cb4e9a185a9a35781a9c4b3cc9d4", "05a618847e4f08e5bca29dff732757779722b2e0", "088536c44bcbc67165dc064ba4af0bc015d58a65", "1cbbdf58133f763813b3a61b8faf2f5ab74464b7", "089b10645ee63cd9c5bb4ab661141dd813408e15", "3d95c58c595130caa94cdc1fcdb0f9b9f12cb805", "12af8de1e914b50b369dfcfc5edfbf2e2daf4b31", "6c60edaa919c7a238345f866d7e20bee2897fb6c", "a786e8f50bdf579968ea73fa2817c30f80092b3c", "55bef5db971deed1358bcb2b375d6832b9ba6a1b", "27f4001214ce0d449eb05d33626f444526accc7c", "4c5d4902583313fea69033c6e528b95a42353da6", "6ed044b659e89eb05a438037692e0c79cea04b07", "3dde3fec553b8d24a85d7059a3cc629ab33f7578", "17d122f143726288da193a767fd0a7634010f0ff", "66cce3d35853a916a5105eb56b6b896b0a6ea036", 
"4534c15b4760cb29a0ce74fcd43297fe83f2f277", "61797d1af87a27a89b5bb5fd4a03de061e5dc0e3", "340cd62b8b448ee43c61dd75a0a16f5aebd69b10", "5b999d36d5230eca01532b357c7cf338a5e0d641", "3967126afbca6a722d7257cd671fe5e4979358a5", "663e064469ad91e6bda345d216504b4c868f537b", "6216b7501b5529e5844a287ae3dbccee9889189c", "2a628e4a9c5f78bc6dcdf16514353336547846cc", "4687fdf3c77ef00700fdf1399f7dd81bfe87ef97", "23c59835af89599cc630797e0c13bb0da3ed53bf", "16b0a02e2db3e3beb3dd0ce83e610549f271c9f1", "016ca0cee16190907a911d874e98fbc6dfa5a36c", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "39bfff0c76441e0af69f3d61915f2ad930bf6b27", "85cacb208b2e3aa16fd39f75dc858d44092782b6", "3c92e739842e82bf03d32b1e7820c2c85f386d6f", "7cf59608ca721037e634bfa610a6044cc82724f4", "0fba2b923a713087a359a07be4c7ad19a9dd0a5d", "06b52f11e2d1bb9b8f019ddcaa0607db45fc23b8", "a7037e7bcc9b582a521d459fffb0b2d3ca0d927f", "00cf4b13a1bd202ccebe1e7bd0587f11e98ec3d6", "04384f21bd772f18c7afa73956bcd55010ae1a4d", "96d2a84e57ff1475394b7702473f3e8e868feb68", "22db0fcb79e145483e3a284206ba75f2ee45fb5b", "058f6752d85a517aae298586fdf117acdd7560ea", "08ddde0eaf4925704222135788f79fe293c5894d", "7b3533216d5064660458d3754a18fc69f8fbeba0", "4cd7f86b305d54ba3a2c988c6d06dd221a05935d", "1b15861b55906d8eabce1e3c9f156ade8d1b4783", "055598e1e221d0758c7d83e311516b44beeb7ced", "252b9ae134790148c054d99c46aa68c91d1cfd79", "025512dc14277759843f1f8ed96ced4eff4898c2" ], "paperAbstract": "A key use of software-defined networking is to enable scale-out of network data plane elements. Naively scaling networking elements, however, can cause incorrect behavior. 
For example, we show that an IDS system which operates correctly as a single network element can erroneously and permanently block hosts when it is replicated.\n In this paper, we provide a system, COCONUT, for seamless scale-out of network forwarding elements; that is, an SDN application programmer can program to what functionally appears to be a single forwarding element, but which may be replicated behind the scenes. To do this, we identify the key property for seamless scale out, weak causality, and guarantee it through a practical and scalable implementation of vector clocks in the data plane. We prove that COCONUT enables seamless scale out of networking elements, i.e., the user-perceived behavior of any COCONUT element implemented with a distributed set of concurrent replicas is provably indistinguishable from its singleton implementation. Finally, we build a prototype of COCONUT and experimentally demonstrate its correct behavior. We also show that its abstraction enables a more efficient implementation of seamless scale-out compared to a naive baseline.", "pdfUrls": [ "http://pbg.cs.illinois.edu/papers/ghorbani17coconut.pdf", "http://doi.acm.org/10.1145/3064176.3064201", "http://publish.illinois.edu/science-of-security-lablet/files/2014/05/Coconut-Seamless-Scale-Out-of-Network-Elements.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/21cc283d896411c5dbde9641daf52a88c8c455e5", "sources": [ "DBLP" ], "title": "COCONUT: Seamless Scale-out of Network Elements", "venue": "EuroSys", "year": 2017 }, "21cfe47a6db94a2a2056a57bf4b321359d2cf1b8": { "authors": [ { "ids": [ "40400989" ], "name": "Matt Martineau" }, { "ids": [ "2080824" ], "name": "Simon McIntosh-Smith" } ], "doi": "10.1109/CLUSTER.2017.83", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.83", "entities": [ "Access time", "Algorithm", "Central processing unit", "Embarrassingly parallel", "FLOPS", "Graphics processing unit", "High memory", "Hyper-threading", 
"Linearizability", "Load balancing (computing)", "Locality of reference", "Mathematical optimization", "Memory bandwidth", "Monte Carlo", "Monte Carlo method", "Parallel computing", "Supercomputer", "Xeon Phi" ], "id": "21cfe47a6db94a2a2056a57bf4b321359d2cf1b8", "inCitations": [ "089524773af07999e8d6468a7653b2929243536b" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "498-508", "journalVolume": "", "outCitations": [ "22801f339af1a18e7520be75e3520314f6b49059", "c1cb6f92bde6b9aa7d338b13e5e0310a8cd726c1", "61e492b448c4f1d188f2835fb4eca96ba92aac28", "8401ec377d947f4f89361fd7b8a8758a9f66e82f", "db568a20e7d10e04182cd6223b5191d584ce0371", "089524773af07999e8d6468a7653b2929243536b", "cacb79732312eafcd9690e1033b371ed09123012", "ccc7980c9829964ae6201c5fd3a5ff41f8e51c96", "cb3e056f8d9833e8e4256f05a893c5abe2ddef93", "73dd2c3eead22a54b46754e252f525f8d74a22e9", "9a5261d7fc18f67e2d1e9e78d63de163c867e6cb", "9bf015b9e91e94c62073424a13ab131735e9f824", "45406ceb9d0e57674ad0bdb2c786520e731428bc", "38bc7fc62136ec779d91b86b6e960a06d67b4a97" ], "paperAbstract": "In this research we describe the development and optimisation of a new Monte Carlo neutral particle transport mini-app, neutral. In spite of the success of previous research efforts to load balance the algorithm at scale, it is not clear how to take advantage of the diverse architectures being installed in the newest supercomputers. We explore different algorithmic approaches, and perform extensive investigations into the performance of the application on modern hardware including Intel Xeon and Xeon Phi CPUs, POWER8 CPUs, and NVIDIA GPUs.When applied to particle transport the Monte Carlo method is not embarrassingly parallel, as might be expected, due to dependencies on the computational mesh that expose random memory access patterns. 
The algorithm requires the use of atomic operations, and exhibits load imbalance at the node-level due to the random branching of particle histories. The algorithmic characteristics make it challenging to exploit the high memory bandwidth and FLOPS of modern HPC architectures.Both of the parallelisation schemes discussed in this paper are dominated latency issues caused by poor data locality, and are restricted by the use of atomic operations for tallying calculations. We saw a significant improvement in performance through the use of hyperthreading on all CPUs and best performance on the NVIDIA P100 GPU. A key observation is that architectures that are tolerant to latencies may be able to hide the negative characteristics of the algorithms.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.83" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/21cfe47a6db94a2a2056a57bf4b321359d2cf1b8", "sources": [ "DBLP" ], "title": "Exploring On-Node Parallelism with Neutral, a Monte Carlo Neutral Particle Transport Mini-App", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "21d388edb22709775a8d926b7d645fa60bb1fce0": { "authors": [ { "ids": [ "1694864" ], "name": "Jia Wang" }, { "ids": [ "3113725" ], "name": "Vincent Wenchen Zheng" }, { "ids": [ "1779706" ], "name": "Zemin Liu" }, { "ids": [ "2491583" ], "name": "Kevin Chen-Chuan Chang" } ], "doi": "10.1109/ICDM.2017.57", "doiUrl": "https://doi.org/10.1109/ICDM.2017.57", "entities": [ "Artificial neural network", "Baseline (configuration management)", "Confusion and diffusion", "Data model", "Deep learning", "Directed acyclic graph", "Feature learning", "Machine learning", "Recurrent neural network", "Topo" ], "id": "21d388edb22709775a8d926b7d645fa60bb1fce0", "inCitations": [ "b18f628cb949750036fff7bc66e50765532ac2fe" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "475-484", "journalVolume": 
"", "outCitations": [ "130271241a3718323e439440d897ec26acfebf06", "371d4ea1ba9260092bc4689cb152cc9b20ca5e55", "25f0625a92f6054b11057423111f9285c78376fe", "77e8fe0ea9d6e44e6bbe10cb87ed0c9dba860f1c", "1594d954abc650bce2db445c52a76e49655efb0c", "003d49152831204c029f8d7a4b9e4f154d6ed8ad", "1e9e87fc99430a82621810b3ce7db51e339be315", "652dc7c3a37f7d3ccc293f0a1187ca0888412dac", "272216c1f097706721096669d85b2843c23fa77d", "5854a866716cedf321e13a8158d87ad55ed3bbb2", "d7f9c3253552e13f24c3b73bc055ef60388af57c", "89ee56421940d3d84bb0e6a32d1e6b57d871ff59", "51b46f63038fb9571f6e922002e2f731824a09c3", "4e88de2930a4435f737c3996287a90ff87b95c59", "858cdf1ad6822e644c53c7751f9b5bda40104262", "b790f9ae49cdee5354eacfc7897ab9752acd5e2a", "039b08ab33e94c18e4976bad3bc363a9f8d24213", "146f6f6ed688c905fb6e346ad02332efd5464616", "c2fd72cb2a77941e655b5d949d0d59b01e173c3b", "00f1ee4a4264480edc8355a26b8dfbbdfc2b33aa", "2059be0aa4a57d00d204c9ccdf4deeed2c984e07", "0710099aee18aabc0605c52628c29ac5ad94ec60", "503755661e0b15e4357a5bdd8e8070fc6cb2fb9f", "237c9373585695c40bcd227e45a9e1e683520f69", "0834e74304b547c9354b6d7da6fa78ef47a48fa8", "3145fc2e5cbdf877ef07f7408dcaee5e44ba6d4f", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", "f4ef2c9a97c183b24d08ca67f6f55b98d441e8e6", "1c059493904b2244d2280b8b4c0c7d3ca115be73", "0ca0b85603a5054a7d694d3b68ee00cc61e35804", "1a67622ca58aa851afe36ad6c6e78f9fb9d691d2", "1b7a0048801f9d43dc48a8f04367be813146b05a", "47310b4e14990becd5d473a07092ded4df2fbef1", "1b1df9f75ee6f27433687dad302387f811dab64d", "28ad3064ab8237dbc5c1e3d79451641e63fac2da", "097b2eb8f97076644243a3c6f1c958e444578854", "79db418b4db4acbbb76608498a4839b7ec62af43", "04b52c8230c3f9f4f4032b06458069d81c8f07b2", "1073c29baf6d9cfc7b7b028024b58ed070696da3", "1762baa638866a13dcc6d146fd5a49b36cbd9c30", "21968ae000669eb4cf03718a0d97e23a6bf75926", "05caf9e66947d863bbbc5ff72d6bd84705954111", "c8cee328b1774c2d38bea10f9fe9d081d8074307", "c5d7763707d3504f0dec73c39424ec688b8692a3", 
"4e63bd70f3515ea64b2fe295e1fe814b37d5ea90" ], "paperAbstract": "In this paper, we study the problem of using representation learning to assist information diffusion prediction on graphs. In particular, we aim at estimating the probability of an inactive node to be activated next in a cascade. Despite the success of recent deep learning methods for diffusion, we find that they often underexplore the cascade structure. We consider a cascade as not merely a sequence of nodes ordered by their activation time stamps; instead, it has a richer structure indicating the diffusion process over the data graph. As a result, we introduce a new data model, namely diffusion topologies, to fully describe the cascade structure. We find it challenging to model diffusion topologies, which are dynamic directed acyclic graphs (DAGs), with the existing neural networks. Therefore, we propose a novel topological recurrent neural network, namely Topo-LSTM, for modeling dynamic DAGs. We customize Topo-LSTM for the diffusion prediction task, and show it improves the state-of-the-art baselines, by 20.1%-56.6% (MAP) relatively, across multiple real-world data sets.", "pdfUrls": [ "https://arxiv.org/pdf/1711.10162v2.pdf", "https://arxiv.org/pdf/1711.10162v1.pdf", "http://arxiv.org/abs/1711.10162", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.57" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/21d388edb22709775a8d926b7d645fa60bb1fce0", "sources": [ "DBLP" ], "title": "Topological Recurrent Neural Network for Diffusion Prediction", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "21dc008dc35b10244b3f50dbd688453c0be46daa": { "authors": [ { "ids": [ "26631698" ], "name": "Lian Du" }, { "ids": [ "2592610" ], "name": "Tianyu Wo" }, { "ids": [ "1894612" ], "name": "Renyu Yang" }, { "ids": [ "1767378" ], "name": "Chunming Hu" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.44", "doiUrl": 
"https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.44", "entities": [ "Copy-on-write", "Deployment environment", "Docker", "Holism", "Observable", "Scalability", "Software deployment", "System deployment" ], "id": "21dc008dc35b10244b3f50dbd688453c0be46daa", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "332-339", "journalVolume": "", "outCitations": [ "708a83bf3f47fb4bab7d63b7f53b19d10343095a", "02c29ed1b0b713877b5fbc41519698b5423c4bc6", "3a043714354fe498752b45e4cf429dbae0fb2558", "797ed8ca77ced9c785aa5e9af97d8d35e5d8aabb", "f2c6dc63068903a38ddadb19dff66c0928c5e387", "43d03d6e9b192af04733a4b288068cd03ec56e9c", "594710511ce2177ff7dbbc62fa75dbf14fc7ca26", "2a86965cb89d6936aceb272295eb98794f58ceb8", "0d9aea55a54ccc6ab64995d70bf6ae464af25f0d", "5e0a980d0cffc07dbff986b82a155f241dca3344", "4650259fb4aadb376fd5994f9ab9dd07a4f83511", "2da760f90c3d2bf6598becdde9063093f488548c", "8bad70e3087bf7b4579734ecafd452281dd1c764" ], "paperAbstract": "Container technology has been prevalent and widely-adopted in production environment considering the huge benefits to application packing, deploying and management. However, the deployment process is relatively slow by using conventional approaches. In large-scale concurrent deployments, resource contentions on the central image repository would aggravate such situation. In fact, it is observable that the image pulling operation is mainly responsible for the degraded performance. To this end, we propose Cider — a novel deployment system to enable rapid container deployment in a high concurrent and scalable manner at scale. Firstly, on-demand image data loading is proposed by altering the local Docker storage of worker nodes into all-nodes-sharing network storage. 
Also, the local copy-on-write layer for containers can ensure Cider to achieve the scalability whilst improving the cost-effectiveness during the holistic deployment. Experimental results reveal that Cider can shorten the overall deployment time by 85% and 62% on average when deploying one container and 100 concurrent containers respectively.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.44" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/21dc008dc35b10244b3f50dbd688453c0be46daa", "sources": [ "DBLP" ], "title": "Cider: a Rapid Docker Container Deployment System through Sharing Network Storage", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "21df6ab1f5524636a6ae7517654730cf56be1f49": { "authors": [ { "ids": [ "3446403" ], "name": "Jack Doerner" }, { "ids": [ "2208880" ], "name": "Abhi Shelat" } ], "doi": "10.1145/3133956.3133967", "doiUrl": "https://doi.org/10.1145/3133956.3133967", "entities": [ "Access time", "Byte", "Computation", "Computational complexity theory", "Data structure", "Initialization (programming)", "Kibibyte", "Overhead (computing)", "Phelim Boyle", "Random access", "Secret sharing", "Secure multi-party computation" ], "id": "21df6ab1f5524636a6ae7517654730cf56be1f49", "inCitations": [ "4fa70702fbd03a0a783f9737f46c829ed3d77d16", "bf9580375a1595d77fe9630fbf68c37a67903b3c" ], "journalName": "", "journalPages": "523-535", "journalVolume": "", "outCitations": [ "5842e372976095e8dfb7bcc24fa57d41623435f8", "c19bc1e9f84d6baefec08caa1eba6bb41aa97e47", "15c6894c5a559346947f4437ec799005a76c9762", "15509fdb7fc7bf065fcdf776b38cf3d72d10c113", "61a297247f899995789dc6e32bcf3972502374b8", "1043df5d98333e60cc909e090920735bcda5b162", "854e13ba80f65a7cfc87a656f31549fcc90323e9", "20b63210954f7c5a70664f301dcd7196856ccfa7", 
"01ca4dd53f226dff9da314cc35d2fa6ee1979e57", "593c4242cc02b94c9511cc05e67439725a5c08e1", "0a27a75f47af3cf52bdcd34f5b82bc9af7249c12", "cbf908e366aaefc57c6de9d608232a439b7e99c9", "13dd09fb6e6310ba7c9874fdd60016b0be2e94d2", "2960d451a630c31f8e9601accec44c90f1188cd4", "4d8e2657d6c9032c28ac4878a442e83dd99b672a", "74596dc8853d4b386018d514b4184b3ba679d118", "77a8cb33c757f361a87dfcc88a135086783ddb9b", "6929d63026d3b75bde47e81eadf706add6b41787", "3657ea546cc04bb4e618c56fbe8f26354bc8cbf4", "6b3aea37625702e98e5033e1107403e319b4df01", "46527c14457cf84d1cf26487d6b4c31f4825db71", "5fda77c2967f4ff3d767b0a3d35fd312a43a7eab", "10d5282a8d25c4490338d5fb8ad2f57b8646ad38", "c910d0aa5333466278e6ab3dc064eadc7ed424dd", "e5d880d51f76894a3fb3d01eff9e603aee9b9e13", "8164e5930ffea56c960e449386b7ac3b58b8eb39", "15c76f461543c44a8b9d8b32b2bbd18c595aea52", "0eefa33a1ad9118ba91a2e4a88e555b453a952f1", "362246709de205ec0ac5b34e07306839c38d5a3a", "475b10209d1ed13b079d62aca57ec31da4284bcd", "e73f1cc40702aac3ee6aa8f087fd3b0e700a2257", "127adf86474103b6f05afcc5bceda45bb5e34a8a", "1c2d161c5bb15efd73311d0a3223aee773d38cca", "27e9745fc94ccf6039dd1804cbb99760544fc59b", "f71db4d70d4cc9e931a63dde7a6db8dad10a61a0", "2e0b1d4c03b9ef87518bbbafecd9678dd8c645c9", "cfad565bf99030e75fde317171a7e03ecfed001f", "022d421adcba40d8a4e0051417bab919ec5405cd", "a98f1d700c715c5f5d405409eb5c28dfd806040c", "f98cfc3c092d69c068054698bcb4c1b6840644c6", "076e9f5d5b3e813b0cfa5dd3e47f1b8591136bf2", "55aafd6b8e9816203006c7e5c37e2e2fb8451113", "0e0427aedfed65c8dd688c094b181feacf4eaab4", "db5be3b1ab35769a57c17521599e44a8d63bcf07", "6f6e16de3b99c67e9fcdf7a98f283880159ba590", "04325e9c0f526d655f51cc922c34718d22741faa", "2905a5c4da8c9a0970f078a211742316ef0ab77d", "31445e3bd3672ed743c4a089cc0db4f23357f0f2", "92eaba06af12761b5c64b84e6028d21cd05af9dd" ], "paperAbstract": "We design and implement a Distributed Oblivious Random Access Memory (DORAM) data structure that is optimized for use in two-party secure computation protocols. 
We improve upon the access time of previous constructions by a factor of up to ten, their memory overhead by a factor of one hundred or more, and their initialization time by a factor of thousands. We are able to instantiate ORAMs that hold 234 bytes, and perform operations on them in seconds, which was not previously feasible with any implemented scheme.\n Unlike prior ORAM constructions based on hierarchical hashing, permutation, or trees, our Distributed ORAM is derived from the new Function Secret Sharing scheme introduced by Boyle, Gilboa and Ishai. This significantly reduces the amount of secure computation required to implement an ORAM access, albeit at the cost of O(n) efficient local memory operations.\n We implement our construction and find that, despite its poor O(n) asymptotic complexity, it still outperforms the fastest previously known constructions, Circuit ORAM and Square-root ORAM, for datasets that are 32 KiB or larger, and outperforms prior work on applications such as stable matching or binary search by factors of two to ten.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133967", "https://eprint.iacr.org/2017/827.pdf", "http://eprint.iacr.org/2017/827", "https://acmccs.github.io/papers/p523-doernerA.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/21df6ab1f5524636a6ae7517654730cf56be1f49", "sources": [ "DBLP" ], "title": "Scaling ORAM for Secure Computation", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "2205f353648f9ef13010d0f117cc34103d98f01f": { "authors": [ { "ids": [ "38816279" ], "name": "Martin Maas" }, { "ids": [ "1760896" ], "name": "Krste Asanovic" }, { "ids": [ "1717872" ], "name": "John Kubiatowicz" } ], "doi": "10.1145/3102980.3103003", "doiUrl": "https://doi.org/10.1145/3102980.3103003", "entities": [ "Cloud computing", "Computer vision", "Data center", "Garbage collection (computer science)", "High- and low-level", "Java", "Machine learning", "Platform as a service", "Python", 
"Runtime system", "Scala", "Substrate (electronics)" ], "id": "2205f353648f9ef13010d0f117cc34103d98f01f", "inCitations": [ "081fdeea36d4b56a71e87b5b0de191aa368261c8" ], "journalName": "", "journalPages": "138-143", "journalVolume": "", "outCitations": [ "866bce77ca5201d182c0c43090eb75bf126efba6", "a29afef550bf4edbf3293a50ef3fdb785ff1e5a3", "4256339f61d809e5092b68a505f7d37099cbd341", "2194c3460ab71f3826db00b045b2ae590c753319", "8d67bac352dcd43c9c08f917ba8c4bebb444b55d", "188c0013d5f79072ee97f8a48190cbe54b2009b1", "676e50a4d2141ae66a0d2aafcf79c8c989fcce33", "43393a561914f05be312a1dff5a757cbc384d1a1", "5142c8273e0273230a10d836cdb26b029dc5cef5", "596df6fb4d50c7886948b08f525c4e3393d05a44", "210b791e74e2c262e2fc47622b56f248b805287d", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "225603198cc415d363db8a8a2bd30b0df3c963b1", "2a19d31a818be868bf39dcf258b36a86ef2ece3f", "28f7f43774bce41023f9912a24219e33612a3842", "6973083bca583e26a0d8e7709ce7b9888cf3ee69", "1eaa543205c3fc0cb4685f2c7e8a631fa7776a74", "28a9dca6faeead651539c700bef413203b2b876e", "7129b305ce45f83127e928e8510da9fae0783905", "228c64750a11823a712f7414711e3b073b861c28", "1aa7ef5accd88ea4dd10a4576fcff7ecef6c9756", "45fcaf11eaf31228a218a24663067dab509a1031", "5cfc936d12bbd8a0f100687b12b20e406215f30a" ], "paperAbstract": "The public cloud is moving to a Platform-as-a-Service model where services such as data management, machine learning or image classification are provided by the cloud operator while applications are written in high-level languages and leverage these services.\n Managed languages such as Java, Python or Scala are widely used in this setting. However, while these languages can increase productivity, they are often associated with problems such as unpredictable garbage collection pauses or warm-up overheads.\n We argue that the reason for these problems is that current language runtime systems were not initially designed for the cloud setting. 
To address this, we propose seven tenets for designing future language runtime systems for cloud data centers. We then outline the design of a general substrate for building such runtime systems, based on these seven tenets.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3103003", "https://people.eecs.berkeley.edu/~maas/papers/maas-hotos17-cloud30.pdf", "https://people.eecs.berkeley.edu/~kubitron/papers/holistic_runtime/maas-hotos17-cloud30.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2205f353648f9ef13010d0f117cc34103d98f01f", "sources": [ "DBLP" ], "title": "Return of the Runtimes: Rethinking the Language Runtime System for the Cloud 3.0 Era", "venue": "HotOS", "year": 2017 }, "2206c95041c87485666fad591fb2f4443dbb5f8e": { "authors": [ { "ids": [ "2825891" ], "name": "Giorgis Georgakoudis" }, { "ids": [ "1696759" ], "name": "Ignacio Laguna" }, { "ids": [ "1698312" ], "name": "Dimitrios S. Nikolopoulos" }, { "ids": [ "1772965" ], "name": "Martin Schulz" } ], "doi": "10.1145/3126908.3126972", "doiUrl": "https://doi.org/10.1145/3126908.3126972", "entities": [ "Compiler", "Experiment", "Fast Infoset", "Fault injection", "Fax", "Machine code", "Mental representation", "Supercomputer" ], "id": "2206c95041c87485666fad591fb2f4443dbb5f8e", "inCitations": [], "journalName": "", "journalPages": "29:1-29:14", "journalVolume": "", "outCitations": [ "185bd6d5af538cd2c4b8c863829baa88ffd1ef11", "599330fdb065760d76471232616706fb7d434271", "31f4bdde3501a9d52499668bf67f548220afbb79", "7bdbd4ae0c3e4503cc42f20afa673f255d04aa00", "6e7d3595414889e6a1edaf4d2fbfe2c2cdca21ac", "01c365d6a5d54a1e723c8a6de581162b8acb197e", "1b61f857c29d1e0a56dbc65ff1dcefec66e73cbe", "5037ba6bcd3b391a8cad4abeae7b6a39ca850c72", "8eae6e0b818dbb9bb1664ac85f1de1b9f11549df", "5616fb8a969587c3ff274b79efd8357ff2c9f79f", "5e3fb6a4514550dbdb1bfeb4e5705e4a7ffcc84f", "2194c3460ab71f3826db00b045b2ae590c753319", "ab2c7421a3f7fda4a95b6db79261f499d55bca32", 
"7f6c49645686f4814c01aca621341a0b244898b6", "ac7b0302d527e19cc4988d0482d62755d52fcf25", "674b9ffeb40cd64063bd1a0c69370f83b802ed76", "a7739333971a94a9b4e67248aa6bfb22f8c274f7", "1ea99fa4bd602a20137f86e34d290e4194781b85", "87853ba3a1cc43708deecc95d2ecad03fd8a0948", "52e1f3c9d0d4a84d12f100b4934ff71f9976f295", "bcebac9534876de44268c9ed6be701458c3008c0", "20c146b68b6a3cd15a187a788c6ad27ba994ff79", "0e00a3e0b0120dcdb89f0ee03534643090235ff5", "18992850afed53b60ce696e20374a1e1b3d9da22", "c1307a1e01a7d1832d119d0140845dc9a827c432", "a0fd149d853373fbc2982cee3b667fd5889be917", "0af7b9623e35555710f3a30177c5b0c61e4e30af", "0a84622ac7743998763aa8f5d1d1c04918bc6230", "01f0f1302cb3c1b23fd88fb75801c05e079b2d80", "2c8c0afd9114624f7c18858f9744819211ba4faa", "42f36452fe7ad5c61d2afe4955c50a69b47ad7ed", "007630d085e968b552b0baf7406d0a9b1695f553", "39ef5d362200126497b2f74c33338383dcc9589c", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "7072186e5f3d1009c7c8a0afea82734743584850", "32ef8d891edde06cc01357fa5c4d1ab7fe631720" ], "paperAbstract": "Compiler-based fault injection (FI) has become a popular technique for resilience studies to understand the impact of soft errors in supercomputing systems. Compiler-based FI frameworks inject faults at a high intermediate-representation level. However, they are less accurate than machine code, binary-level FI because they lack access to all dynamic instructions, thus they fail to mimic certain fault manifestations. In this paper, we study the limitations of current practices in compiler-based FI and how they impact the interpretation of results in resilience studies.\n We propose REFINE, a novel framework that addresses these limitations, performing FI in a compiler backend. Our approach provides the portability and efficiency of compiler-based FI, while keeping accuracy comparable to binary-level FI methods. 
We demonstrate our approach in 14 HPC programs and show that, due to our unique design, its runtime overhead is significantly smaller than state-of-the-art compiler-based FI frameworks, reducing the time for large FI experiments.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126972" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2206c95041c87485666fad591fb2f4443dbb5f8e", "sources": [ "DBLP" ], "title": "REFINE: realistic fault injection via compiler-based instrumentation for accuracy, portability and speed", "venue": "SC", "year": 2017 }, "220eb711e0efd2c67759f169ff14ba3efc186bbe": { "authors": [ { "ids": [ "2103014" ], "name": "Giulio Malavolta" }, { "ids": [ "2970940" ], "name": "Pedro Moreno-Sanchez" }, { "ids": [ "1828965" ], "name": "Aniket Kate" }, { "ids": [ "4436634" ], "name": "Matteo Maffei" }, { "ids": [ "40020483" ], "name": "Srivatsan Ravi" } ], "doi": "10.1145/3133956.3134096", "doiUrl": "https://doi.org/10.1145/3133956.3134096", "entities": [ "Bitcoin", "Blocking (computing)", "Composability", "Concurrency (computer science)", "Deadlock", "Hop", "Non-blocking algorithm", "Performance Evaluation", "Privacy", "Product change notification", "Provable prime", "Provable security", "Smart contract", "Software deployment", "Throughput", "Time complexity", "Universal composability" ], "id": "220eb711e0efd2c67759f169ff14ba3efc186bbe", "inCitations": [ "f41539e1d1dee746327b1273116b6057042fd935", "197ac36ed5dea16e31f7e5058b5ad7318b4a7e63" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "820", "journalVolume": "2017", "outCitations": [ "00e3756119a91432622f6982b59ecd24a1340fbe", "51b27a41ca1a33445a1041fcea84341fcf0b8c4c", "45f95b64995d014d376512e3123427696d1287fd", "4392166a1194010c844ec915694fd5c56da94301", "32be75d8ffb0bf3ff2c8c84e12054edc51e55bd2", "26ab9c27d995dadd553614045361ffb1afba9008", "0a1ab83fc0655ef514864bc8af79711d1224ce36", "c06b756b4b6cb9575d931317f7d2eedd3e0c14fe", 
"4dc3dc96281fee8c676578546c98c295219aa7fc", "2583f8725e17a63187edc72bbacfa43d95dec9ec", "113af6bde95ddf055cd7cf89a9a3d217e5251479", "91d19287bf52222dc7dd8a662df40ce8941110ff", "1144078fe05a113c02d068962be9d17d0f2b9e53", "ac72566bbc7628255002a70ca5bec0874929eba4", "7fb2a15dcb26caaef8eba66b36918f4edbb870af", "011e01d92026acc87dcc3963f305dc2a5d3b9cb1", "f3018e7589af851341e6b40affb12d0ebdfa7db1", "40b535e5579612cff48c45a071c003853ce996ee", "e7ab23d011e5183db78cfea48e303210f6e57e2e" ], "paperAbstract": "Permissionless blockchains protocols such as Bitcoin are inherently limited in transaction throughput and latency. Current efforts to address this key issue focus on off-chain payment channels that can be combined in a Payment-Channel Network (PCN) to enable an unlimited number of payments without requiring to access the blockchain other than to register the initial and final capacity of each channel. While this approach paves the way for low latency and high throughput of payments, its deployment in practice raises several privacy concerns as well as technical challenges related to the inherently concurrent nature of payments that have not been sufficiently studied so far. In this work, we lay the foundations for privacy and concurrency in PCNs, presenting a formal definition in the Universal Composability framework as well as practical and provably secure solutions. In particular, we present Fulgor and Rayo. Fulgor is the first payment protocol for PCNs that provides provable privacy guarantees for PCNs and is fully compatible with the Bitcoin scripting system. However, Fulgor is a blocking protocol and therefore prone to deadlocks of concurrent payments as in currently available PCNs. Instead, Rayo is the first protocol for PCNs that enforces non-blocking progress (i.e., at least one of the concurrent payments terminates). We show through a new impossibility result that non-blocking progress necessarily comes at the cost of weaker privacy. 
At the core of Fulgor and Rayo is Multi-Hop HTLC, a new smart contract, compatible with the Bitcoin scripting system, that provides conditional payments while reducing running time and communication overhead with respect to previous approaches. Our performance evaluation of Fulgor and Rayo shows that a payment with 10 intermediate users takes as few as 5 seconds, thereby demonstrating their feasibility to be deployed in practice.", "pdfUrls": [ "http://diyhpl.us/~bryan/papers2/bitcoin/Concurrency%20and%20privacy%20with%20payment%20channel%20networks%20-%202017.pdf", "https://eprint.iacr.org/2017/820.pdf", "http://eprint.iacr.org/2017/820", "http://doi.acm.org/10.1145/3133956.3134096" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/220eb711e0efd2c67759f169ff14ba3efc186bbe", "sources": [ "DBLP" ], "title": "Concurrency and Privacy with Payment-Channel Networks", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "2217d3548de60fe0725cdc6185bfe2082b213f73": { "authors": [ { "ids": [ "2691974" ], "name": "Carsten Binnig" }, { "ids": [ "34627565" ], "name": "Lorenzo De Stefani" }, { "ids": [ "1746961" ], "name": "Tim Kraska" }, { "ids": [ "1735099" ], "name": "Eli Upfal" }, { "ids": [ "1945956" ], "name": "Emanuel Zgraggen" }, { "ids": [ "3361009" ], "name": "Zheguang Zhao" } ], "doi": "", "doiUrl": "", "entities": [ "Amazon Mechanical Turk", "The Simpsons", "The Turk" ], "id": "2217d3548de60fe0725cdc6185bfe2082b213f73", "inCitations": [ "4b8c0324a58b954c3616d3cb1fae91f74f21f193", "12cd78b918eb09165844503a3a199752115a0657", "8995a40dbe8d976a04c055fc39f201d6443abdcf", "49cfadb861b0742040620009d1f39a4481becc6e", "a6613fc6e0f8dd9a538ea901db2317b4ce3644bb", "4119c775ac55f9400d5f2e183d5dd1723bb9d0f0", "5209f0dfbde07f3724ce7bce128b850b6c00780c", "ad8c152b67cdd6ad302414f1fa89824f1b92638b", "01cecc93c5b8c0268399ee0530582c0e89f3b5c8", "0e2cc73acdb6defe6d4f97360c91f84f1bf56a00" ], "journalName": "", "journalPages": "", "journalVolume": "", 
"outCitations": [ "6d05318d6ac2b5b902adf7db160de523d5902430", "0749973bd7f29e6f580faed564acdc9127426fcd", "6b87ac0501d93b98012bf35ea3825b1e9cc47a8e", "3076e152ea6e356bce6b0bf237565d303b863760", "a20d44cb70c3321df06ebc89a5273302c403d341", "183370b371f15df5c11bc4ba61beaf1d828d0287", "e5f740dfd69faf70e1e8b2018cd5a1001abc35ea", "01e1fa7924b3eb76b73f1828c93805f3ba028bae", "78349552b43ab9c73683e4a1253d9c4a9d61395a", "000953906a54de7c81b56b25f08c24bd83567d57", "29aef6a4d9f37d92b613170d29eee3ec319965d3", "b4266adb19b472422dc722c53c76b8ce6336781e", "0d22b6cab566e5dc5bdab95742ee51c8981d5422", "079e2aeb5f693ec505d3e262c5c3df7c1afd94e7", "1bbb160a886b61113f3ce494af055d1568e30594", "4c815e21c909211d7c047a2437938b24217e0a22", "1f5396ae6896885766a7cd75dce6692f7e36e222", "a36b028d024bf358c4af1a5e1dc3ca0aed23b553", "55fa55c379ad6a6c126f663cb9b169085b4b6c53", "9456b6c9e40567cdd884fab2c443d4ed653d7c4c", "e03fcae87b049671f979106c215bd985475e3f7e", "2c972f5a08e3dedf37f45b35ce17ae80c2139c1c", "9a94e1981c4ed1429ac66e244dc0c042e31097ca", "6920a43dc904b8e7d3868c9f906b10aaf3fd72b3", "4969f8f0feeb77128bd96cd15fc6ee323fb0f653", "0965fdce2189aced677903db4641fb3f09a5f7e9", "cccbc3da776de497ca9d0dde2d4a76dc6c1b0fc4", "4e3c1f3904d4b5404a03b6101370841f7c4798d5", "1317f7e3d1de6ffd0888303ca95d9c8c6bae2af3", "0d1346ced4b1b04765573433bfad5c328fd4e734" ], "paperAbstract": "Have you ever been in a sauna? If yes, according to our recent survey conducted on Amazon Mechanical Turk, people who go to saunas are more likely to know that Mike Stonebraker is not a character in \u201cThe Simpsons\u201d. While this result clearly makes no sense, recently proposed tools to automatically suggest visualizations, correlations, or perform visual data exploration, significantly increase the chance that a user makes a false discovery like this one. In this paper, we first show how current tools mislead users to consider random fluctuations as significant discoveries. 
We then describe our vision and early results for QUDE, a new system for automatically controlling the various risk factors during the data exploration process.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p56-binnig-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/2217/d3548de60fe0725cdc6185bfe2082b213f73.pdf", "s2Url": "https://semanticscholar.org/paper/2217d3548de60fe0725cdc6185bfe2082b213f73", "sources": [ "DBLP" ], "title": "Toward Sustainable Insights, or Why Polygamy is Bad for You", "venue": "CIDR", "year": 2017 }, "221ceab1a74c8a9fa4e36a1bec2754e745e36a63": { "authors": [ { "ids": [ "2238476" ], "name": "Jeffrey C. Mogul" }, { "ids": [ "36224817" ], "name": "Rebecca Isaacs" }, { "ids": [ "17927692" ], "name": "Brent Welch" } ], "doi": "10.1145/3102980.3102983", "doiUrl": "https://doi.org/10.1145/3102980.3102983", "entities": [], "id": "221ceab1a74c8a9fa4e36a1bec2754e745e36a63", "inCitations": [ "5ae5348b9558729b98a70a7abed4145adeb45bbe" ], "journalName": "", "journalPages": "12-17", "journalVolume": "", "outCitations": [ "2de63b0c867b290d4f7217459c968aa98e5ad39d", "2acf4ea8ebbb795965bf05c155fb90b6ff926f26", "1521e801e8e08ecec3b0baabb07f9a6ce0a67a85", "40a00e89195903fbaffb364fe410a215faf6715c", "1c8195cadc7ad4a8b59b16fe77574dd6d160d7d2", "1d92def3f4f637ae028828e5a2343a590de8df48", "5c83882077fb84a624f8ad8b1f21e6a02eed408c", "4af63ed343df388b6353b6fc77c7137d27822bf4", "00c181b8b64e824fbe0172339f1e4560b557fab5", "50622af443f6d2f4db4a1f4bc43e0ff11a693a1d", "7d53405b1436c42fe086776892128294e430d01f", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "07bab5c94c3d096b21f81d05be081ea36812f3ef", "4be1be822928a0aeb277412bad1f20f350deb609", "48326c5da8fd277cc32e1440b544793c397e41d6", "2fdfb9098803f9b7d523d9bf67dca8b53cd28cf9", "75032b6df2f3988d2cb6988c73cbc01c7b9e80cb", "9046b8e109072c6286e60021009572e6a1032aa5", "11a4744f7f0883c4232a9f5aaca8b9d29bfaa762", "16e85d76e57739da3082ca9dd4868b240c0b3c86", "50dc3bb8c5b8f7bed0c6d6231e042f56ad6af1d4", 
"2dbe6e8a70c9c4340435cf09231f32fd7f49a1e1", "0a9cc699ed94b4729de3255d76305b91b6a56aaa", "3386a3417920dd16efec5459b9b48930ece73dd8", "277a2794d9ea67cc0657b019c2e77faa3b37eddf", "454dd673096a64d5ed41e4afe246ff4059a40a1a", "07d847f310d5fa9138f461f0a25c5e0024f1c4af", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "0cdd02d817ab76ccb7e6ccc4d39144a80faa80ee", "768b3ea7980bac9daf8fbd370026b004616eb770", "267aa4a091dd43f7eb4ffad4c63405229fd31f1d", "06faf0e1f1b6c7bf786cdeb474e3dc6a3f5435c3" ], "paperAbstract": "", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3102983", "https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46181.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/221ceab1a74c8a9fa4e36a1bec2754e745e36a63", "sources": [ "DBLP" ], "title": "Thinking about Availability in Large Service Infrastructures", "venue": "HotOS", "year": 2017 }, "222e7666dd37307b600b0a3ebc9b6d28fb51d6e9": { "authors": [ { "ids": [ "3427189" ], "name": "Steffen Maass" }, { "ids": [ "7761504" ], "name": "Changwoo Min" }, { "ids": [ "1909974" ], "name": "Sanidhya Kashyap" }, { "ids": [ "2654775" ], "name": "Woon-Hak Kang" }, { "ids": [ "39510203" ], "name": "Mohan Kumar" }, { "ids": [ "3254849" ], "name": "Taesoo Kim" } ], "doi": "10.1145/3064176.3064191", "doiUrl": "https://doi.org/10.1145/3064176.3064191", "entities": [ "Algorithm", "Central processing unit", "Coprocessor", "Graph (abstract data type)", "Graph (discrete mathematics)", "Iteration", "Line graph", "Locality of reference", "NCSA Mosaic", "Out-of-core algorithm", "PageRank", "Solid-state drive", "Xeon Phi" ], "id": "222e7666dd37307b600b0a3ebc9b6d28fb51d6e9", "inCitations": [ "c7e2c4bea500ea7926a50973d861f01bb8e5e364", "26b4bb926d9928bcf1c3e8916c8674ef2dcf6fb2", "356100b33d589bb48fa1a6518a85efb551a13d9b", "2f9b520b16e05de8e705e374ebea3a121d64ebc2", "896134c7aa767e27cb3c3aa0662b335473923602", 
"69c6a91d6cb6807a7c730bd1445209737361d7b8", "f84aa869a21f083133b74e23d83ab2dd1378b7ff", "12d3a826eaf6a53bcd299dde7e3de700d387ce17" ], "journalName": "", "journalPages": "527-543", "journalVolume": "", "outCitations": [ "e2462bde978023a9069cc08326f626135a95cb89", "4339f17c10b91d2def6e16ab981d7b5428e6d82c", "8e67d1085da29e5aa1e758751bfa5469ac07023e", "1156f60e40548096df49528b1342bb3e88b0f378", "0558c94a094158ecd64f0d5014d3d9668054fb97", "4dc578364f357b993b5554b9181c90c84aa6b4d1", "6de3915df2b9927a78f213629f3bcb052ec21e8b", "0706356c9ab6014d6b04577d38289ea8328291a5", "8d1a4ae1d3a17edd5b16653f2f582f9952b71612", "26deee037b221bd05ed34461819f5c067b745445", "586414efa54ba9f4a7def0dc5322b7723f22c552", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "ce18973fb7c23cb4fc1c1a61c1c1c4333f4abad1", "11e4d4d00c7b1e3aa9fcf3c490b635df98827dd9", "13f7df91eb208a387d18fbad192c6f0f834f0b82", "254ded254065f2d26ca24ec024cefd7604bd74e7", "1141ff370d51c25ae17709ae9131097313215e18", "0a8e0a9f0ae9910d5ecf165071559a2c4191a098", "047565a5b15fbebc78e0bc7d8ca823237dac9de2", "b364f870dba706912ebc530ebd92ddf961b3f141", "162e4c9d52af580b9d21ec1a631dfc25d4cd150b", "3486aeaf540c48952120fe853d672af984f40a6a", "0ad8e89091eed09217e66adc98136126addc2619", "2a17c90ed723d6a14415cc1f677a5c0aa512f501", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "141004dee9e799b40bfaf50b4a72618613137250", "87ee99d4cc4e0601cbb519f6ddbac85772bdc49e", "6a888f3dd0a17b0241be61daa378ba6caffa6617", "0541d5338adc48276b3b8cd3a141d799e2d40150", "0d06de003e8ca949b3b39f9a51750c050addb997", "3dff11679346f5344af1018cad57fa14cc349f2f", "01c1f0e97ce5c74c714dc7aa43cb064f45cc3b04", "400c8f6d1bc0284b887f3f6412e07f9be70650f8", "0608d9937c074520cdc93cc444cc1c77039c5332", "0f014693b25d9846025219b88f8ca480fac68b0a", "b513711621e81d0abd042e0877ca751581a993f5", "3978225e3bee62839488dd3b92468fb40d12969f", "9207a7356d90343b5107e3e445fa7de86f3078eb", "191fd33f17c2a79b3825d4cc2105c47a8f16ba44", 
"420a0e5fc398f197bca3dfe40291a82b2c65655a", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "110b55a017f52abfedca220036ea129d84b7cadc", "11fff0d9f39f7e3187dee5f2b9d54fec13b9c192", "01f21f3aacb36a425aa9213a10ccc543a11659ab", "29cf5904d4f2daf900aa309165a95fe4915d31b4", "ed618cb03388592219020d3726517d9610188302", "3231d62bec8e8cc1d837e85893889855767c3b13", "179cef3727082d923b20874bf5d23e6cf74c6026", "0f5a4d43622bea9de50abfe14f42345b78df28ac", "027485f716ca4f6d9ee2e189790d6560e37fcab2", "ef9d9821df55442f039b128bb5cef2b41ab2cadc", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d" ], "paperAbstract": "Processing a one trillion-edge graph has recently been demonstrated by distributed graph engines running on clusters of tens to hundreds of nodes. In this paper, we employ a single heterogeneous machine with fast storage media (e.g., NVMe SSD) and massively parallel coprocessors (e.g., Xeon Phi) to reach similar dimensions. By fully exploiting the heterogeneous devices, we design a new graph processing engine, named Mosaic, for a single machine. We propose a new locality-optimizing, space-efficient graph representation---Hilbert-ordered tiles, and a hybrid execution model that enables vertex-centric operations in fast host processors and edge-centric operations in massively parallel coprocessors.\n Our evaluation shows that for smaller graphs, Mosaic consistently outperforms other state-of-the-art out-of-core engines by 3.2-58.6x and shows comparable performance to distributed graph engines. 
Furthermore, Mosaic can complete one iteration of the Pagerank algorithm on a trillion-edge graph in 21 minutes, outperforming a distributed disk-based engine by 9.2×.", "pdfUrls": [ "https://sslab.gtisc.gatech.edu/assets/papers/2017/maass:mosaic-slides.pdf", "http://doi.acm.org/10.1145/3064176.3064191", "https://taesoo.gtisc.gatech.edu/pubs/2017/maass:mosaic.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/222e7666dd37307b600b0a3ebc9b6d28fb51d6e9", "sources": [ "DBLP" ], "title": "Mosaic: Processing a Trillion-Edge Graph on a Single Machine", "venue": "EuroSys", "year": 2017 }, "22428cfc23d0925069f650f9f5f6a117debea665": { "authors": [ { "ids": [ "39889004" ], "name": "Michael D. Adams" }, { "ids": [ "1993034" ], "name": "Matthew Might" } ], "doi": "10.1145/3133906", "doiUrl": "https://doi.org/10.1145/3133906", "entities": [ "Automaton", "Context-free grammar", "Context-free language", "Data pre-processing", "Interaction", "JavaScript", "Order of operations", "Tree automaton" ], "id": "22428cfc23d0925069f650f9f5f6a117debea665", "inCitations": [ "0b58636ea52ac55ce61c31f96347000be48158bc" ], "journalName": "PACMPL", "journalPages": "82:1-82:25", "journalVolume": "1", "outCitations": [ "4ea9629dcf355ab3d38bae9e32d731b1fe4d7946", "578a6624343777891279094161a0c92da7a05f1b", "1ad2fe5d7d1fcd00218266898fe5371ab598639e", "93a6a7319dbbe63fe68f5bac5b5f4518ff5b14f2", "116af0cc4999896ddc511b353f98d64f7826d6cb", "fc230d6b4e6d275bff21b64dd0f457f07a92055f", "560fdc87fc0137c17d530d826d22dd304de33d7b", "74a0e8f05c1bd8d69574278764cc41520f3398af", "40113fe26c371902c2356364d3d0c662a51a3740", "9c403ff3348358c98e9257bc758887465d61fd13", "1cc8cf5ae368d2987bda78d15d30d371ab9e2e25" ], "paperAbstract": "Precedence and associativity declarations in systems like <pre>yacc</pre> resolve ambiguities in context-free grammars (CFGs) by specifying restrictions on allowed parses. 
However, they are special purpose and do not handle the grammatical restrictions that language designers need in order to resolve ambiguities like dangling <pre>else</pre>, the interactions between binary operators and functional <pre>if</pre> expressions in ML, and the interactions between object allocation and function calls in JavaScript. Often, language designers resort to restructuring their grammars in order to encode these restrictions, but this obfuscates the designer\u2019s intent and can make grammars more difficult to read, write, and maintain. \nIn this paper, we show how tree automata can modularly and concisely encode such restrictions. We do this by reinterpreting CFGs as tree automata and then intersecting them with tree automata encoding the desired restrictions. The results are then reinterpreted back into CFGs that encode the specified restrictions. This process can be used as a preprocessing step before other CFG manipulations and is well behaved. It performs well in practice and never introduces ambiguities or LR(k) conflicts.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133906" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/22428cfc23d0925069f650f9f5f6a117debea665", "sources": [ "DBLP" ], "title": "Restricting grammars with tree automata", "venue": "PACMPL", "year": 2017 }, "224e0bdb201a6953a5fc6ffec587d5963dcb0c29": { "authors": [ { "ids": [ "2159885" ], "name": "Yuchen Li" }, { "ids": [ "3232294" ], "name": "Ju Fan" }, { "ids": [ "2712862" ], "name": "Dongxiang Zhang" }, { "ids": [ "1688848" ], "name": "Kian-Lee Tan" } ], "doi": "10.1145/3035918.3035952", "doiUrl": "https://doi.org/10.1145/3035918.3035952", "entities": [ "Approximation algorithm", "Best-effort delivery", "Computation", "NP-hardness", "Personalization", "Sampling (signal processing)", "Social network", "Speedup" ], "id": "224e0bdb201a6953a5fc6ffec587d5963dcb0c29", "inCitations": [ "53e6a2664eda8036576f77eda5134fa7d3640f31" ], "journalName": 
"", "journalPages": "619-634", "journalVolume": "", "outCitations": [ "084419435227a5bbf54d47ba54e11739756510fd", "abb152802d5b4686a394e221abe951187ea06158", "81aa239cb8f106a379f7571ac7b1498fb58ec712", "4cd73382dc17561cd276f276c61d5ebf39bf69ad", "4cc352fa9dc7fc87389ce95e6779a2603f4ee87f", "43a068652277ca9213202d984a69965e7fec5dd9", "8c1fa3949409eb65017a4625a7351039f72ebf04", "50ef0cbcc318ec98c7d6132b9c539e9cf41db010", "1de0168bb79272d3faafdcda5c9bb4b2a00a15d8", "6f8a2339aabb699d8810baed2564744ca837f009", "360a12e2f5c56e53cc97007678ee97dd85ecef14", "33679dcb1fa8873c7be583c45cd9982cd73b5db3", "706c83309fa09454a136d4e607364b27be66172c", "6be7b1817e67a773d7f17601d7bf6024b81435a8", "239c6ebf6172f8f34e9d103ca1c10056d532be09", "28bf0df09f97e7ef9108e71b45fe1b9a7aa201e2", "404d38143b1078fcb4329f6addbd781d2a99f9b0", "2ee8c773ef22776b0113c78325ca76b701cbb78e", "0a888fadf7f8ac8eb0f194f7979e421ce072bcc4", "182dae04254e79a8cd6acee81114a686c8ca1cec", "094cbfa06f8374b49b84524a466a63d34c9ef34f", "21968ae000669eb4cf03718a0d97e23a6bf75926", "b790f9ae49cdee5354eacfc7897ab9752acd5e2a", "2e9b18872712817e5d98bc54ea6ee8a7bd66fbeb", "41c8ff7475225c9dfe9cedc25c038e9216345cc6", "1ad8410d0ded269af4a0116d8b38842a7549f0ae", "1f559f2eb174d05a912b2ec39a48eadfd0160b74", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "6eccbba04f448fa5bc93ed94bc63bb03d36e114c", "34ee11059d693642fa7a5376e96e147539a997b1", "fa4e93dbcdd1e4f20db9aeb34699a414420b7fd9", "0b95749c8751d46c3a15735e914ccc1e0f8590af", "a0bcdf26fcd382dd54ac38be07e6b33179bd52f4", "23d85a0008429845870780c6db3640c05165acaf", "1339b888fb739b8a1194fa94fbe848e01d93456a", "506b932cff9383319305cb29ec5855aebda059ba", "7909dc85b5d3016100bd6cba675f70de3a5e9413", "530d53d8a1828e73ff0d731cd2d2cf8a8f8ecae4", "02678fe75012986ee59be79b628dd0e0cea0a9f1", "89927a37854b2d479cf7ae51016faed9222a14ed" ], "paperAbstract": "Social influence has attracted significant attention owing to the prevalence of social networks (SNs). 
In this paper, we study a new social influence problem, called personalized social tags exploration (PITEX), to help any user in the SN explore how she influences the network. Given a target user, it finds a size-k tag set that maximizes this user's social influence. We prove the problem is NP-hard to be approximated within any constant ratio. To solve it, we introduce a sampling-based framework, which has an approximation ratio of 1-ε over 1+ε with high probabilistic guarantee. To speedup the computation, we devise more efficient sampling techniques and propose best-effort exploration to quickly prune tag sets with small influence. To further enable instant exploration, we devise a novel index structure and develop effective pruning and materialization techniques. Experimental results on real large-scale datasets validate our theoretical findings and show high performances of our proposed methods.", "pdfUrls": [ "http://www.comp.nus.edu.sg/~a0047194/paper/tr2017-pitr.pdf", "http://doi.acm.org/10.1145/3035918.3035952" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/224e0bdb201a6953a5fc6ffec587d5963dcb0c29", "sources": [ "DBLP" ], "title": "Discovering Your Selling Points: Personalized Social Influential Tags Exploration", "venue": "SIGMOD Conference", "year": 2017 }, "2259f2a247fd7238f2b4c1741b13d533ccfe1a48": { "authors": [ { "ids": [ "6632192" ], "name": "Vinson Young" }, { "ids": [ "1936310" ], "name": "Prashant J. Nair" }, { "ids": [ "1740036" ], "name": "Moinuddin K. 
Qureshi" } ], "doi": "10.1145/3079856.3080243", "doiUrl": "https://doi.org/10.1145/3079856.3080243", "entities": [ "CPU cache", "Cache (computing)", "Dynamic programming", "Dynamic random-access memory", "Gigabyte", "Kilobyte", "Map", "Operating system", "Oracle Spatial and Graph", "Overhead (computing)", "Single-access key", "Time-slot interchange" ], "id": "2259f2a247fd7238f2b4c1741b13d533ccfe1a48", "inCitations": [ "9425569e47fec9706d62de99f56596b35addb77e", "999597f2a0a3d943f116a78505343a3b415f68d3" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "627-638", "journalVolume": "", "outCitations": [ "811ef5d7d64b1847835d67bc0a5fc7e0c65ed259", "5ab45c82a811162dc04efc9eea60f9b22b1e5a11", "11b0e5ee27e7fdf989632f157ff204fc8962918c", "94973810b159138f16577179daf63fc3c19f3224", "417ab9b8b003982222017ef585e19680366609f3", "70a1daa3de288b42fd23cb4fed72663d3768815d", "ed20a5a4fc56c771f2d1d78f3730e3afc495b1f2", "3f538f597a457beb36f3137974455be3456d70fb", "1121c104e9d2951462f67f8eb364f043fe9a65e5", "014ba063a3721973ba6af6503232d4d21d1456bb", "281fdbdafbf8f2bcbde46099656063e5c82ce222", "2c0dc8ea2b2dba866f54ec6b42a7ecc823e33997", "28552ecf4eaedb3461edca97304b29082b02fbab", "d4e153d0ff33cb15cd6c13570599c6c36cc78db5", "1a5ce35fc5ad575c2c9e5f692bf263082d656f80", "12bc20a1963859e9f76afb4b308b90ded1cff1fe", "309ad0357af7722a24192781340881390055a3db", "5a8ec73d07b6c4b37ca51e0025c79f288424d422", "3651b37643aec6cc071254ae898790e9eb6cc055", "607e98b19885a08c20e948f1b00387876a983fd1", "12d6da762b2a5d512d383f3b587bd30c23c3df97", "98ab001452b8392bb0d0b2677cfb91281bad7708", "314ddf7416f36203eb4287b52f9841c22ac1c8a7", "02febacc509320c0390a18e4975132e3a44018e7", "1b62e2f7ac7a8d3c13773dfa23d05f8ab82b1f23", "e7c6f67a70b5cf0842a7a2fc497131a79b6ee2c5", "8009b1c8cc4af8d3d4b792ac32926487a428172e", "2bfdc5c3d3da06843ecc0b8d04e54acff419487a", "bc51f1d149cd03f281210e293bd2b1d3413760f3", "1c8302c3bea2a72dffb4b43dfa438b7c4d19e988", 
"052095bb131a0942053be4ff4097b41c429c7e65", "18633256bb17ba0744518479c0752ca87f0d03c6", "d47ab6dc259a57124cb9b86493147bbc04162dbd", "bd64635ee260c3fec8589f6af402b92db8142c15", "c7341dcd5e6c71b149edf808331ab1e8b37cd03b", "7c94e33347142e1a4046fe3dc5c7f63cbf3d6b73" ], "paperAbstract": "This paper investigates compression for DRAM caches. As the capacity of DRAM cache is typically large, prior techniques on cache compression, which solely focus on improving cache capacity, provide only a marginal benefit. We show that more performance benefit can be obtained if the compression of the DRAM cache is tailored to provide higher bandwidth. If a DRAM cache can provide two compressed lines in a single access, and both lines are useful, the effective bandwidth of the DRAM cache would double. Unfortunately, it is not straight-forward to compress DRAM caches for bandwidth. The typically used Traditional Set Indexing (TSI) maps consecutive lines to consecutive sets, so the multiple compressed lines obtained from the set are from spatially distant locations and unlikely to be used within a short period of each other. We can change the indexing of the cache to place consecutive lines in the same set to improve bandwidth; however, when the data is incompressible, such spatial indexing reduces effective capacity and causes significant slowdown.\n Ideally, we would like to have spatial indexing when the data is compressible and TSI otherwise. To this end, we propose Dynamic-Indexing Cache comprEssion (DICE), a dynamic design that can adapt between spatial indexing and TSI, depending on the compressibility of the data. We also propose low-cost Cache Index Predictors (CIP) that can accurately predict the cache indexing scheme on access in order to avoid probing both indices for retrieving a given cache line. 
Our studies with a 1GB DRAM cache, on a wide range of workloads (including SPEC and Graph), show that DICE improves performance by 19.0% and reduces energy-delay-product by 36% on average. DICE is within 3% of a design that has double the capacity and double the bandwidth. DICE incurs a storage overhead of less than 1KB and does not rely on any OS support.", "pdfUrls": [ "http://memlab.ece.gatech.edu/papers/ISCA_2017_1.pdf", "http://doi.acm.org/10.1145/3079856.3080243" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2259f2a247fd7238f2b4c1741b13d533ccfe1a48", "sources": [ "DBLP" ], "title": "DICE: Compressing DRAM caches for bandwidth and capacity", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "225c357ee5490febc4fe9ca002fbf08b29adec46": { "authors": [ { "ids": [ "34739391" ], "name": "Paul Grubbs" }, { "ids": [ "3482466" ], "name": "Kevin Sekniqi" }, { "ids": [ "3094927" ], "name": "Vincent Bindschaedler" }, { "ids": [ "1739327" ], "name": "Muhammad Naveed" }, { "ids": [ "1707461" ], "name": "Thomas Ristenpart" } ], "doi": "10.1109/SP.2017.44", "doiUrl": "https://doi.org/10.1109/SP.2017.44", "entities": [ "Adversary (cryptography)", "Ciphertext", "Database", "Denial-of-service attack", "Encryption", "Matching (graph theory)", "Range query (data structures)", "Sorting", "Spectral leakage" ], "id": "225c357ee5490febc4fe9ca002fbf08b29adec46", "inCitations": [ "f4ed6e5506b62faa5df8bb1407921b9d04052cdf", "67b380be262b074323f050d59d6e4ca2e2b958ee", "b5e592fe1e8a4ed60b0a4bef0c2e3005bfb84158", "063d724a6f7a376f5c276cc2c7113c68c33fc1c1", "d03b77a5f76764819315ce2daf22bb59d39b7832", "e7df1e80867fc5653d6f7716156333f55a7ba530", "0a811fe6d15705573766947d135af4c416571fa7", "fe1c81c00d516ef38da1a429d721e1d05cc488c0", "46e837585af419dc79a949fcb1cfa46a8621f9ff", "20113c7e3b95780bfff76b9c0f20330fed4ce962", "b2fcd2497eee0f6795dcf083c2dd6fd903b583ea", "7511fc23b094d20f3d4b1fd4883c1a5a7140db69", 
"215b4c25ad34557644b1a177bd5aeac8b2e66bc6", "3cbf6df60d91d4f2422827c46ec4f85fb45bbeb7", "8b338e925ae623adbc4cf387d3dd7ccf839d66e6", "f39796b6656cac1e9ddf9e4758dec9d6a8aab8d1", "000eef5859d7b9d05e3514ef982796beefd2b23e", "3ccf5ff1b8ca5d736647288c79107504b9ee111a", "97177a54ef6d916924ed36a26ddef171667db865", "34fe0c6e91d2a6a2325f5057222c3fbf22224fe5", "54b9a6e23c816903197ce093ff0ffa2f0245fe29", "b7a64553739fa597268a5f4b912837aced813ab8", "bba60b6f699929694bc9b88acb9f2a92a6e5602d", "87eed8a77ca8f1d61a0958e9dfb62001644b286e" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "655-672", "journalVolume": "", "outCitations": [ "18e704e31d06f955f39955cd4c785c4731e5fbd7", "3128cba3b4c23913b43a17a1737ed7b7955ea896", "85a3c518ae3f0d77a2a16e3a45761be2c8517b19", "02beed2e1350a0d0b01bb9622081cb93a965a716", "0003c342fd0b3e48a483901bd3b731b974fc1f37", "7cc6a150b1ba5b40c8e5aca6e94c817d5bebbc5c", "c413a3f4bc3e02b2df471c017b44beb1ac91a6c2", "02ee2bb87a1f6736c6f7366272f9bcbbecc58ce3", "2444f12daaa3808e80103ddd47115dd388cfd12e", "4af77753e00973f339fd93a27e4131047018e79c", "010ab443478bbfbecb03be9c250a49ae3b19b4d5", "fb7f27650f30aefa7786c874da4b7cc27a11b9df", "00562cead3e4d35f03edd286b40581e8781bf339", "19f6680d750de9ceb0f88e1c801fb5014b094106", "8bba0a42134ea5df896cdde59e62bbd90e9f4dd4", "234e14d7509ef2a14e829e1c08648f84462ad4ff", "140a563a1ef271b7dcd0675225cb543d92636f6c", "3864cfb41db27452cefe3b1f64f05623690201ab", "47564fdfc63a1a36102b8b6c74f978bbc5190c5a", "3ed4d5df2a97d5d097e1fd837a8568eec0eb6d31", "4179e14196f1c0a205a95117e0efed47327f009d", "3cbf6df60d91d4f2422827c46ec4f85fb45bbeb7", "197107b7ec65a623b59987cf7243921908068751", "2420f3bd82b9b9a4fc99fa1e3b79b4cb6d6c3fef", "8776c004a351e23be9ef7a4d214da4fc93260484", "1ab81ae077d6944fbff279a7a8a38df48f75eadf", "22a5eeb8608b35e371b7544a54fabeadca8866e3", "93da6d5c305ccb37ac3f8b3b491171ce5407274b", "2cbe59fd40166a5cdab925bf6cd58d9d0d447d9c", "c6893bda51266222708b31c46fb7d620d52c2f80", 
"9cb5ddc95ff84bfc04365f7c1e660c2bd7342207", "978832b596b87a139696b790acae716971e4076f", "961487973d4b33f96406fddbfcf1235dc587571f", "d878fb5a7d1ea14649f590de5ebb806d1414f0b6", "2004123b3b9698abe916116910b2c46a712a5585", "a0835c336ccc0e2f6f7cde1ba9c214996a70f1f3", "2f7c97e82641e4f0f8c7b508b75af4952b0cc07d" ], "paperAbstract": "Order-preserving encryption and its generalization order-revealing encryption (OPE/ORE) allow sorting, performing range queries, and filtering data — all while only having access to ciphertexts. But OPE and ORE ciphertexts necessarily leak information about plaintexts, and what level of security they provide in practice has been unclear. In this work, we introduce new leakage-abuse attacks that recover plaintexts from OPE/ORE-encrypted databases. Underlying our new attacks is a framework in which we cast the adversary's challenge as a non-crossing bipartite matching problem. This allows easy tailoring of attacks to a specific scheme's leakage profile. In a case study of customer records, we show attacks that recover 99% of first names, 97% of last names, and 90% of birthdates held in a database, despite all values being encrypted with the OPE scheme most widely used in practice. We also show the first attack against the recent frequency-hiding Kerschbaum scheme, to which no prior attacks have been demonstrated. 
Our attack recovers frequently occurring plaintexts most of the time.", "pdfUrls": [ "http://eprint.iacr.org/2016/895.pdf", "https://doi.org/10.1109/SP.2017.44", "https://eprint.iacr.org/2016/895.pdf", "http://eprint.iacr.org/2016/895" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/225c357ee5490febc4fe9ca002fbf08b29adec46", "sources": [ "DBLP" ], "title": "Leakage-Abuse Attacks against Order-Revealing Encryption", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2016 }, "226ca798b529c13605a2aa7fe75d58f4188f850a": { "authors": [ { "ids": [ "2649174" ], "name": "Mihir Nanavati" }, { "ids": [ "3048886" ], "name": "Jake Wires" }, { "ids": [ "1709411" ], "name": "Andrew Warfield" } ], "doi": "", "doiUrl": "", "entities": [ "Decibel", "Imperative programming", "Interference (communication)", "Non-volatile memory", "Scheduling (computing)", "Shared nothing architecture", "Volatile memory" ], "id": "226ca798b529c13605a2aa7fe75d58f4188f850a", "inCitations": [ "65fef0cd15b565055f21cf8c489cae39dd569220", "1d08d231ec66645ec56d2210c1a7c6b44c6ff041", "c3f127570fd189054641dc0036b737c37863b2ea" ], "journalName": "", "journalPages": "17-33", "journalVolume": "", "outCitations": [ "5c73d35fcadede98d7c1ded769af7abdb94d9cb9", "cd71834408272f5dfcf32d276d03be18914bf376", "60ddf74dd5b443c3bfb59fe876b42f9d6112c4fb", "1cc9ebeab21d668c8fb197a2498380e95c6a65fb", "9e152eac71577b7ee9175d3e68fd76963170eebe", "2031a6decaf94ce41ac09fc355022429eeeb0e49", "2c11d5117a8b97ef2ef268e5fb38e8c5ffb1c58c", "458902c0a4b5e9855c8a4be9eeb4cb4ce534b068", "8090a0702dae2a90bb614e6ef8de4f049e596233", "136eefe33796c388a15d25ca03cb8d5077d14f37", "38a9120f780602521af9744e31d80ef5cd9593a7", "0712c325155f8af65602a08cc448d1e453466a33", "07367703f587dbc3313cc613289c4330cebe5c8c", "132f00de21cee656d00ad6779f1926070ad59544", "0abe5211e209b272890ba6820a33b72e938b0b3b", "0da73832dee2c9b3d4c0d039d8e714e6ff098e40", "1bc029b715f29e95063f27dc36396093394a1e19", 
"29a1148d75878671dc3663bf480e33d7bd91597d", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "631f3c144a27503f9135b412305422a4f3ce0acb", "0b2c84be9e9f97f2464ad9d09be5f4c37edda47e", "225603198cc415d363db8a8a2bd30b0df3c963b1", "03543f75c4fe0c49f81af789a1c7293ff0e4e107", "09bd66ed15985caa6b0bf1d54a36b508141ed128", "daf0cd0076b388712ea12ec4105572997fc50cdf", "2ab305079385594badd4233ebb9512d52ecaccfb", "048a09d7c8713dc2533c1e31ac3f224868293461", "30419d0e0fcaebbfbcdb88f702bd01306d14fb15", "3f4297d2e5ae196f30f9a18dcd8400dfca5d2e47", "0f6a32792d0882db35fe9391445d4322232b619e", "bf1ffbdffc4ba38ce992b51d6c7016dd8b826291", "108ffa868b6dc5e8b4987342c90c79c8ccf841c2", "35bb4201683cf3525bfab90c35ca1a6ab72f3e60", "5cde06240acc288c986a10ee39f17ea28c9ef05c", "75032b6df2f3988d2cb6988c73cbc01c7b9e80cb", "05a1bad1ef2341339e18d636d78594226d4ee8e6", "65a2cb8a02795015b398856327bdccc36214cdc6", "be4c6170ee4fd72ff5c8fc92e3d6ba5cba774cf6", "d58cc242fd70227cff98376a914e0b42b1b79db8", "7ae26da9b7666812857883536870c315538f7f10", "3fc93257ac94aa8d6505c19077058e68622345b6", "26a88fcaf621270af5f5786fdb2df376a2bc00aa", "1d2871c56d07a35e6709d535fbbb2df6b434962a", "9f99c550f37308f2d5763574733283ff0d11bdc4", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "def29d202e537d026b8d3ed91655b540ef86cceb", "05a1357946de5eca42a477b7b268db4944219a2e", "61977858b3eea4f5a6d81393301e7298ade7a2d8", "46a38fb1e26c9957111d154c6fc2c39101ff0ba1", "2c84daae142c5b0f4ca6a6772ca7e8cac7d7afca", "118da4d571ee02b4f31b5c4c078857472e77ba1e", "765e6f4feeb1f7d59d2b3c011e2e38814a958afa", "13b26d008210fffeb8a77c9e90f1ff837523c536", "5909192b374eac0cda4df7c986ebc997cdcd6002", "7129b305ce45f83127e928e8510da9fae0783905" ], "paperAbstract": "The performance characteristics of modern non-volatile storage devices have led to storage becoming a shared, rack-scale resource. System designers have an imperative to rethink existing storage abstractions in light of this development. 
This paper proposes a lightweight storage abstraction that encapsulates full-system hardware resources and focuses only on isolating and sharing storage devices across multiple, remote tenants. In support of this, it prototypes a shared-nothing runtime capable of managing hardware and scheduling this abstraction in a manner that avoids performance interference and preserves the performance of the storage devices for tenants.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-nanavati.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_nanavati.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/nanavati", "http://www.cs.ubc.ca/~mihirn/papers/decibel-nsdi.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-nanavati.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_nanavati.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/e96b/6c23d6ebe5ebb1a4b3b42fd9b047cd6e04fe.pdf", "s2Url": "https://semanticscholar.org/paper/226ca798b529c13605a2aa7fe75d58f4188f850a", "sources": [ "DBLP" ], "title": "Decibel: Isolation and Sharing in Disaggregated Rack-Scale Storage", "venue": "NSDI", "year": 2017 }, "2288e3e4686b5e91e30b164bd53e0c7334e5041c": { "authors": [ { "ids": [ "2869488" ], "name": "Boris Teabe" }, { "ids": [ "23204282" ], "name": "Patrick Lavoisier Wapet" }, { "ids": [ "1685006" ], "name": "Alain Tchana" }, { "ids": [ "1679417" ], "name": "Daniel Hagimont" } ], "doi": "10.1007/978-3-319-64203-1_24", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_24", "entities": [ "Central processing unit", "Cloud computing", "Data center", "Multi-core processor", "Scheduling (computing)", "Server (computing)", "Virtual machine" ], "id": "2288e3e4686b5e91e30b164bd53e0c7334e5041c", "inCitations": [], "journalName": "", "journalPages": "332-344", "journalVolume": "", "outCitations": [ 
"41bbedd069ffaa2b1ffa1ce640f101d2f2980f4a", "cce432dc890df08878af851f360f6b8fc9e1e4d6", "46fa3ec8f2fa7d0683ffaeeb438af76c6627823d", "132e28a9047b4a5159c08338ff65f112b124b3f0", "057339544e31d9cb2ef807bddff2b705b8c674dd", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "5ee9d84a9a000f3edf4d40301e53023b4f25abb1", "84c31932d221afbd7d50f55e16900664b1027a1a", "7a002e42c709cd3a0468e5c992ab296116721a00", "0cb4b930159a456cd3ab7e253e0cab5c5b28c8c4", "07ccd8f48c5067fff6f50e6654ff410195ff0a91", "17dea513763c57dcd0e62085045fb5be6770c600", "46742c000a65f676c00ec4e33d19d535a1c29dd7", "1fcec27437d40285684aad5c68d2db076b27a195", "aa883536e1a8622a6ec537b54dd115cf352f5107", "043029ff68d0449eacae8a67fc62ed4ee03215a2", "0b8060041bb38ec8d685dfabdca2be2fbc39d241", "221f78156aa66373f85883d21160a2445c0cc383", "398b95b6f7f7861819848ee193194a772abc1a72", "2f4b9ac4a0694f0b1681348334befba0bfe9d897" ], "paperAbstract": "In a Cloud computing data center and especially in a IaaS (Infrastructure as a Service), performance predictability is one of the most important challenges. For a given allocated virtual machine (VM) in one IaaS, a client expects his application to perform identically whatever is the hosting physical server or its resource management strategy. However, performance predictability is very difficult to enforce in a heterogeneous hardware environment where machines do not have identical performance characteristics, and even more difficult when machines are internally heterogeneous as for Asymmetric Multicore Processor machines. In this paper, we introduce a VM scheduler extension which takes into account hardware performance heterogeneity of Asymmetric Multicore Processor machines in the cloud. Based on our analysis of the problem, we designed and implemented two solutions: the first weights CPU allocations according to core performance, while the second adapts CPU allocations to reach a given instruction execution rate (Ips) regardless the core types. 
We demonstrate that such scheduler extensions can enforce predictability with a negligible overhead on application performance.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_24" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2288e3e4686b5e91e30b164bd53e0c7334e5041c", "sources": [ "DBLP" ], "title": "Dealing with Performance Unpredictability in an Asymmetric Multicore Processor Cloud", "venue": "Euro-Par", "year": 2017 }, "2290cb78c0c7780a52906746775d896852680fde": { "authors": [ { "ids": [ "3251920" ], "name": "Zengfeng Huang" }, { "ids": [ "2873542" ], "name": "Xuemin Lin" }, { "ids": [ "19262604" ], "name": "Wenjie Zhang" }, { "ids": [ "36890630" ], "name": "Ying Zhang" } ], "doi": "10.1145/3034786.3056119", "doiUrl": "https://doi.org/10.1145/3034786.3056119", "entities": [ "Algorithm", "Approximation theory", "Communication complexity", "Computation", "Server (computing)", "Video synopsis", "Workspace" ], "id": "2290cb78c0c7780a52906746775d896852680fde", "inCitations": [], "journalName": "", "journalPages": "347-359", "journalVolume": "", "outCitations": [ "e4c5a8575a2576c4b9a6df65af6b7d5e657373ac", "bf31569c8f8a88ac92cd724c804ca82faa28793a", "2ea62ecc37684cdcb39b6dad32fdf1387c175f2f", "0d0f5708846382d9877ec6814bffc41ef1f9a895", "a15663e0c0a2427ac4da5161e4ed75d331a5a2be", "6094392d07d36c086a988493686b73ebca39169b", "1a45cee17293eaaacea4d605df9a2c7d478a1f6c", "a25f6ee864f0c4fd95d9ceb2f4868e9e3fe51786", "5b8b0ca444c9efffb82d221ac01197730ebf58e6", "238a0814109dc166f1a1c0c3b5c33bc59250ae3f", "36797a5cad36a29fbc6540590130f99857f3b080", "1e76727be351d6289311d9d1d65c494a683ace0f", "b50e429252a5c3135977000c67f977ba222a8c59", "4ccf6079841b745d60250b626a7836c80e4334bd", "0e5514011217c2468895df1dc8bd7ae8399a0c10", "0e92d987bd466eebb7993e8bb04c811ce61a4882", "6ddb6f1fe0a183e41f0f03366fe0543879a454d3", "3946e3a4a4f5b42d55859153e98d3e83151303bb", "c3e41ef3164f8d347e293e1ed38d1fce0ade0d0c", "0284d47384583c2f056ae2e8f6bf14f8b4fc239d", 
"2a1db0387c25d0b701b641541470c8d1d5e0b356", "3391f3866f95ef17b3e2caf29bb07f51d770a316", "21a2ef66e80d3c6904d88ce3acbe0d2a179e8e72", "32081549f5f90db81f6569ef9eecedeeab5d46df", "1fc44197ffa68ee7143cfac9645d96f98fd7a4d4", "c10bc58db1fb2e409238949668b31db2eb84f98e", "6cbb32a31442991240b72051080425f698bc3206", "36c1dea92707b2e977930054059b01b4d9a388e9", "ed183bc71216446b69e32f21d5aad1e636b7be7c", "07f5b3d8f89645d87b1005ddb893dbbbeaae20c6", "28798eeef9efbb526a65ff350e68f044b3770bad", "687b2994d97cd25003531e77f804e16842af430c", "002d7258e20a8f75b7052ead7ab2bf26feeecdbb", "6f3bb84ee1b5d638e2d605ae0eb1014e2b6e3931", "54b93fe093fe15b809bc1a9783ee93cf12ef9c9c", "6d31406c47395cd6b8dac9f5b81483867e3ae67e" ], "paperAbstract": "A sketch or synopsis of a large dataset captures vital properties of the original data while typically occupying much less space. In this paper, we consider the problem of computing a sketch of a massive data matrix A ∈ ℜ^{n×d}, which is distributed across a large number of s servers. Our goal is to output a matrix B ∈ ℜ^{ℓ×d} which is significantly smaller than but still approximates A well in terms of covariance error, i.e., ||A^T A - B^T B||_2. Here, for a matrix A, ||A||_2 is the spectral norm of A, which is defined as the largest singular value of A. Following previous works, we call B a covariance sketch of A. We are mainly focused on minimizing the communication cost, which is arguably the most valuable resource in distributed computations. We show a gap between deterministic and randomized communication complexity for computing a covariance sketch. More specifically, we first prove a tight deterministic lower bound, then show how to bypass this lower bound using randomization. In Principal Component Analysis (PCA), the goal is to find a low-dimensional subspace that captures as much of the variance of a dataset as possible. 
Based on a well-known connection between covariance sketch and PCA, we give a new algorithm for distributed PCA with improved communication cost. Moreover, in our algorithms, each server only needs to make one pass over the data with limited working space.", "pdfUrls": [ "http://doi.acm.org/10.1145/3034786.3056119" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2290cb78c0c7780a52906746775d896852680fde", "sources": [ "DBLP" ], "title": "Efficient Matrix Sketching over Distributed Data", "venue": "PODS", "year": 2017 }, "22a9113560dee21893be2aeef747bf706378f32f": { "authors": [ { "ids": [ "38722980" ], "name": "Esteban Rangel" }, { "ids": [ "2879411" ], "name": "Nicholas Frontiere" }, { "ids": [ "2675615" ], "name": "Salman Habib" }, { "ids": [ "2306356" ], "name": "Katrin Heitmann" }, { "ids": [ "1847672" ], "name": "Wei-keng Liao" }, { "ids": [ "1725914" ], "name": "Ankit Agrawal" }, { "ids": [ "1686646" ], "name": "Alok N. Choudhary" } ], "doi": "10.1109/HiPC.2017.00052", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00052", "entities": [ "Algorithm", "Cluster analysis", "Cray XK7", "Halo: Combat Evolved Anniversary", "Message Passing Interface", "Mock object", "Multi-level cell", "Parallel algorithm", "Simulation", "Supercomputer", "Synthetic biology", "Synthetic data", "Terabyte" ], "id": "22a9113560dee21893be2aeef747bf706378f32f", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "398-407", "journalVolume": "", "outCitations": [ "e895b5567d6c4f063a7f4d774a7d9d722c1b9a8e", "3197acd7fc01f3454ae995a47e2050f97325bef4", "09a8c6bf9824f54aab6c558101c409530e533f2a", "b23594789a9276c9ce3583e6d08e3e19dfa7f8e8", "007faa791fb3fa3b9d1b011fd6ec4315ba2de518", "2371632c6f867309d08d1f9c35de3d5d24ff985f", "4e6f0ff274f3bafc51ee885fc92b942222e8d386", "d0f1f6176018d45facd74e3aa46087d5a5c26ce8", "d4d599728e3e318b12545539d1e4c09518bcd9f2", 
"c64834f7c0fb091b72b82f3c05533f45aefe1e89", "52df6a1cf57ef5741cca4b5d96265ec628c1b2be", "73bf61058a8df6dca48a6e0ef6221527cc6d009d" ], "paperAbstract": "Cosmological N-body simulations rank among the most computationally intensive efforts today. A key challenge is the analysis of structure, substructure, and the merger history for many billions of compact particle clusters, called halos. Effectively representing the merging history of halos is essential for many galaxy formation models used to generate synthetic sky catalogs, an important application of modern cosmological simulations. Generating realistic mock catalogs requires computing the halo formation history from simulations with large volumes and billions of halos over many time steps, taking hundreds of terabytes of analysis data. We present fast parallel algorithms for producing halo merger trees and tracking halo substructure from a single-level, density-based clustering algorithm. Merger trees are created from analyzing the halo-particle membership function in adjacent snapshots, and substructure is identified by tracking the \"cores\" of merging halos \u2013 sets of particles near the halo center. Core tracking is performed after creating merger trees and uses the relationships found during tree construction to associate substructures with hosts. The algorithms are implemented with MPI and evaluated on a Cray XK7 supercomputer using up to 16,384 processes on data from HACC, a modern cosmological simulation framework. 
We present results for creating merger trees from 101 analysis snapshots taken from the Q Continuum, a large volume, high mass resolution, cosmological simulation evolving half a trillion particles.", "pdfUrls": [ "http://cucis.ece.northwestern.edu/publications/pdf/RFH17.pdf", "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00052" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/22a9113560dee21893be2aeef747bf706378f32f", "sources": [ "DBLP" ], "title": "Building Halo Merger Trees from the Q Continuum Simulation", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "22b283679a9f1127a9d6db96702be5b2af361808": { "authors": [ { "ids": [ "34706774" ], "name": "Tomoharu Ugawa" }, { "ids": [ "1959352" ], "name": "Tatsuya Abe" }, { "ids": [ "35219029" ], "name": "Toshiyuki Maeda" } ], "doi": "10.1145/3133877", "doiUrl": "https://doi.org/10.1145/3133877", "entities": [ "Algorithm", "Central processing unit", "Communications protocol", "Correctness (computer science)", "Garbage collection (computer science)", "Interaction", "Memory model (programming)", "Memory ordering", "Model checking", "Mutator method", "Phase-shift oscillator", "Programming language", "Race condition", "Sequential consistency" ], "id": "22b283679a9f1127a9d6db96702be5b2af361808", "inCitations": [], "journalName": "PACMPL", "journalPages": "53:1-53:26", "journalVolume": "1", "outCitations": [ "519810f2bc7760e7873675d2b4ddadc51cf64d6e", "186667745aac401719e0f8d815c54e87b3e94166", "b25836d60f8598f823dc245b65f5b8653dad81f6", "a66a99b317e745b1e840929664b7cdb468de5463", "527d6b304013472b61091b8460c5d883cd2f65f2", "4d1e3d20531b7118c50b137715b69926d990d7c6", "36585342826f89238e569a2a7ae75492eda54835", "2d58d5bb3eefdf5e83fd7bbca82d0c1d433da42e", "238882f2294e5e61f43997e50cb49967cdd0c1d2", "393a21e5916983c560c9c9cb82f8ba726b183861", "6899c00a5613dcebcaa6cd0c13c2cc445b5ce2e1", "3ec1e762885365333df933e15aa67a8bdad8f2f7", 
"47b8b9cd7b064619e6bf25cde95e7af4bf5bc197", "2e8e25b722faaa8791a61c9feb96db64a51a973f", "8d991d1814d4dc0246fa5b11257bc7c911f98830", "061a294940579506fcb89999370eba8b8799346a", "9efa0f0e898f1181d4f823594d90504921769838", "12a4287ea57aa7fdd21cd7527af6dd92ed1855b0", "f11efebeac8a1a99ba0df5100330e4aa3f9d7676", "987adbbb4b5baff729cf3907d7f05a86e8651849", "1cea13611f860f7e69924179222bd5b5aa75f5a0", "e7b713d44e9ac45843ccbd6bfdc09f9e27f72776", "07caae8e478da3201087287781a00cdf001d8694", "55ca1942c563219a16b95e8f3b4bc0437e01fc5e", "3cc88f6e02b919b48ffb7395489379f55edcb74c", "2dc5ecf5ac98dcf394e3aefbbc60c11e4e89de5c", "2130cef3c522d0388789dbb0adc268cec25dd746", "000bedb693c76cfb873227503fc693005ec320b3", "075bab8f98d132ad65683f6bb313c2ab2b822e7a", "9770fc9baf0f2b6c7521f00958973657bf03337d", "1c8378e621cec5ecd94974efaf305275b8e4186b", "3f67f2b1bcd351778d64b87dfde9227d4cee2444", "22f151e4ea96bb05b5619494ff266891832a54eb", "0483e40fcbf09a5b54c435e61ace63d108bf33f9", "254fe5dec3f90810a89ea02ae66e8f1d60b5054a", "114b990809a746371663b389cc474cc5e1409561", "d4c5e14e27b45266532c74f6aa0d51a1a4280e7c", "1031dd14412b59a28527e005f078f470b5b04dc0", "096f6d70619a2c52fee69026c48aba884d333185", "af96af875eb346882976db8fa318d0b9d6d71461", "42bc2d3b6f6d28be0f72ac4bf89ec636e015b892", "21161c8efa04cd2ec2e4f121fc720d7e2ffc4e38", "7ad22b491f452f5dce031948152ad9d6ffab4ae5", "efdf63a6258b8e42488feca23bdf4118c7b59b1a", "2814d43ef6c8811d6844e3125dd3d4c87c2e226a", "170746e36dfe606ca448ac4ca518b91bf6f828d0" ], "paperAbstract": "Modern concurrent copying garbage collection (GC), in particular, real-time GC, uses fine-grained synchronizations with a mutator, which is the application program that mutates memory, when it moves objects in its copy phase. It resolves a data race using a concurrent copying protocol, which is implemented as interactions between the collector threads and the read and write barriers that the mutator threads execute. 
The behavioral effects of the concurrent copying protocol rely on the memory model of the CPUs and the programming languages in which the GC is implemented. It is difficult, however, to formally investigate the behavioral properties of concurrent copying protocols against various memory models. \n To address this problem, we studied the feasibility of the bounded model checking of concurrent copying protocols with memory models. We investigated a correctness-related behavioral property of copying protocols of various concurrent copying GC algorithms, including real-time GC Stopless, Clover, Chicken, Staccato, and Schism against six memory models, total store ordering (TSO), partial store ordering (PSO), relaxed memory ordering (RMO), and their variants, in addition to sequential consistency (SC) using bounded model checking. For each combination of a protocol and memory model, we conducted model checking with a model of a mutator. In this wide range of case studies, we found faults in two GC algorithms, one of which is relevant to the memory model. We fixed these faults with the great help of counterexamples. We also modified some protocols so that they work under some memory models weaker than those for which the original protocols were designed, and checked them using model checking. 
We believe that bounded model checking is a feasible approach to investigate behavioral properties of concurrent copying protocols under weak memory models.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133877" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/22b283679a9f1127a9d6db96702be5b2af361808", "sources": [ "DBLP" ], "title": "Model checking copy phases of concurrent copying garbage collection with various memory models", "venue": "PACMPL", "year": 2017 }, "22bc7549801fd359f932bbdc11b8ca24b87baadf": { "authors": [ { "ids": [ "2520493" ], "name": "Reza Shokri" }, { "ids": [ "34828439" ], "name": "Marco Stronati" }, { "ids": [ "3469125" ], "name": "Congzheng Song" }, { "ids": [ "1723945" ], "name": "Vitaly Shmatikov" } ], "doi": "10.1109/SP.2017.41", "doiUrl": "https://doi.org/10.1109/SP.2017.41", "entities": [ "Black box", "Denial-of-service attack", "Discharger", "Inference attack", "Machine learning", "Row (database)", "Spectral leakage" ], "id": "22bc7549801fd359f932bbdc11b8ca24b87baadf", "inCitations": [ "cf7e5a59cbe6fa10840a2f5e1c21adadc843d401", "0faf801e0511cfce8953b4766523c771d156cdb4", "142f519983e01cbac6c344b35e51d7c32da5db63", "6bc565939f5ff4d96cbfe502dd5fa539098d309a", "0c8072cc5e00910690103ef1bd015f0dc21412ac", "ebab687cd1be7d25392c11f89fce6a63bef7219d", "8d2bda7a475b28410966c876b1b3007dee7cd7c5", "13afff7af3a56163fdaa1a2449e5e06ae21137ad", "5dd660b3e464e0f9f9a1af62e81f6f17fd4e2f66", "26b4983e14e6c5c1b35120986c008982764c844b", "a8ae287de9f6610d609e03337cbec99ce58a64e5", "398342181a3109fc24e567855211dc1428e65cd4", "232bb5913f12cdcf8419a3e44d06a5d6fffe2c9b", "5ce1cdd95b3977e66a5c22fb6cab577a8a65597d", "46e7e14db896edb811cd90e317e356993d588eef", "1825136db5f034642f5ff5fe90280e9a1a36d70a", "616e94334177eb1e330115f19d02416709a3e373", "9dc1777eb88d2a128fdc42c965cbcb806ae43a4e", "eec0bc4c3fddbaf78feb0872a195fb3aeb01010e", "041ed3b277e5852a28acd23740b0772a7ce3c6ef", "abfa95eac71a3aa74a3eeb92158c88b06e16b6a1", 
"71a1bc401c7e11b60f830b800c32c86936cc5b15", "c6fc6608ddbf9557f4fbc16dd02bcfe3f29bbf61", "28c8b8714a1c072e49d0ffed7efa2e54f423b185", "44a97f4eaaefaf5338f8aed2913d5debb2459f7e", "ee9836b58c64fd6c6f40738a2383177a7e51d65a", "a4d513cfc9d4902ef1a80198582f29b8ba46ac28", "0fbce2f1578790c3ee8fcc44093bb3267269e99b", "b3f2a11d45757e675be123d55ec0eb192bcca990", "d5c7280a8e57261f394622a92a146481c36830e2", "92880b163ad99d4063295d77cde7d571fcd5106c", "32d8e555441c47fc27249940991f80502cb70bd5", "262407f20644748cd350930e9b7e889ab2f2db34", "988ae5e66cc727fb33d09e0a9860a537c50119e4", "5f3101a9ba19e618c3e05c70ecdba63c1c6a3f8d", "22684e1fcecd742c246c50788095c591a23d1f5b", "29b14b6f0aee8cb3ea6da4a5b08a21aaa868bba1", "636421d05f9eb19ce083af9ed01a8a7be23104a2", "5ae305964a06d749d382df9524ff00a98be10fa8", "16860d3f7b5a776a229f589ddcd6a7f6810098b1", "197443ffa4755dcbba03cc92160cbeb4dbb4f6de", "b18858ad6ec88d8b443dffd3e944e653178bc28b", "7c4f52328c2869bdff8034d2867baa5b67d0ce27", "4fbf4fd303d969606edc6f1cb42642ab2d11ce14", "ca4cddac342217e3c0143cb7f88daf1f50033c69", "f0fb3662d505536795a7a6a6a9a4459cd868742a", "0de06bf7929ce2190e04ea9e41980cff85c24ed4", "027158a5050a09a0f66188372e2eb1584215fac5", "17d786aad51247107819d1f26c7f5bbcd0504603", "4a8c332b09bb99333a8bce6a4640a20c1352aa63", "c28f0e12ad5fadb43a89c420da6523bd9a3aced4", "6f5c0bf2582b7b72812535d72a0215dd3070b822" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "3-18", "journalVolume": "", "outCitations": [ "158d62f4e3363495148cf16c7b800daab7765760", "595a00f0975b5d5c28d904ddba1ae5a493316573", "8fe09c5c88c3dd0700051c48fd917ac480436a4c", "032d59d75b26872d40081fb40d7a81c894455d91", "65926b61d0308954bd6cc4f6cbe46eef64147635", "459d3adf0528bed82420374ffb9ff50c2cb34f03", "20b24b61d0ba7265a659b56edb5bdc6f05e60ebe", "f63487b3fda2d96d8b3e97391448c76e00f2353c", "13309bd4abd75d0bbacb7d4b80944c994958237f", "6f2632d3569223056c040899b5891980288539d8", "17fac85921a6538161b30665f55991f7c7e0f940", 
"7ffe3790234f977caee2f4850ad2c33734d24827", "0a7196fcadeb009d5582b02ce4aa59546f6036e4", "c1d36203276052765afbc8d9cd822ba5d0384627", "65701a018397691d63142704716cdf358a1b5a54", "34f25a8704614163c4095b3ee2fc969b60de4698", "02bc27c39eaaa6b85d336be81b15ca19f112a950", "37bbe6d64cb4ff9ad546bfa36b0512f580bc6bf8", "2824b6a3d0096b0b522f4b7a7659b5f792f93d8f", "5d90f06bb70a0a3dced62413346235c02b1aa086", "0279d698fbe6a3dd05893f69880019cad2b68014", "9c018f1b0aabe84ba61dfc969e273f2a9653c6ec", "49934d08d42ed9e279a82cbad2086377443c8a75", "006cb500fd0b25200e12eb5a024756aea3d569ed", "74fc396d0b8ec548d600395182f12c9b06cc84e9", "19c40cc8456950acc93553591f4a79b138bbeaf0", "5fa89d670611f44033598907a5d3c69af9c4ab68", "fd2711cfe890675e8d885df88f3f76b5be5b39a6", "283ecc8622694c070fa53aee7a1c37dadc603f8d", "316d5642b39ba001efc8949cb87ed83eba1def95", "11ccb00bd3ff98e3f46a51cca059241c70954d4f", "000f2d99632d5d6c494bf9e1b179638e48433e99", "4afc353a68ce5cc9e17febaa3199da43ba549840", "38e40b8a740e7358d6b956ee8b8638b956354532", "0432b525f4e6e4a31023789581f6de7fc573c42f", "326bb49d3ae9e1e1551028200916192e50004105", "56dfa7b09295f6bd79d1b81bfacc156fa7171aa4" ], "paperAbstract": "We quantitatively investigate how machine learning models leak information about the individual data records on which they were trained. We focus on the basic membership inference attack: given a data record and black-box access to a model, determine if the record was in the model's training dataset. To perform membership inference against a target model, we make adversarial use of machine learning and train our own inference model to recognize differences in the target model's predictions on the inputs that it trained on versus the inputs that it did not train on. We empirically evaluate our inference techniques on classification models trained by commercial "machine learning as a service" providers such as Google and Amazon. 
Using realistic datasets and classification tasks, including a hospital discharge dataset whose membership is sensitive from the privacy perspective, we show that these models can be vulnerable to membership inference attacks. We then investigate the factors that influence this leakage and evaluate mitigation strategies.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.41", "https://arxiv.org/pdf/1610.05820v2.pdf", "https://arxiv.org/pdf/1610.05820v1.pdf", "http://arxiv.org/abs/1610.05820", "http://www.cs.cornell.edu/~shmat/shmat_oak17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/22bc7549801fd359f932bbdc11b8ca24b87baadf", "sources": [ "DBLP" ], "title": "Membership Inference Attacks Against Machine Learning Models", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "22d9bd7d4d4e071ae573ac56fca7b58824c50801": { "authors": [ { "ids": [ "20652154" ], "name": "Syed Akbar Mehdi" }, { "ids": [ "2935796" ], "name": "Cody Littley" }, { "ids": [ "2174285" ], "name": "Natacha Crooks" }, { "ids": [ "2445753" ], "name": "Lorenzo Alvisi" }, { "ids": [ "3155879" ], "name": "Nathan Bronson" }, { "ids": [ "2665531" ], "name": "Wyatt Lloyd" } ], "doi": "", "doiUrl": "", "entities": [ "Causal consistency", "Causality", "Data store", "Lossy compression", "Observable", "Scalability", "Snapshot (computer storage)", "Snapshot isolation" ], "id": "22d9bd7d4d4e071ae573ac56fca7b58824c50801", "inCitations": [ "8f4dea0f10e050c46a73c3be32e1a6bc476b3877", "efb351341158c8cb92ea6f479021c05e8e2e6120", "922b8a397dfdd51f91ca27d56d2a2c5b6c61ba7a", "c1447c4c07721e4e444aaa7ad5bb6a661c742bd2", "8c17cb64a2153ed38d7a2517ac6b57083e0a0eff", "f9764ea76e896d199bcb2864b92086fd5cd5d970" ], "journalName": "", "journalPages": "453-468", "journalVolume": "", "outCitations": [ "c8ec9739351931965979a1f1ed3b9d29c6ad7933", "13c27125584651329f66461981cbb20fa63e9023", "3702d6e0c78050f3261fdbf0eb1aefbac59fb8cf", "7038e23695dbc4d8a9d1b7c6dff8dbc138009c4b", 
"ce40e225ffa0b2c4a4a2e25d7b65d33978af1eb2", "32257d8d2b08c87e58c7b7f4b2430d58e4b51a81", "2a42fb5ad895ed993219836b6b4ff93670c9db71", "98cca67dfd0320d56030dd6637a733436d2b521e", "2acf4ea8ebbb795965bf05c155fb90b6ff926f26", "36147a54dfca257c03663cb4e9bb8589f52b7cef", "091a8b2a10483b9899c667862dcfa92fc130bb74", "5c9793fa07fcaaae864eb89fd1c1b9f6905ec546", "223b9e0e1bf2d696458ca0fb7aabb1bb0ea0b639", "01293cc2b2bda3c38c7095d2ea1813fcb0611a3e", "3dbb0beee26501a93522230a094cb359eb121c70", "1c8195cadc7ad4a8b59b16fe77574dd6d160d7d2", "71c0dd6bd1dd57716b6797043e9f09b951c88a22", "24cece61e2128780072bc58f90b8ba47f624bc27", "4827cc74dba0c39172554cf0116eb111797f0d1b", "5862fda99c4d83ce7a6bc5fda774ceeb5d573845", "efb351341158c8cb92ea6f479021c05e8e2e6120", "259a11bb2ccac5af9128b00c2bd0237c3f712d3c", "bc631e10de057f1ae6f65cb1b6f4baac1024e449", "cdee1c49685a1e66b040b6c8381ce6e85f643f3a", "7f25a3491cc38f502c5eb9e00a4f88af1bf82a4c", "9cd9321b82d573447f08d84e9a8ca31c46fd6b8e", "55bef5db971deed1358bcb2b375d6832b9ba6a1b", "068e59b88a1230d709d99c83a45d3a5b91260810", "1aea752ab6558d73dcd4302750acb7132c7ba6ab", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "87b94c2f86b9e8838bf15276fcfe9be0fd293588", "4593ae644f04d76f582dedc4cc32d2acd33c9a93", "e706b8ae2952740cb95c0182c4c44b0d11cc54c1", "0a9cc699ed94b4729de3255d76305b91b6a56aaa", "1664b784dd7d446ee8838e0eec5b980f61792007", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "6f2f219a4f6d64843efe35f868ed919ce8b3a031", "9748241beb02ef1e2d0e6dc877c04b354033a838", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "23346a18e78062e586cab22195819eb0f18ffc66", "35c2f7e0454adc0130c4279fce84a31701cebc67", "01667cd68d259d08807e266e07c4f8eb1c81d2af", "9aa0d7253574e50fe3a190ccd924433f048997dd", "5dd350cee6ecfd097b57772f89e6341ff05b5725", "39ac2e0fc4ec63753306f99e71e0f38133e58ead", "0541d5338adc48276b3b8cd3a141d799e2d40150", "6fcaf13d4a3d72ea53060941efa4b5cd57de0503", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "635fe1706a2a719b9c7935712db6e720fd418fa9", 
"49532e318be89eed64725b32617c1fc570f824a4", "fc3fbb4c76448e8968f8a19f076d133b2e7a2849", "8318fa48ed23f9e8b9909385d3560f029c623171", "05a618847e4f08e5bca29dff732757779722b2e0" ], "paperAbstract": "We describe the design, implementation, and evaluation of Occult (Observable Causal Consistency Using Lossy Timestamps), the first scalable, geo-replicated data store that provides causal consistency to its clients without exposing the system to the possibility of slowdown cascades, a key obstacle to the deployment of causal consistency at scale. Occult supports read/write transactions under PC-PSI, a variant of Parallel Snapshot Isolation that contributes to Occult\u2019s immunity to slowdown cascades by weakening how PSI replicates transactions committed at the same replica. While PSI insists that they all be totally ordered, PC-PSI simply requires total order Per Client session. Nonetheless, Occult guarantees that all transactions read from a causally consistent snapshot of the datastore without requiring any coordination in how transactions are asynchronously replicated.", "pdfUrls": [ "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-mehdi.pdf", "http://www.cs.utexas.edu/~lorenzo/papers/Mehdi17Occult.pdf", "http://www.cs.utexas.edu/~ncrooks/2017-nsdi-occult.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-mehdi.pdf", "http://www-bcf.usc.edu/~wyattllo/papers/occult-nsdi17.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/mehdi", "http://www.cs.cornell.edu/~lorenzo/papers/Mehdi17Occult.pdf", "http://www.cs.utexas.edu/~samehdi/cloud2017_poster.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/22d9/bd7d4d4e071ae573ac56fca7b58824c50801.pdf", "s2Url": "https://semanticscholar.org/paper/22d9bd7d4d4e071ae573ac56fca7b58824c50801", "sources": [ "DBLP" ], "title": "I Can't Believe It's Not Causal! 
Scalable Causal Consistency with No Slowdown Cascades", "venue": "NSDI", "year": 2017 }, "22f136c1a906fb12d395a03b59f6be2e34d61cc3": { "authors": [ { "ids": [ "37931585" ], "name": "Luke Nelson" }, { "ids": [ "3133276" ], "name": "Helgi Sigurbjarnarson" }, { "ids": [ "3189426" ], "name": "Kaiyuan Zhang" }, { "ids": [ "27739949" ], "name": "Dylan Johnson" }, { "ids": [ "3211657" ], "name": "James Bornholt" }, { "ids": [ "1833123" ], "name": "Emina Torlak" }, { "ids": [ "31825486" ], "name": "Xi Wang" } ], "doi": "10.1145/3132747.3132748", "doiUrl": "https://doi.org/10.1145/3132747.3132748", "entities": [ "Broadcast automation", "Correctness (computer science)", "Intermediate representation", "Kernel (operating system)", "Linux", "Mental representation", "Operating system", "Push-button", "Recursion", "Software bug", "System call", "Unix", "Unix-like", "Verification and validation", "xv6", "z/OS" ], "id": "22f136c1a906fb12d395a03b59f6be2e34d61cc3", "inCitations": [], "journalName": "", "journalPages": "252-269", "journalVolume": "", "outCitations": [ "22f88f630451cdb6d1ee1a632df81a5e2dd50285", "bc89f7f86e0b8f08c42266ea3780692bc6c39ccd", "8761ff5d92737f409e5d1d326967892b3bd24371", "1d0f2662cca5c859419b78fea468f4bc2f39e87d", "3ed32ed20fb7161d7a8e7ace93c5d341ba3438d9", "05a715eb69fc28be46a7ba37d0a99e5fb72e4973", "6c725d2a7e88515c5f7c877936f90b0184c4fe8f", "1a3bb67fdc6d5996f635ef9ae91ffae8ce7928e3", "2a9d09d8e2390c92cdaa5c8b98d6dd4cb394f638", "624168bb99821e7c9fef722c1758ceda42eba33f", "059170b316ecb882014beced829b682a04758dd5", "3251ddc15c1891f36a1c912179781da972851443", "7233db6fd176e80188cd087115383eda51cbaeb3", "673b65fe0d2b4b09999b5ab2dbc7dcce98be516b", "0c8f20da78ebc7891141c175fecb7a5c026f3e7d", "01f21f3aacb36a425aa9213a10ccc543a11659ab", "17886b4911ffd50d7e02a574caad34a286458b3a", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "f72eb274d396640d8e4805218d05aaef3f485af2", "5ddc6a439cdc9b4eaebdad8c20976f1f0be4523f", "2f3edee1d3459096ba1de54450fca4d8406d1ed1", 
"047fdd696fdabd7b01af7d09c459e6abe7793170", "1ee63a3fa9bde0259be25e5511520b295c4b900e", "04d6f78e14a92fa72bcefc206c24b2df7b27e5e6", "37791336941a0d954e4a98c96b1a66ca7be43eb2", "2f2941de162a17eda0ffbf7eef8981ee25808df5", "06567663b31f7b8cf1de3d5f2ca6c79422ef60c8", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "08fecf272208f917601b4e076177018b55f177bf", "2184b060ddb8da62693bc9466ee095f96a604f71", "16a455aeacd14529bee92b0c197619fa2d173151", "0b0422b5864ca1a25d6af274bad11c1b2fef1ed5", "fa6a25774e9c3e16265f99f87b78a5a34e31a731", "2a08bb0db5dee11f3e0ef6f84b500671d976ebe1", "2194c3460ab71f3826db00b045b2ae590c753319", "bb86ea6a08c9b8c09a600367785b27acefff710d", "225603198cc415d363db8a8a2bd30b0df3c963b1", "36222f8eb2ccf21ca345e15186cea64506581543", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "883a595fd76cb4dc0509a1005040286b31610059", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "bc9c4e30809c1a29b72c34d35029958135fe96df", "78edf6a49c26ebbb040c44b75365c432a8ff5737", "971563f7acd4250bbafdb5e90160dcd4dc6110e9", "6e7640c890edf815eb8a22e5f6b6d625a12676cb", "7129b305ce45f83127e928e8510da9fae0783905", "40eb1febce44e881e537d3440e33e2a11692be7f", "9cdc12df48f821ab8b5b5ed884d3176f1b98a7a7", "73bded14fb8c3b4bd5c2ae554d704d3ad3ff907e", "09fca67472a49f94c54fdd3f652ad586d5ab361b", "a2c66511247a86f115e1718717bafdba478ed16f", "8dbc653d8194c257dfa198b427523191b3865464", "2d4fdf953dc3f79fd760b317bca228fe80ec9386", "62376bcf5e1b55abcaa301b9c8d0f0062b2cba27", "69b7456f3d47fed3745239b5f67996a0b9a1a5c9", "7277301fdfd711bcb556d7823c2f7d548e490f2f", "080b1f2c8316dad80d8c385dfcb82335a64a4d29", "235b9c8f10461a95398e169ecb91cf3e223d3350", "15aaa56f06eca80760943e47f1781591209f2860", "33b85ea9b4fb28ac893167c29529d62d355c06a5" ], "paperAbstract": "This paper describes an approach to designing, implementing, and formally verifying the functional correctness of an OS kernel, named Hyperkernel, with a high degree of proof automation and low proof burden. 
We base the design of Hyperkernel's interface on xv6, a Unix-like teaching operating system. Hyperkernel introduces three key ideas to achieve proof automation: it finitizes the kernel interface to avoid unbounded loops or recursion; it separates kernel and user address spaces to simplify reasoning about virtual memory; and it performs verification at the LLVM intermediate representation level to avoid modeling complicated C semantics.\n We have verified the implementation of Hyperkernel with the Z3 SMT solver, checking a total of 50 system calls and other trap handlers. Experience shows that Hyperkernel can avoid bugs similar to those found in xv6, and that the verification of Hyperkernel can be achieved with a low proof burden.", "pdfUrls": [ "https://homes.cs.washington.edu/~bornholt/website/papers/hyperkernel-sosp17.pdf", "https://homes.cs.washington.edu/~bornholt/papers/hyperkernel-sosp17.pdf", "https://locore.cs.washington.edu/posters/hyperkernel-sosp-poster.pdf", "http://doi.acm.org/10.1145/3132747.3132748", "http://locore.cs.washington.edu/slides/nelson-hyperkernel.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/22f136c1a906fb12d395a03b59f6be2e34d61cc3", "sources": [ "DBLP" ], "title": "Hyperkernel: Push-Button Verification of an OS Kernel", "venue": "SOSP", "year": 2017 }, "22f88f630451cdb6d1ee1a632df81a5e2dd50285": { "authors": [ { "ids": [ "3218468" ], "name": "Alastair David Reid" } ], "doi": "10.1145/3133912", "doiUrl": "https://doi.org/10.1145/3133912", "entities": [ "ARM architecture", "Failure rate", "Formal verification", "Internet of things", "Machine code", "Operating system", "Security bug", "Software bug", "Software verification" ], "id": "22f88f630451cdb6d1ee1a632df81a5e2dd50285", "inCitations": [ "882eb456f90e5b79ca2ddf2da5e2c1d972929989", "22f136c1a906fb12d395a03b59f6be2e34d61cc3" ], "journalName": "PACMPL", "journalPages": "88:1-88:24", "journalVolume": "1", "outCitations": [ 
"246e66fc3b5a781e3b4a125a7ad996c99d6edcca", "1aac5c5e6dda36e455bed8af80dd1fd6bb31321e", "12b7519d2c834923851641c7e03eeea9d5607c1d", "15ea0a6ac7b5849e8fc06057bd45ce08fe20985d", "605ce5fe2f5b1e478a2528f1c514b77fc2607577", "22b0227b1d2db6c7e899e207b55c92c32b7635d4", "4d1e3d20531b7118c50b137715b69926d990d7c6", "3c142ad4ca5ed211a606450801d54b3b30d687e9", "15131bc21f599fb3f81fa0401daf94745301dd07", "2817df10c4ffe29482928cb97b8ee89d8560b4cd", "3133c223a3ae8a740dee4a47363231d3c3160b16", "6c5462d31a0d0f4e6cb2ff7ae795250957d9fcab", "24c4019462c7d61f6dedfbdb9a828f4c30beb93c", "1c406afc440c357764a4e686f571f52becaaed80", "47b8b9cd7b064619e6bf25cde95e7af4bf5bc197", "4271680ae4d95b130426e165ad9e9d9b81d938cd", "3960dda299e0f8615a7db675b8e6905b375ecf8a" ], "paperAbstract": "Software and hardware are increasingly being formally verified against specifications, but how can we verify the specifications themselves? This paper explores what it means to formally verify a specification. We solve three challenges: (1) How to create a secondary, higher-level specification that can be effectively reviewed by processor designers who are not experts in formal verification; (2) How to avoid common-mode failures between the specifications; and (3) How to automatically verify the two specifications against each other. \n One of the most important specifications for software verification is the processor specification since it defines the behaviour of machine code and of hardware protection features used by operating systems. We demonstrate our approach on ARM's v8-M Processor Specification, which is intended to improve the security of Internet of Things devices. Thus, we focus on establishing the security guarantees the architecture is intended to provide. 
Despite the fact that the ARM v8-M specification had previously been extensively tested, we found twelve bugs (including two security bugs) that have all been fixed by ARM.", "pdfUrls": [ "https://alastairreid.github.io/papers/oopsla2017-whoguardstheguards.pdf", "http://doi.acm.org/10.1145/3133912" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/22f88f630451cdb6d1ee1a632df81a5e2dd50285", "sources": [ "DBLP" ], "title": "Who guards the guards? formal validation of the Arm v8-m architecture specification", "venue": "PACMPL", "year": 2017 }, "230a0beede2f474e5c2e65da3f8b9f2709c83dfd": { "authors": [ { "ids": [ "9561490" ], "name": "Yanghua Peng" }, { "ids": [ "2473452" ], "name": "Ji Yang" }, { "ids": [ "1726963" ], "name": "Chuan Wu" }, { "ids": [ "39152478" ], "name": "Chuanxiong Guo" }, { "ids": [ "1736449" ], "name": "Chengchen Hu" }, { "ids": [ "1744839" ], "name": "Zongpeng Li" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Analysis of algorithms", "Approximation algorithm", "Data center", "End-to-end principle", "Failure rate", "Greedy algorithm", "Network performance", "Real-time computing", "Scalability", "Simulation", "Testbed", "The Matrix", "Traceroute", "Video game localization" ], "id": "230a0beede2f474e5c2e65da3f8b9f2709c83dfd", "inCitations": [], "journalName": "", "journalPages": "55-68", "journalVolume": "", "outCitations": [ "022a0317d5bf2b38847b03f7c9bc3bfa35950199", "53d2179901b3df0b85c30c15cd75420b4e788524", "2071f3ee9ec4d17250b00626d55e47bf75ae2726", "bde33924af7cb40e29675408870b53a3bd3b36c2", "1dc94ea6beac4be0f1e4327fc81ac5c2f592d934", "1e4da813c29a65f19f6e9432cb4efe8b7d45ac1d", "13d673786ccead27a4ea93638682e444022f6bbf", "96a1e261a0cb1493a40648d200b5126113f10d67", "5b999d36d5230eca01532b357c7cf338a5e0d641", "0ec58ad7dffcc53018a786c069cb604ef1be5aae", "58f692e9b03cb973355aab46bb6f867239aeb513", "23ad7425efba9dd29b05821de46183c85d5c1350", "663e064469ad91e6bda345d216504b4c868f537b", 
"fff50b48f41fa4a48732cde720f456904fd5b468", "5af3c5d9c688f358f1f92a0e470c626d79044975", "0dd5aa9264802e93da9e216038e440862c5660b3", "629da79882b90091a7e20f8e2498f1f4c2220e02", "65da29a03c8905cbc0614612d1632864336c4786", "0875e47b0cb01b0eef3fa9bb9dacd7343bb177c4", "164d8d8238674cdfb9bbb2583cfc390e178420de", "058f6752d85a517aae298586fdf117acdd7560ea", "981058ba0c417be6377823bed3b204e6a85a61e6", "0f8b04cb89e455ceadf0c88fd5dd9f9a7f338ba9", "4cd73382dc17561cd276f276c61d5ebf39bf69ad", "4282e8d6678c58c3e50febeaac3d6952e9fc08d9", "2fdfb9098803f9b7d523d9bf67dca8b53cd28cf9", "1d912b67ba7cda4d341d834c1c6de96db01888fc", "11a4744f7f0883c4232a9f5aaca8b9d29bfaa762", "30e5e40cb96c1d15c80ff0aa199298675465c65c", "477019dacd6b7ec4e4919aa1976c6686ed59d2fe", "fca01f072bddcafb4f97e6e778dd9c2e1221b477", "7b9ebdab7e15cd1f99b712f620394b7754e8ad6f", "38f0ec358c3f0952927370d314779a9ea7e0f34e", "30cce0b50e211caf4193ba78b62427746b855aca" ], "paperAbstract": "Troubleshooting network performance issues is a challenging task especially in large-scale data center networks. This paper presents deTector, a network monitoring system that is able to detect and localize network failures (manifested mainly by packet losses) accurately in near real time while minimizing the monitoring overhead. deTector achieves this goal by tightly coupling detection and localization and carefully selecting probe paths so that packet losses can be localized only according to end-to-end observations without the help of additional tools (e.g., tracert). In particular, we quantify the desirable properties of the matrix of probe paths, i.e., coverage and identifiability, and leverage an efficient greedy algorithm with a good approximation ratio and fast speed to select probe paths. We also propose a loss localization method according to loss patterns in a data center network. 
Our algorithm analysis, experimental evaluation on a Fattree testbed and supplementary large-scale simulation validate the scalability, feasibility and effectiveness of deTector.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-peng.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_peng-yanghua.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/peng", "http://i.cs.hku.hk/~cwu/papers/yhpeng-atc17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/f787/d25c78fcfb9b34118ca834f311bee790816f.pdf", "s2Url": "https://semanticscholar.org/paper/230a0beede2f474e5c2e65da3f8b9f2709c83dfd", "sources": [ "DBLP" ], "title": "deTector: a Topology-aware Monitoring System for Data Center Networks", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "231c2fbbf6fcc7bd234fdfb27341700c3bce0fd5": { "authors": [ { "ids": [ "33278013" ], "name": "Divya Mahajan" }, { "ids": [ "36491005" ], "name": "Ziliang Zong" } ], "doi": "10.1109/IGCC.2017.8323581", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323581", "entities": [ "Apache Cassandra", "Big data", "Complement System Proteins", "Database", "MongoDB", "NoSQL", "Published Database", "Question (inquiry)", "Relational database management system", "Response time (technology)", "SQL", "Structured Query Language", "Throughput", "Workload" ], "id": "231c2fbbf6fcc7bd234fdfb27341700c3bce0fd5", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-6", "journalVolume": "", "outCitations": [ "40c62a5883b28aa0091254984b33b3ec1a10e99a", "48aa5ae8f93680699e1a4d0d6d8815dcd16a64bd", "9beb812cebc35cd020e8c7818a93b08bce9e1495", "6628973ed0f86c87a40d596be071ac9b7673c09c", "c83923270797eaf7a21432516c86f509d61bcc35", "c0b9345ce0adb81bccf24ef5971f3a12f9c0ed13", "39c88337433b1ddedb1a180a90dacff53810ebe5", "2ce3bf63b11abe44f07d1491badd624c743c6f70", 
"33d7d2951c5fb585b21c7acdbf94456a71b52027" ], "paperAbstract": "As big data emerges, the complexity of database workloads and database systems has increased significantly. It is no longer possible for one type of database to efficiently handle all big data applications. NoSQL databases are widely used to complement conventional SQL databases. In addition to traditional metrics such as response time and throughput, large scale NoSQL database systems pose higher requirements on energy efficiency due to the incredible volume of data (and the associated cost) that need to be stored and processed. Unfortunately, research on optimizations for energy efficiency in database systems has been historically overlooked. In this paper, we investigate numerous optimizations for two NoSQL databases (MongoDB and Cassandra) and conduct a comprehensive study on the impact of these optimizations on performance and energy efficiency. Our experimental results derived from 100GB of Twitter data reveal that 1) energy efficiency can be improved significantly for both MongoDB and Cassandra via query optimizations without degrading performance; and 2) energy efficiency does not always scale linearly with performance improvement.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323581" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/231c2fbbf6fcc7bd234fdfb27341700c3bce0fd5", "sources": [ "DBLP" ], "title": "Energy efficiency analysis of query optimizations on MongoDB and Cassandra", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "231d1306894f1cf71612b25116e77387a4a243bc": { "authors": [ { "ids": [ "1845933" ], "name": "Yandong Wang" }, { "ids": [ "1712838" ], "name": "Li Zhang" }, { "ids": [ "2166550" ], "name": "Michel Hack" }, { "ids": [ "2758173" ], "name": "Yufei Ren" }, { "ids": [ "1713016" ], "name": "Min Li" } ], "doi": "10.1109/MASCOTS.2017.28", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.28", 
"entities": [ "Attribute\u2013value pair", "Backup", "Data synchronization", "High availability", "In-memory database", "Memory management", "Remote backup service", "Remote direct memory access", "Throughput", "USB flash drive", "YCSB" ], "id": "231d1306894f1cf71612b25116e77387a4a243bc", "inCitations": [], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "53-62", "journalVolume": "", "outCitations": [ "6479c756e597c38e57aa45e2eae8550fd738418b", "1dd353938063795c06ef21d8b0b3ef3b45a2fdc1", "145088fc0593b2f95168f3ba4693bbc5487e9068", "1ac1348938a45e1da84be8caac78e3097acaf0c1", "068d0b393db03678ea1d346ee01871e91e88c560" ], "paperAbstract": "Memory price will continue dropping in the next few years according to Gartner. Such trend renders it affordable for in-memory key-value stores (IMKVs) to maintain redundant memory-resident copies of each key-value pair to provision enhanced reliability and high availability services. Though contemporary IMKVs have reached unprecedented performance, delivering single-digit microsecond-scale latency with up to tens of millions queries per second throughput, existing replication protocols are unable to keep pace with such an advancement of IMKVs, either incurring unbearable latency overhead or demanding intensive resource usage. Consequently, the adoption of those replication techniques always results in substantial performance degradation. In this paper, we propose MacR, a RDMA-based high-performance and lightweight replication protocol for IMKVs. The design of MacR centers around sharing the remote backup memory to enable RDMA-based replication protocol, and synthesizes a collection of optimizations, including memory allocator cooperative replication and adaptive bulk data synchronization to control the number of network operations and to enhance the recovery performance.
Performance evaluations with a variety of YCSB workloads demonstrate that MacR can efficiently outperform alternative replication methods in terms of the throughput while preserving sufficiently low latency overhead. It can also efficiently speed up the recovery process.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.28" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/231d1306894f1cf71612b25116e77387a4a243bc", "sources": [ "DBLP" ], "title": "Lightweight Replication Through Remote Backup Memory Sharing for In-memory Key-Value Stores", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "2329023f53624e24879418832d73fb40c5af989f": { "authors": [ { "ids": [ "4145819" ], "name": "Yifan Wang" }, { "ids": [ "3255571" ], "name": "Xingzhou Zhang" }, { "ids": [ "9073585" ], "name": "Lu Chao" }, { "ids": [ "40339410" ], "name": "Lang Wu" }, { "ids": [ "13664102" ], "name": "Xiaohui Peng" } ], "doi": "10.1109/IGCC.2017.8323568", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323568", "entities": [ "Artificial neural network", "Biological Neural Networks", "Deep learning", "Electricity", "Inference", "Less Than", "Neural Network Simulation", "Power Architecture", "Power supply", "Server (computing)", "Steady state", "Virtual private server" ], "id": "2329023f53624e24879418832d73fb40c5af989f", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ "911139ecd91536641ff8f6ca5ee8befdbd0344b9", "57ad58c42250c97c951514dccc0f49b8f42d5685", "0ec668e12ff9f7c5c968bac1b4c441320d190fa6", "25f0625a92f6054b11057423111f9285c78376fe", "da826d0a7a900dfb1b91266adf40bf63b3ef564e", "0924f9c2f4d6eea7905d75070af40960efaeb330", "15b550c3be57023b1aca8b0b81c98571cf233eca", "d154bd5a33b49ff8104d5a9362c007aee030f93b", 
"5ae9cdcd8052f3a527d662e947a12b802c5b76f7", "aac67f1d9aa42732cb2e31183b34b43432b3df4e", "1f3172f3ddaa74b5c697ae96a920610e3ad86606", "5512051589d357b40d24c673022c72c2a335cfcb", "0ea729892dd6ea4b2c09ff6824efd4dfe51c0b2a", "f3e33ace1d5e3436ca6d0b882c66a05685bff95e", "7ef3f88b656d1df120ebb465288721a601dc2774" ], "paperAbstract": "To save the electrical energy in a household, it is essential to monitor where and how the power is consumed. To maximize the efficiency of energy conservation, it is necessary to make the running power low in the power monitor system, which the tradition systems pay less attention to. This paper presents PowerAnalyzer, an energy-aware system for monitoring running states and power of each household appliance plugged into power line from a single point detection. PowerAnalyzer takes steady-state current waveforms as the appliances signature, and uses the deep neural network (DNN) models to infer the running states and running power of household appliances. We focus on the energy consumption of PowerAnalyzer itself. The energy efficiency of PowerAnalyzer is optimized from these aspects: Using dynamic time intervals to collect electric data, replacing a cloud server with an edge node to process data, and transmitting differential data over a low power wireless protocol. The evaluation results show that PowerAnalyzer offers 3.45% average power metering error and 98.38% average accuracy of inferring running states of appliances. 
PowerAnalyzer draws less than 247mW static power and 304mW peak power.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323568" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2329023f53624e24879418832d73fb40c5af989f", "sources": [ "DBLP" ], "title": "PowerAnalyzer: An energy-aware power monitor system aiming at energy-saving", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "232bb5913f12cdcf8419a3e44d06a5d6fffe2c9b": { "authors": [ { "ids": [ "2039588" ], "name": "Keith Bonawitz" }, { "ids": [ "3395858" ], "name": "Vladimir Ivanov" }, { "ids": [ "4908509" ], "name": "Ben Kreuter" }, { "ids": [ "2212030" ], "name": "Antonio Marcedone" }, { "ids": [ "2190183" ], "name": "H. Brendan McMahan" }, { "ids": [ "34521172" ], "name": "Sarvar Patel" }, { "ids": [ "1878835" ], "name": "Daniel Ramage" }, { "ids": [ "40627391" ], "name": "Aaron Segal" }, { "ids": [ "34185195" ], "name": "Karn Seth" } ], "doi": "10.1145/3133956.3133982", "doiUrl": "https://doi.org/10.1145/3133956.3133982", "entities": [ "16-bit", "Adversary (cryptography)", "Analysis of algorithms", "Artificial neural network", "Deep learning", "Machine learning", "Mobile device", "Plaintext", "Server (computing)" ], "id": "232bb5913f12cdcf8419a3e44d06a5d6fffe2c9b", "inCitations": [ "7c4f52328c2869bdff8034d2867baa5b67d0ce27", "c0e2986f8075e9dc0ebc0480e8e3cb1f5c9f80c8", "ca4cddac342217e3c0143cb7f88daf1f50033c69", "0646a88dfd7e7ce7233041eaad62076ccc55624c", "44a97f4eaaefaf5338f8aed2913d5debb2459f7e", "a902aa8d4f2fd0df2ed1dfd59a30283a3c6afe9a", "d88e815ae826a90f80dbb1bdebe5f9509aee3207", "6cae9ad284a73471a8ed9e483b1673a60d61d946", "188537913b3c661740659ceb1bf2f4917181c946" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "281", "journalVolume": "2017", "outCitations": [ "04948723dec0e6724777ee56f0d10168cce44921", "bac41b59697da3ca5c80ca08f2bbbc97a3576248", "208448ed57cb0ff70866cb3828b06610c3ff25fd", 
"1808b64aec21863489f0fe66f250890a3ac2b843", "3671338dc8c84d51b285bee79f85e7f3937a5078", "69aebe5b9f65fe92cbf25c8fecf444225a18612d", "40caa4d4068dc506cae536f2783c62fdb0fb5a77", "60d6ac52ef063d01cea47601e9b9bde1e3148440", "13e622fca1a6b52aa85898e260f9455e4ba0d94b", "2e8b9a7a085a8bc18783e76b776c6e780116efd8", "0be8170df4c1ea1cf8312ae5ed326665224d5d9c", "326bb49d3ae9e1e1551028200916192e50004105", "b532099ff8b67049f292cd62700dca37fc2be623", "b0e32f83369313c18e3ad38b47a0f0dbe42decac", "512e08451eb0d805c77b86e5821560f3b7dec565", "0341b1daa77f817d78c106dfe99c3907f1d9ea7a", "3310cb2b0f1a473e6f98cdec6eb53ec6a962ac87", "3e9c5f6f48d9ef426655dc799e9b287d754e86c1", "cf64ed742ab694d8a0ebed6c96a6f8709b9e8705", "7f17ee37b9cc8dbf5de6363c863d9e3c49768400", "64533dbddc95edf3dacf5de7a115bc41f858ecc4", "283ecc8622694c070fa53aee7a1c37dadc603f8d", "3157ed1fbad482520ca87045b308446d8adbdedb", "64028c85cd7b7e42f208e29734028572d7735c61", "8c442dab45400bc99ac63195a06fd531d13407fe", "31afd0a18126720eeef5880bcaa14768c4005387", "1e48fc5e033897f411a11e62adba0495bdab1a0e", "22bc7549801fd359f932bbdc11b8ca24b87baadf", "05128a3c7b2debc27c7ffb4ccf7469876c2d94a2", "299c6f5d8552acede715c31befee30c392d0f7fb", "19c3736da5116e0e80a64db35afe421663c4b4a8", "3cb55d539b232e309f4a5974148ec6f22afb5888", "2d96ef55434ca6561891bd1769d07d9ba10c16fb", "11ccb00bd3ff98e3f46a51cca059241c70954d4f", "5a4691aab69b7e527519cc9abfe7cd62b4865fc2", "72ab425a9ea78eef9b270df09fdcf88b6898b370", "02bc27c39eaaa6b85d336be81b15ca19f112a950", "31445e3bd3672ed743c4a089cc0db4f23357f0f2", "bcb49a06e4fb7ea831257e146073d84234f4d238", "3be2300ed50bb7bb2d8a72ab1c7667d502f12fa9", "28a6e6ceb0a92de7a49048d094321af5fab227a0", "884c13074bf05188830a19bccbad58c4d1fcd6cc", "158d62f4e3363495148cf16c7b800daab7765760", "09f72f02083830c1881b86e6016e1fe3fe41f65f", "2073b5fb43cefe522c20a6b550e25f654077edd3", "8893e1f7c2926aac06511ae0aa6e16afefef2c10", "3b03935dfc89c0cad63e05976c21fef6c9fb4190", "7f14bca3b6f51d4cfe8084798c2808c08b0214d6", 
"1a68d1bbb2eab66239e51b26b7636c453f505b3b", "902ddd904effe50d228ea25ab23c16ae664d3ce5" ], "paperAbstract": "We design a novel, communication-efficient, failure-robust protocol for secure aggregation of high-dimensional data. Our protocol allows a server to compute the sum of large, user-held data vectors from mobile devices in a secure manner (i.e. without learning each user's individual contribution), and can be used, for example, in a federated learning setting, to aggregate user-provided model updates for a deep neural network. We prove the security of our protocol in the honest-but-curious and active adversary settings, and show that security is maintained even if an arbitrarily chosen subset of users drop out at any time. We evaluate the efficiency of our protocol and show, by complexity analysis and a concrete implementation, that its runtime and communication overhead remain low even on large data sets and client pools. For 16-bit input values, our protocol offers 1.73\u00d7 communication expansion for 2^10 users and 2^20-dimensional vectors, and 1.98\u00d7 expansion for 2^14 users and 2^24-dimensional vectors over sending data in the clear.", "pdfUrls": [ "http://eprint.iacr.org/2017/281", "https://eprint.iacr.org/2017/281.pdf", "http://doi.acm.org/10.1145/3133956.3133982" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/232bb5913f12cdcf8419a3e44d06a5d6fffe2c9b", "sources": [ "DBLP" ], "title": "Practical Secure Aggregation for Privacy-Preserving Machine Learning", "venue": "CCS", "year": 2017 }, "232d892b423c24aaefcec9eb2ae211316be0f025": { "authors": [ { "ids": [ "1691086" ], "name": "Ang Li" }, { "ids": [ "40474862" ], "name": "Weifeng Liu" }, { "ids": [ "1886041" ], "name": "Mads Ruben Burgdorff Kristensen" }, { "ids": [ "1750748" ], "name": "Brian Vinter" }, { "ids": [ "39049654" ], "name": "Hao Wang" }, { "ids": [ "1971458" ], "name": "Kaixi Hou" }, { "ids": [ "39871593" ], "name": "Andr\u00e8s M\u00e1rquez" }, { "ids": [ "1798309" ],
"name": "Shuaiwen Song" } ], "doi": "10.1145/3126908.3126931", "doiUrl": "https://doi.org/10.1145/3126908.3126931", "entities": [ "Broadwell (microarchitecture)", "CPU cache", "Dynamic random-access memory", "Knights", "Manycore processor", "Memory bound function", "Memory hierarchy", "Multi-core processor", "Program optimization", "Programmer", "Throughput" ], "id": "232d892b423c24aaefcec9eb2ae211316be0f025", "inCitations": [ "907e5f587e25d0757ff2f1f1762052c3c6832f9f", "e45dea6588d1de0a23618e019031e67eedeeee26", "5de605df73ecc9f6efa619e569a057a134fa2160", "9ba97fe20d1042b080b3e2c515e4f9c0ccc6e9e0" ], "journalName": "", "journalPages": "26:1-26:14", "journalVolume": "", "outCitations": [ "7ee8186c9fc790085528f21c0ca0ca1cca42d109", "dd97355244bb2e1b369be0b2617e8452710ca44b", "323f7b288a676872bc1945d1c4a01041f5bd03b5", "57977d94ac12da26117a7fa8e927362660c77184", "34a86b7b24d4a93dbb249fc05f0b7c0f48f90aff", "c0cbbd9fb2fc5da8b44854446b23e6c464111209", "51b293468f7d27bb05ebb82b86e67f6bc443b78e", "42d96591e5583c2001c100d979a8f180e1a4e6b1", "17ded16813a7ef6e179252585a742e83f004c0fb", "2e56eddf32abc9c94bffcfe680827f05a0490a5d", "0b47e159ed9a3e5db1adc135620e7526d93abd87", "168f2e12ae9fbb6c96146f4a7ded040d73e7b44b", "f01bf24bef27ed92321860d30081eb9d08ab5c2f", "11a963dadaec54347fdafe327512a3ad7c25bb55", "86159c2269566286a5e8f724deab749c9e2750b1", "9b54b4bac88d68277f4b8bba514d3562ff8cedbf", "7f4b805160cfcbd546c70c5e781b64b85c2b4850", "d98063b0eb446c99e98684b34cb53914ca6b7206", "0199bccf87b17291be6d8823152eabad2be4f242", "81c2a5fcdb3b192790d484ea822cce888b77f66b", "161d8fe96b53093c072643aaa0f9dd3ef8f61609", "28552ecf4eaedb3461edca97304b29082b02fbab", "53c702e611d8cad55dec1c62f209bf173f171ab1", "917fd12162c12c0fd2cb6409de1dd438531c553a", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "3370784dacf9df1e54384190dad40b817520ba3a", "f8e9b050c93af6dea582563f61b6460b590bc3af", "1de6ac748387859f43bc15e15ff5380df05bae34", "0ff4cfc95f42c49aaaac4289c84c605f192c5def", 
"14a4369f0fd45b3ae2323dd71eac8980b1556f0d", "32aa2d622c8260ecd6d6f52040b67db382192134", "769d75e9cb010b76ba412d9654cf43c4edf15076", "cb6beb68971de81435aca0356987fcff8fad176d", "092217c2267f6e0673590aa151d811e579ff7760", "ddfc947a1408d623bdceb9dab89ad3bd118716c2", "d6c4c76076efecb15655274adc648af8a445ed3a" ], "paperAbstract": "High-bandwidth On-Package Memory (OPM) innovates the conventional memory hierarchy by augmenting a new on-package layer between classic on-chip cache and off-chip DRAM. Due to its relative location and capacity, OPM is often used as a new type of LLC. Despite the adaptation in modern processors, the performance and power impact of OPM on HPC applications, especially scientific kernels, is still unknown. In this paper, we fill this gap by conducting a comprehensive evaluation for a wide spectrum of scientific kernels with a large amount of representative inputs, including dense, sparse and medium, on two Intel OPMs: eDRAM on multicore Broadwell and MCDRAM on manycore Knights Landing. 
Guided by our general optimization models, we demonstrate OPM's effectiveness for easing programmers' tuning efforts to reach ideal throughput for both compute-bound and memory-bound applications.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126931", "http://people.cs.vt.edu/~kaixihou/papers/hbm_li_sc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/232d892b423c24aaefcec9eb2ae211316be0f025", "sources": [ "DBLP" ], "title": "Exploring and analyzing the real impact of modern on-package memory on HPC scientific kernels", "venue": "SC", "year": 2017 }, "23512459e0708ecc204057cb8a0eec6c94295c3c": { "authors": [ { "ids": [ "2211379" ], "name": "Jungsik Choi" }, { "ids": [ "3968500" ], "name": "Jiwon Kim" }, { "ids": [ "1780653" ], "name": "Hwansoo Han" } ], "doi": "", "doiUrl": "", "entities": [ "Emergence", "Memory-mapped I/O", "Non-volatile memory", "Overhead (computing)", "Overhead projector", "Page fault", "Page table", "Random-access memory", "Throughput" ], "id": "23512459e0708ecc204057cb8a0eec6c94295c3c", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "10419c6f4aa50a36ed0b103c9ddb9aec45f133fe", "94783d113951822195d4ba44599a8fcbdef9d4bf", "3cda09fdc91d7f85a138a4d56848a3a0708df76f", "157352f39ed5ba9a571dd4c27b48786ac8be90cf", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "0c60a639dc9cd8014f685ec986c29bf55a10bb5a", "34a2ad9af5e33b8bc3646de027636b606a0ddf7a", "05a1357946de5eca42a477b7b268db4944219a2e", "1bed30d161683d279780aee34619f94a860fa973", "9aa0d7253574e50fe3a190ccd924433f048997dd", "bd5099af211725421f18e027e7c6ae4ccd3d70bc", "38a9120f780602521af9744e31d80ef5cd9593a7", "1ac1348938a45e1da84be8caac78e3097acaf0c1", "562143806fbda2ad953f5d7d8ab598bcffba89dd", "5bb770af1973f929e8622f17ddf378d439245144", "243c522b56809292f1f50117a9915053d32bf4fb", "129f11028220d87525b37b4605a2c04eb26f3e73", "6b1da3b242207efdbd58bd0cc19c9cb6cf150e40", "10d8afea57c8f159c4eb2664a40c8fb859acefef", 
"1463d8143ffb3ac734b85e8e492d943bbea8ff79", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "def29d202e537d026b8d3ed91655b540ef86cceb" ], "paperAbstract": "Recently, with the emergence of low-latency NVM storage, software overhead has become a greater bottleneck than storage latency, and memory mapped file I/O has gained attention as a means to avoid software overhead. However, according to our analysis, memory mapped file I/O incurs a significant amount of additional overhead. To utilize memory mapped file I/O to its true potential, such overhead should be alleviated. We propose map-ahead, mapping cache, and extended madvise techniques to maximize the performance of memory mapped file I/O on low-latency NVM storage systems. This solution can avoid both page fault overhead and page table entry construction overhead. Our experimental results show throughput improvements of 38\u201370% in microbenchmarks and performance improvements of 6\u201318% in real applications compared to existing memory mapped I/O mechanisms.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/hotstorage17_slides_choi.pdf", "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-choi.pdf", "https://www.usenix.org/conference/hotstorage17/program/presentation/choi" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/2351/2459e0708ecc204057cb8a0eec6c94295c3c.pdf", "s2Url": "https://semanticscholar.org/paper/23512459e0708ecc204057cb8a0eec6c94295c3c", "sources": [ "DBLP" ], "title": "Efficient Memory Mapped File I/O for In-Memory File Systems", "venue": "HotStorage", "year": 2017 }, "235d090c8549ff3b353103380313d70e33c47e4e": { "authors": [ { "ids": [ "34854131" ], "name": "Shaden Smith" }, { "ids": [ "31884444" ], "name": "Alec Beri" }, { "ids": [ "1681616" ], "name": "George Karypis" } ], "doi": "10.1109/ICPP.2017.20", "doiUrl": "https://doi.org/10.1109/ICPP.2017.20", "entities": [ "Ambient occlusion", "Computer security",
"Definition", "Iteration", "Negativity (quantum mechanics)", "Parallel computing", "Program optimization", "Recommender system", "Sparse matrix", "Speedup" ], "id": "235d090c8549ff3b353103380313d70e33c47e4e", "inCitations": [ "585cec9677e5cdb04e882cb47cc491c54ecbeb80", "5b2095d318f8e4693645b3502d10153a1af62d83" ], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "111-120", "journalVolume": "", "outCitations": [ "ca161e7f5cb49740a5841361003b2d8a875d45ab", "1de6ac748387859f43bc15e15ff5380df05bae34", "42b6ea0bbc174ce1ecb116aff6280559545dd6db", "231f97057e1efed073c20ccdf3aa3c5aaf063ffb", "94cc6daad548a03c6edb0351d686c2d4aa364634", "008a6e4b2763736d2c6363ee6b546b09c0022e53", "2d03baec8ac1568e6813aa43d625d552524f977e", "53ced1bf79a5ff0db025fee76a67a33f37e4286d", "21c2071bd83f4e124d5ec79ebac371d50aee220d", "38b389580d774ce513284e671ff3bbcef0258de2", "9cc330d3be2109510935e9d3fa7d01e1000e712e", "ac0a0828c17c040c065a9285264094ba2560497d", "00ca166ea4521f5cc3d23e74a1b1090386b6831f", "255aeb5c2a8eea15db08c617481ddbb35a41bfe4", "2dc26e42bdb50be00b3f7affe745c4384ff833be", "4229f467b059188fc7a1234016a3c80557fa7df0", "53a225f2843e8544ca9c615ecfcc5fad26083e49", "1d0f25989452abbbc8feaf00a034ff110fc4b350", "3fa4789491cda3b383938eb5f7bb3c3d707e0572", "08368dae4f102176b8e50a64ddde8a8150cde26e", "ecfc0934c6c7e028e268756606afd20303ee69a3", "0696d45227a88cf0a5306dad757f5edfc407ed4c", "88ab669e9706276b3ad2c4147bd5ed3166922714" ], "paperAbstract": "Low-rank sparse tensor factorization is a popular tool for analyzing multi-way data and is used in domains such as recommender systems, precision healthcare, and cybersecurity. Imposing constraints on a factorization, such as non-negativity or sparsity, is a natural way of encoding prior knowledge of the multi-way data. While constrained factorizations are useful for practitioners, they can greatly increase factorization time due to slower convergence and computational overheads.
Recently, a hybrid of alternating optimization and alternating direction method of multipliers (AO-ADMM) was shown to have both a high convergence rate and the ability to naturally incorporate a variety of popular constraints. In this work, we present a parallelization strategy and two approaches for accelerating AO-ADMM. By redefining the convergence criteria of the inner ADMM iterations, we are able to split the data in a way that not only accelerates the per-iteration convergence, but also speeds up the execution of the ADMM iterations due to efficient use of cache resources. Secondly, we develop a method of exploiting dynamic sparsity in the factors to speed up tensor-matrix kernels. These combined advancements achieve up to 8\u00d7 speedup over the state-of-the-art on a variety of real-world sparse tensors.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.20", "http://glaros.dtc.umn.edu/gkhome/fetch/papers/smith2017constrained.pdf", "http://shaden.io/pdf/2017-Smith-Constrained.pdf", "http://shaden.io/pdf/2017-Smith-Constrained-slides.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/235d090c8549ff3b353103380313d70e33c47e4e", "sources": [ "DBLP" ], "title": "Constrained Tensor Factorization with Accelerated AO-ADMM", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "2363978d45dbcc8cb0ba88d84800c95f58812039": { "authors": [ { "ids": [ "39326009" ], "name": "Amro Awad" }, { "ids": [ "1699757" ], "name": "Arkaprava Basu" }, { "ids": [ "2210762" ], "name": "Sergey Blagodurov" }, { "ids": [ "1717365" ], "name": "Yan Solihin" }, { "ids": [ "3308405" ], "name": "Gabriel H.
Loh" } ], "doi": "10.1109/PACT.2017.38", "doiUrl": "https://doi.org/10.1109/PACT.2017.38", "entities": [ "Best, worst and average case", "Dynamic random-access memory", "Non-volatile memory", "Operating system", "Page table", "Pressure-tolerant electronics", "Translation lookaside buffer", "Volatile memory" ], "id": "2363978d45dbcc8cb0ba88d84800c95f58812039", "inCitations": [ "044f5a9c7b571f42cb47c7bc82a2aeb9752002f1", "a4710ac80826e48a410b1b9da80c2ca0f4a6a357" ], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "273-287", "journalVolume": "", "outCitations": [ "0d5ec0f90b9d07ebc48f4e00b2e583e5d49130dc", "a1da20a5814d65623505440a2a18121b8b4b6b5f", "0bcea4e03620eb323d990119a9d35c45a278023f", "93ff2d8b02c7bb57e6289027565332f342879751", "2046f7c54470e7617269cc954aab877a4691c241", "4c689148ee5e9d6d116f6babbfab21bf2116802e", "736f4a1c6eff51f8206530aca67baa95bafc5f1c", "22b4811bb8265e84d53c62a842cac10dda15f6af", "6bc785deeb35643d865157738548149e393f9dd3", "03d55467b20e662fbaa8416e853f57c93834a9fb", "9858251a88afc29fa9fdb8234d998dcdf182f144", "1a5ce35fc5ad575c2c9e5f692bf263082d656f80", "882f0458bff25138346c358da6f4e036f36f1b60", "3230c6025956c2d3fd11971e0d30b690e3078a1e", "48b22e82dba733f04a5af3408ecab872ba38f0c3", "8e72c795a5a45976ae476a8f280f7b37ae4b1873", "40718dab3e261c2456c3576d15dd0105f1e2e4e2", "10db0fdb8f13385780cadaa71934371c2286b346", "0a3232bd21ac1bdfe7a468e61abe60ac0aaac296", "08237b5a7862d65185977e3dac0f81e616188add", "2fee80acb6f7b4172622e0f40d350339ca4e3dc9", "2260e7a09f1aefa56a5b3b29bee91ce4c3dbefc6", "39c76eee1b779cbc5ecfc658689f0fe834b1c1bf", "5e58d895eb2ede3489d4acab01aaea02b79c844f", "69652a30c1badf6a4c21006f3ec8da3de976cbaf", "3142f44d2ab6153f9eb263f78fb6e09411c482ec", "0bd1fa344fbf2d5502e1886716aee15f67c3af78", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "3057d43346280804b1636de9a9c0f950d5cf12c1", "1591e9cf9c5d5fa42e7b5e48bd76f43a0a6e8f0b", 
"02f3eebd4281e9a241d5790da5bb783e018c8251", "0af7b9623e35555710f3a30177c5b0c61e4e30af", "846c3e29ad00d4670cb74836c8028f40eef4eb90", "32a01a917bc310388002e7c7231ba2c07416bed6", "59ca42e1911be417863d0f7068b89e1e59189cc9", "a55e4ac5c453115521ee0d428948cf7c2124c220", "3f3f0d3f490cc8d9db3e73b8b6477a54cb33449e", "0a7a637c60fa8da94921a348d1062350aae2daa4", "08956552adfff8feda37bac24c85d3d78efd264b", "0680bdfaf465947354218828a51ee5997505385b" ], "paperAbstract": "Updates to a process's page table entry (PTE) renders any existing copies of that PTE in any of a system's TLBs stale. To prevent a process from making illegal memory accesses using stale TLB entries, the operating system (OS) performs a costly TLB shootdown operation. Rather than explicitly issuing shootdowns, we propose a coordinated TLB and page table management mechanism where an expiration time is associated with each TLB entry. An expired TLB entry is treated as invalid. For each PTE, the OS then tracks the latest expiration time of any TLB entry potentially caching that PTE. No shootdown is issued if the OS modifies a PTE when its corresponding latest expiration time has already passed. In this paper, we explain the hardware and OS support required to support Self-invalidating TLB entries (SITE). As an emerging use case that needs fast TLB shootdowns, we consider memory systems consisting of different types of memory (e.g., faster DRAM and slower non-volatile memory) where aggressive migrations are desirable to keep frequently accessed pages in faster memory, but pages cannot migrate too often because each migration requires a PTE update and corresponding TLB shootdown.
We demonstrate that such heterogeneous memory systems augmented with SITE can allow an average performance improvement of 45.5% over a similar system with traditional TLB shootdowns by avoiding more than 65% of the shootdowns.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.38", "http://pages.cs.wisc.edu/~basu/papers/pact2017_final_version.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2363978d45dbcc8cb0ba88d84800c95f58812039", "sources": [ "DBLP" ], "title": "Avoiding TLB Shootdowns Through Self-Invalidating TLB Entries", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "237d67f89d957068b83b3b3930ab7bf13b8d0e4f": { "authors": [ { "ids": [ "2717484" ], "name": "Asaf Cidon" }, { "ids": [ "17314133" ], "name": "Daniel Rushton" }, { "ids": [ "2224644" ], "name": "Stephen M. Rumble" }, { "ids": [ "3087426" ], "name": "Ryan Stutsman" } ], "doi": "", "doiUrl": "", "entities": [ "Attribute\u2013value pair", "Cache (computing)", "Central processing unit", "Dynamic random-access memory", "Memcached", "Memory bandwidth", "Multitenancy", "Server (computing)", "Throughput", "Web application", "Web cache" ], "id": "237d67f89d957068b83b3b3930ab7bf13b8d0e4f", "inCitations": [ "7917b9b0560cc71c83b79a9ff19c0cf69ee2f630", "a2408c13ce831b9aadca03f458b423cb28fb8a8a" ], "journalName": "", "journalPages": "321-334", "journalVolume": "", "outCitations": [ "0831a5baf38c9b3d43c755319a602b15fc01c52d", "048c75cd0a0d75c87e4362a7be3dae01754282ab", "e0508499b4cf5794d5aeaf717e7ad9541e9c2bba", "1dd353938063795c06ef21d8b0b3ef3b45a2fdc1", "0420266f84cc95d6b7a8100e601f67d1118d4965", "1594118f2696b573f08510cf837f3b37db87face", "252384bae49e1f2092e6a9553cd9a67f41134ded", "1354abf8baa8b6d39c65758072ae9d0b07227d5c", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "a3de178c43b990b5755be4d640a7525f97ce2f33", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", 
"bf4064ef5bd87dbb6873df60b5ce74c004dd7ed8", "2804bcc9df4352c2da1367f182a54e7c67a160ec", "9c48179c07963a9fad69a359362c0aee87f9fe18", "2988e34168fa91398fa397baf823af2063893e9c", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "74711ebc709fb794ddbaefa75c9ec3b48e5a2a17", "03543f75c4fe0c49f81af789a1c7293ff0e4e107", "0a5882fc7600383eb9d6cc119942f48a70f896ad", "532787be1a70aebc1c0b975e31a81c538927f0af", "6d5099039729d930841c21893c5585a194d90a79", "251ea4c57e71bb951ff6f9fe0ff63897a298402f", "514a5c15e8cf3f681febecad954a4508d9189c99", "9aa0d7253574e50fe3a190ccd924433f048997dd", "31c1ebd6214a6146f2739fb81bf560229f413c91", "c6d01d9365d7b134ef2efed0063820d1b9be659a", "2077c3787e5a1545df312d51f9a7b8cd05e2c7f0", "1ac1348938a45e1da84be8caac78e3097acaf0c1", "275f66e845043217d5c37328b5e71a178302469f", "60de50417a31e293540992a3a52af6a2f62de7c2", "0fd4a1b1b92a65b70fad60ad6e95ed54e8f6e86a", "23891969bf9f1c01bf27aebdf41e81f2d84e2a26", "2f8403f29dd9ee8934266406ac59979dc06935d6", "1e954c5cf302d76483ec0cc0049b4b1220077750", "17b9c7fa7e420b427f9c443afcfa2304b1a54b1d", "235ffbe72353aaa49d38fd973fa67cc2a15310fb", "088e3e939ad234b6fdd0e321290fb26937dc2553", "2e25d9bc47ad9b22d3068b2adc94c657e5a21120", "8f49ae0e014262cc5fb60a06e8e2bab0651100d4", "4b4ee1ee9bbfd9527fba0bbd761bd61a59f96a48", "400ae82ab2fc2c814033c65854229ecefbddbf67", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "0579cb7ceecac67eefb63bef0436fbf5e552cf72", "0ad3358ffc0d5e44311160767cc0fb65ccd25b00" ], "paperAbstract": "Web application performance heavily relies on the hit rate of DRAM key-value caches. Current DRAM caches statically partition memory across applications that share the cache. This results in under utilization and limits cache hit rates. We present Memshare, a DRAM key-value cache that dynamically manages memory across applications. Memshare provides a resource sharing model that guarantees reserved memory to different applications while dynamically pooling and sharing the remaining memory to optimize overall hit rate. 
Key-value caches are typically memory capacity bound, which leaves cache server CPU and memory bandwidth idle. Memshare leverages these resources with a log-structured design that allows it to provide better hit rates than conventional caches by dynamically re-partitioning memory among applications. We implemented Memshare and ran it on a week-long trace from a commercial memcached provider. Memshare increases the combined hit rate of the applications in the trace from 84.7% to 90.8%, and it reduces the total number of misses by 39.7% without significantly affecting cache throughput or latency. Even for single-tenant applications, Memshare increases the average hit rate of the state-of-the-art key-value cache by an additional 2.7%.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_cidon.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-cidon.pdf", "https://cross.ucsc.edu/wp-content/uploads/2017/09/memshare_final-1.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/cidon" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/237d/67f89d957068b83b3b3930ab7bf13b8d0e4f.pdf", "s2Url": "https://semanticscholar.org/paper/237d67f89d957068b83b3b3930ab7bf13b8d0e4f", "sources": [ "DBLP" ], "title": "Memshare: a Dynamic Multi-tenant Key-value Cache", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "237e1b6c1b0711cdd16adad151a09f41db705f68": { "authors": [ { "ids": [ "5947982" ], "name": "Jihye Seo" }, { "ids": [ "3356158" ], "name": "Wook-Hee Kim" }, { "ids": [ "2692944" ], "name": "Woongki Baek" }, { "ids": [ "1739708" ], "name": "Beomseok Nam" }, { "ids": [ "1719212" ], "name": "Sam H.
Noh" } ], "doi": "10.1145/3037697.3037737", "doiUrl": "https://doi.org/10.1145/3037697.3037737", "entities": [ "Byte", "Database transaction", "Durability (database systems)", "In-place algorithm", "Paging", "Persistent memory", "Relational database management system", "Response time (technology)", "SQLite", "Transactional memory" ], "id": "237e1b6c1b0711cdd16adad151a09f41db705f68", "inCitations": [ "4994eb0dfa2d15d7b5013563d018e8c16b71b039", "db57257e6b051e0f97d35209cc5aee0909cde1f1", "e423c74455db069e6a5cc21f68954081ad22a36c" ], "journalName": "", "journalPages": "91-104", "journalVolume": "", "outCitations": [ "801262793742c9c697d439c2d02317cb3affd7c9", "4228d3fee9d28b3b3a7dc8e9585d02652109029f", "7efeb43699d31e8ae365b1e4f7e56c066083a159", "24724ad8962a9e04eb496fddaefe9708f6960601", "94783d113951822195d4ba44599a8fcbdef9d4bf", "b56681b12900336b202a6ed45719d71d5d844a25", "1332c7cea4e586d064f1ab3b908d9d438cdbdaf0", "05a1357946de5eca42a477b7b268db4944219a2e", "195500f47236d16b8797fa5e0b0ac90b0e5aedd2", "2e3bda19a2ed88e8a6e5cc415e27da551653ff1d", "d137b83c3e43d4953cc389cb0a50619cc7be5319", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "885c666fbcfd1a10c613496d7a041d01b99c7a39", "0ff4cfc95f42c49aaaac4289c84c605f192c5def", "a43f2375fc8ac9fadbab91d5c10e61ef88a0525d", "4ea47f63c8b2a026a66566dd3f733d45e692d369", "48e57d5dad9d2f79116ea7c0af92dbc7ab7afd40", "314919c141024c71cb17d525ecd8016138335002", "5a49c1c694028fd6ca7acc6af601c0f54efa0700", "12d355e2ba7d002c5a4c1c67af417d2428dbb547", "642dd27ce62d51b042e134b0d0aec2f2e7cc4d29", "061944ca83bb46fac511394dca642f7af2d2858a", "99723365fc9fe6960201bf9d246a90ccbb6396fa", "24dc5e5ca7766d8ec8bf2a5a1cd81ed60ac5a787", "512a8925693d5f4b8e4cfde32bcd3c846a14b71e", "7227999dfa663a2a1e0e81ee450f360e1e308ff7", "bf5497e15f22233cbc2a4d0c3cc2c36f26738701", "0204f40221260d00c5ee63646560a40dcd7d97d1", "175a3360ff5bb2f0777dff1e688f3f90f20e5fcf", "81778c0996c46c77a66597e782ec0eb558f054f2", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", 
"6e0ade8e4c0948e47b7e1ad78eacf42e5f9d8d0f", "9183cde02e4306828089fb8adae74736a9df3ceb", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "3d2dfe972be7a60937df97bd309b423726375cb4", "57c823b3b07b98233394bf15cfbbaed6a84809df", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "4ce09dee71cc7cb146751e68f12d18fa240dcfea", "578667cbc39c6bfc1c89fe6a54506643c3b097f8" ], "paperAbstract": "The slotted-page structure is a database page format commonly used for managing variable-length records. In this work, we develop a novel \"failure-atomic slotted page structure\" for persistent memory that leverages byte addressability and durability of persistent memory to minimize redundant write operations used to maintain consistency in traditional database systems. Failure-atomic slotted paging consists of two key elements: (i) in-place commit per page using hardware transactional memory and (ii) slot header logging that logs the commit mark of each page. The proposed scheme is implemented in SQLite and compared against NVWAL, the current state-of-the-art scheme. Our performance study shows that our failure-atomic slotted paging shows optimal performance for database transactions that insert a single record. For transactions that touch more than one database page, our proposed slot-header logging scheme minimizes the logging overhead by avoiding duplicating pages and logging only the metadata of the dirty pages. 
Overall, we find that our failure-atomic slotted-page management scheme reduces database logging overhead to 1/6 and improves query response time by up to 33% compared to NVWAL.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037737" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/237e1b6c1b0711cdd16adad151a09f41db705f68", "sources": [ "DBLP" ], "title": "Failure-Atomic Slotted Paging for Persistent Memory", "venue": "ASPLOS", "year": 2017 }, "238cd2f2a8cdbd5fc696ee38a695dc6b3ee0537e": { "authors": [ { "ids": [ "3298489" ], "name": "Xinyang Ge" }, { "ids": [ "2694341" ], "name": "Mathias Payer" }, { "ids": [ "1699210" ], "name": "Trent Jaeger" } ], "doi": "", "doiUrl": "", "entities": [ "Adversary (cryptography)", "Attack surface", "Code page", "Compiler", "Computer language", "Dynamic loading", "Executable", "Function pointer", "Library", "Library (computing)", "Memory corruption", "Memory footprint", "Memory protection", "Memory segmentation", "Object file", "Privilege escalation", "Programmer", "Read-only memory", "Read-write memory", "Relocation (computing)", "Run time (program lifecycle phase)", "Ubuntu", "Vector (malware)", "X86 memory segmentation", "gettext" ], "id": "238cd2f2a8cdbd5fc696ee38a695dc6b3ee0537e", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "9aa132d9d2f346aff497cf6dbd463cc1f19819ce", "6a8f65381a627a2db6c756a7185d9106f0acefec", "29c462c88b969a93b6cfcdfcbdf0cdc455d06600", "0429bc6e23dbac663b845ca4148462c5406d6a38", "9b2585f7248c8b5a22e9c816506e01060213ca85", "522ef437c14885e224d219214684cfe58987e9f9", "3e1ff9416ec1c2f94d0ddbeb6171eea4c336d663", "4f4590962bde0c2050122f91e5978271bb24d556", "704e2027ecdaa9561b75a854b585336c16cea89f", "0fc7f3a21359665c456853e3fe09c9a5c4a24f37", "b0ecd2efb16b91f8ff3856d719aca24626406695", "01a2d5c69a09ec3fa82de6dfe12811f3d981ab7e", "6c0562ffc00ba7a4d2734ac039ffd181afe2008d", "77cab38bef9d1b8b7bcfd3bb382c9d55541465e1", 
"6f5635fb17ff8a27b08104de6d0322c32e943a9f", "5493f512ba418c21f1ce20e20985157f7509007c", "116eaac2e498bc2c9bea10ea838309dcf143d764", "21f66596e1867cc7a3e952b19cda64570d617ab9", "03f827395a17beb941241dbd72322705bdf79791", "2fafad1553f320615034ef985bbc3378033de73c", "a4a7aaa197c29dcfa1556182aea425144137d4e3", "52612064aa065b29930b56fbf54745883bba94dc", "686150e2179840ed40a0166cba6c5d507f3aa49c", "91f86102b924d9b07c25bfdc4420d15ed216b7ab", "542db06acbafe6c8e0837dcbb0dbbeb7c19f976d", "53ccc87d5a75f3b59396d7e93f7d25287bd49232", "255bdcb05805c97d973081b59bc61c649263ceae", "0e039df712774fcea67f214d9b5780c1dc250747", "50117210fbb7a8ce28ba503280f3c65fdc029ec7", "2ced690b7eb25304c525f8a607f92dcf349d7b03", "80fcc489208a1588cfba0bfa8eba92028e3dca1b", "8846dc2c965a9d3bb6eed302c9fc1c613d8642d0", "157153c5c5b3fb42bd4de60d2cd8380e7038a82b", "01b5b648af61ddb382da638a299fae2315b25192" ], "paperAbstract": "Dynamic loading is a core feature used on current systems to (i) enable modularity and reuse, (ii) reduce memory footprint by sharing code pages of libraries and executables among processes, and (iii) simplify update procedures by eliminating the need to recompile executables when a library is updated. The Executable and Linkable Format (ELF) is a generic specification that describes how executable programs are stitched together from object files produced from source code to libraries and executables. Programming languages allow fine-grained control over variables, including access and memory protections, so programmers may write defense mechanisms assuming that the permissions specified at the source and/or compiler level will hold at runtime. Unfortunately, information about memory protection is lost during compilation. We identify one case that has significant security implications: when instantiating a process, constant external variables that are referenced in executables are forcefully relocated to a writable memory segment without warning. 
The loader trades security for compatibility due to the lack of memory protection information on the relocated external variables. We call this new attack vector COREV for Copy Relocation Violation. An adversary may use a memory corruption vulnerability to modify such \u201cread-only\u201d constant variables like vtables, function pointers, format strings, and file names to bypass defenses (like FORTIFY SOURCE or CFI) and to escalate privileges. We have studied all Ubuntu 16.04 LTS packages and found that out of 54,045 packages, 4,570 packages have unexpected copy relocations that change read-only permissions to read-write, presenting new avenues for attack. The attack surface is broad with 29,817 libraries exporting relocatable read-only variables. The set of 6,399 programs with actual copy relocation violations includes ftp servers, apt-get, and gettext. We discuss the cause, effects, and a set of possible mitigation strategies for the COREV attack vector.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/evil-copy-how-loader-betrays-you/", "http://www.cse.psu.edu/~trj1/papers/ndss17.pdf", "https://www.internetsociety.org/sites/default/files/ndss2017_09-4_Ge_paper.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2016/12/corev-ndss17.pdf", "http://hexhive.github.io/publications/files/17NDSS.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8053/b37ac1aa10da5c44074b758e286452562ba3.pdf", "s2Url": "https://semanticscholar.org/paper/238cd2f2a8cdbd5fc696ee38a695dc6b3ee0537e", "sources": [ "DBLP" ], "title": "An Evil Copy: How the Loader Betrays You", "venue": "NDSS", "year": 2017 }, "239903a72df9a558b82c467f4d0445289901a1de": { "authors": [ { "ids": [ "34441280" ], "name": "Kevin Boos" }, { "ids": [ "32467844" ], "name": "Emilio Del Vecchio" }, { "ids": [ "1766339" ], "name": "Lin Zhong" } ], "doi": "10.1145/3064176.3064205", "doiUrl": "https://doi.org/10.1145/3064176.3064205", "entities": [ "Analysis of 
algorithms", "Computational complexity theory", "Fault detection and isolation", "Hoc (programming language)", "Modern Operating Systems", "Operating system", "Process migration", "Static program analysis", "Xojo" ], "id": "239903a72df9a558b82c467f4d0445289901a1de", "inCitations": [ "16090ea0f2aabd3d890e2eafaf461c39a872b766" ], "journalName": "", "journalPages": "389-404", "journalVolume": "", "outCitations": [ "94d0e8b118efecb9f5b2056d84bf22253a2fb63c", "b40ec9e07c0797f0f6a3fe2dd53554fe3529c143", "1aa8f66b663a3491f6f3ed33ad30d3849dd37885", "2dbd0bca3fb1a57f441f1867ac0fa7dfc245ae66", "0651d1863f3edac83e574e223f301be9ed564dd4", "45ceba0ca6cf8775637bec3f78eee54efa7cc259", "11a68b5de90fc3f0b56f1acdfe688b91eff1b1ba", "43f89337e570f36686acdda3dcd0b7885a963557", "1edf5957f9134e632544c1044047d739bdf714a1", "454dd673096a64d5ed41e4afe246ff4059a40a1a", "08832863bc3f041222f381c8ae143f8a66449059", "f9756073eb3d1fa924131def7b3182a275781d86", "15e09661e721b36313ca7aa58eac007dcc345091", "0ec4cfad338dfd355b1b09af7e7469b88fd7bd86", "4d3016927d2bb2e6c261f647b7e9a7106a04dc58", "08b11d0812f6cc3c9b954c116d36bd983ead6241", "02fdca5fdba792e4f2c70b8b637abe4824343800", "23fa7b866a1b1fee7bb71c8b5a9235cca7120bbc", "855446cfa6d2c827a454af7ad71fde31e8fe9fcc", "18757558dac21007e01f553b2817528c28734061", "59272d3ee7651f1f881a96096e59e82911950971", "e0f8a0ed923c1ed14ed5376b9aaf0f6d65c110e9", "080b1f2c8316dad80d8c385dfcb82335a64a4d29", "5cfc936d12bbd8a0f100687b12b20e406215f30a", "8f1701f01fe27538ec8c009ee1497fa5f4fdc3ac", "1d5ef7e755c689fcd24522e678a072b6f6e0bce8", "0364d9b50978071565a1abc6206daaa0b6178899", "4bc04f87a48cf8fb5fdcb87cbfe140fdf0fc0d74", "b3a296aa9be7592b0fd0be6631b5f812e72b2cca", "75d74f13e9064c01f4939740176961d72fe77a96", "0ac6fffc843ca167a8f65420e59f1be18e2ef5e7", "13f639dc3947d169dde46446d97bce45ff4a2b05", "027eb436c35c7e293e7ebc565163cb54c05fe2e9", "023f23c300804754753cb11db51fb7f582556ab7", "287d5bd4a085eac093591ce72c07f06b3c64acec", "06b02aa7b59fe821ad93ddf61436f35f72620691", 
"4e27f44ade4545931a99eee2dc8011b44f5db4b6", "1e53d00664a459ad7c56a8f085724c3ce9d6853f", "51280ae0374a3034fb98f59f878138dbb9aa8360", "56d4ee858f84f69fdd4c38420d026d3525eefb70", "274537bd5a77326d44bae3f99da8908a7f57c3f3", "8ca81f26081e2cfc9c286ca33fe318e6137aae91", "4f6f88a7ef79f1ffbf168b63e91a63ff02f97521", "125268a25397dd17fb3c7dbd4018114a972e4acb", "6bf453091c527c49e6b990da1f0f29aea04eab99", "31d4ff95152dc8e5a0cbd321dfae92b19bdf2af8", "4dd58b731136fd63c8490440f18204a701be0f56" ], "paperAbstract": "Understanding and managing the propagation of states in operating systems has become an intractable problem due to their sheer size and complexity. Despite modularization efforts, it remains a significant barrier to many contemporary computing goals: process migration, fault isolation and tolerance, live update, software virtualization, and more. Though many previous OS research endeavors have achieved these goals through ad-hoc, tedious methods, we argue that they have missed the underlying reason why these goals are so challenging: state spill.\n State spill occurs when a software entity's state undergoes lasting changes as a result of a transaction from another entity. In order to increase awareness of state spill and its harmful effects, we conduct a thorough study of modern OSes and contribute a classification of design patterns that cause state spill. We present StateSpy, an automated tool that leverages cooperative static and runtime analysis to detect state spill in real software entities. Guided by StateSpy, we demonstrate the presence of state spill in 94% of Android system services. 
Finally, we analyze the harmful impacts of state spill and suggest alternative designs and strategies to mitigate them.", "pdfUrls": [ "http://www.ruf.rice.edu/~mobile/publications/boos2017eurosys.pdf", "http://kevinaboos.web.rice.edu/docs/statespy_eurosys2017.pdf", "http://www.owlnet.rice.edu/~kevinaboos/docs/StateSpy%20Poster%20EuroSys%202017.pdf", "http://doi.acm.org/10.1145/3064176.3064205" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/239903a72df9a558b82c467f4d0445289901a1de", "sources": [ "DBLP" ], "title": "A Characterization of State Spill in Modern Operating Systems", "venue": "EuroSys", "year": 2017 }, "23a31f5bc28675c5f6dcf230240fa1f81d1803fe": { "authors": [ { "ids": [ "1997092" ], "name": "Yige Hu" }, { "ids": [ "1716807" ], "name": "Youngjin Kwon" }, { "ids": [ "2002462" ], "name": "Vijay Chidambaram" }, { "ids": [ "1683338" ], "name": "Emmett Witchel" } ], "doi": "10.1145/3102980.3102997", "doiUrl": "https://doi.org/10.1145/3102980.3102997", "entities": [ "Attribute\u2013value pair", "Cognitive dimensions of notations", "Cross-validation (statistics)", "Database", "Embedded database", "Operating system", "SQLite", "Sync (Unix)", "System call" ], "id": "23a31f5bc28675c5f6dcf230240fa1f81d1803fe", "inCitations": [], "journalName": "", "journalPages": "100-105", "journalVolume": "", "outCitations": [ "0c3a060886185b59322bbd1296e08a63d84d2ad8", "2c84daae142c5b0f4ca6a6772ca7e8cac7d7afca", "8c0573ba5f6aeb5a6391132ef26d613c045e6e1c", "036b85d48048b47180058034bde97ae633ba8c28", "1bff3dc20cc4bfda6bb58ef7990e9593cfa68f24", "4b2c69db8d9923954aaf1f48adc2990c4e23b37a", "4468cbc8a9ad13ebeaa210424e842f158415ab07", "9a022ebdffc8db4850dfff6592471ef6f0767d97", "4593ae644f04d76f582dedc4cc32d2acd33c9a93", "09cb54cad4fd84c24cb40716e2ed4b5a45463c8c", "2d1addf9bc1c37214d1656cd400f3f344e82ac33", "265d18ced11e2e64d98afa97b0e86965e68101f7", "061944ca83bb46fac511394dca642f7af2d2858a", "25a83ec7cc04a5bf22061b78164c9d09a4de21a5", 
"26f820aa9e782f5d6ba8bcb272a31c32094dfd59", "01b1310ba97ec6892104bdf6190de6809e56c7aa", "948c881ab7f1f62e9c940458e74c3e435320df72", "175a3360ff5bb2f0777dff1e688f3f90f20e5fcf", "13efb4765441035c4192e0ea0f1fa15fdd5e2d84", "225603198cc415d363db8a8a2bd30b0df3c963b1", "ab68308d5c3bed437f8c1e1f67c3fe2bde9656ab", "2be26e8aa238ac37a80e08303f128d8014bb9f3b" ], "paperAbstract": "Modern applications use multiple storage abstractions such as the file system, key-value stores, and embedded databases such as SQLite. Maintaining consistency of data spread across multiple abstractions is complex and error-prone. Applications are forced to copy data unnecessarily and use long sequences of system calls to update state in a consistent manner. Not only does this create implementation complexity, it also introduces potential performance problems from redundant IO and fsync() calls, which fragment disk writes into small, random IOs. In this paper, we propose that the operating system should provide transactions across multiple storage abstractions; we can build such transactions with low development cost by taking advantage of a well-tested piece of software: the file-system journal. 
We present the design of our cross-abstraction transactions and some preliminary results, showing such transactions can increase performance by 31% in certain cases.", "pdfUrls": [ "http://www.cs.utexas.edu/~vijay/papers/hotos17-t2fs.pdf", "https://www.cs.utexas.edu/users/witchel/pubs/hu17hotos-txfs-slides.pdf", "http://doi.acm.org/10.1145/3102980.3102997", "https://www.cs.utexas.edu/users/witchel/pubs/hu17hotos-txfs.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/23a31f5bc28675c5f6dcf230240fa1f81d1803fe", "sources": [ "DBLP" ], "title": "From Crash Consistency to Transactions", "venue": "HotOS", "year": 2017 }, "23b19a167ff07e400e0d8d6daaa47122c764a247": { "authors": [ { "ids": [ "1692821" ], "name": "Duru T\u00fcrkoglu" }, { "ids": [ "1804097" ], "name": "Ata Turk" } ], "doi": "10.1109/ICDM.2017.55", "doiUrl": "https://doi.org/10.1109/ICDM.2017.55", "entities": [ "Algorithm", "Analysis of algorithms", "Approximation", "Approximation algorithm", "Degree distribution", "Hybrid algorithm", "Information", "Random graph", "Sampling (signal processing)", "Sparse matrix" ], "id": "23b19a167ff07e400e0d8d6daaa47122c764a247", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "455-464", "journalVolume": "", "outCitations": [ "0ae474036f6684b0b2333e7a7d09815efa8b037f", "4410f0c48f982f960a54500df7bd88e4cab88927", "110b55a017f52abfedca220036ea129d84b7cadc", "516f412a76911a13c9128aac827b52b27b98fad9", "0ad8e89091eed09217e66adc98136126addc2619", "0eaf38cd3d7c7fb456201d59b6d28b084010d358", "10dac777afc83308fd10782bc2bc529469cb9ce9", "141004dee9e799b40bfaf50b4a72618613137250", "cf23f5ae0e5d2b3dc90ed585d07e52eaf93ea9a2", "26e02fc5572fcf1e55496a2846aaa77b9b45b14d", "09eec0775494e0700e28b7faf6078ea17ac47766", "da006d704311c4183d4b528331bac84b9bd312f5", "9ce60b7baabfcdcac48f4d1d5d6812a5ce2fd55f", "1f0612de1f191abadf250b78cd78f884203cca5e", "0f3fd2233b51ec5cbbb46451f1f76996d7493450", 
"2b5c8b0848dc6ec0593a8ee1a293c80848253519", "0e1809aea71fcf3c19ab4257aa0150f5a7e51350", "6ea8894ef9edf31ed83e925a5650a0a8f0b79b76", "7c6d51677ffff060ac04e0a61ce2cf9cb2437709", "eb4ffe1f1a98902225619a5cfea7883f737c2428", "164541079ca8b7bb353bf24396b3574ac80a6022", "1534ae2aafc0892749917a1780f61a4c103f0cb9", "bb8926e5962246e30ee9a7decc886d570e345113", "8e1a0b89cf01cb5dd82b30bed640a7185b0c18e0", "3746511ef9ba685f34ceec9a3e94795be5836953", "7c2963f7029451f2b8154468fcaacd0f60da1071", "7805de482edfbec3a736bb6b3d1bb5163435752d" ], "paperAbstract": "The number of triangles in a graph is useful to deduce a plethora of important features of the network that the graph is modeling. However, finding the exact value of this number is computationally expensive. Hence, a number of approximation algorithms based on random sampling of edges, or wedges (adjacent edge pairs) have been proposed for estimating this value. We argue that for large sparse graphs with power-law degree distribution, random edge sampling requires sampling large number of edges before providing enough information for accurate estimation, and existing wedge sampling methods lead to biased samplings, which in turn lead to less accurate estimations. In this paper, we propose a hybrid algorithm between edge and wedge sampling that addresses the deficiencies of both approaches. We start with uniform edge sampling and then extend each selected edge to form a wedge that is more informative for estimating the overall triangle count. The core estimate we make is the number of triangles each sampled edge in the first phase participates in. 
This approach provides accurate approximations with very small sampling ratios, outperforming the state-of-the-art up to 8 times in sample size while providing estimations with 95% confidence.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.55", "https://arxiv.org/pdf/1710.09961v1.pdf", "http://arxiv.org/abs/1710.09961" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/23b19a167ff07e400e0d8d6daaa47122c764a247", "sources": [ "DBLP" ], "title": "Edge-Based Wedge Sampling to Estimate Triangle Counts in Very Large Graphs", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "23b57241f4741209e6a667356c8ac924744eeefe": { "authors": [ { "ids": [ "29839844" ], "name": "Marc Anthony Warrior" }, { "ids": [ "3390049" ], "name": "Uri Klarman" }, { "ids": [ "34400673" ], "name": "Marcel Flores" }, { "ids": [ "1718392" ], "name": "Aleksandar Kuzmanovic" } ], "doi": "10.1145/3143361.3143365", "doiUrl": "https://doi.org/10.1145/3143361.3143365", "entities": [ "Change detection and notification", "Client-side", "Data assimilation", "Load balancing (computing)", "Server (computing)", "Subnetwork" ], "id": "23b57241f4741209e6a667356c8ac924744eeefe", "inCitations": [], "journalName": "", "journalPages": "41-54", "journalVolume": "", "outCitations": [ "90e444c5bce21b8a62c3f71b55d38ecc97c35648", "2c55cc95b6014bfa3f34307af141d0ddaa771c64", "7f44523fbd024d095dab1bdb5a860a6864a2f715", "d0914f8d1ad97bdb2f21ad499cf61b854c4a1514", "4c4ecb120fa7fa0ae9713138b897be329f9e6d39", "1643e122653b255d267763b1bc17fbb4346e10ce", "6566d98a370ee01ad78c12ec4471bb5ffbe7a8ab", "2d1d82c89f4d5464d4e20e9694918941c030cb40", "23a7c0ae980b86eaf5bf21cd7d26d3f157434e67", "3525a3688eef9dec048f2e15b7ac495abe15f208", "430cd2b1c08aa86bb4aef152ee2ca764c5342c3e", "143481d55d9f9d25e53f06a6afaf15feb7430c62", "9d4e261bd37ed1b5b06c6cc1d1cfe86244b23844", "03a141f5445c7cfab3a9c6267b12d06c6e67490d", "0f2f3e328608c9409adc820d82bfaf5940d3a8db", 
"65fd142f37c315cdf892184f8fb21281b88f6269", "a097c201e3410cb943eb6a336f4af1516e6a33e2", "039eab95462b2e8b5f3fc8a8a1056b401fa9f03d", "144d99900f16711dfd5ca94207d4fc5f5be2eb68", "4f2c94a8d689863859ac849ebb83823770cf3d6a", "135cd5dd511b1f004f1b9a34310e3a27621a6dd8", "19e4c40941a3767afd51f200db85c4289f189e24", "56893647902b4ab971fd092ce78687675b6942a7" ], "paperAbstract": "Currently, the attempt to choose the \"best\" content replica server for a client is carried out solely by CDNs. While CDNs have a decent view of load distribution and content placement, they receive little input from the clients themselves. We propose a hybrid solution, subnet assimilation, where the client participates in the server selection process while still leaving the final say to the CDN. Subnet assimilation allows clients to declare their own \"network location,\" different from the actual one, which in turn drives a CDN towards making better decisions. To demonstrate, we introduce Drongo, a client-side system, readily deployable on existing clients without any changes to the CDNs, that employs subnet assimilation to dramatically improve replica server selection. We implemented and extensively evaluated Drongo on a set of 429 clients spread across 177 countries and 6 major CDNs. We show that Drongo affects 69.93% of all clients, prompting better CDN replica choices which reduce the latency of affected requests by up to an order of magnitude and by 24.89% on average across six major providers, with Google's performance improving by 50% in the median case. 
Our results indicate that client participation holds great opportunities for the advancement of CDN performance.", "pdfUrls": [ "http://networks.cs.northwestern.edu/publications/drongo/drongo_conext17.pdf", "http://doi.acm.org/10.1145/3143361.3143365" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/23b57241f4741209e6a667356c8ac924744eeefe", "sources": [ "DBLP" ], "title": "Drongo: Speeding Up CDNs with Subnet Assimilation from the Client", "venue": "CoNEXT", "year": 2017 }, "23bde3dadb4effccb3b539c5ce46c295a11615bb": { "authors": [ { "ids": [ "3169583" ], "name": "Aurojit Panda" }, { "ids": [ "7494341" ], "name": "Wenting Zheng" }, { "ids": [ "34825255" ], "name": "Xiaohe Hu" }, { "ids": [ "1689594" ], "name": "Arvind Krishnamurthy" }, { "ids": [ "1753148" ], "name": "Scott Shenker" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Autostereogram", "Consistency model", "Correctness (computer science)", "Distributed firewall", "Simulation", "Software-defined networking", "Strong consistency", "Structured text", "Turned A" ], "id": "23bde3dadb4effccb3b539c5ce46c295a11615bb", "inCitations": [ "49a4227a15be8845dc1698483d17d77765578e2f", "b3da33936070038c2ec26fe9fa4489060b32b001" ], "journalName": "", "journalPages": "329-345", "journalVolume": "", "outCitations": [ "17d122f143726288da193a767fd0a7634010f0ff", "2730c3ec2908d852e73a62f93302e0e8bfe5510e", "3e0080a34eca4eabb9b371c2b3c369dc4dc90112", "0e93ce47cc5e1be1e91145b93fe98c1e242670f5", "4534c15b4760cb29a0ce74fcd43297fe83f2f277", "10baa5b029b3dcf3e67eab0234a0ea64294559a0", "1dc62b09b964b8faeecbc03270f7d7a5f2fee733", "1164ec0b8d2bd8e95b9fc07e9669ff9d4d379c64", "0a664d20663d6b201290adf7431fd57252c54646", "5acc6e0d4011d81419b81d7cd383bed48c4cb22c", "18fa3a12532504d678e546d58ffdc5365e727ebd", "00cf4b13a1bd202ccebe1e7bd0587f11e98ec3d6", "c079867c1782b958b3f1c6ea63a38c4256ce8fdb", "3f0fe61d13447ebdc7222e568064532252fbb532", "16b0a02e2db3e3beb3dd0ce83e610549f271c9f1", 
"9002e502571e65ad59dfdd59717538cd4ad74868", "2fb179c0b0ad6cf37f6272db05ccfad708a82f2d", "b46e192c84945528f6029138fdb26a9629f2dc6c", "3d3abf7b60d6e762d635c3b997d48ddb1bc76eb6" ], "paperAbstract": "We consider the following question: what consistency model is appropriate for coordinating the actions of a replicated set of SDN controllers? We first argue that the conventional requirement of strong consistency, typically achieved through the use of Paxos or other consensus algorithms, is conceptually unnecessary to handle unplanned network updates. We present an alternate approach, based on the weaker notion of eventual correctness, and describe the design of a simple coordination layer (SCL) that can seamlessly turn a set of single-image SDN controllers (that obey certain properties) into a distributed SDN system that achieves this goal (whereas traditional consensus mechanisms do not). We then show through analysis and simulation that our approach provides faster responses to network events. While our primary focus is on handling unplanned network updates, our coordination layer also handles policy updates and other situations where consistency is warranted. 
Thus, contrary to the prevailing wisdom, we argue that distributed SDN control planes need only be slightly more complicated than single-image controllers.", "pdfUrls": [ "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-panda-aurojit-scl.pdf", "http://www.cs.duke.edu/courses/spring17/compsci590.7/Papers/scl-preprint.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-panda-aurojit-scl.pdf", "http://people.eecs.berkeley.edu/~apanda/assets/papers/scl.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/panda-aurojit-scl" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/23bd/e3dadb4effccb3b539c5ce46c295a11615bb.pdf", "s2Url": "https://semanticscholar.org/paper/23bde3dadb4effccb3b539c5ce46c295a11615bb", "sources": [ "DBLP" ], "title": "SCL: Simplifying Distributed SDN Control Planes", "venue": "NSDI", "year": 2017 }, "23e8540631a262b72ac0f3f659136c2d480e9e39": { "authors": [ { "ids": [ "32634815" ], "name": "Heinz Ulrich Boehmer Fiehn" }, { "ids": [ "37462681" ], "name": "Mark A. 
Hinkle" }, { "ids": [ "39997022" ], "name": "Richard Wallace Kenyon" }, { "ids": [ "5679382" ], "name": "Alan Mickelson" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.15", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.15", "entities": [ "Algorithm", "Centralisation", "Computer data storage", "Experiment", "Management system", "Private network", "Systems management", "Testbed", "Tree structure" ], "id": "23e8540631a262b72ac0f3f659136c2d480e9e39", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "115-122", "journalVolume": "", "outCitations": [ "aa313e7de93b66e0fac3f0a06c6e278f150d00c1", "28158752f34b12eb33599d754efeb958d1c8b0ef", "67bab3034b3070ee3d7d894ac97f3c385b5922d4", "43075eef75aed330abbbaca218abe6e16a7e34ac", "1699b6ee8e7e4d9087cef97fcc08fe12f4f7d497", "ade08deec4cb2ef4e78f5607ef283232cc9e71cb", "7e9e83b38bd9c870b96f451fbb8502cbb34f44f3", "68a166d968e988777729a7f2bed35f6bfe458e86", "cc3832befc8d8bf32c6ef49c6344f1a79803146f", "255e1024a66d90a58952d54581b4aa9713ecaa77", "0a24569b42de43de437e559a8f3b9fdc761775c2", "2bf273e9eab8872bef108fb20352f341426c8e28", "358179d3cc45042af6ed24e785fb9b6f1419eea7", "8df39cb8eb8951d434f5b33628dd52313302ccab", "8f30525f33ae5bea7e7a88c24a3f60f8962a4232", "f775f7b244ec216ad29dfc8c82717ba5a8fa90a1", "5125b385447ad58b19960a2652b555373f640331", "22bf1ff0ac4ea601c50b0b457638c81488cfc7e7" ], "paperAbstract": "Centralized electrical storage system management in islanded micro-grids is treated both theoretically and experimentally. An overview of the types of distributed islands in Wi-Fi Long Distance networks is the starting point, followed by examples of WiLDNets and associated reliability problems. 
Presentation of an archetypical tree structure communication system serves to introduce a centralized management system that may be used to improve reliability of multi-community private networks. An existing Wi-Fi long distance testbed with distributed storage serves as an islanded micro-grid for experimental study. Centralized management algorithms are tested. The results are generalized to the case of large meshed communication system.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.15" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/23e8540631a262b72ac0f3f659136c2d480e9e39", "sources": [ "DBLP" ], "title": "Management of Distributed Electrical Storage in Wide Area Communication Systems", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "240638416ce4afd30208bbbb616f2992800b2657": { "authors": [ { "ids": [ "2822949" ], "name": "Takashi Shimokawabe" }, { "ids": [ "39045319" ], "name": "Toshio Endo" }, { "ids": [ "6765406" ], "name": "Naoyuki Onodera" }, { "ids": [ "2584629" ], "name": "Takayuki Aoki" } ], "doi": "10.1109/CLUSTER.2017.97", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.97", "entities": [ "CUDA", "Computation", "Graphics processing unit", "Locality of reference", "Paging", "Scalability", "Stencil (numerical analysis)", "Supercomputer", "Tsubame (supercomputer)", "Wrapper library" ], "id": "240638416ce4afd30208bbbb616f2992800b2657", "inCitations": [ "28b80085f558e27659a0c0c0e51d86e3d77184dd" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "525-529", "journalVolume": "", "outCitations": [ "f8fb0bfa18199ec8f090b473c51f53b3de60a333", "7b69e7c3dd0ede0eacb2c42c82559367c8f194d4", "9ca6ffa755b02a845ee5bdf6a172b28672c3bd4f", 
"fe7bd2137955540edc81e84c5051ae32daf1703d", "3c3d4b4a7cfbab2361bbed3ea1af55d86767baae", "2900ebddc2dfb1e4bb7d7eac7384d7f4512b2b9a", "5a979d604f49c351d8725cadf792801b557f5e31", "0692f43523ebd6394a4ee76e3224f3c01cc2c4eb", "2dc1dda85ff17b343d145c674ea67ebb11ed4704", "0d4074d95f5073df87594d9991cd1a2d431f9aea", "6847509e2c5b4d8e6f193264e5b8323df23c8a0e", "def34f422d6930bd23d5c58de78be98804e44e97", "790e3d062c27af02f461ded6bcbfa91203a69e5f", "408e61c117816833cdd807b5d8c9258f1c2022ab", "f8afcda83fc23a7f75a1b0269fb458ee0182b621", "b8932adc9d9a80de33f891c3e94277b01d100c97", "f4dff66ba8f2338d118f379f2eff1410feb57ce6" ], "paperAbstract": "Stencil-based applications such as CFD have succeeded in obtaining high performance on GPU supercomputers. The problem sizes of these applications are limited by the GPU device memory capacity, which is typically smaller than the host memory. On GPU supercomputers, a locality improvement technique using temporal blocking method with memory swapping between host and device enables large computation beyond the device memory capacity. However, because the loop management of temporal blocking with data movement across these memories increase programming difficulty, the applying this methodology to the real stencil applications demands substantially higher programming cost. Our high-productivity stencil framework automatically applies temporal blocking to boundary exchange required for stencil computation and supports automatic memory swapping provided by a MPI/CUDA wrapper library. 
The framework-based application for the airflow in an urban city maintains 80% performance even with the twice larger than the GPU memory capacity and have demonstrated good weak scalability on the TSUBAME 2.5 supercomputer.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.97" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/240638416ce4afd30208bbbb616f2992800b2657", "sources": [ "DBLP" ], "title": "A Stencil Framework to Realize Large-Scale Computations Beyond Device Memory Capacity on GPU Supercomputers", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "24184cacf24f42ec8e12831d20a295c4141b9526": { "authors": [ { "ids": [ "1998865" ], "name": "David B\u00f6hme" }, { "ids": [ "2309393" ], "name": "D. A. Beckingsale" }, { "ids": [ "1772965" ], "name": "Martin Schulz" } ], "doi": "10.1109/CLUSTER.2017.34", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.34", "entities": [ "Big data", "Data aggregation", "Data model", "Hard coding", "Profiling (computer programming)", "Requirement", "Run time (program lifecycle phase)" ], "id": "24184cacf24f42ec8e12831d20a295c4141b9526", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "419-428", "journalVolume": "", "outCitations": [ "81bd1bd083d8a116a5413c545335c77e5f9e68be", "1f2ff98f9413bb36c641e9edcfa79f7b33eeb80a", "085a92d20bf2bf091a0729b610e5a4d6df4f0047", "55af531059610139bdba4f2ac4b1e63062712d6d", "e3d80fb27944a0dbc493886faf18472f83aaa227", "e65012425ff445a11728cc4922cfc09a4dfd6fd1", "51ec4530e2b6d73f410568952db220c05865e073", "afc22073b0c2fc62e742dd1a6e7fba6d54fb5e1f", "3218bbfd89deae4134d6c6d7f8f3ceb5c3a361f7", "869f19dca06cd831ebee02b29cf6319fb25f197a", "7c8f5cfe90578324da0fc7815075e610fc1161ff", "6af3228141a9891e57f879c6ea2b48787e56e17f", "972b71e221cbe66f3daa33b00a678d3cfd42b5af", "3fcb48efe8489061deeea37ceafdabe115ca4789", 
"2190f5b82326f4d61312aaa6e6226f1ae618fb0d" ], "paperAbstract": "Almost all performance analysis tools in the HPC space perform some form of aggregation to compute summary information of a series of performance measurements, from summations to more complex operations like histograms. Aggregation not only reduces data volumes and consequently storage space requirements and overheads, but is also crucial to extract insights from recorded measurement data. In current tools, however, most aspects that control the aggregation, such as the data dimensions to be reduced, are hard-coded in the tool for a set of particular use cases identified by the tool developer and cannot be extended or modified by the user. This limits their flexibility and often results in users having to learn and use multiple tools with different aggregation options for their performance analysis needs.We present a novel approach for performance data aggregation based on a flexible key:value data model with user-defined attributes, where users can define custom aggregation schemes in a simple description language. This not only gives users the control to deploy the particular data aggregation they need, but also opens the door for aggregations along application-specific data dimensions that cannot be achieved with traditional profiling tools. 
We show how our approach can be applied for performance profiling at runtime, cross-process data aggregation, and interactive data analysis and demonstrate its functionality with several case studies driven by real world codes.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.34" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/24184cacf24f42ec8e12831d20a295c4141b9526", "sources": [ "DBLP" ], "title": "Flexible Data Aggregation for Performance Profiling", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "247943eb67439626745b4385326d389c9855a37b": { "authors": [ { "ids": [ "1697444" ], "name": "Osbert Bastani" }, { "ids": [ "37529085" ], "name": "Rahul Sharma" }, { "ids": [ "4689402" ], "name": "Alexander Aiken" }, { "ids": [ "40085065" ], "name": "Percy Liang" } ], "doi": "10.1145/3062341.3062349", "doiUrl": "https://doi.org/10.1145/3062341.3062349", "entities": [ "Algorithm", "Baseline (configuration management)", "Context-free grammar", "Context-free language", "Glade Interface Designer", "Grammar induction" ], "id": "247943eb67439626745b4385326d389c9855a37b", "inCitations": [ "2eae8360b02580d7e459903d0c4ae634b087868e", "2cf43b8bc82f063e257bf21c92e5b038eacd34d3", "30023acba3ac198a7d260228dc51fda8414b8860", "64a8e117187cdf2d69711ed8e0d761303059b720", "2b4aaab57d3fd5b3df9c2d08918ebb0f3e834d40", "4a51228ef8b51f5917bbea1119754f062d9f0f00", "3656e8a19a363ab6f9d573a474b8d8280e0d31a6", "86b77bd922319f77f25f57d8e7596873f9ac6f66", "9f180e099c3c9afe5944d39d142129f965a318ed", "8ca7f74cfe80002ea33c461cbe17a20013cd9b65", "ba018256925fe64f8f26c71963197cde93af2c13", "d4d58d405a07b9aac669d410808c96ac7a038a4c", "0b549912e5f111c7c60eadda634ef4484427b684", "5141d62716dea6a7bb8f7b8d7a3dcf1744af4e28", "67ff9c425b17b78eaf7e3be970833aef41262cc8" ], "journalName": "", "journalPages": "95-110", "journalVolume": "", "outCitations": [ "0ab393affe9d674ef790be14fdfade368f3e5989", 
"129570333e7631456c70354113a43fe6eb193329", "04024fe315cfc57170a4d5f7711a7d8b2273bb73", "44f125b75f0abe0ba3aae1b4666d545c65fe810f", "3ff96aff948c8f07ae5b2ce0a64e04d61a85291a", "aaab8bc4028897e29789f26ed9afdd9da0beac33", "d4411711d9efe36e4c71cf5da83d249e7119479f", "2c13dcfdc5ea2d355a46fe326c371038a00ba7f5", "193d1415469cde989eafb01a1d4e8ba98dc03e0f", "9ab81764638d8c425e206fab2249eee8b242dd23", "263c160793ce938b462ba074fa263ecf0cda87c8", "3ba82ae0647dc5f8c8173307f22df68d61558dc5", "088a382a5af6a44ccb69c2f49517bf8d99ab6759", "426a2eb44a8f947edf9a92288e80fd0d6b515de2", "3fb4c74305c237147567a687857839ba0be2b4ab", "7149d00b10c8865a455d151595dd82a4880e3303", "1f7e5e582663868ed2f6763f98066ca278177a61", "a426c911b4fec0903c328ce3cb49ecbc1d2c51e4", "00524d47e1f2f62ac514c1f8eb47accd93b7d350", "bfbc6f830d2dc67d0840f9363f6b6076b842f120", "27145fe45450babe306513efb97ae0ec8590c246", "20cc59e8879305cbe18409c77464eff272e1cf55", "208e7934d900055b43b8b60e4a807ac00674ec4a", "099cfdca0f11eeaf46dd6457f33caff8e8fbcb41", "a1843173909eaa253f5a7f147752c8cd4b0e5d71", "5f3042cf6aafb2870d1dc772d1121b4b9d3975fe", "274d1483c213e93d3b745b68a2cd7530db6d1c4a", "0b53fab8dea434e1046836159e184d9565ffd401", "3ff91d7c8767b39557844849e3ee411421828572", "06804b07f0fac3c9e1bb55fcc261791f138138a0", "0bf69a49c2baed67fa9a044daa24b9e199e73093", "0adfaffb6e236d886c6eda579b28f5a8530019c6", "12f37f340f4762c9edc9ee36d2b910be42889983", "03f628dbb91c226011fa11a964025177da7824ad", "341d33498388711a5303c5f51433b3d5739a21d2", "76dd3202f16fc700d31b5bffce36a61d99ff54d6", "2859e75f421621ff776d55e533dc6ee7cb4b0a92", "93048dc9441985260fdebaf3a9d2654696e98f87", "20c92e048be4493a0f355f2d4d83f94c831c7ecf", "0cf853d7dadba4abe32ae89613c2d58934c0c1fd", "0249374f6ccda4e394cb47485626af9f2bde88ff", "51d565c308d5fdd683c9e945cd27de3ed2ae9db2", "34ad2a3bd019d0b8250bba2f80d031bcc61477ff", "01c70d676184d3354ec2775b71923ad61354e7aa", "de926c8d53aa2c80946c1bb89110dc7110c66db0", "de21d6722e4858a2ac594729e1dfd2a3580879fe", 
"984d45494026f7a2fc9c4193ee65b5ef35d937ad", "11443efe465ad544f478524da6c66c085b16e28b", "6ea63d09993b9a268689790ea8d25bc36345497e", "19f9cccd47ac99d167eebfec5937e95138d2aed8", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "4dd36e37d417825d4c850fb3789224e63269daac", "8eeff5e62ad0dd4073eb80377db29e7cd7b8a24f" ], "paperAbstract": "We present an algorithm for synthesizing a context-free grammar encoding the language of valid program inputs from a set of input examples and blackbox access to the program. Our algorithm addresses shortcomings of existing grammar inference algorithms, which both severely overgeneralize and are prohibitively slow. Our implementation, GLADE, leverages the grammar synthesized by our algorithm to fuzz test programs with structured inputs. We show that GLADE substantially increases the incremental coverage on valid inputs compared to two baseline fuzzers.", "pdfUrls": [ "http://theory.stanford.edu/~aiken/publications/papers/pldi17_extended.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/05/paper_extended.pdf", "http://stanford.edu/~obastani/docs/pldi17_extended.pdf", "https://arxiv.org/pdf/1608.01723v2.pdf", "http://theory.stanford.edu/~aiken/publications/new/InputGrammars.pdf", "https://obastani.github.io/docs/pldi17-presentation.pdf", "http://doi.acm.org/10.1145/3062341.3062349", "https://arxiv.org/pdf/1608.01723v1.pdf", "http://theory.stanford.edu/~aiken/publications/papers/pldi17.pdf", "https://arxiv.org/pdf/1608.01723.pdf", "http://arxiv.org/abs/1608.01723", "http://stanford.edu/~obastani/docs/pldi17.pdf", "http://arxiv.org/pdf/1608.01723v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/247943eb67439626745b4385326d389c9855a37b", "sources": [ "DBLP" ], "title": "Synthesizing program input grammars", "venue": "PLDI", "year": 2017 }, "2497ab5ef44b3628a9765165d48a93d3e22a2e0c": { "authors": [ { "ids": [ "1910406" ], "name": "Philipp Leitner" }, { "ids": [ "1755660" ], "name": "Cor-Paul 
Bezemer" } ], "doi": "10.1145/3030207.3030213", "doiUrl": "https://doi.org/10.1145/3030207.3030213", "entities": [ "Best practice", "Continuous integration", "Exploratory testing", "Functional testing", "Java", "Open-source software", "Operating system", "Software performance testing", "Test suite" ], "id": "2497ab5ef44b3628a9765165d48a93d3e22a2e0c", "inCitations": [ "b26c8b09bf05696e96bbb9578513730f3c63ec50", "260fd481ee3896bc89e1b8e26d3cca2973bbbfd9", "48b311805241c9055aac08e545a592a05639ee07" ], "journalName": "PeerJ PrePrints", "journalPages": "e2496", "journalVolume": "4", "outCitations": [ "2f40d0ee2ca5f6845f553b7b6e385a03ace36607", "39129c0a15b74715a261c6c1342195d4084cf818", "00704dbeaffde1d6f4c695adfbe5a6cf282bd789", "88b7aecdc09aebe5099e490ff43ae40336371dcb", "2b4916a48ad3e1f5559700829297a244c54f4e9b", "91bc68f8dc732d6df48cdb3af7b781b3f90a10eb", "249b2421c39bfd617162e3b34f0ecf13b0960678", "0f1042350e2c97117620d9f5182f94262f1f5ac0", "db988693f8cfea35fe7892bb2ba9770048531684", "d7b2d876862ad356e979b31ce9dfde4c7b340026", "3d2af27adb6fe7751b91248a5b4da60e032bf4f8", "905e3daea4c2fb235c6cf840c48bcd94ef6b278f", "5d6542640ded622f064af5a22172710a95f53db5", "9f0a9c9afa7556eef685f981f6c31d758e85b605", "37ea81df93db4bfb918d67d25d332a8b202a30e3", "5082fd673180d7c27852474e6c96852e0bda3951", "4fa8bacddbbb5f0bdd54ed61320ed883ec15bbe8", "5520674bf1dc5086b40e6f6f29f74b6c7f8fda62", "d53abdc1a13158c7c1133c123d86288b2c33ddc1", "032f1a16ad4cd815ca5cbf3dbfca2714007a1a2e", "18f3e20aa806cd8a94254b8affb3bec4ee4fbd8e", "00a9ba0063d34ec56792849a67ef57b4601becbb", "44f8119eca5a97527fe637c3cdf14178bd4e1fe4", "50051ae1f6341edfae95a62eb74072d472cdfe73" ], "paperAbstract": "The usage of open source (OS) software is wide-spread across many industries. While the functional quality of OS projects is considered to be similar to closed-source software, much is unknown about the quality in terms of performance. 
One challenge for OS developers is that, unlike for functional testing, there is a lack of accepted best practices for performance testing. To reveal the state of practice of performance testing in OS projects, we conduct an exploratory study on 111 Java-based OS projects from GitHub. We study the performance tests of these projects from five perspectives: (1) developers, (2) size, (3) test organization, (4) types of performance tests and (5) used tooling. We show that writing performance tests is not a popular task in OS projects: performance tests form only a small portion of the test suite, are rarely updated, and are usually maintained by a small group of core project developers. Further, even though many projects are aware that they need performance tests, developers appear to struggle implementing them. We argue that future performance testing frameworks should provider better support for low-friction testing, for instance via non-parameterized methods or performance test generation, as well as focus on a tight integration with standard continuous integration tooling.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030213", "https://research.spec.org/icpe_proceedings/2017/proceedings/p373.pdf", "http://sail.cs.queensu.ca/Downloads/icpe2017_AnExploratoryStudyOfTheStateOfPracticeOfPerformance.pdf", "https://doi.org/10.7287/peerj.preprints.2496v2" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2497ab5ef44b3628a9765165d48a93d3e22a2e0c", "sources": [ "DBLP" ], "title": "An Exploratory Study of the State of Practice of Performance Testing in Java-Based Open Source Projects", "venue": "ICPE", "year": 2016 }, "24b0b1ddc1a402e89804f612b66e1bbac8e3bf58": { "authors": [ { "ids": [ "26904843" ], "name": "Yihua Ethan Guo" }, { "ids": [ "2440972" ], "name": "Ashkan Nikravesh" }, { "ids": [ "3895596" ], "name": "Zhuoqing Morley Mao" }, { "ids": [ "39845983" ], "name": "Feng Qian" }, { "ids": [ "35049639" ], "name": "Subhabrata Sen" } ], 
"doi": "10.1145/3117811.3117829", "doiUrl": "https://doi.org/10.1145/3117811.3117829", "entities": [ "Decoupling (electronics)", "Download", "Mobile device", "Multipath propagation", "Scheduling (computing)", "Smartphone", "Web page" ], "id": "24b0b1ddc1a402e89804f612b66e1bbac8e3bf58", "inCitations": [ "53dd91c3ce45b3d4e58142666bb3896a6bb044e5", "3240bf446fb2fbdb93741ab7654614dd95645bd5", "0975013da667711a33121840411b02e1cbd1420f" ], "journalName": "", "journalPages": "141-153", "journalVolume": "", "outCitations": [ "013547c636d1526d3825a6077bdcae6831330e3d", "603b7e55d93b53b63f0a2da3fc2330007fe10ae2", "d76c543db9e6bcacb8b915cd672da08641d0e069", "437120941fb462977c6ce74ddb661054915df792", "004e97b9a3c934baa6451b3dcc6c35cb90b9f08f", "41758f25a78f4223fefb7ac00cc70a9e6ba949af", "295dfb4f77d6be0137abb03d060cd70a2c13334f", "b293ec57821a27bfb96d15cd11d8141e04610153", "834a75f99c355d1376d0eaa8c5f91f4c584a7eed", "b1d64bfc6c5ce1ba4c972a00bc4dd91a1a8571d2", "1c856842406d11d4ed15384afa0630c5a20be1fd", "9adb5a88b19e2edfef58472f8249ec995da89555", "58668f7720430470e285a9131fc0e054ddbcafd9", "4e04104bd54a4caab8380f4b74d8c7b32ea913da", "5463226068593d2b7e9e7f0fb52f6421c362169b", "0b701ba28f8c3c9aa3ac351cf60ab2c5d9bf98a7", "44e1d72c319059a680e47d6cd4fdfa8f99fbb5f6", "83a1a478c68e21ee83dd0225091bf9d3444a8120", "1f79775b58072a2ab484aad798aec0c9c7fa8605", "5622df2d119efc72b4d4b80672e82a389e948887", "d61b9b499c7e371edf7f8bb45fe7934e7d60ba2d", "392fa17ebea0dbfc1396e186ce6eab40d00cc43b", "d0fafaaf6cf4b85225a88f52e1e7e4c142039993", "2302e796c9b16c0fa94e89ee8b4f34f9d4812b94", "32a7818ee01bea31068a0076060c75e88283a16a", "42a0772bb0d31806059539411a88710037a0fdcb", "036277d492dd5777e87e5b33ffd809e5c617a37a", "0213d528ee33f348ddc3cb45cfdd9f64c8683d07", "ecce9ea330498f5d49506854e87667a4deb546c2", "3e364e301f026a197fde0608481dfa2c09e85b7b", "6b9b5527cb91a6422c27a42728ac8ded7b757c7a" ], "paperAbstract": "Simultaneously using multiple network paths (e.g., WiFi and cellular) is an attractive 
feature on mobile devices. A key component in a multipath system such as MPTCP is the scheduler, which determines how to distribute the traffic over multiple paths. In this paper, we propose DEMS, a new multipath scheduler aiming at reducing the data chunk download time. DEMS consists of three key design decisions: (1) being aware of the chunk boundary and strategically decoupling the paths for chunk delivery, (2) ensuring simultaneous subflow completion at the receiver side, and (3) allowing a path to trade a small amount of redundant data for performance. We have implemented DEMS on smartphones and evaluated it over both emulated and real cellular/WiFi networks. DEMS is robust to diverse network conditions and brings significant performance boost compared to the default MPTCP scheduler (e.g., median download time reduction of 33%--48% for fetching files and median loading time reduction of 6%--43% for fetching web pages), and even more benefits compared to other state-of-the-art schedulers.", "pdfUrls": [ "https://www.cs.indiana.edu/~fengqian/paper/dems_mobicom17.pdf", "http://doi.acm.org/10.1145/3117811.3117829" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/24b0b1ddc1a402e89804f612b66e1bbac8e3bf58", "sources": [ "DBLP" ], "title": "Accelerating Multipath Transport Through Balanced Subflow Completion", "venue": "MobiCom", "year": 2017 }, "24b300b2395e4d0b0d2c8b9797fc9f8e735a58ef": { "authors": [ { "ids": [ "15895903" ], "name": "Donghyuk Lee" }, { "ids": [ "2781428" ], "name": "Samira Manabi Khan" }, { "ids": [ "35164008" ], "name": "Lavanya Subramanian" }, { "ids": [ "33801185" ], "name": "Saugata Ghose" }, { "ids": [ "1999972" ], "name": "Rachata Ausavarungnirun" }, { "ids": [ "3257164" ], "name": "Gennady Pekhimenko" }, { "ids": [ "1720084" ], "name": "Vivek Seshadri" }, { "ids": [ "1734461" ], "name": "Onur Mutlu" } ], "doi": "10.1145/3084464", "doiUrl": "https://doi.org/10.1145/3084464", "entities": [ "Code word", "DIMM", 
"Dynamic random-access memory", "Error detection and correction", "Failure rate", "Forward error correction", "Peripheral" ], "id": "24b300b2395e4d0b0d2c8b9797fc9f8e735a58ef", "inCitations": [ "0c4fe1f1a8043e8f4175b21faca1b72bff8033e6", "0f41b9c0900b1c17b63d3d59bd4c334f7cf736af", "00cc482570d739e7b733f45b6f8f1836b24056bd", "0b393cab00401cb971cf71970e00c2767f881f75", "2cfa2068d49fc0e9cc5d96bc498c63e782f7478f", "2aa997522d212ab74163b986be211ffc7f3e9e34", "042855085a52934e5599e02555071bb222f6a000", "447f492235719d7c2b061b95d818f928d6cbdac5", "60aa9510638d4d9739ebfc3a0042187988482346", "b06b556169d8b55d6d8058164dd599c67c50c430", "1ebdf99bf03787a10d1c37bc9f93e89116e29bd6", "a6ca37aeeef5911e4f36b904088479bea999cc81", "983e87929eeb3f77c2ddb02d17d6efe978c80667" ], "journalName": "POMACS", "journalPages": "26:1-26:36", "journalVolume": "1", "outCitations": [ "468035263afa59095614f26a62e0217da4a1aeed", "85f5b05e7eba438f1fc4bc6e21cc1af00c424fbc", "07c3c9f1be15b52e259309b2e0a9ca71fa7dc76f", "3c761857787b3efe5e65b25bd94c737bf2cd7632", "0e77b8d3d0a7c6c4b5e09b2a90eb4c1e4279bdce", "31bc6abe2a9b33dfd30e6bca4b8cedfcca6e530c", "e762b1b654798cec0fb9d6000c7f7c777ac0689f", "8ebf014236c8791c82981b9434425713214feb97", "35b92289fe9c19e5baf462ffe91bdd2d25768b00", "6d48af0c03d647a020f90af31700d67d47b9b2cb", "012d556d67acedc6898930b4c93f54b87aabf5ee", "1c32ad0a42109fab826eb3054df7cfc33b424125", "99d80987446ecc7fb546826e7bccebb2fdc5fa12", "174a3d4ad2caba68b55fd3ee863b9471e3786f21", "096ee9c89d43fd03a602aed3a37fdf43dc8e60ae", "42f7ade4ab1ee6941da178b53712bb7ef7822815", "0eacd1b47786f740b723d906d46e160f143c0378", "15e63d368aa803c73b8f5d1315a51ebd7ceea3c3", "6aba2b1785bd26eb6d85820a734ddaa262d20571", "0693ff4b3a8d1452b897a876d3ffe6b2074e98e4", "7815c4243d581d0f96d0dac2c6e90e01d1ce94a3", "8d71fb5efe95801b31d65366ff1ce8c01525e493", "1be5ca1c9a94bd29c64d358e677b699e16c58f55", "4390f4a06a036b8f04cbb4fe7611fa5af9492797", "a5bd15d203c6aa740aba16776b422db010e66b58", 
"870403ceaadbe9579b1841baa39c1ac2d03fef3e", "85398d5f19157c91bf00da3d36210e72d57887e4", "809dc59799e36c9f322a8812e086bde0c00e8e7b", "60aa9510638d4d9739ebfc3a0042187988482346", "7bc046671369ba23568ff03bbee6ba04a91bd092", "9dbbe81cb508916722a129776643c315ec043347", "2d61939e21a40daed297cb3d1855b32ed0eaef67", "c8b6e390eb9cf0a3452decfff8461359315416cd", "3f82aa1373e823ec622b3021fff9df4a82230267", "5dfbdcedb7bcb8644b816bab2cc3d3fadd36775b", "f101cbc12bbdd127dad401cc5d64f63a6f7a6a37", "5baaeed2b180d8b9886eca113ae0c86196c8bdaf", "071564baef078867847fc54a3a0b50dd22d29d62", "05dfee99e74e9662d36d867a083680fdb0c45cba", "07a66e0f2777bb0005384defd228d5aa0bd7f9a4", "76e29695c7c119d869d3b87886a611261a98e4a4", "5e41307a2f2850f164ad0175f372799ce61e0bf9", "318c86751f018b5d7415dafc58e20c0ce06c68b6", "168b8cbbbacd234f23b70b952ef58b5b56e67529", "1b938edfde3b3b04c13599c2db87c72b7962f383", "0fca03c476d869660dec04fb83f54161767a4ba7", "447f492235719d7c2b061b95d818f928d6cbdac5", "6902867509928c0e5c19aff3e62e1def3a19d581", "68073f621072d793e95b9562bf9a9245415d5a96", "5906fc1d9cc56d31b9373cdb868cb90aa613d90d", "14cd0daeed8c12db40be03dfd56e446fcc10f32a", "a0280c69589951383ea0dbcd06f11bc4c595eff1", "5d42d10103c36b3bade0fde917332918ad6108a7", "03eaf3a6b6db01bdb749e8c3a097a0198c61b976", "2263e2beda96efcff8ff19efaefcb85f5136aeca", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "9341125876271d46cc25f86dac93f25acb343e8d", "1f80d8bdf5a0a1787a36ccfc4929f71d14a94e57", "0653e2ed9f683868cb4539eb8718551242834f6b", "8b0b2f2605e533c40cac32e1a3a989f7aa759841", "2fa80c8342dcb349f1d91c102a76400c86dfb042", "2d680892a7318ab7eff879054ea7ab6aeeb51fe9", "3c89345bb88a440096f7a057c28857cc4baf3695", "36897d1d2661777913d492390c4ad9d004276308", "179f80848143cf109fa6aebae6c3844da03b062c", "03d55467b20e662fbaa8416e853f57c93834a9fb", "2394c6644efa856f0da160a0f0031d74cd3b5000", "517c5cd1dbafb3cfa0eea4fc78d0b5cd085209b2", "37b5850e3e75a3462f3991491ca26674925f233b", "7c62c7e0b4e026f6b5b027735c99cbf033789ba9", 
"2cfa2068d49fc0e9cc5d96bc498c63e782f7478f", "44d886f89cdbd4fdf5dd25d83b2d37deb7541bf7", "ade263c815bf995916a2fa56c89f70efacf6ec5f", "472392b93150be7bb0132511d71d686770c2c79b", "30bb582c2c09abc7eb9dda7d9f80804eeb89f9d7", "42d96591e5583c2001c100d979a8f180e1a4e6b1", "710b3d324b07197a705683af18fc417ef712d042", "b2af71e1f2e72b27aa6914288bf4357bf913694f", "54b92179ede08158e2cf605f5e9f264ca06c01ff", "588fd53a6cbdb2f2d7f2bd676944d7b5fdfafcb9", "37e49c57dd4d0849380d177222db53e52ff21347" ], "paperAbstract": "Variation has been shown to exist across the cells within a modern DRAM chip. Prior work has studied and exploited several forms of variation, such as manufacturing-process- or temperature-induced variation. We empirically demonstrate a new form of variation that exists within a real DRAM chip, induced by the design and placement of different components in the DRAM chip: different regions in DRAM, based on their relative distances from the peripheral structures, require different minimum access latencies for reliable operation. In particular, we show that in most real DRAM chips, cells closer to the peripheral structures can be accessed much faster than cells that are farther. We call this phenomenon design-induced variation in DRAM. Our goals are to i) understand design-induced variation that exists in real, state-of-the-art DRAM chips, ii) exploit it to develop low-cost mechanisms that can dynamically find and use the lowest latency at which to operate a DRAM chip reliably, and, thus, iii) improve overall system performance while ensuring reliable system operation.\n To this end, we first experimentally demonstrate and analyze designed-induced variation in modern DRAM devices by testing and characterizing 96 DIMMs (768 DRAM chips). Our characterization identifies DRAM regions that are vulnerable to errors, if operated at lower latency, and finds consistency in their locations across a given DRAM chip generation, due to design-induced variation. 
Based on our extensive experimental analysis, we develop two mechanisms that reliably reduce DRAM latency. First, DIVA Profiling uses runtime profiling to dynamically identify the lowest DRAM latency that does not introduce failures. DIVA Profiling exploits design-induced variation and periodically profiles only the vulnerable regions to determine the lowest DRAM latency at low cost. It is the first mechanism to dynamically determine the lowest latency that can be used to operate DRAM reliably. DIVA Profiling reduces the latency of read/write requests by 35.1%/57.8%, respectively, at 55°C. Our second mechanism, DIVA Shuffling, shuffles data such that values stored in vulnerable regions are mapped to multiple error-correcting code (ECC) codewords. As a result, DIVA Shuffling can correct 26% more multi-bit errors than conventional ECC. Combined together, our two mechanisms reduce read/write latency by 40.0%/60.5%, which translates to an overall system performance improvement of 14.7%/13.7%/13.8% (in 2-/4-/8-core systems) across a variety of workloads, while ensuring reliable operation.", "pdfUrls": [ "https://people.inf.ethz.ch/omutlu/pub/DIVA-low-latency-DRAM_sigmetrics17-abstract.pdf", "http://doi.acm.org/10.1145/3084464", "http://doi.acm.org/10.1145/3078505.3078533", "https://users.ece.cmu.edu/~saugatag/papers/17sigmetrics_diva.pdf", "http://www.pdl.cmu.edu/PDL-FTP/NVM/17sigmetrics_diva.pdf", "http://www.cs.toronto.edu/~pekhimenko/Papers/DIVA.pdf", "http://www.cs.virginia.edu/~smk9u/DIVA_sigmetrics17-paper.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/24b300b2395e4d0b0d2c8b9797fc9f8e735a58ef", "sources": [ "DBLP" ], "title": "Design-Induced Latency Variation in Modern DRAM Chips: Characterization, Analysis, and Latency Reduction Mechanisms", "venue": "SIGMETRICS", "year": 2017 }, "24c6c779c9ee015efaff60487090c543493ba724": { "authors": [ { "ids": [ "3148716" ], "name": "Francisco J. 
Clemente-Castell\u00f3" }, { "ids": [ "2555685" ], "name": "Bogdan Nicolae" }, { "ids": [ "2685262" ], "name": "M. Mustafa Rafique" }, { "ids": [ "1714364" ], "name": "Rafael Mayo" }, { "ids": [ "39866505" ], "name": "Juan Carlos Fern\u00e1ndez" } ], "doi": "", "doiUrl": "", "entities": [ "Big data", "Cloud computing", "Iteration", "Locality of reference", "MapReduce", "On-premises software", "Scalability", "Scheduling (computing)" ], "id": "24c6c779c9ee015efaff60487090c543493ba724", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "181-185", "journalVolume": "", "outCitations": [ "121df43546bdfa846751c75ca667013f4904e7a1", "5298c0f5363da64c7da8c877372e358ada4910db", "6ef3a7cb2fff474976b0974d0753f41a26c5f153", "bb9929fae87d5bb902e20a5bebeb6ffb36ba9ef1", "49da82b1fdebe56b4dd99fe3d1e4bd42e79192c4", "045236eb1d6d537bf3aa0bebdf3555e347f1947f", "093963ca4685956335954d930e7c993222c7a24f", "7a8fca200c202a5dca5b8deb821d683871037bbe", "72ec19ead4007e130786bde139c56c980f5466c5", "269f0fc193b17bab098a772eddd2650a42926c31", "51e6227cf57725fc7e36ba3811f5d3ea0cc05aac", "6756aa0b2e3443d33bcfc543fda66858a99caecb", "7ec2d1c2a4e3f752fa6cbdd36ebed5c97bb349ec", "0541d5338adc48276b3b8cd3a141d799e2d40150", "d4745fea917db12bc25e505eb7d65895ae786554" ], "paperAbstract": "Hybrid cloud bursting (i.e., leasing temporary off-premise cloud resources to boost the overall capacity during peak utilization) is a popular and cost-effective way to deal with the increasing complexity of big data analytics. It is particularly promising for iterative MapReduce applications that reuse massive amounts of input data at each iteration, which compensates for the high overhead and cost of concurrent data transfers from the on-premise to the off-premise VMs over a weak inter-site link that is of limited capacity. 
In this paper we study how to combine various MapReduce data locality techniques designed for hybrid cloud bursting in order to achieve scalability for iterative MapReduce applications in a cost-effective fashion. This is a non trivial problem due to the complex interaction between the data movements over the weak link and the scheduling of computational tasks that have to adapt to the shifting data distribution. We show that using the right combination of techniques, iterative MapReduce applications can scale well in a hybrid cloud bursting scenario and come even close to the scalability observed in single sites.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101136" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/24c6c779c9ee015efaff60487090c543493ba724", "sources": [ "DBLP" ], "title": "Evaluation of Data Locality Strategies for Hybrid Cloud Bursting of Iterative MapReduce", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "24dd5810e14e7c402919299f29b48544fa03d529": { "authors": [ { "ids": [ "1682796" ], "name": "Frank Wang" }, { "ids": [ "4225110" ], "name": "Catherine Yun" }, { "ids": [ "1706681" ], "name": "Shafi Goldwasser" }, { "ids": [ "1749858" ], "name": "Vinod Vaikuntanathan" }, { "ids": [ "1901948" ], "name": "Matei Zaharia" } ], "doi": "", "doiUrl": "", "entities": [ "AES instruction set", "Cryptographic primitive", "Cryptography", "E-services", "End-to-end principle", "Fixed-satellite service", "Information privacy", "Information retrieval", "Information sensitivity", "Map", "Max", "Privacy", "Private information retrieval", "Routing", "Secret sharing" ], "id": "24dd5810e14e7c402919299f29b48544fa03d529", "inCitations": [ "3f59e1a955022a3a74dd7b79e7c6faf746549c12", "2eaf1165b7f26cd1743e0f01de5b8662a8f4a1a3", "41c04b237615243a7fc778c3653851e0e348322f", "123c2f1e0d49e417002c8e261a378f127b0029ff", "53f18a9a84c41ff532302166f4456856f3711830", 
"99d2ee867ff4d7e8cb3168ef45852bf5274977aa", "fa099d45212e0b0e6486b6bd378e6f4fd06c222d", "f4ed6e5506b62faa5df8bb1407921b9d04052cdf", "60568ad094e3c68e7f3dd696d8c30e99cabd5159", "533bb475a9373a233f6fe0881850a200210bc26a" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "1148", "journalVolume": "2016", "outCitations": [ "1cf87af22b3b4dd0ff1144d861e0573121d8de2e", "155ca30ef360d66af571eee47c7f60f300e154db", "0830a77f1934ad1afdd8d5f8eb2292518d5ef2f4", "00562cead3e4d35f03edd286b40581e8781bf339", "48b9b7893d90b54845e116364b277af5f071be03", "55c8020222a7e36ae27ecab6a058e0090b73d32d", "0609c475dd31632b705e9fb9a603060a3ff2c46a", "25fe832fcd649f3d1767a9982ec77d51974c7502", "f98cfc3c092d69c068054698bcb4c1b6840644c6", "e429bbe4e80fb52b53e148f1b00d1d5f9a4969c4", "ae5e5085b4e8f4851d9fd76e1d3845da942c3147", "70d2a37d5af527dfc345691e2f978f6e46dc4efe", "44b99570b83064ac1b300b1081e08cf4e4aa55f3", "1e0b693c1c9c69aae413729b58c552ad3cc838ca", "fabf3fe226629ef2df454c961eadafd563779e0c", "0db28e501c1f3008b17921770c2ba2588e9f1b82", "045a49e09ffdc901c9aa9d49de16fff127d64bc2", "2e8b9a7a085a8bc18783e76b776c6e780116efd8", "426b39e3abf91b5682380f0fc278a1f5b9baa2a9", "4d8e2657d6c9032c28ac4878a442e83dd99b672a", "31100ccd0867d6d5338612a62b2cde11be75f1b8", "265c1ff603d738e2bb879607abab0b203bf141bb", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "228c9ef44ed51f4152a5655be54c8d679c54bb01", "7f6311c08aec78a82d68bfef074d416c4862226e", "547733cc154ed53d72ad65341b248e7b6c576ac1", "bf9d7ae544e00f5a07debf464e9aa5e5365c183b", "769fb8055fbe0997ef8d9dab6c9abf37489c6575", "20b63210954f7c5a70664f301dcd7196856ccfa7", "2bb0a1dca77b3f3b09e15303f3b99e9204d0c521", "adb4d61cb5c8454671aea8eec6d0480a672be6c5", "017aba316f6d8447a4e045d8ddd417456629031e", "25518a206a45b3af9fbb68d11aa905480cf8f61d", "16e0361afc68b2c71cab43b21abf482da69b33d0", "19e6f3ea035d33590a8598be68ac82e6f00ce518", "27e9745fc94ccf6039dd1804cbb99760544fc59b", "0aabb00066abd05cff1cde72f24053edde6ccd7d", 
"02dad9c51e3a2e2117ffc41d624de4a090271d1f", "1e3822536527f98c53b716c26fa05da5bf729f17", "0790cd3d852a21b190c0d20593aa9293bc18f745", "22a5eeb8608b35e371b7544a54fabeadca8866e3", "1d89a12092d6323b9d3b1a5bd4e6790897e2a2be", "bb63c68855d42c95623ed9362d0853ea1d4cc858" ], "paperAbstract": "Many online services let users query public datasets such as maps, flight prices, or restaurant reviews. Unfortunately, the queries to these services reveal highly sensitive information that can compromise users\u2019 privacy. This paper presents Splinter, a system that protects users\u2019 queries on public data and scales to realistic applications. A user splits her query into multiple parts and sends each part to a different provider that holds a copy of the data. As long as any one of the providers is honest and does not collude with the others, the providers cannot determine the query. Splinter uses and extends a new cryptographic primitive called Function Secret Sharing (FSS) that makes it up to an order of magnitude more efficient than prior systems based on Private Information Retrieval and garbled circuits. We develop protocols extending FSS to new types of queries, such as MAX and TOPK queries. We also provide an optimized implementation of FSS using AES-NI instructions and multicores. 
Splinter achieves end-to-end latencies below 1.6 seconds for realistic workloads including a Yelp clone, flight search, and map routing.", "pdfUrls": [ "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-wang-frank.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_wang_frank.pdf", "https://frankwang.org/papers/wang-splinter.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_wang_frank.pdf", "https://cs.stanford.edu/~matei/papers/2017/nsdi_splinter.pdf", "http://eprint.iacr.org/2016/1148", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-wang-frank.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/wang-frank" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8035/a152c7a2906db524d381eb2459efbefbd028.pdf", "s2Url": "https://semanticscholar.org/paper/24dd5810e14e7c402919299f29b48544fa03d529", "sources": [ "DBLP" ], "title": "Splinter: Practical Private Queries on Public Data", "venue": "NSDI", "year": 2016 }, "25011a77c8478ed154721775d6284db8b268368c": { "authors": [ { "ids": [ "8362396" ], "name": "Yuchen Hao" }, { "ids": [ "3331952" ], "name": "Zhenman Fang" }, { "ids": [ "1718128" ], "name": "Glenn Reinman" }, { "ids": [ "2259796" ], "name": "Jason Cong" } ], "doi": "10.1109/HPCA.2017.19", "doiUrl": "https://doi.org/10.1109/HPCA.2017.19", "entities": [ "Address space", "Central processing unit", "Correctness (computer science)", "Input\u2013output memory management unit", "Memory management unit", "Programming model", "Scratchpad memory", "Tiling window manager", "Translation lookaside buffer", "Zero page" ], "id": "25011a77c8478ed154721775d6284db8b268368c", "inCitations": [ "d1fcc29063f09305969a678313ddba7e9f4f6e9c", "6ff08854494ec866510cbb23fb0e18c1f977007e", "0581754e392d4a648f6a7b7665e3561df8627157", "65c302fc5eedfb33824ef18879eb53cc0327ea41", "1171eecf13da9beb57b06a3c6e8a323e89b8e1ff", 
"b2e0bf24c1b9a95f4a70b217a19c00d0de497fcd", "2aa997522d212ab74163b986be211ffc7f3e9e34", "55fd68bddd40c949f2354f05d81be5f6d178ae9b", "a7fb4a8b755eee6542d844d898d55b639a1b61bc" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "37-48", "journalVolume": "", "outCitations": [ "27ee92f60f650feda893a853d4e552a1e9dc2979", "add1023b52ef4fe76a0dfdfe0916256348624da0", "533d720a8542b707c316d39cf5beeb58738af86d", "47eb35b0fa9be86dfbf3adbcef89f8d98baf428b", "1f33d86eddd1b1e4919e7867a56a86351c917eae", "1bed30d161683d279780aee34619f94a860fa973", "03b2e534532e9558e560df0bed74976b8f48c1a5", "006662a19c6383e8ee15616c90be206cd08867f0", "b9ecb3cfe9a923efc05c23a38adcf4bdd4b5b840", "3364bc50921a9566d61ef8cb73baa82341725e4b", "222c651bbde2d6ec42dfa148b9b9499ed5119389", "1c2aed008f7bcb769d0e6f8109434c766fb22bde", "ade263c815bf995916a2fa56c89f70efacf6ec5f", "4a7fa950a84803399438b2d9cdb2a2ac5dbeac7a", "7b943438994370dc4903ce28e359ff98fc23027f", "679511fd4d0fa7fc889de0c3c50ecee80d9996d3", "daeff61502115efc4b9ee81607a8e5489215ea88", "19de90c933c20849c85d5428c8a643210b97ec83", "311d93746ee1c1601cab988592b7df99f3695bd5", "085b6ad1fdc882e489f56f6fbed6935f4500aa57", "65fe0e3f826546a8104c54bba558cd8d1f78a262", "1f2a00758fc38d764b05adb76110500870610bc8", "2a19d31a818be868bf39dcf258b36a86ef2ece3f", "15b275f0421c606f5903532e9964b140cbb2f878", "32fb9de96b0772b33eca4815a2ad566b1ed45670", "6068cdb7a834f47a38ffcbfa4128e70ed3e385eb", "922f33b147b8e38d442b9171c463913a3c211b5a", "d33880a01318ec992071968c25059763146e6343", "6fbb7db25a5a3a2788e2b16a5e54abc1ac36aa76", "102df10591f98830cc3357b47729d6f9e9af3eca", "a4b0f11334d33de4f41bfeebfc520fb5b034a31e", "102fc630c42ee3c73243bd08936aa7e72ebb8daf", "0c7465f733161ed2c9818da22d77c2cb518f8f58", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "61d13a9a4a6cb66e2d5fcf4f75d97570dca8f3fe", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "0571492ae2aa6df23ebbfc9f6e12ce6c0eb38845", 
"6ac87f5ef30f787e5dacd59e97d7804e99a7366e", "6e5c3dbb662432c056f66c7f1e2ff5f42490a3d0", "48215f3287f7d527dc63c5b504ca8397d3bdef4e", "33196b69eeec351efd5178eae5da92979bdc6fd7", "0a67c0c91f4aff5d7b492cc6c2151cae6030a15b", "2f7ae6b41a97bf7dc705b4a4bd42ec37a8dc1d87", "0659411ebccf1abca4f9a3a5c8744b8bb153933e", "02ebdcf8200135ec0433e12e4ef2459ac740370b", "2037e142f3b45da72d5c99c0c0de2bb506d4a829", "0dea8362e54b2a6ad06aa1cf3aa09dcc60eef847" ], "paperAbstract": "While emerging accelerator-centric architectures offer orders-of-magnitude performance and energy improvements, use cases and adoption can be limited by their rigid programming model. A unified virtual address space between the host CPU cores and customized accelerators can largely improve the programmability, which necessitates hardware support for address translation. However, supporting address translation for customized accelerators with low overhead is nontrivial. Prior studies either assume an infinite-sized TLB and zero page walk latency, or rely on a slow IOMMU for correctness and safety—which penalizes the overall system performance. To provide efficient address translation support for accelerator-centric architectures, we examine the memory access behavior of customized accelerators to drive the TLB augmentation and MMU designs. First, to support bulk transfers of consecutive data between the scratchpad memory of customized accelerators and the memory system, we present a relatively small private TLB design to provide low-latency caching of translations to each accelerator. Second, to compensate for the effects of the widely used data tiling techniques, we design a shared level-two TLB to serve private TLB misses on common virtual pages, eliminating duplicate page walks from accelerators working on neighboring data tiles that are mapped to the same physical page. This two-level TLB design effectively reduces page walks by 75.8% on average. 
Finally, instead of implementing a dedicated MMU which introduces additional hardware complexity, we propose simply leveraging the host per-core MMU for efficient page walk handling. This mechanism is based on our insight that the existing MMU cache in the CPU MMU satisfies the demand of customized accelerators with minimal overhead. Our evaluation demonstrates that the combined approach incurs only a 6.4% performance overhead compared to the ideal address translation.", "pdfUrls": [ "http://web.cs.ucla.edu/~haoyc/pdf/hpca17.pdf", "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.19", "http://vast.cs.ucla.edu/sites/default/files/publications/hpca17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/25011a77c8478ed154721775d6284db8b268368c", "sources": [ "DBLP" ], "title": "Supporting Address Translation for Accelerator-Centric Architectures", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "25113be728f6126f31683bd02460c8a72bd3e270": { "authors": [ { "ids": [ "28012900" ], "name": "Hung T. Nguyen" }, { "ids": [ "9398355" ], "name": "Tri P. Nguyen" }, { "ids": [ "3363560" ], "name": "NhatHai Phan" }, { "ids": [ "1745290" ], "name": "Thang N. 
Dinh" } ], "doi": "10.1109/ICDM.2017.43", "doiUrl": "https://doi.org/10.1109/ICDM.2017.43", "entities": [ "Algorithm", "Charge-coupled device", "Digital footprint", "Distributed System Security Architecture", "Expectation\u2013maximization algorithm", "Graph of a function", "Greedy algorithm", "Importance sampling", "Memory footprint", "Relational Interface System", "Reverse engineering", "Sampling (signal processing)", "Scalability", "Smart Common Input Method", "Speedup", "Telecommunications network" ], "id": "25113be728f6126f31683bd02460c8a72bd3e270", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "337-346", "journalVolume": "", "outCitations": [ "35adbe4c896c9f789279d18ae3f166b9a3d8b0ed", "0706356c9ab6014d6b04577d38289ea8328291a5", "d7f9c3253552e13f24c3b73bc055ef60388af57c", "0a3858df9b27de264f39d774e26e1be50ef28337", "8c1fa3949409eb65017a4625a7351039f72ebf04", "066e1cd75a4f37a3c58089e24ccf43eb5adf1f19", "97ed889c7b5bc5786001863b4763c64e62f5ddfa", "1a2114515c8aee2fb46a56fbffba0f2285570d02", "706c83309fa09454a136d4e607364b27be66172c", "a6d73877be2b91e8b6c9c0896e58942c93086ff8", "20c88e23020ea3c42e640f8ae3dc2a1f8569892d", "376ead26a0e0a87ea9a177fc683b0bedf161fbd9", "533ab699b4f989ef6b0e0243abd6f9586af548e8", "48d56847893c7fbf8d7ecc91192050e903ab1258", "6a5ae0e083ab69153ce395874c8dddcd830dfcfd", "abb152802d5b4686a394e221abe951187ea06158", "6a3146b4f60e66a3aaa06842e747099f5c735510", "28bf0df09f97e7ef9108e71b45fe1b9a7aa201e2", "4bb0f607c1f6be38ca720ad6913577a778cc2f15", "23d85a0008429845870780c6db3640c05165acaf", "7a278ee0578f194700cadc3811cdda4ec751f88a", "1f55cc9ae9ea3d2ebb23c4427175f01829a4105e", "b790f9ae49cdee5354eacfc7897ab9752acd5e2a", "048a42699d9991ec18b34bdb484ef244830e1d71" ], "paperAbstract": "The blooming availability of traces for social, biological, and communication networks opens up unprecedented opportunities in analyzing diffusion processes in networks. 
However, the sheer sizes of the nowadays networks raise serious challenges in computational efficiency and scalability. In this paper, we propose a new hyper-graph sketching framework for influence dynamics in networks. The core of our sketching framework, called SKIS, is an efficient importance sampling algorithm that returns only non-singular reverse cascades in the network. Comparing to previously developed sketches like RIS and SKIM, our sketch significantly enhances estimation quality while substantially reducing processing time and memory-footprint. Further, we present general strategies of using SKIS to enhance existing algorithms for influence estimation and influence maximization which are motivated by practical applications like viral marketing. Using SKIS, we design high-quality influence oracles for seed sets with average estimation error up to 10x times smaller than those using RIS and 6x times smaller than SKIMs. In addition, our influence maximization using SKIS substantially improves the quality of solutions for greedy algorithms. 
It achieves up to 10x times speed-up and 4x memory reduction for the fastest RIS-based DSSA algorithm, while maintaining the same theoretical guarantees.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.43", "https://arxiv.org/pdf/1709.03565v1.pdf", "http://arxiv.org/abs/1709.03565" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/25113be728f6126f31683bd02460c8a72bd3e270", "sources": [ "DBLP" ], "title": "Importance Sketching of Influence Dynamics in Billion-Scale Networks", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "253b0efe6bd00c8d2bfa82478f4d913616897d8f": { "authors": [ { "ids": [ "17849115" ], "name": "Anchen Chai" }, { "ids": [ "17823733" ], "name": "Mohammad-Mahdi Bazm" }, { "ids": [ "2557998" ], "name": "Sorina Camarasu-Pop" }, { "ids": [ "1975196" ], "name": "Tristan Glatard" }, { "ids": [ "1771013" ], "name": "Hugues Benoit-Cattin" }, { "ids": [ "17809300" ], "name": "Fr\u00e9d\u00e9ric Sutern" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Dataflow", "Distributed computing", "File transfer", "Real life", "Signal trace", "Simulation", "Spatial variability" ], "id": "253b0efe6bd00c8d2bfa82478f4d913616897d8f", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "54-63", "journalVolume": "", "outCitations": [ "339d5df12fe0599ae36bc12d4b942c1d5946bbda", "55cd406c71d33d9fb481a44af07ccbb5d5f0f1a9", "3f710e14186e3286225429c5f5099cafe1d099c2", "137ee6d39b7244379da35b39a915ae4e2cab1b91", "ad7b99eb65739493c0a561308a23b151624ba894", "010a2d16eef8be8773ee2a73600f685ec0b2e371", "1661baf451086d8a33cc11ae390fd1c5cdd8dc40", "15b590fd9738aea8e5494762237c2f4b59c0726e", "cf1ceca2d2462711a6afc48d25f92d11437a1ad4", "5b9b452527d2b5fb336f470775a64c9b902b923a", "30a82a63a339c1e69aac36b23900544fe9ec97bb", "4e23533b91d5ee16674ee632fcbfa5126c8ad125", "f5b2f1067c4f90c9d7f8820e400d9b9417f4fc7c", 
"23c1f94f97f1171d1fc3d9b45125a4fe52afc397", "59c6c3ce034e5dadbc378604c7294eccec4ec47f", "54fb43214ba4ce5a915f16cf230d9c7593685084", "0c06d0d41ed1a946a5927157312f5936349f62bf" ], "paperAbstract": "Simulation is a fast, controlled, and reproducible way to evaluate new algorithms for distributed computing platforms in a variety of conditions. However, the realism of simulations is rarely assessed, which critically questions the applicability of a whole range of findings. In this paper, we present our efforts to build platform models from application traces, to allow for the accurate simulation of file transfers across a distributed infrastructure. File transfers are key to performance, as the variability of file transfer times has important consequences on the dataflow of the application. We present a methodology to build realistic platform models from application traces and provide a quantitative evaluation of the accuracy of the derived simulations. Results show that the proposed models are able to correctly capture real-life variability and significantly outperform the state-of-the-art model.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101120", "http://www.hal.inserm.fr/hal-01452694/document" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/253b0efe6bd00c8d2bfa82478f4d913616897d8f", "sources": [ "DBLP" ], "title": "Modeling Distributed Platforms from Application Traces for Realistic File Transfer Simulation", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "256d5f9dcccb2d2f6f6b7dc541f2e95b9037d810": { "authors": [ { "ids": [ "13154862" ], "name": "Frank Li" }, { "ids": [ "1744800" ], "name": "Vern Paxson" } ], "doi": "10.1145/3133956.3134072", "doiUrl": "https://doi.org/10.1145/3133956.3134072", "entities": [ "Emergence", "National Vulnerability Database", "Open-source software", "Patch (computing)", "Security bug", "Software development process", "Software 
repository", "Vulnerability (computing)", "Vulnerability database" ], "id": "256d5f9dcccb2d2f6f6b7dc541f2e95b9037d810", "inCitations": [], "journalName": "", "journalPages": "2201-2215", "journalVolume": "", "outCitations": [ "0db9f19b55d83b48c29aa4a0827645bcbc817644", "f6d16a2bcbc4dc9127b3ab35666a4de71eab1ab4", "285805fa4b7cfe380ed3c0816627d14d771779b9", "1f157f2b144528924eec46d9316bd5517352b89a", "201b0a185dda51629d7b6fdef3b380a0beaba455", "2cd4e14463918a66e713b3764b8c2491bc81d519", "49a8f9e8ed7dbd8382dbd30aa81321281cd54c07", "5bab96b9c1b621106156157ee3102a1ac151758e", "453c53d3890e43534747ca34992addc1d1447b5f", "1e73c2fa2709d3210c09f19933e99b71905364ab", "37f0b5d6e334cd1d28b5a7c21fa3c37772482284", "18ec9aa174db18f94c4bab972d8631b3fee26881", "ace903059c6a51fe79427f73df5ff115ab1a120e", "22eb61d1cb7ab24809ef61a3740e62ad05d3601e", "627bd11712f87bfa4a3668a717e72b237ab9e701", "5b1ccd0692eccbc80944ec3d1e227eb2c3b0381c", "537afee84424ade9e9e319dfb7efef12565e303b", "48b5dd4b43e403a17c3a94688efa666b554b8882", "1947f28e3780857bea47052c5dafba0addafe7ad" ], "paperAbstract": "Given how the \"patching treadmill\" plays a central role for enabling sites to counter emergent security concerns, it behooves the security community to understand the patch development process and characteristics of the resulting fixes. Illumination of the nature of security patch development can inform us of shortcomings in existing remediation processes and provide insights for improving current practices. In this work we conduct a large-scale empirical study of security patches, investigating more than 4,000 bug fixes for over 3,000 vulnerabilities that affected a diverse set of 682 open-source software projects. For our analysis we draw upon the National Vulnerability Database, information scraped from relevant external references, affected software repositories, and their associated security fixes. 
Leveraging this diverse set of information, we conduct an analysis of various aspects of the patch development life cycle, including investigation into the duration of impact a vulnerability has on a code base, the timeliness of patch development, and the degree to which developers produce safe and reliable fixes. We then characterize the nature of security fixes in comparison to other non-security bug fixes, exploring the complexity of different types of patches and their impact on code bases.\n Among our findings we identify that: security patches have a lower footprint in code bases than non-security bug patches; a third of all security issues were introduced more than 3 years prior to remediation; attackers who monitor open-source repositories can often get a jump of weeks to months on targeting not-yet-patched systems prior to any public disclosure and patch distribution; nearly 5% of security fixes negatively impacted the associated software; and 7% failed to completely remedy the security hole they targeted.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134072", "https://acmccs.github.io/papers/p2201-liA.pdf", "http://www.icir.org/vern/papers/patch-study.ccs17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/256d5f9dcccb2d2f6f6b7dc541f2e95b9037d810", "sources": [ "DBLP" ], "title": "A Large-Scale Empirical Study of Security Patches", "venue": "CCS", "year": 2017 }, "2573809b830eef714a7970dac203a235e014a34e": { "authors": [ { "ids": [ "1790279" ], "name": "Abdelhafid Mazouz" }, { "ids": [ "31201708" ], "name": "David C. Wong" }, { "ids": [ "1739086" ], "name": "David J. 
Kuck" }, { "ids": [ "1704285" ], "name": "William Jalby" } ], "doi": "10.1145/3030207.3030224", "doiUrl": "https://doi.org/10.1145/3030207.3030224", "entities": [ "Algorithm", "Automatic vectorization", "COMEFROM", "Central processing unit", "Multi-core processor", "Perf (Linux)", "Program optimization", "Uncore" ], "id": "2573809b830eef714a7970dac203a235e014a34e", "inCitations": [], "journalName": "", "journalPages": "15-26", "journalVolume": "", "outCitations": [ "2973052248a903ef2033f800c66979262d9ab718", "8b7f4ea10e4e0b6e2808c7d62a4c6e4927c471bf", "5ef621fd77140f17b4e44dce9cfa61f88d61b2ca", "12111b92dea58acf3fa2d71d572106f307719343", "06deced8cc4287812a69f83e1efa7be2e27d9ee1", "0a25b24b1935afa9e6bc7b8cae5ce883aa4d3d0e", "ab602d672875244c42a75559ba98f5606c6c1554", "48e51c059e98f74a9a59e1146ce9e311d5122ab9", "b1479a44735a4d93a99c3c1572acc6b752046c04", "019b125f3e2ce01d3e48c2c3f0619cfc4f1055bd", "0e8e26e9b86b8bc74997e6a28aeb49c0e8a31404", "260d0adfad93dfd02c7a945dee48c60f8fb938e1", "f4a91972bf1a05b195bce06a24dc33960bff1151", "a2463cd4bff00fc3133e2f5d8a2b8b82c871c3bd", "352a8957005dc5519b15ed1870751ec494d66395", "2119253191990d2d5bde80055b8c177ae367c55d", "7f4b805160cfcbd546c70c5e781b64b85c2b4850", "e831c1b8e16becad7c597db41c4663203c6a4b4b", "3c7739ce3ee18d44ae85cbc1dc72835cb8048b27", "886f29f247fd49084fbf25fdd60049b47db4f4ea" ], "paperAbstract": "This paper presents an empirical approach to measuring and modeling the energy consumption of multicore processors. The modeling approach allows us to find a breakdown of the energy consumption among a set of key hardware components, also called HW nodes. We explicitly model the front-end and the back-end in terms of the number of instructions executed. We also model the L1, L2 and L3 caches. Furthermore, we explicitly model the static and dynamic energy consumed by the uncore and core components. 
From a software perspective, our methodology allows us to correlate energy to the executed code, which helps find opportunities for code optimization and tuning.\n We use binary analysis and hardware counters for performance characterization. Although, we use the on-chip counters (RAPL) for energy measurement, our methodology does not rely on a specific method for energy measurement. Thus, it is portable and easy to deploy in various computing environments. We validate our energy model using two Intel processors with a set of HPC codelets, where data sizes are varied to come from the L1, L2 and L3 caches and show 3% average modeling error. We present a comprehensive analysis and show energy consumption differences between kernels and relate those differences to the algorithms that are implemented. Finally, we discuss how vectorization leads to energy savings compared to non-vectorized codes.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030224" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2573809b830eef714a7970dac203a235e014a34e", "sources": [ "DBLP" ], "title": "An Incremental Methodology for Energy Measurement and Modeling", "venue": "ICPE", "year": 2017 }, "257c1c169dd0ae98e273efd0d0948f2a028d4c3f": { "authors": [ { "ids": [ "3418470" ], "name": "Bharath Kumar Reddy Vangoor" }, { "ids": [ "28670096" ], "name": "Vasily Tarasov" }, { "ids": [ "1708491" ], "name": "Erez Zadok" } ], "doi": "", "doiUrl": "", "entities": [ "Central processing unit", "Experiment", "Internet bottleneck", "Operating system", "Systems design", "User interface", "User space" ], "id": "257c1c169dd0ae98e273efd0d0948f2a028d4c3f", "inCitations": [ "f912e1c1ded4faaa8576fc942a8931740d43664b", "af6365cce49512c386f97976e7ef1dab10aa2dbf", "18e93539fe6163a0b56f3427fc562733f89449a6", "ad42b4773cd461ba58bda07e1b7b0ff24c4ddba4", "9809bc2847bc9274564c6c3545561d920c5e44f3", "7f640b84dedbb95ec84d86563b3304035c9fc980", "9a397280f7e809008ebe027b0d53e0a8701933d3" ], 
"journalName": "", "journalPages": "59-72", "journalVolume": "", "outCitations": [ "12a0046a1197ae63c3d616c74e367dc583cef196", "03e255b248ce618f8891484cb747b2ef4bb75448", "6636baa2e6310cbc0da231c74e66e0f9f732e55c", "0abaa5b259f47e141b8888db3a102048b8a37554", "67ac8e37fd240844e0726a2d171e20042c7648f4", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "20993e1f999c6b4138b4a7ae61dc6471095d69f9", "5690155f0b0a16daf17a99da2b68e67495ec63cd", "160f182bf5c01c9559473a583e8a730fc1de3c3e", "0ce479229630e55e732597cf9b2aeb5018aae4c2", "29d45feaa50b0304ab52bd5c6d0381c21c2b42bc", "0d5c71866e3e118fcba455feab3d06f86858adee", "b46cb54a87a448212af37f2594a512fec39a059e", "022fc284362d04569a1561c3d04dfe0f377d6112", "03695a9a42b0f64b8a13b4ddd3bfde076e9f604a", "0f55217987ec25afa0f815e0aa3957e669b0280e", "55becb668bc6cbf0c13b09caa92b849246c36882", "2d60d3596490d9999d8433bf41405060779bc11d", "35aecf2a6ad7f12ad06d9f9e6b7d4935fea840ac", "012ab4527d6aee2387c243d304c624f3b9cf03f3", "3c59bf68086f4cc1f216bd4cf461293877d9f46a", "1e111e72efe7bbe2c8eeb792318eac40e8f56b80", "28e0b55b96bcab20c0f914d4c2d023c361c1b3c7", "7c5dc4de32a0d833eb87ae56fb24f9cb35f68fa9" ], "paperAbstract": "Traditionally, file systems were implemented as part of OS kernels. However, as complexity of file systems grew, many new file systems began being developed in user space. Nowadays, user-space file systems are often used to prototype and evaluate new approaches to file system design. Low performance is considered the main disadvantage of user-space file systems but the extent of this problem has never been explored systematically. As a result, the topic of user-space file systems remains rather controversial: while some consider user-space file systems a toy not to be used in production, others develop full-fledged production file systems in user space. 
In this paper we analyze the design and implementation of the most widely known user-space file system framework\u2014FUSE\u2014and characterize its performance for a wide range of workloads. We instrumented FUSE to extract useful statistics and traces, which helped us analyze its performance bottlenecks and present our analysis results. Our experiments indicate that depending on the workload and hardware used, performance degradation caused by FUSE can be completely imperceptible or as high as \u201383% even when optimized; and relative CPU utilization can increase by 31%.", "pdfUrls": [ "http://www.fsl.cs.stonybrook.edu/docs/fuse/fuse-performance-fast17.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17-vangoor.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/vangoor", "http://www.usenix.org./system/files/conference/fast17/fast17-vangoor.pdf", "http://www.fsl.cs.sunysb.edu/docs/fuse/fuse-performance-fast17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/257c/1c169dd0ae98e273efd0d0948f2a028d4c3f.pdf", "s2Url": "https://semanticscholar.org/paper/257c1c169dd0ae98e273efd0d0948f2a028d4c3f", "sources": [ "DBLP" ], "title": "To FUSE or Not to FUSE: Performance of User-Space File Systems", "venue": "FAST", "year": 2017 }, "25a7185bcb2acb855bc3cc31ee29d0c4fd68edfd": { "authors": [ { "ids": [ "1703441" ], "name": "Mihir Bellare" }, { "ids": [ "39869181" ], "name": "Viet Tung Hoang" } ], "doi": "10.1145/3133956.3133995", "doiUrl": "https://doi.org/10.1145/3133956.3133995", "entities": [ "Block cipher", "Digital Forensics Framework (DFF)", "Discrete logarithm", "Encryption", "Format-preserving encryption" ], "id": "25a7185bcb2acb855bc3cc31ee29d0c4fd68edfd", "inCitations": [ "29d25e885c62d80309b45afc2d244b7c679c7e77" ], "journalName": "", "journalPages": "1515-1532", "journalVolume": "", "outCitations": [ "41d9f12f649b0589b481cd2462314aefde020b66", "1ab38cca0b86b8af1f46685fbff9d8a937b7b9ed", 
"34c4727882f379dcb9dde08d3a629caea73c77ee", "74e0e22cdefb11a49dac487c3151dcc19deca0cb", "136a5e50af7f75aca1a28db1a337bacc2cbdde5c", "400251fab502adf5a8ecdf6e5ba7d522bfe5cf1a", "90cfaee0ae64557f7485410d87c3a26ae532036e", "3d50917f8c667509ee91b8df21cc55904881a127", "079a00a216ca19407f8b37cc4ff28fcdfb077654", "aad268d8db8c2410844bfee17829e51773e1a221", "7790a913c43d743b79cc763fdd558b3a69b3a1a2", "99985b01ef7842615ff6ed0fe535adeb88c820d6", "3291b0fe283d72972f4246cf3773f2e054118bdd", "4381e5698fa6ed369ad783b250ea1f75ac3f6144", "014639d5d5cde265e48b8607ec9f785b31435d33", "3145a847d7a137716a7424f340b99f2b7446f902", "e6aeba3f580f456141e27a66436280ac456513d1", "3248b6c991d26d57d31d34fe4879b5abdc2019db", "94f53bda667df2030cae0b9ef5682e412fa7a332", "05e7e31f288e7182245341a1af7e10c32927ff65", "e4c1d1ad684535bf835475aafb8fcfe5d23b0a93", "4893aee8baba1cc6a3c0de35ca3aad87f267b50f", "9afdf8c0e664b2275a4f5d9e6ad50caf736a33f9", "dff0ac6c5a0a2d703024a5914dcd5ca1986d4364", "2ee3ce4286d31d43e9bb5bc61f43a06a020a7b53", "5281536f3d07af0074666f48884b9d8b860dd046", "93ea2fbd4bb36874aabef89e985650f43e0186a7", "215e514f25e02d979552992ea3044999cf5fe9dc", "3e426349f0cf3a65b502be05ebca23e693ec03fd", "259a83b7942a8a1a16da5ad1d405613d9c49300a", "258d9631d4131a106c66400e034f293cb6ca504f", "3c71fdbb2e5fcf36508eac2c471c33580f8adfe3", "5101c20112e7f6247929928ab6942b671e3cef9e", "2b5cbfbee5f2cb01866c051eb34f79b5b0b2886b", "2f832cadf3adbf2470d01fe071eb49465b5d704c", "1b9abaa9ece70fc8a6d0312f76fb0a0bc62902f5", "40991c781e3dcfef2f7f6b20aad9515e2f56d978", "9495db8ebbf897c4beb71c5d90e6e53a3aeeb402" ], "paperAbstract": "We introduce identity-based format-preserving encryption (IB-FPE) as a way to localize and limit the damage to format-preserving encryption (FPE) from key exposure. We give definitions, relations between them, generic attacks and two transforms of FPE schemes to IB-FPE schemes. As a special case, we introduce and cover identity-based tweakable blockciphers. 
We apply all this to analyze DFF, an FPE scheme proposed to NIST for standardization.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133995", "http://eprint.iacr.org/2017/877", "https://eprint.iacr.org/2017/877.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/25a7185bcb2acb855bc3cc31ee29d0c4fd68edfd", "sources": [ "DBLP" ], "title": "Identity-Based Format-Preserving Encryption", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "25ac02feec81b523dfd01b27227bcd2c14f6152f": { "authors": [ { "ids": [ "35068009" ], "name": "Srinivasan Ramesh" }, { "ids": [ "3083298" ], "name": "Sathish S. Vadhiyar" }, { "ids": [ "3165131" ], "name": "Ravi S. Nanjundiah" }, { "ids": [ "7913822" ], "name": "P. N. Vinayachandran" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.42", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.42", "entities": [ "Automatic vectorization", "Categorization", "Computation", "Coprocessor", "False sharing", "OpenMP", "Scheduling (computing)" ], "id": "25ac02feec81b523dfd01b27227bcd2c14f6152f", "inCitations": [], "journalName": "", "journalPages": "316-323", "journalVolume": "", "outCitations": [ "fde04b7b524045969614382529bb1d6cc88efcf0", "1f2ff98f9413bb36c641e9edcfa79f7b33eeb80a", "408e61c117816833cdd807b5d8c9258f1c2022ab", "23ed77d5aa2fe71f58d175d915e0209df7d202d5", "0382955dcc73511c3ae9b5327e0213272a1b4152", "3230131b14559a11c8ee9ab9beccf725dfb437de", "d7cf43c5e59f07604cea7589ac49b87c594f1888", "034ddc1749424d0335be143a7c7d07b7c559c2ad", "962c1aced7e215d880d200665084b5239b5efbf2", "c688010fdf80996d6f838d908d117f1c7eed8542", "73f44853f0f2b02c481b0a36023c37f0b44d6685", "eedb46c68a9c71ccb38de3933e5f7e1dd9a789c7" ], "paperAbstract": "Deep and shallow convection calculations occupy significant times in atmosphere models. These calculations also present significant load imbalances due to varying cloud covers over different regions of the grid. 
In this work, we accelerate these calculations on Intel\u00ae Xeon Phi\u2122 Coprocessor Systems. By employing dynamic scheduling in OpenMP, we demonstrate large reductions in load imbalance and about 10% increase in speedups. By careful categorization of data as private, firstprivate and shared, we minimize data copying overheads for the coprocessors. We identify regions of false sharing among threads and eliminate them by loop rearrangements. We also employ proportional partitioning of independent column computations across both the CPU and coprocessor cores based on the performance ratio of the computations on the heterogeneous resources. These techniques along with various vectorization strategies resulted in about 30% improvement in convection calculations.", "pdfUrls": [ "http://arxiv.org/abs/1711.00289", "https://arxiv.org/pdf/1711.00289v1.pdf", "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.42" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/25ac/02feec81b523dfd01b27227bcd2c14f6152f.pdf", "s2Url": "https://semanticscholar.org/paper/25ac02feec81b523dfd01b27227bcd2c14f6152f", "sources": [ "DBLP" ], "title": "Deep and Shallow Convections in Atmosphere Models on Intel\u00ae Xeon Phi\u2122 Coprocessor Systems", "venue": "HPCC/SmartCity/DSS", "year": 2017 }, "25bce1e0f3ed17e84cd7e598c8e0b6207fd45070": { "authors": [ { "ids": [ "1800485" ], "name": "S\u00e9bastien Bardin" }, { "ids": [ "39104126" ], "name": "Robin David" }, { "ids": [ "2287203" ], "name": "Jean-Yves Marion" } ], "doi": "10.1109/SP.2017.36", "doiUrl": "https://doi.org/10.1109/SP.2017.36", "entities": [ "Binary file", "Call stack", "Disassembler", "Malware", "Malware analysis", "Obfuscation (software)", "Scalability", "Symbolic execution", "Syntactic predicate" ], "id": "25bce1e0f3ed17e84cd7e598c8e0b6207fd45070", "inCitations": [], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "633-651", "journalVolume": "", "outCitations": [ 
"1a7b8bd4ce81d0fb0ee9272e3410af2388ad8f1b", "50dad1b5f35c0ba613fd79fae91d7270c64cea0f", "32c9b3a4990fdb26da19a4e0817936369a5e91bc", "0ab393affe9d674ef790be14fdfade368f3e5989", "974f362d3fcabfba7befbb7cba9d8027d5942f35", "1b93d79cc923adb402cbdbd9d7eedf88426ec6b4", "e7a3abf50e86bd7dc53dc4507a7d39a10eeb2497", "f3e52a199c15f6594f95f690f0db9695fc9c9164", "583a4aed2057b2b509fbdf9fdee5515886de7e86", "500ed24d8c840ce493e7154f4371c0cc1c897fda", "5ded56d0081fb0190b994be41606f584b7652f31", "68a39ea1849111c4813447a0400829d89805111e", "224f7b1e9e4a2e68fd06762914ce3e228d9dc6a0", "0719eace039eb8407062eac781177e080eea1567", "bafbb31035f22d714092b8adc385a7679f41990e", "211a4d5582dba6dccd78e9d049507e4b3df17fba", "2b223cf0fffd1905ddb49f43914d8bf2ecaa9c7e", "ac06b329e591053b5b55d3b2d6afa1fa13fa7652", "59d1aa5a9f27828dec64007cbffdca7fb0cb377a", "5bab5b8e793ab42c1666357ca66fc887d54016f7", "2309fe4f6b461ba9e91b945b9a87ae56e54e35d0", "264a8a66e293305f88793465727cc7903058687d", "17017bb963104de281415251613a8d3170828635", "284b7897abe8bb6bd724436d464ccf67ee437833", "11443efe465ad544f478524da6c66c085b16e28b", "3a0ad57ecb97795a8cc91290484ff9e576728c84", "8801d2091c1e3a691376b3a2cbc402ece8872759", "2fd85993e7e5cbf7dcd4985d6088020355c254d8", "0b53fab8dea434e1046836159e184d9565ffd401", "22b0d0225b196719abf2ea5c3c6e374a0601a65b", "20582dec866b1494480492b6257d4010d6ea3113", "552d9119e5815cee13f49c5a725465e4bc12d655", "70696431430bab0d406cb23f503af5841961ba76", "f8e4812bbb131c09a641e3e55c3f392e032679eb", "0b8633797e534b161a81e1666d79c2080d06ed00", "134f4e5946efe8af306fda71d9d47e1e81dbc27c", "4a250eef9f4897c47d28547b9e88327fb23dc7e9", "085e83f8760b81fd448c38ae432c0e99928286e1", "30deb4c2b87ed1f4698c709f09f3b2cb8f451ba2", "a2be3a50e1b31bd226941ce7ef70e15bf21b83a1", "55a441c1b7928492610bf37bfc5b3789f77e4291", "22a5502b3fc26ef27835284215146ce95f6c45cf", "18e965d40f7dacb88bca7b0a231eca5adbfb6201", "1e37625f382709b06f72e5c3c41aba1328ff66dc", "0c79b3f6ad66762680c4e350fc99b19d512233d2", 
"0c99969b86bc1fc479fa96d7e4ab4155df2ac181" ], "paperAbstract": "Software deobfuscation is a crucial activity in security analysis and especially in malware analysis. While standard static and dynamic approaches suffer from well-known shortcomings, Dynamic Symbolic Execution (DSE) has recently been proposed as an interesting alternative, more robust than staticanalysis and more complete than dynamic analysis. Yet, DSE addresses only certain kinds of questions encountered by a reverser, namely feasibility questions. Many issues arising during reverse, e.g., detecting protection schemes such as opaque predicates, fall into the category of infeasibility questions. We present Backward-Bounded DSE, a generic, precise, efficient and robust method for solving infeasibility questions. We demonstrate the benefit of the method for opaque predicates and call stack tampering, and give some insight for its usage for some other protection schemes. Especially, the technique has successfully been used on state-of-the-art packers as well as on the government-grade X-Tunnel malware – allowing its entire deobfuscation. Backward-Bounded DSE does not supersede existing DSE approaches, but rather complements them by addressing infeasibility questions in a scalable and precise manner. Following this line, we proposesparse disassembly, a combination of Backward-Bounded DSE and static disassembly able to enlarge dynamic disassembly in a guaranteed way, hence getting the best of dynamic and static disassembly. 
This work paves the way for robust, efficient and precise disassembly tools for heavily-obfuscated binaries.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.36", "https://www.ieee-security.org/TC/SP2017/papers/220.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/25bce1e0f3ed17e84cd7e598c8e0b6207fd45070", "sources": [ "DBLP" ], "title": "Backward-Bounded DSE: Targeting Infeasibility Questions on Obfuscated Codes", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "25be1e75c750185b37135006789217da86507313": { "authors": [ { "ids": [ "2635740" ], "name": "Lingda Li" }, { "ids": [ "17897978" ], "name": "Robel Geda" }, { "ids": [ "2189162" ], "name": "Ari B. Hayes" }, { "ids": [ "3399051" ], "name": "Yan-Hao Chen" }, { "ids": [ "17974030" ], "name": "Pranav Chaudhari" }, { "ids": [ "31790133" ], "name": "Eddy Z. Zhang" }, { "ids": [ "1686872" ], "name": "Mario Szegedy" } ], "doi": "10.1145/3084451", "doiUrl": "https://doi.org/10.1145/3084451", "entities": [ "Algorithm", "Distributed computing", "Graphics processing unit", "Locality of reference", "Manycore processor", "Parallel computing" ], "id": "25be1e75c750185b37135006789217da86507313", "inCitations": [ "fc7fca6f7f8872ab9d9f46a33e3d901980ab9af6" ], "journalName": "", "journalPages": "6", "journalVolume": "", "outCitations": [ "1156f60e40548096df49528b1342bb3e88b0f378", "2ecbed77a22c1d3e4e006c796bb90c39adcc0ccc", "6d30db4bdb14d5a23320970407e1fa5bb514b7c2", "f8c965c5e658b6c44366a5048c36f6975f378599", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "1753c2dc85cc40e0a2e8b4a405c1690eab066d8d", "0a3b2d2ddf6e832a0b282380abb9b2eeb7d97177", "15b9cea4970ca2bf6bde3f54269f75e1ebda8bb5", "98183e6112652b211daa9ae5214cf92e1eb77a6c", "711ad6d4dcc1939b662be6550fd2d6c767dbdbc2", "0340939eec695ca5976ca8d8bc41fececbc8ddbc", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "b58da0c0f3b9ddb5230ec37a82bb457c37b058e6", "1f0612de1f191abadf250b78cd78f884203cca5e", 
"52a4130c74ad95664fbc067ef91fd75b748ac409", "6dce7b385ce4882d957752ed25451b5914f320d0", "f0f4757aa2f923a349e8357e73850a78e9b80fee", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "00dbf46a7a4ba6222ac5d44c1a8c09f261e5693c", "a06eb2e52176a5b6b941bb8544c544b64f527e32", "ff71759a3efa271670c1e7820873df872b4ca3b9", "0ad8e89091eed09217e66adc98136126addc2619" ], "paperAbstract": "Graph edge partition models have recently become an appealing alternative to graph vertex partition models for distributed computing due to their flexibility in balancing loads and their performance in reducing communication cost [6, 16]. In this paper, we propose a simple yet effective graph edge partitioning algorithm. In practice, our algorithm provides good partition quality (and better than similar state-of-the-art edge partition approaches, at least for power-law graphs) while maintaining low partition overhead. In theory, previous work [6] showed that an approximation guarantee of O(dmax√ log n log k) apply to the graphs with m=Ω(k2) edges (k is the number of partitions). We further rigorously proved that this approximation guarantee hold for all graphs.\n We show how our edge partition model can be applied to parallel computing. 
We draw our example from GPU program locality enhancement and demonstrate that the graph edge partition model does not only apply to distributed computing with many computer nodes, but also to parallel computing in a single computer node with a many-core processor.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078505.3078520", "http://doi.acm.org/10.1145/3084451" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/25be1e75c750185b37135006789217da86507313", "sources": [ "DBLP" ], "title": "A Simple Yet Effective Balanced Edge Partition Model for Parallel Computing", "venue": "SIGMETRICS", "year": 2017 }, "25c3ee2e736c58eddc7182688e19fa7b65bef83a": { "authors": [ { "ids": [ "2679804" ], "name": "Ian Miers" }, { "ids": [ "1773836" ], "name": "Payman Mohassel" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud computing", "Email", "Encryption", "Input/output", "Locality of reference", "Provable security", "Symmetric-key algorithm" ], "id": "25c3ee2e736c58eddc7182688e19fa7b65bef83a", "inCitations": [ "6dc0b00e15a8ee5168bae39dabc300002fa6173e", "9d7f4759f6e965e11557d4fec6ada7becd2d7ce6", "0bd8f0ab2ade3cbb560dca22c2f5dfd203f4cfd3", "812510b7b500eebe7a9a74eaeb331a0160434b2c", "689dd5f20fbe04fe4fb5faaf69792999bbd9d671" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "830", "journalVolume": "2016", "outCitations": [ "14dc5effd28d22cf7fc8aa6a1be8ae2d37859891", "02beed2e1350a0d0b01bb9622081cb93a965a716", "32dc88258734f6c9d8fd1d0151d0c763ae2df75a", "8f8de213b1318e0ef0914008010e87ab64ab94ff", "1c7e79160ab095c2de9d83da8ca3311b3bebb366", "ad0c881078b2cd3d69b5cc2ef63bcdb72070298e", "00ecd7b2e0c364ce4e9f5416ee1dbeaeabe87a62", "13868fa5a86ebde021a1c91415fb9bb718c4a804", "1cb9aa0116af7d9e61ffabfa951153e9f4e43779", "9ea1bbb1d3302aa9504e71ca42e1c19c09e310e0", "10d5282a8d25c4490338d5fb8ad2f57b8646ad38", "076e9f5d5b3e813b0cfa5dd3e47f1b8591136bf2", "0df6726c1d83b1e0d6c6580a1e2594519590e38e", "20b63210954f7c5a70664f301dcd7196856ccfa7", 
"1ab81ae077d6944fbff279a7a8a38df48f75eadf", "22a5eeb8608b35e371b7544a54fabeadca8866e3", "1cef17420fe9a74a504b1d3250eaae3abe8b7595", "92eaba06af12761b5c64b84e6028d21cd05af9dd", "3864cfb41db27452cefe3b1f64f05623690201ab", "e73f1cc40702aac3ee6aa8f087fd3b0e700a2257", "d878fb5a7d1ea14649f590de5ebb806d1414f0b6" ], "paperAbstract": "Free cloud-based services are powerful candidates for deploying ubiquitous encryption for messaging. In the case of email and increasingly chat, users expect the ability to store and search their messages persistently. Using data from one of the top three mail providers, we confirm that for a searchable encryption scheme to scale to millions of users, it should be highly IO-efficient (locality), and handle a very dynamic message corpi. We observe that existing solutions fail to achieve both properties simultaneously. We then design, build, and evaluate a provably secure Dynamic Searchable Symmetric Encryption (DSSE) scheme with significant reduction in IO cost compared to preceding works when used for email or other highly dynamic message corpi.", "pdfUrls": [ "http://eprint.iacr.org/2016/830.pdf", "http://www.internetsociety.org/sites/default/files/ndss2017_01-1_Miers_paper.pdf", "http://eprint.iacr.org/2016/830", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/io-dsse-scaling-dynamic-searchable-encryption-millions-indexes-improving-locality/" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/e942/88a8b9b337a390e54b5c5cfc384eccb450d2.pdf", "s2Url": "https://semanticscholar.org/paper/25c3ee2e736c58eddc7182688e19fa7b65bef83a", "sources": [ "DBLP" ], "title": "IO-DSSE: Scaling Dynamic Searchable Encryption to Millions of Indexes By Improving Locality", "venue": "NDSS", "year": 2016 }, "25d1487c07a2dc4a511dcb9bc1c64d86fbeb94f0": { "authors": [ { "ids": [ "29235446" ], "name": "Alind Khare" }, { "ids": [ "1744939" ], "name": "Vikram Goyal" }, { "ids": [ "2968419" ], "name": "Srikanth Baride" }, { "ids": [ "1710248" ], 
"name": "Sushil K. Prasad" }, { "ids": [ "35215647" ], "name": "Michael McDermott" }, { "ids": [ "4459907" ], "name": "Dhara Shah" } ], "doi": "10.1109/HiPC.2017.00038", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00038", "entities": [ "Algorithm", "Apache Spark", "Big data", "Bloom filter", "Data mining", "Distributed algorithm", "Experiment", "Graph database", "Heuristic", "Heuristic (computer science)", "Program optimization", "Relevance", "Text mining" ], "id": "25d1487c07a2dc4a511dcb9bc1c64d86fbeb94f0", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "263-272", "journalVolume": "", "outCitations": [ "5ebdd5a84a2ba2ae5c4bdc244808a9a573660c30", "8e820679fcac844dce8691a92b5f332b44ee2145", "e81753ddaa1ed486f38e61dfc48108e3e271502e", "2b09d0796f82ec43564d72097d0e552ee85c7286", "0ecc0140fde8f6c6137ddd5f4c13f35685f7f3e8", "33b83cf4471905abf2bb08e0e9cfef8c85d6bcc6", "197ef20f1c652589da145a625093e8b31082c470", "70079b69a162880740c1e47b4a6c403fec280574", "7db8dfb0843c861bfe264be48f0e4314c9d92363", "89e701c706894415950eb4160ba95a717cdc9594", "18882e17425e2f37d24fb5c4fe24ab91656e5d1d", "a67ed341ac130d9625c8dc1a0d32ead797724d60", "0bbf4041adb8de3d56f74c2fb45b32976b843817", "075b693b3f9a685938284f1d7a1be8eb5c9f415a", "d4c7200ea0ee931e7a65de4515131c96c838c4d9", "75d5e3b7c38ae7ba60aabda376fc03b25f5f49c8", "d7d93714a650412dfe4236e2a6b23419789f285c", "0b698cf866906498ade8a8c77111cf704491db22" ], "paperAbstract": "Frequent subgraph pattern mining (FSM) finds subgraph patterns that occur in a graph database with a frequency that is more than a given threshold. In FSM, the notion of occurrence captures the presence or absence of a node and an edge in a binary fashion and considers relevance of each edge or node as same. However, an edge or a node may have different relevancy score. Therefore, the utility of a pattern should be defined using the relevance score of participating edges or nodes. 
This paper defines the utility notion of a pattern using this idea and presents algorithms to mine high-utility patterns from a given graph database. A significant issue in high-utility pattern mining is that the antimonotonic property no longer holds contrary to the FSM. Hence pruning of the search space becomes a daunting task. To address this issue, we incorporate a function to estimate an upper-bound utility of a pattern object that also satisfies the anti-monotonic property. This paper presents three optimization heuristics for the solution on a distributed platform, namely, a novel use of bloom filter to avoid exploration of non-candidates, avoidance of sending database information with each pattern, and avoidance of sending pattern embeddings with each pattern. The experimental study on Apache Spark shows the effectiveness of our proposed optimization strategies.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00038" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/25d1487c07a2dc4a511dcb9bc1c64d86fbeb94f0", "sources": [ "DBLP" ], "title": "Distributed Algorithm for High-Utility Subgraph Pattern Mining Over Big Data Platforms", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "2614038b2e4c837adeeaab86341224065e76109d": { "authors": [ { "ids": [ "3112894" ], "name": "Hossein Siadati" }, { "ids": [ "1719861" ], "name": "Nasir D. 
Memon" } ], "doi": "10.1145/3133956.3134003", "doiUrl": "https://doi.org/10.1145/3133956.3134003", "entities": [ "Algorithm", "Anomaly detection", "Byte", "Credential", "Data breach", "Denial-of-service attack", "Hall effect", "Intrusion detection system", "Lateral thinking", "Login", "Network traffic control", "Simulation" ], "id": "2614038b2e4c837adeeaab86341224065e76109d", "inCitations": [], "journalName": "", "journalPages": "1273-1284", "journalVolume": "", "outCitations": [ "1fb14e65d91c2507c5615640744dc4769d98a9e0", "7ad9b11b446d29006ed857b0f13323f6875d601b", "16393b7f6c6564b31f453882d6adc4697a9c4591", "095b05f6f0803bb1871b677cf3c3d4b41dbe6d18", "5911131fd7add61adab0e30f5a7255064bd0a4ce", "996ce6a529c3d7652a304ca05bf9d32d3db44e95", "136e41a36bbdb38695a725fbf5ff9d3e698ce71f", "5e31a8dab43b95c1e2596510ab0f2e8172099acc", "7e9305eef5b3cd02c5d9e3768a5acf3c5654c8b3", "78908f567d67637f6f5b9d4cd018851f31bd9a6c", "3374241a54e2b1d4809e7957234ee22e7a112641", "716e1fea03d45e61e94c49853e999eb70b275a04", "232f7bb455bf054c9e9255adc88aff65caadaba4", "1c8d06510ad449ad24fbdd164f8008cc730cab47", "0d842cdc1aa77021326a03ccb2668b465f11d097", "28b746a538ee25055bf79df7ba70cdfc37c8e308", "1bd10813ade534b5500e92600d909bacb514138d", "b34fb6158cc3901814b56d98d77d726f4d0c30c6", "5c6c3a86d5218aefa20d7ea64444a5e186b769f1", "e0fb0ed46830c224f5f0f5e92d5fba2a7062b1b4", "067bd9d975b132dc668013895a5e4298623feebd", "397fc541620130e1aa26cbff6f4d61b6d9ecf787", "406a80247a60e950a1b0e69c0e0561a4264a8b0d" ], "paperAbstract": "Many network intrusion detection systems use byte sequences to detect lateral movements that exploit remote vulnerabilities. Attackers bypass such detection by stealing valid credentials and using them to transmit from one computer to another without creating abnormal network traffic. We call this method Credential-based Lateral Movement. 
To detect this type of lateral movement, we develop the concept of a Network Login Structure that specifies normal logins within a given network. Our method models a network login structure by automatically extracting a collection of login patterns by using a variation of the market-basket algorithm. We then employ an anomaly detection approach to detect malicious logins that are inconsistent with the enterprise network's login structure. Evaluations show that the proposed method is able to detect malicious logins in a real setting. In a simulated attack, our system was able to detect 82% of malicious logins, with a 0.3% false positive rate. We used a real dataset of millions of logins over the course of five months within a global financial company for evaluation of this work.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134003" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2614038b2e4c837adeeaab86341224065e76109d", "sources": [ "DBLP" ], "title": "Detecting Structurally Anomalous Logins Within Enterprise Networks", "venue": "CCS", "year": 2017 }, "262c16d1bdd8d0ccef77bd66648144d584a24477": { "authors": [ { "ids": [ "3197683" ], "name": "Riza O. Suminto" }, { "ids": [ "22409029" ], "name": "Cesar A. Stuardo" }, { "ids": [ "2856081" ], "name": "Alexandra Clark" }, { "ids": [ "40455668" ], "name": "Huan Ke" }, { "ids": [ "1709102" ], "name": "Tanakorn Leesatapornwongsa" }, { "ids": [ "1697120" ], "name": "Bo Fu" }, { "ids": [ "9383523" ], "name": "Daniar H. Kurniawan" }, { "ids": [ "40116925" ], "name": "Vincentius Martin" }, { "ids": [ "26337043" ], "name": "Maheswara Rao G. Uma" }, { "ids": [ "1738725" ], "name": "Haryadi S. 
Gunawi" } ], "doi": "10.1145/3127479.3131622", "doiUrl": "https://doi.org/10.1145/3127479.3131622", "entities": [ "Apache Flume", "Apache Hadoop", "Data parallelism", "Fault model", "MapReduce", "SPARK", "Speculative execution", "Throughput" ], "id": "262c16d1bdd8d0ccef77bd66648144d584a24477", "inCitations": [ "347e1352fb903b40dce606a1e581e9d601bc289c", "40f196e21a289394c4354961116587b8accba45e" ], "journalName": "", "journalPages": "295-308", "journalVolume": "", "outCitations": [ "91ec7ef1b6ffeba0a2b19f00501f2f7e52a76077", "b293405e9b3cfac8c58083b38bdc85d18dd0c187", "23b201f09b66bc7cbf2cfc1908a4fbc106cfa326", "830ee8d87a3f2ef969d34c1dc7224d1b3dca6c1b", "0d62acaefd4c4f41fd5814c8d9267d7798de9284", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "0706225eeac0f855b19c365313db61252ecde0d7", "7b6e453e08717cfdcb66349ac184996e43ed85b3", "26a88fcaf621270af5f5786fdb2df376a2bc00aa", "040980f7892c9b562a3847cb97f0808858665070", "4eab97d0d1c75641671aa5b7761978322d904c5c", "7da8f04fa8c21811be41dcc08e3ff5953977652a", "2a0d9931c1e794ffe96c4b6c37507e96a3c4cafc", "23ddae93514a47b56dcbeed80e67fab62e8b5ec9", "51dae50f2cf78af88e8d064671b91e6e059ca7a6", "1f3f65e0947393e9dc9ed06e546b834f6165fcfa", "0541d5338adc48276b3b8cd3a141d799e2d40150", "4dfdd7cd8abbd68675ea19c5902e5a7d14709799", "14a2ba566f6c8f7f519b299042ccf358361c558f", "090599a2caf4591c87699ad850c75554cd712937", "41e0ef45114deff41d92c8b235393cfe4d9a94f4", "661b19ff987b9ed9d9252324d4a72ab1fbd588ae", "78f246756811e924825a03909952d2c32c593a52", "58b628792d3eb22a034a871ed3cf373afe591928", "0608d9937c074520cdc93cc444cc1c77039c5332", "02cbb22e2011938d8d2c0a42b175e96d59bb377f", "3de30c8dafc720bf066e5e3a005d16212dd31149", "09f0751d7452cd0480d572171593d07996325fcb", "ae24289a0ed3152de528f863c96279382b14ae61", "4954fa180728932959997a4768411ff9136aac81", "090030e0d1aa117008e9e9fa4abdee0a95455f4a", "086820e40dc8046c30a8751394df167bec047fe1", "28f7f43774bce41023f9912a24219e33612a3842", "230239fb61d7a6996ac9552706363323b34735f2", 
"0aeb77fb41dc8e863e054fcffea7b8b3011515ce", "3f4297d2e5ae196f30f9a18dcd8400dfca5d2e47", "47f5bba54710b0e1663e9336790cb4609d16077d", "11433bebdbb138fce1d40ef014efc252c53c08bc", "626d66d41ebf9a126f0127796f5d81414905c31a", "b7014a268c35e377366634d6b8370a8a7db285a5", "59250c7388caba98bd4adc2f1969fbec5500ed6a", "04e24be4c25539e4b4fa0498c85b3a3e2d026c02", "53aabc0ab7bdb22c4bb5b508a4db2fc4a2387060", "10da8673314188dd6ab1f16f73c05358771dd8cf", "10e0397e08f37ee5d3ba1f9e24ac9eb313c784f4", "0d2f3f6abd86368a2eb9a6d0b37d1299ec5939a6", "05a436f059c3897c3509dc059903364eff4a79af", "28ecdc50beb098d9176d992fed80eb2bac5963a4", "679cfc41aa5f0174040b5ab23ea92cf04f495a6e", "070c3a8c3ce10277424f23c01a54b377478ee59c", "0ea5ac1eb04bcf16a8856d886be45ec90044a4c3", "0558c94a094158ecd64f0d5014d3d9668054fb97", "4a567958238e3598e2bfa840fb9b3d221e644df8", "061316b7516e20a4d66e7d95b3543eded514ef5d", "32d23ce43877aa8cd385a8e01f366329dd015a5c", "332f77fd05703c1607e3b57884ad31fb1fad0104", "808fadaaa7d7091e95809f419959917bb6ce4a6d" ], "paperAbstract": "We reveal loopholes of Speculative Execution (SE) implementations under a unique fault model: node-level network throughput degradation. This problem appears in many data-parallel frameworks such as Hadoop MapReduce and Spark. To address this, we present PBSE, a robust, path-based speculative execution that employs three key ingredients: path progress, path diversity, and path-straggler detection and speculation. We show how PBSE is superior to other approaches such as cloning and aggressive speculation under the aforementioned fault model. 
PBSE is a general solution, applicable to many data-parallel frameworks such as Hadoop/HDFS+QFS, Spark and Flume.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3131622", "http://ucare.cs.uchicago.edu/pdf/socc17-pbse.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/262c16d1bdd8d0ccef77bd66648144d584a24477", "sources": [ "DBLP" ], "title": "PBSE: a robust path-based speculative execution for degraded-network tail tolerance in data-parallel frameworks", "venue": "SoCC", "year": 2017 }, "2638b231d41f04df54b5383b7e18f832522459d3": { "authors": [ { "ids": [ "1863675" ], "name": "Neville Grech" }, { "ids": [ "2565863" ], "name": "George Fourtounis" }, { "ids": [ "1771411" ], "name": "Adrian Francalanza" }, { "ids": [ "2726878" ], "name": "Yannis Smaragdakis" } ], "doi": "10.1145/3133892", "doiUrl": "https://doi.org/10.1145/3133892", "entities": [ "Apevia", "Call graph", "DACAPO", "Java", "Machine code", "Reification (computer science)", "Static program analysis", "Toolchain" ], "id": "2638b231d41f04df54b5383b7e18f832522459d3", "inCitations": [ "166583d9e65f9ec90326bbaf1bc1f65f11dd259c" ], "journalName": "PACMPL", "journalPages": "68:1-68:27", "journalVolume": "1", "outCitations": [ "4e24aee54000a7a0d9149478f032d807c71b42b1", "041fb17a8de187528529990e43f14280d420002f", "94d0e8b118efecb9f5b2056d84bf22253a2fb63c", "012d35a0c1958ce1520ce88ba55c7a16629a429a", "7b2f26c1c2eaac02edb549b59ff89d9f2b59c07c", "d1916fecdda925fc856be0a73ea3e7972d4a38b2", "3a7dc1c69d637e632fef71459fc42a37702b9be0", "557555195d8cc631281988661984ed8c4e91b395", "0b53fab8dea434e1046836159e184d9565ffd401", "eafb06ac7202985750ce0a03351c7f4ec41b4521", "8469007f1d62cac7d7188dbe6b67b24d40d46ca4", "1316b4af762f812052210fd51a18af2681c8dfe0", "6c232627a4992fe9fcafc13e54d39e7de7da5fe5", "027eb436c35c7e293e7ebc565163cb54c05fe2e9", "6bd1a113a0256a4d4fbd7c82f7e4e88f70d720d0", "ffc3357d1beb12e399c33415f67d8349b4c0aa5a", "1753d3e97fdbe7799b9625cb873b77eef506a608", 
"2b5eb50ae8b3258ed3f8985f2c084f99bda74b14", "a578530c785b14f54918720ee4acb672ffe3986e", "6c1fa64a6b5d3565fc4ab9ca97bf530069ce2669", "0d57c1222fd2bbf9842fff83ade3efa8b978c690", "80af0dfde58a4f1e4f7ff35fa2c882a4ab3bbad2", "fc1de916fa550c35e57ae8ccbc3874f509db0ca7", "94c387a12572d189ca7d04ea50cdb26e9bd19634", "2554485ffdb8473262ce0cfde401cfdc5b85f3fe", "982cc75e388e3de10faef121ba118f921f94106c" ], "paperAbstract": "Static analyses aspire to explore all possible executions in order to achieve soundness. Yet, in practice, they fail to capture common dynamic behavior. Enhancing static analyses with dynamic information is a common pattern, with tools such as Tamiflex. Past approaches, however, miss significant portions of dynamic behavior, due to native code, unsupported features (e.g., invokedynamic or lambdas in Java), and more. We present techniques that substantially counteract the unsoundness of a static analysis, with virtually no intrusion to the analysis logic. Our approach is reified in the HeapDL toolchain and consists in taking whole-heap snapshots during program execution, that are further enriched to capture significant aspects of dynamic behavior, regardless of the causes of such behavior. The snapshots are then used as extra inputs to the static analysis. The approach exhibits both portability and significantly increased coverage. Heap information under one set of dynamic inputs allows a static analysis to cover many more behaviors under other inputs. A HeapDL-enhanced static analysis of the DaCapo benchmarks computes 99.5% (median) of the call-graph edges of unseen dynamic executions (vs. 
76.9% for the Tamiflex tool).", "pdfUrls": [ "http://yanniss.github.io/heapdl-oopsla17.pdf", "http://doi.acm.org/10.1145/3133892" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2638b231d41f04df54b5383b7e18f832522459d3", "sources": [ "DBLP" ], "title": "Heaps don't lie: countering unsoundness with heap snapshots", "venue": "PACMPL", "year": 2017 }, "26454b033fbe554436a20317edf78a7eafbfc6d6": { "authors": [ { "ids": [ "2138184" ], "name": "Adrian Sampson" }, { "ids": [ "1766093" ], "name": "Kathryn S. McKinley" }, { "ids": [ "1731438" ], "name": "Todd Mytkowicz" } ], "doi": "10.1145/3133895", "doiUrl": "https://doi.org/10.1145/3133895", "entities": [ "Application programming interface", "Compile time", "Compiler", "Deployment environment", "Graphics pipeline", "Graphics processing unit", "Graphics software", "High- and low-level", "Low-level programming language", "Metaprogramming", "Multi-stage programming", "OpenGL", "Preprocessor", "Program optimization", "Programmer", "Programming language", "Programming model", "Real-time computing", "Type system" ], "id": "26454b033fbe554436a20317edf78a7eafbfc6d6", "inCitations": [], "journalName": "PACMPL", "journalPages": "71:1-71:27", "journalVolume": "1", "outCitations": [ "0145bad77391bda8fb5b452257f838e38b57a515", "0178ff78d5e2014822d38bf1f62470abadf0dc74", "bb4cf037d8a5adbb3f08a3405d926d022b8c27c5", "11d49f3b3e7b29c380248da76d5c9fcc141308b2", "907d2c011942a78bf6acff8e048f4185d53ff8f2", "1041d3f00afb5f5a53196813ceb2ebfab6d0a6ee", "d18e91ddfd00b2a04cdbbf800f25b3ce12e1c982", "190dcdb71a119ec830d6e7e6e01bb42c6c10c2f3", "36fee45fe83af552ef8a0119f9ed69f050b1389a", "0abaa4c1d1b765c8be7a14204406ae2e7ee5a458", "43646798f334bba75401737909f812ed04acba4c", "50b848b1fa5a15044dfd865e745ebc1347a2bf26", "3783cc16ee7e9216e3a8164b211ffa80efa9b7da", "1c6477bc1b1c7b3767624be6d286d382ce05c211", "6ec92e86a541536d201c9e6270bdd33c792836e0", "67a22c0387c08cc4b0726aa6a59e402928d9e725", 
"b85944b4417938b7b2cc042e916f85b914d6872d", "4a2d7bf9937793a648a43c93029353ade10e64da", "07c45b5055f38e3519db7c5ba69bec81d77a8415", "06df787923f9b3f0a1ffdc50e8f290d40ecfaa7b", "4721ad0db596f3f78ddb31b4305ddbde35f8f181", "14fc4690d837d3ee806dfcef6e930af19699eea2", "66f0c57db9a5fe49645e3ed6eadafd649b004790", "00972bc1b13f01eedeefcf89b84cacf7051afdf1", "a0d71af1d7b64ad2898179cb2850dc5005085d2c", "2042b469be68653afcb2b7b38490c16369b4501a", "0a536ac40d7b9ad9630868d08520063699fd59da", "2da15482d72cc412157186819dc703d036d6e48c", "d802b277131892015086030c8c8354cc69f37e5b", "5f40956aca8e6d9e2f5e29c42440e00af33c0067", "0c6c30e3052fcc01aa5ee38252d77f75322d7b3f", "667519cc4f18e18b83f3c2d04676546e6da63c25", "69a3db7d221174bdd0f99eef30d6f846af1ed20d", "d57b42821ec782b33dd49ee0c37976bbd62d24a4", "01807b2f58610871de46f54cb5ae7bb66cd981ac", "609cddf50f6b745431f1dc94bd71481a3891bc37", "183897b7e57eeeb3a6f33f30276bc02b9fb2159c", "11d2e3d42b77020d34962a9045d5a0d8408efeab", "0928ba0322bcc2540acb6e4bcf2876cfab076e44", "d25d583ec20a0665b8acb7d39f3b5235c122ed49", "596df6fb4d50c7886948b08f525c4e3393d05a44", "06b374421be9312dc9beaf8a3d00c08ab862883b", "2a0134730dafe63f35643b67116ebdc6e68fc9a0", "6ec1b663eaf540f68fe318ae50a13a8e812aeecb", "19bc9d434aebef90ad483dfedbb1a92118a9abb2", "4af558236defe30e286b96570fb7e30492f0df29", "a03ee327f830387f6f99a8b18b73dd04714daa27", "0bdc1a6947943cc942c8186c65d8f397a4f2e2ce", "455625a53634a9978ef48e479c981a67152e0f1f", "1992efda26b659a9ec1e94d6aa9b2c64e5efd4a1", "ab930adcaab93490dd95cf8000a9ba9a24ab2857", "8e297292cf5e0bf28d3f466a1b8cc208a301b9d9", "23cc4d5d3c9018b2043132e27f964c8726d4da3e", "0708edfc0aef46135592ab19601ec5b9b9ca04e6", "269c24a4aad9be622b609a0860f5df80688c2f93", "6b1ea0f5a12f31b42e3d7dad74b9fa8ce861b311", "6e381f6be450a0dea2c21a1ab9791a122403e890" ], "paperAbstract": "Heterogeneous hardware is central to modern advances in performance and efficiency. 
Mainstream programming models for heterogeneous architectures, however, sacrifice safety and expressiveness in favor of low-level control over performance details. The interfaces between hardware units consist of verbose, unsafe APIs; hardware-specific languages make it difficult to move code between units; and brittle preprocessor macros complicate the task of specializing general code for efficient accelerated execution. We propose a unified low-level programming model for heterogeneous systems that offers control over performance, safe communication constructs, cross-device code portability, and hygienic metaprogramming for specialization. The language extends constructs from multi-stage programming to separate code for different hardware units, to communicate between them, and to express compile-time code optimization. We introduce static staging, a different take on multi-stage programming that lets the compiler generate all code and communication constructs ahead of time. \n To demonstrate our approach, we use static staging to implement BraidGL, a real-time graphics programming language for CPU-GPU systems. Current real-time graphics software in OpenGL uses stringly-typed APIs for communication and unsafe preprocessing to generate specialized GPU code variants. In BraidGL, programmers instead write hybrid CPU-GPU software in a unified language. The compiler statically generates target-specific code and guarantees safe communication between the CPU and the graphics pipeline stages. Example scenes demonstrate the language's productivity advantages: BraidGL eliminates the safety and expressiveness pitfalls of OpenGL and makes common specialization techniques easy to apply. 
The case study demonstrates how static staging can express core placement and specialization in general heterogeneous programming.", "pdfUrls": [ "http://www.cs.utexas.edu/users/mckinley/papers/shfl-oopsla-2017.pdf", "http://www.cs.utexas.edu/users/mckinley/papers/braid-oopsla-2017.pdf", "http://www.cs.cornell.edu/~asampson/media/papers/braid-oopsla2017.pdf", "http://www.cs.cornell.edu/~asampson/media/braid-oopsla2017-slides.pdf", "http://doi.acm.org/10.1145/3133895", "http://www.cs.cornell.edu/~asampson/media/papers/braid-oopsla2017-preprint.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/26454b033fbe554436a20317edf78a7eafbfc6d6", "sources": [ "DBLP" ], "title": "Static stages for heterogeneous programming", "venue": "PACMPL", "year": 2017 }, "2649064305060701a524ecc816e1d969b2a51399": { "authors": [ { "ids": [ "1694403" ], "name": "Cheng Xu" }, { "ids": [ "1683114" ], "name": "Xiaohong Huang" }, { "ids": [ "1699139" ], "name": "Maode Ma" }, { "ids": [ "2090806" ], "name": "Hong Bao" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.54", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.54", "entities": [ "Authentication", "Authentication and Key Agreement (protocol)", "Compaq LTE", "Hoc (programming language)", "Key-agreement protocol", "Non-repudiation" ], "id": "2649064305060701a524ecc816e1d969b2a51399", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "412-418", "journalVolume": "", "outCitations": [ "edcdd0c6f6fd15deef91c5b2f453fb09fb1bc7d6", "37d4416d2cb3d8a248381f9c7265bc5720655e21", "56a86fbcd9f7ca8f2bfee698c6f020f1dc51683e", "41a03e1e9bb04205e19ae6352c9ce7bfd4be0213", "291bcc7bfadb6cf91bde2be0091f0d7b12da9f77", "cad06267700dc16c07ba61c2433c71e57588c6bf", "6286ef5e1b649bb2ecc5601993a77795527d1da3", 
"5d54789cf1bc86f7b1d624a4f7966070964d0156", "4e1a4dfc2e78c3477f329187d3b765b28c96fc2a", "2b85185e90c011497f78f0df6203b0dd44b8c027", "c10ccd00e2aa60dbfe6b5127dd48b5f3067fb5bd", "19cd416a1fa1ce54efda60dbad27c8089857c4ee", "3ba6dbd4b8dd913be895118601760772f9b18c2d", "2494bffbd811ded937c534c4961b92e7b602d2cf", "0a91a3fbb655163b89ccce2a52f6d45faf431132", "7391ff000fc0c1114fcd69fe6961b7000942d427", "03eee96ab1862b407bfa95d6fe7e22e0878e8021", "306d85311d5d2e4755dceba9154d15559daae201", "1999304a0391777f38899798678efbf264fafa52", "53843cc422eec3c272e640c77fab82e751de74d0", "82a5cb71e84be019d27b4114ef454a1a7965d6e1" ], "paperAbstract": "Vehicular ad hoc networks play an important role in current intelligent transportation networks which have gained much attention from academia and industry. The vehicular networks can be implemented by the LTE/LTE-A networks, which have been formally defined in a series of standards by the third generation partnership projects (3GPP). There are lots of challenges in the authentication processes in the LTE/LTE-A. In this paper, aimed to improve the security functionality of the vehicular networks, a new secure and efficient group authentication and key agreement scheme for the vehicular networks has been proposed, named as the GAKAV. Compared with the existing schemes, the proposed scheme can greatly reduce the amount of control message transmission from mass of vehicular equipment (VEs) to the network and avoid much overhead in the LTE/LTE-A networks. 
Besides, it has the following security functions including privacy preservation, non-frameability and non-repudiation verification.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.54" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2649064305060701a524ecc816e1d969b2a51399", "sources": [ "DBLP" ], "title": "GAKAV: Group Authentication and Key Agreement for LTE/LTE-A Vehicular Networks", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "26988397b2348e6d4bdbe2accebb2ecba96ddb13": { "authors": [ { "ids": [ "3194878" ], "name": "Chaoyun Zhang" }, { "ids": [ "39906431" ], "name": "Xi Ouyang" }, { "ids": [ "2976321" ], "name": "Paul Patras" } ], "doi": "10.1145/3143361.3143393", "doiUrl": "https://doi.org/10.1145/3143361.3143393", "entities": [ "Artificial neural network", "Computation", "Deep learning", "Experiment", "Gigabyte", "Image processing", "Image resolution", "Interpolation", "Provisioning", "Shadow Copy", "Super-resolution imaging", "Traffic analysis", "Video post-processing" ], "id": "26988397b2348e6d4bdbe2accebb2ecba96ddb13", "inCitations": [ "942e9318c44afda4399fc1cd0f0f2a00f5c65a87", "6cae9ad284a73471a8ed9e483b1673a60d61d946" ], "journalName": "", "journalPages": "363-375", "journalVolume": "", "outCitations": [ "2f45a46072455fbc52f781efea98c669ad7a6658", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "11c9aefb2fa45b9fd3292454ff8de134cfd1c6b1", "56142d781c2c1231ffbda59efb6f96fc7b5b5b52", "1827de6fa9c9c1b3d647a9d707042e89cf94abf0", "cd3b315857e6a6a9d93e46cd54b402486c939ab9", "8dd53ef110dab30b9f850817c0ef350068191e94", "2e5dbe5b7b4b9840bbeec38086bab4bafe41133a", "105713f44bf88978aae6c87fbc708428b47c29c4", "05d3657ef0a94f1d0ad8291d8f2fb3b22aac782a", "44009373438452dd827cc9904301176c41ae85bb", 
"2a24b68ef180c0c8742bd494a55fb6f68864efed", "6c6d121c7fa80c8de52211427dc2615bd57c0f00", "6a944a7301fd55d976db6156a5daa310e0c2c4c9", "5e2ccb1fe781f474794a6323a5a7ac77cf55dd50", "3d64fd97ce6bcdc34a585a88f76d7aec294fed53", "6de2b1058c5b717878cce4e7e50d3a372cc4aaa6", "8ecd57162eef92a2832fa6dc67c0c877fdc18145", "0a1dc95e4c884a91bd141df8133d1b4961178123", "09b8120cbc52e7df46122e8e608146289fddbdfa", "061356704ec86334dbbc073985375fe13cd39088", "4288dd45bea2c630d2a2da06ec99077a2f421cf5", "0f0caece2fed6fc689b01f6d4521dca9f44c5dde", "367f2c63a6f6a10b3b64b8729d601e69337ee3cc", "272216c1f097706721096669d85b2843c23fa77d", "c58b7466f2855ffdcff1bebfad6b6a027b8c5ee1", "e8bdb83765c91fa8380c214cc4fb3fe8d2ff1e99", "3c8452ad39432b5f20e88c86bd7364527d24fdac", "9cecfbc04a8bf296bfe067810385458146a3b878", "6b7cbaa346b8f99b2d6679f24056aec0e6cca4e0", "11b012e76e5587cdcc310af57934cb9a8d32e4ff", "01fcae344d2edb715bcc63a40b6052c0331741bd", "4a9a98cc86b1e07e548b7edee045275a793f6698", "ca5766b91da4903ad6f6d40a5b31a3ead1f7f6de", "0cace3a97aad08fe7d1a5053f8b729f5af0dd688" ], "paperAbstract": "Large-scale mobile traffic analytics is becoming essential to digital infrastructure provisioning, public transportation, events planning, and other domains. Monitoring city-wide mobile traffic is however a complex and costly process that relies on dedicated probes. Some of these probes have limited precision or coverage, others gather tens of gigabytes of logs daily, which independently offer limited insights. Extracting fine-grained patterns involves expensive spatial aggregation of measurements, storage, and post-processing. In this paper, we propose a mobile traffic super-resolution technique that overcomes these problems by inferring narrowly localised traffic consumption from coarse measurements. We draw inspiration from image processing and design a deep-learning architecture tailored to mobile networking, which combines Zipper Network (ZipNet) and Generative Adversarial neural Network (GAN) models. 
This enables to uniquely capture spatio-temporal relations between traffic volume snapshots routinely monitored over broad coverage areas ('low-resolution') and the corresponding consumption at 0.05 km2 level ('high-resolution') usually obtained after intensive computation. Experiments we conduct with a real-world data set demonstrate that the proposed ZipNet(-GAN) infers traffic consumption with remarkable accuracy and up to 100X higher granularity as compared to standard probing, while outperforming existing data interpolation techniques. To our knowledge, this is the first time super-resolution concepts are applied to large-scale mobile traffic analysis and our solution is the first to infer fine-grained urban traffic patterns from coarse aggregates.", "pdfUrls": [ "http://www.research.ed.ac.uk/portal/files/46487575/zipnetgan_1.pdf", "http://doi.acm.org/10.1145/3143361.3143393", "https://arxiv.org/pdf/1711.02413v1.pdf", "http://arxiv.org/abs/1711.02413" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/26988397b2348e6d4bdbe2accebb2ecba96ddb13", "sources": [ "DBLP" ], "title": "ZipNet-GAN: Inferring Fine-grained Mobile Traffic Patterns via a Generative Adversarial Neural Network", "venue": "CoNEXT", "year": 2017 }, "26ab5cfb06c19c80daa9453184ca317b4eac2833": { "authors": [ { "ids": [ "3376608" ], "name": "Dharma Teja Vooturi" }, { "ids": [ "3235932" ], "name": "Kishore Kothapalli" }, { "ids": [ "30999299" ], "name": "Upinder Singh Bhalla" } ], "doi": "10.1109/HiPC.2017.00051", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00051", "entities": [ "Action potential", "Algorithm", "Automatic parallelization", "Domain decomposition methods", "Experiment", "Graphics processing unit", "Linear equation", "Machine learning", "Mathematical model", "Neuron", "Parallel algorithm", "Recursion", "Scalability", "Simulation", "Speedup", "System of linear equations" ], "id": "26ab5cfb06c19c80daa9453184ca317b4eac2833", "inCitations": [], "journalName": "2017 
IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "388-397", "journalVolume": "", "outCitations": [ "1ac1ea00886bba6ca2dbb33e04f8d432c506a66e", "8c56eccb0b857de4c5ebe68d444853171e1a8fe9", "c8b45db64492147745f025f3e8a1ad4f10a01ccd", "5be8dbd329f80f1179b527bc0e73be66956562e3", "29fc77b99289ccd2353dbc7a1b56e368ecbc7845", "6e50a514285711bea078c1da1c8193f1ffeec50e", "b67ad519057ee99a0a149f07a94e77eeab2e07e9", "d6d09c1889b8700c8041c745221cecf24e9d9ccf", "490bd661404359cbdd7bf6e5429f56cdcfe25b30", "3fea4d72858d106e37e43cc68a5a55920c000183", "4606678e5ca6e81c45ca5b48ad0de1913fe194f9", "bd822f45e128718e9a7035408e6064e8f8535b38", "c0bb22f544a98a9587a53322d381209cdf8e1443", "58d07607dd35c39fefffd373d80f2a77ab18f5c7" ], "paperAbstract": "Hines matrices arise in the simulations of mathematical models describing initiation and propagation of action potentials in a neuron. In this work, we exploit the structural properties of Hines matrices and design a scalable, linear work, recursive parallel algorithm for solving a system of linear equations where the underlying matrix is a Hines matrix, using the Exact Domain Decomposition Method (EDD). We give a general form for representing a Hines matrix and use the general form to prove that the intermediate matrix obtained via the EDD has the same structural properties as that of a Hines matrix. Using the above observation, we propose a novel decomposition strategy called fine decomposition which is suitable for a GPU architecture. Our algorithmic approach R-FINE-TPT based on fine decomposition outperforms the previously known approach in all the cases and gives a speedup of 2.5x on average for a variety of input neuron morphologies. We further perform experiments to understand the behaviour of R-FINE-TPT approach and show its robustness. 
We also employ a machine learning technique called linear regression to effectively guide recursion in our algorithm.", "pdfUrls": [ "http://web2py.iiit.ac.in/research_centres/publications/download/inproceedings.pdf.8dd334c410c68e02.70617065722831292e706466.pdf", "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00051" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/26ab5cfb06c19c80daa9453184ca317b4eac2833", "sources": [ "DBLP" ], "title": "Parallelizing Hines Matrix Solver in Neuron Simulations on GPU", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "26ab9c27d995dadd553614045361ffb1afba9008": { "authors": [ { "ids": [ "3174343" ], "name": "Ethan Heilman" }, { "ids": [ "3413078" ], "name": "Leen Alshenibr" }, { "ids": [ "1781543" ], "name": "Foteini Baldimtsi" }, { "ids": [ "1775686" ], "name": "Alessandra Scafuro" }, { "ids": [ "1743586" ], "name": "Sharon Goldberg" } ], "doi": "", "doiUrl": "", "entities": [ "Bitcoin", "Ecash", "Elliptic curve cryptography", "Ethernet hub", "Money", "Programming paradigm", "Random oracle", "Tumbler (Project Xanadu)" ], "id": "26ab9c27d995dadd553614045361ffb1afba9008", "inCitations": [ "5f80c78259e53442c59b870fb4f4727b67f540ff", "195f2d9d2e6282b31ffd320c2ceda9661d806927", "68652fad5599cf45f2d3db69dd5ca756446f9747", "3e2e76a31195fdbd56f80b941021292572eea6ec", "5f0db49bc309c6f82dec7368a1adb9bde4363b1c", "24f036498862dba97036df9c26de066c75e843c2", "892fe3dfc6d7d45475c818585e086016a268b345", "b46fb814cee6f8c49fcfd63d945b03f3a07c0f16", "5c79ea3451e002a8197eb00d6090a356165dac7e", "f234f428eb552b94435683e7e784e805c201d309", "c37ee0ba89c24abb08d88f38f464ed1b8cf401b7", "1f14c479f0506fa18e670c1b28b697b9aedeba75", "4d10e1338d4fca9085c96f8bf0d076eca67f26d7", "0ad2bb8a12a45df3c17f6f1ec20cdc58d056567f", "11c8549800acb66ad24e3ce2d65854a9b7634f8a", "6c39f2c252d095ab9d4a398fa66706c901387683", "220eb711e0efd2c67759f169ff14ba3efc186bbe", 
"7ab74b4e4c11626c2642fcb95342c9c318dbfdca", "0aacfddf9cb22e661302ec77cc251ccafd5f8c71", "bfe64f013aeed4ca2b0eaee20564f3806fc4375a" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "7f14bca3b6f51d4cfe8084798c2808c08b0214d6", "4123e9fecfc01c3cf32fb3d59ed1566ee0856874", "46cd7e1d4231e47873f3eb4e26ab73187deb5437", "c773c64ab52f702ae0aaba8c35b72dde471ea04a", "01f187c3f0390123e70e01f824101bf771e76b8f", "51b27a41ca1a33445a1041fcea84341fcf0b8c4c", "a9267bc516da5940740d95664c562b6fa14d4b34", "396a7b3289504052e115d65cf7a20ccb4e2c52ca", "723455474fc1e953bc7fad1169d52f178584af4e", "00f763e99bd9d1aa45350536d480e05851a055eb", "35fe18606529d82ce3fc90961dd6813c92713b3c", "139cfb65d375bba4ca59acc19efb0b7ac99247dc", "4284153a0bf0aa3d0f94ad3113f4d117e4767bef", "244ddba27efef35bac9b01d5b1780922f5f33ec4", "3624eaf7c2f05612429ed1579cc82102c11f4d65", "cdb352365403a6ad09cdd00232def337df0a1b96", "049e2c54fe8a35cd941937ba592e07bbc2dda591", "3ba8fdb64059d322edaed63210829ba74ea2a571", "d48a08efef529856bfbaab8d5f3ed5ca300dcafa", "19bab496d5d7f60d3e5b9217739b9cf7fedaf44b", "15167da8d35184d062b988b5a6807e0fa72cd77f", "195f2d9d2e6282b31ffd320c2ceda9661d806927", "11a651253f8603c01ed29c00c76673a67bd291c7", "c27684f2fe5a85fe2871f693edc46061d0ecb20d", "20d1c524fd1557e76a687ffab57f96364beb6fe7", "ee798a54f7eccbe5e42cc9460ea7240b3eb084bf", "19c3736da5116e0e80a64db35afe421663c4b4a8", "93bae7155092c8ba1ae1c4ad9f30ae1b7c829dd7", "33853565b4dcad38b9b79091a48d3f40409f06d7", "14829636fee5a1cf8dee9737849a8e2bdaf9a91f", "5cbfe46f4b026f8dee4afb1e788236b3fdf08b81", "1b23cd2050d5000c05e1da3c9997b308ad5b7903", "03250ef8dfa44bb26a43df0e7e846324286e35e5", "aa6136b744e95e7cf04e3ea841e5cb19b4d97275", "081e614534e6f7ae2ab0ea4d771eb7490528da7d", "3f40a5b0bcf4401c3f8efdbb539deec2763ad916", "10336cdef674893f41bf4824d44c4156be5e9ca2", "019b5b8e54b10860d39dac8f449c9d3db173527b", "3ff1793ac5036dbc68b669ac43d4b0c235ea0745", "16b750e3232f7edc59379fa788e8d22b563309d6" ], "paperAbstract": "This 
paper presents TumbleBit, a new unidirectional unlinkable payment hub that is fully compatible with today\u2019s Bitcoin protocol. TumbleBit allows parties to make fast, anonymous, off-blockchain payments through an untrusted intermediary called the Tumbler. TumbleBit\u2019s anonymity properties are similar to classic Chaumian eCash: no one, not even the Tumbler, can link a payment from its payer to its payee. Every payment made via TumbleBit is backed by bitcoins, and comes with a guarantee that Tumbler can neither violate anonymity, nor steal bitcoins, nor \u201cprint money\u201d by issuing payments to itself. We prove the security of TumbleBit using the real/ideal world paradigm and the random oracle model. Security follows from the standard RSA assumption and ECDSA unforgeability. We implement TumbleBit, mix payments from 800 users and show that TumbleBit\u2019s offblockchain payments can complete in seconds.", "pdfUrls": [ "http://computing.boisestate.edu/wp-content/uploads/2017/11/Scafuro-2017-Colloquium-Flier.pdf", "http://eprint.iacr.org/2016/575.pdf", "http://diyhpl.us/~bryan/papers2/bitcoin/TumbleBit:%20An%20untrusted%20bitcoin-compatible%20anonymous%20payment%20hub%20-%202016.pdf", "http://www.internetsociety.org/sites/default/files/ndss2017_01-3_Heilman_paper.pdf", "https://eprint.iacr.org/2016/575.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/tumblebit-untrusted-bitcoin-compatible-anonymous-payment-hub/" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/fab2/be876aedf54c6de78192cce217c248544012.pdf", "s2Url": "https://semanticscholar.org/paper/26ab9c27d995dadd553614045361ffb1afba9008", "sources": [ "DBLP" ], "title": "TumbleBit: An Untrusted Bitcoin-Compatible Anonymous Payment Hub", "venue": "NDSS", "year": 2017 }, "26b4bb926d9928bcf1c3e8916c8674ef2dcf6fb2": { "authors": [ { "ids": [ "1792062" ], "name": "Jasmina Malicevic" }, { "ids": [ "2526126" ], "name": "Baptiste Lepers" }, { "ids": [ "1711100" ], "name": "Willy 
Zwaenepoel" } ], "doi": "", "doiUrl": "", "entities": [ "Adjacency list", "Algorithm", "Computation", "Data structure", "Graph (abstract data type)", "In-memory database", "Locality of reference", "Multi-core processor", "Out-of-core algorithm", "Preprocessor", "Radix sort", "Run time (program lifecycle phase)", "Social network" ], "id": "26b4bb926d9928bcf1c3e8916c8674ef2dcf6fb2", "inCitations": [ "13e388ab3495d313ae6838b26e8d34517a67e698" ], "journalName": "", "journalPages": "631-643", "journalVolume": "", "outCitations": [ "36e1b02a66ed928ef13e3a2ba6852e90a8713036", "7f598b081df60565014cd943e4512710b682b734", "2916fd514c69c1b3141c377c1c97d957bdc86c5e", "74e6b114822b712c100c7ffd1b01f4fb1564bd28", "3c84f22df1948dfa8b1b14bbd4c850baf9c5b632", "6de3915df2b9927a78f213629f3bcb052ec21e8b", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "63ea74539eb963d0be2672dcb5c29a52987c4b2b", "6f7cd29a3dfdcb2f6880a022e13054542020c5ce", "3ebf3857a60c3e224284bbbe6c7127d0a12c546d", "4e8e3e40a25fba903f40246705c3beb3c122f523", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "3486aeaf540c48952120fe853d672af984f40a6a", "c3008dd707e4dfd43606a544d4cac4bf1f081f2b", "eb82d3035849cd23578096462ba419b53198a556", "222e7666dd37307b600b0a3ebc9b6d28fb51d6e9", "11e4d4d00c7b1e3aa9fcf3c490b635df98827dd9", "175d795f44037ef60dd9df341701cd5fdc449f1f", "282bc59faefb734137d2ea978cb1eb5699e67c7c", "0ad8e89091eed09217e66adc98136126addc2619", "0608d9937c074520cdc93cc444cc1c77039c5332", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "6a888f3dd0a17b0241be61daa378ba6caffa6617", "e8bdeb57f2239b26a4dacfc63c331237d162e532", "259e93de2f10d395a1bdfb2dc6da72b6a3998572", "0706356c9ab6014d6b04577d38289ea8328291a5", "ef9d9821df55442f039b128bb5cef2b41ab2cadc", "332f77fd05703c1607e3b57884ad31fb1fad0104", "0f34ea8535dc5833a1a3692ffc7abc6740d2406a", "8d1a4ae1d3a17edd5b16653f2f582f9952b71612", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "3231d62bec8e8cc1d837e85893889855767c3b13", "ef8d3a389410124d21dfda44295de8af786f5516" ], 
"paperAbstract": "Graph processing systems are used in a wide variety of fields, ranging from biology to social networks, and a large number of such systems have been described in the recent literature. We perform a systematic comparison of various techniques proposed to speed up in-memory multicore graph processing. In addition, we take an endto-end view of execution time, including not only algorithm execution time, but also pre-processing time and the time to load the graph input data from storage. More specifically, we study various data structures to represent the graph in memory, various approaches to pre-processing and various ways to structure the graph computation. We also investigate approaches to improve cache locality, synchronization, and NUMA-awareness. In doing so, we take our inspiration from a number of graph processing systems, and implement the techniques they propose in a single system. We then selectively enable different techniques, allowing us to assess their benefits in isolation and independent of unrelated implementation considerations. Our main observation is that the cost of pre-processing in many circumstances dominates the cost of algorithm execution, calling into question the benefits of proposed algorithmic optimizations that rely on extensive preprocessing. Equally surprising, using radix sort turns out to be the most efficient way of pre-processing the graph input data into adjacency lists, when the graph input data is already in memory or is loaded from fast storage. Furthermore, we adapt a technique developed for out-of-core graph processing, and show that it significantly improves cache locality. 
Finally, we demonstrate that NUMA-awareness and its attendant pre-processing costs are beneficial only on large machines and for certain algorithms.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-malicevic.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/malicevic", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_malicevic.pdf", "https://infoscience.epfl.ch/record/228854/files/atc17-final234.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/9aa2/e6ea92ca2f7cfd0458225af347c31eb066b9.pdf", "s2Url": "https://semanticscholar.org/paper/26b4bb926d9928bcf1c3e8916c8674ef2dcf6fb2", "sources": [ "DBLP" ], "title": "Everything you always wanted to know about multicore graph processing but were afraid to ask", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "26b5be9d7327d095f5adff76134b0bbc914f56f2": { "authors": [ { "ids": [ "2540772" ], "name": "Ting-Jung Chang" }, { "ids": [ "26882130" ], "name": "Zhuozhi Yao" }, { "ids": [ "2097585" ], "name": "Paul J. Jackson" }, { "ids": [ "3378176" ], "name": "Barry P. 
Rand" }, { "ids": [ "1752172" ], "name": "David Wentzlaff" } ], "doi": "10.1145/3123939.3123980", "doiUrl": "https://doi.org/10.1145/3123939.3123980", "entities": [ "Computation", "Computer architecture", "Pipeline (computing)", "Semiconductor", "Simulation", "Standard cell", "Superscalar processor", "Thin-film transistor", "Transistor" ], "id": "26b5be9d7327d095f5adff76134b0bbc914f56f2", "inCitations": [], "journalName": "", "journalPages": "706-717", "journalVolume": "", "outCitations": [ "bf2522e2fd6a2deab46367a3391d4cf04271d133", "65bb9afe0a62462a4efb31fcec0e882a51a63264", "22b77d7ebd303019378f188dd25bbdc931c100d8", "7030ac77faacc1279535b654bbe6df619802fe73", "19ee7afaf66d895eca9cb2ee0c0a87ad6398984d", "5ced6a0aab1350ef1dba574e1faa05a726d9517e", "8947cb567a18464874d1304cee43c1cffaaa837e", "9574765c92679754d4269cbe86f4684419680bb3", "b354a7a471e810b11f3d9ea5ecabfc51e0c54f9c", "649f417531ac7b1408b80fb35125319f86d00f79", "5853ed1b5ba517343cbcc92db2f6f333a432eb1a", "d89f7247767b7ac18dd944e9d2ff50c6a50aea13", "7007306713ecc5e0add5d05e956c030c85978b33", "6671069172327bdcade96abda12a25b9122e6192", "732a4a6079ada1472842122129affe90a1cc1e8c", "dec66229735a8384a32797414c368d713826e8e0", "33bf8419196e08ec2b20f4a12bb905d39bc24290", "26a635be4fc593b5fb6ec6bf11b03634e803f311", "1080c001b25063e0265f13df89c50acfc01cb787", "db5aa66ec7e20068d4c5d26f6002838f9a49d349", "a8da95fbcba0b96860d1448a6c9949cbf06cceb0", "11effc3c9847097fc2aec129601ce2b622b289a5", "79eb9473b28735cd42eee4298201b8a703bcd7c3", "9935ed5c26ccce075a1e791c4bb82389140283b5", "a4f74a903049ef7afa2fefb2d48b8b7757ea0436", "8a2df677e87c10e04d724ed4940ebe41138028d4", "e7e048707845ba66e4cb6dd30ad2d4fb23a795c0", "5301e5839c6d3eb3fc63dc79e28bb7b6fbb313f0", "fac9f581c1abe88acbcd445fec63a1aafe4cd5d9", "b4e7f4b5f150688370aaab24f89d69b26dfbbd7a", "11f65891ccc5d22878079540a6dfb6c1876a1903", "a6d3baaa5262986d0e68f947320dde0a83882dc0", "8f99de851ee207ae7231a8f6bc8b895aca63f7e6", "2dc1e4c4e9970f0c7ea2877c2be35a8e2eaa2297", 
"3464e4bac7c5f0c5a6c5a6554c7717404265b6bb", "8d0032611cfbe61d978d22bec60b7e113a9c7d08", "3b2b40a7cf7499f48f5f6afe569a785f8e7da985", "cd92bdedf9aaab30669cf303e286c4d357f136fa", "1016f5667bb524675d2f7207abeb098b068a1144", "94c4036ccebd7784a9d12b846cfde936fc490611", "520edc60a365fc08a34368149df8beb5fabb595f" ], "paperAbstract": "Organic thin-film transistors (OTFTs) have attracted increased attention because of the possibility to produce environmentally friendly low-cost, lightweight, flexible, and even biodegradable devices. With an increasing number of complex applications being proposed for organic and biodegradable semiconductors, the need for computation horsepower also rises. However, due to the process characteristic differences, direct adaptation of silicon-based circuit designs and traditional computer architecture wisdom is not applicable.\n In this paper, we analyze the architectural tradeoffs for processor cores made with an organic semiconductor process. We built an OTFT simulation framework based on experimental pentacene OTFTs. This framework includes an organic standard cell library and can be generalized to other organic semiconductors. Our results demonstrate that, compared to modern silicon, organic semiconductors favor building deeper pipelines and wider superscalar designs. 
To the best of our knowledge, this is the first work to explore the architectural differences between silicon and organic technology processes.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123980", "http://parallel.princeton.edu/papers/micro17-chang.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/26b5be9d7327d095f5adff76134b0bbc914f56f2", "sources": [ "DBLP" ], "title": "Architectural tradeoffs for biodegradable computing", "venue": "MICRO", "year": 2017 }, "26c08f006ffad89e2d76f138373ce41942d7fd98": { "authors": [ { "ids": [ "2212753" ], "name": "Xuewen Cui" }, { "ids": [ "2125407" ], "name": "Thomas Scogland" }, { "ids": [ "1687845" ], "name": "Bronis R. de Supinski" }, { "ids": [ "1688860" ], "name": "Wu-chun Feng" } ], "doi": "10.1109/IPDPS.2017.96", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.96", "entities": [ "CUDA", "Code refactoring", "Computation", "Field-programmable gate array", "Graphics", "Graphics processing unit", "Naivety", "OpenACC", "OpenCL API", "OpenMP", "Pipeline (computing)", "Scheduling (computing)", "Speedup", "Supercomputer" ], "id": "26c08f006ffad89e2d76f138373ce41942d7fd98", "inCitations": [ "3b603228bf9419868e7518614c85338b7a132989" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "575-584", "journalVolume": "", "outCitations": [ "5f3cce1bc739ebfc03e003010d3438bb318efc14", "e423ed8b4824b6b63ac558ebb1332ccb5954a7ba", "2373aa844bdc6c2cf3e07e291f3090b7f5b941ad", "034b5cb0eb2506096ae6f30790834b4af0da9158", "c251fc6c99d8b515f3f0844604a21af92cce647f", "145ba8f0070a5e2ad061b358a66762ce1765c241", "47f05344d0d5fd252ebf645dddb8a1c5118cffc6", "679541d90bcfb71019c7407b4c408a80e88db99d", "3000c714a7d5afad6d3b498f3505685890fade46", "31db730ff99fb6d7d32569473b8136e81a7b1d7f", "ee1011d3de50c376361468daf6e40d567a18b3e5", "84993fad8fabe361cf5d69e05ac5045cf6ac99ae", "4b434f94fafc3ffc76e0c440897ccd222eaa38ac", 
"4dbdfcf09af83ffb0a46989b981357540113f0fa", "6348bb3b140c47ea29621d1dc5218db52433840b", "262c123c6325d7b4c2edc904d6e85d352ab266b9", "2bb29fce377e1ec9024ea7c45fd40fa178922602", "ddaeb6c5b5fe0154e3b8f7b4707825f900512993", "44673760887d14a7e8516f8b487a662785ee1480", "e44585b020c93b6755fd9637d235d08b72d8fb7c", "ba1d069dd686d0fb44a56778652ca5b7259d5ae5", "4721ad0db596f3f78ddb31b4305ddbde35f8f181", "5d70bd2207d2d28c9c7c284a8ac3ca5b7a6b016c", "b7e3dec8a041102c6febb9a9ba46d45da8f2e787", "03daf2d17337f000538d9d4727fa49d52bdb922c", "2868d51b26c3e2a7dd237af7c6809388db76ed4f", "a4bb437b1452d4a2b513c288bacee071d9050c88", "7fed9ac269bc998483fefd5480b7a3c209b890f2", "f2560793c90159397adbbed07da439f180289fbd" ], "paperAbstract": "The community needs simpler mechanisms to access the performance available in accelerators, such as GPUs, FPGAs, and APUs, due to their increasing use in state-of-the-art supercomputers. Programming models like CUDA, OpenMP, OpenACC and OpenCL can efficiently offload compute-intensive workloads to these devices. By default these models naively offload computation without overlapping it with communication (copying data to or from the device). Achieving performance can require extensive refactoring and hand-tuning to apply optimizations such as pipelining. Further, users must manually partition the dataset whenever its size is larger than device memory, which can be especially difficult when the device memory size is not exposed to the user. We propose a directive-based partitioning and pipelining extension for accelerators appropriate for either OpenMP or OpenACC. Its interface supports overlap of data transfers and kernel computation without explicit user splitting of data. It can map data to a pre-allocated device buffer and automate memory-constrained array indexing and sub-task scheduling. We evaluate a prototype implementation with four different applications. 
The experimental results show that our approach can reduce memory usage by 52% to 97% while delivering a 1.41× to 1.65× speedup over the naive offload model.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.96", "http://synergy.cs.vt.edu/pubs/papers/cui-pipelining-ipdps17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/26c08f006ffad89e2d76f138373ce41942d7fd98", "sources": [ "DBLP" ], "title": "Directive-Based Partitioning and Pipelining for Graphics Processing Units", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "26d0ed88edde3a265169b37b404cabb78f5b84b4": { "authors": [ { "ids": [ "13167100" ], "name": "Zhiyong Cheng" }, { "ids": [ "38203359" ], "name": "Jialie Shen" }, { "ids": [ "1743245" ], "name": "Liqiang Nie" }, { "ids": [ "1684968" ], "name": "Tat-Seng Chua" }, { "ids": [ "1744045" ], "name": "Mohan S. Kankanhalli" } ], "doi": "10.1145/3077136.3080772", "doiUrl": "https://doi.org/10.1145/3077136.3080772", "entities": [ "Algorithm", "Cloud computing", "Experiment", "Information retrieval", "Information theory", "Mobile computing", "Streaming media", "Text-based (computing)", "User agent", "User interface", "User-centered design" ], "id": "26d0ed88edde3a265169b37b404cabb78f5b84b4", "inCitations": [ "262a5a784eedb127490f44736844a33123b30fe0", "6cd4697795f9c990c7bd3c867a442abee2858abd" ], "journalName": "", "journalPages": "655-664", "journalVolume": "", "outCitations": [ "6278c908f7086d75e4074ec1158b730ba8affd7d", "a32b93e5337318a0e26982c219fd9927a46f7f5a", "83fe6bdf3bb32361d5812570ceed2a36d7bd5046", "52227cd6e3b3ffe71d2445b3d95db33457215cde", "4c5ba940d010ba7fe338afc6765b61254c7f52b2", "206965f60256024eb95a3d04a42313421b14121a", "1187d5b34fda59fc1e65b281d37e6a03aa886043", "0028af619198dea14559e28973053df5261d8231", "0cb3e8e756e071d01dddfd2cd51d27846ab43684", "86ececa5aca5bf43442a84723a2c4b88ceccecc9", "17b67e902819b055d2f385519590fe8e2b2148f2", 
"e0e1a359c0617be201d4e9ddfd021ed950dc0940", "491b57680a448ba646005e2357a6642015d0dae6", "141c66ef785e3cc765de90d8bb7b43598a9a8576", "6683426ca06560523fc7461152d4dd3b84a07854", "61181e71ca1b899b5fdaaac24daac2463b3e6c96", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "270c147c9b651a2e85e795675f80afed2c54c5fb", "39b6331a3bbebeaed41b567b0050eee445670073", "08c367ba58e67cd28f07976ca3529d5eeb591da1", "7f43387718ffbc98396bd1914ede6fb3fdb6751f", "5ca28c48f5e479c1f76e999b20b83f929952a966", "ba8224e4597003553de4aa5d4905e6e52955108f", "a7ef69e55244e3fa0b065746d596441103b293a5", "39210c16f77f7018007dfa20dc7a7b615b73303b", "90cd005a097750cdb020cafa0470ab3d41219afb", "7ca2ae03ac46053f5e7291fa37c0fd99ce4e9fb5", "4ee4fc69240ee6c062aeafbede9e96aa0a8596bf", "0cd6c70ac57b796c12bd59229ea901a77ce8f066", "8f9cb27ecdb52da08cbf6798edbcb91d2a818e5c", "32f57bb6a476e60e2f4432375534b9247656d5f9", "38351d2e2f09190cadb643fa8d3c3ae3963ec682", "4e9d2433b60f17c6fba15a144a0fbecf2ddb722a", "099d85f25e9336f48ff64287a4b53ee5fb64ab51", "7584a5c32600fadcc943809b5e788719910a4547", "1960c7d4365c1165283fc2304be7d09a853fb33d", "0fdb667b86b3b1b2175ed2a84e6358137652ca47", "438c0d86f451a4a853e741ef5876c2b197f0ca9f", "bc1f9209b134f149ab611dba116e52dbbb3e0900", "87d907a114409755ecd3c6886585de26a4e17ffe", "0cf464e413e1e051b535d4d42de4ecd49853053e", "c71ac3664d437546ee2ffc9e7ed496a34e694c19", "ee9b266b519c2d851cc51b33c40609a4a84a79d3", "3a8096d475468d939d36a2ad15c2044f6f21917b" ], "paperAbstract": "With the advancement of mobile computing technology and cloud-based streaming music service, user-centered music retrieval has become increasingly important. User-specific information has a fundamental impact on personal music preferences and interests. However, existing research pays little attention to the modeling and integration of user-specific information in music retrieval algorithms/models to facilitate music search. 
In this paper, we propose a novel model, named User-Information-Aware Music Interest Topic (UIA-MIT) model. The model is able to effectively capture the influence of user-specific information on music preferences, and further associate users' music preferences and search terms under the same latent space. Based on this model, a user information aware retrieval system is developed, which can search and re-rank the results based on age- and/or gender-specific music preferences. A comprehensive experimental study demonstrates that our methods can significantly improve the search accuracy over existing text-based music retrieval methods.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080772" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/26d0ed88edde3a265169b37b404cabb78f5b84b4", "sources": [ "DBLP" ], "title": "Exploring User-Specific Information in Music Retrieval", "venue": "SIGIR", "year": 2017 }, "26f56d7a326bce3961d50bde268590f8c562d883": { "authors": [ { "ids": [ "38751321" ], "name": "Ahmed Saeed" }, { "ids": [ "2328708" ], "name": "Nandita Dukkipati" }, { "ids": [ "2591679" ], "name": "Vytautas Valancius" }, { "ids": [ "37427906" ], "name": "Vinh The Lam" }, { "ids": [ "9753146" ], "name": "Carlo Contavalli" }, { "ids": [ "1718754" ], "name": "Amin Vahdat" } ], "doi": "10.1145/3098822.3098852", "doiUrl": "https://doi.org/10.1145/3098822.3098852", "entities": [ "Algorithm", "Blocking (computing)", "Central processing unit", "Data center", "Limiter", "Network congestion", "Network switch", "Non-blocking algorithm", "Protocol stack", "Rate limiting", "Router (computing)", "Scalability", "Server (computing)", "Stock and flow", "Traffic shaping" ], "id": "26f56d7a326bce3961d50bde268590f8c562d883", "inCitations": [ "4d706ee0027880679a5358aef4e8feba58a53718", "9bbd5be2829e49b1fac7f034baf7499cb069db95" ], "journalName": "", "journalPages": "404-417", "journalVolume": "", "outCitations": [ "3013e17706df21957f53579f53fc67967f3bb548", 
"2de63b0c867b290d4f7217459c968aa98e5ad39d", "438110dc02f39f221896847a4d0e24f88e130598", "1c68a88cae1e486e14925637a3acb82164927ac5", "47d5357957cabb610131db1b228e58b70860ee8d", "31a3411d2e9748fe3590ae005b79634004c5e5d0", "0a142c84aeccc16b22c758cb57063fe227e83277", "132f00de21cee656d00ad6779f1926070ad59544", "1aafc7066e52f18dee78103822da24a5d85da93c", "46c79404092692c2c4eb8209d3301e2000b83212", "08417bc2a45e668699ae4805e809dd1ccd567753", "17f1ff82aca7a592a8815e8169b6e2210bf6ae7a", "52ece1e929758e9d282e818e8e9985f88570f2dd", "25ded9f81378f6b85daf5a70c85bbadfb84ebc3d", "5c39e0414c7de24268adb6c219c12f142a6343ff", "0d3f85933b6355789588476e491683532c68a906", "4687fdf3c77ef00700fdf1399f7dd81bfe87ef97", "03e216973f65fcee6e4e761592c3817386faa052", "0baf1bef6ee3bcb0b385a4ac303dcf0b406c64f4", "0e2249e3b0cd1fa9a7e0eee847b58be1cf2ec707", "2a628e4a9c5f78bc6dcdf16514353336547846cc", "20165e23266d4753183a8a584a4364ebc3de1c7f", "8c9a91b774fcc126db7ce7c67bd97d1d16143932", "327a02b19a60319cc35be860ad0259a5c1aef920", "61aa09bc1a3eae17480645c90b06a18cbd62d9af", "42d1b52254873ecd0f36eb7342f95dbad9c50187", "404480f20069c176992986af17178d86ccae47a1", "0edd07551910c48f90fa07f7c5da50c8211fb994", "1b1d5091f1325752daf6931a086cd7a876d46c69" ], "paperAbstract": "Traffic shaping, including pacing and rate limiting, is fundamental to the correct and efficient operation of both datacenter and wide area networks. Sample use cases include policy-based bandwidth allocation to flow aggregates, rate-based congestion control algorithms, and packet pacing to avoid bursty transmissions that can overwhelm router buffers. 
Driven by the need to scale to millions of flows and to apply complex policies, traffic shaping is moving from network switches into the end hosts, typically implemented in software in the kernel networking stack.\n In this paper, we show that the performance overhead of end-host traffic shaping is substantial limits overall system scalability as we move to thousands of individual traffic classes per server. Measurements from production servers show that shaping at hosts consumes considerable CPU and memory, unnecessarily drops packets, suffers from head of line blocking and inaccuracy, and does not provide backpressure up the stack. We present Carousel, a framework that scales to tens of thousands of policies and flows per server, built from the synthesis of three key ideas: i) a single queue shaper using time as the basis for releasing packets, ii) fine-grained, just-in-time freeing of resources in higher layers coupled to actual packet departures, and iii) one shaper per CPU core, with lock-free coordination. Our production experience in serving video traffic at a Cloud service provider shows that Carousel shapes traffic accurately while improving overall machine CPU utilization by 8% (an improvement of 20% in the CPU utilization attributed to networking) relative to state-of-art deployments. 
It also conforms 10 times more accurately to target rates, and consumes two orders of magnitude less memory than existing approaches.", "pdfUrls": [ "https://www.cc.gatech.edu/~amsmti3/files/carousel_sigcomm_final.pdf", "http://www.cc.gatech.edu/~amsmti3/files/carousel-sigcomm17.pdf", "http://doi.acm.org/10.1145/3098822.3098852", "http://conferences.sigcomm.org/sigcomm/2017/files/program/ts-9-4-carousel.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/26f56d7a326bce3961d50bde268590f8c562d883", "sources": [ "DBLP" ], "title": "Carousel: Scalable Traffic Shaping at End Hosts", "venue": "SIGCOMM", "year": 2017 }, "26f753a7d8304922dff1f1b52f8f5fc30451497a": { "authors": [ { "ids": [ "7792071" ], "name": "Xiangnan He" }, { "ids": [ "1684968" ], "name": "Tat-Seng Chua" } ], "doi": "10.1145/3077136.3080777", "doiUrl": "https://doi.org/10.1145/3077136.3080777", "entities": [ "Artificial neural network", "Deep learning", "FM broadcast band", "FM broadcasting", "Feature vector", "Interaction", "Machine learning", "Nonlinear system", "One-hot", "Resultant", "Sparse matrix", "Web application" ], "id": "26f753a7d8304922dff1f1b52f8f5fc30451497a", "inCitations": [ "22873d98ff3f7b3b5490f3982c3fe0c0c5d665c7", "659701d68f3e5b517f1e7cd40d42a1b214acf1ad", "6cd4697795f9c990c7bd3c867a442abee2858abd", "3db6757e5c65e80dca77bc2a6cd9e742c229df9f", "081ad92ce0e71541646218f11061c86414a960c2", "d097d75077ff9c626f74b0413a690011dcb56bee", "282f8120366629829149a0ce2990c3442ba28088", "5c07b1f9b680a848b38da0fdbe7c56ada7195609", "5e3257540faa7bf220d0dda97085ceff18674f19", "7a8fd8670b928b474c4dfeb1dc1898d08d545025", "91fd133adf2bd15ab814351b3a9e9f13f2951e38", "b3db66ddc80976b502379367e342dadb5f63abbe", "c33fd28d3b3af23b99d190776e896bbbabfda883", "70d7077b7cf81619c3b3d7c6898a4845f53ee30b", "c92403e2b6b1a9d4aa10d2ba0d87ad30cfa02153", "d4945381c8989f0909a432c9b1fb6c56480af284", "3879f54cbaaeaea44f1631f9f16412c8cfd591ac", "2db446061eb890f2fad10594ba14a8f6a553d60f", 
"1898b8b9789f187b2e34767ad8d7b2612e17d52a", "a483c62f661f57a883c06daa6ac25b7f80cad661", "cb08c775fc4e001e9ba5554267c9cebbf36d6e6f", "417074e735bba6dd67aa20917e8f563057377cce", "630843b2dd2a6b1315e1fc71af5e47c1669906ba", "0a561cdda9964822c5fc4a52beba4dd6a18b2b14", "8d42763372a0a8caa9a84b6365c802086767003a", "9f67c741738a101095400ed515fa98375ef3ed67" ], "journalName": "", "journalPages": "355-364", "journalVolume": "", "outCitations": [ "3be2609d565f436611090bf5201f73543b3d0610", "0e730f8ed7cfdeb458cab36e8495e2cc0ee6d6a6", "d1ba9907cc610e8fe3799f13bcf5d336609e10bb", "1827de6fa9c9c1b3d647a9d707042e89cf94abf0", "e0e1a359c0617be201d4e9ddfd021ed950dc0940", "1c2cae04e0e00dcec3fe6840b2daa00ae9e8a0a3", "5e52a706c3b502e128ede0f5393850a2b038c88c", "1683ffc189d16b616131c300f45af87602d211f7", "1e06ef68559a8496650008a8ebbd41a47b1ec89e", "657fbf29ea0b4904a3e98d1556f9acf38dddae5f", "05f44cddc0884c5ae7ce6502a247c502d63c922f", "75dd41322a55cfca831b47aef8b822ea4f093c3e", "2607f0093fecd4fee5244d56fcf3f53ff22e949e", "5f3f5ccc3e926bea62dc2ca20dcc45377587e9b3", "e50f4d3316d13841c287dcdf5479d7820d593571", "3f3b3dc86415ebda1043d2be55e75a29ffe2bd95", "05aba481e8a221df5d8775a3bb749001e7f2525e", "66b7ad291f7ad0724fd8fabfa84da65794b43696", "00f4df1d155cc9bdf412e1d5d17c9193799e6e9f", "35a0d1d2ba7f52a66ba5c675467b71b6a56d81e4", "97fe4b0a2cf2395b6ad009cfa0ddf677ae783d73", "1bc072002d97808340b312b69427baf2dc9fcb8e", "823262c42414bfaba9a0cea736e1c77c7cea7837", "2ef7d506b25731d0f3ec0c8f90b718b6e5bbd069", "01fcae344d2edb715bcc63a40b6052c0331741bd", "16a651ad4a558d428c18fa92094433de89dbd7fc", "3db6757e5c65e80dca77bc2a6cd9e742c229df9f", "17a41aa18b8987ed87d6fc19b87d36faf8a4240c", "85c6d5e69de119f4e553b1fbd44508e25e9cfdcf", "276ebc620a8976026bd2d03582b9ecfa3738d43c", "379696f21d6bc8829c425295b917824edf687e0a", "5720a5015ca67400fadd0ff6863519f4b030e731", "61181e71ca1b899b5fdaaac24daac2463b3e6c96", "7c3ab4a8947a0d1d1014d3e09dcba7485db5ea22", "13e650e1297dd7c037bb7b81743aa78aa5aea9b2", 
"2275762a28582716db92df6d525ed2481c7d7f14", "9a89bcb1a6122b2d17079b56f14e26edc8c6426b", "52c0876b25a5721c4c6930d94d5308f0779734ec", "6ba4e10d06d9842765a4350bf5abbd3dd095045c", "70d7077b7cf81619c3b3d7c6898a4845f53ee30b", "34f25a8704614163c4095b3ee2fc969b60de4698", "400f6f4304b1c12efb22acf7e80a1784015cb23a" ], "paperAbstract": "Many predictive tasks of web applications need to model categorical variables, such as user IDs and demographics like genders and occupations. To apply standard machine learning techniques, these categorical predictors are always converted to a set of binary features via one-hot encoding, making the resultant feature vector highly sparse. To learn from such sparse data effectively, it is crucial to account for the interactions between features.\n Factorization Machines (FMs) are a popular solution for efficiently using the second-order feature interactions. However, FM models feature interactions in a linear way, which can be insufficient for capturing the non-linear and complex inherent structure of real-world data. While deep neural networks have recently been applied to learn non-linear feature interactions in industry, such as the Wide&Deep by Google and DeepCross by Microsoft, the deep structure meanwhile makes them difficult to train.\n In this paper, we propose a novel model Neural Factorization Machine (NFM) for prediction under sparse settings. NFM seamlessly combines the linearity of FM in modelling second-order feature interactions and the non-linearity of neural network in modelling higher-order feature interactions. Conceptually, NFM is more expressive than FM since FM can be seen as a special case of NFM without hidden layers. Empirical results on two regression tasks show that with one hidden layer only, NFM significantly outperforms FM with a 7.3% relative improvement. 
Compared to the recent deep learning methods Wide&Deep and DeepCross, our NFM uses a shallower structure but offers better performance, being much easier to train and tune in practice.", "pdfUrls": [ "https://arxiv.org/pdf/1708.05027v1.pdf", "http://www.comp.nus.edu.sg/~xiangnan/papers/sigir17-nfm.pdf", "http://arxiv.org/abs/1708.05027", "http://doi.acm.org/10.1145/3077136.3080777", "http://www.cse.cuhk.edu.hk/irwin.king/_media/presentations/sigir17-nfm-slides.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/26f753a7d8304922dff1f1b52f8f5fc30451497a", "sources": [ "DBLP" ], "title": "Neural Factorization Machines for Sparse Predictive Analytics", "venue": "SIGIR", "year": 2017 }, "270a358d286449db645f332b81a3a1b37971b3a7": { "authors": [ { "ids": [ "14653047" ], "name": "Vitaly Aksenov" }, { "ids": [ "3345053" ], "name": "Vincent Gramoli" }, { "ids": [ "5620508" ], "name": "Petr Kuznetsov" }, { "ids": [ "2039113" ], "name": "Anna Malova" }, { "ids": [ "40020483" ], "name": "Srivatsan Ravi" } ], "doi": "10.1007/978-3-319-64203-1_42", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_42", "entities": [ "Binary search algorithm", "Concurrency (computer science)", "Data structure", "Interleaving (disk storage)", "Linearizability", "Lock (computer science)", "Optimal binary search tree", "Read-write memory", "Search tree" ], "id": "270a358d286449db645f332b81a3a1b37971b3a7", "inCitations": [], "journalName": "", "journalPages": "580-593", "journalVolume": "", "outCitations": [ "5146dd1aba264a8303041e05ed7c88836fbdbd04", "38611b424808954be2c1375da1a873b1e2487ace", "00b3ebd315991e5b5f4e6beec2e1488281368028", "813b7cdcf6d77ea34b4cf68378e2508db28fdf50", "663eda36657a10a2ab0d1e6482b0844efb1291cc", "6a26e67389ed9b975c79ddcd711bdadb32e56bdc", "42142c121b2dbe48d55e81c2ce198a5639645030", "30a4e12fd2c915f858dc660dc1638db77657da82", "0c11bb1c8a50f81c30a82d76b1f1bfb5b4f8693b", "4ee9724ad03f9b2ba1c6088de185225c07c31e33", 
"58da996efd7320d1e484263c97c930c8979c474f", "045a975c1753724b3a0780673ee92b37b9827be6", "40b535e5579612cff48c45a071c003853ce996ee", "f3018e7589af851341e6b40affb12d0ebdfa7db1", "50444a19b79cfbfe6f6d27ce24d2999cb50a4d67", "3f339e7075319465a2a7937995d7d56089114a34", "04ef46c0e64eec241aac0fe31ca4e320f0b1f194", "e7ab23d011e5183db78cfea48e303210f6e57e2e", "61848c723b55af337973db69ba6298cac23dab20", "40cb40b7812e019c1051e3a457a8643400b81d51", "6075f8bfc541841270f223de64b577e17a748b75", "2cda119834a1bbaf73dcc6a83a0057af91a666a1", "500adfb955f443c9fb0b8a44a5a03887fa4e9729" ], "paperAbstract": "The paper presents the first concurrency-optimal implementation of a binary search tree (BST). The implementation, based on a standard sequential implementation of an internal tree, ensures that every schedule is accepted, i.e., interleaving of steps of the sequential code, unless linearizability is violated. To ensure this property, we use a novel read-write locking scheme that protects tree edges in addition to nodes. 
Our implementation outperforms the state-of-the art BSTs on most basic workloads, which suggests that optimizing the set of accepted schedules of the sequential code can be an adequate design principle for efficient concurrent data structures.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_42", "https://arxiv.org/pdf/1702.04441v3.pdf", "https://arxiv.org/pdf/1702.04441v1.pdf", "https://arxiv.org/pdf/1702.04441v2.pdf", "http://arxiv.org/abs/1702.04441" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/376d/67d5537ca19aa8304ae2cf27d3804ce11e27.pdf", "s2Url": "https://semanticscholar.org/paper/270a358d286449db645f332b81a3a1b37971b3a7", "sources": [ "DBLP" ], "title": "A Concurrency-Optimal Binary Search Tree", "venue": "Euro-Par", "year": 2017 }, "271a9d29658922aee6d18b24d33b6b721aae3fe6": { "authors": [ { "ids": [ "3109338" ], "name": "Masoumeh Ebrahimi" }, { "ids": [ "1807163" ], "name": "Masoud Daneshtalab" } ], "doi": "10.1145/3079856.3080253", "doiUrl": "https://doi.org/10.1145/3079856.3080253", "entities": [ "Algorithm", "Deadlock", "Deterministic routing", "Directed acyclic graph", "Interconnection", "Routing" ], "id": "271a9d29658922aee6d18b24d33b6b721aae3fe6", "inCitations": [ "814eca2c6edcecfd9ba6ea6cfd32aaef9924ea29", "a0a8615bf3021155f2a67c6d54879801001a15fc", "06a5b95708a04a3ae7e7da1c462d62088349401a" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "703-715", "journalVolume": "", "outCitations": [ "2538a796403ba39352e7ecd73637a0b8b768dd07", "14bb08611e09f2e85b2987df33d9a520227ddd89", "2c3d86e6fcb4f931ef90c2a5e075562cbfeb9c65", "56bf58eb183dbe8f6d420fae194f2c2be35fc850", "4bd0ff8b41826f7375f0eca91f44b1384a8e7cfb", "534fe8b09a0704cf2a87e97f4b7f30ea23b197e1", "935ae58c43fc7d785e044a596b83b1ba73c6fbb1", "3ab1c2d1cc4763c3290a68fe01eb176b43ec8199", "5676da9f4854aba9c0cab007c3e517cd81acf81d", "201b01c3d3c87dec8e09fc44536d1d30adcffbf0", 
"8aadfb171e4538b366f30fbbb0ee55643441ce5f", "6f73820a6ef96c21ac6ae6f82d42b5b187b34138", "c335fe0e9201734bce9e3d2d8613bbdb825e80b1", "1a1b53f6253d9f9d586775f879ced9d6abaaa2a1", "2a63eec82b28cfd1450b5ad581af6ec5b5e59312", "4f250b60885d833e318dd0b4da1f0ee209d801ee", "4b0fd3e52bcdc651d33af716c33e01ac1ca3cbe9", "1e763d76466b9628942fff3a5a947fc0f723c905", "8a334a1dc7e91a56c41657fac402ee6889cd9ef0", "369f7feb2457c7ee5c67b6f102de3fdfbbf221d9", "07cc98137c632be08ab1d1b194ffa20479858d4d", "bfd039474a05ae508f366ba63129044afa41e8b0", "66e0aa17f60779815d5eb35e68d545ae2dc351c3", "034bbbc269a9573b34bf0ebfe8b285672c8a70b5", "3940ce01856fed3791f0a886aed9f89a7227fef2", "b86635b45bfbe4e18e21230481941a2df5996550", "697ea9fd6d78e003738d06836fdf08c3abe40b28", "d488b45aaa4ceb4e994562199088c46fdb735925", "1f87d9a2ccadc34d563addc5cfb02d693d6ab1fd", "9ee0e1f68487473f631f816990e7267f877e7f88", "3a1ec7ffad8d1d17cd1bffc67113ea4bf8a27b7e", "2729137f58890c82977a354ef2e3f2290c4f7e03", "69dbd14c4ff684af0d3c918f040e281eaf49bd05", "c22cd78260126ea8e0183c23aeb9a2ec928658e3", "0367df96f30a6a32cafe4a74ac8fe201d364d380", "12a2dbec6ceb99a25ed78367b62979de1d0a11e3", "329756f2d29829e1b2e713360016995855d0ea26", "690bf669c4012ef13086dae463731355f6cf037f", "1b216be638fbb0a9099bbc1aabe4319676d5f573", "278dd6415f07db4d6ec13096642e4f1cf5189f58", "0eadb23fd9a83b28c6c861e8a40620b3a832ce50", "8576d87051f7c252de4ef89e475d89de8e9a45c9", "3191f28b942bd428fd4df250afc15bf68b402362", "2368a00495ea5d2ed318a8921366bb40d036d0b3", "002cca80b9deaa3c1e9d46ae0bfdc9fd79079907", "8c6b9d53a7ab2ef4c0de32d9df2245997166967b", "fb315084da1fe348bd340331af86c45f2f79cbaa" ], "paperAbstract": "Freedom from deadlock is one of the most important issues when designing routing algorithms in on-chip/off-chip networks. Many works have been developed upon Dally's theory proving that a network is deadlock-free if there is no cyclic dependency on the channel dependency graph. 
However, finding such acyclic graph has been very challenging, which limits Dally's theory to networks with a low number of channels. In this paper, we introduce three theorems that directly lead to routing algorithms with an acyclic channel dependency graph. We also propose the partitioning methodology, enabling a design to reach the maximum adaptiveness for the n-dimensional mesh and k-ary n-cube topologies with any given number of channels. In addition, deadlock-free routing algorithms can be derived ranging from maximally fully adaptive routing down to deterministic routing. The proposed theorems can drastically remove the difficulties of designing deadlock-free routing algorithms.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080253" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/271a9d29658922aee6d18b24d33b6b721aae3fe6", "sources": [ "DBLP" ], "title": "EbDa: A new theory on design and verification of deadlock-free interconnection networks", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "271e7b664d72f3f77b7bae7d7b64710067d3d1d4": { "authors": [ { "ids": [ "33734396" ], "name": "Dawei Zhou" }, { "ids": [ "35799000" ], "name": "Si Zhang" }, { "ids": [ "19225584" ], "name": "Mehmet Yigit Yildirim" }, { "ids": [ "31871077" ], "name": "Scott Alcorn" }, { "ids": [ "8163721" ], "name": "Hanghang Tong" }, { "ids": [ "1789798" ], "name": "Hasan Davulcu" }, { "ids": [ "37395525" ], "name": "Jingrui He" } ], "doi": "10.1145/3097983.3098015", "doiUrl": "https://doi.org/10.1145/3097983.3098015", "entities": [ "Algorithm", "Algorithmic efficiency", "Cluster analysis", "Conductance (graph)", "Cut (graph theory)", "Local algorithm", "Markov chain", "Personally identifiable information", "Polylogarithmic function", "Social network", "Structure mining", "Synthetic data", "Time complexity" ], "id": "271e7b664d72f3f77b7bae7d7b64710067d3d1d4", "inCitations": [ 
"782b2fbc15b50c15f12baa126817466ec5e8251c", "81ea117f312d5d909f7c18a8a573b28735c87abd", "5cd4d389f2666e15bebade593fbfd32c18fd8bc7" ], "journalName": "", "journalPages": "655-664", "journalVolume": "", "outCitations": [ "12d8b675b6bc49313764f89b5e64d721af0ec1ae", "727fa1f55462f732bcc8e3ae41a119d24e38bd69", "0f7118bbc22e6fe784a97e344abfef95e62a594c", "52474e58641a0fea46327c1da7189b2b172f3991", "b729b296afc5a922e4dfb6a1f568a8cb889daeb0", "a5160db2d31d47545fb68a4a17580969e1e02f80", "91316aad59d0492b63e713b8dedde3f32a89780e", "203c28f17d29c6800b73b3bbb47623b76639f7ca", "13a6e172d6e6dea18e7f3772227df67e77ac030b", "49f2214fa494c034106e050ab6d140cc6d215c15", "437a492ddaf7a68a4634036897f528e2c9dbc349", "68b2570fd637be133b4543bac412b083e5e01bd7", "228b3e64d66e8f00d03bf1edecb07b27c0c0b953", "6d7909355a1749b19f9dbe2b783973fe4e8eb61f", "5f944a6722a6e0f2114e8fd1262eb01ede6d5b9d", "416df2138a038471c6283e1a9c070c32f3772e44", "2d3e8543750185020303857171f48cdd5fb64e3a", "2a3bf91f393d2ae9c10707c7f4f9e9edd637754f", "66549f785d13a44171fcc21899802325e7d923cd", "35da337d6ca7c90fa7ec6747680b32fddb623209", "276ebc620a8976026bd2d03582b9ecfa3738d43c", "01a0cb74b39fdc625c7925dc658452f9e382b2ec", "343293f0f35b2e273228a29e737533e651e13d5e", "95b251202e1f5ff7d7f41dda553a38e395ecf555", "7aa249dcef7003165100a1280bb91230deade8aa", "1b348075d02cc532b1a01955e21ba3062e769113", "4a261b8e566fe3a5dd7ea9e57d189e7350498057", "0aea7c981f3c0bf140bbe30b135ad1d87eab3503", "276833a15492c5762e0739b4b0bb6ab2781cd705", "7b6e5ca851dd63b87e1be56cb5aef11e7623320e", "75ab25954614817c8676b5b9c85c8fd88acd3ece" ], "paperAbstract": "Nowadays, large-scale graph data is being generated in a variety of real-world applications, from social networks to co-authorship networks, from protein-protein interaction networks to road traffic networks. Many existing works on graph mining focus on the vertices and edges, with the first-order Markov chain as the underlying model. 
They fail to explore the high-order network structures, which are of key importance in many high impact domains. For example, in bank customer personally identifiable information (PII) networks, the star structures often correspond to a set of synthetic identities; in financial transaction networks, the loop structures may indicate the existence of money laundering. In this paper, we focus on mining user-specified high-order network structures and aim to find a structure-rich subgraph which does not break many such structures by separating the subgraph from the rest.\n A key challenge associated with finding a structure-rich subgraph is the prohibitive computational cost. To address this problem, inspired by the family of local graph clustering algorithms for efficiently identifying a low-conductance cut without exploring the entire graph, we propose to generalize the key idea to model high-order network structures. In particular, we start with a generic definition of high-order conductance, and define the high-order diffusion core, which is based on a high-order random walk induced by user-specified high-order network structure. Then we propose a novel High-Order Structure-Preserving LOcal Cut (HOSPLOC) algorithm, which runs in polylogarithmic time with respect to the number of edges in the graph. It starts with a seed vertex and iteratively explores its neighborhood until a subgraph with a small high-order conductance is found. Furthermore, we analyze its performance in terms of both effectiveness and efficiency. 
The experimental results on both synthetic graphs and real graphs demonstrate the effectiveness and efficiency of our proposed HOSPLOC algorithm.", "pdfUrls": [ "http://faculty.engineering.asu.edu/jingruihe/wp-content/uploads/2014/10/HOSPLOC.pdf", "http://www.public.asu.edu/~dzhou23/Slides/KDD2017_HOSPLOC.pdf", "http://www.public.asu.edu/~dzhou23/papers/KDD2017_HOSPLOC.pdf", "http://doi.acm.org/10.1145/3097983.3098015", "http://www.public.asu.edu/~hdavulcu/HOSPLOC.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/271e7b664d72f3f77b7bae7d7b64710067d3d1d4", "sources": [ "DBLP" ], "title": "A Local Algorithm for Structure-Preserving Graph Cut", "venue": "KDD", "year": 2017 }, "27256e59363d5ec896b359df8d5ec18a2d023d3e": { "authors": [ { "ids": [ "1917004" ], "name": "Vincent Chau" }, { "ids": [ "33099410" ], "name": "Minming Li" }, { "ids": [ "3055569" ], "name": "Samuel McCauley" }, { "ids": [ "1728316" ], "name": "Kai Wang" } ], "doi": "10.1145/3087556.3087573", "doiUrl": "https://doi.org/10.1145/3087556.3087573", "entities": [ "Algorithm", "Jumpstart Our Business Startups Act", "Online algorithm", "Scheduling (computing)", "Throughput", "WAITS" ], "id": "27256e59363d5ec896b359df8d5ec18a2d023d3e", "inCitations": [], "journalName": "", "journalPages": "67-76", "journalVolume": "", "outCitations": [ "6f2b07465f08eff8cd9005f66ec0d94c3e5a7e5a", "07d5536194c8d779a67768ba21aa98cdd8e34082", "45dc1c861e567dd1c0c89683970bc2812f74e849", "48e992a734ef6ecbc9d5aeb3fc9135bbee531e07", "95ef49746b497b9fb5697217eda25f2dab2089da", "034569353211b039339bb1337227247f4530fe68", "930ddffe457b3e55981662b6f8833bf922f60aa0", "7fa8228b7630839adce918ea58b6977033b2e7ae", "54c3f1f73ae2cb1bcf20ae007e0423aaaee83fa0", "b366c096fa2fc29c419c7387ad431fb9e3eefaaf", "f345ecb8e361107bd6c8ca9b8f712c42a102c4cd", "b869327f4a973e6c127a00c5e203f964ba855fdb", "306a9bead5f6e780903f8753151f8066c4314c5a", "f885482b6de7eaacbc3cbbd322232b7f95a55621", 
"afe1cfac603451543e5679a401c79d4dce936be0", "b29bfab73db6d16b24b4ff2f7bde213e62d8b353", "d85a97801d66ff2cbcf59eb7298c8942adf1aa7a" ], "paperAbstract": "In sensitive applications, machines need to be periodically calibrated to ensure that they run to high standards. Creating an efficient schedule on these machines requires attention to two metrics: ensuring good throughput of the jobs, and ensuring that not too much cost is spent on machine calibration. In this paper we examine flow time as a metric for scheduling with calibrations. While previous papers guaranteed that jobs would meet a certain deadline, we relax that constraint to a tradeoff: we want to balance how long the average job waits with how many costly calibrations we need to perform.\n One advantage of this metric is that it allows for online schedules (where an algorithm is unaware of a job until it arrives). Thus we give two types of results. We give an efficient offline algorithm which gives the optimal schedule on a single machine for a set of jobs which are known ahead of time. We also give online algorithms which adapt to jobs as they come. 
Our online algorithms are constant competitive for unweighted jobs on single or multiple machines, and constant-competitive for weighted jobs on a single machine.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087556.3087573" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/27256e59363d5ec896b359df8d5ec18a2d023d3e", "sources": [ "DBLP" ], "title": "Minimizing Total Weighted Flow Time with Calibrations", "venue": "SPAA", "year": 2017 }, "27315b401b5b8fefbf0beade6470760b3407313e": { "authors": [ { "ids": [ "1789625" ], "name": "Binghui Wang" }, { "ids": [ "1990973" ], "name": "Neil Zhenqiang Gong" }, { "ids": [ "1690222" ], "name": "Hao Fu" } ], "doi": "10.1109/ICDM.2017.56", "doiUrl": "https://doi.org/10.1109/ICDM.2017.56", "entities": [ "Belief propagation", "Casio Loopy", "Directed graph", "Graph (discrete mathematics)", "Local binary patterns", "Markov random field", "Scalability", "Social Networks", "Social network", "Social structure", "Technological convergence", "World Online" ], "id": "27315b401b5b8fefbf0beade6470760b3407313e", "inCitations": [ "a6dac31aff249c75b44a072aa67673f9192a3f4b", "68f5f9650c0c0826f83a1c325d830e8577c3cac3" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "465-474", "journalVolume": "", "outCitations": [ "0ab90ce94103554de7536cd860236f14b6341794", "24e45f66c6ddfbfe1430ade5522709d51d908722", "23cc4b3d917b125e1b7ab9107e09c3af9e1ca5e5", "3e4bff987325827cbc433df7e32e66b2d58d5c4d", "16a725883eb9951a7530021b9b10692de6cb5c48", "0bee052af002eb197277cd222d62154c7de4ac8a", "0706356c9ab6014d6b04577d38289ea8328291a5", "05a39de8005c84d9a1a620d5e00081829c628e85", "383fbd8fe20d6a9f2cad2a7be337a244a3f8882c", "251313fb7e68f1a3a5c3467962c6fb9d4cc37d54", "5b660f6fb6b1277a5c8a311a7e688234cde909d9", "02346726af3cdb0c73ca16105f06f57c80ebe3aa", "4ad0595b7af6c4041bfd89ccf74d94f154a39762", "136d0daa5d918dfebe8a26a6991053ef372892ea", "98a79269b7919cdbab0c5570e3ccee99eeedd8a3", 
"80a1cf26258ab86aff9ff0726f25966187732bee", "5d51f7b942f7f4b77918941c7f694f6a90e65d6a", "23fe15713f8b18729960d1625820f500cc9bf6f2", "0a645cbb39ac37929a2e6ba55fc248dc4ca967e4", "5f4c05ba08fac9cde40235ebd4eb9abc6ed2d712", "97c23bdfd9e0921048558ef6aeaa327f368ce8e1", "82db87e2fd08e80b59d98a6f4b21b28898393fcd", "4f6487d61ba6c2afa44be0e870599bb292e27638", "516f412a76911a13c9128aac827b52b27b98fad9", "1f0612de1f191abadf250b78cd78f884203cca5e", "653fbfbad9d565dd5e5e0d48b6bb32dd02e8f157", "2830246be09bbd376cd7f2ed9ae150110dcb08cf", "47c8f4405372d10f638b2eb7336733078a359990", "7783fd2984ac139194d21c10bd83b4c9764826a3" ], "paperAbstract": "Detecting fraudulent users in online social networks is a fundamental and urgent research problem as adversaries can use them to perform various malicious activities. Global social structure based methods, which are known as guilt-by-association, have been shown to be promising at detecting fraudulent users. However, existing guilt-by-association methods either assume symmetric (i.e., undirected) social links, which oversimplifies the asymmetric (i.e., directed) social structure of real-world online social networks, or only leverage labeled fraudulent users or labeled normal users (but not both) in the training dataset, which limits detection accuracies. In this work, we propose GANG, a guilt-by-association method on directed graphs, to detect fraudulent users in OSNs. GANG is based on a novel pairwise Markov Random Field that we design to capture the unique characteristics of the fraudulent-user-detection problem in directed OSNs. In the basic version of GANG, given a training dataset, we leverage Loopy Belief Propagation (LBP) to estimate the posterior probability distribution for each user and uses it to predict a user's label. However, the basic version is not scalable enough and not guaranteed to converge because it relies on LBP. 
Therefore, we further optimize GANG and our optimized version can be represented as a concise matrix form, with which we are able to derive conditions for convergence. We compare GANG with various existing guilt-by-association methods on a large-scale Twitter dataset and a large-scale Sina Weibo dataset with labeled fraudulent and normal users. Our results demonstrate that GANG substantially outperforms existing methods, and that the optimized version of GANG is significantly more efficient than the basic version.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.56" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/27315b401b5b8fefbf0beade6470760b3407313e", "sources": [ "DBLP" ], "title": "GANG: Detecting Fraudulent Users in Online Social Networks via Guilt-by-Association on Directed Graphs", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "274a6c951c4aa82e6ef6b9f63c11f0ef66722c20": { "authors": [ { "ids": [ "2802655" ], "name": "Omid Alipourfard" }, { "ids": [ "1925072" ], "name": "Hongqiang Harry Liu" }, { "ids": [ "1720246" ], "name": "Jianshu Chen" }, { "ids": [ "2697906" ], "name": "Shivaram Venkataraman" }, { "ids": [ "1914822" ], "name": "Minlan Yu" }, { "ids": [ "1720939" ], "name": "Ming Zhang" } ], "doi": "", "doiUrl": "", "entities": [ "Amazon Web Services", "Bayesian optimization", "Best, worst and average case", "Big data", "Cloud computing", "Curiously recurring template pattern", "Experiment", "Jumpstart Our Business Startups Act" ], "id": "274a6c951c4aa82e6ef6b9f63c11f0ef66722c20", "inCitations": [ "748f97611f2c0a543d5a0e4de6360906cd0a80ba", "b323515ac22fb05ac740d097482b751141b52b23", "147ff168c5512b8bee49f4a1374f6f25298d95bc", "1804e67480b9c46b377d0a102e26d3a9a1a9139b", "83aaf61e91053745e667427d2132527b8a05ef8a", "53cc6bf305539b4bd8829df42996e0eb12512434", "8e5cc516f0e6b62af0ef0fdb1069a93113539beb", "7325ef2e79191f143361b22ba486032a29643011", 
"301d189e85def6eaddbc7152416df1511b55e82b", "284b7631a9961f69eae1e0bac49438aee34edaa0", "576f13a5f349ecc60e5e491395e8aa7a9c9f0c05", "83c798881b911f7a15e5022ab4d0973fffcd040f", "d308092a5da30ef6687b6a26287f1e54ba4c5e10", "36a64cb68a3da37ed9d54f03750e1f1ac6d3d336", "5655f16d3c46537f951b5686c905f15c2f35991c", "537efae13f33ad932034b8ad1db72a83d3691473", "7818619eb25c7c1bb470a5b5572fa0371de721bc", "162be49582b29ed18775f089810fb8cdc2ed6808", "71cf6dd78c50f1e7b647e35e4783f0aea79ce76c", "00d3d1554166ab1dd91089111dabac7ca456f5be", "20fd19bb5f49fd37832d0d3e2564d7ff0cc072a7" ], "journalName": "", "journalPages": "469-482", "journalVolume": "", "outCitations": [ "0b4b28d8be5cab638d675af26a8b96ebbf469f04", "52d81096f46be0e75f85e0b7eeda65640c281630", "47b627916586fa7e0ba64f0fcdb80a5037d66dc7", "88dfee10842bbfd2ebc74980ab64c1cac5753883", "2dcb0ed27b6a35b1dfe97b45604302a1f3705c01", "1592fe924114866c1ac559bae33ea789930daa98", "191c14ec67c561c6a3e3ce21c0a7e59e3afe490b", "23e3c3c2dba3cad503de6834f6fb680d3bfdcfbd", "23b8a55785318ce90957a392607e24f620c4fccc", "5ba6dcdbf846abb56bf9c8a060d98875ae70dbc8", "572dd2d5d75227bb878430c9375b9be92cc7e6e9", "9da1d06e9afe37b3692a102022f561e2b6b25eaf", "0b95a8628f90a78909447c4cfee2dce7cb92dd52", "c43bdddf596e0ca8115201027dc2eb374a78f101", "1d27d04e8cef4d32cb4e022c9f493a40a019f59f", "2a7d3b967a356c2a42f729048b0d3511b0005351", "0d868efa67bf06b1f784d60769c082fd9a58893e", "316486bada6023816c785c0d4eb401658737be3f", "43af0459ad86b3a7530e43c1916c63c1c656711a", "26f58c28de7469dc6b6846d37953bbbe3f4fc0e9", "5f41cc7c081b294f684928c35a08626490ec4f8a", "daa63f57c3fbe994c4356f8d986a22e696e776d2", "417f02dfc599699c94e732f3600d559d8e41fa17", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "2cdcddb08ae6060e94cba6c9b2b58b87324e686f", "2d8450ec56926a34fe1c25180f0b303c5cc67f2d", "04e24be4c25539e4b4fa0498c85b3a3e2d026c02", "3000e77ed7282d9fb27216f3e862a3769119d89e", "208cdf363b4fc8343815393aed9551eed033df18", "4c422d3b8df9b140b04d436d704b062c4f304dec", 
"cd5a26b89f0799db1cbc1dff5607cb6815739fe7", "9b707fda4ef927f92d9ecb86dea82bd1ede59d49" ], "paperAbstract": "Picking the right cloud configuration for recurring big data analytics jobs running in clouds is hard, because there can be tens of possible VM instance types and even more cluster sizes to pick from. Choosing poorly can significantly degrade performance and increase the cost to run a job by 2-3x on average, and as much as 12x in the worst-case. However, it is challenging to automatically identify the best configuration for a broad spectrum of applications and cloud configurations with low search cost. CherryPick is a system that leverages Bayesian Optimization to build performance models for various applications, and the models are just accurate enough to distinguish the best or close-to-the-best configuration from the rest with only a few test runs. Our experiments on five analytic applications in AWS EC2 show that CherryPick has a 45-90% chance to find optimal configurations, otherwise near-optimal, saving up to 75% search cost compared to existing solutions.", "pdfUrls": [ "http://shivaram.org/publications/cherrypick-nsdi17.pdf", "http://www.cs.yale.edu/homes/yu-minlan/talk/nsdi17-cherrypick.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_alipourfard.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-alipourfard.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-alipourfard.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_alipourfard.pdf", "http://www.cs.yale.edu/homes/yu-minlan/writeup/nsdi17-cherrypick.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/alipourfard" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/5242/0c1238997d8eb970f7a9a61813b11b05e2e9.pdf", "s2Url": "https://semanticscholar.org/paper/274a6c951c4aa82e6ef6b9f63c11f0ef66722c20", "sources": [ "DBLP" ], "title": "CherryPick: 
Adaptively Unearthing the Best Cloud Configurations for Big Data Analytics", "venue": "NSDI", "year": 2017 }, "274f3d90c1585b8f6e999cbda8bc0b05d3125d0a": { "authors": [ { "ids": [ "7029133" ], "name": "Chinmay Kulkarni" }, { "ids": [ "3166590" ], "name": "Aniraj Kesavan" }, { "ids": [ "39345318" ], "name": "Tian Zhang" }, { "ids": [ "35604156" ], "name": "Robert Ricci" }, { "ids": [ "3087426" ], "name": "Ryan Stutsman" } ], "doi": "10.1145/3132747.3132784", "doiUrl": "https://doi.org/10.1145/3132747.3132784", "entities": [ "Attribute\u2013value pair", "B-tree", "Downtime", "Durability (database systems)", "Dynamic random-access memory", "FLOPS", "Fault tolerance", "High-level programming language", "In-memory database", "Key-value database", "Live CD", "Load balancing (computing)", "Pipeline (computing)", "Rapid application development", "Reconfigurability", "Response time (technology)", "Scalability", "Server (computing)", "USB flash drive" ], "id": "274f3d90c1585b8f6e999cbda8bc0b05d3125d0a", "inCitations": [ "2dc3d8536a8fa4660c6e842ef17c2d1553fceea6" ], "journalName": "", "journalPages": "390-405", "journalVolume": "", "outCitations": [ "225603198cc415d363db8a8a2bd30b0df3c963b1", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "19197540fcab68bc15531c90c6103a3f836a1791", "1220e4a011c46804d4369b5580dc7fb6e387af54", "1dd353938063795c06ef21d8b0b3ef3b45a2fdc1", "284c50888bc0a85d871d65d18f0509a11f663907", "514a5c15e8cf3f681febecad954a4508d9189c99", "f00d2dfb39b3b1b114220dba32a0fbccc2368c66", "29a1148d75878671dc3663bf480e33d7bd91597d", "7e4921a43378b2b7b9cf950604fe434e4b07da58", "1ea81e7477051ba7769dc50a97f3b2b01d5ee9da", "3ae8993ebc28dd9b99d415d04d2b766dc99212d9", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "535c8fe6680bb97ff5839d751c10594dcbbcb0fe", "8318fa48ed23f9e8b9909385d3560f029c623171", "00945bd2fe73a6f617010009d621b23b1e1303c6", "400ae82ab2fc2c814033c65854229ecefbddbf67", "0d3f85933b6355789588476e491683532c68a906", "205cf007cf77bbf81e55b74635017087585f7b7c", 
"0558c94a094158ecd64f0d5014d3d9668054fb97", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "9aa0d7253574e50fe3a190ccd924433f048997dd", "0368d2445d3ee4205ee73da933cb8b810a89091c", "433207f45ac2c9dbe3876ab53af28ed569e94da9", "7129b305ce45f83127e928e8510da9fae0783905", "ab2dd895c0d8e567071caf8704e6078d33cd6e22", "463bec3d0298e96e3702e071e241e3898f76eff2", "0270c2056eb50b5d4597afa722c50abf21e67a82", "412a9e54bbb31e12d008a9579994e009c5b40b46", "29a05cde1994548e2e9487822248c679626c6241", "3abca96006f8a6c014635b6a111368f459110e83", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "108937f6a7220ae9370511bbcaa44674c48b1a65" ], "paperAbstract": "Scalable in-memory key-value stores provide low-latency access times of a few microseconds and perform millions of operations per second per server. With all data in memory, these systems should provide a high level of reconfigurability. Ideally, they should scale up, scale down, and rebalance load more rapidly and flexibly than disk-based systems. Rapid reconfiguration is especially important in these systems since a) DRAM is expensive and b) they are the last defense against highly dynamic workloads that suffer from hot spots, skew, and unpredictable load. However, so far, work on in-memory key-value stores has generally focused on performance and availability, leaving reconfiguration as a secondary concern.\n We present Rocksteady, a live migration technique for the RAMCloud scale-out in-memory key-value store. It balances three competing goals: it migrates data quickly, it minimizes response time impact, and it allows arbitrary, fine-grained splits. Rocksteady migrates 758 MB/s between servers under high load while maintaining a median and 99.9th percentile latency of less than 40 and 250 μs, respectively, for concurrent operations without pauses, downtime, or risk to durability (compared to 6 and 45 μs during normal operation). 
To do this, it relies on pipelined and parallel replay and a lineagelike approach to fault-tolerance to defer re-replication costs during migration. Rocksteady allows RAMCloud to defer all repartitioning work until the moment of migration, giving it precise and timely control for load balancing.", "pdfUrls": [ "http://www.flux.utah.edu/download?uid=257", "http://doi.acm.org/10.1145/3132747.3132784", "https://www.flux.utah.edu/download?slides=1&type=pdf&uid=257" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/274f3d90c1585b8f6e999cbda8bc0b05d3125d0a", "sources": [ "DBLP" ], "title": "Rocksteady: Fast Migration for Low-latency In-memory Storage", "venue": "SOSP", "year": 2017 }, "275c76fdc10c078c4cafc1f3c3e431dc47f4e425": { "authors": [ { "ids": [ "3460040" ], "name": "Gil Jae Lee" }, { "ids": [ "1705758" ], "name": "Jos\u00e9 A. B. Fortes" } ], "doi": "10.1109/ICAC.2017.45", "doiUrl": "https://doi.org/10.1109/ICAC.2017.45", "entities": [ "Algorithm", "Apache Hadoop", "Big data", "Concave function", "Domain-specific language", "Fuzzy control system", "Heuristic", "Heuristic (computer science)", "Job shop scheduling", "Jumpstart Our Business Startups Act", "Makespan", "MapReduce", "Self-tuning", "System Wide Information Management", "System administrator" ], "id": "275c76fdc10c078c4cafc1f3c3e431dc47f4e425", "inCitations": [], "journalName": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "journalPages": "49-58", "journalVolume": "", "outCitations": [ "0c83169bf4ebb29979bfe47708cb6b79b6e28755", "1c79346e409f764ddef3c2d15bab2bb7d5f24f20", "2af2d5e95a14365c82fb9e1795e1415c4e67dab4", "eb4a5755fd012acba606651a332e4a1faff68f46", "20244961dbba619d38e9115dfc63ebd90676d224", "1d8465c3f5aee1b7a790f6eeb44637343861ba47", "775cf1c0950692505aa72c8f5fd2140cf46c6b61", "1d27d04e8cef4d32cb4e022c9f493a40a019f59f", "0ea4380ff8bb30e6bd5fd888268d6f8f38229fb7", "86c8b3a135f0163d9e04b6292e1f32ae7f25efdb", 
"2a7d3b967a356c2a42f729048b0d3511b0005351", "2cdcddb08ae6060e94cba6c9b2b58b87324e686f", "45e2dd9fe949025ff7f82d888e5be8693dbd317d", "bcc63f03bdf3418b380d2ecbf084935e8d5409a1", "25128b9c24f7fe9919a1adb5e15586541cc5817d", "f60c8d260dcc9f415de08150dde4182d7a89050f", "2a91510713dcdfbed68f5d8885d63d5d7bb491f3", "0541d5338adc48276b3b8cd3a141d799e2d40150", "9b707fda4ef927f92d9ecb86dea82bd1ede59d49", "6f2270c81885e2f5b3b6bc86f0b2099af9c55534", "b380e144b1a5052277b686285d420e2b0dcb49e4", "3d84f3e43358e8fad5386f5bf352d77286a655fa", "8ba425438b6457d7f13f5e5c36acb8c1654ef677", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1" ], "paperAbstract": "Many big-data processing applications use dataanalytics frameworks such as Apache Hadoop. Such frameworks have tunable configuration parameters set by experienced system administrators and/or application developers. However, tuning parameters manually can be hard and time-consuming because it requires domain-specific knowledge and understanding of complex inter-dependencies amongst parameters. Most of the frameworks seek efficient resource management by using slots or containers as resource units to be assigned to jobs or tasks, the maximum number of slots or containers in a system being part of the static configuration of the system. This static resource management has limited effectiveness in coping with jobs’ diversity and workload dynamics, even in the case of a single job. Seeking to improve performance (e.g., multiple-jobs makespan and job completion time) without modification of the framework, this paper proposes a hierarchical approach using a fuzzy-logic controller to dynamically adjust the number of concurrent jobs and additional controllers (one for each cluster node) to adjust the number of resource units assigned to jobs on each node. The fuzzylogic controller uses fuzzy rules based on a concave downward relationship between aggregate CPU usage and the number of concurrent jobs. 
The other controllers use a simple heuristic algorithm to adjust the number of resource units on the basis of resource usage by job tasks. A prototype of our approach was implemented for Apache Hadoop on a cluster running at CloudLab. The prototype was evaluated using realistic workloads generated by SWIM, a statistical workload injector for MapReduce. The evaluation shows that the proposed approach yields up to a 42% reduction of the jobs makespan that results from using Hadoop default settings.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICAC.2017.45" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/275c76fdc10c078c4cafc1f3c3e431dc47f4e425", "sources": [ "DBLP" ], "title": "Hierarchical Self-Tuning of Concurrency and Resource Units in Data-Analytics Frameworks", "venue": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "year": 2017 }, "277ac28b4c05f1f57168f94a1cab699098706805": { "authors": [ { "ids": [ "1778215" ], "name": "Swagath Venkataramani" }, { "ids": [ "1764271" ], "name": "Ashish Ranjan" }, { "ids": [ "17821157" ], "name": "Subarno Banerjee" }, { "ids": [ "38858451" ], "name": "Dipankar Das" }, { "ids": [ "1777558" ], "name": "Sasikanth Avancha" }, { "ids": [ "1688089" ], "name": "Ashok Jagannathan" }, { "ids": [ "17854422" ], "name": "Ajaya Durg" }, { "ids": [ "2815769" ], "name": "Dheemanth Nagaraj" }, { "ids": [ "40279588" ], "name": "Bharat Kaul" }, { "ids": [ "1719384" ], "name": "Pradeep Dubey" }, { "ids": [ "1682337" ], "name": "Anand Raghunathan" } ], "doi": "10.1145/3079856.3080244", "doiUrl": "https://doi.org/10.1145/3079856.3080244", "entities": [ "Byte", "Compiler", "Dataflow", "Deep learning", "FLOPS", "Flip-flop (electronics)", "Graphics processing unit", "Half-precision floating-point format", "ImageNet", "Memory hierarchy", "Natural language", "Neural Networks", "Pipeline (computing)", "Scalability", "Server (computing)", "Simulation", "Single-precision floating-point format", 
"Speedup", "Video processing" ], "id": "277ac28b4c05f1f57168f94a1cab699098706805", "inCitations": [ "6a8fb5989b3fb290ac0a654895aad6ff8601c7ab", "82d9d4cae6cb82ad516371414cc53da6ec9708b0", "5bcf27ab86be9fa376237d2d2bd8ebbf52982088", "381b008b49d04c1bd5ff00649521fa028b9d3ea8", "5e8e46557e42940274e548246680c785eb729db2", "08a0c1f5f2b7c91d81e28f896c2a001d58975014", "f6bf4a1c5fc337b8904580cfbe172bb4f4b73383", "540746504cfe51a146762cbbca06cbc03229c778" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "13-26", "journalVolume": "", "outCitations": [ "2bb28f6105ca30b2bc1ba91578234ecb12e788a6", "06ce77e4abea63948580340be25d7f2a80369e5a", "1cfc1cce7ec6c199a2e43f7c312c398820778e6c", "6a8851358df3ecd5164a417110aed4037793f64f", "bb0a9c9a4936298a7852f0411b7684789e37c282", "8c404062b0c10b0c70fdd54d17f91f03d7fadc04", "0b99d677883883584d9a328f6f2d54738363997a", "e1c4e2fa071046569a05e9cfdf13496d094025dd", "49b4094f2c313a92da4461572c0bef80b0d7d649", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "80d800dfadbe2e6c7b2367d9229cc82912d55889", "01fcae344d2edb715bcc63a40b6052c0331741bd", "38211dc39e41273c0007889202c69f841e02248a", "f2c2fbc35d0541571f54790851de9fcd1adde085", "061356704ec86334dbbc073985375fe13cd39088", "021fc345d40d3e6332cd2ef276e2eaa5e71102e4", "7458f8bfffecb1baf72e32590a1da5ca8ba923d5", "06cad81a163e345828c0804f42252177049dd1bc", "64bad9c3e4d8ed38c16b0086da865ccd574e836a", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "268a7cd1bf65351bba4eb97aac373d624af2c08f", "3439a127e45fb763881f03ef3ec735a1db0e0ccc", "357e97b04375f09e9f4cfd45c69ecd9d7f0a15e1", "2329a46590b2036d508097143e65c1b77e571e8c", "233b1774f28c9972df2dfcf20dfbb0df45792bd0", "37754b2911876dfda802a577ec43b2dba04b4a15", "0e78074a081f2d3a35fbb6f74ba9b7e27e64757b", "0884df514837c87e705a26e1617cdc19564fc24b", "b7cf49e30355633af2db19f35189410c8515e91f", "02c78232075ac431834e3442dcb2954d4e708def", "29c7d959f322f7006186e3c344ddb9e41e3a8d1f", 
"3f1c1427b175140e7f725a155096a4e73c1b8509", "9f1f065bf08cd90431cc051267a708f56436cd82", "52ac48c505a36b2e6d27bfa863aa212949975a62", "1336146e7f95b295bb73c7659c6af4befd86cbdd", "2bbce78462418348d2427b550e2aa19a8e4e05e9", "30ec6ebe977de36c2848da0f6e191d4fb18ccb69", "0cd5a228cbebb830adac40b0b225daa9a116dda9", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d" ], "paperAbstract": "Deep Neural Networks (DNNs) have demonstrated state-of-the-art performance on a broad range of tasks involving natural language, speech, image, and video processing, and are deployed in many real world applications. However, DNNs impose significant computational challenges owing to the complexity of the networks and the amount of data they process, both of which are projected to grow in the future. To improve the efficiency of DNNs, we propose ScaleDeep, a dense, scalable server architecture, whose processing, memory and interconnect subsystems are specialized to leverage the compute and communication characteristics of DNNs. While several DNN accelerator designs have been proposed in recent years, the key difference is that ScaleDeep primarily targets DNN training, as opposed to only inference or evaluation. The key architectural features from which ScaleDeep derives its efficiency are: (i) heterogeneous processing tiles and chips to match the wide diversity in computational characteristics (FLOPs and Bytes/FLOP ratio) that manifest at different levels of granularity in DNNs, (ii) a memory hierarchy and 3-tiered interconnect topology that is suited to the memory access and communication patterns in DNNs, (iii) a low-overhead synchronization mechanism based on hardware data-flow trackers, and (iv) methods to map DNNs to the proposed architecture that minimize data movement and improve core utilization through nested pipelining. We have developed a compiler to allow any DNN topology to be programmed onto ScaleDeep, and a detailed architectural simulator to estimate performance and energy. 
The simulator incorporates timing and power models of ScaleDeep's components based on synthesis to Intel's 14nm technology. We evaluate an embodiment of ScaleDeep with 7032 processing tiles that operates at 600 MHz and has a peak performance of 680 TFLOPs (single precision) and 1.35 PFLOPs (half-precision) at 1.4KW. Across 11 state-of-the-art DNNs containing 0.65M-14.9M neurons and 6.8M-145.9M weights, including winners from 5 years of the ImageNet competition, ScaleDeep demonstrates 6x-28x speedup at iso-power over the state-of-the-art performance on GPUs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080244" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/277ac28b4c05f1f57168f94a1cab699098706805", "sources": [ "DBLP" ], "title": "SCALEDEEP: A scalable compute architecture for learning and evaluating deep networks", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "27a28769c1e5902b2416c30bf06924bc3e2b9d70": { "authors": [ { "ids": [ "3256965" ], "name": "Robert Utterback" }, { "ids": [ "3379439" ], "name": "Kunal Agrawal" }, { "ids": [ "37103946" ], "name": "I-Ting Angelina Lee" }, { "ids": [ "1700486" ], "name": "Milind Kulkarni" } ], "doi": "10.1145/3018743.3018764", "doiUrl": "https://doi.org/10.1145/3018743.3018764", "entities": [ "Cilk Plus", "Debugging", "High- and low-level", "Library (computing)", "OpenMP", "Parallel computing", "Replay attack", "Runtime system", "Scheduling (computing)", "Spawn (computing)", "Task parallelism", "Thread (computing)", "Threading Building Blocks", "Work stealing" ], "id": "27a28769c1e5902b2416c30bf06924bc3e2b9d70", "inCitations": [], "journalName": "", "journalPages": "145-161", "journalVolume": "", "outCitations": [ "0821b7efb6a47783d8bf9a62291b24d94bbaaf31", "61f6ae61c4e0b0b4333e9cad1feead72ab73b9d4", "8a0af8ae748210ef571d074362b552af571e6d33", "0d9c39200e541ce7c5a2f3cfa54302c2c9bc631a", "03e53dddc865bf688fe313a94ad186a4d96bffe0", 
"fde93bf5f0e33a723ba9b2f70d1ab2716791af24", "5d882e0b90ac280c10b0734f47b8fafebb353f15", "65069cfecd815c9ed62c5a77f97789ecafdc74ab", "0e578433d4e8bb2a571c87a2d22816074902f009", "3ca7e9729dd00830ca25396d535295648ea19a81", "0653e2ed9f683868cb4539eb8718551242834f6b", "5bfa5f23a424c82e007623108a930964679f946b", "0fb659af82f2277c8a62ac888f4bfd01570e5470", "12324f77ec6d16da5d608447e60e874c4262fddc", "738ab88467f755b2c1360832e6f63da40bad389f", "1f33e83905ee40dfeeacd6c04f64c1af71c2b7fb", "1ebb08b527c2f476f9731dca1f3e6e694f5e6da1", "0ca7f4bd9212d0ed489c855916a82e8301961a32", "ca4114da5e6885e907ccf094f2f469dd23f6c816", "7f66a291f885617b1c975c0cae7ad0eb978f2aa5", "f4e10c197040252beeabcd3393c81062e60e7475", "3b62c1f19254820c75dd0011f038d7aae04b3414", "c1de36550bf324f964186105d6dd0769e86e3046", "1eac8c7fb82607a6d20187cfb29b3f9a02d578c2", "114801eccb5eb0831fd1848f351a138253a42f15", "20480eb94103d5ed4b5925a86430c872e4fabfdc", "519404f3a71f5684c405ebbb218aa29fa2028379", "ac7e5716b47cc2678b70dadd34d27648ceecfb0c", "47fea97038923902a502403219fc44fd22b5d19f", "2a974da13d6f956e37549378e00f86aa54bc5642", "0836859831c6c69412ae633bcf47e96355a92d6b", "feb5db279d43f6affb474398f96bb5c910aa2340", "11fb91cf78700428342aa3ed6636f655bb97ca33", "35939cfef9c11812bc51acf2729e5d03c15bd22c", "8368d2fc947cf6ac46a1d251d1895f2f87c7d498", "1eb4c18aa99f19a8dc91e6235e2dbdb7fd46d06c", "bc78d21a2d8715c1bad52c5bf8fa014356bf23fa", "3ee0eec0553ca719f602b777f2b600a7aa5c31aa", "2042b469be68653afcb2b7b38490c16369b4501a", "b483cabdce5562753abae51c144d26f3eaac75fb", "1c68746cb8bb97df3cc76dc125bb1998a064c93a", "1464629646aaf8662b725216ce8f6cb1443e6dc5", "3579d6615bedf827cd7e34f3b0e2feb35889b579", "217fa6474533b7ca0981aaa8600543afb308ab66", "064c377c070932a12377bf40101bdafdcb17fcf6", "2366a2c3c629bb20fdcb8f2b56136f3642a4edd6", "2fbbf89a921e4aa19ee3bfe73d0b34a6ad764656", "13f6ddd72bcf62dcc13cf4515be29d48948b9693", "8932c6d523f9f5aba76993c71c0089ba15155a7c", "60dcbce6532b6271b2825de468618617ea4e4c5d", 
"1201c82a903a45c780a06d6fa25ccb19037424ca", "f819e7daf5faa51290a68866a5e4308a78acf56e", "0065c8c9bf4961d637a69e26a8045074929a8cd3", "72657b0428f9b8f705546eb5a9147203a534d8f6", "0003b4b4c35f0384596b6f129f7dda8449cdc79f", "101f10b90ce859135868668478fbde5882c87458", "22a713d92a7f2a79f22c71e66b2511937b2a1a8f", "062008493d48ac414b45e3d989266d0574c1b3e5", "1d15930cd9e4ececf22fd96bf9ba52f12dc0665e", "017f9751ee75b4fe3db4a64a7e30c7067a1d75fb", "0df37799cedef8c3625cc554aee51e65cbcedd51", "d3630df8179b704a516b6ed01cfcad24ae50ade7", "545748e39f008263395dfad5c2d2cd5b67bbfff1", "b1d14e2b28759afd361d50e14744224b654e205e", "3a6372cfcf55edb428943f25e62f9b6542bc7651" ], "paperAbstract": "Record-and-replay systems are useful tools for debugging non-deterministic parallel programs by first recording an execution and then replaying that execution to produce the same access pattern. Existing record-and-replay systems generally target thread-based execution models, and record the behaviors and interleavings of individual threads. Dynamic multithreaded languages and libraries, such as the Cilk family, OpenMP, TBB, etc., do not have a notion of threads. Instead, these languages provide a processor-oblivious model of programming, where programs expose task-parallelism using high-level constructs such as spawn/sync without regard to the number of threads/cores available to run the program. Thread-based record-and-replay would violate the processor-oblivious nature of these programs, as they incorporate the number of threads into the recorded information, constraining the replayed execution to the same number of threads.\n In this paper, we present a processor-oblivious record-and-replay scheme for such languages where record and replay can use different number of processors and both are scheduled using work stealing. We provide theoretical guarantees for our record and replay scheme --- namely that record is optimal for programs with one lock and replay is near-optimal for all cases. 
In addition, we implemented this scheme in the Cilk Plus runtime system and our evaluation indicates that processor-obliviousness does not cause substantial overheads.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018764", "http://www.cse.wustl.edu/~angelee/papers/po-replay.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/27a28769c1e5902b2416c30bf06924bc3e2b9d70", "sources": [ "DBLP" ], "title": "Processor-Oblivious Record and Replay", "venue": "PPOPP", "year": 2017 }, "27a36203f14d73b95dfffec857b4ff923d9ef430": { "authors": [ { "ids": [ "31264686" ], "name": "Pandian Raju" }, { "ids": [ "22183919" ], "name": "Rohan Kadekodi" }, { "ids": [ "2002462" ], "name": "Vijay Chidambaram" }, { "ids": [ "1804661" ], "name": "Ittai Abraham" } ], "doi": "10.1145/3132747.3132765", "doiUrl": "https://doi.org/10.1145/3132747.3132765", "entities": [ "Attribute\u2013value pair", "Benchmark (computing)", "Data structure", "Database engine", "HyperDex", "Key-value database", "LevelDB", "Log-structured file system", "MongoDB", "NoSQL", "Rewriting", "RocksDB", "Skip list", "Throughput", "YCSB" ], "id": "27a36203f14d73b95dfffec857b4ff923d9ef430", "inCitations": [], "journalName": "", "journalPages": "497-514", "journalVolume": "", "outCitations": [ "b38bfeba50bc0adde29082d7bc5ffbc390a215ec", "06bd4d2d21624c7713d7f10ccb7df61bf6b9ee71", "1820a34042d6371a9e20484b0c63b698eb522a6c", "7ae71ff8a5f950c12305f27d8584a9c9203717a9", "b7c96b42556d800199209636d5358401c7043f41", "b0b2f180faa09e7bfcb6bb8e57288c3b61f11116", "3593269a4bf87a7d0f7aba639a50bc74cb288fb1", "8cded4cc565f8b7c41b40de6fe8d20231a7e8652", "8542fdcb42804a31fedb86874e3c75cd03830d4d", "04f020a4ab2134db6f9e98eadf216d94d440414a", "243818e2fb740035f2f591fb1f50e40f49e0e7f2", "bd624db25340a435b121ba5cc9c9ca60437580f1", "76d4f2374e4f5a9dfa69df8a9a33f627fff7e861", "3a8c90ab13adb55e3610a020c69f03d72dfae274", "4523a15a22bcabad38c81e1eba13a1bddd6704c5", "77ee477b0e6d0b6245eca5865ba95b55ed3db434", 
"9aa0d7253574e50fe3a190ccd924433f048997dd", "93f45cdc7bb6d291d77ce1f987cd724e06d5dd3c", "199ac28b6bc68bf05c77645ffae7640df114bca5", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "b7437111bf04a803878ebacbc275ba3715bccb18", "1693e83e47a99667f4bd6ad6e24d8b62a1ba22c8", "389b618c42c0d5d32a569b9cbaa02a7ff77c6be6", "0139dceb6cef21b234e454d53154f30391495862", "d50b79bc49be772256a4e2b474f6cffd9bfa794c", "098d792d1783b5f6fc098203f71f21f5d053c653", "44028c00bf3872ae06aa46f569c3b9dceebdd909", "f4147b82166813bbe5dc01e9486664c273d1556c", "c0d4c5713dba6472edb651334d25c3a5aa78d6f3", "5d02be0f9e558caaaeda15ff8abf11971074f70c", "b4087345c63a7b2412eeb31066b5e4bceadbbcb2", "125d42b58e0f871c65be5abddc81d5c188c37b20" ], "paperAbstract": "Key-value stores such as LevelDB and RocksDB offer excellent write throughput, but suffer high write amplification. The write amplification problem is due to the Log-Structured Merge Trees data structure that underlies these key-value stores. To remedy this problem, this paper presents a novel data structure that is inspired by Skip Lists, termed Fragmented Log-Structured Merge Trees (FLSM). FLSM introduces the notion of guards to organize logs, and avoids rewriting data in the same level. We build PebblesDB, a high-performance key-value store, by modifying HyperLevelDB to use the FLSM data structure. We evaluate PebblesDB using micro-benchmarks and show that for write-intensive workloads, PebblesDB reduces write amplification by 2.4-3x compared to RocksDB, while increasing write throughput by 6.7x. We modify two widely-used NoSQL stores, MongoDB and HyperDex, to use PebblesDB as their underlying storage engine. 
Evaluating these applications using the YCSB benchmark shows that throughput is increased by 18-105% when using PebblesDB (compared to their default storage engines) while write IO is decreased by 35-55%.", "pdfUrls": [ "http://doi.acm.org/10.1145/3132747.3132765", "http://www.cs.utexas.edu/~vijay/papers/pebblesdb-sosp17-slides.pdf", "http://www.cs.utexas.edu/~vijay/papers/sosp17-pebblesdb.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/27a36203f14d73b95dfffec857b4ff923d9ef430", "sources": [ "DBLP" ], "title": "PebblesDB: Building Key-Value Stores using Fragmented Log-Structured Merge Trees", "venue": "SOSP", "year": 2017 }, "27a92bd3d6875fff1e3fbb8d18682aa43cdd8ec3": { "authors": [ { "ids": [ "1730671" ], "name": "Arpita Patra" }, { "ids": [ "39044260" ], "name": "Pratik Sarkar" }, { "ids": [ "29728149" ], "name": "Ajith Suresh" } ], "doi": "", "doiUrl": "", "entities": [ "Computation", "Cryptography", "Information retrieval", "Oblivious transfer", "Overhead (computing)", "Personally identifiable information", "Private information retrieval", "Secure multi-party computation", "Security parameter" ], "id": "27a92bd3d6875fff1e3fbb8d18682aa43cdd8ec3", "inCitations": [ "38a8ed0a65c8581e3b1d42ef32a7ab37cc9f98fc", "369756d09b28a70979483f5d786c35ec336c3b45", "5e75d9d75536f8b126c47ae4ce91b47d51c7cc69" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "940", "journalVolume": "2016", "outCitations": [ "58bb949ed93e2f21288638508e8818e58329cc41", "5161aa950ec876026dfc24b4cbf69ae1e552c0e6", "588972fccb475cfaafdbb6efeef592eacadbe5f0", "547a94f8b16f521ee2eac299572a5c767d628289", "497bc6f8cdd2cb16c52d02208b4f3c82c6e72e61", "bc69e11d04f8f9a8b66c36b31468ab651bac51d7", "02477609a3568d7ab4c80bc3ca64f3d5bd0d8737", "23ec68ed03b485b645478a3f6905615617d905a6", "db0f82a419f89cda64fcbec2c58137862cd04475", "536754e19b8b2850497069a6e9c6b75d368621d4", "1d2a3436fc7ff4b964fa61c0789df19e32ddf0ed", "8f8cf4a3f24c22ba1f9e6a810ceb8b183c328e06", 
"3f40a5b0bcf4401c3f8efdbb539deec2763ad916", "1eb0b401e7dbd8a4e638243713b39fffc991fe9f", "11f093a54c40d8ea8336d8e575d5ab717e0fbb51", "0435dddec1c8b1011bd5e3ea07c45d1cdf0b520b", "42333e3f231bbfe508f6da6bad2feff9ae223113", "41d04d96d9da0cf3e940cd3e8038be84396488a4", "05c721f47d9a53a5739c88a14cb36baf12d2b0fa", "87260f234a7d604880ef4c37a1fed0a85d307586", "0affd3f06d26de268d81c288454dd7880e518f9e", "71f582193c434a57f0dd7e8d8da9bbb6cc86777e", "5a55071f59e367d12e759c346aa6ad3efade850a", "1890cecdbba895fbcf975c4aef1616e184e69abb", "12be907eaf5e2d5e2bd475857cbafcc821c3f31c", "f2c4398e489bed6cd2ac00492c762f6b112aa7bc", "71e166a85195362cd48311cfb473debc1614602a", "16d23baa55835434808a3420e0884e0dc44680f6", "de4b461d1f1cc7f7044c92b49c586a2463b28a8e", "05543dfa89c4e89ed1f78a1b83c2e172cd8f6321", "f2a37db2f2104375e6283d13b8bce6a4ee3d8bea", "0166c8b5c6445043b94fc7b62d145d0c3c8b6483", "19c3736da5116e0e80a64db35afe421663c4b4a8", "2a30e769b5b1787dc2b8dce5eaab253e9a4c4a76", "2eb315952f6a2e342b19cf95287c8a0b1f2c36fa", "47b8fd6ee8b07bd14de3c91df515b11180121de9" ], "paperAbstract": "Oblivious Transfer (OT) is one of the most fundamental cryptographic primitives with wide-spread application in general secure multi-party computation (MPC) as well as in a number of tailored and special-purpose problems of interest such as private set intersection (PSI), private information retrieval (PIR), contract signing to name a few. Often the instantiations of OT require prohibitive communication and computation complexity. OT extension protocols are introduced to compute a very large number of OTs referred as extended OTs at the cost of a small number of OTs referred as seed OTs. We present a fast OT extension protocol for small secrets in active setting. Our protocol when used to produce 1-out-of-n OTs outperforms all the known actively secure OT extensions. 
Our protocol is built on the semi-honest secure extension protocol of Kolesnikov and Kumaresan of CRYPTO\u201913 (referred as KK13 protocol henceforth) which is the best known OT extension for short secrets. At the heart of our protocol lies an efficient consistency checking mechanism that relies on the linearity of WalshHadamard (WH) codes. Asymptotically, our protocol adds a communication overhead ofO(\u03bc log \u03ba) bits over KK13 protocol irrespective of the number of extended OTs, where \u03ba and \u03bc refer to computational and statistical security parameter respectively. Concretely, our protocol when used to generate a large enough number of OTs adds only 0.011-0.028% communication overhead and 4-6% runtime overhead both in LAN and WAN over KK13 extension. The runtime overheads drop below 2% when in addition the number of inputs of the sender in the extended OTs is large enough. As an application of our proposed extension protocol, we show that it can be used to obtain the most efficient PSI protocol secure against a malicious receiver and a semi-honest sender.", "pdfUrls": [ "http://eprint.iacr.org/2016/940", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/fast-actively-secure-ot-extension-for-short-secrets/", "http://eprint.iacr.org/2016/940.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/27a9/2bd3d6875fff1e3fbb8d18682aa43cdd8ec3.pdf", "s2Url": "https://semanticscholar.org/paper/27a92bd3d6875fff1e3fbb8d18682aa43cdd8ec3", "sources": [ "DBLP" ], "title": "Fast Actively Secure OT Extension for Short Secrets", "venue": "NDSS", "year": 2016 }, "27afa8e3a231af1d322b55ba6c7c2db5d6e9db96": { "authors": [ { "ids": [ "2491442" ], "name": "Minho Bae" }, { "ids": [ "9159307" ], "name": "Junho Eum" }, { "ids": [ "9513414" ], "name": "Donghoon Kim" }, { "ids": [ "1678752" ], "name": "Sangyoon Oh" } ], "doi": "10.1109/ICPP.2017.29", "doiUrl": "https://doi.org/10.1109/ICPP.2017.29", "entities": [ "Algorithm", "Benchmark (computing)", 
"Database", "In-memory database", "Parallel computing", "Query optimization", "Resource Description Framework", "SPARQL", "Scalability", "UniProt" ], "id": "27afa8e3a231af1d322b55ba6c7c2db5d6e9db96", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "201-210", "journalVolume": "", "outCitations": [ "743376fa0ba57f5f9a46ca45e0315bb264833113", "1156f60e40548096df49528b1342bb3e88b0f378", "5f3f9223c5c9f896be099bc177929febad508407", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "5e9b8bb22ffc4d2432383109eed100e89c552546", "09bb5276b6997b97a46c4ee74ef76ba9e3bd83ff", "3b191386bf16b18dcf0c532b5fb40ac961a73ed7", "01e027c5c2e5375b88e9776c8d606675f51be827", "36e59e71a19fba9f61012e8653a9eee884eac93c", "8613beb2717b8a154ce44164facb054869d2fe67", "31f27864950a6c417cf996927b2d5558f70d2b14", "55d8efeaedfdf6db48b6e98b46c2a62876959ce9", "b8d502d42e82401e587638729a67cf5a07337c71", "0ff8f8980244f348e5b75849f831e8d6f4b4f89d", "fb90100135d1a47dca6ab6be046ea09754787449", "744eacc689e1be16de6ca1f386ea3088abacad49" ], "paperAbstract": "To overcome scalability and performance issues for process queries over a web-scale RDF data, various studies have proposed RDF SPARQL query processing algorithm using parallel processing manners. However, it is hard to resolve the scalability and performance issues together because the problem of communication overhead between nodes is closely related to the data distribution for parallel processing. For efficient RDF query parallel processing, it is essential to distribute and process data evenly while reducing communication overhead. In this paper, we propose RDF query parallel processing algorithms with RDF data partitioning technique to guarantee evenly distributed data over the cluster. We also propose our in-memory RDF query processing system as a form of Bulk Synchronization Parallel system to reduce network overhead. 
Our empirical evaluation results show that the proposed system outperforms a popular RDF-3X on LUBM benchmark and UniProt queries from 2.20 to 43.08 times. Especially, the effectiveness of the system improves significantly when the SPARQL queries are complex with high input and select.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.29" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/27afa8e3a231af1d322b55ba6c7c2db5d6e9db96", "sources": [ "DBLP" ], "title": "High Performance Query Processing for Web Scale RDF Data using BSP Style Communication and Balanced Distribution", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "27b01b4f1c47c93913ef4efd9a88c091d8110529": { "authors": [ { "ids": [ "5060260" ], "name": "Ying-Ju Yu" }, { "ids": [ "2797270" ], "name": "Carole-Jean Wu" } ], "doi": "10.1109/IISWC.2017.8167768", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167768", "entities": [ "Augmented reality", "Autonomous car", "Central processing unit", "Control knob", "Dynamic frequency scaling", "Dynamic voltage scaling", "Embedded system", "Frequency scaling", "Limiter", "Mobile device", "Multi-function printer", "Response time (technology)", "Smartphone", "Transistor", "Wearable technology" ], "id": "27b01b4f1c47c93913ef4efd9a88c091d8110529", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "122-123", "journalVolume": "", "outCitations": [ "961c1840153791e10376630d6a428300389e98de", "53d2bf89576e95387a1004842722bd721d675c18", "938286fa80fe31fa3e35f450989f27659296f25f" ], "paperAbstract": "Modern high-performance electronics are embedded in smartphones, self-driving automobiles, and augmented reality wearable. These computing platforms are high performance and multifunctional. 
One of the major performance limiting factors in these platforms is the poorly designed thermal solution that is triggered to prevent overheating at the processor transistor junction and at the platform surface. Even though the dynamic voltage and frequency scaling (DVFS) feature widely available in modern processors can be used as a control knob in processor temperature management, its associated performance cost is tremendous and the response time for temperature control is significant.", "pdfUrls": [ "http://faculty.engineering.asu.edu/carolewu/wp-content/uploads/2012/12/Yu_iiswc2017_final.pdf", "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167768", "http://faculty.engineering.asu.edu/carolewu/wp-content/uploads/2012/12/Yu_IISWC_2017_Poster.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/27b01b4f1c47c93913ef4efd9a88c091d8110529", "sources": [ "DBLP" ], "title": "Understanding the thermal challenges of high-performance mobile devices with a detailed platform temperature model", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "27d25fe56defa415d6905af0699065eb165d2082": { "authors": [ { "ids": [ "2279739" ], "name": "Johannes Sp\u00e4th" }, { "ids": [ "2364480" ], "name": "Karim Ali" }, { "ids": [ "1752222" ], "name": "Eric Bodden" } ], "doi": "10.1145/3133923", "doiUrl": "https://doi.org/10.1145/3133923", "entities": [ "Alias analysis", "Benchmark (computing)", "DACAPO", "Data-flow analysis", "Dataflow", "Dataflow programming", "Experiment", "Pointer (computer programming)", "Static program analysis", "Typestate analysis" ], "id": "27d25fe56defa415d6905af0699065eb165d2082", "inCitations": [ "c452d481d20357297205e979137e4ef5a40468c9", "4b8c9eec7737918e3897f7ebcbc66cff2f3087b2" ], "journalName": "PACMPL", "journalPages": "99:1-99:27", "journalVolume": "1", "outCitations": [ "e8af823e0b25acfc8e41b59805e17d9d1b126990", "54943f66a73914cb84609cd25660f0f192db6459", 
"41755135116c0c137f0f64957d35c8ef9f11ab3d", "1789032a8348fccf2a3daf68bc06ed4608dba25b", "4c9dd5c75569d7bc0531d71aa309504ba965ac76", "5d4927ab2a6a43f0789c8008d9cff191a745d800", "ffb2003551fa97cde9d82f8219fb2754090e37b3", "24b653cce07313f3d38ec8755db3e384ccee70df", "0372c45058f84f08392ab66fa0b7b22cbcd716d0", "00f8127651d4ced1a874758f4af5b18bb594812c", "c7bce09cc041afb5189d5428f2988bba1889d7a3", "3990ed581a50f41e921f48aec510cd543154c92d", "115be3be1d6df75ff4defe0d7810ca6e45402040", "a9f9e9e5dfd9fc675c1c41a8e901f124d4b5d793", "85ff7e25c39d216bd50ac6eb89e335ca7aef43f4", "24af29d2c2f88e13e53296e2f0e10595663fb019", "1b12eb42a9e04af626c7ed266b2e299d7f6f96a3", "394635721bb5e72ccfb0289fa9b7b0f3a62b7612", "210252ed7f25201c5d33d9f7791f3cab25bc952b", "13ecf9e9ef6fad925d31c9055e073a2323b51f38", "837f2631ea9250d7b13e13a54d95e97ea94440c1", "30f02b5309d2d5c84d2ac76ebb1afe3023220164", "67816566ddb1012ba702880d9b169eff691c24ce", "5abd03095061c25ab7c4fab6b33a6ceb999c78e3", "71f8163801980fbaa494cb8c149bd7388034c2ba", "00a9ba0063d34ec56792849a67ef57b4601becbb", "47766fe0dd7fa54b2f4c37b0c11900c8ec3f2e36", "5cb216302bdebaec708f705f83b317eeccf73753", "94d0e8b118efecb9f5b2056d84bf22253a2fb63c", "16a70aa4c8332ff823bf6053c7fff8271ccaa05a", "3df6243260b3cdc0882eaf1a01a28a778482f146", "2e64703efa2abf0d4e574c2c6249478a7cbbb277", "027d6c947ceac524bc076382a69e7e7046772ef7", "170746e36dfe606ca448ac4ca518b91bf6f828d0", "80a36a56472c4929ea9daf59516f4502320d4764", "eaf8a0bfb24a823de1352b92d42e43a4479acf99", "15f8f3f80c22008cb5f95e870403227d38420c4f", "38b7e9721cc3e326580465deaf0f0028b92afe6a", "eac8f58bfba1bb5d7d6c469332fa43225ad1e7a1" ], "paperAbstract": "Program analyses frequently track objects throughout a program, which requires reasoning about aliases. Most dataflow analysis frameworks, however, delegate the task of handling aliases to the analysis clients, which causes a number of problems. For instance, custom-made extensions for alias analysis are complex and cannot easily be reused. 
On the other hand, due to the complex interfaces involved, off-the-shelf alias analyses are hard to integrate precisely into clients. Lastly, for precision many clients require strong updates, and alias abstractions supporting strong updates are often relatively inefficient. \n In this paper, we present IDEal, an alias-aware extension to the framework for Interprocedural Distributive Environment (IDE) problems. IDEal relieves static-analysis authors completely of the burden of handling aliases by automatically resolving alias queries on-demand, both efficiently and precisely. IDEal supports a highly precise analysis using strong updates by resorting to an on-demand, flow-sensitive, and context-sensitive all-alias analysis. Yet, it achieves previously unseen efficiency by propagating aliases individually, creating highly reusable per-pointer summaries. \n We empirically evaluate IDEal by comparing TSf, a state-of-the-art typestate analysis, to TSal, an IDEal-based typestate analysis. Our experiments show that the individual propagation of aliases within IDEal enables TSal to propagate 10.4x fewer dataflow facts and analyze 10.3x fewer methods when compared to TSf. 
On the DaCapo benchmark suite, TSal is able to efficiently compute precise results.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133923" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/27d25fe56defa415d6905af0699065eb165d2082", "sources": [ "DBLP" ], "title": "IDEal: efficient and precise alias-aware dataflow analysis", "venue": "PACMPL", "year": 2017 }, "27f503611a6020a2c6b196042ec63be0c79306ba": { "authors": [ { "ids": [ "26642757" ], "name": "Nitin Agrawal" }, { "ids": [ "3002190" ], "name": "Ashish Vulimiri" } ], "doi": "10.1145/3132747.3132758", "doiUrl": "https://doi.org/10.1145/3132747.3132758", "entities": [ "Algorithm", "Anomaly detection", "Approximation algorithm", "Best, worst and average case", "Data compaction", "Geocaching", "Machine learning", "Petabyte", "Range query (data structures)", "Real-time computing", "Synthetic data", "Terabyte", "Time series" ], "id": "27f503611a6020a2c6b196042ec63be0c79306ba", "inCitations": [], "journalName": "", "journalPages": "647-664", "journalVolume": "", "outCitations": [ "15c7d3d5cfce46110a5aa5c6a482e359a96082b4", "0b9c6fe7beb3971b27aff8c5aa5e133de74316a4", "af216ef386aaf509725f030e7667d1b58ef5521a", "02d1105bec3877ed8cd2d28f76b67ae8ba3f2331", "ab1f816ce79817a09487ea7866c95ce930d37497", "1a1b25efe423c7c864dcee8d5d33ab52e217aa14", "31b963f48ba38f1f9c5cc240f43331b07229861e", "044a6151d054c42b6a617a6bbb35daba41a7e687", "e605780b285a8d17174eddd9c35dc4b67cefa890", "37861bb8d8daeab11d379a012ab526222a3f9990", "37e0d25940bd49022c41e63909532acd88eb16b9", "1e96b0c0ac74070a984fec94f085109839d842a9", "619b3051795d04ac00e8c91e1966befea5263739", "14a3a7534a52b0908939374aecd804b9632ad133", "3593269a4bf87a7d0f7aba639a50bc74cb288fb1", "b1d80550be78da0548b9f61cb0cc5cc59d6b3133", "3946e3a4a4f5b42d55859153e98d3e83151303bb", "2d6cb831ecb36ce45f7b767d16c207e03733dfd6", "50f5bc875c5c58cce53ba1a9e01f167bd597b00c", "52f12e65180f2ca8fa8536616f9ae83248ad7ad2", "595bdc4d40c7696b829f8f2e45645fe0bb6154ef", 
"8451c2812a1476d3e13f2a509139322cc0adb1a2", "00aa614734a26a19b09a0a3bdee2adc77bdac5e4", "f6bf07a71c568f32861bd10bd9332ad37e4b08ee", "ec5540f6da71eb79a18bbcacb48b8ea847cad120", "048ff07f6f77b45d173dc21849ba5285c9132d78", "fff9acdff3b88efc12e196d39ecf27266e9e6c59", "376dfa43778847987e2e3ac17e4df12418091956", "3d11c0ce702f416401ec383e7cecd82802bc81b2", "73bd5eed405efe923144c6c34a28b76d328c5ade", "99a297647eaa430d0587774f065bd542b859885e", "a11b243c571ade72c1be5bbb4105b00388174bd6", "1b753f82e08a04d1442f357edbfd03385ef788f7", "38097f2792c53d237db4cf4d94ccf90bf094a86c", "a6855b98191b2b25af6b27b950ffb5fa5e90dad3", "a6e0e547d137327de6e808dd59a81edcad07634b", "0547b04865f84a41862d24340efd16d18fa03a24", "b1582e2a952924fecb86a8baa7f5e90902c3bbfb", "094ca99cc94e38984823776158da738e5bc3963d", "07641ebcb7726102c37f00525a0a7a3c859bf036", "eaf504d40ba3094467bdf0d355be793270f5f5c9", "4f53b1fbf0b21c75c11dc77c98a2ec08815227a0", "10a210177786481570c8e87e99366c7e53ac80d3", "45998ae76d590314e6fd930a02d9d49556bc5a64", "00141772c2343958b8aae54b42a6143e917f368a", "28b768c625b85cb59608df8fd55282e7fbb3c038", "411eb6534d39a37ed43443ba1d2e168c73171330", "da01f7fcc5c7eeba75bc09a41fdd946e65210090" ], "paperAbstract": "SummaryStore is an approximate time-series store, designed for analytics, capable of storing large volumes of time-series data (~1 petabyte) on a single node; it preserves high degrees of query accuracy and enables near real-time querying at unprecedented cost savings. SummaryStore contributes time-decayed summaries, a novel abstraction for summarizing data streams, along with an ingest algorithm to continually merge the summaries for efficient range queries; in conjunction, it returns reliable error estimates alongside the approximate answers, supporting a range of machine learning and analytical workloads. 
We successfully evaluated SummaryStore using real-world applications for forecasting, outlier detection, and Internet traffic monitoring; it can summarize aggressively with low median errors, 0.1 to 10%, for different workloads. Under range-query microbenchmarks, it stored 1PB synthetic stream data (1024 1TB streams), on a single node, using roughly 10 TB (100x compaction) with 95%-ile error below 5% and median cold-cache query latency of 1.3s (worst case latency under 70s).", "pdfUrls": [ "http://doi.acm.org/10.1145/3132747.3132758", "http://pages.cs.wisc.edu/~nitina/Publications/summarystore-sosp17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/27f503611a6020a2c6b196042ec63be0c79306ba", "sources": [ "DBLP" ], "title": "Low-Latency Analytics on Colossal Data Streams with SummaryStore", "venue": "SOSP", "year": 2017 }, "28013fc6979d483895e2998d05a7373807290d3e": { "authors": [ { "ids": [ "2457887" ], "name": "Bum Jun Kwon" }, { "ids": [ "35984826" ], "name": "Virinshi Srinivas" }, { "ids": [ "2313625" ], "name": "Amol Deshpande" }, { "ids": [ "3343194" ], "name": "Tudor Dumitras" } ], "doi": "10.14722/ndss.2017.23220", "doiUrl": "https://doi.org/10.14722/ndss.2017.23220", "entities": [ "Antivirus software", "Client-side", "Download", "Ecosystem", "Human\u2013computer interaction", "Lockstep (computing)", "Malware", "Patch (computing)", "Potentially unwanted program", "Server (computing)", "Server-side", "Streaming media", "Trojan horse (computing)", "Underground", "Unsupervised learning" ], "id": "28013fc6979d483895e2998d05a7373807290d3e", "inCitations": [ "1106434ab977f1dc677431d94c74b527de01492d", "16b4a0d02f844e27a30dd722811442dd8e6a580d", "1f16b4f47f97a2c48f7f414f694e4954932937dd" ], "journalName": "CoRR", "journalPages": "", "journalVolume": "abs/1611.02787", "outCitations": [ "365390ced4790d136292829b7b6a2eec61b57eeb", "192adf536648ad635ecb3768b943895579bcc7fe", "209eb826a40957a5d712dc35546954d88fb5c0ff", 
"1ed8886fb8a9ba6b57b8742d0e5b76a32daa8fad", "3871a09a4c64716182f2238c4144b223faa39202", "0c7c58bf7addb18b432ef36dc0e662624d7f3e4b", "e89e5ed31995a90b2454044325c92e138094d273", "2ff9c10a0a8f43306f3a0492f8d6eca744d4e7c7", "1d94cd572d4955b3e1776cae52861c283150fa76", "7e2cd5ee3395539b83b54cd93951637398423063", "1a16196ac67b8fd68553f6e02d290c880bb9cf3e", "6470930ff36cde541c837bedcf17c20490fedbbc", "b63dfcb71acddf00c88c2de0291291896069fde3", "0a645cbb39ac37929a2e6ba55fc248dc4ca967e4", "254687dec6e5456fa289826da6558186ef2cc24f", "31e618da461317bcce5799de7ba1b2ef0d5488b2", "3096595380cfc118bb163b74897e13a84d094432", "0796bb6c803e4256d7bdc0885c6a26d058da3319", "07cbb544b23a4f0e914863cb17afa4eafd9f59a8", "13e00ac021b75f895bfc963f5ecac42ed3dae86c", "2152f9f91e798c23715fdce699b6a8f0f8d43170", "35df1dcd036d8720525f95ee9330ed496f3dab42", "6e9f6466a125587b4eeeb3845a5d8afe8b1e902c", "01dd19809cd812ef9109c1870dbb9eeda82f846a", "71e5a6c95ff476c303c3adeae0c1a4387485f733", "3f7060aa21f5087cb0a83b4f6576fad5b2bf9055", "83e8d61a3d767e3b6c6b8dce63c35bc709fddc49", "996ce6a529c3d7652a304ca05bf9d32d3db44e95", "08ae384c2c68333419f76bcb5f14dc2ba2ef8d33", "b19c2aa855c247461d9caa48ea94b0b39d650001", "0be628988f8ee0beaad57d68cbae8b635f0b555c", "3e60f205a6b96dc709462e2f3c485a66ee3d2f78", "12de03e2691c11d29a82f1c3fc7e97121c07cb5b", "a200a5e0211d3c4cb3ab1f3358fd8c6dcfc3762f", "0d14221e3bbb1a58f115a7c7301dc4d4048be13f", "0597732c00590a4c1ef9f718b650a62a03daf97e", "39b24b34ee1b0071f3fade608f3b2d9fa41fd050", "20a56bd9359641de863ffc55a53436e0b7a32c16" ], "paperAbstract": "The growing commoditization of the underground economy has given rise to malware delivery networks, which charge fees for quickly delivering malware or unwanted software to a large number of hosts. A key method to provide this service is through the orchestration of silent delivery campaigns. These campaigns involve a group of downloaders that receive remote commands and then deliver their payloads without any user interaction. 
These campaigns can evade detection by relying on inconspicuous downloaders on the client side and on disposable domain names on the server side. We describe Beewolf, a system for detecting silent delivery campaigns from Internet-wide records of download events. The key observation behind our system is that the downloaders involved in these campaigns frequently retrieve payloads in lockstep. Beewolf identifies such locksteps in an unsupervised and deterministic manner, and can operate on streaming data. We utilize Beewolf to study silent delivery campaigns at scale, on a data set of 33.3 million download events. This investigation yields novel findings, e.g. malware distributed through compromised software update channels, a substantial overlap between the delivery ecosystems for malware and unwanted software, and several types of business relationships within these ecosystems. Beewolf achieves over 92% true positives and fewer than 5% false positives. Moreover, Beewolf can detect suspicious downloaders a median of 165 days ahead of existing anti-virus products and payload-hosting domains a median of 196 days ahead of existing blacklists.", "pdfUrls": [ "http://arxiv.org/abs/1611.02787", "http://wp.internetsociety.org/ndss/wp-content/uploads/sites/25/2017/09/ndss2017_03B-5_Kwon_paper.pdf", "https://arxiv.org/pdf/1611.02787v1.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/catching-worms-trojan-horses-and-pups-unsupervised-detection-silent-delivery-campaigns/", "https://arxiv.org/pdf/1611.02787.pdf", "http://www.umiacs.umd.edu/~tdumitra/papers/NDSS-2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/90a7/43fb3dcc2f76b8418dd67cabb5650d24e7e6.pdf", "s2Url": "https://semanticscholar.org/paper/28013fc6979d483895e2998d05a7373807290d3e", "sources": [ "DBLP" ], "title": "Catching Worms, Trojan Horses and PUPs: Unsupervised Detection of Silent Delivery Campaigns", "venue": "NDSS", "year": 2017 }, 
"2804bcc9df4352c2da1367f182a54e7c67a160ec": { "authors": [ { "ids": [ "2576892" ], "name": "Nathan Beckmann" }, { "ids": [ "39783437" ], "name": "Daniel S\u00e1nchez" } ], "doi": "10.1109/HPCA.2017.43", "doiUrl": "https://doi.org/10.1109/HPCA.2017.43", "entities": [ "L\u00e1szl\u00f3 B\u00e9l\u00e1dy", "Markov chain", "Markov decision process" ], "id": "2804bcc9df4352c2da1367f182a54e7c67a160ec", "inCitations": [ "237d67f89d957068b83b3b3930ab7bf13b8d0e4f", "f06233da50ed916579f5f536da5a66fd3c4c0ce8", "6f80eed89f96c8b91d08b2de387c4ac69c4b5368", "53955788f9b2afa2175309b81b60ea884322de4b", "6f0c1898575d56d1c1073b1f2eb6cba5bc931005", "f04671d1702dc404fa6c27d2ada5c70666a9abe0" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "109-120", "journalVolume": "", "outCitations": [ "1aa978bdf1f66952171b4b176c9200f1a286b842", "300130e8d199184c8c7921f44f338fae47150152", "3ee47780011ee618bd5a64624a662375e1958e0a", "9451f420a5d39d75d1e6c2cbbbae4544afb412a6", "bc4fbbaaf1ede8d8aff16a10243226419fc32cf8", "3c0d4b5f9085b659318cc74157aaf255194a5063", "74711ebc709fb794ddbaefa75c9ec3b48e5a2a17", "2d99655efd8fff69896b82b2f1eccef37a8c0949", "35c329ee747093757021ffa76eb5d3719aa0c3a5", "06125169a21ef17641d7199544417b21c378eede", "0ad250e85e62c5675cafb08d11632f79ecad589c", "9be5ca872bf2e2e1c1d7979fb79ca6c443902058", "0717371b254df3e466a11d1965c2c9541a43b7a3", "a169ca8993abcfa03eb22c50a4227983d740b31a", "7779c10dfa1f84953016b6292844815c5faf84f5", "792aa5a81ac1d344de450ec59eec339aa0e508aa", "057ecc6780a2b2cb533884167962654451e4960b", "813007c4ca412b1b3d8eba4aad66e53ce3cd77a8", "4a58e3066f12bb86d7aef2776e9d8a2a4e4daf3e", "0af7a3650e3c905f0afb0e1b5d340552694f93d5", "993fd60f78603446088ad145a509ad3894095253", "55043afbb87e38627778a323dfdc35a55357e47d", "3bf23f74bf33ed52f7c28587fab315610b27221a", "8671317d25f917af263b457612f959823d5c86b1", "398cc68e6df0cffb5b06da2ab39b004bec8ad9ab", "05fbc97ba3af4414fd7f1cb5bc56f0cc50b3d554", 
"940cfddac96674a154d92c91c1ae4dae61865d76", "6d5099039729d930841c21893c5585a194d90a79", "c3334cbb903d860aa7cee43b5435e291e9422048", "3bbb5daf6e7be50c308b77730efb13b7bcf500b7", "1eb4c18aa99f19a8dc91e6235e2dbdb7fd46d06c", "31b730826ca7db102f193b3b6f2cee62036ec205", "0ba50060cc68c61c0066335a947841965b03b6ab", "006b9d556986e23b994b2d68bd98d575ddb9cf3e", "1728de7b027e827dfc67ceb3b0e23b841a4b1538", "5d999f4a5567e6f4a54e46bbcd6006f75ab0cbac", "1354abf8baa8b6d39c65758072ae9d0b07227d5c", "14700361bae459cbc0ad07019da44ff7db4f5b71" ], "paperAbstract": "Much prior work has studied cache replacement, but a large gap remains between theory and practice. The design of many practical policies is guided by the optimal policy, Belady's MIN. However, MIN assumes perfect knowledge of the future that is unavailable in practice, and the obvious generalizationsof MIN are suboptimal with imperfect information. What, then, is the right metric for practical cache replacement?We propose that practical policies should replace lines based on their economic value added (EVA), the difference of their expected hits from the average. Drawing on the theory of Markov decision processes, we discuss why this metric maximizes the cache's hit rate. We present an inexpensive implementation ofEVA and evaluate it exhaustively. EVA outperforms several prior policies and saves area at iso-performance. 
These results show that formalizing cache replacement yields practical benefits.", "pdfUrls": [ "https://people.csail.mit.edu/sanchez/papers/2017.eva.hpca.pdf", "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.43", "https://www.cs.cmu.edu/~beckmann/publications/papers/2017.hpca.eva.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2804bcc9df4352c2da1367f182a54e7c67a160ec", "sources": [ "DBLP" ], "title": "Maximizing Cache Performance Under Uncertainty", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "2812b09ae0f6b27dd8a61b738cd6a45e5d729203": { "authors": [ { "ids": [ "23144610" ], "name": "Varun Velamuri" } ], "doi": "10.1007/978-3-319-64203-1_41", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_41", "entities": [ "Algorithm", "Binary search algorithm", "Blocking (computing)", "Compare-and-swap", "Data structure", "Dictionary", "Linearizability", "Lookup table", "Non-blocking algorithm", "Radix tree", "SWAP (instrument)", "Search tree", "Tree (data structure)", "Trie" ], "id": "2812b09ae0f6b27dd8a61b738cd6a45e5d729203", "inCitations": [], "journalName": "", "journalPages": "565-579", "journalVolume": "", "outCitations": [ "00b3ebd315991e5b5f4e6beec2e1488281368028", "478e5f62fd9a1cd9e2896b3c2d620089ebee7554", "38611b424808954be2c1375da1a873b1e2487ace", "753b63625daae0f39aba2c074e2d8254a6dcf6b3", "094dd52d6f308e0dbd6f12a7d5722303e3ba668a", "c37a726cde896633a469c33614bcd0034d6d11af", "58da996efd7320d1e484263c97c930c8979c474f", "938286fa80fe31fa3e35f450989f27659296f25f", "984d45494026f7a2fc9c4193ee65b5ef35d937ad" ], "paperAbstract": "Radix trees belong to the class of trie data structures, used for storing both sets and dictionaries in a way optimized for space and lookup. In this work, we present an efficient non-blocking implementation of radix tree data structure that can be configured for arbitrary alphabet sizes. 
Our algorithm implements a linearizable set with contains, insert and remove operations and uses single word compare-and-swap (CAS) instruction for synchronization. We extend the idea of marking the child edges instead of nodes to improve the parallel performance of the data structure. Experimental evaluation indicates that our implementation out-performs other known lock-free implementations of trie and binary search tree data structures using CAS by more than 100% under heavy contention.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_41" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2812b09ae0f6b27dd8a61b738cd6a45e5d729203", "sources": [ "DBLP" ], "title": "Efficient Non-blocking Radix Trees", "venue": "Euro-Par", "year": 2017 }, "2817df10c4ffe29482928cb97b8ee89d8560b4cd": { "authors": [ { "ids": [ "1729924" ], "name": "Pedro Fonseca" }, { "ids": [ "3189426" ], "name": "Kaiyuan Zhang" }, { "ids": [ "31825486" ], "name": "Xi Wang" }, { "ids": [ "1689594" ], "name": "Arvind Krishnamurthy" } ], "doi": "10.1145/3064176.3064183", "doiUrl": "https://doi.org/10.1145/3064176.3064183", "entities": [ "Correctness (computer science)", "Distributed computing", "Formal verification", "Software bug", "Trusted Computing", "Trusted computing base" ], "id": "2817df10c4ffe29482928cb97b8ee89d8560b4cd", "inCitations": [ "e4a3a1ce28b472fdeb3910509784186d02ac8bf6", "2f5536f7cdb724d73932d45422a5f7c42b692437", "03e89626cbb864fb1243b4ee8b4037020a9250eb", "22f88f630451cdb6d1ee1a632df81a5e2dd50285" ], "journalName": "", "journalPages": "328-343", "journalVolume": "", "outCitations": [ "96d2a84e57ff1475394b7702473f3e8e868feb68", "0bee387ff5485315e9212c2195c71c8d0e23ea1a", "16a455aeacd14529bee92b0c197619fa2d173151", "55bef5db971deed1358bcb2b375d6832b9ba6a1b", "ad8c8feae36e649d885af3df3d427a3ea40651c2", "79aec2093b2a1b0197e7d145b5cf86abc70fee3e", "605ce5fe2f5b1e478a2528f1c514b77fc2607577", "21474d50689bb4b4af6399c4bae2cb612f382713", 
"06567663b31f7b8cf1de3d5f2ca6c79422ef60c8", "034a0ea624561648966f0a70b70f92574b11946f", "a39e7e753786fc5fa23eb0d8f69e0419c7ea63a2", "013989a60ef17d389252530bc25e68005376272b", "883a595fd76cb4dc0509a1005040286b31610059", "2f2941de162a17eda0ffbf7eef8981ee25808df5", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "1d0f2662cca5c859419b78fea468f4bc2f39e87d", "49496db3f17dea722e76d323e603ee8d4a7c1ba0", "2c519e5807b5f13a7c1d8ed6a50f092c3f19ac2a", "05a618847e4f08e5bca29dff732757779722b2e0", "4e177c25f97220d33dcc222485d951c817a77750", "971563f7acd4250bbafdb5e90160dcd4dc6110e9", "867ad29b3392965e40ede63f3cdbfdedac1c781b", "1b778694a913387cc89deb8cb2418c917540bcc9", "05c34e5fc12aadcbb309b36dc9f0ed309fd2dd50", "1664b784dd7d446ee8838e0eec5b980f61792007", "2184769d4176e19df63a273ba6049dfc7e1a655d", "3741d459138c0bc1a3eff051091e6704112f0e4d", "0314da057cfbf61d752674b7d8527e8be9ddc3b8", "2a0d9931c1e794ffe96c4b6c37507e96a3c4cafc", "21305504e7ccde5505a4281419a5799bccc42378", "42142c121b2dbe48d55e81c2ce198a5639645030", "20f5f8733134d87041b95b742d613051a1fb3fdb", "277f3ec379d9ac15eb4ae295f0928d98b23555ec", "046af23d437d93cc7d03b0df2b773d6fb6f046c3", "5e09e0aa2ee9d9acce592ca281e249b05771951f", "6c725d2a7e88515c5f7c877936f90b0184c4fe8f", "328a3437da451e77c02bdc9b660c720c4d5f67ed", "2a10aedda169c9ae4e2b6e478d47a36f43f3a489", "2859e75f421621ff776d55e533dc6ee7cb4b0a92", "cb98a28bbbe0e91a04dc43aaf6c8ba5c7f786dec", "4c8ad20e8d682d9956dad6a68d2e2a022773a959", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "8318fa48ed23f9e8b9909385d3560f029c623171", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "e50c67079d7e75dea747a728b7b79b3f7cf24445", "68694b8c85fa5b88c7d150fae7a8fa6ab9386b5a", "5a8aafae38aeabd5533a271ab02396529b7b41c9", "3a33dad8e9d12835fca95deec73e841096c8bec0", "155ca30ef360d66af571eee47c7f60f300e154db", "1f7e5e582663868ed2f6763f98066ca278177a61", "bed6d0e530f20332c284a463c754ce1d304aca38", "36222f8eb2ccf21ca345e15186cea64506581543", "0f5bd2edf5b1ce8815e34f6090d726c35d9331d5" ], "paperAbstract": 
"Recent advances in formal verification techniques enabled the implementation of distributed systems with machine-checked proofs. While results are encouraging, the importance of distributed systems warrants a large scale evaluation of the results and verification practices.\n This paper thoroughly analyzes three state-of-the-art, formally verified implementations of distributed systems: Iron-Fleet, Verdi, and Chapar. Through code review and testing, we found a total of 16 bugs, many of which produce serious consequences, including crashing servers, returning incorrect results to clients, and invalidating verification guarantees. These bugs were caused by violations of a wide-range of assumptions on which the verified components relied. Our results revealed that these assumptions referred to a small fraction of the trusted computing base, mostly at the interface of verified and unverified components. Based on our observations, we have built a testing toolkit called PK, which focuses on testing these parts and is able to automate the detection of 13 (out of 16) bugs.", "pdfUrls": [ "http://6826.csail.mit.edu/2017/papers/empirical-study.pdf", "http://doi.acm.org/10.1145/3064176.3064183", "http://locore.cs.washington.edu/papers/fonseca-dsbugs.pdf", "http://homes.cs.washington.edu/~pfonseca/papers/eurosys2017-dsbugs.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2817df10c4ffe29482928cb97b8ee89d8560b4cd", "sources": [ "DBLP" ], "title": "An Empirical Study on the Correctness of Formally Verified Distributed Systems", "venue": "EuroSys", "year": 2017 }, "281ba1ce98fdb0e68093a9fcd4b9f01dda5defbf": { "authors": [ { "ids": [ "9763760" ], "name": "Hyunsub Song" }, { "ids": [ "34278137" ], "name": "Young Je Moon" }, { "ids": [ "8568662" ], "name": "Se Kwon Lee" }, { "ids": [ "1719212" ], "name": "Sam H. 
Noh" } ], "doi": "10.1109/ISPASS.2017.7975268", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975268", "entities": [ "Byte", "Byte addressing", "Data General Nova", "Dynamic random-access memory", "Key-value database", "Linux", "Linux", "Memory bus", "Persistent memory" ], "id": "281ba1ce98fdb0e68093a9fcd4b9f01dda5defbf", "inCitations": [], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "33-42", "journalVolume": "", "outCitations": [ "129f11028220d87525b37b4605a2c04eb26f3e73", "885c666fbcfd1a10c613496d7a041d01b99c7a39", "fae8a785260ac5c34be82fca92a4abef4c30d655", "dc9f1fea06ff5e3d091137892109cc300e4fa828", "350f6ae536a4e8787f7f4513ba1be4a7d2d3b37b", "a2f3bb40653499eeb33babacf69579b5ea9d20e1", "48e57d5dad9d2f79116ea7c0af92dbc7ab7afd40", "69e2e47cfcf7115c74332a030b70c58d318427f7", "10419c6f4aa50a36ed0b103c9ddb9aec45f133fe", "05a1357946de5eca42a477b7b268db4944219a2e", "d7601446b81ed14d5e19e54dc51e686714f94fe5", "bf5497e15f22233cbc2a4d0c3cc2c36f26738701", "793f5e737284925a176f8ec82b3bb0d2178bb330", "bfab13cd7b084743ac5a6d9dd33b80c889543b10", "9aa0d7253574e50fe3a190ccd924433f048997dd", "72a715a49360245a4e8875d782815e66e44bf8b4", "4228d3fee9d28b3b3a7dc8e9585d02652109029f", "9183cde02e4306828089fb8adae74736a9df3ceb", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "94783d113951822195d4ba44599a8fcbdef9d4bf", "2621b8f63247ea5af03f4ea0e83c3b528238c4a1", "0204f40221260d00c5ee63646560a40dcd7d97d1", "87eb6044798792bb4fffd2dcb477bc8ad0982268" ], "paperAbstract": "The advent of Persistent Memory (PM), which is anticipated to have byte-addressable access latency in par with DRAM and yet nonvolatile, has stepped up interest in using PM as storage. Hence, PM storage targeted file systems are being developed under the premise that legacy file systems are suboptimal on memory bus attached PM-based storage. 
However, many years of time and effort are ingrained in legacy file systems that are now time-tested and mature. Simply scrapping them altogether may be unwarranted. In this paper, we look into how we can leverage the maturity ingrained in legacy file systems to the fullest, while, at the same time, reaping the high performance offered by PM. To this end, we first go through a thorough analysis of legacy Ext4 file systems, and compare it with NOVA, PMFS, and Ext4 with DAX extension, which are new PM file systems available in Linux. Based on these analyses, we then propose the Persistent Memory Adaptation Layer (PMAL) module that is lightweight (roughly 180 LoC) and can easily be integrated into legacy file systems to take advantage of PM storage. Using Ext4, we show that the performance of PMAL integrated Ext4 is in par with PM file systems for the Filebench and key-value store benchmarks.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975268" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/281ba1ce98fdb0e68093a9fcd4b9f01dda5defbf", "sources": [ "DBLP" ], "title": "PMAL: Enabling lightweight adaptation of legacy file systems on persistent memory systems", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "2830923a31efb38a6e6dca45c9d61c9afe1f044d": { "authors": [ { "ids": [ "2222498" ], "name": "Taejoong Chung" }, { "ids": [ "2321783" ], "name": "Roland van Rijswijk-Deij" }, { "ids": [ "2450059" ], "name": "David R. Choffnes" }, { "ids": [ "36147319" ], "name": "Dave Levin" }, { "ids": [ "1711252" ], "name": "Bruce M. 
Maggs" }, { "ids": [ "1729928" ], "name": "Alan Mislove" }, { "ids": [ "35497150" ], "name": "Christo Wilson" } ], "doi": "10.1145/3131365.3131373", "doiUrl": "https://doi.org/10.1145/3131365.3131373", "entities": [ "Cognitive dimensions of notations", "Domain Name System Security Extensions", "Experience", "Recommender system", "Scalability", "Software deployment", "Vulnerability (computing)" ], "id": "2830923a31efb38a6e6dca45c9d61c9afe1f044d", "inCitations": [ "8a0cd805e9cafc4198da4120823686042a024420", "06b9f583f386e3db629c54abe5244a0d5324033e" ], "journalName": "", "journalPages": "369-383", "journalVolume": "", "outCitations": [ "0228d21869d7d1e6d1acdaf7d7086d9e7d1327a0", "ecc86a90f0f8919ccd5e88aa51e1cc055c81abda", "264e231658d8044b9916e289b3abcf745e0371a8", "131a3f838b98e1f292f898b8f4cf910ad00b1ca4", "933a74b8059db4a485660170111f33ddfa3fbaf8", "60ffc824f8caba02ee3f2a7310a5e72df504f729", "84037ccdd1b5eacb404153194792c026b692a63f", "1acb9ed7ee3486ba3bde813335eb03f204669338", "5de2f3e2ccfb52188028b01ab1f686a0ebcdfad0", "061ad7b7944264e1df20e0ad8c2cf09db48b67ba", "434a84fc7a42efbb48256ed1a809af084ff170f5", "1e03a724e948e66165bb76883048902bb665d3c2", "4855dbfc2b5e2e00dabce672ba7d2716c3d77347", "18b9ea8c9d19db8120527e9dea60391d299c8777", "e95624f0ca607694f801e9de396f7361017e2254", "5553ea8b369f87931529439f10aabe3ca5fa65dc", "08a91781b467508a6ebcf9b3031c7696a5bde659", "09f6e5f085965ccf8316c4450554b0c8db3d48cb", "4d11317f62687bc541ce79fa8a54a8c2aad0365b", "8deb460b447b72dda50f3d6a71d0fc80b3c3bfa2" ], "paperAbstract": "The Domain Name System (DNS) provides a scalable, flexible name resolution service. Unfortunately, its unauthenticated architecture has become the basis for many security attacks. To address this, DNS Security Extensions (DNSSEC) were introduced in 1997. DNSSEC's deployment requires support from the top-level domain (TLD) registries and registrars, as well as participation by the organization that serves as the DNS operator. 
Unfortunately, DNSSEC has seen poor deployment thus far: despite being proposed nearly two decades ago, only 1% of .com, .net, and .org domains are properly signed.\n In this paper, we investigate the underlying reasons why DNSSEC adoption has been remarkably slow. We focus on registrars, as most TLD registries already support DNSSEC and registrars often serve as DNS operators for their customers. Our study uses large-scale, longitudinal DNS measurements to study DNSSEC adoption, coupled with experiences collected by trying to deploy DNSSEC on domains we purchased from leading domain name registrars and resellers. Overall, we find that a select few registrars are responsible for the (small) DNSSEC deployment today, and that many leading registrars do not support DNSSEC at all, or require customers to take cumbersome steps to deploy DNSSEC. Further frustrating deployment, many of the mechanisms for conveying DNSSEC information to registrars are error-prone or present security vulnerabilities. Finally, we find that using DNSSEC with third-party DNS operators such as Cloudflare requires the domain owner to take a number of steps that 40% of domain owners do not complete. Having identified several operational challenges for full DNSSEC deployment, we make recommendations to improve adoption.", "pdfUrls": [ "https://conferences.sigcomm.org/imc/2017/papers/imc17-final53.pdf", "http://doi.acm.org/10.1145/3131365.3131373", "https://users.cs.duke.edu/~bmm/assets/pubs/ChungR-DCLMMW17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2830923a31efb38a6e6dca45c9d61c9afe1f044d", "sources": [ "DBLP" ], "title": "Understanding the role of registrars in DNSSEC deployment", "venue": "IMC", "year": 2017 }, "2834d13e15b530a15957c2ef13cabad689b86aa6": { "authors": [ { "ids": [ "1843219" ], "name": "Yuseok Jeon" }, { "ids": [ "10576707" ], "name": "Priyam Biswas" }, { "ids": [ "2925856" ], "name": "Scott A. 
Carr" }, { "ids": [ "2767582" ], "name": "Byoungyoung Lee" }, { "ids": [ "2694341" ], "name": "Mathias Payer" } ], "doi": "10.1145/3133956.3134062", "doiUrl": "https://doi.org/10.1145/3133956.3134062", "entities": [ "Adobe Flash", "Apache Xerces", "C++", "Class hierarchy", "Compiler", "Dangling pointer", "Data structure", "Firefox", "Google Chrome", "Optimizing compiler", "PHP", "Placement syntax", "Qt (software)", "Software bug", "Type conversion", "Type punning", "Vector (malware)", "Virtual machine" ], "id": "2834d13e15b530a15957c2ef13cabad689b86aa6", "inCitations": [], "journalName": "", "journalPages": "2373-2387", "journalVolume": "", "outCitations": [ "6a8f65381a627a2db6c756a7185d9106f0acefec", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "255bdcb05805c97d973081b59bc61c649263ceae", "2c3d491a3bea2c1016587aa8f9fee21293a84856", "11496e28e6dd3f1ab7d2f054c46d7587988dfd86", "686150e2179840ed40a0166cba6c5d507f3aa49c", "683a5b90b6cc9e7c52fab36bf8086bdfb1bc5541", "0e039df712774fcea67f214d9b5780c1dc250747", "29146d2b80bc8c56f7e18efe8d2c92354254e947", "52612064aa065b29930b56fbf54745883bba94dc", "542db06acbafe6c8e0837dcbb0dbbeb7c19f976d", "73de9120c975c6debd712af0136291ec1d4b2fde", "a4a7aaa197c29dcfa1556182aea425144137d4e3", "65192f3d0ffb066a4c47a09fc11fdfad47dd192e", "196d341cdfb85f1a1d2e431fc40f34604c30bb59", "0df445ca53975d93f27c9def03e964d3113a4607", "b2c444e8ab6b9bea1072bb0a7dd321543c8520ea", "0719b9670c8580db76547497df39caabdc20fc32", "4c1206d65920c8434d987e705bf21e9651fd21bb", "3d6395a04f7436b143339bbb7e4e46dfaaaf8afb", "440273d503939d01cba669079dbf3addca045fea" ], "paperAbstract": "Type confusion, often combined with use-after-free, is the main attack vector to compromise modern C++ software like browsers or virtual machines. Typecasting is a core principle that enables modularity in C++. 
For performance, most typecasts are only checked statically, i.e., the check only tests if a cast is allowed for the given type hierarchy, ignoring the actual runtime type of the object. Using an object of an incompatible base type instead of a derived type results in type confusion. Attackers abuse such type confusion issues to attack popular software products including Adobe Flash, PHP, Google Chrome, or Firefox. We propose to make all type checks explicit, replacing static checks with full runtime type checks. To minimize the performance impact of our mechanism HexType, we develop both low-overhead data structures and compiler optimizations. To maximize detection coverage, we handle specific object allocation patterns, e.g., placement new or reinterpret_cast which are not handled by other mechanisms. Our prototype results show that, compared to prior work, HexType has at least 1.1 -- 6.1 times higher coverage on Firefox benchmarks. For SPEC CPU2006 benchmarks with overhead, we show a 2 -- 33.4 times reduction in overhead. In addition, HexType discovered 4 new type confusion bugs in Qt and Apache Xerces-C++.", "pdfUrls": [ "http://hexhive.github.io/publications/files/17CCS.pdf", "http://doi.acm.org/10.1145/3133956.3134062", "https://acmccs.github.io/papers/p2373-jeonA.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2834d13e15b530a15957c2ef13cabad689b86aa6", "sources": [ "DBLP" ], "title": "HexType: Efficient Detection of Type Confusion Errors for C++", "venue": "CCS", "year": 2017 }, "284fc0fb10b24bfad1b31333aea2ac82a8f154b7": { "authors": [ { "ids": [ "1752664" ], "name": "Shuang Chen" }, { "ids": [ "8808077" ], "name": "Shay GalOn" }, { "ids": [ "3234334" ], "name": "Christina Delimitrou" }, { "ids": [ "8218893" ], "name": "Srilatha Manne" }, { "ids": [ "40611011" ], "name": "Jos\u00e9 F. 
Mart\u00ednez" } ], "doi": "10.1109/IISWC.2017.8167770", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167770", "entities": [ "ARM architecture", "Cloud computing", "Memcached", "Parallel computing", "Price point", "Server (computing)", "Service-level agreement", "Task parallelism", "Web server" ], "id": "284fc0fb10b24bfad1b31333aea2ac82a8f154b7", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "125-134", "journalVolume": "", "outCitations": [ "2020e67ccc611ee8b79c300b2b71dec0632cb164", "9019c0ae583b3c3acfe71a6efa0cf3003f8f2431", "251ea4c57e71bb951ff6f9fe0ff63897a298402f", "0d62acaefd4c4f41fd5814c8d9267d7798de9284", "3000e77ed7282d9fb27216f3e862a3769119d89e", "90b128f263bc164a97e0fed34083c3387404778a", "0d683085618e654a173b3590c4d2b431569cbfb6", "08632fe2b934ed15d3499e7321282c81adc2c390", "4441f6b08fb7a2439dc46a468a1d8c17fe87c622", "1dd353938063795c06ef21d8b0b3ef3b45a2fdc1", "226012d8c083ce9ad3a407b8a796032f8684bb35", "2269889c9085ff518ee9e7f5b2f92e4599dd3ff2", "d63e4cada8347686372d63a3d00afa89a1515a31", "1d4ac1fd1706a1ab3d93d8fa481d12332068fc30", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "795f329868f050f84310eb071b74b09f731544eb", "6e78c1b830ff611d82ae00d75b3c6592e000a91f", "31b27a3b4ff89993eb92e8b1353edead8d5f2520", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "269c24a4aad9be622b609a0860f5df80688c2f93", "67b9072f4c7d0b8e7e05983e3532aebddbe5098f", "7a978f2902460e732c50c36a171deb11733df1fc", "64f61ed10b09a213a8c716eea8166745755a3030", "0623414994c29a74c06eeea0a145e9d2e72e987a", "b62ad91dd797e1354652602abee5b9bcdda6957b", "d0903e4d0e04ea4809cb92cd9f7ab9367ec5b56f", "21e5111cc2d1fb5e8cc5fc239f34b26442dbad2f", "6d44790b6d952eff28f302998e8121f90786e3ff", "3a043714354fe498752b45e4cf429dbae0fb2558", "2c9b77a063a3459ed8f3be0c0066724a38e225e5", "0d2ccae6d37b9e4053fc476887d1565de58e5924", 
"e2898f513a5421c415ce24153c323efb52db2a6a", "596df6fb4d50c7886948b08f525c4e3393d05a44" ], "paperAbstract": "Key-value stores (e.g., Memcached) and web servers (e.g., NGINX) are widely used by cloud providers. As interactive services, they have strict service-level objectives, with typical 99th-percentile tail latencies on the order of a few milliseconds. Unlike average latency, tail latency is more sensitive to changes in usage load and traffic patterns, system configurations, and resource availability. Understanding the sensitivity of tail latency to application and system factors is critical to efficiently design and manage systems for these latency-critical services. We present a comprehensive study of the impact a diverse set of application, hardware, and isolation configurations have on tail latency for two representative interactive services, Memcached and NGINX. Examined factors include input load, thread-level parallelism, request size, virtualization, and resource partitioning. We conduct this study on two server platforms with significant differences in terms of architecture and price points: an Intel Xeon and an ARM-based Cavium ThunderX server. 
Experimental results show that latency on both platforms is subject to changes of several orders of magnitude depending on application and system settings, with Cavium ThunderX being more sensitive to configuration parameters.", "pdfUrls": [ "https://sc2682cornell.github.io/ppt/IISWC.pdf", "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167770", "http://www.csl.cornell.edu/~delimitrou/papers/2017.iiswc.bigsmall.pdf", "https://sc2682cornell.github.io/pdf/iiswc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/284fc0fb10b24bfad1b31333aea2ac82a8f154b7", "sources": [ "DBLP" ], "title": "Workload characterization of interactive cloud services on big and small server platforms", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "2855adaf4b52feb22ef0390ca74baf927dbd1a09": { "authors": [ { "ids": [ "2431591" ], "name": "Beom Heyn Kim" }, { "ids": [ "2949536" ], "name": "Sukwon Oh" }, { "ids": [ "4260658" ], "name": "David Lie" } ], "doi": "10.1145/3102980.3102994", "doiUrl": "https://doi.org/10.1145/3102980.3102994", "entities": [ "Client (computing)", "Clustered file system", "Computer data storage", "Consistency model", "Eventual consistency", "Network partition", "Oracle Database", "Software system", "Value (ethics)", "Xojo" ], "id": "2855adaf4b52feb22ef0390ca74baf927dbd1a09", "inCitations": [ "2e904968e96242b9d96c5790f035e0a0f26abbf8" ], "journalName": "", "journalPages": "82-87", "journalVolume": "", "outCitations": [ "a62f2bcc232a295ed92cc93d9b02469f5f6e3a5c", "874c59d4801ad55a8eda16c045e34721c09169d8", "51dae50f2cf78af88e8d064671b91e6e059ca7a6", "2c0fba0ac99f9e73de7552a1e82ac5bc528c0864", "24d38bee3c3c08eee32c42aa7285fe414f443c15", "513bd1e5ec39f711f212d2105af3ee03dea4b53d", "7038e23695dbc4d8a9d1b7c6dff8dbc138009c4b", "ab0d8f966a6fd16865b9a459ccb5383bf58e70a3", "59250c7388caba98bd4adc2f1969fbec5500ed6a", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", 
"63e56fa27e99e7eeead2a6106c6cbfcd38168712", "6f075ac9eba96247fff9dbfdcf9ec15d162dcc92", "017aba316f6d8447a4e045d8ddd417456629031e", "158cdc6716c8891bcbe6881e5ac5faac92a5975c", "0e227474e5e90dcdf796998a33126cbe70434ce1", "4f3be3ff4a21c41deb1ab6de76dd5d30ea874266", "077fa9e61bd2549bb4905a1fa7bd2f7eee55455d", "24829ced3be55a3a118d1d042699b899e4860a04", "3741d459138c0bc1a3eff051091e6704112f0e4d", "bca55bdc9fbc192b88848cf82d1679e3bee2f505", "17886b4911ffd50d7e02a574caad34a286458b3a", "bed6d0e530f20332c284a463c754ce1d304aca38", "36222f8eb2ccf21ca345e15186cea64506581543", "024735cc8a42fe4d9fa7c4c1c097b1f7a71286a3", "328a3437da451e77c02bdc9b660c720c4d5f67ed", "089895ef5f96bdb7eed9dd54f482c22350c2f30d" ], "paperAbstract": "Many modern distributed storage systems emphasize availability and partition tolerance over consistency, leading to many systems that provide weak data consistency. However, weak data consistency is difficult for both system designers and users to reason about. Formal specifications offer precise descriptions of consistency behavior, but they require expertise and specialized tools to apply to real software systems. In this paper, we propose and describe consistency oracles, an alternative way of specifying the consistency model of a system that provides interactive answers, making them easier and more flexible to use in a variety of ways. A consistency oracle mimics the interface of a distributed storage system, but returns all possible values that may be returned under a given consistency model. 
This allows consistency oracles to be directly applied in the testing and verification of both distributed storage systems and the client software that uses those systems.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3102994", "http://www.eecg.toronto.edu/~lie/papers/bhkim-oracles-hotos2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2855adaf4b52feb22ef0390ca74baf927dbd1a09", "sources": [ "DBLP" ], "title": "Consistency Oracles: Towards an Interactive and Flexible Consistency Model Specification", "venue": "HotOS", "year": 2017 }, "28613f311914cbb7737692673c2385bf16f50091": { "authors": [ { "ids": [ "3383611" ], "name": "Kartik Lakhotia" }, { "ids": [ "2016801" ], "name": "Shreyas G. Singapura" }, { "ids": [ "39078626" ], "name": "Rajgopal Kannan" }, { "ids": [ "1728271" ], "name": "Viktor K. Prasanna" } ], "doi": "10.1109/HiPC.2017.00039", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00039", "entities": [ "Algorithm", "CPU cache", "Cache (computing)", "Graph (abstract data type)", "Heuristic", "Line graph", "Locality of reference", "Matching (graph theory)", "Precision and recall", "Principle of locality", "Program optimization", "Speedup" ], "id": "28613f311914cbb7737692673c2385bf16f50091", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "273-282", "journalVolume": "", "outCitations": [ "1156f60e40548096df49528b1342bb3e88b0f378", "93ee8e1c05d11d63aa3d61653b2c8bae75e0aecd", "4fa429b8b44bf3c67d2b4ebf6625c9357a0c8e3d", "709f609b9a998d1ac7f2dc82c35917e5aa66fe50", "5dee3ae337fa3cd2e30731fb61b512e06613cc2d", "947c6bf534ccd620044f77c3bd6068f633b421fb", "348119d77d127dba6058802c12f98f06c8849f3d", "0987a3e925731f29b23f29c25ede370481c6396e", "4410f0c48f982f960a54500df7bd88e4cab88927", "2a4ca461fa847e8433bab67e7bfe4620371c1f77", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "9d172fa71c257e8c67a8ca0d3346130f20eebad7", 
"55b3e22b56599ed8520deb1d7cb9ac460f4fa6bb", "0d0b9664d28be6b212e721d77288814df638d5cf", "3b874ce8d1fedd7f1f31a3c5ec495f4907b59da7", "8c5b1d50db6ce6bf822ecd43328042e124eee028", "476b64be7cc0b985c02d69dd0532965924dd1869", "b513711621e81d0abd042e0877ca751581a993f5", "4dc578364f357b993b5554b9181c90c84aa6b4d1", "eb82d3035849cd23578096462ba419b53198a556", "004e9b9ca0ce904030be124d7066b855fa6b61e6", "2c8671113b9af7c4cf6bb0aced6ddb941b1f9698", "8c084e71c986c17ed32f9c2ada413725ef00c670", "1ed8886fb8a9ba6b57b8742d0e5b76a32daa8fad", "652d93bd6e85fefffe6f2e47b35ed3c516d2d889", "2d7bf91ca184def17e15bf515532651fd5fe5f01", "7c8bacd836bf886f258083502e2208404823b557", "87ee99d4cc4e0601cbb519f6ddbac85772bdc49e" ], "paperAbstract": "Sparse graph processing generates highly irregular Memory Access Patterns (MAP) which lack locality and result in poor cache performance. In this paper, we propose a novel graph ordering algorithm that addresses this problem. We observe that existing reordering algorithms primarily try to improve cache line utilization by enhancing spatial locality. They are oblivious to cache data reuse which reflects the temporal locality that MAP can possess. Our premise is that peak efficiency can be achieved by a graph order for which the resulting MAP exhibit both spatial and temporal locality. Therefore, we first introduce a new metric Profit, that quantifies cache data reuse leading to a heuristic pH that enhances temporal locality in the MAP of graph algorithms. Then we define a notion of dynamically matching MAP with cache contents in a way that jointly maximizes both cache data reuse and cache line utilization. To perform this joint optimization, we develop a Block Reordering algorithm which utilizes pH to rearrange blocks of consecutive nodes with high spatial locality. We evaluate our algorithm using 8 real world datasets and 4 representative graph algorithms. 
Experimental results show that graphs obtained by Block Reordering can achieve upto 2.3\u00d7 speedup over the original graph order and consistently outperform the existing state of the art reordering technique by 20% to 25% reduction in cache misses.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00039" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/28613f311914cbb7737692673c2385bf16f50091", "sources": [ "DBLP" ], "title": "ReCALL: Reordered Cache Aware Locality Based Graph Processing", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "2862d11cc739a1c6baf6addb56502d869233c11c": { "authors": [ { "ids": [ "1694978" ], "name": "Cheng Wang" }, { "ids": [ "10375507" ], "name": "Qianlin Liang" }, { "ids": [ "1684443" ], "name": "Bhuvan Urgaonkar" } ], "doi": "10.1145/3164538", "doiUrl": "https://doi.org/10.1145/3164538", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "CPU cache", "Cloud computing", "Failure rate", "Field (computer science)", "In-memory database", "Memcached", "Procurement" ], "id": "2862d11cc739a1c6baf6addb56502d869233c11c", "inCitations": [ "21d2fe357a178d36a50398b05e0046b7b500b109", "2c68fc64ee8fd08bb7c742d3087f8a54da9ff296", "613cdadb56592f704349bb25a359ebecd8fd9e0f" ], "journalName": "", "journalPages": "63-74", "journalVolume": "", "outCitations": [ "0935bb723e4071ccd4c2334d3b6d728faa111d11", "51694f797b0d4a7e03b1e9a6587a9d4976e92297", "120ade88aecba9157eb1ab7bc0464a0215c46ccc", "ce8f8e86db523da990507f177c6c6df445cd8d46", "ec5f0f8d5b7176cb2e88271ec948f935ea0346a2", "71de39ceaaa0efecc2c84ce8fe0af8ceb5ed79e7", "045943438dd45f25f0127d97ed9116b3b05914a7", "b76d259d4cfb68cc143cd1109138eca0d8ac8ce9", "d608a95490b02839fdf71a412aab46ad20a70596", "39879b7becd8ab4662008f8da92538fe368f939c", "70e38d47b83261e257bae61dc39ffbf391b30591", "05be0db01d70bcce9530b462ab2368f9e15127d9", "5e3da1f0bef9cfb5b517ac210542ed69aca76106", 
"3a33424cd2ad63cc056a2d9a06b8794d78ba5214", "94859f850f345629c23526e1155aa9deb1852491", "227283968d278765a2a22754ac96b2d64be8e2dd", "0f9215aaf5a8376461ff3ec504a53172ce827647", "1a3263471a40f829c657ac30eaa66c5bb0675c2c", "1da8852aa591d82f6dab3d93c8aba923e69a45d4", "90b75afbf77307096c536a219dd23047059ff9a9", "440f3e59fde1fde9868bc4a0e8fa9132050ce89c", "12c28dd5ea0b2d0269a67a43c2eb0b1207b2b889", "a6a8313f30420c60e7eaa9f34ea5a41833695af1" ], "paperAbstract": "Many cost-conscious public cloud workloads (“tenants”) are turning to Amazon EC2’s spot instances because, on average, these instances offer significantly lower prices (up to 10 times lower) than on-demand and reserved instances of comparable advertised resource capacities. To use spot instances effectively, a tenant must carefully weigh the lower costs of these instances against their poorer availability. Toward this, we empirically study four features of EC2 spot instance operation that a cost-conscious tenant may find useful to model. Using extensive evaluation based on historical spot instance data, we show shortcomings in the state-of-the-art modeling of these features that we overcome. As an extension to our prior work, we conduct data analysis on a rich dataset of the latest spot price traces collected from a variety of EC2 spot markets. Our analysis reveals many novel properties of spot instance operation, some of which offer predictive value whereas others do not. Using these insights, we design predictors for our features that offer a balance between computational efficiency (allowing for online resource procurement) and cost efficacy. We explore “case studies” wherein we implement prototypes of dynamic spot instance procurement advised by our predictors for two types of workloads. 
Compared to the state of the art, our approach achieves (i) comparable cost but much better performance (fewer bid failures) for a latency-sensitive in-memory Memcached cache and (ii) an additional 18% cost savings with comparable (if not better than) performance for a delay-tolerant batch workload.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030210" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2862d11cc739a1c6baf6addb56502d869233c11c", "sources": [ "DBLP" ], "title": "An Empirical Analysis of Amazon EC2 Spot Instance Features Affecting Cost-effective Resource Procurement", "venue": "ICPE", "year": 2017 }, "287750d6ec07d5867be4bf489dba7bba14d51fe4": { "authors": [ { "ids": [ "39416427" ], "name": "Shehroze Farooqi" }, { "ids": [ "1685939" ], "name": "Fareed Zaffar" }, { "ids": [ "2984982" ], "name": "Nektarios Leontiadis" }, { "ids": [ "34616778" ], "name": "Zubair Shafiq" } ], "doi": "10.1145/3131365.3131404", "doiUrl": "https://doi.org/10.1145/3131365.3131404", "entities": [ "Access token", "Ecosystem", "Honeypot (computing)", "OAuth", "Usability" ], "id": "287750d6ec07d5867be4bf489dba7bba14d51fe4", "inCitations": [], "journalName": "", "journalPages": "355-368", "journalVolume": "", "outCitations": [ "1560272f4a258c3e690fd3703d3132913af91467", "09f0fe5a04836ca15c9a5a48845a67d99c6c9a91", "592029f9e20af564602e8a7a512d2924e999fa08", "79914eef0ca54f18e746c7dd5cc04953840a277a", "251313fb7e68f1a3a5c3467962c6fb9d4cc37d54", "2654113273e88df617d66821886e1790c75440e2", "a5adc8c3bbcc46e75c9300be64c8334d752e9601", "d917a006f1264c20bfdcd6b053835f8686f15afd", "24e45f66c6ddfbfe1430ade5522709d51d908722", "1660f24384ea0c0f4d811f1967e0d231bb422443", "5c371701115dee38149b29400c10eeeb2bd00a35", "55cf4d3b6b05d9bc5dbbbb0bfd5072b07a697437", "06240b5f5a928d49b53e848bcbaa4bf7c6beab35", "0088523c4bf74b35c2e7471e740d6758ab36f7b9", "078f2d64bae80c49cc65aedc40fd7f45f127d64a", "c91b460e70a43483832731757360aecbe44a9973", 
"3820c34d714e37acea5e17349f3cd12c5dc4e27c", "8152e621012339cd950dd082ff711b6f1e325f1b", "067b40badc7ad15a66b1a7f5837aaebef210a2dd", "0f5ab445f40c116d24e14062ccfce98ef9aad791", "1418c58ccf2f85461dfff22f1a7ac0ce27e44f7b", "4f6487d61ba6c2afa44be0e870599bb292e27638", "283b0864062319c567d9f77fbdff66b66aa6e293", "169ba9e7cdf9d5d58e585890a9683975d7ec26c0", "026972d6a94755c4a3ddf9ab0d463fd0416c4262", "56c00693558d88619e4a369a8b412c918f7fefbf", "13e185c42cf59a3ca4db0e47564d17b8f5801a3e", "2c5a5a2ab4f7b63523981ac790399c3ef2f08014", "d2e47b04b4dd88397d5a19db27ba6a0aa5d1317e", "363aae72873d094f9b863a571c2d67ef7c1a63dd", "30af8702c6c9f69a64d176d61784b4d313eb3e26", "83a96734000f6733ed92b3daf0c6ffa1528e5d39", "0a645cbb39ac37929a2e6ba55fc248dc4ca967e4", "92eb73af1caafada0eabc6ed2d936b9d565f3065", "f0eb1d0210f50cce3c9ba929ec51b75351b8a424", "086477d3ae19d679dfc2023c14822e50e9bb7c6e", "65efc7be49523679fe35cab8870bd308a53ab7ab", "1055042686c15a71e71c3c7bfedccba267456221", "12ac954894e7698665fb5da6b384938a9702b5c9", "12de03e2691c11d29a82f1c3fc7e97121c07cb5b" ], "paperAbstract": "We uncover a thriving ecosystem of large-scale reputation manipulation services on Facebook that leverage the principle of collusion. Collusion networks collect OAuth access tokens from colluding members and abuse them to provide fake likes or comments to their members. We carry out a comprehensive measurement study to understand how these collusion networks exploit popular third-party Facebook applications with weak security settings to retrieve OAuth access tokens. We infiltrate popular collusion networks using honeypots and identify more than one million colluding Facebook accounts by \"milking\" these collusion networks. We disclose our findings to Facebook and collaborate with them to implement a series of countermeasures that mitigate OAuth access token abuse without sacrificing application platform usability for third-party developers. 
These countermeasures remained in place until April 2017, after which Facebook implemented a set of unrelated changes in its infrastructure to counter collusion networks. We are the first to report and effectively mitigate large-scale OAuth access token abuse in the wild.", "pdfUrls": [ "https://conferences.sigcomm.org/imc/2017/papers/imc17-final235.pdf", "http://homepage.divms.uiowa.edu/~mshafiq/files/shehroze-oauth-imc2017.pdf", "http://doi.acm.org/10.1145/3131365.3131404" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/287750d6ec07d5867be4bf489dba7bba14d51fe4", "sources": [ "DBLP" ], "title": "Measuring and mitigating oauth access token abuse by collusion networks", "venue": "IMC", "year": 2017 }, "28a6e6ceb0a92de7a49048d094321af5fab227a0": { "authors": [ { "ids": [ "1715309" ], "name": "Henry Corrigan-Gibbs" }, { "ids": [ "1752788" ], "name": "Dan Boneh" } ], "doi": "", "doiUrl": "", "entities": [ "Aggregate data", "Computation", "Cryptography", "Information privacy", "Interactivity", "Least squares", "Least-squares function approximation", "Plaintext", "Server (computing)" ], "id": "28a6e6ceb0a92de7a49048d094321af5fab227a0", "inCitations": [ "409503078e6fa6a6f90fa144859cafbf5ef83498", "2eaf1165b7f26cd1743e0f01de5b8662a8f4a1a3", "232bb5913f12cdcf8419a3e44d06a5d6fffe2c9b", "0646a88dfd7e7ce7233041eaad62076ccc55624c", "d705cc4656b997f0b9cde0ac5fb5e476625f0e03" ], "journalName": "", "journalPages": "259-282", "journalVolume": "", "outCitations": [ "32ad30a791a7b1bd7ee7e84e9cd7fdfa477b2337", "04948723dec0e6724777ee56f0d10168cce44921", "c549f66aca1f25f44be71cbfc40481fc06adb106", "078f2d64bae80c49cc65aedc40fd7f45f127d64a", "3945bcc35b2b82c593984503f9ba4c32562e57dd", "34947a0301ecedfe16df9abd5c4ee75f318df7ff", "27c53991fa216fcde5af8ff36fc6853efea8e6f8", "44ca8016d618a915fc6a5f727def2699d2e6b178", "1808b64aec21863489f0fe66f250890a3ac2b843", "4d8e2657d6c9032c28ac4878a442e83dd99b672a", "09af9108cb5c196d5c15a6f3d26e604434203bea", 
"f98cfc3c092d69c068054698bcb4c1b6840644c6", "6393288400aadae9687d7082644f4b2e65adfe05", "0c017744351094c581cf619bbbda8164fbbdad0a", "1ea800c00dc8eb2e37ad2f13c135f4afe231e45c", "bac41b59697da3ca5c80ca08f2bbbc97a3576248", "2073b5fb43cefe522c20a6b550e25f654077edd3", "73fbbf4126098d50de0e40d0ee314ddeb691dd2a", "557d8b988bca3d0033189723d11102e04c0c67c0", "27e9745fc94ccf6039dd1804cbb99760544fc59b", "249d2e15cfcd531e3f91d561877d5b23d31ec2e8", "6aa234f9c33ee957d6f34dfe85f2efcdfaa85211", "0e9c50180730bfc455003cd6ed883b8020eedbb3", "0b7e6c5d49b7681fa7426b86040072e3b36a2223", "283ecc8622694c070fa53aee7a1c37dadc603f8d", "3f40a5b0bcf4401c3f8efdbb539deec2763ad916", "698f4d12530c6953e89338d4dd7531171be19ce2", "f6dbdf621b0acc9a131f7f2a4d3efbd4bfb0db58", "e50ae4d480d84c7cbdb8edcebf13e57f5a47c8ad", "19c3736da5116e0e80a64db35afe421663c4b4a8", "21f47e1d9078d12de1bd06341619923e8b9d85bb", "22330328669bded692399a15d85fef3373117533", "02dad9c51e3a2e2117ffc41d624de4a090271d1f", "bcb49a06e4fb7ea831257e146073d84234f4d238", "67f3f39ddfe53cc73e0d355cf342f4d091aaee8c", "8fff335957604e0790b5b8591c9e335936272fd9", "15799e114a32f53f67e0a18edd347aa831745bd5", "d94ba52bd05fe12926836c3332e2fd895e78ed30", "121fd7d0a401c67a0fdc369c48c2b3a16daaa56a", "5313470490703d8d4d3d2710f034890ea78826d6", "61a297247f899995789dc6e32bcf3972502374b8", "4a29060831fdf21fb61ed6a8bfba062945a3d5e4", "4577a392ecc0e0666fb0b705cd5520d0ff3e2f18", "f71db4d70d4cc9e931a63dde7a6db8dad10a61a0", "3d5c4a2eed55772238f41bf60a51addbeca2b92e", "19db199fd25aa604618d13e80cf317f0858d5604", "1d9cf87fa6d6175a2c1543afff263113657765f6", "15075f37c428827209a2bc8b1595a04d5a21dc8b", "32334506f746e83367cecb91a0ab841e287cd958", "453664306db9078a9bfb0d0f924ea590beb0f1f1", "3310cb2b0f1a473e6f98cdec6eb53ec6a962ac87", "d04f7f8eed11e5e58a41e314b00e49d7424d82ec", "2c0a4fb8335cd7a84a74abefbb6eaef4b23d1e4a", "35516916cd8840566acc05d0226f711bee1b563b", "277cefef176e4e0a52d5aa9359b9915eb5528fd3", "b913cf330852035f49b4ec5fe2db86c47d8a98fd", 
"33148623fc14ea5735e73dd716d030ab17118299", "20d1c524fd1557e76a687ffab57f96364beb6fe7", "21fe30f93a1ce4e3de2b68ab613573ac8d4eb0e4", "6223684e14778e4d7948e994d2169ebf38e0a95f", "29543bb7c680dde79f374e73930ca68833e2fb37", "04b0c0a177661656eb1e3eb6d0aeebb150fa4cd7", "02a356366cc651e69df7c442deff9dcaa3cc8a5f", "7d1394fb6b041ed376c99cb230133023a59830e4", "60682144f3554174654248c4dd46db46e5b1cd37", "24b481a4e55ce6a6473054acb0da1cb96ad83e5d", "15c76f461543c44a8b9d8b32b2bbd18c595aea52", "1a33c542b064f95eb6fc9b0003e80ff4f1b9289a", "517d4d45013e3b040cec89ba1cffbd4a7eb0122d", "23864b54c1ee459fc39664ce947c0595794ea0b6", "66818f952327b9145d3c7f6ef392240f06767755", "5b566b58184e302e1bd364903010fcc55a226fd3", "64028c85cd7b7e42f208e29734028572d7735c61", "5de068c94fbe9976a7017ce0451c05941a2fe70a", "8c43726d7229a6dea25d3f510ca4b8e681117b60", "208448ed57cb0ff70866cb3828b06610c3ff25fd", "a089defc1eea22b4d3afaeccf031ae110d7af459", "d36111fceeff7faba6c6526f6a3ec05dc99d3250", "32bd62191f501753b8307bef23758adb50d95627", "1aeddb4b15bbb4524b42934594909a34965de1dd", "d53c7989acf948fec62d4b1cfcb6c328df048638", "8d3be6feedf4302be1d76ab0f53924ae6e777f9f", "6ad481b4915f40c7b34256fbdc62a4aeccc3fbaa", "2a266546c2609f079529688de7acbe0213f47373", "2aa24ddd5c4eea28fc3b751fb5915c01d9337626", "7a278ee0578f194700cadc3811cdda4ec751f88a", "1055042686c15a71e71c3c7bfedccba267456221", "31445e3bd3672ed743c4a089cc0db4f23357f0f2", "54d539d7558b6db3a3043fbbe4c71abe3fb629db", "81acd0334fcfbff15bc4bb2e688c27dbe95bc16a", "147a76ea2d635de2dfd6b6eb1b53defdeb6e8c4a", "c6c63ebd731565b7d26f6e42028173f60f5b5883", "63c0fce10eba509738e72bd1bd8770e11cd64a26", "8ad88f65222febc015b2a74d7c75b835c617ad4a", "101bb77cb2c8b2bfcde41973ac3473db325d7e6d", "12add45f5a0cc3d9a9a694d3a3802843438208e0", "bb63c68855d42c95623ed9362d0853ea1d4cc858", "6f11f215003af55d0f5628b24e1a54ee10ecd21a", "24a19ef2d8b5c0aef7f22793a268b3c16d339352", "1e0c2bbba98c3a6970eb88f3250a328e6893be66", "b0e32f83369313c18e3ad38b47a0f0dbe42decac", 
"215e514f25e02d979552992ea3044999cf5fe9dc", "2949851ab9827fdd334ecc3b392296df2aacaf92", "6b3aea37625702e98e5033e1107403e319b4df01", "4aeefad3378d26ca0563e843fdfb5532070a9be3", "2173406c4ca5fff0de66e8cbed4cb01ca959cb31", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "a853e0842d74fa3ff146f45ea7f2ed52dac08d1a" ], "paperAbstract": "This paper presents Prio, a privacy-preserving system for the collection of aggregate statistics. Each Prio client holds a private data value (e.g., its current location), and a small set of servers compute statistical functions over the values of all clients (e.g., the most popular location). As long as at least one server is honest, the Prio servers learn nearly nothing about the clients\u2019 private data, except what they can infer from the aggregate statistics that the system computes. To protect functionality in the face of faulty or malicious clients, Prio uses secret-shared non-interactive proofs (SNIPs), a new cryptographic technique that yields a hundred-fold performance improvement over conventional zero-knowledge approaches. Prio extends classic private aggregation techniques to enable the collection of a large class of useful statistics. 
For example, Prio can perform a least-squares regression on high-dimensional client-provided data without ever seeing the data in the clear.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-corrigan-gibbs.pdf", "https://crypto.stanford.edu/prio/paper.pdf", "https://arxiv.org/pdf/1703.06255v1.pdf", "http://arxiv.org/abs/1703.06255", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/corrigan-gibbs", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-corrigan-gibbs.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/ec08/be24d9031ad420a03d401b53a9b49ec2cd34.pdf", "s2Url": "https://semanticscholar.org/paper/28a6e6ceb0a92de7a49048d094321af5fab227a0", "sources": [ "DBLP" ], "title": "Prio: Private, Robust, and Scalable Computation of Aggregate Statistics", "venue": "NSDI", "year": 2017 }, "28b65b0d7519a598c8229dbe54b7a29e4f071ef7": { "authors": [ { "ids": [ "2836500" ], "name": "Zekai J. Gao" }, { "ids": [ "40298273" ], "name": "Shangyu Luo" }, { "ids": [ "2248828" ], "name": "Luis Leopoldo Perez" }, { "ids": [ "1741680" ], "name": "Chris Jermaine" } ], "doi": "10.1145/3035918.3035937", "doiUrl": "https://doi.org/10.1145/3035918.3035937", "entities": [ "Algorithm", "Compiler", "Declarative programming", "Distributed computing", "Machine learning" ], "id": "28b65b0d7519a598c8229dbe54b7a29e4f071ef7", "inCitations": [ "4853a26200889f033c0f509abf0f91d8cafba55b" ], "journalName": "", "journalPages": "961-976", "journalVolume": "", "outCitations": [ "762756eba9168421d338f0aedd04e0111ca75462", "dc17f733f3e917c2076e9100ee4ecbdf17d9ac23", "65cdab424fe449deaab359a1f8e16899ac91dd2a", "2359b12b0f4c70477f51455d9eb41923e740104a", "244f7c483b1035144f04f7c58f20c3f806d3f1b3", "bd4019171413bb40d4c4f3a5cfaab1fe112455ee", "5b51ce3bbe7791e1533be7d4d76b2452bf043954", "264026d32af90ed520dd4d494627b67184d39a57", "247e25aa53dd61715f2a217138e30a917eeead08", "0122e063ca5f0f9fb9d144d44d41421503252010", 
"2f3902d4e4b793c74fd368ce9b6f3f28a2a1206f", "0f890546c00ee8b35c96bc712a2ecfe574af3754", "7ec028ace29244cb74c105327a7e4177a34aa6bd", "3f4f9efbd2c62ccab0e55dade96489610d640ce3", "04c73e4f13a19a2ce270a0aa391bd7842aa113ae", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "09c5293b647fca40fde28ac6c38737f07e873e41", "d0bc3c139c9a0129a87aa5f724e7bf82b4b04ce6", "9216203ba2dda573ec5401f467cae84149335fcb", "341a08d1854b5ecf871bbb4c7833a435927abbda", "11dc0c04a27ec8f864893086430329efa7448335", "9cea29601e72fd8e6ef8419aa31ddc103eceb7f8", "3dff11679346f5344af1018cad57fa14cc349f2f", "128985b85556c30ad405863f2a34340049957616", "24679ccb0586642553a21e9fcd8aa5a57f97cabe", "11cbbb9f48fcaeb4950defe38ed861171ced309b", "3c4776e5f96ebe8a6de1a855f523a28c687eb994", "2937fd34930ef843a0bd8d6886f9915b8695b7b4", "61e2f35b4ef5d569aa3084e3118ea16df200965f", "059eb34d95c73e37dca8e35b0ac5a2fb0142f3ee", "3c78905888472f4db518b8deb055e115586d3084", "5c634d35db77e635aa97ab31adee0653c80962e8", "ef5363f8d378ccbddbc8f2d3ec26517b75184f62", "29ce41f7a9cfc2e20fbb2085e9189d7a1fd67640", "2cdcddb08ae6060e94cba6c9b2b58b87324e686f", "11a08a7a0442bfdb8f3f50f0b18096287b2daca3" ], "paperAbstract": "We describe BUDS, a declarative language for succinctly and simply specifying the implementation of large-scale machine learning algorithms on a distributed computing platform. The types supported in BUDS--vectors, arrays, etc.--are simply logical abstractions useful for programming, and do not correspond to the actual implementation. In fact, BUDS automatically chooses the physical realization of these abstractions in a distributed system, by taking into account the characteristics of the data. Likewise, there are many available implementations of the abstract operations offered by BUDS (matrix multiplies, transposes, Hadamard products, etc.). These are tightly coupled with the physical representation. In BUDS, these implementations are co-optimized along with the representation. 
All of this allows for the BUDS compiler to automatically perform deep optimizations of the user's program, and automatically generate efficient implementations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035937" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/28b65b0d7519a598c8229dbe54b7a29e4f071ef7", "sources": [ "DBLP" ], "title": "The BUDS Language for Distributed Bayesian Machine Learning", "venue": "SIGMOD Conference", "year": 2017 }, "28c5d290a2b044dc4a3352ee9e692de48c82e57d": { "authors": [ { "ids": [ "2549020" ], "name": "Yang Wu" }, { "ids": [ "2421465" ], "name": "Ang Chen" }, { "ids": [ "1719236" ], "name": "Andreas Haeberlen" }, { "ids": [ "33779522" ], "name": "Wenchao Zhou" }, { "ids": [ "35206168" ], "name": "Boon Thau Loo" } ], "doi": "", "doiUrl": "", "entities": [ "Backtesting", "Data structure", "Debugger", "Debugging", "Software-defined networking" ], "id": "28c5d290a2b044dc4a3352ee9e692de48c82e57d", "inCitations": [ "728cb61e78d55bc5039ed78920b20259a1135cb3", "64ad3b92f61a441c5b4080b0ac9291109a919886", "3efbf0223dc54fa48bcfd9652333911c4d796ecc", "53c0617eb76ed39f3ba9f3a45374839d7904ef93" ], "journalName": "", "journalPages": "719-733", "journalVolume": "", "outCitations": [ "273de61c65c39e0e55942ea166a473e63ddaa02c", "114f664a25c411b708c0f0058c3f455b5a1f3e9b", "6c10b6e6e098dfc52e48023e2db0709ce140fbc6", "153506e97f5db120d28f0f4c726cbb5d751baa00", "17a23aaab0a713b7863ada44eca0c252a243c6b1", "119af470b90b725c847c4b1fd25aea9a6c5b2b57", "24a963758371e511e3749c865b14f697358f025c", "ad8c8feae36e649d885af3df3d427a3ea40651c2", "50b3005872797dd0d559306e9105a2ccc40b4d1a", "083e9b12c5566d953efdf6d90e77638a6b0c3693", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "13042f371ddb28793bf4329702a8df8082a76532", "2077cc18da002721390a23392ce4a25d19c3e2a2", "24c3330d34d640945e0eb99fe4a0b1c31695a8cb", "2839048b432d1563f292a485fa59443bb19e2d78", "05607111cf79330d56164a10d351dbf94e2cfa44", "6410b6cc29af234544f7706194aba20d6c4c90ae", 
"0558c94a094158ecd64f0d5014d3d9668054fb97", "00db012933a992dcd072eb7559f65a69ff257602", "111864cac232d8a9c170bd63069eb4af155a9f7b", "12789fd5b47542937d1b83ef8b99bdb9c7a70dec", "6409e64aed68fcc9e3fdc35b87dd168eeb440d32", "61797d1af87a27a89b5bb5fd4a03de061e5dc0e3", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "2d1c0b3b79d618bb50e111360fc4211f6f262bcb", "3ec1a36e9e12a85d02adaa8ed682ee04f73ae332", "121180b8c56026deb122eb738547944417239c66", "260376f12d3df66f105f03b5afd03c5562c5a96f", "99854e92c37b986b4f1ce2c15efc2ea6220ba8fa", "405377ca200df3f7da390c37516fe13582e70776", "2fac216f660ddd5c8eabcaadb342ed117b32bb2b", "3136ad216d30bdff223e5c3f02e07f980a6a45a5", "016ca0cee16190907a911d874e98fbc6dfa5a36c", "0314da057cfbf61d752674b7d8527e8be9ddc3b8", "4350f7972bde4655c29d297b732edb67f36eb827", "0c97dfc049282a97222999ee4ddfa216a72c74a1", "79aec2093b2a1b0197e7d145b5cf86abc70fee3e", "0a7151c200bf97973453ec05a28012cf03cf906b", "2b0c044181e70ee8eacd2db26c31a03d5ec24c9c", "069103feb2d2d3f1b0115b484d5c2f978a983df0", "6801c6db43e280e2fab4268d4fd3eccc5957413a", "3d95c58c595130caa94cdc1fcdb0f9b9f12cb805", "2afc8d9b3a0d17fb926a6a6dd05b1fb307130a27", "33c9bc92c2a5ef616618c0d569b71733c62c8603", "1f0ea586a80833ee7b27ada93cc751449c4a3cdf", "51d1cfdf9233cab9cc40f72bf049c2ad2f36082c", "211b998175c29dd62e7f854301ed88bcb03a1fa5", "35339f6f2e99c04920f21883df1db8004436cdc7", "19114b7a2f5243a47e80590cc11a2d8ec5b96308", "240b650045b4f59c58220e9cbac7f2f91bdaa3ec", "516cf9cc4d0886e1cd91832230a2d7645426a3ec", "0385a0c8b707d70bef33bb308d321b2647da0ca3", "46bcd5cfb3fa13aeb81fa7811b86dfa22bbb0310" ], "paperAbstract": "When debugging an SDN application, diagnosing the problem is merely the first step: the operator must still find a fix that solves the problem, without causing new problems elsewhere. However, most existing debuggers focus exclusively on diagnosis and offer the network operator little or no help with finding an effective fix. 
Finding a suitable fix is difficult because the number of candidates can be enormous. In this paper, we propose a step towards automated repair for SDN applications. Our approach consists of two elements. The first is a data structure that we call meta provenance, which can be used to efficiently find good candidate repairs. Meta provenance is inspired by the provenance concept from the database community; however, whereas standard provenance can only reason about changes to data, meta provenance can also reason about changes to programs. The second element is a system that can efficiently backtest a set of candidate repairs using historical data from the network. This is used to eliminate candidate repairs that do not work well, or that cause other problems. We have implemented a system that maintains meta provenance for SDNs, as well as a prototype debugger that uses the meta provenance to automatically suggest repairs. Results from several case studies show that, for problems of moderate complexity, our debugger can find high-quality repairs within one minute.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_wu_yang.pdf", "http://www.seas.upenn.edu/~yangwu6/paper/metaprov-tr.pdf", "http://www.cis.upenn.edu/~ahae/papers/meta-provenance-tr.pdf", "http://www.cis.upenn.edu/~yangwu6/paper/metaprov-nsdi2017.pdf", "http://www.cis.upenn.edu/~angchen/papers/nsdi-2017.pdf", "http://www.cis.upenn.edu/~ahae/papers/meta-provenance-nsdi2017.pdf", "http://www.seas.upenn.edu/~angchen/papers/nsdi-2017.pdf", "http://www.seas.upenn.edu/~yangwu6/paper/metaprov-nsdi2017.pdf", "http://repository.upenn.edu/cgi/viewcontent.cgi?article=2064&context=cis_reports", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/wu", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-wu.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_wu_yang.pdf", 
"http://www.cis.upenn.edu/~yangwu6/paper/metaprov-tr.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-wu.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/6eae/eedd90f536f30c9df2b264feffd23c5d03a7.pdf", "s2Url": "https://semanticscholar.org/paper/28c5d290a2b044dc4a3352ee9e692de48c82e57d", "sources": [ "DBLP" ], "title": "Automated Bug Removal for Software-Defined Networks", "venue": "NSDI", "year": 2017 }, "28cc5f7674eb19d6321b1351ffd5732eed2e725e": { "authors": [ { "ids": [ "2076127" ], "name": "Wasuwee Sodsong" }, { "ids": [ "2099519" ], "name": "Robert Mittermayr" }, { "ids": [ "38304641" ], "name": "Yoojin Park" }, { "ids": [ "1894492" ], "name": "Bernd Burgstaller" }, { "ids": [ "3260423" ], "name": "Johann Blieberger" } ], "doi": "10.1007/978-3-319-64203-1_39", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_39", "entities": [ "Analysis of algorithms", "Benchmark (computing)", "Computation", "Data parallelism", "Deadlock", "Graphics processing unit", "Lazy evaluation", "Lock (computer science)", "Model checking", "Multi-core processor", "Parallel computing", "SPIN model checker", "Synthetic data", "Thread (computing)" ], "id": "28cc5f7674eb19d6321b1351ffd5732eed2e725e", "inCitations": [], "journalName": "", "journalPages": "538-552", "journalVolume": "", "outCitations": [ "6d54c4c946a2321220be85ced5414703ca64dec2", "17e720581fb87f3c2b42b6f2fa126a676203744d", "22be0e241218fd85809c0a0446e946702d2ee6b5", "a90b77180c2accbdd8499d11f8bc2bd6e790f689", "9e2a1aef7e31831753016fa022a293a97f903918", "dbecce5b49f2966c8a0bd74fa8784ccd7d74147e", "5caef7533f749adc5fc2bf59a691e67e1deb3de4", "0213e124eff26719ea7253d4f6a2532fe148a0b0", "71967d7b93cb115b48a39f40f6073d28fc3fd819", "55c6a8dbe094c73ede0701c5dc6947a4e28dfe66", "99ff7878f7967ff20c4d9f6dea5bd1b24d216639", "01094798b20e96e1d029d6874577167f2214c7b6", "47a0c5cd0edb8161de3eeb74b3cfa534ea9b42df", "36d1a9628a71ce1a9c5bebc6745523a4f7a6cc74", "756644ba7176ed212124b04fac76bbf07ab373f4" ], 
"paperAbstract": "Kronecker algebra is a matrix calculus which allows the generation of thread interleavings from the source-code of a program. Thread interleavings have been shown effective for proving the absence of deadlocks. Because the number of interleavings grows exponentially in the number of threads, deadlock analysis is still a challenging problem. To make the computation of thread interleavings tractable, we propose a lazy, parallel evaluation method for Kronecker algebra. Our method incorporates the constraints induced by synchronization constructs. To reduce problem size, only interleavings legal under the locking behavior of a program are considered. We leverage the data-parallelism of Kronecker sumand product-operations for multicores and GPUs. Proposed algebraic transformations further improve performance. For one synthetic and two real-world benchmarks, our GPU implementation is up to 5453\u00d7 faster than our multi-threaded version. Lazy evaluation significantly reduces memory consumption compared to both the sequential and the multicore versions of the SPIN model-checker.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_39", "http://elc.yonsei.ac.kr/publications/europar17kronecker_preprint.pdf", "http://elc.yonsei.ac.kr/publications/europar17kronecker_slides.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/28cc/5f7674eb19d6321b1351ffd5732eed2e725e.pdf", "s2Url": "https://semanticscholar.org/paper/28cc5f7674eb19d6321b1351ffd5732eed2e725e", "sources": [ "DBLP" ], "title": "Lazy Parallel Kronecker Algebra-Operations on Heterogeneous Multicores", "venue": "Euro-Par", "year": 2017 }, "28e4ae18a652e7d67df3e3fa6f4703ae9ef930e9": { "authors": [ { "ids": [ "1850433" ], "name": "Yuqing Zhu" }, { "ids": [ "3312710" ], "name": "Jianxun Liu" }, { "ids": [ "8808120" ], "name": "Mengying Guo" }, { "ids": [ "8422763" ], "name": "Yungang Bao" }, { "ids": [ "7884468" ], "name": "Wenlong Ma" }, { "ids": [ "26316229" ], "name": "Zhuoyue 
Liu" }, { "ids": [ "8799238" ], "name": "Kunpeng Song" }, { "ids": [ "7608051" ], "name": "Yingchun Yang" } ], "doi": "10.1145/3127479.3128605", "doiUrl": "https://doi.org/10.1145/3127479.3128605", "entities": [ "Algorithm", "Apache Hive", "Apache Tomcat", "MySQL", "Search algorithm", "Throughput", "Time complexity" ], "id": "28e4ae18a652e7d67df3e3fa6f4703ae9ef930e9", "inCitations": [ "53cc6bf305539b4bd8829df42996e0eb12512434", "d308092a5da30ef6687b6a26287f1e54ba4c5e10", "a55a685d254caeeb4f071062d5910734f8135057", "40dca29aea76ae426791e4c6bf0e24f3ae88e318", "5655f16d3c46537f951b5686c905f15c2f35991c" ], "journalName": "", "journalPages": "338-350", "journalVolume": "", "outCitations": [ "019902292dff81eae20f3e87970dd7a1151d9405", "78f13ea3bed2c301cc84ff4ecf09c1852705f490", "1521c2c45835a1dd3d29f9886010c652063b7bad", "208cdf363b4fc8343815393aed9551eed033df18", "339075774e069a3f00c94d143bf94be8aab19efd", "d23dc281afd418772c3dea9b056013471882ac15", "1f1f47da8fff8da53589d7eab36d6bae32b2c3d2", "9aa0d7253574e50fe3a190ccd924433f048997dd", "5f41cc7c081b294f684928c35a08626490ec4f8a", "d5153c5c6aebe99bcded20306fddd972c4001289", "0d868efa67bf06b1f784d60769c082fd9a58893e", "1edb070e3530f1a02ecd76f6621f7719d13b2109", "40fecfef456c760912685b372151732b38e69d6e", "1dc8960ee89252ba82d881b17211542017e4c597", "99ea614c021d300e46a6f1dda26423899c37e7f0", "502e33592ce72ccd5d68dae4acf3bb3c4d056e68", "0ab1bc82e8e0416e3c9ac9e32250396da0f7a899", "9ee6209432316baf6776838917e06bca4d874747", "4920d01a247e5a6ba93de7ad6c15d99304dea89d", "52d81096f46be0e75f85e0b7eeda65640c281630", "f344f55736db99bc09c89d9995964e0687cf0c9d", "af973efa53c43f603b3198e397a72d178cad0e6a", "1aa12511e66688225ad8fac3d68d5093855b9c70", "3fbc9316a792974ba103be76702a6ce5c8d33f2d" ], "paperAbstract": "An ever increasing number of configuration parameters are provided to system users. But many users have used one configuration setting across different workloads, leaving untapped the performance potential of systems. 
A good configuration setting can greatly improve the performance of a deployed system under certain workloads. But with tens or hundreds of parameters, it becomes a highly costly task to decide which configuration setting leads to the best performance. While such task requires the strong expertise in both the system and the application, users commonly lack such expertise.\n To help users tap the performance potential of systems, we present BestConfig, a system for automatically finding a best configuration setting within a resource limit for a deployed system under a given application workload. BestConfig is designed with an extensible architecture to automate the configuration tuning for general systems. To tune system configurations within a resource limit, we propose the divide-and-diverge sampling method and the recursive bound-and-search algorithm. BestConfig can improve the throughput of Tomcat by 75%, that of Cassandra by 63%, that of MySQL by 430%, and reduce the running time of Hive join job by about 50% and that of Spark join job by about 80%, solely by configuration adjustment.", "pdfUrls": [ "https://arxiv.org/pdf/1710.03439v1.pdf", "http://arxiv.org/abs/1710.03439", "http://doi.acm.org/10.1145/3127479.3128605" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/28e4ae18a652e7d67df3e3fa6f4703ae9ef930e9", "sources": [ "DBLP" ], "title": "BestConfig: tapping the performance potential of systems via automatic configuration tuning", "venue": "SoCC", "year": 2017 }, "28fa6a23284eccca0b473c3bd3f0bf1427d712b1": { "authors": [ { "ids": [ "21061784" ], "name": "Om Rameshwar Gatla" }, { "ids": [ "2441395" ], "name": "Mai Zheng" } ], "doi": "", "doiUrl": "", "entities": [ "Emulator", "Interrupt" ], "id": "28fa6a23284eccca0b473c3bd3f0bf1427d712b1", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "274e495824827f5a9dc1ba3ab62620445e6b3d4b", "65b6079988ec29ef3c6d62daf88b0f9e2ceee14c", 
"232ac001bcd047ae90735980c2f913bd0aef9bd9", "170fc81c89a7fa5541d078b8400529fdea94af18", "34ef9c71821bd3ed7fa52c9178e1ee272fedb803", "8a7536f311d22bd588c5bc2306d54d13effaee82", "47b78e7eb12859a141aed6a28a4e301eb0352629", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "f4f37512b9786dbd9980d343e47fdc41032d0ca6", "8c06fb59a79b3b47dff8588302d8e6514a7f7a4a", "08632fe2b934ed15d3499e7321282c81adc2c390", "4cda001811dea15a35894cd1b657003bb7f3c6de", "146071dbe8246bb6c0fa8090816d21f7e0f9c73f", "129418f330e939f22a1727061c9519e0cfaeef24", "544e79671d523e54f976c8bc68221a8b96672a0c", "12a0046a1197ae63c3d616c74e367dc583cef196", "1af5f199dbe6f03aef7bd404a4236e9b29ba4410", "128c3e04314e6fca8deed005d74a3d1ba36ad293", "32d23ce43877aa8cd385a8e01f366329dd015a5c", "42512431ca7fffdbc80eb7280d093efcead3d48d", "74cd2f76c2fc982332930a5053656158b67eaa47", "26c713c0775ac388370492c26e25d24d9e430e9a", "265d18ced11e2e64d98afa97b0e86965e68101f7", "7b4a2675963950cffa95dd060802867479e4cca0", "4108e4635351d6f2d0916ee19d0a0ef878649c3c", "2f2941de162a17eda0ffbf7eef8981ee25808df5", "39e3d058a5987cb643e000bce555676d71be1c80", "71e648761fd56da03bab284127b1fa0331f2a2df", "13d6c568c770ff5a070072e720fb34b0037cdab8", "16f79f07da0a6e222bbdf5fb784f44267fd5cf3d", "1029c647d5a3906bc1cada451bffea7e6da72ee3", "65f1dc9c8c1aacdfcb1eb5455190c7a82b40823b", "883a595fd76cb4dc0509a1005040286b31610059", "02ac23384523c2e2f9bc52cd29313dfd5aad22a3", "1693e83e47a99667f4bd6ad6e24d8b62a1ba22c8", "098ce48343fd36e2b857b5d055973a01317b3c17", "3b2af12a43d06338dd62681328c75a1999fc87fd", "3533159037bc2c11bde6b314e040ee113ae52bdd", "5a04b332441e2ff025313bfd303383e13050a274", "013e7549a51257a29044c2e909881ec639b39d42", "16a455aeacd14529bee92b0c197619fa2d173151", "f11d2748e1e26f3b01b54db85ddcc287b678cb04" ], "paperAbstract": "File system checkers serve as the last line of defense to recover a corrupted file system back to a consistent state. Therefore, their reliability is critically important. 
Motivated by real accidents, in this paper we study the behavior of file system checkers under faults. We systematically inject emulated faults to interrupt the checkers and examine the impact on the file system images. In doing so, we answer two important questions: Does running the checker after an interrupted-check successfully return the file system to a correct state? If not, what goes wrong? Our results show that there are vulnerabilities in popular file system checkers which could lead to unrecoverable data loss under faults.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-gatla.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/hotstorage_slides_gatla.pdf", "https://www.usenix.org/conference/hotstorage17/program/presentation/gatla" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/28fa/6a23284eccca0b473c3bd3f0bf1427d712b1.pdf", "s2Url": "https://semanticscholar.org/paper/28fa6a23284eccca0b473c3bd3f0bf1427d712b1", "sources": [ "DBLP" ], "title": "Understanding the Fault Resilience of File System Checkers", "venue": "HotStorage", "year": 2017 }, "28fd5f8829daab351a0cc68fd221c23ecf90b045": { "authors": [ { "ids": [ "32067673" ], "name": "You Zhou" }, { "ids": [ "2785094" ], "name": "Yian Zhou" }, { "ids": [ "38108942" ], "name": "Min Chen" }, { "ids": [ "38125149" ], "name": "Shigang Chen" } ], "doi": "10.1145/3084452", "doiUrl": "https://doi.org/10.1145/3084452", "entities": [ "1-bit architecture", "BMP file format", "Bitmap", "Central processing unit", "Computer data storage", "Denial-of-service attack", "Experiment", "Flow network", "High-level programming language", "HyperLogLog", "Network processor", "Overhead (computing)", "Stock and flow" ], "id": "28fd5f8829daab351a0cc68fd221c23ecf90b045", "inCitations": [ "ffa6aa67be5f3c5eec2e30635de0d781484807ba" ], "journalName": "", "journalPages": "67", "journalVolume": "", "outCitations": [ 
"5eca06b11339c38fbfe4006051c7af31bc749941", "4a8c08d0d6165f07d05c1738efed2f102bba183b", "262468bebe2c75d4af7bef9f23b390ebc7a7676c", "9c09ac1b5e1440808bbb0877d9945675153f2bab", "64e7d37666ea9f172cc63cbef84d1d123d6e95e5", "4c8d1cbbf73941bb5b303ea3c88bf4420c4ce8a8", "07482b569b205b04fdab1114cb71e851df9a58cf", "11af8e2ac10831dd74ca3ed3a0118afceb86412f", "a261ba03d6c0ee1486bba40607b805e47acf5279", "d989e64c836acc036e7ec28893de41950e57006a", "269a701af1aba00837838030c9a862be26acd1da", "06beeda7be321eb0a294af55b7689d22d77a5b2b", "6ae743e6babde557c84a478f2bea5b73c27bfce1", "271fc6e0178c4837287f2a1ba193a515036541ec", "353787781c36257bb6a1d7231e270281c0920fa5", "963d607576459161243146535c9a4456b99772b4", "2986f9db238c57b638d54248c4ed1fcb5e4f459f", "0c4fca06060aaef83e7d96484683eaecf512e955", "b36c153be410c0d937d7583de557c0375506d15a" ], "paperAbstract": "Persistent spread measurement is to count the number of distinct elements that persist in each network flow for predefined time periods. It has many practical applications, including detecting long-term stealthy network activities in the background of normal-user activities, such as stealthy DDoS attack, stealthy network scan, or faked network trend, which cannot be detected by traditional flow cardinality measurement. With big network data, one challenge is to measure the persistent spreads of a massive number of flows without incurring too much memory overhead as such measurement may be performed at the line speed by network processors with fast but small on-chip memory. We propose a highly compact Virtual Intersection HyperLogLog (VI-HLL) architecture for this purpose. It achieves far better memory efficiency than the best prior work of V-Bitmap, and in the meantime drastically extends the measurement range. 
Theoretical analysis and extensive experiments demonstrate that VI-HLL provides good measurement accuracy even in very tight memory space of less than 1 bit per flow.", "pdfUrls": [ "http://arxiv.org/abs/1704.03911", "https://arxiv.org/pdf/1704.03911v1.pdf", "http://doi.acm.org/10.1145/3078505.3078593", "https://www.cise.ufl.edu/~sgchen/paper/sigmetrics17.pdf", "http://doi.acm.org/10.1145/3084452", "https://www.cise.ufl.edu/~sgchen/Publications/2017%20Persistent%20Spread%20Measurement%20for%20Big%20Network%20Data%20Based%20on%20Register%20Intersection.pdf", "https://arxiv.org/pdf/1704.03911v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/28fd5f8829daab351a0cc68fd221c23ecf90b045", "sources": [ "DBLP" ], "title": "Persistent Spread Measurement for Big Network Data Based on Register Intersection", "venue": "SIGMETRICS", "year": 2017 }, "292f2d0442a00edb07aa84e92278ad0777bfc4cb": { "authors": [ { "ids": [ "7549121" ], "name": "Yizhou Zhang" }, { "ids": [ "1732157" ], "name": "Andrew C. 
Myers" } ], "doi": "10.1145/3133894", "doiUrl": "https://doi.org/10.1145/3133894", "entities": [ "APL", "Code reuse", "Expressive power (computer science)", "Extensibility", "Generic programming", "Hierarchical and recursive queries in SQL", "Interface (Java)", "PL/I", "Parametric polymorphism", "Programmer", "Programming language", "Type class", "Type system" ], "id": "292f2d0442a00edb07aa84e92278ad0777bfc4cb", "inCitations": [], "journalName": "PACMPL", "journalPages": "70:1-70:31", "journalVolume": "1", "outCitations": [ "68abb53b93f2abf18fbddf9986ae02cba0c45936", "3ad3327f18f460dc2d148ccdb6097edab70d9d2f", "0b7c1bc9636d8cc66c36fb7e676d3badfe5df696", "0f8671f461b4966851b935dfc1c7daccbf962c32", "afd116d04968c3ba3169965168c4037148c3f0cd", "284b689f86842b17fb12e7db5be4460ea4b7ba03", "a1d0c109155a9040f6b24355806f123744f3c841", "4fd5d9a636b437c4de1f8fe6758da439b9b2e70d", "32edd655b87b3e355a923643d48042e08686c06b", "97aabd8e34d7ecfe3220a9ec356625f15c996943", "7a7fbc0d50c95eea7313d8a96a51fdd43e113a31", "fde386065ff35452f7a7fc273ac3303075c6005f", "42e608b6586de82ae939de496da197d53020e91d", "0cfcb17f28b3010c5edd78c44d395d1c2d184036", "3d33fc86523ba107e2087c165a29df559a9c8f61", "0a40cd94906489d04fcbd0e5bd4742abca866521", "f8ef7be3d80d1d4b001ecd9e8593c8c4954c52e6", "17fe58e6115711ce4d5ceef941c60eb6d6898dcf", "4a2a301a1fe31f3f321a1557caea0458417c6179", "10b206ff108069c460bf1623f40ca4521393292e", "4bd3ad1143d4e426cccf7351c025ee4dd66eaca2", "53f39fbaadaeb92ede4445a1c9d0c8213791701b", "193650be355e7e112527fe4b36c6e76eeb6cf518", "14962787624eefa5b49d416099c33245408bb090", "c19c45531a5f6c114e9da1dbfea8aedc31c31e0d", "1f9f12e63c6750c29c1010530686c43d1b85a510", "591e232a9000316ce472423ac37750e6b39b62bd", "992542732c484cd63c04a08ddfc7ab8e4e4cf158", "cc69d57e53a4e1e4555e34be80106b1e0d8a0993", "a8507025bfa3b29d1efee53011d6ae4d76f4c325", "2f8f40a8f061709b20239eab9c9c830db63106e5", "133b2f98430e7739fd405ea57f024bd0235d5dcd", "b8719183f3579e6f0bdf2d98ee500097a28cb9cf", 
"0206ca8100a02488065b6a09893dc77c1dd30cf5", "86ba238264370f109c7852322201d5c624d8b217", "2ee725a17031f377028987db8057e5f660eaaf02", "38e31e68af9b260c51d5abc03b27041780e81e4b", "0f7be62ac797ef6711328a3d746fbcbcd0c5fd01", "2bcc56aa8f39ec3d5f16c0064e461e90a6a1764f" ], "paperAbstract": "Parametric polymorphism and inheritance are both important, extensively explored language mechanisms for providing code reuse and extensibility. But harmoniously integrating these apparently distinct mechanisms—and powerful recent forms of them, including type classes and family polymorphism—in a single language remains an elusive goal. In this paper, we show that a deep unification can be achieved by generalizing the semantics of interfaces and classes. The payoff is a significant increase in expressive power with little increase in programmer-visible complexity. Salient features of the new programming language include retroactive constraint modeling, underpinning both object-oriented programming and generic programming, and module-level inheritance with further-binding, allowing family polymorphism to be deployed at large scale. The resulting mechanism is syntactically light, and the more advanced features are transparent to the novice programmer. We describe the design of a programming language that incorporates this mechanism; using a core calculus, we show that the type system is sound. 
We demonstrate that this language is highly expressive by illustrating how to use it to implement highly extensible software and by showing that it can not only concisely model state-of-the-art features for code reuse, but also go beyond them.", "pdfUrls": [ "http://www.cs.cornell.edu/~yizhou/papers/familia-oopsla2017.pdf", "http://doi.acm.org/10.1145/3133894", "http://www.cs.cornell.edu/andru/papers/familia/familia.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/292f2d0442a00edb07aa84e92278ad0777bfc4cb", "sources": [ "DBLP" ], "title": "Familia: unifying interfaces, type classes, and family polymorphism", "venue": "PACMPL", "year": 2017 }, "293ca58169024b0f40ae3342200737767321f6b1": { "authors": [ { "ids": [ "40044675" ], "name": "Asaf Samuel" }, { "ids": [ "3154206" ], "name": "Eitan Zahavi" }, { "ids": [ "1785739" ], "name": "Isaac Keslassy" } ], "doi": "10.1109/HOTI.2017.14", "doiUrl": "https://doi.org/10.1109/HOTI.2017.14", "entities": [ "Algorithm", "Network congestion", "Programming paradigm", "Routing", "Scalability", "Scheduling (computing)", "Self-awareness", "Stock and flow" ], "id": "293ca58169024b0f40ae3342200737767321f6b1", "inCitations": [], "journalName": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "journalPages": "9-16", "journalVolume": "", "outCitations": [ "06154716d6d51256ed2bb014ef65ec8b5d41aa26", "05b37f4722eddf503ca0d237aaeba14eb9ab9ea3", "a32bbaf7b97c3d0d18ad93a52d34c0771ff19bf0", "34d2d6e84c4a15eaefe7bcdf49015aad867b25ee", "c678e962b158153924bbb24c4900b84375be7e57", "12d3952369fd92db86d4a3fff0a6fa8de3509095", "55c5a2c1f2f6f66646baf64405464357fa7df8a0", "1e157cfbd2fa3ca1b786bdf6bcf3f6cf40ed39bb", "346965e005bbffdfb02d6536b057859dc51c879b", "1434811cbe1c7831f0ee2974e9093d1e57461f0f", "8b0a16e9ab419a2096dbd55d5326607cbc385025", "47f91197bf11a475c14b287050edd1476201d586", "00f6f16f4b76e931d3924e56674a74fca8d94df3", "2e3cc2d55770aac26d3ce0cb6ddd96dbbcfec4cc", 
"b2956f170dfe1df2155608ecfcab79687a131aa8", "ca200e61ce15f552bea554fc4635b4dbcd50466e", "129567778989fab23b50812b3df30e899e2d6a4e", "5a71fd875ff689961ad6dce1046e039af30fc88c", "025652412d507a8cf98ecacd8a44d32ce28995e1", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "093b8adeb29ee4c17b3528c1b9791f275630c8f2", "663e064469ad91e6bda345d216504b4c868f537b", "4687fdf3c77ef00700fdf1399f7dd81bfe87ef97", "2806af1373a9f1f42cf9fe199649c6a24b51e78c", "231ba17921ebd80e95771e28dfb5082e169d5a53", "c9cb7f464a72cfd833b1932be6bc310c64145784", "2a628e4a9c5f78bc6dcdf16514353336547846cc", "3167f508865250baa3fd6b924d77a25274968bd1", "2e0057911766d411b7a342c8bae2d6e3d29c47cd", "24ac696e598717e4752af563c9a477b22c29fefd", "3309a3a199478f24f0c5544d8e79aff7c7ed4128", "122229239aeba1eb4f1623adb40f1845c582a520", "506027ee1a7754aaeb3f6c5dfbd94a1a34179095", "5c5d03e884d4f0094b217c62267466fa11432c8e", "a86eb622eaaae24053a158a857624470af790bb6" ], "paperAbstract": "The network plays a key role in High-Performance Computing (HPC) system efficiency. Unfortunately, current HPC routing solutions are not application-aware, and therefore cannot deal with the sudden HPC traffic bursts and their resulting congestion peaks.To address this problem, we introduce Routing Keys, a scalable routing paradigm for HPC networks that decouples intra- and inter-application flow contention. Our Application Routing Key (ARK) algorithm proactively allows each self-aware application to route its flows according to a predetermined routing key, i.e., its own intra-application contention-free routing. In addition, in our Network Routing Key (NRK) algorithm, a centralized scheduler chooses between several routing keys for the communication phases of each application, and therefore reduces inter-application contention while maintaining intra-application contention-free routing and avoiding scalability issues. 
Using extensive evaluations, we show that both ARK and NRK significantly improve the communication runtime by up to 2.7x.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HOTI.2017.14" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/293ca58169024b0f40ae3342200737767321f6b1", "sources": [ "DBLP" ], "title": "Routing Keys", "venue": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "year": 2017 }, "294792bb0138327171ecaacb8c2101a14b13812d": { "authors": [ { "ids": [ "10995410" ], "name": "Kiwan Maeng" }, { "ids": [ "19999240" ], "name": "Alexei Colin" }, { "ids": [ "2601423" ], "name": "Brandon Lucia" } ], "doi": "10.1145/3133920", "doiUrl": "https://doi.org/10.1145/3133920", "entities": [ "Compiler", "Computer data storage", "Emergence", "Failure rate", "Idempotence", "LLVM", "Programming model", "Software developer" ], "id": "294792bb0138327171ecaacb8c2101a14b13812d", "inCitations": [ "8525e8890da440477140e3f947d61b059a9eea8f", "e3aca014b04e379e2dc1b57f5fd637dff61ae872" ], "journalName": "PACMPL", "journalPages": "96:1-96:30", "journalVolume": "1", "outCitations": [ "7f1c44fe476cf69b003e4df7c3c85954da70c997", "fbce9b077f4d6075ef4db500595825c0a36a7538", "4b0f7bde293bc2e0c9c35fc191e5106d96cb559c", "497be8bc880ca84d302bd24f34c01e8f9e5951ee", "05a1357946de5eca42a477b7b268db4944219a2e", "24724ad8962a9e04eb496fddaefe9708f6960601", "bf5497e15f22233cbc2a4d0c3cc2c36f26738701", "6f7303c27681b38167b9471cd77b16cce31aa0a6", "a822358f91441367cefa9eb7f9aca03a3ed553fe", "faffb9d5ab06a7913911a3fa4dbdbedafaf15752", "4ace5fbb66763f82d52d4f5e70c001100847e9b1", "09ed565e84057123c15ab12b885c235d1f241aed", "063d302f35c4698598c518bf1f9d720d3bcda02e", "42d5c5c227ed56f265dae8dcf4444ec646afdbc9", "2d472861877c1508daa92c2339c17e7670093981", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "84b4a8b2f2fe58c57223dcb1e839d3106c820380", "91b9389bae769e2cf1934ca7189217604a613ac9", "2922edf0152641eed6cfa4f42b9f1ab9b9c71c97", 
"18ef5a3821a4e1c64c66e5e8fa5d506647458e78", "3edaf0de6bbaf152a00cadcb1cb62a52b17fdee5", "3845e498eea927b40e78e39eb31d53a380759bf2", "7f418c6ff21a2588f57b24a8dd4f3205e7080419", "938286fa80fe31fa3e35f450989f27659296f25f", "2bf4940710deb2571e93b1c922e8e7452e854afd", "6e90c995cc9caa0f7d9d68d536f5e16e9bcbbcd6", "82168d6fc7b02128fc79e7b521cd81aaf5e7059a", "aa9f6d4394b5d31d52fcc0cab05b9f20362bccc7", "4037fa9ff27be77096ceb2ca14119ed9e076c669", "46a231fcd7705fc1ebc910f5dd8ecbb59fc25e95", "5ecb81eb3d92d7142135bca4360a3435065343ed", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "94783d113951822195d4ba44599a8fcbdef9d4bf", "359bbca0ecd92321a861ffd578d75a74ff1644c6", "1f527fed31971e07093695c128c10b4f3c20d109", "e8da1f050efd07a64c3fb287ae82f4129d581124", "21a13e8143465f0b857e6d60a4e6d88874915afc", "2c1ea92d6a4237ede5ea112f1880710b25bec8b3", "314919c141024c71cb17d525ecd8016138335002", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "0f6d95c5a675e404ddb767c687ba3e9f24a1a4ab", "1afcff13547570faf678489fdb18c491cb506d51", "0bde70e9c5296aad56e7a7bcb1c04fff16052523", "c6854888735b5d21a8b9f81292c52ac6f7a47952", "03033fff684ff9e465328932f8197a57e073ef17" ], "paperAbstract": "The emergence of energy harvesting devices creates the potential for batteryless sensing and computing devices. Such devices operate only intermittently, as energy is available, presenting a number of challenges for software developers. Programmers face a complex design space requiring reasoning about energy, memory consistency, and forward progress. This paper introduces Alpaca, a low-overhead programming model for intermittent computing on energy-harvesting devices. Alpaca programs are composed of a sequence of user-defined tasks. The Alpaca runtime preserves execution progress at the granularity of a task. The key insight in Alpaca is the privatization of data shared between tasks. Shared values written in a task are detected using idempotence analysis and copied into a buffer private to the task. 
At the end of the task, modified values from the private buffer are atomically committed to main memory, ensuring that data remain consistent despite power failures. Alpaca provides a familiar programming interface, a highly efficient runtime model, and places fewer restrictions on a target device's hardware architecture. We implemented a prototype of Alpaca as an extension to C with an LLVM compiler pass. We evaluated Alpaca, and directly compared to two systems from prior work. Alpaca eliminates checkpoints, which improves performance up to 15x, and avoids static multi-versioning, which improves memory consumption by up to 5.5x.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133920", "http://abstract.ece.cmu.edu/pubs/alpaca.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/294792bb0138327171ecaacb8c2101a14b13812d", "sources": [ "DBLP" ], "title": "Alpaca: intermittent execution without checkpoints", "venue": "PACMPL", "year": 2017 }, "296d404677ef334f27b3ef1b0bb73d6fa3e8b209": { "authors": [ { "ids": [ "2130920" ], "name": "Reena Panda" }, { "ids": [ "1703238" ], "name": "Lizy Kurian John" } ], "doi": "10.1109/PACT.2017.44", "doiUrl": "https://doi.org/10.1109/PACT.2017.44", "entities": [ "Apache Cassandra", "Benchmark (computing)", "Big data", "Database", "Emulator", "Hardware performance counter", "Microarchitecture", "MongoDB", "MySQL", "NoSQL", "Proxy server", "SQL", "Simulation", "Translation lookaside buffer" ], "id": "296d404677ef334f27b3ef1b0bb73d6fa3e8b209", "inCitations": [ "ffe909083b5a4c4eeea43b14c22a73870a00911d", "889901712be503d74c6d06876a31f3a0275b4b14", "417514911cccd2804189cb29aa2acca33e6d5229" ], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "105-116", "journalVolume": "", "outCitations": [ "9aa0d7253574e50fe3a190ccd924433f048997dd", "0653e2ed9f683868cb4539eb8718551242834f6b", "59680d7d7feef0286605cb81d35bc8fa2292a608", 
"02fe9b425b78a0211ccfaa2710f949fa2a769406", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "b77e7ae60aed8f307075c5a261274938da41e1e8", "6d0a6c6147bfc2a501048644505ce86e2c36bcf4", "554b44bc1290f16fb1d3117de078ac832f341d4e" ], "paperAbstract": "Early design-space evaluation of computer-systems is usually performed using performance models such as detailed simulators, RTL-based models etc. Unfortunately, it is very challenging (often impossible) to run many emerging applications on detailed performance models owing to their complex application software-stacks, significantly long run times, system dependencies and the limited speed/potential of early performance models. To overcome these challenges in benchmarking complex, long-running database applications, we propose a fast and efficient proxy generation methodology, PerfProx that can generate miniature proxy benchmarks, which are representative of the performance of real-world database applications and yet, converge to results quickly and do not need any complex software-stack support. Past research on proxy generation utilizes detailed micro-architecture independent metrics derived from detailed functional simulators, which are often difficult to generate for many emerging applications. PerfProx enables fast and efficient proxy generation using performance metrics derived primarily from hardware performance counters. We evaluate the proposed proxy generation approach on three modern, real-world SQL and NoSQL databases, Cassandra, MongoDB and MySQL running both the data-serving and data-analytics class of applications on different hardware platforms and cache/TLB configurations. The proxy benchmarks mimic the performance (IPC) of the original database applications with ∼94.2% (avg) accuracy. 
We further demonstrate that the proxies mimic original application performance across several other key metrics, while significantly reducing the instruction counts.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975285", "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.44" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/296d404677ef334f27b3ef1b0bb73d6fa3e8b209", "sources": [ "DBLP" ], "title": "Proxy Benchmarks for Emerging Big-Data Workloads", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "2983eb4742c6394e3df02ec918e00d8d4d3206be": { "authors": [ { "ids": [ "34088725" ], "name": "Kishwar Ahmed" }, { "ids": [ "2123959" ], "name": "Jason Liu" }, { "ids": [ "2133039" ], "name": "Xingfu Wu" } ], "doi": "10.1109/MASCOTS.2017.25", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.25", "entities": [ "Event-driven programming", "Expect", "Job scheduler", "Jumpstart Our Business Startups Act", "Power supply", "Provisioning", "Run time (program lifecycle phase)", "Scheduling (computing)", "Simulation", "Supercomputer" ], "id": "2983eb4742c6394e3df02ec918e00d8d4d3206be", "inCitations": [], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "175-186", "journalVolume": "", "outCitations": [ "1e8233a8c8271c3278f1b84bed368145c0034a35", "b20cd8bad06afab7c054f5e9fdcf830e8b43479d", "e0140bbf28f231ccddb639b299ba515f1f5f0e15", "54145fffaa16d9f612e0a82ad5bb2f1552b45b02", "8303554a48d900acf0a432fe06e48d48c5962601", "699a084a65b2cf43fb774f085b84a7e303c16651", "8dc253c5cdc741af4e3d4914757f48fd533ea051", "1550f51777cce2df13e64fd59c4cd45603b4a9f8", "20f0c2ab3cc8cbb6a533abf455f0894e69d387f2", "f6ab527a5919b48b66908954a3086947c5bffde6", "2951408162adf00a4aa989b813f69454af7b348e", "22ddd63b622aa19166322abed42c3971685accd1", "0a9c8fef61634e392f9de6f34361cc1c690f7a00", 
"62f7e49f77ca13b7690ce0106235063f6b0771d1", "68e012b1ea42244e531b346a2828256488808856", "244030cb8e73144251ef3701ac758168031d17f9", "0470f9981471066f45d317f99d44c9cf0e8ca70f", "2e9bbb3ce20a9f07998b498d1f31932eb0ec2d7b", "a7557a9fdd1572fd913fd0ed0fa927f220198942", "306a6e0b36008ba6502b919e20485ade97237641", "346ee93e610a95c60394900f857d398bc2ae74df", "a9c1fa73f2c830f1f1d8526042a357a697dcfab4", "07c8dc1238106ed94d5357b72e4bfebd256f162f", "6117dc3db5b28d34589ca05035dac863975783a5", "225e97c61dbd208a8c9923acce30c1e53a6aa0e2", "8eb97dbcc1aecb4dc82284dd58889c913f510125", "6f2e34bf83ed95070c4ccbbedd40ee86a9147840", "9d742984f04f2d04ec8a765aaba143abfb41ccac", "9bb6ee03d15def91dd6d99e6cf0dfbf503964a5a", "0e22cdb7b14e2e14b4bb0a0cb0caa1b9e3018090", "5154304f6167fdc5afa5e0f9abfa0d3f7892c5f1", "a42906baf2859879ec74f0a2c22f7c1b07ea809c", "0f190faad397ccae6e8e7cfc6235ccc494ed0410", "170d5cff8d2c252444d57f5f576dd52c7b891ac8", "c93e05556b136a47dbd0fb556bda34462d2f214a", "7ee529c7a72f7f228ba1e60011d5e1d5078730d6", "65d49f6bebb1e3b38181ba9f5970329e2d235b58", "584be288527b72b14da26bdfc9f80ea712350a95", "d23d3027011f40ab3fde365c70f8d6a5a55772f7", "29f97d7142302d7ca5c876a396fefb154549013d", "c53f2014a9c1e995525b269c2d770c718c15b98f", "5debb1f7efbc61405b980c8a858c12ee2c9843ec" ], "paperAbstract": "Demand response refers to reducing energy consumption of participating systems in response to transient surge in power demand or other emergency events. Demand response is particularly important for maintaining power grid transmission stability, as well as achieving overall energy saving. High Performance Computing (HPC) systems can be considered as ideal participants for demand-response programs, due to their massive energy demand. However, the potential loss of performance must be weighed against the possible gain in power system stability and energy reduction. 
In this paper, we explore the opportunity of demand response on HPC systems by proposing a new HPC job scheduling and resource provisioning model. More specifically, the proposed model applies power-bound energy-conservation job scheduling during the critical demand-response events, while maintaining the traditional performance-optimized job scheduling during the normal period. We expect such a model can attract willing participation of the HPC systems in the demand response programs, as it can improve both power stability and energy saving without significantly compromising application performance. We implement the proposed method in a simulator and compare it with the traditional scheduling approach. Using trace-driven simulation, we demonstrate that the HPC demand response is a viable approach toward power stability and energy savings with only marginal increase in the jobs' execution time.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.25", "http://people.cis.fiu.edu/liux/wp-content/uploads/sites/4/2017/07/mascots17-demandresp.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2983eb4742c6394e3df02ec918e00d8d4d3206be", "sources": [ "DBLP" ], "title": "An Energy Efficient Demand-Response Model for High Performance Computing Systems", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "298d85920197142e2b6237c31e38936d77c80047": { "authors": [ { "ids": [ "2109345" ], "name": "Kayhan Dursun" }, { "ids": [ "2691974" ], "name": "Carsten Binnig" }, { "ids": [ "2109957" ], "name": "Ugur \u00c7etintemel" }, { "ids": [ "1746961" ], "name": "Tim Kraska" } ], "doi": "10.1145/3035918.3035957", "doiUrl": "https://doi.org/10.1145/3035918.3035957", "entities": [ "Computer data storage", "Data structure", "Database", "Hash table", "In-memory database", "Locality of reference", "Query plan" ], "id": 
"298d85920197142e2b6237c31e38936d77c80047", "inCitations": [ "87367fb395c509bc54ceb5d0f26b0917df55a178", "18217d68fca6b1f5305c80a733a4a717e3e35052", "8c7044398d1994b12a9bf7212e11398f59eaf446" ], "journalName": "", "journalPages": "1275-1289", "journalVolume": "", "outCitations": [ "1dc19048a74d9bc7564f0114dc201ffc9e77d43a", "91abb4e9a7ccd81122203cc215e6a70e6231125b", "24c3330d34d640945e0eb99fe4a0b1c31695a8cb", "67eb4c1794be54919266f70b5bf8ba7a6824f091", "92e0243e1a73c77ef8b90292e3798f765b38f269", "05c30866c26648bab70bbecb0b8e2919b1b1d1b8", "7e5abd1dccace3288bc214f810fedce942de68b3", "daf208ad61c0ba239439ab46f1d1d4bbac5b69f0", "888764f05a60d770cfc0b49944308fd92ed45ee5", "0c8e7d740eb65402f7dc87da399730a15df75b33", "a7a7110ca7fe9eec39f4c709920f9cad45dafb19", "1619cb89bf559cb4b21c3eead6a475f33e75d9e3", "3ca63ef954e07b65e67c50bdd9f443815c80fc44", "4f05a78c2e2abf932915c33c6a2bb9c726ce4ac2", "5d17cd9bd02396e5506b141c9e9a3bc12f9d6125", "b2c28faec6f88bbfec636cf4cc10e40b04f33284", "9611f849a05e5880ef90725e19eaa40d1805317d", "0bde6332ca75d8fbe095e07e566ea49d810b8182", "729d26e90b3404a85f56188ee8af070c70cb1e81", "624f3f81b2665ccc9fbc117e7082fc6ef08ec17e", "56bf17fb68d1b32838066997a888325d72eea83d", "7ead08393fa2000ea03bb1cc1e3e6e0ac991aac6", "3c457cec00499e41dd05516db79c4daf836102ad", "363116c764453d9b740c46d23b1f5a3c5801d76e", "cccbc3da776de497ca9d0dde2d4a76dc6c1b0fc4", "291470e5e557ac526f79a59c83e98fbf53406401", "e75c5d1b7ecd71cd9f1fdc3d07f56290517ef1e5", "5c9d62f348c7ed09ee51e1d56643ced039ec1121" ], "paperAbstract": "Reusing intermediates in databases to speed-up analytical query processing was studied in prior work. Existing solutions require intermediate results of individual operators to be materialized using materialization operators. However, inserting such materialization operations into a query plan not only incurs additional execution costs but also often eliminates important cache- and register-locality opportunities, resulting in even higher performance penalties. 
This paper studies a novel reuse model for intermediates, which caches internal physical data structures materialized during query processing (due to pipeline breakers) and externalizes them so that they become reusable for upcoming operations. We focus on hash tables, the most commonly used internal data structure in main memory databases to perform join and aggregation operations. As queries arrive, our reuse-aware optimizer reasons about the reuse opportunities for hash tables, employing cost models that take into account hash table statistics together with the CPU and data movement costs within the cache hierarchy. Experimental results, based on our prototype implementation, demonstrate performance gains of 2x for typical analytical workloads with no additional overhead for materializing intermediates.", "pdfUrls": [ "https://arxiv.org/pdf/1608.05678.pdf", "http://arxiv.org/pdf/1608.05678v1.pdf", "https://arxiv.org/pdf/1608.05678v1.pdf", "http://doi.acm.org/10.1145/3035918.3035957", "http://arxiv.org/abs/1608.05678" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/298d85920197142e2b6237c31e38936d77c80047", "sources": [ "DBLP" ], "title": "Revisiting Reuse in Main Memory Database Systems", "venue": "SIGMOD Conference", "year": 2017 }, "299c0dfd2119894ccb68e3fa0a42e529af4a402e": { "authors": [ { "ids": [ "1682679" ], "name": "Timos Antonopoulos" }, { "ids": [ "2725205" ], "name": "Paul Gazzillo" }, { "ids": [ "2895372" ], "name": "Michael Hicks" }, { "ids": [ "33681628" ], "name": "Eric Koskinen" }, { "ids": [ "1728982" ], "name": "Tachio Terauchi" }, { "ids": [ "39714885" ], "name": "Shiyi Wei" } ], "doi": "10.1145/3062341.3062378", "doiUrl": "https://doi.org/10.1145/3062341.3062378", "entities": [ "Analysis of algorithms", "Automaton", "DARPA Grand Challenge", "Digital footprint", "Java", "Java bytecode", "Lempel\u2013Ziv\u2013Stac", "Quotient filter", "Regular expression", "Self-assembly", "Taint checking", "Time complexity", "Timing 
channel", "Tracing (software)" ], "id": "299c0dfd2119894ccb68e3fa0a42e529af4a402e", "inCitations": [ "17d4908d2331f9b6eb50d73cd30b678074e63a09", "e94c2b2a52f635f728f7df4a0285aafe808dd33f", "c8b82a6791711abfab16812aa97d6b5981dbf1a9", "99373ef7eaaaf9831a5f16544bc6173bf2a3e342", "2effb0ea8b7e1b79955f8ffba2e237c070ab4fd4", "162e35a780c1fb8591a6bb80d13dfae6e829bd4f" ], "journalName": "", "journalPages": "362-375", "journalVolume": "", "outCitations": [ "68b395370c58998d1541932110dc27bb180b6af9", "0fda9bbccd6908637e2ead1cef69f091bfda75d4", "300c4f7b82c3a50eef679a5e57191e06a60ae797", "3671af9d7655977e573bd123f93470f978ea7a62", "59684cf4f60456f5eea2991a0d7f90095f37a657", "2ebeb81f0ff104c5d4f4d242edb08ef1fa6b3d9e", "182a81eaf31b1a76be592c0890182cacd4199be0", "6421c677dfc71fe0fada74b4adae27417cd50d00", "76fcae4e56fcf3074d75253b50b79d2e73822895", "5ee347834b3f909356be88ecd0f0cd66af8b1e66", "c8f6a8f081f49325eb97600eca05620887092d2c", "5854eeea90fafaa29b1bf09e09c301ab197a5d41", "0b84fb0ec9739e04f9b0fcbe040718d9f735200f", "7c555dfba844337d9ed1d56c231d99448069d83e", "451ce08a5335b00cda49877ba1335e95a91c5af7", "02fd1a072a72d24c5f61d709a1b3ce863da32729", "2effd77c019d1f574109d05fee8b1e27c9429c79", "5630cbd4e0a0f00ecc2fab7001e424f23adbf0a2", "9263789ba999bb726c9c7fdf0bbc77844ee03272", "250b04ee5e66a61d90e85efed312d14204ab90d9", "357ae774e66fa96f1f43db21b1bbeee6310a9c81", "6d2cb21b26297616eca97eacdf5cf350cfb6302e", "6af5688fad9277e47dcaefc7642db77f1a8604fc", "6a2572958b05d0bf0e65c179340b20d91ac0c787", "746a78f8c0dabc20d161244923063c4b689b1010", "9228c83bdf58fafd05e2517e777ee7402e297178", "a5ade56a2f37f3f5f5b956b0c5546de9a3428537", "3c1f11a1da88c8237842a246ed1a5dbe230737be", "43c11eae3ceb570ef627e502a3f041f0cf9a0c06", "27dadb8815afd52857a42a1d48949ed978401c48", "615168555150d80752a1c195229642acbe6fb3d9", "683b1f19926adda043f42565c51640378dc2bd9a", "5a6682af0ad2eb0e08e6f52c0101119c603b663c", "63ed588f834d3225fd520055128c3d8c6edc94c7", "886f1a67bd997a1e25d465b32a70d4b8a527a0e5", 
"eabb4b2a55ee1b56331804306d8dac77d18599a0", "08a293a579e02c37a880adc39621b16caa34ba83", "1be37ab7b64c78351e20952d4261033328ecd69c", "23e1ec4e0211d4b704f96b2078a306fbbe2e859a", "817eb7690c05d2f0caf1ed2faeb5b10c28bd3836", "164b11b1f9f8432db88424b1e4f9ba6e09e5c894" ], "paperAbstract": "We present a novel approach to proving the absence of timing channels. The idea is to partition the programâ\u0080\u0099s execution traces in such a way that each partition component is checked for timing attack resilience by a time complexity analysis and that per-component resilience implies the resilience of the whole program. We construct a partition by splitting the program traces at secret-independent branches. This ensures that any pair of traces with the same public input has a component containing both traces. Crucially, the per-component checks can be normal safety properties expressed in terms of a single execution. Our approach is thus in contrast to prior approaches, such as self-composition, that aim to reason about multiple (kâ\u0089\u00a5 2) executions at once. \nWe formalize the above as an approach called quotient partitioning, generalized to any k-safety property, and prove it to be sound. A key feature of our approach is a demand-driven partitioning strategy that uses a regex-like notion called trails to identify sets of execution traces, particularly those influenced by tainted (or secret) data. We have applied our technique in a prototype implementation tool called Blazer, based on WALA, PPL, and the brics automaton library. 
We have proved timing-channel freedom of (or synthesized an attack specification for) 24 programs written in Java bytecode, including 6 classic examples from the literature and 6 examples extracted from the DARPA STAC challenge problems.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062378", "http://www.cs.umd.edu/~mwh/papers/blazer.pdf", "https://www.paulgazzillo.com/papers/pldi17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/299c0dfd2119894ccb68e3fa0a42e529af4a402e", "sources": [ "DBLP" ], "title": "Decomposition instead of self-composition for proving the absence of timing channels", "venue": "PLDI", "year": 2017 }, "29a818924b0242167d75af0f456ef84680a98ad7": { "authors": [ { "ids": [ "2245695" ], "name": "Matthew Poremba" }, { "ids": [ "3456251" ], "name": "Itir Akgun" }, { "ids": [ "3160909" ], "name": "Jieming Yin" }, { "ids": [ "2163220" ], "name": "Onur Kayiran" }, { "ids": [ "27905006" ], "name": "Yuan Xie" }, { "ids": [ "3308405" ], "name": "Gabriel H. 
Loh" } ], "doi": "10.1145/3079856.3080251", "doiUrl": "https://doi.org/10.1145/3079856.3080251", "entities": [ "3D lookup table", "Data center", "Data structure", "Delta encoding", "Dynamic random-access memory", "Interconnection", "Non-volatile memory", "OLAP cube", "Scalability", "Skip list", "Systems architecture", "Volatile memory" ], "id": "29a818924b0242167d75af0f456ef84680a98ad7", "inCitations": [ "6b6a5f2127b5ffbccd54d4823a9ca3a73969f3d1" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "678-690", "journalVolume": "", "outCitations": [ "95f34f92013d98c336fe5bc3f3c0d6ca8dc9f89b", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "17d14aed5dfe63cd6d42abbb151b9142368f9342", "386c295bae8dcc634ae2b593bb52376c6fe78ea0", "07c3c9f1be15b52e259309b2e0a9ca71fa7dc76f", "4bad51c7685254155733ee8def6a1294378aa1af", "670e1ef22cc09c2e0b0a8bf35f04d9db7e1bc824", "0dc38d3afb68f617e23eced7ce2994a0a82feb11", "2621b8f63247ea5af03f4ea0e83c3b528238c4a1", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "de7d933ed07ba0fe9275efbd6251f19f0883ce0f", "cbbb8186de93d9e79d20e2122b9a7903d6b08cd1", "72dfed87d3549369ae93dbbbfd371f88c4e344c8", "0f60dbcde160b77903ee2b6e4831801a455c04de", "4bce8e7c13331dbffa05d6cfc086efd04e0317a9", "32dc6016338a2098147e5edbb72c7c5670f78133", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "c140b5f0166fc0d2bd01fdfc9a866c2d9bfbf898", "2d4a94915f4c0f2aea8d320b0d27dfc48e867493", "4c689148ee5e9d6d116f6babbfab21bf2116802e", "296ba67ecd48c7833e10520fdc99b9dc0ca7c584", "6813f13990e0553c7cadf2e0a3ffab217bc4e396", "4e8505919eb22265f107ebbeeee3fa78bf6d893a", "88b66e698dd4efabcd8c2ec2f3116921d819305d", "32a0f72fb9fb6e3614f7312702f23f2d241c1101", "36de396ee9d1c9991e44c01be35e5206d79c3328" ], "paperAbstract": "High-performance computing, enterprise, and datacenter servers are driving demands for higher total memory capacity as well as memory performance. 
Memory \"cubes\" with high per-package capacity (from 3D integration) along with high-speed point-to-point interconnects provide a scalable memory system architecture with the potential to deliver both capacity and performance. Multiple such cubes connected together can form a \"Memory Network\" (MN), but the design space for such MNs is quite vast, including multiple topology types and multiple memory technologies per memory cube.\n In this work, we first analyze several MN topologies with different mixes of memory package technologies to understand the key tradeoffs and bottlenecks for such systems. We find that most of a MN's performance challenges arise from the interconnection network that binds the memory cubes together. In particular, arbitration schemes used to route through MNs, ratio of NVM to DRAM, and specific topologies used have dramatic impact on performance and energy results. Our initial analysis indicates that introducing non-volatile memory to the MN presents a unique tradeoff between memory array latency and network latency. We observe that placing NVM cubes in a specific order in the MN improves performance by reducing the network size/diameter up to a certain NVM to DRAM ratio. Novel MN topologies and arbitration schemes also provide performance and energy deltas by reducing the hop count of requests and response in the MN. 
Based on our analyses, we introduce three techniques to address MN latency issues: (1) Distance-based arbitration scheme to improve queuing latencies throughout the network, (2) skip-list topology, derived from the classic data structure, to improve network latency and link usage, and (3) the MetaCube, a denser memory cube that leverages advanced packaging technologies to improve latency by reducing MN size.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080251", "https://seal.ece.ucsb.edu/sites/seal.ece.ucsb.edu/files/publications/p678-poremba.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/29a818924b0242167d75af0f456ef84680a98ad7", "sources": [ "DBLP" ], "title": "There and back again: Optimizing the interconnect in networks of memory cubes", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "29b14b6f0aee8cb3ea6da4a5b08a21aaa868bba1": { "authors": [ { "ids": [ "1678924" ], "name": "Jian Liu" }, { "ids": [ "2464929" ], "name": "Mika Juuti" }, { "ids": [ "1803391" ], "name": "Yao Lu" }, { "ids": [ "1735412" ], "name": "N. 
Asokan" } ], "doi": "10.1145/3133956.3134056", "doiUrl": "https://doi.org/10.1145/3133956.3134056", "entities": [ "Artificial neural network", "Cloud computing", "Communications protocol", "Information sensitivity", "Machine learning", "Server (computing)" ], "id": "29b14b6f0aee8cb3ea6da4a5b08a21aaa868bba1", "inCitations": [ "b3f2a11d45757e675be123d55ec0eb192bcca990", "a46fdcae60e683b9fbca3a76530b00f69ad0aa82", "5603325eee0f5d70176860d8cc77a9a9c89289a7", "7b24bea661e4ab8fddd5e2c76d307ffa6e0a4aa5", "a55c8e5fa3c937414b458af2072ff195e9882e14" ], "journalName": "", "journalPages": "619-631", "journalVolume": "", "outCitations": [ "20b5b5c25e2b56693b38fe7f69caddca78872085", "595a00f0975b5d5c28d904ddba1ae5a493316573", "398c296d0cc7f9d180f84969f8937e6d3a413796", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", "7aaede70f5efcb1542a80707c1f0f8b01955a7d2", "05b073c44188946aeb9c410c1447262cbdf77b6d", "0f84a81f431b18a78bd97f59ed4b9d8eda390970", "39f63dbdce9207b87878290c0e3983e84cfcecd9", "6dfcd57ff6e21f14df34b344290ef47d1531a657", "23ae5fa0e8d581b184a8749d764d2ded128fd87e", "35a5824d5dafbc392fdd7edd0ee774a56f9df6d3", "326bb49d3ae9e1e1551028200916192e50004105", "40d68c0011958b9a990c9df65414fcf4fd539c72", "46f74231b9afeb0c290d6d550043c55045284e5f", "5d0c26a4715395e37e1f412a1bcda30c2c55fadd", "0a7196fcadeb009d5582b02ce4aa59546f6036e4", "2da02ea20d499d70ef08a6ecfb71bd985c2828a4", "0e0427aedfed65c8dd688c094b181feacf4eaab4", "2cc157afda51873c30b195fff56e917b9c06b853", "6223684e14778e4d7948e994d2169ebf38e0a95f", "63936fa32f9e75ab2a864daae6791ce02112183d", "0c4867f11c9758014d591381d8b397a1d38b04a7", "5d90f06bb70a0a3dced62413346235c02b1aa086", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "19e6f3ea035d33590a8598be68ac82e6f00ce518", "012b8a941e96594783fb10d3a785e91f13384413", "3ff4a7bcfa42348102cd49f6bf33c8ca85c94472", "0c3b9e8b3d75a914a0add39d017e2455e97d6cf1", "55dda8f230566867acbfaa7bdd08fd8c7b8721ed", "d04f7f8eed11e5e58a41e314b00e49d7424d82ec", "544282f5b1b23b8273b6332bd09504f7bca5da5d", 
"4954fa180728932959997a4768411ff9136aac81", "5c4ac6ef89040d67e00f365fb7f1f86d9d53cf72", "872e1ca858c2eb79558d6a260ebdbd9e8ca1d0f1", "027d6c52be01b583b9a0d9eb8c9364c6b701c656", "dd3b504c092df8bcf0901ad3afb2e74b00b9ed22", "3cfc0b1e3c19ffb422f0c98754c382a9d8fbbc0b", "829220b82ebad22259a89c043798e7b8a683e51c", "dc702d7721b244f3b81d29e41a45716b005d916e", "22aaafd787c6896ec9da2348dfa5f27ddf567cc9", "19c3736da5116e0e80a64db35afe421663c4b4a8", "22bc7549801fd359f932bbdc11b8ca24b87baadf", "6c13367b99656196b079f167f62f279d3ee070ca", "7dd7ec46c05e73d0c447154069ce4860b1dd8d5e", "073daaf4f6fa972d3bdee3c4e4510d21dc934dfb", "171c4f717089b70ac98f348f4d3497f1b440bdaf", "06a1d8fe505a4ee460e24ae3cf2e279e905cc9b0", "4bb6263d482d8f8f9fc8aa0146b70ddca971a671", "2a11eeef2a64752e9e6cb0397508a4045b1b111e", "1fd7fc06653723b05abe5f3d1de393ddcf6bdddb", "59d555e18313ba8f3c0a7c92099b03950fb521ce", "cc89bc832af9b42e9a00a0fafe6a76f3d9ed6209", "38f35dd624cd1cf827416e31ac5e0e0454028eca", "387bcd56e5818b3296f2583d396923baba74204c" ], "paperAbstract": "Machine learning models hosted in a cloud service are increasingly popular but risk privacy: clients sending prediction requests to the service need to disclose potentially sensitive information. In this paper, we explore the problem of privacy-preserving predictions: after each prediction, the server learns nothing about clients' input and clients learn nothing about the model.\n We present MiniONN, the first approach for transforming an existing neural network to an oblivious neural network supporting privacy-preserving predictions with reasonable efficiency. Unlike prior work, MiniONN requires no change to how models are trained. To this end, we design oblivious protocols for commonly used operations in neural network prediction models. We show that MiniONN outperforms existing work in terms of response latency and message sizes. 
We demonstrate the wide applicability of MiniONN by transforming several typical neural network models trained from standard datasets.", "pdfUrls": [ "http://eprint.iacr.org/2017/452", "https://eprint.iacr.org/2017/452.pdf", "http://doi.acm.org/10.1145/3133956.3134056" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/29b14b6f0aee8cb3ea6da4a5b08a21aaa868bba1", "sources": [ "DBLP" ], "title": "Oblivious Neural Network Predictions via MiniONN Transformations", "venue": "CCS", "year": 2017 }, "29b5858fc907446023632d33359ebdf3a47ce763": { "authors": [ { "ids": [ "9035209" ], "name": "Christoph Rettinger" }, { "ids": [ "3328821" ], "name": "Christian Godenschwager" }, { "ids": [ "12519276" ], "name": "Sebastian Eibl" }, { "ids": [ "2063777" ], "name": "Tobias Preclik" }, { "ids": [ "17114269" ], "name": "Tobias Schruff" }, { "ids": [ "16686871" ], "name": "Roy Frings" }, { "ids": [ "1793279" ], "name": "Ulrich R\u00fcde" } ], "doi": "10.1007/978-3-319-58667-0_1", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_1", "entities": [ "Simulation" ], "id": "29b5858fc907446023632d33359ebdf3a47ce763", "inCitations": [ "8fafdad93c6c03708e51e7b69fa42de3b46541e2", "fad7ca95bdb8696c707ce9845f1752c60481772e" ], "journalName": "", "journalPages": "3-21", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_1" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/29b5858fc907446023632d33359ebdf3a47ce763", "sources": [ "DBLP" ], "title": "Fully Resolved Simulations of Dune Formation in Riverbeds", "venue": "ISC", "year": 2017 }, "29c833b9a5e33b684cffcb76fb19997918180136": { "authors": [ { "ids": [ "2574972" ], "name": "Leo Prasath Arulraj" }, { "ids": [ "1743175" ], "name": "Andrea C. Arpaci-Dusseau" }, { "ids": [ "1703415" ], "name": "Remzi H. 
Arpaci-Dusseau" } ], "doi": "10.1145/3050748.3050755", "doiUrl": "https://doi.org/10.1145/3050748.3050755", "entities": [ "Data deduplication", "FreeBSD", "Linux", "Linux", "Open-source software", "Program optimization", "System call", "Virtual Machine Manager" ], "id": "29c833b9a5e33b684cffcb76fb19997918180136", "inCitations": [ "7ef329f9b8bc57ec874a6b66b5125d827380bd09" ], "journalName": "", "journalPages": "112-128", "journalVolume": "", "outCitations": [ "7c0699937a1775a01ee8ec97ca30f5427f020b99", "2387968d22927a79d5d601107e70220763fc6e12", "5bb770af1973f929e8622f17ddf378d439245144", "340569da290505865cb2ba79a4201c7028d4d66a", "4a7872f0177e00c1c3621be90b72299fc3498474", "043029ff68d0449eacae8a67fc62ed4ee03215a2", "26497a6800ece4c608b0fd4d42fab6ae3a9d7af6", "0e316f76dac185ee2d922e64d4659b2e36842196", "ec20eb666574a97d3c7f85d988589b0fb8ee5466", "a3676ecae39afd35b1f7075fc630e28cfbb5a188", "2c628dedb6a1fb0c566ec791c84b93a22dd9aaa9", "3946a5c410ba1980d932cc6d2987b4d935e038d5", "bb5e43dd30a3a60df42652d56781568f5cd0a99d", "03612ec719ad7b08a64ad2110e77fcaa1814eb5b", "40f91d7d050bb8f92e06039c20f150fde9674eaa", "4ab4a666f5e5ed34ac219a9fdc2f70bd1cab0922", "0b6adc0dbc55076dc9c9a8931f4a4df58fd291b6", "a658f8bd1abaf8f06183266d13de25f0559c9592", "3e29cdcdb12ffdb80a734d12c905c1df43e0b6a4", "755cd7170315ce736f556a79ce36296e5b9bf030", "e0508499b4cf5794d5aeaf717e7ad9541e9c2bba", "2960c89331eb7afa86584792e2e11dbf6a125820", "1354abf8baa8b6d39c65758072ae9d0b07227d5c", "27f071ccbea5a4940dcc585ba4cfa9258bf2bcdf", "4096f239b93dfee8fe033db2846a334db9c1f524", "67b2c85458667cb15c13beb66d1559f39637c145", "67ed19a01c5fa2a2000dfb07e821c3e98728367d", "4f27ece79537ebf5f8e4c8d8429b81ae59082035", "9c48179c07963a9fad69a359362c0aee87f9fe18", "04aae75ab8a040225024b6a96ab7cbb28ef74d0a", "0edd896bc82b7fb65ef63cb1e3512db795c7f7d4", "1e875b3536e3c8bcd976bc27d8c8a4d06e849626", "5ff311923cd8f80057b2cfc15cf7ec3ac0a6fdbc", "65a2cb8a02795015b398856327bdccc36214cdc6", 
"4b4ee1ee9bbfd9527fba0bbd761bd61a59f96a48", "02ed0ec3bb95776b5c06e2784810b501c4d3f053", "ea5bb6cf0b63c643e5325ee2bef143d2ca450f3d", "511feec4f8875108f093c0b91ffe1d841423bdde", "c2bc2e165fe6af3de5de600af57cb0b301ce0c0f", "1ee3e65a3e5cb1b814a39258aa0f7cb60a51f955", "a35295a26ded98e5649a94a8ad03baa8cc8d9dd3", "94a62be8355bf5be1edcc881a26559e5258e0f1d", "612a8604f26c32457f47e52aa4675fd5dab84c7c", "0ce479229630e55e732597cf9b2aeb5018aae4c2", "045729ec838ecc50be166fe4511506ac4a08226d", "3574657705475722b6c398c266805f758268778b", "1346e4134764c1d226c357dce3f9a58c55909719", "009af3a1fa932ea1a9efa8d34cb0b6e32feae15e" ], "paperAbstract": "We introduce Sky, an extension to the VMM that gathers insights and information by intercepting system calls made by guest applications. We show how Sky gains three specific insights -- guest file-size information, metadata-data distinction, and file-content hints -- and uses said information to enhance virtualized-storage performance. By caching small files and metadata with higher priority, Sky reduces the runtime by 2.3 to 8.8 times for certain workloads. Sky also achieves 4.5 to 18.7 times reduction in the runtime of an open-source block-layer deduplication system by exploiting hints about file contents. Sky works underneath both Linux and FreeBSD guests, as well as under a range of file systems, thus enabling portable and general VMM-level optimization underneath a wide range of storage stacks.", "pdfUrls": [ "http://pages.cs.wisc.edu/~arulraj/vee17-sky.pdf", "http://doi.acm.org/10.1145/3050748.3050755" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/29c833b9a5e33b684cffcb76fb19997918180136", "sources": [ "DBLP" ], "title": "Improving Virtualized Storage Performance with Sky", "venue": "VEE", "year": 2017 }, "29ea6c62b030ab1570cc332ac1f2cc4a7af9cdcd": { "authors": [ { "ids": [ "19254661" ], "name": "Sze Yiu Chau" }, { "ids": [ "2111695" ], "name": "Omar Chowdhury" }, { "ids": [ "2071310" ], "name": "Md. 
Endadul Hoque" }, { "ids": [ "2618949" ], "name": "Huangyi Ge" }, { "ids": [ "1828965" ], "name": "Aniket Kate" }, { "ids": [ "3271869" ], "name": "Cristina Nita-Rotaru" }, { "ids": [ "1740882" ], "name": "Ninghui Li" } ], "doi": "10.1109/SP.2017.40", "doiUrl": "https://doi.org/10.1109/SP.2017.40", "entities": [ "Authentication", "Black box", "Interoperability", "Library", "Library (computing)", "Public key certificate", "Public key infrastructure", "Root certificate", "Secure communication", "Symbolic execution", "Transport Layer Security", "X.509" ], "id": "29ea6c62b030ab1570cc332ac1f2cc4a7af9cdcd", "inCitations": [ "32187449ad863fa01597b1a857ab5dc8677769cc", "65c6bda16861410915c4b50d2540c9d058a1bb57" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "503-520", "journalVolume": "", "outCitations": [ "1f7e5e582663868ed2f6763f98066ca278177a61", "fc881e8d0432ea8e4dd5fda4979243cac5e4b9e3", "d33d17e66a1d0b5559ee4ca0cb035d76f22a3584", "36222f8eb2ccf21ca345e15186cea64506581543", "52dfa23a859d6d99dbd2450c4670aaa2d78e36f7", "17372d3c3b72edb24b5d354a247b61f512eea758", "29c242b2b73c376a61344877d5488f33e066ecc8", "0ab393affe9d674ef790be14fdfade368f3e5989", "509e60bc9d1d01ec10934f1c9b0faf80a33c3088", "16fad84b5cd76c403c94b16353fa6a4d64f19251", "48fc8f1aa0b6d1e4266b8017820ff8770fb67b6f", "265f7bdcb3e6bb6d32146113e3929e2365bbd5af", "4866f9428056529a77889569d24397489b77c502", "580772507bca7b9a0318d251e13f5cd8fb028d7b", "903666296dbaddb006eb5ade1279362b8e6eea4b", "409aff4d69fd273874d1b33b31a1e035f6a4b9ac", "03086e02b706e6955735ed15603b1015334bc095", "5aac8e7cefc388e35a015d6ee551b429e9062429", "5af49fc44d7988562fe7c216562eb9e8f6742400", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "5459cd31ff0f182da81d5c58026546b995118676", "7ff157398d5ae3d73a7d7908a348d6a26cbc58e5", "05ae289245b5a9222a1a6fc3f36910c3cb0f4662", "04b319357d6bab89ec9575f4b044d7609aa4296a", "8a2484f8a7d57588b8bf3635c6e7d7ba2a10f76e", "82f729361d8ea2e7bb98e15f6813fd3748961211", 
"13615cea5da31373d1f1e79cdb4f40cec9d795ca", "08d23b65f12e358a3d902a961cf31a2ab748bea9", "18e97c2f513c024ffa714ae5942885a891fa3d3f", "0a951a77c4c5b96d48e96c4c82c3397c5bba187c", "3066557b1206eaab284dca4650d0f5fd0febac3f", "11443efe465ad544f478524da6c66c085b16e28b", "2ee822dbb7ef60cd79e17cd2deec68ac14f13522", "dd9127d71020f019cb328a442131906dd5c16255", "b7ed7f31f0b92c81f60441039f5db937323100a8", "18e965d40f7dacb88bca7b0a231eca5adbfb6201", "ec3967cd26596970d6a73b02a90073ca2425f28d", "bdfd34769911b3fb40eadf71bfb34a0ec98fe160", "120c819da02fcb312986ac492f723ef9ea3223b5", "605c9f90474e91c195d5be651018535a3770b452", "685ce1991e456149ea742bd165eeb00f9cc21a27", "0b53fab8dea434e1046836159e184d9565ffd401", "208ed7512ea84f22a004920ea0b4c475bc836abc", "76a48ed7e6daa2a2322ae07d97a2441b3d1053df", "2c21f9488edfb2586327528bb59461a41363fc42", "2100d14e855e4c66e845fb4dbddf00849b1be758", "8182e2082f59ce12ead5c8a26b0981007449ef4c", "889a6da567fc63dced1d145e0244964c1169fcb7", "03705958cb453b90654564c6b735031b2cb60ba6", "40860f2db7516f09836ef5bbd65288a4e0957af7", "65b6079988ec29ef3c6d62daf88b0f9e2ceee14c", "c92c3484c70fdfab3f021ae684218addbf53348a", "8169647e744faf5f08de3d5af69a22acf9532563", "119f99dd30e725040b5e5633ece9962de71f9d84", "06cf71fac4897f061c65570eb6304d63f9a47e14", "3194375689d38cf96e6fb8ec9585ec90d3dd58c7", "15228f96447fd6b634179e80441502a2dc2d7a84", "621fdec4176f938e473430bca52205b10d193f2c", "23eb53170c6de9ff5024db120eda200816fa803f", "0b8609443d3a9c4330b58f1f91569357d5d744e6", "9daa5773cd7abe382d55af087b6be7b2f1f2a3cd", "24e404b34d3746a7d6719da31f9df188e9d34a55", "0b5b42425deb371d8dc60ac9b090c7232702370a", "39ac27363c06ade948e0cc3e7797523122a19085" ], "paperAbstract": "The X.509 Public-Key Infrastructure has long been used in the SSL/TLS protocol to achieve authentication. A recent trend of Internet-of-Things (IoT) systems employing small footprint SSL/TLS libraries for secure communication has further propelled its prominence. 
The security guarantees provided by X.509 hinge on the assumption that the underlying implementation rigorously scrutinizes X.509 certificate chains, and accepts only the valid ones. Noncompliant implementations of X.509 can potentially lead to attacks and/or interoperability issues. In the literature, black-box fuzzing has been used to find flaws in X.509 validation implementations, fuzzing, however, cannot guarantee coverage and thus severe flaws may remain undetected. To thoroughly analyze X.509 implementations in small footprint SSL/TLS libraries, this paper takes the complementary approach of using symbolic execution. We observe that symbolic execution, a technique proven to be effective in finding software implementation flaws, can also be leveraged to expose noncompliance in X.509 implementations. Directly applying an off-the-shelf symbolic execution engine on SSL/TLS libraries is, however, not practical due to the problem of path explosion. To this end, we propose the use of SymCerts, which are X.509 certificate chains carefully constructed with a mixture of symbolic and concrete values. Utilizing SymCerts and some domain-specific optimizations, we symbolically execute the certificate chain validation code of each library and extract path constraints describing its accepting and rejecting certificate universes. These path constraints help us identify missing checks in different libraries. For exposing subtle but intricate noncompliance with X.509 standard, we cross-validate the constraints extracted from different libraries to find further implementation flaws. Our analysis of 9 small footprint X.509 implementations has uncovered 48 instances of noncompliance. 
Findings and suggestions provided by us have already been incorporated by developers into newer versions of their libraries.", "pdfUrls": [ "https://csaw.engineering.nyu.edu/application/files/4715/0825/7083/CSAW17_paper_32.pdf", "http://homepage.cs.uiowa.edu/~comarhaider/publications/symcert-oakland2017.pdf", "https://doi.org/10.1109/SP.2017.40" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/29ea6c62b030ab1570cc332ac1f2cc4a7af9cdcd", "sources": [ "DBLP" ], "title": "SymCerts: Practical Symbolic Execution for Exposing Noncompliance in X.509 Certificate Validation Implementations", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "29eb6849b0eb147673f51d6bc8a501131e4e7c52": { "authors": [ { "ids": [ "3169583" ], "name": "Aurojit Panda" }, { "ids": [ "1808867" ], "name": "Ori Lahav" }, { "ids": [ "1702632" ], "name": "Katerina J. Argyraki" }, { "ids": [ "1702872" ], "name": "Shmuel Sagiv" }, { "ids": [ "1753148" ], "name": "Scott Shenker" } ], "doi": "", "doiUrl": "", "entities": [ "Cache (computing)", "Correctness (computer science)", "Datapath", "Failure rate", "Firewall (computing)", "Immutable object", "Middlebox", "Reachability", "Scalability", "Verification and validation" ], "id": "29eb6849b0eb147673f51d6bc8a501131e4e7c52", "inCitations": [ "4059d74b7c3de3fa0ed5a22f55e5ac3c21f9975b", "6e7640c890edf815eb8a22e5f6b6d625a12676cb", "a88d74590c07b2bd62176e3e1788e4ad4224cdb7", "5a000302035aaacb52a884683d32bffcd43df717", "c6b91eaa1a045ec6302fbb01baee2b9be3d97855", "304ba54357f0ebd37d35d33fe8b3703c979b514e", "841dd77064cd38a749c550f85ee1336733eee300", "29f8c9d31e25979f1106ea716627578487e9b62d" ], "journalName": "", "journalPages": "699-718", "journalVolume": "", "outCitations": [ "16d95a30bf189a0fe37af03eee9cc8af49709cd2", "27f4001214ce0d449eb05d33626f444526accc7c", "186388efa5fc67b940bda85881977b3efd8b879f", "016ca0cee16190907a911d874e98fbc6dfa5a36c", "119af470b90b725c847c4b1fd25aea9a6c5b2b57", 
"19114b7a2f5243a47e80590cc11a2d8ec5b96308", "8d976ffd66e11526d88083b0ed1fba29866252f0", "30a7bba8d47d7eca9f7826a721e62032a5c8e77a", "260376f12d3df66f105f03b5afd03c5562c5a96f", "383bd23079fbe5194f1843d59967d602140d73c1", "168d5cbbc2251d1afd71d9c7f29dfa2a5d597b58", "4866f9428056529a77889569d24397489b77c502", "75aa5650e95026b4ce5e0f6166ed8809ebffbf83", "3de4aaba1034065cde1ea3784ea5dfb2504fb43a", "1451cbe10acd01d5a7925c90dd22db8359abd23f", "0385a0c8b707d70bef33bb308d321b2647da0ca3", "28ed63405cc70fbcef04b04fd3e61fd7b23b59bc", "08a572c06bdaa78d85a287111832d188e8e07f0b", "1c0c851e96fa13fc45e3298f4715f5d5d0a97e41", "55e4c1c02a7499cc99082ceaaf13d32af46ce845", "1b82b8e7e1d66e2186b083450e665d4ef1babd99", "2257a1b615511177da021ee72ddffe8fbb2849cc", "24a963758371e511e3749c865b14f697358f025c", "07ca726af9c235573654b85e8d478bd7303aa62f", "04b319357d6bab89ec9575f4b044d7609aa4296a", "4d638f59bf6c9d1150b99743e281932e3b9c1959", "507b5fe36714eb6aa8acd96d1eef14212eddb82b", "0236458f3f78ec42fbdfe931a13cd200ef97a0c7", "86a7d2cf1473e597b196a96d922a457ea686fa21", "4534c15b4760cb29a0ce74fcd43297fe83f2f277", "91a0b2b741fef4e50357d7eeca52d03457fb4118", "9e778ebe9a4b0955b802f7e46323dcffc6174f94", "61797d1af87a27a89b5bb5fd4a03de061e5dc0e3" ], "paperAbstract": "Recent work has made great progress in verifying the forwarding correctness of networks [19\u201321, 26]. However, these approaches cannot be used to verify networks containing middleboxes, such as caches and firewalls, whose forwarding behavior depends on previously observed traffic. We explore how to verify reachability properties for networks that include such \u201cmutable datapath\u201d elements. We want our verification results to hold not just for the given network, but also in the presence of failures. 
The main challenge lies in scaling the approach to handle large and complicated networks, We address by developing and leveraging the concept of slices, which allow network-wide verification to only require analyzing small portions of the network. We show that with slices the time required to verify an invariant on many production networks is independent of the size of the network itself.", "pdfUrls": [ "http://arxiv.org/pdf/1607.00991v1.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/panda-mutable-datapaths", "https://people.eecs.berkeley.edu/~apanda/assets/papers/vmn.pdf", "https://arxiv.org/pdf/1607.00991v1.pdf", "http://arxiv.org/abs/1607.00991", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-panda-mutable-datapaths.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-panda-mutable-datapaths.pdf", "https://people.eecs.berkeley.edu/~apanda/assets/papers/vmn-nsdi17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/29eb/6849b0eb147673f51d6bc8a501131e4e7c52.pdf", "s2Url": "https://semanticscholar.org/paper/29eb6849b0eb147673f51d6bc8a501131e4e7c52", "sources": [ "DBLP" ], "title": "Verifying Reachability in Networks with Mutable Datapaths", "venue": "NSDI", "year": 2017 }, "29f8c9d31e25979f1106ea716627578487e9b62d": { "authors": [ { "ids": [ "1957537" ], "name": "Jiangyuan Yao" }, { "ids": [ "35037028" ], "name": "Zhiliang Wang" }, { "ids": [ "2395542" ], "name": "Xia Yin" }, { "ids": [ "40035356" ], "name": "Xingang Shi" }, { "ids": [ "4512591" ], "name": "Yahui Li" }, { "ids": [ "3244057" ], "name": "Chongrong Li" } ], "doi": "10.1109/MASCOTS.2017.20", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.20", "entities": [ "Attribute-value system", "Black box", "Black-box testing", "Correctness (computer science)", "Finite-state machine", "Model checking", "Simulation", "Software bug", "Software-defined networking", "Test automation", "White-box testing" ], "id": 
"29f8c9d31e25979f1106ea716627578487e9b62d", "inCitations": [], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "110-120", "journalVolume": "", "outCitations": [ "3de4aaba1034065cde1ea3784ea5dfb2504fb43a", "4d638f59bf6c9d1150b99743e281932e3b9c1959", "61797d1af87a27a89b5bb5fd4a03de061e5dc0e3", "9e89e9dbbdc6243e4c30175d4ecaa47c59c66545", "12fe79a92044f4d78143bb0a908c74fa8c91c289", "119af470b90b725c847c4b1fd25aea9a6c5b2b57", "507b5fe36714eb6aa8acd96d1eef14212eddb82b", "14ef2d955c8b7dd170c4e9dd5979ebbbb2783835", "19114b7a2f5243a47e80590cc11a2d8ec5b96308", "44b3b0573fdf6fc9889dbb3badf134dd092bb2d1", "4a15bfe21a245a17fd145599a528b13109e9a7a3", "07ca726af9c235573654b85e8d478bd7303aa62f", "2afc8d9b3a0d17fb926a6a6dd05b1fb307130a27", "c82075a34b2056e6c4a5409dd5fa9d592b164482", "405377ca200df3f7da390c37516fe13582e70776", "d9a1eaf8cc9bdb259e6a30583dbd510b0c35ddae", "29eb6849b0eb147673f51d6bc8a501131e4e7c52", "04b319357d6bab89ec9575f4b044d7609aa4296a" ], "paperAbstract": "The programmability of Software-Defined Networking (SDN) challenges the correctness and reliability of networks. There may be design flaws as well as implementation bugs in SDN applications. White-box testing methods with formal models rely on source codes, which limits the applicability of these methods. Black-box methods without behavior models cannot systematically cover an application's functions. Most previous work has mainly focused on design flaws and has ignored implementation bugs. In this paper, we propose a new black-box test framework to detect both design flaws and implementation bugs. Following this test framework, we propose a new model, Information Table Extended State Machine (IT-EFSM), combining a group of parallel state machines and an abstract topology to specify the SDN applications. 
We employ a model checking tool to generate tests against design flaws and propose a test generation based on partial composition, symmetry simplification on the topology and topology simulated execution to expose implementation bugs. The experimental results of the testing process demonstrate the effectiveness and applicability of our method.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.20" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/29f8c9d31e25979f1106ea716627578487e9b62d", "sources": [ "DBLP" ], "title": "Testing Black-Box SDN Applications with Formal Behavior Models", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "2a081abdb6504c4b4178af301a0e0d2330af66d8": { "authors": [ { "ids": [ "2123628" ], "name": "Cheng Cao" }, { "ids": [ "2666978" ], "name": "Hancheng Ge" }, { "ids": [ "2887627" ], "name": "Haokai Lu" }, { "ids": [ "1687568" ], "name": "Xia Hu" }, { "ids": [ "1697232" ], "name": "James Caverlee" } ], "doi": "10.1145/3077136.3080820", "doiUrl": "https://doi.org/10.1145/3077136.3080820", "entities": [ "Baseline (configuration management)", "Experiment", "Hashtag", "Like button", "Personalization", "Program optimization", "Social media", "Unified Model", "User profile", "Wisdom of the crowd" ], "id": "2a081abdb6504c4b4178af301a0e0d2330af66d8", "inCitations": [], "journalName": "", "journalPages": "743-752", "journalVolume": "", "outCitations": [ "26b99024682a897888428f727805967b032d0a54", "0409e56956560dba788dfb3adc467f3475606c24", "7bf16619c01eeb178e983c626e59b51021d21501", "625ef5356b3e8f7a787a805c23ce4c34640ed596", "1c7ecb026eeba7b0b6e933f62be1ecde40af0f3f", "19e033066f2031d3ebada08897cd229fd2b046c4", "72b992cd5b0461d3922d61aba4e13fd71fd9bb79", "429299e7cace14421d62e184d82981e3d0b9766d", "38b389580d774ce513284e671ff3bbcef0258de2", "6195df04d7e529e8e2ba99c04c23d9ff04e66187", 
"2744288f090192987e980274999065ad2d6e45d6", "71423bb17133402965a5cbaf31fa28b0366149fd", "2fcb51423a87c46412b9737a67ea22b55f3f9a9f", "228769c177380bb6398150fa9112509d438fe77c", "c604b73f55eb0bc1ccaa2f9e8b2028c52e169a5a", "6d1f45f0615a131b11c0dafa21b0324feaf68f12", "572009f92181ee5a7363dd5d42ac591ec523dea6", "21a590921fd19055159f1bea03fbd7883d6132fb", "205ef58ed1c3b1777a18b8c467565d6da8e32116", "35a0d1d2ba7f52a66ba5c675467b71b6a56d81e4", "7545f0620626d3a2a633954a8e5bd150d9544899", "258c8adfba357ed20cc03b5c2229eb773924bc08", "00354cf992faa54b9bb89690976de7f8bd6f0243", "274bc40268671fa3fff54d2ef89454b13fb026da", "8c9595abdc915cc722ccfa30a7a0dc4566e11754", "3bddba3214fe9e7248a934455a2624aa5b781778", "4932e87824314fe65b7d0d4294ed9cbd81c34df8", "00f581aca4dd370615fa0ea99e730d6dd42fe347", "1d0f25989452abbbc8feaf00a034ff110fc4b350", "3d5a1904127679f50e1b3fdad77c342760e4b283", "398dd8dcb4464a6fb8830868f6484476487d58e5", "3531e0eb9ec8a6ac786146c71ead7f8d624fd2ca", "6fc3ac2bd73b844025e684d8139a11fe4e4b0802", "165c428fec7d3aac4ab6e2c9d285af92883b643c", "7908c007f73ff641961b3c945d145e42a48eddb5", "2172a4f1cb35267ea7a67ffa692840de8be7e9a1", "01e6c986e4fa35f604b7c7b701f0bf682d4103be", "254687dec6e5456fa289826da6558186ef2cc24f", "116cbad0ad34f83023d0b231f378dae2975e3b5e", "787d56ec5569f1054f490dcf9a9fb4b87b7990e8", "e266bd9e72e8a220865079e9a30aaebd5b72d170", "2cbe0ba73d02aabbeefedf841203219796a551b7", "600191af13e7ae80885fa67ebd0a637d0e87abb5", "46d60fbefab0bc0091627c6aa0ab08c313d23968", "56b3a57397c2742e605581c76f93640d916c32af", "046931816999f8d9722b19cf94fdbbe57bc2e84e", "9d15f05f1f5f7c84745231b345fac1e5bfa66477", "29b29b80bc5b3d775f9c9d436cb3834231459823", "188f4d9b9d580d0432056b760b3372ec83543d1d", "0706356c9ab6014d6b04577d38289ea8328291a5", "42123a752e410548d1c26f055902aa91abc95d43", "0409577f9391dd0ab2fca06f355b792b3d40c664" ], "paperAbstract": "User interests and expertise are valuable but often hidden resources on social media. 
For example, Twitter Lists and LinkedIn's Skill Tags provide a partial perspective on what users are known for (by aggregating crowd tagging knowledge), but the vast majority of users are untagged; their interests and expertise are essentially hidden from important applications such as personalized recommendation, community detection, and expert mining. A natural approach to overcome these limitations is to intelligently learn user topical profiles by exploiting information from multiple, heterogeneous footprints: for instance, Twitter users who post similar hashtags may have similar interests, and YouTube users who upvote the same videos may have similar preferences. And yet identifying \"similar\" users by exploiting similarity in such a footprint space often provides conflicting evidence, leading to poor-quality user profiles. In this paper, we propose a unified model for learning user topical profiles that simultaneously considers multiple footprints. We show how these footprints can be embedded in a generalized optimization framework that takes into account pairwise relations among all footprints for robustly learning user profiles. 
Through extensive experiments, we find the proposed model is capable of learning high-quality user topical profiles, and leads to a 10-15% improvement in precision and mean average error versus a cross-triadic factorization state-of-the-art baseline.", "pdfUrls": [ "http://faculty.cse.tamu.edu/caverlee/pubs/cao17sigir.pdf", "http://doi.acm.org/10.1145/3077136.3080820" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2a081abdb6504c4b4178af301a0e0d2330af66d8", "sources": [ "DBLP" ], "title": "What Are You Known For?: Learning User Topical Profiles with Implicit and Explicit Footprints", "venue": "SIGIR", "year": 2017 }, "2a12a97476635f48467dfb2356e0038eae7b7e44": { "authors": [ { "ids": [ "37449345" ], "name": "Anran Wang" }, { "ids": [ "39552361" ], "name": "Vikram Iyer" }, { "ids": [ "2950667" ], "name": "Vamsi Talla" }, { "ids": [ "34103349" ], "name": "Joshua R. Smith" }, { "ids": [ "1805554" ], "name": "Shyamnath Gollakota" } ], "doi": "", "doiUrl": "", "entities": [ "Backscatter (email)", "Data rate units", "Denial-of-service attack", "Digital data", "E-textiles", "FM broadcast band", "FM broadcasting", "Modulation", "Radio broadcasting", "Radio frequency", "Smartphone" ], "id": "2a12a97476635f48467dfb2356e0038eae7b7e44", "inCitations": [ "8edf2ad191c35a53b4ed9b7e66c676cb26826c09", "bfdd63cc828f28b071b00c9f442b6d1db26297e7", "2b6b6d894d3929a02a78e3e3630a70272d7cf5d7", "a39107bd293cf0f5d9674223c4194eed0761745e", "625a3af5ee72738153a6d314d2d6869d1870f757", "07e62e29133f5ac1dc57da34347c49427282ebff", "3217e9e09d892ac55339f319b4eda32dea631214", "24b910feb038317a8a6304621cc765d7710b3081", "bc25550b5ce2da576db2e6958a5e51c836c2eff1", "11c846a6291b8500f9f0c673038c7552cf1058d7", "0d023aa7b708a02ebeb7853565c9d0f607932ae7", "aa0f89e887eec490f4fa8cceb03764c4c0c179fb", "093003a5bf55abb088903800ddf012855bea6c07" ], "journalName": "", "journalPages": "243-258", "journalVolume": "", "outCitations": [ "2d12b6189a0681b933f9a96b8ab14daac2bcfd73", 
"8012327465664ca6a64ee4d202536ec6c6d024f1", "77cbb0b6e8264a244f230f5b0acabc3e1a121df2", "0c9b68449b6241478ba38c2af220b393db86e206", "667cca11a46b4360dd3c85d374cebcfb00e8d1e6", "15a03a6f03a98e0fc1f64020247ea6c8479668a1", "64807002e1cdb189e90c23a78e328800502fd495", "9d8086fc8d0d56ea16f7a06013a924b30cfbe2a8", "02c75551123cae6dfbb0c69de96a199c974bcf89", "91bdacc904edb540fa57ea9a4535a1a1d79d855b", "df1b9d344a07c5de55486772a43b8ef3fc4f8a56", "8ecd08b194529ff6b8e8fc80b5c7a72504a059d3", "1ad6819ab62788a132f4d0773717fdacdd55af6a", "18336fdfca9e54b4a1a0dc03a0eaa66379778133", "27553dbe98c4c7f7d420a3694d4be4507db01790", "27d4dc8b28a22edd561b9fd38d481adb4e1504f4", "8347fa4ad280baf119580cc680fd85ddb16d7236", "498d2ed40427eeb78799fa96ac0f5a58c6648d05", "624af79d7d5d66fe8f272a1c84af520618fbc936", "21d0f3b4c847e04be0f3735f5f55bffe32e942e3", "c70e4a09a00c302f26ce60ac15e4e208af3b0621", "8e2821d7185de16bf88a4c90383ef3690ec04248" ], "paperAbstract": "This paper enables connectivity on everyday objects by transforming them into FM radio stations. To do this, we show for the first time that ambient FM radio signals can be used as a signal source for backscatter communication. Our design creates backscatter transmissions that can be decoded on any FM receiver including those in cars and smartphones. This enables us to achieve a previously infeasible capability: backscattering information to cars and smartphones in outdoor environments. Our key innovation is a modulation technique that transforms backscatter, which is a multiplication operation on RF signals, into an addition operation on the audio signals output by FM receivers. This enables us to embed both digital data as well as arbitrary audio into ambient analog FM radio signals. We build prototype hardware of our design and successfully embed audio transmissions over ambient FM signals. Further, we achieve data rates of up to 3.2 kbps and ranges of 5\u201360 feet, while consuming as little as 11.07 \u03bcW of power. 
To demonstrate the potential of our design, we also fabricate our prototype on a cotton t-shirt by machine sewing patterns of a conductive thread to create a smart fabric that can transmit data to a smartphone. We also embed FM antennas into posters and billboards and show that they can communicate with FM receivers in cars and smartphones.", "pdfUrls": [ "http://arxiv.org/abs/1702.07044", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-wang-anran.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-wang-anran.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/wang-anran", "http://homes.cs.washington.edu/~gshyam/Papers/fmbackscatter.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/d094/a2ad8996c88cc486b0bd58bcc697bdc54440.pdf", "s2Url": "https://semanticscholar.org/paper/2a12a97476635f48467dfb2356e0038eae7b7e44", "sources": [ "DBLP" ], "title": "FM Backscatter: Enabling Connected Cities and Smart Fabrics", "venue": "NSDI", "year": 2017 }, "2a332da536550abaa5bc8cc265f306b0fffedf4e": { "authors": [ { "ids": [ "1716807" ], "name": "Youngjin Kwon" }, { "ids": [ "3215063" ], "name": "Henrique Fingler" }, { "ids": [ "36757897" ], "name": "Tyler Hunt" }, { "ids": [ "2126015" ], "name": "Simon Peter" }, { "ids": [ "1683338" ], "name": "Emmett Witchel" }, { "ids": [ "1748580" ], "name": "Thomas E. 
Anderson" } ], "doi": "10.1145/3132747.3132770", "doiUrl": "https://doi.org/10.1145/3132747.3132770", "entities": [ "Emulator", "Hard disk drive", "Layer (electronics)", "Linux", "Linux", "Logical volume management", "Memory hierarchy", "Non-volatile memory", "Operating system", "Persistence (computer science)", "Scalability", "Solid-state drive", "Storage area network", "Throughput", "User space" ], "id": "2a332da536550abaa5bc8cc265f306b0fffedf4e", "inCitations": [ "ad42b4773cd461ba58bda07e1b7b0ff24c4ddba4" ], "journalName": "", "journalPages": "460-477", "journalVolume": "", "outCitations": [ "99723365fc9fe6960201bf9d246a90ccbb6396fa", "0a5882fc7600383eb9d6cc119942f48a70f896ad", "0712c325155f8af65602a08cc448d1e453466a33", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "2be26e8aa238ac37a80e08303f128d8014bb9f3b" ], "paperAbstract": "Current hardware and application storage trends put immense pressure on the operating system's storage subsystem. On the hardware side, the market for storage devices has diversified to a multi-layer storage topology spanning multiple orders of magnitude in cost and performance. Above the file system, applications increasingly need to process small, random IO on vast data sets with low latency, high throughput, and simple crash consistency. File systems designed for a single storage layer cannot support all of these demands together.\n We present Strata, a cross-media file system that leverages the strengths of one storage media to compensate for weaknesses of another. In doing so, Strata provides performance, capacity, and a simple, synchronous IO model all at once, while having a simpler design than that of file systems constrained by a single storage device. At its heart, Strata uses a log-structured approach with a novel split of responsibilities among user mode, kernel, and storage layers that separates the concerns of scalable, high-performance persistence from storage layer management. 
We quantify the performance benefits of Strata using a 3-layer storage hierarchy of emulated NVM, a flash-based SSD, and a high-density HDD. Strata has 20-30% better latency and throughput, across several unmodified applications, compared to file systems purpose-built for each layer, while providing synchronous and unified access to the entire storage hierarchy. Finally, Strata achieves up to 2.8x better throughput than a block-based 2-layer cache provided by Linux's logical volume manager.", "pdfUrls": [ "http://www.cs.utexas.edu/~simon/sosp17-final207.pdf", "https://www.sigops.org/sosp/sosp17/slides/strata-sosp17-slides.pdf", "http://doi.acm.org/10.1145/3132747.3132770", "http://nvmw.ucsd.edu/nvmw18-program/unzip/current/nvmw2018-final28.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2a332da536550abaa5bc8cc265f306b0fffedf4e", "sources": [ "DBLP" ], "title": "Strata: A Cross Media File System", "venue": "SOSP", "year": 2017 }, "2a350be2a1f9b7b60cd519bed28e0b655524ff79": { "authors": [ { "ids": [ "39417393" ], "name": "Jeff Smits" }, { "ids": [ "2447783" ], "name": "Eelco Visser" } ], "doi": "10.1145/3136014.3136029", "doiUrl": "https://doi.org/10.1145/3136014.3136029", "entities": [ "Control flow", "Control flow graph", "Data-flow analysis", "Dataflow", "Dataflow programming", "Declarative programming", "Domain-specific language", "Graph (abstract data type)", "Language workbench", "Programming language", "Specification language", "Workbench", "monotone" ], "id": "2a350be2a1f9b7b60cd519bed28e0b655524ff79", "inCitations": [], "journalName": "", "journalPages": "221-231", "journalVolume": "", "outCitations": [ "188426f3339b555dda2740ae59a1b9f8a0af17c8", "0b7c1bc9636d8cc66c36fb7e676d3badfe5df696", "0467a3b0e4afca0712b42f6e96cd879e2b274522", "3cce7db91b9e9ca58ad9c8b49d3d4c4fc3173cc2", "46b533681bfb665549acab7dc37af859ce537500", "eef0e0820ca3ef8cee957c89373527e8a73dcaaf", "2a957e47b0383d1d62d4b1745d48c06dd72b8664", 
"13be121d5683d909c6a4f0e5208a9d34bf1749e8", "0b61a17906637ece5a9c5e7e3e6de93378209706", "5c885865052790b09c2cd2d6be25bd53e8512da1", "5e567cda5999a6dd4e5da4bb30b9033f8d5687c4", "7a8251f687d09a661e77bc8a4e409736ea61ede9", "20ad7f3de49340dd6280e591b4ad639ef6ab1a40", "1be0f762b926d3b0a9f36e8e513f174054a65cf1", "7ef221d33b50067333a24076f17a3186847d97bc", "1d17b9b3c4067b4660e923a2041af0955b7af18d", "6c1fa64a6b5d3565fc4ab9ca97bf530069ce2669", "457e62e93d81b1aee73e543f1bc19b5fb4ca1416" ], "paperAbstract": "We present FlowSpec, a declarative specification language for the domain of dataflow analysis. FlowSpec has declarative support for the specification of control flow graphs of programming languages, and dataflow analyses on these control flow graphs. We define the formal semantics of FlowSpec, which is rooted in Monotone Frameworks. We also discuss a prototype implementation of the language, built in the Spoofax Language Workbench. Finally, we evaluate the expressiveness and conciseness of the language with two case studies. These case studies are analyses for Green-Marl, an industrial, domain-specific language for graph processing. The first case study is a classical dataflow analysis, scaled to this full language. 
The second case study is a domain-specific analysis of Green-Marl.", "pdfUrls": [ "https://pure.tudelft.nl/portal/files/34447378/sle17_flowspec_preprint.pdf", "http://pure.tudelft.nl/ws/files/34447378/sle17_flowspec_preprint.pdf", "http://doi.acm.org/10.1145/3136014.3136029" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2a350be2a1f9b7b60cd519bed28e0b655524ff79", "sources": [ "DBLP" ], "title": "FlowSpec: declarative dataflow analysis specification", "venue": "SLE", "year": 2017 }, "2a6f1f5779034004de9b53ebdcac4fd57771941f": { "authors": [ { "ids": [ "2569311" ], "name": "Heng Lin" }, { "ids": [ "1928916" ], "name": "Xiongchao Tang" }, { "ids": [ "38849012" ], "name": "Bowen Yu" }, { "ids": [ "10716503" ], "name": "Youwei Zhuo" }, { "ids": [ "6301522" ], "name": "Wenguang Chen" }, { "ids": [ "2467444" ], "name": "Jidong Zhai" }, { "ids": [ "22987925" ], "name": "Wanwang Yin" }, { "ids": [ "2225511" ], "name": "Weimin Zheng" } ], "doi": "10.1109/IPDPS.2017.53", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.53", "entities": [ "Algorithm", "Breadth-first search", "Central processing unit", "Computation", "FLOPS", "Graph drawing", "Graph traversal", "Heterogeneous computing", "Intel Core (microarchitecture)", "Linpack benchmarks", "List of algorithms", "Manycore processor", "Maximum flow problem", "Memory hierarchy", "Performance per watt", "Shortest path problem", "Social network", "Sunway", "Sunway TaihuLight", "TOP500", "Traversed edges per second" ], "id": "2a6f1f5779034004de9b53ebdcac4fd57771941f", "inCitations": [ "e45dea6588d1de0a23618e019031e67eedeeee26", "896134c7aa767e27cb3c3aa0662b335473923602" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "635-645", "journalVolume": "", "outCitations": [ "569a19031523dbe03a8c9a3dbe6168912f3cc476", "3ffe60733f14d9416ab478c1f273390601f987d8", "175d795f44037ef60dd9df341701cd5fdc449f1f", "4a5872f80d33be4d448abce21d121ec67453f5a2", 
"ba75e4f7f6356d0c7a98ae813f085ce1a7a0aeec", "b76269bf962989ce271bef7ea863ff4adf9c9de6", "638deeb9efa10f081f74e6c2ee9195716afd2ceb", "c3fbbd9c1fc5e53c6a9e3fe27e1bfce4755c8ef3", "3c84f22df1948dfa8b1b14bbd4c850baf9c5b632", "455ecea199bc83ec6ee3667afc96ef5f58f2b0ce", "0a791a760dd883342c8b8456a3e7cb75fb996ef4", "5def0f95a08f4c8f2592ba3323f4f92a6c367335", "06f75b1b283569baf96f4a65ec7da734b9c840f8", "1e27b9b447cebd5047050e39bb9246fa6364b760", "a0618a3d620a4e61ba37b691800fc770e0a77a65", "0e939cfcf31d94e27a51fd894e32d62737eb00c6", "2ce27845038020ea43afa08e91f916a4ccf19924", "02a45f2bf6105bf83e605812735ffa8eb8db520e", "4a87972b28143b61942a0eb011b60f76be0ebf2e", "2724de31317b1b9e026b5f90251829ee02f3fa3f", "254ded254065f2d26ca24ec024cefd7604bd74e7", "15b61dedce6c53245249a33e096ccce071d52edc", "7ebb9fad71ce8e08d5284b7644a5452cff6c75b3", "4ad495b07abc0d7080c020dd563d9406e1753d65", "259e93de2f10d395a1bdfb2dc6da72b6a3998572" ], "paperAbstract": "Interest has recently grown in efficiently analyzing unstructured data such as social network graphs and protein structures. A fundamental graph algorithm for doing such task is the Breadth-First Search (BFS) algorithm, the foundation for many other important graph algorithms such as calculating the shortest path or finding the maximum flow in graphs. In this paper, we share our experience of designing and implementing the BFS algorithm on Sunway TaihuLight, a newly released machine with 40,960 nodes and 10.6 million accelerator cores. It tops the Top500 list of June 2016 with a 93.01 petaflops Linpack performance [1]. Designed for extremely large-scale computation and power efficiency, processors on Sunway TaihuLight employ a unique heterogeneous many-core architecture and memory hierarchy. With its extremely large size, the machine provides both opportunities and challenges for implementing high-performance irregular algorithms, such as BFS. 
We propose several techniques, including pipelined module mapping, contention-free data shuffling, and group-based message batching, to address the challenges of efficiently utilizing the features of this large scale heterogeneous machine. We ultimately achieved 23755.7 giga-traversed edges per second (GTEPS), which is the best among heterogeneous machines and the second overall in the Graph500s June 2016 list [2].", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.53", "http://alchem.usc.edu/~youwei/publications/2017.ipdps.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2a6f1f5779034004de9b53ebdcac4fd57771941f", "sources": [ "DBLP" ], "title": "Scalable Graph Traversal on Sunway TaihuLight with Ten Million Cores", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "2aa59371c05c1bb6dd9216fccae0e37ac285093a": { "authors": [ { "ids": [ "3241376" ], "name": "Peter Pessl" }, { "ids": [ "3364677" ], "name": "Leon Groot Bruinderink" }, { "ids": [ "2133826" ], "name": "Yuval Yarom" } ], "doi": "10.1145/3133956.3134023", "doiUrl": "https://doi.org/10.1145/3133956.3134023", "entities": [ "Algorithm", "BLISS", "Bliss bibliographic classification", "Cryptography", "Digital signature", "IPsec", "Key (cryptography)", "Key escrow", "Lattice reduction", "Lattice-based cryptography", "Linear programming", "Parity learning", "Post-quantum cryptography", "Quantum", "Quantum cryptography", "Quantum mechanics", "Side-channel attack", "Type signature", "Virtual private network", "strongSwan" ], "id": "2aa59371c05c1bb6dd9216fccae0e37ac285093a", "inCitations": [ "665987a00ec1129f5c4587aa5544c221eac2cd5e", "d9c4775506e7db89663586663e9f0786203c5aee" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "490", "journalVolume": "2017", "outCitations": [ "135d6f50f43dc278d20026352f0051ac368ce315", "1513be7178d849dc4363b31c9ea19a2baea076e7", "7272e04a354d3bc54b1a8119e868090996d3bf1d", 
"ea73c6594ab0fe97a4054c9d61b4d669766011f1", "87d49f253a0e623e0255afa06d63e9b5a9fb09d0", "7d8e8b3379787a6f9c6fe60ebd7a09253c07fe58", "219e42a75820e128da43f8c5d1a56f92e6374ec5", "8ade32c4833474ffde68b8b0c0d162ffdae1b9e3", "0d257d899b72297f898f97bf9b1bd55d6941de4d", "43fb8926f0fce498351c442cc49beab0e6f7151f", "482fcc1057c6ed9ea21f71c990088eeb092ec243", "29b04f83ea21386955754b936a3aeb9207a0b65c", "704a3fcbddf7045cf522d125031d5f009c9abb02", "75cb7a4c3528a0342d9602fd8961a0093ab280f5", "16d916b69aa99a32d999068a376a5b4e96bfbd07", "2ce210df0f284d5d35beadd55e5aaa0b8e41af48", "2c7cc21a48058749b08fa00a2ae3cc87f4996e87", "47e98574f432ec6a7a224b807a0bbd08a2a66148", "4ad23e9e5745e8f3ee19317c54844b58d93513df", "11136c9841bb33a7433e122e586065fccbb746a8", "4d624b942a58818f8d425460638cb4b65ed84e1c", "39c8baedf47623a837feb0351abb323cff760d56", "548f2564274c33e7a646a99f7763496b1b7bf257", "659bc8a947a4ea64b2ca38b7fa5a27233de2c1f8", "2fa8b58bb107f8cdfdec4e4365befb2dcccab018", "394257e099e25217b57e3527e3d00e9411ef1872", "0c677be877b858ee5de54bf9f9d2727b54a550cd", "90f26c0f0d04b9c4999b454c35ee1c7603ca9e4b", "6ff3ba7754dbd376f431ac847d1452ae23c79a72", "5d3424acaf6da288409a34bfcbc188ec740dea1d", "407d206f741e98b97a257ff6af9eda8fa9f843d2", "63e21447b7098b41c4ff16e7e2be257fbb24e49a", "382892b73562f6ace340e181300d701f32987409", "676cc5e17cf76ec057810e5dd44baead41ad5ee7", "1dddf962c41e52b576d82d92b868b2c055e07caf", "e90a94bbc5eecd75a4b6b6e9e165d2735f9a91b5" ], "paperAbstract": "In the search for post-quantum secure alternatives to RSA and ECC, lattice-based cryptography appears to be an attractive and efficient option. A particularly interesting lattice-based signature scheme is BLISS, offering key and signature sizes in the range of RSA moduli. A range of works on efficient implementations of BLISS is available, and the scheme has seen a first real-world adoption in strongSwan, an IPsec-based VPN suite. 
In contrast, the implementation-security aspects of BLISS, and lattice-based cryptography in general, are still largely unexplored.\n At CHES 2016, Groot Bruinderink et al. presented the first side-channel attack on BLISS, thus proving that this topic cannot be neglected. Nevertheless, their attack has some limitations. First, the technique is demonstrated via a proof-of-concept experiment that was not performed under realistic attack settings. Furthermore, the attack does not apply to BLISS-B, an improved variant of BLISS and also the default option in strongSwan. This problem also applies to later works on implementation security of BLISS.\n In this work, we solve both of the above problems. We present a new side-channel key-recovery algorithm against both the original BLISS and the BLISS-B variant. Our key-recovery algorithm draws on a wide array of techniques, including learning-parity with noise, integer programs, maximimum likelihood tests, and a lattice-basis reduction. With each application of a technique, we reveal additional information on the secret key culminating in a complete key recovery.\n Finally, we show that cache attacks on post-quantum cryptography are not only possible, but also practical. We mount an asynchronous cache attack on the production-grade BLISS-B implementation of strongSwan. 
The attack recovers the secret signing key after observing roughly 6000 signature generations.", "pdfUrls": [ "https://eprint.iacr.org/2017/490.pdf", "http://eprint.iacr.org/2017/490", "http://doi.acm.org/10.1145/3133956.3134023" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2aa59371c05c1bb6dd9216fccae0e37ac285093a", "sources": [ "DBLP" ], "title": "To BLISS-B or not to be: Attacking strongSwan's Implementation of Post-Quantum Signatures", "venue": "CCS", "year": 2017 }, "2acc406cec7601f88934f321dced810ab0ee4024": { "authors": [ { "ids": [ "2130720" ], "name": "Ali Ziat" }, { "ids": [ "32278921" ], "name": "Edouard Delasalles" }, { "ids": [ "8905591" ], "name": "Ludovic Denoyer" }, { "ids": [ "1741426" ], "name": "Patrick Gallinari" } ], "doi": "10.1109/ICDM.2017.80", "doiUrl": "https://doi.org/10.1109/ICDM.2017.80", "entities": [ "Artificial neural network", "Baseline (configuration management)", "Experiment", "Neural Networks", "Recurrent neural network", "Spatial analysis", "Time series" ], "id": "2acc406cec7601f88934f321dced810ab0ee4024", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "705-714", "journalVolume": "", "outCitations": [ "4a861d29f36d2e4f03477c5df2730c579d8394d3", "678d67db7b65b07b6d4b941cc138a33dcdf47b81", "c3708f2ee0cf0e701c8733744cf13614520e14ea", "c1a6bc3e20f8d0a7244211e96fdeeb59dfc7b9b0", "21d467a6528206a3b281349ba49306aa11119f75", "78120d3c6fac260da40cc51a89d0252f3789b6f8", "8dfddcfd67a586f6ed8957174adf1d35c4bd4584", "0808bb50993547a533ea5254e0454024d98c5e2f", "140253208f41c813713c4fa6c23f69cd15ee1608", "268c3b9992f3b2ae076379930357464bb9b60f00", "cbec12feff814f4d4d11e892ecdbd93fd393cb64", "2bc9cc3279a028c7e7067ab06304e2a621a0381b", "dcae7651136f3fadd450272aa04567418f46d5c2", "2d2a22f1f9eae9188f3d43254daa2d5b7f3a2470", "f19b99c04b09ab5d45040cedaa3591af6ac674d9", "8ecc044d920df247fbd455b752fd7cc0f7363ad7", "2fda9af7cbfe092235df8268532fa322ab846c13", 
"78352d5f30b4e33c93c96bad3615ed5a9b406495", "9126f8f956d01c665fcbf98aea4bacb7809d6834", "82e5c9a1ccb59b7ca6952d1f2741881c3a15ab57", "178631e0f0e624b1607c7a7a2507ed30d4e83a42", "0b544dfe355a5070b60986319a3f51fb45d1348e", "6b2a6e8107642bb2137db52ecbd1805fd9632d93", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", "025720574ef67672c44ba9e7065a83a5d6075c36", "0f88de2ae3dc2ec1371d1e9f675b9670902b289f", "6957be06ab8b197f127a32ded8523c3fcdad2c75" ], "paperAbstract": "We introduce a dynamical spatio-temporal model formalized as a recurrent neural network for forecasting time series of spatial processes, i.e. series of observations sharing temporal and spatial dependencies. The model learns these dependencies through a structured latent dynamical component, while a decoder predicts the observations from the latent representations. We consider several variants of this model, corresponding to different prior hypothesis about the spatial relations between the series. The model is evaluated and compared to state-of-the-art baselines, on a variety of forecasting problems representative of different application areas: epidemiology, geo-spatial statistics and car-traffic prediction. Besides these evaluations, we also describe experiments showing the ability of this approach to extract relevant spatial relations.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.80" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2acc406cec7601f88934f321dced810ab0ee4024", "sources": [ "DBLP" ], "title": "Spatio-Temporal Neural Networks for Space-Time Series Forecasting and Relations Discovery", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "2acd762364c619d8706cd7f983160d342bfea5aa": { "authors": [ { "ids": [ "2120871" ], "name": "Ilias Marinos" }, { "ids": [ "2750619" ], "name": "Robert N. M. Watson" }, { "ids": [ "5597426" ], "name": "Mark Handley" }, { "ids": [ "2958660" ], "name": "Randall R. 
Stewart" } ], "doi": "10.1145/3098822.3098844", "doiUrl": "https://doi.org/10.1145/3098822.3098844", "entities": [ "CPU cache", "Central processing unit", "Commodity computing", "Direct memory access", "Disk buffer", "Disk storage", "Encryption", "End-to-end encryption", "Flash memory", "In-memory database", "Memory bandwidth", "Network traffic control", "Operating system", "PCI Express", "Page cache", "Plaintext", "Server (computing)", "Streaming media", "Throughput", "User space", "Web server", "Zero-copy" ], "id": "2acd762364c619d8706cd7f983160d342bfea5aa", "inCitations": [], "journalName": "", "journalPages": "211-224", "journalVolume": "", "outCitations": [ "67ac8e37fd240844e0726a2d171e20042c7648f4", "158ebe313a72857c5534a313f3ec0e413593b732", "2c11d5117a8b97ef2ef268e5fb38e8c5ffb1c58c", "7b40f864cbaeffe0ba87e51da4f945a31543642d", "20a63e9826480867d7c70be89e18e3952d00310a", "46a38fb1e26c9957111d154c6fc2c39101ff0ba1", "09bd66ed15985caa6b0bf1d54a36b508141ed128", "4f7e07d71a54b257261aefecef390f7f66b79f61", "5b7561f44f95ab68dbedb839849cbe72313aef20", "080b1f2c8316dad80d8c385dfcb82335a64a4d29", "29c324788b83463aa707784210edbca894694f20", "225603198cc415d363db8a8a2bd30b0df3c963b1", "7932a4597cec5149c575aa2303fe8f12241e4320", "1b2e68064f2bea7de1f73c4c04a061d05cb0f3e8", "0f04a0b658f00f329687d8ba94d9fca25269b4b7", "093d1d23500d65e99c7d0cd5569ce0f0d4c37076", "10fede77f843e9eb5ef1768a17543013616d9243", "0a5bb204cf8e5cce872573445f3cdb17c12203ff", "69258ba9b1ace027daa767192698c84bf49b9fb6", "77a3133097ff59bae0b6ac8fae418a58b585dacb" ], "paperAbstract": "Conventional operating systems used for video streaming employ an in-memory disk buffer cache to mask the high latency and low throughput of disks. However, data from Netflix servers show that this cache has a low hit rate, so does little to improve throughput. Latency is not the problem it once was either, due to PCIe-attached flash storage. 
With memory bandwidth increasingly becoming a bottleneck for video servers, especially when end-to-end encryption is considered, we revisit the interaction between storage and networking for video streaming servers in pursuit of higher performance.\n We show how to build high-performance userspace network services that saturate existing hardware while serving data directly from disks, with no need for a traditional disk buffer cache. Employing netmap, and developing a new diskmap service, which provides safe high-performance userspace direct I/O access to NVMe devices, we amortize system overheads by utilizing efficient batching of outstanding I/O requests, process-to-completion, and zerocopy operation. We demonstrate how a buffer-cache-free design is not only practical, but required in order to achieve efficient use of memory bandwidth on contemporary microarchitectures. Minimizing latency between DMA and CPU access by integrating storage and TCP control loops allows many operations to access only the last-level cache rather than bottle-necking on memory bandwidth. 
We illustrate the power of this design by building Atlas, a video streaming web server that outperforms state-of-the-art configurations, and achieves ~72Gbps of plaintext or encrypted network traffic using a fraction of the available CPU cores on commodity hardware.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098844", "http://www.cl.cam.ac.uk/~rnw24/papers/201708-sigcomm-diskcryptnet.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2acd762364c619d8706cd7f983160d342bfea5aa", "sources": [ "DBLP" ], "title": "Disk|Crypt|Net: rethinking the stack for high-performance video streaming", "venue": "SIGCOMM", "year": 2017 }, "2aed1eef57c90b8776e586283847438443b599f8": { "authors": [ { "ids": [ "2176499" ], "name": "Zhen Zheng" }, { "ids": [ "3025447" ], "name": "Chanyoung Oh" }, { "ids": [ "2467444" ], "name": "Jidong Zhai" }, { "ids": [ "37914192" ], "name": "Xipeng Shen" }, { "ids": [ "1714079" ], "name": "Youngmin Yi" }, { "ids": [ "6301522" ], "name": "Wenguang Chen" } ], "doi": "10.1145/3123939.3123978", "doiUrl": "https://doi.org/10.1145/3123939.3123978", "entities": [ "Face detection", "Graphics processing unit", "Pipeline (software)", "Software design pattern" ], "id": "2aed1eef57c90b8776e586283847438443b599f8", "inCitations": [], "journalName": "", "journalPages": "587-599", "journalVolume": "", "outCitations": [ "3607afdb204de9a5a9300ae98aa4635d9effcda2", "268cd46a06e8e3052bbd64e96fac73d600430281", "44237c798d670b3c3a40a3cb755cb4dc830b36b5", "4a2d7bf9937793a648a43c93029353ade10e64da", "2e33c9a0e0ba542693610f36cabee4d8291f8e09", "490020c0d4fa1eb85fe353add5713e49f08c628d", "9b3957141e108a99189639431de0309844f4fc9f", "0967839ec5595b6379df1d4d494d8535dffc6575", "0036adadc90e4826b2f7fc157752eea459070c32", "9477daf6e5cfc58d0daa41c893391d1eee8097e8", "4e669d0b4a796843f1eca7341467af2476abf6da", "0612811b3ed9fc7ef8300e65cb70360613dab01d", "8bceb95db97c34f6c202c053ce1a3782d895c7d1", "28cc7453c5f3f9ecb9415e631b0829ec9af8a4c3", 
"545748e39f008263395dfad5c2d2cd5b67bbfff1", "08d041581636f8eee888091b5539696d729f2bff", "5e5da2a57395b0ca6888f1bbd7de5d27e33b5a81", "1b91fcb25a395a12e7b6bc49473f223ad47f869f", "5c9fa798a510b66a37c1b0852582fd7735ed088e", "7564661f026abd1d472707c15357494fd79e63c0", "9f14902ba961ad5710ae8ee6f20fd477b75000fa", "61608f5a57007fbba2b0ed201fc3e2c61acddbd9", "145ddf587c2f049bc188927e09a8a3ba4a296734", "347a08cd9ada1cee83713d24ec84ed49ab121987", "67bf737ceccf387cdd05c379487da8301f55e93d", "12a376e621d690f3e94bce14cd03c2798a626a38", "01112dcfba0770525e2773c0f95889d5fc1c2734", "60a6cbba6978c731c1d84dc5a965a09bb543c193", "1eeb50d5f7937f65a910203ae61430ff8b969012", "34880bc07ff1e11dea9eb3f02fffb01ed3ed1076", "1a3bf65285f1e7b295ff0dec13e9a0c367e82b29", "10443d5d4f0e5048df514e581a9f364954158d00", "63af4355721f417bc405886f383af096fbfe51b2", "1d809d4ea4f22d9e0df6ba1549d87d8aa45512af", "6b95c3e4a3cd481f6916f36c6a38fe8f4b7f6e4b", "28f5db383ed771c40e9131229de707af844cf197", "02103155cc11f1401ff67235e911ebd7ab11823b", "0c75806bfe62a119e1aa580327c2f8db01b898aa", "0b8b104fd863cb251dcf40eb6c309f65ee505315", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "455a208dcaf520794c6eddba4114ab991e428174", "512a1ebdcaca56f3ea0c21aa2abe9a5ab7dace06" ], "paperAbstract": "Pipeline is an important programming pattern, while GPU, designed mostly for data-level parallel executions, lacks an efficient mechanism to support pipeline programming and executions. This paper provides a systematic examination of various existing pipeline execution models on GPU, and analyzes their strengths and weaknesses. To address their shortcomings, this paper then proposes three new execution models equipped with much improved controllability, including a hybrid model that is capable of getting the strengths of all. These insights ultimately lead to the development of a software programming framework named VersaPipe. With VersaPipe, users only need to write the operations for each pipeline stage. 
VersaPipe will then automatically assemble the stages into a hybrid execution model and configure it to achieve the best performance. Experiments on a set of pipeline benchmarks and a real-world face detection application show that VersaPipe produces up to 6.90X (2.88X on average) speedups over the original manual implementations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123978", "https://people.engr.ncsu.edu/xshen5/Publications/micro17a.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2aed1eef57c90b8776e586283847438443b599f8", "sources": [ "DBLP" ], "title": "Versapipe: a versatile programming framework for pipelined computing on GPU", "venue": "MICRO", "year": 2017 }, "2b07c44cb9f08d8d4e0d5357dd8fbc6eab197bf1": { "authors": [ { "ids": [ "4605438" ], "name": "Jinshi Zhang" }, { "ids": [ "10384604" ], "name": "Eddie Dong" }, { "ids": [ "26846526" ], "name": "Jian Li" }, { "ids": [ "7203366" ], "name": "Haibing Guan" } ], "doi": "10.1145/3050748.3050753", "doiUrl": "https://doi.org/10.1145/3050748.3050753", "entities": [ "Approximation error", "Failure rate", "Server (computing)", "Systems management", "Terminate (software)", "Virtual machine", "Working set", "z/VM" ], "id": "2b07c44cb9f08d8d4e0d5357dd8fbc6eab197bf1", "inCitations": [], "journalName": "", "journalPages": "30-43", "journalVolume": "", "outCitations": [ "54f045c55bbd2a195c9ece257246523289042c40", "b53ca4243a5d6a5dfe6fed8ab68d94a43de96716", "94a62be8355bf5be1edcc881a26559e5258e0f1d", "5642285d925bb50d6e02d9a6f636ad8d18fd1bdd", "60e0d1038c8aed966ab5b73700384991e4991291", "6d12f09a14d4b1cf7fda642415e752741176cfd8", "ae0bce3a6612598250a74d3fc32c91e2b72a8a4c", "2a34289c09e7f60daf2838dcd55630080f95614d", "2dbd5a76b10508c732aa1cbc858099836a670d15", "056e8dad8fa0e33a65981d9d4d9d3a50459c4861", "3574657705475722b6c398c266805f758268778b", "21843a9de32675bc961bd0929ce6ab50215e1888", "23d7da91f129c28bb78df775badfce8bc480c9bd", "70458452066e724e5ff29c0c74046e816765beb5", 
"ef7bbede9cacd49b4cadd33458bcfbd49439c9c0", "1ee5679595d45f50ce33e9f1dd045b2da4ce0a2b", "2819b4f9ccae4ba8b9b7b9cf6b81081c41d4adc4", "4dc430f2439cc690eea612d5ef3e24c161938c80", "b6f3eb0b06c5ca5f4ce20b18b20d90aa51f00d8b", "5fe4eb1749a823469950456a123c77530e33ad73", "edd832899e1743c0a53e430d4aa13efac363af2b", "b2575e0081ad3cac5f34494de893b6bb0bf594b5", "a102b668ab736bb2dc6a41195620fb6b3865289b", "6560d5255cfa785b89b5b7e851b78db5fefeb2fd", "2a2b0a6e141faad24e105942a814044ee8a203d7", "8cb8de207db21b86e121876303e6b4ec4e4a9a6f", "7a280c6cdc0ab18f8809b7101330702bfd5bb759", "336c1e3936ce150907b50f624b060bdb43d5e11b", "2f2cdd7b0c98b5e43b61272d2ac3ebb5cd29041d", "65c63845eedb72e437fed64e37e43a0d59fa8471", "3debcf6371ed03f441bf32e8e452a2ac6dd54d7b", "2238862d65174d55763a61bf0c2e28c96191a084", "5bc690391cb140731f88c8a68b4dee6dacd7097d", "35f3020a9ebb8180efefd17cff274e91c9ea873d", "24dc8d1de7e78ab100d2d83cbdf1390ddb9234c9", "262e3ef815bcfbf6e443c812ea55e6381f332934", "09a8a5cd0e6caa3ffa39afae01bea3575aa0bbf5", "2bef12742683926a29888fda5798ac32d12a30fd" ], "paperAbstract": "Live migration of a virtual machine (VM) is a powerful technique with benefits of server maintenance, resource management, dynamic workload re-balance, etc. Modern research has effectively reduced the VM live migration (VMLM) time to dozens of milliseconds, but live migration still exhibits failures if it cannot terminate within the given time constraint. The ability to predict this type of failure can avoid wasting networking and computing resources on the VM migration, and the associated system performance degradation caused by wasting these resources. The cost of VM live migration highly depends on the application workload of the VM, which may undergo frequent changes. At the same time, the available system resources for VM migration can also change substantially and frequently. 
To account for these issues, we present a solution called MigVisor, which can accurately predict the behaviour of VM migration using working-set model. This can enable system managers to predict the migration cost and enhance the system management efficacy. The experimental results prove the design suitability and show that the MigVisor has a high prediction accuracy since the average relative error between the predicted value and the measured value is only 6.2%~9%.", "pdfUrls": [ "http://doi.acm.org/10.1145/3050748.3050753" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2b07c44cb9f08d8d4e0d5357dd8fbc6eab197bf1", "sources": [ "DBLP" ], "title": "MigVisor: Accurate Prediction of VM Live Migration Behavior using a Working-Set Pattern Model", "venue": "VEE", "year": 2017 }, "2b2c0da59480f8ba1e472b621f0d55e79a35f911": { "authors": [ { "ids": [ "10403687" ], "name": "Luhao Wang" }, { "ids": [ "1752664" ], "name": "Shuang Chen" }, { "ids": [ "1691311" ], "name": "Massoud Pedram" } ], "doi": "10.1109/IGCC.2017.8323565", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323565", "entities": [ "Advanced Power Management", "Algorithm", "Artificial neural network", "Baseline (configuration management)", "Bayesian network", "Cache", "Calculus of variations", "Deploy", "Inference", "Multi-armed bandit", "Multitier architecture", "Neural Network Simulation", "Next-generation network", "Optimization problem", "Power management", "Sleep mode", "Telephone exchange", "algorithm" ], "id": "2b2c0da59480f8ba1e472b621f0d55e79a35f911", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ "3d030bcbb5401b732742c8af5a50e3cd0c8fde48", "a49be8fde336909daa6874b104869af55fb50a40", "276ebc620a8976026bd2d03582b9ecfa3738d43c", "a941a62b00320ca4f6c9f19dd1118262af57a097", "3e090dac6019963715df50dc23d830d97a0e25ba", 
"e22cb6b01a4bf1e025e643021694579bfae3c901", "9028e2b0d3ce51d310458c922f747b76f6c46590", "a0e6c91295f97c6797f33067f545d568648a7bac", "07bfa7253727961ea6f612624ba7f5290955dc9e", "0bfc3626485953e2d3f87854a00a50f88c62269d", "4ef750a115b108f233623e6da2ab17cd266d8c59", "97031066b92605e74afa954b8e260e9e3e9c1ab7", "4d6c589de48d66c8347c98c5581bb713f5935c74", "4b3f985f8294c9976eb5dee1d4b93a417fe4d425", "f734de2939d83ac9f6083ad46bf87e25a9097fbd", "e08f327c487717c99d45b1f3bacf11f2a2d18a87", "cfa513cb1dfbb01414b30931a8487247930c4d07", "763f7b3efed57ab5d4c3d8f9e58ae4c82c90dc11", "98e0b8bbcad04687dbc85c307c94cf6c044ffb6c", "3b123745507f9ded02986d99635b4f01b67207c1", "461c209b198747a51c73a428006617faa46ff037", "860445583b111ca7f99d78434ea3b815bf47e17b", "874e45ebf85f365a89a3fc6805e55a384fa65a7f", "8d2dd62b1784794e545d44332a5cb66649af0eca", "a9b244b0a316f765483300d41d0c6b477b5cac5e", "88dfee10842bbfd2ebc74980ab64c1cac5753883", "da6057368920585bcf2443295b98418840f1fc80", "1be836804cf948af68428671ec41d568519b8e77", "02b0e131f965f2f379d4c330668d5634057a8c0b" ], "paperAbstract": "Aggressive network densification in next generation cellular networks is accompanied by an increase of the system energy consumption and calls for more advanced power management techniques in base stations. In this paper, we present a novel proactive and decentralized power management method for small cell base stations in a cache-enabled multitier heterogeneous cellular network. User contexts are utilized to drive the decision of dynamically switching a small cell base station between the active mode and the sleep mode to minimize the total energy consumption. The online control problem is formulated as a contextual multi-armed bandit problem. A variational inference based Bayesian neural network is proposed as the solution method, which implicitly finds a proper balance between exploration and exploitation. 
Experimental results show that the proposed solution can achieve up to 46.9% total energy reduction compared to baseline algorithms in the high density deployment scenario and has comparable performance to an offline optimal solution.", "pdfUrls": [ "http://sportlab.usc.edu/~luhao/assets/papers/bnn_igsc2017.pdf", "https://doi.org/10.1109/IGCC.2017.8323565" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2b2c0da59480f8ba1e472b621f0d55e79a35f911", "sources": [ "DBLP" ], "title": "Context-driven power management in cache-enabled base stations using a Bayesian neural network", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "2b35508ebfee8aee1124c6576ab8fb8c00d46a50": { "authors": [ { "ids": [ "34727063" ], "name": "Jonathan Kaldor" }, { "ids": [ "9267751" ], "name": "Jonathan Mace" }, { "ids": [ "26970028" ], "name": "Michal Bejda" }, { "ids": [ "27017163" ], "name": "Edison Gao" }, { "ids": [ "27075296" ], "name": "Wiktor Kuropatwa" }, { "ids": [ "40079382" ], "name": "Joe O'Neill" }, { "ids": [ "1902958" ], "name": "Kian Win Ong" }, { "ids": [ "35103599" ], "name": "Bill Schaller" }, { "ids": [ "27071421" ], "name": "Pingjia Shan" }, { "ids": [ "31884531" ], "name": "Brendan Viscomi" }, { "ids": [ "2851380" ], "name": "Vinod Venkataraman" }, { "ids": [ "3199448" ], "name": "Kaushik Veeraraghavan" }, { "ids": [ "2287930" ], "name": "Yee Jiun Song" } ], "doi": "10.1145/3132747.3132749", "doiUrl": "https://doi.org/10.1145/3132747.3132749", "entities": [ "Digital footprint", "End-to-end encryption", "Hoc (programming language)", "Mobile app", "Motorola Canopy", "Real-time computing", "Scalability" ], "id": "2b35508ebfee8aee1124c6576ab8fb8c00d46a50", "inCitations": [ "5fb211af51abb824f252116915600924cd76a144", "aff3af7989d3f8a3d28dbb9f13882ac65e0ea927" ], "journalName": "", "journalPages": "34-50", "journalVolume": "", "outCitations": [ "1861776e08d4ce30ac63bd99b03501a80b98bf87", 
"405544638e4a7b3d944ba4596066d09bffa06f45", "07641ebcb7726102c37f00525a0a7a3c859bf036", "531957a3e9e47f1993e99bab2391cd828393e2d2", "0ac085a7a8e7ba52938ccd4e59b2597dd2ec63b1", "9c6981f694317b205ff85060e2de30bc33d13e6a", "6632e05bf8efe9498f622c7af82b4ac0ac1db23d", "a5b48e2f75ca91de933aed5375c8123d9011c6a9", "003d5a65de0ac72daaf105ded903cb3eb88585b3", "053d48a237868b716ae229cde1b44ef00c2e4094", "bb0f29af9a9d159cf70929783b645e43c1ce1c93" ], "paperAbstract": "This paper presents Canopy, Facebook's end-to-end performance tracing infrastructure. Canopy records causally related performance data across the end-to-end execution path of requests, including from browsers, mobile applications, and backend services. Canopy processes traces in near real-time, derives user-specified features, and outputs to performance datasets that aggregate across billions of requests. Using Canopy, Facebook engineers can query and analyze performance data in real-time. Canopy addresses three challenges we have encountered in scaling performance analysis: supporting the range of execution and performance models used by different components of the Facebook stack; supporting interactive ad-hoc analysis of performance data; and enabling deep customization by users, from sampling traces to extracting and visualizing features. Canopy currently records and processes over 1 billion traces per day. 
We discuss how Canopy has evolved to apply to a wide range of scenarios, and present case studies of its use in solving various performance challenges.", "pdfUrls": [ "http://doi.acm.org/10.1145/3132747.3132749", "http://cs.brown.edu/~jcmace/papers/kaldor2017canopy.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2b35508ebfee8aee1124c6576ab8fb8c00d46a50", "sources": [ "DBLP" ], "title": "Canopy: An End-to-End Performance Tracing And Analysis System", "venue": "SOSP", "year": 2017 }, "2b6095ce3d9aa4c3b4f3c600126c25612e2291d7": { "authors": [ { "ids": [ "2797120" ], "name": "Gokul Subramanian Ravi" }, { "ids": [ "1704076" ], "name": "Mikko H. Lipasti" } ], "doi": "10.1145/3079856.3080212", "doiUrl": "https://doi.org/10.1145/3079856.3080212", "entities": [ "Artificial neural network", "Branch predictor", "Clock signal", "Enterprise resource planning", "Frequency scaling", "Greedy algorithm", "Intel Core (microarchitecture)", "Microarchitecture", "Naivety", "Power gating", "Scalability", "Spectral leakage" ], "id": "2b6095ce3d9aa4c3b4f3c600126c25612e2291d7", "inCitations": [], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "147-160", "journalVolume": "", "outCitations": [ "24930871f613523921778af9ac7920d295b1c475", "80ea0ff80e6afe380b311f9b632ba009e2eef4ac", "b61ecd2898660abcdf129ec30ad955945f97ab03", "932487ce3c6cd16863ba642f7bf21dce81fa3293", "6f2902a7aa6815036bce24450dedd53b55606762", "0d8524a1eca5e41ee755acd30a0c28a782d05331", "74cb4cc2b117922b104834c615126ff63f0eda48", "32a9eaa4509ae9788a0a2572897dd4b5b312873a", "311e55ffac576c2903da694adcf5dab78304bf29", "01381abecc25db30ce67a6d9a46b76498db4ef5d", "7ce25a0852e2345be1a1bd02b8eb4cefb9d47073", "31491184a5a8f6c68d16ce39c7ce9e80bedcdf96", "d5d7d4026ebc93cbc93ada3b01f9ffd2da5655e7", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "4e2cb9a77bd5a9f031fd656225a0fba3c95aad27", "8ea356a7ef8d2221b47e5390610e3ccb09d487c3", 
"77d4fb23ce0b5499016f2c162a5430d04f976542", "4a642dc49fb43252418d138d45d31734cf99a7b0", "82faba23b112f1e5447bb092f21a7e5c1a5ad468", "cf3b57fb1e07cf247e2a6c5b8f2d4e0adc18f1a1", "9d4bc7c0569cb548b2a1e319948c8f91061abb49", "005ff76822937045101eb8c724275ba68e11329c", "81e4ef2b19e6363272f4d4e962ad2c0e1310f163", "dd90d37a73a96db7498ba1269e4c267dd2ac75fa", "03a128036c022b5295bb997438e97f14837b9975", "695a5f0efb934aad8c22b327df9e054ee05a31c6", "5b4ca29fd32b2d11b0e5ea7efbc34a34023915e2", "46706b89bd3ccb7585c459f0f495603f287e191f", "01613a5f7f908e57d25c3d584e50b3239545dc22", "352a8957005dc5519b15ed1870751ec494d66395", "a5081988ebfd951d1c9a01e0cddb69ae01d03e21", "29162b894c8b1f14a6dbf4dd207c81211ff0b3ae", "18bfccf0e0383f5867e60d35759091e5a25099e1", "370488843f80120797e1f0af22e9fdb0152ff657", "1a71aaba93358ade10693e2dd438a2d02db657b5", "05b120583b7d571f593380436a1cf8019739e7cd", "61ad52b2ef341e50ec133600a76c3bb67cc3dda6", "3d2cc8b941c74e2b79bc41491c2946061989cb6c", "a22cf335909c1502457e05a29f33cf5c7f0bc9c7", "ee6649fac2c085812421be9295d130feb55f3678", "15b275f0421c606f5903532e9964b140cbb2f878", "f61b37060504930348f2e7323977092f13d456e7", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "5b4a694a6569468ccc25daefc983aa3993b3bc3d", "5eeea5cbc6ce6a82fdd952bb9461e4787fa82374", "1491e934ee736e8219c2922ab1629322dac753a0", "0a80e3dce25d865e9fdf69da4d09cc8ac3398ff4", "8c79053bd5e6d7cb5b595899950fdca61f258187", "6fbb7db25a5a3a2788e2b16a5e54abc1ac36aa76", "1c9a66d23ebf589eca01658b931bba38aefa4f5c", "5cbf69bad23e9b352a82cf904d7f6dc51a13b0bc", "73d87a3b24c59e2d7c6720b20733c1aaa6329b6b", "0c0838ae5d08de1a5db4486c04a37cb70019edac", "081a00e3c38a0b653b9f98dc1f2ef86336fab4e8", "556e6dcc55b57e7af30b0c5e0e46cb1a9f3c23da", "399acab2bee6eccbfffe4a2ce688b6b1075e9c5e", "748381c36e82ebca65fa7265d08812a30ef8a954", "82ab5e4ef9ddf57af1dd487392f6074b7fcc2f9e", "5175688633b7c22fdd0b1bec4f042c30d1650a15", "bf495f6280829dca773b69b94e5fcfb2c14fd848", "48710c82bea8283382f81fcdba540160a0b00e16", 
"0b45a529cc64528c2e0016d263f1a3f211e16a1a", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "4e72b218559d8cf0b0c91554bdd088f30a40c1d6" ], "paperAbstract": "High-performance architectures are over-provisioned with resources to extract the maximum achievable performance out of applications. Two sources of avoidable power dissipation are the leakage power from underutilized resources, along with clock power from the clock hierarchy that feeds these resources. Most reconfiguration mechanisms either focus solely on power gating execution resources alone or in addition, simply turn off the immediate clock tree segment which supplied the clock to those resources. These proposals neither attempt to gate further up the clock hierarchy nor do they involve the clock hierarchy in influencing the reconfiguration decisions. The primary contribution of CHARSTAR is optimizing reconfiguration mechanisms to become clock hierarchy aware. Resource gating decisions are cognizant of the power consumed by each node in the clock hierarchy and additionally, entire branches of the clock tree are greedily shut down whenever possible.\n The CHARSTAR design is further optimized for balanced spatio-temporal reconfiguration and also enables efficient joint control of resource and frequency scaling. The proposal is implemented by leveraging the inherent advantages of spatial architectures, utilizing a control mechanism driven by a lightweight offline trained neural predictor. CHARSTAR, when deployed on the CRIB tiled microarchitecture, improves processor energy efficiency by 20-25%, with efficiency improvements of roughly 2x in comparison to a naive power gating mechanism. 
Alternatively, it improves performance by 10-20% under varying power and energy constraints.", "pdfUrls": [ "http://pharm.ece.wisc.edu/papers/isca17_gravi.pdf", "http://doi.acm.org/10.1145/3079856.3080212" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2b6095ce3d9aa4c3b4f3c600126c25612e2291d7", "sources": [ "DBLP" ], "title": "CHARSTAR: Clock hierarchy aware resource scaling in tiled architectures", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "2b69a2e2b6be64678b3fad96d96c561abd878892": { "authors": [ { "ids": [ "39284749" ], "name": "Samuel Haney" }, { "ids": [ "2357165" ], "name": "Ashwin Machanavajjhala" }, { "ids": [ "2959751" ], "name": "John M. Abowd" }, { "ids": [ "1865055" ], "name": "Matthew Graham" }, { "ids": [ "10791164" ], "name": "Mark Kutzbach" }, { "ids": [ "3026156" ], "name": "Lars Vilhuber" } ], "doi": "10.1145/3035918.3035940", "doiUrl": "https://doi.org/10.1145/3035918.3035940", "entities": [ "Algorithm", "Erd\u0151s\u2013R\u00e9nyi model", "Experiment", "Inferential programming", "Privacy", "Provable prime", "Requirement", "Table (information)" ], "id": "2b69a2e2b6be64678b3fad96d96c561abd878892", "inCitations": [ "cc01bd29a0503a63f4273441a2a53cc018c580dc", "03103f0f545bed8c21791af9af215edf1bd9db86" ], "journalName": "", "journalPages": "1339-1354", "journalVolume": "", "outCitations": [ "0ccbd0b421022170cdb3773cad5e946f860624a1", "1eddbbbdec587b0906013de1d377346cb7f8884b", "258cc5f2121fe05244f2dc8b2d6a6ade8719780c", "644d02d3aa28537635a1a247aa75b9e926dfda53", "7dbf444259ca7f999908c2edfdab81cfce77c324", "12d5e1c0cb4280730f174668803b8d0a32b002ff", "2f3acd94bfe590d425cae8d9558f38188d4e91eb", "004b439ff1e6a15deedc7a7c4c6685f5ceafd237", "ca5bf9c6f993bb6ce2883fbda35e0c37a4dfbce4", "0a43db07094d742aa731d81eada0d4a89c9331c9", "02c20353e2db867eac0952215b2e4edc718a2f2b", "451a8f7a1ac7bafcfd30db62fedf946f59d0f0d9", "0a10f3dfa73c6901aad886a7966999e6f889116a", 
"360d4003511682c5f5b5f82f6befdda88ca3fa73", "37bcd8bbe2cddd48f0ec152fc5ffa4fca93f3828", "b532099ff8b67049f292cd62700dca37fc2be623", "7412088f10be2032600f98e4e0347fa69222f16f", "283ecc8622694c070fa53aee7a1c37dadc603f8d", "0fcaa5d69913b2601fb4fac3a16ba384e5f1883b", "1cf08cc1e0aafd6d783eec70add7e0875b7cd32a", "17fac85921a6538161b30665f55991f7c7e0f940", "9407fda128b185bdb0ced615ad8107381b831071", "34e7390fc54ba9b29ae88f7a135e2bb79b4ca714", "09026b3e5d73b5986a8ba311c2ed1f4f7501732d" ], "paperAbstract": "National statistical agencies around the world publish tabular summaries based on combined employer-employee (ER-EE) data. The privacy of both individuals and business establishments that feature in these data are protected by law in most countries. These data are currently released using a variety of statistical disclosure limitation (SDL) techniques that do not reveal the exact characteristics of particular employers and employees, but lack provable privacy guarantees limiting inferential disclosures.\n In this work, we present novel algorithms for releasing tabular summaries of linked ER-EE data with formal, provable guarantees of privacy. We show that state-of-the-art differentially private algorithms add too much noise for the output to be useful. Instead, we identify the privacy requirements mandated by current interpretations of the relevant laws, and formalize them using the Pufferfish framework. We then develop new privacy definitions that are customized to ER-EE data and satisfy the statutory privacy requirements. We implement the experiments in this paper on production data gathered by the U.S. Census Bureau. An empirical evaluation of utility for these data shows that for reasonable values of the privacy-loss parameter ε≥ 1, the additive error introduced by our provably private algorithms is comparable, and in some cases better, than the error introduced by existing SDL techniques that have no provable privacy guarantees. 
For some complex queries currently published, however, our algorithms do not have utility comparable to the existing traditional SDL algorithms. Those queries are fodder for future research.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035940", "https://users.cs.duke.edu/~ashwin/pubs/Haney-UtilityCost-SIGMOD2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2b69a2e2b6be64678b3fad96d96c561abd878892", "sources": [ "DBLP" ], "title": "Utility Cost of Formal Privacy for Releasing National Employer-Employee Statistics", "venue": "SIGMOD Conference", "year": 2017 }, "2b6bc9aeefae50a14bb4f36b9bb7db2d41752a5a": { "authors": [ { "ids": [ "32064267" ], "name": "Alex Kogan" }, { "ids": [ "3142383" ], "name": "Yossi Lev" } ], "doi": "10.1145/3087801.3087838", "doiUrl": "https://doi.org/10.1145/3087801.3087838", "entities": [ "Vendor lock-in" ], "id": "2b6bc9aeefae50a14bb4f36b9bb7db2d41752a5a", "inCitations": [], "journalName": "", "journalPages": "231-240", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087838" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2b6bc9aeefae50a14bb4f36b9bb7db2d41752a5a", "sources": [ "DBLP" ], "title": "Transactional Lock Elision Meets Combining", "venue": "PODC", "year": 2017 }, "2b75ba7f75170b73d913c515cc0deefef6c88f5f": { "authors": [ { "ids": [ "2030283" ], "name": "Chong Zhou" }, { "ids": [ "2470536" ], "name": "Randy C. 
Paffenroth" } ], "doi": "10.1145/3097983.3098052", "doiUrl": "https://doi.org/10.1145/3097983.3098052", "entities": [ "Anomaly detection", "Artificial neural network", "Autoencoder", "Benchmark (computing)", "Deep learning", "Display resolution", "Noise reduction", "Nonlinear system", "Pervasive informatics", "Principal component analysis", "Problem domain", "Remote Database Access", "Robust principal component analysis", "Sparse matrix" ], "id": "2b75ba7f75170b73d913c515cc0deefef6c88f5f", "inCitations": [ "22a43153e5c1bd3d9a8d7e2ccc8ed5c556dd372b", "1b7bd42294ad53f87121c06a7713ac5bef0a8936", "5cbca39eb8a7a97f998b62fa1b3cd061f0a79849", "27fe585a58013964670eca904d95479810d4f8a2", "43b200059177f46debd7973a0c1862897cca8d5b", "b3acb6f183b5f4b651f53c0eec5cb5c805224ac1", "a15d2b057a3acea9b26932c7954619a59165c006", "fa88490e3e13b1e50bcd5862e81b269c14759e39", "6cae9ad284a73471a8ed9e483b1673a60d61d946" ], "journalName": "", "journalPages": "665-674", "journalVolume": "", "outCitations": [ "00af02c2cb48920af477115e870a42ac4f8a3834", "02bd9dfe5a0e8f38af6a3f5b787e52ed169f09bd", "6bf99871f1791014fb1aa79071c4ca57b651bea5", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "42b6ea0bbc174ce1ecb116aff6280559545dd6db", "195d0a8233a7a46329c742eaff56c276f847fadc", "e2b7f37cd97a7907b1b8a41138721ed06a0b76cd", "3424286d6d39de51080ddd683646565545d015e2", "c0306e61ff03e74af2e8bb71e72dd57c04b0952a", "2c5ee8c30bba238fbcb31456b10ebb2cdb8d1a35", "052b1d8ce63b07fec3de9dbb583772d860b7c769", "1d3678f244f2d98ebba7d0e667ee8ae76f010e24", "357733cc76e31a499a27ba2da8612174aafb3213", "3b2bf65ebee91249d1045709200a51d157b0176e", "ec7e0ceea8f79735742ea671b3c37148cae9f22c" ], "paperAbstract": "Deep autoencoders, and other deep neural networks, have demonstrated their effectiveness in discovering non-linear features across many problem domains. 
However, in many real-world problems, large outliers and pervasive noise are commonplace, and one may not have access to clean training data as required by standard deep denoising autoencoders. Herein, we demonstrate novel extensions to deep autoencoders which not only maintain a deep autoencoders' ability to discover high quality, non-linear features but can also eliminate outliers and noise without access to any clean training data. Our model is inspired by Robust Principal Component Analysis, and we split the input data X into two parts, $X = L_{D} + S$, where $L_{D}$ can be effectively reconstructed by a deep autoencoder and $S$ contains the outliers and noise in the original data X. Since such splitting increases the robustness of standard deep autoencoders, we name our model a \"Robust Deep Autoencoder (RDA)\". Further, we present generalizations of our results to grouped sparsity norms which allow one to distinguish random anomalies from other types of structured corruptions, such as a collection of features being corrupted across many instances or a collection of instances having more corruptions than their fellows. 
Such \"Group Robust Deep Autoencoders (GRDA)\" give rise to novel anomaly detection approaches whose superior performance we demonstrate on a selection of benchmark problems.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098052" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2b75ba7f75170b73d913c515cc0deefef6c88f5f", "sources": [ "DBLP" ], "title": "Anomaly Detection with Robust Deep Autoencoders", "venue": "KDD", "year": 2017 }, "2ba29b7c43f0d822ebf11e24b5f51a1dccf45903": { "authors": [ { "ids": [ "3325283" ], "name": "Yizheng Chen" }, { "ids": [ "2834380" ], "name": "Yacin Nadji" }, { "ids": [ "3453550" ], "name": "Athanasios Kountouras" }, { "ids": [ "1792232" ], "name": "Fabian Monrose" }, { "ids": [ "2822260" ], "name": "Roberto Perdisci" }, { "ids": [ "2805955" ], "name": "Manos Antonakakis" }, { "ids": [ "32426202" ], "name": "Nikolaos Vasiloglou" } ], "doi": "10.1145/3133956.3134083", "doiUrl": "https://doi.org/10.1145/3133956.3134083", "entities": [ "Adversarial machine learning", "Cluster analysis", "Feature vector", "Machine learning" ], "id": "2ba29b7c43f0d822ebf11e24b5f51a1dccf45903", "inCitations": [ "8e4808e71c9b9f852dc9558d7ef41566639137f3", "4a8c332b09bb99333a8bce6a4640a20c1352aa63" ], "journalName": "", "journalPages": "1125-1142", "journalVolume": "", "outCitations": [ "6f006a3895dd8fb24f83235a67f2fe72418aa800", "595a00f0975b5d5c28d904ddba1ae5a493316573", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "fd041c024676b04549c6351504e6565999250f03", "3c8bf504ddc7db1829466b6e9da5251025dd48f1", "3c3805d7479390b208001f3be908efc8520004ca", "5160c20a7c573d52bc32535a6bb9dd76a4eb2709", "335f644137cdc45de2046e02b365ee0353d4a418", "3212929ad5121464ac49741dd3462a5d469e668d", "6d337579fa90c4e0298b02589101cefb73c8f895", "1ad66bed51b77d5df6ff65b688bfe542a9e868cf", "87ed0f639e6d56b2c536570406c2fe0afd9c3665", "1edefabf4333cfa6253b0f7ed8e7e9244b07fca7", "46c9cf09aad712f4e064a5316f5e112b28e8a5dc", 
"6c5d03568e012a95c5a663309c8c21ff1e07e53f", "09415a3c94085a6b62ef41ea3336b51176e36d2f", "1c059493904b2244d2280b8b4c0c7d3ca115be73", "6dd9bb6b38e5b84616e207f00a181dbadce06937", "f44f24f07ed8365233ffadc694d89bf397d048c9", "f64fa53905685bd155995dd04566b661ea04283f", "0e03189871cd303b3438743f90232514dfa7885e", "5932c52888d8494a04c60163ee3a729a66320fec", "09671d4c8205b765335e8a9a47a4e2917f9667a1", "1f4d67754c643103ec5ae6e0cd88f26a0aefdbde", "41a70062d260feb62e0ae64acf252a839c0bbd61", "0796bb6c803e4256d7bdc0885c6a26d058da3319", "3befa34619a695fd34f72f21683f1131c2c379b2", "0a1a57acfdcd91c663015ed101393af50012e549", "b90dd2f366988d9bb76399d4137c1768fe460c8f", "1437415df29d3927c7851c7a0db0edd4a472d6e1", "51a23e9565756c71febb28f90a738ed4e604668d", "596a5e617c89a7333d894879637ccdbd64fb6423", "5e4fa9397c18062b970910f8ee168d3297cf098f", "0d14221e3bbb1a58f115a7c7301dc4d4048be13f", "d98a304e44391fbdbc8c7c57248a652b04c14d00", "1ed8886fb8a9ba6b57b8742d0e5b76a32daa8fad", "2b44ed62bfcca19c448d5a53da723664133063c7", "6a307d728f0d16252066b6275fe6a96260dfa3bb", "b2ea1d84e9a8887c3ed3370ab600b0ee2c5ee7ac", "45f6957cab31e802934cc761380c1a4a37c66208", "359c57b0035d5c2ca3f62a24c81a00aef5240836", "f74804eaf20b71da1ad2ebbbb429595c133459c8", "6ff68637a09d89c4aa3bb29a4d47391fb0000d06", "66f998a69f9acbe291900bc3488622b27bb8b919", "6bdcedb895256357a6bc8ffef5a0790697403372", "a1ac002ef676bb3c7d515081be79894384790a07", "83e8d61a3d767e3b6c6b8dce63c35bc709fddc49", "24c756be07e109ec1e41fe750da875d931463533" ], "paperAbstract": "Graph modeling allows numerous security problems to be tackled in a general way, however, little work has been done to understand their ability to withstand adversarial attacks. We design and evaluate two novel graph attacks against a state-of-the-art network-level, graph-based detection system. 
Our work highlights areas in adversarial machine learning that have not yet been addressed, specifically: graph-based clustering techniques, and a global feature space where realistic attackers without perfect knowledge must be accounted for (by the defenders) in order to be practical. Even though less informed attackers can evade graph clustering with low cost, we show that some practical defenses are possible.", "pdfUrls": [ "http://iisp.gatech.edu/sites/default/files/images/practical_attacks_against_graph-based_clustering_-_arxiv.pdf", "https://arxiv.org/pdf/1708.09056v1.pdf", "http://arxiv.org/abs/1708.09056", "https://csaw.engineering.nyu.edu/application/files/1115/0825/7130/CSAW17_paper_54.pdf", "https://acmccs.github.io/papers/p1125-chenA.pdf", "http://doi.acm.org/10.1145/3133956.3134083" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2ba29b7c43f0d822ebf11e24b5f51a1dccf45903", "sources": [ "DBLP" ], "title": "Practical Attacks Against Graph-based Clustering", "venue": "CCS", "year": 2017 }, "2ba74c38c70622c7cd8e99d61b7bc18401a18790": { "authors": [ { "ids": [ "3311387" ], "name": "Dan Alistarh" }, { "ids": [ "2060484" ], "name": "William M. 
Leiserson" }, { "ids": [ "2765439" ], "name": "Alexander Matveev" }, { "ids": [ "2613669" ], "name": "Nir Shavit" } ], "doi": "10.1145/3064176.3064214", "doiUrl": "https://doi.org/10.1145/3064176.3064214", "entities": [ "C++", "Concurrent data structure", "Copy-on-write", "Correctness (computer science)", "Data structure", "Garbage collection (computer science)", "Legacy code", "Modern Operating Systems", "Operating system", "Parallel computing", "Programmer", "Requirement", "Scalability" ], "id": "2ba74c38c70622c7cd8e99d61b7bc18401a18790", "inCitations": [], "journalName": "", "journalPages": "483-498", "journalVolume": "", "outCitations": [ "7a8424572e9545c112884b9961c8b6b2613a5b5e", "18df2b9da1cd6d9a412c45ed99fdc4a608c4c4bd", "f28b4f4ddd1cf5bc3d605d723e6fe9e4e8750a62", "4634313264c26ca00c3b940865dc7c35d393bee7", "288bc21dc87f323909bd121713bddd95bd2bfb4a", "4d366bb516dd417bed788a47b512ac253cdb6ea1", "9091adee1cba8133ac66c312743b51d7d00563d2", "22a3110123362412f91ae44c2b15e2234324f6fd", "6899c00a5613dcebcaa6cd0c13c2cc445b5ce2e1", "32dcf013cbf190c8559f6d87d1a60bb34a8341c7", "327cbb1da2652b430a52171d510cf72235b890b6", "839632d00040ef59395786125057cac8dcc4683b", "34fe2764de8ed746e90cb95071cf9d713f3c3d66", "1c8378e621cec5ecd94974efaf305275b8e4186b", "ebc028c1146be508d63a98da0490d17d5de90750", "48c2af3d559fb2c7ef5e71efd24ab5ae217c1fee", "6d62297c2ca436707e59664178fe3e58b33d867e", "072cad08a6886c1800cb6144a8cfec4bced6f7d9", "15f43d2b38b60a1ae8f03e818c1532031be4fc18", "0b9a4c5ca3530089edcd7e9ac2c718d2317718e3", "9b67de50e9eb9a6087e1aebc2733166c96d67685", "68a9005a5ec10daece36ca5ecb9cad7be44770b1", "58da996efd7320d1e484263c97c930c8979c474f", "1ae7993c0c2d795b243354de48dab80bf2000356", "28c61efacce53078cc3770f61da520f5971c3f04", "9fd1952123016582979ad1124d28e3473951048d", "127c7de1acfe7e8da5d0e10daf8f628494489540", "178b92c9d7438aa44949a4f5441e83f8a9de3ccb", "942f2a6df29234c304b69129872835d60cf5e9e9", "4389e19875631de6cbeed391bffe6e5f9525f8ac", 
"4e3304e77dd2fecea4086e132981d1470434cf65", "3570a31771bfa02467a6e14ad6fd1c0d982ed16b", "0b82470bb9cd233bff6228d3d1b484024b9f9c3b", "f98062567adb3c98bfabeb99172f8bca026a0102", "2c74aeec68efd07d908d4f421a5d4afe8426a18c", "042f443418ff2ff98a1dccbf49df9fa258dab707", "062fa315bb3785ec5e2f4f51aec597274fc2c167", "715cc8f921d3c58cfabf3244d3de7b00b87e3fcb", "0c25ac1fb86259bc91e22b51bdaa56ce1dbc50c5", "0fca220343f411c7dac67b1f5fc1bcf5790cc030", "42fd424539fc40536cc44362cd324e5614ad86c6" ], "paperAbstract": "The problem of efficient concurrent memory reclamation in unmanaged languages such as C or C++ is one of the major challenges facing the parallelization of billions of lines of legacy code. Garbage collectors for C/C++ can be inefficient; thus, programmers are often forced to use finely-crafted concurrent memory reclamation techniques. These techniques can provide good performance, but require considerable programming effort to deploy, and have strict requirements, allowing the programmer very little room for error.\n In this work, we present Forkscan, a new conservative concurrent memory reclamation scheme which is fully automatic and surprisingly scalable. Forkscan's semantics place it between automatic garbage collectors (it requires the programmer to explicitly retire nodes before they can be reclaimed), and concurrent memory reclamation techniques (as it does not assume that nodes are completely unlinked from the data structure for correctness). 
Forkscan's implementation exploits these new semantics for efficiency: we leverage parallelism and optimized implementations of signaling and copy-on-write in modern operating systems to efficiently obtain and process consistent snapshots of memory that can be scanned concurrently with the normal program operation.\n Empirical evaluation on a range of classical concurrent data structure microbenchmarks shows that Forkscan can preserve the scalability of the original code, while maintaining an order of magnitude lower latency than automatic garbage collection, and demonstrating competitive performance with finely crafted memory reclamation techniques.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064214", "http://people.inf.ethz.ch/aldan/papers/forkscan.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2ba74c38c70622c7cd8e99d61b7bc18401a18790", "sources": [ "DBLP" ], "title": "Forkscan: Conservative Memory Reclamation for Modern Operating Systems", "venue": "EuroSys", "year": 2017 }, "2bac8f76000eaf12807db4a0d0191a3b92e0817d": { "authors": [ { "ids": [ "2441120" ], "name": "Ari Kobren" }, { "ids": [ "2111658" ], "name": "Nicholas Monath" }, { "ids": [ "37019006" ], "name": "Akshay Krishnamurthy" }, { "ids": [ "1735747" ], "name": "Andrew McCallum" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "B-tree", "Cluster analysis", "Data point", "Dendrogram", "Experiment", "Greedy algorithm", "Hierarchical clustering", "Linear separability" ], "id": "2bac8f76000eaf12807db4a0d0191a3b92e0817d", "inCitations": [], "journalName": "CoRR", "journalPages": "", "journalVolume": "abs/1704.01858", "outCitations": [ "021cbd4b618a51c5cbc76907e29780a0abfb5d82", "2a107c4919fd5c59148f96e33f0193fa8ec68c2b", "071769b25a2d3a2882809d928c3c644b5dd08e73", "07aefb3edfd57ad583a2270befef08ddb1807dda", "9a47ae757d9505fe1613ba9bdc8e399fe8899cdc", "53d81ddfd5fc2c5ac8baf7cea58d9b575999aaef", "14ca7c5d3fafc3a20a76d2677b67ddb1e7ae1da1", 
"bf56777e85829064be2e96c4f92439dc68219160", "40e8d23231469e6495d3e06086e64df93e9dcfa0", "429ce727d269108ecb0bf5225ffb5a4fa7eed47d", "914b192f5d921f23c24be7712d18f38604fbb7a1", "38211dc39e41273c0007889202c69f841e02248a", "06faa4bbe958e43abb6cef9d4eaacf4302dd84cb", "008639035d561c7e738f6a1504ae65dc37c67b34", "1fbff0d0d59e28bb8f943efc5b161ff0dbd11be0", "00ca2e50ae1c5f4499d9271c72466d2f9d4ae137", "2352d9105de31032538900dfb2ce7c95f6402963", "71e5a6c95ff476c303c3adeae0c1a4387485f733", "0d6f53159dc63dfc5c087b62eccddbe81acf6abe", "441954db2f5c003b38453deccfed482310c2b78c", "05476fc029c7975614fa96764dffacf2f9a8eb4e", "b2ecaf799d0c4e4f394e554254931b8b7c6da9c6", "02d843e3a008e76cf6a4c23bd01023d264b05686", "783480acff435bfbc15ffcdb4f15eccddaa0c810", "0626908dd710b91aece1a81f4ca0635f23fc47f3", "102deb561bdf50c6a8ab5f41b800c85a1cd1be76", "0de8e6311773e3ad14f934460c001016a19daf76", "20cf48240b89bd522beff22a0cf0c8cd5b2f8abf", "6f5c1f3c7015c0e15b28c8a2d2b8178be287fa75" ], "paperAbstract": "Many modern clustering methods scale well to a large number of data items, N , but not to a large number of clusters, K. This paper introduces PERCH, a new non-greedy algorithm for online hierarchical clustering that scales to both massive N and K\u2014a problem setting we term extreme clustering. Our algorithm efficiently routes new data points to the leaves of an incrementally-built tree. Motivated by the desire for both accuracy and speed, our approach performs tree rotations for the sake of enhancing subtree purity and encouraging balancedness. We prove that, under a natural separability assumption, our non-greedy algorithm will produce trees with perfect dendrogram purity regardless of online data arrival order. 
Our experiments demonstrate that PERCH constructs more accurate trees than other tree-building clustering algorithms and scales well with both N and K, achieving a higher quality clustering than the strongest flat clustering competitor in nearly half the time.", "pdfUrls": [ "https://arxiv.org/pdf/1704.01858v1.pdf", "http://arxiv.org/abs/1704.01858", "https://arxiv.org/pdf/1704.01858.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/2bac/8f76000eaf12807db4a0d0191a3b92e0817d.pdf", "s2Url": "https://semanticscholar.org/paper/2bac8f76000eaf12807db4a0d0191a3b92e0817d", "sources": [ "DBLP" ], "title": "An Online Hierarchical Algorithm for Extreme Clustering", "venue": "ArXiv", "year": 2017 }, "2bb0f3f198b6f05454a2b4f8b6d3c3ed1c559371": { "authors": [ { "ids": [ "38756557" ], "name": "Mario Drumond" }, { "ids": [ "2089646" ], "name": "Alexandros Daglis" }, { "ids": [ "3492832" ], "name": "Nooshin Mirzadeh" }, { "ids": [ "8000917" ], "name": "Dmitrii Ustiugov" }, { "ids": [ "3257188" ], "name": "Javier Picorel" }, { "ids": [ "1701364" ], "name": "Babak Falsafi" }, { "ids": [ "2309941" ], "name": "Boris Grot" }, { "ids": [ "1799344" ], "name": "Dionisios N. 
Pnevmatikatos" } ], "doi": "10.1145/3079856.3080233", "doiUrl": "https://doi.org/10.1145/3079856.3080233", "entities": [ "Algorithm", "Baseline (configuration management)", "Central processing unit", "Dennard scaling", "Dynamic random-access memory", "Mondrian OLAP Server" ], "id": "2bb0f3f198b6f05454a2b4f8b6d3c3ed1c559371", "inCitations": [ "651ae380b5d500c613770dbf55c175c52576d7da", "6b6a5f2127b5ffbccd54d4823a9ca3a73969f3d1", "16666c536b04035b013c718bd91aad3594b4b894" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "639-651", "journalVolume": "", "outCitations": [ "9e668119671542ff2744ead289f006092eae5892", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "dcc4029f0907d39ceab161db001986db56ad0612", "99d80987446ecc7fb546826e7bccebb2fdc5fa12", "7c6c7a97488fdbb7c06f85c345b348183bf0a704", "01299bf5dce79d85aaa0d938670a93ddeeda4d0e", "097904d7691fb6d5cd15cea9ee0ed8d02ba1ce41", "70d42b312e2a3325b7596db1d1eac0958a5d3806", "886f29f247fd49084fbf25fdd60049b47db4f4ea", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "359760cb0df73b9d8e6501b5ab5ff93a6c12b44f", "bf70d60fc8d1de5fa53e8220a014fe463de4b7e5", "8e8e622d5fab4c1d2a5bc7783db84e62cc570f9a", "06902cb95ede2c305db4000852014f276b25c082", "bd80556653a915f53b932ad13189b9fa10453436", "06ce77e4abea63948580340be25d7f2a80369e5a", "293e7c79a37798032b3047324d101bc49d9a37a2", "746ce1f84401105286e0fc1adc18c3092fde50d4", "00ab25c6582d543932fccbb0f15fe93445f95d61", "f088374812301ed93fbfe8c5b72bf1351c084c01", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "30db3e0e6add0c2c699e863e56eb8b5e89b10951", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "15b7c7dbfece993f16054d1feaa8e141f59bc427", "8bf5afa21a0bd74551b261a7399eac4ffe2494e5", "01a337488e77c2fec3037cf7432b7ac10e39b45c", "07c3c9f1be15b52e259309b2e0a9ca71fa7dc76f", "2e0d128a5695b02eeb39bd4e06e8eb54990026dd", "f0a2fe909f984e3109a0132a9fce28cfb59eaa12", 
"0693ff4b3a8d1452b897a876d3ffe6b2074e98e4", "08639cd6b89ac8f375cdc1076b9485ac9d657083", "ade263c815bf995916a2fa56c89f70efacf6ec5f", "17d72a792b35fa9e65499b3e8f9c15a1f6f8b7b6", "0235fb69431fa5892333eb48a06ede07df6ff4f6", "60cfe41fd68644fb19cba99babae694a2acacc17", "313e8120c31fda6877ea426d8a3be9bcf1b6e088", "1ab74d44982409beeca21efb2dbcb97a5c7de4b2", "9141bafcff1df2dbabf9a20671d2fa1bcb55aae5", "4e8505919eb22265f107ebbeeee3fa78bf6d893a", "110c050c6c992d2b956f7b47d717810ac5c91bdc", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "4eec0e89509a1b0db15eb830799bb62498fdeb1b", "8b04ea524cb6ced72868c120a00c4679d84be006", "17d73aad3431f3fb064186d0a20d93b8919ab423", "f29dac2e26273532c81c933f091c7a60b9480f94", "ddf313f6fcc0520c716c54873164ded8e31703da", "5c71f2e8ab879bf508002d8f2e29c0f21317f3e9", "6f93e0325e577f49f4bed46a2adcfee4a649dc83", "a49984f69dc3f95ec406fda14852a4b799c20359", "2e50af2320dab632d8046b6d4c130ae6cce8903f" ], "paperAbstract": "The increasing demand for extracting value out of ever-growing data poses an ongoing challenge to system designers, a task only made trickier by the end of Dennard scaling. As the performance density of traditional CPU-centric architectures stagnates, advancing compute capabilities necessitates novel architectural approaches. Near-memory processing (NMP) architectures are reemerging as promising candidates to improve computing efficiency through tight coupling of logic and memory. NMP architectures are especially fitting for data analytics, as they provide immense bandwidth to memory-resident data and dramatically reduce data movement, the main source of energy consumption.\n Modern data analytics operators are optimized for CPU execution and hence rely on large caches and employ random memory accesses. In the context of NMP, such random accesses result in wasteful DRAM row buffer activations that account for a significant fraction of the total memory access energy. 
In addition, utilizing NMP's ample bandwidth with fine-grained random accesses requires complex hardware that cannot be accommodated under NMP's tight area and power constraints. Our thesis is that efficient NMP calls for an algorithm-hardware co-design that favors algorithms with sequential accesses to enable simple hardware that accesses memory in streams. We introduce an instance of such a co-designed NMP architecture for data analytics, the Mondrian Data Engine. Compared to a CPU-centric and a baseline NMP system, the Mondrian Data Engine improves the performance of basic data analytics operators by up to 49x and 5x, and efficiency by up to 28x and 5x, respectively.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080233", "http://homepages.inf.ed.ac.uk/bgrot/pubs/MONDRIAN_ISCA17.pdf", "https://infoscience.epfl.ch/record/227947/files/main_author.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2bb0f3f198b6f05454a2b4f8b6d3c3ed1c559371", "sources": [ "DBLP" ], "title": "The mondrian data engine", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "2bb1d6a5ece860bd9ec8c19c81ba171cada60d3e": { "authors": [ { "ids": [ "39258407" ], "name": "Svilen Kanev" }, { "ids": [ "2885907" ], "name": "Sam Likun Xi" }, { "ids": [ "2255803" ], "name": "Gu-Yeon Wei" }, { "ids": [ "1896817" ], "name": "David M. 
Brooks" } ], "doi": "10.1145/3037697.3037736", "doiUrl": "https://doi.org/10.1145/3037697.3037736", "entities": [ "C dynamic memory allocation", "Computation", "Hardware acceleration", "Memory management", "Multi-core processor", "Throughput" ], "id": "2bb1d6a5ece860bd9ec8c19c81ba171cada60d3e", "inCitations": [ "add350d0c5605c98d285b87493fc77c1d68281df", "9131661c99a5f0b521b566abb02365d9d8fddeab" ], "journalName": "", "journalPages": "33-45", "journalVolume": "", "outCitations": [ "03e93625d185c0ac144c97fdf269b5ae5f38351e", "d517fb0b33e55305aacbdecd48328919296e9744", "3ed84f2fbdc4dc6450919ec5b017e66440a5833c", "2bb28f6105ca30b2bc1ba91578234ecb12e788a6", "342a1b8bfdc86461ae9048cc43539198aabbe853", "bb2639eccea7beff738ac3efe468d0596f29a167", "269c24a4aad9be622b609a0860f5df80688c2f93", "06cee167c5c67e238dbbad080c084adbd14a8845", "8d7ab91362fa1319d696a0dc538ca881352bda76", "59707c84af98e62899c866b6869c4b70475c4f5e", "bcb288389d4318494887fe20ee68b6b18f39a3a5", "110c050c6c992d2b956f7b47d717810ac5c91bdc", "3244347baf3bee547145e69c70822db24bed848f", "52f202f4e0e59e296f56bf6fd52ec76fffdd371a", "245babd157fa6f26696e3ed790847aecb665d51c", "072a52d151640b33ecafbd379deb72d25ba3ddeb", "d605a32a0d511a00ea34a0f13a95bfa92968b0eb", "d72269e01fe01d7e9b047e1d636557a73d4917de", "03416be8097852a54dd3e309434e5a0806824646", "1ddcc37ae33b4dea4fe74a0b83f48809f2ea01d8", "8d71fb5efe95801b31d65366ff1ce8c01525e493", "ba6670f09779bfde52d4f38559d9588d36e5b684", "352a8957005dc5519b15ed1870751ec494d66395", "e49b4f0091f0a07eae75b5f26c5a3f3c4393a2f3" ], "paperAbstract": "Recent work shows that dynamic memory allocation consumes nearly 7% of all cycles in Google datacenters. With the trend towards increased specialization of hardware, we propose Mallacc, an in-core hardware accelerator designed for broad use across a number of high-performance, modern memory allocators. The design of Mallacc is quite different from traditional throughput-oriented hardware accelerators. 
Because memory allocation requests tend to be very frequent, fast, and interspersed inside other application code, accelerators must be optimized for latency rather than throughput and area overheads must be kept to a bare minimum. Mallacc accelerates the three primary operations of a typical memory allocation request: size class computation, retrieval of a free memory block, and sampling of memory usage. Our results show that malloc latency can be reduced by up to 50% with a hardware cost of less than 1500 um2 of silicon area, less than 0.006% of a typical high-performance processor core.", "pdfUrls": [ "http://www.eecs.harvard.edu/~skanev/papers/asplos17mallacc.pdf", "http://vlsiarch.eecs.harvard.edu/wp-content/uploads/2017/02/asplos17mallacc.pdf", "http://doi.acm.org/10.1145/3037697.3037736" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2bb1d6a5ece860bd9ec8c19c81ba171cada60d3e", "sources": [ "DBLP" ], "title": "Mallacc: Accelerating Memory Allocation", "venue": "ASPLOS", "year": 2017 }, "2be116f60cee0a1ca0b751ce2247e2dfaaffac75": { "authors": [ { "ids": [ "38034242" ], "name": "Alexander Bakst" }, { "ids": [ "2582366" ], "name": "Klaus von Gleissenthall" }, { "ids": [ "3425302" ], "name": "Rami G\u00f6khan Kici" }, { "ids": [ "1695297" ], "name": "Ranjit Jhala" } ], "doi": "10.1145/3133934", "doiUrl": "https://doi.org/10.1145/3133934", "entities": [ "Clustered file system", "Compile time", "Compiler", "Complexity", "Concurrency (computer science)", "Distributed computing", "Haskell", "MapReduce", "Message passing", "Model checking", "Programmer", "Two-phase commit protocol", "Verification and validation" ], "id": "2be116f60cee0a1ca0b751ce2247e2dfaaffac75", "inCitations": [], "journalName": "PACMPL", "journalPages": "110:1-110:27", "journalVolume": "1", "outCitations": [ "68694b8c85fa5b88c7d150fae7a8fa6ab9386b5a", "b74bc46948e3f293a3cfbc4fd2a3207d4894ac8c", "6b9798bb9e602d61dd4330c3c91b270e7e17d3e3", 
"2ac1a3bc03ee8d1d0b6853c203a67e3864b989a3", "1f3cd9516f62f0c3649c5c565c278a4020995c18", "2925a0282b8dba6ea26f6d36d04cb06dc5f057ec", "aade8151a493d0f486d23704e24612bdada28a71", "11cfb5cea9ae10f909c7636d45390a582a0e9fd9", "74409c9e8461d1acb58f2bb09878ad426d12f542", "010738235c0f589663ac09cf6dc45aede7f616bc", "060aa588843d4fe0062b5d891ec4c7b0b763cfa1", "5a8aafae38aeabd5533a271ab02396529b7b41c9", "165f3688f38f91da7bb642cf4fdeaf0fc95b0c95", "0541d5338adc48276b3b8cd3a141d799e2d40150", "ed9211320603f4f0ddf1e4131356d519ef04f459", "ba5e70ac4a2bdaeea5e61ff10f76212bb3fc1e3b", "6e87d84fefb952ab837afad47caa33ca256acd7f", "098739c37e2385a0666aed7f78d58c7e1c13b0d5", "669c3ff62fcadfda71a5a77ed2a41bf61e4e96cb", "8bedd1bd60713787249250e1360810395bf2d32c", "5ffa4bf21f6d8c78cbe2f089a7ad76ff825bf888", "bed6d0e530f20332c284a463c754ce1d304aca38", "7c23089c3b120371c6b86ffae4b850aaf047dee4", "402c11fc1c58680ffcaca9eb3e7544113c77e780", "13fe73aa6631b71ad4a5e680648c34f7ade680fd", "7e4d5a901a99128c04cc7d60ef1deb0dd12166af", "6517941e83041e6c0c847cd907ac8231f6b34773", "595dca367262f69b6cb3c9ea2416f9a22e1741ba", "0d288e995ed0af30ccc2419469b9607ba939b939", "584e5321cc4ee334f7dad11ab092960446a7fced", "316bfd53ade09b43b2722014b5518f35b70d6621", "d1eff42bbf26947d373e6b7085f9415d2e974a20", "6da535fdee8631e171dc8e719af2bd14d3de2035", "ce7989bc99502747c4defe7b950efd8c3ec9129a", "0507df3d03ab0fb1deebf50be7b5505f7fdca31d", "3243d4355df5e41fc157b5aaacfcc038da71e96a", "5fbf739032dd548c1ff189e7333f05e215906a1b", "38bb76891ab91b37e118d153d95094f59b6eea25", "41639c7e08244a4bea7987fce94f736691a019fd", "3903331813a494417acf595291029b4beba62737" ], "paperAbstract": "We introduce canonical sequentialization, a new approach to verifying unbounded, asynchronous, message-passing programs at compile-time. Our approach builds upon the following observation: due the combinatorial explosion in complexity, programmers do not reason about their systems by case-splitting over all the possible execution orders. 
Instead, correct programs tend to be well-structured so that the programmer can reason about a small number of representative executions, which we call the program’s canonical sequentialization. We have implemented our approach in a tool called Brisk that synthesizes canonical sequentializations for programs written in Haskell, and evaluated it on a wide variety of distributed systems including benchmarks from the literature and implementations of MapReduce, two-phase commit, and a version of the Disco distributed file-system. We show that unlike model checking, which gets prohibitively slow with just 10 processes Brisk verifies the unbounded versions of the benchmarks in tens of milliseconds, yielding the first concurrency verification tool that is fast enough to be integrated into a design-implement-check cycle.", "pdfUrls": [ "http://ranjitjhala.github.io/static/canonical_sequentialization.pdf", "http://doi.acm.org/10.1145/3133934", "http://abakst.github.io/oopsla17.pdf", "http://goto.ucsd.edu/~gleissen/papers/brisk.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2be116f60cee0a1ca0b751ce2247e2dfaaffac75", "sources": [ "DBLP" ], "title": "Verifying distributed programs via canonical sequentialization", "venue": "PACMPL", "year": 2017 }, "2be1c447aaa6e8d6a28aed927d6996f68216c282": { "authors": [ { "ids": [ "33829042" ], "name": "Dingqi Yang" }, { "ids": [ "1713520" ], "name": "Bin Li" }, { "ids": [ "34011720" ], "name": "Laura Rettig" }, { "ids": [ "1680925" ], "name": "Philippe Cudr\u00e9-Mauroux" } ], "doi": "10.1109/ICDM.2017.64", "doiUrl": "https://doi.org/10.1109/ICDM.2017.64", "entities": [ "Computation", "Concept drift", "Machine learning", "Speedup", "Stream (computing)", "Streaming media", "Synthetic data" ], "id": "2be1c447aaa6e8d6a28aed927d6996f68216c282", "inCitations": [ "fb619fae538dac110b000a3c63f9e44e7951974a" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "545-554", 
"journalVolume": "", "outCitations": [ "70ac255db930b424371a366f14ad8117e5580277", "ee6caed39ccfa30a157086aed5342e0282506d62", "30eac73e9b482bc28b5b68cd585557de48d0618f", "5eb328cf7e94995199e4c82a1f4d0696430a80b5", "933c9c7d3e4073edb0963646545ee79ed915369f", "bbacc62c5fb5f469492266a43dd04c37b2e8bcf1", "842cc0af44ef1815f883b0609eb3e1b2285f6109", "0772e7a5e0ce50ab5e0fb6c662c617b75d728562", "7f47a319c9bf0a5e0e9246354f64e52e39356c96", "7369fab6762902a3d92f122ad24c61d5cc4f4455", "74bee1ebf204dba4b2da0399a25a5ac9253a824e", "49148a479af20c09cc40c91eea9f7f76787df865", "e1dd1bdea4748c1fb673c76c862d586242e0a193", "125f042412c05c0f537717096424a04457add581", "55ef99b0cafe19e10245c70ed995d9c7617115b4", "b484849ccbe1b1d4c9f4e8c4654fa9b94c9536c1", "4568c5d17f9f770c0c11ac5ea3f92cfdd25cfe21", "5985014dda6d502469614aae17349b4d08f9f74c", "226bab70ecb9e13ef8fdb503a6183615d5c898ed", "ac23a69d02d06e4899c4a04a27183a1c96ffbe0c", "72dfd247b5eec340b6c5ccc7dd16244940853944", "13309bd4abd75d0bbacb7d4b80944c994958237f", "1ba2c3e6a28eba33ffc5f8f249029ea06556d0ce", "201512f04bdbfd4c65b3152d776d10bf0a090baa", "7a278ee0578f194700cadc3811cdda4ec751f88a", "a039a271156287ca852180ef0eb3f5dd9f62b110", "4a156f0ea13681c2997f46bef3718a09ac63f02d", "8451c2812a1476d3e13f2a509139322cc0adb1a2", "556afe048b858463bfb25962321cd76581f47413", "b5c13bda74db14b90d5a40da5d4155c7e1e473eb", "b36a5a36bf40e2a0dc3fe7aaa202a42ed8381347", "0217a43d49d80cc81af1211449147bb912e2bbfa", "d12da83249548a59793e01f9473a4e4e7b7d00c2", "24763030fb1e9813dad51d28bea9c5d1414f9cda", "28055aaeb478fd09f5a042408cd6b63cbf707d1e", "1c799eca7983c62f7815ac5f41787b3e552567b6" ], "paperAbstract": "Histogram-based similarity has been widely adopted in many machine learning tasks. However, measuring histogram similarity is a challenging task for streaming data, where the elements of a histogram are observed in a streaming manner. First, the ever-growing cardinality of histogram elements makes any similarity computation inefficient. 
Second, the concept-drift issue in the data streams also impairs the accurate assessment of the similarity. In this paper, we propose to overcome the above challenges with HistoSketch, a fast similarity-preserving sketching method for streaming histograms with concept drift. Specifically, HistoSketch is designed to incrementally maintain a set of compact and fixed-size sketches of streaming histograms to approximate similarity between the histograms, with the special consideration of gradually forgetting the outdated histogram elements. We evaluate HistoSketch on multiple classification tasks using both synthetic and real-world datasets. The results show that our method is able to efficiently approximate similarity for streaming histograms and quickly adapt to concept drift. Compared to full streaming histograms gradually forgetting the outdated histogram elements, HistoSketch is able to dramatically reduce the classification time (with a 7500x speedup) with only a modest loss in accuracy (about 3.5%).", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.64", "https://exascale.info/assets/pdf/icdm2017_HistoSketch.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2be1c447aaa6e8d6a28aed927d6996f68216c282", "sources": [ "DBLP" ], "title": "HistoSketch: Fast Similarity-Preserving Sketching of Streaming Histograms with Concept Drift", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "2c3f05d7cd4bf5aa913f1d558b72e73cdfd269d1": { "authors": [ { "ids": [ "1815239" ], "name": "Huan Zhao" }, { "ids": [ "3259992" ], "name": "Quanming Yao" }, { "ids": [ "1776349" ], "name": "James T. 
Kwok" }, { "ids": [ "1692152" ], "name": "Dik Lun Lee" } ], "doi": "10.1109/ICDM.2017.74", "doiUrl": "https://doi.org/10.1109/ICDM.2017.74", "entities": [ "Approximation", "Collaborative filtering", "Interaction", "Matrix regularization", "Metafont", "Recommender system", "Singular value decomposition" ], "id": "2c3f05d7cd4bf5aa913f1d558b72e73cdfd269d1", "inCitations": [ "f5dc0b1b736df15bbe91980e53af27f103f85ed4", "012a59d7f01325d2ecb0c543bc7d640831284a55" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "645-654", "journalVolume": "", "outCitations": [ "6bfce0993936c9c9cd2d3c112c087c4d62690cfe", "c73287153c0a50102a40800c1ada626a410c63cc", "0032ace23c80506aaf6059d7de8df966ecd846de", "58b0dc8c3d11e43763b232e990c1bdf9030c7c12", "6ff68637a09d89c4aa3bb29a4d47391fb0000d06", "64c871cd7e0af7e1aeb94d98c6214eb8d8e8b989", "319e439675c2d2f56bd2dfbb9836191a57b9ac36", "1b8dc814a7384285a52bc005bc54a3b5c87b219d", "09e5b4b6374a9ca5c8c12afc9d6b605131a67647", "53ced1bf79a5ff0db025fee76a67a33f37e4286d", "4a00ebe98d455d61dc1b708265c237fe2ee6ec64", "10e44c294a968ca91e361fac44aa9d0f2cdf3bd3", "2275762a28582716db92df6d525ed2481c7d7f14", "07ad62b6b5da5f226c88549378886ca062e207a0", "1f69d28a56246c99e1b7589c55871e1d8ce6a0c5", "052715e9292df2bb62e95616ac6486fba7cbf72f", "0e97ed876d7abe83fe11d5c0906168f3ce9707a5", "258c8adfba357ed20cc03b5c2229eb773924bc08", "94d7c9558f986f9f3967f331a968b599edc2b11c", "9644411e41ed9f36de06f226d3dbf6220b89e4cb", "0fcc45600283abca12ea2f422e3fb2575f4c7fc0", "020c2b5d942170bc5bf6401f8e9bbd12ae3155b5", "016366507205724e5456000e79d2e7d2630ce76b", "9b9050c89dd753163fd3db704018352ed781f9eb", "0cf6fe9e975a5496e9edd53818ae5c18a2a7e66b", "38e024c97f0e55097919066b7233a5caa622a62f", "0a27e088c3dd6c3fc9cba97b3dd76fcabd413108", "2a005868b79511cf8c924cd5990e2497527a0527", "1000025264ef85af0b116b3c12d5b283504d36eb", "787d56ec5569f1054f490dcf9a9fb4b87b7990e8", "e8cedf9f96aa266818642ffbc9e6fbeda626f3ff" ], "paperAbstract": "Matrix 
Factorization (MF) is a very popular method for recommendation systems. It assumes that the underneath rating matrix is low-rank. However, this assumption can be too restrictive to capture complex relationships and interactions among users and items. Recently, Local LOw-Rank Matrix Approximation (LLORMA) has been shown to be very successful in addressing this issue. It just assumes the rating matrix is composed of a number of low-rank submatrices constructed from subsets of similar users and items. Although LLORMA outperforms MF, how to construct such submatrices remains a big problem. Motivated by the availability of rich social connections in today's recommendation systems, we propose a novel framework, i.e., Social LOcal low-rank Matrix Approximation (SLOMA), to address this problem. To the best of our knowledge, SLOMA is the first work to incorporate social connections into the local low-rank framework. Furthermore, we enhance SLOMA by applying social regularization to submatrices factorization, denoted as SLOMA++. Therefore, the proposed model can benefit from both social recommendation and the local low-rank assumption. 
Experimental results from two real-world datasets, Yelp and Douban, demonstrate the superiority of the proposed models over LLORMA and MF.", "pdfUrls": [ "https://arxiv.org/pdf/1704.05735v1.pdf", "http://www.cse.ust.hk/~hzhaoaf/data/icdm2017-slides.pdf", "http://www.cse.ust.hk/~hzhaoaf/data/icdm2017-paper.pdf", "http://www.cse.ust.hk/~qyaoaa/papers/icdm2017paper.pdf", "https://arxiv.org/pdf/1704.05735v2.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.74" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2c3f05d7cd4bf5aa913f1d558b72e73cdfd269d1", "sources": [ "DBLP" ], "title": "Collaborative Filtering with Social Local Models", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "2c50094e7e5e1134033efc6565c8d7c21a04d2d9": { "authors": [ { "ids": [ "2182534" ], "name": "Qingda Hu" }, { "ids": [ "1771630" ], "name": "Jinglei Ren" }, { "ids": [ "1783539" ], "name": "Anirudh Badam" }, { "ids": [ "2684311" ], "name": "Jiwu Shu" }, { "ids": [ "1715172" ], "name": "Thomas Moscibroda" } ], "doi": "", "doiUrl": "", "entities": [ "Computer data storage", "Data store", "Fragmentation (computing)", "High memory", "IP fragmentation", "Memory management", "Non-volatile memory", "Polynomial texture mapping", "Throughput", "Transactional memory" ], "id": "2c50094e7e5e1134033efc6565c8d7c21a04d2d9", "inCitations": [ "77e4d412240f65056d1edf334ab7352a5df061bd", "5716db825bbd2c39836a2d6fa22e7f313fc12ccf", "4994eb0dfa2d15d7b5013563d018e8c16b71b039", "41ea95cc4dca373bf324555b897760054ec4a76e", "39d6b0bee42492871fd9d2a1a41fcc56add986cc" ], "journalName": "", "journalPages": "703-717", "journalVolume": "", "outCitations": [ "03e93625d185c0ac144c97fdf269b5ae5f38351e", "03b6a916498fa8591201a2de5f22344609b1e457", "3af216f371069b57c0dca5448384d052fb490fb4", "157352f39ed5ba9a571dd4c27b48786ac8be90cf", "57fe2b6acccbba91df0847442d2634ffef7ccfb4", "94783d113951822195d4ba44599a8fcbdef9d4bf", 
"dc2e2b794a784782d7d9860f1358aa107f71c1bf", "fae8a785260ac5c34be82fca92a4abef4c30d655", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "24724ad8962a9e04eb496fddaefe9708f6960601", "823116269044ab4c713373c66c7da3fcb495b459", "7e362dfad276797611e98371ee10809d4fec65e3", "fd840d5275cac98d64e7778a1b9173b937a77386", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "885c666fbcfd1a10c613496d7a041d01b99c7a39", "04f020a4ab2134db6f9e98eadf216d94d440414a", "14dc05a51866b6832990fc7fe8c8f6b85730bb84", "99723365fc9fe6960201bf9d246a90ccbb6396fa", "2621b8f63247ea5af03f4ea0e83c3b528238c4a1", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "1ddcc37ae33b4dea4fe74a0b83f48809f2ea01d8", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "0204f40221260d00c5ee63646560a40dcd7d97d1", "4f6fbe4484487e3983f673ff55bdec92f947311c", "314919c141024c71cb17d525ecd8016138335002", "15d144f09b05af8e0d9076d401893f4a846ef9d5", "088e3e939ad234b6fdd0e321290fb26937dc2553", "3437a7e23e3f97b58f4cf73e7e5b711131e6706c", "ef38dd932706e38989ed29bd98496a7986fc5dd9", "bee25de126b12f3f9dc64b1da804a88831eb11b3", "2e3bda19a2ed88e8a6e5cc415e27da551653ff1d", "b8735a449f0a1f1889c6b744061360aa85afaa6b", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", "9aa0d7253574e50fe3a190ccd924433f048997dd", "277862a906af8489a1d98add2f6516a0e5df1bb1", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "098d792d1783b5f6fc098203f71f21f5d053c653", "78e47b768c784fcb15004bab48e24f80fdad579e", "f4147b82166813bbe5dc01e9486664c273d1556c", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "5e3bf131096d59e5bbc58113f3840f5a0b7a392d", "57c823b3b07b98233394bf15cfbbaed6a84809df", "bf6275801e4bac2918f1b8698c2892e1a375808f" ], "paperAbstract": "Emerging non-volatile main memory (NVMM) unlocks the performance potential of applications by storing persistent data in the main memory. Such applications require a lightweight persistent transactional memory (PTM) system, instead of a heavyweight filesystem or database, to have fast access to data. 
In a PTM system, the memory usage, both capacity and bandwidth, plays a key role in dictating performance and efficiency. Existing memory management mechanisms for PTMs generate high memory fragmentation, high write traffic and a large number of persist barriers, since data is first written to a log and then to the main data store. In this paper, we present a log-structured NVMM system that not only maintains NVMM in a compact manner but also reduces the write traffic and the number of persist barriers needed for executing transactions. All data allocations and modifications are appended to the log which becomes the location of the data. Further, we address a unique challenge of log-structured memory management by designing a tree-based address translation mechanism where access granularities are flexible and different from allocation granularities. Our results show that the new system enjoys up to 89.9% higher transaction throughput and up to 82.8% lower write traffic than a traditional PTM system.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_hu.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/hu", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/05/lsnvmm_atc17.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-hu.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/2c50/094e7e5e1134033efc6565c8d7c21a04d2d9.pdf", "s2Url": "https://semanticscholar.org/paper/2c50094e7e5e1134033efc6565c8d7c21a04d2d9", "sources": [ "DBLP" ], "title": "Log-Structured Non-Volatile Main Memory", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "2c6107749df44ee733215e41e11e8b03db0759f7": { "authors": [ { "ids": [ "36074345" ], "name": "Simone Bova" }, { "ids": [ "1701841" ], "name": "Stefan Szeider" } ], "doi": "10.1145/3034786.3034787", "doiUrl": "https://doi.org/10.1145/3034786.3034787", "entities": [ "Binary decision diagram", "Compiler", 
"Computation", "Conjunctive query", "Diagram", "Grammar-based code", "Knowledge compilation", "Pathwidth", "Polynomial", "Probabilistic database", "Query language", "Tame", "Treewidth" ], "id": "2c6107749df44ee733215e41e11e8b03db0759f7", "inCitations": [ "3df909e9e3fc4ede6e69be447f79763d3931525a", "b4f8d79059dade020adb3ad9a5202913110926a2", "cdbe4c04d453228704f1a9c3f7cbdf7f137eba5b" ], "journalName": "", "journalPages": "233-246", "journalVolume": "", "outCitations": [ "305221881759cb37b1f67790f3997bb5cedfcb67", "c244da607be13732aec78c6ef8f769f93bd6b971", "08b49cd825298b09620b69e9c37d25382895d9db", "16d2dd882faea0a5a53b5abc7b2be1a00bc75694", "d94e6aa9a1b1a40b22f1234a7419567c8bbb4b6c", "9685266d0abb1681120373da3ef512972763468e", "768af56fee441b59d191850a6a75d08f746a9895", "05aecc7d2953750b748dc45c4167ccbdbaa6b7bf", "03d9e06a8bbf15edf1e59664456ad95ba6ef6ad1", "297d97c8aef37788d38a7a66e62251eae559ec91", "1f838de3996c50b235601f55b1425a90b74dd9e4", "371a6a4c6ecbe243ef77d0701ac3c5d7eb1fdc38", "119c44b3dc9edca6751b64779da528bbe9a2f542", "15806f0c355985973b12a517d185b43f50b062a3", "17ac0f9f208caf331d71c9b158f681450c20e9db", "1fe41b1240a0eddec736b675e914b4858a955876", "c1c05f803697d3d06f2f62fc6196b013ea365f31", "7486a9091bd58822c8041af7dbbe491ef185bdbb", "1799deabb70064a34e901959a1b77cf87f3cb140", "5ec44ac2a5b9f72f0a42601de40ccc7b6072cb9f", "48204d1d23b03b72a9afca8aad6405c84512b9ef", "3391e99b80eab07c4b7bf713bd14acb028ddee8d", "473d5b14c1ff9efc9afb45af82efa9df39323bbd", "455e45f44dfd3113f0f5cfe3090acf19c6aa3f10", "41c52fd45b209a1a72f45fe7b99afddd653c5a12", "86624b243cae7920cea9aa55f8d47f2c77d40a67", "39dc786a942284e293eab1440f0eccbffdf0a4bf", "2cd3bc9972bf4a0bcf0fb80e2c90de5291abfd56", "a5535b94e0ead9576796f107094ed43ac51cec3b", "5db21f7b217fd269672156ee803deb586631da29", "6f87f9e552d74c6b531d17babeaa4022bca32ba7" ], "paperAbstract": "The evaluation of a query over a probabilistic database boils down to computing the probability of a suitable Boolean function, the lineage 
of the query over the database. The method of query compilation approaches the task in two stages: first, the query lineage is implemented (compiled) in a circuit form where probability computation is tractable; and second, the desired probability is computed over the compiled circuit. A basic theoretical quest in query compilation is that of identifying pertinent classes of queries whose lineages admit compact representations over increasingly succinct, tractable circuit classes.\n Fostering previous work by Jha and Suciu (ICDT 2012) and Petke and Razgon (SAT 2013), we focus on queries whose lineages admit circuit implementations with small treewidth, and investigate their compilability within tame classes of decision diagrams. In perfect analogy with the characterization of bounded circuit pathwidth by bounded OBDD width, we show that a class of Boolean functions has bounded circuit treewidth if and only if it has bounded SDD width. Sentential decision diagrams (SDDs) are central in knowledge compilation, being essentially as tractable as OBDDs but exponentially more succinct. 
By incorporating constant width (linear size) SDDs and polynomial size SDDs in the picture, we refine the panorama of query compilation for unions of conjunctive queries with and without inequalities.", "pdfUrls": [ "http://doi.acm.org/10.1145/3034786.3034787", "https://arxiv.org/pdf/1701.04626v1.pdf", "http://arxiv.org/abs/1701.04626" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2c6107749df44ee733215e41e11e8b03db0759f7", "sources": [ "DBLP" ], "title": "Circuit Treewidth, Sentential Decision, and Query Compilation", "venue": "PODS", "year": 2017 }, "2c68fc64ee8fd08bb7c742d3087f8a54da9ff296": { "authors": [ { "ids": [ "1694978" ], "name": "Cheng Wang" }, { "ids": [ "1684443" ], "name": "Bhuvan Urgaonkar" }, { "ids": [ "2772186" ], "name": "Neda Nasiriani" }, { "ids": [ "1809899" ], "name": "George Kesidis" } ], "doi": "10.1145/3084448", "doiUrl": "https://doi.org/10.1145/3084448", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Backup", "Burstable billing", "Full disclosure (computer security)", "Google Compute Engine", "In-memory database", "Memcached", "Multiplexing", "Procurement", "Throughput", "Token bucket", "Virtual machine" ], "id": "2c68fc64ee8fd08bb7c742d3087f8a54da9ff296", "inCitations": [], "journalName": "POMACS", "journalPages": "11:1-11:28", "journalVolume": "1", "outCitations": [ "0b2b809a031f6f74581f9f6687e9a86cfe4ca93b", "ec5f0f8d5b7176cb2e88271ec948f935ea0346a2", "33e82ac2571ec0902aaec1a3e9e375dae79894b3", "9ba15b9c2c05dd677f5347a98340a6ebf8340008", "4581948531998d5e5f23c131081ea0cdd9066bfe", "7f9d43c76bb5c077aabf1ef6ef20d35763e34291", "b0447d4880d2b35c25350fe0a5283afbad82c7f7", "368df1b2390ee959b3b7bca6c90cc4aa2da7cbcc", "5848da5058fed3b97bfd801ca19e5265f489abfe", "04e24be4c25539e4b4fa0498c85b3a3e2d026c02", "2316b4b4735ae39ddebce63c22644c93a855c262", "357c28cac5b8ffa1928d834557909ef6d6b9a2a7", "581b0e29991ffd8396e2d91b9c53ad483e72d9b8", "0d24c8ffa3a5f800c3c1b3146b96a955a84216b6", 
"6f0bf64788fec1b5eec08da21facc165fa51392a", "358ce90c730bbd72c7a94c09ac05677ddfcca492", "4e4348913b3198ae51b784db893938ae3afecaf5", "c2fa8d9d8418460fc44985473315c86cf7b240ce", "8aa76db6c34c565657a40944d1c6d9317da87333", "19d8f8af2d774bda8b95bbeacf62c85811c37b3c", "1999881614aed9295f4359cf4761926bc23fcd82", "627cdd5ca07563b8e1a1f2de57a9ebdcec3086db", "144a6336a53b586713f28c36e0f2fc3703b127a5", "2862d11cc739a1c6baf6addb56502d869233c11c", "440f3e59fde1fde9868bc4a0e8fa9132050ce89c", "3e257f01e3ee71545d824a1615c35659525b856a", "1146b4781152d7eece00b0e5f0695a307bae0fe4", "14aea71603d45b92028608ff5866a0fb276379cd" ], "paperAbstract": "Amazon EC2 and Google Compute Engine (GCE) have recently introduced a new class of virtual machines called \"burstable\" instances that are cheaper than even the smallest traditional/regular instances. These lower prices come with reduced average capacity and increased variance. Using measurements from both EC2 and GCE, we identify key idiosyncrasies of resource capacity dynamism for burstable instances that set them apart from other instance types. Most importantly, certain resources for these instances appear to be regulated by deterministic token bucket like mechanisms. We find widely different types of disclosures by providers of the parameters governing these regulation mechanisms: full disclosure (e.g., CPU capacity for EC2 t2 instances), partial disclosure (e.g., CPU capacity and remote disk IO bandwidth for GCE shared-core instances), or no disclosure (network bandwidth for EC2 t2 instances). A tenant modeling these variations as random phenomena (as some recent work suggests) might make sub-optimal procurement and operation decisions. We present modeling techniques for a tenant to infer the properties of these regulation mechanisms via simple offline measurements. 
We also present two case studies of how certain memcached workloads might benefit from our modeling when operating on EC2 by: (i) augmenting cheap but low availability in-memory storage offered by spot instances with backup of popular content on burstable instances, and (ii) temporal multiplexing of multiple burstable instances to achieve the CPU or network bandwidth (and thereby throughput) equivalent of a more expensive regular EC2 instance.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078505.3078591", "http://doi.acm.org/10.1145/3084448" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2c68fc64ee8fd08bb7c742d3087f8a54da9ff296", "sources": [ "DBLP" ], "title": "Using Burstable Instances in the Public Cloud: Why, When and How?", "venue": "SIGMETRICS", "year": 2017 }, "2c713271f4fbc79bafafb215a5de17c64b30787e": { "authors": [ { "ids": [ "1811683" ], "name": "Rafael Pass" }, { "ids": [ "1726246" ], "name": "Elaine Shi" } ], "doi": "10.1145/3087801.3087809", "doiUrl": "https://doi.org/10.1145/3087801.3087809", "entities": [ "Adversary (cryptography)", "Bitcoin", "Computational resource", "Computer performance", "Cryptocurrency", "Cynthia Dwork", "Hall effect", "IBM Notes", "Liveness", "Random oracle", "Security parameter", "Sybil attack" ], "id": "2c713271f4fbc79bafafb215a5de17c64b30787e", "inCitations": [ "23a46804cc0bc54c04ae6f5e115bd164dbf42f59", "12316497673fb59ccef3df96dfb8232eb146a8e6", "81adf6aac3e75c24c4147018618a675fdab80a30", "290b36f8023771fdc29904487e1b1de26b94f7cf", "5ebd33297063bc3f79b0eb171b914cdd85eb91c7", "1969453d7960eaca8cfbd642877925f5f5028ce5", "e52c9e6f7f0b62d1526897f5c2ac233e0647754b", "71c5bc722f575665878dc3ca47953f384426899a", "fa380decdc61a1979f9afbef82e034abc56fc350", "edfa3de5a3ce5e26a214adb6faa46ecd0e7dcc4a", "09ee994214de3fdb1a764570cd00145102be0596", "fa724d178bd2a319822dc6b85144fc59ae25b89d", "8ecc3fb161cbb6a478441536e0bc3647306d7308", "ac482f29106a16778805db32a4e71f77737f8f3e", 
"13a39f769952a35cb177c5a9762427b0dd026922", "4b2542c8cabbbe0c2d9d1b55a43a6c0b7270d1fd", "43fb74fc45ea844ad087c770fa9be747fbd03b19", "708b47eaffa20cf7b4923d2ed8c1a6951cb4933f", "919e32847097416aada92dff7c8274cd9ca55582", "4ac40296fabf9ca34c08690d3f5fb116571d64f5", "4be8d3a9ee9bc916f7d5fdc1da5ed700923239ce", "161c24b98ce3af2c0f8a5e96d5055a367b81801e", "aa98f981fd50dd00a80f23baa0aa0b250b7e212a", "773428d448f024bbf23dcdcf58d9ff97febd9895", "827c58706816135fc268295c96d54c186437506d", "305992f0aed03476be6d8966d773ebd922c528be" ], "journalName": "", "journalPages": "315-324", "journalVolume": "", "outCitations": [ "8992bb749cb88b5e4aeb195d8649952301dad01d", "728b60c04afb5b87853b59265e49f430dbf631db", "1ccef9fa75e519daa10618fe9f2d7a46a34a7040", "161c24b98ce3af2c0f8a5e96d5055a367b81801e", "2128ec621d4509302bd64d18774403575066792f", "226cfb67d2d8eba835f2ec695fe28b78b556a19f", "81190f83636ebb1995622c57277a52ecf0837f44", "16390c730637b17ab574dc1216e34271eea71dd0", "ac1a918fc933b767d34574ec2cc6a33b4223dc1a", "2f7bb6613154e1b3580c0114bf2cfb3c8ceb477e" ], "paperAbstract": "Nakamoto\u2019s famous blockchain protocol enables achieving consensus in a so-called permissionless setting\u2014anyone can join (or leave) the protocol execution, and the protocol instructions do not depend on the identities of the players. His ingenious protocol prevents \u201csybil attacks\u201d (where an adversary spawns any number of new players) by relying on computational puzzles (a.k.a. \u201cmoderately hard functions\u201d) introduced by Dwork and Naor (Crypto\u201992). 
Recent work by Garay et al (EuroCrypt\u201915) and Pass et al (manuscript, 2016) demonstrate that this protocol provably achieves consistency and liveness assuming a) honest players control a majority of the computational power in the network, b) the puzzle-hardness is appropriately set as a function of the maximum network delay and the total computational power of the network, and c) the computational puzzle is modeled as a random oracle. Assuming honest participation, however, is a strong assumption, especially in a setting where honest players are expected to perform a lot of work (to solve the computational puzzles). In Nakamoto\u2019s Bitcoin application of the blockchain protocol, players are incentivized to solve these puzzles by receiving rewards for every \u201cblock\u201d (of transactions) they contribute to the blockchain. An elegant work by Eyal and Sirer (FinancialCrypt\u201914), strengthening and formalizing an earlier attack discussed on the Bitcoin forum, demonstrates that a coalition controlling even a minority fraction of the computational power in the network can gain (close to) 2 times its \u201cfair share\u201d of the rewards (and transaction fees) by deviating from the protocol instructions. In contrast, in a fair protocol, one would expect that players controlling a \u03c6 fraction of the computational resources to reap a \u03c6 fraction of the rewards. In this work, we present a new blockchain protocol\u2014the FruitChain protocol\u2014which satisfies the same consistency and liveness properties as Nakamoto\u2019s protocol (assuming an honest majority of the computing power), and additionally is \u03b4-approximately fair : with overwhelming probability, any honest set of players controlling a \u03c6 fraction of computational power is guaranteed to get at least a fraction (1 \u2212 \u03b4)\u03c6 of the blocks (and thus rewards) in any \u03a9(\u03ba\u03b4 ) length segment of the chain (where \u03ba is the security parameter). 
As a consequence, if this blockchain protocol is used as the ledger underlying a cryptocurrency system, where rewards and transaction fees are evenly distributed among the miners of blocks in a length \u03ba segment of the chain, no coalition controlling less than a majority of the computing power can gain more than a factor (1 + 3\u03b4) by deviating from the protocol (i.e., honest participation is an n2 -coalition-safe 3\u03b4-Nash equilibrium). Finally, the FruitChain protocol enables decreasing the variance of mining rewards and as such significantly lessens (or even obliterates) the need for mining pools. \u2217Supported in part by NSF Award CNS-1217821, NSF Award CNS-1561209, AFOSR Award FA9550-15-1-0262, a Microsoft Faculty Fellowship, and a Google Faculty Research Award. \u2020Supported in part by NSF Award CNS-1314857, CNS-1514261, CNS-1544613, CNS-1561209, CNS-1601879, CNS1617676, an Office of Naval Research Young Investigator Program Award, a Packard Fellowship, a Sloan Fellowship, Google Faculty Research Awards, and a VMWare Research Award.", "pdfUrls": [ "https://eprint.iacr.org/2016/916.pdf", "http://doi.acm.org/10.1145/3087801.3087809", "http://eprint.iacr.org/2016/916", "http://eprint.iacr.org/2016/916.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/2c71/3271f4fbc79bafafb215a5de17c64b30787e.pdf", "s2Url": "https://semanticscholar.org/paper/2c713271f4fbc79bafafb215a5de17c64b30787e", "sources": [ "DBLP" ], "title": "FruitChains: A Fair Blockchain", "venue": "PODC", "year": 2016 }, "2c713ea0f3dacc2ce4189891a57c69aec0707c52": { "authors": [ { "ids": [ "2027332" ], "name": "Naga Praveen Katta" }, { "ids": [ "7551184" ], "name": "Aditi Ghag" }, { "ids": [ "3439759" ], "name": "Mukesh Hira" }, { "ids": [ "1785739" ], "name": "Isaac Keslassy" }, { "ids": [ "40404973" ], "name": "Aran Bergman" }, { "ids": [ "33742176" ], "name": "Changhoon Kim" }, { "ids": [ "1730356" ], "name": "Jennifer Rexford" } ], "doi": "10.1145/3143361.3143401", 
"doiUrl": "https://doi.org/10.1145/3143361.3143401", "entities": [ "Algorithm", "Equal-cost multi-path routing", "Hypervisor", "Load balancing (computing)", "Network congestion", "Network switch", "Packet switching", "Scalability", "Software deployment", "Traceroute" ], "id": "2c713ea0f3dacc2ce4189891a57c69aec0707c52", "inCitations": [], "journalName": "", "journalPages": "323-335", "journalVolume": "", "outCitations": [ "7f160d94d7684a5d3b0bb286044e26596284c96a", "32ac1fed2f6f9bf6b8913091f5a6efd40d71b1e1", "83a1a478c68e21ee83dd0225091bf9d3444a8120", "122229239aeba1eb4f1623adb40f1845c582a520", "2e3cc2d55770aac26d3ce0cb6ddd96dbbcfec4cc", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "5c5d03e884d4f0094b217c62267466fa11432c8e", "58f692e9b03cb973355aab46bb6f867239aeb513", "2a628e4a9c5f78bc6dcdf16514353336547846cc", "2730c3ec2908d852e73a62f93302e0e8bfe5510e", "129567778989fab23b50812b3df30e899e2d6a4e", "00f6f16f4b76e931d3924e56674a74fca8d94df3", "c678e962b158153924bbb24c4900b84375be7e57", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "093b8adeb29ee4c17b3528c1b9791f275630c8f2", "12d3952369fd92db86d4a3fff0a6fa8de3509095", "157629dc2a9d2c2c7696e021ff8c1d5a6e7b4197", "327a02b19a60319cc35be860ad0259a5c1aef920", "55c5a2c1f2f6f66646baf64405464357fa7df8a0", "8e86374859a1d07e049a2c6e1cb11d12302552fb", "0f8b04cb89e455ceadf0c88fd5dd9f9a7f338ba9", "1434811cbe1c7831f0ee2974e9093d1e57461f0f", "4a4c5da923653217473f1a253c75967c6f444c7f", "9570d6075ecaf7f5dc28e99edfabc64914d44ca5" ], "paperAbstract": "Most datacenters still use Equal Cost Multi-Path (ECMP), which performs congestion-oblivious hashing of flows over multiple paths, leading to an uneven distribution of traffic. Alternatives to ECMP come with deployment challenges, as they require either changing the tenant VM network stacks (e.g., MPTCP) or replacing all of the switches (e.g., CONGA). 
We argue that the hypervisor provides a unique point for implementing load-balancing algorithms that are easy to deploy, while still reacting quickly to congestion. We propose Clove, a scalable load-balancer that (i) runs entirely in the hypervisor, requiring no modifications to tenant VM networking stacks or physical switches, and (ii) works on any topology and adapts quickly to topology changes and traffic shifts. Clove relies on standard ECMP in physical switches, discovers paths using a novel traceroute mechanism, uses software-based flowlet-switching, and continuously learns congestion (or path utilization) state using standard switch features. It then manipulates packet-header fields in the hypervisor switch to direct traffic over less congested paths. Clove achieves 1.5 to 7 times smaller flow-completion times at 70% network load than other load-balancing algorithms that work with existing hardware. Clove also captures some 80% of the performance gain of best-of-breed hardware-based load-balancing algorithms like CONGA that require new equipment.", "pdfUrls": [ "http://doi.acm.org/10.1145/3143361.3143401", "https://www.cs.princeton.edu/~jrex/papers/clove17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2c713ea0f3dacc2ce4189891a57c69aec0707c52", "sources": [ "DBLP" ], "title": "Clove: Congestion-Aware Load Balancing at the Virtual Edge", "venue": "CoNEXT", "year": 2017 }, "2c7cd28ce7222ac733703b79a1761066201d6419": { "authors": [ { "ids": [ "2686182" ], "name": "Channoh Kim" }, { "ids": [ "3448808" ], "name": "Jaehyeok Kim" }, { "ids": [ "4646196" ], "name": "Sungmin Kim" }, { "ids": [ "3109071" ], "name": "Doo-Young Kim" }, { "ids": [ "39553125" ], "name": "Namho Kim" }, { "ids": [ "9992479" ], "name": "Gitae Na" }, { "ids": [ "3072985" ], "name": "Young H. Oh" }, { "ids": [ "2963019" ], "name": "Hyeon-Gyu Cho" }, { "ids": [ "3091593" ], "name": "Jae W. 
Lee" } ], "doi": "10.1145/3037697.3037726", "doiUrl": "https://doi.org/10.1145/3037697.3037726", "entities": [ "Computer", "Electronic data processing", "Fetch-and-add", "Field-programmable gate array", "Garbage collection (computer science)", "High- and low-level", "Insertion sort", "JavaScript", "Lua", "Memory footprint", "Memory management", "Principle of abstraction", "Programming paradigm", "RISC-V", "Run time (program lifecycle phase)", "Scripting language", "Semiconductor device fabrication", "Single-board computer", "Type system", "Typing" ], "id": "2c7cd28ce7222ac733703b79a1761066201d6419", "inCitations": [ "ce992c5be70243c83a5faaeea3f314ebd36302a9" ], "journalName": "", "journalPages": "77-90", "journalVolume": "", "outCitations": [ "5007b598ed2c118bdf14c0a7562b6c4fb7974742", "17bff708b1b6791db2dec8621a417c17aa79448d", "636e0e7325e5fc96297b4385dbd34c6b14ebfa89", "0ab24dda560e79160a8d41bee4e6e9f37a6a554e", "f6489aaece77299093a351f3aa7c90337241ce91", "092b09f0ec09b2b10763f5697ca77099a37ab022", "06bbfb45f7bf27e0c79939cfac1f7b10f20183ca", "34e41ebc64b786e20efc490363aaeb5fa508866b", "10367b97247299547cab9d91598e4dca70e25553", "004428345b7977c032174ecf7fbac72fee7af718", "e69e30d7df7bb0b91b44c78e0906fd143f2808ac", "0657eb7e069c2c2c7cae6636704e0f7fb3bcd9fc", "a3f3d0f41d0f914f0a7edaccb3d80cc69388cb59", "be992f6cfb3748256c559504750f35000c11b3a4", "1810f70bdcb6f50ff70bed2c165918046e6a8aef", "69254b5c0e4b0e9ce9138e9edbf4df149acba7a5", "590c4315c999929e8bb3c60d04405724a1297f0b", "5878301fb9bcd3e6ca30e644670955bf07696607", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "104a9057b97b50d053a01e7a36c0de46480a1948", "5e3f8c2ba2fb225c29ba343565d52b9661e7198e", "4a4d61347bb00e73a6a80cdcf18b49da1dba5adb", "72682890677496da1a98f2d4ce9396ad13997e07", "93fdfbc47c6229ba80deec9d56a6b132198a5eaf" ], "paperAbstract": "Dynamic scripting languages are becoming more and more widely adopted not only for fast prototyping but also for developing production-grade applications. 
They provide high-productivity programming environments featuring high levels of abstraction with powerful built-in functions, automatic memory management, object-oriented programming paradigm and dynamic typing. However, their flexible, dynamic type systems easily become the source of inefficiency in terms of instruction count, memory footprint, and energy consumption. This overhead makes it challenging to deploy these high-productivity programming technologies on emerging single-board computers for IoT applications. Addressing this challenge, this paper introduces Typed Architectures, a high-efficiency, low-cost execution substrate for dynamic scripting languages, where each data variable retains high-level type information at an ISA level. Typed Architectures calculate and check the dynamic type of each variable implicitly in hardware, rather than explicitly in software, hence significantly reducing instruction count for dynamic type checking. Besides, Typed Architectures introduce polymorphic instructions (e.g., xadd), which are bound to the correct native instruction at runtime within the pipeline (e.g., add or fadd) to efficiently implement polymorphic operators. Finally, Typed Architectures provide hardware support for flexible yet efficient type tag extraction and insertion, capturing common data layout patterns of tag-value pairs. Our evaluation using a fully synthesizable RISC-V RTL design on FPGA shows that Typed Architectures achieve geomean speedups of 11.2% and 9.9% with maximum speedups of 32.6% and 43.5% for two production-grade scripting engines for JavaScript and Lua, respectively. 
Moreover, Typed Architectures improve the energy-delay product (EDP) by 19.3% for JavaScript and 16.5% for Lua with an area overhead of 1.6% at a 40nm technology node.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037726" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2c7cd28ce7222ac733703b79a1761066201d6419", "sources": [ "DBLP" ], "title": "Typed Architectures: Architectural Support for Lightweight Scripting", "venue": "ASPLOS", "year": 2017 }, "2c7dc21fbfd64786cdffd9be4ccac1e81449ce33": { "authors": [ { "ids": [ "25611721" ], "name": "Long Li" }, { "ids": [ "1710608" ], "name": "Ke Liu" } ], "doi": "10.1145/2903150.2903482", "doiUrl": "https://doi.org/10.1145/2903150.2903482", "entities": [ "Algorithm", "Heuristic", "Heuristic (computer science)", "Link-state routing protocol", "Nonlinear programming", "Nonlinear system", "Virtual machine" ], "id": "2c7dc21fbfd64786cdffd9be4ccac1e81449ce33", "inCitations": [ "00082263500d4a6a214e764a3e6c1020f3996a15", "bbf5918648c057be54b28238b96ddce9b4c8dc3e" ], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "506-513", "journalVolume": "", "outCitations": [ "438110dc02f39f221896847a4d0e24f88e130598", "8aa09720221bdeef43e150fc7f6896f71600fb86", "2fcdb9f6f5c96ba8a7b7d54b90485a38c885e914" ], "paperAbstract": "A guarantee-aware cost effective virtual machine placement algorithm for the cloud is proposed in this paper. The algorithm is first formulated as a nonlinear programming problem of which the objective is to minimize the number of physical machines used. Specifically, apart from constraints for computing resources, we add an additional one for each network component to ensure the sum of offered guarantees for each link is not greater than the link capacity. 
We then devise a heuristic algorithm for the nonlinear programming problem. Results show that our approach can reduce the number of physical machines used by 32.5% compared to the most recent one.", "pdfUrls": [ "http://doi.acm.org/10.1145/2903150.2903482", "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.66" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2c7dc21fbfd64786cdffd9be4ccac1e81449ce33", "sources": [ "DBLP" ], "title": "Guarantee-Aware Cost Effective Virtual Machine Placement Algorithm for the Cloud", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2016 }, "2c7ead91dc8a1acdef1047ef4688bcd06b90bb6a": { "authors": [ { "ids": [ "2949392" ], "name": "Rahul Boyapati" }, { "ids": [ "3406930" ], "name": "Jiayi Huang" }, { "ids": [ "3450932" ], "name": "Ningyuan Wang" }, { "ids": [ "2224877" ], "name": "Kyung Hoon Kim" }, { "ids": [ "1680392" ], "name": "Ki Hwan Yum" }, { "ids": [ "1692009" ], "name": "Eun Jung Kim" } ], "doi": "10.1109/IPDPS.2017.77", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.77", "entities": [ "Algorithm", "Baseline (configuration management)", "Benchmark (computing)", "Best-effort delivery", "Centralisation", "Challenge-Handshake Authentication Protocol", "Global network", "Interconnection", "Network on a chip", "Parsec (parser)", "Power gating", "Router (computing)", "Routing", "Synthetic data" ], "id": "2c7ead91dc8a1acdef1047ef4688bcd06b90bb6a", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "708-717", "journalVolume": "", "outCitations": [ "0d01b42384dd92c400052a05e3d24cebaecd4056", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "8d0581b30015b3d5afb56347362ffdfec6622346", "c1818d4f005f0be1ee0eae940052bc3c05885923", 
"45fa92ee8e5be638c78da79c214bfb6ec7ed5a97", "0e98aeb6638084085dc40ed57d10d2d6a3ba94fe", "0c89d5d37a25f693bc31e65c0c5b9bea63148e57", "25f0cbae4ee4237012865bce97b9449e74fd6599", "3c2916d16e119852a4da9476a4144a53cb83c47f", "56bf58eb183dbe8f6d420fae194f2c2be35fc850", "41236387e01eacb63cefad6318dc48fc60e9829e", "0d60537e54e10cd0fdf678532b4a41c86b0a485c", "23b564bfb4e3f84e9676247f90781d04cd8b6c71", "063700ef01aad15a1981553fde02e8d162a553e7", "08408ab6b2000662e63d431b424ce31c6e09fb70", "373b88e34295875fdab7f6cdee1438edbd0571cb", "960904bf8dd1de618d606a95fbca8d345d1e769a", "18e98cd806cc79fb2869d3819ba1469e6515a22d", "2266e34950bfcbc2af57618748ab4d7bcead8ad9", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "329756f2d29829e1b2e713360016995855d0ea26", "d311c8941818f400df3ca27381938e8a3d066051", "3c1a32c467c628cf72fea9f7d7720e1fccd13b46", "3ec18371eed24707fb16bf7cc258f3043088207f", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "55deba3bb943391e07d571d89c049b12b76a0c75", "167481d1f156698a2bb177049f67131e818eec61", "36ce17dd3f7734b7578fd6580c886b3d1b2a475b", "d589123c9665f52c1c06a0b3c80aa94c423a8908", "f246db6a2eaedf8f3eb84af60c29703ae9aef504", "423437335211cfd0fd61aad08b822ff349ecef3d", "30c5b89ef93b564781b9a7b8f03be0056d926876", "2c2e32267c43161f80241a2e1ba21d1f0f871dd4" ], "paperAbstract": "Scalable Networks-on-Chip (NoCs) have become the de facto interconnection mechanism in large scale Chip Multiprocessors. Not only are NoCs devouring a large fraction of the on-chip power budget but static NoC power consumption is becoming the dominant component as technology scales down. Hence reducing static NoC power consumption is critical for energy-efficient computing. Previous research has proposed to power-gate routers attached to inactive cores so as to save static power, but requires centralized control and global network knowledge. 
In this paper, we propose Fly-Over (FLOV), a light-weight distributed mechanism for power-gating routers, which encompasses FLOV router architecture, handshake protocols, and a partition-based dynamic routing algorithm to maintain network functionalities. With simple modifications to the baseline router architecture, FLOV can facilitate FLOV links over power-gated routers. Then we present two handshake protocols for FLOV routers, restricted FLOV that can power-gate routers under restricted conditions and generalized FLOV with more power saving capability. The proposed routing algorithm provides best-effort minimal path routing without the necessity for global network information. We evaluate our schemes using synthetic workloads as well as real workloads from PARSEC 2.1 benchmark suite. Our full system evaluations show that FLOV reduces the total and static energy consumption by 18% and 22% respectively, on average across several benchmarks, compared to state-of-the-art NoC power-gating mechanism while keeping the performance degradation minimal.", "pdfUrls": [ "http://faculty.cs.tamu.edu/ejkim/HPC_WEB/docs/ipdps17_flov.pdf", "http://engineering.tamu.edu/media/2551734/2015-7-1.pdf", "http://engineering.tamu.edu/media/3427396/2016_3_1.pdf", "http://faculty.cs.tamu.edu/ejkim/HPC_WEB/docs/poster_pact.pdf", "https://doi.org/10.1109/IPDPS.2017.77", "https://engineering.tamu.edu/media/3427396/2016_3_1.pdf", "https://engineering.tamu.edu/media/2551734/2015-7-1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2c7ead91dc8a1acdef1047ef4688bcd06b90bb6a", "sources": [ "DBLP" ], "title": "Fly-Over: A Light-Weight Distributed Power-Gating Mechanism for Energy-Efficient Networks-on-Chip", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "2c8c8e0ff5f726776fbd3e973e01161ee31e7d3e": { "authors": [ { "ids": [ "32488007" ], "name": "Bobby Powers" }, { "ids": [ "2506153" ], "name": "John Vilk" }, { 
"ids": [ "2925003" ], "name": "Emery D. Berger" } ], "doi": "10.1145/3037697.3037727", "doiUrl": "https://doi.org/10.1145/3037697.3037727", "entities": [ "Anonymous pipe", "C++", "Client\u2013server model", "Glue code", "JavaScript", "LaTeX", "Node.js", "Operating system", "POSIX", "Process (computing)", "Runtime system", "Server (computing)", "Server-side", "Unix", "Unix-like", "Web application" ], "id": "2c8c8e0ff5f726776fbd3e973e01161ee31e7d3e", "inCitations": [ "68fc3a0da7e96122e308c1bbe28e1ca3b879d461", "a518dbb3bce58142fa025a390b4ac5c1af2f8d52" ], "journalName": "", "journalPages": "253-266", "journalVolume": "", "outCitations": [ "1f62067f0cb66a26237f9a1654a7da33f8afd45b", "0ab24dda560e79160a8d41bee4e6e9f37a6a554e", "0abaa5b259f47e141b8888db3a102048b8a37554", "08d7f0ca5c6ba191886bc860942e5de3b00fe0a1", "1b87d68c7c4f7b897eeb09804657225f6f8f762f", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "699cb7f7addac731632d8e5101b4ae59bdad8c29" ], "paperAbstract": "Applications written to run on conventional operating systems typically depend on OS abstractions like processes, pipes, signals, sockets, and a shared file system. Porting these applications to the web currently requires extensive rewriting or hosting significant portions of code server-side because browsers present a nontraditional runtime environment that lacks OS functionality.\n This paper presents Browsix, a framework that bridges the considerable gap between conventional operating systems and the browser, enabling unmodified programs expecting a Unix-like environment to run directly in the browser. Browsix comprises two core parts: (1) a JavaScript-only system that makes core Unix features (including pipes, concurrent processes, signals, sockets, and a shared file system) available to web applications; and (2) extended JavaScript runtimes for C, C++, Go, and Node.js that support running programs written in these languages as processes in the browser. 
Browsix supports running a POSIX shell, making it straightforward to connect applications together via pipes.\n We illustrate Browsix's capabilities via case studies that demonstrate how it eases porting legacy applications to the browser and enables new functionality. We demonstrate a Browsix-enabled LaTeX editor that operates by executing unmodified versions of pdfLaTeX and BibTeX. This browser-only LaTeX editor can render documents in seconds, making it fast enough to be practical. We further demonstrate how Browsix lets us port a client-server application to run entirely in the browser for disconnected operation. Creating these applications required less than 50 lines of glue code and no code modifications, demonstrating how easily Browsix can be used to build sophisticated web applications from existing parts without modification.", "pdfUrls": [ "https://browsix.org/powers2017-browsix.pdf", "http://doi.acm.org/10.1145/3037697.3037727", "http://arxiv.org/abs/1611.07862", "https://web.cs.umass.edu/publication/docs/2016/UM-CS-2016-005.pdf", "https://arxiv.org/pdf/1611.07862v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2c8c8e0ff5f726776fbd3e973e01161ee31e7d3e", "sources": [ "DBLP" ], "title": "Browsix: Bridging the Gap Between Unix and the Browser", "venue": "ASPLOS", "year": 2017 }, "2c9bbdd35ee20c6528c6409b2ddbe35289866712": { "authors": [ { "ids": [ "2729744" ], "name": "Fan Guo" }, { "ids": [ "1904670" ], "name": "Yongkun Li" }, { "ids": [ "7869811" ], "name": "Yinlong Xu" }, { "ids": [ "1804354" ], "name": "Song Jiang" }, { "ids": [ "1723366" ], "name": "John C. S. 
Lui" } ], "doi": "", "doiUrl": "", "entities": [ "Adaptive filter", "Data deduplication", "Experiment", "Hypervisor", "Kernel same-page merging", "Memory management", "Overhead (computing)", "Page (computer memory)", "Page table", "Paging", "Translation lookaside buffer" ], "id": "2c9bbdd35ee20c6528c6409b2ddbe35289866712", "inCitations": [ "b532c625457aede2f11ef0eae40de38e4b5a8ab6" ], "journalName": "", "journalPages": "733-744", "journalVolume": "", "outCitations": [ "aa3859a68bbee9bb68036e24d0239369788a8604", "caacd536fa218ef5218021506ebc041e3f460064", "daeff61502115efc4b9ee81607a8e5489215ea88", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "211f2beaaf36bb6a920a63dbbef6842cb1d22468", "07042865b10297ca4fc9164829d6330db2f60b4c", "8d79844db85ddfe91910ada99f85d347cee58192", "73e8627ae91003e19183b17ad7b24923c20aafa3", "3aa3795ddeb410db291b4fe10f11e52264885e75", "5d3c30ae77994e16fb2ee486cff96d4c52bccfd9", "2593ebc83d22e846e2ba314c77f96a32bb7b2ef9", "8150bce0f1961ee5d1f40daa3e6edcb81f5439ba", "258e4e58c67ecc8a030f5ffd187657344e7d3cc7", "0027600a3ff1d431ee44fcb8b149388ec7bb3ac8", "182cb3740940f403ff6f311fa54c5c1c9d7edc3f", "445728ecb0eabed9f7433b0c96bd36d53cb312c9", "1506e49a71ffcc4d201928dbc76a881608c9c6c4" ], "paperAbstract": "In hypervisor-based virtualization environments, translation lookaside buffer (TLB) misses may induce two-dimensional page table walks, which may incur a long access latency, and this issue becomes worse with ever increasing memory capacity. To reduce the overhead of TLB misses, large pages (e.g., 2M-pages) are widely supported in modern hardware platforms to reduce the number of page table entries. However, memory management with large pages can be inefficient in deduplication, leading to low utilization of memory, which is a precious resource for a variety of applications. 
To simultaneously enjoy benefits of high performance by accessing memory with large pages (e.g., 2M-pages) and high deduplication rate by managing memory with base pages (e.g., 4K-pages), we propose Smart Memory Deduplication, or SmartMD in short, which is an adaptive and efficient management scheme for mixed-page memory. Specifically, we propose two lightweight schemes to accurately monitor pages\u2019 access frequency and repetition rate, and present a dynamic and adaptive conversion scheme to selectively split or reconstruct large pages. We implement a prototype system and conduct extensive experiments with various workloads. Experiment results show that SmartMD can simultaneously achieve high access performance similar to systems using large pages, and achieves a deduplication rate similar to that applying aggressive deduplication scheme (i.e., KSM) at the same time on base pages.", "pdfUrls": [ "http://www.cs.cuhk.hk/~cslui/PUBLICATION/SmartMD.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_guo_0.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-guo_0.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/guo-fan" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/d885/f0f83224e9910e3632c68663b6b84a44105a.pdf", "s2Url": "https://semanticscholar.org/paper/2c9bbdd35ee20c6528c6409b2ddbe35289866712", "sources": [ "DBLP" ], "title": "SmartMD: A High Performance Deduplication Engine with Mixed Pages", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "2c9f40ac8125cc5115d8d65eabb5c257d67c9b5f": { "authors": [ { "ids": [ "3109687" ], "name": "Michael S. 
Kester" }, { "ids": [ "1840402" ], "name": "Manos Athanassoulis" }, { "ids": [ "2203901" ], "name": "Stratos Idreos" } ], "doi": "10.1145/3035918.3064049", "doiUrl": "https://doi.org/10.1145/3035918.3064049", "entities": [ "Access control", "Cache (computing)", "Computer data storage", "Data compression", "Data system", "Experiment", "Full table scan", "Multi-core processor", "SIMD", "Selectivity (electronic)", "Single-access key", "Systems design" ], "id": "2c9f40ac8125cc5115d8d65eabb5c257d67c9b5f", "inCitations": [ "6f10e7cc100865feec746294e90e8d9ca2322059", "8c7044398d1994b12a9bf7212e11398f59eaf446" ], "journalName": "", "journalPages": "715-730", "journalVolume": "", "outCitations": [ "6abf5107efc723c655956f027b4a67565b048799", "92e0243e1a73c77ef8b90292e3798f765b38f269", "02f8e4a8b3f16a988233f309db548415268322c2", "459693b64731566a5cce5aa8b05fb88275865e9e", "24c3330d34d640945e0eb99fe4a0b1c31695a8cb", "19629429a0ade02b450f5a585bdde880fd32b22b", "3a134bc11a5805bcf45fdcb88a91321a1b1b63c3", "34fa41ccb6e548612886623916d502fce17fd3a8", "eb35aa48b342cf5733977ad2634701fe9c7cb603", "c0b438eee7bd423606da9335229602b9c77c10d4", "6521560a99dd3c4abeec8ad9634e949d5a0e77cd", "6033797f241a3687aab939db1d88b5184d32c0fb", "2d403a99e4dc71520aad53e46ba41c52d089207e", "079433997c7270ea95e23042ffd8105d75d22cdd", "2ceb62fcec9212fb408cf6ae847ff3a03b52f83e", "19e5a8ea876cee86e78b659fc96ae18eb8c3a834", "207def18c67fa8024741b7ae3cdc655b57f2053f", "a4d7e7af926b1aee100dbe370021ef3fc6d460bc", "1be04eef05d2547199ed787125f38610a2838658", "8db5d8f4bf055bbe64ccfe29c5fd778ef24ade5b", "359760cb0df73b9d8e6501b5ab5ff93a6c12b44f", "b59c6e2eca208e99a18d2778d99cccd1792835fc", "1dc19048a74d9bc7564f0114dc201ffc9e77d43a", "14023b8086f53f29376fe6351cf5c68aac7b24f1", "463bec3d0298e96e3702e071e241e3898f76eff2", "370e1fcea7074072fe5946d3e728affd582a9a44", "55ce0391cae3d7663a26bc6bb1a1e5618b8b9475", "1d5cbc071f918143dbedf67a513850eadf30cbae", "4139eedda8717ffd60052f68ed78b996aaebfced", 
"09c1b69ab0fe1315b0d5e5e0b0853585c4a319b5", "1c27eafecd3d6f0008d74ffbe1e7c59a25869407", "2b320e83114b35b35570010a9e8dbfb8d1b01e85", "96f73a90e87b330e60de8d403870aa81ee21a65d", "0235fb69431fa5892333eb48a06ede07df6ff4f6", "52b32996324d12ae6c9068e6ff301ece06f09835", "9141bafcff1df2dbabf9a20671d2fa1bcb55aae5", "04700bcc5abfad47d11b67a2d9901f01c8f0adf5", "291470e5e557ac526f79a59c83e98fbf53406401", "e75c5d1b7ecd71cd9f1fdc3d07f56290517ef1e5", "8541e44cfdc11587a04581987a03daccddd3514a", "0a5033c0b2bb2421f8c46e196fb0fb1464a636b6", "206b086db0cc1c807a9fdbf7bbc9c261a50bfd34", "cd4b958bf9dda5f44fbb457f7bf0eca96d6563e7", "da9a4fdd63d6c9f30e1f5b8032fbf8ac3f79f7b6", "ab4e0d6c59196243ab8d7f8644ecff86708fbefe", "313e8120c31fda6877ea426d8a3be9bcf1b6e088", "239c67e18caa0001ae05784d8dcbd4d1d1c67103", "5046a718f92447642939f5c93414dc97225d726a", "afda6470dd16dc0a865dbb6fc291e5806132379b", "3c6be4c9ea5c56d4ab97aabb4e7c9d5ced57bea8", "8479ca0d391184dc7c0c217df08e10046169c9fd", "18869d8964793da4837b5b38d4aec5854d37f08c", "31d2c9e85d395b0dcb48123d03b1b33440d389fb", "30b1293e39c52ddd0e2a617de47c1ad843621258", "03416be8097852a54dd3e309434e5a0806824646", "ef47742e72bd64fb1ae5359cd6d5dd6dfad34dc8", "bf24ea819099ce17fd5f9f497e4598e980288564", "6089f230642f394d3688e4a373117a5ab02c8521", "a1330395dda0d0174926f5152778ead7925983f7", "beceeaaeee67884b727248d1f9ecda075e4ce85d", "f8e04cb0d6ffa19b3cfe57f10c0763fbf762e2df", "1d3e8f4d3ce97a3779ed9e395a793d4a935dda60", "5ad1dd1aa78ba772c969aa01ee5e8ee0d255ce3d", "2616c0df5d07bd88356381976243f21b4ddd0344", "153703ab30c7cb56a49718991f6bc450f0c2273f", "7ead08393fa2000ea03bb1cc1e3e6e0ac991aac6", "d4ca38d1a7786a7a11ebf874dceaf95d0b53d4da", "3c457cec00499e41dd05516db79c4daf836102ad", "3be993b9891d812581d7c2649a4224107570049a", "6c5462d31a0d0f4e6cb2ff7ae795250957d9fcab", "a34b2c1c1fdbb800ea84028325fb8b01f673157f", "0ad5ed5a0a7b9210993753d7594c7285d5e9b179", "3aed29136db8f1e5c6a89fc22d3ae4b4926a3555", "2c5b8766a1dae62b86ba38013253ab8673f6ec44", 
"45ac2218b74fd28ff170dc93cf4390649466c491", "2dbd58aa9b36388c92d3ccb5bd1bc387ea712a30", "4f05a78c2e2abf932915c33c6a2bb9c726ce4ac2", "4a283fe7de108d476ba8cab69acbbed907e5c4d8", "0b19f413ffb5bc68b43f3bd05a97c282a7c6d6ab", "834b72142d0737b12b53540ea671cd6b6d5be5c3", "1cf0a0e6c09bbc2539d2fffddc372d47fbce14fb", "295521cfe1a56458d53a58613de5fb92c97c5c23", "2c1a0af30cc12ec108ce8bffde856ced8b759022", "0f288da3454e8d5750dd319962a28cabe07709a6" ], "paperAbstract": "The advent of columnar data analytics engines fueled a series of optimizations on the scan operator. New designs include column-group storage, vectorized execution, shared scans, working directly over compressed data, and operating using SIMD and multi-core execution. Larger main memories and deeper cache hierarchies increase the efficiency of modern scans, prompting a revisit of the question of access path selection.\n In this paper, we compare modern sequential scans and secondary index scans. Through detailed analytical modeling and experimentation we show that while scans have become useful in more cases than before, both access paths are still useful, and so, access path selection (APS) is still required to achieve the best performance when considering variable workloads. We show how to perform access path selection. In particular, contrary to the way traditional systems choose between scans and secondary indexes, we find that in addition to the query selectivity, the underlying hardware, and the system design, modern optimizers also need to take into account query concurrency. We further discuss the implications of integrating access path selection in a modern analytical data system. We demonstrate, both theoretically and experimentally, that using the proposed model a system can quickly perform access path selection, outperforming solutions that rely on a single access path or traditional access path models. 
We outline a light-weight mechanism to integrate APS into main-memory analytical systems that does not interfere with low latency queries. We also use the APS model to explain how the division between sequential scan and secondary index scan has historically changed due to hardware and workload changes, which allows for future projections based on hardware advancements.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064049", "http://stratos.seas.harvard.edu/files/stratos/files/accespathselection.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2c9f40ac8125cc5115d8d65eabb5c257d67c9b5f", "sources": [ "DBLP" ], "title": "Access Path Selection in Main-Memory Optimized Data Systems: Should I Scan or Should I Probe?", "venue": "SIGMOD Conference", "year": 2017 }, "2ca81717939d40d34aed680709183638923e83b6": { "authors": [ { "ids": [ "31829814" ], "name": "Nicola Cadenelli" }, { "ids": [ "1678314" ], "name": "Jorda Polo" }, { "ids": [ "1727718" ], "name": "David Carrera" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.57", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.57", "entities": [ "Bioinformatics", "Bioinformatics", "Central processing unit", "Communications satellite", "Computation", "Emergence", "Graphics processing unit", "K-mer", "Mathematical optimization", "Mer", "Non-volatile memory", "Throughput", "Volatile memory" ], "id": "2ca81717939d40d34aed680709183638923e83b6", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "434-441", "journalVolume": "", "outCitations": [ "1225be40bc4249572b42340f306efae0d70c5dd8", "000634d00e45d43a7abbc57c02bea6d663cb9232", "41cdc268d9ec5595f40ebf2e457f3f7f87a503de", "6bf5ccdc097b6ce40dbd9dffbc483a6267377b6b", "7e80397d3dcb359761d163aaf10bf60c696642d1", 
"158178093e2f725bd2aad5d24319952d68a2fc74", "1401af1026fd819666ad523a7dc1db7c46f0d06d", "912ae5c7ed92e1ce4047bdbb82927767d5721b21", "726099036bb32c3fbaf1650d5900eeaa2ecc8fd9", "1b85d7b8180cd44170a46ad46e40c0633bde0879" ], "paperAbstract": "The emergence of Next Generation Sequencing (NGS) platforms has increased the throughput of genomic sequencing and in turn the amount of data that needs to be processed, requiring highly efficient computation for its analysis. In this context, modern architectures including accelerators and non-volatile memory are essential to enable the mass exploitation of these bioinformatics workloads. This paper presents a redesign of the main component of a state-of-the-art reference-free method for variant calling, SMUFIN, which has been adapted to make the most of GPUs and NVM devices. SMUFIN relies on counting the frequency of k-mers (substrings of length k) in DNA sequences, which also constitutes a well-known problem for many bioinformatics workloads, such as genome assembly. We propose techniques to improve the efficiency of k-mer counting and to scale-up workloads like SMUFIN that used to require 16 nodes of Marenostrum 3 to a single machine with a GPU and NVM drives. 
Results show that although the single machine is not able to improve the time to solution of 16 nodes, its CPU time is 7.5x shorter than the aggregate CPU time of the 16 nodes, with a reduction in energy consumption of 5.5x.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.57", "http://arxiv.org/abs/1712.03254", "https://arxiv.org/pdf/1712.03254v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2ca81717939d40d34aed680709183638923e83b6", "sources": [ "DBLP" ], "title": "Accelerating K-mer Frequency Counting with GPU and Non-Volatile Memory", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "2ccf4fcca56f7d14579eab46c529fcdf06365cfc": { "authors": [ { "ids": [ "3273348" ], "name": "Maleen Abeydeera" }, { "ids": [ "1929462" ], "name": "Suvinay Subramanian" }, { "ids": [ "2573160" ], "name": "Mark C. Jeffrey" }, { "ids": [ "1775477" ], "name": "Joel S. 
Emer" }, { "ids": [ "39783437" ], "name": "Daniel S\u00e1nchez" } ], "doi": "10.1109/PACT.2017.37", "doiUrl": "https://doi.org/10.1109/PACT.2017.37", "entities": [ "Dynamic dispatch", "Lavasoft Ad-Aware", "Multithreading (computer architecture)", "Out-of-order execution", "Parallel computing", "Speculative execution", "Speculative multithreading", "Speedup", "Thread (computing)", "Transactional memory" ], "id": "2ccf4fcca56f7d14579eab46c529fcdf06365cfc", "inCitations": [], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "64-78", "journalVolume": "", "outCitations": [ "1d9889486e2e4e553e73f7154bb73bcb3e2024c8", "2933635dc761e1a343a8b7bed8045a0a7884ce7c", "3d50c803cc715e51d263f5a42b06858be9466c0f", "4fb0a129c06bd8752f965b27a19ea3255e4b771f", "12f2cfcbb56718a967ca37bafb1ec014bb17d3e0", "35d357020f53e6aba43fe5c8a42c07ad87be745f", "3d68e6c36e36f47c74368c917436b249218177b7", "43c36c242f0f7e1095227da0a04bb766d91fde04", "b575c072dcde1d7c627e6108da5814696fff9bfb", "44dce8469bc082356b71f49e7e05c6d8954af72c", "06570f434bb3d5d822d43bddbadd41d187878c02", "4aa993db77b888a02084a542a929b1a81a8d03f6", "1487996e2cd07b15dd42791cd5f567246f8b83e6", "29f766723ca752138855500084ced04503bfc9c8", "0c8fef219adbbd4eeae3498428fb6a334bc62a67", "0653e2ed9f683868cb4539eb8718551242834f6b", "09ed565e84057123c15ab12b885c235d1f241aed", "13e5cbd3303db47e422469dca09bb59bffe2eafe", "145763b88b9a3bc332180e9628a2642c99b2aac6", "6bd6a0cc1a4bf62784d8573ca1aeafe2673dcc02", "0948c0acfb779e551e5c2420081eab206f57f396", "49137d43a3c23d8f5aa3885813ae76a72ca763a5", "2b0cf3614e919ef3fb623ac9382caa444ec44fb0", "89c5340c85f56dc7192ea4f0a7ac8fce792cbdb9", "25855e37885c85803b2a015a2b8b5eaa7d1f5326", "e0857c644b1059323d15ef9d45ffe86f4f3b6a09", "32aa6bc4a8a014b65135e4d82f103f1d0017d578", "35c89b2ad35ff57c7006a65a84d05df1f00affbe", "5557b730f22e3e90272d477ecfa82013649086c8", "b5e9865f09f61f987f4e55b8bc07334d3972de84", 
"17ad1361dfabc1c50b506813d0f5d54df159fc36", "4805282c0f03457e297ab4cea11044a4a81316cc", "044cafde686e811d1a6aa19a93fe97d0e4d8ab51", "1b9e9bfdc66140d2eb192e4ac8aca9281f0239b8", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "1eeb50d5f7937f65a910203ae61430ff8b969012", "34a97a016e6c419eb4b1005a7306d45a775a407b", "7c17663c48a367fdabd0600c1cca6a3d0c711788", "5a1a774e3a4563fde667fbaa35adaff9a41014db", "a10fde829f83a5e5c101de85dd78e330c9c33d1b", "22f02a69bbcafa7bfbe7824ec30f29aa23ab303a", "14b1e403f0ab89b61e3e5431283f5aa764e301b7", "ae96e0563298c23b777be98d110bc86963b896c0", "57cf29529977cc5407497aba2f9032e01a12c1a9", "a01d4a0f26ec9bba3e21f12f60489a6a20a8ae17", "d8777ef17c18609781aa889055100603b13b2986", "770fdb91c74005c8b34a744c6b2188729b2a9c63", "57f0570911626318a13a69a378dd96feb011eedf", "106cfdc91f33ed647e8ca97e9d7ce495fa79dae3", "48f044ba3e7524413468d90a687c2a9ca3f2847c", "16ba3e5c5e0084fef0fa4705d639f2ad164f2dbe", "2c1ea92d6a4237ede5ea112f1880710b25bec8b3", "429e313d33a82bf086b69d47eee735450cbeb4ae", "01d32e62828315a140a5db4010431cac3d6868c6", "50b84e2e1d1289ac6e3fae14f292d274f6db27e1", "6e78c1b830ff611d82ae00d75b3c6592e000a91f", "6f090d59bde17b7604985acf38e26785e794bcc0", "0b6feaae9d2d2f3bc8e487a8228d712a68fea14d", "2b585692b2337286e88095c2341af4d8121e80b8", "323292fad95a1bce506e100ac8d622019a2012d2", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "40cb40b7812e019c1051e3a457a8643400b81d51", "28552ecf4eaedb3461edca97304b29082b02fbab", "3bf23f74bf33ed52f7c28587fab315610b27221a", "540892abd51e931839dbe15b4d55ab108b5a7f71", "b1d14e2b28759afd361d50e14744224b654e205e", "2e33c9a0e0ba542693610f36cabee4d8291f8e09", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "44695679d7619ec3526627c9f93e388f1d24f3b6", "d67f67e2a8d2caf5ff04f315c21611571f7779c6", "94c20561f102ede79c461c6aeee74eb7a55d5eb4", "abf1157c2043274a8d580151db1d4ef5be2c892e", "51225f24b4bfb922bc9ed9738566de0b3cae5393", "03fc198adf79731c92070b8aa839c46ebf9b3c14", "a16d51087e5505b296e2b15a4b5b6fddff194ebf", 
"28cc7453c5f3f9ecb9415e631b0829ec9af8a4c3", "4bad51c7685254155733ee8def6a1294378aa1af", "fd9dc505e3cf0b6a828ae67f1850658540ec9179", "a16ae7a7367391f7baeb5085655c329af12683ce", "5ff71859d9602c9a3d1332fa47ce01a5ec81db72", "200609035711763e162096cc010cc3e00895c6c6", "0335bf6957ecb92f709fc79c72c4237939f32c9e", "ab12cef09635b578d1c6479a2a693de8a75be2c7", "13f6ddd72bcf62dcc13cf4515be29d48948b9693" ], "paperAbstract": "This work studies the interplay between multithreaded cores and speculative parallelism (e.g., transactional memory or thread-level speculation). These techniques are often used together, yet they have been developed independently. This disconnect causes major performance pathologies: increasing the number of threads per core adds conflicts and wasted work, and puts pressure on speculative execution resources. These pathologies often squander the benefits of multithreading. We present speculation-aware multithreading (SAM), a simple policy that addresses these pathologies. By coordinating instruction dispatch and conflict resolution priorities, SAM focuses execution resources on work that is more likely to commit, avoiding aborts and using speculation resources more efficiently. We design SAM variants for in-order and out-of-order cores. SAM is cheap to implement and makes multithreaded cores much more beneficial on speculative parallel programs. We evaluate SAM on systems with up to 64 SMT cores. With SAM, 8-threaded cores outperform single-threaded cores by 2.33x on average, while a speculation-oblivious policy yields a 1.85x speedup. 
SAM also reduces wasted work by 52%.", "pdfUrls": [ "http://people.csail.mit.edu/sanchez/papers/2017.sam.pact.pdf", "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.37", "https://people.csail.mit.edu/mcj/talks/2017.sam.slides.pact.pdf", "http://people.csail.mit.edu/emer/papers/2017.09.pact.sam.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2ccf4fcca56f7d14579eab46c529fcdf06365cfc", "sources": [ "DBLP" ], "title": "SAM: Optimizing Multithreaded Cores for Speculative Parallelism", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "2cd95537bc9c94b76b69209964e46c79af1317ec": { "authors": [ { "ids": [ "3184437" ], "name": "Zhengchun Liu" }, { "ids": [ "2760849" ], "name": "Prasanna Balaprakash" }, { "ids": [ "9297377" ], "name": "Rajkumar Kettimuthu" }, { "ids": [ "1698701" ], "name": "Ian T. Foster" } ], "doi": "10.1145/3078597.3078605", "doiUrl": "https://doi.org/10.1145/3078597.3078605", "entities": [ "Central processing unit", "Communication endpoint", "Experiment", "File transfer", "Intrusion detection system", "Machine learning", "Network interface", "Network interface controller", "Nonlinear system", "Performance prediction", "Petabyte", "Predictive modelling", "Program optimization", "Scheduling (computing)", "Terabyte", "Usability" ], "id": "2cd95537bc9c94b76b69209964e46c79af1317ec", "inCitations": [ "72c22be8e97afc72dfa3ef35a7fad6f496a8d34f", "19e3d97fb829a6a9c15caef345803b40731550e8", "96a76b1519117046e18d141a788a20fd144b5c6d" ], "journalName": "", "journalPages": "167-178", "journalVolume": "", "outCitations": [ "8d14cbd226e7e95cef63161fa2bf8b52cffd8de3", "376692614007d993787db032fb642a7e042da720", "11e67f6e9a7dacf236cf855230a0b7d51c61bf32", "27b9abd31c74006abc1b0c530f9180224ef40b70", "1579de573d7327430a4685113a113e11f14043ce", "8bb2eb3841c4ffbf9f03b762db6c2b7d88e5fabd", "61c06e6b75c3b3fd95586d9eee8a60bf2bafd8dd", 
"0b700ea3741877f664d467ff46caabc8ab3972a0", "546b80d48e18ff7c0aad9fb3d9117e4a41132ebc", "354bad40266096c3961ece294e1b738dcbe28ee0", "88c37770028e7ed61180a34d6a837a9a4db3b264", "27eb0d9ffd6d32f85b1b33cfc79d15c2cd509f9c", "1f453bfdfa2c2889cfcf21f647041314f7b69e04", "a19531313118fc96621f70b8a18401ea4e4941ba", "8b6445d70461fe38d2a586b557db95e13d7dd261", "0f7038c8c482809edc4500a91443dc5a43f39cd5", "0f1181e2f58395f8f6d6f14707e4e44a489aaf3f", "fcd9bae37093d3b5456744cabb2d6dc64e692b71", "33622ba98a68f2e97c09e64a890b7ffbbe5cf511", "3089e2e920ab0235bba546166ea5229ed581a51e", "30ab54f982647bc0f18d2585ba48a3f2216697a1", "a96e5d846168fb0fc57861d457036e8b4143ed21", "61a926609a5a8eebfa1beb4ad9495414fb1d29b5", "6c1afafedf50f430a3155906658fd2e529811c6d", "6a4105c2e444bf4a164c498126bc35f45e497286", "30be60b8c417737285a0cc7ce61700137eb477d2", "26bc9195c6343e4d7f434dd65b4ad67efe2be27a", "ee537bb55917518a5d96ca0355da7deee0ca1284", "0a70825f83e029d16286006ac07f23d43240892e", "ba7d27962dcf255c0c558f711094b280b15e9e79", "2a55ee24556c910f0c435ca202790da8f2790416" ], "paperAbstract": "Disk-to-disk wide-area file transfers involve many subsystems and tunable application parameters that pose significant challenges for bottleneck detection, system optimization, and performance prediction. Performance models can be used to address these challenges but have not proved generally usable because of a need for extensive online experiments to characterize subsystems. We show here how to overcome the need for such experiments by applying machine learning methods to historical data to estimate parameters for predictive models. Starting with log data for millions of Globus transfers involving billions of files and hundreds of petabytes, we engineer features for endpoint CPU load, network interface card load, and transfer characteristics; and we use these features in both linear and nonlinear models of transfer performance, We show that the resulting models have high explanatory power. 
For a representative set of 30,653 transfers over 30 heavily used source-destination pairs (\"edges''),totaling 2,053 TB in 46.6 million files, we obtain median absolute percentage prediction errors (MdAPE) of 7.0% and 4.6% when using distinct linear and nonlinear models per edge, respectively; when using a single nonlinear model for all edges, we obtain an MdAPE of 7.8%. Our work broadens understanding of factors that influence file transfer rate by clarifying relationships between achieved transfer rates, transfer characteristics, and competing load. Our predictions can be used for distributed workflow scheduling and optimization, and our features can also be used for optimization and explanation.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078597.3078605" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2cd95537bc9c94b76b69209964e46c79af1317ec", "sources": [ "DBLP" ], "title": "Explaining Wide Area Data Transfer Performance", "venue": "HPDC", "year": 2017 }, "2cdcb05bad9c38dfa39530b159a4ecc0e94d922f": { "authors": [ { "ids": [ "1697093" ], "name": "Jungwon Kim" }, { "ids": [ "3079326" ], "name": "Kittisak Sajjapongse" }, { "ids": [ "8568681" ], "name": "Seyong Lee" }, { "ids": [ "7553591" ], "name": "Jeffrey S. 
Vetter" } ], "doi": "10.1109/IPDPS.2017.72", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.72", "entities": [ "Admissible numbering", "Consistency model", "High- and low-level", "IBM WebSphere eXtreme Scale", "Non-volatile memory", "Papyrus", "Persistent memory", "Scalability", "Software system", "Tcl", "Web beacon" ], "id": "2cdcb05bad9c38dfa39530b159a4ecc0e94d922f", "inCitations": [ "41d01619b4f0d14be5c0135ca35f06fb5fc93b2a" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1151-1162", "journalVolume": "", "outCitations": [ "165d99c9d30be5d301b998dc23c1a6a28fd0c425", "0494a1ab6f0dd764fb9039772818b8f269ed70b4", "7377a53399ebd9ecd523cb7beeba0fc614239897", "27bcb72519d77192da2b30eca4e1442c8f3637b1", "a578daf478e4555e6e81c63cef4f138d92f93245", "61b8a8f5810f6670466f9ea58b7cb390ca1a4a89", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "2e3bda19a2ed88e8a6e5cc415e27da551653ff1d", "94783d113951822195d4ba44599a8fcbdef9d4bf", "9183cde02e4306828089fb8adae74736a9df3ceb", "59ba9f62728b6231f982ea3b59f9ba7422182f28", "0599ba259341963bf8abf2818c874713e570a039", "40c5050e470fa0890e85487e4679197e07a91c09", "4f0a03bd3c7e148e62eecaeaa6c9ce2a5b2a7d52", "098d792d1783b5f6fc098203f71f21f5d053c653", "b3bdfa918336a7c18093a1f6a14f32ba77d41991", "9a047672d9cad7fe11785682e8249e8a5f75c8f1", "6e0ade8e4c0948e47b7e1ad78eacf42e5f9d8d0f", "f8f52a402b8833ea1ad8eb34e48f011b25c0d306", "3a8c90ab13adb55e3610a020c69f03d72dfae274", "1c82d6dd3fde20878f9500c31351a3ceb9c05a46", "5c0e86f286972d34036da95b9c8d80581a985819", "35ae271bcc515d61dc113c35f8d3dc0300f8faad", "6a9a57dddf37adce1eb16c682205de8bf9447f60", "da8f5c3e65e2eb398dc5a4866023ef51e4056905" ], "paperAbstract": "A surprising development in recently announced HPC platforms is the addition of, sometimes massive amounts of, persistent (nonvolatile) memory (NVM) in order to increase memory capacity and compensate for plateauing I/O capabilities. 
However, there are no portable and scalable programming interfaces using aggregate NVM effectively. This paper introduces Papyrus: a new software system built to exploit emerging capability of NVM in HPC architectures. Papyrus (or Parallel Aggregate Persistent -YRU- Storage) is a novel programming system that provides features for scalable, aggregate, persistent memory in an extreme-scale system for typical HPC usage scenarios. Papyrus mainly consists of Papyrus Virtual File System (VFS) and Papyrus Template Container Library (TCL). Papyrus VFS provides a uniform aggregate NVM storage image across diverse NVM architectures. It enables Papyrus TCL to provide a portable and scalable high-level container programming interface whose data elements are distributed across multiple NVM nodes without requiring the user to handle complex communication, synchronization, replication, and consistency model. We evaluate Papyrus on two HPC systems, including UTK Beacon and NERSC Cori, using real NVM storage devices.", "pdfUrls": [ "http://www.csm.ornl.gov/newsite/documents/highlights/Science_Highlight_Papyrus.pdf", "https://doi.org/10.1109/IPDPS.2017.72" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2cdcb05bad9c38dfa39530b159a4ecc0e94d922f", "sources": [ "DBLP" ], "title": "Design and Implementation of Papyrus: Parallel Aggregate Persistent Storage", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "2ce5b867a51e878969740a2224a65143f9e98a13": { "authors": [ { "ids": [ "2441482" ], "name": "Antoine Amarilli" }, { "ids": [ "3432118" ], "name": "Mika\u00ebl Monet" }, { "ids": [ "1734682" ], "name": "Pierre Senellart" } ], "doi": "10.1145/3034786.3056121", "doiUrl": "https://doi.org/10.1145/3034786.3056121", "entities": [ "Automaton", "Complexity", "Conjunctive query", "Constraint satisfaction", "Constraint satisfaction problem", "Data compression", "Database", "Graph homomorphism", "Probabilistic 
database", "Type signature" ], "id": "2ce5b867a51e878969740a2224a65143f9e98a13", "inCitations": [], "journalName": "", "journalPages": "217-232", "journalVolume": "", "outCitations": [ "f0aca164d1e8c582b474975608d919fb2d2b8503", "7cc81c5a0baf0593d32d5418293ec1aa26efec7f", "fae36500df3fdf5daff2d87f8b3708b47a974beb", "78845bb219181d635d89c7c8df49a729042bea61", "37d4a97b35429ddc0136260b7c396040af4a9404", "1421ad54b0e144e7843a4eb8f6076e5f2a64caad", "2aa9aa062f44210e5d51a4ec74fb0ede4691f138", "46cc0510dd7e797e72ee3b84bb5ec36b490ef9b5", "4af251ad7eb232e7197c40b7a5c1505f8e6ace5a", "03d9e06a8bbf15edf1e59664456ad95ba6ef6ad1", "5b5260407146f14bad3c0fa7f1eb1278389b1794", "3c94c7742cb355743a7ebc7eea789818a50fbb6e", "9c8bbee60dac4ab599276815068e11f487ccb69e", "297d97c8aef37788d38a7a66e62251eae559ec91", "0f48c76228e3f17ce5766614800121c767b72cbb", "2bae2ecbffbe756f0610ab9c77fdf93b93f527ac", "f595899ef35d8d4400d36de6dd26b49668171599", "59be89635d801630d021a2fbb93bd75d03d9a7f2", "4ca743cd9095700f1ad11703d6ffcc1b98c4b2c1", "a5535b94e0ead9576796f107094ed43ac51cec3b", "02c094b1093fefce28d5e0dda75a06d3028d1ed8", "863e77c6194b7a911309a5db428d7e372bf47fe9", "27b393b3e3cb0f83a8f18094f93fd7f31109ef9a", "0a0299fe4ab7ad973dd3be6527c47a05c3cc3d93", "7486a9091bd58822c8041af7dbbe491ef185bdbb", "c2ae1b0acd281a2fff041f3098e7aef9a2f5c794", "38b876bcb64bb93c0ddf2f55e044dd7fc2d8423a" ], "paperAbstract": "Query evaluation over probabilistic databases is known to be intractable in many cases, even in data complexity, i.e., when the query is fixed. Although some restrictions of the queries and instances[4] have been proposed to lower the complexity, these known tractable cases usually do not apply to combined complexity, i.e., when the query is not fixed. 
This leaves open the question of which query and instance languages ensure the tractability of probabilistic query evaluation in combined complexity.\n This paper proposes the first general study of the combined complexity of conjunctive query evaluation on probabilistic instances over binary signatures, which we can alternatively phrase as a probabilistic version of the graph homomorphism problem, or of a constraint satisfaction problem (CSP) variant. We study the complexity of this problem depending on whether instances and queries can use features such as edge labels, disconnectedness, branching, and edges in both directions. We show that the complexity landscape is surprisingly rich, using a variety of technical tools: automata-based compilation to d-DNNF lineages [4], ß-acyclic lineages, the X-property for tractable CSP[25], graded DAGs[28] and various coding techniques for hardness proofs.", "pdfUrls": [ "http://arxiv.org/abs/1703.03201", "http://doi.acm.org/10.1145/3034786.3056121", "https://arxiv.org/pdf/1703.03201v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2ce5b867a51e878969740a2224a65143f9e98a13", "sources": [ "DBLP" ], "title": "Conjunctive Queries on Probabilistic Graphs: Combined Complexity", "venue": "PODS", "year": 2017 }, "2ceb5a5c2e8434003c641b61bfdc7c66de9c33ae": { "authors": [ { "ids": [ "31026832" ], "name": "Sui Chen" }, { "ids": [ "1733348" ], "name": "Lu Peng" }, { "ids": [ "40382020" ], "name": "Samuel Irving" } ], "doi": "10.1145/3079856.3080204", "doiUrl": "https://doi.org/10.1145/3079856.3080204", "entities": [ "Binary tree", "Computer", "Data structure", "Graphics processing unit", "Linked list", "On the fly", "Red\u2013black tree", "Snapshot (computer storage)", "Snapshot isolation", "Software bug", "Software versioning", "Transactional memory" ], "id": "2ceb5a5c2e8434003c641b61bfdc7c66de9c33ae", "inCitations": [ "f85ea15f5d417dc9b1cac8f58bec7157df47e6ba" ], "journalName": "2017 ACM/IEEE 44th 
Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "282-294", "journalVolume": "", "outCitations": [ "0a777bb795d24bfed2631486e63810d1bf4422c3", "762f5a712f4d6994ead089fcc0c5db98479a2008", "2394c6644efa856f0da160a0f0031d74cd3b5000", "33d8743ad609524c5f7949ac44a4dcaffe228dde", "8d1c0ae7bbe138bc19abf66ca918f46b244b1f5d", "af247dd5f5bf188679f20cdc7423ade4711a25fe", "13e397480fbf36f1c0104db4f7bde8bf5e282f67", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "2d680892a7318ab7eff879054ea7ab6aeeb51fe9", "4515cb1d9416777a2be3a23efb46fd1ae20d369e", "3fa8e4ba23c712ff094b3321a38e9f029cef7a34", "253654b4a851747aa2e3c29bc259015da41a3b5c", "199267c2389b31ed3caaaddaa6293c1a6c4d2589", "4aa993db77b888a02084a542a929b1a81a8d03f6", "1bb2f9e63b68038843ef1a59295f057167ec7e1f", "917392fb11729b5b522d1ce5a00d3f23f4594e3c", "1f4df258283dfdffd91af50629ca65a4c79ec3c1", "4593ae644f04d76f582dedc4cc32d2acd33c9a93", "0335bf6957ecb92f709fc79c72c4237939f32c9e", "0b6feaae9d2d2f3bc8e487a8228d712a68fea14d", "166b2eee3cc2f08c98c00a29b959226fa6d0545d", "3ebbac596ccb16fa04bc205fe931a8fe1c936cd8", "095a3cee30d64d3a6f22caadd58c45c5cd0b83e9", "50b84e2e1d1289ac6e3fae14f292d274f6db27e1", "014ba063a3721973ba6af6503232d4d21d1456bb", "40cb40b7812e019c1051e3a457a8643400b81d51", "ef0b7babc7d4ede409d8ded5da20b69c46316d67" ], "paperAbstract": "Snapshot Isolation (SI) is an established model in the database community, which permits write-read conflicts to pass and aborts transactions only on write-write conflicts. With the Write Skew anomaly correctly eliminated, SI can reduce the occurrence of aborts, save the work done by transactions, and greatly benefit long transactions involving complex data structures.\n GPUs are evolving towards a general-purpose computing device with growing support for irregular workloads, including transactional memory. The usage of snapshot isolation on transactional memory has proven to be greatly beneficial for performance. 
In this paper, we propose a multi-versioned memory subsystem for hardware-based transactional memory on the GPU, with a method for eliminating the Write Skew anomaly on the fly, and finally incorporate Snapshot Isolation with this system.\n The results show that snapshot isolation can effectively boost the performance of dynamically sized data structures such as linked lists, binary trees and red-black trees, sometimes by as much as 4.5x, which results in improved overall performance of benchmarks utilizing these data structures.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080204" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2ceb5a5c2e8434003c641b61bfdc7c66de9c33ae", "sources": [ "DBLP" ], "title": "Accelerating GPU hardware transactional memory with snapshot isolation", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "2cfa2068d49fc0e9cc5d96bc498c63e782f7478f": { "authors": [ { "ids": [ "23008160" ], "name": "Kevin K. 
Chang" }, { "ids": [ "11827442" ], "name": "Abdullah Giray Yaglik\u00e7i" }, { "ids": [ "33801185" ], "name": "Saugata Ghose" }, { "ids": [ "40248185" ], "name": "Aditya Agrawal" }, { "ids": [ "2866959" ], "name": "Niladrish Chatterjee" }, { "ids": [ "2466261" ], "name": "Abhijith Kashyap" }, { "ids": [ "15895903" ], "name": "Donghyuk Lee" }, { "ids": [ "2341074" ], "name": "Mike O'Connor" }, { "ids": [ "40016363" ], "name": "Hasan Hassan" }, { "ids": [ "1734461" ], "name": "Onur Mutlu" } ], "doi": "10.1145/3084447", "doiUrl": "https://doi.org/10.1145/3084447", "entities": [ "Bit error rate", "Circuit restoration", "Dynamic frequency scaling", "Dynamic random-access memory", "Dynamic voltage scaling", "Experiment", "Field-programmable gate array", "Frequency scaling", "Limiter", "Multi-core processor", "Simulation", "Voltage regulator", "Voltage source" ], "id": "2cfa2068d49fc0e9cc5d96bc498c63e782f7478f", "inCitations": [ "1ebdf99bf03787a10d1c37bc9f93e89116e29bd6", "983e87929eeb3f77c2ddb02d17d6efe978c80667", "5c478e5c774eb3cf71e446e2c9eb2166ca032b28", "24b300b2395e4d0b0d2c8b9797fc9f8e735a58ef", "0c4fe1f1a8043e8f4175b21faca1b72bff8033e6", "447f492235719d7c2b061b95d818f928d6cbdac5", "0f41b9c0900b1c17b63d3d59bd4c334f7cf736af", "a6ca37aeeef5911e4f36b904088479bea999cc81", "042855085a52934e5599e02555071bb222f6a000", "2976932bec7334a150e1bb6916b7564bdaa864ea", "15388b06b42d9a61a1d083bc3bf140ef40f066fa", "15aa9bdac48ab6c3b1c223a676240b3cbbd4c3d6", "0b393cab00401cb971cf71970e00c2767f881f75" ], "journalName": "POMACS", "journalPages": "10:1-10:42", "journalVolume": "1", "outCitations": [ "03eaf3a6b6db01bdb749e8c3a097a0198c61b976", "3c761857787b3efe5e65b25bd94c737bf2cd7632", "74fcc3a4806da111405f057dc84de39f8fed15d7", "01debf23d55fcd72ff2d78f980c5c73a79b90102", "45ce4be870f0a5be7b45b064726696dacd83c786", "705a129de84bcf24b4039150c2fc2be1c24cc24a", "3f63640a31fb4624aeebc6e7959cb8ac26e54051", "ab82581f2225072865c1bf49c0044b05e5afca30", 
"35b92289fe9c19e5baf462ffe91bdd2d25768b00", "2efbc4631ff97be7807043735d62ec57f0201a6d", "31bc6abe2a9b33dfd30e6bca4b8cedfcca6e530c", "10db43d5a1b3c1ddf6dc93fa992b8ed42dcf9833", "46eea309204f088ef9dda197d8273465a641f60c", "03385e04bf3df318ee9a94237e6b5e96b8663a0d", "85398d5f19157c91bf00da3d36210e72d57887e4", "77f826132cf09ac91ea9c859387a8d52221a019a", "68073f621072d793e95b9562bf9a9245415d5a96", "5eb7d5e99a7ea7a17bc269217ee80f2b5d322a52", "942394566ccb9dbf40243dc2bd3c4d7605bbefa2", "0ae24644ca8866321ce6c117c5823ad9f149bef9", "4b5f67cba9a1f98a5390ed9cadcf018671c02c08", "0645f0f88e9a3cd6e9b1d0c21bc24666a7377666", "2394c6644efa856f0da160a0f0031d74cd3b5000", "219755b1056be1fc8329850d5a1ba1187f1fa8f0", "60aa9510638d4d9739ebfc3a0042187988482346", "7bc046671369ba23568ff03bbee6ba04a91bd092", "1c32ad0a42109fab826eb3054df7cfc33b424125", "0edf4ef1b8e09e4abc994f7d450bc090262e2c9b", "174a3d4ad2caba68b55fd3ee863b9471e3786f21", "3c89345bb88a440096f7a057c28857cc4baf3695", "387eb8909b5527dd2513cfdd2f376a3a1f2973b3", "0eacd1b47786f740b723d906d46e160f143c0378", "517c5cd1dbafb3cfa0eea4fc78d0b5cd085209b2", "15e63d368aa803c73b8f5d1315a51ebd7ceea3c3", "588fd53a6cbdb2f2d7f2bd676944d7b5fdfafcb9", "24b300b2395e4d0b0d2c8b9797fc9f8e735a58ef", "6aba2b1785bd26eb6d85820a734ddaa262d20571", "04dcd8acdc16e42463e783ea5bc8283607ccee3f", "37b5850e3e75a3462f3991491ca26674925f233b", "8f126319c2c52347f3d32e5daf25bbccf759c761", "108c840d5d1847948a2de0250490a327ae069ee6", "447f492235719d7c2b061b95d818f928d6cbdac5", "00fc41f729269271aec836bb09b9c3f8c13c7c7e", "44d886f89cdbd4fdf5dd25d83b2d37deb7541bf7", "4ec4ed133deb050d97615803412102a6d68611cc", "16ba3e5c5e0084fef0fa4705d639f2ad164f2dbe", "7c62c7e0b4e026f6b5b027735c99cbf033789ba9", "0d2b7c2421475c7f054a2c1bba9a12b434de47c4", "472392b93150be7bb0132511d71d686770c2c79b", "510b4efe57fab4fa7bab5dedb95c97be96ff5315", "30bb582c2c09abc7eb9dda7d9f80804eeb89f9d7", "7aee93b09ca568d84aa5522d4b9187ad50254961", "9ca87149f9e7cb0811c881ecac79ef02c87e1716", 
"257a47ec2982405a903eb8536a7321de528b149d", "dc060372253f1bacdea2c785e6525f781fe8c039", "42967ae5f115f9833a1da3e180d67c3a3aba2abc", "d89dbe46e5b7ade9e613d33ee068b68cbf63f614", "1dec8f5106d11047aaaf126121110cbf890f17c3", "02e965debeaf59e6f93adede60d7e39004e77fcc", "48534b21548e3692ad7d866387f1dc7f543109e1", "434fa04db769935ae61bbcf4d9faa602b9a8c730", "026615150a7db9012ea247d3576957ca214258c6", "2ba01c3f28d5d2b88e39f3d000dc55ea634d4922", "37e49c57dd4d0849380d177222db53e52ff21347", "eed05ea58f8ee580ad02fdc3e996fa120b40c2dd", "65edeedb41696f66634627984573885e0bf6f55e", "35a2cb3f17bdd5d7ca5e5283b164fad21d1737ff", "a5bd15d203c6aa740aba16776b422db010e66b58", "071564baef078867847fc54a3a0b50dd22d29d62", "76e29695c7c119d869d3b87886a611261a98e4a4", "1144956f60e04e0839dd2fc5b8031fb4a4599072", "468035263afa59095614f26a62e0217da4a1aeed", "096ee9c89d43fd03a602aed3a37fdf43dc8e60ae", "352a8957005dc5519b15ed1870751ec494d66395", "94973810b159138f16577179daf63fc3c19f3224", "012d556d67acedc6898930b4c93f54b87aabf5ee", "42f7ade4ab1ee6941da178b53712bb7ef7822815", "12203385fbe8e26aefa1d82c9effaacb44f27a98", "3ef44315caebb6169b7ffbf9886cd782193ab40e", "5dfbdcedb7bcb8644b816bab2cc3d3fadd36775b", "02817ebb4acc6bc2f015e4ded4fb1754ba6cd7d7", "1d68e6f94aa8e10ebfdd785843c50427d1e820c4", "05c56f4abc527fbf384ad011dc9c0a613955641a", "1c48c87c5cbc28933d7eb0432bb617300a114ee0", "8c34cdd2bab66623d2831004fbd1fa1cdf8a0366", "2263e2beda96efcff8ff19efaefcb85f5136aeca", "9341125876271d46cc25f86dac93f25acb343e8d", "0fca03c476d869660dec04fb83f54161767a4ba7", "1de7a8de961624bfd482744c6be24fb15ae14776", "01ab8a70840cefc0c5e545bc76f4b5195bb37333", "31c299532c42106b71e909c2fc0fc7472c39ce90", "1d4b34a8165c8406106df7d7d1987f4a49d5af08", "4cfc98ded4191f9ae63acfbbc50ed4f358df9c58", "8b0b2f2605e533c40cac32e1a3a989f7aa759841", "69743194ca177ef816d31a99475c3ba3ff97808c", "08632fe2b934ed15d3499e7321282c81adc2c390", "50de0f6a952131dfe562c5b3836e5d934b39b939", "370baef5b5f9e2933a195bc025c93feb02baf494", 
"6902867509928c0e5c19aff3e62e1def3a19d581", "1f5b507c038b09f017bffd51d4f4e4257bef6ef4", "239e046347d5075b3eeef5439050e9f2ca760b7b", "33f410aed92033ea9180b7175ca24b63dd5b9792", "a662a40d7e8202f8ca5f55916f7e3c1f5b4379e9", "03d55467b20e662fbaa8416e853f57c93834a9fb", "084037d504c95c1af6fb1398179f8495618b72d7", "4b82766a16aa951020e43d6f70b5cf097a6b353c", "9aa0d7253574e50fe3a190ccd924433f048997dd", "36897d1d2661777913d492390c4ad9d004276308", "bfe6157690a837af71c62abc94811ef7faf45fd4", "4390f4a06a036b8f04cbb4fe7611fa5af9492797", "26e72340c47b7348e1b1de285f89dd96cc925b27", "3f82aa1373e823ec622b3021fff9df4a82230267" ], "paperAbstract": "The energy consumption of DRAM is a critical concern in modern computing systems. Improvements in manufacturing process technology have allowed DRAM vendors to lower the DRAM supply voltage conservatively, which reduces some of the DRAM energy consumption. We would like to reduce the DRAM supply voltage more aggressively, to further reduce energy. Aggressive supply voltage reduction requires a thorough understanding of the effect voltage scaling has on DRAM access latency and DRAM reliability.\n In this paper, we take a comprehensive approach to understanding and exploiting the latency and reliability characteristics of modern DRAM when the supply voltage is lowered below the nominal voltage level specified by DRAM standards. Using an FPGA-based testing platform, we perform an experimental study of 124 real DDR3L (low-voltage) DRAM chips manufactured recently by three major DRAM vendors. We find that reducing the supply voltage below a certain point introduces bit errors in the data, and we comprehensively characterize the behavior of these errors. We discover that these errors can be avoided by increasing the latency of three major DRAM operations (activation, restoration, and precharge). We perform detailed DRAM circuit simulations to validate and explain our experimental findings. 
We also characterize the various relationships between reduced supply voltage and error locations, stored data patterns, DRAM temperature, and data retention.\n Based on our observations, we propose a new DRAM energy reduction mechanism, called Voltron. The key idea of Voltron is to use a performance model to determine by how much we can reduce the supply voltage without introducing errors and without exceeding a user-specified threshold for performance loss. Our evaluations show that Voltron reduces the average DRAM and system energy consumption by 10.5% and 7.3%, respectively, while limiting the average system performance loss to only 1.8%, for a variety of memory-intensive quad-core workloads. We also show that Voltron significantly outperforms prior dynamic voltage and frequency scaling mechanisms for DRAM.", "pdfUrls": [ "https://users.ece.cmu.edu/~saugatag/papers/17sigmetrics_voltron.pdf", "http://www.cs.utah.edu/~nil/pubs/sigmetrics17.pdf", "https://arxiv.org/pdf/1705.10292v1.pdf", "http://www.pdl.cmu.edu/PDL-FTP/NVM/17sigmetrics_voltron.pdf", "http://www.ece.cmu.edu/~safari/pubs/Voltron-reduced-voltage-DRAM-sigmetrics17-abstract.pdf", "http://doi.acm.org/10.1145/3078505.3078590", "http://doi.acm.org/10.1145/3084447" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2cfa2068d49fc0e9cc5d96bc498c63e782f7478f", "sources": [ "DBLP" ], "title": "Understanding Reduced-Voltage Operation in Modern DRAM Devices: Experimental Characterization, Analysis, and Mechanisms", "venue": "SIGMETRICS", "year": 2017 }, "2cfa6c557899f09d9a6529ce5ce90251d699bf17": { "authors": [ { "ids": [ "2905015" ], "name": "Vojtech Nikl" }, { "ids": [ "16846587" ], "name": "Michal Hradecky" }, { "ids": [ "17100725" ], "name": "Jakub Keleceni" }, { "ids": [ "2231982" ], "name": "Jir\u00ed Jaros" } ], "doi": "10.1007/978-3-319-58667-0_20", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_20", "entities": [ "ARM architecture", "Haswell (microarchitecture)" ], "id": 
"2cfa6c557899f09d9a6529ce5ce90251d699bf17", "inCitations": [ "91ff6b19c86e74fab4b8c680c52587218d3067e0" ], "journalName": "", "journalPages": "377-393", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_20" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2cfa6c557899f09d9a6529ce5ce90251d699bf17", "sources": [ "DBLP" ], "title": "The Investigation of the ARMv7 and Intel Haswell Architectures Suitability for Performance and Energy-Aware Computing", "venue": "ISC", "year": 2017 }, "2d1b2392585b09297dd79a14ca3fb853133d64e3": { "authors": [ { "ids": [ "8573809" ], "name": "Xulong Tang" }, { "ids": [ "2833547" ], "name": "Orhan Kislal" }, { "ids": [ "1717494" ], "name": "Mahmut T. Kandemir" }, { "ids": [ "3104729" ], "name": "Mustafa Karak\u00f6y" } ], "doi": "10.1145/3123939.3123954", "doiUrl": "https://doi.org/10.1145/3123939.3123954", "entities": [ "Algorithm", "Compiler", "Computation", "Data access", "Experiment", "Locality of reference", "Manycore processor", "Multi-core processor", "Network on a chip", "Programming paradigm", "Run time (program lifecycle phase)", "Thread (computing)" ], "id": "2d1b2392585b09297dd79a14ca3fb853133d64e3", "inCitations": [ "651ae380b5d500c613770dbf55c175c52576d7da", "884e104c13102e1353e85a6a91e41d3cff2c80f5", "c16c4fa113cd2c93f7557e05831039ed1436735a" ], "journalName": "", "journalPages": "730-744", "journalVolume": "", "outCitations": [ "a52945840b980adfef34466cb4186c7cda3b61e6", "85f5b05e7eba438f1fc4bc6e21cc1af00c424fbc", "367d34d830482b349c73f373717a079d335c03e5", "6456603d61e7b09817c9e3821cf6845997d63e12", "36e1b02a66ed928ef13e3a2ba6852e90a8713036", "f0f4757aa2f923a349e8357e73850a78e9b80fee", "ef22b2c93c5c720a2b010f1280db8f8c7114c287", "04717b51d7c7dd598d732fb8a293d69d45c0fa9f", "6bab4397efe09fa20cacb7fe54cc0cb2fc1c3b29", "386c295bae8dcc634ae2b593bb52376c6fe78ea0", "0fc52b5c7aa713802d69d3f1c6e7a26a5eb3493a", 
"3039a7c7b5172db2675283a41c3e9d79db328c5a", "0b1c6f52c76b441cb2598f3b8ac132d921ec8274", "2194c3460ab71f3826db00b045b2ae590c753319", "4f70e1583f5d31d29ceed2998c52b2bf6c01e2ec", "53b402418835e6f34b8a9e5ea51440bbdd02581e", "3ac3cf82ff8f43db2e467f792d22d57d068f7ee0", "0ec62e028c2088f1aef01f2e65d167faf1d3569e", "79d27fb25d5e82ad87b2567208545356eb221364", "8814ba7515481fe16afc79cb41b05d4aa58f2df2", "2b585692b2337286e88095c2341af4d8121e80b8", "097904d7691fb6d5cd15cea9ee0ed8d02ba1ce41", "a3eb0826dd5d88669d506c0cbfb0f3dc90937fed", "296ba67ecd48c7833e10520fdc99b9dc0ca7c584", "c3da4e02b8a474d6f094f1e5ac435049e0fe3e49", "28552ecf4eaedb3461edca97304b29082b02fbab", "82203d72124b2c11ff552cabcb8bae00e51d79fa", "48a7323c4894de3afb90ef2135160205ebb55011", "ad702abe479b86ed043f83d6475a2e82c4718c9a", "3c0b5dd5a50ad2bacb282841fc6d7d16dcbbc2df", "29f766723ca752138855500084ced04503bfc9c8", "dc6207bf015aa47aa1890b91d5eb18925fae9e61", "205ff590dc7881db74d766c43e3509ddfbe24d81", "9382ea659ccde9e28047834d19ed9de8fa8d1760", "20eb85fca5818e610b1b488b5404148fd5762825", "38e855b28d99a8d8fde701d39b22247463e1bc89", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "b05bf3ee5d33d7610cca61985b44c22045c9ded2", "53e11fc15261cc5e3a47bfda9eeb4c3355053b6d", "05d6e23cfc591d66c366ed9f75c5d91d7f78c059", "96d2e5456b8d7b8ad763781a16b61beabf2d7fcf", "069eafae5ee9df25ff5c457bb636f73b98d8f6e9", "352a8957005dc5519b15ed1870751ec494d66395", "1401df37cc3fc78f26570d601fd123f17646b2d2", "40138cbd57a4632d6267cff4c91b55e7376a6693", "157d5b2488d953b7c88abc36791c2e897c152395", "174b4cb435c87e421c973ce59ccf5b06e09aa8af", "0693ff4b3a8d1452b897a876d3ffe6b2074e98e4", "884e104c13102e1353e85a6a91e41d3cff2c80f5", "2b948b66d89b4d6eadbd7d893fc38471cd7c041d", "14cd0daeed8c12db40be03dfd56e446fcc10f32a", "38628d26d4f624378f4303b61ae93c5d34d007c3", "0edf4ef1b8e09e4abc994f7d450bc090262e2c9b", "32820f7d69f1ac020d2ce8bc5254725a7797f447", "7c036d5a4b79a735b279423358af4e8df6f7ec81", 
"938574649516c7690ce05891ef499760b9a0553b", "7be11273a2db5c8aad8c7dfa8080e14e6eead121" ], "paperAbstract": "Data access costs dominate the execution times of most parallel applications and they are expected to be even more important in the future. To address this, recent research has focused on Near Data Processing (NDP) as a new paradigm that tries to bring computation to data, instead of bringing data to computation (which is the norm in conventional computing). This paper explores the potential of compiler support in exploiting NDP in the context of emerging manycore systems. To that end, we propose a novel compiler algorithm that partitions the computations in a given loop nest into subcomputations and schedules the resulting subcomputations on different cores with the goal of reducing the distance-to-data on the on-chip network. An important characteristic of our approach is that it exploits NDP while taking advantage of data locality. Our experiments with 12 multithreaded applications running on a state-of-the-art commercial manycore system indicate that the proposed compiler-based approach significantly reduces data movements on the on-chip network by taking advantage of NDP, and these benefits lead to an average execution time improvement of 18.4%.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123954", "http://xzt102.github.io/publications/2017_MICRO_Xulong.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2d1b2392585b09297dd79a14ca3fb853133d64e3", "sources": [ "DBLP" ], "title": "Data movement aware computation partitioning", "venue": "MICRO", "year": 2017 }, "2d431f021e7fb87418fa6f2f23db901284f039d7": { "authors": [ { "ids": [ "8789909" ], "name": "Jayanth Kalyanasundaram" }, { "ids": [ "1761220" ], "name": "Yogesh L. 
Simmhan" } ], "doi": "10.1109/HiPC.2017.00032", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00032", "entities": [ "64-bit computing", "ARM architecture", "Apache Hadoop", "Apache Hive", "Big data", "Commodity computing", "Data center", "Data parallelism", "Electronic data processing", "High-throughput computing", "Machine learning", "Mobile device", "PageRank", "Parallel computing", "Run time (program lifecycle phase)", "Server (computing)", "Throughput", "Web page", "X86-64" ], "id": "2d431f021e7fb87418fa6f2f23db901284f039d7", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "203-212", "journalVolume": "", "outCitations": [ "04bda1bfbb271b4d132fd326c79cd7e0a6961fd3", "0d2c4723e9e5925cde74bd879611fda6f6e3980b", "91310da71680155ae4849563e203ccb121612143", "46763c8313e6aad2d3c1ba72bc4126d58c71de39", "1f4c829ab60055d0d0be40ef5e5ac8b94ae329c7", "7f583dcce82d27dbf2ec79f5d783509b7a11e3ad", "9ee6209432316baf6776838917e06bca4d874747", "5b54030ad0c5e4424e7c3c4e63a4143e7ed816dc", "05ad81a9d3a6843238fadba9d25ab5872a010ebb", "67b9072f4c7d0b8e7e05983e3532aebddbe5098f", "d0896199a1025c50fe3782af6a3faf5ba58454e1", "65d1532560a19daa80c95abb6a22cc73d17e3e89", "5f7264e24101ac4d42d2ef9cedd5eae8e7512eec", "6b1146e51b2c4d5b8433d4d9c6dbf87c6c484196", "4d1e3d20531b7118c50b137715b69926d990d7c6", "8a9112daed1df3caeb8efd7e4b05e4ed05c23dbc" ], "paperAbstract": "ARM processors have dominated the mobile device market in the last decade due to their favorable computing to energy ratio. In this age of Cloud data centers and Big Data analytics, the focus is increasingly on power efficient processing, rather than just high throughput computing. ARM's first commodity server-grade processor is the recent AMD A1100-series processor, based on a 64-bit ARM Cortex A57 architecture. 
In this paper, we study the performance and energy efficiency of a server based on this ARM64 CPU, relative to a comparable server running an AMD Opteron 3300-series x64 CPU, for Big Data workloads. Specifically, we study these for Intel's HiBench suite of web, query and machine learning benchmarks on Apache Hadoop v2.7 in a pseudo-distributed setup, for data sizes up to 20GB files, 5M web pages and 500M tuples. Our results show that the ARM64 server's runtime performance is comparable to the x64 server for integer-based workloads like Sort and Hive queries, and only lags behind for floating-point intensive benchmarks like PageRank, when they do not exploit data parallelism adequately. We also see that the ARM64 server takes 1/3rd the energy, and has an Energy Delay Product (EDP) that is 50-71% lower than the x64 server. These results hold promise for ARM64 data centers hosting Big Data workloads to reduce their operational costs, while opening up opportunities for further analysis.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00032", "https://arxiv.org/pdf/1701.05996v2.pdf", "https://arxiv.org/pdf/1701.05996v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2d431f021e7fb87418fa6f2f23db901284f039d7", "sources": [ "DBLP" ], "title": "ARM Wrestling with Big Data: A Study of Commodity ARM64 Server for Big Data Workloads", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "2d4695ab17a0255aaf2fad56adcbfd3f1980aba2": { "authors": [ { "ids": [ "2716177" ], "name": "Hongjia Wu" }, { "ids": [ "1683741" ], "name": "Qing Wang" }, { "ids": [ "1784158" ], "name": "Jie Xiong" }, { "ids": [ "1955669" ], "name": "Marco Zuniga" } ], "doi": "10.1145/3143361.3149824", "doiUrl": "https://doi.org/10.1145/3143361.3149824", "entities": [ "Commodity computing", "Experiment", "Modulation", "Real life", "Real-time computing", "Smart lighting", "Throughput", "VLC media player" ], 
"id": "2d4695ab17a0255aaf2fad56adcbfd3f1980aba2", "inCitations": [], "journalName": "", "journalPages": "212-223", "journalVolume": "", "outCitations": [ "e80983f38d34e5c68ac2f68a47f10d5839903e52", "b299e728ec8a2a4449899b7a16100d1998139c56", "c72c6e2d345241ab6a8f6d01cd2c984c854fe417", "cf8ee0efba707eb56d16a88bb97cf99e8ffca834", "de2aecac92b7fbf609b86f682c550879009a8cb5", "6ba8c478bca4434e100e209b2204128dc2002703", "18c55aad1423142ced6fb3725a5595e7a4758bae", "c701f0b7ac971a6ffd5c610d43357a4773c5ce3d", "5e7572a75e42e481e8cb1d591830b7d78a93e913", "d81f8dfffc51c7f4747a5337ab5c82f4b0e2921a", "487ea81007fa71873ee1b1f35a2eb4fd62346710", "16595d321b257dd3c28bff95bdd3e42d6254aeca", "75db6d3adb41e9fa5ec6f9a6ac84ef620e8ad1c5", "4e5d670be3650c664cc0a7a68edada1db01dfbf1", "a9727ec3f53d55821db25ae1066e07991ba48625", "3b664f92de3a2a658e3752e31db3a7ee1b7c0544", "3bd5c46eba124e95a98c1306adc2ba0ffb68aa91", "02f00b07581c316d21505bcdb1f65a8dac5a8ad8", "a210f466c7e97d969401fb463307d35fa16287fb", "9cf6ee281bbf2544c7d916a1f7220304cbf8351c", "f73ea77d4a2b45f0af3bf6157c863aaf129af69d", "7f04e77de47b4146b8e968755679ca9e7e9d92ac", "b2e44a4f2208e1a7cb84394a5d7869066f2f16ba", "1e37384874c84acc7919176d4e9598e9116da2ee", "786b684d577ae57aa2fbc7d1fb0870ad86b998b5", "9058f1bedc6a63574b5b3dfdd158676fea2a5232", "d9a05c76d1bdc67f5d9038ee2aaaf7b5a0ae1d2f", "126d5ce9dc0f1ab22e1ea3c8ee36aaa1f74d6837", "6d64c4163a3a1f6133171a25787e5a88e5f984c2" ], "paperAbstract": "Visible Light Communication (VLC) based on LEDs has been a hot topic investigated for over a decade. However, most of the research efforts in this area assume the intensity of the light emitted from LEDs is constant. This is not true any more when Smart Lighting is introduced to VLC in recent years, which requires the LEDs to adapt their brightness according to the intensity of the natural ambient light. Smart lighting saves power consumption and improves user comfort. 
However, intensity adaptation severely affects the throughput performance of the data communication. In this paper, we propose SmartVLC, a system that can maximize the throughput (benefit communication) while still maintaining the LEDs' illumination function (benefit smart lighting). A new adaptive multiple pulse position modulation scheme is proposed to support fine-grained dimming levels to avoid flickering and at the same time, maximize the throughput under each dimming level. SmartVLC is implemented on low-cost commodity hardware and several real-life challenges in both hardware and software are addressed to make SmartVLC a robust realtime system. Comprehensive experiments are carried out to evaluate the performance of SmartVLC under multifaceted scenarios. The results demonstrate that SmartVLC supports a communication distance up to 3.6m, and improves the throughput achieved with two state-of-the-art approaches by 40% and 12% on average, respectively, without bringing any flickering to users.", "pdfUrls": [ "https://pure.tudelft.nl/portal/files/35052607/paper.pdf", "http://wwwtmp.st.ewi.tudelft.nl/marco/files/smartVLC_CoNEXT17.pdf", "http://doi.acm.org/10.1145/3143361.3149824" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2d4695ab17a0255aaf2fad56adcbfd3f1980aba2", "sources": [ "DBLP" ], "title": "SmartVLC: When Smart Lighting Meets VLC", "venue": "CoNEXT", "year": 2017 }, "2d4fdf953dc3f79fd760b317bca228fe80ec9386": { "authors": [ { "ids": [ "34038252" ], "name": "Juneyoung Lee" }, { "ids": [ "3031506" ], "name": "Yoonseung Kim" }, { "ids": [ "4704140" ], "name": "Youngju Song" }, { "ids": [ "1777044" ], "name": "Chung-Kil Hur" }, { "ids": [ "34518246" ], "name": "Sanjoy Das" }, { "ids": [ "17811247" ], "name": "David Majnemer" }, { "ids": [ "1783210" ], "name": "John Regehr" }, { "ids": [ "37101082" ], "name": "Nuno P. 
Lopes" } ], "doi": "10.1145/3062341.3062343", "doiUrl": "https://doi.org/10.1145/3062341.3062343", "entities": [ "C++", "Compile time", "Compiler", "GNU Compiler Collection", "Global value numbering", "High- and low-level", "Intermediate representation", "LLVM", "Loop unswitching", "Optimizing compiler", "Programming language", "Software bug", "Static program analysis", "Undefined behavior", "Undefined value" ], "id": "2d4fdf953dc3f79fd760b317bca228fe80ec9386", "inCitations": [ "0cf106fd9610cada7a8ed5c54ea128abd9083f0b", "22f136c1a906fb12d395a03b59f6be2e34d61cc3", "c201fab9b14db58342c53372ebf47d82c1ebf8ad", "41134682ce7d3b7a66f63cfabd20fb81a6e1cdb1", "78edf6a49c26ebbb040c44b75365c432a8ff5737", "46a337fbb27a9960cd8a956afc29066af84a9c5e", "d7c5006abfc83b664e86ce8e193ec511de4b9492" ], "journalName": "", "journalPages": "633-647", "journalVolume": "", "outCitations": [ "73062e44e8a4b3d80c0a98e009c9604dc90d3911", "6005fdb7813e0f07d90d6ed8e7beecd733ef4d04", "074067a1412e784796bab7d286019b7b6a5a62a4", "3235b27709b4c9aaad5d34b4f012ebe8581d9d86", "047fdd696fdabd7b01af7d09c459e6abe7793170", "5ddc6a439cdc9b4eaebdad8c20976f1f0be4523f", "37791336941a0d954e4a98c96b1a66ca7be43eb2", "c41810e0514c0b2b92ead025c260b27251ccc054", "0fcb4d0f6ceea5121ae922b9fcdc949c17c00ca7", "1726ad07525a17d1cd541a3167d3eb4f054de1aa", "0c8f20da78ebc7891141c175fecb7a5c026f3e7d", "03f5501e776ca19515df15e11f216265f3afc43d", "444b21f886c21d989dbdd42b23420475460c4cf6", "aa52fc8d7df97fb44dc2a7d90859a7873a942e07", "5e4196dd5cb7856cde63cf06bb8c9caa4adf29de", "5ff060008d4d868634ec7408bd952a751eb0461c", "0356047e6f9a42ffe5cb7bc3e64b22205fdac918", "1f30a873c5e3e167884ec4af03629f5f568b3a25", "6acd2cce7d04bba4af841a5d524a69cc423e671e", "1c8a51d75dc13aa8646d62ef695a1d238996447f", "605ce5fe2f5b1e478a2528f1c514b77fc2607577", "1e42755e05be2848607807a9d45de4fb4194fde9" ], "paperAbstract": "A central concern for an optimizing compiler is the design of its intermediate representation (IR) for code. 
The IR should make it easy to perform transformations, and should also afford efficient and precise static analysis. In this paper we study an aspect of IR design that has received little attention: the role of undefined behavior. The IR for every optimizing compiler we have looked at, including GCC, LLVM, Intel's, and Microsoft's, supports one or more forms of undefined behavior (UB), not only to reflect the semantics of UB-heavy programming languages such as C and C++, but also to model inherently unsafe low-level operations such as memory stores and to avoid over-constraining IR semantics to the point that desirable transformations become illegal. The current semantics of LLVM's IR fails to justify some cases of loop unswitching, global value numbering, and other important \"textbook\" optimizations, causing long-standing bugs. We present solutions to the problems we have identified in LLVM's IR and show that most optimizations currently in LLVM remain sound, and that some desirable new transformations become permissible. 
Our solutions do not degrade compile time or performance of generated code.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062343", "http://www.cs.utah.edu/~regehr/papers/undef-pldi17.pdf", "http://www2.cs.utah.edu/~regehr/papers/undef-pldi17.pdf", "http://sf.snu.ac.kr/gil.hur/publications/undefllvm.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/06/undef-pldi17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2d4fdf953dc3f79fd760b317bca228fe80ec9386", "sources": [ "DBLP" ], "title": "Taming undefined behavior in LLVM", "venue": "PLDI", "year": 2017 }, "2d596b9cfd4ab6b311fc2b1feb0298e236029439": { "authors": [ { "ids": [ "2001877" ], "name": "Ben Hu" }, { "ids": [ "1694557" ], "name": "Huaimin Wang" }, { "ids": [ "37575268" ], "name": "Pengfei Zhang" }, { "ids": [ "1689910" ], "name": "Bo Ding" }, { "ids": [ "2712459" ], "name": "Huimin Che" } ], "doi": "10.1109/CLOUD.2017.23", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.23", "entities": [ "Cloud computing", "Computation", "Outsourcing", "Quality of service", "Response time (technology)", "Robot", "Warez" ], "id": "2d596b9cfd4ab6b311fc2b1feb0298e236029439", "inCitations": [ "0bbc2a8925b3614f08edfb388654f5245654a887" ], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "114-121", "journalVolume": "", "outCitations": [ "6b61e008da191311ec47f73e81c8f58bdf4d7d87", "641563122a6cbc29a30d488dd583d3cf6cb2605a", "d45eaee8b2e047306329e5dbfc954e6dd318ca1e", "3dd2f70f48588e9bb89f1e5eec7f0d8750dd920a", "9e0f01c9a3a2530aa7e7044a4f18aec5f80265df", "19db767cbca3b22f8569a91818cc089ff46c4c44", "03d4eccb7c43d93bfa1fe3b7693f42b0ccabf064", "5e6773329f0762d2c1d7862d6a562eb216d66935", "b4c7f8aa611067ccdd5715ad8dd658820a67c770", "3aef6c3907e4eae24c3cce47737940822c15d0de", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "533d8c11c8e533c34774eff76fcbab6559ee67e4", "000ec83a08163544bac25d21747c1c4dcf28cd11", 
"b53cb88e1b966970f6d8a44010d8c1eafcbe2154", "b70febbafb547f88901929f9f9fa9ed3729f9c44", "09ea2e8942f624a6dad4c96f3c62320fbb0db7a8", "12bf156b71ed9aacbb640d5cbef709626b560e71" ], "paperAbstract": "Many robotic tasks require heavy computation, which can easily exceed the robot's onboard computer capability. A promising solution to address this challenge is outsourcing thecomputation to the cloud. However, exploiting the potential ofcloud resources in robotic software is difficult, because it in-volves complex code modification and extensive (re)configurationprocedures. Moreover, quality of service (QoS) such as timeliness, which is critical to robot's behavior, have to be considered. Inthis paper, we propose a transparent and QoS-aware softwareframework called Cloudroid for cloud robotic applications. Thisframework supports direct deployment of existing robotic soft-ware packages to the cloud, transparently transforming theminto Internet-accessible cloud services. And with the automati-cally generated service stubs, robotic applications can outsourcetheir computation to the cloud without any code modification. Furthermore, the robot and the cloud can cooperate to maintainthe specific QoS property such as request response time, evenin a highly dynamic and resource-competitive environment. Weevaluated Cloudroid based on a group of typical robotic scenariosand a set of software packages widely adopted in real-worldrobot practices. Results show that robots capability can beenhanced significantly without code modification and specific QoSobjectives can be guaranteed. 
In certain tasks, the \"cloud + robot\" setup shows improved performance in orders of magnitude compared with the robot native setup.", "pdfUrls": [ "https://arxiv.org/pdf/1705.05691v1.pdf", "https://doi.org/10.1109/CLOUD.2017.23", "http://arxiv.org/abs/1705.05691" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2d596b9cfd4ab6b311fc2b1feb0298e236029439", "sources": [ "DBLP" ], "title": "Cloudroid: A Cloud Framework for Transparent and QoS-Aware Robotic Computation Outsourcing", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "2d6222ee8f5ac92b9ca9bc5b7eb0341d1e3c9c58": { "authors": [ { "ids": [ "3448730" ], "name": "Moein Khazraee" }, { "ids": [ "2883723" ], "name": "Lu Zhang" }, { "ids": [ "1684673" ], "name": "Luis Vega" }, { "ids": [ "38303344" ], "name": "Michael Bedford Taylor" } ], "doi": "10.1145/3037697.3037749", "doiUrl": "https://doi.org/10.1145/3037697.3037749", "entities": [ "Application-specific integrated circuit", "Bitcoin", "Cloud computing", "Computation", "Curiously recurring template pattern", "Data center", "Deep learning", "Field-programmable gate array", "Graphics processing unit", "Litecoin", "Mathematical optimization", "Michael Jackson's Moonwalker", "Program optimization", "Semiconductor device fabrication", "Total cost of ownership" ], "id": "2d6222ee8f5ac92b9ca9bc5b7eb0341d1e3c9c58", "inCitations": [ "758f5ddae04664aa7338d9d394a95805c36baa2a", "2773b163d84fbd828561424ff3582ece97c363cf", "2f322ccb1e59daf5348f376a410e2ada22d55f7d" ], "journalName": "", "journalPages": "511-526", "journalVolume": "", "outCitations": [ "006112e1bd750a84bcddeae60abc730633e92932", "fd12d8d785de4fb1b0d6704a52161f4fa3c34088", "399acab2bee6eccbfffe4a2ce688b6b1075e9c5e", "1ea92529e75fe90ee1923b95d0fa8ad37ac1ed7c", "23e661acc510afd4398b7d21bd63ea5e89bc8d45", "55bc52bbec8972d62874bcbe169dac573b57d1df", "b04c9e851ae605592d693aa65f0d753b8af08feb", "2bb28f6105ca30b2bc1ba91578234ecb12e788a6", 
"06ce77e4abea63948580340be25d7f2a80369e5a", "08632fe2b934ed15d3499e7321282c81adc2c390", "0f892fa9574f24bc7b50fed94e0abbd84883c2dc", "922f33b147b8e38d442b9171c463913a3c211b5a", "b872e246d77ec5692a05a5ca0aa35168e202b3e8", "2ddfc36a7b66ed0dae7a22b8484e592cb66145b7", "60cfe41fd68644fb19cba99babae694a2acacc17", "114b5aa30bcc7c5207bf22716d59fdc071159ae7", "d9043a6c844905687ac72054d83d7680a82ece9d", "269c24a4aad9be622b609a0860f5df80688c2f93", "d65bf1f6a8fb3bae704ea58d72bc610912edf7ee", "0789af812af3aebac3853dc2745f3847d503fa02", "8deafe947207eab416d8791f2e750289bd9ac73b", "a3bd48b23dd72c1fe592472c2a8341e14632e5eb", "3174b10d5efe0987ba6940a4e66943c2c3cbb3eb", "20e29444a28a763a45f9d9860ec4cd210ea5f084", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "357e97b04375f09e9f4cfd45c69ecd9d7f0a15e1", "8a155626c578c8daaf2c3d67a16c42e9f386ebf3", "51854f6133cd8d890beb8576e6f0b44a33916803", "c024820f93ef8f8ad74163fb9904c52ea4bbdaad", "2dfd01f873a68d61726953af6caae76754520c09", "3327a80c6b047bcf2dadc09dc425f4398fecc5a5", "1e40d8b7ccac6afbfdf5c89f203f368735e051f9", "3ac3cf82ff8f43db2e467f792d22d57d068f7ee0", "0659411ebccf1abca4f9a3a5c8744b8bb153933e", "092a1cf971fb8359d3293004c6f1de82f05f3afb", "02ebdcf8200135ec0433e12e4ef2459ac740370b", "3df5013fac2cada7b807ff1f45f87bbaf4bacc18", "48eff36f496105b110bcd7fff7a218a8a170e76b", "8fd98b77e558d05ee73e3cff0839545927e536be", "0ef58b9efde79db2914fec02a1f40288f176a66d", "b7eb530679645909a0bc1eac706a790474c86cd5", "ade263c815bf995916a2fa56c89f70efacf6ec5f", "00e3147cbefd4a5b77943fe0df4aa09c48ad6527", "6068cdb7a834f47a38ffcbfa4128e70ed3e385eb", "4954fa180728932959997a4768411ff9136aac81", "9bf711ca7e5a58d10b173b96e8d604d192aeec88", "49b4094f2c313a92da4461572c0bef80b0d7d649", "5ced6a0aab1350ef1dba574e1faa05a726d9517e", "1f2a00758fc38d764b05adb76110500870610bc8", "1dec63e2a929bb3be57906bfef94f38e969cfbd9", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d" ], "paperAbstract": "Cloud services are becoming increasingly globalized and data-center workloads are 
expanding exponentially. GPU and FPGA-based clouds have illustrated improvements in power and performance by accelerating compute-intensive workloads. ASIC-based clouds are a promising way to optimize the Total Cost of Ownership (TCO) of a given datacenter computation (e.g. YouTube transcoding) by reducing both energy consumption and marginal computation cost.\n The feasibility of an ASIC Cloud for a particular application is directly gated by the ability to manage the Non-Recurring Engineering (NRE) costs of designing and fabricating the ASIC, so that it is significantly lower (e.g. 2X) than the TCO of the best available alternative.\n In this paper, we show that technology node selection is a major tool for managing ASIC Cloud NRE, and allows the designer to trade off an accelerator's excess energy efficiency and cost performance for lower total cost.\n We explore NRE and cross-technology optimization of ASIC Clouds for four different applications: Bitcoin mining, YouTube-style video transcoding, Litecoin, and Deep Learning. We address these challenges and show large reductions in the NRE, potentially enabling ASIC Clouds to address a wider variety of datacenter workloads. 
Our results suggest that advanced nodes like 16nm will lead to sub-optimal TCO for many workloads, and that use of older nodes like 65nm can enable a greater diversity of ASIC Clouds.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037749" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2d6222ee8f5ac92b9ca9bc5b7eb0341d1e3c9c58", "sources": [ "DBLP" ], "title": "Moonwalk: NRE Optimization in ASIC Clouds", "venue": "ASPLOS", "year": 2017 }, "2d71b38bd26f6c58155a9b85d9c0fe7e4f09d942": { "authors": [ { "ids": [ "2300090" ], "name": "Amrita Mathuriya" }, { "ids": [ "37082461" ], "name": "Ye Luo" }, { "ids": [ "3444715" ], "name": "Anouar Benali" }, { "ids": [ "2692314" ], "name": "Luke Shulenburger" }, { "ids": [ "2718090" ], "name": "Jeongnim Kim" } ], "doi": "10.1109/IPDPS.2017.33", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.33", "entities": [ "B-spline", "Blocking (computing)", "Blue Gene", "Cache (computing)", "Cache coherence", "Central processing unit", "Coherence (physics)", "Coprocessor", "Manycore processor", "Monte Carlo", "Parallel computing", "Quantum", "Quantum Monte Carlo", "Quasi-Monte Carlo method", "Run time (program lifecycle phase)", "SIMD", "Scalability", "Shared memory", "Simulation", "Speedup", "Thread (computing)", "Throughput", "Xeon Phi" ], "id": "2d71b38bd26f6c58155a9b85d9c0fe7e4f09d942", "inCitations": [ "625b567202a6bef1d15c15d95c4d1cf743fc34c5", "82d3daba05a53421e52e0f44c8ecf17f8d28954f" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "213-223", "journalVolume": "", "outCitations": [ "f8e9b050c93af6dea582563f61b6460b590bc3af", "038c7b73b3699bf3fac8320681471f1c76af9cf7", "4f805391383b20dbc9992796d515029884ba468b", "8be19ccf819adef7259384d08a8aeed01d3ce387", "567a367a0c6656f3dd0d00759870c0aea59c2291", "f84210fdc29c2c535b9e294b037a27d5d29425bc", "1d471e865cd90bcfda44e51c1a277ab84a35e48a", "9b104131cdce7ab550d36614898ee3dfb5d6ded6", 
"182bb431401e4e748c27cf9786d1ab00d19a8d7e", "3b69e19aced65dfea57105faa8245a0413b932d3", "c085658c9e0cc73444bdaa22390798ccc1310742", "27e3e61e0f28301bce66d029fa1257ed946ead8b", "ac658261691d7d76f9763ab43e0fcedc499898d0" ], "paperAbstract": "B-spline based orbital representations are widely used in Quantum Monte Carlo (QMC) simulations of solids, historically taking as much as 50% of the total run time. Random accesses to a large four-dimensional array make it challenging to efficiently utilize caches and wide vector units of modern CPUs. We present node-level optimizations of B-spline evaluations on multi/many-core shared memory processors. To increase SIMD efficiency and bandwidth utilization, we first apply data layout transformation from array-of-structures to structure-of-arrays (SoA). Then by blocking SoA objects, we optimize cache reuse and get sustained throughput for a range of problem sizes. We implement efficient nested threading in B-spline orbital evaluation kernels, paving the way towards enabling strong scaling of QMC simulations. These optimizations are portableon four distinct cache-coherent architectures and result in up to 5.6x performance enhancements on Intel Xeon Phi processor 7250P (KNL), 5.7x on Intel Xeon Phi coprocessor 7120P, 10x on an Intel Xeon processor E5v4 CPU and 9.5x on BlueGene/Q processor. Our nested threading implementation shows nearly ideal parallel efficiency on KNL up to 16 threads. We employ roofline performance analysis to model the impacts of our optimizations. 
This work combined with our current efforts of optimizing other QMC kernels, result in greater than 4.5x speedup of miniQMC on KNL.", "pdfUrls": [ "https://arxiv.org/pdf/1611.02665v1.pdf", "https://doi.org/10.1109/IPDPS.2017.33", "http://arxiv.org/abs/1611.02665" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2d71b38bd26f6c58155a9b85d9c0fe7e4f09d942", "sources": [ "DBLP" ], "title": "Optimization and Parallelization of B-Spline Based Orbital Evaluations in QMC on Multi/Many-Core Shared Memory Processors", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "2d80fe39d1e254e26e1e9915e2196559a2d65024": { "authors": [ { "ids": [ "7280096" ], "name": "Fangfan Li" }, { "ids": [ "3236398" ], "name": "Abbas Razaghpanah" }, { "ids": [ "3320568" ], "name": "Arash Molavi Kakhki" }, { "ids": [ "28931849" ], "name": "Arian Akhavan Niaki" }, { "ids": [ "2450059" ], "name": "David R. Choffnes" }, { "ids": [ "36757962" ], "name": "Phillipa Gill" }, { "ids": [ "1729928" ], "name": "Alan Mislove" } ], "doi": "10.1145/3131365.3131376", "doiUrl": "https://doi.org/10.1145/3131365.3131376", "entities": [ "Blocking (computing)", "Communication endpoint", "Deep packet inspection", "Malware", "Middlebox", "Net neutrality", "Proxy server", "Reverse engineering", "Run time (program lifecycle phase)", "Testbed", "Traffic classification" ], "id": "2d80fe39d1e254e26e1e9915e2196559a2d65024", "inCitations": [ "3aff512047e93f0435402f04ebcd007f33e44c36" ], "journalName": "", "journalPages": "128-141", "journalVolume": "", "outCitations": [ "4f3834bce798782623550f0584361672b45c837b", "dda7b731f7a5aec049414371ca0d65518a74cb96", "605ed83a6d1f4eaf995e85830f373923b11d6c13", "fa4726dfbbb48530d849fecf7ba8c95552cd5d48", "2f74156349d454fd054f18f23c305613df19d28b", "754d3aa641d9da8e50796c3c4015fa064f10c1ba", "c82e79459a998f9048aa8d902142a0d6e1c69651", "62a13d8ce14fe16365862828122e08c944aa1c65", 
"4c7a5b7d4067721079789156f4fa692934885334", "423dccf69f9b5f36f109b94ae66a058c26529bc8", "77eeb3832252feae78900cc398b37d6f409c535a", "269b2257400306de3e4ef270982ea645b94ac7f7", "b85b61ef16d84fedd5e90714c5df51b0c0f10348", "271aa89b46e1a6fe3722c6fbf6ec268a7ce7973e", "1f5af933197d5e935ca035bcf4899f9beab1af9d", "1e8c01cf85a1fb680e195e0ac6c9a7bd17268787", "3aff512047e93f0435402f04ebcd007f33e44c36", "9121e514167b45cd7defa4d9439435a2bf948921", "4049b2ef1f8d66d01cf0fcb99635d8aa9f78f20e", "791382f7dc39154ec39ea249493d5f653b739df4", "00a9446982911cbd96a127f70976d39ecaaaf306", "904c6d870b994896515d5de7b292d1143e3f482b", "06c9511a3a29f1afa3971b1885ad56b5a890dbdc", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "1a29dd17b602bbbb487285fc6aa5fa2bb9bf8649", "1708eba3482a2bc755f405ef9446914f82a321ad", "119af27b7fbcd4f2c224e253e6337b35fa1ca7a5", "b77fa8a2c03c234c79429416596046066905e459", "52fc20fd7ca60152c413073edbe891f77c0e57fd", "38e32bda21fa83020a77efd8c488a81cbfe34827", "38e9167b47364a57b3d8f8af39c6304c7570e50b", "03be16fd2899620b29cbc45d473bb050dd25eab9", "15037a71328f065ae8e8034958b9aad15c7a79c2", "294bdb204d649cae0823760095dffdd32f6c837e", "3647b277726dab8826a518d92d4728088ef7989b", "7ce530db4cb2d8f048bbc1cb3b197776095e345b", "64ffe7ac4c383723e3148d297c4139407e75c39e", "680c4a69a063c2b7c26df3a0ff717d7a61ad9e08" ], "paperAbstract": "Middleboxes implement a variety of network management policies (e.g., prioritizing or blocking traffic) in their networks. While such policies can be beneficial (e.g., blocking malware) they also raise issues of network neutrality and freedom of speech when used for application-specific differentiation and censorship. There is a poor understanding of how such policies are implemented in practice, and how they can be evaded efficiently. 
As a result, most circumvention solutions are brittle, point solutions based on manual analysis.\n This paper presents the design and implementation of lib•erate, a tool for automatically identifying middlebox policies, reverse-engineering their implementations, and adaptively deploying custom circumvention techniques. Unlike previous work, our approach is application-agnostic, can be deployed unilaterally (i.e., only at one endpoint) on unmodified applications via a linked library or transparent proxy, and can adapt to changes to classifiers at runtime. We implemented a lib•erate prototype as a transparent proxy and evaluate it both in a testbed environment and in operational networks that throttle or block traffic based on DPI-based classifier rules, and show that our approach is effective across a wide range of middlebox deployments.", "pdfUrls": [ "https://conferences.sigcomm.org/imc/2017/papers/imc17-final61.pdf", "https://www.ccis.northeastern.edu/wp-content/uploads/2017/10/poster-liberate.pdf", "https://people.cs.umass.edu/~phillipa/papers/imc2017_liberate_paper.pdf", "http://doi.acm.org/10.1145/3131365.3131376" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2d80fe39d1e254e26e1e9915e2196559a2d65024", "sources": [ "DBLP" ], "title": "Lib\u2022erate, (n): a Library for Exposing (traffic-classification) Rules and Avoiding Them Efficiently", "venue": "IMC", "year": 2017 }, "2d95653f4cd2a227ae2ffc1a745570322f53ec57": { "authors": [ { "ids": [ "2479128" ], "name": "Yusuke Nagasaka" }, { "ids": [ "2763806" ], "name": "Akira Nukada" }, { "ids": [ "1696166" ], "name": "Satoshi Matsuoka" } ], "doi": "10.1109/ICPP.2017.19", "doiUrl": "https://doi.org/10.1109/ICPP.2017.19", "entities": [ "Algorithm", "Central processing unit", "Double-precision floating-point format", "Graphics processing unit", "Hash table", "Input/output", "Library (computing)", "Limiter", "Locality of reference", "Matrix multiplication", "Multigrid method", "Pascal 
(microarchitecture)", "Performance Evaluation", "Preconditioner", "Shared memory", "Single-precision floating-point format", "Sparse matrix", "The Matrix" ], "id": "2d95653f4cd2a227ae2ffc1a745570322f53ec57", "inCitations": [ "232e641a8b5f550c436af6336ee63e1cd771e073", "58d93090d4d1b7b028f29a92f01833429f9938b0", "fa3a07acd3937a0cc91e852a7cf52ef3c8c03cd2" ], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "101-110", "journalVolume": "", "outCitations": [ "cbb557ccf729e043cbb9ef6dd709bf75c9f64cdd", "53f31f82557d66b6de06ed749f426f49a78d5435", "b2da7e41dda6896444d8444b745f9c050a67dc4c", "6a525f07e0cf7cd4c8b617a0ea8d5f0a9e7093e2", "0987c923a5fb79934ff47ecdafdf858a51143750", "87601a4866373c63b4fd070214cab8b40b50058c", "7fb901192a4c7fa7c8c2396454b579414de3954b", "5a3cd8c65ffcc25bef346174d1f0bc3f83c5cbbb", "5e762186f9710c3e357195d22488b5616d574da6", "b9ccc4c453db47515ea6a429b0261b3936d3cf4d", "878ea676658817b48777406f915e001a0d1a339d", "0c9a56eb4f45d3969943e8cff74593e9c6c5f549", "1cd294f3bcd647c8a2b2bbce47e827a8ece8b973", "2e0ee957a4dea94ed706028d4206e0c2fe369de7", "136ffe66f6bb69c5ad2537531373220c2c704b57", "477a2e92d2fd2ca56fd989d42de58248f1ce04ae" ], "paperAbstract": "Sparse general matrix-matrix multiplication (SpGEMM) is one of the key kernels of preconditioners such as algebraic multigrid method or graph algorithms. However, the performance of SpGEMM is quite low on modern processors due to random memory access to both input and output matrices. As well as the number and the pattern of non-zero elements in the output matrix, important for achieving locality, are unknown before the execution. Moreover, the state-of-the-art GPU implementations of SpGEMM requires large amounts of memory for temporary results, limiting the matrix size computable on fast GPU device memory. We propose a new fast SpGEMM algorithm requiring small amount of memory and achieving high performance. 
Calculation of the pattern and value in output matrix is optimized by using GPU's on-chip shared memory and a hash table. Additionally, our algorithm launches multiple kernels running concurrently to improve the utilization of GPU resources. The kernels for the calculation of each row of output matrix are chosen based on the number of non-zero elements. Performance evaluation using matrices from the Sparse Matrix Collection of University Florida on NVIDIA's Pascal generation GPU shows that our approach achieves speedups of up to x4.3 in single precision and x4.4 in double precision compared to existing SpGEMM libraries. Furthermore, the memory usage is reduced by 14.7% in single precision and 10.9% in double precision on average, allowing larger matrices to be computed.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.19" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2d95653f4cd2a227ae2ffc1a745570322f53ec57", "sources": [ "DBLP" ], "title": "High-Performance and Memory-Saving Sparse General Matrix-Matrix Multiplication for NVIDIA Pascal GPU", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "2d98fc1f96e5bded2383f194f884d0865372e436": { "authors": [ { "ids": [ "38880105" ], "name": "Muhammad Shoaib Bin Altaf" }, { "ids": [ "32548473" ], "name": "David A. 
Wood" } ], "doi": "10.1145/3079856.3080216", "doiUrl": "https://doi.org/10.1145/3079856.3080216", "entities": [ "Dennard scaling", "Encryption", "Graphics processing unit", "Hardware acceleration", "High- and low-level", "Programmer", "Scalability", "User interface design" ], "id": "2d98fc1f96e5bded2383f194f884d0865372e436", "inCitations": [ "5942a6955880a7e575427683c7278c371ec2c5d4" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "375-388", "journalVolume": "", "outCitations": [ "0f892fa9574f24bc7b50fed94e0abbd84883c2dc", "0c1d5edab8931a19057a46685ee8c8c58a21489a", "02ca058f767d2706969255166d95d9d07cfdc614", "60cfe41fd68644fb19cba99babae694a2acacc17", "1ef593d1d50035e9abf98ad64da9095b2ecde95b", "1d286a264b233125b681e522e8f5fed596a8608c", "fb401b959710b71538a0cda8fd15c52718691e08", "f9a46020bd09cf19dbee24af0cd0ec6c3a2ea082", "1aa978bdf1f66952171b4b176c9200f1a286b842", "082c182f43333f2276ba505a896748a641aaeaaf", "0659411ebccf1abca4f9a3a5c8744b8bb153933e", "1087e2e1244665c9574ab5914ae13c6c88bcc096", "d33880a01318ec992071968c25059763146e6343", "772eb2b4334b1c0b905b0cca47d84fb8c2139e23", "370488843f80120797e1f0af22e9fdb0152ff657", "092217c2267f6e0673590aa151d811e579ff7760", "dee49d30a19f392ca9d002720a554800fa16d19e", "251d485f4a1e987886b92342482e613ec13b4d49", "0a2af2773ca4fcbd22cd7580d29ac7739bcf028c", "c50da84ed015168bd223a3234bec6cb750ee7c71", "ade263c815bf995916a2fa56c89f70efacf6ec5f", "679511fd4d0fa7fc889de0c3c50ecee80d9996d3", "74463aee2d9d084e10c0cb4ee72a80eb6a641803", "c3c244e6a07810e738c8eb3c10d652b7da0267d6", "2c4198fa906221a81435a240c97f861044f367a4", "34575d258392298a871fcf58d9ed2b7ac6f5d4ad", "03b2e534532e9558e560df0bed74976b8f48c1a5", "0c63bf7cc53a745e18c384cf1de34999c3ffbff7", "0dea8362e54b2a6ad06aa1cf3aa09dcc60eef847", "02ebdcf8200135ec0433e12e4ef2459ac740370b", "c42d901537023af85755aad6ccb81c536f87e703", "9a9f8973003098ad6065a7848a78cc9bf60926a9", 
"6757659aeba247db2a35691ee3b4c029e1a2dcf4", "320a6faa396f27f6f83b22ded48944ffd574fa1e", "68c3d3fca5c7bd172832f480a92e98d106da5c34", "0ca1e465dd85b8254bcdd7053032d7eab6e2d4b4", "06291eb02688431e25b39265a8c42fc4270c9604", "2c2fa29dfbbab106f1b94cdfdc67939d3355bd30", "24251f02c34f32b1dd96572a1d984c4463a26a10", "562836843897bcb98801838b44f3e5ff09fe212a", "3dfd3dfaac573c90e0eae54630881a2b412ee402" ], "paperAbstract": "With the end of Dennard scaling, architects have increasingly turned to special-purpose hardware accelerators to improve the performance and energy efficiency for some applications. Unfortunately, accelerators don't always live up to their expectations and may under-perform in some situations. Understanding the factors which effect the performance of an accelerator is crucial for both architects and programmers early in the design stage. Detailed models can be highly accurate, but often require low-level details which are not available until late in the design cycle. In contrast, simple analytical models can provide useful insights by abstracting away low-level system details.\n In this paper, we propose LogCA---a high-level performance model for hardware accelerators. LogCA helps both programmers and architects identify performance bounds and design bottlenecks early in the design cycle, and provide insight into which optimizations may alleviate these bottlenecks. We validate our model across a variety of kernels, ranging from sub-linear to super-linear complexities on both on-chip and off-chip accelerators. We also describe the utility of LogCA using two retrospective case studies. First, we discuss the evolution of interface design in SUN/Oracle's encryption accelerators. Second, we discuss the evolution of memory interface design in three different GPU architectures. In both cases, we show that the adopted design optimizations for these machines are similar to LogCA's suggested optimizations. 
We argue that architects and programmers can use insights from these retrospective studies for improving future designs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080216", "http://research.cs.wisc.edu/multifacet/papers/isca17_logca_lightning.pdf", "http://research.cs.wisc.edu/multifacet/papers/isca17_logca_slides.pdf", "http://research.cs.wisc.edu/multifacet/papers/isca17_logca.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2d98fc1f96e5bded2383f194f884d0865372e436", "sources": [ "DBLP" ], "title": "LogCA: A high-level performance model for hardware accelerators", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "2db25d8dc8b44081b94efd2e092e869aa142f85a": { "authors": [ { "ids": [ "2677436" ], "name": "Herv\u00e9 Yviquel" }, { "ids": [ "2006159" ], "name": "Guido Araujo" } ], "doi": "10.1109/ICPP.2017.44", "doiUrl": "https://doi.org/10.1109/ICPP.2017.44", "entities": [ "Access control", "Amazon Elastic Compute Cloud (EC2)", "Amazon Web Services", "Benchmark (computing)", "C++", "Cloud computing", "Computation", "Computation offloading", "Computer cluster", "Control system", "Gigabyte", "Graphics processing unit", "LLVM", "MapReduce", "Message Passing Interface", "Microsoft Azure", "OpenMP", "Programmer", "Programming model", "Scala", "Yet another" ], "id": "2db25d8dc8b44081b94efd2e092e869aa142f85a", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "352-361", "journalVolume": "", "outCitations": [ "99b2348bc0a4425294dedba612de72cef0b63402", "5ef48cdf259b244314340e37db710373524046ca", "5f3f9223c5c9f896be099bc177929febad508407", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "14e0d2bdfb3fca202b3fc0e19a12d3082f81b931", "74d3873793bf5a954f904db6d3f9a6e2f5819605", "cc9eb8497c1d45c26ce208a4c7785fc30a78a530", "0541d5338adc48276b3b8cd3a141d799e2d40150", "c78b35807cbc4873b6019c7f27e1d3ad5d61b2a7", 
"6123cf8a016086e48be535db6c36108bb1617cf9", "562836843897bcb98801838b44f3e5ff09fe212a", "2194c3460ab71f3826db00b045b2ae590c753319", "27bd1c009a54eb935f6138d0b0f61bb9a9d874fb", "f4dff66ba8f2338d118f379f2eff1410feb57ce6", "3664afc0363ea024446997896ae31df145c8338c", "ddc1d7b5bffff2a2579b1aca74cb9b6f69a1f0e2", "fcee5dcca592ff2ed4aaca0baa65bbd719793f52", "10aef22f272fc059200a8389cb653ddfb0b5661b", "16a74ec035f5cb660e839abf1ac076bea6469989", "7dc44454887df37b83dca85e9192e98f6e02b50b", "23d25aebea64861bb2d1277f7e90055cc64fd881", "2c2cfbec94307fc92192e5a4be0d0731799f9bf9", "f9ac0037556f0f10819c7241e62ca8e51021ee81", "38c48d4a31ab050c6e750cdae21e00421172f694", "0b72a5e4bec54e9f0a4d77db5b484d27886b49fe", "8b5c166f9bbc6efaf929828c4d11f1a494673c1b", "14d644c617b1bdf57d626d4d14f7210f99d140e6", "dd5e7b04cb142a6a34ad3eff43dac7326cbe8247", "2660dcf5bd16d14862a7bbb241fa4d85ae34327f", "02cbb22e2011938d8d2c0a42b175e96d59bb377f", "32ef8d891edde06cc01357fa5c4d1ab7fe631720", "2d66dac85b819503ce6d311d37770e31bdf36692" ], "paperAbstract": "Computation offloading is a programming model in which program fragments (e.g. hot loops) are annotated so that their execution is performed in dedicated hardware or accelerator devices. Although offloading has been extensively used to move computation to GPUs, through directive-based annotation standards like OpenMP, offloading computation to very large computer clusters can become a complex and cumbersome task. It typically requires mixing programming models (e.g. OpenMP and MPI) and languages (e.g. C/C++ and Scala), dealing with various access control mechanisms from different clouds (e.g. AWS and Azure), and integrating all this into a single application. This paper introduces the cloud as a computation offloading device. It integrates OpenMP directives, cloud based map-reduce Spark nodes and remote communication management such that the cloud appears to the programmer as yet another device available in its local computer. 
Experiments using LLVM, OpenMP 4.5 and Amazon EC2 show the viability of the proposed approach and enable a thorough analysis of the performance and costs involved in cloud offloading. The results show that although data transfers can impose overheads, cloud offloading can still achieve promising speedups of up to 86x in 256 cores for the 2MM benchmark using 1GB matrices.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.44" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2db25d8dc8b44081b94efd2e092e869aa142f85a", "sources": [ "DBLP" ], "title": "The Cloud as an OpenMP Offloading Device", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "2dc0a89e159e33e49399b7a1eb6f503b58dd7d74": { "authors": [ { "ids": [ "1883826" ], "name": "Shen Wang" }, { "ids": [ "1959037" ], "name": "Lifang He" }, { "ids": [ "1678185" ], "name": "Bokai Cao" }, { "ids": [ "2828701" ], "name": "Chun-Ta Lu" }, { "ids": [ "1703117" ], "name": "Philip S. Yu" }, { "ids": [ "1679791" ], "name": "Ann B. 
Ragin" } ], "doi": "10.1145/3097983.3097988", "doiUrl": "https://doi.org/10.1145/3097983.3097988", "entities": [ "Bioinformatics", "Bioinformatics", "Convolutional neural network", "Experiment", "Feature learning", "Linear model", "Nonlinear system", "Supervised learning", "Unsupervised learning" ], "id": "2dc0a89e159e33e49399b7a1eb6f503b58dd7d74", "inCitations": [ "b3db66ddc80976b502379367e342dadb5f63abbe", "6e0c83a6cf058067298bf3176a81f680b4cf29f0" ], "journalName": "", "journalPages": "475-484", "journalVolume": "", "outCitations": [ "accb538090f1f90606b55262a0aa5d4dc4dd44ce", "4d0a6b528bdc4d6c272b916d586f14dcc4f030b2", "bd88472c33a76c4256c16c50a72aa9a88be1b5ad", "03d61a33796234b8bae5ac38de9b26c1c5ed9e2f", "05ea7881fe9be1bb81803c9debacbc45c8b974a3", "5ac1e10eabeaba2bcd2be26e32149d2396c9746e", "0b99d677883883584d9a328f6f2d54738363997a", "776ddfd73d881ddce55cd4144beae1f1cae90426", "6e7013f7e3801a5b56e12fc3f8dc1656f5d010bb", "4dc578364f357b993b5554b9181c90c84aa6b4d1", "9515417f17537d1be2711c10db48224498571c84", "25a1ede91368f998f44945ff584aac1c6d1b9839", "013cd20c0eaffb9cab80875a43086e0c3224fe20", "a0b1a65cf7dbceba8973212e3b43643319b555d9", "9f3a772c08e89557e383bf1c5afea69331ac62c2", "061356704ec86334dbbc073985375fe13cd39088", "2d2a22f1f9eae9188f3d43254daa2d5b7f3a2470", "18862760ac708a589afa5848ab55931996db1b28", "6f390eee4c9a082e02843fb34046f653624e9b76", "6e7dadd63455c194e3472bb181aaf509f89b9166", "26b6b834f57f2b2986721520b16a5edcca88740c", "54d644e24172384e4b9875e804c878a74c2168a7", "55e6eb5ec219e7aa71018afddaf80b35aff53d0e", "b4de97dfef76a76eff34bf9ac6f72aabe0ca779f", "65ad0e876216ea034b7958f016456e32666bc5c6", "2a1471dd22a2585b5855b02b6886958aa827c941", "10854c4038e13219f77b4175f146b0f511685d02", "09827aed5e6e193a9b2db40ef13efa62329aa1d3", "14f2bc1234ed1418790262f56488dc4447c78bc8", "05aba481e8a221df5d8775a3bb749001e7f2525e", "45a916500ce98c8d018c13de4c1d5c53130e8a72", "4876f705c2d2b644b7f19f069834b2873b1f94e1", "63664c836ed5134a74aedc9d987a716290da0327", 
"1437415df29d3927c7851c7a0db0edd4a472d6e1", "e0d2861a9022667a93a8a0573d44f238f7c3a027", "8b1da877a3dbc42426b4aa58822e3bcafd39d6ab", "3783fd271a4fa5b65894743c0a6b19a02b268120", "6b613c7e03b069a75c06758dd3ffdef2b9ce742f", "1fe81af049689e6bf5d62be9e12237955ffd5a1b", "1efdad6f91e830fd64306e4625f74191b05ef9c4", "69280c387a12c330ec3acdcd320f262788e157d1", "01fcae344d2edb715bcc63a40b6052c0331741bd", "0e78074a081f2d3a35fbb6f74ba9b7e27e64757b", "4532a6dc17973a44d068ef64d8a40b8070209fbd", "09b8120cbc52e7df46122e8e608146289fddbdfa", "e0336c0d72b3f6a4b7adbc0eb40da95cffda4544", "68151343db72713c87f646546d12c84306109643", "e49ff72d420c8d72e62a9353e3abc053445e59bd" ], "paperAbstract": "Mining from neuroimaging data is becoming increasingly popular in the field of healthcare and bioinformatics, due to its potential to discover clinically meaningful structure patterns that could facilitate the understanding and diagnosis of neurological and neuropsychiatric disorders. Most recent research concentrates on applying subgraph mining techniques to discover connected subgraph patterns in the brain network. However, the underlying brain network structure is complicated. As a shallow linear model, subgraph mining cannot capture the highly non-linear structures, resulting in sub-optimal patterns. Therefore, how to learn representations that can capture the highly non-linearity of brain networks and preserve the underlying structures is a critical problem.\n In this paper, we propose a Structural Deep Brain Network mining method, namely SDBN, to learn highly non-linear and structure-preserving representations of brain networks. Specifically, we first introduce a novel graph reordering approach based on module identification, which rearranges the order of the nodes to preserve the modular structure of the graph. Next, we perform structural augmentation to further enhance the spatial information of the reordered graph. 
Then we propose a deep feature learning framework for combining supervised learning and unsupervised learning in a small-scale setting, by augmenting Convolutional Neural Network (CNN) with decoding pathways for reconstruction. With the help of the multiple layers of non-linear mapping, the proposed SDBN approach can capture the highly non-linear structure of brain networks. Further, it has better generalization capability for high-dimensional brain networks and works well even for small sample learning. Benefit from CNN's task-oriented learning style, the learned hierarchical representation is meaningful for the clinical task. To evaluate the proposed SDBN method, we conduct extensive experiments on four real brain network datasets for disease diagnoses. The experiment results show that SDBN can capture discriminative and meaningful structural graph representations for brain disorder diagnosis.", "pdfUrls": [ "https://www.cs.uic.edu/~clu/doc/kdd17_SDBN.pdf", "http://doi.acm.org/10.1145/3097983.3097988" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2dc0a89e159e33e49399b7a1eb6f503b58dd7d74", "sources": [ "DBLP" ], "title": "Structural Deep Brain Network Mining", "venue": "KDD", "year": 2017 }, "2dc26e42bdb50be00b3f7affe745c4384ff833be": { "authors": [ { "ids": [ "1708704" ], "name": "Jiajia Li" }, { "ids": [ "40231150" ], "name": "Jee Choi" }, { "ids": [ "2739832" ], "name": "Ioakeim Perros" }, { "ids": [ "1738536" ], "name": "Jimeng Sun" }, { "ids": [ "1771649" ], "name": "Richard W. 
Vuduc" } ], "doi": "10.1109/IPDPS.2017.80", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.80", "entities": [ "Algorithm", "Collaborative product development", "Computation", "Computational complexity theory", "Data mining", "Memoization", "Scalability", "Space\u2013time tradeoff", "Sparse matrix" ], "id": "2dc26e42bdb50be00b3f7affe745c4384ff833be", "inCitations": [ "dab53f03682789b483822bc521204bfb39ee2458", "0f0bcf003e7de278514dff084487873762b9ffb3", "235d090c8549ff3b353103380313d70e33c47e4e" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1048-1057", "journalVolume": "", "outCitations": [ "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "2d03baec8ac1568e6813aa43d625d552524f977e", "757cb62e3d1c0643c9f83bf57d45e427bd76e235", "280bbaa66095fd6f89999003b802700935fdf77c", "8d4253cb0277a527a2d2a2c76e2ed767eb13b1b5", "53a225f2843e8544ca9c615ecfcc5fad26083e49", "4b3643e5436a8b8430361e021a3c863765bab3fb", "669508257d4621864011252d0423047f98d9329c", "231f97057e1efed073c20ccdf3aa3c5aaf063ffb", "28f38dbcb0b33ee3d881421d71c235018e8b54aa", "8eae36cabdce7cba7c1fc316596002cd84ed5e95", "0072eb224991ada6fc8a4e2d3465e4a51c0b26bc", "07ed71b436b9adf23f0f93c8e4533461b82e769a", "2e8ab628bc9f256c11c898aa44f049143c74d05d", "38b389580d774ce513284e671ff3bbcef0258de2", "08d82c6eef31d2259dcd5657d413e772e2cee051", "f3f6e7d88a59218a72c621abd0188367c7db3b48", "44ccdebc83766fb6a2016fa58c3c3a337356b79b", "0a5aef2da6166c9b26ecc0a421f6bb5fd586ff97", "6f6f1714af8551f5f18d419f00c8d3411802ee7a", "00f581aca4dd370615fa0ea99e730d6dd42fe347", "33d69e4c30d355193cb2aef0c94d28c6024e545f", "a9653a27052d666b7ed47524871dc9c3a9b92cc4", "255aeb5c2a8eea15db08c617481ddbb35a41bfe4", "07d996f6857341be66e75044addf010e72e93a4a", "70560383cbf7c0dc5e9be1f2fd9efba905377095", "1d0f25989452abbbc8feaf00a034ff110fc4b350", "3166a9f1b284a5311468253f8efd9a35bf100d24" ], "paperAbstract": "Given an input tensor, its CANDECOMP/PARAFAC decomposition (or CPD) is a low-rank 
representation. CPDs are of particular interest in data analysis and mining, especially when the data tensor is sparse and of higher order (dimension). This paper focuses on the central bottleneck of a CPD algorithm, which is evaluating a sequence of matricized tensor times Khatri-Rao products (MTTKRPs). To speed up the MTTKRP sequence, we propose a novel, adaptive tensor memoization algorithm, AdaTM. Besides removing redundant computations within the MTTKRP sequence, which potentially reduces its overall asymptotic complexity, our technique also allows a user to make a space-time tradeoff by automatically tuning algorithmic and machine parameters using a model-driven framework. Our method improves as the tensor order grows, making its performance more scalable for higher-order data problems. We show speedups of up to 8× and 820× on real sparse data tensors with orders as high as 85 over the SPLATT package and Tensor Toolbox library respectively; and on a full CPD algorithm (CP-ALS), AdaTM can be up to 8× faster than state-of-the-art method implemented in SPLATT.", "pdfUrls": [ "http://fruitfly1026.github.io/static/files/ipdps17-jli-slides.pdf", "http://users.wfu.edu/ballard/SIAM-AN17/li.pdf", "http://fruitfly1026.github.io/static/files/ipdps17-jli.pdf", "https://doi.org/10.1109/IPDPS.2017.80" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2dc26e42bdb50be00b3f7affe745c4384ff833be", "sources": [ "DBLP" ], "title": "Model-Driven Sparse CP Decomposition for Higher-Order Tensors", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "2dc3ec722948c08987127647ae34a502cabaa6db": { "authors": [ { "ids": [ "8495818" ], "name": "Tara Safavi" }, { "ids": [ "19992758" ], "name": "Chandra Sripada" }, { "ids": [ "2479152" ], "name": "Danai Koutra" } ], "doi": "10.1109/ICDM.2017.50", "doiUrl": "https://doi.org/10.1109/ICDM.2017.50", "entities": [ "Baseline (configuration management)", "Connectivity 
(graph theory)", "Scalability", "Sparse matrix", "Time complexity", "Time series" ], "id": "2dc3ec722948c08987127647ae34a502cabaa6db", "inCitations": [ "8dcf09644b56f32802fb59796bd777433ee47510", "532c30eccd3629f55c4818bae98223a81126cf5d" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "405-414", "journalVolume": "", "outCitations": [ "7eccdd122c16999fc7f5be74a7e55ab9ac158247", "5f5873a30755c6ca3dfdedcb4bdd6081f1cc792c", "065677238c6dea01d8dfd5f6c2616c1595636f56", "2a43f9e8e9e4d6011a0287b5ef315bd24fd308e1", "6f1fc1a9234b6d29ca359ce0755cf0f616e1198f", "81b1dbb455358b1a8bf80542935a3a75069b120f", "103ac7f316bf8cdad3133b4ce2bbd28d091e7974", "dc9c1353be73c4c29f0c3f5b8a9b4412354fb707", "c96e312c96d92c9900610a98d57fe68701cd1203", "3d6a55c4f9aff16156f4eb4e0caab1543419769e", "241c93797b0a08da3193903b40936cfd1b8e0da4", "ed5528bf8bc7698e92bbe67ac6bf3ec89d21d307", "8f3f63773aa801b2cdcfa2e5699e3abb9aed7443", "2443463b62634a46a064ede7a0aa8002308946b2", "56052b0740f2b6c39c16c22087d7397f1b31a281", "122eddb0391a84eb40bca0370975229919e2e10b", "59cdf849049627e4c30f3bd866e3a7e03e893251", "0b1866b187148fed570809773111549684a0e68b", "5fd5654ae9cb9e452fc81430c1dcdb6786a27849", "323c882e3b79e8812d2186912dfd0e64d06a956c", "00af4d7a9de6f01b9b4e468bd8d63c4d5da6bebd", "05d8001426d1cec4ed962e72a28d777fd73d3ae9", "4c13b31514318a8f019a4f29d0c7c773bef6355b", "cb1dc33a1c6f9e00a1445a7d485fcd5a6f2849f4", "1df7a24940b1c64637cdd9e467204e6a9b875227", "e0336c0d72b3f6a4b7adbc0eb40da95cffda4544" ], "paperAbstract": "Discovering and analyzing networks from non-network data is a task with applications in fields as diverse as neuroscience, genomics, energy, economics, and more. In these domains, networks are often constructed out of multiple time series by computing measures of association or similarity between pairs of series. 
The nodes in a discovered graph correspond to time series, which are linked via edges weighted by the association scores of their endpoints. After graph construction, the network may be thresholded such that only the edges with stronger weights remain and the desired sparsity level is achieved. While this approach is feasible for small datasets, its quadratic time complexity does not scale as the individual time series length and the number of compared series increase. Thus, to avoid the costly step of building a fully-connected graph before sparsification, we propose a fast network discovery approach based on probabilistic hashing of randomly selected time series subsequences. Evaluation on real data shows that our methods construct graphs nearly 15 times as fast as baseline methods, while achieving both network structure and accuracy comparable to baselines in task-based evaluation.", "pdfUrls": [ "http://web.eecs.umich.edu/~dkoutra/papers/17_scalableNetDiscovery_ICDM.pdf", "https://tsafavi.github.io/assets/icdm2017hashing.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.50" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2dc3ec722948c08987127647ae34a502cabaa6db", "sources": [ "DBLP" ], "title": "Scalable Hashing-Based Network Discovery", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "2dd2299ddbbd3daa9872efe5f613c1e22d43b4ad": { "authors": [ { "ids": [ "1679146" ], "name": "Cormac Herley" }, { "ids": [ "1748222" ], "name": "Paul C. 
van Oorschot" } ], "doi": "10.1109/SP.2017.38", "doiUrl": "https://doi.org/10.1109/SP.2017.38", "entities": [ "Computer security" ], "id": "2dd2299ddbbd3daa9872efe5f613c1e22d43b4ad", "inCitations": [ "29f5c3ee46d226bed4aec326c9d1081b957e7e69", "f10ea2c2bd1bfe3442e4f60b57eb429afb0f4edd", "1ef59af394e38d393a49a62162c842726c813f36", "793167a69e0f0a5b9621519eac4457a66bbf07d5", "7a3dd28b462da24d583c85bcb3a10e7bb4feffd6", "381397d7dfe7c5040cbd5616aee85cf339465a99" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "99-120", "journalVolume": "", "outCitations": [ "265f7bdcb3e6bb6d32146113e3929e2365bbd5af", "ccdf1011cb4ab34be1197c3f405feccdf2657f91", "52be8ea875492b8fa8828422a17d482aca89c6e4", "697bcaae27143c11d98a526a6d855f713d6ccbfa", "8b6a22b53e9ab50d29c804311e9151f09a8e7243", "fca560f900490f7fdadd4a031310a5288bcfec04", "fd3e081ff6e0f06f9130b0160d6239514b9b13a3", "e1a7f6a879bc68f58bd8af296147f4d920d871ef", "514dac065a1673377b417c1a1b60e0d54798abff", "e16cf0bf7169a0c23fe9e11656336588fc2b1688", "4fcacd750ceff5b5915bfbe678e9df0529a822d2", "61c88c3babb480aaa9e5bfdc4ab37b829ca4f59e", "6b22ed103bc3d86004599992380b8b3104c75df5", "29779635d72d177f0a88c7654c23df5bb3806257", "68ce6b2282118babb83ca90faa792e1a0aafd96b", "2275639ea46226789d7bf4b44b2c94c22cb801a9", "05ff208abbb01b87f6e7181a9af99f4590076b44", "2b6ce083906634e3c3b084e4c9139fb58f082df6", "380f0b9b16f933b240e53d38aebdd8773d6302de", "4f121360f731d1604a5a84007fb04c6e9f3f83de", "cf3ac00d8b38383d7f34a4419fe1d77915dd7a40", "913d3fc4afa5685249c8245edfae2d9fe4ce3c28", "c5bc1f9463d4e5ef10c3febf093ceb2e740aa516", "5a49af99a00bf63f28584892f5f69f9a4b12f5f7", "b157afc61de0c685a0faf95cb447f3250580ea05", "b43a5b044d0eabc0c7975967e35f482736727954", "26517ad4d5364fad35c32cc6c419481c9ba0b0d7", "1ab4f547971ffbf1db4e3a7c1eea1f033c20c590", "b2f6a6dbf63b9edaa8d78573531438626bd667a3", "1bbb160a886b61113f3ce494af055d1568e30594", "4a41d1e8f4e7e45d81dc188501e106338281ad6b", 
"3acae798376fde41cd7d3f668a063d3eb60a5f95", "0b83159ebfdd930695afb54c151cde23774dc642", "16e85d76e57739da3082ca9dd4868b240c0b3c86", "54c21079a5913cf3f01d80a2d39fa562d4c3c696", "17886b4911ffd50d7e02a574caad34a286458b3a", "8a257fcbfaa25a1ffaad44e35ba2d35090d33ecc", "9730df71bdf9121732b1478e3411b6335bb891e7", "36222f8eb2ccf21ca345e15186cea64506581543", "69b7456f3d47fed3745239b5f67996a0b9a1a5c9", "258abedb80c104e544d6d406113a80099214b500", "6d3753abbd8550d1e6864edc8b83c226b09685b5", "2a0c41a9de560feef2bab4c2f3f47e2719b0e2b4", "884d0c99d757c075816f07f1f4b5a3e89376a119", "3d46dbb0da1b4e0ee5b45c46525d9459fb94222d", "9682341a91f0ea73f3dd9b3548c1e113d7a7f61d", "5ee5eec37590adf3cda9d04c0e12dff796a0dbdf", "518ba869f17b0045be25a86e032f6343998ca1cb", "a37d7650f6ab5f116656a6042dae0bd9edc0f1b5", "ec12ed3ca9a21ac6f051c2bd237f50b2d28097e6", "0103b4a14ab5396b75c2eec39865e0cec66a8e19", "70b081fc63c84506a2ed53c06d172f653d380ba7", "33afae14132d0022c4d5e862d7d22174a3a21c87", "33e250711d6eab5a4cf3a5543a67c7179f376af3", "1d9802a3b8815435275ccb654b053c3f9ee23e67", "cf00caed101981f5334694c687d7cd76ec780007", "58f08948703827382db5e27d52c1907df9c3e97a", "74c5f2ec07faf5c39c39b24dcf7cdfe5466078b7", "492184989035226d1207801e3e5b9dff8b5f6427", "8d91c5dc758e2410a5dab045f1a244bdfc4dfff6", "2bf6bb4d36ccf3825517772aa0102f67adbec1a3", "d3b817cd7732e02420d6a4dbf58fac09ead6958a", "037d937dfea11f5bd939a74a656cf3f50b9c04f4", "0fd397f368c852cdfa98960f185ed2dcfd90aa8b", "5178eb172d49f901a87e5fd6b5b05a298c374074", "2e383f5441f059fe4bb4f3747b1427533fc95569", "07848afa4b5f2f09cd2fe28a6cdc25c642da61cd", "685ffd80cf779233d1823045274ac90ba010468f", "23e1f1e190d487159abdcb992b0cec162ce47d04", "fe1fd38e88bda87e02d06f2061138dd3d8d8e9cb", "6fececc819b345a6716c5b07e6977abad75f78a8", "8bba0a42134ea5df896cdde59e62bbd90e9f4dd4", "3045a26fead665427fd9b0d446afc93e7f187c6b", "18cea7e54dc801e7326b49e64df571122d9b2096", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "8b59a73c8b0a213f1916eb5d8dc8d28563d810ec", 
"2ba16a91a0fa1cdd06dec7b0df8f7808e7d9833e", "1bef43b8bca581c061d0a3acf6afdc586e7b21c4", "009583f0ac5518f5854fadea7f71a67b1d2fcb89", "389f55c5c376db4ce1c88161dca98c329614faa8", "93bfec5b4b45d10ebe27dc67952a307ae02079b0", "4716cca7c5c936d4db5d0faa62673997e3bdff3c", "6d01e72f05c4394ce738255bb42e8fd68ce71b8c", "9b5e942ee6c354219250a341fb663810f82c96e3", "2d7e7672ae8ed46fe77296ea87353ac8b8c6b499", "a9c7cab8757028405a54e5ef6854e182094c4ac7", "bc7a8dc3dd9d1071a2bdba158ed7760d6e36cd46", "226242629f3d21b9e86afe76b1849048148351de", "4c889b2900cfe755676f347b3337e09b18703d74", "168488dc2088dc5a48e7c85e7fd487145d161223", "c803471b2a56a58c3310ddc88bea2ecccfd8fdac", "36ad5b9db48cde06d82f9a45e3855c903526653d", "660daa676976e5f3d8cf50ebac5b95f7559dcec1", "e7ac90add63f3482ab53d46fd17d681df8255c81", "3dc342566a881634c38e2b72297d1801504a6d11", "11d593770621b76df4cc11657594ff81dc4a9e4b", "2a257fe0284c1b4490e6b16e1f2e29555d8fccc4", "3456da2ed3221b48c5bad9748209bb67be84666e", "30a46bc5246370250cc91351bf9c428312ee924c" ], "paperAbstract": "The past ten years has seen increasing calls to makesecurity research more "scientific".On the surface, most agree that this is desirable, given universal recognition of "science" as a positive force. However, we find that there is little clarity on what "scientific" means inthe context of computer security research, or consensus onwhat a "Science of Security" should look like. We selectively review work in the history and philosophy of scienceand more recent work under the label "Science of Security".We explore what has been done under the theme of relating science and security, put this in context with historical science, and offer observations and insights we hope maymotivate further exploration and guidance. 
Among our findings are thatpractices on which the rest of science has reached consensus appear little usedor recognized in security, and a pattern of methodological errors continues unaddressed.", "pdfUrls": [ "https://www.ieee-security.org/TC/SP2017/papers/165.pdf", "https://doi.org/10.1109/SP.2017.38", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/03/scienceAndSecuritySoK.pdf", "https://oaklandsok.github.io/papers/herley2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2dd2299ddbbd3daa9872efe5f613c1e22d43b4ad", "sources": [ "DBLP" ], "title": "SoK: Science, Security and the Elusive Goal of Security as a Scientific Pursuit", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "2dd2607086b9ace6b6d526e273959cf01d215918": { "authors": [ { "ids": [ "38724347" ], "name": "Mohammad Mejbah Ul Alam" }, { "ids": [ "1870340" ], "name": "Tongping Liu" }, { "ids": [ "1804358" ], "name": "Guangming Zeng" }, { "ids": [ "2581019" ], "name": "Abdullah Muzahid" } ], "doi": "10.1145/3064176.3064186", "doiUrl": "https://doi.org/10.1145/3064176.3064186", "entities": [ "Categorization", "Critical section", "Deployment environment", "Information", "Lock (computer science)", "Programmer", "Software bug", "Synchronization (computer science)" ], "id": "2dd2607086b9ace6b6d526e273959cf01d215918", "inCitations": [ "e795d8bf3f78bc2a0db904d3c9bb854ddad1e715", "aaca858e5d071b7215cd9954371d5911745145b3", "16e56c0147e1e9b331807263b821b76a5b9ff18d", "11bf94a59cd0f0fa4ae4ca34f73ea54901b4e166" ], "journalName": "", "journalPages": "298-313", "journalVolume": "", "outCitations": [ "411714a6890392f4a1794dd862917f04af84df2e", "a8bbbde26c19e013343cca08c758bcca3f60a0d3", "5a9922148d820c5c775a1d8d54906d273e671aa2", "211ee05c6635c5a4a9bd55c4df7893671b40db63", "01225251abeb1bd491c589ded10c88c6ddf01527", "1963455d66a7fa9755216fd15ee47a2ad3d86827", "41d56de3c6a766f171066bce6a4a63e51ce0befe", 
"3b9cb8d49bb42a2dc56df3929cdbb557f3fd1bae", "7f5f9991f4663f4cbd7bd279a1eaf7ffc0c01351", "71d584f310f11216d9e5771af58930c5a8f1dd47", "77e68c7ada7c42336aa99990bf7b0601ef2a0a19", "1ee116d4253bc30c0988c995a3be2c594337f856", "dea6f15f2cf7649e6a13347f5325ef19c9f25747", "082e054aa9997ab58638eaca4531a328106d67d1", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "fc9837533e588664bef8e66cc695ea5c2b7e916c", "7b1a8647dd6482adee6b774d694e5c85b6d5a9cf", "032f1a16ad4cd815ca5cbf3dbfca2714007a1a2e", "27da29cf8bb9e19edbde212f079169cdae7f7436", "10ce2a90601895e96ed9c5d8b0c1e69c07f721fa", "620bb971f3b71e8cc146e5a3748f090aaa03e839", "e23298e18aa92ac43fa941d0f5eacb339905b685", "7f24d63fb1100d827d31041cd2293763a04e677b", "7341c9112baa937788179436364c56af013bdf9e", "8090a0702dae2a90bb614e6ef8de4f049e596233", "0254e7809ea94c30adedd5e853bdd0014b6521c9", "cc8f94f9a26d2e27a1b028958a4aa349a0e1a640", "0653e2ed9f683868cb4539eb8718551242834f6b", "d82fde923093716dba6a723b984f7f4e57e503f8", "97c649dc68ad8818c7e2b7f75b9c164aa840f6a5" ], "paperAbstract": "Despite the obvious importance, performance issues related to synchronization primitives are still lacking adequate attention. No literature extensively investigates categories, root causes, and fixing strategies of such performance issues. Existing work primarily focuses on one type of problems, while ignoring other important categories. Moreover, they leave the burden of identifying root causes to programmers. This paper first conducts an extensive study of categories, root causes, and fixing strategies of performance issues related to explicit synchronization primitives. Based on this study, we develop two tools to identify root causes of a range of performance issues. Compare with existing work, our proposal, SyncPerf, has three unique advantages. First, SyncPerf's detection is very lightweight, with 2.3% performance overhead on average. Second, SyncPerf integrates information based on callsites, lock variables, and types of threads. 
Such integration helps identify more latent problems. Last but not least, when multiple root causes generate the same behavior, SyncPerf provides a second analysis tool that collects detailed accesses inside critical sections and helps identify possible root causes. SyncPerf discovers many unknown but significant synchronization performance issues. Fixing them provides a performance gain anywhere from 2.5% to 42%. Low overhead, better coverage, and informative reports make SyncPerf an effective tool to find synchronization performance bugs in the production environment.", "pdfUrls": [ "http://www.cs.utsa.edu/~tongpingliu/pubs/syncperf-eurosys17.pdf", "http://doi.acm.org/10.1145/3064176.3064186", "http://www.cs.utsa.edu/~muzahid/pdfs/eurosys17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2dd2607086b9ace6b6d526e273959cf01d215918", "sources": [ "DBLP" ], "title": "SyncPerf: Categorizing, Detecting, and Diagnosing Synchronization Performance Bugs", "venue": "EuroSys", "year": 2017 }, "2de953868b95608acb0f15feefd9df49bf06e958": { "authors": [ { "ids": [ "3286257" ], "name": "Shixiong Xu" }, { "ids": [ "1734249" ], "name": "David Gregg" } ], "doi": "10.1109/ICPP.2017.53", "doiUrl": "https://doi.org/10.1109/ICPP.2017.53", "entities": [ "Advanced Vector Extensions", "Bit slicing", "Bit-level parallelism", "Bitwise operation", "Code generation (compiler)", "Domain-specific language", "Electronic circuit", "Field-programmable gate array", "Logic optimization", "Program optimization", "SIMD", "Significant figures", "Vector processor" ], "id": "2de953868b95608acb0f15feefd9df49bf06e958", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "442-451", "journalVolume": "", "outCitations": [ "5edf5355bd312e90326c3a7b50b51ece407b6c42", "17c8851c47328dad603993f59e25bf67f8e64542", "1c1ee7b39616c52e96d91e243dc8996cfed11027", "8a43e928b6d47b1594175ebafb0d8f14f9abbe9b", 
"381101f3025de0800dc60a08481ed7c96e1cfac8", "e3cbc57f6553e08bdb3c9b1d0d0680a6d4e8e6d4", "5280fcaf7fd31ed0343068da3a984411c07219b8", "35e7adf57479f209b8ab993c7246403f060e3b39", "075474f19692bc2cbec5069bb0e61436c510af83", "266a9e83364863f56d53dca6912830c0a84ec7de", "7877b1ce32dde1e3a146e09c6a47c6dbf5630b1b", "afbb0e9cc2ed9d8172b2b7ecb20c570a9f10af0a", "8688bceead8ed526d14d736b800a812fb7b459ed", "1c519e486444a1dcfbf7e23aef92d6d91320d7e0", "22dd022ef0b803a5b97db52f55f8bc068498624d" ], "paperAbstract": "Customizing the precision of data can provide attractive trade-offs between accuracy and hardware resources. Custom hardware and FPGA designs allow bit-level control over precision, but software is typically limited by the range of types supported by the underlying processor. We propose a new form of vector computing aimed at arrays of custom-precision data on general-purpose processors with SIMD extensions. We represent these vectors in bitslice format and use bitwise instructions to build arithmetic operators that operate on the customized bit precision. We construct a domain-specific code generator that builds bit-level customizable floating-point and integer operators for our vector types. Using a hardware circuit optimization tool we optimize our logical expressions, and synthesize fast software arithmetic operators for bitslice vector types. We evaluate the resulting code and find that advanced logic optimization significantly improves performance. 
Experiments on a platform with Intel AVX2 SIMD extensions show that this approach is efficient for vectors of low-precision custom floating-point types, while providing arbitrary bit precision.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.53" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2de953868b95608acb0f15feefd9df49bf06e958", "sources": [ "DBLP" ], "title": "Bitslice Vectors: A Software Approach to Customizable Data Precision on Processors with SIMD Extensions", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "2e035b44c58eb9c24809d5af8c96162eed25358a": { "authors": [ { "ids": [ "2432203" ], "name": "Jason Jong Kyu Park" }, { "ids": [ "2225988" ], "name": "Yongjun Park" }, { "ids": [ "1721289" ], "name": "Scott A. Mahlke" } ], "doi": "10.1145/3037697.3037707", "doiUrl": "https://doi.org/10.1145/3037697.3037707", "entities": [ "Baseline (configuration management)", "Computer multitasking", "Graphics processing unit", "Interference (communication)", "Multikernel", "Multiprocessing", "Throughput" ], "id": "2e035b44c58eb9c24809d5af8c96162eed25358a", "inCitations": [ "e967dbdb1236627b440e7fa2256c5ed27f5e0bb2", "5a8cd841f59a68c948c7aa05359c7df32dbc8d5c", "8b45c4f8ad488aaf911f6b8972a765c74c1a9f8b", "01ea8c68a5809d0aab377a6f8fa1faf627bf4e12" ], "journalName": "", "journalPages": "527-540", "journalVolume": "", "outCitations": [ "109b416bdbf1739373638eb7e5b37f5d475fd40e", "5fe43f4d79d2829ba062fe9cf1711ee60903dd24", "064f38e5edef42cb5a37f2a350e4413e17132b11", "2e33c9a0e0ba542693610f36cabee4d8291f8e09", "68073f621072d793e95b9562bf9a9245415d5a96", "10443d5d4f0e5048df514e581a9f364954158d00", "01d32e62828315a140a5db4010431cac3d6868c6", "21e5ea3c252c84137efcb45cef1437bdcc15c773", "040bd1162e05c709ac15d937cec485fae3a6af43", "2c2fa29dfbbab106f1b94cdfdc67939d3355bd30", "c5b3f0caeba42a532a48adc80e6932c35bb26ac4", "fb163d7fe546bb950294ffaf5ef6e225f630c76d", 
"40bb6830ec715b52e0fcbe7566f489dd2d0c25fe", "5c9fa798a510b66a37c1b0852582fd7735ed088e", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "2d6f002477015469075954c6748a1a85af352c94", "957106995cbed2214bc404e24e32ef9d249f5615", "00f355ce566bb51dc70925217c62e437cc7e14e2", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "3a043714354fe498752b45e4cf429dbae0fb2558", "c3c244e6a07810e738c8eb3c10d652b7da0267d6", "ce48a652ef299c9c25a1fd4f7f0e8622473d2e92", "03fc198adf79731c92070b8aa839c46ebf9b3c14", "39a6aa81ec3e20c1d500b99b560deb039c451b83" ], "paperAbstract": "As graphics processing units (GPUs) are broadly adopted, running multiple applications on a GPU at the same time is beginning to attract wide attention. Recent proposals on multitasking GPUs have focused on either spatial multitasking, which partitions GPU resource at a streaming multiprocessor (SM) granularity, or simultaneous multikernel (SMK), which runs multiple kernels on the same SM. However, multitasking performance varies heavily depending on the resource partitions within each scheme, and the application mixes. In this paper, we propose GPU Maestro that performs dynamic resource management for efficient utilization of multitasking GPUs. GPU Maestro can discover the best performing GPU resource partition exploiting both spatial multitasking and SMK. Furthermore, dynamism within a kernel and interference between the kernels are automatically considered because GPU Maestro finds the best performing partition through direct measurements. 
Evaluations show that GPU Maestro can improve average system throughput by 20.2% and 13.9% over the baseline spatial multitasking and SMK, respectively.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037707", "http://cccp.eecs.umich.edu/papers/jasonjk-asplos17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2e035b44c58eb9c24809d5af8c96162eed25358a", "sources": [ "DBLP" ], "title": "Dynamic Resource Management for Efficient Utilization of Multitasking GPUs", "venue": "ASPLOS", "year": 2017 }, "2e10eff1ba1a336a9e43c89cfa3b9a29c99d7c4d": { "authors": [ { "ids": [ "2896184" ], "name": "Rina Nakazawa" }, { "ids": [ "2800355" ], "name": "Kazunori Ogata" }, { "ids": [ "6011944" ], "name": "Seetharami Seelam" }, { "ids": [ "1698545" ], "name": "Tamiya Onodera" } ], "doi": "10.1109/CLOUD.2017.33", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.33", "entities": [ "Cloud computing", "Computer data storage", "Docker", "Memory overcommitment", "Re-Loaded", "Swappiness", "Thrashing (computer science)", "Working set" ], "id": "2e10eff1ba1a336a9e43c89cfa3b9a29c99d7c4d", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "196-204", "journalVolume": "", "outCitations": [ "71ad31bd506ea571f6c04a293ff298f42fa7b47c", "445728ecb0eabed9f7433b0c96bd36d53cb312c9", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "67f49884d9418bdf4e68796ab4c77be951835e67", "0d9aea55a54ccc6ab64995d70bf6ae464af25f0d", "2d40c4fe4c076abe1ceb058a0f63d01159d485c5", "3a03957218eda9094858087538e9668ab0db503b", "fd5938e280cfd4b72af63a308c697b4372977b4e" ], "paperAbstract": "The efficiency of datacenters is important consideration for cloud service providers to make their datacenters always ready for fulfilling the increasing demand for computing resources. Container-based virtualization is one approach to improving efficiency by reducing the overhead of virtualization. 
Resource overcommitment is another approach, but cloud providers tend to make conservative allocations of resources because there is no good understanding of the relationship between physical resource overcommitment and its impact on performance. This paper presents a quantitative study of performance degradation of containerized workloads due to memory overcommitment and a technique to mitigate it. We focused on physical memory overcommitment, where the sum of the working set memory is larger than the physical memory. We drove a small fraction of Docker containers at a high load level and the rest of them at a very low load level to emulate a common usage pattern of cloud datacenters. Detailed measurements revealed it is difficult to predict how many additional containers can be launched before thrashing hurts performance. We show that tuning the per-container swappiness of heavily loaded containers is effective for launching a larger number of containers and that it achieves an overcommitment of about three times.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.33" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2e10eff1ba1a336a9e43c89cfa3b9a29c99d7c4d", "sources": [ "DBLP" ], "title": "Taming Performance Degradation of Containers in the Case of Extreme Memory Overcommitment", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "2e320c986c2e5cbac990093f083a6e91eb812079": { "authors": [ { "ids": [ "3234334" ], "name": "Christina Delimitrou" }, { "ids": [ "1700331" ], "name": "Christoforos E. 
Kozyrakis" } ], "doi": "10.1145/3037697.3037703", "doiUrl": "https://doi.org/10.1145/3037697.3037703", "entities": [ "Adversary (cryptography)", "Cloud computing", "Confidentiality", "Data mining", "Denial-of-service attack", "FUJITSU Cloud IaaS Trusted Public S5", "Interference (communication)", "Multi-user", "Time-sharing", "Usability testing", "Vulnerability (computing)" ], "id": "2e320c986c2e5cbac990093f083a6e91eb812079", "inCitations": [ "e9e40bfedfbcebab83ee7b3c8ef0c1daaafabeee" ], "journalName": "", "journalPages": "599-613", "journalVolume": "", "outCitations": [ "ea1eda4043423497269ad5c54aac2cf68280dd83", "edf45b301071ba8175484764c9296f2e395f61cd", "1aa4ae6a1575a8551a4265bec6e1912c401d0d75", "1999881614aed9295f4359cf4761926bc23fcd82", "25b24d6cc547c80e3a1ad037082694a5f6a2b8a9", "7209cdd1800ae8cd42403cc7cf87fce56106698d", "6bdff369a77ab287fc810b5240e50df8f2d51f4e", "027bd50767a7f61fb0fc3c27051a63b209c10a99", "4581948531998d5e5f23c131081ea0cdd9066bfe", "2f4b9ac4a0694f0b1681348334befba0bfe9d897", "08632fe2b934ed15d3499e7321282c81adc2c390", "98c47b463e3af26b13ac30d384e04d8b8baa467b", "2fc84ea4ffbee661ce90c5804101887abe8268a8", "00919d778377ec8e4e037d8ebafc76c9de52db4b", "5e50c80bc7e7c288ceffeaa770e04c301546c2d5", "0091953db954f9a8c7a3d52f70e9188d2028aa37", "774bb691a858b8ae739076702236ca0dc5d057a6", "2269889c9085ff518ee9e7f5b2f92e4599dd3ff2", "1a16866c9fce54eb9c21b7730a42f1f17372907f", "490d862480cf30949dce90e832aa292c498ac768", "60f5f4f7ddc994664792dfc2f6790107d36bc5a0", "3e51539721b445e6b415ecd94db4b499aaec8c0a", "92eaba06af12761b5c64b84e6028d21cd05af9dd", "16514cb7cf7edff6e7806560487e66f57b42efb8", "40e9b4a9fbffc0d0732137c40d255b2e0565bd4a", "0e602f642ed06861d35ff8e71241dfb98f118202", "1d035d1d445f5ae5457db98af49ce1b87be1ebd5", "e2d16dcc0c22c3dcff936d1a1457691704c2bb76", "3501c7bb6342524e74708a1c6642cfdd6963a465", "74e0dfe61dcaac9e086e86221ba1407795e04a2f", "b6d87c6fcaf01a8ad54fdb85725f02db30bde099", "765c5d29bce0617e78b2ec3e918e31f6e543645e", 
"3b9dd0ee95f276a39e2b3b27447d3cd0ef171f99", "7272e04a354d3bc54b1a8119e868090996d3bf1d", "31b27a3b4ff89993eb92e8b1353edead8d5f2520", "471271dfcd33ceb2553b4bd3b3431983fd6ec888", "26f3cbecbc636984d57e52191c1d87c9377aff6f", "656e782fe23364e36a49aeef2d8a74126a38ea04", "8c9a91b774fcc126db7ce7c67bd97d1d16143932", "6d44790b6d952eff28f302998e8121f90786e3ff", "b2a7d7c2073d4f512af89208b83a5bb54f91a24c", "24cba0ea8970cba2a4bb750347fb59b82d028126", "2831b316e1f669f456aad81d1a51f0fd0bd44fde", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "bd79772a58dd4bf040ac9f9c1946614b6a51cc4c", "371d64572d5e2f6af298a42d84aca5807cd19946", "1b938edfde3b3b04c13599c2db87c72b7962f383", "b56bec459de1a4875520775b46979c226cbbeb9c", "e7624b6f4952381782b5395e07f92f8474737708", "0276adfea086f9f92337669ca23b65a6d5a475b4", "316486bada6023816c785c0d4eb401658737be3f", "093f488e41a142e981c395f69f4946ed2b1983a7", "231e6a4fd7922c6adaaa48b2d02f7878e88c4048", "33e82ac2571ec0902aaec1a3e9e375dae79894b3", "062a4c79bc02fff7ea45af6485260080bc6faea4", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "3000e77ed7282d9fb27216f3e862a3769119d89e", "045bbbea384e9d54be38dd207bf237d5208ea599", "19916f1725b2fd4ff4416a072953fa24b4c500fd", "1d4ac1fd1706a1ab3d93d8fa481d12332068fc30", "333da42f4369dff8ca905ad21ea6ee2f5dc99d55", "4e4348913b3198ae51b784db893938ae3afecaf5", "c2fa8d9d8418460fc44985473315c86cf7b240ce", "03543f75c4fe0c49f81af789a1c7293ff0e4e107", "47e98574f432ec6a7a224b807a0bbd08a2a66148", "19d8f8af2d774bda8b95bbeacf62c85811c37b3c", "52c2c050af5b32d4929b4b193967a3675d03aea0", "127405d6995a004194821f32c86f83f39709f6e2", "590bd345ef4b8f274af3363a52b7d8f518cdc08a", "3598f4435066397ae9369792175f2cd78fb921a1", "0558c94a094158ecd64f0d5014d3d9668054fb97", "0d24c8ffa3a5f800c3c1b3146b96a955a84216b6" ], "paperAbstract": "Cloud providers routinely schedule multiple applications per physical host to increase efficiency. 
The resulting interference on shared resources often leads to performance degradation and, more importantly, security vulnerabilities. Interference can leak important information ranging from a service's placement to confidential data, like private keys. We present Bolt, a practical system that accurately detects the type and characteristics of applications sharing a cloud platform based on the interference an adversary sees on shared resources. Bolt leverages online data mining techniques that only require 2-5 seconds for detection. In a multi-user study on EC2, Bolt correctly identifies the characteristics of 385 out of 436 diverse workloads. Extracting this information enables a wide spectrum of previously-impractical cloud attacks, including denial of service attacks (DoS) that increase tail latency by 140x, as well as resource freeing (RFA) and co-residency attacks. Finally, we show that while advanced isolation mechanisms, such as cache partitioning lower detection accuracy, they are insufficient to eliminate these vulnerabilities altogether. To do so, one must either disallow core sharing, or only allow it between threads of the same application, leading to significant inefficiencies and performance penalties.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037703", "http://www.csl.cornell.edu/~delimitrou/slides/2017.asplos.bolt.slides.pdf", "http://csl.stanford.edu/~christos/publications/2017.bolt.asplos.pdf", "http://www.csl.cornell.edu/~delimitrou/papers/2017.asplos.bolt", "http://www.csl.cornell.edu/~delimitrou/papers/2017.asplos.bolt.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2e320c986c2e5cbac990093f083a6e91eb812079", "sources": [ "DBLP" ], "title": "Bolt: I Know What You Did Last Summer... 
In The Cloud", "venue": "ASPLOS", "year": 2017 }, "2e4090f083744f803aedf4fac0b1f78075e335ef": { "authors": [ { "ids": [ "3358086" ], "name": "Yanqin Jin" }, { "ids": [ "2653363" ], "name": "Hung-Wei Tseng" }, { "ids": [ "1786049" ], "name": "Yannis Papakonstantinou" }, { "ids": [ "1760342" ], "name": "Steven Swanson" } ], "doi": "10.1109/HPCA.2017.15", "doiUrl": "https://doi.org/10.1109/HPCA.2017.15", "entities": [ "Attribute\u2013value pair", "Dynamic random-access memory", "Indirection", "Input/output", "Key-value database", "Lock (computer science)", "NoSQL", "Online transaction processing", "Page cache", "Row (database)", "Solid-state drive", "Solid-state electronics", "Transaction processing" ], "id": "2e4090f083744f803aedf4fac0b1f78075e335ef", "inCitations": [ "ac924589f32d23c0eebe2173a77e1cc732f351b9", "44c216b53c1f5a7091618c6b7ba9a32a35323dad", "0afb5d62cdde8056f6ba9ce6eed91c4cbc36c84e" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "373-384", "journalVolume": "", "outCitations": [ "b0b2f180faa09e7bfcb6bb8e57288c3b61f11116", "56da687431aa4e8d6c7c37d775b6b106eb909071", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "66702084eca2b6ada4526b81fdc3d3c53b02535d", "424a0f460b4f261b386787bdec37a2b01347a930", "39e3d058a5987cb643e000bce555676d71be1c80", "c109e2140c5e949329d53a9bb3fe5786ed1c9a4f", "389b618c42c0d5d32a569b9cbaa02a7ff77c6be6", "098d792d1783b5f6fc098203f71f21f5d053c653", "00ac447d02035c26c7e2852c2457fe812e89038f", "05961fc1d02ca30653dd0b4c906113db796df941", "13d6c568c770ff5a070072e720fb34b0037cdab8", "0e5c646909bb762da0cd325e084655c12445578f", "6a9a57dddf37adce1eb16c682205de8bf9447f60", "7e4ecfc13aba74db770378e640d5fbcce7fd3d2e", "1eb9dc6955b0de81a078c9d6fa937c33f1f04545", "8c06fb59a79b3b47dff8588302d8e6514a7f7a4a", "61977858b3eea4f5a6d81393301e7298ade7a2d8", "bd263ebc3e8cec76ea3f3ffa83a7878af4dfdd63", "6fbfc4fe76e152e4d371930a0069cf204e482528", 
"033fd9ff33b69fbd8d9e24b98f77aa8adee06514", "1693e83e47a99667f4bd6ad6e24d8b62a1ba22c8", "0ad5ed5a0a7b9210993753d7594c7285d5e9b179", "4bbb4e2bed21980cfe9ca7a6e243737705b0fd20", "858fd33929a9fdf5fc830d463cde1e98aac34029", "3e426349f0cf3a65b502be05ebca23e693ec03fd", "8cded4cc565f8b7c41b40de6fe8d20231a7e8652", "948c881ab7f1f62e9c940458e74c3e435320df72", "27cb0c2229299a82cf767d19dcc68aa1e5f0f233", "663798bc529bb73f2b3ca8640bb4fcbd83ce5c31", "d67adb456a315aee244babf4f20e318cc14d13f3", "0e216e95f17f64ff18cd50463dd8ec023aa08248", "1af7c3931769a270813a58b6d437f5a74752a532", "72722e7602138e3896e5576d3f3ef730e7b7c4b4", "70ce10f47aafa0994627a9575565b5c98af58d98" ], "paperAbstract": "Modern solid state drives (SSDs) unnecessarily confine host programs to the conventional block I/O interface, leading to suboptimal performance and resource under-utilization. Recent attempts to replace or extend this interface with a key-value-oriented interface and/or built-in support for transactions offer some improvements, but the details of their implementations make them a poor match for many applications. This paper presents the key-addressable, multi-log SSD (KAML), an SSD with a key-value interface that uses a novel multi-log architecture and stores data as variable-sized records rather than fixed-sized sectors. Exposing a key-value interface allows applications to remove a layer of indirection between application-level keys (e.g., database record IDs or file inode numbers) and data stored in the SSD. KAML also provides native transaction support tuned to support fine-grained locking, achieving improved performance compared to previous designs that require page-level locking. Finally, KAML includes a caching layer analogous to a conventional page cache that leverages host DRAM to improve performance and provides additional transactional features. 
We have implemented a prototype of KAML on a commercial SSD prototyping platform, and our results show that compared with existing key-value stores, KAML improves the performance of online transaction processing (OLTP) workloads by 1.1X – 4.0X, and NoSQL key-value store applications by 1.1X – 3.0X.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.15", "https://people.engr.ncsu.edu/htseng3/papers/2017HPCAKAML.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2e4090f083744f803aedf4fac0b1f78075e335ef", "sources": [ "DBLP" ], "title": "KAML: A Flexible, High-Performance Key-Value SSD", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "2e5e824a5e98d505c49a62752566e871cd4de06d": { "authors": [ { "ids": [ "1790421" ], "name": "Angshuman Parashar" }, { "ids": [ "1998820" ], "name": "Minsoo Rhu" }, { "ids": [ "3374545" ], "name": "Anurag Mukkara" }, { "ids": [ "2274681" ], "name": "Antonio Puglielli" }, { "ids": [ "3172075" ], "name": "Rangharajan Venkatesan" }, { "ids": [ "2125244" ], "name": "Brucek Khailany" }, { "ids": [ "1775477" ], "name": "Joel S. Emer" }, { "ids": [ "1715863" ], "name": "Stephen W. Keckler" }, { "ids": [ "1696619" ], "name": "William J. 
Dally" } ], "doi": "10.1145/3079856.3080254", "doiUrl": "https://doi.org/10.1145/3079856.3080254", "entities": [ "Accumulator (computing)", "Artificial neural network", "Autonomous car", "Convolutional neural network", "Dataflow", "Hall effect", "Machine learning", "Mobile operating system", "Neural Networks", "Rectifier (neural networks)", "Sparse matrix" ], "id": "2e5e824a5e98d505c49a62752566e871cd4de06d", "inCitations": [ "460470aacc510467546440b0806ec8b920ed013d", "3d80f420b87bf16eabac6142275e71bf48aa61a5", "bf5a2bed3a98f6dc093460a6592a5d1b99a60ae5", "0f3056a84ea59a9b976163f28002401fa88ba80f", "2512a6ced085503c399ee512ecaeb88606081261", "bdbb9c01016ce513f6ac5c432d61f66da8708bce", "381e7525bc8b9d47ae0343e471f5f1d5e6963bbe", "7b9339d3b359310ddbaf6caae13d3a65f657bf04", "08a0c1f5f2b7c91d81e28f896c2a001d58975014", "64764e3b38e81ac0dd4607a74cdbdcf39895ff50", "18df932a1f2007718d2ae652c35615db9a9b35f3", "0b6dcfa9b829bb6591c55a8c09ceb6c8a2a6b40e", "12b31a84d5e8e26972131b221a3bb6725e92bd24", "144f24c7c0e0405bd9e36cdf13ddd3045e4ad2b2", "2d0f397c8228bb03c4a923a1200ddd2f116a857b", "0dabc0a97972102993bb024b1e05fb77740611df", "d0a96d4973388e98ddf10b11b67afc89d0f4c9e2", "305806d53240aa523168d5aa59d902fb0c9a1581", "cf8c44a703350ebc5df46a861c76db9f0e49457b", "6ed09dd400c784dc6744d9c7f5ab947010c16342", "82d9d4cae6cb82ad516371414cc53da6ec9708b0", "d1be7f6de75dbe350d8d45bb0997e294fd58a985", "5bcf27ab86be9fa376237d2d2bd8ebbf52982088", "3a7509a72a01dae5e17f1405ab3d18e1e2fd8157", "626f7c268b68a0955f9c7c6cfc2edff4d2e3291f", "8033f293c894eae64c9f379dee2192bfe4f7883a", "381b008b49d04c1bd5ff00649521fa028b9d3ea8", "0c6b249d77e998068184e52a2d7fa7a5a867e12f", "5e8e46557e42940274e548246680c785eb729db2", "6a8fb5989b3fb290ac0a654895aad6ff8601c7ab" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "27-40", "journalVolume": "", "outCitations": [ "30ec6ebe977de36c2848da0f6e191d4fb18ccb69", 
"fd12d8d785de4fb1b0d6704a52161f4fa3c34088", "56828bb7ad555eed8d43e6d3eba4ee39e862defe", "19c9be6d3e9aba5a6b60d7b19c6b63c255562dc4", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "0ab85aefdda732705df5b102c0a2851f1266146b", "17c7747e63648d8d8d9d8ac4ac427d06ffe2c186", "8e0eacf11a22b9705a262e908f17b1704fd21fa7", "a38168015a783fecc5830260a7eb5b9e3e945ee2", "1d39c58f16e7b9b7eb382fdb342db85a8b957b4d", "49b4094f2c313a92da4461572c0bef80b0d7d649", "812c795ce4797b718a2947a9f9bdc5b6965c2b29", "02c78232075ac431834e3442dcb2954d4e708def", "9f1f065bf08cd90431cc051267a708f56436cd82", "f5f1beada9e269b2a7faed8dfe936919ac0c2397", "e4ecbd5bc6b219d0ff1993c230dff066a0fd8191", "2538e3eb24d26f31482c479d95d2e26c0e79b990", "399acab2bee6eccbfffe4a2ce688b6b1075e9c5e", "061356704ec86334dbbc073985375fe13cd39088", "087337fdad69caaab8ebd8ae68a731c5bf2e8b14", "0d21aa97f8918f7384714131c3f3ea2a3abeb757", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "06ce77e4abea63948580340be25d7f2a80369e5a", "0b99d677883883584d9a328f6f2d54738363997a" ], "paperAbstract": "Convolutional Neural Networks (CNNs) have emerged as a fundamental technology for machine learning. High performance and extreme energy efficiency are critical for deployments of CNNs, especially in mobile platforms such as autonomous vehicles, cameras, and electronic personal assistants. This paper introduces the Sparse CNN (SCNN) accelerator architecture, which improves performance and energy efficiency by exploiting the zero-valued weights that stem from network pruning during training and zero-valued activations that arise from the common ReLU operator. Specifically, SCNN employs a novel dataflow that enables maintaining the sparse weights and activations in a compressed encoding, which eliminates unnecessary data transfers and reduces storage requirements. 
Furthermore, the SCNN dataflow facilitates efficient delivery of those weights and activations to a multiplier array, where they are extensively reused; product accumulation is performed in a novel accumulator array. On contemporary neural networks, SCNN can improve both performance and energy by a factor of 2.7x and 2.3x, respectively, over a comparably provisioned dense CNN accelerator.", "pdfUrls": [ "https://arxiv.org/pdf/1708.04485v1.pdf", "http://people.csail.mit.edu/anurag_m/papers/2017.scnn.isca.slides.pdf", "http://doi.acm.org/10.1145/3079856.3080254", "http://www.cs.utexas.edu/~skeckler/pubs/ISCA_2017_SCNN.pdf", "http://people.csail.mit.edu/anurag_m/papers/2017.scnn.isca.pdf", "http://arxiv.org/abs/1708.04485" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2e5e824a5e98d505c49a62752566e871cd4de06d", "sources": [ "DBLP" ], "title": "SCNN: An accelerator for compressed-sparse convolutional neural networks", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "2e62f1a7683fb9727456443aea4443ee383c7539": { "authors": [ { "ids": [ "3356743" ], "name": "Alessandro Epasto" }, { "ids": [ "1895843" ], "name": "Silvio Lattanzi" }, { "ids": [ "1689637" ], "name": "Renato Paes Leme" } ], "doi": "10.1145/3097983.3098054", "doiUrl": "https://doi.org/10.1145/3097983.3098054", "entities": [ "Algorithm", "Cluster analysis", "Complex network", "Graph partition", "Provable prime", "Scalability" ], "id": "2e62f1a7683fb9727456443aea4443ee383c7539", "inCitations": [], "journalName": "", "journalPages": "145-154", "journalVolume": "", "outCitations": [ "219d8ad7b424ced8844fb62fbdf3cdfb8f296b56", "4dc578364f357b993b5554b9181c90c84aa6b4d1", "95b251202e1f5ff7d7f41dda553a38e395ecf555", "141e35263ab810983c90d47ad62eb4fab5e51717", "23b4f10c592cf0419fe98cc5745d59075ffa7bce", "1d70f0d7bd782c65273bc689b6ada8723e52d7a3", "6ff68637a09d89c4aa3bb29a4d47391fb0000d06", 
"141004dee9e799b40bfaf50b4a72618613137250", "be99af09c1f967d9fd8c65af7d89d94e503b9b30", "888b56caecd8df9eb3585700fc92e27163600e4f", "a8dc47e370b17371e57ad070e669360794473efe", "9e4dc02a9aafa9166d1ad27c7d37814222af6326", "cec502ac639bb12110991d33a079ed87cb066745", "007f3290e1b5e3061a8b7089037ee775efc47b83", "71f724079756430ed94651fcf66dccd31ede6286", "aa9f7f54492ebf267bde2ca8155bfe9c47ab0755", "9254f46c5b7fae17f7fa3bc0a9f5096628643fdb", "3e656e08d2b8d1bf84db56090f4053316b01c10f", "51e66fb7966bc3caefe1e1941b263fd34750c7d1", "34ee11059d693642fa7a5376e96e147539a997b1", "2256124b2c84b268d603b335e8d4c35d442c1e93", "35778e259b0ccb37ec8c754852956f26bc8ff51c", "1bee27b20e4717d78c132225e90e6a8b3ea1d0d9", "0d2df513add257ff8edab5a5ee1db7c4f4b19004", "03147e48de0ff0a56e2ff4a7d85f2db63dc50567", "c3fba969d0c674931bae7e1f0f4fdf4fc3c30159", "c588b8ddca05dc951a20d1f84afefcce2738e505", "2a005868b79511cf8c924cd5990e2497527a0527", "4a00ebe98d455d61dc1b708265c237fe2ee6ec64", "07ad62b6b5da5f226c88549378886ca062e207a0", "283c8bde15f1b4160ee2842b2c7336521a5e49e4", "1e4d38a42e77e6146935d9e81773d08b53dbbbe9", "1521d39088b203ddac981d10d214f463449ae95b", "0eaf38cd3d7c7fb456201d59b6d28b084010d358", "10e44c294a968ca91e361fac44aa9d0f2cdf3bd3", "1871ea4cf23441d0297c99d9115f664a6ba0efda", "94d7c9558f986f9f3967f331a968b599edc2b11c", "01ff2b834772dfc2b8b7ba00620b65abb9444a75" ], "paperAbstract": "We propose ego-splitting, a new framework for detecting clusters in complex networks which leverage the local structures known as ego-nets (i.e. the subgraph induced by the neighborhood of each node) to de-couple overlapping clusters. Ego-splitting is a highly scalable and flexible framework, with provable theoretical guarantees, that reduces the complex overlapping clustering problem to a simpler and more amenable non-overlapping (partitioning) problem. 
We can scale community detection to graphs with tens of billions of edges and outperform previous solutions based on ego-nets analysis.\n More precisely, our framework works in two steps: a local ego-net analysis phase, and a global graph partitioning phase. In the local step, we first partition the nodes' ego-nets using a partitioning algorithm. We then use the computed clusters to split each node into its persona nodes that represent the instantiations of the node in its communities. Finally, in the global step, we partition the newly created graph to obtain an overlapping clustering of the original graph.", "pdfUrls": [ "http://epasto.org/papers/kdd2017.pdf", "http://www.renatoppl.com/papers/egonet.pdf", "http://doi.acm.org/10.1145/3097983.3098054" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2e62f1a7683fb9727456443aea4443ee383c7539", "sources": [ "DBLP" ], "title": "Ego-Splitting Framework: from Non-Overlapping to Overlapping Clusters", "venue": "KDD", "year": 2017 }, "2e7c1a2953e737ea43237c313751d3e5c5f73250": { "authors": [ { "ids": [ "2270846" ], "name": "Jeongseok Son" }, { "ids": [ "2142901" ], "name": "Yongqiang Xiong" }, { "ids": [ "40165896" ], "name": "Kun Tan" }, { "ids": [ "15241400" ], "name": "Paul Wang" }, { "ids": [ "20759641" ], "name": "Ze Gan" }, { "ids": [ "33772810" ], "name": "Sue Moon" } ], "doi": "", "doiUrl": "", "entities": [ "Active redundancy", "Algorithm", "Cloud computing", "Control plane", "Distributed firewall", "Forwarding plane", "High availability", "IPsec", "Multitenancy", "Private network", "Provisioning", "Simulation", "Throughput", "Virtual machine", "Virtual private network" ], "id": "2e7c1a2953e737ea43237c313751d3e5c5f73250", "inCitations": [], "journalName": "", "journalPages": "473-485", "journalVolume": "", "outCitations": [ "4f88aa229971889e65a7b2ccda47ed7816e5c376", "8e226c40a8c056dc4c348eef256b711902e1d943", "1ee5679595d45f50ce33e9f1dd045b2da4ce0a2b", 
"7a3f1da4f346d3f6968f5b38c6cdb0c01ee4c6ca", "5b999d36d5230eca01532b357c7cf338a5e0d641", "4c165fb087b4861141577a07571c46fbd2324a69", "25f855c968af75e4617f25c71aee3cedec1dedaf", "5ee71ef451a1ecb555eaa3204124a370fce781be", "5f9ebd034181d56705f5658d11ddd472cf854247", "17650831f1900b849fd1914d02337e1d006aea0c", "1146b4781152d7eece00b0e5f0695a307bae0fe4", "10bfd5aed1410b88c5c1b5212f450a1994fc5afe", "2c7e254cfea97f0b10320d723aa906f3f5c9bed6", "5b45d02dda9280ebea3debb8cd8c6261791e1783", "6e4d333d5e53ee2dd71c8483e5aef59bd5f7f596", "077d6198039dea3f60ef9f3d3ef9f128fd3edf71", "2baa50ceffb972260c877567a5dd513dc79fca21", "5ca076b1bd1bca64e5029627111e49ce0e4c4f5a", "5cb88831f543d30cc688fedc445d4e358ef73626", "2f2cdd7b0c98b5e43b61272d2ac3ebb5cd29041d", "22eaa2f2b7abc3604717ffff4461b5cfbbac285e", "1bc49b6df84965971e7f5724eac454ceaed52345", "59ab46bfd59cb43876e701389f256b93430e6273", "180310f5adcf9378c88e01a4a8b500f9b1a21e70", "56ca204b1133c63832207624b51bbd2528f316ae", "4728bda27d89d524f0751ef0dddb5da0bffe0826", "ce8f8e86db523da990507f177c6c6df445cd8d46", "1f41aa3b32e62ba85c8581d8da78a946886a5b87", "39bfff0c76441e0af69f3d61915f2ad930bf6b27", "c2505677c4e8cdd2d842155eebf6a1f9df0d0a8b", "0a59166593f0a2fa260f16fd853299c9f0863fdf", "4bf97ac7427196bff2b9c689b53f34bbe98d52ce", "25d28bfbfd9067d9cb5a85f4af0af3a57013baf4", "09a8a5cd0e6caa3ffa39afae01bea3575aa0bbf5", "336c1e3936ce150907b50f624b060bdb43d5e11b", "e3a4e0583a80d95c8329394d89dab00a9520bc50" ], "paperAbstract": "Virtual cloud network services let users have their own private networks in the public cloud. IPsec gateways are growing in importance accordingly as they provide VPN connections for customers to remotely access these private networks. Major cloud providers offer IPsec gateway functions to tenants using virtual machines (VMs) running a software IPsec gateway inside. However, dedicating individual IPsec gateway VMs to each tenant results in significant resource waste due to the strong isolation mechanism of VMs. 
In this paper, we design Protego, a distributed IPsec gateway service designed for multitenancy. By separating the control plane and the data plane of an IPsec gateway, Protego achieves high availability with active redundancy. Furthermore, Protego elastically scales in and out by seamlessly migrating IPsec tunnels between the data nodes without compromising their throughput. Our evaluation and simulation based on production data show that Protego together with a simple resource provisioning algorithm saves more than 80% of the resources compared with allocating independent VMs.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-son.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/son", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_son.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/2e7c/1a2953e737ea43237c313751d3e5c5f73250.pdf", "s2Url": "https://semanticscholar.org/paper/2e7c1a2953e737ea43237c313751d3e5c5f73250", "sources": [ "DBLP" ], "title": "Protego: Cloud-Scale Multitenant IPsec Gateway", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "2e88c8e41b5a161f3c31cdd6d0606fad929b74c4": { "authors": [ { "ids": [ "1682591" ], "name": "Richard Wolski" }, { "ids": [ "1793260" ], "name": "John Brevik" } ], "doi": "10.1109/CLOUD.2017.31", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.31", "entities": [ "Cloud computing", "Scheduling (computing)", "Service-level agreement", "Signal trace" ], "id": "2e88c8e41b5a161f3c31cdd6d0606fad929b74c4", "inCitations": [ "b92de6bb9ec1c146b3f960b64085d5d164519ddb", "9407934ac72ffafda54b3b413fd0965f704674cb", "776a4af8e84d06cad3ce9d3d3e481f8e59ce5a60", "4b84e9ed6cff0979cbc7dd69bf7a6eaca1162ca4", "8e87cefc81c12eab8d27a8e71000d76977e17b61", "2073b57bc77fd2ca382e93b6053f3f395435cd4f", "b42c442f3b1658db182cfa237f4bba8280d3cee8" ], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", 
"journalPages": "179-187", "journalVolume": "", "outCitations": [ "0f44833eb9047158221e7b3128cde1347b58ccd6", "7e88f9389ea712f7b0baee491df1718058c0832a", "56536c7181b5e7ac36c7706d9708fe115b1a0952", "17180b01e75876577f881f664e8d8334e279205f", "d8057d514036d51051af78476468fe350cb7488a", "ca8a803c608c489efeff0fdc51384dc033895a16", "d979ccabe675967ca9ad065ae292b43bbbf67dee", "481a5ac6c9ecbc75225f2e9a2f0a1199b9db372f", "dcedcc233c1f113760e3c7e46128e04fd02901be", "5ea507655b0bc8c1842342c9d47ccf20b9087e94", "48a6b370460dc8e6ce9c5a45eb39cf1fb654f1f3", "e0b0b8298c40102d8c5d4704d7ffd7f2300b9602", "3cd6ad03a55a0450f34043bc5091cb9a6827255f", "f5fed3c82ae151f6e04af84f25c38c31a21e39f3", "4e6ed452f518281e79c1dd5fb3a1940265cf0841" ], "paperAbstract": "In this paper we describe a new, efficient predictive scheduling methodology for implementing computing infrastructure power savings using private clouds. Our approach, termed "QPRED," estimates the quantiles on the distribution of future machine usage so that unneeded machines may be powered down to save power. A cloud administrator sets a bound on the probability that all available machines will be powered down when a cloud request arrives. This target probability is the basis of a Service Level Agreement between the cloud administrator and all cloud users covering start-up delay resulting from power savings. 
Our results, validated using activity traces from several private clouds used in commercial production, indicate that QPRED successfully reduces power consumption substantially while maintaining the SLAs specified by the cloud administrator.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.31", "http://www.cs.ucsb.edu/sites/cs.ucsb.edu/files/docs/reports/master-tr.pdf", "http://cs.ucsb.edu/sites/cs.ucsb.edu/files/docs/reports/master-tr.pdf", "http://www.cs.ucsb.edu/~ckrintz/papers/qpred17.pdf", "https://cs.ucsb.edu/sites/cs.ucsb.edu/files/docs/reports/master-tr.pdf", "https://www.cs.ucsb.edu/sites/cs.ucsb.edu/files/docs/reports/master-tr.pdf", "http://cs.ucsb.edu/sites/www.cs.ucsb.edu/files/docs/reports/master-tr.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2e88c8e41b5a161f3c31cdd6d0606fad929b74c4", "sources": [ "DBLP" ], "title": "QPRED: Using Quantile Predictions to Improve Power Usage for Private Clouds", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "2e8e25b722faaa8791a61c9feb96db64a51a973f": { "authors": [ { "ids": [ "2473889" ], "name": "Peizhao Ou" }, { "ids": [ "1707313" ], "name": "Brian Demsky" } ], "doi": "10.1145/3018743.3018749", "doiUrl": "https://doi.org/10.1145/3018743.3018749", "entities": [ "C++11", "Central processing unit", "Compare-and-swap", "Concurrent data structure", "Correctness (computer science)", "Data structure", "High- and low-level", "Linearizability", "Memory model (programming)", "Multi-core processor", "SWAP (instrument)", "Software bug" ], "id": "2e8e25b722faaa8791a61c9feb96db64a51a973f", "inCitations": [ "06dfabcf4e4b2f5887b404a8776ca6609fb4a44c", "22b283679a9f1127a9d6db96702be5b2af361808" ], "journalName": "", "journalPages": "45-59", "journalVolume": "", "outCitations": [ "8d082dc4f8b37ccdba60420fa4e76d036715b55e", "14b3ac198613899aba56c5af66fb3bc6b8636db9", "247e1138da869a58800945ea2430637b502bf34d", 
"37a1e8411669e29cf8fbf48ec920c97c0066ac7e", "5ae07f575fb2feb1e03d08af9fe29fafe0a306d7", "12fe89979a7b7052f9e8b90925f96748ae05127e", "19e4e701f4a885875cd54f81c9c147c756811a34", "21161c8efa04cd2ec2e4f121fc720d7e2ffc4e38", "5eef609f21fc9327e551ab40425f7f1715c3e200", "f8cced0649e290c421c5f9dba9a6f014db73a081", "987adbbb4b5baff729cf3907d7f05a86e8651849", "21cd07a1ac2c9db4b44c862f053b26ec856c761c", "7c8f5897a30b4ea4cbfe73d74302dbe9a125a254", "13210f969b8b4d1d12d63a9a8361028a6be498bc", "03d94d57020d7685f373460ac3b0a1551d330134", "358f17d968420850dde44d6bef11b5e7e5db5b76", "0a20fdd98c5427551e80ecddae73f2995bf0fdc2", "413cb63560d7eba323d810022e396363574303db", "382eb351cbb0ae7137aa23c4dff297d8c9624a61", "9c0ec4b2cda00b6d5ea52e6b01a068fdc45b8dff", "406ee6ce01dbc906ad07a3c89a60c7d8b2252a9a", "25d4d5aa3c0ea9b4b1084bc50fefb05bf3f6b2e1", "2167f19b6679b5c5cbe9b39dd3e842514f384795", "2a439d9fab53f6d04fb9f69a28a8b85c13cd1bad", "1aac5c5e6dda36e455bed8af80dd1fd6bb31321e", "68694b8c85fa5b88c7d150fae7a8fa6ab9386b5a", "719a7fd97d662e55e3cf8b2c6b99219b908eebea", "6013e099722e2dba1399452e8df4fd02e947b960", "2343ade036434ef11b2af3dab001f807e628f72c", "246a86f20dcceecc2e30b42323eb9b55f8eadc6f", "19aab49210282cc19ec4fec06bed029a06497bf8", "4bd41d10760ebcf7adfbab6fd02a12719d095ef6", "2900690eb3132a4d1536226d629727de41f38a66", "9ac0c2bb009443cadd571195bd99a3bcddb4e791", "655abf918e5ebc49bec229ecc29d4e7dda512698", "02a242e9aa14864858cd6eb84e86161d487a5cbf", "2814d43ef6c8811d6844e3125dd3d4c87c2e226a", "b5433ceb33dd9e1c2a9daf783152ac6328728ded", "382501ee3b61ba6ac7ea9c7662a5a5015ea86b14", "9f6000aa655375daeebdb489c2fd729c80ecba6d", "f18a9b83021af98449d9c8374a0775d6d672504d", "d4c5e14e27b45266532c74f6aa0d51a1a4280e7c", "42142c121b2dbe48d55e81c2ce198a5639645030" ], "paperAbstract": "Concurrent data structures often provide better performance on multi-core processors but are significantly more difficult to design and test than their sequential counterparts. 
The C/C++11 standard introduced a weak memory model with support for low-level atomic operations such as compare and swap (CAS). While low-level atomic operations can significantly improve the performance of concurrent data structures, they introduce non-intuitive behaviors that can increase the difficulty of developing code.\n In this paper, we develop a correctness model for concurrent data structures that make use of atomic operations. Based on this correctness model, we present CDSSPEC, a specification checker for concurrent data structures under the C/C++11 memory model. We have evaluated CDSSPEC on 10 concurrent data structures, among which CDSSPEC detected 3 known bugs and 93% of the injected bugs.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018749", "http://plrg.eecs.uci.edu/~peizhaoo/CDSSpec_preprint.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2e8e25b722faaa8791a61c9feb96db64a51a973f", "sources": [ "DBLP" ], "title": "Checking Concurrent Data Structures Under the C/C++11 Memory Model", "venue": "PPOPP", "year": 2017 }, "2e9d33cba9f547a2e3febe088bae443f1d74d594": { "authors": [ { "ids": [ "2719313" ], "name": "Linghao Song" }, { "ids": [ "2064331" ], "name": "Xuehai Qian" }, { "ids": [ "38929644" ], "name": "Hai Li" }, { "ids": [ "5442167" ], "name": "Yiran Chen" } ], "doi": "10.1109/HPCA.2017.55", "doiUrl": "https://doi.org/10.1109/HPCA.2017.55", "entities": [ "Algorithm", "Artificial neural network", "Batch processing", "Computation", "Convolution", "Data dependency", "Deep learning", "Graphics processing unit", "Parallel computing", "Pipeline (computing)", "Random access", "Random-access memory", "Resistive random-access memory", "Throughput" ], "id": "2e9d33cba9f547a2e3febe088bae443f1d74d594", "inCitations": [ "1cc0b5aaa294e56c6d20ec672ad7f89972227ff3", "e629e31f7fcbb8e0197f5f8d3d24f48f09d2d278", "381e7525bc8b9d47ae0343e471f5f1d5e6963bbe", "96bcfbb766744dbd2127ca50f4411a8857849dad", 
"e530b5dbced106b72ecd0d1ef542d2c9eaf00856", "381b008b49d04c1bd5ff00649521fa028b9d3ea8", "bf5a2bed3a98f6dc093460a6592a5d1b99a60ae5", "5fa68bb091d5a46c67c341d9c3d3b37442431abb", "270ac71d9fdfbeb27c296865a27ed068207a330e", "cb14f2138d6de2f459841430aa52a2b6f2d3fc90", "7f7e5559c6d25b8d42c61d83988fded70e3b10d1", "5ce80b41443518a14d800f6b93b4057bbb007432", "ab74a44813207a7728e1e13b7dfee1347fc20c3b", "7af861caca22d40f2680abaa0e1d7e46f6418acb", "29d80ba32a2fbe36aab8d4424802039d8be4f370", "6c07e102414ddf46551026c3cb9e4f93ee17c1ca", "de58743ea346bc5f69dbedecf440f87e5710f3d4", "1769a24a37dc3a3492c5ad686f8c11fb8789f5c9", "5a831b0846fc8fbaf1befa27a3f91ed89f865133", "7b9339d3b359310ddbaf6caae13d3a65f657bf04", "99a9304cc17be64d2e779c1ca5f824e7b52f4261", "6f537c85b5160a6375306f6eca1a3e8558e7dbd9" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "541-552", "journalVolume": "", "outCitations": [ "6a8851358df3ecd5164a417110aed4037793f64f", "0ec0e80ebb61ddf97dc26cea65e5013b6de998b1", "534f6ea4ce0127e5da7f1cafb6334b59ad15b83f", "8ed8b196e71ce675772a390c669d9b3426f8dddb", "211a125c77da70a958d1dc9f70ecc29b9a69f796", "f2c2fbc35d0541571f54790851de9fcd1adde085", "68837728232463651283edbb7ef0c93b2f502b2b", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "069eafae5ee9df25ff5c457bb636f73b98d8f6e9", "1740eb993cc8ca81f1e46ddaadce1f917e8000b5", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "0b1c6f52c76b441cb2598f3b8ac132d921ec8274", "1a07186bc10592f0330655519ad91652125cd907", "398c296d0cc7f9d180f84969f8937e6d3a413796", "eef1f3f44d249a37c8382f77ed9770b62e8bc158", "81b7dcaef4a53daab41658a4d1e97972d04b3384", "398dee13b3aaaefdf14c78cc1e00dcf265795fd3", "3ac3cf82ff8f43db2e467f792d22d57d068f7ee0", "01959ef569f74c286956024866c1d107099199f7", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "318a5a4119b27ca433e037a0f1a23f609a2845d4", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "a8e8f3c8d4418c8d62e306538c9c1292635e9d27", 
"1e40d8b7ccac6afbfdf5c89f203f368735e051f9", "c2cd8dee0a1d732858b41f27e4aeb0389f270563", "46f74231b9afeb0c290d6d550043c55045284e5f", "0404d8f580496852b7bd9ff65e30fded0c7f797f", "464b5d7f1a114eba10c344aea92339177c8adecc", "1e2e423a8e45e430be916ae92011a2e8cb8c805c", "6cabaaacd2fec7509be155b45c8e4e9352a7f442", "4a72e2599548361d4a4a3fb1249f17e842bcf681", "2b88cc9988d70a950b3eb50e99c8f0a6722210b3", "5f0ac9b48c392abff9773e36c11dd245a5e4eef9", "1bfea750e709e1720ec451bdf6bd57b55d85e3cb", "5d4c19546369ddcd68b2314be24996f0017d673b", "9e3c09b071088ab75cb5f977a0965234132be707", "b8d608ab84ce81e8c4df90503e0723f75005161c", "42f54d1656f40dbc41548d2dbbb00326005c7d12", "03ff3f8f4d5a700fbe8f3a3e63a39523c29bb60f", "7cd29ed1da71593bfb79b553ba6c5ee39ccf7a7b", "d67175d17c450ab0ac9c256103828f9e9a0acb85", "06902cb95ede2c305db4000852014f276b25c082", "10b014e882764f5800ecdcbaba1fa08795d0c54d", "749076c5d579ef469e65bff964a1f2bffe0cc202", "061356704ec86334dbbc073985375fe13cd39088", "2d8450ec56926a34fe1c25180f0b303c5cc67f2d", "2c709915e1fa01a21580096ece6c373fae69f1f7", "178631e0f0e624b1607c7a7a2507ed30d4e83a42", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "162d958ff885f1462aeda91cd72582323fd6a1f4", "3b2697d76f035304bfeb57f6a682224c87645065", "97158a13a871720757114a8dcb8d8f4e104d8693", "3b049d8cfea6c3bed377090e0e7fa677d282a361", "235fa2b1983eff9f13b27c620cda389359126bf4", "93e6e8532b4fdb5170d4a57fd492041946c16e18", "304a4823e540294704cc624f4e01c50cd1d291ec", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "179f80848143cf109fa6aebae6c3844da03b062c", "132e3d3b5cfc2f59db6ed69ac1eac4a1ee6dca71", "2fc8a439f0f73462f875870c403c90ea1fb99e41", "6f93e0325e577f49f4bed46a2adcfee4a649dc83", "357e97b04375f09e9f4cfd45c69ecd9d7f0a15e1" ], "paperAbstract": "Convolution neural networks (CNNs) are the heart of deep learning applications. Recent works PRIME [1] and ISAAC [2] demonstrated the promise of using resistive random access memory (ReRAM) to perform neural computations in memory. 
We found that training cannot be efficiently supported with the current schemes. First, they do not consider weight update and complex data dependency in training procedure. Second, ISAAC attempts to increase system throughput with a very deep pipeline. It is only beneficial when a large number of consecutive images can be fed into the architecture. In training, the notion of batch (e.g. 64) limits the number of images can be processed consecutively, because the images in the next batch need to be processed based on the updated weights. Third, the deep pipeline in ISAAC is vulnerable to pipeline bubbles and execution stall. In this paper, we present PipeLayer, a ReRAM-based PIM accelerator for CNNs that support both training and testing. We analyze data dependency and weight update in training algorithms and propose efficient pipeline to exploit inter-layer parallelism. To exploit intra-layer parallelism, we propose highly parallel design based on the notion of parallelism granularity and weight replication. With these design choices, PipeLayer enables the highly pipelined execution of both training and testing, without introducing the potential stalls in previous work. The experiment results show that, PipeLayer achieves the speedups of 42.45x compared with GPU platform on average. 
The average energy saving of PipeLayer compared with GPU implementation is 7.17x.", "pdfUrls": [ "http://alchem.usc.edu/portal/static/download/nn_memristor.pdf", "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.55" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2e9d33cba9f547a2e3febe088bae443f1d74d594", "sources": [ "DBLP" ], "title": "PipeLayer: A Pipelined ReRAM-Based Accelerator for Deep Learning", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "2eda52d7a1723df6eee46d69496fd576e5787575": { "authors": [ { "ids": [ "2487014" ], "name": "Yu Feng" }, { "ids": [ "1872924" ], "name": "Ruben Martins" }, { "ids": [ "8099954" ], "name": "Jacob Van Geffen" }, { "ids": [ "1714075" ], "name": "Isil Dillig" }, { "ids": [ "35865989" ], "name": "Swarat Chaudhuri" } ], "doi": "10.1145/3062341.3062351", "doiUrl": "https://doi.org/10.1145/3062341.3062351", "entities": [ "Algorithm", "Approximation algorithm", "Component-based software engineering", "Data science", "Natural deduction", "Online and offline", "Partial evaluation", "R language", "Scalability", "Semiconductor consolidation", "Simultaneous multithreading", "Top-down and bottom-up design" ], "id": "2eda52d7a1723df6eee46d69496fd576e5787575", "inCitations": [ "33de4502da805dd10769d2412fd04ba5ad7867f7", "02e770fe56cc33834c8e81e35ed39074471997f7", "0a4ac94fc2434ca06f30c0caec96217e1c9896db", "a978a05e5b6983fd45113840e0d82b0293c010c8", "59862605432f2e9cd44ba036d0c2dc4907366bf6", "791714728fefcb067fb6b56c7f4de093d536cf00" ], "journalName": "", "journalPages": "422-436", "journalVolume": "", "outCitations": [ "2f63cffb283166bb09076dffd77e2bb1f39a6d48", "52abf8bad6b2007e72446aaf74874556fb6b2dad", "93048dc9441985260fdebaf3a9d2654696e98f87", "7149d00b10c8865a455d151595dd82a4880e3303", "21b2e1056b7fedac3f2c61e563d19a1cc0784f81", "1e688be9f4554aa981fe3db9e2a66388b05bd167", "426a2eb44a8f947edf9a92288e80fd0d6b515de2", 
"2ca30d7cf4716fe81097472a37deaed3a33de9da", "3ba82ae0647dc5f8c8173307f22df68d61558dc5", "31a816f4fef768f29772a003e534b1378611bfe6", "1d63a9e3751293eda942b0db2891919b3b92996c", "197a7fc2f8d57d93727b348851b59b34ce990afd", "208e7934d900055b43b8b60e4a807ac00674ec4a", "67d18339ed72b7fc2152cb42b63362b570c11946", "6bbaf76d82968a4349f7f043ece649c8ac1fbc0c", "099cfdca0f11eeaf46dd6457f33caff8e8fbcb41", "238be0efe497fc297013ae16109fbbd2ee3d9733", "1ef301c1b275091b6a50d620b41df4722f2108f0", "0af1c24e00dbf342517df2f50698502e3d793ea8", "11256a3695e1313bc0989935a94ee80342e25cd1", "3aa02c3771e9c4f0fc3b54cc6bbb25d9b19e1838", "e10d235a7d485894eb0c58ef8af4249c67e0c6d3", "807b9c73800f59ac191c0a43c242ef79ba5ec253", "05c8103e1b77437875a4c69c6258be988ab2946b", "4e7e046ea3b7dab2346ef2baaf51c76bf3ea964b", "99cc63730e3079ed58311a4ec88f4f0c891ed61d", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "1b6f9500c08846312d5fffdece51c127a3725250" ], "paperAbstract": "This paper presents a novel component-based synthesis algorithm that marries the power of type-directed search with lightweight SMT-based deduction and partial evaluation. Given a set of components together with their over-approximate first-order specifications, our method first generates a program sketch over a subset of the components and checks its feasibility using an SMT solver. Since a program sketch typically represents many concrete programs, the use of SMT-based deduction greatly increases the scalability of the algorithm. Once a feasible program sketch is found, our algorithm completes the sketch in a bottom-up fashion, using partial evaluation to further increase the power of deduction for rejecting partially-filled program sketches. We apply the proposed synthesis methodology for automating a large class of data preparation tasks that commonly arise in data science. 
We have evaluated our synthesis algorithm on dozens of data wrangling and consolidation tasks obtained from on-line forums, and we show that our approach can automatically solve a large class of problems encountered by R users.", "pdfUrls": [ "https://arxiv.org/pdf/1611.07502v1.pdf", "http://doi.acm.org/10.1145/3062341.3062351", "http://www.cs.utexas.edu/~isil/pldi17-morpheus.pdf", "http://arxiv.org/abs/1611.07502" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2eda52d7a1723df6eee46d69496fd576e5787575", "sources": [ "DBLP" ], "title": "Component-based synthesis of table consolidation and transformation tasks from examples", "venue": "PLDI", "year": 2017 }, "2edca953a00dbefa2516f71cb813546ce83096c9": { "authors": [ { "ids": [ "1720370" ], "name": "Qiang He" }, { "ids": [ "40383517" ], "name": "Xiaodong Zhu" }, { "ids": [ "2752395" ], "name": "Dongwei Li" }, { "ids": [ "2220166" ], "name": "Shuliang Wang" }, { "ids": [ "39404298" ], "name": "Jun Shen" }, { "ids": [ "1682341" ], "name": "Yun Yang" } ], "doi": "10.1109/CLOUD.2017.124", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.124", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Big data", "Cloud computing", "Computation", "Computational resource", "Data mining", "Data science", "K-means clustering", "Money" ], "id": "2edca953a00dbefa2516f71cb813546ce83096c9", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "74-81", "journalVolume": "", "outCitations": [ "3ae50b9b26ae879322e7cc06523055778eae63a7", "0bacca0993a3f51649a6bb8dbb093fc8d8481ad4", "cc6e17fe9d2525549dd73f6d8feea8f9a4c49f40", "1696397b40eaf7cede9b62a989dbe36d955d121d", "0288181f90c5f85ba219ebc4beb7c759fd052408", "cb67f13e51737b9bd36d2b9d942e4f095ca3abd6", "f720507fc8f42f6f9257fe13e6a14f6c17ea7021", "46c003ef53a0fa9bbafcdff919f5ce687d928b45", "732a0ef83b3b5674f5299b06e1c0191f09c03dd9", "9241ea3d8cb85633d314ecb74b31567b8e73f6af", 
"3895912b187adee599b1ea662da92865dd0b197d", "c61d90590994fd95d4eaad1396186c904fdaa162", "05e22d40ce1b4df9829c451433a03ce3f51eb41d", "ec02b379a635346ec1501e801263ea576a10ed4c", "525ac8da0afe2ba149b0e26c07bf9da1fa62650d", "4e4348913b3198ae51b784db893938ae3afecaf5", "11a963dadaec54347fdafe327512a3ad7c25bb55", "457b3796442d5944259d49f2f39c92767ffc3041", "343271490746e3dbcc4f951195365aace5298022", "1d6aebefbf69d6e423f75f10d35716be742e0730", "0b40af1ad2b9781fa14e999db2d7d3270b6d2862", "ca219dc41e6768a145ff4c793bbe45ff7fd1dc50", "8900b40061708168197c034c9e16af6031e28235", "2ad064e72abb2cdd97178817aa21a0ff2909f014", "17ca50768d706ec4bd114bbad59908115a471367", "017a7330fe2d2b5469fcc52ca933c41c2907ecca", "3320d97bf6ec4a467712577eb1f74f18fbeadba7" ], "paperAbstract": "Mining big data often requires tremendous computational resources. This has become a major obstacle to broad applications of big data analytics. Cloud computing allows data scientists to access computational resources on-demand for building their big data analytics solutions in the cloud. However, the monetary cost of mining big data in the cloud can still be unexpectedly high. For example, running 100 m4-xlarge Amazon EC2 instances for a month costs approximately $17,495.00. On this ground, it is a critical issue to analyze the cost effectiveness of big data mining in the cloud, i.e., how to achieve a sufficiently satisfactory result at the lowest possible computation cost. In certain big data mining scenarios, 100% accuracy is unnecessary. Instead, it is often more preferable to achieve a sufficient accuracy, e.g., 99%, at a much lower cost, e.g., 10%, than the cost of achieving the 100% accuracy. In this paper, we explore and demonstrate the cost effectiveness of big data mining with a case study using well known k-means. With the case study, we find that achieving 99% accuracy only needs 0.32%-46.17% computation cost of 100% accuracy. 
This finding lays the cornerstone for cost-effective big data mining in a variety of domains.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.124" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2edca953a00dbefa2516f71cb813546ce83096c9", "sources": [ "DBLP" ], "title": "Cost-Effective Big Data Mining in the Cloud: A Case Study with K-means", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "2ee14b821b4084aa1b9237390e280daf450419e5": { "authors": [ { "ids": [ "2329502" ], "name": "Marco Chiesa" }, { "ids": [ "1696115" ], "name": "Daniel Demmler" }, { "ids": [ "1709876" ], "name": "Marco Canini" }, { "ids": [ "1718880" ], "name": "Michael Schapira" }, { "ids": [ "37636206" ], "name": "Thomas Schneider" } ], "doi": "10.1145/3143361.3143362", "doiUrl": "https://doi.org/10.1145/3143361.3143362", "entities": [ "Computation", "Confidentiality", "Digital footprint", "Dynamic dispatch", "Internet privacy", "Peering", "Privacy", "Scalability", "Secure multi-party computation", "Telephone exchange" ], "id": "2ee14b821b4084aa1b9237390e280daf450419e5", "inCitations": [], "journalName": "", "journalPages": "120-133", "journalVolume": "", "outCitations": [ "122995724822f98b00a837a51700c12911d9887e", "20b5b5c25e2b56693b38fe7f69caddca78872085", "b0e32f83369313c18e3ad38b47a0f0dbe42decac", "4929d3bb880698a5f3ca27ff66e5d272c19c1088", "407947d5e679499cfedf581acb202b2364bb6463", "0b9c4145856c2cf2ec0ec348eb7a898fa9376726", "10f328c410d4ec36127ed7aa9fad1ba2a416e38e", "0435dddec1c8b1011bd5e3ea07c45d1cdf0b520b", "41ce63a9f52ed060d6cbddc94d9805c60d809bef", "b273f47f97fc3f1ed922c3effda9ab88c52a1680", "3a956b0ff66f4acda00a91e4ba731569515e8806", "ce0a85240f8f2abf24d735f34cceb55907237a80", "0f53da522a998b677e99b2d30c3d3dddc9599a79", "375f5df1ccc60618e45ec0d6b48dc675209527c2", "128d0bbc8e09de492b7f92dcd3c7f2e2a0a36144", "33f45a4c8c4f86727c5073891d5629f3ebfbfc9c", "2813c3351838e036f52bcaa94eb7203bf4d9e7d3", 
"6871b95c14dccca7636b498b5d363a743c5288e6", "c2ffc5a9163f3e97d5d466c9cf3e9325e3dd43fe", "228c9ef44ed51f4152a5655be54c8d679c54bb01", "379010410434731a05d77728fe26ed5a7b53985c", "090e7e85833ef4d097fb7eaf099eb4a3d2c72683", "395867fa1708ad9ef3572ab9be0b34b203707be3", "61db5c046203d08bee7c3abbb996314a2d275e5d", "0701f960511a0deb2ba7af9000db478ef0736839", "1a68d1bbb2eab66239e51b26b7636c453f505b3b", "e50ae4d480d84c7cbdb8edcebf13e57f5a47c8ad", "6cfe1e553cb48c7087bb61e80031c415978a4ede", "25810015c2ff27803089069e393b2868343c9d98", "19c3736da5116e0e80a64db35afe421663c4b4a8", "1e8996bb03d45f2fbd25336441e9c3a67c4c1484", "603457793cf9a6056f6597420c838210342a3d2b", "b5de057f088ff09090653ab11077b1b740e2e013", "fe38ee9944077cd14a0f6f1813af2d3d4b59ce43", "41f6d924dd684966a9f6bc25e2266ce08cfdf4f4", "cdb352365403a6ad09cdd00232def337df0a1b96", "73478ab70b10db22457b7d537d8eef55d4c4f26a", "6223684e14778e4d7948e994d2169ebf38e0a95f", "1a05ffa0307e641ba12b0c2dbd33f10f1def2437", "0fd16916265836f99eab6a7c4e4b15dfe5ae5038", "55a62990a82bf9205e507f73d42583df46062806", "65a0c8b20fefef72bacfe69b93191cfbdfd86dc0", "1942aff3bb24d4ff9c1e8688b1104a767f0bc346", "0654bc1aeae2b46890ebd17cd7953d817fc99456", "fe5fd50ed7fc66a43e2d02f1b2870720598fd09d", "2bea63d41b8988e060a864247d995ceefc7f637c", "55ef72fe52990f491ab939b91d75b7899a66180f", "15509fdb7fc7bf065fcdf776b38cf3d72d10c113", "223825035845a583fa9aa77473f3606ba9ad486a", "00f2c09fa202a3a5d8b875caf2a4a155ed89b38a", "6d6bd93c620885cb5ddd5abfac19efffac132cd5", "7154b7b4d8578e5f1510978e4802d33c3b083655", "2daa396ebf3a77e11f88ca82b2cddec0eec8dfd6", "5b2092b54860f134f78b2ec884c910750def71e6", "601ebc1627187a2b891de9c98596cab840cde759", "337b0e64f9f8be2ecf34fad6bdf31ee34c7971d4", "25518a206a45b3af9fbb68d11aa905480cf8f61d", "1bef4d26c917f0060814e86cffa9b22bea70a847", "3b06a48b190c933f1e8ac09fb1a4a24e6a32de3e", "12d93144cb16c0ebb8c2aadaefbae147513cc200", "1d69c5005858e4359f6567e13e4af84443ef10cb", "22eeb4735c3eb51b0a7813b5e644077f1f28f830", 
"097ca8b402d3eb1ee125396dc2e36b1d7713a5ea", "4643ee85d636ae2b5d7a334548b5553646925230", "8a37efc82e54353d387cfb073f9379c053988aef", "9ea2b1782e4de6719381b043c7c6d62df52c6357", "06bf84f98e7dd39be8d96eb67bafbf56d61bc715", "12a53e72768fe1e50f60c59f61e4bc3ffc739f4a", "187fb09496d6b9cd4141a0917e365de28b4fc0c8", "30909df12b1b01760ae4c5406e15f302a6524446" ], "paperAbstract": "Internet eXchange Points (IXPs) play an ever-growing role in Internet inter-connection. To facilitate the exchange of routes amongst their members, IXPs provide Route Server (RS) services to dispatch the routes according to each member's peering policies. Nowadays, to make use of RSes, these policies must be disclosed to the IXP. This poses fundamental questions regarding the privacy guarantees of route-computation on confidential business information. Indeed, as evidenced by interaction with IXP administrators and a survey of network operators, this state of affairs raises privacy concerns among network administrators and even deters some networks from subscribing to RS services. We design Sixpack1, an RS service that leverages Secure Multi-Party Computation (SMPC) to keep peering policies confidential, while extending, the functionalities of today's RSes. As SMPC is notoriously heavy in terms of communication and computation, our design and implementation of Sixpack aims at moving computation outside of the SMPC without compromising the privacy guarantees. We assess the effectiveness and scalability of our system by evaluating a prototype implementation using traces of data from one of the largest IXPs in the world. 
Our evaluation results indicate that Sixpack can scale to support privacy-preserving route-computation, even at IXPs with many hundreds of member networks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3143361.3143362", "https://cdn.uclouvain.be/groups/cms-editors-icteam/scihigh2016/Canini_Sixpack.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2ee14b821b4084aa1b9237390e280daf450419e5", "sources": [ "DBLP" ], "title": "SIXPACK: Securing Internet eXchange Points Against Curious onlooKers", "venue": "CoNEXT", "year": 2017 }, "2eff823bdecb1a506ba88e1127fa3cdb1a263682": { "authors": [ { "ids": [ "3034469" ], "name": "Juncheng Gu" }, { "ids": [ "8650889" ], "name": "Youngmoon Lee" }, { "ids": [ "34619093" ], "name": "Yiwen Zhang" }, { "ids": [ "2579531" ], "name": "Mosharaf Chowdhury" }, { "ids": [ "1730051" ], "name": "Kang G. Shin" } ], "doi": "", "doiUrl": "", "entities": [ "Apache Spark", "Central processing unit", "Memcached", "Operating system", "Paging", "Remote direct memory access", "Slab allocation", "VoltDB" ], "id": "2eff823bdecb1a506ba88e1127fa3cdb1a263682", "inCitations": [ "044f5a9c7b571f42cb47c7bc82a2aeb9752002f1", "a1c66612e768989e510e9c7cd584b3081973c80a", "4a1aa8a3564c2dc5546299755220d39f3ab9a373", "d62daff09dd5082b895e8355315e1ae480c87e32", "b8431ade39d7171469029a5c15542ba642e51b07", "43e9cd8221c82d1777303692f2989beb223c4530", "9bbd5be2829e49b1fac7f034baf7499cb069db95", "13e7fc7a16889060740fefe6ff864a6c182e8240", "7206aead5a341f361e6571d607f3c032e65e2f7e", "3105cd78fb5f9c62ccf0346e061579e2bcd130c6" ], "journalName": "", "journalPages": "649-667", "journalVolume": "", "outCitations": [ "47f5bba54710b0e1663e9336790cb4609d16077d", "5616fe711d3134f0f336a82548891ba86562fcbc", "490431788e3b7aa36a60a7156c55e9ab3796af04", "2510fa746a2ac5a7af009eee14a922958c9e1f2a", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "2988e34168fa91398fa397baf823af2063893e9c", "756d1f8f07a83f3cfc0edaa81493a9f109628e1b", 
"867ad29b3392965e40ede63f3cdbfdedac1c781b", "0ad8e89091eed09217e66adc98136126addc2619", "daf0cd0076b388712ea12ec4105572997fc50cdf", "028378b395dc2a11e8ccc3d994df228340fd9697", "3a043714354fe498752b45e4cf429dbae0fb2558", "60ddf74dd5b443c3bfb59fe876b42f9d6112c4fb", "3e257f01e3ee71545d824a1615c35659525b856a", "4eab97d0d1c75641671aa5b7761978322d904c5c", "28000a585fdd7b0c11b3d9a9b7008fbc4afafd92", "43776b15c034076a36b7143d58af8e04715e41d0", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "1e954c5cf302d76483ec0cc0049b4b1220077750", "29a7e3bd8a2ab6ae962de935e3b2eabf09557189", "1b6f6168fa67ec9141ac1494a7d0f28995d51d3b", "00dca7217305a31dcf5108eb7ecf862dd4827823", "8e8e622d5fab4c1d2a5bc7783db84e62cc570f9a", "08632fe2b934ed15d3499e7321282c81adc2c390", "984953d5143743c713006474239729d2396ad3b4", "14390fd81841cc4bb3d3764042481fc0a0e89e7b", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "1aefeed6a487431dc1c1137d8b18ad299b328d73", "559e4671b87c3f76d3c485ebdaefe734323879f0", "c01d5136874e5f5b46bed068bd2da87f162718e9", "468043e200bb5844a4ce6ed906f29453894778ff", "006cd63664db53494cc61a44d5c6ebc668dc4b6a", "04ccdfc19e675b4b8439ebecddab093ab7c605fa", "4721ad0db596f3f78ddb31b4305ddbde35f8f181", "544afa259d6dfe0214f029a9fa515dd0482dbba2", "0270c2056eb50b5d4597afa722c50abf21e67a82", "013a3ba3ee318047ef0f2f3097a8d4c6208401f4", "327a02b19a60319cc35be860ad0259a5c1aef920", "0558c94a094158ecd64f0d5014d3d9668054fb97", "25a973aa67a796233c2b988eae3ae02645216e8f", "13b6dd42357b77a2e01915f164ab5740a791d2f5", "0706356c9ab6014d6b04577d38289ea8328291a5", "31b27a3b4ff89993eb92e8b1353edead8d5f2520", "624ab00ed715888d15e42246cb3c87fd13123082", "917d89c22bec482e6cc56e26c3a1bda5302e4800", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "07367703f587dbc3313cc613289c4330cebe5c8c", "07add9c98a979e732cfa215c901adb1975f3f43a", "225129e1c1626d7e1c2b5fd6146a90ffd137ba02", "6304a1e1712b5ee19880d6d1a359b9041f191979", "29a1148d75878671dc3663bf480e33d7bd91597d", "33e64874996ac6d163e4e5a97e28b617de7cc0f5" ], "paperAbstract": 
"Memory-intensive applications suffer large performance loss when their working sets do not fully fit in memory. Yet, they cannot leverage otherwise unused remote memory when paging out to disks even in the presence of large imbalance in memory utilizations across a cluster. Existing proposals for memory disaggregation call for new architectures, new hardware designs, and/or new programming models, making them infeasible. This paper describes the design and implementation of INFINISWAP, a remote memory paging system designed specifically for an RDMA network. INFINISWAP opportunistically harvests and transparently exposes unused memory to unmodified applications by dividing the swap space of each machine into many slabs and distributing them across many machines\u2019 remote memory. Because one-sided RDMA operations bypass remote CPUs, INFINISWAP leverages the power of many choices to perform decentralized slab placements and evictions. We have implemented and deployed INFINISWAP on an RDMA cluster without any modifications to user applications or the OS and evaluated its effectiveness using multiple workloads running on unmodified VoltDB, Memcached, PowerGraph, GraphX, and Apache Spark. Using INFINISWAP, throughputs of these applications improve between 4\u00d7 (0.94\u00d7) to 15.4\u00d7 (7.8\u00d7) over disk (Mellanox nbdX), and median and tail latencies between 5.4\u00d7 (2\u00d7) and 61\u00d7 (2.3\u00d7). INFINISWAP achieves these with negligible remote CPU usage, whereas nbdX becomes CPU-bound. 
INFINISWAP increases the overall memory utilization of a cluster and works well at scale.", "pdfUrls": [ "https://www.eecs.umich.edu/eecs/about/articles/2017/infiniswap-nsdi17.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-gu.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-gu.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/gu", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_gu.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_gu.pdf", "https://kabru.eecs.umich.edu/wordpress/wp-content/uploads/infiniswap-nsdi17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/b6b6/8de1eaf5f28dea6596990c83f363c44ea208.pdf", "s2Url": "https://semanticscholar.org/paper/2eff823bdecb1a506ba88e1127fa3cdb1a263682", "sources": [ "DBLP" ], "title": "Efficient Memory Disaggregation with Infiniswap", "venue": "NSDI", "year": 2017 }, "2f17bcaa861dd6e6dff107e8ff39d92a24af5c74": { "authors": [ { "ids": [ "38597003" ], "name": "Guilherme Cavalcanti" }, { "ids": [ "1724814" ], "name": "Paulo Borba" }, { "ids": [ "1701589" ], "name": "Paola R. G. 
Accioly" } ], "doi": "10.1145/3133883", "doiUrl": "https://doi.org/10.1145/3133883", "entities": [ "Correctness (computer science)", "Open-source software" ], "id": "2f17bcaa861dd6e6dff107e8ff39d92a24af5c74", "inCitations": [ "6faefa5a2c3b7ede32bbc766110f7cfcbcb94739", "c776f5adbdef6f5885ee793e25258a8533a412c1" ], "journalName": "PACMPL", "journalPages": "59:1-59:27", "journalVolume": "1", "outCitations": [ "2a00bc8a532a501d36bf041b22416de00fa1d983", "4abb51ab3277e819bde1272edf2ae169bba8c137", "5e9bccf4164dfddf6941e44c1954fe7146a691d1", "0a3e07b4da6e2eba422964da6661910d405f02e3", "095994a9207b13366f23753aa783d67058d16a8c", "caf7737b93748839ec1a8e8bfaccc14ba2385e4e", "9a7ab21c75d486930847f396582c31231880c907", "172bb6eb4441de02b5e6058a684808a2fd701574", "124837b88ff00b933b71e9d6015c9ea698e3c700", "7c750d6d4d6dda773bb73ddc9cfacffdffcd30f6", "b21d1922f83e5ea146c152d9e8f8f280df5b2ebf", "8b2d3bd91bd51d0ca197670756d031e348b794df", "f6c6310f9364dcf0a3bd202ef217866ceaebfd13", "ac531be267ad831d8b22754522353bdf84e1ea26", "230bb7040fbf81eee7b395957c0c6be95f65440b", "005fbcd6a7b5261ee4d679be48808cf1ecca3909", "4c11a7b668dee651cc2d8eb2eaf8665449b1738f" ], "paperAbstract": "While unstructured merge tools rely only on textual analysis to detect and resolve conflicts, semistructured merge tools go further by partially exploiting the syntactic structure and semantics of the involved artifacts. Previous studies compare these merge approaches with respect to the number of reported conflicts, showing, for most projects and merge situations, reduction in favor of semistructured merge. However, these studies do not investigate whether this reduction actually leads to integration effort reduction (productivity) without negative impact on the correctness of the merging process (quality). 
To analyze that, and better understand how merge tools could be improved, in this paper we reproduce more than 30,000 merges from 50 open source projects, identifying conflicts incorrectly reported by one approach but not by the other (false positives), and conflicts correctly reported by one approach but missed by the other (false negatives). Our results and complementary analysis indicate that, in the studied sample, the number of false positives is significantly reduced when using semistructured merge. We also find evidence that its false positives are easier to analyze and resolve than those reported by unstructured merge. However, we find no evidence that semistructured merge leads to fewer false negatives, and we argue that they are harder to detect and resolve than unstructured merge false negatives. Driven by these findings, we implement an improved semistructured merge tool that further combines both approaches to reduce the false positives and false negatives of semistructured merge. 
We find evidence that the improved tool, when compared to unstructured merge in our sample, reduces the number of reported conflicts by half, has no additional false positives, has at least 8% fewer false negatives, and is not prohibitively slower.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133883" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2f17bcaa861dd6e6dff107e8ff39d92a24af5c74", "sources": [ "DBLP" ], "title": "Evaluating and improving semistructured merge", "venue": "PACMPL", "year": 2017 }, "2f20b9d441d12ecb17e6d8b2564835863144f8ab": { "authors": [ { "ids": [ "1860892" ], "name": "Di Jin" }, { "ids": [ "2479152" ], "name": "Danai Koutra" } ], "doi": "10.1109/ICDM.2017.28", "doiUrl": "https://doi.org/10.1109/ICDM.2017.28", "entities": [ "Anomaly detection", "Experiment", "Exploratory testing", "Feature selection", "Principal component analysis", "Program optimization", "Structure mining", "Synthetic data" ], "id": "2f20b9d441d12ecb17e6d8b2564835863144f8ab", "inCitations": [ "968605c99dc6f40cbfeb4cf6e3322fee531e667f", "532c30eccd3629f55c4818bae98223a81126cf5d", "8dcf09644b56f32802fb59796bd777433ee47510", "1850e9b7b72dfcb178b8b813cc6110dd5ee1147b" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "187-196", "journalVolume": "", "outCitations": [ "0d06de003e8ca949b3b39f9a51750c050addb997", "945c05d471ef987e387f5fedf00c167181da8c8e", "cb1dc33a1c6f9e00a1445a7d485fcd5a6f2849f4", "2ef9255a5740f35b754dde050ec36f44db0f28ad", "199369d8eaff23e00c106ef2ddc4181696600c1f", "08468bb51eaf13a4925f5465200d3fa2ffd69833", "24e45f66c6ddfbfe1430ade5522709d51d908722", "52526fd96abe1b0d6a275c768d10fc30b26387cc", "99b8f95c06669ffa4176e68f4efe85f6deaebfed", "acb20be4e5ccc882782c076feb7e7c34cb1424a0", "b1eb965147a249a6ea7fc6f62ea4297b8c4832d4", "658f760273b4739797397b11a1bdced2b9a234ee", "62c95eb8ccc24aa83202af8a04516594fbc645e9", "59cdf849049627e4c30f3bd866e3a7e03e893251", 
"c2cc03904293a0890a54d83c1cdb7b4d29d2fbd3", "31699c35f42e4d9d108b4c595f9cea9655f5022e", "941a10fe4d910cb6dd546146ddaccf0b86a2518e", "45969603820a845d531cdf3fb8dc7bd0c4198dc2", "4781b899447abc3439eb785281aa754126f1d818", "107983e8e6318091813eea0988422c90d82a3841", "5264ae4ea4411426ddd91dc780c2892c3ff933d3" ], "paperAbstract": "Given the soaring amount of data being generated daily, graph mining tasks are becoming increasingly challenging, leading to tremendous demand for summarization techniques. Feature selection is a representative approach that simplifies a dataset by choosing features that are relevant to a specific task, such as classification, prediction, and anomaly detection. Although it can be viewed as a way to summarize a graph in terms of a few features, it is not well-defined for exploratory analysis, and it operates on a set of observations jointly rather than conditionally (i.e., feature selection from many graphs vs. selection for an input graph conditioned on other graphs). In this work, we introduce EAGLE (Exploratory Analysis of Graphs with domain knowLEdge), a novel method that creates interpretable, feature-based, and domain-specific graph summaries in a fully automatic way. That is, the same graph in different domains–e.g., social science and neuroscience–will be described via different EAGLE summaries, which automatically leverage the domain knowledge and expectations. We propose an optimization formulation that seeks to find an interpretable summary with the most representative features for the input graph so that it is: diverse, concise, domain-specific, and efficient. Extensive experiments on synthetic and real-world datasets with up to ~1M edges and ~400 features demonstrate the effectiveness and efficiency of EAGLE and its benefits over existing methods. 
We also show how our method can be applied to various graph mining tasks, such as classification and exploratory analysis.", "pdfUrls": [ "http://web.eecs.umich.edu/~dkoutra/papers/17_EAGLE_ICDM.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.28" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2f20b9d441d12ecb17e6d8b2564835863144f8ab", "sources": [ "DBLP" ], "title": "Exploratory Analysis of Graph Data by Leveraging Domain Knowledge", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "2f3799a21c7b1d9c226332e42f4702122241fcf1": { "authors": [ { "ids": [ "3164047" ], "name": "Usman Shahid" }, { "ids": [ "39416427" ], "name": "Shehroze Farooqi" }, { "ids": [ "33098267" ], "name": "Raza Ahmad" }, { "ids": [ "34616778" ], "name": "Zubair Shafiq" }, { "ids": [ "1795211" ], "name": "Padmini Srinivasan" }, { "ids": [ "1685939" ], "name": "Fareed Zaffar" } ], "doi": "10.1109/ICDM.2017.52", "doiUrl": "https://doi.org/10.1109/ICDM.2017.52", "entities": [ "Blade Runner", "Dictionary", "Spamming", "Stylometry", "Text corpus", "Web search engine" ], "id": "2f3799a21c7b1d9c226332e42f4702122241fcf1", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "425-434", "journalVolume": "", "outCitations": [ "7903d62d97676610f9205e83975b3ebcc2a0ea99", "5bd7f2cd44b03e9f8a392f5f5a7511376105555c", "0eb67087452418b61065511cfed6097be93178bf", "d8d816585c36c32cd1bb145a4025da150801ef03", "050dc61904fb3ab1add296ad98a7aefbcd8020a5", "096d7d2e9b3fbc37f1c4e75b1896ae3797950ef9", "69a076f962b868b0e276ddb0399041ba54f736d9", "46f766c11df69808453e14c900bcb3f4e081fcae", "11aaae1167725f340c937698eb20757a829aabbb", "d4ad85d16da39ff216877ded4d15673e4301763e", "20163f0e34cbf6405033fe258de095dcbdff4db4", "045e2001fa31884da07ba9f1c9b2ac92c1df6dcc", "653eddac5447381e9a7e221498973296e8eb732c", "d055444c58fffebebe2cfba521fb73ad80bf7991", "3cf36724a6b960078e0fa8783265cf30af3c92d8", 
"06589346b81ff118fabdfb2499c88b2c45bdbd7e", "862bfb5a02943539ab2c6d3a7f5ebb817cfb217e", "31c7004afa43c587302ad5c7d0c9cab2f6485483", "792cf600eece556bb44a7036f158a19de4880b75", "0f82d49b571e4d75e5cfeda572fe4795757bf2c7", "5cbe5b9f6b2c293e1b1f0f376e084e7771cf9737", "061a1d8c72f89763b63600833c0292074e3b8759", "9520a7bf0ed28554de7fdabd170446485e5bf621", "6bc73c422260bff6d72ba96fe663a3d4fdc26430", "8752434bf4383fbf1422b86e5632d51606856bb2", "48ce15ce00bc029f53c7e624799c401f18e66ce3", "565fe5516bea26810560d5f01b692912724ea5e9", "0d30e15147fd45fffa8115a853d8d05bd434eb31", "1b8351e7f15d7bb538c83a6ad42e71e32253b117", "ecce2d601a45d7c6e63444f85ffe5212dde76e38", "83a96734000f6733ed92b3daf0c6ffa1528e5d39", "3334a80676fefc486575bd2ddf1b281a640742f1" ], "paperAbstract": "Spammers use automated content spinning techniques to evade plagiarism detection by search engines. Text spinners help spammers in evading plagiarism detectors by automatically restructuring sentences and replacing words or phrases with their synonyms. Prior work on spun content detection relies on the knowledge about the dictionary used by the text spinning software. In this work, we propose an approach to detect spun content and its seed without needing the text spinner's dictionary. Our key idea is that text spinners introduce stylometric artifacts that can be leveraged for detecting spun documents. We implement and evaluate our proposed approach on a corpus of spun documents that are generated using a popular text spinning software. 
The results show that our approach can not only accurately detect whether a document is spun but also identify its source (or seed) document - all without needing the dictionary used by the text spinner.", "pdfUrls": [ "http://homepage.divms.uiowa.edu/~sfarooqi/Files/spinningdetection-icdm17.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.52", "http://homepage.divms.uiowa.edu/~mshafiq/files/shehroze-text-spinner-icdm2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2f3799a21c7b1d9c226332e42f4702122241fcf1", "sources": [ "DBLP" ], "title": "Accurate Detection of Automatically Spun Content via Stylometric Analysis", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "2f4684a58a63a9addcf11d2f6212d7d95160733d": { "authors": [ { "ids": [ "3110521" ], "name": "Qirun Zhang" }, { "ids": [ "34751114" ], "name": "Chengnian Sun" }, { "ids": [ "38319925" ], "name": "Zhendong Su" } ], "doi": "10.1145/3062341.3062379", "doiUrl": "https://doi.org/10.1145/3062341.3062379", "entities": [ "Algorithm", "C++", "Clang", "CompCert", "Compiler", "Data dependency", "Enumerated type", "GNU Compiler Collection", "Identifier", "Optimizing compiler", "Regression testing", "Scala", "Software bug" ], "id": "2f4684a58a63a9addcf11d2f6212d7d95160733d", "inCitations": [ "689562ef27645c7fff28fec43774676bd180ac1d", "ae219ae071fb77bdbd252437a4684816fbea2b36" ], "journalName": "", "journalPages": "347-361", "journalVolume": "", "outCitations": [ "20fd34fbcf9c5b7991b3c7d01b85b2e2f2b92a04", "826b0a0d754c82fa18fb15587163862625cf518f", "fc881e8d0432ea8e4dd5fda4979243cac5e4b9e3", "5078e95c3f687416f2ebe75fdc164f75e3baaa87", "4e177c25f97220d33dcc222485d951c817a77750", "072b37d00d0960f4dfab882eaa59c20154b14b7f", "0cb3e50ab75ffd0b9b7f44a560975d5d27ddc58f", "6f18a235985a4f6da097aded3ef7e35107feb4da", "d8b4164fef65ffc7082a3c95b0a706e5c3aa38f9", "0cf9a1c966436426b65195d2578c51e4e6737555", "782d823cb0499a969f3cd0ca7aaba0bcf892fba3", 
"30a28506c804139c6a3716c2d0980a62a928342c", "0b919e1e9668a236f2c26e860d551a192aa004af", "05f0c383c785f168da8e80c903517ec5fdf71d41", "8fb5fe430419145d32fa3e4c225d4b36b4956e16", "2859e75f421621ff776d55e533dc6ee7cb4b0a92", "3369e43abcb499eea4d208f2239df00551b8d2dd", "269344e7eabb9820eb91031cd4bcdcb5eba39e3d", "43c72b790778098b5534d7e02a815e49c10090dd", "2460c9b40ea3c4bbaef53c5f4ad2717154cf15b5", "4ed39d2773b0b0818e8c37fabe1894cf63d55772", "c9f225b7585f472d6002909efa0bb17c232dc511", "d4411711d9efe36e4c71cf5da83d249e7119479f", "a05e223169ab022f800bf9f2664847919844cab9", "2bbda639ab417ca79d5948e393171e62ccdd7367", "1a487fbe5add8d02af6d22f2b277d0ee0f54ea12", "05d28dfb947b040b604ccfcef824c44033b122cf", "011f7da0095ac8c0d4477eeda2728e5f80a35767", "0b7c1bc9636d8cc66c36fb7e676d3badfe5df696", "0f978ed8a9592ea35e286230741bf3b12473ae2a", "2fa83a4555cc148ab71ac2033ba99b5507e65d29", "0ecac51f093db517b11c0853fa761f2b36d17201", "46aabcca02c47095b29f934e8d7198bf4c58c27d", "5680b51c34fae849be22a0a59ab828a1d9cde403", "0efda63ce44492d67dce5982175b91889be21fbc", "6f616f20e4ae76492b2c7ca0d98742a1e1d3de3b", "006a8a2b6cb24e11fc296d594a020c0c44bb3dff", "43355917bdfeecc08c64acfcbc2ea7ddbd1a806b", "6b201a6c630535743cd04f2421ce1ac1e3ee836c", "45bd9c91e174b4f6cd9cbb7004fd8dbb3f5f26a1", "31147e5b293aa18e414bd125f0eccbf42ae2caba", "02838cb6982e67992ae54fa616162b16ce5110c6", "ec23a18bacf7528b0dd065619bf49dc8b33783d2", "8d3044b2dda25065ad047c91c1291eab3c42e814", "c5de89e46de617992e1b6fb25014d02281f606e8", "0adfaffb6e236d886c6eda579b28f5a8530019c6", "2122f9ca2570d9fa2e6f94f6b7be6733640976f1", "0b502cf1982d321f4f90575d06c31ae8c26c6256" ], "paperAbstract": "A program can be viewed as a syntactic structure P (syntactic skeleton) parameterized by a collection of identifiers V (variable names). This paper introduces the skeletal program enumeration (SPE) problem: Given a syntactic skeleton P and a set of variables V , enumerate a set of programs P exhibiting all possible variable usage patterns within P. 
It proposes an effective realization of SPE for systematic, rigorous compiler testing by leveraging three important observations: (1) Programs with different variable usage patterns exhibit diverse control- and data-dependence, and help exploit different compiler optimizations; (2) most real compiler bugs were revealed by small tests (i.e., small-sized P) — this “small-scope” observation opens up SPE for practical compiler validation; and (3) SPE is exhaustive w.r.t. a given syntactic skeleton and variable set, offering a level of guarantee absent from all existing compiler testing techniques. \n \n The key challenge of SPE is how to eliminate the enormous amount of equivalent programs w.r.t. α-conversion. Our main technical contribution is a novel algorithm for computing the canonical (and smallest) set of all non-α-equivalent programs. To demonstrate its practical utility, we have applied the SPE technique to test C/C++ compilers using syntactic skeletons derived from their own regression test-suites. Our evaluation results are extremely encouraging. In less than six months, our approach has led to 217 confirmed GCC/Clang bug reports, 119 of which have already been fixed, and the majority are long latent despite extensive prior testing efforts. Our SPE algorithm also provides six orders of magnitude reduction. Moreover, in three weeks, our technique has found 29 CompCert crashing bugs and 42 bugs in two Scala optimizing compilers. 
These results demonstrate our SPE technique’s generality and further illustrate its effectiveness.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062379", "https://arxiv.org/pdf/1610.03148v4.pdf", "http://arxiv.org/abs/1610.03148", "https://arxiv.org/pdf/1610.03148v2.pdf", "https://arxiv.org/pdf/1610.03148v1.pdf", "https://arxiv.org/pdf/1610.03148v3.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2f4684a58a63a9addcf11d2f6212d7d95160733d", "sources": [ "DBLP" ], "title": "Skeletal program enumeration for rigorous compiler testing", "venue": "PLDI", "year": 2017 }, "2f94c1482a43338c0f7105fc0ccc9628ee2c1b2b": { "authors": [ { "ids": [ "1759515" ], "name": "Michael Menth" }, { "ids": [ "38715805" ], "name": "Frederik Hauser" } ], "doi": "10.1145/3030207.3030212", "doiUrl": "https://doi.org/10.1145/3030207.3030212", "entities": [ "Adaptive system", "Complexity", "Control system" ], "id": "2f94c1482a43338c0f7105fc0ccc9628ee2c1b2b", "inCitations": [ "126885cad3a1efa8a9ec4d718a10a41ef2aa3016", "91295da1d9a04ef726727d5e88f346441846cf2f" ], "journalName": "", "journalPages": "103-114", "journalVolume": "", "outCitations": [ "2613e7d1b36e751f66402acd6aaf8d8501b91b19", "d2d57264938173760b2aa618f115e2af4d5c74e1", "025652412d507a8cf98ecacd8a44d32ce28995e1", "882e93570eae184ae737bf0344cb50a2925e353d", "4c82cd09463cbb139e75d6adc21c53714d7f6a16", "96427b170754bd2cbb4fbeab269c60dd82de22f5", "7972269fbf1376495d44ac13485f306aed008810", "d7b05c7f3b680f4d2d14b22c9db6fe9116290d63", "91295da1d9a04ef726727d5e88f346441846cf2f", "88a797a0a3a24acb64bc18dc8313ad8dc2d6352d", "455742bb30f0115bcd06dab4e4b6c03a795a895b" ], "paperAbstract": "Moving averages (MAs) are often used in adaptive systems to monitor the state during operation. Their output is used as input for control purposes. There are multiple methods with different ability, complexity, and parameters. 
We propose a framework for the definition of MAs and develop performance criteria, e.g., the concept of memory, that allow to parameterize different methods in a comparable way. Moreover, we identify deficiencies of frequently used methods and propose corrections. We extend MAs to moving histograms which facilitate the approximation of time-dependent quantiles. We further extend the framework to rate measurement, discuss various approaches, and propose a novel method which reveals excellent properties. The proposed concepts help to visualize time-dependent data and to simplify design, parametrization, and evaluation of technical control systems.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030212" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2f94c1482a43338c0f7105fc0ccc9628ee2c1b2b", "sources": [ "DBLP" ], "title": "On Moving Averages, Histograms and Time-DependentRates for Online Measurement", "venue": "ICPE", "year": 2017 }, "2f953d8f24e5545fa0e6f6114ce8436ed11066f1": { "authors": [ { "ids": [ "11611372" ], "name": "Panagiotis Mandros" }, { "ids": [ "1717148" ], "name": "Mario Boley" }, { "ids": [ "3183025" ], "name": "Jilles Vreeken" } ], "doi": "10.1145/3097983.3098062", "doiUrl": "https://doi.org/10.1145/3097983.3098062", "entities": [ "Algorithm", "Approximation algorithm", "Database", "Functional dependency", "Information theory", "Sparse matrix" ], "id": "2f953d8f24e5545fa0e6f6114ce8436ed11066f1", "inCitations": [ "cb3caf2f5bcb5be7fb4ff2ca9c49cf26de8f40b9" ], "journalName": "", "journalPages": "355-363", "journalVolume": "", "outCitations": [ "4cbd95b0fd8a6a82e3b3eb523ea50de06c9d10aa", "3beaa98ff1ab2867969aa3c26e74a8b308fcf8c8", "547fc697865c564539bce9959bb4bf691ebd92e0", "1ada62c1cd611c411a26a0e1ebaaa16b08d0e509", "70f3ca595b2d32a90fea3f426467625df6453c76", "2b80b93ddfa9fbaf1d9c7d580664e91626066db5", "05dda0e5a518058dde3b6431d1f22b6d95cbab18", "1a83dd0348994c9077f36b0094cd0841678f0913", 
"0404bd58e5f1edbd288cd69fcbc224485af415bf", "d1e77cb86439822c8fe5e20cbc9ea231e23af6b1", "09058987e8764af8a3d5d44993eaff3885cadd24", "259da70238f076c670c6cc2901b82b3f20d472df", "b1addae9f2c9e38d2b54b0d69414b9f7f4466836", "1dc53b91327cab503acc0ca5afb9155882b717a5", "439cb4aca522b408e8d63567fc9fdf510ca0548c", "0ce1050bd5fa72e8f4ba9081cf35ca8ccbce7a25", "35c15d7dfa1fddd2c0292146412ebbec704e8be9", "f6197538138b9ff255683e27fba86aeaf258d4c3" ], "paperAbstract": "Given a database and a target attribute of interest, how can we tell whether there exists a functional, or approximately functional dependence of the target on any set of other attributes in the data? How can we reliably, without bias to sample size or dimensionality, measure the strength of such a dependence? And, how can we efficiently discover the optimal or α-approximate top-k dependencies? These are exactly the questions we answer in this paper.\n As we want to be agnostic on the form of the dependence, we adopt an information-theoretic approach, and construct a reliable, bias correcting score that can be efficiently computed. Moreover, we give an effective optimistic estimator of this score, by which for the first time we can mine the approximate functional dependencies from data with guarantees of optimality. Empirical evaluation shows that the derived score achieves a good bias for variance trade-off, can be used within an efficient discovery algorithm, and indeed discovers meaningful dependencies. 
Most important, it remains reliable in the face of data sparsity.", "pdfUrls": [ "http://arxiv.org/abs/1705.09391", "https://arxiv.org/pdf/1705.09391v2.pdf", "https://arxiv.org/pdf/1705.09391v1.pdf", "http://pubman.mpdl.mpg.de/pubman/item/escidoc:2461383:2/component/escidoc:2461382/arXiv:1705.09391.pdf", "http://doi.acm.org/10.1145/3097983.3098062" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2f953d8f24e5545fa0e6f6114ce8436ed11066f1", "sources": [ "DBLP" ], "title": "Discovering Reliable Approximate Functional Dependencies", "venue": "KDD", "year": 2017 }, "2f963c50025d607c5009be311bfe737aac12a7bb": { "authors": [ { "ids": [ "19169712" ], "name": "Jaime Arteaga Molina" }, { "ids": [ "36609334" ], "name": "St\u00e9phane Zuckerman" }, { "ids": [ "1745279" ], "name": "Guang R. Gao" } ], "doi": "10.1109/IPDPS.2017.63", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.63", "entities": [ "Cube", "Data cube", "Data-intensive computing", "Dynamic programming", "Graph500", "IBM WebSphere eXtreme Scale", "Multithreading (computer architecture)", "NWChem", "OpenMP", "Parallel computing", "Programmer", "Thread (computing)" ], "id": "2f963c50025d607c5009be311bfe737aac12a7bb", "inCitations": [ "3d7de57b5dae7e7a5d8a2424c79e46f73c184938" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "799-808", "journalVolume": "", "outCitations": [ "94bc92a2275894b498cfed61fee1d261d4daf708", "1134aaa6a93f502cac9ce551b13c00b10ff34feb", "153fda7d7963c3bbec36e69909973f96a242d1f7", "7ac941fa8c72f8a931ae2a48118a9893d6f1d083", "4099889b566e4c1d0f9f90457f77b414cca5cb3c", "6a668dfe4fa05408a5f752201ad83e02181ed6e2", "59fb33ff0f35170529107f8a1a519cdac8464fd6", "004c68d94d2806be41cf40cff60bffcf9d4aff0b" ], "paperAbstract": "The overwhelming wealth of parallelism exposed by Extreme-scale computing is rekindling the interest for finegrain multithreading, particularly at the intranode level. 
Indeed, popular parallel programming models, such as OpenMP, are integrating fine-grain tasking in their newest standards. Yet, classical coarse-grain constructs are still largely preferred, as they are considered simpler to express parallelism. In this paper, we present a Multigrain Parallel Programming environment that allows programmers to use these well-known coarse-grain constructs to generate a fine-grain multithreaded application to be run on top of a fine-grain event-driven program execution model. Experimental results with four scientific benchmarks (Graph500, NAS Data Cube, NWChem-SCF, and ExMatEx's CoMD) show that fine-grain applications generated by and run on our environment are competitive and even outperform their OpenMP counterparts, especially for data-intensive workloads with irregular and dynamic parallelism, reaching speedups as high as 2.6x for Graph500 and 50x for NAS Data Cube.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.63" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2f963c50025d607c5009be311bfe737aac12a7bb", "sources": [ "DBLP" ], "title": "Multigrain Parallelism: Bridging Coarse-Grain Parallel Programs and Fine-Grain Event-Driven Multithreading", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "2fb5246ca54fa57bb42ded5c49581015d6f43f0e": { "authors": [ { "ids": [ "35451534" ], "name": "George Papadimitriou" }, { "ids": [ "2820066" ], "name": "Manolis Kaliorakis" }, { "ids": [ "2001585" ], "name": "Athanasios Chatzidimitriou" }, { "ids": [ "1718647" ], "name": "Dimitris Gizopoulos" }, { "ids": [ "2315734" ], "name": "Peter Lawthers" }, { "ids": [ "1769754" ], "name": "Shidhartha Das" } ], "doi": "10.1145/3123939.3124537", "doiUrl": "https://doi.org/10.1145/3123939.3124537", "entities": [ "64-bit computing", "ARM architecture", "Benchmark (computing)", "Central processing unit", "ChIP-on-chip", "Correctness (computer science)", "Dynamic voltage 
scaling", "Multi-core processor", "X86" ], "id": "2fb5246ca54fa57bb42ded5c49581015d6f43f0e", "inCitations": [ "aaeedaf5fc183a6ebcc634959218461eccf84f0a" ], "journalName": "", "journalPages": "503-516", "journalVolume": "", "outCitations": [ "5fbd4b6569af716e32d1fa098d14a1b81a8a5f8c", "3d5f734da528ea05bc63615481c4caadbdbc9f86", "6977f6a87c11d76cf326d8420f4397af1991b4f9", "b4bd596a12c7d2258a2d4437a91642a8ce40e9a5", "27516f9d8649b9e26a3e68e8348c39766f4489f8", "110e17c5f1e1def935a3e684ae16e695eb32d004", "485b9204a1df97fceb4aedbc62e9efbc09a4525a", "2baf1be469ee5fdc07606e6c67eccf758ac1edc1", "c1a88fb8ab3042eb378d02e83b68e6c11c28df94", "b1162cf8d956f02acecb40e1ca457a62619c4d8a", "02514ff22c82354b0ebb065dcb604c016e67a15e", "1144956f60e04e0839dd2fc5b8031fb4a4599072", "4b9b392f456dd54d51f2167182102c2d860c5a1c", "157ffa82bbade4bdbce4b183262911518ecd66e2", "204e4cef7acf5ca52cb7cdd4aab3be6403cd6120", "9e0ef1dee22e75edfb73a0ee1015e13e35130538", "63ca46c9f0291f6b16d4185d451fcde06378ef21", "7232af3c4a3c4641ece9818d1c484260ce33a266", "b108f21502bd2b2cbd7df361af291a4f1afa65e8", "2def54d33c1f3f05f56ef71b4f77a17003f55684", "b8ef6ceef770a9af674c9cf2e78e737872afbd9a", "0b0b8f1c3ffa887bf8080a3d44348c3d98a11272", "4ec82dc8773675c304602c8d8f6ff38ce3ee945b", "e61283985b604a6a907f21326a6d136e72628fe2", "541164c421575009d8639619de2349e64b18b08b", "eae567f67e7fecac4a030735a871f6cf72a94882", "69c19cc1272d1958599e9b8fd1f3a103b0cd21e0", "17abf3d9cb0db9583f7a19ab1b1e4a9c1a32aec5", "564094a52142a1717387befe831b5047dc4beaee", "55e061ec1faa9e4a8f305be1eecbe97f180f1585", "30690e66ed3a2f7989a389d0f0189c49e6483888", "614bdb722ea07158b4a0ee30bf1829b8f1b5ddff", "10dc03bab236aa58778b74520978ec280ecdf731", "6ee0d9a40c60e50fab636cca74c6301853d42367", "f4600c43fd44b79695434464c30dee9c36bc903c", "0ae24644ca8866321ce6c117c5823ad9f149bef9", "363c05c179bcb90e3341e76780196f7f526f3d71", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "35a1ae598c53785ec3957e368040563ee366ecbe", "2ab0b905f40d76e6df6523a7c6c0b2bc480f4304" 
], "paperAbstract": "In this paper, we present the first automated system-level analysis of multicore CPUs based on ARMv8 64-bit architecture (8-core, 28nm X-Gene 2 micro-server by AppliedMicro) when pushed to operate in scaled voltage conditions. We report detailed system-level effects including SDCs, corrected/uncorrected errors and application/system crashes. Our study reveals large voltage margins (that can be harnessed for energy savings) and also large Vmin variation among the 8 cores of the CPU chip, among 3 different chips (a nominal rated and two sigma chips), and among different benchmarks.\n Apart from the Vmin analysis we propose a new composite metric (severity) that aggregates the behavior of cores when undervolted and can support system operation and design protection decisions. Our undervolting characterization findings are the first reported analysis for an enterprise class 64-bit ARMv8 platform and we highlight key differences with previous studies on x86 platforms. We utilize the results of the system characterization along with performance counters information to measure the accuracy of prediction models for the behavior of benchmarks running in particular cores. Finally, we discuss how the detailed characterization and the prediction results can be effectively used to support design and system software decisions to harness voltage margins for energy efficiency while preserving operation correctness. 
Our findings show that, on average, 19.4% energy saving can be achieved without compromising the performance, while with 25% performance reduction, the energy saving raises to 38.8%.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3124537", "http://cgi.di.uoa.gr/~dgizop/index_files/MICRO-2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2fb5246ca54fa57bb42ded5c49581015d6f43f0e", "sources": [ "DBLP" ], "title": "Harnessing voltage margins for energy efficiency in multicore CPUs", "venue": "MICRO", "year": 2017 }, "2fb9d624d8d5fac39e32e067c272e1260b8b9e84": { "authors": [ { "ids": [ "2003728" ], "name": "Harald Servat" }, { "ids": [ "24636606" ], "name": "Antonio J. Pe\u00f1a" }, { "ids": [ "2600478" ], "name": "Germ\u00e1n Llort" }, { "ids": [ "1767394" ], "name": "Estanislao Mercadal" }, { "ids": [ "40215479" ], "name": "Hans-Christian Hoppe" }, { "ids": [ "1699563" ], "name": "Jes\u00fas Labarta" } ], "doi": "10.1109/CLUSTER.2017.50", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.50", "entities": [ "Binary file", "Dynamic random-access memory", "Multitier architecture", "Volatility", "Xeon Phi" ], "id": "2fb9d624d8d5fac39e32e067c272e1260b8b9e84", "inCitations": [ "1cf5e11f8230c9badb8e963c070ecca2c1bda709" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "126-136", "journalVolume": "", "outCitations": [ "f6d5ff67efd843caa15002b1a4a66cb704668044", "48b22e82dba733f04a5af3408ecab872ba38f0c3", "5d0fc2dcc3ce19998e4de6c4dd1702df0367c108", "f4ed7fb35916bd0d36a53198384bb0ed2ff34c3f", "d1a6a735e72ababac022d0d5a35fcb620742ca98", "4511599bc8cf05af6355a36dad3e1a9b75bb2301", "f3325ace129dec914966f9894d9f412e5e04bdc2", "298c14f1afc65a9c58b8ae5abe16a27ea4f13a71", "c9eac2b2f2224f8aa4deb1a08d8e1228cf3dbd62", "8b8d9dbe3e755cbbab950b6133b1cc11d8e08943", "398aaf00253e2c29e6238dd0499aa3a75c76914c", "6d124439630bd2347ebe25b48879e01ee747f716", "18f1e9248ad1dc3d249f911b3f609a087c3aca39", 
"403cbd3649669b52f0fac0d23af13f8e292864d5", "0653e2ed9f683868cb4539eb8718551242834f6b", "912358039b7ecf026a04e7e34b2f36c19913b1ef", "04ce0bd4df15e05d376cad98de8b9a83380341d3", "178599e5e976e82528e71cb2e1b812d588fa0e44", "7dc5dcf29c65c576b37ee6359f58df3ede32b90c", "534c2f4f1165a6afcbab125254cf8fcdf4ca10d1", "1c15910d27ee940f71bd1d9a5c25c0230e3025fb", "24ff16b30689cb61df2ac391f5306584769ea7fb", "65128e128751d8f27d0bf765db70e563755f027a" ], "paperAbstract": "Multi-tiered memory systems, such as those based on Intel® Xeon Phi™processors, are equipped with several memory tiers with different characteristics including, among others, capacity, access latency, bandwidth, energy consumption, and volatility. The proper distribution of the application data objects into the available memory layers is key to shorten the time– to–solution, but the way developers and end-users determine the most appropriate memory tier to place the application data objects has not been properly addressed to date.In this paper we present a novel methodology to build an extensible framework to automatically identify and place the application’s most relevant memory objects into the Intel Xeon Phi fast on-package memory. Our proposal works on top of inproduction binaries by first exploring the application behavior and then substituting the dynamic memory allocations. This makes this proposal valuable even for end-users who do not have the possibility of modifying the application source code. We demonstrate the value of a framework based in our methodology for several relevant HPC applications using different allocation strategies to help end-users improve performance with minimal intervention. 
The results of our evaluation reveal that our proposal is able to identify the key objects to be promoted into fast on-package memory in order to optimize performance, leading to even surpassing hardware-based solutions.", "pdfUrls": [ "http://upcommons.upc.edu/bitstream/handle/2117/109407/Automating+the+Application+Data+Placement+in.pdf;jsessionid=EC18689FCF2BDA1227D746629E3340AA?sequence=4", "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.50" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2fb9d624d8d5fac39e32e067c272e1260b8b9e84", "sources": [ "DBLP" ], "title": "Automating the Application Data Placement in Hybrid Memory Systems", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "2fbac3e9572e08ce9ce63a64e9e0e7a7ffbe7083": { "authors": [ { "ids": [ "2998413" ], "name": "Wim Lavrijsen" }, { "ids": [ "1702354" ], "name": "Costin Iancu" } ], "doi": "10.1109/IPDPS.2017.98", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.98", "entities": [ "Algorithms for Recovery and Isolation Exploiting Semantics", "Blocking (computing)", "Concurrency (computer science)", "Direct memory access", "Heuristic", "High- and low-level", "InfiniBand", "Non-blocking algorithm", "Remote direct memory access", "Revolution in Military Affairs", "SPMD", "Scheduling (computing)", "Software portability", "Speedup", "Synthetic data", "Throughput" ], "id": "2fbac3e9572e08ce9ce63a64e9e0e7a7ffbe7083", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "988-997", "journalVolume": "", "outCitations": [ "150c6c618a3ea2b3180f53cf0a63e172add8a872", "201b01c3d3c87dec8e09fc44536d1d30adcffbf0", "0d3e6362886b326901c5d740767d9aa7172bdb55", "0ca1e465dd85b8254bcdd7053032d7eab6e2d4b4", "4fafd03a57348374f894823b0c7cfe6c85e5bd93", "9860074998cf01059d46bec2063f059276e749d8", "5d8e5e3eef73dbd4cc3b01cb323dcc6369532f66", 
"41ab20fc1a3815dc8aa4e23b0316e26f7acb5f8e", "8beda53e2b3cd04434aaaabef463ef8e18706df1", "f8e9b050c93af6dea582563f61b6460b590bc3af", "09cc6a87c7ae189ae6dbc2fd246c1b06726ab3c5", "057a8310124ef6565fbd13ae1ec1412b96dedae8", "25f017efd2905c6d0c6a92f2dfe19113ee42938e", "30c8c8f389ee00b6321814d35412698d0a28307b", "f4c217923ceebd709e8eb106b1f7d25fd5d088c2", "4110d5ad162fbf43a3418f28b4d46609c2a147be" ], "paperAbstract": "We present methods for the effective application level reordering of non-blocking RDMA operations. We supplement out-of-order hardware delivery mechanisms with heuristics to account for the CPU side overhead of communication and for differences in network latency: a runtime scheduler takes into account message sizes, destination and concurrency and reorders operations to improve overall communication throughput. Results are validated on InfiniBand and Cray Aries networks, for SPMD and hybrid (SPMD+OpenMP) programming models. We show up to 5! potential speedup, with 30-50% more typical, for synthetic message patterns in microbenchmarks. We also obtain up to 33% improvement in the communication stages in application settings. While the design space is complex, the resulting scheduler is simple, both internally and at the application level interfaces. It also provides performance portability across networks and programming models. We believe these techniques can be easily retrofitted within any application or runtime framework that uses one-sided communication, e.g. 
using GASNet, MPI 3.0 RMA or low level APIs such as IBVerbs.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.98" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2fbac3e9572e08ce9ce63a64e9e0e7a7ffbe7083", "sources": [ "DBLP" ], "title": "Application Level Reordering of Remote Direct Memory Access Operations", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "2fbb2ed3e1be11e1407d8cffd612fbe0172ceaab": { "authors": [ { "ids": [ "8803278" ], "name": "Matt M. T. Yiu" }, { "ids": [ "31929161" ], "name": "Helen H. W. Chan" }, { "ids": [ "33431705" ], "name": "Patrick P. C. Lee" } ], "doi": "10.1145/3078468.3078470", "doiUrl": "https://doi.org/10.1145/3078468.3078470", "entities": [ "Attribute\u2013value pair", "Data redundancy", "Degraded mode", "Erasure code", "Experiment", "Failure rate", "High availability", "In-memory database", "Kinetic Void", "Normal mode", "Testbed", "Throughput", "USB flash drive", "YCSB" ], "id": "2fbb2ed3e1be11e1407d8cffd612fbe0172ceaab", "inCitations": [ "bacdc6c3ead0d41f0537941d30bb000f5b569c51", "1d434ae65664b49d57418d546e0839aed43fe282" ], "journalName": "", "journalPages": "14:1-14:12", "journalVolume": "", "outCitations": [ "3168681722207c86827e596860115a2977ce761f", "18642fd39dd265cbc149b937f2f5ca2e925e3484", "165d99c9d30be5d301b998dc23c1a6a28fd0c425", "4af63ed343df388b6353b6fc77c7137d27822bf4", "717773a6fdf93a9f0627b73e2d18a2049b36545b", "6d1ca1108d9d96e5607571502552ad04464d7f15", "3b547d706d33c110f96bf1c0e805ab8cc82afdbf", "77f651d37c1d1fa7c69c8966680aec180e8f48dc", "7ae26da9b7666812857883536870c315538f7f10", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "1ac1348938a45e1da84be8caac78e3097acaf0c1", "09e9a5a9a0ad54816b6f8608b29e5818d14d7697", "18d114d70b28203dcb847edcc4c527803f564f70", "361c6ee2571a20be19345e1dfdcb5ba4e9f1c196", "1594118f2696b573f08510cf837f3b37db87face", "20a44558eed182a971f7add68ecc5931fbca2a65", 
"eafe6fa26fa484dd8b1a17e87a35de79ff7cdd29", "529cab4102290a3fbce3541b668d52812e6ead0f", "a2f9c297485360bc46e3cd9cfd7561176290a7f3", "4cb7f6fd48468da2f985a44f021fa5b49eb7a6ce", "25e5d5a046afa5fcde7be23d087ae69f4b438e13", "0579cb7ceecac67eefb63bef0436fbf5e552cf72", "02ac23384523c2e2f9bc52cd29313dfd5aad22a3", "2faf1a539c71f8e1deccbeae90663fffe9dae750", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "3f4297d2e5ae196f30f9a18dcd8400dfca5d2e47", "9aa0d7253574e50fe3a190ccd924433f048997dd", "0e69ee351252fd09a50e6baae53b4776009825ae", "03621901aaa2d77270478326b3a25350508a2b93", "58b628792d3eb22a034a871ed3cf373afe591928", "51098280164dcc12b1ef69632430a8a362b70452", "4b49d374c9306b929743e7d213c28cd47fc2d4fc", "627b93073977b7b7c5ae0cf610f41ee0ed27669c", "0742cebd319c73d45a72d5d0303e33472a16a64b" ], "paperAbstract": "We present MemEC, an erasure-coding-based in-memory key-value (KV) store that achieves high availability and fast recovery while keeping low data redundancy across storage servers. MemEC is specifically designed for workloads dominated by small objects. By encoding objects in entirety, MemEC is shown to incur 60% less storage redundancy for small objects than existing replication- and erasure-coding-based approaches. It also supports graceful transitions between decentralized requests in normal mode (i.e., no failures) and coordinated requests in degraded mode (i.e., with failures). We evaluate our MemEC prototype via testbed experiments under read-heavy and update-heavy YCSB workloads. 
We show that MemEC achieves high throughput and low latency in both normal and degraded modes, and supports fast transitions between the two modes.", "pdfUrls": [ "http://adslab.cse.cuhk.edu.hk/pubs/systor17.pdf", "https://arxiv.org/pdf/1701.08084v2.pdf", "https://arxiv.org/pdf/1701.08084v1.pdf", "http://arxiv.org/abs/1701.08084", "http://doi.acm.org/10.1145/3078468.3078470" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2fbb2ed3e1be11e1407d8cffd612fbe0172ceaab", "sources": [ "DBLP" ], "title": "Erasure coding for small objects in in-memory KV storage", "venue": "SYSTOR", "year": 2017 }, "2fcfd74636e564467766fd4bf344efa1f277fcfa": { "authors": [ { "ids": [ "1694978" ], "name": "Cheng Wang" }, { "ids": [ "11324564" ], "name": "Jianyu Jiang" }, { "ids": [ "3302534" ], "name": "Xusheng Chen" }, { "ids": [ "16235588" ], "name": "Ning Yi" }, { "ids": [ "2944075" ], "name": "Heming Cui" } ], "doi": "10.1145/3127479.3128609", "doiUrl": "https://doi.org/10.1145/3127479.3128609", "entities": [ "AMD Accelerated Processing Unit", "Failure rate", "Inbound marketing", "Internet protocol suite", "MySQL", "Paxos (computer science)", "Redis", "Remote direct memory access", "Response time (technology)", "Scalability", "Server (computing)", "Shingled magnetic recording", "State machine replication", "Throughput" ], "id": "2fcfd74636e564467766fd4bf344efa1f277fcfa", "inCitations": [ "340d6db56d94623ac090599cf9ea5287370607ef", "1b1dda022e899b2d922adf330c96a8c9f7ad2abe" ], "journalName": "", "journalPages": "94-107", "journalVolume": "", "outCitations": [ "1220e4a011c46804d4369b5580dc7fb6e387af54", "4af63ed343df388b6353b6fc77c7137d27822bf4", "b77bb6ff9a7018fa4f24893a38c27ac6efbfd4e1", "1d15930cd9e4ececf22fd96bf9ba52f12dc0665e", "b129f84262024128ee64300ab257744b0b5ed8fb", "af8b04305b92127b468a610b591b07f7897b2446", "663e064469ad91e6bda345d216504b4c868f537b", "065f9a08dfbe89ed602b1d4f062d77dd8964858f", "066dda2494a69c0cd50e6a2b758cfd45facad84f", 
"4e0e26b2c421a3faf75bee48e9124b59914488a3", "2649ad4f86a067ec4fdfd3f52efedabc0c759a23", "0c8f7a7819c410f7c74b771e9db7e66892fe02d5", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "4f5ef5dfe854a9c9f34af44d306534c9a1606b15", "29a1148d75878671dc3663bf480e33d7bd91597d", "20f5f8733134d87041b95b742d613051a1fb3fdb", "e9b416fa09588ab59506670927af2d5d4a02ff42", "114801eccb5eb0831fd1848f351a138253a42f15", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "bed6d0e530f20332c284a463c754ce1d304aca38", "3b62c1f19254820c75dd0011f038d7aae04b3414", "082bc77513862f8d709322916f44d6fe2f2d06d7", "3279255afe7610dfe377bcd082e7bb15a6dec441", "573e9cb890d39c790b58bfa805526d40e8b472ec", "152f85e4bd2853a458d1350bb64d4a6adca24832", "8d19a833b182562c7416159e14f5ace38872b87e", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "205cf007cf77bbf81e55b74635017087585f7b7c", "036ebe81fc7bd9000c3edda83fa30bee03fedc1a", "0a9cc699ed94b4729de3255d76305b91b6a56aaa", "1e9714508f9cc07cbf33ebce6f8d7bdfc395e9be", "48dd6190a3741cfae823a07e08b0c9e9c68b1397", "3cc2336cb701ab40273d0b5603064a70a209b4c6", "225603198cc415d363db8a8a2bd30b0df3c963b1", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "38ea37420f44e747af3e3e9c6a6349db6f88b42c", "6c08b4037b0390f66e0255cce14636f0327588f0", "6bfa310218e10ebd88ae290a5254f4ab19355967", "42492ee02ac884006672f26661c05e0dd601ce59", "2035c8f33909ac206c4d1a3bdee611577fb2c5d1", "0c5ba461681846fdf12dcc26c66f55bf9d1ed54c", "88d02195a5d5e8b178d2baf524cca48164f85930", "59250c7388caba98bd4adc2f1969fbec5500ed6a", "959cfe05045e1c7e80406209244d3346061ca4e6", "50415403b35868328a07e8a7a537c96eb19baa36", "00c181b8b64e824fbe0172339f1e4560b557fab5", "155ca30ef360d66af571eee47c7f60f300e154db", "75c9e96dd2c12cece82c27d441b932620d451647", "daf0cd0076b388712ea12ec4105572997fc50cdf", "0e6f25ca2e9dbcca8a630ac5924470aafa3fbcac", "8932c6d523f9f5aba76993c71c0089ba15155a7c", "514a5c15e8cf3f681febecad954a4508d9189c99", "27f071ccbea5a4940dcc585ba4cfa9258bf2bcdf", "a1c704b281e939d343219edffbc84b379ab8a571", 
"2c7db186b95476c5e0dc63703efdd2460d9e0120", "c346300f1b64093acf535a8b1a55da0fa1f6a785", "01667cd68d259d08807e266e07c4f8eb1c81d2af", "4973d22ad92fe2999f18cc57dd4a4cad81ba2cfe", "7db303e5171f09dfd25065ba8a70a4d74e9ae345" ], "paperAbstract": "State machine replication (SMR) uses Paxos to enforce the same inputs for a program (e.g., Redis) replicated on a number of hosts, tolerating various types of failures. Unfortunately, traditional Paxos protocols incur prohibitive performance overhead on server programs due to their high consensus latency on TCP/IP. Worse, the consensus latency of extant Paxos protocols increases drastically when more concurrent client connections or hosts are added. This paper presents APUS, the first RDMA-based Paxos protocol that aims to be fast and scalable to client connections and hosts. APUS intercepts inbound socket calls of an unmodified server program, assigns a total order for all input requests, and uses fast RDMA primitives to replicate these requests concurrently.\n We evaluated APUS on nine widely-used server programs (e.g., Redis and MySQL). APUS incurred a mean overhead of 4.3% in response time and 4.2% in throughput. We integrated APUS with an SMR system Calvin. Our Calvin-APUS integration was 8.2X faster than the extant Calvin-ZooKeeper integration. The consensus latency of APUS outperformed an RDMA-based consensus protocol by 4.9X. APUS source code and raw results are released on github.com/hku-systems/apus.", "pdfUrls": [ "http://www.cs.hku.hk/research/techreps/document/TR-2017-03.pdf", "http://doi.acm.org/10.1145/3127479.3128609" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2fcfd74636e564467766fd4bf344efa1f277fcfa", "sources": [ "DBLP" ], "title": "APUS: fast and scalable paxos on RDMA", "venue": "SoCC", "year": 2017 }, "2fd790e7ad1deb72bf3497a3f16c1759d185f71f": { "authors": [ { "ids": [ "3184110" ], "name": "John P. Rula" }, { "ids": [ "2267500" ], "name": "Fabi\u00e1n E. 
Bustamante" }, { "ids": [ "40042941" ], "name": "Moritz Steiner" } ], "doi": "10.1145/3131365.3131402", "doiUrl": "https://doi.org/10.1145/3131365.3131402", "entities": [ "Application programming interface", "Cellular organizational structure", "Change detection and notification", "Classless Inter-Domain Routing", "JavaScript", "Landline" ], "id": "2fd790e7ad1deb72bf3497a3f16c1759d185f71f", "inCitations": [], "journalName": "", "journalPages": "191-204", "journalVolume": "", "outCitations": [ "58c65c7cf4929052dcc3e00daadb293a24daecab", "1f56e729fd9e9a8c36ba3a53f8b58b89ba426a14", "4f2c94a8d689863859ac849ebb83823770cf3d6a", "2f74156349d454fd054f18f23c305613df19d28b", "0892739439c0b4a92f78da0577e7b012996ae00e", "23955ab6c6fa1c382b173815cde04886e6cac988", "454040aab20371dba932aaa2c78858c0b9b4006e", "03a264cb2394da393689ec120209cd7583510ac0", "546c0cfed69f188a0ca661c8db9b099f554a63d1", "00aaa84be127c04a21b5f5f8dc5d2426921654e2", "34eda9d54ec01fed99068c2b66aa198008b99105", "3f62fe7de3bf15af1e5871dd8f623db29d8f0c35", "036277d492dd5777e87e5b33ffd809e5c617a37a", "841bf3cd10f63737d95979cb5648625cb308c394", "9a2c1a8673b84e4276552bbfb3662c0c7df1168c", "4d6fb20251d8f4ae199720f37282c736073af527", "9b5933c0c539c9c48afe423da915cba71be270c3", "32f84721be8c2e6db2ddcde053a7dca50a1d22aa", "cdbb5b5bf2e7c3b320e1b4a6b12d023a0c180570", "59298e526c73fd2b3fd5e57d4102c9a6412f37c8", "098cc8b16697307a241658d69c213954ede76d59", "0507b04c131f2244524fda97cd1707af5760216e" ], "paperAbstract": "The impressive growth of the mobile Internet has motivated several industry reports retelling the story in terms of number of devices or subscriptions sold per regions, or the increase in mobile traffic, both WiFi and cellular. Yet, despite the abundance of such reports, we still lack an understanding of the impact of cellular networks around the world.\n We present the first comprehensive analysis of global cellular networks. 
We describe an approach to accurately identify cellular network IP addresses using the Network Information API, a non-standard JavaScript API in several mobile browsers, and show its effectiveness in a range of cellular network configurations. We combine this approach with the vantage point of one of the world's largest CDNs, with servers located in 1,450 networks and clients distributed across 245 countries, to characterize cellular access around the globe.\n We find that the majority of cellular networks exist as mixed networks (i.e., networks that share both fixed-line and cellular devices), requiring prefix - not ASN - level identification. We discover over 350 thousand /24 and 23 thousand /48 cellular IPv4 and IPv6 prefixes respectively. By utilizing address-level traffic from the same CDN, we calculate the fraction of traffic coming from cellular addresses. Overall we find that cellular traffic comprises 16.2% of the CDN's global traffic, and that cellular traffic ranges widely in importance between countries, from capturing nearly 96% of all traffic in Ghana to just 12.1% in France.", "pdfUrls": [ "http://aqualab.cs.northwestern.edu/component/attachments/download/772", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final233.pdf", "http://doi.acm.org/10.1145/3131365.3131402" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2fd790e7ad1deb72bf3497a3f16c1759d185f71f", "sources": [ "DBLP" ], "title": "Cell spotting: studying the role of cellular networks in the internet", "venue": "IMC", "year": 2017 }, "2fea3924ed086fd94ec56543e0136e0a18099de8": { "authors": [ { "ids": [ "1772752" ], "name": "Mohsen Ghaffari" } ], "doi": "10.1145/3087801.3087830", "doiUrl": "https://doi.org/10.1145/3087801.3087830", "entities": [ "Algorithm", "Clique (graph theory)", "Distributed algorithm", "Maximal independent set", "Randomized algorithm", "With high probability" ], "id": "2fea3924ed086fd94ec56543e0136e0a18099de8", "inCitations": [ 
"b0f1df2cdaad3f271169ddf44a51d3e93eb22ce6", "297dbe1090536a25b80a2b6d9f113dba90c1da5a" ], "journalName": "", "journalPages": "141-149", "journalVolume": "", "outCitations": [ "ae51cb991ea1820115cc2e42dbe1b2468bcaff05", "6faa7a1dc1bb32220e0153f109e397ff567a43cb", "0b291c329178573c7c78a653ed3282a5350cae58", "04efeaef84bb9fd0919798efd0d4e88b1542508f", "26093f353ac3956ecc1de56fe9fd9cdfc448b01f", "07bfe5a6f578548e01b3f5ae11878c5e7ea1c2e1", "f562d5471143e525b83e24859e0422fa21215ba3", "9b8426ca1bf4d362f1d446a16da6ce40f2e26a55", "222a8b02a0f81b485c26ba71138e3ed726877d3a", "622adc72ff5a41dfa9887096beaa957710a41a65", "25186afb27fd7d50b2f6b0e03487b6020e1e439e", "9871fd28db4458acb0e82e9807c40a06c366f468", "db32b0fde36679286b2406dad2926ab81a6b4020", "b2e31dac80a70a1ecddb47b09556003e3ece6928", "9692877950141f4bf289701e776015c717850764", "5674293866d738cdb7fea2f14a8935fb04725f33", "4018a9578fd25afbdc3b1a1e1b2ebfe99528043e", "3000cee8f203c944ac7a4d3707474c80895d78d7", "a2cf1792a861746fa99f05125440709035ad6dc2", "ca8450d56a5fc7d407ffb001b0ca9f5ad296be08", "b29b445de9b5f07640084bffa971d649c8dde7be", "f2c4f4a13e38b906543f4a60278c9ab3641e9c02", "117e8e6cdddbc7c99da4dd37e513ad0051c1097a", "245dce914e257f12391b0170025374323693acc7", "29ca229d9fbb0870690244687b56913efbd42cb6", "44bd1e89cfc90fb10b673340d1385816ec998b7e", "ec2544af2429518eefffdd094cbebd5e994661c3", "cb769c987476d4b3552cdfd90220657af01e0839", "d00db20e0a1a92d4bb566d20e3341060b9f4158a", "76af46d5c85bff984562fb22db33112c88479c9f", "037fa2c0495830857ed0e93ca05af754723c4165", "145c3ca2ea0faebcdc42de8fa24dc57ecdca341d", "48e4dd20c5c8fa110c31a4c3409c92d91f2f3e5f", "3f084898e62b5824cf70100b91a63f1c2450a467", "1f912ac1e1f8a9bfda1cf7664648a74b6559a407", "073a1729adfd962364743ab160bb5f211b6c63ad", "f6993f4a347a3a46b190049a9a2f392f558ca926" ], "paperAbstract": "Computing a Maximal Independent Set (MIS) is a central problem in distributed graph algorithms. 
This paper presents an improved randomized distributed algorithm for computing an MIS in an all-to-all communication distributed model, known as the congested clique model, defined as follows: Given a graph G = (V, E), initially each node knows only its neighbors. Communication happens in synchronous rounds over a complete graph, and per round each node can send O(log n) bits to each other node. We present a randomized algorithm that computes an MIS in \u00d5(log \u2206 / \u221a(log n) + 1) \u2264 \u00d5(\u221a(log \u2206)) rounds of congested clique, with high probability. Here \u2206 denotes the maximum degree in the graph. This improves quadratically on the O(log \u2206) algorithm of [Ghaffari, SODA\u201916]. The core technical novelty in this result is a certain local sparsification technique for MIS, which we believe to be of independent interest.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087830", "http://groups.csail.mit.edu/tds/papers/Ghaffari/podc124.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/2fea/3924ed086fd94ec56543e0136e0a18099de8.pdf", "s2Url": "https://semanticscholar.org/paper/2fea3924ed086fd94ec56543e0136e0a18099de8", "sources": [ "DBLP" ], "title": "Distributed MIS via All-to-All Communication", "venue": "PODC", "year": 2017 }, "2ff7fdd38c05a7a763b7426a32cd036a312b2e43": { "authors": [ { "ids": [ "1751579" ], "name": "Tony Nowatzki" }, { "ids": [ "1979955" ], "name": "Vinay Gangadhar" }, { "ids": [ "2774880" ], "name": "Newsha Ardalani" }, { "ids": [ "1720300" ], "name": "Karthikeyan Sankaralingam" } ], "doi": "10.1145/3079856.3080255", "doiUrl": "https://doi.org/10.1145/3079856.3080255", "entities": [ "Algorithm", "Big data", "Computer vision", "Concurrency (computer science)", "Dataflow", "Dataflow programming", "Domain-specific language", "Fixed-function", "General-purpose computing on graphics processing units", "Low-power broadcasting", "Machine learning", "Microarchitecture", "SIMD" ], "id": 
"2ff7fdd38c05a7a763b7426a32cd036a312b2e43", "inCitations": [ "65c302fc5eedfb33824ef18879eb53cc0327ea41", "4f4486bc7085c92d8b2797e92194ec7aa1d32292", "4d33967a95e9678d6c29bfe016ce12072ee2e8e9", "6ff08854494ec866510cbb23fb0e18c1f977007e" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "416-429", "journalVolume": "", "outCitations": [ "6c15928f2a1b8525d2aa4e078cfa62847ba422bf", "5646a51461b64be3e9511dff06ea5abef0a399d4", "60cfe41fd68644fb19cba99babae694a2acacc17", "8627aaa6a728d5c39e28c9b2e52f3c0ffe075630", "f632d67c13a113fd468d910078b4be180f92127f", "3f210a463e6d8054929ee98267b7800e49275e06", "8f6ecd0731a0d45de7d8d31ce0fe3c454d5f6ec1", "7ce25a0852e2345be1a1bd02b8eb4cefb9d47073", "ae3a86639fff689bd8ae5554cc356edb0909261c", "02c94f9cc8e60f1d51ebca1524adfcb15ee34a0e", "b27f70e9076b23b56d12960866bbe96b12dd4b62", "07f3b8cfd59624acf80e16794bd3f2bc69acd8e7", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "1082c6c938be68225d928f9a327b96d1dd65d7d1", "d589123c9665f52c1c06a0b3c80aa94c423a8908", "6750c76ab238cc9df10d3146a8c3dd2891b13976", "0659411ebccf1abca4f9a3a5c8744b8bb153933e", "8af1a4b65bd380b8775c15e2a9323ae5184bc8dd", "2e5ef3e461eaccf533aaae000ef847ae581d4363", "27c204d3de3e9289bdf9d67d8e646e6527b18b1a", "c03e193c400a0a9965b07a2b1ca100ca5849eb7a", "55bc52bbec8972d62874bcbe169dac573b57d1df", "a4f5a4296f29e1457b84b68c08d98e3f338145d6", "3cf9e144449d5d62d687e2a28aa81f47e6e13ffc", "02a17a291cbacc0666162845559ea0873f10122d", "269c24a4aad9be622b609a0860f5df80688c2f93", "3364bc50921a9566d61ef8cb73baa82341725e4b", "6fbb7db25a5a3a2788e2b16a5e54abc1ac36aa76", "320a6faa396f27f6f83b22ded48944ffd574fa1e", "7564661f026abd1d472707c15357494fd79e63c0", "399acab2bee6eccbfffe4a2ce688b6b1075e9c5e", "3f63a2362b1fabc83194d10d6b5a0b2a56c1799b" ], "paperAbstract": "Demand for low-power data processing hardware continues to rise inexorably. Existing programmable and \"general purpose\" solutions (eg. 
SIMD, GPGPUs) are insufficient, as evidenced by the order-of-magnitude improvements and industry adoption of application and domain-specific accelerators in important areas like machine learning, computer vision and big data. The stark tradeoffs between efficiency and generality at these two extremes pose a difficult question: how could domain-specific hardware efficiency be achieved without domain-specific hardware solutions?\n In this work, we rely on the insight that \"acceleratable\" algorithms have broad common properties: high computational intensity with long phases, simple control patterns and dependences, and simple streaming memory access and reuse patterns. We define a general architecture (a hardware-software interface) which can more efficiently express programs with these properties called stream-dataflow. The dataflow component of this architecture enables high concurrency, and the stream component enables communication and coordination at very-low power and area overhead. This paper explores the hardware and software implications, describes its detailed microarchitecture, and evaluates an implementation. 
Compared to a state-of-the-art domain specific accelerator (DianNao), and fixed-function accelerators for MachSuite, Softbrain can match their performance with only 2x power overhead on average.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080255", "http://web.cs.ucla.edu/~tjn//papers/isca2017-stream-dataflow.pdf", "http://research.cs.wisc.edu/vertical/talks/2017/isca17-stream-dataflow-talk" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2ff7fdd38c05a7a763b7426a32cd036a312b2e43", "sources": [ "DBLP" ], "title": "Stream-dataflow acceleration", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "2ffea63eb3be3bfbe26cd538911aa3de60d7abf3": { "authors": [ { "ids": [ "2983976" ], "name": "Brandon Wang" }, { "ids": [ "10685934" ], "name": "Xiaoye Li" }, { "ids": [ "2080159" ], "name": "Leandro Pfleger de Aguiar" }, { "ids": [ "1735876" ], "name": "Daniel Sadoc Menasch\u00e9" }, { "ids": [ "34616778" ], "name": "Zubair Shafiq" } ], "doi": "10.1145/3084455", "doiUrl": "https://doi.org/10.1145/3084455", "entities": [ "Autoregressive integrated moving average", "Control system", "Critical system", "Denial-of-service attack", "Hacktivism", "Mission critical", "Patch (computing)", "SHODAN", "Scheduling (computing)", "Time series", "Vulnerability (computing)" ], "id": "2ffea63eb3be3bfbe26cd538911aa3de60d7abf3", "inCitations": [], "journalName": "POMACS", "journalPages": "18:1-18:23", "journalVolume": "1", "outCitations": [ "84b0923f2426df9593c98e9e3b2934be6756b015", "0122744f9c8e704c8605cbb133fa0ba6cdcc0131" ], "paperAbstract": "Industrial Control Systems (ICS) are widely deployed in mission critical infrastructures such as manufacturing, energy, and transportation. The mission critical nature of ICS devices poses important security challenges for ICS vendors and asset owners. 
In particular, the patching of ICS devices is usually deferred to scheduled production outages so as to prevent potential operational disruption of critical systems. Unfortunately, anecdotal evidence suggests that ICS devices are riddled with security vulnerabilities that are not patched in a timely manner, which leaves them vulnerable to exploitation by hackers, nation states, and hacktivist organizations.\n In this paper, we present the results from our longitudinal measurement and characterization study of ICS patching behavior. Our study is based on IP scan data collected from Shodan over the duration of three years for more than 500 known industrial ICS protocols and products. Our longitudinal measurements reveal the impact of vulnerability disclosures on ICS patching. Our analysis of more than 100 thousand Internet-exposed ICS devices reveals that about 50% upgrade to newer patched versions within 60 days of a vulnerability disclosure. Based on our measurement and analysis, we further propose a variation of the Bass model to forecast the patching behavior of ICS devices. 
The evaluation shows that our proposed models have comparable prediction accuracy when contrasted against traditional ARIMA timeseries forecasting models, while requiring less parameters and being amenable to direct physical interpretation.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078505.3078524", "http://doi.acm.org/10.1145/3084455", "http://homepage.divms.uiowa.edu/~mshafiq/files/wang-sigmetrics2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/2ffea63eb3be3bfbe26cd538911aa3de60d7abf3", "sources": [ "DBLP" ], "title": "Characterizing and Modeling Patching Practices of Industrial Control Systems", "venue": "SIGMETRICS", "year": 2017 }, "300a8539d2655ebaba8cd96dbd5c2586665e2834": { "authors": [ { "ids": [ "2292514" ], "name": "Tiago Cogumbreiro" }, { "ids": [ "40330958" ], "name": "Rishi Surendran" }, { "ids": [ "2020358" ], "name": "Francisco Martins" }, { "ids": [ "1728219" ], "name": "Vivek Sarkar" }, { "ids": [ "1706939" ], "name": "Vasco Thudichum Vasconcelos" }, { "ids": [ "36328809" ], "name": "Max Grossman" } ], "doi": "10.1145/3143359", "doiUrl": "https://doi.org/10.1145/3143359", "entities": [ "Algorithm", "C++", "Cycle detection", "Deadlock", "Direction finding", "Futures and promises", "Immutable object", "Imperative programming", "Java", "Parallel computing", "Proof assistant", "Race condition", "Shared Variables", "Shared memory", "Software bug", "Time complexity", "Uncontrolled format string" ], "id": "300a8539d2655ebaba8cd96dbd5c2586665e2834", "inCitations": [], "journalName": "PACMPL", "journalPages": "103:1-103:26", "journalVolume": "1", "outCitations": [ "31181e73befea410e25de462eccd0e74ba8fea0b", "d6e4353cb62d54204db63244670cb784b7ef8e8e", "049c2d51ab60dd99ce94086909a586134f06d625", "38318df4b82b1f32cec6695ad936b9d3aec66fc2", "182c0524aa353b6f4f4cb75a88ff3f5fc3bd86e0", "7b3533216d5064660458d3754a18fc69f8fbeba0", "5f0b264f377c03fe1714456fa5e43f864088ca75", "0f1042350e2c97117620d9f5182f94262f1f5ac0", 
"f68911078030c56f34e68041908c26808b63f57b", "0d9c39200e541ce7c5a2f3cfa54302c2c9bc631a", "738ab88467f755b2c1360832e6f63da40bad389f", "10ba04904f12e44cd0569cb86aa6e97e47939e23", "0cd328c7e014bb0326b4a7744df5d267ebf040cb", "be6449c9ac329ea21e4022f824a86456eb7f1c32", "197fb71fc7b78b9190c75ef017be612615522796", "2042b469be68653afcb2b7b38490c16369b4501a", "657a2241eda4e13e1847ab2e3b2f0b18896c9c40", "027eb436c35c7e293e7ebc565163cb54c05fe2e9", "e706b8ae2952740cb95c0182c4c44b0d11cc54c1", "16c3fd5b439e9c35fc1eafc52524396a6ac5e652", "3f3d2e6f372377606d499c7c96fec7aa2443511c", "4460b958e3490e113d174fa7bf902fd1c25a7ff2", "6efe82abd6002d1681a4711b520437fd7a41541f", "2346439ece014d5e3ce1564adc2a7ca098a37c8e", "4f7e3eaf599ede34bc4f7919d10662b09afbc315", "521172ac92de0880243aadced4c4e8659748df31", "1e90fd5d10f69f8a4b77f519a889bec56c57628b", "29d60d3943197febda3ec9fa264454dbdb66f5f7", "2466407dddb5b1a15f1885721390ac24953fa39a", "17abd45e0a36cd05d70584c38142be06a77971f4", "3d51d410bf1cf521fe9270cbc413d25f1c51c44c", "00b0b512c6ad05d62660221ed34a9befa483fbc6", "21b6911a45dea1628dfa9a43333fae740e0ad00d", "16a04050353b741974c7d0448e8b0149831bfdc0", "3f18cada83f9ac74ebbeeb62b5c4616975ae5848", "8368d2fc947cf6ac46a1d251d1895f2f87c7d498" ], "paperAbstract": "Futures are an elegant approach to expressing parallelism in functional programs. However, combining futures with imperative programming (as in C++ or in Java) can lead to pernicious bugs in the form of data races and deadlocks, as a consequence of uncontrolled data flow through mutable shared memory. \n In this paper we introduce the Known Joins (KJ) property for parallel programs with futures, and relate it to the Deadlock Freedom (DF) and the Data-Race Freedom (DRF) properties. Our paper offers two key theoretical results: 1) DRF implies KJ, and 2) KJ implies DF. These results show that data-race freedom is sufficient to guarantee deadlock freedom in programs with futures that only manipulate unsynchronized shared variables. 
To the best of our knowledge, these are the first theoretical results to establish sufficient conditions for deadlock freedom in imperative parallel programs with futures, and to characterize the subset of data races that can trigger deadlocks (those that violate the KJ property). \n From result 2), we developed a tool that avoids deadlocks in linear time and space when KJ holds, i.e., when there are no data races among references to futures. When KJ fails, the tool reports the data race and optionally falls back to a standard deadlock avoidance algorithm by cycle detection. Our tool verified a dataset of ∼2,300 student’s homework solutions and found one deadlocked program. The performance results obtained from our tool are very encouraging: a maximum slowdown of 1.06× on a 16-core machine, always outperforming deadlock avoidance via cycle-detection. Proofs of the two main results were formalized using the Coq proof assistant.", "pdfUrls": [ "http://doi.acm.org/10.1145/3143359" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/300a8539d2655ebaba8cd96dbd5c2586665e2834", "sources": [ "DBLP" ], "title": "Deadlock avoidance in parallel programs with futures: why parallel tasks should not wait for strangers", "venue": "PACMPL", "year": 2017 }, "301d189e85def6eaddbc7152416df1511b55e82b": { "authors": [ { "ids": [ "2397000" ], "name": "Neeraja J. Yadwadkar" }, { "ids": [ "1790580" ], "name": "Bharath Hariharan" }, { "ids": [ "30503077" ], "name": "Joseph Gonzalez" }, { "ids": [ "40563791" ], "name": "Burton Smith" }, { "ids": [ "38793222" ], "name": "Randy H. 
Katz" } ], "doi": "10.1145/3127479.3131614", "doiUrl": "https://doi.org/10.1145/3127479.3131614", "entities": [ "Amazon Web Services", "Cloud computing", "Collaborative filtering", "Interpolation", "Linear interpolation", "Microsoft Azure", "Performance prediction" ], "id": "301d189e85def6eaddbc7152416df1511b55e82b", "inCitations": [ "d308092a5da30ef6687b6a26287f1e54ba4c5e10", "537efae13f33ad932034b8ad1db72a83d3691473", "53cc6bf305539b4bd8829df42996e0eb12512434", "5655f16d3c46537f951b5686c905f15c2f35991c", "71cf6dd78c50f1e7b647e35e4783f0aea79ce76c", "8e5cc516f0e6b62af0ef0fdb1069a93113539beb", "7818619eb25c7c1bb470a5b5572fa0371de721bc", "83aaf61e91053745e667427d2132527b8a05ef8a" ], "journalName": "", "journalPages": "452-465", "journalVolume": "", "outCitations": [ "5075192e0e25af961420412fed1f848282ae313e", "6af399d9168b6710fa486fed3c5d778e425b1e60", "144a6336a53b586713f28c36e0f2fc3703b127a5", "7afa08d7c1c6c8758ee1227437c69463d5441d09", "3000e77ed7282d9fb27216f3e862a3769119d89e", "f24b702c16849ba88da1df30cc3f1e126a487ede", "30f04cd7dfd1623e62327e207fd92af738116fdd", "98fe37b292f6ec70181015f3ca384cc52cad02a5", "471932379ea02f9f29172dac5c991181a749287d", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "768b3ea7980bac9daf8fbd370026b004616eb770", "3c0bc4e9d30719269b0048d4f36752ab964145dd", "a6fae22e8e40e350f005c4133256fe68cbcf5356", "8242f42f077b59ff239e8cab19b99d94c190c608", "191c14ec67c561c6a3e3ce21c0a7e59e3afe490b", "0cb4b930159a456cd3ab7e253e0cab5c5b28c8c4", "061316b7516e20a4d66e7d95b3543eded514ef5d", "85dfe3c3053506f7602c410cfa97cc1595cd6143", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "2fee9034f208596eefe51cc66acb98a99f6500dd", "2988e34168fa91398fa397baf823af2063893e9c", "41e0ef45114deff41d92c8b235393cfe4d9a94f4", "1455be30b0bb364ffc4c35d2cf1ee05cfb32595f", "6632e05bf8efe9498f622c7af82b4ac0ac1db23d", "2992b8985e094c3943e29dffc550862791fae147", "0d868efa67bf06b1f784d60769c082fd9a58893e", "f060942169f56e0aa8f3253047fac49b7c8eff2d", 
"40158c6ea4474106b8ae010659b6505057aa252e", "550960fc696179dde6bc387ef5209c54fc327d31", "0784356b46a1345b352ab634bda835c07ff04af2", "13d4c2f76a7c1a4d0a71204e1d5d263a3f5a7986", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "63b945168a7ca6e369b398d8f33056e014eed392", "06545f48a6b25a3cafd76e514b2310254972888b", "17180b01e75876577f881f664e8d8334e279205f", "5233d7195acccd2681f20b9f60e9f12ec1cbba70", "490d862480cf30949dce90e832aa292c498ac768", "590bd345ef4b8f274af3363a52b7d8f518cdc08a", "79b689f201c475e5016ffd4eacd4eb43165e1ab0", "9aa0d7253574e50fe3a190ccd924433f048997dd", "8e0d370d868655bd6ec920c0bf6da5cf05b3257a", "661b19ff987b9ed9d9252324d4a72ab1fbd588ae", "9c96514250c4a35deba5ae3ffb93e9731fe23a79", "277f20ddc0e9fa593753ef2778110508372c597f", "2997435fe9f0e646e6a37d9783b520b9cdbdd38b", "2f4b9ac4a0694f0b1681348334befba0bfe9d897", "0c65d05478483a294701d38c98e111d8a4b033f5", "23b201f09b66bc7cbf2cfc1908a4fbc106cfa326", "9da1d06e9afe37b3692a102022f561e2b6b25eaf", "9c6b7e5f4c3233e282facd97fb7e812cc2816126", "274a6c951c4aa82e6ef6b9f63c11f0ef66722c20", "fd50fd9af37f860f1cf6bcd5b4f26148811bc9a8", "d39c9c79469666845d4ee59c88686c7a24536574", "9e6b3f356f6c159280c4381b35c1e8153a801b9f" ], "paperAbstract": "Users of cloud services are presented with a bewildering choice of VM types and the choice of VM can have significant implications on performance and cost. In this paper we address the fundamental problem of accurately and economically choosing the best VM for a given workload and user goals. To address the problem of optimal VM selection, we present PARIS, a data-driven system that uses a novel hybrid offline and online data collection and modeling framework to provide accurate performance estimates with minimal data collection. PARIS is able to predict workload performance for different user-specified metrics, and resulting costs for a wide range of VM types and workloads across multiple cloud providers. 
When compared to sophisticated baselines, including collaborative filtering and a linear interpolation model using measured workload performance on two VM types, PARIS produces significantly better estimates of performance. For instance, it reduces runtime prediction error by a factor of 4 for some workloads on both AWS and Azure. The increased accuracy translates into a 45% reduction in user cost while maintaining performance.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3131614", "https://people.eecs.berkeley.edu/~neerajay/paris_socc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/301d189e85def6eaddbc7152416df1511b55e82b", "sources": [ "DBLP" ], "title": "Selecting the best VM across multiple public clouds: a data-driven performance modeling approach", "venue": "SoCC", "year": 2017 }, "306db40fbc1805465050e4d18dc2ca91b6de1bad": { "authors": [ { "ids": [ "33895019" ], "name": "Matthew Hicks" } ], "doi": "10.1145/3079856.3080238", "doiUrl": "https://doi.org/10.1145/3079856.3080238", "entities": [ "ARM Cortex-M", "Central processing unit", "Computation", "Computer hardware", "Computer memory", "Continuous operation", "Embedded system", "Idempotence", "Internet of things", "Power cycling", "Programmer", "Run time (program lifecycle phase)", "Ubiquitous computing", "Verilog" ], "id": "306db40fbc1805465050e4d18dc2ca91b6de1bad", "inCitations": [ "2fd6522eca6f38239d5b41d01e2d02d036aef850", "96c49ab5890370e1aeace9a7f7c7afbddb0fcf27", "2cb4702cc54f537775a97223b21d308c3b5cda96", "6b2d1124170b848576640bb5516813527f368fcf", "4af09a0e41f3c274831d6d9472869c986ba3d368", "e3aca014b04e379e2dc1b57f5fd637dff61ae872", "a39107bd293cf0f5d9674223c4194eed0761745e" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "228-240", "journalVolume": "", "outCitations": [ "4b0f7bde293bc2e0c9c35fc191e5106d96cb559c", "175c1a4e902db50264f374ac03e5608233d31e62", 
"4ace5fbb66763f82d52d4f5e70c001100847e9b1", "6f7303c27681b38167b9471cd77b16cce31aa0a6", "063d302f35c4698598c518bf1f9d720d3bcda02e", "5ae9cdcd8052f3a527d662e947a12b802c5b76f7", "073e7615c57ab6af3e6fbd9b7a713c9cd438effa", "91b9389bae769e2cf1934ca7189217604a613ac9", "3593938057b7734c872aaba26240400f37fbad88", "b4f12d637476b4e51b061d32437fafdbe0106d76", "e70ab6b55419e7720eed04ec7b408d0b0cd315f9", "573c2a3b4b3d904b7796775e3969c50349995a15", "db6ea526192f31cc5b03855cfd50fbb654586436", "18ef5a3821a4e1c64c66e5e8fa5d506647458e78", "46a231fcd7705fc1ebc910f5dd8ecbb59fc25e95", "3edaf0de6bbaf152a00cadcb1cb62a52b17fdee5", "5ae1b819f9e92c2ff447687fea5fc17eac2e8788", "0551fb9cf805e5e09ac432675dd2b3019bb45657", "c28251c4824163b037092a01d19e912216e697f2", "6dd8688229d0448b93391fa7767b1d5d0d1f76d7", "78349766c91fd665c97de9388e4d0bbd4ffdcfb7", "774ec7516c1194172d52ee1a1d7431f2af1218ab", "c0abdcbf1736066c2b56eff6d814b7d86dfb913c", "0aaf629dcab6bd6f9947be2390fbd27bf86d5eac", "113772329678792fc2a3a8cb9322c164547f88a0", "aa9f6d4394b5d31d52fcc0cab05b9f20362bccc7", "8d181ccff96ed9100521c10327624eaa5b1a41e2", "403c287c3df7ece55c956f7eaf6ea4bd13e29de6", "29245de3627432ed56dc33556d1cb55b5366b489", "84b4a8b2f2fe58c57223dcb1e839d3106c820380", "497be8bc880ca84d302bd24f34c01e8f9e5951ee", "2922edf0152641eed6cfa4f42b9f1ab9b9c71c97", "515eab3d528b4ba798e98c744539a7ec2673d34b" ], "paperAbstract": "The processors that drive embedded systems are getting smaller; meanwhile, the batteries used to provide power to those systems have stagnated. If we are to realize the dream of ubiquitous computing promised by the Internet of Things, processors must shed large, heavy, expensive, and high maintenance batteries and, instead, harvest energy from their environment. One challenge with this transition is that harvested energy is insufficient for continuous operation. 
Unfortunately, existing programs fail miserably when executed intermittently.\n This paper presents Clank: lightweight architectural support for correct and efficient execution of long-running applications on harvested energy---without programmer intervention or extreme hardware modifications. Clank is a set of hardware buffers and memory-access monitors that dynamically maintain idempotency. Essentially, Clank dynamically decomposes program execution into a stream of restartable sub-executions connected via lightweight checkpoints.\n To validate Clank's ability to correctly stretch program execution across frequent, random power cycles, and to explore the associated hardware and software overheads, we implement Clank in Verilog, formally verify it, and then add it to an ARM Cortex M0+ processor which we use to run a set of 23 embedded systems benchmarks. Experiments show run-time overheads as low as 2.5%, with run-time overheads of 6% for a version of Clank that adds 1.7% hardware. Clank minimizes checkpoints so much that re-execution time becomes the dominate contributor to run-time overhead.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080238", "http://static1.1.sqspcdn.com/static/f/543048/27585412/1496712833517/Clank_ISCA_2017.pdf?token=26CLzvpicf5yRT/wK/122sOQRng%3D" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/306db40fbc1805465050e4d18dc2ca91b6de1bad", "sources": [ "DBLP" ], "title": "Clank: Architectural support for intermittent computation", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "307a170469153d037b2267d2daf0f54e47ea8a7d": { "authors": [ { "ids": [ "24628601" ], "name": "Mallipeddi Hardhik" }, { "ids": [ "3228969" ], "name": "Dip Sankar Banerjee" }, { "ids": [ "3159938" ], "name": "Kiran Raj Ramamoorthy" }, { "ids": [ "3235932" ], "name": "Kishore Kothapalli" }, { "ids": [ "3052515" ], "name": "K. 
Srinathan" } ], "doi": "10.1109/ICPP.2017.14", "doiUrl": "https://doi.org/10.1109/ICPP.2017.14", "entities": [ "Algorithm", "Approximation algorithm", "Computation", "Connected component (graph theory)", "Heterogeneous computing", "Parallel computing", "Proof-of-work system", "Sampling (signal processing)", "Sorting", "Sparse matrix" ], "id": "307a170469153d037b2267d2daf0f54e47ea8a7d", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "50-59", "journalVolume": "", "outCitations": [ "7bfb1966c95481a7ffa529d6abe52ecc69fe01aa", "4a0fb8a29534e0dd3e95494a7a3cc7fd128209d3", "5f491a183c71b0322b16e4f5dc69538c50db79e0", "648fe4e8d720c414e5edf1eb000cf84a9ae5046a", "e60093dd10e32e34b20c366ec11b90d65969e074", "34b44a9e55184b48c94a15f29f052941b342e8bf", "490bd661404359cbdd7bf6e5429f56cdcfe25b30", "ac3db322cbe82d627bb5c576ae316ee79a40d2b4", "96def001f76a9254345ccc7ced9d1b81bbeed1b1", "7bcc53f1baf3358517a602d856192faea9442c91", "9f0756259aff27f7c7a22cdfcbf0d35ffcb8bec1", "98ae1d41deab44c3bfdbac51ccbcc5faf7cd0bcf", "092175c7bfa7d879ec44f864e856b61a492f7b51", "b9ccc4c453db47515ea6a429b0261b3936d3cf4d", "45692750cb1bf50da6451e70ae06b6519992e4ec", "146e869dbfb5575715cdbbd392b8f84c99654970", "0ea110472ee018a8034898588c9bdede1e0c8df8", "7854be682608a8fc469bed880fef69b78f361879", "edc02740e7eea68ae57f4c1b54933ecf0a3f9eec", "022160326e1c54ad37b1969ce85412f0d27024ba", "0064df0d06312711f5163c4440f3d7f099fc8d9e", "c0bb22f544a98a9587a53322d381209cdf8e1443", "1fd8d8ffd5d478e4fbb530551de1a8e096fbc263", "6f5c6297f9c7dccddac313c8344061cfd12509f7", "5c301c646c5e178927dd4cbe40c66b32c36bfa80", "9f7c716d2c525c9d07a527a25e7fb736f881ea73" ], "paperAbstract": "The architectural trend towards heterogeneity has pushed heterogeneous computing to the fore of parallel computing research. 
Heterogeneous algorithms, often carefully handcrafted, have been designed for several important problems from parallel computing such as sorting, graph algorithms, matrix computations, and the like. A majority of these algorithms follow a work partitioning approach where the input is divided into appropriate sized parts so that individual devices can process the “right” parts of the input. However, arriving at a good work partitioning is usually non-trivial and may require extensive empirical search. Such an extensive empirical search can potentially offset any gains accrued out of heterogeneous algorithms. Other recently proposed approaches too are in general inadequate. In this paper, we propose a simple and effective technique for work partitioning in the context of heterogeneous algorithms. Our technique is based on sampling and therefore can adapt to both the algorithm used and the input instance. Our technique is generic in its applicability as we will demonstrate in this paper. We validate our technique on three problems: finding the connected components of a graph (CC), multiplying two unstructured sparse matrices (spmm), and multiplying two scale-free sparse matrices. 
For these problems, we show that using our method, we can find the required threshold that is under 10% away from the best possible thresholds.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.14" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/307a170469153d037b2267d2daf0f54e47ea8a7d", "sources": [ "DBLP" ], "title": "Nearly Balanced Work Partitioning for Heterogeneous Algorithms", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "3099d91ba890344d565a3a327236318a417b852a": { "authors": [ { "ids": [ "1728316" ], "name": "Kai Wang" }, { "ids": [ "2775289" ], "name": "Calvin Lin" } ], "doi": "10.1145/3079856.3080205", "doiUrl": "https://doi.org/10.1145/3079856.3080205", "entities": [ "Benchmark (computing)", "CAS latency", "Computation", "Decoupling (electronics)", "General-purpose computing on graphics processing units", "Graphics processing unit", "Memory address", "Memory bound function", "Prefetcher", "Single instruction, multiple threads", "Speculative execution" ], "id": "3099d91ba890344d565a3a327236318a417b852a", "inCitations": [ "fa21c85107516c7f0a341de27856d7ffe4a6c5d9" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "295-306", "journalVolume": "", "outCitations": [ "5f3cce1bc739ebfc03e003010d3438bb318efc14", "49fb77e166dc26849e37db3d5a53496ab547a545", "7ea15c138cc72588fa376ff819f4bb8ca0b324da", "c592507ecf124838ba95004fdb85f7a1b1e7ee2b", "adb9710f94fb2992ebb056e4d0fa55541565acb3", "400bcff8e17d10d11053b4853babd573d9601ed5", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "4377307d51b459b89e768dc17cd532983766ba9e", "0114fb72afbd9cc0bca35940beb21eda596aa5e0", "8f9cb8ef9c253e113a8c3a86cd3c5b9e9a7c1611", "2e33c9a0e0ba542693610f36cabee4d8291f8e09", "9448d2f1f8260da11a0b5d9b27ee6c8eb916d8ee", "1c1805ea457142e0e5ad547f04ae04d5ac671ffc", "0269cb98b0c91a804326d8f8888c32f01fda8661", 
"7bee024cfab6e16be7c57e2ddbe13618d2a2968c", "458a7be947dae7c4cdc312ff0d95d6ae20b5dea0", "04af8b01df1ad953159bf34a0b1b19f429bd71d2", "0036adadc90e4826b2f7fc157752eea459070c32", "6635cd62124e589bc56667b31cc295db2fbd22a2", "60a1389c827f9f706c9dc1639e2584f0f3de878e", "a213b244778e310bc4b27cbd021f964258b4c7a7", "2d6f002477015469075954c6748a1a85af352c94", "27c204d3de3e9289bdf9d67d8e646e6527b18b1a", "8af1a4b65bd380b8775c15e2a9323ae5184bc8dd" ], "paperAbstract": "This paper introduces a method of decoupling affine computations---a class of expressions that produces extremely regular values across SIMT threads---from the main execution stream, so that the affine computations can be performed with greater efficiency and with greater independence from the main execution stream. This decoupling has two benefits: (1) For compute-bound programs, it significantly reduces the dynamic warp instruction count; (2) for memory-bound workloads, it significantly reduces memory latency, since it acts as a non-speculative prefetcher for the data specified by the many memory address calculations that are affine computations.\n We evaluate our solution, known as Decoupled Affine Computation (DAC), using GPGPU-sim and a set of 29 GPGPU programs. We find that on average, DAC improves performance by 40% and reduces energy consumption by 20%. For the 11 compute-bound benchmarks, DAC improves performance by 34%, compared with 11% for the previous state-of-the-art. 
For the 18 memory-bound programs, DAC improves performance by an average of 44%, compared with 16% for state-of-the-art GPU prefetcher.", "pdfUrls": [ "http://www.cs.utexas.edu/~lin/papers/isca17.pdf", "http://doi.acm.org/10.1145/3079856.3080205" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3099d91ba890344d565a3a327236318a417b852a", "sources": [ "DBLP" ], "title": "Decoupled affine computation for SIMT GPUs", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "309bd4c9b1b9cf81cbf071b8b2ad80e97acf7c60": { "authors": [ { "ids": [ "1794267" ], "name": "Yanfei Guo" }, { "ids": [ "2312753" ], "name": "Charles J. Archer" }, { "ids": [ "1685720" ], "name": "Michael Blocksome" }, { "ids": [ "2032331" ], "name": "Scott Parker" }, { "ids": [ "2446648" ], "name": "Wesley Bland" }, { "ids": [ "2673895" ], "name": "Kenneth Raffenetti" }, { "ids": [ "2103230" ], "name": "Pavan Balaji" } ], "doi": "10.1109/IPDPS.2017.18", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.18", "entities": [ "AV-TEST", "Lookup table", "Message Passing Interface", "Network address" ], "id": "309bd4c9b1b9cf81cbf071b8b2ad80e97acf7c60", "inCitations": [ "478c09086fcdb2bdaf5f48542dee3e3267790d0f", "6b8cc1c8358b84a55ec2858910adf839928370ef" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1008-1017", "journalVolume": "", "outCitations": [ "46d4192ee0506779b311ecb5a0229737acd1d09e", "5b16adaf3a0be648032d0996743575c0d939ad1b", "21cf16cec9af3a62d73bf5fb811a528694031e0c", "99a1520bc334c111ff84619a1ac376f009d0d3bf", "a7ee6fa73b34b7e03808dd06b3d5482d5410fa1f", "2faf6e0b2b08be9a3ab46d6e932e2c642b882195", "5652436b860413016238f4e54589726bde1e99ad" ], "paperAbstract": "MPI allows applications to treat processes as a logical collection of integer ranks for each MPI communicator, while internally translating these logical ranks into actual network addresses. 
In current MPI implementations the management and lookup of such network addresses use memory sizes that are proportional to the number of processes in each communicator. In this paper, we propose a new mechanism, called AV-Rankmap, for managing such translation. AV-Rankmap takes advantage of logical patterns in rank-address mapping that most applications naturally tend to have, and it exploits the fact that some parts of network address structures are naturally more performance critical than others. It uses this information to compress the memory used for network address management. We demonstrate that AV-Rankmap can achieve performance similar to or better than that of other MPI implementations while using significantly less memory.", "pdfUrls": [ "http://www.mcs.anl.gov/papers/P6078-1016.pdf", "http://www.mcs.anl.gov/~yguo/pubs/ANL-MCS-P6078-1016.pdf", "http://www.mcs.anl.gov/papers/P6051-0916.pdf", "https://doi.org/10.1109/IPDPS.2017.18" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/309bd4c9b1b9cf81cbf071b8b2ad80e97acf7c60", "sources": [ "DBLP" ], "title": "Memory Compression Techniques for Network Address Management in MPI", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "30a4f133ea9c6d5c25fdbe74615d09d8e7af6227": { "authors": [ { "ids": [ "1690525" ], "name": "Georgios Bouloukakis" }, { "ids": [ "1708457" ], "name": "Nikolaos Georgantas" }, { "ids": [ "1793631" ], "name": "Ajay Kattepur" }, { "ids": [ "1688880" ], "name": "Val\u00e9rie Issarny" } ], "doi": "10.1145/3030207.3030220", "doiUrl": "https://doi.org/10.1145/3030207.3030220", "entities": [ "Asynchronous serial communication", "End-to-end encryption", "Middleware", "Publish\u2013subscribe pattern", "Quality of service", "Queueing theory", "Response time (technology)", "Signal trace", "Simulation" ], "id": "30a4f133ea9c6d5c25fdbe74615d09d8e7af6227", "inCitations": [], "journalName": "", "journalPages": "275-286", 
"journalVolume": "", "outCitations": [], "paperAbstract": "Systems deployed in mobile environments are typically characterized by intermittent connectivity and asynchronous sending/reception of data. To create effective mobile systems for such environments, it is essential to guarantee acceptable levels of timeliness between sending and receiving mobile users. In order to provide QoS guarantees in different application scenarios and contexts, it is necessary to model the system performance by incorporating the intermittent connectivity. Queueing Network Models (QNMs) offer a simple modeling environment, which can be used to represent various application scenarios, and provide accurate analytical solutions for performance metrics, such as system response time. In this paper, we provide an analytical solution regarding the end-to-end response time between users sending and receiving data by modeling the intermittent connectivity of mobile users with QNMs. We utilize the publish/subscribe (pub/sub) middleware as the underlying communication infrastructure for mobile users. To represent the user's connections/disconnections, we model and solve analytically an ON/OFF queueing system by applying a mean value approach. Finally, we validate our model using simulations with real-world workload traces. 
The deviations between the performance results foreseen by the analytical model and the ones provided by the simulator are shown to be less than 5% for a variety of scenarios.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030220" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/30a4f133ea9c6d5c25fdbe74615d09d8e7af6227", "sources": [ "DBLP" ], "title": "Timeliness Evaluation of Intermittent Mobile Connectivity over Pub/Sub Systems", "venue": "ICPE", "year": 2017 }, "30a7614f66c6b27bad8a705d4dd90d1d48ef9dc8": { "authors": [ { "ids": [ "33209645" ], "name": "Zhoulai Fu" }, { "ids": [ "38319925" ], "name": "Zhendong Su" } ], "doi": "10.1145/3062341.3062383", "doiUrl": "https://doi.org/10.1145/3062341.3062383", "entities": [ "Code coverage", "Equation solving", "Random testing", "Software quality", "Symbolic execution", "Test automation", "american fuzzy lop" ], "id": "30a7614f66c6b27bad8a705d4dd90d1d48ef9dc8", "inCitations": [], "journalName": "", "journalPages": "306-319", "journalVolume": "", "outCitations": [ "7f5fcc6fc5b1b14894292aacbfa9ee1f85d243a2", "843f5459164fa8a494b551f5b27a7bee146c6b03", "23c3a072e110f86ee1e4112d02eea6068c63f670", "0b5b42425deb371d8dc60ac9b090c7232702370a", "61819919af096dc949ebd8eb36c41e303e26a87c", "35adad1b7230a1ad3ff63bb231c8b930736451d1", "b42fd73cd9eff12012ab88b8de62040f2385c453", "0ab393affe9d674ef790be14fdfade368f3e5989", "1c0c851e96fa13fc45e3298f4715f5d5d0a97e41", "8165544e5518f25bc27677d2df22bf75725852ea", "16a61beee0d3152d936b77267ffeeb396bef16ee", "3339f78b2d27233d22fd806cfcdc6800a2a2d945", "153f586c3b4f3047900f9f5b5ddf61a37309d698", "34ddd8865569c2c32dec9bf7ffc817ff42faaa01", "869505350316ecc8642f8be561e7cc16e1f6b350", "9c83752460cd1024985981d4acaa7bc85e15c0f7", "b0ef716257f2e9e2778157a8bb51a2f45ba237b8", "dba6797496abfeba3f55c4604c246172a0011aec", "c04c3e9ca649e5709a33b53cf35fd20862297426", "0f0c5511d06df02094af355760e90096878c01d2", "580772507bca7b9a0318d251e13f5cd8fb028d7b", 
"2359b12b0f4c70477f51455d9eb41923e740104a", "ac5d8274930923b31d6a9f1d22105f792a182888", "3c1df844a948f1401a253e1aeaa453edefc60c96", "0b53fab8dea434e1046836159e184d9565ffd401", "416910ac4dda8e3da99f320bec93585d96c24caf", "2a4423b10725e54ad72f4f1fcf77db5bc835f0a6", "90f4cc3a0df2f265765f6a57fb2e6734457abfa2", "b9aa494d4d694a5cf6c478e8495638ef4d8872fc", "9eec3698f6515884fd274113b4613545ba3cad33", "f624eb9cc97ce2f1ab39e43e807e131c719e614a", "4f789439fe5a121e6f47453d8a95ec733baca537", "44f026c3f50e4b3a3104a83bae4dcdffe4fe09d7", "11443efe465ad544f478524da6c66c085b16e28b", "1af20927ec4429b4374c3da4c23aeee713b3e60a", "0bb708e3f35ffacbad413351e3995e20e4779d2f", "60cdcebff3ea7032443ce2de76b37dc1512682cc" ], "paperAbstract": "Achieving high code coverage is essential in testing, which gives us confidence in code quality. Testing floating-point code usually requires painstaking efforts in handling floating-point constraints, e.g., in symbolic execution. This paper turns the challenge of testing floating-point code into the opportunity of applying unconstrained programming --- the mathematical solution for calculating function minimum points over the entire search space. Our core insight is to derive a representing function from the floating-point program, any of whose minimum points is a test input guaranteed to exercise a new branch of the tested program. This guarantee allows us to achieve high coverage of the floating-point program by repeatedly minimizing the representing function. \n We have realized this approach in a tool called CoverMe and conducted an extensive evaluation of it on Sun's C math library. 
Our evaluation results show that CoverMe achieves, on average, 90.8% branch coverage in 6.9 seconds, drastically outperforming our compared tools: (1) Random testing, (2) AFL, a highly optimized, robust fuzzer released by Google, and (3) Austin, a state-of-the-art coverage-based testing tool designed to support floating-point code.", "pdfUrls": [ "http://arxiv.org/abs/1704.03394", "https://arxiv.org/pdf/1704.03394v1.pdf", "http://doi.acm.org/10.1145/3062341.3062383" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/30a7614f66c6b27bad8a705d4dd90d1d48ef9dc8", "sources": [ "DBLP" ], "title": "Achieving high coverage for floating-point code via unconstrained programming", "venue": "PLDI", "year": 2017 }, "30e159925f150fd42e5b519820b7b6a02206e58f": { "authors": [ { "ids": [ "9492117" ], "name": "Qiuyun Llull" }, { "ids": [ "2072463" ], "name": "Songchun Fan" }, { "ids": [ "2837959" ], "name": "Seyed Majid Zahedi" }, { "ids": [ "2650454" ], "name": "Benjamin C. Lee" } ], "doi": "10.1109/HPCA.2017.22", "doiUrl": "https://doi.org/10.1109/HPCA.2017.22", "entities": [ "Colocation centre", "Data center", "Fairness measure", "Game theory", "Heuristic", "Resource contention" ], "id": "30e159925f150fd42e5b519820b7b6a02206e58f", "inCitations": [ "d4a247292b371fce74b2b39661d7f447e2b0a489", "24c8f9ac1d626c8f9a50cad92e9a833c25c7477f", "218365d147be95376cbb35d6be3aa3ef7811a044", "74a5d913350a2026e6b0093d4038412ef4d2cae1", "1172aeb1c26e5a7226ecea213f5517bc1a1a4677" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "421-432", "journalVolume": "", "outCitations": [ "1ae2d637edb6742f68c2b5dc4f560a8bc87e78fe", "82921751be7c6dee2ece4092db0d05434bbf3e7f", "590bd345ef4b8f274af3363a52b7d8f518cdc08a", "0558c94a094158ecd64f0d5014d3d9668054fb97", "3c0bc4e9d30719269b0048d4f36752ab964145dd", "3ef44315caebb6169b7ffbf9886cd782193ab40e", "155c52f62328525ce1433b04e71bb52345eaaa66", 
"2988e34168fa91398fa397baf823af2063893e9c", "b6571efa4483aa00d23bbcd36930c4877548ba38", "06545f48a6b25a3cafd76e514b2310254972888b", "7355123bf4bb08d41e462a60cd4e6f11a3ffcf85", "2969fc07a03b3b751a7624705d2cc05295a02cb7", "1c73a4980657fc57942bd49d2d4c21b279122bab", "2077c3787e5a1545df312d51f9a7b8cd05e2c7f0", "6357bd31db46d2114ba6b4dc145e85d5a669a488", "17f820491ffb223d553a9efb73933abfd3db67c1", "43776b15c034076a36b7143d58af8e04715e41d0", "b2a7d7c2073d4f512af89208b83a5bb54f91a24c", "333da42f4369dff8ca905ad21ea6ee2f5dc99d55", "3c4ebf5e1af20c18fe32ad7d17af69273eeca4c0", "635210aa01bd460f5dad80c5fffef8a0dfb4993e", "0b885bb186445ee0c50277d990eca18c53fef09b", "610425bf03641e29b98cdcb2b8f187f951644891", "2f4b9ac4a0694f0b1681348334befba0bfe9d897", "08632fe2b934ed15d3499e7321282c81adc2c390", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "870403ceaadbe9579b1841baa39c1ac2d03fef3e", "146139716c9e8ec4f57475b9673171761ac34074", "321583b5498939d21e3ee24f8ed7cacbd4135b4a", "c120ba78c0a09f4a97f2979f3e9ff9710c3932bf", "7c3c78524b6045f01308e2f2fc000cc5cb2299e3", "83bbec4d4f56b5631f48607b7b6c75a505a8b448", "563964bcd9bc4dfbf79d5d77ed03e86be6524164", "b79e141c783c309b79f72ca18280acd6df783778", "ed2c12585971efbbd2b52059bd8ccd650291b485", "23f4f3430cd97f034563dc0a41039c5fbc58f6a3", "6db4a99e90690f5a916b5b3060f22b0a858ecd79", "914a5dbaf2a024b68c0c4ee85c13f2893fe9b767", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "38307e87a75952fde8d0cf54f7722b55ae251746", "7a978f2902460e732c50c36a171deb11733df1fc" ], "paperAbstract": "Task colocation improves datacenter utilization but introduces resource contention for shared hardware. In this setting, a particular challenge is balancing performance and fairness. We present Cooper, a game-theoretic framework for task colocation that provides fairness while preserving performance. Cooper predicts users' colocation preferences and finds stable matches between them. 
Its colocations satisfy preferences and encourage strategic users to participate in shared systems. Given Cooper's colocations, users' performance penalties are strongly correlated to their contributions to contention, which is fair according to cooperative game theory. Moreover, its colocations perform within 5% of prior heuristics.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.22", "http://people.duke.edu/~bcl15/documents/llull17-hpca-cooper.pdf", "http://people.duke.edu/~qw33/resume/cooper-slides.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/30e159925f150fd42e5b519820b7b6a02206e58f", "sources": [ "DBLP" ], "title": "Cooper: Task Colocation with Cooperative Games", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "30e2f414dd22b56508fb20135b7df080c666f16f": { "authors": [ { "ids": [ "1684598" ], "name": "Rui Liu" }, { "ids": [ "33316629" ], "name": "Soumya Ray" } ], "doi": "10.1109/ICDM.2017.38", "doiUrl": "https://doi.org/10.1109/ICDM.2017.38", "entities": [ "AdaBoost", "Algorithm", "Baseline (configuration management)", "Experiment", "Field (computer science)", "Iteration", "Linear classifier", "Multiple-instance learning", "Overfitting", "Signal-to-noise ratio", "Whole Earth 'Lectronic Link" ], "id": "30e2f414dd22b56508fb20135b7df080c666f16f", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "287-296", "journalVolume": "", "outCitations": [ "598db8fa9bf8c7c4b4a467509c7fea7db4af8520", "3447fe054f6af70403cfc39b4d21076337a71128", "e2a6ebf4f1789ca34a3b204977b9ab0dc3b28f44", "056e13d5045e7d594489705f78834cfaf6642c36", "fc8cda36a0972e7de1ac3a7bcb81dc32da79bee4", "020576e57f27dc2d60ac8d4b74a1070c77ce9cf9", "41ddb9a998bbaea7eff93ad9438178265260badf", "3c1547d2d0975c714b718874244833603e05392e", "1cb77e2a09db58e9e8b37878c7de313d41e6f854", "3f02266e86128012c3ea87ee4cf3695b9b6cacd6", 
"4fb8d45f396e47df403f222818745e82499c4270", "1f7e3b0c425ea6f554b10989dec5f726ecb8bd23", "04a20cd0199d0a24fea8e6bf0e0cc61b26c1f3ac", "1bbb6384076ff6aeb0d1a2c499a6026959671a9c", "251bef63ac9c8e99387c2b24b2bc5721baafee6d", "e2682f2a2752cba7a05fd3db1cb43731c1afb002", "1430399adf0c41e432a655ff6a98212cd9aff304", "77ccc0d04f99ea1e45db74aeda7923046ef6ab7d", "0bbc1913b2fded7a63a0b3453f43ee3f1d547b98", "1f7a25f45718bf1b86df847d7ca4980b9d87fe55", "c3fc1ef004edf47d494bae14cb5f0bd5f663a222", "64372501affd8571db20dc606b0146a76c266303", "99cb96ea60e27a1485b5a1d563e9181cf815136f", "bd045021caf22a4a599a45493dd05116e3bc16cb", "01153363e20b41d7e19dc25b7c2cbd13605ed65b", "0bb4401b9a1b064c513bda3001f43f8f2f3e28de", "7ae3c54b4aa78dd03925fe87e94f87f0f15d289e", "260c3c635da95638600604470584e4d4e9096c57", "0c3596f19ec1dce57796b3556c3220143c75f1e4", "1c7d38f68fe1150895a186e30b60c02dd89a676a" ], "paperAbstract": "An interesting observation about the well-known AdaBoost algorithm is that, though theory suggests it should overfit when applied to noisy data, experiments indicate it often does not do so in practice. In this paper, we study the behavior of AdaBoost on datasets with one-sided uniform class noise using linear classifiers as the base learner. We show analytically that, under some ideal conditions, this approach will not overfit, and can in fact recover a zero-error concept with respect to the true, uncorrupted instance labels. We also analytically show that AdaBoost increases the margins of predictions over boosting iterations, as has been previously suggested in the literature. We then compare the empirical behavior of AdaBoost using real world datasets with one-sided noise derived from multiple-instance data. 
Although our assumptions may not hold in a practical setting, our experiments show that standard AdaBoost still performs well, as suggested by our analysis, and often outperforms baseline variations in the literature that explicitly try to account for noise.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.38" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/30e2f414dd22b56508fb20135b7df080c666f16f", "sources": [ "DBLP" ], "title": "An Analysis of Boosted Linear Classifiers on Noisy Data with Applications to Multiple-Instance Learning", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "30e4b8de89b0c5e50185362964310a49b961e6a4": { "authors": [ { "ids": [ "1945813" ], "name": "Hua Fan" }, { "ids": [ "32733689" ], "name": "Wojciech Golab" }, { "ids": [ "2350862" ], "name": "Charles B. Morrey" } ], "doi": "10.1145/3127479.3127487", "doiUrl": "https://doi.org/10.1145/3127479.3127487", "entities": [ "Amortized analysis", "Atomicity (database systems)", "Communications protocol", "Concurrency (computer science)", "Concurrency control", "Distributed transaction", "FLOPS", "Failure rate", "Isolation (database systems)", "Key-value database", "Kinetic Void", "Read-only memory", "Read-write memory", "Scalability", "Serializability", "Server (computing)", "Write-only documentation" ], "id": "30e4b8de89b0c5e50185362964310a49b961e6a4", "inCitations": [], "journalName": "", "journalPages": "561-572", "journalVolume": "", "outCitations": [ "fe9303074167e5a732ea1357732edbf0eba9a18c", "28ecdc50beb098d9176d992fed80eb2bac5963a4", "29a05cde1994548e2e9487822248c679626c6241", "08b8009cc59c035bdbcb69fdd76f1a58363d3da5", "bce1901805ec6e07993cb248bd1a9279c1800971", "9748241beb02ef1e2d0e6dc877c04b354033a838", "0d923afc5ca379e7a488f3a0eefd5767bb2a191d", "5dd350cee6ecfd097b57772f89e6341ff05b5725", "27611a1896feb8817eb9cebca344d9736916c3bb", "56f6aec0132e56769e2036bbeff791dfa137d107", 
"ddf313f6fcc0520c716c54873164ded8e31703da", "00ac447d02035c26c7e2852c2457fe812e89038f", "517e239f97f50079bc557cccf1a6b56aa5736d30", "1f102935cc21d54f91ae70c09d84157b6011e6dd", "3b55c7f2ff9ff6b11fb973dab4a7dc60c39bb09c", "e7ab23d011e5183db78cfea48e303210f6e57e2e", "362855ec18b3febbbb668a85221d59ff094ec1b2", "86c6f4555b4e37dd8afe08c4445123caddcdfa4b", "3815581be1cd86b4cf707db68d6d70d001720858", "3d11c0ce702f416401ec383e7cecd82802bc81b2", "557f89269aadfcce9089914199da4555c3f87eea", "cf855ba4a09c2181d0166705717b5788454fcfa5", "71ea786d06d331d91f4113b3a87df296985fe95d", "068e59b88a1230d709d99c83a45d3a5b91260810", "1664b784dd7d446ee8838e0eec5b980f61792007", "2ad184901a2f3551df5d0406f244ae655ac8c4d2", "0538e05e1ced11b91cda5d1aed88a73969def882", "2289754c17c95e53c982ca2f023af21dec824d29", "032e6705b2c9cdb68bc66c28c8ddb4956db2b2e5", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "039f09d49bc408db9e0e8429e6bd92be49c5f72e" ], "paperAbstract": "There is a trend in recent database research to pursue coordination avoidance and weaker transaction isolation under a long-standing assumption: concurrent serializable transactions under read-write or write-write conflicts require costly synchronization, and thus may incur a steep price in terms of performance. In particular, distributed transactions, which access multiple data items atomically, are considered inherently costly. They require concurrency control for transaction isolation since both read-write and write-write conflicts are possible, and they rely on distributed commitment protocols to ensure atomicity in the presence of failures. 
This paper presents serializable read-only and write-only distributed transactions as a counterexample to show that concurrent transactions can be processed in parallel with low-overhead despite conflicts.\n Inspired by the slotted ALOHA network protocol, we propose a simpler and leaner protocol for serializable read-only write-only transactions, which uses only one round trip to commit a transaction in the absence of failures irrespective of contention. Our design is centered around an epoch-based concurrency control (ECC) mechanism that minimizes synchronization conflicts and uses a small number of additional messages whose cost is amortized across many transactions. We integrate this protocol into ALOHA-KV, a scalable distributed key-value store for read-only write-only transactions, and demonstrate that the system can process close to 15 million read/write operations per second per server when each transaction batches together thousands of such operations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3127487" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/30e4b8de89b0c5e50185362964310a49b961e6a4", "sources": [ "DBLP" ], "title": "ALOHA-KV: high performance read-only and write-only distributed transactions", "venue": "SoCC", "year": 2017 }, "30e5c79b25cab0af0efe2f756c7f4ebca30bcba0": { "authors": [ { "ids": [ "1749517" ], "name": "Michael Backes" }, { "ids": [ "2230149" ], "name": "Mathias Humbert" }, { "ids": [ "1739527" ], "name": "Jun Pang" }, { "ids": [ "1698138" ], "name": "Yang Zhang" } ], "doi": "10.1145/3133956.3133972", "doiUrl": "https://doi.org/10.1145/3133956.3133972", "entities": [ "Adversary (cryptography)", "Electron mobility", "Experiment", "Feature learning", "Global Positioning System", "Inference attack", "Information sensitivity", "Privacy", "Stemming" ], "id": "30e5c79b25cab0af0efe2f756c7f4ebca30bcba0", "inCitations": [ "01c3b9bc6299bf521787902a31d86b2a7c94f514", "6b2cc009bf31007e558a096ab1b1a9c63abce665" ], 
"journalName": "", "journalPages": "1943-1957", "journalVolume": "", "outCitations": [ "69850ad4116d5ba0e41dfe53bccfa581f6371312", "0001c638511772a45944ca7cc8bc68a6380f4544", "2d474083f3c9e8dab83b9f6d572743dd5cdd52a7", "97dcb1979ad773f9d902caad7bd0f6bb6c6c5e0d", "5c69418969d8d5a286306f1087108dcceb50d39e", "4918ea060e838e5531392b09ccb3ca7756a2e162", "11baf128445828df62e8d5606533c476feb1da33", "444f4aa49df20291689b954681c40de849d080a5", "0e7441a8138bdcfcc15dfb0e0f809ab280871bce", "06501b7ea604a8b8ffff402ee492955e6892daad", "2a48dff17fe4ad91a8cc2d42ce945644d6fdc72e", "4afa6c2eb552ceef0e396fbfe449932492873034", "48bb1585f5efb77f9b943d999ae3202d4f853dc1", "0adbbdf261b2042a05699698994211e7e7461670", "1c059493904b2244d2280b8b4c0c7d3ca115be73", "62e9bea58ae6617fa076411664b64c832b161f1e", "10adbab10ea1063c599ce137d9e5e7f9ca33303f", "1ce7f038403a15c69c167941f112912a0b768b45", "00d23e5c06f90bed0c9d4aec22babb2f7488817f", "36aa70f51eb36b7b9dea27c3c84b96c85471ab22", "155d7e95d408c72fda5986ccd933505d9d8a52bf", "0ba744c28f18b413e22e0cd5cfb4315e8226c3df", "0db3f5968537bfb2abe031b0883661436b478b15", "046c6c8e15d9b9ecd73b5d2ce125db20bbcdec4b", "67a77d3242a357571541d39c26d305c5dda89445", "850b234faaecb8c3ffb1b37e5fdb18843183d220", "1a67622ca58aa851afe36ad6c6e78f9fb9d691d2", "9f3cacea2f2cbd8d8342b7c2a6f2c7ff0f0c36cc", "0ec9ee064d9acc194adc0deace107d50a7f50c93", "1a5ced36faee7ae1d0316c0461c50f4ea1317fad", "0834e74304b547c9354b6d7da6fa78ef47a48fa8", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "14bb1212e15d6232114ddcf7cc48bdd278ee0d26", "57d76d5716a0007e0fbf451866bf665887628613", "0706356c9ab6014d6b04577d38289ea8328291a5", "02cc048f063a41d6ec99e1c066e097a44961267f", "30ed0801ccab74988cde86cd5d797c2ded55c7fd", "41027725b82b431d0e7bfea0ef93b953816054d3", "14a48cbcb93867ff91ceb4db5e3628a4eb0bc457" ], "paperAbstract": "The development of positioning technologies has resulted in an increasing amount of mobility data being available. 
While bringing a lot of convenience to people's life, such availability also raises serious concerns about privacy. In this paper, we concentrate on one of the most sensitive information that can be inferred from mobility data, namely social relationships. We propose a novel social relation inference attack that relies on an advanced feature learning technique to automatically summarize users' mobility features. Compared to existing approaches, our attack is able to predict any two individuals' social relation, and it does not require the adversary to have any prior knowledge on existing social relations. These advantages significantly increase the applicability of our attack and the scope of the privacy assessment. Extensive experiments conducted on a large dataset demonstrate that our inference attack is effective, and achieves between 13% to 20% improvement over the best state-of-the-art scheme. We propose three defense mechanisms -- hiding, replacement and generalization -- and evaluate their effectiveness for mitigating the social link privacy risks stemming from mobility data sharing. Our experimental results show that both hiding and replacement mechanisms outperform generalization. 
Moreover, hiding and replacement achieve a comparable trade-off between utility and privacy, the former preserving better utility and the latter providing better privacy.", "pdfUrls": [ "http://arxiv.org/abs/1708.08221", "https://users.cs.fiu.edu/~carbunar/teaching/cis5374/cis5374.2017/slides/walk2friends.pdf", "https://arxiv.org/pdf/1708.08221v1.pdf", "http://doi.acm.org/10.1145/3133956.3133972", "https://arxiv.org/pdf/1708.08221v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/30e5c79b25cab0af0efe2f756c7f4ebca30bcba0", "sources": [ "DBLP" ], "title": "walk2friends: Inferring Social Links from Mobility Profiles", "venue": "CCS", "year": 2017 }, "3105ea84d80845247c71690561955433975df633": { "authors": [ { "ids": [ "2103014" ], "name": "Giulio Malavolta" }, { "ids": [ "2970940" ], "name": "Pedro Moreno-Sanchez" }, { "ids": [ "1828965" ], "name": "Aniket Kate" }, { "ids": [ "4436634" ], "name": "Matteo Maffei" } ], "doi": "", "doiUrl": "", "entities": [], "id": "3105ea84d80845247c71690561955433975df633", "inCitations": [ "ac72566bbc7628255002a70ca5bec0874929eba4", "42debc5ec28e3e1709a3005962f1dbc3120355be", "3c95a6110b3f877f7a8d2a5b9a40353b25db30ff", "197ac36ed5dea16e31f7e5058b5ad7318b4a7e63", "2061315031961c1d881582234b9f71f25155d6d3" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "1054", "journalVolume": "2016", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://www.cs.purdue.edu/homes/pmorenos/public/silentwhispers-slides.pdf", "https://www.internetsociety.org/sites/default/files/ndss2017_01-5_malavolta_slides.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/silentwhispers-enforcing-security-and-privacy-decentralized-credit-networks/", "http://eprint.iacr.org/2016/1054" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3105ea84d80845247c71690561955433975df633", "sources": [ "DBLP" ], "title": "SilentWhispers: Enforcing Security and Privacy in Decentralized 
Credit Networks", "venue": "NDSS", "year": 2016 }, "310e2276c660e127b1fe424e32c334760a68fe0c": { "authors": [ { "ids": [ "1977734" ], "name": "Daniel Ruprecht" } ], "doi": "10.1007/978-3-319-64203-1_48", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_48", "entities": [ "Compiler", "Legacy system", "Lock (computer science)", "Memory footprint", "OpenMP", "Parareal", "Runtime system", "Shared memory" ], "id": "310e2276c660e127b1fe424e32c334760a68fe0c", "inCitations": [], "journalName": "", "journalPages": "669-681", "journalVolume": "", "outCitations": [ "af5d92b7a0265f496431da278c6080d017af62dd", "402722e6947555819af9d518411f38202245cdab", "0ef461357e147c55d64ef8a50f70c41f31557008", "5aeaf2c8c262d2e78df56adccdfaf6dc2022d705", "4416052fca95270b50a29e9e3cc245cca8962861", "987a593ef4769f8749f70d9ffd5d5e572406c825", "9283668a04922da806f0e24baaa1435c51f3789a", "a2d8b01aeea079e11e5a04d2de53a8afcd3d9dc0", "7069f210a5a475096adaa00b6b4856a946468733", "c039212be9c0faae9b90403a2a45d7f83fd19cf5", "b63732695214655a494643f9009d498b713f87e0", "2773ae9947e2f6cb58152824eca994434f1b8322", "e05ca7c20ff88942b37e6dbd914ab7b37eb48179", "e16e1668d8aa54a1236eb60c2f394f283102acfe", "6367fc65e43ec8fc2426d6513bacfcd1d4773177", "db440dee965b78b89792a5f30ff0d1672f589876", "30d69fe1a8f9c4c9fd4e0648411bdcad6e395e7e", "e2bc07071fbb0916661f9aadbcd17ab0f55128dc", "3d9fd2bd708e1a8db8a5bad61dbbd74c64d107b0", "78d5b5d15fe3cf0dfe9751e8cd744a0701c718a4", "346c5896ff2032d7c7a8400cbbd3bd2f61c72f1a" ], "paperAbstract": "The paper introduces an OpenMP implementation of pipelined Parareal and compares it to a standard MPI-based implementation. Both versions yield essentially identical runtimes, but, depending on the compiler, the OpenMP variant consumes about 7% less energy. However, its key advantage is a significantly smaller memory footprint. 
The higher implementation complexity, including manual control of locks, might make it difficult to use in legacy codes, though.", "pdfUrls": [ "https://arxiv.org/pdf/1509.06935v1.pdf", "http://arxiv.org/pdf/1509.06935v2.pdf", "https://doi.org/10.1007/978-3-319-64203-1_48", "https://arxiv.org/pdf/1509.06935v2.pdf", "http://arxiv.org/pdf/1509.06935v1.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/3513/7f5cda9458876ce57cb0501acf5b08f20b43.pdf", "s2Url": "https://semanticscholar.org/paper/310e2276c660e127b1fe424e32c334760a68fe0c", "sources": [ "DBLP" ], "title": "Shared Memory Pipelined Parareal", "venue": "Euro-Par", "year": 2017 }, "310ecac3477a51aa303284f0853bd49ae8383ac3": { "authors": [ { "ids": [ "2266189" ], "name": "Jing Zhang" }, { "ids": [ "3215200" ], "name": "Sanchit Misra" }, { "ids": [ "34266162" ], "name": "Hao Wang" }, { "ids": [ "1688860" ], "name": "Wu-chun Feng" } ], "doi": "10.1109/IPDPS.2017.120", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.120", "entities": [ "Algorithm", "BLAST", "Central processing unit", "Computational biology", "Data access", "Database", "Heuristic", "Indexed search", "Locality of reference", "Memory hierarchy", "Multi-core processor", "Parallel computing", "Peptide sequence", "Speedup", "Thread (computing)" ], "id": "310ecac3477a51aa303284f0853bd49ae8383ac3", "inCitations": [ "62c0af943a259c66b91dc932d3a5611afd014a4c", "3b603228bf9419868e7518614c85338b7a132989" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "62-71", "journalVolume": "", "outCitations": [ "dc0a162462fbc2d3be52e8882dfad60364cadccf", "2878976835c73ed0906e17a7336b9bf10d491a1a", "4520f74dbf413fe6b6480d0f243ee75fba1167a8", "5cba0d330a947249af144b0402fe037a0c7166cf", "53d1657eef932911c95ed051961c8136d34ba486", "649e858552ed8a289b94ae1f33846b1255b3f07d", "65b54461a0436e69969b2e2679dcbedcddd40d95", "03deb21d07674506d11d46cc44672f1d9dc65fb0", 
"3979cf5a013063e98ad0caf2e7110c2686cf1640", "78be8c9517b822afa0ce858ff6d39a7854893e74", "4b7f05a35378a0b17a0f9af3180d43cf7970aa15", "1845fc6f7874f46777ebb5ecb37ad07eced75770", "0cfd90449754c0ecdedf1d676d9094bc11612c28", "583a320a9c612124d62da5741fede120495126fc", "1915d4717bbb20849e733f711d194d198df45fe5", "62c0af943a259c66b91dc932d3a5611afd014a4c", "3e1a3b5741a0c4cf84dd6e8742f42e1b7cd5fdab", "bcfbbe50b4b2b9b5c24b0628d31b2b03bf6cb274", "0e2993ddba78626376651c3ab8d14f0d680f0595", "4dc8f5d19d37e82f3ea0335f9b0c0eb914c166be", "1b65277f50406900a475a68856df8fe8835c19be", "28f33a53302b8ceae33997d7c94ef46ced26511e", "102090e6e2363e094439a41ef0439dfac5da0126", "6c9c8d72280301fe55bc2b6be3b271a448153bd8", "7e39604a4b65b27da14200b23e950d350da649f5", "3ad54fb5c05b4336fc291fe00a8acd2009e5afb5", "6c0cc0d86586c6992c4eb940136e20af61252a84", "ab756f4ed89c8e17632befe15c3579f0b9f04800", "21e913af85844937f9862ac216e8f8509a6bd199", "32d355a7a20f92ccda0608f83d7456870231c570", "148c8255d4083270ad673ddf2619c867ff4c6839" ], "paperAbstract": "Finding regions of local similarity between biological sequences is a fundamental task in computational biology. BLAST is the most widely-used tool for this purpose, but it suffers from irregularities due to its heuristic nature. To achieve fast search, recent approaches construct the index from the database instead of the input query. However, database indexing introduces more challenges in the design of index structure and algorithm, especially for data access through the memory hierarchy on modern multicore processors. In this paper, based on existing heuristic algorithms, we design and develop a database indexed BLAST with the identical sensitivity as query indexed BLAST (i.e., NCBI-BLAST). Then, we identify that existing heuristic algorithms of BLAST can result in serious irregularities in database indexed search. 
To eliminate irregularities in BLAST algorithm, we propose muBLASTP, that uses multiple optimizations to improve data locality and parallel efficiency for multicore architectures and multi-node systems. Experiments on a single node demonstrate up to a 5.1-fold speedup over the multi-threaded NCBI BLAST. For the inter-node parallelism, we achieve nearly linear scaling on up to 128 nodes and gain up to 8.9-fold speedup over mpiBLAST.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.120", "http://synergy.cs.vt.edu/pubs/papers/zhang-seq-search-ipdps17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/310ecac3477a51aa303284f0853bd49ae8383ac3", "sources": [ "DBLP" ], "title": "Eliminating Irregularities of Protein Sequence Search on Multicore Architectures", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "3115d42d3a2a7ac8a0148d93511bd282613b8396": { "authors": [ { "ids": [ "1862782" ], "name": "Defu Lian" }, { "ids": [ "1684598" ], "name": "Rui Liu" }, { "ids": [ "1874059" ], "name": "Yong Ge" }, { "ids": [ "38671700" ], "name": "Kai Zheng" }, { "ids": [ "1926303" ], "name": "Xing Xie" }, { "ids": [ "1719149" ], "name": "Longbing Cao" } ], "doi": "10.1145/3097983.3098008", "doiUrl": "https://doi.org/10.1145/3097983.3098008", "entities": [ "Algorithm", "Analysis of algorithms", "Binary code", "Cold start", "Discrete optimization", "Experiment", "Hamming distance", "Hamming space", "Information", "Matrix regularization", "Sparse matrix", "Statistical classification" ], "id": "3115d42d3a2a7ac8a0148d93511bd282613b8396", "inCitations": [ "682e83ac98510037b89fb76e46cdace7cfc76d53" ], "journalName": "", "journalPages": "325-334", "journalVolume": "", "outCitations": [ "70967767f355c34869029217bbaa0b2c32a193ec", "f953553d4b8854cf8716015cacf57d35e418b375", "1c2cae04e0e00dcec3fe6840b2daa00ae9e8a0a3", "141487cd6d32f6916bdcb029ac8159eba44e23de", "e407ea7fda6d152d2186f4b5e27aa04ec2d32dcd", 
"2b9ed535a2ab9296850e8c11678f07e84ac748a3", "03a419f507c887a01d7858443dbf6eb627a7660c", "c32e8d3d86695d2da36ccfce60d51956e2e4818c", "82a3074be5297bad6b279c006c7a5507f8c54555", "e50f4d3316d13841c287dcdf5479d7820d593571", "25ad8ad3d1549888a4609659bd55ad825c5df82e", "530d53d8a1828e73ff0d731cd2d2cf8a8f8ecae4", "2a88541448be2eb1b953ac2c0c54da240b47dd8a", "4e8e3e40a25fba903f40246705c3beb3c122f523", "a4675a6429efba8ad337c3e39293d407699842f6", "03fcaa855332fdd11d5b9ac8f369aa904347d577", "2ee120e0bd4de0e27648845352129027c4d7c50d", "6338670193f9fdcfd3b6e7cec414f15ca906a85f", "17a41aa18b8987ed87d6fc19b87d36faf8a4240c", "816e65aaf8a211f174551b9ac34e66f06f35fb3c", "23bbea130398331084021cc895a132064219b4b1", "52c0876b25a5721c4c6930d94d5308f0779734ec", "031854648e0688c1bfc991e7597e54947928fb74", "10e8ebc9a2397336cd03dda18842ad6e7e7299bb", "92eb167f30ad59f6949667021760eb41078cf85c", "7d50b6883c38e34016a4841ec4ab2b92bfdfe3ad", "184b7281a87ee16228b24716ca02b29519d52eb5", "ddcafba6f5404e720c5b02d9bd2ab0f9fc2984fb", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "71fbbc1675780f2f945073f9d92c09b8d76f80f0", "12d0c11d546d91e776a170898ebf3a38c010695c", "5ae98595bba7eb02ba95df2989871bfa86fb02cf", "940b01d0d7931cb9d4d24f5bd50625b941b31a13", "36cb4ed29f9f0b6ea37343b3d98154293a374ec2", "8b577105564c81e0336935c0b69d5e2262e89ceb", "7e5a550cdcb59464a73bfdc54f541fc5e5636ea3", "1f2de093c64679c99437c3031ede4fd4e32c66cc", "0e9bac6a2b51e93e73f7f5045d4252972db10b5a" ], "paperAbstract": "Precisely recommending relevant items from massive candidates to a large number of users is an indispensable yet computationally expensive task in many online platforms (e.g., Amazon.com and Netflix.com). A promising way is to project users and items into a Hamming space and then recommend items via Hamming distance. However, previous studies didn't address the cold-start challenges and couldn't make the best use of preference data like implicit feedback. 
To fill this gap, we propose a Discrete Content-aware Matrix Factorization (DCMF) model, 1) to derive compact yet informative binary codes at the presence of user/item content information; 2) to support the classification task based on a local upper bound of logit loss; 3) to introduce an interaction regularization for dealing with the sparsity issue. We further develop an efficient discrete optimization algorithm for parameter learning. Based on extensive experiments on three real-world datasets, we show that DCMF outperforms the state-of-the-arts on both regression and classification tasks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098008" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3115d42d3a2a7ac8a0148d93511bd282613b8396", "sources": [ "DBLP" ], "title": "Discrete Content-aware Matrix Factorization", "venue": "KDD", "year": 2017 }, "31226293557ed738793640961475eb33a7e178a6": { "authors": [ { "ids": [ "2927115" ], "name": "Huansong Fu" }, { "ids": [ "3135311" ], "name": "Manjunath Gorentla Venkata" }, { "ids": [ "17853593" ], "name": "Ahana Roy Choudhury" }, { "ids": [ "1794961" ], "name": "Neena Imam" }, { "ids": [ "1709886" ], "name": "Weikuan Yu" } ], "doi": "", "doiUrl": "", "entities": [ "In-memory database", "Key-value database", "Memcached", "Runtime system", "Supercomputer", "Throughput", "Titan (supercomputer)", "USB flash drive" ], "id": "31226293557ed738793640961475eb33a7e178a6", "inCitations": [ "f9a8b38b7f208f9a03ca04022227b1c2cb6b735c", "f901f71c5ebab0a7e30e55213d13e47d0b276373" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "559-568", "journalVolume": "", "outCitations": [ "1f1223fe0db94c8a6a380f98308844cb9dda8254", "184687c92c6890743a663a7cdb0216d04f8e9fbf", "5048b1199db383beda869e742691c22ca15e1d56", "23f7dd46caee94ca9d02c151778a2d96ea8e152c", "1a9e81def534358ce3a20c3b209af8f3fd7edbb8", 
"07a0d9b06b876921379aa130fc08c418cda2e62f", "29a1148d75878671dc3663bf480e33d7bd91597d", "4ac2aa1a6f2aa6ad77b068b1290e1549221fd7ea", "9f8e68a1768a44d1e93044f26ec67dd657fb8a21", "029e03cd045b1fcda76e4c469eedfa0470c79624", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "6853f8f61e6cb6056a1add55aa3916d91c290d10", "1594118f2696b573f08510cf837f3b37db87face", "11c136aa1136ccf6ebbb23c3b3e1fbdd8447bb00", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "065f9a08dfbe89ed602b1d4f062d77dd8964858f", "0276440f721b17ff77165f2b1ed24e029b9a2432", "0139dceb6cef21b234e454d53154f30391495862", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "9aa0d7253574e50fe3a190ccd924433f048997dd", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "daf0cd0076b388712ea12ec4105572997fc50cdf", "433207f45ac2c9dbe3876ab53af28ed569e94da9" ], "paperAbstract": "Recently, there has been a growing interest in enabling fast data analytics by leveraging system capabilities from large-scale high-performance computing (HPC) systems. OpenSHMEM is a popular run-time system on HPC systems that has been used for large-scale compute-intensive scientific applications. In this paper, we propose to leverage OpenSHMEM to design a distributed in-memory key-value store for fast data analytics. Accordingly, we have developed SHMEMCache on top of OpenSHMEM to leverage its symmetric global memory, efficient one-sided communication operations and general portability. We have also evaluated SHMEMCache through extensive experimental studies. Our results show that SHMEMCache has accomplished significant performance improvements over the original Memcached in terms of latency and throughput. 
Our evaluation on the Titan supercomputer has also demonstrated that SHMEMCache can scale to 1024 nodes.", "pdfUrls": [ "http://ww2.cs.fsu.edu/~fu/files/shmemcache-ccgrid17-paper.pdf", "http://ww2.cs.fsu.edu/~fu/files/ccgrid17-slides.pdf", "http://dl.acm.org/citation.cfm?id=3101188" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/31226293557ed738793640961475eb33a7e178a6", "sources": [ "DBLP" ], "title": "High-Performance Key-Value Store on OpenSHMEM", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "3128d81ec9915faf1195f9ed59b3fd5f0b1b88ad": { "authors": [ { "ids": [ "2022955" ], "name": "Stijn Volckaert" }, { "ids": [ "1873224" ], "name": "Bart Coppens" }, { "ids": [ "9366424" ], "name": "Bjorn De Sutter" }, { "ids": [ "7161646" ], "name": "Koen De Bosschere" }, { "ids": [ "1772810" ], "name": "Per Larsen" }, { "ids": [ "1721575" ], "name": "Michael Franz" } ], "doi": "10.1145/3064176.3064178", "doiUrl": "https://doi.org/10.1145/3064176.3064178", "entities": [ "Computational resource", "Kullback\u2013Leibler divergence", "Lockstep (computing)", "Memory corruption", "Parallel computing", "Parsec (parser)", "Persistence (computer science)", "Scheduling (computing)", "System call", "Thread (computing)" ], "id": "3128d81ec9915faf1195f9ed59b3fd5f0b1b88ad", "inCitations": [ "64d2d0450e0100998e5b7f53b59ff51f9cf7a210" ], "journalName": "", "journalPages": "270-285", "journalVolume": "", "outCitations": [ "209f4c5dc7c65670473836304f8c478e1e0a0980", "0e7162b8e7d86281058fbde558be5278438e02e5", "3dbb0beee26501a93522230a094cb359eb121c70", "023e3bcd1c1d374f894836dc7dce688bdb406817", "6a8f65381a627a2db6c756a7185d9106f0acefec", "393b648f2aa9b4144e37f2adc21ecd6265dfa71d", "0261afd40eee66cea4ea682fab322a439a28f37d", "f4e10c197040252beeabcd3393c81062e60e7475", "0821b7efb6a47783d8bf9a62291b24d94bbaaf31", "0d29a696d8c66d795336ab34aff0b6fb8decb06d", 
"3b62c1f19254820c75dd0011f038d7aae04b3414", "4cd88094e64a801e6973eae2eec74144c4f49668", "a465ba2fdc04aa9c461ef6f93c1aeae37a8fad32", "242b43fc76229ce4a3e9182f49267a5ad53ec106", "6e86e5af5e0bfe734a68d3a814adf6fd3eb01385", "1d9e276dc901978f5e0bc6f6d9898b5777d1b86a", "fcae2fcef595059529ebe553431ab41b44062ae4", "884425f38f4c4b23e0f8facb0b0683e76c545dbb", "026c3b3cafb262f4f597589413b0d1a2bcf97b50", "026c84df70942697ae850f9097c1676531a49821", "8a0af8ae748210ef571d074362b552af571e6d33", "05a618847e4f08e5bca29dff732757779722b2e0", "49cae25a1796b6a2898b99b2684b33eed8f58ee9", "11fb91cf78700428342aa3ed6636f655bb97ca33", "13f6ddd72bcf62dcc13cf4515be29d48948b9693", "6e8d42ea4e8b88eacc337000c2e0b46d489f8437", "2a974da13d6f956e37549378e00f86aa54bc5642", "0336a45cb84a4838c2b6a81ba08adb9a473dcd98", "1d15930cd9e4ececf22fd96bf9ba52f12dc0665e", "290feda5f85153f434f4efe813cc2a98940385b0", "d3630df8179b704a516b6ed01cfcad24ae50ade7", "959cfe05045e1c7e80406209244d3346061ca4e6", "2366a2c3c629bb20fdcb8f2b56136f3642a4edd6", "b7efe971a34a0f2482e0b2520ffb31062dcdde62", "47fb661743d25a99c10296feaade23799efa62e3", "589a90423f9d63fd16d28b0389692f754fa156fd", "229252e83bfa3af97ce4a66eb173ba024728e298", "1b3ac17ccbacdf87612c8455c8f5040ffc7ece8a", "f06e556b7dab631846ac566b65fa63df553509c0", "13cab010c7d25e38397382b567de0198f4f466de" ], "paperAbstract": "Exploit mitigations, by themselves, do not stop determined and well-resourced adversaries from compromising vulnerable software through memory corruption. Multi-variant execution environments (MVEEs) add additional assurance by executing multiple, diversified copies (variants) of the same program in lockstep while monitoring their behavior for signs of attacks (divergence). 
While executing multiple copies of the same program requires additional computational resources, modern MVEEs run many workloads at near-native speed and can detect adversaries before they leak secrets or achieve persistence on the host system.\n Multi-threaded programs are challenging to execute in lockstep by an MVEE. If the threads in a set of variants are not scheduled in the exact same order, the variants will diverge from each other in terms of the system calls they make. While benign, such divergence undermines the MVEE's ability to detect divergence caused by malicious program inputs. To address this problem, we developed an MVEE-specific synchronization scheme that lets us execute a set of multithreaded variants in lockstep without causing benign divergence. Our fully-fledged MVEE runs the PARSEC 2.1 and SPLASH-2x parallel benchmarks (with four worker threads per variant) with a slowdown of less than 15% relative to unprotected execution. Addressing this longstanding compatibility issue makes MVEEs a viable defense for a far greater range of realistic workloads.", "pdfUrls": [ "http://www.ics.uci.edu/~perl/eurosys17_parallelism.pdf", "http://doi.acm.org/10.1145/3064176.3064178" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3128d81ec9915faf1195f9ed59b3fd5f0b1b88ad", "sources": [ "DBLP" ], "title": "Taming Parallelism in a Multi-Variant Execution Environment", "venue": "EuroSys", "year": 2017 }, "313819775e5c7212fad7bece790a2282225fdc70": { "authors": [ { "ids": [ "32485139" ], "name": "Jong Youl Choi" }, { "ids": [ "1730909" ], "name": "Jeremy S. Logan" }, { "ids": [ "4003076" ], "name": "Matthew Wolf" }, { "ids": [ "1781276" ], "name": "George Ostrouchov" }, { "ids": [ "1753288" ], "name": "Tahsin M. 
Kur\u00e7" }, { "ids": [ "1727669" ], "name": "Qing Liu" }, { "ids": [ "1734819" ], "name": "Norbert Podhorszki" }, { "ids": [ "1736095" ], "name": "Scott Klasky" }, { "ids": [ "3001630" ], "name": "Melissa Romanus" }, { "ids": [ "35186090" ], "name": "Qian Sun" }, { "ids": [ "1750983" ], "name": "Manish Parashar" }, { "ids": [ "15808319" ], "name": "Randy Michael Churchill" }, { "ids": [ "3256940" ], "name": "Choong-Seock Chang" } ], "doi": "10.1109/CLUSTER.2017.67", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.67", "entities": [ "Algorithm", "Computation", "Deployment environment", "Distributed computing", "Graph embedding", "Graph theory", "Machine learning", "Network topology", "Observable", "Optimal control", "Parallel computing", "Simulation", "Titan" ], "id": "313819775e5c7212fad7bece790a2282225fdc70", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "587-591", "journalVolume": "", "outCitations": [], "paperAbstract": "Task mapping is an important problem in parallel and distributed computing. The goal in task mapping is to find an optimal layout of the processes of an application (or a task) onto a given network topology. We target this problem in the context of staging applications. A staging application consists of two or more parallel applications (also referred to as staging tasks) which run concurrently and exchange data over the course of computation. Task mapping becomes a more challenging problem in staging applications, because not only data is exchanged between the staging tasks, but also the processes of a staging task may exchange data with each other. We propose a novel method, called Task Graph Embedding (TGE), that harnesses the observable graph structures of parallel applications and network topologies. 
TGE employs a machine learning based algorithm to find the best representation of a graph, called an embedding, onto a space in which the task-to-processor mapping problem can be solved. We evaluate and demonstrate the effectiveness of TGE experimentally with the communication patterns extracted from runs of XGC, a large-scale fusion simulation code, on Titan.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.67" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/313819775e5c7212fad7bece790a2282225fdc70", "sources": [ "DBLP" ], "title": "TGE: Machine Learning Based Task Graph Embedding for Large-Scale Topology Mapping", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "3145ae24017a76dd4b3fb01db0533eae88efb6c4": { "authors": [ { "ids": [ "3101979" ], "name": "Guangxu Xun" }, { "ids": [ "2694924" ], "name": "Yaliang Li" }, { "ids": [ "1947899" ], "name": "Jing Gao" }, { "ids": [ "1769577" ], "name": "Aidong Zhang" } ], "doi": "10.1145/3097983.3098009", "doiUrl": "https://doi.org/10.1145/3097983.3098009", "entities": [ "Baseline (configuration management)", "Information theory", "Language model", "Text corpus", "Word embedding" ], "id": "3145ae24017a76dd4b3fb01db0533eae88efb6c4", "inCitations": [ "1014d71f8f2225d2ff3529b86a7e2cfd6d84c0b1", "db6d960bcdbf6c32a075c3c554681dff77b6f5fe", "ad64649f20cc20a2d1584cbc4b859d9fa9920538" ], "journalName": "", "journalPages": "535-543", "journalVolume": "", "outCitations": [ "2cf379819632deb93b2cd9250da25bf21fa25171", "0062b9ff8522498b34f467e36af218d87fcf5d9a", "175fc7cd36a126d1de55c092d391790896060952", "1510cf4b8abea80b9f352325ca4c132887de21a0", "06f5c8e8485c87681ba648e2775d825c34e66c7d", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "22eff4a1cd15d76b4e89ff3111713607a348816a", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "1f656b9c686c1e5db2a4d41f1ce7e270965def3e", "00a28138c74869cfb8236a18a4dbe3a896f7a812", 
"15fcecf899acf594ebb7b04ba2df49aa4adc4799", "9208ecbd7244040ba6ee59a067b527c8b095fe0a", "6363cfe79b33d66deeeba0e68e89f15b3e1e657f", "319764d09e38c9bb3508ca57fe8173ba46e271b3", "142f38642629b9d268999ad876af482177d36697", "980c2517368aaa0d4a4972ff7bcc5cf3ec41656a", "244a152b82401d1619ef4ac88c51672b1662e3a2", "132c8b4d0760d2d35c99b0358c8bc5a51170e5e7", "768b130d70911178c6b5b0dbaf8ccb19d899ac42", "2538e3eb24d26f31482c479d95d2e26c0e79b990", "0826c98d1b1513aa2f45e6654bb5075a58b64649", "1145859ba17172d517cdffe2a5f00a16366c5765", "1b179d6890c3055cea12bc65337dcebc3167d436", "87d907a114409755ecd3c6886585de26a4e17ffe", "2e37d6b9a02ec31ea22252c37beae410c6b609a8", "328b00f1baaf08dedba3a788b4ce0a4b26003f18", "0825788b9b5a18e3dfea5b0af123b5e939a4f564", "917fbd64a435cb33e0e5b4cd73fe830db7b166db" ], "paperAbstract": "A text corpus typically contains two types of context information -- global context and local context. Global context carries topical information which can be utilized by topic models to discover topic structures from the text corpus, while local context can train word embeddings to capture semantic regularities reflected in the text corpus. This encourages us to exploit the useful information in both the global and the local context information. In this paper, we propose a unified language model based on matrix factorization techniques which 1) takes the complementary global and local context information into consideration simultaneously, and 2) models topics and learns word embeddings collaboratively. We empirically show that by incorporating both global and local context, this collaborative model can not only significantly improve the performance of topic discovery over the baseline topic models, but also learn better word embeddings than the baseline word embedding models. 
We also provide qualitative analysis that explains how the cooperation of global and local context information can result in better topic structures and word embeddings.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098009" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3145ae24017a76dd4b3fb01db0533eae88efb6c4", "sources": [ "DBLP" ], "title": "Collaboratively Improving Topic Discovery and Word Embeddings by Coordinating Global and Local Contexts", "venue": "KDD", "year": 2017 }, "316278bc22f57ca17fe66b80833eb47f484785da": { "authors": [ { "ids": [ "40598941" ], "name": "Donald Kline" }, { "ids": [ "1687807" ], "name": "Rami G. Melhem" }, { "ids": [ "1678617" ], "name": "Alex K. Jones" } ], "doi": "10.1109/IGCC.2017.8323584", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323584", "entities": [ "Anatomic Node", "Apathy", "Bit-level parallelism", "Cross Reactions", "Crosstalk", "Embodied energy", "Error detection and correction", "Heart failure", "Holism", "Pointer ", "Solutions", "error correction" ], "id": "316278bc22f57ca17fe66b80833eb47f484785da", "inCitations": [ "7581f218f733a2c806c0b26c84ec55b8bcf20822", "041b9f4e7f1bf62054eba1196c63165612fb1305" ], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-6", "journalVolume": "", "outCitations": [ "bd268b7cca7d55f5235360e7545688f6e984a85f", "421bef45c5db3454759fd2ce3174fb41f64c6486", "30a74b804f7f25e817793e40bf17241d64c2f431", "4e2e21f9f4ff9f7f2811a75ab6be48989838f25f", "17672773ef914b8449831be3cc78c13ab49ebd3b", "03d55467b20e662fbaa8416e853f57c93834a9fb", "2d61939e21a40daed297cb3d1855b32ed0eaef67", "151ea06c9c0bbd21ddc823a46626ba662774eabb", "44d886f89cdbd4fdf5dd25d83b2d37deb7541bf7", "de0012dbc0423d964c2b5fd61feb39b0feba8505", "068fe6109efe8edde57ae43d0e012d4075646af9", "9514f2b670d8581fb70d7f91cfb4cf33681646ac", "da904786a44b3cdd129b3eca9de7ffe8b0ab40c1", "d89dbe46e5b7ade9e613d33ee068b68cbf63f614", 
"6aba2b1785bd26eb6d85820a734ddaa262d20571", "14b664dd04703d2333bce3cb35641f90a3a8a2ce", "ef5d90dc1a28065b5a2210516d0f9ab09ae1d7f4", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "539294f351cdafb2d96d2de5008d841a4b03ff87", "0e762ec70194b758044864dee34d40e946ee7faf", "352a8957005dc5519b15ed1870751ec494d66395", "b0731359b5f8730b11664fabef39a0cd52a43dfd", "096ee9c89d43fd03a602aed3a37fdf43dc8e60ae", "95255aff0e319c543809c78a98d6eeebafaf6d0b", "747ad718761b7d848a12e4f3a82aa0f46117a815", "1535e84c1782f930d59fd31ebdcf2a530b11f183" ], "paperAbstract": "As technology nodes continue to scale, main memories experience both increasing energy consumption as well as reliability challenges. In order to address rising failure rates due to problems with yield and runtime effects, such as crosstalk, due to process variation in small feature sizes, improved correction capabilities at the bit-level are increasingly essential. To address this challenge, we propose a sustainable approach to error correction in deeply scaled memories. In particular, we propose a novel area-efficient and sustainable fault map (SFaultMap) which targets holistic energy considerations to improve reliability while minimizing both operational and embodied energy. To demonstrate the effectiveness of SFaultMap we conduct a sustain-ability study, based on holistic energy consumption, to evaluate under which scenarios different solutions should be employed. In all cases and scenarios with moderate to high fault rates, SFaultMap has reduced energy over Error Correcting Pointers (ECP) for a five year lifetime. 
Moreover, as fault rate increases, the indifference time for ECP to recover upfront manufacturing energy increases from years to decades.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323584" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/316278bc22f57ca17fe66b80833eb47f484785da", "sources": [ "DBLP" ], "title": "Sustainable fault management and error correction for next-generation main memories", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "3162f956caffd1751dfdda1ddef546b588f6393b": { "authors": [ { "ids": [ "1872111" ], "name": "Jieying She" }, { "ids": [ "8230559" ], "name": "Yongxin Tong" }, { "ids": [ "37833805" ], "name": "Lei Chen" }, { "ids": [ "3406090" ], "name": "Tianshu Song" } ], "doi": "10.1145/3035918.3064020", "doiUrl": "https://doi.org/10.1145/3035918.3064020", "entities": [ "Algorithm", "Login", "Multi-armed bandit", "Positive feedback", "Social network", "Thompson sampling" ], "id": "3162f956caffd1751dfdda1ddef546b588f6393b", "inCitations": [ "20677673ca4d660298f236760b9568902a6a8117" ], "journalName": "", "journalPages": "851-865", "journalVolume": "", "outCitations": [ "05da530858a790fdc71dc6d54c445555d0be9d48", "4b83b18ecbb46a0e03b1cf7009c7f2244a729f66", "454fd57b4ec44780198d786e28a74b9d72610f1a", "e95db95b7b4e0552e3133f9b878d4977f92bd9df", "3ef959ddbee41a976256fc604204c47ad313d164", "248b5910f543e204e92789a507a2067f58b537b2", "012c5167ca8732b47cd99ba879808718cb81df5b", "3d4cc9ab7d0f31cec262938e687b01612d9a8e94", "8d23410f5e3b8cc3235e13b0b7b04d177b107081", "7807a5aadf3cc187b38f7a399f5700ea2d08b760", "475304672319c1737a4e13b7c58701ce4086bb54", "a0d1e0214565b6eaff4b454ebcb9336466278f2a", "130b0cac6151e414b5bc0eb13cb02bf43ee66c11", "14487f261dfdbb2dea70fa90e7789ded6b128356", "103f6fe35033f9327611ddafde74a2b544072980", "2fa7c63ee5484c688e8ec62846ed7137c4924607", "572379c1d56e7e19422ae38218ee228c61aefb2f" ], "paperAbstract": "Online event-based social 
networks (EBSNs) and studies on global event-participant arrangement strategies for EBSNs are becoming popular recently. Existing works measure satisfaction of an arrangement by a linear combination of few factors, weights of which are predefined and fixed, and do not allow users to provide feedbacks on whether accepting the arrangement or not. Besides, most of them only consider offline scenarios, where full information of users is known in advance. However, on real-world EBSN platforms, users can dynamically log in the platform and register for events on a first come, first served basis. In other words, online scenarios of event-participant arrangement strategies should be considered. In this work, we study a new event-participant arrangement strategy for online scenarios, the Feedback-Aware Social Event-participant Arrangement (FASEA) problem, where satisfaction scores of an arrangement are learned adaptively and users can choose to accept or reject the arranged events. Particularly, we model the problem as a contextual combinatorial bandit setting and use efficient and effective algorithms to solve the problem. The effectiveness and efficiency of the solutions are evaluated with extensive experimental studies and our findings indicate that the state-of-the-art Thompson Sampling that is reported to work well under basic multi-armed bandit does not perform well under FASEA.", "pdfUrls": [ "http://www.cse.ust.hk/~yxtong/feedback_sigmod17.pdf", "http://doi.acm.org/10.1145/3035918.3064020" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3162f956caffd1751dfdda1ddef546b588f6393b", "sources": [ "DBLP" ], "title": "Feedback-Aware Social Event-Participant Arrangement", "venue": "SIGMOD Conference", "year": 2017 }, "317e88dfbe42709490e90b3d63c22a6507494908": { "authors": [ { "ids": [ "19252730" ], "name": "Bram Veenboer" }, { "ids": [ "1776907" ], "name": "Matthias Petschow" }, { "ids": [ "1722141" ], "name": "John W. 
Romein" } ], "doi": "10.1109/IPDPS.2017.68", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.68", "entities": [ "Adjusted winner procedure", "Algorithm", "Central processing unit", "Graphics", "Graphics processing unit", "Next-generation network", "Numerical analysis", "Parallel computing" ], "id": "317e88dfbe42709490e90b3d63c22a6507494908", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "545-554", "journalVolume": "", "outCitations": [ "3dad87731664dc767db210b76dfc6db2bf206a39", "32bf0e2294bc545dbee875a30d6a391691b0d671", "21f3559282697fbe5ac6a75b3078fb74cf3c7d1c", "07da7745ea84f3be0957035496f53d2b5a2acc42", "bb4cf037d8a5adbb3f08a3405d926d022b8c27c5", "d209b629ad39798ad2d1dae140b8541334a12db3", "b7d792cded48c43ee69559276699d767d63ac9aa", "2b83e301a84b076fbfc6d97034491cbab4ec0290", "2516524a25fdba2c54f9a1d80b26300d896f2c9e", "9db02ca8d27a6fc7acb7ed38d9af3527f4a01132", "8de213c04eff53b819708487c579718180fb36e8", "ea448a83c329e82a9592379e02507feacdf5676a", "0919fe1248698a8a7b2a174d4fb160594da46ef9", "5b1b04d1587ba870c5a0f0bb9a7e5de31b2b745d", "092217c2267f6e0673590aa151d811e579ff7760", "b5f3cdf1af34060e475ef6aba96059fb347c436c", "05918ee9491afa7aef5936034de834f389da01f3" ], "paperAbstract": "Realizing the next generation of radio telescopes such as the Square Kilometre Array (SKA) requires both more efficient hardware and algorithms than today's technology provides. The recently introduced image-domain gridding (IDG) algorithm is a novel approach towards solving the most compute-intensive parts of creating sky images: gridding and degridding. It avoids the performance bottlenecks of traditional AW-projection gridding by applying instrumental and environmental corrections in the image domain instead of in the Fourier domain. In this paper, we present the first implementations of this new algorithm for CPUs and Graphics Processing Units (GPUs). 
A thorough performance analysis, in which we apply a modified roofline analysis, shows that our parallelization approaches and optimizations lead to nearly optimal performance on these architectures. The analysis also indicates that, by leveraging dedicated hardware to evaluate trigonometric functions, GPUs are both much faster and more energy efficient than regular CPUs. This makes IDG on GPUs a candidate for meeting the computational and energy efficiency constraints of future telescopes.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.68" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/317e88dfbe42709490e90b3d63c22a6507494908", "sources": [ "DBLP" ], "title": "Image-Domain Gridding on Graphics Processors", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "317f86138fb8ce99ccc9c1e00cf2d12682752963": { "authors": [ { "ids": [ "40373059" ], "name": "Aaron Blankstein" }, { "ids": [ "30721371" ], "name": "Siddhartha Sen" }, { "ids": [ "3122063" ], "name": "Michael J. 
Freedman" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Cache (computing)", "Data structure", "Django", "Lazy evaluation", "Memcached", "Redis", "Simulation", "Throughput", "Web application", "Web framework" ], "id": "317f86138fb8ce99ccc9c1e00cf2d12682752963", "inCitations": [ "7917b9b0560cc71c83b79a9ff19c0cf69ee2f630", "0f35d1156d2667232855578b50b8fb02ea5bbf51" ], "journalName": "", "journalPages": "499-511", "journalVolume": "", "outCitations": [ "0154103e091dea574c39f3c89d52ccfefc06af6c", "af561f565363e97aed97505468bcd78874e72443", "67a81027bc7f3b2b4425d9b795346dc6c7ce7adb", "0a5882fc7600383eb9d6cc119942f48a70f896ad", "57c26cb30d57f12fad0b93cb2db73c229e0557ff", "0fd4a1b1b92a65b70fad60ad6e95ed54e8f6e86a", "3a0f1485dc13ad3a1cf78900be809f3d040be6eb", "1bf6fc53b086fd55007f98652b06de60a1433dec", "337e4b7f57ccbb7485950b93da9c5bb4ec4dc9ad", "9c48179c07963a9fad69a359362c0aee87f9fe18", "bb5e43dd30a3a60df42652d56781568f5cd0a99d", "59ca22b032e7e1fb4467a3b2ad577cbf9d8f0ea8", "2077c3787e5a1545df312d51f9a7b8cd05e2c7f0", "f345ecb8e361107bd6c8ca9b8f712c42a102c4cd", "0da3e7bf3125534a7fe08c1d630b3cf32259c5fa", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "19c3fcffda8e6e5870b3a533c483bca024501ab5", "9aa0d7253574e50fe3a190ccd924433f048997dd", "d537c328ae831d6751d095138938c0ad62fad026", "73ba74bfbdd56c291f8619019350ebfdb8416c5e", "5b5f778f9b990860716e62380deb351a7bf935e1", "01d5e4482c4fa7c8c4dd9a40783efc79fa7640af", "dd4e5ae575b7bed9bca2b23594c6cf9a2e5232aa", "f422c1fbdd62c9afdffb67fe2e759f4bb46954a5", "1c7d0f188a8033d8a14ab3ae30662f7e85fa65b6", "0579cb7ceecac67eefb63bef0436fbf5e552cf72", "74fc18c7aa0e299091bfbb312c21eafd5fed9a0a", "3d5b229981595a5864270d9336e941e0ae374fec", "537c75e080adad3bdc57a214c83da429148c8b72", "281b2fbe338c0577c4f3caccb7962a32e93d0c77", "e0508499b4cf5794d5aeaf717e7ad9541e9c2bba", "0ad3358ffc0d5e44311160767cc0fb65ccd25b00", "1354abf8baa8b6d39c65758072ae9d0b07227d5c", "4b4ee1ee9bbfd9527fba0bbd761bd61a59f96a48", 
"28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "141032077b5d133b70fdf895c783acfc4f9f6afe", "72ce8389b939965624a4f9a3c5aafd4835d65bb8", "1594118f2696b573f08510cf837f3b37db87face" ], "paperAbstract": "Today\u2019s web applications rely heavily on caching to reduce latency and backend load, using services like Redis or Memcached that employ inflexible caching algorithms. But the needs of each application vary, and significant performance gains can be achieved with a tailored strategy, e.g., incorporating cost of fetching, expiration time, and so forth. Existing strategies are fundamentally limited, however, because they rely on data structures to maintain a total ordering of the cached items. Inspired by Redis\u2019s use of random sampling for eviction (in lieu of a data structure) and recent theoretical justification for this approach, we design a new caching algorithm for web applications called hyperbolic caching. Unlike prior schemes, hyperbolic caching decays item priorities at variable rates and continuously reorders many items at once. By combining random sampling with lazy evaluation of the hyperbolic priority function, we gain complete flexibility in customizing the function. For example, we describe extensions that incorporate item cost, expiration time, and windowing. We also introduce the notion of a cost class in order to measure the costs and manipulate the priorities of all items belonging to a related group. We design a hyperbolic caching variant for several production systems from leading cloud providers. We implement our scheme in Redis and the Django web framework. 
Using real and simulated traces, we show that hyperbolic caching reduces miss rates by ~10-20% over competitive baselines tailored to the application, and improves end-toend throughput by ~5-10%.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_blankstein.pdf", "https://www.cs.princeton.edu/~ablankst/stuff/hyperbolic_caching.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/blankstein", "https://www.usenix.org/system/files/conference/atc17/atc17-blankstein.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/317f/86138fb8ce99ccc9c1e00cf2d12682752963.pdf", "s2Url": "https://semanticscholar.org/paper/317f86138fb8ce99ccc9c1e00cf2d12682752963", "sources": [ "DBLP" ], "title": "Hyperbolic Caching: Flexible Caching for Web Applications", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "31d91b9729c40e04f776b260f8b3bea080c4bfae": { "authors": [ { "ids": [ "3151672" ], "name": "Yoann Dieudonn\u00e9" }, { "ids": [ "1808444" ], "name": "Andrzej Pelc" } ], "doi": "10.1145/3087556.3087563", "doiUrl": "https://doi.org/10.1145/3087556.3087563", "entities": [ "Algorithm", "Distributed computing", "Leader election", "Path (graph theory)", "Polylogarithmic function", "Polynomial", "Symmetry breaking", "Time complexity" ], "id": "31d91b9729c40e04f776b260f8b3bea080c4bfae", "inCitations": [ "0318da0df08cd2c0cbba23343777143ca684ff9f", "e95966c42344a5d5f7cb61350fe905cd7e090ba0" ], "journalName": "", "journalPages": "207-215", "journalVolume": "", "outCitations": [ "089e70dfa16002109fc1a621ad6f4b6e82521214", "14ca952dca30de804918ccc4969f0270fc772615", "eb9bfe0367605b8221e0266029719891a0ad9807", "8c80410ecf280cbe12a6e9119ddee4f45c0ef889", "0f32bb9b0ad9e97dbedd638429d0640340dd0fc2", "53be44a49e847440c01cae30457b1d4ea54b34b5", "298f7050e3c10b6a2c29191f3c9fad9df5a56239", "7c3e689d77a539f8da50e9c1f78898359042ea30", "00d05e767988e3fd7e60510aa8476ef50c138e8c", 
"9ac2f1ae0bbcb4e0f3f1c01e8b8816849a7e25f7", "26cb1dae1bf001d3631108b39c873643e309e364", "9847c7f143120de67a9c60d2378d5bbd17bb1f8b", "94c1f682b569f11d51ceda817780fcc5d732cb1a", "11a3218722f6f8c2809845e8f14fc7e706f00903", "a58595ea701344bc20c8646aa1c9ca8e19c82ff1", "fd4bf98be8490793794fb9ed10957f3fe67b6309", "018b0be3548b2aa3a00ac174cb7a8617ed3e6a07", "010e594e33d9fad8187a07b3197bac6cd0bc7723", "33a2ddf4aa1e3773f15b0e361fd7d07c4f147b29", "28d40408e35cbbc6ca18df5d767a80be9da23e6a", "e087f75672877fc7dc3087a8bfad981bcff0c0ac", "4a6eff30e3fc023beb93b53349f9c08b7b363b58", "2c6f5339903c7684c4a1624ac0819b054e7a7304", "7e653a532d541a90398ffec1730cff5de166047e", "a5f22f5626fd056965b8ad87406b7df65b6804ef", "4aa6f3d285dc12fe9f2d0ac7c94a6b0161d8d05c", "2539524d6dfc22c31c8fb009ec027b5cfc40a822", "3c32a0e74cc9b35b50dc179836e9847ac931adcf", "b8031ff197d3e3894f3484318d65aced7054138d", "d3a747acb53f1f02bae0ef9aff53a0c9f72bf78f", "ccb4e5f9fc72ce780b3bfdc1b30e37d541550f87", "89c9807e028de21f57ae3b2e5f3bc6d53dd21479", "17891e45e366a02c0574ac98ec074e2352bccf96", "1200a852a357f6b83cc9c3f755105ccdb6a38dce" ], "paperAbstract": "Leader election is one of the basic problems in distributed computing. This is a symmetry breaking problem: all nodes of a network must agree on a single node, called the leader. If the nodes of the network have distinct labels, then such an agreement means that all nodes have to output the label of the elected leader. For anonymous networks, the task of leader election is formulated as follows: every node v of the network must output a simple path, which is coded as a sequence of port numbers, such that all these paths end at a common node, the leader. In this paper, we study deterministic leader election in arbitrary anonymous networks.\n It is well known that deterministic leader election is impossible in some networks, regardless of the allocated amount of time, even if nodes know the map of the network. This is due to possible symmetries in it. 
However, even in networks in which it is possible to elect a leader knowing the map, the task may be still impossible without any knowledge, regardless of the allocated time. On the other hand, for any network in which leader election is possible knowing the map, there is a minimum time, called the election index, in which this can be done. Informally, the election index of a network is the minimum depth at which views of all nodes are distinct. Our aim is to establish tradeoffs between the allocated time \\tau and the amount of information that has to be given a priori to the nodes to enable leader election in time \\tau in all networks for which leader election in this time is at all possible. Following the framework of algorithms with advice, this information (a single binary string) is provided to all nodes at the start by an oracle knowing the entire network. The length of this string is called the size of advice. For a given time \\tau allocated to leader election, we give upper and lower bounds on the minimum size of advice sufficient to perform leader election in time \\tau.\n We focus on the two sides of the time spectrum. For the smallest possible time, which is the election index of the network, we show that the minimum size of advice is linear in the size n of the network, up to polylogarithmic factors. On the other hand, we consider large values of time: larger than the diameter D by a summand, respectively, linear, polynomial, and exponential in the election index; for these values, we prove tight bounds on the minimum size of advice, up to multiplicative constants. 
We also show that constant advice is not sufficient for leader election in all graphs, regardless of the allocated time.", "pdfUrls": [ "http://arxiv.org/pdf/1604.05023v1.pdf", "https://arxiv.org/pdf/1604.05023v1.pdf", "http://doi.acm.org/10.1145/3087556.3087563", "http://arxiv.org/abs/1604.05023" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/31d91b9729c40e04f776b260f8b3bea080c4bfae", "sources": [ "DBLP" ], "title": "Impact of Knowledge on Election Time in Anonymous Networks", "venue": "SPAA", "year": 2017 }, "31edf47da8494e9985c55efcda8e178f5b87140c": { "authors": [ { "ids": [ "2887206" ], "name": "Hyukjoong Kim" }, { "ids": [ "3149588" ], "name": "Dongkun Shin" }, { "ids": [ "2256812" ], "name": "Yunho Jeong" }, { "ids": [ "2041606" ], "name": "Kyung Ho Kim" } ], "doi": "", "doiUrl": "", "entities": [ "Address space", "Dynamic random-access memory", "Flash memory", "Locality of reference", "Principle of locality", "Sequential access", "Solid-state drive" ], "id": "31edf47da8494e9985c55efcda8e178f5b87140c", "inCitations": [ "0f4386d4a521e36cb15252b4e908a948a65252ef", "acd1ffefbe465f7cbb2584d32394d54bd5988a39" ], "journalName": "", "journalPages": "271-284", "journalVolume": "", "outCitations": [ "088e3e939ad234b6fdd0e321290fb26937dc2553" ], "paperAbstract": "Recent advances in flash memory technology have reduced the cost-per-bit of flash storage devices, thereby enabling the development of large-capacity SSDs. However, two major concerns arise in designing SSDs. The first is the poor performance of random writes, and the second is the large size of the internal DRAM of an SSD. Although the previously proposed demand map loading technique can reduce the required DRAM size, the technique aggravates the poor random performance. 
We propose a novel address reshaping technique called sequentializing in host and randomizing in device (SHRD), which transforms random write requests into sequential write requests by assigning the address space of the reserved log area in the SSD. SHRD can restore the sequentially written data to the original location without requiring explicit copy operations by utilizing the address mapping scheme. We implement SHRD in a real SSD device and demonstrate the improved performance resulting from SHRD for various workloads1.", "pdfUrls": [ "http://nvmw.ucsd.edu/nvmw18-program/unzip/current/nvmw2018-final43.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17-kim-hyukjoong.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_kim.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_kim.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17-kim-hyukjoong.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/kim" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/b307/6f29ad699eb7b0373254eab63a42ba6a1dfa.pdf", "s2Url": "https://semanticscholar.org/paper/31edf47da8494e9985c55efcda8e178f5b87140c", "sources": [ "DBLP" ], "title": "SHRD: Improving Spatial Locality in Flash Storage Accesses by Sequentializing in Host and Randomizing in Device", "venue": "FAST", "year": 2017 }, "31f14626b7233ed3f8922408c663ce58e813f2d4": { "authors": [ { "ids": [ "3378428" ], "name": "Enrico Mariconti" }, { "ids": [ "2371085" ], "name": "Lucky Onwuzurike" }, { "ids": [ "1749393" ], "name": "Panagiotis Andriotis" }, { "ids": [ "1728207" ], "name": "Emiliano De Cristofaro" }, { "ids": [ "34672651" ], "name": "Gordon J. 
Ross" }, { "ids": [ "2350947" ], "name": "Gianluca Stringhini" } ], "doi": "", "doiUrl": "", "entities": [ "Android", "Application programming interface", "Behavioral modeling", "F1 score", "Malware", "Markov chain", "Operating system" ], "id": "31f14626b7233ed3f8922408c663ce58e813f2d4", "inCitations": [ "0db098c1c2435565d18fb0c08f3448d3382b50fe", "378e04a526d455e77758c0914ea9cf6e552db664", "22684e1fcecd742c246c50788095c591a23d1f5b", "c284db823f7fd79444c6cf378949201479311789", "6107b9d8eeef8091c24fa7d428d1c174ef1053c2", "4a727288433c680afbfa12ac798d3c687b91501c", "1f83e48319270c0a004d277bbb5156f1f477f98d", "bce29b3b37efcbfbab1209de8b36fd32349fbaa3", "4936fb57d2c5278615569fb63aeb8e6669870083", "5febb017c29f5f596f7c2f1b27f4dc6dc25931aa", "7e6d508d7e9d3876311a5a399c30371c00e3548f", "92dcc8eb770f59be2b1a0e422336132741783989", "155635ebe2acb2b0bad12768f954ddceb4fb5f86", "1d7c51ecede8eff1a6a608967f487f80539758a6", "12f0a5268975d535761d99819856992f7021780b" ], "journalName": "CoRR", "journalPages": "", "journalVolume": "abs/1612.04433", "outCitations": [ "12ef153d9c7ccc374d56acf34b59fb2eaec6f755", "03a613951421cf67237d5278d6bf3702a26da9aa", "7b1996d4446f7682fa0ae36527f3cbc5e46dad58", "07fe0ddb6cef7ed8946d416c093452c1d0db0c34", "e0358dbc7fba258ae92ccee678e2b924e42ef9de", "1ba779d5a5c9553ee8ecee5cf6bafb4b494ea7bc", "023f23c300804754753cb11db51fb7f582556ab7", "7520336ec2a08ad4fcbc5073082a8318571d679c", "17138b471f2dade960cd3969db0c08b623b33797", "0ab393affe9d674ef790be14fdfade368f3e5989", "2c0ff4497579f2edb83648c4d2153dfc37be2ccf", "08d32340e0e6aa50952860b90dfba2fe4764a85a", "6f36ec040624a1083222942dfde657e576afe701", "73df043a6c6cc1a8b395e14c116636a47622f574", "94d0e8b118efecb9f5b2056d84bf22253a2fb63c", "4491e3a4869ca6dbf404ddbf1dbb6c4d2583b5c8", "1370b7ec6cb56b0ff25f512bd673acbab214708c", "3c324d1f33842e459d9a57c3241232164283aa4c", "05457985ed22d043e561043951753f959017ee90", "14490c37be179400c86cf89aac7c9272dddf60e7", "353ff92759248416c3a3a5cd980b5deb43a86c5c", 
"cd800900fbd9bc89a35ff597a7ddfe74aceefa55", "922a324c99e97c511477637f3cfe16c7f06ce382", "06c291ac85e9297707f74e13f90f041c77e9eed4", "0b7f62a2ac217e035e0cd9cb73d2de4fb6135af5", "1ac58ab550f1f8f075b373211d76371d52979ce6", "32b5b6031df5dc97a3e4a94a802711592625b0b0", "14a1a2fdb66ba18060d2f82c5f08c13d97ee5854", "ff2cd9692e642d85438d475428e30654ebbdba3c", "13d4c2f76a7c1a4d0a71204e1d5d263a3f5a7986", "365859840ba0b560a6e82e0e808ee26a7734af51", "9e1bcd6414fc6fdd3b63aab48cc3732dc761f538", "65d3595a5cfd6c9920f07e04c81360369b301130", "8007cb93af7cd1a8017e9c4c518b1ebc0104518d", "8b3d38936cbe9292fc70a1f3aa9b37627e304c5c", "983aba89fd12061ab144f2ddaca2b07bd3b65d1d", "6d59f58f7408362036196048c9ba11f399dd9bc2", "32bd7b680830b3e168795ccfe650ceeb0edf7878", "a2e72b5753315b97b5d5f07adaa7097ac70268d9", "00225736ddcd3698c5c404fd211909a571b7e569", "59e54345997b728b58ef3c61d1324460c35b1253", "9f212d2a1b98393dacc394da7739181604623c6c", "01911a1e4c78d2562ee3999413a5008d845b1c22", "d1875bc7fb14c0db150ce3c4826240b39a1f2834", "820ed28656da64b23a19906001bf4472e9cf483c", "02fe41e07def449132516345d7152477c0d7c949", "6ab76471696ce227e4178ca73bb86cc62520c547", "738feaca3e125c6c6eae4c160b60fdedd12c89aa", "41289566ac0176dced2312f813328ad4c0552618", "29be05f17c8906d70659fe1110758a59d39d2a08", "959093db69abc3b0fb4f7acc696a7f6ef39d0e23", "4a4c0cfc26020d519679a98fe683fce6aab1eefa", "027eb436c35c7e293e7ebc565163cb54c05fe2e9", "d35f952faa418a5d017e9c88fdda959bbc6dfecb", "8ab305c52000ef4c4fe2ed8f354ea41aba2a4957", "0cfd44531f917a1819346fc053e8f8662e3635bf", "432ec065b07e59d55b7be30d9d3436b13332c47a" ], "paperAbstract": "The rise in popularity of the Android platform has resulted in an explosion of malware threats targeting it. As both Android malware and the operating system itself constantly evolve, it is very challenging to design robust malware mitigation techniques that can operate for long periods of time without the need for modifications or costly re-training. 
In this paper, we present MAMADROID, an Android malware detection system that relies on app behavior. MAMADROID builds a behavioral model, in the form of a Markov chain, from the sequence of abstracted API calls performed by an app, and uses it to extract features and perform classification. By abstracting calls to their packages or families, MAMADROID maintains resilience to API changes and keeps the feature set size manageable. We evaluate its accuracy on a dataset of 8.5K benign and 35.5K malicious apps collected over a period of six years, showing that it not only effectively detects malware (with up to 99% F-measure), but also that the model built by the system keeps its detection capabilities for long periods of time (on average, 87% and 73% F-measure, respectively, one and two years after training). Finally, we compare against DROIDAPIMINER, a state-of-the-art system that relies on the frequency of API calls performed by apps, showing that MAMADROID significantly outperforms it.", "pdfUrls": [ "https://arxiv.org/pdf/1612.04433v3.pdf", "http://wp.internetsociety.org/ndss/wp-content/uploads/sites/25/2017/09/ndss2017_03B-3_Mariconti_paper.pdf", "https://iseclab.org/media/uploads/zotero/mamadroid-ndss2017.pdf", "https://arxiv.org/pdf/1612.04433v2.pdf", "http://wp.internetsociety.org/ndss/wp-content/uploads/sites/25/2017/09/ndss2017_3b-3_mariconti-slides.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/mamadroid-detecting-android-malware-building-markov-chains-behavioral-models/", "https://csaw.engineering.nyu.edu/application/files/1515/0825/8193/CSAW17_paper_128.pdf", "http://discovery.ucl.ac.uk/1532047/1/Stringhini_mamadroid.pdf", "https://arxiv.org/pdf/1612.04433v1.pdf", "http://arxiv.org/abs/1612.04433" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/31f1/4626b7233ed3f8922408c663ce58e813f2d4.pdf", "s2Url": "https://semanticscholar.org/paper/31f14626b7233ed3f8922408c663ce58e813f2d4", "sources": [ "DBLP" ], "title": "MaMaDroid: 
Detecting Android Malware by Building Markov Chains of Behavioral Models", "venue": "NDSS", "year": 2017 }, "3204c776cf067cb07916d01b3acf35db667e9803": { "authors": [ { "ids": [ "3460027" ], "name": "Yuanshun Yao" }, { "ids": [ "34824488" ], "name": "Bimal Viswanath" }, { "ids": [ "40016131" ], "name": "Jenna Cryan" }, { "ids": [ "2704852" ], "name": "Haitao Zheng" }, { "ids": [ "1972108" ], "name": "Ben Y. Zhao" } ], "doi": "10.1145/3133956.3133990", "doiUrl": "https://doi.org/10.1145/3133956.3133990", "entities": [ "Artificial neural network", "Countermeasure (computer)", "Crowdsourcing", "Deep learning", "Lossy compression", "Neural Networks", "Online service provider", "Recurrent neural network", "Scalability", "Traction TeamPage", "Usability testing", "Utility" ], "id": "3204c776cf067cb07916d01b3acf35db667e9803", "inCitations": [ "b8d85f62fd0975f3c2fd54b477de62592555cb28", "1856d07f955cad7498f01673c89fd9f088eaba12", "e3daa732b176b3a237d33257cf7b9562adb1d864", "5f3101a9ba19e618c3e05c70ecdba63c1c6a3f8d", "7141267b68cc99bb18dc1b01e988e00ca8db460b", "1afb781adfcb9253fe0cde898c09800379756c5b" ], "journalName": "", "journalPages": "1143-1158", "journalVolume": "", "outCitations": [ "149cdc2f5287d021ce7a2bb7d3123df2d225fa26", "86fcd4fec5607a59978117fb48fd6e38db4dd924", "25c731735a77a0a26589f5ae35d2c09a56edb0f6", "069340a9fb06268b19e12a59de87547c9750fc79", "189911eb70143bc5056155cd3db2977127dc2133", "19deb0bd92624e0fccfdc030e25eb2f071f4eacd", "e406c7d0bf67ea13dc9553fd4514ceaa3a61f6df", "53852d69c008f9ebfb05939b4eb7c1f3279437e6", "5a39d6c1bb04737cc81634f3ea2e81d3bc1ee6dd", "cd60e751a02e7104a68717c5ba29f534d9037ace", "1a58ba303834253955825fc723207e4d92ffe384", "02b3d1d162080d9aefd3fc30a0bcc9a843073b5d", "1eb6449ae040f051120e4d44348a0f68af9c36e8", "d055444c58fffebebe2cfba521fb73ad80bf7991", "652537f9d1c0f41b185fecd0f218a0e37729474f", "4d4e6d81c36ea78e09fe078227c6f551a67f045e", "83ce1ca586e939eda8c3af7cd452b7f15cc0ea06", "52e011e52d1a9e2c9ea12f31f5a2d3c1b459017e", 
"a1b2bad40a3368ca1db59dfbba825c10588e54b0", "2b3e4df9463e721577f7058d39949b0d74fd58a9", "0b3a0710031be11b2ef50437c7d9eb52c91d6a33", "36cb4ed29f9f0b6ea37343b3d98154293a374ec2", "1c61f9ef06fe74505775a833ff849185757199e7", "01ece6060aaadf2e72f0b06dfcb9d02677e99e9a", "58e00cde75cf11589ca53e7d2e62179a80cad19a", "7ca32395ad3c61bfc0295c9b3714d22bd693b7d3", "7ba203bd857adf2951809d62919e529d2a1bc5c5", "30e81d8675be66271e362e5276395397631336ee", "6b2c9d307ee618f5efd8691f44fae85c7d079351", "324fc9c732116fa81624faad07524039f193cede", "648c0a6d5023374c0c93fafb571b782da1dfbeed", "2019ede61cc0be14859908312e18458a7c79908f", "8bf9e4b21d30409d21fd2d7b020a187e85051164", "1123d66ebec34422f68899645db0e0097ddf8fbf", "272216c1f097706721096669d85b2843c23fa77d", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", "70ae67f22e47ffe9dbefcbb79cbacbc5bb8ae8cb", "c3e20b9193e75b1245e931ab97632dddabed7e87", "0ab90ce94103554de7536cd860236f14b6341794", "276ebc620a8976026bd2d03582b9ecfa3738d43c", "aa0ba4af25e606a65f299b9033839a5fcf92a466", "2e81e7190412f2d72aca7084eae29982ee528a2c", "3f59748d8dba7b928344d49dc8bf20e4329c1eb8", "03ccd0396e718198d717bf7bd9b73419a57846ed", "16f2deb863ef6d3d6f432de12a2e81149ab03e5a", "5643b2bf3b8d8a6d7ebf231becb9123b5b4a9287", "6471fd1cbc081fb3b7b5b14d6ab9eaaba02b5c17", "a17745f1d7045636577bcd5d513620df5860e9e5", "8dfddcfd67a586f6ed8957174adf1d35c4bd4584", "0f56311dad9f03083a4f4e791aab0b6e0aa2ff07", "363aae72873d094f9b863a571c2d67ef7c1a63dd", "18c85652b2e7ccfaadb66f34a5e4474c7216a9d9", "2215b95e6930e3a2674b0af1dcd17b85df8bd19e", "83a96734000f6733ed92b3daf0c6ffa1528e5d39", "96364af2d208ea75ca3aeb71892d2f7ce7326b55", "2ae6014a451801671d41b6171f86e657d8b1fbaf", "0d99a8787bd3abe24c7737775da4d842bb86e4ab", "108961c7366e36825ffed94ac9eab603e05b6bc6", "017969a70ef8c404a866e24017e2d8c4b9b8e2c8", "0cf4b41ef5371bed73214bb67a0e12829f0df220", "612764a0a4e41fad4629eafcc4a1e4f4141457a7", "50d6a8834013b29927c708c556baee06ce94337b", "351df512735096126454f5d4bc8e9ae56f4cd288", 
"22f9592bdab3523f144e40e5b5a546ec639b1ff7" ], "paperAbstract": "Malicious crowdsourcing forums are gaining traction as sources of spreading misinformation online, but are limited by the costs of hiring and managing human workers. In this paper, we identify a new class of attacks that leverage deep learning language models (Recurrent Neural Networks or RNNs) to automate the generation of fake online reviews for products and services. Not only are these attacks cheap and therefore more scalable, but they can control rate of content output to eliminate the signature burstiness that makes crowdsourced campaigns easy to detect.\n Using Yelp reviews as an example platform, we show how a two phased review generation and customization attack can produce reviews that are indistinguishable by state-of-the-art statistical detectors. We conduct a survey-based user study to show these reviews not only evade human detection, but also score high on \"usefulness\" metrics by users. Finally, we develop novel automated defenses against these attacks, by leveraging the lossy transformation introduced by the RNN training and generation cycle. 
We consider countermeasures against our mechanisms, show that they produce unattractive cost-benefit tradeoffs for attackers, and that they can be further curtailed by simple constraints imposed by online service providers.", "pdfUrls": [ "http://people.cs.uchicago.edu/~ysyao/papers/fakereview-ccs17.pdf", "http://people.cs.uchicago.edu/~ravenben/publications/pdf/crowdturf-ccs17.pdf", "https://arxiv.org/pdf/1708.08151v1.pdf", "http://arxiv.org/abs/1708.08151", "https://arxiv.org/pdf/1708.08151v2.pdf", "http://doi.acm.org/10.1145/3133956.3133990", "https://users.cs.fiu.edu/~carbunar/teaching/cen5079/cen5079.2017/presentations/crowdturfing.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3204c776cf067cb07916d01b3acf35db667e9803", "sources": [ "DBLP" ], "title": "Automated Crowdturfing Attacks and Defenses in Online Review Systems", "venue": "CCS", "year": 2017 }, "322448d81cc6785dfbd32db2b46a0b1d68c2c6d6": { "authors": [ { "ids": [ "2329032" ], "name": "Yuxin Bai" }, { "ids": [ "3752409" ], "name": "Victor W. 
Lee" }, { "ids": [ "1787439" ], "name": "Engin Ipek" } ], "doi": "10.1145/3037697.3037717", "doiUrl": "https://doi.org/10.1145/3037697.3037717", "entities": [ "Baseline (configuration management)", "Dynamic frequency scaling", "Dynamic voltage scaling", "Frequency scaling", "Power management", "Reinforcement learning", "Scalability", "Spatial variability", "Switched-mode power supply", "Transformer", "Voltage regulator" ], "id": "322448d81cc6785dfbd32db2b46a0b1d68c2c6d6", "inCitations": [ "cb9bf80c655fc5c09de77132cef64af6fe273bf3" ], "journalName": "", "journalPages": "825-838", "journalVolume": "", "outCitations": [ "31fb2b92f92968fcd60112f86b2201e874cfba19", "25d96424d82e2f51f9d9f3db51e62a1f7a27a4f9", "bbbe1ce1a11cc28250fe0106bab44b915bb81a8e", "41b380539d15a733e78c2b29388ffa8bef4bb370", "4940a6e514aad00af660337c41db21f92ae322f3", "04a8986ea5df3d6c29fb21627ac1f51ccf68eb15", "061a6b94ebecd2a8af8b901d41f15700543b2bd9", "c3b832615ab75d29892e05d9c6b9e9ee8f207ca8", "a9271b688eea97df79c3012d2f3ffc77ad319d1e", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "352a8957005dc5519b15ed1870751ec494d66395", "2eb8a42529ff20d376be980e41bfd5d032a6abd4", "a88f20c6e08bba17b27ba50311a0e1dff3978acc", "092217c2267f6e0673590aa151d811e579ff7760", "0d4d20c9f025a54b4c55f8d674f475306ebc88a6", "0a80e3dce25d865e9fdf69da4d09cc8ac3398ff4", "500b80adc7e25dfffa9a05d25bdffce81b1b0031", "4cb7628686448f997ffd0b2436cf5e5986aec630", "3370784dacf9df1e54384190dad40b817520ba3a", "7d164af7d16264456b1b0a49a59fbbcee51af6d2", "4416052fca95270b50a29e9e3cc245cca8962861", "0b249229db485ae94719d5e23b56b397022f8a6e", "164827c8add47ab8f36ac49924b617e328f618e5", "81948123ef8c63aa68abd8119073274d31f59909", "1eba6732b42656bf912fbd2aaf76fd23cbdad2a8", "6f78a05c4ca85fdfbf0992a63506df591213392c", "a9c1fa73f2c830f1f1d8526042a357a697dcfab4", "d8c71b0710e725cad12e5fc44f5230213f075e46", "3d16e1f937112a93744bd916bc82e212fb27af81", "02ebdcf8200135ec0433e12e4ef2459ac740370b", "48710c82bea8283382f81fcdba540160a0b00e16", 
"22b3c6d4b8f04a2b36679dfeea622ab95583f1a4", "387eb8909b5527dd2513cfdd2f376a3a1f2973b3", "b32aea3fa52042d8e68b850bf29788471da5689a", "5b8d6b66516eb9d74ac3601247eaef00f0465698", "5e15675d26025913b0d844b6166b6e72857e0937", "ac7bb3fc9d47f1f8f836c7f7a69e8ca4f26169ca", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "1c78d3c154fabf1bc42b0f8a2b563de238dd49a4", "680bdb42cac3c75b37bec67da2b33a4abc380b48", "cf7b74b7ea4f3051fe52bb8f8e41d0d7c37e7dd1", "32ef8d891edde06cc01357fa5c4d1ab7fe631720", "5c986118ab8c03435191b764b828ace581511230", "7c2ba35d6ba4947d1038638222281063e301debf", "48534b21548e3692ad7d866387f1dc7f543109e1", "cd2ad5f5a8a2f3287eb5b2d768c967cf2ff5bbb7", "676ff3d6d04b5748771f843dbf8dd07ee0a612db", "88dfee10842bbfd2ebc74980ab64c1cac5753883" ], "paperAbstract": "Conventional off-chip voltage regulators are typically bulky and slow, and are inefficient at exploiting system and workload variability using Dynamic Voltage and Frequency Scaling (DVFS). On-die integration of voltage regulators has the potential to increase the energy efficiency of computer systems by enabling power control at a fine granularity in both space and time. The energy conversion efficiency of on-chip regulators, however, is typically much lower than off-chip regulators, which results in significant energy losses. Fine-grained power control and high voltage regulator efficiency are difficult to achieve simultaneously, with either emerging on-chip or conventional off-chip regulators.\n A voltage conversion framework that relies on a hierarchy of off-chip switching regulators and on-chip linear regulators is proposed to enable fine-grained power control with a regulator efficiency greater than 90%. A DVFS control policy that is based on a reinforcement learning (RL) approach is developed to exploit the proposed framework. 
Per-core RL agents learn and improve their control policies independently, while retaining the ability to coordinate their actions to accomplish system level power management objectives. When evaluated on a mix of 14 parallel and 13 multiprogrammed workloads, the proposed voltage conversion framework achieves 18% greater energy efficiency than a conventional framework that uses on-chip switching regulators. Moreover, when the RL based DVFS control policy is used to control the proposed voltage conversion framework, the system achieves a 21% higher energy efficiency over a baseline oracle policy with coarse-grained power control capability.", "pdfUrls": [ "http://www.cs.rochester.edu/~ipek/asplos17.pdf", "http://doi.acm.org/10.1145/3037697.3037717" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/322448d81cc6785dfbd32db2b46a0b1d68c2c6d6", "sources": [ "DBLP" ], "title": "Voltage Regulator Efficiency Aware Power Management", "venue": "ASPLOS", "year": 2017 }, "32274b8db3b9eb23799bf0b44181203afba1c080": { "authors": [ { "ids": [ "7384331" ], "name": "Li Ye" }, { "ids": [ "1709209" ], "name": "Hong Xie" }, { "ids": [ "40185051" ], "name": "Weijie Wu" }, { "ids": [ "1723366" ], "name": "John C. S. 
Lui" } ], "doi": "10.1109/ICDM.2017.65", "doiUrl": "https://doi.org/10.1109/ICDM.2017.65", "entities": [ "Algorithm", "Analysis of algorithms", "Approximation algorithm", "Computational complexity theory", "E-services", "Expectation\u2013maximization algorithm", "Experiment", "Internet access", "Online shopping", "Optimal control", "Personally identifiable information", "Product bundling", "Provable prime", "Purchasing", "Software deployment", "Streaming media" ], "id": "32274b8db3b9eb23799bf0b44181203afba1c080", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "555-564", "journalVolume": "", "outCitations": [ "ef02028969173fd532dbe37cd79434b463567bd7", "1655ae04c843613183d6f29c5feed3355d3d0ad9", "5fa927469e070edd17202f356cc50e332970c509", "184b7281a87ee16228b24716ca02b29519d52eb5", "03a8d876ef19ba40082286783dbf432168826a51", "1437415df29d3927c7851c7a0db0edd4a472d6e1", "0e601d009fd118cc165bfa2825c70b01940bdd9c", "5a4659dab31936b81a6253b8d01fe609e097d9c8", "70b2879bf06b72faddb28923b136a8c3862d0c48", "d63626a97427103cbba91d86ab3b7a98f8267b6a", "1dc645cb27950d2694498059cabce83c1ea5fa85", "0a97bed6daea60728dc7fc78910edd6ba47f5dd2", "644170b1266aa09f25cd4cff7e7670e328810f05", "7cc290879b7f86f0c9404028cf0ecf32d1b38d00" ], "paperAbstract": "Product bundling is widely adopted for information goods and online services because it can increase profit for companies. For example, cable companies often bundle Internet access and video streaming services together. However, it is challenging to obtain an optimal bundling strategy, not only because it is computationally expensive, but also that customers’ private information (e.g., valuations for products) is needed for the decision, and we need to infer it from accessible datasets. As customers’ purchasing data are getting richer due to the popularity of online shopping, doors are open for us to infer this information. 
This paper aims to address: (1) How to infer customers’ valuations from the purchasing data? (2) How to determine the optimal product bundle to maximize the profit? We first formulate a profit maximization framework to select the optimal bundle set. We show that finding the optimal bundle set is NPhard. We then identify key factors that impact the profitability of product bundling. These findings give us insights to develop a computationally efficient algorithm to approximate the optimal product bundle with a provable performance guarantee. To obtain the input of the bundling algorithm, we infer the distribution of customers’ valuations from their purchasing data, based on which we run our bundling algorithm and conduct experiments on an Amazon co-purchasing dataset. We extensively evaluate the accuracy of our inference and the bundling algorithm. Our results reveal conditions under which bundling is highly profitable and provide insights to guide the deployment of product bundling.", "pdfUrls": [ "http://www.cs.cuhk.hk/~cslui/PUBLICATION/ICDM17-Bundling.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.65" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/32274b8db3b9eb23799bf0b44181203afba1c080", "sources": [ "DBLP" ], "title": "Mining Customer Valuations to Optimize Product Bundling Strategy", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "322f5bca345b899b9c6d5cd138208dcaaeb48648": { "authors": [ { "ids": [ "35337973" ], "name": "Pedro Duarte" }, { "ids": [ "1680461" ], "name": "Pedro Tom\u00e1s" }, { "ids": [ "4364158" ], "name": "Gabriel Falc\u00e3o Paiva Fernandes" } ], "doi": "10.1145/3123939.3123953", "doiUrl": "https://doi.org/10.1145/3123939.3123953", "entities": [ "Algorithm", "Artificial intelligence", "Benchmark (computing)", "Clock signal", "Embedded system", "End-to-end principle", "Field-programmable gate array", "General-purpose computing on graphics processing units", 
"Graphics", "Graphics processing unit", "Joule", "Legacy code", "Performance per watt", "Requirement", "Scratch (programming language)", "Signal processing", "Speedup", "Supercomputer", "Throughput" ], "id": "322f5bca345b899b9c6d5cd138208dcaaeb48648", "inCitations": [], "journalName": "", "journalPages": "165-177", "journalVolume": "", "outCitations": [ "45becf9db6725dbd7b24426e324d89b06ff31564", "76baeab09944d683b2c73d8034e3279a31daeda9", "58f9fe6efcd2ec6ae334675764ec995a131dc5c7", "03aa649535c7e01ac2b3255f2f44131380dc93c7", "00a171d5606485633b6ae9fae973156dd936ba99", "9bf711ca7e5a58d10b173b96e8d604d192aeec88", "284c7fde4bbaf19dd345e3b37d98085d7bfb9a4f", "167aa2172a662489d67f1835cc6cb6bc4fb85f27", "0b99d677883883584d9a328f6f2d54738363997a", "2bb28f6105ca30b2bc1ba91578234ecb12e788a6", "01fcae344d2edb715bcc63a40b6052c0331741bd", "38211dc39e41273c0007889202c69f841e02248a", "d40bf0b4b8e5cd2f337020ecacf487154c28d4eb", "09b8120cbc52e7df46122e8e608146289fddbdfa", "79eb9473b28735cd42eee4298201b8a703bcd7c3", "9c4b1c13a2d8c7753a90ce6a348a8a49efcc59b5", "57c47f9cc942202fa619023e7716c858b3e06db3", "60a1389c827f9f706c9dc1639e2584f0f3de878e", "f7cb3a19a33b9cb75bce30074f27fcb2e0a55846", "173561914128ae74b9d85eab5663c34b48b113f7", "89450edc162692cea533caf753cc691afde81869", "352edd2c567a202f2e5d3f5a022740a3fac23986", "3ba179bceb9692d4d21109d0b87b120195761148", "0cf19e8178bd4248e7c5b5ca87fb1babf2619651", "524bee75fc8deeb1504d1d906a2ee7429ae27246", "3b2491ddeeaa7beae4d311b217c292a9e16112cf", "7ca0505be8f0a41a5b061f625384296c46f11e19", "89a0d3584af8a992e2b8b3d55a2e45371447d37f", "8383b7f6f4f9556e522f735a0fd7b8c9e11e613b", "14505c2bdd3822d7a62385121d28ba3eb36fea1d" ], "paperAbstract": "Applying advanced signal processing and artificial intelligence algorithms is often constrained by power and energy consumption limitations, in high performance and embedded, cyber-physical and super-computing devices and systems. 
Although Graphics Processing Units (GPUs) helped to mitigate the throughput-per-Watt performance problem in many compute-intensive applications, dealing more efficiently with the autonomy requirements of intelligent systems demands power-oriented customized architectures that are specially tuned for each application, preferably without manual redesign of the entire hardware and capable of supporting legacy code. Hence, this work proposes a new SCRATCH framework that aims at automatically identifying the specific requirements of each application kernel, regarding instruction set and computing unit demands, allowing for the generation of application-specific and FPGA-implementable trimmed-down GPU-inspired architectures. The work is based on an improved version of the original MIAOW system (here named MIAOW2.0), which is herein extended to support a set of 156 instructions and enhanced to provide a fast prefetch memory system and a dual-clock domain. Experimental results with 17 highly relevant benchmarks, using integer and floating-point arithmetic, demonstrate that we have been able to achieve an average of 140× speedup and 115× higher energy-efficiency levels (instructions-per-Joule) when compared to the original MIAOW system, and a 2.4× speedup and 2.1× energy-efficiency gains compared against our optimized version without pruning.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123953" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/322f5bca345b899b9c6d5cd138208dcaaeb48648", "sources": [ "DBLP" ], "title": "SCRATCH: an end-to-end application-aware soft-GPGPU architecture and trimming tool", "venue": "MICRO", "year": 2017 }, "323f6694dd7cdec2dcf0d7d4ad35b2ff129cd766": { "authors": [ { "ids": [ "2377269" ], "name": "Xinyue Liu" }, { "ids": [ "1920461" ], "name": "Yuanfang Song" }, { "ids": [ "1682418" ], "name": "Charu C. 
Aggarwal" }, { "ids": [ "3631757" ], "name": "Yao Zhang" }, { "ids": [ "1833914" ], "name": "Xiangnan Kong" } ], "doi": "10.1109/ICDM.2017.39", "doiUrl": "https://doi.org/10.1109/ICDM.2017.39", "entities": [ "Experiment", "Interaction", "Neural coding", "Recommender system", "Stationary process", "User (computing)" ], "id": "323f6694dd7cdec2dcf0d7d4ad35b2ff129cd766", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "297-306", "journalVolume": "", "outCitations": [ "184b7281a87ee16228b24716ca02b29519d52eb5", "409b0cce867d59c941f40c66306400c32f2aaac2", "319d2362092a2a9e0441bce60b720d57fd26e07c", "2275762a28582716db92df6d525ed2481c7d7f14", "276ebc620a8976026bd2d03582b9ecfa3738d43c", "205a4bc5e53e8db52190c72e581e709139acf57b", "109d0dcfdd37ea23589dc6d45f93eac545d964f8", "756cda3c88eac81dc03a465d6ad48bb0fdb49b43", "2cbe0ba73d02aabbeefedf841203219796a551b7", "088168b73376ac9e000c449053f8b453f5430aac", "b12c37c357035e2f20c112227c982ee50a5e1b74", "127c27f135bed13706774168bef2c87e9d0791a1", "e72ddf60463b901fad9f079a9fd2c80c0f6ad131", "44df77b146468e4a7c132eb3e90ae26e9afd06b9", "dbdc21e0830afc51a2e915202942e5943529a8f3", "091aded505b84cf87c197875ccfde24d98a300c9", "9aa88a8a354f1d322e242376d27d0474e50252f8", "377be2653191fc59811347961ec94fe987ea057a" ], "paperAbstract": "Recommender systems have attracted much attention in last decades, which can help the users explore new items in many applications. As a popular technique in recommender systems, item recommendation works by recommending items to users based on their historical interactions. Conventional item recommendation methods usually assume that users and items are stationary, which is not always the case in real-world applications. Many time-aware item recommendation models have been proposed to take the temporal effects into the considerations based on the absolute time stamps associated with observed interactions. 
We show that using absolute time to model temporal effects can be limited in some circumstances. In this work, we propose to model the temporal dynamics of both users and items in item recommendation based on their life cycles. This problem is very challenging to solve since the users and items can co-evolve in their life cycles and the sparseness of the data become more severe when we consider the life cycles of both users and items. A novel time-aware item recommendation model called BiCycle is proposed to address these challenges. BiCycle is designed based on two important observations: 1) correlated users or items usually share similar patterns in the similar stages of their life cycles. 2) user preferences and item characters can evolve gradually over different stages of their life cycles. Extensive experiments conducted on three real-world datasets demonstrate the proposed approach can significantly improve the performance of recommendation tasks by considering the inner life cycles of both users and items.", "pdfUrls": [ "http://cinv.ro/files/ICDM17_Slides.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.39", "http://cinv.ro/files/ICDM17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/323f6694dd7cdec2dcf0d7d4ad35b2ff129cd766", "sources": [ "DBLP" ], "title": "BiCycle: Item Recommendation with Life Cycles", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "325390173841d52f7a2791ba6b0e32ad80bf2630": { "authors": [ { "ids": [ "3354846" ], "name": "Victor van der Veen" }, { "ids": [ "2480507" ], "name": "Dennis Andriesse" }, { "ids": [ "1881042" ], "name": "Manolis Stamatogiannakis" }, { "ids": [ "1683647" ], "name": "Xi Chen" }, { "ids": [ "3053948" ], "name": "Herbert Bos" }, { "ids": [ "1744275" ], "name": "Cristiano Giuffrida" } ], "doi": "10.1145/3133956.3134026", "doiUrl": "https://doi.org/10.1145/3133956.3134026", "entities": [ "Binary code", "Call stack", "Code reuse", 
"Disjoint-set data structure", "Interaction", "Newton", "Return-oriented programming", "Server (computing)", "Stackable switch", "Static program analysis", "Subroutine", "Taint checking" ], "id": "325390173841d52f7a2791ba6b0e32ad80bf2630", "inCitations": [ "723931de6d91a965bc2fa24ac649291c9f1a4639" ], "journalName": "", "journalPages": "1675-1689", "journalVolume": "", "outCitations": [ "0719b9670c8580db76547497df39caabdc20fc32", "64544d30077a54ca97752f9ffd62c80e9038ddbb", "63eca2d9de958abab6a20f0696789ccb6f1b8aa1", "23e8236644775fd5d8ff5536ba06b960e19f904b", "0ed6a4c8fa2c74ca2e470df6e205797371523edc", "3d6395a04f7436b143339bbb7e4e46dfaaaf8afb", "3e1ff9416ec1c2f94d0ddbeb6171eea4c336d663", "3fa27974cade47e98993b98798f73594b902583b", "6a8f65381a627a2db6c756a7185d9106f0acefec", "3301b50b56d91099988f9e3db1e556445c767077", "0e039df712774fcea67f214d9b5780c1dc250747", "19985cad0eb4010b22c2ae1ef9442e036a924244", "6458f4c0c029b038ebd1d7f61005a010ac250892", "50117210fbb7a8ce28ba503280f3c65fdc029ec7", "49cae25a1796b6a2898b99b2684b33eed8f58ee9", "6a7a3033f2e9adc294633667c01689c5bed167f2", "ab2177167b09f9be086d44188b845fc9b5458d66", "b4b92eb555dd9c672f894216c5d50bf6164df78b", "2947959aa2cfc45719fac7a54812614d1fa8707f", "79473986fe994d4aeb9d662e0b8e572758a4511b", "4b9426a328fb4e379e2fab57fc72a91c5618ae94", "360e88b37ce5a9743d363309d147f783f7c1611f", "77cab38bef9d1b8b7bcfd3bb382c9d55541465e1", "09b6a3fcb9a7e76d8b5041b4b8f4fb39058889ba", "48a8b9bd6df63b13093cd67ee61911e7a485be57", "4f4590962bde0c2050122f91e5978271bb24d556", "157153c5c5b3fb42bd4de60d2cd8380e7038a82b", "0988a425689f6f3700e797f4a2c18f73692573c3", "01b5b648af61ddb382da638a299fae2315b25192", "387e571981a8ee2bd49b1f30563e3a3a215e3b65", "2c067e092c35d71d23c09d9c09376aa5b684152c", "0ad15428453e6f4962755933bd82f395eaf787b8", "6f5635fb17ff8a27b08104de6d0322c32e943a9f", "242b43fc76229ce4a3e9182f49267a5ad53ec106", "bcbfbf2777e6ee87df990b871d1cc7b44a3e998d", "acf32e644db8c3ac54834d294bba4cf46551480a", 
"9b2585f7248c8b5a22e9c816506e01060213ca85", "638297d9b5c8e0e83ca5acfbf1325196ea0bbb3c", "0639adf09bea98bef72d77c3671ccd8ef32ff542", "c48ab0b89d0f1ea81ad44d5f49cb5aafb927982f", "1798b9bc347ca826724b6d80766200ebaad8dfb0", "6e40435eaf84ac3dd2c48b8e81c0dcd5f0db1a12", "cb0da1ed189087c9ba716cc5c99c75b52430ec06", "412c0be5520e80ab6f8c3662477a28e3b9ccb943", "b9d25c16577646bb502a17b65131e788ef58330b", "f0ac31c2248ef8eb597448395da6f79227ffe916", "53396c842bc8a94575470fab3acb4aef91c5073d", "48a8e9d8a41009eb6b7733b139eb5eff30d72776", "71da01051534d46fb3becd0a7506b64db56efc7a", "217742089058db1572042a0cebfcecdec8ce215e", "6588630854bf12b190ae8f95ed8763f7cdb945f6", "07848afa4b5f2f09cd2fe28a6cdc25c642da61cd", "13e83680f0bc1ffb29b620945357ec832726ae90", "348b0049b0c7b3f7e74b77cca30213cb7e550360", "70e8eb54e20c504b9a3c52f5a5ea4d8fbb3d985c", "1fa355cabcaa6650603098c41a3a439fbed718a1", "116eaac2e498bc2c9bea10ea838309dcf143d764", "7e61bd6abdcb68ed9b3871311cabe09753de88ff", "0db59f09437b7b90376f011f5150ed976ac66231", "30e76f32c323adb0ff340760380fe5a08505b641", "11496e28e6dd3f1ab7d2f054c46d7587988dfd86", "592be7266ac5e1a423703242a5f976bdf05627af", "74572d07252e2f0b60b16abb931c46e819e2b448", "2811354f6f13b12176f81bc989d2e80534effa80", "4acdc975e3507a6e26a47f3858a74ec0de75ae38", "07f9aa35346107785ad6b6cd07537b24fa7bf0d2", "28ab79d604962031585fd149941a5c0594e3d0ed", "03f827395a17beb941241dbd72322705bdf79791", "09de4307bbb752ed92a67736f3d9b174966ac769" ], "paperAbstract": "In 2007, Shacham published a seminal paper on Return-Oriented Programming (ROP), the first systematic formulation of code reuse. The paper has been highly influential, profoundly shaping the way we still think about code reuse today: an attacker analyzes the \"geometry\" of victim binary code to locate gadgets and chains these to craft an exploit. This model has spurred much research, with a rapid progression of increasingly sophisticated code reuse attacks and defenses over time. 
After ten years, the common perception is that state-of-the-art code reuse defenses are effective in significantly raising the bar and making attacks exceedingly hard.\n In this paper, we challenge this perception and show that an attacker going beyond \"geometry\" (static analysis) and considering the \"dynamics\" (dynamic analysis) of a victim program can easily find function call gadgets even in the presence of state-of-the-art code-reuse defenses. To support our claims, we present Newton, a run-time gadget-discovery framework based on constraint-driven dynamic taint analysis. Newton can model a broad range of defenses by mapping their properties into simple, stackable, reusable constraints, and automatically generate gadgets that comply with these constraints. Using Newton, we systematically map and compare state-of-the-art defenses, demonstrating that even simple interactions with popular server programs are adequate for finding gadgets for all state-of-the-art code-reuse defenses. 
We conclude with an nginx case study, which shows that a Newton-enabled attacker can craft attacks which comply with the restrictions of advanced defenses, such as CPI and context-sensitive CFI.", "pdfUrls": [ "http://vvdveen.com/publications/newton.pdf", "http://doi.acm.org/10.1145/3133956.3134026" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/325390173841d52f7a2791ba6b0e32ad80bf2630", "sources": [ "DBLP" ], "title": "The Dynamics of Innocent Flesh on the Bone: Code Reuse Ten Years Later", "venue": "CCS", "year": 2017 }, "325e00509090fafddcf2e53d5bdb81ccd1c5637a": { "authors": [ { "ids": [ "3422391" ], "name": "Najmeh Miramirkhani" }, { "ids": [ "2448867" ], "name": "Oleksii Starov" }, { "ids": [ "1679711" ], "name": "Nick Nikiforakis" } ], "doi": "", "doiUrl": "", "entities": [ "ASEA IRB", "Evasion (network security)", "Malware", "Social engineering (security)", "Technical support", "Technical support scam", "Telephone number" ], "id": "325e00509090fafddcf2e53d5bdb81ccd1c5637a", "inCitations": [ "44396f73c94712f0ea6734da398fd94431b49770", "1607b1b0115c84d61e5d01d7824acac95a7c9615", "3d14bdb4c5bf02aa5baa658fb5f033e71af6c650", "28526f972d1640e8cad22d42e29f5e70473a368e", "2ab7da2ac8da95a24a688ec7a90df3908ae93a09", "6e63485bc4754aed5be30a8dc6e73e6f549d4591", "9b5bc50b465192247d89c0ca3c540938ed0fb5d8", "0bc07ed8b94173ceb856b5f8aee25e1fbe55a556", "5ef760d070916f24cc8eb9cbbe7b37b5f8fbc523", "fcf9c18a9c2c6460adda0b91209327df94184fa5", "c5a9638041bb4294039a451330466fa2b25af5d3" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "22b716af826fa94f8d139dff4c292da61acec6ad", "ba9af0bf228cedfad61daa481a71ed433076ab8d", "0dc3d4c054e5a9e3260d65bcb78d229cf258682f", "876dae4ee0323da2ae685160e9192cdf605e7299", "dba8a9813124c80914b6001637d087c5127cf4c3", "0a964c5ac7e19cbdc820fd4ee101a5263385733d", "393ed20364b4be9f0ec4e8978e4b5c8e384aa2cf", "a19aa12ef0726bff27838faf907d9c7e059ef2c3", 
"281b17817b064e99bd63a3b121c794f80b2cd648", "01f2e7f3dee35c2c7b9179d8eb46f4f766c101d1", "63b94d37f0826f65c2bc7ecc22c10da91efa6520", "0597732c00590a4c1ef9f718b650a62a03daf97e", "358843af3655391738c8435fc8a74eb49170e8ea", "353bc95ea6d720d867489e3d4cf5c9427531c7c7", "06240b5f5a928d49b53e848bcbaa4bf7c6beab35", "003281fae213f16c6b6098762bf9f965ebe3f490", "6fd3c5146fc90d4ce14cafc85f3a92be40f22213", "57bc190337534bf4d84b5b0a7939783426557d92", "649468352e70532e80f68d362bf85fae8277bf22", "2ccde1be1ff725aa8740868929cd1a6f5072ab3a", "07cbb544b23a4f0e914863cb17afa4eafd9f59a8", "22a78f31395e79cb6c99c3cedd248ecd6568b7f7", "5521a1758105b5f6c79ed0f85ec4e9d2941a3ad0", "0b4954346d47d6b7871d8e0df16a4db227df481f", "a03986f4f3a8739d71b1d3269c1a2259fbaef89b", "0ae15a14901771cc2c920e60edca72ab80505c0b", "29c9deb411912cbc7aecd8e8db12c77c615df104" ], "paperAbstract": "In technical support scams, cybercriminals attempt to convince users that their machines are infected with malware and are in need of their technical support. In this process, the victims are asked to provide scammers with remote access to their machines, who will then \u201cdiagnose the problem\u201d, before offering their support services which typically cost hundreds of dollars. Despite their conceptual simplicity, technical support scams are responsible for yearly losses of tens of millions of dollars from everyday users of the web. In this paper, we report on the first systematic study of technical support scams and the call centers hidden behind them. We identify malvertising as a major culprit for exposing users to technical support scams and use it to build an automated system capable of discovering, on a weekly basis, hundreds of phone numbers and domains operated by scammers. 
By allowing our system to run for more than 8 months we collect a large corpus of technical support scams and use it to provide insights on their prevalence, the abused infrastructure, the illicit profits, and the current evasion attempts of scammers. Finally, by setting up a controlled, IRB-approved, experiment where we interact with 60 different scammers, we experience first-hand their social engineering tactics, while collecting detailed statistics of the entire process. We explain how our findings can be used by law-enforcing agencies and propose technical and educational countermeasures for helping users avoid being victimized by technical support scams.", "pdfUrls": [ "https://www.internetsociety.org/sites/default/files/ndss2017_03B-1_Miramirkhani_paper.pdf", "http://wp.internetsociety.org/ndss/wp-content/uploads/sites/25/2017/09/ndss2017_03B-1_Miramirkhani_paper.pdf", "https://www.securitee.org/files/tss_ndss2017.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/dial-one-scam-large-scale-analysis-technical-support-scams/" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/fe0f/69a749813060e62784b8fab463910d0fec82.pdf", "s2Url": "https://semanticscholar.org/paper/325e00509090fafddcf2e53d5bdb81ccd1c5637a", "sources": [ "DBLP" ], "title": "Dial One for Scam: A Large-Scale Analysis of Technical Support Scams", "venue": "NDSS", "year": 2017 }, "326395e317f1b34926a1ec2482a7fde98ab12869": { "authors": [ { "ids": [ "1821026" ], "name": "Niyazi Sorkunlu" }, { "ids": [ "3332891" ], "name": "Varun Chandola" }, { "ids": [ "34806140" ], "name": "Abani K. 
Patra" } ], "doi": "10.1109/CLUSTER.2017.70", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.70", "entities": [ "Algorithm", "Anomaly detection", "Low-rank approximation", "Supercomputer", "Tracking system", "Usage data" ], "id": "326395e317f1b34926a1ec2482a7fde98ab12869", "inCitations": [ "83ef14979ab56a6155ee3dc6bf9fbe7de4c7a4ee" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "410-418", "journalVolume": "", "outCitations": [ "06bd1b7c539e13d30abe7e6715753b12c587058c", "d9d055477bfaf383a1b99824215b8a41d0ed3bc1", "62dd02837c65b9c90de8d80c493f23ce1116cb3d", "bda0fb0cde6a1120c721a6caa9f58f6d049ddb7a", "03b84b789cb342587db621c7e88eeb005cc21578", "bd1ed675f2fb2d47b7bd9ba8d0bdf71a99699693", "27c989c48f81da3a4b27ca5196d3e9ecc24b3b0c", "1314e6ea34a8d749ca6190a0d2dd00b3a1879cc6", "a20def90994cab53b1e5202147848bb5bd4891a4", "0368011142340e9ea904a3c022412f02a8e60f02", "f0ba9dfcc0d3de1c1c941c9d42435350ed662557", "27d864dc2708a2804cac20606a8195c5313eed5c", "6ccf50496e73a69535f50262bd3dba7548677fff", "2d1c16f01c89136e171d5421ec303a695de7748b", "0ec40760465c0e7cb0f5d25179d1e8b44049a1e3", "66479c2251088dae51c228341c26164f21250593", "9fdde5b04351bde57e805da89fb6ef93cc1a5aa2", "502ff2f9220ebc8c3544e6c4a005e819429ab716", "12d4c92f0a3a70538ed609bf6f7b603e44d11abd", "33282ae3f2929d70d37234e763f40bac5bbbbc6f", "881e0395816f8e8518ae157f3c21898b18cff1d8" ], "paperAbstract": "Resource usage data, collected using tools such as TACC_Stats, capture the resource utilization by nodes within a high performance computing system. We present methods to analyze the resource usage data to understand the system performance and identify performance anomalies. The core idea is to model the data as a three-way tensor corresponding to the compute nodes, usage metrics, and time. 
Using the reconstruction error between the original tensor and the tensor reconstructed from a low rank tensor decomposition, as a scalar performance metric, enables us to monitor the performance of the system in an online fashion. This error statistic is then used for anomaly detection that relies on the assumption that the normal/routine behavior of the system can be captured using a low rank approximation of the original tensor. We evaluate the performance of the algorithm using information gathered from system logs and show that the performance anomalies identified by the proposed method correlates with critical errors reported in the system logs. Results are shown for data collected for 2013 from the Lonestar4 system at the Texas Advanced Computing Center (TACC).", "pdfUrls": [ "https://arxiv.org/pdf/1705.10756v1.pdf", "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.70", "http://arxiv.org/abs/1705.10756" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/326395e317f1b34926a1ec2482a7fde98ab12869", "sources": [ "DBLP" ], "title": "Tracking System Behavior from Resource Usage Data", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "32877824e74f8e28bbbd1b78c03c4341f3e20925": { "authors": [ { "ids": [ "2807190" ], "name": "Romain Fontugne" }, { "ids": [ "1736430" ], "name": "Cristel Pelsser" }, { "ids": [ "2461898" ], "name": "Emile Aben" }, { "ids": [ "2532425" ], "name": "Randy Bush" } ], "doi": "10.1145/3131365.3131384", "doiUrl": "https://doi.org/10.1145/3131365.3131384", "entities": [ "Forwarding plane", "Information privacy", "Network packet", "Real-time computing", "Traceroute", "Usability" ], "id": "32877824e74f8e28bbbd1b78c03c4341f3e20925", "inCitations": [ "5eb2d2ad43056ac8d4d927196d091c1d01014646", "d2a574246f3ad2089da6f96c06e7165ba154c2f7", "224d377e1ebd1184f5ecb4d61f82687a77ba1ea2" ], "journalName": "", "journalPages": "15-28", "journalVolume": "", "outCitations": [ 
"de17cf40a4db13315c631c597959ae26f691f2fa", "b3afefa8e89adda9112724015142e99daeabf9e9", "2b75e5dc143ada58e9018b51e1e7d2f08c0ce443", "32ac1fed2f6f9bf6b8913091f5a6efd40d71b1e1", "4ebe2a9232302756b7f68b160607230adc593dd7", "0244d99872812bebfdb5257fd7d994b0c3df03b6", "cac46cb7f7724e625315c290536da4d94e30f9e0", "2d1fb0e96ce03a97f31844df33523a27d06e76c7", "5c47c9936d2bc8d70a873295d476ef3838a42bad", "139391874e4720b881a42eb09659c8ff1fde7b02", "2efca12b23aa31bbdbe70df0fba3fb1a34fe59a0", "41f6d924dd684966a9f6bc25e2266ce08cfdf4f4", "1f1dc292d3afb6612e1711a9315a4327789f68a9", "011abcefc5ee3172381d4c4c077a987b2893ea2d", "15d9c3fda3492df57e237b47a080462ef343ebfb", "bd879b4a4bfb9406a0f596f3e266c13d22d4478e", "20813b6d8e29022bcdd67dd954a585a2705ef4d8", "4fdb7be799ef72223155d6bef68c2c07ed7b143d", "856f35d1f8ebd0da6fc98a3709e4eef4fa9eaca4", "6245ced0adb67151ea0f456d14cf002efc4e5c38", "55c4f36d38f84eeb0c315afb9bc25e2e8510cded", "b2536f7fa3e5d9ba7aca2bd6b642ad6fa9cfa970", "0a0c93beb454c67518ec6c11b158fcb4665ff68c", "c48dab5058f808f69b113efe8f622b522a84b4b1", "0cc37877c801d0a4cd64195f7940f79a68db5766", "31264b04c2f9b7b3b3de006cab089b91bc074868", "6c0ed2f09a5d961bea5a2750958d400863537e3e", "0be4add1e9de8ddc6cd9aae13f6c629702d59a33", "0fbfe111ae47ba19bc509398149c0b03d1e6ecef", "8dba68daafa717d148f425fac87c24b580ee9a99", "3a0119a905d674b08cda2136009c9b3980863eb8", "01cfeb90ff0f75e5a7d4c969b8432c6fe920ea89", "71fda542b243f32b3c9f75317905b1ea1ceacce9", "44ec88e76014cda3582871682eaf85ca594dfb8f", "7bc9d35e6d59dc098c79201e9ed14381f9eedc3a", "08192b37ec20d37630730557b79dae6a7b10effc", "224d377e1ebd1184f5ecb4d61f82687a77ba1ea2" ], "paperAbstract": "Understanding data plane health is essential to improving Internet reliability and usability. For instance, detecting disruptions in distant networks can identify repairable connectivity problems. Currently this task is difficult and time consuming as operators have poor visibility beyond their network's border. 
In this paper we leverage the diversity of RIPE Atlas traceroute measurements to solve the classic problem of monitoring in-network delays and get credible delay change estimations to monitor network conditions in the wild. We demonstrate a set of complementary methods to detect network disruptions and report them in near real time. The first method detects delay changes for intermediate links in traceroutes. Second, a packet forwarding model predicts traffic paths and identifies faulty routers and links in cases of packet loss. In addition, we define an alarm score that aggregates changes into a single value per AS in order to easily monitor its sanity, reducing the effect of uninteresting alarms. Using only existing public data we monitor hundreds of thousands of link delays while adding no burden to the network. We present three cases demonstrating that the proposed methods detect real disruptions and provide valuable insights, as well as surprising findings, on the location and impact of the identified events.", "pdfUrls": [ "https://conferences.sigcomm.org/imc/2017/slides/slides.pdf", "https://arxiv.org/pdf/1605.04784v2.pdf", "http://doi.acm.org/10.1145/3131365.3131384", "http://arxiv.org/pdf/1605.04784v1.pdf", "https://arxiv.org/pdf/1605.04784v1.pdf", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final106.pdf", "http://arxiv.org/abs/1605.04784" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/32877824e74f8e28bbbd1b78c03c4341f3e20925", "sources": [ "DBLP" ], "title": "Pinpointing delay and forwarding anomalies using large-scale traceroute measurements", "venue": "IMC", "year": 2017 }, "3296ac52e3e3be506de6f7847bba979788b8df56": { "authors": [ { "ids": [ "1878923" ], "name": "Dustin Rhodes" }, { "ids": [ "1717674" ], "name": "Cormac Flanagan" }, { "ids": [ "35267242" ], "name": "Stephen N. 
Freund" } ], "doi": "10.1145/3062341.3062350", "doiUrl": "https://doi.org/10.1145/3062341.3062350", "entities": [ "Correctness (computer science)", "Dynamic data", "FastTrack", "Location-based service", "Memory address", "Memory management", "Null (SQL)", "Race condition" ], "id": "3296ac52e3e3be506de6f7847bba979788b8df56", "inCitations": [ "0e5aa7a7cd7676640b176b59d6ef2a34d2276297", "e4b305f247ad9244f06f639a1b4def8bfe1f40f1", "2968fdf952edd08d2e7b2f303cba2339e4ee8c40", "432e4baac0bad1c4a4c45d8a4398f695f72a206a", "33445fdc18f34429820342a9290dfdb687158736" ], "journalName": "", "journalPages": "141-156", "journalVolume": "", "outCitations": [ "2f821a758267bbe68cf2b6f8c1f564e9fcccc396", "09ed565e84057123c15ab12b885c235d1f241aed", "fc990d3630ea9f6fd7481ae0afd137a7f2753f2c", "0a42dcd2e80185cada08b0eb2ee318bad73cd904", "2d4f01286cb0c8cc81cfba1dfc52e9c0f1da5122", "512cfa52d11dba5e9ffd6ebbe1db350289c39731", "5b1f19b5be9e93addb7a93715bcec590bf6138af", "5da5bf7eed0edae4b7e96f65ea99eb167d6b07d5", "b8719183f3579e6f0bdf2d98ee500097a28cb9cf", "182c0524aa353b6f4f4cb75a88ff3f5fc3bd86e0", "11bd8d0769616d29dbb7662db28cd179219b2a0a", "10b206ff108069c460bf1623f40ca4521393292e", "8b44b7a9849307acc217772c7240ceb87883bee3", "2bcc56aa8f39ec3d5f16c0064e461e90a6a1764f", "0cd328c7e014bb0326b4a7744df5d267ebf040cb", "dd916d401b90d848dd0c1a99d78c034e3c8bb448", "37ad5b0cb6952d97e37053dede98db125afd9036", "027eb436c35c7e293e7ebc565163cb54c05fe2e9", "2346439ece014d5e3ce1564adc2a7ca098a37c8e", "05a518c3b1a6f5c15d77f8829368677a263ff15d", "05a618847e4f08e5bca29dff732757779722b2e0", "0180ce1c6e98fd66362b46bd945c503bf7372aa8", "1ef3d10196d91aed5939009846bd7ab3a5e3f8e8", "559c109296a65d57eab8ec9ce8be65094dd6d372", "44a851e09e72741944ea01f855e5dac3ebbc4568", "01293cc2b2bda3c38c7095d2ea1813fcb0611a3e", "44808fd8f2ffd19bb266708b8de835c28f5b8596", "2a9ce8b678cbf6a26316f6d5e9bca4960836575d", "454b06a17e2f6656e65935cb9d36dcf2b2044bf5", "1753d3e97fdbe7799b9625cb873b77eef506a608", 
"b1cb8339ed437ce74deaf4b080b33cf61bbebd5d", "89dce5f511b06739cbd1e2e2300775106bfe1d17", "7ab605ff3ced95d816286dd0c1f1c42ac2deeef7", "79b6d89f92080b1ec9b8f23d7811666e15f6cabe", "469f16708746f3d432fffe7c43710f3001d054d5", "6e235552c18354a306f7bb749a04f95d7440d18b", "5a1b3704ca2fb87a0396e8bc3bcd8a0cc916cf57", "1e2c604e66c4439ad343b70d7ec0abedf72d006a", "3ca0b8fe78bb0af6acf52c522983c77424a66c96", "8c1c096bfa1a1905439acb88b01e432d9a142994", "430f66819f758f6a84aaac4b5f516f9ee4861482", "754e054cda963ef14b9050f4aa546cf041499e51", "6b5eeb5a017de5758e9773b52b0292cfc987ce3d", "285024b15197b5face8bdef1d03f36949b8339c4", "0fc3b585f417c57c6491e2bbe6285f37823474e7", "968f8a1d37e7ae479c2534a29d0d9d9225134605", "00a9ba0063d34ec56792849a67ef57b4601becbb", "143589e1900fd8abe92bf7cc4ae9bd2d6a20859f", "a45adba59080ad625e3005c669345c3a96ad3e18", "d3630df8179b704a516b6ed01cfcad24ae50ade7", "406ee6ce01dbc906ad07a3c89a60c7d8b2252a9a", "d02617e05ede8f929c14edcf1fc8a80a5f55d09c" ], "paperAbstract": "Precise dynamic data race detectors provide strong correctness guarantees but have high overheads because they generally keep analysis state in a separate shadow location for each heap memory location, and they check (and potentially update) the corresponding shadow location on each heap access. The BigFoot dynamic data race detector uses a combination of static and dynamic analysis techniques to coalesce checks and compress shadow locations. 
With BigFoot, multiple accesses to an object or array often induce a single coalesced check that manipulates a single compressed shadow location, resulting in a performance improvement over FastTrack of 61%.", "pdfUrls": [ "https://users.soe.ucsc.edu/~cormac/papers/pldi17.pdf", "http://doi.acm.org/10.1145/3062341.3062350", "http://cs.williams.edu/~freund/papers/bigfoot-tr.pdf", "http://cs.williams.edu/~freund/papers/17-pldi.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3296ac52e3e3be506de6f7847bba979788b8df56", "sources": [ "DBLP" ], "title": "BigFoot: static check placement for dynamic race detection", "venue": "PLDI", "year": 2017 }, "32aec3324ffa1d4bef7748e310471692064d11d0": { "authors": [ { "ids": [ "27010590" ], "name": "Daphne I. Gorman" }, { "ids": [ "1737725" ], "name": "Matthew R. Guthaus" }, { "ids": [ "2379918" ], "name": "Jose Renau" } ], "doi": "10.1145/3123939.3123973", "doiUrl": "https://doi.org/10.1145/3123939.3123973", "entities": [ "Best, worst and average case", "Bluetooth", "Compaq LTE", "Compiler", "Computer architecture", "Dynamic logic (digital electronics)", "EMI", "Frequency band", "Interference (communication)", "Radio frequency", "Run time (program lifecycle phase)" ], "id": "32aec3324ffa1d4bef7748e310471692064d11d0", "inCitations": [], "journalName": "", "journalPages": "774-785", "journalVolume": "", "outCitations": [ "544db01aca933177fcc53c37e2a80c59c417bc81", "772a1c59f8dfaab694c6abf6f8c6130aed36cff2", "5e3f8c2ba2fb225c29ba343565d52b9661e7198e", "14bfc28b5652b605c4936c74f4f53ec6f2e215a8", "102c32b73ee0fb172b746d9ff983d5f20fc0767c", "fa6ed9385f24ea7bb479052c66ad8984e7b127c0", "9085824f84b2ac309a0f934faf8661ac76b5c66a", "00a255bb28f7a0ffad92ed16fbd8ff78d883f501", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "0856f6f40b889dba559f19654834114e9f469760", "7671fa1a6cd1cd4e9e44675c8a0eb4adcda8cb4c", "5b110c2049859f7969512ba9936b5d84333d3a08", "6496c02548f1f3ae3afa9564d0061135b03cd278", 
"e1e9d02370120ff59dbf689fcd76b3c0b2eed81a" ], "paperAbstract": "Processors emit non-trivial amounts of electromagnetic radiation, creating interference in frequency bands used by wireless communication technologies such as cellular, WiFi and Bluetooth. We introduce the problem of in-band radio frequency noise as a form of electromagnetic interference (EMI) to the computer architecture community as a technical challenge to be addressed.\n This paper proposes the new idea of Dynamic EMI Shifting (DEMIS) where architectural and/or compiler changes allow the EMI to be shifted at runtime. DEMIS processors dynamically move the interference from bands used during communication to other unused frequencies. Unlike previous works that leverage static techniques, DEMIS dynamically targets specific frequency bands; the type of techniques used here are only possible from an architectural perspective. This paper is also the first to provide insights in the new area of dynamic EMI shifting by evaluating several platforms and showing the EMI is sensitive to many architectural and compilation parameters.\n Our evaluation over real systems shows a decrease of in-band EMI ranging from 3 to 15 dB with less than a 10% average performance impact. 
A 15dB EMI reduction for LTE can represent over 3x bandwidth improvement for EMI bound communication.", "pdfUrls": [ "https://masc.soe.ucsc.edu/docs/micro17.pdf", "http://doi.acm.org/10.1145/3123939.3123973" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/32aec3324ffa1d4bef7748e310471692064d11d0", "sources": [ "DBLP" ], "title": "Architectural opportunities for novel dynamic EMI shifting (DEMIS)", "venue": "MICRO", "year": 2017 }, "32b9e31d4b51d419896fbdd4c8a5213715e3977c": { "authors": [ { "ids": [ "2215171" ], "name": "Jianwei Xiao" }, { "ids": [ "40102422" ], "name": "Ming Gu" }, { "ids": [ "1786954" ], "name": "Julien Langou" } ], "doi": "10.1109/HiPC.2017.00035", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00035", "entities": [ "Algorithm", "Approximation", "Distributed memory", "Low-rank approximation", "Numerical analysis", "Oversampling", "ScaLAPACK", "Shared memory", "Uniprocessor system" ], "id": "32b9e31d4b51d419896fbdd4c8a5213715e3977c", "inCitations": [ "225478cfe9d93b0887b0474ef0bdf5a4990e666d" ], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "233-242", "journalVolume": "", "outCitations": [ "4392e0bd15de8c19421158915787627a0f6cc365", "7a3bbfdd6d90d38f2c02339f33a7221825dab713", "459fdf32a40afda6290f351cefe64a8abfa0aa71", "7cda32aeefdd3cabd76871b8ee06bd1a1ea2ba10", "0df92684396540f140094ec304871164dc385c9d", "43d37485707c9a6118df603772d0f84d05ad8e0d", "0d0f5708846382d9877ec6814bffc41ef1f9a895", "a244717eef0b7cb421a6710f4508a91e8a3b52a9", "ee75b9a5d84858c8abc8ec9145a68f3a06d065e5", "6d73c9947d840cd84a8eee79c224add5fbbb929e", "53137ebc45ab6bbfe29318517f68be5fd9653dd4", "1fcaa630c6ae6a7d28ead7d8906f3bf682ccb680", "cebc6a22f21e4ba3688521d1076944e5e0c23e94", "1604dd2e1ad465eb48374f37a302eb0bd3bceff1", "913c7581c4cc62c0435e4ecc50cee7c4005ca75a", "062472a5b7a3e1a83bf00596397a112f226dfcf2", "64c3d952198805a73d15cad68d1b8b18f3455ac2" ], "paperAbstract": "Factorizing 
large matrices by QR with column pivoting (QRCP) is substantially more expensive than QR without pivoting, owing to communication costs required for pivoting decisions. In contrast, randomized QRCP (RQRCP) algorithms have proven themselves empirically to be highly competitive with high-performance implementations of QR in processing time, on uniprocessor and shared memory machines, and as reliable as QRCP in pivot quality. We show that RQRCP algorithms can be as reliable as QRCP with failure probabilities exponentially decaying in oversampling size. We also analyze efficiency differences among different RQRCP algorithms. More importantly, we develop distributed memory implementations of RQRCP that are significantly better than QRCP implementations in ScaLAPACK. As a further development, we introduce the concept of and develop algorithms for computing spectrum-revealing QR factorizations for low-rank matrix approximations, and demonstrate their effectiveness against leading low-rank approximation methods in both theoretical and numerical reliability and efficiency.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00035" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/32b9e31d4b51d419896fbdd4c8a5213715e3977c", "sources": [ "DBLP" ], "title": "Fast Parallel Randomized QR with Column Pivoting Algorithms for Reliable Low-Rank Matrix Approximations", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "32d89db0cb14af99d06524466cf1b3c0d25a7e85": { "authors": [ { "ids": [ "3209512" ], "name": "Wei-Lun Hung" }, { "ids": [ "1747249" ], "name": "Vijay K. 
Garg" } ], "doi": "10.1109/IPDPS.2017.57", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.57", "entities": [ "Boolean expression", "Busy waiting", "Java", "Lock (computer science)", "Programmer", "Reentrancy (computing)", "Span and div", "Synchronization (computer science)", "Test case", "Transactional memory" ], "id": "32d89db0cb14af99d06524466cf1b3c0d25a7e85", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "927-936", "journalVolume": "", "outCitations": [ "6756d3e0669430fa6e006754aecb46084818d6b6", "9592f877b421ae269c940ef96e5e8931e98aa043", "f8c75e2d94c07aee81b37598cff8412b9afb12c9", "167d2cfd31948e72243a5f442544c0d4b1f826b9", "bbac864f6815762a57ad18bdc3e6c456b7140947", "2bf4940710deb2571e93b1c922e8e7452e854afd", "56f2beface9bf100b2be028259a8f1d987ccc6f0", "1548c821b925cee264dd58a6a193bc31ceb62502", "43b9fc3dabc4f9cf6550f64e50b92bbe58dd3893", "1caec20acae9ea9da817c59bba6b13c8d745efd2", "c44b97f870b862f7f6f8aebc9ffde4565dd64380", "0127006ec09ad6e4d20001536d8788f45b30fef4", "09ed565e84057123c15ab12b885c235d1f241aed" ], "paperAbstract": "Current monitor based systems have some disadvantages for multi-object operations. They require the programmers to (1) manually determine the order of locking operations, (2) manually determine the points of execution where threads should signal other threads, (3) use global locks or perform busy waiting for operations that depend upon a condition that spans multiple objects. Transactional memory systems eliminate the need for explicit locks, but do not support conditional synchronization. They also require the ability to rollback transactions. In this paper, we propose new monitor based methods that provide automatic signaling for global conditions that span multiple objects. Our system provides automatic notification for global conditions. 
Assuming that the global condition is a Boolean expression of local predicates, our method allows efficient monitoring of the conditions without any need for global locks. Furthermore, our system solves the monitor composition problem without requiring global locks. We have implemented our constructs on top of Java and have evaluated their overhead. Our results show that on most of the test cases, not only our code is simpler but also faster than Java's reentrant- lock as well as the Deuce transactional memory system.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.57", "http://users.ece.utexas.edu/~garg/dist/ipdps17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/32d89db0cb14af99d06524466cf1b3c0d25a7e85", "sources": [ "DBLP" ], "title": "Automatic-Signal Monitors with Multi-object Synchronization", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "32d8d4ec335e53f4ea8961ef047371eee15d2afe": { "authors": [ { "ids": [ "32219380" ], "name": "Sanjeev Sondur" }, { "ids": [ "34643286" ], "name": "Madhurima Ray" }, { "ids": [ "32273851" ], "name": "Joyanta Biswas" }, { "ids": [ "1716613" ], "name": "Krishna Kant" } ], "doi": "10.1109/IGCC.2017.8323566", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323566", "entities": [ "Backlink", "Backplane", "Backplane Device Component", "Communication endpoint", "Controllers", "Data center", "Decision", "Duplex (telecommunications)", "Fat tree", "Holism", "NOONAN SYNDROME 3", "Platelet Glycoprotein 4, human", "Power management", "Router (computing)", "Semiconductor consolidation", "Simulation", "Simulators", "Sleep mode", "Software-defined networking", "TRANSMITTER (Medical Device)", "Transmitter", "Two-port network" ], "id": "32d8d4ec335e53f4ea8961ef047371eee15d2afe", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ 
"c1d4d3b2dab80ac3eb4a0a537696248adabd1922", "30a82a63a339c1e69aac36b23900544fe9ec97bb", "d00a718acf86b02977489c9cd323c22f322c547b", "306c1c1c05e9fb8db5ad4d0b4e715073f54de6fc", "019a591f7edebf3d06817d4f9b1d5a82bf78c085", "1369ecf67ce019c08d5e52e5e09b57f2b245e640", "4b891a1e8c785463de16be3e953d7094f9f4de43" ], "paperAbstract": "In this paper, we present an enhanced and holistic energy model for the widely used Network Simulator, NS3. As computing becomes more energy efficient, data movement, and hence data center networks consume an increasing percentage of the total energy consumption and it is important to provide energy management capabilities in the network simulators. Our enhanced NS3 simulator supports the use of different sleep modes for each network port on a switch/router and endpoint, and backplane on switch/router in order to reduce the network energy consumption. It also supports two port level power management mechanisms — unidirectional (transmitter only) and bidirectional (transmitter and receiver). Moreover, the simulator supports local consolidation that consolidates traffic across multiple outgoing/incoming links into a node, and a global controller that monitors all links in the network via a SDN-like mechanism and helps make better local consolidation decisions. The implemented capabilities are illustrated by applying them to the popular fat-tree based data center network. 
It is shown that the local consolidation mechanism coupled with bidirection port-level energy management can reduce the network energy consumption substantially.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323566" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/32d8d4ec335e53f4ea8961ef047371eee15d2afe", "sources": [ "DBLP" ], "title": "Implementing data center network energy management capabilities in NS3", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "32d8e555441c47fc27249940991f80502cb70bd5": { "authors": [ { "ids": [ "3469125" ], "name": "Congzheng Song" }, { "ids": [ "1707461" ], "name": "Thomas Ristenpart" }, { "ids": [ "1723945" ], "name": "Vitaly Shmatikov" } ], "doi": "10.1145/3133956.3134077", "doiUrl": "https://doi.org/10.1145/3133956.3134077", "entities": [ "Adversary (cryptography)", "Algorithm", "Black box", "Computer vision", "Convolutional neural network", "Facial recognition system", "Internet Movie Database (IMDb)", "Machine learning", "Matrix regularization", "Predictive modelling", "Standard ML", "Test set" ], "id": "32d8e555441c47fc27249940991f80502cb70bd5", "inCitations": [ "6bc565939f5ff4d96cbfe502dd5fa539098d309a", "088aabe3da627432fdccf5077969e3f6402f0a80", "b3f2a11d45757e675be123d55ec0eb192bcca990", "5f3101a9ba19e618c3e05c70ecdba63c1c6a3f8d", "6888f3402039a36028d0a7e2c3df6db94f5cb9bb", "71a1bc401c7e11b60f830b800c32c86936cc5b15", "6acd95817e6ccbb9376194d84a846964033f1ed0", "cf7e5a59cbe6fa10840a2f5e1c21adadc843d401", "d8c6b7644923692bf460bde0b7720708a82666fc", "616e94334177eb1e330115f19d02416709a3e373" ], "journalName": "", "journalPages": "587-601", "journalVolume": "", "outCitations": [ "0a7f6563c30dc276c4dec2c278dec086a91bee33", "37bbe6d64cb4ff9ad546bfa36b0512f580bc6bf8", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "16358a75a3a6561d042e6874d128d82f5b0bd4b3", "10eb7bfa7687f498268bdf74b2f60020a151bdc6", "162d958ff885f1462aeda91cd72582323fd6a1f4", 
"0a7fb47217e6d0e3b80159bc4f9e02a50ea1f391", "24f1795f8d9b0a5696439e09bb6d6d0a838008ec", "6f006a3895dd8fb24f83235a67f2fe72418aa800", "0279d698fbe6a3dd05893f69880019cad2b68014", "370b5757a5379b15e30d619e4d3fb9e8e13f3256", "0756d1e7ed9e0d20f0c6e7cfbebfc7153db8d3a1", "03309aff49ff59c89b72e59d91cb8fd93cfbc837", "30c9bb327b7f2b9f1d1e5b69b9d0c97b410948d9", "7aaede70f5efcb1542a80707c1f0f8b01955a7d2", "11ccb00bd3ff98e3f46a51cca059241c70954d4f", "326bb49d3ae9e1e1551028200916192e50004105", "24e6cf0796237f21c780a3f0c996817f57b3a1bd", "47dd6b9d9cedbe2526ad22a01ca4fea1025e07d1", "8a29510d57bed4b33bde2ea7b6beb8c8a1950b92", "c641fb8f6ff1ac0c6d0d4ad9fbb7d50d8464729b", "fd2711cfe890675e8d885df88f3f76b5be5b39a6", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "572dd2d5d75227bb878430c9375b9be92cc7e6e9", "9f1f065bf08cd90431cc051267a708f56436cd82", "02fdc2743f6c5ddddc39af8d3af1f04e301e17ef", "2d3482dcff69c7417c7b933f22de606a0e8e42d4", "27da8d31b23f15a8d4feefe0f309dfaad745f8b0", "1bb07c114cb447552d36a95445cc207f496d85aa", "3ec2838f52bdefab699e6b364bc73126dd27d79f", "40d68c0011958b9a990c9df65414fcf4fd539c72", "f63487b3fda2d96d8b3e97391448c76e00f2353c", "4cbaac7455ac02f2b5d4d266d3dc6788ee56cc83", "0d3bb75852098b25d90f31d2f48fd0cb4944702b", "7b1cc19dec9289c66e7ab45e80e8c42273509ab6", "0a7196fcadeb009d5582b02ce4aa59546f6036e4", "4af182338ee63754d4569c26cb6a5c3bbdd8cf2a", "ebab687cd1be7d25392c11f89fce6a63bef7219d", "7cdf1c29cb63423c9638dd4f5620956b3fe80d11", "05aba481e8a221df5d8775a3bb749001e7f2525e", "4e4d59de8e85f5934acb7eb34149c749e235509e", "05ec11e91b834eb14c4ab6a4d70fe5eda5929048", "759a3b3821d9f0e08e0b0a62c8b693230afc3f8d", "4afc353a68ce5cc9e17febaa3199da43ba549840", "22bc7549801fd359f932bbdc11b8ca24b87baadf", "032d59d75b26872d40081fb40d7a81c894455d91", "bff8252c3d7a2557e8a4bbbc94079d23c7c8d9fd", "384ac22ddf645108d085f6f9ec6d359813776a80", "5d90f06bb70a0a3dced62413346235c02b1aa086", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "9a53abcd90ce847ba776bc933b19f77e698b020a", 
"3c50f1652c97fb3f032b864750c2982704727e93", "02bc27c39eaaa6b85d336be81b15ca19f112a950", "3ff65f0ff04525283c51d03b5b4b71a1bafdca94", "1bd10813ade534b5500e92600d909bacb514138d", "01fcae344d2edb715bcc63a40b6052c0331741bd", "1c61f9ef06fe74505775a833ff849185757199e7" ], "paperAbstract": "Machine learning (ML) is becoming a commodity. Numerous ML frameworks and services are available to data holders who are not ML experts but want to train predictive models on their data. It is important that ML models trained on sensitive inputs (e.g., personal images or documents) not leak too much information about the training data.\n We consider a malicious ML provider who supplies model-training code to the data holder, does \\emph{not} observe the training, but then obtains white- or black-box access to the resulting model. In this setting, we design and implement practical algorithms, some of them very similar to standard ML techniques such as regularization and data augmentation, that \"memorize\" information about the training dataset in the model\\textemdash yet the model is as accurate and predictive as a conventionally trained model. We then explain how the adversary can extract memorized information from the model. We evaluate our techniques on standard ML tasks for image classification (CIFAR10), face recognition (LFW and FaceScrub), and text analysis (20 Newsgroups and IMDB). 
In all cases, we show how our algorithms create models that have high predictive power yet allow accurate extraction of subsets of their training data.", "pdfUrls": [ "https://arxiv.org/pdf/1709.07886v1.pdf", "http://arxiv.org/abs/1709.07886", "http://www.cs.cornell.edu/~shmat/shmat_ccs17.pdf", "http://doi.acm.org/10.1145/3133956.3134077" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/32d8e555441c47fc27249940991f80502cb70bd5", "sources": [ "DBLP" ], "title": "Machine Learning Models that Remember Too Much", "venue": "CCS", "year": 2017 }, "330d450b626c6716ebdafb5642560905b526d54c": { "authors": [ { "ids": [ "7368358" ], "name": "William Spoth" }, { "ids": [ "3275836" ], "name": "Bahareh Sadat Arab" }, { "ids": [ "8926608" ], "name": "Eric S. Chan" }, { "ids": [ "3241986" ], "name": "Dieter Gawlick" }, { "ids": [ "2319764" ], "name": "Adel Ghoneimy" }, { "ids": [ "1798930" ], "name": "Boris Glavic" }, { "ids": [ "2632725" ], "name": "Beda Christoph Hammerschmidt" }, { "ids": [ "2043556" ], "name": "Oliver Kennedy" }, { "ids": [ "3409770" ], "name": "Seokki Lee" }, { "ids": [ "39789752" ], "name": "Zhen Hua Liu" }, { "ids": [ "2067901" ], "name": "Xing Niu" }, { "ids": [ "39897670" ], "name": "Ying Yang" } ], "doi": "", "doiUrl": "", "entities": [ "Context-sensitive language", "Data curation", "Database", "Probabilistic database", "Programming paradigm", "Relational database", "Workspace" ], "id": "330d450b626c6716ebdafb5642560905b526d54c", "inCitations": [ "bb03895e9c72854ca9facf0874d08c22a577ead7", "06b4a124db7553211edc7e96beff42b72d6b1e24", "1a9af9093012ffa916d73a24d288c022ecc4e205", "8c4e1daaf9de552a78e74b1e85d0e412a81787f7" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "3f2a520607bdf271a0472a17d504f9926ccda331", "941e319b09438e3b39ddf8469f5ff04c49f82054", "96531057874ad205c2ca3fc097082325c88d2599", "01722e1e5d304c5832e2402bb332ab1f338aee9d", "022e367b68958d6818e6f6b688970caec7155ea6", 
"1ec2d02bd12f3a357449cf1bbc67b6adf7cd6296", "5fec451ab851aada6379a0f9d868ee740b5d2fb3", "5b40fda490cca073e47f16a36d22c0b6e71893a2", "3459fa3493ec2b400a4230ad6b0d73329be8fea0", "213cb7593934bc675c336f53dd6c61a3c799be80", "2bc7bac7f7cdf20816758fd794909176cc97ed92", "0332d94ae4b198df2b8a7a0da46ade1e371dcff8", "4710d3f1da775c99a4facc96a022738ff63b61e8", "9045fdf362098d725db0ddc4386cde5625bd1b40", "5d5df06316ee3b06797f4d739177bb133923663c", "1948575f4cedf689f708d1f0880e79de9ec4c4a5", "ac0c8a350db74ecbefc190eecc067c2363da39a4", "2654970f704c7c450a05c41c8d4adc2f8b0a5028", "151673abe01271dc3fc37725c02e95e7970f3bed" ], "paperAbstract": "The rigid schemas of classical relational databases help users in specifying queries and inform the storage organization of data. However, the advantages of schemas come at a high upfront cost through schema and ETL process design. In this work, we propose a new paradigm where the database system takes a more active role in schema development and data integration. We refer to this approach as adaptive schema databases (ASDs). An ASD ingests semi-structured or unstructured data directly using a pluggable combination of extraction and data integration techniques. Over time it discovers and adapts schemas for the ingested data using information provided by queries and user-feedback. In contrast to relational databases, ASDs maintain multiple schema workspaces that represent individualized views over the data which are fine-tuned to the needs of a particular user or group of users. A novel aspect of ASDs is that probabilistic database techniques are used to encode ambiguity in automatically generated data extraction workflows and in generated schemas. ASDs can provide users with context-dependent feedback on the quality of a schema, both in terms of its ability to satisfy a user\u2019s queries, and the quality of the resulting answers. 
We outline our vision for ASDs, and present a proof-of concept implementation as part of the Mimir probabilistic data curation system.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p84-spoth-cidr17.pdf", "https://odin.cse.buffalo.edu/papers/2017/CIDR-ASDs-submitted.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/330d/450b626c6716ebdafb5642560905b526d54c.pdf", "s2Url": "https://semanticscholar.org/paper/330d450b626c6716ebdafb5642560905b526d54c", "sources": [ "DBLP" ], "title": "Adaptive Schema Databases", "venue": "CIDR", "year": 2017 }, "331281c3339440afadd08fa7501c2048bba3197c": { "authors": [ { "ids": [ "10419477" ], "name": "Amir Gholami" }, { "ids": [ "3144543" ], "name": "Andreas Mang" }, { "ids": [ "8453654" ], "name": "Klaudius Scheufele" }, { "ids": [ "1740714" ], "name": "Christos Davatzikos" }, { "ids": [ "10837693" ], "name": "Miriam Mehl" }, { "ids": [ "2395747" ], "name": "George Biros" } ], "doi": "10.1145/3126908.3126930", "doiUrl": "https://doi.org/10.1145/3126908.3126930", "entities": [ "Cubic Hermite spline", "Cubic function", "Discretization", "Fast Fourier transform", "Image analysis", "Image registration", "Interpolation", "Medical image computing", "Medical imaging", "Population dynamics", "Scalability", "X86" ], "id": "331281c3339440afadd08fa7501c2048bba3197c", "inCitations": [ "a72d44335734307f342124c85c84b15b9024626e" ], "journalName": "", "journalPages": "19:1-19:13", "journalVolume": "", "outCitations": [ "69507532348cbd60ea29e1c4b6aa00173423666e", "c24bf8f00610880ebf2209171a76c5d3259bf23f", "051b542ba631c3de701f2a6ef0188a97d3a121f7", "080375256438edb90b3129967b3159e289657264", "6f7ab17073172acd5eaacd867c0785082825dc20", "7f943cd3dbe42a7bfb97308e8627b3b561a62fef", "10e2e296f2f4990e6d260edef41fa4e4a4d34f1b", "086e763b63dc1d81f801773575cc1eaf9dbd2d04", "0e14a23f020314048b33606717b472cab2429e99", "ef87b11d0b3e69b200b970951da3653889de1d16", "149f8838b2d0d2901173fcab9e9039fff98944ea", 
"5cc81fdcff86bc0f43f11759297ee8b5be49a34f", "04a1079f1205ab0f782ba214443e897a0d7d7d87", "2fd3f826e25cf863dc7c1f59f7d0f2ef0d71f2e8", "4be83c23f60e48009a9f627a110f533caba00c0c", "3131ad55a80f39ce1974860fabd3b62d45f2fc99", "2ea8110ea990d1fa786e189ade60ae69ebb6a3e9", "8d801b624240589264d5bfeefe6364aaa4547d16", "4893db523d69625df8cd6d87bc798d165908ee23", "05562c109791c316c66b2b84b0cee4733d9f36b0", "1e597e6e69e7cf5914b8817420ea0bd95ac86545", "7e548850d86a15d0bcb72f4501a44777131eb8ca", "b723aa0098da71175c6396add30e1528b29123df", "fb1d63bae118ea0d314348c36f1ab82e286948ac", "0233f40158f293c2e5711e287d665b66c3225b18", "43edeb71f44b024cc08cd0092a13a7f72119c0b3", "6ed93060d03c721e37863c8dc49680097277bf96", "b12947a52414c87bcdc6efc25baa27caf9a3d99b", "012de07cb6de28663ace71b84546f97cfbeddbb8", "8349f4093827d1aeb2d8690a3292013dc89c029e", "6fa56633781586752a62b5df8292eab92977fda1", "696152f0d39dc268c2bb7f132d133020af801143", "3643c8cfec4c9dc722cf701b8c5164f93d02bd96", "eae9d628725686c2b491fc891434290e4d2bdda4", "b2a0b0cf96ec1d3f08be025cd777a12f949c543f", "84ff181750f57f25237e50704b186b22450d1ddf", "c746fa5bea4ef651ac74ff6f5df4469d4237c7c3", "56404c8bdc6239697bc427ad37451d14a7ff4547", "98c87085d69e2ce5891d3afbb4467f42690853f1", "aec60672e5ee9edd8606a5d2949e17dfbd01c863", "3a35746fa5e848c785ad9b9d986d58f5a951bf5e", "42d70040b8fcca1c572c4c984f2b580304d96eb3", "0cc4baae4f621e64826534c575d3870d6f49d37d", "154300ee04427f1b2e931ecc432eaf7f3bf01d9c", "6e612c9ec742ec7d5ef523bd656b6200c0bb71ca", "03b39cb714f9e24e1883870f99398629ecf37166", "2dce1954d34521ac9be7a8c3baae537a7f27ec64" ], "paperAbstract": "We present SIBIA (Scalable Integrated Biophysics-based Image Analysis), a framework for coupling biophysical models with medical image analysis. It provides solvers for an image-driven inverse brain tumor growth model and an image registration problem, the combination of which can eventually help in diagnosis and prognosis of brain tumors. 
The two main computational kernels of SIBIA are a Fast Fourier Transformation (FFT) implemented in the library AccFFT to discretize differential operators, and a cubic interpolation kernel for semi-Lagrangian based advection. We present efficiency and scalability results for the computational kernels, the inverse tumor solver and image registration on two x86 systems, Lonestar 5 at the Texas Advanced Computing Center and Hazel Hen at the Stuttgart High Performance Computing Center. We showcase results that demonstrate that our solver can be used to solve registration problems of unprecedented scale, 40963 resulting in ∼ 200 billion unknowns---a problem size that is 64X larger than the state-of-the-art. For problem sizes of clinical interest, SIBIA is about 8X faster than the state-of-the-art.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126930" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/331281c3339440afadd08fa7501c2048bba3197c", "sources": [ "DBLP" ], "title": "A framework for scalable biophysics-based image analysis", "venue": "SC", "year": 2017 }, "33136ce44ea4127275a118e176679a9cfc1d70e2": { "authors": [ { "ids": [ "2647263" ], "name": "Renan Fischer e Silva" }, { "ids": [ "2410518" ], "name": "Paul M. 
Carpenter" } ], "doi": "10.1109/CLUSTER.2017.19", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.19", "entities": [ "Acknowledgement (data networks)", "Active queue management", "Apache Hadoop", "Data center", "Electroconvulsive therapy", "Encoding Control Notation", "Explicit Congestion Notification", "High-throughput computing", "IP Multimedia Subsystem", "Network congestion", "Network switch", "Protocol Buffers", "Throughput" ], "id": "33136ce44ea4127275a118e176679a9cfc1d70e2", "inCitations": [ "9c145728e98d7da6352af2cd2ae3d900bf90820c" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "349-353", "journalVolume": "", "outCitations": [ "1eddf92320697dbaae59cb84fafd5af73e0fc865", "abc339bb2aaa988a15eb891f418dde7d95c6337f", "0c83169bf4ebb29979bfe47708cb6b79b6e28755", "4db4c8a3688d54a14c417af4795620a976de7b90", "063e13ef8dbe06b69ecd07988898c102532a7458", "de17cf40a4db13315c631c597959ae26f691f2fa", "9c145728e98d7da6352af2cd2ae3d900bf90820c", "426b5989c089ac3ba5c28ae339bbd51ad2439859", "2c153242eb1a02a3417f2eddb8373d109d309fe0", "779030e48938c6c04087bf682bc630cb02a65fda", "06db78ece7ba41bccab5df77240541e32cffd623", "3c66994ac5c16064132e3f241b0fec97092e6164", "418958e8ffe65d47548c9f198689c85311ebc7bb", "15ab8257535d40442516654494a92be5ca97d5dc" ], "paperAbstract": "Various extensions of TCP/IP have been proposed to reduce network latency; examples include Explicit Congestion Notification (ECN), Data Center TCP (DCTCP) and several proposals for Active Queue Management (AQM). Combining these techniques requires adjusting various parameters, and recent studies have found that it is difficult to do so while obtaining both high performance and low latency. This is especially true for mixed use data centres that host both latency-sensitive applications and high-throughput workloads such as Hadoop.This paper studies the difficulty in configuration, and characterises the problem as related to ACK packets. 
Such packets cannot be set as ECN Capable Transport (ECT), with the consequence that a disproportionate number of them are dropped. We explain how this behavior decreases throughput, and propose a small change to the way that non-ECT-capable packets are handled in the network switches. We demonstrate robust performance for modified AQMs on a Hadoop cluster, maintaining full throughput while reducing latency by 85%. We also demonstrate that commodity switches with shallow buffers are able to reach the same throughput as deeper buffer switches. Finally, we explain how both TCP-ECN and DCTCP can achieve the best performance using a simple marking scheme, in contrast to the current preference for relying on AQMs to mark packets.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.19" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/33136ce44ea4127275a118e176679a9cfc1d70e2", "sources": [ "DBLP" ], "title": "High Throughput and Low Latency on Hadoop Clusters Using Explicit Congestion Notification: The Untold Truth", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "3332b43767d69691e226d22ef2e2f2bd459c9b57": { "authors": [ { "ids": [ "9279606" ], "name": "Yuxin Su" }, { "ids": [ "1706259" ], "name": "Irwin King" }, { "ids": [ "1681775" ], "name": "Michael R. 
Lyu" } ], "doi": "10.1145/3077136.3080828", "doiUrl": "https://doi.org/10.1145/3077136.3080828", "entities": [ "Algorithm", "Experiment", "Feature vector", "Learning to rank", "Machine learning", "Relevance", "Stylish" ], "id": "3332b43767d69691e226d22ef2e2f2bd459c9b57", "inCitations": [], "journalName": "", "journalPages": "45-54", "journalVolume": "", "outCitations": [ "91ffba783a71800e6af6969f4199b68cc9951e4a", "b8ee760984ad10f8cdd8b9fa3883d06643c06b54", "138b08d82dda6f56053a393836de4728d3a85709", "2704a9af1b368e2b68b0fe022b2fd48b8c7c25cc", "8e56842627add6db2646eeabfb3d28efed820e49", "4ceb34fe6d20d271048306128896eaa8256149ae", "62d2be041702fe9fb9828f87ce168ad6dd09b026", "4ccbc1a02811fdf83dd081be1e6f5baf08500369", "263808c77c6c5cf4925bc9b3912c7deebbc24503", "133b08c4c3d707df3846a566387bbb8c93c8c548", "501789f894bb75b6c80eab778ddac84381207180", "4fc6da3829b6b881e3d7d4ac029fb121bf257003", "b88ea5081557d69e91b567fdefa7f20f6e9eef48", "0932d7952a1a7c07c2ffa3ad89b0b827e24d6a9e", "684be9e9bd41d148158c64ba811c08f66b58092a", "7a11809fc4dad4cda72c20bab32bee8b0f6b6fa9", "2f42f117f8ef8bcf3ec11bae394541724b064de4", "13b3a1e7e0bd80bf38b444ce1568213ebe98d0df", "451de5fc736bfa3284b1ae5c5d9c0e1fb1c2f0c8", "8490234d79b47e459824dcf87c1e288211a3c964", "0a0d6c886d669a18874bf70490cfbd3e8e29d12d", "0d97ee4888506beb30a3f3b6552d88a9b0ca11f0", "f189f55077d0fe9e8d0b9586ffb3b6f33682b844", "4ee4fc69240ee6c062aeafbede9e96aa0a8596bf", "0418df588620d6c8d189f781e95d975df95d2280", "4fffbf5406482305d9adcf8e24887e6f1773027a", "0df9c70875783a73ce1e933079f328e8cf5e9ea2", "4f0d5cbcd30fef3978b9691c2e736daed2f841c1", "2d225c17e30df3cf65bd4d64cd91206f5e56821e", "3cf70be8b97e5831eb7c701130de618decdd4ff5", "48886ea4ee14f0151f186207e1b9ad1d947e83ef", "155f4899a4904ebdb06f166e625ff3e4618ed371", "29d591806cdc6ef0d580e4a21f32e5ad9d09d148" ], "paperAbstract": "Many learning-to-rank (LtR) algorithms focus on query-independent model, in which query and document do not lie in the same feature space, and the 
rankers rely on the feature ensemble about query-document pair instead of the similarity between query instance and documents. However, existing algorithms do not consider local structures in query-document feature space, and are fragile to irrelevant noise features. In this paper, we propose a novel Riemannian metric learning algorithm to capture the local structures and develop a robust LtR algorithm. First, we design a concept called ideal candidate document to introduce metric learning algorithm to query-independent model. Previous metric learning algorithms aiming to find an optimal metric space are only suitable for query-dependent model, in which query instance and documents belong to the same feature space and the similarity is directly computed from the metric space. Then we extend the new and extremely fast global Geometric Mean Metric Learning (GMML) algorithm to develop a localized GMML, namely L-GMML. Based on the combination of local learned metrics, we employ the popular Normalized Discounted Cumulative Gain (NDCG) scorer and Weighted Approximate Rank Pairwise (WARP) loss to optimize the ideal candidate document for each query candidate set. Finally, we can quickly evaluate all candidates via the similarity between the ideal candidate document and other candidates. By leveraging the ability of metric learning algorithms to describe the complex structural information, our approach gives us a principled and efficient way to perform LtR tasks. 
The experiments on real-world datasets demonstrate that our proposed L-GMML algorithm outperforms the state-of-the-art metric learning to rank methods and the stylish query-independent LtR algorithms regarding accuracy and computational efficiency.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080828", "https://arxiv.org/pdf/1705.07563v1.pdf", "http://www.cse.cuhk.edu.hk/lyu/_media/publications/yxsu_sigir17.pdf?cache=cache&id=home", "http://www.cse.cuhk.edu.hk/lyu/_media/conference/yxsu_sigir17.pdf?cache=cache&id=students:phd", "http://arxiv.org/abs/1705.07563" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3332b43767d69691e226d22ef2e2f2bd459c9b57", "sources": [ "DBLP" ], "title": "Learning to Rank Using Localized Geometric Mean Metrics", "venue": "SIGIR", "year": 2017 }, "3343cbbdbf1de4b4caf73834304e2b9c641ee17a": { "authors": [ { "ids": [ "2487014" ], "name": "Yu Feng" }, { "ids": [ "1697444" ], "name": "Osbert Bastani" }, { "ids": [ "1872924" ], "name": "Ruben Martins" }, { "ids": [ "1714075" ], "name": "Isil Dillig" }, { "ids": [ "33209907" ], "name": "Saswat Anand" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Android", "Antivirus software", "Approximation algorithm", "Dataflow", "Malware", "Message sequence chart", "Signature block", "Static program analysis", "Zero-day (computing)" ], "id": "3343cbbdbf1de4b4caf73834304e2b9c641ee17a", "inCitations": [ "155635ebe2acb2b0bad12768f954ddceb4fb5f86", "f9de687d1d04171cb84937b4cdb4fa1a64070971", "20bc9af5e7265c128f77a16c4ca7b7a68b4e4eea", "69673c05fdfccaae3da809df53ff6d75094342ae" ], "journalName": "CoRR", "journalPages": "", "journalVolume": "abs/1608.06254", "outCitations": [ "94d0e8b118efecb9f5b2056d84bf22253a2fb63c", "2c0ff4497579f2edb83648c4d2153dfc37be2ccf", "9e1bcd6414fc6fdd3b63aab48cc3732dc761f538", "29ef615b4d2e5d79bbe1dbf99de0a49f2a97c4d2", "d1875bc7fb14c0db150ce3c4826240b39a1f2834", "023f23c300804754753cb11db51fb7f582556ab7", 
"0ee3be6abce231c9e5b2a9102f0ff180e3ba0606", "190e76829af738652132dd181f6c358b97f71531", "4584c6acc71816a8d13d1b741ab88fbb2195832c", "0c216f20a00819d9cb88adb57e478536cc43a13c", "4a4c0cfc26020d519679a98fe683fce6aab1eefa", "a48994cf1474d22be671e6e53f4ce8da6634f33e", "b865e337b25d00abb427d4126a1cde456a99c4dc", "17c83a2bae9430ca890ad5fec1b207ee7e2d07ca", "12f3e3e02788bbd39e8892814574a6296d577d23", "6ab76471696ce227e4178ca73bb86cc62520c547", "8469007f1d62cac7d7188dbe6b67b24d40d46ca4", "0a7267f1088ee19d3a6d6105d633c0c93ea4cb6b", "421c305a0d2773d1132d9539e42d1f1337f1600a", "265b6313093a3c5ea4a5c75096592739f2999f05", "27110e4f97261faacea32e9567ed7a6aa2b34757", "32bd7b680830b3e168795ccfe650ceeb0edf7878", "05457985ed22d043e561043951753f959017ee90", "0cb4ed5d73b4885f05facfa6aee45bdcdec1847e", "1ba779d5a5c9553ee8ecee5cf6bafb4b494ea7bc", "53522eba23e195d778f590edf925670bb6305a61", "2d78d4e22f8c87f1e8f2202aea97e5795e0ab216", "1ac58ab550f1f8f075b373211d76371d52979ce6", "8ab305c52000ef4c4fe2ed8f354ea41aba2a4957", "1f72657906b810324a68e7f9f305e9bb176d7830", "14490c37be179400c86cf89aac7c9272dddf60e7", "0f16f6f478b5c788dce466eb50e36c612273c36e", "41289566ac0176dced2312f813328ad4c0552618" ], "paperAbstract": "This paper proposes a technique for automatically learning semantic malware signatures for Android from very few samples of a malware family. The key idea underlying our technique is to look for a maximally suspicious common subgraph (MSCS) that is shared between all known instances of a malware family. An MSCS describes the shared functionality between multiple Android applications in terms of inter-component call relations and their semantic metadata (e.g., data-flow properties). Our approach identifies such maximally suspicious common subgraphs by reducing the problem to maximum satisfiability. 
Once a semantic signature is learned, our approach uses a combination of static analysis and a new approximate signature matching algorithm to determine whether an Android application matches the semantic signature characterizing a given malware family. We have implemented our approach in a tool called ASTROID and show that it has a number of advantages over state-of-the-art malware detection techniques. First, we compare the semantic malware signatures automatically synthesized by ASTROID with manually-written signatures used in previous work and show that the signatures learned by ASTROID perform better in terms of accuracy as well as precision. Second, we compare ASTROID against two state-of-the-art malware detection tools and demonstrate its advantages in terms of interpretability and accuracy. Finally, we demonstrate that ASTROID\u2019s approximate signature matching algorithm is resistant to behavioral obfuscation and that it can be used to detect zero-day malware. In particular, we were able to find 22 instances of zero-day malware in Google Play that are not reported as malware by existing tools.", "pdfUrls": [ "http://www.internetsociety.org/sites/default/files/ndss2017_03B-2_Feng_paper.pdf", "http://www.cs.utexas.edu/~yufeng/papers/ndss17-astroid.pdf", "http://arxiv.org/abs/1608.06254", "http://www.cs.utexas.edu/users/isil/ndss17-astroid.pdf", "https://obastani.github.io/docs/ndss17.pdf", "http://www.cs.utexas.edu/~isil/ndss17-astroid.pdf", "http://wp.internetsociety.org/ndss/wp-content/uploads/sites/25/2017/09/ndss2017_03B-2_Feng_paper.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/automated-synthesis-semantic-malware-signatures-using-maximum-satisfiability/", "https://arxiv.org/pdf/1608.06254v2.pdf", "https://stanford.edu/~obastani/docs/ndss17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/6dac/bb7eaf6c1a883270de0efc2b130f18799f44.pdf", "s2Url": 
"https://semanticscholar.org/paper/3343cbbdbf1de4b4caf73834304e2b9c641ee17a", "sources": [ "DBLP" ], "title": "Automated Synthesis of Semantic Malware Signatures using Maximum Satisfiability", "venue": "NDSS", "year": 2017 }, "33445fdc18f34429820342a9290dfdb687158736": { "authors": [ { "ids": [ "40475627" ], "name": "Benjamin P. Wood" }, { "ids": [ "3319406" ], "name": "Man Cao" }, { "ids": [ "2864858" ], "name": "Michael D. Bond" }, { "ids": [ "8319903" ], "name": "Dan Grossman" } ], "doi": "10.1145/3133893", "doiUrl": "https://doi.org/10.1145/3133893", "entities": [ "Adaptive optimization", "Algorithm", "Atomicity (database systems)", "Compare-and-swap", "Complex adaptive system", "Dynamic data", "Java virtual machine", "Jikes", "Microsoft Windows", "Naivety", "Race condition", "SWAP (instrument)", "Selection bias", "Temporal logic", "Thread-local storage", "Virtual machine" ], "id": "33445fdc18f34429820342a9290dfdb687158736", "inCitations": [], "journalName": "PACMPL", "journalPages": "69:1-69:31", "journalVolume": "1", "outCitations": [ "5b3e9430856b3f1147339a3ac1f5d25aac3b04e3", "0fc3098d4413dd75ef750c8dddf6cbe87ea9d8d7", "3296ac52e3e3be506de6f7847bba979788b8df56", "0a92088c1cf7463ed5d347d2624976e0126ffced", "059697e0824d06a43321a9f9d7450da9cc4dc0a8", "eb509e32c85e0aa89955b55f18c7aa3cba4aca4d", "07ab0964c6afca7fec1d1a00df9375de2ae26e1e", "182c0524aa353b6f4f4cb75a88ff3f5fc3bd86e0", "98493b6182dfc19be77f7532bd26fec2ae3d9545", "dad0df622554ad578af806cc7b57ff5e210460c0", "9e562fa998619a05b5f2b43a884b87fab680b762", "17abd45e0a36cd05d70584c38142be06a77971f4", "1152f6aaf760355ae3c08ff6f37c442aeed31dc4", "3eae0271717f6b4d65024abf04e5d98aef41d748", "11bd8d0769616d29dbb7662db28cd179219b2a0a", "024ecd71116a7438b3eba7a97de9f428d1933ccd", "91cdb8e8e9f3bb85a993b57a6309dbee9a7ec298", "012f8e43e7973c8fad3c9a48b4dd7be773c770d1", "62bd72d7a4160bd1a35191c51137d11cfbe30cf7", "01293cc2b2bda3c38c7095d2ea1813fcb0611a3e", "2a974da13d6f956e37549378e00f86aa54bc5642", 
"430f66819f758f6a84aaac4b5f516f9ee4861482", "89dce5f511b06739cbd1e2e2300775106bfe1d17", "1d54c70351e9ee93b87273b2e93750c89e32256f", "02140b856a0a946e64645aa232d8e244e5a683fb", "0c28172b2f30ac9d41f2cb4b470f926771ac7fa8", "4e624272a61a228bcf9565b0e48e86ae3936db80", "05927a36ff88960d1624a95aabc25bd781ad1275", "3371781698dbd3d3e78477af7528530024b828f8", "0773a61ea05e35b4b29360e67c4963f5d2e610dd", "6b6033d761558ad3867a91821f73c6a0deb66309", "1ee37e813203018a4f2124e7a87c9430bc5c3fb1", "1ef3d10196d91aed5939009846bd7ab3a5e3f8e8", "5e870d5430c1287081f063fb510d7b4256b72bf4", "87bf9c81ed472f79e067d59db244f7c8870735d6", "6b5eeb5a017de5758e9773b52b0292cfc987ce3d", "b22122f79ec2812fd6b32308b4acff500de0e5dd", "3e2ec0aad456f8e88abcbb4736d4114d568fc9ed", "d4c5e14e27b45266532c74f6aa0d51a1a4280e7c", "00a9ba0063d34ec56792849a67ef57b4601becbb", "3ca0b8fe78bb0af6acf52c522983c77424a66c96", "40f80ad8e81dcfbf7c19d864f6ef44c279a99a58", "aaa647117dbf4fec6f2734c93c112fcac880d26a", "2346439ece014d5e3ce1564adc2a7ca098a37c8e", "13f6ddd72bcf62dcc13cf4515be29d48948b9693", "1a5fccfe478d455a8cd2ffe2f7bbebd93b810b17", "884749059cae01a003a4f0d9011df3d4ab7dd166", "79c163d6aa3f1a14e64d4288995b0ae76d5e6b4c", "356a2d9859520c9161d67828d45e758a24ecce20", "05a518c3b1a6f5c15d77f8829368677a263ff15d", "42524667961442587a9eac9b6612d8eb7690f0e6", "86ed165adcfd254b511ff1bbb912cad65d45f0d6", "67eeaaa45793098e8923fc74cc6c1d51aa33f27b", "8a0af8ae748210ef571d074362b552af571e6d33", "05a618847e4f08e5bca29dff732757779722b2e0", "ad913bd3d95fc9e5f6888974e04726eb441a6fc6", "7673efb3f26da627247838b427ae3980d97689be", "8b44b7a9849307acc217772c7240ceb87883bee3", "4979b94ae5ca344ac4a7c30e86a4ff10e5ca13b0", "0a44e8cd34a110ec4ed7221b0431694172eadda8", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "7ab605ff3ced95d816286dd0c1f1c42ac2deeef7", "79b6d89f92080b1ec9b8f23d7811666e15f6cabe", "1d4e73d76027c2c7876f9cbf2d892ae4564f51ba" ], "paperAbstract": "This paper presents Fast Instrumentation Bias (FIB), a sound and complete dynamic data 
race detection algorithm that improves performance by reducing or eliminating the costs of analysis atomicity. In addition to checking for errors in target programs, dynamic data race detectors must introduce synchronization to guard against metadata races that may corrupt analysis state and compromise soundness or completeness. Pessimistic analysis synchronization can account for nontrivial performance overhead in a data race detector. \n The core contribution of FIB is a novel cooperative ownership-based synchronization protocol whose states and transitions are derived purely from preexisting analysis metadata and logic in a standard data race detection algorithm. By exploiting work already done by the analysis, FIB ensures atomicity of dynamic analysis actions with zero additional time or space cost in the common case. Analysis of temporally thread-local or read-shared accesses completes safely with no synchronization. Uncommon write-sharing transitions require synchronous cross-thread coordination to ensure common cases may proceed synchronization-free. \n We implemented FIB in the Jikes RVM Java virtual machine. Experimental evaluation shows that FIB eliminates nearly all instrumentation atomicity costs on programs where data often experience windows of thread-local access. Adaptive extensions to the ownership policy effectively eliminate high coordination costs of the core ownership protocol on programs with high rates of serialized sharing. FIB outperforms a naive pessimistic synchronization scheme by 50% on average. Compared to a tuned optimistic metadata synchronization scheme based on conventional fine-grained atomic compare-and-swap operations, FIB is competitive overall, and up to 17% faster on some programs. 
Overall, FIB effectively exploits latent analysis and program invariants to bring strong integrity guarantees to an otherwise unsynchronized data race detection algorithm at minimal cost.", "pdfUrls": [ "https://cs.wellesley.edu/~bpw/research/files/fib-oopsla2017.pdf", "http://doi.acm.org/10.1145/3133893", "https://homes.cs.washington.edu/~djg/papers/fibpaper.pdf", "https://repository.wellesley.edu/cgi/viewcontent.cgi?article=1167&context=scholarship" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/33445fdc18f34429820342a9290dfdb687158736", "sources": [ "DBLP" ], "title": "Instrumentation bias for dynamic data race detection", "venue": "PACMPL", "year": 2017 }, "334ec6e57110ece9f482f9ec2e85412b0be8072a": { "authors": [ { "ids": [ "2104538" ], "name": "Chia-che Tsai" }, { "ids": [ "1755646" ], "name": "Donald E. Porter" }, { "ids": [ "2145730" ], "name": "Mona Vij" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud computing", "GNU Compiler Collection", "Graphene", "Hypervisor", "Library (computing)", "Linux", "Linux", "Loader (computing)", "Open-source software", "Operating system", "Out of the box (feature)", "R language", "Shim (computing)", "Task Control Block" ], "id": "334ec6e57110ece9f482f9ec2e85412b0be8072a", "inCitations": [ "b3f2a11d45757e675be123d55ec0eb192bcca990", "34fe0c6e91d2a6a2325f5057222c3fbf22224fe5", "8ca1436fe1e9bbdb39a92178fa80c7869d92573d", "4e6841a87f67a39d5b145f1dbc88000cca3b213d", "2087f336b0eb38fc60a24a32ec0821fc8fe2b2aa", "4e48446aedda2ac568d0cbd4ddc2e08352388204", "b897c4c09b480f9934d5e9e4cfa2d540aaed522f", "287da0ab3c169c41433b0e5504161dfd1afbfa6c", "e41440cff90683629228b308a94e48c7af11ca36", "85741fa2a0fb1060c138b1b11a0906381661fbcb", "ed84133ca8ef37a273d4b187202f55c6618b953e", "8569785f80712b5787e12b86a3870a28c0182b2c", "33ae35cc24ef4303979b479671c2065256e1b3a7", "38a54f9bbbfc46599770a28999365144a273783f", "b053033ad436cd404bb0eb2e75b3aac83b70d62c", "2106bb4ef13e35ad7640376b0ba3b6261b82fe22", 
"21a402631dff504755e281934eaa90bc9dbe8ae9", "28dd40f411fc91cd9d5b72e6bd8d07de4b36c0ea" ], "journalName": "", "journalPages": "645-658", "journalVolume": "", "outCitations": [ "5b2092b54860f134f78b2ec884c910750def71e6", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "408ba239cece0308dbd180d86ee217d3c8d0b855", "565919855788bfcc7fbaad3006fe0f42c735b333", "05d6a284a55c07434325f8554e67741860e38c30", "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "30f52a79ff53f8969ffcba19013b4a43e629875f", "7cbbab21c6f6fb54a968005afa21468b825f1d1e", "0a35c32ebc233556d11c2038f5a4362f2c40b2a9", "72880d15db2282512e5d3f0a3796b397d68cc7db", "c960d385a8467db3f282ce945d618bda1b53c4f0", "411b78b9d6fa52a656959cb356c3cd6b9325c8f2", "01d1575116b8aaacde1fd0e164a932b1ceffa04d", "b0ac2616034f56ab1469afb935b55fe7e37f8f41", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "5cfc936d12bbd8a0f100687b12b20e406215f30a", "39a59c25e8fcdbb6304e70c86e263539af25e62d", "3f4eef59703ff179e2faf08d04156d00acb1b352", "a23094f0ec3ea80481cbeb9484829a7fa8173d6c", "0a289fd7b14345822b1acda6d82750b15d59663e", "477bbcb5655a9c64893207bb49032e87c06a05f2", "32bc3a563f7cdb230ea267741cdb93a219a367b8", "07f0e56d1c37c213cd5c617dbfba5a0549629a19", "6cfe1e553cb48c7087bb61e80031c415978a4ede", "05f70f429a7bf38efa9e457fd486cb862bd495be", "3702d43d62b6154773d573594e21a39bbed93271", "0957332f8beb1ec4071fcb6fc44cb0b5396463d5", "415012ec86c7a6acebd34bf7eb02eff46dd96e68", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "3700aad5fab8a98dd8113d2c769a78b1cdc4e5f3", "3c48458d8f77b04c9e69b61f5beddf770e61ec04", "6b6fae57882fd193461fca64654107068ce9fd9a", "22d4b19f404b866d13ac6397c32855f108f82f8a", "01ee7db67463b53143e0b2c126363ed6d3c8a532", "104eb3716d8b2d7e6d69805357d5b7fb87caff3e", "3b5b9cbd2f0cfd390eeb968c99266115cb2c9597", "1495c7daaba55dd2e68e026fc6c1848eee1ee710", "178fc755cef313f3231f1fba183570c02d5e471f", "7f489d9801e0674f4436beb34ea8b8695050d5fe", "2d968ef0c5ad0cc6718e2f8b40ce7f4c323dbbdd" ], "paperAbstract": "Intel SGX hardware enables applications to 
protect themselves from potentially-malicious OSes or hypervisors. In cloud computing and other systems, many users and applications could benefit from SGX. Unfortunately, current applications will not work out-of-the-box on SGX. Although previous work has shown that a library OS can execute unmodified applications on SGX, a belief has developed that a library OS will be ruinous for performance and TCB size, making application code modification an implicit prerequisite to adopting SGX. This paper demonstrates that these concerns are exaggerated, and that a fully-featured library OS can rapidly deploy unmodified applications on SGX with overheads comparable to applications modified to use \u201cshim\u201d layers. We present a port of Graphene to SGX, as well as a number of improvements to make the security benefits of SGX more usable, such as integrity support for dynamically-loaded libraries, and secure multi-process support. Graphene-SGX supports a wide range of unmodified applications, including Apache, GCC, and the R interpreter. The performance overheads of Graphene-SGX range from matching a Linux process to less than 2\u00d7 in most single-process cases; these overheads are largely attributable to current SGX hardware or missed opportunities to optimize Graphene internals, and are not necessarily fundamental to leaving the application unmodified. 
Graphene-SGX is open-source and has been used concurrently by other groups for SGX research.", "pdfUrls": [ "https://www.usenix.org/conference/atc17/technical-sessions/presentation/tsai", "https://www.usenix.org/system/files/conference/atc17/atc17-tsai.pdf", "http://www.cs.unc.edu/~porter/pubs/graphene-sgx.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/334e/c6e57110ece9f482f9ec2e85412b0be8072a.pdf", "s2Url": "https://semanticscholar.org/paper/334ec6e57110ece9f482f9ec2e85412b0be8072a", "sources": [ "DBLP" ], "title": "Graphene-SGX: A Practical Library OS for Unmodified Applications on SGX", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "33531d2344c7b45316dd84b5b0f84f46c033c849": { "authors": [ { "ids": [ "2084223" ], "name": "Daniel Huang" }, { "ids": [ "1755724" ], "name": "Jean-Baptiste Tristan" }, { "ids": [ "1758821" ], "name": "J. Gregory Morrisett" } ], "doi": "10.1145/3062341.3062375", "doiUrl": "https://doi.org/10.1145/3062341.3062375", "entities": [ "Algorithm", "Cognitive dimensions of notations", "Compiler", "Executable", "Graphics processing unit", "High- and low-level", "Intermediate representation", "Markov chain", "Markov chain Monte Carlo", "Modeling language", "Monte Carlo", "Monte Carlo method", "Statistical model" ], "id": "33531d2344c7b45316dd84b5b0f84f46c033c849", "inCitations": [ "3d8a1e427d8e86bf687f5ba3c44ede323e9682f5" ], "journalName": "", "journalPages": "111-125", "journalVolume": "", "outCitations": [ "a9d8a6d3f39fe0fb7854d9520700d4af1eeeb64a", "186afe5b50ae139d6da7811ca77dc9620cf67899", "41c910445a5f69966787a63ba1382bf028c202fd", "0c4867f11c9758014d591381d8b397a1d38b04a7", "b0e6bf7a7f508e4e1fcc84a27722f306c9449008", "65cdab424fe449deaab359a1f8e16899ac91dd2a", "7ab07450eac83ff21b8ffc8ddb195a2003825863", "0f890546c00ee8b35c96bc712a2ecfe574af3754", "c3eba5fcba83f9637e83c1ad8be15944f22b15c1", "be3d17df872d41465dabda2fc9a9a61394658a1a", "12bdce48f85da91cd5c8513ce0de1a514e95d981", 
"4b6363ed24982fbda91c8e92657730db55c198d7", "84de86fa10fe4685a50457df3e02904444f9211b", "7b741ab6f0a59a2eeabaf66291474ff230f2b3fc", "4954fa180728932959997a4768411ff9136aac81", "0b89c0c8096b60d939da90da5fcb989447fdfbd5", "bc4d9febd19e30f376e4d26deeeb75047bde24d4", "2697e6720564614816e54da74d8a1bcd883cc817", "2b0f017e5aa968fd13b4a9ba2c2d37d94be6041e", "1acee01a3d5c33435b109c2d290bc32848080944", "3627d9504f522e7f5d8caeb1c0939c33f5678c7b" ], "paperAbstract": "The problem of probabilistic modeling and inference, at a high-level, can be viewed as constructing a (model, query, inference) tuple, where an inference algorithm implements a query on a model. Notably, the derivation of inference algorithms can be a difficult and error-prone task. Hence, researchers have explored how ideas from probabilistic programming can be applied. In the context of constructing these tuples, probabilistic programming can be seen as taking a language-based approach to probabilistic modeling and inference. For instance, by using (1) appropriate languages for expressing models and queries and (2) devising inference techniques that operate on encodings of models (and queries) as program expressions, the task of inference can be automated. \nIn this paper, we describe a compiler that transforms a probabilistic model written in a restricted modeling language and a query for posterior samples given observed data into a Markov Chain Monte Carlo (MCMC) inference algorithm that implements the query. The compiler uses a sequence of intermediate languages (ILs) that guide it in gradually and successively refining a declarative specification of a probabilistic model and the query into an executable MCMC inference algorithm. 
The compilation strategy produces composable MCMC algorithms for execution on a CPU or GPU.", "pdfUrls": [ "http://www.cs.cornell.edu/~jgm/papers/pldi17.pdf", "http://doi.acm.org/10.1145/3062341.3062375" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/33531d2344c7b45316dd84b5b0f84f46c033c849", "sources": [ "DBLP" ], "title": "Compiling Markov chain Monte Carlo algorithms for probabilistic modeling", "venue": "PLDI", "year": 2017 }, "33595f1ada823f49766b981b546214f60ad14b4f": { "authors": [ { "ids": [ "2814225" ], "name": "Masahiro Nakao" }, { "ids": [ "2575510" ], "name": "Hitoshi Murai" }, { "ids": [ "35486615" ], "name": "Hidetoshi Iwashita" }, { "ids": [ "1814141" ], "name": "Akihiro Tabuchi" }, { "ids": [ "2616076" ], "name": "Taisuke Boku" }, { "ids": [ "1744801" ], "name": "Mitsuhisa Sato" } ], "doi": "10.1109/CLUSTER.2017.58", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.58", "entities": [ "CUDA", "Distributed memory", "Graphics processing unit", "Lattice QCD", "Message Passing Interface", "OpenACC", "Programmer", "Programming complexity", "Quantum mechanics" ], "id": "33595f1ada823f49766b981b546214f60ad14b4f", "inCitations": [ "e9a79a290f0ca04507c1e909dd6ba93746136f8f", "a164d6544b48c8c1dcdd3773064fdcaf828879b3" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "429-438", "journalVolume": "", "outCitations": [ "21be843f22e313bac6d1dc19fc53535f9b413033", "13f3f2b23e733a0297beaf4e70375134c30113b0", "3bda9a412d20fdd74ef96a845adfa800d2aa7d9a", "780af8ae12ebb6be7b737ec17cc8357bb090b84e", "5048b1199db383beda869e742691c22ca15e1d56", "368226068a865b5ebfe1590b47a368558e6af36f", "47808cb20e9cc0d2d873d5b7eed936e2e77fe691", "9e490640f84581bd8a63b785e16ebdd2649a32be", "2f3c8e4bb0a738aecfad51bdff7dc75a4fea28fd", "28e32f4ec3395476a6885149c1d73ff3d0edbb46", "f61f7446ad75776cc7b2f2b1fb8fd184e3c455f5", "49c866296354ab54c42d234645cf0700ff4a7315", "b16d97845cae75606faa59aa69552ec426f01d1e", 
"38c48d4a31ab050c6e750cdae21e00421172f694", "aca67206447039a63bd3bed50381003cd10ac882" ], "paperAbstract": "Accelerated clusters, which are distributed memory systems equipped with accelerators, have been used in various fields. For accelerated clusters, programmers often implement their applications by a combination of MPI and CUDA (MPI+CUDA). However, the approach faces programming complexity issues. This paper introduces the XcalableACC (XACC) language, which is a hybrid model of XcalableMP (XMP) and OpenACC. While XMP is a directive-based language for distributed memory systems, OpenACC is also a directive-based language for accelerators. XACC enables programmers to develop applications on accelerated clusters with ease. To evaluate XACC performance and productivity levels, we implemented a lattice quantum chromodynamics (Lattice QCD) application using XACC on 64 compute nodes and 256 GPUs and found its performance was almost the same as that of MPI+CUDA. Moreover, we found that XACC requires much less change from the serial Lattice QCD code than MPI+CUDA to implement the parallel Lattice QCD code.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.58" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/33595f1ada823f49766b981b546214f60ad14b4f", "sources": [ "DBLP" ], "title": "Implementing Lattice QCD Application with XcalableACC Language on Accelerated Cluster", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "336302a2627cacd723fe7e4052968d2508ee5a90": { "authors": [ { "ids": [ "34710203" ], "name": "Katarzyna Olejnik" }, { "ids": [ "2461431" ], "name": "Italo Dacosta" }, { "ids": [ "32591463" ], "name": "Joana Soares Machado" }, { "ids": [ "1716108" ], "name": "K\u00e9vin Huguenin" }, { "ids": [ "3094988" ], "name": "Mohammad Emtiyaz Khan" }, { "ids": [ "1757221" ], "name": "Jean-Pierre Hubaux" } ], "doi": "10.1109/SP.2017.25", "doiUrl": 
"https://doi.org/10.1109/SP.2017.25", "entities": [ "Android", "Information privacy", "Machine learning", "Mobile operating system", "Run time (program lifecycle phase)", "Smartphone", "Usability", "iOS" ], "id": "336302a2627cacd723fe7e4052968d2508ee5a90", "inCitations": [ "0339584e6c0b073e2f62383a7a76d448766143f1", "02498b709402f20642bf857466c7f6742435f030", "155b5baa90760949c7e13dfd61492fd647dbf6ca" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "1058-1076", "journalVolume": "", "outCitations": [ "04bd64577c1f66486825c4ef9132cb94bb5334b4", "f467eb5e7f9d49ab318401f961109882c00f2720", "63245612834324e9afcd29c7d945a99b561cea14", "56c2fb2438f32529aec604e6fc3b06a595ddbfcc", "11c55aa088117e6d0e172b60c37eb2a65553bbea", "8ffc32565380d35bcc68f175cb803918b56517a7", "0c4867f11c9758014d591381d8b397a1d38b04a7", "275b8b1273424d8472e255e4cc3749e6d46ad6a9", "1b9fae8255fda28e4adeb96a36f8e907e8aac6f9", "efec2a88f0e74a1668df4b80b571c04af9ebf707", "0415f6b12202799e0a661138ba2d93e0a3c1ecfc", "0150bf8f97a4682c0bc87097a16db03bbebba8fa", "2d474083f3c9e8dab83b9f6d572743dd5cdd52a7", "73071a056403ed5f9fd5b16b9dd70a93e9a4e375", "74b30a08ffd643a47a698ba83498ef297af03d60", "4308f53244bbb6a1e22ba1d39e079e5065a51364", "c088f15e81e686291e534d0179352b4159b9130a", "4a52090065c3da8231400dd2c3916951eb88e5b3", "2edd97abf77fdb475a31d70b08f183e1c4b8dded", "603d1ea62463a665702ad0d3e2dc25a322a26145", "708beb6b5638b4abc57082af2e58161699712323", "0e658618c9dad4d70dd7dcd5c519185ec4f845f5", "2200f562feb2e7500a91c3a9dcc4cf40f05c50de", "0a4f96deacc1991ff6a8e8dac9e43963d0ddb485", "3597a8f0de4819c112fcc69f6a09e144fac82a4d", "14a23ef72ceb106e4c95df6e396c6291c11a87c9", "75d70671eee7ead26c5636fe5d1e00fef5d993b3", "0c0e6a70a41a5574f06b60b5567013571b071233", "7204c8ee25517c5d15e32d7d9242e36002afcb37", "0a5192954c6229694bcfa6963887b60436d394d9", "324fcfd190f3a755ee25955e39be4f94072073cc", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "10482ef05285f2859fd524fba8f14b345eeb1c06", 
"15cd71ac0333ade954201db6979abb39bde3d181", "12a96f5e1a8c788f8664f45248f1e6a71ccd1214", "67bdf75ece4bc09dd3a8235c189325eacb77a2bf", "35716646b2ce09a1dd0dd584b5adc7242df0beff", "691cdb6e6ea50c8d30a7ada3933f0381ae42ba91", "cd5a26b89f0799db1cbc1dff5607cb6815739fe7", "9e62c595cfd66cf4f97ef7ab7d4505efa0d57560", "65adc8447b4818ce7274bc51bc040d0350e909dd" ], "paperAbstract": "Permission systems are the main defense that mobile platforms, such as Android and iOS, offer to users to protect their private data from prying apps. However, due to the tension between usability and control, such systems have several limitations that often force users to overshare sensitive data. We address some of these limitations with SmarPer, an advanced permission mechanism for Android. To address the rigidity of current permission systems and their poor matching of users' privacy preferences, SmarPer relies on contextual information and machine learning methods to predict permission decisions at runtime. Note that the goal of SmarPer is to mimic the users' decisions, not to make privacy-preserving decisions per se. Using our SmarPer implementation, we collected 8,521 runtime permission decisions from 41 participants in real conditions. With this unique data set, we show that using an efficient Bayesian linear regression model results in a mean correct classification rate of 80% (±3%). This represents a mean relative reduction of approximately 50% in the number of incorrect decisions when compared with a user-defined static permission policy, i.e., the model used in current permission systems. SmarPer also focuses on the suboptimal trade-off between privacy and utility; instead of only \"allow\" or \"deny\" type of decisions, SmarPer also offers an \"obfuscate\" option where users can still obtain utility by revealing partial information to apps. We implemented obfuscation techniques in SmarPer for different data types and evaluated them during our data collection campaign. 
Our results show that 73% of the participants found obfuscation useful and it accounted for almost a third of the total number of decisions. In short, we are the first to show, using a large dataset of real in situ permission decisions, that it is possible to learn users' unique decision patterns at runtime using contextual information while supporting data obfuscation, this is an important step towards automating the management of permissions in smartphones.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.25", "https://infoscience.epfl.ch/record/226751/files/Olejnik2017SP.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/336302a2627cacd723fe7e4052968d2508ee5a90", "sources": [ "DBLP" ], "title": "SmarPer: Context-Aware and Automatic Runtime-Permissions for Mobile Devices", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "3376caa6140cf46ccb4c22454aa0069d8f05615c": { "authors": [ { "ids": [ "2910553" ], "name": "David Sidler" }, { "ids": [ "1753260" ], "name": "Zsolt Istv\u00e1n" }, { "ids": [ "2094139" ], "name": "Muhsen Owaida" }, { "ids": [ "1687400" ], "name": "Gustavo Alonso" } ], "doi": "10.1145/3035918.3035954", "doiUrl": "https://doi.org/10.1145/3035918.3035954", "entities": [ "Column-oriented DBMS", "Computer data storage", "Database", "Field-programmable gate array", "Hardware acceleration", "Intel QuickPath Interconnect", "MonetDB", "Multi-core processor", "Pattern matching", "Response time (technology)", "SQL", "Throughput", "User-defined function" ], "id": "3376caa6140cf46ccb4c22454aa0069d8f05615c", "inCitations": [ "d251e2b4ddf95d84c2ff3d5eef01311ec1a323c4", "c84206427e3c4978fb8d0e725e8f26e0bfed29dd", "ea5d5ad680f54c81f455a194094cf02c669452f9", "cfd6cfdab32782394652944ca4ccac7ec9f2c2a5", "6f537c85b5160a6375306f6eca1a3e8558e7dbd9" ], "journalName": "", "journalPages": "403-415", "journalVolume": "", "outCitations": [ "82e5e034d02a0d3e1219fe527ab8480401fefd50", 
"4881b2d926ef5f4757898534c91db884414e6b3c", "f87bee8ca2b7bbd78d6eb985dba3ea15a1d12aae", "9b10f9a29167b3350b01f00db84410f40a066fdd", "5f15172770c0250ae385182f9bb9882c1be44b45", "b6f83d871948e3f4216026b0455ddebfb1cf3b1a", "06c9511a3a29f1afa3971b1885ad56b5a890dbdc", "2620a9ecf588a4a76d22d5a8dd14657aec97d71b", "704b2e130ad85ee95e136d00d55cce7da883cf57", "5750815fc3230623164fa3cd3a983b6e58bf64f4", "643c43c21c1aab97faa18669dd7ef27bd33e8989", "6a6bc40832da2e03451fe1caa812d10c434f9b1a", "f6c62e96e2cec8ea1f73047d4692aafd73dd9dc5" ], "paperAbstract": "Taking advantage of recently released hybrid multicore architectures, such as the Intel's Xeon+FPGA machine, where the FPGA has coherent access to the main memory through the QPI bus, we explore the benefits of specializing operators to hardware. We focus on two commonly used SQL operators for strings: LIKE, and REGEXP_LIKE, and provide a novel and efficient implementation of these operators in reconfigurable hardware. We integrate the hardware accelerator into MonetDB, a main-memory column store, and demonstrate a significant improvement in response time and throughput. Our Hardware User Defined Function (HUDF) can speed up complex pattern matching by an order of magnitude in comparison to the database running on a 10-core CPU. 
The insights gained from integrating hardware based string operators into MonetDB should also be useful for future designs combining hardware specialization and databases.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035954", "http://www.davidsidler.com/files/sigmod17-patternmatching.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3376caa6140cf46ccb4c22454aa0069d8f05615c", "sources": [ "DBLP" ], "title": "Accelerating Pattern Matching Queries in Hybrid CPU-FPGA Architectures", "venue": "SIGMOD Conference", "year": 2017 }, "3386403da8d9dc141f11e92854f3d830e1c8f401": { "authors": [ { "ids": [ "7146460" ], "name": "Hao He" }, { "ids": [ "33797843" ], "name": "Jiang Hu" }, { "ids": [ "2045541" ], "name": "Dilma Da Silva" } ], "doi": "10.1109/IPDPS.2017.27", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.27", "entities": [ "Amazon Web Services", "Apache Hadoop", "Complex system", "Data center", "Fairness measure", "Interaction", "Job scheduler", "Jumpstart Our Business Startups Act", "Linear temporal logic", "MapReduce", "Multitenancy", "Power management", "Reinforcement learning", "Scheduling (computing)", "Temporal logic" ], "id": "3386403da8d9dc141f11e92854f3d830e1c8f401", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "133-142", "journalVolume": "", "outCitations": [ "a8b5bb125c1f3eb3ccf5ad860abacbd8b042e5b6", "117fc19e17a12c49eebcfce9e1b9870fd04bd230", "573f79a888993a3cd32d3380ba2cfe8668539332", "f1269591359fddc20f95da10c7bd4c054080b447", "5b03317403f4cd2526b5e8c1df74d5d0adf96641", "3e257f01e3ee71545d824a1615c35659525b856a", "073e26aa7192825a8d872fb0c6f25bc31aca77cf", "980773ca869fc17562e4fbcf4202a8f21893b114", "1b2a950e79eec4bd731b0b5f76ac0946481e328a", "6c429bd0b68e685af16f98866d05bb6c561289de", "aeda1dca3f62ea3e16b17a846f32e9f5e98fe6ce", "c3c262b8e56536d14826926b69af59eaefc29bc2", "071f0054c73024be125f4c9daaea516b3b6ea4cf", 
"5c5f8717f31b7e4334b450df15442223f988ff41", "5b9631561a89a3e071d8ec386a616a120220bfd9", "f4bd6691f59eb95c58b6d104d5122158ab7ddaa4", "2988e34168fa91398fa397baf823af2063893e9c", "0c83169bf4ebb29979bfe47708cb6b79b6e28755", "03564af5d388c08123f527cea3c1252c20093c05", "6707147b1a2f4c037fc1b55f509a0ba7c6701e56", "1a617b1c29596e3d09b8bf378ae20568932834cb", "c39e98c6ef583a5555ce2f5823e242afb64a5432", "8c05becd4d5e39d3d8c3b1fd36b643068bc4254b", "8d26f3a3726f6dd2741bf12fcd3bc1abdeab482a", "661b19ff987b9ed9d9252324d4a72ab1fbd588ae" ], "paperAbstract": "Resource management of modern datacenters needs to consider multiple competing objectives that involve complex system interactions. In this work, Linear Temporal Logic (LTL) is adopted in describing such interactions by leveraging its ability to express complex properties. Further, LTL-based constraints are integrated with reinforcement learning according the recent progress on control synthesis theory. The LTL-constrained reinforcement learning facilitates desired balance among the competing objectives in managing resources for datacenters. The effectiveness of this new approach is demonstrated by two scenarios. In datacenter power management, the LTL-constrained manager reaches the best balance among power, performance and battery stress compared to the previous work and other alternative approaches. In multitenant job scheduling, 200 MapReduce jobs are emulated on the Amazon AWS cloud. 
The LTL-constrained scheduler achieves the best balance between system performance and fairness compared to several other methods including three Hadoop schedulers.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.27" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3386403da8d9dc141f11e92854f3d830e1c8f401", "sources": [ "DBLP" ], "title": "Enhancing Datacenter Resource Management through Temporal Logic Constraints", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "338d4815de02be38990db8cff9f96ef8e6959c80": { "authors": [ { "ids": [ "1736343" ], "name": "Karthikeyan Bhargavan" }, { "ids": [ "14394114" ], "name": "Bruno Blanchet" }, { "ids": [ "2403131" ], "name": "Nadim Kobeissi" } ], "doi": "10.1109/SP.2017.26", "doiUrl": "https://doi.org/10.1109/SP.2017.26", "entities": [ "Computational hardness assumption", "Computational model", "CryptoVerif", "Cryptographic primitive", "Cryptography", "HTTPS", "Interoperability", "JavaScript", "Logjam (computer security)", "ProVerif", "Reference implementation", "Shadow Copy", "Transport Layer Security", "Triple DES", "Type system" ], "id": "338d4815de02be38990db8cff9f96ef8e6959c80", "inCitations": [ "594dc2ad971479e1abe78ddb6f5818650810c127", "416b5e19e465911f7fe008cb2d93fd16095e9138", "0b465eb882ea52ad9f592188d2d3f8a313745f47", "c5c6e0bc606ad920418e04405ac43666e7d42903", "625c072ff155ab1517842df66bf217ff3fb3bfaf", "393f25d2e4fd5db52a338dc6783ccc6e90f4ff46", "1d99ce3375cc1c65c07d9fb358e1a101f1d2590a", "23685d74f1f41e94c8dbf4ae197181cd776a2fec" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "483-502", "journalVolume": "", "outCitations": [ "8bc6ce36585a3432682e1f3e08f166a6526145a3", "1d823908b913bc53fd5a6c7bc91939f3e2e87e21", "23eb53170c6de9ff5024db120eda200816fa803f", "909cfc05ca554c344279e5136549c9b2dcd2c7a2", "265f7bdcb3e6bb6d32146113e3929e2365bbd5af", "d8954dea6c3684a28f4ac2b60648df8ef245c26a", 
"02b97d9a3fda357165257aa8bd3031743a5540d2", "04fec8e39d83b4c5cc4fcfeeac5847ecf0134263", "7eed7154d0590c1e6eb64d4ae7935a526deac4a4", "6b22ed103bc3d86004599992380b8b3104c75df5", "2a61439362e5f1f16d9759a44fbddfbf16d69a37", "9f95eb7ce7ce190c7c8e6fca26de1a283f7007b1", "d2712ce067a604c61a28778babebeced19b6bf8e", "0f17ca31699de87aaa09dd31205b146bc472c861", "22144483d329aaaf82bc4380c8317aa3ac84234e", "512e08451eb0d805c77b86e5821560f3b7dec565", "2638a939cd8f4bbdd927dbe8a277569c0d202e93", "0037875e7321eb65867ff47b0e22a080b84502da", "2b6ce083906634e3c3b084e4c9139fb58f082df6", "31e4845a40cfa6a953aef78387b34ea3284cdff9", "ac2a9d093fef9b31d50222b737cc3aa686a0888d", "2aa0e44b8529de8ee75138eade8aba0bfb9f008f", "6a74a8573cb1bd15c5f4fa4e047613d2340e61b9", "2637b8eaff1b2f6dadbbe8ac4157085ebfa345b8", "17e8416d8a71275f05ace6c74d67dfe05db64efc", "2977e30243c4a93462cdb466d97abff4bcd638d2", "db0d1f3ed4a418e126d0281d5b3aadd1fd45c982", "972f42265fd17d8b406df557d6c682ebc5688ec8", "1a79a3efec3d4a177bae7326dd75e33cf362120d", "582302da008255ff515f05c3242f750878725745", "59261f7ccf03b580ed39f96b8928bc965c24d520", "0fcd80f1e27afd1b06306c512e29900bfe3a725b", "f4a463f37f29f205451652a842ff75c718040279", "d4a8fccaffc440f52172fc3e3cacf048b72e244c", "1c3c8b62cd29a33c396d0422fb8331616c87d5c2", "517d4d45013e3b040cec89ba1cffbd4a7eb0122d", "082d2b922818331e2994aeebaaccb776cfa09145", "08756f4d4e69d78ab3bd1276d95965f70dbb7a8e", "7a7a794baaa5567a04e15f760da3d2d3b2b25ef8", "e6d0ac36f37643ab15875c3a5a830e9e51dbf08d", "02adbca269b534eed78dfdb8e52b45b86894a406", "1e7768b135545d473bf4a857f2bbb374ae960dc5", "40860f2db7516f09836ef5bbd65288a4e0957af7", "6d0b3dabf73e0b3fc7cf1c6486a9cd47855a74af", "b6526c52e75d664442b40d11556a37d1e4288bee", "35d603028fe164b46e71898d6780811a84d18c1e", "4a21248e3575dcebe66d1de6f1fe9f008d54e02f", "738a58eff2dbc9bc76742289fbf9dc9cae3a1b1f", "6c484d52c1c58fb7c1d6dca551db2e1291072360", "43c046c3f3b78bec2b528d45b3ded4bb0046d426", "369a232610b53bdda600c35bf8ae6d1e9195c7e5", 
"93e1542b3e051b46b7b96eec94cc306468b9f745", "2d8a132fd622b6b8e46507911f7ab24cbd37e667", "eb7b3e957346cbfab43742796dfb7d7b9b00c0d0", "11b9a10f849c1c7ab598ad1180fb3ad5ba1a7b0a", "77d3a887978603663940cc00c8ada6784641e674", "6903dafec75cec816e38248bf55397fb8f8ad13c", "1ddb640116b1dffba839c4f868176a69de768187", "9017963b1a610d9516d7a3fc74f915d44e3f167d", "94206be34b27903edd8e6c35efde0820750c525c", "04bb092c83242cb708d2653bd537c99643e8386d", "9f8d95c5dbca0712b11b9111316bffdd38c053d2", "4121c4f761ad3b1779506f5a725ed44b5c308461", "0e982be63c47a340bf3749401160ea29b9f8d10f", "81b7f4c7c782a63f2cf6771d096ea9177f7ca4f7" ], "paperAbstract": "TLS 1.3 is the next version of the Transport Layer Security (TLS) protocol. Its clean-slate design is a reaction both to the increasing demand for low-latency HTTPS connections and to a series of recent high-profile attacks on TLS. The hope is that a fresh protocol with modern cryptography will prevent legacy problems, the danger is that it will expose new kinds of attacks, or reintroduce old flaws that were fixed in previous versions of TLS. After 18 drafts, the protocol is nearing completion, and the working group has appealed to researchers to analyze the protocol before publication. This paper responds by presenting a comprehensive analysis of the TLS 1.3 Draft-18 protocol. We seek to answer three questions that have not been fully addressed in previous work on TLS 1.3: (1) Does TLS 1.3 prevent well-known attacks on TLS 1.2, such as Logjam or the Triple Handshake, even if it is run in parallel with TLS 1.2? (2) Can we mechanically verify the computational security of TLS 1.3 under standard (strong) assumptions on its cryptographic primitives? (3) How can we extend the guarantees of the TLS 1.3 protocol to the details of its implementations?To answer these questions, we propose a methodology for developing verified symbolic and computational models of TLS 1.3 hand-in-hand with a high-assurance reference implementation of the protocol. 
We present symbolic ProVerif models for various intermediate versions of TLS 1.3 and evaluate them against a rich class of attacks to reconstruct both known and previously unpublished vulnerabilities that influenced the current design of the protocol. We present a computational CryptoVerif model for TLS 1.3 Draft-18 and prove its security. We present RefTLS, an interoperable implementation of TLS 1.0-1.3 and automatically analyze its protocol core by extracting a ProVerif model from its typed JavaScript code.", "pdfUrls": [ "http://prosecco.gforge.inria.fr/personal/bblanche/publications/BhargavanBlanchetKobeissiSP2017.pdf", "https://suri.epfl.ch/talks/tls-blanchet.pdf", "https://doi.org/10.1109/SP.2017.26", "https://www.ieee-security.org/TC/SP2017/papers/234.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/338d4815de02be38990db8cff9f96ef8e6959c80", "sources": [ "DBLP" ], "title": "Verified Models and Reference Implementations for the TLS 1.3 Standard Candidate", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "33e28ab30ce23a4abeedeae3f4213fcba80d1947": { "authors": [ { "ids": [ "2312211" ], "name": "Simon Kassing" }, { "ids": [ "3338812" ], "name": "Asaf Valadarsky" }, { "ids": [ "2645944" ], "name": "Gal Shahaf" }, { "ids": [ "1718880" ], "name": "Michael Schapira" }, { "ids": [ "34891793" ], "name": "Ankit Singla" } ], "doi": "10.1145/3098822.3098836", "doiUrl": "https://doi.org/10.1145/3098822.3098836", "entities": [ "Data center", "Network topology", "Online optimization", "Program optimization", "Routing" ], "id": "33e28ab30ce23a4abeedeae3f4213fcba80d1947", "inCitations": [], "journalName": "", "journalPages": "281-294", "journalVolume": "", "outCitations": [ "c678e962b158153924bbb24c4900b84375be7e57", "61aa09bc1a3eae17480645c90b06a18cbd62d9af", "09adae55a947e420e2d73de8d4e3f5a1cf4e483f", "025652412d507a8cf98ecacd8a44d32ce28995e1", "0c44588bdcbb82c7183958abab5ebc89c0e650f4", 
"1eddf92320697dbaae59cb84fafd5af73e0fc865", "663e064469ad91e6bda345d216504b4c868f537b", "c23aaab5d4a9fb966703211356e8d19e9a63ad22", "aa6a64afc25f48ad44e510d0055405836c8cc325", "83a1a478c68e21ee83dd0225091bf9d3444a8120", "5f8991828def57d2f0cda942566afff56740d150", "a05548af9f54a7cd57a5c3f2d51b9e76f559f04a", "534ee575a6b0c37e03d1dddb92493b57e9271298", "1090436f31f8d225631afb8b0d847bd49322fe07", "116223b9d3fccaabedf2350f557ab7c651f2fe42", "6cef37401134e650bcf60748c2a8ead42af56b66", "201c0366e232ed7073ccd80a6ed91c65d9cee952", "058f6752d85a517aae298586fdf117acdd7560ea", "7b5144c88098a183eb2f8395276b0be6196a442b", "18326cb5a02a4eafdc908cfce62ddeb93c244fd1", "5c5d03e884d4f0094b217c62267466fa11432c8e", "21c039e563ec0ca023a5b9c729e92a2fd611946a", "6069f23cf3e413a6ce60bec60acb60952d88cc95", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "3365897178130e985acf671d6564f5dd6fa0ea1c", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "08d410ea6f0c3934324467d809e2ea6ffc8a9a73", "14b82ab954a85cb8b336e86cf536c5701ca722e9", "0736d68aad2c198a8f6dda851c27bd180421c2aa", "065c8bfcb45e8c342d26aa1855cf292f9a5cbeff", "177dee5388017f7119100bb283f946ad92722a6b", "288763b8420ef17baf2f0214cf283433fcb4a447", "3b988049dd8f62f772281e90196bbd793700c86b", "420f8d26d069ac34750a7240850cc6192ae1be08", "606b8763a3ed206716bf61a5b00afd048907f6e3" ], "paperAbstract": "Recent studies have observed that large data center networks often have a few hotspots while most of the network is underutilized. Consequently, numerous data center network designs have explored the approach of identifying these communication hotspots in real-time and eliminating them by leveraging flexible optical or wireless connections to dynamically alter the network topology. 
These proposals are based on the premise that statically wired network topologies, which lack the opportunity for such online optimization, are fundamentally inefficient, and must be built at uniform full capacity to handle unpredictably skewed traffic.\n We show this assumption to be false. Our results establish that state-of-the-art static networks can also achieve the performance benefits claimed by dynamic, reconfigurable designs of the same cost: for the skewed traffic workloads used to make the case for dynamic networks, the evaluated static networks can achieve performance matching full-bandwidth fat-trees at two-thirds of the cost. Surprisingly, this can be accomplished even without relying on any form of online optimization, including the optimization of routing configuration in response to the traffic demands.\n Our results substantially lower the barriers for improving upon today's data centers by showing that a static, cabling-friendly topology built using commodity equipment yields superior performance when combined with well-understood routing methods.", "pdfUrls": [ "http://conferences.sigcomm.org/sigcomm/2017/files/program/ts-7-2-beyond-fat-tree.pdf", "https://people.inf.ethz.ch/asingla/papers/sigcomm17.pdf", "http://doi.acm.org/10.1145/3098822.3098836" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/33e28ab30ce23a4abeedeae3f4213fcba80d1947", "sources": [ "DBLP" ], "title": "Beyond fat-trees without antennae, mirrors, and disco-balls", "venue": "SIGCOMM", "year": 2017 }, "33e5f1f64122e5d9e4f0e40e2632cfaa143378c3": { "authors": [ { "ids": [ "35002833" ], "name": "Van Chan Ngo" }, { "ids": [ "19226770" ], "name": "Mario Dehesa-Azuara" }, { "ids": [ "2623167" ], "name": "Matt Fredrikson" }, { "ids": [ "40473030" ], "name": "Jan Hoffmann" } ], "doi": "10.1109/SP.2017.53", "doiUrl": "https://doi.org/10.1109/SP.2017.53", "entities": [ "Algorithm", "Amortized analysis", "Broadcast automation", "Confidentiality", "Database", 
"Encryption", "Enterprise resource planning", "Information flow (information theory)", "Non-interference (security)", "Personally identifiable information", "Potential method", "Precision and recall", "Programmer", "Provable prime", "Side-channel attack", "Type inference", "Type system", "Verification and validation" ], "id": "33e5f1f64122e5d9e4f0e40e2632cfaa143378c3", "inCitations": [ "45bbacc22f5913c488042b750e56fcaa2a7e31cf", "05cc2379fe53ca68b8e04e1aba473f008dc47b87", "162e35a780c1fb8591a6bb80d13dfae6e829bd4f", "2d92f591b555e101ce083a73a6cbbfe68e3016d7", "295f26fff0468358b4781646b0da2a3c3466d8ca", "2effb0ea8b7e1b79955f8ffba2e237c070ab4fd4", "17d4908d2331f9b6eb50d73cd30b678074e63a09", "e94c2b2a52f635f728f7df4a0285aafe808dd33f", "81d80c7031426343519b3912956837a02262f21c" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "710-728", "journalVolume": "", "outCitations": [ "a7f3a72a82a4f7864e1a8e6c1b0183d3b3249f20", "12fbc18477afc5dec741a7ad630feaf8eda777f4", "1f932f0d49a4c56d9718e8506d6177c6a6848831", "2c6533d714d8dc4d3f7faf418db93c38df642fea", "0e639ae7d0caae09489f7fbfb6f4739d96f626e8", "20f3fcd714230fbcb88661ba0f623d9e6217a717", "13ba12ca10806c709e4365b849312d4d35ebe8af", "2342738aab04922f8e5128a4ea0b3e4d387d22e4", "2e5122e23ee5f354cea7189fd2900c1aaf290fbb", "15cd2cc6657ecf305a94528819d1326ac313bb61", "55bcdc4cb642538edfc654fa5792ce7cc9abbc4f", "897c06f5a3219bfad70f1d1bb4dcaab39d492d1f", "7eed7154d0590c1e6eb64d4ae7935a526deac4a4", "5503686edf7ab29785c51a7c4b10e9dbbf80c140", "dcca6b40661c280294b526756b9aa67857c1eee2", "be7536d9baaef7ccdbff845f8e98c136b4c80bb3", "c8f6a8f081f49325eb97600eca05620887092d2c", "53989b355dd5ff48a157535141546616ad4b0518", "0156eeb1cef24931454fa901b0f0e9e0b99489dd", "0547bd2bd244a16437e73080cce81a62ac30142b", "2b6df21137f30d25494bb58521a6062f93e915f8", "0025870ef15a8f2858ff4186329d4bde316e9e01", "70f340e80468832b7a293da8a4f1d08ed2786448", "451ce08a5335b00cda49877ba1335e95a91c5af7", 
"06f16d9430d5f6213cf5399b167a3d989c3ff798", "b872abe565d270c4822430d394c4f3e983fdec66", "12cb1b261106fe238505c0772e8826a294fa3546", "d6becde92d4f27a3256e4e852c35c95791525c43", "75068766c0a09523504d14be8aac8a029ad097ef", "076a6e4338424c8df96ee1937eb0426eeaba6885", "0fda9bbccd6908637e2ead1cef69f091bfda75d4", "4b8f1518b21b73d30cedf31560a83a8322f8147d", "36eac00175dcea2b33cf998e8a2b5bca2b567ba0", "26f3cbecbc636984d57e52191c1d87c9377aff6f", "1c82c9e0e480f1aef3427c74450c436cbb234a72", "2200640161a8fe6ce3a03c7bad586e890f10679f", "5bdfefde75957f416abfa6f38feb352cfae6170b", "2a4c3590a661af91fd9262dbc7f49c04b5fb9641", "08f3d15318d657323136cb185805876f608325b5", "352e74019d86163d73618f03429ae452ab429629", "049452db9e14098cf5eaa19756cbe2dfdd2d303a", "182a81eaf31b1a76be592c0890182cacd4199be0", "5ebd59de39d5e79328d84903f47be4c2f5efccb9", "327c5c7540a17718e77bc7bd8be3db12f684f7f2", "0c0b7a24e5e0cf93a966708f26d31f0a74aabeba", "6421c677dfc71fe0fada74b4adae27417cd50d00", "226242629f3d21b9e86afe76b1849048148351de", "43de5136309e262007d3f14893959af69749caf8", "04402122e2fb065ed1280000981f7626496f0afb", "615168555150d80752a1c195229642acbe6fb3d9", "5a6682af0ad2eb0e08e6f52c0101119c603b663c", "32e16ae384e03d76b74be2e04fcf5ac5007fc155", "7272e04a354d3bc54b1a8119e868090996d3bf1d", "39d85ddbf6c9aad76689fd02306dcc7583f5b094", "87a18187e2de928ebb04865390f95ce1852e0eb2", "3671af9d7655977e573bd123f93470f978ea7a62", "4d67f221c595dbfc448e49b1b6e6bf9bfed40f7b", "eabb4b2a55ee1b56331804306d8dac77d18599a0", "4d624b942a58818f8d425460638cb4b65ed84e1c", "8c3d2f83506b2703a11ea034ba2fab175246e2ec", "675724a2a93195682ecd2c9da7d71702e0da3ec3", "3d8775945f7c62b2bca55b7097fde9427b0363bd", "a3dea181e3f3e513fdbdce6252bfcb9bc1403620", "8414a94327b0aacd6a16d74ae2a3bfc5bed1d22a", "4151aabd0b9dc2ff612d90cf10f0b19057901628", "49cdac2e08cbe9f5bbf407f51c047b6583b90c17", "7cf0f7844fa2d7dbb87ba9eb626574d09ad3049d", "525fb146f7f73ba425bb7e806d1f9f9e9b9ad2e4", "0858ff5f1871dd8f5c17f74f22d7a351e1f3a5b4", 
"11e8ad8d5302e8149cc787de5778b52e7e976ca8", "3ccced52d24cad8b3d9b4f69cd2e3d4872e447d9", "a4d1265e3b7473e73ab168b8fa06d185733f853c", "ffbfd62de840f6ccf3dc4fbb920934d908d554bc", "2ae20abb4b3a02814d3a5f1c06474e8429ee51a8", "5f4268edf3d28aacfe928af6719cdd3082207a5e", "1b31c65d8b5023dabcdd18fd57241488834c7206", "a638ddb00cdb5d2fcc9616c7e254eaaa790d48ef", "a718b85520bea702533ca9a5954c33576fd162b0", "1f58abb9a06ecc182f4cd923685a6095cb0be49e", "240f3f64d918fe6cc0f17d8c419a90a7bb664a3f", "0dada24bcee09e1f3d556ecf81f628ecaf4659d1", "490b02129f552dd694ccb7a7c12cb2e6dd211f51", "8925a2067468c7ae6b2b3556487a362725cb55be", "01d3d3e934e7695bda093ff7c264cd2e4c5f4527", "556b89320e2fba041bb9325f5ca6c6d4d85f787e", "046af23d437d93cc7d03b0df2b773d6fb6f046c3", "7047874fc68b902e30fd1a38c19b99f7e5824bb8" ], "paperAbstract": "Side channel attacks have been used to extract critical data such as encryption keys and confidential user data in a variety of adversarial settings. In practice, this threat is addressed by adhering to a constant-time programming discipline, which imposes strict constraints on the way in which programs are written. This introduces an additional hurdle for programmers faced with the already difficult task of writing secure code, highlighting the need for solutions that give the same source-level guarantees while supporting more natural programming models. We propose a novel type system for verifying that programs correctly implement constant-resource behavior. Our type system extends recent work on automatic amortized resource analysis (AARA), a set of techniques that automatically derive provable upper bounds on the resource consumption of programs. We devise new techniques that build on the potential method to achieve compositionality, precision, and automation. A strict global requirement that a program always maintains constant resource usage is too restrictive for most practical applications. 
It is sufficient to require that the program's resource behavior remain constant with respect to an attacker who is only allowed to observe part of the program's state and behavior. To account for this, our type system incorporates information flow tracking into its resource analysis. This allows our system to certify programs that need to violate the constant-time requirement in certain cases, as long as doing so does not leak confidential information to attackers. We formalize this guarantee by defining a new notion of resource-aware noninterference, and prove that our system enforces it. Finally, we show how our type inference algorithm can be used to synthesize a constant-time implementation from one that cannot be verified as secure, effectively repairing insecure programs automatically. We also show how a second novel AARA system that computes lower bounds on resource usage can be used to derive quantitative bounds on the amount of information that a program leaks through its resource use. 
We implemented each of these systems in Resource Aware ML, and show that it can be applied to verify constant-time behavior in a number of applicationsincluding encryption and decryption routines, database queries, and other resource-aware functionality.", "pdfUrls": [ "https://arxiv.org/pdf/1801.01896v1.pdf", "https://doi.org/10.1109/SP.2017.53", "http://arxiv.org/abs/1801.01896", "http://www.cs.cmu.edu/~mfredrik/papers/Ngo17Oakland.pdf", "http://www.cs.cmu.edu/~janh/papers/NgoDFH2016.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/33e5f1f64122e5d9e4f0e40e2632cfaa143378c3", "sources": [ "DBLP" ], "title": "Verifying and Synthesizing Constant-Resource Implementations with Types", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "33f95f238e12e1790ad880ec40cf6c63ea4a70dc": { "authors": [ { "ids": [ "40340635" ], "name": "Jialin Li" }, { "ids": [ "2044926" ], "name": "Ellis Michael" }, { "ids": [ "2883709" ], "name": "Dan R. K. Ports" } ], "doi": "10.1145/3132747.3132751", "doiUrl": "https://doi.org/10.1145/3132747.3132751", "entities": [ "Atomicity (database systems)", "Computer data storage", "Concurrency (computer science)", "Concurrency control", "Consistency (database systems)", "Data center", "Distributed transaction", "Fault tolerance", "Mercenary III: The Dion Crisis", "Replication (computing)", "Scalability", "Strong consistency", "Throughput" ], "id": "33f95f238e12e1790ad880ec40cf6c63ea4a70dc", "inCitations": [ "a6472fe7fbc978de8597c2f783891aa1eb1f87a5" ], "journalName": "", "journalPages": "104-120", "journalVolume": "", "outCitations": [ "d4cd2fe3510c438beeaddd6df22f5cc8b0ff2b92", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "2ad184901a2f3551df5d0406f244ae655ac8c4d2", "0599ba259341963bf8abf2818c874713e570a039", "1e32492f456bcf58d07b1658825733dbfb9d816c", "24edd449ea31b850bc0d3bb3ffc1b8b6eb66e13a", "152f85e4bd2853a458d1350bb64d4a6adca24832", "0847dfb07af6685f96b885a93c2cd4dfb4c3d1a9", 
"1ae507f38fbe2301f4f7fbcd64e2f49afe00a59c", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "57efc2b9ba2a725af1d66cc43c472d0314190051", "1220e4a011c46804d4369b5580dc7fb6e387af54", "02b1103e592fa6bf0499e27f1519692441fad557", "8c8b44029fbdac1572ae47b8eaab3929c9987098", "9aa0d7253574e50fe3a190ccd924433f048997dd", "2077579d62fc090d4ddf45f107ffae0468936165", "3bfa50099269ef3ce832bc7f3710ba6484165092", "3f18cada83f9ac74ebbeeb62b5c4616975ae5848", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "157629dc2a9d2c2c7696e021ff8c1d5a6e7b4197", "517e239f97f50079bc557cccf1a6b56aa5736d30", "18a5f443299784479e78d9e77f175af57cb2fa2b", "0bfe52c4e90fcb4f65d77cec9fc0e6840fa652df", "091a8b2a10483b9899c667862dcfa92fc130bb74", "3bc9d643f741343c6eef24363a20e2b483439149", "4827cc74dba0c39172554cf0116eb111797f0d1b", "4687fdf3c77ef00700fdf1399f7dd81bfe87ef97", "50415403b35868328a07e8a7a537c96eb19baa36", "068e59b88a1230d709d99c83a45d3a5b91260810", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "578c2b170aaab7ea88ea0cca472f123287c5b6b5", "cc977141ecda4914987bb3d91b3d6aee603bef46", "8fb49c4aaea8621e9ec2b54a087d671e4404bab0", "c5ebb02959ab18bd590be4ece10beace708cc3af", "2a3a426ebd38277390d26082405f47a95fb05385", "412a9e54bbb31e12d008a9579994e009c5b40b46", "893448e800e13eb022cc2b3c6328ff1a85bdbe87", "01667cd68d259d08807e266e07c4f8eb1c81d2af", "062c47d2a3afa47f42c6d97d72990b53a48ee9c6", "136eefe33796c388a15d25ca03cb8d5077d14f37", "362855ec18b3febbbb668a85221d59ff094ec1b2", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "9748241beb02ef1e2d0e6dc877c04b354033a838", "0eed0d2db2198a6c7856152f509510b5d058efad", "83684cc2fddbe64f8902d1ee5d5112bf95eaeffe", "308d1c063329de15559c133f95957b51dba8aae2", "6816c447cc4d3d945e0452564ff5d3220e1fdcab", "00ac447d02035c26c7e2852c2457fe812e89038f", "34dff37950ff12788eccac14bc9be9500021a093", "624cb175af600b7749bce00c0932e2a10f72e564", "3d3abf7b60d6e762d635c3b997d48ddb1bc76eb6", "433207f45ac2c9dbe3876ab53af28ed569e94da9", "42142c121b2dbe48d55e81c2ce198a5639645030", 
"083e8efe5b4a1eb2f9954575c4f44c13a8510c5d", "01d8f75b6382c7534a67637249122de28a780ce9", "07d847f310d5fa9138f461f0a25c5e0024f1c4af", "066dda2494a69c0cd50e6a2b758cfd45facad84f", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "05885dbd3ccbbe744a2ee1c39126bd263140e741" ], "paperAbstract": "Distributed storage systems aim to provide strong consistency and isolation guarantees on an architecture that is partitioned across multiple shards for scalability and replicated for fault tolerance. Traditionally, achieving all of these goals has required an expensive combination of atomic commitment and replication protocols -- introducing extensive coordination overhead. Our system, Eris, takes a different approach. It moves a core piece of concurrency control functionality, which we term multi-sequencing, into the datacenter network itself. This network primitive takes on the responsibility for consistently ordering transactions, and a new lightweight transaction protocol ensures atomicity.\n The end result is that Eris avoids both replication and transaction coordination overhead: we show that it can process a large class of distributed transactions in a single round-trip from the client to the storage system without any explicit coordination between shards or replicas in the normal case. 
It provides atomicity, consistency, and fault tolerance with less than 10% overhead -- achieving throughput 3.6-35x higher and latency 72-80% lower than a conventional design on standard benchmarks.", "pdfUrls": [ "https://homes.cs.washington.edu/~lijl/papers/eris-sosp17.pdf", "https://syslab.cs.washington.edu/papers/eris-tr17.pdf", "https://syslab.cs.washington.edu/papers/eris-sosp17.pdf", "http://doi.acm.org/10.1145/3132747.3132751" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/33f95f238e12e1790ad880ec40cf6c63ea4a70dc", "sources": [ "DBLP" ], "title": "Eris: Coordination-Free Consistent Transactions Using In-Network Concurrency Control", "venue": "SOSP", "year": 2017 }, "3416a5227b1e1956acfacba812081c2686367471": { "authors": [ { "ids": [ "40409510" ], "name": "Jeremy Andrus" }, { "ids": [ "2715408" ], "name": "Naser AlDuaij" }, { "ids": [ "1700208" ], "name": "Jason Nieh" } ], "doi": "10.1145/3135974.3135981", "doiUrl": "https://doi.org/10.1145/3135974.3135981", "entities": [ "Android", "Application programming interface", "Dynamic linker", "Graphics processing unit", "Pervasive informatics", "Safari (web browser)", "Smartphone", "Tablet computer", "Thread (computing)", "Web content", "WebKit", "World-system", "iOS" ], "id": "3416a5227b1e1956acfacba812081c2686367471", "inCitations": [], "journalName": "", "journalPages": "55-67", "journalVolume": "", "outCitations": [ "27b14797071561b43e68a5695a9dc9710e5c36ec", "5368c3ef82b0b982aec225acb066f0974e89a84e", "2960c89331eb7afa86584792e2e11dbf6a125820", "28d0b30592d994a7a736f81cfb7f7237e4f364f8", "0b0422b5864ca1a25d6af274bad11c1b2fef1ed5", "39369c4a4601aeed8ad6e22215983d81d800df52", "5e7567dc5c9922527e7ce5e4fd62981488a09829", "6d12aea56165acf3715e2c82b5f560e48359366d", "8865aeb8efaa49a1700230e2cb1dee4c157800c8", "ae2e07f55c6deda39a5b42f3e0d9a6f817623d96", "4e27f44ade4545931a99eee2dc8011b44f5db4b6", "335504b014e48069c6dcf227645ae61830d6cf27", "423455ad8afb9b2534c0954a5e61c95bea611801", 
"08832863bc3f041222f381c8ae143f8a66449059", "9156a8a04250a6eab18f1bd63c30a7227fa2051d" ], "paperAbstract": "Mobile apps make extensive use of GPUs on smartphones and tablets to access Web content. To support pervasive Web content, we introduce three key OS techniques for binary graphics compatibility necessary to build a real-world system to run iOS and Android apps together on the same smartphone or tablet. First diplomat usage patterns manage resources to bridge proprietary iOS and Android graphics implementations. Second, thread impersonation allows a single thread-specific context to be shared amongst multiple threads using multiple iOS and Android personas. Third, dynamic library replication allows multiple, independent instances of the same library to be loaded in a single process to support iOS apps on Android while using multiple graphics API versions at the same time. We use these techniques to build a system prototype, and demonstrate that it runs widely-used iOS apps, including apps such as Safari that use the popular GPU-accelerated WebKit framework, using a Google Nexus tablet running Android.", "pdfUrls": [ "http://doi.acm.org/10.1145/3135974.3135981" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3416a5227b1e1956acfacba812081c2686367471", "sources": [ "DBLP" ], "title": "Binary compatible graphics support in Android for running iOS apps", "venue": "Middleware", "year": 2017 }, "34285a71b3c468a3a6b26793c8921fcd4dab8b14": { "authors": [ { "ids": [ "1961319" ], "name": "Nethanel Gelernter" }, { "ids": [ "14269945" ], "name": "Senia Kalma" }, { "ids": [ "19211525" ], "name": "Bar Magnezi" }, { "ids": [ "19248748" ], "name": "Hen Porcilan" } ], "doi": "10.1109/SP.2017.9", "doiUrl": "https://doi.org/10.1109/SP.2017.9", "entities": [ "3-D Secure", "Denial-of-service attack", "Experiment", "Man-in-the-middle attack", "Mobile app", "Mobile phone", "Password", "Recommender system", "Self-service password reset", "User (computing)" 
], "id": "34285a71b3c468a3a6b26793c8921fcd4dab8b14", "inCitations": [ "0e8642d9c621c23493447d22693e8dac232d4109", "a91d17a86bd28cfbb6b2c4eb7396c5ddb9345deb" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "251-267", "journalVolume": "", "outCitations": [ "685ffd80cf779233d1823045274ac90ba010468f", "0103b4a14ab5396b75c2eec39865e0cec66a8e19", "bc2e813256b9101d8d98f6a165d45ac0c4f4821c", "6d852fbe810b2b6851e66a2e780314d72097ed6b", "e8b16e99dd0b01bd897e11d58ecf4f8085755335", "1e15d382cffc80bfcebfad1cf935dd03c9705189", "2c61a7a2ec8ac2178812fab42a222f35918f47ce", "4fcb4f03afc8f7d780929afbf9584bb7e9ced6f3", "0ce331085b6b7b95a6e9b5c8fa7dea564d2161fe", "aacfb2cfe450ecceb9d607f982dcd7eb45761748", "011327a1f7fd17f0dd73125f9cfb2c4515f379f0", "26aad3afefe147c2b30ba410b09de47da938f512", "4516bee86ae04321d476dceba43d8d2decc5aee1", "3c1e2f4ad11d2cc6ca140d0bd7ad82ad13a52057", "327cc58e608c01855b0a72f6162b98639d3c4a63", "616f66cb4e58beafc87c5f98752aac4bf5d38d10", "5f1427809520ff92e970f71742edf34c76c268d7", "bf61d7f044c08f43367b0eb22cd198b95773e985", "204a47b89d24d7fcac80a30f3deaf5a5f2bcdb25", "12d6cf6346f6d693b6dc3b88d176a8a7b192355c", "1a88eff832e36c0e064ae7bb756663081d71f520", "114580bca9932bfc4e0018886646751adfac724f", "492184989035226d1207801e3e5b9dff8b5f6427", "05dc50efa361dabeaa6d735a0701fc5706ebb6cb", "26a3c709974cd405b5bd64b733a8a62efe708a28", "3946fb0e7c7fb3af40b0f0fdb4e0007a7dac975d", "362f40b7121ec60791546577c796ac9ec4433c21", "13792cfb72b1400b73d5dac005e2d771f44508e8", "82f913e730d47ced52233fd056dc8e7269f0e9bf", "3272595fc86f13c7cce0547f2b464c2befe5a69f", "c950df133c430d0091e6d231c04c5fdf5030ddca", "6d487827e936ce1634765d904abf2454c83b2f33", "3188dc28042effbd519005ec18c07e7afa51c975", "a95abe7eeafa4c2efca85d090b5ed689b8092f9e", "956cbbf33ba15071efb11a54005f252d442700d2", "7855e607955f15f9d818235bc9783dad5f5bee76" ], "paperAbstract": "We present the password reset MitM (PRMitM) attack and show how it can be used to take over user 
accounts. The PRMitM attack exploits the similarity of the registration and password reset processes to launch a man in the middle (MitM) attack at the application level. The attacker initiates a password reset process with a website and forwards every challenge to the victim who either wishes to register in the attacking site or to access a particular resource on it. The attack has several variants, including exploitation of a password reset process that relies on the victim's mobile phone, using either SMS or phone call. We evaluated the PRMitM attacks on Google and Facebook users in several experiments, and found that their password reset process is vulnerable to the PRMitM attack. Other websites and some popular mobile applications are vulnerable as well. Although solutions seem trivial in some cases, our experiments show that the straightforward solutions are not as effective as expected. We designed and evaluated two secure password reset processes and evaluated them on users of Google and Facebook. Our results indicate a significant improvement in the security. 
Since millions of accounts are currently vulnerable to the PRMitM attack, we also present a list of recommendations for implementing and auditing the password reset process.", "pdfUrls": [ "https://www.ieee-security.org/TC/SP2017/papers/207.pdf", "https://doi.org/10.1109/SP.2017.9" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/34285a71b3c468a3a6b26793c8921fcd4dab8b14", "sources": [ "DBLP" ], "title": "The Password Reset MitM Attack", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "345533e1f72f3f9e215e1fc468a3131a90481414": { "authors": [ { "ids": [ "7986482" ], "name": "Yuan Xiao" }, { "ids": [ "2375328" ], "name": "Mengyuan Li" }, { "ids": [ "10414771" ], "name": "Sanchuan Chen" }, { "ids": [ "39939156" ], "name": "Yinqian Zhang" } ], "doi": "10.1145/3133956.3134016", "doiUrl": "https://doi.org/10.1145/3133956.3134016", "entities": [ "3-D Secure", "Adversary model", "Central processing unit", "Channel (communications)", "Ciphertext", "Confidentiality", "Control flow", "Cryptographically secure pseudorandom number generator", "Cryptography", "Ecosystem", "Encryption", "Global Positioning System", "GnuTLS", "Graphene", "Hockey Night in Canada", "Intel Developer Zone", "Kernel (operating system)", "Library", "Library (computing)", "Open-source software", "OpenSSL", "Operating system", "Padding oracle attack", "Programming paradigm", "Public-key cryptography", "Random oracle", "Secure communication", "Side-channel attack", "Transport Layer Security" ], "id": "345533e1f72f3f9e215e1fc468a3131a90481414", "inCitations": [ "32657d005f4501fecefbab7276f6aad32afb766c", "680fa994443080d43e7452f1137b339d5f74cc03", "6db9824d4667b22310c51fe638403238f873e9f2", "26edca5c337b6b6ec4416356f270c35dc074057d" ], "journalName": "", "journalPages": "859-874", "journalVolume": "", "outCitations": [ "2aa0e44b8529de8ee75138eade8aba0bfb9f008f", "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "7189c5e1aab8239a1016f1ec14df4af30047eb59", 
"85d555f7ce19740b4fc656ff797623c6e1513018", "104eb3716d8b2d7e6d69805357d5b7fb87caff3e", "208ed7512ea84f22a004920ea0b4c475bc836abc", "79e14bf42535966db5e056af6d8a0e0ec1f522b2", "13315d952a43c391bf4910271fc2582858e86e9e", "b0ac2616034f56ab1469afb935b55fe7e37f8f41", "3ffc94c7066b4856b1cfae99ab66cd20310e41dd", "0653e2ed9f683868cb4539eb8718551242834f6b", "1495c7daaba55dd2e68e026fc6c1848eee1ee710", "feb5db279d43f6affb474398f96bb5c910aa2340", "558ab57a66e18b9973fc75d5f6ef5d78c1624561", "cef9aef9b73c94eacc55670f3aa8f70329cd4bc6", "691bb92ffd229e2cce7c42c1ede818915afa73ee", "1f0526247a8b393b981abbe539faf3e20390d860", "3ca5880e4fe23ec2ee8025ff6c121ebb5348c6fc", "35ed2649c1808293dd316cb6520749957b8cfc48", "7f489d9801e0674f4436beb34ea8b8695050d5fe", "db0d1f3ed4a418e126d0281d5b3aadd1fd45c982", "e5e1327ef05b629e5015631b562716ea2e024d1f", "05ae289245b5a9222a1a6fc3f36910c3cb0f4662", "8e3270ef5a0afc293021f8d594979ed059e29d5e", "582302da008255ff515f05c3242f750878725745", "16fad84b5cd76c403c94b16353fa6a4d64f19251", "46b2b9f10c52e83b57e60a224696296551f317ea", "3702d43d62b6154773d573594e21a39bbed93271", "565a174a24e7f47dcd7a21f57cabc252b5692a0f", "4fcc61b190c3797de5b4fbc120ff1305d1461086", "0e7c0199bbb4533e8f074d914a45351d80e5cb55", "0e98262b7d5d42ad0f7f7f83a5be5107f6f24df5", "2087f336b0eb38fc60a24a32ec0821fc8fe2b2aa", "7eed7154d0590c1e6eb64d4ae7935a526deac4a4", "33c1a2f0bd5140300d40cc8316c480e3aa5d63cb", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "b6f5251a67c5cf8539c0213c387a583cdcefd493", "cb3fff16823e134f9cf4124aaac4b80bcd5a8d09", "05f70f429a7bf38efa9e457fd486cb862bd495be", "3d6f626d383048fda0ac5b56864141f2521dd38f", "41c2c11acde144ccf62cb6eff30731195d22775b", "35d603028fe164b46e71898d6780811a84d18c1e", "23eb53170c6de9ff5024db120eda200816fa803f", "7cbbab21c6f6fb54a968005afa21468b825f1d1e", "d296252ddf0e2c6b7422008d703843c1863bd15b", "0957332f8beb1ec4071fcb6fc44cb0b5396463d5", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "452c803f91ab670bf36403ed5412875b13ae9e94", 
"984d45494026f7a2fc9c4193ee65b5ef35d937ad", "20086842060b22285da69c689a0b3a3a9f1db386", "0a35c32ebc233556d11c2038f5a4362f2c40b2a9", "cf5dfdbb0b8d1c673726178f37f499b77fcc7f03", "72880d15db2282512e5d3f0a3796b397d68cc7db", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "46e46c77423fcaf6e4a435fecca4430b1e78bd5d", "94206be34b27903edd8e6c35efde0820750c525c", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "615168555150d80752a1c195229642acbe6fb3d9", "792ee164ef93370d859a4ec4cea903d382ea6dcb", "71691ee2dbe001d599334e5389d80dd32c44a74e", "6e9672dd24ee686fba44969ddd181506d5a8f92f" ], "paperAbstract": "Intel Software Guard Extension (SGX) offers software applications a shielded execution environment, dubbed enclave, to protect their confidentiality and integrity from malicious operating systems. As processors with this extended feature become commercially available, many new software applications are developed to enrich to the SGX-enabled ecosystem. One important primitive for these applications is a secure communication channel between the enclave and a remote trusted party. The SSL/TLS protocol, which is the de facto standard for protecting transport-layer network communications, has been broadly regarded a natural choice for such purposes. However, in this paper, we show that the marriage between SGX and SSL may not be smooth sailing.\n Particularly, we consider a category of side-channel attacks against SSL/TLS implementations in secure enclaves, which we call the control-flow inference attacks. In these attacks, the malicious operating system kernel may perform a powerful man-in-the-kernel attack to collect execution traces of the enclave programs at the page level, the cacheline level, or the branch level, while positioning itself in the middle of the two communicating parties. 
At the center of our work is a differential analysis framework, dubbed Stacco, to dynamically analyze the SSL/TLS implementations and detect vulnerabilities-discernible execution traces-that can be exploited as decryption oracles. Surprisingly, in spite of the prevailing constant-time programming paradigm adopted by many cryptographic libraries, we found exploitable vulnerabilities in the latest versions of all the SSL/TLS libraries we have examined.\n To validate the detected vulnerabilities, we developed a man-in-the-kernel adversary to demonstrate Bleichenbacher attacks against the latest OpenSSL library running in the SGX enclave (with the help of Graphene) and completely broke the PreMasterSecret encrypted by a 4096-bit RSA public key with only 57286 queries. We also conducted CBC padding oracle attacks against the latest GnuTLS running in Graphene-SGX and an open-source SGX implementation of mbedTLS (i.e., mbedTLS-SGX) that runs directly inside the enclave, and showed that it only needs 48388 and 25717 queries, respectively, to break one block of AES ciphertext. Empirical evaluation suggests these man-in-the-kernel attacks can be completed within 1 or 2 hours. 
Our results reveal the insufficient understanding of side-channel security in SGX settings, and our study will provoke discussions on the secure implementation and adoption of SSL/TLS in secure enclaves.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134016", "https://arxiv.org/pdf/1707.03473v1.pdf", "http://arxiv.org/abs/1707.03473", "https://arxiv.org/pdf/1707.03473v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/345533e1f72f3f9e215e1fc468a3131a90481414", "sources": [ "DBLP" ], "title": "STACCO: Differentially Analyzing Side-Channel Traces for Detecting SSL/TLS Vulnerabilities in Secure Enclaves", "venue": "CCS", "year": 2017 }, "3470761bbe2c48b763a31a684abaeb8c99596b7c": { "authors": [ { "ids": [ "1682750" ], "name": "Yehuda Lindell" }, { "ids": [ "2192482" ], "name": "Ariel Nof" } ], "doi": "10.1145/3133956.3133999", "doiUrl": "https://doi.org/10.1145/3133956.3133999", "entities": [ "Adversary (cryptography)", "Adversary model", "Arithmetic circuit complexity", "Compiler", "Computation", "Denial-of-service attack", "Experiment", "Fairness measure", "LiveCode", "Secret sharing", "Secure multi-party computation" ], "id": "3470761bbe2c48b763a31a684abaeb8c99596b7c", "inCitations": [], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "816", "journalVolume": "2017", "outCitations": [ "04948723dec0e6724777ee56f0d10168cce44921", "937cb0795bf3bfb5c9cbb7387ed68301bb6995ce", "19c3736da5116e0e80a64db35afe421663c4b4a8", "23e2071ddc2cfd7872839716260eb23a5fd3a821", "6a03239a737bd3e923878043d51f56508ff11b13", "796ff7cef7dcd8b9c577a86473fc1067e1078144", "13e622fca1a6b52aa85898e260f9455e4ba0d94b", "31445e3bd3672ed743c4a089cc0db4f23357f0f2", "6d5ba62ac58083fd47739ae09df5bbfcb18f681d", "4701faa37e2c5ba5d1810b9b39db28bbcf4a230e", "28b5bbfb9f1b4738d6ed567d62fdfe52be9cb11e", "7dd5a9a774b96ef8f551ded6418fe8adf28e8952", "470d8902d1e250923e9fb0289b484cc9b2149abb", "42333e3f231bbfe508f6da6bad2feff9ae223113", 
"2f5f19d59342dee92fd9a52e235ad30300f7f56e", "bcb49a06e4fb7ea831257e146073d84234f4d238", "61883fbd35396888924520e109355e912337d2b8", "28e9eaaeaa886b0896744e71f7f8a474e98f80bb", "15799e114a32f53f67e0a18edd347aa831745bd5", "9888dff0c68e01d8d1ec5cef5033e3f3b896ea3d", "4a77fe1e0f42d44339bf0f690deddab56ce0aa7c", "94f133780f7c4b09e2513628e5cebe67c009b7d5", "8651440f3f6e1e0d8a29564c0135673e9dd13829", "6223684e14778e4d7948e994d2169ebf38e0a95f", "fe38ee9944077cd14a0f6f1813af2d3d4b59ce43", "33148623fc14ea5735e73dd716d030ab17118299", "37d41c44e034a282820f698bb70cf15c2083a9ab", "0435dddec1c8b1011bd5e3ea07c45d1cdf0b520b", "3e52ecdd8d547ee03a6132d3ee27aab11a54403e", "19e6f3ea035d33590a8598be68ac82e6f00ce518", "d04f7f8eed11e5e58a41e314b00e49d7424d82ec", "18f5d7663632c92c84f89151823dff2120ae43cf", "5432a4eae229c4304dd20645e4491286b05295e8", "a9ca6a9079bcb5c513ebf63a029d7cdbb8245fa3", "a853e0842d74fa3ff146f45ea7f2ed52dac08d1a" ], "paperAbstract": "Protocols for secure multiparty computation enable a set of parties to compute a function of their inputs without revealing anything but the output. The security properties of the protocol must be preserved in the presence of adversarial behavior. The two classic adversary models considered are semi-honest (where the adversary follows the protocol specification but tries to learn more than allowed by examining the protocol transcript) and malicious (where the adversary may follow any arbitrary attack strategy). Protocols for semi-honest adversaries are often far more efficient, but in many cases the security guarantees are not strong enough.\n In this paper, we present a new efficient method for \"compiling\" a large class of protocols that are secure in the presence of semi-honest adversaries into protocols that are secure in the presence of malicious adversaries. 
Our method assumes an honest majority (i.e., that t<n/2 where t is the number of corrupted parties and n is the number of parties overall), and is applicable to many semi-honest protocols based on secret-sharing. In order to achieve high efficiency, our protocol is secure with abort and does not achieve fairness, meaning that the adversary may receive output while the honest parties do not.\n We present a number of instantiations of our compiler, and obtain protocol variants that are very efficient for both a small and large number of parties. We implemented our protocol variants and ran extensive experiments to compare them with each other. Our results show that secure computation with an honest majority can be practical, even with security in the presence of malicious adversaries. For example, we securely compute a large arithmetic circuit of depth 20 with 1,000,000 multiplication gates, in approximately 0.5 seconds with three parties, and approximately 29 seconds with 50 parties, and just under 1 minute with 90 parties.", "pdfUrls": [ "http://eprint.iacr.org/2017/816", "https://eprint.iacr.org/2017/816.pdf", "http://doi.acm.org/10.1145/3133956.3133999" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3470761bbe2c48b763a31a684abaeb8c99596b7c", "sources": [ "DBLP" ], "title": "A Framework for Constructing Fast MPC over Arithmetic Circuits with Malicious Adversaries and an Honest-Majority", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "34a5aa85c26bf8142148aab3ccfbdab6535b6a0a": { "authors": [ { "ids": [ "6516728" ], "name": "Phuong Nguyen" }, { "ids": [ "17812673" ], "name": "Steven Konstanty" }, { "ids": [ "38583552" ], "name": "Todd Nicholson" }, { "ids": [ "2203385" ], "name": "Thomas O'Brien" }, { "ids": [ "4066299" ], "name": "Aaron Schwartz-Duval" }, { "ids": [ "7596836" ], "name": "Timothy Spila" }, { "ids": [ "1688353" ], "name": "Klara Nahrstedt" }, { "ids": [ "1687256" ], "name": "Roy H. 
Campbell" }, { "ids": [ "1722544" ], "name": "Indranil Gupta" }, { "ids": [ "2169061" ], "name": "Michael Chan" }, { "ids": [ "2433690" ], "name": "Kenton McHenry" }, { "ids": [ "40263285" ], "name": "Normand Paquin" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud computing", "Data acquisition", "Data curation", "Digital data", "Experiment", "Real-time data" ], "id": "34a5aa85c26bf8142148aab3ccfbdab6535b6a0a", "inCitations": [ "e1460607eda2df80ce69a031dcadba4225d95434" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "11-20", "journalVolume": "", "outCitations": [ "16287f07e76eaec9cfe06c76c859161b2607e7ef", "357dda6d8d0fd4503d5517a415a972d28a062e28", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "e4ec707578ad735fcdb28391e7c9593951603032", "404ec4548bae645f0e1ec03b5f935c5611216773", "2fcc056c68f9d46c3f0d4ed2b1ef8cf4f9bff4f0", "2738be2b43e83bc472d5f6fd79e01fc6a76209d0", "6013bcebbd8f86b29db74b92ce52d0d5e4f67191", "72da36b47c94c6eac54a8a63291415a4314b1f29", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "eadbb2004ffaa92b665df6e4ac2477bc628cb8db", "03363ed04e9d4d2e8c9348551815e80615969611", "661b19ff987b9ed9d9252324d4a72ab1fbd588ae", "a7f3469085bf4a5c2b67e6a678540ad4d33883d3", "8688c96c558f20f65ca886f7ce1470ce63aec63c", "e9d2a035dd1af0ab06b1c95c4b668c5746e590e0", "5053d80a916aa6be5d1f2253a5f420954da7a3e4" ], "paperAbstract": "In this paper, we present a data acquisition and analysis framework for materials-to-devices processes, named 4CeeD, that focuses on the immense potential of capturing, accurately curating, correlating, and coordinating materials-to-devices digital data in a real-time and trusted manner before fully archiving and publishing them for wide access and sharing. 
In particular, 4CeeD consists of novel services: a curation service for collecting data from microscopes and fabrication instruments, curating, and wrapping of data with extensive metadata in real-time and in a trusted manner, and a cloud-based coordination service for storing data, extracting meta-data, analyzing and finding correlations among the data. Our evaluation results show that our novel cloud framework can help researchers significantly save time and cost spent on experiments, and is efficient in dealing with high-volume and fast-changing workload of heterogeneous types of experimental data.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101115", "https://www.ideals.illinois.edu/bitstream/handle/2142/91007/4CeeD_technicalreport.pdf?isAllowed=y&sequence=2" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/34a5aa85c26bf8142148aab3ccfbdab6535b6a0a", "sources": [ "DBLP" ], "title": "4CeeD: Real-Time Data Acquisition and Analysis Framework for Material-Related Cyber-Physical Environments", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "34a9eba074b1439d972541ffcffe70d90bac02aa": { "authors": [ { "ids": [ "2608031" ], "name": "Albert Kwon" }, { "ids": [ "1715309" ], "name": "Henry Corrigan-Gibbs" }, { "ids": [ "1695217" ], "name": "Srinivas Devadas" }, { "ids": [ "4920811" ], "name": "Bryan Ford" } ], "doi": "10.1145/3132747.3132755", "doiUrl": "https://doi.org/10.1145/3132747.3132755", "entities": [], "id": "34a9eba074b1439d972541ffcffe70d90bac02aa", "inCitations": [ "19a90a5f759bf7c7de7df13590246c926c65895c", "016d1541f81655d3c193aafcfb3e9fab64dba2b3" ], "journalName": "", "journalPages": "406-422", "journalVolume": "", "outCitations": [ "09af9108cb5c196d5c15a6f3d26e604434203bea", "56e9649bb6c33dce327b808ebd1ab7236099e110", "8cbc90cc22bb8e4aab312cb5f22c4db31325391d", "23864b54c1ee459fc39664ce947c0595794ea0b6", "54eeba9b6b2c464f1ee8b8168add49352209c9a7", 
"d53c7989acf948fec62d4b1cfcb6c328df048638", "2fc986fd942797c0bcbebf01f464b375f1dd464d", "8eaef162b486b59ba310881d77c1ee2de0eb53ab", "266681c25e3d67e0322249ad0a2f23f45f8f6c28", "3945bcc35b2b82c593984503f9ba4c32562e57dd", "c5e33aea15b656a97607593b75f86536be5239f5", "9d2c1271f1219522d13f150c2b04123bef300dd9", "5b566b58184e302e1bd364903010fcc55a226fd3", "78ff315b118aec23d9759841b53263fb49307ada", "c19bc1e9f84d6baefec08caa1eba6bb41aa97e47", "2949851ab9827fdd334ecc3b392296df2aacaf92", "17570a014c53a0d88af9c5e1ded67b82b82caf18", "78e2d6b7a671d8e53f207adff088833fd7606e13", "2ef8cff0eb4afcc2ad611f664d5d1ddf75e022a6", "6e8cf181b6e4d759f0416665a3a9f62ad37b316c", "26e86abf586ffd17d38692d5c5c4311d3e2b32f5", "bac41b59697da3ca5c80ca08f2bbbc97a3576248", "0708b1989b2e508416dddfd77b469351152b32c5", "2b5e90bc36ded540baf24cdc8a5bb8e66032bc08", "bedd4ccdaa4f35c58b1b333202b8156715ce60a5", "a513c22df84d752391f050fa8e004ba2630409d4", "33bcd8da1f6dc589cde6415434175548fd527ef7", "47ee59d3ee71a798c1f84e8b88cab89c5afcb636", "b1fa37ec7cf8c76ed30961a86019bb78073f6287", "02dad9c51e3a2e2117ffc41d624de4a090271d1f", "60d6ac52ef063d01cea47601e9b9bde1e3148440", "2e4b61ca5ff7af8743e4365edeb40cd87df15c5a", "e95cd876170c1bf0a35f21e7c6d98946698bcaf5", "dc52aabb6483298397518a996112977dcb76a164", "899eaaeccb304c15deaf727841a10eb63c08bead", "0587b0708e48cf9fff42ff6f62edc04dfcf34bb7", "03a9f96a5e95587ab319fb3bddb931ee84fb648d", "31445e3bd3672ed743c4a089cc0db4f23357f0f2", "345947186f190649c582204776071ac9a62e8d67", "a089defc1eea22b4d3afaeccf031ae110d7af459", "81547539c2c8db766cf2fb82f7d34687a342569e", "18b1c62d6c7fa0e619f0c13172d8852b3d5a71fe", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "1cf87af22b3b4dd0ff1144d861e0573121d8de2e", "566333376dd1af014555b4cf82cda42c22501013", "ccaf799c227b79152058c24eab158187e2586a04", "406a37d8ccb6cb1355b7aeded65e50fc00b2977c", "108747579aef6bf029623639a86070feaf5cad41", "06a1d8fe505a4ee460e24ae3cf2e279e905cc9b0", "8fff335957604e0790b5b8591c9e335936272fd9", 
"9b2c3acc1806ccfdbae67bc0a353692f0ed31091", "8d69c06d48b618a090dd19185aea7a13def894a5", "8750c0b8094957003fd7f681f9ef8af47b86a99d", "20ef4778cd48f946bfa63ffb18332199fd3f2ad5", "096a2026fa46abd43143e8ae3dc0ce6414310cf9", "357af3dd66a8ee994f17c890422fda1b618586d3", "ebae9c7d91ea8b6a987642040a2142cc5ea67f7d", "7e923623aacbd0e28d2ce7d4e4539c75decc8fd6" ], "paperAbstract": "Atom is an anonymous messaging system that protects against traffic-analysis attacks. Unlike many prior systems, each Atom server touches only a small fraction of the total messages routed through the network. As a result, the system's capacity scales near-linearly with the number of servers. At the same time, each Atom user benefits from \"best possible\" anonymity: a user is anonymous among all honest users of the system, even against an active adversary who monitors the entire network, a portion of the system's servers, and any number of malicious users. The architectural ideas behind Atom have been known in theory, but putting them into practice requires new techniques for (1) avoiding heavy general-purpose multi-party computation protocols, (2) defeating active attacks by malicious servers at minimal performance cost, and (3) handling server failure and churn.\n Atom is most suitable for sending a large number of short messages, as in a microblogging application or a high-security communication bootstrapping (\"dialing\") for private messaging systems. We show that, on a heterogeneous network of 1,024 servers, Atom can transit a million Tweet-length messages in 28 minutes. 
This is over 23x faster than prior systems with similar privacy guarantees.", "pdfUrls": [ "https://arxiv.org/pdf/1612.07841v3.pdf", "http://doi.acm.org/10.1145/3132747.3132755", "https://www.sigops.org/sosp/sosp17/slides/atom-sosp17-slides.pdf", "https://arxiv.org/pdf/1612.07841v2.pdf", "https://arxiv.org/pdf/1612.07841v1.pdf", "https://people.csail.mit.edu/devadas/pubs/atom.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/34a9eba074b1439d972541ffcffe70d90bac02aa", "sources": [ "DBLP" ], "title": "Atom: Horizontally Scaling Strong Anonymity", "venue": "SOSP", "year": 2017 }, "34b871ba7fe283a69fd28b641866251fa3c5921e": { "authors": [ { "ids": [ "19322386" ], "name": "Alena Naiakshina" }, { "ids": [ "36045338" ], "name": "Anastasia Danilova" }, { "ids": [ "24010906" ], "name": "Christian Tiefenau" }, { "ids": [ "24048223" ], "name": "Marco Herzog" }, { "ids": [ "1930390" ], "name": "Sergej Dechand" }, { "ids": [ "15484498" ], "name": "Matthew Smith" } ], "doi": "10.1145/3133956.3134082", "doiUrl": "https://doi.org/10.1145/3133956.3134082", "entities": [ "Computer science", "Human error assessment and reduction technique", "Password", "Usability", "Usability testing" ], "id": "34b871ba7fe283a69fd28b641866251fa3c5921e", "inCitations": [ "b1400438b4822d59a64fba31d0dc590306418ac3", "671e039c3ba333393c02877e1ff06e6ad778ea95" ], "journalName": "", "journalPages": "311-328", "journalVolume": "", "outCitations": [ "3d46dbb0da1b4e0ee5b45c46525d9459fb94222d", "9e463eefadbcd336c69270a299666e4104d50159", "8bba0a42134ea5df896cdde59e62bbd90e9f4dd4", "01ddb2881ee1e912ca52c5d59248e451d9827519", "1b05173afeba91da0f8fc3429904de18b4b14fdc", "28fd36a24a38de08532efe7594b2d29f4035fd14", "5f192e4b543662f36d3b70427959f555c20900c4", "16cdcedec9a5fc51d89591034908b8580b911b7c", "10ea51fec6da43703b9a5935a278dca952e20087", "11808ebcb5579a56dcd07267b420a5411536cd0d", "4e07480c8a99307752c8c3b5614e723029921975", "5c3bb603b22c147f294a48cbf7af746e017030db", 
"b13ea783a9090fba3bc345b0ed595b39c0bf7281", "5ad46d82930280df86f9337cfe53bd9718035488", "f60628636b64c187db1f106823f5af5730b973cd", "11d974e42b5c67696f5858afc467d84a1a3422e9", "0f019a6450df97b7241be109bdb9739c5a969f1a", "71f8163801980fbaa494cb8c149bd7388034c2ba", "cb386915dfab2f665829861d43377d637bf5df05", "423a967be1c58e40466d34b2d9a9445ade19d03a", "e8b16e99dd0b01bd897e11d58ecf4f8085755335", "6209474964e02ba34ed539cf3cee8044048a1bda", "3bb1268a8cd6442b85505dd6b675c4d7372ac53f", "d9891de8530cde280f01a8afb735e271a355ea58", "633b4e8184880b5d4bcbe144e4396fe8efca5605", "2c61a7a2ec8ac2178812fab42a222f35918f47ce", "d1395d4b168697426550bd560e83f8ca1c77192d", "3c97fad7f822a3ae08340e4f8102312f8b9643c0", "8feb09cc758a2bb79f20bd5dc67f027951eefabe", "d4fb399133e3f0625f9276be1d8f054bf6029393", "389f55c5c376db4ce1c88161dca98c329614faa8", "80dc1bcd0d0479b271021a1aa481258669bbc173", "3272595fc86f13c7cce0547f2b464c2befe5a69f", "0b12990c4ed123ab6cdd1141fb34401f259d61aa", "349fd8a34f0d308e30ec4258c924aa1bab7ce23f", "0cb0a2b5dac0972fa6388b2f31f76c89455a10db", "782df863151f3f6c93f5dde08f33577b0061071a", "ffb2003551fa97cde9d82f8219fb2754090e37b3", "f8b27873043cf56f158d013c6175996475a6509d", "d02ecbe31041d38be4febad484994a2c04bd9014", "3df3139d3349bb70e0bb17aacc46da2fec045c31", "6d16c1742df0f56c16c9fb4e407cc3aa83c6efe5", "369d707c6bdf17645a322d0e2bc610798bc8c1b8", "41dbaa15a59c35ed3df64ee26cba6b501a6c425f", "6609a3b07433c59cb8949b77547420d284cfae78", "ab227751db0b40356144fc89a580927656fdba39", "84fc67e73a6b20ea4cacd5eb189a469bc7ea79a2", "8f52060ced8136c9691a44d89e77ff1b1a373682", "983b547e23fb1093876ab2c594a0d9e0259b1b9c", "4fbe19f14bf24b7c32142ec6c627758ac4a18c52", "de6688ceeb8ebdc458b2b205846abd156086363b", "5570835de31f18f294733cb5d1c4e8dbb6e7b582", "cb706f6488b6960bfb742676ac4bfb372a3e055d", "492184989035226d1207801e3e5b9dff8b5f6427" ], "paperAbstract": "Passwords are still a mainstay of various security systems, as well as the cause of many usability issues. 
For end-users, many of these issues have been studied extensively, highlighting problems and informing design decisions for better policies and motivating research into alternatives. However, end-users are not the only ones who have usability problems with passwords! Developers who are tasked with writing the code by which passwords are stored must do so securely. Yet history has shown that this complex task often fails due to human error with catastrophic results. While an end-user who selects a bad password can have dire consequences, the consequences of a developer who forgets to hash and salt a password database can lead to far larger problems. In this paper we present a first qualitative usability study with 20 computer science students to discover how developers deal with password storage and to inform research into aiding developers in the creation of secure password systems.", "pdfUrls": [ "http://net.cs.uni-bonn.de/fileadmin/user_upload/naiakshi/API_Usability_Developer.pdf", "http://arxiv.org/abs/1708.08759", "https://arxiv.org/pdf/1708.08759v2.pdf", "https://acmccs.github.io/papers/p311-naiakshinaA.pdf", "https://arxiv.org/pdf/1708.08759v1.pdf", "http://doi.acm.org/10.1145/3133956.3134082", "https://users.cs.fiu.edu/~carbunar/teaching/cis5374/cis5374.2017/slides/storage.pdf", "http://net.cs.uni-bonn.de/fileadmin/user_upload/danilova/API_Usability_Developer_1_.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/34b871ba7fe283a69fd28b641866251fa3c5921e", "sources": [ "DBLP" ], "title": "Why Do Developers Get Password Storage Wrong?: A Qualitative Usability Study", "venue": "CCS", "year": 2017 }, "34c432c3a4a068e64eb34bb41c4e3e0f3762363c": { "authors": [ { "ids": [ "2526126" ], "name": "Baptiste Lepers" }, { "ids": [ "1711100" ], "name": "Willy Zwaenepoel" }, { "ids": [ "2160840" ], "name": "Jean-Pierre Lozi" }, { "ids": [ "3238041" ], "name": "Nicolas Palix" }, { "ids": [ "22771336" ], "name": "Redha Gouicem" }, { "ids": [ 
"2052509" ], "name": "Julien Sopena" }, { "ids": [ "1793096" ], "name": "Julia L. Lawall" }, { "ids": [ "1715488" ], "name": "Gilles Muller" } ], "doi": "10.1145/3102980.3102984", "doiUrl": "https://doi.org/10.1145/3102980.3102984", "entities": [ "Formal verification", "Multi-core processor", "Operating system", "Scheduling (computing)" ], "id": "34c432c3a4a068e64eb34bb41c4e3e0f3762363c", "inCitations": [ "d5e72eb9d5eb35303b0885612f74a0b230355b35" ], "journalName": "", "journalPages": "18-23", "journalVolume": "", "outCitations": [ "23ee1c97c4a1229618bf6a614b02f33dc678fe6b", "8761ff5d92737f409e5d1d326967892b3bd24371", "1d0f2662cca5c859419b78fea468f4bc2f39e87d", "1bcd5669ec9b19a8af40c9bacf980610ff39907b", "92fd6bf920ac934218253085be239ce77b4aa3c4", "12a760827e2531d3bfef437fbdd88178e717c961", "18e020e199c735428556834fff06e2304dc100b3", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "07d50264195a7bebb394cac60866cccfc4689e13", "9aa2587c1b61535a1e848d1f0b8d680221374861", "0aeb77fb41dc8e863e054fcffea7b8b3011515ce", "53aabc0ab7bdb22c4bb5b508a4db2fc4a2387060", "0cd328c7e014bb0326b4a7744df5d267ebf040cb", "22a3f0837bd6a913f516ba497469176be641c7d4", "0fc4415291af1e74f23dfcf3ba3ab192c6649a79", "6d04b5a9559199c483b696abac683c6d720cc61d", "dd916d401b90d848dd0c1a99d78c034e3c8bb448", "04d6f78e14a92fa72bcefc206c24b2df7b27e5e6", "2346439ece014d5e3ce1564adc2a7ca098a37c8e", "05aa7c516dc49ae567fd8f376a389cc3fe9dcd80", "36a0e9c2be06f5ec6a89d8cc7e2793e33e0f6efb", "45ac0e85b3ff21bc12a7147df167be38f0d24b9c", "513bd1e5ec39f711f212d2105af3ee03dea4b53d", "1e6cd21fee09c029447b597d6146a229a9d1377b", "013989a60ef17d389252530bc25e68005376272b", "16a455aeacd14529bee92b0c197619fa2d173151" ], "paperAbstract": "Operating systems have been shown to waste machine resources by leaving cores idle while work is ready to be scheduled. 
This results in suboptimal performance for user applications, and wasted power.\n Recent progress in formal verification methods have led to operating systems being proven safe, but operating systems have yet to be proven free of performance bottlenecks. In this paper we instigate the first effort in proving performance properties of operating systems by designing a multicore scheduler that is proven to be work-conserving.", "pdfUrls": [ "https://infoscience.epfl.ch/record/227943/files/main.pdf", "http://doi.acm.org/10.1145/3102980.3102984" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/34c432c3a4a068e64eb34bb41c4e3e0f3762363c", "sources": [ "DBLP" ], "title": "Towards Proving Optimistic Multicore Schedulers", "venue": "HotOS", "year": 2017 }, "34d7cbb53b5fa6d1885d0f7601b2eddd058d5b78": { "authors": [ { "ids": [ "21274233" ], "name": "Sandeep R. Agrawal" }, { "ids": [ "3273774" ], "name": "Sam Idicula" }, { "ids": [ "2571988" ], "name": "Arun Raghavan" }, { "ids": [ "2446731" ], "name": "Evangelos Vlachos" }, { "ids": [ "1804735" ], "name": "Venkatraman Govindaraju" }, { "ids": [ "2568313" ], "name": "Venkatanathan Varadarajan" }, { "ids": [ "1977332" ], "name": "Cagri Balkesen" }, { "ids": [ "1707630" ], "name": "Georgios Giannikis" }, { "ids": [ "31644216" ], "name": "Charlie Roth" }, { "ids": [ "37495353" ], "name": "Nipun Agarwal" }, { "ids": [ "3243326" ], "name": "Eric Sedlar" } ], "doi": "10.1145/3123939.3123985", "doiUrl": "https://doi.org/10.1145/3123939.3123985", "entities": [ "Atomicity (database systems)", "Big data", "Computation", "Double data rate", "Exabyte", "Hardware acceleration", "In-memory database", "Intel Core (microarchitecture)", "Limiter", "Manycore processor", "Memory controller", "Performance per watt", "Processor design", "Remote procedure call", "Shared memory" ], "id": "34d7cbb53b5fa6d1885d0f7601b2eddd058d5b78", "inCitations": [ "108d3e6081695a0901a36c62dc4a17d9a93d0110" ], "journalName": "", 
"journalPages": "245-258", "journalVolume": "", "outCitations": [ "bcb288389d4318494887fe20ee68b6b18f39a3a5", "6160c4fccece40854d1c992eeeb1ff4ecac9d2cd", "43b319ab2e717a1a711d6b06ea9598ea042afe1a", "5821447d63168b6a19ff534028a4aee8ace16747", "03e93625d185c0ac144c97fdf269b5ae5f38351e", "4954fa180728932959997a4768411ff9136aac81", "c0b438eee7bd423606da9335229602b9c77c10d4", "cc05fdb70b630138dd9b64a901eec9c36146c371", "19867fdcb493dbff53c5e36b28b50e1366bdf007", "0f16f6f478b5c788dce466eb50e36c612273c36e", "2c9b77a063a3459ed8f3be0c0066724a38e225e5", "2a19d31a818be868bf39dcf258b36a86ef2ece3f", "40913f24add55cb2d166242b9dfe0044d8411035", "b09723a5ba7d3b99f67550212957c79532a0db3f", "15c0ef33776dffdafd66cb212b88f394d14b9b01", "60cfe41fd68644fb19cba99babae694a2acacc17", "e8283537aa41f07b9cedb016d25cc2c0d713226b", "efa5558bddd68abe4adc81adbbef6f739e648392", "08639cd6b89ac8f375cdc1076b9485ac9d657083", "6068cdb7a834f47a38ffcbfa4128e70ed3e385eb", "66fd2f7f4e4aa6f890a68241c9194f97348c59d9", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "60a46436e466cc81f2da1443e0d98ba750337f0d", "9141bafcff1df2dbabf9a20671d2fa1bcb55aae5", "74efa2599d603cc91618c52e586f25c01e4089b8", "ade263c815bf995916a2fa56c89f70efacf6ec5f", "e9fa7e93d2f4e958cde16c6cf9d5bc966f8e7ffc", "90b2e3db0f9a2d783382e6cfe69c927c56efc82d", "3c75dd2f34997749104cf7e18378ea58713b86b2", "8d67bac352dcd43c9c08f917ba8c4bebb444b55d", "2238562e6b65b0bb1a43e2f3041fe85ee8bb7ec4", "0258946b2046bec0d52d1e5b90204af335ce870e", "1f7d29b2a8918b3d57c1ffe8b4947ef6fe3dfc67", "c630a97772d9b5d2380d409b2ef2c28dced5e392", "133eacaf0ad25b8364cb4510007d9363298e8adf", "0139dceb6cef21b234e454d53154f30391495862", "93df052cf26fa878a2381b5ecf8f5649a01267f3", "9d2c683bf5227eda7110d8a746ccc971dbad4c0d", "1bed30d161683d279780aee34619f94a860fa973", "3a237a55c2c1895f8d6ef9a41013cfda9e4de462", "6b1146e51b2c4d5b8433d4d9c6dbf87c6c484196", "06ce77e4abea63948580340be25d7f2a80369e5a", "8b10b13fb495101d1e4eb768907cff05e3bd9315", 
"6b37f1d8c7afc971c96c78afa99ec9daef357e85", "5f0a21670abe1634780d27f60228a83a6a8232ad", "1be969b6bc855741434eea9cfb9700d99398e146", "1d3b7546a9c45bad6bb0f53c8ce2b776e31af724", "7d3c9eb93a673ba8fd543f857779091499cb01da", "269c24a4aad9be622b609a0860f5df80688c2f93", "06060217245f7f4678982fa5ea453da99afa7dbe", "30db3e0e6add0c2c699e863e56eb8b5e89b10951", "4139eedda8717ffd60052f68ed78b996aaebfced", "26a7c5cd92c018f8348c1424e10042811ec15148" ], "paperAbstract": "For many years, the highest energy cost in processing has been data movement rather than computation, and energy is the limiting factor in processor design [21]. As the data needed for a single application grows to exabytes [56], there is clearly an opportunity to design a bandwidth-optimized architecture for big data computation by specializing hardware for data movement. We present the Data Processing Unit or DPU, a shared memory many-core that is specifically designed for high bandwidth analytics workloads. The DPU contains a unique Data Movement System (DMS), which provides hardware acceleration for data movement and partitioning operations at the memory controller that is sufficient to keep up with DDR bandwidth. The DPU also provides acceleration for core to core communication via a unique hardware RPC mechanism called the Atomic Transaction Engine. 
Comparison of a DPU chip fabricated in 40nm with a Xeon processor on a variety of data processing applications shows a 3× - 15× performance per watt advantage.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123985", "http://pages.cs.wisc.edu/~venkatv/MICRO-50_223-Camera-Ready.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/34d7cbb53b5fa6d1885d0f7601b2eddd058d5b78", "sources": [ "DBLP" ], "title": "A many-core architecture for in-memory data processing", "venue": "MICRO", "year": 2017 }, "34f7dcf1f75a398da5ab6bdf62145accb64c8971": { "authors": [ { "ids": [ "1812494" ], "name": "Nikhil Jain" }, { "ids": [ "1823585" ], "name": "Abhinav Bhatele" }, { "ids": [ "1958455" ], "name": "Louis H. Howell" }, { "ids": [ "1998865" ], "name": "David B\u00f6hme" }, { "ids": [ "33950551" ], "name": "Ian Karlin" }, { "ids": [ "32757734" ], "name": "Edgar A. Le\u00f3n" }, { "ids": [ "2383364" ], "name": "Misbah Mubarak" }, { "ids": [ "2546078" ], "name": "Noah Wolfe" }, { "ids": [ "33289410" ], "name": "Todd Gamblin" }, { "ids": [ "3062202" ], "name": "Matthew L. 
Leininger" } ], "doi": "10.1145/3126908.3126967", "doiUrl": "https://doi.org/10.1145/3126908.3126967", "entities": [ "Computation", "Experiment", "Fat tree", "Inter-process communication", "Interference (communication)", "Job Control Language", "Library", "Network topology", "Scalability", "Simulation", "Tree network" ], "id": "34f7dcf1f75a398da5ab6bdf62145accb64c8971", "inCitations": [], "journalName": "", "journalPages": "50:1-50:13", "journalVolume": "", "outCitations": [ "0a9c8fef61634e392f9de6f34361cc1c690f7a00", "db93ae396b804437b10cfa2fad57ea01d3091eaf", "a665aff6b9a3b7ac8a8b6cd483904710e3ae1971", "9a981a5f95c70666b6d9ece6522e7d2894631ed2", "256774b46b3265ae950ea3717e5a2d0c51ab2b55", "2a0c9f8248aad793810dfc2ec2bc21a8ebdca6f4", "33715194bf741fe17d6f6b9559af694907c26d2a", "288cd78a14e88a4064d5dc4646aa93a36b387989", "5a8e3f36ad9db8756e735e3281489fbd18df6f65", "d6d6793a7049b810a0b1dbb4f6a4d517e69244d7", "75d69c48f94d16eccf1f8c253ac1ab31fa8c0c35", "f57ac7f53438b2877022125bac957fda2bb2a97b", "07ea038993d22482ebac87d60009bb30bdfd4f7f", "1190febbdff90a6ded3832507a9dd17f5898b9c4", "70f670c7572dc28a450c6c5f90b5bf497b74bb2f", "c39c26d510c1a965c5f132edc989a598ca92b700", "373b88e34295875fdab7f6cdee1438edbd0571cb", "c64582aec1819679d48f0acf5a8d4edca5ce74cb", "59c6c3ce034e5dadbc378604c7294eccec4ec47f", "e1d29194ff677c7426d65af3f5b6fec3ffa182b6", "4110d5ad162fbf43a3418f28b4d46609c2a147be", "3218bbfd89deae4134d6c6d7f8f3ceb5c3a361f7", "89cb312f9978c04e6a4259f11af53db13fc36c63" ], "paperAbstract": "The fat-tree topology is one of the most commonly used network topologies in HPC systems. Vendors support several options that can be configured when deploying fat-tree networks on production systems, such as link bandwidth, number of rails, number of planes, and tapering. This paper showcases the use of simulations to compare the impact of these design options on representative production HPC applications, libraries, and multi-job workloads. 
We present advances in the TraceR-CODES simulation framework that enable this analysis and evaluate its prediction accuracy against experiments on a production fat-tree network. In order to understand the impact of different network configurations on various anticipated scenarios, we study workloads with different communication patterns, computation-to-communication ratios, and scaling characteristics. Using multi-job workloads, we also study the impact of inter-job interference on performance and compare the cost-performance tradeoffs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126967" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/34f7dcf1f75a398da5ab6bdf62145accb64c8971", "sources": [ "DBLP" ], "title": "Predicting the performance impact of different fat-tree configurations", "venue": "SC", "year": 2017 }, "3502a22cd34038377c18f2cfae0207525bbbbecb": { "authors": [ { "ids": [ "3214936" ], "name": "Ramanujan K. Sheshadri" }, { "ids": [ "1785674" ], "name": "Karthikeyan Sundaresan" }, { "ids": [ "2573539" ], "name": "Eugene Chai" }, { "ids": [ "1755986" ], "name": "Mohammad Ali Khojastepour" }, { "ids": [ "2083839" ], "name": "Sampath Rangarajan" }, { "ids": [ "2706303" ], "name": "Dimitrios Koutsonikolas" } ], "doi": "10.1145/3143361.3143373", "doiUrl": "https://doi.org/10.1145/3143361.3143373", "entities": [ "Blu-ray", "Coexist (image)", "Compaq LTE", "Interference (communication)", "MIMO", "Multi-user", "Multi-user MIMO", "Multiplexing", "Printing", "Scheduling (computing)", "Simulation Interoperability Standards Organization", "Speculative execution", "Telecommunications link", "Throughput", "Time complexity" ], "id": "3502a22cd34038377c18f2cfae0207525bbbbecb", "inCitations": [], "journalName": "", "journalPages": "15-27", "journalVolume": "", "outCitations": [ "0e60a50161d9df6c1a0fa4ebb70fd636e08e80be", "205443fb07540cfca5e581476617589b2a60d996", "0ea1c736d76fdfccfd5b4c0a4cdd65ea7daf2019", 
"0e7d8823cfd5486357fbfa6ee8986233cd382c43", "98643418a8d12388d54347d264fd85631c44d572", "5475ebd986deddb62893ce3ee921299ad6f4b85f", "e95bb9f5e55f2b0f41d7f7acf6fcf795def80682", "31cc47d919588c06eefd1763a9c0bd4e5130d019", "ad873879b9f15657a57fbcb929b43dc172d1cec1", "1c825d7ae8877e58b00da31c50926e5d13bedd9c", "6fd51b17d312045cbc20b688b19306f0398de23e", "3dc657fa70abe74f011f6621b9a3da47f83d7cbf", "9212cc1af29ba5ea69949758c65fc3ba3232ea85", "6c5c49a63711214939f781542000314234e7cbb8", "235163250cd014eefd5f44dabd7e406230624764", "4e001febd8698985f13799383c2453aa4ce2910c", "4bd01656686e08d5b0f0e4405cf317781ce5674c", "59c32294c2817504af1bccaef9f25a212fa1d30f", "0f27c45d6f631d3a9966e462621518d36ede6020", "bbb906c8f088ab7b7ff6b83bec170f7a0cb7dca1" ], "paperAbstract": "Deploying LTE networks in unlicensed spectrum requires us to move beyond coexistence mechanisms and understand the suitability of LTE's synchronous operation in a spectrum that is governed by asynchronous access principles. Our study reveals a fundamental conflict in LTE uplink access that arises between the scheduled nature of LTE's multi-user transmissions -- critical for leveraging the diversity (OFDMA) and multiplexing (multi-user MIMO) gains -- and the asynchronous nature of interference on the clients. The result is a significant loss in spectrum utilization and throughput that scales with the number of interfering terminals.\n To tackle this critical challenge on the LTE uplink, we propose Blu. Blu transforms today's LTE schedulers into speculative schedulers that leverage interference diversity across clients to intelligently over-schedule clients on the same spectral resources to prevent this utilization loss. Blu's challenges lie in how to over-schedule appropriate clients on the same resources without paying the penalty of collisions, while containing the exponential overhead incurred in measuring the required interference dependencies between clients. 
The under-pinning of Blu's design includes a novel mechanism to blue-print the very source of interference on LTE clients along with their dependencies, which allows for a constant, significantly reduced overhead. Blu can be realized in today's LTE base stations. Its realization in an enterprise environment with SDRs (hosting LTE release 10) reveals appreciable gains of 1.5-2x in both utilization and throughput over existing schemes for SISO and MU-MIMO transmissions in unlicensed spectrum.", "pdfUrls": [ "https://www.cse.buffalo.edu//faculty/dimitrio/publications/conext17.pdf", "http://www.acsu.buffalo.edu/~ramanuja/blu.pdf", "http://doi.acm.org/10.1145/3143361.3143373" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3502a22cd34038377c18f2cfae0207525bbbbecb", "sources": [ "DBLP" ], "title": "BLU: Blue-printing Interference for Robust LTE Access in Unlicensed Spectrum", "venue": "CoNEXT", "year": 2017 }, "350b7adf480a7740af0466b82759c49eaec4289d": { "authors": [ { "ids": [ "34986234" ], "name": "Keval Vora" }, { "ids": [ "1740747" ], "name": "Chen Tian" }, { "ids": [ "1689014" ], "name": "Rajiv Gupta" }, { "ids": [ "2434904" ], "name": "Ziang Hu" } ], "doi": "10.1145/3037697.3037747", "doiUrl": "https://doi.org/10.1145/3037697.3037747", "entities": [ "Algorithm", "Application checkpointing", "Experiment", "Failure rate", "Graph (abstract data type)", "Programming language", "Snapshot (computer storage)" ], "id": "350b7adf480a7740af0466b82759c49eaec4289d", "inCitations": [ "76774fbc4e9ccbdc3fb9f3916c0be0f3a641f2cd", "ea760de3220b85790c9def82b9f18e1c3702339c", "2f9b520b16e05de8e705e374ebea3a121d64ebc2" ], "journalName": "", "journalPages": "223-236", "journalVolume": "", "outCitations": [ "badae465de041a04ee5b1aa58ce843b1bc85ea01", "420a0e5fc398f197bca3dfe40291a82b2c65655a", "1156f60e40548096df49528b1342bb3e88b0f378", "4af63ed343df388b6353b6fc77c7137d27822bf4", "0558c94a094158ecd64f0d5014d3d9668054fb97", 
"eb82d3035849cd23578096462ba419b53198a556", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "95e0a5be5d39b5dc820f3d8a21bc94da022760df", "6640c8b144f84fbd9756bfd3a74d8fd52c2d5737", "fb35b5bc1e02de4d9b31176c39247ee9ad6c3290", "62f41341d9ba292877e9e299d6eb70b5435ee8c8", "0371b65e431972dfe0f81573274d10eb8c9eec7f", "41c80483e80fab3a18280da790cec2c8d6060bdb", "3726c60552263e648c6856679e672de2e1c110e5", "2706db42926e0e58e35336331f6d3b62f0811cf5", "9359fa64a59105e93dd6ca9f5aa35e0d9f9055be", "7f6ed0ae932d021cd1b806671e2f5662e291a2fe", "2209304ccc2b0501debd5e9a90ae739f9a30cdef", "2a4ca461fa847e8433bab67e7bfe4620371c1f77", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "09031aa6d6743bebebc695955cd77c032cd9192f", "514a5c15e8cf3f681febecad954a4508d9189c99", "4dc578364f357b993b5554b9181c90c84aa6b4d1", "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", "0546fa6622b8b8db8527be777a692d88c5c037b0", "0e3253e5ca318e70d4968a45f8d41f88dbffd9e3", "029d525dd48347fa4b8a48dbf4b41b4b37199a6a", "0706356c9ab6014d6b04577d38289ea8328291a5", "bd4e5420e5b3016eaff344f566bb5a3b034cb20b", "1e8c283cedbbceb2a56bf962bc0a86fd40f1cea6" ], "paperAbstract": "Existing distributed asynchronous graph processing systems employ checkpointing to capture globally consistent snapshots and rollback all machines to most recent checkpoint to recover from machine failures. In this paper we argue that recovery in distributed asynchronous graph processing does not require the entire execution state to be rolled back to a globally consistent state due to the relaxed asynchronous execution semantics. We define the properties required in the recovered state for it to be usable for correct asynchronous processing and develop CoRAL, a lightweight checkpointing and recovery algorithm. First, this algorithm carries out confined recovery that only rolls back graph execution states of the failed machines to affect recovery. 
Second, it relies upon lightweight checkpoints that capture locally consistent snapshots with a reduced peak network bandwidth requirement. Our experiments using real-world graphs show that our technique recovers from failures and finishes processing 1.5x to 3.2x faster compared to the traditional asynchronous checkpointing and recovery mechanism when failures impact 1 to 6 machines of a 16 machine cluster. Moreover, capturing locally consistent snapshots significantly reduces intermittent high peak bandwidth usage required to save the snapshots -- the average reduction in 99th percentile bandwidth ranges from 22% to 51% while 1 to 6 snapshot replicas are being maintained.", "pdfUrls": [ "https://people.csail.mit.edu/jshun/6886-s18/papers/Coral.pdf", "http://www.cs.sfu.ca/~keval/contents/talks/CoRAL-ASPLOS17.pdf", "http://www.cs.ucr.edu/~kvora001/contents/papers/asplos17-coral.pdf", "http://www.cs.sfu.ca/~keval/contents/papers/coral-asplos17.pdf", "http://www.cs.ucr.edu/~gupta/research/Publications/Comp/asplos17-coral.pdf", "http://doi.acm.org/10.1145/3037697.3037747" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/350b7adf480a7740af0466b82759c49eaec4289d", "sources": [ "DBLP" ], "title": "CoRAL: Confined Recovery in Distributed Asynchronous Graph Processing", "venue": "ASPLOS", "year": 2017 }, "3531157f71ad58c823c6b31f0fd7acb53a292655": { "authors": [ { "ids": [ "1683544" ], "name": "Joshua S. 
Auerbach" }, { "ids": [ "1728836" ], "name": "Martin Hirzel" }, { "ids": [ "14976116" ], "name": "Louis Mandel" }, { "ids": [ "2100468" ], "name": "Avraham Shinnar" }, { "ids": [ "1753075" ], "name": "J\u00e9r\u00f4me Sim\u00e9on" } ], "doi": "10.1145/3035918.3035961", "doiUrl": "https://doi.org/10.1145/3035918.3035961", "entities": [ "Accident (philosophy)", "Combinatory logic", "Compiler", "Correctness (computer science)", "Program optimization", "Proof assistant", "Query optimization", "Reification (computer science)", "Relational algebra", "Variable shadowing" ], "id": "3531157f71ad58c823c6b31f0fd7acb53a292655", "inCitations": [ "6e4682ba700c6926b49aa7b6215ab8d9afdd7b7f", "89ac209aebdded8feec383eea815fa5d2a317286", "c19cad7fdf15e7e581736102babaff7d5b8bfcd0" ], "journalName": "", "journalPages": "1555-1569", "journalVolume": "", "outCitations": [ "341fa0df69b4cef8b1d9b99b983a92b82530a07d", "2df35bc5cc28c1204183ee7dcf6250aea8eb3ce4", "28d0280e2b972155c203b96bd6eb9f826aa73850", "d9c4e4c079891361c9f9921f622b3339bb2bb4d9", "9c7cd4b1f99048f700f690ce372dc7306f448d26", "44f70b768c77f38f90c927b3089c61e55c79909e", "2d93e81f3b77ee974646857d40e61fa99fea190d", "4549a6e24a3159d086d557eab0d5a987a9c5bba3", "06fb8f5e59bac8b014e7ab70851b3568ea5a0a46", "7feed976f5b50606d6d6b3501e40df6e481c0745", "0f30a4f371f8047362be15f8f1dc4061ac50bdb4", "088d9ce683308302ebe67f99c0dd2b0204c6084f", "7ddb0230137648491a1a117fe73105f77485660b", "5f486215a1d315df0196f1d7f82c75e7c3ef2147", "a7ddf51230e5fb77461204a7b1c6c0f2e65407bc", "eb80b83eea907144c111af9d1058c99b6403edeb", "e9e3c0705653fd98402b2fa941eeb49123c6b4b3", "c338a303f73f3a8f7f2526b81d6bbcc68ba018b5", "61672fb2415f4fbb985f171966dd0ea238f84043", "6c866743bfb70fb6872db69053e26d2102830cd9", "35bcafe56b8c6afd0447ffe22612049373dbb783", "301ab40bac3d556e5b6219092ceca56677a67943", "0f642a292c967ad0a251ce1b87d063775f1974fa", "41159bbfc5f839c09fb9282357f7f807355c3f83", "6c4cab747a73625b3c2f96d3f33b7c3329577828", 
"5608b89b731d80e23a649dcd338202b524fe9bd5", "65f9bd37f9c7f6256a92c04e4fe9bd77f5dee4b0", "8cb5ab7b6697d1713e4b41d03a5cff2b6309dbef", "9ad64edd7b95e9d46cc875e4ae0b0caf716d3390", "080ed793c12d97436ae29851b5e34c54c07e3816", "07a33610dd24db8042b691a6b6a11c9c0a8c5fd1", "75cd0e36bf00844247fc0874eb900b50f479f342", "245afc1cb8c08a3e72fdb67e86ad54f7d06b26e3" ], "paperAbstract": "Algebras based on combinators, i.e., variable-free, have been proposed as a better representation for query compilation and optimization. A key benefit of combinators is that they avoid the need to handle variable shadowing or accidental capture during rewrites. This simplifies both the optimizer specification and its correctness analysis, but the environment from the source language has to be reified as records, which can lead to more complex query plans.\n This paper proposes NRAe, an extension of a combinators-based nested relational algebra (NRA) with built-in support for environments. We show that it can naturally encode an equivalent NRA with lambda terms and that all optimizations on NRA carry over to NRAe. This extension provides an elegant way to represent views in query plans, and can radically simplify compilation and optimization for source languages with rich environment manipulations.\n We have specified a query compiler using the Coq proof assistant with NRAe at its heart. Most of the compiler, including the query optimizer, is accompanied by a (machine-checked) correctness proof. 
The implementation is automatically extracted from the specification, resulting in a query compiler with a verified core.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035961" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3531157f71ad58c823c6b31f0fd7acb53a292655", "sources": [ "DBLP" ], "title": "Handling Environments in a Nested Relational Algebra with Combinators and an Implementation in a Verified Query Compiler", "venue": "SIGMOD Conference", "year": 2017 }, "3562e3b585eda80839b3c175bb8eef5105e7c2a5": { "authors": [ { "ids": [ "3431458" ], "name": "Debankur Mukherjee" }, { "ids": [ "13546228" ], "name": "Souvik Dhara" }, { "ids": [ "1731931" ], "name": "Sem C. Borst" }, { "ids": [ "2423913" ], "name": "Johan van Leeuwaarden" } ], "doi": "10.1145/3084463", "doiUrl": "https://doi.org/10.1145/3084463", "entities": [ "Algorithm", "Asymptotically optimal algorithm", "Autoscaling", "Centralisation", "Comparison of raster-to-vector conversion software", "Data center", "Elasticity (cloud computing)", "Experiment", "Idle (CPU)", "Load balancing (computing)", "Perceived performance", "Scalability", "Server (computing)", "Simulation" ], "id": "3562e3b585eda80839b3c175bb8eef5105e7c2a5", "inCitations": [ "1d5e78afea2456abbfa6cb9cde092526a5184fb1", "18de5f0afca41e13f9629569ea7a163177196b72", "a98aa7bbd18ccebea80a89fcb66b2946ba95c057", "874d4cf60ceb91ee51d29179f0b37f589ac93363", "ed43642b7ada1805c2f2d18ca975e0bf1ae87173", "4cec5b24eaa7f21df2e4a6b5fd7c299775ceffdf" ], "journalName": "POMACS", "journalPages": "25:1-25:28", "journalVolume": "1", "outCitations": [ "9ef384d453d2f69e168ca10b86e8083a42eceb8b", "30fd94389b20e568d15ec69a5f1b75e48c8845f9", "457e1d17518b5aba6516bdd91cd7938926521795", "93d29db7e7daa91075ec95afc88ad83ba288bd2c", "28c35994ad743fb284bc7410c59fd231e3ce2d77", "0f44833eb9047158221e7b3128cde1347b58ccd6", "ad73deea37cad9a9b945d929a86d82d781450345", "68a71ac53bd422290c79fa0c9ae4b8e221091382", 
"7aa6e4bd858bf822bde817e653975988bc72230a", "1f809df0c69e4962caf2fd85a1fc59b60f640a03", "38a0bced15718230eeec1f5ffd29ada0f4f10a7a", "2899e535980411e873e7d709d2b0e78973cc24cc", "0b4fe9a2d3a3fed7cc8640c709dbf3820753b07d", "17f7424f3d469b6436e6d3c5158926759fbe822b", "55fd0d580e8bf328ea58b2ebe182a61fbb807625", "234e6be0d4238f76b3ac038ee422be39f391c625", "750a893064474de2b422f7299eac7484dace94cd", "31b2714a574d3ffc0125a17db42c65524240b55e", "2d26f6a4c75a25e984e1061f02b4f5578f1e05a2", "24aa7ab8e6dfcdcf5010e7aaf0672766798355e0", "1d9f3b99d70f3d1ed307bdf102690d4aa5c64ce9", "6c429bd0b68e685af16f98866d05bb6c561289de", "51832de6efb63f50edb3a2ecc2e2b6f2d6fd8f53", "2d42a2b470dfc9d1ee542e7b5cfead1b171dbdd7", "ad3c368c9e40fd4c81f443055833062cea46fdcd", "dcde99889ec984f934ad0054db3597709e0d5e04", "0af92c2ceb5c03452ca303fb4222c6d04568c5ac", "06b6547ffb66748bb79befa03c8780bc1fd5eb91", "b00b32c242f59d8bb41c09db8cbbf8e603c4e8ab", "4331b5ba6adbf6a0f35c26f4f1d2c1189b295e4a" ], "paperAbstract": "A fundamental challenge in large-scale cloud networks and data centers is to achieve highly efficient server utilization and limit energy consumption, while providing excellent user-perceived performance in the presence of uncertain and time-varying demand patterns. Auto-scaling provides a popular paradigm for automatically adjusting service capacity in response to demand while meeting performance targets, and queue-driven auto-scaling techniques have been widely investigated in the literature. In typical data center architectures and cloud environments however, no centralized queue is maintained, and load balancing algorithms immediately distribute incoming tasks among parallel queues. 
In these distributed settings with vast numbers of servers, centralized queue-driven auto-scaling techniques involve a substantial communication overhead and major implementation burden, or may not even be viable at all.\n Motivated by the above issues, we propose a joint auto-scaling and load balancing scheme which does not require any global queue length information or explicit knowledge of system parameters, and yet provides provably near-optimal service elasticity. We establish the fluid-level dynamics for the proposed scheme in a regime where the total traffic volume and nominal service capacity grow large in proportion. The fluid-limit results show that the proposed scheme achieves asymptotic optimality in terms of user-perceived delay performance as well as energy consumption. Specifically, we prove that both the waiting time of tasks and the relative energy portion consumed by idle servers vanish in the limit. At the same time, the proposed scheme operates in a distributed fashion and involves only constant communication overhead per task, thus ensuring scalability in massive data center operations. 
Extensive simulation experiments corroborate the fluid-limit results, and demonstrate that the proposed scheme can match the user performance and energy consumption of state-of-the-art approaches that do take full advantage of a centralized queue.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078505.3078532", "http://doi.acm.org/10.1145/3084463", "https://arxiv.org/pdf/1703.08373v1.pdf", "http://arxiv.org/abs/1703.08373" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3562e3b585eda80839b3c175bb8eef5105e7c2a5", "sources": [ "DBLP" ], "title": "Optimal Service Elasticity in Large-Scale Distributed Systems", "venue": "SIGMETRICS", "year": 2017 }, "35634a173dc77967a0c10d0578f10fbf808e0972": { "authors": [ { "ids": [ "1730464" ], "name": "Jiepu Jiang" }, { "ids": [ "40249854" ], "name": "Daqing He" }, { "ids": [ "1722517" ], "name": "James Allan" } ], "doi": "10.1145/3077136.3080840", "doiUrl": "https://doi.org/10.1145/3077136.3080840", "entities": [ "Eisenstein's criterion", "Human\u2013computer interaction", "Relevance", "Session (web analytics)", "Telephone exchange", "Usability testing", "User experience", "Utility" ], "id": "35634a173dc77967a0c10d0578f10fbf808e0972", "inCitations": [], "journalName": "", "journalPages": "405-414", "journalVolume": "", "outCitations": [ "26217ccb8c7df509aeb3b80dbb862b0721413745", "ca876bb3b8a2303dcd283c00de8b2f602b501fbe", "72900e0eadbe8ce187f36dbe8bcbab100cf67c06", "c77be34db96695159244723fe9ffa4a88dc4a36d", "833e1468f902e5092e2d2a2974f019827a0c57b9", "48f895a0e9aaf4e59fa4104c0974c82974e6c0c5", "ac1601d6da7197ead5adf44b8d9798d4d8515df5", "26ec0b0a8fae543e19c8bb8cad8fe279a70df89f", "229b9e80568169ac90357428ed3cfadd3513c823", "e95a1ce95b4f0e7b542b70ef80073b9525646717", "1915e0d8a50d111fc974027b33efd4fba0a3ed84", "d0cfaed38630f9a812706c75af94bc8d1db68479", "7ef9f22727abc2c78d5d364995fa4ac75c666f9b", "fad4cf87ca2f3948b7a71b306e321454af7b346b", "39ffe2f42b7d5a972b077b86bbd5ea89d968a869", 
"890ff7033187d11325329ab12d22e9ca125df94d", "3257cfb4bb041efe583a3c80e4491419d8852275", "0ddc065cc92046857d35555c3af4ade1310aca4a", "245675a3a711dd3e4c48b1effd3f678868fb259d", "3eae360c6ee52950f27f577aedd5f9934a04e137", "07facb9dd8b7aebe39decdc680c62333aacd5d39", "517a461a8839733e34c9025154de3d6275543642", "6d1eb878e1d2530c197c962dd4a61d2aba015261", "61cff88b0ca0a19b5e3bbab41586a11b67b5fadd", "2a6998ce1bc2001bfe796aa3951a15fa499ab795", "3c37515e7037925c3f0a475b03be72dc853b8533", "15004aadabd967ac722a28a9c3bb39cf5bc32605", "201b29dc80cab5c0adb35cf415d2b9a9d15a0ba7", "241e1b42b3733d75466c98324dd051055d3a8687", "e3ebe8742c42212b580f6d3e9db5cd12c95a8434", "1ab758492347723ae8ad20257715f3fd49e75c27", "0a4baa9044f273ba3da73e0e7df19e198781bf6a", "59c56132b8abae36dd0216663cd7ea8b47289dbd", "f904fbdcc35a83002718de07630894af547d8b19", "ed4cbb28e02b3520252fe8089374deffc15800a6", "e43833c5142f254d250e8694ee58cdd22d85a760", "14895145cc7fe8e4bf133bbe8ce2d3c4fd33fef0", "fe56b4181802dbd4a38ea0711a48e24f7e7ee94f", "5c444ac251c2a70b8b70494e1c2fb4400c12839b", "d8a61dfe823cd78f438c422d7e31c636a5101388", "dbcd79bd7edcdcbb5912a50796fc3c2746729eb5", "081b5f02df981029e268a57a0c2acfbb18f0cbcf", "4d9835ba915beaaa01347cc87febe43d48cf6f34", "7aa38947cff9c25f1301f8bf582aa94944b986b0", "7e5e32d3845fd8304841aef51156ffb4d605a0b5", "51f32b1db78ea048149de0407430e8792210fe38", "0c2b756e145e04680ee2e01186d659d6e567985a", "4678d9bd2eb00ed7b1acbab2909feb281b4e470c", "012927ab94f0e542ce137e032b0288bbfbc2f9ca", "4d035638f614dd648ef5c531797f3e826d329f13", "4047d5efd1683bbd3280500c3244149089412024", "2167054da02b0bb2dddfccfa2c60866858478da2", "49dadc93d5fbd349f4bfe45a6fb1350b00a7a926" ], "paperAbstract": "To address concerns of TREC-style relevance judgments, we explore two improvements. The first one seeks to make relevance judgments contextual, collecting in situ feedback of users in an interactive search session and embracing usefulness as the primary judgment criterion. 
The second one collects multidimensional assessments to complement relevance or usefulness judgments, with four distinct alternative aspects examined in this paper - novelty, understandability, reliability, and effort.\n We evaluate different types of judgments by correlating them with six user experience measures collected from a lab user study. Results show that switching from TREC-style relevance criteria to usefulness is fruitful, but in situ judgments do not exhibit clear benefits over the judgments collected without context. In contrast, combining relevance or usefulness with the four alternative judgments consistently improves the correlation with user experience measures, suggesting future IR systems should adopt multi-aspect search result judgments in development and evaluation.\n We further examine implicit feedback techniques for predicting these judgments. We find that click dwell time, a popular indicator of search result quality, is able to predict some but not all dimensions of the judgments. 
We enrich the current implicit feedback methods using post-click user interaction in a search session and achieve better prediction for all six dimensions of judgments.", "pdfUrls": [ "http://ciir-publications.cs.umass.edu/pub/web/getpdf.php?id=1272", "http://doi.acm.org/10.1145/3077136.3080840" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/35634a173dc77967a0c10d0578f10fbf808e0972", "sources": [ "DBLP" ], "title": "Comparing In Situ and Multidimensional Relevance Judgments", "venue": "SIGIR", "year": 2017 }, "358371444e037519ba8837bed0437985d4dc582e": { "authors": [ { "ids": [ "3490923" ], "name": "Milad Nasr" }, { "ids": [ "1972973" ], "name": "Amir Houmansadr" }, { "ids": [ "2923112" ], "name": "Arya Mazumdar" } ], "doi": "10.1145/3133956.3134074", "doiUrl": "https://doi.org/10.1145/3133956.3134074", "entities": [ "Algorithm", "Compressed sensing", "Computation", "Encryption", "Evasion (network security)", "Fingerprint", "Information sensitivity", "Network packet", "Network traffic control", "Program animation", "Scalability", "Signal processing", "Traffic analysis" ], "id": "358371444e037519ba8837bed0437985d4dc582e", "inCitations": [], "journalName": "", "journalPages": "2053-2069", "journalVolume": "", "outCitations": [ "a67a56e11b77af0fbc10c2613a2cc9325674afa7", "144144d06ceb6df35eeefc3b8ee63653fe4f2479", "04b2734b2b3d9983bbdbe2afbcd59fd65677a693", "0de068a18fe91cf539fd741ae9b35df33f66a09c", "32b83bc42da28a4b8736ba714e9834a41cdb5e4a", "72e9bbc0979bb5c29bd4e49b5674c3d282e8599f", "21f47e1d9078d12de1bd06341619923e8b9d85bb", "03c6142349e9db27979f53e53504658296957415", "8f3f63773aa801b2cdcfa2e5699e3abb9aed7443", "9723342d787bf2677c80b77d8f0e73be98e01d21", "a5d6544de55fb0ea3c91904ff4d021804efd53bd", "53dd59981ee7aa4cbf66ba737d50f5076a148414", "5e2c6fd62545a2b48e3f0fad2c16cea7f79430bf", "d1223386152f3da2a7150038345a67b7c6ecea26", "049504df22c77010e5bb62a2088f70fabc5ecb6d", "1fb34dbc5bfa0b8e9d5848c8b248421d6bf04f71", 
"b61ffc7f0b4fdeb22288b866cb58ac02f46adcee", "2452c0b6563b95c743e70b91782af73f4aba6826", "3f853e6e13e5a4fa0abf39a072eb066081f93295", "df046a710123c0f01d1f1ff1172ffa744db29837", "13b1ce21065b0303939f08c1c9a25855a78f8735", "005aea80a403da18f95fcb9944236a976d83580e", "9837a70c231c0ef3d33c2c9f5b56afd40548acce", "af50f22169b50c38db915e749db2702c2d26523a", "cd800900fbd9bc89a35ff597a7ddfe74aceefa55", "d9ef20252f9d90295460953e8ab78667b66919ad", "8750c0b8094957003fd7f681f9ef8af47b86a99d", "712f3d0f7418185d18ccc36e820959372f8fbec5", "95801b98deb4683014a548f3338a4b06429e2bee", "0c80892c17873ff91d6023124e7feed8abc8d0e9", "af5582fb02dceaaf4b0fef84ea6bd8fb7ca14acc", "4b20857de1c96d80311dc89c64a4f900851cede4", "2323173a0bddac0dd2586b17a2f3ac33f401c45c", "515c49821a11b09d3e7565338cd9c072d1dec7e1", "377240154366ee44e56e172c2279b3a81cf2c50b", "f91a7d841141b4bdf6346eaeac511b88fc3ef234", "0a65097bcb2dd400a27a65d42608658dc9f6898d", "27319e4085963161ebfdb99d261783cac1677823", "db583ca798dae7ed3beadaa80755462ff5ab3549", "6721e925f7673061fb744ef4ef8864fe8a34a3fa", "34760b63a2ae964a0b04d1850dc57002f561ddcb", "5ca40acf3e272f2a5b5d6e451bab5d899e7be6eb", "4ce5ff2f46595ee5cf8899ca003ddd411715ea9b", "c21bccf1ab4bb090fd5fc1109421a1a3979e7106", "268a218e861d4107fb3cf71ec061703ed965820f", "5770870c631b48137515840813041e0a6712c5d8", "1e0f433f5f5692c76d13304b22b922d07ace2132", "dce89cf89ba169d3d3b4595b1c1d247a0f974f96", "487c39672393b17f789b84e3f24527c274f345e7", "6ddf6a8941d925c7e07f36c1c36d002a6965d127", "318142fbcea893df32d6b4585f292f5c3e02a2c7", "9a91ef83abf4251fa55019a26682c72ce79cadb8", "55b49e2302f389c1dd468e6414d871e44710320f", "12add45f5a0cc3d9a9a694d3a3802843438208e0", "7fc6e368df553233b61981dbb34772f5d8013821", "4607325f47318b12e129f0d38c370a00fe7f6281", "3b1e020ae2f431f47351f7e2df5cc8c8ac245905", "2e4b61ca5ff7af8743e4365edeb40cd87df15c5a", "1e7ddb41095be915ec28bc85d14305df3d02a445", "096a2026fa46abd43143e8ae3dc0ce6414310cf9", "0c1bbd5384785dd347ce3749bdbb60551bb876bd", 
"2d2ff1db0079fc0a47a37d41be43c0c9a435e4bb", "2f960fe01372456a211d96427679d73eb873508c", "01cd72b5ac91023a1b9b12f54a9c94fc55033123", "54059ca5d7dd57fe7061459333cd2ced01c5bf20", "0fba6763b78c55eb512683dcf599a03b0d5c70d2", "a244717eef0b7cb421a6710f4508a91e8a3b52a9", "357af3dd66a8ee994f17c890422fda1b618586d3", "0c9822c7ecc0fc8bf8930f8951286c3d7a7b6c6f", "364bcbe6e3cb439d1ca694b259c2066d3769c860", "6cac3f5009b5728bda2457fef6180d28357b3ae6", "aad65f26e952d64c4ac343e4051b99c6ac1ecee3", "133e0e83dc6877c6d417431e875cd57876153893", "17a7c7c22842ffa5f4e280bec7ee9c68bbdedc36", "605ed83a6d1f4eaf995e85830f373923b11d6c13", "b61272d39b01e2e74cb1c500daea448a5bc53d7b", "381c83803885326109721782c85e1ec0e3056936", "0fe8bdb33cb234dd221d7cf6895af338d80eac0d", "00a9446982911cbd96a127f70976d39ecaaaf306", "dbbe21e92ce10f2bd8206ec460bf1a4b5aaa970c", "18b1c62d6c7fa0e619f0c13172d8852b3d5a71fe", "14d19771bc69f1d41f63052e56e134f9ed569c1e" ], "paperAbstract": "Traffic analysis is the practice of inferring sensitive information from communication patterns, particularly packet timings and packet sizes. Traffic analysis is increasingly becoming relevant to security and privacy with the growing use of encryption and other evasion techniques that render content-based analysis of network traffic impossible. The literature has investigated traffic analysis for various application scenarios, from tracking stepping stone cybercriminals to compromising anonymity systems.\n The major challenge to existing traffic analysis mechanisms is scaling to today's exploding volumes of network traffic, i.e., they impose high storage, communications, and computation overheads. In this paper, we aim at addressing this scalability issue by introducing a new direction for traffic analysis, which we call \\emph{compressive traffic analysis}. 
The core idea of compressive traffic analysis is to compress traffic features, and perform traffic analysis operations on such compressed features instead of on raw traffic features (therefore, improving the storage, communications, and computation overheads of traffic analysis due to using smaller numbers of features). To compress traffic features, compressive traffic analysis leverages linear projection algorithms from compressed sensing, an active area within signal processing. We show that these algorithms offer unique properties that enable compressing network traffic features while preserving the performance of traffic analysis compared to traditional mechanisms.\n We introduce the idea of compressive traffic analysis as a new generic framework for scalable traffic analysis. We then apply compressive traffic analysis to two widely studied classes of traffic analysis, namely, flow correlation and website fingerprinting. We show that the compressive versions of state-of-the-art flow correlation and website fingerprinting schemes\\textemdash significantly\\textemdash outperform their non-compressive (traditional) alternatives, e.g., the compressive version of Houmansadr et al. [44]'s flow correlation is two orders of magnitude faster, and the compressive version of Wang et al. [77] fingerprinting system runs about 13 times faster. 
We believe that our study is a major step towards scaling traffic analysis.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134074", "https://www.freehaven.net/anonbib/cache/compressive-ccs2017.pdf", "http://people.cs.umass.edu/~milad/papers/compress_CCS.pdf", "http://people.cs.umass.edu/~amir/papers/CCS17-CompressiveTA.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/358371444e037519ba8837bed0437985d4dc582e", "sources": [ "DBLP" ], "title": "Compressive Traffic Analysis: A New Paradigm for Scalable Traffic Analysis", "venue": "CCS", "year": 2017 }, "35862d3e3197c36820b781a6a02f25ab91fa1372": { "authors": [ { "ids": [ "37071348" ], "name": "Wouter B. de Vries" }, { "ids": [ "2268715" ], "name": "Ricardo de Oliveira Schmidt" }, { "ids": [ "34752482" ], "name": "Wes Hardaker" }, { "ids": [ "2330605" ], "name": "John S. Heidemann" }, { "ids": [ "34932490" ], "name": "Pieter-Tjerk de Boer" }, { "ids": [ "2615509" ], "name": "Aiko Pras" } ], "doi": "10.1145/3131365.3131371", "doiUrl": "https://doi.org/10.1145/3131365.3131371", "entities": [ "Allen Brain Atlas", "Anycast", "Failover", "Map", "Routing", "Sparse matrix", "Testbed" ], "id": "35862d3e3197c36820b781a6a02f25ab91fa1372", "inCitations": [], "journalName": "", "journalPages": "477-488", "journalVolume": "", "outCitations": [ "043e3a96ec61cd40e7b4c3d0bfcae2611dacfb68", "630015f0febe0d407f25a16a810af88c5cb8e792", "45b3f392eab9bf2f6a29c75dd1f09e70e07e4c48", "7dbd88abd90e154286ea6cbded2d78f938f68b5f", "0f4e34b62730eb12e5215c9f5131fddb7490a9ea", "2d64edaac4547176afb4c22c338bd330327031e5", "161d9c0156656cce193b64434c3264ed94445d59", "4a584b753d0ce0ca64aed8facea10bf7eb95da6f", "4f260fd1edb28ec2b868bffaefdcb45cf9cf5c8a", "13e66cd49ab55d89cf40c596786a6bf26260e46b", "d4ed1a24432fe539949a7799704a39150a5776c4", "bfafe26d192d8f2102ecab45c307014e1656cc6c", "8bb584dd12dd82b9041b819b8f25633eadf1c5d5", "32490550057e999a38854cf5858142b035282e69", "03d4f29de44dfbfe8c29dc1ad0e495cb54684e6b", 
"1fddc54bc1a1610a1162fde15ac6a87336bffc3e", "2c55cc95b6014bfa3f34307af141d0ddaa771c64", "513833133f86c0180d1dbe251cc9660430154549", "71fda542b243f32b3c9f75317905b1ea1ceacce9", "13bf13f019632a4edb967635e72e3e140f89e90e", "3c69ed9cfb2cdea79f08f55c91e47a9b1f083e8d", "28d931067f9a7ea393910fed8c68d098f7ca9fcd", "0f2f3e328608c9409adc820d82bfaf5940d3a8db" ], "paperAbstract": "IP anycast provides DNS operators and CDNs with automatic fail-over and reduced latency by breaking the Internet into catchments, each served by a different anycast site. Unfortunately, understanding and predicting changes to catchments as anycast sites are added or removed has been challenging. Current tools such as RIPE Atlas or commercial equivalents map from thousands of vantage points (VPs), but their coverage can be inconsistent around the globe. This paper proposes Verfploeter, a new method that maps anycast catchments using active probing. Verfploeter provides around 3.8M passive VPs, 430x the 9k physical VPs in RIPE Atlas, providing coverage of the vast majority of networks around the globe. We then add load information from prior service logs to provide calibrated predictions of anycast changes. Verfploeter has been used to evaluate the new anycast deployment for B-Root, and we also report its use of a nine-site anycast testbed. 
We show that the greater coverage made possible by Verfploeter's active probing is necessary to see routing differences in regions that have sparse coverage from RIPE Atlas, like South America and China.", "pdfUrls": [ "http://doi.acm.org/10.1145/3131365.3131371", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final46.pdf", "https://www.isi.edu/~johnh/PAPERS/Vries17b.pdf", "https://conferences.sigcomm.org/imc/2017/slides/Broad%20and%20load-aware%20anycast%20mapping%20with%20verfploeter_v4.pdf", "http://ftp.isi.edu/~johnh/PAPERS/Vries17b.pdf", "https://wbdv.nl/files/Vries17b.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/35862d3e3197c36820b781a6a02f25ab91fa1372", "sources": [ "DBLP" ], "title": "Broad and load-aware anycast mapping with verfploeter", "venue": "IMC", "year": 2017 }, "3599f286493ce985263d9ecab6a7d4e1f8fe5309": { "authors": [ { "ids": [ "8765791" ], "name": "Goran Flegar" }, { "ids": [ "1684436" ], "name": "Enrique S. Quintana-Ort\u00ed" } ], "doi": "10.1007/978-3-319-64203-1_50", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_50", "entities": [ "Sparse matrix" ], "id": "3599f286493ce985263d9ecab6a7d4e1f8fe5309", "inCitations": [ "e7599d18d91e487cd882bd7ed9e79e46887f97fd" ], "journalName": "", "journalPages": "697-709", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_50" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3599f286493ce985263d9ecab6a7d4e1f8fe5309", "sources": [ "DBLP" ], "title": "Balanced CSR Sparse Matrix-Vector Product on Graphics Processors", "venue": "Euro-Par", "year": 2017 }, "35aecf2a6ad7f12ad06d9f9e6b7d4935fea840ac": { "authors": [ { "ids": [ "2756452" ], "name": "Zhen Cao" }, { "ids": [ "28670096" ], "name": "Vasily Tarasov" }, { "ids": [ "9749757" ], "name": "Hari Prasath Raman" }, { "ids": [ "32401480" ], "name": "Dean Hildebrand" }, { "ids": [ "1708491" ], "name": "Erez Zadok" } ], 
"doi": "", "doiUrl": "", "entities": [ "Experiment", "Spatial variability" ], "id": "35aecf2a6ad7f12ad06d9f9e6b7d4935fea840ac", "inCitations": [ "257c1c169dd0ae98e273efd0d0948f2a028d4c3f", "b8c87f3c5411557e7a21008bbb5db7485f98dbd0" ], "journalName": "", "journalPages": "329-344", "journalVolume": "", "outCitations": [ "b4d9a48d77cdc71290a49e81881528be9a45435b", "26a88fcaf621270af5f5786fdb2df376a2bc00aa", "0d62acaefd4c4f41fd5814c8d9267d7798de9284", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "31330de2f2bc268ee2bf5ccbe3bed08b52c3e4fa", "12a0046a1197ae63c3d616c74e367dc583cef196", "8b90229a5f7af5b4b3d602f63de72fc421d55a24", "227e529c08f821d134dd15fb9296419250ab9301", "36698f71fea78a0ee1a058484c0c0c781e354f61", "039124197fac7a16e36611d8beed94524dd5fed5", "012ab4527d6aee2387c243d304c624f3b9cf03f3", "02d9013e5d370fb79ff1569a59190e18515fa3cd", "13c27125584651329f66461981cbb20fa63e9023", "2d60d3596490d9999d8433bf41405060779bc11d", "3c89345bb88a440096f7a057c28857cc4baf3695", "11fe43dfcf43802595c2076c7641aff6f025e1ec", "830ee8d87a3f2ef969d34c1dc7224d1b3dca6c1b", "14a2ba566f6c8f7f519b299042ccf358361c558f", "8301c813277cc59b47a84d25dc1e307eee8ce310", "3f7971691970a4924bc62f3766541a4b4294fa0d", "2018f3fc13cd38122abdf37bf939b5011cd2e3c9", "b53d8516bf83c1b58147e2b4dbc870a8d396e53e", "4ba4613eab33cddc53bec9e14e50d03fa66270ca", "1a5fc0a7aca4a8e2f831d0edb1e5d160acea19ac", "2da760f90c3d2bf6598becdde9063093f488548c", "29a1a2a8f34d17c791e98775cdf7f8580b13abf6", "27f8ac77b89986f7a24f929b200b6a358b8f7d01", "7b6e453e08717cfdcb66349ac184996e43ed85b3", "d0b6d2075a653d60452b6df0fced4ee0ae093dd2", "239e046347d5075b3eeef5439050e9f2ca760b7b", "1b1ea9f3f15f5160b77aa2177e7fdeb6eeed911a", "a8d5edc845fe8512e01ddfd4af0d09c397fbcbec" ], "paperAbstract": "Ensuring stable performance for storage stacks is important, especially with the growth in popularity of hosted services where customers expect QoS guarantees. The same requirement arises from benchmarking settings as well. 
One would expect that repeated, carefully controlled experiments might yield nearly identical performance results\u2014but we found otherwise. We therefore undertook a study to characterize the amount of variability in benchmarking modern storage stacks. In this paper we report on the techniques used and the results of this study. We conducted many experiments using several popular workloads, file systems, and storage devices\u2014and varied many parameters across the entire storage stack. In over 25% of the sampled configurations, we uncovered variations higher than 10% in storage performance between runs. We analyzed these variations and found that there was no single root cause: it often changed with the workload, hardware, or software configuration in the storage stack. In several of those cases we were able to fix the cause of variation and reduce it to acceptable levels. We believe our observations in benchmarking will also shed some light on addressing stability issues in production systems.", "pdfUrls": [ "http://www.fsl.cs.stonybrook.edu/docs/evos/evos-instability-fast17.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_cao.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17-cao.pdf", "http://www.fsl.cs.sunysb.edu/docs/evos/evos-instability-fast17.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/cao", "http://www.usenix.org./system/files/conference/fast17/fast17-cao.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_cao.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/6be0/5e89a9df87f3c2675fe4a5b39e0601c68706.pdf", "s2Url": "https://semanticscholar.org/paper/35aecf2a6ad7f12ad06d9f9e6b7d4935fea840ac", "sources": [ "DBLP" ], "title": "On the Performance Variation in Modern Storage Stacks", "venue": "FAST", "year": 2017 }, "35bb4201683cf3525bfab90c35ca1a6ab72f3e60": { "authors": [ { "ids": [ "1851223" ], "name": 
"Zhaoyan Shen" }, { "ids": [ "1692998" ], "name": "Feng Chen" }, { "ids": [ "2246860" ], "name": "Yichen Jia" }, { "ids": [ "1714148" ], "name": "Zili Shao" } ], "doi": "", "doiUrl": "", "entities": [ "Attribute\u2013value pair", "Cache (computing)", "Experiment", "Flash memory", "Open-channel SSD", "Solid-state drive", "Systems design", "Throughput" ], "id": "35bb4201683cf3525bfab90c35ca1a6ab72f3e60", "inCitations": [ "c49feb5f91c8ba846eb2e90edf1b01c62a25c8d5", "c9e997cccec19141972a64fafcbb55c1f007c370", "226ca798b529c13605a2aa7fe75d58f4188f850a", "1d08d231ec66645ec56d2210c1a7c6b44c6ff041" ], "journalName": "", "journalPages": "391-405", "journalVolume": "", "outCitations": [ "ff64eac2cf7d5e58c02fefa398ba1a1b9670f09f", "02544c9b385813aade4512532cd357e294a74eb4", "b0b2f180faa09e7bfcb6bb8e57288c3b61f11116", "861ead96ed080f88df28473b16f1fcd98d735445", "088e3e939ad234b6fdd0e321290fb26937dc2553", "274e495824827f5a9dc1ba3ab62620445e6b3d4b", "05961fc1d02ca30653dd0b4c906113db796df941", "0bba65fd5ac1db9a3293e9ebcfba092cf4ae58ee", "c1c4cbcf12c283d9b88bfa7de6ab5a1d02f3f7f6", "199ac28b6bc68bf05c77645ffae7640df114bca5", "8cded4cc565f8b7c41b40de6fe8d20231a7e8652", "7019d566d10fcdb836aa338c344de4f0ed2131b6", "3b2af12a43d06338dd62681328c75a1999fc87fd", "1693e83e47a99667f4bd6ad6e24d8b62a1ba22c8", "05948e66aeefea1c969fdce16edb94ae94fb651e", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "048a09d7c8713dc2533c1e31ac3f224868293461", "d67adb456a315aee244babf4f20e318cc14d13f3", "0e216e95f17f64ff18cd50463dd8ec023aa08248", "0720cfa5330462593b20ea0bbb7d8b5862a6b730", "34bbe13996b8cd0ea21cb1fe125fe79979587049", "70ce10f47aafa0994627a9575565b5c98af58d98", "098d792d1783b5f6fc098203f71f21f5d053c653", "13d6c568c770ff5a070072e720fb34b0037cdab8", "20a44558eed182a971f7add68ecc5931fbca2a65", "40f04909aaa24b09569863aa71e76fe3d284cdb0", "9efc40b9a71a128c073fe09bb77e0e97f08514d8", "b4087345c63a7b2412eeb31066b5e4bceadbbcb2", "ec5bcf2186fa0b4b7399f7233f9ade80966e6fe5", 
"0903d6b3b5a26fea2cb7b4956f66365d71c78549", "0a5882fc7600383eb9d6cc119942f48a70f896ad", "4cda001811dea15a35894cd1b657003bb7f3c6de", "248a93ed3be23972343d18bc27cf4a2b43781972", "627b93073977b7b7c5ae0cf610f41ee0ed27669c", "726099036bb32c3fbaf1650d5900eeaa2ecc8fd9", "1820a34042d6371a9e20484b0c63b698eb522a6c", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "389b618c42c0d5d32a569b9cbaa02a7ff77c6be6", "73e85836599b5ab4f83afa2ae10fea99cb5d29d7", "61977858b3eea4f5a6d81393301e7298ade7a2d8", "68a047707ff765af006fcb481feb3d6eaa4625b1", "0e5c646909bb762da0cd325e084655c12445578f", "0b6adc0dbc55076dc9c9a8931f4a4df58fd291b6" ], "paperAbstract": "In recent years, flash-based key-value cache systems have raised high interest in industry, such as Facebook\u2019s McDipper and Twitter\u2019s Fatcache. These cache systems typically use commercial SSDs to store and manage key-value cache data in flash. Such a practice, though simple, is inefficient due to the huge semantic gap between the key-value cache manager and the underlying flash devices. In this paper, we advocate to reconsider the cache system design and directly open device-level details of the underlying flash storage for key-value caching. This co-design approach bridges the semantic gap and well connects the two layers together, which allows us to leverage both the domain knowledge of key-value caches and the unique device properties. In this way, we can maximize the efficiency of key-value caching on flash devices while minimizing its weakness. We implemented a prototype, called DIDACache, based on the Open-Channel SSD platform. 
Our experiments on real hardware show that we can significantly increase the throughput by 35.5%, reduce the latency by 23.6%, and remove unnecessary erase operations by 28%.", "pdfUrls": [ "http://www.usenix.org./system/files/conference/fast17/fast17-shen.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17-shen.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/shen", "http://www.csc.lsu.edu/~fchen/publications/papers/fast17-didacache.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/35bb/4201683cf3525bfab90c35ca1a6ab72f3e60.pdf", "s2Url": "https://semanticscholar.org/paper/35bb4201683cf3525bfab90c35ca1a6ab72f3e60", "sources": [ "DBLP" ], "title": "DIDACache: A Deep Integration of Device and Application for Flash Based Key-Value Caching", "venue": "FAST", "year": 2017 }, "35bc3b88d20098869a2e5cdb8cb83ed926627af0": { "authors": [ { "ids": [ "3456943" ], "name": "Ao Ren" }, { "ids": [ "38315753" ], "name": "Zhe Li" }, { "ids": [ "2881873" ], "name": "Caiwen Ding" }, { "ids": [ "1862322" ], "name": "Qinru Qiu" }, { "ids": [ "1698242" ], "name": "Yanzhi Wang" }, { "ids": [ "2398347" ], "name": "Ji Li" }, { "ids": [ "2064331" ], "name": "Xuehai Qian" }, { "ids": [ "1765175" ], "name": "Bo Yuan" } ], "doi": "10.1145/3037697.3037746", "doiUrl": "https://doi.org/10.1145/3037697.3037746", "entities": [ "Activation function", "Algorithm", "Application-specific integrated circuit", "Binary number", "Bitstream", "Convolutional neural network", "Embedded system", "Feature extraction", "Field-programmable gate array", "General-purpose computing on graphics processing units", "Graphics processing unit", "Holism", "Internet of things", "Map", "Multiplexer", "Program optimization", "Scalability", "Software deployment", "Stochastic computing", "Throughput", "Top-down and bottom-up design", "Wearable technology" ], "id": "35bc3b88d20098869a2e5cdb8cb83ed926627af0", "inCitations": [ "381b008b49d04c1bd5ff00649521fa028b9d3ea8", 
"5bcf27ab86be9fa376237d2d2bd8ebbf52982088", "bc8c5a79ead2385a70df4ace6339424dfbad88dd", "fd87cfccc1f672be8923b21efcfd8b45afec5437", "b59e50c09ce26c87d29e531dc8e1612c91f50725", "67355b7f4edede98a3d568c9d8951bd738e280c5", "e66e7d5f13606230ea1d2f8eaf2812c996e8a655", "1cafc9976a57e7989faa05b3a534149e0da8f078", "828b0baa6fe74273cfdd2bd14cb176e0b293b246", "4e9f932a07aee9c9e3d60ccbb73594c918edfad9", "907d61971c6271e05cd813accb3a00379a73626f", "dca29b3f58d38c274b9f88c633a6328f46fca4c0", "d07f8f4b4b3bcc82ae07465729707832ff057f86", "0c6b249d77e998068184e52a2d7fa7a5a867e12f", "96bcfbb766744dbd2127ca50f4411a8857849dad", "3992153325e449cfde38a1de78fddcad46891e31", "89ddeab7a37d4268794c7473902d637b3eb51830", "28b5494e9760e9cc7ec7db41f46a2317cee22ff5" ], "journalName": "", "journalPages": "405-418", "journalVolume": "", "outCitations": [ "6b3760ded659a4d416dba318022cce1d45db0ffa", "1853613a290537b4353763340ab8b37ad236bca2", "64eb963caa7c7de401a406c8f866e022addd0f41", "2d83ba2d43306e3c0587ef16f327d59bf4888dc3", "2b33ddc362cb5e7a21ed2b2484c8f22365f176dd", "06ce77e4abea63948580340be25d7f2a80369e5a", "973e2d45df8d4f70367e8c26115b0ffcb2a91753", "7f6edfb7edd567a9e311282407fbbd86fcf88dd7", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "1ce0f045e3f6c0bb95c0f2b795ef199c824b875f", "681e086dbfe3860b57de18ce4f693b116a04c34e", "99b5f43adf0231eab112ae78130ebe92a1631e3a", "061356704ec86334dbbc073985375fe13cd39088", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "339632faa043d4697570fc4fe48a52d007c3cf06", "24e555913192d8722f4a0240445bf73db71bd884", "1571e83ecb6405900780c1a95e521e4e75395500", "ad0fac81d56f4609bb47fa923a4ea782614ac5dd", "2c3f37bd849d6f6b1cabac7ccae708091fbb6136", "599d0462bb6894243bc098a1993d68d38ad7db27", "04105898efe96c7f2d876e6bcb9e19afd3e23635", "e4502d2acf8889ff66184ca03c9cc323d86df339", "46f74231b9afeb0c290d6d550043c55045284e5f", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "08cdcf2504ab5ed25904ca9329b5caff33fe544e", "5bf9329d87cec93ffd9b25dba75574157991dc90", 
"6d8cb089963d6ba946cc3a0174ba5465c0341238", "1a5ad04c3365a8435317044d5e1c14071a92b4b2", "156d36c9f1e832990f78b5e74c2553a658b85790", "28b5494e9760e9cc7ec7db41f46a2317cee22ff5", "48278e26d6a9b722d697fee3da7f7bc9bc4e8aaa", "8c014d9c33621fe366b391c41069de5dfb16e7cb", "1a07186bc10592f0330655519ad91652125cd907", "0a3ad4a0ec19926128e307e7ec178fd7288b5a37", "46384a8afb3b55012a00d79890361f89d41e619e", "d7e3dd2cf1607c6accf99b80e0ea1313a3ae7f50", "a3604586cc37c111e2d29659faa4980885c64c1d", "2ffc74bec88d8762a613256589891ff323123e99" ], "paperAbstract": "With the recent advance of wearable devices and Internet of Things (IoTs), it becomes attractive to implement the Deep Convolutional Neural Networks (DCNNs) in embedded and portable systems. Currently, executing the software-based DCNNs requires high-performance servers, restricting the widespread deployment on embedded and mobile IoT devices. To overcome this obstacle, considerable research efforts have been made to develop highly-parallel and specialized DCNN accelerators using GPGPUs, FPGAs or ASICs.\n Stochastic Computing (SC), which uses a bit-stream to represent a number within [-1, 1] by counting the number of ones in the bit-stream, has high potential for implementing DCNNs with high scalability and ultra-low hardware footprint. Since multiplications and additions can be calculated using AND gates and multiplexers in SC, significant reductions in power (energy) and hardware footprint can be achieved compared to the conventional binary arithmetic implementations. The tremendous savings in power (energy) and hardware resources allow immense design space for enhancing scalability and robustness for hardware DCNNs.\n This paper presents SC-DCNN, the first comprehensive design and optimization framework of SC-based DCNNs, using a bottom-up approach. We first present the designs of function blocks that perform the basic operations in DCNN, including inner product, pooling, and activation function. 
Then we propose four designs of feature extraction blocks, which are in charge of extracting features from input feature maps, by connecting different basic function blocks with joint optimization. Moreover, the efficient weight storage methods are proposed to reduce the area and power (energy) consumption. Putting all together, with feature extraction blocks carefully selected, SC-DCNN is holistically optimized to minimize area and power (energy) consumption while maintaining high network accuracy. Experimental results demonstrate that the LeNet5 implemented in SC-DCNN consumes only 17 mm2 area and 1.53 W power, achieves throughput of 781250 images/s, area efficiency of 45946 images/s/mm2, and energy efficiency of 510734 images/J.", "pdfUrls": [ "https://arxiv.org/pdf/1611.05939v2.pdf", "http://arxiv.org/abs/1611.05939", "https://arxiv.org/pdf/1611.05939v1.pdf", "http://doi.acm.org/10.1145/3037697.3037746", "http://alchem.usc.edu/portal/static/download/sc_dcnn.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/35bc3b88d20098869a2e5cdb8cb83ed926627af0", "sources": [ "DBLP" ], "title": "SC-DCNN: Highly-Scalable Deep Convolutional Neural Network using Stochastic Computing", "venue": "ASPLOS", "year": 2017 }, "35ceec0f14213fbd9da4c62f856181346bad821b": { "authors": [ { "ids": [ "40211793" ], "name": "Yan Chen" }, { "ids": [ "2357165" ], "name": "Ashwin Machanavajjhala" }, { "ids": [ "2103203" ], "name": "Michael Hay" }, { "ids": [ "1729605" ], "name": "Gerome Miklau" } ], "doi": "10.1145/3133956.3134102", "doiUrl": "https://doi.org/10.1145/3133956.3134102", "entities": [ "Algorithm", "Event monitoring", "Internet of things", "Microsoft Windows", "Pegasus", "Personally identifiable information", "Provable prime", "Real-time computing", "Sensor", "Stream processing", "Wireless access point" ], "id": "35ceec0f14213fbd9da4c62f856181346bad821b", "inCitations": [], "journalName": "", "journalPages": "1375-1388", "journalVolume": "", 
"outCitations": [ "10e42a172c34f7f3a0dbc1114427758d32e9de4a", "3835e7baae639beffc8d2136d7533d6b2da595a1", "451a8f7a1ac7bafcfd30db62fedf946f59d0f0d9", "2cd42bed1c31f74fb3034395e73d0a98b1484469", "abe4cbdbc3a3a8843ee8bbd56bb27ee9061e4cfc", "3c37fb4ebd7a167e1c0d25994169a4dd8826e04c", "9bd2ddd2f401dda852d982eb5c914542e9f8bc53", "040d9acab9003b9d50b2291cc6844b66b2a85d12", "1ecfe23503600b7a6a6ed3dcce86542420e36a06", "0ccbd0b421022170cdb3773cad5e946f860624a1", "2b00e526490d65f2ec00107fb7bcce0ace5960c7", "209eca1a0626e7afb93306c6d82020e2eec09b66", "2086920b485ab2ef94b5b78021bc1d9436a292c2", "4062e487c042c5e7f2e8d45ac538e830965e3552", "472a63c41ef24257148d9cf4fd00aec70cf3add6", "61b66a8324742a09d259a24f98effbb1fbfec9b2", "b532099ff8b67049f292cd62700dca37fc2be623", "2824b6a3d0096b0b522f4b7a7659b5f792f93d8f", "17fac85921a6538161b30665f55991f7c7e0f940", "55a6e8855b5f5d109e1e609d8ea1cfac0b703491", "3db22d9005cf67412c1d3065cd63a8ad19bda0ce", "0c9ffe6bfabf2c1cb013855d913b6089c4918966", "0fcaa5d69913b2601fb4fac3a16ba384e5f1883b" ], "paperAbstract": "Individuals are continually observed by an ever-increasing number of sensors that make up the Internet of Things. The resulting streams of data, which are analyzed in real time, can reveal sensitive personal information about individuals. Hence, there is an urgent need for stream processing solutions that can analyze these data in real time with provable guarantees of privacy and low error.\n We present PeGaSus, a new algorithm for differentially private stream processing. Unlike prior work that has focused on answering individual queries over streams, our algorithm is the first that can simultaneously support a variety of stream processing tasks -- counts, sliding windows, event monitoring -- over multiple resolutions of the stream. 
PeGaSus uses a Perturber to release noisy counts, a data-adaptive Perturber to identify stable uniform regions in the stream, and a query specific Smoother, which combines the outputs of the Perturber and Grouper to answer queries with low error. In a comprehensive study using a WiFi access point dataset, we empirically show that PeGaSus can answer continuous queries with lower error than the previous state-of-the-art algorithms, even those specialized to particular query types.", "pdfUrls": [ "https://users.cs.duke.edu/~ashwin/pubs/Chen-PeGaSus-CCS2017-final.pdf", "http://doi.acm.org/10.1145/3133956.3134102", "http://people.cs.umass.edu/~miklau/assets/pubs/dp/Chen17PeGaSus.pdf", "https://acmccs.github.io/papers/p1375-chenA.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/35ceec0f14213fbd9da4c62f856181346bad821b", "sources": [ "DBLP" ], "title": "PeGaSus: Data-Adaptive Differentially Private Stream Processing", "venue": "CCS", "year": 2017 }, "35d8727f5e726177d8f12a4955524804c5d531cf": { "authors": [ { "ids": [ "34746935" ], "name": "Laith Sakka" }, { "ids": [ "9996721" ], "name": "Kirshanthan Sundararajah" }, { "ids": [ "1700486" ], "name": "Milind Kulkarni" } ], "doi": "10.1145/3133900", "doiUrl": "https://doi.org/10.1145/3133900", "entities": [ "Abstract syntax tree", "Clang", "Compiler", "Computer simulation", "Document Object Model", "Fusebox", "Locality of reference", "Loop-invariant code motion", "Program optimization", "Recursion", "Simulation", "Tree traversal", "\u039c-recursive function" ], "id": "35d8727f5e726177d8f12a4955524804c5d531cf", "inCitations": [], "journalName": "PACMPL", "journalPages": "76:1-76:30", "journalVolume": "1", "outCitations": [ "61b0892e3457e3c2f7ef64c885a0e522e8961eb0", "45f6d75a4a01121b3f210a2ae2b3ed35af96d56c", "1a94dcb3ca9adca3e9af415d5a4b9646331873ee", "1f801246ff8045414a282bfb3ddcd915fbfbfa2b", "9fe9e5fec3dcf749a913c1c8c1208a372861d582", "e5d0a599b9b7c4345ae051dd3281e84d930edffe", 
"16de6f9e2bf6ee1068dbca8c9e5446295c904315", "ae37cc2fb535d038e16dceaddc6d82c06bf1a52e", "9eb4268f46059d73ff3b247439c81264f2425a9a", "4eb2ce783f4f8b7e6821fc297ae245a997e7ecd6", "fd68bcc41917ed0a72bbe1947bca91fe269cfe04", "1793930fb533c17e3bfac398554b78a6421efc25", "4932995d38848288e7cde258d5f1ee962dc7c58f", "feeb4e10f70d2aa0f3c93b76e8003ee09e86dcd7", "115be3be1d6df75ff4defe0d7810ca6e45402040", "c33fb3559ef47ef8827c8d885fe6aa9fe36e24a2", "370ce5ed5b9f96ce229264a32d9021e25ee165ce", "b9f4b3b28876c3c8b76b258bcfa69244f71ac93c", "0b61a17906637ece5a9c5e7e3e6de93378209706", "41f80c443d61fd87f06088eed9769b685cd184d0", "8d39df40831d9dd239ccd9da60de93292ea894a3", "170746e36dfe606ca448ac4ca518b91bf6f828d0", "2f4b494536fccae7ce49b5009faead36f531c8a9", "e543c2c0a3d898ba48ba0f0d6930a242e2444e54", "6b63b0bd3471d04afb88333d638736a120ce32b0", "116af0cc4999896ddc511b353f98d64f7826d6cb", "9675d09e0af07aa58ce5a313e59df126195de745", "1380a3d1e5d48c8c11b5df2e0f9487f9c70ccb88", "03ba72042a52f7e87f507095c0dbbf3c6f52c9e3", "507e8e8b48f6bce4c136dec692b27bba3b9da640", "14d0b773700c4899f856852831f42e13bd8c44ae", "3d65e1276236bcdc0d2a05310c6d6cc2f1f9e2a5", "0f8d24dbc8ea3f140c6c0f51904589108d34f00b", "67f311151efe765e58c46d2548ef2594422fb393", "f0f4757aa2f923a349e8357e73850a78e9b80fee", "2d3d10ed67d91b5e4042979b39283ec52f183ded", "688384fc5e643445e835435e96b9dfcfb6598d36", "c30dd8002f101c1d44f890e3b26eb974eb13347c", "4a58e3066f12bb86d7aef2776e9d8a2a4e4daf3e", "73f072ead051f3f3c764b31e88f3a3aeb0373f7b", "9a773d062aed34730b7aa90ea93d8d11d87b094e", "0c218e59f36adcf0c7a3a779702396894e1f1aa2", "0954212d0d60a1053de84760d96df2f5dea6c208" ], "paperAbstract": "Series of traversals of tree structures arise in numerous contexts: abstract syntax tree traversals in compiler passes, rendering traversals of the DOM in web browsers, kd-tree traversals in computational simulation codes. In each of these settings, a tree is traversed multiple times to compute various values and modify various portions of the tree. 
While it is relatively easy to write these traversals as separate small updates to the tree, for efficiency reasons, traversals are often manually fused to reduce the number of times that each portion of the tree is traversed: by performing multiple operations on the tree simultaneously, each node of the tree can be visited fewer times, increasing opportunities for optimization and decreasing cache pressure and other overheads. This fusion process is often done manually, requiring careful understanding of how each of traversals of the tree interact. This paper presents an automatic approach to traversal fusion: tree traversals can be written independently, and then our framework analyzes the dependences between the traversals to determine how they can be fused to reduce the number of visits to each node in the tree. A critical aspect of our framework is that it exploits two opportunities to increase the amount of fusion: i) it automatically integrates code motion, and ii) it supports partial fusion, where portions of one traversal can be fused with another, allowing for a reduction in node visits without requiring that two traversals be fully fused. 
We implement our framework in Clang, and show across several case studies that we can successfully fuse complex tree traversals, reducing the overall number of traversals and substantially improving locality and performance.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133900" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/35d8727f5e726177d8f12a4955524804c5d531cf", "sources": [ "DBLP" ], "title": "TreeFuser: a framework for analyzing and fusing general recursive tree traversals", "venue": "PACMPL", "year": 2017 }, "35f894ec47f160f5506a2ba6cac559bb6ecfe5d0": { "authors": [ { "ids": [ "2840209" ], "name": "Jiyan Sun" }, { "ids": [ "32055832" ], "name": "Yan Zhang" }, { "ids": [ "1705489" ], "name": "Xin Wang" }, { "ids": [ "1994540" ], "name": "Shihan Xiao" }, { "ids": [ "2100058" ], "name": "Zhen Xu" }, { "ids": [ "1703429" ], "name": "Hongjing Wu" }, { "ids": [ "1719935" ], "name": "Xin Chen" }, { "ids": [ "8361658" ], "name": "Yanni Han" } ], "doi": "10.1109/IPDPS.2017.40", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.40", "entities": [ "Algorithm", "Data center", "Dynamic circuit network", "Failure rate", "Simulation", "Stock and flow", "Throughput", "Time complexity" ], "id": "35f894ec47f160f5506a2ba6cac559bb6ecfe5d0", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "419-428", "journalVolume": "", "outCitations": [ "1eddf92320697dbaae59cb84fafd5af73e0fc865", "3a5121571e1c49cdb961a4d89d555240f388088c", "a91f04a19f41fd2fb9bf19daf6b4141f81bd7d44", "12d3952369fd92db86d4a3fff0a6fa8de3509095", "1376bd56c64639af4645625fd9755c83b2bf7cda", "c38507bf3f1ccaf594fc86b9b86d630224f412cf", "74bfea5aeddb147b9749a72780b50f69badd3e17", "5a76b9bb4d2e4be3d327eb8c15c8b52d6436f50c", "32fef7e6561b553355821f78cda52cf9e4832030", "756883a19a9ed652647d625a863e22f70e94d7c9", "0f8b04cb89e455ceadf0c88fd5dd9f9a7f338ba9", "00ddc85d502aa4bdc45a3b8b9099fad75938b50a", 
"050c5155ac485bc8a43bfff3979d2d6bdc7320ae", "69e1f457561b1832983e1289adde3f288aa7e3a0", "64a6dd2a598d5f012a54fe6ca06d4f7235f66626", "288763b8420ef17baf2f0214cf283433fcb4a447", "282c6a3b573051e3e799d73cfc623ccbd68bcd6a", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "1716eab9042dcb5595c94ea68f23b66ab76e237f", "129567778989fab23b50812b3df30e899e2d6a4e", "1338104c39381c594f699e1942ea1c937608b7a5", "5594c2ddde27f4262a53668ca9b09ad7a9453102", "bb8b82c7855897bcdd6f41e61b0642666f193ad2", "6bf13254711f8bf287fd12749003a43ef3631612" ], "paperAbstract": "Multi-path TCP has recently shown great potential to take advantage of the rich path diversity in data center networks (DCN) to increase transmission throughput. However, the small flows, which take a large fraction of data center traffic, will easily get a timeout when split onto multiple paths. Moreover, the dynamic congestions and node failures in DCN will exacerbate the reorder problem of parallel multi-path transmissions for large flows. In this paper, we propose DC2-MTCP (Data Center Coded Multi-path TCP), which employs a fast and light-weight coding method to address the above challenges while maintaining the benefit of parallel multi-path transmissions. To meet the high flow performance in DCN, we insert a very low ratio of coded packets with a careful selection of the packets to be coded. We further present a progressive decoding algorithm to decode the packets online with a low time complexity. 
Extensive ns2-based simulations show that with two orders of magnitude lower coding delay, DC2-MTCP can reduce on average 40% flow completion time for small flows and increase 30% flow throughput for large flows compared to the peer schemes in varying network conditions.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.40" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/35f894ec47f160f5506a2ba6cac559bb6ecfe5d0", "sources": [ "DBLP" ], "title": "DC^2-MTCP: Light-Weight Coding for Efficient Multi-Path Transmission in Data Center Network", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "36090d8c6422a4055df52c97ad7d910c807cd74e": { "authors": [ { "ids": [ "1685849" ], "name": "Pierre Fraigniaud" }, { "ids": [ "1880491" ], "name": "Dennis Olivetti" } ], "doi": "10.1145/3087556.3087571", "doiUrl": "https://doi.org/10.1145/3087556.3087571", "entities": [ "Algorithm", "Censoring (statistics)", "Cycle detection", "Distributed algorithm", "Property testing" ], "id": "36090d8c6422a4055df52c97ad7d910c807cd74e", "inCitations": [ "cc2c4065befdb2554eeae8650e0f5c7785fc0507", "a78a1624328bce76350d5bc981f56ee859923cad" ], "journalName": "", "journalPages": "153-162", "journalVolume": "", "outCitations": [ "0b000c7f9581eb35e3aebf4d87c05f7b409f2dad", "732150ad6a74906e4c58502c5b414bd2075ca9ba", "010e594e33d9fad8187a07b3197bac6cd0bc7723", "4b08960bfd73d3b84f4211fde9d00fe9a8090962", "71835e095f55cf019b8f9080ded902b3837dde48", "56868532bc7454d19c5080ebcf01bab01606c8b5", "ef694eb71401fff4df71057056c8c97bde250bc3", "5ea503b73546578c6d7ef8d9ca85cbcd3c72ce9f", "1e339b60a7da2d31220d94d841bc96a870cd16e3", "0ba9a19d71bf3e5c95f14b9a202e2b616a83ece0", "4225a33dd931e163da95a2c432236b98fe015536", "6ecf1295f85b72ea7e3f2e0a7aec0ae4061bb29e", "8b30695cb60ecc2d7b08cd6d2242885ad57ad5fb", "04aad0d7fb78c02b25ed91256352094ab78beb36", "20f1dfc912bcacfc8c12b75cbc024b9890437f64", "22f158712c5f765e15840da38e6eadd88a093967", 
"a9ee13114cbf734fc664097d0f8ddc3403b06e4b", "c8124725e2b4294001f5dbd7a82d2c51b606a8db", "c8045e36f0eb6af65fb326a96f3c7b926de56666", "6c16c543adb398f36bb7d7e90d3a06c43d104475", "0094bd8fbb6841300d9a8bfcc206fc4f1a1f94bb", "26e8617b008604e6a710f1b7ee4428d95330678b", "fb366e7cff7592bb621e323654f29773d75a62a2" ], "paperAbstract": "Distributed property testing in networks has been introduced by Brakerski and Patt-Shamir (2011), with the objective of detecting the presence of large dense sub-networks in a distributed manner. Recently, Censor-Hillel et al. (2016) have shown how to detect 3-cycles in a constant number of rounds by a distributed algorithm. In a follow up work, Fraigniaud et al. (2016) have shown how to detect 4-cycles in a constant number of rounds as well. However, the techniques in these latter works were shown not to generalize to larger cycles Ck with k ≥ 5. In this paper, we completely settle the problem of cycle detection, by establishing the following result. For every k ≥ 3, there exists a distributed property testing algorithm for Ck-freeness, performing in a constant number of rounds. All these results hold in the classical congest/ model for distributed network computing. Our algorithm is 1-sided error. 
Its round-complexity is O(1/ε) where ε ∈(0,1) is the property testing parameter measuring the gap between legal and illegal instances.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087556.3087571", "https://arxiv.org/pdf/1706.03992v1.pdf", "http://arxiv.org/abs/1706.03992" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/36090d8c6422a4055df52c97ad7d910c807cd74e", "sources": [ "DBLP" ], "title": "Distributed Detection of Cycles", "venue": "SPAA", "year": 2017 }, "3641cb70c8b14a4840c2f18fce982d00637cb6f9": { "authors": [ { "ids": [ "40293316" ], "name": "Matthieu Dreher" }, { "ids": [ "12950973" ], "name": "Kiran Sasikumar" }, { "ids": [ "35159357" ], "name": "Subramanian Sankaranarayanan" }, { "ids": [ "2284463" ], "name": "Tom Peterka" } ], "doi": "10.1109/CLUSTER.2017.31", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.31", "entities": [ "Application checkpointing", "Channel (communications)", "Data buffer", "FIFO (computing and electronics)", "Flow control (data)", "GROMACS", "Interactive visualization", "Large-scale Atomic/Molecular Massively Parallel Simulator", "Simulation", "Snapshot (computer storage)", "Synthetic data", "Viz: The Computer Game" ], "id": "3641cb70c8b14a4840c2f18fce982d00637cb6f9", "inCitations": [ "ea5cec32e04610174e53ddd02c8ea784de2c44b3" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "509-519", "journalVolume": "", "outCitations": [ "d158b5f2c77b8127aebcfbfceabe0b818c6bdc7f", "22461880994425508a659df74f8df6ddf2cec3da", "e221fe63da1051bd4798acfb6b75fdd4ceee2e2e", "2204cf6af63b34175e3e1d60ee47d75c494bfd58", "02d3739f3d1af8a529fb60366c854b4e207e6e75", "70e1cef1129793954694e0f4519441284448d938", "701c90f0593e5675d62fc3882bd5da9b7c296394", "3e55d3755bad9911f82c2c095106ddceb4fe1062", "771156b34f7f4f539ef7289027e2205692206aed", "4fe2bf624e18d71d87ae36824606c42c64446562", "85f2aaff0e0c06bbd0cd5d52a9bfee4d8d7ab910", "8fe10a1189cbd7644f38a2f65df509d9f84893fd", 
"415c7835aa18984d92086edbb9d9937fcdd0a6eb", "53bcccd314c5c7483933881a6c29235407b3e1c3", "4e63fd64ac90778b8c431e37f54b5cfd01e0f379", "f707ed2deaf679e091fdf1ce8ffccb8c625ce640", "5f971eb8b630bc8735be00a3348ecf02aae7438f", "02c7714e034a832ce25bf0bf563cf0a789ad7342", "3cb0d6e921e50cc798920890f20aca9241c5cc1f", "ea5cec32e04610174e53ddd02c8ea784de2c44b3", "e6ead3a8f84da301792c359ea6b75e04ba075675", "5770929d647dedee65c4a8f706a45ae9f613e834", "7717cb7fbbf26557238c2ef847d0a48def176d0b", "2073266dfb3f034d55cd5a3fca62d230832afd43", "5b5dfbfffeade87035fca8fadca1a7f27f8a72fe" ], "paperAbstract": "Tasks coupled in an in situ workflow may not process data at the same speed, potentially causing overflows in the communication channel between them. To prevent this problem, software infrastructures for in situ workflows usually impose a strict FIFO policy that has the side-effect of slowing down faster tasks to the speed of the slower ones. This may not be the desired behavior; for example, a scientist may prefer to drop older data in the communication channel in order to visualize the latest snapshot of a simulation. In this paper, we present Manala, a flexible flow control library designed to manage the flow of messages between a producer and a consumer in an in situ workflow. Manala intercepts messages from the producer, stores them, and selects the message to forward to the consumer depending on the flow control policy. The library is designed to ease the creation of new flow control policies and buffering mechanisms. We demonstrate with three examples how changing the flow control policy between tasks can influence the performance and results of scientific workflows. The first example focuses on materials science with LAMMPS and a synthetic diffraction analysis code. The second example is an interactive visualization scenario with Gromacs as the producer and Damaris/Viz as consumer. 
Our third example studies different strategies to perform an asynchronous checkpoint with Gromacs.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.31" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3641cb70c8b14a4840c2f18fce982d00637cb6f9", "sources": [ "DBLP" ], "title": "Manala: A Flexible Flow Control Library for Asynchronous Task Communication", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "3642b5f40d78b81e4379c14dab47c1d7021641bc": { "authors": [ { "ids": [ "7588439" ], "name": "Qingtian Gan" }, { "ids": [ "1685757" ], "name": "Song Wu" }, { "ids": [ "2156156" ], "name": "Hai Jin" }, { "ids": [ "1708846" ], "name": "Kun Wang" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.69", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.69", "entities": [ "Cloud computing", "Hardware virtualization", "Non-uniform memory access", "Operating system", "Power Management Unit", "Sampling (signal processing)", "Scheduling (computing)", "Uniform memory access", "Virtual Machine Manager", "Virtual machine" ], "id": "3642b5f40d78b81e4379c14dab47c1d7021641bc", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "531-538", "journalVolume": "", "outCitations": [ "575984915952de4383679b0c54029f2a01ab002e", "8beda53e2b3cd04434aaaabef463ef8e18706df1", "02ed0ec3bb95776b5c06e2784810b501c4d3f053", "b388e6807c3708104e3d1edabc56c44d4d11ed84", "f4ed7fb35916bd0d36a53198384bb0ed2ff34c3f", "4d51031ce850ea0f72c865011280a0aeeaaf9e02", "41bbedd069ffaa2b1ffa1ce640f101d2f2980f4a", "9872bf81d8559bfb5fcf4dc65674afba98dec470", "6565cc1520fcaf69205a2c5d4d9a1065e7c6bd5b", "d582f634687f20e5343cd81ffbb92c4009b2b10f", "294ad206a120a519cfd99294c8b5e004dcc06abf", 
"76b73a657ef1cb543790acc99fc8abc80dbe4fc7", "5b6977dafe715278117882424054ea2e27f1107f", "c2450948d6049003b660018c98caa92f52c64eb5", "146139716c9e8ec4f57475b9673171761ac34074", "4b4ee1ee9bbfd9527fba0bbd761bd61a59f96a48", "14f2ab7b89c9f508f9e886e4fd5bb702c867a190", "36e1b02a66ed928ef13e3a2ba6852e90a8713036" ], "paperAbstract": "Non-Uniform Memory Access (NUMA) architecture has become the dominant architecture and is widely used in virtualization platforms. In NUMA-based cloud computing platform, arbitrary topology of vCPUs and memory may cause significant performance degradation for VMs, which introduces great challenges for virtual machine monitors (VMMs) to efficiently manage the vCPUs and memory. Previous studies mainly sample the characteristics of the vCPUs to indicate the optimizing strategies to reduce the NUMA overheads in virtualization platforms. But the typical periodical sampling methods have some deviations with the real vCPU characteristics. This leads to the inaccurate sampling and scheduling decisions for the optimizing strategies. Motivated by the inaccuracy in sampling methods and scheduling decisions, we propose a fine-grained scheduler, named vScope, which makes accurate scheduling decisions according to the guest OS processes in the vCPUs, to improve the performance of memory-intensive workloads in cloud platforms. In vScope, the VMM identifies the guest OS processes in the vCPUs and calculates the NUMA affinity of each process from the PMU data. At the end of vCPU's scheduling cycle, the scheduler appropriately schedules the vCPUs to their local NUMA node to alleviate the unnecessary NUMA overhead. We implement vScope in Xen-4.5.1 VMM and evaluate its effectiveness with some memory-intensive benchmarks. The experimental results shows that vScope can achieve up to 11.5% performance improvement for these workloads when compared with the Credit scheduler in Xen. 
Moreover, vScope only introduces limited overhead into the system.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.69" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3642b5f40d78b81e4379c14dab47c1d7021641bc", "sources": [ "DBLP" ], "title": "vScope: A Fine-Grained Approach to Schedule vCPUs in NUMA Systems", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "366259c4eb05c71aa2f2ae0c3da087991ccb47e0": { "authors": [ { "ids": [ "2948429" ], "name": "Xiaokang Qiu" }, { "ids": [ "1745989" ], "name": "Armando Solar-Lezama" } ], "doi": "10.1145/3133889", "doiUrl": "https://doi.org/10.1145/3133889", "entities": [ "Control flow", "Correctness (computer science)", "Data structure", "Functional specification", "Imperative programming", "Loop invariant", "Natural deduction", "Natural proof", "Speech synthesis", "Theory" ], "id": "366259c4eb05c71aa2f2ae0c3da087991ccb47e0", "inCitations": [], "journalName": "PACMPL", "journalPages": "65:1-65:28", "journalVolume": "1", "outCitations": [ "d654918822d11617314e83b7d2b4a5abab4641cc", "6517941e83041e6c0c847cd907ac8231f6b34773", "acd8d3df882c7be3be2c3af1c2e42575f1fc138a", "3e2c5db0e1a0b7ce7e3bbbf5bd4b0b6482a5f2a3", "1ef301c1b275091b6a50d620b41df4722f2108f0", "727c9a475f25de7d99972ae711a5bb891b674d16", "b12f8ce4f08743c663b1ed6df22dfa4366c565c3", "160b0741f10eafec7ba8ec1f00c460cfe6623d30", "64178ea5d06ac252e246e2962a0757565900be7b", "38b7e9721cc3e326580465deaf0f0028b92afe6a", "3eac05c165acf65b228968c1a1dc764dcbd156d0", "235b9c8f10461a95398e169ecb91cf3e223d3350", "1b0c3ef8e2eabf218549e9926b51a51044b7639d", "5b3e07e7220551233d1ffd04830ebb18387f345a", "099cfdca0f11eeaf46dd6457f33caff8e8fbcb41", "33ecc7d8369b699b0dd8341ba6fb33809abc4bd6", "2184b060ddb8da62693bc9466ee095f96a604f71", 
"85b02aefce44557fba22fff9c1fbefe842cdabfc", "06543a6b9f14c23aacef35fa7de129afa8c23ad8", "03875a7ec6e6f29622c8a383fe579af9e8bd35bb", "26cc8e115d38b17add568e956dc48d4d031f0468", "219c95e028a1a8e2baebdecb8b998e12a03bc33b", "05c8103e1b77437875a4c69c6258be988ab2946b", "1ec7a6456958359132117635e12b682e39220b7c", "2de47fc883ae50e9850ab3ec797f8c6cabd294d1", "9a40f24f7249ae95c6ea45f3b44bc38c1cdedf8b", "0c218e59f36adcf0c7a3a779702396894e1f1aa2" ], "paperAbstract": "This paper presents natural synthesis, which generalizes the proof-theoretic synthesis technique to support very expressive logic theories. This approach leverages the natural proof methodology and reduces an intractable, unbounded-size synthesis problem to a tractable, bounded-size synthesis problem, which is amenable to be handled by modern inductive synthesis engines. The synthesized program admits a natural proof and is a provably-correct solution to the original synthesis problem. We explore the natural synthesis approach in the domain of imperative data-structure manipulations and present a novel syntax-guided synthesizer based on natural synthesis. The input to our system is a program template together with a rich functional specification that the synthesized program must meet. Our system automatically produces a program implementation along with necessary proof artifacts, namely loop invariants and ranking functions, and guarantees the total correctness with a natural proof. Experiments show that our natural synthesizer can efficiently produce provably-correct implementations for sorted lists and binary search trees. 
To our knowledge, this is the first system that can automatically synthesize these programs, their functional correctness and their termination in tandem from bare-bones control flow skeletons.", "pdfUrls": [ "https://engineering.purdue.edu/~xqiu/natural-synthesis.pdf", "http://doi.acm.org/10.1145/3133889", "https://engineering.purdue.edu/~xqiu/impsketch.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/366259c4eb05c71aa2f2ae0c3da087991ccb47e0", "sources": [ "DBLP" ], "title": "Natural synthesis of provably-correct data-structure manipulations", "venue": "PACMPL", "year": 2017 }, "3699e8e1556416f26a2d1fb70659fefb971613cf": { "authors": [ { "ids": [ "40505029" ], "name": "Xiaolu Lu" }, { "ids": [ "1751760" ], "name": "Alistair Moffat" }, { "ids": [ "1691169" ], "name": "J. Shane Culpepper" } ], "doi": "10.1145/3077136.3080793", "doiUrl": "https://doi.org/10.1145/3077136.3080793", "entities": [ "Approximation algorithm", "Document", "Fubini\u2013Study metric", "Relevance", "Sampling (signal processing)", "Spatial variability" ], "id": "3699e8e1556416f26a2d1fb70659fefb971613cf", "inCitations": [ "3821bac3ae629cf271a119cda269f508b7f08ad5" ], "journalName": "", "journalPages": "35-44", "journalVolume": "", "outCitations": [ "0e59a828d3abb342922e2e275e4dc39470af0712", "99cb96ea60e27a1485b5a1d563e9181cf815136f", "150a31a1d38d90acefb560c2a42efed1ae67f7f7", "bacff358bfd85a4673782c1f56f030283bdaa4e5", "f7fcc97be18be855f0b337972b740a036606d7c0", "0ddb8af211f2af018e8adf5affe8fc497f056e67", "5416ab359f9d024f582af3544a7b4baf877a625a", "67526725638bbb8f389cbaef106756f40cc073b6", "405381203d858a4313f2b51efac69f0787f824d9", "21529b32a59c213109b9704b7fbceb4197abb3b0", "c41950d7d0ce14e41691c0fcff67dd7aaaada571", "580c9f1d9836b13164316fd832792d8bf59ed964", "dafc464a515d0fd81bc1c65728a39d1c957a9964", "e06ceef1cabae1c01f4f4bd1fcdb9ba7487d7977", "355146c49d983f5c35c6033374a7252ac0141fd8", "1260e6de04361b76d751d3a1049dea3f3f54dac8", 
"471cb4c2e5039bdaacb0274fee70c7fe2e93493e", "618ee0a69b0955509fb78f2a2263c33386a804cf", "f2928bc2fe1953fbe1e40621456d2ac57a70d729", "e7584e203f7811ae285f28eab8d1e81d72411227", "039ca56524ad5a368e91bade6b56108da622a1d4", "f82989fb843e58cd98569552c700eb07deabd3eb", "1b50c8353d9e4a241a5f4a9c0088eb9c5f593e1f", "08badaad9669b69b16ce9437d0b2d52b5f33c8dd", "1b9458181d29fdb3745e3b50f41fe4ef6c2b4492", "54d477a9f652000812e8f6b9f8c9096525d07b05", "1150351d79e64f8768b5cbde00215478c7474aa8", "007165fe06a9ffcc67263dfe70f06b0ec0f09e2b" ], "paperAbstract": "Increasing test collection sizes and limited judgment budgets create measurement challenges for IR batch evaluations, challenges that are greater when using deep effectiveness metrics than when using shallow metrics, because of the increased likelihood that unjudged documents will be encountered. Here we study the problem of metric score adjustment, with the goal of accurately estimating system performance when using deep metrics and limited judgment sets, assuming that dynamic score adjustment is required per topic due to the variability in the number of relevant documents. We seek to induce system orderings that are as close as is possible to the orderings that would arise if full judgments were available. Starting with depth-based pooling, and no prior knowledge of sampling probabilities, the first phase of our two-stage process computes a background gain for each document based on rank-level statistics. The second stage then accounts for the distributional variance of relevant documents. We also exploit the frequency statistics of pooled relevant documents in order to determine a threshold for dynamically determining the set of topics to be adjusted. 
Taken together, our results show that: (i) better score estimates can be achieved when compared to previous work; (ii) by setting a global threshold, we are able to adapt our methods to different collections; and (iii) the proposed estimation methods reliably approximate the system orderings achieved when many more relevance judgments are available. We also consider pools generated by a two-strata sampling approach.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080793" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3699e8e1556416f26a2d1fb70659fefb971613cf", "sources": [ "DBLP" ], "title": "Can Deep Effectiveness Metrics Be Evaluated Using Shallow Judgment Pools?", "venue": "SIGIR", "year": 2017 }, "36a64cb68a3da37ed9d54f03750e1f1ac6d3d336": { "authors": [ { "ids": [ "3108945" ], "name": "Haoyu Zhang" }, { "ids": [ "26315993" ], "name": "Logan Stafman" }, { "ids": [ "3340845" ], "name": "Andrew Or" }, { "ids": [ "3122063" ], "name": "Michael J. Freedman" } ], "doi": "10.1145/3127479.3127490", "doiUrl": "https://doi.org/10.1145/3127479.3127490", "entities": [ "Algorithm", "Approximation algorithm", "Experiment", "Exploratory testing", "Fairness measure", "Iteration", "Jumpstart Our Business Startups Act", "Machine learning", "Resource contention", "Scheduling (computing)" ], "id": "36a64cb68a3da37ed9d54f03750e1f1ac6d3d336", "inCitations": [ "570348129778890e621626be0aa3875b5316d4ef" ], "journalName": "", "journalPages": "390-404", "journalVolume": "", "outCitations": [ "54cd614a15ce790e2144e45e160596efd36e6316", "453fded1ab86ed9d9d90ff3ff83a4faa493d3db2", "679cfc41aa5f0174040b5ab23ea92cf04f495a6e", "078ee0e2cb096636c57b6efb9caa4c62f2371871", "22899cc07f2b2ae632ac58b2ec58fbe239f3fbd3", "e6c961f54734085b73087abe8e30301ceb91c8b2", "6a912e2c1f818a047bc620f475b6b6e3b0dbacfe", "aa936077447217db8970ec799ccfeb09b2dd03f0", "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "3784b73a1f392160523400ec0309191c0a96d86f", 
"32192d744d86e7cde73f0c9aa773214f88619a9e", "9da1d06e9afe37b3692a102022f561e2b6b25eaf", "5936754b5762260bf102ac95d7b26cfc9d31956a", "184014795c3c2bbf23f3959f6d8b1ab8bc03aea8", "9c018f1b0aabe84ba61dfc969e273f2a9653c6ec", "7693cafd6f29623f61d66f031cadd60b6ce827d7", "3d4d19a68a9ee57f0c4bb5f692b488ac4ce2bb8e", "1ee88e64945503c93b68344e639a7ae085f6e37d", "0546fa6622b8b8db8527be777a692d88c5c037b0", "48dcef999ef41e839bf66386c0c0a54c13be1fcf", "08f13e484e7e51831ec13076d14570ced91a50fb", "55416b8613af06855bd94059c3d0305adc58057b", "235fa2b1983eff9f13b27c620cda389359126bf4", "9f1f065bf08cd90431cc051267a708f56436cd82", "17c0a7de3c17d31f79589d245852b57d083d386e", "080aebd2cc1019f17e78496354c37195560b0697", "214c966d1f9c2a4b66f4535d9a0d4078e63a5867", "274a6c951c4aa82e6ef6b9f63c11f0ef66722c20", "148b83a379a549e3e88fc9506e91e14b09756c7d", "ecf9adafc610cd417be2aa4092e809446e0f361f", "c3c262b8e56536d14826926b69af59eaefc29bc2", "41e0ef45114deff41d92c8b235393cfe4d9a94f4", "0d1f1d0984b3e8d6b8e9169c05e2a7d29e2a32d0", "477653f4cb3c213d1d4252cfc7a185e7785b635c", "046a1302079f56b94c81457bf7fd21c3417a9f72", "d65f897b7cea2761f88411e757e9587c0282cb41", "043afbd936c95d0e33c4a391365893bd4102f1a7", "483015dc170d20e1a19828d493eff364cd7a42ce", "2a7d3b967a356c2a42f729048b0d3511b0005351", "0d868efa67bf06b1f784d60769c082fd9a58893e", "4954fa180728932959997a4768411ff9136aac81", "0558c94a094158ecd64f0d5014d3d9668054fb97", "b6571efa4483aa00d23bbcd36930c4877548ba38", "454639ed30a2d336ea976c53b9fe851acbedb7fd", "725f90b576c41c7329bdf3e11714cac54436f2b6", "bbc2a698c2fb2b76e256cc51a9d7c37765ab51b6", "47f5bba54710b0e1663e9336790cb4609d16077d", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "2988e34168fa91398fa397baf823af2063893e9c", "00aa614734a26a19b09a0a3bdee2adc77bdac5e4", "26f58c28de7469dc6b6846d37953bbbe3f4fc0e9" ], "paperAbstract": "Training machine learning (ML) models with large datasets can incur significant resource contention on shared clusters. 
This training typically involves many iterations that continually improve the quality of the model. Yet in exploratory settings, better models can be obtained faster by directing resources to jobs with the most potential for improvement. We describe SLAQ, a cluster scheduling system for approximate ML training jobs that aims to maximize the overall job quality.\n When allocating cluster resources, SLAQ explores the quality-runtime trade-offs across multiple jobs to maximize system-wide quality improvement. To do so, SLAQ leverages the iterative nature of ML training algorithms, by collecting quality and resource usage information from concurrent jobs, and then generating highly-tailored quality-improvement predictions for future iterations. Experiments show that SLAQ achieves an average quality improvement of up to 73% and an average delay reduction of up to 44% on a large set of ML training jobs, compared to resource fairness schedulers.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3127490", "http://www.cs.princeton.edu/~haoyuz/publications/slaq-socc17.pdf", "http://www.sysml.cc/doc/86.pdf", "https://arxiv.org/pdf/1802.04819v1.pdf", "http://www.cs.princeton.edu/~haoyuz/publications/slaq-slides.pdf", "http://arxiv.org/abs/1802.04819" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/36a64cb68a3da37ed9d54f03750e1f1ac6d3d336", "sources": [ "DBLP" ], "title": "SLAQ: Quality-Driven Scheduling for Distributed Machine Learning", "venue": "SoCC", "year": 2017 }, "3709ec18aa09b58cc45133d39b4f4f930249d042": { "authors": [ { "ids": [ "1930940" ], "name": "Jiamin Huang" }, { "ids": [ "2198667" ], "name": "Barzan Mozafari" }, { "ids": [ "3334450" ], "name": "Thomas F. 
Wenisch" } ], "doi": "10.1145/3064176.3064179", "doiUrl": "https://doi.org/10.1145/3064176.3064179", "entities": [ "Asynchronous I/O", "Best, worst and average case", "Call graph", "Control flow", "Control flow graph", "Critical path method", "Database transaction", "Event-driven programming", "Interleaved memory", "MySQL", "Open-source software", "PostgreSQL", "Profiling (computer programming)", "Programmer", "Run time (program lifecycle phase)", "Server (computing)", "Software system", "Web server" ], "id": "3709ec18aa09b58cc45133d39b4f4f930249d042", "inCitations": [ "7254ad8940dc3ea502ef65fd9b71a9a2952daf81", "4f5dd5c31143e4813b195ae74318bea712302e49" ], "journalName": "", "journalPages": "64-79", "journalVolume": "", "outCitations": [ "029f5fdbcbd621e2795f9dcd9b7b0a440a69e251", "27018dd4fb8779ca98c6ed2686664f64841cc1ed", "157352f39ed5ba9a571dd4c27b48786ac8be90cf", "1cc643b82f19a3774901d0500b4352e9ce388f5f", "7636e0309d7d44a2c024569b9beaacaad4c1d606", "46bd42a58c0cd9ab4d4ffe6bfde1acc98a1f63d8", "4d0ff88dd2970cbe26e98364c8243087e24d0d63", "990ba8f4f4af23ab60e9b9f1bfd5aabf485a4fa1", "6fbc80d0cb04bd30844efe3c358cb6b5a254c084", "d1b6298527269ad1ea235c77ba47b97ee87d2de1", "003d5a65de0ac72daaf105ded903cb3eb88585b3", "5a51a18a63fc57cd9ef206bcfdb303933c2bcfb9", "ced5a63b410b5e0a2fd29896785498da94954da1", "3ab1a2ccf6a9e0a546c67c994836e3c33eba503a", "430611bb598deb44861324f75da01a612979bafb", "ca05684712ef959cf707c085f1cfa731c1a86d3d", "718c1aee5db0471cd1014840e774b4fec4655aa9", "6033797f241a3687aab939db1d88b5184d32c0fb", "4e6ba973d4023f7463301180c294fffcde535e1c", "0d62acaefd4c4f41fd5814c8d9267d7798de9284", "a0b1b8ee4a9e6ae68ce6a712ad0a66ddb4a12117", "578667cbc39c6bfc1c89fe6a54506643c3b097f8", "b53d8516bf83c1b58147e2b4dbc870a8d396e53e", "92e0243e1a73c77ef8b90292e3798f765b38f269", "15c80ec5104e98d6f84b5ed348ba0276c0739862", "3c457cec00499e41dd05516db79c4daf836102ad", "6e0039d62431ec95136f738c5020f6e3d3711168", "22a3f0837bd6a913f516ba497469176be641c7d4", 
"4f05a78c2e2abf932915c33c6a2bb9c726ce4ac2", "3c77787fbaf5cb17cd600cd6e66534be490a26ee", "1455be30b0bb364ffc4c35d2cf1ee05cfb32595f", "02d34867bb3a1e6c1a1ef5e1e3dd291066a61bf7", "eecbbd2e4475d79fc5aeabe166a82cffca2ee72b", "48875f03f4788f91c1d5c53d0918b27b8aa353a1", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "7df1de1c9663c2dfaefc1277a7d1cb3366b8c358", "070c3a8c3ce10277424f23c01a54b377478ee59c", "4b1f691ecdf7b78a9dc176ba4913543e7eb62232", "0541d5338adc48276b3b8cd3a141d799e2d40150", "6632e05bf8efe9498f622c7af82b4ac0ac1db23d", "91713f09aef58aea7b5a319156bdb92400cc2e24", "10551c91b4d36d1009b23b4d2b88a9e1733fe029", "12a760827e2531d3bfef437fbdd88178e717c961", "33c519690a5cbd955d21888eab0c9c1d680ebae9", "4f5dd5c31143e4813b195ae74318bea712302e49", "6a9677f82aef666d76090773f74fa01196c3525f", "808fadaaa7d7091e95809f419959917bb6ce4a6d", "53aabc0ab7bdb22c4bb5b508a4db2fc4a2387060", "408d77abb094e0125e39cf6e5f5d9dfcbe6c3595", "705a1f30d9b5e8c6025cbc72faffb998208b389e", "41f8af6e2fafbf65f4f84534cb905c8824d7854d", "3386a3417920dd16efec5459b9b48930ece73dd8", "4f089c183d486d6f21a57cbaa8754849e05fb45d", "fcfc9da7c8b72421849f001b2a220c0b0a7e7d76", "55af531059610139bdba4f2ac4b1e63062712d6d", "0aeb77fb41dc8e863e054fcffea7b8b3011515ce", "83f2087f3c602d043277927380e35885879210f5", "a96e5d846168fb0fc57861d457036e8b4143ed21", "10818733bad4a86f77da645accc6c49d0e8db7c0", "093f488e41a142e981c395f69f4946ed2b1983a7" ], "paperAbstract": "Most software profiling tools quantify average performance and rely on a program's control flow graph to organize and report results. However, in interactive server applications, performance predictability is often an equally important measure. Moreover, the end user is often concerned with the performance of a semantically defined interval of execution, such as a request or transaction, which may not directly map to any single function in the call graph, especially in high-performance applications that use asynchrony or event-based programming. 
It is difficult to distinguish functionality that lies on the critical path of a semantic interval from other activity (e.g., periodic logging or side operations) that may nevertheless appear prominent in a conventional profile. Existing profilers lack the ability to (i) aggregate results for a semantic interval and (ii) attribute its performance variance to individual functions.\n We propose a profiler called VProfiler that, given the source code of a software system and programmer annotations indicating the start and end of semantic intervals of interest, is able to identify the dominant sources of latency variance in a semantic context. Using a novel abstraction, called a variance tree, VProfiler analyzes the thread interleaving and deconstructs overall latency variance into variances and covariances of the execution time of individual functions. It then aggregates latency variance along a backwards path of dependence relationships among threads from the end of an interval to its start. We evaluate VProfiler's effectiveness on three popular open-source projects (MySQL, Postgres, and Apache Web Server). By identifying a few culprit functions in these complex code bases, VProfiler allows us to eliminate 27%--82% of the overall latency variance of these systems with a modest programming effort.", "pdfUrls": [ "http://web.eecs.umich.edu/~mozafari/php/data/uploads/eurosys_2017.pdf", "http://doi.acm.org/10.1145/3064176.3064179" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3709ec18aa09b58cc45133d39b4f4f930249d042", "sources": [ "DBLP" ], "title": "Statistical Analysis of Latency Through Semantic Profiling", "venue": "EuroSys", "year": 2017 }, "3730e37138e719a161a22aa7b0f62af6edcf5e4e": { "authors": [ { "ids": [ "2673238" ], "name": "Samuel K. Gutierrez" }, { "ids": [ "7645272" ], "name": "Kei Davis" }, { "ids": [ "1689138" ], "name": "Dorian C. Arnold" }, { "ids": [ "37509486" ], "name": "Randal S. 
Baker" }, { "ids": [ "2768905" ], "name": "Robert W. Robey" }, { "ids": [ "34694816" ], "name": "Patrick S. McCormick" }, { "ids": [ "15037751" ], "name": "Daniel Holladay" }, { "ids": [ "31418261" ], "name": "Jon A. Dahl" }, { "ids": [ "19269703" ], "name": "R. Joe Zerr" }, { "ids": [ "12419611" ], "name": "Florian Weik" }, { "ids": [ "1711228" ], "name": "Christoph Junghans" } ], "doi": "10.1109/IPDPS.2017.13", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.13", "entities": [ "Block code", "Library", "Library (computing)", "Message passing", "Multithreading (computer architecture)", "Parallel computing", "Runtime system", "Thread (computing)" ], "id": "3730e37138e719a161a22aa7b0f62af6edcf5e4e", "inCitations": [ "a2921d217af6dc2c39089e784a06a6ef2b2cda2d" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "469-478", "journalVolume": "", "outCitations": [ "178cf471f97b7acd72780028659648329423679e", "346c5896ff2032d7c7a8400cbbd3bd2f61c72f1a", "058e086bb1235376491470143244502f6678bbc5", "4cbdbf2ca478894817ae18e0f4d007b298efd6d0", "9e899a90a24787f391346810bfe1c72480f344d2", "262c123c6325d7b4c2edc904d6e85d352ab266b9", "149e6aa3e77eed60305ec51f8cda94e9706a38ca", "7421d28428e041c271fe6370c331353f4a3fa974", "602dcccc2bf6af1ca84355d530ff1e0a79391217", "7067391b82722e3b8b0434dcf0a5ada48e76a209", "069ec88e2d30784746ab2224bc096e494c745382", "5efbddc09b301537d559b5876756b18df32a0e02", "48aa3489d4290c0f3683771bd9dc5f23745b4b56", "16b14944bdfa3c34da1367cb7882667c4d09cb99", "6396e234e8e03ca2a7a6c7be78dfd2c8f775ac3a", "5e50ffa96fa021c85ccedb1bb8b84b59ee268de8", "b1afc99d24247003eb5f969374fff3bd2af71f27" ], "paperAbstract": "Hybrid parallel program models that combine message passing and multithreading (MP+MT) are becoming more popular, extending the basic message passing (MP) model that uses single-threaded processes for both inter- and intra-node parallelism. 
A consequence is that coupled parallel applications increasingly comprise MP libraries together with MP+MT libraries with differing preferred degrees of threading, resulting in thread-level heterogeneity. Retroactively matching threading levels between independently developed and maintained libraries is difficult; the challenge is exacerbated because contemporary parallel job launchers provide only static resource binding policies over entire application executions. A standard approach for accommodating thread-level heterogeneity is to under-subscribe compute resources such that the library with the highest degree of threading per process has one processing element per thread. This results in libraries with fewer threads per process utilizing only a fraction of the available compute resources. We present and evaluate a novel approach for accommodating thread-level heterogeneity. Our approach enables full utilization of all available compute resources throughout an application's execution by providing programmable facilities to dynamically reconfigure runtime environments for compute phases with differing threading factors and memory affinities. We show that our approach can improve overall application performance by up to 5.8x in real-world production codes. 
Furthermore, the practicality and utility of our approach has been demonstrated by continuous production use for over one year, and by more recent incorporation into a number of production codes.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.13" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3730e37138e719a161a22aa7b0f62af6edcf5e4e", "sources": [ "DBLP" ], "title": "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "37676b91fdf1e4b7ff5edeb230e3dce67033d717": { "authors": [ { "ids": [ "39611936" ], "name": "Pawel Garncarek" }, { "ids": [ "1681361" ], "name": "Tomasz Jurdzinski" }, { "ids": [ "2878308" ], "name": "Krzysztof Lorys" } ], "doi": "10.1109/IPDPS.2017.105", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.105", "entities": [ "2.5D", "Adversary (cryptography)", "Algorithm", "Best, worst and average case", "Failure rate", "Online algorithm", "Radio jamming", "Scheduling (computing)", "Throughput" ], "id": "37676b91fdf1e4b7ff5edeb230e3dce67033d717", "inCitations": [ "a28c00aea28c7a0d6fe484fc1b8a9da56d094531" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "347-356", "journalVolume": "", "outCitations": [ "1c852777f2ecc9763eca2c376347ebcc69aedbdd", "d0c82a11f7ce32a060ffbb2cf2f30a207a2b7d63", "f345ecb8e361107bd6c8ca9b8f712c42a102c4cd", "760de5ae1fa7d8ca69db08cbd504982eda94dabc", "2dd47a99081828629dfe38bfeb36311a54f1ab1b", "a56835a713cec9f3c18cd45d38faa1309145b00b", "31dccb2338fc1d2b38463f32b485ab7ee5ed5d9c", "22a9148898411fbe6b55ad852c0f01df26bf85a9", "13d6d06c7ba1274188ed59de6dec7f590115a088", "43a2d43972183f86696e073d5e55f228a7d717e8", "053393e06597c1cc81bc47b8414b608e853a6559", "678d52cb631cfb479179c8a01a0270178717b904" ], "paperAbstract": "We consider the problem of scheduling packets of different lengths via k directed parallel 
communication links. The links are prone to simultaneous errors --- if an error occurs, all links are affected. Dynamic packet arrivals and errors are modelled by a worst-case adversary. The goal is to optimize competitive throughput of online scheduling algorithms. Two types of failures are considered: jamming, when currently scheduled packets are simply not delivered, and crashes, when additionally the channel scheduler crashes losing its current state. For the former, milder type of failures, we prove an upper bound on competitive throughput of 3/4 - 1/(4k) for odd values of k, and 3/4 - 1/(4k+4) for even values of k. On constructive side, we design an online algorithm that, for packets of two different lengths, matches the upper bound on competitive throughput. To compare, scheduling on independent channels, that is, when adversary could cause errors on each channel independently, reaches throughput of 1/2. This shows that scheduling under simultaneous jamming is provably more efficient than scheduling under channel-independent jamming. In the setting with crash failures we prove a general upper bound for competitive throughput of (√5-1)/2 and design an algorithm achieving it for packets of two different lengths. This result has two interesting implications. First, simultaneous crashes are significantly stronger than simultaneous jamming. 
Second, due to the above mentioned upper bound of 1/2 on throughput under channel-independent errors, scheduling under simultaneous crashes is significantly stronger than channel-independent crashes, similarly as in the case of jamming errors.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.105" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/37676b91fdf1e4b7ff5edeb230e3dce67033d717", "sources": [ "DBLP" ], "title": "Fault-Tolerant Online Packet Scheduling on Parallel Channels", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "376c5f9084d8721aed49db7a2fbe693a6b5670ed": { "authors": [ { "ids": [ "1706349" ], "name": "Thomas Rauber" }, { "ids": [ "2485020" ], "name": "Gudula R\u00fcnger" }, { "ids": [ "36223145" ], "name": "Matthias Stachowski" } ], "doi": "10.1109/IGCC.2017.8323578", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323578", "entities": [ "Analysis", "Benchmark (computing)", "Central processing unit", "Dynamic voltage scaling", "Electronic data processing", "Execution", "Frequency scaling", "Graphics processing unit", "PARSEC", "Parsec", "Program optimization", "Run time (program lifecycle phase)", "Thread (computing)", "energy balance", "voltage" ], "id": "376c5f9084d8721aed49db7a2fbe693a6b5670ed", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-6", "journalVolume": "", "outCitations": [ "2516524a25fdba2c54f9a1d80b26300d896f2c9e", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "5b20182516558c9da4b930064272a6d9dde74a3d", "85029306db36c3ee8e3ae2f9a42318680db58558", "dc9338d735ebcb4bb3e2665db0ee216ffe68d0cd", "17229ff498587500726f7bdeee7ddfd32092779a", "a540bbeca747b67655cc52307bf7801dfb0faab2", "7d3e1fa8ced75cf794d3ec3089ddfa6bff50056d", "56e4263251aa8d1888ca5840e0bf187af043f49c", "e7f3a439a130fd1035bbdb3c60ad960d7d533c5e", "3ee47780011ee618bd5a64624a662375e1958e0a", 
"208a5e499a2836effd9d15c2ff867cf5697796ac", "56385c2c6f48bd142e2f0e8aca7b9cba14d4c9b8", "b1479a44735a4d93a99c3c1572acc6b752046c04" ], "paperAbstract": "Energy efficiency is considered to be a critical concern for modern hardware and a variety of hardware features have been developed to improve the energy balance for executing applications. This article focuses on the dynamic voltage frequency scaling (DVFS) technique, which is available for many platforms, including CPUs and GPUs. Analytical models for capturing the energy efficiency are considered and it is investigated whether such an analytical model is able to support an a priori selection of the operational frequency that leads to a near optimal energy consumption for the application code to be executed. Also the energy-delay product (EDP) is investigated, weighting the power against the square of execution time. The experimental evaluation is performed on the basis of the multi-threaded PARSEC benchmarks. We show that the operational frequency selected according to the analytical models leads to an energy consumption that is near the minimum energy consumption over all frequencies available.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323578" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/376c5f9084d8721aed49db7a2fbe693a6b5670ed", "sources": [ "DBLP" ], "title": "Model-based optimization of the energy efficiency of multi-threaded applications", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "376f2db9939d49811b32090c5ed03deb3bbadd0f": { "authors": [ { "ids": [ "2053557" ], "name": "Hoang-Vu Dang" }, { "ids": [ "2632706" ], "name": "Sangmin Seo" }, { "ids": [ "30563484" ], "name": "Abdelhalim Amer" }, { "ids": [ "2103230" ], "name": "Pavan Balaji" } ], "doi": "", "doiUrl": "", "entities": [ "Enterprise resource planning", "Interoperability", "Locality of reference", "Message Passing Interface", "Message passing", 
"Scalability", "Synchronization (computer science)", "Synthetic data", "Thread (computing)", "Throughput" ], "id": "376f2db9939d49811b32090c5ed03deb3bbadd0f", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "314-324", "journalVolume": "", "outCitations": [ "1f2ff98f9413bb36c641e9edcfa79f7b33eeb80a", "0c3b1da050089cf8c701fa0cb4ddc18566d715e6", "8b05280f21297c235917137a81f773b8819aa8fe", "0ef994850d0eaf94ccb3fb2c6e940566a4573827", "3e77a77247734dc918a5723573e1158eee1955f9", "4750c08ab4b3f7b474799e95a6e096cb734933b4", "afc4931dd371130c3d4c6d6dbfda881140847af1", "5cef39c640e45183ad4122787a6c0bd76b6e3f30", "33bf94c462ecb645ca580f0919014ec98db3d69a", "3c97f0261890268fdd80117dd4b835aba48f3913", "4d5f97d550d711413fcd391b3e9471bbdc20f712", "5e50ffa96fa021c85ccedb1bb8b84b59ee268de8", "3aa0a5ca29ad07f90866719d4a6edeca7ffa9cd4", "49623c3ab7dbba611116466d3c98303864051b6c", "5efbddc09b301537d559b5876756b18df32a0e02", "45b75209399f5b66ae462a7a6e0c2a521ff2da8b", "48aa3489d4290c0f3683771bd9dc5f23745b4b56", "14858a0573b1eb38d82e3c82a0a22ffefa4d5b81", "7b1a8647dd6482adee6b774d694e5c85b6d5a9cf", "f5de3c70bd135695defa22992a0423a70dc90c59" ], "paperAbstract": "Concurrent multithreaded access to the Message Passing Interface (MPI) is gaining importance to support emerging hybrid MPI applications. The interoperability between threads and MPI, however, is complex and renders efficient implementations nontrivial. Prior studies showed that threads waiting for communication progress (waiting threads) often interfere with others (active threads) and degrade their progress. This situation occurs when both classes of threads compete for the same MPI resource and ownership passing to waiting threads does not guarantee communication to advance. The best-known practical solution prioritizes active threads and adapts first-in-first-out arbitration within each class. 
This approach, however, suffers from residual wasted resource acquisitions (waste) and ignores data locality, thus resulting in poor scalability. In this work, we propose thread synchronization improvements to eliminate waste while preserving data locality in a production MPI implementation. First, we leverage MPI knowledge and a fast synchronization method to eliminate waste and accelerate progress. Second, we rely on a cooperative progress model that dynamically elects and restricts a single waiting thread to drive a communication context for improved data locality. Third, we prioritize active threads and synchronize them with a locality-preserving lock that is hierarchical and exploits unbounded bias for high throughput. Results show significant improvement in synthetic microbenchmarks and two MPI+OpenMP applications.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101156", "http://www.mcs.anl.gov/papers/P6087-1216.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/376f2db9939d49811b32090c5ed03deb3bbadd0f", "sources": [ "DBLP" ], "title": "Advanced Thread Synchronization for Multithreaded MPI Implementations", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "37782ba980effbbb63d8518625d0f795be866822": { "authors": [ { "ids": [ "3186693" ], "name": "Yebin Lee" }, { "ids": [ "3394118" ], "name": "Hyeonggyu Kim" }, { "ids": [ "2766004" ], "name": "Seokin Hong" }, { "ids": [ "3073201" ], "name": "Soontae Kim" } ], "doi": "10.1109/HPCA.2017.35", "doiUrl": "https://doi.org/10.1109/HPCA.2017.35", "entities": [ "Bandwidth (signal processing)", "Dynamic random-access memory", "Low-power broadcasting" ], "id": "37782ba980effbbb63d8518625d0f795be866822", "inCitations": [ "1e947c799a463fba91f3f4d8f93e8325af01d9ec", "4a140b837b5c403bbc327317c0d3dcc9b2cfb849", "781cf9b4d17f89ad4b971d2a1655421378149e2d", "6aeb1bc8306f0c079d24cded5c4eff7de5da46e8" ], "journalName": "2017 IEEE 
International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "217-228", "journalVolume": "", "outCitations": [ "27273c1e8605302a3f69c2939112ed0b2dedad0d", "fdcbd17adb63baed191750bdd24fc6ccafb957d9", "77f826132cf09ac91ea9c859387a8d52221a019a", "8e8e622d5fab4c1d2a5bc7783db84e62cc570f9a", "705a129de84bcf24b4039150c2fc2be1c24cc24a", "0d929e2e2c5a15a9c51366395968819dfd8159ec", "1dec8f5106d11047aaaf126121110cbf890f17c3", "094b881edab3f5833c4ff2f38d4ed207af141bcd", "08632fe2b934ed15d3499e7321282c81adc2c390", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "0e8b71aab0a8933813fee6064276a9bf7aa6b603", "3216ab441ef92aedededd7c72dcacc866423ce69", "b2bbb08faa8018bcb41ef3039f4010bb17176d01", "1e63acf596fafe0e7099e99767e1bcac0b7600cb", "c07ebd47e86f0ece88b28c57d79ed7544f5a30f0", "464af3debb8434807ab04eb749d63594e78ee786", "747ad718761b7d848a12e4f3a82aa0f46117a815", "01ab8a70840cefc0c5e545bc76f4b5195bb37333", "bfe6157690a837af71c62abc94811ef7faf45fd4", "4390f4a06a036b8f04cbb4fe7611fa5af9492797", "02e965debeaf59e6f93adede60d7e39004e77fcc", "2c413bdc17e8719787bbb154ac7314465e8f0ed0", "1e659a7c2f03809243d96f1e78f4c6f42acad3e6", "4bb2223e62a5560bb4bcf0ebb7c951f0cbc1069a", "50c14d56c59df0550c1d9ba35874ede4047416ea", "1339efd08ff6249a1f819ea7e02ccf349ac62ab2", "9ca87149f9e7cb0811c881ecac79ef02c87e1716", "6ae3e9839ce12bcb24ef40c5a38a62fc5ccfc320", "517c5cd1dbafb3cfa0eea4fc78d0b5cd085209b2", "019059f27a1de3b91125882c9ed03741b4fee01b", "4ec4ed133deb050d97615803412102a6d68611cc", "002e5d1003a5d8192f43419350cee2c94562478f", "1cbcd683d675bedaf8e9a3a01dbe6b34878f6e92", "d2ab8a8fc0f4f06c0b89ef1d05314fb882ded44c", "32c8c7949a6efa2c114e482c830321428ee58d70", "72af62917a53f7f88f54fc658f3daade61284937", "00ab25c6582d543932fccbb0f15fe93445f95d61", "1f3611aa60accc2ebd229162b8919b2a7ccbae33", "026615150a7db9012ea247d3576957ca214258c6", "c797c15492e635ce850158dbe01f402c0f8e78cd", "37e49c57dd4d0849380d177222db53e52ff21347", "10db43d5a1b3c1ddf6dc93fa992b8ed42dcf9833", 
"01299bf5dce79d85aaa0d938670a93ddeeda4d0e", "85398d5f19157c91bf00da3d36210e72d57887e4", "b34823a63f1cd1d870c7af7179c8d08b603ec791" ], "paperAbstract": "Owing to increasing demand of faster and larger DRAM system, the DRAM system accounts for a large portion of the total power consumption of computing systems. As memory traffic and DRAM bandwidth grow, the row activation and I/O power consumptions are becoming major contributors to total DRAM power consumption. Thus, reducing row activation and I/O power consumptions has big potential for improving the power and energy efficiency of the computing systems. To this end, we propose a partial row activation scheme for memory writes, in which DRAM is re-architected to mitigate row overfetching problem of modern DRAMs and to reduce row activation power consumption. In addition, accompanying I/O power consumption in memory writes is also reduced by transferring only a part of cache line data that must be written to partially opened rows. In our proposed scheme, partial rows ranging from a one-eighth row to a full row can be activated to minimize row activation granularity for memory writes and the full bandwidth of the conventional DRAM can be maintained for memory reads. 
Our partial row activation scheme is shown to reduce total DRAM power consumption by up to 32% and 23% on average, which outperforms previously proposed schemes in DRAM power saving with almost no performance loss.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.35" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/37782ba980effbbb63d8518625d0f795be866822", "sources": [ "DBLP" ], "title": "Partial Row Activation for Low-Power DRAM System", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "377debbed1c93e7d65badff31c4f357aa100dced": { "authors": [ { "ids": [ "39660624" ], "name": "Christian Dietrich" }, { "ids": [ "3161837" ], "name": "Valentin Rothberg" }, { "ids": [ "20551316" ], "name": "Ludwig F\u00fcracker" }, { "ids": [ "21570512" ], "name": "Andreas Ziegler" }, { "ids": [ "36548921" ], "name": "Daniel Lohmann" } ], "doi": "", "doiUrl": "", "entities": [ "Abstract syntax tree", "Build automation", "C preprocessor", "Compiled language", "Compiler", "Include directive", "Preprocessor", "ccache" ], "id": "377debbed1c93e7d65badff31c4f357aa100dced", "inCitations": [], "journalName": "", "journalPages": "527-538", "journalVolume": "", "outCitations": [ "96936e784919849bf5b041d6a2f93ddd58d364d1", "0b8ba9b426ea81fcef016117c6cd4633eb2f1ac2", "23f03fa6a94c5f7b3dddda96ecf923c5f2e439a3", "0458e1208e4183098744cccea46451431999162a", "cf3664991a0d46ccadd9aa7d930723fe899f2edb", "c44152af2ac41d1491472ddfc2c282ed7daa78cd", "2194c3460ab71f3826db00b045b2ae590c753319", "14dbcfbc50d929b4dcb795967fb7a31064ba7b84", "b4d503f189cf10c55a8376a8ce5a0e9cb8ee48bb", "fddc87b775ac8f112b1ace7fdc7b0cd7296b5fb1", "213bac456c73223e890cb38fef7f7a40ef44b087", "106f0355ed696f7e2ead2d6eb397d49c67cd4da1", "40cf2b47de20d0b930cd4b5184febe40bdc681c8", "50341a2e4dec45165c75da37296ee7984b71e044", "a2b468ecc76929acbd98143c4c9ef3fcb824426d", "26d029d6608cc11258d60d7d9c705642bf8e1128", 
"f1d01c7839a2af2f7d151871ce3a9ecf2d274811", "5559d515181e725216b592bea7ac79060e35e27a", "5af57e304238435fd4149b05c781cda6ff3a5406", "25e0cd0ea010c0d1a438408c553a5c6d23e6eddf", "495f11c02fc8736902474453141cf73d3c34f7b2" ], "paperAbstract": "Software projects that use a compiled language are built hundreds of thousands of times during their lifespan. Hence, the compiler is invoked over and over again on an incrementally changing source base. As previous work has shown, up to 97 percent of these invocations are redundant and do not lead to an altered compilation result. In order to avoid such redundant builds, many developers use caching tools that are based on textual hashing of the source files. However, these tools fail in the presence of modifications that leave the compilation result unchanged. Especially for C projects, where module-interface definitions are imported textually with the C preprocessor, modifications to header files lead to many redundant compilations. In this paper, we present the cHash approach and compiler extension to quickly detect modifications on the language level that will not lead to a changed compilation result. By calculating a hash over the abstract syntax tree, we achieve a high precision at comparatively low costs. While cHash is light-weight and build system agnostic, it can cancel 80 percent of all compiler invocations early and reduce the build-time of incremental builds by up to 51 percent. 
In comparison to the state-of-the-art CCache tool, cHash is at least 30 percent more precise in detecting redundant compilations.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-dietrich.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_dietrich.pdf", "https://www4.cs.fau.de/Publications/2017/dietrich_17_atc.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/dietrich" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/9b13/284ef0a4f87eb343a3c517332048b9025fef.pdf", "s2Url": "https://semanticscholar.org/paper/377debbed1c93e7d65badff31c4f357aa100dced", "sources": [ "DBLP" ], "title": "cHash: Detection of Redundant Compilations via AST Hashing", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "3783bde154bf6e7db5447fe50f8fccb3afb0ab34": { "authors": [ { "ids": [ "2707325" ], "name": "Priya Govindan" }, { "ids": [ "3335223" ], "name": "Morteza Monemizadeh" }, { "ids": [ "1711192" ], "name": "S. 
Muthukrishnan" } ], "doi": "10.1145/3034786.3056118", "doiUrl": "https://doi.org/10.1145/3034786.3056118", "entities": [ "Algorithm", "Link analysis", "Reblogging", "Rewriting", "Sorting", "Streaming algorithm" ], "id": "3783bde154bf6e7db5447fe50f8fccb3afb0ab34", "inCitations": [], "journalName": "", "journalPages": "337-346", "journalVolume": "", "outCitations": [ "5944eecf543d048b3a0359400bc49ea4f461835f", "65679cb68aacf5737d927f39903b08118ac51f51", "a2669bc42e6f04d32247feb0208f9a3ea201464c", "1dc28bc6093f1cdf4d2c882de538f94edb323d98", "7b5fc14d2f4074aeeaea0e970e573c613783c897", "028e534092e48aa2435884f0bdbb5d01b46c7821", "73f5c0b199f78e9d0b846350ac27c8a029907806", "71a400d02b247e26b4bcac2ff2d02d34755ea9cf", "0eaf38cd3d7c7fb456201d59b6d28b084010d358", "47e5ef73248d4cf34a752c245e81d2229893d765", "da546afd0bde7bcb6753e1954c2818f7c824bcae", "1e602df3d8feec53f155f9001790c77f3fe9c583" ], "paperAbstract": "We consider publication settings with positive user feedback, such as, users publishing tweets and other users retweeting them, friends posting photos and others liking them or even authors publishing research papers and others citing these publications. A well-accepted notion of \"impact\" for users in these settings is the H-Index, which is the largest k such that at least k publications have k or more (positive) feedback.\n We study how to calculate H-index on large streams of user publications and feedback. If all the items can be stored, H-index of a user can be computed by sorting. We focus on the streaming setting where as is typical, we do not have space to store all the items.\n We present the first known streaming algorithm for computing the H-index of a user in the cash register streaming model using space poly(1/ε,log(1/δ),logn); this algorithm provides an additive ε approximation. 
For the aggregated model where feedback for a publication is collated, we present streaming algorithms that use much less space, either only dependent on ε and even a small constant. We also address the problem of finding \"heavy hitters\" users in H-index without estimating everyone's H-index. We present randomized streaming algorithms for finding 1 + ε approximation to heavy hitters that uses space poly(1/ε,log(1/δ),logn) and succeeds with probability at least 1 - δ. Again, this is the first sublinear space algorithm for this problem, despite extensive research on heavy hitters in general. Our work initiates study of streaming algorithms for problems that estimate impact or identify impactful users.", "pdfUrls": [ "http://eden.rutgers.edu/~priyagn/priya/papers/streaminghindex_PODS2017.pdf", "http://doi.acm.org/10.1145/3034786.3056118" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3783bde154bf6e7db5447fe50f8fccb3afb0ab34", "sources": [ "DBLP" ], "title": "Streaming Algorithms for Measuring H-Impact", "venue": "PODS", "year": 2017 }, "378a21246cb98b56b28683845914f762f3e9699d": { "authors": [ { "ids": [ "2649336" ], "name": "Mattijs Jonker" }, { "ids": [ "2391696" ], "name": "Alistair King" }, { "ids": [ "2853254" ], "name": "Johannes Krupp" }, { "ids": [ "1701081" ], "name": "Christian Rossow" }, { "ids": [ "3008669" ], "name": "Anna Sperotto" }, { "ids": [ "2054092" ], "name": "Alberto Dainotti" } ], "doi": "10.1145/3131365.3131383", "doiUrl": "https://doi.org/10.1145/3131365.3131383", "entities": [ "Countermeasure (computer)", "Denial-of-service attack", "Ecosystem", "Honeypot (computing)", "Network telescope", "PictBridge", "Software modernization", "World Wide Web" ], "id": "378a21246cb98b56b28683845914f762f3e9699d", "inCitations": [], "journalName": "", "journalPages": "100-113", "journalVolume": "", "outCitations": [ "334ca522814ed02d73c62a114ea2026847b81c90", "05e0185b212e455080b8b568737fa54ecbf87009", 
"1656e1ca6293e4775265c6e70210d1d7c46e6a5c", "0b9a4742cd2837ee5f5612fff0b6808129a7b72e", "6173558ba7f1cec8c8cee0ddad6b9e81e89bdebb", "06e0268297e0a4ddeff842dc0496ed2c7966a80d", "81a9bb0db4573f243ae980447c6eed0275d49d35", "84037ccdd1b5eacb404153194792c026b692a63f", "0112891050537d4f587529c396c8b9855796d182", "1ba9175f266160f864b4f4eab8d7067e4c17946c", "1a68f5963645a30eb5a0bae9f31a4b55fb0ea1a9", "48b6f52ae483a6b1d098fd340a2cf142c2d6151b", "0f7a0c691816725080bf508d2830c2b1a074e291", "54d7d2d35b3f7cb677b36a7825b8293017cdd934", "8bb584dd12dd82b9041b819b8f25633eadf1c5d5", "135ccdabbd9f8c2f3af6b507a226ec2308163e92", "4cead48e2eac91560105871b78268e3164eb382b", "3ed41761416f6e6f1840de07f35c4059418e0cbb", "674087f4cc6f6847f2c9b823faf53f627fb43fde" ], "paperAbstract": "Denial-of-Service attacks have rapidly increased in terms of frequency and intensity, steadily becoming one of the biggest threats to Internet stability and reliability. However, a rigorous comprehensive characterization of this phenomenon, and of countermeasures to mitigate the associated risks, faces many infrastructure and analytic challenges. We make progress toward this goal, by introducing and applying a new framework to enable a macroscopic characterization of attacks, attack targets, and DDoS Protection Services (DPSs). Our analysis leverages data from four independent global Internet measurement infrastructures over the last two years: backscatter traffic to a large network telescope; logs from amplification honeypots; a DNS measurement platform covering 60% of the current namespace; and a DNS-based data set focusing on DPS adoption. Our results reveal the massive scale of the DoS problem, including an eye-opening statistic that one-third of all / 24 networks recently estimated to be active on the Internet have suffered at least one DoS attack over the last two years. We also discovered that often targets are simultaneously hit by different types of attacks. 
In our data, Web servers were the most prominent attack target; an average of 3% of the Web sites in .com, .net, and .org were involved with attacks, daily. Finally, we shed light on factors influencing migration to a DPS.", "pdfUrls": [ "https://conferences.sigcomm.org/imc/2017/slides/imc2017_jonkerm.pdf", "https://www.caida.org/publications/presentations/2017/millions_targets_under_attack_imc/millions_targets_under_attack_imc.pdf", "http://doi.acm.org/10.1145/3131365.3131383", "https://conferences.sigcomm.org/imc/2017/papers/imc17-103.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/378a21246cb98b56b28683845914f762f3e9699d", "sources": [ "DBLP" ], "title": "Millions of targets under attack: a macroscopic characterization of the DoS ecosystem", "venue": "IMC", "year": 2017 }, "37ae9996dcb64d180861edc754c32b9b1759b7da": { "authors": [ { "ids": [ "3452496" ], "name": "Yujin Kwon" }, { "ids": [ "1882963" ], "name": "Dohyun Kim" }, { "ids": [ "2697031" ], "name": "Yunmok Son" }, { "ids": [ "1761991" ], "name": "Eugene Y. 
Vasserman" }, { "ids": [ "1721630" ], "name": "Yongdae Kim" } ], "doi": "10.1145/3133956.3134019", "doiUrl": "https://doi.org/10.1145/3133956.3134019", "entities": [ "Bitcoin", "Cryptography", "Data mining", "Fork (software development)", "Nash equilibrium", "Pools of Darkness" ], "id": "37ae9996dcb64d180861edc754c32b9b1759b7da", "inCitations": [], "journalName": "", "journalPages": "195-209", "journalVolume": "", "outCitations": [ "1999857212c7f8157230bc3a32b1d519ef124e00", "0171a99c93632846f9b3213a8275adf042f19871", "0feee2140342d183a556ca74a372687c8ed33f42", "4ab6b28bb3342cb4f65555a37418b6a25297425e", "1d4abd83093f1343ee1f5b8ffb3c5999e3754c90", "6da1b216120a92debe1e6c3be6700ab8aaa38651", "6fc9cd15134cdd282e25b8ea58b38240e96bfe90", "efadebc17495d28b18f830d80d339c5198219f8a", "9bb0aa7c062a1ac3df0a73d1e7caa88937e9716e", "822693248834147245d6ff2309192122d1326396", "261893f4c8a7c311a97249a8f42071c566372493", "2f7bb6613154e1b3580c0114bf2cfb3c8ceb477e", "5e86853f533c88a1996455d955a2e20ac47b3878", "728b60c04afb5b87853b59265e49f430dbf631db", "f65ee3a9f171da68b57039a5d5f2f1ad70798488", "35fe18606529d82ce3fc90961dd6813c92713b3c", "6b766f6003886cd55ef7b2459ee9b404934aca31", "084da7c90567476907522d91d22a8a8a6f818447", "04efafa2c9c401e6d5eaff1a4159cb67c9645564", "5fb1285e05bbd78d0094fe8061c644ea09d9da8d", "226cfb67d2d8eba835f2ec695fe28b78b556a19f", "40a98bed1d10248d30e86304315df07280dad93e" ], "paperAbstract": "In the Bitcoin system, participants are rewarded for solving cryptographic puzzles. In order to receive more consistent rewards over time, some participants organize mining pools and split the rewards from the pool in proportion to each participant's contribution. However, several attacks threaten the ability to participate in pools. The block withholding (BWH) attack makes the pool reward system unfair by letting malicious participants receive unearned wages while only pretending to contribute work. 
When two pools launch BWH attacks against each other, they encounter the miner's dilemma: in a Nash equilibrium, the revenue of both pools is diminished. In another attack called selfish mining, an attacker can unfairly earn extra rewards by deliberately generating forks.\n In this paper, we propose a novel attack called a fork after withholding (FAW) attack. FAW is not just another attack. The reward for an FAW attacker is always equal to or greater than that for a BWH attacker, and it is usable up to four times more often per pool than in BWH attack. When considering multiple pools --- the current state of the Bitcoin network --- the extra reward for an FAW attack is about 56% more than that for a BWH attack. Furthermore, when two pools execute FAW attacks on each other, the miner's dilemma may not hold: under certain circumstances, the larger pool can consistently win. More importantly, an FAW attack, while using intentional forks, does not suffer from practicality issues, unlike selfish mining. We also discuss partial countermeasures against the FAW attack, but finding a cheap and efficient countermeasure remains an open problem. 
As a result, we expect to see FAW attacks among mining pools.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134019", "https://syssec.kaist.ac.kr/pub/2017/kwon_ccs_2017.pdf", "http://arxiv.org/abs/1708.09790", "https://arxiv.org/pdf/1708.09790v1.pdf", "http://people.cs.georgetown.edu/~clay/classes/fall2017/835/papers/Be_Selfish_and_Avoid_Dilemmas.pdf", "http://diyhpl.us/~bryan/papers2/bitcoin/Be%20selfish%20and%20avoid%20dilemmas:%20Fork%20after%20withholding%20(FAW)%20attacks%20on%20bitcoin%20-%202017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/37ae9996dcb64d180861edc754c32b9b1759b7da", "sources": [ "DBLP" ], "title": "Be Selfish and Avoid Dilemmas: Fork After Withholding (FAW) Attacks on Bitcoin", "venue": "CCS", "year": 2017 }, "37af5c79fea5a2b852e8dd79b59e67bd354d8434": { "authors": [ { "ids": [ "1855037" ], "name": "David Swasey" }, { "ids": [ "40187132" ], "name": "Deepak Garg" }, { "ids": [ "2710559" ], "name": "Derek Dreyer" } ], "doi": "10.1145/3133913", "doiUrl": "https://doi.org/10.1145/3133913", "entities": [ "Concurrency (computer science)", "Coq (software)", "Cryptographic protocol", "Formal specification", "Formal verification", "Immutable object", "Object-capability model", "Programmer", "Proof assistant", "Separation logic", "Verification and validation", "Web development" ], "id": "37af5c79fea5a2b852e8dd79b59e67bd354d8434", "inCitations": [ "6def267489b99197edc4d237bad8bda1f29c80f9", "ee5afea98ddad6bbd4900e7f7b81aab67e4aef33", "0eefadf3ff7e4b17fbc68de7f6db183d25722d65", "9b59b3e0c0b555b0ea57069c7354107ba01b9599", "c588f55d38ff259eab110db0ec2563e323590a47", "16221520496a27548b262fd9141b7735834f189e", "2bd400b03bfaad711e7fb40f3f64e5ae05bccd1b" ], "journalName": "PACMPL", "journalPages": "89:1-89:26", "journalVolume": "1", "outCitations": [ "2de3e6d85b2e7cb41a6e87b3188b9101d147dd82", "41639da89938f4d8a160bd7a2f3d890e51a18bf1", "22967d3dbc936bdd24aca1ef68a955c3eb790cd8", 
"2b52fa44fc607717b5ceca2069d233baa29e95f9", "b929aa17dc7d48cbeb708437d826b114088dafef", "72a9ae7e4e65526db0a32ba19e1ae33203a88668", "ed2229a068b5a6d520fb7ea6de378c9bd6d4667e", "77a0a86e410970e8018af1dad1f9d2cb3fa8c20e", "165fda6774f941cbe862a2f8a035e3aad17a828e", "30f2b672b3e3a9ca96507d4b9aaf904bfae7b781", "02cdfa95529573d3ca7de8483f71a4066073816e", "068379d1d64c59268480a53089051591b8e240bc", "3bd66b34cd168ff2331e1776ffc59a95a93b4265", "83e415ef0dfc365b816ff5b4c0979c472faddc2b", "3e651a701c9d814616a67acb4f72532ef976a0b3", "6714dfd9db77a7a59a549aee8ff954c8d42b7349", "1f6b2d2310e999173829fc6cb38ed6984112d8e1", "0c9b26e1ef82c243f6889f5073997364347f0b39", "d10dcc76b718966e741bbbabc5f7d94eaa12b3df", "28d7eade19b343db6db0c0270f24e819f4dd5a9b", "3b2421ee95201519ff4ee7bf6231c1d5dd2ecea2", "0d0bf88c1a85ae6280b596d4ce0fcedbcb70dfa0", "80805478f28049121a2430498a7fc7ac854fc367", "49e219b2326ee2a90bf1805b0c7c4456c741de29", "0423fad3ecc3ad53164f52f63ac4a9679c605b57", "16221520496a27548b262fd9141b7735834f189e", "6886cd7517ae299eefc1a16e8f5d09a767c3435d", "27a7a125fc04a94470277eb195ff813b8e3260af", "256bdc6422830144641523773e158be09494b2b6", "0a8d3007ce2fbd15ee15e7c4440526ad326adcb6", "19f80a7072f604ef6abca164581f3e3a5c60e25d", "492cb2d1739ca886db04e9e42fc01ccc80b383e6", "17c9c2df925ac5e2e99d78433596870ed417856f", "001477a33ca44ed2535c889e4fcaf34316da73a0", "2ec97bd46ca1b3df1959573ff1684c6ec96cb74e" ], "paperAbstract": "In scenarios such as web programming, where code is linked together from multiple sources, object capability patterns (OCPs) provide an essential safeguard, enabling programmers to protect the private state of their objects from corruption by unknown and untrusted code. However, the benefits of OCPs in terms of program verification have never been properly formalized. 
In this paper, building on the recently developed Iris framework for concurrent separation logic, we develop OCPL, the first program logic for compositionally specifying and verifying OCPs in a language with closures, mutable state, and concurrency. The key idea of OCPL is to account for the interface between verified and untrusted code by adopting a well-known idea from the literature on security protocol verification, namely robust safety. Programs that export only properly wrapped values to their environment can be proven robustly safe, meaning that their untrusted environment cannot violate their internal invariants. We use OCPL to give the first general, compositional, and machine-checked specs for several commonly-used OCPs\u2014including the dynamic sealing, membrane, and caretaker patterns\u2014which we then use to verify robust safety for representative client code. All our results are fully mechanized in the Coq proof assistant.", "pdfUrls": [ "https://people.mpi-sws.org/~dg/papers/oopsla17-obj-full.pdf", "https://people.mpi-sws.org/~swasey/papers/ocpl/ocpl-oopsla17.pdf", "http://doi.acm.org/10.1145/3133913", "https://people.mpi-sws.org/~swasey/papers/ocpl/ocpl-20170802.pdf", "https://people.mpi-sws.org/~dreyer/papers/ocpl/paper.pdf", "https://people.mpi-sws.org/~dg/papers/oopsla17-obj.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/37af5c79fea5a2b852e8dd79b59e67bd354d8434", "sources": [ "DBLP" ], "title": "Robust and compositional verification of object capability patterns", "venue": "PACMPL", "year": 2017 }, "37c5fc44b36b5dc1bdb361b1e08caf83f109b7ce": { "authors": [ { "ids": [ "3123314" ], "name": "James T. Yu" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.38", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.38", "entities": [ "Automation", "Broadcast automation", "Broadcast radiation", "Hierarchical state routing", "High availability", "One-way function", "Scalability", "Signalling System No. 
7", "Simulation", "Software-defined networking" ], "id": "37c5fc44b36b5dc1bdb361b1e08caf83f109b7ce", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "292-298", "journalVolume": "", "outCitations": [ "eed513c8bdb48ba5cf1bc9ffb34ae584ba701fef", "a72771670aa6293b2abe90b1acf6fcb00cc774b3", "a740b859c14413ed4eec030e6d5d636b1814b325", "095acc8245505255e0ea3b388d044d9d31cbda7b", "151bf33bb99098f10005beae4be36897fe33734f", "4f568283b79747140f7dbe4bcfa80b811c1618f6", "8f3206758b8eb0b315ab114cba4a6864233afe3e", "bb60bcb62555ca40d3092b5b1be8c05e1f0c27e7", "5ffc518d356b92afa0fc2b6194b077ac083689d2" ], "paperAbstract": "Ethernet is being expanded to many industrial automation applications, and the demand for high availability drives the adoption of High Availability Seamless Redundancy (HSR) due to its capability to achieve zero recovery time. A major challenge of HSR is long delay of large HSR network due to its ring architecture. A solution to this issue is to use QuadBox to design multiple rings which raise many design questions because of different architecture options. Our approach is to apply the concept of SS7 signaling to classify the links as A, B, C, and D from which we present a flexible and scalable architecture. We developed a simulation model to validate this architecture, and the results show that one-way delay is not negatively affected by network growth. Another contribution of this paper is to use a central controller, like Software Defined Network (SDN), to create MAC forwarding tables on individual HSR nodes. 
The proposed scheme prevents flooding and broadcast storm in this loop topology.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.38" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/37c5fc44b36b5dc1bdb361b1e08caf83f109b7ce", "sources": [ "DBLP" ], "title": "A Scalable Architecture for High Availability Seamless Redundancy (HSR)", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "37c698dcc062596a85291c6138cae2787ef3ca20": { "authors": [ { "ids": [ "1928916" ], "name": "Xiongchao Tang" }, { "ids": [ "2467444" ], "name": "Jidong Zhai" }, { "ids": [ "38849012" ], "name": "Bowen Yu" }, { "ids": [ "6301522" ], "name": "Wenguang Chen" }, { "ids": [ "2225511" ], "name": "Weimin Zheng" } ], "doi": "10.1145/3018743.3018745", "doiUrl": "https://doi.org/10.1145/3018743.3018745", "entities": [ "Application checkpointing", "Computer data storage", "Diskless node", "Failure rate", "Fault tolerance", "In-memory database", "Linpack benchmarks", "Scalability", "Supercomputer" ], "id": "37c698dcc062596a85291c6138cae2787ef3ca20", "inCitations": [ "dcccd1b3a1b4b143f1bc30ec9c88eb60d9aac09e", "612cf39494d5ea3db60616ef50836746dd289674", "19eba1cfdecdd9a918f22880bc3599ca461c6454" ], "journalName": "", "journalPages": "401-413", "journalVolume": "", "outCitations": [ "56558ac2a49e9242d99c5a8c444c20c3660a9a47", "a6bf93b4238dcd4d425f1c6f456d24ec37cd26a9", "7bf742cd243e91b256afb437c020deafe6fe8035", "96d860caedf7731e2f598a768e85d04e26753868", "3fa792fb63f453bd9d492f23ef7662aaaf6f7ca5", "2e663c1047ff14ddc2416229459922757a20edfb", "0f6a32792d0882db35fe9391445d4322232b619e", "741a04ef3a0c3953a3d37726bf4d6170eaa68a55", "3e99a917b9a4e89497541bbc3bb72079054644c6", "049484d5b58620ad1cc4a32d87c0fb71bab55277", "442686f068097d4cd27680a0ee08272aea19c922", 
"455d253c61379bce5626fba8ef9897d3ac1307dc", "29eba6139249e2557b2210f88e31c959605d5cb7", "277b0a0975ca19e7410ed87dd872c0728b0bf06a", "4848984af698cb2cb71d4cf7789faea3c80d1359", "5c7e119f7bb2a11cee9f9ed6d4d1ed3248a26ab1", "0029d10f3f95f490de62ebf392b82ed9bba65f80", "211ca2ab5b929089023a47a14f82b18f07dd7f04", "23d5a70cfffd1b637173a171df53bfdc1859dce5", "65a6f9dc3811d15f212a724fa7addc7e3f4650ee", "a3925b76404c3315e8f9d11c8784a48c85c30d5e", "52fc987b858770ac02a54effd74da945ec06c224", "168b8cbbbacd234f23b70b952ef58b5b56e67529", "67ddcb550dd8fe57e4567bd7ca78f6e841f25c4c", "345ff2f19178c983f2742b1f3198fa045cca2121", "daf0a5b16eb51ae418f18a6324970626a29dcc96", "2657302160775f8766964d013efe242836693f3e", "1c20521112e3bf937e756a28061ad4887f4ad720", "3876af5f9f7d7588bfa0fcdf9df8637925e0063e", "0c72a5c114adceffb0a59c0920eb6b0d9bd9f6f5", "5c0e8af36e20b8ea213561e8c3d706b4e2f2cc8d", "18fe996c6f43a8f301cd842507045b679ba3506a" ], "paperAbstract": "Fault tolerance is increasingly important in high performance computing due to the substantial growth of system scale and decreasing system reliability. In-memory/diskless checkpoint has gained extensive attention as a solution to avoid the IO bottleneck of traditional disk-based checkpoint methods. However, applications using previous in-memory checkpoint suffer from little available memory space. To provide high reliability, previous in-memory checkpoint methods either need to keep two copies of checkpoints to tolerate failures while updating old checkpoints or trade performance for space by flushing in-memory checkpoints into disk.\n In this paper, we propose a novel in-memory checkpoint method, called self-checkpoint, which can not only achieve the same reliability of previous in-memory checkpoint methods, but also increase the available memory space for applications by almost 50%. To validate our method, we apply the self-checkpoint to an important problem, fault tolerant HPL. 
We implement a scalable and fault tolerant HPL based on this new method, called SKT-HPL, and validate it on two large-scale systems. Experimental results with 24,576 processes show that SKT-HPL achieves over 95% of the performance of the original HPL. Compared to the state-of-the-art in-memory checkpoint method, it improves the available memory size by 47% and the performance by 5%.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018745" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/37c698dcc062596a85291c6138cae2787ef3ca20", "sources": [ "DBLP" ], "title": "Self-Checkpoint: An In-Memory Checkpoint Method Using Less Space and Its Practice on Fault-Tolerant HPL", "venue": "PPOPP", "year": 2017 }, "37d543efda665556815dc45af537a3400fb106c7": { "authors": [ { "ids": [ "1774210" ], "name": "Andrew Pavlo" }, { "ids": [ "2097105" ], "name": "Gustavo Angulo" }, { "ids": [ "2135406" ], "name": "Joy Arulraj" }, { "ids": [ "2384732" ], "name": "Haibin Lin" }, { "ids": [ "2323488" ], "name": "Jiexi Lin" }, { "ids": [ "34264361" ], "name": "Lin Ma" }, { "ids": [ "39882369" ], "name": "Prashanth Menon" }, { "ids": [ "1761585" ], "name": "Todd C. 
Mowry" }, { "ids": [ "34106619" ], "name": "Matthew Perron" }, { "ids": [ "7455366" ], "name": "Ian Quah" }, { "ids": [ "1940209" ], "name": "Siddharth Santurkar" }, { "ids": [ "1693125" ], "name": "Anthony Tomasic" }, { "ids": [ "30046815" ], "name": "Skye Toor" }, { "ids": [ "40270373" ], "name": "Dana Van Aken" }, { "ids": [ "2486512" ], "name": "Ziqi Wang" }, { "ids": [ "7860942" ], "name": "Yingjun Wu" }, { "ids": [ "2304675" ], "name": "Ran Xian" }, { "ids": [ "1706297" ], "name": "Tieying Zhang" } ], "doi": "", "doiUrl": "", "entities": [ "Autonomic computing", "Autonomous car", "Database", "Deep learning", "Integrated circuit", "Management system", "Physical design (electronics)" ], "id": "37d543efda665556815dc45af537a3400fb106c7", "inCitations": [ "0fb3400d39c08b6dddbbfa8689711ca36a87afd0", "4853a26200889f033c0f509abf0f91d8cafba55b", "7831bc987940bbf060ca4ea18cecb5ae5ed21186", "8b93796b9ea3d4b6aa4b23c79331beaee10c2a3a", "791453489be6112655b3049d9ae0a403bb31678b", "0c2f0a79192eee1e90629e8475a8968be0bf2558", "db12b1acdf950527ee8eccbdaa99ee9dcf5c1274", "c6018876ce114cc236588e4f6e9c213f84203b6c", "40791626f1f99ec663d8b07023dc3909ac728dc3", "e6e5176bdfe07612c6915f1b89197cbb74714dd7", "72000109547f17c849c2ee6e2825784e64b70cea", "4fcb1e0a25a5617ddee8174b48af80d88b4881f4", "5977a741cbc79c9b72a9587d40732bf2d64ff376" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "0f942ab3483ec72dc815d61d080c396eacd461c6", "5dd36ed50668b5cc1c95ce6cf83b1b9b21a5f560", "b2fc430d7606ebc9199b08232dc9c024a303dc55", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "5b92b418053cf758650d823762d134ee0385fb92", "8db5d8f4bf055bbe64ccfe29c5fd778ef24ade5b", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", "019902292dff81eae20f3e87970dd7a1151d9405", "296b2a3232cd548d2b65ec4d77ab3cbf69893393", "9e94390e67fa2c44188634f6a4e8195b1eb309c8", "ba1e6fb740640276c8f19d0464bbd756a972423a", "f4e62f813e8cb019c85504597a87a6f1cd3c2194", "624073f9a3053217afb1dbd8adfc44d1052ba282", 
"8be85f906ea94556788b48aeccaf6037366cda51", "52535547bd9a829d4ae80e8fc386331eddfe7523", "c25381cb6eb72d16748ab98a126f97826df686a6", "8c2557768dadf016adff061f6a29f440a80e123e", "153703ab30c7cb56a49718991f6bc450f0c2273f", "34f25a8704614163c4095b3ee2fc969b60de4698", "162d50e6e2c000baf10148f761cc0929aad48ca2", "1740eb993cc8ca81f1e46ddaadce1f917e8000b5", "b4ab86a93959db80adffcea30dd5b8ed94e62443", "4d0ff88dd2970cbe26e98364c8243087e24d0d63", "82567ca0cdbf1c64b2bf998c98777d0da02ea9d6", "347920406c9a9a3846adf485e2b864d4523a0652", "7df1de1c9663c2dfaefc1277a7d1cb3366b8c358", "6cc7f1ac032a7e6da1de2a08036f1ee1f322b44d", "21ccbe34d25998a1fc898ae371a076bca7edc3a1", "e0dbc2deeb87f9c17e7b2b298e0c8f4eb1bc3dcc", "669a754df3cffa8f52bbfad60c44f8ae8aa83183", "73f585b1579c69d6b1e5c7c4a8226238e8448f94", "64cc18eacc8b33ab5b7ee2f789ca409bbd7455d9", "00da6a22c436ea087e677efc2d54ebd53a2a59bc", "0dafdc7debdcae528b2549489a03509cb4ecb9fe", "40fecfef456c760912685b372151732b38e69d6e", "78f9ce506df537b8c36b49857123d90bf819a860", "38ed30944eb895190750221b73ecc4cf3aa8f011", "2c701eae4bdc89f18eab1277b9c9a909841b2663", "933b49f419ed419b58296963ffb15f4e6f4840b2", "0464e501007ae27db5360b7268bd56908ddf5173", "3b618235d134ceb6559bdc43a54df701ba631bd1", "8674fe29de6ac680013c8570120bbaed6144382e", "04c692a690333b377022b873c13fb58edb598b27", "231a0787b6361fca082cefe580c41c74e230b255", "44662438b9659fc7a48d32eae112dd9bcdef9ee3", "1b2457906994b5942b0ecc6e0ca38e2e3b2450c7", "489996303d862cc86eb8010fb818d47eab75ed12", "22584367753de3804867fe88530468c5984d86fa", "e9775d2e173f989c580df3fc967a905c336405c5", "4a66348c79300fc798db8fd45db84b39cc3da37f", "97f18a7fd4a91a28c93545930a913e481425d57e", "9f74a87a39cf4922b7b13e4b5386eb52025959ee" ], "paperAbstract": "In the last two decades, both researchers and vendors have built advisory tools to assist database administrators (DBAs) in various aspects of system tuning and physical design. 
Most of this previous work, however, is incomplete because they still require humans to make the final decisions about any changes to the database and are reactionary measures that fix problems after they occur. What is needed for a truly \u201cself-driving\u201d database management system (DBMS) is a new architecture that is designed for autonomous operation. This is different than earlier attempts because all aspects of the system are controlled by an integrated planning component that not only optimizes the system for the current workload, but also predicts future workload trends so that the system can prepare itself accordingly. With this, the DBMS can support all of the previous tuning techniques without requiring a human to determine the right way and proper time to deploy them. It also enables new optimizations that are important for modern high-performance DBMSs, but which are not possible today because the complexity of managing these systems has surpassed the abilities of human experts. This paper presents the architecture of Peloton, the first self-driving DBMS. 
Peloton\u2019s autonomic capabilities are now possible due to algorithmic advancements in deep learning, as well as improvements in hardware and adaptive database architectures.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p42-pavlo-cidr17.pdf", "http://www.cl.cam.ac.uk/~ey204/teaching/ACS/R244_2017_2018/presentation/S7/George_Peloton.pdf", "http://www.pdl.cmu.edu/PDL-FTP/Database/p42-pavlo-cidr17.pdf", "http://db.cs.cmu.edu/papers/2017/p42-pavlo-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/5706/6a677539cf9c42a5dc084176069437fafa21.pdf", "s2Url": "https://semanticscholar.org/paper/37d543efda665556815dc45af537a3400fb106c7", "sources": [ "DBLP" ], "title": "Self-Driving Database Management Systems", "venue": "CIDR", "year": 2017 }, "37e0382cb276303860d35b2e88f98b3a28b52938": { "authors": [ { "ids": [ "39055225" ], "name": "Jun Wang" }, { "ids": [ "3469209" ], "name": "Lantao Yu" }, { "ids": [ "8031058" ], "name": "Weinan Zhang" }, { "ids": [ "1945527" ], "name": "Yu Gong" }, { "ids": [ "7869979" ], "name": "Yinghui Xu" }, { "ids": [ "2894465" ], "name": "Benyou Wang" }, { "ids": [ "40188000" ], "name": "Peng Zhang" }, { "ids": [ "37510526" ], "name": "Dell Zhang" } ], "doi": "10.1145/3077136.3080786", "doiUrl": "https://doi.org/10.1145/3077136.3080786", "entities": [ "Discriminative model", "Generative model", "Information retrieval", "Minimax", "Question answering", "Ranking (information retrieval)", "Relevance", "Unified Framework", "Web search engine" ], "id": "37e0382cb276303860d35b2e88f98b3a28b52938", "inCitations": [ "23d406a313aeefa30b4cbbad752db511c5625cb9", "131c43daad0d9f1a8defffd474af7d2ffff47174", "63dc033fb4efd8271202b6de1465e6bbb7051c7f", "32e7f0863e7c56cfced89abedaee46e2288bc127", "cd9b25a3223300aa4c70050b19f6052e09c0be73", "7b54c5bcd4f79e06b441dc650feb2cc581cd1f1e", "5466922c3dff6400f77f6b18db229b123c3d411c", "bbfa20c4da7e04762c6c928db1cff511cdea51ee", "1354476ddeae8354b03e3ab6479fefa58baeb0e1", 
"2c4fbd9ed9b1702541d4892d9b54340c6d29e62a", "081ad92ce0e71541646218f11061c86414a960c2", "59b51782dcf4287d1f47a42422ad500d14d3cd6c" ], "journalName": "", "journalPages": "515-524", "journalVolume": "", "outCitations": [ "4e8e560328f8d7358535913d56b35f0ac89606be", "39f52477c2efeef451a309caf74c4145e05efa79", "1a043adaf7e6e70bfed6d6478c058c77c3a6db4f", "23d2d3a6ffebfecaa8930307fdcf451c147757c8", "172d19688ebac586cda9cc291816b5bba4b81f41", "f189f55077d0fe9e8d0b9586ffb3b6f33682b844", "c6222824b95750be2ed98b2a9f3f9d361a529115", "ba753286b9e2f32c5d5a7df08571262e257d2e53", "760948698540118031e590fbc884fcea209f9104", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "76a50e38d7713b6853f8d0626467cdbfdb25a1d5", "398dee13b3aaaefdf14c78cc1e00dcf265795fd3", "269036ebf47d00cf5ae28ec5d159259da1d9bd6e", "6de2b1058c5b717878cce4e7e50d3a372cc4aaa6", "828dbeb7cf922dc9b6657dd169b8d26d2b58eedb", "2ef7d506b25731d0f3ec0c8f90b718b6e5bbd069", "9aa88a8a354f1d322e242376d27d0474e50252f8", "1c2cae04e0e00dcec3fe6840b2daa00ae9e8a0a3", "18cd08248be658abcfb6f72a3033b911474541e9", "01c1795d00e1ad4a69c5dd3498a38715ca898833", "33151c9905102c47d431f59fc9a5a7667960507a", "27f3c2b0bb917091f92e4161863ec3559452280f", "46aca9fd693cda49f7f02d575efaee0977f078c7", "27cf79ed0e7158ed128db2f4cb0436b36958e5aa", "36ea668bb7617b9c1e6e98aebe96a0aaf90b569e", "684be9e9bd41d148158c64ba811c08f66b58092a", "4ab2cfe6766a5007b2fcf8cfffbf7fb566c077f4", "158f9e4e385645d2db3949483789cc84ceb41c3c", "dbcd79bd7edcdcbb5912a50796fc3c2746729eb5", "4fc6da3829b6b881e3d7d4ac029fb121bf257003", "0e6b8efeefe1a2c73db9dae51d84662ebfdd4be2", "87d907a114409755ecd3c6886585de26a4e17ffe", "88e5cd43d6fb9abc87a943b023d092fdff74c9c4", "9eb0f02e4bb52f84596a40ca27ff2d1370a5ecae", "039ce79349302a2f124014304279c5e23d39da63", "393d1f3b81410536944dff9514a2b787094e846d", "9a55705cf240d9b0da23d45eb46c2c9b6cbbd8c9", "452f7411af7d471dd3ba84c2b06b2aaffc38cdb9", "0d44c84ebbd1294c8fbfa423b1f59e531d3716da", "2e635989e232816546ef352edc38881580b04c1e", 
"0e4a97a0ccbf699272e3d6dc25b6fe16eb35382d", "0df9c70875783a73ce1e933079f328e8cf5e9ea2", "1d93be2ec0e0a77b4d625e7a537a7e669a861e9e", "48886ea4ee14f0151f186207e1b9ad1d947e83ef" ], "paperAbstract": "This paper provides a unified account of two schools of thinking in information retrieval modelling: the generative retrieval focusing on predicting relevant documents given a query, and the discriminative retrieval focusing on predicting relevancy given a query-document pair. We propose a game theoretical minimax game to iteratively optimise both models. On one hand, the discriminative model, aiming to mine signals from labelled and unlabelled data, provides guidance to train the generative model towards fitting the underlying relevance distribution over documents given the query. On the other hand, the generative model, acting as an attacker to the current discriminative model, generates difficult examples for the discriminative model in an adversarial way by minimising its discrimination objective. With the competition between these two models, we show that the unified framework takes advantage of both schools of thinking: (i) the generative model learns to fit the relevance distribution over documents via the signals from the discriminative model, and (ii) the discriminative model is able to exploit the unlabelled data selected by the generative model to achieve a better estimation for document ranking. 
Our experimental results have demonstrated significant performance gains as much as 23.96% on Precision@5 and 15.50% on MAP over strong baselines in a variety of applications including web search, item recommendation, and question answering.", "pdfUrls": [ "http://arxiv.org/abs/1705.10513", "https://arxiv.org/pdf/1705.10513v1.pdf", "http://doi.acm.org/10.1145/3077136.3080786", "http://discovery.ucl.ac.uk/10028075/1/fp078-wang-arxiv.pdf", "https://arxiv.org/pdf/1705.10513v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/37e0382cb276303860d35b2e88f98b3a28b52938", "sources": [ "DBLP" ], "title": "IRGAN: A Minimax Game for Unifying Generative and Discriminative Information Retrieval Models", "venue": "SIGIR", "year": 2017 }, "37e25e1f29dfb461ae9b77d0477eeb768c024dcf": { "authors": [ { "ids": [ "1866229" ], "name": "Sonia Ikken" }, { "ids": [ "1871388" ], "name": "\u00c9ric Renault" }, { "ids": [ "40626650" ], "name": "Amine Barkat" }, { "ids": [ "30350962" ], "name": "Abdelkamel Tari" }, { "ids": [ "35469130" ], "name": "Tahar Kechad" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.67", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.67", "entities": [ "Algorithm", "Cloud computing", "Cloud storage", "Data center", "Data dependency", "Integer programming", "Linear programming", "Programming model", "Requirement", "User requirements document" ], "id": "37e25e1f29dfb461ae9b77d0477eeb768c024dcf", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "514-521", "journalVolume": "", "outCitations": [ "684250ab1e1459197f4e37304e4103cd6848a826", "4a0aff9af02d829f2fc61f9e903b27382b8bd371", "23594fb6a2886568549a628f304102822f5e7355", "66865b6edcd99d3b5acdd62a4a60a11e1b935891", "b690122120da0eb8143c70ee9945ce378d9e4dc7", 
"8b10f5c62907bccb269b8c45a45e8f1a0667b50f", "703793ec4a350e72657fbbd6d771e3c6446b51b6", "44d6faabca90eaee56355f2da80fce105409e145", "3b5ba780d8a58a54ac5f354083746e5c45735d78", "8bf20c52b5da2f7d450e02cfac22b6c485472179", "534a0a4fa8c3f8b582b1245b2fad299c54538316" ], "paperAbstract": "Collaborative cloud storage environment, which share resources of multiple geographically distributed datacenters owned by different providers enable scientific workflow from different locations to process large scale big intermediate data through the Internet. Distributed datacenters are federated and each member can collaborate with each other to efficiently share and process the intermediate data from distributed workflow instances. This paper focuses on the storage cost minimization of intermediate data placement in federated cloud datacenters. Through collaborative and federation mechanisms, we propose an exact federation data placement algorithm based on integer linear programming model (ILP) to assist multiple datacenters hosting intermediate data files generated from a scientific workflow. Under the constraints of the problem, the proposed algorithm finds an optimal intermediate data placement with a cost saving over the federated cloud datacenters, taking into account scientific user requirements, data dependency and size. 
Experimental results show the cost-efficiency of the proposed cloud storage federation algorithm.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.67" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/37e25e1f29dfb461ae9b77d0477eeb768c024dcf", "sources": [ "DBLP" ], "title": "Cost-Efficient Big Intermediate Data Placement in a Collaborative Cloud Storage Environment", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "37f54ef89c0744dba373397a34c07e6e4b9e852f": { "authors": [ { "ids": [ "1951737" ], "name": "Faegheh Hasibi" }, { "ids": [ "1680484" ], "name": "Krisztian Balog" }, { "ids": [ "2895191" ], "name": "Svein Erik Bratsberg" } ], "doi": "10.1145/3077136.3080810", "doiUrl": "https://doi.org/10.1145/3077136.3080810", "entities": [ "Crowdsourcing", "Information needs", "Knowledge base", "Video synopsis", "Web search engine" ], "id": "37f54ef89c0744dba373397a34c07e6e4b9e852f", "inCitations": [ "3e4727c2c82cd37681fdebab5bff682f2a4ca537", "a5cdfcd033be20297e5006e005250c4f8900fd2f", "14a17bb90a3cdc4b19968a33f9affce44e6072bf" ], "journalName": "", "journalPages": "773-782", "journalVolume": "", "outCitations": [ "3a197ce85e35890c15de0bca2f1b405037129829", "77908e0588a18c3b8bb04ab10c8726cad5363434", "9c8dfbafc40b4f6bb8558985384d8eb40bf30a54", "3420d8a5bf5e3fcc92c688fd9bccf3aa42a72ada", "d31b93a9d8b8f30043fc8fa971dbbcc6370e5034", "607a834558b16c318be9c735bea048ae6638841d", "6063f0dc42a467388d8846b092c59582cb2fa49e", "9aad05bebcd8743ea0648ca684ac6bfcdd35b764", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "a7cb7de7329f1145626a3689349ce92e54f81756", "288959d9f0aed8138541c9a66d9a41c5fc0949ad", "0d97ee4888506beb30a3f3b6552d88a9b0ca11f0", "81a449c8b0771b51f3354505135ae7f74cbc431e", "48a12a320ec4751096e9bb298bf16b4dfb29a8bf", 
"3ff7b8cc06fac9fec6f825930dd62f278de33963", "01215c2dacf8abb0bfa2e7fa868863b80069cc87", "0f18e94cd16ce3bb2adfed39b66bf7fbb635b7ab", "3453d79657e8f3e1614e73608f92465ab7f5a291", "b1a13b7c0911eb06fc347f77c18e1f6fdf8f1fb4", "1bd5866dd08522225b865019fc13f87f79c50404", "37188110036d4b88aabc870b9c4199489e519b1b", "11e1e395431c73a3a961b0c5f30b0227f98873b0", "57c56d38c82054413fba6f2e60abdb4764042380", "342be3d0e7529e63d16d1b22dbddf26d63e14bd6", "233f5e420642b33ee4fea3af9db846324ccba469", "089cb31234c0a14f223f52d74264fb3b1500bba3", "1772143a707f02685e3445485b89c888b2535e7a", "12aa36062bc7cd2edc77279a3c05390555cbbc76", "782120110f696343bb7ab662fe090e7a0194c3c8", "24b8fb9cdcf47b21dc7333cb4e5b8f4016ef0c11", "142f21fec89ca30221c74ffa6624d8dbecaf5a0d", "a99d85e6ff4e018507b6900c1849d94f15e5cf5e", "316eee5aa9c7a545283f71a293f81137b43c10e2", "3a90a4a68fdaab674fbd19ec742978d27c22e659", "de79e2cea3527c0921b7ff78011658003f3ae244", "6ae6dc55308bf1d02e3d26947c29c33347dad26d" ], "paperAbstract": "Entity cards are being used frequently in modern web search engines to offer a concise overview of an entity directly on the results page. These cards are composed of various elements, one of them being the entity summary: a selection of facts describing the entity from an underlying knowledge base. These summaries, while presenting a synopsis of the entity, can also directly address users' information needs. In this paper, we make the first effort towards generating and evaluating such factual summaries. We introduce and address the novel problem of dynamic entity summarization for entity cards, and break it down to two specific subtasks: fact ranking and summary generation. We perform an extensive evaluation of our method using crowdsourcing. 
Our results show the effectiveness of our fact ranking approach and validate that users prefer dynamic summaries over static ones.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080810" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/37f54ef89c0744dba373397a34c07e6e4b9e852f", "sources": [ "DBLP" ], "title": "Dynamic Factual Summaries for Entity Cards", "venue": "SIGIR", "year": 2017 }, "381416c19b636c9bbab6ec5ebb1c1fa1be6faeca": { "authors": [ { "ids": [ "2593764" ], "name": "Shruti Padmanabha" }, { "ids": [ "2175353" ], "name": "Andrew Lukefahr" }, { "ids": [ "40040123" ], "name": "Reetuparna Das" }, { "ids": [ "1721289" ], "name": "Scott A. Mahlke" } ], "doi": "10.1145/3123939.3123969", "doiUrl": "https://doi.org/10.1145/3123939.3123969", "entities": [ "Hall-effect thruster", "Thread (computing)", "Throughput" ], "id": "381416c19b636c9bbab6ec5ebb1c1fa1be6faeca", "inCitations": [], "journalName": "", "journalPages": "745-758", "journalVolume": "", "outCitations": [ "cf64cdc889a4edaf641a307aa2b11d89d4d10a09", "043a4cba78435e1fe85f399a6ce6f3abcb9d4fc2", "46742c000a65f676c00ec4e33d19d535a1c29dd7", "283550fce0fdc0876db5df533625dffdfcd8d099", "6f090d59bde17b7604985acf38e26785e794bcc0", "ee80140a1cbe8dfe6102ffca3e622a2b7f45a342", "7ce25a0852e2345be1a1bd02b8eb4cefb9d47073", "f016d23ffca72cdf1eb584613452720eaacafd9c", "2238562e6b65b0bb1a43e2f3041fe85ee8bb7ec4", "2ca87cdedfc5375a4f922cd111fdb5fad4182e82", "109df0e8e5969ddf01e073143e83599228a1163f", "3421f7eccb98a877883f8e3cc68b711f5c784fec", "595f7d9954ca67604b9c4f60feffb1c3bbac2bd6", "6ab6c1334c70db6e7705455a2db359e8d83042f9", "362d884ff43d8c7cd6bce184944cfc04cdd57c18", "3ed84f2fbdc4dc6450919ec5b017e66440a5833c", "352a8957005dc5519b15ed1870751ec494d66395", "3640fd02d3a62c22aaee643aaa8083a3b37325b7", "2cbca64fbcd3eb397cf918b2df6ceac3f579efdd", "15860f9f774f19f245f016d9cf479222e4f9a6ba", "84c31932d221afbd7d50f55e16900664b1027a1a", "480a952f7d24cf6d3ccda62439424eea6a8fd469", 
"7dcfa2708b4af25fc55087878ffb966ec9bd93d4", "221f78156aa66373f85883d21160a2445c0cc383", "27925fe362385297fb16eded3461b2f7c82238ef", "1ba9756e5bca19f2753d3afe255021f8a7387c62", "5b4ca29fd32b2d11b0e5ea7efbc34a34023915e2", "117d72bba7f9fb9e489d9d1af8cf13a109a094e4", "775e31576b5a3d3df5333b1fba4aa3fb814929a2", "0949733ed913c7b6fc8ea1f9cfe985715fa99d03", "55862c5802a745d4a12c881630e38167af3da432", "676ff3d6d04b5748771f843dbf8dd07ee0a612db", "13a6c714cddeded37a69205f39935da4e7082f43", "0b2f5fb96e4b243407a69b2e39b5e1024d4e06af", "07ccd8f48c5067fff6f50e6654ff410195ff0a91", "1fcec27437d40285684aad5c68d2db076b27a195", "daa5538192e0058e12a83bd64fd19866c01adcf6", "1a229f1d21abe442520cba31a6e08663b3d31777", "540892abd51e931839dbe15b4d55ab108b5a7f71", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "0bb6affe5099e22203527e73aef297cf279a458d", "2b585692b2337286e88095c2341af4d8121e80b8", "0aac782fe3ba9619e745898c530a1e92a72240b3" ], "paperAbstract": "Heterogenous chip multiprocessors (Het-CMPs) offer a combination of large Out-of-Order (OoO) cores optimized for high single-threaded performance and small In-Order (InO) cores optimized for low-energy and area costs. Due to practical constraints, CMP designers must choose to either optimize for total system throughput by utilizing many InO cores or maximize single-thread execution with fewer OoO cores. We propose Mirage Cores, a novel Het-CMP design where clusters of InO cores are architected around an OoO in a manner that optimizes for both throughput and single-thread performance. The insight behind Mirage Cores is that InO cores can achieve near-OoO performance if they are provided with the dynamic instruction schedule of an OoO core. To leverage this, Mirage Cores employs an OoO core as an optimal instruction schedule generator as well as a high-performance alternative for all neighboring InO cores. 
We also develop intelligent runtime schedulers which orchestrate the arbitration and migration of applications between the InO cores and the central OoO. Fast and timely transfer of dynamic schedules from the OoO to InO allows Mirage Cores to create the appearance of all OoO cores to the user using underlying In-Order hardware.\n Overall, with an 8 InO per OoO configuration, Mirage Cores can achieve on average 84% of the performance of a CMP with 8 OoO cores, a 28% increase relative to current systems, while conserving 55% of energy and 25% of area costs. We find that we can scale the design to around 12 InOs per OoO before starvation for the OoO starts to hamper system performance.", "pdfUrls": [ "http://cccp.eecs.umich.edu/papers/shrupad-micro17.pdf", "http://doi.acm.org/10.1145/3123939.3123969" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/381416c19b636c9bbab6ec5ebb1c1fa1be6faeca", "sources": [ "DBLP" ], "title": "Mirage cores: the illusion of many out-of-order cores using in-order hardware", "venue": "MICRO", "year": 2017 }, "381ce8b53187acf678cd47a189439098f56d75e0": { "authors": [ { "ids": [ "35000558" ], "name": "Chris Cummins" }, { "ids": [ "1955185" ], "name": "Pavlos Petoumenos" }, { "ids": [ "40514580" ], "name": "Zheng Wang" }, { "ids": [ "2476647" ], "name": "Hugh Leather" } ], "doi": "10.1109/PACT.2017.24", "doiUrl": "https://doi.org/10.1109/PACT.2017.24", "entities": [ "Artificial neural network", "Deep learning", "Graphics processing unit", "Heuristic", "Machine learning", "Mathematical optimization", "Optimization problem", "Parallel computing", "Predictive modelling", "Program optimization", "Unicom System Architect" ], "id": "381ce8b53187acf678cd47a189439098f56d75e0", "inCitations": [ "a6400f91c0b08b41dde2b3e36f4b4c4377d04c1f", "7e700572f78faaa673eac1955c35aa96d4eb85b6", "1172aeb1c26e5a7226ecea213f5517bc1a1a4677" ], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques 
(PACT)", "journalPages": "219-232", "journalVolume": "", "outCitations": [ "762b63d2eb86f8fd0de98a08561b77527ae8f165", "69b2f8482713be20b6025c01a475f6da6fddd15b", "2a042a60aef9eaa6b23241fec2b95831b40030dd", "14b2882599d56a68aab4e8bd3dea4664a40aaaf9", "a538b05ebb01a40323997629e171c91aa28b8e2f", "0380c641bf8ffe814e3e48c6964438bfd40e3480", "065e9991a5481f10da8d35ddb436d3ca3eeae3ee", "bc4638f55f6ec57e37ac201ec3a61fdf58540aca", "60f068dea641df784a379411c57aa8f2b23d1a98", "0172cec7fef1815e460678d12eb51fa0d051a677", "1ab2dc3c109c2d20cd663e4183f2aed174b71248", "0c6c018e90705b85e5a41f58b6e60a8c45988aa6", "14b5e8ba23860f440ea83ed4770e662b2a111119", "235317b7ce64b4b3aefe57c1304e8157a4832d2c", "075d460a4737d7c0b3fd4b7aa03e315f7256b1af", "278730d2c05757f83417a423d758d3bbe91e6d32", "1ce7ab36a94bf59817992a906fb4f0b3bf4cc90a", "0c76a904b28c775eb5f33cd982f0bfeddab353e3", "2c5d6794acdb528070df68038a33c1bd16ced1b3", "272216c1f097706721096669d85b2843c23fa77d", "10d3e0f0648d0a5cfaebb3044ea7b14a52e54466", "44efef85d56e61fb304f27010cc0d1bd80283a69", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", "3db24ba1a7db13fb9506713a226bc075a0f8057f", "3ab98cab824062228713b1278ad1e4f026ec346e", "4506fe8399bd04892293051e895db2ae6b52b4e3", "1ce00e33eef166aed0b2bf88a83bdc876e6ee609", "03028a78daf97a01a26975a72c59c8d97cb18810", "18c18cc0118afd086ea7259bec1d6157a105a5c8", "0e8b91f458389684849ae73afea707d71af2c12e", "1827de6fa9c9c1b3d647a9d707042e89cf94abf0", "01fcae344d2edb715bcc63a40b6052c0331741bd", "00156e79606084497789662dfaf59c3b54a10722", "a0327bb12a31beabf5dac9b9e21669047e1565b6", "8ac6be0e3ea62e9819d5a25da645f2d350474693", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "03e3a481d9713ad4d39dc608959d87b3f8d8144e", "46e107b7e1cecb04310e9c7be51567aefcc18f6f", "597a37c1282639a3f5920455bed38dacc5d1aa71", "02e0bc77460469aefec5bd794ee6c4efc15e6adb", "33cd9eda21fa21e3efb6e67fde1d6b98878e32a1", "4708c983f55344273764e064f4f45f94f3e3920e", 
"27e1dbe9f7c71cd6cc1b0357f49aef497e572d09", "5643b2bf3b8d8a6d7ebf231becb9123b5b4a9287", "a5eb8900450908f3e245c3740420af4cb2348ef8", "7f013f172a45824d907f68481e92a22e0188ea0b", "0bde2adacbdb9a66ba3103e3f128a9d6f3ee032e", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "081651b38ff7533550a3adfc1c00da333a8fe86c", "4954fa180728932959997a4768411ff9136aac81", "23f3a802af8493d6fd72d3034ab3bfa7345e9db1" ], "paperAbstract": "Accurate automatic optimization heuristics are necessary for dealing with thecomplexity and diversity of modern hardware and software. Machine learning is aproven technique for learning such heuristics, but its success is bound by thequality of the features used. These features must be hand crafted by developersthrough a combination of expert domain knowledge and trial and error. This makesthe quality of the final model directly dependent on the skill and availabletime of the system architect.Our work introduces a better way for building heuristics. We develop a deepneural network that learns heuristics over raw code, entirely without using codefeatures. The neural network simultaneously constructs appropriaterepresentations of the code and learns how best to optimize, removing the needfor manual feature creation. Further, we show that our neural nets can transferlearning from one optimization problem to another, improving the accuracy of newmodels, without the help of human experts.We compare the effectiveness of our automatically generated heuristics againstones with features hand-picked by experts. We examine two challenging tasks:predicting optimal mapping for heterogeneous parallelism and GPU threadcoarsening factors. 
In 89% of the cases, the quality of our fully automaticheuristics matches or surpasses that of state-of-the-art predictive models usinghand-crafted features, providing on average 14% and 12% more performance withno human effort expended on designing features.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.24", "https://chriscummins.cc/pub/2017-pact.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/381ce8b53187acf678cd47a189439098f56d75e0", "sources": [ "DBLP" ], "title": "End-to-End Deep Learning of Optimization Heuristics", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "384043c9b44cf6848913a3101b63e161e88adb07": { "authors": [ { "ids": [ "1682067" ], "name": "Yossi Gilad" }, { "ids": [ "3246066" ], "name": "Avichai Cohen" }, { "ids": [ "1748437" ], "name": "Amir Herzberg" }, { "ids": [ "1718880" ], "name": "Michael Schapira" }, { "ids": [ "1686383" ], "name": "Haya Shulman" } ], "doi": "", "doiUrl": "", "entities": [ "Classless Inter-Domain Routing", "Human error assessment and reduction technique", "IP address spoofing", "Public key infrastructure", "Recommender system", "Resource Public Key Infrastructure", "Simulation", "Software deployment", "Vulnerability (computing)" ], "id": "384043c9b44cf6848913a3101b63e161e88adb07", "inCitations": [ "4b6a46ba0fb4c3e9381b8796c48a3c2c28816ce4", "0ddec6b6648842489408628263ebb069078bbca0", "369ccbcfa288afc923bcd0cb0c036685e4c3c149", "800b6ced689db9497fe02ddcc4d893dbc1863ab3", "fdc49ed406b90f832dde9cc9423a945c168815a0" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "1010", "journalVolume": "2016", "outCitations": [ "f44527267b8817909eb7694bd1b1ffc27836383a", "0dd046fd2f1ba04690c1f41be83326cbf6c4897b", "378b196a675c179a24ef2311e920403449e4a150", "63629554e7139352f0295f6a5ed51cc6b5612a7e", "e3f235338c3452ed7d813ebef817d031359ed40c", "1684b97e72f2ce8cb47cadaf09287564df78a01f", 
"187fb09496d6b9cd4141a0917e365de28b4fc0c8", "8fda944e50c69c9176a03bebc4225cd6c1c133a0", "3dffbec1ef84f40df69876e57900b49395f76cc0", "89dfaab876ef69d08c7332d560f3b4664cacb9fb", "cef5c0918481a2e7a7bbcec8fce3bc452e9c06ad", "019a759fc1af07bfb4bdc322f268d4c9917a0e63", "52f7273fc7c7e3cfd9d668e653b5bc694d367e32", "f5e3b51727b164962794b3e3cf523f4dc86cd31d", "96d1137d5c3745be516d8d1b9318f0480ca4534f", "7a4445b8b5ff4bf077b33b485861c70a4c82d3e7", "8129f7595e1294ae41be54399eafda2eb60c07e0", "8766897d7f9c51d1e141c39050418ef849f6a9db", "294a33d55f566724598311035d1a26dbe792c5fd", "df18eafdafbc48c42ddf7bf339bb4c7365c69eb1", "24530e358b248d47358bb1d2732bd090a64ad4b6", "e1b544f3583a9ea14894600e43cc6955c3122067", "160dace7a8dc7c58c5de48ff59d30cd66ee34b37", "603457793cf9a6056f6597420c838210342a3d2b", "03a6909f6cffe4021d37cf875ada566a716fc2e7", "50ad7e07d0f59fea481d556f91f4105c0618ade0", "b1bd89d8c0fdf7beef57add2068cae20ac1b7bfe", "ba1ac16af1bee3fcd9b8476c021269ea8299f7be", "3a1625fd7789714a6a5e2e01b3122a9621b33d27", "54df502bfd6aac0c13ae111ca46ddc36d645bdfb" ], "paperAbstract": "The Resource Public Key Infrastructure (RPKI) binds IP address blocks to owners\u2019 public keys. RPKI enables routers to perform Route Origin Validation (ROV), thus preventing devastating attacks such as IP prefix hijacking. Yet, despite extensive effort, RPKI\u2019s deployment is frustratingly sluggish, leaving the Internet largely insecure. We tackle fundamental questions regarding today\u2019s RPKI\u2019s deployment and security: What is the adoption status of RPKI and ROV? What are the implications for global security of partial adoption? What are the root-causes for slow adoption? How can deployment be pushed forward? We address these questions through a combination of empirical analyses, a survey of over 100 network practitioners, and extensive simulations. Our main contributions include the following. 
We present the first study measuring ROV enforcement, revealing disappointingly low adoption at the core of the Internet. We show, in contrast, that without almost ubiquitous ROV adoption by large ISPs significant security benefits cannot be attained. We next expose a critical security vulnerability: about a third of RPKI authorizations issued for IP prefixes do not protect the prefix from hijacking attacks. We examine potential reasons for scarce adoption of RPKI and ROV, including human error in issuing RPKI certificates and inter-organization dependencies, and present recommendations for addressing these challenges.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/are-we-there-yet-rpkis-deployment-and-security/", "https://eprint.iacr.org/2016/1010.pdf", "http://eprint.iacr.org/2016/1010", "http://eprint.iacr.org/2016/1010.pdf", "https://www.crisp-da.de/fileadmin/content/Publikationen/Publication_Are_We_There_Yet.pdf?_=1478260786" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4d36/39126f7ddb8e29a56067264e164169656741.pdf", "s2Url": "https://semanticscholar.org/paper/384043c9b44cf6848913a3101b63e161e88adb07", "sources": [ "DBLP" ], "title": "Are We There Yet? 
On RPKI's Deployment and Security", "venue": "NDSS", "year": 2016 }, "38574a13b6626e0c5a63e4298fc6a314e848789e": { "authors": [ { "ids": [ "2448867" ], "name": "Oleksii Starov" }, { "ids": [ "1679711" ], "name": "Nick Nikiforakis" } ], "doi": "10.1109/SP.2017.18", "doiUrl": "https://doi.org/10.1109/SP.2017.18", "entities": [ "AdBlock", "Device fingerprint", "Fingerprint", "Google Chrome", "Google Chrome extension", "Public key fingerprint" ], "id": "38574a13b6626e0c5a63e4298fc6a314e848789e", "inCitations": [ "29696aad37a2e83480a4a071f7b463ddac072669", "08f70c16e397d2c2e0e09f4f158cb5c566855938" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "941-956", "journalVolume": "", "outCitations": [ "0c9565025771f6e43f11e543350e56e8136a7b3a", "1ac30f626837e58d02340a3b3656ec99f97468fd", "156c09cc5a384e03271ca6cfbdd83fd62dee085c", "01dbc5466cce6abd567cc5b34a481f5c438fb15a", "820a83807b6530afde5ddbccb81af9794780993f", "70f340e80468832b7a293da8a4f1d08ed2786448", "fe2f4faec5cf209ae7d8a73100db9cce46ce53d4", "51b0ce84988e083d6253af098542f905e1fea0a8", "598848aaa4aa40bb6b7ab51490821a173cf18800", "5a032460c589a67e7c73b19c93aa591331758139", "81ac9efc35d85103963e9444887f7775e2d67b6d", "1f38c11fe8511c77fb7d383126214c9e7dc28e4a", "0c246863ee7d0513cdc2cebff9b173cd4bdc8134", "254f86dc50c6a2e2bce7241416372c290883e7ec", "104b2ec3a9be37294d23d5c968eeb3fb0c081d8e", "0d2f693901fba451ede4d388724b0e3f57029cd3", "3d8775945f7c62b2bca55b7097fde9427b0363bd", "a155264f143aafd380f40fd0167c9b7960f64ea2", "143cd817835243e873f82f28367c8866f779187d", "3957270267c2bba0ac00ab3c4461f0c47cfd95c3", "834cdbde6e7800f9f50d4884858bb093fc3b65f6", "2494382813fa0b7aa405c4cc0f1ef5be90ca2d79", "64636610404013914bc73ae4ae9d057e6835c4b4" ], "paperAbstract": "In recent years, researchers have shown that unwanted web tracking is on the rise, as advertisers are trying to capitalize on users' online activity, using increasingly intrusive and sophisticated techniques. 
Among these, browser fingerprinting has received the most attention since it allows trackers to uniquely identify users despite the clearing of cookies and the use of a browser's private mode. In this paper, we investigate and quantify the fingerprintability of browser extensions, such as, AdBlock and Ghostery. We show that an extension's organic activity in a page's DOM can be used to infer its presence, and develop XHound, the first fully automated system for fingerprinting browser extensions. By applying XHound to the 10,000 most popular Google Chrome extensions, we find that a significant fraction of popular browser extensions are fingerprintable and could thus be used to supplement existing fingerprinting methods. Moreover, by surveying the installed extensions of 854 users, we discover that many users tend to install different sets of fingerprintable browser extensions and could thus be uniquely, or near-uniquely identifiable by extension-based fingerprinting. We use XHound's results to build a proof-of-concept extension-fingerprinting script and show that trackers can fingerprint tens of extensions in just a few seconds. Finally, we describe why the fingerprinting of extensions is more intrusive than the fingerprinting of other browser and system properties, and sketch two different approaches towards defending against extension-based fingerprinting.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.18", "http://www.securitee.org/files/xhound-oakland17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/38574a13b6626e0c5a63e4298fc6a314e848789e", "sources": [ "DBLP" ], "title": "XHOUND: Quantifying the Fingerprintability of Browser Extensions", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "386cfe7eddabfb13c481dd5ebfdada1ad77c26cf": { "authors": [ { "ids": [ "20977255" ], "name": "Tiffany A. 
Connors" }, { "ids": [ "1721526" ], "name": "Apan Qasem" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.58", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.58", "entities": [ "Block size (cryptography)", "Graphics", "Graphics processing unit", "Machine learning", "Programmer", "Thread block" ], "id": "386cfe7eddabfb13c481dd5ebfdada1ad77c26cf", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "442-449", "journalVolume": "", "outCitations": [ "8ec3a8936bf7ddd1fa04a2e66827c1b7a467be09", "f3a60fa0e951c14d5051d0e0506889730f746729", "4506fe8399bd04892293051e895db2ae6b52b4e3", "1108af609469e420aeae551ba8a893c3200e07fa", "a47b408349a8146f71cb54c38226d2f7d92700fe", "a5a95ad4b217cf5b2f1038753ba76fae94da1bec", "326d1495d5288ce7fbe548809df56a8ac11da544", "763d57077a23967cdaba8986e8d8ebfd6d5a29c4", "94fc1aa5d1ad0be589e74fed4357d757c3cfeaed", "24fb4e260cbb59ca371aea6ef9aac97b4f32f8c2", "5dc3c465ef293f0ddc355196be1b7eead3dd588f", "09ea2e8942f624a6dad4c96f3c62320fbb0db7a8", "3acb06ddf2fa04037312fe984b11b4240a3c29cb", "c658634494efda05dfac80a6fc2e6770f4d46bd1", "85e94f77a60285a6891bb364be6ef9b0a6a4b19a", "4bceae20f3d438c2c99fd2c0b58cb941b0f8866a", "59b0afb917493c4070f335bd87f55a172429195d" ], "paperAbstract": "Graphics processing units (GPUs) provide high performance at low power consumption as long as resources are well utilized. Thread block size is one factor in determining a kernel's occupancy, which is a metric for measuring GPU utilization. A general guideline is to find the block size that leads to the highest occupancy. However, many combinations of block and grid sizes can provide highest occupancy, but performance can vary significantly between different configurations. 
This is because variation in thread structure yields different utilization of hardware resources. Thus, optimizing for occupancy alone is insufficient and thread structure must also be considered. It is the programmer's responsibility to set block size, but selecting the right size is not always intuitive. In this paper, we propose using machine learning to automatically select profitable block sizes. Additionally, we show that machine learning techniques coupled with performance counters can provide insight into the underlying reasons for performance variance between different configurations.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.58" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/386cfe7eddabfb13c481dd5ebfdada1ad77c26cf", "sources": [ "DBLP" ], "title": "Automatically Selecting Profitable Thread Block Sizes for Accelerated Kernels", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "3874d0a02177ffac50c40e9b0539b2705d808db8": { "authors": [ { "ids": [ "1728260" ], "name": "Erik Saule" }, { "ids": [ "13423524" ], "name": "Dinesh Panchananam" }, { "ids": [ "2317042" ], "name": "Alexander Hohl" }, { "ids": [ "40019333" ], "name": "Wenwu Tang" }, { "ids": [ "1709703" ], "name": "Eric M. 
Delmelle" } ], "doi": "10.1109/ICPP.2017.57", "doiUrl": "https://doi.org/10.1109/ICPP.2017.57", "entities": [ "Algorithm", "Algorithm design", "Analysis of algorithms", "Exploratory testing", "Geographic information system", "Information Systems", "Kernel density estimation", "Real-time computing", "Scheduling (computing)", "Social media", "Time complexity", "Web crawler" ], "id": "3874d0a02177ffac50c40e9b0539b2705d808db8", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "483-492", "journalVolume": "", "outCitations": [ "044c1f31a27014301b5c879406275b70d62f320a", "6c2a93d49e4082ab4b5ff4e8c67554654f79468c", "3e841a11a4b7a57f57e68422b0edcf78e6d343ef", "50fa3c3e5575bf816428a0c47427ad1f78c41db9", "0c88eb0d571a91d5c6675b4f30330cf8fb575382", "342656aa298185aa96223c9c26a82f576f0e42ba", "823d5ab92cceae1865cf43b7b70280b1acbcedb3", "08a66903d4341996d8943facc24d75ac77d267b7", "b4edeef779eb3cbc6984bc082e1475452ed606b2", "5fa4392b643f9dd01e93bc54bfb6616d6de8b23e", "c037edd22215b89c8d2924d4e3c81eb84fdadec7", "8c030adee8b8c4c5887b7e4ac64a3111fad22263", "c05b1c4deec39147c5be2401e6ffed19b45992db", "2ac0739e1731145a7d73da906ea486a6356a9bcc", "51d42bbb23ea90d10177a2bebbb64e6b5ab11d7e", "45ae5be6c1936265927557b9b41a53b5bcbb2bd8", "135bea45605ad370bed17fd0edf79e2ae3521e96", "8d0929cfc41b035cac603d9ac3a97b3d00bac145", "ae4d65769b6551a51d6fc6be2f021515bffa0798", "814ac19204be9f8e94037359c674f7d2c2ac9a95", "547f5943f242d9db917343b6520d10a45c00507d", "4b971a801b81c6220751de4b27006323da51b4b4", "df778b2181cae51353155580d959183ff177c635" ], "paperAbstract": "The exponential growth of available data has increased the need for interactive exploratory analysis. Dataset can no longer be understood through manual crawling and simple statistics. 
In Geographical Information Systems (GIS), the dataset is often composed of events localized in space and time; and visualizing such a dataset involves building a map of where the events occurred. We focus in this paper on events that are localized among three dimensions (latitude, longitude, and time), and on computing the first step of the visualization pipeline, space-time kernel density estimation (STKDE), which is most computationally expensive. Starting from a gold standard implementation, we show how algorithm design and engineering, parallel decomposition, and scheduling can be applied to bring near real-time computing to space-time kernel density estimation. We validate our techniques on real world datasets extracted from infectious disease, social media, and ornithology.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.57", "https://webpages.uncc.edu/~esaule/public-website/papers/arxiv17-SPHTD.pdf", "https://webpages.uncc.edu/~esaule/public-website/slides/saule17-knoxville.pdf", "https://arxiv.org/pdf/1705.09366v1.pdf", "http://arxiv.org/abs/1705.09366", "https://webpages.uncc.edu/~esaule/public-website/papers/icpp17-SPHTD.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3874d0a02177ffac50c40e9b0539b2705d808db8", "sources": [ "DBLP" ], "title": "Parallel Space-Time Kernel Density Estimation", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "387f90087c28a41f46d5718fa808c56499ae07f1": { "authors": [ { "ids": [ "36931703" ], "name": "Daniel Patterson" }, { "ids": [ "17810669" ], "name": "Jamie Perconti" }, { "ids": [ "21431682" ], "name": "Christos Dimoulas" }, { "ids": [ "4239511" ], "name": "Amal Ahmed" } ], "doi": "10.1145/3140587.3062347", "doiUrl": "https://doi.org/10.1145/3140587.3062347", "entities": [ "Assembly language", "Basic block", "Callback (computer programming)", "Continuation", "Embedded system", "Functional programming", "High- and low-level", 
"Interoperability", "Stack-oriented programming language", "Type system", "Typed assembly language" ], "id": "387f90087c28a41f46d5718fa808c56499ae07f1", "inCitations": [ "17aaf2846fa447b6a6a8e2851e9f720930a21f0f", "049a1f364154b161ba8064ccebe74de1f995cf3c", "65e87447a357c0f2fccf3111ac0241aaec3f0a7f", "acad7ce64bc015c6a2b3581e460b50d656d8126c", "6db61a6cdabb9036db9d1c6820fcbb52e13f153b" ], "journalName": "", "journalPages": "495-509", "journalVolume": "", "outCitations": [ "872d3684cb079b25dd2fbffcce2f3929474e7b37", "cfa2902a122280060e2a5b43ca35075702a6177d", "192aa338c36d0c21f3afbf5d8d1b375b05a28b11", "48a43d2b963ad52c99873d3857bb3f9e6c662b40", "52144f985ac264217d5c83ab79aee86a43932d72", "39cc1db391764199da8d6adda8c98666bdee9b31", "df1d7b884d6974eae24d9cddc76591caacfe0a96", "0c175d5b7aad01b5f3847287b64a428f2f2455c0", "202d46204b747f2392321427dd7fb608fc46ec6d", "73a4bed113c0f622e02c2590b65c969f34d0d281", "a9fc8d376be9484307c9f42b20692963e06770e0", "2fc73f6662ec1fc40d30208115a7f02531068d00", "07c55029ddb4a104343293649be656232e581bcb", "353b5681c03311d8c3e62cf8676b682d58ec14fb", "1e6e9fca268261f64221f3af9fa4172917dc87e4", "2ade7d2c0fbed892925a9d8ca7e0946eb28b2b3a", "9e1eaab73e07f2f418accce1a3ac55f5a464a130", "59c7fc199d204a9444b5f501f6df1e5daf4a0638", "0d8af0068ca4861193d843e21dc2f2b292ec3855", "12eeed063c2fe705463a3063afaccf83007bed17", "00990ced0ee83eb1a53f493dc71dfdacd1d4ff7d", "1a6c35f4dbbf47ddd6a7bd3133830459fa041ff2" ], "paperAbstract": "We present FunTAL, the first multi-language system to formalize safe interoperability between a high-level functional language and low-level assembly code while supporting compositional reasoning about the mix. A central challenge in developing such a multi-language is bridging the gap between assembly, which is staged into jumps to continuations, and high-level code, where subterms return a result. 
We present a compositional stack-based typed assembly language that supports components, comprised of one or more basic blocks, that may be embedded in high-level contexts. We also present a logical relation for FunTAL that supports reasoning about equivalence of high-level components and their assembly replacements, mixed-language programs with callbacks between languages, and assembly components comprised of different numbers of basic blocks.", "pdfUrls": [ "https://arxiv.org/pdf/1711.03871v1.pdf", "http://arxiv.org/abs/1711.03871", "https://dbp.io/pubs/2017/funtal.pdf", "http://doi.acm.org/10.1145/3062341.3062347" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/387f90087c28a41f46d5718fa808c56499ae07f1", "sources": [ "DBLP" ], "title": "FunTAL: reasonably mixing a functional language with assembly", "venue": "PLDI", "year": 2017 }, "389c15a86b956c95f946ed2ecd48be4f32af7960": { "authors": [ { "ids": [ "1764889" ], "name": "Meghyn Bienvenu" }, { "ids": [ "1788762" ], "name": "Stanislav Kikot" }, { "ids": [ "1721171" ], "name": "Roman Kontchakov" }, { "ids": [ "1835798" ], "name": "Vladimir V. 
Podolskii" }, { "ids": [ "1735935" ], "name": "Vladislav Ryzhikov" }, { "ids": [ "1749505" ], "name": "Michael Zakharyaschev" } ], "doi": "10.1145/3034786.3034791", "doiUrl": "https://doi.org/10.1145/3034786.3034791", "entities": [ "Conjunctive normal form", "Conjunctive query", "Data access", "Datalog", "LOGCFL", "NL (complexity)", "Ontology (information science)", "Parameterized complexity", "Scalability", "Treewidth", "Web Ontology Language" ], "id": "389c15a86b956c95f946ed2ecd48be4f32af7960", "inCitations": [ "1eb136be743fe4db6d1c534aa352fe9745974302", "ec11bf2ea3d5a685b86ae1b4cf31a5ecec28604b" ], "journalName": "", "journalPages": "201-216", "journalVolume": "", "outCitations": [ "c2ae1b0acd281a2fff041f3098e7aef9a2f5c794", "65783c300d71a4cb5cf34662f9e5a7119af3d2e2", "2048ed53dfb325200839a3f6390692d6c62bff53", "013e22ad53e5d188a49bee63ac2b35e08f0aae10", "96db28d5673e3b3f8b0b643343dd3c48e5f79807", "7c10232fd5b0e71a956253485c3b45f54d5f927a", "221f8db89ab305d8eec84cd054f652efa7c7e15e", "060c83990c6c5f906ba419ed9d5a62f7d49049bf", "2a1cbb7dc3d85fb7e1b0bce3e57550e5ee86ed5b", "174d4f02e7d64c05f60171244ca5ceb8a7445f94", "d56bf5e694ce836590fc2d67d8d1357a1d2a43a2", "0d07b93cb2ee53cdfa27b116102f38c8e8b921f7", "931f080660baa43c78b4f7eef1961830bb044539", "63051e0f448390d5cdb26229347e3af3c15e2c99", "05642f39729fdd01311203903793abeafa894ee7", "c011c2077a4521455d9934b5b5dd65f61ca1e613", "ea2e47ce45ee31fe3abb139f1515e57c8cf16dc4", "97166f9b284d8e7da46bd6cd43b43a7af14b773c", "62a5b3eff3e2dd25cec251d69b3a75fae47781c9", "05aecc7d2953750b748dc45c4167ccbdbaa6b7bf", "2132f2419f0e38d57199a5046885b04196fc0155", "0d491b3378ceebddaf6f76b123d2a103a342e88d", "f533a433a23ab745ef8fb959e66c453c90ace345", "8e358695cf1dfaa78f52673e1f0cdcb1d97e0a7d", "076ccc37cdb3776bfe8f1ec27ff336aee0562ce4", "21d1ddaa1ba6ba0648560f5f43d56b13da9a25a0", "25c51377a34aeb18b51ea7de1f9632eaa09f591e", "7f43019a1a48b1cb80544366c571163e1ea545dc", "1d28353790aae0bf3e05654a9153d8627e633d0c", 
"4a3a2f8ccaf34909ba6b60c51e83c0f0d9f3d30b", "be601c1613b9d09adb731a9983dcd00a811cc66d", "407748e97d8d3878535f6371ad324708915bf6d9", "a5fa13231ca2cefcf60ae97eafee8d772fdfdfc5", "0ccd91e81ea34b33bd3875888f6be2722d056af2", "4c5098ef0cb1885ee1036abb0dd211305d90ab83", "91b151fbdc1fa3ba4e2351fb5cf9b5192b7ba78d", "4d11568fb6552845d1a6fa7ac254e6a0a9ef7251", "c1206139d29d03c97a7399a216117520635f7b3d", "a8ca16e3d42ae9b1c02adc219fce0252d519884c", "eadea88b9259ec13eaac3b19ce92d438557e0ccb", "d627789407f9a263ac7b40b73ad085e5efda81f8", "3d49bd9dc99f6d0fe050bf47920971942169fe6e", "664406d2a7047b99da639ec49b13bfe31cf3fa60", "af21def975d12de6bf3d92570f487235c89966e5", "0889f11cbf8c8e82d455a6d605cacbf80a260674", "a97b77c8c3f9e8247c497d1b4c27c958a15f3a62", "1dfd5fff7eb35a28d7c2471445caa0d89dae3cc1", "67bf6321ab806dfeb8aa40e8fe7cdee87ca70e36", "12a8bd964439843aeed47f32b1dfcecd6072975f", "526494866c15e8d1a417e4995f7a8daa26cc4424", "83b88b76d932f07e629edbf4c06904cdf5bb2de4", "13093198b166235f8cca8757afc6ce13d54c1796", "6305d44de435f0c77434669b5d330cec9bc97445", "873458eb5a10f2a599c36d14743526247ea646d7", "466a0b7cffa32a62ca3fe8db8fd65f363a3a6463", "050fb3d6fb3eafb9d0095927a534fc55ccbe3218" ], "paperAbstract": "Our concern is the overhead of answering OWL 2 QL ontology-mediated queries (OMQs) in ontology-based data access compared to evaluating their underlying tree-shaped and, more generally, bounded treewidth conjunctive queries (CQs). We show that OMQs with bounded depth ontologies have nonrecursive datalog (NDL) rewritings that can be constructed and evaluated in LOGCFL for combined complexity, and even in NL if their CQs are tree-shaped with a bounded number of leaves. Thus, such OMQs incur no overhead in complexity-theoretic terms. For OMQs with arbitrary ontologies and bounded-leaf tree-shaped CQs, NDL-rewritings are constructed and evaluated in LOGCFL. We experimentally demonstrate feasibility and scalability of our rewritings compared to previously proposed NDL-rewritings. 
On the negative side, we prove that answering OMQs with tree-shaped CQs is not fixed-parameter tractable if the ontology depth or the number of leaves in the CQs is regarded as the parameter, and that answering OMQs with a fixed ontology (of infinite depth) is NP-complete for tree-shaped CQs and LOGCFL-complete for bounded-leaf CQs.", "pdfUrls": [ "http://www.dcs.bbk.ac.uk/~michael/PODS.pdf", "http://arxiv.org/abs/1702.03358", "https://arxiv.org/pdf/1702.03358v1.pdf", "http://doi.acm.org/10.1145/3034786.3034791" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/389c15a86b956c95f946ed2ecd48be4f32af7960", "sources": [ "DBLP" ], "title": "The Complexity of Ontology-Based Data Access with OWL 2 QL and Bounded Treewidth Queries", "venue": "PODS", "year": 2017 }, "38aa1e6fb4795b3e538570b3613744735da93e92": { "authors": [ { "ids": [ "1680280" ], "name": "Li Li" }, { "ids": [ "38638723" ], "name": "Bruce Beitman" }, { "ids": [ "2441395" ], "name": "Mai Zheng" }, { "ids": [ "1690476" ], "name": "Xiaorui Wang" }, { "ids": [ "37110735" ], "name": "Feng Qin" } ], "doi": "10.1109/IGCC.2017.8323567", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323567", "entities": [ "Abdomen", "Android", "Digital footprint", "Mobile app", "Smartphone", "Smartphone" ], "id": "38aa1e6fb4795b3e538570b3613744735da93e92", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ "1ce8a69918894b197ee83c02a9e6e232795d02ec", "11fbcfedfdc701e563a8888df6cd8de8c7652b69", "2cac6e84d3d7fed13ec9a5d39fd2bd6e75423578", "0d3e3f7fb94d20eebc3022b724d008bf35c71b60", "526415d115d6eb042490ae5ce23395226640422f", "08037783f06e48af4e99d234683d0cbb20771035", "84fdccb41f31247dfb86aadba6f2b4d75538767f", "e5dc8be9b4678ae1f91764494acc96299cf44009", "8d78b035469b2c0c8238c2b4c85460b04aa6d4ef", "0b369ac8bd9e0c618e4ea3568ebaa944f460c454", "1b1ff7f94430f47d109d0deb6856c98d9df518e8", 
"262a69f6ffbd3dbed222569e1e703a9132ca05ae", "1e126cee4c1bddbfdd4e36bf91b8b1c2fe8d44c2", "70ae2ba890bf6ce2ff645ca2fd4262480f8048db", "0495641c590874be9e09c3743d0d15c536cd3f4e", "431f551d832347ee8a0d993c651a9e796ebde104" ], "paperAbstract": "Many smartphone apps can consume an unnecessarily high amount of energy, shortening battery life. Although users can easily notice the undesired fast battery drain, it is almost impossible for them to precisely remember how the abnormal battery drain (ABD) is triggered, making it difficult for developers to fix the problem. Therefore, app developers are in an urgent need for a tool that can provide them helpful information. In this paper, we propose eDelta, a framework that assists developers in pinpointing the APIs with high energy deviation, which usually have a high probability of being relevant to the non-deterministic ABD. Specifically, eDelta performs comparative trace analysis to identify APIs that have significant energy consumption deviation in different user traces. With the information provided by eDelta, developers can substantially reduce the time they spend searching for the ABD root causes. We have prototyped eDelta in Android 4.4 and evaluated it with twenty real-world apps. Our results show that eDelta can effectively pinpoint the APIs with high energy deviation and those APIs indeed cause ABD. 
Specifically, it reduces, on average, 94.6% of the amount of code that the developers would need to search for ABD root causes.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323567" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/38aa1e6fb4795b3e538570b3613744735da93e92", "sources": [ "DBLP" ], "title": "eDelta: Pinpointing energy deviations in smartphone apps via comparative trace analysis", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "38c1eba6c628ee7fcf2633aa0299b091e6378b60": { "authors": [ { "ids": [ "35238735" ], "name": "Richard A. Eisenberg" }, { "ids": [ "35372552" ], "name": "Simon L. Peyton Jones" } ], "doi": "10.1145/3062341.3062357", "doiUrl": "https://doi.org/10.1145/3062341.3062357", "entities": [ "Calling convention", "Compiler", "Haskell", "Parametric polymorphism", "Standard library", "Type class", "Type system" ], "id": "38c1eba6c628ee7fcf2633aa0299b091e6378b60", "inCitations": [ "9ff2015fca95d40c79fd7304b82b117f6d099cd8", "391076c916028d7258c9d7e0d0abf8d9ce7ba050", "abf5c913e989d41d8b20e1fb1999294e1db41a2b" ], "journalName": "", "journalPages": "525-539", "journalVolume": "", "outCitations": [ "ce317f10bd484735e629d4fa6936946f41d805f2", "2d4aa0f63c26dee36310c6c1ce3fe1fe4b4551e9", "3393e9f1b5d32e4c5e20425d575c106d71cf86b9", "1352cd995747a1bce60edd7cf6b8dc44a0259558", "0c8e838a6b8692a44478442c5d91d45f50b3bbcd", "0d5bdcfea6df1d8c7adf71dcc8c5b782f5c3e9d1", "1e2c604e66c4439ad343b70d7ec0abedf72d006a", "c58994b0e6614b3dcc01a6f1eee3ca0ec477f8f2", "0df445ca53975d93f27c9def03e964d3113a4607", "4173f18a9f3fa9f9767cfb476a05cbe1ad707ada", "43c137bb5e909d101b503d30a6c32d624487cacc", "0fd582a8335379c50cbee8957c5da063b802fec7", "0e935c9bedd1902a30f51ceeb5ad060723a7ceab", "6ef83b2c078ac27e8153162b92e5a12d10a1dde2", "184e9846b86c0d95a104fe15ffc78b23684ff7d9", "1a6c35f4dbbf47ddd6a7bd3133830459fa041ff2", "75f70e8992fd12c61edac2059b3e5614c5c61f65", 
"cc7f2242dba6f09023128897762d07517f13ba4a", "ca43e1a3bca565351ea6e05144fa7fbb3418dfb9", "07dc9cbc4c4a8a6c10287b793f3b3e0cdf4590b0", "3521ccc507611cf983f1193894f494e5154beca1", "13c1c1c4f8b7b87404a929e4e9b339008fd27138", "0723583c40abf490571b6ce62afdac2ab28afd8e", "933aa774c4fd242f6deb0fa6a20daab32481bec5", "1856b5946b1c3acb008ce0bfdb478c46c1028e33", "d1f5d96374e32f02e1e85c7710fa26860cdc48d3", "03e7b7bdb9377f521b84e97c6d9e051d1a6f42eb", "32e632786bee13c0b1b1377f6de8b6773232b5d4" ], "paperAbstract": "Parametric polymorphism is one of the linchpins of modern typed programming, but it comes with a real performance penalty. We describe this penalty; offer a principled way to reason about it (kinds as calling conventions); and propose levity polymorphism. This new form of polymorphism allows abstractions over calling conventions; we detail and verify restrictions that are necessary in order to compile levity-polymorphic functions. Levity polymorphism has created new opportunities in Haskell, including the ability to generalize nearly half of the type classes in GHC's standard library.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062357", "http://repository.brynmawr.edu/cgi/viewcontent.cgi?article=1065&context=compsci_pubs", "https://www.microsoft.com/en-us/research/wp-content/uploads/2016/11/levity-pldi17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/38c1eba6c628ee7fcf2633aa0299b091e6378b60", "sources": [ "DBLP" ], "title": "Levity polymorphism", "venue": "PLDI", "year": 2017 }, "38c51e4660fef8ff635cc4da66c23e3f1107e079": { "authors": [ { "ids": [ "1932642" ], "name": "Costas Busch" }, { "ids": [ "1744502" ], "name": "Maurice Herlihy" }, { "ids": [ "3134838" ], "name": "Miroslav Popovic" }, { "ids": [ "1749522" ], "name": "Gokarna Sharma" } ], "doi": "10.1145/3087556.3087565", "doiUrl": "https://doi.org/10.1145/3087556.3087565", "entities": [ "Algorithm", "Approximation algorithm", "Caldwell catalogue", "Multi-core processor", 
"Multiprocessing", "NP (complexity)", "Run time (program lifecycle phase)", "Scheduling (computing)", "Transactional memory", "Tunnel Setup Protocol", "Xerox Star" ], "id": "38c51e4660fef8ff635cc4da66c23e3f1107e079", "inCitations": [], "journalName": "", "journalPages": "173-182", "journalVolume": "", "outCitations": [ "029a74cdab8672a3a0852bc07ab008a9a131b672", "3aa65d43e959daf077933ad995d802d2913aeb95", "4aa993db77b888a02084a542a929b1a81a8d03f6", "058978f0ab2dcbe9fbc67a3ef00422455e1d1c39", "4b8ea4f3bc7c8d852afd73cac39ddb6af01fe6f9", "57cf29529977cc5407497aba2f9032e01a12c1a9", "b1e59845ecaa17350a211db5bfaa0f4078939208", "23cb57ed50a017c090b407500169a4a5f138744e", "011962fc1a1ea0d1a3bd512e1c4b927a1330f4bc", "b28a6a6db502363f65481f0655bac1f0dc8d4fa4", "1ccc5074d2967893889e8ad6913272f814dee3f0", "2510fa746a2ac5a7af009eee14a922958c9e1f2a", "c7ea0774356e4b10f0a49f9c3feeb0a7713fe0ad", "429e313d33a82bf086b69d47eee735450cbeb4ae", "b07124bc3c35e56b7acaa8780136c5e0669dccd0", "ab12cef09635b578d1c6479a2a693de8a75be2c7", "8d9e169b8e5fa071286a63dd2f868d89f45af287", "61094e631df646c7b53d0ee5f756b7a5ccfda0b5", "0598f50d4b9777a14e8d55c037aa3149ac3a03c9", "211f80edde1bf423abae852d90bcc6f9f5e8882d", "51225f24b4bfb922bc9ed9738566de0b3cae5393", "59d50e8b504169d9fd04615882dc80b33eebc4bc", "2caf328f2e372cccacf9191ca132551a3d06d081", "ecc4ce3181303c2abd9eb0893ffc185586a6f11a", "abe8ddcefab2f6edd9deff8c5803ae1621817db0", "2e86ebcbfd28619525a32375a4415faebac5c659", "367388823f17595fe6922959259c7d520e693e60", "5bbf6d1944e0e46562f0a8cfd48a3e87ce81dee8", "2132b584f3341aab2ab900f7bc84444b1f7097d3", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a" ], "paperAbstract": "We investigate scheduling algorithms for distributed transactional memory systems where transactions residing at nodes of a communication graph operate on shared, mobile objects. 
A transaction requests the objects it needs, executes once those objects have been assembled, and then possibly forwards those objects to other waiting transactions. Minimizing execution time in this model is known to be NP-hard for arbitrary communication graphs, and also hard to approximate within any factor smaller than the size of the graph. Nevertheless, networks on chips, multi-core systems, and clusters are not arbitrary. Here, we explore efficient execution schedules in specialized graphs likely to arise in practice: Clique, Line, Grid, Cluster, Hypercube, Butterfly, and Star. In most cases, when individual transactions request k objects, we obtain solutions close to a factor O(k) from optimal, yielding near-optimal solutions for constant k. These execution times approximate the TSP tour lengths of the objects in the graph. We show that for general networks, even for two objects (k=2), it is impossible to obtain execution time close to the objects' optimal TSP tour lengths, which is why it is useful to consider more realistic network models. 
To our knowledge, this is the first attempt to obtain provably fast schedules for distributed transactional memory.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087556.3087565" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/38c51e4660fef8ff635cc4da66c23e3f1107e079", "sources": [ "DBLP" ], "title": "Fast Scheduling in Distributed Transactional Memory", "venue": "SPAA", "year": 2017 }, "38cc07e87baf5600a3301f37f4cdef423c30eb45": { "authors": [ { "ids": [ "40419297" ], "name": "Gottfried Herold" }, { "ids": [ "32299356" ], "name": "Max Hoffmann" }, { "ids": [ "26998165" ], "name": "Michael Kloo\u00df" }, { "ids": [ "1740872" ], "name": "Carla R\u00e0fols" }, { "ids": [ "35135100" ], "name": "Andy Rupp" } ], "doi": "10.1145/3133956.3134068", "doiUrl": "https://doi.org/10.1145/3133956.3134068", "entities": [ "Antivirus software", "Benchmark (computing)", "Bilinear filtering", "Credential", "Cryptographic protocol", "Cryptography", "Digital credential", "Interactivity", "Pairing-based cryptography", "Polynomial", "Polynomial identity testing", "Schwartz\u2013Zippel lemma", "Software verification and validation", "Zero-knowledge proof" ], "id": "38cc07e87baf5600a3301f37f4cdef423c30eb45", "inCitations": [ "036a89c1652d47ccdde91a5296de7c83042dbac5" ], "journalName": "", "journalPages": "1547-1564", "journalVolume": "", "outCitations": [ "845e96c20e5a5ff3b03f4caf72c3cb817a7fa542", "9068e9b88b3e30d27f24c34addf7caf3cdff281f", "5e2e76770b7f2c525b91c64d31c45fbc49fa9ddd", "25f8eca68804bc27925888e690834f49d9e9c16f", "aa3dcf5bb260a1e3c34b67f59d54b237ae0068b4", "02dc2a93a48d38deae9f1369d5b33ce98af2a3f2", "20d965226892aeef1f3454b10911f355c6c2f8fa", "259424f10f76729aa5bd18977f8b92a44a09b308", "b80bc63731b354a0e3d19abc2ed1b51d290db4ed", "0a9ce8889505a151eea2515b7eec741a16fcee3a", "092d37ddf40fb5e3a4b230518982d43c64d33d13", "546d30fc460accd15d23cf2d21d4275fdd907275", "62c2cb6ce7df8b8f0e26f7288d319e0b1e9d9c52", "3b6911dc5d98faeb79d3d3e60bcdc40cfd7c9273", 
"5350c31c1b71025fc80952ffe6e2643bbc343d74", "0e39b4e0f6026ee833dba226b26096667954d2ba", "3709e4e0b4ea3868ffede5fa4a3b84186d6de64c", "972df22c50fa60092dd66715f19e335c0297e8ff", "162e94a65d553cc34ca68020c8c90cd959bd52b1", "90f26c0f0d04b9c4999b454c35ee1c7603ca9e4b", "0c48ae6e50c19329b2be1433433d20a6d788af2a", "87906f889908221b5ee8ff39c488494d3afb4d03", "311d277eb662486aa605b5fd681c419b77443627", "0523e2fdf2c908c2a22dcd4f93e7f8b702b0331a", "002e89bd8f7c6a72ba003282e74f7eff2d89a984", "136a5e50af7f75aca1a28db1a337bacc2cbdde5c", "144c8080bd04780a2db43d6a5230d1fd00b72657", "44ce3dd5ab486499d5fc793e84a62a5f6ac23ba3", "51fb848987c83181d95360fdbb40edb6d90683d7", "6e84bed29b3b7c28ab58877e49de3ffd209f244b", "1c4a5b630bc4e13bd0714132c9d22923c81d5108", "d64f8f3646e8b63dd28f0b53d7bd0aefa52d5d2d", "1144078fe05a113c02d068962be9d17d0f2b9e53", "d1bce67d0d824f6f651c36830915152d40a39c00", "176ceb84f0919724dfb0e1a473a6ddd741607099", "0aa20fb7c3a5aa0f2af3e2a1f857bf9073ec157f", "362d1bf46a749b6c7eb73a87476734c8dee489ed", "15f5ce559c8f3ea14a59cf49bacead181545dfb0", "a1d29511591bdafdee461feca763c96ebfb4cbe2", "1a4e124a124d3c09dcf6f62fbbee308771d9a11d", "3611b5f7e169f24a9f9c0915ab21a7cc40009ea9", "b913cf330852035f49b4ec5fe2db86c47d8a98fd", "2b26cf1eab78d9f9c687073af36769e72aa3cc8b", "0cf21a6c9888ec25a43fa17edbe8a3cfb28238a3", "11ef405a5ef00e402fe2f0d265f2fada864f02ad", "79c1867a8e4b7bb09fb5a048b4c52a1033f5ce96" ], "paperAbstract": "Bilinear groups form the algebraic setting for a multitude of important cryptographic protocols including anonymous credentials, e-cash, e-voting, e-coupon, and loyalty systems. It is typical of such crypto protocols that participating parties need to repeatedly verify that certain equations over bilinear groups are satisfied, e.g., to check that computed signatures are valid, commitments can be opened, or non-interactive zero-knowledge proofs verify correctly. 
Depending on the form and number of equations this part can quickly become a performance bottleneck due to the costly evaluation of the bilinear map.\n To ease this burden on the verifier, batch verification techniques have been proposed that allow to combine and check multiple equations probabilistically using less operations than checking each equation individually.\n In this work, we revisit the batch verification problem and existing standard techniques. We introduce a new technique which, in contrast to previous work, enables us to fully exploit the structure of certain systems of equations. Equations of the appropriate form naturally appear in many protocols, e.g., due to the use of Groth-Sahai proofs.\n The beauty of our technique is that the underlying idea is pretty simple: we observe that many systems of equations can alternatively be viewed as a single equation of products of polynomials for which probabilistic polynomial identity testing following Schwartz-Zippel can be applied. Comparisons show that our approach can lead to significant improvements in terms of the number of pairing evaluations. Indeed, for the BeleniosRF voting system presented at CCS 2016, we can reduce the number of pairings (required for ballot verification) from 4k+140, as originally reported by Chaidos et al., to k+7. 
As our implementation and benchmarks demonstrate, this may reduce the verification runtime to only 5% to 13% of the original runtime.", "pdfUrls": [ "https://eprint.iacr.org/2017/802.pdf", "http://eprint.iacr.org/2017/802", "http://doi.acm.org/10.1145/3133956.3134068" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/38cc07e87baf5600a3301f37f4cdef423c30eb45", "sources": [ "DBLP" ], "title": "New Techniques for Structural Batch Verification in Bilinear Groups with Applications to Groth-Sahai Proofs", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "38d3ff3b608a334b12400b031c7cc483923bc629": { "authors": [ { "ids": [ "2795047" ], "name": "Jonathan Lifflander" }, { "ids": [ "1679176" ], "name": "Sriram Krishnamoorthy" } ], "doi": "10.1145/3062341.3062385", "doiUrl": "https://doi.org/10.1145/3062341.3062385", "entities": [ "Algorithm", "Cilk Plus", "Compile time", "Compiler", "Concurrency (computer science)", "For loop", "Interference (communication)", "Interleaved memory", "Locality of reference", "Multi-core processor", "Parallel computing", "Program optimization", "Programming model", "Recursion", "Recursion (computer science)", "Scheduling (computing)", "User space", "Work stealing" ], "id": "38d3ff3b608a334b12400b031c7cc483923bc629", "inCitations": [ "581a067e10b03d21deb8ebff4788d1e088ef8a37", "6146e57ea9eea9a2c0a7ad53a24782db6119f783" ], "journalName": "", "journalPages": "1-16", "journalVolume": "", "outCitations": [ "c5d0d547b6a3fa470dcc77f558f6c7c5768edabd", "0d5362c97bf199411b73926e7c233bb1329e19a0", "16c3fd5b439e9c35fc1eafc52524396a6ac5e652", "93ccefbb580ea4c5c00ec8cc3981bb9cf42a8192", "212703ebce8624dd2a4e1568a990011fc79b6aee", "7fb9cfac02565c0b5ad3ce2a5662057a7474d80e", "14264ee48a23c3248ccf9b65aa1c29cf33ef547e", "0fb659af82f2277c8a62ac888f4bfd01570e5470", "1d5f78ce40eba82eccf8489d4182b2693cf43107", "8c4310984cfe7a9842fd34fe4e99320deb67d02a", "8d39df40831d9dd239ccd9da60de93292ea894a3", 
"0660fe0a1cc9ca03847de601589b3beb74f7a51d", "027d73cc11576ca9b3ff773c3f65b4159ebded5e", "6db6459a0d7cb58b688941a98f80a7e5e1bed94b", "2f7fa291bdc6a2f8c7994cf1896868f057a6b0ca", "365099628c7cd3bbca3a8cf20611ae508b74b8ef", "67dc83a15c020b84403f1b6b52140965f11e4588", "a986da35bfb19849788c212bbe99a0eea87e97fc", "13b24c5c3cc047890fc908490bc593e48598e68a", "273fcf24c3c9c07cde1cc68b23786ff7910e0d47", "1dff33cb24cf30be232d02bc48ebdf200480d2f3", "3245519444fd3f706bb133f4cf01b093a0816ba5", "002372b86e831dd0d61e37d70b23831eaeb11fcc", "0215915cc57ab4a725d96f853eade1d80f8fc7d3", "0003b4b4c35f0384596b6f129f7dda8449cdc79f", "f0f4757aa2f923a349e8357e73850a78e9b80fee", "9dbffa78077dde33ab5a2fca8a50bb107b9ff4d3", "5025e9c6a57dfa036afe38ea89b4e696b352f04c", "73a219c149e510bce0c49d2690e498e109cc419b", "39867c7d684e77377f03d4f83e5dadf4acd8d61b", "aa15673806c08fdc0f7efbb0b03c901f7e4f8874", "d2268a9ae1607965c0cd6a85da6194e630cb9496", "06a6da00498357a2b908b1da7bfb5f19662abf1f", "0d778f8d34a47649270c13d68f0e3915064d4f07", "1ffa34e8b3ef9ec23ffb8223658b650ce98f843d", "19ecf7778132143ec9c0324fae0aebf20c9a0217", "1a413400b4573db8c91728626396f7717f77b02f", "df3ab817ab89a99650c5ec5b9fc2e50ed890482b", "262c123c6325d7b4c2edc904d6e85d352ab266b9", "6826cf4a31e479a46f91d5dfab88a37fe9d697b9", "05db6b886ce6bf260dc725450873cdb0b2a0c065" ], "paperAbstract": "We present an approach to optimize the cache locality for recursive programs by dynamically splicing---recursively interleaving---the execution of distinct function invocations. By utilizing data effect annotations, we identify concurrency and data reuse opportunities across function invocations and interleave them to reduce reuse distance. We present algorithms that efficiently track effects in recursive programs, detect interference and dependencies, and interleave execution of function invocations using user-level (non-kernel) lightweight threads. To enable multi-core execution, a program is parallelized using a nested fork/join programming model. 
Our cache optimization strategy is designed to work in the context of a random work stealing scheduler. We present an implementation using the MIT Cilk framework that demonstrates significant improvements in sequential and parallel performance, competitive with a state-of-the-art compile-time optimizer for loop programs and a domain-specific optimizer for stencil programs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062385" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/38d3ff3b608a334b12400b031c7cc483923bc629", "sources": [ "DBLP" ], "title": "Cache locality optimization for recursive programs", "venue": "PLDI", "year": 2017 }, "38f84c67235d17ea9a85136dacf57c14b5b3f5b0": { "authors": [ { "ids": [ "3285880" ], "name": "Qilong Gu" }, { "ids": [ "2416003" ], "name": "Joshua Trzasko" }, { "ids": [ "3027563" ], "name": "Arindam Banerjee" } ], "doi": "10.1109/ICDM.2017.23", "doiUrl": "https://doi.org/10.1109/ICDM.2017.23", "entities": [ "Algorithm", "Application domain", "Approximation algorithm", "Computation", "Experiment", "Gradient descent", "Low-rank approximation", "Lucas\u2013Lehmer\u2013Riesel test", "Medical imaging", "Noise reduction", "Proximal operator", "Recommender system", "Relaxation (approximation)", "Scalability", "Singular value decomposition", "Synthetic data" ], "id": "38f84c67235d17ea9a85136dacf57c14b5b3f5b0", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "137-146", "journalVolume": "", "outCitations": [ "13400fdff75fc0b73652fcdab59e52814022b9b0", "51aa84631d6412a831a93870408a0d0ff8f47c79", "023f6fc69fe1f6498e35dbf85932ecb549d36ca4", "2275762a28582716db92df6d525ed2481c7d7f14", "17050a92374032f17a4dfdc01d2ae8c8ef8cc9e7", "153cd6650fe994a368b7096f2140059c0628a0ae", "0032ace23c80506aaf6059d7de8df966ecd846de", "e79b11028fc3f9921c67900f27a0b856a4f1bbef", "c7ad372fdb95bd6dce080b42ccd27d649cfb0153", "25c85f7b9007f3dea1df72f11d946c7099ae63c6", 
"90ca230d517c337a2ed073f5f57e069f1b5524a9", "70d0c4061cf8b50d544ef965a022501a0794e7ca", "658d97c2ea8a6ed1d9de4ef0f85da21a8816d29a", "973fd83d5f6ed58ca509c2f498bfa87200d768c8", "255d36f4019510957a81ef507291f8022a931ab6", "247f9d5759bfc8b03af4d16981f71a7a9197bd2b", "42b6ea0bbc174ce1ecb116aff6280559545dd6db", "881b141b34f1f8493eb58d01f1f657d402369cdb", "7eb284687145589f26313fe91b192858168db1ef", "1cb0c6573195aeb933e9ff663dad71d8ad1b0e13", "2c2ff5a11a3c895e8ee403a2ad3f393509b86910" ], "paperAbstract": "We consider the problem of modeling data matrices with locally low rank (LLR) structure, a generalization of the popular low rank structure widely used in a variety of real world application domains ranging from medical imaging to recommendation systems. While LLR modeling has been found to be promising in real world application domains, limited progress has been made on the design of scalable algorithms for such structures. In this paper, we consider a convex relaxation of LLR structure, and propose an efficient algorithm based on dual projected gradient descent (D-PGD) for computing the proximal operator. While the original problem is non-smooth, so that primal (sub)gradient algorithms will be slow, we show that the proposed D-PGD algorithm has geometrical convergence rate. We present several practical ways to further speed up the computations, including acceleration and approximate SVD computations. 
With experiments on both synthetic and real data from MRI (magnetic resonance imaging) denoising, we illustrate the superior performance of the proposed D-PGD algorithm compared to several baselines.", "pdfUrls": [ "http://www-users.cs.umn.edu/~banerjee/papers/17/llr-main.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.23" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/38f84c67235d17ea9a85136dacf57c14b5b3f5b0", "sources": [ "DBLP" ], "title": "Scalable Algorithms for Locally Low-Rank Matrix Modeling", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "390081f63e9e8014a3e2574fe02930d8ebd9d197": { "authors": [ { "ids": [ "2387745" ], "name": "Anshuman Dutt" }, { "ids": [ "1765803" ], "name": "Vivek R. Narasayya" }, { "ids": [ "1728620" ], "name": "Surajit Chaudhuri" } ], "doi": "10.1145/3035918.3064040", "doiUrl": "https://doi.org/10.1145/3035918.3064040", "entities": [ "Benchmark (computing)", "Mathematical optimization", "Microsoft SQL Server", "Online optimization", "Optimization problem", "Parameter (computer programming)", "Plan", "Prepared statement", "Program optimization", "Query optimization", "SQL" ], "id": "390081f63e9e8014a3e2574fe02930d8ebd9d197", "inCitations": [], "journalName": "", "journalPages": "1539-1554", "journalVolume": "", "outCitations": [ "4625894637eb3cafbdeb532e8c6a4d5c6f37aec2", "350f2592b78f4b56adae1dedd07d370b81349652", "3a20575b768022e3d3f20c0b8da386086cc49c57", "53ad950cf2b6bc38bf377ffeff984c5175a064a9", "280e98c45ceb53d878adbba0f8bee688c6716f7d", "b6059f5464b8c03708a06b1aa3007d6825bbc9d0", "f7991290e9555c18f5093de1d0c6c49bd1ad0bf0", "018a9648b76366ef01057e4f235a0af5ade0068c", "0685db30b47d0b389fdebcc9fbc0097894abb52a", "a71d88bb81df4a0841bb5b06dbbe0835fa75876a", "7d9ab1a068ea85df72a66826579feb397bd8687a", "214fbc64cb1aed66af3dc948eceb35760c06e788", "b082b168501959694f0181b1a01fa097bf62f937", "1484eb5ced5ebe4eaf6f29a762f74f56d1d39340", 
"037e3628bb6c0ed8db46927526ab13e15c79c832" ], "paperAbstract": "Parametric query optimization (PQO) deals with the problem of finding and reusing a relatively small number of plans that can achieve good plan quality across multiple instances of a parameterized query. An ideal solution to PQO would process query instances online and ensure (a) tight, bounded cost sub-optimality for each instance, (b) low optimization overheads, and (c) only a small number of plans need to be stored. Existing solutions to online PQO however, fall short on at least one of the above metrics. We propose a plan re-costing based approach that enables us to perform well on all three metrics. We empirically show the effectiveness of our technique on industry benchmark and real-world query workloads with our modified version of the Microsoft SQL Server query optimizer.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064040", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/04/onlinePQO.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/390081f63e9e8014a3e2574fe02930d8ebd9d197", "sources": [ "DBLP" ], "title": "Leveraging Re-costing for Online Optimization of Parameterized Queries with Guarantees", "venue": "SIGMOD Conference", "year": 2017 }, "39440ec35745f3524a0f9e04784b2eb0cf33b211": { "authors": [ { "ids": [ "3447293" ], "name": "Savvas Zannettou" }, { "ids": [ "1747580" ], "name": "Tristan Caulfield" }, { "ids": [ "1728207" ], "name": "Emiliano De Cristofaro" }, { "ids": [ "1946641" ], "name": "Nicolas Kourtellis" }, { "ids": [ "2909360" ], "name": "Ilias Leontiadis" }, { "ids": [ "2698864" ], "name": "Michael Sirivianos" }, { "ids": [ "2350947" ], "name": "Gianluca Stringhini" }, { "ids": [ "6688191" ], "name": "Jeremy Blackburn" } ], "doi": "10.1145/3131365.3131390", "doiUrl": "https://doi.org/10.1145/3131365.3131390", "entities": [ "4chan", "Alt attribute", "Information ecology", "Social media", "Social network", "Stock and flow", 
"World Wide Web" ], "id": "39440ec35745f3524a0f9e04784b2eb0cf33b211", "inCitations": [ "2f8c9de0985b1d93e4634a08153044013775baf3", "b0f4f99bd741b875bc12fa05f4e4481fb9f86d87", "53780c2bb38711d99aebbe996039b75256d3f08e", "3b28b1fc9109e4d50535f332dfd42a2b1b296e70", "6de85ece29a68d4226654f2ca40081daeb8977c4", "df53f9f7a1c115008232554738255ce8d3658da6" ], "journalName": "", "journalPages": "405-417", "journalVolume": "", "outCitations": [ "28ef385643cfa7728227029b62f712459ede5280", "3e9059740ca3c54b213b3f70e91a503afaa271af", "5e8d465142f3c1e5d840ba03d1c3f2ae8af87e76", "35fc15f9ed1b249145ccd1e928bae4a25ff0df67", "3491abac449e43977db7df8e26cddc34fcd057db", "f2931c21b57c56e6322c62d5df6a712bf350fc58", "1e3d1055fc65966bc2c64505785207d7e8d71022", "2141a4655f65c09dce412bc9125d960b453ed64d", "02fd02b1789716d55a784df1f26895c18deba13f", "7333ed97bd8a3676685df72fb5b737c916405847", "6bbf1e10e1618888fd690df85e7cd5ed276289bb", "c9c74f18b974b0e3e4b44184e17aaefd05ee01d0", "1480b0edc8ca3b38218d418982118ce480c54c8b", "f26af9e00e1450dacfbfc908b29e16b486e55ec3", "16a0448d45593cb6235614c83dad53e34416f219", "a846ddb05a397d1a2d8e717d23c0fd166604b350", "2d0bf053ce80f8864c293be6d469bfaf2e3e8363", "279d9b73f65d0a507098c1b8b484c2c0cea5a704", "2cb981df45ecc6db0f364f5d556a96b1a78668f3", "08c3751faba7db174099f3324205922ce3efed27", "1e3f05b22b1804858ef3034fdebc7908633e1c5c", "1d491634e09f94ca428908ad87b68a6ea1d828c2" ], "paperAbstract": "As the number and the diversity of news outlets on the Web grows, so does the opportunity for \"alternative\" sources of information to emerge. Using large social networks like Twitter and Facebook, misleading, false, or agenda-driven information can quickly and seamlessly spread online, deceiving people or influencing their opinions. 
Also, the increased engagement of tightly knit communities, such as Reddit and 4chan, further compounds the problem, as their users initiate and propagate alternative information, not only within their own communities, but also to different ones as well as various social media. In fact, these platforms have become an important piece of the modern information ecosystem, which, thus far, has not been studied as a whole.\n In this paper, we begin to fill this gap by studying mainstream and alternative news shared on Twitter, Reddit, and 4chan. By analyzing millions of posts around several axes, we measure how mainstream and alternative news flows between these platforms. Our results indicate that alt-right communities within 4chan and Reddit can have a surprising level of influence on Twitter, providing evidence that \"fringe\" communities often succeed in spreading alternative news to mainstream social networks and the greater Web.", "pdfUrls": [ "https://arxiv.org/pdf/1705.06947v2.pdf", "https://conferences.sigcomm.org/imc/2017/slides/imc17_web_centipede_slides.pdf", "http://arxiv.org/abs/1705.06947", "http://www0.cs.ucl.ac.uk/staff/G.Stringhini/papers/centipede-IMC2017.pdf", "https://arxiv.org/pdf/1705.06947v1.pdf", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final145.pdf", "http://doi.acm.org/10.1145/3131365.3131390" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/39440ec35745f3524a0f9e04784b2eb0cf33b211", "sources": [ "DBLP" ], "title": "The web centipede: understanding how web communities influence each other through the lens of mainstream and alternative news sources", "venue": "IMC", "year": 2017 }, "395cd405b40df333f6f634736e02e8d6493f12cd": { "authors": [ { "ids": [ "1889746" ], "name": "Sebastian Brandt" }, { "ids": [ "26604973" ], "name": "Juho Hirvonen" }, { "ids": [ "3132681" ], "name": "Janne H. Korhonen" }, { "ids": [ "1680991" ], "name": "Tuomo Lempi\u00e4inen" }, { "ids": [ "1709397" ], "name": "Patric R. J. 
\u00d6sterg\u00e5rd" }, { "ids": [ "30894501" ], "name": "Christopher Purcell" }, { "ids": [ "3357676" ], "name": "Joel Rybicki" }, { "ids": [ "1710431" ], "name": "Jukka Suomela" }, { "ids": [ "2343306" ], "name": "Przemyslaw Uznanski" } ], "doi": "10.1145/3087801.3087833", "doiUrl": "https://doi.org/10.1145/3087801.3087833", "entities": [ "A-normal form", "Algorithm", "Complexity class", "Computation", "Computational complexity theory", "Electronic design automation", "Graph coloring", "Independent set (graph theory)", "Matching (graph theory)", "Maxima and minima", "Maximal independent set", "Model of computation", "Ramsey's theorem", "Toroidal graph", "Undecidable problem" ], "id": "395cd405b40df333f6f634736e02e8d6493f12cd", "inCitations": [ "1777217e05a7b486831f65f0fe5423e3b7eb84af", "5da625866b8f81274e63c2a3bb6dd6cb382aed22" ], "journalName": "", "journalPages": "101-110", "journalVolume": "", "outCitations": [ "596c4f92b44a59dbff0135a87c4ba883bdf90d49", "1777217e05a7b486831f65f0fe5423e3b7eb84af", "01034f06666331bb392c94c63f4607355492e594", "0a18b0195b85a34d42652527932103595671a31c", "a36f6d902d74d6fa76f72774d5d65c701853bcce", "3f084898e62b5824cf70100b91a63f1c2450a467", "2492685847defcff7c2b251559b824b55177e565", "16611d39c261e5569479ce7fb61e81ac8bb8b840", "3788bb4eec1eade6a2d8134b4b815faae933b749", "e69764d312adb05a2202ab187c983cd334dfd26e", "25064c0734a2a37db2a299c06c04a226f63d69c5", "2ce076b522f1d0b7a6250c11955eec3145de82b0", "084d01ba2ba85a75e9c6dfb49e8b7eb2e6d735b3", "94c899462170819e0a68fc56f49f8a36e023a5e6", "570ce27e305a9bb3593200922efec4e0d0ad3a46", "108ac30e6f90fcd21383e85c72cf0e7a072f5fa8", "cba1bf72e90a1ead0dd6a924dfed7676e0477bf0", "5cd7369da3bc6fefd45dfeb0b87829191db1290a", "58ac284b881146c1b55554c69092a161ec6758c5", "554922e32e72e3d7d02d03490cb42240e52a4c20", "0128da111996b4e89bbe549ccb8da27293431367", "423d4de6173479ca4e2835149aed81653f658319", "8d80509cfc9ad2cad3a23af122abc6cf6d6d9c56", "d5aa5c2e5add29abd1023587c2e93a97bc2a6c8e", 
"306073b8ed807c858411035bc6a79c04310b3061", "14225dc19d6f6ff65909abdd2e85166bebdf0b7d", "34bb750d3bb437b47b8ebb5998d40165fdad3ca8", "9dab04bc2d1c508c1dde200a497e64b4bd523e71", "4fd1046b1f5b6ea300fa0751f7ea9e07225e900b", "5c545400a562b8c2b69798c5d603f7f107401da4", "4b08960bfd73d3b84f4211fde9d00fe9a8090962", "4541836fa0d7777cbe476b4110908789c9e024e8", "17d3d569ff60ef353c50dc498f8fe31bc2884615", "0a4a34b9344b46596b2198560c7152178fe708cd", "3594a36262a673fb69f4c5b98a0e1c4a6e5dc92c", "8eda73158abd579eadd72ffd348929336956aa97", "9d3c47109b5ba7d80d88e8bf8f83ec9ced71be43", "e0905d1b5cf144bbadd085acb94aee9880f482fc", "01f2dc9dbc45ec3179f825fd86c9e6487cc9b52f", "25186afb27fd7d50b2f6b0e03487b6020e1e439e", "0481a2b112ade48a8b5eed438982e70d22ad9e62" ], "paperAbstract": "LCLs or locally checkable labelling problems (e.g. maximal independent set, maximal matching, and vertex colouring) in the LOCAL model of computation are very well-understood in cycles (toroidal 1-dimensional grids): every problem has a complexity of O(1), \u0398(log\u2217 n), or \u0398(n), and the design of optimal algorithms can be fully automated. This work develops the complexity theory of LCL problems for toroidal 2-dimensional grids. The complexity classes are the same as in the 1-dimensional case: O(1), \u0398(log\u2217 n), and \u0398(n). However, given an LCL problem it is undecidable whether its complexity is \u0398(log\u2217 n) or \u0398(n) in 2-dimensional grids. Nevertheless, if we correctly guess that the complexity of a problem is \u0398(log\u2217 n), we can completely automate the design of optimal algorithms. For any problem we can find an algorithm that is of a normal form A\u2032 \u25e6 Sk, where A\u2032 is a finite function, Sk is an algorithm for finding a maximal independent set in kth power of the grid, and k is a constant. Finally, partially with the help of automated design tools, we classify the complexity of several concrete LCL problems related to colourings and orientations. 
arXiv:1702.05456v2 [cs.DC] 24 May 2017", "pdfUrls": [ "http://arxiv.org/abs/1702.05456", "http://users.ics.aalto.fi/suomela/doc/grid-lcl.pdf", "https://arxiv.org/pdf/1702.05456v1.pdf", "https://arxiv.org/pdf/1702.05456v2.pdf", "http://www.tik.ee.ethz.ch/file/081e2ca2a419c214ac2b73c940e5ff6a/grid-lcl.pdf", "http://doi.acm.org/10.1145/3087801.3087833" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/fcb5/5eee9a0663910c1ae5d81ad1de539d935ad2.pdf", "s2Url": "https://semanticscholar.org/paper/395cd405b40df333f6f634736e02e8d6493f12cd", "sources": [ "DBLP" ], "title": "LCL Problems on Grids", "venue": "PODC", "year": 2017 }, "39748866fbc87e47d535e530c92744ddb545e3dc": { "authors": [ { "ids": [ "17074331" ], "name": "Ellis Fenske" }, { "ids": [ "9396418" ], "name": "Akshaya Mani" }, { "ids": [ "39240703" ], "name": "Aaron Johnson" }, { "ids": [ "1765309" ], "name": "Micah Sherr" } ], "doi": "10.1145/3133956.3134034", "doiUrl": "https://doi.org/10.1145/3133956.3134034", "entities": [ "Adversary model", "Composability", "Correctness (computer science)", "Cryptographic protocol", "Differential privacy", "Distributed computing", "Distributed version control", "Overhead (computing)", "Pittsburgh Supercomputing Center", "Tor Messenger", "Unique user", "Universal composability" ], "id": "39748866fbc87e47d535e530c92744ddb545e3dc", "inCitations": [], "journalName": "", "journalPages": "2295-2312", "journalVolume": "", "outCitations": [ "360d4003511682c5f5b5f82f6befdda88ca3fa73", "2484330f40e318036d81c63974b5c28fe9c31f99", "16a7c31409e2a66b48dbab55cee3d717fdfcbc9e", "04948723dec0e6724777ee56f0d10168cce44921", "c6d5c51b8074c94335b71210b8f22d5a03a133c1", "63b88452574095639ef9a1f692eef3c1ec386b0a", "208448ed57cb0ff70866cb3828b06610c3ff25fd", "0be8170df4c1ea1cf8312ae5ed326665224d5d9c", "357af3dd66a8ee994f17c890422fda1b618586d3", "90f26c0f0d04b9c4999b454c35ee1c7603ca9e4b", "a089defc1eea22b4d3afaeccf031ae110d7af459", "0368d2445d3ee4205ee73da933cb8b810a89091c", 
"42333e3f231bbfe508f6da6bad2feff9ae223113", "122770d0c40e3dc339b7e149dd3c38856cb39d79", "3cb56e1426d8bd33697dcb36ec9038be003c0b03", "e94e047cb0045da8335ea9a7a66b9dc3537ab632", "31dda2430f221437b28869ef57e563599f8f6c4a", "0174ae0bf7be0dcd4e3280b831b08dd381ed8fb8", "06a1d8fe505a4ee460e24ae3cf2e279e905cc9b0", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "28c58f216b4b43bd630e4e97327971111e370c77", "283ecc8622694c070fa53aee7a1c37dadc603f8d", "d94ba52bd05fe12926836c3332e2fd895e78ed30", "2365d8c74dbf329b6dfb5227a4dc95fccb872d9e", "406a37d8ccb6cb1355b7aeded65e50fc00b2977c", "78e2d6b7a671d8e53f207adff088833fd7606e13", "17fac85921a6538161b30665f55991f7c7e0f940", "f08b47869d10c04fa0dd3278e45643dfffa0b801", "05a0e62ecf23ba6cbb20b9dddac856ec2cdf255a", "5ec475d08978cd2a3bf70ea9b86a2a1cbabf6585", "7d5ac16ddf82acfa063ca43fcaae94806ec4d1b5", "2949851ab9827fdd334ecc3b392296df2aacaf92", "73e4c4d4483d82970c3084076289cc5b12b36cec", "080c40c5510d1d34c2e02edb6fb72826b2f82df3", "3cb55d539b232e309f4a5974148ec6f22afb5888", "5b566b58184e302e1bd364903010fcc55a226fd3", "ada825ba76ae506dd30092c99af702ec3859272a", "a03dde33b1af161bfd100a1cf8b4740dc8351a99", "0efa9ee4557c8b0cc8f0d329a0dab34c53fd55f2", "fe38ee9944077cd14a0f6f1813af2d3d4b59ce43", "03c1711090d76cc9163e238686786a71c028377e", "6d1bf8d4cda8f2bcf2305b5c364f696e3d3fd724", "45f4a6c915709c734e034faae7f94683d4bccbcf" ], "paperAbstract": "This paper introduces a cryptographic protocol for efficiently aggregating a count of unique items across a set of data parties privately - that is, without exposing any information other than the count. Our protocol allows for more secure and useful statistics gathering in privacy-preserving distributed systems such as anonymity networks; for example, it allows operators of anonymity networks such as Tor to securely answer the questions: how many unique users are using the distributed service? and how many hidden services are being accessed?. 
We formally prove the correctness and security of our protocol in the Universal Composability framework against an active adversary that compromises all but one of the aggregation parties. We also show that the protocol provides security against adaptive corruption of the data parties, which prevents them from being victims of targeted compromise. To ensure safe measurements, we also show how the output can satisfy differential privacy.\n We present a proof-of-concept implementation of the private set-union cardinality protocol (PSC) and use it to demonstrate that PSC operates with low computational overhead and reasonable bandwidth. In particular, for reasonable deployment sizes, the protocol runs at timescales smaller than the typical measurement period would be and thus is suitable for distributed measurement.", "pdfUrls": [ "https://www.nrl.navy.mil/itd/chacs/sites/www.nrl.navy.mil.itd.chacs/files/pdfs/17-1231-3131.pdf", "http://doi.acm.org/10.1145/3133956.3134034" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/39748866fbc87e47d535e530c92744ddb545e3dc", "sources": [ "DBLP" ], "title": "Distributed Measurement with Private Set-Union Cardinality", "venue": "CCS", "year": 2017 }, "39759499bca8e1733dff368cb9ee77847a9c4470": { "authors": [ { "ids": [ "2595341" ], "name": "Alberto Ros" }, { "ids": [ "3083590" ], "name": "Trevor E. 
Carlson" }, { "ids": [ "2708081" ], "name": "Mehdi Alipour" }, { "ids": [ "1769059" ], "name": "Stefanos Kaxiras" } ], "doi": "10.1145/3079856.3080220", "doiUrl": "https://doi.org/10.1145/3079856.3080220", "entities": [ "Application checkpointing", "Cache coherence", "Consistency model", "Correctness (computer science)", "Deadlock", "Interconnection", "Mathematical induction", "Speculative execution" ], "id": "39759499bca8e1733dff368cb9ee77847a9c4470", "inCitations": [], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "187-200", "journalVolume": "", "outCitations": [ "6c7d1815b0a5017ea19e9c5ad5b7cec3e0f894d7", "33dcafd805a3b44fd64270028633032ff0bb6fac", "186a233b40da5b4537bdd13325e492c08edd66d2", "52ad66e45e0a47a2689810d5d1c88665c9388e4d", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "0f40465cda539890932e4aafd9c59b7c3beb1c4e", "245c7c2cf1aa38a832db7bc357f3c91a4f331136", "39c76eee1b779cbc5ecfc658689f0fe834b1c1bf", "e68ba70a7b16525d7804e6b82f2d621d768e62dd", "4bad51c7685254155733ee8def6a1294378aa1af", "e5488b47f8eca7476165256d2889d0fc62431e5d", "5d0d908dac5ac179df6a83fb81bd17f7ff52f89d", "ad913bd3d95fc9e5f6888974e04726eb441a6fc6", "413d938109026fb513083a3b3f1c616da005639c", "383efe177c6e57193cd4c8f0bdc2c51f6d2ea709", "b7f9dfbaec7ba1575ba8a6246ec3109dcaaae290", "6e0e2f3d4ce7e39126333bd08e8f92b3d82e4a60", "810b7f6f88a5ffb65c634b7b7b0756671a011758", "395faf7827ce6fbb20d894a6e0e6f4be839aebcb", "16dc592aa326ecd1f8d46ca7e3485a7311af3dba", "14bbc93ee56bd60f6009441f377658936e4cd73d", "56ff7a1baf35c68ff69258ddf12fedd2e7ad8139", "49dc03814c171c08331fe9f1afc34a54951ae8e0", "1476bc7362e02995a8869ed6d3703e740284f450", "7ef0940a5e093a7c8c3c7d243bbbbf513b3c3192", "666796d5f3ff560cf3ee925f952db083a1aee56e", "0f2f65d78184b7bd47e7e98897faa2d69edb380a", "362e9b5afe5934a9d8046d758c17c5bada0652b3", "14d5edd85e4dccfa7457cae40cb33ef9eafdd68b", "10f1faeec4ee2158b8535b249a20de5419998153", 
"3ea9bd5eb3c29ea9522b12761247bbe3be4d3d0a", "1b9e9bfdc66140d2eb192e4ac8aca9281f0239b8", "d78e28a5c2511e2f94aae7832cf2830f9e5b2180", "0258946b2046bec0d52d1e5b90204af335ce870e", "69e698b026f38875787e4f91599c40603f9e3b00", "1c88ecdf374e815439ef2352f003bf243d357116", "5ced6a0aab1350ef1dba574e1faa05a726d9517e" ], "paperAbstract": "In Total Store Order memory consistency (TSO), loads can be speculatively reordered to improve performance. If a load-load reordering is seen by other cores, speculative loads must be squashed and re-executed. In architectures with an unordered interconnection network and directory coherence, this has been the established view for decades. We show, for the first time, that it is not necessary to squash and re-execute speculatively reordered loads in TSO when their reordering is seen. Instead, the reordering can be hidden from other cores by the coherence protocol. The implication is that we can irrevocably bind speculative loads. This allows us to commit reordered loads out-of-order without having to wait (for the loads to become non-speculative) or without having to checkpoint committed state (and rollback if needed), just to ensure correctness in the rare case of some core seeing the reordering. We show that by exposing a reordering to the coherence layer and by appropriately modifying a typical directory protocol we can successfully hide load-load reordering without perceptible performance cost and without deadlock. 
Our solution is cost-effective and increases the performance of out-of-order commit by a sizable margin, compared to the base case where memory operations are not allowed to commit if the consistency model could be violated.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080220" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/39759499bca8e1733dff368cb9ee77847a9c4470", "sources": [ "DBLP" ], "title": "Non-speculative load-load reordering in TSO", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "398e280e5afaeab753ca567eb137e31bd7f55e9b": { "authors": [ { "ids": [ "2364071" ], "name": "Tyler Szepesi" }, { "ids": [ "31442165" ], "name": "Benjamin Cassell" }, { "ids": [ "1867025" ], "name": "Tim Brecht" }, { "ids": [ "1736203" ], "name": "Derek L. Eager" }, { "ids": [ "1941002" ], "name": "Jim Summers" }, { "ids": [ "1772379" ], "name": "Bernard Wong" } ], "doi": "10.1145/3030207.3030231", "doiUrl": "https://doi.org/10.1145/3030207.3030231", "entities": [ "CPU cache", "FreeBSD", "Hypertext Transfer Protocol", "Kernel (operating system)", "Linux", "Linux", "Operating system", "Scheduling (computing)", "Serialization", "Shim (computing)", "Streaming media", "Throughput", "Video server", "Web server" ], "id": "398e280e5afaeab753ca567eb137e31bd7f55e9b", "inCitations": [ "4794c3b5da05e34e4739f0ec1be5464577bb41eb" ], "journalName": "", "journalPages": "51-62", "journalVolume": "", "outCitations": [ "555412196c36e2707fbd59dda1563c929fb3299b", "a4172f84efa3d7dfd70551726148070aac9f64e7", "0e047cd129be9f0b3a81f75ebf26e7afcdbcaa82", "1ad19898df56cde4d213d6ed03d20f246f2ded73", "0ac8188e490de61e8405b611fdf962abd3026860", "646c2d0aeb595552b7fe257a4ef02d08f0cdb5b8", "7eaec4e6ccd24b376996f878201a043c635807d1", "7645b535c1ac7eeeea4aab047580c31a44ebf4c1", "7111f0441de5fc35840bb810873ddf207ff54df9", "8fc7e468d8aef052c7c265c178e9816b89a4022b", "233997563379e02d37778f80c028a34209de5817", 
"3cda09fdc91d7f85a138a4d56848a3a0708df76f", "522f13224a0012b1b40bf5f834af2069c9bf463e", "9dfb6b03a9104361760e3e2a44984d1e24a05d16", "392afa2aa42dba78d764d3db00b5c9b1a9596d40", "083fd3438b6d1a444fe46cb5a4e131f68f5afd4d", "0b64eab2cd477598b0fc2468b6c10d35f76b2afa", "1a5911d6c682ce1a851e8666f570a4c734d99678", "26433a0152e7eb0682ae9ec849dbd908dd507b82", "576e7d4970a5a0b6d48ab184af8b09c5161cc1ad", "b0d549bcd02d14b0f0d32735a6913a537953186e", "be7ca1f8296d974b073436de6cc03a2947917942", "355b52c99eaf0ceb7b8fb5b72ac4d4cb3cbc21d5", "16e367708e50a9ed6228334c9d49f4db0dab4cd8", "2c08d5a490310110eaf6836790c01fb76c27e84a", "d7b3344bf4ada92ad4f746ed72c75523f72f4679", "7f7028c02dc53da4ccda5db9200a3b5001dd27a8", "00b889ac3f6bbe725f899578613b193f38fe0d0f", "0a5bb204cf8e5cce872573445f3cdb17c12203ff", "075f51b0aeaac7ebed18d5fbf67e64a14c8943f1" ], "paperAbstract": "Video streaming applications generate a large fraction of Internet traffic. Much of this content is delivered over HTTP using standard web servers. Unlike other types of web workloads, HTTP video streaming workloads are typically disk bound, and therefore an important problem is that of optimizing disk access.\n In this paper we design, implement and evaluate Libception, an application-level shim library that implements techniques for improving disk I/O efficiency. Web servers can achieve the benefits of these techniques simply by linking with Libception, without the need to modify source code. In contrast to making kernel changes or attempting to optimize kernel tuning, Libception provides a portable and relatively simple setting in which techniques for optimizing I/O in HTTP video streaming servers can be implemented and evaluated.\n We report experimental results evaluating the efficacy of the aggressive prefetching and disk I/O serialization techniques currently implemented in Libception, for three web servers (Apache, nginx and the userver) and two operating systems (FreeBSD, Linux). 
We find that on FreeBSD, video streaming throughput with all three web servers can be doubled by linking with Libception. On Linux, performance similar to that provided with Libception was eventually obtained by examining the kernel source to understand and tune kernel parameters. With the default kernel parameter settings, however, and regardless of which Linux disk scheduler is selected, we find that use of Libception can approximately double throughput. We find that both aggressive prefetching and serialization are necessary to achieve these benefits.", "pdfUrls": [ "https://www.cs.usask.ca/faculty/eager/icpe-libception-2017.pdf", "http://doi.acm.org/10.1145/3030207.3030231" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/398e280e5afaeab753ca567eb137e31bd7f55e9b", "sources": [ "DBLP" ], "title": "Using Libception to Understand and Improve HTTP Streaming Video Server Throughput", "venue": "ICPE", "year": 2017 }, "39a1fb4cb03f5fada6e485a1d0dc6a723a17ed22": { "authors": [ { "ids": [ "14441372" ], "name": "Terry Ching-Hsiang Hsu" }, { "ids": [ "10377580" ], "name": "Helge Br\u00fcgner" }, { "ids": [ "37977699" ], "name": "Indrajit Roy" }, { "ids": [ "2812707" ], "name": "Kimberly Keeton" }, { "ids": [ "1697599" ], "name": "Patrick Th. 
Eugster" } ], "doi": "10.1145/3064176.3064204", "doiUrl": "https://doi.org/10.1145/3064176.3064204", "entities": [ "C++", "Drop-in replacement", "Fault tolerance", "Memristor", "Multi-objective optimization", "Non-volatile memory", "Overhead (computing)", "POSIX Threads", "Persistence (computer science)", "Phase-change memory", "Programming model", "Schedule (computer science)", "SerDes", "Serialization", "Synchronization (computer science)", "Thread (computing)", "Volatile memory" ], "id": "39a1fb4cb03f5fada6e485a1d0dc6a723a17ed22", "inCitations": [ "e3a66a2e79c2a5f7a52bf4e8089bf522fdc04abe", "db57257e6b051e0f97d35209cc5aee0909cde1f1", "cb2a018979184f87692d423322e367cc42a215d2" ], "journalName": "", "journalPages": "468-482", "journalVolume": "", "outCitations": [ "09031aa6d6743bebebc695955cd77c032cd9192f", "0c96b3ac2e720448054f1bcebdfd52ee341eac57", "44022fb8c1acc3ebd2b1f4f0f43f111422941e57", "2efed59aab7ba6ab1f3e29784c6f9d79b09a20fe", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "05a1357946de5eca42a477b7b268db4944219a2e", "35963f0115d7a9b01d453d4ea33d42d9c26313d6", "3b62c1f19254820c75dd0011f038d7aae04b3414", "e23298e18aa92ac43fa941d0f5eacb339905b685", "1bed30d161683d279780aee34619f94a860fa973", "8bfadfde21e1385c7dbceccd54d124fc437b3721", "1d15930cd9e4ececf22fd96bf9ba52f12dc0665e", "300b683b6e04b691d5f4ec617948a303d3acbbe5", "e9fa7e93d2f4e958cde16c6cf9d5bc966f8e7ffc", "94783d113951822195d4ba44599a8fcbdef9d4bf", "39e3d058a5987cb643e000bce555676d71be1c80", "2db3fcbc192ccc04682a5c528f86190b30e1d11e", "3ec5430347717ff2f02f435cdcf951f35cd1479a", "47b851237f240831abee3971bca6bb8d2a121eb1", "1fb1cfc676f3fcee296b3210a5c71f7f054ff1b0", "6b60ac5240d9c291aa11412712d817119581db13", "bf5497e15f22233cbc2a4d0c3cc2c36f26738701", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "e37a835ef26a261f879eeaa2c59f4a2d95f0af90", "512a8925693d5f4b8e4cfde32bcd3c846a14b71e", "10a7ccd660e7f59bf984ce74886590b7c33e6da0", "99723365fc9fe6960201bf9d246a90ccbb6396fa", 
"339632faa043d4697570fc4fe48a52d007c3cf06", "49b73fc335cb97c0b52b283ce236a4d4eb2b99c9", "948c881ab7f1f62e9c940458e74c3e435320df72", "a95436fb5417f16497d90cd2aeb11a0e2873f55f" ], "paperAbstract": "Non-volatile memory technologies, such as memristor and phase-change memory, will allow programs to persist data with regular memory instructions. Liberated from the overhead to serialize and deserialize data to storage devices, programs can aim for high performance and still be crash fault-tolerant. Unfortunately, to leverage non-volatile memory, existing systems require hardware changes or extensive program modifications.\n We present NVthreads, a programming model and runtime that adds persistence to existing multi-threaded C/C++ programs. NVthreads is a drop-in replacement for the pthreads library and requires only tens of lines of program changes to leverage non-volatile memory. NVthreads infers consistent states via synchronization points, uses the process memory to buffer uncommitted changes, and logs writes to ensure a program's data is recoverable even after a crash. NVthreads' page level mechanisms result in good performance: applications that use NVthreads can be more than 2× faster than state-of-the-art systems that favor fine-grained tracking of writes. 
After a failure, iterative applications that use NVthreads gain speedups by resuming execution.", "pdfUrls": [ "http://nvmw.ucsd.edu/nvmw18-program/unzip/current/nvmw2018-final60.pdf", "http://nvmw.ucsd.edu/nvmw18-program/unzip/current/nvmw2018-paper60-presentations-slides.pdf", "http://doi.acm.org/10.1145/3064176.3064204" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/39a1fb4cb03f5fada6e485a1d0dc6a723a17ed22", "sources": [ "DBLP" ], "title": "NVthreads: Practical Persistence for Multi-threaded Applications", "venue": "EuroSys", "year": 2017 }, "39a68607cfc1cf0acdfcdbea208b8f7cc78041ac": { "authors": [ { "ids": [ "1719358" ], "name": "David Eppstein" }, { "ids": [ "1679615" ], "name": "Michael T. Goodrich" }, { "ids": [ "1745699" ], "name": "Michael Mitzenmacher" }, { "ids": [ "2770558" ], "name": "Manuel R. Torres" } ], "doi": "10.1145/3034786.3056115", "doiUrl": "https://doi.org/10.1145/3034786.3056115", "entities": [ "Algorithm", "Arboricity", "Average-case complexity", "Cuckoo hashing", "Data structure", "Graph (discrete mathematics)", "Hash table", "Out-of-core algorithm" ], "id": "39a68607cfc1cf0acdfcdbea208b8f7cc78041ac", "inCitations": [ "49d6146f78e270e036f2a113b4ed75aea446d61e", "746ff01cf8bf954e15eb6ad9c240205d39dca387", "3a6f98e3cb616dc7fe479282f032975cb898fa5a", "198c6071e5a6aab6a9b50ca13ce4c1dc2c0fec51" ], "journalName": "", "journalPages": "247-260", "journalVolume": "", "outCitations": [ "141e35263ab810983c90d47ad62eb4fab5e51717", "59fb2e3092a945cd63e6f16a0d5e3b8a3d37c2a1", "6d1ca1108d9d96e5607571502552ad04464d7f15", "c0d4c5713dba6472edb651334d25c3a5aa78d6f3", "9db92ae2beb42f4355a7ca4e5f0b98d8fbbeb2ae", "59de7ffad588a4ade9cc0a0aeb938ab9e067e252", "44b2dd390f32a6a77d4e2416351df0fa08a323c1", "4ff9e57adb516688fcf295f5da161ab7c0f7a30e", "d5d319d1413e99b4adf383ab633e6337037cfa93", "2c0e63c99e51fb7eefd96d9f63e5dc5a8709e179", "556ea938c79cf3f3cf210b234902bbd4f1714075", "cbce38ea34c90b8a53f35f10b2235e65fc5fe167", 
"a9373f53960efca26defbd4480ef690fd2d74ecf", "8dcda633036c3e90d4e94c96c4990e98f70bdf7d", "157d99f5e65e8e2a069dc7e123667b6b47ea9903", "29db10ae32a3728821335e6a2b9e43cb374dbc03", "eb4ffe1f1a98902225619a5cfea7883f737c2428", "1cdcab443a9d66e08e3c25653ecfcfb3d996867a", "4d552e231c1e7f708d049a18d9e8ceea66040310", "19b84f3926683e939618ead62b8d71650f1b2e45", "3a2f37d3648592ffb42155c28f71894ad61937fe", "ddebb9326d027738289d6dc55cbbe8dd63fce842", "4587d4722317acd4e2a90b12f58ccc9de1ecc6ee", "c910d0aa5333466278e6ab3dc064eadc7ed424dd", "6e0a908dc3b4092783073150eff4994370a1b098", "392caca3c05177f63ffb133db458f9652c8b6e16", "3b4d5b3e93aa6d5ea413c726c17057d8d1eafb2d", "f1568317d98612408de8982a7a9907bf9fd6826a", "01094798b20e96e1d029d6874577167f2214c7b6", "1521d39088b203ddac981d10d214f463449ae95b", "812315fc4036676de8454d4e99e2bc85b9a900ab", "e73a88636be10346da1fe4280c0eab7d16b3d336", "01cecc63d5e58fec81f8b75ef81c9ab23df34369", "06501b7ea604a8b8ffff402ee492955e6892daad", "339888b357e780c6e80fc135ec48a14c3b524f7d", "234e6be0d4238f76b3ac038ee422be39f391c625", "20480eb94103d5ed4b5925a86430c872e4fabfdc", "7c1a0f7054d496e71cc697202b84d8a5c652328a", "c1dd0de3daacb174215b51499652c920ed95ae92", "629554e6b358bfa6efc2aaf63746f1fe935b107c", "1508502719f66259b2e65882c5b91c458cb8ff39", "05c721f47d9a53a5739c88a14cb36baf12d2b0fa", "de40c5f5ff5e5bd33a8c876b11ab0149bf7fd7d9", "135d89a35623359aa3af7ce6f95b0078c6acc43a", "1163b331215f934537ca6b78b8d77ceb1f0fc139", "29cc0a8802126d4e97f28109763df26ab91c6531" ], "paperAbstract": "We introduce new dynamic set intersection data structures, which we call 2-3 cuckoo filters and hash tables. These structures differ from the standard cuckoo hash tables and cuckoo filters in that they choose two out of three locations to store each item, instead of one out of two, ensuring that any item in an intersection of two structures will have at least one common location in both structures. 
We demonstrate the utility of these structures by using them in improved algorithms for listing triangles and answering set intersection queries in internal or external memory. For a graph G of n vertices and m edges, our internal-memory triangle listing algorithm runs in O(m⌈(α(G)log w)/w⌉ + k) expected time, where α(G) is the arboricity of G, w is the number of bits in a machine word, and k is the number of output triangles. Our external-memory algorithm uses O(sort(n,α(G))+ sort(m⌈(α(G)log w)/w⌉) + sort(k)) expected number of I/Os.", "pdfUrls": [ "http://doi.acm.org/10.1145/3034786.3056115", "http://www.ics.uci.edu/~goodrich/teach/graph/notes/GoodrichPODS.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/39a68607cfc1cf0acdfcdbea208b8f7cc78041ac", "sources": [ "DBLP" ], "title": "2-3 Cuckoo Filters for Faster Triangle Listing and Set Intersection", "venue": "PODS", "year": 2017 }, "39d4e7c9f4342104ffc1931d4aa1817646ca3ab5": { "authors": [ { "ids": [ "26638855" ], "name": "Hiroshi Sasaki" }, { "ids": [ "3075072" ], "name": "Fang-Hsiang Su" }, { "ids": [ "2760588" ], "name": "Teruo Tanimoto" }, { "ids": [ "1738240" ], "name": "Simha Sethumadhavan" } ], "doi": "10.1109/IISWC.2017.8167771", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167771", "entities": [ "Compiler", "Customer relationship management", "Directed graph", "Emergence", "Fractal", "Producer\u2013consumer problem", "Social network", "Tails" ], "id": "39d4e7c9f4342104ffc1931d4aa1817646ca3ab5", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "135-145", "journalVolume": "", "outCitations": [ "4c775488cbb7557b1e20d43d3fb7c6c286eeb7d2", "2f4dd9ec6c821ec4397f83b5a26e0cd99111efa7", "30ce002c359452982455a7d93ebf84d05c1f0477", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "55f07fea268cc777ae4a63219f3ba9b6d58aa22b", "00a9ba0063d34ec56792849a67ef57b4601becbb", "058ada9d78c689e03ea898008e2bcebf69910d4f", 
"0653e2ed9f683868cb4539eb8718551242834f6b", "08cfe650fdfa907764423958b1923e42ba945b7e", "273349f99205457ce056ee57daa8b0f3f56fbb2e", "5240aedc03d3a203da7548c1efcf4e42dcb6e5c7", "246be658a2ce791070a440cfc965a3ddac325c18" ], "paperAbstract": "Designing and optimizing computer systems require deep understanding of the underlying system. Historically many important observations that led to the development of essential hardware and software optimizations were driven by empirical studies of program behavior. In this paper we report an interesting property of dynamic program execution by viewing it as a changing (or social) network. In a program social network, two instructions are friends if there is a producer-consumer relationship between them. One prominent result is that the outdegree of instructions follow heavy tails or power law distributions, i.e., a few instructions produce values for many instructions while most instructions do so for very few instructions. In other words, the number of instruction dependencies is highly skewed. In this paper we investigate this curious phenomenon. By analyzing a large set of workloads under different compilers, compilation options, ISAs and inputs we find that the dependence skew is widespread, suggesting that it is fundamental. We also observe that the skew is fractal across time and space. 
Finally, we describe conditions under which skew emerges within programs and provide evidence that suggests that the heavy-tailed distributions are a unique program property.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167771", "http://www.cs.columbia.edu/~simha/preprint_iiswc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/39d4e7c9f4342104ffc1931d4aa1817646ca3ab5", "sources": [ "DBLP" ], "title": "Why do programs have heavy tails?", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "39f6571e188f704eb84f9a6d682be7c0483fd443": { "authors": [ { "ids": [ "3252651" ], "name": "Ruiyu Zhu" }, { "ids": [ "34638466" ], "name": "Yan Huang" }, { "ids": [ "35172700" ], "name": "Darion Cassel" } ], "doi": "10.1145/3133956.3134070", "doiUrl": "https://doi.org/10.1145/3133956.3134070", "entities": [ "Computation", "Experiment", "Malware", "Scalability", "Secure multi-party computation", "Threat model", "Usability" ], "id": "39f6571e188f704eb84f9a6d682be7c0483fd443", "inCitations": [ "3dbb3dcaff97a0db797d01def0f96b6e37289daa" ], "journalName": "", "journalPages": "245-257", "journalVolume": "", "outCitations": [ "588972fccb475cfaafdbb6efeef592eacadbe5f0", "db0f82a419f89cda64fcbec2c58137862cd04475", "61883fbd35396888924520e109355e912337d2b8", "04948723dec0e6724777ee56f0d10168cce44921", "5161aa950ec876026dfc24b4cbf69ae1e552c0e6", "2813c3351838e036f52bcaa94eb7203bf4d9e7d3", "0affd3f06d26de268d81c288454dd7880e518f9e", "3dbb3dcaff97a0db797d01def0f96b6e37289daa", "9f48a66ebf7a5cdcd990a62b2373c2f279cfd62b", "5c07dadf28f3cfdd67ab60a12d3ea9860bcd8b24", "42333e3f231bbfe508f6da6bad2feff9ae223113", "475b10209d1ed13b079d62aca57ec31da4284bcd", "8fa56ecfb46b8dadf8a4dd063d15da5b975c83f1", "23ec68ed03b485b645478a3f6905615617d905a6", "15509fdb7fc7bf065fcdf776b38cf3d72d10c113", "05543dfa89c4e89ed1f78a1b83c2e172cd8f6321", "362246709de205ec0ac5b34e07306839c38d5a3a", 
"a797a0346e106e0d1d1d2db778aa509031c7bf8c", "15964bef0c5a10420ccf44f4e02f4905aa9d85d0", "2fb3c68ac20704fcda5b6ec91a3e166ec41f6c13", "e50ae4d480d84c7cbdb8edcebf13e57f5a47c8ad", "3657ea546cc04bb4e618c56fbe8f26354bc8cbf4", "33148623fc14ea5735e73dd716d030ab17118299", "f71db4d70d4cc9e931a63dde7a6db8dad10a61a0", "01ca4dd53f226dff9da314cc35d2fa6ee1979e57", "3fb1b878daafbd54989438e4fb778380a03226e6", "47b8fd6ee8b07bd14de3c91df515b11180121de9", "5adc94602d07e49cc1e94e2aa2b1bdf3481a47f8", "cd7d6df7ec98254301674c6d3a1401d2336db00a", "0ada489ea8cffd88d86108695eb6b7f0cee0f0c5", "127adf86474103b6f05afcc5bceda45bb5e34a8a", "69dc0fe412f974a595abe6d7052d8fdf2304ba3b", "3fab56c42fe6efc0b4febd21596ef89188f5d21a", "11484e276a27191b043c2ccab243addcdf4c965a", "0a27a75f47af3cf52bdcd34f5b82bc9af7249c12", "490b2ab76335de294498bff727c0a25314317c63" ], "paperAbstract": "This paper considers the problem of running a long-term on-demand service for executing actively-secure computations. We examined state-of-the-art tools and implementations for actively-secure computation and identified a set of key features indispensable to offer meaningful service like this. Since no satisfactory tools exist for the purpose, we developed Pool, a new tool for building and executing actively-secure computation protocols at extreme scales with nearly zero offline delay. With Pool, we are able to obliviously execute, for the first time, reactive computations like ORAM in the malicious threat model. Many technical benefits of Pool can be attributed to the concept of pool-based cut-and-choose. 
We show with experiments that this idea has significantly improved the scalability and usability of JIMU, a state-of-the-art LEGO protocol.", "pdfUrls": [ "http://homes.soic.indiana.edu/yh33/mypub/pool.pdf", "http://doi.acm.org/10.1145/3133956.3134070" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/39f6571e188f704eb84f9a6d682be7c0483fd443", "sources": [ "DBLP" ], "title": "Pool: Scalable On-Demand Secure Computation Service Against Malicious Adversaries", "venue": "CCS", "year": 2017 }, "3a078aa9d8c0de53389481e4e9c4ef84eee33aba": { "authors": [ { "ids": [ "1750087" ], "name": "Lei Ying" } ], "doi": "10.1145/3084449", "doiUrl": "https://doi.org/10.1145/3084449", "entities": [ "Algorithm", "Approximation", "Approximation error", "Data center", "Dynamical system", "Expanded memory", "Interchange circuit", "Limiter", "Perturbation theory", "Perturbation theory (quantum mechanics)", "Point of View (computer hardware company)", "Quantum field theory", "Queueing theory", "Stationary process", "Steady state", "Stochastic process", "Telecommunications network", "Weak measurement" ], "id": "3a078aa9d8c0de53389481e4e9c4ef84eee33aba", "inCitations": [ "8b52a1fff81cd20ea4801779fd029cf8ef7946f0", "a98aa7bbd18ccebea80a89fcb66b2946ba95c057", "b8fb092352449a4fda8386ff0e79ce2e0154d451" ], "journalName": "POMACS", "journalPages": "12:1-12:27", "journalVolume": "1", "outCitations": [ "30edb202ce468a6533f555e4adbdda4b04585de9", "0a671fc71f8e6e66ded15370e62f849ce1469489", "28c35994ad743fb284bc7410c59fd231e3ce2d77", "42406bba8ac3466dbda99931e7166f456acfab6b", "cebde0b3844b30e4a4a1f200470aeae70a6d15cc", "234e6be0d4238f76b3ac038ee422be39f391c625", "72901ecafbcf77ed95365972a548630e7b34b544", "2f9aea036407868166e41a73bdbc89950663668f", "da707c9d4f29d2d186d72cfa64dc4edc907717c8", "069e68a96e05458cd0ff4885f73ce1bc65b7bbb3", "602403b2eba0a70f94f463fe28122f35bce5f0b6", "0b13aa1443f1620e6b8d996f0abc6d67cad7cbb7", "691fbdd8ab7b7ab5c60ac5214730a6b715954fcd", 
"6d68b95bde5d13ce40201667e3fc3577e5c642b3", "4414619c00ef5886ed3845d04a5a37f97946cfb0", "6611714d0558ee2ab170ba7f1868ccde12b9b598", "d112bbf7216d3a9f44de158da9e721ba25c3c436", "fdf9c71255a0e9cf6b417f0accc265391058c51b" ], "paperAbstract": "Mean-field analysis is an analytical method for understanding large-scale stochastic systems such as large-scale data centers and communication networks. The idea is to approximate the stationary distribution of a large-scale stochastic system using the equilibrium point (called the mean-field limit) of a dynamical system (called the mean-field model). This approximation is often justified by proving the weak convergence of stationary distributions to its mean-field limit. Most existing mean-field models concerned the light-traffic regime where the load of the system, denote by ρ, is strictly less than one and is independent of the size of the system. This is because a traditional mean-field model represents the limit of the corresponding stochastic system. Therefore, the load of the mean-field model is ρ= limN→ ∞ ρ(N), where ρ(N) is the load of the stochastic system of size N. Now if ρ(N)→ 1 as N→ ∞ (i.e., in the heavy-traffic regime), then ρ=1. For most systems, the mean-field limits when ρ=1 are trivial and meaningless. To overcome this difficulty of traditional mean-field models, this paper takes a different point of view on mean-field models. Instead of regarding a mean-field model as the limiting system of large-scale stochastic system, it views the equilibrium point of the mean-field model, called a mean-field solution, simply as an approximation of the stationary distribution of the finite-size system. Therefore both mean-field models and solutions can be functions of N. This paper first outlines an analytical method to bound the approximation error based on Stein's method and the perturbation theory. 
We further present two examples: the M/M/N queueing system and the supermarket model under the power-of-two-choices algorithm. For both applications, the method enables us to characterize the system performance under a broad range of traffic loads. For the supermarket model, this is the first paper that rigorously quantifies the steady-state performance of the-power-of-two-choices in the heavy-traffic regime. These results in the heavy-traffic regime cannot be obtained using the traditional mean-field analysis and the interchange of the limits.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078505.3078592", "http://doi.acm.org/10.1145/3084449" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3a078aa9d8c0de53389481e4e9c4ef84eee33aba", "sources": [ "DBLP" ], "title": "Stein's Method for Mean-Field Approximations in Light and Heavy Traffic Regimes", "venue": "SIGMETRICS", "year": 2017 }, "3a0995882b3459e444a36e0f473f38610d1ca21d": { "authors": [ { "ids": [ "7777279" ], "name": "Yunfei Ma" }, { "ids": [ "3979901" ], "name": "Nicholas Selby" }, { "ids": [ "1761544" ], "name": "Fadel Adib" } ], "doi": "10.1145/3117811.3117833", "doiUrl": "https://doi.org/10.1145/3117811.3117833", "entities": [ "Algorithm", "Internationalization and localization", "Radio-frequency identification", "Super-resolution imaging", "Ultra-wideband", "Video game localization" ], "id": "3a0995882b3459e444a36e0f473f38610d1ca21d", "inCitations": [ "eeb1dada0f006a3e77deed5ac8bc9de31de933fc", "af8ae2a2a2d74ae63c599865beaaf54d85c69acc", "98cb0ce9ca0cc29f2468f7da50c75666fd09483f", "034a5e215c86052fb1f6f3d7871839f909c38a0a" ], "journalName": "", "journalPages": "248-260", "journalVolume": "", "outCitations": [ "2c9ffd6648a3fe12f9cbe54642a63d3b81cb4ead", "bc2391b30a6cd8b74a0033255ba7cdb99a9ae548", "7b19ec871e1e883dc7d417c1168c7ee5577f9292", "29e9cd18af650b7e448dea668121a1d98afd3c46", "0ee2a9e66999c14eafd5f5ce15cf1c4535d24410", "bd26625c4c325803d4f83a037fc25f8fb435f178", 
"666fbcf4697fc64d576b9e007af6ee612d10a9f8", "2a6f3ab065ad5425075fd43a83d335f40f626b07", "3b3188f59e6dff7034f160650fb4b5c32a33c3cf", "bd948e55c880a6ea914a676d94d90b8a161bd986", "21b2010e4b18ed32dba0470603001461fb085eb6", "b40c3bfbad4f350d584bb34150eb69de511f7079", "3a50b26964a9b2b50bdf76fd91296d0f590b3d2f", "a450f48f461467b5a24bc928f534e6a24d12d725", "8c024fb6d09064af6a672e9265abc7fe1bc57455", "5a0e84b72d161ce978bba66bfb0e337b80ea1708", "82802e411495bbad77fa2415c6d4633dde180764", "be5e1a5cc02e5811fa5691026e340e26128a8532", "57fa9b02ccb9a8302b5983ddf55a7d7654fb9f2b", "05640aeaa46c8f1604eee84e88f3ba1267a232e2", "24d96f44682195f9901dcbdd9506ac1cc1a19879", "6012bb4e45d089d4645bc585f972394156105bb3", "15c6da8909cfc35c4831013a99dd26206df444de", "0d9f7d4509f5b53c8db4fb45beaf6cc2f267ebbf", "0d205c14b34cfe6d047ab5406ac5812ee558b633", "03fa4cc8bbea0b257afcf86a16f3d4fe8ec7af21", "1c4494813938ec02afd7518fc454e5ab34bc702a", "d8eb86a87598969bc3ed57f6a61555d2b55e17bc", "566e2663f897523d3309138a846e687626430771", "052b36fd8bde6035c11eb316c3f9a3665c0110f0", "381d605d38e372c4f3d9306aeb781f7204c29385", "0b3fa65882b095e97353814c4266d8b934f62eab", "101aaa6b7a3ebd049412265a43f8aed414f44db1", "77f60920dc00f36964b553f95e484d05e60e2554", "31453684017625ba3bd29973b588f3bb42a2f12e", "dce0cf77ab9a96f800550b3f91f28245fe2cd511", "80a8eb168c35295fbad91a55bd9e83d0f4477234", "0fe946c28abcc30b7ceecaed8261b7d76b724167", "83b4a70b6c7a22dbb543f8fe39ca45479fba551a" ], "paperAbstract": "State-of-the-art RFID localization systems fall under two categories. The first category operates with off-the-shelf narrowband RFID tags but makes restrictive assumptions on the environment or the tag's movement patterns. The second category does not make such restrictive assumptions; however, it requires designing new ultra-wideband hardware for RFIDs and uses the large bandwidth to directly compute a tag's 3D location. 
Hence, while the first category is restrictive, the second one requires replacing the billions of RFIDs already produced and deployed annually. This paper presents RFind, a new technology that brings the benefits of ultra-wideband localization to the billions of RFIDs in today's world. RFind does not require changing today's passive narrowband RFID tags. Instead, it leverages their underlying physical properties to emulate a very large bandwidth and uses it for localization. Our empirical results demonstrate that RFind can emulate over 220MHz of bandwidth on tags designed with a communication bandwidth of only tens to hundreds of kHz, while remaining compliant with FCC regulations. This, combined with a new super-resolution algorithm over this bandwidth, enables RFind to perform 3D localization with sub-centimeter accuracy in each of the x/y/z dimensions, without making any restrictive assumptions on the tag's motion or the environment.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117833", "http://www.mit.edu/~fadel/papers/RFind-paper.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3a0995882b3459e444a36e0f473f38610d1ca21d", "sources": [ "DBLP" ], "title": "Minding the Billions: Ultra-wideband Localization for Deployed RFID Tags", "venue": "MobiCom", "year": 2017 }, "3a1f3429bbb163e050188cce42a647a11312260c": { "authors": [ { "ids": [ "2170241" ], "name": "Yuechao Pan" }, { "ids": [ "1772360" ], "name": "Yangzihao Wang" }, { "ids": [ "1929145" ], "name": "Yuduo Wu" }, { "ids": [ "2695365" ], "name": "Carl Yang" }, { "ids": [ "1758404" ], "name": "John D. 
Owens" } ], "doi": "10.1109/IPDPS.2017.117", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.117", "entities": [ "Algorithm", "Graph (abstract data type)", "Graphics processing unit", "Memory management", "Programmer", "Scalability" ], "id": "3a1f3429bbb163e050188cce42a647a11312260c", "inCitations": [ "e2207382768cef76f63a16d91a169078cfdc9b46", "46f3bb6751419b87856c4db0193e7a72ef3fa17c", "b292326ae0bb4b3192f425ab9928579c6ea8d4f2", "db63d47efa261ce1bb1a154e140e4a059f9bb999", "0aa83b7afe8d5f7c39b6bd97c3b9394c4b6e5cdf", "896134c7aa767e27cb3c3aa0662b335473923602", "6401ed8f6f0b37f30573edcc2743134c6fe7a682" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "479-490", "journalVolume": "", "outCitations": [ "2a17c90ed723d6a14415cc1f677a5c0aa512f501", "1186c4a90fb212bdd466159c3a9d45a83189088f", "93ee8e1c05d11d63aa3d61653b2c8bae75e0aecd", "175d795f44037ef60dd9df341701cd5fdc449f1f", "5c874558bfc493260c520a43dd5e29acae68b028", "0e570045a764fb1f49c2e33e5ab02b9eba06fbc6", "2916fd514c69c1b3141c377c1c97d957bdc86c5e", "32c8c7949a6efa2c114e482c830321428ee58d70", "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", "56d5d3f3ec4d95d13b0a2d6c08ee46f8704b82dc", "259e93de2f10d395a1bdfb2dc6da72b6a3998572", "687b65a6e91e429c6c1369aee3b493ffd83c0da0", "14edc660cb7db680f2e471460a794f68ba03f295", "2ce27845038020ea43afa08e91f916a4ccf19924", "191fd33f17c2a79b3825d4cc2105c47a8f16ba44", "22c74d8be071084ce8812af19548e7bf2bf0c8b6", "7ebb9fad71ce8e08d5284b7644a5452cff6c75b3", "80d8fe9fc7b965e1f6289677922a81cd03c54dd5", "3ebf3857a60c3e224284bbbe6c7127d0a12c546d", "c85c784038d6f4f4845842bdc41877f8581ac796", "0d49c615e4a261824677e1e7c08411f745471c79", "0074e55e67c74420b725fbb09a8f2f351d6947a9", "3c84f22df1948dfa8b1b14bbd4c850baf9c5b632" ], "paperAbstract": "We present a single-node, multi-GPU programmable graph processing library that allows programmers to easily extend single-GPU graph algorithms to achieve scalable performance on large graphs 
with billions of edges. Directly using the single-GPU implementations, our design only requires programmers to specify a few algorithm-dependent concerns, hiding most multi-GPU related implementation details. We analyze the theoretical and practical limits to scalability in the context of varying graph primitives and datasets. We describe several optimizations, such as direction optimizing traversal, and a just-enough memory allocation scheme, for better performance and smaller memory consumption. Compared to previous work, we achieve best-of-class performance across operations and datasets, including excellent strong and weak scalability on most primitives as we increase the number of GPUs in the system", "pdfUrls": [ "http://www.idav.ucdavis.edu/~yzhwang/SC2015-poster.pdf", "https://arxiv.org/pdf/1504.04804v2.pdf", "http://idav.ucdavis.edu/~yzhwang/SC2015-poster.pdf", "http://arxiv.org/pdf/1504.04804v2.pdf", "https://arxiv.org/pdf/1504.04804v1.pdf", "https://arxiv.org/pdf/1504.04804v4.pdf", "https://people.csail.mit.edu/jshun/6886-s18/papers/Pan17.pdf", "https://arxiv.org/pdf/1504.04804v3.pdf", "https://doi.org/10.1109/IPDPS.2017.117", "http://arxiv.org/abs/1504.04804" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3a1f3429bbb163e050188cce42a647a11312260c", "sources": [ "DBLP" ], "title": "Multi-GPU Graph Analytics", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "3a647aa056e6f4a2fca88c83d8f65cadcc13169a": { "authors": [ { "ids": [ "2789169" ], "name": "Xunyun Liu" }, { "ids": [ "1678182" ], "name": "Aaron Harwood" }, { "ids": [ "2476111" ], "name": "Shanika Karunasekera" }, { "ids": [ "1868067" ], "name": "Benjamin I. P. 
Rubinstein" }, { "ids": [ "1709598" ], "name": "Rajkumar Buyya" } ], "doi": "10.1109/ICPP.2017.66", "doiUrl": "https://doi.org/10.1109/ICPP.2017.66", "entities": [ "Apache Storm", "Application checkpointing", "Backup", "Computation", "Data store", "Experiment", "Fault tolerance", "Floor and ceiling functions", "Management system", "State management", "Stream processing", "Synthetic data", "Throughput", "Velocity" ], "id": "3a647aa056e6f4a2fca88c83d8f65cadcc13169a", "inCitations": [ "916f2ae5cda7bab2436f6a49f38c0c8636c832b2" ], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "571-580", "journalVolume": "", "outCitations": [ "5578cabaef7b5dfc88443626e74d2e04951818f7", "63115442310908b876aa1e81d877813ebee8b247", "a6d6dad952d35658d5e9a5c481401f7d3a5d7a7d", "5208060771fd213eefd827e3e1260b939f1aed6d", "bad84100cd1bffe83bd33212a79d5cbb7f4ffb12", "511562debe051d77e38c374e4080b768c1151e66", "88fd5ae53854a26b9edb2eb42ce6dfdd6e186ea5", "a1e6b8c0633bc06c712edae9a4d7266be31d0aac", "9e074f3d1c0e6212282818c8fb98cc35fe03f4d0", "76ec30f9d6516a94a032831a60368f2cc376f664", "b982a7040edb85888e0ebf746a13bd9bdfd2474f", "85753015a8d5f6c702f2de34fe962324249fe7c2", "3ede9f94ee174dae45f3e95cf3a45df2dbe05307", "ce91b0ca3b88bd0464f5e7ca7564ee84ed7b371c", "3f4cedff46e2fd542899ad9cfac286bf9976e022", "113dfad2cac3dd66ef57e1651c711d0e2d420deb", "78c83644fbfa65e69137f57e22fa3b53f225a5d5", "2a97b492689c589a50f6269f76367ff1c80a0917" ], "paperAbstract": "Apache Storm is a fault-tolerant, distributed inmemory computation system for processing large volumes of high-velocity data in real-time. As an integral part of the fault-tolerance mechanism, Storm's state management is achieved by a checkpointing framework, which commits states regularly and recovers lost states from the latest checkpoint. 
However, this method involves a remote data store for state preservation and access, resulting in significant overheads to the performance of error-free execution.In this paper, we propose E-Storm, a replication-based state management system that actively maintains multiple state backups on different worker nodes. We build a prototype on top of Storm by extending it with monitoring and recovery modules to support inter-task state transfer whenever needed. The experiments carried out on synthetic and real-world streaming applications confirm that E-Storm outperforms the existing checkpointing method in terms of the resulting application performance, obtaining as much as 9.44 times throughput improvement while reducing the application latency down to 9.8%.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.66", "http://www.cloudbus.org/papers/E-Storm2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3a647aa056e6f4a2fca88c83d8f65cadcc13169a", "sources": [ "DBLP" ], "title": "E-Storm: Replication-Based State Management in Distributed Stream Processing Systems", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "3a8fd354119608d310be342e0e5ecc741d448ac4": { "authors": [ { "ids": [ "2505238" ], "name": "Wonil Choi" }, { "ids": [ "2372241" ], "name": "Mohammad Arjomand" }, { "ids": [ "36895144" ], "name": "Myoungsoo Jung" }, { "ids": [ "1717494" ], "name": "Mahmut T. 
Kandemir" } ], "doi": "10.1145/3084458", "doiUrl": "https://doi.org/10.1145/3084458", "entities": [ "Archive", "FTL: Faster Than Light", "Flash memory", "Hard disk drive", "Magnetic storage", "Multi-level cell", "Non-volatile memory", "Relaxation (approximation)", "Simulation", "Solid-state drive", "Usability", "Wear leveling" ], "id": "3a8fd354119608d310be342e0e5ecc741d448ac4", "inCitations": [ "3847cd1d3473b7cd5f939ce61bb728def35dea56" ], "journalName": "", "journalPages": "53", "journalVolume": "", "outCitations": [], "paperAbstract": "Storage-class memory (SCM) combines the benefits of a solid-state memory, such as high-performance and robustness, with the archival capabilities and low cost of conventional hard-disk magnetic storage. Among candidate solid-state nonvolatile memory technologies that could potentially be used to construct SCM, flash memory is a well-established technology and have been widely used in commercially available SCM incarnations. Flash-based SCM enables much better tradeoffs between performance, space and power than disk-based systems. However, write endurance is a significant challenge for a flash-based SCM (each act of writing a bit may slightly damage a cell, so one flash cell can be written 10^4-10^5 times, depending on the flash technology, before it becomes unusable). This is a well-documented problem and has received a lot of attention by manufactures that are using some combination of write reduction and wear-leveling techniques for achieving longer lifetime. 
In an effort to improve flash lifetime, first, by quantifying data longevity in an SCM, we show that a majority of the data stored in a solid-state SCM do not require long retention times provided by flash memory (i.e., up to 10 years in modern devices); second, by exploiting retention time relaxation, we propose a novel mechanism, called Dense-SLC (D-SLC), which enables us perform multiple writes into a cell during each erase cycle for lifetime extension; and finally, we discuss the required changes in the flash management software (FTL) in order to use D-SLC mechanism for extending the lifetime of the solid-state part of an SCM. Using an extensive simulation-based analysis of an SLC flash-based SCM, we demonstrate that D-SLC is able to significantly improve device lifetime (between 5.1X and 8.6X) with no performance overhead and also very small changes at the FTL software.", "pdfUrls": [ "http://doi.acm.org/10.1145/3084458", "http://doi.acm.org/10.1145/3078505.3078527", "https://arxiv.org/pdf/1704.05138v1.pdf", "http://arxiv.org/abs/1704.05138" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3a8fd354119608d310be342e0e5ecc741d448ac4", "sources": [ "DBLP" ], "title": "Exploiting Data Longevity for Enhancing the Lifetime of Flash-based Storage Class Memory", "venue": "SIGMETRICS", "year": 2017 }, "3a9ee93ff98b15a780b283a28bb5b943666229b0": { "authors": [ { "ids": [ "31299982" ], "name": "Alexei Zamyatin" }, { "ids": [ "1725735" ], "name": "Katinka Wolter" }, { "ids": [ "34699197" ], "name": "Sam Werner" }, { "ids": [ "1745490" ], "name": "Peter G. Harrison" }, { "ids": [ "11130589" ], "name": "Catherine E. A. Mulligan" }, { "ids": [ "1786779" ], "name": "William J. 
Knottenbelt" } ], "doi": "10.1109/MASCOTS.2017.22", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.22", "entities": [ "Cryptocurrency", "Cryptography", "Data mining", "Digital gold currency", "Ethereum", "Mining pool", "Simulation", "Solo" ], "id": "3a9ee93ff98b15a780b283a28bb5b943666229b0", "inCitations": [ "3acba13b1f3234e2e4993fba4ad3ff8bc63f6ca5" ], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "99-109", "journalVolume": "", "outCitations": [ "96d806e0b0f89e344df129d63c292e00ceb75946", "33189adda43ee49005373f9dbbb351ff78a45199", "b9305c1d9aa7241cc149b0db69a2a1833af0c20a", "0db38d32069f3341d34c35085dc009a85ba13c13", "098d5792ffa43e9885f9fc644ffdd7b6a59b0922", "12fd427f54a730aa75ed4b00248793302a132e40", "822693248834147245d6ff2309192122d1326396", "1999857212c7f8157230bc3a32b1d519ef124e00", "0171a99c93632846f9b3213a8275adf042f19871", "084da7c90567476907522d91d22a8a8a6f818447" ], "paperAbstract": "Cryptocurrency mining can be said to be the modern alchemy, involving as it does the transmutation of electricity into digital gold. The goal of mining is to guess the solution to a cryptographic puzzle, the difficulty of which is determined by the network, and thence to win the block reward and transaction fees. Because the return on solo mining has a very high variance, miners band together to create so-called mining pools. These aggregate the power of several individual miners, and, by distributing the accumulated rewards according to some scheme, ensure a more predictable return for participants.In this paper we formulate a model of the dynamics of a queue-based reward distribution scheme in a popular Ethereum mining pool and develop a corresponding simulation. We show that the underlying mechanism disadvantages miners with above-average hash rates. 
We then consider two-miner scenarios and show how large miners may perform attacks to increase their profits at the expense of other participants of the mining pool. The outcomes of our analysis show the queue-based reward scheme is vulnerable to manipulation in its current implementation.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.22" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3a9ee93ff98b15a780b283a28bb5b943666229b0", "sources": [ "DBLP" ], "title": "Swimming with Fishes and Sharks: Beneath the Surface of Queue-Based Ethereum Mining Pools", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "3aa13d8ff3bb862f98530d6da387280a7989cad1": { "authors": [ { "ids": [ "2175685" ], "name": "Hojin Jung" }, { "ids": [ "2917665" ], "name": "Songkuk Kim" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.49", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.49", "entities": [ "Algorithm", "Categorization", "Clustering coefficient", "Coefficient", "Complex system", "Degree (graph theory)", "Graph (discrete mathematics)", "Social network", "Vertex (geometry)" ], "id": "3aa13d8ff3bb862f98530d6da387280a7989cad1", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "372-379", "journalVolume": "", "outCitations": [ "40a2a398862f5c62555ffaf6d8421dea9f1bbcd3", "6c76870260c0e3caa4057a72386125c6d7d6ceb6", "4665f10ba0006df8d4fec85e368bcc503d57ce44", "06214a0cf38875da38586e81539890f7ad8aeb1c", "d295e62f9bef00b42417c57a8e4d9e503519b67a", "00c35b117b9aab849ce0a29bcf7692cbd3d2d16f", "a21e07443de7c7fb0e47a3031b68875c108f560c", "93ee8e1c05d11d63aa3d61653b2c8bae75e0aecd", 
"3e0c97f3e9ba990612d52c177ffe68f4f2c69da7", "5b72cf570bfcc84cb03a9e310e680363373565cf", "3582cd3b7ce374732bf732c28c674b5f96454b15", "d832fb2b7a72640844e1eef439c2092b35e40f60", "a151fcaa3d003321d6e09602a927fc434d19b032", "8c81ddd9c1a6f0d44851c0d2f7f6c4d28ba38382", "9f3c2d5364aab82a24e24e56f6013cfc4c404e13", "2ef9255a5740f35b754dde050ec36f44db0f28ad", "2440a3bce01e9a91f255d2d03447e5c1c53574da", "0371f9e3efbcd4829b5ffbff585155746ef05284", "62c95eb8ccc24aa83202af8a04516594fbc645e9", "0d06de003e8ca949b3b39f9a51750c050addb997", "b11f541b43c6de86e09d97f334e117e392fde01c", "4a00ebe98d455d61dc1b708265c237fe2ee6ec64", "044accf7d0abbee03f7ede29332fc8abf03fda22", "0ed877bab75b32042a887715380c84ac27e64a8b" ], "paperAbstract": "Depicting a complex system like social networks as a graph helps understand its structure and relation. As advances in technology increase the amount of data, simplifying a large-scale graph has attracted interests. Simplification reduces the size of a graph while preserving its important properties. In this paper, we propose the summarization algorithm to simplify a graph focusing on degree correlation and clustering coefficient. The degree correlation is a measure to assess the influence of each vertex and their connections. The clustering coefficient estimates latent connections between two distinct vertices. To this end, we first separate a graph into communities. Looking at groups instead of a graph itself allows us to extract important vertices and edges more easily. We then categorize communities into four cases and simplify them in different ways to preserve the innate characteristics. The quantitative and qualitative evaluations demonstrate how effectively our algorithm serves the goal. Overall, our contributions are as follows: (a) unique pattern: We found that hubs are connected indirectly via low-degree vertices. Sigcon preserves these vertices during simplification. 
(b) efficient algorithm: Sigcon identifies influential vertices and edges to simplify a graph on the basis of degree correlation and clustering coefficient connecting vertices effectively.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.49" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3aa13d8ff3bb862f98530d6da387280a7989cad1", "sources": [ "DBLP" ], "title": "Sigcon: Simplifying a Graph Based on Degree Correlation and Clustering Coefficient", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "3aaa98d867c663e815ba02856c996b049215b401": { "authors": [ { "ids": [ "2005401" ], "name": "Yupeng Gu" }, { "ids": [ "1792614" ], "name": "Yizhou Sun" }, { "ids": [ "2713727" ], "name": "Jianxi Gao" } ], "doi": "10.1145/3097983.3098002", "doiUrl": "https://doi.org/10.1145/3097983.3098002", "entities": [ "Baseline (configuration management)", "Generative model", "Noise (electronics)", "Simulation", "Social network", "Social network analysis", "Utility" ], "id": "3aaa98d867c663e815ba02856c996b049215b401", "inCitations": [ "e68213f04381f127ab3c6b4e056faa116c25c902" ], "journalName": "", "journalPages": "175-184", "journalVolume": "", "outCitations": [ "5cf74a701fe7de37c1ef916699a04119366cfd10", "270c6409280ce423c2f228e3a2e5fd292f6b3683", "13893ece19f1df85bd5909685d4ff8fca5dfce53", "04317e6f6c43b08c17c20d3647be7f7461871d72", "00d23e5c06f90bed0c9d4aec22babb2f7488817f", "be6973a82ebb62e4a58d4494afc7742af5ae5588", "27fe869a53610e094f3354e4de3811c169fa1d60", "04080a4fc2cb3316d63e9f6e8161873be21f6637", "1bb87819767f5d68a4368f06c45718231b791be3", "2b1ec3fdf5b695de2d7ec17393ec0ad9445ceb61", "258c8adfba357ed20cc03b5c2229eb773924bc08", "cba60c4092b00c7aa2aa08029ebee2f8ca06a63b", "0825788b9b5a18e3dfea5b0af123b5e939a4f564", 
"29d880dfd7f39b1a91d5f6a66e2a3170b8f62703", "4b42bf82aaa2fec0c3513ecf9fbb9efb630b7448", "224462e7ca603df32c712ceb2aea68fb25bdc975", "2c5a8dcb6fc679a110ab4e66974b193e455256d2", "2c7a50371c618198dfbc96619fcf74e89b604a15", "ab30b9de25048c15df0ebc353c64f4f3cf6ed52b", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "877cbde094294a1b948eab0b0fe1429688595de0", "89d831a76cb5e64b96bc48f1053ea8c0ed927205", "1eee5e52a57d7402f5863096b140217fc2f44dce", "1558a06fb4f0473f76792e830b0b07c79f7decc0", "1339b888fb739b8a1194fa94fbe848e01d93456a", "c73287153c0a50102a40800c1ada626a410c63cc", "1f0612de1f191abadf250b78cd78f884203cca5e", "0318f19753bd9d42128150d9a7d27dda0a8da96e", "6d0e60f75427b152ec4d0e4f84a7c423152e90a5", "d0a7df360fcc15e2331bfc6b8541a3233cea018f", "03ac93a2219b496d3f6e515042b3cff5bd7432a3", "36978ea3a0571537c80b334591e2a48510c56d68", "063c050f02782e17fbea4867b2216e5a896967fd", "62e14dec73970514a5e3f81b059d63b34e9ad37c", "0834e74304b547c9354b6d7da6fa78ef47a48fa8", "2414283ed14ebb0eec031bb75cd25fbad000687e", "063ac7f23c65bd97633d4f6bf4c31eb70879aa7c" ], "paperAbstract": "Almost all real-world social networks are dynamic and evolving with time, where new links may form and old links may drop, largely determined by the homophily of social actors (i.e., nodes in the network). Meanwhile, (latent) properties of social actors, such as their opinions, are changing along the time, partially due to social influence received from the network, which will in turn affect the network structure. Social network evolution and node property migration are usually treated as two orthogonal problems, and have been studied separately. In this paper, we propose a co-evolution model that closes the loop by modeling the two phenomena together, which contains two major components: (1) a network generative model when the node property is known; and (2) a property migration model when the social network structure is known. 
Simulation shows that our model has several nice properties: (1) it can model a broad range of phenomena such as opinion convergence (i.e., herding) and community-based opinion divergence; and (2) it allows to control the evolution via a set of factors such as social influence scope, opinion leader, and noise level. Finally, the usefulness of our model is demonstrated by an application of co-sponsorship prediction for legislative bills in Congress, which outperforms several state-of-the-art baselines.", "pdfUrls": [ "http://web.cs.ucla.edu/~ypgu/papers/KDD17_coevolution.pdf", "http://doi.acm.org/10.1145/3097983.3098002", "http://web.cs.ucla.edu/~yzsun/papers/2017_kdd_coevolution.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3aaa98d867c663e815ba02856c996b049215b401", "sources": [ "DBLP" ], "title": "The Co-Evolution Model for Social Network Evolving and Opinion Migration", "venue": "KDD", "year": 2017 }, "3acb4d57415650bda8e85a1827ef082c5431a540": { "authors": [ { "ids": [ "1829302" ], "name": "Chenzi Zhang" }, { "ids": [ "3228071" ], "name": "Fan Wei" }, { "ids": [ "1752133" ], "name": "Qin Liu" }, { "ids": [ "15855105" ], "name": "Zhihao Gavin Tang" }, { "ids": [ "7718952" ], "name": "Zhenguo Li" } ], "doi": "10.1145/3097983.3098033", "doiUrl": "https://doi.org/10.1145/3097983.3098033", "entities": [ "Algorithm", "Benchmark (computing)", "Best, worst and average case", "Computation", "Edge coloring", "Experiment", "Heuristic", "NP-completeness", "Partition problem", "Polynomial", "Time complexity" ], "id": "3acb4d57415650bda8e85a1827ef082c5431a540", "inCitations": [ "f226b5932b3311d42df2d944d2eb1369b504b5eb" ], "journalName": "", "journalPages": "605-614", "journalVolume": "", "outCitations": [ "1156f60e40548096df49528b1342bb3e88b0f378", "22c74d8be071084ce8812af19548e7bf2bf0c8b6", "21a56bfed90e46a74415329024cd5194dbecf284", "70954d2477d08afa838e827459df0e3ca5882912", "53bd1357a20550caf1317803e7bc88d3440a6984", 
"2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "0ad8e89091eed09217e66adc98136126addc2619", "0706356c9ab6014d6b04577d38289ea8328291a5", "6de3915df2b9927a78f213629f3bcb052ec21e8b", "6a888f3dd0a17b0241be61daa378ba6caffa6617", "c68af4273c06f29a26cf60c112a3999aca930f20", "0a3b2d2ddf6e832a0b282380abb9b2eeb7d97177", "0bf5b73d421b69c49de0665d581e1d3ebc8cb0bf", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "15ad785d44ff34ad028426c31a1e8d43b2b44ab6" ], "paperAbstract": "We consider the edge partitioning problem that partitions the edges of an input graph into multiple balanced components, while minimizing the total number of vertices replicated (one vertex might appear in more than one partition). This problem is critical in minimizing communication costs and running time for several large-scale distributed graph computation platforms (e.g., PowerGraph, Spark GraphX). We first prove that this problem is NP-hard, and then present a new partitioning heuristic with polynomial running time. We provide a worst-case upper bound of replication factor for our heuristic on general graphs. To our knowledge, we are the first to provide such bound for edge partitioning algorithms on general graphs. Applying this bound to random power-law graphs greatly improves the previous bounds of expected replication factor. Extensive experiments demonstrated that our partitioning algorithm consistently produces much smaller replication factors on various benchmark data sets than the state-of-the-art. 
When deployed in the production graph engine, PowerGraph, in average it reduces replication factor, communication, and running time by 54%, 66%, and 21%, respectively.", "pdfUrls": [ "http://www.ee.columbia.edu/~zgli/papers/KDD-2017-GraphEdgePartitioning.pdf", "http://doi.acm.org/10.1145/3097983.3098033" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3acb4d57415650bda8e85a1827ef082c5431a540", "sources": [ "DBLP" ], "title": "Graph Edge Partitioning via Neighborhood Heuristic", "venue": "KDD", "year": 2017 }, "3ad895a6e4ce6f07b722325613b27decf7aef4bc": { "authors": [ { "ids": [ "31901851" ], "name": "Vincent T. Lee" }, { "ids": [ "3451487" ], "name": "Justin Kotalik" }, { "ids": [ "2896556" ], "name": "Carlo C. del Mundo" }, { "ids": [ "1698528" ], "name": "Armin Alaghi" }, { "ids": [ "1717411" ], "name": "Luis Ceze" }, { "ids": [ "1723213" ], "name": "Mark Oskin" } ], "doi": "10.1109/IPDPS.2017.12", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.12", "entities": [ "Algorithm", "Automaton", "Central processing unit", "Computation", "Computer vision", "Database", "Field-programmable gate array", "Graphics processing unit", "Internet bottleneck", "K-nearest neighbors algorithm", "Machine learning", "Natural language processing", "Program optimization", "Recommender system", "Robotics", "Similarity search", "Speedup", "Throughput" ], "id": "3ad895a6e4ce6f07b722325613b27decf7aef4bc", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "523-534", "journalVolume": "", "outCitations": [ "d4e445281bd0cffb900a7605872849ac2a2a5e31", "086d4ffac8b60821aa05fd14cae101e32eb1e462", "1b20afbd2d2a349737ed3dc246e44bbdba203190", "630eb0c8cf211e95afc1696a2c627abe9e779bb3", "268cd46a06e8e3052bbd64e96fac73d600430281", "396514fb219879a4a18762cddfae2a6a607f439f", "3ad277770454b1f53a1ae8109c35b1b59a22d33f", "19d2b786fec5ded4d6cdca0e21f3c3f5264ecadf", 
"2731046193a1d034ca5544e9cc642957c2d6310d", "10911d4e163f7eeca5b53786814a01694643267a", "bf70d60fc8d1de5fa53e8220a014fe463de4b7e5", "a8ab0fc2f42476d2f76629747ecb981a438ea8ed", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "10e8ebc9a2397336cd03dda18842ad6e7e7299bb", "12d0c11d546d91e776a170898ebf3a38c010695c", "99d80987446ecc7fb546826e7bccebb2fdc5fa12", "93c25da1b96dba6a83defeb05ebd5bd3c66feb87", "06902cb95ede2c305db4000852014f276b25c082", "1b68aa68c70af87fc3b712ff7a4a9aa289bf23bf", "1fcfbc935db4d3297dc69e96d5b6741b7d151a2b", "06c00c5de872edbb24ba5d67992cfbc912ffa7fb", "4cd6b5e470b4205cd1560de42cde8108fa42ba4b", "3ac6671a0c61544b9dab543b116eccdaccc6469e", "1fbbc34d163b42a6cfd14eaff9556359c072e210", "8d0bb67313c489aa90116c0c7df367a6ce46616d", "6bccf2ba321177023d0f1d83484ae81fba687d97", "0bd156b327f14b915a44848b1a0267fe9c30198c", "3f3a44e1ef5acb51d6c53099fd296aa7d40355e0", "8deafe947207eab416d8791f2e750289bd9ac73b", "d4f2f0b971984fa5235ccd76a8bb1441a736bfa5", "2d680892a7318ab7eff879054ea7ab6aeeb51fe9", "3f1e54ed3bd801766e1897d53a9fc962524dd3c2", "179f80848143cf109fa6aebae6c3844da03b062c", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "4f86a09f1c3778203807c60f968605b139efe8d3", "cd800900fbd9bc89a35ff597a7ddfe74aceefa55", "149ad380837451a3903dafbb13f6de3815547852", "a888f137b7d821497ad3a1264ae28a93852c0d75", "56ef240a30a228ea6a6885d09dd3c60d2b021788", "2871f115e7a11c903258491c75d4171fac679344", "10b014e882764f5800ecdcbaba1fa08795d0c54d" ], "paperAbstract": "Similarity search is a critical primitive for a wide variety of applications including natural language processing, content-based search, machine learning, computer vision, databases, robotics, and recommendation systems. At its core, similarity search is implemented using the k-nearest neighbors (kNN) algorithm, where computation consists of highly parallel distance calculations and a global top-k sort. In contemporary von-Neumann architectures, kNN is bottlenecked by data movement which limits throughput and latency. 
In this paper, we present and evaluate a novel automata-based algorithm for kNN on the Micron Automata Processor (AP), which is a non-von Neumann near-data processing architecture. By employing near-data processing, the AP minimizes the data movement bottleneck and is able to achieve better performance. Unlike prior work in the automata processing space, our work combines temporal encodings with automata design to augment the space of applications for the AP. We evaluate our design's performance on the AP and compare to state-of-the-art CPU, GPU, and FPGA implementations; we show that the current generation of AP hardware can achieve over 50x speedup over CPUs while maintaining competitive energy efficiency gains. We also propose several automata optimization techniques and simple architectural extensions that highlight the potential of the AP hardware.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.12", "https://export.arxiv.org/pdf/1608.03175", "http://homes.cs.washington.edu/~vlee2/docs/vlee-ipdps17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3ad895a6e4ce6f07b722325613b27decf7aef4bc", "sources": [ "DBLP" ], "title": "Similarity Search on Automata Processors", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "3aff512047e93f0435402f04ebcd007f33e44c36": { "authors": [ { "ids": [ "1696074" ], "name": "Zhongjie Wang" }, { "ids": [ "1696087" ], "name": "Yue Cao" }, { "ids": [ "1794927" ], "name": "Zhiyun Qian" }, { "ids": [ "2252367" ], "name": "Chengyu Song" }, { "ids": [ "38774813" ], "name": "Srikanth V. 
Krishnamurthy" } ], "doi": "10.1145/3131365.3131374", "doiUrl": "https://doi.org/10.1145/3131365.3131374", "entities": [ "Evasion (network security)", "Failure rate", "Firewall (computing)", "Hypertext Transfer Protocol", "Middlebox", "Pursuit-evasion", "Server (computing)", "State (computer science)", "Tor Messenger" ], "id": "3aff512047e93f0435402f04ebcd007f33e44c36", "inCitations": [ "2d80fe39d1e254e26e1e9915e2196559a2d65024" ], "journalName": "", "journalPages": "114-127", "journalVolume": "", "outCitations": [ "754cc0dc9d88b80eb2a1564fd767d1d7a5b2f833", "1a29dd17b602bbbb487285fc6aa5fa2bb9bf8649", "49cd1030cd8f98b6ec7545750c78d580ca80a43d", "c82e79459a998f9048aa8d902142a0d6e1c69651", "77eeb3832252feae78900cc398b37d6f409c535a", "28d931067f9a7ea393910fed8c68d098f7ca9fcd", "067eaf07784cdab6836395a2b2f34a506d860fee", "2d80fe39d1e254e26e1e9915e2196559a2d65024", "b77fa8a2c03c234c79429416596046066905e459", "791382f7dc39154ec39ea249493d5f653b739df4", "70696431430bab0d406cb23f503af5841961ba76", "39e45a420ee287489bbd5245946ba8c6a2305848", "1ff9b151019648eaea901ee3c2b795e921358b21", "807c486f37a114bc726ca9457b3c25139313f9c1", "67c4aad4acd0cc748868731b71bd3940d2674adc", "b47f692948689d7d3fb9d902b722a52144ea5059", "5e4510e4f452d518a150ce1c8fd0ddaf29314f47", "269b2257400306de3e4ef270982ea645b94ac7f7", "15037a71328f065ae8e8034958b9aad15c7a79c2", "2cc82a4971c957a7e5654e078e5087312f097316", "754d3aa641d9da8e50796c3c4015fa064f10c1ba", "2ed69f9aa374af4113f937df1482d9731911d511", "62a13d8ce14fe16365862828122e08c944aa1c65" ], "paperAbstract": "Understanding the behaviors of, and evading state-level Internet-scale censorship systems such as the Great Firewall (GFW) of China, has emerged as a research problem of great interest. One line of evasion is the development of techniques that leverage the possibility that the TCP state maintained on the GFW may not represent the state at end-hosts. 
In this paper we undertake, arguably, the most extensive measurement study on TCP-level GFW evasion techniques, with several vantage points within and outside China, and with clients subscribed to multiple ISPs. We find that the state-of-the art evasion techniques are no longer very effective on the GFW. Our study further reveals that the primary reason that causes these failures is the evolution of GFW over time. In addition, other factors such as the presence of middleboxes on the route from the client to the server also contribute to previously unexpected behaviors.\n Our measurement study leads us to new understandings of the GFW and new evasion techniques. Evaluations of our new evasion strategies show that our new techniques provide much higher success rates of (compared to prior schemes) ≈ 90% or higher. Our results further validate our new understandings of the GFW's evolved behaviors. We also develop a measurement-driven tool INTANG, that systematically looks for and finds the best strategy that works with a server and network path. Our measurements show that INTANG can yield near perfect evasion rates and is extremely effective in aiding various protocols such as HTTP, DNS over TCP, and Tor in evading the GFW.", "pdfUrls": [ "https://conferences.sigcomm.org/imc/2017/papers/imc17-final59.pdf", "https://conferences.sigcomm.org/imc/2017/slides/IMC17.pdf", "http://doi.acm.org/10.1145/3131365.3131374", "http://www.cs.ucr.edu/~zhiyunq/pub/imc17_censorship_tcp.pdf", "http://www.cs.ucr.edu/~krish/imc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3aff512047e93f0435402f04ebcd007f33e44c36", "sources": [ "DBLP" ], "title": "Your state is not mine: a closer look at evading stateful internet censorship", "venue": "IMC", "year": 2017 }, "3b051bc284d1db60ed4d81851c1a016f35bcd506": { "authors": [ { "ids": [ "1994372" ], "name": "Huan Feng" }, { "ids": [ "1910642" ], "name": "Kassem Fawaz" }, { "ids": [ "1730051" ], "name": "Kang G. 
Shin" } ], "doi": "10.1145/3117811.3117823", "doiUrl": "https://doi.org/10.1145/3117811.3117823", "entities": [ "Authentication", "Broadcast automation", "Electron mobility", "Google Now", "Headphones", "Home automation", "Microphone", "Replay attack", "Siri", "User interface", "Wearable computer", "Wearable technology" ], "id": "3b051bc284d1db60ed4d81851c1a016f35bcd506", "inCitations": [ "5c571f2cd237ba1740dae2caa182e43e27fbb3e9", "3485f74389f45d8dd4c5aac80ed59e7d171aab12", "9123089825cad35ab5a9bc45452d67fb722be529" ], "journalName": "", "journalPages": "343-355", "journalVolume": "", "outCitations": [ "2c3471292ae54ba8c2342f186c9d825b697a377a", "7f6fdcd579bc17734f25c88d69ab476e31bc95af", "02d7e9997499471f994b1850db97aa8e60924ced", "3adf5f34cf1208e3708d8c3a917690dd2bebe017", "4e751b8493030dbbcf7d313cafb829130f95d714", "5844afbc36db54d1792e74a00bd70870de54faf2", "d1fa8485ad749d51e7470d801bc1931706597601", "2996fcfe70f9cb61ba4e637dc1676983bbc8bc35", "22410d40cc64428cbcd1028bf962dc41eb8a4ea8", "26897471f6bfc0f093711b44d73696c5cca60d0e", "968b491c42925478d4ffb7561cdf3095851da5ab", "3b59f6a0d1b0b982c5324132ad1ca9e7a923d345", "190c25f9f564b4703dc362b21c7fe3996a098ef4", "03967dc536786d93b1c5a17d9f04a39c46532094", "abce5f6d32ce30e707d53b62dd83fdf6c0111caf", "53d34dd1d2d434e232228da9457cafb06fd4638f", "a105fdd213d8089d1b23acdbabfbe330208be468", "755eebe66751ff9b2aee8bc8349247a769da25be", "35716646b2ce09a1dd0dd584b5adc7242df0beff", "05d3774554e6a957df0c7ae1f78173f427385c74", "54a8251af7c57d7c62cca3ccbc2e0c2d7295a0b4", "0b4d07005f9a8b406697353a29a9f7d79caf6f59", "7095ea5eb985d8a06f3d9aa65698c2b916313064", "2d09cf5a9329887bb7bb05fffe15439ea89261b1", "6d8c9fcce8177d6f8d122d653c7d32d7624d6714", "ed7ae9a10afc1d6ed7e7a99f31693fb96fa0ce0e" ], "paperAbstract": "Voice has become an increasingly popular User Interaction (UI) channel, mainly contributing to the current trend of wearables, smart vehicles, and home automation systems. 
Voice assistants such as Alexa, Siri, and Google Now, have become our everyday fixtures, especially when/where touch interfaces are inconvenient or even dangerous to use, such as driving or exercising. The open nature of the voice channel makes voice assistants difficult to secure, and hence exposed to various threats as demonstrated by security researchers. To defend against these threats, we present VAuth, the first system that provides continuous authentication for voice assistants. VAuth is designed to fit in widely-adopted wearable devices, such as eyeglasses, earphones/buds and necklaces, where it collects the body-surface vibrations of the user and matches it with the speech signal received by the voice assistant's microphone. VAuth guarantees the voice assistant to execute only the commands that originate from the voice of the owner. We have evaluated VAuth with 18 users and 30 voice commands and find it to achieve 97% detection accuracy and less than 0.1% false positive rate, regardless of VAuth's position on the body and the user's language, accent or mobility. VAuth successfully thwarts various practical attacks, such as replay attacks, mangled voice attacks, or impersonation attacks. 
It also incurs low energy and latency overheads and is compatible with most voice assistants.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117823", "https://arxiv.org/pdf/1701.04507v1.pdf", "https://kabru.eecs.umich.edu/wordpress/wp-content/uploads/continuous-authentication-voice.pdf", "http://arxiv.org/abs/1701.04507" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3b051bc284d1db60ed4d81851c1a016f35bcd506", "sources": [ "DBLP" ], "title": "Continuous Authentication for Voice Assistants", "venue": "MobiCom", "year": 2017 }, "3b07df59e873be87142a49c229456569ece15475": { "authors": [ { "ids": [ "1738814" ], "name": "Yin Li" }, { "ids": [ "39049654" ], "name": "Hao Wang" }, { "ids": [ "3133825" ], "name": "Xuebin Zhang" }, { "ids": [ "2407199" ], "name": "Ning Zheng" }, { "ids": [ "1891136" ], "name": "Shafa Dahandeh" }, { "ids": [ "32180339" ], "name": "Tong Zhang" } ], "doi": "", "doiUrl": "", "entities": [ "Big data", "Code word", "Data center", "Erasure code", "Experiment", "Hard disk drive", "Magnetic storage", "Moore's law", "Non-RAID drive architectures", "Reed\u2013Solomon error correction", "Retry" ], "id": "3b07df59e873be87142a49c229456569ece15475", "inCitations": [ "ec3924af8c1cb428b4f1309b9a9ca3c86abd6631" ], "journalName": "", "journalPages": "135-148", "journalVolume": "", "outCitations": [ "bcdd54c3faa4d0fdebebba7b0815341ed45abd90", "d2ab8a8fc0f4f06c0b89ef1d05314fb882ded44c", "0e1a80517cb5ddba06111ec20a0da937bf105e5a", "af0a16a96fae5407d7e83ebf33d96b1523c828a5", "3f4297d2e5ae196f30f9a18dcd8400dfca5d2e47", "a4c529cfc2cd2ff5f3f1f018ea82fb2e22630695", "3b547d706d33c110f96bf1c0e805ab8cc82afdbf", "0d77bb6ef2bb6d165f58bf0251bf3d7cf29f1491", "6a180e2e4a171c39577afe90b9629bae3b9294ed", "841489d2b678ca60b552b0610d06eaf43f54ac15", "2976de31d1b21978e3ba8b723250553fe5d1d0c5", "133eacaf0ad25b8364cb4510007d9363298e8adf", "173f31b150fb7e000a4f0e9d5a880e54d94b4b21", "114308433282441cbe153e39ff10ce4cf5fbc0ef", 
"0b5c8ed0dac9bf073568870c86adfd5d0775c0bf", "8a68400ed270a83c5239f0e90319b69c604354ab", "09bed5a75cbdba4b930cdca6bd2499d61121e030", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "42512431ca7fffdbc80eb7280d093efcead3d48d", "c2f4ccc7feb6bd3928d14f2352d156d391eb0111", "1f15211337ecfb76b9bcba5f3ab844351d0b063e", "58b628792d3eb22a034a871ed3cf373afe591928", "75032b6df2f3988d2cb6988c73cbc01c7b9e80cb", "3b50c635cd65e286ac1e32c0a26842d7d84d20b6", "4b14acc92dee6c04165cfa8a13a56dbd379bc2ec", "01b1c99cb2e179a26d7237b835b085945ef28a00" ], "paperAbstract": "This paper presents a simple yet effective design solution to facilitate technology scaling for hard disk drives (HDDs) being deployed in data centers. Emerging magnetic recording technologies improve storage areal density mainly through reducing the track pitch, which however makes HDDs subject to higher read retry rates. More frequent HDD read retries could cause intolerable tail latency for large-scale systems such as data centers. To reduce the occurrence of costly read retry, one intuitive solution is to apply erasure coding locally on each HDD or JBOD (just a bunch of disks). To be practically viable, local erasure coding must have very low coding redundancy, which demands very long codeword length (e.g., one codeword spans hundreds of 4kB sectors) and hence large file size. This makes local erasure coding mainly suitable for data center applications. This paper contends that local erasure coding should be implemented transparently within filesystems, and accordingly presents a basic design framework and elaborates on important design issues. Meanwhile, this paper derives the mathematical formulations for estimating its effect on reducing HDD read tail latency. Using Reed-Solomon (RS) based erasure codes as test vehicles, we carried out detailed analysis and experiments to evaluate its implementation feasibility and effectiveness. 
We integrated the developed design solution into ext4 to further demonstrate its feasibility and quantitatively measure its impact on average speed performance of various big data benchmarks.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/fast17/fast17-li.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17-li.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_li.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/li", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_li.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/3b07/df59e873be87142a49c229456569ece15475.pdf", "s2Url": "https://semanticscholar.org/paper/3b07df59e873be87142a49c229456569ece15475", "sources": [ "DBLP" ], "title": "Facilitating Magnetic Recording Technology Scaling for Data Center Hard Disk Drives through Filesystem-Level Transparent Local Erasure Coding", "venue": "FAST", "year": 2017 }, "3b15ecfa5e4dda290d58d238690f7cd64b1f25fe": { "authors": [ { "ids": [ "2162613" ], "name": "Aditya Sundarrajan" }, { "ids": [ "2801708" ], "name": "Mingdong Feng" }, { "ids": [ "3140767" ], "name": "Mangesh Kasbekar" }, { "ids": [ "1719320" ], "name": "Ramesh K. 
Sitaraman" } ], "doi": "10.1145/3143361.3143368", "doiUrl": "https://doi.org/10.1145/3143361.3143368", "entities": [ "Change detection and notification", "Distributed cache", "Fourier analysis", "Memory footprint", "Provisioning", "Server (computing)", "Web cache", "Web page" ], "id": "3b15ecfa5e4dda290d58d238690f7cd64b1f25fe", "inCitations": [], "journalName": "", "journalPages": "55-67", "journalVolume": "", "outCitations": [ "32f6ded4e88667f34fe49a0ee80d9a9093b00547", "10ba5bd8732e8460e2876c6132129aa5f9c9b337", "4e1ce62215f1fb989b80d324fae00b9f76ec2d34", "4f1cf2a9244816dc9ea7be304b85a45b7e0941a4", "23a9e1f8cefc76b71f0cf5e1ccf5a6485c19cadf", "0a40663fdcf7c5fb7cfc459693116c41309e7eca", "38628d26d4f624378f4303b61ae93c5d34d007c3", "333da42f4369dff8ca905ad21ea6ee2f5dc99d55", "11ebb411b138d2acdd481a6920b822fbc213cdc0", "0563ad22510edae664f9c04386ad91ec57eb7786", "4cead48e2eac91560105871b78268e3164eb382b", "0332013fc380ca283d3afc457c430c513d19cc51", "15a1e04faa63dae8e8cef5d8cb1478ea2655d124", "32e683a70c491d4a44cae0d1a3b7d66513e62019", "00d40e74ccffef2ba3e4477f48b6265dbc5e9c1f", "fa2a489c0f5d5cdc3e3792cc2883d858edea3a9f", "4f2c94a8d689863859ac849ebb83823770cf3d6a", "f8aa33900f552f8112d6186d78bc845d2dfc0007", "4a58e3066f12bb86d7aef2776e9d8a2a4e4daf3e", "2892b18872a42fbc6173e76263ac3e9251e2a334", "47ccfd0c9dc218f5496783310a28c581730b9ca7", "fe20c9537ea0be2f2a79388a53e1f2fb4b7fe771", "4452a2f4f4fd1df19777f4b0ff482403f7b5091e" ], "paperAbstract": "Modern CDNs cache and deliver a highly-diverse set of traffic classes, including web pages, images, videos and software downloads. It is economically advantageous for a CDN to cache and deliver all traffic classes using a shared distributed cache server infrastructure. 
However, such sharing of cache resources across multiple traffic classes poses significant cache provisioning challenges that are the focus of this paper.\n Managing a vast shared caching infrastructure requires careful modeling of user request sequences for each traffic class. Using extensive traces from Akamai's CDN, we show how each traffic class has drastically different object access patterns, object size distributions, and cache resource requirements. We introduce the notion of a footprint descriptor that is a succinct representation of the cache requirements of a request sequence. Leveraging novel connections to Fourier analysis, we develop a footprint descriptor calculus that allows us to predict the cache requirements when different traffic classes are added, subtracted and scaled to within a prediction error of 2.5%. We integrated our footprint calculus in the cache provisioning operations of the production CDN and show how it is used to solve key challenges in cache sizing, traffic mixing, and cache partitioning.", "pdfUrls": [ "http://adityasundarrajan.github.io/FD-conext-2017.pdf", "http://doi.acm.org/10.1145/3143361.3143368" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3b15ecfa5e4dda290d58d238690f7cd64b1f25fe", "sources": [ "DBLP" ], "title": "Footprint Descriptors: Theory and Practice of Cache Provisioning in a Global CDN", "venue": "CoNEXT", "year": 2017 }, "3b1654018723547187476181f583092989793314": { "authors": [ { "ids": [ "2423797" ], "name": "Panagiotis Papadopoulos" }, { "ids": [ "1801086" ], "name": "Pablo Rodriguez" }, { "ids": [ "1946641" ], "name": "Nicolas Kourtellis" }, { "ids": [ "1683137" ], "name": "Nikolaos Laoutaris" } ], "doi": "10.1145/3131365.3131397", "doiUrl": "https://doi.org/10.1145/3131365.3131397", "entities": [ "Browser extension", "Browsing", "Ecosystem", "Encryption", "Ground truth", "Online advertising", "Plaintext", "Real-time bidding", "Server (computing)" ], "id": 
"3b1654018723547187476181f583092989793314", "inCitations": [ "0e17db9ac9c888cff6a76472256dccb4ec6e4072" ], "journalName": "", "journalPages": "142-156", "journalVolume": "", "outCitations": [ "5b8d44acc28ee24e5efb6337f78002b212383b30", "0ccd81d4cb36a23c817a5df196cb7ef42bf623b3", "6aeeca521405d9c7da0620e366b5ea585616d06a", "a8ad983b9ce6ec697029600ab076a613f1c0398f", "5a032460c589a67e7c73b19c93aa591331758139", "2e7abf1e48588dd07f6aaba750b83ec8fbbb70be", "9a3c791067911d17a79918b1b0b5826beaeb2fe1", "fbb99bae6880af32757b1e7074b4d3dbc70bc8ec", "3168dcbdce9ba0f8076133da4ffa6e6732484a4a", "ae51dd320b0699ad0774ffd0f9bc9f8357934ab6", "108b8f5d1835030823787a054a3b7ba7bba4308b", "6231f260835942769ff90505724eb97f99796a0b", "05ad6c3ab7a0b1ab0c4fc3af9f1622cf6c0fa40e", "834cdbde6e7800f9f50d4884858bb093fc3b65f6", "5776373e3e6c3a619252e2c132b5a4414d6c2271", "47ba7e608477f169f6805af1221a7534ceadadd3", "c75dfb6d16d58a6f61a07ae5b0682b47c9724e37", "36a6dd85cbf5d1e2eec1b7f2f77cfcb22de40a24", "4cf4ea6c801d3dad696464b198e7c51f1a77b302", "1e2a5126486820abea0cdaccf996c975b9103443", "1265cc97d5e9da0ec8f2bee63ce3c7807fdc166c", "0d2f693901fba451ede4d388724b0e3f57029cd3", "c72d3dcfee09798f83f3d9aa8e7926662a9df8f4", "a13093bc1c6f82f730833a2a1989e9304e868253", "0d25768f709b3455e0e6c1d526cc41433b86f7f5", "01dbc5466cce6abd567cc5b34a481f5c438fb15a", "cb8e2e279a13f0c81861cd726dbd0c7a4dbf97e8" ], "paperAbstract": "Online advertising is progressively moving towards a programmatic model in which ads are matched to actual interests of individuals collected as they browse the web. Letting the huge debate around privacy aside, a very important question in this area, for which little is known, is: How much do advertisers pay to reach an individual?\n In this study, we develop a first of its kind methodology for computing exactly that - the price paid for a web user by the ad ecosystem - and we do that in real time. 
Our approach is based on tapping on the Real Time Bidding (RTB) protocol to collect cleartext and encrypted prices for winning bids paid by advertisers in order to place targeted ads. Our main technical contribution is a method for tallying winning bids even when they are encrypted. We achieve this by training a model using as ground truth prices obtained by running our own \"probe\" ad-campaigns. We design our methodology through a browser extension and a back-end server that provides it with fresh models for encrypted bids. We validate our methodology using a one year long trace of 1600 mobile users and demonstrate that it can estimate a user's advertising worth with more than 82% accuracy.", "pdfUrls": [ "http://doi.acm.org/10.1145/3131365.3131397", "https://arxiv.org/pdf/1701.07058v3.pdf", "https://conferences.sigcomm.org/imc/2017/slides/rtbPaper.pdf", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final193.pdf", "https://arxiv.org/pdf/1701.07058v2.pdf", "https://arxiv.org/pdf/1701.07058v1.pdf", "http://www.ics.forth.gr/_publications/imc17-panpap.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3b1654018723547187476181f583092989793314", "sources": [ "DBLP" ], "title": "If you are not paying for it, you are the product: how much do advertisers pay to reach you?", "venue": "IMC", "year": 2017 }, "3b25e9113a7efef353261afa746dc40b371079d4": { "authors": [ { "ids": [ "1841992" ], "name": "Zolt\u00e1n \u00c1d\u00e1m Mann" }, { "ids": [ "1909926" ], "name": "Andreas Metzger" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud computing", "Component-based software engineering", "Information privacy", "Malware", "Multitenancy", "Open-source hardware", "Personally identifiable information", "Software deployment", "Virtual machine" ], "id": "3b25e9113a7efef353261afa746dc40b371079d4", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "609-618", 
"journalVolume": "", "outCitations": [ "0d867d68e872ec10b60dc2515a670ba6db873a07", "57f8f89ca601a55fa65faa6a268bb6547af8d12b", "6d1ec493efd37a78292dc90f15933f29dc722025", "d9e16bbf2d491612c3fd52e22270af73908a730d", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "fcbde3f4ea02b812d77d6370c61d41bc413d40d0", "e913c9bcf3bf094c1ac260da39156c473351c4bc", "cc5b6bcb58e6ef1a6fc3d08dd39681ec8d9c5897", "a04754b06128be7dd27ff38bbaf9de5335bf0f5a", "a4e460da5b7460c8b32f4f7fe9811dfc65412f4e", "7f636035e48069345bef69d5a3e9b1dacdb54e81", "60084409959d518e0f0a2f0b89d3afeb93f3eb67", "3d8b53581886c5b25ff5811bea97eeb0b9fba5d1", "56720cac668ccbe173146f3a16798f714b07ef26", "061e1a06bbd86c1a20d22476e7be63083dd76fff", "587458690ae9c34aabbfb48a74e2d2ece9a13bac", "082d69dd527bc3dd20ba7c6df9900607acebcfa7", "61c36c2b51310846f212f806c849726d10fa4a84", "2ddd179040a880aa059c1f02d6f49af776e86e69", "66df27e43c2313e15301e2168e8a3ad3f905c11a", "46d63d1b3ea2cab49a863b56b97f263f75c956f3" ], "paperAbstract": "Concerns about protecting personal data and intellectual property are major obstacles to the adoption of cloud services. To ensure that a cloud tenant's data cannot be accessed by malicious code from another tenant, critical software components of different tenants are traditionally deployed on separate physical machines. However, such physical separation limits hardware utilization, leading to cost overheads due to inefficient resource usage. Secure hardware enclaves offer mechanisms to protect code and data from potentially malicious code deployed on the same physical machine, thereby offering an alternative to physical separation. We show how secure hardware enclaves can be employed to address data protection concerns of cloud tenants, while optimizing hardware utilization. 
We provide a model, formalization and experimental evaluation of an efficient algorithmic approach to compute an optimized deployment of software components and virtual machines, taking into account data protection concerns and the availability of secure hardware enclaves. Our experimental results suggest that even if only a small percentage of the physical machines offer secure hardware enclaves, significant cost savings can be achieved.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101194" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3b25e9113a7efef353261afa746dc40b371079d4", "sources": [ "DBLP" ], "title": "Optimized Cloud Deployment of Multi-tenant Software Considering Data Protection Concerns", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "3b29957c1624402e2a6fc5754f7c4ae9a0692fe5": { "authors": [ { "ids": [ "17816610" ], "name": "Piyus Kedia" }, { "ids": [ "38731436" ], "name": "Manuel Costa" }, { "ids": [ "12353823" ], "name": "Matthew Parkinson" }, { "ids": [ "1796965" ], "name": "Kapil Vaswani" }, { "ids": [ "1757457" ], "name": "Dimitrios Vytiniotis" }, { "ids": [ "40373059" ], "name": "Aaron Blankstein" } ], "doi": "10.1145/3062341.3062376", "doiUrl": "https://doi.org/10.1145/3062341.3062376", "entities": [ "Compiler", "Garbage collection (computer science)", "Imperative programming", "Java", "Manual memory management", "Memory management", "Memory safety", "Pointer (computer programming)", "Programming language", "Programming model", "Run time (program lifecycle phase)", "Software bug", "Type safety", "Working set" ], "id": "3b29957c1624402e2a6fc5754f7c4ae9a0692fe5", "inCitations": [ "84b7a2fb52a618f0129ac324406fa5ab121cb168" ], "journalName": "", "journalPages": "233-247", "journalVolume": "", "outCitations": [ "8af43ca7e7d2748f1eaffa854a4bb8a5ed75c178", "1e2ec62af1100ec0c5da0416e1a8489b63044e03", "3829df26d4ce686251b9b5030893febd75162539", 
"4e0ee850f7e8323fbb0fbb3591c671926cf22f4d", "3d6395a04f7436b143339bbb7e4e46dfaaaf8afb", "1c8378e621cec5ecd94974efaf305275b8e4186b", "d3d55c6471fc151457f13ebf7600582d292aff4b", "13abd63c5f3be0381fa790cecae0b5d258c9d3ca", "4fe68f0468d4e6a47cd7f747f49c307dea87cd89", "0ac1b0ff1cdb2ee84d5634226950c7ceed1ec8c1", "6973083bca583e26a0d8e7709ce7b9888cf3ee69", "05cfd07bb56ad7c8077e8aad76cfa1d4e75b0d5e", "0608d9937c074520cdc93cc444cc1c77039c5332", "4256339f61d809e5092b68a505f7d37099cbd341", "15d144f09b05af8e0d9076d401893f4a846ef9d5", "c4e77ec0e6e4ac6638b662bfe5342439ad4451de", "21ca94f33fde816ac4f5e69cf9a35bd5e246bcf3", "28c3b2e9cd7bead2f908871f3f5f6f9a5d914c27", "41639da89938f4d8a160bd7a2f3d890e51a18bf1", "44b11f4ecab634a3283f55929a9c2ed30513ae2d", "43fb7b102ea54ce51b6fcd42005698ae1399e25e", "30345844de1c6969537ae2ac180ac0743b081e7f", "14d3104c58ad60e02c3ab9d9433093fe5f21d00c", "1591e9cf9c5d5fa42e7b5e48bd76f43a0a6e8f0b", "188c0013d5f79072ee97f8a48190cbe54b2009b1", "2813b5741442ca4910e456576dbbd48bc2cb58e6", "0e55379d27454c5d9d72e4ba4b3752007b9f886f", "426d1336511155446b638684f92a28f61ef06d1c", "2815e28b533f8770a3ef84618e189c325c2ece61", "77539d44b73f3e84bca585d309129e9a1a39bd2a", "228c64750a11823a712f7414711e3b073b861c28", "1665a8b2967730fb32b985bd8a38b43ff74550e8", "dbce3d345f3c43c51d8cc71c17d073e716a4d07c", "9117c75f62162b0bcf8e1ab91b7e25e0acc919a8", "0c36460e328643f98c09c7175608f51f1df5ccdf", "2ce7803031fcba0fa5a397e85039e762cee59f83", "d6a9a192352c78215127c09edf18e7a329e50942", "269d84d3d48dfb90cd230d2bcf027973f4562052" ], "paperAbstract": "Safe programming languages are readily available, but many applications continue to be written in unsafe languages because of efficiency. As a consequence, many applications continue to have exploitable memory safety bugs. Since garbage collection is a major source of inefficiency in the implementation of safe languages, replacing it with safe manual memory management would be an important step towards solving this problem. 
\n Previous approaches to safe manual memory management use programming models based on regions, unique pointers, borrowing of references, and ownership types. We propose a much simpler programming model that does not require any of these concepts. Starting from the design of an imperative type safe language (like Java or C#), we just add a delete operator to free memory explicitly and an exception which is thrown if the program dereferences a pointer to freed memory. We propose an efficient implementation of this programming model that guarantees type safety. Experimental results from our implementation based on the C# native compiler show that this design achieves up to 3x reduction in peak working set and run time.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062376", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/03/kedia2017mem.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3b29957c1624402e2a6fc5754f7c4ae9a0692fe5", "sources": [ "DBLP" ], "title": "Simple, fast, and safe manual memory management", "venue": "PLDI", "year": 2017 }, "3b2ca54fc3f6722a5709f6fa3c22e6c96850cb53": { "authors": [ { "ids": [ "34854419" ], "name": "Ian Fox" }, { "ids": [ "9700172" ], "name": "Lynn Ang" }, { "ids": [ "5346009" ], "name": "Mamta Jaiswal" }, { "ids": [ "3417103" ], "name": "Rodica Pop-Busui" }, { "ids": [ "38556322" ], "name": "Jenna Wiens" } ], "doi": "10.1145/3097983.3098068", "doiUrl": "https://doi.org/10.1145/3097983.3098068", "entities": [ "Contextual inquiry", "Motif", "NSA product types", "Sequence motif", "Simulation", "Waveform" ], "id": "3b2ca54fc3f6722a5709f6fa3c22e6c96850cb53", "inCitations": [], "journalName": "", "journalPages": "155-164", "journalVolume": "", "outCitations": [ "23026fd456fdd44f116a42bb0dbacfd48c303217", "8d82a4eab721eebb8ed5ed1b5b9b4e613471c84d", "0c469e044f597854d458c4bb21e4054859ce7d21", "157bf3d0c1aa3812967473b6eca0185bcddbdde2", "0e22016845f8fc8c1e29b9a2964788fb940ebc8a", 
"0a56aa7c56c3d2adbf4184d0cd98687cb0bc6b8f", "0bfdd7fb60bb4959ac38fadca7dcfbf549dd5456", "831ac3ce82b3f742df99135d41cdd1864ee75012", "2636fd7eeb50df17c2d9c955af65954222a541d2", "bbd6a4c91cb4aef877d0895c780c6acdbf085c81", "03c9e963e0896040c96e32057a7bd97eafc3c595", "301d01e74a2d8b5644e73578b04e99fdd76c78d5", "f7c3c44fc2cf0410c9cbb024a765587e8e2d1596", "6bd284db683537ed8b1550999767c4bd31a2037c", "0f7385035524cf74244d274401246c9aecaa90d6", "5fe6f62267f8cba97217820bdf30f319f4dc540a", "244e27bf8c5a5277a733c5271fdeb306f98115d5", "2a51b4fbea138a3a63836715624f2a71d25e25df", "33e79061345188ee7e6a92597023e376e455720f", "ab6d38a9b0e33fea0e4be64c6e0da872c3e412f0", "4b8ba5355635373c624e8efb69e61519123cda69", "0a3ff98cb320527c89a293866309a709b3a6e3d0", "b6eaebe1e2737751ebee1fbe9c91d4c2052edb6a", "2e61fc82bcbdeaa0f8778d51c166e904c04ed34e", "d6932169084b763f16cad7072aabced6118c59f3", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "9443918183e41b72ae581a3db5dc431a984081ad", "1109b7db7e85e6d9030aef74041501c8b9cca0c7", "b50399c29d0ad68bd88cb72db7c996073051edae", "0204b90797eeb00e63561e7decd4b60606b5be1d", "a9efa2d463a9b80a46648b1fc7efab8f7451a45f", "8ec7fde00942c203590cf191dcf61bc0dd03111b", "a433272a07d6840bb49bf19c313f67c0bca11e69", "013214ee2867d02385cef97d87c71f4d1bc78fdd" ], "paperAbstract": "Motifs are a powerful tool for analyzing physiological waveform data. Standard motif methods, however, ignore important contextual information (e.g., what the patient was doing at the time the data were collected). We hypothesize that these additional contextual data could increase the utility of motifs. Thus, we propose an extension to motifs, contextual motifs, that incorporates context. Recognizing that, oftentimes, context may be unobserved or unavailable, we focus on methods to jointly infer motifs and context. Applied to both simulated and real physiological data, our proposed approach improves upon existing motif methods in terms of the discriminative utility of the discovered motifs. 
In particular, we discovered contextual motifs in continuous glucose monitor (CGM) data collected from patients with type 1 diabetes. Compared to their contextless counterparts, these contextual motifs led to better predictions of hypo- and hyperglycemic events. Our results suggest that even when inferred, context is useful in both a long- and short-term prediction horizon when processing and interpreting physiological waveform data.", "pdfUrls": [ "http://arxiv.org/abs/1703.02144", "http://doi.acm.org/10.1145/3097983.3098068" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3b2ca54fc3f6722a5709f6fa3c22e6c96850cb53", "sources": [ "DBLP" ], "title": "Contextual Motifs: Increasing the Utility of Motifs using Contextual Data", "venue": "KDD", "year": 2017 }, "3b38eff9030f362bc0337501f562187479dbb0dd": { "authors": [ { "ids": [ "1769447" ], "name": "Kai Lu" }, { "ids": [ "2128414" ], "name": "Wenzhe Zhang" }, { "ids": [ "1683712" ], "name": "Xiaoping Wang" }, { "ids": [ "1706226" ], "name": "Mikel Luj\u00e1n" }, { "ids": [ "2476643" ], "name": "Andy Nisbet" } ], "doi": "10.1145/3050748.3050751", "doiUrl": "https://doi.org/10.1145/3050748.3050751", "entities": [ "Application checkpointing", "Garbage collection (computer science)", "Location-based service", "Memory management", "Memory management unit", "Memory protection", "Page (computer memory)", "Program analysis", "Virtual machine" ], "id": "3b38eff9030f362bc0337501f562187479dbb0dd", "inCitations": [], "journalName": "", "journalPages": "201-213", "journalVolume": "", "outCitations": [ "16a9c88fad400497635ce4736679abb3c48c84d2", "0e55379d27454c5d9d72e4ba4b3752007b9f886f", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "4fe68f0468d4e6a47cd7f747f49c307dea87cd89", "2194c3460ab71f3826db00b045b2ae590c753319", "624168bb99821e7c9fef722c1758ceda42eba33f", "04a953ba760845232c0f3c6e4dc3ca7b1fb8da4e", "13f6ddd72bcf62dcc13cf4515be29d48948b9693", "1d15930cd9e4ececf22fd96bf9ba52f12dc0665e", 
"fcae2fcef595059529ebe553431ab41b44062ae4", "27650fee2ed40f0e6c214ca112bffd9164b2e0b5", "e67d7810e9f7baad696e1e5be0c9f1dde39a178d", "f6715c2d9d8a76a20f4b857f7377ce63a23f0654", "1591e9cf9c5d5fa42e7b5e48bd76f43a0a6e8f0b", "03e93625d185c0ac144c97fdf269b5ae5f38351e", "2a974da13d6f956e37549378e00f86aa54bc5642", "ae041f8e6228f0ccd8b01ffdeba150e63635c2c4", "c4e77ec0e6e4ac6638b662bfe5342439ad4451de", "0c0ff71e1f225312bd24a2d78153f0b3f3816285", "2c74aeec68efd07d908d4f421a5d4afe8426a18c", "2346439ece014d5e3ce1564adc2a7ca098a37c8e", "3574657705475722b6c398c266805f758268778b", "0653e2ed9f683868cb4539eb8718551242834f6b", "2a9d09d8e2390c92cdaa5c8b98d6dd4cb394f638", "40cb40b7812e019c1051e3a457a8643400b81d51" ], "paperAbstract": "Page protection is often used to achieve memory access monitoring in many applications, dealing with program-analysis, checkpoint-based failure recovery, and garbage collection in managed runtime systems. Typically, low overhead access monitoring is limited by the relatively large page-level granularity of memory management unit hardware support for virtual memory protection. In this paper, we improve upon traditional page-level mechanisms by additionally using hardware support for virtualization in order to achieve fine and flexible granularities that can be smaller than a page. We first introduce a memory allocator based on page protection that can achieve fine-grained monitoring. Second, we explain how virtualization hardware support can be used to achieve dynamic adjustment of the monitoring granularity. In all, we propose a process-level virtual machine to achieve dynamic and fine-grained monitoring. Any application can run on our process-level virtual machine without modification. Experimental results for an incremental checkpoint tool provide a use-case to demonstrate our work. 
Comparing with traditional page-based checkpoint, our work can effectively reduce the amount of checkpoint data and improve performance.", "pdfUrls": [ "http://doi.acm.org/10.1145/3050748.3050751", "https://www.research.manchester.ac.uk/portal/files/56101450/vee2017_checkpointing.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3b38eff9030f362bc0337501f562187479dbb0dd", "sources": [ "DBLP" ], "title": "Flexible Page-level Memory Access Monitoring Based on Virtualization Hardware", "venue": "VEE", "year": 2017 }, "3b506ead7aff20082a5cd55330d12f92f4296dc8": { "authors": [ { "ids": [ "1696196" ], "name": "Pamela Zave" }, { "ids": [ "40238585" ], "name": "Ronaldo A. Ferreira" }, { "ids": [ "31940424" ], "name": "Xuan Kelvin Zou" }, { "ids": [ "9414797" ], "name": "Masaharu Morimoto" }, { "ids": [ "1730356" ], "name": "Jennifer Rexford" } ], "doi": "10.1145/3098822.3098827", "doiUrl": "https://doi.org/10.1145/3098822.3098827", "entities": [ "Bitstream", "Byte", "Correctness (computer science)", "Distributed firewall", "Electron mobility", "Experiment", "Formal verification", "Linux", "Load balancing (computing)", "Middlebox", "Multihoming", "Network security", "Reliability engineering", "Routing", "Scalability", "Single point of failure", "TRAVERSE" ], "id": "3b506ead7aff20082a5cd55330d12f92f4296dc8", "inCitations": [ "c770031f3067aca38a19af1428e68eb907120c72", "83a31c52bed8d3845201acb7a5b4603212b9e8b6", "728cb61e78d55bc5039ed78920b20259a1135cb3" ], "journalName": "", "journalPages": "57-70", "journalVolume": "", "outCitations": [ "29ce5b365786c16ebba8e01a6d67d6fd3145063c", "aebe75efbdade65e22f05b6b8c2386af8fc2b8ff", "02c76f7d61f1ff47609a19f46aec3e6d0c8a9425", "217b49b27c7137a4d7eb91151e0c6ee65e68d383", "382501ee3b61ba6ac7ea9c7662a5a5015ea86b14", "67e6d5e7478c4a7c44f4755f1a0ce160a768f94f", "3369350838fbf5dd53265da784eacf721924bb2d", "a7155ce5f89804bd12a4deea185e34d68257224b", "08ddde0eaf4925704222135788f79fe293c5894d", 
"36bb67d8fba0c85f2495449a9926018827368df5", "16d95a30bf189a0fe37af03eee9cc8af49709cd2", "8006e5208727cae4e6cda7cd8ce42049f4455c74", "3c3da76635e8a4a3fca5fbcc26aaaa0ca7ae3908", "24e13c33e8ac68f6eae9784052e8e1ee70feff98", "692c7931f52fb367930c59fb6ee51b3be30539ab", "05cd49dca40332e85ed5f2d4cb8bde7d5970519c", "207d4c8807382125f20b81f7d657b40c78332dd7", "263e540ae0f7eea6d0b480da9f1cd9ec5227551b", "156fa936f4c46972245c0720e30b11593e934574", "36f16de41470808ada912399f9776018db6b9d71", "0e410a7baeae7f1c8676a6c72898650d1f144ba5", "39bfff0c76441e0af69f3d61915f2ad930bf6b27", "8f6e58b4bc6cf450984ed19c5560eea3b1c7cd89", "7ed8dd92f4a174b630836700cf12d0adebd5c708", "2c7e254cfea97f0b10320d723aa906f3f5c9bed6", "4d03b65744ce64166457436b24d6db23d3c3e493" ], "paperAbstract": "Middleboxes are crucial for improving network security and performance, but only if the right traffic goes through the right middleboxes at the right time. Existing traffic-steering techniques rely on a central controller to install fine-grained forwarding rules in network elements---at the expense of a large number of rules, a central point of failure, challenges in ensuring all packets of a session traverse the same middleboxes, and difficulties with middleboxes that modify the \"five tuple.\" We argue that a session-level protocol is a fundamentally better approach to traffic steering, while naturally supporting host mobility and multihoming in an integrated fashion. In addition, a session-level protocol can enable new capabilities like dynamic service chaining, where the sequence of middleboxes can change during the life of a session, e.g., to remove a load-balancer that is no longer needed, replace a middlebox undergoing maintenance, or add a packet scrubber when traffic looks suspicious. Our Dysco protocol steers the packets of a TCP session through a service chain, and can dynamically reconfigure the chain for an ongoing session. 
Dysco requires no changes to end-host and middlebox applications, host TCP stacks, or IP routing. Dysco's distributed reconfiguration protocol handles the removal of proxies that terminate TCP connections, middleboxes that change the size of a byte stream, and concurrent requests to reconfigure different parts of a chain. Through formal verification using Spin and experiments with our Linux-based prototype, we show that Dysco is provably correct, highly scalable, and able to reconfigure service chains across a range of middleboxes.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098827", "http://www.cs.princeton.edu/courses/archive/spring17/cos598D/DynamicServiceChaining17.pdf", "http://www.cs.princeton.edu/~jrex/papers/dysco17.pdf", "https://www.cs.princeton.edu/~jrex/papers/dysco17.pdf", "http://www2.research.att.com/content/dam/sites/labs_research/content/publications/SDN_Dynamic_service_chaining_with_Dysco.pdf", "http://web2.research.att.com/export/sites/att_labs/techdocs/TD_101657.pdf", "https://www.cs.princeton.edu/courses/archive/spring17/cos598D/DynamicServiceChaining17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3b506ead7aff20082a5cd55330d12f92f4296dc8", "sources": [ "DBLP" ], "title": "Dynamic Service Chaining with Dysco", "venue": "SIGCOMM", "year": 2017 }, "3b56cea1f5d57851cdc9371ca35bb6d426c3c22b": { "authors": [ { "ids": [ "39466169" ], "name": "Jens M\u00fcller" }, { "ids": [ "31913263" ], "name": "Vladislav Mladenov" }, { "ids": [ "1800162" ], "name": "Juraj Somorovsky" }, { "ids": [ "1736206" ], "name": "J\u00f6rg Schwenk" } ], "doi": "10.1109/SP.2017.47", "doiUrl": "https://doi.org/10.1109/SP.2017.47", "entities": [ "Complex network", "Confidentiality", "Denial-of-service attack", "Document processing", "Google Cloud Print", "Internet", "Jumpstart Our Business Startups Act", "Network Computer", "Open-source software", "Paperless office", "Printer (computing)", "Printing", "Real Time Kinematic" ], "id": 
"3b56cea1f5d57851cdc9371ca35bb6d426c3c22b", "inCitations": [ "7c75557b78c56b577caca823b33a96588eaa373d", "12d02f92a0b55fa0445acd496b94f4e6f906bedb" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "213-230", "journalVolume": "", "outCitations": [ "619fb5c5073849f84f59248c1b2e85b10a60c85d", "14ccb8ec1551e7f5c69a8415fa902e01a6d1656d", "78a42a4860a53df3f4508b670fcc0d0c39f2cf77", "0054871fa317a7acc13e830ab6bea9af1f27a776", "55b97032a03aeaca9fd3fdcb87baa789a1f968b6", "da232a9ef2494cfd68a6c273a54e078fc667d597", "494d24a72be788a91a6ca45373c2d33013f33395", "2530df7a3d864c532d9795d848fded731c239af5", "451fc36586fc26c383d22aa77be6baef75c9a2dc", "3e691f1ff41d034732a368cd5391866a9678b59e", "a8a73b74d24249d5d8c90dd8250a7bab34442d9f", "b630c1347972123040c075bbc384fc0f959ccc96", "c93057aec13d8b1557ef1f6f68331cd3aef280c8", "40de144aee6e84d8638684133d0e48cce8ed67c9", "2244e275db4c92056b37058a12b9f3df769baa59", "03f4cd1da04ae4fe9aacb9e01e3b350eb7de70e4" ], "paperAbstract": "The idea of a paperless office has been dreamed of for more than three decades. However, nowadays printers are still one of the most essential devices for daily work and common Internet users. Instead of removing them, printers evolved from simple devices into complex network computer systems, installed directly into company networks, and carrying considerable confidential data in their print jobs. This makes them to an attractive attack target. In this paper we conduct a large scale analysis of printer attacks and systematize our knowledge by providing a general methodology for security analyses of printers. Based on our methodology, we implemented an open-source tool called PRinter Exploitation Toolkit (PRET). We used PRET to evaluate 20 printer models from different vendors and found all of them to be vulnerable to at least one of the tested attacks. These attacks included, for example, simple DoS attacks or skilled attacks, extracting print jobs and system files. 
On top of our systematic analysis we reveal novel insights that enable attacks from the Internet by using advanced cross-site printing techniques, combined with printer CORS spoofing. Finally, we show how to apply our attacks to systems beyond typical printers like Google Cloud Print or document processing websites.", "pdfUrls": [ "https://users.cs.fiu.edu/~carbunar/teaching/cen5079/cen5079.2017/presentations/printer.pdf", "https://www.ieee-security.org/TC/SP2017/papers/64.pdf", "https://doi.org/10.1109/SP.2017.47" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3b56cea1f5d57851cdc9371ca35bb6d426c3c22b", "sources": [ "DBLP" ], "title": "SoK: Exploiting Network Printers", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "3b59a3d653fe3ed2892be57cbf89ce4258e4e209": { "authors": [ { "ids": [ "2981838" ], "name": "Toke H\u00f8iland-J\u00f8rgensen" }, { "ids": [ "9946500" ], "name": "Michal Kazior" }, { "ids": [ "9927524" ], "name": "Dave T\u00e4ht" }, { "ids": [ "3328244" ], "name": "Per Hurtig" }, { "ids": [ "1691426" ], "name": "Anna Brunstrom" } ], "doi": "", "doiUrl": "", "entities": [ "Airtime", "Anomaly detection", "Bufferbloat", "Computer simulation", "Downstream (software development)", "Experiment", "Fairness measure", "Linux", "Linux", "Multiseat configuration", "Network congestion", "Scheduling (computing)", "Software bug", "Testbed", "Throughput", "Wireless access point" ], "id": "3b59a3d653fe3ed2892be57cbf89ce4258e4e209", "inCitations": [ "07a6c453adc7a5844180a10d111f1cc3ebfc7717", "8981ce81580c2de0839e8aba09847ed28f79f073" ], "journalName": "", "journalPages": "139-151", "journalVolume": "", "outCitations": [ "26e7211f6a6ded6220c342751228f6817261d900", "1045aca116f8830e364147de75285e86f9a24474", "cc2bcd6c39ea690bd1fb674c69a3e5f7a7a8e723", "9e10b3969033b183823ce252592a96ca7117b96a", "4c411a54df8df956b5cc24aedbbb6734bca74128", "926921387749a5cf7bcf57967627d47ca9c68292", 
"6797f718cfb0f93f0fa09cad8618788e5d1158c5", "149f62ea9cc01dc88bb10532707db49db10f37b9", "ce3c286e790cedb567344ea09df3189300fc93a2", "608dcee6e91a0c436f9d2b2fdf3fbb3ebcb6d5a4", "2ea8133112b9ed0f6f5d50c70ea5dbbaf2f13b6d", "4095ea1cb2a7925cc2a162add710c553f8a666e1", "e77ca99060b0baa43b23a30fc2a3b6fdbd642d8f", "54f3927de33092a4b22e8689fb1faeb29215c8d4", "4136cfe4a5389480dbaa6bb2a3a23ce00c2d064d", "6935e531624237152c9d413fc6d3ca2497139e0c", "5bc87181b6b4227fd3e60b39929e0030ee8665db", "531957a3e9e47f1993e99bab2391cd828393e2d2", "24b9c3cd060acab854791bedec6f2ac15ba6105f", "be2dd7e6fe602b832f1b8e67476b41bb9239d52e", "976599ba02a2e9de4129f16a11c53ee919abc21b", "7129b305ce45f83127e928e8510da9fae0783905", "ed47e43073b85c27d82d1c286b89de3becedf6fe" ], "paperAbstract": "With more devices connected, delays and jitter at theWiFi hop become more prevalent, and correct functioning during network congestion becomes more important. However, two important performance issues prevent modern WiFi from reaching its potential: increased latency under load caused by excessive queueing (i.e. bufferbloat) and the 802.11 performance anomaly. To remedy these issues, we present a novel two-part solution. We design a new queueing scheme that eliminates bufferbloat in the wireless setting. Leveraging this queueing scheme, we then design an airtime fairness scheduler that operates at the access point and doesn\u2019t require any changes to clients. We evaluate our solution using both a theoretical model and experiments in a testbed environment, formulating a suitable analytical model in the process. We show that our solution achieves an order of magnitude reduction in latency under load, large improvements in multi-station throughput, and nearly perfect airtime fairness for both TCP and downstream UDP traffic. 
Further experiments with application traffic confirm that the solution provides significant performance gains for real-world traffic.We develop a production quality implementation of our solution in the Linux kernel, the platform powering most access points outside of the managed enterprise setting. The implementation has been accepted into the mainline kernel distribution, making it available for deployment on billions of devices running Linux today.", "pdfUrls": [ "https://arxiv.org/pdf/1703.00064v2.pdf", "https://arxiv.org/pdf/1703.00064v1.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/hoilan-jorgesen", "https://www.usenix.org/system/files/conference/atc17/atc17-hoiland-jorgensen.pdf", "http://arxiv.org/abs/1703.00064" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8c1b/f8ef68c11bd23b62d16a08d70fabf9289dff.pdf", "s2Url": "https://semanticscholar.org/paper/3b59a3d653fe3ed2892be57cbf89ce4258e4e209", "sources": [ "DBLP" ], "title": "Ending the Anomaly: Achieving Low Latency and Airtime Fairness in WiFi", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "3b5e08fa4554a27220b5abd41ca9ddf891960b12": { "authors": [ { "ids": [ "32967785" ], "name": "Liang Yuan" }, { "ids": [ "2646526" ], "name": "Yunquan Zhang" }, { "ids": [ "3836055" ], "name": "Peng Guo" }, { "ids": [ "1743257" ], "name": "Shan Huang" } ], "doi": "10.1145/3126908.3126920", "doiUrl": "https://doi.org/10.1145/3126908.3126920", "entities": [ "Computation", "Iteration", "Locality of reference", "Parallel computing", "Tiling window manager" ], "id": "3b5e08fa4554a27220b5abd41ca9ddf891960b12", "inCitations": [], "journalName": "", "journalPages": "49:1-49:13", "journalVolume": "", "outCitations": [ "0e12eb94aab5d64d08baacf0df36a4b7ed054c46", "318def48f4414636555d44f52da8c0bab16a46bc", "53aa3fe2ea1237a7f5f305ed1c2d6f5a1915bda4", "6963b27339daff5ac27ed5552aac5a5df19b9764", "1c067c63ae74483b7f7aa8af395c8bd6111e6366", 
"fe7bd2137955540edc81e84c5051ae32daf1703d", "004eda59c0ffceb2417bee87c95539eae4bdf0cd", "847a43d0d0748478efc233b16a8c17ef600cd3a7", "f8afcda83fc23a7f75a1b0269fb458ee0182b621", "f0f4757aa2f923a349e8357e73850a78e9b80fee", "524da50949c3954ca9353da49256d2414e35ce20", "06b25d569ee60fc4f973a468b091e1b0c0dc8cde", "363859c57d086301b2ea8385e4adf9cfbeb1ea50", "f4dff66ba8f2338d118f379f2eff1410feb57ce6", "6e2c8ec9ff4e3761e18cbc252acc8a95a137215b", "23a5e9c8feb5b28d063d719880faaa56fb32b923", "09b1520aea25ff0b5852d8a777e48eacf5300fac", "4c233ebba1bfa87248d1f4f9d4d010c880f1d5b6", "67064341d0d6b12f3cacbb664d50d6f01abe17d7", "17056314e26434c4e71cf8f30da8926bb858651f", "def34f422d6930bd23d5c58de78be98804e44e97", "2900ebddc2dfb1e4bb7d7eac7384d7f4512b2b9a", "b8e7a9c68d568e62c9ca99375949747b0271ea3c", "a21f19614db362d7c462541022dd2b590742f9d2", "78ee4f6b1eb5405d81cfb45e777c9f909f34008b", "618fa1c14e3c6008eb047002311d21cae412eebc", "2be29021bc9b0fc40f5bfd0ee32fb22861c8f74b", "5b3a623a001d77839964f41a2c20000d2699ba5f", "1d271d3c0f845ebfc54c5046a3f1b54647e8443e", "f30a6f55e4dca3d749de0d02a67aae5a646d1a33", "768141fc4da96c71e04103c6ccdd52169724ecbd", "782d8591afd432a9b2bfe21553a4158a39cb9d1f", "57f635f67fa7a1f742bb1c1f1da3e400c954440f", "337a000aa5891901ee45d19c346c903521eaba0a", "8dc2184214ee39b31e2c0d623842b66c0141984b", "0ebb8ef3ef660ea8484202e74e2e3df7b3c59cc6", "0eb81ce51dcaafe69c3d2d533d1c0cf20ac59d6d", "1bec73cbc0ffb8eb32d6da63895f5319b7409386", "e183b8626cc7ce149237c28821a517a0a31cd241", "019ebe0205a759f8dab80b617f9f8ccd179c5c62", "1deeb53a514d9a54ad690626c5199bb0d117f9a2", "5c1697ae6a204c1660885107c7513ffe976b62cd", "1426b32f40126a0a906121984918ff5fbcb0b4b3", "c688010fdf80996d6f838d908d117f1c7eed8542", "1614eee9fa973d2b9554f37e9ead09ff6dc7542c", "13d3eb5213678ffbbb4773e57754ad94c4ee61b2", "cd8460b716ded1bf937bb78cb29befdfad64faa5", "7c2beb7b8770b40caf55dc10437023e60a8fed35", "2f7fa291bdc6a2f8c7994cf1896868f057a6b0ca", "8d2fb424ccd5ae011dd444ac4fa8282bad9e76ab", 
"101b0f26c6720eb64f82334a7a958ebcc4257424", "dae60807ef1e6fd61a2362c8187b733b08121e1e", "a695fdd90865835234ac5e062c1d1a6ae1f34632", "a26ea27bd62a9257eab5b7448642e971ddc8b1a3", "3087eeb39c88b1fc9bdc72812930451fc98cedec", "ae14fad252dfe0328caf7084c4ac2abf33eddee1", "518cd72a5f12f050492b246ad300a46de7604af2", "3c31999730ef19007df71909f1ae5223825e0ec9", "19e6866714631cf6104d2ba6e72ff8e7a074df51", "4ac67eae41d99eb06c234f2f4997d8a8fb8650b0", "1322fd55045d22849bbc879193af44791e28e510", "998e6eb3d90327c38fdc7f680c75137e4976c679" ], "paperAbstract": "Stencil computations represent a very common class of nested loops in scientific and engineering applications. The exhaustively studied tiling is one of the most powerful transformation techniques to explore the data locality and parallelism. Unlike previous work, which mostly blocks the iteration space of a stencil directly, this paper proposes a novel two-level tessellation scheme. A set of blocks are designed to tessellate the spatial space in various ways. The blocks can be processed in parallel without redundant computation. This corresponds to extending them along the time dimension and can form a tessellation of the iteration space. Experimental results show that our code performs up to 12% better than the existing highly concurrent schemes for the 3d27p stencil.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126920" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3b5e08fa4554a27220b5abd41ca9ddf891960b12", "sources": [ "DBLP" ], "title": "Tessellating stencils", "venue": "SC", "year": 2017 }, "3b9d74f36dcbac12ab912b35aac0d4dec9e9fe97": { "authors": [ { "ids": [ "1793096" ], "name": "Julia L. 
Lawall" }, { "ids": [ "20609305" ], "name": "Derek Palinski" }, { "ids": [ "17706847" ], "name": "Lukas Gnirke" }, { "ids": [ "1715488" ], "name": "Gilles Muller" } ], "doi": "", "doiUrl": "", "entities": [ "2.5D", "Cognitive dimensions of notations", "Compiler", "Device driver", "Error message", "Experiment", "GNU Compiler Collection", "Kernel (operating system)", "Linux", "Linux" ], "id": "3b9d74f36dcbac12ab912b35aac0d4dec9e9fe97", "inCitations": [], "journalName": "", "journalPages": "15-26", "journalVolume": "", "outCitations": [ "46dd6b8867a08bf8796963c937ccd3b09744f38e", "52c02abced0c0a02d25e876f1000db69b319c12f", "76e553eee1d907becc17864891baf09144d8a4d9", "310a8ae3de93b01b300bfa0825110b2752ca150f", "957ab6f57dd99224e0c0a1419605ff9464d38e9f", "661d7617dc27a55d26c1b0dc601b88ef896daafd", "a07ee0c6f7e3989c6c76e1bbeb090394c231a980", "28dff06106ff79bc2196c6602d204d916f9877b2", "8fc3684ea5fe6ef3c06f57746d23cdbcdffd30be", "9e93a9bac27dab3e0f1d4d2ab6e1038c524d192e", "3f7830e9373f7404d137e9ae0e3e11ee4f1ac94b", "a38eb22203553795b7841b183a5a2a724955aef9" ], "paperAbstract": "Porting Linux device drivers to target more recent and older Linux kernel versions to compensate for the everchanging kernel interface is a continual problem for Linux device driver developers. Acquiring information about interface changes is a necessary, but tedious and error prone, part of this task. In this paper, we propose two tools, Prequel and gcc-reduce, to help the developer collect the needed information. Prequel provides language support for querying git commit histories, while gcc-reduce translates error messages produced by compiling a driver with a target kernel into appropriate Prequel queries. We have used our approach in porting 33 device driver files over up to 3 years of Linux kernel history, amounting to hundreds of thousands of commits. In these experiments, for 3/4 of the porting issues, our approach highlighted commits that enabled solving the porting task. 
For many porting issues, our approach retrieves relevant commits in 30 seconds or less.", "pdfUrls": [ "https://www.usenix.org/conference/atc17/technical-sessions/presentation/lawall", "https://www.usenix.org/system/files/conference/atc17/atc17-lawall.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/3b9d/74f36dcbac12ab912b35aac0d4dec9e9fe97.pdf", "s2Url": "https://semanticscholar.org/paper/3b9d74f36dcbac12ab912b35aac0d4dec9e9fe97", "sources": [ "DBLP" ], "title": "Fast and Precise Retrieval of Forward and Back Porting Information for Linux Device Drivers", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "3ba2c28c1477ad04c7f089a725dee2b3ef6ee196": { "authors": [ { "ids": [ "2762014" ], "name": "Chunfeng Yang" }, { "ids": [ "2060052" ], "name": "Huan Yan" }, { "ids": [ "15121583" ], "name": "Donghan Yu" }, { "ids": [ "1689181" ], "name": "Yong Li" }, { "ids": [ "39450301" ], "name": "Dah-Ming Chiu" } ], "doi": "10.1145/3077136.3080769", "doiUrl": "https://doi.org/10.1145/3077136.3080769", "entities": [ "Data mining", "Digital video", "Generative model", "Personally identifiable information", "Sparse matrix", "Streaming media", "User (computing)", "Video clip" ], "id": "3ba2c28c1477ad04c7f089a725dee2b3ef6ee196", "inCitations": [ "f945338f5aacab48e6461d113ccfa40becdf8aab", "417074e735bba6dd67aa20917e8f563057377cce" ], "journalName": "", "journalPages": "175-184", "journalVolume": "", "outCitations": [ "479a7e182c7a6a2e2a802b4085adf91709867d33", "a98014f32fd66a55bd2c851694718a656eed35a4", "184b7281a87ee16228b24716ca02b29519d52eb5", "07d1db388cd489420d40d0edb13e074d86c77dbd", "32f01f0132d456bd271296281b3e1380db39389e", "1c2cae04e0e00dcec3fe6840b2daa00ae9e8a0a3", "4509771bb71500d411ced0d1cb53722fb73c9716", "d5a55a548fc7cd703c1dd8d867ca1eb6b0c0764c", "d5fdc3c0b2049a025091179a73e0e4174105fcd4", "5d1dd378962c7601526f65f69e408f8800a0d3c4", "64c871cd7e0af7e1aeb94d98c6214eb8d8e8b989", "0aa2a4d259433016ebc899c496faea03c024c0bd", 
"240db1ebec5a066bab49ca03f80c961a3be615c5", "fcd9c244d1a208d33f906c3ce81e765e9f0d552b", "2744288f090192987e980274999065ad2d6e45d6", "913d2c8417c7ee4fd63a39817ccfbab9aa2d1da8", "a73b8b201a873854f5423e0510d1560e715eb603", "030dd6b28b0c55dbb411f1eb8c280f5042969260" ], "paperAbstract": "As online video service continues to grow in popularity, video content providers compete hard for more eyeball engagement. Some users visit multiple video sites to enjoy videos of their interest while some visit exclusively one site. However, due to the isolation of data, mining and exploiting user behaviors in multiple video websites remain unexplored so far. In this work, we try to model user preferences in six popular video websites with user viewing records obtained from a large ISP in China. The empirical study shows that users exhibit both consistent cross-site interests as well as site-specific interests. To represent this dichotomous pattern of user preferences, we propose a generative model of Multi-site Probabilistic Factorization (MPF) to capture both the cross-site as well as site-specific preferences. Besides, we discuss the design principle of our model by analyzing the sources of the observed site-specific user preferences, namely, site peculiarity and data sparsity. Through conducting extensive recommendation validation, we show that our MPF model achieves the best results compared to several other state-of-the-art factorization models with significant improvements of F-measure by 12.96%, 8.24% and 6.88%, respectively. 
Our findings provide insights on the value of integrating user data from multiple sites, which stimulates collaboration between video service providers.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080769" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3ba2c28c1477ad04c7f089a725dee2b3ef6ee196", "sources": [ "DBLP" ], "title": "Multi-site User Behavior Modeling and Its Application in Video Recommendation", "venue": "SIGIR", "year": 2017 }, "3bddb2157cd707c20fe3ee9c3db98d7b1ff3b57f": { "authors": [ { "ids": [ "2741855" ], "name": "Yeongpil Cho" }, { "ids": [ "35065371" ], "name": "Donghyun Kwon" }, { "ids": [ "24275066" ], "name": "Hayoon Yi" }, { "ids": [ "1809564" ], "name": "Yunheung Paek" } ], "doi": "", "doiUrl": "", "entities": [ "64-bit computing", "ARM architecture", "Experiment", "Hypervisor", "Malware", "Operating system", "Privilege level", "Privilege separation", "Software design", "X86" ], "id": "3bddb2157cd707c20fe3ee9c3db98d7b1ff3b57f", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "2497ebc497e3bc167db2c9412cc6964da8dd3566", "0a35c32ebc233556d11c2038f5a4362f2c40b2a9", "01334117dc8bb99b0ff884d6c2984f79c23f5deb", "080c336698f5d7a15169e5ad98fa62a0bbf6085c", "257ae2454e688e12025d68af99a1863a1241a2ac", "0362131110123dc512447731d3f7e644ad0660d7", "2e8da51c545cbe8e62a3751a5a2b9a3beca00b43", "9e5db350ba34f2b4c662cdea7acb6e906484ada9", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "4ab4a666f5e5ed34ac219a9fdc2f70bd1cab0922", "078b855c40fefabd766a09f23280c59feef21634", "90df476a4070cd797ef682f30a408086899ad16b", "01ee7db67463b53143e0b2c126363ed6d3c8a532", "02e5b7aa2c920d6cd251e954a3dd314a174164a2", "a8b22274d97a967034eb98e9e9fa1e4e6de71a14", "44d886f89cdbd4fdf5dd25d83b2d37deb7541bf7", "567fbe38b1e63d3e718527b3ea9918440dd703ad", "8fa94a9ec8aac7287ca4a18e1c0fa3596f201859", "1de5ae8534fc76323e4d926e10dc0fc76a28a361", "1c8d06510ad449ad24fbdd164f8008cc730cab47", 
"6c0562ffc00ba7a4d2734ac039ffd181afe2008d", "129359a872783b7c3a82c2c9dbef75df2956d2d3", "58156d27f80ee450ba43651a780ebd829b70c363", "02fdca5fdba792e4f2c70b8b637abe4824343800", "3162a8dba56712de3f0aa2c9192dd47211848baa", "16e85d76e57739da3082ca9dd4868b240c0b3c86", "11c6a5905966c437055dcf7f11ae80401a18d0dd", "4ea80cc42b2140a3bce7e64f49225323eaf56912", "505ee623397666c0ce158e103ffac0c62dbcf2fa", "07083f18b90323abedf8932f733656391cad5e21", "8c8ffe8e4fdadbf42b46944d0339eafc3e4de4c3", "0957332f8beb1ec4071fcb6fc44cb0b5396463d5", "5bddb52a9def1c1330e8139b8496fbb8bb8c5937", "2fafad1553f320615034ef985bbc3378033de73c", "3c7e73f92beb3eebc0ea1ad48235f4fd4bd6ee53", "39040e2b60fcb01dfed8d638f2cb66218cfdb144", "27d03685e3b438c37a9ed03fd6aa5843945a415b" ], "paperAbstract": "Privilege separation has long been considered as a fundamental principle in software design to mitigate the potential damage of a security attack. Much effort has been given to develop various privilege separation schemes where a monolithic OS or hypervisor is divided into two privilege domains where one domain is logically more privileged than the other even if both run at an identical processor privilege level. We say that privilege separation is intra-level if it is implemented for software of a certain privilege level without any involvement or assistance of more privileged software. In general, realizing intra-level privilege separation mandates developers to rely on certain security features of the underlying hardware. So far, such development efforts however have been much less focused on ARM architectures than on the Intel x86 family mainly because the architectural provision of ARM security features was relatively insufficient. Unlike on x86, as a result, there exists no full intra-level scheme that can be universally applied to any privilege level on ARM. 
However, as malware and attacks increase against virtually every level of privileged software including an OS, a hypervisor and even the highest privileged software armored by TrustZone, we have been motivated to develop a technique, named as Hilps, to realize true intra-level privilege separation in all these levels of privileged software on ARM. Pivotal to the success of Hilps is the support from a new hardware feature of ARM\u2019s latest 64-bit architecture, called TxSZ, which we manipulate to elastically adjust the accessible virtual address range for a program. In our experiments, we have applied Hilps to retrofit the core software mechanisms for privilege separation into existing system software and evaluated the performance of the resulting system. According to the experimental results, the system incurs on average just less than 1 % overhead; hence, we conclude that Hilps is quite promising for practical use in real deployments.", "pdfUrls": [ "https://www.internetsociety.org/sites/default/files/ndss2017_05B-5_Cho_paper.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/dynamic-virtual-address-range-adjustment-intra-level-privilege-separation-arm/" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/3bdd/b2157cd707c20fe3ee9c3db98d7b1ff3b57f.pdf", "s2Url": "https://semanticscholar.org/paper/3bddb2157cd707c20fe3ee9c3db98d7b1ff3b57f", "sources": [ "DBLP" ], "title": "Dynamic Virtual Address Range Adjustment for Intra-Level Privilege Separation on ARM", "venue": "NDSS", "year": 2017 }, "3bf5bd82f4be1d2da6afc224de4827c81e22cb18": { "authors": [ { "ids": [ "37253985" ], "name": "Khairul Kabir" }, { "ids": [ "2168650" ], "name": "Azzam Haidar" }, { "ids": [ "1706099" ], "name": "Stanimire Tomov" }, { "ids": [ "1746341" ], "name": "Aurelien Bouteiller" }, { "ids": [ "1708869" ], "name": "Jack J. 
Dongarra" } ], "doi": "10.1007/978-3-319-58667-0_9", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_9", "entities": [ "Algorithm", "Big data", "Central processing unit", "Computer data storage", "Disk storage", "Graphics processing unit", "Information retrieval", "Memory hierarchy", "Multi-core processor", "Numerical analysis", "Numerical weather prediction", "Out of memory", "Out-of-core algorithm", "Singular value decomposition", "Tiled web map" ], "id": "3bf5bd82f4be1d2da6afc224de4827c81e22cb18", "inCitations": [ "a25c7a004a5073c8284d643822dbd3b980c7d0fb" ], "journalName": "", "journalPages": "158-178", "journalVolume": "", "outCitations": [ "980bbba2c2a4fc66c2d19b34856a0d625910369a", "33c2130b4459ecba1b58dfaf19012271ccb0998b", "0a5617cf569abe3c669a71f4c604d47ca334ae12", "70f1197b71aab1617e3c12cd61ee9977bd475c57", "a6cbd9bde9bfd59f1594e8a9b96b6d40a7d106e9", "82292f38366cbe3167c9de2d71ce86c75fba78a9", "bad9b3e0cfddfeb33653bf869af7fffffcd6df8d", "dd33bb8de5c88ffd5d71f550c003b0a6ec6440a2", "57b832b09eacc30d2742ecb9e3249a0fef2f8d3a", "30555c7ea92f59a9b2d3455ea98b1138015dce37", "1145ebcc1738ac9b4d1c932402cd23460ae0bed4", "c0ce67cfd7e60c0f2cd585353ab4d81be7e0cf7e", "76d97e8cae2f5a2d660c294eb2a34faa493175a3", "9cb5af4ec44a08510a31d5a6e4856152df89cd63", "0747c3f13d5a8c906594caf6994b1e8f6654f175", "f6430121b2af7d55b090a1c260570630e6cf1f41", "14dec304cb9bbe9f37e04463b9746bc9ec569bd6", "8ccb3d51dfceb07a752a62433dcc637efba3930d" ], "paperAbstract": "Many important applications \u2013 from big data analytics to information retrieval, gene expression analysis, and numerical weather prediction \u2013 require the solution of large dense singular value decompositions (SVD). In many cases the problems are too large to fit into the computer\u2019s main memory, and thus require specialized out-of-core algorithms that use disk storage. In this paper, we analyze the SVD communications, as related to hierarchical memories, and design a class of algorithms that minimizes them. 
This class includes out-of-core SVDs but can also be applied between other consecutive levels of the memory hierarchy, e.g., GPU SVD using the CPU memory for large problems. We call these out-of-memory (OOM) algorithms. To design OOM SVDs, we first study the communications for both classical one-stage blocked SVD and two-stage tiled SVD. We present the theoretical analysis and strategies to design, as well as implement, these communication avoiding OOM SVD algorithms. We show performance results for multicore architecture that illustrate our theoretical findings and match our performance models.", "pdfUrls": [ "http://www.netlib.org/utk/people/JackDongarra/PAPERS/a-framework.pdf", "https://doi.org/10.1007/978-3-319-58667-0_9", "http://www.icl.utk.edu/files/publications/2017/icl-utk-952-2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/3db1/504b6711395e2cb733ae5734353605678242.pdf", "s2Url": "https://semanticscholar.org/paper/3bf5bd82f4be1d2da6afc224de4827c81e22cb18", "sources": [ "DBLP" ], "title": "A Framework for Out of Memory SVD Algorithms", "venue": "ISC", "year": 2017 }, "3c05074147fcca6d35dd20a56153769afbc8545e": { "authors": [ { "ids": [ "2934941" ], "name": "Saravanan Thirumuruganathan" }, { "ids": [ "7219988" ], "name": "Laure Berti-\u00c9quille" }, { "ids": [ "2168047" ], "name": "Mourad Ouzzani" }, { "ids": [ "1712430" ], "name": "Jorge-Arnulfo Quian\u00e9-Ruiz" }, { "ids": [ "8669763" ], "name": "Nan Tang" } ], "doi": "10.1145/3035918.3064024", "doiUrl": "https://doi.org/10.1145/3035918.3064024", "entities": [ "Algorithm", "Anomaly detection", "Approximation algorithm", "Density functional theory", "Dirty data", "Error detection and correction", "Experiment", "Functional dependency", "Ground truth" ], "id": "3c05074147fcca6d35dd20a56153769afbc8545e", "inCitations": [ "80c7ad5e1d47a7acca3c886c426bb5f1efeaa7a4", "8c7044398d1994b12a9bf7212e11398f59eaf446" ], "journalName": "", "journalPages": "1385-1397", "journalVolume": "", 
"outCitations": [ "06a26cc7afa95781fa2942a7582ed400b0842da7", "6f6f7b695e755a2c6cfa8d31d8331121a1f8c0d0", "34cac14ff57deca90a5d74b8d12f69a4780717d9", "5c42fa8c6dd0f64977cd6836ea9d55fbf031450b", "079e2aeb5f693ec505d3e262c5c3df7c1afd94e7", "fb6c1ab37f62f24c6253301fcb44ef518c672e7a", "4f5dc77b2ca806294b8e6c6ed0d9d01dfa79f7b2", "fb48f430b9e19ad2c7093f622df559ae103f1307", "f30e03f687c3185180b60f49629d3b076d30c54b", "543f17e523779edaa7b9d57fe17f382bc3239389", "7f6ea2669aeb5184321e8305fc553119e54ecb79", "7254253b9ba200277e7bea7c05f896f363e61a1f", "579e1e9217cfed6d563cedf8f8fdcd1604fc0917", "148608f2b16646905daff9e9d2e92a90b47cb078", "74feb1f64b3773173e296c93d1e72b9734914c37", "23dadf25f3efacbc9c66f69093d656ad5b003529", "33fe60d22d00861f79234e89f71ecdc2ea41b8e3", "547fc697865c564539bce9959bb4bf691ebd92e0", "100e6ac4215956869103850dcb078eed638d366f", "6306fbfa8fcde8e98a677cd4a833b8c76c613974", "1e8a9013ff98ada5e919bea8e665b5c5eb4e1b5f", "19f10c75265a43829cf00e619224ab3e481c4fad", "006336b4082bbba1ab1e5e2e6c633a44971a7dc3", "98e74116f55b3a016bad8ee963e9694cf9832905", "e672542e9b666546920d29589a7fe4d751bf6241", "bbb869e6415a336ee4d234be1536f99fb0fb6ef2", "e7291ab2865648f6a271801aff3eef5541fa6df2", "47ad5cd7653ea558e24dab5dea1cf061db97cc26", "a97b77c8c3f9e8247c497d1b4c27c958a15f3a62" ], "paperAbstract": "Error detection is the process of identifying problematic data cells that are different from their ground truth. Functional dependencies (FDs) have been widely studied in support of this process. Oftentimes, it is assumed that FDs are given by experts. Unfortunately, it is usually hard and expensive for the experts to define such FDs. In addition, automatic data profiling over dirty data in order to find correct FDs is known to be a hard problem. In this paper, we propose an end-to-end solution to detect FD-detectable errors from dirty data. 
The broad intuition is that given a dirty dataset, it is feasible to automatically find approximate FDs, as well as data that is possibly erroneous. Arguably, at this point, only experts can confirm true FDs or true errors. However, in practice, experts never have enough budget to find all errors. Hence, our problem is, given a limited budget of expert's time, which questions we should ask, either FDs, cells, or tuples, such that we can find as many data errors as possible. We present efficient algorithms to interact with the user. Extensive experiments demonstrate that our proposed framework is effective in detecting errors from dirty data.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064024", "http://da.qcri.org/ntang/pubs/sigmod2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3c05074147fcca6d35dd20a56153769afbc8545e", "sources": [ "DBLP" ], "title": "UGuide: User-Guided Discovery of FD-Detectable Errors", "venue": "SIGMOD Conference", "year": 2017 }, "3c1bac9eb4a1525a9fac6051cd727478a1ebcc3b": { "authors": [ { "ids": [ "34596687" ], "name": "Mengjia Yan" }, { "ids": [ "3384083" ], "name": "Bhargava Gopireddy" }, { "ids": [ "2130036" ], "name": "Thomas Shull" }, { "ids": [ "1695950" ], "name": "Josep Torrellas" } ], "doi": "10.1145/3079856.3080222", "doiUrl": "https://doi.org/10.1145/3079856.3080222", "entities": [ "Algorithm", "Best, worst and average case", "CPU cache", "Computer architecture simulator", "Page replacement algorithm", "Programmer", "Side-channel attack", "Simulation" ], "id": "3c1bac9eb4a1525a9fac6051cd727478a1ebcc3b", "inCitations": [ "6ac57944ec24a26c9eb632fa024708dcaffb5d90" ], "journalName": "", "journalPages": "347-360", "journalVolume": "", "outCitations": [ "a5ea83ad9abffe6c44b93617728e5f06f73bb9be", "53549635de2fbfaaabf4ff24a910797cc3499d9b", "045bbbea384e9d54be38dd207bf237d5208ea599", "3b7e821532a852d27eacd89bcaa869a6263eb144", "70ad3cd2dd46f01821207922c78ddd7c7ea82898", 
"081dec43c2dbe76ff43c810594495f11ab092a10", "4d624b942a58818f8d425460638cb4b65ed84e1c", "482fcc1057c6ed9ea21f71c990088eeb092ec243", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "379049169638ee215b11f4a701c85ce2c1b942ca", "1734463e8278c8d2412182a1a15267d3a3aa760b", "7272e04a354d3bc54b1a8119e868090996d3bf1d", "70fb3cea8335aefdf849597e9d9dd7512d722d88", "3a34b385e685b80e850ed36e08bd105ac87ed278", "f644e5ed34af1a47b80a8428c8e812082f1c3dae", "08dcda95dcbd1ebe906a1c24507b7814759462aa", "2762c266378290594e1715104ef8e98a8cb60d35", "0c7b18190730db1887b2fae8d4474e9c49a9fa46", "fd67e410437ae239dcc9beaddaf1dc80fddb6461", "39242f7b6ab35ce5ec6b7930252de6fccb6360de", "2269889c9085ff518ee9e7f5b2f92e4599dd3ff2", "5c40cdb6386021d68288e7158e1330ad3b8c223e", "26f3cbecbc636984d57e52191c1d87c9377aff6f", "90f2e587256b8b3cc7651f257a8066ff9f2f544e", "531847b1e582c5353ff436744ba0c60682cbd022", "94f43879426de678696dd3616fd1559d13c8ad78", "4514e8d10bddd734ccd88335c9d911e345f27972", "1f932f0d49a4c56d9718e8506d6177c6a6848831", "6b3bd3d03cbcc622a8168f761b49816e7db89404", "868a1bad5683fdc234c3f280494192d55f6d2e69", "1ae7be5d55833e6aa53d24f620be5df9006a3558", "603befb258ad7ac3662e398e84ac2f28582724ea", "3001d2504e1dbc547d12d05f3ed1a671d125c4ce", "29621133de70a2769470c13a2d9c27d3a5ed9587", "008ff29ee4dbee79028e1017d6459347ad8f45d6", "77a1532cb64eab28162a0277cde52b4b7eceda49", "96ba6f5c06850c009e5b77094c0d4532744dedc2", "333da42f4369dff8ca905ad21ea6ee2f5dc99d55", "2ba9c60483ce810cc001aa620598cda98001af7e", "47e98574f432ec6a7a224b807a0bbd08a2a66148", "663d999090f35ed660b574804799c745b9737562", "145d0b5cdf9517ce32f372e1cbb2f8f8b3d35345", "52c2c050af5b32d4929b4b193967a3675d03aea0", "63679824606b1b0deb4a44639a4e4b3e5eb49303", "7cfb0fc85a6d3872b1d7b89bcb8706ad64b71830", "21ddf1f7ab7e2cd2ae07073bf3238ce46314bac9", "f68e4b414a284b990cc25e8427793e1d8a8ab691", "3e74ae88cdaa33bf89136800258bde97ab397ec9" ], "paperAbstract": "In cache-based side channel attacks, a spy that shares a cache with a victim probes 
cache locations to extract information on the victim's access patterns. For example, in evict+reload, the spy repeatedly evicts and then reloads a probe address, checking if the victim has accessed the address in between the two operations. While there are many proposals to combat these cache attacks, they all have limitations: they either hurt performance, require programmer intervention, or can only defend against some types of attacks.\n This paper makes the following observation for an environment with an inclusive cache hierarchy: when the spy evicts the probe address from the shared cache, the address will also be evicted from the private cache of the victim process, creating an inclusion victim. Consequently, to disable cache attacks, this paper proposes to alter the line replacement algorithm of the shared cache, to prevent a process from creating inclusion victims in the caches of cores running other processes. By enforcing this rule, the spy cannot evict the probe address from the shared cache and, hence, cannot glimpse any information on the victim's access patterns. We call our proposal SHARP (Secure Hierarchy-Aware cache Replacement Policy). SHARP efficiently defends against all existing cross-core shared-cache attacks, needs only minimal hardware modifications, and requires no code modifications. We implement SHARP in a cycle-level full-system simulator. 
We show that it protects against real-world attacks, and that it introduces negligible average performance degradation.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080222" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3c1bac9eb4a1525a9fac6051cd727478a1ebcc3b", "sources": [ "DBLP" ], "title": "Secure Hierarchy-Aware Cache Replacement Policy (SHARP): Defending Against Cache-Based Side Channel Atacks", "venue": "ISCA", "year": 2017 }, "3c37fb4ebd7a167e1c0d25994169a4dd8826e04c": { "authors": [ { "ids": [ "6970155" ], "name": "Shuang Song" }, { "ids": [ "39475599" ], "name": "Yizhen Wang" }, { "ids": [ "38120884" ], "name": "Kamalika Chaudhuri" } ], "doi": "10.1145/3035918.3064025", "doiUrl": "https://doi.org/10.1145/3035918.3064025", "entities": [ "Computational complexity theory", "Database", "Differential privacy", "Information privacy", "Personally identifiable information", "Privacy", "Social network", "Synthetic data" ], "id": "3c37fb4ebd7a167e1c0d25994169a4dd8826e04c", "inCitations": [ "6e62afd47ded1fcc8fc9e4de1c95c25cd54adfd2", "1665dfa9d948aae7cd024f0df186a8e0e235c4d3", "74205b80dff21b1f21205b7564080dc8cdaf3e70", "35ceec0f14213fbd9da4c62f856181346bad821b", "eec0bc4c3fddbaf78feb0872a195fb3aeb01010e", "add09610b18960a797690ccd758e4eefdb7d54b3", "061a0f2232aaa5d9a60e2976f0d331c13c12dd59" ], "journalName": "", "journalPages": "1291-1306", "journalVolume": "", "outCitations": [ "451a8f7a1ac7bafcfd30db62fedf946f59d0f0d9", "644d02d3aa28537635a1a247aa75b9e926dfda53", "9e45b00297f9481020f1c4fcd59486c867f00d3b", "2f207e27cad923b81253542a6f76439d49d87925", "58bcb1a4f429da03590c5622fb6e7f3c11d05f77", "bbcf437efe0d037dc1224365961caac724a68d6b", "763afb9dc8650101be06053e2eb612d9e3a1ce18", "8726139a30434175795fe924188bd5c6e0b0740d", "3e732c08e17e802d6c2f2144735cff5dc83ddad9", "fc2b51b1ca0460ad747e11cca6517ed7482f31b1", "3cefca055656278e4b54d3dc89ec29438d0c98d0", "009d284fe935b5f421d24321073097a0cd34e21f", 
"d921036a6cb7e340b019afa557a19bc65586a1ad", "fa7e9af14a46e07db867d9d01cd885e02a06fd62", "6154ce8c02375184f7928e41c4fae532500f7175", "633d82be7fe9a58bfe2cbbd55b86968d8b0329bb", "13d635d04e6d44f976ba713768789f8da14d5a6f", "06f0a2e1678b4d849286c92249908ac5d1ed8b55", "17fac85921a6538161b30665f55991f7c7e0f940", "b532099ff8b67049f292cd62700dca37fc2be623", "63b88452574095639ef9a1f692eef3c1ec386b0a", "31afd0a18126720eeef5880bcaa14768c4005387", "9407fda128b185bdb0ced615ad8107381b831071", "bcecf29525971fa388091fe153a1ff1f73ad96c4", "49934d08d42ed9e279a82cbad2086377443c8a75", "360d4003511682c5f5b5f82f6befdda88ca3fa73", "04f39720b9b20f8ab990228ae3fe4f473e750fe3", "1ecfe23503600b7a6a6ed3dcce86542420e36a06", "2c47904cda5134e8b2862dfa93ecbf283b6e8724" ], "paperAbstract": "Many modern databases include personal and sensitive correlated data, such as private information on users connected together in a social network, and measurements of physical activity of single subjects across time. However, differential privacy, the current gold standard in data privacy, does not adequately address privacy issues in this kind of data.\n This work looks at a recent generalization of differential privacy, called Pufferfish, that can be used to address privacy in correlated data. The main challenge in applying Pufferfish is a lack of suitable mechanisms. We provide the first mechanism -- the Wasserstein Mechanism -- which applies to any general Pufferfish framework. Since this mechanism may be computationally inefficient, we provide an additional mechanism that applies to some practical cases such as physical activity measurements across time, and is computationally efficient. 
Our experimental evaluations indicate that this mechanism provides privacy and utility for synthetic as well as real data in two separate domains.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064025", "https://simons.berkeley.edu/sites/default/files/docs/5679/simonsuncertaintyincomputationworkshop.pdf", "http://cseweb.ucsd.edu/~shs037/pufferfish_slides.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3c37fb4ebd7a167e1c0d25994169a4dd8826e04c", "sources": [ "DBLP" ], "title": "Pufferfish Privacy Mechanisms for Correlated Data", "venue": "SIGMOD Conference", "year": 2017 }, "3c38e08647d052fc7f1235b6f202ead0f5db4d60": { "authors": [ { "ids": [ "8826771" ], "name": "Martin L. Kersten" }, { "ids": [ "2469220" ], "name": "Lefteris Sidirourgos" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Big data", "Database", "Money", "Response time (technology)", "Responsiveness", "Scalability", "Software rot", "Solution stack", "Velocity" ], "id": "3c38e08647d052fc7f1235b6f202ead0f5db4d60", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "370e1fcea7074072fe5946d3e728affd582a9a44", "3f14887cc4e65aecd6fc571eef2774a19e6ecb6a", "b67eb616bf948d72afc26fe0e0aae21f4b3bf1c1", "18abf4ef5723623459b2aa6f1796a6f671cc902e", "da01f7fcc5c7eeba75bc09a41fdd946e65210090", "8bc100f1b5e313278a09d04ca210506d95c3b4f5", "65c46b04244c194aafe5cff074e824b4aad081ce", "188ee74dce31c215759027b2ce4d3e67575c3020", "41a4f20e54ef8a33e4542b5622fa8ed0ddd6a11e", "1a64007ba18a3759cb5a93d27fc93c0c20b76239", "60a488e29b5b64c44f6ce124bce7ced9602636d4", "00aa614734a26a19b09a0a3bdee2adc77bdac5e4", "fed3f3b4bf301512d7d7d499f22d620fd049e3f7", "aceb35bee902b709af8fd3b9186007aa93493260", "f74c2ce508cc0e9f60fbc087feb231011fcb937d" ], "paperAbstract": "Big Data comes with huge challenges. Its volume and velocity makes handling, curating, and analytical processing a costly affair. 
Even to simply \u201clook at\u201d the data within an a priori defined budget and with a guaranteed interactive response time might be impossible to achieve. Commonly applied scale-out approaches will hit the technology and monetary wall soon, if not done so already. Likewise, blindly rejecting data when the channels are full, or reducing the data resolution at the source, might lead to loss of valuable observations. An army of well-educated database administrators or full software stack architects might deal with these challenges albeit at substantial cost. This calls for a mostly knobless DBMS with a fundamental change in database management. Data rotting has been proposed as a direction to find a solution [10, 11]. For the sake of storage management and responsiveness, it lets the DBMS semi-autonomously rot away data. Rotting is based on the systems own unwillingness to keep old data as easily accessible as fresh data. This paper sheds more light on the opportunities and potential impacts of this radical departure in data management. Specifically, we study the case where a DBMS selectively forgets tuples (by marking them inactive) under various amnesia scenarios and with different implementation strategies. 
Our ultimate goal is to use the findings of this study to morph an existing data management engine to serve demanding big data scientific applications with well-chosen built-in data amnesia algorithms.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p58-kersten-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/3c38/e08647d052fc7f1235b6f202ead0f5db4d60.pdf", "s2Url": "https://semanticscholar.org/paper/3c38e08647d052fc7f1235b6f202ead0f5db4d60", "sources": [ "DBLP" ], "title": "A Database System with Amnesia", "venue": "CIDR", "year": 2017 }, "3c3b8ee97b1f5082a0ea91bf2539607e52212e43": { "authors": [ { "ids": [ "3313731" ], "name": "Robert O'Callahan" }, { "ids": [ "2235654" ], "name": "Chris Jones" }, { "ids": [ "2063434" ], "name": "Nathan Froyd" }, { "ids": [ "11888573" ], "name": "Kyle Huey" }, { "ids": [ "33414470" ], "name": "Albert Noll" }, { "ids": [ "2578198" ], "name": "Nimrod Partush" } ], "doi": "", "doiUrl": "", "entities": [ "Black box", "Central processing unit", "Compiler", "Debugger", "Debugging", "Failure rate", "Linux", "Linux", "Linux for PlayStation 2", "Open-source software", "Operating system", "Pervasive informatics", "Rapid Refresh", "Runtime system", "Software deployment", "Systems design", "User space", "Virtual machine" ], "id": "3c3b8ee97b1f5082a0ea91bf2539607e52212e43", "inCitations": [ "3f7411cb35e4ac8dc3d3cc9e862d785f38de098f", "aaca858e5d071b7215cd9954371d5911745145b3" ], "journalName": "", "journalPages": "377-389", "journalVolume": "", "outCitations": [ "32a7c5e10a09b5532d56af50ef2f60d9776cc56a", "fb34f663b34a8cb09a75fe98685c003d86f32e15", "29146d2b80bc8c56f7e18efe8d2c92354254e947", "72657b0428f9b8f705546eb5a9147203a534d8f6", "113772329678792fc2a3a8cb9322c164547f88a0", "feb5db279d43f6affb474398f96bb5c910aa2340", "b59f1a01da347031ed4f09b8b2b9503c221a5a97", "259ff151c4f79cdc5ec593bec29650c9643c6043", "f4e10c197040252beeabcd3393c81062e60e7475", "2bc2069d506d8c1381320619715928a4f0901909", 
"bc42584c1d74f96d2e03dfcc487af642527a62fe", "7533f479d9d2377d73132c418df6e57ee52d181e", "5eea7073acfa8b946204ff681aca192571a1d6c2", "2a85b683073c2c8b762079c52a0d54392b243afb", "452b7f1eb4899fb83d6bc21a180643c4433684bb", "1d15930cd9e4ececf22fd96bf9ba52f12dc0665e", "07893f73dfb921ef141b7ed80d2d82874e2f193e", "63d2e311f16ac8f745bc44677ba13bfa5b67b5b8", "2fbbf89a921e4aa19ee3bfe73d0b34a6ad764656", "0e578433d4e8bb2a571c87a2d22816074902f009", "8932c6d523f9f5aba76993c71c0089ba15155a7c", "13c092188ae9136d1323fbcb8f6095e0465f5988", "37774a7853b6336611175b9d8ec9baffb650acf6", "59ee76587fde9f3fef3b44872c58fb076165c6e5", "8a0af8ae748210ef571d074362b552af571e6d33", "b634f4f4ff14c87952ba2713faad763753d34684", "13d4fa20983a6605fb7b13371a01bbafdbabf7d5", "06488cd2cc8c145b3b1c8e26b782b55e0dedfa6c", "3ca7e9729dd00830ca25396d535295648ea19a81", "44474a05056efdcc7fbf75aa66227ac70e88748f", "4b83751a2d5db62b894bae064fdcb8a5d16b7cd6", "031e76f20897108925c6942e0ba00a76045a2e49", "73bded14fb8c3b4bd5c2ae554d704d3ad3ff907e" ], "paperAbstract": "The ability to record and replay program executions with low overhead enables many applications, such as reverse-execution debugging, debugging of hard-toreproduce test failures, and \u201cblack box\u201d forensic analysis of failures in deployed systems. Existing record-andreplay approaches limit deployability by recording an entire virtual machine (heavyweight), modifying the OS kernel (adding deployment and maintenance costs), requiring pervasive code instrumentation (imposing significant performance and complexity overhead), or modifying compilers and runtime systems (limiting generality). We investigated whether it is possible to build a practical record-and-replay system avoiding all these issues. The answer turns out to be yes \u2014 if the CPU and operating system meet certain non-obvious constraints. Fortunately modern Intel CPUs, Linux kernels and user-space frameworks do meet these constraints, although this has only become true recently. 
With some novel optimizations, our system RR records and replays real-world lowparallelism workloads with low overhead, with an entirely user-space implementation, using stock hardware, compilers, runtimes and operating systems. RR forms the basis of an open-source reverse-execution debugger seeing significant use in practice. We present the design and implementation of RR, describe its performance on a variety of workloads, and identify constraints on hardware and operating system design required to support our approach.", "pdfUrls": [ "https://nimrodpar.github.io/assets/publications/rr.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/ocallahan", "https://www.usenix.org/system/files/conference/atc17/atc17-o_callahan.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/3c3b/8ee97b1f5082a0ea91bf2539607e52212e43.pdf", "s2Url": "https://semanticscholar.org/paper/3c3b8ee97b1f5082a0ea91bf2539607e52212e43", "sources": [ "DBLP" ], "title": "Engineering Record and Replay for Deployability", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "3c4fdf1e1ab3fbac23575d76a6f37deccbee5ad0": { "authors": [ { "ids": [ "3434870" ], "name": "Yujie An" }, { "ids": [ "3067853" ], "name": "Quentin F. Stout" } ], "doi": "10.1109/IPDPS.2017.74", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.74", "entities": [ "Algorithm", "Computer", "Graph property", "Parallel random-access machine", "Sorting", "Spanning tree" ], "id": "3c4fdf1e1ab3fbac23575d76a6f37deccbee5ad0", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "937-946", "journalVolume": "", "outCitations": [], "paperAbstract": "We give efficient algorithms to solve fundamental data movement problems on mesh-connected computers augmented with limited global bandwidth. 
Adding a small amount of global bandwidth makes a practical design that combines aspects of mesh and fully connected models to achieve the benefits of each. We give algorithms for sorting, finding the median, finding a spanning tree, and determining various graph properties to show that the small amount of global communication can significantly reduce the time, and that concurrent read helps even more. Most of these algorithms are optimal. We also extend our results to mesh-connected computers with row and column buses.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.74" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3c4fdf1e1ab3fbac23575d76a6f37deccbee5ad0", "sources": [ "DBLP" ], "title": "Optimal Algorithms for a Mesh-Connected Computer with Limited Additional Global Bandwidth", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "3c78ada32da2aac0ef2e23d16e786c24efd8b2a3": { "authors": [ { "ids": [ "2229491" ], "name": "Aleksandr Farseev" }, { "ids": [ "3478795" ], "name": "Ivan Samborskii" }, { "ids": [ "2517152" ], "name": "Andrey Filchenkov" }, { "ids": [ "1684968" ], "name": "Tat-Seng Chua" } ], "doi": "10.1145/3077136.3080774", "doiUrl": "https://doi.org/10.1145/3077136.3080774", "entities": [ "Baseline (configuration management)", "Model\u2013view\u2013controller", "Multi-source", "Social media", "Social network", "User-generated content", "Venue (sound system)", "Virtual community", "Virtual reality" ], "id": "3c78ada32da2aac0ef2e23d16e786c24efd8b2a3", "inCitations": [ "43c0ff1070def3d98f548b7cbf523fdd4a83827a", "3db6757e5c65e80dca77bc2a6cd9e742c229df9f", "1ab2d67ab1cf651939e4d78248b078f76cc20a02", "6ba4e10d06d9842765a4350bf5abbd3dd095045c" ], "journalName": "", "journalPages": "195-204", "journalVolume": "", "outCitations": [ "127847b3f60f3c807dab392cc157c856d993e6d6", "532ded2b9b2021fe09ab164f9dabf97b9fdaa3c8", "b92c88c552d5a6457defde6427d6fd95f1e1738b", 
"2d732c3f798954106f53e60d2c804faf23e09f8a", "c0042d771768a1dc08b611c59e13b47a9ff680f5", "2275762a28582716db92df6d525ed2481c7d7f14", "1ab2d67ab1cf651939e4d78248b078f76cc20a02", "2bf11b00938e73468e3ab02dfe678985212f1aea", "78dc785a7f323c0e0afddb18dc0099fcae59b3d5", "fc8d2643020c6208cf99778744e07fe01626316c", "e6f857af109fd6da45e7282f58159044fc535523", "11a81c78412f6a8a70b9e450260fb30257126817", "38211dc39e41273c0007889202c69f841e02248a", "091aded505b84cf87c197875ccfde24d98a300c9", "6f813ccf106360cc9c3d6df849cc04d881d0a6e8", "24056f101c601a13024e0c91842d5e3f8aca9372", "2a3f086ba4b44628fb1921fb3934e84558a85f60", "bb4c1818159ea11d0d016b0b044d27a76285f913", "14f2bc1234ed1418790262f56488dc4447c78bc8", "530d53d8a1828e73ff0d731cd2d2cf8a8f8ecae4", "58b452c930785a50218811e5fd3819f2a97dde3d", "1437415df29d3927c7851c7a0db0edd4a472d6e1", "1c9ea604d8873193cf2ea8090160555a8dc19035", "3db6757e5c65e80dca77bc2a6cd9e742c229df9f", "0db3841e12bd2dae3665c4b4280cb77294fd4c77", "2a0556539cdcea37bc3f243fbb6c854abe253fea", "b08eb5529aa59fc36cfafcd31f8da1bcd61217b4", "4e8307b54da6d35a0a4e4946726b621f065664fc", "95a5bc99ed75547e40d10378299f60844dd49eab", "f6a526a4abecd1efb54b925892d2ffa74f643225", "0a7d444963b5979e571641438cc4aeabced8aa0f", "3aefab415bc4cfaff98ef7c6091526760c43dbc2", "e2b196bd3842afcf43d99084e215de18769badef", "70d7688df5e2bfc7aa66516a26b4546344b26c60", "c4c9f645ca334f03b27d6ca646ba7e32fa323aa3", "6fdd336652bab3976c77fed66b75954aa784f1f7", "59aad606950349bd64cc220a0df8eca804229865", "6446405d851f7e940314d2d07aa8ff67b86d1da6", "44228f85514537dfe46603da4fc7edd5c0f5ad35", "700a692371434e0de4ee831fa13ff984bd2f5ee6", "95b251202e1f5ff7d7f41dda553a38e395ecf555", "43c0ff1070def3d98f548b7cbf523fdd4a83827a", "9f0193fe9a2e963968aac8426795226ffa5ef530", "b388f307b0b3cc9de2bcfd82bc9f037a0e375b54", "b6943f0e97a7a7c1a71d58521f5ba5abfcf34107", "17a41aa18b8987ed87d6fc19b87d36faf8a4240c", "315a061bf865e25e27c245f1b1db2ce75b740d57", "b6575f6b6a9e3fde827a2e12bb806a4a00b7a7d5" ], "paperAbstract": 
"Venue category recommendation is an essential application for the tourism and advertisement industries, wherein it may suggest attractive localities within close proximity to users' current location. Considering that many adults use more than three social networks simultaneously, it is reasonable to leverage on this rapidly growing multi-source social media data to boost venue recommendation performance. Another approach to achieve higher recommendation results is to utilize group knowledge, which is able to diversify recommendation output. Taking into account these two aspects, we introduce a novel cross-network collaborative recommendation framework C3R, which utilizes both individual and group knowledge, while being trained on data from multiple social media sources. Group knowledge is derived based on new cross-source user community detection approach, which utilizes both inter-source relationship and the ability of sources to complement each other. To fully utilize multi-source multi-view data, we process user-generated content by employing state-of-the-art text, image, and location processing techniques. Our experimental results demonstrate the superiority of our multi-source framework over state-of-the-art baselines and different data source combinations. 
In addition, we suggest a new approach for automatic construction of inter-network relationship graph based on the data, which eliminates the necessity of having pre-defined domain knowledge.", "pdfUrls": [ "http://farseev.azurewebsites.net/papers/farseevC3R.pdf", "http://farseev.azurewebsites.net/slides/SIGIR17_Farseev.pdf", "http://doi.acm.org/10.1145/3077136.3080774" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3c78ada32da2aac0ef2e23d16e786c24efd8b2a3", "sources": [ "DBLP" ], "title": "Cross-Domain Recommendation via Clustering on Multi-Layer Graphs", "venue": "SIGIR", "year": 2017 }, "3c831e81d29dd5ae62f20120793ba7aaabc257b3": { "authors": [ { "ids": [ "1728316" ], "name": "Kai Wang" }, { "ids": [ "38856719" ], "name": "Aftab Hussain" }, { "ids": [ "1690380" ], "name": "Zhiqiang Zuo" }, { "ids": [ "38394648" ], "name": "Guoqing Xu" }, { "ids": [ "34651617" ], "name": "Ardalan Amiri Sani" } ], "doi": "10.1145/3037697.3037744", "doiUrl": "https://doi.org/10.1145/3037697.3037744", "entities": [ "Big data", "Computation", "Dataflow", "Linux", "Linux", "Model of computation", "Pattern matching", "Pointer (computer programming)", "PostgreSQL", "Programming language", "Scalability", "Software bug", "Source lines of code", "Static program analysis" ], "id": "3c831e81d29dd5ae62f20120793ba7aaabc257b3", "inCitations": [ "10e7f753944d450f4c3ce50b16a645c895b98220", "0ed6e7d571fed36bd705675ce9261af440a7a7bb", "830f6be24ab13dcbc4154bd52469fbb85ff25f0e", "1f0572f47be66c2c0fbf3fd0f98f25e5b5f88361", "1bb24c175f9fa082937c51e9b9b8ae651fd9b111" ], "journalName": "", "journalPages": "389-404", "journalVolume": "", "outCitations": [ "a38f20ccaf6369feadb2341109f1857848adfe8b", "1f7e5e582663868ed2f6763f98066ca278177a61", "76e553eee1d907becc17864891baf09144d8a4d9", "c741fd0b894d27620d1b4ba92ec3fca388bc8ba9", "22e98d48c4cb573adec6fa875d18d14955113456", "8e67d1085da29e5aa1e758751bfa5469ac07023e", "8469007f1d62cac7d7188dbe6b67b24d40d46ca4", 
"2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "1bb24c175f9fa082937c51e9b9b8ae651fd9b111", "1c11e7e36134fee2a41be720b7ce533919beb768", "243230d5b623f79c22750b42447e902ab07a2db9", "200824b1b1c6b681598fca3f791dfd51ae669e06", "0f408cba7605f7a6fd65837dd1c7e6f193d181ef", "759da1f189a542b38dbb1e751b6bc485fe01b82f", "a0d875193974f727d93c8f377fefb3efd58acc62", "09f87aa5788f0bda0be67c3be3f7da9deeed825c", "9c76c17c546f49802aed76bd8db3f94618078966", "2cb21b68888be8cd2e2672ee842302a97fdddc4a", "1d3c426af09d90b51085c9f9c12fc4500aac22af", "05b59ab485b601ff5939bdf3b5846ed8d7bdc56f", "5eab6c72ba39e0fea5c3aac0c2f5f9cc0a03eb0c", "0e23117148029fbef47d1eed869c7952546e53aa", "9da28672b71b658b8ea989e2bfc502582a79e079", "1ffeb2932f515e260c932eeff6dc4b001eff4de3", "047fdd696fdabd7b01af7d09c459e6abe7793170", "44004d3b596916a764a3d04cd3a54be200d1d6bc", "3486aeaf540c48952120fe853d672af984f40a6a", "1f32cece629d41929e6913f3b445b93bf2c168ac", "78ad867eb6176d4e2f1cec4f7517f65d90a660f8", "0314da057cfbf61d752674b7d8527e8be9ddc3b8", "1e8c283cedbbceb2a56bf962bc0a86fd40f1cea6", "0371b65e431972dfe0f81573274d10eb8c9eec7f", "1f261218bd19d42e3787ec8b975a633c822fe0be", "405e69f0214d84de0eb42eb5c94cda8428da4248", "8ac2772b65a9b7024020f28c8be9d1a35ae7fa02", "3dfc531d0040144a828d7c5a615498a977fbb901", "29f56a7f34879033bc6ecf52e03099fb55277e0d", "62f41341d9ba292877e9e299d6eb70b5435ee8c8", "2bd3222ec96cb6e40fe2080411fd952f58106e7e", "18213c660b56a10bf3cdcede940281d31b132356", "8bfd64fe8f9192a8b3c801c7d91fd46cabfc5319", "62fa835ba18dfac1e5ee801da1e27634cc5370e7", "14b750a0fd5a13f7494e4abf9b97718ff558f508", "36fc90752b88340a0b2be8990f36e1ae7f1c7e6c", "6c1fa64a6b5d3565fc4ab9ca97bf530069ce2669", "485b7a6c48ab33beaaf65ddbc9d3543bb8b8c145", "8d1a4ae1d3a17edd5b16653f2f582f9952b71612", "02584960fcf229817eb69ff5dc942d89ff9381be", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "6a4ea7c4936e45acb85923a905c7df65f3fa8561", "85ff7e25c39d216bd50ac6eb89e335ca7aef43f4", 
"15f8f3f80c22008cb5f95e870403227d38420c4f", "0608d9937c074520cdc93cc444cc1c77039c5332", "2ba9c6a40a168ab93399b7b48fd3a51d082da0c5", "758dd733ade7dfb5aa345a11b4dde536dec60bc1", "8ffd906129ad079e1df379a0be17d3f9f0d80b9c", "440273d503939d01cba669079dbf3addca045fea", "87c631d44c288c3596da809d2e55061c48036cdb", "059170b316ecb882014beced829b682a04758dd5", "4f212c2b6cb6fc64754bfb6284c95ab9f5218ec1", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "0ad8e89091eed09217e66adc98136126addc2619", "34ea8ffd27f9bbb7fef494cfca41bed5f356be56", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "0d0e7d8a17fac1595df7c928d086853721ed5dc2", "971563f7acd4250bbafdb5e90160dcd4dc6110e9", "3df6243260b3cdc0882eaf1a01a28a778482f146", "6b142100e77b676661afd70bfe51664f8191b272", "0a8e0a9f0ae9910d5ecf165071559a2c4191a098", "6a888f3dd0a17b0241be61daa378ba6caffa6617", "10d08b14128138faa34b0a3014e15ba94bbc4241", "1156f60e40548096df49528b1342bb3e88b0f378", "2209304ccc2b0501debd5e9a90ae739f9a30cdef", "027485f716ca4f6d9ee2e189790d6560e37fcab2", "6de3915df2b9927a78f213629f3bcb052ec21e8b", "1dfa9e8190eb9f67187acb90d033eb7142ac7db8", "1b12eb42a9e04af626c7ed266b2e299d7f6f96a3", "394635721bb5e72ccfb0289fa9b7b0f3a62b7612", "3ef15e17f7932c6d86eb06e3eb254e6dc621e029", "03aacfe8d36a673ecc379d3b76e7df1245a8d9e5", "0f6b42446071553a137fe4e0c9742a37dde5f1e8", "80af0dfde58a4f1e4f7ff35fa2c882a4ab3bbad2", "067b7f06fdaa1aceeb3fff534fd1f2649303922f", "08a4a296f447ca84443c48bf647bbfc4a0ffcf56", "472959925b67134b96219754dff41c9b5b993a1e", "128c3e04314e6fca8deed005d74a3d1ba36ad293", "21049557142d769298c2c3749e0c9d0d32743317", "24e404b34d3746a7d6719da31f9df188e9d34a55" ], "paperAbstract": "There is more than a decade-long history of using static analysis to find bugs in systems such as Linux. Most of the existing static analyses developed for these systems are simple checkers that find bugs based on pattern matching. 
Despite the presence of many sophisticated interprocedural analyses, few of them have been employed to improve checkers for systems code due to their complex implementations and poor scalability. In this paper, we revisit the scalability problem of interprocedural static analysis from a \"Big Data\" perspective. That is, we turn sophisticated code analysis into Big Data analytics and leverage novel data processing techniques to solve this traditional programming language problem. We develop Graspan, a disk-based parallel graph system that uses an edge-pair centric computation model to compute dynamic transitive closures on very large program graphs.\n We implement context-sensitive pointer/alias and dataflow analyses on Graspan. An evaluation of these analyses on large codebases such as Linux shows that their Graspan implementations scale to millions of lines of code and are much simpler than their original implementations. Moreover, we show that these analyses can be used to augment the existing checkers; these augmented checkers uncovered 132 new NULL pointer bugs and 1308 unnecessary NULL tests in Linux 4.4.0-rc5, PostgreSQL 8.3.9, and Apache httpd 2.2.18.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037744", "http://www.ics.uci.edu/~guoqingx/papers/wang-asplos17.pdf", "https://people.csail.mit.edu/jshun/6886-s18/papers/Graspan.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3c831e81d29dd5ae62f20120793ba7aaabc257b3", "sources": [ "DBLP" ], "title": "Graspan: A Single-machine Disk-based Graph System for Interprocedural Static Analyses of Large-scale Systems Code", "venue": "ASPLOS", "year": 2017 }, "3cabd75d1fcbc1ffeddad121cee327abf4223d49": { "authors": [ { "ids": [ "1777183" ], "name": "Wenfei Fan" }, { "ids": [ "1767378" ], "name": "Chunming Hu" }, { "ids": [ "1692159" ], "name": "Chao Tian" } ], "doi": "10.1145/3035918.3035944", "doiUrl": "https://doi.org/10.1145/3035918.3035944", "entities": [ "Algorithm", 
"Computation", "Dynamic problem (algorithms)", "Graph traversal", "Incremental computing", "Incremental search", "Pattern matching", "Polynomial", "Real life", "Search algorithm", "Undo" ], "id": "3cabd75d1fcbc1ffeddad121cee327abf4223d49", "inCitations": [ "68d3ada8bc4fb3de685cb870d9e72853d56b5c7d", "24c0e93c284487857c7d0efd83411036291ec812", "771610413f3654b8e4f38aab4dd970a481c7196f" ], "journalName": "", "journalPages": "155-169", "journalVolume": "", "outCitations": [ "56974d490966a9d4f5c28f8be37fc34a08256388", "23621097458ef14730bdf01c22b2b7a869c26d8e", "04bd9aa1c56056d6ac4cf560de60d198e2094c62", "68d3ada8bc4fb3de685cb870d9e72853d56b5c7d", "87b7f6f406259c03b96412bbecde9711a9b8fccf", "08b1a03f8e1e80273cdfe8d5049d6034177efdca", "0af1d9eb0c04296b9f6336ae5ee66ed4ac735e53", "38e2d3218a63c81f850fe6cc48743cd0c8099561", "2b9798550d7fdec148b21c6a190bd73d7c3bb4c6", "59a6fa9ef3841302aef0efb17d46579841b80031", "19aaf29ead773187824695f3dcdae028ed917967", "5599ed5b57958b32889c9f4f6c9261941ce2e79f", "0b43a0626314c092acdb66181a839efd01b54139", "6f53dbfe291a90ce168b3bea709cb0a4df8a0312", "064f9e6f1db223cc761c7a98126b9337b833a9e9", "081a8373c4ad9fda1d3aa9f86db48f7cbf0ac96f", "0456a5c3b2001465d05e84ce6786ef200184de65", "87507a498558ed6ed23115a42f42376c0884f7f2", "0990405c5642e93e22e28a302935bdd2b08bd11c", "182040ed27e6c9461a163fc6bff749a9687b2114", "ab20b24788cbbeab7d32e6d47aacf1afffe63396", "74737c472e6ba4fef759887753ab73146f96d0f2", "167283803961a7216e349d27bcbfb4ee377393c8", "385742fffcf113656f0d3cf6c06ef95cb8439dc6", "4ffce047b0189e30e51665f0c8872d05f383a962", "9da28672b71b658b8ea989e2bfc502582a79e079", "be94c5051438209816397be214ee8c1bdda94165", "d381f9a7234fcfb57c2f615e5c99cc7362ab60c9", "04226c2c505cddbf760ba15aebb66e1685c41343", "3de0169f727f8ec758d52774ec45f4c0b96bffe8", "3ba66b7716d2f93ae39b2bb79427038e449f5a7c", "55efcebcaaebe1765e788db748cab9342671561b", "3b034ee536cbf8c0152c8eae29b74a821d958976", "b8497f9088ae74a13ca498e7ecf68af8ad18e373", 
"6099814c55861b15467d26010631124bea5dfbda", "75e217284d18901ce8b1fc4a389d3c1152b544fb", "46805903a8757bf4ae81a0f80cd14cc3a1423d54", "54b00856c79b67e0e255952cc16c82974248dc82", "628b470c664be4eaf9ae3f75ecb630d64353bc4c", "18f59797fb675ae928e1c5a3af5006b44bfa28d8" ], "paperAbstract": "The incremental problem for a class 𝒬 of graph queries aims to compute, given a query Q ∈ 𝒬, graph G, output Q(G) and updates Δ G to G as input, changes Δ O to Q(G) such that Q(G ⊕ Δ G) = Q(G) ⊕ Δ O. It is called bounded if its cost can be expressed as a polynomial function in the sizes of Q, Δ G and Δ O. It is to reduce computations on possibly big G to small Δ G and Δ O. No matter how desirable, however, our first results are negative: for common graph queries such as graph traversal, connectivity, keyword search and pattern matching, their incremental problems are unbounded.\n In light of the negative results, we propose two characterizations for the effectiveness of incremental computation: (a) localizable, if its cost is decided by small neighbors of nodes in Δ G instead of the entire G; and (b) bounded relative to a batch algorithm T, if the cost is determined by the sizes of Δ G and changes to the affected area that is necessarily checked by T. We show that the incremental computations above are either localizable or relatively bounded, by providing corresponding incremental algorithms. That is, we can either reduce the incremental computations on big graphs to small data, or incrementalize batch algorithms by minimizing unnecessary recomputation. 
Using real-life graphs, we experimentally verify the effectiveness of our algorithms.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035944" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3cabd75d1fcbc1ffeddad121cee327abf4223d49", "sources": [ "DBLP" ], "title": "Incremental Graph Computations: Doable and Undoable", "venue": "SIGMOD Conference", "year": 2017 }, "3cb21132f55d8c7cbfc44d18f4a9a708b29049c2": { "authors": [ { "ids": [ "1777183" ], "name": "Wenfei Fan" }, { "ids": [ "1723390" ], "name": "Ping Lu" } ], "doi": "10.1145/3034786.3056114", "doiUrl": "https://doi.org/10.1145/3034786.3056114", "entities": [ "Axiomatic system", "Chase (algorithm)", "Complexity", "Functional dependency" ], "id": "3cb21132f55d8c7cbfc44d18f4a9a708b29049c2", "inCitations": [ "aca14454517490ff3820ebe2149146e5154bc58b" ], "journalName": "", "journalPages": "403-416", "journalVolume": "", "outCitations": [ "f80dd549b390f5cead90d43bdfe0fa33a8b92edc", "8e43e5eafe749c857e80bcf36bfd13ba24c07b45", "6a00c48d13a14612a005ec6ab3c03136b52eab6d", "511bf179e362f6b071f0a5fb329dece4f4e7ae8b", "216194fd3962261d75a93a17376adb6ca3e8845b", "7254253b9ba200277e7bea7c05f896f363e61a1f", "61d85687537a7399366f597b96a28f785081e812", "1b7785a99bb9e010df2fbbfdc4a6bc35786c0029", "1aa5e61ce8d0eaa79eefd6cbd412ed1f3da714eb", "1d4b990fff8a095eb2f726a04fdcf6ed10ff9948", "8581999bf98a4b5a1b4f410c792cb2c7ec46901f", "0b2298814b73d8e1ea10545e7ea74b1b74479d2e", "2bc7bac7f7cdf20816758fd794909176cc97ed92", "1ccbe32f1f4de198c204b9786809e079f500fc5d", "8b3cb3d5dd580bcccd079edd9b47e20e45dfdec3", "21df57c55c00d44b8ab235c230d58b17a6637466", "5415182a2a36a614721bfd385310f2a4f2f2a58a", "5320a779fd76ffd0fc82ab8ce5cc3456baf902be", "32bf95adcc61c4b5dbd40b40c0aa8ba5483d2300", "a97b77c8c3f9e8247c497d1b4c27c958a15f3a62", "19aaf29ead773187824695f3dcdae028ed917967", "5df0a894345ed5cfa6d74c8b277acdfd5b5a1587", "6a048dc38250ffce49c5e6a5040b4c91ca05e83d", "136d0daa5d918dfebe8a26a6991053ef372892ea", 
"454358f9c07c479fdca901dd61e0c56b3a8a6a7a", "b07b8217dfe1d4b1393d6132a211e0cac512a496", "8653c9bf0b4c6369f3a866c38f26f065a04638e4", "b8c82881cd4a29906edfb6684dfab6d00cf2445d", "2ee495cdf71dbab740771c4f02176a6f48827467", "18199ef67e170ba4f379ab45a8cc2581994e9178", "8a927261b51ad61ecfb73ed4cbcf34edc3197791", "3be60ca1d272b3a24fb637738fba28e2c73a5bb2", "78378cf6088e8b0264d7943c037f5137f59f2fc0", "6fc4b3b548c2c8283a280796e1cc62a5fcac9097", "2aba939d884130af73ef5894bf3728e0f48d1008" ], "paperAbstract": "This paper proposes a class of dependencies for graphs, referred to as graph entity dependencies (GEDs). A GED is a combination of a graph pattern and an attribute dependency. In a uniform format, GEDs express graph functional dependencies with constant literals to catch inconsistencies, and keys carrying id literals to identify entities in a graph.\n We revise the chase for GEDs and prove its Church-Rosser property. We characterize GED satisfiability and implication, and establish the complexity of these problems and the validation problem for GEDs, in the presence and absence of constant literals and id literals. We also develop a sound and complete axiom system for finite implication of GEDs. In addition, we extend GEDs with built-in predicates or disjunctions, to strike a balance between the expressive power and complexity. 
We settle the complexity of the satisfiability, implication and validation problems for the extensions.", "pdfUrls": [ "http://doi.acm.org/10.1145/3034786.3056114", "http://homepages.inf.ed.ac.uk/wenfei/papers/pods17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3cb21132f55d8c7cbfc44d18f4a9a708b29049c2", "sources": [ "DBLP" ], "title": "Dependencies for Graphs", "venue": "PODS", "year": 2017 }, "3cb9b37f3833ac594a630625f5812d9447dca06a": { "authors": [], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Baseline (configuration management)", "Behavior", "Crowdsensing", "Data Collection", "Fairness measure", "Frozen Premix Intravenous Piggyback Solution Dosage Form", "Map", "Middleware", "Network as a service", "Participatory sensing", "Selection algorithm", "Sensor", "Smartphone", "Smartphone", "Usability testing", "kilometer", "sensor (device)" ], "id": "3cb9b37f3833ac594a630625f5812d9447dca06a", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "df0d45fb16e69c79593b2d7f5394686470774f24", "7dc4eeec47a324d578f86169019b794c2537a344", "5de39c7c376512e4c68b4a1d3f4b2ee5d66cebdb", "5dcdee3ccb3f0bf84c818bbea3d4f8b55846b3da", "0c53aed7b6e94b2dfeae6d3b593e1f01059f81a0", "3f4f00024beda5436dce4d677f27fe4209c3790f", "7fcef2d6d36597d241e8a199d3b3a1a935a66032", "9155b200dd74a32949c97a2d632d99b8c0f55d9d", "174766da72dc03187217210aa8aaa8f84943e8af", "28337e9ffe6f860c0aba502b0067018d3a7adb84", "2fa6e201b968643d4223fab65721f699d8088075", "086699da0528ed47463cea3108851bd3dc5ba715", "607b132b0a27f764c1b4de4f707fc2eee0111558", "4528bd7e92e0b45a6743cee4db80c34c6cdc1902", "32de6a22689e041c643a596a62530cc1c14e1bd4", "db61ce862405ca89968f43fad8aff2e8136ccb41", "de0638301ff917b6aaf17560a3cb8707f299770c", "a253ef8d1ae3b3efe519588677a6c8af1eba4b8e", "a26ab1a06723b4748fc46d905d3a5ed0cf0a0a6e", "5b89c298175d546212abce1b0fb782fb7b4a3f20", "1d06c896a60a729f41093b6bf17ae3580b5015c0", 
"036277d492dd5777e87e5b33ffd809e5c617a37a", "0af5b25f0e1b95d0dc0ad9d3de2888843022b950", "0ce16fca876dd2a1596bcd84ce44843c2476d6b9", "25a1543aae8b2cd17d75a7930a8b13c23290eea4", "036d75b3bf4c75a08c491fbb3e47ae63ce4fb32f", "fb3ba5c7446fe8767dd9da5f6228f8bb15b5396b" ], "paperAbstract": "The rapid adoption of smartphones with different types of advanced sensors has led to an increasing trend in the usage of mobile crowdsensing applications, e.g., to create hyperlocal weather maps. However, the high energy consumption of crowdsensing, chiefly due to expensive network communication, has been found to be detrimental to the wide-spread adoption. We propose a framework, called Sense-Aid, that can provide energy-efficient mobile crowdsensing service, coexisting with the cellular network. There are two key innovations in Sense-Aid beyond prior work (Piggyback Crowdsensing-Sensys13)\u2014the middleware running on the cellular network edge to orchestrate multiple devices present in geographical proximity to suppress redundant data collection and communication. It understands the state of each device (radio state, battery state, etc.) to decide which ones should be selected for crowdsensing activities at any point in time. It also provides a simple programming abstraction to help with the development of crowdsensing applications. We show the benefit of Sense-Aid by conducting a user study consisting of 60 students in our campus, compared to a baseline periodic data collection method and Piggyback Crowdsensing. We find that energy saving is 93.3% for Sense-Aid compared with Piggyback Crowdsensing in a representative case which requires 2 devices to provide barometric values within an area of a circle whose radius is 1 kilometer and requires periodic data collection every 5 minutes for a 90-minute test. 
The selection algorithm of Sense-Aid also ensures reasonable fairness in the use of the different devices.", "pdfUrls": [ "https://engineering.purdue.edu/dcsl/publications/papers/2017/final_senseaid_middleware17_submitted.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/3cb9/b37f3833ac594a630625f5812d9447dca06a.pdf", "s2Url": "https://semanticscholar.org/paper/3cb9b37f3833ac594a630625f5812d9447dca06a", "sources": [], "title": "Sense-Aid: A Framework for Enabling Network as a Service for Participatory Sensing", "venue": "", "year": 2017 }, "3cbfbeeddc81a73b6974549dcbc296707e2bbe7a": { "authors": [ { "ids": [ "1856316" ], "name": "Pawel Janus" }, { "ids": [ "1780581" ], "name": "Krzysztof Rzadca" } ], "doi": "10.1145/3127479.3132244", "doiUrl": "https://doi.org/10.1145/3127479.3132244", "entities": [ "Algorithm", "Approximation", "Bin packing problem", "Central processing unit", "Colocation centre", "Data center", "Experiment", "Preemption (computing)", "Requirement", "Simulation" ], "id": "3cbfbeeddc81a73b6974549dcbc296707e2bbe7a", "inCitations": [], "journalName": "", "journalPages": "256-268", "journalVolume": "", "outCitations": [ "3a043714354fe498752b45e4cf429dbae0fb2558", "351ad1609d4e0c3f3f27e522893739cba48492ba", "62a68d15bbfef566170fc610183eb7ebf8313dce", "dc69b14f1dfaa0d4d4949268bfb8a2e20135036e", "26c5818349f8b79ed3b3ba3341c9ff0b14c28d2f", "59ab46bfd59cb43876e701389f256b93430e6273", "7a898ef6316cf340e211c8395131f7e96eed4a21", "33ea3afa698b923e2452a05354e543ecd0d65c5f", "8730033f32fbcca2c82559fa0c218143c707d7f7", "5c83b946c6c539f2a70cc1fd99f245b928dfe6ac", "e17e7bffaa7bdda0dcdec8eb4d200a13e4e156a4", "6f5d96874b919df9e884a165a21859b860f2a5fd", "2735a3f6c7a669784e8ba72e9674d6145bdc4e92", "028f453ae36196e279369e358413befa19874b82", "b4bc2a33a36c7cea43c7eefd044f1d55e33d7ffd", "74dedf3bab9d64648b955f3d85ea79a20ca3960b", "11eeb124cffcd749f46f8d25ede967d1a6d997f6", "7ba475aae0b51b43df0e088c87f1da400e4e3497", 
"31b27a3b4ff89993eb92e8b1353edead8d5f2520", "be8cb70c82f2dca180c7753590b7a8e6ee576ded", "2fcdb9f6f5c96ba8a7b7d54b90485a38c885e914", "2d9e62ca99119615436b7d4c6aef76eabac1a4e8", "71ad31bd506ea571f6c04a293ff298f42fa7b47c", "1ee3e65a3e5cb1b814a39258aa0f7cb60a51f955", "81b797ed61d790ea031bc415cbdd953f0377f0fb", "4bfa477c7dfa70367baee31e5eecc93730b3ba59", "6d37e31161d5f7ac35a6598e91270344983236eb", "b6571efa4483aa00d23bbcd36930c4877548ba38", "3e257f01e3ee71545d824a1615c35659525b856a" ], "paperAbstract": "In a cloud data center, a single physical machine simultaneously executes dozens of highly heterogeneous tasks. Such colocation results in more efficient utilization of machines, but, when tasks' requirements exceed available resources, some of the tasks might be throttled down or preempted. We analyze version 2.1 of the Google cluster trace that shows short-term (1 second) task CPU usage. Contrary to the assumptions taken by many theoretical studies, we demonstrate that the empirical distributions do not follow any single distribution. However, high percentiles of the total processor usage (summed over at least 10 tasks) can be reasonably estimated by the Gaussian distribution. We use this result for a probabilistic fit test, called the Gaussian Percentile Approximation (GPA), for standard bin-packing algorithms. To check whether a new task will fit into a machine, GPA checks whether the resulting distribution's percentile corresponding to the requested service level objective, SLO is still below the machine's capacity. 
In our simulation experiments, GPA resulted in colocations exceeding the machines' capacity with a frequency similar to the requested SLO.", "pdfUrls": [ "http://arxiv.org/abs/1709.01384", "https://arxiv.org/pdf/1709.01384v1.pdf", "http://doi.acm.org/10.1145/3127479.3132244" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3cbfbeeddc81a73b6974549dcbc296707e2bbe7a", "sources": [ "DBLP" ], "title": "SLO-aware colocation of data center tasks based on instantaneous processor requirements", "venue": "SoCC", "year": 2017 }, "3ce9010b0abf292fac3188b943a8704dbfe78746": { "authors": [ { "ids": [ "1695199" ], "name": "Chen Avin" }, { "ids": [ "1732387" ], "name": "Zvi Lotker" }, { "ids": [ "2946913" ], "name": "Yinon Nahum" }, { "ids": [ "1722034" ], "name": "David Peleg" } ], "doi": "10.1145/3097983.3098012", "doiUrl": "https://doi.org/10.1145/3097983.3098012", "entities": [ "Degree (graph theory)", "Graph (discrete mathematics)", "Ohm's law" ], "id": "3ce9010b0abf292fac3188b943a8704dbfe78746", "inCitations": [], "journalName": "", "journalPages": "45-53", "journalVolume": "", "outCitations": [ "736435d881ce14243725d746c66d3d803f9a7fa4", "3387184100441187814b0b81873ca34d6c56a4d0", "85c2ce303c920848e7615061db1ea5bf9e126ab6", "a2169f2d107ae71a06132cf6f8aed0a029e5eb4e", "506e253afd8b5219e0441387af6f7fab723cbb45", "2bde20e5b469c96a90b34f67eeb4997b7a265ef0", "058ada9d78c689e03ea898008e2bcebf69910d4f", "24f0c3f080b15708c9c38341f65c3a0b8fab51f2", "29cc0a8802126d4e97f28109763df26ab91c6531", "427aab560632f381f62063c6b9fcb075b6a5b773", "f7fbb4f9d6c623af0eaf93a41a67658ad1ba8c62", "b192a8757f97fc643e59acecda17f876a7efc3d6", "7a160de26bd8c83273394cce0dd2a7ac66a80a33", "1f0612de1f191abadf250b78cd78f884203cca5e", "5eabaf7f36cbd6be74c2e78e91f0457f358ba14c", "ffb8d10eb9c060b54806de2dae437aa61bbfe47b", "bddc50ec2c127ad822b0e095dedfb622e305a16f" ], "paperAbstract": "Consider a random preferential attachment model G(p) for network evolution that allows both node and 
edge arrivals. Starting with an arbitrary nonempty graph G0, at each time step, there are two possible events: with probability p > 0 a new node arrives and a new edge is added between the new node and an existing node, and with probability 1 - p a new edge is added between two existing nodes. In both cases, the involved existing nodes are chosen at random according to preferential attachment, i.e., with probability proportional to their degree. G(p) is known to generate power law networks, i.e., the fraction of nodes with degree k is proportional to k^{-β}. Here β=(4-p)/(2-p) is in the range (2,3].\n Denoting the number of nodes of degree k at time t by mk,t, we significantly improve some long-standing results. In particular, we show that mk,t is concentrated around its mean with a deviation of O(√t), which is independent of k. We also tightly bound the expectation Emk,t with an additive error of O(1/k), which is independent of t. These new bounds allow us to tightly estimate mk,t for a considerably larger k values than before. This, in turn, enables us to estimate other important quantities, e.g., the size of the k-rich club, namely, the set of all nodes with a degree at least k.\n Finally, we introduce a new generalized model, G(pt, rt, qt), which extends G(p) by allowing also time-varying probabilities for node and edge arrivals, as well as the formation of new components. We show that the extended model can produce power law networks with any exponent β in the range (1,∞). 
Furthermore, the concentration bounds established for mk,t in G(p) also apply in G(pt, rt, qt).", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098012" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3ce9010b0abf292fac3188b943a8704dbfe78746", "sources": [ "DBLP" ], "title": "Improved Degree Bounds and Full Spectrum Power Laws in Preferential Attachment Networks", "venue": "KDD", "year": 2017 }, "3d14f8d1d085e3bf513ca205a39ae44256e78c33": { "authors": [ { "ids": [ "2720913" ], "name": "Danyang Zhuo" }, { "ids": [ "2639398" ], "name": "Monia Ghobadi" }, { "ids": [ "38981616" ], "name": "Ratul Mahajan" }, { "ids": [ "36232499" ], "name": "Klaus-Tycho F\u00f6rster" }, { "ids": [ "1689594" ], "name": "Arvind Krishnamurthy" }, { "ids": [ "1748580" ], "name": "Thomas E. Anderson" } ], "doi": "10.1145/3098822.3098849", "doiUrl": "https://doi.org/10.1145/3098822.3098849", "entities": [ "Cloud computing", "Data center", "Network congestion", "Network packet", "Recommender system", "Telephone exchange" ], "id": "3d14f8d1d085e3bf513ca205a39ae44256e78c33", "inCitations": [ "63efcd0695d3de798e2743739c8b6a32a568fb84", "a0660414bd091a3515fc4649d7d61b3de5950bbd", "c6b58a571ce2931f5a8a02c29f2e4570e9f72d4f" ], "journalName": "", "journalPages": "362-375", "journalVolume": "", "outCitations": [ "3b988049dd8f62f772281e90196bbd793700c86b", "0b834b113793154a918f3ef11cd6bf60be77259e", "2d274fd896268c6e5ffbe4d9ec7dfbe7f0956b7a", "07367703f587dbc3313cc613289c4330cebe5c8c", "08b12c65fa237ed030954efcdc42094f4bc7ecf1", "39300a6bb64f813bd233343b840cb169d8d0527f", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "559e4671b87c3f76d3c485ebdaefe734323879f0", "9289860d43896b2d174a136eb56f03bb1b05e8d9", "46eba995c5371d7966d59549f61c203cecd1d3c7", "c20d8cdb35c495c4e4e2571d709396e92fb94514", "025652412d507a8cf98ecacd8a44d32ce28995e1", "157629dc2a9d2c2c7696e021ff8c1d5a6e7b4197", "a05548af9f54a7cd57a5c3f2d51b9e76f559f04a", "23dadf25f3efacbc9c66f69093d656ad5b003529", 
"83a1a478c68e21ee83dd0225091bf9d3444a8120", "0ea5ac1eb04bcf16a8856d886be45ec90044a4c3", "55c5a2c1f2f6f66646baf64405464357fa7df8a0", "2fdfb9098803f9b7d523d9bf67dca8b53cd28cf9", "122229239aeba1eb4f1623adb40f1845c582a520", "25ded9f81378f6b85daf5a70c85bbadfb84ebc3d", "61aa09bc1a3eae17480645c90b06a18cbd62d9af", "08ccc42650ca6c449bda88484eab4e847990eb85", "1643e122653b255d267763b1bc17fbb4346e10ce", "58f692e9b03cb973355aab46bb6f867239aeb513", "3b50af00a37195726ede36cb080e3f44bb3a3f08", "0f35b3fd2ef4638a23ee07db4057cc78365c982a", "2fa9756c55473f2a667b608394d89269535bca84", "077b23a64c80039a9d36da0cab766262edc89af2" ], "paperAbstract": "We take a comprehensive look at packet corruption in data center networks, which leads to packet losses and application performance degradation. By studying 350K links across 15 production data centers, we find that the extent of corruption losses is significant and that its characteristics differ markedly from congestion losses. Corruption impacts fewer links than congestion, but imposes a heavier loss rate; and unlike congestion, corruption rate on a link is stable over time and is not correlated with its utilization.\n Based on these observations, we developed CorrOpt, a system to mitigate corruption. To minimize corruption losses, it intelligently selects which corrupting links can be safely disabled, while ensuring that each top-of-rack switch has a minimum number of paths to reach other switches. CorrOpt also recommends specific actions (e.g., replace cables, clean connectors) to repair disabled links, based on our analysis of common symptoms of different root causes of corruption. Our recommendation engine has been deployed in over seventy data centers of a large cloud provider. 
Our analysis shows that, compared to current state of the art, CorrOpt can reduce corruption losses by three to six orders of magnitude and improve repair accuracy by 60%.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098849", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/06/CorrOpt_SIGCOMM2017.pdf", "https://homes.cs.washington.edu/~arvind/papers/corropt.pdf", "http://conferences.sigcomm.org/sigcomm/2017/files/program/ts-9-1-CorrOpt.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3d14f8d1d085e3bf513ca205a39ae44256e78c33", "sources": [ "DBLP" ], "title": "Understanding and Mitigating Packet Corruption in Data Center Networks", "venue": "SIGCOMM", "year": 2017 }, "3d1870f7756b05d1ca7aab0eadf56fdd06af1f12": { "authors": [ { "ids": [ "23303542" ], "name": "Francesco Pace" }, { "ids": [ "2271649" ], "name": "Daniele Venzano" }, { "ids": [ "1721979" ], "name": "Damiano Carra" }, { "ids": [ "1715270" ], "name": "Pietro Michiardi" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Baseline (configuration management)", "Heuristic", "High- and low-level", "Little Big Adventure", "Responsiveness", "Scheduling (computing)", "Simulation", "Tracing (software)" ], "id": "3d1870f7756b05d1ca7aab0eadf56fdd06af1f12", "inCitations": [ "44af1c2f37b3ec6f4d1ca99375b122871488cf69", "0b53cea748bdb5a404ed6999c23eb097622f0f08" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "100-109", "journalVolume": "", "outCitations": [ "6734d1e0faddcfb710d389856a5a7dabfb16d4a9", "27977ec1208ec8e830d4e30322cd5929440c67f8", "0998bc7d524a915acc9ecf31950884d4751729e4", "00a98bc28e62f5f9e258f17b2b205c4fa5fb20a2", "332f77fd05703c1607e3b57884ad31fb1fad0104", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "4581948531998d5e5f23c131081ea0cdd9066bfe", "d5a537632e7a40af2d20001984e6b80862f30f34", 
"027bd50767a7f61fb0fc3c27051a63b209c10a99", "0541d5338adc48276b3b8cd3a141d799e2d40150", "590bd345ef4b8f274af3363a52b7d8f518cdc08a", "756d1f8f07a83f3cfc0edaa81493a9f109628e1b", "0558c94a094158ecd64f0d5014d3d9668054fb97", "91509396b1f7f250a777cbf8db17149bb58f2cb2", "31b27a3b4ff89993eb92e8b1353edead8d5f2520", "4a8a46173188b44467a967272f17254092c690c5", "dc69b14f1dfaa0d4d4949268bfb8a2e20135036e", "43f233a105d2bda1bea23c2f37956d5ca17d8f0c", "3e257f01e3ee71545d824a1615c35659525b856a", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "2988e34168fa91398fa397baf823af2063893e9c", "0608d9937c074520cdc93cc444cc1c77039c5332", "57c2192c290fd944d6623853b695a255d06b28f8", "855bdf107bfac7780cefbb512bf16f7b5c1cb2d1", "bafa7e2d586e7bfe77d9a55ac1cff4eb2f6ff292", "3a043714354fe498752b45e4cf429dbae0fb2558", "41e0ef45114deff41d92c8b235393cfe4d9a94f4", "3784b73a1f392160523400ec0309191c0a96d86f", "11b12a29a9efb60a892b48fc61e70ab63e59b37e" ], "paperAbstract": "This work addresses the problem of scheduling user-defined analytic applications, which we define as high-level compositions of frameworks, their components, and the logic necessary to carry out work. The key idea in our application definition, is to distinguish classes of components, including rigid and elastic types: the first being required for an application to make progress, the latter contributing to reduced execution times. We show that the problem of scheduling such applications poses new challenges, which existing approaches address inefficiently. Thus, we present the design and evaluation of a novel, flexible heuristic to schedule analytic applications, that aims at high system responsiveness, by allocating resources efficiently. Our algorithm is evaluated using trace-driven simulations, with large-scale real system traces: our flexible scheduler outperforms a baseline approach across a variety of metrics, including application turnaround times, and resource allocation efficiency. 
We also present the design and evaluation of a full-fledged system, which we have called Zoe, that incorporates the ideas presented in this paper, and report concrete improvements in terms of efficiency and performance, with respect to prior generations of our system.", "pdfUrls": [ "http://www.eurecom.fr/en/publication/5164/download/data-publi-5164.pdf", "http://arxiv.org/abs/1611.09528", "http://dl.acm.org/citation.cfm?id=3101126", "http://www.eurecom.fr/fr/publication/5083/download/data-publi-5083.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3d1870f7756b05d1ca7aab0eadf56fdd06af1f12", "sources": [ "DBLP" ], "title": "Flexible Scheduling of Distributed Analytic Applications", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "3d3331991fcce7d7889e590e20e48aa4826dcd73": { "authors": [ { "ids": [ "40600553" ], "name": "Ismail Akturk" }, { "ids": [ "1695310" ], "name": "Ulya R. Karpuzcu" } ], "doi": "10.1145/3037697.3037741", "doiUrl": "https://doi.org/10.1145/3037697.3037741", "entities": [ "Computation", "Computer data storage", "Memory hierarchy" ], "id": "3d3331991fcce7d7889e590e20e48aa4826dcd73", "inCitations": [], "journalName": "", "journalPages": "811-824", "journalVolume": "", "outCitations": [ "0f892fa9574f24bc7b50fed94e0abbd84883c2dc", "d2ca94b978834e8a08ae2933ed3ca060458ab0ba", "09c6f94f84784fed6d222b3d7b31977eda214d63" ], "paperAbstract": "Due to imbalances in technology scaling, the energy consumption of data storage and communication by far exceeds the energy consumption of actual data production, i.e., computation. As a consequence, recomputing data can become more energy efficient than storing and retrieving precomputed data. At the same time, recomputation can relax the pressure on the memory hierarchy and the communication bandwidth. This study hence assesses the energy efficiency prospects of trading computation for communication. 
We introduce an illustrative proof-of-concept design, identify practical limitations, and provide design guidelines.", "pdfUrls": [ "http://www.cs.utah.edu/asplos14/files/amnesiac.pdf", "http://doi.acm.org/10.1145/3037697.3037741", "http://altai.ece.umn.edu/Publications_files/amnesiac_errata.pdf", "https://www.cs.utah.edu/asplos14/files/amnesiac.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3d3331991fcce7d7889e590e20e48aa4826dcd73", "sources": [ "DBLP" ], "title": "AMNESIAC: Amnesic Automatic Computer", "venue": "ASPLOS", "year": 2017 }, "3d4d19a68a9ee57f0c4bb5f692b488ac4ce2bb8e": { "authors": [ { "ids": [ "3108945" ], "name": "Haoyu Zhang" }, { "ids": [ "2849491" ], "name": "Ganesh Ananthanarayanan" }, { "ids": [ "1775084" ], "name": "Peter Bod\u00edk" }, { "ids": [ "3041721" ], "name": "Matthai Philipose" }, { "ids": [ "2292948" ], "name": "Paramvir Bahl" }, { "ids": [ "3122063" ], "name": "Michael J. Freedman" } ], "doi": "", "doiUrl": "", "entities": [ "Approximation", "Microsoft Azure", "Real-time computing", "Scheduling (computing)", "Smart city", "Streaming media", "Video content analysis" ], "id": "3d4d19a68a9ee57f0c4bb5f692b488ac4ce2bb8e", "inCitations": [ "cbbf860f8065a3e1bd72a07d4cbac5f798065ca1", "66dd732e588bac4580342da21302c36270d615c5", "193342874858249aed4796cee35a8bec1b70e236", "94d96a4e255519b75b2d1f8aaa322aba1cb77822", "1775944e16cbafc703fd62793fa5e93ca2c9a7a1", "5715e1e106dae3a49a40314e3b05c142d38da6d9", "36a64cb68a3da37ed9d54f03750e1f1ac6d3d336", "0c9bccd940a26e5074165b2ce082db9c4eac0fd8", "391a6a423e06b0767e9fc9df4f43c5533c0ab662", "081fdeea36d4b56a71e87b5b0de191aa368261c8" ], "journalName": "", "journalPages": "377-392", "journalVolume": "", "outCitations": [ "6b3d607cf34655e5c5b8f0eb77bc63bafc9ce23b", "3a043714354fe498752b45e4cf429dbae0fb2558", "679cfc41aa5f0174040b5ab23ea92cf04f495a6e", "d8057d514036d51051af78476468fe350cb7488a", "4b65024cd376067156a5ac967899a7748fa31f6f", 
"477653f4cb3c213d1d4252cfc7a185e7785b635c", "0dddc9f39c6083de14e3a405eee27a4cd5463a9c", "23ebd0c6297a00659831207f0e353b4fc4a0c3f8", "4af5f1cecc3d41fc2cec5e99b918468ed02a7bf8", "184014795c3c2bbf23f3959f6d8b1ab8bc03aea8", "53d1d70eac1dbd76a0baea4a6d5f181dd56cb972", "563debb9959f0e1742bb83d38f03383b611ae56d", "a27c3f0a249dc122104b937c5783f83b3585bb53", "0ef88610f4382a9c6e5c8020ab6ccf3dde0fb3eb", "0707d61295b35d9db46ee368e79c59dd08c7c82c", "88a44593454773d887fe9b51b20db66ef5d3956b", "2ce63d77eecc35faef85a3b752a314c93a077ac9", "198a818721809dcf9b3aebc23c568f8db92918db", "bf241e2c30349e81df70053b9cd2d8fd708cd13a", "bbc2a698c2fb2b76e256cc51a9d7c37765ab51b6", "561b0881fb83c7182bca4aec70bd287ea0f5be28", "453fded1ab86ed9d9d90ff3ff83a4faa493d3db2", "685f1e1a88f299704591266f2ee9abe29afdd124", "38211dc39e41273c0007889202c69f841e02248a", "43776b15c034076a36b7143d58af8e04715e41d0", "75032b6df2f3988d2cb6988c73cbc01c7b9e80cb", "aa936077447217db8970ec799ccfeb09b2dd03f0", "3ede9f94ee174dae45f3e95cf3a45df2dbe05307", "9d6108ffaf98474b9f5082eacfeaea753d23c54f", "b6571efa4483aa00d23bbcd36930c4877548ba38", "9f1f065bf08cd90431cc051267a708f56436cd82", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "1bd012ce49c66ab91d144e15fd9bfa9fac53d78b", "fa6d3585158f11d3b8ce16367b9894c80cdb53ac", "8af01e6cb7375ff671ed6efd8576253ab6e12d04", "20926778c2725c7ed99efb124abba6e73c26c0fd", "d445f483a8f2f6a31c75fc00d43b02dbac083f21", "ae0eec4ab8f4fc75c776b7b8284f86907cf8654c", "c3c262b8e56536d14826926b69af59eaefc29bc2", "47f5bba54710b0e1663e9336790cb4609d16077d", "4783c303f45d78323e1206c962ce6fcea57e724d", "eb861220e6ae74235e00e0ca7c036ed356141590", "6067366349bb22b5e04ab4d820eb203cf41ef7e0", "163bcebf0de69c18fe6542d6b7058395a30ac037", "4b85560ce4467806cd0a838f5eb478ece0e05eef", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "2988e34168fa91398fa397baf823af2063893e9c", "22899cc07f2b2ae632ac58b2ec58fbe239f3fbd3", "00aa614734a26a19b09a0a3bdee2adc77bdac5e4", "6a912e2c1f818a047bc620f475b6b6e3b0dbacfe", 
"00a9d4c95eae5a382d36068b60ec4ccf4811fa75", "3d7730154a2b0f31e37863a68d89059df3acb416", "37e882e696162cbce0d2a4f4f022162a418a58dd", "0b05075bd235ce4f0f0e715c486ab9a874f4f3fe", "519d2a6b454f3d0f258ae8d9d266df7ef622a955", "26f58c28de7469dc6b6846d37953bbbe3f4fc0e9", "061356704ec86334dbbc073985375fe13cd39088", "411eb6534d39a37ed43443ba1d2e168c73171330", "59ab46bfd59cb43876e701389f256b93430e6273", "2a7d3b967a356c2a42f729048b0d3511b0005351", "0d868efa67bf06b1f784d60769c082fd9a58893e", "c25381cb6eb72d16748ab98a126f97826df686a6", "1c7220880cc20fe1cb6e0a40134c298134651192", "044131dbc2267712d20a5708b30c5db77e2b34dc", "48dcef999ef41e839bf66386c0c0a54c13be1fcf", "08f13e484e7e51831ec13076d14570ced91a50fb" ], "paperAbstract": "Video cameras are pervasively deployed for security and smart city scenarios, with millions of them in large cities worldwide. Achieving the potential of these cameras requires efficiently analyzing the live videos in realtime. We describe VideoStorm, a video analytics system that processes thousands of video analytics queries on live video streams over large clusters. Given the high costs of vision processing, resource management is crucial. We consider two key characteristics of video analytics: resource-quality tradeoff with multi-dimensional configurations, and variety in quality and lag goals. VideoStorm\u2019s offline profiler generates query resource-quality profile, while its online scheduler allocates resources to queries to maximize performance on quality and lag, in contrast to the commonly used fair sharing of resources in clusters. 
Deployment on an Azure cluster of 101 machines shows improvement by as much as 80% in quality of real-world queries and 7\u00d7 better lag, processing video from operational traffic cameras.", "pdfUrls": [ "http://www.cs.princeton.edu/~mfreed/docs/videostorm-nsdi17.pdf", "http://www.cs.princeton.edu/~haoyuz/publications/videostorm-slides.pdf", "http://www.cs.princeton.edu/~haoyuz/publications/videostorm-nsdi17.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/zhang", "http://www.cs.cornell.edu/courses/cs6453/2017sp/slides/approxvideo.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/02/videostorm_nsdi17.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-zhang.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-zhang.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_zhang_haoyu.pdf", "https://www.systems.ethz.ch/sites/default/files/hadp2017-benjamin_rothenberger.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4aa4/3a0a70a39efdc37fc7218747a9806fe8eeb6.pdf", "s2Url": "https://semanticscholar.org/paper/3d4d19a68a9ee57f0c4bb5f692b488ac4ce2bb8e", "sources": [ "DBLP" ], "title": "Live Video Analytics at Scale with Approximation and Delay-Tolerance", "venue": "NSDI", "year": 2017 }, "3d4ea30bf71e100945ad0b431e5f539509be8d2d": { "authors": [ { "ids": [ "1693997" ], "name": "Liang Wang" }, { "ids": [ "34739391" ], "name": "Paul Grubbs" }, { "ids": [ "4683204" ], "name": "Jiahui Lu" }, { "ids": [ "3094927" ], "name": "Vincent Bindschaedler" }, { "ids": [ "40203106" ], "name": "David Cash" }, { "ids": [ "1707461" ], "name": "Thomas Ristenpart" } ], "doi": "10.1109/SP.2017.50", "doiUrl": "https://doi.org/10.1109/SP.2017.50", "entities": [ "Controlled vocabulary", "Document", "Document retrieval", "Elasticsearch", "Experiment", "Multitenancy", "Side-channel attack", "Solr", "Tf\u2013idf", "Web search engine", "Web search query" ], "id": 
"3d4ea30bf71e100945ad0b431e5f539509be8d2d", "inCitations": [ "fe1c81c00d516ef38da1a429d721e1d05cc488c0" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "673-692", "journalVolume": "", "outCitations": [ "2fc84ea4ffbee661ce90c5804101887abe8268a8", "c38c8e3567dd01792fef967a8c3b56ba5b9b2c6d", "26f3cbecbc636984d57e52191c1d87c9377aff6f", "3501c7bb6342524e74708a1c6642cfdd6963a465", "7272e04a354d3bc54b1a8119e868090996d3bf1d", "040678daf6a49a88345ee0c680fccfd134f24d4b", "05466cfe18824fd3886d38c9ceac13f75fd86289", "8372016fe38121358163c20f88e28fc0267e30b1", "d38fa038f2e37e25d7e7546d4268a7f356bce70e", "2abe6b9ea1b13653b7384e9c8ef14b0d87e20cfc", "260014722dbca049cbef9f60ee10ace6c7c75ea2", "187467e789413f7dbaf9c66efebcffeeec0b9923", "652b955ded01429125d0f6f1c668d10ef3433dd8", "2bdf7087d706ec519c2d9ff9517d44c4dcde3768", "4d624b942a58818f8d425460638cb4b65ed84e1c", "1f932f0d49a4c56d9718e8506d6177c6a6848831", "64c22174a3a43c1224c88ba37eae6421a0086730", "656e782fe23364e36a49aeef2d8a74126a38ea04", "639351f84d03f671b30e2f4780f28b84664d6f64" ], "paperAbstract": "Full-text search systems, such as Elasticsearch and Apache Solr, enable document retrieval based on keyword queries. In many deployments these systems are multi-tenant, meaning distinct users' documents reside in, and their queries are answered by, one or more shared search indexes. Large deployments may use hundreds of indexes across which user documents are randomly assigned. The results of a search query are filtered to remove documents to which a client should not have access. We show the existence of exploitable side channels in modern multi-tenant search. The starting point for our attacks is a decade-old observation that the TF-IDF scores used to rank search results can potentially leak information about other users' documents. 
To the best of our knowledge, no attacks have been shown that exploit this side channel in practice, and constructing a working side channel requires overcoming numerous challenges in real deployments. We nevertheless develop a new attack, called STRESS (Search Text RElevance Score Side channel), and in so doing show how an attacker can map out the number of indexes used by a service, obtain placement of a document within each index, and then exploit co-tenancy with all other users to (1) discover the terms in other tenants' documents or (2) determine the number of documents (belonging to other tenants) that contain a term of interest. In controlled experiments, we demonstrate the attacks on popular services such as GitHub and Xen.do. We conclude with a discussion of countermeasures.", "pdfUrls": [ "https://www.ieee-security.org/TC/SP2017/papers/449.pdf", "https://doi.org/10.1109/SP.2017.50", "http://www.research.cs.rutgers.edu/~dc789/sp17stress.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3d4ea30bf71e100945ad0b431e5f539509be8d2d", "sources": [ "DBLP" ], "title": "Side-Channel Attacks on Shared Search Indexes", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "3d5fa0ff4a34d8e6dcab5f49a4064cb676bfcb0c": { "authors": [ { "ids": [ "1793192" ], "name": "Qun Huang" }, { "ids": [ "1785303" ], "name": "Xin Jin" }, { "ids": [ "33431705" ], "name": "Patrick P. C. 
Lee" }, { "ids": [ "1835783" ], "name": "Runhui Li" }, { "ids": [ "3852589" ], "name": "Lu Tang" }, { "ids": [ "1687666" ], "name": "Yi-Chao Chen" }, { "ids": [ "17294285" ], "name": "Gong Zhang" } ], "doi": "10.1145/3098822.3098831", "doiUrl": "https://doi.org/10.1145/3098822.3098831", "entities": [ "Compressed sensing", "Experiment", "Fast path", "Forwarding plane", "Open vSwitch", "Overhead (computing)", "Testbed", "Throughput" ], "id": "3d5fa0ff4a34d8e6dcab5f49a4064cb676bfcb0c", "inCitations": [ "88b46e17199bfaa4cf65498bcaeced5284279b97", "3549a20b8ba853ac529e8737d4ea54992c163efd" ], "journalName": "", "journalPages": "113-126", "journalVolume": "", "outCitations": [ "3dde3fec553b8d24a85d7059a3cc629ab33f7578", "051179ff84ec9ef8ed37c00f111f76784e61a6a1", "7a278ee0578f194700cadc3811cdda4ec751f88a", "4a445eafd8d432adae9f8123a60416d39749ece8", "1c74f84dcfaaa317a82708ad30f395a893dbb9c6", "5f28bf666498d5800e015f12318930ce03cd5587", "77aac7f986134593b711416d60691f9e6862643a", "13400fdff75fc0b73652fcdab59e52814022b9b0", "5ca2ff0d86302bc9ea69007ecdb490af37eee157", "1a5bff15fdffa1205c0b1e6aa101158915dd8e87", "7efe0dc4cde074bd87089491a6f95dde84397cca", "0dd16e993f715a0c8b8d992d5c6ec1fd5d54eda0", "3f5dc20732d3af093fec0fe2806d3c63d0652682", "27f4001214ce0d449eb05d33626f444526accc7c", "740437f83a1ee1f9326ad68349fdd50431bcc702", "e4c5a8575a2576c4b9a6df65af6b7d5e657373ac", "0a59166593f0a2fa260f16fd853299c9f0863fdf", "0f318aa5af40450af9ba2f50872bdf26741e510a", "06beeda7be321eb0a294af55b7689d22d77a5b2b", "53dd59981ee7aa4cbf66ba737d50f5076a148414", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "2e4ab1140b454fc6dacf4d23d3663aa34c741577", "77f1fee2ef313f7830c0ff39d6f525c877a624cf", "64e7d37666ea9f172cc63cbef84d1d123d6e95e5", "049504df22c77010e5bb62a2088f70fabc5ecb6d", "0849e21a444d4a3bbea735a788628bea5543f900", "46a38fb1e26c9957111d154c6fc2c39101ff0ba1", "b36c153be410c0d937d7583de557c0375506d15a", "282c6a3b573051e3e799d73cfc623ccbd68bcd6a", "3318f54d21edb825cb223e2fd88754c61b362e4c", 
"8112c4305b88d85199267e9e03d3a0aca4432059", "044ce2a427c65d53f3d8279339b8eb6f020121c7", "0cfe1b85e5f1d56e41a95c3b2fa274e9fe8b45d0", "b670caf8a78f2a797da0ddc691093223fa17227d", "933c9c7d3e4073edb0963646545ee79ed915369f", "0f35b3fd2ef4638a23ee07db4057cc78365c982a", "2b0c044181e70ee8eacd2db26c31a03d5ec24c9c", "13bf79b773cc84590d3efeb88187f2675dea4b81", "24763030fb1e9813dad51d28bea9c5d1414f9cda", "0026eee31421ce11c665c8a5de319f1f492f4060", "0ce8a32702fd734d6547a1cbb3ba5d3effd79932", "005aea80a403da18f95fcb9944236a976d83580e", "4004e51f8f6bb775bd394942007f761d42fdaaad", "718492ffcd94939bb092418ea126e942d22d8ecb", "3b406e5bb5740b660355613f42ace67ad2d2126e", "401b598ffcec2b7ccb11ac3f2bf8da83c2ac542b", "0f77e06ba24387bc351d84de95703f19212b2ea7", "e3497952347101a3535434bc35d378224cf87bcc", "ff17f3e5cb6398cbd3da2c87c887a6c6de0468e9", "0f48c8c449b82647e98383d124578d95cc57e95d" ], "paperAbstract": "Network measurement remains a missing piece in today's software packet processing platforms. Sketches provide a promising building block for filling this void by monitoring every packet with fixed-size memory and bounded errors. However, our analysis shows that existing sketch-based measurement solutions suffer from severe performance drops under high traffic load. Although sketches are efficiently designed, applying them in network measurement inevitably incurs heavy computational overhead.\n We present SketchVisor, a robust network measurement framework for software packet processing. It augments sketch-based measurement in the data plane with a fast path, which is activated under high traffic load to provide high-performance local measurement with slight accuracy degradations. It further recovers accurate network-wide measurement results via compressive sensing. We have built a SketchVisor prototype on top of Open vSwitch. 
Extensive testbed experiments show that SketchVisor achieves high throughput and high accuracy for a wide range of network measurement tasks and microbenchmarks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098831", "https://classes.cs.uoregon.edu/17F/cis607netsem/slides/Lumin1.pdf", "http://www.cse.cuhk.edu.hk/~pclee/www/pubs/sigcomm17.pdf", "https://www.cs.jhu.edu/~xinjin/files/SIGCOMM17_SketchVisor.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3d5fa0ff4a34d8e6dcab5f49a4064cb676bfcb0c", "sources": [ "DBLP" ], "title": "SketchVisor: Robust Network Measurement for Software Packet Processing", "venue": "SIGCOMM", "year": 2017 }, "3d75acef1b01ace103e46e0444c1c286a248a9bd": { "authors": [ { "ids": [ "2780982" ], "name": "Raoufehsadat Hashemian" }, { "ids": [ "1739691" ], "name": "Niklas Carlsson" }, { "ids": [ "1909050" ], "name": "Diwakar Krishnamurthy" }, { "ids": [ "2185810" ], "name": "Martin F. Arlitt" } ], "doi": "10.1145/3030207.3030225", "doiUrl": "https://doi.org/10.1145/3030207.3030225", "entities": [ "Benchmark (computing)", "Experiment", "Kullback\u2013Leibler divergence", "Simulation", "Software system", "Spatial variability", "Synthetic data" ], "id": "3d75acef1b01ace103e46e0444c1c286a248a9bd", "inCitations": [ "7c096c79fa4932c283cbc86732ef9cf2d724097e" ], "journalName": "", "journalPages": "143-154", "journalVolume": "", "outCitations": [ "a99d67541e57ec1f2a8ecaf78ce10857e3afe852", "6e8e17ce99c3a12ec713bfed2b3cc02a778647cc", "81b0a75a0d29b2fd2c035ff6eae70c90b338bcd1", "a613b4125fecd2309bedbc46bc152e22c7d5e962", "6a7ad7b2c7beb50b237cd118c7a9aecb4e31668f", "3237988284481bcd75894f9cb4f4d43b6aa4b561", "22a8f899e13f62bf28629b273466c4bf3ae40faf", "aa64660fa3ae07af7ef8813ec6ba928179ec5b4f", "3beea89f916647bac6554e4851568eb97090c66b", "3a628e5f41388e9a690fc4d79ec0b3208d726a31", "02544882276ff1a35f4b6f1a8504a972b8df4087", "2d9e3524c8ec306a6eaac43d255ea36404ba580e" ], "paperAbstract": "Benchmarking is a widely-used 
technique to quantify the performance of software systems. However, the design and implementation of a benchmarking study can face several challenges. In particular, the time required to perform a benchmarking study can quickly spiral out of control, owing to the number of distinct variables to systematically examine. In this paper, we propose IRIS, an IteRative and Intelligent Experiment Selection methodology, to maximize the information gain while minimizing the duration of the benchmarking process. IRIS selects the region to place the next experiment point based on the variability of both dependent, i.e., response, and independent variables in that region. It aims to identify a performance function that minimizes the response variable prediction error for a constant and limited experimentation budget. We evaluate IRIS for a wide selection of experimental, simulated and synthetic systems with one, two and three independent variables. Considering a limited experimentation budget, the results show IRIS is able to reduce the performance function prediction error up to 4.3 times compared to equal distance experiment point selection. Moreover, we show that the error reduction can further improve through system-specific parameter tuning. 
Analysis of the error distributions obtained with IRIS reveals that the technique is particularly effective in regions where the response variable is sensitive to changes in the independent variables.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030225", "http://liu.diva-portal.org/smash/get/diva2:1141629/FULLTEXT01.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3d75acef1b01ace103e46e0444c1c286a248a9bd", "sources": [ "DBLP" ], "title": "IRIS: Iterative and Intelligent Experiment Selection", "venue": "ICPE", "year": 2017 }, "3d856550c3d4b4cd7b181af70e6a49e3df4a846d": { "authors": [ { "ids": [ "15509874" ], "name": "Pritom Ahmed" }, { "ids": [ "34690612" ], "name": "Mahbub Hasan" }, { "ids": [ "2466261" ], "name": "Abhijith Kashyap" }, { "ids": [ "1754970" ], "name": "Vagelis Hristidis" }, { "ids": [ "1761528" ], "name": "Vassilis J. Tsotras" } ], "doi": "10.1145/3035918.3064032", "doiUrl": "https://doi.org/10.1145/3035918.3064032", "entities": [ "Algorithm", "Baseline (configuration management)", "Computation", "Experiment", "Geolocation", "Geotagging", "R+ tree", "R-tree", "Range query (data structures)", "Region of interest", "Requirement", "Search algorithm", "Social network", "Streaming media" ], "id": "3d856550c3d4b4cd7b181af70e6a49e3df4a846d", "inCitations": [], "journalName": "", "journalPages": "1227-1241", "journalVolume": "", "outCitations": [ "2220feec76a17e509a58abf8c742ea9b7866a99e", "8360d6bca6555a381567830d067256a33c59db1e", "723360b7d1cc434a93ec4e773191bdad987cb9fe", "4563f501931a5e69ab39cf11bea1e914cdcdc2ab", "f7d80c2248ac19581af2584eadafb8fd61f375a4", "23964c0620e8ad6d6d9c02a515b453d8471b71cd", "70b584ace45b89b5116249785e9fe97a43f70aa1", "dca4cab4bee5eb807d858312a00858230eb4b712", "02dcad6ad979b6b7f0ebf3e79afdd03dda2e939a", "60ea77daa1cd87b0e15e22d06edc641a8ae4bbb3", "90cd5bec75579eaf5d9bd99ba3c0af3aa22d7991", "5746d077d8298baf05eb0ddf9bd012e3a6dadecb", "36f47f205610bb3325bd40760a597541a9d8ba05", 
"75a4860c9b3b2e95bc3a8056543e7560a1753f2b", "1bf5942e121c2e7ad1598b159066d62dd0dbe5d8", "d285e040ca14f16a017f386e258246323c135297", "3d6ca65b1b5cfa80bd71d1f1a7087f19783e74ab", "03672a0fb9e695ef896ec1c59f07a50acfff084c", "0f19200bfdf8159b1217b48e006640739b5fba86", "6baaf1b2dc375e21a8ca8e8d17e5dc9d7483f4e8", "9e9f6e08c824d1dd5bb493c3157fe8dac82a09d7", "623241d9f1148bd0446822e3496d055adebb9a18", "4bd9dfe53079f22ac56102464de199719d1a6ef3", "c82695ea72cac8cfe49582619fbd81ebac6692ce", "4599eaf5e8709e486ce08143b2521eaecae1705a", "0d557c7a7d6b50c3ccdb6a9f48e4c552541f5930" ], "paperAbstract": "The wide availability of tracking devices has drastically increased the role of geolocation in social networks, resulting in new commercial applications; for example, marketers can identify current trending topics within a region of interest and focus their products accordingly. In this paper we study a basic analytics query on geotagged data, namely: given a spatiotemporal region, find the most frequent terms among the social posts in that region. While there has been prior work on keyword search on spatial data (find the objects nearest to the query point that contain the query keywords), and on group keyword search on spatial data (retrieving groups of objects), our problem is different in that it returns keywords and aggregated frequencies as output, instead of having the keyword as input. Moreover, we differ from works addressing the streamed version of this query in that we operate on large, disk resident data and we provide exact answers. We propose an index structure and algorithms to efficiently answer such top-k spatiotemporal range queries, which we refer as Top-k Frequent Spatiotemporal Terms (kFST) queries. Our index structure employs an R-tree augmented by top-k sorted term lists (STLs), where a key challenge is to balance the size of the index to achieve faster execution and smaller space requirements. 
We theoretically study and experimentally validate the ideal length of the stored term lists, and perform detailed experiments to evaluate the performance of the proposed methods compared to baselines on real datasets.", "pdfUrls": [ "http://www.cs.ucr.edu/~vagelis/publications/top-k%20spatial%20sigmod2017.pdf", "http://doi.acm.org/10.1145/3035918.3064032" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3d856550c3d4b4cd7b181af70e6a49e3df4a846d", "sources": [ "DBLP" ], "title": "Efficient Computation of Top-k Frequent Terms over Spatio-temporal Ranges", "venue": "SIGMOD Conference", "year": 2017 }, "3d860e0e8240fca52f7b28f8b82c3ba0bb2868ae": { "authors": [ { "ids": [ "3124331" ], "name": "Jens Gustedt" }, { "ids": [ "1795494" ], "name": "Emmanuel Jeannot" }, { "ids": [ "2756384" ], "name": "Farouk Mansouri" } ], "doi": "10.1109/CLUSTER.2017.71", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.71", "entities": [ "Analysis of algorithms", "Cache (computing)", "Instruction pipelining", "Matrix multiplication", "Memory hierarchy", "OpenMP", "Runtime system", "Scalability", "Shared memory", "Video tracking" ], "id": "3d860e0e8240fca52f7b28f8b82c3ba0bb2868ae", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "389-399", "journalVolume": "", "outCitations": [ "04717b51d7c7dd598d732fb8a293d69d45c0fa9f", "e2040ba29ebab0e5ef48d9edb857378bc13b4f8c", "3245b047c145272e16b097ba21939c80ffead168", "917fc743f23a795cf86d65da9f20b3f67dbbb7dd", "087bc3160eafd51c513ea677159e34b111b4615e", "6bfc68c836d92d5ec8d9f5e71336e7809c70f367", "80d4a300e7b8bf77533a9be0ddd0de09470488bd", "d57212cbbc3cd44179b782256173149aaeb5b1b9", "bc681b0350757ee8aecc9703e44251e4c6ee56fd", "9ee8943866b9e7771f957dd5721128fe6afedbf5", "927808939090d881a56fdbec28e7f8341f996c4f", "7421d28428e041c271fe6370c331353f4a3fa974", "10b71b1e95db11c0ad0429a1d2c75811573106db", "7d76ba8c4f6776c645673e2c3f6eb88b1a0ca7aa", 
"688a0e25527122c65f983cc65be3196b75d6b66a" ], "paperAbstract": "Efficiently programming shared-memory machines is a difficult challenge because mapping application threads onto the memory hierarchy has a strong impact on the performance. However, optimizing such thread placement is difficult: architectures become increasingly complex and application behavior changes with implementations and input parameters, e.g problem size and number of threads. In this work, we propose a fully automatic, abstracted and portable affinity module. It produces and implements an optimized affinity strategy that combines knowledge about application characteristics and the platform topology. Implemented in theback-end of our runtime system (ORWL), our approach was used to enhance the performance and the scalability of several unmodified ORWL-coded applications: matrix multiplication, a 2D stencil (Livermore Kernel 23), and a video tracking real world application. On two SMP machines with quite different hardware characteristics, our tests show spectacular performance improvements for these unmodified application codes due to a dramatic decrease of cache misses and pipeline stalls. 
A comparison to reference implementations using OpenMP confirms this performance gain of almost one order of magnitude.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.71" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3d860e0e8240fca52f7b28f8b82c3ba0bb2868ae", "sources": [ "DBLP" ], "title": "Automatic, Abstracted and Portable Topology-Aware Thread Placement", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "3db6757e5c65e80dca77bc2a6cd9e742c229df9f": { "authors": [ { "ids": [ "10966012" ], "name": "Xiang Wang" }, { "ids": [ "7792071" ], "name": "Xiangnan He" }, { "ids": [ "1743245" ], "name": "Liqiang Nie" }, { "ids": [ "1684968" ], "name": "Tat-Seng Chua" } ], "doi": "10.1145/3077136.3080771", "doiUrl": "https://doi.org/10.1145/3077136.3080771", "entities": [ "E-commerce", "Experiment", "Interaction", "Rationality", "Recommender system", "Silk Road", "Social network" ], "id": "3db6757e5c65e80dca77bc2a6cd9e742c229df9f", "inCitations": [ "ee21dc39d96204720756e2618e8cdb20058890e8", "12b161b4256292a338fac99317465c295092d710", "6ba4e10d06d9842765a4350bf5abbd3dd095045c", "9f67c741738a101095400ed515fa98375ef3ed67", "5e3257540faa7bf220d0dda97085ceff18674f19", "081ad92ce0e71541646218f11061c86414a960c2", "446f7fc5e46def1ae860b341257f09d6cb0e5967", "6cd4697795f9c990c7bd3c867a442abee2858abd", "43c0ff1070def3d98f548b7cbf523fdd4a83827a", "22873d98ff3f7b3b5490f3982c3fe0c0c5d665c7", "3c78ada32da2aac0ef2e23d16e786c24efd8b2a3", "7a8fd8670b928b474c4dfeb1dc1898d08d545025", "3c21d2985d4d1b3c140b991c59e936682719b197", "26f753a7d8304922dff1f1b52f8f5fc30451497a" ], "journalName": "", "journalPages": "185-194", "journalVolume": "", "outCitations": [ "794f63b0ddbd78156913272a7f4275366bbd24e4", "71423bb17133402965a5cbaf31fa28b0366149fd", "54736f0e0489f1022efe7b0c680ce04f59b3c525", "2eb32b1a4c5bf741632a9fd5f852253fd0d53def", "516db58d63486de94d7c48b498ea39a4aef43cae", 
"52c0876b25a5721c4c6930d94d5308f0779734ec", "760948698540118031e590fbc884fcea209f9104", "50d53cc562225549457cbc782546bfbe1ac6f0cf", "6ba4e10d06d9842765a4350bf5abbd3dd095045c", "d5a55a548fc7cd703c1dd8d867ca1eb6b0c0764c", "44eafda243dad122f4cc4e378e71bce2402685cd", "2d9b6147fab8096ac2ff7f58cfcb3d4e94bdc8fb", "e50f4d3316d13841c287dcdf5479d7820d593571", "34f25a8704614163c4095b3ee2fc969b60de4698", "0e730f8ed7cfdeb458cab36e8495e2cc0ee6d6a6", "5720a5015ca67400fadd0ff6863519f4b030e731", "61181e71ca1b899b5fdaaac24daac2463b3e6c96", "7894683e9f0108245d43c3de91a3426e52e0d27f", "dbe9d04bffb5c1df8eb721dab4f744ea81d9a4c1", "1e7d7f76b3a7b494122f40c22487e60a51a2d1be", "878a34fd9e1af1f1371ffbd897dbe8c3c54fc85d", "26b99024682a897888428f727805967b032d0a54", "2607f0093fecd4fee5244d56fcf3f53ff22e949e", "84ad75dd2bb8aef4b70c5f7db6ff947767c30264", "0988e0c825d376c32c7f1fd2393102251e514ba4", "26f753a7d8304922dff1f1b52f8f5fc30451497a", "282f8120366629829149a0ce2990c3442ba28088", "2ef7d506b25731d0f3ec0c8f90b718b6e5bbd069", "fc8d2643020c6208cf99778744e07fe01626316c", "1c2cae04e0e00dcec3fe6840b2daa00ae9e8a0a3", "01fcae344d2edb715bcc63a40b6052c0331741bd", "18ae92c03e60def0a35bc5c31abed188dfaa6e6c", "3c78ada32da2aac0ef2e23d16e786c24efd8b2a3", "787d56ec5569f1054f490dcf9a9fb4b87b7990e8", "1d9b302a5a004e279b984f35d01190cb59658c50", "9285f3c70c6cab770d2f7d4ccb32ecae0397b134" ], "paperAbstract": "Online platforms can be divided into information-oriented and social-oriented domains. The former refers to forums or E-commerce sites that emphasize user-item interactions, like Trip.com and Amazon; whereas the latter refers to social networking services (SNSs) that have rich user-user connections, such as Facebook and Twitter. Despite their heterogeneity, these two domains can be bridged by a few overlapping users, dubbed as bridge users. 
In this work, we address the problem of cross-domain social recommendation, i.e., recommending relevant items of information domains to potential users of social networks. To our knowledge, this is a new problem that has rarely been studied before.\n Existing cross-domain recommender systems are unsuitable for this task since they have either focused on homogeneous information domains or assumed that users are fully overlapped. Towards this end, we present a novel Neural Social Collaborative Ranking (NSCR) approach, which seamlessly sews up the user-item interactions in information domains and user-user connections in SNSs. In the information domain part, the attributes of users and items are leveraged to strengthen the embedding learning of users and items. In the SNS part, the embeddings of bridge users are propagated to learn the embeddings of other non-bridge users. Extensive experiments on two real-world datasets demonstrate the effectiveness and rationality of our NSCR method.", "pdfUrls": [ "http://www.comp.nus.edu.sg/~xiangnan/papers/sigir17-SilkRoad.pdf", "https://arxiv.org/pdf/1706.03205v1.pdf", "http://doi.acm.org/10.1145/3077136.3080771", "http://www.comp.nus.edu.sg/~xiangnan/papers/sigir17-SilkRoad-slides.pdf", "http://arxiv.org/abs/1706.03205" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3db6757e5c65e80dca77bc2a6cd9e742c229df9f", "sources": [ "DBLP" ], "title": "Item Silk Road: Recommending Items from Information Domains to Social Users", "venue": "SIGIR", "year": 2017 }, "3db98850ad40dd5d778e8045f145f8dcd540131d": { "authors": [ { "ids": [ "2955048" ], "name": "Pansy Arafa" }, { "ids": [ "1722017" ], "name": "Guy Martin Tchamgoue" }, { "ids": [ "2377300" ], "name": "Hany Kashif" }, { "ids": [ "1733430" ], "name": "Sebastian Fischmeister" } ], "doi": "10.1109/MASCOTS.2017.19", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.19", "entities": [ "Database", "Debugging", "Experiment", "Hypertext Transfer Protocol", "Internet 
bottleneck", "MySQL", "NetBSD Gzip / FreeBSD Gzip", "Perl DBI", "Quality of service", "Redis", "Requirement", "Run time (program lifecycle phase)", "Server (computing)", "User experience", "Web server" ], "id": "3db98850ad40dd5d778e8045f145f8dcd540131d", "inCitations": [], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "132-142", "journalVolume": "", "outCitations": [ "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "0557beb82bd471232c4fe2c06de9a05edb58d665", "376b28d33bd444f178fd75cd185611c923572df9", "bd9d8d8fb8a9e3e98aa52cc86bc5ffd937ce281d", "7eaedbcd1010f11929104c8e3d63de2122ef04a5", "4ccc46d4c81f7d0d390f833361b6ecbeb2b249d8", "33d7a5866dcd8044df675d606f230982b644ced8", "2b598fa1fa6dcc9020fe73805294e4b03c2f9116", "77b3b55f0fbd03f18e939e6058aee793f9619f93", "2ae885cc822b0a61f5a8a22124e61e61789cb5cd", "0ce54aa0e4fb7363ad3fe87a7e66033c4fed104b", "0861850312f23072269884445161c4d67bc85a89", "8b8d9dbe3e755cbbab950b6133b1cc11d8e08943", "ed8aee4bece261310e9a62181d0669d167e8259c", "6e82b684c5b72ed2017c1afe93e7cba40127fa15", "51356ed1bf619eb4ccf74d4bc59e962fabd558cd", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "8d3d42706198efef0e0987c570bf4690a20334a1", "251996ed813cc6cd06bce2d58ead7e39e9a4bbca", "91607d7bc71823360de59b894ae37b4f1738bca0", "55cde409703468c3b7bb1797738affb5f44c8f3f", "85c02db93ffe61bed3887602f6b7bc9bf7a6c797", "0c0ff71e1f225312bd24a2d78153f0b3f3816285", "3f6eb56461bb589604a0aeefc355ce7ea3345280", "6ac43f486d48f280296b102685d9ab6709f31c06", "05e2da39b46ae0fa131c3c955f1fd8adbf6d9d3b", "99bd118f2504579694b6dbf02d3902e45c942b32", "37d8c3d11f75c11dedd7b35601c61a3723df3a91", "0bc4eb53c23c8c3c853e8eb0a07ab958e1f09d35", "bf0da9dadafe58af41801f7097d51c9442c79148", "326cdcbce0831d873ef41ad56e98eddfa6dff235", "0653e2ed9f683868cb4539eb8718551242834f6b", "0f9681ab16505cab01487ac39c990d258db95413" ], "paperAbstract": "Software systems with quality of service 
(QoS), such as database management systems and web servers, are ubiquitous. Such systems must meet strict performance requirements. Instrumentation is a useful technique for the analysis and debugging of QoS systems. Dynamic binary instrumentation (DBI) extracts runtime information to comprehend system's behavior and detect performance bottlenecks. However, existing DBI tools are intrusive; adding unacceptable delay to the program execution. Such delay alters the performance requirements and degrades the overall quality and the user experience of the system. Moreover, the delay may change the system behavior, thus, producing misleading run-time information. This paper presents QDIME, a QoS-aware dynamic binary instrumentation technique that respects system's performance requirements. QDIME takes a user-defined QoS threshold as an input and periodically gathers QoS feedback from the system under analysis to decide its instrumentation budget. We implemented QDIME on top of PIN, a popular DBI framework. We evaluated QDIME with Gzip, MySQL server, Apache HTTP server, and Redis. The experiments show that QDIME respects the user-defined QoS threshold and, thus, improves the performance of the monitored application by manifolds. QDIME is able to provide up to 100% instrumentation coverage with an average of 92% when compared to PIN. Moreover, QDIME reduces the slow-down factor of the instrumented application by 1.41, 5.67, and 10.26 folds for Sys-trace, Call-trace, and Branch-profile respectively. 
A release of QDIME is available for download at https://github.com/pansy-arafa/qdime.", "pdfUrls": [ "https://uwaterloo.ca/embedded-software-group/sites/ca.embedded-software-group/files/uploads/files/mascots-2017-qdime.pdf", "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.19" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3db98850ad40dd5d778e8045f145f8dcd540131d", "sources": [ "DBLP" ], "title": "QDIME: QoS-Aware Dynamic Binary Instrumentation", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "3dbec0fa0704329875802de070545c7774a78b3d": { "authors": [ { "ids": [ "6529278" ], "name": "Inho Cho" }, { "ids": [ "39702070" ], "name": "Keon Jang" }, { "ids": [ "1729324" ], "name": "Dongsu Han" } ], "doi": "10.1145/3098822.3098840", "doiUrl": "https://doi.org/10.1145/3098822.3098840", "entities": [ "Algorithm", "Convex hull", "Data center", "Data rate units", "End-to-end principle", "Experiment", "Explicit Congestion Notification", "Network congestion", "Network packet", "Network switch", "Requirement", "Scheduling (computing)", "Simulation", "Stock and flow", "Testbed" ], "id": "3dbec0fa0704329875802de070545c7774a78b3d", "inCitations": [ "17cdd01291815ce50715bbe19fe953737b7f8ce9", "01aa1362af63629e8d221ba1342bf632242d6136", "9bbd5be2829e49b1fac7f034baf7499cb069db95" ], "journalName": "", "journalPages": "239-252", "journalVolume": "", "outCitations": [ "3b988049dd8f62f772281e90196bbd793700c86b", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "2ed4585aafa46fa69d29b5149e9c6484d379d885", "663e064469ad91e6bda345d216504b4c868f537b", "4973d22ad92fe2999f18cc57dd4a4cad81ba2cfe", "0161bcefcb9713554794156260ef99c71d0137a8", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "3d47531d2600de665d99944b52c30a4252784fec", "14c84514d25336223473290fe7c13ad66a68ef64", 
"507399bf041475bb6902dc6275bc41bb60d1ba1c", "07367703f587dbc3313cc613289c4330cebe5c8c", "1643e122653b255d267763b1bc17fbb4346e10ce", "61aa09bc1a3eae17480645c90b06a18cbd62d9af", "3ec219ec5f6a1fb2e02fa657d34314c2c48d6f15", "134a2c4316655de7cca9424d51850dbf0401363a", "42d1b52254873ecd0f36eb7342f95dbad9c50187", "1d9f31a31dcded90d2b5c7e6d357b4680f8dbbd4", "59939cbf3ed4cc6976ec8061da8750e7b41091b9", "d5b0aa2f37ce5344faddff286792878f7554ece4", "0f6f717d198ab1b99a63814facaf2fceace6b0fe", "0baf1bef6ee3bcb0b385a4ac303dcf0b406c64f4", "64d94ea08b4b25751983dc91a2f44d812e662e54", "0b7301fe4766447af960f9a2c06ccde042538e9c", "0b90433a2df3363d77edf97fd5e998da7c7660de", "84ee7fa441939aa53f3d070922f6479381a056ed", "230239fb61d7a6996ac9552706363323b34735f2", "058f6752d85a517aae298586fdf117acdd7560ea", "20400945c87f75acbad70f1f9ccfe94f556d2d02", "ac8f96be81c85422dcc335d7edeeea9fb17a3201", "426b5989c089ac3ba5c28ae339bbd51ad2439859", "980773ca869fc17562e4fbcf4202a8f21893b114", "2d4906884bc5309f1539195ff5b181d41a15ff60", "094aca6103f4079521e6a596d099ed37f7d2b498", "444d38795db716c05133fc8ab2269f8917684044", "3e6ef71e19d36886d6fdd233f4c223d1b3e2c2a8", "0541d5338adc48276b3b8cd3a141d799e2d40150", "7365135511b7510ac59c47725ab45ecb3e69f748", "4e4f4b3d04f1e1ec2b3a18318d9a42886b10ad25", "122229239aeba1eb4f1623adb40f1845c582a520", "853fc3b071cdc873b4e768ef1cf1b0459abfbad0" ], "paperAbstract": "Small RTTs (~tens of microseconds), bursty flow arrivals, and a large number of concurrent flows (thousands) in datacenters bring fundamental challenges to congestion control as they either force a flow to send at most one packet per RTT or induce a large queue build-up. The widespread use of shallow buffered switches also makes the problem more challenging with hosts generating many flows in bursts. In addition, as link speeds increase, algorithms that gradually probe for bandwidth take a long time to reach the fair-share. 
An ideal datacenter congestion control must provide 1) zero data loss, 2) fast convergence, 3) low buffer occupancy, and 4) high utilization. However, these requirements present conflicting goals.\n This paper presents a new radical approach, called ExpressPass, an end-to-end credit-scheduled, delay-bounded congestion control for datacenters. ExpressPass uses credit packets to control congestion even before sending data packets, which enables us to achieve bounded delay and fast convergence. It gracefully handles bursty flow arrivals. We implement ExpressPass using commodity switches and provide evaluations using testbed experiments and simulations. ExpressPass converges up to 80 times faster than DCTCP in 10 Gbps links, and the gap increases as link speeds become faster. It greatly improves performance under heavy incast workloads and significantly reduces the flow completion times, especially, for small and medium size flows compared to RCP, DCTCP, HULL, and DX under realistic workloads.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098840", "http://conferences.sigcomm.org/sigcomm/2017/files/program/ts-6-2-exppass.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3dbec0fa0704329875802de070545c7774a78b3d", "sources": [ "DBLP" ], "title": "Credit-Scheduled Delay-Bounded Congestion Control for Datacenters", "venue": "SIGCOMM", "year": 2017 }, "3dd92f999bea0d5fa2a679b0f6556e5911fe0a49": { "authors": [ { "ids": [ "2897748" ], "name": "Liangyue Li" }, { "ids": [ "8163721" ], "name": "Hanghang Tong" }, { "ids": [ "36528055" ], "name": "Yong Wang" }, { "ids": [ "40551931" ], "name": "Conglei Shi" }, { "ids": [ "1836083" ], "name": "Nan Cao" }, { "ids": [ "40459280" ], "name": "Norbou Buchler" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Autonomous system (Internet)", "Coordinate descent", "Crowdsourcing", "Evaluation", "Interdependence", "Name", "Nonlinear system", "Optimization problem", "Program optimization", 
"algorithm", "discipline", "teams" ], "id": "3dd92f999bea0d5fa2a679b0f6556e5911fe0a49", "inCitations": [ "189695f16c83b0a54d436c632353af2a0043cdad", "6aacbf58cdb0712812c9c1824603b97cd9ff17fb", "02179e5e0847d6df75e3a94d1d4fb40d73bb254b", "782b2fbc15b50c15f12baa126817466ec5e8251c" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "0293d1907b999549eed95ce4ef1edcbb2c9a5bab", "9469c1e36d85e0ae612391e05604cff5f773dacc", "8acec511269abad173755e5b85e6a22e76c34095", "3241b953f6f772beea9e8bc87895c94a3fe33a6a", "21d667c5943ee32e96456533d2c1ce21fb9af0ba", "017618993f45a934f31078eb8e77a45a7e01f05a", "1cb0c6573195aeb933e9ff663dad71d8ad1b0e13", "727fa1f55462f732bcc8e3ae41a119d24e38bd69", "d58e04d7288bf30e566fa2f77438ca7c111f1222", "1f400605b04bdefc31a576fb399de88dba2d3ac1", "82fdabfbaee8cb5b47c7a25396ac76b092331922", "1104cfc1f233d9208b1b1177499b56cb8996e982", "07d1db388cd489420d40d0edb13e074d86c77dbd", "10dba1f2002082b5d6706ee73f5b7ece7b05085b", "a67b221e7ddb38cf7ae89515aebf96c23cef4e41", "8669d61a2c9910e033ae71a446e85054acc1ba55", "635e97a39137c2c5d2f08fe55c05c6df6543e757", "4e45928ad19a990237928162a6be275fbca0b38b", "475ff6fc68f5bf47dc1c590cc9c10dc2ba4c2281", "24ae973fa79d44f8ea5a93e0ed6b1ee65294a78a", "a1b28c54803c08cfb982d7c8c57c1180c78d875b", "1b65af0b2847cf6edb1461eda659f08be27bc76d", "2b7adb066d6d6a0a106ff4ad4e3e0a7a84347e76", "1b0644187f24bcffd8aae376cbf188ecdfed172d" ], "paperAbstract": "The part-whole relationship routinely finds itself in many disciplines, ranging from collaborative teams, crowdsourcing, autonomous systems to networked systems. From the algorithmic perspective, the existing work has primarily focused on predicting the outcomes of the whole and parts, by either separate models or linear joint models, which assume the outcome of the parts has a linear and independent effect on the outcome of the whole. 
In this paper, we propose a joint predictive method named PAROLE to simultaneously and mutually predict the part and whole outcomes. The proposed method offers two distinct advantages over the existing work. First (Model Generality), we formulate joint part-whole outcome prediction as a generic optimization problem, which is able to encode a variety of complex relationships between the outcome of the whole and parts, beyond the linear independence assumption. Second (Algorithm Efficacy), we propose an effective and efficient block coordinate descent algorithm, which is able to find the coordinate-wise optimum with a linear complexity in both time and space. Extensive empirical evaluations on real-world datasets demonstrate that the proposed PAROLE (1) leads to consistent prediction performance improvement by modeling the non-linear part-whole relationship as well as part-part interdependency, and (2) scales linearly in terms of the size of the training dataset.", "pdfUrls": [ "http://home.cse.ust.hk/~ywangct/publication/yong-KDD-2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/3dd9/2f999bea0d5fa2a679b0f6556e5911fe0a49.pdf", "s2Url": "https://semanticscholar.org/paper/3dd92f999bea0d5fa2a679b0f6556e5911fe0a49", "sources": [], "title": "Is the Whole Greater Than the Sum of Its Parts?", "venue": "", "year": 2017 }, "3de112ed0b197ef2dfb828d1ed0bc91e24b1ab76": { "authors": [ { "ids": [ "2886479" ], "name": "Dongpeng Xu" }, { "ids": [ "34385946" ], "name": "Jiang Ming" }, { "ids": [ "2628673" ], "name": "Dinghao Wu" } ], "doi": "10.1109/SP.2017.56", "doiUrl": "https://doi.org/10.1109/SP.2017.56", "entities": [ "AES instruction set", "Algorithm", "Binary code", "Binary file", "Cryptographic hash function", "Cryptography", "Firewall (computing)", "Hoc (programming language)", "Library (computing)", "MD5", "Malware", "Obfuscation (software)", "Pervasive informatics", "RC4", "Reference implementation", "Symbolic execution", 
"Synthetic data", "Tea" ], "id": "3de112ed0b197ef2dfb828d1ed0bc91e24b1ab76", "inCitations": [ "12d02f92a0b55fa0445acd496b94f4e6f906bedb", "40dc09f5fbd3776c3f34adedc7a4718307ace0d6", "67677571d4fb9ee4a8800db5ce0355841bc7a9f6", "629191336187398e43f1021bdcc6c293a72d1ca8", "2443ce6ba8fb382fa49b2027b4897e00e163b402", "feccab89928b2a15ad475014cbbfc7980a0323a8" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "921-937", "journalVolume": "", "outCitations": [ "894431d48991b88acde625c0ff866938a0372991", "be6b283871ec6df396ff00bc2d844a9e4c056000", "1f0e9613b1d47bbe8ba5b32a57e89b81ec02aba8", "8711a402d3b4e9133884116e5aaf6931c86ae46b", "25123431afb111964b1ed141e64966e1706f2a80", "8cd8298f1d91e92421c83d666669468fb9679840", "a4ecb785468103dcf0b0706fab230edc9fdecf1c", "66e5f253c0fd73548b4cec637f683b1186740f07", "16d5b70f5f2bbe10fd56ecfea8f14b028976ab84", "15bb9e2d8579a6901bc9ee4d7a57623da4262f97", "c70912d9d905e49ba8394ed2e43cc438d2dcd21f", "d074ce0a24482afc521925c92f4d735860058c92", "974f362d3fcabfba7befbb7cba9d8027d5942f35", "0b5b42425deb371d8dc60ac9b090c7232702370a", "8b0acc7ebcd30607010ea62a1a1f253d3ff47bcc", "05875e17ac67cc16d20e4338043d687c04e757b4", "7fa71e17142563013365daa8526a1323f123961a", "a75e4bede6a459e6358db2a66b4292604722e5b8", "1bf9569aa108b6c19c8cc4fc15470cedddbd7ba9", "9a8a79f0f9a7809bfc831bd1a744748042151a64", "2fd85993e7e5cbf7dcd4985d6088020355c254d8", "53fb53444672bb6a2325efd7a48dd2dc7b6ac374", "585706dc56e146c8fb42228fc5cbe1de0bb0a69d", "583a4aed2057b2b509fbdf9fdee5515886de7e86", "0cffc09e3fd3c8c1569df766c391ba3afb96c208", "fdfa65ffc34365f92128368e6df6870f05acf416", "6125fa88ab17c8e2879482e10f0028ab3f681524", "1f527fed31971e07093695c128c10b4f3c20d109", "6ea63d09993b9a268689790ea8d25bc36345497e", "0653e2ed9f683868cb4539eb8718551242834f6b", "000ead8f90917616c51365f2529bc5bd719479a0", "6686920ea4ccd36f74b8c5c728675fe98a76b0b9", "2ae885cc822b0a61f5a8a22124e61e61789cb5cd", "a21921eb0c5600562c8dad8e2bc40fff1ec8906b", 
"085e83f8760b81fd448c38ae432c0e99928286e1", "129ed742b496b23efdf745aaf0c48958ef64d2c6", "3946a5c410ba1980d932cc6d2987b4d935e038d5", "b9fab885944f006abb8c21e774dca050b478c1c4", "ae7a207a693a00b7d8d44eaef32c2a34a0978e4a", "40cebb3d22c7400935286dc2c4cf1f5f062a640a", "4f09377086284ffed61435d4c121e5d93ededbef", "cd0105649926af00e1f8fe4d32438ea2141628e8", "b13ea783a9090fba3bc345b0ed595b39c0bf7281", "45d0737424b37a1607f992d5e23bcedc7adbb3b0", "21d1e64bfd87f8d079f7b943c0399668f6971d13", "7a5307ad6c06e5eb06d18276c8e3c586625c4433", "065066a94860279587ecc7c7caaa65303008940f", "1f7e5e582663868ed2f6763f98066ca278177a61", "562cb7849d5640530b5eaa0d2dfcce3d2b2693d5", "5bab5b8e793ab42c1666357ca66fc887d54016f7", "580772507bca7b9a0318d251e13f5cd8fb028d7b", "4815f4122fa6c83b1691fde8b4ce21775d400c59", "3a0ad57ecb97795a8cc91290484ff9e576728c84", "9b022981c2eec830c5fefd8966c2c7822858cac0", "93e390c7dd2f979fbd63e4c46977b791d92c6f41", "82bd162b04cef498dd2f4b6103c6e13107b7b782", "57b7f4979b3d647845099c8b24445d850793f514", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "4cc53528df5dd553931fb72e903e7d7acbfd1e72", "6fd3c5146fc90d4ce14cafc85f3a92be40f22213", "008c2c2cf69fd4936a64e67d265b9b173f0d190f", "1a4c7185626d0f2acebf7f05a29fa2073a2fa841" ], "paperAbstract": "Cryptographic functions have been commonly abused by malware developers to hide malicious behaviors, disguise destructive payloads, and bypass network-based firewalls. Now-infamous crypto-ransomware even encrypts victim's computer documents until a ransom is paid. Therefore, detecting cryptographic functions in binary code is an appealing approach to complement existing malware defense and forensics. However, pervasive control and data obfuscation schemes make cryptographic function identification a challenging work. Existing detection methods are either brittle to work on obfuscated binaries or ad hoc in that they can only identify specific cryptographic functions. 
In this paper, we propose a novel technique called bit-precise symbolic loop mapping to identify cryptographic functions in obfuscated binary code. Our trace-based approach captures the semantics of possible cryptographic algorithms with bit-precise symbolic execution in a loop. Then we perform guided fuzzing to efficiently match boolean formulas with known reference implementations. We have developed a prototype called CryptoHunt and evaluated it with a set of obfuscated synthetic examples, well-known cryptographic libraries, and malware. Compared with the existing tools, CryptoHunt is a general approach to detecting commonly used cryptographic functions such as TEA, AES, RC4, MD5, and RSA under different control and data obfuscation scheme combinations.", "pdfUrls": [ "https://faculty.ist.psu.edu/wu/papers/CryptoHunt.pdf", "https://doi.org/10.1109/SP.2017.56" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3de112ed0b197ef2dfb828d1ed0bc91e24b1ab76", "sources": [ "DBLP" ], "title": "Cryptographic Function Detection in Obfuscated Binaries via Bit-Precise Symbolic Loop Mapping", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "3de2e76ea71f9c785c25a29578507ee00dcfc174": { "authors": [ { "ids": [ "1875774" ], "name": "Romil Bhardwaj" }, { "ids": [ "1751888" ], "name": "Krishna Chintalapudi" }, { "ids": [ "2794488" ], "name": "Ramachandran Ramjee" } ], "doi": "", "doiUrl": "", "entities": [ "Backward compatibility", "Field-programmable gate array" ], "id": "3de2e76ea71f9c785c25a29578507ee00dcfc174", "inCitations": [], "journalName": "", "journalPages": "227-242", "journalVolume": "", "outCitations": [ "a7e8413633fd975873c12376de85d63b192a49d8", "a20a2c0f9f585d62e7439ee3ede3574ff6cf7283", "96656b7aba10c673624a02008425e87fa4f90485", "1897a4cc61979f316f762ca1a4eb08785a74d8bd", "690aad60d85651f47dbf7a771ed789c61c9ba6f6", "392fd12eab11fbb62e7812574c1d956fffb023e4", "1a8a6790496ab3b0aa0fee7de8f24918e553964c", 
"3ba5aafbb9cd584cd6d10c24c456a46ac43dfc2a", "1171619efdfc2f0a3ecfa94b079fed9ce8d9fb0c", "16e7cf26e1331e0308cfabb779b6e4402e0ae888", "428c601c818fb069e2a36cd13e18804964a39011", "e04ad88f3b1144f9f0024327063e914eb2c39d59", "28b7be3423437fb21a6c2f68a4bdf115eefd35a0", "ea4adc4a65af056d3fb8c21bad372fe96c68b264", "0698e451ea91ea5df2e9c9c096d16b729d8e1a6c", "9632a7fb9dbca558990febc543da0d16a98ab6a8", "912e1a8ec8ec6c290a6f4e00ecde9a8874fcceb7" ], "paperAbstract": "Carrier sensing is a key mechanism that enables decentralized sharing of unlicensed spectrum. However, carrier sensing in its current form is fundamentally unsuitable when devices transmit at different power levels, a scenario increasingly common given the diversity of Wi-Fi APs in the market and the need for WiFi\u2019s co-existence with new upcoming standards such as LAA/LWA. The primary contribution of this paper is a novel carrier sensing mechanism \u2013 skip correlation \u2013 that extends carrier sensing to accommodate multiple transmit power levels. 
Through an FPGA based implementation on the WARP platform, we demonstrate the effectiveness of our technique in a variety of scenarios including support for backward compatibility.", "pdfUrls": [ "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_bhardwaj.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_bhardwaj.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/bhardwaj", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-bhardwaj.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-bhardwaj.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/ccf0/36633b9fb78e8f01026d43d4917468f31505.pdf", "s2Url": "https://semanticscholar.org/paper/3de2e76ea71f9c785c25a29578507ee00dcfc174", "sources": [ "DBLP" ], "title": "Skip-Correlation for Multi-Power Wireless Carrier Sensing", "venue": "NSDI", "year": 2017 }, "3de30c8dafc720bf066e5e3a005d16212dd31149": { "authors": [ { "ids": [ "9765570" ], "name": "Shiqin Yan" }, { "ids": [ "9751178" ], "name": "Huaicheng Li" }, { "ids": [ "2725752" ], "name": "Mingzhe Hao" }, { "ids": [ "32249376" ], "name": "Michael Hao Tong" }, { "ids": [ "2752943" ], "name": "Swaminathan Sundararaman" }, { "ids": [ "1695232" ], "name": "Andrew A. Chien" }, { "ids": [ "1738725" ], "name": "Haryadi S. 
Gunawi" } ], "doi": "10.1145/3121133", "doiUrl": "https://doi.org/10.1145/3121133", "entities": [ "Adobe Flash", "Allocate-on-flush", "Expect", "Flash memory", "Garbage collection (computer science)", "Instability", "Long tail", "Random-access memory", "Service control point", "Solid-state drive", "USB flash drive", "Whole Earth 'Lectronic Link" ], "id": "3de30c8dafc720bf066e5e3a005d16212dd31149", "inCitations": [ "3a426d5e6835af117465e2bbef965cc19f7a6e8e", "4d1a62de587f05084e85a4168f960af1e48b9697", "ec3924af8c1cb428b4f1309b9a9ca3c86abd6631", "8bbba8c51e79b4ec86d95141a24b6c9a3c6eac6b", "1d08d231ec66645ec56d2210c1a7c6b44c6ff041", "40f196e21a289394c4354961116587b8accba45e", "1858ed4ca900d9afd06d0b8a8430d0dda8f957bc", "8f849c0051edc612327e1121ccfa70a4ec0bacea", "262c16d1bdd8d0ccef77bd66648144d584a24477", "6450300d1d15ce03ddca2339184fc6b964189498", "347e1352fb903b40dce606a1e581e9d601bc289c" ], "journalName": "", "journalPages": "15-28", "journalVolume": "", "outCitations": [ "09bcd050bb006639ae8bcacb3af149f0b6d964f3", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "13b925352e4ee3066a6d38ef9f16efdfa967cabb", "057d21830cde5b3be2fdb3a74ee69a3c7e9109f8", "10e0397e08f37ee5d3ba1f9e24ac9eb313c784f4", "4ba4613eab33cddc53bec9e14e50d03fa66270ca", "7a6987f6b0b47d8c6a39cccebb2d3c9566e45054", "3cf9039fa2fc01f711870e33d868669caf5c4df4", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", "a6069e65c318f07d2b35934b0d4109148f190342", "070c3a8c3ce10277424f23c01a54b377478ee59c", "bbb2c69a8018ac50d97a912282b1ec4ff8302ca7", "61977858b3eea4f5a6d81393301e7298ade7a2d8", "424a0f460b4f261b386787bdec37a2b01347a930", "5271d6693ba950c389921ccc21110664f25a83db", "f11d2748e1e26f3b01b54db85ddcc287b678cb04", "1820a34042d6371a9e20484b0c63b698eb522a6c", "1425d2c0d221762fe1f7d9be0c86e5b92adf3b44", "2e46f9074bd81ea4ec29ecec7e0231c16fb2e8db", "19b90ae79266b89ecc44113409b424044ec0300f", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "086820e40dc8046c30a8751394df167bec047fe1", "3f9d4a16ec5d08c0309df743e73745f876b9abfa", 
"e03a77db7bdbef30192b6846cfa09d57e135e8c7", "f4f37512b9786dbd9980d343e47fdc41032d0ca6", "f9fa36f07645df8765faeca8f8a95f1856bb5bb0", "a04678ad8398a2579c249fff4b59bfbcfdd7e25b", "303f71ad0e145415aba9efe9ba96a1f734c63391", "0da73832dee2c9b3d4c0d039d8e714e6ff098e40", "1ecbb1f2080029357bba55e3747bfcaac82aee51", "26a88fcaf621270af5f5786fdb2df376a2bc00aa", "1f0c405f9fa2cc9de23a45710fa85b9e7330a958" ], "paperAbstract": "Flash storage has become the mainstream destination for storage users. However, SSDs do not always deliver the performance that users expect. The core culprit of flash performance instability is the well-known garbage collection (GC) process, which causes long delays as the SSD cannot serve (blocks) incoming I/Os, which then induces the long tail latency problem. We present ttFlash as a solution to this problem. ttFlash is a “tiny-tail” flash drive (SSD) that eliminates GC-induced tail latencies by circumventing GC-blocked I/Os with four novel strategies: plane-blocking GC, rotating GC, GC-tolerant read, and GC-tolerant flush. These four strategies leverage the timely combination of modern SSD internal technologies such as powerful controllers, parity-based redundancies, and capacitor-backed RAM. Our strategies are dependent on the use of intra-plane copyback operations. Through an extensive evaluation, we show that ttFlash comes significantly close to a “no-GC” scenario. 
Specifically, between the 99 and 99.99th percentiles, ttFlash is only 1.0 to 2.6× slower than the no-GC case, while a base approach suffers from 5–138× GC-induced slowdowns.", "pdfUrls": [ "https://www.usenix.org/conference/fast17/technical-sessions/presentation/yan", "http://www.usenix.org./system/files/conference/fast17/fast17-yan.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17-yan.pdf", "http://doi.acm.org/10.1145/3121133" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3de30c8dafc720bf066e5e3a005d16212dd31149", "sources": [ "DBLP" ], "title": "Tiny-Tail Flash: Near-Perfect Elimination of Garbage Collection Tail Latencies in NAND SSDs", "venue": "FAST", "year": 2017 }, "3df47bfe4d94e697d21cf2c48d61085c35459a2c": { "authors": [ { "ids": [ "2484938" ], "name": "Jaeha Kung" }, { "ids": [ "3007999" ], "name": "Yun Long" }, { "ids": [ "2306075" ], "name": "Duckhwan Kim" }, { "ids": [ "1741842" ], "name": "Saibal Mukhopadhyay" } ], "doi": "10.1145/3079856.3080252", "doiUrl": "https://doi.org/10.1145/3079856.3080252", "entities": [ "Dataflow", "Dynamical system", "Graphics processing unit", "Hardware acceleration", "Memory hierarchy", "Nonlinear system", "Simulation" ], "id": "3df47bfe4d94e697d21cf2c48d61085c35459a2c", "inCitations": [], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "403-415", "journalVolume": "", "outCitations": [ "399acab2bee6eccbfffe4a2ce688b6b1075e9c5e", "a3a8dcfaae7afd971c24bd033fb0f7310e8fc741", "351fcec759315e525a814b60a39d48bd9af4287a", "e31603124f10d2ec6ec44ecaade35e24e1e04cfa", "21d7130230162af2a4cc1b9375bfe9b37dbbd499", "b7cf49e30355633af2db19f35189410c8515e91f", "41c445a27d1d3f28c6f5d019e69908c261c188c7", "9ffdd1894a84613b298c4704442ddcb4ba95429b", "391a92ea03e7e055ef3ded07f8bace0b1c150749", "2ffc74bec88d8762a613256589891ff323123e99", "634fe0238b37e596be8681cefccecac76c512c76", "87ec40b2142484e2823caeb826f8ba6f5bbef9fa", 
"73ab55ec1f35773bc7d94acad0f23231a4844a1c", "fdf409152397b4914e285f97ca8a262603fdac3b", "357b648c572b4ec9a523fd314f51aae286fcfcbf", "a813e5d4a9db366e7e42e3bdd67292d31cd752bc", "49246cd9e0cf4a71d89d06c403e22115b59d3cc4", "c577dbe113809adfdce7fdc3b57f74956e0cde31", "1599e4784a811f7d98722f86d0061a6e41873752", "6efca659c6b4316a5b301665979ea3298c53b92e", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "0548ab587826c85cd55f1586f8b6fad807bdc24f", "1474853a6ba097f5880501bbdd94fb782ef3e4d3", "235f8d65c1b66df0a0e011d5819a38bc7092618b", "c5b4d4025dca2e1669d911e7406e457999a0db54", "72be3a9006e226b6faf9161e789f4e34a974e80a", "13e03d48091ffcddda07735edb2c7e9dd9bb434c", "ad2db2e0885b57ab22e01fca4b8b2e688399d8f8", "8eb3c621dcbee6fa21a802364de9d7b6d99521c3", "8b04ea524cb6ced72868c120a00c4679d84be006", "ff6e3caa32c90d08f53fcb095dd89339f28767e2", "5a9c64bbda24a624c8badb7afeea82c9011bb571", "437b11128948f92e1139c555cf1326922ee36b39", "b626ab3776e37b95de8f5f2e147fca020e34d233", "21b8f0653e939ff757d197357fdcd6a7fba2a8d9", "55b5e730062d6e1c56b31d89dcf3a0b239a3951e", "ae5bd10e7a5c0853760c717eac6202e6422b9799" ], "paperAbstract": "The fast and energy-efficient simulation of dynamical systems defined by coupled ordinary/partial differential equations has emerged as an important problem. The accelerated simulation of coupled ODE/PDE is critical for analysis of physical systems as well as computing with dynamical systems. This paper presents a fast and programmable accelerator for simulating dynamical systems. The computing model of the proposed platform is based on multilayer cellular nonlinear network (CeNN) augmented with nonlinear function evaluation engines. The platform can be programmed to accelerate wide classes of ODEs/PDEs by modulating the connectivity within the multilayer CeNN engine. An innovative hardware architecture including data reuse, memory hierarchy, and near-memory processing is designed to accelerate the augmented multilayer CeNN. 
A dataflow model is presented which is supported by optimized memory hierarchy for efficient function evaluation. The proposed solver is designed and synthesized in 15nm technology for the hardware analysis. The performance is evaluated and compared to GPU nodes when solving wide classes of differential equations and the power consumption is analyzed to show orders of magnitude improvement in energy efficiency.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080252" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3df47bfe4d94e697d21cf2c48d61085c35459a2c", "sources": [ "DBLP" ], "title": "A programmable hardware accelerator for simulating dynamical systems", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "3e1e61d1128dae2038cd9701ba95f348f6d12db1": { "authors": [ { "ids": [ "2712837" ], "name": "Benjamin Klenk" }, { "ids": [ "1731123" ], "name": "Holger Fr\u00f6ning" }, { "ids": [ "2298713" ], "name": "Hans Eberle" }, { "ids": [ "17930267" ], "name": "Larry Dennison" } ], "doi": "10.1109/IPDPS.2017.94", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.94", "entities": [ "Algorithm", "Autonomous car", "Central processing unit", "Control flow", "Experiment", "Graphics processing unit", "Message Passing Interface", "Message passing", "Network switch", "Peer-to-peer", "Run time (program lifecycle phase)", "Single instruction, multiple threads", "Wildcard character" ], "id": "3e1e61d1128dae2038cd9701ba95f348f6d12db1", "inCitations": [ "63d9562d2e50c57e684faed416801732a37d39fd" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "855-865", "journalVolume": "", "outCitations": [ "1d887b6e2bcdce92f13878e220859e948930734a", "cf60b4d7f37cc74ca7345a579201b89a010a67e8", "07ecda8ad7075a97baa460ae8b03e2f7fb27c2b9", "14d3c4a56abb7680d6523c0bc88d80899b631a09", "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", 
"03c316a2177d112efb7c64fae4fc10377419610b", "4849bbb611153b5a7c53894fa1c1314138f5ae89", "43498db7de27abf14e5d2903a8318c62b3c4c0e9", "284c7fde4bbaf19dd345e3b37d98085d7bfb9a4f", "6335be42a352d1d4daa907533854410f57269926", "387d5b24317395ae7a86c8ecc9403ac62ed6febe", "6f197e5aa64900079d760a397bb6a062df152ea6", "8ce244596d60478c4c9c4dd5cf43c57e45fccfa2", "401140aefbcefccfcc1dc4e5c5ab913ed9189e6a", "63d9562d2e50c57e684faed416801732a37d39fd", "28552ecf4eaedb3461edca97304b29082b02fbab", "7f2cbf3dd422dec88f5725700913a1d44c6f5beb" ], "paperAbstract": "Accelerators, such as GPUs, have proven to be highly successful in reducing execution time and power consumption of compute-intensive applications. Even though they are already used pervasively, they are typically supervised by general-purpose CPUs, which results in frequent control flow switches and data transfers as CPUs are handling all communication tasks. However, we observe that accelerators are recently being augmented with peer-to-peer communication capabilities that allow for autonomous traffic sourcing and sinking. While appropriate hardware support is becoming available, it seems that the right communication semantics are yet to be identified. Maintaining the semantics of existing communication models, such as the Message Passing Interface (MPI), seems problematic as they have been designed for the CPU’s execution model, which inherently differs from such specialized processors. In this paper, we analyze the compatibility of traditional message passing with massively parallel Single Instruction Multiple Thread (SIMT) architectures, as represented by GPUs, and focus on the message matching problem. We begin with a fully MPI-compliant set of guarantees, including tag and source wildcards and message ordering. Based on an analysis of exascale proxy applications, we start relaxing these guarantees to adapt message passing to the GPU’s execution model. 
We present suitable algorithms for message matching on GPUs that can yield matching rates of 60M and 500M matches/s, depending on the constraints that are being relaxed. We discuss our experiments and create an understanding of the mismatch of current message passing protocols and the architecture and execution model of SIMT processors.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.94" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3e1e61d1128dae2038cd9701ba95f348f6d12db1", "sources": [ "DBLP" ], "title": "Relaxations for High-Performance Message Passing on Massively Parallel SIMT Processors", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "3e23a19a94d4cf363e6f3dc278c45f6c53a1daa4": { "authors": [ { "ids": [ "1879231" ], "name": "Vladimir Mironov" }, { "ids": [ "2143727" ], "name": "Yuri Alexeev" }, { "ids": [ "3095566" ], "name": "Kristopher Keipert" }, { "ids": [ "4694106" ], "name": "Michael D'mello" }, { "ids": [ "3246206" ], "name": "Alexander Moskovsky" }, { "ids": [ "35112573" ], "name": "Mark S. 
Gordon" } ], "doi": "10.1145/3126908.3126956", "doiUrl": "https://doi.org/10.1145/3126908.3126956", "entities": [ "Algorithm", "Data structure", "Fock state", "GAMESS (US)", "Hartree\u2013Fock method", "Legacy code", "Memory footprint", "Message Passing Interface", "Microprocessor", "OpenMP", "Parallel computing", "Supercomputer", "Thread (computing)", "Xeon Phi" ], "id": "3e23a19a94d4cf363e6f3dc278c45f6c53a1daa4", "inCitations": [], "journalName": "", "journalPages": "39:1-39:12", "journalVolume": "", "outCitations": [ "9ad521237d5d6a52c260f9772973bb7c564a2b8b", "003f450dae97526423328285debf7c9a02f24cd0", "b9992dbb2c10bdcb561f072e9fac0fd61f7c9799", "b4e3057bcd459a68d8c63d2e96202c3742e82e10", "62255642e3e314705567a6f45a5d3054227397f9", "2f28a3d34e5d7372dd266f0b26ba31358dd5f225", "06b7bfd1a5af3e2fda259ac658f0629717a4c939", "d399ad91e0cefccffc843e9f6448fa6fc40919b5", "ef06e488a15f0c6d0763e308ae17ac90229724db", "0f3fd3bf563564f18a1470b714c89ba59689c459", "6fbec8a91fb52aff579ca45369a862a445c9e0fb", "a1cfb89c870d587a5ba37e73af7c78221e2ceca7", "3941f89c8367bb070ed6e66a3def190c4ff2cdf4" ], "paperAbstract": "Modern OpenMP threading techniques are used to convert the MPI-only Hartree-Fock code in the GAMESS program to a hybrid MPI/OpenMP algorithm. Two separate implementations that differ by the sharing or replication of key data structures among threads are considered, density and Fock matrices. All implementations are benchmarked on a super-computer of 3,000 Intel® Xeon Phi processors. With 64 cores per processor, scaling numbers are reported on up to 192,000 cores. The hybrid MPI/OpenMP implementation reduces the memory footprint by approximately 200 times compared to the legacy code. 
The MPI/OpenMP code was shown to run up to six times faster than the original for a range of molecular system sizes.", "pdfUrls": [ "https://arxiv.org/pdf/1708.00033v1.pdf", "http://doi.acm.org/10.1145/3126908.3126956", "http://arxiv.org/abs/1708.00033", "https://arxiv.org/pdf/1708.00033v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3e23a19a94d4cf363e6f3dc278c45f6c53a1daa4", "sources": [ "DBLP" ], "title": "An efficient MPI/openMP parallelization of the Hartree-Fock method for the second generation of Intel\u00ae Xeon Phi\u2122 processor", "venue": "SC", "year": 2017 }, "3e2e76a31195fdbd56f80b941021292572eea6ec": { "authors": [ { "ids": [ "37812412" ], "name": "Ethan Cecchetti" }, { "ids": [ "1726798" ], "name": "Fan Zhang" }, { "ids": [ "1783838" ], "name": "Yan Ji" }, { "ids": [ "2404928" ], "name": "Ahmed E. Kosba" }, { "ids": [ "1687161" ], "name": "Ari Juels" }, { "ids": [ "1726246" ], "name": "Elaine Shi" } ], "doi": "10.1145/3133956.3134010", "doiUrl": "https://doi.org/10.1145/3133956.3134010", "entities": [ "Benchmark (computing)", "Confidentiality", "Correctness (computer science)", "Formal verification", "Oblivious ram", "Oblivious transfer", "Random-access memory", "User (computing)" ], "id": "3e2e76a31195fdbd56f80b941021292572eea6ec", "inCitations": [ "6db9824d4667b22310c51fe638403238f873e9f2" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "317", "journalVolume": "2017", "outCitations": [ "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "e67410d5ef6a064afd20d93650f39129d00f1a32", "c773c64ab52f702ae0aaba8c35b72dde471ea04a", "beeecc5cda7ef949e5bf00a6b4404bc58853c484", "2a531c6f67c2dc3be1fcfe5a536d71de8851c5cc", "46e9d622600e2a116feee553017f74c46763ca7a", "19bab496d5d7f60d3e5b9217739b9cf7fedaf44b", "475b10209d1ed13b079d62aca57ec31da4284bcd", "10d5282a8d25c4490338d5fb8ad2f57b8646ad38", "8c9ce2108cfd83aee973b492cbef052cf75f61c1", "6f640462a43c5feac2849381fb0cdb8421caed91", 
"e1aed9296c3af9139f48d15e043e2e8beab55409", "07c29e3c3e6e57647b09d0baa26b091003665ce9", "d94ba52bd05fe12926836c3332e2fd895e78ed30", "34c53b97e8befa13b6ef60dd258723e1b63c15ed", "52210124ac84b31b855f481b25c6ac5e80afab97", "155ca30ef360d66af571eee47c7f60f300e154db", "4af63ed343df388b6353b6fc77c7137d27822bf4", "2865de7ce2f1263bfc3ddeb5cef1b983b0f65f09", "0260431abc8910474e728f05556a12f20c44288b", "00ecd7b2e0c364ce4e9f5416ee1dbeaeabe87a62", "011d714a361b8ceb925c18e4a214e22aa5f899d8", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "20d1c524fd1557e76a687ffab57f96364beb6fe7", "46e46c77423fcaf6e4a435fecca4430b1e78bd5d", "90f26c0f0d04b9c4999b454c35ee1c7603ca9e4b", "20f5f8733134d87041b95b742d613051a1fb3fdb", "3624eaf7c2f05612429ed1579cc82102c11f4d65", "11e118806341f3ed4757e6091d6ef1dc5ccf0907", "15167da8d35184d062b988b5a6807e0fa72cd77f", "8c5e81a2badc7ed7c03914a8c12773084a96155a", "63231a5d55decb623aeb441b707cf2fb943d485b", "dae60c85a305dce2d690cb4a2f6cc777d488b903", "0b7e6c5d49b7681fa7426b86040072e3b36a2223", "20b63210954f7c5a70664f301dcd7196856ccfa7", "68433c49fc2273a698dffa5eecf023e8b522b5c8", "452c803f91ab670bf36403ed5412875b13ae9e94", "049e2c54fe8a35cd941937ba592e07bbc2dda591", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "42333e3f231bbfe508f6da6bad2feff9ae223113", "26ab9c27d995dadd553614045361ffb1afba9008" ], "paperAbstract": "Blockchains and more general distributed ledgers are becoming increasingly popular as efficient, reliable, and persistent records of data and transactions. Unfortunately, they ensure reliability and correctness by making all data public, raising confidentiality concerns that eliminate many potential uses.\n In this paper we present Solidus, a protocol for confidential transactions on public blockchains, such as those required for asset transfers with on-chain settlement. 
Solidus operates in a framework based on real-world financial institutions: a modest number of banks each maintain a large number of user accounts. Within this framework, Solidus hides both transaction values and the transaction graph (i.e., the identities of transacting entities) while maintaining the public verifiability that makes blockchains so appealing. To achieve strong confidentiality of this kind, we introduce the concept of a Publicly-Verifiable Oblivious RAM Machine (PVORM). We present a set of formal security definitions for both PVORM and Solidus and show that our constructions are secure. Finally, we implement Solidus and present a set of benchmarks indicating that the system is efficient in practice.", "pdfUrls": [ "http://eprint.iacr.org/2017/317", "http://doi.acm.org/10.1145/3133956.3134010", "https://obj.umiacs.umd.edu/papers_for_stories/kosba_ACMCCS2017.pdf", "https://eprint.iacr.org/2017/317.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3e2e76a31195fdbd56f80b941021292572eea6ec", "sources": [ "DBLP" ], "title": "Solidus: Confidential Distributed Ledger Transactions via PVORM", "venue": "CCS", "year": 2017 }, "3e5feac4302e36658f4777e58b9c03b315819008": { "authors": [ { "ids": [ "2003815" ], "name": "Xun Gong" }, { "ids": [ "1791987" ], "name": "Rafael Ubal" }, { "ids": [ "1771736" ], "name": "David R. 
Kaeli" } ], "doi": "10.1109/ISPASS.2017.7975298", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975298", "entities": [ "Benchmark (computing)", "CUDA", "Computer architecture", "Disassembler", "Graphics", "Graphics processing unit", "Kepler (microarchitecture)", "Microarchitecture", "Multiprocessing", "Pipeline (computing)", "Shader", "Simulation" ], "id": "3e5feac4302e36658f4777e58b9c03b315819008", "inCitations": [ "6ca0d660157ba938ef6bb318b2207626bb32a5d6" ], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "269-278", "journalVolume": "", "outCitations": [ "061394b6f5c6ef2ebdb06eb9787a8f8fdc43bc62", "c49c37470e00b50786cf3772b4ac30dca93e99f3", "43f8e4f54c9b28911164ebe3af8e11362f9a8b04", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "8383b7f6f4f9556e522f735a0fd7b8c9e11e613b", "075b8446920448acc25bfa83f55ae983037e3f70", "054e4a6966d54eb9fd207cf0484214201f46424a", "2ad29134da93304e72dd047ca99ec6cfef2b4990", "2d6f002477015469075954c6748a1a85af352c94", "466ff5d1f695c5472db9f6746ac29575f16de753", "23177452df15b652dd54a59324502b92c99687a7", "10a0ab781e94a75fdcbde819f3f4cddcab768bbd" ], "paperAbstract": "Presilicon simulation is one of the key toolsets for computer architects to evaluate and optimize their future designs. As Graphics Processing Units (GPUs) have become the platform of choice in many computing communities due to their impressive processing capabilities, computer architecture researchers need a simulation framework that allows them to quantitatively consider design tradeoffs. In this paper, we present the Multi2Sim Kepler simulator framework, a new detailed GPU microarchitecture performance simulator that supports NVIDIA's Kepler shader assembly (SASS) code execution. The toolset provides a disassembler, a functional simulator and a detailed cycle-based simulator. 
We provide insight into the architecture of the NVIDIA Kepler GPU, describing the details of the streaming multiprocessor, front-end and instruction pipelines. We compare the performance of this new simulator against an NVIDIA K20X, a high-end Kepler device. We also evaluate the performance of NVIDIA's CUDA benchmark suite on our GPU performance simulator.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975298" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3e5feac4302e36658f4777e58b9c03b315819008", "sources": [ "DBLP" ], "title": "Multi2Sim Kepler: A detailed architectural GPU simulator", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "3e8c858c827b3248ac33ff57ea5b0b07fba83326": { "authors": [ { "ids": [ "2757586" ], "name": "Kartik Joshi" }, { "ids": [ "27023186" ], "name": "Arun Raj" }, { "ids": [ "1941698" ], "name": "D. Janaki Ram" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.68", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.68", "entities": [ "Benchmark (computing)", "Cloud computing", "Hypervisor", "Interference (communication)", "OpenVMS", "Profiling (information science)", "Virtual machine" ], "id": "3e8c858c827b3248ac33ff57ea5b0b07fba83326", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "522-530", "journalVolume": "", "outCitations": [ "3c0bc4e9d30719269b0048d4f36752ab964145dd", "4e7c251022412959a32679353bd0b90963338a05", "1999881614aed9295f4359cf4761926bc23fcd82", "097ec320bb82712203fc18ef60111b03dafa937f", "d9e895e013e001ce9e975213dd843f8db1b5cf32", "01253966029919cdc80d83caa4c4dfd786a9c42f", "0b4fdb6542884d3874a29ce072a38370d0747b47", "0cb4b930159a456cd3ab7e253e0cab5c5b28c8c4", 
"0276adfea086f9f92337669ca23b65a6d5a475b4", "5848da5058fed3b97bfd801ca19e5265f489abfe", "1ecd36058e48734213c81728f42ff798a2c52833", "1b938edfde3b3b04c13599c2db87c72b7962f383", "16514cb7cf7edff6e7806560487e66f57b42efb8", "067c7857753e21e7317b556c86e30be60aa7cac0", "490d862480cf30949dce90e832aa292c498ac768", "117c8dca0918376176e7bc8c0432103ed8e9c34f", "f24b702c16849ba88da1df30cc3f1e126a487ede", "0b75a3d88e0b5ffc4ddcf6fe0a33f76f3d5ebb34", "277f20ddc0e9fa593753ef2778110508372c597f", "3574657705475722b6c398c266805f758268778b", "4514e8d10bddd734ccd88335c9d911e345f27972", "1ff2a26cf246fc7c390e907426fb2bce8026bb38", "1eb845e672abc3e172725639eece560c3cd5ec2a" ], "paperAbstract": "Cloud instances are usually virtual machines hosted on shared hardware. Containers are often used to deploy services in cloud instances. However, excessive consumption of shared hardware resources by some VMs may lead to unpredictability in the performance of containers running in co-located VMs. Existing techniques to detect performance interference in applications are either too expensive in terms of profiling or applicable only from the perspective of the infrastructure owner. In this paper, we propose Sherlock, a lightweight subscriber-centric mechanism to detect performance interference and estimate its impact on cloud services. Sherlock does not require access to hardware counters and can work on unmodified clouds without any support from the cloud provider or changes to the hypervisor. Sherlock uses a simple profiling technique which is performed only for a short duration before deployment. When interference from co-resident VMs is observed, Sherlock notifies the cloud subscriber, so that any remedial actions can be taken. We also define a metric IScore, which is an estimate of the impact of interference on a service. 
Experiments on the real-world web benchmark CloudSuite show that our approach is able to detect interference with accuracy ranging from 89% to 98.4%, and with very less false positives ( 8%).", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.68" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3e8c858c827b3248ac33ff57ea5b0b07fba83326", "sources": [ "DBLP" ], "title": "Sherlock: Lightweight Detection of Performance Interference in Containerized Cloud Services", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "3eb0660f970a76a078d2db96c4e6714d5f0c1484": { "authors": [ { "ids": [ "3094459" ], "name": "Mayuresh Kunjir" }, { "ids": [ "2605019" ], "name": "Brandon Fain" }, { "ids": [ "1679666" ], "name": "Kamesh Munagala" }, { "ids": [ "1778813" ], "name": "Shivnath Babu" } ], "doi": "10.1145/3035918.3064018", "doiUrl": "https://doi.org/10.1145/3035918.3064018", "entities": [ "Algorithm", "Apache Hadoop", "Approximation algorithm", "Arbitrary-precision arithmetic", "Big data", "Cache (computing)", "Central processing unit", "Data item", "Data parallelism", "Database", "Fairness measure", "Game theory", "Multitenancy", "Parallel database", "Pareto efficiency", "Polynomial", "Proportionally fair", "SPARK", "Time complexity" ], "id": "3eb0660f970a76a078d2db96c4e6714d5f0c1484", "inCitations": [ "bf22e7a56929fa0bee305022d7d229449fe26a81" ], "journalName": "", "journalPages": "219-234", "journalVolume": "", "outCitations": [ "3c6be4c9ea5c56d4ab97aabb4e7c9d5ced57bea8", "18ea18ad0e7adb2fc1e123c0c13d95a99ebeb312", "87d47502bf40a4bfa7a0ded26c3efb2426250808", "2077c3787e5a1545df312d51f9a7b8cd05e2c7f0", "30fffc671f9d6911da230b17d68ec4c2984a0890", "009523862551ecec0da53dfd0365892cb9cb430b", "5c6ab0726c1b2e680b7d8147d9d3a45d11fe289a", 
"0d62a556f60fcf144a171c3b522b253f8bd443b9", "1be315723e77916d1ce398dcc4157cae66e68b41", "578979a22a6bfb915c85acb42a6297e996f9c45e", "298239c297d781496f2988c804339bd4f1e17308", "a7a7110ca7fe9eec39f4c709920f9cad45dafb19", "213c5d93b11c6f7620e91369290b0711876e0751", "624073f9a3053217afb1dbd8adfc44d1052ba282", "23a4971ea1a47a827df398cbabc26e0e05132203", "8d1aa2db67f214bf4b396d26240330a2ea00bd9a", "291470e5e557ac526f79a59c83e98fbf53406401", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "30a13879770a420ab1499de4ee06aef54d7ee256", "4b4ee1ee9bbfd9527fba0bbd761bd61a59f96a48", "0ca45c8a2d16a949586c8b86a73047a9f5cfa71f", "363116c764453d9b740c46d23b1f5a3c5801d76e", "277fdd6dbd792fd41e401b13e0fd897bfd911378", "2b781818dedf1f31ed4cfa0c6150e020d89d0d05", "2988e34168fa91398fa397baf823af2063893e9c", "1df0f37e87b542d62a7a30607aea96693d84fdcc", "1e954c5cf302d76483ec0cc0049b4b1220077750", "08c30bbfb9ff90884f9d1f873a1eeb6bb616e761", "c3c262b8e56536d14826926b69af59eaefc29bc2", "41e0ef45114deff41d92c8b235393cfe4d9a94f4", "72f91b486b8b867e5825d82db1cca5a5172f376b", "04085d8281bf3e9b0493c7e4441671cbd23d9ac2", "980773ca869fc17562e4fbcf4202a8f21893b114", "07add9c98a979e732cfa215c901adb1975f3f43a", "50e1460abd160b92b38f206553f7917cf6470324", "5c0985b9b699d94c697a9a835e76d52ba916ca0b", "f465e873cb9d9e5cd74cc759c2b015da06385a86", "e6400e666809149ce3ec9bca67f7a6d1074728a0", "bbc2a698c2fb2b76e256cc51a9d7c37765ab51b6", "0d2f3f6abd86368a2eb9a6d0b37d1299ec5939a6", "4a81486f7f1d03d547e78046fc154a6890d8eabd", "2f52cbef51a6a8a2a74119ad821526f9e0b57b39", "12ab5f0813dd557057c68359ae12cd18a4213b9e", "9ad46d3a4d32b11e3654c5df490db520dde9e110", "cd8231a86589d205c64167ad462671c357f56d8d", "947955f1d97505638fc01d24642904df7071588d", "d2ceab98e96695ea58f919e1141e7aff5d6088ab", "eaf2bccd82bf4cffcf1ef85487d6722f6a04716c", "5dd36ed50668b5cc1c95ce6cf83b1b9b21a5f560", "2a72118cc2581f0a4e7fe323e67305f2742e0563", "0dafdc7debdcae528b2549489a03509cb4ecb9fe", "82ce0158c14708b01153ac0fe7d6dc9688dfbb18", 
"049a69b03ff4af9999e1ace889d5f6864f481ccc", "0648bd696a4b80f251dc6f4da210bdf94f208a93" ], "paperAbstract": "Systems for processing big data---e.g., Hadoop, Spark, and massively parallel databases---need to run workloads on behalf of multiple tenants simultaneously. The abundant disk-based storage in these systems is usually complemented by a smaller, but much faster, cache. Cache is a precious resource: Tenants who get to use the cache can see two orders of magnitude performance improvement. Cache is also a limited and hence shared resource: Unlike a resource like a CPU core which can be used by only one tenant at a time, a cached data item can be accessed by multiple tenants at the same time. Cache, therefore, has to be shared by a multi-tenancy-aware policy across tenants, each having a unique set of priorities and workload characteristics.\n In this paper, we develop cache allocation strategies that speed up the overall workload while being fair to each tenant. We build a novel fairness model targeted at the shared resource setting that incorporates not only the more standard concepts of Pareto-efficiency and sharing incentive, but we also define envy freeness via the notion of core from cooperative game theory. Our cache management platform, ROBUS, uses randomization over small time batches, and we develop a proportionally fair allocation mechanism that satisfies the core property in expectation. We show that this algorithm and related fair algorithms can be approximated to arbitrary precision in polynomial time. We evaluate these algorithms on a ROBUS prototype implemented on Spark with RDD store used as cache. 
Our evaluation on an industry-standard workload shows that our algorithms score high on both performance and fairness metrics across a wide variety of practical multi-tenant setups.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064018" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3eb0660f970a76a078d2db96c4e6714d5f0c1484", "sources": [ "DBLP" ], "title": "ROBUS: Fair Cache Allocation for Data-parallel Workloads", "venue": "SIGMOD Conference", "year": 2017 }, "3eb572b15e54fba028000dd305448a6e8cae735e": { "authors": [ { "ids": [ "34687701" ], "name": "Orcun Yildiz" }, { "ids": [ "2494447" ], "name": "Amelie Chi Zhou" }, { "ids": [ "2629067" ], "name": "Shadi Ibrahim" } ], "doi": "10.1109/CLUSTER.2017.73", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.73", "entities": [ "Big data", "CPU cache", "Interference (communication)", "Run time (program lifecycle phase)" ], "id": "3eb572b15e54fba028000dd305448a6e8cae735e", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "87-91", "journalVolume": "", "outCitations": [ "47f5bba54710b0e1663e9336790cb4609d16077d", "9ac43af1dac92cc2c03e3a3c46fc5892fca6e988", "23de558a10458c1be3062412f134500605eada98", "220d17b570a2355454730fb561cb602f72301a5a", "0541d5338adc48276b3b8cd3a141d799e2d40150", "ae24289a0ed3152de528f863c96279382b14ae61", "d5e9b36ec7e7d5f71aa406d2068f72b984342b0c", "8dd97ace0d9bddaaa7004c7325f30c2145fbe41f", "589e89d77f689ebfc3f36bc1f76fd518ae4a237c", "abc9d1c519c350845506d58bdd109c9f4d5c2492", "73199e434ef9adca471e686b34f7a6a41c7e959e", "3aa1bc5f67254b4e2d86170b70adfacf937008f6", "61ad98f7f693221bf2149897955aa93eac8950ba", "436373807a0a9dc8660e7739e018d18cc18dacd7", "c16589dc1bbea3cc4dc8c151c115b284bf5643d2" ], "paperAbstract": "Burst Buffer is an effective solution for reducing the data transfer time and the I/O interference in HPC systems. 
Extending Burst Buffers (BBs) to handle Big Data applications is challenging because BBs must account for the large data inputs of Big Data applications and the performance guarantees of HPC applications - which are considered as first-class citizens in HPC systems. Existing BBs focus on only intermediate data of Big Data applications and incur a high performance degradation of both Big Data and HPC applications. We present Eley, a burst buffer solution that helps to accelerate the performance of Big Data applications while guaranteeing the performance of HPC applications. In order to improve the performance of Big Data applications, Eley employs a prefetching technique that fetches the input data of these applications to be stored close to computing nodes thus reducing the latency of reading data inputs. Moreover, Eley is equipped with a full delay operator to guarantee the performance of HPC applications - as they are running independently on a HPC system. The experimental results show the effectiveness of Eley in obtaining shorter execution time of Big Data applications (shorter map phase) while guaranteeing the performance of HPC applications.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.73" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3eb572b15e54fba028000dd305448a6e8cae735e", "sources": [ "DBLP" ], "title": "Eley: On the Effectiveness of Burst Buffers for Big Data Processing in HPC Systems", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "3ebb5abf41521032df5ace422a3fe696ea5f87ef": { "authors": [ { "ids": [ "1922658" ], "name": "Zixia Liu" }, { "ids": [ "1734058" ], "name": "Hong Zhang" }, { "ids": [ "9467302" ], "name": "Liqiang Wang" } ], "doi": "10.1109/CLOUD.2017.20", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.20", "entities": [ "Apache Spark", "Big data", "Cloud computing", "Computer cluster", "Data security", "Distributed computing", "Grand 
Challenges" ], "id": "3ebb5abf41521032df5ace422a3fe696ea5f87ef", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "90-97", "journalVolume": "", "outCitations": [ "73bcdb7e7c042ae80d15be3fe88702bb36fc05cf", "582bde977340390b95b1b97427f3e0d2bc4c02d7", "c5f9c75ea9a4906345ac0fe3a400354e9b321b5f", "3fe81cc61506493549a13747e68bdb5c1d965592", "d4745fea917db12bc25e505eb7d65895ae786554", "11c57b6e63184c35fd1999a19c7969170f038973", "121df43546bdfa846751c75ca667013f4904e7a1", "706cd2c450fb7054c92916b300513a266a207652", "089c89f54c5dd0d2a873fbfc19183667d3be5b66", "0fa5455a3241fca461be6c14d0f296c394cadd85", "131dea19f27555ccd634ba62f35306b9a796693f", "797d93472c6aed26056de317c4a4cae0fd6e65aa", "a09679761c16759f3625b7f2210cb39699b0693d", "b044e16a6e375ca13fb4667f90040d985b09a5e3", "998df69751b93679888449d3327c2e47acbaaaa3", "2583c913ad2be00feb063a23f3735232e0ac0ece", "8015f2aa20a9f75802ec13cd8f24881a929d20e2" ], "paperAbstract": "Nowadays, with the increasing burst of newly generated data everyday, as well as the vast expanding needs for corresponding data analyses, grand challenges have been brought to big data computing platforms. Computing resources in a single cluster are often not able to fulfill the computing capability needs. The requests of distributed computing resources are dramatically arising. In addition, with increasing popularity of cloud computing platforms, many organizations with data security concerns are more favor to hybrid cloud, a multi-cluster environment composed by both public cloud and private cloud in purpose of keeping sensitive data local. All these scenarios show great necessity of migrating big data computing to multi-cluster environment. In this paper, we present a hierarchical multi-cluster big data computing framework built upon Apache Spark. Our framework supports combination of heterogeneous Spark computing clusters. 
With an integrated controller within the framework, it also facilitates ability for submitting, monitoring, executing of Spark workflow. Our experimental results show that the proposed framework not only enables possibility of distributing Spark workflow throughout multiple clusters, but also provides significant performance improvement compared to single cluster environment by optimizing utilization of multi-cluster computing resources.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.20", "http://www.cs.ucf.edu/~lwang/papers/Cloud2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3ebb5abf41521032df5ace422a3fe696ea5f87ef", "sources": [ "DBLP" ], "title": "Hierarchical Spark: A Multi-Cluster Big Data Computing Framework", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "3ec7b32baf5d3d683793804e7f11789eb66e10ea": { "authors": [ { "ids": [ "10404450" ], "name": "Wenrui Yan" }, { "ids": [ "1748735" ], "name": "Jie Yao" }, { "ids": [ "39167624" ], "name": "Qiang Cao" }, { "ids": [ "2072948" ], "name": "Changsheng Xie" }, { "ids": [ "9280383" ], "name": "Hong Jiang" } ], "doi": "10.1145/3064176.3064207", "doiUrl": "https://doi.org/10.1145/3064176.3064207", "entities": [ "19-inch rack", "Accessibility", "Computer data storage", "DR-DOS", "Digital data", "Disk storage", "Hard disk drive", "Hierarchical storage management", "Imperative programming", "Library (computing)", "Memory hierarchy", "Optical storage", "POSIX", "Robot", "Robot Operating System", "Robotic arm", "Samba TNG", "Solid-state drive", "Tape library", "Throughput", "User interface", "Virtualize" ], "id": "3ec7b32baf5d3d683793804e7f11789eb66e10ea", "inCitations": [], "journalName": "", "journalPages": "161-174", "journalVolume": "", "outCitations": [ "066ebaf1c399d5f2bf54b72b5189ec0577423d55", "36b89d663279e0c4aee8e1cf575bc42ecd11708c", "b5b2a56539b2da468790b5d690efb7d345344e65", "b1d9b56ad9eed7130aebca227ad8a5b4e7699839", 
"1f990609719241252d85970761866ad98e88cf3d", "3ce8bc5520c099837b8c0c59a4808ce988a50cd9", "7c5dc4de32a0d833eb87ae56fb24f9cb35f68fa9", "740b2ff66ea305ffc5369ecee4498941c39efaab", "0c3b70bad0f626ffeea1939218cf884b33dbc1cb", "94d44939f7294998fc10491de4cd0ea7c651d042", "5d249d91d68d8cd34431e840fcfa67d57905f94d", "55becb668bc6cbf0c13b09caa92b849246c36882", "6337c3bb0f75984ee6f3b76582d140f7d7700982", "3bcd7aa0cb64d8f21e9a36f75ba04c19f51560ae", "3e820b6248a518fdb1c62bfc2dc65f763e1c4fe3", "30528c9782c37a30e6652ea626f353e74eaaca1f" ], "paperAbstract": "The combination of the explosive growth in digital data and the need to preserve much of this data in the long term has made it an imperative to find a more cost-effective way than HDD arrays and more easily accessible way than tape libraries to store massive amounts of data. While modern optical discs are capable of guaranteeing more than 50-year data preservation without migration, individual optical disks' lack of the performance and capacity relative to HDDs or tapes has significantly limited their use in datacenters. This paper presents a Rack-scale Optical disc library System, or ROS in short, that provides a PB-level total capacity and inline accessibility on thousands of optical discs built within a 42U Rack. A rotatable roller and robotic arm separating and fetching the discs are designed to improve disc placement density and simplify the mechanical structure. A hierarchical storage system based on SSD, hard disks and optical discs are presented to hide the delay of mechanical operation. On the other hand, an optical library file system is proposed to schedule mechanical operation and organize data on the tiered storage with a POSIX user interface to provide an illusion of inline data accessibility. We evaluate ROS on a few key performance metrics including operation delays of the mechanical structure and software overhead in a prototype PB-level ROS system. 
The results show that ROS stacked on Samba and FUSE can provide almost 323MB/s read and 236MB/s write throughput, about 53ms file write and 15ms read latency via 10GbE network for external users, exhibiting its inline accessibility. Besides, ROS is able to effectively hide and virtualize internal complex operational behaviors and be easily deployable in datacenters.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064207" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3ec7b32baf5d3d683793804e7f11789eb66e10ea", "sources": [ "DBLP" ], "title": "ROS: A Rack-based Optical Storage System with Inline Accessibility for Long-Term Data Preservation", "venue": "EuroSys", "year": 2017 }, "3ee90eaa6a389b046157889206ce155ac76c178f": { "authors": [ { "ids": [ "2196277" ], "name": "Zhaoxia Deng" }, { "ids": [ "37223260" ], "name": "Ariel Feldman" }, { "ids": [ "2578247" ], "name": "Stuart A. Kurtz" }, { "ids": [ "1691956" ], "name": "Frederic T. Chong" } ], "doi": "10.1145/3079856.3080226", "doiUrl": "https://doi.org/10.1145/3079856.3080226", "entities": [ "Adversary (cryptography)", "Brute-force attack", "Decision tree", "Design space exploration", "Encryption", "Login", "One-time pad", "Password", "Password cracking", "Reed\u2013Solomon error correction", "Secret sharing", "Smartphone", "TRAVERSE", "Time complexity" ], "id": "3ee90eaa6a389b046157889206ce155ac76c178f", "inCitations": [ "4e544e6db8a17252e0cd6da00401bba734ad64b8", "22f0a538f061db451fcb76be205a45c876064310" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "361-374", "journalVolume": "", "outCitations": [ "22079975aa06e628f0e41c064baafd8cbdeadc02", "5c1eff420eae32dc9f572b7be36827ea31206e5b", "9b4d828d5ddcea2b9b1198f1a8fc770db093333b", "b76c6baeb385498ea3e9e1a66291bb8b23c73554", "cd6feaa03cf0e3a61a77c32902ea2b496bc188ee", "b4a77957fd2c8e0f9ffbf20c7d999ab2da599bfb", "e1c85ada89a41960fe1fd3acfb188c60bd9d0f59", 
"0d6ccf5e45334fb43acd5ef7834be962a1b3d988", "855f376284ff30b56d2f25a90ef33b91d7641337", "05dd6cb44124b8a210ac391f15ec25e68918ef22", "0143553ccf83694f0b89860f4765ebbba59ce797", "59c1ebc6cede6f8921c077f5f3d53108371ed963", "1895d42788fe064d4c087326789bea946c038eb5", "7deb0d80a43388948b4bf972ea0a2979245bbd9d", "1287e293b2468bb6767a845b11e1478e2362475c", "114f7ca5533ef14db97f318dfe9abd6045b8535d", "d874593ca58ae436ab05628167728f50d52719eb", "ddf58d2884ab4a0aaba98cb6f3ec04221d2fb5d3", "b852bc961328ce74f7231a4b569eec1ab6c3cf50", "97e960c22d45cf381f3df3eeabdee53a7c8837e1", "b30e537bc1fdf576508870722cac2f492855f28f", "7264bde9e774c7cc9257b4ddef0a5e27270fa263", "bfadfaea896a2e5c06d47ddd12082698a83d4511", "31445e3bd3672ed743c4a089cc0db4f23357f0f2", "7149f5be82fee9985ddda42075b513e53567a2c6", "a259d3205fcef2a70d5ceb87a39e2150c058fb81", "4bb5ec22edecbc5018af225b65348fd567296165", "295d26753f22a57303526ea6a74e9a0e433b4432", "1b736ca05fec4da9558969f1d57065a5bab2bc6e", "94fc35243e7400ef16d1f978772a6c26446edf35", "86d9ed1fd793a89f364b75bb986d1d7d4d143489", "6665af0bf0a0dc328a66e6a8da3048cbcc2990ae", "bfa4d140c5c3b1bd96b5cedb6d3885b7d48129ee", "0129bc149db1bcb9b6cdcfe0b08fb9acef9fa203", "7eb4a96898726118d8b56d72be05b3c719927788", "27c686aa8dd3bbdba240df218fc018907169375f", "db6e414480be82e495a6c5e684bfc2d3dcbcf115", "5d33fc756f3e283bcc166f63767fd3d633c66a73", "b3ee81b5d8ab6de93a891652d57b11306a386a97", "30532fe0df76e4f3169db85dc2a4d61af415dbd7", "7bf65d3eb75190e691eeb332a7909bc03ba88ead", "48f1145733cbcb1e438271572267ddde5b463702", "45124ebcde598bbdbb3abe4db93005eae40c7269", "7f7ec8a01312333a2005384d098541aba3b113fc", "2fc8a439f0f73462f875870c403c90ea1fb99e41", "44b6b60a593c4136aefa2087baaec2adf4e97aee", "f7484996fa52e9ca6415ecde662c5c1a32d62226", "1328f5245cf6ccbb7cc189a8608f6ed59adfee85" ], "paperAbstract": "Most architectures are designed to mitigate the usually undesirable phenomenon of device wearout. 
We take a contrarian view and harness this phenomenon to create hardware security mechanisms that resist attacks by statistically enforcing an upper bound on hardware uses, and consequently attacks. For example, let us assume that a user may log into a smartphone a maximum of 50 times a day for 5 years, resulting in approximately 91,250 legitimate uses. If we assume at least 8-character passwords and we require login (and retrieval of the storage decryption key) to traverse hardware that wears out in 91,250 uses, then an adversary has a negligible chance of successful brute-force attack before the hardware wears out, even assuming real-world password cracking by professionals. M-way replication of our hardware and periodic re-encryption of storage can increase the daily usage bound by a factor of M.\n The key challenge is to achieve practical statistical bounds on both minimum and maximum uses for an architecture, given that individual devices can vary widely in wearout characteristics. We introduce techniques for architecturally controlling these bounds and perform a design space exploration for three use cases: a limited-use connection, a limited-use targeting system and one-time pads. These techniques include decision trees, parallel structures, Shamir's secret-sharing mechanism, Reed-Solomon codes, and module replication. We explore the cost in area, energy and latency of using these techniques to achieve system-level usage targets given device-level wearout distributions. 
With redundant encoding, for example, we can improve exponential sensitivity to device lifetime variation to linear sensitivity, reducing the total number of NEMS devices by 4 orders of magnitude to about 0.8 million for limited-use connections (compared with 4 billion if without redundant encoding).", "pdfUrls": [ "http://people.cs.uchicago.edu/~ftchong/papers/isca17-lemons.pdf", "http://doi.acm.org/10.1145/3079856.3080226" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3ee90eaa6a389b046157889206ce155ac76c178f", "sources": [ "DBLP" ], "title": "Lemonade from lemons: Harnessing device wearout to create limited-use security architectures", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "3efa068494a91a825b9744c1ee4b83663f363533": { "authors": [ { "ids": [ "10041544" ], "name": "Jagadish B. Kotra" }, { "ids": [ "34933560" ], "name": "Diana Guttman" }, { "ids": [ "2456813" ], "name": "Nachiappan Chidambaram Nachiappan" }, { "ids": [ "1717494" ], "name": "Mahmut T. Kandemir" }, { "ids": [ "8948708" ], "name": "Chita R. 
Das" } ], "doi": "10.1109/MASCOTS.2017.26", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.26", "entities": [ "Approximation algorithm", "Computation", "Manycore processor", "Multi-core processor", "Network on a chip", "Program optimization", "Programming paradigm", "Time complexity" ], "id": "3efa068494a91a825b9744c1ee4b83663f363533", "inCitations": [ "ae39cff83d4850476855c06d02a8dc80ae55ad42", "ce14ff3b9a139629e699882ca26434a29b5c07b3" ], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "198-209", "journalVolume": "", "outCitations": [ "f69cf820714d69406bba646ca3e9ace7c444da0e", "4cb9325b008e1551404c526d5ba0b7b3b559f4ab", "6b4d7472ab61c70b2b1a0616410052f0ce301e6e", "367d34d830482b349c73f373717a079d335c03e5", "3ac3cf82ff8f43db2e467f792d22d57d068f7ee0", "8fb808a890a099896e34851179daba15659df11a", "938574649516c7690ce05891ef499760b9a0553b", "ef22b2c93c5c720a2b010f1280db8f8c7114c287", "2d680892a7318ab7eff879054ea7ab6aeeb51fe9", "14b664dd04703d2333bce3cb35641f90a3a8a2ce", "179f80848143cf109fa6aebae6c3844da03b062c", "26a7c5cd92c018f8348c1424e10042811ec15148", "6b37f1d8c7afc971c96c78afa99ec9daef357e85", "a0280c69589951383ea0dbcd06f11bc4c595eff1", "2e3c9d69f75900e0614a77bf21665d1fde02a85c", "274d7d0415ad8fc787f15b244339f8d0b37e6956", "53e11fc15261cc5e3a47bfda9eeb4c3355053b6d", "1d39c58f16e7b9b7eb382fdb342db85a8b957b4d", "009342aa77a56c46a475fa85e66506219f271526", "4678cdcf7e57c1563379ac7cc344254f01ace572", "29f766723ca752138855500084ced04503bfc9c8", "549cca620961e5093e315a4b0f9e670da3ff258f", "cf0591e00458d5d7ca20fbc82da70f783f57bfd6", "4908fe53a91465eaf95b21c4ca4f05378b90dcc4", "2c2e32267c43161f80241a2e1ba21d1f0f871dd4", "64d4f6759b32697e6cbebf901624c93c0a0c1744", "40138cbd57a4632d6267cff4c91b55e7376a6693", "4ebbbeab6e0f4ba9815889854441548fa414e16b", "54f3331b575b2d451c2d716f86496cada23d596d", "85398d5f19157c91bf00da3d36210e72d57887e4", 
"4e8505919eb22265f107ebbeeee3fa78bf6d893a" ], "paperAbstract": "Increasing data set sizes motivate for a shift of focus from computation-centric systems to data-centric systems, where data movement is treated as a first-class optimization metric. An example of this emerging paradigm is in-situ computing in largescale computing systems. Observing that data movement costs are increasing at an exponential rate even at a node level (as a node itself is fast-becoming a large manycore system), this paper provides a limit study of near-data computing within a manycore chip. Specifically, it makes the following two contributions. First, it quantifies the potential performance benefits of three incarnations of the near-data computing paradigm under the assumption of zero on-chip network latency and an infinite number of extra cores for offloading computations close to data they require. Our detailed experimental evaluation indicates that the most successful of these incarnations can boost the performance of the original execution by as much as 75%. The second contribution of this paper is an investigation of more realistic schemes that can approximate the potential savings achieved by perfect near-data computing. Our results demonstrate performance improvements ranging between 44% and 52%, over the original execution. 
We also discuss the pros and cons of each of these realistic schemes, and point to further research directions.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.26" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3efa068494a91a825b9744c1ee4b83663f363533", "sources": [ "DBLP" ], "title": "Quantifying the Potential Benefits of On-chip Near-Data Computing in Manycore Processors", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "3f0bae9a6525e9832c29b6523bc3965b7ebd9c59": { "authors": [ { "ids": [ "2833699" ], "name": "Binhang Yuan" }, { "ids": [ "32269195" ], "name": "Vijayaraghavan Murali" }, { "ids": [ "1741680" ], "name": "Chris Jermaine" } ], "doi": "10.1145/3133882", "doiUrl": "https://doi.org/10.1145/3133882", "entities": [ "Algorithm", "Control flow", "Data dependency", "Usability testing" ], "id": "3f0bae9a6525e9832c29b6523bc3965b7ebd9c59", "inCitations": [], "journalName": "PACMPL", "journalPages": "58:1-58:26", "journalVolume": "1", "outCitations": [ "2395745569d01506d9697047700b64553b4ccfee", "6081ceb60d07fa0a2f0037ece6e540228e4edf73", "68f95b014349a491400a3d11ae201f9380f4c979", "80676b9c23d161fd1d6da43936d8393502eca6af", "2a9e412ef30a21cf64a2fd113843dfd1263338c8", "05078dca08161b714d3dda3499727ba2b53330eb", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "0560fc4924bbbe7e920122dc25c1ecfc3e59e374", "083e9b12c5566d953efdf6d90e77638a6b0c3693", "4a92c5bfd0d393926eea5dd34658543a41b13c7a", "d0c92b106f758b045643a1ea8a82864bece2bfb6", "47a53bd6a9c8414268f6cfc174ee1b89d618c27e", "8f34e35cd94fdabe3b7de431c27ae4fded51014e", "802b58b789338b2325d4d7361044809f3e2ca949", "a416b66ca5da011b3a1def3f65657910321ffeb8", "dfaccc9fd710444da02f626ba3459017fd082185", "b1646aa181fedc6f150e79b969d45fa2e9602dac", "19ce1d152371f74eb8f5a61fbddbf251690e2c8b", "139eb52cb427d91f6d8728ea82220e85fa11f5cd", 
"47bdc0c4eedcb226c0c5f88471b69c5a1b67dfba", "b2a1a100367a802fba0793196516a5047871f622", "3db4291a1a629876516bb06ae798a98475fb0148", "13c5a74de5cfc06ced006db3aa82fabe0b122491", "65bf11db83a2befc70cbf23c18d9099e01a9893c", "108652344f125ca0f1260a9c17713f9120fa21d9", "092b276f3411c9f8070411492129750231845682", "56c0b6c9397d7b986d71d95d442458afbfdbb35e", "12e60b409c097ac641eb28e0d3013d94aab9362e", "01f3290d6f3dee5978a53d9d2362f44daebc4008", "bc703153632cf01a5102f63fd1012192f2db557e", "78d74e0ea432aa9e27845a4116fbceadfd911fdc", "d8e338c18e425cba83644ba03a516079c3630e5e" ], "paperAbstract": "In this paper, we consider the problem of source code abridgment, where the goal is to remove statements from a source code in order to display the source code in a small space, while at the same time leaving the ``important'' parts of the source code intact, so that an engineer can read the code and quickly understand purpose of the code. To this end, we develop an algorithm that looks at a number of examples, human-created source code abridgments, and learns how to remove lines from the code in order to mimic the human abridger. The learning algorithm takes into account syntactic features of the code, as well as semantic features such as control flow and data dependencies. Through a comprehensive user study, we show that the abridgments that our system produces can decrease the time that a user must look at code in order to understand its functionality, as well as increase the accuracy of the assessment, while displaying the code in a greatly reduced area.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133882" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3f0bae9a6525e9832c29b6523bc3965b7ebd9c59", "sources": [ "DBLP" ], "title": "Abridging source code", "venue": "PACMPL", "year": 2017 }, "3f494abb49c43a6a392e72ccbed2ac6ac216ba42": { "authors": [ { "ids": [ "29882501" ], "name": "Biplab Kumar Saha" }, { "ids": [ "20977255" ], "name": "Tiffany A. 
Connors" }, { "ids": [ "40288152" ], "name": "Saami Rahman" }, { "ids": [ "1721526" ], "name": "Apan Qasem" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.3", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.3", "entities": [ "Compiler", "Feature extraction", "Feature selection", "Graphics processing unit", "Heuristic", "Machine learning", "Optimizing compiler", "Performance prediction", "Program optimization", "Register allocation", "Toolchain" ], "id": "3f494abb49c43a6a392e72ccbed2ac6ac216ba42", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "18-25", "journalVolume": "", "outCitations": [ "5dc3c465ef293f0ddc355196be1b7eead3dd588f", "bc4638f55f6ec57e37ac201ec3a61fdf58540aca", "326d1495d5288ce7fbe548809df56a8ac11da544", "1108af609469e420aeae551ba8a893c3200e07fa", "a47b408349a8146f71cb54c38226d2f7d92700fe", "064f0793b2b7af8e8fccbf62bf39976dc4ff5b7a", "85e94f77a60285a6891bb364be6ef9b0a6a4b19a", "1a7d301df4e18b41df59d34ee0981104daa282bc", "0c76a904b28c775eb5f33cd982f0bfeddab353e3", "44efef85d56e61fb304f27010cc0d1bd80283a69", "10d3e0f0648d0a5cfaebb3044ea7b14a52e54466", "94fc1aa5d1ad0be589e74fed4357d757c3cfeaed", "24fb4e260cbb59ca371aea6ef9aac97b4f32f8c2", "4bceae20f3d438c2c99fd2c0b58cb941b0f8866a", "1e375b7bd9b02336371dbbb06bee4a94b2a93fc8", "0a361ac9c017eaa73d39af7bb8f11a9fb8a5fc14", "3acb06ddf2fa04037312fe984b11b4240a3c29cb", "7ffab7a9898c3cd9425eb94accedd56bd9e06e60", "59b0afb917493c4070f335bd87f55a172429195d" ], "paperAbstract": "Recent interest in machine-learning based methods has produced many sophisticated models for performance modeling and optimization. These models tend to be sensitive to parameters of the underlying architecture and hence yield the highest prediction accuracy when trained on the target platform. 
Training a classifier, however, is a fairly involved process and requires knowledge of statistics and machine learning that the end users of such models may not possess. This paper presents a new framework for automatically generating machine-learning based performance models. A tool-chain is developed that provides automated mechanisms for sample generation, dynamic feature extraction, feature selection, data labeling, validation and hyper parameter tuning. We describe the design and implementation of this system and demonstrate its efficacy by developing a learning heuristic for register allocation in GPU kernels. Results show that auto-generated models can predict register thresholds that lead to integer factor performance improvements over kernels produced by state-of-the-art optimizing compilers.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.3" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3f494abb49c43a6a392e72ccbed2ac6ac216ba42", "sources": [ "DBLP" ], "title": "A Machine Learning Approach to Automatic Creation of Architecture-Sensitive Performance Heuristics", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "3f4fe56d0bee1049b97a0335198401be21c99c03": { "authors": [ { "ids": [ "2605713" ], "name": "Kento Sato" }, { "ids": [ "1689086" ], "name": "Dong H. Ahn" }, { "ids": [ "1696759" ], "name": "Ignacio Laguna" }, { "ids": [ "1682498" ], "name": "Gregory L. Lee" }, { "ids": [ "1772965" ], "name": "Martin Schulz" }, { "ids": [ "9538540" ], "name": "Christopher M. 
Chambreau" } ], "doi": "10.1145/3018743.3018767", "doiUrl": "https://doi.org/10.1145/3018743.3018767", "entities": [ "Debugging", "Interference (communication)", "Programmer", "Software bug", "Synthetic data" ], "id": "3f4fe56d0bee1049b97a0335198401be21c99c03", "inCitations": [ "f62928671049d04ce2b48e0e7218b1021e910680", "c44adaaa00360d30ee44de2646d540b32a055633" ], "journalName": "", "journalPages": "89-101", "journalVolume": "", "outCitations": [ "182c0524aa353b6f4f4cb75a88ff3f5fc3bd86e0", "ca8a8660af5ef7f196e32289228fdafef8a9bcb4", "1ee89eb169d6e2e60a733cd2fde50d8aa5cceb73", "4f085c06c0dc2b50d2e37c3b97f2cd53acc14f39", "34ff0969f1a6a00aca59b612112284773b48ddcf", "7ddd6b10bf7277794b472115a329766fcb2407ff", "6e69ad3daf1d4dfe98b83a4ed448cfa0ff016102", "89dce5f511b06739cbd1e2e2300775106bfe1d17", "2346439ece014d5e3ce1564adc2a7ca098a37c8e", "277e409419410f3b0c7e9ba6db8c82ba52114018", "d9f92d79dc68721941cc16913e4951a88d4154a3", "a0a1538f47ed3dbd82696ec80e0eb1cfd3c46930", "f0d91a7de97092cdb4a4bcf01b3d6f6b44aaf124", "3d64fc1b30ae50577f5b7d0f41b4399f34b32ed9", "d6481cd3377de9b555630cfda0b1967d26d53342", "24bb5f66906421f42aff2d64dfa35b4beb3ead7a", "5d927b1b2f8471c32417bddd67f50f4141a9c3c2", "26933d7dcc2c9c8cf2cd225d0fcff4e8374a8549", "85358a5d7015b3fec8790f897981a733a3621d4b", "005fc9cc5fd4e991612e3411950c5da5b0c07958", "0b68e74e679ed268310e43e0265d27bb983d49c5" ], "paperAbstract": "Debugging intermittently occurring bugs within MPI applications is challenging, and message races, a condition in which two or more sends race to match with a receive, are one of the common root causes. Many debugging tools have been proposed to help programmers resolve them, but their runtime interference perturbs the timing such that subtle races often cannot be reproduced with debugging tools. We present novel noise injection techniques to expose message races even under a tool's control. 
We first formalize this race problem in the context of non-deterministic parallel applications and use this analysis to determine an effective noise-injection strategy to uncover them. We codified these techniques in NINJA (Noise INJection Agent) that exposes these races without modification to the application. Our evaluations on synthetic cases as well as a real-world bug in Hypre-2.10.1 show that NINJA significantly helps expose races.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018767" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3f4fe56d0bee1049b97a0335198401be21c99c03", "sources": [ "DBLP" ], "title": "Noise Injection Techniques to Expose Subtle and Unintended Message Races", "venue": "PPOPP", "year": 2017 }, "3f59e1a955022a3a74dd7b79e7c6faf746549c12": { "authors": [ { "ids": [ "9563939" ], "name": "Syed Mahbub Hafiz" }, { "ids": [ "38805211" ], "name": "Ryan Henry" } ], "doi": "10.1145/3133956.3134008", "doiUrl": "https://doi.org/10.1145/3133956.3134008", "entities": [ "Data store", "Database", "Experiment", "Integrated circuit layout", "Open-source software", "Personally identifiable information", "Private information retrieval", "Server (computing)" ], "id": "3f59e1a955022a3a74dd7b79e7c6faf746549c12", "inCitations": [ "2d918c9d5d510e2cf45ca1a9742e377c90b66310", "41c04b237615243a7fc778c3653851e0e348322f" ], "journalName": "", "journalPages": "1361-1373", "journalVolume": "", "outCitations": [ "88cbe78707882009c9b4f923667da4bb4ed4823e", "35cd80ccef2e5c5e5845694a3b7f7359c609c442", "0db28e501c1f3008b17921770c2ba2588e9f1b82", "f98cfc3c092d69c068054698bcb4c1b6840644c6", "0fd372c97256aab30da9d89d477d7b7a7b656edb", "0c22c5fd02c563c94c616a64424cab45e287e49c", "7e6294370a5dbc1e9e8610421c9de13ee63d135d", "31445e3bd3672ed743c4a089cc0db4f23357f0f2", "70d2a37d5af527dfc345691e2f978f6e46dc4efe", "37731a2477845a51bc59e24722b75d6ac50d4a51", "7f6311c08aec78a82d68bfef074d416c4862226e", "9cf0c702707e0715c7ccf484588414891c0ee42b", 
"6b3aea37625702e98e5033e1107403e319b4df01", "24dd5810e14e7c402919299f29b48544fa03d529", "426b39e3abf91b5682380f0fc278a1f5b9baa2a9", "1fb374e944f500df41f5a9f4083c54111f7393ee", "47ee59d3ee71a798c1f84e8b88cab89c5afcb636", "97d7c23228d0d4c5c366eb7da83ea6e0b1ed1cc8", "1e3ded57c5d1e90e95f4f6bdbbe1cbf30ff6690f", "39e0cb9a2c3d913f94ded8fd05f114c78ee1b4c2" ], "paperAbstract": "We propose indexes of queries, a novel mechanism for supporting efficient, expressive, and information-theoretically private single-round queries over multi-server PIR databases. Our approach decouples the way that users construct their requests for data from the physical layout of the remote data store, thereby enabling users to fetch data using \"contextual\" queries that specify which data they seek, as opposed to \"positional\" queries that specify where those data happen to reside. For example, an open-access eprint repository could employ indexes of queries to let researchers fetch academic articles via PIR queries such as for \"this year's 5 most cited papers about PIR\" or \"the 3 most recently posted papers about PIR\". Our basic approach is compatible with any PIR protocol in the ubiquitous \"vector-matrix\" model for PIR, though the most sophisticated and useful of our constructions rely on some nice algebraic properties of Goldberg's IT-PIR protocol (Oakland 2007). We have implemented our techniques as an extension to Percy++, an open-source implementation of Goldberg's IT-PIR protocol. 
Our experiments indicate that the new techniques can greatly improve not only utility for private information retrievers but also efficiency for private information retrievers and servers alike.", "pdfUrls": [ "http://eprint.iacr.org/2017/825", "https://eprint.iacr.org/2017/825.pdf", "http://doi.acm.org/10.1145/3133956.3134008", "http://homes.soic.indiana.edu/henry/publications/ccs17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3f59e1a955022a3a74dd7b79e7c6faf746549c12", "sources": [ "DBLP" ], "title": "Querying for Queries: Indexes of Queries for Efficient and Expressive IT-PIR", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "3f67d1b9a3fa7c68fb58dcbe9d4d2d3532824f01": { "authors": [ { "ids": [ "27970309" ], "name": "Ambika Kaul" }, { "ids": [ "19245157" ], "name": "Saket Maheshwary" }, { "ids": [ "1787044" ], "name": "Vikram Pudi" } ], "doi": "10.1109/ICDM.2017.31", "doiUrl": "https://doi.org/10.1109/ICDM.2017.31", "entities": [], "id": "3f67d1b9a3fa7c68fb58dcbe9d4d2d3532824f01", "inCitations": [], "journalName": "", "journalPages": "217-226", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.31" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3f67d1b9a3fa7c68fb58dcbe9d4d2d3532824f01", "sources": [ "DBLP" ], "title": "AutoLearn - Automated Feature Generation and Selection", "venue": "ICDM", "year": 2017 }, "3f688d6104055458a01a9748d7d84b9dd8cf1d2b": { "authors": [ { "ids": [ "3099404" ], "name": "Rukma Talwadker" } ], "doi": "10.1145/3078468.3078484", "doiUrl": "https://doi.org/10.1145/3078468.3078484", "entities": [ "Command history", "Computer data storage", "Customer support", "Data center", "Dexter (malware)", "Downtime", "Log analysis", "Noise reduction", "Software system", "Static program analysis", "Tracing (software)" ], "id": "3f688d6104055458a01a9748d7d84b9dd8cf1d2b", "inCitations": [], "journalName": 
"", "journalPages": "7:1-7:12", "journalVolume": "", "outCitations": [ "5eab6c72ba39e0fea5c3aac0c2f5f9cc0a03eb0c", "08632fe2b934ed15d3499e7321282c81adc2c390", "3dc4bf9bc2bbd8962ca96e48ab63f05acf91cfe6", "49e8721bd4821eff0f147d73bea970f2de3aab8a", "2d9235228f146544bc31b110930292946cd79d13", "48f7a11ca35c1ab5860760af1af8353c895e416a", "37c04a742561ac2e2fd0069a9e2f92a048df4c0e", "14b750a0fd5a13f7494e4abf9b97718ff558f508", "18b876ceee8ecc5667e152f24f48a612a260507b", "1521c2c45835a1dd3d29f9886010c652063b7bad", "3386a3417920dd16efec5459b9b48930ece73dd8", "a0d875193974f727d93c8f377fefb3efd58acc62", "3aaa142aeb475b0aaef21e3dcfb7951a97e0f19a", "37e3da0c04ce13b1d00e2c6f4e9723807d777dda", "0f28af5e2f0ec33a29c5b12e5e5be78c8f9d14e8", "1e0752c6406458875c987f9427182ccd9aae4b8b", "9f94aadf6b2b57d6131e50af232a05e6dea1e48d", "4fa0029515a0cb4842fc82e29a98aa8683ef4853", "0f1f6d64a653c275e4251a29442272fcc3b74c25" ], "paperAbstract": "Misconfigurations in the storage systems can lead to business losses due to system downtime with substantial people resources invested into troubleshooting. Hence, faster troubleshooting of software misconfigurations has been critically important for the customers as well as the vendors.\n This paper introduces a framework and a tool called Dexter, which embraces the recent trend of viewing systems as data to derive the troubleshooting clues. Dexter provides quick insights into the problem root cause and possible resolution by solely using the storage system logs. This differentiates Dexter from other previously known approaches which complement log analysis with source code analysis, execution traces etc.. Furthermore, Dexter analyzes command history logs from the sick system after it has been healed and predicts the exact command(s) which resolved the problem. 
Dexter's approach is simple and can be applied to other software systems with diagnostic logs for immediate problem detection without any pre-trained models.\n Evaluation on 600 real customer support cases shows 90% accuracy in root causing and over 65% accuracy in finding an exact resolution for the misconfiguration problem. Results show up to 60% noise reduction in system logs and at least 10x savings in case resolution times, bringing down the troubleshooting times from days to minutes at times. Dexter runs 24x7 in the NetApp's® support data center.\n The paper also presents insights from study on thousands of real customer support cases over thousands of deployed systems over the period of 1.5 years. These investigations uncover facts that cause potential delays in customer case resolutions and influence Dexter's design.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078468.3078484" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3f688d6104055458a01a9748d7d84b9dd8cf1d2b", "sources": [ "DBLP" ], "title": "Dexter: faster troubleshooting of misconfiguration cases using system logs", "venue": "SYSTOR", "year": 2017 }, "3f70df394548cd6e73a864c9b1d9b5151322fe5c": { "authors": [ { "ids": [ "4405233" ], "name": "Sourav Sikdar" }, { "ids": [ "2083970" ], "name": "Kia Teymourian" }, { "ids": [ "1741680" ], "name": "Chris Jermaine" } ], "doi": "10.1145/3127479.3129248", "doiUrl": "https://doi.org/10.1145/3127479.3129248", "entities": [ "Big data", "Cloud computing", "Complex systems", "Data model", "Data structure", "Database", "Dataflow", "Dataflow programming", "Interchange circuit", "JSON", "Java", "MongoDB", "Physical data model", "Programmer", "Programming language", "SPARK", "Systems design" ], "id": "3f70df394548cd6e73a864c9b1d9b5151322fe5c", "inCitations": [ "61d42fafe09329b00bba177f444567be51444dc2" ], "journalName": "", "journalPages": "432-444", "journalVolume": "", "outCitations": [ "24281c886cd9339fe2fc5881faf5ed72b731a03e", 
"495e4ab43ffc0e5c11919c6ec42e48a4ce651327", "4146918f2d045723e771e93e4394836ac3bb87c5", "018fea354b8702eebbe6797405654322bb1b6da0", "15eb06aa520c83abfeb9c2a80bb43a025b848917", "471271dfcd33ceb2553b4bd3b3431983fd6ec888", "20f63375d2aef16a56c1f0e09e8d295ffe208c34", "059eb34d95c73e37dca8e35b0ac5a2fb0142f3ee", "a2b9a68fd82f12c98909fa5e3fe25bc1119977d1", "b2509e383854fb29ebbd5cb9a5c43419feb408d4", "3a72a91dadce5774dcbc42e76f4c1b4a99766d05", "81b761ea5c679b452f4a78fa176b8e2d608e77ac", "019d8ba2274b5555bb71baebf76af35de23ef988", "79a2632742b826622e3cb101bd8b8c1a151a9dee", "03363ed04e9d4d2e8c9348551815e80615969611", "83b0fdadebe55c1ab0989c58bbe3afbc45a8fc87", "2c5b8766a1dae62b86ba38013253ab8673f6ec44", "2cdcddb08ae6060e94cba6c9b2b58b87324e686f", "190727a716f6529d1d651d73740555eeb19b1196", "207def18c67fa8024741b7ae3cdc655b57f2053f" ], "paperAbstract": "Many cloud-based data management and analytics systems support complex objects. Dataflow platforms such as Spark and Flink allow programmers to manipulate sets consisting of objects from a host programming language (often Java). Document databases such as MongoDB make use of hierarchical interchange formats---most popularly JSON---which embody a data model where individual records can themselves contain sets of records. Systems such as Dremel and AsterixDB allow complex nesting of data structures.\n Clearly, no system designer would expect a system that stores JSON objects as text to perform at the same level as a system based upon a custom-built physical data model. The question we ask is: How significant is the performance hit associated with choosing a particular physical implementation? Is the choice going to result in a negligible performance cost, or one that is debilitating? Unfortunately, there does not exist a scientific study of the effect of physical complex model implementation on system performance in the literature. 
Hence it is difficult for a system designer to fully understand performance implications of such choices. This paper is an attempt to remedy that.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3129248" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3f70df394548cd6e73a864c9b1d9b5151322fe5c", "sources": [ "DBLP" ], "title": "An experimental comparison of complex object implementations for big data systems", "venue": "SoCC", "year": 2017 }, "3f7bf92efaac034606ba1e8beaba8c3aaa158ba6": { "authors": [ { "ids": [ "39154340" ], "name": "Xuchao Zhang" }, { "ids": [ "39164176" ], "name": "Liang Zhao" }, { "ids": [ "2929832" ], "name": "Arnold P. Boedihardjo" }, { "ids": [ "1752590" ], "name": "Chang-Tien Lu" } ], "doi": "10.1109/ICDM.2017.72", "doiUrl": "https://doi.org/10.1109/ICDM.2017.72", "entities": [ "Algorithm", "Big data", "Coefficient", "Distributed algorithm", "Download", "Experiment", "Heuristic", "Least squares", "Semiconductor consolidation", "Synthetic data", "Thresholding (image processing)" ], "id": "3f7bf92efaac034606ba1e8beaba8c3aaa158ba6", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "625-634", "journalVolume": "", "outCitations": [ "9928a7e52cde0af42e581e8c0b8cdecb9106cd38", "1a589a7afa6c123478b69d31fde3a7774aa646be", "0c93cb1af3bba1bd90a03e921ff2d55acf35c01f", "818ceedb72216e07435e7f268dfd49c41765b721", "09ba5efa9a96406a87d07f7086c2b22a44eedd18", "fd91a28c69e99d9d11de59ab31a83ea3c3a71918", "3accd0b572e3267e6b2832c70ac8c4aa92064f5c", "08743d09f3ec33ab1f188d4c5f8f5550c312ace0", "242bd5d9e22e95bd9b768b958b24d6721882ef29", "05aba481e8a221df5d8775a3bb749001e7f2525e", "7d105eedc420e0e3c6a365d92a920081380d11c7", "1cfed5bf217a76ba5c0f1c9298004ceb21bea566", "0541d5338adc48276b3b8cd3a141d799e2d40150", "7a825749d1a2c8467d934b2976ce372ba878ba37", "8d56d4bc69a8c562434b9a129542bb79e9d6f1d6", "7b6fd17670e1dcb8be9ff1dafe203bae84e9e93c", 
"1361d3f2cc82fd5e0f2d219700191888fd59db1f", "42b6ea0bbc174ce1ecb116aff6280559545dd6db", "41b07d61e2ec61cde3e5a084e313621cc6e12fd6", "efe9635e65e130d53b3d9da19b27d07a820d4697", "8dda9e02fba6cd269b29d3aebf0b8c52cfc6a98d", "752a7486dee0b6d19006a4d0d64bc9709f3ad0ca", "712714f0733510e0f8a3634932d739a1e193544b", "a0718cb855565adba24f9fc169e7adb0a8fafa63", "6a0e11220ee29dec3c432e88205a6ec7b0227621", "266bf8847801ff302c6f91f899f36269807317ee", "36599e4aca38995f5e8ca18e19e03cbcc089d229" ], "paperAbstract": "In today's era of big data, robust least-squares regression becomes a more challenging problem when considering the adversarial corruption along with explosive growth of datasets. Traditional robust methods can handle the noise but suffer from several challenges when applied in huge dataset including 1) computational infeasibility of handling an entire dataset at once, 2) existence of heterogeneously distributed corruption, and 3) difficulty in corruption estimation when data cannot be entirely loaded. This paper proposes online and distributed robust regression approaches, both of which can concurrently address all the above challenges. Specifically, the distributed algorithm optimizes the regression coefficients of each data block via heuristic hard thresholding and combines all the estimates in a distributed robust consolidation. Furthermore, an online version of the distributed algorithm is proposed to incrementally update the existing estimates with new incoming data. We also prove that our algorithms benefit from strong robustness guarantees in terms of regression coefficient recovery with a constant upper bound on the error of state-of-the-art batch methods. 
Extensive experiments on synthetic and real datasets demonstrate that our approaches are superior to those of existing methods in effectiveness, with competitive efficiency.", "pdfUrls": [ "https://dac.cs.vt.edu/wp-content/uploads/2017/11/online-and-distributed.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.72", "https://arxiv.org/pdf/1710.00904v1.pdf", "http://arxiv.org/abs/1710.00904" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3f7bf92efaac034606ba1e8beaba8c3aaa158ba6", "sources": [ "DBLP" ], "title": "Online and Distributed Robust Regressions Under Adversarial Data Corruption", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "3f9b77306c3160aafe370a6d36895f9dca124ebe": { "authors": [ { "ids": [ "39614837" ], "name": "Brett Walenz" }, { "ids": [ "31938009" ], "name": "Sudeepa Roy" }, { "ids": [ "2845918" ], "name": "Jun Yang" } ], "doi": "10.1145/3035918.3064053", "doiUrl": "https://doi.org/10.1145/3035918.3064053", "entities": [ "Algorithm", "Baseline (configuration management)", "Cache (computing)", "Computation", "Computational complexity theory", "Decision support system", "Join (SQL)", "Memoization", "PostgreSQL", "Program optimization", "Relational algebra", "Rewrite (programming)", "SQL", "Value (ethics)" ], "id": "3f9b77306c3160aafe370a6d36895f9dca124ebe", "inCitations": [], "journalName": "", "journalPages": "1243-1258", "journalVolume": "", "outCitations": [ "78b3f0d59de6f43bbb2725cec75d55d6137a88e4", "335606158cd0e80d353c0820aa7f3519c0edef87", "036e20936fc1e452509c0b64196a0e937ab733be", "006cafc8d8f1af2595daa20078b501c10d9fcf5e", "fc7f5ed67a23f708222e6a3c2243a12baa898c28", "3f419db6f66c32bbb7ea887b139abd4e088a0405", "07f617c9ceece546c5180ed301a010cde17b28b8", "f9ba2d1dcd7a58436bd401b56766ad0050f9aab0", "5ce1483f20173531cc259b60595ecd827cbbe8eb", "0bc3f74da97f976e1f94deff106860d39c477be3", "8df1e547181611e390c428de59d31ab69d64bcd1", 
"a0203b4a547a6d172a053d39d1d618ee47ce3e31", "89e701c706894415950eb4160ba95a717cdc9594", "11ea30b09ba54b39f8d4c19e600d87f96b96ffaa", "128985b85556c30ad405863f2a34340049957616", "1f21c3d03726264e48d156f0430ec6357d6fa642", "65e0af21793f0dc748a1755b736db4fbeb9bb4e8", "114fd5089776a0562cf4e8276049cc11222fe51f", "28e702e1a352854cf0748b9a6a9ad6679b1d4e83" ], "paperAbstract": "Iceberg queries, commonly used for decision support, find groups whose aggregate values are above or below a threshold. In practice, iceberg queries are often posed over complex joins that are expensive to evaluate. This paper proposes a framework for combining a number of techniques---a-priori, memoization, and pruning---to optimize iceberg queries with complex joins. A-priori pushes partial GROUP BY and HAVING condition before a join to reduce its input size. Memoization caches and reuses join computation results. Pruning uses cached results to infer that certain tuples cannot contribute to the final query result, and short-circuits join computation. We formally derive conditions for correctly applying these techniques. Our practical rewrite algorithm produces highly efficient SQL that can exploit combinations of optimization opportunities in ways previously not possible. 
We evaluate our PostgreSQL-based implementation experimentally and show that it outperforms both baseline PostgreSQL and a commercial database system.", "pdfUrls": [ "https://users.cs.duke.edu/~sudeepa/sigmod2017-iceberg.pdf", "http://doi.acm.org/10.1145/3035918.3064053" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3f9b77306c3160aafe370a6d36895f9dca124ebe", "sources": [ "DBLP" ], "title": "Optimizing Iceberg Queries with Complex Joins", "venue": "SIGMOD Conference", "year": 2017 }, "3f9cc2832b801a52677bad31f7f420e41c49bc38": { "authors": [ { "ids": [ "3325511" ], "name": "Timo Schneider" }, { "ids": [ "2604669" ], "name": "James Dinan" }, { "ids": [ "2567894" ], "name": "Mario Flajslik" }, { "ids": [ "1742910" ], "name": "Keith D. Underwood" }, { "ids": [ "1713648" ], "name": "Torsten Hoefler" } ], "doi": "10.1109/HOTI.2017.17", "doiUrl": "https://doi.org/10.1109/HOTI.2017.17", "entities": [ "Data center", "Gigabyte", "Memory hierarchy", "Multitier architecture", "Network congestion", "Network switch", "Non-volatile memory", "Portals", "Stock and flow", "Throughput", "Volatile memory" ], "id": "3f9cc2832b801a52677bad31f7f420e41c49bc38", "inCitations": [], "journalName": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "journalPages": "17-24", "journalVolume": "", "outCitations": [ "a56ee0c2ed063a4842d8ca785e9fadfda29d9123", "144b1ba05cb64b493858b665cc38374f3ef7e332", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "e1d29194ff677c7426d65af3f5b6fec3ffa182b6", "112582b5903fe1c7d8cfbac85d6a5dca3561688c", "7fcfcc1ea179cab57893ffb407a8e43d9c9da289", "2d086787132666be7d425c5534132b0956c30435", "a8205dcf9663742bd103f23e6fa598c6b6f4513e", "c250168f0103da6ed94d20a9fb00e98cce05756d", "005fc9cc5fd4e991612e3411950c5da5b0c07958", "49ffbf9a6a6602ea571709c313432c216847272a", "3b1c1002d1f051fbe3212961f88809c0714f7e61", "a15bc58fa496b6cca937713723f19f45380fc2fe", "7605fe626c3598ee68fefaf1f4e1d21fcd2cb3d4", 
"33299bbc74d62c9d83f714f0753fc0f2ecadc645", "400c8f6d1bc0284b887f3f6412e07f9be70650f8", "e116690dfdfb44f9e350cb36fbf8e8b9b3da1d3f", "031c9ab264449e18192d55e0f373d3bdb640b0e9", "870c48a2c2fb067d12138bc0620b1a11a40a6a97", "14e0d2bdfb3fca202b3fc0e19a12d3082f81b931", "13fec303fe55489045ff5b66014b618fca83206a", "373b88e34295875fdab7f6cdee1438edbd0571cb", "7ac7265bf479558e99ec5edf21400f57c5205712" ], "paperAbstract": "The advent of non-volatile memory (NVM) technologies has added an interesting nuance to the node level memory hierarchy. With modern 100 Gb/s networks, the NVM tier of storage can often be slower than the high performance network in the system; thus, a new challenge arises in the datacenter. Whereas prior efforts have studied the impacts of multiple sources targeting one node (i.e., incast) and have studied multiple flows causing congestion in inter-switch links, it is now possible for a single flow from a single source to overwhelm the bandwidth of a key portion of the memory hierarchy. This can subsequently spread to the switches and lead to congestion trees in a flow-controlled network or excessive packet drops without flow control. In this work we describe protocols which avoid overwhelming the receiver in the case of a source/sink rate mismatch. We design our protocols on top of Portals 4, which enables us to make use of network offload. 
Our protocol yields up to 4x higher throughput in a 5k node Dragonfly topology for a permutation traffic pattern in which only 1% of all nodes have a memory write-bandwidth limitation of 1/8th of the network bandwidth.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HOTI.2017.17" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3f9cc2832b801a52677bad31f7f420e41c49bc38", "sources": [ "DBLP" ], "title": "Fast Networks and Slow Memories: A Mechanism for Mitigating Bandwidth Mismatches", "venue": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "year": 2017 }, "3fa5de3b8c3933885becdd3fe8540109309ab863": { "authors": [ { "ids": [ "3671570" ], "name": "Yonghong Yan" }, { "ids": [ "3775274" ], "name": "Jiawen Liu" }, { "ids": [ "1717511" ], "name": "Kirk W. Cameron" }, { "ids": [ "40083137" ], "name": "Mariam Umar" } ], "doi": "10.1109/IPDPS.2017.99", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.99", "entities": [ "Algorithm", "Broadcast automation", "Central processing unit", "Embodied energy", "For loop", "Graphics processing unit", "Heterogeneous computing", "Heuristic", "Load balancing (computing)", "OpenMP", "Parallel computing", "Parallel language", "Runtime system" ], "id": "3fa5de3b8c3933885becdd3fe8540109309ab863", "inCitations": [ "806c7bd6734e408f6c1e855a31e47a030cb9c577", "8296beee679b5f6f65903b62ce740a1089e728f2" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "788-798", "journalVolume": "", "outCitations": [ "230b075c5726dfdbc03f445883befef1170a9858", "04717b51d7c7dd598d732fb8a293d69d45c0fa9f", "a5895946af933fc3fb32f7e975a35ded0b63d619", "679541d90bcfb71019c7407b4c408a80e88db99d", "4958354ca96a04beee8eacc909fd2eea5cf788b4", "ddd6bf11cd8564f413b9f5dd9c9a295c5076249f", "0652168cd1dfe44892ef6c42004b5dec267ac254", "03028a78daf97a01a26975a72c59c8d97cb18810", "30930f09fd054056f60bc4626ebca47beb2dc3ef", 
"66f0c57db9a5fe49645e3ed6eadafd649b004790", "10c8ae128003be22c29e3afac6dfbc701323ce72", "55220bc99ffe36591a4b31a2ee9e40620381e0ca", "8bae0d458d6b9bd76774f30e37288ebe36b89f9d", "87145a355e6648faa20d189cf991556f2eaa413e", "42be94de2b517cb19b0af8aff489122d2500759c", "3ce340cb7a11120c8b8a51512269125b53c080cc", "a4192f5a30a095ec79fc8e5984530366f248a314", "2c2fa29dfbbab106f1b94cdfdc67939d3355bd30", "1041d3f00afb5f5a53196813ceb2ebfab6d0a6ee", "7e44eadb263e5de0b3495bc1427e2cc27b9bc271", "7ce5c4e3cdbad16ea8718bb78c3ba227c98c21b8", "8db3c11cd85195f459b8ba82fe3326e8f86f1d52", "3c59a0befe85eea6e4917b639bb43e12a35db029", "a3c10798e153b92dba64075904f4314badc38149", "2042b469be68653afcb2b7b38490c16369b4501a", "092217c2267f6e0673590aa151d811e579ff7760" ], "paperAbstract": "Heterogeneous computing systems, e.g., those with accelerators than the host CPUs, offer the accelerated performance for a variety of workloads. However, most parallel programming models require platform dependent, time-consuming hand-tuning efforts for collectively using all the resources in a system to achieve efficient results. In this work, we explore the use of OpenMP parallel language extensions to empower users with the ability to design applications that automatically and simultaneously leverage CPUs and accelerators to further optimize use of available resources. We believe such automation will be key to ensuring codes adapt to increases in the number and diversity of accelerator resources for future computing systems. The proposed system combines language extensions to OpenMP, load-balancing algorithms and heuristics, and a runtime system for loop distribution across heterogeneous processing elements. 
We demonstrate the effectiveness of our automated approach to program on systems with multiple CPUs, GPUs, and MICs.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.99" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3fa5de3b8c3933885becdd3fe8540109309ab863", "sources": [ "DBLP" ], "title": "HOMP: Automated Distribution of Parallel Loops and Data in Highly Parallel Accelerator-Based Systems", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "3fa62e121a18e6ad44bcce5f6e783f0dc4a0d52f": { "authors": [ { "ids": [ "32483327" ], "name": "Ji Kim" }, { "ids": [ "1755787" ], "name": "Shunning Jiang" }, { "ids": [ "2136471" ], "name": "Christopher Torng" }, { "ids": [ "3449863" ], "name": "Moyang Wang" }, { "ids": [ "1747794" ], "name": "Shreesha Srinath" }, { "ids": [ "2906275" ], "name": "Berkin Ilbeyi" }, { "ids": [ "8824371" ], "name": "Khalid Al-Hawaj" }, { "ids": [ "3206189" ], "name": "Christopher Batten" } ], "doi": "10.1145/3123939.3136952", "doiUrl": "https://doi.org/10.1145/3123939.3136952", "entities": [ "Baseline (configuration management)", "Central processing unit", "Data parallelism", "Data structure alignment", "Decoupling (electronics)", "Microarchitecture", "Parallel computing", "Program lifecycle phase", "SIMD", "Speedup" ], "id": "3fa62e121a18e6ad44bcce5f6e783f0dc4a0d52f", "inCitations": [ "498b084f3e65c8f53983829b65e136c26f61d757" ], "journalName": "", "journalPages": "759-773", "journalVolume": "", "outCitations": [ "0689c8056abadaa8c7df8498e511e56bd59094e4", "8bd6f67ef03b3c138c52f3e9b1716aebe937d244", "5069f6267707df50e3578afaa8dfa9c15f3c3b07", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "0b11c230e9b01f5f1d04aa6e868819e668dac6e0", "4ad495b07abc0d7080c020dd563d9406e1753d65", "0836859831c6c69412ae633bcf47e96355a92d6b", "cbb841fa252ad5223e1a170baaee78dce484e25c", "8627aaa6a728d5c39e28c9b2e52f3c0ffe075630", "858bb499ad942bbf24cd82ae837bacb7f3a99bab", 
"5c9fa798a510b66a37c1b0852582fd7735ed088e", "dfcc61c8f1e9dc7c7e44e52e4b2bb0bd7a6d7022", "377177bb82105c35e6e26ebad1698a20688473bd", "14d98ecba21e404f80daf024a03effe259cf9b88", "352a8957005dc5519b15ed1870751ec494d66395", "3370784dacf9df1e54384190dad40b817520ba3a", "32c8c7949a6efa2c114e482c830321428ee58d70", "054e4a6966d54eb9fd207cf0484214201f46424a", "1eb4c18aa99f19a8dc91e6235e2dbdb7fd46d06c", "0f48fd50a9f0cb30d1f0495010a45a0e732e8a12", "26a635be4fc593b5fb6ec6bf11b03634e803f311", "2724de31317b1b9e026b5f90251829ee02f3fa3f", "db568a20e7d10e04182cd6223b5191d584ce0371", "8af1a4b65bd380b8775c15e2a9323ae5184bc8dd", "1dff33cb24cf30be232d02bc48ebdf200480d2f3", "a6e8098671ccfc1147870db90e80360654cd92aa", "0606b9b2bedb67039eb1615f8ef5b13f42f8339a", "480a952f7d24cf6d3ccda62439424eea6a8fd469", "19ecf7778132143ec9c0324fae0aebf20c9a0217", "32bc6af9a529d47f81aa9520b9e0b1cb7a8766c2", "03fb875d5022a5e98f19c271e2403232acc55318", "273d591af0bdcbefe37d7dd9150e2f612ca7121d", "024f3e0ea6a49e536f3d135e73d77323a924498d", "c683c532ea27c3b9866517da6adc554511241251", "98bbc48dda4f68ce0edcc02b12130f21999bb2ba", "2fa81b7177d69c54755d7c2b87e4a165bba1a8cc", "28f6482a14e258ed5a7fe12c330045a349363478", "1c05733fa16907f235e6400625c2ae88f1942b42", "02a17a291cbacc0666162845559ea0873f10122d", "09dbf94357b21ad14d2897282703ee99ae06a35e", "1201e9d5443cc888e389fe1587570ffa4d8e76d9", "10bd65d72a0d67c4807d833c683045ca4ba0d381", "52aad68b6a150c5db537ef64c23e07d8abd58cc1" ], "paperAbstract": "Task-based parallel programming frameworks offer compelling productivity and performance benefits for modern chip multi-processors (CMPs). At the same time, CMPs also provide packed-SIMD units to exploit fine-grain data parallelism. Two fundamental challenges make using packed-SIMD units with task-parallel programs particularly difficult: (1) the intra-core parallel abstraction gap; and (2) inefficient execution of irregular tasks. 
To address these challenges, we propose augmenting CMPs with intra-core loop-task accelerators (LTAs). We introduce a lightweight hint in the instruction set to elegantly encode loop-task execution and an LTA microarchitectural template that can be configured at design time for different amounts of spatial/temporal decoupling to efficiently execute both regular and irregular loop tasks. Compared to an in-order CMP baseline, CMP+LTA results in an average speedup of 4.2X (1.8X area normalized) and similar energy efficiency. Compared to an out-of-order CMP baseline, CMP+LTA results in an average speedup of 2.3X (1.5X area normalized) and also improves energy efficiency by 3.2X. Our work suggests augmenting CMPs with lightweight LTAs can improve performance and efficiency on both regular and irregular loop-task parallel programs with minimal software changes.", "pdfUrls": [ "http://www.csl.cornell.edu/~cbatten/pdfs/kim-lta-poster-micro2017.pdf", "http://www.csl.cornell.edu/~ctorng/pdfs/kim-lta-micro2017.pdf", "http://www.csl.cornell.edu/~cbatten/pdfs/kim-lta-slides-micro2017.pdf", "http://doi.acm.org/10.1145/3123939.3136952" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3fa62e121a18e6ad44bcce5f6e783f0dc4a0d52f", "sources": [ "DBLP" ], "title": "Using intra-core loop-task accelerators to improve the productivity and performance of task-based parallel programs", "venue": "MICRO", "year": 2017 }, "3fa745df5576e54464cc665e323597bd057cb650": { "authors": [ { "ids": [ "2764129" ], "name": "Aviad Zuck" }, { "ids": [ "22763652" ], "name": "Udi Shriki" }, { "ids": [ "1755646" ], "name": "Donald E. 
Porter" }, { "ids": [ "3188958" ], "name": "Dan Tsafrir" } ], "doi": "10.1145/3102980.3102989", "doiUrl": "https://doi.org/10.1145/3102980.3102989", "entities": [ "Adversary (cryptography)", "Computer data storage", "Encryption", "Explanatory combinatorial dictionary", "Firmware", "Information privacy", "Key (cryptography)", "Operating system", "Pseudorandomness", "R language", "Requirement", "Snapshot (computer storage)", "Terabyte" ], "id": "3fa745df5576e54464cc665e323597bd057cb650", "inCitations": [ "5e79fde8dc3590f6274a6716373a33745a8b59da", "4d1a62de587f05084e85a4168f960af1e48b9697" ], "journalName": "", "journalPages": "50-55", "journalVolume": "", "outCitations": [ "3c315416b19cf69a7e542b731e7796860225ee97", "0e216e95f17f64ff18cd50463dd8ec023aa08248", "14b639045731afd3762bcc61723f866724edc2f0", "16b5bce9ca7a24553a065f523d5843eb3b25f896", "3006ff2407040263c6379dea32e9dde07439a311", "1820a34042d6371a9e20484b0c63b698eb522a6c", "c0320b4cdca24a5da776cbf46ec551beac2f6a61", "0be943b44a03a16b544b21ea1b578b1816705831", "00ecd7b2e0c364ce4e9f5416ee1dbeaeabe87a62", "1526d412d7bdb83dcafadd1c28cf8b4c7e4f130d", "04b58d40b8a789d354c93a12a4fc418793b827fc", "f4f37512b9786dbd9980d343e47fdc41032d0ca6" ], "paperAbstract": "This paper presents a storage system that can hide the presence of hidden data alongside a larger volume of public data. Encryption allows a user to hide the contents of data, but not the fact that sensitive data is present. Under duress, the owner of high-value data can be coerced by a powerful adversary to disclose decryption keys. Thus, private users and corporations have an interest in hiding the very presence of some sensitive data, alongside a larger body of less sensitive data (e.g., the operating system and other benign files); this property is called plausible deniability. 
Existing plausible deniability systems do not fulfill all of the following requirements: (1) resistance to multiple snapshot attacks where an attacker compares the state of the device over time; (2) ensuring that hidden data won't be destroyed when the public volume is modified by a user unaware of the hidden data; and (3) disguising writes to secret data as normal system operations on public data.\n We explain why existing solutions do not meet all these requirements and present the Ever-Changing Disk (ECD), a generic scheme for plausible deniability storage systems that meets all of these requirements. An ECD stores hidden data inside a large volume of pseudorandom data. Portions of this volume are periodically migrated in a log-structured manner. Hidden writes can then be interchanged with normal firmware operations. The expected access patterns and time until hidden data is overwritten are completely predictable, and insensitive to whether data is hidden. Users control the rate of internal data migration (R), trading write bandwidth to hidden data for longevity of the hidden data. 
For a typical 2TB disk and setting of R, a user preserves hidden data by entering her secret key every few days or weeks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3102989", "http://www.cs.technion.ac.il/~dan/papers/everchange-hotos-2017.pdf", "http://cs.unc.edu/~porter/pubs/hotos17-final30.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3fa745df5576e54464cc665e323597bd057cb650", "sources": [ "DBLP" ], "title": "Preserving Hidden Data with an Ever-Changing Disk", "venue": "HotOS", "year": 2017 }, "3fab56c42fe6efc0b4febd21596ef89188f5d21a": { "authors": [ { "ids": [ "12142608" ], "name": "Xiao Wang" }, { "ids": [ "2424723" ], "name": "Samuel Ranellucci" }, { "ids": [ "2620997" ], "name": "Jonathan Katz" } ], "doi": "10.1145/3133956.3134053", "doiUrl": "https://doi.org/10.1145/3133956.3134053", "entities": [ "Amortized analysis", "Authentication", "Computation", "Data pre-processing", "Garbled circuit", "Preprocessor", "Secure two-party computation" ], "id": "3fab56c42fe6efc0b4febd21596ef89188f5d21a", "inCitations": [ "39f6571e188f704eb84f9a6d682be7c0483fd443", "3dbb3dcaff97a0db797d01def0f96b6e37289daa", "fe94997ec905da08756aa1bb80203c0f1e77d538", "6db9824d4667b22310c51fe638403238f873e9f2", "922a6775722dcc59850262beba325516ce2d027d", "3fdcd1f6bf4940926f67f32a3e3cc1794e244eb5", "da0c48743e6e4a6b1b0e4758e466767a1068909c", "a2a8b5cc914c653730a251cf1a0b3452dac322b3", "5e75d9d75536f8b126c47ae4ce91b47d51c7cc69" ], "journalName": "", "journalPages": "21-37", "journalVolume": "", "outCitations": [ "937cb0795bf3bfb5c9cbb7387ed68301bb6995ce", "5adc94602d07e49cc1e94e2aa2b1bdf3481a47f8", "5c07dadf28f3cfdd67ab60a12d3ea9860bcd8b24", "ba757aff2d1ceda66f2c4fdf7f3c9ae74ce6d8a4", "e010ccd6b9ce0d8905a3a9d2be1494fece8cfea3", "db0f82a419f89cda64fcbec2c58137862cd04475", "04948723dec0e6724777ee56f0d10168cce44921", "5161aa950ec876026dfc24b4cbf69ae1e552c0e6", "eacc0f9a8646de38020d781438c68cc23ec47b05", "3cb55d539b232e309f4a5974148ec6f22afb5888", 
"27e9745fc94ccf6039dd1804cbb99760544fc59b", "411e4ecb35e5385ed0c88a36f0b2821c42af8f70", "9596178a19c4770164ac8c095b947021976f70d7", "19de1229db1c2e62367a3d1459e24848064dfd02", "69dc0fe412f974a595abe6d7052d8fdf2304ba3b", "26b4199c18d93495c34b29be6f96f220f85e38da", "d04f7f8eed11e5e58a41e314b00e49d7424d82ec", "55aafd6b8e9816203006c7e5c37e2e2fb8451113", "3fdcd1f6bf4940926f67f32a3e3cc1794e244eb5", "72ba7c639e3d7b075fde8eeca3385923551c6a39", "2f9c590bb2df7fe3e4caffaaa709fa6840d02d62", "19c3736da5116e0e80a64db35afe421663c4b4a8", "1eb0b401e7dbd8a4e638243713b39fffc991fe9f", "0435dddec1c8b1011bd5e3ea07c45d1cdf0b520b", "05543dfa89c4e89ed1f78a1b83c2e172cd8f6321", "a797a0346e106e0d1d1d2db778aa509031c7bf8c", "2eb315952f6a2e342b19cf95287c8a0b1f2c36fa", "817987d78b91ebf29f30c6ada64684364a705752", "9f48a66ebf7a5cdcd990a62b2373c2f279cfd62b", "9c04890c31c3fd10dcc3593454e2a2700923e19a", "444630ced6bda572461744423ff420106472d5e3", "46527c14457cf84d1cf26487d6b4c31f4825db71", "15509fdb7fc7bf065fcdf776b38cf3d72d10c113", "18f5d7663632c92c84f89151823dff2120ae43cf", "3fb1b878daafbd54989438e4fb778380a03226e6", "15c76f461543c44a8b9d8b32b2bbd18c595aea52", "362246709de205ec0ac5b34e07306839c38d5a3a", "116877b6fb854e2db97f7f887c2ca7d1b734e90d", "3f40a5b0bcf4401c3f8efdbb539deec2763ad916", "f71db4d70d4cc9e931a63dde7a6db8dad10a61a0", "48a6ed40923089d57af74f01fd35f92754d7537e", "d89c91e556c9ebc345931547f579a8494a573391", "36250592849fc8dc50b3b5df0a72a8b072ce34e4", "ad0564d120af0e7471cd32d4c0438b8c25f33a0d", "47b8fd6ee8b07bd14de3c91df515b11180121de9", "218bbd0efffc2ee63edffb8c5220f06155e23578", "0affd3f06d26de268d81c288454dd7880e518f9e", "0166c8b5c6445043b94fc7b62d145d0c3c8b6483", "842eb3de44e0538769f1509d1b8d35161fb212bb" ], "paperAbstract": "We propose a simple and efficient framework for obtaining efficient constant-round protocols for maliciously secure two-party computation. 
Our framework uses a function-independent preprocessing phase to generate authenticated information for the two parties; this information is then used to construct a single \"authenticated\" garbled circuit which is transmitted and evaluated. We also show how to efficiently instantiate the preprocessing phase with a new, highly optimized version of the TinyOT protocol by Nielsen et al.\n Our protocol outperforms existing work in both the single-execution and amortized settings, with or without preprocessing: \n Our work shows that the performance penalty for maliciously secure two-party computation (as compared to semi-honest security) is much smaller than previously believed.", "pdfUrls": [ "http://eprint.iacr.org/2017/030.pdf", "http://doi.acm.org/10.1145/3133956.3134053", "https://acmccs.github.io/papers/p21-wangA.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3fab56c42fe6efc0b4febd21596ef89188f5d21a", "sources": [ "DBLP" ], "title": "Authenticated Garbling and Efficient Maliciously Secure Two-Party Computation", "venue": "CCS", "year": 2017 }, "3faeda3f1b7d25bf6a4983a44d40097d63c91e6b": { "authors": [ { "ids": [ "17865449" ], "name": "Marina Billes" }, { "ids": [ "32710366" ], "name": "Anders M\u00f8ller" }, { "ids": [ "1884064" ], "name": "Michael Pradel" } ], "doi": "10.1145/3062341.3062364", "doiUrl": "https://doi.org/10.1145/3062341.3062364", "entities": [ "Black box", "Complex systems", "Interaction", "Scalability", "Two-phase commit protocol", "Web application", "ownCloud" ], "id": "3faeda3f1b7d25bf6a4983a44d40097d63c91e6b", "inCitations": [], "journalName": "", "journalPages": "171-184", "journalVolume": "", "outCitations": [ "3e8396d977df0996a4461fe7477bc5661a2058a7", "3c10b218e76ed78109ffaa5023b1d7ce07182826", "90fcde4095031324f54a8b8b2f5b79eb6a4f8e5a", "98ed78d119be90bf594e43bf4b8e31c27140664c", "40efb402ba3381e94a115c6dfa638fb4b321fac1", "2925a0282b8dba6ea26f6d36d04cb06dc5f057ec", 
"103f2107e7e66788684e51ae69bd3bf67abf5e4d", "7f80e0c58b9b9ff61a2ad3dadde50172e3c3a49d", "5d313483033862f1fc4fae1220a89d165a556f77", "0180ce1c6e98fd66362b46bd945c503bf7372aa8", "3243d4355df5e41fc157b5aaacfcc038da71e96a", "183595c2536ca201c2da9ee02f7752c93a8e00ae", "406ee6ce01dbc906ad07a3c89a60c7d8b2252a9a", "0b6975dfee824f53f54281afe5755620c4ee9e92", "b4cbb38b10417c84b5876df62379ba20eca374cb", "3be816a633ee79b9d734920faee820226c12a5b5", "1eee336fbc45c89a45f240611bb706ebfd588126", "1f46df8917518a1478201c1a51f1654f4f7b21f2", "014cebf17f19709bc76387213438bbb6aabc8fe9", "786a82e77439c970a672aed8b2fc67dfa9696f5a", "3a33dad8e9d12835fca95deec73e841096c8bec0", "8d0eec21e784142926120c2fdd80dd092e2dabf1", "558ad34ab42d7a5d19562b0696899a41ebcabb7c", "a0533a82716147deab8196008609ff2e006ddadb", "05c63d224eb2ebaeb29e8a414fba6f23c2d10e9f", "8b28b02af1ba77fff5b08d6dea87ba8b043b479d", "15cae3ea228f9f8f5ba4d48a45e6d75814671fa9", "197fb71fc7b78b9190c75ef017be612615522796", "0f32c8ad75c8c8e42c0c07989c4874afd35fc69e", "5b9f54be658fe5e42448bbcf3a33fff9532cc0b1", "10ba04904f12e44cd0569cb86aa6e97e47939e23", "62eb82646c852f903e9a8b08a04bd34b59e11b04", "79b6d89f92080b1ec9b8f23d7811666e15f6cabe", "744d5ace9546c5acc70b18141c97d31401ac6cdd", "5d2dab8c10f5d88338459510cfd98e5cd88c2efb", "24e6f34e499634393416ea09c1aadd37ec9e8542", "068e9f8dd77ecfcc2019fdf3123d163b159fe4eb", "119f99dd30e725040b5e5633ece9962de71f9d84", "21cfdff1ef354da7cb65296799316a05d7844c08", "dea6f15f2cf7649e6a13347f5325ef19c9f25747", "03bb63660c3935ad2ec011a7f9e868587063f89c", "90b90e8e7a29daf5c5cc22bc9d46b1d46124f8d0", "2d81b844654806778d3cafda59715183a6659a41", "07ef38249ef922c26c660d596a6db04af2c9e8a6", "193d1415469cde989eafb01a1d4e8ba98dc03e0f", "2760a433d6f87294cdfb5605df03ea3bd2b35e76", "b5433ceb33dd9e1c2a9daf783152ac6328728ded", "7038e23695dbc4d8a9d1b7c6dff8dbc138009c4b" ], "paperAbstract": "Web applications, such as collaborative editors that allow multiple clients to concurrently interact on a shared resource, are 
difficult to implement correctly. Existing techniques for analyzing concurrent software do not scale to such complex systems or do not consider multiple interacting clients. This paper presents Simian, the first fully automated technique for systematically analyzing multi-client web applications. \n Naively exploring all possible interactions between a set of clients of such applications is practically infeasible. Simian obtains scalability for real-world applications by using a two-phase black-box approach. The application code remains unknown to the analysis and is first explored systematically using a single client to infer potential conflicts between client events triggered in a specific context. The second phase synthesizes multi-client interactions targeted at triggering misbehavior that may result from the potential conflicts, and reports an inconsistency if the clients do not converge to a consistent state. \n We evaluate the analysis on three widely used systems, Google Docs, Firepad, and ownCloud Documents, where it reports a variety of inconsistencies, such as incorrect formatting and misplaced text fragments. 
Moreover, we find that the two-phase approach runs 10x faster compared to exhaustive exploration, making systematic analysis practically applicable.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062364" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3faeda3f1b7d25bf6a4983a44d40097d63c91e6b", "sources": [ "DBLP" ], "title": "Systematic black-box analysis of collaborative web applications", "venue": "PLDI", "year": 2017 }, "3fc68bf55557ac4b377bd97bed8b28f3e201d775": { "authors": [ { "ids": [ "23473919" ], "name": "Weiping He" }, { "ids": [ "1717128" ], "name": "David Hung-Chang Du" } ], "doi": "", "doiUrl": "", "entities": [ "Cold boot attack", "DriveSpace", "Experiment", "Garbage collection (computer science)", "Hard disk drive", "In-place algorithm", "Magnetic storage", "Magnetic tape data storage", "Overhead (computing)", "SMART", "Shingled magnetic recording", "Simulation", "Space\u2013time tradeoff" ], "id": "3fc68bf55557ac4b377bd97bed8b28f3e201d775", "inCitations": [ "ec3924af8c1cb428b4f1309b9a9ca3c86abd6631", "537d37be13687758d01e35fc6a62be118ec48ea1", "2fe51b5c34484b5fb8f0ec54483750ffc842fd4a", "218ad6a5c79d2bc61c513e8b65b7504ad6a2187b", "8e4cdaa006bce928ed7a6d37b9bfbfdffe2a6367", "7626ba5ea8754f99699509784251e49d1e700d86" ], "journalName": "", "journalPages": "121-134", "journalVolume": "", "outCitations": [ "3de54cb50512fffe9ab48f69b92dbf6a43bd2d1b", "501f491dd60ea26bcb8152bfd3f9ac2456e69da8", "8c06fb59a79b3b47dff8588302d8e6514a7f7a4a", "31ceeced5d23193c369b98170c45e66bae6ff77d", "4956257ba37029ffdaadf3bdcca9b89bb5eea561", "238c00c28e016b9341a4cd8c086aa76018db9133", "2018f3fc13cd38122abdf37bf939b5011cd2e3c9", "8dee6c0a8438a995b1d2452b84c7544be5f00578", "0edf5e6b5caab5f62f8e71293c58b29e7b8bb6e1", "9d5e9f98f85629d9dae20d181ff2c9fcdcdb5520", "ba356329a7c6672eca15815ed622dac2c71b4513", "0bf50c9aff7d5182504dd18b7cc0f6041b5e520b" ], "paperAbstract": "Shingled Magnetic Recording (SMR) is a new technique for increasing areal data 
density in hard drives. Drive-managed SMR (DM-SMR) drives employ a shingled translation layer to mask internal data management and support block interface to the host software. Two major challenges of designing an efficient shingled translation layer for DM-SMR drives are metadata overhead and garbage collection overhead. In this paper we introduce SMaRT, an approach to Shingled Magnetic Recording Translation which adapts its data management scheme as the drive utilization changes. SMaRT uses a hybrid update strategy which performs in-place update for the qualified tracks and out-of-place updates for the unqualified tracks. Background Garbage Collection (GC) operations and on-demand GC operations are used when the free space becomes too fragmented. SMaRT also has a specially crafted space allocation and track migration scheme that supports automatic cold data progression to minimize GC overhead in the long term. We implement SMaRT and compare it with a regular Hard Disk Drive (HDD) and a simulated Seagate DM-SMR drive. 
The experiments with several block I/O traces demonstrate that SMaRT performs better than the Seagate drive and even provides comparable performance as regular HDDs when drive space usage is below a certain threshold.", "pdfUrls": [ "https://www.usenix.org/conference/fast17/technical-sessions/presentation/he", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_he.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17-he.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17-he.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_he.pdf", "http://www-users.cselabs.umn.edu/classes/Spring-2017/csci5980/files/SMR/SMaRT.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/3fc6/8bf55557ac4b377bd97bed8b28f3e201d775.pdf", "s2Url": "https://semanticscholar.org/paper/3fc68bf55557ac4b377bd97bed8b28f3e201d775", "sources": [ "DBLP" ], "title": "SMaRT: An Approach to Shingled Magnetic Recording Translation", "venue": "FAST", "year": 2017 }, "3fd4aa3b657b4fb457ed1dfd54dc87e34f651e8b": { "authors": [ { "ids": [ "38639927" ], "name": "Yang Hu" }, { "ids": [ "3102340" ], "name": "Mingcong Song" }, { "ids": [ "39429972" ], "name": "Tao Li" } ], "doi": "10.1145/3037697.3037713", "doiUrl": "https://doi.org/10.1145/3037697.3037713", "entities": [ "CPU cache", "Cache coloring", "Cache pollution", "Cloud computing", "Data buffer", "Graph coloring", "Linux", "Locality of reference", "Manifest (transportation)", "Memory management", "Network function virtualization", "Operating system", "Provisioning", "Run time (program lifecycle phase)", "Software framework", "Stock and flow" ], "id": "3fd4aa3b657b4fb457ed1dfd54dc87e34f651e8b", "inCitations": [], "journalName": "", "journalPages": "467-481", "journalVolume": "", "outCitations": [ "0b885bb186445ee0c50277d990eca18c53fef09b", "2beb38e6e831790082b0578bcddd1b9f73714822", "8dc427e562569a48199ddc391bb272987169920a", 
"3cb1c133cf24aac036f08c278713290dd5f7b5f6", "284bc76cd122b8295704b0746363266d5dc568f6", "90fe816f5af055871f63a77282d4a4849e0764d3", "1401df37cc3fc78f26570d601fd123f17646b2d2", "3c0bc4e9d30719269b0048d4f36752ab964145dd", "99ce7f3cdef62f743db6df52eeedb7f5335ba669", "aeba281ee3caf611bf98c7b989daf393315bed89", "3b903a39571836bcf080891eb79afad2fe0fb1f2", "4a58e3066f12bb86d7aef2776e9d8a2a4e4daf3e", "2269889c9085ff518ee9e7f5b2f92e4599dd3ff2", "2892b18872a42fbc6173e76263ac3e9251e2a334", "7a978f2902460e732c50c36a171deb11733df1fc", "6678b17fc8758efea8d32c2d47f9924f8a0cdc6d", "2fae39f82be19b311d743d3525badd777228e3ed", "0332013fc380ca283d3afc457c430c513d19cc51", "31c1ebd6214a6146f2739fb81bf560229f413c91", "4215fbbff39a0213888718549f215b124bd2e611", "a04f070b9ca16bac04ee76b54a1da5eca52e1cc2", "7165fbf57c455e23746592b5621773c80e462ae7", "58478dbe1e7463d781b0be04d337ebe9cfabaf80", "b9c2c52955f448a52d7893b5eb8b33e8592c29dd", "013b529f4ec9c1d9ddcef88a1a1f4b0efcc0c9c5", "24dc8d1de7e78ab100d2d83cbdf1390ddb9234c9", "7129b305ce45f83127e928e8510da9fae0783905", "490d862480cf30949dce90e832aa292c498ac768", "77a3133097ff59bae0b6ac8fae418a58b585dacb", "4c2bec041e4aac45be6276864c2ba231d8139108", "521116a7dceed9e37f8dfb130098f6c3bf100940", "53177b318070482270d625ba69e50159f160e7c1", "0720cfa5330462593b20ea0bbb7d8b5862a6b730", "09bd66ed15985caa6b0bf1d54a36b508141ed128", "7c429cda99d8c7bab8f02e311b6ec1a752983397", "333da42f4369dff8ca905ad21ea6ee2f5dc99d55", "57d3035be09a0703d503da8af082b128af3dfdf6", "41e0ef45114deff41d92c8b235393cfe4d9a94f4", "863bcc82bd61f2199a4f2c4c6bc34a26c217c87a", "1817aa7ffd9932244e6c7161205f5099692242f3", "1ae7be5d55833e6aa53d24f620be5df9006a3558", "1ecd36058e48734213c81728f42ff798a2c52833", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "07c29281f2f7dddb9ee99f0d806e1fa45ac47454", "590bd345ef4b8f274af3363a52b7d8f518cdc08a", "ce94cd005aa253e92270ff2e952603d44f75aa5b", "cd828d4ebd953017e2935af555ed29d6ae4a79a4", "27f4001214ce0d449eb05d33626f444526accc7c", 
"9c7e0c435a94c17c16853937f60edd8c9b3a3a4a", "277f20ddc0e9fa593753ef2778110508372c597f", "11ebb411b138d2acdd481a6920b822fbc213cdc0", "138a2f42fad86624adec7ddac2ac556e9e20d0c5", "e66455a61821afde0c4d2cbd95c3d9ed0f843675", "4f1cf2a9244816dc9ea7be304b85a45b7e0941a4", "0e2a26def9432e19fb96dcf8956d7e786da0d814", "251ea4c57e71bb951ff6f9fe0ff63897a298402f", "603791529a45c93e398149e1fc5f0bd94139a414", "1cb7d1b3226d0a2fc77e54dee596a7a2d6d098d5", "23a9e1f8cefc76b71f0cf5e1ccf5a6485c19cadf", "806c3a77f5128e7bd927e07784de655971058a07", "59a8a5c5b08ea695f341eb0329bd27b5ff645012", "5e17f6f1ef31817f3914d788ebc09a13287ff7ca", "10fede77f843e9eb5ef1768a17543013616d9243", "e259a25e9b241618a55abe9962f6656c993ef094", "f8aa33900f552f8112d6186d78bc845d2dfc0007", "83d8f3fe2468adba47386a90e56e72373f757744", "165528cdf9c76edd98729c142faf50fbd6cfc69e", "3000e77ed7282d9fb27216f3e862a3769119d89e", "63e9e7c5508c486d01f270aa000662a2a176b8c3", "28c14bbd81e89b29ca1d00b109d1188ed9af3c9a" ], "paperAbstract": "With exploding traffic stuffing existing network infra-structure, today's telecommunication and cloud service providers resort to Network Function Virtualization (NFV) for greater agility and economics. Pioneer service provider such as AT&T proposes to adopt container in NFV to achieve shorter Virtualized Network Function (VNF) provisioning time and better runtime performance. However, we characterize typical NFV work-loads on the containers and find that the performance is unsatisfactory. We observe that the shared host OS net-work stack is the main bottleneck, where the traffic flow processing involves a large amount of intermediate memory buffers and results in significant last level cache pollution. Existing OS memory allocation policies fail to exploit the locality and data sharing information among buffers. In this paper, we propose NetContainer, a software framework that achieves fine-grained hardware resource management for containerized NFV platform. 
NetContainer employs a cache access overheads guided page coloring scheme to coordinately address the inter-flow cache access overheads and intra-flow cache access overheads. It maps the memory buffer pages that manifest low cache access overheads (across a flow or among the flows) to the same last level cache partition. NetContainer exploits a footprint theory based method to estimate the cache access overheads and a Min-Cost Max-Flow model to guide the memory buffer mappings. We implement the NetContainer in Linux kernel and extensively evaluate it with real NFV workloads. Experimental results show that NetContainer outperforms conventional page coloring-based memory allocator by 48% in terms of successful call rate.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037713" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3fd4aa3b657b4fb457ed1dfd54dc87e34f651e8b", "sources": [ "DBLP" ], "title": "Towards \"Full Containerization\" in Containerized Network Function Virtualization", "venue": "ASPLOS", "year": 2017 }, "3fdcd1f6bf4940926f67f32a3e3cc1794e244eb5": { "authors": [ { "ids": [ "12142608" ], "name": "Xiao Wang" }, { "ids": [ "2424723" ], "name": "Samuel Ranellucci" }, { "ids": [ "2620997" ], "name": "Jonathan Katz" } ], "doi": "10.1145/3133956.3133979", "doiUrl": "https://doi.org/10.1145/3133956.3133979", "entities": [ "Authentication", "Boolean circuit", "Computation", "Data pre-processing", "Experiment", "Garbled circuit", "High-level programming language", "Scalability", "Secure multi-party computation" ], "id": "3fdcd1f6bf4940926f67f32a3e3cc1794e244eb5", "inCitations": [ "05a6887c6142326263cd32304e222f22e35bff17", "3fab56c42fe6efc0b4febd21596ef89188f5d21a", "6db9824d4667b22310c51fe638403238f873e9f2", "5e75d9d75536f8b126c47ae4ce91b47d51c7cc69" ], "journalName": "", "journalPages": "39-56", "journalVolume": "", "outCitations": [ "2484330f40e318036d81c63974b5c28fe9c31f99", "b0e32f83369313c18e3ad38b47a0f0dbe42decac", 
"3fab56c42fe6efc0b4febd21596ef89188f5d21a", "5efa700b61efac0b571da693e06d0af085f7344c", "842eb3de44e0538769f1509d1b8d35161fb212bb", "411e4ecb35e5385ed0c88a36f0b2821c42af8f70", "9596178a19c4770164ac8c095b947021976f70d7", "33148623fc14ea5735e73dd716d030ab17118299", "816a5e6b0b7ebf6af116057c2dc94eb03fd73050", "19c3736da5116e0e80a64db35afe421663c4b4a8", "db0f82a419f89cda64fcbec2c58137862cd04475", "3cb55d539b232e309f4a5974148ec6f22afb5888", "94f133780f7c4b09e2513628e5cebe67c009b7d5", "13e5ca27f887c2be2795cdb335201c4c247c60f3", "1a68d1bbb2eab66239e51b26b7636c453f505b3b", "9f48a66ebf7a5cdcd990a62b2373c2f279cfd62b", "444630ced6bda572461744423ff420106472d5e3", "bb63c68855d42c95623ed9362d0853ea1d4cc858", "937cb0795bf3bfb5c9cbb7387ed68301bb6995ce", "04948723dec0e6724777ee56f0d10168cce44921", "7dd5a9a774b96ef8f551ded6418fe8adf28e8952", "1c07a74467c912602b33f28e90abd6eeaa60af6d", "13e622fca1a6b52aa85898e260f9455e4ba0d94b", "3ff4a7bcfa42348102cd49f6bf33c8ca85c94472", "8ad88f65222febc015b2a74d7c75b835c617ad4a", "b0dfc6d7324678f6973d2ece8ea264cef6f5ce60", "0435dddec1c8b1011bd5e3ea07c45d1cdf0b520b", "27e9745fc94ccf6039dd1804cbb99760544fc59b", "36250592849fc8dc50b3b5df0a72a8b072ce34e4", "a853e0842d74fa3ff146f45ea7f2ed52dac08d1a", "ada825ba76ae506dd30092c99af702ec3859272a", "796ff7cef7dcd8b9c577a86473fc1067e1078144", "6e4a81752657b66a65e23a60e9741240486555b7", "f71db4d70d4cc9e931a63dde7a6db8dad10a61a0", "fe38ee9944077cd14a0f6f1813af2d3d4b59ce43" ], "paperAbstract": "We propose a new, constant-round protocol for multi-party computation of boolean circuits that is secure against an arbitrary number of malicious corruptions. At a high level, we extend and generalize recent work of Wang et al. in the two-party setting. 
Namely, we design an efficient preprocessing phase that allows the parties to generate authenticated information; we then show how to use this information to distributively construct a single \"authenticated\" garbled circuit that is evaluated by one party.\n Our resulting protocol improves upon the state-of-the-art both asymptotically and concretely. We validate these claims via several experiments demonstrating both the efficiency and scalability of our protocol: ", "pdfUrls": [ "https://obj.umiacs.umd.edu/papers_for_stories/wang_ACMCCS2017.pdf", "http://doi.acm.org/10.1145/3133956.3133979", "http://eprint.iacr.org/2017/189.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3fdcd1f6bf4940926f67f32a3e3cc1794e244eb5", "sources": [ "DBLP" ], "title": "Global-Scale Secure Multiparty Computation", "venue": "CCS", "year": 2017 }, "3fde9f175dd91ebd5accbb337c9abdefef31c81b": { "authors": [ { "ids": [ "9623412" ], "name": "El Mahdi El Mhamdi" }, { "ids": [ "1727558" ], "name": "Rachid Guerraoui" } ], "doi": "10.1109/IPDPS.2017.66", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.66", "entities": [ "Artificial neural network", "Artificial neuron", "Byzantine fault tolerance", "Computation", "Distributed computing", "Failure rate", "Fault tolerance", "Neural Networks", "Neuron", "Social network", "Synaptic Package Manager" ], "id": "3fde9f175dd91ebd5accbb337c9abdefef31c81b", "inCitations": [ "3c93f07a402dd865c86604628c3b019de2f484d9", "18f2a39119a83297bfd2d86bc78fb25e5dcc736f", "6b25106ad8f0a8167516921c3d3966c89f639d13", "3bcba9b71275c764c95ccf6b202cfdec1b233dd7" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1028-1037", "journalVolume": "", "outCitations": [ "563e821bb5ea825efb56b77484f5287f08cf3753", "01d47976b70be92ed2a5e06f8bf19a3af5439aad", "11951e035379c1dc039af868fbaba8d9737702c3", "38ad1e2d6e85d2e68901fe20c45769fa343726d1", "34f25a8704614163c4095b3ee2fc969b60de4698", 
"58ceeb151558c1f322b9f6273b47e90e9c04e6b1", "126df9f24e29feee6e49e135da102fbbd9154a48", "04113e8974341f97258800126d05fd8df2751b7e", "171ef6765ddf9d22806146d8327ba082028ec32f", "f74ded11f72099d16591a1191d72262ae6b5f14a", "443044b5873706aadaeeb87c8594528d83687462", "351ff36dff3c2f8b4bf0b49c0d1a46042e4e0716", "17e1bb7fc17b45fe5ad8724a635d285ed000efa8", "17c0a7de3c17d31f79589d245852b57d083d386e", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "1766e97c83f698ce3a292bc851a3bdee8179fba6", "3e0080a34eca4eabb9b371c2b3c369dc4dc90112", "80c593a0668f4eb157a525831b7daad3bdb44381", "64192e5b1ce902b9dc397c53641d876a8a5506cd", "43cab718dbfdb9e9b0a515e897f8e26f3e0ac935", "2c94451cfcd8399fd0030c1846a5a012b49660a6", "f4416d7035103e9d39622178b65f229d68bb6a96", "4dc1641582a60abdc66a9d818c313a9d783a74be", "0122e063ca5f0f9fb9d144d44d41421503252010", "891e61c55b49dc55e95c4ed1803cd0801df02d00", "222e13bfa2eddcfe9a3eb6895f05186a3bd05b22", "d12d1289d2384c2ce642f01855637b9f0519e189", "251b6726fcad6b853de3d22939a4b2e36ef802ea", "b05b2d3a93cf01229cfbe124475d70674d91134d", "e637d66ac3c76f805b4873095a748d1035f1f435" ], "paperAbstract": "Neural networks have been traditionally considered robust in the sense that their precision degrades gracefully with the failure of neurons and can be compensated by additional learning phases. Nevertheless, critical applications for which neural networks are now appealing solutions, cannot afford any additional learning at run-time. In this paper, we view a multilayer neural network as a distributed system of which neurons can fail independently, and we evaluate its robustness in the absence of any (recovery) learning phase. We give tight bounds on the number of neurons that can fail without harming the result of a computation. To determine our bounds, we leverage the fact that neural activation functions are Lipschitz-continuous. 
Our bound is given in the form of quantity, we call the Forward Error Propagation, computing this quantity only requires looking at the topology of the network, while experimentally assessing the robustness of a network requires the costly experiment of looking at all the possible inputs and testing all the possible configurations of the network corresponding to different failure situations, facing a discouraging combinatorial explosion. We distinguish the case of neurons that can fail and stop their activity (crashed neurons) from the case of neurons that can fail by transmitting arbitrary values (Byzantine neurons). In the crash case, our bound involves the number of neurons per layer, the Lipschitz constant of the neural activation function, the number of failing neurons, the synaptic weights and the depth of the layer where the failure occurred. In the case of Byzantine failures, our bound involves, in addition, the synaptic transmission capacity. Interestingly, as we show in the paper, our bound can easily be extended to the case where synapses can fail. We present three applications of our results. The first is a quantification of the effect of memory cost reduction on the accuracy of a neural network. The second is a quantification of the amount of information any neuron needs from its preceding layer, enabling thereby a boosting scheme that prevents neurons from waiting for unnecessary signals. 
Our third application is a quantification of the trade-off between neural networks robustness and learning cost.", "pdfUrls": [ "https://infoscience.epfl.ch/record/217561/files/When_Neurons_Fail_1.pdf", "http://infoscience.epfl.ch/record/217561/files/When_Neurons_Fail_1.pdf", "http://infoscience.epfl.ch/record/217561/files/When_Neurons_Fail.pdf", "https://arxiv.org/pdf/1706.08884v1.pdf", "https://doi.org/10.1109/IPDPS.2017.66", "http://arxiv.org/abs/1706.08884" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/3fde9f175dd91ebd5accbb337c9abdefef31c81b", "sources": [ "DBLP" ], "title": "When Neurons Fail", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "4011e818dc77fbc8cf5412154b519d3425bf33c2": { "authors": [ { "ids": [ "2368056" ], "name": "Yaacov Y. Weiss" }, { "ids": [ "39279200" ], "name": "Sara Cohen" } ], "doi": "10.1145/3034786.3056112", "doiUrl": "https://doi.org/10.1145/3034786.3056112", "entities": [ "Analysis of algorithms", "File select", "Join (SQL)", "Numerical analysis", "Overfitting", "Parameterized complexity", "Reverse engineering" ], "id": "4011e818dc77fbc8cf5412154b519d3425bf33c2", "inCitations": [ "69f53559815f4645ec3e358b40e611ad3ad36a8c" ], "journalName": "", "journalPages": "151-166", "journalVolume": "", "outCitations": [ "06733016ddea7d2f5ff4117031e5e644a2dd2d96", "d50570e61f54a98d548f581eb047eab43cabcd3d", "0c9c1933b74fc499fc152787ad72f7168bd7633c", "6a95af4027feaf177837de186eaaa9b19adfd734", "a1843173909eaa253f5a7f147752c8cd4b0e5d71", "2529bb021fda27e1b4012ea62d68c26fd53efcd3", "0d099dd87a281eb75b915ef07371cc2c429e449a", "18199ef67e170ba4f379ab45a8cc2581994e9178", "1ea471965c62072fdd52f1c4f3cc3df89ca6ca51", "3fbde7139bf3c0f4490d8aa14c0f773028008b51", "3e68d730b678eb4994e46fb5b4edeaa2c5740ad8", "e249d55f83c87bb72aa25abae016ff0a5e4b3ad0", "1573c88814effcc449bbe5c2b6733151bed80672", "1495d20f1cc8100a00252636287df5be4f015ca9", 
"e16333259bb4a688fe2568f0d757299846a3e696", "38804fb24e353117159c329eed56fa4e72a71c56", "61b0d9887dbdf4645e310be77e9e01bcad1344cc", "b4ed83b1ff5553baf6e090efa147c514fd17d918", "17a23aaab0a713b7863ada44eca0c252a243c6b1", "9dfa951bec812bd7b8c905c587bca50b7883a10f", "39e225be6b7c5a74e8b4928f4e5fa4a26baf5e90" ], "paperAbstract": "This paper investigates the problem of reverse engineering, i.e., learning, select-project-join (SPJ) queries from a user-provided example set, containing positive and negative tuples. The goal is then to determine whether there exists a query returning all the positive tuples, but none of the negative tuples, and furthermore, to find such a query, if it exists. These are called the satisfiability and learning problems, respectively. The ability to solve these problems is an important step in simplifying the querying process for non-expert users.\n This paper thoroughly investigates the satisfiability and learning problems in a variety of settings. In particular, we consider several classes of queries, which allow different combinations of the operators select, project and join. In addition, we compare the complexity of satisfiability and learning, when the query is, or is not, of bounded size. We note that bounded-size queries are of particular interest, as they can be used to avoid over-fitting (i.e., tailoring a query precisely to only the seen examples).\n In order to fully understand the underlying factors which make satisfiability and learning (in)tractable, we consider different components of the problem, namely, the size of a query to be learned, the size of the schema and the number of examples. We study the complexity of our problems, when considering these as part of the input, as constants or as parameters (i.e., as in parameterized complexity analysis). Depending on the setting, the complexity of satisfiability and learning can vary significantly. 
Among other results, our analysis also provides new problems that are complete for W[3], for which few natural problems are known. Finally, by considering a variety of settings, we derive insight on how the different facets of our problem interplay with the size of the database, thereby providing the theoretical foundations necessary for a future implementation of query learning from examples.", "pdfUrls": [ "http://doi.acm.org/10.1145/3034786.3056112" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4011e818dc77fbc8cf5412154b519d3425bf33c2", "sources": [ "DBLP" ], "title": "Reverse Engineering SPJ-Queries from Examples", "venue": "PODS", "year": 2017 }, "40332ef0ae0dde6be56407416b632ed0e4d354c8": { "authors": [ { "ids": [ "2535238" ], "name": "Karl Bringmann" }, { "ids": [ "2755722" ], "name": "Ralph Keusch" }, { "ids": [ "2964328" ], "name": "Johannes Lengler" }, { "ids": [ "2957419" ], "name": "Yannic Maus" }, { "ids": [ "2255662" ], "name": "Anisur Molla" } ], "doi": "", "doiUrl": "", "entities": [ "Backtracking", "Computer Peripherals", "Computer science", "Dietary Iron", "Exhibits as Topic", "Experiment", "Fifty Nine", "Graph - visual representation", "Greedy algorithm", "Informatics", "Informatics (discipline)", "Milgram experiment", "Probability", "Random graph", "Routing", "SLPI protein, human", "Shortest path problem", "Small-world experiment", "Switzerland", "computer science" ], "id": "40332ef0ae0dde6be56407416b632ed0e4d354c8", "inCitations": [ "5ee1749819385bbe23863bfcca0c0f5a5a9ab71c", "dc55b39a1473f103456456a2482d8cd934a5e107", "7213222cf6dce25696add80a1b896cfa64e5111c", "8eb684f0c3bdda8a4d2fb6e6143a16e7d0efef9c", "e4cc7c07785464a58de1610e29f8495557599fd9" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "71af1a1141d344644acf1b4d2f1bb3b65a70e317", "0a8e6683a646b9f56fd8e105f69e96962f692c8f", "2a442410d949be2ec1b9cc4b87dcbbf0ff259e21", "9047465afe1e8bedb96a4fea6a517f6d57a55494", 
"70b127bdffb5747a59b25ba6d289cf169de7a38e", "50419f039d750295876025cb863363efe2dcf8da", "28cc7bdc3759836dbec332a23f33d86ba22d6911", "86c545d3793a9e99cf277afdea79ad9c49613a77", "a0ef2eedf37dab16dc30f6da75993dc1f938da97", "0cb9928daa3b90ac6959d9fb863c5f8ad6422dde", "1cdc9cd96b3544493e7db5e1adaef1a908fe5254", "bd6db9ddf236240fe88530a384dadef879e1cacf", "3b12ac2a7c4981710314086bdf0a19392b3129b0", "052715e9292df2bb62e95616ac6486fba7cbf72f", "16dde76c2ecc94c34004c640f603a8920c2fe1be", "7e3fe53a45fa3bd9440e265bec1b20847aaad325", "9f06ff9b1872d47b9ba20d888160330e09a71688", "310f63f56ac9a36d3e86c751997702680091dbb4", "1f9a344e33795e543e4cb39ebdd8cfb5b26da33d", "86f089e8a560b174d17d9a401338053f4fe84f7f", "26e4adf3958fccbac4639fd6ed46190d8e5c340f", "1ccbc50382d0e902e4c7d501e61607ee77f36d5f", "fa14b044dd9de93f46dbb9da7db617ae8d8d7d8f", "1537360a3e673a0eb28135279e97d462e5a792a4", "2bde20e5b469c96a90b34f67eeb4997b7a265ef0", "210403f065366de7dd6fc7e2f61c70d4e3d80067", "05d6e0185bcb48d396fe778ceedb2078e37e72ef", "c2e8eef6b7c811d84184493f311e07c4767dfada", "00f3f69339f1e202b0cef62aa1ab8b78f9f9f7da", "2403723e392cbe398c709ffed099e770cc0f38d4", "27993b651540af13a8a1dc6d7f956c5463c8d692", "34beb0bf0e60ef45b3504d73576202fc63b87afa", "241da88818870f658368703f8f3cb5b9c59d834f", "1002b31ed19a0e7373908fb620859a00bcdb1c90", "fec0420fb7c7a37d52a03ec76aacc54b79631f34", "0217083f3eea81788078e7f10fe8f13aa773bb68", "5c484fd5017a6e78f844b59f0a4971ed8ac0a9d4", "0f3d8ce6edec131e03784c18ae6e23a2ad55377b", "0617a139a4654d41cf276c3347d4fc2a1aaf9341", "327f0765b4ba25615a7942ac82f1c7ecd11d54c6", "06329b990ed3fbdfcff2fe37338c6d4ed4297484", "4dc578364f357b993b5554b9181c90c84aa6b4d1", "04889344a3e52d7e9f85816b65e3028dfca7f8d0", "34124b5ebfea5e62e3141ead5df6604586031cb8", "9cfda3c4a45806683aa78822d00bc9f0c28e6b0a", "1d302ae4abe2e938080a79bfc11c7955e5597a0e", "eda46c4606ea782555b5598f54853eaeb05bc6b6", "2815d33a87e56dd63c9e6691e182c4ee3e01060f", "1fe4da32959efa5697751cb82855cc46e3bc516c", 
"031923dea69e3d40397b03c0485799e03326a73a", "3de2972503b0611ab81eddedfe13bd6268ad3587", "69b7446047ad88eca4b02247a279fb36411db2de", "a8039a838b2e352b5af9895abbc9e1d450fe9fac", "20bb5370e3c303ba8b5787e0ba8e9d9d278f66ec", "74584f149329207eca99cd95739905d69f6bd850", "24074dc0e10c094dcfc2c9c00e883cec024b89a1", "1f0a3074b78278670b91e13be9744b9baee27bd2", "ce8e861410208575319c52aafccddcd3c3e11f9f", "004b359c4200c0ae1551b3c819bd057ce2502fa7", "12d0b0e13054103f8831aee7ca27d69bb1341ad6", "aa44b70b417f9725b4d2cc6bd4582347b9339e03", "e3f215bd61db9ef5eb05aef82d59bfac2684c53c" ], "paperAbstract": "The algorithmic small-world phenomenon, empirically established by Milgram\u2019s letter forwarding experiments from the 60s [59], was theoretically explained by Kleinberg in 2000 [46]. However, from today\u2019s perspective his model has several severe shortcomings that limit the applicability to real-world networks. In order to give a more convincing explanation of the algorithmic small-world phenomenon, we study decentralized greedy routing in a more flexible random graph model (geometric inhomogeneous random graphs) which overcomes all previous shortcomings. Apart from exhibiting good properties in theory, it has also been extensively experimentally validated that this model reasonably captures real-world networks. In this model, the greedy routing protocol is purely distributed as each vertex only needs to know information about its direct neighbors. We prove that it succeeds with constant probability, and in case of success almost surely finds an almost shortest path of length \u0398(log log n), where our bound is tight including the leading constant. Moreover, we study natural local patching methods which augment greedy routing by backtracking and which do not require any global knowledge. We show that such methods can ensure success probability 1 in an asymptotically tight number of steps. These results also address the question of Krioukov et al. 
[51] whether there are efficient local routing protocols for the internet graph. There were promising experimental studies, but the question remained unsolved theoretically. Our results give for the first time a rigorous and analytical affirmative answer. \u2217Max-Planck-Institute for Informatics, Saarbr\u00fccken, Germany, kbringma@mpi-inf.mpg.de \u2020Department of Computer Science, ETH Zurich, Switzerland, rkeusch@inf.ethz.ch \u2021Department of Computer Science, ETH Zurich, Switzerland, lenglerj@inf.ethz.ch \u00a7Department of Computer Science, University of Freiburg, Germany, yannic.maus@cs.uni-freiburg.de \u00b6Department of Computer Science, University of Freiburg, Germany, armolla@cs.uni-freiburg.de arXiv:1612.05539v3 [cs.SI] 19 Feb 2017", "pdfUrls": [ "https://arxiv.org/pdf/1612.05539v2.pdf", "https://arxiv.org/pdf/1612.05539v3.pdf", "https://arxiv.org/pdf/1612.05539v1.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/b29a/adcfab8a0437041bbc86b853653603a1765f.pdf", "s2Url": "https://semanticscholar.org/paper/40332ef0ae0dde6be56407416b632ed0e4d354c8", "sources": [], "title": "Greedy Routing and the Algorithmic Small-World Phenomenom", "venue": "", "year": 2016 }, "408374a349e307878091cc44f83246dd3f5b9be7": { "authors": [ { "ids": [ "2144291" ], "name": "Haoyuan Zhang" }, { "ids": [ "5423026" ], "name": "Huang Li" }, { "ids": [ "2878256" ], "name": "Bruno C. d. S. 
Oliveira" } ], "doi": "10.1145/3136014.3136016", "doiUrl": "https://doi.org/10.1145/3136014.3136016", "entities": [ "Abstract syntax tree", "Combinatory logic", "Compiler", "Extensibility", "Parser combinator", "Parsing", "Software engineering", "TRAVERSE", "Type safety", "Types and Programming Languages" ], "id": "408374a349e307878091cc44f83246dd3f5b9be7", "inCitations": [], "journalName": "", "journalPages": "2-13", "journalVolume": "", "outCitations": [ "8ded2ccf3baf49a9025330abb14d41f58141a746", "7eaf2903b047b9402cc2ef7fa125fcaeb45fcd0f", "2535dad02ef32007b9c02dec9621aa59eba3cb9e", "7692a19dd425903781e9ac29cbb715f23b3a9a80", "829d7ace27e2a95184051bb667b79e203946972b", "78b2cec16719ca27dfd23a0dc912696f21369a47", "12021148216a4b1405172889d6a391e85efde400", "3ed5235695f821c36d2264bb60a607cd163d2292", "0b7abaf33e48237022ae5a8bca859a0c8e2d448e", "8865aeb8efaa49a1700230e2cb1dee4c157800c8", "854d434e09a02d7f9fa6258a98eab5d4910832b0", "19518dbe94dcc5b39998b7f27f3e1fdc0652f0a8", "31eec0a79ee6d42481b6b3640881b5e9ca612e6a", "05459fe4d1c3a0b7621f604ee9ce0e1219be7c85", "714455987437ee28b4b531b4665c36b7b2c378d7", "6713ff1746e7f1ef12e7231acf6310007079f443", "288af592aab1ddcb715ca05842f84073a79a4368", "544c433c7711b5d6d44f85fe342d9ff09eb00c25", "66dbd3cf61357b98daf498184a1ad279b4f56d68", "736768fe05e6d114f9d0d2b10ba4a04db6c5ba75", "272ea844e4cf657a3c1c1f303a5e47de07ec4523", "12a449686a13b2b1c0ecfe4492a76b4ae0fe36ef", "2f176f377081306c05e54db5a33270cb5cb1a1e3", "0e744a4fa57637988e3146fe213d9034e6c143e1", "fd3044e96c8e296afcf7a7b8ddd4dda7e8cdbd4d", "20752b24804cc244634e39345975c5b96e02eb78", "84af715301c5ee76cc32dd71305e48d308e25365", "8b895f7df0be6a8a13780284512efff968260c8a", "168a16c36878b84c0dedd5e5449d8c118224a630", "59e145dfef77797b57b28d71b019c2e8b6dd9f01", "1c2fe5234463c6d72359e2d77083bbe67943c36f", "451ead65a7260f50e4fa083f5e6c43182812fa46", "06860b9fcaa98b473053907ed11ef51f6925c7c7", "0954212d0d60a1053de84760d96df2f5dea6c208", 
"45f9a0ceba5d4aaa56525fa7beb4a9537bfe011c", "e9cf4cbee9a0409f4828388a44cf32ae3c2d9b22", "5c6f64b0e4746d64718700c2413f1831c6d936d8", "8971f289b8af593d88e03f35491a780a38491b2d", "840f54df8023023de2c60c79d230e1942dc8cb58", "81ca7ac75b1ccdee0bd8a9e93b7d79bdc17a31a9", "869172852497052bc81d041c914c328bb16561ca", "6d5a3e0b92a121bbc85558601d4c6704e0f6aa88", "a11f81fc27df87459f2533ad354184de9d04046c" ], "paperAbstract": "Over the years a lot of effort has been put on solving extensibility problems, while retaining important software engineering properties such as modular type-safety and separate compilation. Most previous work focused on operations that traverse and process extensible Abstract Syntax Tree (AST) structures. However, there is almost no work on operations that build such extensible ASTs, including parsing. \nThis paper investigates solutions for the problem of modular parsing. We focus on semantic modularity and not just syntactic modularity. That is, the solutions should not only allow complete parsers to be built out of modular parsing components, but also enable the parsing components to be modularly type-checked and separately compiled. We present a technique based on parser combinators that enables modular parsing. Interestingly, the modularity requirements for modular parsing rule out several existing parser combinator approaches, which rely on some non-modular techniques. We show that Packrat parsing techniques, provide solutions for such modularity problems, and enable reasonable performance in a modular setting. Extensibility is achieved using multiple inheritance and Object Algebras. To evaluate the approach we conduct a case study based on the \u201cTypes and Programming Languages\u201d interpreters. 
The case study shows the effectiveness at reusing parsing code from existing interpreters, and the total parsing code is 69% shorter than an existing code base using a non-modular parsing approach.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136016", "http://i.cs.hku.hk/~bruno/papers/sle17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/408374a349e307878091cc44f83246dd3f5b9be7", "sources": [ "DBLP" ], "title": "Type-safe modular parsing", "venue": "SLE", "year": 2017 }, "408ba239cece0308dbd180d86ee217d3c8d0b855": { "authors": [ { "ids": [ "3193053" ], "name": "Dmitrii Kuvaiskii" }, { "ids": [ "2333637" ], "name": "Oleksii Oleksenko" }, { "ids": [ "1873477" ], "name": "Sergei Arnautov" }, { "ids": [ "7612177" ], "name": "Bohdan Trach" }, { "ids": [ "3025359" ], "name": "Pramod Bhatotia" }, { "ids": [ "1743906" ], "name": "Pascal Felber" }, { "ids": [ "2314032" ], "name": "Christof Fetzer" } ], "doi": "10.1145/3064176.3064192", "doiUrl": "https://doi.org/10.1145/3064176.3064192", "entities": [ "AddressSanitizer", "Benchmark (computing)", "Compiler", "Confidentiality", "Heartbleed", "Intel MPX", "LLVM", "Memcached", "Memory safety", "Parsec (parser)", "SQLite", "Tagged pointer", "Thread (computing)" ], "id": "408ba239cece0308dbd180d86ee217d3c8d0b855", "inCitations": [ "287da0ab3c169c41433b0e5504161dfd1afbfa6c", "187e2d1c888c5c0529e5a50c8c90efe9889cbd69", "8ca1436fe1e9bbdb39a92178fa80c7869d92573d", "b89b8c0e9f63c64d3c6514125b65c98a0cbf7987", "a1a2fd4399e3b649a210c43d69f186c946bc90a2", "e5e0ae77487cea8272caa939608b7b4119b2b94e", "4c35de159c4e01a5b5cb37e5e892468aa03da476", "eac6a750de49b17ee443a44d296889706acaac6c", "334ec6e57110ece9f482f9ec2e85412b0be8072a", "85741fa2a0fb1060c138b1b11a0906381661fbcb", "8569785f80712b5787e12b86a3870a28c0182b2c", "d72458f9501963670b50ee9fe78e622425955630", "38a54f9bbbfc46599770a28999365144a273783f", "46b19795bbda3b8c0c3e02896482fe61bb2943bc" ], "journalName": "", "journalPages": "205-221", 
"journalVolume": "", "outCitations": [ "79473986fe994d4aeb9d662e0b8e572758a4511b", "2f2128b60e15d87d4e565a8532076efa84fc752e", "9e235446d2c4fe77b56b4e0bfd1a645421d95f76", "50117210fbb7a8ce28ba503280f3c65fdc029ec7", "49b73fc335cb97c0b52b283ce236a4d4eb2b99c9", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "0a90f6db154f6a9f3565ee596d23da3196361454", "7cbbab21c6f6fb54a968005afa21468b825f1d1e", "0d0154d589205cc519607fbb142ecefe0f96aef0", "529d3d3fb82afe905c410e8a7b3fc9d09ca623c5", "11496e28e6dd3f1ab7d2f054c46d7587988dfd86", "4c35de159c4e01a5b5cb37e5e892468aa03da476", "377e1ea567fa79fae02a0b38a62916520ef81e2d", "7926d0b9dfc36c13910a1850cd91a7db862f0014", "129359a872783b7c3a82c2c9dbef75df2956d2d3", "07f9aa35346107785ad6b6cd07537b24fa7bf0d2", "933dce04cfe0f8e21c5b3d3eaf4595891e695b4f", "acf32e644db8c3ac54834d294bba4cf46551480a", "1e37625f382709b06f72e5c3c41aba1328ff66dc", "1f333ade9620ad695556353d5a052f1c71ae297b", "e23298e18aa92ac43fa941d0f5eacb339905b685", "272a1f065adb2fa98b3c4df55787db944028791c", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "01a2d5c69a09ec3fa82de6dfe12811f3d981ab7e", "0719b9670c8580db76547497df39caabdc20fc32", "5007b598ed2c118bdf14c0a7562b6c4fb7974742", "0d4843d31be7198a94a68b00488148d4fa693567", "412c0be5520e80ab6f8c3662477a28e3b9ccb943", "d296252ddf0e2c6b7422008d703843c1863bd15b", "0e55379d27454c5d9d72e4ba4b3752007b9f886f", "0a289fd7b14345822b1acda6d82750b15d59663e", "04363665e3f99a839c051d938fc8782f1be574fe", "4ced5380095c3f659a15e0f5b16061713f630c2e", "3d6395a04f7436b143339bbb7e4e46dfaaaf8afb", "7a9f655133788b2bd23c1171683f81b702b4b5b6", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "00f975d2b23b96de697096f3f16eeb76156d8bf9", "1378b35dc3ca7a65922defc80b8960440bd4b325", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "0657eb7e069c2c2c7cae6636704e0f7fb3bcd9fc", "31dc6002c403d930375a6e8a03e46fa3aa796733", "635f3a25ca8626072d1eedc6aebddcb429de4b4e", "4d75cd2764c45baf46c72fddc5c676fdfce6f60e", "6b6fae57882fd193461fca64654107068ce9fd9a", 
"4b9426a328fb4e379e2fab57fc72a91c5618ae94", "116eaac2e498bc2c9bea10ea838309dcf143d764", "20083a475bda69032e69372569ada9d9ad9d6e4b", "0988a425689f6f3700e797f4a2c18f73692573c3", "3ca369fa2cadb403db7ac5e75deefd9acbb10723", "29146d2b80bc8c56f7e18efe8d2c92354254e947", "5f8bbc28027342b16df77fa1b9a1efe4628d41dc", "49d50f1d0bb2a03388f3cb60f9f1ca01974d371c", "c2d68a3dd269d4a0d2dbe0314797ccb410589602", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "12d5c3bcd2dd94b4f07f358be291c5906b16dba4", "9a8bf1a6e4e71f59620a53b0637c38a416966c4b", "eac6a750de49b17ee443a44d296889706acaac6c", "5ddc6a439cdc9b4eaebdad8c20976f1f0be4523f", "6588630854bf12b190ae8f95ed8763f7cdb945f6", "c4e77ec0e6e4ac6638b662bfe5342439ad4451de", "d988e247c13edf67429a3c823172e2c2b7e30e00", "06bf84f98e7dd39be8d96eb67bafbf56d61bc715", "528c068c8a28229ed37fbba555dca55283b3ca26" ], "paperAbstract": "Shielded execution based on Intel SGX provides strong security guarantees for legacy applications running on untrusted platforms. However, memory safety attacks such as Heartbleed can render the confidentiality and integrity properties of shielded execution completely ineffective. To prevent these attacks, the state-of-the-art memory-safety approaches can be used in the context of shielded execution.\n In this work, we first showcase that two prominent software- and hardware-based defenses, AddressSanitizer and Intel MPX respectively, are impractical for shielded execution due to high performance and memory overheads. This motivated our design of SGXBounds---an efficient memory-safety approach for shielded execution exploiting the architectural features of Intel SGX. Our design is based on a simple combination of tagged pointers and compact memory layout.\n We implemented SGXBounds based on the LLVM compiler framework targeting unmodified multithreaded applications. 
Our evaluation using Phoenix, PARSEC, and RIPE benchmark suites shows that SGXBounds has performance and memory overheads of 17% and 0.1% respectively, while providing security guarantees similar to AddressSanitizer and Intel MPX. We have obtained similar results with SPEC CPU2006 and four real-world case studies: SQLite, Memcached, Apache, and Nginx.", "pdfUrls": [ "http://se.inf.tu-dresden.de/pubs/papers/sgxbounds2017.pdf", "http://doi.acm.org/10.1145/3064176.3064192" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/408ba239cece0308dbd180d86ee217d3c8d0b855", "sources": [ "DBLP" ], "title": "SGXBOUNDS: Memory Safety for Shielded Execution", "venue": "EuroSys", "year": 2017 }, "40b2652cf3bdee159dacb6e18c761003c31f4205": { "authors": [ { "ids": [ "40155297" ], "name": "Yongjoo Park" }, { "ids": [ "9949978" ], "name": "Ahmad Shahab Tajik" }, { "ids": [ "1725561" ], "name": "Michael J. Cafarella" }, { "ids": [ "2198667" ], "name": "Barzan Mozafari" } ], "doi": "10.1145/3035918.3064013", "doiUrl": "https://doi.org/10.1145/3035918.3064013", "entities": [ "Approximation", "Approximation algorithm", "Database", "Experiment", "Programming paradigm", "SQL" ], "id": "40b2652cf3bdee159dacb6e18c761003c31f4205", "inCitations": [ "39b62c7fc926127d11f6d60d78066ef9d9564a55", "18217d68fca6b1f5305c80a733a4a717e3e35052", "2b6a2ec50b841f435a89b1711001ee8bf776a760", "140de9a2a670d2468cfeb4d0c5c677cf64c80866", "ad64649f20cc20a2d1584cbc4b859d9fa9920538" ], "journalName": "", "journalPages": "587-602", "journalVolume": "", "outCitations": [ "ed3249f83fafbf2b0224a664e4617c614ff410d5", "0beca56d0260ffa0c68d17b7e90ccff42b820076", "38050cf2f8289da85efada8baa0933ba5224ac7e", "115ac1e107a0a1da87e12455de7e3f645fd00836", "1367718ca1be8161667d2880ed4eb19a7beba058", "b41761392859fdc7a4345e8b5d259c3c62c94740", "4e3c1f3904d4b5404a03b6101370841f7c4798d5", "0c88a6f41374306058ea8f19a3eeac5b6ce6650e", "1dde3f45a23efe5c7fa9b400e81388296527c6da", 
"483015dc170d20e1a19828d493eff364cd7a42ce", "0c0cfae57d32de295292fff6e67b2a22001bde23", "78f9ce506df537b8c36b49857123d90bf819a860", "21cc08ddc2644290c3c9bad27321ba7187a0b0b0", "4969f8f0feeb77128bd96cd15fc6ee323fb0f653", "22fa7b136662f51f712bdc9ccbb811a08327805f", "2d03baec8ac1568e6813aa43d625d552524f977e", "0dafdc7debdcae528b2549489a03509cb4ecb9fe", "30c70576767b3b8c1612891b98f43212e690e61c", "0d1f1d0984b3e8d6b8e9169c05e2a7d29e2a32d0", "37861bb8d8daeab11d379a012ab526222a3f9990", "1ad1019d9eb5fb0f1833b8a98a52aee4a14fecdd", "617e05daeaee61af81dfa4413e3b1d5e0c077523", "4c815e21c909211d7c047a2437938b24217e0a22", "2889dbb1e5770e1eb6e5e3087be5be5841b11fa8", "37601bb6e655f2392ba1ca2086da0d1e03e19edc", "1a1280a047c380ddf281d486f7fe644c7fc95fbb", "6017608b32421ce761c2baae98dc752ec22a0dbf", "b55b8fcafc62be1ca5db4864200cf0dec0a9941e", "da696840f88ae88eb15828e5a22518447e756aab", "ad45ffbf9ba7e7c223b4f4b24c56f31e965eeea2", "01f6f9e17988133989b39da37801f6c674df1040", "5ccb180e8142da6c1a04fb1e1477d7f1c4867c0d", "2069e42716f16b01e0085b8715e40f8609357485", "8631ae4a69f409ed09f451867c3ef4bf17129b79", "1b19eb5c3ed02dcda18b9300f2804bda0a4c94e6", "266907e673944ff0bd4486fe6f8b4f14a3c8c9f6", "1317f7e3d1de6ffd0888303ca95d9c8c6bae2af3", "c372964be3e6482d714d2be716da6f6f80f43575", "5f3f9223c5c9f896be099bc177929febad508407", "4af182338ee63754d4569c26cb6a5c3bbdd8cf2a", "999b2f918f4a68b7d71d90a10a18e3af08dafc24", "5bfcf932d67b8abc128f41d57d10ea6292615599", "7a278ee0578f194700cadc3811cdda4ec751f88a", "148b83a379a549e3e88fc9506e91e14b09756c7d", "080ed793c12d97436ae29851b5e34c54c07e3816", "b63eaa5aab7788f783bc0f25c94c4eca7f19313b", "3f14887cc4e65aecd6fc571eef2774a19e6ecb6a", "cc2bc224cc1c8ef3d5e644f971b02ea30e1a4ca6", "26337762b9b06c7d8a952bebab6408a5e7f9935d", "3357eede86832c2344e9a255e0520a01b6189393", "4f280d6f6b2af06bc7c7a34e4f610608e9f82156", "65c46b04244c194aafe5cff074e824b4aad081ce", "7535340c5c62f40d5eccce082537a16cb8e03b27", "0f466df201a4ae105724cf16c868024134a96727", 
"7e209751ae6f0e861a7763d3d22533b39aabd7eb", "9d774d272dc7627abeca901a35b2c92958ee002e", "6a912e2c1f818a047bc620f475b6b6e3b0dbacfe", "1948575f4cedf689f708d1f0880e79de9ec4c4a5", "4b163245cdc7a1d80ded5e26424fb382910965b9", "befb28484716491fa8e06454a3b92c14ed4ed039", "37e0d25940bd49022c41e63909532acd88eb16b9", "d89131b5939b51f191d5efef3f18258e1135845c", "4b66ce55c4a8b5c6dfa804146ad32a5a0797d7ec", "a5afa88e7ae2c9a455e7a006e6380f25b3d3728f", "00aa614734a26a19b09a0a3bdee2adc77bdac5e4", "eed62d36d1b976ac3873c83645f1c25f5096f89c" ], "paperAbstract": "In today's databases, previous query answers rarely benefit answering future queries. For the first time, to the best of our knowledge, we change this paradigm in an approximate query processing (AQP) context. We make the following observation: the answer to each query reveals some degree of knowledge about the answer to another query because their answers stem from the same underlying distribution that has produced the entire dataset. Exploiting and refining this knowledge should allow us to answer queries more analytically, rather than by reading enormous amounts of raw data. Also, processing more queries should continuously enhance our knowledge of the underlying distribution, and hence lead to increasingly faster response times for future queries.\n We call this novel idea---learning from past query answers---Database Learning. We exploit the principle of maximum entropy to produce answers, which are in expectation guaranteed to be more accurate than existing sample-based approximations. Empowered by this idea, we build a query engine on top of Spark SQL, called Verdict. We conduct extensive experiments on real-world query traces from a large customer of a major database vendor. 
Our results demonstrate that database learning supports 73.7% of these queries, speeding them up by up to 23.0x for the same accuracy level compared to existing AQP systems.", "pdfUrls": [ "http://web.eecs.umich.edu/~mozafari/php/data/uploads/dbl_techreport.pdf", "http://web.eecs.umich.edu/~mozafari/php/data/uploads/sigmod_2017_dbl.pdf", "http://arxiv.org/abs/1703.05468", "http://web.eecs.umich.edu/~michjc/papers/ypark_sigmod17.pdf", "http://doi.acm.org/10.1145/3035918.3064013", "https://arxiv.org/pdf/1703.05468v2.pdf", "https://arxiv.org/pdf/1703.05468v1.pdf", "http://yongjoopark.com/resources/dbl-sub.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/40b2652cf3bdee159dacb6e18c761003c31f4205", "sources": [ "DBLP" ], "title": "Database Learning: Toward a Database that Becomes Smarter Every Time", "venue": "SIGMOD Conference", "year": 2017 }, "40bf84c1c2509c8dc14fd233fe6b93c431d8df9f": { "authors": [ { "ids": [ "3419933" ], "name": "Jordi Wolfson-Pou" }, { "ids": [ "1978328" ], "name": "Edmond Chow" } ], "doi": "10.1145/3126908.3126966", "doiUrl": "https://doi.org/10.1145/3126908.3126966", "entities": [ "Algorithm", "Deadlock", "Distributed computing", "Distributed memory", "Iteration", "Iterative method", "Jacobi method", "Multigrid method", "Preconditioner", "Smoothing" ], "id": "40bf84c1c2509c8dc14fd233fe6b93c431d8df9f", "inCitations": [], "journalName": "", "journalPages": "48:1-48:13", "journalVolume": "", "outCitations": [ "c6207645e4b1cc91e8fad537e2c0637dab102f21", "0b8372e651763eee5c1aa8706123e59d99e1fd15", "22c74d8be071084ce8812af19548e7bf2bf0c8b6", "4ad0595b7af6c4041bfd89ccf74d94f154a39762", "2bd4c65d88b0776e3efebeec4fc0fa1df3a529e8", "c4e0c55469d70a83c5ed7ef5e3b062c56c21b596", "1f19f03fc3a11bc2a7a0265c5d1cf71990e662b0", "693cb000198eafd44a6f02d96e2dc2126bd645e5", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "6a067c116b1b07078c85efcd6667c4598668f71d", "1e0b241ffd46adc5f7f98c8b9d156336e25ab6c4", 
"5fb372ffd91b5cd5ad8826bad932cf9badc554cc" ], "paperAbstract": "We present a new algorithm, the Distributed Southwell method, as a competitor to Block Jacobi for preconditioning and multigrid smoothing. It is based on the Southwell iterative method, which is sequential, where only the equation with the largest residual is relaxed per iteration. The Parallel Southwell method extends this idea by relaxing equation i if it has the largest residual among all the equations coupled to variable i. Since communication is required for processes to exchange residuals, this method in distributed memory can be expensive. Distributed Southwell uses a novel scheme to reduce this communication of residuals while avoiding deadlock. Using test problems from the SuiteSparse Matrix Collection, we show that Distributed Southwell requires less communication to reach the same accuracy when compared to Parallel Southwell. Additionally, we show that the convergence of Distributed Southwell does not degrade like that of Block Jacobi when the number of processes is increased.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126966" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/40bf84c1c2509c8dc14fd233fe6b93c431d8df9f", "sources": [ "DBLP" ], "title": "Distributed southwell: an iterative method with low communication costs", "venue": "SC", "year": 2017 }, "40dc09f5fbd3776c3f34adedc7a4718307ace0d6": { "authors": [ { "ids": [ "36937479" ], "name": "Jian Huang" }, { "ids": [ "1688881" ], "name": "Jun Xu" }, { "ids": [ "1680554" ], "name": "Xinyu Xing" }, { "ids": [ "1687577" ], "name": "Peng Liu" }, { "ids": [ "1740036" ], "name": "Moinuddin K. 
Qureshi" } ], "doi": "10.1145/3133956.3134035", "doiUrl": "https://doi.org/10.1145/3133956.3134035", "entities": [ "Algorithm", "Antivirus software", "Backup", "Data recovery", "Encryption", "Experiment", "Firmware", "Flash memory", "Garbage collection (computer science)", "Kernel (operating system)", "Malware", "Solid-state drive", "Terminate (software)" ], "id": "40dc09f5fbd3776c3f34adedc7a4718307ace0d6", "inCitations": [], "journalName": "", "journalPages": "2231-2244", "journalVolume": "", "outCitations": [ "3de112ed0b197ef2dfb828d1ed0bc91e24b1ab76", "2ab731e0263229327d43a4e716ac6d7f0473a56d", "a94be9e2c70ca70573ab48287cb982cf67c2cb7f", "31ceeced5d23193c369b98170c45e66bae6ff77d", "088e3e939ad234b6fdd0e321290fb26937dc2553", "61ea7ef665186310a24af134441de0a18b6c351e", "418e5e5e58cd9cafe802d8b679651f66160d3728", "6b10253d92c050eeba9c650d58b2bc08bac50e4b", "24d0eb102394abeb5c6728b5682e43c9c4448484", "1625f94b9a74f01fb00b4f10baac1ae84f450624", "0870848b1193987a50ee9c8427c4f597af6fda1b", "394cc3b1bfc88d7bbe4b0f120004d95d0c966df1", "010bf8e639dbdee2c31a58ca9b65e89aeac11315", "1820a34042d6371a9e20484b0c63b698eb522a6c", "5b5608919bbc69786c1e5946781921efc91ac5c6", "957ae212c16ea9a70a53d1143e0f8a908a496648", "1e102df57ec826f0afee0dda578551e3da3b7289", "3f9d4a16ec5d08c0309df743e73745f876b9abfa", "0cb9e2cee074684b04ad7567cbcdf1bc83ef7645", "41e5b42b7e6c6bb5468d3aaae44279156b135fbf", "05a1bad1ef2341339e18d636d78594226d4ee8e6", "6ea84cc03a14601101bbbcb045dd73ba6ee55858", "6fd3c5146fc90d4ce14cafc85f3a92be40f22213", "70d687ca03cb47731bf4424c36adc09b11847e68", "4468cbc8a9ad13ebeaa210424e842f158415ab07", "6bd33530e5dcd90f367748bf88ba1ff337b7caf4", "19ffc4f5129ed9d39f498f4eb901024c514263c7", "07d799dfb834fab9059ae57d86c0ab772faa11b3", "80c09f309e783a88e6ab951bd0fedf90910be92e", "307d887b872e7282b639ca38a8f3b18ca156c68e" ], "paperAbstract": "Encryption ransomware is a malicious software that stealthily encrypts user files and demands a ransom to provide access to these files. 
Several prior studies have developed systems to detect ransomware by monitoring the activities that typically occur during a ransomware attack. Unfortunately, by the time the ransomware is detected, some files already undergo encryption and the user is still required to pay a ransom to access those files. Furthermore, ransomware variants can obtain kernel privilege, which allows them to terminate software-based defense systems, such as anti-virus. While periodic backups have been explored as a means to mitigate ransomware, such backups incur storage overheads and are still vulnerable as ransomware can obtain kernel privilege to stop or destroy backups. Ideally, we would like to defend against ransomware without relying on software-based solutions and without incurring the storage overheads of backups.\n To that end, this paper proposes FlashGuard, a ransomware tolerant Solid State Drive (SSD) which has a firmware-level recovery system that allows quick and effective recovery from encryption ransomware without relying on explicit backups. FlashGuard leverages the observation that the existing SSD already performs out-of-place writes in order to mitigate the long erase latency of flash memories. Therefore, when a page is updated or deleted, the older copy of that page is anyway present in the SSD. FlashGuard slightly modifies the garbage collection mechanism of the SSD to retain the copies of the data encrypted by ransomware and ensure effective data recovery. Our experiments with 1,447 manually labeled ransomware samples show that FlashGuard can efficiently restore files encrypted by ransomware. 
In addition, we demonstrate that FlashGuard has a negligible impact on the performance and lifetime of the SSD.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134035", "http://memlab.ece.gatech.edu/papers/CCS_2017_1.pdf", "http://memlab.ece.gatech.edu/slides/CCS_2017_1_slides.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/40dc09f5fbd3776c3f34adedc7a4718307ace0d6", "sources": [ "DBLP" ], "title": "FlashGuard: Leveraging Intrinsic Flash Properties to Defend Against Encryption Ransomware", "venue": "CCS", "year": 2017 }, "40f137ea7a002685fb9cbf0fb04086e96904619c": { "authors": [ { "ids": [ "1757205" ], "name": "Vasileios Giotsas" }, { "ids": [ "2897689" ], "name": "Christoph Dietzel" }, { "ids": [ "2088273" ], "name": "Georgios Smaragdakis" }, { "ids": [ "1782612" ], "name": "Anja Feldmann" }, { "ids": [ "1728325" ], "name": "Arthur W. Berger" }, { "ids": [ "2461898" ], "name": "Emile Aben" } ], "doi": "10.1145/3098822.3098855", "doiUrl": "https://doi.org/10.1145/3098822.3098855", "entities": [ "Border Gateway Protocol", "Colocation centre", "Downtime", "Failure rate", "Information source", "Internet", "Lightweight methodology", "Peering", "Real-time computing", "Routing", "Virtual private network" ], "id": "40f137ea7a002685fb9cbf0fb04086e96904619c", "inCitations": [ "f4357788267d36dc12b1d635a84bb0f0a3df8c85", "a3ef6a9c08657bca69687d5a61402e5124acc5e2", "32f0b3a3120b834ee82c5af744b91169f2f78ae4", "078a9e13d57159e2482b3d71c1c7639dcc093e7f", "3e8f77e2cc36ec33d25e85e3eb161dde17ed70d0", "625922f62451f1e3b556e6aceca588946880427a", "f8f7c2fb6c7ca2af10f94de9fba92fdad8601cf9" ], "journalName": "", "journalPages": "446-459", "journalVolume": "", "outCitations": [ "631118cd04caa1bca7a2010228447f25eb4378a9", "0ca7f8a8a1e6468ca9dad4ee0643ca4796a4eade", "5880497106a0e3b4a16fe5c0026673f8daade248", "033fb48ba30c7f40073a7c1c27f3baba1bec218f", "108aed364e3c849b5acc62e6d285a05e3fbbaa23", "36e3cca61206e1c5a226eda7ba663005b74c71da", 
"2d274fd896268c6e5ffbe4d9ec7dfbe7f0956b7a", "3e52e4d429e0d676f7cb4c5431cef915557062ab", "16c19d2f99609ce35673519d440a44161977963c", "6410b6cc29af234544f7706194aba20d6c4c90ae", "4f410ab5c8b12b34b38421241366ee456bbebab9", "23eab8551b95795afcc26767fcdc780198278e0e", "306a2933e91fdf8971bd160c5bbe365e48ec7fdd", "f258497c8e57a885656ed5125cc4d8c95c57cf57", "e57dd79802779cf73263ae7c3d6c2fb6425554f2", "73a6eb2ae5e9aa37babb95748c4d8ecee7efaf22", "0a7151c200bf97973453ec05a28012cf03cf906b", "6ed50af8cde99d0e797173d9fafb97cab4a5fd08", "13bf13f019632a4edb967635e72e3e140f89e90e", "09b4dae698495e8229171a64cd78b23f106de653", "41f6d924dd684966a9f6bc25e2266ce08cfdf4f4", "0fecce8b0cc8b52b69167d71921c02a003916e63", "7a0e7065d521e31e74fc367597db41b62b19a789", "0a03b67644a6411ab7ec73551aa27060b8e4ab1d", "1788428f03e3e7ad9f330fdf46be8d165cd5a453", "4e33e9d1aa91aa78819ec9700d1024e0f0cdef6c", "160ce6681166d4b84f8c0bad7fb2aa25d7e04eb0", "23f88889f86013645965d91e96711e949405e763", "1813a88797677e684f51d72e349dae10674c087f", "50957ff587eeeae3e1f618bd0f9cf4c2924bfd85", "43374f791570a03f909ac9b6d43057a14565f4c5", "1d65193c80e49f6ea2ecc0eb8c331965f328df51", "29693d3abaab2c3cd1785a498f043859c52961ef", "0a59ef4961a6eea7354c6b6da90f251bb157879d", "1bef4d26c917f0060814e86cffa9b22bea70a847", "00fd3220a51630625be397c9114b71fda62ba3d9", "041b6d817c6c27b24d91ed84b842170f54c259da", "1f896c8476ad4ff8acec72893ca704009cefe41e", "4737f27f87c199c42266913fe78d9f541cc78d89", "6245ced0adb67151ea0f456d14cf002efc4e5c38", "2d1d82c89f4d5464d4e20e9694918941c030cb40", "1085045094f17ccdc8c4b25d28a257af98a0e38b", "4c68e99eb7993b60872851d74cd1d726c6abe438", "06970c7f0f92dcd6851ef2a4b9bfae672dbd7434", "0ecec13abda69472bdee76f60ab3d97601661af7", "4991ad5ceedfef67fc4be2b4d5fcae7f138ff59b", "1eb7e1f89e8038d595e96f1ba0d6bd02b30a4ed1", "6badb9f32ab0cd3342d84b12d7436326d501e8b2", "89f964daecf636845c317b92c028a83df060b2c1", "0e0178a2e6d1858b5c32242acc534515b772723c", "2daa396ebf3a77e11f88ca82b2cddec0eec8dfd6", 
"67eb93bb872a06ba9796d1b54e14f14e8d63e5ba", "02df3d50dbd1d15c38db62ff58a5601ebf815d59", "7266ec7df4fbdfc58ee8653bf224307c0577bc0a", "22bd3a35b9550bc5b570a0beee5648eb9033be3b", "369c18470c8be5fea7754f2e7a4659c582a0aadc", "71fda542b243f32b3c9f75317905b1ea1ceacce9", "29c1d53ac3861e1b95da32349c756b349f586ea2" ], "paperAbstract": "Peering infrastructures, namely, colocation facilities and Internet exchange points, are located in every major city, have hundreds of network members, and support hundreds of thousands of interconnections around the globe. These infrastructures are well provisioned and managed, but outages have to be expected, e.g., due to power failures, human errors, attacks, and natural disasters. However, little is known about the frequency and impact of outages at these critical infrastructures with high peering concentration.\n In this paper, we develop a novel and lightweight methodology for detecting peering infrastructure outages. Our methodology relies on the observation that BGP communities, announced with routing updates, are an excellent and yet unexplored source of information allowing us to pinpoint outage locations with high accuracy. We build and operate a system that can locate the epicenter of infrastructure outages at the level of a building and track the reaction of networks in near real-time. Our analysis unveils four times as many outages as compared to those publicly reported over the past five years. Moreover, we show that such outages have significant impact on remote networks and peering infrastructures. 
Our study provides a unique view of the Internet's behavior under stress that often goes unreported.", "pdfUrls": [ "http://www.caida.org/publications/presentations/2017/detecting_peering_infrastructure_outages_sigcomm/detecting_peering_infrastructure_outages_sigcomm.pdf", "http://people.csail.mit.edu/gsmaragd/publications/SIGCOMM2017/SIGCOMM2017.pdf", "http://doi.acm.org/10.1145/3098822.3098855", "http://conferences.sigcomm.org/sigcomm/2017/files/program/ts-10-3-outages-in-wild.pdf", "http://people.csail.mit.edu/gsmaragd/publications/SIGCOMM2017/SIGCOMM2017-presentation.pdf", "http://people.csail.mit.edu/awberger/papers/Detecting_Peering_Infrastructure_Outages_in_the_Wild.pdf", "http://www.caida.org/publications/presentations/2017/detecting_peering_infrastructure_outages_ucla/detecting_peering_infrastructure_outages_ucla.pdf", "http://www.caida.org/publications/papers/2017/detecting_peering_infrastructure_outages/detecting_peering_infrastructure_outages.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/40f137ea7a002685fb9cbf0fb04086e96904619c", "sources": [ "DBLP" ], "title": "Detecting Peering Infrastructure Outages in the Wild", "venue": "SIGCOMM", "year": 2017 }, "40f196e21a289394c4354961116587b8accba45e": { "authors": [ { "ids": [ "2725752" ], "name": "Mingzhe Hao" }, { "ids": [ "9751178" ], "name": "Huaicheng Li" }, { "ids": [ "32249376" ], "name": "Michael Hao Tong" }, { "ids": [ "26907030" ], "name": "Chrisma Pakha" }, { "ids": [ "3197683" ], "name": "Riza O. Suminto" }, { "ids": [ "22409029" ], "name": "Cesar A. Stuardo" }, { "ids": [ "1695232" ], "name": "Andrew A. Chien" }, { "ids": [ "1738725" ], "name": "Haryadi S. 
Gunawi" } ], "doi": "10.1145/3132747.3132774", "doiUrl": "https://doi.org/10.1145/3132747.3132774", "entities": [ "Central processing unit", "Failover", "Operating system", "Retry", "Solid-state drive" ], "id": "40f196e21a289394c4354961116587b8accba45e", "inCitations": [ "347e1352fb903b40dce606a1e581e9d601bc289c", "1d08d231ec66645ec56d2210c1a7c6b44c6ff041" ], "journalName": "", "journalPages": "168-183", "journalVolume": "", "outCitations": [ "03543f75c4fe0c49f81af789a1c7293ff0e4e107", "006cb2c8713bff9e97a8c68c65e66b98379731f7", "0da73832dee2c9b3d4c0d039d8e714e6ff098e40", "1d204d774c134dd7df97a6a83e12387efd0c7a01", "2018f3fc13cd38122abdf37bf939b5011cd2e3c9", "09bcd050bb006639ae8bcacb3af149f0b6d964f3", "3de30c8dafc720bf066e5e3a005d16212dd31149", "5848da5058fed3b97bfd801ca19e5265f489abfe", "09f0751d7452cd0480d572171593d07996325fcb", "4eab97d0d1c75641671aa5b7761978322d904c5c", "830ee8d87a3f2ef969d34c1dc7224d1b3dca6c1b", "75d60809b9ac769a4a7e2a9907b3bc028ac58935", "6a728101f8d24da7d153f5a92e71e94f7a323dae", "262c16d1bdd8d0ccef77bd66648144d584a24477", "7272e04a354d3bc54b1a8119e868090996d3bf1d", "4ba4613eab33cddc53bec9e14e50d03fa66270ca", "188c0013d5f79072ee97f8a48190cbe54b2009b1", "26a88fcaf621270af5f5786fdb2df376a2bc00aa", "dbcdb4c402756b2b5ac910b9eb17ddb412290d16", "090030e0d1aa117008e9e9fa4abdee0a95455f4a", "2e46f9074bd81ea4ec29ecec7e0231c16fb2e8db", "0b2c84be9e9f97f2464ad9d09be5f4c37edda47e", "25b24d6cc547c80e3a1ad037082694a5f6a2b8a9", "23015f1b4df6d84f73db0f31fa42992c18a5fff8", "19d8f8af2d774bda8b95bbeacf62c85811c37b3c", "08f13e484e7e51831ec13076d14570ced91a50fb", "16e367708e50a9ed6228334c9d49f4db0dab4cd8", "7b6e453e08717cfdcb66349ac184996e43ed85b3", "075f51b0aeaac7ebed18d5fbf67e64a14c8943f1", "514a5c15e8cf3f681febecad954a4508d9189c99", "10e0397e08f37ee5d3ba1f9e24ac9eb313c784f4", "0b6adc0dbc55076dc9c9a8931f4a4df58fd291b6", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "090599a2caf4591c87699ad850c75554cd712937", "65a2cb8a02795015b398856327bdccc36214cdc6", 
"070c3a8c3ce10277424f23c01a54b377478ee59c", "2167c708155dac4bb63d29a4bcc960dd320d8e2a", "74948946b70b35efe4ac3b4f10002714525ac255", "5dd3323a738940bfa194a2503c9fcaa09156dbd5", "13b925352e4ee3066a6d38ef9f16efdfa967cabb", "05a1bad1ef2341339e18d636d78594226d4ee8e6", "670cc70dddd4d4cfe76734b845ff5550a6fde988", "61977858b3eea4f5a6d81393301e7298ade7a2d8", "9bcc0099f0d34c391ca1a3c5220cb0b3b33c4183", "0541d5338adc48276b3b8cd3a141d799e2d40150", "3d6f626d383048fda0ac5b56864141f2521dd38f", "1693e83e47a99667f4bd6ad6e24d8b62a1ba22c8", "9aa0d7253574e50fe3a190ccd924433f048997dd", "4256339f61d809e5092b68a505f7d37099cbd341", "120c8504b4290920309165d48bb032f2c724a161", "4956257ba37029ffdaadf3bdcca9b89bb5eea561", "057d21830cde5b3be2fdb3a74ee69a3c7e9109f8", "086820e40dc8046c30a8751394df167bec047fe1", "242b5b545bb17879a73161134bc84d5ba3e3cf35" ], "paperAbstract": "MittOS provides operating system support to cut millisecond-level tail latencies for data-parallel applications. In MittOS, we advocate a new principle that operating system should quickly reject IOs that cannot be promptly served. To achieve this, MittOS exposes a fast rejecting SLO-aware interface wherein applications can provide their SLOs (e.g., IO deadlines). If MittOS predicts that the IO SLOs cannot be met, MittOS will promptly return EBUSY signal, allowing the application to failover (retry) to another less-busy node without waiting. We build MittOS within the storage stack (disk, SSD, and OS cache managements), but the principle is extensible to CPU and runtime memory managements as well. 
MittOS' no-wait approach helps reduce IO completion time up to 35% compared to wait-then-speculate approaches.", "pdfUrls": [ "http://ucare.cs.uchicago.edu/pdf/sosp17-mittos.pdf", "http://doi.acm.org/10.1145/3132747.3132774" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/40f196e21a289394c4354961116587b8accba45e", "sources": [ "DBLP" ], "title": "MittOS: Supporting Millisecond Tail Tolerance with Fast Rejecting SLO-Aware OS Interface", "venue": "SOSP", "year": 2017 }, "40f38a197dd08b44f1e1ba39e4a401aef38c2f13": { "authors": [ { "ids": [ "2689966" ], "name": "Jiasi Shen" }, { "ids": [ "1720971" ], "name": "Martin C. Rinard" } ], "doi": "10.1145/3136014.3136030", "doiUrl": "https://doi.org/10.1145/3136014.3136030", "entities": [ "Iterator", "Language construct" ], "id": "40f38a197dd08b44f1e1ba39e4a401aef38c2f13", "inCitations": [], "journalName": "", "journalPages": "244-255", "journalVolume": "", "outCitations": [ "2f2128b60e15d87d4e565a8532076efa84fc752e", "11d676173bc1ccfc03e6bab12b0c879ad7ae4707", "0b72a5e4bec54e9f0a4d77db5b484d27886b49fe", "14b88bbd16edde7606a350d7294868c232291406", "2c13dcfdc5ea2d355a46fe326c371038a00ba7f5", "71e9f6771400d14db7dbaf2f21a9127e7474895f", "1a54d102af8e7d7c8a7d5ea1463ae67e9b75a646", "2e8812dd2549fa0fb6f72216880e36814ea2df3e", "9693b93dc7dd8220681ac27bd8c17da0365dd581", "71e0257bb18b9f4aadf1ebed38c27a157a814ba7", "012089d194955f546353cc87a76948b26aca1ab6", "09ed565e84057123c15ab12b885c235d1f241aed", "5a6d9e1c28c29626d4fca7aa1b822c3921f0c23e", "5300f92dba234f183230d656a4dc16d9cbda9f73", "8ce8d8e6083caffe9abebb8b45048297ee875cf8", "17fe58e6115711ce4d5ceef941c60eb6d6898dcf", "30b30b2da89e9a287f235cdec1d346de163e50c5", "7da816d0f1d2a2b33d6512a1e694c04cbe4d4963", "a8ea89fdc03195f6e77521eac9194c571fc5729b", "1f8116db538169de3553b1091e82107f7594301a", "686ceb62e5af2b4200139942c17d0a378e33c222", "02d4b6a359a3c3216a4b0af0e3c4797a0601a322", "0e7286223fbc751d70e36ba7c6ee1107a63d13db", 
"788683544adb2a23b53d5a00bbf05d1d69c4a55f", "a9024417726ae7d9246c9fc717c27493ced8bb43", "ce98527fe1daba98353a1b12062e05c529e7928c", "9b9d17da57e83272a53292850b5e956643a94a4d", "d8b256d85f6069a0608ec03eefbc10f7bcf7004f", "0164554d2f46b6642d2ad652ba4f7c0e784808ee", "5b02198ef544cd488cf1d3b22e55ba2973b1d098", "0462ad3cc734ae4a23ea9068335832a1505abb16", "272a1f065adb2fa98b3c4df55787db944028791c", "6e90c995cc9caa0f7d9d68d536f5e16e9bcbbcd6", "ad058c7f12f203a8e8ca3be78e5fc1ee742f0c4c", "046a9e129fba46d78301ead661949f5290c79989", "215ac9b23a9a89ad7c8f22b5f9a9ad737204d820", "29f9c339028e4dfd1098bb90a6045da15b7b0ba0", "3d6395a04f7436b143339bbb7e4e46dfaaaf8afb", "2598f4455415ae284ff1645b9d994012808079f8", "44d7370695122bb4e2aed1fa3d8baf4f32532614" ], "paperAbstract": "We present a new language construct, filtered iterators, for robust input processing. Filtered iterators are designed to eliminate many common input processing errors while enabling robust continued execution. The design is inspired by (1) observed common input processing errors and (2) successful strategies implemented by human developers fixing input processing errors. Filtered iterators decompose inputs into input units and atomically and automatically discard units that trigger errors. 
Statistically significant results from a developer study demonstrate the effectiveness of filtered iterators in enabling developers to produce robust input processing code without common input processing defects.", "pdfUrls": [ "http://people.csail.mit.edu/jiasi/pdf/sle17_rifl_slides.pdf", "http://people.csail.mit.edu/jiasi/pdf/sle17_rifl.pdf", "http://people.csail.mit.edu/rinard/paper/sle17.pdf", "http://doi.acm.org/10.1145/3136014.3136030" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/40f38a197dd08b44f1e1ba39e4a401aef38c2f13", "sources": [ "DBLP" ], "title": "Robust programs with filtered iterators", "venue": "SLE", "year": 2017 }, "40fe3bee79d95d6886d778cf16b6b120ad0509f2": { "authors": [ { "ids": [ "3035292" ], "name": "Olga Poppe" }, { "ids": [ "40536846" ], "name": "Chuan Lei" }, { "ids": [ "3681379" ], "name": "Salah Ahmed" }, { "ids": [ "1715020" ], "name": "Elke A. Rundensteiner" } ], "doi": "10.1145/3035918.3035947", "doiUrl": "https://doi.org/10.1145/3035918.3035947", "entities": [ "Algorithm", "Central processing unit", "Complex event processing", "Computation", "Experiment", "Graph partition", "Kleene star", "Responsiveness" ], "id": "40fe3bee79d95d6886d778cf16b6b120ad0509f2", "inCitations": [ "18f97e0f25ff60651992c30eed70d3b0b6e24e68", "f43ab3ebfe43a33ff0204832b8ca89e6a9b79f0f", "eb23b67ff0450e83691548cd8e8043edcd9d6f47" ], "journalName": "", "journalPages": "109-124", "journalVolume": "", "outCitations": [ "a087916314b7986ac11510dc2e26a812431675f5" ], "paperAbstract": "Event processing applications from financial fraud detection to health care analytics continuously execute event queries with Kleene closure to extract event sequences of arbitrary, statically unknown length, called Complete Event Trends (CETs). Due to common event sub-sequences in CETs, either the responsiveness is delayed by repeated computations or an exorbitant amount of memory is required to store partial results. 
To overcome these limitations, we define the CET graph to compactly encode all CETs matched by a query. Based on the graph, we define the spectrum of CET detection algorithms from CPU-optimal to memory-optimal. We find the middle ground between these two extremes by partitioning the graph into time-centric graphlets and caching partial CETs per graphlet to enable effective reuse of these intermediate results. We reveal cost monotonicity properties of the search space of graph partitioning plans. Our CET optimizer leverages these properties to prune significant portions of the search to produce a partitioning plan with minimal CPU costs yet within the given memory limit. Our experimental study demonstrates that our CET detection solution achieves up to 42--fold speed-up even under rigid memory constraints compared to the state-of-the-art techniques in diverse scenarios.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035947", "http://users.wpi.edu/~opoppe/papers/CET-poster.pdf", "http://users.wpi.edu/~opoppe/papers/CET.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/40fe3bee79d95d6886d778cf16b6b120ad0509f2", "sources": [ "DBLP" ], "title": "Complete Event Trend Detection in High-Rate Event Streams", "venue": "SIGMOD Conference", "year": 2017 }, "410e450226e5a4225bf69dbdb36c9b117575afa4": { "authors": [ { "ids": [ "1702888" ], "name": "Liwei Wu" }, { "ids": [ "1793529" ], "name": "Cho-Jui Hsieh" }, { "ids": [ "1745601" ], "name": "James Sharpnack" } ], "doi": "10.1145/3097983.3098071", "doiUrl": "https://doi.org/10.1145/3097983.3098071", "entities": [ "Algorithm", "Chroma subsampling", "Collaborative filtering", "Foreach loop", "Iteration", "Numerical analysis", "Recommender system", "Scalability", "Time complexity" ], "id": "410e450226e5a4225bf69dbdb36c9b117575afa4", "inCitations": [], "journalName": "", "journalPages": "515-524", "journalVolume": "", "outCitations": [ "00ea580216bc4c8a6c175135bbb88d0f4a75a473", 
"47bfb230261b48f2a910f73fc82b0a7a26e1dee5", "1c2cae04e0e00dcec3fe6840b2daa00ae9e8a0a3", "61202eb74184c0d75276954c93ce774c72f8035d", "23bbea130398331084021cc895a132064219b4b1", "4f5ef93300aafc04960b17de5641deeba83973d3", "7811dff921a6d8275136530d99b80580e3adbe0b", "36ea668bb7617b9c1e6e98aebe96a0aaf90b569e", "769fb8055fbe0997ef8d9dab6c9abf37489c6575", "5b0a650b10016e7c2b1df9ce2792da70088826c3", "e632df96ddf8454d64a06b5d0cb4cac8678ba0cd", "0aa2a4d259433016ebc899c496faea03c024c0bd", "15636c07e7a17e3ba476420d4699a012dcba0445", "554f6cc9cb9c64a25670eeb12827b803f3db2f71", "0e9bac6a2b51e93e73f7f5045d4252972db10b5a", "75cca03b03dc514f03c102ccfdd53a7c0af625fc", "2e635989e232816546ef352edc38881580b04c1e", "0043e12a88227fd578f94f8532b3bbe4949d7f50", "217b616d5aefb133e9b182eb3d17e22a7f500f7c", "1085caff15921c8e63d007268bd7722fe3dc1be5", "46820e8d6aca201bb5cd0d8e7bd685b3c497e12c", "60dd6131aa1f052a56d50388c98c508ef113be65", "9aa88a8a354f1d322e242376d27d0474e50252f8" ], "paperAbstract": "In this paper, we consider the Collaborative Ranking (CR) problem for recommendation systems. Given a set of pairwise preferences between items for each user, collaborative ranking can be used to rank un-rated items for each user, and this ranking can be naturally used for recommendation. It is observed that collaborative ranking algorithms usually achieve better performance since they directly minimize the ranking loss; however, they are rarely used in practice due to the poor scalability. All the existing CR algorithms have time complexity at least O(|Ω|r) per iteration, where r is the target rank and |Ω| is number of pairs which grows quadratically with number of ratings per user. 
For example, the Netflix data contains totally 20 billion rating pairs, and at this scale all the current algorithms have to work with significant subsampling, resulting in poor prediction on testing data.\n In this paper, we propose a new collaborative ranking algorithm called Primal-CR that reduces the time complexity to O(|Ω|+d1 |d2 r), where d1 is number of users and |d2 is the averaged number of items rated by a user. Note that d1 |d2 is strictly smaller and often much smaller than |Ω|.\n Furthermore, by exploiting the fact that most data is in the form of numerical ratings instead of pairwise comparisons, we propose Primal-CR++ with O(d1|d2 (r+ log |d2)) time complexity. Both algorithms have better theoretical time complexity than existing approaches and also outperform existing approaches in terms of NDCG and pairwise error on real data sets. To the best of our knowledge, this is the first collaborative ranking algorithm capable of working on the full Netflix dataset using all the 20 billion rating pairs, and this leads to a model with much better recommendation compared with previous models trained on subsamples. Finally, compared with classical matrix factorization algorithm which also requires O(d1d2r) time, our algorithm has almost the same efficiency while making much better recommendations since we consider the ranking loss.", "pdfUrls": [ "http://www.stat.ucdavis.edu/~chohsieh/rf/KDD_Collaborative_Ranking.pdf", "http://doi.acm.org/10.1145/3097983.3098071" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/410e450226e5a4225bf69dbdb36c9b117575afa4", "sources": [ "DBLP" ], "title": "Large-scale Collaborative Ranking in Near-Linear Time", "venue": "KDD", "year": 2017 }, "4118e82b31d5aa75b00b9debbe757204f9ec779f": { "authors": [ { "ids": [ "3010843" ], "name": "Dongyao Chen" }, { "ids": [ "1730051" ], "name": "Kang G. 
Shin" }, { "ids": [ "2703630" ], "name": "Yurong Jiang" }, { "ids": [ "5445008" ], "name": "Kyu-Han Kim" } ], "doi": "10.1145/3143361.3143385", "doiUrl": "https://doi.org/10.1145/3143361.3143385", "entities": [ "Bluetooth", "Floppy disk", "Image sensor", "Interaction", "Motion detector", "Online and offline", "Personalized marketing", "RSS", "Smartphone" ], "id": "4118e82b31d5aa75b00b9debbe757204f9ec779f", "inCitations": [], "journalName": "", "journalPages": "263-275", "journalVolume": "", "outCitations": [ "7e403d160f3db4a5d631ac450abcba190268c0e6", "03ca2b2494aa4977bfef1d30d314490feb68760c", "5c610def30a6fccb890bbb02f431008797d0cf20", "42157b64537a5ce4415177ab6ccb69480c2120c6", "6f8d7127b5dff5977e0e848ac81a90f9792592b4", "08616ca445012df0e3c982f742d2662bf0f0ce6e", "5a0e84b72d161ce978bba66bfb0e337b80ea1708", "2cb251202b00311eb246bfa974cf73c6bc7f1cd4", "31f56e187cbb1142dbe82064a7d825c8c50df6a7", "22df69b94613676f6501c2509d1955494d7c70c3", "2c12feaf8f69e5bced639cbb1ae4c16c257401a4", "bcbf261b0c98b563b842313d02990e386cad0d24", "06f00d7b2e3ac8252a114a04a3552bc35e9bb97d", "21d0f3b4c847e04be0f3735f5f55bffe32e942e3", "05fe031e53dd8990e7076a91277cb2b74e22b811", "38ec3a01b51c8b533544a05b3188b67cf38cfbfd", "16ccb8d307d3f33ebb395b32db23279b409f1228", "26401111a68b8fc219739f356ad969394b79efe7", "0eaf73b88896796c7ad8a5c98e94e21b6d87c01e", "5b23e4b6765ecebea10c8911973771f242b58159", "00f324e77f618eb32f9f5b26f2943f287f596f80", "27060a0947f4d0c05f7ad3ccaf0141412f98c7c2", "3b0ffca1e81021432aee341c41ea9cb084259207", "3663e540628f703c5c82bf78171f43dd6df0a084" ], "paperAbstract": "We present a smartphone-based application, called LocBLE, for enabling users to estimate the location of nearby Bluetooth low energy (BLE) beacons. 
In contrast to existing BLE beacon-based proximity applications that can only show coarse-grained (immediate, near, and far) distance estimation, LocBLE's fine-grained estimation can enhance human-environment interactions.\n LocBLE has three salient features in estimating location from BLE beacon signals. First, it is adaptive to dynamic signal propagation environments by learning the environmental changes directly from the received signal strength (RSS). Second, it performs sensor-fusion for location estimation by utilizing motion sensor data and RSS readings from a smartphone. Finally, LocBLE improves location tracking accuracy with novel on-line calibration on a set of beacons nearby. We have built a prototype of LocBLE on smartphones and evaluated it on commodity proximity-enabled beacons. Our experimental results demonstrate that LocBLE achieves an average of 1.8m and 1.2m accuracies in locating indoor and outdoor BLE beacons, respectively.", "pdfUrls": [ "https://kabru.eecs.umich.edu/wordpress/wp-content/uploads/LocBLE_conext.pdf", "http://doi.acm.org/10.1145/3143361.3143385" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4118e82b31d5aa75b00b9debbe757204f9ec779f", "sources": [ "DBLP" ], "title": "Locating and Tracking BLE Beacons with Smartphones", "venue": "CoNEXT", "year": 2017 }, "4119512e9d3a6ce9581de33f4ffe4b413943b00e": { "authors": [ { "ids": [ "2610507" ], "name": "Rakesh Pandey" }, { "ids": [ "32151627" ], "name": "Aryabartta Sahu" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.43", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.43", "entities": [ "Benchmark (computing)", "Dynamic random-access memory", "Memory management", "Multi-core processor", "Network architecture", "Network on a chip", "Operating system", "Overhead projector", "Page (computer memory)", "Program optimization", "Run time (program lifecycle phase)", "Scheduling (computing)", "Simulated annealing", "Thread (computing)" ], "id": 
"4119512e9d3a6ce9581de33f4ffe4b413943b00e", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "324-331", "journalVolume": "", "outCitations": [ "2ae11d565e0016332914d619c8a91c3b6f00148d", "0eff1cb1cc7af126b87b86c8c929fe5ff5106ea3", "634b9eddd17f4c14ba1a5efeedd609b57623dc49", "0cdfaf12c62ad15d8d8c6a9eaaf403c4b18e67e3", "1604717bec7e2166a4dbf69099656f53054b147d", "00bdbb3c0f457a226b5c04dd52a5e5fbe5830925", "b194fdf11c9651bd5fa1096599cbd1c4577530ab", "0d1d94b6897b0672b991704259f7056562b56b37", "6bf62a3aec49e5d128ae024a17628893898fcd8f", "2dab7537b8e70c1c0acbbaef2e0e71ff3d67b1c6", "d4536b49bdb553721953016c7a1a6902763fe44a", "5ea6e424c9ca6ea3103fc28baab7cc6c412eaf4d", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "4c0c319823e1aa9df1d32f756f5e30b6b002723b", "5382cdf5ae2a566f5a2805f319c946eb615b4514", "d47ab6dc259a57124cb9b86493147bbc04162dbd", "48a7323c4894de3afb90ef2135160205ebb55011", "42f174df3876256dd5606bb61b366116e9943beb", "21ae47765f014f81ac2a23615b10da44b6e21470", "0e2efda23894526e869e57cb81c76de22f6a8d20", "04539b97463b1ff668450ab2918a3483b6cf357a", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "a22eb73c30917a0f3622f56eade55975a58dc74b", "443b8c56d7300f61b825d1dbafe06afdda23c3e1", "8cb6436b59f98e51e6c13176876fe1ccd62d5bb1", "69652a30c1badf6a4c21006f3ec8da3de976cbaf", "82203d72124b2c11ff552cabcb8bae00e51d79fa", "02903a2d438b242d87904d18c20f191ec98f9b2b" ], "paperAbstract": "Memory size and the number of cores inside a chip are increasing with the advancement of technology. With 3D stacked IC technology, memory size becomes even larger inside the chip-multiprocessor. So memory mapping, task scheduling and along with communication optimization of the network-on-chip become crucial. 
Hence operating system memory management and scheduling approach need to consider this advancement of the on-chip memory size and on-chip network architecture for efficient mapping. In this paper, we have proposed an efficient virtual page to memory slice mapping and used simulated annealing based thread to core mapping of multi-threaded application onto 3D stacked memory chip-multiprocessor. Our experimental result shows, the thread to core mapping reduces the overall on-chip communication cost up to 26% and an average of 12%. Moreover, our proposed virtual pages to DRAM slices mapping reduces the overall on-chip communication cost up to a maximum of 86% and an average of 56% for many real multi-threaded benchmarks and multi-benchmark workloads. Results also show that overall on-chip communication cost does not improve much due to the thread only mapping, but when it is combined with the virtual page to DRAM slice mapping then improvements are significant. Further, we extended the approach to do dynamic virtual page mapping at runtime, which reduces overall communication cost up to 78% with incurring a negligible amount of overhead.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.43" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4119512e9d3a6ce9581de33f4ffe4b413943b00e", "sources": [ "DBLP" ], "title": "Efficient Mapping of Multi-threaded Applications onto 3D Stacked Chip-Multiprocessor", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "4130a8dce26eaa8d0f19b9f3b2bfdc8a58a8c5e6": { "authors": [ { "ids": [ "3316949" ], "name": "Ofir Weisse" }, { "ids": [ "1683260" ], "name": "Valeria Bertacco" }, { "ids": [ "1769314" ], "name": "Todd M. 
Austin" } ], "doi": "10.1145/3079856.3080208", "doiUrl": "https://doi.org/10.1145/3079856.3080208", "entities": [ "Baseline (configuration management)", "Computation", "Encryption", "Memcached", "OpenVPN", "Requirement", "Speedup", "Spinlock", "System call", "Throughput" ], "id": "4130a8dce26eaa8d0f19b9f3b2bfdc8a58a8c5e6", "inCitations": [ "21a402631dff504755e281934eaa90bc9dbe8ae9", "f0a95cb1ed1daa7643ed186faa379b6e54b73640", "788b9e288c8db9decbbb2668fdee3737e386e143" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "81-93", "journalVolume": "", "outCitations": [ "79e14bf42535966db5e056af6d8a0e0ec1f522b2", "27f53aec412891f6eae209abf102c5e7cfc6655c", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "0c10529346c4d2d5d4462636a0b3a0dd9fb8d25c", "06bf84f98e7dd39be8d96eb67bafbf56d61bc715", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "07f0e56d1c37c213cd5c617dbfba5a0549629a19", "104eb3716d8b2d7e6d69805357d5b7fb87caff3e", "452c803f91ab670bf36403ed5412875b13ae9e94", "c30846b11acb22f4f73fd2e2e7f936b27a22289e", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "08832863bc3f041222f381c8ae143f8a66449059", "be72f098670c9ad84901895c42d88df800d273e9", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "232302a7a9aeb7eba0d296a5e846664efcb6ca4f", "ed6c6a8b669015a73b1f4af39e02899a4c1bd14a", "7932a4597cec5149c575aa2303fe8f12241e4320", "8f6e0bb0f41f94b18066d055d6bbc0d7790bbcc2", "415012ec86c7a6acebd34bf7eb02eff46dd96e68", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "22a5eeb8608b35e371b7544a54fabeadca8866e3", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "4ce02fb69245a84d3ffceae20e596dcf0497508d", "5e7567dc5c9922527e7ce5e4fd62981488a09829", "6b6fae57882fd193461fca64654107068ce9fd9a", "6068cdb7a834f47a38ffcbfa4128e70ed3e385eb", "df7a6176b9d538f2017b7c1a95af41e8bce5fc20" ], "paperAbstract": "Intel's SGX secure execution technology allows running computations on secret data using untrusted servers. 
While recent work showed how to port applications and large-scale computations to run under SGX, the performance implications of using the technology remains an open question. We present the first comprehensive quantitative study to evaluate the performance of SGX. We show that straightforward use of SGX library primitives for calling functions add between 8,200 - 17,000 cycles overhead, compared to 150 cycles of a typical system call. We quantify the performance impact of these library calls and show that in applications with high system calls frequency, such as memcached, openVPN, and lighttpd, which all have high bandwidth network requirements, the performance degradation may be as high as 79%. We investigate the sources of this performance degradation by leveraging a new set of microbenchmarks for SGX-specific operations such as enclave entry-calls and out-calls, and encrypted memory I/O accesses. We leverage the insights we gain from these analyses to design a new SGX interface framework HotCalls. HotCalls are based on a synchronization spin-lock mechanism and provide a 13-27x speedup over the default interface. It can easily be integrated into existing code, making it a practical solution. 
Compared to a baseline SGX implementation of memcached, openVPN, and lighttpd - we show that using the new interface boosts the throughput by 2.6-3.7x, and reduces application latency by 62-74%.", "pdfUrls": [ "http://web.eecs.umich.edu/~valeria/research/publications/HotCalls-ISCA17.pdf", "http://doi.acm.org/10.1145/3079856.3080208" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4130a8dce26eaa8d0f19b9f3b2bfdc8a58a8c5e6", "sources": [ "DBLP" ], "title": "Regaining lost cycles with HotCalls: A fast interface for SGX secure enclaves", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "413c1a8a26de4de49c2b1204208ca5eb4386b5a6": { "authors": [ { "ids": [ "1795170" ], "name": "Azadeh Farzan" }, { "ids": [ "39414768" ], "name": "Victor Nicolet" } ], "doi": "10.1145/3062341.3062355", "doiUrl": "https://doi.org/10.1145/3062341.3062355", "entities": [ "Algorithm", "Library (computing)", "Parallel computing", "Programmer", "Read-only memory", "TRAVERSE", "Thread (computing)" ], "id": "413c1a8a26de4de49c2b1204208ca5eb4386b5a6", "inCitations": [ "a526e9305e0240849ef713409e41d4f358d64d66" ], "journalName": "", "journalPages": "540-555", "journalVolume": "", "outCitations": [ "bf305ec894e7404e54e36a4ee4c13c7ded519f4d", "fc2e307fa3a6fe5fdb7cf1c9f2fdc643972d2a07", "1291dc27b5e569bfeae7c9d114eed350b31cb8b7", "fd9dc505e3cf0b6a828ae67f1850658540ec9179", "a6e8098671ccfc1147870db90e80360654cd92aa", "55a52d7a86066f8d432e5c4d0447ccb7f72f71c2", "91abee69a6498d5f38ed6975b0d9a8e7b93120f3", "8d99b92100c7e6d86b46435a11eeaa9b4c79bfc5", "0e95e0ff4014053ac11fcbcee556eaab4dc1a92d", "1a0e59990e14d30665bd87030d9c895c7a650e71", "04afd5f18d3080c57d4b304dfbd1818da9a02e8e", "b070069bf40ebf2934bf3ba6f3b52c9ee0f7dace", "67d18339ed72b7fc2152cb42b63362b570c11946", "9eb614fe0af810e5c2833c6b472f7358cdddfa5e", "04ee1c7ed1b22ce513ce2672b89eb3b2ea371258", "3793cd493c6b59bdb39593e370a542af84bf8a56", 
"202e33581369f6050fc800ebc31615eb65649e78", "25929ecf00df179c51b95b7be250a5220d86d6f6", "94c20561f102ede79c461c6aeee74eb7a55d5eb4", "3ebb770bdd641a492f6159d7076ed3609ce7af47", "3720a767b99c56c69e3d2003eabd3c599d73817d", "3f94d6b6761d27fe04aae47818cb4bed79000ee0", "19ecf7778132143ec9c0324fae0aebf20c9a0217", "ecda917a3279a7027812a0566bda1ab0d8c8012b", "7da3a2f70f7d606836a48ab35d003a1814f56ba7", "19096e10c13d16dc0afd5bedcd80cb3afc4b671d", "7877b1ce32dde1e3a146e09c6a47c6dbf5630b1b", "6c725d2a7e88515c5f7c877936f90b0184c4fe8f", "e8a732e85540102732a0e729a26009891a7548b6", "db0630d01d5af1310bed614901b20a41b97d47dc", "016b912dfa328781f2c6779a28061f17828b8046", "102d5d1d8cd5d0cd9c690effe42e5341e3feea16", "cf89b25ac818bebbc8ba52984e411ae48e080da9", "2c7af45bb17cd6f3a1eed911f9cd9f504d9db7c8", "56d002964786aea0fef64d6f1c81d96c22195070", "012e452a8c8e53fefadae2c61c218bd77bd8fbac", "580a3b20fdd5b2d971d5c47704f68ecb9a86cab3", "1031677de0eb2df41fcecc24f41cabb04f6455fc", "098f97e9e583907b7b4f988104aa2125d84436ef", "b0ed5853c5d2c733e2e499b1605311cfa487a61c", "a4a74418053529de786b10be51d2e6242ff7ad67", "4a7bbb5718449555f63eb45a1ab2c71fd212a75c", "585706dc56e146c8fb42228fc5cbe1de0bb0a69d", "381596e7b51259d3c7ad16c79e9602e6b42dc27e" ], "paperAbstract": "Divide-and-conquer is a common parallel programming skeleton supported by many cross-platform multithreaded libraries, and most commonly used by programmers for parallelization. The challenges of producing (manually or automatically) a correct divide-and-conquer parallel program from a given sequential code are two-fold: (1) assuming that a good solution exists where individual worker threads execute a code identical to the sequential one, the programmer has to provide the extra code for dividing the tasks and combining the partial results (i.e. joins), and (2) the sequential code may not be suitable for divide-and-conquer parallelization as is, and may need to be modified to become a part of a good solution. 
We address both challenges in this paper. We present an automated synthesis technique to synthesize correct joins and an algorithm for modifying the sequential code to make it suitable for parallelization when necessary. This paper focuses on class of loops that traverse a read-only collection and compute a scalar function over that collection. We present theoretical results for when the necessary modifications to sequential code are possible, theoretical guarantees for the algorithmic solutions presented here, and experimental evaluation of the approach\u2019s success in practice and the quality of the produced parallel programs.", "pdfUrls": [ "http://www.cs.toronto.edu/~azadeh/papers/pldi17-ex.pdf", "http://www.cs.toronto.edu/~azadeh/resources/papers/pldi17.pdf", "http://doi.acm.org/10.1145/3062341.3062355" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/413c1a8a26de4de49c2b1204208ca5eb4386b5a6", "sources": [ "DBLP" ], "title": "Synthesis of divide and conquer parallelism for loops", "venue": "PLDI", "year": 2017 }, "41473059134cd72af64cfaccc3bb217cf59558f6": { "authors": [ { "ids": [ "35002050" ], "name": "Nadav Amit" }, { "ids": [ "38566150" ], "name": "Michael Wei" }, { "ids": [ "3202634" ], "name": "Cheng-Chun Tu" } ], "doi": "10.1145/3102980.3102987", "doiUrl": "https://doi.org/10.1145/3102980.3102987", "entities": [ "Decoupling (electronics)" ], "id": "41473059134cd72af64cfaccc3bb217cf59558f6", "inCitations": [], "journalName": "", "journalPages": "37-41", "journalVolume": "", "outCitations": [ "1c8195cadc7ad4a8b59b16fe77574dd6d160d7d2", "7aaef1c9497a31b411ba012e2a130a058c768413", "2a9d09d8e2390c92cdaa5c8b98d6dd4cb394f638", "06075f376d895be4828efd12fd600e92331fe8db", "4caf14eeccd5e5c8ce477b444b14cbdb266f6523", "17ded30902e29792b7caa1574278c6503b757ef1", "02e5b7aa2c920d6cd251e954a3dd314a174164a2", "97f355e50deffa3416b34dba7f2e3ab505ac8b2d", "129359a872783b7c3a82c2c9dbef75df2956d2d3", 
"2ee01ab9aca4163d391bd29c2123d9be44b0e986", "76b73a657ef1cb543790acc99fc8abc80dbe4fc7", "71456379e8ed03e612cf870031a3e0473d446686", "3574657705475722b6c398c266805f758268778b", "043029ff68d0449eacae8a67fc62ed4ee03215a2", "e8c412a53cfd2279efedbe2d7da7b1be57d8e93e", "ca6e70cca64c928872a8cd137515d72708b58a69", "067c7857753e21e7317b556c86e30be60aa7cac0", "01d50c4063e985a08450fd11e90d853648d9d29d", "73bded14fb8c3b4bd5c2ae554d704d3ad3ff907e", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "17f1ff82aca7a592a8815e8169b6e2210bf6ae7a", "4a66a0c4137dfdfbb618b1c2d199032f6d719a9c", "c8e7d53d48c06a5b8a9f5b9b3920f33d3b1b5e93", "7c833a334df551456885bd8b55d63753afac1fae", "7a5e615756e128ba4b47085d171916e6dff3a688", "86337138bb6dfabef8e1d45ec3c4e30d64c3ce36", "39f864cfc2bc05f89c30aaec0b5a29a66e01d650", "4ab4a666f5e5ed34ac219a9fdc2f70bd1cab0922", "be72f098670c9ad84901895c42d88df800d273e9", "a3021aba46ea0b09bac5a6f9f1e5449b13da9c05", "0bee387ff5485315e9212c2195c71c8d0e23ea1a", "07042865b10297ca4fc9164829d6330db2f60b4c" ], "paperAbstract": "", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3102987" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/41473059134cd72af64cfaccc3bb217cf59558f6", "sources": [ "DBLP" ], "title": "Hypercallbacks: Decoupling Policy Decisions and Execution", "venue": "HotOS", "year": 2017 }, "415012ec86c7a6acebd34bf7eb02eff46dd96e68": { "authors": [ { "ids": [ "7494341" ], "name": "Wenting Zheng" }, { "ids": [ "2959744" ], "name": "Ankur Dave" }, { "ids": [ "3107642" ], "name": "Jethro G. Beekman" }, { "ids": [ "34523435" ], "name": "Raluca A. 
Popa" }, { "ids": [ "30503077" ], "name": "Joseph Gonzalez" }, { "ids": [ "1716557" ], "name": "Ion Stoica" } ], "doi": "", "doiUrl": "", "entities": [ "Authentication", "Cloud computing", "Computation", "Confidentiality", "Data breach", "Encryption", "Program optimization", "Query optimization", "Relational operator", "SQL" ], "id": "415012ec86c7a6acebd34bf7eb02eff46dd96e68", "inCitations": [ "6f8fe3cbacb8436615e886b6188e2e62fd1a5b3c", "24bb8328ad26f21ca2e2322ec2c5da16586dccac", "f4ed6e5506b62faa5df8bb1407921b9d04052cdf", "0bd8f0ab2ade3cbb560dca22c2f5dfd203f4cfd3", "796de1fa2703e049cfc24be1a9ac260a987b93ce", "d034fa0209b2b2e8c68dfdb77f4bb5672a4c7713", "60a72da351f9e706b1417c5ce531aa661f314456", "8b338e925ae623adbc4cf387d3dd7ccf839d66e6", "9ca155165434e4dfd0832e4b325c88381dc603de", "70d6082e2920ceac947989f3adc45c6a9aa9c226", "0646a88dfd7e7ce7233041eaad62076ccc55624c", "a355edbb24d406761407e2728218d2192f2c1fcf", "091c3ab3e30621efc6326c4438b3300d203d8ddc", "ed84133ca8ef37a273d4b187202f55c6618b953e", "38a54f9bbbfc46599770a28999365144a273783f", "56ee02f6ca43075036c8d783b76b7ffd0105727f", "4130a8dce26eaa8d0f19b9f3b2bfdc8a58a8c5e6", "80621d09c3d3dd896c7e2bff083b9e702dc2ed29", "2fea0c41dbd7878d6b285b9f3dc62e32adba94d6", "8569785f80712b5787e12b86a3870a28c0182b2c", "334ec6e57110ece9f482f9ec2e85412b0be8072a", "53f18a9a84c41ff532302166f4456856f3711830", "0d6647696799a1f2471555f19fc050aaa8a926ea", "b3f2a11d45757e675be123d55ec0eb192bcca990" ], "journalName": "", "journalPages": "283-298", "journalVolume": "", "outCitations": [ "104eb3716d8b2d7e6d69805357d5b7fb87caff3e", "3784b73a1f392160523400ec0309191c0a96d86f", "1bb07c114cb447552d36a95445cc207f496d85aa", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "7aaede70f5efcb1542a80707c1f0f8b01955a7d2", "35a24265019c8c0c08a24e4f1865e71938f5ea86", "6d6bd93c620885cb5ddd5abfac19efffac132cd5", "244f7c483b1035144f04f7c58f20c3f806d3f1b3", "13868fa5a86ebde021a1c91415fb9bb718c4a804", "7d9fb3131e9830ec034844742c9a4d476c42c54b", 
"3ca369fa2cadb403db7ac5e75deefd9acbb10723", "9436b2e2c095edab3bdd12d49abf177ca62342d9", "47a7e2ac4ad74a45c6ff47d43c47ff6126573c8c", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "0832933a67612dc18e0f70cb0ed949fef1a830a5", "0558c94a094158ecd64f0d5014d3d9668054fb97", "080ed793c12d97436ae29851b5e34c54c07e3816", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "22a5eeb8608b35e371b7544a54fabeadca8866e3", "4beef78e9b21611a59237b63d512014e47f32d5e", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "fb7f27650f30aefa7786c874da4b7cc27a11b9df", "8fa56ecfb46b8dadf8a4dd063d15da5b975c83f1", "281abfbab7e5f4a388adeae8e7b0680b2ba5356e", "2813c3351838e036f52bcaa94eb7203bf4d9e7d3", "6871b95c14dccca7636b498b5d363a743c5288e6", "92eaba06af12761b5c64b84e6028d21cd05af9dd", "b0ac2616034f56ab1469afb935b55fe7e37f8f41", "74df90c73883c7192ec36def60ac560e37b8f1c7", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "3b2849c55fe6fd719cc298be03292a93ce78d107", "2420f3bd82b9b9a4fc99fa1e3b79b4cb6d6c3fef", "2ed92ed2d4420bcf4844e335cdcd71c0b1e9160e", "20b63210954f7c5a70664f301dcd7196856ccfa7", "2065450d96aca38c79cad5172b58660765533650" ], "paperAbstract": "Many systems run rich analytics on sensitive data in the cloud, but are prone to data breaches. Hardware enclaves promise data confidentiality and secure execution of arbitrary computation, yet still suffer from access pattern leakage. We propose Opaque, a distributed data analytics platform supporting a wide range of queries while providing strong security guarantees. Opaque introduces new distributed oblivious relational operators that hide access patterns, and new query planning techniques to optimize these new operators. Opaque is implemented on Spark SQL with few changes to the underlying system. Opaque provides data encryption, authentication and computation verification with a performance ranging from 52% faster to 3.3x slower as compared to vanilla Spark SQL; obliviousness comes with a 1.6\u201346x overhead. 
Opaque provides an improvement of three orders of magnitude over state-of-the-art oblivious protocols, and our query optimization techniques improve performance by 2\u20135x.", "pdfUrls": [ "https://people.eecs.berkeley.edu/~wzheng/opaque.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/zheng", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-zheng.pdf", "http://platformlab.stanford.edu/Seminar%20Talks/Raluca_Popa.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-zheng.pdf", "http://inst.eecs.berkeley.edu/~cs261/fa17/scribe/Opaquep.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/83eb/b8c63c6516243b20767323a5d3f7051a75f6.pdf", "s2Url": "https://semanticscholar.org/paper/415012ec86c7a6acebd34bf7eb02eff46dd96e68", "sources": [ "DBLP" ], "title": "Opaque: An Oblivious and Encrypted Distributed Analytics Platform", "venue": "NSDI", "year": 2017 }, "4163d9c39c8783c96f66bd145b1ee03d00931e41": { "authors": [ { "ids": [ "33516690" ], "name": "Lu\u00eds Pina" }, { "ids": [ "13846407" ], "name": "Daniel Grumberg" }, { "ids": [ "9533995" ], "name": "Anastasios Andronidis" }, { "ids": [ "2279823" ], "name": "Cristian Cadar" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Digital subscriber line", "Domain-specific language", "Machine code", "System call", "Tracing (software)" ], "id": "4163d9c39c8783c96f66bd145b1ee03d00931e41", "inCitations": [], "journalName": "", "journalPages": "417-429", "journalVolume": "", "outCitations": [ "bc44df77508e02b5d2cb0edbef3dfa87625e8a33", "026c84df70942697ae850f9097c1676531a49821", "066c14ffe58a07749487ea02bc1c5a5453ec455f", "209f4c5dc7c65670473836304f8c478e1e0a0980", "18b8ef71bc01b8658b4ef2c8b9a9e4e6e5c2a07b", "1f157f2b144528924eec46d9316bd5517352b89a", "72eb18b6c1faf83c876031138a75b1cfa2856c0a", "70ae295b9a7696f2d0c2fdb3a7a53f0d0e0a9320", "2fe30ea3d8d5a920b4d616a374a36d69092921a0", "0e55379d27454c5d9d72e4ba4b3752007b9f886f", 
"229252e83bfa3af97ce4a66eb173ba024728e298", "29146d2b80bc8c56f7e18efe8d2c92354254e947", "5f6a808bedd3dbfd1290063b3cd8221132ce5c95", "3ba82ae0647dc5f8c8173307f22df68d61558dc5", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "08145bd40a1abaa824585adac2263ba82a4c0548", "c77a40aa3bd7193ac72b90f77be27001cdcb72a1", "cb5ac87043a4925eaee2e432993d12586b62994b", "0d337abc01e11356869571954679519fb88a18c7", "49cae25a1796b6a2898b99b2684b33eed8f58ee9", "099cfdca0f11eeaf46dd6457f33caff8e8fbcb41", "5e38237703db5a9d90104c159a5cda19389590f8", "208e7934d900055b43b8b60e4a807ac00674ec4a", "a1843173909eaa253f5a7f147752c8cd4b0e5d71", "6e8d42ea4e8b88eacc337000c2e0b46d489f8437", "e930e75fc639dacc04a0dae5d929b31367bda122", "0336a45cb84a4838c2b6a81ba08adb9a473dcd98", "1bf78b9ab152b8c916beb6a8c196df14a467ccea", "148478868b7b1bbe221c1dbb50e1f7f152d1c3e8", "d2f7ec2d7ef5fc89bab7f6c4a8b5b8290bd56584", "89dce5f511b06739cbd1e2e2300775106bfe1d17", "0ab7f239a4784492779437ac54ae0917b5dd6730", "99cc63730e3079ed58311a4ec88f4f0c891ed61d", "3a37b82340125ef285ce5ecb1536ccaf043bbb13" ], "paperAbstract": "Multi-Version Execution (MVE) deploys multiple versions of the same program, typically synchronizing their execution at the level of system calls. By default, MVE requires all deployed versions to issue the same sequence of system calls, which limits the types of versions which can be deployed. In this paper, we propose a Domain-Specific Language (DSL) to reconcile expected divergences between different program versions deployed through MVE. We evaluate the DSL by adding it to an existing MVE system (Varan) and testing it via three scenarios: (1) deploying the same program under different configurations, (2) deploying different releases of the same program, and (3) deploying dynamic analyses in parallel with the native execution. We also present an algorithm to automatically extract DSL rules from pairs of system call traces. 
Our results show that each scenario requires a small number of simple rules (at most 14 rules in each case) and that writing DSL rules can be partially automated.", "pdfUrls": [ "https://www.usenix.org/conference/atc17/technical-sessions/presentation/pina", "https://www.usenix.org/system/files/conference/atc17/atc17-pina.pdf", "http://srg.doc.ic.ac.uk/files/papers/varan-dsl-atc-17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4163/d9c39c8783c96f66bd145b1ee03d00931e41.pdf", "s2Url": "https://semanticscholar.org/paper/4163d9c39c8783c96f66bd145b1ee03d00931e41", "sources": [ "DBLP" ], "title": "A DSL Approach to Reconcile Equivalent Divergent Program Executions", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "416acb90be46d03f7402954dddde545a2891cfa0": { "authors": [ { "ids": [ "2609325" ], "name": "Conglong Li" }, { "ids": [ "34752743" ], "name": "David G. Andersen" }, { "ids": [ "1959429" ], "name": "Qiang Fu" }, { "ids": [ "1767761" ], "name": "Sameh Elnikety" }, { "ids": [ "1772774" ], "name": "Yuxiong He" } ], "doi": "10.1145/3127479.3129255", "doiUrl": "https://doi.org/10.1145/3127479.3129255", "entities": [ "Algorithm", "Analysis of algorithms", "Cache (computing)", "Computation", "Domain controller", "Frequency capping", "Machine learning", "Search advertising", "Simulation" ], "id": "416acb90be46d03f7402954dddde545a2891cfa0", "inCitations": [], "journalName": "", "journalPages": "170-180", "journalVolume": "", "outCitations": [ "635c67d342ac2c2e26c5338d8dac115cde289828", "3ac21dd513a91d4535a0e6d4e52555657d788b9b", "0a1c762e3565a02a7305774703872dc170ec00ca", "da2b9f56a556030ea0c709f5cbc79b25abf38582", "1ff88585ce3fd5fdaab6573722d4874641359951", "daf9ed5dc6c6bad5367d7fd8561527da30e9b8dd", "2246eb11d3a82058459072d26d652143647ef58f", "668b7370d15381c4efe2445415fb806605741dd0", "8a732568ee5590e38afebaf7d94963e82a026bf7", "677bae0a827e910fc38ec6632aa5b1e6ea837a8d", "0579cb7ceecac67eefb63bef0436fbf5e552cf72", 
"19c3fcffda8e6e5870b3a533c483bca024501ab5", "7f7d58dd3163389470e67a1ffd4da45bb6f3f398", "c4a317a205429dc826034a17a869f98ba407aea0", "c69f555adb1814e85d315387e99dc4a5a05f5a3e", "ed31c96f2b2a3f17982e1b7df90cc162940fade4", "2ecc74f8662bb3455bbfec2f82f01ab3b17743eb" ], "paperAbstract": "Search advertising depends on accurate predictions of user behavior and interest, accomplished today using complex and computationally expensive machine learning algorithms that estimate the potential revenue gain of thousands of candidate advertisements per search query. The accuracy of this estimation is important for revenue, but the cost of these computations represents a substantial expense, e.g., 10% to 30% of the total gross revenue. Caching the results of previous computations is a potential path to reducing this expense, but traditional domain-agnostic and revenue-agnostic approaches to do so result in substantial revenue loss. This paper presents three domain-specific caching mechanisms that successfully optimize for both factors. Simulations on a trace from the Bing advertising system show that a traditional cache can reduce cost by up to 27.7% but has negative revenue impact as bad as -14.1%. On the other hand, the proposed mechanisms can reduce cost by up to 20.6% while capping revenue impact between -1.3% and 0%. 
Based on Microsoft's earnings release for FY16 Q4, the traditional cache would reduce the net profit of Bing Ads by $84.9 to $166.1 million in the quarter, while our proposed cache could increase the net profit by $11.1 to $71.5 million.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3129255", "http://www.pdl.cmu.edu/PDL-FTP/associated/socc17-li.pdf", "http://www.cs.cmu.edu/~conglonl/adscache-socc2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/416acb90be46d03f7402954dddde545a2891cfa0", "sources": [ "DBLP" ], "title": "Workload analysis and caching strategies for search advertising systems", "venue": "SoCC", "year": 2017 }, "4181911f86d35215ebf52668831ac8383f5894ba": { "authors": [ { "ids": [ "2798398" ], "name": "Antonis Papadimitriou" }, { "ids": [ "2348923" ], "name": "Arjun Narayan" }, { "ids": [ "1719236" ], "name": "Andreas Haeberlen" } ], "doi": "10.1145/3064176.3064218", "doiUrl": "https://doi.org/10.1145/3064176.3064218", "entities": [ "Cascading Style Sheets", "Computation", "Confidentiality", "Financial risk modeling", "Provable prime" ], "id": "4181911f86d35215ebf52668831ac8383f5894ba", "inCitations": [ "43da71a79fe0007bc97ed5c45044adc8d597c1e6", "5c9c8a8896b8814fb8082099662a948732c3c4b1" ], "journalName": "", "journalPages": "560-574", "journalVolume": "", "outCitations": [ "1e96b0c0ac74070a984fec94f085109839d842a9", "34e7390fc54ba9b29ae88f7a135e2bb79b4ca714", "0788c8b95fdc0f4b3f2578abb3ab33d6bdd81f3f", "5728cfb6d691e3e17e3de32547f610c9634c333e", "34bdd36330946cf9b377d274bdaaa7dc41888aa2", "c35842884e2edc14c1c6535afd2ea28fa921484b", "11a2dabd1d579dc3ad7f8572101833be74761746", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "1811ae18a40d07a185951ca65baa1d680000d642", "4e97598b494576d20b1a8e7c897e5987a85a5b66", "bcb49a06e4fb7ea831257e146073d84234f4d238", "0e0427aedfed65c8dd688c094b181feacf4eaab4", "1156f60e40548096df49528b1342bb3e88b0f378", "6e77caeabde0e5825e5fd02c43b7f75cab67b689", 
"b0e32f83369313c18e3ad38b47a0f0dbe42decac", "1026527f60f4df0c523dc4b4b07a06274f1f0517", "149665fb875c90039a415349f61c018a4c62d58f", "2e8b9a7a085a8bc18783e76b776c6e780116efd8", "0ad8e89091eed09217e66adc98136126addc2619", "1808b64aec21863489f0fe66f250890a3ac2b843", "4fc60830ca05cbf8c017c75cdbb40df16a67c4d9", "6b04574c2093f72889a1efec18aa96171c5b922c", "05a436f059c3897c3509dc059903364eff4a79af", "06a1d8fe505a4ee460e24ae3cf2e279e905cc9b0", "8fa56ecfb46b8dadf8a4dd063d15da5b975c83f1", "94f133780f7c4b09e2513628e5cebe67c009b7d5", "cf64ed742ab694d8a0ebed6c96a6f8709b9e8705", "145d9e6a324494a9097563b0ff850ee9e2e13c0b", "852bc8cf86fcd0a9e6d1d71246b13ea5e09db8d2", "6ca1e2f45a4e6aaed4cb0626d46a31b84fe44805", "23ec68ed03b485b645478a3f6905615617d905a6", "70fda5147aedd42c64143a464117b5ffde18a2e4", "4cebeddc0451a62aff08e6058e4a74c91f1f3cfd", "c9b6959f908807727aec298bc0d80da0cd385ca1", "371cea2d63b4c282253b7ff0002b4dd9eb29f5fb", "d60deb4ddc893a68cf0d5c33358c6cf240f88e32", "3dff11679346f5344af1018cad57fa14cc349f2f", "88915d3d45829e9b929e3c5019dda47985a13b7d", "5de068c94fbe9976a7017ce0451c05941a2fe70a", "17fac85921a6538161b30665f55991f7c7e0f940", "4f553ee2246dd617d89c487f260d77388177e1c4", "611f8a4217467e45fae660a61ee7d7951570d0a3", "02c3735fb561274588309aa7ac692f8c28088af9", "3ff4a7bcfa42348102cd49f6bf33c8ca85c94472", "bea8a00e0d2b9de80e723ee42f369546b84aee32", "748ea798880781a5db00af68d13b0cd7e03f4701", "4b8f1518b21b73d30cedf31560a83a8322f8147d", "1b39b9859937c1a1fd30f103c93e7cb408a432e8", "19c3736da5116e0e80a64db35afe421663c4b4a8", "19db199fd25aa604618d13e80cf317f0858d5604", "437d82f8fe2418ec0280efd363639211f1eff2e9", "bb63c68855d42c95623ed9362d0853ea1d4cc858", "63a0aaffe78a9f2cf8b1225407668e5845ca28f6", "13e5ca27f887c2be2795cdb335201c4c247c60f3", "5d325fc5adaa61f6240173cba8e7c391e5184343" ], "paperAbstract": "In this paper, we present DStress, a system that can efficiently perform computations on graphs that contain confidential data. 
DStress assumes that the graph is physically distributed across many participants, and that each participant only knows a small subgraph; it protects privacy by enforcing tight, provable limits on how much each participant can learn about the rest of the graph.\n We also study one concrete instance of this problem: measuring systemic risk in financial networks. Systemic risk is the likelihood of cascading bankruptcies -- as, e.g., during the financial crisis of 2008 -- and it can be quantified based on the dependencies between financial institutions; however, the necessary data is highly sensitive and cannot be safely disclosed. We show that DStress can implement two different systemic risk models from the theoretical economics literature. Our experimental evaluation suggests that DStress can run the corresponding computations in about five hours, whereas a naïve approach could take several decades.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064218", "http://www.cis.upenn.edu/~ahae/papers/dstress-eurosys2017.pdf", "http://repository.upenn.edu/cgi/viewcontent.cgi?article=2063&context=cis_reports" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4181911f86d35215ebf52668831ac8383f5894ba", "sources": [ "DBLP" ], "title": "DStress: Efficient Differentially Private Computations on Distributed Data", "venue": "EuroSys", "year": 2017 }, "4196cfaf354cd0cabab206bb932c135a173bf1b9": { "authors": [ { "ids": [ "3027703" ], "name": "Deepak Vasisht" }, { "ids": [ "2084050" ], "name": "Zerina Kapetanovic" }, { "ids": [ "2979895" ], "name": "Jongho Won" }, { "ids": [ "5200121" ], "name": "Xinxin Jin" }, { "ids": [ "39403445" ], "name": "Ranveer Chandra" }, { "ids": [ "1757937" ], "name": "Sudipta N. 
Sinha" }, { "ids": [ "2189118" ], "name": "Ashish Kapoor" }, { "ids": [ "32655294" ], "name": "Madhusudhan Sudarshan" }, { "ids": [ "9764168" ], "name": "Sean Stratman" } ], "doi": "", "doiUrl": "", "entities": [ "End-to-end principle", "Sensor", "Systems design" ], "id": "4196cfaf354cd0cabab206bb932c135a173bf1b9", "inCitations": [ "879a7fc87b9855b77786ac3bf5f97300a486df8b", "561259884f6308a91fc60216b38b2ee64ddcafe0", "076cefb7b5de0ad5b6ab53402f238e97359de39c", "54a4992e1f56742c0ce7fc9cc08168c0d1d0ef9a", "34d4e2b298e6e2fba9b222f090d2c5481c5af221", "0a3ce42de0e5c126309ca3dfaf61ca1d259af5d7", "71036a5e1489276628175390f82effb306adbced", "395958ab32bc6f19b116643ee003d774e7527a32", "8a060b3f57433680159c0295e86e23fe6723e787", "3cb86d6757d03312754ea655ec97b2b856c34c15" ], "journalName": "", "journalPages": "515-529", "journalVolume": "", "outCitations": [ "c3034c2476af69e0e279899e3ffd0749a5e784f4", "d754ce38cc84c35d20156aaf6726e3156b4b844f", "486c3f1bded5dd34025135ffa5f1246fb556fb16", "9370a3cbfef13d5cbf92a96315116bb2fbcd1601", "cdcb4cf0271a1049c8240ea70fd8579bdcdf0c97", "ad0abd106577f58475149c40c09e19cc429291bf", "ba6a9101184856d9906299b2f8dc7336e6853320", "3c29a7f3e25cb582326110308375b149e8a1239b", "18336fdfca9e54b4a1a0dc03a0eaa66379778133", "47d2d84efed00231d1e058e967d9cee898c7eefe", "332e1f6b86760a02e17c0c98abc5b89bae9088a6", "ca26e27de601999dd60284f201727d1b46725f6a", "ebfc96d215389f1f284c1e51d20473fd935d6448", "65ab5d4f190cacc3b021e9aae431a54a5bb0b378", "089c550e616574fef1e8eb6b511bd092e277feb0", "371c8b352ae4faaf052930039adf9dbeec5e6b5f", "2e0b1facffd6e9a0b1bc6b87d1dab0874846fee0", "088551fb13e50f25148429a467e397ef60199a91", "14a1fa19d5c2cc8cdbe8e34cd62d3ca3a047b732", "88b329bc40cc1786b5a25a4bd87a5ce6cf89e0f9", "968f7ae59acf5350dae6862292ec4d22615346ae", "4b0356fa20156907825f319a29c61f3f0aba31b1", "1592fe924114866c1ac559bae33ea789930daa98", "91d12b77cfb84e85c882fd57ab6e380c24dc24aa", "50fc6949a8208486e26a716c2f4b255405715bbd", 
"0e27540c58aca9081f63903ef64e10f17763196f", "926fb71f444a1b5bd967cbbe51f125ba03c5981c", "16d0a51e76bf2e5d13d785e3fe79fcb975f409ce", "9b4684d15e622f1ccc3f0f2092c7d4dc98bb2c4a", "07fab67a4903c6a5daadac3a3561569cd6db0357" ], "paperAbstract": "Data-driven techniques help boost agricultural productivity by increasing yields, reducing losses and cutting down input costs. However, these techniques have seen sparse adoption owing to high costs of manual data collection and limited connectivity solutions. In this paper, we present FarmBeats, an end-to-end IoT platform for agriculture that enables seamless data collection from various sensors, cameras and drones. FarmBeats\u2019s system design that explicitly accounts for weather-related power and Internet outages has enabled six month long deployments in two US farms.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-vasisht.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/vasisht", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-vasisht.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/03/FarmBeats-webpage-1.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/786b/30a6f49a513db1ef5cb55dfabd52076fb7be.pdf", "s2Url": "https://semanticscholar.org/paper/4196cfaf354cd0cabab206bb932c135a173bf1b9", "sources": [ "DBLP" ], "title": "FarmBeats: An IoT Platform for Data-Driven Agriculture", "venue": "NSDI", "year": 2017 }, "41bb8d014bae6069b274f919591263d557efdb2f": { "authors": [ { "ids": [ "3176695" ], "name": "Thomas F. J.-M. Pasquier" }, { "ids": [ "3426485" ], "name": "Xueyuan Han" }, { "ids": [ "39922558" ], "name": "Mark Goldstein" }, { "ids": [ "39732831" ], "name": "Thomas Moyer" }, { "ids": [ "1784358" ], "name": "David M. Eyers" }, { "ids": [ "1745942" ], "name": "Margo I. 
Seltzer" }, { "ids": [ "1691945" ], "name": "Jean Bacon" } ], "doi": "10.1145/3127479.3129249", "doiUrl": "https://doi.org/10.1145/3127479.3129249", "entities": [ "Computer forensics", "Data loss prevention software", "Distributed computing", "Experiment", "Intrusion detection system", "Kernel (operating system)", "Linux", "Linux Security Modules", "Platform as a service", "Streaming media", "Usability" ], "id": "41bb8d014bae6069b274f919591263d557efdb2f", "inCitations": [], "journalName": "", "journalPages": "405-418", "journalVolume": "", "outCitations": [ "9491f7dcfe83c95c61be9bae632c6e83e2f40bca", "467edeaadd009bf2cdb3dbb2f06299a782100711", "114f664a25c411b708c0f0058c3f455b5a1f3e9b", "961b6236019491c82213ffc1d6ba9f477b4441db", "17df839f91341db394652f84b2a8d0500559ee0e", "88be5447b8bb12f2fbf8c494f8002de37df1683e", "db03dbf76fa3148b3da6caa76422c707a7485b17", "4b5f43b41e11e9c50177d1b435514b159b545778", "090634660009ce2f92c8913baadaa9b8b44b2eab", "7fa39d4ec44a8c736107ea6da8e111bf943ae9e7", "231793a731604a9f756fe8453098ea814c1c0ef3", "0a30411ad3f537fe026bc6335adbde75a6da3a8e", "4578afb3d3108a9064f2299b47f2f32cb94926ee", "1c86fd98ff7b0fe1da0298746532ce56be7914f3", "023f23c300804754753cb11db51fb7f582556ab7", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "00aad3642752496f95fe8dbaad5e5ddeca2a0b58", "2f0a9b577ac599452a74d1b3be2e566981491864", "90e761b9936be5d2b18fc8e0bad87cf3baa564c6", "165cfb92427feca9f1e16df49a9b811a2d435313", "c62bf9819d0dbe8cc333b332b217cf442b1f1a0c", "0e33c7bb8b1626d00483fd34aab16403bf1a0e9c", "28cf3794cf80c30c6ce4d3478d381af677cf40f9", "2b9f7297fd2845e57e21636ab201ca620788d5a3", "41775c2dfdfcb34dc06a481b12140c43b8eba7ca", "886f78481666486628e3a7fab7bf2060a1826977", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "e092a234a725915e04e6b2d6e1c24d7f18c76e28", "382591224df8b8b2eb39712f282860424575754e", "1b0fc490ef2493f79cc67f2409570e75f4f64b3d", "009968fe83faa9bea30abdbc2f47a853489db633", "9e820e230fe52ac4b948d9d50f246a47a811d507", 
"bdf67e216b17a1d09991786a098877055a015cd7", "0bae160fd88c8422b303e6db2d5b99e17bf61bb9", "50f66ded285cd26c5da6b99987fb514a5b1c37cc", "6aa282b860fbb6b34d1ec90b8278ad76789b183b", "34953ebdcf96d4499472039312bda86a0ba4e7ca", "1f83e48319270c0a004d277bbb5156f1f477f98d", "4553901b0417c16bd0743cf09bd538466352dc83", "76f3f7adc5cf7c0def8654ffb9089e86d3c9256e", "f9e62a963265d243874a3149fc8fe259a3c61c4b", "67c108097001dd17bfc0d78dd1e6fd39e0c4cfdf", "8915ad31480f54e4b8ce890878c40788a56dd06f", "040f3b7db46a8d6cc0e9f58a807a29d9e8e13e3b", "867a9a2aa40e1cf558eb2fe5331a323179e6c19d", "73dffadfc50ebfc3d8e426ea9c05d06708f612ff", "2809d4876e34b8c64fc1783fe6a0a278770505b0", "1d80de948ddcf8f3524763ed3d5f4485f46d70fb", "5a1f54de8db128c604d077af4e010ecc004ee6a1", "35339f6f2e99c04920f21883df1db8004436cdc7", "182ffc1a1d0cc8ca1229a82fbf93dcd666ed8730", "3ca1e779ade787c375d776e18ea0ec05f027754b", "067e9ce84204404d19272448b2af05929c936653", "37cae0d04ec6067a919f74d3e269d54d707170c9", "a0049b565fe186f46b78e0584cf5dfc7fe461014", "d67630b667b2c4dcbd1a284dd1c459742f161fb1", "4e734135edce9cac9d91d9c9c50a0c8bec0618c3", "64a098e7de5b3200ec7513d068bf5760871025f8", "2f8bf79c762924808d09c730132979228661de1a", "3c48458d8f77b04c9e69b61f5beddf770e61ec04", "157d932bfc401b28b94e2bf642445b2b951237b7", "2077cc18da002721390a23392ce4a25d19c3e2a2", "d3b817cd7732e02420d6a4dbf58fac09ead6958a", "3b1e5c60d72ab419a721b234701c9c520f8c6719", "74d093c990f984562b3e5acfeec4aa376e13d5ee", "6409e64aed68fcc9e3fdc35b87dd168eeb440d32" ], "paperAbstract": "Data provenance describes how data came to be in its present form. It includes data sources and the transformations that have been applied to them. Data provenance has many uses, from forensics and security to aiding the reproducibility of scientific experiments. We present CamFlow, a whole-system provenance capture mechanism that integrates easily into a PaaS offering. 
While there have been several prior whole-system provenance systems that captured a comprehensive, systemic and ubiquitous record of a system's behavior, none have been widely adopted. They either A) impose too much overhead, B) are designed for long-outdated kernel releases and are hard to port to current systems, C) generate too much data, or D) are designed for a single system. CamFlow addresses these shortcoming by: 1) leveraging the latest kernel design advances to achieve efficiency; 2) using a self-contained, easily maintainable implementation relying on a Linux Security Module, NetFilter, and other existing kernel facilities; 3) providing a mechanism to tailor the captured provenance data to the needs of the application; and 4) making it easy to integrate provenance across distributed systems. The provenance we capture is streamed and consumed by tenant-built auditor applications. We illustrate the usability of our implementation by describing three such applications: demonstrating compliance with data regulations; performing fault/intrusion detection; and implementing data loss prevention. We also show how CamFlow can be leveraged to capture meaningful provenance without modifying existing applications.", "pdfUrls": [ "http://arxiv.org/abs/1711.05296", "https://arxiv.org/pdf/1711.05296v1.pdf", "https://www.cl.cam.ac.uk/research/dtg/www/files/publications/public/tfjmp2/socc2017.pdf", "http://doi.acm.org/10.1145/3127479.3129249", "https://projects.iq.harvard.edu/files/provenance-at-harvard/files/socc-2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/41bb8d014bae6069b274f919591263d557efdb2f", "sources": [ "DBLP" ], "title": "Practical whole-system provenance capture", "venue": "SoCC", "year": 2017 }, "41d01619b4f0d14be5c0135ca35f06fb5fc93b2a": { "authors": [ { "ids": [ "1697093" ], "name": "Jungwon Kim" }, { "ids": [ "8568681" ], "name": "Seyong Lee" }, { "ids": [ "7553591" ], "name": "Jeffrey S. 
Vetter" } ], "doi": "10.1145/3126908.3126943", "doiUrl": "https://doi.org/10.1145/3126908.3126943", "entities": [ "Application checkpointing", "Attribute\u2013value pair", "Byte", "Distributed computing", "High- and low-level", "Key-value database", "Non-volatile memory", "Programmer", "Scalability", "Supercomputer", "Zero-copy" ], "id": "41d01619b4f0d14be5c0135ca35f06fb5fc93b2a", "inCitations": [], "journalName": "", "journalPages": "57:1-57:14", "journalVolume": "", "outCitations": [ "1d99b7749a9311d2db24a3d84728e444eff23e4b", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "6bab4397efe09fa20cacb7fe54cc0cb2fc1c3b29", "59ba9f62728b6231f982ea3b59f9ba7422182f28", "3b2af12a43d06338dd62681328c75a1999fc87fd", "1c82d6dd3fde20878f9500c31351a3ceb9c05a46", "6c1f7496580d1169b232c53981f1e63e593be21f", "11c136aa1136ccf6ebbb23c3b3e1fbdd8447bb00", "2cdcb05bad9c38dfa39530b159a4ecc0e94d922f", "18a5f443299784479e78d9e77f175af57cb2fa2b", "6e0ade8e4c0948e47b7e1ad78eacf42e5f9d8d0f", "10bc2b0ffb70f0f894a4c30c7f2fb4cce0e035cf", "a578daf478e4555e6e81c63cef4f138d92f93245", "da8f5c3e65e2eb398dc5a4866023ef51e4056905", "298c343b898c6602cd0786ff84361d6c2e891e31", "2706db42926e0e58e35336331f6d3b62f0811cf5", "3593269a4bf87a7d0f7aba639a50bc74cb288fb1", "24b557e9f98829a18ef808daa3c7b5522e0ea81a" ], "paperAbstract": "This paper introduces PapyrusKV, a parallel embedded key-value store (KVS) for distributed high-performance computing (HPC) architectures that offer potentially massive pools of nonvolatile memory (NVM). PapyrusKV stores keys with their values in arbitrary byte arrays across multiple NVMs in a distributed system. PapyrusKV provides standard KVS operations such as put, get, and delete. More importantly, PapyrusKV provides advanced features for HPC such as dynamic consistency control, zero-copy workflow, and asynchronous checkpoint/restart. 
Beyond filesystems, PapyrusKV provides HPC programmers with a high-level interface to exploit distributed NVM in the system, and it transparently organizes data to achieve high performance. Also, it allows HPC applications to specialize PapyrusKV to meet their specific requirements. We empirically evaluate PapyrusKV on three HPC systems with real NVM devices: OLCF's Summitdev, TACC's Stampede, and NERSC's Cori. Our results show that PapyrusKV can offer high performance, scalability, and portability across these various distributed NVM architectures.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126943" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/41d01619b4f0d14be5c0135ca35f06fb5fc93b2a", "sources": [ "DBLP" ], "title": "PapyrusKV: a high-performance parallel key-value store for distributed NVM architectures", "venue": "SC", "year": 2017 }, "41da20c0fb04dd4769f3772e392362acd893af57": { "authors": [ { "ids": [ "2823917" ], "name": "Srivatsa S. Bhat" }, { "ids": [ "27088090" ], "name": "Rasha Eqbal" }, { "ids": [ "3098834" ], "name": "Austin T. Clements" }, { "ids": [ "1681493" ], "name": "M. 
Frans Kaashoek" }, { "ids": [ "1789973" ], "name": "Nickolai Zeldovich" } ], "doi": "10.1145/3132747.3132779", "doiUrl": "https://doi.org/10.1145/3132747.3132779", "entities": [ "CPU cache", "Data structure", "Directory (computing)", "In-memory database", "Linux", "Linux", "Multi-core processor", "Scalability", "Sync (Unix)", "Systems design", "Test case", "Throughput" ], "id": "41da20c0fb04dd4769f3772e392362acd893af57", "inCitations": [ "4e731dfc4eee0006865d131b384f46b29965f42e" ], "journalName": "", "journalPages": "69-86", "journalVolume": "", "outCitations": [ "6f9058b5175aee958e330527aeb55074702dbfd4", "961eb67eb799aa73428207dfe2a888fa509c3265", "c9ef82a4ad0b1b33296cea86fb2ec7558cf798fb", "23ee1c97c4a1229618bf6a614b02f33dc678fe6b", "4228d3fee9d28b3b3a7dc8e9585d02652109029f", "7ef137faca4da278382ccdcb90da8fcd19faca36", "01f21f3aacb36a425aa9213a10ccc543a11659ab", "40adb634bd47d1490a8786a6ffc2545cbba31044", "274e7e576534b3e091f09e801cce807f5fd221c1", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "0389b1a838654044822c8fb73728d0ac7f01a8a7", "128c3e04314e6fca8deed005d74a3d1ba36ad293", "d0b6d2075a653d60452b6df0fced4ee0ae093dd2", "47b78e7eb12859a141aed6a28a4e301eb0352629", "ad43b820f35a18a45438a295b2c546b689a35e0c", "088e3e939ad234b6fdd0e321290fb26937dc2553", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "09c0d62190aedb53e820695ccbe98d90f877cc46", "7c93da85cffca5374a2eab5dbaa8ae0c581d62bf", "1b0eace707f6b86e94793d1a7c83b7d065e604fa", "08b5b8270713bbbc0b5b1a31c8625b0bf87d674d", "6c6d79ada6ea6ed4d8957f23579bea739d90bc19", "26f820aa9e782f5d6ba8bcb272a31c32094dfd59", "2f2941de162a17eda0ffbf7eef8981ee25808df5", "2be26e8aa238ac37a80e08303f128d8014bb9f3b", "243c522b56809292f1f50117a9915053d32bf4fb", "acca916dcf29e548a8f3bd53b05acd18380b0f03", "093d1d23500d65e99c7d0cd5569ce0f0d4c37076", "42142c121b2dbe48d55e81c2ce198a5639645030", "998eaf2eadcb9c6dbd07d61c201e94091fa854b2", "0f09c5a706eae3fa3f90875524ebaf3b3747c5f9", "b8ddec47f9fab1eddb5c9cacf703781dd5337b87", 
"120c8504b4290920309165d48bb032f2c724a161", "00caa4dea9216bec01b465f8a69d0e1becc07b7a", "012ab4527d6aee2387c243d304c624f3b9cf03f3", "93b58f721de046dacada133902e6d07c6f46501f", "20f5f8733134d87041b95b742d613051a1fb3fdb" ], "paperAbstract": "It is challenging to simultaneously achieve multicore scalability and high disk throughput in a file system. For example, even for commutative operations like creating different files in the same directory, current file systems introduce cache-line conflicts when updating an in-memory copy of the on-disk directory block, which limits scalability.\n ScaleFS is a novel file system design that decouples the in-memory file system from the on-disk file system using per-core operation logs. This design facilitates the use of highly concurrent data structures for the in-memory representation, which allows commutative operations to proceed without cache conflicts and hence scale perfectly. ScaleFS logs operations in a per-core log so that it can delay propagating updates to the disk representation (and the cache-line conflicts involved in doing so) until an fsync. The fsync call merges the per-core logs and applies the operations to disk. 
ScaleFS uses several techniques to perform the merge correctly while achieving good performance: timestamped linearization points to order updates without introducing cache-line conflicts, absorption of logged operations, and dependency tracking across operations.\n Experiments with a prototype of ScaleFS show that its implementation has no cache conflicts for 99% of test cases of commutative operations generated by Commuter, scales well on an 80-core machine, and provides on-disk performance that is comparable to that of Linux ext4.", "pdfUrls": [ "https://pdos.csail.mit.edu/papers/scalefs.pdf", "http://people.csail.mit.edu/nickolai/papers/bhat-scalefs.pdf", "http://doi.acm.org/10.1145/3132747.3132779" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/41da20c0fb04dd4769f3772e392362acd893af57", "sources": [ "DBLP" ], "title": "Scaling a file system to many cores using an operation log", "venue": "SOSP", "year": 2017 }, "41f474879e4fa61b3b7d32ea0c89a260151516d4": { "authors": [ { "ids": [ "3414970" ], "name": "Muhammed Nufail Farooqi" }, { "ids": [ "2749676" ], "name": "Didem Unat" }, { "ids": [ "2787365" ], "name": "Tan Nguyen" }, { "ids": [ "2360381" ], "name": "Weiqun Zhang" }, { "ids": [ "2896475" ], "name": "Ann S. 
Almgren" }, { "ids": [ "1746446" ], "name": "John Shalf" } ], "doi": "10.1007/978-3-319-64203-1_49", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_49", "entities": [ "Adaptive Multi-Rate audio codec", "Asynchrony (computer programming)" ], "id": "41f474879e4fa61b3b7d32ea0c89a260151516d4", "inCitations": [], "journalName": "", "journalPages": "682-694", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_49" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/41f474879e4fa61b3b7d32ea0c89a260151516d4", "sources": [ "DBLP" ], "title": "Nonintrusive AMR Asynchrony for Communication Optimization", "venue": "Euro-Par", "year": 2017 }, "41fe6f6d4c0947a0b078bd52a9fea0821b539381": { "authors": [ { "ids": [ "28982001" ], "name": "Kai Zhang" }, { "ids": [ "3032149" ], "name": "Chuanren Liu" }, { "ids": [ "1698586" ], "name": "Jie Zhang" }, { "ids": [ "1707713" ], "name": "Hui Xiong" }, { "ids": [ "1752601" ], "name": "Eric P. 
Xing" }, { "ids": [ "37513601" ], "name": "Jieping Ye" } ], "doi": "10.1145/3097983.3098050", "doiUrl": "https://doi.org/10.1145/3097983.3098050", "entities": [ "Algorithm", "Approximation algorithm", "Column (database)", "Computational science", "Computer science", "Data mining", "Grammar-based code", "Lossy compression", "Machine learning", "Memory footprint", "Quadratic function", "Randomized algorithm", "Sampling (signal processing)", "Signal compression", "Time complexity", "Von Neumann architecture" ], "id": "41fe6f6d4c0947a0b078bd52a9fea0821b539381", "inCitations": [], "journalName": "", "journalPages": "615-623", "journalVolume": "", "outCitations": [ "36ab10bf5230969194efed442e6ffdaeaa32bb13", "0d0f5708846382d9877ec6814bffc41ef1f9a895", "4a85424b802a65b1e2c48e6e6c48117191c5644c", "20b1a042f55cc836c1a4ea1199375dcd370a85fd", "658d97c2ea8a6ed1d9de4ef0f85da21a8816d29a", "6f3bb84ee1b5d638e2d605ae0eb1014e2b6e3931", "1c95bfb79b0605fd1d0dbd15fa98c61cb54fbd54", "161efa43554434a82b09ce0eaeda66fb946d75c6", "2e939434c9bae1fefd9e543ae67398ef99528480", "36797a5cad36a29fbc6540590130f99857f3b080", "54ebea40c36d710c8206a6c91062c31e48bb50bf", "5189721cf85e40f024a65ca83d910a4fc1ab4449", "0686b17e6b9e04e767a7ab7874627c8dd1601bf6", "56a4f2df5c7680e44dd76aa08f65383cf8c910f1", "16dec5a26418783d880c6f622779a64f14092a3f", "3244a2738ffb7376bf335600113ec91d12043194", "1145859ba17172d517cdffe2a5f00a16366c5765", "3885e216c305cbc002ad017315076573c9df6eb4", "0faacea3b9184036a27fa799f8b70f4b1c890397", "7bd4240e3a894dd7cc457912b9222ce8f853e5d0", "b6fff8b8ea77f157913986e7af53951d9fc1128e", "64b3435826a94ddd269b330e6254579f3244f214", "0e92d987bd466eebb7993e8bb04c811ce61a4882", "33d23e5fa7106bce2f6bbb8efafccf7712a6c2b4", "aef0161883cfd49bbe26826cf2e40f8195ce59cf", "50353738eb935a0546aeb2e81882e2f334e32ab8", "01cbff216f2888f96151fb490338af40a09a0c30", "6d73c9947d840cd84a8eee79c224add5fbbb929e", "8479a404b73afd6a61d8a872086d9e7d6d2bdf30", "d42e086f8c2e646d545761b098c9f35149af3b06", 
"6fd35dfdb8971312f2a0cfa2899b2f4eb63536c7", "062472a5b7a3e1a83bf00596397a112f226dfcf2" ], "paperAbstract": "Matrix sketching is aimed at finding compact representations of a matrix while simultaneously preserving most of its properties, which is a fundamental building block in modern scientific computing. Randomized algorithms represent state-of-the-art and have attracted huge interest from the fields of machine learning, data mining, and theoretic computer science. However, it still requires the use of the entire input matrix in producing desired factorizations, which can be a major computational and memory bottleneck in truly large problems. In this paper, we uncover an interesting theoretic connection between matrix low-rank decomposition and lossy signal compression, based on which a cascaded compression sampling framework is devised to approximate an m-by-n matrix in only O(m+n) time and space. Indeed, the proposed method accesses only a small number of matrix rows and columns, which significantly improves the memory footprint. Meanwhile, by sequentially teaming two rounds of approximation procedures and upgrading the sampling strategy from a uniform probability to more sophisticated, encoding-orientated sampling, significant algorithmic boosting is achieved to uncover more granular structures in the data. 
Empirical results on a wide spectrum of real-world, large-scale matrices show that by taking only linear time and space, the accuracy of our method rivals those state-of-the-art randomized algorithms consuming a quadratic, O(mn), amount of resources.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098050" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/41fe6f6d4c0947a0b078bd52a9fea0821b539381", "sources": [ "DBLP" ], "title": "Randomization or Condensation?: Linear-Cost Matrix Sketching Via Cascaded Compression Sampling", "venue": "KDD", "year": 2017 }, "420fc03783c7948a7940e237a7b521a6459c727f": { "authors": [ { "ids": [ "1798546" ], "name": "Leonid Barenboim" }, { "ids": [ "1785569" ], "name": "Michael Elkin" }, { "ids": [ "32572413" ], "name": "Tzalik Maimon" } ], "doi": "10.1145/3087801.3087812", "doiUrl": "https://doi.org/10.1145/3087801.3087812", "entities": [ "Algorithm", "Arboricity", "Clique (graph theory)", "Color", "Computation", "DTIME", "Edge coloring", "Email", "Graph coloring", "Line graph", "Message passing", "Polylogarithmic function", "Recursion", "Telecommunications network", "The Globe and Mail", "Time complexity", "Vertex (graph theory)" ], "id": "420fc03783c7948a7940e237a7b521a6459c727f", "inCitations": [ "c18459919bd10630eeaad61e59dac7d63e6e0186", "9c17f5c76ff9e48a065d0022c56f18533290400d" ], "journalName": "", "journalPages": "175-184", "journalVolume": "", "outCitations": [ "211a34bc1ef86b550be3af4fc95df3d5a556c1b0", "10d9b4b9984d32c8211ce98ac0284811f9e093b1", "1d3e168bee7fe6216ec668614e5da7ca6cb82504", "aa052149b40abaabbcdf637b85c7bc3c6ed570cc", "ce14dc549c2b8dd9d71e538dea1265bfc1d81019", "9979072b031d2d65a590a4f421e9700ff952dcc0", "64257fbe36c61d28f1761d4ae98efd62adfecc6d", "1b7142d759fddfc03e8735e1048e45c3cec73326", "e18dfb9ed0095a642f8e9f821b41d6e74a5fa867", "2faae8bab8d7314608f0c268d5e485604971dadf", "145c3ca2ea0faebcdc42de8fa24dc57ecdca341d", "e84d4c6ee79b0515a5f8d4873151107960c55c4e", 
"0e0b8def400fb8c4b9c42a1b99679af6dfeb511a", "029f5343ab0234cac6bfc99fd1a92ef5ecae3821", "56622335ab668bd14403154a2589414c610472ce", "2492685847defcff7c2b251559b824b55177e565", "0a18b0195b85a34d42652527932103595671a31c", "5d1d295955032ce426ceb5d7aebebd4ce6099ec0", "f394c14552688b35ac2e1ee31de8aceffe682c75", "5e493ab3e938d34aeca99d463463d58a863ee97e", "92de7f724a73fbf1a79fd369f69f0905cb80e530", "aedc64e55b822d3db28090086338d1609b10161a", "d00db20e0a1a92d4bb566d20e3341060b9f4158a", "e0905d1b5cf144bbadd085acb94aee9880f482fc", "d5aa5c2e5add29abd1023587c2e93a97bc2a6c8e", "01f2dc9dbc45ec3179f825fd86c9e6487cc9b52f", "17d3d569ff60ef353c50dc498f8fe31bc2884615", "5b54368b3ee99678c92bac4137224a45a34d89dd", "02109a749ee1d0cf4570fd23b04f08dcc568c586", "3f00b1360b7be9605022137d16c98458225c92c1", "1b5b48e349c7a264a62a05cc1d654a8122f9133f", "1f912ac1e1f8a9bfda1cf7664648a74b6559a407" ], "paperAbstract": "In the distributed message-passing setting a communication network is represented by a graph whose vertices represent processors that perform local computations and communicate over the edges of the graph. In the distributed edge-coloring problem the processors are required to assign colors to edges, such that all edges incident on the same vertex are assigned distinct colors. The previouslyknown deterministic algorithms for edge-coloring employed at least (2\u2206\u2212 1) colors, even though any graph admits an edge-coloring with \u2206 + 1 colors [36]. Moreover, the previously-known deterministic algorithms that employed at most O(\u2206) colors required superlogarithmic time [3, 6, 7, 17]. In the current paper we devise deterministic edge-coloring algorithms that employ only \u2206 + o(\u2206) colors, for a very wide family of graphs. Specifically, as long as the arboricity a of the graph is a = O(\u22061\u2212 ), for a constant > 0, our algorithm computes such a coloring within polylogarithmic deterministic time. 
We also devise significantly improved deterministic edge-coloring algorithms for general graphs for a very wide range of parameters. Specifically, for any value \u03c7 in the range [4\u2206, 2 \u2206) \u00b7 \u2206], our \u03c7-edge-coloring algorithm has smaller running time than the best previously-known \u03c7-edge-coloring algorithms. Our algorithms are actually much more general, since edge-coloring is equivalent to vertexcoloring of line graphs. Our method is applicable to vertex-coloring of the family of graphs with bounded diversity that contains line graphs, line graphs of hypergraphs, and many other graphs. We significantly improve upon previous vertex-coloring of such graphs, and as an implication also obtain the improved edge-coloring algorithms for general graphs. Our results are obtained using a novel technique that connects vertices or edges in a certain way that reduces clique size. The resulting structures, which we call connectors, can be colored more efficiently than the original graph. Moreover, the color classes constitute simpler subgraphs that can be colored even more efficiently using appropriate connectors. Hence, we recurse until we obtain sufficiently simple structures that are colored directly. We introduce several types of connectors that are useful for various scenarios. We believe that this technique is of independent interest. \u2217 Open University of Israel. E-mail: leonidb@openu.ac.il \u2217\u2217 Ben-Gurion University of the Negev. Email: elkinm@cs.bgu.ac.il \u2217\u2217\u2217 Open University of Israel. Email: tzali.tm@gmail.com This research has been supported by the Israeli Academy of Science, grant 724/15. ar X iv :1 61 0. 
06 75 9v 1 [ cs .D C ] 2 1 O ct 2 01 6", "pdfUrls": [ "http://arxiv.org/abs/1610.06759", "https://arxiv.org/pdf/1610.06759v1.pdf", "http://doi.acm.org/10.1145/3087801.3087812" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/420f/c03783c7948a7940e237a7b521a6459c727f.pdf", "s2Url": "https://semanticscholar.org/paper/420fc03783c7948a7940e237a7b521a6459c727f", "sources": [ "DBLP" ], "title": "Deterministic Distributed (Delta + o(Delta))-Edge-Coloring, and Vertex-Coloring of Graphs with Bounded Diversity", "venue": "PODC", "year": 2017 }, "4210fcb8a1d6e40c664004c21984e476c1b7af5e": { "authors": [ { "ids": [ "1692860" ], "name": "Hao Yin" }, { "ids": [ "3245681" ], "name": "Austin R. Benson" }, { "ids": [ "1702139" ], "name": "Jure Leskovec" }, { "ids": [ "1757913" ], "name": "David F. Gleich" } ], "doi": "10.1145/3097983.3098069", "doiUrl": "https://doi.org/10.1145/3097983.3098069", "entities": [ "Algorithm", "Cluster analysis", "Clustering coefficient", "Conductance (graph)", "Graph partition", "Motif", "PageRank", "Personalization", "Synthetic data", "Time complexity" ], "id": "4210fcb8a1d6e40c664004c21984e476c1b7af5e", "inCitations": [ "779d35337dd920306560653b41cbfdd5414d286b", "ffeacdfd523df26d5aa7b3c7317db76f19871222", "b8c09ce815e4bade6452615f3c076d7b16cb1161", "e68213f04381f127ab3c6b4e056faa116c25c902", "62654e3e3afe43ff069c60cf303456ecca479663", "49bab785d8a7a7a78bc71cce860acacaba111ab4", "54efa3f172b58a98afd03079c519f5896d233b83", "053bc51ab3bf6505c973db1a96d04da2cf35c8ad", "7de76091110d02684e4839527b43b766b2030e91", "81ea117f312d5d909f7c18a8a573b28735c87abd" ], "journalName": "", "journalPages": "555-564", "journalVolume": "", "outCitations": [ "1521d39088b203ddac981d10d214f463449ae95b", "7664b703efab1eb2815b94b02f2ec31e534ca9cb", "51cd0cb2ebf03b987417ff0c6b220e606c1b5c1f", "3e656e08d2b8d1bf84db56090f4053316b01c10f", "121f2965514f9b7b20e6ea5c440f5e6bb1ac2998", "36dc8d9a0cd5a33681c5a65dd0f8751819d37733", "370420c7e4019d1d261462e39dc07b70f09f48b9", 
"1ca4cb45e719d3840ae030725f943b7429581789", "36ecc904453d282178236d0b98d4e125b9bf83f9", "141e35263ab810983c90d47ad62eb4fab5e51717", "246dc6e35eef61bcc61b8051b42372d058ceb2b6", "8e01e7a7eac30456459ec17024d1a0ea957a5fe2", "0cf0c343ae17b0db4b5564b22480aa0415ef61af", "335f644137cdc45de2046e02b365ee0353d4a418", "e4f5ce20cd5c5c0b53b0ae0c87fbf2dd60d1da57", "cbce38ea34c90b8a53f35f10b2235e65fc5fe167", "1091923685d6b3462a6a1cd3ecdcf7d115816234", "ae3fe517a28e4f84779ef931413cca01498f8e7e", "c4c9f645ca334f03b27d6ca646ba7e32fa323aa3", "fcf2fde78b856eca9b79adb201935e36cd206985", "135d89a35623359aa3af7ce6f95b0078c6acc43a", "b117e3ab62ce5a405428bd3b9f76afd1293e0c85", "437a492ddaf7a68a4634036897f528e2c9dbc349", "1e2459b78813d1f69dda225527a6f3f54245aa0b", "481061586800e4018442f02634834c0706e2c7e3", "220a5c1dae985cc4166a122bb3f1cc9b169e15c0", "29cc0a8802126d4e97f28109763df26ab91c6531", "46e0de0924d3f49eefc8547cfa6b7e862fee2deb", "5f944a6722a6e0f2114e8fd1262eb01ede6d5b9d", "95b251202e1f5ff7d7f41dda553a38e395ecf555", "551cc8f8570c91746baa07c627b548d6c3f34000", "2440a3bce01e9a91f255d2d03447e5c1c53574da", "13c347cff81bdb5753ff8f626d13d603d469592b", "2ec587a3b1a3ff5544fda95f00e1681be0c0e8b9", "1b348075d02cc532b1a01955e21ba3062e769113", "9a29101c7e4c2946a061b6bb27a105beb7bc521d", "3ba6c70ee9e07bbf31ba34f3c447c0e873fbfe11", "52474e58641a0fea46327c1da7189b2b172f3991", "1783b3872afe4265007c636199c631ad616893cd", "01b489e00d66360d070d9fa5779107c9383cfb2e", "3e8464dfec833b1629beb2dd04a6db816627b4c0", "5b0d31f0dfed5d94c97eedfc32b7d7d8eea21d21", "4ab328d8a9e7e85661c762bd36a66ba70bb5ec5e" ], "paperAbstract": "Local graph clustering methods aim to find a cluster of nodes by exploring a small region of the graph. These methods are attractive because they enable targeted clustering around a given seed node and are faster than traditional global graph clustering methods because their runtime does not depend on the size of the input graph. 
However, current local graph partitioning methods are not designed to account for the higher-order structures crucial to the network, nor can they effectively handle directed networks. Here we introduce a new class of local graph clustering methods that address these issues by incorporating higher-order network information captured by small subgraphs, also called network motifs. We develop the Motif-based Approximate Personalized PageRank (MAPPR) algorithm that finds clusters containing a seed node with minimal \\emph{motif conductance}, a generalization of the conductance metric for network motifs. We generalize existing theory to prove the fast running time (independent of the size of the graph) and obtain theoretical guarantees on the cluster quality (in terms of motif conductance). We also develop a theory of node neighborhoods for finding sets that have small motif conductance, and apply these results to the case of finding good seed nodes to use as input to the MAPPR algorithm. Experimental validation on community detection tasks in both synthetic and real-world networks, shows that our new framework MAPPR outperforms the current edge-based personalized PageRank methodology.", "pdfUrls": [ "http://cs.stanford.edu/people/jure/pubs/mappr-kdd17.pdf", "http://doi.acm.org/10.1145/3097983.3098069", "http://www.cs.cornell.edu/~arb/papers/local-higher-order-kdd-2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4210fcb8a1d6e40c664004c21984e476c1b7af5e", "sources": [ "DBLP" ], "title": "Local Higher-Order Graph Clustering", "venue": "KDD", "year": 2017 }, "4220a37e7543fc3c2f9ca467491393f918cbf10d": { "authors": [ { "ids": [ "39510193" ], "name": "Mohit Kumar" }, { "ids": [ "2835837" ], "name": "Youhuizi Li" }, { "ids": [ "38737155" ], "name": "Weisong Shi" } ], "doi": "10.1109/IGCC.2017.8323579", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323579", "entities": [ "Acclimatization", "Carbon Footprint", "Clock gating", "Computation", 
"EAF2 gene", "Exception handling", "Frequency scaling", "Granule", "Java", "Java Programming Language", "Perf (Linux)", "Performance per watt", "Power gating", "Programming Languages", "RASSF5 gene", "Software developer", "Supercomputer", "Watt", "voltage" ], "id": "4220a37e7543fc3c2f9ca467491393f918cbf10d", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ "732cd4d859a4478a8535dcb1958be5146b8bc19a", "97038a39292325c4f7742dc38b4a31ba91117802" ], "paperAbstract": "There has been a 10,000-fold increase in performance of supercomputers since 1992 but only 300-fold improvement in performance per watt. Dynamic adaptation of hardware techniques such as fine-grain clock gating, power gating and dynamic voltage/frequency scaling, are used for many years to improve the computer's energy efficiency. However, recent demands of exascale computation, as well as the increasing carbon footprint, require new breakthrough to make ICT systems more energy efficient. Energy efficient software has not been well studied in the last decade. In this paper, we take an early step to investigate the energy efficiency of Java which is one of the most common languages used in ICT systems. We evaluate energy consumption of data types, operators, control statements, exception, and object in Java at a granular level. Intel Running Average Power Limit (RAPL) technology is applied to measure the relative power consumption of small code snippets. 
Several observations are found, and these results will help in standardizing the energy consumption traits of Java which can be leveraged by software developers to generate energy efficient code in future.", "pdfUrls": [ "http://www.cs.wayne.edu/~weisong/papers/kumar17-eejava.pdf", "https://doi.org/10.1109/IGCC.2017.8323579" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4220a37e7543fc3c2f9ca467491393f918cbf10d", "sources": [ "DBLP" ], "title": "Energy consumption in Java: An early experience", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "4235f05c95d685591b84e3cd572dda23e914f158": { "authors": [ { "ids": [ "2444195" ], "name": "David Hallac" }, { "ids": [ "19216916" ], "name": "Sagar Vare" }, { "ids": [ "1872152" ], "name": "Stephen P. Boyd" }, { "ids": [ "1702139" ], "name": "Jure Leskovec" } ], "doi": "10.1145/3097983.3098060", "doiUrl": "https://doi.org/10.1145/3097983.3098060", "entities": [ "Algorithm", "Augmented Lagrangian method", "Cluster analysis", "Dynamic programming", "Expectation\u2013maximization algorithm", "Experiment", "Interdependence", "Markov random field", "Multiprogram Research Facility", "Scalability", "Simultaneous equations model", "Synthetic data", "Time series", "Toeplitz Hash Algorithm" ], "id": "4235f05c95d685591b84e3cd572dda23e914f158", "inCitations": [ "ba480ace3750b4dcf92ed882fbdade4d1c73b282", "5b19ff1b60a042717566e2efe0854a39abfb26f8" ], "journalName": "", "journalPages": "215-223", "journalVolume": "", "outCitations": [ "23884091409d0c0b8f9f20bd74086402c22c8ffd", "4ec814ef479ac03dbdef95e4aff60bdb333939ef", "25c760c11c7803b2aefd6b6ae36f15908f76b544", "277c68718ebdf79be1d79cb957ac468fa4519277", "037b3d3ddfded992dd068730f4b5d7e7793a780d", "322e8b627d89fd082903af8979f335ac24b27271", "81e1a745cc65dbbbd5bf747471630a21b0301eb9", "04f39720b9b20f8ab990228ae3fe4f473e750fe3", "0a8cae43856f9c5a08b79b2153dcbbd46698c9c2", 
"01175f0720d955e030ae74690706a1f14ba23278", "0af803c078f337b19a6b32b28b168654271dc8bf", "0ad9f3c2baf70d1fdfdfa4688b46a0c660888dbe", "9e18015bffe5e5f0ed7240e7af7ed19a934ae32f", "59ae1824954c553a53bd9a8d03e8fd2421948690", "f16841e022038e94a59f7e0a82002102b78d79a4", "38eef1a61eb07f1e13da453ddd0c5d4acc3424bb", "1ac57524ba2d2a69c1bb6defed7352a06fd7050d", "1ef8c8c815b7268d7f7d4fe76af78aaa8df3e6da", "c92420f001e023c693db762758f9590571256e35", "603233a11e93193a3d4fc8b9cbfb2e2c1ebf8f9a", "7de1f5079ed7a8a8a5690f72ad2099f52d697393", "42b6ea0bbc174ce1ecb116aff6280559545dd6db", "244e27bf8c5a5277a733c5271fdeb306f98115d5", "e294c01b3f586cda75058ec16fe64b90e2807f54", "0f98317ec66c6eb631b0860b7362612049532c1f", "4eee16c62efb50b2438031b1d949e0c959ab4e32", "3c84e2ed018dd1d971b526f87e9d7c1f08e6230f", "2679a2729924ac9eaf27c08a8fac3d36f4cb3ae0", "83b989c8aa7043873cc324c8a54ea461037db720", "49b02f363d137d7ca99b499ce4bf438103e35e7a", "014d6c2cc94f04e92a16137bf60a4f3174245064", "23628d066737e98dd8877c8e92a2cce20def1825", "2f3876251cc2f03a06190ef0a47544042da416aa", "2ace11d00f638bc6c7cba39c0ba63401e0e53eb0", "33328d6b669a550c88ab9db4102be3f00fc1c402", "1adce5409b4829746322136e243f4b0f9a1716dc", "b14cdccfc712cd16e240e13b30be07b75fd0900b", "0cd9f50f2c5a5ebdd110a51b91e96b9da016a848", "9c018f1b0aabe84ba61dfc969e273f2a9653c6ec", "d921036a6cb7e340b019afa557a19bc65586a1ad", "c3708f2ee0cf0e701c8733744cf13614520e14ea" ], "paperAbstract": "Subsequence clustering of multivariate time series is a useful tool for discovering repeated patterns in temporal data. Once these patterns have been discovered, seemingly complicated datasets can be interpreted as a temporal sequence of only a small number of states, or clusters. For example, raw sensor data from a fitness-tracking application can be expressed as a timeline of a select few actions (i.e., walking, sitting, running). However, discovering these patterns is challenging because it requires simultaneous segmentation and clustering of the time series. 
Furthermore, interpreting the resulting clusters is difficult, especially when the data is high-dimensional. Here we propose a new method of model-based clustering, which we call Toeplitz Inverse Covariance-based Clustering (TICC). Each cluster in the TICC method is defined by a correlation network, or Markov random field (MRF), characterizing the interdependencies between different observations in a typical subsequence of that cluster. Based on this graphical representation, TICC simultaneously segments and clusters the time series data. We solve the TICC problem through alternating minimization, using a variation of the expectation maximization (EM) algorithm. We derive closed-form solutions to efficiently solve the two resulting subproblems in a scalable way, through dynamic programming and the alternating direction method of multipliers (ADMM), respectively. We validate our approach by comparing TICC to several state-of-the-art baselines in a series of synthetic experiments, and we then demonstrate on an automobile sensor dataset how TICC can be used to learn interpretable clusters in real-world scenarios.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098060", "http://cs.stanford.edu/people/jure/pubs/ticc-kdd17.pdf", "https://web.stanford.edu/~hallac/TICC.pdf", "http://arxiv.org/abs/1706.03161", "https://arxiv.org/pdf/1706.03161v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4235f05c95d685591b84e3cd572dda23e914f158", "sources": [ "DBLP" ], "title": "Toeplitz Inverse Covariance-Based Clustering of Multivariate Time Series Data", "venue": "KDD", "year": 2017 }, "424ada88740ee0d2f1218becdba14ab38aa37804": { "authors": [ { "ids": [ "1682067" ], "name": "Yossi Gilad" }, { "ids": [ "18176124" ], "name": "Rotem Hemo" }, { "ids": [ "1689467" ], "name": "Silvio Micali" }, { "ids": [ "2901586" ], "name": "Georgios Vlachos" }, { "ids": [ "1789973" ], "name": "Nickolai Zeldovich" } ], "doi": "10.1145/3132747.3132757", "doiUrl": 
"https://doi.org/10.1145/3132747.3132757", "entities": [ "Bitcoin", "Business architecture", "Byzantine fault tolerance", "Cryptocurrency", "End-user license agreement", "Scalability", "Throughput", "Virtual machine" ], "id": "424ada88740ee0d2f1218becdba14ab38aa37804", "inCitations": [ "1d7d97012605bebdfa4f451fcbe493e7c95f6eb3", "a66aae6977be1d346f5c74958ffa4bd568e48a93", "919e32847097416aada92dff7c8274cd9ca55582", "bdd7454cdbf10b58cdbdf81c751ebf075126bfb4", "7c4ffeb9635ff7023b782721ae7de72b65d7fd84", "3a8d5e81704632d4d2516f53cd7fa4928c026c99", "e62df2883a75f11ca9d288450ece4f61e077a3e4", "ba83474ac983727ad9436891653fff68ebbd35f2", "71c5bc722f575665878dc3ca47953f384426899a", "ca3980fac7bca4f7ae5f3d223b4fbac6268e2627", "8f5404b411e1a931be66ac95ec7065fa9065b3d2", "64dca7e620a7d54127d5cb00ce769e980f424bd2", "13a39f769952a35cb177c5a9762427b0dd026922", "edfa3de5a3ce5e26a214adb6faa46ecd0e7dcc4a", "318d0aca7efa3a7e833a1993cec125a07fb7f9b1", "aa98f981fd50dd00a80f23baa0aa0b250b7e212a", "839f34554c9d9e69504c0848a4c4510e48c99e5c", "f234f428eb552b94435683e7e784e805c201d309" ], "journalName": "", "journalPages": "51-68", "journalVolume": "", "outCitations": [ "0b704e724b2ae86d11afe6def7e92f30b2756c06", "39856a57fa0c6e7d646b7db88f48f17688693fe4", "5c22a73f4cc4d1138a1137681397db1e369d7ec2", "0d5f7a1825bae713cebd66d121d5b01e31d8adab", "1521e801e8e08ecec3b0baabb07f9a6ce0a67a85", "4be1be822928a0aeb277412bad1f20f350deb609", "722c447bcf198b60279e4c1f447acb015f94e622", "24f9f2fc97461c8ee2ffbe6c322c2a8843f5000d", "5e86853f533c88a1996455d955a2e20ac47b3878", "728b60c04afb5b87853b59265e49f430dbf631db", "9c592eb42573518f54e162708f33c5bec735b132", "12d854f326b43232d906eb323db5d282786acb9d", "3cc86ff94309bb58b2125eea173b23ab89f26a3b", "08f8fbc075c0c0822115315bcffa54964b0599a7", "0a9cc699ed94b4729de3255d76305b91b6a56aaa", "226cfb67d2d8eba835f2ec695fe28b78b556a19f", "0db38d32069f3341d34c35085dc009a85ba13c13", "4c05ed08650cf001c64b53ec0bcf030961bb038c", "dda1eb7aabbda6ab5ee8df31b20a7e1becb75df7", 
"43fb74fc45ea844ad087c770fa9be747fbd03b19", "20f5f8733134d87041b95b742d613051a1fb3fdb", "40a98bed1d10248d30e86304315df07280dad93e", "7f14b9b28ec6e67a34301a1390c3e4f04d8f0f61", "35516916cd8840566acc05d0226f711bee1b563b", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "0cb88d5c6ed83552606ac1b2e477d5890b37be7a" ], "paperAbstract": "Algorand is a new cryptocurrency that confirms transactions with latency on the order of a minute while scaling to many users. Algorand ensures that users never have divergent views of confirmed transactions, even if some of the users are malicious and the network is temporarily partitioned. In contrast, existing cryptocurrencies allow for temporary forks and therefore require a long time, on the order of an hour, to confirm transactions with high confidence.\n Algorand uses a new Byzantine Agreement (BA) protocol to reach consensus among users on the next set of transactions. To scale the consensus to many users, Algorand uses a novel mechanism based on Verifiable Random Functions that allows users to privately check whether they are selected to participate in the BA to agree on the next set of transactions, and to include a proof of their selection in their network messages. In Algorand's BA protocol, users do not keep any private state except for their private keys, which allows Algorand to replace participants immediately after they send a message. This mitigates targeted attacks on chosen participants after their identity is revealed.\n We implement Algorand and evaluate its performance on 1,000 EC2 virtual machines, simulating up to 500,000 users. 
Experimental results show that Algorand confirms transactions in under a minute, achieves 125x Bitcoin's throughput, and incurs almost no penalty for scaling to more users.", "pdfUrls": [ "http://people.csail.mit.edu/nickolai/papers/gilad-algorand.pdf", "http://people.csail.mit.edu/nickolai/papers/gilad-algorand-eprint.pdf", "http://www.mit.edu/~yossigi/Algorand.pdf", "https://eprint.iacr.org/2017/454.pdf", "http://eprint.iacr.org/2017/454", "https://web.eecs.umich.edu/~manosk/assets/papers/algorand.pdf", "http://doi.acm.org/10.1145/3132747.3132757" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/424ada88740ee0d2f1218becdba14ab38aa37804", "sources": [ "DBLP" ], "title": "Algorand: Scaling Byzantine Agreements for Cryptocurrencies", "venue": "SOSP", "year": 2017 }, "4261e50b407355c9dfc0394741a2fae78fea3d43": { "authors": [ { "ids": [ "2681668" ], "name": "Spenser Bauman" }, { "ids": [ "27019192" ], "name": "Carl Friedrich Bolz-Tereick" }, { "ids": [ "2274043" ], "name": "Jeremy G. 
Siek" }, { "ids": [ "2187693" ], "name": "Sam Tobin-Hochstadt" } ], "doi": "10.1145/3133878", "doiUrl": "https://doi.org/10.1145/3133878", "entities": [ "Compiler", "Data structure", "Gradual typing", "Just-in-time compilation", "Program optimization", "Programmer", "PyPy", "Racket", "Run time (program lifecycle phase)", "Symposium on Principles of Programming Languages", "Tracing just-in-time compilation", "Typing" ], "id": "4261e50b407355c9dfc0394741a2fae78fea3d43", "inCitations": [ "3c77e744c44291b05ea7634251cfd764f3f1d383" ], "journalName": "PACMPL", "journalPages": "54:1-54:24", "journalVolume": "1", "outCitations": [ "44cb75f193d15809cf28093e3f3abfb735a29b5d", "5fbfe6211925970eed3da174ddf1bc41a20b8673", "8b47d69f96cbc08da737d1f78f3ffd4376f8e135", "6348969f77387fb4f1a295361d950af5f0e265c0", "1a3d050337d8e514e2dc02acf587dc2b784db648", "31e38caa20c122cdcd0838398ffb3321331d213d", "26ac3ad840d8d773eec2ab7fc60d441b34c6adc5", "1e2c604e66c4439ad343b70d7ec0abedf72d006a", "021af3b63fbcf5d867a4b27ca161841bf129c759", "5e3f8c2ba2fb225c29ba343565d52b9661e7198e", "9d084dc32aac134b9941e1e7d111150c2fa1f24a", "333ea43ab30ae453d6bd847360cd475275e0acbf", "1b4df92d7f0d9393103cafbdbc512c52a90296b8", "073540ca0aaf15c28f9571707dd846b746247d7b", "4e7c51bc9cdd81655912b0947a628c5d7f8c14ff", "d963fb7cd968666170361a4485df48c807bb85d0", "8259100eae4e956caa77e8c8b6d0db66b16da6f8", "42d612c985d2138b6f0134507f4af8dd52cc5e17", "9906d3bbb3061954c38914cebeccae6a80ef9c42", "5f4599513bc71e6c8ef48408bc4e27afb4e76806", "39796704851a76709671e7c3e10538ba4dd856fe", "59e145dfef77797b57b28d71b019c2e8b6dd9f01", "6bc6852208a32278a2ef88bbfaf57b2041405f5d", "433d6e67bb7c99e58cbc8ba13a7cb12f9c0da32c", "bd594733417f634255ebefa856ea0a687d550263", "1469b0cbb109c2a788a346dd0480070de8334dea", "4dba9547af0be58d1fb0a8a3d06b1ddd36084133" ], "paperAbstract": "While gradual typing has proven itself attractive to programmers, many systems have avoided sound gradual typing due to the run time overhead of enforcement. 
In the context of sound gradual typing, both anecdotal and systematic evidence has suggested that run time costs are quite high, and often unacceptable, casting doubt on the viability of soundness as an approach. \nWe show that these overheads are not fundamental, and that with appropriate improvements, just-in-time compilers can greatly reduce the overhead of sound gradual typing. Our study takes benchmarks published in a recent paper on gradual typing performance in Typed Racket (Takikawa et al., POPL 2016) and evaluates them using a experimental tracing JIT compiler for Racket, called Pycket. On typical benchmarks, Pycket is able to eliminate more than 90% of the gradual typing overhead. While our current results are not the final word in optimizing gradual typing, we show that the situation is not dire, and where more work is needed. \nPycketâ\u0080\u0099s performance comes from several sources, which we detail and measure individually. First, we apply a sophisticated tracing JIT compiler and optimizer, automatically generated in Pycket using the RPython framework originally created for PyPy. Second, we focus our optimization efforts on the challenges posed by run time checks, implemented in Racket by chaperones and impersonators. We introduce representation improvements, including a novel use of hidden classes to optimize these data structures.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133878" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4261e50b407355c9dfc0394741a2fae78fea3d43", "sources": [ "DBLP" ], "title": "Sound gradual typing: only mostly dead", "venue": "PACMPL", "year": 2017 }, "426549c65f3594d5c4c889424ecfeea1ccc2536f": { "authors": [ { "ids": [ "14577845" ], "name": "Seher Acer" }, { "ids": [ "2064658" ], "name": "R. 
Oguz Selvitopi" }, { "ids": [ "1731638" ], "name": "Cevdet Aykanat" } ], "doi": "10.1007/978-3-319-64203-1_45", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_45", "entities": [ "Sparse matrix" ], "id": "426549c65f3594d5c4c889424ecfeea1ccc2536f", "inCitations": [], "journalName": "", "journalPages": "625-637", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_45" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/426549c65f3594d5c4c889424ecfeea1ccc2536f", "sources": [ "DBLP" ], "title": "Addressing Volume and Latency Overheads in 1D-parallel Sparse Matrix-Vector Multiplication", "venue": "Euro-Par", "year": 2017 }, "42685a7f175b44c3365d20f41853e18c7998e2b7": { "authors": [ { "ids": [ "2115432" ], "name": "Su Yong Kim" }, { "ids": [ "30685860" ], "name": "Sangho Lee" }, { "ids": [ "33875038" ], "name": "Insu Yun" }, { "ids": [ "1706290" ], "name": "Wen Xu" }, { "ids": [ "2767582" ], "name": "Byoungyoung Lee" }, { "ids": [ "3085949" ], "name": "Youngtae Yun" }, { "ids": [ "3254849" ], "name": "Taesoo Kim" } ], "doi": "", "doiUrl": "", "entities": [ "Concolic testing", "Crash (computing)", "Cryptography", "Microsoft Windows", "Operating system", "Privilege escalation", "Run time (program lifecycle phase)", "Software bug", "Symbolic execution", "Test automation", "Vulnerability (computing)" ], "id": "42685a7f175b44c3365d20f41853e18c7998e2b7", "inCitations": [ "020af9e8d35b7f6ca563397a8e82778dfa7dac7b", "2968fdf952edd08d2e7b2f303cba2339e4ee8c40" ], "journalName": "", "journalPages": "689-701", "journalVolume": "", "outCitations": [ "32c9b3a4990fdb26da19a4e0817936369a5e91bc", "06f7617a591dc6931ca0cbebccad508da93ff433", "169b32238d98d4a29b8fa4e4b9528c4bec0cbaa2", "2d6495c5fd084edbf2a320aaae4fe65606a9fc7c", "889a6da567fc63dced1d145e0244964c1169fcb7", "1f4175655f7c73803ac773ad65feeb3fd6dab0a5", "0719b9670c8580db76547497df39caabdc20fc32", 
"192a76caa8c3cea4e4750aed47b36862f062fa3e", "063586121decdcbbdbb47404f2654806d9fc2ceb", "95baae72c5fcca4038339c350556dd6143d9a263", "45d0737424b37a1607f992d5e23bcedc7adbb3b0", "23044aaddf05d0f74940614d3510bc57368deb4e", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "642fb2cdcd5b02a97d08f06a9dbff9829f63a5c0", "11a83152766239e535ba4d01f8b0c1f8502d0190", "11443efe465ad544f478524da6c66c085b16e28b", "2a43e20d932a0da61edcfdf5373ba4ecbc294f7d", "479bf197a3a9c5f6f7b06e64eefa0a90fa0c8b41", "d23980847222dbb57384fe95828b94a313f4735d", "2e36bb1f7499eca3e0a62ecb574efe5ebd09aa34", "0d1d0900cf862f11d3d7812c01d28be27c71a6c7", "15a7fedd9081aba61571b79499cc3ad0fd52070c", "072b37d00d0960f4dfab882eaa59c20154b14b7f", "6ea63d09993b9a268689790ea8d25bc36345497e", "15883946146cf0dde6799e5ee5147f2ec839bf9f", "026128c44bfd1647137511e05cc5e68087f30424", "dc954931e0051aa681b3dd57a2f8f90b36c0a925", "0f2d6f8769060937fe32207eda9e257ca95dc08c", "25f9a83ec783dd58789de14435ad9a9f772c3b50", "03bb7b695ac75061248b72445a45b348e2bceceb", "11496e28e6dd3f1ab7d2f054c46d7587988dfd86", "30dd12a894eff2a488c83f565bc287b4dd03c0cc", "b00672fc5ff99434bf5347418a2d2762a3bb2639", "db7c853361ab9da5e1e5847771e996411e578dbb", "2c21f9488edfb2586327528bb59461a41363fc42", "11f7876aa83d79c90c7ddb49b01186c80f6777b5", "1aabe6009e67aa3d2a33b8de3ee8683a1d675496", "1f7e5e582663868ed2f6763f98066ca278177a61" ], "paperAbstract": "Discovering the security vulnerabilities of commercial off-the-shelf (COTS) operating systems (OSes) is challenging because they not only are huge and complex, but also lack detailed debug information. Concolic testing, which generates all feasible inputs of a program by using symbolic execution and tests the program with the generated inputs, is one of the most promising approaches to solve this problem. Unfortunately, the state-of-the-art concolic testing tools do not scale well for testing COTS OSes because of state explosion. 
Indeed, they often fail to find a single bug (or crash) in COTS OSes despite their long execution time. In this paper, we propose CAB-FUZZ (Context-Aware and Boundary-focused), a practical concolic testing tool to quickly explore interesting paths that are highly likely triggering real bugs without debug information. First, CAB-FUZZ prioritizes the boundary states of arrays and loops, inspired by the fact that many vulnerabilities originate from a lack of proper boundary checks. Second, CAB-FUZZ exploits real programs interacting with COTS OSes to construct proper contexts to explore deep and complex kernel states without debug information. We applied CAB-FUZZ to Windows 7 and Windows Server 2008 and found 21 undisclosed unique crashes, including two local privilege escalation vulnerabilities (CVE2015-6098 and CVE-2016-0040) and one information disclosure vulnerability in a cryptography driver (CVE2016-7219). CAB-FUZZ found vulnerabilities that are non-trivial to discover; five vulnerabilities have existed for 14 years, and we could trigger them even in the initial version of Windows XP (August 2001).", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_lee.pdf", "https://taesoo.gtisc.gatech.edu/pubs/2017/kim:cab-fuzz.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/kim", "https://taesoo.gtisc.gatech.edu/pubs/2017/kim:cab-fuzz-slides.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-kim.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4268/5a7f175b44c3365d20f41853e18c7998e2b7.pdf", "s2Url": "https://semanticscholar.org/paper/42685a7f175b44c3365d20f41853e18c7998e2b7", "sources": [ "DBLP" ], "title": "CAB-Fuzz: Practical Concolic Testing Techniques for COTS Operating Systems", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "4284153a0bf0aa3d0f94ad3113f4d117e4767bef": { "authors": [ { "ids": [ "2345352" ], "name": "Matteo Campanelli" }, { "ids": [ 
"1734147" ], "name": "Rosario Gennaro" }, { "ids": [ "2642595" ], "name": "Steven Goldfeder" }, { "ids": [ "2648302" ], "name": "Luca Nizzardo" } ], "doi": "10.1145/3133956.3134060", "doiUrl": "https://doi.org/10.1145/3133956.3134060", "entities": [ "Bitcoin", "Contingency (philosophy)", "Retrievability", "SHA-2", "Server (computing)", "Sudoku", "Trusted third party", "Zero" ], "id": "4284153a0bf0aa3d0f94ad3113f4d117e4767bef", "inCitations": [ "7e32e068c7471c5dfd139c8a563e4644dd3d54eb", "48bc4d6226bc147f0657e75b90145a1333777a34", "46e46c77423fcaf6e4a435fecca4430b1e78bd5d", "8ee11249c02c0a86cd7a759cfdf3ea2705b80d04", "e389d6f7929227999908284168dd1c819008824a", "10480843ca8804f7fadca50173001ea1166d6dba", "26ab9c27d995dadd553614045361ffb1afba9008", "032cd72078d8ed4795a71f5b27036e8888c39742" ], "journalName": "", "journalPages": "229-243", "journalVolume": "", "outCitations": [ "0dfe484fec2cb8ae112d0363304864c5b57ec88c", "4c65005c8822c3117bd3c3746e3a9b9e17386328", "345e83dd58f26f51d75e2fef330c02c9aa01e61b", "70d866ec61b0353e75f13aee5b7e6bdb9b1aec77", "8c43726d7229a6dea25d3f510ca4b8e681117b60", "8456b670e1e0f65e714b90db04e67eae7df3819f", "0b7e6c5d49b7681fa7426b86040072e3b36a2223", "1d9cf87fa6d6175a2c1543afff263113657765f6", "b0f0a5bd5fa074d1720fb23c47d67b539e4c4591", "5c07dadf28f3cfdd67ab60a12d3ea9860bcd8b24", "5b298ac2c0735142b1b365ef2edfb3aa03f9eb01", "f65ee3a9f171da68b57039a5d5f2f1ad70798488", "46e9d622600e2a116feee553017f74c46763ca7a", "27fddee0f2ecec9be32389bd25382ae73c2a55b5", "cbef8f2802cc2b0bc60c66cfa3f465d47e9e86c4", "153aec6ba6d8eadd4ece50f0ad469203859a3ff6", "2459b15dcd7c8d383980c0a118c0983d4ec010d5", "75841d6fdb1c37b92063cae58a0d19d3cd1d1476", "1e4b941215d539981086f599ce74fa8e48184eb9", "e298114c5abac2b048020d658ecfb7d73d6a422a", "0e0427aedfed65c8dd688c094b181feacf4eaab4", "d987feebe58c6e315cca4249dc63c1c576b452cf", "46e46c77423fcaf6e4a435fecca4430b1e78bd5d", "3624eaf7c2f05612429ed1579cc82102c11f4d65", "1f8278af786cfae1770def84091c3116f7b2b8c6", 
"2eb315952f6a2e342b19cf95287c8a0b1f2c36fa", "362246709de205ec0ac5b34e07306839c38d5a3a", "2a266546c2609f079529688de7acbe0213f47373", "09e73a08ee516df2d69ae6a6126bb05ff58e2042", "5cbfe46f4b026f8dee4afb1e788236b3fdf08b81", "c27684f2fe5a85fe2871f693edc46061d0ecb20d", "446961b27f6c14413ae6cc2f78ad7d7c53ede26c" ], "paperAbstract": "Zero Knowledge Contingent Payment (ZKCP) protocols allow fair exchange of sold goods and payments over the Bitcoin network. In this paper we point out two main shortcomings of current proposals for ZKCP, and propose ways to address them.\n First we show an attack that allows a buyer to learn partial information about the digital good being sold, without paying for it. This break in the zero-knowledge condition of ZKCP is due to the fact that in the protocols we attack, the buyer is allowed to choose common parameters that normally should be selected by a trusted third party. We implemented and tested this attack: we present code that learns, without paying, the value of a Sudoku cell in the \"Pay-to-Sudoku\" ZKCP implementation. We also present ways to fix this attack that do not require a trusted third party.\n Second, we show that ZKCP are not suited for the purchase of digital services} rather than goods. Current constructions of ZKCP do not allow a seller to receive payments after proving that a certain service has been rendered, but only for the sale of a specific digital good. We define the notion of Zero-Knowledge Contingent Service Payment (ZKCSP) protocols and construct two new protocols, for either public or private verification. 
We implemented our ZKCSP protocols for Proofs of Retrievability, where a client pays the server for providing a proof that the client's data is correctly stored by the server.We also implement a secure ZKCP protocol for \"Pay-to-Sudoku\" via our ZKCSP protocol, which does not require a trusted third party.\n A side product of our implementation effort is a new optimized circuit for SHA256 with less than a quarter than the number of AND gates of the best previously publicly available one. Our new SHA256 circuit may be of independent use for circuit-based MPC and FHE protocols that require SHA256 circuits.", "pdfUrls": [ "http://stevengoldfeder.com/papers/ZKCSP.pdf", "http://doi.acm.org/10.1145/3133956.3134060", "http://eprint.iacr.org/2017/566", "http://diyhpl.us/~bryan/papers2/bitcoin/Zero-knowledge%20contingent%20payments%20revisited:%20Attacks%20and%20payments%20for%20services.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4284153a0bf0aa3d0f94ad3113f4d117e4767bef", "sources": [ "DBLP" ], "title": "Zero-Knowledge Contingent Payments Revisited: Attacks and Payments for Services", "venue": "CCS", "year": 2017 }, "429b2866bd20df130eceb147034a2dbcd026637c": { "authors": [ { "ids": [ "3521193" ], "name": "Yong Zhao" }, { "ids": [ "3446210" ], "name": "Kun Suo" }, { "ids": [ "40227618" ], "name": "Luwei Cheng" }, { "ids": [ "1786877" ], "name": "Jia Rao" } ], "doi": "10.1145/3135974.3135975", "doiUrl": "https://doi.org/10.1145/3135974.3135975", "entities": [ "Blocking (computing)", "Central processing unit", "Cloud computing", "Critical section", "High- and low-level", "Hypervisor", "Institute for Operations Research and the Management Sciences", "Interference (communication)", "Library for WWW in Perl", "Light-weight process", "Linux", "Linux", "Load balancing (computing)", "Lock (computer science)", "Loop heat pipe", "Multithreading (computer architecture)", "Operating system", "Preemption (computing)", "Scheduler activations", 
"Scheduling (computing)", "Semiconductor consolidation", "Server (computing)", "Thread (computing)", "Virtual machine" ], "id": "429b2866bd20df130eceb147034a2dbcd026637c", "inCitations": [], "journalName": "", "journalPages": "222-234", "journalVolume": "", "outCitations": [ "2707689d8751e98f67000aa50f0d1b52b56db1e5", "6666169c758925a2950c8de1211c9b412312573c", "8beda53e2b3cd04434aaaabef463ef8e18706df1", "008f16f7af27244b507659be26bebb8bb07aded3", "f4ed7fb35916bd0d36a53198384bb0ed2ff34c3f", "09cef59336519ce93d15841bc2756a79ce13477d", "16e367708e50a9ed6228334c9d49f4db0dab4cd8", "1ecd36058e48734213c81728f42ff798a2c52833", "7c833a334df551456885bd8b55d63753afac1fae", "76b73a657ef1cb543790acc99fc8abc80dbe4fc7", "6a285b0a2243223ee6905692d79b4a8d39f5af5e", "51e878ed0979919041030f871f6e34531ca39750", "14153a923dabe1a1415e93df15de6f7f1fa54b1b", "ca6e70cca64c928872a8cd137515d72708b58a69", "294ad206a120a519cfd99294c8b5e004dcc06abf", "067c7857753e21e7317b556c86e30be60aa7cac0", "136c75e41eb66c85aab922c7fdf62820d63b139f" ], "paperAbstract": "The wide adoption of SMP virtual machines (VMs) and resource consolidation present challenges to efficiently executing multi-threaded programs in the cloud. An important problem is the semantic gaps between the guest OS and the hypervisor. The well-known lock-holder preemption (LHP) and lock-waiter preemption (LWP) problems are examples of such semantic gaps, in which the hypervisor is unaware of the activities in the guest OS and adversely deschedules virtual CPUs (vCPUs) that are executing in critical sections. Existing studies have focused on inferring a high-level semantic state of the guest OS to aid hypervisor-level scheduling so as to avoid the LHP and LWP problems.\n In this work, we find a reverse semantic gap - the guest OS is oblivious of the scheduling events at the hypervisor, leaving the potential of addressing the LHP and LWP problems in the guest OS unexploited. 
Inspired by scheduler activations (SAs) in hybrid threading, we proposed interference-resilient scheduling (IRS), a guest-hypervisor coordinated approach to enhancing load balancing in the guest. IRS informs the guest OS before vCPU preemption happens at the hypervisor to activate in-guest load balancing. As such, critical threads on preempted vCPUs can be migrated to other running vCPUs so that the LHP and LWP problems are all alleviated. Experimental results with Xen and Linux guests show as much as 42%, 43%, and 46% performance improvement for parallel programs with blocking, spinning synchronizations, and multithreaded server workloads, respectively.", "pdfUrls": [ "http://doi.acm.org/10.1145/3135974.3135975" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/429b2866bd20df130eceb147034a2dbcd026637c", "sources": [ "DBLP" ], "title": "Scheduler activations for interference-resilient SMP virtual machine scheduling", "venue": "Middleware", "year": 2017 }, "42a5af08a9cd396ff05540b4f928f58c2abb0cec": { "authors": [ { "ids": [ "1967881" ], "name": "Marius Poke" }, { "ids": [ "1713648" ], "name": "Torsten Hoefler" }, { "ids": [ "2272728" ], "name": "Colin W. 
Glass" } ], "doi": "10.1145/3078597.3078598", "doiUrl": "https://doi.org/10.1145/3078597.3078598", "entities": [ "Algorithm", "Atomic broadcast", "Distributed computing", "Failure rate", "Fault tolerance", "InfiniBand", "Overlay network", "Scalability", "Throughput", "Web server" ], "id": "42a5af08a9cd396ff05540b4f928f58c2abb0cec", "inCitations": [ "522256622fc911ebdb0270159f52451544d0277e", "1f2f1babd3624e9f09bec791e2d43f49279c1f09", "53738491f60b5bfca5fae5e5a4fe53727d2d2786", "d01ac7040c253b941192dcd7710635d47d837609", "340d6db56d94623ac090599cf9ea5287370607ef" ], "journalName": "", "journalPages": "205-218", "journalVolume": "", "outCitations": [ "8d63b44ea043fc3c2b0ec90b2ffbbf84ba446674", "b9e8407809babd5c8fe3a5334e0660f80f704767", "edf84a4b216fa0b01354aea5e8946c62bbce22dc", "0d27a23d03984cb635d391031e958782d87b871d", "4af63ed343df388b6353b6fc77c7137d27822bf4", "1220e4a011c46804d4369b5580dc7fb6e387af54", "00c181b8b64e824fbe0172339f1e4560b557fab5", "f8ef7be3d80d1d4b001ecd9e8593c8c4954c52e6", "6f379bb20a3051a21ce64540dd66ed97c8ba5b4d", "00e3756119a91432622f6982b59ecd24a1340fbe", "df5b06a8e3d77fb312407d3da2ca8888b78dbd1f", "06154716d6d51256ed2bb014ef65ec8b5d41aa26", "a2514450633e633b6213c71cd3d153f8e8bdbe04", "100846b5e7dc9c0d967059146371aa3f67e2c7e4", "0bcf2ffd821dd6382b8c8c1cfcb20ca5dea84db5", "00da0fe74a7dfdc6bae623781e517b9bba488c0f", "9028fe4e30d51bfdf494a36d2b8c4bcfb10cfe8a", "d12d1289d2384c2ce642f01855637b9f0519e189", "5a8e3f36ad9db8756e735e3281489fbd18df6f65", "14dbefbdc471aa9eccdedb6c3688d16248865194", "efb1a85cf540fd4f901a78100a2e450d484aebac", "9a3bf49876037108551bfb348d8c70a50b168911", "a3dcc1d2d083f6ed8371e6619557cf7ffdc4b410", "b129f84262024128ee64300ab257744b0b5ed8fb", "a8589e694fd53e073366bb43dc67c18632eacac6", "3cc2336cb701ab40273d0b5603064a70a209b4c6", "0ca1e465dd85b8254bcdd7053032d7eab6e2d4b4", "fc3fbb4c76448e8968f8a19f076d133b2e7a2849", "062c47d2a3afa47f42c6d97d72990b53a48ee9c6", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", 
"550285725684e2d286ffd9fa5cebdc52d7c4f860", "aa931bfc67b3e7b56671e14facdfe7a85d26992a", "5b3f43a02fe5bce776833d95d5a2b8afc904b375", "152f85e4bd2853a458d1350bb64d4a6adca24832", "f3018e7589af851341e6b40affb12d0ebdfa7db1", "1c504129dc8296d6e461eb7ec59f6f23ff36ee5c", "261893f4c8a7c311a97249a8f42071c566372493", "aae636bd99bc4bae4cd4afcfa4621ef573a55c26", "02b1103e592fa6bf0499e27f1519692441fad557", "173123116b4a54ec3307e508532d99e84447708c", "74ce9ab22a2b957de1e27fea1fd97ecc76ee1d77", "4795fab9ef60ef9a91b1920c467c5d874f6bd901", "036ebe81fc7bd9000c3edda83fa30bee03fedc1a", "cc2c743ea08757a2fc47edbe1206ab7b055cf511", "01acb4d6bfc7b289a7a94ee0835eca83d1c2744c", "d01ac7040c253b941192dcd7710635d47d837609", "06d8aa948ed0ff654f772439c00711dfe7fa3d1a", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "155ca30ef360d66af571eee47c7f60f300e154db", "569de2eececd3adb7219d63eb85e4bdc63486c42" ], "paperAbstract": "Many distributed systems require coordination between the components involved. With the steady growth of such systems, the probability of failures increases, which necessitates scalable fault-tolerant agreement protocols. The most common practical agreement protocol, for such scenarios, is leader-based atomic broadcast. In this work, we propose AllConcur, a distributed system that provides agreement through a leaderless concurrent atomic broadcast algorithm, thus, not suffering from the bottleneck of a central coordinator. In AllConcur, all components exchange messages concurrently through a logical overlay network that employs early termination to minimize the agreement latency. Our implementation of AllConcur supports standard sockets-based TCP as well as high-performance InfiniBand Verbs communications. AllConcur can handle up to 135 million requests per second and achieves 17x higher throughput than today's standard leader-based protocols, such as Libpaxos. 
Thus, AllConcur is highly competitive with regard to existing solutions and, due to its decentralized approach, enables hitherto unattainable system designs in a variety of fields.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078597.3078598" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/42a5af08a9cd396ff05540b4f928f58c2abb0cec", "sources": [ "DBLP" ], "title": "AllConcur: Leaderless Concurrent Atomic Broadcast", "venue": "HPDC", "year": 2017 }, "42b352ef7835dc97ac85a3dcf6b3abc83fca2f21": { "authors": [ { "ids": [ "4239456" ], "name": "Sukhan Lee" }, { "ids": [ "2317634" ], "name": "Yuhwan Ro" }, { "ids": [ "3173425" ], "name": "Young Hoon Son" }, { "ids": [ "2077514" ], "name": "Hyunyoon Cho" }, { "ids": [ "1686484" ], "name": "Nam Sung Kim" }, { "ids": [ "2575874" ], "name": "Jung Ho Ahn" } ], "doi": "10.1109/IISWC.2017.8167762", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167762", "entities": [ "Central processing unit", "Computer data storage", "Double data rate", "Dynamic random-access memory", "Memory module" ], "id": "42b352ef7835dc97ac85a3dcf6b3abc83fca2f21", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "110-111", "journalVolume": "", "outCitations": [ "9ca87149f9e7cb0811c881ecac79ef02c87e1716", "2644b8562292e2492459db3ed214d3330ef7712d", "08632fe2b934ed15d3499e7321282c81adc2c390", "02e965debeaf59e6f93adede60d7e39004e77fcc", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "290849abe19f6af0c09ff0dc1b504858743c0150", "8b10b13fb495101d1e4eb768907cff05e3bd9315" ], "paperAbstract": "As servers are equipped with more memory modules each with larger capacity, main-memory systems are now the second highest energy-consuming component in big-memory servers and their energy consumption even becomes comparable to processors in some servers. Meanwhile, it is critical for big-memory servers and their main-memory systems to offer high energy efficiency. 
Prior work exploited mobile LPDDR devices' advantages (lower power than DDR devices) while attempting to surmount their limitations (longer latency, lower bandwidth, or both). However, we demonstrate that such main memory architectures (based on the latest LPDDR4 devices) are no longer effective. This is because the power consumption of present DDR4 devices has substantially decreased by adopting the strength of mobile and graphics memory whereas LPDDR4 has sacrificed energy efficiency and focused more on increasing data transfer rates; we also exhibit that the power consumption of DDR4 devices can substantially vary across manufacturers. Moreover, investigating a new energy-saving feature of DDR4 devices in depth, we show that activating this feature often hurts overall energy efficiency of servers due to its performance penalties.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167762" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/42b352ef7835dc97ac85a3dcf6b3abc83fca2f21", "sources": [ "DBLP" ], "title": "Understanding power-performance relationship of energy-efficient modern DRAM devices", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "42dba360569d6a8b6eaaf7922dcac85e01cb5924": { "authors": [ { "ids": [ "1844903" ], "name": "Ali Pourmiri" }, { "ids": [ "1743607" ], "name": "Mahdi Jafari Siavoshani" }, { "ids": [ "1708300" ], "name": "Seyed Pooya Shariatpanahi" } ], "doi": "10.1109/IPDPS.2017.24", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.24", "entities": [ "Distributed computing", "Load balancing (computing)", "Power of two", "Randomized algorithm", "Server (computing)", "Simulation", "Time complexity" ], "id": "42dba360569d6a8b6eaaf7922dcac85e01cb5924", "inCitations": [ "054155d089205a85a3d0acb3a713ad5092095932", "f71c309987c6f6c9d7452a8a3dc817dbcb60f53d" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", 
"journalPages": "1068-1077", "journalVolume": "", "outCitations": [ "529462c0d6d8aab16b4cd76317439d1545f8b7fa", "73750f2ef08f6943495f8d7d3de7a799908901bd", "20480eb94103d5ed4b5925a86430c872e4fabfdc", "34d537178cf343a1b1036616938659b4ba76cd46", "0f72abea4de4d3b1a47bfa114163b318c9507827", "00b26163a1b2c0ddd49766ef4614ec4024f732ab", "2fe57efa9d15d4db871173e29032c1687ef33292", "8cf989ebadf69674c8ddc56f645ad9e61e7ad431", "4cead48e2eac91560105871b78268e3164eb382b", "075321f87a446af752efa991acc30819377c7788", "3f065d08c925153314c77015b99aa843859f4cfe", "346965e005bbffdfb02d6536b057859dc51c879b", "6e044cdfbe2e8e3bbfa67e74e55d69c974aa6e97", "406d6f7a2a21d3b31a32c2bf306132def7163e06", "337e4b7f57ccbb7485950b93da9c5bb4ec4dc9ad", "4ef750a115b108f233623e6da2ab17cd266d8c59", "3d601571fdf082b3f2bbf16befbeaa6702664dff", "a3578399c4c73a17d93e4f2bbf190d65a669881e", "66e4fd683171f38fcf8719c4bf19b32f1a691698", "0bd1b088142f413345b0f2f2020a792811a76d71", "284495671bf2b16fca3f8784586e3d248ea10801", "2209890b596593c697dc8e1ab02517fbbaea21bf", "638c917d981915bc7a00bb0941cdd38111df51de", "15871f885a879ae12668a1ce0d95cc54dc53c8d3", "320778bd3e5c1e9b6e0438149964d56f3900730f", "4383810bfb6043a0cc4085f412db320cd095a925", "234e6be0d4238f76b3ac038ee422be39f391c625", "0056edb1bd74d150fdd385f74f3e6dc0534cc678", "1fc083dac294f88f912ee22645f3ddf0db2c73c7", "247beb4be0164c97878d2e6336d3922e2d595bbc", "a8bdf1665b828e13dd8bcc2c73bf94863685f964", "0160729ec657235f10cfe76dc892c08ae4d0f2e7", "401fa67d01d8725ee301c9a2464dead1879a4c53" ], "paperAbstract": "We consider load balancing in a network of caching servers delivering contents to end users. Randomized load balancing via the so-called power of two choices is a well-known approach in parallel and distributed systems that reduces network imbalance. In this paper, we propose a randomized load balancing scheme which simultaneously considers cache size limitation and proximity in the server redirection process. 
Since the memory limitation and the proximity constraint cause correlation in the server selection process, we may not benefit from the power of two choices in general. However, we prove that in certain regimes, in terms of memory limitation and proximity constraint, our scheme results in the maximum load of order Θ(log log n) (here n is the number of servers and requests), and at the same time, leads to a low communication cost. This is an exponential improvement in the maximum load compared to the scheme which assigns each request to the nearest available replica. Finally, we investigate our scheme performance by extensive simulations.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.24", "https://arxiv.org/pdf/1610.05961v1.pdf", "http://arxiv.org/abs/1610.05961", "https://arxiv.org/pdf/1610.05961v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/42dba360569d6a8b6eaaf7922dcac85e01cb5924", "sources": [ "DBLP" ], "title": "Proximity-Aware Balanced Allocations in Cache Networks", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "4319055a9a2004c054582c8ad4c2c31e39b1fa56": { "authors": [ { "ids": [ "4165014" ], "name": "Ahmet \u00c7elik" }, { "ids": [ "33403976" ], "name": "Sreepathi Pai" }, { "ids": [ "1733030" ], "name": "Sarfraz Khurshid" }, { "ids": [ "2020926" ], "name": "Milos Gligoric" } ], "doi": "10.1145/3133918", "doiUrl": "https://doi.org/10.1145/3133918", "entities": [ "Backtracking", "Central processing unit", "Computational resource", "Data structure", "Executable", "Formal specification", "Graphics processing unit", "Software bug" ], "id": "4319055a9a2004c054582c8ad4c2c31e39b1fa56", "inCitations": [], "journalName": "PACMPL", "journalPages": "94:1-94:25", "journalVolume": "1", "outCitations": [ "4c5c96bfe52cf1d7d486245ecb26eb66eb875113", "088b7a02c000b5d3b9b45231f1ae2e1abddaed3c", "109c4450b7fdbf5c760bc8ee5c28bec3d1186c0e", 
"25010bbdf127101e1fd5adea5e15f45765b87b0f", "a05e223169ab022f800bf9f2664847919844cab9", "14a881960158ae4672cd110a21b613a6512ddd99", "0efda63ce44492d67dce5982175b91889be21fbc", "ed8b4d61fe7f10290c13e56fcf7e48cc232bea96", "3b14bd69d8fb14e43a111c39f6e3d954119d5d28", "7830505e9a9c68bb167db9245a4ce4d29a98e114", "4f6bcd2bffc305b8ba56933af332d9da82daeebc", "2920dd48c526b1158b8298b69304ab6a7da59b4f", "5adb8c1968ff62d86c60bb5781df3f26106d34fa", "3fe87f0079cc8a29adfabf5fadab69993cb16c65", "8fee93746c5752f2f85f76daae7a9cb4db326649", "5b975248796c2ee3f65b2f4430fd3be4d7e6191e", "20fd34fbcf9c5b7991b3c7d01b85b2e2f2b92a04", "072b37d00d0960f4dfab882eaa59c20154b14b7f" ], "paperAbstract": "Bounded exhaustive testing is an effective methodology for detecting bugs in a wide range of applications. A well-known approach for bounded exhaustive testing is Korat. It generates all test inputs, up to a given small size, based on a formal specification that is written as an executable predicate and characterizes properties of desired inputs. Korat uses the predicate's executions on candidate inputs to implement a backtracking search based on pruning to systematically explore the space of all possible inputs and generate only those that satisfy the specification. \n This paper presents a novel approach for speeding up test generation for bounded exhaustive testing using Korat. The novelty of our approach is two-fold. One, we introduce a new technique for writing the specification predicate based on an abstract representation of candidate inputs, so that the predicate executes directly on these abstract structures and each execution has a lower cost. Two, we use the abstract representation as the basis to define the first technique for utilizing GPUs for systematic test generation using executable predicates. Moreover, we present a suite of optimizations that enable effective utilization of the computational resources offered by modern GPUs. 
We use our prototype tool KoratG to experimentally evaluate our approach using a suite of 7 data structures that were used in prior studies on bounded exhaustive testing. Our results show that our abstract representation can speed up test generation by 5.68 times on a standard CPU, while execution on a GPU speeds up the execution, on average, by 17.46 times.", "pdfUrls": [ "https://www.cs.utexas.edu/~celik/papers/CelikETAL17KoratG.pdf", "http://users.ece.utexas.edu/~gligoric/papers/CelikETAL17intKorat.pdf", "http://doi.acm.org/10.1145/3133918" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4319055a9a2004c054582c8ad4c2c31e39b1fa56", "sources": [ "DBLP" ], "title": "Bounded exhaustive test-input generation on GPUs", "venue": "PACMPL", "year": 2017 }, "434018137a965b8d0f5328aa26ade528f9905f32": { "authors": [ { "ids": [ "2946551" ], "name": "Binzhang Fu" }, { "ids": [ "1747205" ], "name": "John Kim" } ], "doi": "10.1145/3079856.3080249", "doiUrl": "https://doi.org/10.1145/3079856.3080249", "entities": [ "Algorithm", "Blocking (computing)", "Communication endpoint", "IP address blocking", "Network congestion", "Network performance", "Routing", "Simulation", "Throughput", "VC dimension", "Virtual channel" ], "id": "434018137a965b8d0f5328aa26ade528f9905f32", "inCitations": [], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "691-702", "journalVolume": "", "outCitations": [ "1c634ce972c6177f9d3813169967729c99f4bb9a", "03771342b2ec13944bc7f34c65af9a2d843bc428", "6a26e31b81d3eda9e90dfeae39a2e48012297e75", "9c4b6c885bfc6038cdac56763663880e0f2624e6", "bcf4b4e4ffdc26bf1f162f2e9652a511bf7164f8", "a0f972f1de91f07852b3b5bc6350b5f51e350ce6", "3ab1c2d1cc4763c3290a68fe01eb176b43ec8199", "d589123c9665f52c1c06a0b3c80aa94c423a8908", "0eadb23fd9a83b28c6c861e8a40620b3a832ce50", "100d5bf9d9f760eaeaa61f89e81488a7d3808383", "54ade7ae16c9495b238d260de2de79c1a588453f", 
"14093ec7179ca5e4331ee914c946ca6c9b4f257b", "3191f28b942bd428fd4df250afc15bf68b402362", "251f9180bd7c3dd0e3c7743cc9bea7c308b97cdc", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "1b216be638fbb0a9099bbc1aabe4319676d5f573", "0dd57dbc7e47ed7e27affd8d289585005d4d62a5", "5f8991828def57d2f0cda942566afff56740d150", "b194fdf11c9651bd5fa1096599cbd1c4577530ab", "43649ab7ecbfa60f4acdc4f0729fc0286767b05a", "6c61473130ccb2009717a28962096d146fbde038", "8576d87051f7c252de4ef89e475d89de8e9a45c9", "f4cb0ddb31cd1be6cd56d9d339429ef970ed29e3", "89922c99a0b090f09ce81684826c75aeff5a176c", "f6402c54f14fe3a399f90577cb0a477a3d5c978d", "a15bc58fa496b6cca937713723f19f45380fc2fe", "8b3b893cd0c8001630562611286221b3ca48947b", "aed84410735fa23b7faebfba2432d79e1e980e4c", "2b8f7ce8460e7e183de754b09cfc0e624476d7f2", "02cd35fe566b9cd6e0b7af1ee8f88135b36c9eba", "d3216e8805687c458a82bea952ca4b1c6f4548b9", "373b88e34295875fdab7f6cdee1438edbd0571cb", "56bf58eb183dbe8f6d420fae194f2c2be35fc850", "045d80100cebdcff78d5fe232e08133d17ab42ab", "2f5e593d29a5eb8b3f7c65e4e5c740b792933757" ], "paperAbstract": "Routing algorithms can improve network performance by maximizing routing adaptiveness but can be problematic in the presence of endpoint congestion. Tree-saturation is a well-known behavior caused by endpoint congestion. Adaptive routing can, however, spread the congestion and result in thick branches of the congestion tree -- creating Head-of-Line (HoL) blocking and degrading performance. In this work, we identify how ignoring virtual channels (VCs) and their occupancy during adaptive routing results in congestion trees with thick branches as congestion is spread to all VCs. To address this limitation, we propose Footprint routing algorithm -- a new adaptive routing algorithm that minimizes the size of the congestion tree, both in terms of the number of nodes in the congestion tree as well as branch thickness. 
Footprint achieves this by regulating adaptiveness by requiring packets to follow the path of prior packets to the same destination if the network is congested instead of forking a new path or VC. Thus, the congestion tree is dynamically kept as slim as possible and reduces HoL blocking or congestion spreading while maintaining high adaptivity and maximizing VC buffer utilization. We evaluate the proposed Footprint routing algorithm against other adaptive routing algorithms and our simulation results show that the network saturation throughput can be improved by up to 43% (58%) compared with the fully adaptive routing (partially adaptive routing) algorithms.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080249" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/434018137a965b8d0f5328aa26ade528f9905f32", "sources": [ "DBLP" ], "title": "Footprint: Regulating routing adaptiveness in Networks-on-Chip", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "43afc757d291ff48795d9ca424d6c281082a7a05": { "authors": [ { "ids": [ "1737944" ], "name": "Philip A. Bernstein" }, { "ids": [ "1793711" ], "name": "Sebastian Burckhardt" }, { "ids": [ "39632191" ], "name": "Sergey Bykov" }, { "ids": [ "2174285" ], "name": "Natacha Crooks" }, { "ids": [ "1829445" ], "name": "Jose M. 
Faleiro" }, { "ids": [ "2282387" ], "name": "Gabriel Kliot" }, { "ids": [ "27011205" ], "name": "Alok Kumbhare" }, { "ids": [ "2399069" ], "name": "Muntasir Raihan Rahman" }, { "ids": [ "36704918" ], "name": "Vivek Shah" }, { "ids": [ "2893116" ], "name": "Adriana Szekeres" }, { "ids": [ "34590342" ], "name": "Jorgen Thelin" } ], "doi": "10.1145/3133931", "doiUrl": "https://doi.org/10.1145/3133931", "entities": [ "Actor model", "Cache (computing)", "Cache coherence", "Distributed cache", "Eventual consistency", "Fault tolerance", "Linearizability", "Locality of reference", "Multitier architecture", "Open-source software", "Programming model", "Scalability", "Single-instance storage", "Throughput" ], "id": "43afc757d291ff48795d9ca424d6c281082a7a05", "inCitations": [ "540890501c622d87002d424d441ae7a5860c316c", "0473acbb3e927841e8dd47ed768061f0623e020d" ], "journalName": "PACMPL", "journalPages": "107:1-107:26", "journalVolume": "1", "outCitations": [ "583aa7cfcacdab3df24b9b3281d34763a22accef", "b143aaac2a3fe2fbac08c062bcdf0cc71d0181a6", "24829ced3be55a3a118d1d042699b899e4860a04", "206b20f225fc655dfac733b6f0bd8077ed86215e", "7960dcacda423270883a0939fa6b4ba293f6f629", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "1eb4e754d9b79f0806667302801fc9a0b50f88af", "5ad4db3c6e6bea5246e800681e44a2ae723e6412", "248d8dbb2016315e91d4e33515204f6d44380433", "7ee123ce7763d3886e8061f29729ecbeab1b4d80", "aacfbf0d34bc24dc3b72e56719ec083759a072ce", "02a9c6a346ad5fbbeac23245529dd0208d4a43a7", "624ab00ed715888d15e42246cb3c87fd13123082", "3dd4f937b4c9922a5c0c4027519c949ebbc7e98b", "e76dbeda3e7da0e7ce17dd192127f2800305965c", "23346a18e78062e586cab22195819eb0f18ffc66", "94c8661920066b0a995d45801bfd8d012a49e762", "1a3b939a7d669f8e72c8f4ae2c35af04f50a075d", "cd39f11ac1aa0d16db569e47e08760efd648f4b0", "155ca30ef360d66af571eee47c7f60f300e154db", "6816c447cc4d3d945e0452564ff5d3220e1fdcab", "0804ed47a40fbe6deb5ce93efe551086695ae393", "74cb6530b56e140ea29a57062578448117c1a292", 
"b57aaf54a444d2c4ab1d6627eef2832fd4c79e6c", "540890501c622d87002d424d441ae7a5860c316c", "dc2d652bc6ba61a7e90a7921e1e4f813af8c3814", "1664b784dd7d446ee8838e0eec5b980f61792007", "42142c121b2dbe48d55e81c2ce198a5639645030", "07d847f310d5fa9138f461f0a25c5e0024f1c4af", "2888c136064ff5527a0bb370ac1d9bf71939e066", "70fb409cea4b61aac7be2a65e0abeeb74a0825a2", "18a5f443299784479e78d9e77f175af57cb2fa2b", "1f6226cad38c791e91fa1bff905c66e7d421847a", "6f2f219a4f6d64843efe35f868ed919ce8b3a031", "5563ff2972232dfa8b75548d8ceeda044e4c6f1d", "6ea978b73c0655e5530c121e483f0de2f9d261fa", "5dd350cee6ecfd097b57772f89e6341ff05b5725", "40c47420fdda6b715430153437ac77d62d1da6d8", "20c450f099b661c5a2dff3f348773a0d1af1b09b", "068e59b88a1230d709d99c83a45d3a5b91260810", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "a1ec9cdd6e261e7c8f9d43b39cc39aba9bb9d931", "33457f49553d918e912c2d8c54b81f4fd8a4c234" ], "paperAbstract": "Many service applications use actors as a programming model for the middle tier, to simplify synchronization, fault-tolerance, and scalability. However, efficient operation of such actors in multiple, geographically distant datacenters is challenging, due to the very high communication latency. Caching and replication are essential to hide latency and exploit locality; but it is not a priori clear how to combine these techniques with the actor programming model. \n We present Geo, an open-source geo-distributed actor system that improves performance by caching actor states in one or more datacenters, yet guarantees the existence of a single latest version by virtue of a distributed cache coherence protocol. Geo's programming model supports both volatile and persistent actors, and supports updates with a choice of linearizable and eventual consistency. Our evaluation on several workloads shows substantial performance benefits, and confirms the advantage of supporting both replicated and single-instance coherence protocols as configuration choices. 
For example, replication can provide fast, always-available reads and updates globally, while batching of linearizable storage accesses at a single location can boost the throughput of an order processing workload by 7x.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133931", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/01/Geo-Orleans-TR-012717.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/10/Geo-Orleans-OOPSLA17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/43afc757d291ff48795d9ca424d6c281082a7a05", "sources": [ "DBLP" ], "title": "Geo-distribution of actor-based services", "venue": "PACMPL", "year": 2017 }, "43c0ff1070def3d98f548b7cbf523fdd4a83827a": { "authors": [ { "ids": [ "2124915" ], "name": "Jingyuan Chen" }, { "ids": [ "5462268" ], "name": "Hanwang Zhang" }, { "ids": [ "7792071" ], "name": "Xiangnan He" }, { "ids": [ "1743245" ], "name": "Liqiang Nie" }, { "ids": [ "1722649" ], "name": "Wei Liu" }, { "ids": [ "1684968" ], "name": "Tat-Seng Chua" } ], "doi": "10.1145/3077136.3080797", "doiUrl": "https://doi.org/10.1145/3077136.3080797", "entities": [ "Anisotropic conductive film", "Artificial neural network", "Collaborative filtering", "Division by zero", "Experiment", "Feature extraction", "Information" ], "id": "43c0ff1070def3d98f548b7cbf523fdd4a83827a", "inCitations": [ "3c78ada32da2aac0ef2e23d16e786c24efd8b2a3", "3b3e413978f6b40ad32ef91883c9ee03773e05e6", "5e3257540faa7bf220d0dda97085ceff18674f19", "96111aa2b0f3a8a8f53e5b1fb756c0d9122b7dd2", "630843b2dd2a6b1315e1fc71af5e47c1669906ba", "0a561cdda9964822c5fc4a52beba4dd6a18b2b14", "85de598fa1149dffff84efa3c16b4655099582d2", "1898b8b9789f187b2e34767ad8d7b2612e17d52a", "cb08c775fc4e001e9ba5554267c9cebbf36d6e6f", "102f41cb0df160453b1b88ecc836cb9fa2443318", "fe93121ff205bfd4dca834c2a8aa1fe6ec5649c5", "0c6338604754a40fefda9146d95eed176adc0bc6", "7a8fd8670b928b474c4dfeb1dc1898d08d545025" ], "journalName": "", "journalPages": "335-344", 
"journalVolume": "", "outCitations": [ "fc8d2643020c6208cf99778744e07fe01626316c", "52c0876b25a5721c4c6930d94d5308f0779734ec", "091aded505b84cf87c197875ccfde24d98a300c9", "10e1fb949e10d5fe99d5f1b32bb48d625149bce8", "1ed9721b58c6a6c9bcee61f6a8061b222ae453d3", "2607f0093fecd4fee5244d56fcf3f53ff22e949e", "b6bbda4a8cd2a7bdeb88ecd7f8a47e866823e32d", "2275762a28582716db92df6d525ed2481c7d7f14", "0aa2a4d259433016ebc899c496faea03c024c0bd", "0cba3aa5b8702c6eea0f2b435b3368db0888a664", "7c14c0d71882c35a8ee43e8ea68e214a5c43ed31", "e0e1a359c0617be201d4e9ddfd021ed950dc0940", "4a01961f300813505e85e967207a19d42bd95c0d", "44eafda243dad122f4cc4e378e71bce2402685cd", "17a41aa18b8987ed87d6fc19b87d36faf8a4240c", "184b7281a87ee16228b24716ca02b29519d52eb5", "f9a4c7cb5a26fd3655e1aedcebd1724dda8aeefa", "2ef7d506b25731d0f3ec0c8f90b718b6e5bbd069", "071b16f25117fb6133480c6259227d54fc2a5ea0", "9226535b23ccbbf0030f98161c0ff54199830c7a", "01fcae344d2edb715bcc63a40b6052c0331741bd", "48b0a08bf96b8e28ee982867ce0a9f568b788a0a", "792305a1febd4ac6173fd86164990461a832664f", "bfcbd77d1d4fb1d3b2cf06dac779d2dfa67145ba", "675932329bb06f73c5e3894ed18f40c71283411e", "3c78ada32da2aac0ef2e23d16e786c24efd8b2a3", "130f21d2d1691af611dd32e3086122f8770c8ead", "3db6757e5c65e80dca77bc2a6cd9e742c229df9f", "0e730f8ed7cfdeb458cab36e8495e2cc0ee6d6a6", "55e63d01b237281927d46856ff67776f06df25bf", "15f5721502c2905c555a4eb0a110d6fc211c1fb2", "0c0ff60e9d39c203929457d1ac3f840f8c8e9619", "66b7ad291f7ad0724fd8fabfa84da65794b43696", "5d06f630188a5ec9c05c4961eddbf9f24e2e6916", "623759a673b50ab8d5bc9d345640e2db77c2a508", "714544b7cf35a3b8bdc12fb1967624a38f257a42", "12441a74e709ddab53f9039cf507491df7b3840a", "a360526696a2698ad31dfca4c529e098d2dbdbd1", "1d60ae6720118273b02fd60b54c27441e17e719a", "e0913ff2f472b9c350daf263ebd18d55ac99f4a3", "1960c7d4365c1165283fc2304be7d09a853fb33d", "ea9d2a2b4ce11aaf85136840c65f3bc9c03ab649", "6338670193f9fdcfd3b6e7cec414f15ca906a85f" ], "paperAbstract": "Multimedia content is dominating today's Web 
information. The nature of multimedia user-item interactions is 1/0 binary implicit feedback (e.g., photo likes, video views, song downloads, etc.), which can be collected at a larger scale with a much lower cost than explicit feedback (e.g., product ratings). However, the majority of existing collaborative filtering (CF) systems are not well-designed for multimedia recommendation, since they ignore the implicitness in users' interactions with multimedia content. We argue that, in multimedia recommendation, there exists item- and component-level implicitness which blurs the underlying users' preferences. The item-level implicitness means that users' preferences on items (e.g. photos, videos, songs, etc.) are unknown, while the component-level implicitness means that inside each item users' preferences on different components (e.g. regions in an image, frames of a video, etc.) are unknown. For example, a 'view'' on a video does not provide any specific information about how the user likes the video (i.e.item-level) and which parts of the video the user is interested in (i.e.component-level). In this paper, we introduce a novel attention mechanism in CF to address the challenging item- and component-level implicit feedback in multimedia recommendation, dubbed Attentive Collaborative Filtering (ACF). Specifically, our attention model is a neural network that consists of two attention modules: the component-level attention module, starting from any content feature extraction network (e.g. CNN for images/videos), which learns to select informative components of multimedia items, and the item-level attention module, which learns to score the item preferences. ACF can be seamlessly incorporated into classic CF models with implicit feedback, such as BPR and SVD++, and efficiently trained using SGD. 
Through extensive experiments on two real-world multimedia Web services: Vine and Pinterest, we show that ACF significantly outperforms state-of-the-art CF methods.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080797", "http://www.ee.columbia.edu/~wliu/SIGIR17_attention.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/43c0ff1070def3d98f548b7cbf523fdd4a83827a", "sources": [ "DBLP" ], "title": "Attentive Collaborative Filtering: Multimedia Recommendation with Item- and Component-Level Attention", "venue": "SIGIR", "year": 2017 }, "43c36c242f0f7e1095227da0a04bb766d91fde04": { "authors": [ { "ids": [ "1929462" ], "name": "Suvinay Subramanian" }, { "ids": [ "2573160" ], "name": "Mark C. Jeffrey" }, { "ids": [ "3273348" ], "name": "Maleen Abeydeera" }, { "ids": [ "17856526" ], "name": "Hyun Ryong Lee" }, { "ids": [ "17790173" ], "name": "Victor A. Ying" }, { "ids": [ "1775477" ], "name": "Joel S. Emer" }, { "ids": [ "39783437" ], "name": "Daniel S\u00e1nchez" } ], "doi": "10.1145/3079856.3080218", "doiUrl": "https://doi.org/10.1145/3079856.3080218", "entities": [ "Algorithm", "Fractal", "HTML", "Parallel algorithm", "Parallel computing", "Programmer", "Speculative execution", "Swarm", "Transactional memory" ], "id": "43c36c242f0f7e1095227da0a04bb766d91fde04", "inCitations": [ "2ccf4fcca56f7d14579eab46c529fcdf06365cfc", "bc2b11d3a1aec4e3914f6a4e8712154ba6591c7e" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "587-599", "journalVolume": "", "outCitations": [ "19710fa0e64f36616e112c8a7b4e99ba4cb43c74", "106cfdc91f33ed647e8ca97e9d7ce495fa79dae3", "882f0458bff25138346c358da6f4e036f36f1b60", "2c1ea92d6a4237ede5ea112f1880710b25bec8b3", "4b434f94fafc3ffc76e0c440897ccd222eaa38ac", "245c9703304dc058417f5503ea324bcde8b2eefd", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "11bc7335153854aaa78f45225794a8bd7e1f1b53", "044cafde686e811d1a6aa19a93fe97d0e4d8ab51", 
"1ef7f02bce931c8e9ef529e095b274132ce4011a", "35d357020f53e6aba43fe5c8a42c07ad87be745f", "3c0d4b5f9085b659318cc74157aaf255194a5063", "6ef79198cc5f5e2993676130a5fda2c992c353a1", "f88dc7577badc423bfb84d9b620880be1bf9535f", "01c1f0e97ce5c74c714dc7aa43cb064f45cc3b04", "29f766723ca752138855500084ced04503bfc9c8", "1eb4c18aa99f19a8dc91e6235e2dbdb7fd46d06c", "62d2f81529a491dd1d6994c872f7a3fb6ac4d4a7", "3bf23f74bf33ed52f7c28587fab315610b27221a", "448f4144a1d818754d91d0821ece830501ae6f9f", "0fb659af82f2277c8a62ac888f4bfd01570e5470", "1487996e2cd07b15dd42791cd5f567246f8b83e6", "fb51f2601f47355e8e729af794f889d6f7259284", "12fab45bab0c56ac8bb29f83f431d7233ee9232c", "3d68e6c36e36f47c74368c917436b249218177b7", "5557b730f22e3e90272d477ecfa82013649086c8", "5f2b280c68fd4de0d24e92e07115f68b3ef0110f", "4e068c978a66c48de248ae136b7a361bf0d80ca0", "13f6ddd72bcf62dcc13cf4515be29d48948b9693", "5d882e0b90ac280c10b0734f47b8fafebb353f15", "0c8fef219adbbd4eeae3498428fb6a334bc62a67", "0653e2ed9f683868cb4539eb8718551242834f6b", "00d6f98a44d6af28d0df625af4a2e93308ad7fde", "6bd6a0cc1a4bf62784d8573ca1aeafe2673dcc02", "2042b469be68653afcb2b7b38490c16369b4501a", "b1d14e2b28759afd361d50e14744224b654e205e", "40cb40b7812e019c1051e3a457a8643400b81d51", "6a870ca2e39d804b02bb450f81cac62df2ef024a", "14c39e511c16db80d8cffee4043ea7c553dbd3a4", "24ca5dcd1e96857f262b3cd0dddf0798e1f25c87", "ae96e0563298c23b777be98d110bc86963b896c0", "0948c0acfb779e551e5c2420081eab206f57f396", "786dc9e5ec7c77c2615671691357ed127f0ace50", "23b1c323dee6a97b43786dfeb9f95a7f6b78774c", "0d5a5a3235281ad2b9cca9bae3a2e8c0c7594470", "94c20561f102ede79c461c6aeee74eb7a55d5eb4", "8e6eb9705e4631b862d2e0583f086a148142d642", "29be34b502086bfb36e41721a6a7d4d4b518a579", "a16ae7a7367391f7baeb5085655c329af12683ce", "12ea331478054cafc6e67336e62f0105202f7dfe", "251cea0f5116219456b59950c8dd81691330b12b", "0743afcca25183b7fece8003b77eac3e4fb47656", "48f044ba3e7524413468d90a687c2a9ca3f2847c", "c9036d02b1fcee7432b42c1881ff572696b89dc2", 
"1e3778a9aa1283ca322bd5b60262f09595b3e0c0", "d8777ef17c18609781aa889055100603b13b2986", "a38901fd5b7143b30407435a373edafe96145561", "0dab54e85abfbf1e58487d913d82f930cd150fb3" ], "paperAbstract": "Most systems that support speculative parallelization, like hardware transactional memory (HTM), do not support nested parallelism. This sacrifices substantial parallelism and precludes composing parallel algorithms. And the few HTMs that do support nested parallelism focus on parallelizing at the coarsest (shallowest) levels, incurring large overheads that squander most of their potential.\n We present FRACTAL, a new execution model that supports unordered and timestamp-ordered nested parallelism. FRACTAL lets programmers seamlessly compose speculative parallel algorithms, and lets the architecture exploit parallelism at all levels. FRACTAL can parallelize a broader range of applications than prior speculative execution models. We design a FRACTAL implementation that extends the Swarm architecture and focuses on parallelizing at the finest (deepest) levels. Our approach sidesteps the issues of nested parallel HTMs and uncovers abundant fine-grain parallelism. 
As a result, FRACTAL outperforms prior speculative architectures by up to 88x at 256 cores.", "pdfUrls": [ "http://people.csail.mit.edu/mcj/talks/2017.fractal.slides.isca.pdf", "http://people.csail.mit.edu/emer/papers/2017.06.isca.fractal.pdf", "http://people.csail.mit.edu/suvinay/pubs/2017.fractal.isca.pdf", "http://doi.acm.org/10.1145/3079856.3080218", "https://people.csail.mit.edu/jshun/6886-s18/papers/SJALYES17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/43c36c242f0f7e1095227da0a04bb766d91fde04", "sources": [ "DBLP" ], "title": "Fractal: An execution model for fine-grain nested speculative parallelism", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "43ef02647ad28e9b31572dd47cce39a965336776": { "authors": [ { "ids": [ "2504006" ], "name": "Hari Cherupalli" }, { "ids": [ "3187702" ], "name": "Henry Duwe" }, { "ids": [ "7982124" ], "name": "Weidong Ye" }, { "ids": [ "8153371" ], "name": "Rakesh Kumar" }, { "ids": [ "1813088" ], "name": "John Sartori" } ], "doi": "10.1145/3123939.3123955", "doiUrl": "https://doi.org/10.1145/3123939.3123955", "entities": [ "Algorithm", "Central processing unit", "Computer security", "Design by contract", "Digital electronics", "Information flow", "Information flow (information theory)", "Information security", "Low-power broadcasting", "Microprocessor", "Secure by design", "Stock and flow", "Vulnerability (computing)" ], "id": "43ef02647ad28e9b31572dd47cce39a965336776", "inCitations": [], "journalName": "", "journalPages": "328-340", "journalVolume": "", "outCitations": [ "19ca5f86807610a7aed1008155c7105e43808d4f", "afa1b511c12ef6b20cc23f095561c9423fe645bf", "cba87856ab1fee9161fc21afb64f741acf2c4bf9", "2ed8c0aacd2d905a550a184fe861bfff6576281c", "9458915e0b7e9abfd9f9c24e35b036505c899a8b", "1a7160058a87a2a7dedd2f6e95f25892ec4f3d35", "3c11b4e74086db34430d5381031319cae83ce17a", "0b1fe1af18e2fda99eedf9e5cf3ac18826e585c1", 
"13ad5b5fdd3ba3d74fa96a12450b726696fbfe77", "1af4c13abdb4a0440a7017bb3f72b335e0f7a24f", "636e0e7325e5fc96297b4385dbd34c6b14ebfa89", "0eedea89ee0626aa09139a37549e3da096aef4db", "5578045657a90d2db6ac86bb4afbe38c035fc6a5", "17bff708b1b6791db2dec8621a417c17aa79448d", "262d29b8400657b5cda4414470313f5ad679e6db", "26127796667203f4e015cc1f47072c24f3952356", "033614852cfe29708ddebf6cb3f846582f5dd7ba", "11443efe465ad544f478524da6c66c085b16e28b", "4165376a7aadf6a1c1acddd6fc236047b7becff1", "18a41be780aa642c84a2c6850397fab147e0ac27", "09dce8e6947261600ec145f4544ede7ae5dc437e", "563202938fb372dae6b9e0fcb65e2a7884b8eb3c", "24e74d608559e176ad01f95b3b1f3221e474f357", "8862841f91bea97d39872eb2aa954bd6e6b570da", "6af0aae393ec64ed761610e5c2e3f029b5d64686", "1abb651f5eb33d6a0c3c234c4c8a7dc2e9e47506", "3846f779e22fbd5d84e6ef8956be68fe496bdb6b", "4204fad49d84c19156fa8b08bbf7942cde8f5aa1", "14b40195928b2be688d74afbe82810f90d70dccc" ], "paperAbstract": "The growing movement to connect literally everything to the internet (internet of things or IoT) through ultra-low-power embedded microprocessors poses a critical challenge for information security. Gate-level tracking of information flows has been proposed to guarantee information flow security in computer systems. However, such solutions rely on non-commodity, secure-by-design processors. In this work, we observe that the need for secure-by-design processors arises because previous works on gate-level information flow tracking assume no knowledge of the application running in a system. Since IoT systems typically run a single application over and over for the lifetime of the system, we see a unique opportunity to provide application-specific gate-level information flow security for IoT systems. We develop a gate-level symbolic analysis framework that uses knowledge of the application running in a system to efficiently identify all possible information flow security vulnerabilities for the system. 
We leverage this information to provide security guarantees on commodity processors. We also show that security vulnerabilities identified by our analysis framework can be eliminated through software modifications at 15% energy overhead, on average, obviating the need for secure-by-design hardware. Our framework also allows us to identify and eliminate only the vulnerabilities that an application is prone to, reducing the cost of information flow security by 3.3× compared to a software-based approach that assumes no application knowledge.", "pdfUrls": [ "http://rakeshk.crhc.illinois.edu/micro17_cam.pdf", "http://doi.acm.org/10.1145/3123939.3123955", "http://people.ece.umn.edu/users/jsartori/papers/micro17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/43ef02647ad28e9b31572dd47cce39a965336776", "sources": [ "DBLP" ], "title": "Software-based gate-level information flow security for IoT systems", "venue": "MICRO", "year": 2017 }, "4416236e5ee4239e86e3cf3db6a2d1a2ff2ae720": { "authors": [ { "ids": [ "2642390" ], "name": "Shoumik Palkar" }, { "ids": [ "34998803" ], "name": "James J. 
Thomas" }, { "ids": [ "4012116" ], "name": "Anil Shanbhag" }, { "ids": [ "22252150" ], "name": "Deepak Narayanan" }, { "ids": [ "2436756" ], "name": "Holger Pirk" }, { "ids": [ "1962485" ], "name": "Malte Schwarzkopf" }, { "ids": [ "35192281" ], "name": "Saman Amarasinghe" }, { "ids": [ "1901948" ], "name": "Matei Zaharia" } ], "doi": "", "doiUrl": "", "entities": [ "Apache Spark", "Data-intensive computing", "Intermediate representation", "Libraries", "Library", "Library (computing)", "Machine learning", "NumPy", "Parallel computing", "Program optimization", "SQL", "Structured Query Language", "TensorFlow", "Workload", "pandas" ], "id": "4416236e5ee4239e86e3cf3db6a2d1a2ff2ae720", "inCitations": [ "dfa37e1ad351ae889502fb704cc93f4a77c1c642", "391a6a423e06b0767e9fc9df4f43c5533c0ab662", "01c71603c2e9c42eb3141dd32d0ee564fb5274b4", "fa099d45212e0b0e6486b6bd378e6f4fd06c222d", "2b914583df10b31c4b5d80a4bcf731a265e6ab7f", "416a3348da1114e26171a50694b66f8c35024571", "61d42fafe09329b00bba177f444567be51444dc2", "c48dc6a8549898ee389432e7072a6c989727aed2", "3b0143befcc8a02cb4745f47260402d2ff47843c", "7c4c34ab0cb5b9a02b548593533ec108f754daa4", "95228c931949b3d6cb709ba36a53f419fa7b8253", "1156373cdb17608780bd2c00fff26bcbeeb2189c", "a66fc8cca220b7ac885a03745836899c037346d8" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "676e50a4d2141ae66a0d2aafcf79c8c989fcce33", "09c5293b647fca40fde28ac6c38737f07e873e41", "2194c3460ab71f3826db00b045b2ae590c753319", "8925a2067468c7ae6b2b3556487a362725cb55be", "669a754df3cffa8f52bbfad60c44f8ae8aa83183", "45a91b4d9bcb2178c05e95685aca2f2aac94cdb0", "73f072ead051f3f3c764b31e88f3a3aeb0373f7b", "244f7c483b1035144f04f7c58f20c3f806d3f1b3", "080ed793c12d97436ae29851b5e34c54c07e3816", "165d9bd7e9c4a030b09cf21e35ea0bf96090d8cb", "4954fa180728932959997a4768411ff9136aac81", "0c325c32039656541760b2d8f02be4636e026785", "b513711621e81d0abd042e0877ca751581a993f5", "4cf0f575677146eeb002487e56abc2cf7cafe591", 
"831cc6d9b7a333b38d34d923b52aed438e90ee1e", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "7b1157db688dd5c8b9df1f42caa76d4790fdf2c8", "74241ccbd1045cf937ad8aeb44ed4e22bcdb9ea1", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "a2f2662b1d3510af048a713b2690ff6d720ebdda", "213a719cdecdd2e3a449c736db0d4449476ab323", "bbb9c3119edd9daa414fd8f2df5072587bfa3462", "5b3ac250f3f50073f6234cafa6fc1a233cc624bb", "62ea7fbdc3349f4fe8f12f098f1ce4a746faa5db", "ef62e60b81317a24dbeb8ded6dc4a8ed89b776a8", "bb4cf037d8a5adbb3f08a3405d926d022b8c27c5", "b2bf34c0c0007145a389e014b7ddaa3daa76f332", "7ead08393fa2000ea03bb1cc1e3e6e0ac991aac6" ], "paperAbstract": "Modern analytics applications combine multiple functions from different libraries and frameworks to build increasingly complex workflows. Even though each function may achieve high performance in isolation, the performance of the combined workflow is often an order of magnitude below hardware limits due to extensive data movement across the functions. To address this problem, we propose Weld, a runtime for data-intensive applications that optimizes across disjoint libraries and functions. Weld uses a common intermediate representation to capture the structure of diverse dataparallel workloads, including SQL, machine learning and graph analytics. It then performs key data movement optimizations and generates efficient parallel code for the whole workflow. Weld can be integrated incrementally into existing frameworks like TensorFlow, Apache Spark, NumPy and Pandas without changing their user-facing APIs. 
We show that Weld can speed up these frameworks, as well as applications that combine them, by up to 30\u00d7.", "pdfUrls": [ "https://cs.stanford.edu/~matei/papers/2017/cidr_weld.pdf", "http://groups.csail.mit.edu/commit/papers/2017/cidr_weld.pdf", "https://people.csail.mit.edu/malte/pub/papers/2017-cidr-weld.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4416/236e5ee4239e86e3cf3db6a2d1a2ff2ae720.pdf", "s2Url": "https://semanticscholar.org/paper/4416236e5ee4239e86e3cf3db6a2d1a2ff2ae720", "sources": [], "title": "Weld: A Common Runtime for High Performance Data Analytics", "venue": "", "year": 2016 }, "4442cbe90a479ad5aa172731bea32c875d7162db": { "authors": [ { "ids": [ "1891319" ], "name": "Shin-Ying Lee" }, { "ids": [ "2797270" ], "name": "Carole-Jean Wu" } ], "doi": "10.1109/IISWC.2017.8167755", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167755", "entities": [ "Algorithm", "Branch predictor", "CPU cache", "Cache (computing)", "Central processing unit", "Computation", "Computer data storage", "Fairness measure", "Graphics processing unit", "Hardware acceleration", "Interference (communication)", "Kernel (operating system)", "OpenCL API", "Program optimization", "Real computation", "Run time (program lifecycle phase)", "Scalability", "Scheduling (computing)", "Speedup", "Throughput" ], "id": "4442cbe90a479ad5aa172731bea32c875d7162db", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "43-53", "journalVolume": "", "outCitations": [ "f359d33a1c09d2f626217e21f722508968c7057b", "348ba95c8168b20af8b15f60dd214174b12cea26", "242cbdc5966fd14ba4a00815ac301fb278d8f544", "5e2a41dd1755df34ada65ac046ac9b09483b4475", "1400901fca7695d180a44d1f0f49f6830e0ceeeb", "75be9b3d5047d1c44f0a5d95d7ea9314fa0af469", "1cb7d1b3226d0a2fc77e54dee596a7a2d6d098d5", "bdf72cb8005c17a3c1a03658419aa3bf1d734679", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "2ae2d80ffb19521bcd7fdbf26e9ed2a5d9641bb0", 
"0d075dae4e4ca9cabef40f9bec4c953ccfc31113", "109df0e8e5969ddf01e073143e83599228a1163f", "173c39f15841ab5ee2976c34f6b4c3808c386d94", "6aebe661168743a0b948229c57be7ffb6217e0e8", "00156e79606084497789662dfaf59c3b54a10722", "4bce8e7c13331dbffa05d6cfc086efd04e0317a9", "68073f621072d793e95b9562bf9a9245415d5a96", "1458ae1786123fee04d7f863e93b7aa60295bf77", "081dec43c2dbe76ff43c810594495f11ab092a10", "7a2804fe421e853ef59abeffa41060ffe700602d" ], "paperAbstract": "Modern computer systems are accelerator-rich, equipped with many types of hardware accelerators to speed up computation. For example, graphics processing units (GPUs) are a type of accelerators that are widely employed to accelerate parallel workloads. In order to well utilize different accelerators to gain better execution time speedup or reduce total energy consumption, many scheduling algorithms have been proposed to select the optimal target device to process an OpenCL kernel according to the kernel's individual characteristics. However, in a real computer system, there are a lot of workloads co-located together on a single machine and would be processed on different devices simultaneously. The CPU cores and accelerators may contend shared resources, such as the host main memory and shared last-level cache. Thus, it is not robust to schedule an OpenCL kernel execution by simply considering the characteristics of the kernel. To maximize the system throughput, it is important to consider the execution behavior of all co-located applications when performing OpenCL kernel execution scheduling. In this paper, we provide a detailed characterization study demonstrating that scheduling an OpenCL kernel to run on different devices can introduce varying performance impact to itself and the other co-located applications due to memory interference. 
Based on the characterization results, we then develop a light-weight, scalable performance degradation predictor specifically for heterogeneous computer systems, called HeteroPDP. HeteroPDP aims to dynamically predict and balance the execution time slowdown of all co-located applications in a heterogeneous computation environment. Our real system evaluation results show that comparing with always running an OpenCL kernel on the host CPU, HeteroPDP is able to achieve 3X execution time speedup when an OpenCL kernel runs alone and improve the system fairness from 24% to 65% when an OpenCL kernel is co-located with other applications.", "pdfUrls": [ "http://faculty.engineering.asu.edu/carolewu/wp-content/uploads/2012/12/Lee_iiswc2017_final.pdf", "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167755", "http://faculty.engineering.asu.edu/carolewu/wp-content/uploads/2012/12/Lee_IISWC_2017_talk.pdf", "http://faculty.engineering.asu.edu/carolewu/wp-content/uploads/2012/12/Lee_iiswc2017_final-1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4442cbe90a479ad5aa172731bea32c875d7162db", "sources": [ "DBLP" ], "title": "Performance characterization, prediction, and optimization for heterogeneous systems with multi-level memory interference", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "4446c7b23be25015416e2fdb9d423e34acfd307c": { "authors": [ { "ids": [ "3056465" ], "name": "Chin-Chia Michael Yeh" }, { "ids": [ "35153405" ], "name": "Nickolas Kavantzas" }, { "ids": [ "1732516" ], "name": "Eamonn J. 
Keogh" } ], "doi": "10.1109/ICDM.2017.66", "doiUrl": "https://doi.org/10.1109/ICDM.2017.66", "entities": [ "Algorithm", "Association rule learning", "Cluster analysis", "Data mining", "Exploratory testing", "Motif", "Sequence motif", "Statistical classification", "Stream (computing)", "Time series" ], "id": "4446c7b23be25015416e2fdb9d423e34acfd307c", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "565-574", "journalVolume": "", "outCitations": [ "7b476ac3cd7d0bb54d8e7ba44ff13525c93b4a06", "30c7703f4d1105c1a57a982ae18bb6a33df16add", "5598dcce57887498b3a74f00648b1ae0f64775c0", "8e0ff4b8bbeac8f301e00494a39bd1b4a199fba1", "3a7468e1c9c0afea88c16ded5e4be4535cff20a2", "aef0d43af55799cefcac0b0d2485aa607c46c475", "887a246dae18b8e9a65826bea88983e4f7bf5270", "f8626419c32d8ac9eee0629ef194b4d5ffbc6f11", "ce52c50c93ae3be998a5c36f646342cdcb2de1a9", "add39272e8762cea5a24c95ad238af5d61c3bd54", "2a51b4fbea138a3a63836715624f2a71d25e25df", "2409557812a3d26258949ba73a05031591f42bdc", "62acca98a5bd6ffb3071e9c2a2f559950e4301fc", "0a0dfb112946b30862fedf8cc2798dc976d0f20c", "d382b9c11e5c6a8e173fbeb442545e3be8d3e3a5", "61523cfe6f51859e00aa8ce320114c03151208fa", "287cd6ea9468e6d207904b5c23588cd6579d9e2e", "78813791a933e48a77962ff4aa12168e3b997f26", "0d5017f05a9b4db635a013dab8dec06855a96dee", "0bacca0993a3f51649a6bb8dbb093fc8d8481ad4", "a3d1aaa147efd46919d381965815c42a222869a9" ], "paperAbstract": "Time series motifs are approximately repeating patterns in real-valued time series data. They are useful for exploratory data mining and are often used as inputs for various time series clustering, classification, segmentation, rule discovery, and visualization algorithms. Since the introduction of the first motif discovery algorithm for univariate time series in 2002, multiple efforts have been made to generalize motifs to the multidimensional case. 
In this work, we show that these efforts, which typically attempt to find motifs on all dimensions, will not produce meaningful motifs except in the most contrived situations. We explain this finding and introduce mSTAMP, an algorithm that allows meaningful discovery of multidimensional motifs. Beyond producing objectively and subjectively meaningful results, our algorithm has a host of additional advantages, including being much faster, requiring fewer parameters and supporting streaming data. We demonstrate the utility of our mSTAMP-based motif discovery framework on domains as diverse as audio processing, industry, and sports analytics.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.66", "http://www.cs.ucr.edu/~eamonn/Motif_Discovery_ICDM.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4446c7b23be25015416e2fdb9d423e34acfd307c", "sources": [ "DBLP" ], "title": "Matrix Profile VI: Meaningful Multidimensional Motif Discovery", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "447f492235719d7c2b061b95d818f928d6cbdac5": { "authors": [ { "ids": [ "33082895" ], "name": "Minesh Patel" }, { "ids": [ "17766732" ], "name": "Jeremie S. 
Kim" }, { "ids": [ "1734461" ], "name": "Onur Mutlu" } ], "doi": "10.1145/3079856.3080242", "doiUrl": "https://doi.org/10.1145/3079856.3080242", "entities": [ "Best, worst and average case", "Brute-force attack", "Dynamic random-access memory", "End-to-end principle", "Failure rate", "Forward error correction", "Interval arithmetic", "Memory refresh", "Profiling (information science)", "REAPER", "Refresh rate", "WAITS" ], "id": "447f492235719d7c2b061b95d818f928d6cbdac5", "inCitations": [ "0f41b9c0900b1c17b63d3d59bd4c334f7cf736af", "0c4fe1f1a8043e8f4175b21faca1b72bff8033e6", "24b300b2395e4d0b0d2c8b9797fc9f8e735a58ef", "2cfa2068d49fc0e9cc5d96bc498c63e782f7478f", "60aa9510638d4d9739ebfc3a0042187988482346", "15388b06b42d9a61a1d083bc3bf140ef40f066fa", "983e87929eeb3f77c2ddb02d17d6efe978c80667", "b06b556169d8b55d6d8058164dd599c67c50c430", "0b393cab00401cb971cf71970e00c2767f881f75", "2976932bec7334a150e1bb6916b7564bdaa864ea", "5c478e5c774eb3cf71e446e2c9eb2166ca032b28", "00cc482570d739e7b733f45b6f8f1836b24056bd", "1ebdf99bf03787a10d1c37bc9f93e89116e29bd6", "042855085a52934e5599e02555071bb222f6a000" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "255-268", "journalVolume": "", "outCitations": [ "870403ceaadbe9579b1841baa39c1ac2d03fef3e", "7c62c7e0b4e026f6b5b027735c99cbf033789ba9", "3c89345bb88a440096f7a057c28857cc4baf3695", "33438b1148a84d6e5bf2cad70bf7754d546ca5d7", "68073f621072d793e95b9562bf9a9245415d5a96", "703c74b035ba667afeaa0d4287641bc87d2ea12f", "5dfbdcedb7bcb8644b816bab2cc3d3fadd36775b", "65b97ce84be7cc5bb2fd75574592d6205eaf8137", "05c56f4abc527fbf384ad011dc9c0a613955641a", "4b5f67cba9a1f98a5390ed9cadcf018671c02c08", "2263e2beda96efcff8ff19efaefcb85f5136aeca", "108c840d5d1847948a2de0250490a327ae069ee6", "dc060372253f1bacdea2c785e6525f781fe8c039", "468035263afa59095614f26a62e0217da4a1aeed", "472392b93150be7bb0132511d71d686770c2c79b", "4b812319f4fe762ab18a0b13bd126d4a740ff26c", 
"a5bd15d203c6aa740aba16776b422db010e66b58", "9341125876271d46cc25f86dac93f25acb343e8d", "a56683f144d7498e1fc5b34a9314c138221d71c5", "5e60858a530a5da13bce10ce8b8740557b95e533", "303217473b686415c4af060caab1cb04ff174cd7", "0ffbd9cd0fe4fa005fc9b6eea24ecf9bff67c806", "793b84b732bf257cb842e316a246ccfa3b0ef6a2", "4118a6b6a4741cb85a254d705084862afdd842a9", "48248d695ea792ee7a3f7ddc18bb145b61a877f1", "071564baef078867847fc54a3a0b50dd22d29d62", "ddc3e4501691c41bda5d927628f5f4abb2cfeb7f", "2fa80c8342dcb349f1d91c102a76400c86dfb042", "d38fdbc9f51084461bf5ff22ad4d6e81a90ad6f0", "012d556d67acedc6898930b4c93f54b87aabf5ee", "da3b79d36c5a0581e5e45e545a825b4336d63695", "35235e03fd84d273235abbe71357b9b9dea77e3d", "60aa9510638d4d9739ebfc3a0042187988482346", "7bc046671369ba23568ff03bbee6ba04a91bd092", "8963230958477fd760ac060d5d2eb66d310f78c6", "e762b1b654798cec0fb9d6000c7f7c777ac0689f", "3f82aa1373e823ec622b3021fff9df4a82230267", "13089a313be0836f3fa8911236250e36b970ba2a", "1a8c7439080c2e5d42bf173c4db084713e5f05b7", "44077076ba79033a3a73713f2041ecf224a3c359", "1db11a76fa33ca81970aa345fe4bc150ae846ce0", "35b92289fe9c19e5baf462ffe91bdd2d25768b00", "bae2a2142b97d3fa9f3be8b9716201fff46b6663", "03eaf3a6b6db01bdb749e8c3a097a0198c61b976", "d89dbe46e5b7ade9e613d33ee068b68cbf63f614", "1c32ad0a42109fab826eb3054df7cfc33b424125", "a662a40d7e8202f8ca5f55916f7e3c1f5b4379e9", "03d55467b20e662fbaa8416e853f57c93834a9fb", "00f04d81b2d8733b041152cc70ac36e6683f96e8", "36897d1d2661777913d492390c4ad9d004276308", "1ac13e114099c51f86b7bc31b63cf87f4472488b", "174a3d4ad2caba68b55fd3ee863b9471e3786f21", "6ea53f34af4a42ebb2ac49dd05c5f53d649f0f83", "0eacd1b47786f740b723d906d46e160f143c0378", "07edb51d5c37fbd44d30e7619be8c352cbe53472", "24b300b2395e4d0b0d2c8b9797fc9f8e735a58ef", "6aba2b1785bd26eb6d85820a734ddaa262d20571", "61ea230d0e757ff46d3a381e79691bd54b92a503", "cbc3b549904f3cab1fbe2dd4ccb67809525fe6eb", "0edf4ef1b8e09e4abc994f7d450bc090262e2c9b", "85398d5f19157c91bf00da3d36210e72d57887e4", 
"06d5e64635ff941d08cf833706554c493deb7acb", "831b348bbabaf2fbab1700de982440de11bedf72", "ab6888a1b024d109c768f81b49c77b585efc975a", "f4f37512b9786dbd9980d343e47fdc41032d0ca6", "170fc81c89a7fa5541d078b8400529fdea94af18", "1be96030c042ff6b5bbe05bf0fd86f5f9a4d27dc", "588fd53a6cbdb2f2d7f2bd676944d7b5fdfafcb9", "13db8d6e4e983ed397c7f76cfed04b5ed22e730c", "35a2cb3f17bdd5d7ca5e5283b164fad21d1737ff", "5a04b332441e2ff025313bfd303383e13050a274", "2e56eddf32abc9c94bffcfe680827f05a0490a5d", "6902867509928c0e5c19aff3e62e1def3a19d581", "1da48d8173e34eb7825870248c4c12b6bbe7d9c1", "2394c6644efa856f0da160a0f0031d74cd3b5000", "517c5cd1dbafb3cfa0eea4fc78d0b5cd085209b2", "2cfa2068d49fc0e9cc5d96bc498c63e782f7478f", "44d886f89cdbd4fdf5dd25d83b2d37deb7541bf7", "00737c5ef0c750d311e1e464f13630ca5b554954", "096ee9c89d43fd03a602aed3a37fdf43dc8e60ae", "e1c29d3df660295a192aca11f827e955bcf8a39b", "84564d347d505467dd628e56319bc037b0a1ec28", "356955d0f190829b7481b8dc39c5f90dfac1b652" ], "paperAbstract": "Modern DRAM-based systems suffer from significant energy and latency penalties due to conservative DRAM refresh standards. Volatile DRAM cells can retain information across a wide distribution of times ranging from milliseconds to many minutes, but each cell is currently refreshed every 64ms to account for the extreme tail end of the retention time distribution, leading to a high refresh overhead. Due to poor DRAM technology scaling, this problem is expected to get worse in future device generations. Hence, the current approach of refreshing all cells with the worst-case refresh rate must be replaced with a more intelligent design.\n Many prior works propose reducing the refresh overhead by extending the default refresh interval to a higher value, which we refer to as the target refresh interval, across parts or all of a DRAM chip. 
These proposals handle the small set of failing cells that cannot retain data throughout the entire extended refresh interval via retention failure mitigation mechanisms (e.g., error correcting codes or bit-repair mechanisms). This set of failing cells is discovered via retention failure profiling, which is currently a brute-force process that writes a set of known data to DRAM, disables refresh and waits for the duration of the target refresh interval, and then checks for retention failures across the DRAM chip. We show that this brute-force approach is too slow and is detrimental to system execution, especially with frequent online profiling.\n This paper presents reach profiling, a new methodology for retention failure profiling based on the key observation that an overwhelming majority of failing DRAM cells at a target refresh interval fail more reliably at both longer refresh intervals and higher temperatures. Using 368 state-of-the-art LPDDR4 DRAM chips from three major vendors, we conduct a thorough experimental characterization of the complex set of tradeoffs inherent in the profiling process. We identify three key metrics to guide design choices for retention failure profiling and mitigation mechanisms: coverage, false positive rate, and runtime. We propose reach profiling, a new retention failure profiling mechanism whose key idea is to profile failing cells at a longer refresh interval and/or higher temperature relative to the target conditions in order to maximize failure coverage while minimizing the false positive rate and profiling runtime. We thoroughly explore the tradeoffs associated with reach profiling and show that there is significant room for improvement in DRAM retention failure profiling beyond the brute-force approach. 
We show with experimental data that on average, by profiling at 250ms above the target refresh interval, our first implementation of reach profiling (called REAPER) can attain greater than 99% coverage of failing DRAM cells with less than a 50% false positive rate while running 2.5x faster than the brute-force approach. In addition, our end-to-end evaluations show that REAPER enables significant system performance improvement and DRAM power reduction, outperforming the brute-force approach and enabling high-performance operation at longer refresh intervals that were previously unreasonable to employ due to the high associated profiling overhead.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080242", "https://people.inf.ethz.ch/omutlu/pub/reaper-dram-retention-profiling-lpddr4_isca17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/447f492235719d7c2b061b95d818f928d6cbdac5", "sources": [ "DBLP" ], "title": "The reach profiler (REAPER): Enabling the mitigation of DRAM retention failures via profiling at aggressive conditions", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "44a86aa5b47e158619d2cb815b6dc99201e8f099": { "authors": [ { "ids": [ "6152731" ], "name": "Wenhao Wang" }, { "ids": [ "3044625" ], "name": "Xiaoyang Xu" }, { "ids": [ "3071249" ], "name": "Kevin W. 
Hamlen" } ], "doi": "10.1145/3133956.3133986", "doiUrl": "https://doi.org/10.1145/3133956.3133986", "entities": [ "Component-based software engineering", "Confused deputy problem", "Control flow", "Control-flow integrity", "Fault detection and isolation", "Immutable object", "Sandbox (computer security)", "Scalability", "Virtual method table" ], "id": "44a86aa5b47e158619d2cb815b6dc99201e8f099", "inCitations": [], "journalName": "", "journalPages": "1909-1924", "journalVolume": "", "outCitations": [ "23e8236644775fd5d8ff5536ba06b960e19f904b", "b0ecd2efb16b91f8ff3856d719aca24626406695", "522ef437c14885e224d219214684cfe58987e9f9", "6a8f65381a627a2db6c756a7185d9106f0acefec", "015012ed5d793c7aa00d6b19934869ae22b3b132", "483266ca3a33ac23b4b1459873de08f2284d5f7a", "5f8bbc28027342b16df77fa1b9a1efe4628d41dc", "0fc7f3a21359665c456853e3fe09c9a5c4a24f37", "3e1ff9416ec1c2f94d0ddbeb6171eea4c336d663", "39040e2b60fcb01dfed8d638f2cb66218cfdb144", "52612064aa065b29930b56fbf54745883bba94dc", "316eb469e6b4ca848d1b68f4a52650f880d495c6", "542db06acbafe6c8e0837dcbb0dbbeb7c19f976d", "348b0049b0c7b3f7e74b77cca30213cb7e550360", "35a0dcb48754b4a8f382ffb86a0b5794b2574fce", "a1f1d25e88f9f4a0f12684a7df6fe86c1d9b5b7f", "2abd63717d05a73c9269349fc24403faf094c594", "4f4590962bde0c2050122f91e5978271bb24d556", "3d6395a04f7436b143339bbb7e4e46dfaaaf8afb", "5493f512ba418c21f1ce20e20985157f7509007c", "675248fe5de874ab3fcef348611c4a01595510c1", "b4b92eb555dd9c672f894216c5d50bf6164df78b", "6f5635fb17ff8a27b08104de6d0322c32e943a9f", "2a8fb1894c268fac270925a2f2ddc347e001c9f5", "686150e2179840ed40a0166cba6c5d507f3aa49c", "1dc5aebebf425c1c225daef502e2d966771968fe", "100ebdc07a14c85b5986d3adffa34b047b5be7a4", "7c8c9bdb30ae9b40365c355504bdb457a51e108c", "082b2a75e94cf1142a5c3a301418e6e05568d8b0", "01ee7db67463b53143e0b2c126363ed6d3c8a532", "a4a7aaa197c29dcfa1556182aea425144137d4e3", "713baa5c3ff9b3f2979889dc90430d73a680942e", "323f248664ce1505da85e24eeffbb4b8a0f06d7a", "2caf47ac0035c27019965e04b0ba2711f20d59a9", 
"157153c5c5b3fb42bd4de60d2cd8380e7038a82b", "48a8e9d8a41009eb6b7733b139eb5eff30d72776", "67b086caacc543b7d30b2f006f77a315bc9572e0", "4b41bb221ccae289bd66dfc1210f36cc172350c5", "642bdae15a4a3f2e580e49f9726e2eee675d5ebf", "0988a425689f6f3700e797f4a2c18f73692573c3", "698e9afaead2fedf5c2008bc0bd29184d438c71d", "49d50f1d0bb2a03388f3cb60f9f1ca01974d371c", "30e76f32c323adb0ff340760380fe5a08505b641", "01b5b648af61ddb382da638a299fae2315b25192", "2ac1846337c4a8c09bc37db69ebbf07ddf83c889", "14a5ce46f45853af912cd0618983f8174baa7fab", "3d285335c5bb0fb1f8659e977cea6ba0efe00a17", "0623c7f04ea2697fff3ff487736f94536d5301d8", "01a2d5c69a09ec3fa82de6dfe12811f3d981ab7e", "9cf8d8ebe9a59d8605fa1ecf5872fb886fdc4b38", "11496e28e6dd3f1ab7d2f054c46d7587988dfd86", "22050b3ee9c69c64dc796358c7f0ba247d4adce3", "129359a872783b7c3a82c2c9dbef75df2956d2d3", "4c1206d65920c8434d987e705bf21e9651fd21bb", "e45f652aac78e7eaa0ae950304d5f4647066d55f", "255bdcb05805c97d973081b59bc61c649263ceae", "10a59e595461de43e3183c99a380e6a35ae264bd", "0e039df712774fcea67f214d9b5780c1dc250747", "48a8b9bd6df63b13093cd67ee61911e7a485be57", "9b2585f7248c8b5a22e9c816506e01060213ca85", "422c2d83a959df1f7c3e99b8a2c77772d8b2e7c3", "4cd63e0701177f04e377fa9f0857c5b0fa10b07e", "704e2027ecdaa9561b75a854b585336c16cea89f", "07f9aa35346107785ad6b6cd07537b24fa7bf0d2", "02feae068442486dd657292b156b4bbbb348b4cd", "03f827395a17beb941241dbd72322705bdf79791" ], "paperAbstract": "Object flow integrity (OFI) augments control-flow integrity (CFI) and software fault isolation (SFI) protections with secure, first-class support for binary object exchange across inter-module trust boundaries. This extends both source-aware and source-free CFI and SFI technologies to a large class of previously unsupported software: those containing immutable system modules with large, object-oriented APIs---which are particularly common in component-based, event-driven consumer software. 
It also helps to protect these inter-module object exchanges against confused deputy-assisted vtable corruption and counterfeit object-oriented programming attacks.\n A prototype implementation for Microsoft Component Object Model demonstrates that OFI is scalable to large interfaces on the order of tens of thousands of methods, and exhibits low overheads of under 1% for some common-case applications. Significant elements of the implementation are synthesized automatically through a principled design inspired by type-based contracts.", "pdfUrls": [ "http://utdallas.edu/~hamlen/wang17ccs.pdf", "http://doi.acm.org/10.1145/3133956.3133986" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/44a86aa5b47e158619d2cb815b6dc99201e8f099", "sources": [ "DBLP" ], "title": "Object Flow Integrity", "venue": "CCS", "year": 2017 }, "44a97f4eaaefaf5338f8aed2913d5debb2459f7e": { "authors": [ { "ids": [ "1997436" ], "name": "Briland Hitaj" }, { "ids": [ "1700850" ], "name": "Giuseppe Ateniese" }, { "ids": [ "1693990" ], "name": "Fernando P\u00e9rez-Cruz" } ], "doi": "10.1145/3133956.3134012", "doiUrl": "https://doi.org/10.1145/3133956.3134012", "entities": [ "Adversary (cryptography)", "Algorithm", "COMEFROM", "Centralisation", "Computer", "Database", "Deep learning", "Differential privacy", "End-to-end principle", "Federation (information technology)", "Information extraction", "Information leakage", "Information privacy", "Information sensitivity", "Machine learning", "Privacy", "Server (computing)", "Test set" ], "id": "44a97f4eaaefaf5338f8aed2913d5debb2459f7e", "inCitations": [ "87cc61f175b7b58242849812a31ff6a96dfe5827", "088aabe3da627432fdccf5077969e3f6402f0a80", "13afff7af3a56163fdaa1a2449e5e06ae21137ad", "a9286519e12675302b1d7d2fe0ca3cc4dc7d17f6", "6acd95817e6ccbb9376194d84a846964033f1ed0", "bb7bf049f9bca00e9569a404ce37301a5db9e21d", "7c4f52328c2869bdff8034d2867baa5b67d0ce27", "cf7e5a59cbe6fa10840a2f5e1c21adadc843d401", 
"6cae9ad284a73471a8ed9e483b1673a60d61d946", "2a6a4eb81c3dd4ceaf006519beeeac592a623b8b", "530a4ab0308bc98995ffd64207135ca0ae36db7f", "e4c2f8e4aace8cb851cb74478a63d9111ca550ae", "c60146e2c3568dd0eb7e4f9a28be8cacfce140e0", "be51854ef513362bc236b85dd6f0e2c2da51614b", "6ffc4e334b16bab1c08e354196c8b93316a26a0e", "08e2db0c2f79b7a807747f19707ab3e96d3541a0", "16860d3f7b5a776a229f589ddcd6a7f6810098b1", "6888f3402039a36028d0a7e2c3df6db94f5cb9bb" ], "journalName": "", "journalPages": "603-618", "journalVolume": "", "outCitations": [ "595a00f0975b5d5c28d904ddba1ae5a493316573", "7aaede70f5efcb1542a80707c1f0f8b01955a7d2", "185c811f94c6526c50dcf3da0aff78fe032a27f7", "1592fe924114866c1ac559bae33ea789930daa98", "326bb49d3ae9e1e1551028200916192e50004105", "14ce7635ff18318e7094417d0f92acbec6669f1c", "f7dd8d2ef9ab3bac9a4241055ebb02af947fe632", "158d62f4e3363495148cf16c7b800daab7765760", "0db4d7a384c7feb5832ff3563c24cb0f6140e0ef", "3c57e28a4eb463d532ea2b0b1ba4b426ead8d9a0", "6f2632d3569223056c040899b5891980288539d8", "0144941d255dad89d3d90c2d131a15cc01df9829", "4bc5486b60b09d06adbbcdc4875b37c90f385257", "7196fde179eb31e5ee212adb8e509e852edc0046", "6154ce8c02375184f7928e41c4fae532500f7175", "05b073c44188946aeb9c410c1447262cbdf77b6d", "1ecfe23503600b7a6a6ed3dcce86542420e36a06", "0d67362a5630ec3b7562327acc278c1c996454b5", "6b74ec27d76ae42c2faa9211e2640141595838b6", "29831b8830e278c8c28e45c8e9c41c619c89f86a", "83bfdd6a2b28106b9fb66e52832c45f08b828541", "459d3adf0528bed82420374ffb9ff50c2cb34f03", "4b8f1518b21b73d30cedf31560a83a8322f8147d", "2538e3eb24d26f31482c479d95d2e26c0e79b990", "891e61c55b49dc55e95c4ed1803cd0801df02d00", "402da07a0ac4645e26370ff5ac8ab3540257a8ab", "1336146e7f95b295bb73c7659c6af4befd86cbdd", "52eec5b914f72c4cd3f03eaedf1d38bb9a4df6de", "5e279a183435995cbafb09d87365c0e5c9103235", "2198e3c498334a23de88d74a9bcaa9022904a4be", "6bdcedb895256357a6bc8ffef5a0790697403372", "49934d08d42ed9e279a82cbad2086377443c8a75", "0122e063ca5f0f9fb9d144d44d41421503252010", 
"02bc27c39eaaa6b85d336be81b15ca19f112a950", "85e7a06b802a35f1f95112db58ca91d8a5eaaac6", "763afb9dc8650101be06053e2eb612d9e3a1ce18", "35756f711a97166df11202ebe46820a36704ae77", "20b24b61d0ba7265a659b56edb5bdc6f05e60ebe", "126df9f24e29feee6e49e135da102fbbd9154a48", "35734e8724559fb0d494e5cba6a28ad7a3d5dd4d", "178631e0f0e624b1607c7a7a2507ed30d4e83a42", "32d2a9389e5fab7e3065acc7d0c2e57278fa0fac", "185192f5d410e838ee6996d9e41a1756e224fb7e", "fc8cda36a0972e7de1ac3a7bcb81dc32da79bee4", "98db1043277736681843f97f2171f7a93627eada", "1047c50bcd412d4cf2f735a33d427b0313be9d5e", "0ef7d9e618cbb507d69f8ebcdc60b8a1f3135bff", "46f74231b9afeb0c290d6d550043c55045284e5f", "46aca9fd693cda49f7f02d575efaee0977f078c7", "721d4ae075de7bf9ea2cac01fe02e2920ee5c789", "11ccb00bd3ff98e3f46a51cca059241c70954d4f", "105713f44bf88978aae6c87fbc708428b47c29c4", "14ec4c626dbaa6390ef1d3c78d2689e3e8f098ca", "6de2b1058c5b717878cce4e7e50d3a372cc4aaa6", "fbc0f9ddb774fd585726ba3fdca3317726232a06", "43b4aee8c254412fee7653a6d6a477e0eb8e9928", "283ecc8622694c070fa53aee7a1c37dadc603f8d", "9b618fa0cd834f7c4122c8e53539085e06922f8c", "8215eed5098c6f0615351afe0d60710d30e59a3b", "b532099ff8b67049f292cd62700dca37fc2be623", "0fcaa5d69913b2601fb4fac3a16ba384e5f1883b", "40a63746a710baf4a694fd5a4dd8b5a3d9fc2846", "000f2d99632d5d6c494bf9e1b179638e48433e99", "22bc7549801fd359f932bbdc11b8ca24b87baadf", "f63487b3fda2d96d8b3e97391448c76e00f2353c", "0a7196fcadeb009d5582b02ce4aa59546f6036e4", "012b8a941e96594783fb10d3a785e91f13384413", "232bb5913f12cdcf8419a3e44d06a5d6fffe2c9b", "65701a018397691d63142704716cdf358a1b5a54", "45f6957cab31e802934cc761380c1a4a37c66208" ], "paperAbstract": "Deep Learning has recently become hugely popular in machine learning for its ability to solve end-to-end learning systems, in which the features and the classifiers are learned simultaneously, providing significant improvements in classification accuracy in the presence of highly-structured and large databases.\n Its success is due to a combination 
of recent algorithmic breakthroughs, increasingly powerful computers, and access to significant amounts of data.\n Researchers have also considered privacy implications of deep learning. Models are typically trained in a centralized manner with all the data being processed by the same training algorithm. If the data is a collection of users' private data, including habits, personal pictures, geographical positions, interests, and more, the centralized server will have access to sensitive information that could potentially be mishandled. To tackle this problem, collaborative deep learning models have recently been proposed where parties locally train their deep learning structures and only share a subset of the parameters in the attempt to keep their respective training sets private. Parameters can also be obfuscated via differential privacy (DP) to make information extraction even more challenging, as proposed by Shokri and Shmatikov at CCS'15.\n Unfortunately, we show that any privacy-preserving collaborative deep learning is susceptible to a powerful attack that we devise in this paper. In particular, we show that a distributed, federated, or decentralized deep learning approach is fundamentally broken and does not protect the training sets of honest participants. The attack we developed exploits the real-time nature of the learning process that allows the adversary to train a Generative Adversarial Network (GAN) that generates prototypical samples of the targeted training set that was meant to be private (the samples generated by the GAN are intended to come from the same distribution as the training data). 
Interestingly, we show that record-level differential privacy applied to the shared parameters of the model, as suggested in previous work, is ineffective (i.e., record-level DP is not designed to address our attack).", "pdfUrls": [ "http://arxiv.org/abs/1702.07464", "https://arxiv.org/pdf/1702.07464v3.pdf", "https://arxiv.org/pdf/1702.07464v1.pdf", "http://doi.acm.org/10.1145/3133956.3134012", "https://arxiv.org/pdf/1702.07464v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/44a97f4eaaefaf5338f8aed2913d5debb2459f7e", "sources": [ "DBLP" ], "title": "Deep Models Under the GAN: Information Leakage from Collaborative Deep Learning", "venue": "CCS", "year": 2017 }, "44b92a386c4c30fa9de99bc30abadd9d04007b0e": { "authors": [ { "ids": [ "1873477" ], "name": "Sergei Arnautov" }, { "ids": [ "1743906" ], "name": "Pascal Felber" }, { "ids": [ "2314032" ], "name": "Christof Fetzer" }, { "ids": [ "7612177" ], "name": "Bohdan Trach" } ], "doi": "10.1109/IPDPS.2017.41", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.41", "entities": [ "Algorithm", "Application framework", "Context-free grammar", "FIFO (computing and electronics)", "High-throughput computing", "Multi-core processor", "Non-blocking algorithm", "Operating system", "Scalability", "Thread (computing)", "Throughput" ], "id": "44b92a386c4c30fa9de99bc30abadd9d04007b0e", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "907-916", "journalVolume": "", "outCitations": [ "fae286bb9f154cb9009ef24abfaa7529e079b466", "363b85f61630ebdc1194a59816ad950bf305c40a", "026846d1c3a79a6ad6067007b0eac8922502550c", "045a975c1753724b3a0780673ee92b37b9827be6", "13d660826130d12d696b86e79052191c6d3b1a18", "6db9bd41b294a7b45792b8f4ac8864f5d178f35e", "68df322f2263ba1e32050beea657b108c49de8ae", "1c5c8e567439e46feff03981b47fc5ba7ceb44d8", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "523b2e438d43364b6e70bb3a97e395aff5488113", 
"52ac2f1620687a9070c6c3354c30343c3de80671", "51a745c63f2e551488924a20650c5179d0332dfd", "0a289fd7b14345822b1acda6d82750b15d59663e", "bf104ebfbd44924b6b7602e48b0a74e987baaca8", "37a1e8411669e29cf8fbf48ec920c97c0066ac7e", "2d0fa88a9644cca92730869bf8ba8ce1b86f7dbd", "33da45838d0b6c082cc71e603fd802bac4d56713", "1c74f84dcfaaa317a82708ad30f395a893dbb9c6" ], "paperAbstract": "With the spreading of multi-core architectures, operating systems and applications are becoming increasingly more concurrent and their scalability is often limited by the primitives used to synchronize the different hardware threads. In this paper, we address the problem of how to optimize the throughput of a system with multiple producer and consumer threads. Such applications typically synchronize their threads via multi-producer/multi-consumer FIFO queues, but existing solutions have poor scalability, as we could observe when designing a secure application framework that requires high-throughput communication between many concurrent threads. In our target system, however, the items enqueued by different producers do not necessarily need to be FIFO ordered. Hence, we propose a fast FIFO queue, FFQ, that aims at maximizing throughput by specializing the algorithm for single-producer/multiple-consumer settings: each producer has its own queue from which multiple consumers can concurrently dequeue. Furthermore, while we provide a wait-free interface for producers, we limit ourselves to lock-free consumers to eliminate the need for helping. We also propose a multi-producer variant to show which synchronization operations we were able to remove by focusing on a single producer variant. 
Our evaluation analyses the performance using micro-benchmarks and compares our results with other state-of-the-art solutions: FFQ exhibits excellent performance and scalability.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.41" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/44b92a386c4c30fa9de99bc30abadd9d04007b0e", "sources": [ "DBLP" ], "title": "FFQ: A Fast Single-Producer/Multiple-Consumer Concurrent FIFO Queue", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "44eafda243dad122f4cc4e378e71bce2402685cd": { "authors": [ { "ids": [ "3440740" ], "name": "Fuli Feng" }, { "ids": [ "1743245" ], "name": "Liqiang Nie" }, { "ids": [ "10966012" ], "name": "Xiang Wang" }, { "ids": [ "2248826" ], "name": "Richang Hong" }, { "ids": [ "1684968" ], "name": "Tat-Seng Chua" } ], "doi": "10.1145/3077136.3080773", "doiUrl": "https://doi.org/10.1145/3077136.3080773", "entities": [ "Computation", "Fusebox", "Unified Model" ], "id": "44eafda243dad122f4cc4e378e71bce2402685cd", "inCitations": [ "b9108dd2bb537916bb284fc88007c3dfec261427", "43c0ff1070def3d98f548b7cbf523fdd4a83827a", "12b161b4256292a338fac99317465c295092d710", "6ba4e10d06d9842765a4350bf5abbd3dd095045c", "3db6757e5c65e80dca77bc2a6cd9e742c229df9f", "446f7fc5e46def1ae860b341257f09d6cb0e5967", "22873d98ff3f7b3b5490f3982c3fe0c0c5d665c7" ], "journalName": "", "journalPages": "455-464", "journalVolume": "", "outCitations": [ "07fd261cc56a8f25cdda21152a11473d6e7555dc", "fc57291062df473c678bc89eba56056259bd2546", "0cd6c70ac57b796c12bd59229ea901a77ce8f066", "3c78b47bee5408aaa372963525b0390890a0c8bf", "55e63d01b237281927d46856ff67776f06df25bf", "004d5491f673cd76150f43b0a0429214f5bfd823", "406d8cd2c6528d55e231ab07063067ffbd6fd114", "cc83e27cec14e2fccde7d685d36321885df8e8b0", "ec08a32d68bd540a485b0b72018b47bfb444105c", "27208c88f07a1ffe97760c12be08fad3ab68fee2", "57c622d227307b8cf192f73e76211fe1983c5fa7", 
"fc8d2643020c6208cf99778744e07fe01626316c", "24e5ace6d7ffd78df13ea9da923f344e81684dad", "06af31d3dbba868f8305b87385bfe0c6d4682426", "04ed2f856f33e50072103e0f0821378f45fc140e", "0d1ec1eab64bcf1ec0a0b037e95a4f41f0d55ffe", "c1885303abec299c028b86208eb02f71540ac436", "5f4c05ba08fac9cde40235ebd4eb9abc6ed2d712", "4f3417e73528025a5429547814e5a2fd91deb818", "013eb535ed8ca00cd47ba50b475429c2d7316ceb", "25f3fcad9a7fbde43fc1c4ec5cf64aaba1766366", "1e510551f87556ac6012ad5c7f01ec4ac0f34675", "1172ed012bf4d1fd9f99604a065d7fa19dc69919", "f9a4c7cb5a26fd3655e1aedcebd1724dda8aeefa", "75d663b9d6a492b4c5a466f9ba2dfa06fc450d6e", "5b2b1b6bf4fca41e87fca6d41e95e5efa1218aa4", "5c09b57411d5c0ddf1c1ac916af92a2d62e05bb5", "2947e8015a1362823ae12a79dfd05022d0d412d4", "15cfa64511f169972feba1031ece0eee0925e03a", "332572059aabc2bdf91c93ed9d0835750e36dd74", "2405890883d9bb9842924b2620f4f7af43f72531", "66a6dde6a6a20f77ce52cb2464a52777837bd81e", "12b161b4256292a338fac99317465c295092d710" ], "paperAbstract": "Many professional organizations produce regular reports of social indicators to monitor social progress. Despite their reasonable results and societal value, early efforts on social indicator computing suffer from three problems: 1) labor-intensive data gathering, 2) insufficient data, and 3) expert-relied data fusion. Towards this end, we present a novel graph-based multi-channel ranking scheme for social indicator computation by exploring the rich multi-channel Web data. For each channel, this scheme presents the semi-structured and unstructured data with simple graphs and hypergraphs, respectively. It then groups the channels into different clusters according to their correlations. After that, it uses a unified model to learn the cluster-wise common spaces, perform ranking separately upon each space, and fuse these rankings to produce the final one. We take Chinese university ranking as a case study and validate our scheme over a real-world dataset. 
It is worth emphasizing that our scheme is applicable to computation of other social indicators, such as Educational attainment.", "pdfUrls": [ "http://lms.comp.nus.edu.sg/sites/default/files/news-attachments/15-FengFuli_Poster.pdf", "http://doi.acm.org/10.1145/3077136.3080773" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/44eafda243dad122f4cc4e378e71bce2402685cd", "sources": [ "DBLP" ], "title": "Computational Social Indicators: A Case Study of Chinese University Ranking", "venue": "SIGIR", "year": 2017 }, "44eb1eb1013a35940fc1ab424dd1aad411d205f0": { "authors": [ { "ids": [ "2811961" ], "name": "Baijun Wu" }, { "ids": [ "37473728" ], "name": "Sheng Chen" } ], "doi": "10.1145/3133929", "doiUrl": "https://doi.org/10.1145/3133929", "entities": [ "Compiler", "Debugger", "Debugging", "Functional programming", "Type safety", "Type system" ], "id": "44eb1eb1013a35940fc1ab424dd1aad411d205f0", "inCitations": [ "88f408ffdf6eb960662a1a0dd409a13476f6790f" ], "journalName": "PACMPL", "journalPages": "105:1-105:27", "journalVolume": "1", "outCitations": [ "c85086843e6c90a6f7382334a965be546b0748fe", "218955c8a99c6ff568501d619ac8c44346c619dc", "88f408ffdf6eb960662a1a0dd409a13476f6790f", "e62009e4e87c38aa62907827babd10180fb45121", "1b424497a13d40055ec6e0d6c1b2fdcc88a7320d", "814c164c88ba7dd22e7e501cdd1a951586a3117b", "0e5d9ca8c876c6881109445d6d96010c388752db", "ffe182236cc7592edd445c9a270c151b822457d4", "592d1fd6f5f269c0fa4aef6aada79d0c98deee6d", "7115b9ebbe497f3ea90a07b1542de1f75887cc30", "e057ee990a303301897bd831d6a6add3b4c3e965", "2dd3fde015c45b0b4cf5b4fc3bc2b10be9d60a20", "5ae82d62ceba02154696ed1f5a6bc84596e8c9e6", "7387d78320087c2f7753a1094ee5718ada9f0141", "01bcd4d16ab8c1afe1b77f0a99431bb1a68724e3", "05783fc38071c5bc97a29cfac49595a71b79b9b3", "fc6fe79a608df0dd42d4973f6c1efd68cfdf1bc7", "0f78ad571b6c60ef5b294976648d278f5cfaca9a", "6ade35cd41b1597fc9622cebe7f60565ce67d0b0", "66d16114080346cfa1be77e3306d511f97c63d27" ], "paperAbstract": "Providing 
better supports for debugging type errors has been an active research area in the last three decades. Numerous approaches from different perspectives have been developed. Most approaches work well under certain conditions only, for example, when type errors are caused by single leaves and when type annotations are correct. However, the research community is still unaware of which conditions hold in practice and what the real debugging situations look like. We address this problem with a study of 3 program data sets, which were written in different years, using different compilers, and were of diverse sizes. They include more than 55,000 programs, among which more than 2,700 are ill typed. We investigated all the ill-typed programs, and our results indicate that current error debugging support is far from sufficient in practice since only about 35% of all type errors were caused by single leaves. In addition, type annotations cannot always be trusted in error debuggers since about 30% of the time type errors were caused by wrong type annotations. 
Our study also provides many insights about the debugging behaviors of students in functional programming, which could be exploited for developing more effective error debuggers.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133929" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/44eb1eb1013a35940fc1ab424dd1aad411d205f0", "sources": [ "DBLP" ], "title": "How type errors were fixed and what students did?", "venue": "PACMPL", "year": 2017 }, "450f66cd38a37201759384b33493798d2a82b9f6": { "authors": [ { "ids": [ "39559311" ], "name": "Panruo Wu" }, { "ids": [ "1775855" ], "name": "Nathan DeBardeleben" }, { "ids": [ "1764948" ], "name": "Qiang Guan" }, { "ids": [ "2970218" ], "name": "Sean Blanchard" }, { "ids": [ "2506582" ], "name": "Jieyang Chen" }, { "ids": [ "3058378" ], "name": "Dingwen Tao" }, { "ids": [ "40026298" ], "name": "Xin Liang" }, { "ids": [ "9547335" ], "name": "Kaiming Ouyang" }, { "ids": [ "1756221" ], "name": "Zizhong Chen" } ], "doi": "10.1145/3018743.3018750", "doiUrl": "https://doi.org/10.1145/3018743.3018750", "entities": [ "Algorithm", "Checksum", "Cholesky decomposition", "Error detection and correction", "Exception handling", "Experiment", "Fault tolerance", "Karl Hessenberg", "LAPACK", "QR decomposition", "ScaLAPACK", "Scalability", "Singular value decomposition" ], "id": "450f66cd38a37201759384b33493798d2a82b9f6", "inCitations": [ "7d9da3f5be055f36ac8294d44b688c356072c6cd", "02700e7e0cdc291e55af704530e181e0da668c1e" ], "journalName": "", "journalPages": "415-427", "journalVolume": "", "outCitations": [ "a2f99528a2dd954f38f6e0bd42b686c165f23403", "a5ff4253946266cc5300bc34d00b345fffc9fcb0", "f6430121b2af7d55b090a1c260570630e6cf1f41", "01d62cd850496455ce1616500f491690effa5c98", "be977870b0ac2ea1a9be9365ec29f40436467395", "8d63b44ea043fc3c2b0ec90b2ffbbf84ba446674", "79c0062e0eae09d6715054fe7fc46d4164443aba", "e8a566fe85f7187f14f0b345847207509a4c274c", "a19563b4014919c405964cea5271bebe918ad265", 
"7716c84e876af200211dc7344ef93c055f607e10", "07dde3b790082698bdee0507497853754f78f8d2", "0642df41e63e4f6223a6f4f9b9bb56c7dbebc34f", "47afcd5db3c0e34134ea52c2727fe305e3d0b7f1", "0dd9623584f0d80071631b9bf899817df2db2e37", "3abf71e837cb7b1e9fe7e54192d986142d87b1a2", "b295a2e3667b52b900950417c2e9b58b01938f34", "1c20521112e3bf937e756a28061ad4887f4ad720", "aa862560b2a98460de34024de9a8e04be0fe9299", "0ed15c747d5499fc63b011b6e252d9a8c6906229", "5c0e8af36e20b8ea213561e8c3d706b4e2f2cc8d", "18fe996c6f43a8f301cd842507045b679ba3506a", "627a32d5f5c0f848e8d6b522fa101f82f856d7b8", "99a592c9fdff5f26312a70913c38c4084d41c8af" ], "paperAbstract": "This paper presents an algorithm based fault tolerance method to harden three two-sided matrix factorizations against soft errors: reduction to Hessenberg form, tridiagonal form, and bidiagonal form. These two sided factorizations are usually the prerequisites to computing eigenvalues/eigenvectors and singular value decomposition. Algorithm based fault tolerance has been shown to work on three main one-sided matrix factorizations: LU, Cholesky, and QR, but extending it to cover two sided factorizations is non-trivial because there are no obvious \\textit{offline, problem} specific maintenance of checksums. We thus develop an \\textit{online, algorithm} specific checksum scheme and show how to systematically adapt the two sided factorization algorithms used in LAPACK and ScaLAPACK packages to introduce the algorithm based fault tolerance.\n The resulting ABFT scheme can detect and correct arithmetic errors \\textit{continuously} during the factorizations that allow timely error handling. Detailed analysis and experiments are conducted to show the cost and the gain in resilience. We demonstrate that our scheme covers a significant portion of the operations of the factorizations. 
Our checksum scheme achieves high error detection coverage and error correction coverage compared to the state of the art, with low overhead and high scalability.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018750" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/450f66cd38a37201759384b33493798d2a82b9f6", "sources": [ "DBLP" ], "title": "Silent Data Corruption Resilient Two-sided Matrix Factorizations", "venue": "PPOPP", "year": 2017 }, "452b7f1eb4899fb83d6bc21a180643c4433684bb": { "authors": [ { "ids": [ "2121270" ], "name": "Ali Jos\u00e9 Mashtizadeh" }, { "ids": [ "3355945" ], "name": "Tal Garfinkel" }, { "ids": [ "2480704" ], "name": "David Terei" }, { "ids": [ "2506126" ], "name": "David Mazi\u00e8res" }, { "ids": [ "8373823" ], "name": "Mendel Rosenblum" } ], "doi": "10.1145/3037697.3037751", "doiUrl": "https://doi.org/10.1145/3037697.3037751", "entities": [ "C++", "Castor", "Compiler", "Failure rate", "Fault tolerance", "FreeBSD", "Library (computing)", "Memcached", "Multi-core processor", "Server (computing)", "Software bug", "Throughput", "Web server" ], "id": "452b7f1eb4899fb83d6bc21a180643c4433684bb", "inCitations": [ "7b0d3331717729f0b03077575ad2798e69073736", "aaca858e5d071b7215cd9954371d5911745145b3", "76fe760bd7bb162331f51f6b3d53976a31f5695b", "4fe72f556bbc79d9e048b8c9bc480f0a0445fb4c", "3c3b8ee97b1f5082a0ea91bf2539607e52212e43", "5802c2ecb6e2449d9d6ddb3cac902f7cb10eaa10", "037070f1e362b008254f45467c861db0b7406b04", "08fbc539322d4da8fe14e63e92214539a66d9c81" ], "journalName": "", "journalPages": "693-708", "journalVolume": "", "outCitations": [ "1c7e1a0bde89990a9173664d3ff6931542741226", "54f3331b575b2d451c2d716f86496cada23d596d", "44474a05056efdcc7fbf75aa66227ac70e88748f", "0e6f25ca2e9dbcca8a630ac5924470aafa3fbcac", "05a618847e4f08e5bca29dff732757779722b2e0", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "5e51e70eb2e423988cf73262d9cb3adf72f5b6f1", "72657b0428f9b8f705546eb5a9147203a534d8f6", 
"4096f239b93dfee8fe033db2846a334db9c1f524", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "059476c845d38253efad824010ed15df14941d85", "13d4fa20983a6605fb7b13371a01bbafdbabf7d5", "d3630df8179b704a516b6ed01cfcad24ae50ade7", "0821b7efb6a47783d8bf9a62291b24d94bbaaf31", "47b7f413e553f8534b584c51a7cc7903b98d3c48", "114801eccb5eb0831fd1848f351a138253a42f15", "2a85b683073c2c8b762079c52a0d54392b243afb", "ca4114da5e6885e907ccf094f2f469dd23f6c816", "89dce5f511b06739cbd1e2e2300775106bfe1d17", "07ac72d6e957799195672b7a85b72f8200e2e122", "3d15e4bdb161f1bac2eea817ff9ef0eddba64141", "1066cce77abb53eea67bfcc1d2dee8e7f4e3ebcf", "12d5c3bcd2dd94b4f07f358be291c5906b16dba4", "519404f3a71f5684c405ebbb218aa29fa2028379", "29d42e6d55bd74eaf1ff2e86778cc11eee6c8f4f", "1ec96c3938c037982cb75a40d5efd619f487911f", "63d2e311f16ac8f745bc44677ba13bfa5b67b5b8", "2fbbf89a921e4aa19ee3bfe73d0b34a6ad764656", "8932c6d523f9f5aba76993c71c0089ba15155a7c", "1d54c70351e9ee93b87273b2e93750c89e32256f", "077d6198039dea3f60ef9f3d3ef9f128fd3edf71", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "0e578433d4e8bb2a571c87a2d22816074902f009", "92a8e3696a9c0b5d0b225133132db1f8c3c4bed5" ], "paperAbstract": "We present Castor, a record/replay system for multi-core applications that provides consistently low and predictable overheads. With Castor, developers can leave record and replay on by default, making it practical to record and reproduce production bugs, or employ fault tolerance to recover from hardware failures.\n Castor is inspired by several observations: First, an efficient mechanism for logging non-deterministic events is critical for recording demanding workloads with low overhead. Through careful use of hardware we were able to increase log throughput by 10x or more, e.g., we could record a server handling 10x more requests per second for the same record overhead. 
Second, most applications can be recorded without modifying source code by using the compiler to instrument language level sources of non-determinism, in conjunction with more familiar techniques like shared library interposition. Third, while Castor cannot deterministically replay all data races, this limitation is generally unimportant in practice, contrary to what prior work has assumed.\n Castor currently supports applications written in C, C++, and Go on FreeBSD. We have evaluated Castor on parallel and server workloads, including a commercial implementation of memcached in Go, which runs Castor in production.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037751", "http://www.mashtizadeh.com/papers/asplos17-castor.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/452b7f1eb4899fb83d6bc21a180643c4433684bb", "sources": [ "DBLP" ], "title": "Towards Practical Default-On Multi-Core Record/Replay", "venue": "ASPLOS", "year": 2017 }, "453cea3c3010ce40af9ffbf55d8d0c2cb56de468": { "authors": [ { "ids": [ "2649892" ], "name": "Gwangsun Kim" }, { "ids": [ "2866959" ], "name": "Niladrish Chatterjee" }, { "ids": [ "2341074" ], "name": "Mike O'Connor" }, { "ids": [ "33828705" ], "name": "Kevin Hsieh" } ], "doi": "10.1145/3126908.3126965", "doiUrl": "https://doi.org/10.1145/3126908.3126965", "entities": [ "Computation", "Dynamic random-access memory", "Graphics processing unit", "Locality of reference", "Memory bandwidth", "Memory management unit" ], "id": "453cea3c3010ce40af9ffbf55d8d0c2cb56de468", "inCitations": [ "0c4fe1f1a8043e8f4175b21faca1b72bff8033e6" ], "journalName": "", "journalPages": "24:1-24:12", "journalVolume": "", "outCitations": [ "07c3c9f1be15b52e259309b2e0a9ca71fa7dc76f", "4308295a2eaef30be423520918ad224dc2f3ffe2", "0eff1cb1cc7af126b87b86c8c929fe5ff5106ea3", "28cc7453c5f3f9ecb9415e631b0829ec9af8a4c3", "2dc38b527e91f8cfee6f6c7ba4d079087c293471", "72dfed87d3549369ae93dbbbfd371f88c4e344c8", 
"0693ff4b3a8d1452b897a876d3ffe6b2074e98e4", "4cbd7800701981b58b4c1f6d53c7a66a5fb15633", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "7ea15c138cc72588fa376ff819f4bb8ca0b324da", "37e49c57dd4d0849380d177222db53e52ff21347", "85f5b05e7eba438f1fc4bc6e21cc1af00c424fbc", "4678cdcf7e57c1563379ac7cc344254f01ace572", "1d413979868946143313f4119bac440eb2f6e6fb", "09ba565ec5dd3816968edaaee8351cf653e26d81", "217beeb53274ba6972d660afff1841e890f3721e", "97f37efade44dabfd25b467d594c843d56db875d", "7c6c7a97488fdbb7c06f85c345b348183bf0a704", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "2d6f002477015469075954c6748a1a85af352c94", "d9043a6c844905687ac72054d83d7680a82ece9d", "5cdf290c839ba8753876bf255ed8c99fb4ba1299", "03e3a481d9713ad4d39dc608959d87b3f8d8144e", "10b014e882764f5800ecdcbaba1fa08795d0c54d", "0612811b3ed9fc7ef8300e65cb70360613dab01d", "2d680892a7318ab7eff879054ea7ab6aeeb51fe9", "179f80848143cf109fa6aebae6c3844da03b062c", "36e1b02a66ed928ef13e3a2ba6852e90a8713036", "4e8505919eb22265f107ebbeeee3fa78bf6d893a" ], "paperAbstract": "3D-stacked memory devices with processing logic can help alleviate the memory bandwidth bottleneck in GPUs. However, in order for such Near-Data Processing (NDP) memory stacks to be used for different GPU architectures, it is desirable to standardize the NDP architecture. Our proposal enables this standardization by allowing data to be spread across multiple memory stacks as is the norm in high-performance systems without an MMU on the NDP stack. The keys to this architecture are the ability to move data between memory stacks as required for computation, and a partitioned execution mechanism that offloads memory-intensive application segments onto the NDP stack and decouples address translation from DRAM accesses. 
By enhancing this system with a smart offload selection mechanism that is cognizant of the compute capability of the NDP and cache locality on the host processor, system performance and energy are improved by up to 66.8% and 37.6%, respectively.", "pdfUrls": [ "http://users.ece.cmu.edu/~tsuwangh/pub/toward-standardized-ndp-gpu_sc17.pdf", "http://doi.acm.org/10.1145/3126908.3126965", "http://www.cs.utah.edu/~nil/pubs/sc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/453cea3c3010ce40af9ffbf55d8d0c2cb56de468", "sources": [ "DBLP" ], "title": "Toward standardized near-data processing with unrestricted data placement for GPUs", "venue": "SC", "year": 2017 }, "456d3118c93d30fbafb076f0a9d0d614d2c847a2": { "authors": [ { "ids": [ "3356743" ], "name": "Alessandro Epasto" }, { "ids": [ "1728881" ], "name": "Vahab S. Mirrokni" }, { "ids": [ "1724391" ], "name": "Morteza Zadimoghaddam" } ], "doi": "10.1145/3087556.3087574", "doiUrl": "https://doi.org/10.1145/3087556.3087574", "entities": [ "Algorithm", "Computation", "Distributed algorithm", "Expectation\u2013maximization algorithm", "Mathematical optimization", "Optimization problem", "Submodular set function" ], "id": "456d3118c93d30fbafb076f0a9d0d614d2c847a2", "inCitations": [ "0199ff847bcfdfa7663da8532c8d8a1fad6cc8a5" ], "journalName": "", "journalPages": "25-33", "journalVolume": "", "outCitations": [ "157aef34d39c85d6576028f29df1ea4c6480a979", "3d25eb8241345f86101fda145d95d89c27844fd1", "379ef18377d803d87859314c0e110cdf64f2ea73", "52d9477a8293d44b0f8be5c07d56d468d035b0b0", "141e35263ab810983c90d47ad62eb4fab5e51717", "46b51960a073a759e1d55b41c75b6bb3e5273be8", "55b19c1f31992ad3b9e8ac7c6782dad7a3518125", "69b6a42ad7068962363687c038c6ae2e0760867a", "5866275c8455d5fa93d7915fb64875fec4029f7a", "02f34f9d891ec0561439008028a4059db52f3aac", "30e547dfab832ea0428b137d9e4824a22d8efd0b", "abbdb6177b4408c5885a569dc24e6361f91cf169", "30fc67dfcc25ab3ce1642cb3b4f114940414dee8", 
"a7ccb9bbdb4fc36a02e1290e7f50bb279dc6bc36", "1eb5fce431067ab19a44a7962dfef28ef7127ab4", "7e4cb3ca74b9e0d83cb53340d4ead2331cc8328c", "82afdb9f6b3441bbe4e8a5d6e1d0f5a647748d68", "594d2e123ecb8ec0bc781aec467007d65ab5464d", "6518035089d0c87b925c6262bbf5b949d3bb3fff", "0ddae0a1b2ade9f8f35895e98c6ec15e882282bb", "b9e43395663f74c581982e9ca97a0d7057a0008c", "047d078f6cfed4cc067643b5e15f4e8f65a97c0b", "01413e1fc981a8c041dc236dcee64790e2239a36", "54d2b5c64a67f65c5dd812b89e07973f97699552", "83a6cacc126d85c45605797406262677c256a6af" ], "paperAbstract": "We study the problem of efficiently optimizing submodular functions under cardinality constraints in distributed setting. Recently, several distributed algorithms for this problem have been introduced which either achieve a sub-optimal solution or they run in super-constant number of rounds of computation. Unlike previous work, we aim to design distributed algorithms in multiple rounds with almost optimal approximation guarantees at the cost of outputting a larger number of elements. Toward this goal, we present a distributed algorithm that, for any ε > 0 and any constant r, outputs a set S of O(rk1/r) items in r rounds, and achieves a (1-ε)-approximation of the value of the optimum set with k items. This is the first distributed algorithm that achieves an approximation factor of (1-ε) running in less than log 1/ε number of rounds. We also prove a hardness result showing that the output of any 1-ε approximation distributed algorithm limited to one distributed round should have at least Ω(k/ε) items. In light of this hardness result, our distributed algorithm in one round, r = 1, is asymptotically tight in terms of the output size. 
We support the theoretical guarantees with an extensive empirical study of our algorithm showing that achieving almost optimum solutions is indeed possible in a few rounds for large-scale real datasets.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087556.3087574" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/456d3118c93d30fbafb076f0a9d0d614d2c847a2", "sources": [ "DBLP" ], "title": "Bicriteria Distributed Submodular Maximization in a Few Rounds", "venue": "SPAA", "year": 2017 }, "4576d8d29b76bf1733f1aa1cf7cbca79fe71aef6": { "authors": [ { "ids": [ "34313805" ], "name": "Noah Watkins" }, { "ids": [ "36608964" ], "name": "Michael Sevilla" }, { "ids": [ "2210623" ], "name": "Ivo Jimenez" }, { "ids": [ "36006864" ], "name": "Kathryn Dahlgren" }, { "ids": [ "3064226" ], "name": "Peter Alvaro" }, { "ids": [ "38470159" ], "name": "Shel Finkelstein" }, { "ids": [ "3198700" ], "name": "Carlos Maltzahn" } ], "doi": "", "doiUrl": "", "entities": [], "id": "4576d8d29b76bf1733f1aa1cf7cbca79fe71aef6", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "29845cab369ca85cb2c21d756f96123f01d38e7c", "09a180d9d410d8e551f42401d6453d57406b6d29", "0f55217987ec25afa0f815e0aa3957e669b0280e", "458902c0a4b5e9855c8a4be9eeb4cb4ce534b068", "359760cb0df73b9d8e6501b5ab5ff93a6c12b44f", "914f287d6e83ac8e525d4c0e643cee6a1dce6fb4", "d755e4504c15c46a0306d043d091b0a49e71c353", "38e154a13a91c683b209f115f4b8b70037ac7a52", "4d242926067c489fb1113e39d76db551b861b0ed", "130ee77295f5f95e9c8a45c9c56bbc650258dba0", "24679ccb0586642553a21e9fcd8aa5a57f97cabe", "2da4ab6c02d97fe47b589ddd450a5c41f2b47bb9", "1c0692596a5fa4baa50418a43e84b89872439092", "be69c449c3eb18301decc3ecc097ce739e0549a7", "d58cc242fd70227cff98376a914e0b42b1b79db8", "6e90c995cc9caa0f7d9d68d536f5e16e9bcbbcd6", "69f10e0a399b5a03645b8d56d67f7ff5f156ab55", "6cbcd4239345787caff1884bf8029acfac87354f" ], "paperAbstract": "", "pdfUrls": [ 
"https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-watkins.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/hotstorage17_slides_watkins.pdf", "https://www.usenix.org/conference/hotstorage17/program/presentation/watkins", "https://cross.ucsc.edu/wp-content/uploads/2017/09/DeclStore-Layering-is-for-the-Faint-of-Heart.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4576/d8d29b76bf1733f1aa1cf7cbca79fe71aef6.pdf", "s2Url": "https://semanticscholar.org/paper/4576d8d29b76bf1733f1aa1cf7cbca79fe71aef6", "sources": [ "DBLP" ], "title": "DeclStore: Layering Is for the Faint of Heart", "venue": "HotStorage", "year": 2017 }, "45857188467abec8672aca3a4735b13a8f3d1990": { "authors": [ { "ids": [ "12212141" ], "name": "MohammadReza HoseinyFarahabady" }, { "ids": [ "2383231" ], "name": "Javid Taheri" }, { "ids": [ "1699399" ], "name": "Zahir Tari" }, { "ids": [ "9392149" ], "name": "Albert Y. Zomaya" } ], "doi": "10.1109/ICPP.2017.42", "doiUrl": "https://doi.org/10.1109/ICPP.2017.42", "entities": [ "Best-effort delivery", "Bin packing problem", "Database", "Enterprise software", "Graph database", "Heuristic", "In-memory database", "Lambda architecture", "Online analytical processing", "Performance Evaluation", "Programming paradigm", "Quality of service", "Scalability", "Service-oriented architecture", "Spatial variability", "Stream (computing)" ], "id": "45857188467abec8672aca3a4735b13a8f3d1990", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "332-341", "journalVolume": "", "outCitations": [ "12203385fbe8e26aefa1d82c9effaacb44f27a98", "c6e9c32ebecf55ad4a3373d6111ab90705ed9b40", "808585a76d350dbe567c35b74086948cdd95cad4", "590bd345ef4b8f274af3363a52b7d8f518cdc08a", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "3e19046c665867bbe557685da60738a40738010a", "7a978f2902460e732c50c36a171deb11733df1fc", "2fe662e2c777f676cea85e1745b3134a2a780a3c", 
"203dfcfffbb069f7d13fd4e7c1aafa9237817aaa", "ce91b0ca3b88bd0464f5e7ca7564ee84ed7b371c", "fcd61f5508b15f7369258ae0d0db01eacf399f7b", "bf82f0b0cf448b18fec979d25368c6cd9c04ce0c", "a205ec6ef5dba0ab862cb4d127737104aae5a476", "28b9804c3fac26b5e61dc570a175a590b3162193", "0fc6c9dcd7a850e7a0d3796d32e6771353154fd9", "3ef44315caebb6169b7ffbf9886cd782193ab40e", "1833dee660500dd104ca84d99600b70c2479ba3c" ], "paperAbstract": "Lambda architecture is a novel event-driven serverless paradigm that allows companies to build scalable and reliable enterprise applications. As an attractive alternative to traditional service oriented architecture (SOA), Lambda architecture can be used in many use cases including BI tools, in-memory graph databases, OLAP, and streaming data processing. In practice, an important aim of Lambda's service providers is devising an efficient way to co-locate multiple Lambda functions with different attributes into a set of available computing resources. However, previous studies showed that consolidated workloads can compete fiercely for shared resources, resulting in severe performance variability/degradation. This paper proposes a resource allocation mechanism for a Lambda platform based on the model predictive control framework. Performance evaluation is carried out by comparing the proposed solution with multiple resource allocation heuristics, namely enhanced versions of spread and binpack, and best-effort approaches. 
Results confirm that the proposed controller increases the overall resource utilization by 37% on average and achieves a significant improvement in preventing QoS violation incidents compared to others.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.42" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/45857188467abec8672aca3a4735b13a8f3d1990", "sources": [ "DBLP" ], "title": "A Dynamic Resource Controller for a Lambda Architecture", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "458b90fbaa4437ff081af62d86fb6fb094dc3034": { "authors": [ { "ids": [ "1689115" ], "name": "Tao Zhang" }, { "ids": [ "2764129" ], "name": "Aviad Zuck" }, { "ids": [ "1755646" ], "name": "Donald E. Porter" }, { "ids": [ "3188958" ], "name": "Dan Tsafrir" } ], "doi": "10.1145/3102980.3102988", "doiUrl": "https://doi.org/10.1145/3102980.3102988", "entities": [ "Experiment", "Mobile device", "Operability", "Smartphone" ], "id": "458b90fbaa4437ff081af62d86fb6fb094dc3034", "inCitations": [], "journalName": "", "journalPages": "42-49", "journalVolume": "", "outCitations": [ "92bc53a3a28a2cc02e02d959c439c80fce1846f1", "b8ddec47f9fab1eddb5c9cacf703781dd5337b87", "36352c1efb3fea13912e5bf3afd00cb2418d948d", "0e216e95f17f64ff18cd50463dd8ec023aa08248", "1b9fae8255fda28e4adeb96a36f8e907e8aac6f9", "a8432f8160dc899e66976b9887efa1d4a544cd56", "acdac4631219ff22f1bfb3438645b38531e85ae6", "04b58d40b8a789d354c93a12a4fc418793b827fc", "0c42f33ac9f6a3e874db9e5c3564d6c17f99a464", "1820a34042d6371a9e20484b0c63b698eb522a6c", "1a8c7439080c2e5d42bf173c4db084713e5f05b7", "615b2cab6372a8ec6715e081e6ed0c40f8c8fef1", "3fdaf6ce655d7b0208314d9812ef67cb3ebe5077", "176ef2564eeca2d4ad6d83dd5db5b6b5a04f70f4", "169a08383bcb0577e6b7d4d1445359383fe07fc7", "377084771036256909a7032b80aeeca8c268ce67", "0eb006817f850971e48eb1763bdda1adf10f7244", "0fc3069343aebb2692fb05e9a982b5e226e4958c", "f4f37512b9786dbd9980d343e47fdc41032d0ca6", 
"d36e9fbd0f6c34e60292534ee1b0d43575128f23", "0be943b44a03a16b544b21ea1b578b1816705831", "3dd94ad58e9e72b1bec99fa4c9e9c7c1ee1d1289", "013e7549a51257a29044c2e909881ec639b39d42", "010bf8e639dbdee2c31a58ca9b65e89aeac11315", "0dce301ca2375068184e8888ea7777b5e3c1d4a7", "04d5571cb68ae7877b5edb2f754dc59fd5e5a9d2", "d6238df523ea7f6bdede96a9cda2a319de6ef076", "08866cb45cafc2452ce20044759c6ec920a355e7", "66fb412a9481d0ef7582aec85241633cbba017c8", "35b26770e5adad61ef972ada4882c9bf17224397", "12ef153d9c7ccc374d56acf34b59fb2eaec6f755", "33438b1148a84d6e5bf2cad70bf7754d546ca5d7", "9d40ff60854d7a6adc6e6d607fc6c24dd79a5ce4", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", "520017c26e102db1784ee35a4e3e7c70f538190d", "356955d0f190829b7481b8dc39c5f90dfac1b652", "1e838c54fc0cbc22978cd1ee1a7d884f3737a9a2", "a243e42384694885358be0408aa29257bc389db0", "91912a461d30035639ddda2b6de97a388823fb4b", "87b99f3e8632e5915717d184bc8309ac9debfe03", "703dbb18e5c24dc546c66679deb677c66fd7b88d", "70ce10f47aafa0994627a9575565b5c98af58d98", "34f6cc5a0d7656a5c1d92049eda0533ca7c07add", "175a3360ff5bb2f0777dff1e688f3f90f20e5fcf" ], "paperAbstract": "When flash was introduced, wear-out was a known problem. Over time, a number of techniques have been developed to estimate the expected number of program/erase cycles under typical usage patterns, and sufficiently over-provision the cells such that the device meets its expected lifespan, even if individual cells fail. This paper started as a simple experiment: measuring whether the lifespan of flash devices in smartphones and other mobile devices, match the estimates. To our surprise, we find that, in a matter of days, simple, unprivileged applications can render the drive of several smartphones (and thus, the phone) inoperable. This result is concerning, as it means that installing malicious or poorly-written software could destroy the device itself. 
We experimentally demonstrate the problem, discuss reasons why it occurs, and consider potential solutions.", "pdfUrls": [ "http://cs.unc.edu/~porter/pubs/hotos17-final29.pdf", "http://doi.acm.org/10.1145/3102980.3102988", "http://www.cs.technion.ac.il/~dan/papers/fbrick-hotos-2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/458b90fbaa4437ff081af62d86fb6fb094dc3034", "sources": [ "DBLP" ], "title": "Flash Drive Lifespan *is* a Problem", "venue": "HotOS", "year": 2017 }, "459c33ccdeb78baff36c2d4a3d1b5b0b41b08b60": { "authors": [ { "ids": [ "8562471" ], "name": "Elias Stehle" }, { "ids": [ "1738552" ], "name": "Hans-Arno Jacobsen" } ], "doi": "10.1145/3035918.3064043", "doiUrl": "https://doi.org/10.1145/3035918.3064043", "entities": [ "32-bit", "Algorithm", "Attribute\u2013value pair", "Byte", "Central processing unit", "End-to-end encryption", "Endeavour (supercomputer)", "Gigabyte", "Graphics processing unit", "Memory bandwidth", "PCI Express", "Radix sort", "Sorting", "Sorting algorithm" ], "id": "459c33ccdeb78baff36c2d4a3d1b5b0b41b08b60", "inCitations": [], "journalName": "", "journalPages": "417-432", "journalVolume": "", "outCitations": [ "aea592e9886c28a546ac3c74d3b2cf262836acc7", "c06dbf2b7ff03f422b89dcd9c28a44c279099c8f", "420a0e5fc398f197bca3dfe40291a82b2c65655a", "0d75e43d77976353dc8fae02945d69c651604b88", "a03d4b0cad5de84c025bc341945fd28da0582569", "9c5882ea02390e3ca93d04aeeb4ec440ae17ff50", "223353fed7921739e91e3757d5c0cbca14d45d32", "1898098b68d4e664f0ebf9faec772c9a8ed4f946", "4b7f05a35378a0b17a0f9af3180d43cf7970aa15", "377571a4e7153f2607619ccab6ae32534f638b03", "c5c16c6a974213168d589b647ae1858633a5fbe0", "8572f800eeaae01b7faf7be62e041e3d08ea83ec", "5a81b638c966141b12d47565eeecbfadc0e16fd1", "60e7f7f9367e952f53b8545ef441886a84e3ff58", "4139eedda8717ffd60052f68ed78b996aaebfced", "2dbd58aa9b36388c92d3ccb5bd1bc387ea712a30", "18cce5c91c9bc01b7e432980beaab00511af74e0", "36305430c70e3b0d3409e47cf71d1e844e163122", 
"05d372b38bb05c96e7575a9f48fe5e292fa34e0e", "53d1657eef932911c95ed051961c8136d34ba486", "07c1bb1c7169d1d6c8b6729be265d73a5fd6cc64", "04abc8ba1238b7eb29c52e3677a55b5079e83b39", "f465e873cb9d9e5cd74cc759c2b015da06385a86", "32d355a7a20f92ccda0608f83d7456870231c570", "8900d8141d4c81ec9aaac0f97399fed1e36827b2", "138887a7f5c6c9614ab876de1d42c9a85462c5c8", "b2bbe784fe5a691beae60a9710ff66fcda819dab", "580835ee30534043f53b1cb03fe1f27ce85bcdcf", "08639cd6b89ac8f375cdc1076b9485ac9d657083", "2e166453a92457400c1127704f9e57c2f14634d2", "0e2993ddba78626376651c3ab8d14f0d680f0595" ], "paperAbstract": "Sorting is at the core of many database operations, such as index creation, sort-merge joins, and user-requested output sorting. As GPUs are emerging as a promising platform to accelerate various operations, sorting on GPUs becomes a viable endeavour. Over the past few years, several improvements have been proposed for sorting on GPUs, leading to the first radix sort implementations that achieve a sorting rate of over one billion 32-bit keys per second. Yet, state-of-the-art approaches are heavily memory bandwidth-bound, as they require substantially more memory transfers than their CPU-based counterparts. Our work proposes a novel approach that almost halves the amount of memory transfers and, therefore, considerably lifts the memory bandwidth limitation. Being able to sort two gigabytes of eight-byte records in as little as 50 milliseconds, our approach achieves a 2.32-fold improvement over the state-of-the-art GPU-based radix sort for uniform distributions, sustaining a minimum speed-up of no less than a factor of 1.66 for skewed distributions. To address inputs that either do not reside on the GPU or exceed the available device memory, we build on our efficient GPU sorting approach with a pipelined heterogeneous sorting algorithm that mitigates the overhead associated with PCIe data transfers. 
Comparing the end-to-end sorting performance to the state-of-the-art CPU-based radix sort running 16 threads, our heterogeneous approach achieves a 2.06-fold and a 1.53-fold improvement for sorting 64 GB key-value pairs with a skewed and a uniform distribution, respectively.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064043", "https://arxiv.org/pdf/1611.01137v1.pdf", "https://arxiv.org/pdf/1611.01137v2.pdf", "http://arxiv.org/abs/1611.01137", "https://arxiv.org/pdf/1611.01137.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/459c33ccdeb78baff36c2d4a3d1b5b0b41b08b60", "sources": [ "DBLP" ], "title": "A Memory Bandwidth-Efficient Hybrid Radix Sort on GPUs", "venue": "SIGMOD Conference", "year": 2017 }, "45b63f73b1512b8181f8243129951a8538773060": { "authors": [ { "ids": [ "2086958" ], "name": "Zachary Estrada" }, { "ids": [ "2493219" ], "name": "Read Sprabery" }, { "ids": [ "2805337" ], "name": "Lok Yan" }, { "ids": [ "10385489" ], "name": "Zhongzhi Yu" }, { "ids": [ "1687256" ], "name": "Roy H. Campbell" }, { "ids": [ "1687748" ], "name": "Zbigniew T. Kalbarczyk" }, { "ids": [ "1686653" ], "name": "Ravishankar K. 
Iyer" } ], "doi": "10.1145/3050748.3050759", "doiUrl": "https://doi.org/10.1145/3050748.3050759", "entities": [ "Cloud computing", "Hypervisor", "Keystroke logging", "Operating system", "Virtual machine", "z/VM" ], "id": "45b63f73b1512b8181f8243129951a8538773060", "inCitations": [], "journalName": "", "journalPages": "157-170", "journalVolume": "", "outCitations": [ "c25a66d96e796b5cc013607153ff2a0e139c93ad", "3574657705475722b6c398c266805f758268778b", "009af3a1fa932ea1a9efa8d34cb0b6e32feae15e", "0a22675c7bd5729b06816e3788f7b906d029ca03", "5752a746cd143d30b22d837e1077fa9c971860fb", "0abc3e83ccd6e685f8d0299f24f03ae28f4c2459", "97cdcc50199a9c1f7f47deffee3fe869fd968220", "42286822d70bdd531abf8aea29e7a73086e949d5", "2960c89331eb7afa86584792e2e11dbf6a125820", "9df577f0e884db39bff445b58757e6a3f42ca1b5", "883f3778fcb560c555f7ea7a64fa061e70e7f256", "82d236d0df331988c7482228d63a560316149bae", "93e6deb8ac31807df341728a469984add00aed27", "4162ec72047f0af323e11494ed3a6ecfe6beffe1", "76b73a657ef1cb543790acc99fc8abc80dbe4fc7", "bebe4a03f445e8460fd7664b92ce30f21895bf7a", "0edd896bc82b7fb65ef63cb1e3512db795c7f7d4", "1d811442b124056060f8ba236e09376b26dacede", "4ac3a866bc1ce82a67f2cf00dca9ec1349598c07", "3fcc6e3eaa94aec4612b7225b167ac003a370e20", "3f8c6ecb15bbdf667ec6bbe1b132db1945110976", "2e8da51c545cbe8e62a3751a5a2b9a3beca00b43", "2723d4dc7884d1237b315edc39f9fe345885d8ea", "2c628dedb6a1fb0c566ec791c84b93a22dd9aaa9", "88b6a71ad0f166769c3c51cbe802c972d524a78f", "4ab4a666f5e5ed34ac219a9fdc2f70bd1cab0922", "86013daaae16572bceb755e65ee5fa2fdfb63848", "6c0562ffc00ba7a4d2734ac039ffd181afe2008d" ], "paperAbstract": "This paper extends the concepts behind cloud services to offer hypervisor-based reliability and security monitors for cloud virtual machines. Cloud VMs can be heterogeneous and as such guest OS parameters needed for monitoring can vary across different VMs and must be obtained in some way. 
Past work involves running code inside the VM, which is unacceptable for a cloud environment. We solve this problem by recognizing that there are common OS design patterns that can be used to infer monitoring parameters from the guest OS. We extract information about the cloud user's guest OS with the user's existing VM image and knowledge of OS design patterns as the only inputs to analysis. To demonstrate the range of monitoring functionality possible with this technique, we implemented four sample monitors: a guest OS process tracer, an OS hang detector, a return-to-user attack detector, and a process-based keylogger detector.", "pdfUrls": [ "http://doi.acm.org/10.1145/3050748.3050759", "http://assured-cloud-computing.illinois.edu/files/2014/03/Using-OS-Design-Patterns-to-Provide-Reliability-and-Security-as-a-Services-for-VM-based-Clouds.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/45b63f73b1512b8181f8243129951a8538773060", "sources": [ "DBLP" ], "title": "Using OS Design Patterns to Provide Reliability and Security as-a-Service for VM-based Clouds", "venue": "VEE", "year": 2017 }, "45bbacc22f5913c488042b750e56fcaa2a7e31cf": { "authors": [ { "ids": [ "3175807" ], "name": "Jia Chen" }, { "ids": [ "2487014" ], "name": "Yu Feng" }, { "ids": [ "1714075" ], "name": "Isil Dillig" } ], "doi": "10.1145/3133956.3134058", "doiUrl": "https://doi.org/10.1145/3133956.3134058", "entities": [ "Automated reasoning", "Cartesian tree", "End-to-end principle", "Hoare logic", "Interference (communication)", "Java", "Non-interference (security)", "Relaxation (approximation)", "Scalability", "Side-channel attack", "Static program analysis", "Taint checking", "Verification and validation" ], "id": "45bbacc22f5913c488042b750e56fcaa2a7e31cf", "inCitations": [ "742baee0d51de6c2a5a7f443d41c63465a32331a" ], "journalName": "", "journalPages": "875-890", "journalVolume": "", "outCitations": [ "0c0b7a24e5e0cf93a966708f26d31f0a74aabeba", 
"33e5f1f64122e5d9e4f0e40e2632cfaa143378c3", "94d0e8b118efecb9f5b2056d84bf22253a2fb63c", "1058c7d52327d8ca379b7ede43d9d823688fb139", "1254c9428aa7c59f7e6acfc26cde6bb5c05241f9", "2afec7f7defb45e3b238bcd556ba6c399c401fe9", "b6f5251a67c5cf8539c0213c387a583cdcefd493", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "4b8f1518b21b73d30cedf31560a83a8322f8147d", "9a955bd87e0d30662efe069a47ecdb1b5966ff07", "1b31c65d8b5023dabcdd18fd57241488834c7206", "db3b449afe1e02e9d7b995d1f565db3b5c1e7ccc", "226242629f3d21b9e86afe76b1849048148351de", "187467e789413f7dbaf9c66efebcffeeec0b9923", "71fd9557fe1719eaed68aaa71127ffe4252a0989", "182a81eaf31b1a76be592c0890182cacd4199be0", "43de5136309e262007d3f14893959af69749caf8", "88298d2be1ded162d3a3a3931a9aaba2e191a245", "560449db771b678e5d36ae50f28f2e759e67e774", "3c1f11a1da88c8237842a246ed1a5dbe230737be", "3d8775945f7c62b2bca55b7097fde9427b0363bd", "0fe8395b85e1529e3863631376b27c92970fb544", "2065450d96aca38c79cad5172b58660765533650", "0e7c0199bbb4533e8f074d914a45351d80e5cb55", "5503686edf7ab29785c51a7c4b10e9dbbf80c140", "11e8ad8d5302e8149cc787de5778b52e7e976ca8", "0017cc232d7d45d5b915e1360a71f9e877d84cd5", "027eb436c35c7e293e7ebc565163cb54c05fe2e9", "96e28d82e42fffa1c29ddb443d74986b8cf6d608", "7c555dfba844337d9ed1d56c231d99448069d83e", "c65ce591b42b816279e741bb9612b832d21288e0", "7047874fc68b902e30fd1a38c19b99f7e5824bb8", "43c11eae3ceb570ef627e502a3f041f0cf9a0c06", "419b718b87d216820ee2f6ef076d9889f4aac6ee", "0e59ef54ec8d715f285db56d45dbfd26c3c21702", "07f627e080722b1b314baa79441aa5f8914fb030", "09d877e92aaab3e7a41a14d30ab28e943dcb4034", "0025870ef15a8f2858ff4186329d4bde316e9e01", "3671af9d7655977e573bd123f93470f978ea7a62", "55520e5f45a94a332097b750afd730ef57dd9168", "59684cf4f60456f5eea2991a0d7f90095f37a657", "b872abe565d270c4822430d394c4f3e983fdec66", "70f340e80468832b7a293da8a4f1d08ed2786448", "304be7b2c9ce2263adb011e46484293aeae13f58", "1753d3e97fdbe7799b9625cb873b77eef506a608", "1c82c9e0e480f1aef3427c74450c436cbb234a72", 
"746a78f8c0dabc20d161244923063c4b689b1010", "20f3fcd714230fbcb88661ba0f623d9e6217a717", "326bb49d3ae9e1e1551028200916192e50004105", "02b72a79f17d7d86bb7b1d1e8ff8f659ca2bb1f0", "a5ade56a2f37f3f5f5b956b0c5546de9a3428537", "0b84fb0ec9739e04f9b0fcbe040718d9f735200f", "9837a70c231c0ef3d33c2c9f5b56afd40548acce", "03b1932785190d0fce2e3fc0384b7bd6f5efbc5c", "2c6533d714d8dc4d3f7faf418db93c38df642fea", "3811b03c4ebbd93dfc602eef422fc8237fda8654", "04402122e2fb065ed1280000981f7626496f0afb", "21ef1edaac7cd43a172806f14e8c5b02bfbe2f5e", "615168555150d80752a1c195229642acbe6fb3d9", "5a6682af0ad2eb0e08e6f52c0101119c603b663c", "32e16ae384e03d76b74be2e04fcf5ac5007fc155", "045bbbea384e9d54be38dd207bf237d5208ea599", "48a0fb31fbc7440bd0d92d4f9a5378e09018e20f", "7eed7154d0590c1e6eb64d4ae7935a526deac4a4", "2b6df21137f30d25494bb58521a6062f93e915f8", "c8f6a8f081f49325eb97600eca05620887092d2c", "02fd1a072a72d24c5f61d709a1b3ce863da32729", "2effd77c019d1f574109d05fee8b1e27c9429c79" ], "paperAbstract": "This paper presents Themis, an end-to-end static analysis tool for finding resource-usage side-channel vulnerabilities in Java applications. We introduce the notion of epsilon-bounded non-interference, a variant and relaxation of Goguen and Meseguer's well-known non-interference principle. We then present Quantitative Cartesian Hoare Logic (QCHL), a program logic for verifying epsilon-bounded non-interference. Our tool, Themis, combines automated reasoning in CHL with lightweight static taint analysis to improve scalability. We evaluate Themis on well known Java applications and demonstrate that Themis can find unknown side-channel vulnerabilities in widely-used programs. 
We also show that Themis can verify the absence of vulnerabilities in repaired versions of vulnerable programs and that Themis compares favorably against Blazer, a state-of-the-art static analysis tool for finding timing side channels in Java applications.", "pdfUrls": [ "http://www.cs.utexas.edu/~isil/ccs17.pdf", "http://doi.acm.org/10.1145/3133956.3134058", "http://www.cs.utexas.edu/~yufeng/papers/ccs17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/45bbacc22f5913c488042b750e56fcaa2a7e31cf", "sources": [ "DBLP" ], "title": "Precise Detection of Side-Channel Vulnerabilities using Quantitative Cartesian Hoare Logic", "venue": "CCS", "year": 2017 }, "45d2e6c24c77a66ad5b55fada65e1fa9fe18661c": { "authors": [ { "ids": [ "2677960" ], "name": "Qingkai Liang" }, { "ids": [ "1731931" ], "name": "Sem C. Borst" } ], "doi": "10.1109/MASCOTS.2017.33", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.33", "entities": [ "Data center", "Experiment", "Hash function", "Join (SQL)", "Load balancing (computing)", "Scalability", "Server (computing)", "Simulation", "Static hashing" ], "id": "45d2e6c24c77a66ad5b55fada65e1fa9fe18661c", "inCitations": [ "df5a4934ddd836ec9c34d0e6214a2ad89063f014" ], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "63-72", "journalVolume": "", "outCitations": [ "1d1745927b527a7e20877e174efd963402fd7da1", "f455a190a21dea6cc72581c57382795814eaaea6", "dedc84ff2eab9e799ad046272a48e85cad3aad9e", "2f9aea036407868166e41a73bdbc89950663668f", "5ee3dc5f9343e41d10a092522c05072fe61b2708", "ad73deea37cad9a9b945d929a86d82d781450345", "5b999d36d5230eca01532b357c7cf338a5e0d641", "234e6be0d4238f76b3ac038ee422be39f391c625", "ad3c368c9e40fd4c81f443055833062cea46fdcd", "88203ee6a847f506d4c07b5447a6bf5870f54843", "28c35994ad743fb284bc7410c59fd231e3ce2d77" ], "paperAbstract": "Most load balancing techniques implemented in current 
data centers tend to rely on a mapping from packets to server IP addresses through a hash value calculated from the flow five-tuple. The hash calculation allows extremely fast packet forwarding and provides flow `stickiness', meaning that all packets belonging to the same flow get dispatched to the same server. Unfortunately, such static hashing may not yield an optimal degree of load balancing, e.g. due to variations in server processing speeds or traffic patterns. On the other hand, dynamic schemes, such as the Join-the-Shortest-Queue (JSQ) scheme, provide a natural way to mitigate load imbalances, but at the expense of stickiness violation. In the present paper we examine the fundamental trade-off between stickiness violation and packet-level latency performance in large-scale data centers. We establish that stringent flow stickiness carries a significant performance penalty in terms of packet-level delay. Moreover, relaxing the stickiness requirement by a minuscule amount is highly effective in clipping the tail of the latency distribution. We further propose a bin-based load balancing scheme that achieves a good balance among scalability, stickiness violation and packet-level delay performance. 
Extensive simulation experiments corroborate the analytical results and validate the effectiveness of the bin-based load balancing scheme.", "pdfUrls": [ "http://arxiv.org/abs/1703.10575", "https://arxiv.org/pdf/1703.10575v3.pdf", "https://arxiv.org/pdf/1703.10575v1.pdf", "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.33", "https://arxiv.org/pdf/1703.10575v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/45d2e6c24c77a66ad5b55fada65e1fa9fe18661c", "sources": [ "DBLP" ], "title": "Delay Versus Stickiness Violation Trade-Offs for Load Balancing in Large-Scale Data Centers", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "460464955cee59f610c94c9360cad879edb5d880": { "authors": [ { "ids": [ "4043890" ], "name": "Teng Wei" }, { "ids": [ "2786986" ], "name": "Anfu Zhou" }, { "ids": [ "1775391" ], "name": "Xinyu Zhang" } ], "doi": "", "doiUrl": "", "entities": [ "Ambient network", "Experiment", "Mi-Case", "Network performance", "Ray tracing (graphics)", "Testbed", "Transmitter", "Wireless access point" ], "id": "460464955cee59f610c94c9360cad879edb5d880", "inCitations": [ "9905d9e816d7106bed6496eb8a3ad90947342afe", "56ee03a70a10d1a0dfcff621ae60aef76fc43ad7", "73f615dc7f8162998016e7d990872087040afd96", "e785649d3e19bc31d6fa19319e0f8e60928459a4", "91ef11f547c6641cdfb634670137cb621b90b22f", "9823f8c8c43b64cc6c0c7fd09e9380d908122148", "4cbbcb448dfb96f83e05af0ba85dc0e8b47e7fb8" ], "journalName": "", "journalPages": "213-226", "journalVolume": "", "outCitations": [ "8ee2e4cdc964b0dad050112ddd28f20c6827fa5e", "27644a68d3a0dd999b040ec47f08560bbce71773", "2fca6867babd34da9f04c26d20c4915e4bcfe8cb", "4e1e0e545ba6086c5d3c1b26de5e1d1150fe4dd1", "831ed2a5f40861866b4ebfe60257b997701e38e2", "46614df4d37cac7d647aec72a8bf6279fcdf1bfc", "e5edfbdf645a3dbcdaf7d9fcbf350c67fbbadae5", "2448ba93c7993a0ab7a6c45a96640d34d298fbd0", 
"85fdc9788c9353a95831939c3954e181e92616fe", "e3b29456e4a3fa24217f030b585719f9b6748918", "671ec6b2b7997f61192a184632203b525c9bee23", "05fe031e53dd8990e7076a91277cb2b74e22b811", "048050777395f86219216960e8eadab6ebd476eb", "4b8b4e8506ddb07390213c277e832a309224a325", "6f07a10dfbd583fdda034c7d606e53148f162f2d", "08e113957de42813c5b90b3f9a8a0d1cef667f61", "1bc04cbbce54bc027b6147eb0a49189a2691a35c", "8317f40c569af2b5bb0aefbb6b07d6a991c1204e", "f87aa8b1087f11606cff27da4d60852d6671993a", "ce3813f215293e0f8ba79b2f8c88cbe3ec15cfb1", "3789f0b79c16e8baa7e4fdf0dc90d0920a611299", "cba5934149976560bbbe589756ed371730386ea3", "600376bb397c5f3d8d33856b3a6db314b6bda67d", "0d8dba43dfe0d165804d9fa0098ed0ada6a9c402", "30f67b7275cec21a94be945dfe4beff08c7e004a", "47439480b8a13ea60aeef644f2f4aac0b3329a6f", "666c44973030409a1b89d933b1461ded5b656269", "09c8f664a5e1a0c8ee51dbb87b2d977ede4875a6", "06e8e428d6c1e36575657c6c4aeda65e4930ef4b", "58392cd42505bf2bc0675610188f6465bc20fd6f", "6f7a8ba88be527dcff7496e2f72681b6de10e03c", "4c5d4150aedca478e69eda5a20270a485f51d48d", "0cd493a73a827f6be241239017b3eaa2d995d2a2", "671ef43e50af2bd00cb91b4aad6815c1b95083dc", "fc32f882e0ed37ae786ac9a2063418d92f5b52d5", "a115ef0244b2add4f528bbc994c2a779a5a9a185", "552aedbe4645afa87a5f54805e144e05057696da", "02d843e3a008e76cf6a4c23bd01023d264b05686", "ce919f7990812f2b73de5a13cb9f1d3b12f31ac9", "1943466070019e48204ebbee0914d87ced4ba09a", "f3784fa2ab5085b2cfcb19c1ca2f9ae1220a083e", "cc1c0876c066b2e11bad73c71c27632872a68ae0", "a9bd9cf0e5ffe1bf44ce6742eb5bd1bb85c5c475", "0f3230715c53618e531d66befce53159e730d374" ], "paperAbstract": "60 GHz millimeter-wave networks represent the next frontier in high-speed wireless access technologies. Due to the use of highly directional and electronically steerable beams, the performance of 60 GHz networks becomes a sensitive function of environment structure and reflectivity, which cannot be handled by existing networking paradigms. 
In this paper, we propose E-Mi, a framework that harnesses 60 GHz radios\u2019 sensing capabilities to boost network performance. E-Mi uses a single pair of 60 GHz transmitter and receiver to sense the environment. It can resolve all dominant reflection paths between the two nodes, from which it reconstructs a coarse outline of major reflectors in the environment. It then feeds the reflector information into a ray-tracer to predict the channel and network performance of arbitrarily located links. Our experiments on a custom-built 60 GHz testbed verify that E-Mi can accurately sense a given environment, and predict the channel quality of different links with 2.8 dB median error. The prediction is then used to optimize the deployment of 60 GHz access points, with 2.2\u00d7 to 4.5\u00d7 capacity gain over empirical approaches.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_wei.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-wei-teng.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/wei-teng", "http://xyzhang.ucsd.edu/papers/TWei_NSDI17_EMi.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-wei-teng.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_wei.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/9c8d/1035d701fa169579e3d83f8d6408757663f7.pdf", "s2Url": "https://semanticscholar.org/paper/460464955cee59f610c94c9360cad879edb5d880", "sources": [ "DBLP" ], "title": "Facilitating Robust 60 GHz Network Deployment By Sensing Ambient Reflectors", "venue": "NSDI", "year": 2017 }, "460755d456f84628265593d2f2e6a1ef637b31e1": { "authors": [ { "ids": [ "3103158" ], "name": "Andreas Prodromou" }, { "ids": [ "2543676" ], "name": "Mitesh R. Meswani" }, { "ids": [ "2012110" ], "name": "Nuwan Jayasena" }, { "ids": [ "3308405" ], "name": "Gabriel H. Loh" }, { "ids": [ "1740142" ], "name": "Dean M. 
Tullsen" } ], "doi": "10.1109/HPCA.2017.39", "doiUrl": "https://doi.org/10.1109/HPCA.2017.39", "entities": [ "Access time", "Address space", "Algorithm", "Big data", "CAS latency", "Computer data storage", "Dynamic random-access memory", "Flat memory model", "Memory management", "Requirement", "Scalability" ], "id": "460755d456f84628265593d2f2e6a1ef637b31e1", "inCitations": [ "92229ef2d0bfdcba2fdf2bf265ae6d37d0b34e9f", "88824f4400bf03caed2f99879e68f3543b214c92", "24c0c34675eb35e300244c6ff682155a34a2e3d5" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "433-444", "journalVolume": "", "outCitations": [ "22b4811bb8265e84d53c62a842cac10dda15f6af", "8007305d525a0802f09002b7a5bca2bb3f23ed7d", "1b62e2f7ac7a8d3c13773dfa23d05f8ab82b1f23", "1a5ce35fc5ad575c2c9e5f692bf263082d656f80", "0a934c1fa360491bebaa6fb4d0348179b9713b2d", "3b621e9a6b99f32caa518116cb400035d1deed29", "746ce1f84401105286e0fc1adc18c3092fde50d4", "11b0e5ee27e7fdf989632f157ff204fc8962918c", "89f4842ef627eb667691b5329e1eaac9bd66a0bf", "24763030fb1e9813dad51d28bea9c5d1414f9cda", "e4aed18e1b965f90a86b57a787c52af44a8c20a4", "48a7323c4894de3afb90ef2135160205ebb55011", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "257a47ec2982405a903eb8536a7321de528b149d", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "1c32ad0a42109fab826eb3054df7cfc33b424125", "14b664dd04703d2333bce3cb35641f90a3a8a2ce", "4c689148ee5e9d6d116f6babbfab21bf2116802e", "18633256bb17ba0744518479c0752ca87f0d03c6", "3ac6671a0c61544b9dab543b116eccdaccc6469e", "40952ef7fe2d22daec75a6ab7e0fe030ce447e0a" ], "paperAbstract": "In the near future, die-stacked DRAM will be increasingly present in conjunction with off-chip memories in hybrid memory systems. Research on this subject revolves around using the stacked memory as a cache or as part of a flat address space. This paper proposes MemPod, a scalable and efficient memory management mechanism for flat address space hybrid memories. 
MemPod monitors memory activity and periodically migrates the most frequently accessed memory pages to the faster on-chip memory. MemPod's partitioned architectural organization allows for efficient scaling with memory system capabilities. Further, a big data analytics algorithm is adapted to develop an efficient, low-cost activity tracking technique. MemPod improves the average main memory access time of multi-programmed workloads, by up to 29% (9% on average) compared to the state of the art, and that will increase as the differential between memory speeds widens. MemPod's novel activity tracking approach leads to significant cost reduction (12800x lower storage space requirements) and improved future prediction accuracy over prior work which maintains a separate counter per page.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.39", "http://cseweb.ucsd.edu/~tullsen/mempod.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/460755d456f84628265593d2f2e6a1ef637b31e1", "sources": [ "DBLP" ], "title": "MemPod: A Clustered Architecture for Efficient and Scalable Migration in Flat Address Space Multi-level Memories", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "46120ec9f262072b281b2932da6aa1f54950d157": { "authors": [ { "ids": [ "2241445" ], "name": "Badrish Chandramouli" }, { "ids": [ "1710965" ], "name": "Johannes Gehrke" }, { "ids": [ "37070730" ], "name": "Jonathan Goldstein" }, { "ids": [ "1691108" ], "name": "Donald Kossmann" }, { "ids": [ "2386988" ], "name": "Justin J. 
Levandoski" }, { "ids": [ "3454947" ], "name": "Renato Marroquin" }, { "ids": [ "3120255" ], "name": "Wenlei Xie" } ], "doi": "", "doiUrl": "", "entities": [ "Data integrity", "Database", "Eye of the Beholder", "Relational database management system" ], "id": "46120ec9f262072b281b2932da6aa1f54950d157", "inCitations": [ "a02a15e7a23a7478d468e58b0eadacdbee192037" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "072f363c798b6fbf0e8213fdaae58f5d3a65a588", "28be19275227aca5ea6ebd54186f336ccc0e6fee", "a8510f39b2c1352524cf1ce170ee5b5b3d1058de", "2321a150c84d771d81fd81759757795dcda25750", "a73fba5745c1659ef6258ee28d763f135c74779a", "527635e8bed9137a11cf74711b2ca5e456f3d642", "3ba66b7716d2f93ae39b2bb79427038e449f5a7c", "05a26b5deeed6f6f7e9584555b73c5af3905063b", "dc11bc1ba27e4cb807f5c5ca1257b06b9a025aa6", "0fb2ab7176f91e34061b128c86ef100401a1b037", "5d1855f696e78186a50413ac487cdf2a89f4eeb8", "d30726315b0c8ab4fc0781ee082fd5b0a63f3055", "43dd3663744fed023f033df681221620d4736818", "219d1747f0afb8b9d2c603d0f3503764d5257796" ], "paperAbstract": "Modern database systems support one set of integrity constraints per database. Imagine you could specify multiple sets of integrity constraints per database, one for each type of application. This paper argues why this might be a good idea and introduces a system that implements this idea.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p18-chandramouli-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4612/0ec9f262072b281b2932da6aa1f54950d157.pdf", "s2Url": "https://semanticscholar.org/paper/46120ec9f262072b281b2932da6aa1f54950d157", "sources": [ "DBLP" ], "title": "READY: Completeness is in the Eye of the Beholder", "venue": "CIDR", "year": 2017 }, "46122831f2f1aea6b5f45025b8791ca29c239679": { "authors": [ { "ids": [ "18112138" ], "name": "Matthew D. Sinclair" }, { "ids": [ "10187815" ], "name": "Johnathan Alsop" }, { "ids": [ "3196444" ], "name": "Sarita V. 
Adve" } ], "doi": "10.1109/IISWC.2017.8167781", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167781", "entities": [ "Address space", "Algorithm", "Benchmark (computing)", "Cache coherence", "General-purpose computing on graphics processing units", "Graphics processing unit", "Memory hierarchy", "Scalability", "Uniform memory access" ], "id": "46122831f2f1aea6b5f45025b8791ca29c239679", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "239-249", "journalVolume": "", "outCitations": [ "762f5a712f4d6994ead089fcc0c5db98479a2008", "59857e2857df6d69a12e3cbaa720648b5c299159", "8747dabeaeda342fbac4ebff628c574be4c53826", "5d279a21f65eef2bf5027d0cf1e56f2d740b314e", "04379477b31622586b3a632a5ac528c664f88d7a", "bd23f9b45fcf7552c7eae2433c0f59883a63daf2", "4bad51c7685254155733ee8def6a1294378aa1af", "0d69c5f9f205037a1234a7c4cd3658e076d267bf", "2462dc12b9ffda182ab894ee55938249420b81af", "a539b64493cd05fe13b9c371cb0c50becee59769", "49dc03814c171c08331fe9f1afc34a54951ae8e0", "7b93d3e42a7498e4de67a76b8f6861875fa74d79", "4aa993db77b888a02084a542a929b1a81a8d03f6", "0335bf6957ecb92f709fc79c72c4237939f32c9e", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "33a5387150c4df54b89af46dacceb9296c396733", "6ac0c44e4e56583914de316346977c8461716141", "58da996efd7320d1e484263c97c930c8979c474f", "ac35455b128baf4e280f2571160c242b67b3f85e", "2d6f002477015469075954c6748a1a85af352c94", "a36cbffc24608143c6a69da550620139dcc8128f", "0d09a33fc88ffb35ef35b84d104c1cadc5802cb1", "3c9b5b9e3e8ad647498f1650df08ac2a4fa83346", "fa15e80d71f831ed1a3f11d5b94c88b8f098a17c", "f359d33a1c09d2f626217e21f722508968c7057b", "5b2103ce053a4e0e3685920fac0248533e8b0718", "7e5a3bd8981b6d3dc0d420ae427c14275f514f4d", "0d5ec0f90b9d07ebc48f4e00b2e583e5d49130dc", "445e5f45ed2e5a804e2baafe68e1424e44042a1c", "5ae07f575fb2feb1e03d08af9fe29fafe0a306d7", "00c3b08c4e1dbfa080b6d3c422fa0da0131a743c", "d410f8128bd4efa6adc886259e5d9de4cd7587bc", 
"3371781698dbd3d3e78477af7528530024b828f8" ], "paperAbstract": "Traditionally GPUs focused on streaming, data-parallel applications, with little data reuse or sharing and coarse-grained synchronization. However, the rise of general-purpose GPU (GPGPU) computing has made GPUs desirable for applications with more general sharing patterns and fine-grained synchronization, especially for recent GPUs that have a unified address space and coherent caches. Prior work has introduced microbenchmarks to measure the impact of these changes, but each paper uses its own set of microbenchmarks. In this work, we combine several of these sets together in a single suite, HeteroSync. HeteroSync includes several synchronization primitives, data sharing at different levels of the memory hierarchy, and relaxed atomics. We characterize the scalability of HeteroSync for different coherence protocols and consistency models on modern, tightly coupled CPU-GPU systems and show that certain algorithms, coherence protocols, and consistency models scale better than others.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167781", "http://rsim.cs.illinois.edu/Pubs/17-IISWC-HeteroSync.pdf", "http://rsim.cs.illinois.edu/Talks/17-iiswc-sinclair-heterosync.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/46122831f2f1aea6b5f45025b8791ca29c239679", "sources": [ "DBLP" ], "title": "HeteroSync: A benchmark suite for fine-grained synchronization on tightly coupled GPUs", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "4623f8c33045436448eefb3f63493a43935c54ad": { "authors": [ { "ids": [ "2347792" ], "name": "Olli Saarikivi" }, { "ids": [ "1758545" ], "name": "Margus Veanes" }, { "ids": [ "1731438" ], "name": "Todd Mytkowicz" }, { "ids": [ "1702346" ], "name": "Madan Musuvathi" } ], "doi": "10.1145/3062341.3062362", "doiUrl": "https://doi.org/10.1145/3062341.3062362", "entities": [ "Algorithm", 
"Computation", "Iteration", "List comprehension", "Pipeline (computing)", "Reachability", "Regular expression", "Satisfiability modulo theories", "Serialization", "Stream (computing)", "XML", "XPath" ], "id": "4623f8c33045436448eefb3f63493a43935c54ad", "inCitations": [ "ab5a3ee384c0dfebe9c6d6ec946f402afb3f3474", "ab437a154008a30dfd5f76e84808dfa550d4bdd0", "8135bb03e657db4bc1119eba5041054784c7fdf9" ], "journalName": "", "journalPages": "17-32", "journalVolume": "", "outCitations": [ "64786f14780c24c2fc767b868158e2868a34cb38", "3ca09297ea549605c99a96daf8bc50b23cc54efc", "5b92d54f7fb2f28ed1c183fa9592ef79f8aef9f5", "0eee8a8e9e7e82fc16009c4dde65ddc99fc768c8", "dcdc692810e55073b81505243050b90e2ecddf86", "64d2a65a7d559f9b05570fb0fea8bb4cccd83ae2", "0455148206bd0e7975b8c65a5fd6ba0b18583152", "67f311151efe765e58c46d2548ef2594422fb393", "059eb34d95c73e37dca8e35b0ac5a2fb0142f3ee", "43dfb8212d7eb7660a81ce66de2405f1acee4638", "7f2210ff39ef9669f2a84db611c80c4b28f9fffc", "22198349a3a6c37ef84b6270b53f0e9dba2f5433", "5cc69b3b5e601e486348da79d32274836f0b8ad5", "2dcab9e92c6006e0e123d076ff2ab28aa859805a", "b39ac6725302b95023db49d6fadc4301a7aa4f9e", "85f0831997f0cdaef796e667a86665bbf3f04d05", "0f384b45ee96f84a20783fbbe0c11c942ba1073b", "113dfad2cac3dd66ef57e1651c711d0e2d420deb", "3ec8f2f88ca12353391e8a8b73154db795f871af", "8ad190feef8bc7744f6b3f155661f5a1c3389ab5", "84b928c418a49ba743435e8273665c5e1372e8b1", "22c5378f03ff7ce3945c907965e08efbee0d5f9a", "2dd52b31c8f0aa236853062f5ad18d5c686c9e40", "7b1157db688dd5c8b9df1f42caa76d4790fdf2c8", "0bd8ca3e3568549687a61828cf7be6fe504b362b", "128de07664b40535ce339881a7f60e9a88041635", "0541d5338adc48276b3b8cd3a141d799e2d40150", "127b35b01f4d1186a0707aed4fdd50eb00ae2ea2", "80240114e68bbe8f8ef983bda2a6afef5f94de63", "1b76022e2b84e582c20a599be5631b821c130d2e", "29d0199f1ef68881af6e2985293d3c3f581a0b12", "1efdbb7e1549087ff08b630438645b2df75d09cc", "24776cf99d75248ddbe66451eb09942615785b44", "ca4903512d99eeee6fdfe8af9907a27f5001b7c1", 
"24281c886cd9339fe2fc5881faf5ed72b731a03e", "42490a37f9e284ba4d368cf5a41f2ea6c26b0ee1", "ce317f10bd484735e629d4fa6936946f41d805f2", "11219749fb3f079efe407237618cee13e10cf07f", "c9ee10b0b83a9388cfc6a762ac7d2349cead5ed8" ], "paperAbstract": "List comprehensions provide a powerful abstraction mechanism for expressing computations over ordered collections of data declaratively without having to use explicit iteration constructs. This paper puts forth effectful comprehensions as an elegant way to describe list comprehensions that incorporate loop-carried state. This is motivated by operations such as compression/decompression and serialization/deserialization that are common in log/data processing pipelines and require loop-carried state when processing an input stream of data. \nWe build on the underlying theory of symbolic transducers to fuse pipelines of effectful comprehensions into a single representation, from which efficient code can be generated. Using background theory reasoning with an SMT solver, our fusion and subsequent reachability based branch elimination algorithms can significantly reduce the complexity of the fused pipelines. 
Our implementation shows significant speedups over reasonable hand-written code (3.4\u00d7, on average) and traditionally fused version of the pipeline (2.6\u00d7, on average) for a variety of examples, including scenarios for extracting fields with regular expressions, processing XML with XPath, and running queries over encoded data.", "pdfUrls": [ "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/04/pldi17paper.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2016/10/fusing_effectful_comprehensions.pdf", "http://doi.acm.org/10.1145/3062341.3062362", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/04/pldi17final.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4623f8c33045436448eefb3f63493a43935c54ad", "sources": [ "DBLP" ], "title": "Fusing effectful comprehensions", "venue": "PLDI", "year": 2017 }, "4655c716f39a981830adf334769e6926e74212a6": { "authors": [ { "ids": [ "3137133" ], "name": "Zakir Durumeric" }, { "ids": [ "10030115" ], "name": "Zane Ma" }, { "ids": [ "38405544" ], "name": "Drew Springall" }, { "ids": [ "1888184" ], "name": "Richard Barnes" }, { "ids": [ "31612094" ], "name": "Nick Sullivan" }, { "ids": [ "1687723" ], "name": "Elie Bursztein" }, { "ids": [ "1842668" ], "name": "Michael Bailey" }, { "ids": [ "2349976" ], "name": "J. 
Alex Halderman" }, { "ids": [ "1744800" ], "name": "Vern Paxson" } ], "doi": "", "doiUrl": "", "entities": [ "Airport security", "Algorithm", "Antivirus software", "Cipher", "Client-side", "Content delivery network", "Cryptography", "Digital distribution", "Downgrade", "E-commerce", "Encryption", "Firefox", "HTTPS", "Hardening (computing)", "Heuristic", "Hypertext Transfer Protocol", "International Standard Book Number", "Library", "Man-in-the-middle attack", "Middlebox", "Network traffic control", "Plaintext", "Population", "Proxy server", "Recommender system", "Relay", "Server (computing)", "Software deployment", "Telephone number", "Terminate (software)", "Transport Layer Security", "User agent", "Vulnerability (computing)", "Web server" ], "id": "4655c716f39a981830adf334769e6926e74212a6", "inCitations": [ "50e52b44df056efc7c4b1fa919ea9916ef4ca626", "0ed85812e55bcc6a514fdb79261cfea83b4dbd22", "082b402e28248a2a6bf8cf45dff08cdef3b71fe6", "1f855fe351176c61c3c4bcb215cc3808a3ffd2e3", "0aa883a6e27b9374e53225bfd830a890fc44a683", "37996e5017fba1a736b3deac1ed4e091c6739f29", "216a0d080a6cf6725c075cfc69af7cc5be24af08" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "31e4845a40cfa6a953aef78387b34ea3284cdff9", "08026d939ac1f30951ff7f4f7c335bf3fef47be4", "267260a76081a57c73aa61a803559695f5a23191", "48fc8f1aa0b6d1e4266b8017820ff8770fb67b6f", "01523e9ee2ef484fd74c21c26db3761fee37e1ee", "228c9ef44ed51f4152a5655be54c8d679c54bb01", "05575e85d4b0c09c09552921b2ee0db79e5e9cf9", "39ac27363c06ade948e0cc3e7797523122a19085", "1f38c11fe8511c77fb7d383126214c9e7dc28e4a", "4458751fda28db4b489b5626e2e9cc965f3c379a", "15921484ef80b0dfb6629f6fae7b5c9b8c8877e7", "06c9511a3a29f1afa3971b1885ad56b5a890dbdc", "0d2f693901fba451ede4d388724b0e3f57029cd3", "05cd49dca40332e85ed5f2d4cb8bde7d5970519c", "a8ba5152807babb6ed4b513b755e2dca3f84e9d9", "5a032460c589a67e7c73b19c93aa591331758139", "133eea63e0a9702207dc14fdd72740d402f5748b", "1e3822536527f98c53b716c26fa05da5bf729f17" 
], "paperAbstract": "As HTTPS deployment grows, middlebox and antivirus products are increasingly intercepting TLS connections to retain visibility into network traffic. In this work, we present a comprehensive study on the prevalence and impact of HTTPS interception. First, we show that web servers can detect interception by identifying a mismatch between the HTTP User-Agent header and TLS client behavior. We characterize the TLS handshakes of major browsers and popular interception products, which we use to build a set of heuristics to detect interception and identify the responsible product. We deploy these heuristics at three large network providers: (1) Mozilla Firefox update servers, (2) a set of popular e-commerce sites, and (3) the Cloudflare content distribution network. We find more than an order of magnitude more interception than previously estimated and with dramatic impact on connection security. To understand why security suffers, we investigate popular middleboxes and client-side security software, finding that nearly all reduce connection security and many introduce severe vulnerabilities. Drawing on our measurements, we conclude with a discussion on recent proposals to safely monitor HTTPS and recommendations for the security community. I. INTRODUCTION When it comes to HTTPS, the security community is working at cross purposes. On the one hand, we are striving to harden and ubiquitously deploy HTTPS in order to provide strong end-to-end connection security [5], [20], [22], [23], [34], [51]. At the same time, middlebox and antivirus products increasingly intercept (i.e., terminate and re-initiate) HTTPS connections in an attempt to detect and block malicious content that uses the protocol to avoid inspection [6], [12], [15], [27]. Previous work has found that some specific HTTPS interception products dramatically reduce connection security [7], [12], [58]; however, the broader security impact of such interception remains unclear. 
In this paper, we conduct the first comprehensive study of HTTPS interception in the wild, quantifying both its prevalence in traffic to major services and its effects on real-world security. We begin by introducing a novel technique for passively detecting HTTPS interception based on handshake characteristics. HTTPS interception products typically function as transparent proxies: they terminate the browser\u2019s TLS connection, inspect the HTTP plaintext, and relay the HTTP data over a new TLS connection to the destination server. We show that web servers can detect such interception by identifying a mismatch between the HTTP User-Agent header and the behavior of the TLS client. TLS implementations display varied support (and preference order) for cipher suites, extensions, elliptic curves, compression methods, and signature algorithms. We characterize these variations for major browsers and popular interception products in order to construct heuristics for detecting interception and identifying the responsible product. Next, we assess the prevalence and impact of HTTPS interception by applying our heuristics to nearly eight billion connection handshakes. In order to avoid the bias inherent in any single network vantage point, we analyzed connections for one week at three major Internet services: (1) Mozilla Firefox update servers, (2) a set of popular e-commerce websites, and (3) the Cloudflare content distribution network. These providers serve different types of content and populations of users, and we find differing rates of interception: 4.0% of Firefox update connections, 6.2% of e-commerce connections, and 10.9% of U.S. Cloudflare connections were intercepted. While these rates vary by vantage point, all are more than an order of magnitude higher than previous estimates [27], [46]. To quantify the real-world security impact of the observed interception, we establish a grading scale based on the TLS features advertised by each client. 
By applying the metric to unmodified browser handshakes and to the intercepted connections seen at each vantage point, we calculate the change in security for intercepted connections. While for some older clients, proxies increased connection security, these improvements were modest compared to the vulnerabilities introduced: 97% of Firefox, 32% of e-commerce, and 54% of Cloudflare connections that were intercepted became less secure. Alarmingly, not only did intercepted connections use weaker cryptographic algorithms, but 10\u201340% advertised support for known-broken ciphers that would allow an active man-in-the-middle attacker to later intercept, downgrade, and decrypt the connection. A large number of these severely broken connections were due to network-based middleboxes rather than client-side security software: 62% of middlebox connections were less secure and an astounding 58% had severe vulnerabilities enabling later interception. Finally, we attempt to understand why such a large number of intercepted connections are vulnerable by testing the security of a range of popular corporate middleboxes, antivirus products, and other software known to intercept TLS. The default settings for eleven of the twelve corporate middleboxes we evaluated expose connections to known attacks, and five introduce severe vulnerabilities (e.g., incorrectly validate certificates). Similarly, 18 of the 20 client-side security products we tested reduce Permission to freely reproduce all or part of this paper for noncommercial purposes is granted provided that copies bear this notice and the full citation on the first page. Reproduction for commercial purposes is strictly prohibited without the prior written consent of the Internet Society, the first-named author (for reproduction of an entire paper only), and the author\u2019s employer if the paper was prepared within the scope of employment. 
NDSS\u201917, 26 February\u20131 March, 2017, San Diego, CA, USA Internet Society, ISBN 1-891562-46-0 http://dx.doi.org/10.14722/ndss.2017.23456 connection security, and half introduce severe vulnerabilities. In some cases, manufacturers attempted to customize libraries or re-implement TLS, introducing negligent vulnerabilities. In other cases, products shipped with libraries that were years out of date. Across the board, companies are struggling to correctly deploy the base TLS protocol, let alone implement modern HTTPS security features. Our results indicate that HTTPS interception has become startlingly widespread, and that interception products as a class have a dramatically negative impact on connection security. We hope that shedding light on this state of affairs will motivate improvements to existing products, advance work on recent proposals for safely intercepting HTTPS [26], [38], [44], [54], and prompt discussion on long-term solutions. II. BACKGROUND In this section, we provide a brief background on HTTPS interception and describe the aspects of HTTP and TLS that are relevant to our fingerprinting techniques. 
We refer the reader to RFC 5280 [14] for a detailed description of TLS.", "pdfUrls": [ "http://www.internetsociety.org/sites/default/files/ndss2017_04A-4_Durumeric_paper_0.pdf", "http://mdbailey.ece.illinois.edu/publications/ndss17_interception.pdf", "https://zakird.com/papers/https_interception.pdf", "http://www.icir.org/vern/papers/tls-interception-ndss17.pdf", "http://www.internetsociety.org/sites/default/files/ndss2017-04a_4-durumeric_slides.pdf", "https://down.dsg.cs.tcd.ie/cs7053/materials/interception-ndss17.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/security-impact-https-interception/" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/6956/559b8e5dceb9193b68cc893f5c403f71eb98.pdf", "s2Url": "https://semanticscholar.org/paper/4655c716f39a981830adf334769e6926e74212a6", "sources": [ "DBLP" ], "title": "The Security Impact of HTTPS Interception", "venue": "NDSS", "year": 2017 }, "46690cdff60ef7f35c2c19d6eaac89964a6b4f79": { "authors": [ { "ids": [ "37763788" ], "name": "Akhil Arunkumar" }, { "ids": [ "2256123" ], "name": "Evgeny Bolotin" }, { "ids": [ "13715761" ], "name": "Benjamin Cho" }, { "ids": [ "1837948" ], "name": "Ugljesa Milic" }, { "ids": [ "3149281" ], "name": "Eiman Ebrahimi" }, { "ids": [ "2338598" ], "name": "Oreste Villa" }, { "ids": [ "1684691" ], "name": "Aamer Jaleel" }, { "ids": [ "2797270" ], "name": "Carole-Jean Wu" }, { "ids": [ "2899855" ], "name": "David W. 
Nellans" } ], "doi": "10.1145/3079856.3080231", "doiUrl": "https://doi.org/10.1145/3079856.3080231", "entities": [ "Dynamic random-access memory", "Graphical path method", "Graphics processing unit", "Locality of reference", "Moore's law", "Multi-chip module", "Scalability", "Speedup", "Supercomputer", "Transistor" ], "id": "46690cdff60ef7f35c2c19d6eaac89964a6b4f79", "inCitations": [ "d1fcc29063f09305969a678313ddba7e9f4f6e9c", "517a01774f760d08c73bd5de2a561c98fd5814dc", "2aa997522d212ab74163b986be211ffc7f3e9e34", "0581754e392d4a648f6a7b7665e3561df8627157", "84851b61293a4199c3f9164e21103b417aee49f7" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "320-332", "journalVolume": "", "outCitations": [ "f2bc77fdcea85738d1062da83d84dfa3371d378d", "45f119a7334f482513f6d71f3fffa4e9e239622c", "8400d290d55005839b678a95f4f18ecdce76dbe1", "512392937104faea422de3a9eb0d3fbc53848f2c", "98b4f2a851f94bfe5dead27fc963d078fa39d2b7", "cdc52c330fb6e44f412e39cb9457fc6834b10646", "387d5b24317395ae7a86c8ecc9403ac62ed6febe", "5d79e0c5e4b531f26de469688668c50f8c1069b2", "b872e246d77ec5692a05a5ca0aa35168e202b3e8", "363f34245c38c45eafa9c1e50e790ed33c69f224", "6c86a995c3454d888713e66948c0d09b1451f0c2", "df46e5c551f64adc2f188e2e1282c77f3c3570f8", "6335be42a352d1d4daa907533854410f57269926", "a60b9978e067f68dd373dda690abbb058d3fe91c", "1121ff5cdeaa470521b8dff084ba1424dd613cc1", "804df33cbda438274e1ae2d6d9e7609238a8bb27", "413ebb1202367f3d389988e53ae4a584ddf574d6", "63af4355721f417bc405886f383af096fbfe51b2", "a24c68f1d034ae19ea2cfcfdccdf189118cd70f9", "2185c38c4664bf75296655dd6cc16499926a7845", "a17cdeb56351994047796dbf30c686d9932c4935", "39d175c582c329aee890f2f507f0080c12677502", "32c8c7949a6efa2c114e482c830321428ee58d70", "69c97da85dd80abc472fb7a2fb59f22628a736de", "061356704ec86334dbbc073985375fe13cd39088", "4b078e69e4eca0dd7905a54f88f397e98790e407", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", 
"36e1b02a66ed928ef13e3a2ba6852e90a8713036", "2992b8985e094c3943e29dffc550862791fae147" ], "paperAbstract": "Historically, improvements in GPU-based high performance computing have been tightly coupled to transistor scaling. As Moore's law slows down, and the number of transistors per die no longer grows at historical rates, the performance curve of single monolithic GPUs will ultimately plateau. However, the need for higher performing GPUs continues to exist in many domains. To address this need, in this paper we demonstrate that package-level integration of multiple GPU modules to build larger logical GPUs can enable continuous performance scaling beyond Moore's law. Specifically, we propose partitioning GPUs into easily manufacturable basic GPU Modules (GPMs), and integrating them on package using high bandwidth and power efficient signaling technologies. We lay out the details and evaluate the feasibility of a basic Multi-Chip-Module GPU (MCM-GPU) design. We then propose three architectural optimizations that significantly improve GPM data locality and minimize the sensitivity on inter-GPM bandwidth. Our evaluation shows that the optimized MCM-GPU achieves 22.8% speedup and 5x inter-GPM bandwidth reduction when compared to the basic MCM-GPU architecture. Most importantly, the optimized MCM-GPU design is 45.5% faster than the largest implementable monolithic GPU, and performs within 10% of a hypothetical (and unbuildable) monolithic GPU. 
Lastly we show that our optimized MCM-GPU is 26.8% faster than an equally equipped Multi-GPU system with the same total number of SMs and DRAM bandwidth.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080231", "http://faculty.engineering.asu.edu/carolewu/wp-content/uploads/2012/12/MCM-GPU_ISCA17_Arunkumar.pdf", "http://research.nvidia.com/sites/default/files/publications/ISCA_2017_MCMGPU.pdf", "http://faculty.engineering.asu.edu/carolewu/wp-content/uploads/2012/12/Arunkumar_ISCA_2017_lightning.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/46690cdff60ef7f35c2c19d6eaac89964a6b4f79", "sources": [ "DBLP" ], "title": "MCM-GPU: Multi-chip-module GPUs for continued performance scalability", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "46902aa4926f2f70cfcd8662a702a1e11a369e65": { "authors": [ { "ids": [ "39718171" ], "name": "Rishabh Mehrotra" }, { "ids": [ "35466594" ], "name": "Emine Yilmaz" } ], "doi": "10.1145/3077136.3080823", "doiUrl": "https://doi.org/10.1145/3077136.3080823", "entities": [ "Crowdsourcing", "Experiment", "Information needs", "Multi-task learning", "Personalization", "Recommender system", "Web search query" ], "id": "46902aa4926f2f70cfcd8662a702a1e11a369e65", "inCitations": [ "a3b0819915289f6b44dc34d46e4e6e43fce718e7", "642622661860f5a6ffab678cddd87e403fec2cf3" ], "journalName": "", "journalPages": "285-294", "journalVolume": "", "outCitations": [ "46cf276bdb27dd12c4c36f35855041e79e4ec981", "2279b6c32509eeb47184c7fa468cae1e84da24eb", "3a9793acf40c10021648dcadb3d32818b8181500", "25ff4e83ca020fb5764e7544262ea3ab53269adc", "4350bc9f6f4fe43ae2d245fa42a2e42f76881119", "72de8805ee27bb37767b8709ad3c9767a9a097ea", "13fa4297fcd0ddc1725c2f88b1e0901594ac0380", "077ddb6f7f291b8a78be1107149d4515e8a65caf", "c3b151f77d55c8093d9b43306ee7ebea3282df0c", "e09c5d29d6a6668dbdb5326236285eb8d07f64b2", "3176797ede1961f31bd92ba0313892f9c67f4d99", 
"28479965b3a4e66383210f31ff7a1e14dea8e4eb", "a0e8752d5f83e721ee74ab25248b83d958c9e80c", "b2d26ed1e4658b8bace957b6f4a7d0b2d5e671fc", "2a107c4919fd5c59148f96e33f0193fa8ec68c2b", "8a15d2114cf5fb8d76d4c93c4410ebccc1a2b352", "3e40f8313a5f439fdc561140af67349f1a3b3f47", "1707eaecc443448ab8ef9a0bb9d3666b532086e1", "6baf85803d77a05d3ea42db11a291e04509fc319", "25494a7e6244a61d47f6e7da8ac8221eb92f1089", "04e715f3664c39e7eb6484b7b53f5a1e1c9a422e", "10ce6749c14f6376da22c8cf076a5ac2da3f7b38", "09a54c9c2624edfb54ea931d310453449b35af1d", "598ae927883728b4c5df6d9a833d7dcd19832b57", "23f339ca476c0246e178104fce59254c208bcb4b", "1686596d24edcc2dd723920ea800a1c0b76c3d9e", "833da40180f19a6f32ea22338297872aab83a068", "032eb8a055bfe1f9d1272048498395fcd3502206", "42b7f508e31be6565ecaff974818abd6330aab35", "06ad0d17210d48427222d8f0848d3855146aff4a", "3dd3e986fda5541a039239c8b5c705b030ecc73f", "67c0339af5df01b8a9b14512276c3d9ebc4b7e09", "60b249993cc7245969c6ed16bd0c233593c88cfe", "2c2db87aedf64e66bd55f2df9266439d609852b8", "42e38615c917116dde670a6f7a10f673bd70b017", "8e31f3c7e70e9a5f8afafd86cebc004d5eca8c2b", "784ed469c4708eac6d5e5e740c4de61f23a77c4d", "8d742b3beab06d269bd8c3685848f52eff9f1d07" ], "paperAbstract": "A significant amount of search queries originate from some real world information need or tasks [13]. In order to improve the search experience of the end users, it is important to have accurate representations of tasks. As a result, significant amount of research has been devoted to extracting proper representations of tasks in order to enable search systems to help users complete their tasks, as well as providing the end user with better query suggestions [9], for better recommendations [41], for satisfaction prediction [36] and for improved personalization in terms of tasks [24, 38]. Most existing task extraction methodologies focus on representing tasks as flat structures. 
However, tasks often tend to have multiple subtasks associated with them and a more naturalistic representation of tasks would be in terms of a hierarchy, where each task can be composed of multiple (sub)tasks. To this end, we propose an efficient Bayesian nonparametric model for extracting hierarchies of such tasks & subtasks. We evaluate our method based on real world query log data both through quantitative and crowdsourced experiments and highlight the importance of considering task/subtask hierarchies.", "pdfUrls": [ "https://arxiv.org/pdf/1706.01574v2.pdf", "https://arxiv.org/pdf/1706.01574v1.pdf", "http://arxiv.org/abs/1706.01574", "http://www.rishabhmehrotra.com/papers/SIGIR-mehrotra-17a.pdf", "http://doi.acm.org/10.1145/3077136.3080823" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/46902aa4926f2f70cfcd8662a702a1e11a369e65", "sources": [ "DBLP" ], "title": "Extracting Hierarchies of Search Tasks & Subtasks via a Bayesian Nonparametric Approach", "venue": "SIGIR", "year": 2017 }, "46c0f934ef0705b953ba8b14c5dee79b4df724db": { "authors": [ { "ids": [ "3242478" ], "name": "Niv Dayan" }, { "ids": [ "1840402" ], "name": "Manos Athanassoulis" }, { "ids": [ "2203901" ], "name": "Stratos Idreos" } ], "doi": "10.1145/3035918.3064054", "doiUrl": "https://doi.org/10.1145/3035918.3064054", "entities": [ "Attribute\u2013value pair", "Best, worst and average case", "Bloom filter", "Computational complexity theory", "Computer data storage", "Experiment", "Key-value database", "LevelDB", "Lookup table", "Memory footprint", "Value (ethics)" ], "id": "46c0f934ef0705b953ba8b14c5dee79b4df724db", "inCitations": [ "8542fdcb42804a31fedb86874e3c75cd03830d4d", "5fca7b8c3ada3be2b41160a776c6389501998ce0", "33b44d897e73fb50b9ab7bc3ebdc2986e3c77736", "004fc351f16722654bf9d7ca2d36e90fec00cd5d" ], "journalName": "", "journalPages": "79-94", "journalVolume": "", "outCitations": [ "39c88337433b1ddedb1a180a90dacff53810ebe5", 
"7f49cad23f57a8bc48133b2c599d40c216d1c046", "214c966d1f9c2a4b66f4535d9a0d4078e63a5867", "7ae71ff8a5f950c12305f27d8584a9c9203717a9", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "2b56f0ca7e74a43a54b70a7bb3507855c653a85b", "9b90568faad1fd394737b79503571b7f5f0b2f4b", "098d792d1783b5f6fc098203f71f21f5d053c653", "59ca22b032e7e1fb4467a3b2ad577cbf9d8f0ea8", "06bd4d2d21624c7713d7f10ccb7df61bf6b9ee71", "f4147b82166813bbe5dc01e9486664c273d1556c", "77ee477b0e6d0b6245eca5865ba95b55ed3db434", "39ac2e0fc4ec63753306f99e71e0f38133e58ead", "18a5f443299784479e78d9e77f175af57cb2fa2b", "b05f104f5a28a1a2c2fdb216d3d0959a5786f0ad", "3593269a4bf87a7d0f7aba639a50bc74cb288fb1", "46d76119315b7f999ff52f97bbc7f078fe037aad", "b4087345c63a7b2412eeb31066b5e4bceadbbcb2", "395b3c67b88c7d094997e1c6ad75c5425cbc0400", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "199ac28b6bc68bf05c77645ffae7640df114bca5", "19e5a8ea876cee86e78b659fc96ae18eb8c3a834" ], "paperAbstract": "In this paper, we show that key-value stores backed by an LSM-tree exhibit an intrinsic trade-off between lookup cost, update cost, and main memory footprint, yet all existing designs expose a suboptimal and difficult to tune trade-off among these metrics. We pinpoint the problem to the fact that all modern key-value stores suboptimally co-tune the merge policy, the buffer size, and the Bloom filters' false positive rates in each level.\n We present Monkey, an LSM-based key-value store that strikes the optimal balance between the costs of updates and lookups with any given main memory budget. The insight is that worst-case lookup cost is proportional to the sum of the false positive rates of the Bloom filters across all levels of the LSM-tree. Contrary to state-of-the-art key-value stores that assign a fixed number of bits-per-element to all Bloom filters, Monkey allocates memory to filters across different levels so as to minimize this sum. 
We show analytically that Monkey reduces the asymptotic complexity of the worst-case lookup I/O cost, and we verify empirically using an implementation on top of LevelDB that Monkey reduces lookup latency by an increasing margin as the data volume grows (50%-80% for the data sizes we experimented with). Furthermore, we map the LSM-tree design space onto a closed-form model that enables co-tuning the merge policy, the buffer size and the filters' false positive rates to trade among lookup cost, update cost and/or main memory, depending on the workload (proportion of lookups and updates), the dataset (number and size of entries), and the underlying hardware (main memory available, disk vs. flash). We show how to use this model to answer what-if design questions about how changes in environmental parameters impact performance and how to adapt the various LSM-tree design elements accordingly.", "pdfUrls": [ "http://stratos.seas.harvard.edu/files/stratos/files/monkeykeyvaluestore.pdf", "http://doi.acm.org/10.1145/3035918.3064054" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/46c0f934ef0705b953ba8b14c5dee79b4df724db", "sources": [ "DBLP" ], "title": "Monkey: Optimal Navigable Key-Value Store", "venue": "SIGMOD Conference", "year": 2017 }, "46f3bb6751419b87856c4db0193e7a72ef3fa17c": { "authors": [ { "ids": [ "3243512" ], "name": "Tal Ben-Nun" }, { "ids": [ "38772073" ], "name": "Michael Sutton" }, { "ids": [ "33403976" ], "name": "Sreepathi Pai" }, { "ids": [ "1776186" ], "name": "Keshav Pingali" } ], "doi": "10.1145/3018743.3018756", "doiUrl": "https://doi.org/10.1145/3018743.3018756", "entities": [ "Algorithm", "Computer programming", "Graphics processing unit", "Programming model", "Runtime system", "Scalability", "Speedup", "Supercomputer", "Synchronous programming language" ], "id": "46f3bb6751419b87856c4db0193e7a72ef3fa17c", "inCitations": [ "e191fb663b1dd33d59a8f2c9048a408ae145fca3", "f4e42c15a7a35a198a04a74cbdbe19d360a2d00c", 
"22b0e79a1c31f3016e91c7dec20989aa04c80ff3", "896134c7aa767e27cb3c3aa0662b335473923602", "79d68db415c56f5641cd645173f7d3f0b5307035", "0268d90edd6d122e570011d0c66b838156211ce6" ], "journalName": "", "journalPages": "235-248", "journalVolume": "", "outCitations": [ "22c74d8be071084ce8812af19548e7bf2bf0c8b6", "175d795f44037ef60dd9df341701cd5fdc449f1f", "21f35a5ecc0faf0c5f760e20cb9ce9e63a30a768", "1ad8410d0ded269af4a0116d8b38842a7549f0ae", "56d5d3f3ec4d95d13b0a2d6c08ee46f8704b82dc", "5053d80a916aa6be5d1f2253a5f420954da7a3e4", "5b75c61e3183ea6228d08b2f6c00fd2cd74baada", "e9526c64d991837c9c985c044212a5a0bc636860", "530a24fba4029c83b6b5c84d1565750114fa85a4", "eebeef3b780a4904982a0a0130c063c65d6cb913", "94c20561f102ede79c461c6aeee74eb7a55d5eb4", "b997070b0cc16bdcd62e982834b9899d3b8ea921", "0c75806bfe62a119e1aa580327c2f8db01b898aa", "993e21ed73fc39048a42d06855bc85236ffd1063", "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", "05ef51cba0404893c1d13259b049890bfe06366a", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "ce18973fb7c23cb4fc1c1a61c1c1c4333f4abad1", "512a1ebdcaca56f3ea0c21aa2abe9a5ab7dace06", "2724de31317b1b9e026b5f90251829ee02f3fa3f", "d410f8128bd4efa6adc886259e5d9de4cd7587bc", "4803fb704ed0ba065ccf9620fe441bc068e42300", "2d5711cc4b0a2aef74a63fd1101a1dcd91c4d043", "3a1f3429bbb163e050188cce42a647a11312260c", "c3008dd707e4dfd43606a544d4cac4bf1f081f2b", "3c84f22df1948dfa8b1b14bbd4c850baf9c5b632", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "8400d290d55005839b678a95f4f18ecdce76dbe1" ], "paperAbstract": "Nodes with multiple GPUs are becoming the platform of choice for high-performance computing. However, most applications are written using bulk-synchronous programming models, which may not be optimal for irregular algorithms that benefit from low-latency, asynchronous communication. This paper proposes constructs for asynchronous multi-GPU programming, and describes their implementation in a thin runtime environment called Groute. 
Groute also implements common collective operations and distributed work-lists, enabling the development of irregular applications without substantial programming effort. We demonstrate that this approach achieves state-of-the-art performance and exhibits strong scaling for a suite of irregular applications on 8-GPU and heterogeneous systems, yielding over 7x speedup for some algorithms.", "pdfUrls": [ "http://www.cs.rochester.edu/u/sree/papers/groute-ppopp17.pdf", "http://dl.acm.org/citation.cfm?id=3018756", "http://www.cs.huji.ac.il/~talbn/docs/groute-ppopp17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/46f3bb6751419b87856c4db0193e7a72ef3fa17c", "sources": [ "DBLP" ], "title": "Groute: An Asynchronous Multi-GPU Programming Model for Irregular Computations", "venue": "PPOPP", "year": 2017 }, "47242a132a1fc42b11c989d823f6e9d369c24f81": { "authors": [ { "ids": [ "3249639" ], "name": "Tingjin Luo" }, { "ids": [ "8030859" ], "name": "Weizhong Zhang" }, { "ids": [ "11806086" ], "name": "Shang Qiu" }, { "ids": [ "1708973" ], "name": "Yang Yang" }, { "ids": [ "34187920" ], "name": "Dongyun Yi" }, { "ids": [ "2148300" ], "name": "Guangtao Wang" }, { "ids": [ "37513601" ], "name": "Jieping Ye" }, { "ids": [ "1729576" ], "name": "Jie Wang" } ], "doi": "10.1145/3097983.3097984", "doiUrl": "https://doi.org/10.1145/3097983.3097984", "entities": [ "Algorithm", "Coordinate descent", "Experiment", "Feature vector", "Ground truth", "Hinge loss", "Loss function", "Loss functions for classification", "Optimization problem", "Program optimization", "Sparse matrix" ], "id": "47242a132a1fc42b11c989d823f6e9d369c24f81", "inCitations": [], "journalName": "", "journalPages": "345-354", "journalVolume": "", "outCitations": [ "9684c7dc9fd536a9c806c4a69aa7993978ebfe08", "5be4c06e24ae190429bfa60f5ef6ae3cd33cf0e6", "3447fe054f6af70403cfc39b4d21076337a71128", "4ff172341fd872ea513195d9ceba7b42b0e9c3ae", "68dec9f98eddde001b47ffbde9698bc3402e086d", 
"4d8340eae2c98ab5e0a3b1a7e071a7ddb9106cff", "054e8add9e1df6a7c75e2b2cd2f18dd6627c81e4", "3513b42020ffe7cad04e682a43e3a81cc1e183d1", "9434320fda0ae93244662caffba6597767834a57", "1c7d38f68fe1150895a186e30b60c02dd89a676a", "03e8d6373b63bb15e11d3092477c55c74c063b72", "9e475eff11c29f0d532f18a0710bfd87010ef44d", "46217f372a75dddc2254fdbc6b9418ba3554e453", "1019613a56e97542503ff8c5d6e8c359fa56da50", "40d7c9c6fa0e0c1073ce422b4dd4a3c8c101e61d", "8552adffd9783b1a63e82757f3886e93d5ab015d", "8d8ac7bf953845a6c0c1b0bcf222dedbb58057fc", "077ef373d33952ac1a03ce34758c68ebf3ed4abc", "63d440eb606c7aa4ee3c7fcd94d65af3f5c92c96", "3094456890fb14340ed26c254e776e39f9533f75", "4a3684ee0c64d22386a44c0ddfbf8d609cd2de48", "c3a580d1f16b5166af486d9f92e6d7537ad62400", "3f02266e86128012c3ea87ee4cf3695b9b6cacd6", "d28f0f07659b1b1a9cb8e62dc4152f50b84a4d39", "6e00f7980c4efc55ba76efdccebc6411f054a7da", "64372501affd8571db20dc606b0146a76c266303", "00095810acabeb72a60743cf42fc15f7ee90e28e", "1cb0c6573195aeb933e9ff663dad71d8ad1b0e13", "08acd521416464ea0deebd6060be6e972dd575c5" ], "paperAbstract": "Functional annotation of human genes is fundamentally important for understanding the molecular basis of various genetic diseases. A major challenge in determining the functions of human genes lies in the functional diversity of proteins, that is, a gene can perform different functions as it may consist of multiple protein coding isoforms (PCIs). Therefore, differentiating functions of PCIs can significantly deepen our understanding of the functions of genes. However, due to the lack of isoform-level gold-standards (ground-truth annotation), many existing functional annotation approaches are developed at gene-level. In this paper, we propose a novel approach to differentiate the functions of PCIs by integrating sparse simplex projection---that is, a nonconvex sparsity-inducing regularizer---with the framework of multi-instance learning (MIL). 
Specifically, we label the genes that are annotated to the function under consideration as positive bags and the genes without the function as negative bags. Then, by sparse projections onto simplex, we learn a mapping that embeds the original bag space to a discriminative feature space. Our framework is flexible to incorporate various smooth and non-smooth loss functions such as logistic loss and hinge loss. To solve the resulting highly nontrivial non-convex and non-smooth optimization problem, we further develop an efficient block coordinate descent algorithm. Extensive experiments on human genome data demonstrate that the proposed approaches significantly outperform the state-of-the-art methods in terms of functional annotation accuracy of human PCIs and efficiency.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3097984" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/47242a132a1fc42b11c989d823f6e9d369c24f81", "sources": [ "DBLP" ], "title": "Functional Annotation of Human Protein Coding Isoforms via Non-convex Multi-Instance Learning", "venue": "KDD", "year": 2017 }, "4733ce0d918bffb88d806bae1614d19e02b27a5c": { "authors": [ { "ids": [ "2869628" ], "name": "Hongke Zhao" }, { "ids": [ "9496222" ], "name": "Hefu Zhang" }, { "ids": [ "1874059" ], "name": "Yong Ge" }, { "ids": [ "3231800" ], "name": "Qi Liu" }, { "ids": [ "1703319" ], "name": "Enhong Chen" }, { "ids": [ "2703486" ], "name": "Huayu Li" }, { "ids": [ "2688093" ], "name": "Le Wu" } ], "doi": "10.1145/3097983.3098030", "doiUrl": "https://doi.org/10.1145/3097983.3098030", "entities": [ "Belief revision", "Crowdfunding", "Experiment", "Indiegogo", "Money", "Telephone exchange", "Time series" ], "id": "4733ce0d918bffb88d806bae1614d19e02b27a5c", "inCitations": [], "journalName": "", "journalPages": "625-634", "journalVolume": "", "outCitations": [ "1499fe40fdf50f1e85a2757b82b4538b5d2b2f9b", "73e2060e61b5e6bfc6069cb4112168c8d25e9a3c", 
"3ce3d335fb7198e73e68bafed61821621c167d38", "1291225d583bae2b4f5cedf947111ed6a2a41bb8", "677d180c0291db7da6a52b2db00ad573571874f1", "40441c11df37f542ff4d0512a61e38d0103868fa", "12ad705dc527e83f28a1d909f8b113966b08e888", "1510cf4b8abea80b9f352325ca4c132887de21a0", "20faa2ef4bb4e84b1d68750cda28d0a45fb16075", "4c506aab9868a53251dc8dd43809040612a970f1", "18249a9e726ab561b9561dfec8f98ebd4f6fda0e", "f19b99c04b09ab5d45040cedaa3591af6ac674d9", "9989ba9e12054afce501b4aaf16009d18446fdf1", "5ca38fc2c835acac6fe5e35b924ca434a7a425e6", "e7175f13d4422794f348ec168d2bab13715b673a", "61e17c84599a0a11c22521f8136219dc3ef9afaf", "767ee7042a5f269bce42be4e38597a4004002793", "a244c47a1d4a8c2894b22807df8c7eec16cc110a", "4a8e60bb6e965ad992d91805e9d350c49d9b003b", "2eed12870c56e1fb565f7961aaa0c645d9e2af15", "894fd6b1a71cc22551584247a836cda69379548f", "23384c68515493f9f51bfa86063e18864175d9cc", "1e59e7101cbca8c123e2f37130b7446d667cda78", "5e3265777b96f24d54ddaaee4a3f35858642b8e7", "cebd268d73522c97bae7bfddebb6a2ad759bd155", "1b5648fe28b58286898d10c48ac4811536b648a7" ], "paperAbstract": "Crowdfunding is an emerging Internet fundraising mechanism by raising monetary contributions from the crowd for projects or ventures. In these platforms, the dynamics, i.e., daily funding amount on campaigns and perks (backing options with rewards), are the most concerned issue for creators, backers and platforms. However, tracking the dynamics in crowdfunding is very challenging and still under-explored. To that end, in this paper, we present a focused study on this important problem. A special goal is to forecast the funding amount for a given campaign and its perks in the future days. Specifically, we formalize the dynamics in crowdfunding as a hierarchical time series, i.e., campaign level and perk level. 
Specific to each level, we develop a special regression by modeling the decision making process of the crowd (visitors and backing probability) and exploring various factors that impact the decision; on this basis, an enhanced switching regression is proposed at each level to address the heterogeneity of funding sequences. Further, we employ a revision matrix to combine the two-level base forecasts for the final forecasting. We conduct extensive experiments on a real-world crowdfunding data collected from Indiegogo.com. The experimental results clearly demonstrate the effectiveness of our approaches on tracking the dynamics in crowdfunding.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098030" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4733ce0d918bffb88d806bae1614d19e02b27a5c", "sources": [ "DBLP" ], "title": "Tracking the Dynamics in Crowdfunding", "venue": "KDD", "year": 2017 }, "47340eab259689cfa2cae9bd207d5c8fbc425a78": { "authors": [ { "ids": [ "26597650" ], "name": "Thomas J. Repetti" }, { "ids": [ "31751371" ], "name": "Jo\u00e3o Pedro Cerqueira" }, { "ids": [ "18709381" ], "name": "Martha A. 
Kim" }, { "ids": [ "2045759" ], "name": "Mingoo Seok" } ], "doi": "10.1145/3123939.3124551", "doiUrl": "https://doi.org/10.1145/3123939.3124551", "entities": [ "Autonomous car", "CMOS", "Dennard scaling", "Field-programmable gate array", "Graphics processing unit", "Hazard (computer architecture)", "Low-power broadcasting", "Manycore processor", "Microarchitecture", "Pipeline (computing)", "Shared memory", "Very-large-scale integration", "Video-in video-out" ], "id": "47340eab259689cfa2cae9bd207d5c8fbc425a78", "inCitations": [], "journalName": "", "journalPages": "96-108", "journalVolume": "", "outCitations": [ "73b205c99f4d70be5d4858879821e56b7b169f81", "35a8bd9f56806f203b7fa47831bb3dde174a06e7", "8f650f6fd41e95faa6bbcc0754c7b19510a37510", "02c78232075ac431834e3442dcb2954d4e708def", "e3fa998bede8f5db8d71349c7a0d53ad0aa4f7f7", "174ce50523cb31c0f23d28d0662c8bfa745fc6eb", "1d0f185d33d5dd80d317490ff53b6e698bd6a682", "5ef5f4ce1b5093d0a8277798e31dbd0c1041db39", "399acab2bee6eccbfffe4a2ce688b6b1075e9c5e", "185e235f94124f6d91ebb839c7e6c7401693f58b", "269c24a4aad9be622b609a0860f5df80688c2f93", "05a99a31ed4a5a25e21859fca562b7c0f70ab0dc", "3d044d4f708b8803ca2323ede66ba5f303ac1fba", "30a41babda1d355ab386a93d1dbafce4daaabcf7", "5d02394e98900bae1c0176b8b33e1f87ed2d3254", "2e5ef3e461eaccf533aaae000ef847ae581d4363", "f654cb5f0b66f67883d770d3630c96b6f8bea468", "3f4a72724642486531e034e29145a172d7115897", "6e9fb40e3e06e3903ecdb439a1d99f1888e6b910", "60cfe41fd68644fb19cba99babae694a2acacc17", "8587f6195917761cb5668b64147a5b6004be6ebe", "203c89561b5a2bf7f15cad5c0f9d91a11c853f5f", "258fb4929a1e804d1e0bd84af0704410293517f4", "1080c001b25063e0265f13df89c50acfc01cb787", "1b82108089107d726ad6b4167ed59b3de78654f5", "7b4d2b27d4b768f96dd39e71a4a35fb4f85a38f4", "d589123c9665f52c1c06a0b3c80aa94c423a8908" ], "paperAbstract": "Programmable spatial architectures composed of ensembles of autonomous fixed-ISA processing elements offer a compelling design point between the flexibility of an FPGA and 
the compute density of a GPU or shared-memory many-core. The design regularity of spatial architectures demands examination of the processing element microarchitecture early in the design process to optimize overall efficiency.\n This paper considers the microarchitectural issues surrounding pipelining a spatial processing element with triggered-instruction control. We propose two new techniques to mitigate pipeline hazards particular to spatial accelerators and non-program-counter architectures, evaluating them using in-vivo performance counters from an FPGA prototype coupled with a rigorous VLSI power and timing estimation methodology. We consider the effect of modern, post-Dennard-scaling CMOS technology on the energy-delay tradeoffs and identify a set of microarchitectures optimal for both high-performance and low-power application settings. Our analysis reveals the effectiveness of our hazard mitigation techniques as well as the range of microarchitectures designers might consider when selecting a processing element for triggered spatial accelerators.", "pdfUrls": [ "http://arcade.cs.columbia.edu/pipe-micro17.pdf", "http://doi.acm.org/10.1145/3123939.3124551" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/47340eab259689cfa2cae9bd207d5c8fbc425a78", "sources": [ "DBLP" ], "title": "Pipelining a triggered processing element", "venue": "MICRO", "year": 2017 }, "4742d03534db861bf878807602386662f43f5bca": { "authors": [ { "ids": [ "1708174" ], "name": "Georgios C. 
Chasparis" }, { "ids": [ "2697602" ], "name": "Michael Rossbory" }, { "ids": [ "1938609" ], "name": "Vladimir Janjic" } ], "doi": "10.1007/978-3-319-64203-1_12", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_12", "entities": [ "HTTP Public Key Pinning", "Parallel computing", "Reinforcement learning" ], "id": "4742d03534db861bf878807602386662f43f5bca", "inCitations": [ "f95ff14f0b28e9ea7799e9b3745e6dc68015b0f5" ], "journalName": "", "journalPages": "164-176", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_12" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4742d03534db861bf878807602386662f43f5bca", "sources": [ "DBLP" ], "title": "Efficient Dynamic Pinning of Parallelized Applications by Reinforcement Learning with Applications", "venue": "Euro-Par", "year": 2017 }, "475066a848dc63b0eca8da7a989144cdb9d39298": { "authors": [ { "ids": [ "7536938" ], "name": "Rezaul Alam Chowdhury" }, { "ids": [ "2606471" ], "name": "Pramod Ganapathi" }, { "ids": [ "32011355" ], "name": "Yuan Tang" }, { "ids": [ "2168897" ], "name": "Jesmin Jahan Tithi" } ], "doi": "10.1145/3087556.3087586", "doiUrl": "https://doi.org/10.1145/3087556.3087586", "entities": [ "Algorithm", "Atomic broadcast", "Cache-oblivious algorithm", "Correctness (computer science)", "Dynamic programming", "Linearizability", "Lock (computer science)", "Parallel computing", "Recurrence plot", "Recursion", "Scalability", "Scheduling (computing)", "Time complexity", "Work stealing" ], "id": "475066a848dc63b0eca8da7a989144cdb9d39298", "inCitations": [ "e89de4cb6056ee935418d65a93b703a7e121b8fa", "6c08e4b98d4b0b73f37b118d54d7875cce5f14c4" ], "journalName": "", "journalPages": "339-350", "journalVolume": "", "outCitations": [ "2ed5d6b35f8971fb9d7434a2683922c3bfcc058e", "8ca6edc152d81089b4e23d51d9e60a3b46ff52f2", "a14ad62f09ed19d6cf4876fb4e9518d52f57b942", "708dc802e9fafe94ba7b5c39eb6df06c3d6e6169", 
"6a67648db6ed2dd35bf8dde97d7bfcdf21db9631", "6145f0b0df53ec2581d733f003d251e0cabef04a", "deb14bcd12100da8c4aae466a1127a8834931a0f", "8441410f75f32aebcca3827117fbc0c775e3d6cf", "bab268857baf555ea6d2c1f638857e28b4fd5aa5", "ba811b0576dccf419836c8086ee80626df42fcbe", "19971985fd9f4a1a602e29062f27708dd5fdca2c", "ed97e14f912ca28aea90aa1c50a748954ba1883b", "008b490697d36e43dc2df656efff524bedcf076f", "2f7fa291bdc6a2f8c7994cf1896868f057a6b0ca", "7316b2853d3699a537bb5349bc258c4642f0999f", "c7a153810337e170d3e65c7ad01fbe4da8da4f76", "13ec7c133cb95913ea4f8d1413a41518b2bd9236", "31181e73befea410e25de462eccd0e74ba8fea0b", "133c176b649618b1f6bc13ec6783647c87bf9935", "1fb31b1031dbe93bde81e4c282f267b1f3446a5f", "6212a4bb31299f0375e9d926460763d586a05765", "768141fc4da96c71e04103c6ccdd52169724ecbd", "f0f4757aa2f923a349e8357e73850a78e9b80fee", "146a4c6a7c94be0113da61d135f74df7be636819", "3ee0eec0553ca719f602b777f2b600a7aa5c31aa", "2111d3d2270b0e72170badc82f6de2ad0de69aa2", "c3e01fbc9ffba4911cfcb5faf91e0a90b90b191b", "18f355d7ef4aa9f82bf5c00f84e46714efa5fd77", "f8b25b305622a1dd7bb107276d6577a6552b7ff8", "05db6b886ce6bf260dc725450873cdb0b2a0c065", "1322fd55045d22849bbc879193af44791e28e510", "202116387ccf060e90867468f18041561fbc7735", "02d0a24f01c02035f0e7ec890094b546e6482e56", "17475642d89ba37a880b0a598a06ea837439b9f1", "ae14fad252dfe0328caf7084c4ac2abf33eddee1", "310eb3d55550b8dcfc0d6928589e1f756fe80a53", "23c393ba7718da5d0dd160c68001ead6231d8db4", "432def4fa3c820b6f1820f67070241510298a19d", "21baf97b23c5a72f5a1628bc6c129878530b11f7", "a38671e77606af2212e2ecb319334f1877b0f6fe" ], "paperAbstract": "Iterative wavefront algorithms for evaluating dynamic programming recurrences exploit optimal parallelism but show poor cache performance. Tiled-iterative wavefront algorithms achieve optimal cache complexity and high parallelism but are cache-aware and hence are not portable and not cache-adaptive. 
On the other hand, standard cache-oblivious recursive divide-and-conquer algorithms have optimal serial cache complexity but often have low parallelism due to artificial dependencies among subtasks. Recently, we introduced cache-oblivious recursive wavefront (COW) algorithms, which do not have any artificial dependencies, but they are too complicated to develop, analyze, implement, and generalize. Though COW algorithms are based on fork-join primitives, they extensively use atomic operations for ensuring correctness, and as a result, performance guarantees (i.e., parallel running time and parallel cache complexity) provided by state-of-the-art schedulers (e.g., the randomized work-stealing scheduler) for programs with fork-join primitives do not apply. Also, extensive use of atomic locks may result in high overhead in implementation.\n In this paper, we show how to systematically transform standard cache-oblivious recursive divide-and-conquer algorithms into recursive wavefront algorithms to achieve optimal parallel cache complexity and high parallelism under state-of-the-art schedulers for fork-join programs. Unlike COW algorithms these new algorithms do not use atomic operations. Instead, they use closed-form formulas to compute the time when each divide-and-conquer function must be launched in order to achieve high parallelism without losing cache performance. The resulting implementations are arguably much simpler than implementations of known COW algorithms. 
We present theoretical analyses and experimental performance and scalability results showing a superiority of these new algorithms over existing algorithms.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3019031", "http://doi.acm.org/10.1145/3087556.3087586" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/475066a848dc63b0eca8da7a989144cdb9d39298", "sources": [ "DBLP" ], "title": "Provably Efficient Scheduling of Cache-oblivious Wavefront Algorithms", "venue": "SPAA", "year": 2017 }, "47734696239c5090f81cf821a0962a0ab47d2376": { "authors": [ { "ids": [ "34999391" ], "name": "James Gleeson" }, { "ids": [ "1879216" ], "name": "Eyal de Lara" } ], "doi": "", "doiUrl": "", "entities": [ "Batch processing", "Downgrade", "General-purpose computing on graphics processing units", "Graphics processing unit", "OpenCL API" ], "id": "47734696239c5090f81cf821a0962a0ab47d2376", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "51280ae0374a3034fb98f59f878138dbb9aa8360", "cb85c0ea860c073bb655e1082c75b9f40b020124", "4719002c81cced8fa11fa228d4aa44777eabd944", "9fb72d48e20450fb42448fe30d26f96a36b5db6a", "269c24a4aad9be622b609a0860f5df80688c2f93", "32bd15d39a63696ffa6fb11e2c2bb60f6355c6ae", "04e9d7b1544ec76e3e5c24b46ccae5d5096b638b", "aa931bfc67b3e7b56671e14facdfe7a85d26992a", "3a33424cd2ad63cc056a2d9a06b8794d78ba5214", "4954fa180728932959997a4768411ff9136aac81", "054572f0a9cf49fa9757ce937d097de6200fe942", "cf10de935c08c5b600fe2d08ed9ecf176f727837" ], "paperAbstract": "Emerging cloud markets like spot markets and batch computing services scale up services at the granularity of whole VMs. In this paper, we observe that GPU workloads underutilize GPU device memory, leading us to explore the benefits of reallocating heterogeneous GPUs within existing VMs. 
We outline approaches for upgrading and downgrading GPUs for OpenCL GPGPU workloads, and show how to minimize the chance of cloud operator VM termination by maximizing the heterogeneous environments in which applications can run.", "pdfUrls": [ "https://www.usenix.org/conference/hotcloud17/program/presentation/gleeson", "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-gleeson.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4773/4696239c5090f81cf821a0962a0ab47d2376.pdf", "s2Url": "https://semanticscholar.org/paper/47734696239c5090f81cf821a0962a0ab47d2376", "sources": [ "DBLP" ], "title": "Heterogeneous GPU reallocation", "venue": "HotCloud", "year": 2017 }, "477bbcb5655a9c64893207bb49032e87c06a05f2": { "authors": [ { "ids": [ "2734514" ], "name": "Meni Orenbach" }, { "ids": [ "1871448" ], "name": "Pavel Lifshits" }, { "ids": [ "40139942" ], "name": "Marina Minkin" }, { "ids": [ "2289351" ], "name": "Mark Silberstein" } ], "doi": "10.1145/3064176.3064219", "doiUrl": "https://doi.org/10.1145/3064176.3064219", "entities": [ "Adversary model", "Computer data storage", "Intel Developer Zone", "Memcached", "Operating system", "Paging", "Server (computing)", "System call", "Throughput", "Translation lookaside buffer", "Trusted Execution Technology" ], "id": "477bbcb5655a9c64893207bb49032e87c06a05f2", "inCitations": [ "02a46ed5ea2bc0d8e59c0f876c99306e11a22f0a", "33ae35cc24ef4303979b479671c2065256e1b3a7", "85741fa2a0fb1060c138b1b11a0906381661fbcb", "ed84133ca8ef37a273d4b187202f55c6618b953e", "a355edbb24d406761407e2728218d2192f2c1fcf", "50ba271c1e0ddd814b6e79348a8963c788d9ddf9", "03e89626cbb864fb1243b4ee8b4037020a9250eb", "334ec6e57110ece9f482f9ec2e85412b0be8072a", "d32d738f04d5c4ae4d12be1f6de88b132a7b12a7", "641ee47b43f67815ddbe0660e274ec7483b906d2", "e41440cff90683629228b308a94e48c7af11ca36", "21a402631dff504755e281934eaa90bc9dbe8ae9", "788b9e288c8db9decbbb2668fdee3737e386e143" ], "journalName": "", "journalPages": "238-253", 
"journalVolume": "", "outCitations": [ "6f29318586da614d7d816905f07ca9347d17e4d3", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "452c803f91ab670bf36403ed5412875b13ae9e94", "01d1575116b8aaacde1fd0e164a932b1ceffa04d", "43f0c099d44a68783a773f91cd03098a5252bf98", "0a289fd7b14345822b1acda6d82750b15d59663e", "06bf84f98e7dd39be8d96eb67bafbf56d61bc715", "0852a44c86db434e9b51c67704636791e9940487", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "043afbd936c95d0e33c4a391365893bd4102f1a7", "4c546ee67a38355fbd91fce26bfc3c6ff2e7e048", "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "a4b0f11334d33de4f41bfeebfc520fb5b034a31e", "861b44a6a42fde3733080c32919c114a93727d62", "7f489d9801e0674f4436beb34ea8b8695050d5fe", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "7932a4597cec5149c575aa2303fe8f12241e4320", "72880d15db2282512e5d3f0a3796b397d68cc7db", "21bcf85fdd6d857a0bf050da9fad07cf1d153352", "07f0e56d1c37c213cd5c617dbfba5a0549629a19", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "0e191d726c2b15d54f0e4b5d50f6c0d2e426f3ad", "3607afdb204de9a5a9300ae98aa4635d9effcda2", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd" ], "paperAbstract": "Intel Software Guard extensions (SGX) enable secure and trusted execution of user code in an isolated enclave to protect against a powerful adversary. Unfortunately, running I/O-intensive, memory-demanding server applications in enclaves leads to significant performance degradation. Such applications put a substantial load on the in-enclave system call and secure paging mechanisms, which turn out to be the main reason for the application slowdown. In addition to the high direct cost of thousands-of-cycles long SGX management instructions, these mechanisms incur the high indirect cost of enclave exits due to associated TLB flushes and processor state pollution.\n We tackle these performance issues in Eleos by enabling exit-less system calls and exit-less paging in enclaves. 
Eleos introduces a novel Secure User-managed Virtual Memory (SUVM) abstraction that implements application-level paging inside the enclave. SUVM eliminates the overheads of enclave exits due to paging, and enables new optimizations such as sub-page granularity of accesses.\n We thoroughly evaluate Eleos on a range of microbenchmarks and two real server applications, achieving notable system performance gains. memcached and a face verification server running in-enclave with Eleos, achieves up to 2.2× and 2.3× higher throughput respectively while working on datasets up to 5× larger than the enclave's secure physical memory.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064219" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/477bbcb5655a9c64893207bb49032e87c06a05f2", "sources": [ "DBLP" ], "title": "Eleos: ExitLess OS Services for SGX Enclaves", "venue": "EuroSys", "year": 2017 }, "478c09086fcdb2bdaf5f48542dee3e3267790d0f": { "authors": [ { "ids": [ "2673895" ], "name": "Kenneth Raffenetti" }, { "ids": [ "30563484" ], "name": "Abdelhalim Amer" }, { "ids": [ "2652675" ], "name": "Lena Oden" }, { "ids": [ "2058835" ], "name": "Charles Archer" }, { "ids": [ "2446648" ], "name": "Wesley Bland" }, { "ids": [ "1713011" ], "name": "Hajime Fujita" }, { "ids": [ "1794267" ], "name": "Yanfei Guo" }, { "ids": [ "1801345" ], "name": "Tomislav Janjusic" }, { "ids": [ "28294088" ], "name": "Dmitry Durnov" }, { "ids": [ "1685720" ], "name": "Michael Blocksome" }, { "ids": [ "3120064" ], "name": "Min Si" }, { "ids": [ "2632706" ], "name": "Sangmin Seo" }, { "ids": [ "34914217" ], "name": "Akhil Langer" }, { "ids": [ "1729841" ], "name": "Gengbin Zheng" }, { "ids": [ "2243970" ], "name": "Masamichi Takagi" }, { "ids": [ "3315212" ], "name": "Paul Coffman" }, { "ids": [ "31792655" ], "name": "Jithin Jose" }, { "ids": [ "2302544" ], "name": "Sayantan Sur" }, { "ids": [ "7959432" ], "name": "Alexander Sannikov" }, { "ids": [ "28371726" ], "name": "Sergey 
Oblomov" }, { "ids": [ "1980633" ], "name": "Michael Chuvelev" }, { "ids": [ "20679817" ], "name": "Masayuki Hatanaka" }, { "ids": [ "1708899" ], "name": "Xin Zhao" }, { "ids": [ "1682196" ], "name": "Paul F. Fischer" }, { "ids": [ "40552468" ], "name": "Thilina Rathnayake" }, { "ids": [ "40033106" ], "name": "Matthew Otten" }, { "ids": [ "2063968" ], "name": "Misun Min" }, { "ids": [ "2103230" ], "name": "Pavan Balaji" } ], "doi": "10.1145/3126908.3126963", "doiUrl": "https://doi.org/10.1145/3126908.3126963", "entities": [ "Application programming interface", "Critical path method", "High- and low-level", "Message Passing Interface", "Requirement" ], "id": "478c09086fcdb2bdaf5f48542dee3e3267790d0f", "inCitations": [], "journalName": "", "journalPages": "62:1-62:12", "journalVolume": "", "outCitations": [ "cf60b4d7f37cc74ca7345a579201b89a010a67e8", "39e2c2017e5855c4a9010d1470ad76d27766c317", "9c7566f3820b0ae59adadc7249bb972668d0845d", "1a355960bad4ac65e4d6ae25f6d9af2b62bc7a04", "1a7a3c3945947358e99257b3461d3eff8a06946d", "309bd4c9b1b9cf81cbf071b8b2ad80e97acf7c60", "3174e077cdf74252a489f64cd61052b7e1a482d9", "61b8a8f5810f6670466f9ea58b7cb390ca1a4a89", "f7478e8d8ab68c906c5aa1a2ebed7ad15ca13261", "13fec303fe55489045ff5b66014b618fca83206a", "e8464a4cfcefc5e7318b342e7870615a4ade2ff8", "15fac76a4de8cd20b13d84d47bb8e0d7851c1988", "8c9654d6f22d3c4e34e7e1be16129b5016cb8899", "71dad05cbc0fab822e2e52084641caf7504515eb", "11b102456d8d6d907103bb6a3c4bde80dc359c6a" ], "paperAbstract": "This paper provides an in-depth analysis of the software overheads in the MPI performance-critical path and exposes mandatory performance overheads that are unavoidable based on the MPI-3.1 specification. We first present a highly optimized implementation of the MPI-3.1 standard in which the communication stack---all the way from the application to the low-level network communication API---takes only a few tens of instructions. 
We carefully study these instructions and analyze the root cause of the overheads based on specific requirements from the MPI standard that are unavoidable under the current MPI standard. We recommend potential changes to the MPI standard that can minimize these overheads. Our experimental results on a variety of network architectures and applications demonstrate significant benefits from our proposed changes.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126963" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/478c09086fcdb2bdaf5f48542dee3e3267790d0f", "sources": [ "DBLP" ], "title": "Why is MPI so slow?: analyzing the fundamental limits in implementing MPI-3.1", "venue": "SC", "year": 2017 }, "47b8c02885507a60532540d4ec80ee54c106052c": { "authors": [ { "ids": [ "3451869" ], "name": "Subhadeep Karan" }, { "ids": [ "2738904" ], "name": "Jaroslaw Zola" } ], "doi": "10.1109/HiPC.2017.00014", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00014", "entities": [ "Apache Spark", "Assignment problem", "Bayesian network", "Clinical decision support system", "Decision support system", "Distributed memory", "Dynamic programming", "MDL (programming language)", "Machine learning", "Scalability", "Scoring functions for docking" ], "id": "47b8c02885507a60532540d4ec80ee54c106052c", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "33-41", "journalVolume": "", "outCitations": [ "2eec492a8d887a68ab4689727b38171d803d4964", "04f39720b9b20f8ab990228ae3fe4f473e750fe3", "a9cf1c8408a35fd0b94b5a2deef79f44cdeec338", "0a332757b8a52c0aea548c1f9fde106d397e5334", "18d98ffdc51d47b0c5a647c14fa0f0df869027b0", "7a869aa6f13f945c212e59d3dbe8ad4c86700e47", "bf7e6d7bc896bafdaef0b195b762b164e14b5ee9", "507c9b4917638d77b0bad5df47edff92f2ff229a", "9edea4ec7c927df2c4c3b025a3c62de8532b6df7", "5371312df4a9549bfb31b2cb53ea67859516925b", "191c965300aff126bceb751f8cb68ea4104ea2c2", 
"17ad1361dfabc1c50b506813d0f5d54df159fc36", "9e4291de6cdce8e6f247effa308d72e2ec3f6122", "f7303cf1c00144429ba81ff7b6699bb1a2616cf3", "7aff9327bf347ea4661139f6deba022c47bde9bc", "02010fcffa202fa891767d0a3a995dee25e62f63", "0826b4328ec673f6b5b452fa217d35a2bb92eca3", "bbb9c3119edd9daa414fd8f2df5072587bfa3462", "26724ed93ef33b88353a70d77f232018fac44366", "3aa5213572910bb4623e9aab9b65c81b13fe03ac", "1c919079d154b213e18a83649e0a465287afa937", "4b704a990c2b89328969e74e1c40337183a8ba8b", "2f4366ccd90e49de7787bfa5644440ea0bfbe9b6" ], "paperAbstract": "In Machine Learning, the parent set identification problem is to find a set of random variables that best explain selected variable given the data and some predefined scoring function. This problem is a critical component to structure learning of Bayesian networks and Markov blankets discovery, and thus has many practical applications, ranging from fraud detection to clinical decision support. In this paper, we introduce a new distributed memory approach to the exact parent sets assignment problem. To achieve scalability, we derive theoretical bounds to constraint the search space when MDL scoring function is used, and we reorganize the underlying dynamic programming such that the computational density is increased and fine-grain synchronization is eliminated. We then design efficient realization of our approach in the Apache Spark platform. 
Through experimental results, we demonstrate that the method maintains strong scalability on a 500-core standalone Spark cluster, and it can be used to efficiently process data sets with 70 variables, far beyond the reach of the currently available solutions.", "pdfUrls": [ "https://arxiv.org/pdf/1705.06390v2.pdf", "https://arxiv.org/pdf/1705.06390v1.pdf", "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00014", "http://arxiv.org/abs/1705.06390" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/47b8c02885507a60532540d4ec80ee54c106052c", "sources": [ "DBLP" ], "title": "Scalable Exact Parent Sets Identification in Bayesian Networks Learning with Apache Spark", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "47c3d413056fe8538cb5ce2d6adc860e70062bf6": { "authors": [ { "ids": [ "1892673" ], "name": "Himabindu Lakkaraju" }, { "ids": [ "3371403" ], "name": "Jon M. Kleinberg" }, { "ids": [ "1702139" ], "name": "Jure Leskovec" }, { "ids": [ "2110332" ], "name": "Jens Ludwig" }, { "ids": [ "2062143" ], "name": "Sendhil Mullainathan" } ], "doi": "10.1145/3097983.3098066", "doiUrl": "https://doi.org/10.1145/3097983.3098066", "entities": [ "Counterfactual conditional", "Counterfactual definiteness", "Human reliability", "Machine learning", "Predictive modelling" ], "id": "47c3d413056fe8538cb5ce2d6adc860e70062bf6", "inCitations": [ "27b98ffbb4549167dda0841d873fef317db34567", "162dc19e3795062292cc78b2f226b2f4bc5245ec" ], "journalName": "", "journalPages": "275-284", "journalVolume": "", "outCitations": [ "d290797a05041405bfd179c397e571cc3dba5374", "4b7216a8c022e12546cdf2c478d024c4315ab780", "a85acbe6ff39173031d877eaf79af3ca52bbc20f", "e7b4bc5ac43da7d798fb09b625ebdc76bed36945", "206caa3a3ec1a15ae6280707c05c750fa100300f", "6103a6435b84f43b6e8ba335717c74f8cb5f4960", "2eec797d8ffb4429d31ccbb69bfa497df9c6e496", "517d6e3999bd425069e45346045adcbd2d0c9299", "16e1909d3ccc0f8ab10ee288e196407e73be8624", 
"7563ed501b990740d8617e459cb63e7ac02d00f9", "6f181bd7cba13689efaf10aafcf50fae96aef83a", "8735340bed4503bf21e50ec65872ceee68301470", "c6dd8503ca38a31d2e25cffc76639578804b0ab4", "060125aba7f79cd147f3157d2b544b52e4d873c5", "5f07f1301d6515464b35c0625ef3e4f05b240377", "5e0a19dedc3b96022dbed0e17483de85a6f44617", "2a2e608b13eb81de335638e49a931cb247bdc78e", "213c5f841e99fa2988ccaedf990c2e878c89cd90", "940b01d0d7931cb9d4d24f5bd50625b941b31a13", "2358e1bd7f0ec8d2312efa8972eff2f323d5fc03", "650d4bf807007654759ad9ad0757075a20501d12", "1bdd655bedddf0a48be45d3d26e4d194ef58f580", "4aa78a86d98c8ae5a07f06e34e9f6f392b8c18e9", "e949fdf641290d80831155f57c5d4641499694dd", "03f1264577a7f7858bbd80a0460950fa108a3ee7", "1043a66d93affb5fd3d1b24bc657d857d5a00d1c", "204ab13646af215739f97eb6729e0e54b31d2c23", "0cf347957f138f900e0ae62389d9f8c02c764950", "374bf28854c23561976fc6a2b5abd4b9f7f117ba" ], "paperAbstract": "Evaluating whether machines improve on human performance is one of the central questions of machine learning. However, there are many domains where the data is selectively labeled, in the sense that the observed outcomes are themselves a consequence of the existing choices of the human decision-makers. For instance, in the context of judicial bail decisions, we observe the outcome of whether a defendant fails to return for their court appearance only if the human judge decides to release the defendant on bail. This selective labeling makes it harder to evaluate predictive models as the instances for which outcomes are observed do not represent a random sample of the population. Here we propose a novel framework for evaluating the performance of predictive models on selectively labeled data. We develop an approach called contraction which allows us to compare the performance of predictive models and human decision-makers without resorting to counterfactual inference. 
Our methodology harnesses the heterogeneity of human decision-makers and facilitates effective evaluation of predictive models even in the presence of unmeasured confounders (unobservables) which influence both human decisions and the resulting outcomes. Experimental results on real world datasets spanning diverse domains such as health care, insurance, and criminal justice demonstrate the utility of our evaluation metric in comparing human decisions and machine predictions.", "pdfUrls": [ "http://cs.stanford.edu/people/jure/pubs/contraction-kdd17.pdf", "http://doi.acm.org/10.1145/3097983.3098066" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/47c3d413056fe8538cb5ce2d6adc860e70062bf6", "sources": [ "DBLP" ], "title": "The Selective Labels Problem: Evaluating Algorithmic Predictions in the Presence of Unobservables", "venue": "KDD", "year": 2017 }, "47d30261b6a1b4b017123f9a1b72b2da838ec529": { "authors": [ { "ids": [ "7623676" ], "name": "Edson Tavares De Camargo" }, { "ids": [ "2084898" ], "name": "Elias Proc\u00f3pio Duarte" }, { "ids": [ "1739265" ], "name": "Fernando Pedone" } ], "doi": "10.1007/978-3-319-64203-1_30", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_30", "entities": [ "Algorithm", "Application checkpointing", "Benchmark (computing)", "Centralisation", "Chandra\u2013Toueg consensus algorithm", "Failure rate", "Fault tolerance", "Keystroke logging", "Liveness", "Mean time between failures", "Multigrid method", "Reliability engineering", "Single point of failure" ], "id": "47d30261b6a1b4b017123f9a1b72b2da838ec529", "inCitations": [], "journalName": "", "journalPages": "415-427", "journalVolume": "", "outCitations": [ "06230d13e276bd871a378ca932a41b5cff94e29f", "82f84d2b7cc61142240e9f0b8b4e73986595fb71", "00e3756119a91432622f6982b59ecd24a1340fbe", "2254335dccbed71d322cea94ce71938479d3fede", "02b1103e592fa6bf0499e27f1519692441fad557", "01ca7aa33c9affa8a716cca7a6c65717807e1215", 
"8857e57ad8ae54d6a03947c5d41a1295b9b7c10f", "550285725684e2d286ffd9fa5cebdc52d7c4f860", "029d525dd48347fa4b8a48dbf4b41b4b37199a6a", "0fc8fef2d5141e597bae9b031f21b32432d25c0d", "01d62cd850496455ce1616500f491690effa5c98", "10824f12c211d700d4d1ad95b3ed8660cb8a3e59", "0ba5f5db0c146d54d2a868f8815ddd00fb943597", "71c84eb6f63be58b72915ade61cc08c2b9c37e58", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "2137d8386391216c360f2b6a5a2e0a1605207575", "39ef5d362200126497b2f74c33338383dcc9589c", "d12d1289d2384c2ce642f01855637b9f0519e189", "abe07474d6bd99d0cb6ecf81cc90a97fd24ba5d5", "1822b56cea223cedf501fa10bd3795767ab80a9e", "5026878730aba26f567587f8302eeaeba68c0a10", "27112ce87efb02a44f35d2299e487bfd69ec37ed", "6ea6627ce73e538c570781cbd1a0c49ca6904543", "134919aaf219601627fd1f77b31d25e5741f1418", "3e5387055046f17b1cf05f33c0e7f884e30a4fcb", "2706db42926e0e58e35336331f6d3b62f0811cf5", "52fc987b858770ac02a54effd74da945ec06c224", "f3ec30b163be88da4c0a144f6d4499e817e9501e", "10f2bba435ee4550dbd713e8e4298663fbadb87a", "442686f068097d4cd27680a0ee08272aea19c922", "2f3548e58f063c71187eebf5ebd4109ededbe9b2", "7bf742cd243e91b256afb437c020deafe6fe8035" ], "paperAbstract": "High-performance computing (HPC) systems traditionally employ rollback-recovery techniques to allow faulttolerant executions of parallel applications. Rollback-recovery based on message logging is an attractive strategy that avoids the drawbacks of coordinated checkpointing in systems with low mean-time between failures (MTBF). Most message logging protocols rely on a centralized event logger to store information (i.e., the determinants) to allow the recovery of an application process. This centralized approach, besides the obvious single point of failure problem, represents a bottleneck for the efficiency of message logging protocols. In this work, we present a fault-tolerant distributed event logger based on consensus that outperforms the centralized approach. 
We implemented the event logger of MPI determinants using Paxos, a prominent consensus algorithm. Our event logger inherits the Paxos properties: safety is guaranteed even if the system is asynchronous and liveness is guaranteed despite processes failures. Experimental results are reported for the performance of the distributed event logger based both on classic Paxos and parallel Paxos applied to AMG (Algebraic MultiGrid) and NAS Parallel Benchmark applications.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_30", "http://www.inf.usi.ch/faculty/pedone/Paper/2017/2017EUROPAR.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/47d3/0261b6a1b4b017123f9a1b72b2da838ec529.pdf", "s2Url": "https://semanticscholar.org/paper/47d30261b6a1b4b017123f9a1b72b2da838ec529", "sources": [ "DBLP" ], "title": "A Consensus-Based Fault-Tolerant Event Logger for High Performance Applications", "venue": "Euro-Par", "year": 2017 }, "47d9b984b2d6327098b39fc43245fc9bd3e08ffe": { "authors": [ { "ids": [ "2812433" ], "name": "Yuandong Chan" }, { "ids": [ "31638960" ], "name": "Kai Xu" }, { "ids": [ "2827531" ], "name": "Haidong Lan" }, { "ids": [ "3429925" ], "name": "Weiguo Liu" }, { "ids": [ "2916386" ], "name": "Yongchao Liu" }, { "ids": [ "38613433" ], "name": "Bertil Schmidt" } ], "doi": "10.1109/IPDPS.2017.35", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.35", "entities": [ "AVX-512", "Advanced Vector Extensions", "Algorithm", "Bit array", "Bit-level parallelism", "Central processing unit", "Computational problem", "Hamming distance", "Knights", "Manycore processor", "Microprocessor", "Multi-core processor", "Open-source software", "Parallel computing", "Random seed", "SIMD", "Seed", "Smith\u2013Waterman algorithm", "Speedup", "Streaming SIMD Extensions", "Thread (computing)", "Xeon Phi" ], "id": "47d9b984b2d6327098b39fc43245fc9bd3e08ffe", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", 
"journalPages": "52-61", "journalVolume": "", "outCitations": [ "034ddc1749424d0335be143a7c7d07b7c559c2ad", "974ebdc4cd7ef424ed158a3d5dc349fefef5d35f", "1c15f69566af6198f336b961aecf418b5ccd07d3", "0ee3a1f7a363b16ceda8f1053a8172f051fd8d4c", "43b52543490aa654705afad0dd6825180ff1b066", "87f3d7730e190c695c84683830a702cc7dd6e296", "66515254c60f8b754b5ce7ae7b42911653d7ed50", "8598507f8bd3f00164bda383f4d3f5ca9d9b488c", "25f017efd2905c6d0c6a92f2dfe19113ee42938e", "65a3022b84914ffc0b19a119dcb44146e0c1ecda", "b582cc447347c12e181eb6a3ee3a7f67aebf64d8", "032af12c228e39f2e483588419bb81c44c905b8e", "9be98def5602c99b620a42999dc7a8a38793c4a5", "eec6d4664256c49a9e312b17f7455121cac90b25", "000a6f63c588697d6ae8db6cb6ffd6394d961cb7", "2f6c307c0e251afb0c93a9e8e9b6f5d1a0fb1f7e", "efa5558bddd68abe4adc81adbbef6f739e648392", "40c5441aad96b366996e6af163ca9473a19bb9ad", "1ef38c80b1bc4352ce0df0ef7c05249fb64bf78d", "0a908373dd5e87446ba85db0e590b3e3004e04f7", "b0312b82ee0017f7bbfc78ff50fcb0561d70bc9b", "627aa62eec8edda82481f429785b10ccc1818416", "4ca9ea95a0a9846965e86619e646d9ca36930c18", "71150718ec7affbc4f9130f55f925af0dd956651", "f5a88d8561bf6a64b43aa7e88beff8220e792bee", "72b6bc8f313219e2571f3234ea5ae8fc9b7bce27", "f168f5e9b77e627aeb08a5073902c6a212992d77" ], "paperAbstract": "The progress of next-generation sequencing has a major impact on medical and genomic research. This technology can now produce billions of short DNA fragments (reads) in a single run. One of the most demanding computational problems used by almost every sequencing pipeline is short-read alignment; i.e. determining where each fragment originated from in the original genome. Most current solutions are based on a seed-and-extend approach, where promising candidate regions (seeds) are first identified and subsequently extended in order to verify whether a full high-scoring alignment actually exists in the vicinity of each seed. 
Seed verification is the main bottleneck in many state-of-the-art aligners and thus finding fast solutions is of high importance. We present a parallel ungapped-alignment-featured seed verification (PUNAS) algorithm, a fast filter for effectively removing the majority of false positive seeds, thus significantly accelerating the short-read alignment process. PUNAS is based on bit-parallelism and takes advantage of SIMD vector units of modern microprocessors. Our implementation employs a vectorize-and-scale approach supporting multi-core CPUs and many-core Knights Landing (KNL)-based Xeon Phi processors. Performance evaluation reveals that PUNAS is over three orders-of-magnitude faster than seed verification with the Smith-Waterman algorithm and around one order-of-magnitude faster than seed verification with the banded version of Myers bit-vector algorithm. Using a single thread it achieves a speedup of up to 7.3, 27.1, and 11.6 compared to the shifted Hamming distance filter on a SSE, AVX2, and AVX-512 based CPU/KNL, respectively. The speed of our framework further scales almost linearly with the number of cores. 
PUNAS is open-source software available at https://github.com/Xu-Kai/PUNASfilter.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.35" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/47d9b984b2d6327098b39fc43245fc9bd3e08ffe", "sources": [ "DBLP" ], "title": "PUNAS: A Parallel Ungapped-Alignment-Featured Seed Verification Algorithm for Next-Generation Sequencing Read Alignment", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "47e27b9611ffcede5782a165b6099c1c619a34bc": { "authors": [ { "ids": [ "2504006" ], "name": "Hari Cherupalli" }, { "ids": [ "3187702" ], "name": "Henry Duwe" }, { "ids": [ "7982124" ], "name": "Weidong Ye" }, { "ids": [ "8153371" ], "name": "Rakesh Kumar" }, { "ids": [ "1813088" ], "name": "John Sartori" } ], "doi": "10.1145/3148052", "doiUrl": "https://doi.org/10.1145/3148052", "entities": [ "Central processing unit", "Critical system", "Embedded system", "Exploit (computer security)", "Internet", "Internet of things", "Low-power broadcasting", "Power supply", "Requirement", "Wearable computer" ], "id": "47e27b9611ffcede5782a165b6099c1c619a34bc", "inCitations": [ "8525e8890da440477140e3f947d61b059a9eea8f", "c34cb1659dc2d4ae9e0a11a71ecce93c56b272fa", "e3aca014b04e379e2dc1b57f5fd637dff61ae872" ], "journalName": "ACM Trans. Comput. 
Syst.", "journalPages": "9:1-9:33", "journalVolume": "35", "outCitations": [ "10e56a885c363569821de3ec4d3369a28c0854a0", "6228ed86d5ad276f47da6d491fd9072716e6ebf8", "5324cd42d9f04ff75037ce9cbd8e34fb278e15e4", "9458915e0b7e9abfd9f9c24e35b036505c899a8b", "1abb651f5eb33d6a0c3c234c4c8a7dc2e9e47506", "5208576ac552a2bf2840558ec67f418370f5ff9d", "405eb33ab6ef2c9abc91346c9f63d11fc5973197", "05ac8c6477c306c395433a6035706d265c9c961d", "11443efe465ad544f478524da6c66c085b16e28b", "4165376a7aadf6a1c1acddd6fc236047b7becff1", "675d53d75788b4cc580e3e90c5ef91d29454a295", "6d6aa1ab79eef925c5ad0382826a7380286861de", "a58de4e7c980017ba75056724f7b2d50a2e23dea", "13ad5b5fdd3ba3d74fa96a12450b726696fbfe77", "2605c673c58f11c73166ffee54d1ae5950b532df", "2c7cffc736e2fba8b153e8cb9599028688150bf2", "7498e51ffce80639e96cfb3ab66b5558e595d07b", "442e606718cb0d1eb64973bd1b66117b32b0161d", "a64c79bbf6033e08ef5e72878c606b00d86cb5d9", "2ddb5176006689ffbc7ba6f58f4c0eccfb3168d8", "1a322874b1c33007bc627800af93f9b427948299", "02514ff22c82354b0ebb065dcb604c016e67a15e", "29a262c436c62331677c15dbad1fef815c43b36a", "6e8908c9846e8186f8099e366a68ab73a7821b38", "8a57e6017720f37450cc77c540642a6e693116e1", "0331dff40717e9975b52323d97d3d24e98fe3f3e" ], "paperAbstract": "Many emerging applications such as the Internet of Things, wearables, implantables, and sensor networks are constrained by power and energy. These applications rely on ultra-low-power processors that have rapidly become the most abundant type of processor manufactured today. In the ultra-low-power embedded systems used by these applications, peak power and energy requirements are the primary factors that determine critical system characteristics, such as size, weight, cost, and lifetime. 
While the power and energy requirements of these systems tend to be application specific, conventional techniques for rating peak power and energy cannot accurately bound the power and energy requirements of an application running on a processor, leading to overprovisioning that increases system size and weight. In this article, we present an automated technique that performs hardware–software coanalysis of the application and ultra-low-power processor in an embedded system to determine application-specific peak power and energy requirements. Our technique provides more accurate, tighter bounds than conventional techniques for determining peak power and energy requirements. Also, unlike conventional approaches, our technique reports guaranteed bounds on peak power and energy independent of an application’s input set. Tighter bounds on peak power and energy can be exploited to reduce system size, weight, and cost.", "pdfUrls": [ "http://people.ece.umn.edu/users/jsartori/papers/tocs17.pdf", "http://doi.acm.org/10.1145/3037697.3037711", "http://www.ee.umn.edu/users/jsartori/papers/asplos17.pdf", "http://rakeshk.web.engr.illinois.edu/asplos_17_cam.pdf", "http://doi.acm.org/10.1145/3148052" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/47e27b9611ffcede5782a165b6099c1c619a34bc", "sources": [ "DBLP" ], "title": "Determining Application-specific Peak Power and Energy Requirements for Ultra-low Power Processors", "venue": "ASPLOS", "year": 2017 }, "47e913e15a4662889044a359fecd55edcd75350f": { "authors": [ { "ids": [ "3150866" ], "name": "Leonid Ryzhyk" }, { "ids": [ "3140749" ], "name": "Nikolaj Bj\u00f8rner" }, { "ids": [ "1709876" ], "name": "Marco Canini" }, { "ids": [ "2272322" ], "name": "Jean-Baptiste Jeannin" }, { "ids": [ "1859909" ], "name": "Cole Schlesinger" }, { "ids": [ "1680763" ], "name": "Douglas B. 
Terry" }, { "ids": [ "1746289" ], "name": "George Varghese" } ], "doi": "", "doiUrl": "", "entities": [ "Access control list", "Apache Cocoon", "Complex network", "High- and low-level", "Network planning and design", "Performance Evaluation", "Program lifecycle phase", "Refinement (computing)", "Routing", "Run time (program lifecycle phase)", "Software bug", "Software development", "Software-defined networking", "Stepwise regression", "Top-down and bottom-up design", "Verification and validation" ], "id": "47e913e15a4662889044a359fecd55edcd75350f", "inCitations": [], "journalName": "", "journalPages": "683-698", "journalVolume": "", "outCitations": [ "24a963758371e511e3749c865b14f697358f025c", "00cf4b13a1bd202ccebe1e7bd0587f11e98ec3d6", "ce2462f499f1f95cd958596e761ed5df9aaf9013", "089b10645ee63cd9c5bb4ab661141dd813408e15", "30a7bba8d47d7eca9f7826a721e62032a5c8e77a", "55ef72fe52990f491ab939b91d75b7899a66180f", "7da879c2007aa302b2537546277e83d09f65b957", "19114b7a2f5243a47e80590cc11a2d8ec5b96308", "488420924de0af51a5ae7f86d3b29392c29601bc", "468c3683e86fb3fbce53f640a3fd6250e4efd291", "016638985012e34b31274ed33988ccc43477e5d8", "98fef2c0314077d23cf2e6dd45bc8bac0180abfd", "f30ce102ab26ded26d52a0a39a2400c17ceca85f", "27f4001214ce0d449eb05d33626f444526accc7c", "016ca0cee16190907a911d874e98fbc6dfa5a36c", "2077579d62fc090d4ddf45f107ffae0468936165", "9e778ebe9a4b0955b802f7e46323dcffc6174f94", "61797d1af87a27a89b5bb5fd4a03de061e5dc0e3", "405377ca200df3f7da390c37516fe13582e70776", "08a572c06bdaa78d85a287111832d188e8e07f0b", "36f396b52f93fa52742ce5052a40c1c90ea726e3", "507b5fe36714eb6aa8acd96d1eef14212eddb82b", "1d2ce8f0c129985fcf2dea5cac6823bfcac90938", "4687fdf3c77ef00700fdf1399f7dd81bfe87ef97" ], "paperAbstract": "Building software-defined network controllers is an exercise in software development and, as such, likely to introduce bugs. 
We present Cocoon, a framework for SDN development that facilitates both the design and verification of complex networks using stepwise refinement to move from a high-level specification to the final network implementation. A Cocoon user specifies intermediate design levels in a hierarchical design process that delineates the modularity in complicated network forwarding and makes verification extremely efficient. For example, an enterprise network, equipped with VLANs, ACLs, and Level 2 and Level 3 Routing, can be decomposed cleanly into abstractions for each mechanism, and the resulting stepwise verification is over 200x faster than verifying the final implementation. Cocoon further separates static network design from its dynamically changing configuration. The former is verified at design time, while the latter is checked at run time using statically defined invariants. We present six different SDN use cases including B4 and F10. Our performance evaluation demonstrates that Cocoon is not only faster than existing verification tools but can also find many bugs statically before the network design has been fully specified.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-ryzhyk.pdf", "https://mcanini.github.io/papers/cocoon.nsdi17.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-ryzhyk.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/ryzhyk" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/f371/6f1b8bc7e9c1ef131629798be42d52ea226c.pdf", "s2Url": "https://semanticscholar.org/paper/47e913e15a4662889044a359fecd55edcd75350f", "sources": [ "DBLP" ], "title": "Correct by Construction Networks Using Stepwise Refinement", "venue": "NSDI", "year": 2017 }, "47f06bc377aae390a482985f755904800d85e35c": { "authors": [ { "ids": [ "2473847" ], "name": "Ana Moreton-Fernandez" }, { "ids": [ "8147364" ], "name": "Eduardo Rodriguez-Gutiez" }, { "ids": [ "1727166" ], "name": "Arturo 
Gonz\u00e1lez-Escribano" }, { "ids": [ "1868120" ], "name": "Diego R. Llanos Ferraris" } ], "doi": "10.1007/978-3-319-64203-1_33", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_33", "entities": [ "Coprocessor", "Programming model", "Xeon Phi" ], "id": "47f06bc377aae390a482985f755904800d85e35c", "inCitations": [ "07a1b0fd394e23ff7cd553a334e0b33c0c3e7876" ], "journalName": "", "journalPages": "457-469", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_33" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/47f06bc377aae390a482985f755904800d85e35c", "sources": [ "DBLP" ], "title": "Supporting the Xeon Phi Coprocessor in a Heterogeneous Programming Model", "venue": "Euro-Par", "year": 2017 }, "47fba0b14f7569d7294754ad61610fbd2a1bd3d7": { "authors": [ { "ids": [ "1682913" ], "name": "Xin Zhang" }, { "ids": [ "2355698" ], "name": "Radu Grigore" }, { "ids": [ "2457284" ], "name": "Xujie Si" }, { "ids": [ "1758336" ], "name": "Mayur Naik" } ], "doi": "10.1145/3133881", "doiUrl": "https://doi.org/10.1145/3133881", "entities": [ "Benchmark (computing)", "Datalog", "Heuristic", "Human\u2013computer interaction", "Iteration", "Java", "Logic programming", "Optimization problem", "Program optimization", "Programming language", "Requirement prioritization", "Static program analysis", "Synergy" ], "id": "47fba0b14f7569d7294754ad61610fbd2a1bd3d7", "inCitations": [ "20bc9af5e7265c128f77a16c4ca7b7a68b4e4eea", "c1a10554a4d20de0af2aa5b368a7312af2a5e92c", "6ce8015d4a69d438feaf85d07b9a2d81a45f33d7" ], "journalName": "PACMPL", "journalPages": "57:1-57:30", "journalVolume": "1", "outCitations": [], "paperAbstract": "We propose an interactive approach to resolve static analysis alarms. Our approach synergistically combines a sound but imprecise analysis with precise but unsound heuristics, through user interaction. 
In each iteration, it solves an optimization problem to find a set of questions for the user such that the expected payoff is maximized. We have implemented our approach in a tool, Ursa, that enables interactive alarm resolution for any analysis specified in the declarative logic programming language Datalog. We demonstrate the effectiveness of Ursa on a state-of-the-art static datarace analysis using a suite of 8 Java programs comprising 41-194 KLOC each. Ursa is able to eliminate 74% of the false alarms per benchmark with an average payoff of 12× per question. Moreover, Ursa prioritizes user effort effectively by posing questions that yield high payoffs earlier.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133881", "http://www.seas.upenn.edu/~mhnaik/pubs/oopsla17.pdf", "http://www.cis.upenn.edu/~mhnaik/pubs/oopsla17.pdf", "http://people.csail.mit.edu/xzhang//papers/oopsla17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/47fba0b14f7569d7294754ad61610fbd2a1bd3d7", "sources": [ "DBLP" ], "title": "Effective interactive resolution of static analysis alarms", "venue": "PACMPL", "year": 2017 }, "482d2cbe3087bd8ac7ee6080d50e0ec7a393e014": { "authors": [ { "ids": [ "3166410" ], "name": "Diego Costa" }, { "ids": [ "2088124" ], "name": "Artur Andrzejak" }, { "ids": [ "10430059" ], "name": "Janos Seboek" }, { "ids": [ "2712301" ], "name": "David Lo" } ], "doi": "10.1145/3030207.3030221", "doiUrl": "https://doi.org/10.1145/3030207.3030221", "entities": [ "C++", "Data structure", "Java", "Java Platform, Enterprise Edition", "Java collections framework", "Library", "Map", "Overhead (computing)", "Run time (program lifecycle phase)" ], "id": "482d2cbe3087bd8ac7ee6080d50e0ec7a393e014", "inCitations": [ "bf5fdf89e305ea7fc36efd71fbde19bb48d08d3d", "7e3a8818313c262dbc036ca83a9294943a13d9f0", "d8b1ca0e93b3836527ba7c6c3ccb4b946e2d4690" ], "journalName": "", "journalPages": "389-400", "journalVolume": "", "outCitations": [ 
"14b3bdbd319a89b59238d8be23775d442d462e59", "16e7d5ea99e7ca2698a36cf095b8cce4d4ff650f", "6b22f58432d08b29d527ea6db420b0e176d69554", "c62c98b1240c0d5102a7826dea40ab54e4895aa9", "c78f374bbfaa85df22a24c9ef9e028e8bd7fe397", "0f1042350e2c97117620d9f5182f94262f1f5ac0", "c26ff79189d1c06080a53427857dab3556f9d6fb", "17179e3554eb34a94c6aa3270e8b2cb9da9931b0", "30b30b2da89e9a287f235cdec1d346de163e50c5", "08c7dac8a5e712dafa7ba325516faa4fee412791", "3e70eb8a50c658ad5bca20bf69ed943f3af9f516", "0498808f5b07d9355741b4c1d84a757e9ab136de", "8ac6be0e3ea62e9819d5a25da645f2d350474693", "03ed17b3028b575961d30c0753de2fb7972c4a1d", "0a23a0ae1119dd52f09507fed611d4bb946769ac", "3295ec1404eca0da7e82c08f63905076809bd35e", "84f13db8f0e50ca97fe3fe0bc391e974724d4b87", "194322a0cc42f90f34c44b8e627b1d2cd94dcca4" ], "paperAbstract": "Collection data structures have a major impact on the performance of applications, especially in languages such as Java, C#, or C++. This requires a developer to select an appropriate collection from a large set of possibilities, including different abstractions (e.g. list, map, set, queue), and multiple implementations. In Java, the default implementation of collections is provided by the standard Java Collection Framework (JCF). However, there exist a large variety of less known third-party collection libraries which can provide substantial performance benefits with minimal code changes.\n In this paper, we first study the popularity and usage patterns of collection implementations by mining a code corpus comprised of 10,986 Java projects. We use the results to evaluate and compare the performance of the six most popular alternative collection libraries in a large variety of scenarios. We found that for almost every scenario and JCF collection type there is an alternative implementation that greatly decreases memory consumption while offering comparable or even better execution time. 
Memory savings range from 60% to 88% thanks to reduced overhead and some operations execute 1.5x to 50x faster.\n We present our results as a comprehensive guideline to help developers in identifying the scenarios in which an alternative implementation can provide a substantial performance improvement. Finally, we discuss how some coding patterns result in substantial performance differences of collections.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030221" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/482d2cbe3087bd8ac7ee6080d50e0ec7a393e014", "sources": [ "DBLP" ], "title": "Empirical Study of Usage and Performance of Java Collections", "venue": "ICPE", "year": 2017 }, "482e01ba5d29de96842c3e3daebcbad29945e4c0": { "authors": [ { "ids": [ "4937944" ], "name": "Andrea Continella" }, { "ids": [ "2416629" ], "name": "Yanick Fratantonio" }, { "ids": [ "40608753" ], "name": "Martina Lindorfer" }, { "ids": [ "32798468" ], "name": "Alessandro Puccetti" }, { "ids": [ "1934617" ], "name": "Ali Zand" }, { "ids": [ "1715189" ], "name": "Christopher Kr\u00fcgel" }, { "ids": [ "1711242" ], "name": "Giovanni Vigna" } ], "doi": "", "doiUrl": "", "entities": [ "Android", "Baseline (configuration management)", "Black box", "Encryption", "Identifier", "Internet privacy", "Library", "Mobile app", "Network traffic control", "Nondeterministic algorithm", "Personally identifiable information", "Privacy", "Server (computing)", "Traffic analysis" ], "id": "482e01ba5d29de96842c3e3daebcbad29945e4c0", "inCitations": [ "8d5b2f1877886a024b845b85128c9b01c48a2a29", "3c4228c6e1c8787a8c0f8ff47016f242fc605ac2", "9eac6cd4cf4ae5e4d774aa1f4200f36ec9e1bd64", "f2b5963afef31e1c1b12c84bb3a9d1117916e673", "038ec03e66ec8ed2593a4a7481b64e8f2bf1e9df", "4ea14fa20d77ac57d4b41891201271acba72ddc7" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "c5d375afa19bde7fe4029363f32b8281f794ccae", "a9977198d07f5fede50a81236d7806868f4c9a27", 
"41a4630a9bd0830b823c39b8b061349009162202", "b11cd7a0ccf98b71c1d9e46fa89a4708c9efdcc6", "094cca7a7bbfa274975e58f32d392404871ca2e5", "43f89337e570f36686acdda3dcd0b7885a963557", "6b2ef620ca9363a4b996693c649fddf3c97a91c3", "3343392fe056b45692252ad18278e10020ee3d8e", "129570333e7631456c70354113a43fe6eb193329", "09cafb70a4feab144ac1e6994694a2db21c7e656", "02fe41e07def449132516345d7152477c0d7c949", "6f36ec040624a1083222942dfde657e576afe701", "0a4cf3fa5fc716c8e742c036d5465f059a8fc1a6", "8ffc32565380d35bcc68f175cb803918b56517a7", "07fe0ddb6cef7ed8946d416c093452c1d0db0c34", "9140a579de7da753a095356b5da24804af28e8f7", "0195f5034ce011da2d38cdd60dfc68f4b3c52784", "41289566ac0176dced2312f813328ad4c0552618", "5eca4c4d84031b36310f819f249f76a30bba0756", "72dcb724803d6eb09db2a2ce80019a5810cbc549", "3d3fbbba74f6322f4bccbefcb977581eab79e436", "080f1f7a903ba3d77f0f21a3a89bd2db0d958e46", "16b2d6f76febe56ac1fed2a9dc266b2409bfb7ed", "9e1bcd6414fc6fdd3b63aab48cc3732dc761f538", "7997ca17d5aa13ef1f6fabfbc7f7059045f2dd42", "1ac58ab550f1f8f075b373211d76371d52979ce6", "94d0e8b118efecb9f5b2056d84bf22253a2fb63c", "390b144c7810836017f542a2dd21047c2932f80e", "6d59f58f7408362036196048c9ba11f399dd9bc2", "e33b4c25099cc8361c5984da4b3ae95356d9ce01", "31478e07f1599d9f9adba8d598bcaa54455e9015", "7810983269280d25ce9530598a9ea9c922fc07f2", "4eb6f9ffbd0e22d0943f52f18c32cb91b972a2a1", "5e360dae24e4ceff57bea7ab59e5aae83b1e2d8a", "2f35c2bf57242f5a755ac82635605100c14319da", "edc1d47954491278aba4336ab08bfee6cdc564ee", "38686aacff97078cca6a93c82145390712c5a1c4", "023f23c300804754753cb11db51fb7f582556ab7", "2e61fc82bcbdeaa0f8778d51c166e904c04ed34e", "01003069aeadff1f36ccdf3f63e1dcf54ff8050e" ], "paperAbstract": "Mobile apps are notorious for collecting a wealth of private information from users. 
Despite significant effort from the research community in developing privacy leak detection tools based on data flow tracking inside the app or through network traffic analysis, it is still unclear whether apps and ad libraries can hide the fact that they are leaking private information. In fact, all existing analysis tools have limitations: data flow tracking suffers from imprecisions that cause false positives, as well as false negatives when the data flow from a source of private information to a network sink is interrupted; on the other hand, network traffic analysis cannot handle encryption or custom encoding. We propose a new approach to privacy leak detection that is not affected by such limitations, and it is also resilient to obfuscation techniques, such as encoding, formatting, encryption, or any other kind of transformation performed on private information before it is leaked. Our work is based on black-box differential analysis, and it works in two steps: first, it establishes a baseline of the network behavior of an app; then, it modifies sources of private information, such as the device ID and location, and detects leaks by observing deviations in the resulting network traffic. The basic concept of black-box differential analysis is not novel, but, unfortunately, it is not practical enough to precisely analyze modern mobile apps. In fact, their network traffic contains many sources of non-determinism, such as random identifiers, timestamps, and server-assigned session identifiers, which, when not handled properly, cause too much noise to correlate output changes with input changes. The main contribution of this work is to make black-box differential analysis practical when applied to modern Android apps. In particular, we show that the network-based non-determinism can often be explained and eliminated, and it is thus possible to reliably use variations in the network traffic as a strong signal to detect privacy leaks. 
We implemented this approach in a tool, called AGRIGENTO, and we evaluated it on more than one thousand Android apps. Our evaluation shows that our approach works well in practice and outperforms current state-of-the-art techniques. We conclude our study by discussing several case studies that show how popular apps and ad libraries currently exfiltrate data by using complex combinations of encoding and encryption mechanisms that other approaches fail to detect. Our results show that these apps and libraries seem to deliberately hide their data leaks from current approaches and clearly demonstrate the need for an obfuscation-resilient approach such as ours.", "pdfUrls": [ "http://cs.ucsb.edu/~yanick/publications/2017_ndss_agrigento.pdf", "http://www.cs.ucsb.edu/~vigna/publications/2017_NDSS_Agrigento.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/obfuscation-resilient-privacy-leak-detection-mobile-apps-through-differential-analysis/", "https://seclab.cs.ucsb.edu/media/uploads/papers/2017_ndss_agrigento.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/482e/01ba5d29de96842c3e3daebcbad29945e4c0.pdf", "s2Url": "https://semanticscholar.org/paper/482e01ba5d29de96842c3e3daebcbad29945e4c0", "sources": [ "DBLP" ], "title": "Obfuscation-Resilient Privacy Leak Detection for Mobile Apps Through Differential Analysis", "venue": "NDSS", "year": 2017 }, "4842b30c6357f66cab16694130d7cdc5275cf79e": { "authors": [ { "ids": [ "1924076" ], "name": "Ping Li" } ], "doi": "10.1145/3097983.3098081", "doiUrl": "https://doi.org/10.1145/3097983.3098081", "entities": [ "Approximation algorithm", "Basis function", "Experiment", "Google Map Maker", "Kernel (operating system)", "Machine learning", "Nonlinear system", "Radial basis function", "Radial basis function kernel", "Simulation" ], "id": "4842b30c6357f66cab16694130d7cdc5275cf79e", "inCitations": [ "71ea4bdcd1210829fd66fcf62d3bb80ded0a8cde", "f87fb09abb46b126e6d4d1a1565d6d38d7c444ae", 
"17f44642b41ccb651320b87480b0e85f26d4064f", "03b7833f41fb317edd94fc945089c0528fcd8f01", "0b47e24b7aa12b2ec65abf76b70984d9836c3635" ], "journalName": "", "journalPages": "315-324", "journalVolume": "", "outCitations": [ "3e69456017e04b9a0ee915e815216d314383068c", "2e497d93d1c1aceb246408696fc5e505df7b0d9c", "4a156f0ea13681c2997f46bef3718a09ac63f02d", "74bee1ebf204dba4b2da0399a25a5ac9253a824e", "29e3932f02a0c308c618e5ce1da075208e385f21", "f68969eac758998c5bceedcd6f73499d23c62f07", "263566f17ff838ccec66103e2883e4f1afba3a58", "4e171856b5eac3a2bf7ebc1c243d9937b55a09bc", "7f47a319c9bf0a5e0e9246354f64e52e39356c96", "46e5e9b7c6ee743ae1ab3120dd852b4183af2302", "94a0af72b9ed4891404ca412698652b3999e8ce6", "6d85dda87a18d6b5a5a1bdd3019cb31396aaa7c9", "1b20afbd2d2a349737ed3dc246e44bbdba203190", "d66ee4f4f46d24344ec1bec7624c56a2878d8db2", "6dae9e710a986a2426f7c493e66e90c649848e02", "87c8eccb221db9f33fdf9c8a6a17498e18045b4a", "034c8c60a10d09a0b28ca929a9349cb3c0466b8b", "1beb13b72b32db4eaef31b2417d11bd488e88603", "24c9b0b05c5e957e255b854f947472f9181772a4", "02be82b6567135493b9bb51573496114465c1533", "74034df1bcd0d9d89479858196d1a4e8368d3777", "17f44642b41ccb651320b87480b0e85f26d4064f", "89ba1fccbf764bbc464796eb546338315c810570", "534f6ea4ce0127e5da7f1cafb6334b59ad15b83f", "a4fb3bb18245c5b9f071ee4627df76b4c147f2ce" ], "paperAbstract": "The method of \"random Fourier features (RFF)\" has become a popular tool for approximating the \"radial basis function (RBF)\" kernel. The variance of RFF is actually large. Interestingly, the variance can be substantially reduced by a simple normalization step as we theoretically demonstrate. We name the improved scheme as the \"normalized RFF (NRFF)\", and we provide a technical proof of the asymptotic variance of NRFF, as validated by simulations.\n We also propose the \"generalized min-max (GMM)\" kernel as a measure of data similarity, where data vectors can have both positive and negative entries. 
GMM is positive definite as there is an associate hashing method named \"generalized consistent weighted sampling (GCWS)\" which linearizes this (nonlinear) kernel. We provide an extensive empirical evaluation of the RBF and GMM kernels on more than 50 datasets. For a majority of the datasets, the (tuning-free) GMM kernel outperforms the best-tuned RBF kernel.\n We then conduct extensive classification experiments for comparing the linearized RBF kernel using NRFF with the linearized GMM kernel using GCWS. We observe that, in order to reach a similar accuracy, GCWS typically requires substantially fewer samples than NRFF, even on datasets where the original RBF kernel outperforms the original GMM kernel. As the training, storage, and processing costs are directly proportional to the sample size, our experiments can help demonstrate that GCWS would be a more practical scheme for large-scale machine learning applications.\n The empirical success of GCWS (compared to NRFF) can also be explained theoretically, from at least two aspects. Firstly, the relative variance (normalized by the squared expectation) of GCWS is substantially smaller than that of NRFF, except for the very high similarity region (where the variances of both methods approach zero). 
Secondly, if we are allowed to make a general model assumption on the data, then we can show analytically that GCWS exhibits much smaller variance than NRFF for estimating the same object (e.g., the RBF kernel), except for the very high similarity region.", "pdfUrls": [ "http://stat.rutgers.edu/home/pingli/papers/GMM_NRFF.pdf", "http://www.stat.rutgers.edu/home/pingli/papers/GMM_NRFF.pdf", "http://statistics.rutgers.edu/home/pingli/papers/KDD17_GMM_final.pdf", "http://doi.acm.org/10.1145/3097983.3098081" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4842b30c6357f66cab16694130d7cdc5275cf79e", "sources": [ "DBLP" ], "title": "Linearized GMM Kernels and Normalized Random Fourier Features", "venue": "KDD", "year": 2017 }, "485f765b40b3305aa4b7229f8f1236a381d9ba7b": { "authors": [ { "ids": [ "2889338" ], "name": "Yanpeng Yang" }, { "ids": [ "2878256" ], "name": "Bruno C. d. S. Oliveira" } ], "doi": "10.1145/3133871", "doiUrl": "https://doi.org/10.1145/3133871", "entities": [ "Automated theorem proving", "Coq (software)", "Dependent type", "Programming language", "Programming language theory", "Pure type system", "Subject reduction", "Type system", "Typing" ], "id": "485f765b40b3305aa4b7229f8f1236a381d9ba7b", "inCitations": [], "journalName": "PACMPL", "journalPages": "47:1-47:26", "journalVolume": "1", "outCitations": [ "73f3200df6ad790d16ffac9538bb5471c8e5a35c", "1ad2415dc5c54a0b4d259922951973dad7d58a2a", "088139e306c11a042a6409e12744ac913e5b6daf", "245b2a2f5c7a15463ad39548ae7f1a301663134a", "c19c45531a5f6c114e9da1dbfea8aedc31c31e0d", "7be95dbe21c556707aec299771e8b68b0347b3ee", "4cad9cdeefd287f044967735ea5b4ef7620cb619", "0230c3f0fe50e3b348c85cf71ddb0b776c27dd75", "00aecbb15992ccfbaa19b6193e92317a345f6b65", "15406a5c52cb7d0bc920850ca6a9962b0ac5126c", "034febbdb6ea25516b50415595af245f0631b538", "1bdfbe727bd0e5815363418ba024d66e6ee7841b", "17db237c8d54b0701ddac961860ef91ee6e2c0e8", "0f8671f461b4966851b935dfc1c7daccbf962c32", 
"184e9846b86c0d95a104fe15ffc78b23684ff7d9", "3433466b0c01b8a3b47d9cd21de96dde6df31f21", "021e1cf955d07d44aceb18fffbd13cd4d3c623a7", "460df3bc1927d63f6dff8a8df1a77480a8f04f32", "0775660f6b029be1085573cf95b63546cd24b06a" ], "paperAbstract": "In recent years dependent types have become a hot topic in programming language research. A key reason why dependent types are interesting is that they allow unifying types and terms, which enables both additional *expressiveness* and *economy of concepts*. Unfortunately there has been much less work on dependently typed calculi for object-oriented programming. This is partly because it is widely acknowledged that the combination between dependent types and subtyping is particularly challenging. \nThis paper presents \u03bb I\u2264, which is a dependently typed generalization of System F\u2264. The resulting calculus follows the style of Pure Type Systems, and contains a single unified syntactic sort that accounts for expressions, types and kinds. To address the challenges posed by the combination of dependent types and subtyping, \u03bb I\u2264 employs a novel technique that unifies *typing* and *subtyping*. In \u03bb I\u2264 there is only a judgement that is akin to a typed version of subtyping. Both the typing relation, as well as type well-formedness are just special cases of the subtyping relation. The resulting calculus has a rich metatheory and enjoys of several standard and desirable properties, such as *subject reduction*, *transitivity of subtyping*, *narrowing* as well as standard *substitution lemmas*. All the metatheory of \u03bb I\u2264 is mechanically proved in the Coq theorem prover. 
Furthermore, (and as far as we are aware) \u03bb I\u2264 is the first dependently typed calculus that completely subsumes System F\u2264, while preserving various desirable properties.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133871", "http://i.cs.hku.hk/~bruno/papers/oopsla17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/485f765b40b3305aa4b7229f8f1236a381d9ba7b", "sources": [ "DBLP" ], "title": "Unifying typing and subtyping", "venue": "PACMPL", "year": 2017 }, "48774f39d93cf16535318e874227688658c4ccee": { "authors": [ { "ids": [ "25274194" ], "name": "Qingquan Song" }, { "ids": [ "2435190" ], "name": "Xiao Huang" }, { "ids": [ "2666978" ], "name": "Hancheng Ge" }, { "ids": [ "1697232" ], "name": "James Caverlee" }, { "ids": [ "1687568" ], "name": "Xia Hu" } ], "doi": "10.1145/3097983.3098007", "doiUrl": "https://doi.org/10.1145/3097983.3098007", "entities": [ "Data structure", "Streaming media", "Velocity" ], "id": "48774f39d93cf16535318e874227688658c4ccee", "inCitations": [ "2fee7d8c82a474a4d7236cdf7fe289af0886cc1a", "b0247214a06a85826be57191ddd81d7c30d2002d", "d139b0579d2279b4fbad81498b835c40afbeb8c7" ], "journalName": "", "journalPages": "435-443", "journalVolume": "", "outCitations": [ "188f4d9b9d580d0432056b760b3372ec83543d1d", "61aaa82a353b269efe3d8a6ee1db248b0aac0727", "74526f547c01812e7d256a622c3515bbde3b554e", "e23dd37582dfc31c25a5df644e3d08986c650182", "b83e42b63576c4667fad83dd83830a1093ffefb8", "0c1733a308b265ea310116cba78956bd6cd71b70", "224ee20c67b4dccf664595dc5f9fabcad0defe9a", "7b4a09d7676124889d823bca8bea33e9a6cbcd27", "f59019768cc6e198b73d52adcfaa0d26067a6e28", "70560383cbf7c0dc5e9be1f2fd9efba905377095", "a6a53b783ec3e01f91696b6ec846e3aac15f4a3d", "55e879bd4ae1455ae379c3f52de4b53cfd9da921", "059e06ebab437b61a9b2f34f75629a5bc6d39e3e", "023f6fc69fe1f6498e35dbf85932ecb549d36ca4", "13dc7205433523de3b1baec4b063f883693d1472", "18977c6f7abb245691f4268ccd116036bd2391f0", 
"0efb659b15737c76a2fc50010a694123f6c45f64", "0b2b596566529de7aa4fc2ccb426432b60ce2fb5", "32c493f25dd46f08d1cc2155d88f60c7b99ac49c", "e6f78c827993dc2dc37d66b6cfff2707cadb2447", "136bd3374e2c8cf2170762af3968c46c14dcada3", "b516d962bc42fb4828c8be2304ca051177819599", "1d0f25989452abbbc8feaf00a034ff110fc4b350", "165c428fec7d3aac4ab6e2c9d285af92883b643c", "28e8bdce406e4ed68e836d6279bbf9f4de6cc436", "481eadc1511574a6375ae1a04e4dc1e824d09de5", "d1a6841facad41e626ca5256b4a67b193705d222", "dbf27e801a0e5281125b149ab5cc3c47382e567f", "54b04eccdc8aeca182ef7c3e302d743f797e0775", "247947bb43d193e1a7e951b973a8312af15f3ab9", "f2bdc868e33937a52c519bdf13e51a7afffbc03c", "a54b2b2b9b19c1a726ca82343270f2b88516d2a5", "10ac4bc35c4fcdc09bebbd3b46e3a1223993b894", "38b389580d774ce513284e671ff3bbcef0258de2" ], "paperAbstract": "Tensor completion has become an effective computational tool in many real-world data-driven applications. Beyond traditional static setting, with the increasing popularity of high velocity streaming data, it requires efficient online processing without reconstructing the whole model from scratch. Existing work on streaming tensor completion is usually built upon the assumption that tensors only grow in one mode. Unfortunately, the assumption does not hold in many real-world situations in which tensors may grow in multiple modes, i.e., multi-aspect streaming tensors. Efficiently modeling and completing these incremental tensors without sacrificing its effectiveness remains a challenging task due to the uncertainty of tensor mode changes and complex data structure of multi-aspect streaming tensors. To bridge this gap, we propose a Multi-Aspect Streaming Tensor completion framework (MAST) based on CANDECOMP/PARAFAC (CP) decomposition to track the subspace of general incremental tensors for completion. 
In addition, we investigate a special situation where time is one mode of the tensors, and leverage its extra structure information to improve the general framework towards higher effectiveness. Experimental results on four datasets collected from various real-world applications demonstrate the effectiveness and efficiency of the proposed framework.", "pdfUrls": [ "http://faculty.cse.tamu.edu/caverlee/pubs/song17kdd.pdf", "http://doi.acm.org/10.1145/3097983.3098007" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/48774f39d93cf16535318e874227688658c4ccee", "sources": [ "DBLP" ], "title": "Multi-Aspect Streaming Tensor Completion", "venue": "KDD", "year": 2017 }, "48847bcd0889a5f077de5b6041389da76b21b68f": { "authors": [ { "ids": [ "2247151" ], "name": "Navid Yaghmazadeh" }, { "ids": [ "2021637" ], "name": "Yuepeng Wang" }, { "ids": [ "1714075" ], "name": "Isil Dillig" }, { "ids": [ "1711860" ], "name": "Thomas Dillig" } ], "doi": "10.1145/3133887", "doiUrl": "https://doi.org/10.1145/3133887", "entities": [ "Database", "Database schema", "End system", "End-to-end principle", "Experiment", "Internet Movie Database (IMDb)", "Iterative refinement", "NL (complexity)", "Natural language", "Natural language processing", "Parsing", "Program synthesis", "SQL", "Type inhabitation" ], "id": "48847bcd0889a5f077de5b6041389da76b21b68f", "inCitations": [ "0a4ac94fc2434ca06f30c0caec96217e1c9896db", "47f65c165f7ccedd4c18189d4690eec5369dd9c5", "791714728fefcb067fb6b56c7f4de093d536cf00", "9ca155165434e4dfd0832e4b325c88381dc603de", "6b89be108313f5650cf58b23ade0d5f312b37190" ], "journalName": "PACMPL", "journalPages": "63:1-63:26", "journalVolume": "1", "outCitations": [ "762b63d2eb86f8fd0de98a08561b77527ae8f165", "47c27cd2c37ba331ced0b24fba43fd917d5f6d19", "2ae9e298ee2fdbc6e241238419a27af09a2af115", "0d98bb00dfe73bc21bbe906cb3a77764c9138b47", "2f63cffb283166bb09076dffd77e2bb1f39a6d48", "2a212669c2dcf476a33bbb776d021f16ffa961ec", 
"19d29a59ef1b472102c540b178415489e0353b37", "05607111cf79330d56164a10d351dbf94e2cfa44", "a07ee0c6f7e3989c6c76e1bbeb090394c231a980", "6bbaf76d82968a4349f7f043ece649c8ac1fbc0c", "112b1448c7b309711bb8a85e282645a704a57f8f", "156011c5f03ff8da609fe5676b14991e48228037", "c72f90648299ea59977617c6456bae3bbc0dc6df", "0a3cf2777169ef0fb81205fe255eb7260bcd2c52", "18da147789c74a2633a7f7ad9d9748025ee03345", "3e68d730b678eb4994e46fb5b4edeaa2c5740ad8", "1d63a9e3751293eda942b0db2891919b3b92996c", "a5c3a864131f9e5c7e3d554b883503501a46438e", "3ec1a36e9e12a85d02adaa8ed682ee04f73ae332", "099cfdca0f11eeaf46dd6457f33caff8e8fbcb41", "0ca2d4a964db62df778c264b607d377ca9731814", "63c469ae1655cd6954e9efacbdc7253a7bdb6da7", "127fd3a2dbefc5bea84c1a682d2623c5df4f581c", "7149d00b10c8865a455d151595dd82a4880e3303", "4c556f0fdb14611302d13e008c3fd3cf80f87af2", "1c9df99cce1903d34c53025e86e72331bbfbe08f", "99cc63730e3079ed58311a4ec88f4f0c891ed61d", "6dad34fac859e5b090d55c9771a4181184cd116e", "0af1c24e00dbf342517df2f50698502e3d793ea8", "2255497a82335dbf5ef2b614bfa02317b799c621", "7c889b839e99316f749c4d4bff45ccdd7dbd46ef", "11256a3695e1313bc0989935a94ee80342e25cd1" ], "paperAbstract": "This paper presents a new technique for automatically synthesizing SQL queries from natural language (NL). At the core of our technique is a new NL-based program synthesis methodology that combines semantic parsing techniques from the NLP community with type-directed program synthesis and automated program repair. Starting with a program sketch obtained using standard parsing techniques, our approach involves an iterative refinement loop that alternates between probabilistic type inhabitation and automated sketch repair. We use the proposed idea to build an end-to-end system called SQLIZER that can synthesize SQL queries from natural language. Our method is fully automated, works for any database without requiring additional customization, and does not require users to know the underlying database schema. 
We evaluate our approach on over 450 natural language queries concerning three different databases, namely MAS, IMDB, and YELP. Our experiments show that the desired query is ranked within the top 5 candidates in close to 90% of the cases and that SQLIZER outperforms NALIR, a state-of-the-art tool that won a best paper award at VLDB'14.", "pdfUrls": [ "https://www.cs.utexas.edu/~ypwang/pubs/oopsla17.pdf", "http://www.cs.utexas.edu/users/isil/sqlizer.pdf", "http://doi.acm.org/10.1145/3133887" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/48847bcd0889a5f077de5b6041389da76b21b68f", "sources": [ "DBLP" ], "title": "SQLizer: query synthesis from natural language", "venue": "PACMPL", "year": 2017 }, "48a85d8120ad741f684216763d208d1428bce7cf": { "authors": [ { "ids": [ "1788674" ], "name": "Ruey-Cheng Chen" }, { "ids": [ "35154089" ], "name": "Luke Gallagher" }, { "ids": [ "1864328" ], "name": "Roi Blanco" }, { "ids": [ "1691169" ], "name": "J. Shane Culpepper" } ], "doi": "10.1145/3077136.3080819", "doiUrl": "https://doi.org/10.1145/3077136.3080819", "entities": [ "Floor and ceiling functions", "Gradient", "Gradient boosting", "Learning to rank", "Machine learning" ], "id": "48a85d8120ad741f684216763d208d1428bce7cf", "inCitations": [ "08f9750b1b53cfd7769fe2735d171295bac18796", "36d9abbbc4735a9dbd75f57f8f5602bd80d6b0b1", "3821bac3ae629cf271a119cda269f508b7f08ad5" ], "journalName": "", "journalPages": "445-454", "journalVolume": "", "outCitations": [ "222ab464a094e82dd21b9774ae67132b58075263", "0be5ae7e65fea9b56d3eed04ea15bb6bd6ed8a89", "0d97ee4888506beb30a3f3b6552d88a9b0ca11f0", "05bdc58374f15c2077f3e0ba1152b5a150f91027", "d2071c1e4a6030dc0005dbfeefdd196a8b293e84", "55da42dd2b5f0a5e8788124ec7a89a0cd93cac33", "3eae360c6ee52950f27f577aedd5f9934a04e137", "0d959afca677f60199159ed8f627f344d19e130d", "2bb585c4b9d89b095e9938f7d1d3286e4ac2076f", "1a4054cfa2001290b50dfe5e4d6a599fb5b04405", "291ac1ba6d64e16c5e898978268b83fd523be527", 
"628dd3515c500ab8683639042dd78ff5c8b42ed5", "40707bd624bc789d26f13734de41fa41c866a332", "1fc4a3db0443527ac78cba218f2263cd36736783", "aab092de89e16b714352ad31c88d332bd6f815af", "d6fba8d74309f90a911bd3b3ca6a95822da52bc6", "7c128d2dcf64d18cbbc3abb395a0583ba3e1fbdb", "0e67bac2937f5f53f310564efa547efd82c0371d", "23606ec8a507e74f5a8acadb66f253f1d5047718", "27a6fcbd4eb931eb828c437ba66f09d829c4870e", "266e856baab2282b305789c4e5c6ddf2a9106420", "3c24e943f25db78c17f6a6fdee1c833067fb19d8", "0df9c70875783a73ce1e933079f328e8cf5e9ea2", "f96bda0053733a8c7c1333227a6df23e55457c33", "15cb35b14344611a1849a7bf9b872d457ffbe15a", "1b65af0b2847cf6edb1461eda659f08be27bc76d", "601de5a3cdf6d99947614ce130a16bde2f11aa03", "639c5d11e675b0287342399e2094dbe47f9e4b44", "20a1afa80ba1ecc069e389b1e36904a7017834a5", "247b97d7f79ae149461d43c661246a783441b3a0", "c6fad6b317527162aac8703bc8cb405d5661d806", "f7a34579f122b240707874b6b76dc71af23ddb0f", "684be9e9bd41d148158c64ba811c08f66b58092a" ], "paperAbstract": "Complex machine learning models are now an integral part of modern, large-scale retrieval systems. However, collection size growth continues to outpace advances in efficiency improvements in the learning models which achieve the highest effectiveness. In this paper, we re-examine the importance of tightly integrating feature costs into multi-stage learning-to-rank (LTR) IR systems. We present a novel approach to optimizing cascaded ranking models which can directly leverage a variety of different state-of-the-art LTR rankers such as LambdaMART and Gradient Boosted Decision Trees. Using our cascade model, we conclusively show that feature costs and the number of documents being re-ranked in each stage of the cascade can be balanced to maximize both efficiency and effectiveness. 
Finally, we also demonstrate that our cascade model can easily be deployed on commonly used collections to achieve state-of-the-art effectiveness results while only using a subset of the features required by the full model.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080819" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/48a85d8120ad741f684216763d208d1428bce7cf", "sources": [ "DBLP" ], "title": "Efficient Cost-Aware Cascade Ranking in Multi-Stage Retrieval", "venue": "SIGIR", "year": 2017 }, "48ccd54174de79b67246ac42e5348ec4a900a4b1": { "authors": [ { "ids": [ "20633351" ], "name": "Chintan Chavda" }, { "ids": [ "20683679" ], "name": "Ethan C. Ahn" }, { "ids": [ "7377464" ], "name": "Yu-Sheng Chen" }, { "ids": [ "2379012" ], "name": "Youngjae Kim" }, { "ids": [ "27522437" ], "name": "Kalidas Ganesh" }, { "ids": [ "7137077" ], "name": "Junghee Lee" } ], "doi": "", "doiUrl": "", "entities": [ "Access control", "Adversary (cryptography)", "Code injection", "Data breach", "Encryption", "Fault injection", "Information sensitivity", "Memory cell (binary)", "Non-volatile memory", "Resistive random-access memory" ], "id": "48ccd54174de79b67246ac42e5348ec4a900a4b1", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "5d389d8cfd2a3a96bdd1a8980c57dd4cc077c92e", "4b95a8a8ed760cbde3aa3c19029864d14c3e3e56", "431bbc47ce90f5e0eb8388fa80b0cc4d7c26ab76", "971fa79b8c4e34bba2e99572e277d304fda47e0c", "6e6e0ec8312957641cc11da4a3afac50ade2da61", "8c6870c1dc7d95ce523fa0b0a85a046c577c54cc", "1820a34042d6371a9e20484b0c63b698eb522a6c", "8a77a7049b278e85f9b2b2156af518c9cd8e72af", "a6465efd78d365a289acec24dffb2bad863e4332", "aeb62c33aeec9fd71f53f307a210c52d934f2eb7", "3da14037fc6e2c3dee2d6808bc2d7e933325d054", "9cc44c861e85fb27de40f2d710044e7dace0f101", "947873557360445b10e6973e058db29119f3b505", "8bdd00c6b5f6485e95341c2a738233d6f0798428", "9687ea801aeb7cbeef3be3202d68cc9d780e02dc", 
"29646cb9cab1f904c22736741110b6f4648d4ecf", "2835808d700c88459ff21ce31ba3c4ef02778ddb" ], "paperAbstract": "Encryption is often employed to protect sensitive information stored in memory and storage. It is the most powerful countermeasure against data breach, but it has performance overhead. As a low-cost alternative to encryption, an access-control memory (ACM) has been introduced, which integrates an access-control mechanism with memory. While ACM minimizes the performance overhead of encryption, it provides similar levels of security as to encryption method. ACM reveals information only when the access codes are correct. However, if an adversary attempts to access data directly from memory cells through a physical attack without going through a standard interface, the vulnerability could occur. This paper discusses feasibility and countermeasures for physical attacks, including fault injection attack, power analysis attack, chip modification, microprobing, and imaging for ACM. Moreover, as a concrete example of ACM, we compare the security aspects of SSDs when the write buffers in the SSDs employ ACM with emerging non-volatile memories such as STTRAM, PRAM, and RRAM.", "pdfUrls": [ "https://www.usenix.org/conference/hotstorage17/program/presentation/chavda", "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-chavda.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/48cc/d54174de79b67246ac42e5348ec4a900a4b1.pdf", "s2Url": "https://semanticscholar.org/paper/48ccd54174de79b67246ac42e5348ec4a900a4b1", "sources": [ "DBLP" ], "title": "Vulnerability Analysis of On-Chip Access-Control Memory", "venue": "HotStorage", "year": 2017 }, "48e6c7035b35a3ee8b8c2e430c158bfd7102a2fe": { "authors": [ { "ids": [ "8297244" ], "name": "Mustafa Abdul Jabbar" }, { "ids": [ "32530012" ], "name": "Mohammed A. 
Al Farhan" }, { "ids": [ "2274654" ], "name": "Rio Yokota" }, { "ids": [ "10867102" ], "name": "David Keyes" } ], "doi": "10.1007/978-3-319-64203-1_40", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_40", "entities": [ "512-bit", "Algorithm", "Broadwell (microarchitecture)", "Computation", "Fast multipole method", "Kernel (operating system)", "Knights", "Locality of reference", "Manycore processor", "Message Passing Interface", "Multi-core processor", "Network on a chip", "Open-source software", "Parallel computing", "Scalability", "Shared memory", "Speedup", "Thread (computing)", "Tree traversal", "Xeon Phi" ], "id": "48e6c7035b35a3ee8b8c2e430c158bfd7102a2fe", "inCitations": [], "journalName": "", "journalPages": "553-564", "journalVolume": "", "outCitations": [ "13387166efd4f6d66b9ab19828855090586b16fd", "1cfab3deba5df408390bc0b085ac66563a30ec2d", "0c3ffc0728dcf07eab9ed9587bc43f10894f9b8c", "59b0e938a489511a6fe1021f79808ab917911f9c", "3eb61d9182600f9402b17a1269eba9152f13ada3", "21e05bb43446475a2034a62dc8c67dfa368ea0d4", "688384fc5e643445e835435e96b9dfcfb6598d36", "f0c68fbd5bd1fb4e70ca50a4b036ba439d94039e", "7ef7ea5ccd726b3ea49feadf132eb29cc9c5bc40", "2fe1c7a9be48f8ea8feaabf6d0ddd4ae0cee40ba", "d2652adc1e298877cfeddf93adbf5019364b3c99", "7a581293dad33644db8541d13c8d002a225463a0", "3b2c131e41a8cde6097e945b1202d08bc0aa561c" ], "paperAbstract": "Manycore optimizations are essential for achieving performance worthy of anticipated exascale systems. Utilization of manycore chips is inevitable to attain the desired floating point performance of these energy-austere systems. In this work, we revisit ExaFMM, the open source Fast Multiple Method (FMM) library, in light of highly tuned shared-memory parallelization and detailed performance analysis on the new highly parallel Intel manycore architecture, Knights Landing (KNL). We assess scalability and performance gain using task-based parallelism of the FMM tree traversal. 
We also provide an in-depth analysis of the most computationally intensive part of the traversal kernel (i.e., the particle-to-particle (P2P) kernel), by comparing its performance across KNL and Broadwell architectures. We quantify different configurations that exploit the on-chip 512-bit vector units within different task-based threading paradigms. MPI communication-reducing and NUMA-aware approaches for the FMM\u2019s global tree data exchange are examined with different cluster modes of KNL. By applying several algorithm- and architecture-aware optimizations for FMM, we show that the N-Body kernel on 256 threads of KNL achieves on average 2.8\u00d7 speedup compared to the non-vectorized version, whereas on 56 threads of Broadwell, it achieves on average 2.9\u00d7 speedup. In addition, the tree traversal kernel on KNL scales monotonically up to 256 threads with task-based programming models. The MPI-based communication-reducing algorithms show expected improvements of the data locality across the KNL on-chip network.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_40" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/48e6c7035b35a3ee8b8c2e430c158bfd7102a2fe", "sources": [ "DBLP" ], "title": "Performance Evaluation of Computation and Communication Kernels of the Fast Multipole Method on Intel Manycore Architecture", "venue": "Euro-Par", "year": 2017 }, "4907d88f533657d7be694af4b8d79f6a8d8fd56d": { "authors": [ { "ids": [ "2801129" ], "name": "Danny Raz" }, { "ids": [ "3284150" ], "name": "Itai Segall" }, { "ids": [ "34086664" ], "name": "Maayan Goldstein" } ], "doi": "10.1145/3078468.3078481", "doiUrl": "https://doi.org/10.1145/3078468.3078481", "entities": [ "Cloud computing", "Heuristic", "Network function virtualization", "Server (computing)", "Virtual reality" ], "id": "4907d88f533657d7be694af4b8d79f6a8d8fd56d", "inCitations": [], "journalName": "", "journalPages": "1:1-1:10", "journalVolume": "", "outCitations": [ 
"34f88378a216784b6979d42d5c719ce976f5623d", "dd7456247989711c6da39c60517f5d2b716196e7", "eaaa087bf53bed04e1f5cefe620b1421d5803b72", "1c7ac80ae70a230eb7afc04e6afe5b082ccf810a", "c7ea13ddd27c5a5a0280ea222df8c584d9e3c434", "2e7647a07fe21c18ab5b7037de3038157338f1db", "08e3ca8c996dae7004aea31c80c73c730fc04314", "35fa8da3fd790ac62b37d9061f42e2832ec8b4c7", "e820ab0a3a07b11dcd119f7df5b683ccae68fbbe", "eefaf2da693c8c9b413afa063ec1f76e3aa519e4", "3a257a87ab5d1e317336a6cefb50fee1958bd84a", "17855de865ecfbd95834042d827fd23b46cde36b", "16b0ad00f59c462cafa4c2bec451d2b5ed044a64", "730f674eea5f2d0ba35d608c7091c3ba3fb08c04" ], "paperAbstract": "One of the main motivations for the shift to the Cloud (and the more recent shift of telco operators into NFV) is cost reduction due to high utilization of infrastructure resources. However, achieving high utilization in practical scenarios is complex since the term \"resources\" covers different orthogonal aspects, such as server CPU, storage (or disk) usage and network capacity, and the workload characterization varies over time and over different users.\n In this paper we study the placement of Virtual Machines (VMs) that implement services over the physical infrastructure, trying to understand what makes a placement scheme better than others in the overall utilization of the various resources. We show that the multidimensional case is inherently different from the single dimension case, and develop novel placement heuristics to address the specific challenges. We then show, by extensive evaluation over real data, that operators can significantly improve their resource utilization by selecting the most appropriate placement policy, according to their system specifications and the deployed services. 
In particular, two of our new heuristics that dynamically change the placement logic according to the amount of available (unused) resources are shown to perform very well in many practical scenarios.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078468.3078481" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4907d88f533657d7be694af4b8d79f6a8d8fd56d", "sources": [ "DBLP" ], "title": "Multidimensional resource allocation in practice", "venue": "SYSTOR", "year": 2017 }, "490d504d54f3c31a1f9a661d03d0e8fbb2b6b239": { "authors": [ { "ids": [ "2989804" ], "name": "Shasha Wen" }, { "ids": [ "3294666" ], "name": "Milind Chabbi" }, { "ids": [ "1785951" ], "name": "Xu Liu" } ], "doi": "10.1145/3037697.3037729", "doiUrl": "https://doi.org/10.1145/3037697.3037729", "entities": [ "Benchmark (computing)", "Compiler", "Computation", "Computational chemistry", "Locality of reference", "NWChem", "Operand", "Optimizing compiler", "Principle of locality", "Program optimization", "Redundancy (engineering)", "Run time (program lifecycle phase)", "Static program analysis", "Value (ethics)" ], "id": "490d504d54f3c31a1f9a661d03d0e8fbb2b6b239", "inCitations": [ "30868ee94d410fef27d3d00e423a330480eea4e3", "47b4884871be8c08c33e35438d69732b57f5129f" ], "journalName": "", "journalPages": "47-61", "journalVolume": "", "outCitations": [ "109eda1666ed4ce3ac46e272e3f4152826897b97", "073688e19290d53226404f1fb02d0d76a3906e5f", "2413e1590805bf12bd39926e58b073dda70cead6", "2392baa91358c3be090bc7939c789ab5cd4d56de", "180189c3e8b0f783a8df6a1887a94a5e3f82148b", "8ec4e02e6213c524ca20ab1100a6f2188540ccfa", "2cfe0f578b9907e98d007e379fd1db28a926b15f", "08f3113106e4b3f97af0586825631cf6442ad642", "92298bc3e72b8bfdfc42440f8aae93c4557eaacf", "0c0c2e56bb157bf0f91a555e4e6aa2f8fc7706e9", "1c3e650a8d11f0812b61fabc2abd0bc5ce1aace1", "685822d0bc60f288b9cd774ecd4d505b0311c3d0", "163093eeee2fcbf49d5f357f59fbe7cfface3cdd", "9591a06a102a2c80159f6734753b96d23aae4b50", 
"b3d8160562b94eed15da0c1e854a6e7f78e2aa18", "e2c992e9f3353a9c4b3ae4021629327ac63b3f90", "a0b1b8ee4a9e6ae68ce6a712ad0a66ddb4a12117", "0653e2ed9f683868cb4539eb8718551242834f6b", "715469ee7626ae11a6ac5c0a302ff2d535c9718f", "21f3f1bb95e0579a9e1a5e955d747114586f46ef", "3af0a17e79061462dbc2823f64ea188136271713", "c0251d764976b9676c24fc33459b2c1842cd3417", "72682890677496da1a98f2d4ce9396ad13997e07", "a21c972077f85d23f769c6ac4e4afa283d38de49", "ea599296134e6375316e0cf37d91f63ac6a407b3", "051d5feb423ceeaa18c05e4ad205ad6675926604", "0110c80228683bc32879efb1b2f3931421e52eb6", "bdf0919f6155c7ee22a1ec2392e94fc124fa0df6", "a65498d008a162646b8d3b5c4ce5b73f02ff5b6a", "a809e7422a38acf31fc8de4138068a24c523507c", "99a1520bc334c111ff84619a1ac376f009d0d3bf", "a8bbbde26c19e013343cca08c758bcca3f60a0d3", "4658e55e3afd28ac26b145789e3b6f71aa0789c4", "1f2ff98f9413bb36c641e9edcfa79f7b33eeb80a", "59e69304ffdae1c79c6c2e5e13d3c7d3ee9b7e36", "00c581e956843c6e93009ea9146d69201a928888", "2042924d67accb118d6d73765c8738ed793b5166", "e98acbf54a72098136f26b15365cabb710b4ec20" ], "paperAbstract": "Complex code bases with several layers of abstractions have abundant inefficiencies that affect the execution time. Value redundancy is a kind of inefficiency where the same values are repeatedly computed, stored, or retrieved over the course of execution. Not all redundancies can be easily detected or eliminated with compiler optimization passes due to the inherent limitations of the static analysis.\n Microscopic observation of whole executions at instruction- and operand-level granularity breaks down abstractions and helps recognize redundancies that masquerade in complex programs. We have developed REDSPY---a fine-grained profiler to pinpoint and quantify redundant operations in program executions. Value redundancy may happen over time at same locations or in adjacent locations, and thus it has temporal and spatial locality. REDSPY identifies both temporal and spatial value locality. 
Furthermore, REDSPY is capable of identifying values that are approximately the same, enabling optimization opportunities in HPC codes that often use floating point computations. REDSPY provides intuitive optimization guidance by apportioning redundancies to their provenance---source lines and execution calling contexts. REDSPY pinpointed dramatically high volume of redundancies in programs that were optimization targets for decades, such as SPEC CPU2006 suite, Rodinia benchmark, and NWChem---a production computational chemistry code. Guided by REDSPY, we were able to eliminate redundancies that resulted in significant speedups.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037729" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/490d504d54f3c31a1f9a661d03d0e8fbb2b6b239", "sources": [ "DBLP" ], "title": "REDSPY: Exploring Value Locality in Software", "venue": "ASPLOS", "year": 2017 }, "491809aa5c65a608156baaf7e4ae319fca609366": { "authors": [ { "ids": [ "35949326" ], "name": "Minho Kim" }, { "ids": [ "2470977" ], "name": "Jaemin Lim" }, { "ids": [ "3435254" ], "name": "Hyunwoo Yu" }, { "ids": [ "1717183" ], "name": "Kiyeon Kim" }, { "ids": [ "2703779" ], "name": "Younghoon Kim" }, { "ids": [ "3123308" ], "name": "Suk-Bok Lee" } ], "doi": "", "doiUrl": "", "entities": [ "Entity", "Experiment", "Line-of-sight (missile)", "Privacy", "Simulation" ], "id": "491809aa5c65a608156baaf7e4ae319fca609366", "inCitations": [ "7aaeeb9325398a9c421afb4d714a259587fc33e8" ], "journalName": "", "journalPages": "163-176", "journalVolume": "", "outCitations": [ "a6bb7731d5f9e7c03b50cb691cd067596a572246", "36004fe08b740b6f02ea6f3023c1005717cdb0b4", "e67410d5ef6a064afd20d93650f39129d00f1a32", "88079b9ceae1981815a5f8564b7b1890094d28ab", "c1814127cf4945977277e6f069a52e56db7a92dc", "15bc1496ae89779f2e998a7da7567ec5bbd3a3a4", "0bee052af002eb197277cd222d62154c7de4ac8a", "05a926a71b0d36610360317d01d048dd2bbdad92", "e6c7a3ccfb5fdefb7b7ce886f953e34299abdcba", 
"3e6947ff582991d9b0e204a19e11552f7cec53d4", "1780c8d2d933081022be209eb6ef1963852f7f75", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "68152bedab7f7cf93136b75ac31b45db2278bdf8", "ce0c9434e4ac7d8deb408a058642230609f0c4e3", "bdacfa6fc5dc122cb220fe547d705511b4731bf4", "097c5c1e6c037288b28a10cab8c8abeb8d6eda10", "01a29e319e2afa2d29cab62ef1f492a953e8ca70", "2ab47454f59d9d8e55d4d8a69530562a3690794a", "1d08d8e2d8e7d15d7cf453246f357cf4f74e9429", "523c226a32361acf9c7f856c8d7e4eb8d59fe786", "c64f19846edeaf53b6e64f90b4da49f0cc9929dc" ], "paperAbstract": "Today, search for dashcam video evidences is conducted manually and its procedure does not guarantee privacy. In this paper, we motivate, design, and implement ViewMap, an automated public service system that enables sharing of private dashcam videos under anonymity. ViewMap takes a profile-based approach where each video is represented in a compact form called a view profile (VP), and the anonymized VPs are treated as entities for search, verification, and reward instead of their owners. ViewMap exploits the line-of-sight (LOS) properties of dedicated short-range communications (DSRC) such that each vehicle makes VP links with nearby ones that share the same sight while driving. ViewMap uses such LOS-based VP links to build a map of visibility around a given incident, and identifies VPs whose videos are worth reviewing. Original videos are never transmitted unless they are verified to be taken near the incident and anonymously solicited. ViewMap offers untraceable rewards for the provision of videos whose owners remain anonymous. 
We demonstrate the feasibility of ViewMap via field experiments on real roads using our DSRC testbeds and trace-driven simulations.", "pdfUrls": [ "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-kim-minho.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_kim_0.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_kim_0.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/kim-minho", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-kim-minho.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4918/09aa5c65a608156baaf7e4ae319fca609366.pdf", "s2Url": "https://semanticscholar.org/paper/491809aa5c65a608156baaf7e4ae319fca609366", "sources": [ "DBLP" ], "title": "ViewMap: Sharing Private In-Vehicle Dashcam Videos", "venue": "NSDI", "year": 2017 }, "492c2218f1cd089cbd336b96275e5afbc2a5b648": { "authors": [ { "ids": [ "2185466" ], "name": "Sebastian Lekies" }, { "ids": [ "39764847" ], "name": "Krzysztof Kotowicz" }, { "ids": [ "35850512" ], "name": "Samuel Gro\u00df" }, { "ids": [ "5168389" ], "name": "Eduardo A. 
Vela Nava" }, { "ids": [ "1935526" ], "name": "Martin Johns" } ], "doi": "10.1145/3133956.3134091", "doiUrl": "https://doi.org/10.1145/3133956.3134091", "entities": [ "Code reuse", "Comparison of JavaScript frameworks", "Content Security Policy", "Cross-site scripting", "Event (computing)", "HTML", "JavaScript", "Software documentation", "Web application" ], "id": "492c2218f1cd089cbd336b96275e5afbc2a5b648", "inCitations": [ "22c087423ace72ac02460595896b6aa307ba1071" ], "journalName": "", "journalPages": "1709-1723", "journalVolume": "", "outCitations": [ "c6b24743d3e29b2de9d146b03fdec3a18bdf6633", "533da0b2143bdee2304ebae7a22f91054d83068a", "4d00e4d162e4949ccc8803cbd984324d4fc992a2", "9084da84025cc58c51859f5e437fc80b5864f19a", "01b5b648af61ddb382da638a299fae2315b25192", "2f89183ab28b179d98f5fdf0da3f8692a35db08e", "c38ff647b9fc57eee17980221bacd040f1668bf5", "0054871fa317a7acc13e830ab6bea9af1f27a776", "1374cf2c4815cd6e389ea2860f2b70284d7a856b", "3ad52963ff247e3976f433a96659b22bb9895117", "b3dc76e3478f97b2c5bced80f4ebaa587f146b53", "552c849b53936f28302c879350694598187b2a40", "35b0d3fe937571eaf29f1473a919d397ba7141ac", "44869a0ef8ed2e584e7c2b24806e79da3339fff6", "1ffc8b184461954d05627e78480223560dd5e82d", "12823db25118d77041aad9eb4796a67915f8de2c", "6bf909ae0ab8617aa3f10bd4a15b25b986db1822", "192dc9e8618d00beb8451553d59dd391bcf53124", "7a36190f3b1d1bec1420ff3115c9ff406235742c" ], "paperAbstract": "Cross-Site Scripting (XSS) is an unremitting problem for the Web. Since its initial public documentation in 2000 until now, XSS has been continuously on top of the vulnerability statistics. Even though there has been a considerable amount of research and developer education to address XSS on the source code level, the overall number of discovered XSS problems remains high. 
Because of this, various approaches to mitigate XSS have been proposed as a second line of defense, with HTML sanitizers, Web Application Firewalls, browser-based XSS filters, and the Content Security Policy being some prominent examples. Most of these mechanisms focus on script tags and event handlers, either by removing them from user-provided content or by preventing their script code from executing.\n In this paper, we demonstrate that this approach is no longer sufficient for modern applications: We describe a novel Web attack that can circumvent all of these currently existing XSS mitigation techniques. In this attack, the attacker abuses so-called script gadgets (legitimate JavaScript fragments within an application's legitimate code base) to execute JavaScript. In most cases, these gadgets utilize DOM selectors to interact with elements in the Web document. Through an initial injection point, the attacker can inject benign-looking HTML elements which are ignored by these mitigation techniques but match the selector of the gadget. This way, the attacker can hijack the input of a gadget and cause processing of his input, which in turn leads to code execution of attacker-controlled values. We demonstrate that these gadgets are omnipresent in almost all modern JavaScript frameworks and present an empirical study showing the prevalence of script gadgets in productive code. 
As a result, we assume most mitigation techniques in web applications written today can be bypassed.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134091", "https://acmccs.github.io/papers/p1709-lekiesA.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/492c2218f1cd089cbd336b96275e5afbc2a5b648", "sources": [ "DBLP" ], "title": "Code-Reuse Attacks for the Web: Breaking Cross-Site Scripting Mitigations via Script Gadgets", "venue": "CCS", "year": 2017 }, "492f7339ff5744a0d2723102828e4bac50a2cc98": { "authors": [ { "ids": [ "2573628" ], "name": "XianWei Zhang" }, { "ids": [ "1686367" ], "name": "Youtao Zhang" }, { "ids": [ "1776567" ], "name": "Bruce R. Childers" }, { "ids": [ "1724199" ], "name": "Jun Yang" } ], "doi": "10.1109/PACT.2017.34", "doiUrl": "https://doi.org/10.1109/PACT.2017.34", "entities": [ "Approximate computing", "Approximation algorithm", "Baseline (configuration management)", "Benchmark (computing)", "Computation", "Dynamic random-access memory", "Precomputed Radiance Transfer", "Scheduling (computing)", "Software bug" ], "id": "492f7339ff5744a0d2723102828e4bac50a2cc98", "inCitations": [], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "53-63", "journalVolume": "", "outCitations": [ "ab3e7d10ef80cc739b16937f3e97560a64db4b49", "d8ac0e93ab3a20ba9ada5ba5f74f441c537ed79f", "096ee9c89d43fd03a602aed3a37fdf43dc8e60ae", "50543dd00e1e63ed4caabd7f5f877034dc0fc33b", "61b62041b9a47d72867e21ddd352a0a0418a3ed8", "e762b1b654798cec0fb9d6000c7f7c777ac0689f", "7815c4243d581d0f96d0dac2c6e90e01d1ce94a3", "00f04d81b2d8733b041152cc70ac36e6683f96e8", "1ac13e114099c51f86b7bc31b63cf87f4472488b", "0110c80228683bc32879efb1b2f3931421e52eb6", "35b92289fe9c19e5baf462ffe91bdd2d25768b00", "514f03f6bb853a9a6bab063029ef2c95783ffe6e", "682b7c3e34922d3cde0359a013195797b43b9309", "4e27e6f8fa154e63003a840678ede36c3151c9f2", "bb117349638a1d63be1b105bba0e152bd6c031f8", 
"ab6888a1b024d109c768f81b49c77b585efc975a", "07ca289c9cca46ba5b5371473a30d364967f9d13", "5effa9492e92b08d0ac7d574406e67c383fe69b6", "5b4f0788ddcf7149e398750f792d133ee57b799d", "f9cf47539216a3737f6353dca8a8f3f1e588413e", "ce3817b367f00e3304185553c416f18ee7bb961c", "03a93e4ee3e69304b6acbb90677a8682782ba31d", "f737dd91b9a97f42e1f22df8198ac6e4171fade0", "15b275f0421c606f5903532e9964b140cbb2f878", "61ea230d0e757ff46d3a381e79691bd54b92a503", "281fdbdafbf8f2bcbde46099656063e5c82ce222", "710b3d324b07197a705683af18fc417ef712d042", "4291dbdf7c32db6f61cc624cd9d725aa7ec0fb91", "703c74b035ba667afeaa0d4287641bc87d2ea12f", "52137476895005f26098678a9af934f93071b416", "03d55467b20e662fbaa8416e853f57c93834a9fb", "3c89345bb88a440096f7a057c28857cc4baf3695", "29580052d194d35e02b06eb5b9fc47e1609e7893", "00fc41f729269271aec836bb09b9c3f8c13c7c7e", "784ad659b88a1a15745d653adce1a8bba74e5172", "211a125c77da70a958d1dc9f70ecc29b9a69f796", "37b5850e3e75a3462f3991491ca26674925f233b", "4a4b39ac50c1940f1e39ae1a81c0bffeb2974c78", "3f82aa1373e823ec622b3021fff9df4a82230267", "13089a313be0836f3fa8911236250e36b970ba2a", "468035263afa59095614f26a62e0217da4a1aeed", "012d556d67acedc6898930b4c93f54b87aabf5ee", "5a830ad18ff1a45c197570065b65d212818eaef6", "1535e84c1782f930d59fd31ebdcf2a530b11f183", "745f0e779ef6d98acf7e88002c4043f68767543c", "793b84b732bf257cb842e316a246ccfa3b0ef6a2", "3af0a17e79061462dbc2823f64ea188136271713" ], "paperAbstract": "Recent studies showed that DRAM restore time degrades as technology scales, which imposes large performance and energy overheads. This problem, prolonged restore time (PRT), has been identified by the DRAM industry as one of three major scaling challenges.This paper proposes DrMP, a novel fine-grained precision-aware DRAM restore scheduling approach, to mitigate PRT. The approach exploits process variations (PVs) within and across DRAM rows to save data with mixed precision. The paper describes three variants of the approach: DrMP-A, DrMP-P, and DrMP-U. 
DrMP-A supports approximate computing by mapping important data bits to fast row segments to reduce restore time for improved performance at a low application error rate. DrMP-P pairs memory rows together to reduce the average restore time for precise computing. DrMP-U combines DrMP-A and DrMP-P to better trade performance, energy consumption, and computation precision. Our experimental results show that, on average, DrMP achieves 20% performance improvement and 15% energy reduction over a precision-oblivious baseline. Further, DrMP achieves an error rate less than 1% at the application level for a suite of benchmarks, including applications that exhibit unacceptable error rates under simple approximation that does not differentiate the importance of different bits.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.34", "http://people.cs.pitt.edu/~zhangyt/research/pact2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/492f7339ff5744a0d2723102828e4bac50a2cc98", "sources": [ "DBLP" ], "title": "DrMP: Mixed Precision-Aware DRAM for High Performance Approximate and Precise Computing", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "4937876603808427510230d9d7bdf15dfd686ebf": { "authors": [ { "ids": [ "40209711" ], "name": "Edward Gan" }, { "ids": [ "2740804" ], "name": "Peter Bailis" } ], "doi": "10.1145/3035918.3064035", "doiUrl": "https://doi.org/10.1145/3035918.3064035", "entities": [ "Anomaly detection", "Computation", "Kernel density estimation", "Spatial database" ], "id": "4937876603808427510230d9d7bdf15dfd686ebf", "inCitations": [ "38a96a0585e6d4c5f9fe5d326fd639bb289e69f8", "fa099d45212e0b0e6486b6bd378e6f4fd06c222d", "0de36e77e0abf1efd2c7631450d7899f443ac099" ], "journalName": "", "journalPages": "945-959", "journalVolume": "", "outCitations": [ "03baeccbfe8c8cff26aacda69149a05d597880d6", "08ac4b9b82e1a9420e0036b4a419d0f90d8574fe", 
"4dfe724dad48aca90e4491b709971f7056ea19a3", "1e4558354f9509ab9992001340f88be9f298debe", "9c346f8a40d31d884c8d5496d1d46a4a0b1848d7", "741abfcb621d9aa19aa79e4aa5c5cddb1473b3da", "170f35e43d9abea37a970b2e3f5ac13489950afe", "332162f79ec80ee092ab122f8aee9370bc3719d4", "9ec241c5f9a3cb29468e1992a3965291dc943790", "044c1f31a27014301b5c879406275b70d62f320a", "fe286b96fc0544d9fabcbae2a600242b64df193a", "4e06596889af2933ab4a08ec81c4a545005c240b", "645cc2506af907d292933cfb75ca89688a1e9d83", "162d958ff885f1462aeda91cd72582323fd6a1f4", "cd800900fbd9bc89a35ff597a7ddfe74aceefa55", "82c199eaea4d0ee821afd86881e1c3cfb25478cc", "780e5d3445dfb01d8b0e4e195669a7ad88ef492c", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "71a068afb5c36437cfd6ffd40538935fb1f9dbf0", "7550b1c4abfd8fb046847f20cac360217057c47a", "483482168f727fb851ad0076dbedbcf0600d89a9", "bcf2d9d2a382dd782a7ec26f1656ca5327781225", "23981d138a1a2309d785bedd772320a8bfa55fd4", "6d8bb0b21eedbf45029449bdedfa36836c75c00e", "13a375a84a6c414b85477a401541d3e28db1e11a", "0c4867f11c9758014d591381d8b397a1d38b04a7", "e69d9f792fa7f3851e321cc7c973d3ecb0b0c66d", "bc6abce994bfc38aa07ddc6d871e6e7f692be6c7", "6e6f5ee06cca0da864d25767f5eec9e86d16312f", "a65125a7447e63f46b5f0e4e9c9f3c70cf0c3de0", "36d858eb19bba43244b92f7faabfce47b13f2403", "105ffb0e5fd7ad2bae390161a28dd16f51e2b2a0", "59c704202d45792ab8ae27a26772eeaa0a4b9c5c", "2a43f9e8e9e4d6011a0287b5ef315bd24fd308e1", "0bad381b84f48b28abc1a98f05993c8eb5be747d", "8b81aedc30417e6d60586d8c34e96cbc21431396", "7bcc53f1baf3358517a602d856192faea9442c91", "1ffcb27536ab5436e6d753919ab27ac1a44b4b69" ], "paperAbstract": "Density estimation forms a critical component of many analytics tasks including outlier detection, visualization, and statistical testing. These tasks often seek to classify data into high and low-density regions of a probability distribution. 
Kernel Density Estimation (KDE) is a powerful technique for computing these densities, offering excellent statistical accuracy but quadratic total runtime. In this paper, we introduce a simple technique for improving the performance of using a KDE to classify points by their density (density classification). Our technique, thresholded kernel density classification (tKDC), applies threshold-based pruning to spatial index traversal to achieve asymptotic speedups over naïve KDE, while maintaining accuracy guarantees. Instead of exactly computing each point's exact density for use in classification, tKDC iteratively computes density bounds and short-circuits density computation as soon as bounds are either higher or lower than the target classification threshold. On a wide range of dataset sizes and dimensions, tKDC demonstrates empirical speedups of up to 1000x over alternatives.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064035", "http://www.bailis.org/papers/tkdc-sigmod2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4937876603808427510230d9d7bdf15dfd686ebf", "sources": [ "DBLP" ], "title": "Scalable Kernel Density Classification via Threshold-Based Pruning", "venue": "SIGMOD Conference", "year": 2017 }, "4966a62deece78449e1fc8c00879fc20e558f7b3": { "authors": [ { "ids": [ "2997470" ], "name": "Shashank Gugnani" }, { "ids": [ "1720335" ], "name": "Xiaoyi Lu" }, { "ids": [ "2712614" ], "name": "Franco Pestilli" }, { "ids": [ "1789919" ], "name": "Cesar F. Caiafa" }, { "ids": [ "1731654" ], "name": "Dhabaleswar K. 
Panda" } ], "doi": "10.1109/HiPC.2017.00033", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00033", "entities": [ "Computational neuroscience", "Computer science", "Computer simulation", "Connectome", "Message Passing Interface", "OpenMP", "Parallel computing", "Program optimization", "Programming paradigm", "Sparse matrix", "Supercomputer", "Tucker decomposition" ], "id": "4966a62deece78449e1fc8c00879fc20e558f7b3", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "213-222", "journalVolume": "", "outCitations": [ "e27e2cfad056e097df9b384fd4c956b9b0577869", "c056768243a86993737b190ae8f947f409c951af", "2e1f82853405e95b455ec63930f46d3ca9e03741", "38c8607917f4d733b7985d6c73865c3de74d04fa", "8ba9f108555bc50a2d0468334d19d268320519c4", "d65dc6450c2a958064715cdbfdd2929cc6473b04", "e941ef269a90a2fedcb248f98e9185ed477ee16b", "9cc330d3be2109510935e9d3fa7d01e1000e712e", "3ac6671a0c61544b9dab543b116eccdaccc6469e", "b644c3b0da99f729728cf224d3c385e33f4d9193", "cd1b19d016ef46429a868d99cda27d3565996a21", "622c84cfba9781ad846105f28d7bf69c5405a481", "5cba0d330a947249af144b0402fe037a0c7166cf", "9cd10380592c645a6367111f1ccdfe28c8c97230", "1915d4717bbb20849e733f711d194d198df45fe5", "028d0ec1ba0ce2c47901f483afad084f2a298c01", "0541d5338adc48276b3b8cd3a141d799e2d40150", "2009575a7ae3e42a3d86214e90360e5d27aadb82", "61cbca5e7682d16094b29f5b485d641cb0793b79", "0599d32b285cf73059f09662151537308cb9635c", "437111b1534cce8d96a1f6b42223b9356c6d33bd", "3f30b25fce16664accab00f54a27d4e8a6d09b01", "a5895946af933fc3fb32f7e975a35ded0b63d619", "09eee808ba9adeefa287324e7becac83a5827081" ], "paperAbstract": "In this paper, we combine high-performance computing science with computational neuroscience methods to show how to speed-up cutting-edge methods for mapping and evaluation of the large-scale network of brain connections. 
More specifically, we use a recent factorization method of the Linear Fascicle Evaluation model (i.e., LiFE [1], [2]) that allows for statistical evaluation of brain connectomes. The method called ENCODE [3], [4] uses a Sparse Tucker Decomposition approach to represent the LiFE model. We show that we can implement the optimization step of the ENCODE method using MPI and OpenMP programming paradigms. Our approach involves the parallelization of the multiplication step of the ENCODE method. We model our design theoretically and demonstrate empirically that the design can be used to identify optimal configurations for the LiFE model optimization via ENCODE method on different hardware platforms. In addition, we co-design the MPI runtime with the LiFE model to achieve profound speed-ups. Extensive evaluation of our designs on multiple clusters corroborates our theoretical model. We show that on a single node on TACC Stampede2, we can achieve speed-ups of up to 8.7x as compared to the original approach.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00033" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4966a62deece78449e1fc8c00879fc20e558f7b3", "sources": [ "DBLP" ], "title": "MPI-LiFE: Designing High-Performance Linear Fascicle Evaluation of Brain Connectome with MPI", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "49751156296944b2e33135e561bc348a3616e2ab": { "authors": [ { "ids": [ "3379338" ], "name": "Daniele Rogora" }, { "ids": [ "3167730" ], "name": "Michele Papalini" }, { "ids": [ "1959730" ], "name": "Koorosh Khazaei" }, { "ids": [ "2866882" ], "name": "Alessandro Margara" }, { "ids": [ "1807579" ], "name": "Antonio Carzaniga" }, { "ids": [ "3292643" ], "name": "Gianpaolo Cugola" } ], "doi": "10.1145/3064176.3064190", "doiUrl": "https://doi.org/10.1145/3064176.3064190", "entities": [ "Central processing unit", "Experiment", "Graphics processing unit", 
"Information processing", "Publish\u2013subscribe pattern", "Relational database management system", "Routing", "Social media", "Stream processing", "Subset sum problem", "Throughput" ], "id": "49751156296944b2e33135e561bc348a3616e2ab", "inCitations": [ "6b527d979475082e28dcf74a6f44f083d2594834" ], "journalName": "", "journalPages": "513-526", "journalVolume": "", "outCitations": [ "07d50d2da06698144a1de053d73a31d822ca1b12", "504e48ebe922a3a2130bb6ef9f4d71be85654f22", "2bc133545a309095df2078f8c62bd2e9a31883a0", "61f5bd13750025d6c75b55d71841ea75c7e8794c", "0fff0f43d0b4466db16d7cd51672f9c5861c70f7", "0706356c9ab6014d6b04577d38289ea8328291a5", "347d29ba62285af59b752b53916aa5b5f48031ef", "9828fe888d14bca6dd4577a104451dc2ef64f48c", "3b26f8c12f623ba477ae322762a530766b4a7614", "54c7caede820e8ab72416b34e73d6fc30b07748c", "0a5fd4d877fa683497f3f1556937dec27828ca7a", "e80f1b5dc27db93b071bf60a23a9fdcc4fbef282", "7819281b7ada8df0224651cbe8c01c0f0bba110e", "7ba2979c5ce08ac5e3c21171e146fdfea8b216b4", "b55ca85ea650f96734b6e02a5b904dea2cba6bda", "0437e781bf22d47f3a13cca1e27eca6ae91d3f41", "10fa198c6402f827f419388fd2b4c171a93a0111", "b2ab1ef321654692bc72fe1b345abcfe31ce77d3", "8e1852352d66f66e16c3f2028d739cb4420320e8", "0ff43591f0e398b98b3a579af74ed01caba8d0c6", "15ad5d98bf1c5b0341ef42bc7c25cefccdb3772c", "281f5be3b32f31dfe3974f645ba5c253e6f072cd", "62262d8ee5f03675b3f231e9e2abce8a7411ca39", "52e5409757878eccbf74ad3439d661ab18572e3a" ], "paperAbstract": "Large-scale information processing often relies on subset matching for data classification and routing. Examples are publish/subscribe and stream processing systems, database systems, social media, and information-centric networking. 
For instance, an advanced Twitter-like messaging service where users might follow specific publishers as well as specific topics encoded as tag sets must join a stream of published messages with the users and their preferred tag sets so that the user tag set is a subset of the message tags.\n Subset matching is an old but also notoriously difficult problem. We present TagMatch, a system that solves this problem by taking advantage of a hybrid CPU/GPU stream processing architecture. TagMatch targets large-scale applications with thousands of matching operations per second against hundreds of millions of tag sets. We evaluate TagMatch on an advanced message streaming application, with very positive results both in absolute terms and in comparison with existing systems. As a notable example, our experiments demonstrate that TagMatch running on a single, commodity machine with two GPUs can easily sustain the traffic throughput of Twitter even augmented with expressive tag-based selection.", "pdfUrls": [ "http://www.people.usi.ch/rogord/eurosys_pres.pdf", "http://www.people.usi.ch/rogord/rpkmcc_eurosys.pdf", "http://doi.acm.org/10.1145/3064176.3064190" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/49751156296944b2e33135e561bc348a3616e2ab", "sources": [ "DBLP" ], "title": "High-Throughput Subset Matching on Commodity GPU-Based Systems", "venue": "EuroSys", "year": 2017 }, "4983002656dc35c6a1d0ce39eb56c70d3e55b7fd": { "authors": [ { "ids": [ "26729512" ], "name": "Hang Liu" }, { "ids": [ "1744674" ], "name": "H. 
Howie Huang" } ], "doi": "", "doiUrl": "", "entities": [ "Asynchronous I/O", "Bitmap", "Graph (abstract data type)", "Graphene", "In-memory database", "In-memory processing", "Programming model", "Scalability" ], "id": "4983002656dc35c6a1d0ce39eb56c70d3e55b7fd", "inCitations": [ "c7e2c4bea500ea7926a50973d861f01bb8e5e364", "a61e5aab233900f6febae67471e198eb3bb89e3a", "a038202963e55feb5f7a41ed3ec6d7073beec6b9", "356100b33d589bb48fa1a6518a85efb551a13d9b", "5a4655bb21fed59e0a1eaa6eb7d31c00be1c3f84", "ab77fa0fcdb5882b9cc992a4d870bc1ebf69cf5d", "3b57c7bcece47f2a3198e6adec38f712f2914be5" ], "journalName": "", "journalPages": "285-300", "journalVolume": "", "outCitations": [ "2209304ccc2b0501debd5e9a90ae739f9a30cdef", "caacd536fa218ef5218021506ebc041e3f460064", "2b9e6181502369199bd89691a27f89bdbaac36e4", "0a791a760dd883342c8b8456a3e7cb75fb996ef4", "0eff3eb68ae892012f0d478444f8bb6f50361be5", "1156f60e40548096df49528b1342bb3e88b0f378", "027485f716ca4f6d9ee2e189790d6560e37fcab2", "2a30b4cb56853002133311372ce8313b14fba158", "8e67d1085da29e5aa1e758751bfa5469ac07023e", "1e27b9b447cebd5047050e39bb9246fa6364b760", "0ea110472ee018a8034898588c9bdede1e0c8df8", "6536b5743e53c00bb1600f954959ae00dc24da98", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "11b93c5f176431efa72208d8b4c4c88d46261695", "6de3915df2b9927a78f213629f3bcb052ec21e8b", "0a8e0a9f0ae9910d5ecf165071559a2c4191a098", "4dc578364f357b993b5554b9181c90c84aa6b4d1", "eb82d3035849cd23578096462ba419b53198a556", "e1e066a860978918808db9ba2bc6a2dff63a1455", "3486aeaf540c48952120fe853d672af984f40a6a", "175d795f44037ef60dd9df341701cd5fdc449f1f", "131ec93c0751b6cdeeff4a5d62a7e4810d06f0de", "ef9d9821df55442f039b128bb5cef2b41ab2cadc", "0f34ea8535dc5833a1a3692ffc7abc6740d2406a", "98921bac0c208cd0d2fcd51101902f51ab88416f", "0ee5abec0c7002c759d70e4d75921b65a6d8666a", "de5bd35339e5692002a77145d8b861940429ad77", "e2462bde978023a9069cc08326f626135a95cb89", "41880f9408bf4d826e4a715ee783e2d9d8666c2f", "31ffb232b5c1186bb90502254162ac3d99baf50b", 
"48930aa2539b12d60352283dd4f91c845cf9b69c", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "0608d9937c074520cdc93cc444cc1c77039c5332", "3baecc04e1341cbae7999e8f61a3946c76504828", "5e49e7a0a6c6d46a368a4c036bc9e89a0ac4edd5", "3d4145d8b555d27a78fdea734fe712121dc86526", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "8d1a4ae1d3a17edd5b16653f2f582f9952b71612", "259e93de2f10d395a1bdfb2dc6da72b6a3998572", "6f7cd29a3dfdcb2f6880a022e13054542020c5ce", "225ca2f92481b253310686a7b6c40032bde507ea", "0e3253e5ca318e70d4968a45f8d41f88dbffd9e3", "0ad8e89091eed09217e66adc98136126addc2619", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "141004dee9e799b40bfaf50b4a72618613137250", "6a888f3dd0a17b0241be61daa378ba6caffa6617", "191fd33f17c2a79b3825d4cc2105c47a8f16ba44", "7ebb9fad71ce8e08d5284b7644a5452cff6c75b3", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "282bc59faefb734137d2ea978cb1eb5699e67c7c" ], "paperAbstract": "As graphs continue to grow, external memory graph processing systems serve as a promising alternative to inmemory solutions for low cost and high scalability. Unfortunately, not only does this approach require considerable efforts in programming and IO management, but its performance also lags behind, in some cases by an order of magnitude. In this work, we strive to achieve an ambitious goal of achieving ease of programming and high IO performance (as in-memory processing) while maintaining graph data on disks (as external memory processing). To this end, we have designed and developed Graphene that consists of four new techniques: an IO request centric programming model, bitmap based asynchronous IO, direct hugepage support, and data and workload balancing. 
The evaluation shows that Graphene can not only run several times faster than several external-memory processing systems, but also performs comparably with in-memory processing on large graphs.", "pdfUrls": [ "http://www2.seas.gwu.edu/~howie/publications/Graphene_FAST17.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/liu", "http://www.usenix.org./system/files/conference/fast17/fast17-liu.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17-liu.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4983/002656dc35c6a1d0ce39eb56c70d3e55b7fd.pdf", "s2Url": "https://semanticscholar.org/paper/4983002656dc35c6a1d0ce39eb56c70d3e55b7fd", "sources": [ "DBLP" ], "title": "Graphene: Fine-Grained IO Management for Graph Computing", "venue": "FAST", "year": 2017 }, "4995be654e5e7262dfd225d88b00dd0ea3de0440": { "authors": [ { "ids": [ "1851521" ], "name": "Shijie Jia" }, { "ids": [ "2519139" ], "name": "Luning Xia" }, { "ids": [ "32246333" ], "name": "Bo Chen" }, { "ids": [ "1687577" ], "name": "Peng Liu" } ], "doi": "10.1145/3133956.3134011", "doiUrl": "https://doi.org/10.1145/3133956.3134011", "entities": [ "Adobe Flash", "Adversary (cryptography)", "Cryptanalysis", "Deniable encryption", "Encryption", "FTL: Faster Than Light", "Flash file system", "Flash memory", "Flash memory controller", "Information sensitivity", "Mobile operating system", "Open-source software", "Operating system" ], "id": "4995be654e5e7262dfd225d88b00dd0ea3de0440", "inCitations": [ "6934305a246cc0b5776dcdea2030584eb7a0f274" ], "journalName": "", "journalPages": "2217-2229", "journalVolume": "", "outCitations": [ "14b639045731afd3762bcc61723f866724edc2f0", "0a806d8e11c4b95a23d0190ad75c198e377f0648", "07d799dfb834fab9059ae57d86c0ab772faa11b3", "a2df88792bb8d037e4b75002b6f992b2270b853d", "5766c96a6e758c8e190bf469c926a39840600c50", "375d954d4c5078e39155e69444d680a8da13742e", "37d719cd1dd3ffc4530e9440ebbc3b80479fd52f", 
"421d0602f6bb86a77a1e676f777cbb95c54ad1f8", "1f13dfc43e4e68649ebbaa59f093c8f0f0975ee2", "a148531d024871b24a0442a92b107702c3486140", "57c672e8b0bd70233ebc96c10905a5ff2f2b75b3", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", "1526d412d7bdb83dcafadd1c28cf8b4c7e4f130d", "9c4b9a86c6beba0b88828e674ea809aee70641eb", "20825b909784e4941b65675fd78724ce202ba4da", "249ff3aefc661bd8dd17f537ea06f935cb248b1e", "3610bf2e6be4197502a79d80f72d5ce095b4d709", "024e9ee7a8220451f03da763307f2c49dee321bd", "3f8dbd4a5e05ab90f59442b77a5f6b8afe8ee561", "b4b26e52580d7eeb0ccbc8e5529e34a831bc4e65", "61ea7ef665186310a24af134441de0a18b6c351e", "16a7e034890b4fdf870dfe4cc806de636787798e", "00ecd7b2e0c364ce4e9f5416ee1dbeaeabe87a62", "54ade44a1a66bec6caa6d60c30125019ca514b6e" ], "paperAbstract": "Mobile devices today have been increasingly used to store and process sensitive information. To protect sensitive data, mobile operating systems usually incorporate a certain level of encryption to protect sensitive data. However, conventional encryption cannot defend against a coercive attacker who can capture the device owner, and force the owner to disclose keys used for decrypting sensitive information. To defend against such a coercive adversary, Plausibly Deniable Encryption (PDE) was introduced to allow the device owner to deny the very existence of sensitive data stored on his/her device. The existing PDE systems, built on flash storage devices, are problematic, since they either neglect the special nature of the underlying storage medium (which is usually NAND flash), or suffer from deniability compromises.\n In this paper, we propose DEFTL, a Deniability Enabling Flash Translation Layer for devices which use flash-based block devices as storage media. DEFTL is the first PDE design which incorporates deniability to Flash Translation Layer (FTL), a pervasively deployed \"translation layer\" which stays between NAND flash and the file system in literally all the computing devices. 
A salient advantage of DEFTL lies in its capability of achieving deniability while being able to accommodate the special nature of NAND flash as well as eliminate deniability compromises from it. We implement DEFTL using an open-source NAND flash controller. The experimental results show that, compared to conventional encryption which does not provide deniability, our DEFTL design only incurs a small overhead.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134011" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4995be654e5e7262dfd225d88b00dd0ea3de0440", "sources": [ "DBLP" ], "title": "DEFTL: Implementing Plausibly Deniable Encryption in Flash Translation Layer", "venue": "CCS", "year": 2017 }, "4997bc790d4cee11cfafdb027e94ae1d5747a981": { "authors": [ { "ids": [ "1696783" ], "name": "Jan Camenisch" }, { "ids": [ "39792258" ], "name": "Liqun Chen" }, { "ids": [ "2192968" ], "name": "Manu Drijvers" }, { "ids": [ "2259954" ], "name": "Anja Lehmann" }, { "ids": [ "1712869" ], "name": "David Novick" }, { "ids": [ "3100329" ], "name": "Rainer Urian" } ], "doi": "10.1109/SP.2017.22", "doiUrl": "https://doi.org/10.1109/SP.2017.22", "entities": [ "Cryptography", "Diffie\u2013Hellman key exchange", "Direct Anonymous Attestation", "Forward anonymity", "Key (cryptography)", "Keyboard shortcut", "Provable security", "Synchronous optical networking", "Trusted Computing", "Trusted Platform Module", "U-Prove", "X86" ], "id": "4997bc790d4cee11cfafdb027e94ae1d5747a981", "inCitations": [ "3c81ce9f0fec992bed6408a9cfa8bce54601d0de", "efdb05c4045eba65592a2265bc09003ed5953741" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "901-920", "journalVolume": "", "outCitations": [ "f770d149204f9351d123dbe852ee5c7a74a52463", "02dc2a93a48d38deae9f1369d5b33ce98af2a3f2", "c11b9dba2097d18db45e82ec48e68eb160c694df", "d94ba52bd05fe12926836c3332e2fd895e78ed30", "d74dddc04b7c2fa2d0922b4d9865a48ba1d241be", 
"396a7b3289504052e115d65cf7a20ccb4e2c52ca", "03e187f67f6039c845057066b31c7c0fcd6732a3", "b023758abac2aa4885d39ceebd066d3822c0df5e", "5d9723e30c1eabd31a15a1010c1c87ab1ff13523", "8c9ce2108cfd83aee973b492cbef052cf75f61c1", "32cc3fd437950a098d6e93ae755fc6571554a955", "7f14bca3b6f51d4cfe8084798c2808c08b0214d6", "4ac19d17ffbaa5430a22fa09bf236ffcb2b0457e", "0aa20fb7c3a5aa0f2af3e2a1f857bf9073ec157f", "11ef405a5ef00e402fe2f0d265f2fada864f02ad", "42333e3f231bbfe508f6da6bad2feff9ae223113", "a91e12486d718779332e084f312c76fdb9625103", "4eeb690f196fa529f7c01b281efbc8862b90ffa7", "2044ab1ce724ec11c653ed8d642a1592542d8630", "36ff3d2df8cac71f4e75637587bb2fe7bc64830e", "4634825ba407273945f4e7d10568afe99ec0a843", "90f26c0f0d04b9c4999b454c35ee1c7603ca9e4b", "39d4af99edc754d829afaf5c1e02ea17f5a93fc2", "0df07fc5f4e09f5c9030efecf359d989d81fa36d", "86a9a7dbe71e2b5b976003223bb4d36f1f3ccacc", "d6b5642a71cdc8d2cb7a7336a4dce6f95f2f9d2b", "8e4f5a794e6c2ede4ddca1bb5fbdaa5ff8496ca6", "83998491aa362c625189fd6b4691bb8438a3150e", "69fa620d332120263317fafc41298b2a3d9b67c7", "e92e4dcd33db9cce534be837606f8dbe8f579b5c", "10865f0f01950612a64dc00e09efdd4057ebeb6e" ], "paperAbstract": "The Trusted Platform Module (TPM) is an international standard for a security chip that can be used for the management of cryptographic keys and for remote attestation. The specification of the most recent TPM 2.0 interfaces for direct anonymous attestation unfortunately has a number of severe shortcomings. First of all, they do not allow for security proofs (indeed, the published proofs are incorrect). Second, they provide a Diffie-Hellman oracle w.r.t. the secret key of the TPM, weakening the security and preventing forward anonymity of attestations. Fixes to these problems have been proposed, but they create new issues: they enable a fraudulent TPM to encode information into an attestation signature, which could be used to break anonymity or to leak the secret key. 
Furthermore, all proposed ways to remove the Diffie-Hellman oracle either strongly limit the functionality of the TPM or would require significant changes to the TPM 2.0 interfaces. In this paper we provide a better specification of the TPM 2.0 interfaces that addresses these problems and requires only minimal changes to the current TPM 2.0 commands. We then show how to use the revised interfaces to build q-SDH-and LRSW-based anonymous attestation schemes, and prove their security. We finally discuss how to obtain other schemes addressing different use cases such as key-binding for U-Prove and e-cash.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.22", "http://www.ieee-security.org/TC/SP2017/papers/134.pdf", "https://eprint.iacr.org/2017/639.pdf", "http://eprint.iacr.org/2017/639" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4997bc790d4cee11cfafdb027e94ae1d5747a981", "sources": [ "DBLP" ], "title": "One TPM to Bind Them All: Fixing TPM 2.0 for Provably Secure Anonymous Attestation", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "499b3d7afb464ff18a9e0aeb77f8ab507b1fa3c7": { "authors": [ { "ids": [ "3076315" ], "name": "Adam J. Aviv" }, { "ids": [ "1702836" ], "name": "Seung Geol Choi" }, { "ids": [ "2531090" ], "name": "Travis Mayberry" }, { "ids": [ "1789056" ], "name": "Daniel S. 
Roche" } ], "doi": "", "doiUrl": "", "entities": [ "Backup" ], "id": "499b3d7afb464ff18a9e0aeb77f8ab507b1fa3c7", "inCitations": [ "53f7a3697e3e5c620f5413b77e86488d7bf089a9", "50ba271c1e0ddd814b6e79348a8963c788d9ddf9" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "543", "journalVolume": "2016", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "http://arxiv.org/abs/1605.09779", "http://eprint.iacr.org/2016/543", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/oblivisync-practical-oblivious-file-backup-and-synchronization/" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/499b3d7afb464ff18a9e0aeb77f8ab507b1fa3c7", "sources": [ "DBLP" ], "title": "ObliviSync: Practical Oblivious File Backup and Synchronization", "venue": "NDSS", "year": 2016 }, "49edf7f0dbad8b8c101af9ef95c72f62f545591e": { "authors": [ { "ids": [ "6215698" ], "name": "Junxian He" }, { "ids": [ "2749311" ], "name": "Zhiting Hu" }, { "ids": [ "1719641" ], "name": "Taylor Berg-Kirkpatrick" }, { "ids": [ "3127520" ], "name": "Ying Huang" }, { "ids": [ "1752601" ], "name": "Eric P. 
Xing" } ], "doi": "10.1145/3097983.3098074", "doiUrl": "https://doi.org/10.1145/3097983.3098074", "entities": [ "Algorithmic efficiency", "Calculus of variations", "Centrality", "Cubic function", "Document classification", "Experiment", "Sampling (signal processing)", "Sparse matrix", "Speedup", "Time complexity", "Topic model" ], "id": "49edf7f0dbad8b8c101af9ef95c72f62f545591e", "inCitations": [ "127405a30787c82c83a4dc28cee0cddec51ff1fc", "0a465c15bebccd1500718548b18800fd3c463ed0", "3079dfff76bcaf3673571fd7ac661d35e1634087" ], "journalName": "", "journalPages": "225-233", "journalVolume": "", "outCitations": [ "9c13b87b5efb4bb011acc89d90b15f637fa48593", "0644b1423423aac84f111cc921c71985f77eb316", "129face5f40d05de412e5ccabba726129f4020fc", "508327e33e3455342c24e740df57e2b8e114964e", "481eb978677eaae4e01639f03212fd81d1a5a448", "33efd3ecffca21efaf9d1469b7dc3d2a72a0a05e", "02c7904d986759076b6ddae1560ccba0042028bc", "bc59c1011810646429254b57e708ff001e820d0a", "9db4814c824ffbe24eeccf5453fa22ea24da22d8", "17b81a0599019406bf207284cce0ddafe9e0273f", "3769644c21140977abf85317e1c75780075fbcd4", "4a6c45ca0487ad09ba68eacbc4394d6525b3dfaf", "81a29e8a08597468e3ae707a245efbd53241c838", "094cbfa06f8374b49b84524a466a63d34c9ef34f", "db0b761ddedfaf87691e36c448cd719b31b7a97a", "4ab1d7ed09d7ac188f1f5317055f7b0abf560d14", "4bf1c5e88feca251a5e668e05b4dde5d5bf8dd6d", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "24885c5a4169b4b2b0dac7160ea24eb8e86306da", "1c059493904b2244d2280b8b4c0c7d3ca115be73", "d743e85dc50f857dfde900af9cbd7dad2ac34003", "0675ea0d28fb515f9db05be368051befeef663da", "129397ed6557da93db5ba18cf112ee7b0a7927d8", "e56857348520a6c0ad80690b3cb8621883a32c25", "83e180e77c8b650cf2a030a4a4f3687dc1a7d328", "6426a13539e66966d63ae161fcd65288a5e23fe6", "56335341f14a8a750e242ca5600748f62a09503f", "9208ecbd7244040ba6ee59a067b527c8b095fe0a", "43db1411921c14550cf652bd5bb9ecd34cd8fcdd", "00ca2e50ae1c5f4499d9271c72466d2f9d4ae137", "10e9afe3aa298496314c450d7543f0daff9950e9", 
"0f88de2ae3dc2ec1371d1e9f675b9670902b289f", "0fcf3ff8b6fc26cf8e45f44a0e4b974623bd9d91", "11f9ce4597878abef6a278a7eb1fa5eb010ee740", "132c8b4d0760d2d35c99b0358c8bc5a51170e5e7" ], "paperAbstract": "Correlated topic modeling has been limited to small model and problem sizes due to their high computational cost and poor scaling. In this paper, we propose a new model which learns compact topic embeddings and captures topic correlations through the closeness between the topic vectors. Our method enables efficient inference in the low-dimensional embedding space, reducing previous cubic or quadratic time complexity to linear w.r.t the topic size. We further speedup variational inference with a fast sampler to exploit sparsity of topic occurrence. Extensive experiments show that our approach is capable of handling model and data scales which are several orders of magnitude larger than existing correlation results, without sacrificing modeling quality by providing competitive or superior performance in document classification and retrieval.", "pdfUrls": [ "https://arxiv.org/pdf/1707.00206v1.pdf", "http://arxiv.org/abs/1707.00206", "http://doi.acm.org/10.1145/3097983.3098074" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/49edf7f0dbad8b8c101af9ef95c72f62f545591e", "sources": [ "DBLP" ], "title": "Efficient Correlated Topic Modeling with Topic Embedding", "venue": "KDD", "year": 2017 }, "4a2ee48574bf1fdd2a2f0d44b3f30992410f3218": { "authors": [ { "ids": [ "3490654" ], "name": "Philipp Holzinger" }, { "ids": [ "39888100" ], "name": "Ben Hermann" }, { "ids": [ "34972481" ], "name": "Johannes Lerch" }, { "ids": [ "1752222" ], "name": "Eric Bodden" }, { "ids": [ "1681134" ], "name": "Mira Mezini" } ], "doi": "10.1109/SP.2017.16", "doiUrl": "https://doi.org/10.1109/SP.2017.16", "entities": [ "Access control", "Code refactoring", "Comparison of privilege authorization features", "Confused deputy problem", "Experiment", "Hardening (computing)", "Java", 
"Java Class Library", "Java virtual machine", "Job Control Language", "Observable", "Privilege (computing)", "Software maintenance", "Stack-oriented programming language", "Usability", "Vector (malware)" ], "id": "4a2ee48574bf1fdd2a2f0d44b3f30992410f3218", "inCitations": [ "0bb32ad998fbac2575e16ab25ba8f9edc641d100" ], "journalName": "", "journalPages": "1027-1040", "journalVolume": "", "outCitations": [ "26fd9d028ed3557a7ffb6f3452380b41eb246b30", "1b40d2904c9137bdf36a80524ad0d3f25215a91a", "72cd420cf1f87d85b10b87fca638f33c7b28ed0b", "00815d5c9c25f69efeaa7c38ce0377fc1b3260ca", "24ef374b1ad7f1cb25db95957bb4563f0e831c53", "2f38968530d0970f7392d9d8868ff83759ae21c2", "33888bf64fa1182ab28e10da99cc2166cef41c4f", "690001d2bc3645c3fa2bb17b35c93c793e721199", "9d2cabf936ec6a003c0173008b985431da2a66a4", "00a9ba0063d34ec56792849a67ef57b4601becbb", "0cd7c4a974a2d1b35a7a500b4f58126982956ed7", "557555195d8cc631281988661984ed8c4e91b395", "88d4d0b80fdd23d384b050e9951fa76d0bb7e5b1", "9bb072299ba67badaa80a18700f770e5c41da44b", "8e865e9eab571f95b06f318593c51aea568060ac", "7536007058f529427ee9f2ce1b6b08afd5f2b777", "5493f512ba418c21f1ce20e20985157f7509007c", "195897495cfaf1890192911b52d5e761765c5fad", "ad8d021ec13f6d8f178f80d950c54ab563c202a2", "f1e394b9569f2c13c23515075f9b1e7d1fa3b00e", "0512f137ea37133028c9116f4f76f18c7d493e99", "04bae0c0934c6ba8e31e7de83027a5641489b733", "3e0dd412a6ce23ee193004e567ef43802087c651", "585f6f6b02e0e56a18745bc9240218ab3bb3bd11", "070b61be33df9504b00c233abf4f8bc17ee255ec" ], "paperAbstract": "While the Java runtime is installed on billions of devices and servers worldwide, it remains a primary attack vector for online criminals. As recent studies show, the majority of all exploited Java vulnerabilities comprise incorrect or insufficient implementations of access-control checks. This paper for the first time studies the problem in depth. 
As we find, attacks are enabled by shortcuts that short-circuit Java\u2019s general principle of stack-based access control. These shortcuts, originally introduced for ease of use and to improve performance, cause Java to elevate the privileges of code implicitly. As we show, this creates many pitfalls for software maintenance, making it all too easy for maintainers of the runtime to introduce blatant confuseddeputy vulnerabilities even by just applying normally semanticspreserving refactorings. How can this problem be solved? Can one implement Java\u2019s access control without shortcuts, and if so, does this implementation remain usable and efficient? To answer those questions, we conducted a tool-assisted adaptation of the Java Class Library (JCL), avoiding (most) shortcuts and therefore moving to a fully explicit model of privilege elevation. As we show, the proposed changes significantly harden the JCL against attacks: they effectively hinder the introduction of new confused-deputy vulnerabilities in future library versions, and successfully restrict the capabilities of attackers when exploiting certain existing vulnerabilities. 
We discuss usability considerations, and through a set of large-scale experiments show that with current JVM technology such a faithful implementation of stack-based access control induces no observable performance loss.", "pdfUrls": [ "http://bodden.de/pubs/hhl+17hardening.pdf", "https://doi.org/10.1109/SP.2017.16" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4a2e/e48574bf1fdd2a2f0d44b3f30992410f3218.pdf", "s2Url": "https://semanticscholar.org/paper/4a2ee48574bf1fdd2a2f0d44b3f30992410f3218", "sources": [ "DBLP" ], "title": "Hardening Java's Access Control by Abolishing Implicit Privilege Elevation", "venue": "IEEE Symposium on Security and Privacy", "year": 2017 }, "4a31117f8cd83ba8a4633f59bb70527320fb0394": { "authors": [ { "ids": [ "2217840" ], "name": "Djordje Jevdjic" }, { "ids": [ "1718508" ], "name": "Karin Strauss" }, { "ids": [ "1717411" ], "name": "Luis Ceze" }, { "ids": [ "1704103" ], "name": "Henrique S. Malvar" } ], "doi": "10.1145/3037697.3037718", "doiUrl": "https://doi.org/10.1145/3037697.3037718", "entities": [ "Approximation algorithm", "Areal density (computer storage)", "Bit-level parallelism", "Block cipher", "Block cipher mode of operation", "Cipher", "Cognitive dimensions of notations", "Digital rights management", "Encoder", "Encryption", "Error detection and correction", "H.264/MPEG-4 AVC", "Invariant (computer science)", "Multi-level cell", "Streaming media", "Wafer (electronics)" ], "id": "4a31117f8cd83ba8a4633f59bb70527320fb0394", "inCitations": [ "6a59d22406c5e2c5cccd3cb9b29a316294cf0f1e", "e306eb508792ab0c11ac0b3403e59060e7d0f5dd", "8faa2947d33fec55cfe4e285131d3ee31726d29b" ], "journalName": "", "journalPages": "361-373", "journalVolume": "", "outCitations": [ "793b84b732bf257cb842e316a246ccfa3b0ef6a2", "7e99c53b9d7ed8058f1bf0b54502cb679ff9e950", "ab3e7d10ef80cc739b16937f3e97560a64db4b49", "53edf72207aa39794c6395c38bc4f1fac0f4a0c7", "52137476895005f26098678a9af934f93071b416", "5a830ad18ff1a45c197570065b65d212818eaef6", 
"1856f3f4184efc066cd5695785bd0cd1ee9e3d07", "8db1d284f9b620511014d53899d3b1fe99cd301f", "0f9e4c5e55310ef558bd867c90bba8dcd74000cf", "50543dd00e1e63ed4caabd7f5f877034dc0fc33b", "134986cbc54fb03900251e22d23e15b455abaf66" ], "paperAbstract": "The popularization of video capture devices has created strong storage demand for encoded videos. Approximate storage can ease this demand by enabling denser storage at the expense of occasional errors. Unfortunately, even minor storage errors, such as bit flips, can result in major visual damage in encoded videos. Similarly, video encryption, widely employed for privacy and digital rights management, may create long dependencies between bits that show little or no tolerance to storage errors.\n In this paper we propose VideoApp, a novel and efficient methodology to compute bit-level reliability requirements for encoded videos by tracking visual and metadata dependencies within encoded bitstreams. We further show how VideoApp can be used to trade video quality for storage density in an optimal way. We integrate our methodology into a popular H.264 encoder to partition an encoded video stream into multiple streams that can receive different levels of error correction according to their reliability needs. When applied to a dense and highly error-prone multi-level cell storage substrate, our variable error correction mechanism reduces the error correction overhead by half under the most error-intolerant encoder settings, achieving quality/density points that neither compression nor approximation can achieve alone. Finally, we define the basic invariants needed to support encrypted approximate video storage. 
We present an analysis of block cipher modes of operation, showing that some are fully compatible with approximation, enabling approximate and secure video storage systems.", "pdfUrls": [ "https://homes.cs.washington.edu/~luisceze/publications/ASPLOS17_VideoApp.pdf", "http://doi.acm.org/10.1145/3037697.3037718" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4a31117f8cd83ba8a4633f59bb70527320fb0394", "sources": [ "DBLP" ], "title": "Approximate Storage of Compressed and Encrypted Videos", "venue": "ASPLOS", "year": 2017 }, "4a732e95c9b0f9dd7ed19ee556524a7f82640f83": { "authors": [ { "ids": [ "33561372" ], "name": "Julien Herrmann" }, { "ids": [ "30233045" ], "name": "Jonathan Kho" }, { "ids": [ "2569059" ], "name": "Bora U\u00e7ar" }, { "ids": [ "2793301" ], "name": "Kamer Kaya" }, { "ids": [ "1710813" ], "name": "\u00dcmit V. \u00c7ataly\u00fcrek" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Baseline (configuration management)", "Critical path method", "Directed acyclic graph", "Execution pattern", "Experiment", "Graph (discrete mathematics)", "Heuristic", "Linear algebra", "Locality of reference" ], "id": "4a732e95c9b0f9dd7ed19ee556524a7f82640f83", "inCitations": [ "6222e2293eef57ded477b094663e273eb79961e6" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "371-380", "journalVolume": "", "outCitations": [ "2aa5bbf2ea321324c6ac763022ca9e06e1ad37e4", "606a9ba378872af47d7e4a9ef36ee33ef1d005d7", "1fe2a9ec7af750b27a1bc53abab74535daf44d90", "5970ed52ee13472729ee7403085274554b5cba2e", "378ac48cb9ecf6af6f7934370524186bbc74dc34", "2a4b55c6d86401e916d402eba48d77f37dcebaf0", "7bb8a1e2ec935b55af512a9733eccea72312f9d6", "27c0a6f1ca20306746d2d17d929044b9fd97145e", "65250c893b60e86360352d239842e6c37967b2fb", "9bf4b2e13a18c18a19bdb1930060e45608435cb0", "cc6c49137e671ee1ea5c94d818d54bc44f319972", "6c5840e20e16a81489fdb8e2c9b4657248600609", 
"23dadf25f3efacbc9c66f69093d656ad5b003529", "6b7f07be70390a240cbd6d5eabd08cc5aef22317" ], "paperAbstract": "Finding a good partition of a computational directed acyclic graph associated with an algorithm can help find an execution pattern improving data locality, conduct an analysis of data movement, and expose parallel steps. The partition is required to be acyclic, i.e., the inter-part edges between the vertices from different parts should preserve an acyclic dependency structure among the parts. In this work, we adopt the multilevel approach with coarsening, initial partitioning, and refinement phases for acyclic partitioning of directed acyclic graphs and develop a direct k-way partitioning scheme. To the best of our knowledge, no such scheme exists in the literature. To ensure the acyclicity of the partition at all times, we propose novel and efficient coarsening and refinement heuristics. The quality of the computed acyclic partitions is assessed by computing the edge cut, the total volume of communication between the parts, and the critical path latencies. We use the solution returned by well-known undirected graph partitioners as a baseline to evaluate our acyclic partitioner, knowing that the space of solution is more restricted in our problem. 
The experiments are run on large graphs arising from linear algebra applications.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101164" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4a732e95c9b0f9dd7ed19ee556524a7f82640f83", "sources": [ "DBLP" ], "title": "Acyclic Partitioning of Large Directed Acyclic Graphs", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "4a77fe1e0f42d44339bf0f690deddab56ce0aa7c": { "authors": [ { "ids": [ "2304646" ], "name": "Toshinori Araki" }, { "ids": [ "19319982" ], "name": "Assi Barak" }, { "ids": [ "1725272" ], "name": "Jun Furukawa" }, { "ids": [ "19180578" ], "name": "Tamar Lichter" }, { "ids": [ "1682750" ], "name": "Yehuda Lindell" }, { "ids": [ "2192482" ], "name": "Ariel Nof" }, { "ids": [ "2548315" ], "name": "Kazuma Ohara" }, { "ids": [ "19219245" ], "name": "Adi Watzman" }, { "ids": [ "3479789" ], "name": "Or Weinstein" } ], "doi": "10.1109/SP.2017.15", "doiUrl": "https://doi.org/10.1109/SP.2017.15", "entities": [ "AND gate", "Adversary (cryptography)", "Analysis of algorithms", "Cache (computing)", "Computation", "High-throughput computing", "Random permutation", "Secure multi-party computation", "Throughput" ], "id": "4a77fe1e0f42d44339bf0f690deddab56ce0aa7c", "inCitations": [ "3280131aeaee5d3391cff8de3cfc291049877000", "5603325eee0f5d70176860d8cc77a9a9c89289a7", "3470761bbe2c48b763a31a684abaeb8c99596b7c" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "843-862", "journalVolume": "", "outCitations": [ "796ff7cef7dcd8b9c577a86473fc1067e1078144", "55aafd6b8e9816203006c7e5c37e2e2fb8451113", "5adc94602d07e49cc1e94e2aa2b1bdf3481a47f8", "ada825ba76ae506dd30092c99af702ec3859272a", "7dd5a9a774b96ef8f551ded6418fe8adf28e8952", "d04f7f8eed11e5e58a41e314b00e49d7424d82ec", "937cb0795bf3bfb5c9cbb7387ed68301bb6995ce", "e50ae4d480d84c7cbdb8edcebf13e57f5a47c8ad", 
"a853e0842d74fa3ff146f45ea7f2ed52dac08d1a", "9fa0ee74353fd008f2fbb1f6d724437678cbf9dd", "1c07a74467c912602b33f28e90abd6eeaa60af6d", "13e622fca1a6b52aa85898e260f9455e4ba0d94b", "362246709de205ec0ac5b34e07306839c38d5a3a", "bcb49a06e4fb7ea831257e146073d84234f4d238", "f71db4d70d4cc9e931a63dde7a6db8dad10a61a0", "69dc0fe412f974a595abe6d7052d8fdf2304ba3b", "8651440f3f6e1e0d8a29564c0135673e9dd13829", "04948723dec0e6724777ee56f0d10168cce44921", "6223684e14778e4d7948e994d2169ebf38e0a95f", "23ec68ed03b485b645478a3f6905615617d905a6", "19c3736da5116e0e80a64db35afe421663c4b4a8", "94f133780f7c4b09e2513628e5cebe67c009b7d5", "27e9745fc94ccf6039dd1804cbb99760544fc59b", "15509fdb7fc7bf065fcdf776b38cf3d72d10c113", "2aa24ddd5c4eea28fc3b751fb5915c01d9337626", "a9ca6a9079bcb5c513ebf63a029d7cdbb8245fa3" ], "paperAbstract": "Secure multiparty computation enables a set of parties to securely carry out a joint computation of their private inputs without revealing anything but the output. In the past few years, the efficiency of secure computation protocols has increased in leaps and bounds. However, when considering the case of security in the presence of malicious adversaries (who may arbitrarily deviate from the protocol specification), we are still very far from achieving high efficiency. In this paper, we consider the specific case of three parties and an honest majority. We provide general techniques for improving efficiency of cut-and-choose protocols on multiplication triples and utilize them to significantly improve the recently published protocol of Furukawa et al. (ePrint 2016/944). We reduce the bandwidth of their protocol down from 10 bits per AND gate to 7 bits per AND gate, and show how to improve some computationally expensive parts of their protocol. Most notably, we design cache-efficient shuffling techniques for implementing cut-and-choose without randomly permuting large arrays (which is very slow due to continual cache misses). 
We provide a combinatorial analysis of our techniques, bounding the cheating probability of the adversary. Our implementation achieves a rate of approximately 1.15 billion AND gates per second on a cluster of three 20-core machines with a 10Gbps network. Thus, we can securely compute 212,000 AES encryptions per second (which is hundreds of times faster than previous work for this setting). Our results demonstrate that high-throughput secure computation for malicious adversaries is possible.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.15", "http://www.ieee-security.org/TC/SP2017/papers/96.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4a77fe1e0f42d44339bf0f690deddab56ce0aa7c", "sources": [ "DBLP" ], "title": "Optimized Honest-Majority MPC for Malicious Adversaries — Breaking the 1 Billion-Gate Per Second Barrier", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "4a790de4588c147903a61a50d484444bdb4d8822": { "authors": [ { "ids": [ "1713285" ], "name": "Xiao Hu" }, { "ids": [ "1802067" ], "name": "Yufei Tao" }, { "ids": [ "1683893" ], "name": "Ke Yi" } ], "doi": "10.1145/3034786.3056110", "doiUrl": "https://doi.org/10.1145/3034786.3056110", "entities": [ "Algorithm", "Best, worst and average case", "Cartesian tree", "Computational complexity theory", "Database theory", "Join (SQL)", "MapReduce", "Parallel algorithm", "SPARK" ], "id": "4a790de4588c147903a61a50d484444bdb4d8822", "inCitations": [ "947729c7e627bc1e0109896b91effcaeab112867", "9d4f10522dc750c1a58f3ac1d105f22a560adf8d", "9c41fcd9ad733596f76108c0220cfb8a5f5e239f", "0c7b88c4ea95081e99307700d3bf7eb08e790550", "6d71d47b469d74e59a8eb71ce6980d11005cecf2" ], "journalName": "", "journalPages": "79-90", "journalVolume": "", "outCitations": [ "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", "0558c94a094158ecd64f0d5014d3d9668054fb97", "a25f6ee864f0c4fd95d9ceb2f4868e9e3fe51786", "2e95819c590da31df4627657cfef6504111ad59c", 
"021764d0804445e0869c57314c069d07c874cb4b", "5604daae8935ef15ea000d4e97a10b7801259a61", "27746b4c0fb425ae97ba406ab5d0ad7c5a7fb4c0", "0238c4cdb18ab4df78e76a2718eec4bc42cd826f", "1163b331215f934537ca6b78b8d77ceb1f0fc139", "c0d4c5713dba6472edb651334d25c3a5aa78d6f3", "3c29f6a47c955382ccbc26f258123fcce627a00b", "351df512735096126454f5d4bc8e9ae56f4cd288", "3f1e54ed3bd801766e1897d53a9fc962524dd3c2", "b59740eec39f90cb53e46b93d01f43023a8822a4", "60a848d6d5716db3dac3c87cc3a3fbfd4f53b60d", "0541d5338adc48276b3b8cd3a141d799e2d40150", "2a43f9e8e9e4d6011a0287b5ef315bd24fd308e1", "014c186a43dbaf337a2772d71714ccefb57d800a", "790595b56a2e4dd9dcd4f6db10b3d97c9312db8b", "14e5bc93f6aca3a84a55cace8825a2ac3eb2e59c", "30afaf0b5d6edd63c67346bba077da17992e8fcd", "5b8b0ca444c9efffb82d221ac01197730ebf58e6", "3bf298c72e7f9ff490e45b6d322e77c38cb4c501", "8f3f63773aa801b2cdcfa2e5699e3abb9aed7443", "076aee8e0b13704adb60ae212df22fea64613861" ], "paperAbstract": "Parallel join algorithms have received much attention in recent years, due to the rapid development of massively parallel systems such as MapReduce and Spark. In the database theory community, most efforts have been focused on studying worst-optimal algorithms. However, the worst-case optimality of these join algorithms relies on the hard instances having very large output sizes. In the case of a two-relation join, the hard instance is just a Cartesian product, with an output size that is quadratic in the input size.\n In practice, however, the output size is usually much smaller. One recent parallel join algorithm by Beame et al.[8] has achieved output-optimality, i.e., its cost is optimal in terms of both the input size and the output size, but their algorithm only works for a 2-relation equi-join, and has some imperfections. In this paper, we first improve their algorithm to true optimality. Then we design output-optimal algorithms for a large class of similarity joins. 
Finally, we present a lower bound, which essentially eliminates the possibility of having output-optimal algorithms for any join on more than two relations.", "pdfUrls": [ "http://www.cse.ust.hk/~yike/pods17.pdf", "http://doi.acm.org/10.1145/3034786.3056110", "http://home.cse.ust.hk/~yike/pods17.pdf", "https://home.cse.ust.hk/~xhuam/pods085.pdf", "http://www.cse.cuhk.edu.hk/~taoyf/paper/pods17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4a790de4588c147903a61a50d484444bdb4d8822", "sources": [ "DBLP" ], "title": "Output-optimal Parallel Algorithms for Similarity Joins", "venue": "PODS", "year": 2017 }, "4ab5e4c0d22cdccd2bf0d1925b41b79a4b08d0ba": { "authors": [ { "ids": [ "2494447" ], "name": "Amelie Chi Zhou" }, { "ids": [ "1777280" ], "name": "Yifan Gong" }, { "ids": [ "39061773" ], "name": "Bingsheng He" }, { "ids": [ "2467444" ], "name": "Jidong Zhai" } ], "doi": "10.1145/3126908.3126913", "doiUrl": "https://doi.org/10.1145/3126908.3126913", "entities": [ "Algorithm", "Amazon Elastic Compute Cloud (EC2)", "Data center", "Experiment", "Machine learning", "Optimization problem", "Simulation" ], "id": "4ab5e4c0d22cdccd2bf0d1925b41b79a4b08d0ba", "inCitations": [], "journalName": "", "journalPages": "16:1-16:12", "journalVolume": "", "outCitations": [ "055c7f8caacbdcab63209a590358e34389e8ff6c", "b76d259d4cfb68cc143cd1109138eca0d8ac8ce9", "41872a5a86c7630c8f02c6e2586fa1990f63d95c", "58d5e76b2c8404efafd2c48b8983cae2d7ad419a", "2a0c9f8248aad793810dfc2ec2bc21a8ebdca6f4", "7ec74c4725e8b69db335ac6112d1b798e30aaf91", "0fddf0839bad24f39d2d1dc542b2aa35f264cf65", "189d2d59e133f0c2ed58477b1f30cef0ae8c13c0", "2f8ab6e3284a132f90479f748923b9f04b191d0e", "f78c78791ff50e42458bd034effaa2704b01ae9a", "f4ac508e2662afebe757ce4d9247f78cc68e6c2a", "1a69fd58d883049e24ec734529ad5caf9f850620", "a17745f1d7045636577bcd5d513620df5860e9e5", "6744832bed15a0512d8a64e03c81c99dc21653ff", "f4c217923ceebd709e8eb106b1f7d25fd5d088c2", 
"0706356c9ab6014d6b04577d38289ea8328291a5", "f6ce088e9db74f23d6110284a9a3e1ff5f74954c", "22a8f899e13f62bf28629b273466c4bf3ae40faf", "0aaafe741c86e48f7b58376661fb95a121246007", "04d3d68dd28fa8459f732893108dc352d5b5547b", "0ca1e465dd85b8254bcdd7053032d7eab6e2d4b4", "2faf2ed0b2a6a30365a1e000b9c84d3715924105", "05aaaa5c0e3797bb830ad1320e16f5d441eb66bd", "fe019938b10fead8351214685be6c73f4efa9c5c", "95a8c5f25652cc573c557965548c62d09890592e", "2777df250e89f60a23df45b751ddd25a245bd779", "8980a90117bd1f4d2fd6ada63a9cc46bd13d09c5", "0a68c6226e04180671a474c73fa0a2b4a154d129", "3bd6bc388dea99b023c6695bd287eac8f5d28c0a", "9a605cb7f6fd58aaf022bc97428beee5ceabea42", "ffc86f0da2b93127ef88a58978ea4ec09b5b2e7a", "732a0ef83b3b5674f5299b06e1c0191f09c03dd9", "197181eabcad2e6a481d7e5a4ba836f00ff6ecac", "725abc1a03355d8928d2c60898ef76f652454f01", "187d3d8109e51b5e2c4941048b0fd6cf1d464370", "8138ab521abf140ead67dd5fe5e56055972c0ca0", "78f246756811e924825a03909952d2c32c593a52", "2132429c69c47310569ea03783d86d5626511427", "137ee6d39b7244379da35b39a915ae4e2cab1b91", "f3b53c1879fa7c23449966fdb35d01dd246c3868", "04bdd6ffc755437de38ce2886e0bd4a704fcfe72", "411eb6534d39a37ed43443ba1d2e168c73171330" ], "paperAbstract": "Recently, various applications including data analytics and machine learning have been developed for geo-distributed cloud data centers. For those applications, the ways to map parallel processes to physical nodes (i.e., \"process mapping\") could significantly impact the performance of the applications because of non-uniform communication cost in such geo-distributed environments. While process mapping has been widely studied in grid/cluster environments, few of the existing studies have considered the problem in geo-distributed cloud environments. In this paper, we propose a novel model to formulate the geo-distributed process mapping problem and develop a new method to efficiently find the near optimal solution. 
Our algorithm considers both the network communication performance of geo-distributed data centers as well as the communication matrix of the target application. Evaluation results with real experiments on Amazon EC2 and simulations demonstrate that our proposal achieves significant performance improvement (50% on average) compared to the state-of-the-art algorithms.", "pdfUrls": [ "http://www.comp.nus.edu.sg/~hebs/pub/processmapping-SC17.pdf", "http://doi.acm.org/10.1145/3126908.3126913" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4ab5e4c0d22cdccd2bf0d1925b41b79a4b08d0ba", "sources": [ "DBLP" ], "title": "Efficient process mapping in geo-distributed cloud data centers", "venue": "SC", "year": 2017 }, "4ac3157a7cd44d268f33e96c9141e9f2c99f139c": { "authors": [ { "ids": [ "1773557" ], "name": "Omer Subasi" }, { "ids": [ "1679176" ], "name": "Sriram Krishnamoorthy" } ], "doi": "10.1109/CLUSTER.2017.129", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.129", "entities": [ "Computation", "Error detection and correction", "Experiment", "Gaussian process", "Kriging", "Soft error", "Solid-state drive", "Supercomputer" ], "id": "4ac3157a7cd44d268f33e96c9141e9f2c99f139c", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "608-612", "journalVolume": "", "outCitations": [ "270c88be02c3c996b652b5410a49f63a2abd7687", "7027eb880ee4d5a1f71bfc861bb36ae980f781fc", "dd998158ec990f66282758bd4d3c093ac1e81937", "5d5ac7167bc5f834173aa4c63821916a1bdf487a", "dd286cdefbca8f6e435298f058ca413d131f53b0", "51ae243a89cb1f12ddc25eec902cae134913f6ba", "91a26971ed09692b026e5db76f05e88cfad5b549", "b23f060a4574ff126e98b8fe13f8b508b9f82c1f", "a61c25b0f50e4886381b7a083d27fb5b4d101317", "b8d5cc73054874b49c4ee1033717678719440145", "108c840d5d1847948a2de0250490a327ae069ee6", "4ee0564e83d0252c461087f7fd5963a01716e142", "14e5b814d398d2c7e9ed78d12a2c286c733116f5", "36480300b1e382c062b78c6bd610d1879efd950e", 
"990e1ff6477328ca6cfb57c27e563d8f04c6b411" ], "paperAbstract": "In this paper, we present a non-parametric dataanalytic soft-error detector. Our detector uses the key properties of Gaussian process regression. First, because Gaussian process regression provides confidence on the prediction, this confidence can be used to automatize construction of the detection range. Second, because the correlation model of a Gaussian process captures the similarity among neighboring point values, only one-time online training is needed. This leads to very low online performance overheads. Finally, Gaussian process regression localizes the detection range computation, thereby avoiding communication costs. We compare our detector with the adaptive impact-driven (AID) and spatial supportvector- machine (SSD) detectors, two effective detectors based on observation of the temporal and spatial evolution of data, respectively. Experiments with five failure distributions and six real-world high-performance computing applications reveal that the Gaussian-process-based detector achieves low false positive rate and high recall while incurring less than 0.1% performance and memory overheads. 
Considering the detection performance and overheads, our Gaussian process detector provides the best trade-off.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.129" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4ac3157a7cd44d268f33e96c9141e9f2c99f139c", "sources": [ "DBLP" ], "title": "A Gaussian Process Approach for Effective Soft Error Detection", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "4acb188e4dd717cf1807b12270f7fbc58129af3a": { "authors": [ { "ids": [ "1719793" ], "name": "Zhijun Li" }, { "ids": [ "1679043" ], "name": "Tian He" } ], "doi": "10.1145/3117811.3119859", "doiUrl": "https://doi.org/10.1145/3117811.3119859", "entities": [ "Emulator", "Firmware", "Modulation", "Subcarrier", "Super G (wireless networking)", "Throughput" ], "id": "4acb188e4dd717cf1807b12270f7fbc58129af3a", "inCitations": [ "b957d6ae4ea13584788d85ca42e1f81a52fbf75f", "48230a7eefa4685e0c64c0fb436e5e6f2048d7e7", "ea049c984971d8f6f44d303f739c9d0afa285fae", "93355aeef7d164535934678d03758dd24992e484" ], "journalName": "", "journalPages": "2-14", "journalVolume": "", "outCitations": [ "289b61d5b498929bf057943da02e40487d21c2f8", "9a36de80b5ad616acad779764b51c4d2db415413", "18caa93dc27a245754cd469b90c4223a441c0a6e", "2acd3f6c232d632d2532df210d2788705cfdb4c6", "0eee39b13ec1e206b088c19675314794d8e62c51" ], "paperAbstract": "The applicability of existing Cross-Technology Communication (CTC) methods, which rely on packet-level modulation, is severely limited due to their very low throughput, e.g., tens of bps. Our work, named as WEBee, opens a promising direction for high throughput CTC via physical-level emulation. Specifically, WEBee synthesizes the time-domain signals by choosing appropriate frequency-domain components fed into the subcarriers of WiFi OFDM. 
WEBee can emulate the desired physical-layer ZigBee signals by manipulating only the data bits in WiFi packet payload, requiring neither hardware nor firmware changes in commodity technologies. Moreover, WEBee enables the parallel CTC, where one WiFi frame emulates two ZigBee frames simultaneously. To evaluate the performance, we implemented WEBee on commodity devices (the Atheros AR2425 WiFi card, BCM 4330 WiFi card and CC2420, CC2530 ZigBee devices). Our comprehensive evaluation reveals that WEBee can achieve the CTC between WiFi and ZigBee with a reliable throughput of 126Kbps in noisy environment, 16,000x faster than current state-of-the-art CTC methods.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117816", "http://www-users.cs.umn.edu/~tianhe/Papers/WEBee.pdf", "http://doi.acm.org/10.1145/3117811.3119859" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4acb188e4dd717cf1807b12270f7fbc58129af3a", "sources": [ "DBLP" ], "title": "WEBee: Physical-Layer Cross-Technology Communication via Emulation", "venue": "MobiCom", "year": 2017 }, "4acdc975e3507a6e26a47f3858a74ec0de75ae38": { "authors": [ { "ids": [ "2061645" ], "name": "Robert Rudd" }, { "ids": [ "2931341" ], "name": "Richard Skowyra" }, { "ids": [ "38720805" ], "name": "David Bigelow" }, { "ids": [ "39738781" ], "name": "Veer Dedhia" }, { "ids": [ "2749559" ], "name": "Thomas Hobson" }, { "ids": [ "3160691" ], "name": "Stephen Crane" }, { "ids": [ "2279415" ], "name": "Christopher Liebchen" }, { "ids": [ "1772810" ], "name": "Per Larsen" }, { "ids": [ "2597368" ], "name": "Lucas Davi" }, { "ids": [ "1721575" ], "name": "Michael Franz" }, { "ids": [ "8415280" ], "name": "Ahmad-Reza Sadeghi" }, { "ids": [ "1691029" ], "name": "Hamed Okhravi" } ], "doi": "", "doiUrl": "", "entities": [ "Abstraction layer", "Code reuse", "Encryption", "Exploit (computer security)", "Indirection", "Java Platform, Standard Edition", "Malware", "Memory corruption", "Memory protection", "Pointer 
(computer programming)", "Randomized algorithm", "Spectral leakage" ], "id": "4acdc975e3507a6e26a47f3858a74ec0de75ae38", "inCitations": [ "6e57862d7cd590f5796f8e04cc6e74d60167a469", "8de3d330c8c1205e337b3b705f86463922fb6b1c", "7dd98badf2d3accb703f0358761f2d86e5d85194", "94512fc71d9802931411641c2e6c672270bc1553", "325390173841d52f7a2791ba6b0e32ad80bf2630", "723931de6d91a965bc2fa24ac649291c9f1a4639", "464f2ce15ab6ff4589e6920c32af9898541a26eb" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "3fa27974cade47e98993b98798f73594b902583b", "0ed6a4c8fa2c74ca2e470df6e205797371523edc", "4c106a45b6d1e1d9749121c02016da85e0f5b17c", "dca2b521c17a53a82fbaf241680cff0c813dec3a", "d3cf8d77c8c3f1d57ef0133df89f144f3dd63d26", "0719b9670c8580db76547497df39caabdc20fc32", "2a3ed385ca664b3d6d9182cb77691cf816ad5c84", "50117210fbb7a8ce28ba503280f3c65fdc029ec7", "2947959aa2cfc45719fac7a54812614d1fa8707f", "48a8b9bd6df63b13093cd67ee61911e7a485be57", "05c49820bb35d0b8d7a2168a9124e506a0334b57", "6a8f65381a627a2db6c756a7185d9106f0acefec", "2811354f6f13b12176f81bc989d2e80534effa80", "01a2d5c69a09ec3fa82de6dfe12811f3d981ab7e", "3d6f626d383048fda0ac5b56864141f2521dd38f", "01f9c33a24ced5bd07489303bff0dbcf5455b054", "c7770a001ddab356e3336558be18a94fc575fe87", "5aa4d6f28c803e5bd05d39794e12c759a60aa6a2", "12d5c3bcd2dd94b4f07f358be291c5906b16dba4", "025d4a410ccf648f825dbae5bfdffb45c57aa33a", "11496e28e6dd3f1ab7d2f054c46d7587988dfd86", "b8ef0da6025df0284049e34dd32492b825c513a9", "8c39c5d022d066e708a1eb5cd41d5db3b64bfdfe", "3013fc25ace9eca344cb936124a42171d72b95ec", "9b2585f7248c8b5a22e9c816506e01060213ca85", "f479c0578156255ce176e75bb13051fbb0f25b98", "3001d2504e1dbc547d12d05f3ed1a671d125c4ce", "ab2177167b09f9be086d44188b845fc9b5458d66", "377e1ea567fa79fae02a0b38a62916520ef81e2d", "7b2cf50a197888a3eb273d0ef056e93c581aa272", "1e37625f382709b06f72e5c3c41aba1328ff66dc", "f0ac31c2248ef8eb597448395da6f79227ffe916", "5ec6717a1b0b40df8fac128ce1e8960cd53ed53c", 
"1798b9bc347ca826724b6d80766200ebaad8dfb0", "369a5802b7e60440fa9e382a5e7d2824907e045c", "63eca2d9de958abab6a20f0696789ccb6f1b8aa1", "0639adf09bea98bef72d77c3671ccd8ef32ff542", "48a8e9d8a41009eb6b7733b139eb5eff30d72776", "33cb4013c7cc36a173e7fb4e541133056e8e43cf", "116eaac2e498bc2c9bea10ea838309dcf143d764", "6588630854bf12b190ae8f95ed8763f7cdb945f6", "412c0be5520e80ab6f8c3662477a28e3b9ccb943", "74572d07252e2f0b60b16abb931c46e819e2b448", "4d75cd2764c45baf46c72fddc5c676fdfce6f60e", "0548d2e17b07d241d48dca41f288a07bf71d2707", "4b9426a328fb4e379e2fab57fc72a91c5618ae94", "07f9aa35346107785ad6b6cd07537b24fa7bf0d2", "07356c5477c83773bd062b525f45c433e5b044e8", "217742089058db1572042a0cebfcecdec8ce215e", "638297d9b5c8e0e83ca5acfbf1325196ea0bbb3c", "07848afa4b5f2f09cd2fe28a6cdc25c642da61cd", "feae97f21fa86e95800d1afc30a78994ceec0672", "70e8eb54e20c504b9a3c52f5a5ea4d8fbb3d985c", "acf32e644db8c3ac54834d294bba4cf46551480a", "53396c842bc8a94575470fab3acb4aef91c5073d", "67b086caacc543b7d30b2f006f77a315bc9572e0", "30e76f32c323adb0ff340760380fe5a08505b641" ], "paperAbstract": "Memory corruption vulnerabilities not only allow modification of control data and injection of malicious payloads; they also allow adversaries to reconnoiter a diversified program, customize a payload, and ultimately bypass code randomization defenses. In response, researchers have proposed and built various leakage-resilient defenses against code reuse. Leakage-resilient defenses use memory protection techniques to prevent adversaries from directly reading code as well as pointer indirection or encryption techniques to decouple code pointers from the randomized code layout, avoiding indirect leakage. In this paper, we show that although current code pointer protections do prevent leakage per se, they are fundamentally unable to stop code reuse. 
Specifically, we demonstrate a new class of attacks we call address-oblivious code reuse that bypasses state-of-the-art leakage-resilience techniques by profiling and reusing protected code pointers, without leaking the code layout. We show that an attacker can accurately identify protected code pointers of interest and mount code-reuse attacks at the abstraction level of pointers without requiring any knowledge of code addresses. We analyze the prevalence of opportunities for such attacks in popular code bases and build three real-world exploits against Nginx and Apache to demonstrate their practicality. We analyze recently proposed leakage resilient defenses and show that they are vulnerable to address oblivious code reuse. Our findings indicate that because of the prevalence of code pointers in realistic programs and the fundamental need to expose them to \u201cread\u201d operations (even indirectly), diversity defenses face a fundamental design challenge in mitigating such attacks. DISTRIBUTION STATEMENT A. 
Approved for public release: distribution", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/address-oblivious-code-reuse-effectiveness-leakage-resilient-diversity/", "http://www.ics.uci.edu/~perl/ndss17_aocr.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4acd/c975e3507a6e26a47f3858a74ec0de75ae38.pdf", "s2Url": "https://semanticscholar.org/paper/4acdc975e3507a6e26a47f3858a74ec0de75ae38", "sources": [ "DBLP" ], "title": "Address Oblivious Code Reuse: On the Effectiveness of Leakage Resilient Diversity", "venue": "NDSS", "year": 2017 }, "4ad19a1c0c8cbef103a3d40132070851430e19aa": { "authors": [ { "ids": [ "32228637" ], "name": "Hideaki Kimura" }, { "ids": [ "1695573" ], "name": "Alkis Simitsis" }, { "ids": [ "1918416" ], "name": "Kevin Wilkinson" } ], "doi": "", "doiUrl": "", "entities": [ "Concurrent computing", "Janus Recognition Toolkit (JRTk)", "Manycore processor", "PageRank", "Transaction processing", "Windows 3.1x" ], "id": "4ad19a1c0c8cbef103a3d40132070851430e19aa", "inCitations": [ "49041cad80db9fb97b392a19e64f27a546b004e1" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "1156f60e40548096df49528b1342bb3e88b0f378", "0e3253e5ca318e70d4968a45f8d41f88dbffd9e3", "1fc5dc2fe308c9eadd15f1a1d18ed298d4d343ff", "35561a33891de8675010c3d82c6e218554e638fe", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "96d197be2253f5c853edce37b59c186915160ce0", "3dff11679346f5344af1018cad57fa14cc349f2f", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "3486aeaf540c48952120fe853d672af984f40a6a", "27611a1896feb8817eb9cebca344d9736916c3bb", "0a8e0a9f0ae9910d5ecf165071559a2c4191a098", "13e8d482f40a0d452ed3c24811a5a4def8fe6d3f", "fc89393583562b8c1fd88dfdc1cc38c5856c9da3", "26deee037b221bd05ed34461819f5c067b745445" ], "paperAbstract": "Existing scale-up graph engines are tuned for either short, navigational requests (e.g., Nearest-Neighbor) or longer, analytics requests (e.g., PageRank). 
However, they do not have good performance for both workloads running concurrently. We present Janus, a scale-up graph engine architected for modern, many-core servers with large memory. Janus has excellent scale-up performance on navigational requests, on analytics requests, and on a mixed workload running concurrently both navigational and analytics requests.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p104-kimura-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4ad1/9a1c0c8cbef103a3d40132070851430e19aa.pdf", "s2Url": "https://semanticscholar.org/paper/4ad19a1c0c8cbef103a3d40132070851430e19aa", "sources": [ "DBLP" ], "title": "Janus: Transaction Processing of Navigation and Analytic Graph Queries on Many-core Servers", "venue": "CIDR", "year": 2017 }, "4af605a89bd4e52565b2c99c9fa290f39b7880f8": { "authors": [ { "ids": [ "2225575" ], "name": "Di Xiao" }, { "ids": [ "2041657" ], "name": "Yi Cui" }, { "ids": [ "34983248" ], "name": "Daren B. H. Cline" }, { "ids": [ "1737609" ], "name": "Dmitri Loguinov" } ], "doi": "10.1145/3034786.3034790", "doiUrl": "https://doi.org/10.1145/3034786.3034790", "entities": [ "Algorithm", "Analysis of algorithms", "Computational complexity theory", "Degree distribution", "Directed acyclic graph", "Heuristic", "In-memory database", "Iterator", "Random graph" ], "id": "4af605a89bd4e52565b2c99c9fa290f39b7880f8", "inCitations": [ "00a57850e14320bb41d58696cc409151466b98b2", "7bb8c35f65a21a714cd9fde30149abcc562c77fe", "1acdab652c6e07bd56404071bfd98e552b146cce", "4bc90e9257d7d2eb3154b95231d8a65f5ec7cc8b" ], "journalName": "", "journalPages": "261-272", "journalVolume": "", "outCitations": [ "6b6ae4ff053bcee2834b5e7718810cb5bc15c36c", "110b55a017f52abfedca220036ea129d84b7cadc", "0706356c9ab6014d6b04577d38289ea8328291a5", "3674351c6ffe855af65d6003ad042551e7b49a38", "516f412a76911a13c9128aac827b52b27b98fad9", "10dac777afc83308fd10782bc2bc529469cb9ce9", "202a6dbe5247079ddd94f8a0048ce1a3e97cf346", 
"0b7240752edd7478dd24701c9331190f08323213", "44b2dd390f32a6a77d4e2416351df0fa08a323c1", "6ea8894ef9edf31ed83e925a5650a0a8f0b79b76", "7c1a0f7054d496e71cc697202b84d8a5c652328a", "ddd816dd2ba5248bc2d6f8310d0c3af199d36fd8", "7ce8d8a8f40b918acda0904eb3fb26369a105eed", "488803ad9c5fc6a8ca3ac293030b152314c88e01", "0cb9928daa3b90ac6959d9fb863c5f8ad6422dde", "71affe0d9489be0ecba667f568b1a0bcd9ee3af3", "7a03e14cb1beecccbc030391703224d7beb62e94", "0f3fd2233b51ec5cbbb46451f1f76996d7493450", "5d3ad70f4f7817b73ecfef6065358df563dfab96", "8c671a8bb36514ea82d0fc782553b6a1adb1fb5f", "a3d6136ac17442b7da61b48a14044194131bf22d", "6e0a908dc3b4092783073150eff4994370a1b098", "0c5b579f824369e6367f7585c7dc12d8715bd10a", "798d70db5b6e7fde1b0d51b34f6d03ad481addc4", "773912d0827c4bc5ad332f405bbe0f28d252bb7d", "4b24658d8ed5a5661323f7cd0e29e1c6763f79aa", "29db10ae32a3728821335e6a2b9e43cb374dbc03", "3c4194f25bda9d2ebdea8d91e8d7c13a5f8b485a", "1f0612de1f191abadf250b78cd78f884203cca5e", "7805de482edfbec3a736bb6b3d1bb5163435752d", "28c83441041f7defa682e2dae09655120fb6904e", "4e0df13191a558fb619d3fcad1d7bd2c3668f844", "00a57850e14320bb41d58696cc409151466b98b2", "c39eb4343933a36f2fe6bd918ee5d057a25f3457", "df9594d5ef9a8f14c81453f6b9cef959e024b35d", "53bd1357a20550caf1317803e7bc88d3440a6984", "cbfea7048ff441a2acc14a2936b30e110bed6487", "cbce38ea34c90b8a53f35f10b2235e65fc5fe167" ], "paperAbstract": "Triangle listing has been a long-standing problem, with many heuristics, bounds, and experimental results, but not much asymptotically accurate complexity analysis. To address this issue, we introduce a novel stochastic framework, based on Glivenko-Cantelli results for functions of order statistics, that allows modeling cost of in-memory triangle enumeration in families of random graphs. Unlike prior work that usually studies the O(.) notation, we derive the exact limits of CPU complexity of all vertex/edge iterators under arbitrary acyclic orientations as graph size n → ∞. 
These results are obtained in simple closed form as functions of the degree distribution. This allows us to establish optimal orientations for all studied algorithms, compare them to each other, and discover the best technique within each class.", "pdfUrls": [ "http://engineering.tamu.edu/media/4192506/2016-9-2.pdf", "http://doi.acm.org/10.1145/3034786.3034790", "http://irl.cs.tamu.edu/people/di/papers/pods2017.pdf", "http://irl.cs.tamu.edu/people/di/papers/pods2017-ppt.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4af605a89bd4e52565b2c99c9fa290f39b7880f8", "sources": [ "DBLP" ], "title": "On Asymptotic Cost of Triangle Listing in Random Graphs", "venue": "PODS", "year": 2017 }, "4b02018f3f808926fbdc088ba2d8b2a36aa8d40e": { "authors": [ { "ids": [ "1910260" ], "name": "Christoffer Quist Adamsen" }, { "ids": [ "32710366" ], "name": "Anders M\u00f8ller" }, { "ids": [ "1746015" ], "name": "Frank Tip" } ], "doi": "10.1145/3133890", "doiUrl": "https://doi.org/10.1145/3133890", "entities": [ "Approximation algorithm", "Event-driven programming", "Exception handling", "Experiment", "Information", "Initialization (programming)", "JavaScript", "Model checking", "Static program analysis", "Web application" ], "id": "4b02018f3f808926fbdc088ba2d8b2a36aa8d40e", "inCitations": [], "journalName": "PACMPL", "journalPages": "66:1-66:22", "journalVolume": "1", "outCitations": [ "182c0524aa353b6f4f4cb75a88ff3f5fc3bd86e0", "103f2107e7e66788684e51ae69bd3bf67abf5e4d", "10ba04904f12e44cd0569cb86aa6e97e47939e23", "75ea8cef3730ee258571c7fefca4fe7036611c36", "a74d2672e0f1bb05b321e60fffab0c003693dcef", "938430097d30aba246c907503d7c3d22cfe23428", "03bb63660c3935ad2ec011a7f9e868587063f89c", "3c10b218e76ed78109ffaa5023b1d7ce07182826", "2346439ece014d5e3ce1564adc2a7ca098a37c8e", "a45adba59080ad625e3005c669345c3a96ad3e18", "10ce08ad291319fb6509d935ba617828bde83e20", "5dbda9a15a19c08b68b539274175503fb3e2d87c", "3be816a633ee79b9d734920faee820226c12a5b5", 
"2e292d10d668c4b4ba92f1b5272ebc82d4bc5f35", "0b6975dfee824f53f54281afe5755620c4ee9e92", "a7810702c03d2e04e62dbb74b00d09f083f96814", "454b06a17e2f6656e65935cb9d36dcf2b2044bf5", "2ae2e8e62befd7603c66c3bd1f10d2fa23e0ffe6", "30228ff967dbdc880c386d6220d1e4173d9d63fe" ], "paperAbstract": "Event races are a common source of subtle errors in JavaScript web applications. Several automated tools for detecting event races have been developed, but experiments show that their accuracy is generally quite low. We present a new approach that focuses on three categories of event race errors that often appear during the initialization phase of web applications: form-input-overwritten errors, late-event-handler-registration errors, and access-before-definition errors. The approach is based on a dynamic analysis that uses a combination of adverse and approximate execution. Among the strengths of the approach are that it does not require browser modifications, expensive model checking, or static analysis. \n In an evaluation on 100 widely used websites, our tool InitRacer reports 1085 initialization races, while providing informative explanations of their causes and effects. 
A manual study of 218 of these reports shows that 111 of them lead to uncaught exceptions and at least 47 indicate errors that affect the functionality of the websites.", "pdfUrls": [ "http://cs.au.dk/~amoeller/papers/initracer/paper.pdf", "http://doi.acm.org/10.1145/3133890" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4b02018f3f808926fbdc088ba2d8b2a36aa8d40e", "sources": [ "DBLP" ], "title": "Practical initialization race detection for JavaScript web applications", "venue": "PACMPL", "year": 2017 }, "4b3754faf49f1c54439d18e8d237995c0737f75a": { "authors": [ { "ids": [ "2317421" ], "name": "Felix Donatus L\u00fcbbe" } ], "doi": "10.1007/978-3-319-64203-1_5", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_5", "entities": [ "Benchmark (computing)" ], "id": "4b3754faf49f1c54439d18e8d237995c0737f75a", "inCitations": [], "journalName": "", "journalPages": "65-78", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_5" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4b3754faf49f1c54439d18e8d237995c0737f75a", "sources": [ "DBLP" ], "title": "Micro-benchmarking MPI Neighborhood Collective Operations", "venue": "Euro-Par", "year": 2017 }, "4b66d29d33e41d58595fa6a6b1209b52ffc351a1": { "authors": [ { "ids": [ "40486057" ], "name": "Mihai Kocsis" }, { "ids": [ "39800565" ], "name": "Johannes Buyer" }, { "ids": [ "35509319" ], "name": "Nico Su\u00dfmann" }, { "ids": [ "1786012" ], "name": "Raoul Daniel Z\u00f6llner" }, { "ids": [ "2119066" ], "name": "Gheorghe Mogan" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.24", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.24", "entities": [ "Adobe AIR", "Autonomous car", "Big data", "Industry 4.0", "Internet of things", "Logistics" ], "id": "4b66d29d33e41d58595fa6a6b1209b52ffc351a1", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and 
Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "186-191", "journalVolume": "", "outCitations": [ "d45eaee8b2e047306329e5dbfc954e6dd318ca1e", "66ce8b79d501e2932760e599b17410d341eb8b60", "7c819e8c60ff8a806b61b7d5a2e0a84fdd87d4e5", "fe43388bb2d939f2f9671b50dc0f47d8bc5ae6ed", "add57057cda07bbb42f8c61037aa845af9ce3970", "6f6c1d839c420eca2d3e11cef3ede5eac0ca35c1", "3efce011604064852a3a0fd0284c838f96899449", "7b49b15914bc4530eec64abc37fc4b68e6846b08", "8aac91b3b2e866b73aa205bb820138181330e29a", "0ac6f31b050b3f3f29f5f457ade67d0457f7da1e", "2fda10f6079156c4621fefc8b7cad72c1829ee94", "d492493e15935457be8c04ca37709a64d45b58ce", "c7a83aaebc26a5be8678084dc49dd36a70a60b52", "08315f0c8090c4f2afe894cd7082ba5e0ad32105" ], "paperAbstract": "In big cities around the world air pollution is reaching harmful levels. One of the reasons for air contamination is the automotive emissions. Soft appeals by the authorities to the citizens did not change much of the situation. Many people use the car in everyday activities, e.g. going to work or every day food shopping in supermarkets. Recent studies showed that a replacement of the current vehicles through autonomous vehicles would help in reducing these emissions. A radical replacement is not possible. The actual infrastructure needs to be extended by technologies that facilitate autonomous driving. Therefor integration of knowhow from other domains like internet of things, big data analysis or transportation planning has to be considered. This paper presents a smart service where, based on the information provided by principals of industry 4.0 and internet of things, the planning of an autonomous grocery delivery fleet occurs. This service is implemented then at a smaller scale, as an autonomous ordering and delivery system for coffees, and realised at the Heilbronn University. 
Furthermore, the paper presents the benefits and the limitations of this kind of system.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.24" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4b66d29d33e41d58595fa6a6b1209b52ffc351a1", "sources": [ "DBLP" ], "title": "Autonomous Grocery Delivery Service in Urban Areas", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "4b75bd70dfb76e3d9b182496ca81aa2d581d7dc4": { "authors": [ { "ids": [ "2436653" ], "name": "Rashid Tahir" }, { "ids": [ "1795621" ], "name": "Matthew Caesar" }, { "ids": [ "34232960" ], "name": "Ali Raza" }, { "ids": [ "30705830" ], "name": "Mazhar Naqvi" }, { "ids": [ "1685939" ], "name": "Fareed Zaffar" } ], "doi": "", "doiUrl": "", "entities": [ "Anomaly detection", "Attack surface", "Data center", "Hypervisor", "KVM switch", "Operating system", "Real-time computing", "Scalability", "Software deployment", "System call" ], "id": "4b75bd70dfb76e3d9b182496ca81aa2d581d7dc4", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "431-441", "journalVolume": "", "outCitations": [ "001cf174f8f2cdf73acff476e475766d077a2f1e", "b3e531b3f4ba179d147408647196e08a6fd51df0", "2c00393a182623155aaaf9312e4aa53c190c0a8a", "ab88c39e5077e768a02cc8575dc4fcb58c191826", "26d0b8fc42e2703d0d24ca281a982ae447f73247", "2f88e24e2d86d0bacf28c4dcaff12870c20376a0", "997dedbd1703b11f7d33183d4185d5fcc0438339", "2f7573ec34d9af90a153ddebe90c7536217c71c9", "e657157a9d76806bd034ec9b34412f674b9d025a", "10b9a084eca0003b91bb4c7ca59cbd0139ba0131", "0af75d58915ca47d4a03e451eef173a09db7409d", "a766f17cb4ee1ff96fec800b37d8f830d88342db", "6d337579fa90c4e0298b02589101cefb73c8f895", 
"3946a5c410ba1980d932cc6d2987b4d935e038d5", "2cd9bfcc8d8bd7703f265a7a3e7da2167dfb39ff", "65192f3d0ffb066a4c47a09fc11fdfad47dd192e", "3a34e028eac4bf96cb4725f4bfb33f750d133b31", "8f5407f8d58a724ee7dbaed0aac5bef3459897a7", "4560305dc50a0437a3cae43c5a5b628db72abb8e", "26f3cbecbc636984d57e52191c1d87c9377aff6f", "07083f18b90323abedf8932f733656391cad5e21", "12c6d2de7be43abb6f5daa9e1a316bfe044cc966", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "86013daaae16572bceb755e65ee5fa2fdfb63848", "6942d920d64b066c694733897976386f03a1b0f6", "09b4ed9425c436318c5406cce798b31983deaa4b", "3a2f37d3648592ffb42155c28f71894ad61937fe", "0d60e4db3034380df61b344e643cf312f2092385", "993cddaa74a5fc9c8daf45d87455b69c77c75161", "1a92b8a3598f026aaff570be93f8e3625059d6c3", "7e809cf2020005d24fe3238f3596edb882c45986", "009af3a1fa932ea1a9efa8d34cb0b6e32feae15e", "37ef5a307a8d6ea0ba6f5f7e39a0199437c2cf48", "6c0562ffc00ba7a4d2734ac039ffd181afe2008d", "0dac671dae4192cfe96d290b50cc3f1105798825" ], "paperAbstract": "The vast attack surface of clouds presents a challenge in deploying scalable and effective defenses. Traditional security mechanisms, which work from inside the VM fail to provide strong protection as attackers can bypass them easily. The only available option is to provide security from the layer below the VM i.e., the hypervisor. Previous works that attempt to secure VMs from "outside" either incur substantial space or compute overheads making them slow and impractical or require modifications to the OS or the application codebase. To address these issues, we propose an anomaly detection fabric for clouds based on system call monitoring, which compresses the stream of system calls at their source making the system scalable and near real-time. Our system requires no modifications to the guest OS or the application making it ideal for the data center setting. 
Additionally, for robust and early detection of threats, we leverage the notion of VM/container communities that share information about attacks in their early stages to provide immunity to the entire deployment. We make certain aspects of the system flexible so that vendors can tune metrics to offer customized protection to clients based on their workload types. Detailed evaluation on a prototype implementation on KVM substantiates our claims.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101171", "http://web.engr.illinois.edu/~tahir2/publications/PID4652591.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4b75bd70dfb76e3d9b182496ca81aa2d581d7dc4", "sources": [ "DBLP" ], "title": "An Anomaly Detection Fabric for Clouds Based on Collaborative VM Communities", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "4b76a777b2e6135f62dd969113aad2388ff7cf51": { "authors": [ { "ids": [ "20601266" ], "name": "Basheer Qolomany" }, { "ids": [ "1685793" ], "name": "Ala I. 
Al-Fuqaha" }, { "ids": [ "2256225" ], "name": "Driss Benhaddou" }, { "ids": [ "1692515" ], "name": "Ajay Gupta" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.7", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.7", "entities": [ "Algorithm", "Artificial neural network", "Autoregressive integrated moving average", "Computational resource", "Experiment", "Long short-term memory", "Mean squared error", "Neural Networks", "Smart city", "Time series" ], "id": "4b76a777b2e6135f62dd969113aad2388ff7cf51", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "50-57", "journalVolume": "", "outCitations": [ "97cc2a9306a780619264ff891409b7cecd4283a1", "ae3c681f067ccabe4a9839a0e3d5188f3d9f6155", "324fc9c732116fa81624faad07524039f193cede", "d3040949c8cb99e86fa0d5078d36cd6fef61b2c6", "5c511bb027d6018f9a7ebae9c5505e6f5c6530c8", "606c89b4aca850caa351ca55508d675f4d5f3343", "f9fefeb658d79e7a4866de480c9a28093160090d", "52321b87a983fb3c80e52fb7f15ac371033c3857", "f9cba0b583ede9588943886cf98e45718ca03dfe", "55b681227ee85e2cdf3ff7a7ea2816d69413625c", "11c9aefb2fa45b9fd3292454ff8de134cfd1c6b1", "408979731b043f5bed8ce73ba07c7901bd1912a9", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", "0f08bcca67b3db328edfa5d3f48331dc71d8789e", "af216ef386aaf509725f030e7667d1b58ef5521a", "bd888d15d6dce2e7d22e08c6663c0661882d603a" ], "paperAbstract": "Knowing how many people occupy a building, and where they are located, is a key component of smart building services. Commercial, industrial and residential buildings often incorporate systems used to determine occupancy. However, relatively simple sensor technology and control algorithms limit the effectiveness of smart building services. 
In this paper we propose to replace sensor technology with time series models that can predict the number of occupants at a given location and time. We use Wi-Fi datasets readily available in abundance for smart building services and train Auto Regression Integrating Moving Average (ARIMA) models and Long Short-Term Memory (LSTM) time series models. As a use case scenario of smart building services, these models allow forecasting of the number of people at a given time and location in 15, 30 and 60 minutes time intervals at building as well as Access Point (AP) level. For LSTM, we build our models in two ways: a separate model for every time scale, and a combined model for the three time scales. Our experiments show that LSTM combined model reduced the computational resources with respect to the number of neurons by 74.48 % for the AP level, and by 67.13 % for the building level. Further, the root mean square error (RMSE) was reduced by 88.2%–93.4% for LSTM in comparison to ARIMA for the building levels models and by 80.9 %–87% for the AP level models.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.7", "https://arxiv.org/pdf/1711.10355v1.pdf", "http://arxiv.org/abs/1711.10355" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4b76a777b2e6135f62dd969113aad2388ff7cf51", "sources": [ "DBLP" ], "title": "Role of Deep LSTM Neural Networks and Wi-Fi Networks in Support of Occupancy Prediction in Smart Buildings", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "4b7c75d84dffa7f26f1a548376bf0d36b94640d4": { "authors": [ { "ids": [ "1730201" ], "name": "Jianbo Li" }, { "ids": [ "37395525" ], "name": "Jingrui He" }, { "ids": [ "24018646" ], "name": "Yada Zhu" } ], "doi": "10.1109/ICDM.2017.36", "doiUrl": 
"https://doi.org/10.1109/ICDM.2017.36", "entities": [ "Algorithm", "Benchmark (computing)", "Coordinate descent", "Data structure", "Hierarchical database model", "Iteration", "Iterative method", "Loss function", "Modality (human\u2013computer interaction)", "Optimization problem", "Program optimization", "Randomized algorithm", "Synthetic data", "Volatility" ], "id": "4b7c75d84dffa7f26f1a548376bf0d36b94640d4", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "267-276", "journalVolume": "", "outCitations": [ "15a3ed7dfc005aa6f84f1e1fb81ef6d54ce3d8aa", "60432ee6d6a5e3787d0643f799b65522bc01aac1", "3ffc2ef8b918872e6385c09362ccd76f40fd07c6", "2e6705338db75fcd92d15a4afc31ce7f719a9bdb", "3baca2f562d67ec9872e86953ecf3b234992b093", "0a043a5f666b4785245bf8f79f96b3543cbdedc6", "2d4438a0d57f60d9019d20ada3c191d423a5c402", "b436b033856bc873ea8378666e764174722824f3", "06eef03f968c80edcbeb9bff6d335da39a41d3cc", "8b6e9067c2cece212f8c902653a1d85f3eb0fde0", "05bc490f266b86c2932f6a731615edd221fe27d8", "48bb59ff4e72fbaf9a78e292b54c0cb1cd547ec3", "25c7c35c01d7560edef1aea2c693aaf346b6ab2d", "8a3e558e53a1ef4fc54a5800d56cbedbe0401564", "440157e1ebb7396d1a2ffbe0772c315f3bb8886d", "20bcaeca49c39d30bc9087b26fffb7e84905c8d3", "49ca83ef6277581ac2b8bf7c121cf8d1f222992e", "2f48a20c75f977c990d4ba271b353cbe50be53e4", "0915028333e69daaa5f27d1a28d8e6a0110fcc7a", "c271f40ed89655845a9d591fa8ae7cefbd9a92e1", "0e8d3013012025a51128a96c2f15db26a0ac95d3", "6505f53ea805767112c3135bad5a6e31b872cdb6", "5842ad95c39c4994c40610cfe5033018f9da516a" ], "paperAbstract": "Many real-world applications are characterized by temporal data collected from multiple modalities, each sampled with a different resolution. Examples include manufacturing processes and financial market prediction. 
In these applications, an interesting observation is that within the same modality, we often have data from multiple views, thus naturally forming a 2-level hierarchy: with the multiple modalities on the top, and the multiple views at the bottom. For example, in aluminum smelting processes, the multiple modalities include power, noise, alumina feed, etc; and within the same modality such as power, the different views correspond to various voltage, current and resistance control signals and measured responses. For such applications, we aim to address the following challenge, i.e., how can we integrate such multi-modality multi-resolution data to effectively predict the targets of interest, such as bath temperature in aluminum smelting cell and the volatility in financial market. In this paper, for the first time, we simultaneously model the hierarchical data structure and the multi-resolution property via a novel framework named HiMuV. Different from existing work based on multiple views on a single level or a single resolution, the proposed framework is based on the key assumption that the information from different modalities is complementary, whereas the information within the same modality (across different views) is redundant in terms of predicting the targets of interest. Therefore, we introduce an optimization framework where the objective function contains both the prediction loss and a novel regularizer enforcing the consistency among different views within the same modality. To solve this optimization framework, we propose an iterative algorithm based on randomized block coordinate descent. 
Experimental results on synthetic data, benchmark data, and various real data sets from aluminum smelting processes, and stock market prediction demonstrate the effectiveness and efficiency of the proposed algorithm.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.36" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4b7c75d84dffa7f26f1a548376bf0d36b94640d4", "sources": [ "DBLP" ], "title": "HiMuV: Hierarchical Framework for Modeling Multi-modality Multi-resolution Data", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "4b90133eccb526865c6a2021eb4b8e0647a5fa3b": { "authors": [ { "ids": [ "3491841" ], "name": "Abraham A. Clements" }, { "ids": [ "3228281" ], "name": "Naif Saleh Almakhdhub" }, { "ids": [ "19206956" ], "name": "Khaled S. Saab" }, { "ids": [ "19171499" ], "name": "Prashast Srivastava" }, { "ids": [ "2933778" ], "name": "Jinkyu Koo" }, { "ids": [ "1679009" ], "name": "Saurabh Bagchi" }, { "ids": [ "2694341" ], "name": "Mathias Payer" } ], "doi": "10.1109/SP.2017.37", "doiUrl": "https://doi.org/10.1109/SP.2017.37", "entities": [ "Bare machine", "Code injection", "Code integrity", "Compiler", "Control flow", "Embedded system", "Information system", "Internet of things", "LLVM", "Run time (program lifecycle phase)" ], "id": "4b90133eccb526865c6a2021eb4b8e0647a5fa3b", "inCitations": [ "a5667c56bc941e7c18ae32ff2c59cbb96934ba18", "074a7297ae90fcade194d73b7a48dbd9c50893db", "3f88c8cbbd3338e9be6f965e9e462b68f45b4913" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "289-303", "journalVolume": "", "outCitations": [ "e39fcd5ba07f0d703e7f2e589fc7c61ca4c4206d", "7926d0b9dfc36c13910a1850cd91a7db862f0014", "0ed6a4c8fa2c74ca2e470df6e205797371523edc", "4b41bb221ccae289bd66dfc1210f36cc172350c5", "3fa27974cade47e98993b98798f73594b902583b", "a4f3faf40a34c08db5381329012bdd0b9c8b374f", "191982e2946a9bd1d5719eafbf129f6e50b91f0c", 
"a8a73b74d24249d5d8c90dd8250a7bab34442d9f", "11496e28e6dd3f1ab7d2f054c46d7587988dfd86", "832a2b72b4f5906162358df63e452217907215e5", "157153c5c5b3fb42bd4de60d2cd8380e7038a82b", "0988a425689f6f3700e797f4a2c18f73692573c3", "49d50f1d0bb2a03388f3cb60f9f1ca01974d371c", "386fe241f4ca1d2cf042c21eb00946f09eccb392", "387e571981a8ee2bd49b1f30563e3a3a215e3b65", "0e039df712774fcea67f214d9b5780c1dc250747", "7e61bd6abdcb68ed9b3871311cabe09753de88ff", "c265ea208212d0f49ba93ce32c38b282b6982e5c", "12d5c3bcd2dd94b4f07f358be291c5906b16dba4", "aee1cbffab9d69f3ac93917200c6a46df823a70e", "28ab79d604962031585fd149941a5c0594e3d0ed", "04f956d7f0acb76a4716a5cd5f75ee634f2f393d", "acf32e644db8c3ac54834d294bba4cf46551480a", "31c44a8131c07465d4d0f97956bdbffea344e5aa", "029e930160bb7fcdab920e68526a79b1960ad89c", "b58a85e46d365e47ce937ccc09d60fbcd0fc22d4", "77cab38bef9d1b8b7bcfd3bb382c9d55541465e1", "188847872834a63fb435cf3a51eef72046464317", "38aef25ab57851352c97bb7ba610312005e08e3d", "23e8236644775fd5d8ff5536ba06b960e19f904b", "1e37625f382709b06f72e5c3c41aba1328ff66dc", "4e5007ed58449919218bdf42418186a1f1d7871a", "01a2d5c69a09ec3fa82de6dfe12811f3d981ab7e", "6a8a7ea5b03edf84a8164363bb769d0499ec5c84", "d596c5794b149b154107fb8c2615a906c0151ac3", "100ebdc07a14c85b5986d3adffa34b047b5be7a4", "0ad15428453e6f4962755933bd82f395eaf787b8", "6a8f65381a627a2db6c756a7185d9106f0acefec", "1b513fd3d411fab0a733c7a702254a3003d94b54", "0639adf09bea98bef72d77c3671ccd8ef32ff542", "3875d1d1b623af0d640528efc9e581bc91338e35", "20ed5e06873f45e85928318bc08f022057df29ec" ], "paperAbstract": "Embedded systems are ubiquitous in every aspect ofmodern life. As the Internet of Thing expands, our dependenceon these systems increases. Many of these interconnected systemsare and will be low cost bare-metal systems, executing without anoperating system. 
Bare-metal systems rarely employ any securityprotection mechanisms and their development assumptions (un-restricted access to all memory and instructions), and constraints(runtime, energy, and memory) makes applying protectionschallenging. To address these challenges we present EPOXY, an LLVM-based embedded compiler. We apply a novel technique, calledprivilege overlaying, wherein operations requiring privilegedexecution are identified and only these operations execute inprivileged mode. This provides the foundation on which code-integrity, adapted control-flow hijacking defenses, and protections for sensitive IO are applied. We also design fine-grainedrandomization schemes, that work within the constraints of bare-metal systems to provide further protection against control-flowand data corruption attacks. These defenses prevent code injection attacks and ROP attacksfrom scaling across large sets of devices. We evaluate theperformance of our combined defense mechanisms for a suite of75 benchmarks and 3 real-world IoT applications. 
Our results forthe application case studies show that EPOXY has, on average, a 1.8% increase in execution time and a 0.5% increase in energy usage.", "pdfUrls": [ "https://engineering.purdue.edu/dcsl/publications/papers/2017/final_oakland17-epoxy_cameraready.pdf", "http://iisp.gatech.edu/sites/default/files/documents/ieeessp17_saab.pdf", "https://doi.org/10.1109/SP.2017.37", "http://hexhive.github.io/publications/files/17Oakland.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4b90133eccb526865c6a2021eb4b8e0647a5fa3b", "sources": [ "DBLP" ], "title": "Protecting Bare-Metal Embedded Systems with Privilege Overlays", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "4bac8e38be2a30301c99856b1822a88891569d3f": { "authors": [ { "ids": [ "2697906" ], "name": "Shivaram Venkataraman" }, { "ids": [ "3169583" ], "name": "Aurojit Panda" }, { "ids": [ "2599242" ], "name": "Kay Ousterhout" }, { "ids": [ "1756230" ], "name": "Michael Armbrust" }, { "ids": [ "38565890" ], "name": "Ali Ghodsi" }, { "ids": [ "1712149" ], "name": "Michael J. 
Franklin" }, { "ids": [ "9229182" ], "name": "Benjamin Recht" }, { "ids": [ "1716557" ], "name": "Ion Stoica" } ], "doi": "10.1145/3132747.3132750", "doiUrl": "https://doi.org/10.1145/3132747.3132750", "entities": [ "Benchmark (computing)", "Drizzle", "End-to-end principle", "Experiment", "Failure rate", "Fault tolerance", "Mission critical", "Stream processing", "Streaming media", "Throughput" ], "id": "4bac8e38be2a30301c99856b1822a88891569d3f", "inCitations": [ "be514e8c898c05ebc869089bc67a8b57de52fcc2", "356d760d4dd7a7fd8cca011c0999c6ccc28ce025", "576f13a5f349ecc60e5e491395e8aa7a9c9f0c05", "372a2383891257520ad6dea816d3f14ddff8f003", "bcf9f2de8134a725d220caf44e225b8325870e19" ], "journalName": "", "journalPages": "374-389", "journalVolume": "", "outCitations": [ "359760cb0df73b9d8e6501b5ab5ff93a6c12b44f", "00919d778377ec8e4e037d8ebafc76c9de52db4b", "0427e82f0f31fd98ee4564df85a25d5e6175fc31", "0541d5338adc48276b3b8cd3a141d799e2d40150", "3af5e48a741634d2572b839ca57b68929cd2d648", "4dfdd7cd8abbd68675ea19c5902e5a7d14709799", "9f948448e7a5f0cc94cd53656410face8b31b18a", "41e0ef45114deff41d92c8b235393cfe4d9a94f4", "37525b2c3cc16a2fe166708a4f7081b949b1888e", "3a043714354fe498752b45e4cf429dbae0fb2558", "4eab97d0d1c75641671aa5b7761978322d904c5c", "3ede9f94ee174dae45f3e95cf3a45df2dbe05307", "230239fb61d7a6996ac9552706363323b34735f2", "0a974f9a517409cf80c1f38e5d805c2c3da35f6a", "78c83644fbfa65e69137f57e22fa3b53f225a5d5", "7f7c515809f676bceac2cd178ddbc360a4a15a3c", "0558c94a094158ecd64f0d5014d3d9668054fb97", "1746a50f33b31d572eb3dee45f9f20c58d25bed3", "2f88dcf1e9abaa0b0f8c63820548c98b2da61220", "061316b7516e20a4d66e7d95b3543eded514ef5d", "72000109547f17c849c2ee6e2825784e64b70cea", "478fbef8568a021c3d91c13128efa19ad719dd88", "080ed793c12d97436ae29851b5e34c54c07e3816", "148ec401da7d5859a9488c0f9a34200de71cc824", "26deee037b221bd05ed34461819f5c067b745445", "5208060771fd213eefd827e3e1260b939f1aed6d", "06db78ece7ba41bccab5df77240541e32cffd623", 
"022e936d46bf435f73faf9ca03a5a150eb90ce9b", "661b19ff987b9ed9d9252324d4a72ab1fbd588ae", "09c5293b647fca40fde28ac6c38737f07e873e41", "756d1f8f07a83f3cfc0edaa81493a9f109628e1b", "1ec3c93bf22e22f76dcf978fba7764f3f0696a82", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "0ef88610f4382a9c6e5c8020ab6ccf3dde0fb3eb", "0d2f3f6abd86368a2eb9a6d0b37d1299ec5939a6", "88fd5ae53854a26b9edb2eb42ce6dfdd6e186ea5", "2706db42926e0e58e35336331f6d3b62f0811cf5", "b6571efa4483aa00d23bbcd36930c4877548ba38", "4e8f4fedddcac090efe64fe16e8c509685a6ef7f", "4b65024cd376067156a5ac967899a7748fa31f6f", "332f77fd05703c1607e3b57884ad31fb1fad0104", "69884f09be947c43e1029bb3ddc95db5edc2a03d", "15bc1496ae89779f2e998a7da7567ec5bbd3a3a4", "3d4c53d5299090c900af337c3d9a8329868db5a3", "7ae26da9b7666812857883536870c315538f7f10", "41e71c53ca2a7be0ba90919af8f3049d957e665e", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "1220e4a011c46804d4369b5580dc7fb6e387af54", "2988e34168fa91398fa397baf823af2063893e9c", "067a52aa4907f13e8b5fb688af626546e8346827", "6b6a5fe05f19bd4322a43b0f688b854db40a2fdd", "0608d9937c074520cdc93cc444cc1c77039c5332", "e847c3ec130da57328db79a7fea794b07dbccdd9", "5e0c8ba57734361b4564b75ab55af1d2b5ccde82", "3b6dd340fb5442e0c31d73f40e241fdd73d42330", "04a935327a56480a842410cfbe3d8dab0918396f" ], "paperAbstract": "Large scale streaming systems aim to provide high throughput and low latency. They are often used to run mission-critical applications, and must be available 24x7. Thus such systems need to adapt to failures and inherent changes in workloads, with minimal impact on latency and throughput. Unfortunately, existing solutions require operators to choose between achieving low latency during normal operation and incurring minimal impact during adaptation. 
Continuous operator streaming systems, such as Naiad and Flink, provide low latency during normal execution but incur high overheads during adaptation (e.g., recovery), while micro-batch systems, such as Spark Streaming and FlumeJava, adapt rapidly at the cost of high latency during normal operations.\n Our key observation is that while streaming workloads require millisecond-level processing, workload and cluster properties change less frequently. Based on this, we develop Drizzle, a system that decouples the processing interval from the coordination interval used for fault tolerance and adaptability. Our experiments on a 128 node EC2 cluster show that on the Yahoo Streaming Benchmark, Drizzle can achieve end-to-end record processing latencies of less than 100ms and can get 2-3x lower latency than Spark. Drizzle also exhibits better adaptability, and can recover from failures 4x faster than Flink while having up to 13x lower latency during recovery.", "pdfUrls": [ "http://shivaram.org/publications/drizzle-sosp17.pdf", "https://people.eecs.berkeley.edu/~apanda/assets/papers/sosp17.pdf", "http://doi.acm.org/10.1145/3132747.3132750", "http://kayousterhout.org/talks/2017_10_30_SOSP_Drizzle.pdf", "http://shivaram.org/drafts/drizzle.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4bac8e38be2a30301c99856b1822a88891569d3f", "sources": [ "DBLP" ], "title": "Drizzle: Fast and Adaptable Stream Processing at Scale", "venue": "SOSP", "year": 2017 }, "4badfb2bda4c304d4e9aa1a5ed8aca649132b711": { "authors": [ { "ids": [ "2421465" ], "name": "Ang Chen" }, { "ids": [ "1719236" ], "name": "Andreas Haeberlen" }, { "ids": [ "33779522" ], "name": "Wenchao Zhou" }, { "ids": [ "35206168" ], "name": "Boon Thau Loo" } ], "doi": "10.1145/3064176.3064212", "doiUrl": "https://doi.org/10.1145/3064176.3064212", "entities": [ "Approximation algorithm", "Cybercrime", "Internet", "Routing", "Self-propelled particles", "Software deployment" ], "id": 
"4badfb2bda4c304d4e9aa1a5ed8aca649132b711", "inCitations": [ "53c0617eb76ed39f3ba9f3a45374839d7904ef93", "876369c960b28e1afe6a616f166036ef7b3e4539" ], "journalName": "", "journalPages": "374-388", "journalVolume": "", "outCitations": [ "35a5a870cefa0184245cf317381f3dc4092e1781", "519a022f6103a68331402f499a9bc9447ef70995", "06ff081f78263a253cca74d31e45e706ce0d0de0", "343f416c1be18bd81f30177ecf0ea57e677ee6eb", "5ef0328d7ab3d68eca703e367443011c0b8d0729", "94773fc163ea325035a2d84f6a82bda0e69aa739", "111864cac232d8a9c170bd63069eb4af155a9f7b", "8aa09720221bdeef43e150fc7f6896f71600fb86", "075a4f85a26280097f1f041c3a1c7f168c523c63", "36583417faf3d052c415262cca1ba44a6b90d75c", "1e8c01cf85a1fb680e195e0ac6c9a7bd17268787", "66c08ae7b3a2e6f6472996d918853ba06b1c961b", "6410b6cc29af234544f7706194aba20d6c4c90ae", "1cfd9deaf713679b9faa351486ff4d16b8f3c1c8", "38bea412ab3bc9cd5f73d6c2209252f6a1f14aad", "00917c1d60b1a684b562885284cb33ad7317101f", "119af27b7fbcd4f2c224e253e6337b35fa1ca7a5", "0fbfe111ae47ba19bc509398149c0b03d1e6ecef", "32ac1fed2f6f9bf6b8913091f5a6efd40d71b1e1", "9bb0aa7c062a1ac3df0a73d1e7caa88937e9716e", "153506e97f5db120d28f0f4c726cbb5d751baa00", "069dc0767a8482388a152b0d1a5714051f68b6e2", "3cf0fcf1b729962798efef1035707e5b7136eb29", "2082a94be79311cfb3c73f02a88f005b38d1a424", "0c41fe23bc2ffb93ee3717fdbc30279bca2c0726", "2e38636cf04fd1fe0439440c7c0cc45f07bc1275", "0433bb657317ac22f7c66d71dfd14c8ead607d73", "1a05ffa0307e641ba12b0c2dbd33f10f1def2437", "2b0c044181e70ee8eacd2db26c31a03d5ec24c9c", "7efe0dc4cde074bd87089491a6f95dde84397cca", "07c5da91f5a60765d563d01365bc3b15fa2f8e00", "03d4747ff2ce1b9f738cf71708e870fae9c13c1d", "08003ad503808a0955e05535480abda658ed887e", "13c19628ed35dc15296629b31fc6af37c75349e0", "4c4ecb120fa7fa0ae9713138b897be329f9e6d39", "0fbc532b40bfc80c84ba648e179e9494b9e3822a", "4ab877c692db2fc3ed155d4dad627664f3cc6dd5", "66e35b497d679dddefccc50005a5fd6986053667", "cfae4ef6ff083897fe9d00cfd370b0f668225cfa", "0a7151c200bf97973453ec05a28012cf03cf906b", 
"3b38dce1ad7e3f128525cee43d42545d3ec94ddf", "294a33d55f566724598311035d1a26dbe792c5fd", "31264b04c2f9b7b3b3de006cab089b91bc074868", "2cb739d7e00caa2cc9c34fe04e249aae93bc3a2d", "16c15dd84e52de5bdfda58468c2581c52b136b07", "402d90c9aec94d200a559d3400f4d13fcc1d322c", "2e037e25c0af281f6b699d238e79aa7074e9fe06", "044c01d55411994fdc6f6cf0544d6a1c2e1f75a1", "10a1afb817ba1cf8968b6de5addbee837f36fabd", "025c15b3f37a5a347a068341dfd964d93f06945e", "14a09e1fba9b74edba595260155ac7ebe9837e16", "254052dc9ea181817f6b19a88eb2125a018cd129", "1d80de948ddcf8f3524763ed3d5f4485f46d70fb", "7709bd304d72d773bfc6dae2930f5e61095c144b", "069103feb2d2d3f1b0115b484d5c2f978a983df0", "3134ff9c464993f4bf309703c6aa315db40613ac", "2077cc18da002721390a23392ce4a25d19c3e2a2", "602f7d42b92d01675315a07a07bb4be6d423a25b", "11e7e02278725d09d7c6dd67482249453ad0e58e", "2a0de4299d4b8e7968546219b559f7c90cd5ff35", "08169ac66fd95edc443477415d01ea4cf1000f61", "06beeda7be321eb0a294af55b7689d22d77a5b2b" ], "paperAbstract": "Today, network operators are increasingly playing the role of part-time detectives: they must routinely diagnose intricate problems and malfunctions, e.g., routing or performance issues, and they must often perform forensic investigations of past misbehavior, e.g., intrusions or cybercrimes. However, the current Internet architecture offers little direct support for them. A variety of solutions have been proposed, but each solution tends to address only one specific problem. Moreover, each solution proposes a different fix that is incompatible with the others, which complicates deployment.\n In this paper, we make the observation that most of the existing solutions share a common \"functional core\", which suggests that it may be possible to add a single primitive to the Internet architecture that can support a wide variety of diagnostic and forensic tasks. We then present one specific candidate that we call secure packet provenance (SPP). 
We show that SPP is easy to add to the current architecture, that it can be implemented efficiently in both software and hardware, and that it can be used to approximate (and sometimes surpass) the capabilities offered by a variety of existing diagnostic and forensic systems.", "pdfUrls": [ "http://www.cis.upenn.edu/~angchen/papers/eurosys-2017.pdf", "http://www.seas.upenn.edu/~angchen/papers/eurosys-2017.pdf", "http://www.cis.upenn.edu/~ahae/papers/spp-eurosys2017.pdf", "http://doi.acm.org/10.1145/3064176.3064212" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4badfb2bda4c304d4e9aa1a5ed8aca649132b711", "sources": [ "DBLP" ], "title": "One Primitive to Diagnose Them All: Architectural Support for Internet Diagnostics", "venue": "EuroSys", "year": 2017 }, "4bae5718d7f7517515c46e525f2986b5557103c8": { "authors": [ { "ids": [ "3440038" ], "name": "Chengxi Zang" }, { "ids": [ "1685435" ], "name": "Peng Cui" }, { "ids": [ "1702392" ], "name": "Christos Faloutsos" }, { "ids": [ "9732730" ], "name": "Wenwu Zhu" } ], "doi": "10.1145/3097983.3098055", "doiUrl": "https://doi.org/10.1145/3097983.3098055", "entities": [ "Anomaly detection", "Cluster analysis", "Human dynamics", "Process modeling", "Social network", "Social system" ], "id": "4bae5718d7f7517515c46e525f2986b5557103c8", "inCitations": [], "journalName": "", "journalPages": "565-574", "journalVolume": "", "outCitations": [ "0314ae616425b1da2b7edf08f24c8fe8df2b1c79", "9aa9856bc652f8631bc01ca00906b5adb9bfff2e", "9fb10a3c2dcec939784ce208e0e7e7fda4be895c", "1558a06fb4f0473f76792e830b0b07c79f7decc0", "9e2227fbf260493a07db1b52b901f3d4a6761a35", "022122965312bc4e3525cecf0726551cb27b339f", "3266b454003601e517ae5d943f8c636505dab101", "372df26b92cf8e5ae3474d6f64d5954d6f7bc3fa", "267cf324f04f1993227f780af3555ea7d6c59c26", "1f0612de1f191abadf250b78cd78f884203cca5e", "7ef093ed2bf8b14195d4f18648dc805a89077366", "037c6a50a4a7cae1998d944ae2991c986731912a", "1b8abb3351226ebf254893b0685a0b87c94bccd9", 
"2089a031e9e0467e617af5286768e349ec276db0", "4a905ca67ea922040d98aaf644121a5ef4eca937", "075c9d6382f5511308d3e7f45921b4b86400d546", "f448dbee99628aad012ccc0dac5be8f7cd7ebbd3", "29cc0a8802126d4e97f28109763df26ab91c6531", "75b0f1e480ff7bf6526983fa6f9faabf83f1e157", "c08206b44dd1f0ea54bd073e4effaf2e4483169b", "e57f2945be3f85014a4fd2f6a7d67c174640f7e3", "07b12dd17878a9f1b5a7712c3741e6a3a7386692" ], "paperAbstract": "How do people make friends dynamically in social networks? What are the temporal patterns for an individual increasing its social connectivity? What are the basic mechanisms governing the formation of these temporal patterns? No matter cyber or physical social systems, their structure and dynamics are mainly driven by the connectivity dynamics of each individual. However, due to the lack of empirical data, little is known about the empirical dynamic patterns of social connectivity at microscopic level, let alone the regularities or models governing these microscopic dynamics.\n We examine the detailed growth process of \"WeChat\", the largest online social network in China, with 300 million users and 4.75 billion links spanning two years. We uncover a wide range of long-term power law growth and short-term bursty growth for the social connectivity of different users. We propose three key ingredients, namely average-effect, multiscale-effect and correlation-effect, which govern the observed growth patterns at microscopic level. As a result, we propose the long short memory process incorporating these ingredients, demonstrating that it successfully reproduces the complex growth patterns observed in the empirical data. By analyzing modeling parameters, we discover statistical regularities underlying the empirical growth dynamics. 
Our model and discoveries provide a foundation for the microscopic mechanisms of network growth dynamics, potentially leading to implications for prediction, clustering and outlier detection on human dynamics.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098055" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4bae5718d7f7517515c46e525f2986b5557103c8", "sources": [ "DBLP" ], "title": "Long Short Memory Process: Modeling Growth Dynamics of Microscopic Social Connectivity", "venue": "KDD", "year": 2017 }, "4bbd9d77460a14a628119d05332360c5d78df8d3": { "authors": [ { "ids": [ "38962174" ], "name": "Aishwarya Ganesan" }, { "ids": [ "31817919" ], "name": "Ramnatthan Alagappan" }, { "ids": [ "1743175" ], "name": "Andrea C. Arpaci-Dusseau" }, { "ids": [ "1703415" ], "name": "Remzi H. Arpaci-Dusseau" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud storage", "Clustered file system", "Distributed computing", "Fault tolerance", "Next-generation network", "Redundancy (engineering)", "Software bug", "System Fault Tolerance", "Unavailability" ], "id": "4bbd9d77460a14a628119d05332360c5d78df8d3", "inCitations": [ "b2cdaead2767d790accf30ba4c7eb7e99804dc4d", "67ffec9c10d9594eb9af4afe25b1f0b0bce5f85d", "c426f6394eefb7e2e1b02928f6fa16afde125aa2", "87ca0d1d3f9fde3c93e4e7102fab0a133da31857", "e97372229adcf4c015fcf43b3dcf3b51ddc48f2e", "347e1352fb903b40dce606a1e581e9d601bc289c", "5802c2ecb6e2449d9d6ddb3cac902f7cb10eaa10" ], "journalName": "", "journalPages": "149-166", "journalVolume": "", "outCitations": [ "16f3275f76adb337de8b77f899f83fc1085d8f0c", "33438b1148a84d6e5bf2cad70bf7754d546ca5d7", "3533159037bc2c11bde6b314e040ee113ae52bdd", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "51dae50f2cf78af88e8d064671b91e6e059ca7a6", "58f692e9b03cb973355aab46bb6f867239aeb513", "8a7536f311d22bd588c5bc2306d54d13effaee82", "088e3e939ad234b6fdd0e321290fb26937dc2553", "229acac1bd70c57e6a17f2c24f153c06d54de252", "59250c7388caba98bd4adc2f1969fbec5500ed6a", 
"32d23ce43877aa8cd385a8e01f366329dd015a5c", "e4d41e048722ece6c3efed7afc372319f6eed66d", "02fdca5fdba792e4f2c70b8b637abe4824343800", "407a55ea947f5f430e8def26c5f4183db0f53c3a", "108c840d5d1847948a2de0250490a327ae069ee6", "18fe996c6f43a8f301cd842507045b679ba3506a", "254171a0d89e34c73e800d6ec120842d2058b075", "bed6d0e530f20332c284a463c754ce1d304aca38", "086432132c177cc5e6d50a39a92cad540a162b40", "2f97a44c85b299485dacb1e6ad3ac6f4e1ba42ab", "67d07ba0550bbccba4ef34b409c9263b902de21c", "155ca30ef360d66af571eee47c7f60f300e154db", "4d3c779b5a224133bd5c69e05103fedbd904590a", "2a46b97a285acd23a5c1acdfb466c30b224169fc", "84fe12df86bfd0cc99dada158c94b4b71a433a52", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "0b4ada5b8bdabf4ee378b0992b2d3b70de0c07f7", "3e1dfc784252219f573e9540685ea2bc1666a850", "0ea1fee8651da646c5f2bdbbe83a58d05b6c5505", "42512431ca7fffdbc80eb7280d093efcead3d48d", "05dd6cb44124b8a210ac391f15ec25e68918ef22", "40a00e89195903fbaffb364fe410a215faf6715c", "c1a2e9ae8f1de1a2d7057fb7bf26a5b0567c67de", "10e0397e08f37ee5d3ba1f9e24ac9eb313c784f4", "11b8ef5da9c8df214859bb41b60001a0abd2b5b2", "ab112b6ed1393736b7074032fdb1d81fcce4c955", "6a1df9dae902f3d377f9c85ba9732b8d2270bf2b", "8c52daf711d10f54b14b8993bd4e2585a7d28ceb", "1761d2c31b685b2f099d18ec3457da8fb38241bf", "3d1353c3ad9f641ac81edc70ae6f3a3198102b74", "a9d0bea76adbcaf114f267e4c72237c020a1f47a", "4108e4635351d6f2d0916ee19d0a0ef878649c3c", "2be26e8aa238ac37a80e08303f128d8014bb9f3b", "2a0d9931c1e794ffe96c4b6c37507e96a3c4cafc", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "f4f37512b9786dbd9980d343e47fdc41032d0ca6", "1551df94e19afc9513ed81f54ec907e3f9da0278", "05dc4814248843389e8d2557e2d1f0c45d494e10" ], "paperAbstract": "We analyze how modern distributed storage systems behave in the presence of file-system faults such as data corruption and read and write errors. We characterize eight popular distributed storage systems and uncover numerous bugs related to file-system fault tolerance. 
We find that modern distributed systems do not consistently use redundancy to recover from file-system faults: a single file-system fault can cause catastrophic outcomes such as data loss, corruption, and unavailability. Our results have implications for the design of next generation fault-tolerant distributed and cloud storage systems.", "pdfUrls": [ "https://www.usenix.org/conference/fast17/technical-sessions/presentation/ganesan", "https://www.usenix.org/system/files/conference/fast17/fast17-ganesan.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17-ganesan.pdf", "http://research.cs.wisc.edu/adsl/Publications/fast17-ganesan.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4bbd/9d77460a14a628119d05332360c5d78df8d3.pdf", "s2Url": "https://semanticscholar.org/paper/4bbd9d77460a14a628119d05332360c5d78df8d3", "sources": [ "DBLP" ], "title": "Redundancy Does Not Imply Fault Tolerance: Analysis of Distributed Storage Reactions to Single Errors and Corruptions", "venue": "FAST", "year": 2017 }, "4c12369fdef1ac723b66586c9759fc7e9fb91fb4": { "authors": [ { "ids": [ "2848828" ], "name": "Zhongjun Jin" }, { "ids": [ "1758386" ], "name": "Michael R. Anderson" }, { "ids": [ "1725561" ], "name": "Michael J. Cafarella" }, { "ids": [ "1735239" ], "name": "H. V. 
Jagadish" } ], "doi": "10.1145/3035918.3064034", "doiUrl": "https://doi.org/10.1145/3035918.3064034", "entities": [ "Whole Earth 'Lectronic Link" ], "id": "4c12369fdef1ac723b66586c9759fc7e9fb91fb4", "inCitations": [ "984766d01b3427168167785584f5cd91c8cd8ac2", "4e2a37c1554afccc30b9cabbed0cb3d7fde26a17", "9712068318cb9ada3c26c06f5d0352f96206e2c1", "2274f61d00020b0e596b61e113ed16f23f8c0403", "33de4502da805dd10769d2412fd04ba5ad7867f7", "72f3753ea7e5e81b9bde11a64ebe65bed9e0bcbd", "20f49d1100663f1049572910099cf172fb140a3c" ], "journalName": "", "journalPages": "683-698", "journalVolume": "", "outCitations": [ "b9addc8ce998f6892120c2c8b23ae183312bfa6c", "0a030c2142dec882fea5f33b3f562e04d66d287a", "0f7a7d9fbd8c898f0e56d91feb6acda2efaf006a", "1126ceee34acd741396c493c84d8b6072a18bfd7", "93048dc9441985260fdebaf3a9d2654696e98f87", "31a816f4fef768f29772a003e534b1378611bfe6", "6bbaf76d82968a4349f7f043ece649c8ac1fbc0c", "8584bc6fb5fe616d338d5ae3d20d4848572b5578", "156b07d3a2d8d744385f1e09ea49a04b09c612a5", "379b242fcc606c2a43278630a97430f750654896", "1ec7a6456958359132117635e12b682e39220b7c", "47c27cd2c37ba331ced0b24fba43fd917d5f6d19", "aee5234fa1f35155dc6021f6ff99ffb6d6262478", "fa73764d62c016bc405da3393d5edd3be0ddb18e", "515b9903cb55e548b6732e953a1bd51f457c6353", "96693c88cdcf0b721e4eff7f4f426bc90f90d601", "a1039f3239b2ac21c3e52e9b74e9b53f3327e306", "da171cd0eb8942e1c1d1ad38f4382d38035aa094", "079866b2ed52cc0a27b2ac96b1489dfd3e7b40df", "224eff578c8ca0265e1ca8f36be79592a9367de5", "7c889b839e99316f749c4d4bff45ccdd7dbd46ef", "4acb56cfcb1e346fe6f87e5ea2583e33945f38cf", "382e55821fbc7285395a33267e267f705e4a8d30", "16ac4b7138e54c7c12aef23851cbe4e4ebb735eb", "12af98b4fed2efc71fe850de3cbbd35191161967", "26b9001cce4a7f2e838ef99d0e7593b18553a7e0", "21b2e1056b7fedac3f2c61e563d19a1cc0784f81", "0282e990528c6a9b4aa92cc196f46257fb4ccee1", "99cc63730e3079ed58311a4ec88f4f0c891ed61d", "58b2550804f24d5bb0b8cfe6815623be8760ffe1", "1b00d8eab3ae0873f1fb693d172095736e4186ad", 
"208e7934d900055b43b8b60e4a807ac00674ec4a", "807b9c73800f59ac191c0a43c242ef79ba5ec253", "e7cdc8f4d58bdefe4577da1b14d96a5754a1bd3a", "01c5d324b2adab8a8017d48f3f7aed640cceb52e", "23dadf25f3efacbc9c66f69093d656ad5b003529", "238be0efe497fc297013ae16109fbbd2ee3d9733", "637d02a269d632dee7a97454d469a783dba01bf6", "2962b6e48aef7234b273d7849e63f5ac026d9a31", "3ff96aff948c8f07ae5b2ce0a64e04d61a85291a", "4928c7729d2ca4817529e3229769f1b856cdf80d" ], "paperAbstract": "Data transformation is a critical first step in modern data analysis: before any analysis can be done, data from a variety of sources must be wrangled into a uniform format that is amenable to the intended analysis and analytical software package. This data transformation task is tedious, time-consuming, and often requires programming skills beyond the expertise of data analysts. In this paper, we develop a technique to synthesize data transformation programs by example, reducing this burden by allowing the analyst to describe the transformation with a small input-output example pair, without being concerned with the transformation steps required to get there. We implemented our technique in a system, FOOFAH, that efficiently searches the space of possible data transformation operations to generate a program that will perform the desired transformation. 
We experimentally show that data transformation programs can be created quickly with FOOFAH for a wide variety of cases, with 60% less user effort than the well-known WRANGLER system.", "pdfUrls": [ "http://web.eecs.umich.edu/~michjc/papers/jin_foofah_sigmod17.pdf", "http://web.eecs.umich.edu/~mrander/pubs/foofah-full.pdf", "http://doi.acm.org/10.1145/3035918.3064034" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4c12369fdef1ac723b66586c9759fc7e9fb91fb4", "sources": [ "DBLP" ], "title": "Foofah: Transforming Data By Example", "venue": "SIGMOD Conference", "year": 2017 }, "4c2274e0168f784b6fe84fbdcc76f824da370043": { "authors": [ { "ids": [ "2193560" ], "name": "Piji Li" }, { "ids": [ "2918971" ], "name": "Zihao Wang" }, { "ids": [ "2780667" ], "name": "Zhaochun Ren" }, { "ids": [ "1996394" ], "name": "Lidong Bing" }, { "ids": [ "1717078" ], "name": "Wai Lam" } ], "doi": "10.1145/3077136.3080822", "doiUrl": "https://doi.org/10.1145/3077136.3080822", "entities": [ "Artificial neural network", "Benchmark (computing)", "Deep learning", "E-commerce", "Experiment", "Latent variable", "Mobile app", "Numerical analysis", "Recommender system", "Recurrent neural network", "Simulation", "User experience", "User review" ], "id": "4c2274e0168f784b6fe84fbdcc76f824da370043", "inCitations": [ "c92403e2b6b1a9d4aa10d2ba0d87ad30cfa02153", "a483c62f661f57a883c06daa6ac25b7f80cad661", "590e75fdbb7006a7918bab1dfc8d2da881aea899", "7df6aa19f50c8ec5f12d58e0685ed5c6e9a08bb2", "23aa7a970673328dccea5d0708871bcbd0f237cf", "fe93121ff205bfd4dca834c2a8aa1fe6ec5649c5", "081ad92ce0e71541646218f11061c86414a960c2" ], "journalName": "", "journalPages": "345-354", "journalVolume": "", "outCitations": [ "762b63d2eb86f8fd0de98a08561b77527ae8f165", "53ced1bf79a5ff0db025fee76a67a33f37e4286d", "2eb32b1a4c5bf741632a9fd5f852253fd0d53def", "2e37d6b9a02ec31ea22252c37beae410c6b609a8", "75cca03b03dc514f03c102ccfdd53a7c0af625fc", "50d53cc562225549457cbc782546bfbe1ac6f0cf", 
"07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "50b8e8d48f4973cdeefa835807b4e1a8ca65ced3", "4f6e61d2ab1e2f468cc4bc8fdd8d6f13efaba468", "6c02053805434162e0fed26e1d5e035eb1071249", "091aded505b84cf87c197875ccfde24d98a300c9", "4c3103164ae3d2e79c9e1d943d77b7dfdf609307", "09bf97f066bcfcf2cce2c3e9cc34509f8368765c", "8c22faf409fa5dd336711358b61368369b0edb70", "468b9055950c428b17f0bf2ff63fe48a6cb6c998", "119868d3a5862c514e64d7855b41ff9927c311d4", "44fca068eecce2203d111213e3691647914a3945", "c41d71217cebb1ef70ba16068d21df9f7be16b70", "64c871cd7e0af7e1aeb94d98c6214eb8d8e8b989", "071b16f25117fb6133480c6259227d54fc2a5ea0", "052b1d8ce63b07fec3de9dbb583772d860b7c769", "0b544dfe355a5070b60986319a3f51fb45d1348e", "6bf6a3fd2c4c17c4326b81424ce19aba0a4b9c42", "39afbfe64d83b17368948c6cb3567431580b2a29", "94a62f470aeea69af436e2dd0b54cd50eaaa4b23", "2f33457e932b83a9b9bf7b8c36b0fa362e731b29", "e2b7f37cd97a7907b1b8a41138721ed06a0b76cd", "0842636e2efd5a0c0f34ae88785af29612814e17", "2275762a28582716db92df6d525ed2481c7d7f14", "3eaf79589dbb9bce5a502e867a8f03917e52de26", "a09e359c3db51e99f7adcb5c6f4d2aea5166527d", "1510cf4b8abea80b9f352325ca4c132887de21a0", "7656cddac460af91919558175063938acfdd813b", "1e7d7f76b3a7b494122f40c22487e60a51a2d1be", "85899f17a9413c2d69cc1d2191274fe0db15ef20", "0e730f8ed7cfdeb458cab36e8495e2cc0ee6d6a6", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "255e97d82f528b613dbe8883727abfd14f3f9f39", "92eb167f30ad59f6949667021760eb41078cf85c", "6de2b1058c5b717878cce4e7e50d3a372cc4aaa6", "8729441d734782c3ed532a7d2d9611b438c0a09a" ], "paperAbstract": "Recently, some E-commerce sites launch a new interaction box called Tips on their mobile apps. Users can express their experience and feelings or provide suggestions using short texts typically several words or one sentence. In essence, writing some tips and giving a numerical rating are two facets of a user's product assessment action, expressing the user experience and feelings. 
Jointly modeling these two facets is helpful for designing a better recommendation system. While some existing models integrate text information such as item specifications or user reviews into user and item latent factors for improving the rating prediction, no existing works consider tips for improving recommendation quality. We propose a deep learning based framework named NRT which can simultaneously predict precise ratings and generate abstractive tips with good linguistic quality simulating user experience and feelings. For abstractive tips generation, gated recurrent neural networks are employed to \"translate'' user and item latent representations into a concise sentence. Extensive experiments on benchmark datasets from different domains show that NRT achieves significant improvements over the state-of-the-art methods. Moreover, the generated tips can vividly predict the user experience and feelings.", "pdfUrls": [ "http://arxiv.org/abs/1708.00154", "http://lipiji.com/docs/li2017neural.pdf", "http://doi.acm.org/10.1145/3077136.3080822", "https://arxiv.org/pdf/1708.00154v1.pdf", "http://cseweb.ucsd.edu/classes/fa17/cse291-b/reading/p345-li.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4c2274e0168f784b6fe84fbdcc76f824da370043", "sources": [ "DBLP" ], "title": "Neural Rating Regression with Abstractive Tips Generation for Recommendation", "venue": "SIGIR", "year": 2017 }, "4c3643f80836a1375931851576087b2d26a64509": { "authors": [ { "ids": [ "32187076" ], "name": "Eduardo Moscoso Rubino" }, { "ids": [ "34647067" ], "name": "Alberto Jose Alvares" }, { "ids": [ "3318269" ], "name": "Raul Marin Prades" }, { "ids": [ "34580655" ], "name": "Pedro Sanz Valero" } ], "doi": "10.1109/ICPP.2017.64", "doiUrl": "https://doi.org/10.1109/ICPP.2017.64", "entities": [ "Algorithm", "Compare-and-swap", "Discrete wavelet transform", "In-place algorithm", "Lifting scheme", "Linearizability", "Lock (computer science)", "Memory bound function", 
"Memory bus", "Naivety", "Parallel algorithm", "Programming language", "SIMD", "SWAP (instrument)", "The C Programming Language", "Thread (computing)", "Time complexity", "Wavelet", "Wavelet transform" ], "id": "4c3643f80836a1375931851576087b2d26a64509", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "553-562", "journalVolume": "", "outCitations": [ "2423b45c1f5d581876b42d2305bd6dfbc0946f18", "bcda257e69c206c06a70efacc2de09c969adfb47", "04f28b47a2e10bcd648e4f3d35d48dd1a0ad6a23", "934e7b6798c0fcc94186868f3aab3d2511a2e72c", "2b7cb9ba44fcda84da7886eb6850c071596ab839", "0f5cbceaaed97ba2e71497849e026cd4ade6a439", "3c96e515fc1e292c5af563a1df1c6f69f052b19f", "efa162a4ae6ee95142e59dbdab2099b469af6bb3", "e01d717f10984f47a83860aaffc168f48aa2d181", "6bbf642a820e11dfa2494646180e48b63817c953", "3e8135a6d7642a4016ccb8937413f54bd08d470a", "99aa609e71f61d6651b10f5f256fe5a40a574a56", "03defbfdff55208e549f29f3724f1fe11da5b1b9", "b06bb5402fd284a2036946480392e4f9c0f96598", "3f7e44509b39ad5b33fbe5185346a845f7eb426e", "ee20fca6a54149bc33687cfcfedae2f48b350544", "7e7bc1fcb395b18cbf9197e7895ae0ef68b784a5" ], "paperAbstract": "A novel efficient inplace, multithreaded, and cachefriendly parallel 2-D wavelet transform algorithm based on the lifting transform is introduced. In order to maximize the cache utilization and consequently minimize the memory bus bandwidth use, the threads compete to work on a small memory area maximizing the chance of finding it in the cache and their synchronization is done with very low overhead without the use of any locks and relying solely on the basic compare-and-swap (CAS) atomic primitive. 
An implementation in the C programming language with and without the use of vector (single instruction multiple data - SIMD) instructions is provided for both single (serial) and multi (parallel) threaded single-loop DWT implementations as well as serial and parallel naive implementations using linear (row order) and strided (column order) memory access patterns for comparison. Results show a significant improvement over the single-threaded optimized implementation and a much greater improvement over both the single and multi threaded naive implementations, reaching minimum running time depending on the number of processor cores and the available memory bus bandwidth, i.e., it becomes memory bound using the minimum number of memory accesses. Given the simplicity and high speed of the lifting steps, an analysis based on the number of memory bus operations (read and write) is done for images that are larger than twice the shared cache size which establishes a lower bound for the running time of all linear memory access algorithms and also determines the maximum speed gains to be expected in relation to currently implemented parallel schemes based on the parallel execution of independent lifting steps. It also shows the optimality of the parallel algorithm presented. 
Finally, a comparison with currently available implementations shows the gains achieved by the proposed algorithm.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.64" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4c3643f80836a1375931851576087b2d26a64509", "sources": [ "DBLP" ], "title": "A Novel Minimum Time Parallel 2-D Discrete Wavelet Transform Algorithm for General Purpose Processors", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "4c3ec58f03b6ade10712361c17602be37b3613be": { "authors": [ { "ids": [ "2799608" ], "name": "Elliott Slaughter" }, { "ids": [ "2408735" ], "name": "Wonchan Lee" }, { "ids": [ "2402978" ], "name": "Sean Treichler" }, { "ids": [ "2151884" ], "name": "Wen Zhang" }, { "ids": [ "1756761" ], "name": "Michael Bauer" }, { "ids": [ "3057817" ], "name": "Galen M. Shipman" }, { "ids": [ "34694816" ], "name": "Patrick S. McCormick" }, { "ids": [ "4689402" ], "name": "Alexander Aiken" } ], "doi": "10.1145/3126908.3126949", "doiUrl": "https://doi.org/10.1145/3126908.3126949", "entities": [ "Compiler", "Distributed memory", "Implicit parallelism", "Parallel computing", "Programmer", "Programming language", "Programming model", "SPMD", "Scalability", "Speedup" ], "id": "4c3ec58f03b6ade10712361c17602be37b3613be", "inCitations": [ "199cf3fcbe03a62385ebb8ec10a521f7e0407073" ], "journalName": "", "journalPages": "14:1-14:12", "journalVolume": "", "outCitations": [ "04717b51d7c7dd598d732fb8a293d69d45c0fa9f", "03076602830c34ab1a3e70277e015e868c980136", "622b0470df42aa478c79991f2fd396e9ffa98ab6", "58dc88d6484fa37035eb1486b7d8d460c4c89e04", "024abe25f1dc77f18672f44c1c020be3e57cc3fe", "0ac7e127033f1534bb2042461c653784dcf29b55", "aec32310f6223ee32ef089fccb4e52f617b8bf37", "7d76ba8c4f6776c645673e2c3f6eb88b1a0ca7aa", "d802b277131892015086030c8c8354cc69f37e5b", "a7b25948f4f04e869a4aa8281ffbeee556b91643", "1f04c0378a78b0829d1faa76344ab39ec5dd3b3d", 
"137930adb403ff870aeda37d509e94958a68af37", "c7707cdffe019147f56bc391d5204a2793b95a77", "223e592891817714daffabed1104477b6ed8ca5d", "210e3d0418b1cc4f6ecf8fcfcf0f754cb65c1305", "2194c3460ab71f3826db00b045b2ae590c753319", "73bf064ce3156572ce7909c2a1553f1fc4d08e35", "17a85f0c967cd290d18dd368daff1ce27535cf08", "5d9d5c3ff08e6a36ff208d8b355d35cf3241f8f8", "515ecdf55b4f1d15ec8df93a4efd58dcd288aaf5", "5453ba227a7ee93a3efb580626dbc1719ab99478", "0541d5338adc48276b3b8cd3a141d799e2d40150", "5dea147b41ccefa69eee8c32ec0f4e830a799f3e", "262c123c6325d7b4c2edc904d6e85d352ab266b9", "8dc0e040477ee5c22d6a9ecfd0277e6eec46de44", "7e40209617935569a12a104c354eabf029a3b537", "5f838a12393ebc6b55be1b7a9f534711c32f5e67", "9e490640f84581bd8a63b785e16ebdd2649a32be", "0cbb62679982bce62a6fe6963bc0399fb35aeaa0", "2ea7c705d0d19ca72e7fb6aae07f63a988a68d53", "04b46e1c535f2e3d1b743fc9571bb856be7e79af", "b3d85a1a12ea16cef5132183f1b939e8661aba52", "2042b469be68653afcb2b7b38490c16369b4501a", "0aec5a3c39d95da1fac6c57ae0f3a2def5ce5cd3", "82b1fb7f2ec23346eec33db680576fb563400f49", "754b7668bda19339ad60d56c0edb0149aab96baa", "bceb53f3971fad428bc6c690428f3341944b97ff" ], "paperAbstract": "We present control replication, a technique for generating high-performance and scalable SPMD code from implicitly parallel programs. In contrast to traditional parallel programming models that require the programmer to explicitly manage threads and the communication and synchronization between them, implicitly parallel programs have sequential execution semantics and naturally avoid the pitfalls of explicitly parallel code. However, without optimizations to distribute control overhead, scalability is often poor.\n Performance on distributed-memory machines is especially sensitive to communication and synchronization in the program, and thus optimizations for these machines require an intimate understanding of a program's memory accesses. 
Control replication achieves particularly effective and predictable results by leveraging language support for first-class data partitioning in the source programming model. We evaluate an implementation of control replication for Regent and show that it achieves up to 99% parallel efficiency at 1024 nodes with absolute performance comparable to hand-written MPI(+X) codes.", "pdfUrls": [ "http://legion.stanford.edu/pdfs/cr2017.pdf", "http://doi.acm.org/10.1145/3126908.3126949" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4c3ec58f03b6ade10712361c17602be37b3613be", "sources": [ "DBLP" ], "title": "Control replication: compiling implicit parallelism to efficient SPMD with logical regions", "venue": "SC", "year": 2017 }, "4c458c029ed8d1b117223d529dc84bdd4d3b2680": { "authors": [ { "ids": [ "1702236" ], "name": "Andrea E. F. Clementi" }, { "ids": [ "3223655" ], "name": "Luciano Gual\u00e0" }, { "ids": [ "1805060" ], "name": "Guido Proietti" }, { "ids": [ "7742326" ], "name": "Giacomo Scornavacca" } ], "doi": "10.1109/IPDPS.2017.67", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.67", "entities": [ "Best, worst and average case", "Broadcasting (networking)", "Color", "Consensus (computer science)", "Fairness measure", "Gossip protocol", "Ramsey's theorem", "Rational agent", "Strong prime", "With high probability" ], "id": "4c458c029ed8d1b117223d529dc84bdd4d3b2680", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "163-171", "journalVolume": "", "outCitations": [ "343821c80e4b1d3f566e3e25c31a5ce2d7c71d1b", "253cfd9f0e1bf657b1e7f4361491a02044b482f4", "241afc601cb41d336b5e66a8a5a2cf35c6512882", "081e614534e6f7ae2ab0ea4d771eb7490528da7d", "4859412ed75a521213054fd4aff5264dc1ccdbdc", "f8fe0791bd87a44e6c663325043c0fcc70c843bb", "3045ab5550d9a5d1cd30f37a0547b956f570f14c", "46a3a5f40b927726f575be4fa8ce058a81a28c54", "f407af624b127af27e00e9321dbb4c0f31ed4a78", 
"06add5c025f2e5877bc4b064160dbd208bde876c", "83373b291736be8280caef7b5fa8ae07b6affa44", "10b44b914a35142eb7c1cff7a33e5527715561ee", "b06f887b880c45e9fd2cfd85a3f28cff7013f0cd", "1bbdc09011f78bb7b4648bf51ebc9e57ea53d6a5", "b65e7a34b0bab3f293eb26985a87d81bbced3311", "0c9215e0a103dc78dd8d14337c7caf0ed6a1b395", "4358ec9334eb3c313046114cb83f5f86063c1df6", "00dcfc1be4b5a9d2bcf35167798aab6933cbe571" ], "paperAbstract": "The rational fair consensus problem can be informally defined as follows. Consider a network of n (selfish) rational agents, each of them initially supporting a color chosen from a finite set Σ. The goal is to design a protocol that leads the network to a stable monochromatic configuration (i.e. a consensus) such that the probability that the winning color is c is equal to the fraction of the agents that initially support c, for any c ∈ Σ. Furthermore, this fairness property must be guaranteed (with high probability) even in presence of any fixed coalition of rational agents that may deviate from the protocol in order to increase the winning probability of their supported colors. A protocol having this property, in presence of coalitions of size at most t, is said to be a whp-t-strong equilibrium. We investigate, for the first time, the rational fair consensus problem in the Gossip communication model where, at every round, every agent can actively contact at most one neighbor via a push/pull operation. We provide a randomized Gossip protocol that, starting from any initial color configuration of the complete graph, achieves rational fair consensus within O(log n) rounds using messages of O(log^2 n) size, w.h.p. More in details, we prove that our protocol is a whp-t-strong equilibrium for any t = o(n/log n) and, moreover, it tolerates worst-case permanent faults provided that the number of non-faulty agents is Ω(n). 
As far as we know, our protocol is the first solution which avoids any all-to-all communication, thus resulting in o(n^2) message complexity.", "pdfUrls": [ "http://arxiv.org/abs/1705.09566", "https://doi.org/10.1109/IPDPS.2017.67", "https://arxiv.org/pdf/1705.09566v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4c458c029ed8d1b117223d529dc84bdd4d3b2680", "sources": [ "DBLP" ], "title": "Rational Fair Consensus in the Gossip Model", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "4c49270fefd4d359e3ee76e1a6bccb94283d51ff": { "authors": [ { "ids": [ "2993481" ], "name": "Srikanth Sundaresan" }, { "ids": [ "3041577" ], "name": "Xiaohong Deng" }, { "ids": [ "15171947" ], "name": "Yun Feng" }, { "ids": [ "6197185" ], "name": "Danny Lee" }, { "ids": [ "1718655" ], "name": "Amogh Dhamdhere" } ], "doi": "10.1145/3131365.3131382", "doiUrl": "https://doi.org/10.1145/3131365.3131382", "entities": [ "Call stack", "Crowdsourcing", "End-to-end principle", "Internet", "Network congestion", "Software deployment", "Throughput", "Tomography", "Web content" ], "id": "4c49270fefd4d359e3ee76e1a6bccb94283d51ff", "inCitations": [ "f8f7c2fb6c7ca2af10f94de9fba92fdad8601cf9", "06fd348d9388abfd880d3f207e3664e3180857cb" ], "journalName": "", "journalPages": "43-56", "journalVolume": "", "outCitations": [ "d09aa6f9d2df3e20441f80914947e6aae60a016b", "9d080843bd5da6b8b1fd776640989629368d5f6c", "b3afefa8e89adda9112724015142e99daeabf9e9", "31264b04c2f9b7b3b3de006cab089b91bc074868", "0d8937cad20f57a0453111691efe0ddbc90604b3", "06fd348d9388abfd880d3f207e3664e3180857cb", "70969bbc88f608b7c8b43da3ecdfcb021dd6a9cc", "1f453bfdfa2c2889cfcf21f647041314f7b69e04", "32ac1fed2f6f9bf6b8913091f5a6efd40d71b1e1", "0d9203c57fc06953402c40d9fe7b7db1a853fe71", "752cfbabe82028ec68660c43460f69ceb63d33cd", "bde33924af7cb40e29675408870b53a3bd3b36c2", "3407a106c79137134c511bd00b70086abff09fa2", 
"8290f19dda6d04c4696efbae9b14a794bacbf09a" ], "paperAbstract": "We revisit the use of crowdsourced throughput measurements to infer and localize congestion on end-to-end paths, with particular focus on points of interconnections between ISPs. We analyze three challenges with this approach. First, accurately identifying which link on the path is congested requires fine-grained network tomography techniques not supported by existing throughput measurement platforms. Coarse-grained network tomography can perform this link identification under certain topological conditions, but we show that these conditions do not always hold on the global Internet. Second, existing measurement platforms provide limited visibility of paths to popular web content sources, and only capture a small fraction of interconnections between ISPs. Third, crowdsourcing measurements inherently risks sample bias: using measurements from volunteers across the Internet leads to uneven distribution of samples across time of day, access link speeds, and home network conditions. Finally, it is not clear how large a drop in throughput to interpret as evidence of congestion. 
We investigate these challenges in detail, and offer guidelines for deployment of measurement infrastructure, strategies, and technologies that can address empirical gaps in our understanding of congestion on the Internet.", "pdfUrls": [ "http://www.caida.org/publications/papers/2017/challenges_inferring_internet_congestion/challenges_inferring_internet_congestion.pdf", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final100.pdf", "http://doi.acm.org/10.1145/3131365.3131382" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4c49270fefd4d359e3ee76e1a6bccb94283d51ff", "sources": [ "DBLP" ], "title": "Challenges in inferring internet congestion using throughput measurements", "venue": "IMC", "year": 2017 }, "4c4f3ff4a59260080ba6d6859911f317a0fff177": { "authors": [ { "ids": [ "3260361" ], "name": "Yan Shoshitaishvili" }, { "ids": [ "2405785" ], "name": "Michael Weissbacher" }, { "ids": [ "7215706" ], "name": "Lukas Dresel" }, { "ids": [ "3425110" ], "name": "Christopher Salls" }, { "ids": [ "8199136" ], "name": "Ruoyu Wang" }, { "ids": [ "1715189" ], "name": "Christopher Kr\u00fcgel" }, { "ids": [ "1711242" ], "name": "Giovanni Vigna" } ], "doi": "10.1145/3133956.3134105", "doiUrl": "https://doi.org/10.1145/3133956.3134105", "entities": [ "Application security", "Autonomous car", "Autonomous system (Internet)", "Broadcast automation", "DARPA Grand Challenge", "Human factors and ergonomics", "Mission critical", "Our World", "Programming paradigm", "Software system" ], "id": "4c4f3ff4a59260080ba6d6859911f317a0fff177", "inCitations": [ "b1400438b4822d59a64fba31d0dc590306418ac3" ], "journalName": "", "journalPages": "347-362", "journalVolume": "", "outCitations": [ "12d8d777e6e55044801a441849f602cdde919eb2", "0b7f62a2ac217e035e0cd9cb73d2de4fb6135af5", "32c9b3a4990fdb26da19a4e0817936369a5e91bc", "21a9f6d4f3194ad5f2ad20998a4e81d398e36874", "2a05a30beb7500192798370016039607d4a6b799", "0cffc09e3fd3c8c1569df766c391ba3afb96c208", 
"991f9040922b0bc1dd713304861f09a013d62dc6", "54a2e154c614453f678374621fd2d41d6ea90b7c", "a23cef5b9c04bc7ee0d090fe5937d44820595192", "4f736e861ae0b5a3a989dc786106f725824f31ec", "c271535aa3e9a5cc7839543667017cb32ec9b94c", "93f8e010f7c7307804ef7b70695b7dc4b35db52d", "12789fd5b47542937d1b83ef8b99bdb9c7a70dec", "5556995fb630c47805bbba560287ea59ce357fa1", "32de6a22689e041c643a596a62530cc1c14e1bd4", "3c99a6197d5de6a452c887b66f53efa1101cf20c", "6f45152ce34b4326fc0adfb7d7b6587b13d0a62c", "7e92eed33cdd74b64f5f8f038c6bd98a03cede87", "8d0a150bd390ba3f9f32f7b12cda58edf436aa3f", "19f9cccd47ac99d167eebfec5937e95138d2aed8", "8bb37d8ae52b33cefa70c73befc64e3ca79cded7", "479bf197a3a9c5f6f7b06e64eefa0a90fa0c8b41", "51e53b7148cf7387d90f3048f14f721367e283f5", "6cf8ec34a008031b018c8a3a4640a87f476d0925", "84c0c0ad1bb08bb4d1b000708a92851a6ec2b660" ], "paperAbstract": "Software permeates every aspect of our world, from our homes to the infrastructure that provides mission-critical services.\n As the size and complexity of software systems increase, the number and sophistication of software security flaws increase as well. The analysis of these flaws began as a manual approach, but it soon became apparent that a manual approach alone cannot scale, and that tools were necessary to assist human experts in this task, resulting in a number of techniques and approaches that automated certain aspects of the vulnerability analysis process.\n Recently, DARPA carried out the Cyber Grand Challenge, a competition among autonomous vulnerability analysis systems designed to push the tool-assisted human-centered paradigm into the territory of complete automation, with the hope that, by removing the human factor, the analysis would be able to scale to new heights. 
However, when the autonomous systems were pitted against human experts it became clear that certain tasks, albeit simple, could not be carried out by an autonomous system, as they require an understanding of the logic of the application under analysis.\n Based on this observation, we propose a shift in the vulnerability analysis paradigm, from tool-assisted human-centered to human-assisted tool-centered. In this paradigm, the automated system orchestrates the vulnerability analysis process, and leverages humans (with different levels of expertise) to perform well-defined sub-tasks, whose results are integrated in the analysis. As a result, it is possible to scale the analysis to a larger number of programs, and, at the same time, optimize the use of expensive human resources.\n In this paper, we detail our design for a human-assisted automated vulnerability analysis system, describe its implementation atop an open-sourced autonomous vulnerability analysis system that participated in the Cyber Grand Challenge, and evaluate and discuss the significant improvements that non-expert human assistance can offer to automated analysis approaches.", "pdfUrls": [ "http://www.cs.ucsb.edu/~vigna/publications/2017_CCS_HaCRS.pdf", "https://arxiv.org/pdf/1708.02749v1.pdf", "http://arxiv.org/abs/1708.02749", "http://doi.acm.org/10.1145/3133956.3134105", "http://sefcom.asu.edu/publications/rise-of-the-hacrs-ccs2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4c4f3ff4a59260080ba6d6859911f317a0fff177", "sources": [ "DBLP" ], "title": "Rise of the HaCRS: Augmenting Autonomous Cyber Reasoning Systems with Human Assistance", "venue": "CCS", "year": 2017 }, "4c54a8ee4b99125a52a3a3389972c7af1f3016ae": { "authors": [ { "ids": [ "3388493" ], "name": "Salessawi Ferede Yitbarek" }, { "ids": [ "31685956" ], "name": "Misiker Tadesse Aga" }, { "ids": [ "40040123" ], "name": "Reetuparna Das" }, { "ids": [ "1769314" ], "name": "Todd M. 
Austin" } ], "doi": "10.1109/HPCA.2017.10", "doiUrl": "https://doi.org/10.1109/HPCA.2017.10", "entities": [ "Central processing unit", "Cipher", "Cold boot attack", "DIMM", "Disk encryption", "Dynamic random-access memory", "Emergence", "Encryption", "Non-volatile memory", "Overhead (computing)", "Power supply", "Reboot (computing)", "Replay attack", "Salted Challenge Response Authentication Mechanism", "Scrambler", "Signal integrity", "Skylake (microarchitecture)", "Stream cipher", "Vector (malware)" ], "id": "4c54a8ee4b99125a52a3a3389972c7af1f3016ae", "inCitations": [ "9d0c7a61c47d0db3181408ffdde5f140a5e07c0f" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "313-324", "journalVolume": "", "outCitations": [ "b2c005f5e9df9a3aa0f9f082d1cd26720b14cc51", "e4d1401b660e22e35327af0586d10f77575492c8", "f68b1c27574fc26b9427e2c4b8d1bc4ed02d3360", "054bca7f2fa00c3d55f0e028b37513bebb9c4ea5", "06bf0862b9b3465f895ef5bf3530cfe66a428e22", "19f7caf88ba1e30eb85bdab58b092e46b1a054c0", "304c2b0b6e8e1cc746117a14257dbc024d5135e9", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "08c2649dee7ba1ab46106425a854ca3af869c2f0", "2835808d700c88459ff21ce31ba3c4ef02778ddb", "20b63210954f7c5a70664f301dcd7196856ccfa7", "27d6270a8e8ec19790924d2139c793407fc1ff41", "7d4b873a8e89ee7ef16a0e776ac30a9b45a9b394", "ed6ed456ec184fccf4a9cf6a2364f4500aa8ddd7", "07272e31fb957e026a6bc36d55e412de26843c7f", "0a289fd7b14345822b1acda6d82750b15d59663e", "64b71ec1e51cd1d4582beb06ce0767dcecd5dc2f", "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "9687ea801aeb7cbeef3be3202d68cc9d780e02dc", "b02403d3239a6d6e78911192f4f82ce987a78944", "6114ee81e8440dd0bc6801053fb8c7c717a0b521" ], "paperAbstract": "Previous work has demonstrated that systems with unencrypted DRAM interfaces are susceptible to cold boot attacks – where the DRAM in a system is frozen to give it sufficient retention time and is then re-read after reboot, or is transferred to an attacker's 
machine for extracting sensitive data. This method has been shown to be an effective attack vector for extracting disk encryption keys out of locked devices. However, most modern systems incorporate some form of data scrambling into their DRAM interfaces making cold boot attacks challenging. While first added as a measure to improve signal integrity and reduce power supply noise, these scramblers today serve the added purpose of obscuring the DRAM contents. It has previously been shown that scrambled DDR3 systems do not provide meaningful protection against cold boot attacks. In this paper, we investigate the enhancements that have been introduced in DDR4 memory scramblers in the 6th generation Intel Core (Skylake) processors. We then present an attack that demonstrates these enhanced DDR4 scramblers still do not provide sufficient protection against cold boot attacks. We detail a proof-of-concept attack that extracts memory resident AES keys, including disk encryption keys. The limitations of memory scramblers we point out in this paper motivate the need for strong yet low-overhead full-memory encryption schemes. Existing schemes such as Intel's SGX can effectively prevent such attacks, but have overheads that may not be acceptable for performance-sensitive applications. However, it is possible to deploy a memory encryption scheme that has zero performance overhead by forgoing integrity checking and replay attack protections afforded by Intel SGX. To that end, we present analyses that confirm modern stream ciphers such as ChaCha8 are sufficiently fast that it is now possible to completely overlap keystream generation with DRAM row buffer access latency, thereby enabling the creation of strongly encrypted DRAMs with zero exposed latency. Adopting such low-overhead measures in future generation of products can effectively shut down cold boot attacks in systems where the overhead of existing memory encryption schemes is unacceptable. 
Furthermore, the emergence of non-volatile DIMMs that fit into DDR4 buses is going to exacerbate the risk of cold boot attacks. Hence, strong full memory encryption is going to be even more crucial on such systems.", "pdfUrls": [ "https://www.eecs.umich.edu/eecs/about/articles/2017/HPCA17-coldboot.pdf", "https://doi.org/10.1109/HPCA.2017.10" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4c54a8ee4b99125a52a3a3389972c7af1f3016ae", "sources": [ "DBLP" ], "title": "Cold Boot Attacks are Still Hot: Security Analysis of Memory Scramblers in Modern Processors", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "4c5e7a3c12ddc4cbdade447667a00e461905528a": { "authors": [ { "ids": [ "3246450" ], "name": "L. Elisa Celis" }, { "ids": [ "2025970" ], "name": "Peter Krafft" }, { "ids": [ "1810064" ], "name": "Nisheeth K. Vishnoi" } ], "doi": "10.1145/3087801.3087820", "doiUrl": "https://doi.org/10.1145/3087801.3087820", "entities": [ "Heuristic", "Mathematical optimization", "Population dynamics", "Reinforcement learning", "Skolem normal form", "Stochastic process" ], "id": "4c5e7a3c12ddc4cbdade447667a00e461905528a", "inCitations": [ "2883ebf89f62972dcfad22ff8a2bf94082f65063" ], "journalName": "", "journalPages": "441-450", "journalVolume": "", "outCitations": [ "0a9c6db5676cf04fa760d498522a6c71a85a4dba", "027af00776ecb82cad685d93b36073aebed35b73", "b50e429252a5c3135977000c67f977ba222a8c59", "0381647f826e9865edb6c2c1754f8ec54ff21163", "1d6abcc133a2dde1d9cd1f3e0ea973ac94f4235c", "679ce18f6655d3f94d038901eb4fd2dd4d5a6f37", "5853453f7549402e0fc56a9ee0694baa3928d1f8", "75fb27ae0495c4a318d27e71fa7fb838e1e5cf58", "27deee900eadd2803c407336f9cf553427e4ea32", "269bc6f89cf334c7667e7ff15536776e149991b0", "8c89c019c88f7d529b5a7df1185375ddbd6cc486", "09c99bf83096eb4704283111a45fdb1bdd54d829", "1e73f6f927997b7f86606a77ff9795d77948a53d", "5f0e251752b5ee6a904a8b10c2a9f67b66244dc0", 
"ac2e640a89f321cffe77f518ab86f039277bc5b2", "45b26196169f568abd7e47f723dc8bc21a1e9a2c", "2d94b78403b89002e6b6846c7317f169448e92b8", "2bab6c5220111d9f0b70540dabdd0316062e9411", "61e2e06431c6245f3bd4f8668517044f6f459a75", "af151c0e0c7eb5762ef57395ff980e024a4acdd1", "23a4971ea1a47a827df398cbabc26e0e05132203", "52792bbbe9383b33af6916005547a93228e3524c", "2fc412851fce5b3d05a59fe8bc44b2fbccb80914", "4a437f424d469ee1af9b415509e6287d02eae7ae", "47f40f2345fbb2e17e0efdadf984967cb71e9df7", "16fce2945793d0842a21000f15b60d3c74e2f8da", "384ba4e423a7b51f85a8f97de367ccbf9fea1ea2", "57e61a469e280360b86524765b96447e34a071f1", "6583f945520e48a6f3c25a2b7a564a79aac1befe", "7541d17dfab78485d7b894afc31c63e464a423bf", "dbd80760f1da9a0e3afd20420763c82c59846924", "893bb2d6a150ef04c06b143ab1854af5b863a730", "245dce914e257f12391b0170025374323693acc7", "bea764470272185190e4bd841b0772c6a17f7223", "3b0d96ae2dedbe88ae13eaba040a080a1c769ecf", "0dfd538d6eb4b670bf03547cb62c62c4ae3314a4", "0325bf680d2c871ac40fd31157d0c0befa1420eb", "28df3f3d406879ecfa2034d9f1bd69bc3e52dcd2", "930b1b411516d6dd6399962875cd5d89c33f9da2", "1e3bf21de47038c66678eedb51961c34ffeaa0ce" ], "paperAbstract": "We study a distributed learning process observed in human groups and other social animals. This learning process appears in settings in which each individual in a group is trying to decide over time, in a distributed manner, which option to select among a shared set of options. Specifically, we consider a stochastic dynamics in a group in which every individual selects an option in the following two-step process: (1) select a random individual and observe the option that individual chose in the previous time step, and (2) adopt that option if its stochastic quality was good at that time step. Various instantiations of such distributed learning appear in nature, and have also been studied in the social science literature. 
From the perspective of an individual, an attractive feature of this learning process is that it is a simple heuristic that requires extremely limited computational capacities. But what does it mean for the group \u2013 could such a simple, distributed and essentially memoryless process lead the group as a whole to perform optimally? We show that the answer to this question is yes \u2013 this distributed learning is highly effective at identifying the best option and is close to optimal for the group overall. Our analysis also gives quantitative bounds that show fast convergence of these stochastic dynamics. We prove our result by first defining a (stochastic) infinite population version of these distributed learning dynamics and then combining its strong convergence properties along with its relation to the finite population dynamics. Prior to our work the only theoretical work related to such learning dynamics has been either in deterministic special cases or in the asymptotic setting. Finally, we observe that our infinite population dynamics is a stochastic variant of the classic multiplicative weights update (MWU) method. Consequently, we arrive at the following interesting converse: the learning dynamics on a finite population considered here can be viewed as a novel distributed and low-memory implementation of the classic MWU method. 
\u2217This research was supported in part by an SNF Project Grant (205121 163385).", "pdfUrls": [ "https://arxiv.org/pdf/1705.03414v1.pdf", "http://doi.acm.org/10.1145/3087801.3087820", "http://arxiv.org/abs/1705.03414", "https://export.arxiv.org/pdf/1705.03414" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4c5e/7a3c12ddc4cbdade447667a00e461905528a.pdf", "s2Url": "https://semanticscholar.org/paper/4c5e7a3c12ddc4cbdade447667a00e461905528a", "sources": [ "DBLP" ], "title": "A Distributed Learning Dynamics in Social Groups", "venue": "PODC", "year": 2017 }, "4c6869e2f11121f23ce66439ea1aa7bbb95036ad": { "authors": [ { "ids": [ "2444533" ], "name": "Pierre-Fran\u00e7ois Dutot" }, { "ids": [ "3262245" ], "name": "Yiannis Georgiou" }, { "ids": [ "2247963" ], "name": "David Glesser" }, { "ids": [ "3080619" ], "name": "Laurent Lef\u00e8vre" }, { "ids": [ "38785057" ], "name": "Millian Poquet" }, { "ids": [ "8197772" ], "name": "Issam Ra\u00efs" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "FLOPS", "Job scheduler", "Scheduling (computing)", "Shutdown (computing)", "Simulation" ], "id": "4c6869e2f11121f23ce66439ea1aa7bbb95036ad", "inCitations": [ "204ed869f69468d2c88ff64f67300d810f686c1a" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "381-390", "journalVolume": "", "outCitations": [ "e0140bbf28f231ccddb639b299ba515f1f5f0e15", "2951408162adf00a4aa989b813f69454af7b348e", "62f7e49f77ca13b7690ce0106235063f6b0771d1", "1fe28a5b5ec16120de1fdc2f832fecb05ca5ec1f", "4dbc1467275f8a3152bab09b92fb42072cbbab23", "46c7d0bbf7731950aadf4a2f19bff7f9d80f0a02", "f6ab527a5919b48b66908954a3086947c5bffde6", "b94d6bb4506dbb02244467f989b8aa1f06389988", "006152fe58da5c8db1a922f8805fe8f4f0af2204", "20d6a8a39ebecd21bc8a4df53f248356d38ea6d9", "81c4e99059104b00adc14f6797758aff998c066d", "07c8dc1238106ed94d5357b72e4bfebd256f162f", "0f44833eb9047158221e7b3128cde1347b58ccd6", 
"9d742984f04f2d04ec8a765aaba143abfb41ccac", "6049062a3a73d22c914e7fa8951b3b0e5f09b309", "b56daafeb36e1c19180f401924a6f9009940efe9", "79430b96d82f35a4deffae7792ef2ab99c792013", "85a7352a5f69ccf62b8e27c47f345f75092f4fb8", "3eaf9714931e8f59919d46efe4f9f1fb8fa492e1", "02d3d91f16330740cfb104f61f9aaf5a5dd6a69e", "642f72cdee8f3e9a5275e47cad844e1c54b57b83", "59c6c3ce034e5dadbc378604c7294eccec4ec47f", "25d6e0889c35730c415933dd57a69fffea7aecec", "34a86b7b24d4a93dbb249fc05f0b7c0f48f90aff", "31aeea96898c7591f90953ff5992f0619a0c53cc", "1e8233a8c8271c3278f1b84bed368145c0034a35", "ae75883eb178a1549e6a5212630caff0a1810f21" ], "paperAbstract": "Energy consumption has become one of the most critical issues in the evolution of High Performance Computing systems (HPC). Controlling the energy consumption of HPC platforms is not only a way to control the cost but also a step forward on the road towards exaflops. Powercapping is a widely studied technique that guarantees that the platform will not exceed a certain power threshold instantaneously but it gives no flexibility to adapt job scheduling to a longer term energy budget control. We propose a job scheduling mechanism that extends the backfilling algorithm to become energy-aware. Simultaneously, we adapt resource management with a node shutdown technique to minimize energy consumption whenever needed. This combination enables an efficient energy consumption budget control on a cluster during a period of time. The technique is experimented, validated and compared with various alternatives through extensive simulations. 
Experimentation results show high system utilization and limited bounded slowdown along with interesting outcomes in energy efficiency while respecting an energy budget during a particular time period.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101165", "https://hal.archives-ouvertes.fr/hal-01533417/file/towards_energy_budget_control_in_hpc.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4c6869e2f11121f23ce66439ea1aa7bbb95036ad", "sources": [ "DBLP" ], "title": "Towards Energy Budget Control in HPC", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "4c6f373112a192c929ff512a954091d4a0e35b48": { "authors": [ { "ids": [ "4901573" ], "name": "Hoang Anh Dau" }, { "ids": [ "1732516" ], "name": "Eamonn J. Keogh" } ], "doi": "10.1145/3097983.3097993", "doiUrl": "https://doi.org/10.1145/3097983.3097993", "entities": [ "Algorithm", "Data mining", "Motif", "Robotics", "Scalability", "Sequence motif", "Time series" ], "id": "4c6f373112a192c929ff512a954091d4a0e35b48", "inCitations": [ "c8eefbf3f66d1ebbc78a4c7264227f4bda7918d5" ], "journalName": "", "journalPages": "125-134", "journalVolume": "", "outCitations": [ "233f140a1d7748c990b628ee6997c687add98e0a", "835ac0e309fc774d33c566609b8849b813dbf897", "d8febf6ddd72e66f8d1b6cb5441543b7ea4fd9d9", "2409557812a3d26258949ba73a05031591f42bdc", "23026fd456fdd44f116a42bb0dbacfd48c303217", "4e29f2f3b724815a461a2b36b2084e608f76ac8e", "887a246dae18b8e9a65826bea88983e4f7bf5270", "a262b8d6ff37c9d97d5235ecea7b5e43b1704a71", "778d7c23e24832f3a947e36d6eaadb1c6b610e67", "9147dfcabee869e7217ef2841df9a21337230e65", "6468cb864ce9ec18c25d86dd5e07cc4f285272b6", "0946c4a2f3da843895355056f435610f2ba8398f", "0bfdd7fb60bb4959ac38fadca7dcfbf549dd5456", "a46b204cf37eb9fe95d3e27a2c64203c90567fa3", "9b60975837beb2800ec510c62035700e2aa754d9", "9667adfc8f4195a4fe31d3127878e6d72799acbe", "e13cbb0fd4f7cb26c1c2ed6abd2a09ec9a0cfdae", 
"29a183494372d07fddbe33f829960e5b24ebff85", "b3ddb92b59c95bd2b10aef95b133293e30161cec", "070186ee6691eb1cdba76ecc3078173e9139b460", "61523cfe6f51859e00aa8ce320114c03151208fa" ], "paperAbstract": "Time series motif discovery has emerged as perhaps the most used primitive for time series data mining, and has seen applications to domains as diverse as robotics, medicine and climatology. There has been recent significant progress on the scalability of motif discovery. However, we believe that the current definitions of motif discovery are limited, and can create a mismatch between the user's intent/expectations, and the motif discovery search outcomes. In this work, we explain the reasons behind these issues, and introduce a novel and general framework to address them. Our ideas can be used with current state-of-the-art algorithms with virtually no time or space overhead, and are fast enough to allow real-time interaction and hypotheses testing on massive datasets. We demonstrate the utility of our ideas on domains as diverse as seismology and epileptic seizure monitoring.", "pdfUrls": [ "http://www.cs.ucr.edu/~hdau001/guided_motif_search/guided_motif_search_in_print.pdf", "http://www.cs.ucr.edu/~eamonn/guided-motif-KDD17-new-format-10-pages-v005.pdf", "http://doi.acm.org/10.1145/3097983.3097993" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4c6f373112a192c929ff512a954091d4a0e35b48", "sources": [ "DBLP" ], "title": "Matrix Profile V: A Generic Technique to Incorporate Domain Knowledge into Motif Discovery", "venue": "KDD", "year": 2017 }, "4c80aa6bf6e10e4d8d9b1e07404800bcd017f38b": { "authors": [ { "ids": [ "1726883" ], "name": "Vaspol Ruamviboonsuk" }, { "ids": [ "2846332" ], "name": "Ravi Netravali" }, { "ids": [ "2724958" ], "name": "Muhammed Uluyol" }, { "ids": [ "1764637" ], "name": "Harsha V. 
Madhyastha" } ], "doi": "10.1145/3098822.3098851", "doiUrl": "https://doi.org/10.1145/3098822.3098851", "entities": [ "Central processing unit", "Load (computing)", "Loader (computing)", "Mobile device", "Proxy server", "Scheduling (computing)", "Web page", "Web server" ], "id": "4c80aa6bf6e10e4d8d9b1e07404800bcd017f38b", "inCitations": [ "0457c76af0aa3d0586e3fdd6ece7ea6fda65b7da" ], "journalName": "", "journalPages": "390-403", "journalVolume": "", "outCitations": [ "005c65dfa582b03bb6d9bbab863f935c86c5c052", "3f7d260551e442e3c0453e0bd9db30b86afa286f", "1793930fb533c17e3bfac398554b78a6421efc25", "4d7e5e430bec3db4044b13ce8da7411f09c745f3", "03d83007173d5bdd55e5b2894a53352b52e9201e", "430cd2b1c08aa86bb4aef152ee2ca764c5342c3e", "925f8085efaac07756a0e47f9ac61f00d332b06f", "e9e86208d47369436d296c19eae4f418e2e6c7b6", "0cdc02c712420ef2f5d6f472f7b247e554e1bd14", "065e1b5f59ccd2526117be9ec98c2df9e4172bea", "21581c40cabfbc9b3038d257bed8e96b954cc303", "6566d98a370ee01ad78c12ec4471bb5ffbe7a8ab", "e68b642704709bf9622d7aad526a57e61b8a5c8d", "103eef2be0295f4a26e0d5043c95b9a5c8323975", "22bb280dffb450f6f65798c529309770a853aee3", "262a010c2607e0e7ebf103b0a7f48f963019594d", "cf64cdc889a4edaf641a307aa2b11d89d4d10a09", "9dff90933f8f57b12226c1c1a604e493f7ed89d6", "16d0a8ee484f4a34e1cdcda8a0c2453e2e962ada", "0b6ea07d2d7ea0f95969f9e223d362c2e6aa79b4", "143481d55d9f9d25e53f06a6afaf15feb7430c62", "5fad167381681bb5a53b64311ca0faea550eb4e4", "1497e55f921c8132be9fd67a0aca648f3379378f", "1aaea3bf77dfa69605cf7d243fc6a8255d11aae9", "08bb5149cc215c0714492b407145bbc93006f44c" ], "paperAbstract": "The existing slowness of the web on mobile devices frustrates users and hurts the revenue of website providers. 
Prior studies have attributed high page load times to dependencies within the page load process: network latency in fetching a resource delays its processing, which in turn delays when dependent resources can be discovered and fetched.\n To securely address the impact that these dependencies have on page load times, we present Vroom, a rethink of how clients and servers interact to facilitate web page loads. Unlike existing solutions, which require clients to either trust proxy servers or discover all the resources on any page themselves, Vroom's key characteristics are that clients fetch every resource directly from the domain that hosts it but web servers aid clients in discovering resources. Input from web servers decouples a client's processing of resources from its fetching of resources, thereby enabling independent use of both the CPU and the network. As a result, Vroom reduces the median page load time by more than 5 seconds across popular News and Sports sites. To enable these benefits, our contributions lie in making web servers capable of accurately aiding clients in resource discovery and judiciously scheduling a client's receipt of resources.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098851", "http://conferences.sigcomm.org/sigcomm/2017/files/program/ts-9-3-vroom.pdf", "http://web.mit.edu/ravinet/www/vroom.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4c80aa6bf6e10e4d8d9b1e07404800bcd017f38b", "sources": [ "DBLP" ], "title": "Vroom: Accelerating the Mobile Web with Server-Aided Dependency Resolution", "venue": "SIGCOMM", "year": 2017 }, "4cd0ec7315fc6df64a98e9aa07c2abee0a6f8ec3": { "authors": [ { "ids": [ "2585046" ], "name": "Marcelo Amaral" }, { "ids": [ "40522552" ], "name": "Jord\u00e0 Polo" }, { "ids": [ "1727718" ], "name": "David Carrera" }, { "ids": [ "2457630" ], "name": "Seetharami R. 
Seelam" }, { "ids": [ "1697111" ], "name": "Malgorzata Steinder" } ], "doi": "10.1145/3126908.3126933", "doiUrl": "https://doi.org/10.1145/3126908.3126933", "entities": [ "Algorithm", "Autonomous car", "Central processing unit", "Cloud computing", "Deep learning", "Graphics processing unit", "Interference (communication)", "Internet of things", "Jumpstart Our Business Startups Act", "Requirement", "Run time (program lifecycle phase)", "Scheduling (computing)", "Simulation" ], "id": "4cd0ec7315fc6df64a98e9aa07c2abee0a6f8ec3", "inCitations": [ "18a16984f7a2f0f400dc1fc345ef1065b439dc72" ], "journalName": "", "journalPages": "17:1-17:12", "journalVolume": "", "outCitations": [ "1ecd36058e48734213c81728f42ff798a2c52833", "33e82ac2571ec0902aaec1a3e9e375dae79894b3", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "6b17827b6e2563f10b53b8b37c95f5af5415c556", "2cb6ad011857f6a8673309953b580c835978d1d9", "a74921772e1ba4357e72d645e3f96c108d9e3425", "1999881614aed9295f4359cf4761926bc23fcd82", "3000e77ed7282d9fb27216f3e862a3769119d89e", "c6c180f9565c5a2262cea5c1d6e10479cc876382", "528b74c398cbb277436c94adb75f61662f4f3c18", "333da42f4369dff8ca905ad21ea6ee2f5dc99d55", "d37f8b27d517bd0d0a190cfe89fa21b23138e3df", "6e0684230dd2b436417e71731692baacd1c29dd1", "99ef5dbd87c0796854e72acc9f52116cd8d79b46", "65250c893b60e86360352d239842e6c37967b2fb", "ea462af4003c20080528cd0da15c6b9447405d2f", "4d2c90c888769cceb8c3fba28f93ac16cef46137", "8d477da280141c1596d416ac7321c90c78b16f50", "6349f4e8078e974d1c38f7817f406fc6f43e5d06", "8681e808a9ebd7f7f155590e75fb63563a8aae6e", "a692159161a461839a4ddc19020be50fa68db65d", "094910b1340fd1cd7776bb6635e004ca650195ac", "550960fc696179dde6bc387ef5209c54fc327d31", "1ec9ed042fd2914d21a11b7115b855c82d4d593f", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "357c28cac5b8ffa1928d834557909ef6d6b9a2a7", "06ea5b69223b1ca3a8cf115ef01268f37136ce2b", "e2c114e80363db8f555282d09f1acf50cbc0bdeb", "490d862480cf30949dce90e832aa292c498ac768", 
"0d075dae4e4ca9cabef40f9bec4c953ccfc31113" ], "paperAbstract": "Recent advances in hardware, such as systems with multiple GPUs and their availability in the cloud, are enabling deep learning in various domains including health care, autonomous vehicles, and Internet of Things. Multi-GPU systems exhibit complex connectivity among GPUs and between GPUs and CPUs. Workload schedulers must consider hardware topology and workload communication requirements in order to allocate CPU and GPU resources for optimal execution time and improved utilization in shared cloud environments.\n This paper presents a new topology-aware workload placement strategy to schedule deep learning jobs on multi-GPU systems. The placement strategy is evaluated with a prototype on a Power8 machine with Tesla P100 cards, showing speedups of up to ≈1.30x compared to state-of-the-art strategies; the proposed algorithm achieves this result by allocating GPUs that satisfy workload requirements while preventing interference. 
Additionally, a large-scale simulation shows that the proposed strategy provides higher resource utilization and performance in cloud systems.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126933" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4cd0ec7315fc6df64a98e9aa07c2abee0a6f8ec3", "sources": [ "DBLP" ], "title": "Topology-aware GPU scheduling for learning workloads in cloud environments", "venue": "SC", "year": 2017 }, "4cdae463ae3caa9a72ef7d82bc796e3c5e810707": { "authors": [ { "ids": [ "2546746" ], "name": "Chenxi Qiu" }, { "ids": [ "37217705" ], "name": "Haiying Shen" } ], "doi": "10.1109/ICPP.2017.40", "doiUrl": "https://doi.org/10.1109/ICPP.2017.40", "entities": [ "Centralisation", "Interference (communication)", "Link-state routing protocol", "NP-hardness", "Rayleigh fading", "Scheduling (computing)", "Throughput" ], "id": "4cdae463ae3caa9a72ef7d82bc796e3c5e810707", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "312-321", "journalVolume": "", "outCitations": [ "edd01707a38d2bfba8f9f37390f0cacba6d45bfb", "148ce04e1d9e0c63c66060e61a3e2d17bdf1a61c", "18346404ba2f60425c90e32053069c1ff5848979", "34a2bbdbb45b70e815bf5bca064415694b33a943", "1089d3b42ab6f3e2f55ce02b0ff99c3935ada343", "2e176f96e1ce63ee700ffba93f6f5921b5c1c1cf", "5cf19c5a44cc51472aaa4a276768ce196290b48c", "3ae6263431f5ee663607fdab0910b0d0b83f8b7d", "5e5e8cc87a15b79f4e0280a6cfbe7a464fa552ad", "0a8404e1d6566c81c65bfda8acb6b0032ffd2a2c", "1e6635ba4d91974611fbb29b95e868429a01e47f", "9e9c500305b7b82dc23270d189fe0053895033d2", "58312673832dcce0b01cf4b8110d7bcf03b52ebd", "46f081dcc7f0602721a8da000fb67b789f1e0ed8", "a438253fe5c8c4e89e17e96215859cc54e53ca0f", "1071b6da3c4ed39030fe351fd39ca2501ad49f1d", "80897f58d39e7f459c2274c3ee675a3c8bdeaab0", "f4701f9e65ed95d2e1983b3ae1424ae525559779", "35a3ae7e4502de60c0eec6544443b54d35edd47e", "6715bd83646409b1ad8f6d6374b41a3748f6fd29", 
"57873dea816f80713ca1133e3533125bf2c4aa50" ], "paperAbstract": "In this paper, we study the link scheduling problem considering the fluctuating fading effect in transmissions. We extend the previous deterministic physical interference model to the Rayleigh-fading model that uses the stochastic propagation to address fading effects. Based on this model, we formulate a problem called Fading-Resistant Link Scheduling (Fading-R-LS) problem, which aims to maximize the throughput of all links in a single time slot. We prove that this problem is NP-hard. Based on the geometric structure of Fading-R-LS, we then propose two centralized schemes with O(g(L)) and O(1) performance guarantee, respectively, where g(L) is the number of magnitudes of transmission link lengths. Our experimental results show that the superior performance of our proposed schemes compared to previous schemes.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.40" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4cdae463ae3caa9a72ef7d82bc796e3c5e810707", "sources": [ "DBLP" ], "title": "Fading-Resistant Link Scheduling in Wireless Networks", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "4cdd5876d4c8043608a29ef8742c0532936a4904": { "authors": [ { "ids": [ "34098513" ], "name": "Benoit Morel" }, { "ids": [ "3357808" ], "name": "Tom\u00e1s Flouri" }, { "ids": [ "1717153" ], "name": "Alexandros Stamatakis" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.11", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.11", "entities": [ "Algorithm", "Computation", "Heuristic", "Load balancing (computing)", "Open-source software", "Parallel computing", "Phylogenetic tree", "Phylogenetics", "Randomized algorithm", "Run time (program lifecycle phase)", "Supercomputer", "Synthetic data", "Worst-case scenario" ], "id": "4cdd5876d4c8043608a29ef8742c0532936a4904", "inCitations": [], "journalName": "2017 IEEE 19th International 
Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "81-88", "journalVolume": "", "outCitations": [ "6a69a99d6789da7b8d0d2818bc7d6412609939cc", "5413d5ae4c9810efa60551ec6275ff1d56616748", "313468e83a69d0ad811bd8a7fc62c07d4c421cce", "35a44e092b182da91abb504eb1d880476458a659", "465ecac8eddd2c362f0e10de2583a62a1a7c371a", "4506a30af78e19f8a9c36948321d58fa29a33e83", "effc2f77618854928963f761090b6598a50b8c6e", "005065444f506076db2ff1eaacc64316f1d0b30c", "02ebe2f857df21e34c5dd573c96b8e75b9c49322", "528eb41205793bf3dd9ef236b5f9b1bf90c34f64", "bd3e0af2afa2b57839acdfc3a68a30483be8f6ff", "498bb6d4bbff79b97695cd65b03a787bf8c4388a", "66c889f52b904c5f190fcc25255091d49ec8f38e", "0b8c257e5da99635e4b5f0dbc7da1ae6fd229248", "47e2b178b00fd44a1f44bf182952ddc90f5ef5ec", "1a4a1b3fff0ecc78a6e991f5bbd22adc69aa56bf" ], "paperAbstract": "Continuous advances in molecular sequencing technologies now allow for inferring evolutionary trees (phylogenies) on supercomputers that comprise hundreds to thousands of species at the whole-transcriptome or whole-genome level. The phylogenetic likelihood function (PLF) consumes 90-95% of total execution time in such analyses and is therefore typically parallelized. Recently, the site repeats (SR) technique for substantially accelerating the PLF has been introduced. It identifies repeating patterns in parts of the likelihood computation and omits the respective redundant calculations to save time and space. However, the SR technique induces a parallel load imbalance. In this paper, we introduce a novel randomized data distribution algorithm to improve load balance (RDDA) for SR-based likelihood calculations. 
The algorithm is available as open-source code, induces minimal run-time overhead, and yields up to 25% run time improvements on empirical datasets and up to 50% for a synthetic, worst-case scenario. This improvement is substantial as current evolutionary data analyses may require tens of millions of core hours on supercomputer systems.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.11" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4cdd5876d4c8043608a29ef8742c0532936a4904", "sources": [ "DBLP" ], "title": "A Novel Heuristic for Data Distribution in Massively Parallel Phylogenetic Inference Using Site Repeats", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "4ce5d8853dc8bc06df447396c01c8d81ef7f0517": { "authors": [ { "ids": [ "32009512" ], "name": "Mauro Ianni" }, { "ids": [ "39222438" ], "name": "Alessandro Pellegrini" }, { "ids": [ "1714807" ], "name": "Francesco Quaglia" } ], "doi": "10.1109/CLUSTER.2017.84", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.84", "entities": [ "64-bit computing", "Algorithm", "Multi-core processor", "Non-blocking algorithm", "Read-modify-write", "Scalability" ], "id": "4ce5d8853dc8bc06df447396c01c8d81ef7f0517", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "188-192", "journalVolume": "", "outCitations": [ "045a975c1753724b3a0780673ee92b37b9827be6", "aadcde924675f5940a8dd2ba9019ac9a0ad0fe6d", "0270a2b35f745f2ed17fbbac950e8086ee9aa1d6", "e6dabf97dd1d75c5eb4e3377c111b1b7124de6bc", "363b85f61630ebdc1194a59816ad950bf305c40a", "f3c3d7a65df7ee1f34ae6ac72e56b701f8ec11f0", "c1f6e5112b7a601e890c95d73491fe6071392b3d", "b8f530312698a4d2053815b2d5a25d6cee5933c3", "42142c121b2dbe48d55e81c2ce198a5639645030", 
"be28afba9d5a3c666b3750f2c7f77c9d676d5ac1", "3afd1b2d4bbc8401e55af04d485ec7429aca27c1", "83a601e5cd6b8d8577a8f505ad6f72f4a3714463", "60b007717bd544d7eb1f083f178787bef9d7f970", "2094223316e9164e4d7cc8f5f9facae335dd1d73" ], "paperAbstract": "We present a multi-word atomic (1,N) register for multi-core machines exploiting Read-Modify-Write (RMW) instructions to coordinate the writer and the readers in a wait-free manner. Our proposal, called Anonymous Readers Counting (ARC), enables large-scale data sharing by admitting up to 2^{32}-2 concurrent readers on off-the-shelf 64-bit machines, as opposed to the most advanced RMW-based approach which is limited to 58 readers. Further, ARC avoids multiple copies of the register content while accessing it—this affects classical register's algorithms based on atomic read/write operations on single words. Thus, ARC allows for higher scalability with respect to the register size.", "pdfUrls": [ "https://arxiv.org/pdf/1707.07478v1.pdf", "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.84", "http://arxiv.org/abs/1707.07478" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4ce5d8853dc8bc06df447396c01c8d81ef7f0517", "sources": [ "DBLP" ], "title": "A Wait-Free Multi-word Atomic (1,N) Register for Large-Scale Data Sharing on Multi-core Machines", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "4ce8ad1513e84cb464efa68827119295530ebaa4": { "authors": [ { "ids": [ "7456408" ], "name": "Supreeth Shastri" }, { "ids": [ "1733607" ], "name": "David Irwin" } ], "doi": "10.1145/3127479.3132017", "doiUrl": "https://doi.org/10.1145/3127479.3132017", "entities": [ "Fault tolerance", "Frequency-hopping spread spectrum", "Java HotSpot Virtual Machine", "Virtual machine" ], "id": "4ce8ad1513e84cb464efa68827119295530ebaa4", "inCitations": [ "21d2fe357a178d36a50398b05e0046b7b500b109" ], "journalName": "", "journalPages": "493-505", "journalVolume": "", "outCitations": [ 
"21c24920914a8d781ffb43d08ba8f0d916968007", "4f86fa28602d9503a8575c5b31082284abc8415c", "1901c2280e74b331ec766b26b2af0cf0f648b619", "79f29592dd5cfe0b1df8c967bf49ce06dde9e521", "34e5c5482d1381972eea4e705bcf38051de7a045", "613cdadb56592f704349bb25a359ebecd8fd9e0f", "5edb4dd1952a63707f1ff73db5e507c21bb962f8", "51694f797b0d4a7e03b1e9a6587a9d4976e92297", "0935bb723e4071ccd4c2334d3b6d728faa111d11", "12635bdd3bd32f09c85a9070977a281fcb32ff61", "5c6086766fd09dda4de4e8a4fe4ac6bea42157c7", "3d90fde9ced995e1ad3ffb9de26e3b45e90ad1fa", "d608a95490b02839fdf71a412aab46ad20a70596", "05be0db01d70bcce9530b462ab2368f9e15127d9", "985b0a763d3ae1aa2cd2752167f85ce079cfebb9", "094910b1340fd1cd7776bb6635e004ca650195ac", "530b3179e8532e87520ccd0daebda3d81ef6319b", "ec5f0f8d5b7176cb2e88271ec948f935ea0346a2", "2f2cdd7b0c98b5e43b61272d2ac3ebb5cd29041d", "884d6646d610359f4040d6ec444eba8101b0555c", "6111f1a9ab657910f5a11a95de117b3c5181565a", "118c97ed0ff45bcbda0040d2acb8615a13c2d5fb", "0abc3e83ccd6e685f8d0299f24f03ae28f4c2459", "c7992b82d1e16849727a7d0add13f43162595d20", "3a03957218eda9094858087538e9668ab0db503b", "70e38d47b83261e257bae61dc39ffbf391b30591", "39ab382d8be340a9818d5ef7ea22674ec1f5d048", "1804e67480b9c46b377d0a102e26d3a9a1a9139b", "7e74ea151efcdcfecffdbeaec0728f9ac1f80389", "94859f850f345629c23526e1155aa9deb1852491", "96d40ea825ee21617b24732ad956f9b7307ea254", "1da8852aa591d82f6dab3d93c8aba923e69a45d4", "120ade88aecba9157eb1ab7bc0464a0215c46ccc" ], "paperAbstract": "Cloud spot markets offer virtual machines (VMs) for a dynamic price that is much lower than the fixed price of on-demand VMs. In exchange, spot VMs expose applications to multiple forms of risk, including price risk, or the risk that a VM's price will increase relative to others. Since spot prices vary continuously across hundreds of different types of VMs, flexible applications can mitigate price risk by moving to the VM that currently offers the lowest cost. 
To enable this flexibility, we present HotSpot, a resource container that \"hops\" VMs---by dynamically selecting and self-migrating to new VMs---as spot prices change. HotSpot containers define a migration policy that lowers cost by determining when to hop VMs based on the transaction costs (from vacating a VM early and briefly double paying for it) and benefits (the expected cost savings). As a side effect of migrating to minimize cost, HotSpot is also able to reduce the number of revocations without degrading performance. HotSpot is simple and transparent: since it operates at the systems-level on each host VM, users need only run an HotSpot-enabled VM image to use it. We implement a HotSpot prototype on EC2, and evaluate it using job traces from a production Google cluster. We then compare HotSpot to using on-demand VMs and spot VMs (with and without fault-tolerance) in EC2, and show that it is able to lower cost and reduce the number of revocations without degrading performance.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3132017", "http://www.ecs.umass.edu/~irwin/hotspot.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4ce8ad1513e84cb464efa68827119295530ebaa4", "sources": [ "DBLP" ], "title": "HotSpot: automated server hopping in cloud spot markets", "venue": "SoCC", "year": 2017 }, "4cecfaad1e78b10e85eb33c5a22170e761867d2a": { "authors": [ { "ids": [ "3028943" ], "name": "Reto Achermann" }, { "ids": [ "3215585" ], "name": "Chris I. Dalton" }, { "ids": [ "1789618" ], "name": "Paolo Faraboschi" }, { "ids": [ "38961209" ], "name": "Moritz Hoffmann" }, { "ids": [ "1712405" ], "name": "Dejan S. Milojicic" }, { "ids": [ "2594412" ], "name": "Geoffrey Ndu" }, { "ids": [ "38512473" ], "name": "Alex Richardson" }, { "ids": [ "1734886" ], "name": "Timothy Roscoe" }, { "ids": [ "15789797" ], "name": "Adrian L. Shaw" }, { "ids": [ "2750619" ], "name": "Robert N. M. 
Watson" } ], "doi": "10.1145/3102980.3103000", "doiUrl": "https://doi.org/10.1145/3102980.3103000", "entities": [ "Computer", "Emergence", "Memory management unit", "Memory protection", "Non-volatile memory", "Scalability", "Spaces" ], "id": "4cecfaad1e78b10e85eb33c5a22170e761867d2a", "inCitations": [], "journalName": "", "journalPages": "118-124", "journalVolume": "", "outCitations": [ "240937d8d2b34a2f8007a858016422e1cc3d1442", "2a9d09d8e2390c92cdaa5c8b98d6dd4cb394f638", "866bce77ca5201d182c0c43090eb75bf126efba6", "09de90384bacfdd82e4503dc155ab6868f953eb3", "ff00f6674ee0ba30659825ee0af2ed093df3de3c", "2e67ad7f8e4eec2fc7f59eced0a57bb912e2d4da", "70b4132f2be9a588f86687d319a159cdcf71ad95", "595e54938ca4481cf80d03e00a90be6823aeeef6", "4cbaac7455ac02f2b5d4d266d3dc6788ee56cc83", "6d114169d5a02ff6fb62151ea54564a6be5e72ff", "aaeb37b9eba724271965a67d15ad86ba0fed04ed", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "567fbe38b1e63d3e718527b3ea9918440dd703ad", "0c1c1a86d2c702adf3ccd6659910195255d27533", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "83474f5a4514b7af4eab64065a245727955c4b46", "40bd73bd534bfac66cc6051187544365d089bdc2", "3b9b2cc9c0ce79aa3995a9b65f4a05c57bcb4efc", "eab3b28bebdc202d9c5e2354731bebadf0872aac", "28236cff92291fd6380bb82875675cbaeb9575d5", "0657eb7e069c2c2c7cae6636704e0f7fb3bcd9fc", "07529c6cac9427efab237cd20f7aa54237e74511", "8b4392aab6a003a7f97cb54bba9bd8c158a0794f", "1104fc428e140ea1a216240ba393d4556d478d45" ], "paperAbstract": "It is time to reconsider memory protection. The emergence of large non-volatile main memories, scalable interconnects, and rack-scale computers running large numbers of small \"micro services\" creates significant challenges for memory protection based solely on MMU mechanisms. 
Central to this is a tension between protection and translation: optimizing for translation performance often comes with a cost in protection flexibility.\n We argue that a key-based memory protection scheme, complementary to but separate from regular page-level translation, is a better match for this new world. We present MaKC, a new architecture which combines two levels of capability-based protection to scale fine-grained memory protection at both user and kernel level to large numbers of protection domains without compromising efficiency at scale or ease of revocation.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3103000", "http://people.inf.ethz.ch/troscoe/pubs/achermann_hotos_2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4cecfaad1e78b10e85eb33c5a22170e761867d2a", "sources": [ "DBLP" ], "title": "Separating Translation from Protection in Address Spaces with Dynamic Remapping", "venue": "HotOS", "year": 2017 }, "4d0f00b4b8287c74f790f0156815471e6182fbaa": { "authors": [ { "ids": [ "3665875" ], "name": "Yi Tay" }, { "ids": [ "22209930" ], "name": "Minh C. 
Phan" }, { "ids": [ "1755919" ], "name": "Anh Tuan Luu" }, { "ids": [ "1716044" ], "name": "Siu Cheung Hui" } ], "doi": "10.1145/3077136.3080790", "doiUrl": "https://doi.org/10.1145/3077136.3080790", "entities": [ "Benchmark (computing)", "Deep learning", "End-to-end principle", "Experiment", "Feature engineering", "Holography", "Learning to rank", "Long short-term memory", "Question answering", "Scalability", "Semantic matching" ], "id": "4d0f00b4b8287c74f790f0156815471e6182fbaa", "inCitations": [ "d84dd93072bbc7a6377dc1a8efab6a5cb98a7bbf", "c0ba1fb8e6f6b839017a8f2cdb6f2ba72ac241ed", "cc16d675c8643c6b918ef67dbd7e001d92a2d630", "c92403e2b6b1a9d4aa10d2ba0d87ad30cfa02153", "a9d6499b2cc97d8074fdb87466fcfa7589d051a0", "b4aee872a72ef48776a7e9b4e6bcc53dfcfdb58f", "5bbe05cbe5ec5411dceedc170af973fd5d62f8f8", "e57617042a472302061ab9dd120443bddaf0ab00", "5b8c127cd01ee237c45c2841ac78cd69b48a87c3" ], "journalName": "", "journalPages": "695-704", "journalVolume": "", "outCitations": [ "1e22ed1cbec0dabd19164fa2543c824a1a14fd20", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", "a27e243d2ef62644e7a2a1fa51878fe7dbca4479", "07f3f736d90125cb2b04e7408782af411c67dd5a", "a584211768d49f80192f13b8ed2fda9c058dec34", "669441a336c4fb82dfd08c5b7e50b49f08bf869b", "2ad0f706990031a5955e7c641e351d1eb6ea0c93", "272216c1f097706721096669d85b2843c23fa77d", "07a9478e87a8304fc3267fa16e83e9f3bbd98b27", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "118be9cf31ecf5cbdd49a19da8615b593ec61a4c", "d2071c1e4a6030dc0005dbfeefdd196a8b293e84", "a62b58c267fddfa06545a7fc63a3c62ef7dc9e15", "564427596799f7967c91934966cd3c6bd31cb06d", "22ae02d81c21cb90b0de071550cfb99e6a623e62", "7a9c4c98e361f3b4f4bfbeea7e6699917ce42091", "8488adaa261f9a3be7e03acc2693ae4397b0eecd", "e5d9bd94a65f3ed497a4d36e089b1393b0e5520d", "6812fb9ef1c2dad497684a9020d8292041a639ff", "bb75de5280ff9b0bbdd74633b9887d10fbe0ae10", "3e393df4a5731fb7b49cf2f527fed1ee4e6e6942", "0ca50da53eaf9dc939e126a06975654f9531c600", "d48fc0f1df6b349076329cf3b7e5d164c730bb0f", 
"a97bfc553687b021c48937f1d788dc38dc11c2e7", "50d53cc562225549457cbc782546bfbe1ac6f0cf", "b44ff5104557fc9b900d48f02605dd42d794927d", "507da0578b471650585fe948f9e8cd781800aeeb", "d25595b82a8347be68317a74560b574583c3c2e0", "65ad0e876216ea034b7958f016456e32666bc5c6", "0f32cd9cf29ec55b9a54dd5a4adf80b03e0ac08d", "e3796f39fe2623823a5d48dee2822da9502561c5", "03feca688989940fd4a5a13ac8837403b1af5e25", "828dbeb7cf922dc9b6657dd169b8d26d2b58eedb" ], "paperAbstract": "We describe a new deep learning architecture for learning to rank question answer pairs. Our approach extends the long short-term memory (LSTM) network with holographic composition to model the relationship between question and answer representations. As opposed to the neural tensor layer that has been adopted recently, the holographic composition provides the benefits of scalable and rich representational learning approach without incurring huge parameter costs. Overall, we present Holographic Dual LSTM (HD-LSTM), a unified architecture for both deep sentence modeling and semantic matching. Essentially, our model is trained end-to-end whereby the parameters of the LSTM are optimized in a way that best explains the correlation between question and answer representations. In addition, our proposed deep learning architecture requires no extensive feature engineering. Via extensive experiments, we show that HD-LSTM outperforms many other neural architectures on two popular benchmark QA datasets. 
Empirical studies confirm the effectiveness of holographic composition over the neural tensor layer.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080790", "http://arxiv.org/abs/1707.06372", "https://arxiv.org/pdf/1707.06372v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4d0f00b4b8287c74f790f0156815471e6182fbaa", "sources": [ "DBLP" ], "title": "Learning to Rank Question Answer Pairs with Holographic Dual LSTM Architecture", "venue": "SIGIR", "year": 2017 }, "4d17ad72e7ec8159064662a70f8356c039537b86": { "authors": [ { "ids": [ "10762356" ], "name": "Victor Junqiu Wei" }, { "ids": [ "1723745" ], "name": "Raymond Chi-Wing Wong" }, { "ids": [ "1709267" ], "name": "Cheng Long" }, { "ids": [ "1709509" ], "name": "David M. Mount" } ], "doi": "10.1145/3035918.3064038", "doiUrl": "https://doi.org/10.1145/3035918.3064038", "entities": [ "Algorithm", "Computer graphics", "Data mining", "Distance oracle", "Feature vector", "Global Positioning System", "Oracle Database", "Point of interest", "Range query (data structures)" ], "id": "4d17ad72e7ec8159064662a70f8356c039537b86", "inCitations": [], "journalName": "", "journalPages": "1211-1226", "journalVolume": "", "outCitations": [ "951a09c6e31a17d4b83a908372d0a642f6b6a9ee", "31181e73befea410e25de462eccd0e74ba8fea0b", "ab74affabf77921f73a426ad8d61020ec5487207", "3388daf515b52422896037610b19abc370d8c4e2", "068e0817389cdbaecb1187e4f2eabc4651b773a0", "dd01c381a8099144d69909e1fc17186609404654", "c300bfd4c601ca2191f659f4cb560f811d44ac41", "55fe938af5b3240caa6c5e985b1ace41f6649f02", "bc94a8e5010782d61c5a0ae86f00ca3b172c0aa9", "323c12b895ace6ddedf9d7a2884e77183bafc7dc", "810dbfe47634574fae7af51a85c65f2e10a28080", "5349aff5d92f253b871140aac9de7df78ee7ccf2", "986f26db95b08b3eef6bae78ea39407f15a16f79", "21754342e13272ee907f4687643c2ad7014d6afb", "694d2eba0a5da7abf2837a02a1ead60e41ea7202", "024c07ef0f70b044f5a38e22865d517bf02ce74f", "5f256003eb6519c3c7d7fb859944b860cd86ae6c", 
"672d76423acd2c2c5e76f0162535f880974265a7", "48c1cbd9b7c059f5f585c003b68ad1ec32606d41", "20cf48240b89bd522beff22a0cf0c8cd5b2f8abf", "335abaf408273c21f52b37a801dcd46cf2bbfb56", "676434190e54c2bf80b5931d34448f3ca34b892e", "e3d2b4dbcd078db08b0b98dd077511149a8febf9", "12684752e1487643d849108fc82b7809d900e334", "f975f7a10ae8a82e8fd5a7a4a79ef4d59c8752b3", "c075149029023e5547ef3f8abf66db9397b19a06", "4f4d5460640a7b4a6bef97736f622e3ea608ec1a", "138f5f716990245dea973df5bd4c70e4767742d7", "07662991ff16875b89b9ef80db1834fca02d7e0d", "ed3fe19771dc38d5142cc88bce1fcbba53352840", "07062a50d8bbb7318bd8840d322873b10082ce9c", "b7449194b22fb27f90f7587c966fb30cf3f6e796" ], "paperAbstract": "Due to the advance of the geo-spatial positioning and the computer graphics technology, digital terrain data become more and more popular nowadays. Query processing on terrain data has attracted considerable attention from both the academic community and the industry community. One fundamental and important query is the shortest distance query and many other applications such as proximity queries (including nearest neighbor queries and range queries), 3D object feature vector construction and 3D object data mining are built based on the result of the shortest distance query. In this paper, we study the shortest distance query which is to find the shortest distance between a point-of-interest and another point-of-interest on the surface of the terrain due to a variety of applications. As observed by existing studies, computing the exact shortest distance is very expensive. Some existing studies proposed ε-approximate distance oracles where ε is a non-negative real number and is an error parameter. However, the best-known algorithm has a large oracle construction time, a large oracle size and a large distance query time. 
Motivated by this, we propose a novel ε-approximate distance oracle called the Space Efficient distance oracle (SE) which has a small oracle construction time, a small oracle size and a small distance query time due to its compactness storing concise information about pairwise distances between any two points-of-interest. Our experimental results show that the oracle construction time, the oracle size and the distance query time of SE are up to two orders of magnitude, up to 3 orders of magnitude and up to 5 orders of magnitude faster than the best-known algorithm.", "pdfUrls": [ "https://cs.umd.edu/users/mount/Papers/sigmod-2017-dist-oracle.pdf", "http://www.cse.ust.hk/~raywong/paper/sigmod17-ProximityTerrain.pdf", "http://doi.acm.org/10.1145/3035918.3064038" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4d17ad72e7ec8159064662a70f8356c039537b86", "sources": [ "DBLP" ], "title": "Distance Oracle on Terrain Surface", "venue": "SIGMOD Conference", "year": 2017 }, "4d21099afa01298cbfcf1bfe7dc79c61fd365ad5": { "authors": [ { "ids": [ "2917142" ], "name": "Ruba Abu-Salma" }, { "ids": [ "1752376" ], "name": "M. 
Angela Sasse" }, { "ids": [ "2607425" ], "name": "Joseph Bonneau" }, { "ids": [ "36045338" ], "name": "Anastasia Danilova" }, { "ids": [ "19322386" ], "name": "Alena Naiakshina" }, { "ids": [ "15484498" ], "name": "Matthew Smith" } ], "doi": "10.1109/SP.2017.65", "doiUrl": "https://doi.org/10.1109/SP.2017.65", "entities": [ "Computer security", "Encryption", "End-to-end encryption", "Mental model", "Secure communication", "Transport Layer Security", "WhatsApp Messenger" ], "id": "4d21099afa01298cbfcf1bfe7dc79c61fd365ad5", "inCitations": [ "c64f331f788ba93d109df757eb2e7f2d2e96cb69", "beb678a28ca5fc4bd9a123fca966b5f04b41b102", "a86433df045cc032c0081eb725c47176dc7f4fa1", "0f90b73ce3284f6e6039b1d50b7652e7c979ea05", "f695a46a0880db7ffb00558dbbbd508a085489bd", "5b0c1e810c05fcbf05f3fa3c35046b0b5a3ebc32" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "137-153", "journalVolume": "", "outCitations": [ "afa5aabe7207e6d1a71b793d69050129e7bb8084", "389f55c5c376db4ce1c88161dca98c329614faa8", "1625f94b9a74f01fb00b4f10baac1ae84f450624", "0a6333418cc97ce5f91a70ff0648f70110fc2459", "05cc2508174137d0ce5398129a574f6a2e6b0045", "51696f979f8dbb71cd3308d8ee0baad1e2d61986", "42ebc205637da662bf7a03089317ba0cd404843e", "232c77dc82783cffec60d113290372691b46c133", "48fc8f1aa0b6d1e4266b8017820ff8770fb67b6f", "d82611b90df4c581cccc23248e3c2e40b8e13c67", "1cc4099759dae46dd322b2691577dcd715b6234f", "3ff308b0992c4dde982e0ef93ab7defa1198730d", "501e496146b04f42e3e6a49aabd29fb909083007", "1f28e5b972a54c1eb79b14b42f7312404a2bbde1", "92f1c90d98e78b40c373a79d6e84b3f728fccfe7", "47f176367e6f2066e177ee389d28f129dac752f3", "d8cc44f872bc18aad25415ea2209da1caf6560d2", "9e463eefadbcd336c69270a299666e4104d50159", "526f85038fb531cd12fff8773a55fc12d1e6ea14", "35a0409cce01c12d6177e24a258c783d384fba23", "4769c9b85d5040e381b9661b6578977605135434", "278bc23be126c3f23c3496bfdcf26c4adaa5e34e", "4a2bc7c82991d17a4729049b3fda1022fade956d", "27f58a408da93ae1708d5956dfccd0473f80b949", 
"3121da57912834e4b4d7ae4dc34c33c51821ded1", "13faaeb8f57411fcd05475fe99cde446d7c53900", "0995c6e751edd57b19ef6a236909be2025180060", "10473de8f36c463c48152fcd0e09d7e20d00e671", "0e04b5dd3dc4ff3e7ddd24c71ac8ecb573d7baa4", "4ea466a79c3fbdfce4d5916481a484aa3e22860b", "2c41593e73d511e4a2ecc96d8819a7d67b5a4579", "423b1d4d75293a7f1cf61932e71480ac03ae103c", "b5de13aeb0da553916194c50615a90e382e4064d", "0d730e6890948934604677a5899472389112ae9d", "5a7f31934fb4596356ab6e5b1ec819f632159c89", "bdfd34769911b3fb40eadf71bfb34a0ec98fe160", "10ca70ab22f8c79a49066f5ee73988dd32bd54bf", "7633232774cfeacca6ead4b510501f6ec346365f", "1a0ca2cf71616fa492ebe611f1d83261d2ecf052", "08e9542de3cbfe791bf86a0dee6ba5e83bc29ea7", "6da0ce44bb6ccba7d8e56d89c0d4ab1138e3dcbd", "2299d0ec8f73e080a38eb1e931ddc4600f75bfa9", "f60628636b64c187db1f106823f5af5730b973cd", "308992602c6be11203d8a7ce7151267af9686b45" ], "paperAbstract": "The computer security community has advocated widespread adoption of secure communication tools to counter mass surveillance. Several popular personal communication tools (e.g., WhatsApp, iMessage) have adopted end-to-end encryption, and many new tools (e.g., Signal, Telegram) have been launched with security as a key selling point. However it remains unclear if users understand what protection these tools offer, and if they value that protection. In this study, we interviewed 60 participants about their experience with different communication tools and their perceptions of the tools' security properties. We found that the adoption of secure communication tools is hindered by fragmented user bases and incompatible tools. Furthermore, the vast majority of participants did not understand the essential concept of end-to-end encryption, limiting their motivation to adopt secure tools. 
We identified a number of incorrect mental models that underpinned participants' beliefs.", "pdfUrls": [ "http://www.jbonneau.com/doc/ASBDNS17-IEEESP-secure_messaging_obstacles.pdf", "https://www.ieee-security.org/TC/SP2017/papers/84.pdf", "https://doi.org/10.1109/SP.2017.65" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4d21099afa01298cbfcf1bfe7dc79c61fd365ad5", "sources": [ "DBLP" ], "title": "Obstacles to the Adoption of Secure Communication Tools", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "4d271267c20c4f6a93b7f22728cb65aba36596b4": { "authors": [ { "ids": [ "3084576" ], "name": "Qiongzheng Lin" }, { "ids": [ "37417492" ], "name": "Lei Yang" }, { "ids": [ "29824111" ], "name": "Huanyu Jia" }, { "ids": [ "3444521" ], "name": "Chunhui Duan" }, { "ids": [ "10258874" ], "name": "Yunhao Liu" } ], "doi": "10.1145/3143361.3143387", "doiUrl": "https://doi.org/10.1145/3143361.3143387", "entities": [ "Automatic identification and data capture", "Benchmark (computing)", "Electron mobility", "Holism", "Quantum channel", "Radio-frequency identification", "Sampling (signal processing)", "Stationary process", "Tag (metadata)", "Two-phase commit protocol" ], "id": "4d271267c20c4f6a93b7f22728cb65aba36596b4", "inCitations": [], "journalName": "", "journalPages": "199-211", "journalVolume": "", "outCitations": [ "7b3d9e4bfc165d38dad83dca4b2397c024fbd2db", "2cc668fd26f9a2fa1b9e64c1c61f32ddf7630791", "4618dc76f77c3d09510f0530f1805dd7702f5b5c", "2d12b6189a0681b933f9a96b8ab14daac2bcfd73", "d72cece54137c12c76c333f8120730dfa6da39a2", "49ba23ea599198121d7c6a8fffc8e1156ac5e5d5", "540ac698963add83d4e47d52edc54c84160eddd1", "1413b78c713429ea00dbd70a49e0d2e606a82be9", "05eca7b08c495020d499716fb90a37ba0715f7ff", "101aaa6b7a3ebd049412265a43f8aed414f44db1", "6f6772cb62441732e831e0eef036e17cbf8858dc", "644e1494176c0ff33fba8f745087a56cafa1ecaf", "97a223a650576788408a3956aca4a54549ba6fa2", "1879bf3d2e843155056344a8f6a6cd27b10e0668", 
"9c84d7af3db4fc90e872556c936953aae48ea1a1", "82802e411495bbad77fa2415c6d4633dde180764", "c6befa4184c540516080a20e1058ce33bf51e586", "19a8b624116966d42b616ad923205f5c1ec4a5c3", "2b3aabf4173e515a6e9bbc3410cd5dd9c87549ba", "a64b650820541396f221efc78cd98624bbf7a109", "0b3fa65882b095e97353814c4266d8b934f62eab", "a361f606d62e0be40df91b143f7f7086d0b249d4", "21d33bfc0caf63357379d861debd12c91f7df03e", "60b035d5c39df79d984b385eb2dd68288adea8fd", "a6ef25ec69a84ccb16dd24615f4288e25df0ff7d", "69e6c6aa4a207eb81be43949edc8af7d6b4782d1", "8aea613645b3cf811d6c37811e34e8316ad972f1", "1e45719efcccc817d382041b830c1f5052f1c36b" ], "paperAbstract": "Radio-frequency identification (RFID) systems, as major enablers of automatic identification, are currently supplemented with various interesting sensing functions, e.g., motion tracking. All these sensing applications forcedly require much higher reading rate (i.e., sampling rate) such that any fast movement of tagged objects can be accurately captured in a timely manner through tag readings. However, COTS RFID systems suffer from an extremely low individual reading rate when multiple tags are present, due to their intense channel contention in the link layer. In this work, we present a holistic system, called Tagwatch, a rate-adaptive reading system for COTS RFID devices. This work revisits the reading rate from a distinctive perspective: mobility. We observe that the reading demands of mobile tags are considerably more urgent than those of stationary tags because the states of the latter nearly remain unchanged; meanwhile, only a few tags (e.g., < 20%) are actually in motion despite the existence of a massive amount of tags in practice. Thus, Tagwatch adaptively improves the reading rates for mobile tags by cutting down the readings of stationary tags. Our main contribution is a two-phase reading design, wherein the mobile tags are discriminated in the Phase I and exclusively read in the Phase II. 
We built a prototype of Tagwatch with COTS RFID readers and tags. Results from our microbenchmark analysis demonstrate that the new design outperforms the reading rate by 3.2x when 5% of tags are moving.", "pdfUrls": [ "http://doi.acm.org/10.1145/3143361.3143387" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4d271267c20c4f6a93b7f22728cb65aba36596b4", "sources": [ "DBLP" ], "title": "Revisiting Reading Rate with Mobility: Rate-Adaptive Reading in COTS RFID Systems", "venue": "CoNEXT", "year": 2017 }, "4d35a9d54c6fcc3fab2ec86ba9dbba1c8a6fad37": { "authors": [ { "ids": [ "1800402" ], "name": "Abhishek Roy" }, { "ids": [ "1707434" ], "name": "Hakan Aydin" }, { "ids": [ "1721468" ], "name": "Dakai Zhu" } ], "doi": "10.1109/IGCC.2017.8323569", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323569", "entities": [ "Algorithm", "Backup", "Fault tolerance", "Multi-core processor", "Scheduling (computing)", "algorithm", "voltage" ], "id": "4d35a9d54c6fcc3fab2ec86ba9dbba1c8a6fad37", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ "4b84def35313799e2b8bba32f46b825beac27afa", "0c96de3e65dbba2016602dc26f88abe666580927", "53863ebc7948d5db087ce1a6e863368ad4b7037e", "751e352fe52946ca3d0f51956706313ce521b658", "083ddef72578239ab1ab82063f1e9d69e999ccd8", "f348a7b22f7e39d00553bd9004dde02ae8ac94ec", "0d7d98a698c871d18d16448c65c99a9a369aff8b", "24a9c3297bf08caeceb15777e85f0c3da5c07e26", "ba16930ed4baa0d0391614968b9480083147d3ac", "6742ab2be6790646d503da58ed2a0c97ba071817", "606c5f3ed9befa7113bc28436a8a91f176934874" ], "paperAbstract": "In this paper, we consider energy-efficient and fault-tolerant scheduling of real-time tasks on heterogeneous multicore systems. Each task consists of a main copy and a backup copy which are scheduled on different cores, for fault tolerance purposes. 
Our framework deliberately delays the backup tasks in order to cancel them dynamically when the main task copies complete successfully (without faults). We identify and address two dimensions of the problem, i.e., partitioning tasks and determining processor voltage/frequency levels to minimize energy consumption. Our experimental results show that our proposed algorithms' performance levels are close to that of an ideal solution with optimal (but computationally prohibitive) partitioning and frequency assignment components.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323569", "http://cs.gmu.edu/~aydin/igsc17final.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4d35a9d54c6fcc3fab2ec86ba9dbba1c8a6fad37", "sources": [ "DBLP" ], "title": "Energy-efficient primary/backup scheduling techniques for heterogeneous multicore systems", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "4d388abdad088316cae4e00f6f2e5176d53b9059": { "authors": [ { "ids": [ "9858207" ], "name": "Thomas Vissers" }, { "ids": [ "40233108" ], "name": "Timothy Barron" }, { "ids": [ "2768298" ], "name": "Tom van Goethem" }, { "ids": [ "1752104" ], "name": "Wouter Joosen" }, { "ids": [ "1679711" ], "name": "Nick Nikiforakis" } ], "doi": "10.1145/3133956.3133988", "doiUrl": "https://doi.org/10.1145/3133956.3133988", "entities": [ "Denial-of-service attack", "Entity", "Rogue", "Typosquatting" ], "id": "4d388abdad088316cae4e00f6f2e5176d53b9059", "inCitations": [ "0df5c874f3661a96997d884f555bfe233b0b0d3f" ], "journalName": "", "journalPages": "957-970", "journalVolume": "", "outCitations": [ "204ded01c9d29ab80c4896f90e0277694ab15b80", "876dae4ee0323da2ae685160e9192cdf605e7299", "69349684bf61888dc9fe5ff679ff1c7572d2d535", "0a964c5ac7e19cbdc820fd4ee101a5263385733d", "2ff9c10a0a8f43306f3a0492f8d6eca744d4e7c7", "22a78f31395e79cb6c99c3cedd248ecd6568b7f7", "ba9af0bf228cedfad61daa481a71ed433076ab8d", 
"4e4c44bbdc838fcf266eb29c2b718112d150c05e", "649468352e70532e80f68d362bf85fae8277bf22", "2f95e2ca11610cb334d8d777d7b0f0d5561e67bc", "1ffbc2fc53bc4811e07d9531f5f4399fd70bcf6f", "124a9b51d1767d9d077602e7075ce3e1393ed400", "353bc95ea6d720d867489e3d4cf5c9427531c7c7", "517c5cd1dbafb3cfa0eea4fc78d0b5cd085209b2", "637e37d06965e82e8f2456ce5d59b825a54b0ef7", "8714e68bb306f89b3fbce3307833405b6a632487", "6095ae9787e7d3233ad0730ba0d78fc9a8898839" ], "paperAbstract": "The functionality and security of all domain names are contingent upon their nameservers. When these nameservers, or requests to them, are compromised, all domains that rely on them are affected. In this paper, we study the exploitation of configuration issues (typosquatting and outdated WHOIS records) and hardware errors (bitsquatting) to seize control over nameservers' requests to hijack domains. We perform a large-scale analysis of 10,000 popular nameserver domains, in which we map out existing abuse and vulnerable entities. We confirm the capabilities of these attacks through real-world measurements. Overall, we find that over 12,000 domains are susceptible to near-immediate compromise, while 52.8M domains are being targeted by nameserver bitsquatters that respond with rogue IP addresses. 
Additionally, we determine that 1.28M domains are at risk of a denial-of-service attack by relying on an outdated nameserver.", "pdfUrls": [ "https://users.cs.fiu.edu/~carbunar/teaching/cen5079/cen5079.2017/presentations/name.pdf", "http://doi.acm.org/10.1145/3133956.3133988", "https://www.securitee.org/files/dnshijack_ccs2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4d388abdad088316cae4e00f6f2e5176d53b9059", "sources": [ "DBLP" ], "title": "The Wolf of Name Street: Hijacking Domains Through Their Nameservers", "venue": "CCS", "year": 2017 }, "4d40e154622b2828b56fdd1b04977ffa339a994e": { "authors": [ { "ids": [ "33828413" ], "name": "Muhammad Ali Gulzar" }, { "ids": [ "2192580" ], "name": "Matteo Interlandi" }, { "ids": [ "3426485" ], "name": "Xueyuan Han" }, { "ids": [ "8679877" ], "name": "Mingda Li" }, { "ids": [ "3269316" ], "name": "Tyson Condie" }, { "ids": [ "35710133" ], "name": "Miryung Kim" } ], "doi": "10.1145/3127479.3131624", "doiUrl": "https://doi.org/10.1145/3127479.3131624", "entities": [ "Big data", "Crash (computing)", "Data-intensive computing", "Debugging", "Definition", "Delta Debugging", "Fault detection and isolation", "Internationalization and localization", "Oracle (software testing)", "Relational database management system", "Scalability", "Software engineering", "Time complexity" ], "id": "4d40e154622b2828b56fdd1b04977ffa339a994e", "inCitations": [ "2d740eea6caf338a5a5a069b9074c313b034b322" ], "journalName": "", "journalPages": "520-534", "journalVolume": "", "outCitations": [ "8b66710185b5b8989e9f6fd15b1fdb74db00f953", "27bc38fa9ab70b9343ba30b96ed0f0431f1ffdc6", "1f4164a0823ced00cd77046b55cca275ef187b7d", "b2ba488dcaba342ab971681178aff02d293c6345", "4bace0a37589ec16246805d167c831343c9bd241", "02f5ad096e6273f4ac67c99a021356377a37e779", "b39c5022995fea8ca566798b8977342e618fb9a5", "5178f07707b808366ff6c80a0507a8c113fe83df", "0558c94a094158ecd64f0d5014d3d9668054fb97", 
"00744ace45478609d427ed846393fc5447832568", "34cac14ff57deca90a5d74b8d12f69a4780717d9", "1df05b37ab38851a7537f5a7d1cc31d60ab819dd", "8e42a6f643ff2f2b120a4f29ab320ee240c49fec", "1c665c3c95c2928818327ac742942efaa9bb7bf5", "0b1fe1af18e2fda99eedf9e5cf3ac18826e585c1", "7d1ff9d22d58674b8534342948ceaa8b805a6067", "083e9b12c5566d953efdf6d90e77638a6b0c3693", "1d80de948ddcf8f3524763ed3d5f4485f46d70fb", "f8c8a5c6afb2cf5ef9a4f4cd5a2400cc0032d4c3", "18da147789c74a2633a7f7ad9d9748025ee03345", "3538708c81d5493c77b4d82f9fe3953e1bd62870", "66aa614a3c99ece796aae02df2d1a35239695f02", "21854a5fb77a45f411865652a63663bb9ff3cde9", "3e11d9cbc004ffc7ec0fd8f7f584cf711479bbae", "c33592b7ae635cb797b242e1b5c7aa7232c6f645", "dd34e51f2069c033df9b904bb8063b420649ed76", "0541d5338adc48276b3b8cd3a141d799e2d40150", "0ac93570bab7aec97bf01eb5387372ee5cb7f94a", "14a2658e3f25184631b9bd29fc2eeb5cf2beacf7", "70bd563d00fcb402eb7d9f251bea544ecb08f213", "2e292d10d668c4b4ba92f1b5272ebc82d4bc5f35", "34953ebdcf96d4499472039312bda86a0ba4e7ca", "744e57b95bde6f4a2c866792cfb9e2987c9656ca", "2024c8eb42f6a64944e21636fb2ba725aff490c8", "701bfe031d8db19d54edcd73f512f2eb5f52db0c", "3deb0363b9414648a3339479adab3bdf99d2eda0", "0608d9937c074520cdc93cc444cc1c77039c5332", "16708d0e9bd3da8cc20285cbe1d82a4eb6799e90", "2bb77d81d865b67b0995d67d781f2a74351a3bd0", "e672542e9b666546920d29589a7fe4d751bf6241", "383478370162b80f1ef24ccc9c95e156fc808077", "0e7046853923c44f887f9b3757235bb455510aff", "6d96e42a887648034e4d6388c580bbdfb93557e2", "08e1df7e5c128f69c575c50f3a72a8a9e8ccc572", "07f9f289d91fd630e25ea0fa0d0f64eac8909f1c", "5578045657a90d2db6ac86bb4afbe38c035fc6a5", "f35dafc75194067589e069c37271aedc87b7dd94", "4a385f180470b30466ef2239809d0e2de66f2e74" ], "paperAbstract": "Developing Big Data Analytics workloads often involves trial and error debugging, due to the unclean nature of datasets or wrong assumptions made about data. When errors (e.g., program crash, outlier results, etc.) 
arise, developers are often interested in identifying a subset of the input data that is able to reproduce the problem. BigSift is a new faulty data localization approach that combines insights from automated fault isolation in software engineering and data provenance in database systems to find a minimum set of failure-inducing inputs. BigSift redefines data provenance for the purpose of debugging using a test oracle function and implements several unique optimizations, specifically geared towards the iterative nature of automated debugging workloads. BigSift improves the accuracy of fault localizability by several orders-of-magnitude (∼103 to 107×) compared to Titian data provenance, and improves performance by up to 66× compared to Delta Debugging, an automated fault-isolation technique. For each faulty output, BigSift is able to localize fault-inducing data within 62% of the original job running time.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3131624", "https://acmsocc.github.io/2017/assets/socc17-finalpapers/socc17-final244-acmpaginated.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4d40e154622b2828b56fdd1b04977ffa339a994e", "sources": [ "DBLP" ], "title": "Automated debugging in data-intensive scalable computing", "venue": "SoCC", "year": 2017 }, "4d4408d48fcd8a75cd1c840d6cc72e36f492acf1": { "authors": [ { "ids": [ "1689976" ], "name": "Jinfeng Li" }, { "ids": [ "1717691" ], "name": "James Cheng" }, { "ids": [ "1752128" ], "name": "Fan Yang" }, { "ids": [ "1682064" ], "name": "Yuzhen Huang" }, { "ids": [ "8890418" ], "name": "Yunjian Zhao" }, { "ids": [ "1784748" ], "name": "Xiao Yan" }, { "ids": [ "8929449" ], "name": "Ruihao Zhao" } ], "doi": "10.1145/3077136.3080800", "doiUrl": "https://doi.org/10.1145/3077136.3080800", "entities": [ "Algorithm", "Apache Hadoop", "Application programming interface", "Computation", "Distributed computing", "Locality of reference", "Locality-sensitive hashing", "Nearest neighbor 
search", "SPARK", "Scalable locality", "Source lines of code", "lsh" ], "id": "4d4408d48fcd8a75cd1c840d6cc72e36f492acf1", "inCitations": [], "journalName": "", "journalPages": "635-644", "journalVolume": "", "outCitations": [ "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "ff860f9c61f0d783d48893f671e986a01e5c3026", "276285e996c08a26706979be838fd94abae2fed9", "70ac255db930b424371a366f14ad8117e5580277", "0ad8e89091eed09217e66adc98136126addc2619", "4ffcbd052cf0a403f275beb0b1c06168ecbd9806", "310b203a7754959df711056a617634bc10ed1d9a", "a77dbe50505a1ee09982c1a1576f09d554eb45c3", "c96ca694db7e97b6b32efd47bd12d50322a5e242", "8f3f63773aa801b2cdcfa2e5699e3abb9aed7443", "1156f60e40548096df49528b1342bb3e88b0f378", "0541d5338adc48276b3b8cd3a141d799e2d40150", "1d32f29a9880998264286633e66ea92915cc557a", "71fbbc1675780f2f945073f9d92c09b8d76f80f0", "027bac258f2306d8a6035117653c6ccd0f0cd5f2", "1c799eca7983c62f7815ac5f41787b3e552567b6", "7852d1f7ed81875445caa40b65e67e2813a484d2", "2e3dacedc558d0b17e4f53668f2ba913666f5797", "20b2f35b361c92673908d4d7d7445c9cc0e1c22f", "1452f20140dba52b928c9be5f385b5ac35537a2c", "d1d36c845d63e0d48f03f4b3834e36ce2b5f4574", "bf32a27be69f44d529b3c80bf73277e8cd2ef94a", "b9becc31d2f335dc2d12964c55bed7d1cf4dc7dc", "27ae23bb8d284a1fa8c8ab24e23a72e1836ff5cc", "226bab70ecb9e13ef8fdb503a6183615d5c898ed", "3f1e54ed3bd801766e1897d53a9fc962524dd3c2", "24cc16d32e801b01210c872f20bf1a87a97f9304", "c0bbb56b4428e9a83d067c07054946293b475fe9", "3073762760c5f27ab4570c12136928a71b7806c3", "2555cd819ea26c78bf3d58ca4094f77984a10426" ], "paperAbstract": "Locality Sensitive Hashing (LSH) algorithms are widely adopted to index similar items in high dimensional space for approximate nearest neighbor search. As the volume of real-world datasets keeps growing, it has become necessary to develop distributed LSH solutions. 
Implementing a distributed LSH algorithm from scratch requires high development costs, thus most existing solutions are developed on general-purpose platforms such as Hadoop and Spark. However, we argue that these platforms are both hard to use for programming LSH algorithms and inefficient for LSH computation. We propose LoSHa, a distributed computing framework that reduces the development cost by designing a tailor-made, general programming interface and achieves high efficiency by exploring LSH-specific system implementation and optimizations. We show that many LSH algorithms can be easily expressed in LoSHa's API. We evaluate LoSHa and also compare with general-purpose platforms on the same LSH algorithms. Our results show that LoSHa's performance can be an order of magnitude faster, while the implementations on LoSHa are even more intuitive and require few lines of code.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080800" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4d4408d48fcd8a75cd1c840d6cc72e36f492acf1", "sources": [ "DBLP" ], "title": "LoSHa: A General Framework for Scalable Locality Sensitive Hashing", "venue": "SIGIR", "year": 2017 }, "4d6facda18e1ea219858b820c050bb4b6db03535": { "authors": [ { "ids": [ "2304880" ], "name": "Kee Sung Kim" }, { "ids": [ "2500690" ], "name": "Minkyu Kim" }, { "ids": [ "2669596" ], "name": "Dongsoo Lee" }, { "ids": [ "2193234" ], "name": "Je Hong Park" }, { "ids": [ "2062623" ], "name": "Woo-Hwan Kim" } ], "doi": "10.1145/3133956.3133970", "doiUrl": "https://doi.org/10.1145/3133956.3133970", "entities": [ "Computation", "Computational complexity theory", "Computer security", "Data structure", "Dictionary", "Email", "Encryption", "Information security", "Point of View (computer hardware company)", "Symmetric-key algorithm", "Transport Layer Security", "Wikipedia" ], "id": "4d6facda18e1ea219858b820c050bb4b6db03535", "inCitations": [], "journalName": "", "journalPages": "1449-1463", 
"journalVolume": "", "outCitations": [ "140a59b3dec992db8c214a8c4d751882fcc547b8", "10130d16b8ceb9aea868c416df56e929a0631cdc", "14dc5effd28d22cf7fc8aa6a1be8ae2d37859891", "62b0603324e12755abeba2602ffdecb23937e7e0", "02beed2e1350a0d0b01bb9622081cb93a965a716", "56d320acfad7f6e8060acb77191c179844fab3cb", "ab288ba0ecf4027d5eec90d8debbb06dde0076e3", "961487973d4b33f96406fddbfcf1235dc587571f", "5dd4e093946716f7a10911854d166a28941e4d8e", "ad0c881078b2cd3d69b5cc2ef63bcdb72070298e", "45e93055f2505a0cc0261581bd57d0cc7bf01939", "1cb9aa0116af7d9e61ffabfa951153e9f4e43779", "567d09feb818cc156d2b1e8def1b4bfff63f8e91", "22b1b1927c63403898c7eb06d987192bbd9254fb", "47564fdfc63a1a36102b8b6c74f978bbc5190c5a", "6b41ef695bcc8c8816060002851e9be9d385a16b", "be2f737bd30976386b069f6edc61371dcda9fec8", "3864cfb41db27452cefe3b1f64f05623690201ab", "9ea1bbb1d3302aa9504e71ca42e1c19c09e310e0", "0df6726c1d83b1e0d6c6580a1e2594519590e38e", "d878fb5a7d1ea14649f590de5ebb806d1414f0b6", "a94205aed0148ae6d00986aef009e5e05d046f43", "13868fa5a86ebde021a1c91415fb9bb718c4a804", "32dc88258734f6c9d8fd1d0151d0c763ae2df75a", "1ab81ae077d6944fbff279a7a8a38df48f75eadf" ], "paperAbstract": "The recently proposed file-injection type attacks are highlighting the importance of forward security in dynamic searchable symmetric encryption (DSSE). Forward security enables to thwart those attacks by hiding the information about the newly added files matching a previous search query. However, there are still only a few DSSE schemes that provide forward security, and they have factors that hinder efficiency. In particular, all of these schemes do not support actual data deletion, which increments both storage space and computational complexity. In this paper, we design and implement a forward secure DSSE scheme with optimal search and update complexity, for both computation and communication point of view. 
As a starting point, we propose a new, simple, theoretical data structure, called dual dictionary that can take advantage of both the inverted and the forward indexes at the same time. This data structure allows to delete data explicitly and in real time, which greatly improves efficiency compared to previous works. In addition, our scheme provides forward security by encrypting the newly added data with fresh keys not related with the previous search tokens. We implemented our scheme for Enron email and Wikipedia datasets and measured its performance. The comparison with Sophos shows that our scheme is very efficient in practice, for both searches and updates in dynamic environments.", "pdfUrls": [ "https://acmccs.github.io/papers/p1449-kimA.pdf", "http://doi.acm.org/10.1145/3133956.3133970" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4d6facda18e1ea219858b820c050bb4b6db03535", "sources": [ "DBLP" ], "title": "Forward Secure Dynamic Searchable Symmetric Encryption with Efficient Updates", "venue": "CCS", "year": 2017 }, "4d811c5cc0b805cd8b6ffe3a4e82bca9e1487e74": { "authors": [ { "ids": [ "1938271" ], "name": "AmirAli Abdolrashidi" }, { "ids": [ "3282425" ], "name": "Devashree Tripathy" }, { "ids": [ "26913214" ], "name": "Mehmet Esat Belviranli" }, { "ids": [ "1689989" ], "name": "Laxmi N. 
Bhuyan" }, { "ids": [ "4884583" ], "name": "Daniel Wong" } ], "doi": "10.1145/3123939.3123976", "doiUrl": "https://doi.org/10.1145/3123939.3123976", "entities": [ "CUDA", "Data dependency", "Dynamic programming", "Graphics processing unit", "Parallel computing", "Run time (program lifecycle phase)", "Scheduling (computing)", "Synchronization (computer science)", "Thread block", "Wire-frame model" ], "id": "4d811c5cc0b805cd8b6ffe3a4e82bca9e1487e74", "inCitations": [ "bb5364418ca7328bee7ddaa62e3a7f21030de920" ], "journalName": "", "journalPages": "600-611", "journalVolume": "", "outCitations": [ "1eeb50d5f7937f65a910203ae61430ff8b969012", "d9aa6ada2ec07ef954efbe6f0cb7f3ebfb77d714", "514514e3f6150d1f36a7820fc5da5a17953d62f7", "1e90fd5d10f69f8a4b77f519a889bec56c57628b", "387d5b24317395ae7a86c8ecc9403ac62ed6febe", "7928d1cc196c04e073ea95e88464e9d534d447fd", "0689c8056abadaa8c7df8498e511e56bd59094e4", "8a810b313e6078472e87b747e871923853850260", "755e4ad5468747b31b9d6994885b17ad957dc9d7", "876014931b26abf9b87a911d394d25beab674bbe", "28cc7453c5f3f9ecb9415e631b0829ec9af8a4c3", "40dc89e0799a16653a2b7ff90771267725467a4a", "7dc44454887df37b83dca85e9192e98f6e02b50b", "14724c356106ae50746318b1bdd27d9b684c7d11", "fd10683c3dcb0e9c283cae6ddeb6721897de2e12", "0972414bfe90cb2fa460c27f5694375b74325aaa", "903da6c4d5488505e09df1479696832bcbdc2878", "87224645bdfb650d5e62a61ff1cbcf6fd5eaca10", "63af4355721f417bc405886f383af096fbfe51b2", "08104146873817cc35cbd96d7ca3e5169cb72296", "da797a1d389fdadd27a240a4d8da4280d801e10f", "5c9fa798a510b66a37c1b0852582fd7735ed088e", "9e869b8ac93377ef465102ac8f77bfff4cc88cdb", "5e5da2a57395b0ca6888f1bbd7de5d27e33b5a81", "2d6f002477015469075954c6748a1a85af352c94", "4761c5cb321f90fe5520ebfb0f9194c10ffdd24d", "20bb4cd07f1c97b70bc71bc49062bc1767dfc6c7", "64e4cf7337f045a39af83cd53126dfefccbd7b19", "302b2af5ba9ee9d94460a594d35b31f795491e08", "7f5a3cd2107b852c2418b3cac3b0abfab687297d", "8814ba7515481fe16afc79cb41b05d4aa58f2df2", 
"347a08cd9ada1cee83713d24ec84ed49ab121987" ], "paperAbstract": "GPUs lack fundamental support for data-dependent parallelism and synchronization. While CUDA Dynamic Parallelism signals progress in this direction, many limitations and challenges still remain. This paper introduces Wireframe, a hardware-software solution that enables generalized support for data-dependent parallelism and synchronization. Wireframe enables applications to naturally express execution dependencies across different thread blocks through a dependency graph abstraction at run-time, which is sent to the GPU hardware at kernel launch. At run-time, the hardware enforces the dependencies specified in the dependency graph through a dependency-aware thread block scheduler. Overall, Wireframe is able to improve total execution time up to 65.20% with an average of 45.07%.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123976" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4d811c5cc0b805cd8b6ffe3a4e82bca9e1487e74", "sources": [ "DBLP" ], "title": "Wireframe: supporting data-dependent parallelism through dependency graph execution in GPUs", "venue": "MICRO", "year": 2017 }, "4da17fac0ef3e23e6ae9849061fa604b826d3219": { "authors": [ { "ids": [ "36040916" ], "name": "Tongxin Li" }, { "ids": [ "40066762" ], "name": "Xueqiang Wang" }, { "ids": [ "4337333" ], "name": "Mingming Zha" }, { "ids": [ "8336490" ], "name": "Kai Chen" }, { "ids": [ "34989133" ], "name": "XiaoFeng Wang" }, { "ids": [ "3022476" ], "name": "Luyi Xing" }, { "ids": [ "8536237" ], "name": "Xiaolong Bai" }, { "ids": [ "1693436" ], "name": "Nan Zhang" }, { "ids": [ "3336900" ], "name": "Xinhui Han" } ], "doi": "10.1145/3133956.3134021", "doiUrl": "https://doi.org/10.1145/3133956.3134021", "entities": [ "Adversary (cryptography)", "Login", "Mobile app", "Mobile device", "Operating system", "Phishing", "Security controls", "The Walking Dead: Season Two", "User experience", "User interface", "Web content" 
], "id": "4da17fac0ef3e23e6ae9849061fa604b826d3219", "inCitations": [ "f79827ec37afa05034aa5fa5711d017a1b83fccf" ], "journalName": "", "journalPages": "829-844", "journalVolume": "", "outCitations": [ "fc49dafcc2a0fbfffef599f9d9bc3e8cf7a143d7", "8e9d4966d49ebd648d520e25128bb03383f6ae22", "25c77cd67d275f66e9eb776e885887c0ab7abd9d", "a9977198d07f5fede50a81236d7806868f4c9a27", "1668a1d1196f988b497d2e9465e365b7fabb804e", "2eee257c63ab21ee4c56577a288b2c751f0329ca", "60aa7348b1730aef6400e5e23c0b864ea94b1b36", "ca0dac6a33e155b264213a6273d6a125dc3d1071", "7f9bbe985ccf6c16b6ef60ccb9ef04e4219b54cb", "0cb4ed5d73b4885f05facfa6aee45bdcdec1847e", "94d0e8b118efecb9f5b2056d84bf22253a2fb63c", "cd1fdbc5bbe453339d26951a79e3ca533920a854", "23fa7b866a1b1fee7bb71c8b5a9235cca7120bbc", "5473857faea5b6480dcc1e3386279fad340166d7", "8d50599334b0cccca88b9f1ad27d4b7d5de2c1cc", "29898e452f80ba09357a2fb716c7b14d75eb3bd6", "451f72230e607cb59d60f996299c578623a19294", "0d0d0dfa38ca86e711dc4279f486f89c6d901cfe", "72a34e437f1ad1d063bff04c506fa4fc38de080a", "6bba847f25124aea6e14231f13f63a3f0a4f9c33", "591336480dbb7a0e71bff9c201c73544c4615381", "a259d3205fcef2a70d5ceb87a39e2150c058fb81", "4301542658dd07a9775ea921282b20acd3ffc446" ], "paperAbstract": "As a critical feature for enhancing user experience, cross-app URL invocation has been reported to cause unauthorized execution of app components. Although protection has already been put in place, little has been done to understand the security risks of navigating an app's WebView through an URL, a legitimate need for displaying the app's UI during cross-app interactions. In our research, we found that the current design of such cross-WebView navigation actually opens the door to a cross-app remote infection, allowing a remote adversary to spread malicious web content across different apps' WebView instances and acquire stealthy and persistent control of these apps. 
This new threat, dubbed Cross-App WebView Infection (XAWI), enables a series of multi-app, colluding attacks never thought before, with significant real world impacts. Particularly, we found that the remote adversary can collectively utilize multiple infected apps' individual capabilities to escalate his privileges on a mobile device or orchestrate a highly realistic remote Phishing attack (e.g., running a malicious script in Chrome to stealthily change Twitter's WebView to fake Twitter's own login UI). We show that the adversary can easily find such attack \"building blocks\" (popular apps whose WebViews can be redirected by another app) through an automatic fuzz, and discovered about 7.4% of the most popular apps subject to the XAWI attacks, including Facebook, Twitter, Amazon and others. Our study reveals the contention between the demand for convenient cross-WebView communication and the need for security control on the channel, and makes the first step toward building OS-level protection to safeguard this fast-growing technology.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134021", "https://www.informatics.indiana.edu/xw7/papers/p829-li.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4da17fac0ef3e23e6ae9849061fa604b826d3219", "sources": [ "DBLP" ], "title": "Unleashing the Walking Dead: Understanding Cross-App Remote Infections on Mobile WebViews", "venue": "CCS", "year": 2017 }, "4daba6641bf2f3ae2608cf3780c94b480d1eb544": { "authors": [ { "ids": [ "2504006" ], "name": "Hari Cherupalli" }, { "ids": [ "3187702" ], "name": "Henry Duwe" }, { "ids": [ "7982124" ], "name": "Weidong Ye" }, { "ids": [ "8153371" ], "name": "Rakesh Kumar" }, { "ids": [ "1813088" ], "name": "John Sartori" } ], "doi": "10.1109/HPCA.2017.48", "doiUrl": "https://doi.org/10.1109/HPCA.2017.48", "entities": [ "Algorithm", "Central processing unit", "Embedded system", "Overhead (computing)", "Power domains", "Power gating", "Power management", "Processor 
design", "Programmer", "Requirement", "Simulation", "Spectral leakage", "Symbolic simulation" ], "id": "4daba6641bf2f3ae2608cf3780c94b480d1eb544", "inCitations": [], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "157-168", "journalVolume": "", "outCitations": [ "1166d00fe5319beba9d8d713f9fdf0c4d80744b4", "675d53d75788b4cc580e3e90c5ef91d29454a295", "0a34381478b6576fd16623b3234f922a49b941b4", "151cb281d6486dcc231b6da3d32304e2b08b2fe0", "5208576ac552a2bf2840558ec67f418370f5ff9d", "3c11b4e74086db34430d5381031319cae83ce17a", "6ef623c853814ee9add50c32491367e0dff983d1", "063700ef01aad15a1981553fde02e8d162a553e7", "9458915e0b7e9abfd9f9c24e35b036505c899a8b", "46706b89bd3ccb7585c459f0f495603f287e191f", "2d172dfedc08654c47f6335550efa4d5a2d78f45", "7248d9df516209aa2c737bce3532994a5ac8a41a", "92121c117e8d9b731cc5382ba644c481a8d512af", "0170445683d4197e05032c2769faf829c3f017fd", "7498e51ffce80639e96cfb3ab66b5558e595d07b", "259730135cb6222dd64f298357efff5a7038ff90", "4165376a7aadf6a1c1acddd6fc236047b7becff1", "13ad5b5fdd3ba3d74fa96a12450b726696fbfe77", "2605c673c58f11c73166ffee54d1ae5950b532df", "49300e3de6b148a27e13754ee7fd388b4966fde3", "6228ed86d5ad276f47da6d491fd9072716e6ebf8", "0d01b42384dd92c400052a05e3d24cebaecd4056", "04a0485bec9dff9b8391bfa1372a71df2f79ad77", "8bc590fd8d61603b0c7e6475f933f66f49bec0fc", "045fb9ec269be637f3b10bfe73aa95b54b9176b9", "8d34bd50004938006d0fd1c5f2f1409d02e9d56a", "5324cd42d9f04ff75037ce9cbd8e34fb278e15e4", "41236387e01eacb63cefad6318dc48fc60e9829e", "033614852cfe29708ddebf6cb3f846582f5dd7ba", "11443efe465ad544f478524da6c66c085b16e28b", "2fd2ae274d0c03fc47b90fd7c490c8fafea6450d", "1abb651f5eb33d6a0c3c234c4c8a7dc2e9e47506", "84364e7d4de93cd8536a5c800f2e02b15aba501d", "9d4bc7c0569cb548b2a1e319948c8f91061abb49", "e3ae7cd9493c73f92f1bb38aa5729dcd14a1e138", "2ddb5176006689ffbc7ba6f58f4c0eccfb3168d8", "e557136e4f5ea24658f0388aeb5767be896840be", 
"3846f779e22fbd5d84e6ef8956be68fe496bdb6b", "3179e15d2fbefe0273f41b247fff2edfaf3db179", "2188cc9ad6376f4d9877fccc1365505fd144f9fc", "d724b43c9bbefc5c5f45503be77b378cd7c05114", "7e7dd129e74727c2d5ea68613bdba22b0dfc7ae5" ], "paperAbstract": "The increasingly-stringent power and energy requirements of emerging embedded applications have led to a strong recent interest in aggressive power gating techniques. Conventional techniques for aggressive power gating perform module-based power gating in processors, where power domains correspond to RTL modules. We observe that there can be significant power benefits from module-oblivious power gating, where power domains can include an arbitrary set of gates, possibly from multiple RTL modules. However, since it is not possible to infer the activity of module-oblivious power domains from software alone, conventional software-based power management techniques cannot be applied for module-oblivious power gating in processors. Also, since module-oblivious domains are not encapsulated with a well-defined port list and functionality like RTL modules, hardware-based management of module-oblivious domains is prohibitively expensive. In this paper, we present a technique for low-cost management of module-oblivious power domains in embedded processors. The technique involves symbolic simulation-based co-analysis of a processor's hardware design and a software binary to derive profitable and safe power gating decisions for a given set of module-oblivious domains when the software binary is run on the processor. Our technique is automated, does not require programmer intervention, and incurs low management overhead. 
We demonstrate that module-oblivious power gating based on our technique reduces leakage energy by 2x with respect to state-of-the-art aggressive module-based power gating for a common embedded processor.", "pdfUrls": [ "https://doi.org/10.1109/HPCA.2017.48", "http://www.ee.umn.edu/users/jsartori/papers/hpca17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4daba6641bf2f3ae2608cf3780c94b480d1eb544", "sources": [ "DBLP" ], "title": "Enabling Effective Module-Oblivious Power Gating for Embedded Processors", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "4db34285f75cf7ddcdf911e58575a9b78c80c40a": { "authors": [ { "ids": [ "2273350" ], "name": "Mingzhe Zhang" }, { "ids": [ "1864050" ], "name": "Lunkai Zhang" }, { "ids": [ "40201553" ], "name": "Lei Jiang" }, { "ids": [ "39358904" ], "name": "Zhiyong Liu" }, { "ids": [ "1691956" ], "name": "Frederic T. Chong" } ], "doi": "10.1109/HPCA.2017.45", "doiUrl": "https://doi.org/10.1109/HPCA.2017.45", "entities": [ "Cell (microprocessor)", "Domain Name System Security Extensions", "Iteration", "Multi-level cell", "Phase-change memory", "Radio resource management" ], "id": "4db34285f75cf7ddcdf911e58575a9b78c80c40a", "inCitations": [ "f86f74f7c53e391cc056ee2e030cabf955355685", "12175bb22aad0af8c398018ff6eda1b10ffe12ab", "59e667bdea4cf085b7f01b8ae9fdbf925b6d3211", "4e544e6db8a17252e0cd6da00401bba734ad64b8", "22f0a538f061db451fcb76be205a45c876064310" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "385-396", "journalVolume": "", "outCitations": [ "82194fbed597423c3f3a6c96a5bccf3ba11e94da", "314919c141024c71cb17d525ecd8016138335002", "30532fe0df76e4f3169db85dc2a4d61af415dbd7", "425c117685a681c6c6de55e2928dc87066b53fbb", "960a0a263e07dd14aac50c0439bfdf60c3fe82db", "164a2d44033f7003565892a6f10ac86703d6ca7f", "2fc8a439f0f73462f875870c403c90ea1fb99e41", 
"67ce921291a99d30be490e556293305534359101", "1bb29cdeab20f4f5d739aacbb403e3751ca15f3b", "7c93683610d90349d2de668d6bdd5af628623e5c", "03b6a916498fa8591201a2de5f22344609b1e457", "2cc69da629e857dbd7facbcf808a64b10e9db9a7", "90a80ffd112ff30e9ffa54cf5f6efe56182b9e49", "823116269044ab4c713373c66c7da3fcb495b459", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "6ab26bfbd89e6651b4bf0ee318662e77da3b30e0", "dc402ec8a32be25037bf53be3bb7e7d3261a9e19", "7e6de063791da2b379724bdc52c83e2031a7766a", "00255682a82bb4db04a42c64d7a4f8ba42160339", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "050182ddd4aa0f71637c786f5cd31902eff73d68", "4d58df3b85a479446fc0054886f41df6de0c4ec3", "0b0e3cb7976d96e24d8323918a8f5331fe562097", "05fcbc121e772bbe956b0db34ad56bab9a72e319", "12a491d732add45d9cec8bc29c326d66019031f0", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "93b220888225d1fa51fbfcd4442c542ed8886692", "57b3b91ff5ee1477f714ccafc64a23bc70584fed", "38a9120f780602521af9744e31d80ef5cd9593a7", "3b97b637430e4f799cd458ad1f86bbfa31ebf53d", "14e6d535ffcb874028827a3647345cad33fa6c93", "37146652e00f0dcc23eb7006eac5fdb08baa2004", "180097defea4ba19a4b3f5e2972502dd4dedd1ac" ], "paperAbstract": "Multi Level Cell (MLC) Phase Change Memory (PCM) is an enhancement of PCM technology, which provides higher capacity by allowing multiple digital bits to be stored in a single PCM cell. However, the retention time of MLC PCM is limited by the resistance drift problem and refresh operations are required. Previous work shows that there exists a trade-off between write latency and retention—a write scheme with more SET iterations and smaller current provides a longer retention time but at the cost of a longer write latency. Otherwise, a write scheme with fewer SET iterations achieves high performance for writes but requires a greater number of refresh operations due to its significantly reduced retention time, and this hurts the lifetime of MLC PCM. 
In this paper, we show that only a small part of memory (i.e., hot memory regions) will be frequently accessed in a given period of time. Based on such an observation, we propose Region Retention Monitor (RRM), a novel structure that records and predicts the write frequency of memory regions. For every incoming memory write operation, RRM select a proper write latency for it. Our evaluations show that RRM helps the system improves the balance between system performance and memory lifetime. On the performance side, the system with RRM bridges 77.2% of the performance gap between systems with long writes and systems with short writes. On the lifetime side, a system with RRM achieves a lifetime of 6.4 years, while systems using only long writes and short writes achieve lifetimes of 10.6 and 0.3 years, respectively. Also, we can easily control the aggressiveness of RRM through an attribute called hot threshold. A more aggressively configured RRM can achieve the performance which is only 3.5% inferior than the system using static short writes, while still achieve a lifetime of 5.78 years.", "pdfUrls": [ "http://mingzhe-zhang.github.io/paper/hpca2017rrm.pdf", "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.45" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4db34285f75cf7ddcdf911e58575a9b78c80c40a", "sources": [ "DBLP" ], "title": "Balancing Performance and Lifetime of MLC PCM by Using a Region Retention Monitor", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "4dc205c31737bdf04717a2b86ec844121baf3dbf": { "authors": [ { "ids": [ "11856684" ], "name": "James She" }, { "ids": [ "1830010" ], "name": "Chen Zhao" }, { "ids": [ "32049933" ], "name": "Ming Cheung" }, { "ids": [ "1740670" ], "name": "Hao Liang" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.36", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.36", "entities": [ "Average path length", "Citation network", 
"Continuation", "Existential quantification", "Six degrees of separation", "Social network" ], "id": "4dc205c31737bdf04717a2b86ec844121baf3dbf", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "278-285", "journalVolume": "", "outCitations": [ "219d95a38e8e3f2e7f2c102cc5f31f9fdddb1742", "a080ae25221ee29082f4f30302c65fe901abfe7a", "2fe7471fca022721ce1572ec86521d7db863d6aa", "ec4bd845dae9c5b9b006a198a39d6709030c714d", "fabb4f49fcfc364807bccc5e68990d1d0c71e44a", "0706356c9ab6014d6b04577d38289ea8328291a5", "45d9a0db23906842f4c3385e4f231ede34d32db0", "233084c0d1c818c842be6a9bb50f5dd2d1d1682f", "05d5a28fd29fdbd405743cd282888e463c8cb26a", "189ef562597987ef484b43ff2c9d3fca6b797309", "0e0a79910ddb8dedc198c56a803ea774af47d3a3", "3eb6fc4c534c9cfae0ef74cb07e2e96c164edbf6", "05d6e0185bcb48d396fe778ceedb2078e37e72ef", "b790f9ae49cdee5354eacfc7897ab9752acd5e2a", "09147715a90046f31bf713e033f22a7a84484711", "099ec8ae8716c061a0cf0a8568a7721d5d3313bd", "0d06de003e8ca949b3b39f9a51750c050addb997", "d9cf136153bd4a941eee95174c9d2c6ffa5a6688", "199006c69026ed2dde8a25e676b67bdcb76f171b", "3de2972503b0611ab81eddedfe13bd6268ad3587" ], "paperAbstract": "The densification power law is a concept in the realm of temporal graph evolution. The number of edges grows in a power law over the number of nodes over time, replacing the pre-2005 general assumption of a linear trend. The densification power law has been verified by several real networks over a long period of time. In this work, one such graph, the arXiv citation network is investigated to examine how the densification power law is working ten years after its publication. The network is evaluated and compared with the discussion in a previous work. 
It is observed that the graph densification continues over time, but instead of maintaining a constant densification power exponent, as suggested by previous work, the exponent is actually dropping over time, which suggests the densification power law is now fading away. Here, this fading effect is literature analysed, and it is suggested that node capability is the major obstacle to the continuation of the original trend. To fully compare with the previous work on graph evolution, the change of the average path length over time is also investigated on our and other's results. The results imply the decreasing of the average path length in the temporal evolution is very slow, which suggests that there exists a new universal degree of separation in social networks of around three.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.36" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4dc205c31737bdf04717a2b86ec844121baf3dbf", "sources": [ "DBLP" ], "title": "From Densification Power Law to Degree of Separation: A Case Study", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "4dcb77ef24a579872e9899220c191aced326a172": { "authors": [ { "ids": [ "21035690" ], "name": "Hyungil Jo" }, { "ids": [ "2056015" ], "name": "Sung-Hun Kim" }, { "ids": [ "1756753" ], "name": "Sangwook Kim" }, { "ids": [ "1782453" ], "name": "Jinkyu Jeong" }, { "ids": [ "6064655" ], "name": "Joonwon Lee" } ], "doi": "", "doiUrl": "", "entities": [ "Data center", "I/O scheduling", "Linux", "MongoDB", "Scalability", "Scheduling (computing)", "Shard (database architecture)" ], "id": "4dcb77ef24a579872e9899220c191aced326a172", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "0831a5baf38c9b3d43c755319a602b15fc01c52d", 
"1d99b7749a9311d2db24a3d84728e444eff23e4b", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "764d7de61421968d6b477f0c055d72dcb0893544", "3338173866c3c85338a5ac26560d5392108c8eac" ], "paperAbstract": "Interactive data center applications suffer from the tail latency problem. Since most modern data center applications take the sharded architecture to serve scale-out services, a request comprises multiple sub-requests handled in individual back-end nodes. Depending on the state of each back-end node, a node may issue multiple I/Os for a single sub-request. Since traditional I/O scheduling operates in an application-agnostic manner, it sometimes causes a long latency gap between the responses of sub-requests, thereby delaying the response to end-users. In this paper, we propose a request-aware cooperative I/O scheduling scheme to reduce the tail latency of a database application. Our proposed scheme captures request arrival order at the front-end of an application and exploits it to make a decision for I/O scheduling in individual back-end nodes. We implemented a prototype based on MongoDB and the Linux kernel and evaluated it with a read-intensive scan workload. 
Experimental results show that our proposed scheme effectively reduces the latency gap between sub-requests, thereby reducing the tail latency.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-jo.pdf", "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-jo-062917.pdf", "https://www.usenix.org/conference/hotstorage17/program/presentation/jo" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4dcb/77ef24a579872e9899220c191aced326a172.pdf", "s2Url": "https://semanticscholar.org/paper/4dcb77ef24a579872e9899220c191aced326a172", "sources": [ "DBLP" ], "title": "Request-aware Cooperative I/O Scheduling for Scale-out Database Applications", "venue": "HotStorage", "year": 2017 }, "4dcb98238f7067d34f857d23010b5cde10c50f2e": { "authors": [ { "ids": [ "1810219" ], "name": "Santiago Gonzalez" }, { "ids": [ "2002462" ], "name": "Vijay Chidambaram" }, { "ids": [ "1715858" ], "name": "Jivko Sinapov" }, { "ids": [ "1728389" ], "name": "Peter Stone" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Autonomous car", "Operating system", "Requirement", "Robot", "Robot Operating System", "Robotics", "Utility" ], "id": "4dcb98238f7067d34f857d23010b5cde10c50f2e", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "d3fc5469593a177c3b040c70d278828507bd84ea", "cbb85ca77c8affc84f8bd9c9868ffb2e05d33a9e", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "69884f09be947c43e1029bb3ddc95db5edc2a03d", "44607270754f8521d6c4d42297aa881393f4f8e0", "d45eaee8b2e047306329e5dbfc954e6dd318ca1e", "0558c94a094158ecd64f0d5014d3d9668054fb97", "0bd9f3dafa13aed3f30a505a0ee292bfd01c80dc", "2944568377e25ae2b3a60e2355f9f35f7612fbe0", "0a5fe2f50c12f6236a800a9be92ae059c40067ab", "061abdde4c6a7274b7070591ec0fa1bdce95efd9", "056c11ff4be528ec8f3c8ba8ea85e86d6b7203e1", "244eac7d24cd0ac9d9175dfb91b3cd300bcbbfc5", "265d18ced11e2e64d98afa97b0e86965e68101f7", 
"2151a214aca6e72ee2980ae8cbf7be47fed0cb7a" ], "paperAbstract": "Modern robots collect a wealth of rich sensor data during their operation. While such data allows interesting analysis and sophisticated algorithms, it is simply infeasible to store all the data that is generated. However, collecting only samples of the data greatly minimizes the usefulness of the data. We present CC-LOG, a new logging system built on top of the widely-used Robot Operating System that uses a combination of classification and compression techniques to reduce storage requirements. Experiments using the Building-Wide Intelligence Robot, a mobile autonomous mobile platform capable of operating for long periods of time in human-inhabited environments, showed that our proposed system can reduce storage requirements by more than an order of magnitude. Our results indicate that there is significant unrealized potential in optimizing infrastructure commonly used in robotics applications and research.", "pdfUrls": [ "http://www.cs.utexas.edu/~jsinapov/papers/gonzalez_HotStorage2017.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/hotstorage17_slides_gonzalez.pdf", "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-gonzalez.pdf", "https://www.usenix.org/conference/hotstorage17/program/presentation/gonzalez" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4dcb/98238f7067d34f857d23010b5cde10c50f2e.pdf", "s2Url": "https://semanticscholar.org/paper/4dcb98238f7067d34f857d23010b5cde10c50f2e", "sources": [ "DBLP" ], "title": "CC-Log: Drastically Reducing Storage Requirements for Robots Using Classification and Compression", "venue": "HotStorage", "year": 2017 }, "4e021b75ef09a7c2f5d4e6e103dbfba981579b68": { "authors": [ { "ids": [ "1695654" ], "name": "Ahmed Elmokashfi" }, { "ids": [ "2164304" ], "name": "Dong Zhou" }, { "ids": [ "3199635" ], "name": "Dziugas Baltrunas" } ], "doi": "10.1145/3117811.3117842", "doiUrl": 
"https://doi.org/10.1145/3117811.3117842", "entities": [ "Centralisation", "End-to-end encryption", "End-to-end principle", "Failure rate", "High availability", "Social network", "Stationary process" ], "id": "4e021b75ef09a7c2f5d4e6e103dbfba981579b68", "inCitations": [], "journalName": "", "journalPages": "88-100", "journalVolume": "", "outCitations": [ "6148985780e13799604ec39ce1e56c0c42133626", "53109a52d8a3690d8c2499de317cb68074d4d9a1", "1788428f03e3e7ad9f330fdf46be8d165cd5a453", "08b646f6fca13a738e7073d614abb902e7aec874", "062bd67c240a7710225fcaf2e236eebafa94eecb", "f6ff8d4be958b4e07af7f10a7af3afa58e0f5516", "1a11fe55ab1f1b1c7dbe69c9b0ceb099a418d302", "8e22e3677a962e93083513e428701840beb55e5e", "1686ef78f90d366f2df992ea8a8cfef23a0e1d42", "05acc6b944e20d207fa41237febf56bc04795bae", "7da446d2b14209866a412913d0a6446077bd54c0", "13bbf077589b0741e7f4e6265f1d2be75e3627db", "1f79775b58072a2ab484aad798aec0c9c7fa8605", "5574de790e0cbad422a2b8a95e7f8d20c523db13", "a244b4196df0533289008b185478c89846c592f7", "729031aab2f9b31aae5605bc3480af141368a07f", "27a78e3076a011731eca77d77cfc757b50072e04", "4caddca24c03fe85948e060d2bf222ed926d6c1d", "78c1c3f34d9db0bfbdde678c20206ec1d1e0e2f0", "7dc4eeec47a324d578f86169019b794c2537a344", "5be2e75d2bc6763c062eaa38babc707d750f3f1b", "6f79e8e6ae8c2fbba094d479a7d636cb1f28a614", "c5f1d79525f65d51e833489d7dd7a1af01c48803", "036277d492dd5777e87e5b33ffd809e5c617a37a", "7ad070b2e23d550f6fe0aee694c71f87061eaf35", "9f41b022cb87902786a1fa39949a983371451260" ], "paperAbstract": "The near ubiquitous availability and success of mobile broadband networks has motivated verticals that range from public safety communication to intelligent transportation systems and beyond to consider choosing them as the communication mean of choice. Several of these verticals, however, expect high availability of multiple nines. This paper leverages end-to-end measurements to investigate the potential of current mobile broadband networks to support these expectations. 
We conduct a large-scale measurement study of network availability in four networks in Norway. This study is based on three years of measurements from hundreds of stationary measurement nodes and several months of measurements from four mobile nodes. We find that the mobile network centralized architecture and infrastructure sharing between operators are responsible for a non-trivial fraction of network failures. Most episodes of degraded availability, however, are uncorrelated. We also find that using two networks simultaneously can result in more than five nines of availability for stationary nodes and three nines of availability for mobile nodes. Our findings point to potential avenues for enhancing the availability of future mobile networks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117842" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4e021b75ef09a7c2f5d4e6e103dbfba981579b68", "sources": [ "DBLP" ], "title": "Adding the Next Nine: An Investigation of Mobile Broadband Networks Availability", "venue": "MobiCom", "year": 2017 }, "4e02512b5df57750e1dba791a51c16ab0571dd23": { "authors": [ { "ids": [ "17769259" ], "name": "Felipe Rodrigo de Souza" }, { "ids": [ "2706343" ], "name": "Charles Miers" }, { "ids": [ "3283254" ], "name": "Adriano Fiorese" }, { "ids": [ "2056792" ], "name": "Guilherme Piegas Koslovski" } ], "doi": "", "doiUrl": "", "entities": [ "Causality", "Data center", "Integer programming", "Linear programming", "Quality of service", "Requirement", "Rounding", "Software deployment", "Software-defined networking", "Virtual machine" ], "id": "4e02512b5df57750e1dba791a51c16ab0571dd23", "inCitations": [ "dd94b2e0203a1752189c9463b8bfc593d1cec7aa" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "120-129", "journalVolume": "", "outCitations": [ "c77c29b3049bc39c36929d01525e94fcca14837f", "12af8de1e914b50b369dfcfc5edfbf2e2daf4b31", 
"663e064469ad91e6bda345d216504b4c868f537b", "64f3a81fff495ac336dccdd63136d451852eb1c9", "1927de3ddaddbc3bf53257cc5ee6e8ba127819a0", "2deb7a8710b7022f4b35e6e0cce5d4ec0eacef7d", "1f139cfbcde6b722c52fa0342073f94cf16e4c8b", "a2196de837ceb746214e0d820efa410e941390e5", "1cbbdf58133f763813b3a61b8faf2f5ab74464b7", "238dd4c308c1ee6ef3809fdf15fdc87be74bdbc8", "626bf17326bfa6fde411af9c51135d7ae390e1e7", "22630a79f1c50603c1356f6ac9dc8524a18d4061", "6a4b8c48e51ba62cc7bc3b3d7ea06acc0262eb08", "07ac5ac8b410994def58fa45af18756c7fc56697", "56d4c7f21519cedee39cb8da9ca96f96d64d80a0", "2ad062fa4cbf5cb59104a5b02d563d99d74240b8", "7a0604b8f816af1dc81dce32f98b541816b7bd1d", "6bc873b69b320195911a61073e689c564d91ba7b", "0159ef376e2cb6ed1b64cea3eaf22d16b8a71be8", "1b15861b55906d8eabce1e3c9f156ade8d1b4783", "ee1a9ad2851b27529819db6ab214fdb256c09da3", "11c1af3385b836626f0cc0f3e70ec386c516a424", "438110dc02f39f221896847a4d0e24f88e130598", "4c44cbcea788cc024b29ddf178249ee1c367464a", "1f657372569d962c4c4e45565dc4aea0696997bb", "0aa4cacf6a60125961f1dac4afca63a8dcf706f9", "6df2fc81a41c3d733c2481d138d4adf55aa5d36e" ], "paperAbstract": "Virtualization of computing and communication infrastructures were disseminated as possible solutions for networks evolution and deployment of new services on cloud data centers. Although promising, their effective application faces obstacles, mainly caused by rigidity on the management of communication resources. Currently, the Software-Defined Networks (SDN) paradigm has been popularizing customization and flexibility in network management due to separation of control and data planes. However, benefits introduced by SDN are not trivially applied to Virtual Infrastructures (VIs) provisioning on SDN-based cloud providers. An allocation mechanism needs joint information of control and data planes in order to deliver Quality-of-Service (QoS)-aware mappings while achieving provider objectives. 
In this work, we formulate the online VI allocation on SDN-based cloud data centers as a Mixed Integer Program (MIP). Following, integer constraints are relaxed to obtain a linear program, and rounding techniques are applied. The mechanism performs VI allocation considering latency, bandwidth, and virtual machine requirements. The results indicate that the VIs mean internal latency can be reduced while simultaneously enforcing other QoS constraints.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101128" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4e02512b5df57750e1dba791a51c16ab0571dd23", "sources": [ "DBLP" ], "title": "QoS-Aware Virtual Infrastructures Allocation on SDN-Based Clouds", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "4e544e6db8a17252e0cd6da00401bba734ad64b8": { "authors": [ { "ids": [ "2196277" ], "name": "Zhaoxia Deng" }, { "ids": [ "1864050" ], "name": "Lunkai Zhang" }, { "ids": [ "36736387" ], "name": "Nikita Mishra" }, { "ids": [ "40085771" ], "name": "Henry Hoffmann" }, { "ids": [ "1691956" ], "name": "Frederic T. 
Chong" } ], "doi": "10.1145/3123939.3124548", "doiUrl": "https://doi.org/10.1145/3123939.3124548", "entities": [ "Gradient boosting", "Lasso", "Matrix regularization", "Mobile data terminal", "Non-functional requirement", "Non-volatile memory", "Persistence (computer science)", "Requirement" ], "id": "4e544e6db8a17252e0cd6da00401bba734ad64b8", "inCitations": [ "2228b4cb1d342bba8b3c8d000e9105636cb19e55", "dc889add8f2b1d54f44ae2048a85d7ad7bf6fa04", "41ea95cc4dca373bf324555b897760054ec4a76e", "a55a685d254caeeb4f071062d5910734f8135057", "22f0a538f061db451fcb76be205a45c876064310" ], "journalName": "", "journalPages": "232-244", "journalVolume": "", "outCitations": [ "f7484996fa52e9ca6415ecde662c5c1a32d62226", "0b885bb186445ee0c50277d990eca18c53fef09b", "4e2e21f9f4ff9f7f2811a75ab6be48989838f25f", "40eb2f5a97298da40838388700b097f82adff167", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "5d999f4a5567e6f4a54e46bbcd6006f75ab0cbac", "37d2b78e615ec9f271541c30639e4470788a1325", "3c5fab57c3c0fbc26df5ed4d1721b1190330d24c", "188ca8bb2ec2ed38a055a78b48d46f5991298754", "7515af5d111ded65acd5a192bcffc64819a769ec", "3ed84f2fbdc4dc6450919ec5b017e66440a5833c", "1212e1615891b005df762b669f396b1dce8a78ee", "3ee90eaa6a389b046157889206ce155ac76c178f", "73d5dd350098daf2a8055684f704c9b5ac83c56a", "1400901fca7695d180a44d1f0f49f6830e0ceeeb", "67ce921291a99d30be490e556293305534359101", "0e44228b12df76587803f3f7a8e49fe3a0aee45f", "216f2ee11f9be97d297979a3010aac4529248359", "0648617785024ebe90481532b19cb3fd859a97d6", "7cd29ed1da71593bfb79b553ba6c5ee39ccf7a7b", "1b65af0b2847cf6edb1461eda659f08be27bc76d", "48710c82bea8283382f81fcdba540160a0b00e16", "96afaf4d2a491dce64561d2e56478d628db0cc8a", "48caac2f65bce47f6d27400ae4f60d8395cec2f3", "158d9a985d0420d6f48a1a71d8a0cace226bbf9b", "26be1295af49be4426a83d3acf6384416285601f", "1e1e2e271506c1793609d45040ea7356e4cd4a68", "1f7e3b0c425ea6f554b10989dec5f726ecb8bd23", "4caa5ec3be365a341ea9f8dcaa8a4f9a7bebe304", "960a0a263e07dd14aac50c0439bfdf60c3fe82db", 
"a95436fb5417f16497d90cd2aeb11a0e2873f55f", "37146652e00f0dcc23eb7006eac5fdb08baa2004", "164a2d44033f7003565892a6f10ac86703d6ca7f", "7b5fd4fa3c7bdd14d5832e3b76e6fe36be4ea677", "b4a77957fd2c8e0f9ffbf20c7d999ab2da599bfb", "180097defea4ba19a4b3f5e2972502dd4dedd1ac", "61ad52b2ef341e50ec133600a76c3bb67cc3dda6", "1b6f3f5fd0bd3024ac44ea1c4e590d92f8a40a15", "30532fe0df76e4f3169db85dc2a4d61af415dbd7", "2fc8a439f0f73462f875870c403c90ea1fb99e41", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "352a8957005dc5519b15ed1870751ec494d66395", "03b6a916498fa8591201a2de5f22344609b1e457", "4db34285f75cf7ddcdf911e58575a9b78c80c40a", "25e0dcb0e7b3446fbf16c48e9a6a4ad36f645f3b", "7232af3c4a3c4641ece9818d1c484260ce33a266", "337584c610d77f6fb3bd93236d9326814ba112fc", "1bb29cdeab20f4f5d739aacbb403e3751ca15f3b", "7e6de063791da2b379724bdc52c83e2031a7766a", "48534b21548e3692ad7d866387f1dc7f543109e1" ], "paperAbstract": "Non-volatile memories (NVMs) have attracted significant interest recently due to their high-density, low static power, and persistence. There are, however, several challenges associated with building practical systems from NVMs, including limited write endurance and long latencies. Researchers have proposed a variety of architectural techniques which can achieve different tradeoffs between lifetime, performance and energy efficiency; however, no individual technique can satisfy requirements for all applications and different objectives. Hence, we propose Memory Cocktail Therapy (MCT), a general, learning-based framework that adaptively chooses the best techniques for the current application and objectives.\n Specifically, MCT performs four procedures to adapt the techniques to various scenarios. First, MCT formulates a high-dimensional configuration space from all different combinations of techniques. Second, MCT selects primary features from the configuration space with lasso regularization. 
Third, MCT estimates lifetime, performance and energy consumption using lightweight online predictors (eg. quadratic regression and gradient boosting) and a small set of configurations guided by the selected features. Finally, given the estimation of all configurations, MCT selects the optimal configuration based on the user-defined objectives. As a proof of concept, we test MCT's ability to guarantee different lifetime targets and achieve 95% of maximum performance, while minimizing energy consumption. We find that MCT improves performance by 9.24% and reduces energy by 7.95% compared to the best static configuration. Moreover, the performance of MCT is 94.49% of the ideal configuration with only 5.3% more energy consumption.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3124548", "http://www.cs.ucsb.edu/~zhaoxia/papers/MICRO_MCT_Talk.pdf", "http://people.cs.uchicago.edu/~ftchong/papers/Micro17-mct.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4e544e6db8a17252e0cd6da00401bba734ad64b8", "sources": [ "DBLP" ], "title": "Memory cocktail therapy: a general learning-based framework to optimize dynamic tradeoffs in NVMs", "venue": "MICRO", "year": 2017 }, "4e595957047360ce23310150566f228d6fa4507e": { "authors": [ { "ids": [ "1785303" ], "name": "Xin Jin" }, { "ids": [ "1927358" ], "name": "Xiaozhou Li" }, { "ids": [ "3108945" ], "name": "Haoyu Zhang" }, { "ids": [ "1762445" ], "name": "Robert Soul\u00e9" }, { "ids": [ "6727727" ], "name": "Jeongkeun Lee" }, { "ids": [ "1719644" ], "name": "Nate Foster" }, { "ids": [ "33742176" ], "name": "Changhoon Kim" }, { "ids": [ "1716557" ], "name": "Ion Stoica" } ], "doi": "10.1145/3132747.3132764", "doiUrl": "https://doi.org/10.1145/3132747.3132764", "entities": [ "Application-specific integrated circuit", "Attribute\u2013value pair", "Cache (computing)", "Cache coherence", "Forwarding plane", "In-memory database", "Key-value database", "Keyboard shortcut", "NetCache", "Network switch", 
"Throughput", "USB flash drive" ], "id": "4e595957047360ce23310150566f228d6fa4507e", "inCitations": [ "2dc3d8536a8fa4660c6e842ef17c2d1553fceea6", "a6472fe7fbc978de8597c2f783891aa1eb1f87a5", "0d573d5f27504e51727b8c1f2be2f206e6a9cc18", "726c2e6b8d7f97d9a3256fc08d17f6fe99cc1a7b", "571a253f7c5ed3517657ce8a49c25f0ebccc3d79" ], "journalName": "", "journalPages": "121-136", "journalVolume": "", "outCitations": [ "682fa93d79223b3362424e612476b6a1ba4be56c", "1dd353938063795c06ef21d8b0b3ef3b45a2fdc1", "14ce949181d829a9874aa598646d9ca63fe1ade0", "79b8a6587630cabdd2778628dd6d4abf86cc2af7", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "61d5c261cfa704085f9d397b298a150bcc07336b", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "098d792d1783b5f6fc098203f71f21f5d053c653", "51098280164dcc12b1ef69632430a8a362b70452", "24c6e70c583daed1852637ec42d4589556ac59d3", "f465e873cb9d9e5cd74cc759c2b015da06385a86", "1594118f2696b573f08510cf837f3b37db87face", "0d923afc5ca379e7a488f3a0eefd5767bb2a191d", "44028c00bf3872ae06aa46f569c3b9dceebdd909", "032e6705b2c9cdb68bc66c28c8ddb4956db2b2e5", "7a278ee0578f194700cadc3811cdda4ec751f88a", "29a1148d75878671dc3663bf480e33d7bd91597d", "326986f09e88600bf5a1bd3e7012b2c8eb8ecff5", "234e6be0d4238f76b3ac038ee422be39f391c625", "01094798b20e96e1d029d6874577167f2214c7b6", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "433207f45ac2c9dbe3876ab53af28ed569e94da9", "339888b357e780c6e80fc135ec48a14c3b524f7d", "9aa0d7253574e50fe3a190ccd924433f048997dd", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "77cc2393afbe91fb27abd4264f2a6fd5363da4a5", "638c917d981915bc7a00bb0941cdd38111df51de", "20614c320f0dca6144a3934c465d42f451c972f6", "42a58d723284834d040db43c83e83e58e5fb92a0" ], "paperAbstract": "We present NetCache, a new key-value store architecture that leverages the power and flexibility of new-generation programmable switches to handle queries on hot items and balance the load across storage nodes. 
NetCache provides high aggregate throughput and low latency even under highly-skewed and rapidly-changing workloads. The core of NetCache is a packet-processing pipeline that exploits the capabilities of modern programmable switch ASICs to efficiently detect, index, cache and serve hot key-value items in the switch data plane. Additionally, our solution guarantees cache coherence with minimal overhead. We implement a NetCache prototype on Barefoot Tofino switches and commodity servers and demonstrate that a single switch can process 2+ billion queries per second for 64K items with 16-byte keys and 128-byte values, while only consuming a small portion of its hardware resources. To the best of our knowledge, this is the first time that a sophisticated application-level functionality, such as in-network caching, has been shown to run at line rate on programmable switches. Furthermore, we show that NetCache improves the throughput by 3-10x and reduces the latency of up to 40% of queries by 50%, for high-performance, in-memory key-value stores.", "pdfUrls": [ "http://www.cs.princeton.edu/~haoyuz/publications/netcache-sosp17.pdf", "http://doi.acm.org/10.1145/3132747.3132764", "https://www.sigops.org/sosp/sosp17/slides/netcache-sosp17-slides.pdf", "http://www.inf.usi.ch/faculty/soule/sosp2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4e595957047360ce23310150566f228d6fa4507e", "sources": [ "DBLP" ], "title": "NetCache: Balancing Key-Value Stores with Fast In-Network Caching", "venue": "SOSP", "year": 2017 }, "4e731dfc4eee0006865d131b384f46b29965f42e": { "authors": [ { "ids": [ "2812833" ], "name": "Haogang Chen" }, { "ids": [ "3089670" ], "name": "Tej Chajed" }, { "ids": [ "40408909" ], "name": "Alex Konradi" }, { "ids": [ "2117867" ], "name": "Stephanie Wang" }, { "ids": [ "27102580" ], "name": "Atalay Ileri" }, { "ids": [ "1762600" ], "name": "Adam Chlipala" }, { "ids": [ "1681493" ], "name": "M. 
Frans Kaashoek" }, { "ids": [ "1789973" ], "name": "Nickolai Zeldovich" } ], "doi": "10.1145/3132747.3132776", "doiUrl": "https://doi.org/10.1145/3132747.3132776", "entities": [ "IBM Tivoli Storage Productivity Center", "Linux", "Linux", "SQLite", "Software bug", "Solid-state drive", "Sync (Unix)", "Verification and validation" ], "id": "4e731dfc4eee0006865d131b384f46b29965f42e", "inCitations": [ "63e0d37726433ab38187c648bbc13ba3fd6255ac" ], "journalName": "", "journalPages": "270-286", "journalVolume": "", "outCitations": [ "41da20c0fb04dd4769f3772e392362acd893af57", "55edf8d36576d63851d8f5739e8d0b6b094fe5cf", "06f7617a591dc6931ca0cbebccad508da93ff433", "1c4e7db34497e4e0286e1a7feb5972f6da614253", "1d081dbf3e9afebafac90fdeed4bfa788012142f", "16a455aeacd14529bee92b0c197619fa2d173151", "2be26e8aa238ac37a80e08303f128d8014bb9f3b", "00caa4dea9216bec01b465f8a69d0e1becc07b7a", "34ef9c71821bd3ed7fa52c9178e1ee272fedb803", "80b6e27a5c1ddda5324a4d9f1cc787c3d6b1dc34", "93b58f721de046dacada133902e6d07c6f46501f", "1d0f2662cca5c859419b78fea468f4bc2f39e87d", "3076b1894c8ac54b125f9563e0395a6f4e7db1a6", "09c0d62190aedb53e820695ccbe98d90f877cc46", "01f21f3aacb36a425aa9213a10ccc543a11659ab", "088e3e939ad234b6fdd0e321290fb26937dc2553", "aedd00a108e2e8fe36b85ff069b87f81fbcdd83f", "07d50264195a7bebb394cac60866cccfc4689e13", "35c2f7e0454adc0130c4279fce84a31701cebc67", "93145591bbd72449ce2884ed4a7f89a89ae25f30", "b6d399804ca4afc9a346b12f33878658fc5c1bc1", "02ef312f94e9f64bca07289663d9a2344b5f764b", "07a05ce761cf6bcdbebd952870ab6486819a71d4", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "38b7e9721cc3e326580465deaf0f0028b92afe6a", "883a595fd76cb4dc0509a1005040286b31610059", "acf3617606a42e504a4d7f36a42e003d8b801b1c", "265d18ced11e2e64d98afa97b0e86965e68101f7", "128c3e04314e6fca8deed005d74a3d1ba36ad293", "42142c121b2dbe48d55e81c2ce198a5639645030", "aca6e394db4b622e2c882b58b5d891e05ddb6db3", "2f2941de162a17eda0ffbf7eef8981ee25808df5", "454352d5dac27e1f4f62cd187f2e0029351daf90", 
"274e495824827f5a9dc1ba3ab62620445e6b3d4b" ], "paperAbstract": "DFSCQ is the first file system that (1) provides a precise specification for fsync and fdatasync, which allow applications to achieve high performance and crash safety, and (2) provides a machine-checked proof that its implementation meets this specification. DFSCQ's specification captures the behavior of sophisticated optimizations, including log-bypass writes, and DFSCQ's proof rules out some of the common bugs in file-system implementations despite the complex optimizations.\n The key challenge in building DFSCQ is to write a specification for the file system and its internal implementation without exposing internal file-system details. DFSCQ introduces a metadata-prefix specification that captures the properties of fsync and fdatasync, which roughly follows the behavior of Linux ext4. This specification uses a notion of tree sequences---logical sequences of file-system tree states---for succinct description of the possible states after a crash and to describe how data writes can be reordered with respect to metadata updates. This helps application developers prove the crash safety of their own applications, avoiding application-level bugs such as forgetting to invoke fsync on both the file and the containing directory.\n An evaluation shows that DFSCQ achieves 103 MB/s on large file writes to an SSD and durably creates small files at a rate of 1,618 files per second. This is slower than Linux ext4 (which achieves 295 MB/s for large file writes and 4,977 files/s for small file creation) but much faster than two recent verified file systems, Yggdrasil and FSCQ. 
Evaluation results from application-level benchmarks, including TPC-C on SQLite, mirror these microbenchmarks.", "pdfUrls": [ "https://pdos.csail.mit.edu/papers/dfscq.pdf", "http://people.csail.mit.edu/nickolai/papers/chen-dfscq.pdf", "http://doi.acm.org/10.1145/3132747.3132776", "http://6826.csail.mit.edu/2017/papers/dfscq:sosp2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4e731dfc4eee0006865d131b384f46b29965f42e", "sources": [ "DBLP" ], "title": "Verifying a high-performance crash-safe file system using a tree specification", "venue": "SOSP", "year": 2017 }, "4e783a1a6b42a81206d0f31d1f7ef6e3aa4b789b": { "authors": [ { "ids": [ "40640498" ], "name": "Orr Fischer" }, { "ids": [ "12427017" ], "name": "Shay Gershtein" }, { "ids": [ "1757695" ], "name": "Rotem Oshman" } ], "doi": "10.1145/3087801.3087821", "doiUrl": "https://doi.org/10.1145/3087801.3087821", "entities": [ "Asymptotically optimal algorithm", "Communication complexity", "Communications protocol", "Multiparty communication complexity", "Property testing" ], "id": "4e783a1a6b42a81206d0f31d1f7ef6e3aa4b789b", "inCitations": [ "7df5efc8036f4c4a281346ca929cc81db39a091e" ], "journalName": "", "journalPages": "111-120", "journalVolume": "", "outCitations": [ "72205343db2b5ab3debeaa7afe354474d5256a46", "315d727f1a305c1738ba8f4904d4773a82aee7ed", "1c57e39eb00d3083b9dae397331307b5bf4e1839", "5ff6db9d2ef805fc59248a466933a590cf524427", "20f1dfc912bcacfc8c12b75cbc024b9890437f64", "e05f410b45d75f78a72354d430a75373c395e9ce", "50146c3fb11e45f9072a6bc0d4a6c336756a6537", "2c74cac7f8171f9e6aec986c12b38025359c105a", "0bbdad82e740a637e3d26c0955f680442c69feb4", "261b545e2045dccaf4c8edd1f542dd9530e2c78a", "29746f4d82c5d4c2bdd5cc42537541e68d825a83", "632377f50d5b47c43947049ec3e9948338397282", "ee4324d66cfd6e70d2b1033580a6e15dbe3827a3", "0232002fec1ba80cdd1a4628267cbf156984a62d", "e0e7a8712bf0808200985f6c92ead1da034733e8", "c8045e36f0eb6af65fb326a96f3c7b926de56666", 
"b144b8ed1fe17bc5bbd659b6b885e7c991ab7b06", "05916fde7203116aac42310ea49b99535f7e0150", "7bd16feb9af3b52a534cd3fcdb771e51f16cccd2", "d921036a6cb7e340b019afa557a19bc65586a1ad", "41f832326f962da15069444307fea289be7442dc", "1bf92cfffe2e84878e3ea3b412c5fb140550b361", "254a19a4c7b374d507de6d4c0aae709d3abdd733", "8a4fb50c5dd2b9f0f4b15ad8c471274302ed8da5", "0b000c7f9581eb35e3aebf4d87c05f7b409f2dad", "8a79b5d8750b229c17c79db14503b7708845b5c3", "222a8b02a0f81b485c26ba71138e3ed726877d3a", "8b30695cb60ecc2d7b08cd6d2242885ad57ad5fb", "04ee1c7ed1b22ce513ce2672b89eb3b2ea371258", "6c16c543adb398f36bb7d7e90d3a06c43d104475", "0094bd8fbb6841300d9a8bfcc206fc4f1a1f94bb", "6faa7a1dc1bb32220e0153f109e397ff567a43cb", "32081549f5f90db81f6569ef9eecedeeab5d46df" ], "paperAbstract": "In this paper we initiate the study of property testing in simultaneous and non-simultaneous multi-party communication complexity, focusing on testing triangle-freeness in graphs. We consider the coordinator model, where we have k players receiving private inputs, and a coordinator who receives no input; the coordinator can communicate with all the players, but the players cannot communicate with each other. In this model, we ask: if an input graph is divided between the players, with each player receiving some of the edges, how many bits do the players and the coordinator need to exchange to determine if the graph is triangle-free, or far from triangle-free? For general communication protocols, we show that \u00d5(k(nd)^{1/4} + k^2) bits are sufficient to test triangle-freeness in graphs of size n with average degree d (the degree need not be known in advance). For simultaneous protocols, where there is only one communication round, we give a protocol that uses \u00d5(k\u221an) bits when d = O(\u221an) and \u00d5(k(nd)^{1/3}) when d = \u03a9(\u221an); here, again, the average degree d does not need to be known in advance.
We show that for average degree d = O(1), our simultaneous protocol is asymptotically optimal up to logarithmic factors. For higher degrees, we are not able to give lower bounds on testing triangle-freeness, but we give evidence that the problem is hard by showing that finding an edge that participates in a triangle is hard, even when promised that at least a constant fraction of the edges must be removed in order to make the graph triangle-free.", "pdfUrls": [ "http://arxiv.org/abs/1705.08438", "https://arxiv.org/pdf/1705.08438v1.pdf", "http://doi.acm.org/10.1145/3087801.3087821" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4e78/3a1a6b42a81206d0f31d1f7ef6e3aa4b789b.pdf", "s2Url": "https://semanticscholar.org/paper/4e783a1a6b42a81206d0f31d1f7ef6e3aa4b789b", "sources": [ "DBLP" ], "title": "On the Multiparty Communication Complexity of Testing Triangle-Freeness", "venue": "PODC", "year": 2017 }, "4e7ae6f6c406f57014875307a080f48903ac42b4": { "authors": [ { "ids": [ "35214252" ], "name": "Fraser Brown" }, { "ids": [ "19272506" ], "name": "Shravan Narayan" }, { "ids": [ "3031766" ], "name": "Riad S. Wahby" }, { "ids": [ "2373056" ], "name": "Dawson R. 
Engler" }, { "ids": [ "1695297" ], "name": "Ranjit Jhala" }, { "ids": [ "34997080" ], "name": "Deian Stefan" } ], "doi": "10.1109/SP.2017.68", "doiUrl": "https://doi.org/10.1109/SP.2017.68", "entities": [ "Application programming interface", "Backward compatibility", "C++", "Google Chrome", "High- and low-level", "JavaScript", "Memory-mapped I/O", "Node.js", "Software bug" ], "id": "4e7ae6f6c406f57014875307a080f48903ac42b4", "inCitations": [ "f194bddbe2d4d4ce5d2e54e7cee01a30aa3e11ce", "38876d86e5e7851181efc9ed3bf15765c0b59bb1" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "559-578", "journalVolume": "", "outCitations": [ "201b0a185dda51629d7b6fdef3b380a0beaba455", "7b10af1a2fda84913058554b18d641b6cb199661", "0252b465a94a44e626e44d8eea306183c08c2c0b", "0719b9670c8580db76547497df39caabdc20fc32", "612abb8a9bb99f981e4f287cea8f656cb2d4cd8a", "872d3684cb079b25dd2fbffcce2f3929474e7b37", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "37bca8a9c5937a36644ecbb0fa46f7eb12e41bc5", "3780af84854d528c773d93b07214081a56a8ba83", "4debbe91fa7f6f201675693c97c53c5beb882e7b", "9b5f6b2b56a698a8d56dd3d7847d0821daf18bca", "955072e0de8895083e61c499562ddaba14061bb2", "449ac3e3912c25f907b962da65a9d8a715b1e507", "48a43d2b963ad52c99873d3857bb3f9e6c662b40", "48123161a407e9ae52a2efa4bc7b5d0d066daf9f", "df1d7b884d6974eae24d9cddc76591caacfe0a96", "7207b8a6b1368636910607bb7f35d3411422b295", "0a3b3f24f650c086530104176806f464d91f3fac", "0be938d0d1825cd2afd3c9f9c8fd7b8d0b56101d", "0c246863ee7d0513cdc2cebff9b173cd4bdc8134", "2194c3460ab71f3826db00b045b2ae590c753319", "9e1eaab73e07f2f418accce1a3ac55f5a464a130", "01ee7db67463b53143e0b2c126363ed6d3c8a532", "2a61439362e5f1f16d9759a44fbddfbf16d69a37", "3479e0c6ebfeefb054bcc1aad1f13df5beab4e0c", "2c3d491a3bea2c1016587aa8f9fee21293a84856", "269344e7eabb9820eb91031cd4bcdcb5eba39e3d", "a4c65e56af25edfa786de8978bb0221c1e3c1387", "031e1eb62214945aae260802275c0338dece87d3", "a9fc8d376be9484307c9f42b20692963e06770e0", 
"046a9e129fba46d78301ead661949f5290c79989", "11e71c2706db53d7b2c3764dea9bd73930e13c7f", "2ade7d2c0fbed892925a9d8ca7e0946eb28b2b3a", "05e91f470897db25e7165bb7734dca2948b9f274", "31cf33e772b5796769b1c6027263d091f63277b8", "2494382813fa0b7aa405c4cc0f1ef5be90ca2d79", "0f30462958b56c285f37876e62f1b4543c2c3c58", "0c175d5b7aad01b5f3847287b64a428f2f2455c0", "600ce036648f6d92b2a4aa7ac5e20407afba7e9b", "1ffeb2932f515e260c932eeff6dc4b001eff4de3", "047fef73865624ed2ecab04636764c58ac89adf7", "0dbb129a8297312710a141a373bb1a123621f998", "6b83d668c5006615d871329107627dc4e5cb3ea1", "57044cdc18e37f74e6185d4fe837f780ddcd669a", "4c3a84729bd09db6a90a862846bb29e937ec2ced", "143cd817835243e873f82f28367c8866f779187d", "37de6c557635ffc2ca26b9e1e7509510d233996e", "6ec1b663eaf540f68fe318ae50a13a8e812aeecb", "ab21d8eb98d797e9daa8bf658e2f808ea3841bcc", "021af3b63fbcf5d867a4b27ca161841bf129c759", "073540ca0aaf15c28f9571707dd846b746247d7b", "736768fe05e6d114f9d0d2b10ba4a04db6c5ba75", "36b81e9f54b9a46d6c29bb0a760d01f285f2949a" ], "paperAbstract": "JavaScript, like many high-level languages, relies on runtime systems written in low-level C and C++. For example, the Node.js runtime system gives JavaScript code access to the underlying filesystem, networking, and I/O by implementing utility functions in C++. Since C++'s type system, memory model, and execution model differ significantly from JavaScript's, JavaScript code must call these runtime functions via intermediate binding layer code that translates type, state, and failure between the two languages. Unfortunately, binding code is both hard to avoid and hard to get right. This paper describes several types of exploitable errors that binding code creates, and develops both a suite of easily-to-build static checkers to detect such errors and a backwards-compatible, low-overhead API to prevent them.
We show that binding flaws are a serious security problem by using our checkers to craft 81 proof-of-concept exploits for security flaws in the binding layers of the Node.js and Chrome, runtime systems that support hundreds of millions of users. As one practical measure of binding bug severity, we were awarded $6,000 in bounties for just two Chrome bug reports.", "pdfUrls": [ "https://cseweb.ucsd.edu/~dstefan/pubs/brown:2017:finding.pdf", "https://doi.org/10.1109/SP.2017.68", "http://ranjitjhala.github.io/static/binding-bugs-sp2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4e7ae6f6c406f57014875307a080f48903ac42b4", "sources": [ "DBLP" ], "title": "Finding and Preventing Bugs in JavaScript Bindings", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "4e920e4f04deec3d45377312cd124d4c1b6440fe": { "authors": [ { "ids": [ "23155051" ], "name": "Jan Philipp Ecker" }, { "ids": [ "1697208" ], "name": "Rudolf Berrendorf" }, { "ids": [ "2587646" ], "name": "Florian Mannu\u00df" } ], "doi": "10.1007/978-3-319-64203-1_38", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_38", "entities": [ "Sparse matrix" ], "id": "4e920e4f04deec3d45377312cd124d4c1b6440fe", "inCitations": [], "journalName": "", "journalPages": "523-537", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_38" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4e920e4f04deec3d45377312cd124d4c1b6440fe", "sources": [ "DBLP" ], "title": "New Efficient General Sparse Matrix Formats for Parallel SpMV Operations", "venue": "Euro-Par", "year": 2017 }, "4e99f2e275afb80e977343568c4d3f1af8ee24ba": { "authors": [ { "ids": [ "3086775" ], "name": "Masoud Saeida Ardekani" }, { "ids": [ "18612874" ], "name": "Rayman Preet Singh" }, { "ids": [ "26642757" ], "name": "Nitin Agrawal" }, { "ids": [ "1680763" ], "name": "Douglas B.
Terry" }, { "ids": [ "3197683" ], "name": "Riza O. Suminto" } ], "doi": "10.1145/3135974.3135988", "doiUrl": "https://doi.org/10.1145/3135974.3135988", "entities": [ "Complex event processing", "Ethernet hub", "Failure rate", "Fault tolerance", "Home automation", "Reliability engineering", "Residential gateway", "Single point of failure" ], "id": "4e99f2e275afb80e977343568c4d3f1af8ee24ba", "inCitations": [], "journalName": "", "journalPages": "41-54", "journalVolume": "", "outCitations": [ "33bfbb042e6ad35c50b48de22a5798783d553735", "78c83644fbfa65e69137f57e22fa3b53f225a5d5", "0de106f547d644f1c2a13bf767840eb9049ff7fc", "a6690317643f1a50d510c0922bb0c473096dc6a4", "48d5c7b3dbf631dec66b51f7b7a9e168a86506bd", "352680a8eaada2f8fe26afd49b85df7a0ed32179", "e1bbd3187d0ae01a5d7e0f6b29d67aacbf5dc09b", "45998ae76d590314e6fd930a02d9d49556bc5a64", "6a656a567097c53a49b1dbeb9e1e77bebf7524ec", "20c450f099b661c5a2dff3f348773a0d1af1b09b", "49aec52ff74120061cb35c65dd899f3f7bcb2fab", "514e9179cda253e7e04be3107cb3425162e049e8", "0832f564cb62f153534e815eeeae620d3cc01c01", "0e14b04ee84972831ef0a12e350e5f9ef32c7d26", "1160e7bf54e72fa10c769c15a9d319d4798b7b1b", "b54bd28a1ec3df9f6a5e13b1f359cf0f2ee699cb", "5dd204ae6b82ccaf4a840a704bfd4753e8d48add", "5578cabaef7b5dfc88443626e74d2e04951818f7", "e847c3ec130da57328db79a7fea794b07dbccdd9", "036e006a9f2049d15c1533ac254dcfce2483a1f6", "685f1e1a88f299704591266f2ee9abe29afdd124", "a6e0e547d137327de6e808dd59a81edcad07634b", "126f4e76145299425bfb03e85d13f91108aea032", "341d0668662682e7fd4b610e0154881d268eeca6", "446cd3b0aa52ea6863b025ba4e16e4dcf194dec2", "77064d504c6aa9f6baecb27e8978139547649fba", "4ce25286205c62fffda7d685a916cf4508149245", "50b0205264b16991cd7ac5f0c141cd3b1fe0b97a", "034ef3644de78f3d76e5855bed16db00a83f14a4", "76ec30f9d6516a94a032831a60368f2cc376f664", "65776abca3a9ead1ea72702e5b2f3334c48b884d", "1f11725f7d81b5e9b97fc3a3c178aa695e786a79", "1832010db94205828c86df95df985f3848c3ca37", "16c9604d0fc53dc7f21fb31cbc7fab6bd9bdddd6", 
"3cb1f4c1650f7e55b78abba5a00b56a90b8e0567", "ee62379ffb236569f73538ff7621e64a7892907a", "2da7eb94c118a1afb95408cbde1f8cdd5916c0a6", "633248ac3ae1d65b948e77fa8044bcdc099ba207", "5ec542457fde76117338d96bab2a1c021e0486fc", "177dc0912254778423fa50815ed1eaf6544d0423", "69314ab4fa664bd5965c093574f2b4fc94c0d496", "deeca8e378c7aa2302455e7ae01b88fe9fa3fa05", "670bfd38f3e57389f9d68fb9fa2dba4197028358", "0e74f8e8763bd7a9c9507badaee390d449b1f8ca", "13d325384be1a20ea69cac6d09b8d4a0b7021c3b", "05ca83fe3b178e2ea0eb8caad6521c003860c9e8" ], "paperAbstract": "Rivulet is a fault-tolerant distributed platform for running smart-home applications; it can tolerate failures typical for a home environment (e.g., link losses, network partitions, sensor failures, and device crashes). In contrast to existing cloud-centric solutions, which rely exclusively on a home gateway device, Rivulet leverages redundant smart consumer appliances (e.g., TVs, Refrigerators) to spread sensing and actuation across devices local to the home, and avoids making the Smart-Home Hub a single point of failure. Rivulet ensures event delivery in the presence of link loss, network partitions and other failures in the home, to enable applications with reliable sensing in the case of sensor failures, and event processing in the presence of device crashes. In this paper, we present the design and implementation of Rivulet, and evaluate its effective handling of failures in a smart home.", "pdfUrls": [ "http://doi.acm.org/10.1145/3135974.3135988" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4e99f2e275afb80e977343568c4d3f1af8ee24ba", "sources": [ "DBLP" ], "title": "Rivulet: a fault-tolerant platform for smart-home applications", "venue": "Middleware", "year": 2017 }, "4ebbbeab6e0f4ba9815889854441548fa414e16b": { "authors": [ { "ids": [ "10041544" ], "name": "Jagadish B. Kotra" }, { "ids": [ "35432282" ], "name": "Narges Shahidi" }, { "ids": [ "10034764" ], "name": "Zeshan A. 
Chishti" }, { "ids": [ "1717494" ], "name": "Mahmut T. Kandemir" } ], "doi": "10.1145/3037697.3037724", "doiUrl": "https://doi.org/10.1145/3037697.3037724", "entities": [ "Algorithm", "Data integrity", "Dynamic random-access memory", "MAC address", "Memory refresh", "Operating system", "Refresh rate", "Scheduling (computing)", "Semiconductor consolidation", "Viz: The Computer Game" ], "id": "4ebbbeab6e0f4ba9815889854441548fa414e16b", "inCitations": [ "3efa068494a91a825b9744c1ee4b83663f363533", "ae39cff83d4850476855c06d02a8dc80ae55ad42", "7c036d5a4b79a735b279423358af4e8df6f7ec81", "884e104c13102e1353e85a6a91e41d3cff2c80f5", "ce14ff3b9a139629e699882ca26434a29b5c07b3", "53b402418835e6f34b8a9e5ea51440bbdd02581e" ], "journalName": "", "journalPages": "723-736", "journalVolume": "", "outCitations": [ "703c74b035ba667afeaa0d4287641bc87d2ea12f", "f69cf820714d69406bba646ca3e9ace7c444da0e", "1e6c27518d3295a34f1791beff5ac1c9537d14c3", "0d929e2e2c5a15a9c51366395968819dfd8159ec", "a65595f32e7c299cb529ad62cf376df6b3be2c51", "793b84b732bf257cb842e316a246ccfa3b0ef6a2", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "096ee9c89d43fd03a602aed3a37fdf43dc8e60ae", "85398d5f19157c91bf00da3d36210e72d57887e4", "40138cbd57a4632d6267cff4c91b55e7376a6693", "274d7d0415ad8fc787f15b244339f8d0b37e6956", "45ac0e85b3ff21bc12a7147df167be38f0d24b9c", "115713b2175047e746c8e7cd22ee1b8255866d0f", "44077076ba79033a3a73713f2041ecf224a3c359", "549cca620961e5093e315a4b0f9e670da3ff258f", "1401df37cc3fc78f26570d601fd123f17646b2d2", "8fb808a890a099896e34851179daba15659df11a", "35b92289fe9c19e5baf462ffe91bdd2d25768b00", "4cb9325b008e1551404c526d5ba0b7b3b559f4ab", "1641068a497e6c810e2bc5446c68c4728bbd5ae0", "cc03093bf08ff6b55364e220d9dbf4e239228184", "960a0a263e07dd14aac50c0439bfdf60c3fe82db", "174a3d4ad2caba68b55fd3ee863b9471e3786f21", "61ea230d0e757ff46d3a381e79691bd54b92a503", "03eaf3a6b6db01bdb749e8c3a097a0198c61b976", "53e11fc15261cc5e3a47bfda9eeb4c3355053b6d", "5293158ac8e5e2b89b96b86a064aeb0086b7dac1", 
"2efbc4631ff97be7807043735d62ec57f0201a6d" ], "paperAbstract": "DRAM cells need periodic refresh to maintain data integrity. With high capacity DRAMs, DRAM refresh poses a significant performance bottleneck as the number of rows to be refreshed (and hence the refresh cycle time, tRFC) with each refresh command increases. Modern day DRAMs perform refresh at a rank-level, while LPDDRs used in mobile environments support refresh at a per-bank level. Rank-level refresh degrades the performance significantly since none of the banks in a rank can serve the on-demand requests. Per-bank refresh alleviates some of the performance bottlenecks as the other banks in a rank are available for on-demand requests. Typical DRAM retention time is on the order of several milliseconds, viz., 64msec for environments operating in temperatures below 85 deg C and 32msec for environments operating above 85 deg C.\n With systems moving towards increased consolidation (e.g., virtualized environments), DRAM refresh becomes a significant bottleneck as it reduces the available overall DRAM bandwidth per task. In this work, we propose a hardware-software co-design to mitigate DRAM refresh overheads by exposing the hardware address mapping and DRAM refresh schedule to the Operating System. We propose a novel DRAM refresh-aware process scheduling algorithm in OS which schedules applications on cores such that none of the on-demand requests from the application are stalled by refreshes.
Extensive evaluation of our proposed co-design on multi-programmed SPEC CPU2006 workloads show significant performance improvement compared to the previously proposed hardware only approaches.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037724" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4ebbbeab6e0f4ba9815889854441548fa414e16b", "sources": [ "DBLP" ], "title": "Hardware-Software Co-design to Mitigate DRAM Refresh Overheads: A Case for Refresh-Aware Process Scheduling", "venue": "ASPLOS", "year": 2017 }, "4ecd935aa72d4e986fed4efebbc4fb50bbf542df": { "authors": [ { "ids": [ "9833675" ], "name": "Michael M. Swift" } ], "doi": "10.1145/3102980.3102982", "doiUrl": "https://doi.org/10.1145/3102980.3102982", "entities": [ "Computer memory", "Failure rate", "Memory management", "Operating system", "Persistence (computer science)", "Runtime system", "Systems design", "Terabyte", "Time complexity" ], "id": "4ecd935aa72d4e986fed4efebbc4fb50bbf542df", "inCitations": [], "journalName": "", "journalPages": "7-11", "journalVolume": "", "outCitations": [ "15f0aeddfe3f8d2a62793318cef48e203ab3b037", "d953275032b7a0da42d555130178f718e6ec0207", "ba7458ac1438ea2102de95ec2f777e5beaa01c4b", "0c96b3ac2e720448054f1bcebdfd52ee341eac57", "866bce77ca5201d182c0c43090eb75bf126efba6", "40c5050e470fa0890e85487e4679197e07a91c09", "04e8d64b569b3d5628cfdf5ad16ba0b933845e2e", "400ae82ab2fc2c814033c65854229ecefbddbf67", "60f9d8874d8679b94896160bd3a8bf4b02d8b883", "ae041f8e6228f0ccd8b01ffdeba150e63635c2c4", "19ffc4f5129ed9d39f498f4eb901024c514263c7", "01a7c93e6b5d65b9f8e9b9db8b556964dcf9bf1f", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "0027600a3ff1d431ee44fcb8b149388ec7bb3ac8", "caacd536fa218ef5218021506ebc041e3f460064", "2ee01ab9aca4163d391bd29c2123d9be44b0e986", "942c1ae472bcba8c133fad502e3aaf894256717a", "dc2e2b794a784782d7d9860f1358aa107f71c1bf" ], "paperAbstract": "Since the dawn of computing, memory capacity has been a primary limitation in system design. 
Forthcoming memory technology such as Intel and Micron's 3D XPoint memory and other technologies may provide far larger memory capacity than ever before. Furthermore, these new memory technologies are inherently persistent and save data across system crashes or power failures.\n We conjecture that current operating systems are ill-equipped for an environment where there is ample memory. For example, operating systems do substantial work for every page allocated, which adds unnecessary overhead when dealing with terabytes of memory.\n We suggest that now is the time for a complete rethinking of memory management for both operating systems and language runtimes considering excess memory capacity. We propose a new guiding principle: Order(1) operation, so that memory operations have low constant time independent of size. We describe a concrete proposal of this principle with the idea of file-only memory, in which most dynamic memory allocation is managed with file-system mechanisms rather than common virtual memory mechanisms.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3102982" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4ecd935aa72d4e986fed4efebbc4fb50bbf542df", "sources": [ "DBLP" ], "title": "Towards O(1) Memory", "venue": "HotOS", "year": 2017 }, "4ee595e32c0e2f3df1b52a0f0aecd21b9cd266af": { "authors": [ { "ids": [ "8145575" ], "name": "Sona Ghahremani" }, { "ids": [ "1784713" ], "name": "Holger Giese" }, { "ids": [ "40325142" ], "name": "Thomas Vogel" } ], "doi": "10.1109/ICAC.2017.35", "doiUrl": "https://doi.org/10.1109/ICAC.2017.35", "entities": [ "Baseline (configuration management)", "Computation", "Experiment", "Logic programming", "Mathematical optimization", "Optimal control", "Program optimization", "Scalability", "Self-assembly", "Software system" ], "id": "4ee595e32c0e2f3df1b52a0f0aecd21b9cd266af", "inCitations": [ "ae284d18ca2c9f65c6ecce4b8a6d47587d0fb67d" ], "journalName": "2017 IEEE International Conference on 
Autonomic Computing (ICAC)", "journalPages": "59-68", "journalVolume": "", "outCitations": [ "19feac00388f197872fc9f1f4f0cddf500bcd296", "dc044cfa81344cd8325fb5ce571bb2b3926d3391", "06b54529b6a28a025144b70bdbab4cf0ce98bc99", "318337f743e349c3afe4cbe2ec7ef19f3a301ab8", "342b66e1f2a7d8b94c7940dd1e053a67cdb32909", "035697d0ab0022f9fd389f46ffa988f290cf6bbe", "386c6b485e5a88a011a026ff3c60337c9d8de1f2", "212679723922956ca66bf65cd676973647463b0e", "2fdcdb0d649ec9c601a6fba81f32547135d19f52", "1d737920e815b2eed4ff8f90d0c615c269fab3cf", "38e73338b5b913ee3b85d2fdcfdfce48a69da2de", "58e9dda22b94500cb5e3b77b77031d3dfcf1ea99", "224288f94cb68d7a9bc621a3173db0cce8c5f265", "1e4f863151d21660cc408a41135a204a246ed77b", "45d7eeb5645790d6899c123a07ffa6738e3c9fd7", "09e2f1dcc308494f8c1c14e1ec5b5a4256cf6643", "725b58aa6b81490b9db8e9ed2d4a72c1d0fb366f", "6c5f925b0c881a930ac1a5411ff30eee56fd94fe", "26ebd2585203891c65facaf46d924a7b298a249a", "0f5aafa8f917b2f76dee9ae1c50c946fbb082494", "3a4689375c902b64531f6bd25b33ac81de352519", "8222cdba2b3265832d880f8ad717c86d637f6525", "1e2fb8df9e973d34c090a6e84976fcb0f5e0b2a1", "a17d32dca875566c7bedadb41b885524c9ae142e", "57170167ad8c8cdad9289b9cc632ae441ca3bc03", "7eefb314b01284f67a23f7b967ad48d449f6dab0", "ce440edac57b112a97f675994389b4d15f17ba3f" ], "paperAbstract": "Self-adaptation can be realized in various ways. Rule-based approaches prescribe the adaptation to be executed if the system or environment satisfy certain conditions and result in scalable solutions, however, with often only satisfying adaptation decisions. In contrast, utility-driven approaches determine optimal adaptation decisions by using an often costly optimization step, which typically does not scale well for larger problems. We propose a rule-based and utility-driven approach that achieves the beneficial properties of each of these directions such that the adaptation decisions are optimal while the computation remains scalable since an expensive optimization step can be avoided. 
The approach can be used for the architecture-based self-healing of large software systems. We define the utility for large dynamic architectures of such systems based on patterns capturing issues the self-healing must address and we use patternbased adaptation rules to resolve the issues. Defining the utility as well as the adaptation rules pattern-based allows us to compute the impact of each rule application on the overall utility and to realize an incremental and efficient utility-driven self-healing. We demonstrate the efficiency and optimality of our scheme in comparative experiments with a static rule-based scheme as a baseline and a utility-driven approach using a constraint solver.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICAC.2017.35" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4ee595e32c0e2f3df1b52a0f0aecd21b9cd266af", "sources": [ "DBLP" ], "title": "Efficient Utility-Driven Self-Healing Employing Adaptation Rules for Large Dynamic Architectures", "venue": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "year": 2017 }, "4ef03c46774c137b4d94f94f996bc994f87ba46e": { "authors": [ { "ids": [ "35652254" ], "name": "Lumpsum Tongsinoot" }, { "ids": [ "2247306" ], "name": "Veera Muangsin" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.16", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.16", "entities": [ "Electron mobility", "Internet access", "Mobile phone", "Usage data" ], "id": "4ef03c46774c137b4d94f94f996bc994f87ba46e", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "123-129", "journalVolume": "", "outCitations": [ "1679ae9c33ea18a95ed31ddeab068137f4b87d6d", "031bc61d0f6068da01235364a43e9fb1ca7f9866", "01161534a1226d4fa1fdb3c915d8deb210d7ce73", 
"5b5332e79aefa3b913d42a434b8ddb09b31b5b2e", "be7faab2b69d9cbf4d0eaa5e67c9943ccce8b0eb", "5100694c0919bf1268f776f8f89c18853f0e68b3", "3764717e9e2ed08d2a9f8d096adf2730cfba3902", "0ef124a8528d45c5182c381eda41b33ada7b1372", "682fe25f48330429dbedb6053e92775cbfee8d69", "a2fe06e52d14494ad4ccf6601af656defe8d931c" ], "paperAbstract": "Recently, mobile phone call detail records (CDR) have been used to study mobility patterns in cities. Since home and workplace are the most important places in people's lives and define the structure and activity patterns of the city, identifying home and work locations and home-work commuting patterns are of much interest. However, due to decreasing usage of voice calls and SMS, and low usage while people staying at home, identifying home and work locations are getting more difficult. In this paper, we develop a method to exploit daily-aggregated internet usage data (G-CDR) in addition to CDR for identifying work locations. We also develop a method for identifying home locations based on detecting sleeping time. This method can significantly increase home detection success rate and accuracy. 
From the identified home and work locations, we have explored the population distribution and commuting patterns of people in Bangkok.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.16" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4ef03c46774c137b4d94f94f996bc994f87ba46e", "sources": [ "DBLP" ], "title": "Exploring Home and Work Locations in a City from Mobile Phone Data", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "4ef4f86540a462bf7e1e7171114d66937db365c4": { "authors": [ { "ids": [ "1727558" ], "name": "Rachid Guerraoui" }, { "ids": [ "1734704" ], "name": "Jingjing Wang" } ], "doi": "10.1145/3034786.3034799", "doiUrl": "https://doi.org/10.1145/3034786.3034799", "entities": [ "Atomic commit", "Computational complexity theory", "Context-free grammar", "Database transaction", "Decision problem", "Distributed database", "Distributed transaction", "Failure rate", "Non-blocking algorithm", "Relational database management system" ], "id": "4ef4f86540a462bf7e1e7171114d66937db365c4", "inCitations": [ "d1e6417c55d5e29d64ceffe50fc49a296a5a8b30" ], "journalName": "", "journalPages": "107-122", "journalVolume": "", "outCitations": [ "1eb6ffee1f322412d9d76190fc76b3dcc6546cee", "16508d683f38d6fb4295d6cad725a97970b71eca", "552369f8cbf567091c21529663e8f9b51c2951ce", "2254335dccbed71d322cea94ce71938479d3fede", "1e32492f456bcf58d07b1658825733dbfb9d816c", "90d83c96cd08c8cc3c9f1b68739bc6c17d159ae2", "206b20f225fc655dfac733b6f0bd8077ed86215e", "57efc2b9ba2a725af1d66cc43c472d0314190051", "06176761f65d58b9725ccb58c1cb9f223ab6b65a", "50e5756be30c85d07f87fce922e71254a51ba59e", "29a05cde1994548e2e9487822248c679626c6241", "bd34f5e62cb313f33dff3bad6bb0e96394198361", "055f337f7ca7cf13884faaa855a879849d132b45", 
"05a618847e4f08e5bca29dff732757779722b2e0", "861fbac82ae5ec0ea654d0d95ce4d48de62419ea", "d2986fa1f6ba6f2cc4695d4159203962f3eaa876", "23681ff6c977b387d892b90018b2b463a98b9da1", "f77242c8be4b7a8a4429740f5ca571ce38b89cd7", "0f657990bb5c5c664aa75d014e81fb7256b610c5", "068e59b88a1230d709d99c83a45d3a5b91260810", "44edea16c2cf5e6fbeb9a603f9f788f04e1d7eea", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "3e0080a34eca4eabb9b371c2b3c369dc4dc90112", "1d8166dd9851096555dcb2ffc42c3db010803893", "0c00982450bcb08ca5fbb74e14e3a106d51f97c4", "333eca605fb09da6c7a70ca50cdd9991a10cbbdd", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "4e63eed9e709b6c8e20a4a68300883898c7d8f37", "06d8aa948ed0ff654f772439c00711dfe7fa3d1a", "77d7fbeb26b860e00bd36672f756d97a174fd425", "2fb179c0b0ad6cf37f6272db05ccfad708a82f2d", "afb815c19d5245de4877c96d63ba315eef40d839", "308d1c063329de15559c133f95957b51dba8aae2", "617eddaca59f73b4832be716fcdb4d2863360022", "3eac7a73ea65e6973a536de6e119ebb9509c609a", "f191b0fbc2d29726b1f410fff7e7c1f8c0870ce9", "7322ad3a29a61bf7e1dc489f04037d3c15f1698e", "67223547eec55336d70039fb3a75833a95bafb15", "039f09d49bc408db9e0e8429e6bd92be49c5f72e", "8580c1ea42db7644cd81fba496a153feabb4530f", "1998ddf989eb4bb7ba8fe1e678c26f2029e9911f", "3dd4f937b4c9922a5c0c4027519c949ebbc7e98b", "07b66a85083291d2b702a3bcc30f32854d4a6d29", "5d06489503b6f791aa56d2d7942359c2592e44b0", "00e3756119a91432622f6982b59ecd24a1340fbe", "9748241beb02ef1e2d0e6dc877c04b354033a838", "85e07fbdca4dec3c1ff93d275f5c317908f2859f" ], "paperAbstract": "The atomic commit problem lies at the heart of distributed database systems. The problem consists for a set of processes (database nodes) to agree on whether to commit or abort a transaction (agreement property). The commit decision can only be taken if all processes are initially willing to commit the transaction, and this decision must be taken if all processes are willing to commit and there is no failure (validity property). 
An atomic commit protocol is said to be non-blocking if every correct process (a database node that does not fail) eventually reaches a decision (commit or abort) even if there are failures elsewhere in the distributed database system (termination property).\n Surprisingly, despite the importance of the atomic commit problem, little is known about its complexity. In this paper, we present, for the first time, a systematic study on the time and message complexity of the problem. We measure complexity in the executions that are considered the most frequent in practice, i.e., failure-free, with all processes willing to commit. In other words, we measure how fast a transaction can commit. Through our systematic study, we close many open questions like the complexity of synchronous non-blocking atomic commit. We also present optimal protocols which may be of independent interest. In particular, we present an effective protocol which solves what we call indulgent atomic commit that tolerates practical distributed database systems which are synchronous ``most of the time''.", "pdfUrls": [ "http://doi.acm.org/10.1145/3034786.3034799", "https://infoscience.epfl.ch/record/225579/files/how_fast_can_a_distributed_tx_commit.pdf", "https://icservices.epfl.ch/edic/down.asp?ID=2748&pid=2639" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4ef4f86540a462bf7e1e7171114d66937db365c4", "sources": [ "DBLP" ], "title": "How Fast can a Distributed Transaction Commit?", "venue": "PODS", "year": 2017 }, "4f2ad0a6edbe386ea3d5b8e79e3aab1106732ddd": { "authors": [ { "ids": [ "8263449" ], "name": "Tommaso Frassetto" }, { "ids": [ "2213320" ], "name": "David Gens" }, { "ids": [ "2279415" ], "name": "Christopher Liebchen" }, { "ids": [ "8415280" ], "name": "Ahmad-Reza Sadeghi" } ], "doi": "10.1145/3133956.3134037", "doiUrl": "https://doi.org/10.1145/3133956.3134037", "entities": [ "Arbitrary code execution", "Benchmark (computing)", "Code injection", "Code reuse", 
"Compiler", "Computer security", "Firefox", "Just-in-time compilation", "Key (cryptography)", "Memory corruption", "Secure environment" ], "id": "4f2ad0a6edbe386ea3d5b8e79e3aab1106732ddd", "inCitations": [], "journalName": "", "journalPages": "2405-2419", "journalVolume": "", "outCitations": [ "6a8f65381a627a2db6c756a7185d9106f0acefec", "0719b9670c8580db76547497df39caabdc20fc32", "2947959aa2cfc45719fac7a54812614d1fa8707f", "116eaac2e498bc2c9bea10ea838309dcf143d764", "5aa4d6f28c803e5bd05d39794e12c759a60aa6a2", "63eca2d9de958abab6a20f0696789ccb6f1b8aa1", "48a8b9bd6df63b13093cd67ee61911e7a485be57", "7e61bd6abdcb68ed9b3871311cabe09753de88ff", "dca2b521c17a53a82fbaf241680cff0c813dec3a", "3e1ff9416ec1c2f94d0ddbeb6171eea4c336d663", "0fc7f3a21359665c456853e3fe09c9a5c4a24f37", "9b2585f7248c8b5a22e9c816506e01060213ca85", "3875d1d1b623af0d640528efc9e581bc91338e35", "3d6395a04f7436b143339bbb7e4e46dfaaaf8afb", "f479c0578156255ce176e75bb13051fbb0f25b98", "ab2177167b09f9be086d44188b845fc9b5458d66", "0639adf09bea98bef72d77c3671ccd8ef32ff542", "0d0154d589205cc519607fbb142ecefe0f96aef0", "1bb2363ddfec8e12f5408ce6b1538d74570bd865", "67b752aaef2133ec0cda47b2a2c1856f0f2f266f", "4b9426a328fb4e379e2fab57fc72a91c5618ae94", "03f827395a17beb941241dbd72322705bdf79791", "67b086caacc543b7d30b2f006f77a315bc9572e0", "50117210fbb7a8ce28ba503280f3c65fdc029ec7", "77cab38bef9d1b8b7bcfd3bb382c9d55541465e1", "6e40435eaf84ac3dd2c48b8e81c0dcd5f0db1a12", "3fa27974cade47e98993b98798f73594b902583b", "71da01051534d46fb3becd0a7506b64db56efc7a", "0e039df712774fcea67f214d9b5780c1dc250747", "07848afa4b5f2f09cd2fe28a6cdc25c642da61cd", "1798b9bc347ca826724b6d80766200ebaad8dfb0", "157153c5c5b3fb42bd4de60d2cd8380e7038a82b", "26de23713ac23ed7a952cf56faa8bd23f8fd6575", "1e37625f382709b06f72e5c3c41aba1328ff66dc", "569393ee0bbba78af3241e544c347b2e98a1275d", "e40f938b0b4339037a66864438a68ef8057d202f", "57f891b7213282bd58dc61230919fb531b0e4fde", "638297d9b5c8e0e83ca5acfbf1325196ea0bbb3c", 
"7b2cf50a197888a3eb273d0ef056e93c581aa272", "11496e28e6dd3f1ab7d2f054c46d7587988dfd86", "480d4a756381f7aec1ffda84a3d7f1ef2695252a" ], "paperAbstract": "Memory-corruption vulnerabilities pose a serious threat to modern computer security. Attackers exploit these vulnerabilities to manipulate code and data of vulnerable applications to generate malicious behavior by means of code-injection and code-reuse attacks. Researchers already demonstrated the power of data-only attacks by disclosing secret data such as cryptographic keys in the past. A large body of literature has investigated defenses against code-injection, code-reuse, and data-only attacks. Unfortunately, most of these defenses are tailored towards statically generated code and their adaption to dynamic code comes with the price of security or performance penalties. However, many common applications, like browsers and document viewers, embed just-in-time compilers to generate dynamic code. The contribution of this paper is twofold: first, we propose a generic data-only attack against JIT compilers, dubbed DOJITA. In contrast to previous data-only attacks that aimed at disclosing secret data, DOJITA enables arbitrary code-execution. Second, we propose JITGuard, a novel defense to mitigate code-injection, code-reuse, and data-only attacks against just-in-time compilers (including DOJITA). JITGuard utilizes Intel's Software Guard Extensions (SGX) to provide a secure environment for emitting the dynamic code to a secret region, which is only known to the JIT compiler, and hence, inaccessible to the attacker. Our proposal is the first solution leveraging SGX to protect the security critical JIT compiler operations, and tackles a number of difficult challenges. As proof of concept we implemented JITGuard for Firefox's JIT compiler SpiderMonkey. 
Our evaluation shows reasonable overhead of 9.8% for common benchmarks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134037", "https://acmccs.github.io/papers/p2405-frassettoA.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4f2ad0a6edbe386ea3d5b8e79e3aab1106732ddd", "sources": [ "DBLP" ], "title": "JITGuard: Hardening Just-in-time Compilers with SGX", "venue": "CCS", "year": 2017 }, "4f31172aa290766d0cf453d5796186aa5749c60d": { "authors": [ { "ids": [ "3032988" ], "name": "Ariful Azad" }, { "ids": [ "2955257" ], "name": "Mathias Jacquelin" }, { "ids": [ "2238795" ], "name": "Aydin Bulu\u00e7" }, { "ids": [ "40137540" ], "name": "Esmond G. Ng" } ], "doi": "10.1109/IPDPS.2017.85", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.85", "entities": [ "Algorithm", "Cray XC30", "Cuthill\u2013McKee algorithm", "Data structure", "Distributed memory", "Locality of reference", "Nested dissection", "Parallel computing", "Reliability-centered maintenance", "Scalability", "Sparse matrix", "Speedup", "Supercomputer" ], "id": "4f31172aa290766d0cf453d5796186aa5749c60d", "inCitations": [ "d845d46d21df1009c0146ebdd341de9056834e6d" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "22-31", "journalVolume": "", "outCitations": [ "7ff0fa0958783397fa8db7125205bd6ee65b4c01", "e20429fddd00ab8a3679b8cc3e82108ff128295d", "0a791a760dd883342c8b8456a3e7cb75fb996ef4", "4fa429b8b44bf3c67d2b4ebf6625c9357a0c8e3d", "5a3cd8c65ffcc25bef346174d1f0bc3f83c5cbbb", "a2ea514fe0486cfc46a0b50f1f8f187c43027d59", "5cfeda94aaa59702e57647045de1488b8258abef", "64a513b60ad89c4eee81a186e53c8d5c8773acac", "ba75e4f7f6356d0c7a98ae813f085ce1a7a0aeec", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "1e27b9b447cebd5047050e39bb9246fa6364b760", "4f3caa5573b4c1ebef7c3ee6b9f7643e689c858e", "15942e821dbca09e250a3831aa02c161d181030c", "3cf464d61246103b12c64d4f790d8e40c639ffb8", "c127da5c51a766bfeafdd02827b3225bf9f50dd4", 
"c04ff62fd8366fa57fb9a039a52e590470066f43" ], "paperAbstract": "Ordering vertices of a graph is key to minimize fill-in and data structure size in sparse direct solvers, maximize locality in iterative solvers, and improve performance in graph algorithms. Except for naturally parallelizable ordering methods such as nested dissection, many important ordering methods have not been efficiently mapped to distributed-memory architectures. In this paper, we present the first-ever distributed-memory implementation of the reverse Cuthill-McKee (RCM) algorithm for reducing the profile of a sparse matrix. Our parallelization uses a two-dimensional sparse matrix decomposition. We achieve high performance by decomposing the problem into a small number of primitives and utilizing optimized implementations of these primitives. Our implementation attains up to 38x speedup on matrices from various applications on 1024 cores of a Cray XC30 supercomputer and shows strong scaling up to 4096 cores for larger matrices.", "pdfUrls": [ "http://www.eecs.wsu.edu/~assefaw/CSC16/abstracts/azad-CSC16_paper_36.pdf", "http://www.eecs.wsu.edu/~assefaw/CSC16/slides/ariful_azad_CSC16.pdf", "https://arxiv.org/pdf/1610.08128v1.pdf", "https://doi.org/10.1109/IPDPS.2017.85", "https://crd.lbl.gov/assets/Uploads/RCM-ipdps17.pdf", "http://crd.lbl.gov/assets/Uploads/RCM-ipdps17.pdf", "http://arxiv.org/abs/1610.08128" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4f31172aa290766d0cf453d5796186aa5749c60d", "sources": [ "DBLP" ], "title": "The Reverse Cuthill-McKee Algorithm in Distributed-Memory", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "4f4590962bde0c2050122f91e5978271bb24d556": { "authors": [ { "ids": [ "3298489" ], "name": "Xinyang Ge" }, { "ids": [ "5998467" ], "name": "Weidong Cui" }, { "ids": [ "1699210" ], "name": "Trent Jaeger" } ], "doi": "10.1145/3037697.3037716", "doiUrl": 
"https://doi.org/10.1145/3037697.3037716", "entities": [ "Binary file", "Code reuse", "Control flow", "Control-flow integrity", "Debugging", "Experiment", "Firefox", "Foreach loop", "Linux", "Linux", "Parallel computing", "Shadow stack", "Signal trace" ], "id": "4f4590962bde0c2050122f91e5978271bb24d556", "inCitations": [ "6ddb38aa0a7f8cf55f6874aa81797514c361ea37", "325390173841d52f7a2791ba6b0e32ad80bf2630", "44a86aa5b47e158619d2cb815b6dc99201e8f099", "65ea39f3cb19e446d708b639060460c580a328e4", "238cd2f2a8cdbd5fc696ee38a695dc6b3ee0537e", "3a8ebb3d09620e45239a177752faf2c73d202fb2" ], "journalName": "", "journalPages": "585-598", "journalVolume": "", "outCitations": [ "255bdcb05805c97d973081b59bc61c649263ceae", "0fc7a3a6f861dd7be3aa353f8d297b2515be8d55", "6a8f65381a627a2db6c756a7185d9106f0acefec", "23e8236644775fd5d8ff5536ba06b960e19f904b", "9b2585f7248c8b5a22e9c816506e01060213ca85", "6458f4c0c029b038ebd1d7f61005a010ac250892", "4cd63e0701177f04e377fa9f0857c5b0fa10b07e", "6f5635fb17ff8a27b08104de6d0322c32e943a9f", "635f3a25ca8626072d1eedc6aebddcb429de4b4e", "0e039df712774fcea67f214d9b5780c1dc250747", "0fc7f3a21359665c456853e3fe09c9a5c4a24f37", "7521513abd7acae00b3fd89001da47019606cf38", "888379af9776b296a2c6e5501fc73cc60b43a830", "2a341fcac00366455b09fcfe640d6e95cd866b5a", "e89f097651f2bf25ceac9c644c754f8c94a42240", "2caf47ac0035c27019965e04b0ba2711f20d59a9", "6a5b52bbe5be23b73f3874c448de17163e09bd16", "0db59f09437b7b90376f011f5150ed976ac66231", "77cab38bef9d1b8b7bcfd3bb382c9d55541465e1", "ab2177167b09f9be086d44188b845fc9b5458d66", "11496e28e6dd3f1ab7d2f054c46d7587988dfd86", "6e40435eaf84ac3dd2c48b8e81c0dcd5f0db1a12", "cb0da1ed189087c9ba716cc5c99c75b52430ec06", "22050b3ee9c69c64dc796358c7f0ba247d4adce3", "704e2027ecdaa9561b75a854b585336c16cea89f", "39040e2b60fcb01dfed8d638f2cb66218cfdb144", "71da01051534d46fb3becd0a7506b64db56efc7a", "67b086caacc543b7d30b2f006f77a315bc9572e0", "61504aa2d0cde80429f1c3a7809d0e084e184172", "157153c5c5b3fb42bd4de60d2cd8380e7038a82b", 
"4b41bb221ccae289bd66dfc1210f36cc172350c5", "19876c4482cb45f6d1afe96ddf17b53418251316", "686150e2179840ed40a0166cba6c5d507f3aa49c", "30e76f32c323adb0ff340760380fe5a08505b641", "1fa355cabcaa6650603098c41a3a439fbed718a1", "642bdae15a4a3f2e580e49f9726e2eee675d5ebf", "0988a425689f6f3700e797f4a2c18f73692573c3", "01b5b648af61ddb382da638a299fae2315b25192", "5620cbcd700e2413c39213b142aab0c69b31b1d1", "b4b92eb555dd9c672f894216c5d50bf6164df78b", "2035c8f33909ac206c4d1a3bdee611577fb2c5d1", "3e1ff9416ec1c2f94d0ddbeb6171eea4c336d663", "13e83680f0bc1ffb29b620945357ec832726ae90", "03f827395a17beb941241dbd72322705bdf79791" ], "paperAbstract": "Researchers are actively exploring techniques to enforce control-flow integrity (CFI), which restricts program execution to a predefined set of targets for each indirect control transfer to prevent code-reuse attacks. While hardware-assisted CFI enforcement may have the potential for advantages in performance and flexibility over software instrumentation, current hardware-assisted defenses are either incomplete (i.e., do not enforce all control transfers) or less efficient in comparison. We find that the recent introduction of hardware features to log complete control-flow traces, such as Intel Processor Trace (PT), provides an opportunity to explore how efficient and flexible a hardware-assisted CFI enforcement system may become. While Intel PT was designed to aid in offline debugging and failure diagnosis, we explore its effectiveness for online CFI enforcement over unmodified binaries by designing a parallelized method for enforcing various types of CFI policies. We have implemented a prototype called GRIFFIN in the Linux 4.2 kernel that enables complete CFI enforcement over a variety of software, including the Firefox browser and its jitted code. 
Our experiments show that GRIFFIN can enforce fine-grained CFI policies with shadow stack as recommended by researchers at a performance that is comparable to software-only instrumentation techniques. In addition, we find that alternative logging approaches yield significant performance improvements for trace processing, identifying opportunities for further hardware assistance.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037716", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/01/griffin-asplos17.pdf", "http://www.cse.psu.edu/~trj1/papers/asplos17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4f4590962bde0c2050122f91e5978271bb24d556", "sources": [ "DBLP" ], "title": "GRIFFIN: Guarding Control Flows Using Intel Processor Trace", "venue": "ASPLOS", "year": 2017 }, "4f4f814a1cbeebf835464868b7d8cfe394b2632b": { "authors": [ { "ids": [ "1729576" ], "name": "Jie Wang" }, { "ids": [ "3190420" ], "name": "Xinfeng Xie" }, { "ids": [ "2259796" ], "name": "Jason Cong" } ], "doi": "10.1109/IPDPS.2017.79", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.79", "entities": [ "Algorithm", "Application domain", "CUDA", "Computation", "Graphics processing unit", "Hidden Markov model", "Inter-process communication", "Kepler (microarchitecture)", "Mathematical optimization", "Program optimization", "Programmer", "Sequence alignment", "Shared memory", "Smith\u2013Waterman algorithm", "String searching algorithm", "Whole genome sequencing" ], "id": "4f4f814a1cbeebf835464868b7d8cfe394b2632b", "inCitations": [ "289f1567dafdadb4209e5302e31d9364e1fab46e" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "72-81", "journalVolume": "", "outCitations": [ "2ed5d6b35f8971fb9d7434a2683922c3bfcc058e", "755e4ad5468747b31b9d6994885b17ad957dc9d7", "034b5cb0eb2506096ae6f30790834b4af0da9158", "4426da11616bc819b90f8e2413e6850c69cd02a6", "d1eabca6c7e931132e74148f5b0f58e2f8e702e2", 
"40c5441aad96b366996e6af163ca9473a19bb9ad", "69f1bdb0d46a1597eff3e8a4b30b1a87b0e58c06", "950bca8374bf36421957b416e4f58425e9d43095", "f455bf8a9ab7ef837dc97d2fe55b92fbc81f04b9", "0c7768ed7abec93bd9db840b64dae520b3c368ab", "90dc9aa407a46c6c47dc25f21c44fb1d46f21db3" ], "paperAbstract": "Data movement is increasingly becoming the bottleneck of both performance and energy efficiency in modern computation. Until recently, it was the case that there is limited freedom for communication optimization on GPUs, as conventional GPUs only provide two types of methods for inter-thread communication: using shared memory or global memory. However, a new warp shuffle instruction has been introduced since the Kepler architecture on Nvidia GPUs, which enables threads within the same warp to directly exchange data in registers. This brought new performance optimization opportunities for algorithms with intensive inter-thread communication. In this work, we deploy register shuffle in the application domain of sequence alignment (or similarly, string matching), and conduct a quantitative analysis of the opportunities and limitations of using register shuffle. We select two sequence alignment algorithms, Smith-Waterman (SW) and Pairwise-Hidden-Markov-Model (PairHMM), from the widely used Genome Analysis Toolkit (GATK) as case studies. Compared to implementations using shared memory, we obtain a significant speed-up of 1.2× and 2.1× by using shuffle instructions for SW and PairHMM. Furthermore, we develop a performance model for analyzing the kernel performance based on the measured shuffle latency from a suite of microbenchmarks. 
Our model provides valuable insights for CUDA programmers into how to best use shuffle instructions for performance optimization.", "pdfUrls": [ "http://vast.cs.ucla.edu/sites/default/files/publications/ipdps-submission.pdf", "https://doi.org/10.1109/IPDPS.2017.79" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4f4f814a1cbeebf835464868b7d8cfe394b2632b", "sources": [ "DBLP" ], "title": "Communication Optimization on GPU: A Case Study of Sequence Alignment Algorithms", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "4f53b1fbf0b21c75c11dc77c98a2ec08815227a0": { "authors": [ { "ids": [ "2784868" ], "name": "Florian Lautenschlager" }, { "ids": [ "1703535" ], "name": "Michael Philippsen" }, { "ids": [ "2490293" ], "name": "Andreas Kumlehn" }, { "ids": [ "1771962" ], "name": "Josef Adersberger" } ], "doi": "", "doiUrl": "", "entities": [ "Anomaly detection", "Chunking (computing)", "Data model", "Data retrieval", "Database", "Distributed computing", "Generic data model", "Instability", "Memory footprint", "Software system", "Storage efficiency", "Time series" ], "id": "4f53b1fbf0b21c75c11dc77c98a2ec08815227a0", "inCitations": [ "27f503611a6020a2c6b196042ec63be0c79306ba" ], "journalName": "", "journalPages": "229-242", "journalVolume": "", "outCitations": [ "0479b7e8c433e3f18a2b6c5dedd328f0229c1566", "5f3f9223c5c9f896be099bc177929febad508407", "5b042a76c6e61d411f68b8193ec67ad8dd1abc5e", "a11b243c571ade72c1be5bbb4105b00388174bd6", "6632e05bf8efe9498f622c7af82b4ac0ac1db23d", "6d57e29ddb68c91256e59d82e8afe321152aa357", "31b963f48ba38f1f9c5cc240f43331b07229861e", "05a20cde15e172fc82f32774dd0cf4fe5827cad2", "47fcd425e6e2a2c8ca059acf5c151a9da115c14c", "8e0ff4b8bbeac8f301e00494a39bd1b4a199fba1", "acbf00fc73320c3e4054556013e7a3fcae7f0675", "03b84b789cb342587db621c7e88eeb005cc21578", "1ab98540293251d02cf2b1db202d3ad9e4304a78", "72e5e8d22b6e278673f0d9c912c06f666ac01d28", 
"8fe636695d09b7c05369da9cb338c99006213c61", "c66689fafa0ce5d6d85ac8b361068de31c623516", "689c64a76a8bfe1cdf7cb8df31f523980555ef82", "94ff90079b121da750b9257423fc9b4b6fc6ebbd", "18869d8964793da4837b5b38d4aec5854d37f08c", "2ef606258486d6c32fd0b9ca54244273c21331b9" ], "paperAbstract": "Anomalies in the runtime behavior of software systems, especially in distributed systems, are inevitable, expensive, and hard to locate. To detect and correct such anomalies (like instability due to a growing memory consumption, failure due to load spikes, etc.) one has to automatically collect, store, and analyze the operational data of the runtime behavior, often represented as time series. There are efficient means both to collect and analyze the runtime behavior. But traditional time series databases do not yet focus on the specific needs of anomaly detection (generic data model, specific built-in functions, storage efficiency, and fast query execution). The paper presents Chronix, a domain specific time series database targeted at anomaly detection in operational data. Chronix uses an ideal compression and chunking of the time series data, a methodology for commissioning Chronix\u2019 parameters to a sweet spot, a way of enhancing the data with attributes, an expandable set of analysis functions, and other techniques to achieve both faster query times and a significantly smaller memory footprint. 
On benchmarks Chronix saves 20%\u201368% of the space that other time series databases need to store the data and saves 80%\u201392% of the data retrieval time and 73%\u201397% of the runtime of analyzing functions.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/fast17/fast17-lautenschlager.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_lautenschlager.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_lautenschlager.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/lautenschlager", "http://www.usenix.org./system/files/conference/fast17/fast17-lautenschlager.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/4f53/b1fbf0b21c75c11dc77c98a2ec08815227a0.pdf", "s2Url": "https://semanticscholar.org/paper/4f53b1fbf0b21c75c11dc77c98a2ec08815227a0", "sources": [ "DBLP" ], "title": "Chronix: Long Term Storage and Retrieval Technology for Anomaly Detection in Operational Data", "venue": "FAST", "year": 2017 }, "4f5782ab1e3dc4bfb31e81d72219310b61337049": { "authors": [ { "ids": [ "1787506" ], "name": "Wei Xie" }, { "ids": [ "3519489" ], "name": "Yong Chen" } ], "doi": "10.1109/IPDPS.2017.88", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.88", "entities": [ "Algorithm", "Clustered file system", "Consistent hashing", "Data center", "Elasticity (cloud computing)", "Scalability", "Server (computing)", "Server farm" ], "id": "4f5782ab1e3dc4bfb31e81d72219310b61337049", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "876-885", "journalVolume": "", "outCitations": [ "1d99b7749a9311d2db24a3d84728e444eff23e4b", "52d81096f46be0e75f85e0b7eeda65640c281630", "178ef44fe69c2adfcd4a31f99bb2b9a8975e9cd5", "438c51040ee6ccf9198e52d105c47e75d615b29c", "110b17aede6e6a4fc8aeec50a54fe4dddc2c4779", "7a2274412948765bf872b765dafd8139e51000ff", 
"5f3f9223c5c9f896be099bc177929febad508407", "2e72178091b2ca445f46200dcba71a53417b69eb", "1143a1a595dc305347ff8aba001635c88552b6f7", "638c917d981915bc7a00bb0941cdd38111df51de", "2da760f90c3d2bf6598becdde9063093f488548c", "17ad973d5a839c378db68b05d7939a28fc014935", "090599a2caf4591c87699ad850c75554cd712937", "61d5c261cfa704085f9d397b298a150bcc07336b", "2133e6faa9232e0d0967538e51b3d1fe805952d7", "780729b2fd5169c2c7a4df956a38d7df15317ca9", "534d57618f4e1657c93c0a0f930ae6270794667c", "5f6ae1d342411bcae2a1dbec79a4ad590f327bb2" ], "paperAbstract": "Elastic distributed storage systems have been increasingly studied in recent years because power consumption has become a major problem in data centers. Much progress has been made in improving the agility of resizing small- and large-scale distributed storage systems. However, most of these studies focus on metadata based distributed storage systems. On the other hand, emerging consistent hashing based distributed storage systems are considered to allow better scalability and are highly attractive. We identify challenges in achieving elasticity in consistent hashing based distributed storage. These challenges cannot be easily solved by techniques used in current studies. In this paper, we propose an elastic consistent hashing based distributed storage to solve two problems. First, in order to allow a distributed storage to resize quickly, we modify the data placement algorithm using a primary server design and achieve an equal-work data layout. Second, we propose a selective data re-integration technique to reduce the performance impact when resizing a cluster. 
Our experimental and trace analysis results confirm that our proposed elastic consistent hashing works effectively and allows significantly better elasticity.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.88", "http://discl.cs.ttu.edu/lib/exe/fetch.php?media=wiki:papers:main_elasticch.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4f5782ab1e3dc4bfb31e81d72219310b61337049", "sources": [ "DBLP" ], "title": "Elastic Consistent Hashing for Distributed Storage Systems", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "4f5dd5c31143e4813b195ae74318bea712302e49": { "authors": [ { "ids": [ "1930940" ], "name": "Jiamin Huang" }, { "ids": [ "2198667" ], "name": "Barzan Mozafari" }, { "ids": [ "1710013" ], "name": "Grant Schoenebeck" }, { "ids": [ "3334450" ], "name": "Thomas F. Wenisch" } ], "doi": "10.1145/3035918.3064016", "doiUrl": "https://doi.org/10.1145/3035918.3064016", "entities": [ "Algorithm", "Critical path method", "Database", "Enterprise software", "Lazy evaluation", "MariaDB", "MySQL", "Open-source software", "Oracle Database", "PostgreSQL", "Programmer", "Scheduling (computing)", "Throughput", "Transaction processing", "VoltDB", "Web service" ], "id": "4f5dd5c31143e4813b195ae74318bea712302e49", "inCitations": [ "3709ec18aa09b58cc45133d39b4f4f930249d042", "209443f19f5742a18efb19d4d8fea307e6e6d56a", "7254ad8940dc3ea502ef65fd9b71a9a2952daf81" ], "journalName": "", "journalPages": "745-758", "journalVolume": "", "outCitations": [ "990ba8f4f4af23ab60e9b9f1bfd5aabf485a4fa1", "24c3330d34d640945e0eb99fe4a0b1c31695a8cb", "992f075efa000d5c6bd9ae9318b6bb427d3c4b45", "1cc643b82f19a3774901d0500b4352e9ce388f5f", "92e0243e1a73c77ef8b90292e3798f765b38f269", "15c80ec5104e98d6f84b5ed348ba0276c0739862", "d743e2ed7f9070fcadad2887e4b9071538981b83", "c6c50988d5d38806225061b5956f8ec0fb3c698a", "8c8b44029fbdac1572ae47b8eaab3929c9987098", "8be83d658648bbc5d4e48edb3f3503fd86f791c6", 
"741b93a649af7deb9db97980fdbdec02dafc2eb7", "408d77abb094e0125e39cf6e5f5d9dfcbe6c3595", "2b27047d0ad4a0b233525b074a0f5b35b9e1ba30", "00e85aa90893b7ec09d04eac72f9122620e82e8e", "7df1de1c9663c2dfaefc1277a7d1cb3366b8c358", "242af20794ea044f72ccbc944d621c356e260bc7", "2d01d0eddedfa573dfe39104172df5099d3587c3", "1df0f37e87b542d62a7a30607aea96693d84fdcc", "48544327a793badc608d7c5153aa7bcd3d2b5173", "6033797f241a3687aab939db1d88b5184d32c0fb", "c1fccf186ccce685fed39745557fce804452669d", "578667cbc39c6bfc1c89fe6a54506643c3b097f8", "7d67a72caeec8ee123b748a59b263014278d5db4", "157352f39ed5ba9a571dd4c27b48786ac8be90cf", "30d430bfab63c8981fce10b403e25c6bf1a30d6b", "3c457cec00499e41dd05516db79c4daf836102ad", "a0b1b8ee4a9e6ae68ce6a712ad0a66ddb4a12117", "718c1aee5db0471cd1014840e774b4fec4655aa9", "4f089c183d486d6f21a57cbaa8754849e05fb45d", "10551c91b4d36d1009b23b4d2b88a9e1733fe029", "029f5fdbcbd621e2795f9dcd9b7b0a440a69e251", "3c77787fbaf5cb17cd600cd6e66534be490a26ee", "19601c8db1f2c750f468f1ecbfa3b258c25be472", "7cc137c213e0f4fb1e1a6a3df499d8dc044ea114", "9aa0d7253574e50fe3a190ccd924433f048997dd", "28ecdc50beb098d9176d992fed80eb2bac5963a4", "72f91b486b8b867e5825d82db1cca5a5172f376b", "214fbc64cb1aed66af3dc948eceb35760c06e788", "85e5a0a9fa82339964042c512c8576b9f959fc27", "4d0ff88dd2970cbe26e98364c8243087e24d0d63", "4f05a78c2e2abf932915c33c6a2bb9c726ce4ac2", "430611bb598deb44861324f75da01a612979bafb", "fcfc9da7c8b72421849f001b2a220c0b0a7e7d76", "41f8af6e2fafbf65f4f84534cb905c8824d7854d", "55af531059610139bdba4f2ac4b1e63062712d6d", "3709ec18aa09b58cc45133d39b4f4f930249d042", "4e6ba973d4023f7463301180c294fffcde535e1c", "5c9d62f348c7ed09ee51e1d56643ced039ec1121", "ca05684712ef959cf707c085f1cfa731c1a86d3d", "22d3fc87f5d9ea17a3bb21f885655a1f9f2deb65", "130c96e94a9d8374da2c17752fcd5bdd9db2974f", "62f1ec11da850fa2ffab031757d226c1fa67ecb9", "22a3f0837bd6a913f516ba497469176be641c7d4", "ba4ee2b3d5cb97c8551658d38874c330a3016ba9" ], "paperAbstract": "While much of the research on transaction 
processing has focused on improving overall performance in terms of throughput and mean latency, surprisingly less attention has been given to performance predictability: how often individual transactions exhibit execution latency far from the mean. Performance predictability is increasingly important when transactions lie on the critical path of latency-sensitive applications, enterprise software, or interactive web services.\n In this paper, we focus on understanding and mitigating the sources of performance unpredictability in today's transactional databases. We conduct the first quantitative study of major sources of variance in MySQL, Postgres (two of the largest and most popular open-source products on the market), and VoltDB (a non-conventional database). We carry out our study with a tool called TProfiler that, given the source code of a database system and programmer annotations indicating the start and end of a transaction, is able to identify the dominant sources of variance in transaction latency. Based on our findings, we investigate alternative algorithms, implementations, and tuning strategies to reduce latency variance without compromising mean latency or throughput. Most notably, we propose a new lock scheduling algorithm, called Variance-Aware Transaction Scheduling (VATS), and a lazy buffer pool replacement policy. In particular, our modified MySQL exhibits significantly lower variance and 99th percentile latencies by up to 5.6× and 6.3×, respectively. 
Our proposal has been welcomed by the open-source community, and our VATS algorithm has already been adopted as of MySQL's 5.7.17 release (and been made the default scheduling policy in MariaDB).", "pdfUrls": [ "http://web.eecs.umich.edu/~mozafari/php/data/uploads/sigmod_2017_predictability.pdf", "http://web.eecs.umich.edu/~schoeneb/papers/sidm384-huangA.pdf", "http://web.eecs.umich.edu/~mozafari/php/data/uploads/Predictability-SIGMOD2017.pdf", "http://doi.acm.org/10.1145/3035918.3064016" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4f5dd5c31143e4813b195ae74318bea712302e49", "sources": [ "DBLP" ], "title": "A Top-Down Approach to Achieving Performance Predictability in Database Systems", "venue": "SIGMOD Conference", "year": 2017 }, "4f6fbe4484487e3983f673ff55bdec92f947311c": { "authors": [ { "ids": [ "9990171" ], "name": "Mengxing Liu" }, { "ids": [ "4408986" ], "name": "Mingxing Zhang" }, { "ids": [ "1680073" ], "name": "Kang Chen" }, { "ids": [ "2064331" ], "name": "Xuehai Qian" }, { "ids": [ "1725574" ], "name": "Yongwei Wu" }, { "ids": [ "2225511" ], "name": "Weimin Zheng" }, { "ids": [ "1771630" ], "name": "Jinglei Ren" } ], "doi": "10.1145/3037697.3037714", "doiUrl": "https://doi.org/10.1145/3037697.3037714", "entities": [ "Byte", "Central processing unit", "Decoupling (electronics)", "Durability (database systems)", "Dynamic random-access memory", "Non-volatile memory", "Out of the box (feature)", "Persistent memory", "Redo log", "Speedup", "Throughput", "Transactional memory", "Undo", "Volatile memory", "Volatility" ], "id": "4f6fbe4484487e3983f673ff55bdec92f947311c", "inCitations": [ "81e4324b8047463961692d38af9b0da881fe44e2", "2c50094e7e5e1134033efc6565c8d7c21a04d2d9", "5716db825bbd2c39836a2d6fa22e7f313fc12ccf", "e3a66a2e79c2a5f7a52bf4e8089bf522fdc04abe", "cb2a018979184f87692d423322e367cc42a215d2", "20f1081cf001f716037e20d9cff147f5ac50632a", "77e4d412240f65056d1edf334ab7352a5df061bd", 
"aa0fb8802532106dcb78c62065258b8e4683ec94", "41ea95cc4dca373bf324555b897760054ec4a76e", "5d634970f01af4c5cf02a1a51cfd4649db7c7c6c", "db57257e6b051e0f97d35209cc5aee0909cde1f1" ], "journalName": "", "journalPages": "329-343", "journalVolume": "", "outCitations": [ "15c80ec5104e98d6f84b5ed348ba0276c0739862", "885c666fbcfd1a10c613496d7a041d01b99c7a39", "11ef7c142295aeb1a28a0e714c91fc8d610c3047", "c733511dbd44c6d8b1a3eb9df2fdd03bae9587e2", "2ce3726fa2bb2ae42880241dfa3baba50d29043c", "57c823b3b07b98233394bf15cfbbaed6a84809df", "6aa7427f9dcd89ed9aba1c8433b43ea5741c0816", "dc2e2b794a784782d7d9860f1358aa107f71c1bf", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "27611a1896feb8817eb9cebca344d9736916c3bb", "0204f40221260d00c5ee63646560a40dcd7d97d1", "2a9d09d8e2390c92cdaa5c8b98d6dd4cb394f638", "3af216f371069b57c0dca5448384d052fb490fb4", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "523a001fb647cd53e2572d221b133b76a5e614d0", "088e3e939ad234b6fdd0e321290fb26937dc2553", "277862a906af8489a1d98add2f6516a0e5df1bb1", "565c290e4aa268619ecbbc27ea584de0f3525020", "098d792d1783b5f6fc098203f71f21f5d053c653", "157352f39ed5ba9a571dd4c27b48786ac8be90cf", "fae8a785260ac5c34be82fca92a4abef4c30d655", "620e264481f778cc32ddd11ee311de61fca0e3b6", "99723365fc9fe6960201bf9d246a90ccbb6396fa", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "7227999dfa663a2a1e0e81ee450f360e1e308ff7", "b278c5f96e40eca322ddf6bc49bac234978d20cf", "1220e4a011c46804d4369b5580dc7fb6e387af54", "0645f60331e8dd88a1d0183e2bfb3b9da21c07f6", "14dc05a51866b6832990fc7fe8c8f6b85730bb84", "2e3bda19a2ed88e8a6e5cc415e27da551653ff1d", "fddf97e9ddcae3633065828bfe071ceda7d4afb7", "fd840d5275cac98d64e7778a1b9173b937a77386", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "57fe2b6acccbba91df0847442d2634ffef7ccfb4", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "22f02a69bbcafa7bfbe7824ec30f29aa23ab303a", "8bfadfde21e1385c7dbceccd54d124fc437b3721", "9aa0d7253574e50fe3a190ccd924433f048997dd", "400ae82ab2fc2c814033c65854229ecefbddbf67", 
"0b0a8fb95e3331cacfe58f8938c3f7134a4c70e1", "94783d113951822195d4ba44599a8fcbdef9d4bf", "66702084eca2b6ada4526b81fdc3d3c53b02535d", "2621b8f63247ea5af03f4ea0e83c3b528238c4a1", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "3cb34f7a770836bcfeef28f844d670b8a014ffa8" ], "paperAbstract": "Emerging non-volatile memory (NVM) offers non-volatility, byte-addressability and fast access at the same time. To make the best use of these properties, it has been shown by empirical evidence that programs should access NVM directly through CPU load and store instructions, so that the overhead of a traditional file system or database can be avoided. Thus, durable transactions become a common choice of applications for accessing persistent memory data in a crash consistent manner. However, existing durable transaction systems employ either undo logging, which requires a fence for every memory write, or redo logging, which requires intercepting all memory reads within transactions.\n This paper presents DUDETM, a crash-consistent durable transaction system that avoids the drawbacks of both undo logging and redo logging. DUDETM uses shadow DRAM to decouple the execution of a durable transaction into three fully asynchronous steps. The advantage is that only minimal fences and no memory read instrumentation are required. This design also enables an out-of-the-box transactional memory (TM) to be used as an independent component in our system. The evaluation results show that DUDETM adds durability to a TM system with only 7.4 ~ 24.6% throughput degradation. Compared to the existing durable transaction systems, DUDETM provides 1.7× to 4.4× higher throughput. 
Moreover, DUDETM can be implemented with existing hardware TMs with minor hardware modifications, leading to a further 1.7× speedup.", "pdfUrls": [ "http://alchem.usc.edu/portal/static/download/dudetm.pdf", "http://doi.acm.org/10.1145/3037697.3037714", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/02/dudetm_asplos17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4f6fbe4484487e3983f673ff55bdec92f947311c", "sources": [ "DBLP" ], "title": "DudeTM: Building Durable Transactions with Decoupling for Persistent Memory", "venue": "ASPLOS", "year": 2017 }, "4f736e861ae0b5a3a989dc786106f725824f31ec": { "authors": [ { "ids": [ "8199136" ], "name": "Ruoyu Wang" }, { "ids": [ "3260361" ], "name": "Yan Shoshitaishvili" }, { "ids": [ "1741440" ], "name": "Antonio Bianchi" }, { "ids": [ "2275943" ], "name": "Aravind Machiry" }, { "ids": [ "3425160" ], "name": "John Grosen" }, { "ids": [ "32432061" ], "name": "Paul Grosen" }, { "ids": [ "1715189" ], "name": "Christopher Kr\u00fcgel" }, { "ids": [ "1711242" ], "name": "Giovanni Vigna" } ], "doi": "", "doiUrl": "", "entities": [ "Asymptotic safety in quantum gravity", "Binary file", "Binary recompiler", "Code reuse", "Compiler", "Constant (computer programming)", "DARPA Grand Challenge", "Linux", "Patch (computing)", "Pointer (computer programming)", "Reassembly", "Reverse engineering", "Rewriting", "Static library", "X86", "X86-64" ], "id": "4f736e861ae0b5a3a989dc786106f725824f31ec", "inCitations": [ "25a8ecd9c8ec59faacaece363646a10838de7371", "a8bb03c0ed3b34f8f0a6b3f3db8a56e7327012c1", "426db98a14ac8e5781921f205d5bc4097bb08ae6", "b7dac1ce43058b52672a5b7d8b0368bbb6482647", "723931de6d91a965bc2fa24ac649291c9f1a4639", "4c4f3ff4a59260080ba6d6859911f317a0fff177", "33ae35cc24ef4303979b479671c2065256e1b3a7", "629191336187398e43f1021bdcc6c293a72d1ca8" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "23e8236644775fd5d8ff5536ba06b960e19f904b", 
"91607d7bc71823360de59b894ae37b4f1738bca0", "9a79f218fefbbb32b82d77702f5cbea74b5ae618", "086dc4f9de77cf1a6eec0a2cf101ef03153fe3ce", "7fa71e17142563013365daa8526a1323f123961a", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "66814e4bbb7a1224ffbbe5d3b82a2b409aab7084", "1c4dca3daf8638a53c279cd26b304dcd70bc1833", "2ac1846337c4a8c09bc37db69ebbf07ddf83c889", "1bf9569aa108b6c19c8cc4fc15470cedddbd7ba9", "7d5e165a55d62750e9ad69bb317c764a2e4e12fc", "48a2778461108a6552f646413085b02888faea22", "604804e91d571a68cfb5221971b56468ebb643c1", "82c5c16e79bb6dd115a0b5d3986b93dbd523355e", "7563f3eba26c694ac7897c61d69a888b1806aad4", "c36fd0fd0a15d9a2c9c111baa818de70250d345b", "6ea6dbd0cc8f41af69e3c30323869010a4fbee51", "397adec0a3bdd93d10ad07d4fb683e3f128b637a", "323f248664ce1505da85e24eeffbb4b8a0f06d7a", "6f45152ce34b4326fc0adfb7d7b6587b13d0a62c", "326cdcbce0831d873ef41ad56e98eddfa6dff235", "48a8e9d8a41009eb6b7733b139eb5eff30d72776" ], "paperAbstract": "Static binary rewriting has many important applications in reverse engineering, such as patching, code reuse, and instrumentation. Binary reassembling is an efficient solution for static binary rewriting. While there has been a proposed solution to the reassembly of binaries, an evaluation on a realworld binary dataset shows that it suffers from some problems that lead to breaking binaries. Those problems include incorrect symbolization of immediates, failure in identifying symbolizable constants, lack of pointer safety checks, and other issues. Failure in addressing those problems makes the existing approach unsuitable for real-world binaries, especially those compiled with optimizations enabled. In this paper, we present a new systematic approach for binary reassembling. Our new approach is implemented in a tool called Ramblr. We evaluate Ramblr on 106 real-world programs on Linux x86 and x86-64, and 143 programs collected from the Cyber Grand Challenge Qualification Event. 
All programs are compiled to binaries with a set of different compilation flags in order to cover as many real-world scenarios as possible. Ramblr successfully reassembles most of the binaries, which is an improvement over the state-of-the-art approach. It should be noted that our reassembling procedure yields no execution overhead and no size expansion.", "pdfUrls": [ "http://www.cs.ucsb.edu/~vigna/publications/2017_NDSS_Ramblr.pdf", "https://seclab.cs.ucsb.edu/media/uploads/papers/ramblr-ndss17.pdf", "http://sefcom.asu.edu/publications/ramblr-making-reassembly-great-again-ndss2018.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/ramblr-making-reassembly-great-again/" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/dcf5/dc7e6ae2614dd0079b851e3f292148366ca8.pdf", "s2Url": "https://semanticscholar.org/paper/4f736e861ae0b5a3a989dc786106f725824f31ec", "sources": [ "DBLP" ], "title": "Ramblr: Making Reassembly Great Again", "venue": "NDSS", "year": 2017 }, "4f7e757a283d8f535505a34d523f41fb6cecb5e0": { "authors": [ { "ids": [ "1729507" ], "name": "Ralf K\u00fcsters" }, { "ids": [ "39843891" ], "name": "Daniel Rausch" } ], "doi": "10.1109/SP.2017.63", "doiUrl": "https://doi.org/10.1109/SP.2017.63", "entities": [ "Cognitive dimensions of notations", "Composability", "Cryptographic primitive", "Cryptography", "Diffie\u2013Hellman key exchange", "Key exchange", "Key-agreement protocol", "Universal composability" ], "id": "4f7e757a283d8f535505a34d523f41fb6cecb5e0", "inCitations": [], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "881-900", "journalVolume": "", "outCitations": [ "7aea958a6cf1c7d45c067b379332e84829819ca4", "b56231a971677ffafe4d76844f01c3cb54e8504d", "6b581ad1857ca39e440575fb600f6c9c4544d93e", "6287b8609480691d473fc36933ba053890c2296f", "265f7bdcb3e6bb6d32146113e3929e2365bbd5af", "5af56b18071e7adf0d5b9a118e05bc893aace8e3", "12918e3209e46fe58a00c2e8de7325d57c81a95d", 
"972f42265fd17d8b406df557d6c682ebc5688ec8", "23eb53170c6de9ff5024db120eda200816fa803f", "9d543b6e99e00dea2677024c548a665256bec729", "0e982be63c47a340bf3749401160ea29b9f8d10f", "e93d91f499964b841e25769c0a826ab1906c0b83", "81b7f4c7c782a63f2cf6771d096ea9177f7ca4f7", "dbfd312448babe69654697020c8e1a5a3c5f4b29", "04bb092c83242cb708d2653bd537c99643e8386d", "1a79a3efec3d4a177bae7326dd75e33cf362120d", "5426706f5c9ec33d4df9fabce473d4aaaa175e67", "4b2dab634a6af740eb41e792cea3d04c1a3542b1", "189fab4c16c57882f93e7c19bad74ee992827d19", "2f4f6d7daaab6e3d25d529fba534f70a74d03299", "79d1025a7c990076563019330e70d9a9c11021f6", "b373fd8eac3f712866de666ee7989bef0b0e1764", "0d8c3a051564c44005ec1be8dd8eb9218371bbdd", "48a06c7731a7f6975701f74f49f16235bdb14218", "1e7768b135545d473bf4a857f2bbb374ae960dc5", "5b269f67ca847ab27392063f6959917a1f22560c", "0642b270475d67bc40c2f1959497f672266858c6", "eae43a7a1df312c036905751b1b9f6bb5016a5ca", "0c01ad9bcdfd9f00c567737e3df5e801b186b695", "d4a8fccaffc440f52172fc3e3cacf048b72e244c", "43c046c3f3b78bec2b528d45b3ded4bb0046d426", "21bf7a06dbeb4f6ad77bbf5aa688ec73fa93dfe0", "2977e30243c4a93462cdb466d97abff4bcd638d2", "b5f6cc19b19e31aff34c391b9da54ad1b10b753a", "d030862b5ab53b3fad21e1f48733b78d4a6e35b2", "cf5dfdbb0b8d1c673726178f37f499b77fcc7f03", "91c7da9994db5dd301d6f513615f9aa8d7520285", "884c13074bf05188830a19bccbad58c4d1fcd6cc", "2cae3e9f86e99c136ced97d9da6a574bb207595f", "bde332de7397463a2c641c9983eead2267a2143c", "10b2113655539f4af9a79c424beaa25141ef9f54", "42333e3f231bbfe508f6da6bad2feff9ae223113", "02cdfa95529573d3ca7de8483f71a4066073816e" ], "paperAbstract": "The analysis of real-world protocols, in particular key exchange protocols and protocols building on these protocols, is a very complex, error-prone, and tedious task. Besides the complexity of the protocols itself, one important reason for this is that the security of the protocols has to be reduced to the security of the underlying cryptographic primitives for every protocol time and again. 
We would therefore like to get rid of reduction proofs for real-world key exchange protocols as much as possible and in many cases altogether, also for higher-level protocols which use the exchanged keys. So far some first steps have been taken in this direction. But existing work is still quite limited, and, for example, does not support Diffie-Hellman (DH) key exchange, a prevalent cryptographic primitive for real-world protocols. In this paper, building on work by Küsters and Tuengerthal, we provide an ideal functionality in the universal composability setting which supports several common cryptographic primitives, including DH key exchange. This functionality helps to avoid reduction proofs in the analysis of real-world protocols and often eliminates them completely. We also propose a new general ideal key exchange functionality which allows higher-level protocols to use exchanged keys in an ideal way. As a proof of concept, we apply our framework to three practical DH key exchange protocols, namely ISO 9798-3, SIGMA, and OPTLS.", "pdfUrls": [ "http://eprint.iacr.org/2017/256", "https://www.ieee-security.org/TC/SP2017/papers/324.pdf", "https://doi.org/10.1109/SP.2017.63", "https://eprint.iacr.org/2017/256.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4f7e757a283d8f535505a34d523f41fb6cecb5e0", "sources": [ "DBLP" ], "title": "A Framework for Universally Composable Diffie-Hellman Key Exchange", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "4fa62de1bf9ed8b2543489ae1be7b08007a1dd76": { "authors": [ { "ids": [ "1691086" ], "name": "Ang Li" }, { "ids": [ "1798309" ], "name": "Shuaiwen Song" }, { "ids": [ "40474862" ], "name": "Weifeng Liu" }, { "ids": [ "1785951" ], "name": "Xu Liu" }, { "ids": [ "12364057" ], "name": "Akash Kumar" }, { "ids": [ "1684335" ], "name": "Henk Corporaal" } ], "doi": "10.1145/3037697.3037709", "doiUrl": "https://doi.org/10.1145/3037697.3037709", "entities": [ "Algorithm", 
"CPU cache", "Exploit (computer security)", "Graphics processing unit", "Inter-process communication", "Kepler (microarchitecture)", "Locality of reference", "Maxwell (microarchitecture)", "Pascal", "Program optimization", "Scheduling (computing)", "Scratchpad memory" ], "id": "4fa62de1bf9ed8b2543489ae1be7b08007a1dd76", "inCitations": [ "907e5f587e25d0757ff2f1f1762052c3c6832f9f", "9ba97fe20d1042b080b3e2c515e4f9c0ccc6e9e0", "17ded16813a7ef6e179252585a742e83f004c0fb", "68e328bce39ba0c6bd399d051fbc5539bc7cf069" ], "journalName": "", "journalPages": "297-311", "journalVolume": "", "outCitations": [ "5f3cce1bc739ebfc03e003010d3438bb318efc14", "0d5ec0f90b9d07ebc48f4e00b2e583e5d49130dc", "387d5b24317395ae7a86c8ecc9403ac62ed6febe", "054e4a6966d54eb9fd207cf0484214201f46424a", "1a850fbc5d86a91d882eec88290425fbdff57cf6", "0036adadc90e4826b2f7fc157752eea459070c32", "4308295a2eaef30be423520918ad224dc2f3ffe2", "922364964b156f517bfec5c6ab867cf0437ec626", "e0857c644b1059323d15ef9d45ffe86f4f3b6a09", "5d79e0c5e4b531f26de469688668c50f8c1069b2", "5732268aea93dc3aa6b6f4a2db57a609b9714417", "2d6f002477015469075954c6748a1a85af352c94", "5c9fa798a510b66a37c1b0852582fd7735ed088e", "03e3a481d9713ad4d39dc608959d87b3f8d8144e", "6e193f1ff85158eb34f0e8d4a7eba5c475768889", "3e2480d7136fe5c6fa7213ea834566b93570c3ca", "40bb6830ec715b52e0fcbe7566f489dd2d0c25fe", "24f5343d06fd87efe2d78982f2302094bad604ff", "cbd9a9e99d78748e36b82f818df08b2e6fc1e631", "28cc7453c5f3f9ecb9415e631b0829ec9af8a4c3", "2dc38b527e91f8cfee6f6c7ba4d079087c293471", "90c8a482ff463c02575888193dc06661d3dfdd98", "67bf737ceccf387cdd05c379487da8301f55e93d", "0e0fb6a3ccbd9da9dc216913ef77d346515936c6", "1ffa34e8b3ef9ec23ffb8223658b650ce98f843d", "26512755e7f78e10390b409ed4de3378aba2bac8", "a06eb2e52176a5b6b941bb8544c544b64f527e32", "df46e5c551f64adc2f188e2e1282c77f3c3570f8", "2caa7e286803e1a7d49fb6906a7507ca08208751", "040d45e995ab920588607ebc6977ea19dc781923", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", 
"2992b8985e094c3943e29dffc550862791fae147", "0612811b3ed9fc7ef8300e65cb70360613dab01d", "01079a4f0bcac90e8977cbcee2ec50b98d408310", "f08a5e7a23b44c37a22e011e31843aeeae0ed4e6", "5670a2391d0c085be2ff5c704cae8e76a80a15fb", "797a4ebb3a729d677fd9190edc03c9e3a433188b", "1eeb50d5f7937f65a910203ae61430ff8b969012", "0d394c72f9d769dfa021796a29fc142db573aec7", "2e33c9a0e0ba542693610f36cabee4d8291f8e09", "7bee024cfab6e16be7c57e2ddbe13618d2a2968c", "419cded557a1578d0be08d8270791bb83f06b6a1", "8c2bb6f8ec141e764de6b34bf1a476986689e35f", "86d65bab9058b855e88723c6fb2476c953db0be3", "10443d5d4f0e5048df514e581a9f364954158d00", "03d832219a7cf933db0ef1f686fec730c09acd55" ], "paperAbstract": "Cache is designed to exploit locality; however, the role of on-chip L1 data caches on modern GPUs is often awkward. The locality among global memory requests from different SMs (Streaming Multiprocessors) is predominantly harvested by the commonly-shared L2 with long access latency; while the in-core locality, which is crucial for performance delivery, is handled explicitly by user-controlled scratchpad memory. In this work, we disclose another type of data locality that has been long ignored but with performance boosting potential --- the inter-CTA locality. Exploiting such locality is rather challenging due to unclear hardware feasibility, unknown and inaccessible underlying CTA scheduler, and small in-core cache capacity. To address these issues, we first conduct a thorough empirical exploration on various modern GPUs and demonstrate that inter-CTA locality can be harvested, both spatially and temporally, on L1 or L1/Tex unified cache. Through further quantification process, we prove the significance and commonality of such locality among GPU applications, and discuss whether such reuse is exploitable. 
By leveraging these insights, we propose the concept of CTA-Clustering and its associated software-based techniques to reshape the default CTA scheduling in order to group the CTAs with potential reuse together on the same SM. Our techniques require no hardware modification and can be directly deployed on existing GPUs. In addition, we incorporate these techniques into an integrated framework for automatic inter-CTA locality optimization. We evaluate our techniques using a wide range of popular GPU applications on all modern generations of NVIDIA GPU architectures. The results show that our proposed techniques significantly improve cache performance through reducing L2 cache transactions by 55%, 65%, 29%, 28% on average for Fermi, Kepler, Maxwell and Pascal, respectively, leading to an average of 1.46x, 1.48x, 1.45x, 1.41x (up to 3.8x, 3.6x, 3.1x, 3.3x) performance speedups for applications with algorithm-related inter-CTA reuse.", "pdfUrls": [ "http://www.nbi.dk/~weifeng/papers/ctaclustering_li_asplos17.pdf", "http://doi.acm.org/10.1145/3037697.3037709" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4fa62de1bf9ed8b2543489ae1be7b08007a1dd76", "sources": [ "DBLP" ], "title": "Locality-Aware CTA Clustering for Modern GPUs", "venue": "ASPLOS", "year": 2017 }, "4fb3c6d92273a976ad4cfd08060bcc164ab7516d": { "authors": [ { "ids": [ "2652697" ], "name": "Theodoros Rekatsinas" }, { "ids": [ "2185778" ], "name": "Manas Joglekar" }, { "ids": [ "1695250" ], "name": "Hector Garcia-Molina" }, { "ids": [ "1801540" ], "name": "Aditya G. 
Parameswaran" }, { "ids": [ "1803218" ], "name": "Christopher R\u00e9" } ], "doi": "10.1145/3035918.3035951", "doiUrl": "https://doi.org/10.1145/3035918.3035951", "entities": [ "Algorithm", "Baseline (configuration management)", "Discriminative model", "Logistic regression", "Machine learning" ], "id": "4fb3c6d92273a976ad4cfd08060bcc164ab7516d", "inCitations": [ "74eb71bd943149764f54f411905bf6607bb91c39", "fa099d45212e0b0e6486b6bd378e6f4fd06c222d", "04898f9bfdfedf6ee63310f324949caffb9c6b29", "715bae3cbbb2c3d13b0d128f3ff323815bb59495", "3729ef7703006d2b17a3d5215654cc3e7bb694a1" ], "journalName": "", "journalPages": "1399-1414", "journalVolume": "", "outCitations": [ "ba24bd33caaa38127664f86d1bb486ac508b8a47", "8a0a50fba24e68117ca95dba8028ef4e98093684", "cd48760a142830b796b8a85a158cd469f3e5feb0", "1af0851efa40686b6d06e8678967d8140fd7bc68", "3bbfc62fc13ca27c6e58e42167a6aef593a1365e", "41f1ebd4c8486614f5830794220df217281d507c", "f440916284dae249d26d195458f084fe08aa3aec", "579e1e9217cfed6d563cedf8f8fdcd1604fc0917", "1a0f8261d8384bfefbf2d561d446c3a687a5febe", "e01c4177fb9ffff49891b995418ec67be922bb7a", "1d604a0a5b17db632a4b77df35f62be129822e25", "1354ba1b6567f869065e9515a488bc031a00e25a", "79f9e562470ea00d1bba08fd1fba8cc46d96f211", "2234eb38be155cdc19bcce79bc8d54b188c8a3a2", "2433ef214583e369426c7755f396b417b4caa32a", "a12cd3d9ae5530a90302a6e4af477e6e24fa0f95", "79a4ce582dae39b2cbb0fab5de1179fe2581ce95", "af2a807c5e32a35765850c6b6891f471d7bc7aea", "0790c77c1eaf2368b55c6a0def09a43690eeb848", "117da44f01ef45ef8223bec8f9c2346b131321f4", "04b314e6ab6f3139c89d8c7735359026afe09c3b", "4607f09a348c87f95aedd7711b24d8bf614fe58c", "4f84bcbddad1e931b0328be6e0a96ca731c538f8", "2c37666634d2e50b998f68dcbef42aae16e02645", "27d6326993f80269595b2a594657754bf748927c", "208e395593f6a0d8e0325bc1a3ddfac1aa54df80", "1b65af0b2847cf6edb1461eda659f08be27bc76d", "0e5ebc2eb31b6c78ee0dee10246efeeaf587f7f4", "1b189d721adbf1d2bab93b7ed6ce826e188b0b99", "0f5c9968fe2cdb0f52c55b2d5b3dec7accf91306", 
"e37dc8a58f39ca8f159e4a71af3766381ea18356", "0a508835d4e9ae700fb056c4c0c7774255b1179f", "65821014abe934029310cb10d4e329645acd4817", "0e1b90fd2e39e6c9aa3bcf7e8ccacc30d724ab58", "4c7779938bd80f62f80d6964c89a97476eef7c69" ], "paperAbstract": "We focus on data fusion, i.e., the problem of unifying conflicting data from data sources into a single representation by estimating the source accuracies. We propose SLiMFast, a framework that expresses data fusion as a statistical learning problem over discriminative probabilistic models, which in many cases correspond to logistic regression. In contrast to previous approaches that use complex generative models, discriminative models make fewer distributional assumptions over data sources and allow us to obtain rigorous theoretical guarantees. Furthermore, we show how SLiMFast enables incorporating domain knowledge into data fusion, yielding accuracy improvements of up to 50% over state-of-the-art baselines. Building upon our theoretical results, we design an optimizer that obviates the need for users to manually select an algorithm for learning SLiMFast's parameters. 
We validate our optimizer on multiple real-world datasets and show that it can accurately predict the learning algorithm that yields the best data fusion results.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035951" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4fb3c6d92273a976ad4cfd08060bcc164ab7516d", "sources": [ "DBLP" ], "title": "SLiMFast: Guaranteed Results for Data Fusion and Source Reliability", "venue": "SIGMOD Conference", "year": 2017 }, "4fcb757af0a6b8bb4d0b09796e3da4cc98974c24": { "authors": [ { "ids": [ "3285633" ], "name": "Bei Shi" }, { "ids": [ "1717078" ], "name": "Wai Lam" }, { "ids": [ "38797620" ], "name": "Shoaib Jameel" }, { "ids": [ "2265382" ], "name": "Steven Schockaert" }, { "ids": [ "8851507" ], "name": "Kwun Ping Lai" } ], "doi": "10.1145/3077136.3080806", "doiUrl": "https://doi.org/10.1145/3077136.3080806", "entities": [ "Coherence (physics)", "Collocation", "Microsoft Word for Mac", "Text corpus", "Word embedding" ], "id": "4fcb757af0a6b8bb4d0b09796e3da4cc98974c24", "inCitations": [ "33714236d0edab4c1cba0ef887a5f9fa421d0b50" ], "journalName": "", "journalPages": "375-384", "journalVolume": "", "outCitations": [ "2538e3eb24d26f31482c479d95d2e26c0e79b990", "0825788b9b5a18e3dfea5b0af123b5e939a4f564", "1510cf4b8abea80b9f352325ca4c132887de21a0", "27211ed68a7a00f1df0121fa1890a1b2acdd1a88", "26c9e9eac8b794bb6a2b9574fdf2c1405d5c0cac", "b3625e35fe7de551d628dd4b3875d2ba847f1097", "bfea4d58717c83c67ac3f9eab855d15c59754757", "2dd459fd7443f2f028865c29b3689efa1ab538d8", "a6f793c53b1d1f09dbc11929bafc688b3e64d6bc", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "2c4e99ae3899b14b23f9dca2ef27a56c30e784bf", "1a08e135ac11db0249c6afb4540672c5a349495e", "00cc08c90bc4ae7d3523e4dad2ca3a8fafc8501a", "782bb1cca8c4f81fcb2bc51cf2ab591a8aa12423", "275a6dd72ca2dbeb81150b634ea967fc6e7bbb94", "128068d6a1a082982eb61fb1ac9da8239e936fb4", "9208ecbd7244040ba6ee59a067b527c8b095fe0a", "1f656b9c686c1e5db2a4d41f1ce7e270965def3e", 
"4c15b129a8da55127e4e2fe47f54799d0a313367", "d62547513e88f52a5cb29804ea11aad53a1da7ab", "da790c75b5d8850a2fdecb5e693f767ad2a6470f", "142f38642629b9d268999ad876af482177d36697", "1a07186bc10592f0330655519ad91652125cd907", "24c9b0b05c5e957e255b854f947472f9181772a4", "6363cfe79b33d66deeeba0e68e89f15b3e1e657f", "15367f1e036e47a2539f9a5ea6fe80b56bfade12", "0826c98d1b1513aa2f45e6654bb5075a58b64649", "0c23ebb3abf584fa5e0fde558584befc94fb5ea2", "8e31f3c7e70e9a5f8afafd86cebc004d5eca8c2b", "132c8b4d0760d2d35c99b0358c8bc5a51170e5e7", "01deebfc9e8ab895385a12cbe15545acb2186601", "3a90fbc91c59b63fcca1a93efe962e1fe8ed51ef", "10eb7bfa7687f498268bdf74b2f60020a151bdc6", "87d907a114409755ecd3c6886585de26a4e17ffe", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "0d67362a5630ec3b7562327acc278c1c996454b5", "052b1d8ce63b07fec3de9dbb583772d860b7c769" ], "paperAbstract": "Word embedding models such as Skip-gram learn a vector-space representation for each word, based on the local word collocation patterns that are observed in a text corpus. Latent topic models, on the other hand, take a more global view, looking at the word distributions across the corpus to assign a topic to each word occurrence. These two paradigms are complementary in how they represent the meaning of word occurrences. While some previous works have already looked at using word embeddings for improving the quality of latent topics, and conversely, at using latent topics for improving word embeddings, such \"two-step'' methods cannot capture the mutual interaction between the two paradigms. In this paper, we propose STE, a framework which can learn word embeddings and latent topics in a unified manner. STE naturally obtains topic-specific word embeddings, and thus addresses the issue of polysemy. At the same time, it also learns the term distributions of the topics, and the topic distributions of the documents. 
Our experimental results demonstrate that the STE model can indeed generate useful topic-specific word embeddings and coherent latent topics in an effective and efficient way.", "pdfUrls": [ "http://orca.cf.ac.uk/100911/1/Bei_SIGIR-2017.pdf", "https://users.cs.cf.ac.uk/JameelS1/sigir2017.pdf", "http://doi.acm.org/10.1145/3077136.3080806", "https://arxiv.org/pdf/1706.07276v1.pdf", "http://arxiv.org/abs/1706.07276" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4fcb757af0a6b8bb4d0b09796e3da4cc98974c24", "sources": [ "DBLP" ], "title": "Jointly Learning Word Embeddings and Latent Topics", "venue": "SIGIR", "year": 2017 }, "4fecfc3c9f71a07539c98353ef87ae8c5ec53005": { "authors": [ { "ids": [ "1781405" ], "name": "Maxime Colmant" }, { "ids": [ "1743906" ], "name": "Pascal Felber" }, { "ids": [ "1809154" ], "name": "Romain Rouvoy" }, { "ids": [ "1681385" ], "name": "Lionel Seinturier" } ], "doi": "", "doiUrl": "", "entities": [ "Distributed computing", "Docker", "Elasticsearch", "Requirement", "Run time (program lifecycle phase)", "Software deployment", "Software engineering", "Swarm" ], "id": "4fecfc3c9f71a07539c98353ef87ae8c5ec53005", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "514-523", "journalVolume": "", "outCitations": [ "0b5e9d1b9b812da75ffd8abf5f218d21a0101356", "d6d5f6d731c30c427680929b03ae16d755c4b350", "4fcde161c28bf1672e4a667a79ade97ea7f82e89", "1abc32fcf3f89ede556dbdb2678ffc9cf7f81d06", "13638fd0dff816d9bf8e2c3e2d5e1e8a5b527c32", "a817a687600ef821d4d89384a6a0b97c749f6f69", "02f105c7b14d259235a233aafeac0278c7b2c094", "163252e81b3a0f8269871f0845338c53494ec4f5", "4ca35e9bb3e50bbb876957a693f8bef4b2cff747", "39a434a6b7598745f4bb0e9d07e66b51ef2dee8c", "995c6b5e9ee851f1b70ed85a00867eb79714c246", "a9b4a0467a9489d339c3963a484b98edc4236693", "169b8204089d683536dff9ec9b8f259b907be3a8", "b02c6b00bd5dbdbd951fddb00b906c82fa80f0b3", 
"e23298e18aa92ac43fa941d0f5eacb339905b685", "4416052fca95270b50a29e9e3cc245cca8962861", "d587e6b766eaff8916e2e7fed5a515ce4ce0bd20", "3212c1755c3b5416da0c6d08243b3f254953285e", "9aa0d7253574e50fe3a190ccd924433f048997dd", "1adc5f0eea495fa5bf7394574af776dd8e5af633", "0a25b24b1935afa9e6bc7b8cae5ce883aa4d3d0e", "7982081501f55b39c5b5921f1b7d50cf6707cd92", "438e22ee516ecd66ade08aa6d5a9af1dd16d5716", "7d701414b0848d3194a20fdff64036fd7b5a8927" ], "paperAbstract": "The design and the deployment of energy-efficient distributed systems is a challenging task, which requires software engineers to consider all the layers of a system, from hardware to software. In particular, monitoring and analyzing the power consumption of a distributed system spanning several—potentially heterogeneous—nodes becomes particularly tedious when aiming at a finer granularity than observing the power consumption of hosting nodes. While the state-of-the-art in software-defined power meters fails to deliver adaptive solutions to offer such service-level perspective and to cope with the diversity of hardware CPU architectures, this paper proposes to automatically learn the power models of the nodes supporting a distributed system, and then to use these inferred power models to better understand how the power consumption of the system's processes is distributed across nodes at runtime. Our solution, named WattsKit, offers a modular toolkit to build software-defined power meters \"à la carte\", thus dealing with the diversity of user and hardware requirements. Beyond the demonstrated capability of covering a wide diversity of CPU architectures with high accuracy, we illustrate the benefits of adopting software-defined power meters to analyze the power consumption of complex layered and distributed systems. In particular, we illustrate the capability of our approach to monitor the power consumption of a system composed of Docker Swarm, Weave, Elasticsearch, and Apache Zookeeper. 
Thanks to WattsKit, developers and administrators are now able to identify potential power leaks in their software infrastructure.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101182" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4fecfc3c9f71a07539c98353ef87ae8c5ec53005", "sources": [ "DBLP" ], "title": "WattsKit: Software-Defined Power Monitoring of Distributed Systems", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "4ff196f7dea7bca99558c17aadd2249354ded70c": { "authors": [ { "ids": [ "9083755" ], "name": "Mulya Agung" }, { "ids": [ "24763687" ], "name": "Muhammad Alfian Amrizal" }, { "ids": [ "2531961" ], "name": "Kazuhiko Komatsu" }, { "ids": [ "2964434" ], "name": "Ryusuke Egawa" }, { "ids": [ "2067821" ], "name": "Hiroyuki Takizawa" } ], "doi": "10.1109/HiPC.2017.00026", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00026", "entities": [ "Algorithm", "Cluster analysis", "Locality of reference", "Network congestion", "Non-uniform memory access", "Scalability", "Simulation", "Time series", "Uniform memory access" ], "id": "4ff196f7dea7bca99558c17aadd2249354ded70c", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "152-161", "journalVolume": "", "outCitations": [ "a224fe92413427795041723bc0af9832e067d106", "36e1b02a66ed928ef13e3a2ba6852e90a8713036", "36332838c90392f4950230ecb73f1f0373ace934", "51491eb0e341731ef60141b0ecd82d97780ec264", "33dce868e0d719474b9f822c139628d06eca7eb1", "9b8d8f2fb88e03f8f3ad01efbfef52718b70d104", "4d51031ce850ea0f72c865011280a0aeeaaf9e02", "35566e7543e5f9b041f76d60e7778369d34bd5c9", "917fc743f23a795cf86d65da9f20b3f67dbbb7dd", "83a601e5cd6b8d8577a8f505ad6f72f4a3714463", "3be8d6f5f1cae8390e1612d5009f6a8f9196b92f", "d0dac91628415ce1b2135f68c883dc08583e9188", "63121ae5e4bca65774a9ac89223a2e44a687bd30", "8b7c84b013f0f4dafb9de769966e1954890d8949", 
"f4ac508e2662afebe757ce4d9247f78cc68e6c2a", "c4cbcaecad03438bc0639cb382997857a98e8b3d", "5c77f8ea871d8a539197dd743fee4a80ea77e314", "4f0e87791b6cee0eeb62988eb143f3371c5f85a1", "f58c4b789f331f78d39eabac4a646ea87ba66e0b", "7421d28428e041c271fe6370c331353f4a3fa974", "48b22e82dba733f04a5af3408ecab872ba38f0c3", "06154716d6d51256ed2bb014ef65ec8b5d41aa26", "1564121b8fc30a42a5705672128158146542191d", "15b1a25c9ea2838ce7333bffb474a64491c1bcb4" ], "paperAbstract": "MPI process placement is an important step to achieve scalable performance on modern non-uniform memory access (NUMA) systems. A recent study on NUMA architectures has shown that, on modern NUMA systems, the memory congestion problem could cause more severe performance degradation than the data locality problem because heavy congestion on memory controllers could cause long latencies. However, conventional work on MPI process placement has focused on locality to minimize the remote-access communication. Moreover, maximizing the locality may actually degrade performance because the load imbalance among nodes in a modern NUMA system may increase. Thus, a process placement algorithm must be designed to consider memory congestion. In this paper, a method to reconcile both the locality and the memory congestion on modern NUMA systems is proposed. This method statically analyzes the application communication pattern to optimize the process placement. A data clustering method is applied to the time-series data of the MPI communications in order to identify data traffics that potentially cause memory congestion. The proposed method has been evaluated with the NPB kernels on a real NUMA system and a simulation environment. 
Experimental results show that the proposed method can achieve 1.6x performance improvement compared with the current state-of-the-art strategy.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00026" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/4ff196f7dea7bca99558c17aadd2249354ded70c", "sources": [ "DBLP" ], "title": "A Memory Congestion-Aware MPI Process Placement for Modern NUMA Systems", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "5013d9a5f84892a768a6f40ca7e921788b5647a4": { "authors": [ { "ids": [ "9146911" ], "name": "Shaghayegh Gharghabi" }, { "ids": [ "8844408" ], "name": "Yifei Ding" }, { "ids": [ "3056465" ], "name": "Chin-Chia Michael Yeh" }, { "ids": [ "32728466" ], "name": "Kaveh Kamgar" }, { "ids": [ "3335706" ], "name": "Liudmila Ulanova" }, { "ids": [ "1732516" ], "name": "Eamonn J. Keogh" } ], "doi": "10.1109/ICDM.2017.21", "doiUrl": "https://doi.org/10.1109/ICDM.2017.21", "entities": [ "Algorithm", "Time series" ], "id": "5013d9a5f84892a768a6f40ca7e921788b5647a4", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "117-126", "journalVolume": "", "outCitations": [ "3dc2f6d16f5a7672cf9d2a4eb226aaa98e6ab02d", "add39272e8762cea5a24c95ad238af5d61c3bd54", "171ddad7a5ed834ff7313bb614de2f44924ebeb4", "2d338b4db3f5a8f9abe498b7a7af58af17c3ef6a", "631244d87490a10fc3577ee2bc5b010e232ed265", "2ebbb96b0db80159488af13975516de7273b54e1", "1f49221dc511f9bbd66faefa19860a3239058470", "7e581838349741943ab848af371cc45ef309670d", "a1ecdee4cd74c19b987f9cceaf13865fb7e36bcd", "97816ef7eae59b581d434d9f13900786fef08ca5", "2d68c9fc943862efe6e8272e703c2d0fc5302b5f", "8af149cb3b20c0bf2aeb68199495929898608e19", "8c6feef769f59786767b06ae4dd856171e9f38a9", "dcd8e58982898dc1984ca5e0f484848df412bc6e", "2ece4b519b9ca4cc68a6684b1647dcfa7ffb7778", "03ca7769995660b62ea9c3b03f172edf6c934733", 
"88dcaefc947edca7adbc24b561c2606090ae7130", "2fd16d8538f78bae98a5255213df49c57516ccac", "61523cfe6f51859e00aa8ce320114c03151208fa", "b1ff017147634cd76cc5c1eec23bf72fa783808c", "de9b85d3153b21b016fc1c148ad54e52ab2c1931", "133de79b6f83cc3b09219eca3e1fac96ec38a7d0" ], "paperAbstract": "Unsupervised semantic segmentation in the time series domain is a much-studied problem due to its potential to detect unexpected regularities and regimes in poorly understood data. However, the current techniques have several shortcomings, which have limited the adoption of time series semantic segmentation beyond academic settings for three primary reasons. First, most methods require setting/learning many parameters and thus may have problems generalizing to novel situations. Second, most methods implicitly assume that all the data is segmentable, and have difficulty when that assumption is unwarranted. Finally, most research efforts have been confined to the batch case, but online segmentation is clearly more useful and actionable. To address these issues, we present an algorithm which is domain agnostic, has only one easily determined parameter, and can handle data streaming at a high rate. In this context, we test our algorithm on the largest and most diverse collection of time series datasets ever considered, and demonstrate our algorithm's superiority over current solutions. 
Furthermore, we are the first to show that semantic segmentation may be possible at superhuman performance levels.", "pdfUrls": [ "http://www.cs.ucr.edu/~eamonn/Segmentation_ICDM.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.21" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5013d9a5f84892a768a6f40ca7e921788b5647a4", "sources": [ "DBLP" ], "title": "Matrix Profile VIII: Domain Agnostic Online Semantic Segmentation at Superhuman Performance Levels", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "503c7a0bc920821372da0466ce01224c96c14608": { "authors": [ { "ids": [ "1829445" ], "name": "Jose M. Faleiro" }, { "ids": [ "2254232" ], "name": "Daniel J. Abadi" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Benchmark (computing)", "Flip-flop (electronics)", "Multi-core processor", "Mutual exclusion", "NAT traversal", "No Silver Bullet", "Relational database management system", "Relay", "Scalability" ], "id": "503c7a0bc920821372da0466ce01224c96c14608", "inCitations": [ "ca5dff2607ef06fdbb19a5b0fc39965b2529e588" ], "journalName": "", "journalPages": "9-", "journalVolume": "", "outCitations": [ "20c450f099b661c5a2dff3f348773a0d1af1b09b", "fda929734d5b0c383001d9fccae6f2219c104458", "3e77a77247734dc918a5723573e1158eee1955f9", "5dc3de7db4c81dd861c7a474107e6e10db29aa3b", "37a1e8411669e29cf8fbf48ec920c97c0066ac7e", "1cdedeb9461bdebedc47c7a358769f85dd7683ea", "0d0e2b8d0abfc5a8211609486c22332d40610e4d", "f5c63f9f0264fe9eb4fc1e10995dfc60bc09a969", "0bddbe35fa6e3cf625d15553365a690d3a6bf7aa", "045a975c1753724b3a0780673ee92b37b9827be6", "c1af6545a67ea040b56f8665ae17ccfb9fe1967e", "2520cfc29a521f2333fda020d7ae41860f8dfebd", "29bcabd87448e40d634432db9997bd4a585f26d8", "4fcfa58bda82134cdf2982ea12e653da6b553f89", "96d197be2253f5c853edce37b59c186915160ce0", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "1c850bfe911dbf5ab6384797dc03ed3b52a14c5f", "2e50af2320dab632d8046b6d4c130ae6cce8903f", 
"136eefe33796c388a15d25ca03cb8d5077d14f37", "e5fb32cf85b1740984b8cadc1ff44b2ae977ac3c", "3ac23f666d2d2bb0f6d288d289b38e4b7a57bdf5", "253d779cc8939c4f5e2d50158bc76586c743417d", "5f9d84444231fb6f87f48be9928723a32e23e5ce", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "09ecdb904eb7ae8a12d0c6c04ae531617a30eafa", "a517253fa6459d052fdaeda335e7dce2c1040a34", "76057a3c7b489290afd4a4dccf09b623502619fd", "135772775121ba60b47b9f2f012e682fe4128761", "1ae3dce5083713d1a04b959039a94ff77b346622", "208781097dd2874cbb27d6ab22de5baa830c80e6", "0817e5305249f26eb21582da909a2f92b434ff83", "23346a18e78062e586cab22195819eb0f18ffc66", "f254fe47a8e00c3c0257304850b8d5deef0a72bc", "54a882bc5f15877097dfb1aab8c480323036e48c", "4b598427d5ce2e0c072b529a54059f3cb5c4f47b", "6171721fa3aae03dca89636e503b5baf5af1f0dc", "042f443418ff2ff98a1dccbf49df9fa258dab707", "68a9005a5ec10daece36ca5ecb9cad7be44770b1", "aacfbf0d34bc24dc3b72e56719ec083759a072ce", "fb9c404de85861428d9b0a22dea7213b186d93bb", "264cd229ac4bbdf655d5e7b44563bf84bd846364", "3ae8993ebc28dd9b99d415d04d2b766dc99212d9", "abf1157c2043274a8d580151db1d4ef5be2c892e", "03416be8097852a54dd3e309434e5a0806824646", "0997037e940df06ed7a6d19f7501579aab01e829", "25883553e5315e32194614676f11bb012db6dafd", "4523a15a22bcabad38c81e1eba13a1bddd6704c5", "34d269619576cd827b9842581755c06dac344b16", "6479bc321dd7859eb6b6b8cca100bade86940526", "2888c136064ff5527a0bb370ac1d9bf71939e066", "29741acc5dfc21549ea53e9036179acc80603018", "a7eeb0fbbc45431dfca8fce9698196dda8f16b7a", "0ad5ed5a0a7b9210993753d7594c7285d5e9b179", "4634313264c26ca00c3b940865dc7c35d393bee7", "00ac447d02035c26c7e2852c2457fe812e89038f", "70401a50dc4969521980c220b781a6c9d7e9b47d", "34d33c19d0e893415b570ebdeea993db5b7af509", "e682e957f1facb8f3921a0fef5ebcf53a6c4bb04", "14da9e91b3f7a94e222d48ff72c71646c3c49046", "1a16975d1630756772b7d16e220236fe9a2830d3", "6f2f219a4f6d64843efe35f868ed919ce8b3a031", "9748241beb02ef1e2d0e6dc877c04b354033a838", "682f34e8845a5f54c20d636b3255525ded099502", 
"bee426af33d209f50f33084d8f677cd5154372b4", "7038e23695dbc4d8a9d1b7c6dff8dbc138009c4b", "13875088254a585cd0b050f3bc27c1af9ada690f" ], "paperAbstract": "Recent research on multi-core database architectures has made the argument that, when possible, database systems should abandon the use of latches in favor of latch-free algorithms. Latch-based algorithms are thought to scale poorly due to their use of synchronization based on mutual exclusion. In contrast, latch-free algorithms make strong theoretical guarantees which ensure that the progress of a thread is never impeded due to the delay or failure of other threads. In this paper, we analyze the various factors that influence the performance and scalability of latch-free and latch-based algorithms, and perform a microbenchmark evaluation of latch-free and latch-based synchronization algorithms. Our findings indicate that the argument for latch-free algorithms\u2019 superior scalability is far more nuanced than the current state-of-the-art in multi-core database architectures suggests.", "pdfUrls": [ "http://cs-www.cs.yale.edu/homes/dna/papers/latch-free-cidr2017.pdf", "http://cidrdb.org/cidr2017/papers/p121-faleiro-cidr17.pdf", "http://15721.courses.cs.cmu.edu/spring2018/papers/07-latching/faleiro-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/503c/7a0bc920821372da0466ce01224c96c14608.pdf", "s2Url": "https://semanticscholar.org/paper/503c7a0bc920821372da0466ce01224c96c14608", "sources": [ "DBLP" ], "title": "Latch-free Synchronization in Database Systems: Silver Bullet or Fool's Gold?", "venue": "CIDR", "year": 2017 }, "503da6ea753d21b8843d0c35e24d23ef46ba7cbf": { "authors": [ { "ids": [ "11960766" ], "name": "Akhil Krishnan" }, { "ids": [ "32847772" ], "name": "Mikhail Markov" }, { "ids": [ "1746401" ], "name": "Borzoo Bonakdarpour" } ], "doi": "10.1109/IPDPS.2017.90", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.90", "entities": [ "Aerial photography", "Algorithm", "Approximation", 
"Approximation algorithm", "Linear programming", "Operations research", "Optimal design", "Program optimization", "Routing", "Simulation", "Unmanned aerial vehicle", "Vehicle routing problem" ], "id": "503da6ea753d21b8843d0c35e24d23ef46ba7cbf", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "503-512", "journalVolume": "", "outCitations": [ "5b1406f4ba9365072172d666bed2ec49b90b9eeb", "b2de7e58521b308415865fda246fe5a4245327e9", "b525ed657712256c0abf866e7b0b51831078247f", "1d902a44476a9a01092341a63a212052b513724b", "1a45104c95259533c7176c956138df40f1efb82e", "3c0ce781f3e21314a42c436080f69875d8d7cc9f", "07d2f2a85a2453591e01651ffb287f4bc0e3ce7f", "180af42e1b95259b593f506b74b99f2acb8f10a4", "0c38ced8d5f1abcce633bb347e332dce99a6ad16" ], "paperAbstract": "The classic vehicle routing problem (VRP) is generally concerned with the optimal design of routes by a fleet of vehicles to service a set of customers by minimizing the overall cost, usually the travel distance for the whole set of routes. Although the problem has been extensively studied in the context of operations research and optimization, there is little research on solving the VRP, where distributed vehicles need to compute their respective routes in a decentralized fashion. Our first contribution is a synchronous distributed approximation algorithm that solves the VRP. Using the duality theorem of linear programming, we show that the approximation ratio of our algorithm is O(n · (ρ)1/n log(n + m)), where ρ is the maximum cost of travel or service in the input VRP instance, n is the size of the graph, and m is the number of vehicles. 
We report results of simulations and discuss implementation of our algorithm on a real fleet of unmanned aerial systems (UASs) that carry out a set of tasks.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.90" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/503da6ea753d21b8843d0c35e24d23ef46ba7cbf", "sources": [ "DBLP" ], "title": "Distributed Vehicle Routing Approximation", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "50494db7b318804b3070239f098feb168bf471f0": { "authors": [ { "ids": [ "2357970" ], "name": "Nik Sultana" }, { "ids": [ "40124142" ], "name": "Salvator Galea" }, { "ids": [ "4804229" ], "name": "David Greaves" }, { "ids": [ "38358360" ], "name": "Marcin W\u00f3jcik" }, { "ids": [ "3041844" ], "name": "Jonny Shipton" }, { "ids": [ "8845741" ], "name": "Richard G. Clegg" }, { "ids": [ "32214483" ], "name": "Luo Mai" }, { "ids": [ "3407998" ], "name": "Pietro Bressana" }, { "ids": [ "1762445" ], "name": "Robert Soul\u00e9" }, { "ids": [ "1679929" ], "name": "Richard Mortier" }, { "ids": [ "1912518" ], "name": "Paolo Costa" }, { "ids": [ "1809586" ], "name": "Peter R. Pietzuch" }, { "ids": [ "1726850" ], "name": "Jon Crowcroft" }, { "ids": [ "31755529" ], "name": "Andrew W. 
Moore" }, { "ids": [ "2447168" ], "name": "Noa Zilberman" } ], "doi": "", "doiUrl": "", "entities": [ "Central processing unit", "Compiler", "Data structure", "Debugging", "Field-programmable gate array", "General-purpose language", "High- and low-level", "High-level synthesis", "Integrated development environment", "Library", "Profiling (information science)", "Rapid prototyping", "Silicon compiler", "Standard library" ], "id": "50494db7b318804b3070239f098feb168bf471f0", "inCitations": [], "journalName": "", "journalPages": "459-471", "journalVolume": "", "outCitations": [ "24edd449ea31b850bc0d3bb3ffc1b8b6eb66e13a", "907d2c011942a78bf6acff8e048f4185d53ff8f2", "46a38fb1e26c9957111d154c6fc2c39101ff0ba1", "82e5e034d02a0d3e1219fe527ab8480401fefd50", "0cccb12cbd39a77868edb7f6e0f0de30c2f6a3bb", "3fe59568240dc9fd2b14c9a3b44804a5f6808d40", "df1655c2e3a870720308730bff45208beff35aeb", "8d67bac352dcd43c9c08f917ba8c4bebb444b55d", "aae8c9327ebbfb63343e46c3ac9d9fec3598341c", "06db78ece7ba41bccab5df77240541e32cffd623", "165cf5e471b32122ba3a38709873cecf9b1b9a58", "0f61e0d7e7f67ff54a77c2df4b7a035ed6ab87c2", "269c24a4aad9be622b609a0860f5df80688c2f93", "00972bc1b13f01eedeefcf89b84cacf7051afdf1", "2d07ed11586868970402426923a2c00b2a733cce", "1d1782862634263bb4b6ab6c42a9fbfb49f392e8", "58ef32b36ae9b19b0d4524a1bd3b7e93457978d0", "5cb88831f543d30cc688fedc445d4e358ef73626", "62481ef78abbd5d457033ff14fdae9111a15193e", "3d0a9242b3913b7de02aefa440048f1d689aa9c4", "45fcaf11eaf31228a218a24663067dab509a1031", "0f1647d286eb868c7039f8bf052ef4767166069f", "98d87ec603edd714f29bcfad4266872403f40d7f", "34035e4ebb8a24253a5023022415fcd05983893e", "94012ab269563c59e6fda55567db636f4a6b7e6b", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "4f5ef5dfe854a9c9f34af44d306534c9a1606b15", "2077579d62fc090d4ddf45f107ffae0468936165", "2082a94be79311cfb3c73f02a88f005b38d1a424", "0ab85aefdda732705df5b102c0a2851f1266146b", "65da29a03c8905cbc0614612d1632864336c4786", "2a19d31a818be868bf39dcf258b36a86ef2ece3f", 
"35a5a870cefa0184245cf317381f3dc4092e1781", "7c945a1f92ac242dfbf7bc6e3e4c37d1445aee36", "24c6e70c583daed1852637ec42d4589556ac59d3", "4a64c9b61e3f015d08dc85c48fe1ee5546c92291", "9ad46f6da8cc9fafef6d1dbf17d2a85c944e9184", "0d3f85933b6355789588476e491683532c68a906" ], "paperAbstract": "Due to their performance and flexibility, FPGAs are an attractive platform for the execution of network functions. It has been a challenge for a long time though to make FPGA programming accessible to a large audience of developers. An appealing solution is to compile code from a general-purpose language to hardware using high-level synthesis. Unfortunately, current approaches to implement rich network functionality are insufficient because they lack: (i) libraries with abstractions for common network operations and data structures, (ii) bindings to the underlying \u201csubstrate\u201d on the FPGA, and (iii) debugging and profiling support. This paper describes Emu, a new standard library for an FPGA hardware compiler that enables developers to rapidly create and deploy network functionality. Emu allows for high-performance designs without being bound to particular packet processing paradigms. Furthermore, it supports running the same programs on CPUs, in Mininet, and on FPGAs, providing a better development environment that includes advanced debugging capabilities. 
We demonstrate that network functions implemented using Emu have only negligible resource and performance overheads compared with natively-written hardware versions.", "pdfUrls": [ "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/07/sultana17emu.pdf", "https://www.cl.cam.ac.uk/~nz247/publications/emu2017atc.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/sultana", "http://www.inf.usi.ch/faculty/soule/usenix2017.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-sultana.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/5049/4db7b318804b3070239f098feb168bf471f0.pdf", "s2Url": "https://semanticscholar.org/paper/50494db7b318804b3070239f098feb168bf471f0", "sources": [ "DBLP" ], "title": "Emu: Rapid Prototyping of Networking Services", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "5054d82d64e9246c269f4764a0c85ddf715dfe3b": { "authors": [ { "ids": [ "21155239" ], "name": "Wookhan Jeong" }, { "ids": [ "7752212" ], "name": "Hyunsoo Cho" }, { "ids": [ "3378728" ], "name": "Yongmyung Lee" }, { "ids": [ "21094294" ], "name": "Jaegyu Lee" }, { "ids": [ "3347382" ], "name": "SongHo Yoon" }, { "ids": [ "2519104" ], "name": "Joo Young Hwang" }, { "ids": [ "2289794" ], "name": "Dong-Gi Lee" } ], "doi": "", "doiUrl": "", "entities": [ "Adobe Flash", "Associative entity", "Booster (electric power)", "Cache (computing)", "Device driver", "FTL: Faster Than Light", "Flash file system", "Flash memory", "Flash memory controller", "Random access", "Random-access memory", "Smartphone", "Universal Flash Storage" ], "id": "5054d82d64e9246c269f4764a0c85ddf715dfe3b", "inCitations": [ "0ae03e097cd936f564a60017b864beeb12635b09" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "053f11965e2afd74b04d61228d07b9ba45107fa7", "c05ee7dee7d474424f3d403bb143ba93868e6925", "56da687431aa4e8d6c7c37d775b6b106eb909071", "e0a1546f56b68ebfcc5f7237c073d6186188f192", 
"05961fc1d02ca30653dd0b4c906113db796df941", "bd263ebc3e8cec76ea3f3ffa83a7878af4dfdd63" ], "paperAbstract": "NAND flash memory based storage devices use Flash Translation Layer (FTL) to translate logical addresses of I/O requests to corresponding flash memory addresses. Mobile storage devices typically have RAM with constrained size, thus lack in memory to keep the whole mapping table. Therefore, mapping tables are partially retrieved from NAND flash on demand, causing random-read performance degradation. In order to improve random read performance, we propose HPB (Host Performance Booster) which uses host system memory as a cache for FTL mapping table. By using HPB, FTL data can be read from host memory faster than from NAND flash memory. We define transactional protocols between host device driver and storage device to manage the host side mapping cache. We implement HPB on Galaxy S7 smartphone with UFS device. HPB is shown to have a performance improvement of 58 67% for random read workload.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-jeong.pdf", "https://www.usenix.org/conference/hotstorage17/program/presentation/jeong", "https://www.usenix.org/system/files/conference/hotstorage17/improving_flash_storage_performance_2017_revision_3_1.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/bd22/cd0f513b748ce891fb1139c17a5bac2ad14c.pdf", "s2Url": "https://semanticscholar.org/paper/5054d82d64e9246c269f4764a0c85ddf715dfe3b", "sources": [ "DBLP" ], "title": "Improving Flash Storage Performance by Caching Address Mapping Table in Host Memory", "venue": "HotStorage", "year": 2017 }, "508d8da554dcb6d39b451eb1da9a9293919c7370": { "authors": [ { "ids": [ "2089649" ], "name": "Andreas Haas" }, { "ids": [ "1987677" ], "name": "Andreas Rossberg" }, { "ids": [ "2094391" ], "name": "Derek L. Schuff" }, { "ids": [ "2982459" ], "name": "Ben L. 
Titzer" }, { "ids": [ "2767220" ], "name": "Michael Holman" }, { "ids": [ "3363743" ], "name": "Dan Gohman" }, { "ids": [ "31940212" ], "name": "Luke Wagner" }, { "ids": [ "2844522" ], "name": "Alon Zakai" }, { "ids": [ "32389243" ], "name": "J. F. Bastien" } ], "doi": "10.1145/3062341.3062363", "doiUrl": "https://doi.org/10.1145/3062341.3062363", "entities": [ "3D computer graphics", "Compiler", "High- and low-level", "JavaScript", "Programming model", "WebAssembly" ], "id": "508d8da554dcb6d39b451eb1da9a9293919c7370", "inCitations": [ "e32eb18d74c5a9482cd19585f2bc19f6dc675f6a", "38876d86e5e7851181efc9ed3bf15765c0b59bb1", "2bd400b03bfaad711e7fb40f3f64e5ae05bccd1b", "094d01d9eff739dce54c73bba06e097029e6f47a", "829a2dfa16642a2967cc0edded1a52db2a1911ca", "68fc3a0da7e96122e308c1bbe28e1ca3b879d461", "7a8d0acc7c6f5fce35dcc7797bfe3cb4f36cd1fe", "07b012a7bdfcc4afdad7a0bda8873cf52057d38a", "18b24afcca112e6a2f548d424d5a09c1660c1a82", "882eb456f90e5b79ca2ddf2da5e2c1d972929989", "1db8af98e1ae2df33590465c6b136ffff54436ee" ], "journalName": "", "journalPages": "185-200", "journalVolume": "", "outCitations": [ "3562c189a9a6402d2c484138ceccbe9a2cd67823", "9a8bf1a6e4e71f59620a53b0637c38a416966c4b", "08de7934c347f5832dfa035a1a12c0fd40934a8e", "0d84bed55ebd61bb90e8fe752803ee9ae3d826bf", "4f0655f1dfba053b70fbe33c4c78f2c2bea06cbf", "84c4310019cc0d0544e979c690782b36b82cf912", "01ee7db67463b53143e0b2c126363ed6d3c8a532", "dd942cd6c6da43b55688cbda7266f69ee3c661ec", "0df445ca53975d93f27c9def03e964d3113a4607", "cea8f7d923ea3ee28d6ee9ae4669224f51819aa2", "0d4843d31be7198a94a68b00488148d4fa693567", "1810f70bdcb6f50ff70bed2c165918046e6a8aef", "18681d3c91d5f7ecf53d1a32bd5ec19ce26b5ede", "0f5bd2edf5b1ce8815e34f6090d726c35d9331d5", "4ea89fa6a34e4398e3460a677f478e1cb30504a0", "1a6c35f4dbbf47ddd6a7bd3133830459fa041ff2", "2194c3460ab71f3826db00b045b2ae590c753319", "07791e5a4cd557a4fd160a20b248442568f7b0fd", "0719b9670c8580db76547497df39caabdc20fc32", "23bd210a62e0eb576f73f4aa93acfc9188faa6d0", 
"736768fe05e6d114f9d0d2b10ba4a04db6c5ba75", "2554485ffdb8473262ce0cfde401cfdc5b85f3fe", "7521513abd7acae00b3fd89001da47019606cf38", "0ab24dda560e79160a8d41bee4e6e9f37a6a554e", "c19c45531a5f6c114e9da1dbfea8aedc31c31e0d", "046a9e129fba46d78301ead661949f5290c79989", "65192f3d0ffb066a4c47a09fc11fdfad47dd192e", "074416f82b8cf2ee3deaa8a8da64a5ad674ded23", "585706dc56e146c8fb42228fc5cbe1de0bb0a69d", "67b752aaef2133ec0cda47b2a2c1856f0f2f266f" ], "paperAbstract": "The maturation of the Web platform has given rise to sophisticated and demanding Web applications such as interactive 3D visualization, audio and video software, and games. With that, efficiency and security of code on the Web has become more important than ever. Yet JavaScript as the only built-in language of the Web is not well-equipped to meet these requirements, especially as a compilation target. \n Engineers from the four major browser vendors have risen to the challenge and collaboratively designed a portable low-level bytecode called WebAssembly. It offers compact representation, efficient validation and compilation, and safe low to no-overhead execution. Rather than committing to a specific programming model, WebAssembly is an abstraction over modern hardware, making it language-, hardware-, and platform-independent, with use cases beyond just the Web. WebAssembly has been designed with a formal semantics from the start. 
We describe the motivation, design and formal semantics of WebAssembly and provide some preliminary experience with implementations.", "pdfUrls": [ "https://people.mpi-sws.org/~rossberg/papers/Haas,%20Rossberg,%20Schuff,%20Titzer,%20Gohman,%20Wagner,%20Zakai,%20Bastien,%20Holman%20-%20Bringing%20the%20Web%20up%20to%20Speed%20with%20WebAssembly.pdf", "http://doi.acm.org/10.1145/3062341.3062363" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/508d8da554dcb6d39b451eb1da9a9293919c7370", "sources": [ "DBLP" ], "title": "Bringing the web up to speed with WebAssembly", "venue": "PLDI", "year": 2017 }, "50b556396ebc887461015b48ce89c572424bcedf": { "authors": [ { "ids": [ "1760272" ], "name": "Tobias Lauinger" }, { "ids": [ "34982211" ], "name": "Abdelberi Chaabane" }, { "ids": [ "2786583" ], "name": "Sajjad Arshad" }, { "ids": [ "37894036" ], "name": "William Robertson" }, { "ids": [ "35497150" ], "name": "Christo Wilson" }, { "ids": [ "1707794" ], "name": "Engin Kirda" } ], "doi": "", "doiUrl": "", "entities": [ "Causality", "Client-side", "Coupling (computer programming)", "Denial-of-service attack", "Digital rights management", "Hoc (programming language)", "JavaScript", "JavaScript library", "Library", "Library (computing)", "Trends in library usage", "Vulnerability (computing)", "Web developer", "World Wide Web", "jQuery" ], "id": "50b556396ebc887461015b48ce89c572424bcedf", "inCitations": [ "c142a0eb15bbe6cf50afd90c79f6ff4de0b244ed", "09f8c276478e0c75153897ebcf884e05d3eb45d0", "18d617b1e5fd207c890fcfe8341a0193d81478d2", "036f5b7f36b8aca36e967e0824b3acb951d9e636", "0628d0dfd34948da6c6db9ad67c8a212caac41ec", "2f0d0ba4fb91f7a98356b68d4e5466c7b42ae43b" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "14dc6292ec19ad701e3c74b325e1c8693b407449", "307ab82c114b043878a7d4d4a0c0ed320914fa2c", "1c126c0ddc80c1fa177adb9ef32bdf84e0306846", "306f123c6af2577a61f67e9155cf3897ea149d1c", 
"1e73c2fa2709d3210c09f19933e99b71905364ab", "005618530ed8f2b1b1e12456db5ff8ce94709acd", "ca0dac6a33e155b264213a6273d6a125dc3d1071", "2f95e2ca11610cb334d8d777d7b0f0d5561e67bc", "4389822e4ebf597f6302f7b9f3dc102d4b232181", "130633f0653e6ad5766144299aa17938e7a5fca2", "c75dfb6d16d58a6f61a07ae5b0682b47c9724e37", "18c48a28a0d97496651e8c966b5dbc3983a15b28", "6911b420bb4eb3004dbb6a89c94dfd524bcf0074", "c6b24743d3e29b2de9d146b03fdec3a18bdf6633", "c72d3dcfee09798f83f3d9aa8e7926662a9df8f4", "a370b90f96f92f5b1d2e0a3725d50bf7f4b1d5ec", "6db3835143fb2c337449717005e551ef11c58fbb", "1a7160058a87a2a7dedd2f6e95f25892ec4f3d35" ], "paperAbstract": "Web developers routinely rely on third-party JavaScript libraries such as jQuery to enhance the functionality of their sites. However, if not properly maintained, such dependencies can create attack vectors allowing a site to be compromised. In this paper, we conduct the first comprehensive study of client-side JavaScript library usage and the resulting security implications across the Web. Using data from over 133 k websites, we show that 37 % of them include at least one library with a known vulnerability; the time lag behind the newest release of a library is measured in the order of years. In order to better understand why websites use so many vulnerable or outdated libraries, we track causal inclusion relationships and quantify different scenarios. We observe sites including libraries in ad hoc and often transitive ways, which can lead to different versions of the same library being loaded into the same document at the same time. Furthermore, we find that libraries included transitively, or via ad and tracking code, are more likely to be vulnerable. This demonstrates that not only website administrators, but also the dynamic architecture and developers of third-party services are to blame for the Web\u2019s poor state of library management. 
The results of our work underline the need for more thorough approaches to dependency management, code maintenance and third-party code inclusion on the Web.", "pdfUrls": [ "https://seclab.nu/static/publications/ndss2017jslibver.pdf", "http://www.ccs.neu.edu/home/cbw/static/pdf/lauinger-ndss17.pdf", "https://seclab.ccs.neu.edu/static/publications/ndss2017jslibver.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/thou-shalt-not-depend-me-analysing-use-outdated-javascript-libraries-web/" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/cb9b/2b8fd7e8559f65504304080896514a786a1a.pdf", "s2Url": "https://semanticscholar.org/paper/50b556396ebc887461015b48ce89c572424bcedf", "sources": [ "DBLP" ], "title": "Thou Shalt Not Depend on Me: Analysing the Use of Outdated JavaScript Libraries on the Web", "venue": "NDSS", "year": 2017 }, "50d95d13c02dde5854cd3a5d7bbb8f91711f9730": { "authors": [ { "ids": [ "39586948" ], "name": "Guanpeng Li" }, { "ids": [ "2708722" ], "name": "Siva Kumar Sastry Hari" }, { "ids": [ "3887079" ], "name": "Michael Sullivan" }, { "ids": [ "40489216" ], "name": "Timothy Tsai" }, { "ids": [ "1715185" ], "name": "Karthik Pattabiraman" }, { "ids": [ "1775477" ], "name": "Joel S. Emer" }, { "ids": [ "1715863" ], "name": "Stephen W. 
Keckler" } ], "doi": "10.1145/3126908.3126964", "doiUrl": "https://doi.org/10.1145/3126908.3126964", "entities": [ "Algorithm", "Artificial neural network", "Autonomous car", "Deep learning", "Experiment", "Failure rate", "Hardware acceleration", "Machine learning", "Propagation of uncertainty", "Triple modular redundancy" ], "id": "50d95d13c02dde5854cd3a5d7bbb8f91711f9730", "inCitations": [ "781cf9b4d17f89ad4b971d2a1655421378149e2d", "6b25106ad8f0a8167516921c3d3966c89f639d13" ], "journalName": "", "journalPages": "8:1-8:12", "journalVolume": "", "outCitations": [ "2ffc74bec88d8762a613256589891ff323123e99", "39f63dbdce9207b87878290c0e3983e84cfcecd9", "1a18a109c88cbbf9398913e841bed290121e743a", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "7365a36e7e74046cd5af04ab6fd736839acfe233", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "925a81e193c963dd781ff4e9ad562cb1487bafbd", "8b41ca5d078d47bef6415334e97d383fa907549a", "0256f81e75c34b5aa6f932c29d11807cbd848dfb", "32c3d778d8cce464b3ad3de277666295f3a0b02a", "e1c4e2fa071046569a05e9cfdf13496d094025dd", "a1c4e5e62537fca72f3e0fd5c9956265c6e2e98b", "01fcae344d2edb715bcc63a40b6052c0331741bd", "08f5dfadbe491b04b65fc766e2f69fa95c91d035", "233b1774f28c9972df2dfcf20dfbb0df45792bd0", "5e3fb6a4514550dbdb1bfeb4e5705e4a7ffcc84f", "1b82108089107d726ad6b4167ed59b3de78654f5", "21d7130230162af2a4cc1b9375bfe9b37dbbd499", "5fe1f55db975575e399f1b35c8ecbc479bc91978", "5a2819286cd24805b19966519781f61c8621a7da", "43cab718dbfdb9e9b0a515e897f8e26f3e0ac935", "399acab2bee6eccbfffe4a2ce688b6b1075e9c5e", "0a84622ac7743998763aa8f5d1d1c04918bc6230", "e00a2ddf33b25d551eb90a718c0d94225fa026b8", "4f40ea0248653d4ffb6ef4857cd23f0f713d8c69", "8215eed5098c6f0615351afe0d60710d30e59a3b", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "eccbb5e0d77ae0d938519ce8045472f27febf62b", "2bb28f6105ca30b2bc1ba91578234ecb12e788a6", "06ce77e4abea63948580340be25d7f2a80369e5a", "51327c46f6db01ab9bde8aad63ac7d5ba2b94066", "061356704ec86334dbbc073985375fe13cd39088", 
"20a103e1a9492b5f112c94c5adb952dfed2b8903", "28540222f0ed31ae930dc329e29eb17d280663f2", "5236160832766c58b1be2bf4f76f33d9d25b4600", "437b11128948f92e1139c555cf1326922ee36b39", "b7cf49e30355633af2db19f35189410c8515e91f", "15ab4f60005ea42d5e57f365a478b28b92ea2238", "b1c12a804c839484de33b5ad9b0b262d411cc7c4", "939453d51ba1a574a9f5a41194d14ca2470da88c", "948a2667c723662014bb325be6cd587bb56f5016", "766b21e4984729a17d2d826691affa25f855a38b", "46e11d34a8fbcd74c432837612a4e972d7a110e9" ], "paperAbstract": "Deep learning neural networks (DNNs) have been successful in solving a wide range of machine learning problems. Specialized hardware accelerators have been proposed to accelerate the execution of DNN algorithms for high-performance and energy efficiency. Recently, they have been deployed in datacenters (potentially for business-critical or industrial applications) and safety-critical systems such as self-driving cars. Soft errors caused by high-energy particles have been increasing in hardware systems, and these can lead to catastrophic failures in DNN systems. Traditional methods for building resilient systems, e.g., Triple Modular Redundancy (TMR), are agnostic of the DNN algorithm and the DNN accelerator's architecture. Hence, these traditional resilience approaches incur high overheads, which makes them challenging to deploy. In this paper, we experimentally evaluate the resilience characteristics of DNN systems (i.e., DNN software running on specialized accelerators). We find that the error resilience of a DNN system depends on the data types, values, data reuses, and types of layers in the design. 
Based on our observations, we propose two efficient protection techniques for DNN systems.", "pdfUrls": [ "http://blogs.ubc.ca/karthik/files/2017/09/DNN-SC17.pdf", "http://blogs.ubc.ca/karthik/files/2017/12/DNN-SC17.pdf", "http://blogs.ubc.ca/karthik/files/2017/11/SC17_talk.pdf", "http://doi.acm.org/10.1145/3126908.3126964", "http://people.csail.mit.edu/emer/papers/2017.11.sc.error_propagation_in_DNNs.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/50d95d13c02dde5854cd3a5d7bbb8f91711f9730", "sources": [ "DBLP" ], "title": "Understanding error propagation in deep learning neural network (DNN) accelerators and applications", "venue": "SC", "year": 2017 }, "50dc3bb8c5b8f7bed0c6d6231e042f56ad6af1d4": { "authors": [ { "ids": [ "1700573" ], "name": "Peng Huang" }, { "ids": [ "39152478" ], "name": "Chuanxiong Guo" }, { "ids": [ "2902416" ], "name": "Lidong Zhou" }, { "ids": [ "1855395" ], "name": "Jacob R. Lorch" }, { "ids": [ "34402895" ], "name": "Yingnong Dang" }, { "ids": [ "22795910" ], "name": "Murali Chintalapati" }, { "ids": [ "22736499" ], "name": "Randolph Yao" } ], "doi": "10.1145/3102980.3103005", "doiUrl": "https://doi.org/10.1145/3102980.3103005", "entities": [ "Bridging (networking)", "Experience", "Fail-stop", "Failure mode and effects analysis", "Failure rate" ], "id": "50dc3bb8c5b8f7bed0c6d6231e042f56ad6af1d4", "inCitations": [ "d0f86618b53c723ecd5e814457050eaa57a5bd8b", "d33b1d2e38b47a75d11deab953508a9d00a6854e", "347e1352fb903b40dce606a1e581e9d601bc289c", "5ae5348b9558729b98a70a7abed4145adeb45bbe", "221ceab1a74c8a9fa4e36a1bec2754e745e36a63", "f607dfa120b68bb9293a9b08125e1d2229e00332" ], "journalName": "", "journalPages": "150-155", "journalVolume": "", "outCitations": [ "1521e801e8e08ecec3b0baabb07f9a6ce0a67a85", "0fb26af56f32a4c083e7b5354309d0e6b3c03ec6", "0174dc1f1e606b937a0c73cc12ee4c3ff3797415", "663e064469ad91e6bda345d216504b4c868f537b", "76eea8436996c7e9c8f7ad3dac34a12865edab24", 
"1d204d774c134dd7df97a6a83e12387efd0c7a01", "086820e40dc8046c30a8751394df167bec047fe1", "260368e4b7ddef442bb5c197078e200b3c0ab7b1", "20f5f8733134d87041b95b742d613051a1fb3fdb", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "070cdebc40691ef8cc3fd88a95be34bf088a123d", "3b9725ad0e3a078e9add5a37ea1ac323f7322d75", "0f6a32792d0882db35fe9391445d4322232b619e", "11b8ef5da9c8df214859bb41b60001a0abd2b5b2", "11a4744f7f0883c4232a9f5aaca8b9d29bfaa762", "1f8699f5d99a0fa362bbc9e5071ac3cbbaf8e4da", "65da29a03c8905cbc0614612d1632864336c4786", "058f6752d85a517aae298586fdf117acdd7560ea", "4f246dd7f2ba3764245d8a16c3048adf0cc68b1d" ], "paperAbstract": "Cloud scale provides the vast resources necessary to replace failed components, but this is useful only if those failures can be detected. For this reason, the major availability breakdowns and performance anomalies we see in cloud environments tend to be caused by subtle underlying faults, i.e., gray failure rather than fail-stop failure. In this paper, we discuss our experiences with gray failure in production cloud-scale systems to show its broad scope and consequences. We also argue that a key feature of gray failure is differential observability: that the system's failure detectors may not notice problems even when applications are afflicted by them. 
This realization leads us to believe that, to best deal with them, we should focus on bridging the gap between different components' perceptions of what constitutes failure.", "pdfUrls": [ "http://ryanphuang.com/paper/grayfailure-hotos17.pdf", "http://ryanphuang.com/talk/hotos17_talk.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/06/paper-1.pdf", "http://doi.acm.org/10.1145/3102980.3103005" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/50dc3bb8c5b8f7bed0c6d6231e042f56ad6af1d4", "sources": [ "DBLP" ], "title": "Gray Failure: The Achilles' Heel of Cloud-Scale Systems", "venue": "HotOS", "year": 2017 }, "50f53cd3aae488bd7dea576abc3551720f015450": { "authors": [ { "ids": [ "39941616" ], "name": "Soudeh Ghorbani" }, { "ids": [ "10736478" ], "name": "Zibin Yang" }, { "ids": [ "1758273" ], "name": "Brighten Godfrey" }, { "ids": [ "1731999" ], "name": "Yashar Ganjali" }, { "ids": [ "1875109" ], "name": "Amin Firoozshahian" } ], "doi": "10.1145/3098822.3098839", "doiUrl": "https://doi.org/10.1145/3098822.3098839", "entities": [ "Algorithm", "BIBO stability", "Clos network", "Data center", "Load balancing (computing)", "MOST Bus", "Randomized algorithm", "STRIPS", "Simulation", "Throughput", "Verilog" ], "id": "50f53cd3aae488bd7dea576abc3551720f015450", "inCitations": [ "2c4e932c9c18efba72862df86aa5157bd50fad80", "9bbd5be2829e49b1fac7f034baf7499cb069db95", "50d4a4bd8a6f56a18b89b277a27694288e405656" ], "journalName": "", "journalPages": "225-238", "journalVolume": "", "outCitations": [ "022a0317d5bf2b38847b03f7c9bc3bfa35950199", "2e3cc2d55770aac26d3ce0cb6ddd96dbbcfec4cc", "282c6a3b573051e3e799d73cfc623ccbd68bcd6a", "67ac957fb2a91c26b1a0b51c2656e7db4566c1b7", "1aafc7066e52f18dee78103822da24a5d85da93c", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "5f28bf666498d5800e015f12318930ce03cd5587", "a7b663428d39b0e181e84ecbef71b63e855f70a9", "089185eb92fb912508924a9dc6ee9a285a145f61", "0def299ea8aa65ef1faea4eeaac3d0b934d36a7b", 
"5f4188f380bd2b7c16773e0e6b69a004a072441b", "2031a6decaf94ce41ac09fc355022429eeeb0e49", "4973d22ad92fe2999f18cc57dd4a4cad81ba2cfe", "55340cad246848be8a1b124036ca82fc1db5c396", "9570d6075ecaf7f5dc28e99edfabc64914d44ca5", "663e064469ad91e6bda345d216504b4c868f537b", "58f692e9b03cb973355aab46bb6f867239aeb513", "55c5a2c1f2f6f66646baf64405464357fa7df8a0", "60065ebe7674e35899c354702f3eb4d8dee413fa", "3233ed7eb09d987ce2ae0dfcbdddefd54fcee288", "9a26f0832fa7508f6396cbee7d06db42e026c0c8", "4408c1d218eaa41f0c4c38893e2488cadf8bb376", "12d3952369fd92db86d4a3fff0a6fa8de3509095", "157629dc2a9d2c2c7696e021ff8c1d5a6e7b4197", "943cf22e168a86fec0381ca380474c1da39e509c", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "524cf45c6ce43043552efab2c2a53ab1ce7e1e05", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "122229239aeba1eb4f1623adb40f1845c582a520", "f57ac7f53438b2877022125bac957fda2bb2a97b", "8e86374859a1d07e049a2c6e1cb11d12302552fb", "2fdfb9098803f9b7d523d9bf67dca8b53cd28cf9", "1b11d4b0b04e0eb061029b57e1a1c436193f13b1", "0f6f717d198ab1b99a63814facaf2fceace6b0fe", "35a5a870cefa0184245cf317381f3dc4092e1781", "347fcf36f0a9b346af7ae87af28a88b837f2477b", "640af017aa8d11f9f31480155c8d5d1a0d8865d7", "14b82ab954a85cb8b336e86cf536c5701ca722e9", "5c5d03e884d4f0094b217c62267466fa11432c8e", "0130c8c2c9bc7f64d9bf0aee5e0704bbeadfe9f3", "fdf4ec07efb5bb79bcc83ab4e427e198f2cd705e", "058f6752d85a517aae298586fdf117acdd7560ea", "43bcabcec7c2595c620cd6fd4c96f517ede80d4e", "234e6be0d4238f76b3ac038ee422be39f391c625", "5594c2ddde27f4262a53668ca9b09ad7a9453102", "0a90d0d00c2cf9fa172abcf5b41a802b69ccad47", "ff19b52b5eeca00e2935eb0675df449b863090bb", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a" ], "paperAbstract": "The trend towards simple datacenter network fabric strips most network functionality, including load balancing, out of the network core and pushes it to the edge. This slows reaction to microbursts, the main culprit of packet loss in datacenters. 
We investigate the opposite direction: could slightly smarter fabric significantly improve load balancing? This paper presents DRILL, a datacenter fabric for Clos networks which performs micro load balancing to distribute load as evenly as possible on microsecond timescales. DRILL employs per-packet decisions at each switch based on local queue occupancies and randomized algorithms to distribute load. Our design addresses the resulting key challenges of packet reordering and topological asymmetry. In simulations with a detailed switch hardware model and realistic workloads, DRILL outperforms recent edge-based load balancers, particularly under heavy load. Under 80% load, for example, it achieves 1.3-1.4x lower mean flow completion time than recent proposals, primarily due to shorter upstream queues. To test hardware feasibility, we implement DRILL in Verilog and estimate its area overhead to be less than 1%. Finally, we analyze DRILL's stability and throughput-efficiency.", "pdfUrls": [ "http://web.engr.illinois.edu/~ghorban2/papers/drill_sigcomm.pdf", "http://doi.acm.org/10.1145/3098822.3098839", "http://pbg.cs.illinois.edu/papers/ghorbani17drill.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/50f53cd3aae488bd7dea576abc3551720f015450", "sources": [ "DBLP" ], "title": "DRILL: Micro Load Balancing for Low-latency Data Center Networks", "venue": "SIGCOMM", "year": 2017 }, "5110777d75dbfb9e52e5fa6a46ba366a4b8c4592": { "authors": [ { "ids": [ "1807924" ], "name": "Pierre Bourhis" }, { "ids": [ "2478931" ], "name": "Juan L. 
Reutter" }, { "ids": [ "37163793" ], "name": "Fernando Su\u00e1rez" }, { "ids": [ "2434366" ], "name": "Domagoj Vrgoc" } ], "doi": "10.1145/3034786.3056120", "doiUrl": "https://doi.org/10.1145/3034786.3056120", "entities": [ "Data model", "JSON", "Query language", "World Wide Web" ], "id": "5110777d75dbfb9e52e5fa6a46ba366a4b8c4592", "inCitations": [ "9312e5efa0dcef1445d45a41771f12e2a8dc6715", "239869c5679418fe6f35eac3cff5c64dc6fc8c57", "0318b516c9e6649b990a31434c91295aa842ddb0", "7c57ed8f24039fa9b450c46eccce6ede178e35fc" ], "journalName": "", "journalPages": "123-135", "journalVolume": "", "outCitations": [ "14b88bbd16edde7606a350d7294868c232291406", "7f236d19251bde10e2b3c6dee4e8936776db8b0a", "91df73b5d59761a01163226870575549d56a102d", "4949f1caaf36b540f5b65f28d787bdfdaef30bf7", "c69d2cf01905204693eaf34dd1c8c4c585471ebe", "84321e662b24363e032d680901627aa1bfd6088f", "1421ad54b0e144e7843a4eb8f6076e5f2a64caad", "bb036ab9e86d78268b46204c809a499bede69acf", "12b83ecf80c30ecc8c0234c089f0c97ec1be3ba4", "9d7db8249836e0c61945b056873ab37edd8e0956", "5b5e87bdff095b3ca7d2ad88d463b2c0a7b94373", "395094bc7421bccb167b6d6175b48e534715a60d", "a09245b5fe9927467d0ee3ca3804be78b000af92", "3d7617a79f0ed4a4e1087a442da7b8dc32f3bb02", "1d4667554f6da3bb648c40768e7da41b2437b5ff", "7ddb0230137648491a1a117fe73105f77485660b", "33f9d19cdb5f3df4c5c36237290449d6dc0f8746", "57eb3b559b3dcbdff53ca7093fcd263ff8576e0e" ], "paperAbstract": "Despite the fact that JSON is currently one of the most popular formats for exchanging data on the Web, there are very few studies on this topic and there is no agreement upon a theoretical framework for dealing with JSON. Therefore in this paper we propose a formal data model for JSON documents and, based on the common features present in available systems using JSON, we define a lightweight query language allowing us to navigate through JSON documents. 
We also introduce a logic capturing the schema proposal for JSON and study the complexity of basic computational tasks associated with these two formalisms.", "pdfUrls": [ "http://doi.acm.org/10.1145/3034786.3056120", "https://arxiv.org/pdf/1701.02221v1.pdf", "http://arxiv.org/abs/1701.02221" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5110777d75dbfb9e52e5fa6a46ba366a4b8c4592", "sources": [ "DBLP" ], "title": "JSON: Data model, Query languages and Schema specification", "venue": "PODS", "year": 2017 }, "51240ae6d0823a6a0368f1ee62937f52924055b1": { "authors": [ { "ids": [ "2728869" ], "name": "Ali JavadiAbhari" }, { "ids": [ "40022491" ], "name": "Pranav Gokhale" }, { "ids": [ "39813089" ], "name": "Adam Holmes" }, { "ids": [ "7875262" ], "name": "Diana Franklin" }, { "ids": [ "1912190" ], "name": "Kenneth R. Brown" }, { "ids": [ "1708269" ], "name": "Margaret Martonosi" }, { "ids": [ "1691956" ], "name": "Frederic T. Chong" } ], "doi": "10.1145/3123939.3123949", "doiUrl": "https://doi.org/10.1145/3123939.3123949", "entities": [ "Angular defect", "Compiler", "Computation", "Computer", "Error detection and correction", "Fastest", "Microarchitecture", "Network congestion", "Optimizing compiler", "Quantum", "Quantum computing", "Quantum error correction", "Quantum mechanics", "Qubit", "Scalability", "Scheduling (computing)", "Simulation", "Toric code" ], "id": "51240ae6d0823a6a0368f1ee62937f52924055b1", "inCitations": [ "8adef2c904c0888cc9a296c3f48e890972c6f3ed" ], "journalName": "", "journalPages": "692-705", "journalVolume": "", "outCitations": [ "88331df302fa2b13d6f1dc99ada50d0003b8c404", "47b3e7ec65493209886e90eef167ee6ac5576fa4", "30f75cdf2fd9e860992b8b9845409ebff6419b83", "22c74d8be071084ce8812af19548e7bf2bf0c8b6", "073f15f25656ce76f12560b9b13ec913fe7bdd59", "899a630215cf1e5c1230dd9ff691d9b68194f56e", "2b1170727f6cc7988ce21da94cfd6117513bd7ee", "4b8f25e48d326c2087ae5ec471fdb7f877b81627", "1d4de9e62b7e31400150271d439587f031261af0", 
"46cdd5bec1b2232c2f650d87ed414337186b48c9", "6f2579da85bcadb93e0ed9ac3f7524113c2799e5", "57a8702b9dd69d3ff3cd7044b2eba9a69a7a0e56", "74d84fc0b7435ccf199f77b39237313c8ad7efde", "2ee65fc3b336788be2e477aba28b54c7b1f52073", "7c4f7100c239de1a429cc23b286953aa70e6e5df", "068da70c9ba500e8c796967457a8a99a1adca795", "cd6f20f1389318c0409804bb7147690d2da00bf8", "be3e7db1be0a51e6212d0a1d1b88a13756d9269b", "8ac208361a9ccdfacb789d6579e31601532926e6", "127f20d19f473773bd1e8f35b09c8b22563f0ee1", "0200b1a2977f1dee45fd7d0a8e2e50cc46aed67c", "54c4e15dfea26e5a8f96ba7674f9b266dbb9f703", "0b8c0bac48469561a7e0938c65306e5719239d65", "52c82934fc0911133382dc90ec586d24b8f10aac", "020cf533980f49c27c8d55c7d4549872bea460ff", "19873af56f207c1a89fac7fb667dd70b039934cb", "eaa5af67af88aa155ed4ef3a76a716303ecd332d", "150b07cc949afdf4be5774bf1b26c7c9b1b24366", "9367fe74cfa3b151e04b0050cbfc4689f0a539d6", "b4ac17c649bc8dc4653ce6c114d4eeae3d6ed66f", "0e115b7c4a5940d2528d1c617ed390f706bd87a3", "031a4ae4dbc7982dfbff69d1b8e6d3a68d3051a9", "a9b55406ffff7861ba089a9ea86cb0eceef237ff", "57eaf807b2639d4c01af674ee511f8a6f7004c8b", "a39cff49f26909c4f45cf1c23d283cc50909424a", "2e426b7cfdbaebc53a7fc1dd2a2c60b35d4b60a5", "1574cc275d76841d78ad177c17b2f923f5bfa43d", "24213cb6536220159f93c47e8c9ea177a65d92b9", "2b0b0f969fdbd968e0dfa7130fbe916f6afc9afd", "93bef1d63693b4bb196670d0496829ee5e8b6221", "6322979bfee2012343a93df514dff2430ba0182e", "321a6a0b6d8b4baf3d076132768f0952d11a8012", "3e5b5c63f0bea93ea07429bd529a72fd915aab66", "1de3978475aa0ba00ef1436d77e86a7f8c82e50c", "4162b084682391b0a328f470f40d0f8f4aff13fb", "cf18a29ff381705b677045d031b147911bbb6990", "29ce4b4fa1bfbf7e24c349994f89f0d15b9d703c", "37153db5d744f3bc92027107a6645c5bca95aca7", "135d55a9f06c92a5841f8212cf6dffb429563e70" ], "paperAbstract": "Quantum computing (QC) is at the cusp of a revolution. Machines with 100 quantum bits (qubits) are anticipated to be operational by 2020 [30, 73], and several-hundred-qubit machines are around the corner. 
Machines of this scale have the capacity to demonstrate quantum supremacy, the tipping point where QC is faster than the fastest classical alternative for a particular problem. Because error correction techniques will be central to QC and will be the most expensive component of quantum computation, choosing the lowest-overhead error correction scheme is critical to overall QC success. This paper evaluates two established quantum error correction codes---planar and double-defect surface codes---using a set of compilation, scheduling and network simulation tools. In considering scalable methods for optimizing both codes, we do so in the context of a full microarchitectural and compiler analysis. Contrary to previous predictions, we find that the simpler planar codes are sometimes more favorable for implementation on superconducting quantum computers, especially under conditions of high communication congestion.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123949", "http://people.cs.uchicago.edu/~ftchong/papers/Micro17-qc.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/51240ae6d0823a6a0368f1ee62937f52924055b1", "sources": [ "DBLP" ], "title": "Optimized surface code communication in superconducting quantum computers", "venue": "MICRO", "year": 2017 }, "5141c8864bf5a00df9403362318b4e4dfce8484c": { "authors": [ { "ids": [ "1710738" ], "name": "Reuven Bar-Yehuda" }, { "ids": [ "1785299" ], "name": "Keren Censor-Hillel" }, { "ids": [ "1772752" ], "name": "Mohsen Ghaffari" }, { "ids": [ "3384620" ], "name": "Gregory Schwartzman" } ], "doi": "10.1145/3087801.3087806", "doiUrl": "https://doi.org/10.1145/3087801.3087806", "entities": [ "Algorithm", "Approximation", "Approximation algorithm", "Computer science", "Distributed algorithm", "Graph coloring", "Independent set (graph theory)", "Line graph", "Matching (graph theory)", "Maximal independent set", "Maximal set", "Network congestion", "Randomized algorithm", "Simulation" ], "id": 
"5141c8864bf5a00df9403362318b4e4dfce8484c", "inCitations": [ "ccf854435da5d6113f1c1e3f70dce7e57bae3091", "dc6eac81c50ab561185844f59f6face43a59e876", "8931c1a4ad2ddc5f5e9ba0feae4a29626ebc0f8e", "4ce3d70846f4cbcd9dc97d776d688f41ee347902", "d32cbfcf2e40251f8e4e19d907c979b450910a32" ], "journalName": "", "journalPages": "165-174", "journalVolume": "", "outCitations": [ "78c9c5a2fcc46c84c71da163ef79ce5a5383164f", "64257fbe36c61d28f1761d4ae98efd62adfecc6d", "b97295e98c63f4069c47562a6e8a5040fe26cbdf", "26bfc62fd5db796036554a255cf580d58f144b78", "252981788ab2699d3885be7d6fa8fa1a66ad0413", "01f2dc9dbc45ec3179f825fd86c9e6487cc9b52f", "1ef15e82b40e5cc0d69cddb383d6cc4c89ad4601", "0bc17b59a3634472024de414fbcdb6d455819cac", "f562d5471143e525b83e24859e0422fa21215ba3", "284e8f622ab157e43c015ce6be33d8b758b12b1f", "764ace9519283e45664e490a6df581cb68b5250b", "7bb492a6794bd90e8eba049afc8283e8314d620c", "6e67c1e6e677508cf6e4841e07fd89e45a6be958", "b9c5364e1bc9254f88efdcdcc88f642afc346e66", "215aad1520ec1b087ab2ba4043f5e0ecc32e7482", "3fce58dc087a4a050a39f1c9ec6e8aadb427f29a", "0a4a34b9344b46596b2198560c7152178fe708cd", "50b017733d4860932276442de6eab5c09675d9cf", "3b02fa0413c118e8d8503177b7e37d4784546cf8", "096f9f8fc05e7097ef520a1e8cbe988b15609c4c", "64643e6de1562cf3f780616eb647f872d38bd1ba", "12e75f79cdb2ea44930412f51e0351e70fa3ee43", "145c3ca2ea0faebcdc42de8fa24dc57ecdca341d", "8853ebad60e89a637b7131205f3ab8c4be371df0", "30af21e6ac6a0826071251c8a247e5dbfca472e7", "f2c4f4a13e38b906543f4a60278c9ab3641e9c02", "5be6a7e25c5e4cb0f1ece182042dc6275e438bbd", "6f04121c4626505c579ca8c67a8a190c7770d422", "d00db20e0a1a92d4bb566d20e3341060b9f4158a", "41479edf5cbd60272b652e89ec87dcd679116ce9", "2f12be163569fdb21be6e9192d010fb5022d32c2", "3ddac15bd47bc0745db4297d30be71af43adf0bb", "40b96e5570b92467927a3086e7ca429a2f402019", "aa5bf2ce0933283fccf525c5995eb8fd41521b84", "6035d6123b94d65ce7cc25d0fa95680f550bdc31" ], "paperAbstract": "We present a simple distributed \u2206-approximation algorithm for 
maximum weight independent set (MaxIS) in the CONGEST model which completes in O(MIS(G) \u00b7 logW ) rounds, where \u2206 is the maximum degree, MIS(G) is the number of rounds needed to compute a maximal independent set (MIS) on G, and W is the maximum weight of a node. Plugging in the best known algorithm for MIS gives a randomized solution in O(logn logW ) rounds, where n is the number of nodes. We also present a deterministic O(\u2206 + log\u2217 n)-round algorithm based on coloring. We then show how to use our MaxIS approximation algorithms to compute a 2-approximation for maximum weight matching without incurring any additional round penalty in the CONGEST model. We use a known reduction for simulating algorithms on the line graph while incurring congestion, but we show our algorithm is part of a broad family of local aggregation algorithms for which we describe a mechanism that allows the simulation to run in the CONGEST model without an additional overhead. Next, we show that for maximum weight matching, relaxing the approximation factor to (2 + \u03b5) allows us to devise a distributed algorithm requiring O( log \u2206 log log \u2206 ) rounds for any constant \u03b5 > 0. For the unweighted case, we can even obtain a (1+\u03b5)-approximation in this number of rounds. These algorithms are the first to achieve the provably optimal round complexity with respect to dependency on \u2206. \u2217Technion, Department of Computer Science, {reuven, ckeren}@cs.technion.ac.il, gregory.schwartzman@gmail.com. Supported in part by the Israel Science Foundation (grant 1696/14). \u2020ETH Zurich, ghaffari@mit.edu. ar X iv :1 70 8. 
00 27 6v 1 [ cs .D C ] 1 A ug 2 01 7", "pdfUrls": [ "http://arxiv.org/abs/1708.00276", "https://arxiv.org/pdf/1708.00276v1.pdf", "http://groups.csail.mit.edu/tds/papers/Ghaffari/MaximumMatching.pdf", "http://doi.acm.org/10.1145/3087801.3087806" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8444/b7e4a0909cb586dd1fdbaba665cd520588a6.pdf", "s2Url": "https://semanticscholar.org/paper/5141c8864bf5a00df9403362318b4e4dfce8484c", "sources": [ "DBLP" ], "title": "Distributed Approximation of Maximum Independent Set and Maximum Matching", "venue": "PODC", "year": 2017 }, "51453761cb73603abd3b4813400c4776e6e6cd18": { "authors": [ { "ids": [ "1737553" ], "name": "Jian Xu" }, { "ids": [ "31961604" ], "name": "Qingqing Cao" }, { "ids": [ "39259723" ], "name": "Aditya Prakash" }, { "ids": [ "2187214" ], "name": "Aruna Balasubramanian" }, { "ids": [ "1755646" ], "name": "Donald E. Porter" } ], "doi": "10.1145/3117811.3117819", "doiUrl": "https://doi.org/10.1145/3117811.3117819", "entities": [ "Android", "Computer form factor", "Graphical user interface", "Head-mounted display", "Look and feel", "Programming model", "Relay", "Second screen", "Smart device", "Smartphone", "Smartwatch", "User interface", "Wearable technology" ], "id": "51453761cb73603abd3b4813400c4776e6e6cd18", "inCitations": [ "702d0b68b9702eb05cbf422f5d5e1be446dc5ecd" ], "journalName": "", "journalPages": "369-382", "journalVolume": "", "outCitations": [ "b4b89fca7c1704f48be86ae8d547d18e9ff46821", "2166ed56495a8e528f891067a138f63913e9fb00", "ae2e07f55c6deda39a5b42f3e0d9a6f817623d96", "434553e2a9b6048f1eb7780ec2cd828dc2644013", "c517dd12d208674b0fc2a8ca86d0d744ebc57aba", "08832863bc3f041222f381c8ae143f8a66449059", "4b1a1b12920f05eba88dc3a8b0aaf8de5f6c1d0a", "169b847e69c35cfd475eb4dcc561a24de11762ca", "0af04aaa5c53a875b7b190dc3fce277d90076645", "9156a8a04250a6eab18f1bd63c30a7227fa2051d", "65e5349c59d0e8e894b00556a9c839dae32d3a00", "58c0845d4173769eeff5c0df5b9bafb9ba4feda6", 
"24720e02f13d28919cfc626c8bb405932f7ebfd2", "2639120c95316e6693de2fd8a9210ccfd3d8d812", "49e58f1a0dc36788ce5c69f1b81aa1e673f7b31f", "13c22afdacff7735ef548da0ae7aaff496513532", "983e9ac9b9f29eaa3b61b26e085da0e13b71b3e6", "58a4c8bd08b36aee9f86d1197b01fec6437ee7df", "6ecb26ef71af4764b772940c19519ae19b56d6a4", "107c1c52a2a506b6dfcae70da266ffaeb63de5b6", "49f5c845c6f56fc4053109f0591b312042ca6293", "4498b9b6c144ecba4d379ff9eaad53e6d694f5af", "5773e04dbeb04d9dae1d627fb966a85556bedf2f", "1a78d4879c56d08e4f632447fbcae4976e5d6aa7", "75d74f13e9064c01f4939740176961d72fe77a96", "04d71a416474a61a023b4d16c8794cfa11fb29ac", "047c0bb66997200427508f19431d3f19383f79a9", "08b186594b76b2e3f48dcc8016416203def1b3b3", "10601ff2dfaf52b5f14a5aab6528a3c414817336", "3f6f619fea4e9241d9fa5d39be4e985757e571de", "b372ff0bbe433da60e7e32b32c56d50c4f969344", "1bf5fd229e04562ae23f78122621416742a511ae", "2e4dd64ad2d7c1505f52626a2ddf7afcefd991fd", "dafc171c2bc15c750aa31fef47e4e14e7f38f961", "9a9b86488ccb64276fdbe47d5322ee27144ac835", "2dbd0bca3fb1a57f441f1867ac0fa7dfc245ae66", "f59ef2995ac542399dcc3956ba5747be4d9ed612" ], "paperAbstract": "Wearable devices such as smartwatches offer exciting new opportunities for users to interact with their applications. However, the current wearable programming model requires the developer to write a custom companion app for each wearable form factor; the companion app extends the smartphone display onto the wearable, relays user interactions from the wearable to the phone, and updates the wearable display as needed. The development effort required to write a companion app is significant and will not scale to an increasing diversity of form factors. This paper argues for a different programming model for wearable devices. The developer writes an application for the smartphone, but only specifies a UI design for the wearable. 
Our UIWear system abstracts a logical model of the smartphone GUI, re-tailors the GUI for the wearable device based on the specified UI design, and compiles it into a companion app that we call the UICompanion app. We implemented UIWear on Android smartphones, AndroidWear smartwatches, and Sony SmartEyeGlasses. We evaluate 20 developer-written companion apps from the AndroidWear category on Google Play against the UIWear-created UICompanion apps. The lines-of-code required for the developer to specify the UI design in UIWear is an order-of-magnitude smaller compared to the companion app lines-of-code. Further, in most cases, the UICompanion app performed comparably or better than the corresponding companion app both in terms of qualitative metrics, including latency and energy, and quantitative metrics, including look-and-feel.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117819", "http://cs.unc.edu/~porter/pubs/uiwear.pdf", "https://netsys.cs.stonybrook.edu/sites/netsys.cs.stonybrook.edu/files/com052-xuA.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/51453761cb73603abd3b4813400c4776e6e6cd18", "sources": [ "DBLP" ], "title": "UIWear: Easily Adapting User Interfaces for Wearable Devices", "venue": "MobiCom", "year": 2017 }, "51491eb0e341731ef60141b0ecd82d97780ec264": { "authors": [ { "ids": [ "1709514" ], "name": "George Bosilca" }, { "ids": [ "32974771" ], "name": "Cl\u00e9ment Foyer" }, { "ids": [ "1795494" ], "name": "Emmanuel Jeannot" }, { "ids": [ "39868021" ], "name": "Guillaume Mercier" }, { "ids": [ "31917146" ], "name": "Guillaume Papaur\u00e9" } ], "doi": "10.1007/978-3-319-64203-1_4", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_4", "entities": [], "id": "51491eb0e341731ef60141b0ecd82d97780ec264", "inCitations": [ "4ff196f7dea7bca99558c17aadd2249354ded70c" ], "journalName": "", "journalPages": "49-62", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ 
"https://doi.org/10.1007/978-3-319-64203-1_4" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/51491eb0e341731ef60141b0ecd82d97780ec264", "sources": [ "DBLP" ], "title": "Online Dynamic Monitoring of MPI Communications", "venue": "Euro-Par", "year": 2017 }, "516fcfe0f79eef018395d9d046e5cce6de080d79": { "authors": [ { "ids": [ "39996718" ], "name": "Sibo Wang" }, { "ids": [ "35474778" ], "name": "Renchi Yang" }, { "ids": [ "33285410" ], "name": "Xiaokui Xiao" }, { "ids": [ "7391053" ], "name": "Zhewei Wei" }, { "ids": [ "3018083" ], "name": "Yin Yang" } ], "doi": "10.1145/3097983.3098072", "doiUrl": "https://doi.org/10.1145/3097983.3098072", "entities": [ "Algorithm", "Approximation algorithm", "Commodity computing", "Computation", "Computer performance", "Data center", "Experiment", "Heuristic", "Monte Carlo", "PageRank", "Personalization", "Portland Pattern Repository", "Server (computing)", "Social network", "TOP500", "Web search engine" ], "id": "516fcfe0f79eef018395d9d046e5cce6de080d79", "inCitations": [ "73916c66a9e409f1636935ed37699242f035a0f2" ], "journalName": "", "journalPages": "505-514", "journalVolume": "", "outCitations": [ "631e721376e844a016ffe18a8a9af3d75766f91c", "3e1e5a5edd5858d906b49363984a3e3659fb9478", "0ace72127a00b51623e44ec368121a8ef676410f", "1b348075d02cc532b1a01955e21ba3062e769113", "183c44d2b9ac64e8c795464f91ef98f1e3ba2ea3", "17e72ecd9f7ee25672a86b3867245cda4f84a627", "1606f7d2634e593d617ee67985fbdd9915bd7190", "0ad127e170514747c695e7cfc1fc88271c1e0634", "3105c03f6ee3135ac6b649ed6313ae0e6c0eb8fc", "17013e735af98f78fe52e2632941fb48e79c7b58", "fc73afbb4ec397eca35b382abbb1ff64f8ed12b1", "3ffc7a34be103b637a8745c00680fa584d74cdf4", "c581e212cd7679a279d5486027a5d5446a65a536", "2a622720d4021259a6f6d3c6298559d1b56e7e62", "51ea20dc4f688af41f9840a854d15bac49db1be6", "2e9755294bfcebbe2d6bbdc7937cf76f25d605fc", "e5b0de5111126bda74a0b5869ea082588e64094d", "8e0699c71c80d1f25e5eb46af03e83e7db340809", 
"1a3657d9dfdcd326ed776c1c4eeadd3710dc1fda", "9fdfb79b25450f42434d0baf39362052e1192acd", "eb82d3035849cd23578096462ba419b53198a556", "62ec67db3c5f7758d29573759a664c0212728f81", "29efbdf3f95cee97405accafdebd3bd374f1f003" ], "paperAbstract": "Given a graph G, a source node s and a target node t, the personalized PageRank (PPR) of t with respect to s is the probability that a random walk starting from s terminates at t. A single-source PPR (SSPPR) query enumerates all nodes in G, and returns the top-k nodes with the highest PPR values with respect to a given source node s. SSPPR has important applications in web search and social networks, e.g., in Twitter's Who-To-Follow recommendation service. However, SSPPR computation is immensely expensive, and at the same time resistant to indexing and materialization. So far, existing solutions either use heuristics, which do not guarantee result quality, or rely on the strong computing power of modern data centers, which is costly.\n Motivated by this, we propose FORA, a simple and effective index-based solution for approximate SSPPR processing, with rigorous guarantees on result quality. The basic idea of FORA is to combine two existing methods Forward Push (which is fast but does not guarantee quality) and Monte Carlo Random Walk (accurate but slow) in a simple and yet non-trivial way, leading to an algorithm that is both fast and accurate. Further, FORA includes a simple and effective indexing scheme, as well as a module for top-k selection with high pruning power. Extensive experiments demonstrate that FORA is orders of magnitude more efficient than its main competitors. 
Notably, on a billion-edge Twitter dataset, FORA answers a top-500 approximate SSPPR query within 5 seconds, using a single commodity server.", "pdfUrls": [ "http://shichuan.org/hin/topic/Ranking/2017.%20KDD2017%20FORA%20Simple%20and%20Effective%20Approximate%20Single-Source%20Personalized%20pagerank.pdf", "http://doi.acm.org/10.1145/3097983.3098072" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/516fcfe0f79eef018395d9d046e5cce6de080d79", "sources": [ "DBLP" ], "title": "FORA: Simple and Effective Approximate Single-Source Personalized PageRank", "venue": "KDD", "year": 2017 }, "517a01774f760d08c73bd5de2a561c98fd5814dc": { "authors": [ { "ids": [ "1837948" ], "name": "Ugljesa Milic" }, { "ids": [ "2338598" ], "name": "Oreste Villa" }, { "ids": [ "2256123" ], "name": "Evgeny Bolotin" }, { "ids": [ "37763788" ], "name": "Akhil Arunkumar" }, { "ids": [ "3149281" ], "name": "Eiman Ebrahimi" }, { "ids": [ "1684691" ], "name": "Aamer Jaleel" }, { "ids": [ "3094183" ], "name": "Alex Ram\u00edrez" }, { "ids": [ "2899855" ], "name": "David W. 
Nellans" } ], "doi": "10.1145/3123939.3124534", "doiUrl": "https://doi.org/10.1145/3123939.3124534", "entities": [ "Data parallelism", "Graphics processing unit", "Memory hierarchy", "Moore's law", "Non-uniform memory access", "Parallel computing", "Performance per watt", "Programming model", "Scalability", "Scheduling (computing)", "Single instruction, multiple threads", "Throughput", "Transistor", "Uniform memory access" ], "id": "517a01774f760d08c73bd5de2a561c98fd5814dc", "inCitations": [], "journalName": "", "journalPages": "123-135", "journalVolume": "", "outCitations": [ "512392937104faea422de3a9eb0d3fbc53848f2c", "46690cdff60ef7f35c2c19d6eaac89964a6b4f79", "8400d290d55005839b678a95f4f18ecdce76dbe1", "b298696bb75c3eec7a64746eacfa8fb262b38be6", "70c4ef7c1aad74d0fbe362ce4260e94f99fc4aee", "d52b40ed62b865ee455b0fd7741e83af9353ee3c", "5fe43f4d79d2829ba062fe9cf1711ee60903dd24", "a213b244778e310bc4b27cbd021f964258b4c7a7", "061356704ec86334dbbc073985375fe13cd39088", "6c86a995c3454d888713e66948c0d09b1451f0c2", "081dec43c2dbe76ff43c810594495f11ab092a10", "333da42f4369dff8ca905ad21ea6ee2f5dc99d55", "64ca550147ced62584f24cfb3003157c7b7086fe", "5d279a21f65eef2bf5027d0cf1e56f2d740b314e", "014adf8bdadaec12dd6317c8cf31b645228f198e", "438df624981925e6ecfb729572a02bfeacf5b073", "0d5ec0f90b9d07ebc48f4e00b2e583e5d49130dc", "804df33cbda438274e1ae2d6d9e7609238a8bb27", "a24c68f1d034ae19ea2cfcfdccdf189118cd70f9", "18e9cd28be46edec0f3ecd39b78b8b7434db85d6", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "68c3d3fca5c7bd172832f480a92e98d106da5c34", "3d50c803cc715e51d263f5a42b06858be9466c0f", "b764074aa13491173aa0882aae13565e7aa071b2", "230fdde877c43710a5cee0bb26dcd17eb1e24e7a", "1b91fcb25a395a12e7b6bc49473f223ad47f869f", "399968ac10586b66252d4f7bfba9609612f9fbb3", "3efab8db0d6e024bb9cfca5f6bd0cb63a8d9f162" ], "paperAbstract": "GPUs achieve high throughput and power efficiency by employing many small single instruction multiple thread (SIMT) cores. 
To minimize scheduling logic and performance variance they utilize a uniform memory system and leverage strong data parallelism exposed via the programming model. With Moore's law slowing, for GPUs to continue scaling performance (which largely depends on SIMT core count) they are likely to embrace multi-socket designs where transistors are more readily available. However when moving to such designs, maintaining the illusion of a uniform memory system is increasingly difficult. In this work we investigate multi-socket non-uniform memory access (NUMA) GPU designs and show that significant changes are needed to both the GPU interconnect and cache architectures to achieve performance scalability. We show that application phase effects can be exploited allowing GPU sockets to dynamically optimize their individual interconnect and cache policies, minimizing the impact of NUMA effects. Our NUMA-aware GPU outperforms a single GPU by 1.5×, 2.3×, and 3.2× while achieving 89%, 84%, and 76% of theoretical application scalability in 2, 4, and 8 sockets designs respectively. Implementable today, NUMA-aware multi-socket GPUs may be a promising candidate for scaling GPU performance beyond a single socket.", "pdfUrls": [ "http://hps.ece.utexas.edu/people/ebrahimi/pub/milic_micro17.pdf", "http://doi.acm.org/10.1145/3123939.3124534" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/517a01774f760d08c73bd5de2a561c98fd5814dc", "sources": [ "DBLP" ], "title": "Beyond the socket: NUMA-aware GPUs", "venue": "MICRO", "year": 2017 }, "51871d01c26acb651c81adaf073c32c3d9ec0f0b": { "authors": [ { "ids": [ "2085723" ], "name": "Yiping Kang" }, { "ids": [ "2546385" ], "name": "Johann Hauswald" }, { "ids": [ "2443277" ], "name": "Cao Gao" }, { "ids": [ "1860422" ], "name": "Austin Rovinski" }, { "ids": [ "1751516" ], "name": "Trevor N. 
Mudge" }, { "ids": [ "3348715" ], "name": "Jason Mars" }, { "ids": [ "2235128" ], "name": "Lingjia Tang" } ], "doi": "10.1145/3037697.3037698", "doiUrl": "https://doi.org/10.1145/3037697.3037698", "entities": [ "Algorithm", "Artificial neural network", "Cloud computing", "Computation", "Computational resource", "Computer vision", "Cortana (Halo)", "Data center", "End-to-end principle", "Google Now", "Machine learning", "Mobile device", "Natural language", "Profiling (information science)", "Scheduling (computing)", "Server (computing)", "Siri", "Throughput" ], "id": "51871d01c26acb651c81adaf073c32c3d9ec0f0b", "inCitations": [ "a5bdabd7ba2b08005cf20049d0f8ec6122e72635", "2c188c0ecd59da5bdcee384a1cc3471078aa8b6a", "a69c07779a23c50e4d51dda92c2eeeaf1c0ab347", "651297aaeece71f3d48c70455ed37f11ed512661", "dfe7ac9c2dbbe70c38846f6e2970bf644875b91b", "b2f5c5e52d8c8f6095c0e2240456449ee2fccd05", "921d43742fb5c7f161b889e30bd737d632c044c6", "4636d53cc1548f2cd7a185c8ae5fe2320b0502da", "e257102fc76d1ef59bcfb8ae24472b68653bfd3f", "178738930dc750ef8cf70f1dc7fbab6edca0d184", "33b61e78782777ba4885373f4fcb5e1d858f94db", "94ac3008bf6be6be6b0f5140a0bea738d4c75579" ], "journalName": "", "journalPages": "615-629", "journalVolume": "", "outCitations": [ "092a1cf971fb8359d3293004c6f1de82f05f3afb", "68837728232463651283edbb7ef0c93b2f502b2b", "2538e3eb24d26f31482c479d95d2e26c0e79b990", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "1d4ac1fd1706a1ab3d93d8fa481d12332068fc30", "1d035d1d445f5ae5457db98af49ce1b87be1ebd5", "66f0fa52c557f232a902267d985b9c4b4aa8a85e", "b04c9e851ae605592d693aa65f0d753b8af08feb", "3c0bc4e9d30719269b0048d4f36752ab964145dd", "162d958ff885f1462aeda91cd72582323fd6a1f4", "14ce7635ff18318e7094417d0f92acbec6669f1c", "362d884ff43d8c7cd6bce184944cfc04cdd57c18", "0c7d7b4c546e38a4097a97bf1d16a60012916758", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "3a33d36257a40d180bef5385c8586fb618fc1161", "209932cd2e3f5da071c4f6341a3b8b29cf50cc4a", "81b7dcaef4a53daab41658a4d1e97972d04b3384", 
"60f22ad725f041fff81d6371242485bbe5c3ebb6", "0623414994c29a74c06eeea0a145e9d2e72e987a", "405544638e4a7b3d944ba4596066d09bffa06f45", "14d644c617b1bdf57d626d4d14f7210f99d140e6", "00aaa84be127c04a21b5f5f8dc5d2426921654e2", "ba46ae9b310e5c2a3f31fedc308ffcf4ebd6ab06", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "0c6c30e3052fcc01aa5ee38252d77f75322d7b3f", "02b141ddc423469afde9c99cf76028095ef28127", "06706cd6c46bc413bb9c272d6c1c034313ff2742", "2ea6e3243c9aa5d9910cf44c4f0e18002bf01638", "287634bfbcc597ce27632f4045c8a5a563d2a086", "061356704ec86334dbbc073985375fe13cd39088", "1a4f15385f40d8ae503a29c4d70c5a908cf492d8", "38ffe43d59e6db4edc108606a56ae9b76abaa7b5", "8d2fb424ccd5ae011dd444ac4fa8282bad9e76ab", "036277d492dd5777e87e5b33ffd809e5c617a37a", "18e9a7eea9c714c24152b9c6dd5cd12fb2c4b495", "1c6ce41169382c27ffd3f856305561ba68cd59b2", "0ca45602bbf7a48e5d43e2ec11371df2801bc5b8", "13b4c25333158f630025b2b2db72efa102f9cf46", "7a978f2902460e732c50c36a171deb11733df1fc" ], "paperAbstract": "The computation for today's intelligent personal assistants such as Apple Siri, Google Now, and Microsoft Cortana, is performed in the cloud. This cloud-only approach requires significant amounts of data to be sent to the cloud over the wireless network and puts significant computational pressure on the datacenter. However, as the computational resources in mobile devices become more powerful and energy efficient, questions arise as to whether this cloud-only processing is desirable moving forward, and what are the implications of pushing some or all of this compute to the mobile devices on the edge.\n In this paper, we examine the status quo approach of cloud-only processing and investigate computation partitioning strategies that effectively leverage both the cycles in the cloud and on the mobile device to achieve low latency, low energy consumption, and high datacenter throughput for this class of intelligent applications. 
Our study uses 8 intelligent applications spanning computer vision, speech, and natural language domains, all employing state-of-the-art Deep Neural Networks (DNNs) as the core machine learning technique. We find that given the characteristics of DNN algorithms, a fine-grained, layer-level computation partitioning strategy based on the data and computation variations of each layer within a DNN has significant latency and energy advantages over the status quo approach.\n Using this insight, we design Neurosurgeon, a lightweight scheduler to automatically partition DNN computation between mobile devices and datacenters at the granularity of neural network layers. Neurosurgeon does not require per-application profiling. It adapts to various DNN architectures, hardware platforms, wireless networks, and server load levels, intelligently partitioning computation for best latency or best mobile energy. We evaluate Neurosurgeon on a state-of-the-art mobile development platform and show that it improves end-to-end latency by 3.1X on average and up to 40.7X, reduces mobile energy consumption by 59.5% on average and up to 94.7%, and improves datacenter throughput by 1.5X on average and up to 6.7X.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037698", "http://web.eecs.umich.edu/~jahausw/publications/kang2017neurosurgeon.pdf", "http://web.eecs.umich.edu/~ypkang/downloads/kang17neurosurgeon.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/51871d01c26acb651c81adaf073c32c3d9ec0f0b", "sources": [ "DBLP" ], "title": "Neurosurgeon: Collaborative Intelligence Between the Cloud and Mobile Edge", "venue": "ASPLOS", "year": 2017 }, "51ac7123db308803413c9c408a377dcd4bc19bee": { "authors": [ { "ids": [ "2607427" ], "name": "Ivan Walulya" }, { "ids": [ "1701362" ], "name": "Philippas Tsigas" } ], "doi": "10.1109/IPDPS.2017.73", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.73", "entities": [ "Blocking (computing)", "Computer data storage", "Data 
structure", "Insertion sort", "Library (computing)", "Non-blocking algorithm", "Random access", "Server (computing)", "Thread (computing)", "Time complexity" ], "id": "51ac7123db308803413c9c408a377dcd4bc19bee", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "917-926", "journalVolume": "", "outCitations": [ "045a975c1753724b3a0780673ee92b37b9827be6", "6ed3f7f2cb4acc63dc71312afd931d899e24236a", "14f2ab7b89c9f508f9e886e4fd5bb702c867a190", "135772775121ba60b47b9f2f012e682fe4128761", "05c512b4ef4bb2209a302e4bb655c8a1c4cc6716", "415e5008232116e6869caf29c349a2dfe390264e", "042f443418ff2ff98a1dccbf49df9fa258dab707", "57eaf0036c74895a5e965915c6544041623719e0", "942f2a6df29234c304b69129872835d60cf5e9e9", "6261748cf3c225c89ccaeca15349bec7e5eaca4d", "1d1c68d07c4738e321a3db24fede081e95baff2c", "4e3304e77dd2fecea4086e132981d1470434cf65", "4a418603a5820524987bf82085dcc162fb7f9f2c", "68a9005a5ec10daece36ca5ecb9cad7be44770b1", "ff5e8b9972294f600b4de1a0fbb85df3a5b1bc31", "363b85f61630ebdc1194a59816ad950bf305c40a", "42142c121b2dbe48d55e81c2ce198a5639645030", "62bd72d7a4160bd1a35191c51137d11cfbe30cf7", "6db9bd41b294a7b45792b8f4ac8864f5d178f35e", "6808efa6321651d80881e5718b488817100f8b61", "0e422bd90c8be636358d4eb75f05276b361d19d4", "2e6177199748bf6cec5b80c87c3bf2816706f1f0", "30df50d77ef9478a2848626dfe3bf65f3c991991", "1cb0679ae82be093268747da0f634281ea6a41df" ], "paperAbstract": "Dynamic vectors are among the most commonly used data structures in programming. They provide constant time random access and resizable data storage. Additionally, they provide constant time insertion (pushback) and deletion (popback) at the end of the sequence. However, in a multithreaded system, concurrent pushback and popback operations attempt to update the same shared object, creating a synchronization bottleneck. 
In this paper, we present a lock-free vector design that efficiently addresses the synchronization bottlenecks by utilizing a combining technique on pushback operations. Typical combining techniques come with the price of blocking. Our design introduces combining without sacrificing lock-freedom. We evaluate the performance of our design on a dual socket NUMA Intel server. The results show that our design performs comparably at low loads, and out-performs prior concurrent blocking and non-blocking vector implementations at high contention, by as much as 2.7x.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.73" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/51ac7123db308803413c9c408a377dcd4bc19bee", "sources": [ "DBLP" ], "title": "Scalable Lock-Free Vector with Combining", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "51d28dd3066aad5cb465b4277d66f4ec432bfb1d": { "authors": [ { "ids": [ "32008801" ], "name": "Ridwan Rashid Noel" }, { "ids": [ "34718613" ], "name": "Palden Lama" } ], "doi": "10.1109/CLOUD.2017.15", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.15", "entities": [ "Benchmark (computing)", "Cloud storage", "Computer data storage", "Data scrubbing", "Distributed object", "Dynamic Language Runtime", "Interference (communication)", "Load balancing (computing)", "Multitenancy", "Object storage", "Testbed", "Throughput" ], "id": "51d28dd3066aad5cb465b4277d66f4ec432bfb1d", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "42-49", "journalVolume": "", "outCitations": [ "2b360e1d6cf91e52c2f496b63289ce4f441b6ac5", "84665515d4dd5946743684e2f7236babb0901d10", "590bd345ef4b8f274af3363a52b7d8f518cdc08a", "7c02eff1b79a78639747d250532651d4c92089d0", "5c5f8717f31b7e4334b450df15442223f988ff41", "8b1d8d46836a6d5eb4355315b64d85c128cbff27", "134021cfb9f082f4e8b58f31bcbb41eb990ab874", 
"09f0751d7452cd0480d572171593d07996325fcb", "3c5cc0f17dc2f956dbc278f24433f57affe49dce", "e5134de3c15b8a8a7c40371ca5c9a8a306ec375b", "3970d838c5b0e1dc3f03ab3930ed01c81c692cfe", "3e257f01e3ee71545d824a1615c35659525b856a", "19d8f8af2d774bda8b95bbeacf62c85811c37b3c", "443b8c56d7300f61b825d1dbafe06afdda23c3e1", "830ee8d87a3f2ef969d34c1dc7224d1b3dca6c1b", "9fb19a43d7ef6b1ac51a5d9dd45be379673a17ef", "3bf64462fc3558ab7e9329d084a1af4cf0c87ebf", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "9072c624116418f47040e3042b803210c001356b", "2da760f90c3d2bf6598becdde9063093f488548c" ], "paperAbstract": "Cloud storage services are associated with high latency variance, and degraded throughput which is problematic when users are fetching and storing content for interactive applications. This can be attributed to performance hotspots created by slow nodes in a storage cluster, and performance interference caused by multi-tenancy, and background tasks such as data scrubbing, backfilling, recovery, etc. In this paper, we present DLR, a system that improves the performance of cloud storage services in the presence of hardware heterogeneity, and performance interference through a dynamic load redistribution technique. We designed DLR to dynamically adjust the load serving ratio of storage servers based on the system-level performance measurements from the storage cluster. We implemented DLR using Ceph, a popular distributed object storage system, and evaluated its performance on NSFCloud's Chameleon testbed using Ceph's Rados benchmark. Experimental results show that DLR improves the average throughput and latency of Ceph storage by up to 65%, and 41% respectively compared to the default case. 
Compared to Ceph's in-built load balancing technique, DLR improves the throughput by up to 98%, and latency by 96%.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.15" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/51d28dd3066aad5cb465b4277d66f4ec432bfb1d", "sources": [ "DBLP" ], "title": "Taming Performance Hotspots in Cloud Storage with Dynamic Load Redistribution", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "52000adb93dc05f862c4fa83dd2ec99947d33f60": { "authors": [ { "ids": [ "3142083" ], "name": "Thang Hoang" }, { "ids": [ "26974903" ], "name": "Ceyhun D. Ozkaptan" }, { "ids": [ "40645384" ], "name": "Attila A. Yavuz" }, { "ids": [ "1721572" ], "name": "Jorge Guajardo" }, { "ids": [ "1997544" ], "name": "Tam Nguyen" } ], "doi": "10.1145/3133956.3134090", "doiUrl": "https://doi.org/10.1145/3133956.3134090", "entities": [ "Bandwidth (signal processing)", "Client\u2013server model", "Computation", "End-to-end encryption", "Experiment", "Random access", "Random-access machine", "Secret sharing", "Server (computing)", "Shamir's Secret Sharing" ], "id": "52000adb93dc05f862c4fa83dd2ec99947d33f60", "inCitations": [], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "819", "journalVolume": "2017", "outCitations": [ "6d714dad0d2903350cd5153c59a2ac4f68672232", "9543618825efde94e081aa4820f4852fc973963d", "1cf87af22b3b4dd0ff1144d861e0573121d8de2e", "1e6dda18549f74155ca3bc6d144f439108aa5474", "19e6f3ea035d33590a8598be68ac82e6f00ce518", "8f8de213b1318e0ef0914008010e87ab64ab94ff", "0dc1ad645908cec65ea64e138df69ed07d34cedb", "abdfbb5e6d1ac351113999ca21e246a07a0791c9", "581b0e29991ffd8396e2d91b9c53ad483e72d9b8", "9c2b3acdba197404bbd0c524f3fbd61ef15ea26f", "07e8bc0b0aaf8b919442f7c642eb6eda5b379b88", "d9bbeb390bdf8f4cefe1c21e50487f10ec6612e4", "10d5282a8d25c4490338d5fb8ad2f57b8646ad38", "13592e2d275975783125b4f285cf5bb329a98f33", "a98f1d700c715c5f5d405409eb5c28dfd806040c", 
"68002186998b9c4073b69b2b5e25c6183ac524cc", "2261da4f4c76139b149a76df48d34e432eb45f62", "0e0427aedfed65c8dd688c094b181feacf4eaab4", "20b63210954f7c5a70664f301dcd7196856ccfa7", "91556bd4e11dfa2590a628f2f88e590015827d78", "076e9f5d5b3e813b0cfa5dd3e47f1b8591136bf2", "28b5bbfb9f1b4738d6ed567d62fdfe52be9cb11e", "74df90c73883c7192ec36def60ac560e37b8f1c7", "443b4d6800a959c306faae6bb4426110bc49f7cf", "022d421adcba40d8a4e0051417bab919ec5405cd", "32306b97c9ea92968952911e79c0e01db6069b8a", "333a5b74cbaf912b40c5d056ac232f7e3b6473d4", "23c5ee0eeb08464ed0f86dcd9282d169699e2966", "01ca4dd53f226dff9da314cc35d2fa6ee1979e57", "0df6726c1d83b1e0d6c6580a1e2594519590e38e", "6f6e16de3b99c67e9fcdf7a98f283880159ba590", "7e6294370a5dbc1e9e8610421c9de13ee63d135d", "31445e3bd3672ed743c4a089cc0db4f23357f0f2", "1d2063d8110a2c239762c103661d87ef20857972", "09598c6fa85bb64b22816cfaef54e682cb3f3a6a", "038d0a508c02512aa1107d303cc7ccdb8c245b05", "a94205aed0148ae6d00986aef009e5e05d046f43", "92eaba06af12761b5c64b84e6028d21cd05af9dd", "304c2b0b6e8e1cc746117a14257dbc024d5135e9", "1cef17420fe9a74a504b1d3250eaae3abe8b7595", "475b10209d1ed13b079d62aca57ec31da4284bcd", "35a24265019c8c0c08a24e4f1865e71938f5ea86", "bcb49a06e4fb7ea831257e146073d84234f4d238" ], "paperAbstract": "Oblivious Random Access Machine (ORAM) enables a client to access her data without leaking her access patterns. Existing client-efficient ORAMs either achieve O(log N) client-server communication blowup without heavy computation, or O(1) blowup but with expensive homomorphic encryptions. It has been shown that O(log N) bandwidth blowup might not be practical for certain applications, while schemes with O(1) communication blowup incur even more delay due to costly homomorphic operations.\n In this paper, we propose a new distributed ORAM scheme referred to as Shamir Secret Sharing ORAM (S3ORAM), which achieves O(1) client-server bandwidth blowup and O(1) blocks of client storage without relying on costly partial homomorphic encryptions. 
S3ORAM harnesses Shamir Secret Sharing, tree-based ORAM structure and a secure multi-party multiplication protocol to eliminate costly homomorphic operations and, therefore, achieves O(1) client-server bandwidth blowup with a high computational efficiency. We conducted comprehensive experiments to assess the performance of S3ORAM and its counterparts on actual cloud environments, and showed that S3ORAM achieves three orders of magnitude lower end-to-end delay compared to alternatives with O(1) client communication blowup (Onion-ORAM), while it is one order of magnitude faster than Path-ORAM for a network with a moderate bandwidth quality. We have released the implementation of S3ORAM for further improvement and adaptation.", "pdfUrls": [ "https://acmccs.github.io/papers/p491-hoangA.pdf", "http://eprint.iacr.org/2017/819", "http://web.engr.oregonstate.edu/~yavuza/Hoang_SS3ORAM_CCS_762.pdf", "http://doi.acm.org/10.1145/3133956.3134090" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/52000adb93dc05f862c4fa83dd2ec99947d33f60", "sources": [ "DBLP" ], "title": "S3ORAM: A Computation-Efficient and Constant Client Bandwidth Blowup ORAM with Shamir Secret Sharing", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "521d5ab281d7c03c33ae367b39e025165ecbeece": { "authors": [ { "ids": [ "3384221" ], "name": "Noam Shalev" }, { "ids": [ "1777373" ], "name": "Idit Keidar" }, { "ids": [ "2618481" ], "name": "Yaron Weinsberg" }, { "ids": [ "2888002" ], "name": "Yosef Moatti" }, { "ids": [ "12912737" ], "name": "Elad Ben-Yehuda" } ], "doi": "10.1145/3132747.3132752", "doiUrl": "https://doi.org/10.1145/3132747.3132752", "entities": [ "Anomaly detection", "IBM Research", "Information security", "LXC", "Linux", "Outsourcing", "Principle of least privilege", "Snowden", "Superuser", "System administrator" ], "id": "521d5ab281d7c03c33ae367b39e025165ecbeece", "inCitations": [], "journalName": "", "journalPages": "515-530", "journalVolume": "", 
"outCitations": [ "9e694bd631811b89f645e8fd8ff8ef86a7743c4e", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "0bad381b84f48b28abc1a98f05993c8eb5be747d", "3d5c662340eb63a5107ff6168e7bfcc1a1ad58af", "1c8d06510ad449ad24fbdd164f8008cc730cab47", "ac97e091bf6f70e840fe9c8a893b281ab020f156", "7c5dc4de32a0d833eb87ae56fb24f9cb35f68fa9", "b7e56f71a8269558891af66d957da5ac45e29e37", "b12cc33111511365d4fe39a54ceb3c74755821be", "a659692ed5031c5410668d644a60d7806bb2efbb", "1e6a0b65e6e7f51d42dcbb2250bc47115bc4093c", "2d8c81c9802a46d7958d0ec7714c8c967a44cdeb", "97985288557b9c295c4f47528ee961ea5a1d36b2", "0c413df9e3dacda5088344406d83bccc3ff3c745", "363d109c3f00026f9ef904dd8cc3c935ee463b65", "61f18efffce84bf387ee5e431049b5e587a6ebbf", "01f3290d6f3dee5978a53d9d2362f44daebc4008", "240c82f1d824fabe76c343c988534fce9a10e8b1", "2515f90e54bbc8d3be351de0d75e59ca52abaad4", "3135eb4b37aa487dd5f06dfa178bbc1d874f3cdf" ], "paperAbstract": "System administrators have unlimited access to system resources. As the Snowden case highlighted, these permissions can be exploited to steal valuable personal, classified, or commercial data. This problem is exacerbated when a third party administers the system. For example, a bank outsourcing its IT would not want to allow administrators access to the actual data. We propose WatchIT: a strategy that constrains IT personnel's view of the system and monitors their actions. To this end, we introduce the abstraction of perforated containers -- while regular Linux containers are too restrictive to be used by system administrators, by \"punching holes\" in them, we strike a balance between information security and required administrative needs. 
Following the principle of least privilege, our system predicts which system resources should be accessible for handling each IT issue, creates a perforated container with the corresponding isolation, and deploys it as needed for fixing the problem.\n Under this approach, the system administrator retains superuser privileges, however only within the perforated container limits. We further provide means for the administrator to bypass the isolation, but such operations are monitored and logged for later analysis and anomaly detection.\n We provide a proof-of-concept implementation of our strategy, which includes software for deploying perforated containers, monitoring mechanisms, and changes to the Linux kernel. Finally, we present a case study conducted on the IT database of IBM Research in Israel, showing that our approach is feasible.", "pdfUrls": [ "http://webee.technion.ac.il/~idish/ftp/WatchIT-SOSP2017.pdf", "http://dl.acm.org/citation.cfm?id=2995968", "http://doi.acm.org/10.1145/3132747.3132752" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/521d5ab281d7c03c33ae367b39e025165ecbeece", "sources": [ "DBLP" ], "title": "WatchIT: Who Watches Your IT Guy?", "venue": "SOSP", "year": 2016 }, "522082c8171783aa66051b4dd9559ea5006e6b1e": { "authors": [ { "ids": [ "3051271" ], "name": "Zijun Zhang" }, { "ids": [ "1744839" ], "name": "Zongpeng Li" }, { "ids": [ "1726963" ], "name": "Chuan Wu" } ], "doi": "10.1145/3084460", "doiUrl": "https://doi.org/10.1145/3084460", "entities": [ "Best, worst and average case", "Cloud computing", "Competitive analysis (online algorithm)" ], "id": "522082c8171783aa66051b4dd9559ea5006e6b1e", "inCitations": [ "50bffc27c025cff41980444957d6dd46b171caef", "4cec5b24eaa7f21df2e4a6b5fd7c299775ceffdf" ], "journalName": "POMACS", "journalPages": "23:1-23:26", "journalVolume": "1", "outCitations": [ "eb796c0959c2a439eef1891fd98dafc8f2f266b2", "5ccb5e61b33eb276edad6069b4290d56b59c81f9", 
"ee1a9ad2851b27529819db6ab214fdb256c09da3", "34c5a04f1c96a0e35786b0e76866cabf85fb4611", "0e73d917645d0ce61aaa2bc1c878a8966ca8e600", "0e6f878d98c43af4963abb79c4c93026a0d44040", "c62f65abd0edfd60a2c01895297856b45429d552", "0a07a56ed17c6541e490df16f6381073494d0058", "1e62a5ec2a9795b4d9be2e6315b72e97a0408714", "9f38d165507be76a6d1e3e9f92bc42c6f6fa0526", "b5c54459955017bbcf6d56f9336d5edbd3dd2a73", "0b8a5b24d0d05dfc0715fcc249ae907f91a0c1fe", "6590b45357839465a64748176c6e1528fd3f11cf", "1210fa2705369b575a05f6a846b68354c2c0959a", "31abcf70a3a118269d4b5707a7f06b0ef8cdaab9", "c6bdabfa9e946a8db4de12b8479dad6bfa28ae70", "54c0ecfbda3b5de0bc9989c1ec6b7ae16cbc8be5", "99d5ecff13848e469e68b66d6a58fa52a0a44462", "210295b200356acb57b144963116e4bf3e48bf17", "f885482b6de7eaacbc3cbbd322232b7f95a55621", "f8832d3ea93e5177554adc39f1b67195485451cc", "0051b3f0c9224a77aa2be024d18f464a4f38f540", "3e4d6efb58d2e42b4392f6e059df716c7ed25db3", "05be0db01d70bcce9530b462ab2368f9e15127d9", "42320982bd34adcf0584dac96f1be87a04858068", "5df53a74c903ea5d578f4f1d86d106a0c7c96403" ], "paperAbstract": "We study online resource allocation in a cloud computing platform through posted pricing: The cloud provider publishes a unit price for each resource type, which may vary over time; upon arrival at the cloud system, a cloud user either takes the current prices, renting resources to execute its job, or refuses the prices without running its job there. We design pricing functions based on current resource utilization ratios, in a wide array of demand-supply relationships and resource occupation durations, and prove worst-case competitive ratios in social welfare. In the basic case of a single-type, non-recycled resource (allocated resources are not later released for reuse), we prove that our pricing function design is optimal, in that it achieves the smallest competitive ratio among all possible pricing functions. 
Insights obtained from the basic case are then used to generalize the pricing functions to more realistic cloud systems with multiple types of resources, where a job occupies allocated resources for a number of time slots till completion, upon which time the resources are returned to the cloud resource pool.", "pdfUrls": [ "https://arxiv.org/pdf/1704.05511v1.pdf", "http://doi.acm.org/10.1145/3078505.3078529", "http://arxiv.org/abs/1704.05511", "http://doi.acm.org/10.1145/3084460" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/522082c8171783aa66051b4dd9559ea5006e6b1e", "sources": [ "DBLP" ], "title": "Optimal Posted Prices for Online Cloud Resource Allocation", "venue": "SIGMETRICS", "year": 2017 }, "52389dc43817c8b3b9bf55ed6036457b5f36c705": { "authors": [ { "ids": [ "1757079" ], "name": "Zvika Guz" }, { "ids": [ "2797168" ], "name": "Harry Li" }, { "ids": [ "1786575" ], "name": "Anahita Shayesteh" }, { "ids": [ "34866701" ], "name": "Vijay Balakrishnan" } ], "doi": "10.1145/3078468.3078483", "doiUrl": "https://doi.org/10.1145/3078468.3078483", "entities": [ "Adobe Flash", "Central processing unit", "Hard disk drive", "ISCSI", "Kinetic Void", "PCI Express", "Scalability", "Solid-state drive", "Throughput" ], "id": "52389dc43817c8b3b9bf55ed6036457b5f36c705", "inCitations": [], "journalName": "", "journalPages": "16:1-16:9", "journalVolume": "", "outCitations": [ "ee05b94d8af5113b3e4d51e957ca66fb7e6aea35", "028378b395dc2a11e8ccc3d994df228340fd9697", "029e03cd045b1fcda76e4c469eedfa0470c79624", "def29d202e537d026b8d3ed91655b540ef86cceb", "b4f974ea9f35c49999b80e09b9a279752da573ed", "1cc9ebeab21d668c8fb197a2498380e95c6a65fb", "1a0af07c26d30548f2bd40c769f3961547a78179", "b30b57a9aac8ddf10cc0f0f5c1b7103e5edf6a44", "08632fe2b934ed15d3499e7321282c81adc2c390", "048a09d7c8713dc2533c1e31ac3f224868293461", "35c2f7e0454adc0130c4279fce84a31701cebc67", "f4f37512b9786dbd9980d343e47fdc41032d0ca6", "9c854bd25e3d3f866f3929f1820667c4059b2a63", 
"61977858b3eea4f5a6d81393301e7298ade7a2d8", "110c050c6c992d2b956f7b47d717810ac5c91bdc" ], "paperAbstract": "Storage disaggregation separates compute and storage to different nodes in order to allow for independent resource scaling and thus, better hardware resource utilization. While disaggregation of hard-drives storage is a common practice, NVMe-SSD (i.e., PCIe-based SSD) disaggregation is considered more challenging. This is because SSDs are significantly faster than hard drives, so the latency overheads (due to both network and CPU processing) as well as the extra compute cycles needed for the offloading stack become much more pronounced.\n In this work we characterize the overheads of NVMe-SSD disaggregation. We show that NVMe-over-Fabrics (NVMf) - a recently-released remote storage protocol specification - reduces the overheads of remote access to a bare minimum, thus greatly increasing the cost-efficiency of Flash disaggregation. Specifically, while recent work showed that SSD storage disaggregation via iSCSI degrades application-level throughput by 20%, we report on negligible performance degradation with NVMf - both when using stress-tests as well as with a more-realistic KV-store workload.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078468.3078483", "https://www.systor.org/2017/slides/NVMe-over-Fabrics_Performance_Characterization.pdf", "http://www.samsung.com/us/labs/pdfs/nvmf-disaggregation-preprint.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/52389dc43817c8b3b9bf55ed6036457b5f36c705", "sources": [ "DBLP" ], "title": "NVMe-over-fabrics performance characterization and the path to low-overhead flash disaggregation", "venue": "SYSTOR", "year": 2017 }, "525cd0a3f46f3d7427a2282aec5c97e92d2f0308": { "authors": [ { "ids": [ "2184366" ], "name": "Lianghong Xu" }, { "ids": [ "1774210" ], "name": "Andrew Pavlo" }, { "ids": [ "1690586" ], "name": "Sudipta Sengupta" }, { "ids": [ "1707164" ], "name": "Gregory R. 
Ganger" } ], "doi": "10.1145/3035918.3035938", "doiUrl": "https://doi.org/10.1145/3035918.3035938", "entities": [ "Byte", "Data deduplication", "Database", "Delta encoding", "NoSQL", "Online and offline", "Throughput" ], "id": "525cd0a3f46f3d7427a2282aec5c97e92d2f0308", "inCitations": [], "journalName": "", "journalPages": "1355-1368", "journalVolume": "", "outCitations": [ "218dd67596de2ca4846ef8a3c8e86231f7d8e84c", "3437a7e23e3f97b58f4cf73e7e5b711131e6706c", "045943438dd45f25f0127d97ed9116b3b05914a7", "206d1ddfd889dd5b7b49177f15ea328bccac3ad2", "4c664c7015285ce14063204d0790dffbb7bbf46c", "f458bd30b0f27959b1147ea8afa08ec8fef94ad5", "0ce479229630e55e732597cf9b2aeb5018aae4c2", "0133126c02871c8ff00ac42a13f74e756d076cb0", "a6b9d9ee907bbe398918364107747c6dac097590", "511461524439a32df5cfaa5b3230e7330b5b9153", "233960d0d4351869ba179f2aa7d89405977ab00e", "a1330395dda0d0174926f5152778ead7925983f7", "caa4063e08a1f7b0365b435c0b629fe319f4277f", "924276d95d1bdaf087beb0ccf699443b9bf855ec", "a040e419355efde5d32101658f0b12f62e49760f", "6bccf2ba321177023d0f1d83484ae81fba687d97", "b2ec74c72d99b755325dc470dec2949d69cd4d57", "207def18c67fa8024741b7ae3cdc655b57f2053f", "bb90aa0bd362d615e3598f52504d06b20125512d", "caa8b09412fc11c4963a744dae82d6d50beb8df7", "2c5b8766a1dae62b86ba38013253ab8673f6ec44", "6d5cfe7723c61149d9cf905fe173268075b8c976", "4913df252b4cb41a83d3f2376c8b1dbb1317b57d", "21318da2ea08c1f7b8c77701f67483882950df96", "a611f2c3b7a4b1ab6db8d4fbf6584edbe78e86ff", "511c5d949e6966e5c2f152f2312c6081fb97a614", "92a6961f076307d5b4778fd45d5f01f6d5d84e12", "0a27bcc0733a3fd4dd7e14b4796c854f73d9465d", "24fbb6a0817965f76812902e8b822030f6c5a6cb", "24cef48316f1e73414b50ab4f702414b2778d49b", "44607270754f8521d6c4d42297aa881393f4f8e0", "05d9391c61fd6b3ae92f852a7bb12257cc86800d", "488495c644d90c32ef7b58ec3e4ffc7b40f25b36", "088bd567f0fafc6db9310820629d75ee1dd5ea9c", "3a134bc11a5805bcf45fdcb88a91321a1b1b63c3", "333617a8bd0d54f70b234afe385e59c660731135", 
"044a9cb24e2863c6bcaaf39b7a210fbb11b381e9", "6d1ca1108d9d96e5607571502552ad04464d7f15", "fb1c77d492576d52173a56730a4dc5991217db6f", "42e4fd620dab3d6b77ce2e0e19aeaa234c434c5b", "0c279813f1dba545c50c237f69b89c6496117015", "38a4d429f9e96ae37489e6b56b560ad24048b4b4", "481086af0ac174dc0416bc7daf33100fab5c649b", "898b60ae12a855ac9ad91f93543d82ce00ee76ff", "00f149a044b76834b69a05e4ed46cabb81e47e85", "55443845d561f05a864bcf8a2cb90af32bed89c3", "04f020a4ab2134db6f9e98eadf216d94d440414a" ], "paperAbstract": "dbDedup is a similarity-based deduplication scheme for on-line database management systems (DBMSs). Beyond block-level compression of individual database pages or operation log (oplog) messages, as used in today's DBMSs, dbDedup uses byte-level delta encoding of individual records within the database to achieve greater savings. dbDedup's single-pass encoding method can be integrated into the storage and logging components of a DBMS to provide two benefits: (1) reduced size of data stored on disk beyond what traditional compression schemes provide, and (2) reduced amount of data transmitted over the network for replication services. To evaluate our work, we implemented dbDedup in a distributed NoSQL DBMS and analyzed its properties using four real datasets. Our results show that dbDedup achieves up to 37x reduction in the storage size and replication traffic of the database on its own and up to 61x reduction when paired with the DBMS's block-level compression. 
dbDedup provides both benefits with negligible effect on DBMS throughput or client latency (average and tail).", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035938", "http://www.pdl.cmu.edu/PDL-FTP/Database/xu-sigmod17.pdf", "http://db.cs.cmu.edu/papers/2017/p1355-xu.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/525cd0a3f46f3d7427a2282aec5c97e92d2f0308", "sources": [ "DBLP" ], "title": "Online Deduplication for Databases", "venue": "SIGMOD Conference", "year": 2017 }, "526ea6abf01ce4af860db2c1a34ab4690dc32a51": { "authors": [ { "ids": [ "8939217" ], "name": "Hyeontaek Lim" }, { "ids": [ "1762920" ], "name": "Michael Kaminsky" }, { "ids": [ "34752743" ], "name": "David G. Andersen" } ], "doi": "10.1145/3035918.3064015", "doiUrl": "https://doi.org/10.1145/3035918.3064015", "entities": [ "Arnold tongue", "Concurrency (computer science)", "Concurrency control", "Database", "Database transaction", "Fastest", "Hekaton (database)", "IBM Tivoli Storage Productivity Center", "In-memory database", "Lock (computer science)", "Multi-core processor", "Multiversion concurrency control", "Online transaction processing", "Read-write memory", "Serializability", "Silo", "Silo (library)", "Throughput", "Transaction processing", "Transactions per second", "Two-phase commit protocol", "Two-phase locking", "YCSB" ], "id": "526ea6abf01ce4af860db2c1a34ab4690dc32a51", "inCitations": [ "612cce3e7c02c3c2b0d61528ba7c4791ba2dfaad", "3315a9dafa8d338b6f6cc7a237b986a7959f4ed4" ], "journalName": "", "journalPages": "21-35", "journalVolume": "", "outCitations": [ "1220e4a011c46804d4369b5580dc7fb6e387af54", "979caaeaf7f0285910b571745642e930224b248e", "09ecdb904eb7ae8a12d0c6c04ae531617a30eafa", "13f7c5807452ae602046582a385c0fb544ec5de1", "5121709bf42b13a93c70b45a456c82db92850a02", "5a36f17e0560750a956064ff06b63bcd57c6145f", "09ed565e84057123c15ab12b885c235d1f241aed", "4ea47f63c8b2a026a66566dd3f733d45e692d369", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", 
"27611a1896feb8817eb9cebca344d9736916c3bb", "05885dbd3ccbbe744a2ee1c39126bd263140e741", "e75c5d1b7ecd71cd9f1fdc3d07f56290517ef1e5", "83684cc2fddbe64f8902d1ee5d5112bf95eaeffe", "7227999dfa663a2a1e0e81ee450f360e1e308ff7", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "2e50af2320dab632d8046b6d4c130ae6cce8903f", "96d197be2253f5c853edce37b59c186915160ce0", "605277f87ee483cfd04f986780514c26160d2e87", "35f751e46799e3a91425267819f40dce273abec1", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "28ecdc50beb098d9176d992fed80eb2bac5963a4", "09bd66ed15985caa6b0bf1d54a36b508141ed128", "948c881ab7f1f62e9c940458e74c3e435320df72", "62f1ec11da850fa2ffab031757d226c1fa67ecb9", "861fbac82ae5ec0ea654d0d95ce4d48de62419ea", "264cd229ac4bbdf655d5e7b44563bf84bd846364", "10eb9cfb2cea0d6a256e436becd8f0f5494dc5a0", "8665bf55084c825477cc9a6a64a0150a8d3850f7", "6a10c0016bd1d4ecd7cf6ce71ba574a82eeb6d72", "13875088254a585cd0b050f3bc27c1af9ada690f", "4e3304e77dd2fecea4086e132981d1470434cf65", "3ae8993ebc28dd9b99d415d04d2b766dc99212d9", "aaf480b76674d99b95471f510a84b251ae3ecaeb", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "225603198cc415d363db8a8a2bd30b0df3c963b1", "0bddbe35fa6e3cf625d15553365a690d3a6bf7aa", "5a49c1c694028fd6ca7acc6af601c0f54efa0700", "3d4eabc660b637cf3b52845c9a0295416c6bd093", "2520cfc29a521f2333fda020d7ae41860f8dfebd", "1e87a3e68bc744fc7e62b04b7c9597171e147e81", "03416be8097852a54dd3e309434e5a0806824646", "0997037e940df06ed7a6d19f7501579aab01e829", "9aa0d7253574e50fe3a190ccd924433f048997dd", "3bfa50099269ef3ce832bc7f3710ba6484165092", "cc977141ecda4914987bb3d91b3d6aee603bef46", "4827cc74dba0c39172554cf0116eb111797f0d1b", "7129b305ce45f83127e928e8510da9fae0783905", "333fbb77ada815bcfef5e93d4358084b4af2d1ec", "6460e782a12649a478bbaeb9c149f59e206d9540", "095a3cee30d64d3a6f22caadd58c45c5cd0b83e9", "0ced2ecad932ec86aaa043f9b3ec0d9c6e88fbb5", "412a9e54bbb31e12d008a9579994e009c5b40b46", "56f6aec0132e56769e2036bbeff791dfa137d107" ], "paperAbstract": "Multi-core in-memory databases promise 
high-speed online transaction processing. However, the performance of individual designs suffers when the workload characteristics miss their small sweet spot of a desired contention level, read-write ratio, record size, processing rate, and so forth.\n Cicada is a single-node multi-core in-memory transactional database with serializability. To provide high performance under diverse workloads, Cicada reduces overhead and contention at several levels of the system by leveraging optimistic and multi-version concurrency control schemes and multiple loosely synchronized clocks while mitigating their drawbacks. On the TPC-C and YCSB benchmarks, Cicada outperforms Silo, TicToc, FOEDUS, MOCC, two-phase locking, Hekaton, and ERMIA in most scenarios, achieving up to 3X higher throughput than the next fastest design. It handles up to 2.07 M TPC-C transactions per second and 56.5 M YCSB transactions per second, and scans up to 356 M records per second on a single 28-core machine.", "pdfUrls": [ "http://www.cs.cmu.edu/~hl/papers/cicada-sigmod2017.pdf", "http://doi.acm.org/10.1145/3035918.3064015" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/526ea6abf01ce4af860db2c1a34ab4690dc32a51", "sources": [ "DBLP" ], "title": "Cicada: Dependably Fast Multi-Core In-Memory Transactions", "venue": "SIGMOD Conference", "year": 2017 }, "528bdbe171ca7ed4d0ec722a3fb773610e250788": { "authors": [ { "ids": [ "1943226" ], "name": "Manos Karpathiotakis" }, { "ids": [ "2327080" ], "name": "Avrilia Floratou" }, { "ids": [ "3023309" ], "name": "Fatma \u00d6zcan" }, { "ids": [ "1728318" ], "name": "Anastasia Ailamaki" } ], "doi": "10.1145/3127479.3131208", "doiUrl": "https://doi.org/10.1145/3127479.3131208", "entities": [ "Apache Hadoop", "Data architecture", "Database", "Ecosystem", "Experience", "Experiment", "Location awareness", "Machine learning", "Microsoft SQL Server", "NoSQL", "Page view", "Program optimization", "Real-time computing", "SQL", "Two-phase commit 
protocol" ], "id": "528bdbe171ca7ed4d0ec722a3fb773610e250788", "inCitations": [], "journalName": "", "journalPages": "108-120", "journalVolume": "", "outCitations": [ "e03194f26c3674485809b09aad6a33b71452bf63", "009523862551ecec0da53dfd0365892cb9cb430b", "18c021c9cce95ed5615a060f590b8388b604e7c5", "0d2c4723e9e5925cde74bd879611fda6f6e3980b", "65ad9178eb3e3bb6ef01df6ced56e389e911376c", "63131baa62769bfb176aaf9b603eab81eb44238e", "dd186d6826a0bc007fd02bafed6861f99b2f4ef1", "e3886987fa31e5e9388fbc76d9122213f200f4d3", "09cd2f3831285bee376adc2edf9aeceaa3ac9983", "0c0785107a75f82c4c730509af04c9831f2e5d6c", "0558c94a094158ecd64f0d5014d3d9668054fb97", "6350f382e814d4b2f888f5a2a8bd6dd0e9362d81", "8492fac708619385bb7ac98893a43c00d928b059", "7663998b7e4a704f141c6b58eec74c4a43ed059c", "059eb34d95c73e37dca8e35b0ac5a2fb0142f3ee", "28a9dca6faeead651539c700bef413203b2b876e", "a5cd59cf9f116cefc2627e4bba66e78a1ac4187b", "beaaf1cad62e5b3b9a6692935902ee2b3004d203", "7f7ea8be40125cc90f7813d88df6079d5ab42a89", "3425b2231e51e0c2542d9bc20ae582cd17738478", "a073767c85a53084b64639097ee26475f0ecbf2e", "1455be30b0bb364ffc4c35d2cf1ee05cfb32595f", "8e63fef89a31d3205442a27892050ed22eb7c3f6", "2fd6c2dd29141c7257e9b1f810823905aa968e10", "3e0c3a6a65757f59de1387ce2bd5c0366d62e4aa", "3fd5fdfd1a672a613de8a2b266676f577de9bcf1", "04e828ddaa05cccb1e5d380f4fe30b6aa36e6dfd", "db617237d4152ab1b3e9367f228e81dbcb673e87", "a8c50ddef01202c897b090868fcd44aa7bd8fdf6", "7a75c886b043e7c3f77829412774de27648f384a", "0e0a7476662aadb61c796f9cb7785754310a5585", "31fe8b18f70585f811938ab545efbf723140deab", "14348170a14b4e2edca01521184cb2cd60b83200", "1835e1b7a3a8a59a49edcd4e0144df0c5b73b812", "3654f3d8ef40bd8ec44dabb6f1b7c6766af3f641", "142de4622470a98017db345c669edc1f1f832574", "06fb8f5e59bac8b014e7ab70851b3568ea5a0a46", "e43eda8ac0b737690042fde856ec364e14a811a4", "72ec19ead4007e130786bde139c56c980f5466c5", "89bb85feef6886292d9f800c4ae069bcae140ea0", "33e1561a3c9c76fd20c1b6bfd73036769e2d7f67", 
"177cf8f87159793cbe467119e49a7b472bdf4113", "39271f5e2aa2ca5517671c86a148a625b53025f4" ], "paperAbstract": "The typical enterprise data architecture consists of several actively updated data sources (e.g., NoSQL systems, data warehouses), and a central data lake such as HDFS, in which all the data is periodically loaded through ETL processes. To simplify query processing, state-of-the-art data analysis approaches solely operate on top of the local, historical data in the data lake, and ignore the fresh tail end of data that resides in the original remote sources. However, as many business operations depend on real-time analytics, this approach is no longer viable. The alternative is hand-crafting the analysis task to explicitly consider the characteristics of the various data sources and identify optimization opportunities, rendering the overall analysis non-declarative and convoluted.\n Based on our experiences operating in data lake environments, we design System-PV, a real-time analytics system that masks the complexity of dealing with multiple data sources while offering minimal response times. System-PV extends Spark with a sophisticated data virtualization module that supports multiple applications - from SQL queries to machine learning. The module features a location-aware compiler that considers source complexity, and a two-phase optimizer that produces and refines the query plans, not only for SQL queries but for all other types of analysis as well. The experiments show that System-PV is often faster than Spark by more than an order of magnitude. 
In addition, the experiments show that the approach of accessing both the historical and the remote fresh data is viable, as it performs comparably to solely operating on top of the local, historical data.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3131208", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/09/system-pv.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/528bdbe171ca7ed4d0ec722a3fb773610e250788", "sources": [ "DBLP" ], "title": "No data left behind: real-time insights from a complex data ecosystem", "venue": "SoCC", "year": 2017 }, "529d3d3fb82afe905c410e8a7b3fc9d09ca623c5": { "authors": [ { "ids": [ "1747521" ], "name": "Gregory J. Duck" }, { "ids": [ "1713932" ], "name": "Roland H. C. Yap" }, { "ids": [ "2189170" ], "name": "Lorenzo Cavallaro" } ], "doi": "", "doiUrl": "", "entities": [ "64-bit computing", "Aliasing", "Allocation", "Bounds checking", "Call stack", "Checking (action)", "Direct manipulation interface", "Disk mirroring", "Exception handling", "Experiment", "Instrument - device", "Instrumentation (attribute)", "Legacy code", "Low Back Pain", "Medical Device Incompatibility Problem", "Memory management", "Physical object", "Platelet Glycoprotein 4, human", "Pointer (computer programming)", "Pointer ", "Setjmp.h", "Stack-based memory allocation", "Thread (computing)", "Vulnerability (computing)" ], "id": "529d3d3fb82afe905c410e8a7b3fc9d09ca623c5", "inCitations": [ "73519ef57ae48827a27398659df04a08095aa701", "665e622409fa3669e9c42fce78f122b03f1b3688", "b49d8c0d1d6dea24b41b39b58cf276c2f078fa1c", "7cfebf75c82fdf08d21ea29751a39e6d2291b2ca", "629191336187398e43f1021bdcc6c293a72d1ca8", "46b19795bbda3b8c0c3e02896482fe61bb2943bc", "408ba239cece0308dbd180d86ee217d3c8d0b855" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "6a8f65381a627a2db6c756a7185d9106f0acefec", "6a9744ec5ea1659d90f3dab0d6ce40f628432d1c", "0d0154d589205cc519607fbb142ecefe0f96aef0", 
"5007b598ed2c118bdf14c0a7562b6c4fb7974742", "635f3a25ca8626072d1eedc6aebddcb429de4b4e", "7e61bd6abdcb68ed9b3871311cabe09753de88ff", "0df445ca53975d93f27c9def03e964d3113a4607", "619a819b7d4235e5a6e03ba9c5f23051bac5d889", "9a8bf1a6e4e71f59620a53b0637c38a416966c4b", "d4914de7dbb5080d5c83004cab22df9100fb37d0", "29146d2b80bc8c56f7e18efe8d2c92354254e947", "f7a969a9185f6712fba9d37deed60afcf6c39948", "0719b9670c8580db76547497df39caabdc20fc32", "7a9f655133788b2bd23c1171683f81b702b4b5b6", "4ced5380095c3f659a15e0f5b16061713f630c2e", "57f891b7213282bd58dc61230919fb531b0e4fde", "c2d68a3dd269d4a0d2dbe0314797ccb410589602", "12d5c3bcd2dd94b4f07f358be291c5906b16dba4", "1fa355cabcaa6650603098c41a3a439fbed718a1", "8c65dca9c8700629b1daa8f8a45499daac5e4e82", "440273d503939d01cba669079dbf3addca045fea", "11496e28e6dd3f1ab7d2f054c46d7587988dfd86", "f4ed7fb35916bd0d36a53198384bb0ed2ff34c3f", "12e2db1e82a84cff97e8dbf11619d135b340b49c" ], "paperAbstract": "Object bounds overflow errors are a common source of security vulnerabilities. In principle, bounds check instrumentation eliminates the problem, but this introduces high overheads and is further hampered by limited compatibility against un-instrumented code. On 64-bit systems, low-fat pointers are a recent scheme for implementing efficient and compatible bounds checking by transparently encoding meta information within the native pointer representation itself. However, low-fat pointers are traditionally used for heap objects only, where the allocator has sufficient control over object location necessary for the encoding. This is a problem for stack allocation, where there exist strong constraints regarding the location of stack objects that is apparently incompatible with the low-fat pointer approach. 
To address this problem, we present an extension of low-fat pointers to stack objects by using a collection of techniques, such as pointer mirroring and memory aliasing, thereby allowing stack objects to enjoy bounds error protection from instrumented code. Our extension is compatible with common special uses of the stack, such as alloca, setjmp and longjmp, exceptions, and multi-threading, which rely on direct manipulation of the stack pointer. Our experiments show that we successfully extend the advantages of the low-fat pointer encoding to stack objects. The end result is a competitive bounds checking instrumentation for the stack and heap with low memory and runtime overheads, and high compatibility with un-instrumented legacy code.", "pdfUrls": [ "http://www.comp.nus.edu.sg/~gregory/papers/ndss17stack.pdf", "https://pure.royalholloway.ac.uk/portal/files/27687028/ndss2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/529d/3d3fb82afe905c410e8a7b3fc9d09ca623c5.pdf", "s2Url": "https://semanticscholar.org/paper/529d3d3fb82afe905c410e8a7b3fc9d09ca623c5", "sources": [], "title": "Stack Bounds Protection with Low Fat Pointers", "venue": "", "year": 2016 }, "52af6a06e325dbe03a0152f280872cece5db71df": { "authors": [ { "ids": [ "2470974" ], "name": "Dmitry Basin" }, { "ids": [ "1685240" ], "name": "Edward Bortnikov" }, { "ids": [ "3060519" ], "name": "Anastasia Braginsky" }, { "ids": [ "2902643" ], "name": "Guy Golan-Gueta" }, { "ids": [ "2829412" ], "name": "Eshcar Hillel" }, { "ids": [ "1777373" ], "name": "Idit Keidar" }, { "ids": [ "2719320" ], "name": "Moshe Sulamy" } ], "doi": "10.1145/3018743.3018761", "doiUrl": "https://doi.org/10.1145/3018743.3018761", "entities": [ "Attribute\u2013value pair", "Big data", "Data structure", "First-class function", "In-memory database", "Iterator", "Java", "Kinetic Void", "Map", "Memory management", "Non-blocking algorithm", "Range query (data structures)", "Skip list", "USB flash drive" ], "id": 
"52af6a06e325dbe03a0152f280872cece5db71df", "inCitations": [ "5f44b649b3411fd1e10b517f50f86a7dbd6e0302", "54aaa5ea1fd24aca55f11e529ef270dc6d1d44da", "31821287a7bab1bea94f5f1a4bf37c6799bc1606", "29e9f2e6931c61510b2e91f3f621a07ae1356049", "ca2c0c868209abb742e8978ff82ba1a6a90741b0", "82068c5d797abb6d8d29b3d256912aab72e6ee9b" ], "journalName": "", "journalPages": "357-369", "journalVolume": "", "outCitations": [ "202d6e1bc26a9ec2fe69c60ccc9b898c22425ed4", "094dd52d6f308e0dbd6f12a7d5722303e3ba668a", "f98062567adb3c98bfabeb99172f8bca026a0102", "18a5f443299784479e78d9e77f175af57cb2fa2b", "58da996efd7320d1e484263c97c930c8979c474f", "0ad5ed5a0a7b9210993753d7594c7285d5e9b179", "38611b424808954be2c1375da1a873b1e2487ace", "478e5f62fd9a1cd9e2896b3c2d620089ebee7554", "042d24588b164e0c4c8180320250ccdb6eb06775", "50747217b6acfdb3e5f6cfeca5973519d0896670", "7ae71ff8a5f950c12305f27d8584a9c9203717a9", "00ac447d02035c26c7e2852c2457fe812e89038f", "4714cd0edee43e12c281c4346f01fb57ee20fc0f", "314276e4c1909f5b7de9e34fd6219756f8526da5", "03416be8097852a54dd3e309434e5a0806824646", "5d153a55b6f12752afc11bb96d9d72a51c990dba", "78e47b768c784fcb15004bab48e24f80fdad579e", "4a418603a5820524987bf82085dcc162fb7f9f2c", "6061ecaae106d797d54ffda8e15223d2e7ed4ac8", "0030fe0d54b29fbbecb2d232fd76d5bf623d7a73", "2b300024ac736f7181f6d35392ec3a65f49457bd", "500adfb955f443c9fb0b8a44a5a03887fa4e9729", "89d2464a327bbac1b2de669b717738bfb3aa6c5f", "42142c121b2dbe48d55e81c2ce198a5639645030", "0e422bd90c8be636358d4eb75f05276b361d19d4", "4e3304e77dd2fecea4086e132981d1470434cf65", "f3018e7589af851341e6b40affb12d0ebdfa7db1" ], "paperAbstract": "Modern big data processing platforms employ huge in-memory key-value (KV) maps. Their applications simultaneously drive high-rate data ingestion and large-scale analytics. 
These two scenarios expect KV-map implementations that scale well with both real-time updates and large atomic scans triggered by range queries.\n We present KiWi, the first atomic KV-map to efficiently support simultaneous large scans and real-time access. The key to achieving this is treating scans as first class citizens,and organizing the data structure around them. KiWi provides wait-free scans, whereas its put operations are lightweight and lock-free. It optimizes memory management jointly with data structure access.We implement KiWi and compare it to state-of-the-art solutions. Compared to other KV-maps providing atomic scans, KiWi performs either long scans or concurrent puts an order of magnitude faster. Its scans are twice as fast as non-atomic ones implemented via iterators in the Java skiplist.", "pdfUrls": [ "https://www.cs.tau.ac.il/~moshesulamy/papers/p357-basin.pdf", "http://webee.technion.ac.il/people/idish/ftp/kiwi.pdf", "https://www.cs.tau.ac.il/~moshesulamy/papers/ppopp092-basinA.pdf", "http://dl.acm.org/citation.cfm?id=3018761" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/52af6a06e325dbe03a0152f280872cece5db71df", "sources": [ "DBLP" ], "title": "KiWi: A Key-Value Map for Scalable Real-Time Analytics", "venue": "PPOPP", "year": 2017 }, "52c4bef81a1328d805448df35674e4ae65c45fa3": { "authors": [ { "ids": [ "21599687" ], "name": "Li-Xuan Chuo" }, { "ids": [ "2124955" ], "name": "Zhihong Luo" }, { "ids": [ "1692316" ], "name": "Dennis Sylvester" }, { "ids": [ "1687117" ], "name": "David Blaauw" }, { "ids": [ "2325469" ], "name": "Hun-Seok Kim" } ], "doi": "10.1145/3117811.3117840", "doiUrl": "https://doi.org/10.1145/3117811.3117840", "entities": [ "Algorithm", "Analog-to-digital converter", "Application-specific integrated circuit", "Blocking (radio)", "CMOS", "Carrier frequency", "ETSI Satellite Digital Radio", "Frequency divider", "Indoor positioning system", "Integrated circuit", "Interference (communication)", 
"Line-of-sight (missile)", "Low-power broadcasting", "Machine learning", "Multiplexing", "RF modulator", "Radio frequency", "Sampling (signal processing)", "Signal processing", "Ultra-wideband" ], "id": "52c4bef81a1328d805448df35674e4ae65c45fa3", "inCitations": [], "journalName": "", "journalPages": "222-234", "journalVolume": "", "outCitations": [ "3b3188f59e6dff7034f160650fb4b5c32a33c3cf", "e5edfbdf645a3dbcdaf7d9fcbf350c67fbbadae5", "7180d8eff34436553fef16ced83650fe07d1602b", "b31603e21fb1595e826ad653da019087fb86d721", "05fe031e53dd8990e7076a91277cb2b74e22b811", "fc51fb822024805533ff9eef4f7e486b38437109", "79883a68028f206062a73ac7f32271212e92ade8", "0c583ae5a79c2bb748c6838ab687307575977e97", "4bc80bdca050d4d89c8048f363ccfcf02c32fdbf", "43cd824a8ca28820f24bb45887e60bee57b967cf", "052b36fd8bde6035c11eb316c3f9a3665c0110f0", "073a02cea6a575db07f65bed2a0e5c078a7b904b", "264351dc439aa92c7ed87546fd6182d12a593dfb", "d95d71b2efe45c7fcda73ec13f9d051948d93b15", "5a0e84b72d161ce978bba66bfb0e337b80ea1708", "3346fb009ccf789f4cb7040ab118aae71af94cf0", "13e89b8808dde54d75ddad6d4a076a225de268c2", "8f5dfa652933c95443a848721ebe3f4b362dcf79", "16542a972ed470d2c66da0363ebfc753c2ea4114", "270ed3056dd319c75a092f3920a81422500b2cf8", "82802e411495bbad77fa2415c6d4633dde180764", "29e9cd18af650b7e448dea668121a1d98afd3c46", "eb2e9b3bcaa194920dede61d154c92cc7e748456", "4484ecdb2338660ca2b71ab2d1f7fb656086fcfc", "09836dc08f94e0b9e03a21a512e579dab28d0f9e", "046b7f6b48e4d9fcf173dea0a0802d7e87b383e1", "046a1302079f56b94c81457bf7fd21c3417a9f72", "336adcbb5553b99aa059e84e56c97678c4ee842a", "2420616018134ce70d778b3264e66659529f9727", "3331b66b235671351286b675bd28976eeb0b7576", "a95f3e6a103a3e295627b19080bef149f1ca07d9", "ddab97a37ca4a139a1ee4ed4205ab5183f512774", "15f4d8eca1d25f6ec7fbfaa939e5e70bb4abbbcd" ], "paperAbstract": "Long-range low-power localization is a key technology that enables a host of new applications of wireless sensor nodes. 
We present RF-Echo, a new low-power RF localization solution that achieves decimeter accuracy in long range indoor non-line-of-sight (NLOS) scenarios. RF-Echo introduces a custom-designed active RF reflector ASIC (application specific integrated circuit) fabricated in a 180nm CMOS process which echoes a frequency-shifted orthogonal frequency division multiplexing (OFDM) signal originally generated from an anchor. The proposed technique is based on time-of-flight (ToF) estimation in the frequency domain that effectively eliminates inter-carrier and inter-symbol interference in multipath-rich indoor NLOS channels. RF-Echo uses a relatively narrow bandwidth of $\\leq$80 MHz which does not require an expensive very high sampling rate analog-to-digital converter (ADC). Unlike ultra-wideband (UWB) systems, the active reflection scheme is designed to operate at a relatively low carrier frequency that can penetrate building walls and other blocking objects for challenging NLOS scenarios. Since the bandwidth at lower frequencies (2.4 GHz and sub-1 GHz) is severely limited, we propose novel signal processing algorithms as well as machine learning techniques to significantly enhance the localization resolution given the bandwidth constraint of the proposed system. The newly fabricated tag IC consumes 62.8 mW active power. The software defined radio (SDR) based anchor prototype is rapidly deployable without the need for accurate synchronization among anchors and tags. 
Field trials conducted in a university building confirm up to 85 m operation with decimeter accuracy for robust 2D localization.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117840", "http://blaauw.engin.umich.edu/wp-content/uploads/sites/342/2018/03/Chuo-RF-Echo.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/52c4bef81a1328d805448df35674e4ae65c45fa3", "sources": [ "DBLP" ], "title": "RF-Echo: A Non-Line-of-Sight Indoor Localization System Using a Low-Power Active RF Reflector ASIC Tag", "venue": "MobiCom", "year": 2017 }, "52c84aa6c4b25fa97a7a5de31c968d87e61da81e": { "authors": [ { "ids": [ "9646877" ], "name": "Alan Quach" }, { "ids": [ "1696074" ], "name": "Zhongjie Wang" }, { "ids": [ "1794927" ], "name": "Zhiyun Qian" } ], "doi": "10.1145/3084441", "doiUrl": "https://doi.org/10.1145/3084441", "entities": [ "Acknowledgement (data networks)", "Data point", "Debian", "Heartbleed", "Kernel (operating system)", "Limiter", "Linux", "Linux", "Operating system", "Rate limiting", "Scalability", "Terminate (software)" ], "id": "52c84aa6c4b25fa97a7a5de31c968d87e61da81e", "inCitations": [ "5ffc035d46a3ff9126282fe037d91bf16995175d" ], "journalName": "", "journalPages": "8", "journalVolume": "", "outCitations": [ "1a3224f9332b4dc074d7a06ea8e4733cedfd6841", "201b0a185dda51629d7b6fdef3b380a0beaba455", "5d9b7959437a780df8f260339f013c942b9ed01d", "49a8f9e8ed7dbd8382dbd30aa81321281cd54c07", "43ccb318eaa9f21965d10eb0d4b301cb4709278d", "8cdbab26fa0dee8f165b6680e59e8966679fd068", "80fcf6611553a60b0decccafb174c4b15a00460c", "4e33e9d1aa91aa78819ec9700d1024e0f0cdef6c", "64205427d0f900997ec0a22fdd4946a3ba16f1b9", "39e45a420ee287489bbd5245946ba8c6a2305848", "067eaf07784cdab6836395a2b2f34a506d860fee", "adb7069984e3fa48505cd5081ec118ccb95529a3", "08fabacc44f1f7d3b968fa41e52e350a24e02abc", "7e9a2085180533491a107c9dc953dd850cf0d631", "1f3229356e448ac1f517560377b12a7885571868" ], "paperAbstract": "To combat blind in-window attacks against TCP, changes 
proposed in RFC 5961 have been implemented by Linux since late 2012. While successfully eliminating the old vulnerabilities, the new TCP implementation was reported in August 2016 to have introduced a subtle yet serious security flaw. Assigned CVE-2016-5696, the flaw exploits the challenge ACK rate limiting feature that could allow an off-path attacker to infer the presence/absence of a TCP connection between two arbitrary hosts, terminate such a connection, and even inject payload into an unsecured TCP connection.\n In this work, we perform a comprehensive measurement of the impact of the new vulnerability. This includes (1) tracking the vulnerable Internet servers, (2) monitoring the patch behavior over time, (3) picturing the overall security status of TCP stacks at scale. Towards this goal, we design a scalable measurement methodology to scan the Alexa top 1 million websites for almost 6 months. We also present how notifications impact the patching behavior, and compare the result with the Heartbleed and the Debian PRNG vulnerability. 
The measurement represents a valuable data point in understanding how Internet servers react to serious security flaws in the operating system kernel.", "pdfUrls": [ "http://doi.acm.org/10.1145/3084441", "http://www.cs.ucr.edu/~zhiyunq/pub/sigmetrics17_cack_measurement.pdf", "http://doi.acm.org/10.1145/3078505.3078510" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/52c84aa6c4b25fa97a7a5de31c968d87e61da81e", "sources": [ "DBLP" ], "title": "Investigation of the 2016 Linux TCP Stack Vulnerability at Scale", "venue": "SIGMETRICS", "year": 2017 }, "52c90701403f99e1f721815ebc2a07cc795526e5": { "authors": [ { "ids": [ "2893798" ], "name": "Chun Cao" }, { "ids": [ "2662643" ], "name": "Weiyi Wang" }, { "ids": [ "1720117" ], "name": "Ying Zhang" }, { "ids": [ "1695090" ], "name": "Xiaoxing Ma" } ], "doi": "10.1109/CLOUD.2017.22", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.22", "entities": [ "Apache HBase", "Apache Hadoop", "Benchmark (computing)", "Data model", "Ecosystem", "IBM Tivoli Storage Productivity Center", "NoSQL", "Relational database", "Software maintenance" ], "id": "52c90701403f99e1f721815ebc2a07cc795526e5", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "106-113", "journalVolume": "", "outCitations": [ "6b56e86e6741c461455d794a571aae2bbad856d6", "cb933dbf49f31bd3dfa3a85083901e4f1737b2bb", "18a5f443299784479e78d9e77f175af57cb2fa2b", "48ffef88301fae4e3c64bf719c25ebfcf341e42d", "5c68414f2ed7c91fc61c1b6bfc929d66536c124f", "d4b25b918737a61362ca85a1979868a19ec04577", "7fb5d07836f38186a4385c5e4a9816b8de2914a6", "771706d2a4a935d4ae097d228cd407eaa2c34e4e", "53115fffaa36c99a45fb7741fa74d66aa4fb8517", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "c071ef3f2ef2976ec8b5a137f63f85db2da00774", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "63822e05449799dccddf1c13311f6c57b4a20c16", "6c864b602ccc3f92ea4041c77adbd815bb987309", "39f6623e4211837dcbf22d221803896271b06cac", 
"f1e1a826e0203aa28d57b99ea57610b0070ef8a1", "3bf43a2ed416a3b5144a5afaff2a356943e570d1" ], "paperAbstract": "Apache HBase is a widely used non-relational database in the Hadoop ecosystem. However, it will be inefficient if users perform multidimensional queries. Some of existing approaches incur extra costs in write performance or consistency maintenance, others are limited to specific applications. In this paper, we propose a novel data model called CFIDM, short for Column Family Indexed Data Model. In CFIDM, we convert the queried column into multiple column families. Values in the specific column are partitioned. Each partition is manifested by a column family, turning column family into an index with no additional cost. Then we provide guides to build this data model. Finally, we evaluate the effectiveness and versatility of CFIDM on the Bixi data set and the TPC-DS benchmark. Results show that CFIDM can save 6.6% disk space for Bixi and 35% for TPC-DS, maximally speeding up the queries by 5X and 5.5X respectively.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.22" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/52c90701403f99e1f721815ebc2a07cc795526e5", "sources": [ "DBLP" ], "title": "Leveraging Column Family to Improve Multidimensional Query Performance in HBase", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "52e9eeacde685d219b701debf6dbc1ce3c3c153c": { "authors": [ { "ids": [ "1793529" ], "name": "Cho-Jui Hsieh" }, { "ids": [ "3422911" ], "name": "Si Si" }, { "ids": [ "1783667" ], "name": "Inderjit S. 
Dhillon" } ], "doi": "10.1145/3097983.3098080", "doiUrl": "https://doi.org/10.1145/3097983.3098080", "entities": [ "Algorithm", "Approximation algorithm", "Computer", "Convex function", "Distributed computing", "Hessian", "Kernel (operating system)", "Kernel method", "Line search", "Logistic regression", "Loss function", "Nonlinear system", "Optimization problem", "Parallel computing", "Rate of convergence", "Scalability", "Support vector machine" ], "id": "52e9eeacde685d219b701debf6dbc1ce3c3c153c", "inCitations": [], "journalName": "", "journalPages": "245-254", "journalVolume": "", "outCitations": [ "50d936aa3e549903089d14cb7eb682b6058b1611", "84bd007b64d8f2d99d9bbde3a232f55da00283bc", "9bfb2b97e64339247f2e9e2b6106e21c142ac1b2", "266c03c0eb498ff0b83556bdbf6dfc6347c1d7a2", "89ba1fccbf764bbc464796eb546338315c810570", "6e234ddb87f32e9daff3cf35ef8c7612613facc2", "25e60172a650b3e7482732d65a1f1cc179dfca65", "15b233873adbdc3a296bf041fb9df8ddd6a81216", "1c95bfb79b0605fd1d0dbd15fa98c61cb54fbd54", "55138c2b127ebdcc508503112bf1d1eeb5395604", "4ff7a5d31c2524f62662da67a22560867e025456", "b6fff8b8ea77f157913986e7af53951d9fc1128e", "91e576f8863f117f52360456e40b101347d59b9a", "90e962c7980c790e5b3ba9d511e13f19b47b622f", "160e1a787a3364a10ea89a9a8c04238cd468d1a4", "70f7b48cf82be70baca45ccd9bfeb382c4ccbe4d", "14f4c59b74a87565770433a367ffecba564c5e4c", "1854568d1111aeeb6132db3e3384c4f927e4a7b6", "0e5f68344cd53dd23213c0d72c1b9cf5b7b0fd06", "18c7fb55ff796db5c5a604e0ca44b6baaeb12239", "20915fb69ee44a17b8b36c05ce06b4a59107f700", "3557a5708b85a99be717307a4946454b0c402a94", "4e171856b5eac3a2bf7ebc1c243d9937b55a09bc", "9e2efeb74a8622098e521b54e328c177d7070faf", "d1fa8485ad749d51e7470d801bc1931706597601", "0c157fa7fec2ad6318f0008b733ce2806f847a1a", "d4a4729a5ae5e88e21a6636f7abbc35f00ae7aed", "12325eaa502bc78762e628c1eecf1181841a75a7", "1457613f8d15e020eaf0a39af794b10c9ad924a8", "0f55ddb8b330db348a1213a1158c4246ca5c72fb", "2cd850df68321f9a75b9b86ad73d92ad03325302", 
"3cc7230bd445128fc0dfb62cf54bfb01c25d377c", "4f5ef93300aafc04960b17de5641deeba83973d3" ], "paperAbstract": "Nonlinear kernel machines often yield superior predictive performance on various tasks; however, they suffer from severe computational challenges. In this paper, we show how to overcome the important challenge of speeding up kernel machines using multiple computers. In particular, we develop a parallel block minimization framework, and demonstrate its good scalability in solving nonlinear kernel SVM and logistic regression. Our framework proceeds by dividing the problem into smaller subproblems by forming a block-diagonal approximation of the Hessian matrix. The subproblems are then solved approximately in parallel. After that, a communication efficient line search procedure is developed to ensure sufficient reduction of the objective function value by exploiting the problem structure of kernel machines. We prove global linear convergence rate of the proposed method with a wide class of subproblem solvers, and our analysis covers strongly convex and some non-strongly convex functions. We apply our algorithm to solve large-scale kernel SVM problems on distributed systems, and show a significant improvement over existing parallel solvers. As an example, on the covtype dataset with half-a-million samples, our algorithm can obtain an approximate solution with 96% accuracy in 20 seconds using 32 machines, while all the other parallel kernel SVM solvers require more than 2000 seconds to achieve a solution with 95% accuracy. Moreover, our algorithm is the first distributed kernel SVM solver that can scale to massive data sets. 
On the KDDB dataset (20 million samples and 30 million features), our parallel solver can compute the kernel SVM solution within half an hour using 32 machines with 640 cores in total, while existing solvers can not scale to this dataset.", "pdfUrls": [ "http://www.stat.ucdavis.edu/~chohsieh/rf/psvm.pdf", "http://www.cs.utexas.edu/~inderjit/public_papers/parallel_kernel_kdd17.pdf", "http://doi.acm.org/10.1145/3097983.3098080" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/52e9eeacde685d219b701debf6dbc1ce3c3c153c", "sources": [ "DBLP" ], "title": "Communication-Efficient Distributed Block Minimization for Nonlinear Kernel Machines", "venue": "KDD", "year": 2017 }, "5304d6f7ca726e510106dba04912fc342328936d": { "authors": [ { "ids": [ "2741053" ], "name": "Wang-Cheng Kang" }, { "ids": [ "2442612" ], "name": "Chen Fang" }, { "ids": [ "8056043" ], "name": "Zhaowen Wang" }, { "ids": [ "1814008" ], "name": "Julian McAuley" } ], "doi": "10.1109/ICDM.2017.30", "doiUrl": "https://doi.org/10.1109/ICDM.2017.30", "entities": [ "Generative model", "High-level programming language", "Pixel", "Recommender system" ], "id": "5304d6f7ca726e510106dba04912fc342328936d", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "207-216", "journalVolume": "", "outCitations": [ "151a851786f18b3d4570c176ca94ba48f25da77b", "7bcb8505d5250435382fe7f706bcd3fbeaef9857", "9ebfce7a20c19ca5233b83a28a5743354fb1ecda", "5287d8fef49b80b8d500583c07e935c7f9798933", "10e1fb949e10d5fe99d5f1b32bb48d625149bce8", "f953553d4b8854cf8716015cacf57d35e418b375", "0cb5079c39933bd8897fde7edecf156ff57830d7", "2bb2ba7c96d40e269fc6a2d5384c739ff9fa16eb", "de1a92db873523fce9fa57921cfd991c9182f505", "65abb2e18aae3e8abe281a0c7a13caf85842ab2f", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "35756f711a97166df11202ebe46820a36704ae77", "46aca9fd693cda49f7f02d575efaee0977f078c7", "009041b1f33adda734de3ffef5f05a2ff1ed665a", 
"722fcc35def20cfcca3ada76c8dd7a585d6de386", "480ca76957e066eb6b24cd28df998d30310b4ced", "20e9e36329ddabc30261ef5bee487f491d27f835", "760948698540118031e590fbc884fcea209f9104", "184b7281a87ee16228b24716ca02b29519d52eb5", "064fb3a6f2666e17f6d411c0a731d56aae0a785e", "1c2cae04e0e00dcec3fe6840b2daa00ae9e8a0a3", "ced6d0b0257273850af38a3757151264beefdd73", "02fae3c2d5f51a0f47d6a335c75a3c7aeefa18b2", "6de2b1058c5b717878cce4e7e50d3a372cc4aaa6", "900bdd3fc700ebf9417c58df15a05eed8c52a90d", "4c9cec89a2c9c8173ee53ab4cda2c021421eb7a5", "ba753286b9e2f32c5d5a7df08571262e257d2e53", "3eaf79589dbb9bce5a502e867a8f03917e52de26", "4a31ca27b987606ae353b300488068b5240633ee", "1710c43395bb4fb0417854e87d0b171a849f4e7c", "2e0f5e72ad893b049f971bc99b67ebf254e194f7", "17fb86c0e57df39a57fbb7d6d8a9deb8e518fb9c", "48a42303559ea518ba06f54a8cfce4226bb0e77e", "16174bcefcf38493c90576d0e3fed46537ef54fd", "272216c1f097706721096669d85b2843c23fa77d", "3d08280ae82c2044c8dcc66d2be5a72c738e9cf9", "0aa2a4d259433016ebc899c496faea03c024c0bd", "0bbc35bdbd643fb520ce349bdd486ef2c490f1fc", "d8c13611175285526ba3bc0c2529fb3ebb8eb187", "148721b162dd355812fae94c8aaf365e5e2c3a79", "2ef7d506b25731d0f3ec0c8f90b718b6e5bbd069", "01fcae344d2edb715bcc63a40b6052c0331741bd" ], "paperAbstract": "Building effective recommender systems for domains like fashion is challenging due to the high level of subjectivity and the semantic complexity of the features involved (i.e., fashion styles). Recent work has shown that approaches to 'visual' recommendation (e.g. clothing, art, etc.) can be made more accurate by incorporating visual signals directly into the recommendation objective, using 'off-the-shelf' feature representations derived from deep networks. 
Here, we seek to extend this contribution by showing that recommendation performance can be significantly improved by learning 'fashion aware' image representations directly, i.e., by training the image representation (from the pixel level) and the recommender system jointly; this contribution is related to recent work using Siamese CNNs, though we are able to show improvements over state-of-the-art recommendation techniques such as BPR and variants that make use of pretrained visual features. Furthermore, we show that our model can be used generatively, i.e., given a user and a product category, we can generate new images (i.e., clothing items) that are most consistent with their personal taste. This represents a first step towards building systems that go beyond recommending existing items from a product corpus, but which can be used to suggest styles and aid the design of new products.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.30", "http://arxiv.org/abs/1711.02231", "https://arxiv.org/pdf/1711.02231v1.pdf", "http://cseweb.ucsd.edu/~jmcauley/pdfs/icdm17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5304d6f7ca726e510106dba04912fc342328936d", "sources": [ "DBLP" ], "title": "Visually-Aware Fashion Recommendation and Design with Generative Image Models", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "530fcecc1afd2e2605aefe16e1c981a7bdd6d351": { "authors": [ { "ids": [ "2398441" ], "name": "Oliver Jakob Arndt" }, { "ids": [ "19211530" ], "name": "Fabian David Trager" }, { "ids": [ "32893656" ], "name": "Tobias Mo\u00df" }, { "ids": [ "1766866" ], "name": "Holger Blume" } ], "doi": "10.1109/IPDPSW.2017.100", "doiUrl": "https://doi.org/10.1109/IPDPSW.2017.100", "entities": [ "Abstraction layer", "Algorithm", "Application programming interface", "C++", "Central processing unit", "Computer", "Digital signal processing", "Exynos", "Field-programmable gate array", "Graphics 
processing unit", "Image gradient", "Low-power broadcasting", "MPSoC", "Manycore processor", "Multi-core processor", "Nallatech", "OpenCL API", "Parallel computing", "Scheduling (computing)", "Software portability", "Supercomputer", "Threading Building Blocks", "Throughput", "Xeon Phi" ], "id": "530fcecc1afd2e2605aefe16e1c981a7bdd6d351", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)", "journalPages": "6-17", "journalVolume": "", "outCitations": [ "84c172ad228900b5fae49cf00aee3a8b0f64abb2", "10d6b12fa07c7c8d6c8c3f42c7f1c061c131d4c5", "4ae519abb23eb4d77d9c61918f013f3a26e8d1a9", "bb4cf037d8a5adbb3f08a3405d926d022b8c27c5", "3fabf788c103431e4aa398592ff2127e37794e53", "24251f02c34f32b1dd96572a1d984c4463a26a10", "cbca4841bf465fc141f777c429d347308d36b6ec", "03ed4064310cf3a1b93187e26eeaa4ecf4539532", "bf7b5bcebfcd3da537cfc84b89c60911aaef79b2", "e1053f371b0d26938caf4bcc6e1d3efbfb228650", "9a4d73973f2914a1cd7fc666263dd9b990d5d405", "08db232f21d4414cd4770d8e3fc0a95c48eb8227" ], "paperAbstract": "The increased use of application-specific computational devices turns even low-power chips into high-performance computers. Not only additional accelerators (e.g., GPU, DSP, or even FPGA), but also heterogeneous CPU clusters form modern computer systems. Programming these chips is however challenging, due to management overhead, data transfer delays, and a missing unification of the programming flow. Moreover, most accelerators require device specific optimizations. Thus, for application developers, fulfilling software's initial intention to serve high portability is one of the most ambitious objectives. In this work, we present a software abstraction layer unifying the programming flow for parallel and heterogeneous platforms. 
Therefore, we offer a generic C++ API for parallelizing on heterogeneous CPU clusters and offloading to accelerators, specifically addressing applications with strict real-time constraints. At a freeconfigurable choice of parallelization- and offloading-frameworks (e.g., TBB, OpenCL) without affecting the portability, we also include automatic profiling methods. While offering high configurability of the architecture mapping, these methods ease the development of optimum scheduling strategies - e.g., in terms of power, throughput, or latency. To demonstrate the use of theproposed methods, we present heterogeneous implementations of the Semi-Global Matching and Histograms of Oriented Gradients algorithms as exemplary advanced driver-assistance algorithms. We provide an in-depth discussion of scheduling strategies for execution on a Samsung Exynos 5422 MPSoC, an Intel Xeon Phi manycore, and a general-purpose processor equipped with a Nallatech PCIe-385N FPGA accelerator card.", "pdfUrls": [ "https://doi.org/10.1109/IPDPSW.2017.100" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/530fcecc1afd2e2605aefe16e1c981a7bdd6d351", "sources": [ "DBLP" ], "title": "Portable Implementation of Advanced Driver-Assistance Algorithms on Heterogeneous Architectures", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)", "year": 2017 }, "53212cf211e0c8e4d7c155eaceefd42b29967a14": { "authors": [ { "ids": [ "1922545" ], "name": "Shervin Hajiamini" }, { "ids": [ "1765372" ], "name": "Behrooz Shirazi" }, { "ids": [ "33360863" ], "name": "Chris Cain" }, { "ids": [ "40454458" ], "name": "Hongbo Dong" } ], "doi": "10.1109/IGCC.2017.8323582", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323582", "entities": [ "Combinatorial optimization", "Graph partition", "Integer programming", "Job shop scheduling", "Legal fine", "Linear IgA Bullous Dermatosis", "Linear programming", "Makespan", "Mathematical optimization", 
"Multi-core processor", "Nevus sebaceous", "Optimization problem", "Partition problem", "Program optimization", "Run time (program lifecycle phase)", "Scheduling (computing)", "Single-core", "Speedup", "XIAP gene" ], "id": "53212cf211e0c8e4d7c155eaceefd42b29967a14", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ "642f72cdee8f3e9a5275e47cad844e1c54b57b83", "b2abc27230a97177f7dd660035d56d9fbfca87fa", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "37bd5728e359535460dd7bf03bc265848f8f0249", "f40680efe9b42f4a83005f208a818d628ec03e8c", "f325dc834d1ec70c7c557b84ab6157d1e682f54e", "00ccbe8b4e5691d3ef9aa190d9b9cdf85b2266a0", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "64cd51eeb6f55b39ab0194ccbd7cd1b078b73a36", "7477ba88a6ecb151c61e755cf608736367c6f4d3", "078e00be20bfdc4eeae762f6170ccded05f452c6", "54f3331b575b2d451c2d716f86496cada23d596d", "1f4a8213282034b2d7547767c3ce81f85903fcaa", "abcdac2fa21a005809f0ab2391a722c2a205dd11", "9c154444ab777e83db6d3093af52261c1ff15154", "40ca9cc71ac3f9840268dd051d8003bd14dc49a2", "031d644fbe3455768776e84f126b7b6d79da0f86", "63ef39942d6c1b1eddb0f4ff9dcaa28e4036eec9", "ba98391f6175421d355972468dbae26389d04b9d", "83712692555c62db3b2bb91ca33f25fb71826b4f", "dd99a0555902990bd9064da31e9eb2ccdc6f8207", "a9a744a382bcc0fb2d8c643c5ea7ba8926cb77c1", "15860f9f774f19f245f016d9cf479222e4f9a6ba", "3cf2bcab12160c888d6ed17124cb54628a9eea4f", "422554854e439fd99d4f07c80f304d7d91a6943d", "352a8957005dc5519b15ed1870751ec494d66395" ], "paperAbstract": "In today's multicore systems, depending on an application's computational demand, cores are either operated individually at different Voltage/Frequency (V/F) levels or grouped into multiple Voltage-Frequency Islands (VFIs) to reduce system energy consumption. 
This paper formulates a task scheduling and VFI partitioning problem whose optimization goal is to minimize the task set (application) execution time (makespan) for a given energy budget. First, the combinatorial optimization problem is formulated with Integer Linear Programming (ILP) to obtain per-core, per-task dynamic V/F levels in a fine-grain VFI-based system with single-core islands. Next, static task scheduling on coarse-grain VFI-based systems, where an island can contain several cores operated at the same V/F level, is formulated with Mixed Integer Linear Programming (MILP), considering the energy budget and task set's precedence constraints. The experimental results show that under different energy budget constraints, fine-grain, dynamic task allocations provide on average 1.35x speedup over static coarse grain scheduling and partitioning methods.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323582" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/53212cf211e0c8e4d7c155eaceefd42b29967a14", "sources": [ "DBLP" ], "title": "An energy-constrained makespan optimization framework in fine-to coarse-grain partitioned multicore systems", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "532be02930ca307815eb4fca7ecfb311553deaa6": { "authors": [ { "ids": [ "1929857" ], "name": "Sehun Jeong" }, { "ids": [ "26944840" ], "name": "Minseok Jeon" }, { "ids": [ "1726682" ], "name": "Sung Deok Cha" }, { "ids": [ "39476651" ], "name": "Hakjoo Oh" } ], "doi": "10.1145/3133924", "doiUrl": "https://doi.org/10.1145/3133924", "entities": [ "Algorithm", "Disjunctive normal form", "Greedy algorithm", "Heuristic", "Java", "Pointer analysis", "Precision and recall" ], "id": "532be02930ca307815eb4fca7ecfb311553deaa6", "inCitations": [ "aa4ec46d5aa39d3799ec7610a88dcef871d7a48e" ], "journalName": "PACMPL", "journalPages": "100:1-100:28", "journalVolume": "1", "outCitations": [ 
"17f58c906c6f453fc10b1d7e4db0e545b70e27d1", "8469007f1d62cac7d7188dbe6b67b24d40d46ca4", "3ef15e17f7932c6d86eb06e3eb254e6dc621e029", "7b9b7d97b361a29d59a430e2d4871f39222e2ab3", "b03bd79859fab03ccb0e8fcd79e8be98da16ff22", "03aacfe8d36a673ecc379d3b76e7df1245a8d9e5", "9ea49abc003a832776df864a92838b3b51f3e55e", "5e567cda5999a6dd4e5da4bb30b9033f8d5687c4", "80af0dfde58a4f1e4f7ff35fa2c882a4ab3bbad2", "6c1fa64a6b5d3565fc4ab9ca97bf530069ce2669", "15f8f3f80c22008cb5f95e870403227d38420c4f", "2f9b49160ef60a11e32fae1a102384f77b4c7272", "3aabf1d95b823fadbdd4d5d1cc71f566600c1171", "a68e2ede5e01997bc7fe0cf597a06650acdac0f5", "6b41ea91e149ecb228af142c5e8af93b6a2d6982", "96c1b21bb563f9437dd3c5e4b6415309cefdc185", "153d144f411f7054b0c4bbd6b829a3d8c2b2df31", "146ba77c0e22948e0b8ba1f9e8b50580c26c2553", "3cd077d127026466f63c4fd196b1a83b54fbe660", "0ca0fe955dc8b7bdea61f03a767f8b8a57ac51ee", "8bfd64fe8f9192a8b3c801c7d91fd46cabfc5319", "027eb436c35c7e293e7ebc565163cb54c05fe2e9", "0349574cd39d88682ae313d04ca3235e7f66947d", "69eb94eb3c7009014ea1a1015f0787a8bfa3c886", "1d75e72cd0785bf6d66ca32dcfa434970f53d18b", "00a9ba0063d34ec56792849a67ef57b4601becbb", "187768583aa8fd7dfe64cc88cb2aa831b6b531db", "1a5cd2cb21121bae9c5f2be97a20da7edcf70df2", "a578530c785b14f54918720ee4acb672ffe3986e", "4cf08665ace78092e08887100280be9aacd39e78", "44daa1fde25be30d21c4a1a32b7af314c9890af8", "3f46a08a4a93faf5dc5161a4ab9f1ea1b94adf48", "1a3eeed05708e30b2b4b1f9f6246a2b1189acd21", "2a5b1c123f4341857a287965530b39e4bfe9ddfa" ], "paperAbstract": "We present a new data-driven approach to achieve highly cost-effective context-sensitive points-to analysis for Java. While context-sensitivity has greater impact on the analysis precision and performance than any other precision-improving techniques, it is difficult to accurately identify the methods that would benefit the most from context-sensitivity and decide how much context-sensitivity should be used for them. 
Manually designing such rules is a nontrivial and laborious task that often delivers suboptimal results in practice. To overcome these challenges, we propose an automated and data-driven approach that learns to effectively apply context-sensitivity from codebases. In our approach, points-to analysis is equipped with a parameterized and heuristic rules, in disjunctive form of properties on program elements, that decide when and how much to apply context-sensitivity. We present a greedy algorithm that efficiently learns the parameter of the heuristic rules. We implemented our approach in the Doop framework and evaluated using three types of context-sensitive analyses: conventional object-sensitivity, selective hybrid object-sensitivity, and type-sensitivity. In all cases, experimental results show that our approach significantly outperforms existing techniques.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133924" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/532be02930ca307815eb4fca7ecfb311553deaa6", "sources": [ "DBLP" ], "title": "Data-driven context-sensitivity for points-to analysis", "venue": "PACMPL", "year": 2017 }, "532d1d6c44ccf0413af0d5a6f0707a0fe9a92d15": { "authors": [ { "ids": [ "2793795" ], "name": "Fisnik Kastrati" }, { "ids": [ "1809585" ], "name": "Guido Moerkotte" } ], "doi": "10.1145/3035918.3064022", "doiUrl": "https://doi.org/10.1145/3035918.3064022", "entities": [ "Algorithm", "Branch misprediction", "Central processing unit", "Computer data storage", "Database", "Disjunctive normal form", "Mathematical optimization", "Program optimization", "Random-access memory" ], "id": "532d1d6c44ccf0413af0d5a6f0707a0fe9a92d15", "inCitations": [], "journalName": "", "journalPages": "731-744", "journalVolume": "", "outCitations": [ "04411bf945f721f5e375d478b0a2b2014d9fe4d3", "20174871f7eca9bc0c99bd0aed7aa39c560f5ffa", "36c17e3adaaab7262e6575588665bf44b344e8f3", "09c1b69ab0fe1315b0d5e5e0b0853585c4a319b5", 
"412a9e54bbb31e12d008a9579994e009c5b40b46", "fc6f792e5c35d16c31ba9cfe5dd4f4cc08f16072", "6c9189db2150e910837bf639f6da45d7da85effe", "19629429a0ade02b450f5a585bdde880fd32b22b", "e75c5d1b7ecd71cd9f1fdc3d07f56290517ef1e5", "c57f3ab3737423a9518d0d2c64d78c195a4aac35", "0997037e940df06ed7a6d19f7501579aab01e829", "51f938c0fb1f7c6c0de1b10cabf246d823aa9fda", "359760cb0df73b9d8e6501b5ab5ff93a6c12b44f", "e8fc44a5e95f64e9181688ee387e33de32c5c634", "8865ba2b6b169246ed0a1600bd7f9d447c4bd47f", "44710d563b5ebeacaeb95935c58d1051cf723e8d", "5046a718f92447642939f5c93414dc97225d726a", "d1cd7a0fd94935d85536b839d64f0e963dcd1c10", "28aaffa45781ae16658e96400813ee9eadbe459b", "7ead08393fa2000ea03bb1cc1e3e6e0ac991aac6", "30b1293e39c52ddd0e2a617de47c1ad843621258", "14786ea73156d3273a235cff25e4cbe950d2e1c7", "c2400e79ef8bd8449c5d09322d4ce412aeb3c30a", "75824a5e3e36b1744dad99077d5d7e8d9578f9c2", "8ff60fc4a4ca64d5e2351c6436f7d1ccdbd864f2", "6ffcd9a1bb0ad5ec1c6925f52c8c1bd9d7ee6216", "6383f40bcba78fb7b28e0d072a639fbe1f16e1a3", "477c163092c54c27bc28bb7106a8c1afc4676dba" ], "paperAbstract": "Optimization of disjunctive predicates is a very challenging task which has been vastly neglected by the research community and commercial databases. In this work, we focus on the complex problem of optimizing disjunctive predicates by means of the bypass processing technique. In bypass processing, selection operators split the input tuple stream into two disjoint output streams: the true-stream with tuples that satisfy the selection predicate and the false-stream with tuples that do not. Bypass processing is crucial in avoiding expensive predicates whenever the outcome of the query predicate can be determined by evaluating the less expensive ones.\n In main memory databases, CPU architectural characteristics, such as the branch misprediction penalty, become a prominent cost factor which cannot be ignored. 
Our algorithm takes into account the branch misprediction penalty, and, in addition, it eliminates common subexpressions.\n The current literature relies on two assumptions: (1) predicate costs are assumed to be constant, (2) predicate selectivities are assumed to be independent. Since both assumptions do not hold in practice, our approach is not based on any of them.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064022" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/532d1d6c44ccf0413af0d5a6f0707a0fe9a92d15", "sources": [ "DBLP" ], "title": "Optimization of Disjunctive Predicates for Main Memory Column Stores", "venue": "SIGMOD Conference", "year": 2017 }, "535148bde1f6ec1b5569ad5d984af3a044b4ad15": { "authors": [ { "ids": [ "3341348" ], "name": "Pengyu Zhang" }, { "ids": [ "39122747" ], "name": "Colleen Josephson" }, { "ids": [ "2061177" ], "name": "Dinesh Bharadia" }, { "ids": [ "2546322" ], "name": "Sachin Katti" } ], "doi": "10.1145/3143361.3143374", "doiUrl": "https://doi.org/10.1145/3143361.3143374", "entities": [ "Bluetooth", "Code word", "Codebook", "Tag system", "Uncompressed video" ], "id": "535148bde1f6ec1b5569ad5d984af3a044b4ad15", "inCitations": [ "cff134fcab7a64187d353ef539af7bd791294d16" ], "journalName": "", "journalPages": "389-401", "journalVolume": "", "outCitations": [ "06e8e428d6c1e36575657c6c4aeda65e4930ef4b", "498d2ed40427eeb78799fa96ac0f5a58c6648d05", "5a0e84b72d161ce978bba66bfb0e337b80ea1708", "14ba7b31b92233766089dfae54b53e339822f3cc", "1ad6819ab62788a132f4d0773717fdacdd55af6a", "b8bfce11df38955685c09f408ca3f7828af2a0c1", "15a03a6f03a98e0fc1f64020247ea6c8479668a1", "7abee770b75fb5f6801825ce2b854c216d8b9b2a", "0d63d6a0cc224945ee253e26c854ed2740a4b8fd", "82802e411495bbad77fa2415c6d4633dde180764", "25a8326f9d0a6a3b3fd3949a87fad23e0350a012", "76b035454baf22419f0067eb8dc9fac68f4e68d1", "2b3aabf4173e515a6e9bbc3410cd5dd9c87549ba", "c70e4a09a00c302f26ce60ac15e4e208af3b0621", 
"159550b613a6310bc78df45fe8c084c3503df05f", "2d12b6189a0681b933f9a96b8ab14daac2bcfd73", "8e2821d7185de16bf88a4c90383ef3690ec04248", "0c9b68449b6241478ba38c2af220b393db86e206", "02c75551123cae6dfbb0c69de96a199c974bcf89", "91bdacc904edb540fa57ea9a4535a1a1d79d855b", "015ce3f823dac9e78ab3ff1f63e67e5a00145ac6", "06764fcbceaeb6d5aae590fa01a6b721adb6932a", "8347fa4ad280baf119580cc680fd85ddb16d7236" ], "paperAbstract": "We introduce the design and implementation of FreeRider, the first system that enables backscatter communication with multiple commodity radios, such as 802.11g/n WiFi, ZigBee, and Bluetooth, while these radios are simultaneously used for productive data communication. Furthermore, we are, to our knowledge, the first to implement and evaluate a multi-tag system. The key technique used by FreeRider is codeword translation, where a tag can transform a codeword present in the original excitation signal into another valid codeword from the same codebook during backscattering. In other words, the backscattered signal is still a valid WiFi, ZigBee, or Bluetooth signal. Therefore, commodity radios decode the backscattered signal and extract the tag's embedded information. More importantly, FreeRider does codeword translation regardless of the data transmitted by these radios. Therefore, these radios can still do productive data communication. FreeRider accomplishes codeword translation by modifying one or more of the three dimensions of a wireless signal --- amplitude, phase and frequency. A tag ensures that the modified signal is still comprised of valid codewords that come the same codebook as the original excitation signal. 
We built a hardware prototype of FreeRider, and our empirical evaluations show a data rate of ~60kbps in single tag mode, 15kbps in multi-tag mode, and a backscatter communication distance up to 42m when operating on 802.11g/n WiFi.", "pdfUrls": [ "http://doi.acm.org/10.1145/3143361.3143374" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/535148bde1f6ec1b5569ad5d984af3a044b4ad15", "sources": [ "DBLP" ], "title": "FreeRider: Backscatter Communication Using Commodity Radios", "venue": "CoNEXT", "year": 2017 }, "536398fe6abf90f81707de0710bf041d6e9252d5": { "authors": [ { "ids": [ "1721526" ], "name": "Apan Qasem" }, { "ids": [ "40798476" ], "name": "Samuel Teich" } ], "doi": "10.1109/IGCC.2017.8323574", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323574", "entities": [ "Anatomic Node", "Architecture as Topic", "Data structure", "Experiment", "Heterogeneous computing", "Impacted tooth", "integral" ], "id": "536398fe6abf90f81707de0710bf041d6e9252d5", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ "28b0148bcca982db912cd26369f76e3835a506b5", "6456603d61e7b09817c9e3821cf6845997d63e12", "fe0458784e71cbfa3cc069a4ad42c184e9041e7d", "092217c2267f6e0673590aa151d811e579ff7760", "34eabbd8c597269e03ac88179106b2fc473963da", "1e113c73209f601cf34bb64445422db2e6d9dc67", "271be72b0c57686a3e77d0f794ef08db1b39a28f", "60f9d8874d8679b94896160bd3a8bf4b02d8b883", "58d48e6600ffcf878ed0bced5c47f904efdb6199", "070418aad85f42fb82adb46ad48dc04d07487db3", "bfb8e3d25d508ee5d1d0636e7735095d98c75118", "40a8d84f81261da88e05326c076de1bc497c3333", "14a477cf712ad5647180e6233dd0638c6c269fdd", "52a4130c74ad95664fbc067ef91fd75b748ac409", "94c23e26b42f45a87b0b4e9823b4636202567072" ], "paperAbstract": "Heterogeneous compute nodes have become an integral component of today's HPC systems. 
Recent research has established the importance of data layout and placement on such systems. This paper explores the power and energy aspects of data layout and placement on heterogeneous systems. We present results of an experimental study that evaluates the impact of data layout and placement on candidate HPC node architectures for kernels that exhibit a wide variety of performance characteristics. The results of the study show that data layout and placement can have a significant impact on the energy efficiency of heterogeneous applications. On some platforms, selecting the appropriate layout can yield up to an order-of-magnitude improvement in energy efficiency. The study shows that the conventional approach of using a structure-of-arrays for device-mapped data structures is not always profitable and that in addition to memory divergence, data layout choices are impacted by a variety of factors including arithmetic intensity and task granularity. The results of the study are used to establish a set of energy imperatives to guide data layout and placement across different architectures.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323574" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/536398fe6abf90f81707de0710bf041d6e9252d5", "sources": [ "DBLP" ], "title": "Evaluating the impact of data layout and placement on the energy efficiency of heterogeneous applications", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "5389fccd8e6679331eb4042d34f53ca8af3b9f5e": { "authors": [ { "ids": [ "21975961" ], "name": "Abhishek Bhattacharjee" } ], "doi": "10.1145/3037697.3037705", "doiUrl": "https://doi.org/10.1145/3037697.3037705", "entities": [ "Big data", "CPU cache", "Cache prefetching", "Data access", "Dynamic random-access memory", "Link prefetching", "Memory controller", "Operating system", "Overhead (computing)", "Page table" ], "id": "5389fccd8e6679331eb4042d34f53ca8af3b9f5e", 
"inCitations": [ "1c860ede7e14e4cd9210cbad8d0c8619673f87ca", "044f5a9c7b571f42cb47c7bc82a2aeb9752002f1", "09da9a22e89c5e3a2e6e9f1995fc6cd2b7e92a0b" ], "journalName": "", "journalPages": "63-76", "journalVolume": "", "outCitations": [ "3edacab130540193df4aba07cd07366ffd3600de", "6bf62a3aec49e5d128ae024a17628893898fcd8f", "2679adf7b58a4c8dc722c6e3b23d0d2b194a7189", "5dfbdcedb7bcb8644b816bab2cc3d3fadd36775b", "31c299532c42106b71e909c2fc0fc7472c39ce90", "387eb8909b5527dd2513cfdd2f376a3a1f2973b3", "caacd536fa218ef5218021506ebc041e3f460064", "daeff61502115efc4b9ee81607a8e5489215ea88", "9c001d2546b07f4325dfa32d46f602bdf56ec474", "4390f4a06a036b8f04cbb4fe7611fa5af9492797", "9fe1c5094843ba6d78eb7007f4f7ccd5aa301a1d", "2092d64f8d99ab8cc5b353bbc3dddf4186bcb461", "533d720a8542b707c316d39cf5beeb58738af86d", "bb117349638a1d63be1b105bba0e152bd6c031f8", "10db43d5a1b3c1ddf6dc93fa992b8ed42dcf9833", "434fa04db769935ae61bbcf4d9faa602b9a8c730", "290849abe19f6af0c09ff0dc1b504858743c0150", "3415d0e437f2ecddee7a8e2efa9010d22c211a68", "0a0bf9e017e05d58b85e793e58148d2946259a74", "211f2beaaf36bb6a920a63dbbef6842cb1d22468", "5ece19ddc8abc5454426deece280d0750972c2da", "198f1ecd14b376006445aebec84df7e3e79fc149", "07a63423cc46ec67ff18f707379b77ebdfbc1eb9", "1c2aed008f7bcb769d0e6f8109434c766fb22bde", "23f4f3430cd97f034563dc0a41039c5fbc58f6a3", "89f4842ef627eb667691b5329e1eaac9bd66a0bf", "48215f3287f7d527dc63c5b504ca8397d3bdef4e", "c61837ef5f8eac82d934978796f467f00bdfe2a4", "0470a077e24f70719e3746d26d57333f1ccbdaf0", "85398d5f19157c91bf00da3d36210e72d57887e4", "468035263afa59095614f26a62e0217da4a1aeed", "082573e4dc88f38628242d193c966725ab355026", "5d3c30ae77994e16fb2ee486cff96d4c52bccfd9", "30bb582c2c09abc7eb9dda7d9f80804eeb89f9d7", "9211b3aa4ce6cf6a464c772161723e2af0ece3e6", "343a384d5476ead9496f96559aba5ad09e95e01e", "c797c15492e635ce850158dbe01f402c0f8e78cd", "738b1253c656db5c82aad1838867ed7ab629677d", "28af524636137424ad574afa38463b4771e6f006", "084037d504c95c1af6fb1398179f8495618b72d7", 
"4b82766a16aa951020e43d6f70b5cf097a6b353c", "8c3c2a1593d0c21e2d86eb45aabd596d6a16da6f", "1c860ede7e14e4cd9210cbad8d0c8619673f87ca", "1bed30d161683d279780aee34619f94a860fa973", "44f779d08d629e915ee7cbe1c0f4f17e9f6a626f", "0edf4ef1b8e09e4abc994f7d450bc090262e2c9b", "5d423c53d6e6792a2703c2d4cf84c49d677b5d9e", "174a3d4ad2caba68b55fd3ee863b9471e3786f21", "cea621f0aeecfb2b5890472fd073761e3887f8a8", "274e7e576534b3e091f09e801cce807f5fd221c1", "2394c6644efa856f0da160a0f0031d74cd3b5000", "37b5850e3e75a3462f3991491ca26674925f233b", "2b6cd5604fbb094027ea4f70fa36343485205b9a", "2263e2beda96efcff8ff19efaefcb85f5136aeca", "9341125876271d46cc25f86dac93f25acb343e8d", "6ac87f5ef30f787e5dacd59e97d7804e99a7366e" ], "paperAbstract": "We propose translation-enabled memory prefetching optimizations or TEMPO, a low-overhead hardware mechanism to boost memory performance by exploiting the operating system's (OS) virtual memory subsystem. We are the first to make the following observations: (1) a substantial fraction (20-40%) of DRAM references in modern big- data workloads are devoted to accessing page tables; and (2) when memory references require page table lookups in DRAM, the vast majority of them (98%+) also look up DRAM for the subsequent data access. TEMPO exploits these observations to enable DRAM row-buffer and on-chip cache prefetching of the data that page tables point to. 
TEMPO requires trivial changes to the memory controller (under 3% additional area), no OS or application changes, and improves performance by 10-30% and energy by 1-14%.", "pdfUrls": [ "https://www.cs.rutgers.edu/~abhib/abhib-asplos17.pdf", "http://doi.acm.org/10.1145/3037697.3037705" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5389fccd8e6679331eb4042d34f53ca8af3b9f5e", "sources": [ "DBLP" ], "title": "Translation-Triggered Prefetching", "venue": "ASPLOS", "year": 2017 }, "5390117ecd33478a412e889a061871bef0affe8e": { "authors": [ { "ids": [ "34517382" ], "name": "Rahul Chatterjee" }, { "ids": [ "3413116" ], "name": "Joanne Woodage" }, { "ids": [ "26904394" ], "name": "Yuval Pnueli" }, { "ids": [ "39509142" ], "name": "Anusha Chowdhury" }, { "ids": [ "1707461" ], "name": "Thomas Ristenpart" } ], "doi": "10.1145/3133956.3134000", "doiUrl": "https://doi.org/10.1145/3133956.3134000", "entities": [ "Amazon Mechanical Turk", "Authentication", "Dictionary", "Dictionary attack", "Encryption", "Experiment", "Java Caps", "Linux", "Linux", "Login", "Operating system", "Password", "Password-based cryptography", "Personalization", "State (computer science)", "Substitution (logic)", "The Turk", "Usability" ], "id": "5390117ecd33478a412e889a061871bef0affe8e", "inCitations": [], "journalName": "", "journalPages": "329-346", "journalVolume": "", "outCitations": [ "0103b4a14ab5396b75c2eec39865e0cec66a8e19", "f7403f27b0517be683836f9c1cb8b0f5a5d82b1a", "557e2b4b788ee3c9f052894edd1aef14f3c8ab9c", "05138cee22ae0162f64d5f018a4539c02123a422", "3a3b5b56143c7813dd01fe8de89eb5f90a1766ff", "24f9b64a94c659cf8a35eab535f24780c74162a4", "11808ebcb5579a56dcd07267b420a5411536cd0d", "8bba0a42134ea5df896cdde59e62bbd90e9f4dd4", "c3dfcfcb50ad5abd930888047a3597b6e8654451", "492184989035226d1207801e3e5b9dff8b5f6427", "6209474964e02ba34ed539cf3cee8044048a1bda", "2c61a7a2ec8ac2178812fab42a222f35918f47ce", "c6feca766d42419b5367ad64f1ffd19ce80d3f54", 
"099ac9a2c85e5ce992371a19b478ee5283f3b264", "053982a9c7c0a16c9b080f800013b945d1135069", "0cb0a2b5dac0972fa6388b2f31f76c89455a10db", "d2767a9010ae11b10be2589f4f433174e7aab6e5", "6adacd3f01ef16bd70b179132cdf8fa5ea6e6531", "02e3606a9c2c596bc61c401c1a0b92a623d45fa6", "50c16919a7e773d673dff439a356621c9a9ff71f" ], "paperAbstract": "Password checking systems traditionally allow login only if the correct password is submitted. Recent work on typo-tolerant password checking suggests that usability can be improved, with negligible security loss, by allowing a small number of typographical errors. Existing systems, however, can only correct a handful of errors, such as accidentally leaving caps lock on or incorrect capitalization of the first letter in a password. This leaves out numerous kinds of typos made by users, such as transposition errors, substitutions, or capitalization errors elsewhere in a password. Some users therefore receive no benefit from existing typo-tolerance mechanisms.\n We introduce personalized typo-tolerant password checking. In our approach, the authentication system learns over time the typos made by a specific user. In experiments using Mechanical Turk, we show that 45% of users would benefit from personalization. Therefore, we design a system, called TypTop, that securely implements personalized typo-tolerance. Underlying TypTop is a new stateful password-based encryption scheme that can be used to store recent failed login attempts. Our formal analysis shows that security in the face of an attacker that obtains the state of the system reduces to the difficulty of a brute-force dictionary attack against the real password. 
We implement TypTop for Linux and Mac OS login and report on a proof-of-concept deployment.", "pdfUrls": [ "https://acmccs.github.io/papers/p329-chatterjeeA.pdf", "http://doi.acm.org/10.1145/3133956.3134000", "https://eprint.iacr.org/2017/810.pdf", "http://eprint.iacr.org/2017/810" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5390117ecd33478a412e889a061871bef0affe8e", "sources": [ "DBLP" ], "title": "The TypTop System: Personalized Typo-Tolerant Password Checking", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "539c4c48b0d6a11150e32ff2fbe6031e431a9e9a": { "authors": [ { "ids": [ "35221280" ], "name": "Laxman Dhulipala" }, { "ids": [ "1717462" ], "name": "Guy E. Blelloch" }, { "ids": [ "2045944" ], "name": "Julian Shun" } ], "doi": "10.1145/3087556.3087580", "doiUrl": "https://doi.org/10.1145/3087556.3087580", "entities": [ "Algorithm", "Approximation algorithm", "Breadth-first search", "Computer data storage", "Graph (abstract data type)", "Hyper-threading", "List of algorithms", "Multi-core processor", "Parallel algorithm", "Parallel computing", "Program animation", "Scalability", "Set cover problem", "Shared memory", "Source lines of code", "Speedup", "Synthetic data", "Thread (computing)", "Throughput" ], "id": "539c4c48b0d6a11150e32ff2fbe6031e431a9e9a", "inCitations": [], "journalName": "", "journalPages": "293-304", "journalVolume": "", "outCitations": [ "31181e73befea410e25de462eccd0e74ba8fea0b", "1156f60e40548096df49528b1342bb3e88b0f378", "6b6ca1041dbcf0ff44992f02826342e99da54996", "30d963e87c462606793d229dbdf0786ac38ede6e", "1eb4c18aa99f19a8dc91e6235e2dbdb7fd46d06c", "0706356c9ab6014d6b04577d38289ea8328291a5", "d832fb2b7a72640844e1eef439c2092b35e40f60", "259e93de2f10d395a1bdfb2dc6da72b6a3998572", "3f82e066d21d74d3b48b0a294cf74a9ae0cdf89e", "31ffb232b5c1186bb90502254162ac3d99baf50b", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "2f68dcf3bec179fa1f968165ba1da44952d9b474", "df6749291be5f89d5283bee4b2082ecafd846c81", 
"2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "b38e08b51564bbab690815119201f487916055e8", "53c3b2ca3d07b06eb587ac01e750eefc31be7509", "b9ef5daaa31ccee9ba239a3a60c6b7c552aec5dc", "9cf785889f13260a791f1106fd7b16f1390002f1", "4e185dc2f8bce26b3a51cb10ec961aa6f4e5843f", "2c67c63ce0c972bddc15dd26ace3f04905be51bc", "c3008dd707e4dfd43606a544d4cac4bf1f081f2b", "2a9546c814afff1a122380bff8bc08b346ce57bb", "17ae4c0e57e868e2648781023a143d29d348f0a6", "3dff11679346f5344af1018cad57fa14cc349f2f", "04f83137f43c6caba920b6455639f26b48656231", "1cdcab443a9d66e08e3c25653ecfcfb3d996867a", "374f9f0f84c939e28a26aa5eb7370ecc12d658d1", "017eb2deb11f48ef7350873e81c19391aa61b8e3", "3e02c6e382f556993865c803de4dc2ef54b22bcb", "abbdb6177b4408c5885a569dc24e6361f91cf169", "7c1a0f7054d496e71cc697202b84d8a5c652328a", "e901ca0397785fcbf3039cf1e0867ac94fa2a558", "0ad8e89091eed09217e66adc98136126addc2619", "2250cf4c6dd151ef86a5b855e658745550b0d1fe", "43c29869dec0b83967b1f2b228940884e4eb90f2", "26deee037b221bd05ed34461819f5c067b745445", "13f7df91eb208a387d18fbad192c6f0f834f0b82", "fd109bac25e8a43c3a0c0730927c5442eba991dd", "21873832bfff4c2053865cc312d8db1ef436e8b3", "a65d456fa632856e33b76f17af38e9b33c908327", "b5d588298e6845b4bfd40ea779ce21e628239ef3", "fe69891ccbca5e18587dd0e0c0b48b836ae38848", "b11f541b43c6de86e09d97f334e117e392fde01c", "497cec7044463b2ab60002e11934e7918bbd9c9e", "6c39f97456c2aacca0ac330613f3ef5ecd183f99", "8f39a35ca0de700af931524f0990d54ca151b898", "20d058f261c6601b31c212550e2ce9ce8e284a34", "10924940cff0dcfc16c18fde9e4e0cd034bac55c", "175d795f44037ef60dd9df341701cd5fdc449f1f", "69b6a42ad7068962363687c038c6ae2e0760867a", "3486aeaf540c48952120fe853d672af984f40a6a", "e3d59d4eaca70a3e1e71b3b870c6b7de0033a3e1", "4b72dca0d863ddcb4d48645b1d7adbc24dc8b4ce", "6518035089d0c87b925c6262bbf5b949d3bb3fff", "5b0dcc45f1f3386fa6751009655ac32fc5591061", "34eb5e5ded51738861b8b844a1dbfddd6881fa46", "3f87270042fd9de5bb8da319510caa1875d85574", "a01d4a0f26ec9bba3e21f12f60489a6a20a8ae17" ], "paperAbstract": 
"Existing graph-processing frameworks let users develop efficient implementations for many graph problems, but none of them support efficiently bucketing vertices, which is needed for bucketing-based graph algorithms such as \\Delta-stepping and approximate set-cover. Motivated by the lack of simple, scalable, and efficient implementations of bucketing-based algorithms, we develop the Julienne framework, which extends a recent shared-memory graph processing framework called Ligra with an interface for maintaining a collection of buckets under vertex insertions and bucket deletions.\n We provide a theoretically efficient parallel implementation of our bucketing interface and study several bucketing-based algorithms that make use of it (either bucketing by remaining degree or by distance) to improve performance: the peeling algorithm for k-core (coreness), \\Delta-stepping, weighted breadth-first search, and approximate set cover. The implementations are all simple and concise (under 100 lines of code). Using our interface, we develop the first work-efficient parallel algorithm for k-core in the literature with nontrivial parallelism.\n We experimentally show that our bucketing implementation scales well and achieves high throughput on both synthetic and real-world workloads. Furthermore, the bucketing-based algorithms written in Julienne achieve up to 43x speedup on 72 cores with hyper-threading over well-tuned sequential baselines, significantly outperform existing work-inefficient implementations in Ligra, and either outperform or are competitive with existing special-purpose parallel codes for the same problem. We experimentally study our implementations on the largest publicly available graphs and show that they scale well in practice, processing real-world graphs with billions of edges in seconds, and hundreds of billions of edges in a few minutes. 
As far as we know, this is the first time that graphs at this scale have been analyzed in the main memory of a single multicore machine.", "pdfUrls": [ "https://people.csail.mit.edu/jshun/bucketing.pdf", "http://doi.acm.org/10.1145/3087556.3087580" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/539c4c48b0d6a11150e32ff2fbe6031e431a9e9a", "sources": [ "DBLP" ], "title": "Julienne: A Framework for Parallel Graph Algorithms using Work-efficient Bucketing", "venue": "SPAA", "year": 2017 }, "53abef5e08be4ff7635499e361f385ebcb56de69": { "authors": [ { "ids": [ "1830497" ], "name": "Yaqing Wang" }, { "ids": [ "2988239" ], "name": "Fenglong Ma" }, { "ids": [ "2170726" ], "name": "Lu Su" }, { "ids": [ "1947899" ], "name": "Jing Gao" } ], "doi": "10.1109/ICDM.2017.60", "doiUrl": "https://doi.org/10.1109/ICDM.2017.60", "entities": [ "Algorithm", "Big data", "Computation", "Information privacy", "Internet privacy", "Multi-source", "Server (computing)", "Source data" ], "id": "53abef5e08be4ff7635499e361f385ebcb56de69", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "505-514", "journalVolume": "", "outCitations": [ "1c2e8d2d63312c18944c9477797e65a0975361da", "579e1e9217cfed6d563cedf8f8fdcd1604fc0917", "cd48760a142830b796b8a85a158cd469f3e5feb0", "d4aae277d50baaeedc02c7e74bfd922a8f01d212", "3181b72c858a1c963f52713f5c48c280d9e7a2be", "2b9cd09b949b7e69933d18ae408397e803987151", "65821014abe934029310cb10d4e329645acd4817", "1b763ec4c21c799eef00ca26f6afc6c01b45a82c", "a8db82cec3334cd12d8700d6fce031bb8e8cb351", "2ffe1157df90ce94cb91f28074b43b58135cedac", "27d6326993f80269595b2a594657754bf748927c", "1b189d721adbf1d2bab93b7ed6ce826e188b0b99", "a12cd3d9ae5530a90302a6e4af477e6e24fa0f95", "0e5ebc2eb31b6c78ee0dee10246efeeaf587f7f4", "b1dc2af3defb9fc4912e99522dbe7d51b3b956ab", "4607f09a348c87f95aedd7711b24d8bf614fe58c", "4f84bcbddad1e931b0328be6e0a96ca731c538f8", 
"1a0f8261d8384bfefbf2d561d446c3a687a5febe", "220d65cf8d583f1477aafe4d4ad3b36072f9eacd", "0abb28823d51d6a644c78fedca0e7e96295df762", "0194a83bd5ab0ba5e334b65a00171f2dd418ecfe", "f5d9c0182d8578f7c0a99ad9bdd4ff62e5f7c68d", "1593786d775b94c094d5d95a1707db56ca385fab" ], "paperAbstract": "In the big data era, the information about the same object collected from multiple sources is inevitably conflicting. The task of identifying true information (i.e., the truths) among conflicting data is referred to as truth discovery, which incorporates the estimation of source reliability degrees into the aggregation of multi-source data. However, in many real-world applications, large-scale data are distributed across multiple servers. Traditional truth discovery approaches cannot handle this scenario due to the constraints of communication overhead and privacy concern. Another limitation of most existing work is that they ignore the differences among objects, i.e., they treat all the objects equally. This limitation would be exacerbated in distributed environments where significant differences exist among the objects. To tackle the aforementioned issues, in this paper, we propose a novel distributed truth discovery framework (DTD), which can effectively and efficiently aggregate conflicting data stored across distributed servers, with the differences among the objects as well as the importance level of each server being considered. The proposed framework consists of two steps: the local truth computation step conducted by each local server and the central truth estimation step taking place in the central server. Specifically, we introduce the uncertainty values to model the differences among objects, and propose a new uncertainty-based truth discovery method (UbTD) for calculating the true information of objects in each local server. 
The outputs of the local truth computation step include the estimated local truths and the variances of objects, which are the input information of the central truth estimation step. To infer the final true information in the central server, we propose a new algorithm to aggregate the outputs of all the local servers with the quality of different local servers taken into account. The proposed distributed truth discovery framework can infer object truths without delivering any raw data to the central server, and thus can reduce communication overhead as well as preserve data privacy. Experimental results on three real world datasets show that the proposed DTD framework can efficiently estimate object truths with accuracy guarantee, and the proposed UbTD algorithm significantly outperforms the state-of-the-art batch truth discovery approaches.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.60" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/53abef5e08be4ff7635499e361f385ebcb56de69", "sources": [ "DBLP" ], "title": "Discovering Truths from Distributed Data", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "53b204514625f3fa0afae23c3877580776a3f103": { "authors": [ { "ids": [ "2481205" ], "name": "Bal\u00e1zs N\u00e9meth" }, { "ids": [ "1806102" ], "name": "Tom Haber" }, { "ids": [ "1984924" ], "name": "Jori Liesenborgs" }, { "ids": [ "1775641" ], "name": "Wim Lamotte" } ], "doi": "10.1109/CLUSTER.2017.68", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.68", "entities": [ "Algorithm", "Computation", "Distributed memory", "Manycore processor", "Markov chain", "Markov chain Monte Carlo", "Monte Carlo", "Parallel computing", "Pseudorandom number generator", "Pseudorandomness", "Run time (program lifecycle phase)", "Sampling (signal processing)", "Shared memory" ], "id": "53b204514625f3fa0afae23c3877580776a3f103", "inCitations": [], "journalName": "2017 IEEE International Conference on 
Cluster Computing (CLUSTER)", "journalPages": "520-524", "journalVolume": "", "outCitations": [ "6377fee5214d9ace4ce629c9bfe463bdebbd889f", "5aaa65fbe2abe27afb237b2f40909d686b14b1ee", "2359b12b0f4c70477f51455d9eb41923e740104a", "5cbc9bc870c8501af36e9f31f36f6d6fe4e932d8", "2749bc7bdf1a1016134bf10cb2c76c037c8f2c4d", "0b21d126da425fba4b14e4bc9f74f4f2221b5bf9", "983399a958ca0e6b26886e441ab5c4ddba836fc5", "a46726f4f45ba831fb29250a868f3b35869998a9", "ffa1dbe3a7edda21daf6e065511569e4ca2987b3", "0a8b7e384dcfa89b5466b2a2d2375d61a3d8f1ce", "d45ec41b45caa8686fa1788d9191ab4044a18a83" ], "paperAbstract": "Markov Chain Monte Carlo methods provide a tool for tackling high dimensional problems. With many-core systems readily available today, it is no surprise that leveraging parallelism in these samplers has been a subject of recent research. The focus has been on solutions for shared-memory architectures, however these perform poorly in a distributed-memory environment. This paper introduces a fully decentralized version of an affine invariant sampler. By observing that a pseudorandom number generator makes stochastic algorithms deterministic, communication is both minimized and hidden by computation. 
Two cases at opposite ends of the communication-to-computation ratio spectrum are used during evaluation against the currently available master-slave solution, where a more than tenfold reduction in execution time is measured.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.68" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/53b204514625f3fa0afae23c3877580776a3f103", "sources": [ "DBLP" ], "title": "Distributed Affine-Invariant MCMC Sampler", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "53ba11b61994ce3d05e5cc609cd66f6d90808fb5": { "authors": [ { "ids": [ "2795604" ], "name": "Thejaka Amila Kanewala" }, { "ids": [ "1843069" ], "name": "Marcin Zalewski" }, { "ids": [ "2556809" ], "name": "Andrew Lumsdaine" } ], "doi": "10.1007/978-3-319-64203-1_31", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_31", "entities": [ "List of algorithms", "Shortest path problem" ], "id": "53ba11b61994ce3d05e5cc609cd66f6d90808fb5", "inCitations": [], "journalName": "", "journalPages": "428-441", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_31" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/53ba11b61994ce3d05e5cc609cd66f6d90808fb5", "sources": [ "DBLP" ], "title": "Families of Graph Algorithms: SSSP Case Study", "venue": "Euro-Par", "year": 2017 }, "53c1d6bc597b37da7786703cd641c17620f7738b": { "authors": [ { "ids": [ "39602632" ], "name": "Marianna Rapoport" }, { "ids": [ "36862800" ], "name": "Ifaz Kabir" }, { "ids": [ "19205604" ], "name": "Paul He" }, { "ids": [ "3284421" ], "name": "Ondrej Lhot\u00e1k" } ], "doi": "10.1145/3133870", "doiUrl": "https://doi.org/10.1145/3133870", "entities": [ "Abstract type", "Object type (object-oriented programming)", "Scala", "Type system", "Value (ethics)" ], "id": "53c1d6bc597b37da7786703cd641c17620f7738b", "inCitations": [ 
"7c1dc337148e45ecfd08cd11b28038772f2b00db" ], "journalName": "PACMPL", "journalPages": "46:1-46:27", "journalVolume": "1", "outCitations": [ "546cfafc170c7b3482f0158efba4faa1a7ea0643", "133b2f98430e7739fd405ea57f024bd0235d5dcd", "43c2fa8d8b46f294eeca331c90b0f25b85d57265", "1ad2415dc5c54a0b4d259922951973dad7d58a2a", "38e31e68af9b260c51d5abc03b27041780e81e4b", "27cb14dc66a3496a9eebce2620cf0fd202eef74f", "0f5b05d841bf89e2daa531c7068142f5689dbf95", "56c355873bb29284a047e3e66f6fe3bb4399449b", "8694190c6416b2fe7f26b65a72ed0cb534a79571", "736768fe05e6d114f9d0d2b10ba4a04db6c5ba75", "369e90b69f7e1f7e1da0046a88fc19d65575c020", "7a7fbc0d50c95eea7313d8a96a51fdd43e113a31", "544c433c7711b5d6d44f85fe342d9ff09eb00c25" ], "paperAbstract": "Dependent Object Types (DOT) is intended to be a core calculus for modelling Scala. Its distinguishing feature is abstract type members, fields in objects that hold types rather than values. Proving soundness of DOT has been surprisingly challenging, and existing proofs are complicated, and reason about multiple concepts at the same time (e.g. types, values, evaluation). To serve as a core calculus for Scala, DOT should be easy to experiment with and extend, and therefore its soundness proof needs to be easy to modify. \n This paper presents a simple and modular proof strategy for reasoning in DOT. The strategy separates reasoning about types from other concerns. It is centred around a theorem that connects the full DOT type system to a restricted variant in which the challenges and paradoxes caused by abstract type members are eliminated. Almost all reasoning in the proof is done in the intuitive world of this restricted type system. 
Once we have the necessary results about types, we observe that the other aspects of DOT are mostly standard and can be incorporated into a soundness proof using familiar techniques known from other calculi.", "pdfUrls": [ "https://plg.uwaterloo.ca/~olhotak/pubs/oopsla17.pdf", "https://arxiv.org/pdf/1706.03814v1.pdf", "http://arxiv.org/abs/1706.03814", "http://doi.acm.org/10.1145/3133870" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/53c1d6bc597b37da7786703cd641c17620f7738b", "sources": [ "DBLP" ], "title": "A simple soundness proof for dependent object types", "venue": "PACMPL", "year": 2017 }, "53c7a9f88a20ab31841f6250de91d5fc2b50a757": { "authors": [ { "ids": [ "38328797" ], "name": "Sajith Ravindra" }, { "ids": [ "2741023" ], "name": "Miyuru Dayarathna" }, { "ids": [ "1971912" ], "name": "Sanath Jayasena" } ], "doi": "10.1145/3030207.3030227", "doiUrl": "https://doi.org/10.1145/3030207.3030227", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Benchmark (computing)", "Cloud computing", "Complex event processing", "Context switch", "Data compression", "Event stream processing", "Experiment", "Jumpstart Our Business Startups Act", "Multiprotocol Label Switching", "Packet switching", "Query string", "Scalability", "Stream processing", "Telephone exchange", "Upstream (software development)" ], "id": "53c7a9f88a20ab31841f6250de91d5fc2b50a757", "inCitations": [ "61a1afa693442d829072114910b1775a8e4ceefa", "95450cf983c9561c68fa2a653f88b71332b6a92e", "bdbd57ea557992f7f054b37b7d6af7a93d6b1a9f" ], "journalName": "", "journalPages": "91-102", "journalVolume": "", "outCitations": [ "fe4837d592d6f3c5cc27f7e9f0dad69f8fbe8090", "511562debe051d77e38c374e4080b768c1151e66", "cdd534ca8edf8c5ba068c11e8ce93a33eef8bd89", "6871b95c14dccca7636b498b5d363a743c5288e6", "0be471a00abd3826171048e6bf25a25c245560b7", "f7c206119cb10954ac1b32e959a212eeff93d84a", "376381c07be7911805d7c233b9659b868ea880b3", "99b90da55af41ef5b7d07dd573d036884f458710", 
"2dbdb8e6e7ad9abb93f08dab9703df109b54d09b", "c50643d8fb95ddb0cfb5ad654c1d5411b5e4c7e9", "146913d04b803ade1af2f84015f3648aeee05fd0", "135d89a35623359aa3af7ce6f95b0078c6acc43a", "224e9d48a9667ee557e2d323e34110852b4caccf", "2b70b11bff4f08c0ea412b4ad3d73299bda7d624", "4e3ca0051dbf80bb6437650c0dc4c575cd91f3d1", "2736d518883a686b03c8c211bff24ada1e6c42da", "3031dcb6a936ce5dcc6e28a66896ad4e071283e5", "023fec4175c80611c6d4488229252d9066dce1ac", "6c07d5b863cb9e166c5677b3d66933c260edcbe6", "9180aa7b7978c62363e4af3a9053371775fbcbdc", "b530af193f2e46b506f80f8c6a4e1ebb18b1e964", "54d5107b9ff52db488b9e4372373b365557e3c75", "c85367ac1b65a567039c15cfb2c3b963bc33584b", "7d986ee3e960fed8cd2351f0ce63a9ed1cb72fbf", "553dbe2b0c9efe1a1c7e9057cefe574dadbb43fd" ], "paperAbstract": "Elastic scaling of event stream processing systems has gained significant attention recently due to the prevalence of cloud computing technologies. We investigate on the complexities associated with elastic scaling of an event processing system in a private/public cloud scenario. We develop an Elastic Switching Mechanism (ESM) which reduces the overall average latency of event processing jobs by significant amount considering the cost of operating the system. ESM is augmented with adaptive compressing of upstream data. The ESM conducts one of the two types of switching where either part of the data is sent to the public cloud (data switching) or a selected query is sent to the public cloud (query switching) based on the characteristics of the query. We model the operation of the ESM as the function of two binary switching functions. We show that our elastic switching mechanism with compression is capable of handling out-of-order events more efficiently compared to techniques which does not involve compression. We used two application benchmarks called EmailProcessor and a Social Networking Benchmark (SNB2016) to conduct multiple experiments to evaluate the effectiveness of our approach. 
In a single query deployment with EmailProcessor benchmark we observed that our elastic switching mechanism provides 1.24 seconds average latency improvement per processed event which is 16.70% improvement compared to private cloud only deployment. When presented the option of scaling EmailProcessor with four public cloud VMs ESM further reduced the average latency by 37.55% compared to the single public cloud VM. In a multi-query deployment with both EmailProcessor and SNB2016 we obtained a reduction of average latency of both the queries by 39.61 seconds which is a decrease of 7% of overall latency. These performance figures indicate that our elastic switching mechanism with compressed data streams can effectively reduce the average elapsed time of stream processing happening in private/public clouds.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030227" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/53c7a9f88a20ab31841f6250de91d5fc2b50a757", "sources": [ "DBLP" ], "title": "Latency Aware Elastic Switching-based Stream Processing Over Compressed Data Streams", "venue": "ICPE", "year": 2017 }, "53dd91c3ce45b3d4e58142666bb3896a6bb044e5": { "authors": [ { "ids": [ "8182449" ], "name": "Shichang Xu" }, { "ids": [ "35049639" ], "name": "Subhabrata Sen" }, { "ids": [ "3895596" ], "name": "Zhuoqing Morley Mao" }, { "ids": [ "12488214" ], "name": "Yunhan Jia" } ], "doi": "10.1145/3131365.3131386", "doiUrl": "https://doi.org/10.1145/3131365.3131386", "entities": [ "Best practice", "Holism", "Streaming media", "Versant Object Database", "Video over cellular" ], "id": "53dd91c3ce45b3d4e58142666bb3896a6bb044e5", "inCitations": [], "journalName": "", "journalPages": "220-234", "journalVolume": "", "outCitations": [ "475323cde4293723ac53fc3a8a3749bb82432268", "04a23d7dbd3d1ebbaa974c40181c6df2843afc28", "3986cbe772d714d35630f87d3c1fc31cda58f38a", "1782029a2ccd9788971ad3813a10cae3fc652ebb", "24b0b1ddc1a402e89804f612b66e1bbac8e3bf58", 
"1aebb2454d8585c2d04af0ee3df9112368a50c1d", "19025f8942e3fa76e2158cb3a76997a5116f0303", "2899a969628f945fce217df60c99a0521c53e41e", "480a0e6452e480e358767c15c1dcbe02ddb4bc22", "0ad4e891484031164b0f96f36874856d0ba5d532", "94656ce91de9c4e153c219fd144758869fe42e72", "48289b7d57e43bb5a001c334cf96694e933f7001", "73e5cc87f4d7487c3ba58ec2e55ea52ea1025b0a", "09088515673d1d44919bd654c9829c7ae7822170", "32a7818ee01bea31068a0076060c75e88283a16a", "34810d74692893e905f908e059b95ecbdc913b25", "0af8dc481a9130e443f512e5db14d4ceda7bd3b3", "5bf660501e1c3fbc933ac490eef07275e328fe3f", "64ae76cca37d8d5d11607541601e4722290cf39e", "2b4373169bab1114d5bde52c269d392b74225fb9", "d61b9b499c7e371edf7f8bb45fe7934e7d60ba2d", "29475639b37f6c556532a072702d518ac9f717cd", "56893647902b4ab971fd092ce78687675b6942a7", "650759045a1a28a977f42b219fcbe12394c296f5", "6f226a1578ae646ca6fc414a08b399568ebec0b4", "a05d9c23221d356087ee4cee1e9449a4bac03013", "06d87357594fe5514454b72b288ece6c63cec9d2", "ddfd6f905f0fbcb6d1cb6559f87050257b46072b", "2590fa90ebaf085ec59e62b5a9fca2ac53a36e08", "2302e796c9b16c0fa94e89ee8b4f34f9d4812b94", "3017bb41f18096e34eea94329834f6f8b9372be8", "546c0cfed69f188a0ca661c8db9b099f554a63d1", "1a8af4e1a735f98aae6813887fdef223e19561e1", "63b8f3f94c217aa486ecbfb78a0fb7270f226179", "dae2c6c6cbc61102b6f49346981eb9998577f611", "18aa3279983b840a943da590e9f57b20c36130c0" ], "paperAbstract": "HTTP Adaptive Streaming (HAS) has emerged as the predominant technique for transmitting video over cellular for most content providers today. While mobile video streaming is extremely popular, delivering good streaming experience over cellular networks is technically very challenging, and involves complex interacting factors. We conduct a detailed measurement study of a wide cross-section of popular streaming video-on-demand (VOD) services to develop a holistic understanding of these services' design and performance. 
We identify performance issues and develop effective practical best practice solutions to mitigate these challenges. By extending the understanding of how different, potentially interacting components of service design impact performance, our findings can help developers build streaming services with better performance.", "pdfUrls": [ "https://conferences.sigcomm.org/imc/2017/papers/imc17-final111.pdf", "http://doi.acm.org/10.1145/3131365.3131386" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/53dd91c3ce45b3d4e58142666bb3896a6bb044e5", "sources": [ "DBLP" ], "title": "Dissecting VOD services for cellular: performance, root causes and best practices", "venue": "IMC", "year": 2017 }, "53f7a3697e3e5c620f5413b77e86488d7bf089a9": { "authors": [ { "ids": [ "1789056" ], "name": "Daniel S. Roche" }, { "ids": [ "3076315" ], "name": "Adam J. Aviv" }, { "ids": [ "1702836" ], "name": "Seung Geol Choi" }, { "ids": [ "2531090" ], "name": "Travis Mayberry" } ], "doi": "10.1145/3133956.3134051", "doiUrl": "https://doi.org/10.1145/3133956.3134051", "entities": [ "Baseline (configuration management)", "Cloud storage", "Dm-crypt", "Encryption", "Experiment", "Local variable", "Oblivious ram", "Random-access memory", "Speedup", "User space" ], "id": "53f7a3697e3e5c620f5413b77e86488d7bf089a9", "inCitations": [ "039fcf8f14869850109b49e4e03326e9105afc64" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "586", "journalVolume": "2017", "outCitations": [ "8f8de213b1318e0ef0914008010e87ab64ab94ff", "2065450d96aca38c79cad5172b58660765533650", "053f06b15e59aaec4cbb4ae56694590f0206ed12", "a9f2812e8d3655777a7443331361369560b9e1a5", "cbf908e366aaefc57c6de9d608232a439b7e99c9", "127adf86474103b6f05afcc5bceda45bb5e34a8a", "60ab15b10d8b0bbc46cd675bff47498a1ab8c15c", "b820afefe23d2351e41a22c9f1495f72988427c9", "1f13dfc43e4e68649ebbaa59f093c8f0f0975ee2", "04e7c279018bf688eefb7154c4a6dd8860197d3d", "024e9ee7a8220451f03da763307f2c49dee321bd", 
"57c672e8b0bd70233ebc96c10905a5ff2f2b75b3", "08edbbc346b63098aafa9b052c1eedec34497721", "1e6dda18549f74155ca3bc6d144f439108aa5474", "8a41c198449d0f30de5427fe753c6b10bbb7255d", "b4b26e52580d7eeb0ccbc8e5529e34a831bc4e65", "475b10209d1ed13b079d62aca57ec31da4284bcd", "1c36af6daabcf5558c44656d567f8bd645aee29d", "1526d412d7bdb83dcafadd1c28cf8b4c7e4f130d", "464c1bcbebea396a9f23b70411383fe43477bbb8", "16b5bce9ca7a24553a065f523d5843eb3b25f896", "20b63210954f7c5a70664f301dcd7196856ccfa7", "00ecd7b2e0c364ce4e9f5416ee1dbeaeabe87a62", "09598c6fa85bb64b22816cfaef54e682cb3f3a6a", "499b3d7afb464ff18a9e0aeb77f8ab507b1fa3c7", "2813c3351838e036f52bcaa94eb7203bf4d9e7d3", "37d719cd1dd3ffc4530e9440ebbc3b80479fd52f", "3ae6e3f385f075c2b7b6958122c1e30fb1b54b0e", "0f55d7414510403d67d559717086427795f226f3", "443b4d6800a959c306faae6bb4426110bc49f7cf", "2905a5c4da8c9a0970f078a211742316ef0ab77d", "92eaba06af12761b5c64b84e6028d21cd05af9dd" ], "paperAbstract": "Write-Only Oblivious RAM (WoORAM) protocols provide privacy by encrypting the contents of data and also hiding the pattern of write operations over that data. WoORAMs provide better privacy than plain encryption and better performance than more general ORAM schemes (which hide both writing and reading access patterns), and the write-oblivious setting has been applied to important applications of cloud storage synchronization and encrypted hidden volumes. In this paper, we introduce an entirely new technique for Write-Only ORAM, called DetWoORAM. Unlike previous solutions, DetWoORAM uses a deterministic, sequential writing pattern without the need for any \"stashing\" of blocks in local state when writes fail. Our protocol, while conceptually simple, provides substantial improvement over prior solutions, both asymptotically and experimentally. In particular, under typical settings the DetWoORAM writes only 2 blocks (sequentially) to backend memory for each block written to the device, which is optimal. 
We have implemented our solution using the BUSE (block device in user-space) module and tested DetWoORAM against both an encryption only baseline of dm-crypt and prior, randomized WoORAM solutions, measuring only a 3x-14x slowdown compared to an encryption-only baseline and around 6x-19x speedup compared to prior work.", "pdfUrls": [ "http://eprint.iacr.org/2017/586", "http://arxiv.org/abs/1706.03827", "https://acmccs.github.io/papers/p507-rocheA.pdf", "https://arxiv.org/pdf/1706.03827v2.pdf", "http://doi.acm.org/10.1145/3133956.3134051", "https://arxiv.org/pdf/1706.03827v1.pdf", "https://eprint.iacr.org/2017/586.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/53f7a3697e3e5c620f5413b77e86488d7bf089a9", "sources": [ "DBLP" ], "title": "Deterministic, Stash-Free Write-Only ORAM", "venue": "CCS", "year": 2017 }, "53fe05be951fb9404bf844e089f04aa328d1784a": { "authors": [ { "ids": [ "19232880" ], "name": "Keishla D. Ortiz-Lopez" }, { "ids": [ "1717914" ], "name": "Jennifer L. 
Welch" } ], "doi": "10.1109/IPDPS.2017.14", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.14", "entities": [ "Algorithm", "Asynchronous system", "Central processing unit", "Communications protocol", "Data deduplication", "Message authentication code" ], "id": "53fe05be951fb9404bf844e089f04aa328d1784a", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "327-336", "journalVolume": "", "outCitations": [ "c99c2423fece8b5c20a78f8b59d3ce88ca4d9bc4", "1233c76ac4a09402e88841f31b6ae0eaaf76f615", "b3127883529ac69477b684d520a817e85aced533", "4c6906ea491a47bd50a18afad6c49797cba904f5", "2c93d82fb103f9a82dd3898bcccab07ce2a5a78c", "85920e3c65146d0d250ae247ca672c6393a41488", "1bb5295a79dc329fd271b5f2cf67509fc9ea3f93", "2449f426a3ce215f9c089d7851e53e6a0169a205", "54dc2b248271142c47b264b7a83e01523e0f30e4", "4a7be5fe08d86fab74160b3ea9ee359f34b7c5d7", "d6c68279828d74b5e58d7595a3f25d91e8729643", "6e0ea0553929d2399a75efe392d6176b98cb6049", "1e63b8495e6ee76dfaf3d86b7de1badd5a05804f", "81f3b90935a63baa419af55e95821e7444787007", "d9bb9caec3563bc9a9b077a238087e68cba1319c", "a90ef843d5e0faae4d686c1261eab5bcb547e36d" ], "paperAbstract": "In the reliable message transmission problem (RMTP) processors communicate by exchanging messages, but the channel that connects two processors is subject to message loss, duplication, and reordering. Previous work focused on proposing protocols in asynchronous systems, where message size is finite and sequence numbers are bounded. However, if the channel can duplicate messages-but not lose them-and arbitrarily reorder the messages, the problem is unsolvable. We consider a strengthening of the asynchronous model in which reordering of messages is bounded. In this model, we develop an efficient protocol to solve the RMTP when messages may be duplicated but not lost. This result is in contrast to the impossibility of such an algorithm when reordering is unbounded. 
Our protocol has the pleasing property that no messages need to be sent from the receiver to the sender and it works when message loss is allowed with some minimal modifications.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.14" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/53fe05be951fb9404bf844e089f04aa328d1784a", "sources": [ "DBLP" ], "title": "Bounded Reordering Allows Efficient Reliable Message Transmission", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "5401219fbb74beb5bf69ec4a709bb81dabdd5b90": { "authors": [ { "ids": [ "26341484" ], "name": "Hyeongwon Jang" }, { "ids": [ "26323645" ], "name": "Sang Youp Rhee" }, { "ids": [ "6337598" ], "name": "Jae Eun Kim" }, { "ids": [ "2393820" ], "name": "Sooyong Kang" }, { "ids": [ "1747885" ], "name": "Hyuck Han" }, { "ids": [ "1692265" ], "name": "Hyungsoo Jung" } ], "doi": "10.1109/CLUSTER.2017.46", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.46", "entities": [ "Clustered file system", "Durability (database systems)", "Input/output", "Persistent memory", "Resource contention" ], "id": "5401219fbb74beb5bf69ec4a709bb81dabdd5b90", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "228-232", "journalVolume": "", "outCitations": [ "33fe7b4284339ac4d49e0af613964887cc65ad01", "bc432fd1491c352413f635b4dc949f4e62f5ce53", "200995c610872b10bed6044e698611f2002c597d", "8daf96353cfda4b0d3c5da18eee41e5d0db85172", "ef47742e72bd64fb1ae5359cd6d5dd6dfad34dc8", "13c27125584651329f66461981cbb20fa63e9023", "4228d3fee9d28b3b3a7dc8e9585d02652109029f", "41675accef4bd8182dbbd9e5abd8f78caeb12d96", "e9d689997b2232d030a6452c156d3da51c7539c0", "180a5f043db6e58b010a8f8cb492947d46625ed7", "08b5b8270713bbbc0b5b1a31c8625b0bf87d674d" ], "paperAbstract": "As hardware vendors provision more cores and faster storage devices, attaining fast data durability for concurrent file writes is 
demanding to high-performance storage systems in cluster systems. We approach the challenge by proposing a system that uses a small amount of fast persistent memory for buffering concurrent file writes while preserving data durability. The main issue in designing a durable file buffer is allowing concurrent file writes to store data in a shared and limited space of persistent memory without incurring lock or resource contention. This paper addresses such issue and presents AUTOBAHN, a durable file buffer that expedites file I/O operations.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.46" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5401219fbb74beb5bf69ec4a709bb81dabdd5b90", "sources": [ "DBLP" ], "title": "AUTOBAHN: Accelerating Concurrent, Durable File I/O via a Non-volatile Buffer", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "5428b6a7197befb61f60360806ca67b687de54dd": { "authors": [ { "ids": [ "3083814" ], "name": "Jayashree Mohan" }, { "ids": [ "20426378" ], "name": "Dhathri Purohith" }, { "ids": [ "3069452" ], "name": "Matthew Halpern" }, { "ids": [ "2002462" ], "name": "Vijay Chidambaram" }, { "ids": [ "1805668" ], "name": "Vijay Janapa Reddi" } ], "doi": "", "doiUrl": "", "entities": [ "Android", "Experiment", "Mobile device", "SQLite", "Smartphone" ], "id": "5428b6a7197befb61f60360806ca67b687de54dd", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "523a001fb647cd53e2572d221b133b76a5e614d0", "175a3360ff5bb2f0777dff1e688f3f90f20e5fcf", "574e45c268daaa0d186dbf92b4e5bce8276d60ed", "2ea6e3243c9aa5d9910cf44c4f0e18002bf01638", "b8ddec47f9fab1eddb5c9cacf703781dd5337b87", "1b1ff7f94430f47d109d0deb6856c98d9df518e8", "010bf8e639dbdee2c31a58ca9b65e89aeac11315", "8bccd7a89847ce3db5957d0544c7357f46b69642", "086699da0528ed47463cea3108851bd3dc5ba715", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", 
"6098d8a58d19d22aeeee5a78e82ba43cb5a27897", "41e5b42b7e6c6bb5468d3aaae44279156b135fbf", "04911ac04ac0ce2adc1deee8f1a69d99caf1b493", "05659a4a6a6b73e0c18134719965c4981c7ed24c", "5d01bdfd8e5651df4124b6852ffd05364cd7526e", "0b369ac8bd9e0c618e4ea3568ebaa944f460c454", "567de8e6da78a16132fa75c91f8199e44867159b", "0495641c590874be9e09c3743d0d15c536cd3f4e", "75929c53d9876b1f4e85723cb1aa1395673d0347", "061abdde4c6a7274b7070591ec0fa1bdce95efd9", "8a07a9dfcacc623b47fe32be79bf892b2526ea11" ], "paperAbstract": "Energy consumption is a key concern for mobile devices. Prior research has focused on the screen and the network as the major sources of energy consumption. Through carefully designed measurement-based experiments, we show that for certain storage-intensive workloads, the storage subsystem on an Android smartphone consumes a significant amount of energy (36%), on par with screen energy consumption. We analyze the energy consumption of different storage primitives, such as sequential and random writes, on two popular mobile file systems, ext4 and F2FS. In addition, since most Android applications use SQLite for storage, we analyze the energy consumption of different SQLite operations. We present several interesting results from our analysis: for example, random writes consume 15\u00d7 higher energy than sequential writes, and that F2FS consumes half the energy as ext4 for most workloads. 
We believe our results contribute useful design guidelines for the developers of energy-efficient mobile file systems.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/hotstorage_slides_purohith.pdf", "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-mohan.pdf", "https://www.usenix.org/conference/hotstorage17/program/presentation/mohan" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/5428/b6a7197befb61f60360806ca67b687de54dd.pdf", "s2Url": "https://semanticscholar.org/paper/5428b6a7197befb61f60360806ca67b687de54dd", "sources": [ "DBLP" ], "title": "Storage on Your SmartPhone Uses More Energy Than You Think", "venue": "HotStorage", "year": 2017 }, "54314bb74466ef8eac165b622a71e670fef14eb1": { "authors": [ { "ids": [ "1964738" ], "name": "Yan Zhai" }, { "ids": [ "39167624" ], "name": "Qiang Cao" }, { "ids": [ "1767703" ], "name": "Jeffrey S. Chase" }, { "ids": [ "9833675" ], "name": "Michael M. Swift" } ], "doi": "", "doiUrl": "", "entities": [ "Access control", "Authentication", "Cloud computing", "Data mining", "FUJITSU Cloud IaaS Trusted Public S5" ], "id": "54314bb74466ef8eac165b622a71e670fef14eb1", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "17f19d9ec093ef82a10f1276fc53c10d4667836d", "07f0e56d1c37c213cd5c617dbfba5a0549629a19", "11e7e02278725d09d7c6dd67482249453ad0e58e", "17886b4911ffd50d7e02a574caad34a286458b3a", "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "0e316f76dac185ee2d922e64d4659b2e36842196", "066add40724f1022011ef4e17a39c7d66c88397c", "0db9636ace0830b8b5e86b031a7a86d621446bd9", "9a53abcd90ce847ba776bc933b19f77e698b020a", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "dbfd312448babe69654697020c8e1a5a3c5f4b29", "0e148769e51b9a4d2870b8cdbe93ebb70ae64a63", "0a289fd7b14345822b1acda6d82750b15d59663e", "177ba72da171d8c741a08c75162d820c501a4f4c", "0a9c1bbbe831ee15379f776a7ed8da7319ec4f06", "5699f794183a8c2440116a29a9b3502038c1f829", 
"06bf84f98e7dd39be8d96eb67bafbf56d61bc715", "36222f8eb2ccf21ca345e15186cea64506581543" ], "paperAbstract": "One way to establish trust in a service is to know what code it is running. However, verified code identity is currently not possible for programs launched on a cloud by another party. We propose an approach to integrate support for code attestation\u2014authenticated statements of code identity\u2014into layered cloud platforms and services. To illustrate, this paper describes TapCon, an attesting container manager that provides source-based attestation and network-based authentication for containers on a trusted cloud platform incorporating new features for code attestation. TapCon allows a third party to verify that an attested container is running specific code bound securely to an identified source repository. We also show how to use attested code identity as a basis for access control. This structure enables new use cases such as joint data mining, in which two data owners agree on a safe analytics program that protects the privacy of their inputs, and then ensure that only the designated program can access their data.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-zhai.pdf", "https://users.cs.duke.edu/~qiangcao/publications/tapcon.pdf", "https://www.usenix.org/conference/hotcloud17/program/presentation/zhai" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/86dd/73f59939ac2eb0f2b5bb959d27cf35cba95f.pdf", "s2Url": "https://semanticscholar.org/paper/54314bb74466ef8eac165b622a71e670fef14eb1", "sources": [ "DBLP" ], "title": "TapCon: Practical Third-Party Attestation for the Cloud", "venue": "HotCloud", "year": 2017 }, "5451e8f4e616600f6061a95b435d3d3b75f4c69a": { "authors": [ { "ids": [ "2488489" ], "name": "Animesh Trivedi" }, { "ids": [ "33372120" ], "name": "Nikolas Ioannou" }, { "ids": [ "2089988" ], "name": "Bernard Metzler" }, { "ids": [ "2840980" ], "name": "Patrick Stuedi" }, { "ids": [ 
"3205436" ], "name": "Jonas Pfefferle" }, { "ids": [ "1714355" ], "name": "Ioannis Koltsidas" }, { "ids": [ "7975384" ], "name": "Kornilios Kourtis" }, { "ids": [ "1735078" ], "name": "Thomas R. Gross" } ], "doi": "10.1145/3078468.3078477", "doiUrl": "https://doi.org/10.1145/3078468.3078477", "entities": [ "Application programming interface", "Data rate units", "Data store", "End-to-end principle", "Flash memory", "Flash memory controller", "Holism", "Key-value database", "Kinetic Void", "Protocol stack", "Remote direct memory access" ], "id": "5451e8f4e616600f6061a95b435d3d3b75f4c69a", "inCitations": [ "1d08d231ec66645ec56d2210c1a7c6b44c6ff041" ], "journalName": "", "journalPages": "15:1-15:14", "journalVolume": "", "outCitations": [ "93008eb5924b63846bcb1c93a96d451068a2351c", "7fe1907e9ecfd87119ba51a035c18dded3a1575c", "10fede77f843e9eb5ef1768a17543013616d9243", "1a0af07c26d30548f2bd40c769f3961547a78179", "1ac1348938a45e1da84be8caac78e3097acaf0c1", "29a1148d75878671dc3663bf480e33d7bd91597d", "8250447bdd67beac6225ac4f3ea89367cc50f3e4", "0e5c646909bb762da0cd325e084655c12445578f", "7129b305ce45f83127e928e8510da9fae0783905", "40f04909aaa24b09569863aa71e76fe3d284cdb0", "5c0e86f286972d34036da95b9c8d80581a985819", "09fca67472a49f94c54fdd3f652ad586d5ab361b", "ca9044b491fec792cac4977915a59928689a690c", "088ab821a427bf57b796115365d3825923882ccb", "77a3133097ff59bae0b6ac8fae418a58b585dacb", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "46a38fb1e26c9957111d154c6fc2c39101ff0ba1", "9e152eac71577b7ee9175d3e68fd76963170eebe", "701c1b756c427f01c56e76bb250c8bbb2d4b7720", "b6e74306023f8b9890714d58c781ce9f5c1d3897", "0b877aed79939b2ba81b6dc58ce8544c6b532bcb", "d613171f0c26751c65a977ecd7a7493e819af887", "7799d6ee9ce9f262d32927cf9feb209bbcb75921", "a381880cc65109cfc3f8ba3dfc3deccf875df442", "225603198cc415d363db8a8a2bd30b0df3c963b1", "3fc93257ac94aa8d6505c19077058e68622345b6", "102bf68c4227380dab0567d8f17fb720d3421525", "2c11d5117a8b97ef2ef268e5fb38e8c5ffb1c58c", 
"3c4ae51452823afafabe8d33d51218d1d95c2795", "37331e5ecd502b26559fe08aeee2f657cc9bcfa8", "38a9120f780602521af9744e31d80ef5cd9593a7", "5f948207acb92e6f4e09aa5f5a2cf7cdf2d80ba5", "a35295a26ded98e5649a94a8ad03baa8cc8d9dd3", "205cf007cf77bbf81e55b74635017087585f7b7c", "69258ba9b1ace027daa767192698c84bf49b9fb6", "23ed23b8226e427835e87a7803d4ee720cdbea66", "080b1f2c8316dad80d8c385dfcb82335a64a4d29", "2f38afd1b2894e6968116a3de9c86dc97a00ee55", "131e1e1d163a0f49881d7b5ac092892093391015", "03eb427813552b2165e5250105e55dbfb7ef151e", "8d7ab91362fa1319d696a0dc538ca881352bda76", "a6069e65c318f07d2b35934b0d4109148f190342", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "499b458ddd5d10dc4be158fd89aeed0e31b6b5cf", "2b310dece4ec01037c0f10eb91f0edb589b7f0cb", "ce55e9e292cb20c90ec65d16e181f66f19898692", "57d2df84a585f96ddc874898977cfe2fbe02a68f", "af88c8a6740ccd8e5a4477329ab23cf717bfd9a6", "1f4fac99af2d8a6d9471eb3cad7b5ae0365c0933", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", "14c9c04973ca9bd1cecf0892a9b90a54aa930098", "12db88fcf7cfee093c64e4e7737458e694a38181", "48b4d17fff536ec396f5d150d2aee0aac61d70a0", "09bd66ed15985caa6b0bf1d54a36b508141ed128", "3767bfac1eb78148bb61aca159f45dd4cdb588ff", "daf0cd0076b388712ea12ec4105572997fc50cdf", "30419d0e0fcaebbfbcdb88f702bd01306d14fb15", "f4906a6f4be3e117c32f35fbf5e2f9de92cc4cd5", "cbf02684c23380fb61b8a9ba0be1bb3373aa4931", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "108ffa868b6dc5e8b4987342c90c79c8ccf841c2", "48c2af3d559fb2c7ef5e71efd24ab5ae217c1fee", "1cc9ebeab21d668c8fb197a2498380e95c6a65fb", "805b7d313543543a0a45b9647db3469d1be3f167", "048a09d7c8713dc2533c1e31ac3f224868293461", "def29d202e537d026b8d3ed91655b540ef86cceb", "0d09ff9866eae4c0da7d7585b02c32f241a7e994", "5839aa45b7ea7834557fe8d69ba943f83f9740e3", "35c2f7e0454adc0130c4279fce84a31701cebc67" ], "paperAbstract": "During the past decade, network and storage devices have undergone rapid performance improvements, delivering ultra-low latency and several Gbps of bandwidth. 
Nevertheless, current network and storage stacks fail to deliver this hardware performance to the applications, often due to the loss of IO efficiency from stalled CPU performance. While many efforts attempt to address this issue solely on either the network or the storage stack, achieving high-performance for networked-storage applications requires a holistic approach that considers both.\n In this paper, we present FlashNet, a software IO stack that unifies high-performance network properties with flash storage access and management. FlashNet builds on RDMA principles and abstractions to provide a direct, asynchronous, end-to-end data path between a client and remote flash storage. The key insight behind FlashNet is to co-design the stack's components (an RDMA controller, a flash controller, and a file system) to enable cross-stack optimizations and maximize IO efficiency. In micro-benchmarks, FlashNet improves 4kB network IOPS by 38.6% to 1.22M, decreases access latency by 43.5% to 50.4 µsecs, and prolongs the flash lifetime by 1.6--5.9× for writes. We illustrate the capabilities of FlashNet by building a Key-Value store, and porting a distributed data store that uses RDMA on it. 
The use of FlashNet's RDMA API improves the performance of KV store by 2×, and requires minimum changes for the ported data store to access remote flash devices.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078468.3078477", "https://www.systor.org/2017/slides/FlashNet.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5451e8f4e616600f6061a95b435d3d3b75f4c69a", "sources": [ "DBLP" ], "title": "FlashNet: flash/network stack co-design", "venue": "SYSTOR", "year": 2017 }, "547389ee6f98be12c747fa6fa8f9538ac89f5345": { "authors": [ { "ids": [ "1720735" ], "name": "Chao Zhang" }, { "ids": [ "3355833" ], "name": "Liyuan Liu" }, { "ids": [ "27365673" ], "name": "Dongming Lei" }, { "ids": [ "34284012" ], "name": "Quan Yuan" }, { "ids": [ "39371343" ], "name": "Honglei Zhuang" }, { "ids": [ "3168604" ], "name": "Tim Hanratty" }, { "ids": [ "1722175" ], "name": "Jiawei Han" } ], "doi": "10.1145/3097983.3098027", "doiUrl": "https://doi.org/10.1145/3097983.3098027", "entities": [ "Cluster analysis", "Crowdsourcing", "Mixture model", "Multimodal interaction", "Online shopping", "Step detection", "Test set" ], "id": "547389ee6f98be12c747fa6fa8f9538ac89f5345", "inCitations": [ "4689e63bc83e8a1926af50bc98519823f6cc6252", "3f6b2e2189dcffab627cea4fb84f1a990eeaa767", "03f6304cd3a914ea33bcb24acb9e40030a1ab821", "db5f682f80fbd8f65ff84374e21686d502231af0", "2d851f1f5e34e915b4b7e91add282238098ee2da", "afd74c7dfd99fafc41c83670bc4529b51912a675", "873bb1d992e55afca552e27d9c58afd329220c7f" ], "journalName": "", "journalPages": "595-604", "journalVolume": "", "outCitations": [ "129face5f40d05de412e5ccabba726129f4020fc", "4b46d07decbfdf42381fd58c78cf1d4cebacabf6", "04034c4e773160209114c6f95d3f1e9f4aa7ee92", "1b3675fc0f2b16743b1e1f0c2f84829cfdb3d34f", "0ae76a541ff54e7e1007e14284e8e3f9c9a99935", "ecc13be3e07f8f1c74c5212d576374abbb355f38", "0ee6f08cd730930a352fe55c0f34e54d1a536c1d", "2910b8702ce197a683b17764e893155b1373f919", "163358355f85c3da8960b843606e372ded0f2cff", 
"a47d80a21e6ce24bb5b3f6e8b0c72680875904b1", "0d9131422ed0296212959acc2e5077213ebb3183", "88dcb174402f481137a45d6e6fcc6dfdd0a511b3", "073daaf4f6fa972d3bdee3c4e4510d21dc934dfb", "7992945a9b0b0c1718bc2b629a60d36bd8cca1e0", "01ff2b834772dfc2b8b7ba00620b65abb9444a75", "1f071f3e9127fcb0df176e560309bc780ea62f35", "0c7247c74f946590eca6221d5b9d906d0f4d22c3", "0a5fc63c9b951cdb3155d6edcf46ad031f7a76f2", "ecdecb0a4408d7c332f11f54a79b524e78b55232", "49b2a1b9606c0ccb95a36895760fc91b8b830266", "0ef189a41e1404a4f5eeddfee02c6ce27452ced3", "6ba401b333ed4ed97c78863929533acdecd1133c", "b584b35c98c9689f2f9ae3be83fce97200db5bb1", "21a0f88ba4c4481bb31f683376bbdc6c87986b02", "87f2101c46f191512a62f78c68a2560d35ce613b", "434013939dcb6bd1ddd6eccf404fe0646fda0251", "0a59af8e31918f78eae03d20e7f6481d074fc087", "7894683e9f0108245d43c3de91a3426e52e0d27f", "1e009f0b145193d127fd715598f385825dbae09d", "84b6f189513f0be9db02353a677e99506491f18a", "823262c42414bfaba9a0cea736e1c77c7cea7837", "6a44f2f7102f5e66c1c6d97529c8236614c26153", "39a93f7ea3e4a2fa0d46f045472f3acded81f094", "17a41aa18b8987ed87d6fc19b87d36faf8a4240c", "3998b5a40342a37d8570ec8482498ff05db2ccf1", "1ecafb52f890448a9c46ac09cb36e97332dc11e8", "243e7d9e326f06103a703cf951999a97b2e5ec49", "3716a023c953e9a7dc867a46e5bce5a974e700d2", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "06584de7f82107ee61504fc7e5156c258c33f18f", "08e2b48012699908b2b4a1d799d452d02779bfdd", "0de8e6311773e3ad14f934460c001016a19daf76", "087ab67119b7caf129e93d8daa170a7c12a2a8f6", "06e01c82ca1a1e7b9a6b946162214d3eb2eeaae9", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "4144689dab8c64c772480a079a308cc1f5a7bb7e" ], "paperAbstract": "Detecting local events (e.g., protest, disaster) at their onsets is an important task for a wide spectrum of applications, ranging from disaster control to crime monitoring and place recommendation. Recent years have witnessed growing interest in leveraging geo-tagged tweet streams for online local event detection. 
Nevertheless, the accuracies of existing methods still remain unsatisfactory for building reliable local event detection systems. We propose TrioVecEvent, a method that leverages multimodal embeddings to achieve accurate online local event detection. The effectiveness of TrioVecEvent is underpinned by its two-step detection scheme. First, it ensures a high coverage of the underlying local events by dividing the tweets in the query window into coherent geo-topic clusters. To generate quality geo-topic clusters, we capture short-text semantics by learning multimodal embeddings of the location, time, and text, and then perform online clustering with a novel Bayesian mixture model. Second, TrioVecEvent considers the geo-topic clusters as candidate events and extracts a set of features for classifying the candidates. Leveraging the multimodal embeddings as background knowledge, we introduce discriminative features that can well characterize local events, which enables pinpointing true local events from the candidate pool with a small amount of training data. 
We have used crowdsourcing to evaluate TrioVecEvent, and found that it improves the performance of the state-of-the-art method by a large margin.", "pdfUrls": [ "http://hanj.cs.illinois.edu/pdf/kdd17_czhang.pdf", "http://doi.acm.org/10.1145/3097983.3098027" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/547389ee6f98be12c747fa6fa8f9538ac89f5345", "sources": [ "DBLP" ], "title": "TrioVecEvent: Embedding-Based Online Local Event Detection in Geo-Tagged Tweet Streams", "venue": "KDD", "year": 2017 }, "547c382f047acb1e2f2728fec936c89dbe74d545": { "authors": [ { "ids": [ "2333607" ], "name": "Lianjie Cao" }, { "ids": [ "36001316" ], "name": "Puneet Sharma" }, { "ids": [ "1680676" ], "name": "Sonia Fahmy" }, { "ids": [ "39095207" ], "name": "Vinay Saxena" } ], "doi": "", "doiUrl": "", "entities": [ "Artificial neural network", "Decision support system", "Intrusion detection system", "Network function virtualization", "Network model", "Proxy server", "Scalability", "Suricata" ], "id": "547c382f047acb1e2f2728fec936c89dbe74d545", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "25d28bfbfd9067d9cb5a85f4af0af3a57013baf4", "07a0c17f21ccc862a52cec5963246cc9e0096d2d", "c1d6cec26c4f643cf985212b4ee36dc9c9217452", "0a96ed079dfa8768c4aba0226dd3e014a4f61f2c", "26016400563279a2b90c73d685de3e02d3d7e441", "1fa77f71d42f19fb0fc54570bbbbc460cf2989c3", "1ab7aa767e1779c87d822325859e47fe2986e6b2", "08e802a81653c9be29a35084633364b4b2021db2", "e25a07384dee2ce73e8426b7f3bff4a38eb7bf5b", "7f822adf127881926c2fab2401d6e3e381bd9c11", "2c7e254cfea97f0b10320d723aa906f3f5c9bed6", "7260363c8b9a3e9d8f0b560c67cc49619bf06e56", "10cfd82431f861efad11fdfadb85d2220def7c88", "9e98d529d158e2230d722f497fbc36373eaa8583", "231ba17921ebd80e95771e28dfb5082e169d5a53", "5eb3ac3f7aa60a5b354c489ced10f42c8f381eb2", "5b33afa07e0c428724073de51200c8b8051825c1", "63a061c70da9ce645de1ad803a06f1595833befb", "21b8099f8b9b7044793daa848cc109aafe201fd7", 
"277f20ddc0e9fa593753ef2778110508372c597f", "2d31ca30ad19bf85c0339f66903adc238ef65515", "9289860d43896b2d174a136eb56f03bb1b05e8d9", "0e1f55c288d25d7b83a17b200f9dab5ab74d43f6" ], "paperAbstract": "Dynamic and elastic resource allocation to Virtual Network Functions (VNFs) in accordance with varying workloads is a must for realizing promised reductions in capital and operational expenses in Network Functions Virtualization (NFV). However, workload heterogeneity and complex relationships between resources allocated to a VNF and the resulting capacity makes elastic resource flexing a challenging task. We propose an NFV resource flexing system, ENVI, that uses a combination of VNF-level features and infrastructure-level features to construct a machine-learning-based decision engine for detecting resource flexing events. ENVI also extracts the dependence relationship among VNFs in deployed Service Function Chains (SFCs) to carefully plan the sequence of resource flexing steps upon scaling detection. We present preliminary results for the accuracy of ENVI\u2019s resource flexing decision engine with two different VNFs, namely, the caching proxy Squid and the intrusion detection system Suricata. 
Our preliminary results show that using a combination of features to train a neural network model is a promising approach for scaling detection.", "pdfUrls": [ "https://www.usenix.org/conference/hotcloud17/program/presentation/cao", "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-cao.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/547c/382f047acb1e2f2728fec936c89dbe74d545.pdf", "s2Url": "https://semanticscholar.org/paper/547c382f047acb1e2f2728fec936c89dbe74d545", "sources": [ "DBLP" ], "title": "ENVI: Elastic resource flexing for Network function Virtualization", "venue": "HotCloud", "year": 2017 }, "547e07d6da931ba712656b865c3ddd8e606c7a44": { "authors": [ { "ids": [ "37612847" ], "name": "Zhenyu Song" }, { "ids": [ "3015446" ], "name": "Longfei Shangguan" }, { "ids": [ "1750056" ], "name": "Kyle Jamieson" } ], "doi": "10.1145/3123878.3132004", "doiUrl": "https://doi.org/10.1145/3123878.3132004", "entities": [ "Algorithm", "Baseline (configuration management)", "Java HotSpot Virtual Machine", "Spectral efficiency", "Throughput" ], "id": "547e07d6da931ba712656b865c3ddd8e606c7a44", "inCitations": [ "aeb55505866cf1f5310497a8a6f3736bd3cddecf", "cd69ae919f8d1087868427c90e9d363204be53dd", "2dff14d22b23cc5ab162bf7f9ab28b019d15ebc7", "c5779cdf1e5c645f8c81d324bd51dba252e079a8", "b126d650b684bac95317fd25916a3cd68ef04bd2" ], "journalName": "", "journalPages": "111-112", "journalVolume": "", "outCitations": [ "0de7786798e1a5681b51bd8084a88dfa48fa1fc7" ], "paperAbstract": "This paper presents the design and implementation of Wi-Fi Goes to Town, the first Wi-Fi based roadside hotspot network designed to operate at vehicular speeds with meter-sized picocells. Wi-Fi Goes to Town APs make delivery decisions to the vehicular clients they serve at millisecond-level granularities, exploiting path diversity in roadside networks. 
In order to accomplish this, we introduce new buffer management algorithms that allow participating APs to manage each others' queues, rapidly quenching each others' transmissions and flushing each others' queues. We furthermore integrate our fine-grained AP selection and queue management into 802.11's frame aggregation and block acknowledgement functions, making the system effective at modern 802.11 bit rates that need frame aggregation to maintain high spectral efficiency. We have implemented our system in an eight-AP network alongside a nearby road, and evaluate its performance with mobile clients moving at up to 35 mph. Depending on the clients' speed, Wi-Fi Goes to Town achieves a 2.4-4.7x TCP throughput improvement over a baseline fast handover protocol that captures the state of the art in Wi-Fi roaming, including the recent IEEE 802.11k and 802.11r standards.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098846", "http://doi.acm.org/10.1145/3123878.3132004", "http://paws.cs.princeton.edu/sites/default/files/WGTT-sigcomm17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/547e07d6da931ba712656b865c3ddd8e606c7a44", "sources": [ "DBLP" ], "title": "Wi-Fi Goes to Town: Rapid Picocell Switching for Wireless Transit Networks", "venue": "SIGCOMM", "year": 2017 }, "54a8486eece5347cfd8f1fa4bd445c961981310b": { "authors": [ { "ids": [ "1745852" ], "name": "Thomas Bauerei\u00df" }, { "ids": [ "3225083" ], "name": "Armando Pesenti Gritti" }, { "ids": [ "1766791" ], "name": "Andrei Popescu" }, { "ids": [ "2342981" ], "name": "Franco Raimondi" } ], "doi": "10.1109/SP.2017.24", "doiUrl": "https://doi.org/10.1109/SP.2017.24", "entities": [ "Access control", "Automata theory", "Confidentiality", "Distributed computing", "Formal verification", "HOL (proof assistant)", "Information flow (information theory)", "Input/output", "Isabelle", "Multimedia framework", "Proof assistant", "Scala", "Social media" ], "id": 
"54a8486eece5347cfd8f1fa4bd445c961981310b", "inCitations": [ "e54b7d8ef93e08b55ca41f5d65812528b29b1c27", "29c07eefe253a4bc293a23ba1d02ff0d9101a443", "15b3f5027614363581cd42cd2bd411aebcc983f0" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "729-748", "journalVolume": "", "outCitations": [ "426ae085b8baac8e42e24ec97f50885c9bf44ac8", "c19c88795c1cae6901c39b9cee6785b133d85961", "72de8869137d5c8c0ae87d4f3101e7375f68850d", "7ed18988808474f776ddefb476b90bb3c2a2cdfa", "100b55262a5546743b9ae1f391ecd5468a81ef20", "17ab326243c6817deda6d2c55914bf194e5cd3d2", "54891c5d9866f09287372adec3ad9701935418ee", "1d5210e6490fdf8ecc6ce0135e2441d26ed43dda", "047fef73865624ed2ecab04636764c58ac89adf7", "5682f9cd82f04974a7e29cc157974b9b0c71b0ea", "8c46d17cc2e080780365ee66317da663617f0333", "0ce08422a673120033246cfd15f920be0eea058f", "1db0eded624e350616b36f6ab284c9b2093a0de5", "a39046724fc95f3e5c62850c77c3df687051f6c7", "2a286b65158f0fea302569d6b51cf0de0f933cb1", "02f68e05e5e470b55affe068c81bdbb7d81c252f", "a5fdce271d22bf940e50eb55be5a62cae016c01d", "c0fbf5ef9e808805f79b4f352047b431cbb45696", "03ad81f6276792a78312471429fc9495b89a1ffc", "06eea8801458b89676f84e407b24d6720907451d", "59261f7ccf03b580ed39f96b8928bc965c24d520", "0a77b8fd3bc19c226d98bb56162d82141ca03475", "49c59164962b847d9f35bc506d92d92f7a4f0ae7", "2dff0f21a23f9e3b6e0c50ce3fec75de4ff00359", "07f627e080722b1b314baa79441aa5f8914fb030", "0025870ef15a8f2858ff4186329d4bde316e9e01", "349842108aa31fdebcd01b58924ade3f125d3c6f", "63a9331649d18280f0b08e0b1e464ca9ef0017d1", "0aefd7662e682cd51f4b8f4ab85096fa3f14c8e3", "05ff5ece05e7680fd016beb3eb128f1e09979023", "386fb0bb5f99ccdf96409e582f7d490b7a868529", "12b3dd4eb02864a256d9c1b9e2ab8d79616d4df3", "a8280939a9d4eb0e5a9ff3b8c7545846613bbdc4", "5693c2a2c52f4905638559b2fc2b76c975806175", "11a68b5de90fc3f0b56f1acdfe688b91eff1b1ba", "4e7334db18da606f0ddb85caab476a026337aa1e", "3201bf85bee9995aafa569c47669db463551e6cc", "af0482f181cf4e5a797b0fb73a5a99498e470f3d", 
"014b976046f9b2c5b0f9ddaf830fb76ed65de5e4", "78a7407ec9670e87550ebaba94840b925c00bbe9", "d1c89e416674486df3961bede1bc542b996c3710", "1909d80df329a636fd91990e5c7f33a4f5ef482b", "907af07045be0792315518b60510595ea58643e6", "2ed62c0c80a8448dcb46afca4f53367126f9796e", "1d081dbf3e9afebafac90fdeed4bfa788012142f", "7a96671dbae659462f3a4f3183bb798c89934fa7", "835686801b52223a786193007a1f2c5aec8056cd", "94f8458d6317f5d59551a128311c3863f1d988a0", "62da30c995a9d83f0551ac5a535f916444628d60", "40c47420fdda6b715430153437ac77d62d1da6d8", "0d5664d2339b8d3bb89806ccea2c51fcd4d8290b", "adbf25dc1a14bb1f420b09633be1b88865a00051", "06a6ced6d2c37571ed8ec956d9b99c22f908ead6" ], "paperAbstract": "We present the design, implementation and information flow verification of CoSMeDis, a distributed social media platform. The system consists of an arbitrary number of communicating nodes, deployable at different locations over the Internet. Its registered users can post content and establish intra-node and inter-node friendships, used to regulate access control over the posts. The system's kernel has been verified in the proof assistant Isabelle/HOL and automatically extracted as Scala code. We formalized a framework for composing a class of information flow security guarantees in a distributed system, applicable to input/output automata. 
We instantiated this framework to confidentiality properties for CoSMeDis's sources of information: posts, friendship requests, and friendship status.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.24" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/54a8486eece5347cfd8f1fa4bd445c961981310b", "sources": [ "DBLP" ], "title": "CoSMeDis: A Distributed Social Media Platform with Formally Verified Confidentiality Guarantees", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "54aea761600684eea98b8d38c1ce972ba1f38888": { "authors": [ { "ids": [ "9187969" ], "name": "Alexander Krause" }, { "ids": [ "2162217" ], "name": "Thomas Kissinger" }, { "ids": [ "1694689" ], "name": "Dirk Habich" }, { "ids": [ "3320376" ], "name": "Hannes Voigt" }, { "ids": [ "7337091" ], "name": "Wolfgang Lehner" } ], "doi": "10.1007/978-3-319-64203-1_11", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_11", "entities": [ "Algorithm", "Concurrency (computer science)", "Graph (abstract data type)", "Graph partition", "In-memory database", "Locality of reference", "Multiprocessing", "Non-uniform memory access", "Optimal control", "Pattern matching", "Scalability", "Uniform memory access" ], "id": "54aea761600684eea98b8d38c1ce972ba1f38888", "inCitations": [ "9ec258cd336cd8db880a69eab885b7f8f73a4303" ], "journalName": "", "journalPages": "149-163", "journalVolume": "", "outCitations": [ "148edd9ac0ed0485f14f470949f64a9d92cbbc10", "0f34ea8535dc5833a1a3692ffc7abc6740d2406a", "199006c69026ed2dde8a25e676b67bdcb76f171b", "11e4d4d00c7b1e3aa9fcf3c490b635df98827dd9", "ff71759a3efa271670c1e7820873df872b4ca3b9", "a9ecf2ae6b009e80ad5b4a9f81b116c2ee1face7", "0cfe2a442b752925c6511521c709c7c214b6e10e", "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", "5b6a93efeeb646ab77483a301313b71eb2f45c8c", "34a325ee1fbcc7c3c5ffd0c379422f5f7065b9c7", "5b660f6fb6b1277a5c8a311a7e688234cde909d9", "11620e0e8d0224d4401439a1c6774f5bd750b847", 
"3486aeaf540c48952120fe853d672af984f40a6a", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "ac90920fee742959cfd58b8b1c3b956694940ee5", "4c2988cc02773d77be8ad800e62eca6708ff5675", "b071b7803f5d3fbe4f2fd7212b85a976f894df5c", "6efa88ccc5fff02bbb8736284b9891cdfda3b35f", "ec02fc78f7ec0d9cbef609f4386143eaa84d4ae5" ], "paperAbstract": "Pattern matching on large graphs is the foundation for a variety of application domains. The continuously increasing size of the underlying graphs requires highly parallel in-memory graph processing engines that need to consider non-uniform memory access (NUMA) and concurrency issues to scale up on modern multiprocessor systems. To tackle these aspects, a fine-grained graph partitioning becomes increasingly important. Hence, we present a classification of graph partitioning strategies and evaluate representative algorithms on medium and large-scale NUMA systems in this paper. As a scalable pattern matching processing infrastructure, we leverage a data-oriented architecture that preserves data locality and minimizes concurrency-related bottlenecks on NUMA systems. 
Our in-depth evaluation reveals that the optimal partitioning strategy depends on a variety of factors and consequently, we derive a set of indicators for selecting the optimal partitioning strategy suitable for a given graph and workload.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_11", "https://iccl.inf.tu-dresden.de/w/images/0/0d/Europar-2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/54ae/a761600684eea98b8d38c1ce972ba1f38888.pdf", "s2Url": "https://semanticscholar.org/paper/54aea761600684eea98b8d38c1ce972ba1f38888", "sources": [ "DBLP" ], "title": "Partitioning Strategy Selection for In-Memory Graph Pattern Matching on Multiprocessor Systems", "venue": "Euro-Par", "year": 2017 }, "54be4148c4ebb985505664516ca0004718086c0a": { "authors": [ { "ids": [ "3170313" ], "name": "Pramod Subramanyan" }, { "ids": [ "40436310" ], "name": "Rohit Sinha" }, { "ids": [ "3087734" ], "name": "Ilia A. Lebedev" }, { "ids": [ "1695217" ], "name": "Srinivas Devadas" }, { "ids": [ "1775517" ], "name": "Sanjit A. 
Seshia" } ], "doi": "10.1145/3133956.3134098", "doiUrl": "https://doi.org/10.1145/3133956.3134098", "entities": [ "Adversary (cryptography)", "Adversary model", "Confidentiality", "Hardware restriction", "Secure channel" ], "id": "54be4148c4ebb985505664516ca0004718086c0a", "inCitations": [ "6db9824d4667b22310c51fe638403238f873e9f2" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "565", "journalVolume": "2017", "outCitations": [ "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "07ee23b1fc57ba55bed31905b86b069c4346d33e", "06bf84f98e7dd39be8d96eb67bafbf56d61bc715", "18fb0f29720e46c455578e6b0b3953a4eb3f1614", "77a1532cb64eab28162a0277cde52b4b7eceda49", "7aaede70f5efcb1542a80707c1f0f8b01955a7d2", "0c10529346c4d2d5d4462636a0b3a0dd9fb8d25c", "659bc8a947a4ea64b2ca38b7fa5a27233de2c1f8", "027c0969d21de0d52af6c8c7e8d63f12245382ae", "c8f6a8f081f49325eb97600eca05620887092d2c", "4204fad49d84c19156fa8b08bbf7942cde8f5aa1", "3b7e821532a852d27eacd89bcaa869a6263eb144", "0fda9bbccd6908637e2ead1cef69f091bfda75d4", "2d7f54adf5bb0ac598aaddd2498204248c49e444", "92269c8558248333fca7532d9c02469ac6cf5fa0", "7cbbab21c6f6fb54a968005afa21468b825f1d1e", "24706cbf8c48414ed66db6fbf223c47452f7cbdc", "67d796b67b5f82727ee00d683073bebd37d19335", "3c4e907c07944cd55e800b4e55918adf8cb2a683", "2b004e0484f4940fca341fa97ecb8ac94fe780a5", "8e3270ef5a0afc293021f8d594979ed059e29d5e", "104eb3716d8b2d7e6d69805357d5b7fb87caff3e", "4c8ad20e8d682d9956dad6a68d2e2a022773a959", "43c2eca1452dfc5c47cc091cdb4b03296d67fb08", "3367eaf02789f5dcf741318fcc18c0dea8fcbb76", "934e8d76376f6c78a8b89ef2304f01a8e7099401", "232302a7a9aeb7eba0d296a5e846664efcb6ca4f", "7c555dfba844337d9ed1d56c231d99448069d83e", "e75f1da3cc9270505601be3f05e2658641c2e1ea", "069fb7358d94c3e3c318ca8ae24955f5aa5b46fb", "1c7c37d8b6ab599bcf6a148b058e259eca9d3a66", "d296252ddf0e2c6b7422008d703843c1863bd15b", "afb819e710435c395f410ccbd99a1cd492e78382", "8b546049caead6e9da6b76a0386dfb011d24aae2", "47e98574f432ec6a7a224b807a0bbd08a2a66148", 
"29621133de70a2769470c13a2d9c27d3a5ed9587", "3ffc94c7066b4856b1cfae99ab66cd20310e41dd", "051ec4b4cbc38d0b42882b8be11ddb42e5324c39", "0ba384496a466b8f33f0adfdf4b4b2b106ec6c45", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "09dce8e6947261600ec145f4544ede7ae5dc437e", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "01fde8698110cf46ff48a17c65f2658dab4c323c", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "5693c2a2c52f4905638559b2fc2b76c975806175", "42142c121b2dbe48d55e81c2ce198a5639645030", "6c15a1a25d4d103ed251d82c95ff4f0b38866a06", "226242629f3d21b9e86afe76b1849048148351de", "c37d38f9614be0cbc527147a0b60e25d361ffe20", "857348ee328c6870a812bf36179460306697ee6e", "078b855c40fefabd766a09f23280c59feef21634", "304c2b0b6e8e1cc746117a14257dbc024d5135e9", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "17886b4911ffd50d7e02a574caad34a286458b3a", "b0ac2616034f56ab1469afb935b55fe7e37f8f41", "52c2c050af5b32d4929b4b193967a3675d03aea0", "96ba6f5c06850c009e5b77094c0d4532744dedc2", "452c803f91ab670bf36403ed5412875b13ae9e94", "41c2c11acde144ccf62cb6eff30731195d22775b", "9535dfb7a0010735b4451b4ccf90f477040da159", "59684cf4f60456f5eea2991a0d7f90095f37a657", "e170759a280b86f92113ce801b8980ed13247aca", "2b6df21137f30d25494bb58521a6062f93e915f8", "2087f336b0eb38fc60a24a32ec0821fc8fe2b2aa", "78ef558e04209af5c1243c640f6aa71e5b211bf3", "a60d00ba42a4bed7adb3dc40cd1c32cbaffda5df", "905f7a231a80fa00c87b41d6b4a85561e54f56a4", "19ca5f86807610a7aed1008155c7105e43808d4f", "15ea0a6ac7b5849e8fc06057bd45ce08fe20985d", "82f91ec321b631ed3e4616374b555edb06bc2d4b", "09b0508faccfbc115a9c5540032da5dfb71c48e3", "51854f6133cd8d890beb8576e6f0b44a33916803", "0025870ef15a8f2858ff4186329d4bde316e9e01", "2ef5f6ac941cbe55e2a81265d996541bf5806e77" ], "paperAbstract": "Recent proposals for trusted hardware platforms, such as Intel SGX and the MIT Sanctum processor, offer compelling security features but lack formal guarantees. 
We introduce a verification methodology based on a trusted abstract platform (TAP), a formalization of idealized enclave platforms along with a parameterized adversary. We also formalize the notion of secure remote execution and present machine-checked proofs showing that the TAP satisfies the three key security properties that entail secure remote execution: integrity, confidentiality and secure measurement. We then present machine-checked proofs showing that SGX and Sanctum are refinements of the TAP under certain parameterizations of the adversary, demonstrating that these systems implement secure enclaves for the stated adversary models.", "pdfUrls": [ "http://eprint.iacr.org/2017/565", "https://people.eecs.berkeley.edu/~rsinha/research/pubs/ccs2017.pdf", "https://eprint.iacr.org/2017/565.pdf", "http://doi.acm.org/10.1145/3133956.3134098", "https://people.eecs.berkeley.edu/~spramod/papers/ccs17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/54be4148c4ebb985505664516ca0004718086c0a", "sources": [ "DBLP" ], "title": "A Formal Foundation for Secure Remote Execution of Enclaves", "venue": "CCS", "year": 2017 }, "54fa405faa4245988dc0e8c760412cb866e009a5": { "authors": [ { "ids": [ "2198667" ], "name": "Barzan Mozafari" }, { "ids": [ "3415413" ], "name": "Jags Ramnarayan" }, { "ids": [ "3415450" ], "name": "Sudhir Menon" }, { "ids": [ "7587970" ], "name": "Yogesh Mahajan" }, { "ids": [ "33801198" ], "name": "Soubhik Chakraborty" }, { "ids": [ "3415563" ], "name": "Hemant Bhanawat" }, { "ids": [ "3416102" ], "name": "Kishor Bachhav" } ], "doi": "", "doiUrl": "", "entities": [ "Apache Spark", "Big data", "Computational model", "Distributed computing", "High-throughput computing", "Scalability", "Stream processing", "Throughput", "Total cost of ownership" ], "id": "54fa405faa4245988dc0e8c760412cb866e009a5", "inCitations": [ "bd5557294322c1c3237b37495a1fb2f15950852d", "39b62c7fc926127d11f6d60d78066ef9d9564a55", 
"7254ad8940dc3ea502ef65fd9b71a9a2952daf81", "140de9a2a670d2468cfeb4d0c5c677cf64c80866", "5977a741cbc79c9b72a9587d40732bf2d64ff376", "2b6a2ec50b841f435a89b1711001ee8bf776a760", "40791626f1f99ec663d8b07023dc3909ac728dc3" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "0f4dcff632983a35806c23acbf766d2acadc1846", "37861bb8d8daeab11d379a012ab526222a3f9990", "162e6c06bbe83daf74b4fb849367f123b4d65850", "80b88886489619bbe87ae02ad0295896851a53a3", "78c83644fbfa65e69137f57e22fa3b53f225a5d5", "5208060771fd213eefd827e3e1260b939f1aed6d", "4b65024cd376067156a5ac967899a7748fa31f6f", "029f5fdbcbd621e2795f9dcd9b7b0a440a69e251", "0558c94a094158ecd64f0d5014d3d9668054fb97", "e8c33bb06b6bef8da3af50015a03e50637e61f0a", "9ce1f58ade8612656ff9278a7785f2256fb8749a", "edd661ca12ef8ef988679f7b399f2f846ef01fbd", "4b66ce55c4a8b5c6dfa804146ad32a5a0797d7ec", "67eb4c1794be54919266f70b5bf8ba7a6824f091", "07b66a85083291d2b702a3bcc30f32854d4a6d29", "0beca56d0260ffa0c68d17b7e90ccff42b820076", "00aa614734a26a19b09a0a3bdee2adc77bdac5e4", "33ef71ebd67ed2dfffe14d73408173e6e8e94e5b", "0069d62d30b906deb74e86472bf543212defded1", "5eec8a7fffeb65e011fe814609c2b535b628de42", "17f66d7a69bf20b29602d943069eedcd1c07abff", "22fa7b136662f51f712bdc9ccbb811a08327805f", "115ac1e107a0a1da87e12455de7e3f645fd00836", "35c84ab17b9537fe293766913b2081e52c64881c", "da01f7fcc5c7eeba75bc09a41fdd946e65210090", "8af01e6cb7375ff671ed6efd8576253ab6e12d04", "c20baa16cb57ff4979569871d15294fa720bbc23", "3df5b6b3b2b648f3d8224322e6a0f127850df017" ], "paperAbstract": "Many modern applications are a mixture of streaming, transactional and analytical workloads. However, traditional data platforms are each designed for supporting a specific type of workload. The lack of a single platform to support all these workloads has forced users to combine disparate products in custom ways. 
The common practice of stitching heterogeneous environments has caused enormous production woes by increasing complexity and the total cost of ownership. To support this class of applications, we present SnappyData as the first unified engine capable of delivering analytics, transactions, and stream processing in a single integrated cluster. We build this hybrid engine by carefully marrying a big data computational engine (Apache Spark) with a scale-out transactional store (Apache GemFire). We study and address the challenges involved in building such a hybrid distributed system with two conflicting components designed on drastically different philosophies: one being a lineage-based computational model designed for high-throughput analytics, the other a consensus- and replication-based model designed for low-latency operations.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p28-mozafari-cidr17.pdf", "http://web.eecs.umich.edu/~mozafari/php/data/uploads/cidr_2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/54fa/405faa4245988dc0e8c760412cb866e009a5.pdf", "s2Url": "https://semanticscholar.org/paper/54fa405faa4245988dc0e8c760412cb866e009a5", "sources": [ "DBLP" ], "title": "SnappyData: A Unified Cluster for Streaming, Transactions and Interactice Analytics", "venue": "CIDR", "year": 2017 }, "5502e982cc8f83200996c597414f47c21fc10e73": { "authors": [ { "ids": [ "14707986" ], "name": "Anthony Canino" }, { "ids": [ "39584086" ], "name": "Yu David Liu" } ], "doi": "10.1145/3062341.3062356", "doiUrl": "https://doi.org/10.1145/3062341.3062356", "entities": [ "Android", "Assistive technology", "Java", "Laptop", "Program optimization", "Programmer", "Programming language", "Raspberry Pi 3 Model B (latest version)", "Rechargeable battery", "Run time (program lifecycle phase)", "Type system", "Typing", "Windows Phone" ], "id": "5502e982cc8f83200996c597414f47c21fc10e73", "inCitations": [ "0ceb2dd586ddcd21c26fe2dd399da7e1c9269fbb", 
"e3aca014b04e379e2dc1b57f5fd637dff61ae872" ], "journalName": "", "journalPages": "217-232", "journalVolume": "", "outCitations": [ "1544a52f31e1475af848ff59a5fccabff56f3355", "021af3b63fbcf5d867a4b27ca161841bf129c759", "21092ca3e1928c7a800447c09d6153bf9f022d2c", "046c311cd974a454207c0199adbde18a395ee39c", "141004dee9e799b40bfaf50b4a72618613137250", "1464b10d807ae05349dcb3eeeb6f4f279e7399a3", "82ab5e4ef9ddf57af1dd487392f6074b7fcc2f9e", "8865aeb8efaa49a1700230e2cb1dee4c157800c8", "dd9e9587977ae0453546f1583908bed2da9079be", "03ad81f6276792a78312471429fc9495b89a1ffc", "73c36a5da987d405e1f96ab09107c650a611e173", "020563172c4dba9f08f6a3221312b5bd59cd2112", "7515e77f5d1bedf72e7ca57a71361a51fe2b32cc" ], "paperAbstract": "Application-level energy management is an important dimension of energy optimization. In this paper, we introduce ENT, a novel programming language for enabling *proactive* and *adaptive* mode-based energy management at the application level. The proactive design allows programmers to apply their application knowledge to energy management, by characterizing the energy behavior of different program fragments with modes. The adaptive design allows such characterization to be delayed until run time, useful for capturing dynamic program behavior dependent on program states, configuration settings, external battery levels, or CPU temperatures. The key insight is both proactiveness and adaptiveness can be unified under a type system combined with static typing and dynamic typing. ENT has been implemented as an extension to Java, and successfully ported to three energy-conscious platforms: an Intel-based laptop, a Raspberry Pi, and an Android phone. 
Evaluation shows ENT improves the programmability, debuggability, and energy efficiency of battery-aware and temperature-aware programs.", "pdfUrls": [ "http://www.cs.binghamton.edu/~davidl/papers/PLDI17.pdf", "http://doi.acm.org/10.1145/3062341.3062356" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5502e982cc8f83200996c597414f47c21fc10e73", "sources": [ "DBLP" ], "title": "Proactive and adaptive energy-aware programming with mixed typechecking", "venue": "PLDI", "year": 2017 }, "5504e7296e31decf5df32cdfbe85037b8e84a545": { "authors": [ { "ids": [ "2599594" ], "name": "Leiming Yu" }, { "ids": [ "2003815" ], "name": "Xun Gong" }, { "ids": [ "2213925" ], "name": "Yifan Sun" }, { "ids": [ "39192990" ], "name": "Qianqian Fang" }, { "ids": [ "39402210" ], "name": "Norman Rubin" }, { "ids": [ "1771736" ], "name": "David R. Kaeli" } ], "doi": "10.1109/IISWC.2017.8167777", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167777", "entities": [ "Graphics processing unit", "Kernel (operating system)", "Linux", "Maxwell (microarchitecture)", "Program optimization", "Programmer", "Resource contention", "Scalability", "Scheduling (computing)", "Throughput" ], "id": "5504e7296e31decf5df32cdfbe85037b8e84a545", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "197-206", "journalVolume": "", "outCitations": [ "2c2fa29dfbbab106f1b94cdfdc67939d3355bd30", "019ebe0205a759f8dab80b617f9f8ccd179c5c62", "6f25bdae1bef527b4a210f8e4b753bb7a0740631", "054e4a6966d54eb9fd207cf0484214201f46424a", "039ad1ad259a9bd98e24b0738ba048282188d184", "0e12eb94aab5d64d08baacf0df36a4b7ed054c46", "a717dc2286d679980ed2458adc4ff6e27e2b55e7", "035740197ba476892d6bc844436d39f3eedf4bb0", "10eb7bfa7687f498268bdf74b2f60020a151bdc6", "892b7a56dad27028ba2cc68a900b8ee220792e85", "04ec5964a08a2ad62a30fea1fb9eff1e484a4524", "558603661cffa34459a1d1fd33a178c0a535057a", "6d75cf933ed74d85cf6eaada4d1ce6bb2a1eb256", 
"8383b7f6f4f9556e522f735a0fd7b8c9e11e613b", "a2304dca58992acd50b1306827bcab00efbef579", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "87eb82e37bff71234dbe936819d896f12288925a", "0a7dc7ad47d4339e775df204d5033bb3a6e0608a", "2d6f002477015469075954c6748a1a85af352c94", "00d00e482b32252398a6177eed1ee867a384402e", "bdc7a60ab9b6182bb53ab76c995ba6a3aa4a696e", "e79e7e4b399799c6312d2f8d25ff75ca1539284d", "5fe43f4d79d2829ba062fe9cf1711ee60903dd24", "96b4b72d1098674750c4a406c93efe43e036568b", "40bb6830ec715b52e0fcbe7566f489dd2d0c25fe", "09c4d72644980dfff4c61cf4a4806b1d74d351c9", "2ad29134da93304e72dd047ca99ec6cfef2b4990" ], "paperAbstract": "GPUs continue to increase the number of compute resources with each new generation. Many data-parallel applications have been re-engineered to leverage the thousands of cores on the GPU. But not every kernel can fully utilize all the resources available. Many applications contain multiple kernels that could potentially be run concurrently. To better utilize the massive resources on the GPU, device vendors have started to support Concurrent Kernel Execution (CKE). However, the application throughput provided by CKE is subject to a number of factors, including the kernel configuration attributes, the dynamic behavior of each kernel (e.g., compute-intentive vs. memory-intensive), the kernel launch order and inter-kernel dependencies. Minor changes in any of theses factors can have a large impact on the effectiveness of CKE. In this paper, we present Moka, an empirical model for tuning concurrent kernel performance. Moka allows us to accurately predict the resulting performance and scalability of multi-kernel applications when using CKE. We consider both static and dynamic workload characteristics that impact the utility of CKE, and leverage these metrics to drive kernel scheduling decisions on NVIDIA GPUs. The underlying data transfer pattern and GPU resource contention are analyzed in detail. 
Our model is able to accurately predict the performance ceiling of concurrent kernel execution. We validate our model using several real-world applications that have multiple kernels that can run concurrently, and evaluate CKE performance on a NVIDIA Maxwell GPU. Our model is able to predict the performance of CKE applications accurately, providing estimates that differ by less than 12% as compared to actual runtime performance. Using our estimates, we can quickly find the best CKE strategy for our applications to achieve improved application throughput. We believe we have developed a useful tool to aid application programmers to accelerate their applications using CKE.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167777" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5504e7296e31decf5df32cdfbe85037b8e84a545", "sources": [ "DBLP" ], "title": "Moka: Model-based concurrent kernel analysis", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "5520e353c1b0fd5e282c9cb4e25b9ed268fd7538": { "authors": [ { "ids": [ "40248185" ], "name": "Aditya Agrawal" }, { "ids": [ "1695950" ], "name": "Josep Torrellas" }, { "ids": [ "3289910" ], "name": "Sachin Idgunji" } ], "doi": "10.1145/3123939.3124547", "doiUrl": "https://doi.org/10.1145/3123939.3124547", "entities": [ "Bump mapping", "Die (integrated circuit)", "Dummy variable (statistics)", "Dynamic random-access memory", "Frequency capping", "Headroom (photographic framing)", "Heat sink", "Overhead projector", "Process migration", "Simulation", "Thermal copper pillar bump", "Thermal resistance", "Transistor" ], "id": "5520e353c1b0fd5e282c9cb4e25b9ed268fd7538", "inCitations": [], "journalName": "", "journalPages": "546-559", "journalVolume": "", "outCitations": [ "dcb536fae4b9062099dd73915efb6afd37391bb8", "ba7838793a4aad85a088ef95784a4d6dba557863", "34b2e3d61d290a6a592d75df82b7423d63fe18d1", 
"0b754507c2dd15d936bc17f8b80b1a8fcce02e6a", "b575c072dcde1d7c627e6108da5814696fff9bfb", "55273c8d9739db655dda0c59c31b0c48f7ed4af2", "19554445f1f3ea7b54be06a74a0d0840ade02be5", "3a0c9e8f601f1d05b9c145e70aead80550c156f6", "1ac13e114099c51f86b7bc31b63cf87f4472488b", "4435198bcbbdf8fed12401c2256b334d6a17df04", "00ab25c6582d543932fccbb0f15fe93445f95d61", "8745b7ed3fc14684d52fa05017cd87954d2112c7", "40b209cf2d662f1b16716e7eed91e3d107d510df", "8100ec8738572c230d8e215b70f3c4f89ee432da", "df5ecb9413ca0f4bbad56f1b78fcb917cdfcaf23", "e58543e596e36a1e3249f8c42e6849c5b5d2b282", "eb6cf15b4c0c579586255d2f94293f2f04048197", "00493a5f7a716eceb6a0e17cebf405fa834a84c2", "047e932a9fe101d77e2d684c224e4f13da5e94bd", "c62705617121636418301230dcc37a6fb0ec107c", "453d00b754ba24f92ee7a69b2fb606397d7ecba5", "27f4d1e7921f2357bab1110cea6ca827938e6f66", "d4536b49bdb553721953016c7a1a6902763fe44a", "36c13c7c1c50713634e4b989f2ac92d99e5143eb", "c1125a8c68399ff16d8b07926953a9f24892419d", "fc6206acf90cef6b7aead71ee860cb39ab4c4de2", "85664527a5f7bfe3d5000fa5c653439ebdb52129", "cbdad54e0a154bca9e51f7a416196a4595daabc5", "3ee47780011ee618bd5a64624a662375e1958e0a", "2b59583bbe6586393994e2e30e226885e7daf3a0", "c2a72f5c7fe70f70cc40c84d048aa808b855d9dc", "642d7d1830a2d33ad7d140e0d0caae92ba2c11bf", "167f78125336294e184773d1469f816944af7e11", "1b0019684ad65500c049c607f7f123fab8bcf139", "00f965f5536ece0df0de9a9b86bcf3ee7b296915", "94a884925f33a1510ec25ef8f262bf8069b574cf", "9ebb1431069e2fd18880d760940ca47c0bc6a255", "61b4af4502c6a959170265d5529b48f4105684ad", "ce7951a975558b62d1bb4a05cc9950682757a522", "e895c4aca6a49973b94effeadb9e9e37495ce49b", "743c3b2e01fae5e120249ed22000bc02b923086c", "04dcd8acdc16e42463e783ea5bc8283607ccee3f", "9de77cb052b9029abb644dd893a784f454f8eafb", "babc27e34a7f9b57864b24221404d4b0fa110e85", "68bca3f49fd2e5fd69c84a06994ef34c42ddff80", "352a8957005dc5519b15ed1870751ec494d66395", "848abc9a0a834802be18de1471f9749770763535", "6ffb684681a413e6197ec67944e6517742bd0f65", 
"48a7323c4894de3afb90ef2135160205ebb55011", "88b0cfaa6d572751fed94009c82a7cb071de6ea9" ], "paperAbstract": "In upcoming architectures that stack processor and DRAM dies, temperatures are higher because of the increased transistor density and the high inter-layer thermal resistance. However, past research has underestimated the extent of the thermal bottleneck. Recent experimental work shows that the Die-to-Die (D2D) layers hinder effective heat transfer, likely leading to the capping of core frequencies.\n To address this problem, in this paper, we first show how to create pillars of high thermal conduction from the processor die to the heat sink. We do this by aligning and shorting dummy D2D μbumps with thermal TSVs (TTSVs). This lowers processor temperatures substantially. We then improve application performance by boosting the processor frequency until we consume the available thermal headroom. Finally, these aligned and shorted dummy μbump-TTSV sites create die regions of higher vertical thermal conduction. Hence, we propose to leverage them with three new architectural techniques: conductivity-aware thread placement, frequency boosting, and thread migration. We evaluate our scheme, called Xylem, using simulations of an 8-core processor at 2.4 GHz and 8 DRAM dies on top. μBump-TTSV alignment and shorting in a generic and in a customized Xylem design enable an average increase in processor frequency of 400 MHz and 720 MHz, respectively, at an area overhead of 0.63% and 0.81%, and without exceeding acceptable temperatures. This improves average application performance by 11% and 18%, respectively. 
Moreover, applying Xylem's conductivity-aware techniques enables further gains.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3124547", "http://iacoma.cs.uiuc.edu/iacoma-papers/PRES/present_micro17_2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5520e353c1b0fd5e282c9cb4e25b9ed268fd7538", "sources": [ "DBLP" ], "title": "Xylem: enhancing vertical thermal conduction in 3D processor-memory stacks", "venue": "MICRO", "year": 2017 }, "5527336607512130e923646b3a89d655e004ade6": { "authors": [ { "ids": [ "2492241" ], "name": "Lingxiao Ma" }, { "ids": [ "20570336" ], "name": "Zhi Yang" }, { "ids": [ "3029186" ], "name": "Han Chen" }, { "ids": [ "2870618" ], "name": "Jilong Xue" }, { "ids": [ "34889832" ], "name": "Yafei Dai" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Auxiliary memory", "Central processing unit", "Computation", "Computer data storage", "Fastest", "Field-programmable gate array", "Graph (abstract data type)", "Graphics processing unit", "Non-volatile memory", "Solid-state drive", "Speedup", "Xeon Phi" ], "id": "5527336607512130e923646b3a89d655e004ade6", "inCitations": [ "eba4a67229413f9eb703d26f64ef530899150ad5" ], "journalName": "", "journalPages": "195-207", "journalVolume": "", "outCitations": [ "3231d62bec8e8cc1d837e85893889855767c3b13", "6f7cd29a3dfdcb2f6880a022e13054542020c5ce", "79dc7d714f01f469de4dbc531acd432dabebdfdb", "22e98d48c4cb573adec6fa875d18d14955113456", "652d93bd6e85fefffe6f2e47b35ed3c516d2d889", "eb82d3035849cd23578096462ba419b53198a556", "3726c60552263e648c6856679e672de2e1c110e5", "0be9827857bfd79a00a9b1e64d59e8c34534362c", "162e4c9d52af580b9d21ec1a631dfc25d4cd150b", "0ee5abec0c7002c759d70e4d75921b65a6d8666a", "2916fd514c69c1b3141c377c1c97d957bdc86c5e", "14edc660cb7db680f2e471460a794f68ba03f295", "191fd33f17c2a79b3825d4cc2105c47a8f16ba44", "94c20561f102ede79c461c6aeee74eb7a55d5eb4", "0f014693b25d9846025219b88f8ca480fac68b0a", "175d795f44037ef60dd9df341701cd5fdc449f1f", 
"24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "476b64be7cc0b985c02d69dd0532965924dd1869", "1156f60e40548096df49528b1342bb3e88b0f378", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "3486aeaf540c48952120fe853d672af984f40a6a", "0ad8e89091eed09217e66adc98136126addc2619", "4587d4722317acd4e2a90b12f58ccc9de1ecc6ee", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "8d1a4ae1d3a17edd5b16653f2f582f9952b71612", "3e3984102c20318162eac32d5276d21c09f9decb", "6de3915df2b9927a78f213629f3bcb052ec21e8b", "2d6f002477015469075954c6748a1a85af352c94", "6a888f3dd0a17b0241be61daa378ba6caffa6617", "1521d39088b203ddac981d10d214f463449ae95b", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d" ], "paperAbstract": "Recent advances in storage (e.g., DDR4, SSD, NVM) and accelerators (e.g., GPU, Xeon-Phi, FPGA) provide the opportunity to efficiently process large-scale graphs on a single machine. In this paper, we present Garaph, a GPU-accelerated graph processing system on a single machine with secondary storage as memory extension. Garaph is novel in three ways. First, Garaph proposes a vertex replication degree customization scheme that maximizes the GPU utilization given vertices\u2019 degrees and space constraints. Second, Garaph adopts a balanced edge-based partition ensuring work balance over CPU threads, and also a hybrid of notify-pull and pull computation models optimized for fast graph processing on the CPU. Third, Garaph uses a dynamic workload assignment scheme which takes into account both characteristics of processing elements and graph algorithms. 
Our evaluation with six widely used graph applications on seven real-world graphs shows that Garaph significantly outperforms existing state-of-art CPU-based and GPU-based graph processing systems, getting up to 5.36x speedup over the fastest among them.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_ma.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/ma", "https://www.usenix.org/system/files/conference/atc17/atc17-ma.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/5527/336607512130e923646b3a89d655e004ade6.pdf", "s2Url": "https://semanticscholar.org/paper/5527336607512130e923646b3a89d655e004ade6", "sources": [ "DBLP" ], "title": "Garaph: Efficient GPU-accelerated Graph Processing on a Single Machine with Balanced Replication", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "553ff11bbe4329048fe1246ae8230e69f4c7f796": { "authors": [ { "ids": [ "3414165" ], "name": "Jinghan Meng" }, { "ids": [ "2812268" ], "name": "Yi-Cheng Tu" } ], "doi": "10.1145/3035918.3035936", "doiUrl": "https://doi.org/10.1145/3035918.3035936", "entities": [ "Approximation algorithm", "Data mining", "Database", "Graph database", "Model\u2013view\u2013controller", "NP (complexity)", "Polynomial", "Structure mining", "Time complexity", "Vertex cover" ], "id": "553ff11bbe4329048fe1246ae8230e69f4c7f796", "inCitations": [ "229bc76e592c751e88b4c44ac18399f0e8c9c855" ], "journalName": "", "journalPages": "391-402", "journalVolume": "", "outCitations": [ "2e10c5de97011d7a2929d5af7193ac2fa03310e4", "9652b8f68d0a748560b52759982e3021284dcd04", "157e33bc77be75f381e431c436641e8738f7bd3e", "75d5e3b7c38ae7ba60aabda376fc03b25f5f49c8", "8ac341c5070ec185fbde896cae997595b842868c", "215aad1520ec1b087ab2ba4043f5e0ecc32e7482", "a830582a1f1a85df58fd8808b0963e3aaae109bd", "55d176b92d5740d039e1c8ebbad025d460de9ae0", "7bb492a6794bd90e8eba049afc8283e8314d620c", "7d51a0df30ad828d6953d0161ad91a3cae04af8a", 
"0495df24e2bbd4c3d0f4bdcf6c70e71b0072a874", "3190b6a0c421db3baf0613250cf0097eda0baecb", "70079b69a162880740c1e47b4a6c403fec280574", "dcfde37607cb953c8eabce88ad71efa66023adf5", "9f5a3d870bdf2f72ab66560d31219b8580d9fe60", "2caeae81beca7645c51c70bf21b69b3f9df76919", "34cbc81259c59fc87a9a8818a076e69acd47b6c1", "2ea4e47cdaf4a7d61e5caff957e26d2090b5f7fd", "0ecc0140fde8f6c6137ddd5f4c13f35685f7f3e8", "49b46d1e7f58dc49d4f61714cf2acc827538aa04", "0b698cf866906498ade8a8c77111cf704491db22" ], "paperAbstract": "In recent years, the popularity of graph databases has grown rapidly. This paper focuses on single-graph as an effective model to represent information and its related graph mining techniques. In frequent pattern mining in a single-graph setting, there are two main problems: support measure and search scheme. In this paper, we propose a novel framework for constructing support measures that brings together existing minimum-image-based and overlap-graph-based support measures. Our framework is built on the concept of occurrence / instance hypergraphs. Based on that, we present two new support measures: minimum instance (MI) measure and minimum vertex cover (MVC) measure, that combine the advantages of existing measures. In particular, we show that the existing minimum-image-based support measure is an upper bound of the MI measure, which is also linear-time computable and results in counts that are close to number of instances of a pattern. Although the MVC measure is NP-hard, it can be approximated to a constant factor in polynomial time. We also provide polynomial-time relaxations for both measures and bounding theorems for all presented support measures in the hypergraph setting. We further show that the hypergraph-based framework can unify all support measures studied in this paper. 
This framework is also flexible in that more variants of support measures can be defined and profiled in it.", "pdfUrls": [ "http://www.csee.usf.edu/~tuy/pub/SIGMOD17.pdf", "http://doi.acm.org/10.1145/3035918.3035936" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/553ff11bbe4329048fe1246ae8230e69f4c7f796", "sources": [ "DBLP" ], "title": "Flexible and Feasible Support Measures for Mining Frequent Patterns in Large Labeled Graphs", "venue": "SIGMOD Conference", "year": 2017 }, "55416b8613af06855bd94059c3d0305adc58057b": { "authors": [ { "ids": [ "9754946" ], "name": "Wencong Xiao" }, { "ids": [ "2870618" ], "name": "Jilong Xue" }, { "ids": [ "11009920" ], "name": "Youshan Miao" }, { "ids": [ "1700892" ], "name": "Zhen Li" }, { "ids": [ "1716895" ], "name": "Cheng Chen" }, { "ids": [ "1692206" ], "name": "Ming Wu" }, { "ids": [ "1688012" ], "name": "Wei Li" }, { "ids": [ "2902416" ], "name": "Lidong Zhou" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Computation", "Graph drawing", "Machine learning", "Model of computation", "Parallel computing", "Unsupervised learning" ], "id": "55416b8613af06855bd94059c3d0305adc58057b", "inCitations": [ "a6472fe7fbc978de8597c2f783891aa1eb1f87a5", "6cae9ad284a73471a8ed9e483b1673a60d61d946", "381b008b49d04c1bd5ff00649521fa028b9d3ea8", "36a64cb68a3da37ed9d54f03750e1f1ac6d3d336", "9cb00e828d97c24be3d408fc2401ccc503dedae0" ], "journalName": "", "journalPages": "669-682", "journalVolume": "", "outCitations": [ "1156f60e40548096df49528b1342bb3e88b0f378", "2a894be44d07a963c28893cc6f45d29fbfa872f7", "04ca5de59edbdd49a9c0502c58331524d220bc8c", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "0270c2056eb50b5d4597afa722c50abf21e67a82", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "0e9bac6a2b51e93e73f7f5045d4252972db10b5a", "0a4b55a4ba0b60b3862d2d797e3aac0b2cde24a8", "2b3113b7fda6414548e88fc664f3be96d5209830", "9207a7356d90343b5107e3e445fa7de86f3078eb", "0608d9937c074520cdc93cc444cc1c77039c5332", 
"7717b438da4ec3ca4247ff7abf6dd603e91fe41d", "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "280608cef4e07b5c4de82d75c9cc37c6b9478eb0", "06884d6d8acd7de87f5260df1cfbb6acc025cf00", "0546fa6622b8b8db8527be777a692d88c5c037b0", "6de3915df2b9927a78f213629f3bcb052ec21e8b", "31bd159b02068fbe1994b7a5e9d7b91adab0d142", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "11e4d4d00c7b1e3aa9fcf3c490b635df98827dd9", "ef9d9821df55442f039b128bb5cef2b41ab2cadc", "996de72d13fe153067552d0c6b5a28bb05728148", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "17f70b9d1fcf3b31948ffa578ac89399751fe73d", "3073762760c5f27ab4570c12136928a71b7806c3", "420a0e5fc398f197bca3dfe40291a82b2c65655a", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "2f7678f96837afbc1f58680ad844c35ffa52b0c1", "d0d2e2924e7258092af15581f90760bfda25f825", "0f014693b25d9846025219b88f8ca480fac68b0a", "34b8809c214db18544ce93674bf85fce0e8b3330", "0a8e0a9f0ae9910d5ecf165071559a2c4191a098", "8caec7f48fbf19a086e3cf67fce16cf77dff9488", "31af4b8793e93fd35e89569ccd663ae8777f0072", "043afbd936c95d0e33c4a391365893bd4102f1a7", "0ad8e89091eed09217e66adc98136126addc2619", "0122e063ca5f0f9fb9d144d44d41421503252010", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "5c36d8bb0815fd4ff5daa8351df4a7e2d1b32934", "6a888f3dd0a17b0241be61daa378ba6caffa6617" ], "paperAbstract": "TUX2 is a new distributed graph engine that bridges graph computation and distributed machine learning. TUX2 inherits the benefits of an elegant graph computation model, efficient graph layout, and balanced parallelism to scale to billion-edge graphs; we extend and optimize it for distributed machine learning to support heterogeneity, a Stale Synchronous Parallel model, and a new MEGA (Mini-batch, Exchange, GlobalSync, and Apply) model. We have developed a set of representative distributed machine learning algorithms in TUX2, covering both supervised and unsupervised learning. 
Compared to implementations on distributed machine learning platforms, writing these algorithms in TUX2 takes only about 25% of the code: Our graph computation model hides the detailed management of data layout, partitioning, and parallelism from developers. Our extensive evaluation of TUX2, using large data sets with up to 64 billion edges, shows that TUX2 outperforms state-of-the-art distributed graph engines PowerGraph and PowerLyra by an order of magnitude, while beating two state-of-the-art distributed machine learning systems by at least 48%.", "pdfUrls": [ "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-xiao.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-xiao.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/xiao" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/5541/6b8613af06855bd94059c3d0305adc58057b.pdf", "s2Url": "https://semanticscholar.org/paper/55416b8613af06855bd94059c3d0305adc58057b", "sources": [ "DBLP" ], "title": "Tux2: Distributed Graph Computation for Machine Learning", "venue": "NSDI", "year": 2017 }, "554262fc1b37f8a3d45e91ade79c851f776748c5": { "authors": [ { "ids": [ "1800402" ], "name": "Abhishek Roy" }, { "ids": [ "1891854" ], "name": "Yanlei Diao" }, { "ids": [ "10776562" ], "name": "Uday Evani" }, { "ids": [ "5442319" ], "name": "Avinash Abhyankar" }, { "ids": [ "31890802" ], "name": "Clinton Howarth" }, { "ids": [ "10712297" ], "name": "R\u00e9mi Le Priol" }, { "ids": [ "1707880" ], "name": "Toby Bloom" } ], "doi": "10.1145/3035918.3064048", "doiUrl": "https://doi.org/10.1145/3035918.3064048", "entities": [ "Big data", "Bioinformatics", "Computer scientist", "Parallel computing", "Personalization", "Pipeline (computing)", "Rewrite (programming)", "Speedup", "Synergy", "Whole genome sequencing" ], "id": "554262fc1b37f8a3d45e91ade79c851f776748c5", "inCitations": [ "398c22da3edb238bcf928d24fd9a65e2a219a2c8" ], "journalName": "", "journalPages": "187-202", 
"journalVolume": "", "outCitations": [ "3b3a2c3ade468de01802e0d3e333b4b02cdfb8ff", "6a74715681aa5040d4818e90ded929f2501c5b3a", "53e978ec681a8aba65523e70ab502c63960e35cf", "58fc1b429d4ef4dd5d804bae8c54d4238bd15cc6", "4cad8f2a31b3c72742a761fe90a372d4a4717ebf", "0558c94a094158ecd64f0d5014d3d9668054fb97", "0388e6b008d51c922341a2caccbc9ce7187c6850", "720f612c68bdefc404bb2ac8a702cd1804b2472f", "0b72a5e4bec54e9f0a4d77db5b484d27886b49fe", "d3c0a49cd01559bc2402d91bbf7290c43634218b", "2228b4208c5ea6754df6edcae805038f3e47857c", "032e0ad4973e40acc3d83afd3d6cf57d5befc968", "1529d0842475f8ad061f0e725449b7c7ce19b6ac", "196514ca53f505dec7a8a2b446fc599e8de3f0cc", "3edb4a12b89adc56bb899c57557ec9dab39f7960", "5ab1e2783b702d89865df5834c8fe22124ed890a", "efa5558bddd68abe4adc81adbbef6f739e648392", "55f633638dafc5bb07f65cf7a12886ad0322f781", "4e7c664adc0bfc1800d70d6593515c78df1509db", "2e5186cb10b1945f13ad8a961100b94e63a96b95", "3146b400f6257d8dbe0fa99fe171c5e2dc3d5dff", "14a80b973aeb96e6f2f8b9e292fc05b0d5f9aad0", "700e1d1b7c54711e96645691438d7fd9fda229ef", "bd0b6d9a13e76ada9b634b988bdb3a0131ac5533", "42fee5b7c0d96f93172ac64bfef5a888874f3ab6", "8315f131d02ab573b6eda86bda6437f208cef6fb" ], "paperAbstract": "This paper presents a joint effort between a group of computer scientists and bioinformaticians to take an important step towards a general big data platform for genome analysis pipelines. The key goals of this study are to develop a thorough understanding of the strengths and limitations of big data technology for genomic data analysis, and to identify the key questions that the research community could address to realize the vision of personalized genomic medicine. Our platform, called Gesall, is based on the new \"Wrapper Technology\" that supports existing genomic data analysis programs in their native forms, without having to rewrite them. 
To do so, our system provides several layers of software, including a new Genome Data Parallel Toolkit (GDPT), which can be used to \"wrap\" existing data analysis programs. This platform offers a concrete context for evaluating big data technology for genomics: we report on super-linear speedup and sublinear speedup for various tasks, as well as the reasons why a parallel program could produce different results from those of a serial program. These results lead to key research questions that require a synergy between genomics scientists and computer scientists to find solutions.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064048", "http://people.cs.umass.edu/~aroy/gesall-sigmod17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/554262fc1b37f8a3d45e91ade79c851f776748c5", "sources": [ "DBLP" ], "title": "Massively Parallel Processing of Whole Genome Sequence Data: An In-Depth Performance Study", "venue": "SIGMOD Conference", "year": 2017 }, "55527a4325dfa0bd197cd9a6aa1469a990283664": { "authors": [ { "ids": [ "33374965" ], "name": "Anupam Datta" }, { "ids": [ "2623167" ], "name": "Matt Fredrikson" }, { "ids": [ "34171852" ], "name": "Gihyuk Ko" }, { "ids": [ "3251561" ], "name": "Piotr Mardziel" }, { "ids": [ "1774073" ], "name": "Shayak Sen" } ], "doi": "10.1145/3133956.3134097", "doiUrl": "https://doi.org/10.1145/3133956.3134097", "entities": [ "Algorithm", "Computation", "Essence", "Experiment", "Oracle Database", "Program analysis", "Proxy server", "Systems theory", "Text corpus", "The Witness", "Universal instantiation" ], "id": "55527a4325dfa0bd197cd9a6aa1469a990283664", "inCitations": [ "45616ec288fb076494b3d73a3d096a1b8289b1f8", "f32afbbc74189a38c30bcefcf581948e10dfd1ef" ], "journalName": "", "journalPages": "1193-1210", "journalVolume": "", "outCitations": [ "1977c5af0f4917e5c0bc3ce67155756e4feb01b9", "2a8a95b209d8c4f17721d0f04db99a16af19b399", "1ecfe23503600b7a6a6ed3dcce86542420e36a06", 
"0ccd81d4cb36a23c817a5df196cb7ef42bf623b3", "ad438b2dc689ab95d5aa90390abd0128cd59cf8a", "08b0c25456d948b055124b22e2604ed21b857d36", "b2d8ccd1f38d46ab1354a408bde1dc9ddc5cf33b", "124371386e755b0fa43eba6f61249aa496a7f8c1", "1d89a12092d6323b9d3b1a5bd4e6790897e2a2be", "528cee5472f0a98fc295b4d8caf2e66ca1544d54", "65926b61d0308954bd6cc4f6cbe46eef64147635", "0d0229e1b8ae086b353ceca91e31460e71e57535", "20d9c466dfcfb9860b55436c7865f763ede7b500", "3fbd1f6e3b246816bfa5a28f4c96dec0a7ad6683", "154e504956cbbc8ddfd2e0aa420333f81f1183c9", "17fac85921a6538161b30665f55991f7c7e0f940", "3317ffb5504baffe242edd6bf7ac3a223e83bd62", "283ecc8622694c070fa53aee7a1c37dadc603f8d", "4e90218dbb68ac751dcaf65c2f29292aa2f1c7c7", "a690037101df23fd5cf5b5c4a35b7ea6bc277f33", "4556f3f9463166aa3e27b2bec798c0ca7316bd65", "0b7543518962a8b51854ec2f50aafecb516279f7", "00b0dedeb3947d670492ac2f93ac00d641836649", "d921036a6cb7e340b019afa557a19bc65586a1ad", "15581e7eca3f1cfe02b148fe307e55f87eafbfc4", "0b71702ab2c8e518aa3bf7c71783f82034955a20", "24ab513a04791708b56699f253b5d3315cc7fb4e", "0d25768f709b3455e0e6c1d526cc41433b86f7f5", "4acdd33707ad6c05a0525e4a76fa5492b7fbbe4e", "1c124c199aa2df4d23e5c055e4bf7131a51bd4aa", "61d468d5254730bbecf822c6b60d7d6595d9889c", "49934d08d42ed9e279a82cbad2086377443c8a75", "293af2dc96ffed5435051e0622d6991411690da9", "05128a3c7b2debc27c7ffb4ccf7469876c2d94a2" ], "paperAbstract": "This paper presents an approach to formalizing and enforcing a class of use privacy properties in data-driven systems. In contrast to prior work, we focus on use restrictions on proxies (i.e. strong predictors) of protected information types. Our definition relates proxy use to intermediate computations that occur in a program, and identify two essential properties that characterize this behavior: 1) its result is strongly associated with the protected information type in question, and 2) it is likely to causally affect the final output of the program. 
For a specific instantiation of this definition, we present a program analysis technique that detects instances of proxy use in a model, and provides a witness that identifies which parts of the corresponding program exhibit the behavior. Recognizing that not all instances of proxy use of a protected information type are inappropriate, we make use of a normative judgment oracle that makes this inappropriateness determination for a given witness. Our repair algorithm uses the witness of an inappropriate proxy use to transform the model into one that provably does not exhibit proxy use, while avoiding changes that unduly affect classification accuracy. Using a corpus of social datasets, our evaluation shows that these algorithms are able to detect proxy use instances that would be difficult to find using existing techniques, and subsequently remove them while maintaining acceptable classification performance.", "pdfUrls": [ "http://arxiv.org/abs/1705.07807", "http://doi.acm.org/10.1145/3133956.3134097", "https://www.ece.cmu.edu/~ece734/lecture_slides/Proxy%20Use%20CCS%202017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/55527a4325dfa0bd197cd9a6aa1469a990283664", "sources": [ "DBLP" ], "title": "Use Privacy in Data-Driven Systems: Theory and Experiments with Machine Learnt Programs", "venue": "CCS", "year": 2017 }, "5559e38ecd2b80bcd58fd7db45862dd19bc3d038": { "authors": [ { "ids": [ "10424754" ], "name": "Noor Bajunaid" }, { "ids": [ "1758079" ], "name": "Daniel A. 
Menasc\u00e9" } ], "doi": "10.1145/3030207.3030209", "doiUrl": "https://doi.org/10.1145/3030207.3030209", "entities": [ "Application checkpointing", "Central processing unit", "Component-based software engineering", "Experiment", "Failure rate", "Markov chain", "Markov decision process", "Simulation", "Software system", "Throughput" ], "id": "5559e38ecd2b80bcd58fd7db45862dd19bc3d038", "inCitations": [ "624a141beeaa85d5febf617ad8c1c0cfcc4fa482" ], "journalName": "", "journalPages": "245-256", "journalVolume": "", "outCitations": [ "fb35b5bc1e02de4d9b31176c39247ee9ad6c3290", "6e1ceacdaa0c979cf52af5f478cc2a9891e2b6a0", "ddcb1cb84899b2d0d6aee3b95fe2f2b3018c252a", "54ff0557d9d78b711e7ebbd7278d3fd4a091202e", "029d525dd48347fa4b8a48dbf4b41b4b37199a6a", "c1416bd9b71ee8c8b204e09ec2f51afc40433f76", "ce9fd32b48148c824ad8ab53386027cb0cf007ee", "2d0844dd884e33e4f34869969ac1e01ae60bebfb", "02544882276ff1a35f4b6f1a8504a972b8df4087", "619a739b6290c0ef32b56759763b9ab555f12f6e", "7b70d4dbe727d4eb3c1f1ec7d5515eb1625b1d5f", "6a139103526a5068a6517f5986c702d9d0dca5c1", "05b1aeb6e8020c5d31e30cd4613ead87a4fb9b3f", "cd69034e49d8821636c704c14fb6dfe5c78fc3ac", "b6d66e08e7d73150749e1b292f06e7c85d590f72", "ce343d8ddf416bc20d8e7254591f8fe5c776bdd8", "8d417d46b5d76bb308802fd7a34127d0f0354da3", "871586fdcb18b2380e0288b651bb954f84fb5e1a", "18011a5c998cd8ff1d80cfb2f296d9f8537d7663", "6cc46c57fc958ebc96a1ae209667cf31df7557cc", "fce001f86c570ef216df8503e884500806c67b03" ], "paperAbstract": "Checkpointing and rollback is a key mechanism used to improve the reliability of software systems. The benefits of this mechanism can be offset by the overhead of checkpointing when the failure rate is low. The problem of developing analytic models of rollback and checkpointing has been continuously addressed for over four decades using different assumptions. 
This paper examines the problem under a more realistic angle, i.e., one in which there are several software components sharing resources (e.g., processors and I/O devices) among themselves and with the checkpointing processes. Additionally, the paper allows for different components to have different computing, rollback, and checkpointing demands, as well as different failure distributions. Our models also allow for various checkpointing processes to be executing concurrently to checkpoint the state of different software components. The analytic models developed here combine Markov Chains and Queuing Networks and allow us to compute the following metrics: (1) average time needed by a component to complete its execution, (2) average throughput of a component, (3) availability of a component, and (4) checkpointing overhead. The models were validated through extensive simulation and experimentation.", "pdfUrls": [ "https://research.spec.org/icpe_proceedings/2017/proceedings/p245.pdf", "http://doi.acm.org/10.1145/3030207.3030209" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5559e38ecd2b80bcd58fd7db45862dd19bc3d038", "sources": [ "DBLP" ], "title": "Analytic Models of Checkpointing for Concurrent Component-Based Software Systems", "venue": "ICPE", "year": 2017 }, "556f01b6764f866d7bd4a2d955115ca72bd3413f": { "authors": [ { "ids": [ "3182868" ], "name": "Harendra Kumar" }, { "ids": [ "40604671" ], "name": "Yuvraj Patel" }, { "ids": [ "2042885" ], "name": "Ram Kesavan" }, { "ids": [ "35153920" ], "name": "Sumith Makam" } ], "doi": "", "doiUrl": "", "entities": [ "Checksum", "Copy-on-write", "Data integrity", "Digest access authentication", "In-memory database", "Production system (computer science)", "Software bug" ], "id": "556f01b6764f866d7bd4a2d955115ca72bd3413f", "inCitations": [ "8ee82c0bd80e86c55b56414a602d53164d4fb5c0", "0f4386d4a521e36cb15252b4e908a948a65252ef", "20a108587321823ca9cdd93ac84fc316a0400630", 
"ad897b9261a39cdae6e8b0fdcd755e6001e004bc", "ad42b4773cd461ba58bda07e1b7b0ff24c4ddba4", "67ffec9c10d9594eb9af4afe25b1f0b0bce5f85d" ], "journalName": "", "journalPages": "197-212", "journalVolume": "", "outCitations": [ "39f2a24878abb8699da6c5ab3b436ffef0c4fe6c", "20a108587321823ca9cdd93ac84fc316a0400630", "09c0d62190aedb53e820695ccbe98d90f877cc46", "2335e1c1fcc1d430ed3049557a974a9cc9842e2c", "4644983f2bc04241b6d5a43127e7d1609bfb24fb", "3533159037bc2c11bde6b314e040ee113ae52bdd", "d0b6d2075a653d60452b6df0fced4ee0ae093dd2", "2c84daae142c5b0f4ca6a6772ca7e8cac7d7afca", "7062268b78dff4a8819fe3f1e89c6b5344f715a5", "6afa24070025c624e94b41e05168fd4223807cab", "c9ef82a4ad0b1b33296cea86fb2ec7558cf798fb", "33cb4013c7cc36a173e7fb4e541133056e8e43cf", "ca16c4ad100e54d2458bf22f8a22c8c6f15e8d20", "37ed4f9684e774157f38655768b996b6b875e80a", "8a7536f311d22bd588c5bc2306d54d13effaee82", "088e3e939ad234b6fdd0e321290fb26937dc2553", "4cb7f6fd48468da2f985a44f021fa5b49eb7a6ce", "517c5cd1dbafb3cfa0eea4fc78d0b5cd085209b2", "5c5dd0ae1d8035eadbf2fd411663dd062a922941", "8d8f082bf15960191b74377f5dbb14a4a9bfb62d", "21318da2ea08c1f7b8c77701f67483882950df96", "108c840d5d1847948a2de0250490a327ae069ee6", "44d886f89cdbd4fdf5dd25d83b2d37deb7541bf7", "26f820aa9e782f5d6ba8bcb272a31c32094dfd59", "3abf71e837cb7b1e9fe7e54192d986142d87b1a2", "d9706ba11e5af14d92a1f673f412f0765c082df9", "11b8ef5da9c8df214859bb41b60001a0abd2b5b2", "32d23ce43877aa8cd385a8e01f366329dd015a5c", "04aae75ab8a040225024b6a96ab7cbb28ef74d0a", "0ba03292a9b7cf0a0a5f2f76da3ae1309929d062", "0ba16e5cd9c81282386362c8db0adcd4a203741f", "ae705ea9428baedc1a2de4539a75f6aed444c096", "318c86751f018b5d7415dafc58e20c0ce06c68b6", "0e55379d27454c5d9d72e4ba4b3752007b9f886f", "c769840e996e09f910b9c3ef64eb2686a89c40d3", "3deb7adb74003800813b9e96ba799a1d86e6a47a", "0f6a32792d0882db35fe9391445d4322232b619e", "0f7d62354586d074fe6120700b4fc7597d877b57", "4108e4635351d6f2d0916ee19d0a0ef878649c3c" ], "paperAbstract": "We introduce a low-cost incremental checksum 
technique that protects metadata blocks against in-memory scribbles, and a lightweight digest-based transaction auditing mechanism that enforces file system consistency invariants. Compared with previous work, our techniques reduce performance overhead by an order of magnitude. They also help distinguish scribbles from logic bugs. We also present a mechanism to pinpoint the cause of scribbles on production systems. Our techniques have been productized in the NetApp\u00ae WAFL\u00ae (Write Anywhere File Layout) file system with negligible performance overhead, greatly reducing corruption-related incidents over the past five years, based on millions of runtime hours.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_patel.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/kumar", "https://www.usenix.org/system/files/conference/fast17/fast17-kumar.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_patel.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17-kumar.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/d7df/16b3ee5d58e856982607e38de47d1462b544.pdf", "s2Url": "https://semanticscholar.org/paper/556f01b6764f866d7bd4a2d955115ca72bd3413f", "sources": [ "DBLP" ], "title": "High Performance Metadata Integrity Protection in the WAFL Copy-on-Write File System", "venue": "FAST", "year": 2017 }, "558602b185565281dc6554e625bb2aa6009e43b8": { "authors": [ { "ids": [ "38155806" ], "name": "Reuven Cohen" }, { "ids": [ "2365417" ], "name": "Liran Katzir" }, { "ids": [ "2491608" ], "name": "Aviv Yehezkel" } ], "doi": "10.1145/3097983.3097999", "doiUrl": "https://doi.org/10.1145/3097983.3097999", "entities": [ "Algorithm", "Big data", "Database", "Information retrieval", "Streaming algorithm" ], "id": "558602b185565281dc6554e625bb2aa6009e43b8", "inCitations": [ "50d9847b167cb9b5084d011c0f54481f0703028a", 
"25b5415b855913c01915c0b2c591554360b8afb4", "b55aafb62fb0d49ad7e0ed7ab5b936f985e1ac58" ], "journalName": "", "journalPages": "95-103", "journalVolume": "", "outCitations": [ "2ed51879febb9c6050bdc11e3fd08a10ff2480d6", "488495c644d90c32ef7b58ec3e4ffc7b40f25b36", "351df512735096126454f5d4bc8e9ae56f4cd288", "3da2fe8124b8a43891fcb8ddcfccd3257ca30573", "a22ed51089758fe9b38877f3c7b3e631b3d31412", "02acc390a765e098d3448451d8e24d60f3972722", "0fafd84369d00c2ec7d3261145c188811dc9e675", "6046770d1c3e08edfdd39bdb57fccaca84f5139c", "b2ec74c72d99b755325dc470dec2949d69cd4d57", "438862c1ae7ed2efa873a5364db7b51361f31b18", "a4573f864e2b9e53e25d1ef8f63f1de01b41afc6", "743cd9443c655b75baea441ee5c0d6f7fb384673", "2986f9db238c57b638d54248c4ed1fcb5e4f459f", "1b269d55d5ea514d9a0f37adfdd9bc0f94c75c66", "07ae9afe6a56a13ca67af79a5537b0f3ec64f571", "b36c153be410c0d937d7583de557c0375506d15a", "1cfeaa60ca47b2380cbce766f01e130ef3aa708a", "3734e0c101e29304594b72e07f82bec9f41e0f0f", "14a74eeae1f51d0e3f89a49b3802242f13a38c1d", "43b319ab2e717a1a711d6b06ea9598ea042afe1a" ], "paperAbstract": "In recent years there has been a growing interest in developing \"streaming algorithms\" for efficient processing and querying of continuous data streams. These algorithms seek to provide accurate results while minimizing the required storage and the processing time, at the price of a small inaccuracy in their output. A fundamental query of interest is the intersection size of two big data streams. This problem arises in many different application areas, such as network monitoring, database systems, data integration and information retrieval. In this paper we develop a new algorithm for this problem, based on the Maximum Likelihood (ML) method. 
We show that this algorithm outperforms all known schemes in terms of the estimation's quality (lower variance) and that it asymptotically achieves the optimal variance.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3097999", "http://arxiv.org/abs/1606.00996", "https://arxiv.org/pdf/1606.00996v1.pdf", "http://arxiv.org/pdf/1606.00996v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/558602b185565281dc6554e625bb2aa6009e43b8", "sources": [ "DBLP" ], "title": "A Minimal Variance Estimator for the Cardinality of Big Data Set Intersection", "venue": "KDD", "year": 2017 }, "558eab42b1c2c85846b5f8cef857019a98748528": { "authors": [ { "ids": [ "40655437" ], "name": "Duc-Cuong Nguyen" }, { "ids": [ "10660223" ], "name": "Dominik Wermke" }, { "ids": [ "3224778" ], "name": "Yasemin Acar" }, { "ids": [ "1749517" ], "name": "Michael Backes" }, { "ids": [ "39048620" ], "name": "Charles Weir" }, { "ids": [ "2200198" ], "name": "Sascha Fahl" } ], "doi": "10.1145/3133956.3133977", "doiUrl": "https://doi.org/10.1145/3133956.3133977", "entities": [ "Android", "Best practice", "Experience", "Image stitching", "Integrated development environment", "Next-generation network", "Plug-in (computing)", "Secure coding", "Software documentation", "User interface" ], "id": "558eab42b1c2c85846b5f8cef857019a98748528", "inCitations": [ "b1400438b4822d59a64fba31d0dc590306418ac3" ], "journalName": "", "journalPages": "1065-1077", "journalVolume": "", "outCitations": [ "16cdcedec9a5fc51d89591034908b8580b911b7c", "4fd643182643a1cfe6aec3ce11dd5292814a4e6f", "ea93642d3394b0ba1d5144835ce747a74457fe4f", "4a8da50795fd4196f3aa28d1c095f84c0a40fe3d", "48fc8f1aa0b6d1e4266b8017820ff8770fb67b6f", "c93057aec13d8b1557ef1f6f68331cd3aef280c8", "4c795976cb13e2c9970efb28639d066c36f42c94", "a851b38c914b5c005e2be8b43c2d17c98e09f7c1", "b243c5c5fe90cb485d06ba6e89c2b977d51bcc39", "03a613951421cf67237d5278d6bf3702a26da9aa", "3436344f8c0b5faf596bdf8654dc8998070d2b8b", 
"2ba16a91a0fa1cdd06dec7b0df8f7808e7d9833e", "114580bca9932bfc4e0018886646751adfac724f", "451f72230e607cb59d60f996299c578623a19294", "9d0d597efd386c9c05698291cf39f5a09c030a0f", "a9977198d07f5fede50a81236d7806868f4c9a27", "34a720114bdc38720c9660b0ce2a372e7bfbe59f", "a1b4ad9a9d2d8f787676205ed8663623cc1cd793", "1a0ca2cf71616fa492ebe611f1d83261d2ecf052", "23fa7b866a1b1fee7bb71c8b5a9235cca7120bbc", "f60628636b64c187db1f106823f5af5730b973cd", "b6f5251a67c5cf8539c0213c387a583cdcefd493", "0fa3eabd538d777556f3e87399959d05cefa1f69", "7f9bbe985ccf6c16b6ef60ccb9ef04e4219b54cb", "a394ec59eb036aec8a088bfb09f88c7682ebf798", "1743802b43a7dca265ca8e792c87875f2e4906e8", "81653b68eb28329b05e2337381a3d78d6d5b53df", "32bd7b680830b3e168795ccfe650ceeb0edf7878", "71f8163801980fbaa494cb8c149bd7388034c2ba", "6209474964e02ba34ed539cf3cee8044048a1bda", "6d59f58f7408362036196048c9ba11f399dd9bc2" ], "paperAbstract": "Despite security advice in the official documentation and an extensive body of security research about vulnerabilities and exploits, many developers still fail to write secure Android applications. Frequently, Android developers fail to adhere to security best practices, leaving applications vulnerable to a multitude of attacks. We point out the advantage of a low-time-cost tool both to teach better secure coding and to improve app security. Using the FixDroid IDE plug-in, we show that professional and hobby app developers can work with and learn from an in-environment tool without it impacting their normal work; and by performing studies with both students and professional developers, we identify key UI requirements and demonstrate that code delivered with such a tool by developers previously inexperienced in security contains significantly less security problems. 
Perfecting and adding such tools to the Android development environment is an essential step in getting both security and privacy for the next generation of apps.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133977", "https://acmccs.github.io/papers/p1065-nguyenA.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/558eab42b1c2c85846b5f8cef857019a98748528", "sources": [ "DBLP" ], "title": "A Stitch in Time: Supporting Android Developers in WritingSecure Code", "venue": "CCS", "year": 2017 }, "55a022ab83a0f848f3e18693ac3a4ffe016f2704": { "authors": [ { "ids": [ "1792031" ], "name": "Tao Lu" }, { "ids": [ "7920553" ], "name": "Eric Suchyta" }, { "ids": [ "32485139" ], "name": "Jong Youl Choi" }, { "ids": [ "1734819" ], "name": "Norbert Podhorszki" }, { "ids": [ "1736095" ], "name": "Scott Klasky" }, { "ids": [ "1727669" ], "name": "Qing Liu" }, { "ids": [ "2193258" ], "name": "David Pugmire" }, { "ids": [ "4003076" ], "name": "Matthew Wolf" }, { "ids": [ "2942322" ], "name": "Mark Ainsworth" } ], "doi": "", "doiUrl": "", "entities": [ "Blob detection", "Code refactoring", "Delta encoding", "IBM WebSphere eXtreme Scale", "Simulation", "Supercomputer" ], "id": "55a022ab83a0f848f3e18693ac3a4ffe016f2704", "inCitations": [ "1e23b9e0a2f95521d72fe39c0e310450e782b264" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "4e63fd64ac90778b8c431e37f54b5cfd01e0f379", "9c44e61f4762618dd78ce8355065b94235b84ae2", "1bbdc1c5ee0ac447472bc3f4de720ab885ff4c43", "74943873f64cff6d4905beda883dba1e572f0fcb", "1d3a151a18ac5a479fa46d342e464751dd668d23", "8097630668c8115f03b6d320a7b5cdc1f005066e", "2b0bce44b3840796d4ed578c43542ebf839d74af", "4432b1ef0b18015f3f20f09d8a80ee3dc6a3edab", "093fc19d440f33247e545ec6c047e0aa0afb0863", "7777d299e7b4217fc4b80234994b5a68b3031199", "5355bcc49732bc71674e872097257c95f9e9a3ac", "ea1db5a68cea156b11eadb3d2ddcb791e5991949", "721c5be47c923d9c0303a3eefd3d42a57e0add03", 
"65ad9178eb3e3bb6ef01df6ced56e389e911376c", "15c7d3d5cfce46110a5aa5c6a482e359a96082b4", "f19870a1b4847ca61beed722d557a50189479d27", "04fa1a1d9298f7d56cab3f897def24057d48993f", "f93b1ccf2a97edd055afa4e6a3e32770cbc002de", "24b25dd17ee2396910f3df74481ee225d5d440bd" ], "paperAbstract": "High accuracy scientific simulations on high performance computing (HPC) platforms generate large amounts of data. To allow data to be efficiently analyzed, simulation outputs need to be refactored, compressed, and properly mapped onto storage tiers. This paper presents Canopus, a progressive data management framework for storing and analyzing big scientific data. Canopus allows simulation results to be refactored into a much smaller dataset along with a series of deltas with fairly low overhead. Then, the refactored data are compressed, mapped, and written onto storage tiers. For data analytics, refactored data are selectively retrieved to restore data at a specific level of accuracy that satisfies analysis requirements. Canopus enables end users to make trade-offs between analysis speed and accuracy onthe-fly. 
Canopus is demonstrated and thoroughly evaluated using blob detection on fusion simulation data.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/hotstorage17_slides_lu.pdf", "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-lu.pdf", "https://www.usenix.org/conference/hotstorage17/program/presentation/lu" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/55a0/22ab83a0f848f3e18693ac3a4ffe016f2704.pdf", "s2Url": "https://semanticscholar.org/paper/55a022ab83a0f848f3e18693ac3a4ffe016f2704", "sources": [ "DBLP" ], "title": "Canopus: Enabling Extreme-Scale Data Analytics on Big HPC Storage via Progressive Refactoring", "venue": "HotStorage", "year": 2017 }, "55c26b638c0fa1cc471bd11cd405cd1428586f96": { "authors": [ { "ids": [ "38794959" ], "name": "F\u00e1bio Coelho" }, { "ids": [ "2911639" ], "name": "Jo\u00e3o Paulo" }, { "ids": [ "3281313" ], "name": "Ricardo Manuel Pereira Vila\u00e7a" }, { "ids": [ "32837952" ], "name": "Jos\u00e9 Orlando Pereira" }, { "ids": [ "1679234" ], "name": "Rui Oliveira" } ], "doi": "10.1145/3030207.3030228", "doiUrl": "https://doi.org/10.1145/3030207.3030228", "entities": [ "Benchmark (computing)", "Coexist (image)", "Experiment", "IBM Tivoli Storage Productivity Center", "Load balancing (computing)", "Online analytical processing", "Online transaction processing", "Spatial variability", "Transaction processing" ], "id": "55c26b638c0fa1cc471bd11cd405cd1428586f96", "inCitations": [], "journalName": "", "journalPages": "293-304", "journalVolume": "", "outCitations": [ "cf855ba4a09c2181d0166705717b5788454fcfa5", "c071ef3f2ef2976ec8b5a137f63f85db2da00774", "0f5bd5f78269105eaa88c483c3f38bbc521eb06e", "24693bbc5bd27d89ebb57a24c27582ec291c3a02", "be189b1d51566dd4ca098152c36ea00ddb235c7e", "10551c91b4d36d1009b23b4d2b88a9e1733fe029", "3495d81a806605b402d93b910db83417f91b9b82", "0ce159a97e734d622d098386432253ea12d755dd", "d0ca55f5d22822d6bd952f1c751a6794ee01a4f0", 
"0cefe55f602bfaa4b2484a36360b28ce6896783b", "6891484a3e0688fc8f22e3457b60d3ec0bf0a5b6", "11ef7c142295aeb1a28a0e714c91fc8d610c3047", "75dea5776c20d6a68595698982594ebc24c35f9f", "31bd199555b926c6f985d0bbf4c71f5c46b5a078" ], "paperAbstract": "The increasing demand for real-time analytics requires the fusion of Transactional (OLTP) and Analytical (OLAP) systems, eschewing ETL processes and introducing a plethora of proposals for the so-called Hybrid Analytical and Transactional Processing (HTAP) systems.\n Unfortunately, current benchmarking approaches are not able to comprehensively produce a unified metric from the assessment of an HTAP system. The evaluation of both engine types is done separately, leading to the use of disjoint sets of benchmarks such as TPC-C or TPC-H.\n In this paper we propose a new benchmark, HTAPBench, providing a unified metric for HTAP systems geared toward the execution of constantly increasing OLAP requests limited by an admissible impact on OLTP performance. To achieve this, a load balancer within HTAPBench regulates the coexistence of OLTP and OLAP workloads, proposing a method for the generation of both new data and requests, so that OLAP requests over freshly modified data are comparable across runs.\n We demonstrate the merit of our approach by validating it with different types of systems: OLTP, OLAP and HTAP; showing that the benchmark is able to highlight the differences between them, while producing queries with comparable complexity across experiments with negligible variability.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030228" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/55c26b638c0fa1cc471bd11cd405cd1428586f96", "sources": [ "DBLP" ], "title": "HTAPBench: Hybrid Transactional and Analytical Processing Benchmark", "venue": "ICPE", "year": 2017 }, "55ce99ff2cb04eaed460b8a8ee5c3fd4821e0e0f": { "authors": [ { "ids": [ "37785191" ], "name": "Xi Wu" }, { "ids": [ "9539166" ], "name": "Fengan 
Li" }, { "ids": [ "36812927" ], "name": "Arun Kumar" }, { "ids": [ "38120884" ], "name": "Kamalika Chaudhuri" }, { "ids": [ "1680133" ], "name": "Somesh Jha" }, { "ids": [ "5151034" ], "name": "Jeffrey F. Naughton" } ], "doi": "10.1145/3035918.3064047", "doiUrl": "https://doi.org/10.1145/3035918.3064047", "entities": [ "Algorithm", "Bismarck", "Differential privacy", "Experiment", "Gradient", "Gradient descent", "Privacy", "Scalability", "Stochastic gradient descent", "White box (software engineering)" ], "id": "55ce99ff2cb04eaed460b8a8ee5c3fd4821e0e0f", "inCitations": [ "26b4983e14e6c5c1b35120986c008982764c844b", "636421d05f9eb19ce083af9ed01a8a7be23104a2", "6c1196bf10c7acc65286b46a7ba42faf98082d4f", "d5c7280a8e57261f394622a92a146481c36830e2" ], "journalName": "", "journalPages": "1307-1322", "journalVolume": "", "outCitations": [ "3671338dc8c84d51b285bee79f85e7f3937a5078", "c92420f001e023c693db762758f9590571256e35", "49934d08d42ed9e279a82cbad2086377443c8a75", "5fa89d670611f44033598907a5d3c69af9c4ab68", "0144941d255dad89d3d90c2d131a15cc01df9829", "006cb500fd0b25200e12eb5a024756aea3d569ed", "1fbfa8b590ce4679367d73cb8e4f2d169ae5c624", "4912c18161cf35a066a9e70b4e4ef45ff9d19035", "000f2d99632d5d6c494bf9e1b179638e48433e99", "173eed222de30465a79639c01855e64bc956f34b", "075f328ef87a076151feb4d5b1f97b66aa597a90", "3f419db6f66c32bbb7ea887b139abd4e088a0405", "335f8ae48b7999982aefb247411be8fd9cd0eacb", "65926b61d0308954bd6cc4f6cbe46eef64147635", "5307157623c0ac7f3335ff10b066327ac4a1c495", "244f7c483b1035144f04f7c58f20c3f806d3f1b3", "341a08d1854b5ecf871bbb4c7833a435927abbda", "bef60055ed80d9f387afb33218341821006af939", "b532099ff8b67049f292cd62700dca37fc2be623", "09e315092edc93787248f66aadc86a339c3ad0db", "1050926bf7fc494f56f332bfbc0dee494b2ab8ff", "6154ce8c02375184f7928e41c4fae532500f7175", "67d9fe9856d590e566c9b3aa549541129bb92117", "17fac85921a6538161b30665f55991f7c7e0f940", "2e99b1868621e47cb89ba8e0f72a5b9d87acb991" ], "paperAbstract": "While significant progress has been 
made separately on analytics systems for scalable stochastic gradient descent (SGD) and private SGD, none of the major scalable analytics frameworks have incorporated differentially private SGD. There are two inter-related issues for this disconnect between research and practice: (1) low model accuracy due to added noise to guarantee privacy, and (2) high development and runtime overhead of the private algorithms. This paper takes a first step to remedy this disconnect and proposes a private SGD algorithm to address both issues in an integrated manner. In contrast to the white-box approach adopted by previous work, we revisit and use the classical technique of output perturbation to devise a novel ``bolt-on'' approach to private SGD. While our approach trivially addresses (2), it makes (1) even more challenging. We address this challenge by providing a novel analysis of the L2-sensitivity of SGD, which allows, under the same privacy guarantees, better convergence of SGD when only a constant number of passes can be made over the data. We integrate our algorithm, as well as other state-of-the-art differentially private SGD, into Bismarck, a popular scalable SGD-based analytics system on top of an RDBMS. 
Extensive experiments show that our algorithm can be easily integrated, incurs virtually no overhead, scales well, and most importantly, yields substantially better (up to 4X) test accuracy than the state-of-the-art algorithms on many real datasets.", "pdfUrls": [ "http://arxiv.org/pdf/1606.04722v1.pdf", "https://arxiv.org/pdf/1606.04722v1.pdf", "https://arxiv.org/pdf/1606.04722v2.pdf", "http://doi.acm.org/10.1145/3035918.3064047", "https://arxiv.org/pdf/1606.04722v3.pdf", "http://andrewxiwu.github.io/public/papers/2017/WLKCJN17-bolt-on-differential-privacy-for-scalable-stochastic-gradient-descent-based-analytics.pdf", "http://andrewxiwu.github.io/public/talks/2017/sgd-dp-sigmod17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/55ce99ff2cb04eaed460b8a8ee5c3fd4821e0e0f", "sources": [ "DBLP" ], "title": "Bolt-on Differential Privacy for Scalable Stochastic Gradient Descent-based Analytics", "venue": "SIGMOD Conference", "year": 2017 }, "55d970958b4df64b7ecda9dc7ba93eeeed677767": { "authors": [ { "ids": [ "3343449" ], "name": "Yangyang Gao" }, { "ids": [ "1758209" ], "name": "Haitao Zhang" }, { "ids": [ "25064171" ], "name": "Yanpei Zhu" }, { "ids": [ "25092963" ], "name": "Bingchang Tang" }, { "ids": [ "2269883" ], "name": "Huadong Ma" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.73", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.73", "entities": [ "Closed-circuit television", "Cloud computing", "Distributed computing", "Experiment", "Hard disk drive", "Load balancing (computing)", "Solid-state drive", "Video processing" ], "id": "55d970958b4df64b7ecda9dc7ba93eeeed677767", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "563-570", "journalVolume": "", "outCitations": [ 
"9dfa1a03e272043c9f12976fa53c89df635b6b7c", "5918fa52f47d9c9e06442c097f263caceac46e48", "1f6b54aebcc0c3c2748cb81b9c1821e81ac356a9", "58630e2f7d3b4998f619a2524af552757cd99be3", "3fafa4f7b281cd5d80c7e4658aa4237fb5c0aabf", "b859dc5e717761b98c21ad3d67ad890ad2ab1835", "052cb7e7a6a568d755f0884216b68a4de0b4c603", "98287022e45cbc3ade99271d51252bfd468a9cf1", "26ba0637c23b0ff904b6b6c7237b58ad5470300c", "d4db602e73ac1538717e8a90efe83728c76e8580", "47c9568ea92b6ec40e0ee7b42ae32f775feb8ca4", "f13540811d0d317fe825f4e4b319dcf36b713aa8", "02e56828951df7fbc42853071af175aad103517d", "b7d439379310fe8419b84bc5289098993030d2bc", "b0fee4b84d4dbe2dfdd9d5e97fcaa703fba15212", "92fe1cbfb65f6318622508eeec4b76e4485005ec", "a06b6daae0dc5c4d559d9f7d8b8686e80ca749ca", "ca91c3ac6805ab7f6a9fc50c3defc34fa40941e5" ], "paperAbstract": "The large-scale surveillance video processing workloads are gradually migrated to cloud computing platforms. Meanwhile, the hybrid storage architecture, integrating both HDD and SSD storage devices, is increasingly used in the current cloud platforms. However, the computing and storage capabilities of the nodes are constantly changing, and this requires the delicate design of the data layout strategy for avoiding the serious load skew in the distributed computing nodes with the hybrid storage architecture. In this paper, we propose a Load-Aware Data Migration (LADM) scheme for distributed surveillance video processing with hybrid storage architecture. Specifically, according to the proposed the load estimation model and the storage capacity constraint, the Node-Level Data Migration (NLDM) strategy is used to periodically migrate the appropriate video chunks from the local HDD to the local SSD for improving the node processing performance, and the Cluster-Level Data Migration (CLDM) strategy is used to periodically migrate the appropriate video chunks from the high load nodes to the low load nodes for achieving the overall load balance of cluster. 
We conduct the extensive experiments based on the distributed surveillance video processing platform we developed, and the experimental results show that the proposed LADM scheme outperforms the current methods.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.73" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/55d970958b4df64b7ecda9dc7ba93eeeed677767", "sources": [ "DBLP" ], "title": "A Load-Aware Data Migration Scheme for Distributed Surveillance Video Processing with Hybrid Storage Architecture", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "55e54776ff4516736e1ccd2fd30808c4fb7bb895": { "authors": [ { "ids": [ "1919517" ], "name": "Yohei Ueda" }, { "ids": [ "3165589" ], "name": "Moriyoshi Ohara" } ], "doi": "10.1109/ISPASS.2017.7975266", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975266", "entities": [ "Ahead-of-time compilation", "Benchmark (computing)", "Compiled language", "Compiler", "Continuous integration", "Java", "JavaScript", "Representational state transfer", "Scripting language", "Server (computing)", "Server-side", "Software deployment", "Throughput", "Type system", "Typing", "Web development", "Web framework", "Web service" ], "id": "55e54776ff4516736e1ccd2fd30808c4fb7bb895", "inCitations": [], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "13-22", "journalVolume": "", "outCitations": [ "22266f8ad97d4bb4e2422d3a7dfa77ab7b47af21", "7e007883306b2d0b8da57ed608f5441dcc30a3e2", "cbde41b067a4e2980e5d4a250e743a254d0678ac", "d6160ffd53c0ee246f0ddf0e28d74afc6bc135db", "4b73e80c19f9cbb3881379f73e4bb134ea9d3cf8", "cd6ce3743a1bb54296351bdfc2452b651ec6d8d3", "63405f39127cf05488bb40f96ac3bf1ccef41757", 
"ab1a28bb87dc9271649f1676ac08d42fb7f0d506", "3a72a91dadce5774dcbc42e76f4c1b4a99766d05", "da245266502dc96125203895ec79bbe1821fff8e", "69a6476f7285770aefbfae998dc98e803f1a4cec" ], "paperAbstract": "Web developers generally prefer coding in dynamically compiled or scripting languages, such as Java and JavaScript, because those languages allow them to deploy applications quickly to deliver new features and bug fixes in a timely manner. In this paper, we compared the Go language, a popular statically compiled language, with two dynamically compiled languages, JavaScript and Java. We evaluated the Acme Air benchmark for three implementations — one in each language. Our experimental results have shown that the Go implementation achieved a 3.8x and 2.4x higher throughput than the JavaScript and Java implementations respectively, after a simple tuning in the server configuration. Our detailed analysis indicated that this is primarily because Go suffers less from polymorphism due to static typing than JavaScript, and because the Web framework for Go causes less overhead to process RESTful Web service requests than that for Java. 
We argue that statically compiled languages will play more significant roles for Web applications because of their performance advantages and also because of emerging continuous integration and deployment methodologies which eliminate some of the shortcomings in statically compiled languages over dynamically compiled and scripting languages.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975266" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/55e54776ff4516736e1ccd2fd30808c4fb7bb895", "sources": [ "DBLP" ], "title": "Performance competitiveness of a statically compiled language for server-side Web applications", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "55e54ba4a354dbbba4cfc67cc82966ff07903615": { "authors": [ { "ids": [ "2834988" ], "name": "Xiaohui Xie" }, { "ids": [ "1783406" ], "name": "Yiqun Liu" }, { "ids": [ "31278837" ], "name": "Xiaochuan Wang" }, { "ids": [ "1731598" ], "name": "Meng Wang" }, { "ids": [ "9273006" ], "name": "Zhijing Wu" }, { "ids": [ "4942149" ], "name": "Yingying Wu" }, { "ids": [ "1700777" ], "name": "Min Zhang" }, { "ids": [ "8093158" ], "name": "Shaoping Ma" } ], "doi": "10.1145/3077136.3080799", "doiUrl": "https://doi.org/10.1145/3077136.3080799", "entities": [ "Eye tracking", "Golden Triangle (Internet Marketing)", "Image retrieval", "Landing page", "Search engine results page", "Web search engine" ], "id": "55e54ba4a354dbbba4cfc67cc82966ff07903615", "inCitations": [ "54fef296e9c4d77906fbe9debe4e303276b69d92", "3bdb95e6013e2167141aa5e00738e57f0763e43b" ], "journalName": "", "journalPages": "275-284", "journalVolume": "", "outCitations": [ "f591c9d0210a0ecfd98b8ae4bce943d42f636cb1", "0b3a95245d9cde45ffcce46b7280e9d12f175568", "201b29dc80cab5c0adb35cf415d2b9a9d15a0ba7", "50b73a320e600b841be0bde76be562ac18ca1674", "9780b60c459d118ceed53cb8809958aa98434e58", "36f27559d12bd86d32fbe34f4b4ce7540f1686c0", 
"0020f7b5ff3bd39c322ae8f8f7927d116e77d322", "4752e8858dd67002b16b281e115655307ec3c974", "71ac19c90b84e0ea8ed9db927bacb36a710e7b30", "050f2c5ecdd39d261582b3e4aec8cf13c5d14ed0", "275b987cef19041cdfeecc89e509454329d2873e", "13d72ef522b405c18f7d228c5744687609b4c3a4", "27ae5272e7d4c260b1af931c225b5b92cbc7e26d", "8302d0746edee8b7d81648861395b565589f4ec7", "611f091db2785f855f181d22886f2c7f4c4afeba", "a6d8e7592353729e5e1a5e5613e120379cb09053", "63add5b09e70e5f2abf970f9d315421a67379cde", "a258c4ec6dd3b754195260344982ab14a0af4622", "70e896d590d58fe78d49038b20ce32b4c176ae6e", "1772143a707f02685e3445485b89c888b2535e7a", "2d772cbd092d2b0869dcbfecadf70594a6d129e0", "31864e13a9b3473ebb07b4f991f0ae3363517244", "0b2d5a748db9f16e36375db6b1eb97a3c16d8c80", "17f3de3c51a323e081c384504a54d33d8ceb57c7", "888e5183a9ff5a2ebda350b1c3d9153b732d9db0", "11ade2193404038404addf432d590493e43e31e7", "857e7fce2e2f88ddb38935fa809aefa196689286", "7a43cdd0e8e4b628af6619812c73b2f2d524d0bb", "4cc5fb6cf48b2c58b283460b19f3beeb7e5b6a22", "79755ffd1d4a352029db8cd803858b336de3ddd4", "6d74c216d8246c2a356b00426af715102af2a172", "2d533eb6ca5d47d745ed49c863ce3d8499b4bf17" ], "paperAbstract": "Image search engines show results differently from general Web search engines in three key ways: (1) most Web-based image search engines adopt the two-dimensional result placement instead of the linear result list; (2) image searches show snapshots instead of snippets (query-dependent abstracts of landing pages) on search engine result pages (SERPs); and (3) pagination is usually not (explicitly) supported on image search SERPs, and users can view results without having to click on the \"next page'' button. Compared with the extensive study of user behavior in general Web search scenarios, there exists no thorough investigation how the different interaction mechanism of image search engines affects users' examination behavior. 
To shed light on this research question, we conducted an eye-tracking study to investigate users' examination behavior in image searches. We focus on the impacts of factors in examination including position, visual saliency, edge density, the existence of textual information, and human faces in result images. Three interesting findings indicate users' behavior biases: (1) instead of the traditional \"Golden Triangle'' phenomena in the user examination patterns of general Web search, we observe a middle-position bias, (2) besides the position factor, the content of image results (e.g., visual saliency) affects examination behavior, and (3) some popular behavior assumptions in general Web search (e.g., examination hypothesis) do not hold in image search scenarios. We predict users' examination behavior with different impact factors. Results show that combining position and visual content features can improve prediction in image searches.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080799" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/55e54ba4a354dbbba4cfc67cc82966ff07903615", "sources": [ "DBLP" ], "title": "Investigating Examination Behavior of Image Search Users", "venue": "SIGIR", "year": 2017 }, "55fd68bddd40c949f2354f05d81be5f6d178ae9b": { "authors": [ { "ids": [ "33911355" ], "name": "Haoyang Wu" }, { "ids": [ "1685072" ], "name": "Tao Wang" }, { "ids": [ "3013173" ], "name": "Zengwen Yuan" }, { "ids": [ "1798566" ], "name": "Chunyi Peng" }, { "ids": [ "1726570" ], "name": "Zhiwei Li" }, { "ids": [ "3385382" ], "name": "Zhaowei Tan" }, { "ids": [ "8818286" ], "name": "Boyan Ding" }, { "ids": [ "7823216" ], "name": "Xiaoguang Li" }, { "ids": [ "2864629" ], "name": "Yuanjie Li" }, { "ids": [ "9756930" ], "name": "Jun Liu" }, { "ids": [ "2014352" ], "name": "Songwu Lu" } ], "doi": "10.1145/3117811.3117834", "doiUrl": "https://doi.org/10.1145/3117811.3117834", "entities": [ "Computation", "Duplex (telecommunications)", "ETSI 
Satellite Digital Radio", "Embedded system", "MIMO", "Modular design", "PHY (chip)", "Pipeline (computing)", "Router (computing)", "Simulation Interoperability Standards Organization", "Stock and flow" ], "id": "55fd68bddd40c949f2354f05d81be5f6d178ae9b", "inCitations": [ "6e62233d0d418ad1688a5878c2ca6b2293b54d08", "c6a47914256b780e6827ef0a291ec3439a83a91b" ], "journalName": "", "journalPages": "101-113", "journalVolume": "", "outCitations": [ "3153467ed85f5670f0b5e92e717f14af5f8ac872", "0433bb657317ac22f7c66d71dfd14c8ead607d73", "25011a77c8478ed154721775d6284db8b268368c", "651534f5bd9d3c3ec4baba3461f03af264e1bbef", "b3bbed7a6df606cde84375159cc2090811c6bf1f", "406a93739fb99f806bdbf4612fb4af2aa7537581", "06f1566af4d87a8b7284fc1a2adec852aa8d0fa2", "0d3817273d734c34b1a0fbe2ce9ba7c013cd68a6", "288b85894e60a51f3f713dbf1f45f0ae6af13bf3", "3ddde0b1de7f56c38fb125e36a62b016a2ad37d8", "0928e5df5e4a1ec003b82347c5f034ef93bed8d8", "0f879370356cf355ebf62ff9dd18891d45763ef8", "3e928013ec01ea6cb41fc37f297c01fabb8a1664", "14055469d6d750142774af479b87107dedfcae51", "23a89575c1340c5497c860f2796c2602c7a0f440", "089ae21d8104ba919e2524ea0a1e28c506b55dd9", "5d6170014151852ff4748f52e4fb06b14fd8e7d8", "5a04255a3e67e32e988494389db6b5b1a54b5528", "1bc04cbbce54bc027b6147eb0a49189a2691a35c", "f3916811c5c8f78aafd8876761964c253912f244", "3567f2f850f7bdfcb90ea3c835dadcf68eef2d87", "bc7465967bcd7d7ebe5070dbd86126d5038ea2eb", "28dc46ee3b90ec65fc08511acbb65dfc722af0be", "92dc3b919689cbac6de9562f9ee76e20805f540e", "1893c4d98d84cdeeb6556fff140ef4f215280faf", "39234b16d65048e676147760102f3e3fee20fda4", "8247e9e8ed1bbdcf89205f955842f2befaebb915", "1873255c120b59c18f63ff77011a6e15fbb48946", "4a2f66363f34ec89c649212585e9b89e03a140cd" ], "paperAbstract": "Tick is a new SDR system that provides programmability and ensures low latency at both PHY and MAC. It supports modular design and element-based programming, similar to the Click router framework [23]. 
It uses an accelerator-rich architecture, where an embedded processor executes control flows and handles various MAC events. User-defined accelerators offload those tasks, which are either computation-intensive or communication-heavy, or require fine-grained timing control, from the processor, and accelerate them in hardware. Tick applies a number of hardware and software co-design techniques to ensure low latency, including multi-clock-domain pipelining, field-based processing pipeline, separation of data and control flows, etc. We have implemented Tick and validated its effectiveness through extensive evaluations as well as two prototypes of 802.11ac SISO/MIMO and 802.11a/g full-duplex.", "pdfUrls": [ "http://metro.cs.ucla.edu/papers/mobicom17-tick.pdf", "http://web.cs.ucla.edu/~zyuan/slides/mobicom17-tick-slides.pdf", "http://doi.acm.org/10.1145/3117811.3117834" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/55fd68bddd40c949f2354f05d81be5f6d178ae9b", "sources": [ "DBLP" ], "title": "The Tick Programmable Low-Latency SDR System", "venue": "MobiCom", "year": 2017 }, "560f706f2af9d300ab487314ed2c6284652d2ab3": { "authors": [ { "ids": [ "2302342" ], "name": "Biagio Cosenza" }, { "ids": [ "21490411" ], "name": "Juan J. Durillo" }, { "ids": [ "2490652" ], "name": "Stefano Ermon" }, { "ids": [ "1717074" ], "name": "Ben H. H. 
Juurlink" } ], "doi": "10.1109/IPDPS.2017.102", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.102", "entities": [ "Algorithm", "Artificial neural network", "Auto-Tune", "Coefficient", "Computation", "Machine learning", "Ordinal data", "Ordinal regression", "Stencil code", "Support vector machine", "Test set" ], "id": "560f706f2af9d300ab487314ed2c6284652d2ab3", "inCitations": [ "4f4853e68da9aa09c0a8ebf4612ca725ce5d3521" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "287-296", "journalVolume": "", "outCitations": [ "019ebe0205a759f8dab80b617f9f8ccd179c5c62", "36ea668bb7617b9c1e6e98aebe96a0aaf90b569e", "10d3e0f0648d0a5cfaebb3044ea7b14a52e54466", "0064df0d06312711f5163c4440f3d7f099fc8d9e", "6005fdb7813e0f07d90d6ed8e7beecd733ef4d04", "220f5b0e74c7f1e71d6e23da672dcffbc9e6520a", "1ccaac0fdcc5ab37a45d0cc616feeaa67a3d4ca1", "677f01e5410d98c669a1d53e64b10f0533911829", "7c0c47c6a2ade8f16ac5a00471348a4eb0bfa8ee", "075d460a4737d7c0b3fd4b7aa03e315f7256b1af", "1be3c6d7eb84bf88161c20f696a87dacd385d028", "1b3c86ad6c149941750d97bd72b6b0122c1d8b5e", "782d8591afd432a9b2bfe21553a4158a39cb9d1f", "1426b32f40126a0a906121984918ff5fbcb0b4b3", "64ac13c42f3ccfa987ad928c03cb0502b3baa7fc", "998e6eb3d90327c38fdc7f680c75137e4976c679", "98ba8f9863d92839b98f854eada60bfe87805526", "4a2d7bf9937793a648a43c93029353ade10e64da", "032857f750287c77349074587444ecd3166a4c09", "427b168f490b56716f22b129ac93aba5425ea08f", "14a477cf712ad5647180e6233dd0638c6c269fdd", "648fe4e8d720c414e5edf1eb000cf84a9ae5046a", "2c361ef5db3231d34656dd86d9b288397f0b929e", "44efef85d56e61fb304f27010cc0d1bd80283a69", "1214132530d108eff629ff18a9c05464f8003579", "482f5e72c0a245a285ef198861e191fae73de481", "2fac633f5167d54d94a9fe6a2532d1aef073980a", "064f0793b2b7af8e8fccbf62bf39976dc4ff5b7a", "1e716b39c74dd4967b16eb3a6a2e7220e4e07c97", "dae60807ef1e6fd61a2362c8187b733b08121e1e", "3c31999730ef19007df71909f1ae5223825e0ec9", "5672ce28f2927b81b01303e4926643c55a4c8133", 
"6472cab2678c39e2273673968c6d7d3cfe2a62c9", "09b1520aea25ff0b5852d8a777e48eacf5300fac", "2f7fa291bdc6a2f8c7994cf1896868f057a6b0ca", "24fcc566953d80ca12d9e0e0315573083374eacf", "366d02f9687a33b21079acc6d62ad755189a52f0", "52d0aff3e4407302dd49123a8f87151bf94fdb52", "06c15f48f0f71cb034936cee635bec0fc4992594", "288b3c6605f3e46bf8b56aded52ed5f6c864f72a", "0e12eb94aab5d64d08baacf0df36a4b7ed054c46", "26d3c0e79adc665e12d848cd896fec6b6f0fed87", "0f9080d297fc22dcf24dfd8ffcd3de5cea04c689", "518cd72a5f12f050492b246ad300a46de7604af2", "d32b7382787a3e969a1b8d3291c3480b8a1da545" ], "paperAbstract": "Stencil computations expose a large and complex space of equivalent implementations. These computations often rely on autotuning techniques, based on iterative compilation or machine learning (ML), to achieve high performance. Iterative compilation autotuning is a challenging and time-consuming task that may be unaffordable in many scenarios. Meanwhile, traditional ML autotuning approaches exploiting classification algorithms (such as neural networks and support vector machines) face difficulties in capturing all features of large search spaces. This paper proposes a new way of automatically tuning stencil computations based on structural learning. By organizing the training data in a set of partially-sorted samples (i.e., rankings), the problem is formulated as a ranking prediction model, which translates to an ordinal regression problem. Our approach can be coupled with an iterative compilation method or used as a standalone autotuner. 
We demonstrate its potential by comparing it with state-of-the-art iterative compilation methods on a set of nine stencil codes and by analyzing the quality of the obtained ranking in terms of Kendall rank correlation coefficients.", "pdfUrls": [ "http://www.biagiocosenza.com/papers/CosenzaIPDPS17.pdf", "https://doi.org/10.1109/IPDPS.2017.102" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/560f706f2af9d300ab487314ed2c6284652d2ab3", "sources": [ "DBLP" ], "title": "Autotuning Stencil Computations with Structural Ordinal Regression Learning", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "561836f9b039a855d3db2c68d58eca8165ae0673": { "authors": [ { "ids": [ "2642780" ], "name": "Carl A. Waldspurger" }, { "ids": [ "3007449" ], "name": "Trausti Saemundsson" }, { "ids": [ "1798526" ], "name": "Irfan Ahmad" }, { "ids": [ "2233275" ], "name": "Nohhyun Park" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Approximation algorithm", "Dynamic programming", "Emulator", "Experiment", "LIRS caching algorithm", "Program optimization", "Shard (database architecture)", "Signal trace", "Simulation" ], "id": "561836f9b039a855d3db2c68d58eca8165ae0673", "inCitations": [ "a2408c13ce831b9aadca03f458b423cb28fb8a8a", "0357d8655c6788bf0041b8446c038a808c209338", "53ee65bfc69cd55d81196537086137ef8efb2108", "0f35d1156d2667232855578b50b8fb02ea5bbf51" ], "journalName": "", "journalPages": "487-498", "journalVolume": "", "outCitations": [ "f8aa33900f552f8112d6186d78bc845d2dfc0007", "03bf5d2bc45794e241f53aecf8880c26c712933d", "601a0436ca19712e13d1a853183e255659057582", "47ccfd0c9dc218f5496783310a28c581730b9ca7", "0b43a722d2ca43752750e4976f3056a006990143", "9451f420a5d39d75d1e6c2cbbbae4544afb412a6", "11ebb411b138d2acdd481a6920b822fbc213cdc0", "e0508499b4cf5794d5aeaf717e7ad9541e9c2bba", "47419c7d160fd05f9be712b876c292cb6241228d", "4e8839416133588c10cc56d6325db55a42fe2215", 
"3bbb5daf6e7be50c308b77730efb13b7bcf500b7", "4a58e3066f12bb86d7aef2776e9d8a2a4e4daf3e", "a3de178c43b990b5755be4d640a7525f97ce2f33", "1354abf8baa8b6d39c65758072ae9d0b07227d5c", "23a9e1f8cefc76b71f0cf5e1ccf5a6485c19cadf", "0fd4a1b1b92a65b70fad60ad6e95ed54e8f6e86a", "235ffbe72353aaa49d38fd973fa67cc2a15310fb", "4b4ee1ee9bbfd9527fba0bbd761bd61a59f96a48", "0ad3358ffc0d5e44311160767cc0fb65ccd25b00", "31ceeced5d23193c369b98170c45e66bae6ff77d", "eacfdf93e03d9dbbbaa2d01250939d9f94fb16a4", "333da42f4369dff8ca905ad21ea6ee2f5dc99d55" ], "paperAbstract": "Recent approximation algorithms (e.g., CounterStacks, SHARDS and AET) make lightweight, continuouslyupdated miss ratio curves (MRCs) practical for online modeling and control of LRU caches. For more complex cache-replacement policies, scaled-down simulation, introduced with SHARDS, offers a general method for emulating a given cache size by using a miniature cache processing a small spatially-hashed sample of requests. We present the first detailed study evaluating the effectiveness of this approach for modeling non-LRU algorithms, including ARC, LIRS and OPT. Experiments with over a hundred real-world traces demonstrate that scaled-down MRCs are extremely accurate while requiring dramatically less space and time than full simulation. We propose an efficient, generic framework for dynamic optimization using multiple scaled-down simulations to explore candidate cache configurations simultaneously. Experiments demonstrate significant improvements from automatic adaptation of parameters including the stack size limit in LIRS, and queue sizes in 2Q. Finally, we introduce SLIDE, a new approach inspired by Talus that uses scaled-down MRCs to remove performance cliffs automatically. SLIDE performs shadow partitioning transparently within a single unified cache, avoiding the problem of migrating state between distinct caches when partition boundaries change. 
Experiments demonstrate that SLIDE improves miss ratios for many cache policies, with large gains in the presence of cliffs.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-waldspurger.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/waldspurger", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_waldspurger.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/672b/16be7ccd7b7d9e34001e3beb7da3449627f9.pdf", "s2Url": "https://semanticscholar.org/paper/561836f9b039a855d3db2c68d58eca8165ae0673", "sources": [ "DBLP" ], "title": "Cache Modeling and Optimization using Miniature Simulations", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "5632f9dd5609027da8212ef8d2dd5f1acf2c68cb": { "authors": [ { "ids": [ "32116342" ], "name": "Han Dong" }, { "ids": [ "1726351" ], "name": "Tao Li" }, { "ids": [ "9787716" ], "name": "Jiabing Leng" }, { "ids": [ "7017729" ], "name": "Lingyan Kong" }, { "ids": [ "2043040" ], "name": "Gang Bai" } ], "doi": "10.1109/ICPP.2017.13", "doiUrl": "https://doi.org/10.1109/ICPP.2017.13", "entities": [ "Algorithm", "Artificial neural network", "Central processing unit", "Computer vision", "Convolution", "Convolutional neural network", "Cube", "Data cube", "Deep learning", "Gradient", "Gradient descent", "Graphics Core Next", "Graphics processing unit", "High-level programming language", "One-class classification", "Pixel", "Stochastic gradient descent" ], "id": "5632f9dd5609027da8212ef8d2dd5f1acf2c68cb", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "41-49", "journalVolume": "", "outCitations": [ "022dfa7a38fc43856f83f79e07a2b1f08709c962", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "4c5fcbe295c6e9ebea79f150b60073b94db55fa9", "33a7f8ce389525a7c4b70e6d04ff2a2452bccc7c", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "622b5261df3c9bd7df6999e9234415c3e564317f", 
"6353266413c38fa9288d82decaf08853b537c1db", "0b29b743203ac9adb578bdbe4be6a8a68bdca132", "462b4a4f320c745014fdef2ec0d6d34f888ede71", "061356704ec86334dbbc073985375fe13cd39088", "81b7dcaef4a53daab41658a4d1e97972d04b3384", "60f22ad725f041fff81d6371242485bbe5c3ebb6", "373f76633cc1f6c7a421e31c989842021a52fca4", "e9db53a6db91094072c8e252bf31acc50fd3dafd", "cc5c82da4c46587a5124f6b000395098def0ced7", "7dcf648d5a6c9f39f6f28839f642185e71681b7f", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "e2b7f37cd97a7907b1b8a41138721ed06a0b76cd", "fa6e0913a7d9fa8d98657fb3a278ebcdbc9af69c", "7f351eee00419bce564812c13c3238b32ef02e17", "2329a46590b2036d508097143e65c1b77e571e8c", "8729441d734782c3ed532a7d2d9611b438c0a09a", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "2808f73d5a19c2ed17dee2cac113b3043ab74af3", "ccd6e2feb2ad4e2171eb36c1a0e2d579d939d032", "63936fa32f9e75ab2a864daae6791ce02112183d" ], "paperAbstract": "Hyperspectral image classification has been proved significant in remote sensing field. Traditional classification methods have meet bottlenecks due to the lack of remote sensing background knowledge or high dimensionality. Deep learning based methods, such as deep convolutional neural network (CNN), can effectively extract high level features from raw data. But the training of deep CNN is rather time-consuming. The general purpose graphic processing units (GPUs) have been considered as one of the most common co-processors that can help accelerate deep learning applications. In this paper we propose a GPU-based Cube CNN (GCN) framework for hyperspectral image classification. First, a Parallel Neighbor Pixels Extraction (PNPE) algorithm is designed to enable the framework directly loading raw hyperspectral image into GPU's global memory, and extracting samples into data cube. 
Then, based on the peculiarity of hyperspectral image and cube convolution, we propose a novel Cube CNN-to-GPU mapping mechanism that transfers the training of Cube CNN to GPU effectively. Finally, the mini-batch gradient descent(MBGD) algorithm is improved with Computing United Device Architecture(CUDA) multi-streaming technique, which further speeds up network training in GCN framework. Experiments on KSC dataset, PU dataset and SA dataset show that, compared with state-of-art framework Caffe, we achieve up to 83% and 67% reduction in network training time without losing accuracy, when using SGD (Stochastic Gradient Descent) and MBGD algorithm respectively. Experiments across different GPUs show the same performance trend, which demonstrates the good extendibility of GCN framework.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.13" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5632f9dd5609027da8212ef8d2dd5f1acf2c68cb", "sources": [ "DBLP" ], "title": "GCN: GPU-Based Cube CNN Framework for Hyperspectral Image Classification", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "563fc944562f32bc636f47b553e3b5b5a219d959": { "authors": [ { "ids": [ "7221435" ], "name": "Ahmed Hussein" }, { "ids": [ "2694341" ], "name": "Mathias Payer" }, { "ids": [ "2094585" ], "name": "Antony L. Hosking" }, { "ids": [ "34237257" ], "name": "Christopher A. 
Vick" } ], "doi": "10.1145/3050748.3050754", "doiUrl": "https://doi.org/10.1145/3050748.3050754", "entities": [ "Android", "Centralisation", "Garbage collection (computer science)", "High- and low-level", "Memory management", "Mobile operating system", "Operating system", "Power management", "Scheduling (computing)", "Throughput", "Virtual machine", "z/VM" ], "id": "563fc944562f32bc636f47b553e3b5b5a219d959", "inCitations": [], "journalName": "", "journalPages": "171-186", "journalVolume": "", "outCitations": [ "0f11d823f6c2eb9cd327c858e7259047d14e5cf8", "0e8fdc0d04ca5ea43a9c635d0d4aad748197ffc3", "0370154942e92faa90ee90feaca663248e1be20e", "33224a0666c92858ab7c18fa09f88012adcec43d", "02bccf1a3f6132caacd4c7a24d9bbc14009c70f7", "519810f2bc7760e7873675d2b4ddadc51cf64d6e", "13abd63c5f3be0381fa790cecae0b5d258c9d3ca", "d6beadc7e5b587f00d3bad88b89d5e85dbecbac7", "62f0aaa7146794f52d10a71c4ef28d64f7c77670", "31a5b5e0697914f0b5121243528aee9b7ed1d56b", "4211420caaed6e658c20ea2f89c7d1e2977c16d2", "01a443750f86a258dd56942a4f136683e1bd77ed", "2260e7a09f1aefa56a5b3b29bee91ce4c3dbefc6", "4463a1f7243a16a9df0e62eed9ef4021e0bf1671", "2cac6e84d3d7fed13ec9a5d39fd2bd6e75423578", "1b1ff7f94430f47d109d0deb6856c98d9df518e8", "125f9d30a697b41999168390397eb6a6b899378d", "b5cfad493a3595a7b8fe883900d69449ced75489", "3690069f8abbe523488d316514d26d22c9d9c418", "6e7a0210658a74f6e9bd83f41affa7af7e5be997", "c4907f185a2942d7c3f4960475f7464bd2bdb476", "21a9b74b101b823b41f2858b0862b80c416da95d", "ddb75edbeacf643edafb810c2882788e839433f0", "0495641c590874be9e09c3743d0d15c536cd3f4e", "4256339f61d809e5092b68a505f7d37099cbd341", "c388db92285bad27c567649a7c78a50b27d3d541", "2d27d661d08f6e997cc465b1497aaa3736774f92", "30509c7dc5fafa479005e6254efb34eaf2e1dc37", "0042e1d08c4a867075bd3547419c1af2ceed15d8", "6611bc084fbaf1845b1bf54cc72d1c143d25083d", "289013bfd42e27c9864cd374a462c8f9a4603783", "03816525f951f49ceb7a70aaf3bda23605a58212", "0f2fdd44eaa920a15a286c8d2c913a344a8586f3", 
"2a9635e0b401a51ae25829a802fb33610ed30808", "1a7246a3712b9390e9285db2611dc865f375984c", "44f6daa2322e3243133882220f5e0def22fcafc3", "1e126cee4c1bddbfdd4e36bf91b8b1c2fe8d44c2", "8a0e2332258873d46b98383c94406c1583564d30", "32a3ab9d4d3a4e76bb5c939bc6f0071fc36cbf73", "8469f98a0f5a96448ef33452e39f72af552475a0", "3fd85d5f5217b7df40e8fd6a8ef7d285fc4bb7e8", "07e4ad13af7da44f47e203a8f5c792020ee1dea2", "737d041822cca60a341e4058ba2bac803fe0eed0", "00a9ba0063d34ec56792849a67ef57b4601becbb", "4af11afcd5f448e667d705b08f65e4968f497d14" ], "paperAbstract": "Ubiquitous mobile platforms such as Android rely on managed language run-time environments, also known as language virtual machines (VMs), to run a diverse range of user applications (apps). Each app runs in its own private VM instance, and each VM makes its own private local decisions in managing its use of processor and memory resources. Moreover, the operating system and the hardware do not communicate their low-level decisions regarding power management with the high-level app environment. This lack of coordination across layers and across apps restricts more effective global use of resources on the device.\n We address this problem by devising and implementing a global memory manager service for Android that optimizes memory usage, run-time performance, and power consumption globally across all apps running on the device. The service focuses on the impact of garbage collection (GC) along these dimensions, since GC poses a significant overhead within managed run-time environments. Our prototype collects system-wide statistics from all running VMs, makes centralized decisions about memory management across apps and across software layers, and also collects garbage centrally. Furthermore, the global memory manager coordinates with the power manager to tune collector scheduling. 
In our evaluation, we illustrate the impact of such a central memory management service in reducing total energy consumption (up to 18%) and increasing throughput (up to 12%), and improving memory utilization and adaptability to user activities.", "pdfUrls": [ "http://ts.data61.csiro.au/publications/nicta_full_text/9544.pdf", "http://doi.acm.org/10.1145/3050748.3050754", "http://hexhive.github.io/publications/files/17VEE.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/563fc944562f32bc636f47b553e3b5b5a219d959", "sources": [ "DBLP" ], "title": "One Process to Reap Them All: Garbage Collection as-a-Service", "venue": "VEE", "year": 2017 }, "5648cf0638ab64dc41dfd6d9fc9003aa071992ac": { "authors": [ { "ids": [ "1861039" ], "name": "Pedro Y\u00e9benes" }, { "ids": [ "2749895" ], "name": "Jes\u00fas Escudero-Sahuquillo" }, { "ids": [ "34712227" ], "name": "Pedro Javier Garc\u00eda" }, { "ids": [ "1761901" ], "name": "Francisco J. Quiles" }, { "ids": [ "1713648" ], "name": "Torsten Hoefler" } ], "doi": "10.1109/HOTI.2017.11", "doiUrl": "https://doi.org/10.1109/HOTI.2017.11", "entities": [ "Adversary (cryptography)", "Algorithm", "Average path length", "Deadlock", "Experiment", "Interconnection", "Network performance", "Network switch", "Network topology", "Routing", "Simulation", "Telephone exchange", "Virtual channel" ], "id": "5648cf0638ab64dc41dfd6d9fc9003aa071992ac", "inCitations": [ "24f46a4d25d4704e95984a856d4d5d5070e91023" ], "journalName": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ "9812c817cd2645047582e0b6b51f11f9dccfed53", "09adae55a947e420e2d73de8d4e3f5a1cf4e483f", "c22cd78260126ea8e0183c23aeb9a2ec928658e3", "0736d68aad2c198a8f6dda851c27bd180421c2aa", "19b304df6f13798a0745eeaf8f4573b202a43e5f", "9c4b6c885bfc6038cdac56763663880e0f2624e6", "943cf22e168a86fec0381ca380474c1da39e509c", "ab392c91b09328afb84cf60b7fdc402610c3f908", 
"18a8ab664b3ee23504c302640e5792202bafe401", "17df257ba8b6e4e54dabb0967de2875c6672dca1", "5885d3525c1789aaa3aacc1740a3a6b51376f1b8", "2cba84a71e7a7949ccdc238fd3ef6b039066d793", "a15bc58fa496b6cca937713723f19f45380fc2fe", "3e089d9b3669c213d35172e63a433774cabfe499", "2b8f7ce8460e7e183de754b09cfc0e624476d7f2", "5f8991828def57d2f0cda942566afff56740d150" ], "paperAbstract": "Interconnection networks must meet the communication demands of current High-Performance Computing systems. In order to interconnect efficiently the end nodes of these systems with a good performance-to-cost ratio, new network topologies have been proposed in the last years that leverage high-radix switches, such as Slim Fly. Adversarial traffic patterns, however, may reduce severely the performance of Slim Fly networks when using only minimal-path routing. In order to mitigate the performance degradation in these scenarios, Slim Fly networks should configure an oblivious or adaptive non-minimal routing. The non-minimal routing algorithms proposed for Slim Fly usually rely on Valiant's algorithm to select the paths, at the cost of doubling the average path-length, as well as the number of Virtual Channels (VCs) required to prevent deadlocks. Moreover, Valiant may introduce additional inefficiencies when applied to Slim Fly networks, such as the "turn-around problem" that we analyze in this work. With the aim of overcoming these drawbacks, we propose in this paper two variants of the Valiant's algorithm that improve the non-minimal path selection in Slim Fly networks. They are designed to be combined with adaptive routing algorithms that rely on Valiant to select non-minimalpaths, such as UGAL or PAR, which we have adapted to the Slim Fly topology. 
Through the results from simulation experiments, we show that our proposals improve the network performance and/or reduce the number of required VCs to prevent deadlocks, even in scenarios with adversarial traffic.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HOTI.2017.11", "https://htor.inf.ethz.ch/publications/img/yebenes-slim-fly-routing.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5648cf0638ab64dc41dfd6d9fc9003aa071992ac", "sources": [ "DBLP" ], "title": "Improving Non-minimal and Adaptive Routing Algorithms in Slim Fly Networks", "venue": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "year": 2017 }, "5654af8f7aa9eee484d3e35e2334215d1b582171": { "authors": [ { "ids": [ "35719711" ], "name": "Devavret Makkar" }, { "ids": [ "1713030" ], "name": "David A. Bader" }, { "ids": [ "1723013" ], "name": "Oded Green" } ], "doi": "10.1109/HiPC.2017.00011", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00011", "entities": [ "Algorithm", "Betweenness centrality", "Centrality", "Closeness centrality", "Clustering coefficient", "Coefficient", "Data structure", "Floor and ceiling functions", "Graph (abstract data type)", "Graph drawing", "Graphics processing unit", "Insertion sort", "List of algorithms", "Pattern matching", "Reference counting", "Social network" ], "id": "5654af8f7aa9eee484d3e35e2334215d1b582171", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "2-12", "journalVolume": "", "outCitations": [ "9bea87d20f109e27f8414b9ed47033bf1a10d2bf", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "110b55a017f52abfedca220036ea129d84b7cadc", "aaaf870dbe949c3cdef816848d7de77bf4aea188", "137433697178a6257b98d39b0af6308c262fb6db", "a15b91e5197ebe643ced790f8de5e8519a7ceba0", "c4e4852b60ea151ec38e0edc75a69ad1e846a44a", "53aa0a7eb6b6225ae2bc3e1e3af9ed5d90cab920", "4e88d42dc5f5efe565d87af3b999c43165e42dce", 
"42defb3044c3c0666459dd144617adcf0ca8ef85", "03403a66f7aab646a64a43b6f38d5a11638775c5", "7a03e14cb1beecccbc030391703224d7beb62e94", "bb8926e5962246e30ee9a7decc886d570e345113", "c2d13137c73cbc4795ed7dc5be43ef19fe464b12", "0d8a4786cb611d67bbf0d7fc1a64905828fb4121", "9359fa64a59105e93dd6ca9f5aa35e0d9f9055be", "1d70f0d7bd782c65273bc689b6ada8723e52d7a3", "deaf88487bc3c0f24a809c40f88f393543579015", "59ed16a4ac0dbc3a956e77fdc87df0044ba453be", "69927a3b9fc25e655ce662c03deb1e9d2832585c", "30f9068467a0d6e862a3c2c06598f2fd7c50960b", "9c5882ea02390e3ca93d04aeeb4ec440ae17ff50", "db63d47efa261ce1bb1a154e140e4a059f9bb999", "0371f9e3efbcd4829b5ffbff585155746ef05284", "38a65bb82801138bef74b8928c6f3d83f719f241", "66b90618542d8aa36b99357987771b71e2bdc0c5", "0ad8e89091eed09217e66adc98136126addc2619", "141e35263ab810983c90d47ad62eb4fab5e51717", "22a26f40877cbd7ce0fb6c8c94e061332469d071", "3c4194f25bda9d2ebdea8d91e8d7c13a5f8b485a", "3746511ef9ba685f34ceec9a3e94795be5836953", "e2207382768cef76f63a16d91a169078cfdc9b46", "b6b0d2bc9732c3a9cdbc14d5f7f24c65854df1b9", "0e85096efe34ac24f5212ce34434a828cc00e5d2", "6ea8894ef9edf31ed83e925a5650a0a8f0b79b76", "891fe4ab8700b780184b6f5307a4cbe9cfcda8f4", "6b6ae4ff053bcee2834b5e7718810cb5bc15c36c", "31f27864950a6c417cf996927b2d5558f70d2b14", "12809396d9e314df0c8f8e7ec9691bb69571b80d" ], "paperAbstract": "Triangle counting is an important building block for finding key players in a graph. It is an integral part of the popular clustering coefficient analytic and can be used for pattern matching in social networks. A triangle, which is also a 3-clique, represents a strong connection between three players that are all connected. While counting triangles is not overly expensive from a computational standpoint, especially in comparison to centrality metrics (such as betweenness centrality and closeness centrality), it can still prove to be prohibitive for large scale networks, especially for those with a power-law distribution. 
This problem only deepens for dynamic graphs where the network is constantly changing, requiring constant updating of the graph and the analytic. In this paper, we present a new dynamic graph algorithm for counting triangles that is based on an inclusion-exclusion formulation. While our algorithm is independent of the computing platform, we show performance results on an NVIDIA GPU. Our approach handles 32 million updates per second, or up to 11 million updates per second if the graph data structure is also updated. In past approaches, when a vertex was affected due to an edge insertion or deletion, it was necessary to find the triangles from scratch for that given vertex. Our new formulation does not need this and only requires considering the affected edges. As such our algorithm is typically several hundred times faster than the past approach - in some cases up to 819X faster.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00011" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5654af8f7aa9eee484d3e35e2334215d1b582171", "sources": [ "DBLP" ], "title": "Exact and Parallel Triangle Counting in Dynamic Graphs", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "5696189f777e0765e150af37bc7b35f0cd4e6f44": { "authors": [ { "ids": [ "17794267" ], "name": "Christian Davatz" }, { "ids": [ "1715103" ], "name": "Christian Inzinger" }, { "ids": [ "2383382" ], "name": "Joel Scheuner" }, { "ids": [ "1910406" ], "name": "Philipp Leitner" } ], "doi": "", "doiUrl": "", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Benchmark (computing)", "Blueprint", "Central processing unit", "Cloud computing", "Computer hardware", "Exemplification", "Google Compute Engine", "High- and low-level", "Multitier architecture", "Real life", "Throughput", "Tier 1 network", "Virtual machine", "Web application" ], "id": "5696189f777e0765e150af37bc7b35f0cd4e6f44", "inCitations": [ 
"8a03185e29cf5d2dc0121228235d84a8becb1bfc" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "534-543", "journalVolume": "", "outCitations": [ "01443342c8af18b22c6a81d48603525636fd27c4", "aca5abff6fb4b965c65d49696e99ed6faf4540ff", "357c28cac5b8ffa1928d834557909ef6d6b9a2a7", "ea3ee5f19ab9ba487dbbf977b646cf1536ab0688", "1d84ed02bc65400393723c37a70cc68fe39fbef4", "8a7e42d121cf6af10829bdfa7cb0aeabf2e991ff", "72c1990b2c992dfb25d48c51c44513c1f99b1baf", "ef87997f141b383fc3cf51cb0c93ff70d31442d7", "63e47eb01f7f7e2263ad823cbd0409d296a719e0", "286af66430ce4656e636336fcaa2eb416d149705", "51b20e97697da40b0416c73337b8ca4dc908b099", "7faf4f740b68798a84ba481aa446340d4da2f301", "9edc3150a1cdf9f827abc98fc6f82a9966bdd290", "9134af8aa7a8896797506c0bfd5d37a8ca3d0232", "19d8f8af2d774bda8b95bbeacf62c85811c37b3c", "6a878694530cd91328ce249b3c1420746b59d22c", "e31d26f25c60f81696ddefda81ae18ba95b16168", "218822c9573c86299fe58fd4ac489107cabe2346", "953f26165eaf46542ef0d58d9836b6898effcdc4", "57cbf8073681910b1516c74fb7714eedb839303c", "6e0202da0710d31dc0034126f9c3764150163e6e", "9909eda627dda57fdf0e061362b7bf63fdecb65c", "1ec5f20c742850349346605bc175f9ca1b89df80", "5b1a49db2863abd1234bb3081f18d87416caf5bd", "00b1f05ebf0f82b500c21d0be6d2a25244938229", "34ddc3da70f5b17ae0a73266ad1e4f9ae155811f", "2a096d21feed31502bf3cea0831858b5f06a754a", "046225c3b0f209888b6325ee44ac79fa26713a4c", "a0ee258dec2c29955c23e73e3feb5b03a1a5de01", "74cb6530b56e140ea29a57062578448117c1a292", "b0447d4880d2b35c25350fe0a5283afbad82c7f7" ], "paperAbstract": "A challenging problem for users of Infrastructure-as-a-Service (IaaS) clouds is selecting cloud providers, regions, and instance types cost-optimally for a given desired service level. Issues such as hardware heterogeneity, contention, and virtual machine (VM) placement can result in considerably differing performance across supposedly equivalent cloud resources. 
Existing research on cloud benchmarking helps, but often the focus is on providing low-level microbenchmarks (e.g., CPU or network speed), which are hard to map to concrete business metrics of enterprise cloud applications, such as request throughput of a multi-tier Web application. In this paper, we propose Okta, a general approach for fairly and comprehensively benchmarking the performance and cost of a multi-tier Web application hosted in an IaaS cloud. We exemplify our approach for a case study based on the two-tier AcmeAir application, which we evaluate for 11 real-life deployment configurations on Amazon EC2 and Google Compute Engine. Our results show that for this application, choosing compute-optimized instance types in the Web layer and small bursting instances for the database tier leads to the overall most cost-effective deployments. This result held true for both cloud providers. The least cost-effective configuration in our study provides only about 67% of throughput per US dollar spent. 
Our case study can serve as a blueprint for future industrial or academic application benchmarking projects.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101184", "https://xleitix.github.io/publication_list/preprints/ccgrid_17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5696189f777e0765e150af37bc7b35f0cd4e6f44", "sources": [ "DBLP" ], "title": "An Approach and Case Study of Cloud Instance Type Selection for Multi-tier Web Applications", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "569b18f23019f0e4e319b0dfca4dc1d1b8213077": { "authors": [ { "ids": [ "2060408" ], "name": "Sergei Shudler" }, { "ids": [ "1835480" ], "name": "Alexandru Calotoiu" }, { "ids": [ "1713648" ], "name": "Torsten Hoefler" }, { "ids": [ "1684034" ], "name": "Felix Wolf" } ], "doi": "10.1145/3018743.3018770", "doiUrl": "https://doi.org/10.1145/3018743.3018770", "entities": [ "Computation", "Computational complexity theory", "Experiment", "Parallel computing", "Resource contention", "Scheduling (computing)" ], "id": "569b18f23019f0e4e319b0dfca4dc1d1b8213077", "inCitations": [], "journalName": "", "journalPages": "131-143", "journalVolume": "", "outCitations": [ "0fb659af82f2277c8a62ac888f4bfd01570e5470", "3f1d214b024483242ba6c8ad91a1a887fc050f5b", "0c63bf7cc53a745e18c384cf1de34999c3ffbff7", "2bdeced7ae2dab2a34f9f2b9276ffe15f2aac9dd", "09a9c06700fb72705d797691a07b04e83b4fc111", "a7973a6e2f2a0ed47f56ed56b7cd505847fc754b", "d2378cbfe444ca619aaf1de6e6240df5b2667912", "8e4232e56299b7508ccb947e497864b50f042367", "0003b4b4c35f0384596b6f129f7dda8449cdc79f", "4b434f94fafc3ffc76e0c440897ccd222eaa38ac", "1b314ed2c175ad3cfd7ed03b08c8556ab9c2b149", "ef0212e7885fd37caae2b33ace03525b128bbeee", "1eac8c7fb82607a6d20187cfb29b3f9a02d578c2", "4e49452a4bbc4c1cda284e9853b5f3dd185d09bd", "5f89dd1d024766bc1a9f86a6fca0fe5213a09669", "b3f8d6e69302b0ee1b40e01bf65da138f9d0f281", 
"ac7e5716b47cc2678b70dadd34d27648ceecfb0c", "9bc6f4adc167a0c58c808f6eabcfe86590c7d7ef", "c037edd22215b89c8d2924d4e3c81eb84fdadec7", "0ce898bf3f3e4af56492e9135c7c85e3917e20e8", "4b02f9ff133ed8a4ff80c00ba83a74e167fd86a4", "6a870ca2e39d804b02bb450f81cac62df2ef024a", "10777b463156ec55b4068fe9ab63aa69b54c09af", "2fee9034f208596eefe51cc66acb98a99f6500dd", "8d63b44ea043fc3c2b0ec90b2ffbbf84ba446674", "133c176b649618b1f6bc13ec6783647c87bf9935" ], "paperAbstract": "Task-based programming offers an elegant way to express units of computation and the dependencies among them, making it easier to distribute the computational load evenly across multiple cores. However, this separation of problem decomposition and parallelism requires a sufficiently large input problem to achieve satisfactory efficiency on a given number of cores. Unfortunately, finding a good match between input size and core count usually requires significant experimentation, which is expensive and sometimes even impractical. In this paper, we propose an automated empirical method for finding the isoefficiency function of a task-based program, binding efficiency, core count, and the input size in one analytical expression. This allows the latter two to be adjusted according to given (realistic) efficiency objectives. Moreover, we not only find (i) the actual isoefficiency function but also (ii) the function one would yield if the program execution was free of resource contention and (iii) an upper bound that could only be reached if the program was able to maintain its average parallelism throughout its execution. The difference between the three helps to explain low efficiency, and in particular, it helps to differentiate between resource contention and structural conflicts related to task dependencies or scheduling. 
The insights gained can be used to co-design programs and shared system resources.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018770" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/569b18f23019f0e4e319b0dfca4dc1d1b8213077", "sources": [ "DBLP" ], "title": "Isoefficiency in Practice: Configuring and Understanding the Performance of Task-based Applications", "venue": "PPOPP", "year": 2017 }, "56ad278ca41d14386d558f259f6a8b98ae6e86d1": { "authors": [ { "ids": [ "39326009" ], "name": "Amro Awad" }, { "ids": [ "2751331" ], "name": "Yipeng Wang" }, { "ids": [ "1824115" ], "name": "Deborah Shands" }, { "ids": [ "1717365" ], "name": "Yan Solihin" } ], "doi": "10.1145/3079856.3080230", "doiUrl": "https://doi.org/10.1145/3079856.3080230", "entities": [ "2.5D", "Cryptography", "Deadlock", "Encryption", "Memory access pattern", "Memory bus", "Oblivious ram", "Overhead projector", "Personally identifiable information", "Random-access memory", "Trusted Computing", "Trusted computing base" ], "id": "56ad278ca41d14386d558f259f6a8b98ae6e86d1", "inCitations": [ "03a7ecb0a3f1eff43ef7db66991ca37c7189d81c", "dba23d346783e3541d90787f54ec1e5a0cf8bcae", "9d0c7a61c47d0db3181408ffdde5f140a5e07c0f", "fcf8efb59680ef79bcca894947aa46578d2bbd8c", "a6994ee043e174871983386d6a78a3f3be6c09da", "21a402631dff504755e281934eaa90bc9dbe8ae9", "26edca5c337b6b6ec4416356f270c35dc074057d", "20f1081cf001f716037e20d9cff147f5ac50632a", "2c74b71b0ef24c20fc959c7bd82fa82097187327", "0d8952e0a65caf480228ede7e632201d5420e7b7" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "107-119", "journalVolume": "", "outCitations": [ "15e14b815b9ff01878b52e6286419d659102d796", "a52945840b980adfef34466cb4186c7cda3b61e6", "0a679d9d08231b2856fe648e6b331d8e6e46a1fa", "9e94ebc6bf426238d356ebec962deeed8f372fe5", "b13ea783a9090fba3bc345b0ed595b39c0bf7281", "2261da4f4c76139b149a76df48d34e432eb45f62", 
"5edf57085d9a5df85c40031c7baab3b4eb5b345b", "8a41c198449d0f30de5427fe753c6b10bbb7255d", "487e6a85d55c3adbffcd3ce8032b150e90a25bf0", "07b0b5d59ef09f33a40f30d3a2dec880029a5002", "104eb3716d8b2d7e6d69805357d5b7fb87caff3e", "3da14037fc6e2c3dee2d6808bc2d7e933325d054", "9687ea801aeb7cbeef3be3202d68cc9d780e02dc", "02e965debeaf59e6f93adede60d7e39004e77fcc", "9b27ef50a3039c0be52ea58af042b4b7b99d710f", "3a30cf86b5c625e0b3c91490a867c43bf697ae27", "08edbbc346b63098aafa9b052c1eedec34497721", "92eaba06af12761b5c64b84e6028d21cd05af9dd", "45fc1509d52e0baa512561a5ab0da5fd01a225e3", "0a7d54b4b43a9f2057b323c9e60be5e40cbaf26c", "46d1f2d7a8719081f9369661a9a864b6dd105e55", "5e7a7259528f032ae282347ff43a61c82bab5db1", "199b842e50ac2dccef63c7b84e254e3d1779b786", "d9706ba11e5af14d92a1f673f412f0765c082df9", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "07272e31fb957e026a6bc36d55e412de26843c7f", "8f1247646e29e07dddbec698f281d06cee87acbe", "b2ca498540a6001dd23146d9c8805839f2a5f557", "50a7c22bfdc116cad796e090ca918549d81a7011", "6ecb2b0fd3dc0d0e5f522f424663bb7900061a76", "971fa79b8c4e34bba2e99572e277d304fda47e0c", "07aef8af38ac0ed0f60238d205baf5ed6be5cc8b", "99879ad40118f9138c47ea2c76125084bcf1ef62", "2835808d700c88459ff21ce31ba3c4ef02778ddb", "20b63210954f7c5a70664f301dcd7196856ccfa7", "03a7ecb0a3f1eff43ef7db66991ca37c7189d81c" ], "paperAbstract": "Trustworthy software requires strong privacy and security guarantees from a secure trust base in hardware. While chipmakers provide hardware support for basic security and privacy primitives such as enclaves and memory encryption. these primitives do not address hiding of the memory access pattern, information about which may enable attacks on the system or reveal characteristics of sensitive user data. State-of-the-art approaches to protecting the access pattern are largely based on Oblivious RAM (ORAM). 
Unfortunately, current ORAM implementations suffer from very significant practicality and overhead concerns, including roughly an order of magnitude slowdown, more than 100% memory capacity overheads, and the potential for system deadlock.\n Memory technology trends are moving towards 3D and 2.5D integration, enabling significant logic capabilities and sophisticated memory interfaces. Leveraging the trends, we propose a new approach to access pattern obfuscation, called ObfusMem. ObfusMem adds the memory to the trusted computing base and incorporates cryptographic engines within the memory. ObfusMem encrypts commands and addresses on the memory bus, hence the access pattern is cryptographically obfuscated from external observers. Our evaluation shows that ObfusMem incurs an overhead of 10.9% on average, which is about an order of magnitude faster than ORAM implementations. Furthermore, ObfusMem does not incur capacity overheads and does not amplify writes. We analyze and compare the security protections provided by ObfusMem and ORAM, and highlight their differences.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080230" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/56ad278ca41d14386d558f259f6a8b98ae6e86d1", "sources": [ "DBLP" ], "title": "ObfusMem: A low-overhead access obfuscation for trusted memories", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "56e8215d410f575561778299765eab5227a9bfd6": { "authors": [ { "ids": [ "1914477" ], "name": "Jianyu Huang" }, { "ids": [ "39967103" ], "name": "Leslie Rice" }, { "ids": [ "1849855" ], "name": "Devin A. Matthews" }, { "ids": [ "9151878" ], "name": "Robert A. 
van de Geijn" } ], "doi": "10.1109/IPDPS.2017.56", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.56", "entities": [ "Algorithm", "BLAS", "Code generation (compiler)", "Coppersmith\u2013Winograd algorithm", "Data parallelism", "Fast multipole method", "Library", "Matrix multiplication", "Multi-core processor", "Parallel computing", "Strassen algorithm", "Task parallelism", "The Matrix", "Workspace" ], "id": "56e8215d410f575561778299765eab5227a9bfd6", "inCitations": [ "73c2882e65286f209521f75ab4410c22e220e564", "1cc406e388203f841adfd712df0be059edda274e", "2ef3fb64a368db97e0da4b670cf3fab95b895fde" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "656-667", "journalVolume": "", "outCitations": [ "74d231ca09c7106bdbf7e1ff2852fbbc7bd96c67", "3a953ae2b531db881357fa7066b0b17a35708e3f", "012d5e0c031f2fa6db6b77dbc29cabbd95ac2854", "a0d3306999eacc7fab93955eb1223eef10312708", "40fb5ea197206082b0b77f388c57bca79536c877", "2493744d70261082eb6eafd4b13f14e8a8f8eb20", "0c3bde4b6d8c763c0ec4083c9cda059eff87308e", "70e8a6ff0cdad8a798ae552c6259a1fce06ea717", "3ac8d40c90e52c96c31e2ab6a485eb6a06070f9f", "114f158f3b7b37614d5d83efe33c1e73c051c7c1", "d01e00939c1773366237e744ff0047fc55a53453", "004eda59c0ffceb2417bee87c95539eae4bdf0cd", "34eb32537b3f9dacbbd1567e1ce620c66e51d3c6", "05dcdb5f4876b07c20c2c46df156c248f6779a11", "9e253ee24f32eb6dfb918156adaa45622aad88b9", "3269b04305dd2bd4ecbb2daea4429eeb523cc164", "8eaa45df0a85bf7fda455cf7f1699cdfe0de1288", "6b2a23349099f95c1c4850bb1d4731612a7046e6", "355e35184d084abc712c5bfcceafc0fdfe78ceef" ], "paperAbstract": "Matrix multiplication (GEMM) is a core operation to numerous scientific applications. Traditional implementations of Strassen-like fast matrix multiplication (FMM) algorithms often do not perform well except for very large matrix sizes, due to the increased cost of memory movement, which is particularly noticeable for non-square matrices. 
Such implementations also require considerable workspace and modifications to the standard BLAS interface. We propose a code generator framework to automatically implement a large family of FMM algorithms suitable for multiplications of arbitrary matrix sizes and shapes. By representing FMM with a triple of matrices [U, V, W] that capture the linear combinations of submatrices that are formed, we can use the Kronecker product to define a multi-level representation of Strassen-like algorithms. Incorporating the matrix additions that must be performed for Strassen-like algorithms into the inherent packing and micro-kernel operations inside GEMM avoids extra workspace and reduces the cost of memory movement. Adopting the same loop structures as high-performance GEMM implementations allows parallelization of all FMM algorithms with simple but efficient data parallelism without the overhead of task parallelism. We present a simple performance model for general FMM algorithms and compare actual performance of 20+ FMM algorithms to modeled predictions. Our implementations demonstrate a performance benefit over conventional GEMM on single core and multi-core systems. 
This study shows that Strassen-like fast matrix multiplication can be incorporated into libraries for practical use.", "pdfUrls": [ "http://www.cs.utexas.edu/~jianyu/presentations/fmm_ipdps17.pdf", "https://arxiv.org/pdf/1611.01120v1.pdf", "http://www.cs.utexas.edu/~jianyu/papers/ipdps17.pdf", "https://doi.org/10.1109/IPDPS.2017.56", "https://apps.cs.utexas.edu/apps/sites/default/files/tech_reports/FMM.pdf", "http://arxiv.org/abs/1611.01120" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/56e8215d410f575561778299765eab5227a9bfd6", "sources": [ "DBLP" ], "title": "Generating Families of Practical Fast Matrix Multiplication Algorithms", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "56ee03a70a10d1a0dfcff621ae60aef76fc43ad7": { "authors": [ { "ids": [ "1799006" ], "name": "Sanjib Sur" }, { "ids": [ "2285687" ], "name": "Ioannis Pefkianakis" }, { "ids": [ "1775391" ], "name": "Xinyu Zhang" }, { "ids": [ "5445008" ], "name": "Kyu-Han Kim" } ], "doi": "10.1145/3117811.3117817", "doiUrl": "https://doi.org/10.1145/3117811.3117817", "entities": [ "Algorithm", "Chipset", "Data rate units", "Electron mobility", "Experiment", "Gigabit", "PHY (chip)", "Telephone exchange", "Throughput" ], "id": "56ee03a70a10d1a0dfcff621ae60aef76fc43ad7", "inCitations": [ "9905d9e816d7106bed6496eb8a3ad90947342afe", "bbb3d731a5127e473e5fe6020ad27bbfcd189d8b", "4c4795737d7f11088b5ec0d304d0a3f93daefc39", "a9657972ee88ac3480ced7ed36fd04b2f50676ae", "73f615dc7f8162998016e7d990872087040afd96", "c4810a223438a7d5f6cdd915b210b44873637943" ], "journalName": "", "journalPages": "28-41", "journalVolume": "", "outCitations": [ "f627c6fd6f550c69d97220fc377c15e37699baf8", "6f07a10dfbd583fdda034c7d606e53148f162f2d", "41318936e9bfd1530e008fa658b55198a6fc1270", "58392cd42505bf2bc0675610188f6465bc20fd6f", "e0908cba947f691cf1d0aca7a62d0144ee50ad4f", "f87aa8b1087f11606cff27da4d60852d6671993a", 
"85fdc9788c9353a95831939c3954e181e92616fe", "9823f8c8c43b64cc6c0c7fd09e9380d908122148", "0cd493a73a827f6be241239017b3eaa2d995d2a2", "c6bad18587cfbf62be967c11e6d9887fec3fe479", "2fca6867babd34da9f04c26d20c4915e4bcfe8cb", "a115ef0244b2add4f528bbc994c2a779a5a9a185", "d7b2cf873d0c4841887e82a2be0b54013b34080d", "9905d9e816d7106bed6496eb8a3ad90947342afe", "0ae6b77e569d0810253aeb9835c0b40d759491a8", "0f7936242a8f35eb0b751582f57be317a9d84696", "460464955cee59f610c94c9360cad879edb5d880", "3e364e301f026a197fde0608481dfa2c09e85b7b", "1943466070019e48204ebbee0914d87ced4ba09a", "17dbe14d1346fde77ab08de9f50081ddf538a1ff", "f57981c371b595ee4a47202b4ed75ef2ec417f63", "b5b68e61a21470c2f22c58f4aa19a7bf3882079d", "244626ceea2c1a814d11f1fcffcddcea9001d77a", "0969bae35536395aff521f6fbcd9d5ff379664e3", "ccf61e9d678481813da6f0505a2f9c539a28ac01", "aebe75efbdade65e22f05b6b8c2386af8fc2b8ff", "21c039e563ec0ca023a5b9c729e92a2fd611946a", "0e4c008bf6de673bf4a8246fe09b086fb1b37609", "7c800e733b04c2fb5842d06b11bb55c0e13b0e07", "05a61120569176a2143eeb2dd618f28dcda025ae", "1bc04cbbce54bc027b6147eb0a49189a2691a35c", "671ef43e50af2bd00cb91b4aad6815c1b95083dc", "08e6f96da8e44d6529d29fb2087f5bbf5684404d", "534ee575a6b0c37e03d1dddb92493b57e9271298", "1ac257a741e3ddf53d20b3ff04dd01f9eb998928", "1362121a6fd16a78182c23a051bf5c275fa27888", "47240e17ac8fa393ec6e2db2dac68454e96c8495", "4a2c2855f30f07ff8877305c2a5993d4e51056cd", "8ffc5266432aff1dacc96b69c11234109288bfc8", "e7f0c3581981c8ea008c68b39cacb2df4daeb8dd", "81ed14364300805954f948abd7f2df397df233bf", "a89f8d87b84f00a949c7250713aeecb1e307ca37", "db39d93535b8d822b3a07d2a7f9c0d916847f30f" ], "paperAbstract": "Despite years of innovative research and development, gigabit-speed 60 GHz wireless networks are still not mainstream. The main concern for network operators and vendors is the unfavorable propagation characteristics due to short wavelength and high directionality, which renders the 60 GHz links highly vulnerable to blockage and mobility. 
However, the advent of multi-band chipsets opens the possibility of leveraging the more robust WiFi technology to assist 60 GHz in order to provide seamless, Gbps connectivity. In this paper, we design and implement MUST, an IEEE 802.11-compliant system that provides seamless, high-speed connectivity over multi-band 60 GHz and WiFi devices. MUST has two key design components: (1) a WiFi-assisted 60 GHz link adaptation algorithm, which can instantaneously predict the best beam and PHY rate setting, with zero probing overhead; and (2) a proactive blockage detection and switching algorithm which can re-direct ongoing user traffic to the robust interface within sub-10 ms latency. Our experiments with off-the-shelf 802.11 hardware show that MUST can achieve 25-60% throughput gain over state-of-the-art solutions, while bringing almost 2 orders of magnitude cross-band switching latency improvement.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117817", "http://xyzhang.ucsd.edu/papers/SSur_MobiCom17_MUST.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/56ee03a70a10d1a0dfcff621ae60aef76fc43ad7", "sources": [ "DBLP" ], "title": "WiFi-Assisted 60 GHz Wireless Networks", "venue": "MobiCom", "year": 2017 }, "570552045c4d26d37e87aaa35fe79f8b412974f8": { "authors": [ { "ids": [ "3139121" ], "name": "Yinzhi Cao" }, { "ids": [ "24020529" ], "name": "Zhanhao Chen" }, { "ids": [ "1806013" ], "name": "Song Li" }, { "ids": [ "24066743" ], "name": "Shujiang Wu" } ], "doi": "10.1145/3133956.3133996", "doiUrl": "https://doi.org/10.1145/3133956.3133996", "entities": [ "JavaScript", "Privacy", "Reference frame (video)", "Tor Messenger" ], "id": "570552045c4d26d37e87aaa35fe79f8b412974f8", "inCitations": [], "journalName": "", "journalPages": "163-178", "journalVolume": "", "outCitations": [ "2f96da380abcf9e549612bc811025aeb4b5c5360", "164a7c8521c0e2734ebda97a5b7eecc202e03795", "51865dda5a27e14627b15979e8546bf261f089f0", 
"058da9f371ed8a33b4186979bc431555e0eab6f2", "8796f9d1eb4ffa8a3f752bfac8db053487eb3120", "25e8930dd98a3d9cff1c7154b2874148da597724", "8a0af8ae748210ef571d074362b552af571e6d33", "05a618847e4f08e5bca29dff732757779722b2e0", "2d4ef2f1ceeaba4acc46dec6c48dc18deb9ddb5f", "05a14a4aa20fb48f1278f4b04173bf34b44b6c73", "7a800db534a612575217ead87f4c1a8acce022af", "3001d2504e1dbc547d12d05f3ed1a671d125c4ce", "66a6b8b5086454d2f511089ed3c157075239eb7d", "56238020ce7c21d572f340d694f4b16478fad71a", "1f38c11fe8511c77fb7d383126214c9e7dc28e4a", "38720bc0d2c36c156bd2be9b472be6f83169e41a", "02179081a241710264019bea393cb25cfd5c663d", "834cdbde6e7800f9f50d4884858bb093fc3b65f6", "3a2047c0532ca0ae45e404ff659b313287737809", "624f889ee119c31062eaaea60cf723f4c7863177", "03f02ace66cd558fa4dd465bc988edbe6cee8d5e", "9837a70c231c0ef3d33c2c9f5b56afd40548acce", "0584a08bf9932d557a564e77647e659ae7f8d981", "fd67e410437ae239dcc9beaddaf1dc80fddb6461", "0d939c3826455ca42310a92d5c00a956c4630b0e", "24cba0ea8970cba2a4bb750347fb59b82d028126", "192dc9e8618d00beb8451553d59dd391bcf53124", "9a2934caacf51e28030b9c60cfd4671ddeb4128e", "3188dc28042effbd519005ec18c07e7afa51c975", "5a70de39295c09dd4ee05b94e791ed79b4b00c84", "857348ee328c6870a812bf36179460306697ee6e", "3d8775945f7c62b2bca55b7097fde9427b0363bd", "3a6db9085a191b3f73c1ef0eeea977297a03f8ea", "959cfe05045e1c7e80406209244d3346061ca4e6", "482fcc1057c6ed9ea21f71c990088eeb092ec243", "2c87990d979e3744078c2af95b8cabba0be42313", "400078e734d068b8986136ca9e3bbaa01c2bd52b", "1d15930cd9e4ececf22fd96bf9ba52f12dc0665e", "d7ae2ea551e43f22bd525f999c7b6ca0e0c0f23e", "323ab3462f3f6621cdf3b792cb82edb9edaa80cb", "695d602b2e0e358c43e1ffd5cbf994e4453c2c20", "182a81eaf31b1a76be592c0890182cacd4199be0" ], "paperAbstract": "Timing attacks have been a continuous threat to users' privacy in modern browsers. To mitigate such attacks, existing approaches, such as Tor Browser and Fermata, add jitters to the browser clock so that an attacker cannot accurately measure an event. 
However, such defenses only raise the bar for an attacker but do not fundamentally mitigate timing attacks, i.e., it just takes longer than previous to launch a timing attack. In this paper, we propose a novel approach, called deterministic browser, which can provably prevent timing attacks in modern browsers. Borrowing from Physics, we introduce several concepts, such as an observer and a reference frame. Specifically, a snippet of JavaScript, i.e., an observer in JavaScript reference frame, will always obtain the same, fixed timing information so that timing attacks are prevented; at contrast, a user, i.e., an oracle observer, will perceive the JavaScript differently and do not experience the performance slowdown. We have implemented a prototype called DeterFox and our evaluation shows that the prototype can defend against browser-related timing attacks.", "pdfUrls": [ "https://arxiv.org/pdf/1708.06774v1.pdf", "http://www.yinzhicao.org/deterfox/deterfox.pdf", "http://doi.acm.org/10.1145/3133956.3133996", "http://arxiv.org/abs/1708.06774" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/570552045c4d26d37e87aaa35fe79f8b412974f8", "sources": [ "DBLP" ], "title": "Deterministic Browser", "venue": "CCS", "year": 2017 }, "5716db825bbd2c39836a2d6fa22e7f313fc12ccf": { "authors": [ { "ids": [ "2898845" ], "name": "Nachshon Cohen" }, { "ids": [ "35238443" ], "name": "Michal Friedman" }, { "ids": [ "1752633" ], "name": "James R. 
Larus" } ], "doi": "10.1145/3133891", "doiUrl": "https://doi.org/10.1145/3133891", "entities": [ "Access control", "Cache coherence", "Central processing unit", "Computation", "Hard disk drive", "Hash table", "Non-volatile memory", "Out-of-order execution", "Persistence (computer science)", "Random-access memory", "Resistive random-access memory", "Volatile memory" ], "id": "5716db825bbd2c39836a2d6fa22e7f313fc12ccf", "inCitations": [ "cb2a018979184f87692d423322e367cc42a215d2", "d6fac995a609ddf90f02fcc05a6b33f572ca18fd" ], "journalName": "PACMPL", "journalPages": "67:1-67:24", "journalVolume": "1", "outCitations": [ "05a1357946de5eca42a477b7b268db4944219a2e", "1c743b19515158ceb96422e1d8e94a0275eaa04d", "57fe2b6acccbba91df0847442d2634ffef7ccfb4", "24724ad8962a9e04eb496fddaefe9708f6960601", "23773ffc679a8d9ebfd73810dec3e6fe6aa278ab", "fd840d5275cac98d64e7778a1b9173b937a77386", "42512431ca7fffdbc80eb7280d093efcead3d48d", "71c2deb5c3b4b0fd1ed68bdda534ec7ea76e845b", "578667cbc39c6bfc1c89fe6a54506643c3b097f8", "47b851237f240831abee3971bca6bb8d2a121eb1", "4f6fbe4484487e3983f673ff55bdec92f947311c", "3ad32bb609eb1190a6c98ed63b1aeef0b96301b0", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "42c70d64890726f60556caf3eec3f06e85642dd9", "57c823b3b07b98233394bf15cfbbaed6a84809df", "157352f39ed5ba9a571dd4c27b48786ac8be90cf", "15c80ec5104e98d6f84b5ed348ba0276c0739862", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "05bd926844ffa89f668237a6836825c59d6377e9", "b8735a449f0a1f1889c6b744061360aa85afaa6b", "2c50094e7e5e1134033efc6565c8d7c21a04d2d9", "5a49c1c694028fd6ca7acc6af601c0f54efa0700", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "885c666fbcfd1a10c613496d7a041d01b99c7a39", "05a518c3b1a6f5c15d77f8829368677a263ff15d", "3ede1909bf70d6e4bca46302f474083517b081a3" ], "paperAbstract": "Non-volatile memory technologies such as PCM, ReRAM and STT-RAM allow data to be saved to persistent storage significantly faster than hard drives or SSDs. 
Many of the use cases for non-volatile memory requires persistent logging since it enables a set of operations to execute in an atomic manner. However, a logging protocol must handle reordering, which causes a write to reach the non-volatile memory before a previous write operation. \n In this paper, we show that reordering results from two parts of the system: the out-of-order execution in the CPU and the cache coherence protocol. By carefully considering the properties of these reorderings, we present a logging protocol that requires only one round trip to non-volatile memory while avoiding expensive computations, thus increasing performance. We also show how the logging protocol can be extended to building a durable set (hash map) that also requires a single round trip to non-volatile memory for inserting, updating, or deleting operations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133891", "http://arxiv.org/abs/1709.02610", "https://infoscience.epfl.ch/record/231400/files/oopsla17main-oopsla76-p-bdc7425-34082-final.pdf", "https://arxiv.org/pdf/1709.02610v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5716db825bbd2c39836a2d6fa22e7f313fc12ccf", "sources": [ "DBLP" ], "title": "Efficient logging in non-volatile memory by exploiting coherency protocols", "venue": "PACMPL", "year": 2017 }, "571abb9d6a54c21107ee24bbb905227104477908": { "authors": [ { "ids": [ "1779599" ], "name": "Thomas D. 
Dickerson" }, { "ids": [ "2725205" ], "name": "Paul Gazzillo" }, { "ids": [ "1744502" ], "name": "Maurice Herlihy" }, { "ids": [ "33681628" ], "name": "Eric Koskinen" } ], "doi": "10.1145/3087801.3087835", "doiUrl": "https://doi.org/10.1145/3087801.3087835", "entities": [ "Computer architecture", "Concurrency (computer science)", "Concurrency control", "Cryptocurrency", "Design by contract", "Ethereum", "Java virtual machine", "Multi-core processor", "Programming language", "Serializability", "Smart contract", "Software transactional memory", "Speedup", "Throughput", "Transactional memory", "Validator" ], "id": "571abb9d6a54c21107ee24bbb905227104477908", "inCitations": [ "445b058cc0c12a4f0428a68de9b9d31f9dbdafeb", "4b99fbe18fe4a8cd1d797ed073fb92fb71bd2dcf", "3620f75bee161d630dcf493785589595e524128f" ], "journalName": "", "journalPages": "303-312", "journalVolume": "", "outCitations": [ "0b704e724b2ae86d11afe6def7e92f30b2756c06", "33918269fc2cfc235ae68ef11934b9dc375eaa39", "13f7c5807452ae602046582a385c0fb544ec5de1", "8665bf55084c825477cc9a6a64a0150a8d3850f7", "52210124ac84b31b855f481b25c6ac5e80afab97", "45b52b55345b0c703d3dddebfc17d0e9844d109c", "3fad56eb0379f9684af608bd6c9ad4de706b4cad", "12324f77ec6d16da5d608447e60e874c4262fddc", "95344b4923c9e8fb126ef784244e3d24b555eba9", "1542f3fe7bf34c7cff7c747f59bdbbae777c90cd", "0003b4b4c35f0384596b6f129f7dda8449cdc79f", "12df6611d9fff192fa09e1da60310d7485190c1c", "cba77292e7f1f271fff1bd28238728f4f18dd13e", "20f5f8733134d87041b95b742d613051a1fb3fdb", "b5605f01a75234cdeb7213f31f7de24418553f1b", "6bc121a0ab85cb2c9854862dd5c0a5ba3ee9e7a7" ], "paperAbstract": "Modern cryptocurrency systems, such as Ethereum, permit complex financial transactions through scripts called smart contracts. These smart contracts are executed many, many times, always without real concurrency. First, all smart contracts are serially executed by miners before appending them to the blockchain. 
Later, those contracts are serially re-executed by validators to verify that the smart contracts were executed correctly by miners. Serial execution limits system throughput and fails to exploit today\u2019s concurrent multicore and cluster architectures. Nevertheless, serial execution appears to be required: contracts share state, and contract programming languages have a serial semantics. This paper presents a novel way to permit miners and validators to execute smart contracts in parallel, based on techniques adapted from software transactional memory. Miners execute smart contracts speculatively in parallel, allowing non-conflicting contracts to proceed concurrently, and \u201cdiscovering\u201d a serializable concurrent schedule for a block\u2019s transactions, This schedule is captured and encoded as a deterministic fork-join program used by validators to re-execute the miner\u2019s parallel schedule deterministically but concurrently. Smart contract benchmarks run on a JVM with ScalaSTM show that a speedup of 1.33x can be obtained for miners and 1.69x for validators with just three concurrent threads.", "pdfUrls": [ "http://people.cs.georgetown.edu/~clay/classes/fall2017/835/papers/Adding_Concurrency_to_Smart_Contracts.pdf", "https://www.paulgazzillo.com/papers/podc17a.pdf", "http://doi.acm.org/10.1145/3087801.3087835", "https://arxiv.org/pdf/1702.04467v1.pdf", "http://arxiv.org/abs/1702.04467" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/571a/bb9d6a54c21107ee24bbb905227104477908.pdf", "s2Url": "https://semanticscholar.org/paper/571abb9d6a54c21107ee24bbb905227104477908", "sources": [ "DBLP" ], "title": "Adding Concurrency to Smart Contracts", "venue": "PODC", "year": 2017 }, "571bd1039ea974b255a6b61277a639ece79bedfb": { "authors": [ { "ids": [ "17273699" ], "name": "William C. 
Anderton" }, { "ids": [ "1832296" ], "name": "Maxwell Young" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Backoff", "Exponential backoff", "Job shop scheduling", "Makespan", "Manifest (transportation)", "Time complexity" ], "id": "571bd1039ea974b255a6b61277a639ece79bedfb", "inCitations": [], "journalName": "CoRR", "journalPages": "", "journalVolume": "abs/1705.09271", "outCitations": [ "84a0d1692be7c4905c6de0ec4f15730d7f128f8a", "bcd5976c40ff33317b2147139dd5d1298ae3e413", "94bba2acd48dd3a3d72878b76dab6a045635b975", "2e10ed5df7f6ec6319b749e606112d8c0ed57f89", "ccb4e5f9fc72ce780b3bfdc1b30e37d541550f87", "4fba954fb028a9caf0a5eb80721c63642dd5dd28", "177b2fa8eaecdd5c3bb82f5f8bee4e46038844e0", "70ad722959cc92efc2acc15fd99f301fc8576a6e", "175e0bc956eecd3d907678358db6ed27b5462d58", "344ab639e6e7cb662976d30f3bb534c6aa54e57c", "d38e16fb29c7d518bc5df6f6c7bd47f4c28f2cb2", "6558ff44a6dea4b1366b203bd0157afe6eb69f15", "28230e9101989a27935b8b82aa1e676bef00e3fc", "64777119ef11d332d73441bdf3b88c1b9fad5315", "577c157be5b6a7d6a86a005a976e05d41a4fc7bf", "866189f6ff951c0e4b2ddcdf328692a74d678ef5", "766c870971c7600acc5b0d2d564df0af76158fc8", "435ff63c9c2e04567c004aaa0d994ef842793b6c", "438a58ae8ac7c3e334c472c574fcb0e9c5c73b08", "19b17ed55736466e0e14104372ba516049e7e7a4", "0a47fae7c4571958c42b6185430beeb3309889bd", "39f873101a5a05400b731f9e2fae6809e0aff9cd", "920726915b90f174952214108560a2d8f051585b", "716d96c52b3436da1434c9a95bd05c5733fc2bd0", "4d616fccd4862830be315f1288887b57592a7d28", "b50e429252a5c3135977000c67f977ba222a8c59", "8b1b501cd41b1e7d3cd137380af211ea935cfc5c", "ad3da293f6b9e041944d239ffcb8d74a1cccdaf8", "0684aaaa26724590981b5f37a993b856241b2ec8", "1e45719efcccc817d382041b830c1f5052f1c36b", "3927f8cc4d11bbd85810738ad053b5452749f852", "9038d2568350bd96cbc95889b24e9c4dd5cf6a0f", "2232e2b58a991f965ac5ecb71595509da275889c", "78f05033668099bd02bc354f22d26d5817cd0a4a", "772b329cd8e3ab91e6ac84233acb497d96dbdf13", "6bc4b63c48e7d631fb854b816f2ae6fcdb520df7", 
"8e1d37454986391ab80507d4a773fa990edf1fb9", "2fa3d688ef5fe3ddc810a2c40b5c835e5192a123", "4dd45e56e370f38907ee6f800f4b195b4fa4e04a", "7ab3c04d5fea8952b54621b4dd002d1176d9e096", "1e74398dda447da5db67b121b7373297f3557a48", "d3ff1eb5df6879d79a4a3e020a1241540956600d", "43874188d2deab7ad218abb4f33acf2a4119471d", "35657c6f6882b2a90caa9362c91d381dbf53d313", "15cc84888f35742c760413c489bf5818ba194256", "039db2ee81b2b267b8a5366dd1bfa21650d75117", "30c6b618a3109807c24d1e648bdfde113dd33f93", "39a9979ba813c6ec8eb53eb415e067686effcc33", "a38dd03398abeb7cf01353d2e70250a90402be63", "7b1f7f38c4f10c8ab48b9b6c7889c431ae7ca910", "6c6ed6e7aca2d7a3bfec56e16504a295efe9e523", "4f313902ddda555e3393eac1d64695a044b6abfd", "49efeb06b2cd2f4adc05867ec7dabbf572bb39df", "29bae5b2d889745c752349df1ea4741df071c525", "82ec1f9412c5406371ea87be88c93e7e303414d8", "03193d30be0721877b5e0bee31d16d0068544c6f", "31b252d04d9a833620cf6881b1641f896b777e25", "fa19062e54f4854fd3ed414b01047e46f3228005", "2078a884a384236435b9368cf2b39089f5caf2ee", "9464e0aa498a7d6ea10d9bf918fb9ede1a139b85", "1fe583fd534786b07c4fb12b274511033c362066", "276b961f6b57e42bd03da6d055fa6c4ef25fd5b7", "4d457343adaa603f64fe78e74e58d7192e5ee42c", "2aafa59a83478d0a02932ce861843e1067dd37be", "10493e0654d3ddf61bae0aeef5f0702f73aa186d", "b95634cbd448d51eb08c617535ecc30242442133", "7e85209acda1190826e47e34bc7c87629ba5778c", "5da170d57ea3357c98e7fd6e8313cbe4711f3d43", "2baa9f516ab8a91929af5ea3bcddc310b6fe577c" ], "paperAbstract": "Randomized binary exponential backoff (BEB) is a popular algorithm for coordinating access to a shared channel. With an operational history exceeding four decades, BEB is currently an important component of several wireless standards. Despite this track record, prior theoretical results indicate that under bursty traffic (1) BEB yields poor makespan and (2) superior algorithms are possible. To date, the degree to which these findings manifest in practice has not been resolved. 
To address this issue, we examine one of the strongest cases against BEB: n packets that simultaneously begin contending for the wireless channel. Using Network Simulator 3, we compare against more recent algorithms that are inspired by BEB, but whose makespan guarantees are superior. Surprisingly, we discover that these newer algorithms significantly underperform. Through further investigation, we identify as the culprit a flawed but common abstraction regarding the cost of collisions. Our experimental results are complemented by analytical arguments that the number of collisions \u2013 and not solely makespan \u2013 is an important metric to optimize. We believe that these findings have implications for the design of contention-resolution algorithms.", "pdfUrls": [ "https://arxiv.org/pdf/1705.09271v2.pdf", "http://arxiv.org/abs/1705.09271", "https://arxiv.org/pdf/1705.09271v1.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8c79/b5f079f30d312a3667395994c06da7b8e60b.pdf", "s2Url": "https://semanticscholar.org/paper/571bd1039ea974b255a6b61277a639ece79bedfb", "sources": [ "DBLP" ], "title": "Is Our Model for Contention Resolution Wrong?", "venue": "ArXiv", "year": 2017 }, "572a53b39ff72ceb471867fc8a3b9c6326ea3a4c": { "authors": [ { "ids": [ "32326766" ], "name": "Dear Sungbok Shin" }, { "ids": [ "1952581" ], "name": "Minsuk Choi" }, { "ids": [ "40007192" ], "name": "Jinho Choi" }, { "ids": [ "2255938" ], "name": "Scott Langevin" }, { "ids": [ "39229386" ], "name": "Christopher Bethune" }, { "ids": [ "39848947" ], "name": "Philippe Horne" }, { "ids": [ "3207968" ], "name": "Nathan Kronenfeld" }, { "ids": [ "34058303" ], "name": "Ramakrishnan Kannan" }, { "ids": [ "1731900" ], "name": "Barry L. 
Drake" }, { "ids": [ "1685928" ], "name": "Haesun Park" }, { "ids": [ "1795455" ], "name": "Jaegul Choo" } ], "doi": "10.1109/ICDM.2017.53", "doiUrl": "https://doi.org/10.1109/ICDM.2017.53", "entities": [ "Geo warping", "Non-negative matrix factorization", "Parallel computing", "Regular expression", "Social media", "Topic model" ], "id": "572a53b39ff72ceb471867fc8a3b9c6326ea3a4c", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "435-444", "journalVolume": "", "outCitations": [ "fe803c7835469e5cb4057ed4de6fd995f441433e", "6fc8f268815bd58a4f2f4d28ec844b1f032e4f4e", "0ff0d9894468d6d831339403e52b96ce3bcb2e5a", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "182dae04254e79a8cd6acee81114a686c8ca1cec", "08a591b1b30a2f3b1c94d33387e9cd515e95c186", "ea278ac1bcca397b00853076cfabc79c4379c28d", "7a278ee0578f194700cadc3811cdda4ec751f88a", "13cf6e3658598e92a24feb439e532894e4aa68e3", "096db7e8d2b209fb6dca9c7495ac84405c40e507", "16d4ced27bc6586c1af63ce276214a566512aa56", "460db53aa7b8b2464e6d03b6e322c74e0a289e8d", "4c0d2d4269895eb367d7b0d38d9de3e99bf3f3ae", "cb0c47f3c21f97637121ce939279433e8d88c821", "a13996d04b6794e9682893b333c4d6780ec18d8b", "2115bc020be0ade76fc2424df90ddb5b8c9a7f58", "2e20ed644e7d6e04dd7ab70084f1bf28f93f75e9", "a44e3f9a76ffa530bd87a6eb7c72025b8ebe6e71", "8479a404b73afd6a61d8a872086d9e7d6d2bdf30", "39a93f7ea3e4a2fa0d46f045472f3acded81f094", "58b0dc8c3d11e43763b232e990c1bdf9030c7c12", "46e855bf250355d8366aa6957740d820c310ef25", "185cfdba33ac86ccfe9e9a677db1c80264743c21", "1648f2183bd1b7634b4d51dafefad791f780a473" ], "paperAbstract": "Understanding newly emerging events or topics associated with a particular region of a given day can provide deep insight on the critical events occurring in highly evolving metropolitan cities. 
We propose herein a novel topic modeling approach on text documents with spatio-temporal information (e.g., when and where a document was published) such as location-based social media data to discover prevalent topics or newly emerging events with respect to an area and a time point. We consider a map view composed of regular grids or tiles with each showing topic keywords from documents of the corresponding region. To this end, we present a tilebased spatio-temporally exclusive topic modeling approach called STExNMF, based on a novel nonnegative matrix factorization (NMF) technique. STExNMF mainly works based on the two following stages: (1) first running a standard NMF of each tile to obtain general topics of the tile and (2) running a spatiotemporally exclusive NMF on a weighted residual matrix. These topics likely reveal information on newly emerging events or topics of interest within a region. We demonstrate the advantages of our approach using the geo-tagged Twitter data of New York City. We also provide quantitative comparisons in terms of the topic quality, spatio-temporal exclusiveness, topic variation, and qualitative evaluations of our method using several usage scenarios. 
In addition, we present a fast topic modeling technique of our model by leveraging parallel computing.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.53", "https://www.cc.gatech.edu/~hpark/papers/STExNMF2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/572a53b39ff72ceb471867fc8a3b9c6326ea3a4c", "sources": [ "DBLP" ], "title": "STExNMF: Spatio-Temporally Exclusive Topic Discovery for Anomalous Event Detection", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "5748bbf7b2f8763aa620fae03e573cb3090b34b0": { "authors": [ { "ids": [ "2114364" ], "name": "Atish Kathpal" }, { "ids": [ "2449395" ], "name": "Priya Sehgal" } ], "doi": "", "doiUrl": "", "entities": [ "Apache Cassandra", "Backup", "Backup and Restore", "Database", "Database dump", "Eventual consistency", "MongoDB", "NoSQL", "Oblivious transfer", "Quiesce", "Snapshot (computer storage)", "Storage area network", "Time consistency" ], "id": "5748bbf7b2f8763aa620fae03e573cb3090b34b0", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "638c917d981915bc7a00bb0941cdd38111df51de", "7afa08d7c1c6c8758ee1227437c69463d5441d09", "0cb9e2cee074684b04ad7567cbcdf1bc83ef7645", "26f4fe14dae716349f150f6ec8058c546cbc5d28", "9aa0d7253574e50fe3a190ccd924433f048997dd", "6b624ae22cc548814f3dd3eb9ef09d98e523d9d8", "1d99b7749a9311d2db24a3d84728e444eff23e4b" ], "paperAbstract": "While NoSQL databases are gaining popularity for business applications, they pose unique challenges towards backup and recovery. Our solution, BARNS addresses these challenges, namely taking: a) cluster consistent backup and ensuring repair free restore, b) storage efficient backups, and c) topology oblivious backup and restore. Due to eventual consistency semantics of these databases, traditional database backup techniques of performing quiesce do not guarantee cluster consistent backup. 
Moreover, taking crash consistent backup increases recovery time due to the need for repairs. In this paper, we provide detailed solutions for taking backup of two popular, but architecturally different NoSQL DBs, Cassandra and MongoDB, when hosted on shared storage. Our solution leverages database distribution and partitioning knowledge along with shared storage features such as snapshots, clones to efficiently perform backup and recovery of NoSQL databases. Our solution gets rid of replica copies, thereby saving ~66% backup space (under 3x replication). Our preliminary evaluation shows that we require a constant restore time of ~2-3 mins, independent of backup dataset and cluster size.", "pdfUrls": [ "https://www.usenix.org/conference/hotstorage17/program/presentation/kathpal", "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-kathpal.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/5748/bbf7b2f8763aa620fae03e573cb3090b34b0.pdf", "s2Url": "https://semanticscholar.org/paper/5748bbf7b2f8763aa620fae03e573cb3090b34b0", "sources": [ "DBLP" ], "title": "BARNS: Towards Building Backup and Recovery for NoSQL Databases", "venue": "HotStorage", "year": 2017 }, "575cdfae744f3c3972a4c61f3cdcc718b9bc8891": { "authors": [ { "ids": [ "34856432" ], "name": "Changchang Yin" }, { "ids": [ "39835284" ], "name": "Buyue Qian" }, { "ids": [ "39367158" ], "name": "Shilei Cao" }, { "ids": [ "1972958" ], "name": "Xiaoyu Li" }, { "ids": [ "39791510" ], "name": "Jishang Wei" }, { "ids": [ "33992264" ], "name": "Qinghua Zheng" }, { "ids": [ "38673135" ], "name": "Ian Davidson" } ], "doi": "10.1109/ICDM.2017.67", "doiUrl": "https://doi.org/10.1109/ICDM.2017.67", "entities": [ "Active learning (machine learning)", "Algorithm", "Artificial neural network", "Baseline (configuration management)", "Batch processing", "Data point", "Decision boundary", "Deep learning", "Exploit (computer security)", "Feature vector", "Heuristic", "Information", 
"Iteration", "MNIST database" ], "id": "575cdfae744f3c3972a4c61f3cdcc718b9bc8891", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "575-584", "journalVolume": "", "outCitations": [ "2ae40898406df0a3732acc54f147c1d377f54e2a", "05355f133cf378b051012cb8a174d00c9f2456c1", "480679765e02c5d4f73237928290e7db6cfec02e", "046d4d91555f46c5cf44e1e3316edebfcf7dd110", "593ef23a6b63488ec7c4d093a099d0ba8beac5be", "3e5d5e81f3e04ef72f13b12dc4f3659fcca29454", "2300698c7566c714ee624b6e9e779cfaa2d5ff51", "708aba5781f00b5f32097e5a1f0b7f39a724879e", "0acf1a74e6ed8c323192d2b0424849820fe88715", "02485a373142312c354b79552b3d326913eaf86d", "5a931c952cf15207146a4183599f5d90e4d4d7c5", "f276c00bac7594107c291947f560b7b48b1439d7", "3dd2f70f48588e9bb89f1e5eec7f0d8750dd920a", "c9ca8331a20a56082581e203a399563fe2b54d20", "59e517ca1662ffadd83cd3547f074ab76513c348", "8942804fe4e2425758ab68df4ff80a2cac1987b8", "398dee13b3aaaefdf14c78cc1e00dcf265795fd3", "162d958ff885f1462aeda91cd72582323fd6a1f4", "803d421305e07f471f0d0fd278d6043d26763767", "220c014611b045ff35d6d67f49759874152fbfd1", "7233cbdf0084e561c63553bcd0412eebcf6065b3", "609e5cc1da126d7f760d1444b43b4fae41602841", "01a27d77257c47a24daaa969f258ea844b9cbff8", "5a1e57b6120b5b3cfdf977120193fe21055a12bb", "061356704ec86334dbbc073985375fe13cd39088", "25d4f76996c24c56b2c6854b5c5c006e6fdad9e6", "0f2131d7b1c39266c85a90aab35d75a18554fc2e", "424561d8585ff8ebce7d5d07de8dbf7aae5e7270", "3f3166232216cb4d29d7c686ab20e3e245e453ba", "356c3e1cb9e10f4c504cd50dd98f82691251f207", "63861fbeb7ec41986b85965b9780b428d919919e", "4e3d067938c6666965910fe97c428bb5ef5b59f0", "01fcae344d2edb715bcc63a40b6052c0331741bd", "04930dc2847e67b992219b129311523cb33976d5", "22be9708b54c15cb47b46a96aadbadfc93f7dada", "818826f356444f3daa3447755bf63f171f39ec47", "4f3484a1b08b332479f0cc0197528e9007292a90" ], "paperAbstract": "Active learning aims to reduce manual labeling efforts by proactively selecting the most informative 
unlabeled instances to query. In real-world scenarios, it's often more practical to query a batch of instances rather than a single one at each iteration. To achieve this we need to keep not only the informativeness of the instances but also their diversity. Many heuristic methods have been proposed to tackle batch mode active learning problems, however, they suffer from two limitations which if addressed would significantly improve the query strategy. Firstly, the similarity amongst instances is simply calculated using the feature vectors rather than being jointly learned with the classification model. This weakens the accuracy of the diversity measurement. Secondly, these methods usually exploit the decision boundary by querying the data points close to it. However, this can be inefficient when the labeled set is too small to reveal the true boundary. In this paper, we address both limitations by proposing a deep neural network based algorithm. In the training phase, a pairwise deep network is not only trained to perform classification, but also to project data points into another space, where the similarity can be more precisely measured. In the query selection phase, the learner selects a set of instances that are maximally uncertain and minimally redundant (exploitation), as well as are most diverse from the labeled instances (exploration). We evaluate the effectiveness of the proposed method on a variety of classification tasks: MNIST classification, opinion polarity detection, and heart failure prediction. 
Our method outperforms the baselines with both higher classification accuracy and faster convergence rate.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.67" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/575cdfae744f3c3972a4c61f3cdcc718b9bc8891", "sources": [ "DBLP" ], "title": "Deep Similarity-Based Batch Mode Active Learning with Exploration-Exploitation", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "576f13a5f349ecc60e5e491395e8aa7a9c9f0c05": { "authors": [ { "ids": [ "2682449" ], "name": "Muhammad Bilal" }, { "ids": [ "1709876" ], "name": "Marco Canini" } ], "doi": "10.1145/3127479.3127492", "doiUrl": "https://doi.org/10.1145/3127479.3127492", "entities": [ "Algorithm", "Apache Storm", "Benchmark (computing)", "Big data", "Black box", "Heuristic", "Hill climbing", "Program optimization", "Stream processing" ], "id": "576f13a5f349ecc60e5e491395e8aa7a9c9f0c05", "inCitations": [ "5655f16d3c46537f951b5686c905f15c2f35991c", "53cc6bf305539b4bd8829df42996e0eb12512434", "414163f9be8735861e18f767b7ada35c9a2ece35" ], "journalName": "", "journalPages": "189-200", "journalVolume": "", "outCitations": [ "4bac8e38be2a30301c99856b1822a88891569d3f", "25dcb849cb146a2afc51ad092fba70570bd4de42", "69819057c9a885439b7ae13604bcfa456b402875", "5208060771fd213eefd827e3e1260b939f1aed6d", "1a19bab56d8ae4a325b650de71cd1d908c7bd715", "0b5105bbe6635b55d8a0677071b44e4000f2f6d4", "274a6c951c4aa82e6ef6b9f63c11f0ef66722c20", "09d8995d289fd31a15df47c824a9fdb79114a169", "81c343a4f02553094397824c715a852c75091a7e", "0e4b886ba6c47faa4c38fc33316ca9e6124eb37f", "16dc5417e1a558895a9b9561d31480bdc4abe295", "0ae3d90dddc8d48d735701bcccbb616b10fbd302", "40fecfef456c760912685b372151732b38e69d6e", "eda799eb0449f5d9d15577043c597e0d4e143d60", "5075192e0e25af961420412fed1f848282ae313e", "9da1d06e9afe37b3692a102022f561e2b6b25eaf", "3ede9f94ee174dae45f3e95cf3a45df2dbe05307", "1dc8960ee89252ba82d881b17211542017e4c597", 
"3af5e48a741634d2572b839ca57b68929cd2d648", "178ff3ab1afcd6fb348a9805babe0a5c814be5af", "689daac32ba52ad5d72178fd4d5e093fb9501132", "606968c2ef51f2ce58c627bb2e43aaabdaff7c4c", "3237988284481bcd75894f9cb4f4d43b6aa4b561", "10dcd5574dca03395b507fbb4d0c90f804cbbf19", "5f41cc7c081b294f684928c35a08626490ec4f8a", "e847c3ec130da57328db79a7fea794b07dbccdd9", "208cdf363b4fc8343815393aed9551eed033df18", "2c688c40374fee862e0f0038696f2951f1927337", "8140df13c0b68c3db1feda405e53a067d90e4faf", "4a520c1818fc7ca560331234f6bee68d4d8bd302", "0f08e646f08b4f36c18fe36fed8e5ec35389b55b", "e0dbc2deeb87f9c17e7b2b298e0c8f4eb1bc3dcc", "d23dc281afd418772c3dea9b056013471882ac15", "72000109547f17c849c2ee6e2825784e64b70cea", "63882b1819c2bfbd7025504f99c4990049b06c46", "52d81096f46be0e75f85e0b7eeda65640c281630", "9b707fda4ef927f92d9ecb86dea82bd1ede59d49", "1f1f47da8fff8da53589d7eab36d6bae32b2c3d2", "6f2270c81885e2f5b3b6bc86f0b2099af9c55534" ], "paperAbstract": "Optimizing the performance of big-data streaming applications has become a daunting and time-consuming task: parameters may be tuned from a space of hundreds or even thousands of possible configurations. In this paper, we present a framework for automating parameter tuning for stream-processing systems. Our framework supports standard black-box optimization algorithms as well as a novel gray-box optimization algorithm. We demonstrate the multiple benefits of automated parameter tuning in optimizing three benchmark applications in Apache Storm. Our results show that a hill-climbing algorithm that uses a new heuristic sampling approach based on Latin Hypercube provides the best results. 
Our gray-box algorithm provides comparable results while being two to five times faster.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3127492", "https://mcanini.github.io/papers/automagic.socc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/576f13a5f349ecc60e5e491395e8aa7a9c9f0c05", "sources": [ "DBLP" ], "title": "Towards automatic parameter tuning of stream processing systems", "venue": "SoCC", "year": 2017 }, "57774604456cffa77fcf57087bbede72a23994b6": { "authors": [ { "ids": [ "2238137" ], "name": "Antoine Delignat-Lavaud" }, { "ids": [ "1694521" ], "name": "C\u00e9dric Fournet" }, { "ids": [ "2163632" ], "name": "Markulf Kohlweiss" }, { "ids": [ "2975058" ], "name": "Jonathan Protzenko" }, { "ids": [ "3351977" ], "name": "Aseem Rastogi" }, { "ids": [ "2202482" ], "name": "Nikhil Swamy" }, { "ids": [ "3132217" ], "name": "Santiago Zanella B\u00e9guelin" }, { "ids": [ "1736343" ], "name": "Karthikeyan Bhargavan" }, { "ids": [ "5516094" ], "name": "Jianyang Pan" }, { "ids": [ "1926012" ], "name": "Jean Karim Zinzindohoue" } ], "doi": "10.1109/SP.2017.58", "doiUrl": "https://doi.org/10.1109/SP.2017.58", "entities": [ "AES instruction set", "Adversary model", "Algorithm", "Authenticated encryption", "Authentication", "Cipher", "Concrete security", "Correctness (computer science)", "Cryptographic nonce", "Cryptographic primitive", "Cryptography", "Dependent type", "Encryption", "Firefox", "Galois/Counter Mode", "Google Cloud Messaging", "High- and low-level", "Multiplexing", "Padding (cryptography)", "Poly1305", "Primitive recursive function", "Provable security", "Reference implementation", "State (computer science)", "Telegraph key", "Transport Layer Security", "Type system", "Typing" ], "id": "57774604456cffa77fcf57087bbede72a23994b6", "inCitations": [ "1458c6a3b87952fa4294ac35f257539b9e309bf2", "0b465eb882ea52ad9f592188d2d3f8a313745f47", "9c7218c8effa7691d507b08d4b222c403ce26c4a", 
"eb8f783643a4be0d62fafb5c6236eebcc4a54a9b", "1d99ce3375cc1c65c07d9fb358e1a101f1d2590a", "4e6841a87f67a39d5b145f1dbc88000cca3b213d", "1a553d9598f81bc73151acaf71f891d05742f707", "594dc2ad971479e1abe78ddb6f5818650810c127" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "463-482", "journalVolume": "", "outCitations": [ "ac2a9d093fef9b31d50222b737cc3aa686a0888d", "016cc501dbd90b9d01335609ba6a71289900fc12", "04bb092c83242cb708d2653bd537c99643e8386d", "18c8958e7b7581244f1e9635a8cb4fa325619999", "1ac599ecd2e740a044280bd59c7363ef0850e5a7", "d4a8fccaffc440f52172fc3e3cacf048b72e244c", "93e1542b3e051b46b7b96eec94cc306468b9f745", "15f53e82bdb947b38cd9b0657fe6f22a6d492dbb", "1d081dbf3e9afebafac90fdeed4bfa788012142f", "18e1a5a400bc1c80cd0e275d88dce6c97f7cdec1", "dbfd312448babe69654697020c8e1a5a3c5f4b29", "ee140738cba12a5a218bf5890e36298410211149", "23e36a534c4d369c331f95e17771c4d3da96a198", "58610c2935ac60e5fc60a4162375a78f5e4f9691", "abb39b98016e1809019245dd0a9d5a7e8473a710", "6b22ed103bc3d86004599992380b8b3104c75df5", "40860f2db7516f09836ef5bbd65288a4e0957af7", "400251fab502adf5a8ecdf6e5ba7d522bfe5cf1a", "1a79a3efec3d4a177bae7326dd75e33cf362120d", "6bc308af54fe8c71993d08c9d796947eb2cfc6f2", "6b581ad1857ca39e440575fb600f6c9c4544d93e", "0f9a849aaa5a496cc8e8b8b5df2762056675c813", "9f95eb7ce7ce190c7c8e6fca26de1a283f7007b1", "e93d91f499964b841e25769c0a826ab1906c0b83", "81b7f4c7c782a63f2cf6771d096ea9177f7ca4f7", "64464198f4e6c10611cfb7dfe26bbb7ca4ddd344", "482dcfb965de4e9362cdd0dc144c03b35d7e500f", "0037875e7321eb65867ff47b0e22a080b84502da", "4e97b0de1c273fb83e26059d841a1df47a9f5872", "90461c89b5a3c35978e861afcf05a8f522ff4e28", "1fccfaca3c964e1943ac24db8ff472c78d27682b", "1e7768b135545d473bf4a857f2bbb374ae960dc5", "43c046c3f3b78bec2b528d45b3ded4bb0046d426", "7eed7154d0590c1e6eb64d4ae7935a526deac4a4", "81edbc8b8cf013ef1be73e1ca9b4da10b29148a5", "4e6f688751813cd2d024bbd18d3fdddedc7c7180", "924706bb4e334c596dade7c8e37f75296f958900", 
"05ae289245b5a9222a1a6fc3f36910c3cb0f4662", "e298114c5abac2b048020d658ecfb7d73d6a422a", "5e9244286f575e3307dac938552095d3433f332d", "8ca19c05213aad8009b3cbc25a133fe4486f4669", "265f7bdcb3e6bb6d32146113e3929e2365bbd5af", "b4740cf1c976fa4937fa152f1128a36698b2b423", "db0d1f3ed4a418e126d0281d5b3aadd1fd45c982" ], "paperAbstract": "The record layer is the main bridge between TLS applications and internal sub-protocols. Its core functionality is an elaborate form of authenticated encryption: streams of messages for each sub-protocol (handshake, alert, and application data) are fragmented, multiplexed, and encrypted with optional padding to hide their lengths. Conversely, the sub-protocols may provide fresh keys or signal stream termination to the record layer. Compared to prior versions, TLS 1.3 discards obsolete schemes in favor of a common construction for Authenticated Encryption with Associated Data (AEAD), instantiated with algorithms such as AES-GCM and ChaCha20-Poly1305. It differs from TLS 1.2 in its use of padding, associated data and nonces. It also encrypts the content-type used to multiplex between sub-protocols. New protocol features such as early application data (0-RTT and 0.5-RTT) and late handshake messages require additional keys and a more general model of stateful encryption. We build and verify a reference implementation of the TLS record layer and its cryptographic algorithms in F*, a dependently typed language where security and functional guarantees can be specified as pre-and post-conditions. We reduce the high-level security of the record layer to cryptographic assumptions on its ciphers. Each step in the reduction is verified by typing an F* module, for each step that involves a cryptographic assumption, this module precisely captures the corresponding game. 
We first verify the functional correctness and injectivity properties of our implementations of one-time MAC algorithms (Poly1305 and GHASH) and provide a generic proof of their security given these two properties. We show the security of a generic AEAD construction built from any secure one-time MAC and PRF. We extend AEAD, first to stream encryption, then to length-hiding, multiplexed encryption. Finally, we build a security model of the record layer against an adversary that controls the TLS sub-protocols. We compute concrete security bounds for the AES_128_GCM, AES_256_GCM, and CHACHA20_POLY1305 ciphersuites, and derive recommended limits on sent data before re-keying. We plug our implementation of the record layer into the miTLS library, confirm that they interoperate with Chrome and Firefox, and report initial performance results. Combining our functional correctness, security, and experimental results, we conclude that the new TLS record layer (as described in RFCs and cryptographic standards) is provably secure, and we provide its first verified implementation.", "pdfUrls": [ "http://eprint.iacr.org/2016/1178.pdf", "https://doi.org/10.1109/SP.2017.58", "https://eprint.iacr.org/2016/1178.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/08/191.pdf", "http://www.cs.umd.edu/~aseem/record.pdf", "http://eprint.iacr.org/2016/1178" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/57774604456cffa77fcf57087bbede72a23994b6", "sources": [ "DBLP" ], "title": "Implementing and Proving the TLS 1.3 Record Layer", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2016 }, "5787330b0fb8d73d60e17ec462fa6022a6c62399": { "authors": [ { "ids": [ "2812070" ], "name": "Chunzhi Su" }, { "ids": [ "2174285" ], "name": "Natacha Crooks" }, { "ids": [ "40499672" ], "name": "Cong Ding" }, { "ids": [ "2445753" ], "name": "Lorenzo Alvisi" }, { "ids": [ "35386249" ], "name": "Chao Xie" } ], "doi": "10.1145/3035918.3064031", 
"doiUrl": "https://doi.org/10.1145/3035918.3064031", "entities": [ "Arnold tongue", "Attribute\u2013value pair", "Benchmark (computing)", "Concurrency (computer science)", "Concurrency control", "Control system", "Global concurrency control", "IBM Tivoli Storage Productivity Center", "Key-value database", "Lock (computer science)", "Throughput", "Two-phase commit protocol", "Two-phase locking" ], "id": "5787330b0fb8d73d60e17ec462fa6022a6c62399", "inCitations": [ "af941012dcc736a55f5b9ba0d409d9fad1843eb4", "e0c0d646f7107bb07aac3deacb1e63009740a80d", "fab01f80c9e03d94f6e58520c92de620a426ce07", "8c17cb64a2153ed38d7a2517ac6b57083e0a0eff" ], "journalName": "", "journalPages": "283-297", "journalVolume": "", "outCitations": [ "5c9793fa07fcaaae864eb89fd1c1b9f6905ec546", "00ac447d02035c26c7e2852c2457fe812e89038f", "8d1c0ae7bbe138bc19abf66ca918f46b244b1f5d", "645f46933f49aa0ee730d7cac4af77c537a45950", "ab310a105f6d5b04d798c4be0d6890ba385463c4", "98cca67dfd0320d56030dd6637a733436d2b521e", "17c6f330d854435e8d8faed245f79b94740a45f1", "9ded1759123344a6a747641def78fb17a549f39a", "0d35a84b3fade29ca52d2462d024da1ac313c800", "dfe3731d484dba672b3b6ad5f94e58aa7b3e7f2e", "9c98996b0654c88ca2ccdce3a3fc54ea957dde56", "27682071ccc226220fdfafaed42d35826309d692", "a33371ec053b61652f41d18e86dae07fe04a616b", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "8cf80fc67e804c88867b6faafafeb842d26227c6", "2e50af2320dab632d8046b6d4c130ae6cce8903f", "1a74681e1b9317d86c0f6d0567315322be0d2548", "35f751e46799e3a91425267819f40dce273abec1", "6460e782a12649a478bbaeb9c149f59e206d9540", "095a3cee30d64d3a6f22caadd58c45c5cd0b83e9", "578636359b81d9db02475231016d788e5d4d4ccc", "412a9e54bbb31e12d008a9579994e009c5b40b46", "56f6aec0132e56769e2036bbeff791dfa137d107", "507a4cfc0e6a2f03ff0fabd0115739970c1f68fc", "2146a0384f58500ad7c0865c8518b15bb84918a2", "861fbac82ae5ec0ea654d0d95ce4d48de62419ea", "3702d6e0c78050f3261fdbf0eb1aefbac59fb8cf", "33457f49553d918e912c2d8c54b81f4fd8a4c234", 
"0bddbe35fa6e3cf625d15553365a690d3a6bf7aa", "09ecdb904eb7ae8a12d0c6c04ae531617a30eafa", "c40d14cde10a9220185f9250a11f2ace45b1d668", "10551c91b4d36d1009b23b4d2b88a9e1733fe029", "05885dbd3ccbbe744a2ee1c39126bd263140e741", "643a5ea2791f56ed58dcf50141301216de10bb9d", "136eefe33796c388a15d25ca03cb8d5077d14f37", "0804ed47a40fbe6deb5ce93efe551086695ae393", "1ae507f38fbe2301f4f7fbcd64e2f49afe00a59c", "43ae9dc5aef08698c90700bc55049f7a9a8cb68a", "6f2f219a4f6d64843efe35f868ed919ce8b3a031", "50600eeb8698ddac8136c67ceb776faafa35c8ca", "08d1cedbbaa798855e30fa7dc9ddbf88060b1399", "9748241beb02ef1e2d0e6dc877c04b354033a838", "2888c136064ff5527a0bb370ac1d9bf71939e066", "1664b784dd7d446ee8838e0eec5b980f61792007", "2543a986d875f86119cb4ad9b1e287873ac4bce2", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "4827cc74dba0c39172554cf0116eb111797f0d1b", "5dd350cee6ecfd097b57772f89e6341ff05b5725", "624cb175af600b7749bce00c0932e2a10f72e564", "62d66e4f8d7fe942facae6566453dab3e2f75a91", "07d847f310d5fa9138f461f0a25c5e0024f1c4af", "a05ca97c7128fe9b74b550896291797f1573dcd2", "26f747596560040c908d7f453149338460923445", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "bb91368e1dd77d99176a6d08b3bd98be7ab23d12", "5c0ee5e71daead36b7ac3ca13812b341499c5a62" ], "paperAbstract": "This paper presents Tebaldi, a distributed key-value store that explores new ways to harness the performance opportunity of combining different specialized concurrency control mechanisms (CCs) within the same database. Tebaldi partitions conflicts at a fine granularity and matches them to specialized CCs within a hierarchical framework that is modular, extensible, and able to support a wide variety of concurrency control techniques, from single-version to multiversion and from lock-based to timestamp-based. 
When running the TPC-C benchmark, Tebaldi yields more than 20× the throughput of the basic two-phase locking protocol, and over 3.7× the throughput of Callas, a recent system that, like Tebaldi, aims to combine different CCs.", "pdfUrls": [ "http://www.cs.utexas.edu/~lorenzo/papers/Su17Bringing.pdf", "http://doi.acm.org/10.1145/3035918.3064031", "http://cding.org/papers/su2017bring.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5787330b0fb8d73d60e17ec462fa6022a6c62399", "sources": [ "DBLP" ], "title": "Bringing Modular Concurrency Control to the Next Level", "venue": "SIGMOD Conference", "year": 2017 }, "57a42c9fcd3936d362975c090ccdcb54c0ff26fc": { "authors": [ { "ids": [ "39962143" ], "name": "Ayham Kassab" }, { "ids": [ "36857415" ], "name": "Jean-Marc Nicod" }, { "ids": [ "2005060" ], "name": "Laurent Philippe" }, { "ids": [ "1905182" ], "name": "Veronika Rehn-Sonigo" } ], "doi": "10.1109/ICPP.2017.63", "doiUrl": "https://doi.org/10.1109/ICPP.2017.63", "entities": [ "Computer cooling", "Heuristic", "Multi-core processor", "Point of View (computer hardware company)", "Power supply", "Scheduling (computing)", "Solar cell", "Synthetic data" ], "id": "57a42c9fcd3936d362975c090ccdcb54c0ff26fc", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "543-552", "journalVolume": "", "outCitations": [ "995c6b5e9ee851f1b70ed85a00867eb79714c246", "a12f10e9eee218f3b50c3a93f52d33f945a369e5", "517b473791bb51edebf216544e6d554e5a2a1ba9", "23dadf25f3efacbc9c66f69093d656ad5b003529", "6f2b07465f08eff8cd9005f66ec0d94c3e5a7e5a", "33708fe61813316f2810616d28f94c4eac4036bb", "457e1d17518b5aba6516bdd91cd7938926521795", "0566092157791cfa821d9cb2d86f696b9951f29e", "728dfc52a2f13570a29c9d5bf257f02bf98682bc", "82bb26b663c5c6768677c7951ae116fa78bfb081", "4dbc1467275f8a3152bab09b92fb42072cbbab23", "a620990b4e56df8ae54871a78bbce3b92780438c", "427eaf5e08fc96c09571048b62ca1d3caac7608c", 
"1c218aa3b2460f2495ea2462f355937e78e2b329", "1d5e81244451dc58a6e6d4c9d2b8fbff6f55e10b", "026a63d57667f92b0f1823aff099f2dc88cf64d4", "377175d109126aea51714e8ef0e4324d28eb6fcc", "7f2425afee4fc7367d85ea79757d06a0839802d0", "d8057d514036d51051af78476468fe350cb7488a", "d7459814fef788974755eb59fab8e343da625449" ], "paperAbstract": "Energy consumption has become a major concern in the recent years and Green computing has arisen as one of the challenges in order to reduce CO2 emissions in the computing domain. Many efforts have been made to make hardware less energy consuming, reduce cooling energy of data and computing centers by relocating those facilities to cool regions and other. A novel approach to make the computing domain greener is to add renewable energy sources for the power supply. The challenge of this work is to consider computing facilities which are solely run by renewable energy sources such as solar panels and wind turbines. In this work we tackle the problem of scheduling independent tasks within a predicted power envelope that varies during the time. First we evaluate different instances of the problem from a theoretical point of view. 
Then we propose several heuristics for the case of multi-core architectures and we assess their performance on synthetic workloads and power envelopes.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.63" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/57a42c9fcd3936d362975c090ccdcb54c0ff26fc", "sources": [ "DBLP" ], "title": "Scheduling Independent Tasks in Parallel under Power Constraints", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "57ac29e03ef2998312cdb399ecceceb36c1fb7b0": { "authors": [ { "ids": [ "1696121" ], "name": "Zhe Chen" }, { "ids": [ "1945738" ], "name": "Zhongmin Li" }, { "ids": [ "1701714" ], "name": "Xu Zhang" }, { "ids": [ "35538656" ], "name": "Guorong Zhu" }, { "ids": [ "1722346" ], "name": "Yuedong Xu" }, { "ids": [ "1784158" ], "name": "Jie Xiong" }, { "ids": [ "1705489" ], "name": "Xin Wang" } ], "doi": "10.1145/3143361.3143377", "doiUrl": "https://doi.org/10.1145/3143361.3143377", "entities": [ "Aliasing", "Experiment", "Frequency-hopping spread spectrum", "Home automation", "Indoor positioning system", "Limiter", "Multipath propagation", "Spatial anti-aliasing", "Wireless access point", "Zero suppression" ], "id": "57ac29e03ef2998312cdb399ecceceb36c1fb7b0", "inCitations": [], "journalName": "", "journalPages": "238-250", "journalVolume": "", "outCitations": [ "05fe031e53dd8990e7076a91277cb2b74e22b811", "00f324e77f618eb32f9f5b26f2943f287f596f80", "5b7a6c35b258d2c32c09f0377b0c79ba02c9a9a3", "8317f40c569af2b5bb0aefbb6b07d6a991c1204e", "29e9cd18af650b7e448dea668121a1d98afd3c46", "d66472c3048549a952f2c58ad92f5d14e7dc1d23", "ec0bf3b110af1f1352c179405435173f4e4a1fc9", "0d424feecc7420d7b02a4afa2e494b0364341921", "a7016075b373b572b3ced9f6712f0d565c608c7c", "2b00e526490d65f2ec00107fb7bcce0ace5960c7", "67f4f3c1b772f86815f88bc39231727693f5deb2", "889a9ac61712b5a6ba9241bf0fea16b3a4223f09", "e5edfbdf645a3dbcdaf7d9fcbf350c67fbbadae5", 
"117450cc965a994b0e570c98ab0d9a3c1e4f5b60", "1f911ae809066d4a55598bce939a466de980b13b", "18b94ae2f53920d884f77e9aa8a32c80f3005759", "7e403d160f3db4a5d631ac450abcba190268c0e6", "c3543736a6f6372dac4cd54b5b5e4acfe5b0f152", "6ba82499fffb77b74d0801da1928f14679df490f", "101aaa6b7a3ebd049412265a43f8aed414f44db1", "82802e411495bbad77fa2415c6d4633dde180764", "16ccb8d307d3f33ebb395b32db23279b409f1228", "b5d8b259052ffecd1fcf3eae9b08e31b41c24ec0", "1b1e0ac5224582b1df37b11ca08ac0a0e5c98900", "0b3fa65882b095e97353814c4266d8b934f62eab", "3bb76c2989cb4aeae3b20f42e619a862f0d871ca", "3da658b2cad5ce155abfb9035191892e9515b8ea", "d80c46b3e3553f77db3c3b8814eb412d38957bcd", "8012327465664ca6a64ee4d202536ec6c6d024f1", "08616ca445012df0e3c982f742d2662bf0f0ce6e", "18c4419c3137aff6c6def63c5fa3e67ab0326c80", "666fbcf4697fc64d576b9e007af6ee612d10a9f8", "0017542f2f908b0f2ce3558ca1767687b1db42bb", "32e12e79fad14da18a758715f8f62ae97143b6a0", "e846a88ca573b610a41e348ecec09fd33c37fae3", "e39c17f6c5a83581890640049b075badff0cd34d", "18f355d7ef4aa9f82bf5c00f84e46714efa5fd77", "2d3c4a0d1ba1f4e015aacb691699f649fc284332", "5a0e84b72d161ce978bba66bfb0e337b80ea1708", "510dfb8ecc6cb884321afe436e3ac50e667a7a9e", "21d0f3b4c847e04be0f3735f5f55bffe32e942e3", "540ac698963add83d4e47d52edc54c84160eddd1", "17ae4340ebcb3f7d4d77fdc34106d80c924ac7ed" ], "paperAbstract": "Owing to great potential in smart home and human-computer interactive applications, WiFi indoor localization has attracted extensive attentions in the past several years. The state-of-the-art systems have successfully achieved decimeter-level accuracies. However, the high accuracy is acquired at the cost of dense access point (AP) deployment, employing large size of frequency bandwidths or special-purpose radar signals which are not compatible with existing WiFi protocol, limiting their practical deployments. 
This paper presents the design and implementation of AWL, an accurate indoor localization system that enables a single WiFi AP to achieve decimeter-level accuracy with only one channel hopping. The key enabler of the system is we novelly employ channel hopping to create virtual antennas, without the need of adding more antennas or physically move the antennas' positions for a larger antenna array. We successfully utilize the widely known \"bad\" spatial aliasing to improve the AoA estimation accuracy. A novel multipath suppression scheme is also proposed to combat the severe multipath issue indoors. We build a prototype of AWL on WARP software-defined radio platform. Comprehensive experiments manifest that AWL achieves a median localization accuracy of 38 cm in a rich multipath indoor environment with only a single AP equipped with 6 antennas. In a small scale area, AWL is able to accurately track a moving device's trajectory, enabling applications such as writing/drawing in the air.", "pdfUrls": [ "http://doi.acm.org/10.1145/3143361.3143377" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/57ac29e03ef2998312cdb399ecceceb36c1fb7b0", "sources": [ "DBLP" ], "title": "AWL: Turning Spatial Aliasing From Foe to Friend for Accurate WiFi Localization", "venue": "CoNEXT", "year": 2017 }, "581a067e10b03d21deb8ebff4788d1e088ef8a37": { "authors": [ { "ids": [ "3300963" ], "name": "Jordyn Maglalang" }, { "ids": [ "1679176" ], "name": "Sriram Krishnamoorthy" }, { "ids": [ "3379439" ], "name": "Kunal Agrawal" } ], "doi": "10.1109/ICPP.2017.16", "doiUrl": "https://doi.org/10.1109/ICPP.2017.16", "entities": [ "Centralisation", "Cilk Plus", "Experiment", "Jumpstart Our Business Startups Act", "Load balancing (computing)", "Locality of reference", "Multi-core processor", "OpenMP", "Scalability", "Scheduling (computing)", "Sun WorkShop TeamWare", "Threading Building Blocks" ], "id": "581a067e10b03d21deb8ebff4788d1e088ef8a37", "inCitations": [], 
"journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "70-80", "journalVolume": "", "outCitations": [ "027d73cc11576ca9b3ff773c3f65b4159ebded5e", "eb82d3035849cd23578096462ba419b53198a556", "2540f0f7bbb395226a497a10dea036054eaedc3c", "7fb9cfac02565c0b5ad3ce2a5662057a7474d80e", "38d3ff3b608a334b12400b031c7cc483923bc629", "2ee17eaa69d7a5e38d709cab3551f53163678817", "4dc578364f357b993b5554b9181c90c84aa6b4d1", "0fb659af82f2277c8a62ac888f4bfd01570e5470", "6f0859eb31cb85ca3bd4f92aa1f0297077af63b0", "a68b4f658d580133a292281459aa87c25d112f09", "390e82956e4c065df5a0474d62096a1e13e873b2", "40c5441aad96b366996e6af163ca9473a19bb9ad", "1eaa2899ee0679acf20cdb20b34bed32d01babc0", "06a6da00498357a2b908b1da7bfb5f19662abf1f", "0def25a673a09c6620485c78bbb075176f31062f", "0794a60523f9504ef9dee181659b6131b5c4afa5", "262c123c6325d7b4c2edc904d6e85d352ab266b9", "1f63b8f323a6f4b3d580fd4a267a3e9f6d1a653e", "141004dee9e799b40bfaf50b4a72618613137250", "31181e73befea410e25de462eccd0e74ba8fea0b", "7bb8469f9461ef1794f7110ea8762312a120f065", "0660fe0a1cc9ca03847de601589b3beb74f7a51d", "07c17001acfbbdbd348e748620b35f983e95dd8b", "333da42f4369dff8ca905ad21ea6ee2f5dc99d55", "4ef3a3ba2ba648238d10ba804ba63467948df2f3", "6a668dfe4fa05408a5f752201ad83e02181ed6e2", "550d51485e3a7e7deb41f12bb8c66c7474a416c3", "5970ed52ee13472729ee7403085274554b5cba2e", "04717b51d7c7dd598d732fb8a293d69d45c0fa9f", "7d76ba8c4f6776c645673e2c3f6eb88b1a0ca7aa", "73a219c149e510bce0c49d2690e498e109cc419b", "05db6b886ce6bf260dc725450873cdb0b2a0c065" ], "paperAbstract": "Dynamic task graph schedulers automatically balance work across processor cores by scheduling tasks among available threads while preserving dependences. In this paper, we design NABBITC, a provably efficient dynamic task graph scheduler that accounts for data locality on NUMA systems. 
NABBITC allows users to assign a color to each task representing the location (e.g., a processor core) that has the most efficient access to data needed during that node's execution. NABBITC then automatically adjusts the scheduling so as to preferentially execute each node at the location that matches its color—leading to better locality because the node is likely to make local rather than remote accesses. At the same time, NABBITC tries to optimize load balance and not add too much overhead compared to the vanilla NABBIT scheduler that does not consider locality. We provide a theoretical analysis that shows that NABBITC does not asymptotically impact the scalability of NABBIT.We evaluated the performance of NABBITC on a suite of benchmarks, including both memory and compute intensive applications. Our experiments indicate that adding locality awareness has a considerable performance advantage compared to the vanilla NABBIT scheduler. Furthermore, we compared NABBITC to both OpenMP tasks and OpenMP loops. For regular applications, OpenMP loops can achieve perfect locality and perfect load balance statically. For these benchmarks, NABBITC has a small performance penalty compared to OpenMP due to its dynamic scheduling strategy. Similarly, for compute intensive applications with course-grained tasks, OpenMP task's centralized scheduler provides the best performance. However, we find that NABBITC provides a good trade-off between data locality and load balance; on memory intensive jobs, it consistently outperforms OpenMP tasks while for irregular jobs where load balancing is important, it outperforms OpenMP loops. 
Therefore, NABBITC combines the benefits of locality-aware scheduling for regular, memory intensive, applications (the forte of static schedulers such as those in OpenMP) and dynamically adapting to load imbalance in irregular applications (the forte of dynamic schedulers such as Cilk Plus, TBB, and Nabbit).", "pdfUrls": [ "http://openscholarship.wustl.edu/cgi/viewcontent.cgi?article=2168&context=cse_research", "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.16", "http://www.cse.wustl.edu/~kunal/resources/Papers/nabbit-c.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/581a067e10b03d21deb8ebff4788d1e088ef8a37", "sources": [ "DBLP" ], "title": "Locality-Aware Dynamic Task Graph Scheduling", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "581d5076d0f8454f9a7d67e16e2f20bb051216d5": { "authors": [ { "ids": [ "1783012" ], "name": "Jiawen Sun" }, { "ids": [ "2459579" ], "name": "Hans Vandierendonck" }, { "ids": [ "1698312" ], "name": "Dimitrios S. 
Nikolopoulos" } ], "doi": "10.1109/ICPP.2017.27", "doiUrl": "https://doi.org/10.1109/ICPP.2017.27", "entities": [ "Algorithm", "Bitmap", "Data structure", "Graph drawing", "Graph partition", "Graph traversal", "Locality of reference", "Non-uniform memory access", "Polymer", "Programmer", "Shared memory", "Speedup" ], "id": "581d5076d0f8454f9a7d67e16e2f20bb051216d5", "inCitations": [ "fc7fca6f7f8872ab9d9f46a33e3d901980ab9af6" ], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "181-190", "journalVolume": "", "outCitations": [ "ed0d6f07f0f68372b4e977fc8b1c965fd11516c5", "500bdbe7c083020bc56a596c2f987d9ed6213ec5", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "4e088d1c5bc436f1f84997906223e5f24e1df28c", "eb82d3035849cd23578096462ba419b53198a556", "6f7cd29a3dfdcb2f6880a022e13054542020c5ce", "4c2d303100688b094228d321ef9b7e6f441dfc47", "70954d2477d08afa838e827459df0e3ca5882912", "9edbd80ada5cec84254e1815d45c4d02d17412c0", "94c20561f102ede79c461c6aeee74eb7a55d5eb4", "0706356c9ab6014d6b04577d38289ea8328291a5", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "956ebc5d425188319b60ea7df90de9b5bceac3f9", "8e67d1085da29e5aa1e758751bfa5469ac07023e", "bdc2ce9d3833e809844f40dbe8a4799a9a74cb87", "0ad8e89091eed09217e66adc98136126addc2619", "141e35263ab810983c90d47ad62eb4fab5e51717", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "259e93de2f10d395a1bdfb2dc6da72b6a3998572", "630b514e68c0de62fa3dca5a45e3131f1515c90c", "15b9cea4970ca2bf6bde3f54269f75e1ebda8bb5", "254ded254065f2d26ca24ec024cefd7604bd74e7", "11e4d4d00c7b1e3aa9fcf3c490b635df98827dd9", "87ee99d4cc4e0601cbb519f6ddbac85772bdc49e", "17ca6a9ff19ef436595ae71138d3782b7e6d0913", "26deee037b221bd05ed34461819f5c067b745445", "3486aeaf540c48952120fe853d672af984f40a6a", "4587d4722317acd4e2a90b12f58ccc9de1ecc6ee", "1156f60e40548096df49528b1342bb3e88b0f378", "0608d9937c074520cdc93cc444cc1c77039c5332", "330f0fa6a29292c4efb4bf7ce89f662d7e9ff1b1", 
"4a87972b28143b61942a0eb011b60f76be0ebf2e", "55b3e22b56599ed8520deb1d7cb9ac460f4fa6bb" ], "paperAbstract": "This paper investigates how to improve the memory locality of graph-structured analytics on large-scale shared memory systems. We demonstrate that a graph partitioning where all in-edges for a vertex are placed in the same partition improves memory locality. However, realising performance improvement through such graph partitioning poses several challenges and requires rethinking the classification of graph algorithms and preferred data structures. We introduce the notion of medium dense frontiers, a type of frontier that is sufficiently dense for a bitmap representation, yet benefits from an indexed graph layout. Using three types of frontiers, and three graph layout schemes optimized to each frontier type, we design an edge traversal algorithm that autonomously decides which type to use. The distinction of forward vs. backward graph traversal folds into this decision and need no longer be specified by the programmer.We have implemented our techniques in a NUMA-aware graph analytics framework derived from Ligra and demonstrate a speedup of up to 4.34× over Ligra and up to 2.93× over Polymer.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.27" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/581d5076d0f8454f9a7d67e16e2f20bb051216d5", "sources": [ "DBLP" ], "title": "Accelerating Graph Analytics by Utilising the Memory Locality of Graph Partitioning", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "582a7ef6948b8797361b3623c403640deb545848": { "authors": [ { "ids": [ "38439596" ], "name": "Bo Fang" }, { "ids": [ "1764948" ], "name": "Qiang Guan" }, { "ids": [ "1775855" ], "name": "Nathan DeBardeleben" }, { "ids": [ "1715185" ], "name": "Karthik Pattabiraman" }, { "ids": [ "1747805" ], "name": "Matei Ripeanu" } ], "doi": "10.1145/3078597.3078609", "doiUrl": 
"https://doi.org/10.1145/3078597.3078609", "entities": [ "Application checkpointing", "Byzantine fault tolerance", "Dependency injection", "Experiment", "Failure rate", "Fault injection", "Fault tolerance", "Principle of good enough", "State (computer science)", "Supercomputer" ], "id": "582a7ef6948b8797361b3623c403640deb545848", "inCitations": [ "781cf9b4d17f89ad4b971d2a1655421378149e2d" ], "journalName": "", "journalPages": "117-130", "journalVolume": "", "outCitations": [ "46d571de72fb4b0820428ea5579022304f8a73b7", "30a22f2254f4874c6f0ae5a219138064a8495fa9", "68a1edd0d8834e21f2fa37bf0fb54cfe67f9915c", "108c840d5d1847948a2de0250490a327ae069ee6", "0a27b1526556e37a33b9d6fb73aac4ce676f366f", "0256f81e75c34b5aa6f932c29d11807cbd848dfb", "f2f3f15dbf10cc68503713cfc77d13f274019d54", "ac7b0302d527e19cc4988d0482d62755d52fcf25", "b2de9228510f30df18d53c259ab67c977bdfea87", "51afdc23a72d19f7e6b1d46d25f7c7bb1814e85b", "e49176e44f4eba4bff009cb45282a5369c7dfe05", "36572d9cee0979e8787eee44cd077376b780473b", "054be89214b39f314af8b8d0e4430cbb04222dbe", "9b27004c849f03034119e668d5f67625326ecd03", "71e0257bb18b9f4aadf1ebed38c27a157a814ba7", "12a5768f4901227b6529a2f7bd813c39898bdf57", "10824f12c211d700d4d1ad95b3ed8660cb8a3e59", "00eee29e698b420dc9f041c4fedba06ebc287af8", "51327c46f6db01ab9bde8aad63ac7d5ba2b94066", "654e303e59b75876d53b5184e3096805791f7c77", "04363665e3f99a839c051d938fc8782f1be574fe", "1f1c9b78f5566351690cb341e7c5020923bef78b", "025c101818da34b1b2e7e514c869724c8da81a9f", "5236160832766c58b1be2bf4f76f33d9d25b4600", "5ec3e8fccdd20e223978fc35b88327af82eb4324", "11d676173bc1ccfc03e6bab12b0c879ad7ae4707", "a4885822869c1138dc6b1d4e91f9aa9f16168f12", "2f81d5beee12b9bae27705ab4ddefdf9cc169cbe", "8ed4d8fd629512bb0767f0dc2a983f6353d26a42", "a5e28e0458ecea779174930702e953e85ef1047d", "37bbd889e2e6136b4826e60367e280d103415751", "01d62cd850496455ce1616500f491690effa5c98", "b295a2e3667b52b900950417c2e9b58b01938f34", "11991eed012c421b004b2060190d2bdd824ae1f4", 
"3876af5f9f7d7588bfa0fcdf9df8637925e0063e", "4d05269d97975cfc5a00d2c813d43093933edf29", "2f2128b60e15d87d4e565a8532076efa84fc752e", "a2f99528a2dd954f38f6e0bd42b686c165f23403", "476f757fb7b4907352062340b932ac91616ea73c", "c8c9d53ae2a674f294b144c160b95c321a638eac", "738a102562a662031039df7723da16d25627f2e2", "fb35b5bc1e02de4d9b31176c39247ee9ad6c3290", "6e1ceacdaa0c979cf52af5f478cc2a9891e2b6a0" ], "paperAbstract": "Requirements for reliability, low power consumption, and performance place complex and conflicting demands on the design of high-performance computing (HPC) systems. Fault-tolerance techniques such as checkpoint/restart (C/R) protect HPC applications against hardware faults. These techniques, however, have non negligible overheads particularly when the fault rate exposed by the hardware is high: it is estimated that in future HPC systems, up to 60% of the computational cycles/power will be used for fault tolerance.\n To mitigate the overall overhead of fault-tolerance techniques, we propose LetGo, an approach that attempts to continue the execution of a HPC application when crashes would otherwise occur. Our hypothesis is that a class of HPC applications have good enough intrinsic fault tolerance so that its possible to re-purpose the default mechanism that terminates an application once a crash-causing error is signalled, and instead attempt to repair the corrupted application state, and continue the application execution. This paper explores this hypothesis, and quantifies the impact of using this observation in the context of checkpoint/restart (C/R) mechanisms.\n Our fault-injection experiments using a suite of five HPC applications show that, on average, LetGo is able to elide 62% of the crashes encountered by applications, of which 80% result in correct output, while incurring a negligible performance overhead. 
As a result, when LetGo is used in conjunction with a C/R scheme, it enables significantly higher efficiency thereby leading to faster time to solution.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078597.3078609", "http://blogs.ubc.ca/karthik/files/2017/04/2017_HPDC.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/582a7ef6948b8797361b3623c403640deb545848", "sources": [ "DBLP" ], "title": "LetGo: A Lightweight Continuous Framework for HPC Applications Under Failures", "venue": "HPDC", "year": 2017 }, "5846f40cf1cb0042e3627cb2905101d992ab6bb6": { "authors": [ { "ids": [ "3344221" ], "name": "Timoth\u00e9e Ewart" }, { "ids": [ "27872289" ], "name": "Judit Planas" }, { "ids": [ "2758723" ], "name": "Francesco Cremonesi" }, { "ids": [ "17115203" ], "name": "Kai Langen" }, { "ids": [ "2984713" ], "name": "Felix Sch\u00fcrmann" }, { "ids": [ "3032993" ], "name": "Fabien Delalondre" } ], "doi": "10.1007/978-3-319-58667-0_10", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_10", "entities": [ "Application framework", "Blue Brain Project", "Complex systems", "Computational neuroscience", "IBM WebSphere eXtreme Scale", "Program optimization", "Simulation", "Systems design" ], "id": "5846f40cf1cb0042e3627cb2905101d992ab6bb6", "inCitations": [], "journalName": "", "journalPages": "181-198", "journalVolume": "", "outCitations": [ "171ef6765ddf9d22806146d8327ba082028ec32f", "da007366f0a6efe26c499fce8b1b737dbb27c6ab", "0665790f4a56ff46cecae8dd684139b2951cc3fe", "d841aeacde142f3ad8ff0c3d0a13b1394f943812", "092217c2267f6e0673590aa151d811e579ff7760", "28d17f7fbf165f9c2f41360e7a35804a657b1405", "37d3e85a8d99a756bcd8b93e12619dc84f9e877e" ], "paperAbstract": "The increasing complexity and heterogeneity of extreme scale systems makes the optimization of large scale scientific applications particularly challenging. Efficiently leveraging these complex systems requires a great deal of technical expertise and a considerable amount of man-hours. 
The computational neuroscience community relies on an handful of those frameworks to model the electrical activity of brain tissue at different scales. As the members of the Blue Brain Project actively contribute to a large part of those frameworks, it becomes mandatory to implement a strategy to reduce the overall development cost. Therefore, we present Neuromapp, a computational neuroscience mini-application framework. Neuromapp consists of a number of mini-apps (small standalone applications) that represent a single functionality in one of the large scientific frameworks. The collection of several mini-apps forms a skeleton which is able to reproduce the original workflow of the scientific application. Thus, it becomes easy to investigate both single component and workflow optimizations, new software and hardware systems or future system design. New solutions can then be integrated into the large scientific applications if proved to be successful, reducing the overall development and optimization effort.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_10" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5846f40cf1cb0042e3627cb2905101d992ab6bb6", "sources": [ "DBLP" ], "title": "Neuromapp: A Mini-application Framework to Improve Neural Simulators", "venue": "ISC", "year": 2017 }, "58716bec53433ce48ea9b2f632026415cc18db12": { "authors": [ { "ids": [ "1776259" ], "name": "Jiguang Wan" }, { "ids": [ "1721131" ], "name": "Wei Wu" }, { "ids": [ "6540942" ], "name": "Ling Zhan" }, { "ids": [ "34330412" ], "name": "Qing Yang" }, { "ids": [ "2624637" ], "name": "Xiaoyang Qu" }, { "ids": [ "2072948" ], "name": "Changsheng Xie" } ], "doi": "10.1109/IPDPS.2017.54", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.54", "entities": [ "Cache (computing)", "Computation", "Data model", "Experiment", "Flash memory", "Hard disk drive", "Linux", "Linux", "RAID", "Response time (technology)", "Sequential access", "Solid-state drive" ], "id": 
"58716bec53433ce48ea9b2f632026415cc18db12", "inCitations": [ "24919d5e5def0c1ca2c9a30fb97de94265b9fe3f" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "102-111", "journalVolume": "", "outCitations": [ "31c84cf3b24a2d9aee040a764129cbbc0047daf2", "1d5de7a7ed362ecd596ac9ed5b85bf19d5c08ef5", "07ea9af9e55ff2db2d637e8dbc4e3adeda684e4e", "0c732e52164f97eba5124ca25947cb132078ce54", "c5d954d13c1c620d78ebaba9afa120733e90ed09", "b29230b05b1b67fc0e1b9a8d5f8fc5a3da27ef62", "3f3ed6abdc2f51021a66f4762999733048ce80ea", "00502ffac06b69c797c30e52b8122de55c42fe6f", "2e7f2e84cccca89fbe7b654928029d7dd64fb384", "76d4f2374e4f5a9dfa69df8a9a33f627fff7e861", "1ff586720a08c814b70b174ec47b61567dfb7155", "73e85836599b5ab4f83afa2ae10fea99cb5d29d7", "3358850706a8ad2eb8489bb7790e8bbd3a5b6dba", "f05231b34690f92cbee73ccf0eb6104725b79d53", "02ac23384523c2e2f9bc52cd29313dfd5aad22a3", "5f9c1da7ff44a2f697e801a8101c5308d6eadf83", "31ceeced5d23193c369b98170c45e66bae6ff77d", "05961fc1d02ca30653dd0b4c906113db796df941", "17e72896255c6f4bd817893359fda37c60c5e3df", "607a678b5648121de7f0c8bfef619a60646bb8af", "28fb425b2d2cc3287628f9f6e8b31b7665ba47a7", "303f71ad0e145415aba9efe9ba96a1f734c63391", "726099036bb32c3fbaf1650d5900eeaa2ecc8fd9", "5d1bb922e5bf0d6558c5f17f1a6b6c722967e0bb", "3f9d4a16ec5d08c0309df743e73745f876b9abfa" ], "paperAbstract": "This paper proposes a new SSD cache architecture, DEFT-cache, Delayed Erasing and Fast Taping, that maximizes I/O performance and reliability of RAID storage. First of all, DEFT-Cache exploits the inherent physical properties of flash memory SSD by making use of old data that have been overwritten but still in existence in SSD to minimize small write penalty of RAID5/6. As data pages being overwritten in SSD, old data pages are invalidated and become candidates for erasure and garbage collections. 
Our idea is to selectively delay the erasure of the pages and let these otherwise useless old data in SSD contribute to I/O performance for parity computations upon write I/Os. Secondly, DEFT-Cache provides inexpensive redundancy to the SSD cache by having one physical SSD and one virtual SSD as a mirror cache. The virtual SSD is implemented on HDD but using log-structured data layout, i.e. write data are quickly logged to HDD using sequential write. The dual and redundant caches provide a cost-effective and highly reliable write-back SSD cache. We have implemented DEFT-Cache on Linux system. Extensive experiments have been carried out to evaluate the potential benefits of our new techniques. Experimental results on SPC and Microsoft traces have shown that DEFT-Cache improves I/O performance by 26.81% to 56.26% in terms of average user response time. The virtual SSD mirror cache can absorb write I/Os as fast as physical SSD providing the same reliability as two physical SSD caches without noticeable performance loss.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.54" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/58716bec53433ce48ea9b2f632026415cc18db12", "sources": [ "DBLP" ], "title": "DEFT-Cache: A Cost-Effective and Highly Reliable SSD Cache for RAID Storage", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "58a023832a3f2fc90330bd97f2a53df66017b946": { "authors": [ { "ids": [ "8280915" ], "name": "Yunpeng Song" }, { "ids": [ "35655261" ], "name": "Zhongmin Cai" }, { "ids": [ "1708494" ], "name": "Zhi-Li Zhang" } ], "doi": "10.1109/SP.2017.54", "doiUrl": "https://doi.org/10.1109/SP.2017.54", "entities": [ "Authentication", "Digital footprint", "Enhanced entity\u2013relationship model", "Hand geometry", "Laptop", "Mobile device", "Multi-touch", "Smartphone", "Smudge attack", "Spatial variability", "SwIPe (protocol)", "Tablet computer", "Touchscreen", "Usability", "Usability 
testing", "User experience" ], "id": "58a023832a3f2fc90330bd97f2a53df66017b946", "inCitations": [], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "357-372", "journalVolume": "", "outCitations": [ "403ff89fd5d518f70851889b0f1fb473c07b14ff", "ec4db8a804acee13220c74a2d42349e00d9f2343", "325c77469549b719c50dde71030e612992fe2dc1", "5becff7d8db7907df2b29b3e9a9c3b8cafe2caf7", "6a02a5ac2dd9b5251e421c5dda48a48f03439dcf", "0f16f6f478b5c788dce466eb50e36c612273c36e", "5fcdf1fe84cef1a1090b54f7dee4ddaca44a4b17", "3a6938407456499ebc33ae86a8b9370d913fbd77", "8e017f81d1c12e9b4fa7e28f94fc2c39ddfb1211", "96e47391bd7b47762d1ef3e944f04aa8f0028f07", "791eb376d4db96376eba3ef804657c5f0ba7229a", "0273d4de8fe5388b236d959616a1f4fdaf86bad6", "31f56ccd22979c19096768cb38461b8b5afe1cf8", "7c02498e2e125fbe61d1150ff32be3092a99d8d0", "06a09a77140c2344e5ce5094c34d2821f46946f7", "42fbf8ca5ca181090def483dad65e2e6c9d77719", "0103ac3c79bb886eb4c7a94bdaff968ee9a40df5", "66b60678eac9a69f0e253cc0eec62375a0584fd6", "8549c708f1b408ffaeb93f9e8114a7f413665f31", "02d9fecf91387b24828680dc5f458ca747a9e86e", "7833a35cd85b9e5743925d394ddf06c9d1a7679b", "027d74d535588ea7ee4f86c1c100a10c9038ad2f", "62bf472a70a99aa7af1280e23c95250132ba34e7", "5367d509d76c2efb144a681efd442ddbf3b25f4a", "67dfed34e03bc67ceb8a084df92dc4fdc5b663f8", "f6e1712ef0cc931491d602c97180b7f8e8294042", "016294079ee5fa35ec867cf2f658d9eb8cec4a4a", "99b75bfa6465aa60f5d3a6b6850d438e0919ac23", "17d8981123b6d74f1fb4e0e4b8469b25dcc538b2", "9932d1a0aaca0fd1c13718a69478ae1988d132c8", "f58a3a65cbcf4c76b85c1aa91d4a6c7e71f1bd35", "71e42050840ffdbc1a56c2fed148db843a17b523", "5c3556d4bf94ba51cea58c5d624aed19e6223e59", "413e68c139cdec13c58925268d82391c25d61c20" ], "paperAbstract": "In this paper we present a simple and reliable authentication method for mobile devices equipped with multi-touch screens such as smart phones, tablets and laptops. 
Users are authenticated by performing specially designed multi-touch gestures with one swipe on the touchscreen. During this process, both hand geometry and behavioral characteristics are recorded in the multi-touch traces and used for authentication. By combining both geometry information and behavioral characteristics, we overcome the problem of behavioral variability plaguing many behavior based authentication techniques – which often leads to less accurate authentication or poor user experience – while also ensuring the discernibility of different users with possibly similar handshapes. We evaluate the design of the proposed authentication method thoroughly using a large multi-touch dataset collected from 161 subjects with an elaborately designed procedure to capture behavior variability. The results demonstrate that the fusion of behavioral information with hand geometry features produces effective resistance to behavioral variability over time while at the same time retains discernibility. Our approach achieves EER of 5.84% with only 5 training samples and the performance is further improved to EER of 1.88% with enough training. Security analyses are also conducted to demonstrate that the proposed method is resilient against common smartphone authentication threats such as smudge attack, shoulder surfing attack and statistical attack. 
Finally, user acceptance of the method is illustrated via a usability study.", "pdfUrls": [ "https://www.ieee-security.org/TC/SP2017/papers/228.pdf", "https://doi.org/10.1109/SP.2017.54" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/58a023832a3f2fc90330bd97f2a53df66017b946", "sources": [ "DBLP" ], "title": "Multi-touch Authentication Using Hand Geometry and Behavioral Information", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "58dbb77c5c14a64785cacda66dfe11c7ebdaa320": { "authors": [ { "ids": [ "3407813" ], "name": "Sevil Dr\u00e4xler" }, { "ids": [ "2574950" ], "name": "Holger Karl" }, { "ids": [ "1841992" ], "name": "Zolt\u00e1n \u00c1d\u00e1m Mann" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Complex network", "Optimization problem", "Placement (EDA)", "Program optimization", "Provisioning", "Scalability" ], "id": "58dbb77c5c14a64785cacda66dfe11c7ebdaa320", "inCitations": [ "0701a2b2c6bb469ddf5877f35d7fdc0bd25c4977", "5ca0595555b98664a0eb0a9859fdd52ec3fd27e6", "7291694b278fc9a7525009f1b732ff84886bd348", "6b7e288bd77865e44cf5bb26bba5851ab14d0815" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "365-370", "journalVolume": "", "outCitations": [ "98bb60385a0ebb298ce0173e23f2586acef09791", "2021970e92ac967cedccaac2736708ed710421c4", "89c5009db58bf4b402c3e504432d43a8baba3099", "98dcf73ff4f333c70d97401057f3f4037e3eeccc", "7a0604b8f816af1dc81dce32f98b541816b7bd1d", "0ec59b7fae15a7caa3256ba31b21802d455618c2", "1e5027ff533d31513b667cec06f6a650882e1ee0", "074990c4e5770872e192b8c61561ce6ac15a7a30", "16ccda81a562eee4a5403dab8029c57c30fc2c19", "2ddd179040a880aa059c1f02d6f49af776e86e69", "9ad16c75b6015a2b44cfacf9ff97f5ced672dcf9" ], "paperAbstract": "The management of complex network services requires flexible and efficient service provisioning as well as optimized handling of continuous changes in the workload of the services. 
To adapt to changes in the demand, service components need to be replicated (scaling) and allocated to physical resources (placement) dynamically. In this paper, we propose a fully automated approach to the joint optimization problem of scaling and placement, enabling quick reaction to changes. We formalize the problem, analyze its complexity, and develop two algorithms to solve it. Empirical results show the applicability and effectiveness of the proposed approach.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101163" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/58dbb77c5c14a64785cacda66dfe11c7ebdaa320", "sources": [ "DBLP" ], "title": "Joint Optimization of Scaling and Placement of Virtual Network Services", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "58e99e8f19d1d77d6d132e89995de8f7b4784b2f": { "authors": [ { "ids": [ "34260068" ], "name": "Michael Orr" }, { "ids": [ "1691681" ], "name": "Oliver Sinnen" } ], "doi": "10.1109/HiPC.2017.00024", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00024", "entities": [ "A* search algorithm", "Algorithm", "Ambient occlusion", "Branch and bound", "Depth-first search", "Extreme Loading for Structures", "Mathematical optimization", "Scalability", "Scheduling (computing)", "Search algorithm", "State space", "State space search" ], "id": "58e99e8f19d1d77d6d132e89995de8f7b4784b2f", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "134-141", "journalVolume": "", "outCitations": [ "218c7bbaa6f48517edadc9cf9729dfc961dc7517", "0071092d6b8519feb9ce2f1884f7476ea219e611", "40c744fe80d56302cec4000b7e7339d0d2686b9e", "d6e6364e0fddd906cd03ca3db2d3b72a52270b32", "70ebf1ada29a3b80c1657b0142c4048f2fe7acd5", "9bcd710374505f684556a469659b21ca2907bab1", "7dad526fc681a0808b0163c29d8e4bd92785b952", "0a030c2142dec882fea5f33b3f562e04d66d287a", 
"cb7f95a4990b2d08a4e474c60ea198e9d724587f", "0a0432f604705963d15f299af02b242df7752dd9", "fc68cb63d25f6838139d4fcdd4a45763428dce29", "8e9cca113a980834776d240cbd73ca3cafdf6465", "d78ca59b2235f325d5635f6e67cd790d1adc8640" ], "paperAbstract": "The problem of task scheduling with communication delays is NP-hard. State-space search algorithms such as A* have been shown to be a promising approach to solving this problem optimally. A recently proposed state-space model for task scheduling, known as Allocation-Ordering (AO), allows state-space search methods to be applied to the problem of optimal task scheduling without the need for duplicate avoidance mechanisms. This paper examines the performance of two parallel search algorithms when applied to both the AO model and the older ELS state-space model. This suggests that its use may provide an advantage with many different variations on state-space search. This paper explores the application of AO to some of these variants, namely depth-first branch-and-bound (DFBnB) and parallel search. We also present an update to the formulation of AO that prevents invalid states from being considered during a search. An evaluation shows that AO gives a clear advantage to DFBnB and allows greater scalability for parallel search algorithms. 
The update to AO's formulation has no significant impact on performance either way.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00024" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/58e99e8f19d1d77d6d132e89995de8f7b4784b2f", "sources": [ "DBLP" ], "title": "Further Explorations in State-Space Search for Optimal Task Scheduling", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "58f5dda8787bc8591697641e55de2c34f88fe200": { "authors": [ { "ids": [ "2031247" ], "name": "Thibaud Ecarot" }, { "ids": [ "1711563" ], "name": "Djamal Zeghlache" }, { "ids": [ "9879478" ], "name": "Cedric Brandily" } ], "doi": "10.1109/IPDPSW.2017.97", "doiUrl": "https://doi.org/10.1109/IPDPSW.2017.97", "entities": [ "Algorithm", "Cloud computing", "Embedded system", "Enterprise resource planning", "Evolutionary algorithm", "Integer programming", "Memory management", "Tabu search", "User-centered design" ], "id": "58f5dda8787bc8591697641e55de2c34f88fe200", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)", "journalPages": "77-85", "journalVolume": "", "outCitations": [ "d8057d514036d51051af78476468fe350cb7488a", "85011e8d503e28c098249e935a1ad52a673dc9f0", "c6b682912b7832d0a2516fbcdc0d516ba96d0c55", "13f885c7f366e581882f71094ea40d80810a0764", "86ff12fcbd17c231ea564d9c33a5e92492f202ec", "9c43e7d5e43c9d78c487f338f5616619e650a285", "1d5e81244451dc58a6e6d4c9d2b8fbff6f55e10b", "a4242c4a1a03b014c7b88e43115c61ead2297665", "2e2f4a8f5a92a5bd5b21afe5200d31da2fb90a70", "5e3da1f0bef9cfb5b517ac210542ed69aca76106", "eb86316edfc8afcf83b5e499b796e7a35c6acb5c", "db626e4bce048af02d1b08b1288d18d475117e21", "7f51f3926f74b9bb6f9b69939027f339b4ecefa2", "14267096786ca00deb921c3e1eb6218bbf5478df", "7369835cc1bc8f45b3272832506aede211bc9fe7", "44d6faabca90eaee56355f2da80fce105409e145", "745f99c4c2a6153a270034fcaa00a6d49b0b386a", 
"b5060c044b8fb817083a16579df1ddd18e6f254c", "1272ee1c0c7dea10e25accb8a510676b59cef0ba", "11f29c9a2af424db895ede2e12882b50622a4f95", "3dc6a6759c4834fced6080ffc18364a1bb4f8322", "663e064469ad91e6bda345d216504b4c868f537b", "79fc8f5ac2e98842ab8b78d2d46b6e2714dc7ea3", "8505be1d5e5e8279e63dc14b7c4d8f7570244e0a" ], "paperAbstract": "This paper presents a method of cloud resource allocation designed to take into account both consumers and providers' interests. This comes in contrast to today's provider centered models that subject users to more restrictive terms and conditions. Both parties' interests are computed in the form of integer constraints. Costs and availability are embedded as key objectives and performance criteria in the model. We propose a hybrid resolution method based on an evolutionary algorithm augmented with a tabu search and compare performance with other resource allocation algorithms. The comparison results reveal the efficiency of the proposed hybrid evolutionary algorithms", "pdfUrls": [ "https://doi.org/10.1109/IPDPSW.2017.97" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/58f5dda8787bc8591697641e55de2c34f88fe200", "sources": [ "DBLP" ], "title": "Consumer-and-Provider-Oriented Efficient IaaS Resource Allocation", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)", "year": 2017 }, "59090a74e1075e017c7e547d472c922af9fb80d6": { "authors": [ { "ids": [ "7468259" ], "name": "Bj\u00f6rn Feldkord" }, { "ids": [ "1732130" ], "name": "Friedhelm Meyer auf der Heide" } ], "doi": "10.1145/3087556.3087575", "doiUrl": "https://doi.org/10.1145/3087556.3087575", "entities": [ "Algorithm", "Autonomous car", "Competitive analysis (online algorithm)", "Datasheet", "Deterministic algorithm", "Embedded system", "Online algorithm", "Server (computing)" ], "id": "59090a74e1075e017c7e547d472c922af9fb80d6", "inCitations": [ "7df5efc8036f4c4a281346ca929cc81db39a091e" ], "journalName": "", 
"journalPages": "313-319", "journalVolume": "", "outCitations": [ "b01439268474c06f52b035c916d94571d6e3958a", "c20c3cb7f48260b03eedc15436b65c8c6521266f", "9d295f2bbef6d2a44f13fad34f0b2c3f7d1a1964", "815c5c4e367c2efcf9f6d28a2de779ef5886ead1", "4f90764fb8e7d4b1c18696f034804a2aa6487f44", "adcad2cf8342cd5346dae87422e7ab236b1531c8", "5948ce001843d1242e515c5543fb62552e04c235", "18d80fd0971f524c1e292aca51aba81b729fe4e1", "a7d20b0076dfc7e485b53d7b8cf808c4c9dca1e6", "a4d21316dd619337d47561e9fe80e1438c310683", "118f04c9a9fee4570fb841c3bcaeb3e480c76f12", "772ac20c929b1fe4c1cddc23647d1a7f3604a2dc", "c45cebaa4aa1e321e8e9f3719eba85a058c1aa7f", "e70b26300bca8bb22e7f596119fad7a45f1c044c" ], "paperAbstract": "We introduce the mobile server problem, inspired by current trends to move computational tasks from cloud structures to multiple devices close to the end user. An example for this are embedded systems in autonomous cars that communicate in order to coordinate their actions. Our model is a variant of the classical Page Migration Problem. More formally, we consider a mobile server holding a data page. The server can move in the Euclidean space (of arbitrary dimension). In every round, requests for data items from the page pop up at arbitrary points in the space. The requests are served, each at a cost of the distance from the requesting point and the server, and the mobile server may move, at a cost D times the distance traveled for some constant D. We assume a maximum distance m the server is allowed to move per round.\n We show that no online algorithm can achieve a competitive ratio independent of the length of the input sequence in this setting. Hence we augment the maximum movement distance of the online algorithms to (1+δ) times the maximum distance of the offline solution. We provide a deterministic algorithm which is simple to describe and works for multiple variants of our problem. 
The algorithm achieves almost tight competitive ratios independent of the length of the input sequence.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087556.3087575" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/59090a74e1075e017c7e547d472c922af9fb80d6", "sources": [ "DBLP" ], "title": "The Mobile Server Problem", "venue": "SPAA", "year": 2017 }, "592a34a7609f48ddc6ccf0769beb5978712f0461": { "authors": [ { "ids": [ "6516728" ], "name": "Phuong Nguyen" }, { "ids": [ "1688353" ], "name": "Klara Nahrstedt" } ], "doi": "10.1109/ICAC.2017.38", "doiUrl": "https://doi.org/10.1109/ICAC.2017.38", "entities": [ "Artificial neural network", "Cloud computing", "Computational model", "Elasticity (cloud computing)", "Feedback", "Monad (functional programming)", "Monolithic kernel", "Program optimization", "Robustness (computer science)", "Scheduling (computing)", "Service-level agreement", "Service-oriented architecture", "System identification" ], "id": "592a34a7609f48ddc6ccf0769beb5978712f0461", "inCitations": [ "5ef00014862c26bada8b8a084400256d9e30f469", "000ce7b4ae0219528578c1d383661a4b625045ae" ], "journalName": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "journalPages": "187-196", "journalVolume": "", "outCitations": [ "1eae8519ef9486959feeb8e7ed6b7b850648203c", "d4612773f781278a3c6e5ef43602efe9ab723450", "0cc547cea26938e8c4165059ed0975cabec2c660", "af0bc4baa5a107964f489298efa34114aacf3b9e", "1c02c55e0d8a9820944fdef364e98542945b445c", "41da0c64d5884eed285c91e2938350160b5a6ccd", "b7254d39c71152f74b7c2618371e78b454fb385b", "93f836f13fab7599e14ef513700887148d9d29eb", "3905a97f8d23d8f1ed4d69d4dffe547ec63faa79", "4dc547685a514192d4671976d4f0edbd874b2537", "748742dd1dc48412f9153327b4095a5a0cdee03a", "224b0f2731b3177e683ae20433a1b19000c326ef", "b3e1949ffb2daf9dc846b4abe440c75baf3e91bb", "7f302d785d779b5ebf34b76bc4dfb9150e0d2cd2", "37f5543bef729e7e198533ad2bdbac766d99ce36", "3ef4b9cb8e04eb59fe80c4f6dd4c80a5c2276dd2", 
"a984f665f2b79d6968f2e144fb8b3845aabcbdec", "7e944c565a5719e054ce4f52f06af06932b4c72d", "063db66bc5b7af642fecd5b891ada6eb18828ee0", "8688c96c558f20f65ca886f7ce1470ce63aec63c", "2e72178091b2ca445f46200dcba71a53417b69eb", "36c065008b70a635dcce2ee3271ef214b9df6983", "20ef4d2f6449f421ad7f653e5a9acbc2a86954ca", "81ec951f1c1501ed21513fe77fbc90db057219d0" ], "paperAbstract": "Scientific workflows have become a popular computational model in a variety of application domains, such as astronomy, material science, physics, and biology. As scientific applications are moving to the cloud to take advantage of the elasticity and service level agreement of resources, there has been a number of recent research efforts on cloud-based workflow systems that support various types of performance guarantees under resource cost constraints. However, most of the related work often requires advanced knowledge about workflow structures to perform scheduling and resource optimization. In addition, existing workflow systems usually employ a monolithic approach in workflow implementation and execution, which makes them inefficient in dealing with heterogeneous types of workflows. In this paper, we present MONAD, a self-adaptive micro-service infrastructure for heterogeneous scientific workflows. Specifically, our micro-service architecture helps improve the flexibility of workflow composition and execution, and enables fine-grained scheduling at task level, considering task sharing across different workflows. In addition, we employ a feedback control approach with artificial neural network-based system identification to provide resource adaptation without any advanced knowledge of workflow structures. 
Our evaluation on multiple realistic heterogeneous workflows demonstrates that our system is robust and efficient in dealing with dynamic scientific workloads.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICAC.2017.38" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/592a34a7609f48ddc6ccf0769beb5978712f0461", "sources": [ "DBLP" ], "title": "MONAD: Self-Adaptive Micro-Service Infrastructure for Heterogeneous Scientific Workflows", "venue": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "year": 2017 }, "593eb268bd3d04c700115a7e31f45eb48d8a7aeb": { "authors": [ { "ids": [ "33625333" ], "name": "Alireza Nazari" }, { "ids": [ "2478114" ], "name": "Nader Sehatbakhsh" }, { "ids": [ "37093404" ], "name": "Monjur Alam" }, { "ids": [ "1747217" ], "name": "Alenka G. Zajic" }, { "ids": [ "1747749" ], "name": "Milos Prvulovic" } ], "doi": "10.1145/3079856.3080223", "doiUrl": "https://doi.org/10.1145/3079856.3080223", "entities": [ "Computer architecture simulator", "Embedded system", "Malware", "Simulation" ], "id": "593eb268bd3d04c700115a7e31f45eb48d8a7aeb", "inCitations": [ "82c875e112555f21319123f1b8f61338feb6d183", "d2de08fbebf22c62b36c6f06cb91ad5bf00bdade" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "333-346", "journalVolume": "", "outCitations": [ "69884e65ea95fe6b60d72a00963a787325a9bdce", "23e8236644775fd5d8ff5536ba06b960e19f904b", "31cc47c79e7145b0a4d0eec12ac05b9114bce962", "938286fa80fe31fa3e35f450989f27659296f25f", "ead98a77a897641245975c6502c1e3b58de4436c", "86013daaae16572bceb755e65ee5fa2fdfb63848", "bd4d135a1ff6775e0818ba82c6250f0362bb2585", "37ef5a307a8d6ea0ba6f5f7e39a0199437c2cf48", "5ad6664093e66129aa4c46b93533e700a5e3a9b6", "08dcda95dcbd1ebe906a1c24507b7814759462aa", "f1396f25c40d531480d57bf8645cda1040a6814c", "06f16d9430d5f6213cf5399b167a3d989c3ff798", "c8f6a8f081f49325eb97600eca05620887092d2c", 
"6458f4c0c029b038ebd1d7f61005a010ac250892", "636e0e7325e5fc96297b4385dbd34c6b14ebfa89", "cdc5dcfb78e0c084a511747fa7e7d7357b2af821", "565ed53f4a40a98b18a389a3790a7fe62a525f58", "3439cdb87fe2a1d1f55ef46ad84f85c5ef0d28a1", "22050b3ee9c69c64dc796358c7f0ba247d4adce3", "3700aad5fab8a98dd8113d2c769a78b1cdc4e5f3", "023f23c300804754753cb11db51fb7f582556ab7", "860b97915bfad0cba4e2eb42f081f6cd8bc2d575", "58b7d42d656cffb3257f5bfed9c76c8f13cf47a5", "3c1f11a1da88c8237842a246ed1a5dbe230737be", "7ce0ad27813a09788f924a061f4e60f638e03d48", "0b1fe1af18e2fda99eedf9e5cf3ac18826e585c1", "2b0edeb07529992bb0a73e166c15036f0fe2bff6", "15ae91031196fe3d8b91e6f2c9be59bee68f021e", "4501a8a533ff9c13d2e347e07eb0d4cb434c46e4", "47e98574f432ec6a7a224b807a0bbd08a2a66148", "5b660b8e92707040e417cb3b659f60facfa2f8a1", "d202bd005bd42bb440cef64f86881b54ad6093c1", "226242629f3d21b9e86afe76b1849048148351de", "78cd2830c84a71cfd7f300a447658d93dd0096bf", "3946a5c410ba1980d932cc6d2987b4d935e038d5", "6be6aad95f5ac82770a2927b5175889cc8aa5958", "092b09f0ec09b2b10763f5697ca77099a37ab022", "008c2c2cf69fd4936a64e67d265b9b173f0d190f", "52c2c050af5b32d4929b4b193967a3675d03aea0", "312b989d502994f4dde79526e392d3bbd1168c6d", "25ee03dea55ac3137edb13b3e141b19a03deba21", "2bcffef2b358b5418d652c3046da4714286b86be", "01eb6d6d21f25c2598c0936b831feeb257ab43e0", "60e75f03a40506f21c8fa96095c3ca4536f3edc0", "6ef3b5a0af396e42711aab8ffbfb1728c2cb95b2", "77a1532cb64eab28162a0277cde52b4b7eceda49", "357abbad242e3222940fa05d05d5261bafc3cf5c", "0ea8f85d507ab8220f920dc1ffe6574820dd0027", "e7e7fbfa7693d27e8f0887e2ba2fe57385e2a9d8", "1a46034174ca09cfc7b0ba23acc8dc9b330d863e", "90f27eacb7f22816c5b1d4628ff0c56980d80c02", "4fb1900092a86d1cdd6996cd95acfa339456dbf2", "048dad3e615a9c0c6710993796ea82c56af1d3ee", "7c8c9bdb30ae9b40365c355504bdb457a51e108c", "a48994cf1474d22be671e6e53f4ce8da6634f33e", "dfe4e3566b72e33e1ec09746252ad0a5b52c0183", "06f557f590f3b2c28a7b87717e2930e665ddc3ca", "419b718b87d216820ee2f6ef076d9889f4aac6ee", 
"14bfc28b5652b605c4936c74f4f53ec6f2e215a8", "40dfb59535e057a9f27fa084ee8ec0f3972b3c9a", "3c1b97dff8f96170f9557319e9d881286aa77c1e", "801b0ea671dfbb7b8ee0accb7fdea9bf521897bd", "0e42df3278356643d63dcdd33b159ae265602c42", "869db58587579548663678a8a982752104c7ed64", "4b41bb221ccae289bd66dfc1210f36cc172350c5", "ff69e9d957f6ce7b58ea932c3fd2e2893908cef8", "008ff29ee4dbee79028e1017d6459347ad8f45d6", "142f95ba20abb7a964500aef0983a4181be91fd6", "4374b8932109c93470d5d68356e6eb1ef91dbbb8", "c79c2857a336461e9b30d817d29cde763b52b441", "531847b1e582c5353ff436744ba0c60682cbd022", "35a0dcb48754b4a8f382ffb86a0b5794b2574fce", "1e102df57ec826f0afee0dda578551e3da3b7289", "00ed3faeb0aa2150d28832d85216cf05c69a2be6" ], "paperAbstract": "This paper describes EM-Based Detection of Deviations in Program Execution (EDDIE), a new method for detecting anomalies in program execution, such as malware and other code injections, without introducing any overheads, adding any hardware support, changing any software, or using any resources on the monitored system itself. Monitoring with EDDIE involves receiving electromagnetic (EM) emanations that are emitted as a side effect of execution on the monitored system, and it relies on spikes in the EM spectrum that are produced as a result of periodic (e.g. loop) activity in the monitored execution. During training, EDDIE characterizes normal execution behavior in terms of peaks in the EM spectrum that are observed at various points in the program execution, but it does not need any characterization of the malware or other code that might later be injected. During monitoring, EDDIE identifies peaks in the observed EM spectrum, and compares these peaks to those learned during training. Since EDDIE requires no resources on the monitored machine and no changes to the monitored software, it is especially well suited for security monitoring of embedded and IoT devices. 
We evaluate EDDIE on a real IoT system and in a cycle-accurate simulator, and find that even relatively brief injected bursts of activity (a few milliseconds) are detected by EDDIE with high accuracy, and that it also accurately detects when even a few instructions are injected into an existing loop within the application.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080223", "http://alenka.ece.gatech.edu/wp-content/uploads/sites/463/2017/06/ISCA17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/593eb268bd3d04c700115a7e31f45eb48d8a7aeb", "sources": [ "DBLP" ], "title": "EDDIE: EM-based detection of deviations in program execution", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "594065ed6261717f4dfc76ba6de4d8f78ff1c2a9": { "authors": [ { "ids": [ "3433542" ], "name": "Peter Rindal" }, { "ids": [ "2524585" ], "name": "Mike Rosulek" } ], "doi": "10.1145/3133956.3134044", "doiUrl": "https://doi.org/10.1145/3133956.3134044", "entities": [ "Eurocrypt", "Franklin Electronic Publishers", "PKC (conference)", "Random oracle", "Symmetric-key algorithm" ], "id": "594065ed6261717f4dfc76ba6de4d8f78ff1c2a9", "inCitations": [ "5e75d9d75536f8b126c47ae4ce91b47d51c7cc69" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "769", "journalVolume": "2017", "outCitations": [ "9f17818d52ddbb13998097a87964a14731b8849d", "d8c1b48ae4d6e4676d060c06087bb6b1ac81a005", "22aaafd787c6896ec9da2348dfa5f27ddf567cc9", "1bfb684e591020caec338fd631ff5397be3cff0d", "5e29280c2715c63d0c69b7864408165bf30d5439", "45f4a6c915709c734e034faae7f94683d4bccbcf", "4bd548d75200be6717c75590b6329a2e6cdc04ac", "db0f82a419f89cda64fcbec2c58137862cd04475", "23ec68ed03b485b645478a3f6905615617d905a6", "1554b04035dff3d4c6db8dc3c392e45366db4fdc", "45104cef3fc97f6c92f3fdbba3629ac3b590aae9", "72254a4ccde7776abc07c406bacaae308783414e", "536754e19b8b2850497069a6e9c6b75d368621d4", 
"16d23baa55835434808a3420e0884e0dc44680f6", "627d863ce5bb56de50a4cfc36f8f6c526c8eec37", "0006d5ce7d2c8443086af6101d307502a67f157b", "031878496a4a53ce1be6023661151b1ba7dd0869", "399f5140a149a58278b364c8ab7b6a3f4745617d", "0bb51d5e3a2e779d7515ee553bccd326bfc43912", "42333e3f231bbfe508f6da6bad2feff9ae223113", "547a94f8b16f521ee2eac299572a5c767d628289", "14720266a35ced804438cdf06bc8d151e7e9903c", "a9d07270be6e48448ef17b348f3455d76ea1d68f", "03c1711090d76cc9163e238686786a71c028377e", "1890cecdbba895fbcf975c4aef1616e184e69abb", "71e166a85195362cd48311cfb473debc1614602a", "b57aec9b611817d5272c8f97ec8211ecd33dca6d" ], "paperAbstract": "Private set intersection (PSI) allows two parties, who each hold a set of items, to compute the intersection of those sets without revealing anything about other items. Recent advances in PSI have significantly improved its performance for the case of semi-honest security, making semi-honest PSI a practical alternative to insecure methods for computing intersections. However, the semi-honest security model is not always a good fit for real-world problems.\n In this work we introduce a new PSI protocol that is secure in the presence of malicious adversaries. Our protocol is based entirely on fast symmetric-key primitives and inherits important techniques from state-of-the-art protocols in the semi-honest setting. Our novel technique to strengthen the protocol for malicious adversaries is inspired by the dual execution technique of Mohassel & Franklin (PKC 2006). Our protocol is optimized for the random-oracle model, but can also be realized (with a performance penalty) in the standard model.\n We demonstrate our protocol's practicality with a prototype implementation. 
To securely compute the intersection of two sets of size 2^20 requires only 13 seconds with our protocol, which is ~12x faster than the previous best malicious-secure protocol (Rindal & Rosulek, Eurocrypt 2017), and only 3x slower than the best semi-honest protocol (Kolesnikov et al., CCS 2016).", "pdfUrls": [ "https://eprint.iacr.org/2017/769.pdf", "http://eprint.iacr.org/2017/769", "http://doi.acm.org/10.1145/3133956.3134044" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/594065ed6261717f4dfc76ba6de4d8f78ff1c2a9", "sources": [ "DBLP" ], "title": "Malicious-Secure Private Set Intersection via Dual Execution", "venue": "CCS", "year": 2017 }, "5942a6955880a7e575427683c7278c371ec2c5d4": { "authors": [ { "ids": [ "9527615" ], "name": "Weilong Cui" }, { "ids": [ "1784473" ], "name": "Timothy Sherwood" } ], "doi": "10.1145/3123939.3124541", "doiUrl": "https://doi.org/10.1145/3123939.3124541", "entities": [ "Architectural decision", "Best, worst and average case", "Glossary of computer graphics", "Risk management", "Software architecture" ], "id": "5942a6955880a7e575427683c7278c371ec2c5d4", "inCitations": [], "journalName": "", "journalPages": "651-664", "journalVolume": "", "outCitations": [ "635210aa01bd460f5dad80c5fffef8a0dfb4993e", "053825c0a1c111e76c18f28b6d8ae13b414f3bed", "3ab168addfd8ae356e2db48cb5713beec2da8fc1", "3321c2459ca9388d73980ef92add5e1e6c0dc610", "132270306acd57d54c70492ead3038964198b68c", "0ea8f85d507ab8220f920dc1ffe6574820dd0027", "0f2c62a6cb60699e46fe388c3f6eea83edc475b6", "71c2deb5c3b4b0fd1ed68bdda534ec7ea76e845b", "082c182f43333f2276ba505a896748a641aaeaaf", "126df9f24e29feee6e49e135da102fbbd9154a48", "47e6effdde181d65c1d5416ace2bc3e0614df8e6", "2d98fc1f96e5bded2383f194f884d0865372e436", "b8b8d587cdc6bd98515fe760ae9b34da335d94de", "370488843f80120797e1f0af22e9fdb0152ff657", "7387a1863dba2d1ea6e703cb59d6891f758a42be", "065d49a1a4b0aa0a62697d6e6ab67f77031ba93c", "c2e1a7a3ba63305b47aad75c293a8ce4f65ebef9",
"02eb6eff06fd89d772b847f9fd0f30549768fc03", "7232af3c4a3c4641ece9818d1c484260ce33a266", "6e88d09b2adc7a3d9230d324387929ec54a9d886", "048af40cbd0516eb935641065b2224f390731e2f", "1bdfa2978f64b848b8c929740d51af98d774dfab", "801f9745f678990ad8d25a170755b4fe81653e51", "06684c1e01d5417b72dc18bcd82aa40864cc4d49", "22ddd63b622aa19166322abed42c3971685accd1", "078e00be20bfdc4eeae762f6170ccded05f452c6", "bfa696236c766973328bdfe3f7fd3ffd7ac9a607", "352a8957005dc5519b15ed1870751ec494d66395", "7355123bf4bb08d41e462a60cd4e6f11a3ffcf85", "a6d9e9d9dc7adf72e06cd7489807514320ecf730", "3f03f918b10abb0f39fb6a22abdfe721e55965bc", "081a00e3c38a0b653b9f98dc1f2ef86336fab4e8", "339632faa043d4697570fc4fe48a52d007c3cf06", "302cf24307f2b8fae247318e71b395199ab889da", "40b4153ff9f9e8125db7e74a1cc5748ee81bd317", "02ebdcf8200135ec0433e12e4ef2459ac740370b", "4cd6b5e470b4205cd1560de42cde8108fa42ba4b", "14e97ea7de6662c67e28bce1595cf419aff6ad5f", "e3fa998bede8f5db8d71349c7a0d53ad0aa4f7f7", "062754ec2896f0b696f8673cb1204cb733b612ad", "0c81699a25f2d7e247491fa05e7a9dbd3091607c", "4115fac134fd4107f470784d63136bd44ffee43a", "97158a13a871720757114a8dcb8d8f4e104d8693", "6671069172327bdcade96abda12a25b9122e6192", "37204b89f7a1276fa572ed5bf6dab49185cc2d61" ], "paperAbstract": "Designing a system in an era of rapidly evolving application behaviors and significant technology shifts involves taking on risk that a design will fail to meet its performance goals. While risk assessment and management are expected in both business and investment, these aspects are typically treated as independent to questions of performance and efficiency in architecture analysis. As hardware and software characteristics become uncertain (i.e. samples from a distribution), we demonstrate that the resulting performance distributions quickly grow beyond our ability to reason about with intuition alone. 
We further show that knowledge of the performance distribution can be used to significantly improve both the average case performance and minimize the risk of under-performance (which we term architectural risk). Our automated framework can be used to quantify the areas where trade-offs between expected performance and the \"tail\" of performance are most acute and provide new insights supporting architectural decision making (such as core selection) under uncertainty. Importantly it can do this even without a priori knowledge of an analytic model governing that uncertainty.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3124541", "http://cs.ucsb.edu/~sherwood/pubs/MICRO-17-archrisk.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5942a6955880a7e575427683c7278c371ec2c5d4", "sources": [ "DBLP" ], "title": "Estimating and understanding architectural risk", "venue": "MICRO", "year": 2017 }, "5947017313f56af69cea1796b6501f60fcaf8e74": { "authors": [ { "ids": [ "34316056" ], "name": "Shahbaz Khan" } ], "doi": "10.1145/3087556.3087576", "doiUrl": "https://doi.org/10.1145/3087556.3087576", "entities": [ "Algorithm", "Central processing unit", "Data structure", "Depth-first search", "Deterministic algorithm", "Distributed algorithm", "Fault tolerance", "Graph (discrete mathematics)", "Graph theory", "Parallel algorithm", "Parallel random-access machine" ], "id": "5947017313f56af69cea1796b6501f60fcaf8e74", "inCitations": [], "journalName": "", "journalPages": "283-292", "journalVolume": "", "outCitations": [ "157878eedbdbab0dc14db08a8f408c7f1f7b0760", "035eaa12689b94648b7e1431e02f86890a7ad97c", "b51083c2e96a585fa0a1c805d259cd0dbb5c7f67", "d8d721038047205928508a1fa5079cc7dfbc7d0a", "d8c3129c64e5d014e73c9c5f66d4664b541b4214", "931c4ba5a348870ebb56e6c074e3951a2d9f1a39", "2692127a1f429423e27fbbff6bfed5f7d78417f7", "515ecdf55b4f1d15ec8df93a4efd58dcd288aaf5", "e05f410b45d75f78a72354d430a75373c395e9ce", "8d1e88a4933e5d3726ac0f7e1a51fa8c95a54736", 
"d10b7fc73704190eaa576a0ce3567c90d3c19f36", "dd4832da327d21978e945b86e72298476698c806", "34eb5e5ded51738861b8b844a1dbfddd6881fa46", "d466b599d4bda5952bf9ffd0fb71816edec30877", "296fff9688780f25806c5962d58da0d7f45e0294", "5c949277f27e04569cf39aa2b958a236f8bc0982", "23fbde1fcefdc4ed472b4042f1048e817534460f", "5f079a6dfac20beae082d7b711d55d10eca143a2", "2d89055fcc30abfa5acd5c4b2ac0c8205dd42ffe", "d507ab9e8e01da9b5ec95e612aa7755b590ef71d", "086da3000dee0f42d719407af9ef2a8f39836623", "04311b15b444a0f75ea2bb74fca26cc1aefbf3c1", "9d9187f7450eac724ecea4285d4ff59b2f06dc59", "37a9452888927675957fa54a489b5a3b58a5046a", "eb757f2c481794067b009a70922ae07e421933cb", "54ebbd984587d4537cdb42b20c467303e107e5f7", "46857832a3e2d9932ef12af0a8abfb2d75c7da6a", "302d8a74adeae224fbba294d1eb83080aa6c2b7e", "4d123ba7258e5a785e4d6c63b1d98e6885a9d3a5", "09e84a255b33c12e8cb2bb7a7b7469b8b7bfbbfd", "bfab539caacce4574849efd504ff3abf362bb5cb", "1639c3a5f926d69e170b138c775ce36d2e1e4c1e", "cb5297443265e4e07647706e2f793c103e85a428", "052af1757c410fa8b65bf95339c6e4142d723d61", "815115ca40c418d638366c429f096c2a3de0b51a", "226b121695887f250ca05fc19ffbd02e99437cb4", "7f8efaf1312bd0084257d50038b5c7e40c768f32", "5b6580b7efae0d009d526a753325300334dcc96a", "87503d0db5151491c26134b03ca6158040f5f91e", "385742fffcf113656f0d3cf6c06ef95cb8439dc6", "29b99397712a2f159d729d07b750441c59fe2e51", "2fce8527c81099869cc21c6d236b1ca72be6693b", "9df81d87af03911e65805441468cade7d4148a96", "3917b247dacf91f695f2516ef4ee3e1807da6ffd", "087457ece11f23a1869e5eab37402e2c6cc09b10", "47199bf68bdc70a64e5bd3d709bd7abbfe3067eb", "f256078e07b5e7dd595b5c7b9c8c52c810ff3c1e" ], "paperAbstract": "Depth first search (DFS) tree is a fundamental data structure for solving various graph problems. The classical algorithm [SIAMCOMP74] for building a DFS tree requires O(m+n) time for a given undirected graph G having n vertices and m edges. Recently, Baswana et al. 
[SODA16] presented a simple algorithm for updating the DFS tree of an undirected graph after an edge/vertex update in \u00d5(n) time. However, their algorithm is strictly sequential. We present an algorithm achieving similar bounds, that can be adopted easily to the parallel environment. In the parallel environment, a DFS tree can be computed from scratch using O(m) processors in expected \u00d5(1) time [SICOMP90] on an EREW PRAM, whereas the best deterministic algorithm takes \u00d5(√n) time [SIAMCOMP90,JAL93] on a CRCW PRAM. Our algorithm can be used to develop optimal (upto polylog n factors) deterministic algorithms for maintaining fully dynamic DFS and fault tolerant DFS, of an undirected graph.\n 1- Parallel Fully Dynamic DFS - Given any arbitrary online sequence of vertex or edge updates, we can maintain a DFS tree of an undirected graph in \u00d5(1) time per update using m processors on an EREW PRAM.\n 2- Parallel Fault tolerant DFS - An undirected graph can be preprocessed to build a data structure of size O(m) such that for a set of k updates (where k is constant) in the graph, a DFS tree of the updated graph can be computed in \u00d5(1) time using n processors on an EREW PRAM. For constant k, this is also work optimal (upto polylog n factors)\n Moreover, our fully dynamic DFS algorithm provides, in a seamless manner, nearly optimal (upto polylog n factors) algorithms for maintaining a DFS tree in the semi-streaming environment and a restricted distributed model.
These are the first parallel, semi-streaming and distributed algorithms for maintaining a DFS tree in the dynamic setting.", "pdfUrls": [ "http://arxiv.org/abs/1705.03637", "https://export.arxiv.org/pdf/1705.03637", "http://doi.acm.org/10.1145/3087556.3087576", "https://arxiv.org/pdf/1705.03637v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5947017313f56af69cea1796b6501f60fcaf8e74", "sources": [ "DBLP" ], "title": "Near Optimal Parallel Algorithms for Dynamic DFS in Undirected Graphs", "venue": "SPAA", "year": 2017 }, "596df6fb4d50c7886948b08f525c4e3393d05a44": { "authors": [ { "ids": [ "1715454" ], "name": "Norman P. Jouppi" }, { "ids": [ "39660914" ], "name": "Cliff Young" }, { "ids": [ "2020474" ], "name": "Nishant Patil" }, { "ids": [ "2861543" ], "name": "David Patterson" }, { "ids": [ "7012376" ], "name": "Gaurav Agrawal" }, { "ids": [ "36843389" ], "name": "Raminder Bajwa" }, { "ids": [ "4823686" ], "name": "Sarah Bates" }, { "ids": [ "6287510" ], "name": "Suresh Bhatia" }, { "ids": [ "14950646" ], "name": "Nan Boden" }, { "ids": [ "34093825" ], "name": "Al Borchers" }, { "ids": [ "2230969" ], "name": "Rick Boyle" }, { "ids": [ "39717385" ], "name": "Pierre-luc Cantin" }, { "ids": [ "10599460" ], "name": "Clifford Chao" }, { "ids": [ "2157243" ], "name": "Chris Clark" }, { "ids": [ "10798325" ], "name": "Jeremy Coriell" }, { "ids": [ "40504585" ], "name": "Mike Daley" }, { "ids": [ "10745330" ], "name": "Matt Dau" }, { "ids": [ "1796515" ], "name": "Jeffrey Dean" }, { "ids": [ "8732659" ], "name": "Ben Gelb" }, { "ids": [ "10750186" ], "name": "Tara Vazir Ghaemmaghami" }, { "ids": [ "40301682" ], "name": "Rajendra Gottipati" }, { "ids": [ "10703669" ], "name": "William Gulland" }, { "ids": [ "39066904" ], "name": "Robert Hagmann" }, { "ids": [ "40582433" ], "name": "C. 
Richard Ho" }, { "ids": [ "9134643" ], "name": "Doug Hogberg" }, { "ids": [ "2805999" ], "name": "John Hu" }, { "ids": [ "1701671" ], "name": "Robert Hundt" }, { "ids": [ "40535278" ], "name": "Dan Hurt" }, { "ids": [ "2475342" ], "name": "Julian Ibarz" }, { "ids": [ "10433955" ], "name": "Aaron Jaffey" }, { "ids": [ "40449051" ], "name": "Alek Jaworski" }, { "ids": [ "29793267" ], "name": "Alexander Kaplan" }, { "ids": [ "10715238" ], "name": "Harshit Khaitan" }, { "ids": [ "35002044" ], "name": "Daniel Killebrew" }, { "ids": [ "35738359" ], "name": "Andy Koch" }, { "ids": [ "1715147" ], "name": "Naveen Kumar" }, { "ids": [ "40476752" ], "name": "Steve Lacy" }, { "ids": [ "2926266" ], "name": "James Laudon" }, { "ids": [ "38508422" ], "name": "James Law" }, { "ids": [ "32249701" ], "name": "Diemthu Le" }, { "ids": [ "3362246" ], "name": "Chris Leary" }, { "ids": [ "8847026" ], "name": "Zhuyuan Liu" }, { "ids": [ "3162843" ], "name": "Kyle Lucke" }, { "ids": [ "9620331" ], "name": "Alan Lundin" }, { "ids": [ "10803508" ], "name": "Gordon MacKean" }, { "ids": [ "1973715" ], "name": "Adriana Maggiore" }, { "ids": [ "34457217" ], "name": "Maire Mahony" }, { "ids": [ "33731706" ], "name": "Kieran Miller" }, { "ids": [ "32445120" ], "name": "Rahul Nagarajan" }, { "ids": [ "34602886" ], "name": "Ravi Narayanaswami" }, { "ids": [ "39761104" ], "name": "Ray Ni" }, { "ids": [ "13805826" ], "name": "Kathy Nix" }, { "ids": [ "3222376" ], "name": "Thomas Norrie" }, { "ids": [ "3175815" ], "name": "Mark Omernick" }, { "ids": [ "10686924" ], "name": "Narayana Penukonda" }, { "ids": [ "35743099" ], "name": "Andy Phelps" }, { "ids": [ "14022616" ], "name": "Jonathan Ross" }, { "ids": [ "2141029" ], "name": "Matt Ross" }, { "ids": [ "40405690" ], "name": "Amir Salek" }, { "ids": [ "9364218" ], "name": "Emad Samadiani" }, { "ids": [ "2219487" ], "name": "Chris Severn" }, { "ids": [ "2473816" ], "name": "Gregory Sizikov" }, { "ids": [ "10665586" ], "name": "Matthew Snelham" }, { 
"ids": [ "10692866" ], "name": "Jed Souter" }, { "ids": [ "39566019" ], "name": "Dan Steinberg" }, { "ids": [ "10779387" ], "name": "Andy Swing" }, { "ids": [ "1918916" ], "name": "Mercedes Tan" }, { "ids": [ "3128076" ], "name": "Gregory Thorson" }, { "ids": [ "1998839" ], "name": "Bo Tian" }, { "ids": [ "5055423" ], "name": "Horia Toma" }, { "ids": [ "35916116" ], "name": "Erick Tuttle" }, { "ids": [ "38062095" ], "name": "Vijay Vasudevan" }, { "ids": [ "34922221" ], "name": "Richard Walter" }, { "ids": [ "2935275" ], "name": "Walter Wang" }, { "ids": [ "1925527" ], "name": "Eric Wilcox" }, { "ids": [ "1712426" ], "name": "Doe Hyun Yoon" } ], "doi": "10.1145/3079856.3080246", "doiUrl": "https://doi.org/10.1145/3079856.3080246", "entities": [ "8-bit", "Application-specific integrated circuit", "Artificial neural network", "Big memory", "COMEFROM", "Central processing unit", "DEC Text Processing Utility", "Data center", "Domain-specific language", "Graphics processing unit", "Haswell (microarchitecture)", "High- and low-level", "Matrix multiplication", "Mebibyte", "Profiling (computer programming)", "Substitution model", "TOPS (Nortel)", "Tensor processing unit", "TensorFlow", "Throughput" ], "id": "596df6fb4d50c7886948b08f525c4e3393d05a44", "inCitations": [ "b450788cf6570e1d4fa3c5bf37e191c037514aaa", "232e641a8b5f550c436af6336ee63e1cd771e073", "8773e9c47e4dd6e34eda5c65a894a9668a5d7ca7", "d251e2b4ddf95d84c2ff3d5eef01311ec1a323c4", "17908d30454d8b9412587876e7931a145f0ff1fc", "6cae9ad284a73471a8ed9e483b1673a60d61d946", "0c6b249d77e998068184e52a2d7fa7a5a867e12f", "dc613690d3704d374f6147d3eb1011a64b7f3e12", "08a0c1f5f2b7c91d81e28f896c2a001d58975014", "e9908fbf95834a81b99473277b9298cc54a5c471", "c12407245a40837b2a243a8c56e9ca9519c16113", "65c302fc5eedfb33824ef18879eb53cc0327ea41", "378ebec1d7df2483640b408a941ddbd4a05079a4", "0265fc80ae6cf18bda737d9e111f2e6f03897765", "c07869749db6ab8853dd58dbeb2395991d8b568f", "25a6693df14173513a8035e1a5e9fab6cdf05184", 
"bf5a2bed3a98f6dc093460a6592a5d1b99a60ae5", "b8e8d87107d8f7c22cc5c3c8ee1bf21e9f7ae931", "13fa2471ba3da92e473d95c30503b35c96fdb7c5", "1fd52a3751ad21033c107bc0930f26940fd0851b", "a2d19828c435a48aaa0b9c2a08112f6a023b2df9", "995226e0bfeceaed4aefd034581d5f5519bf1002", "0846cc671b4b963f17f38ec14a3b07ccfdd0b98c", "25a2f546a083a8108ff911c6f57fea0a6b739a0b", "bcdac3dec681a986b2d9279567aee367949bfbfd", "71f7b20f85a8230962b915fe621ec92aa10bb418", "bb82414678ab060e28353aa56444560efda3adde", "381b008b49d04c1bd5ff00649521fa028b9d3ea8", "fc36845d737d231bffa0397ecaffde2f926b8d54", "524c8625a1e0f103be7829eb88ab4abfe380b447", "081fdeea36d4b56a71e87b5b0de191aa368261c8", "29d80ba32a2fbe36aab8d4424802039d8be4f370", "f3a7e1b03649f22340c767d6e2e92dc43c7fe963", "01d7b1187d8593983181d18c357ffbed9c6ac8ac", "8af899f6b7304378b49a624252c0c0fae327a3e7", "1f81d7aca07546677c2e45c001034891f6ee978d", "135614db311313e4b12fc2cfec11c1231441f034", "fa0412fc819fce2468a65b65a2820247c2776760", "a98744c24f589294d2b0910888d5acf621fb18d2", "34dcfb1ecef8faf9c24e71cc84eebdb97e966cae", "6a8fb5989b3fb290ac0a654895aad6ff8601c7ab", "9d0d3d3ef79c9e99bb9ea5c16d4cdf755a64df75", "edd820f44464c636ff32ca50381dacb4d540db37", "3ad0ef33ebddc26ca3df92a3677fd30a705e1ef3", "3a7509a72a01dae5e17f1405ab3d18e1e2fd8157", "6360c75a753a0a29c4cd194f11b0f939b78e0f0a", "82d9d4cae6cb82ad516371414cc53da6ec9708b0", "0b6dcfa9b829bb6591c55a8c09ceb6c8a2a6b40e", "57f677d48624ba54295f150872beeaef2c8d478b", "36419744735ada4b0d6253a28b3adc886405d2f8", "d75c48758a29f93182cd886e7ff541e94f8229c8", "2d0410efd86c335c9a45fee4d754614fca4d8547", "0d561187be02ccf7905c0d2376796b5814e96a6c", "7b9339d3b359310ddbaf6caae13d3a65f657bf04", "12b31a84d5e8e26972131b221a3bb6725e92bd24", "361c9ba853c7d69058ddc0f32cdbe94fbc2166d5", "257da85f487aba9b45ebdffb70dbd41cbf14423c", "e0bcd04962a1de6f4c654e9034ebd08c963292be", "2465db685bd5694dc00e8ea9d80612aa2ebf7708", "144f24c7c0e0405bd9e36cdf13ddd3045e4ad2b2", "2512a6ced085503c399ee512ecaeb88606081261", 
"7c155c8082079f27ef9302d8afb0680f5cfafd0a", "e176b3ffd5dbeeb90f5ddc4d125e3f72d5f6f146", "5ec3bb36c3515c798e631dd2a73268fddbd3b2ee", "8033f293c894eae64c9f379dee2192bfe4f7883a", "220cff5a25fdbcb0ffa9bec2c2d111d1d8a3d723", "333ae8884a77f1fa0e68dd1ebdccdd6687a62aed", "95306e01f278ecd46f56dad23be040c33db43f3d", "fec60b9224e302d79cdf5c150edb15a841c4d6f1", "2a01b9536b048dee97de03963115efa47959d827", "1bd049c431a3b763cfba63963435f6c91465cb35", "6ff08854494ec866510cbb23fb0e18c1f977007e", "373a1c02be9b68369120e059d3c75d1c889153ab", "1a7f85a696afd4232b3d0eef81bc57989b8d82cc", "284fc0fb10b24bfad1b31333aea2ac82a8f154b7", "108d3e6081695a0901a36c62dc4a17d9a93d0110", "338a69b6f7d6a14bda690476efd462319ab258cd", "518e087d5fe54e7de71bc2ca55e7d58f179d5997", "7393bf444b14979c9eedf018132721caa53692b1", "391a6a423e06b0767e9fc9df4f43c5533c0ab662", "343d9c74d1465f884039b2145ed2fcc6bfcc26ca", "e61e2222c6dbd2be9c67c1c5eb41d99df44d81fe", "756a36d4d2eba006dbde8ee44f6930279e05e428", "5a831b0846fc8fbaf1befa27a3f91ed89f865133", "62693b41e40c327f8edec24789ed2eaa6a3dfb2d", "2205f353648f9ef13010d0f117cc34103d98f01f", "00512e62caa37e1f9c2025666133b8973a291b1e", "b4a90ee68b5391ede55e8d0695a04631df7ece76", "a043ccaae773ec54332d56b99a1b6d4c3b5394ec", "13d1bbd4c6b1d376f337a7d99d7cc66a9d3e0c4d", "942bb63e78d9edfe3b8d0a4bf9a3511c736a6930", "f2cf9f651a7546d4670e15e8e504572c635c0059", "0a465c15bebccd1500718548b18800fd3c463ed0", "bf92d7784a687b1aab20a0de2679498af641ab18", "5e8e46557e42940274e548246680c785eb729db2", "381e7525bc8b9d47ae0343e471f5f1d5e6963bbe", "9c03db9ad53be4862625256a24f56cc7b0a79c23", "1cc0b5aaa294e56c6d20ec672ad7f89972227ff3", "8e62c88cee326dc69fdb0d77b3a02d687e65395e", "8f6ccf34db0782e9790ecb66189842e38f94799a", "0f3056a84ea59a9b976163f28002401fa88ba80f", "222e5a12b9cfd041c9b97650b629ee34f938c99a", "3d80f420b87bf16eabac6142275e71bf48aa61a5", "ca4cddac342217e3c0143cb7f88daf1f50033c69", "4ba6f3a8c723c153bc64c12bd9112a079f97156b", "066478b5698300b5fd8c5c1ea0be81125b1296ad", 
"2170e2e54b79ddf8696607d9148eb529d0af1bf1", "4ff1908f41a684cd50c82e3b612432dd3154a718", "0ff00b7069e90ebdc7e14d1d287b5fd0d1edf198", "5cf79e6a2852335cd4174576e63ed8a6a718e5da", "2fb34088c68ba727eee0d18e7c4ae3ee13c6ab07", "22624bf589d08be80e2da89e4df3d0cd980a2184", "6f5cbb1030ead391b9bab2c5450ec12b56443367", "048b7767052c4239fc56d4480f1969cb3f09cc21", "dfa37e1ad351ae889502fb704cc93f4a77c1c642", "c79c373972c03158538bdc3e96f845c8640a2529", "912760092c200d1151900ff0e34acb113de43c3c", "5c49a25005494f4523b3aa34856f00d12cfd11aa", "725379eec0e1ed0fcce61ee2ad0ec3c1f5490b1c", "d0a96d4973388e98ddf10b11b67afc89d0f4c9e2", "d1be7f6de75dbe350d8d45bb0997e294fd58a985", "db4ade9e5f020960a4e5295d8f0031998c9c13ae", "425e0440166d14528153139620374928f8e7a93f", "ba2d406ad1c3584cab75d792ab5e9a914d9918db", "0b2502f06b184942e27a64e5c82b5aa1d2f0539f", "ed444042ba385c24adb4ef53f038c2a0b935f336", "4675d7ca2f35bfb9b48c3728499d91768d71990e", "9f5276bf69e450505a65d9ef3334ebffef45e73a", "983333d699242dfc221ff332081f31d8a18ae179", "5bcf27ab86be9fa376237d2d2bd8ebbf52982088", "968c1a89472a32e00d617a478c561b9cd2187c5b", "2390b9be19c8078d0ccb3628257a91cc3efc9e9a", "2e52465140904537d9b70194ca8659549550629b", "540746504cfe51a146762cbbca06cbc03229c778", "3f1c5b99816d05ef3778f53dafaa8a6becad9226", "32752285cb8898d105c4d757a91ed0cfe466c27c", "d0556be65e8564ab8bb3e26b6a0146a62027bc40", "9a7e0049decb706b7ab73e81dc31fd5c5a2de246", "06a733f70b90cf93c9b292b7f4d7a6a37db6df94", "40804e259e2bc16b8c95f00754dbdfae0bea9bf3", "26454b033fbe554436a20317edf78a7eafbfc6d6", "c83a648a2aec9532ef0f7e07758645b6f53f10ad", "1cafc9976a57e7989faa05b3a534149e0da8f078", "43858449adb8771876583ec82fa489e738a5e49d", "5e035543cd022a054653d9d09b6efec54905c444", "73184cdab2f1932435ec243880679b63e48f45af", "191c05aab25b4dd3752c1300f7c8fa6999d3a627", "b146108bc5fe9c8ad9c54bd08fe09163dc8606a1", "a348364681597ba41d68d23f5ba64bc536ab3827", "38fb1902c6a2ab4f767d4532b28a92473ea737aa", "a282cc482a1ca8cb3b22e286c3599f3fe0ecf0cd", 
"56257b0804c9c2418b32337d3af0970f7b67b084" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "1-12", "journalVolume": "", "outCitations": [ "49b4094f2c313a92da4461572c0bef80b0d7d649", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "0f44833eb9047158221e7b3128cde1347b58ccd6", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "06ce77e4abea63948580340be25d7f2a80369e5a", "23383d4d72b088a02e5bee13af652cf1458380bc", "47b0d6c0c40d6ec07cc63794df13bfcae47668d6", "668b882cf6026f6986b31dd6e99321aa03586cb0", "812c795ce4797b718a2947a9f9bdc5b6965c2b29", "399acab2bee6eccbfffe4a2ce688b6b1075e9c5e", "3296a866a88f6be8f9354695cc7a098596f04253", "3ac3cf82ff8f43db2e467f792d22d57d068f7ee0", "21d7130230162af2a4cc1b9375bfe9b37dbbd499", "1f76b7b071f3e65c97d09720f88d6b0ad9f07e8f", "8d67bac352dcd43c9c08f917ba8c4bebb444b55d", "5142c8273e0273230a10d836cdb26b029dc5cef5", "05bec301281a7dab3c5c7bc64c1d60ee18940acd" ], "paperAbstract": "Many architects believe that major improvements in cost-energy-performance must now come from domain-specific hardware. This paper evaluates a custom ASIC---called a Tensor Processing Unit (TPU) --- deployed in datacenters since 2015 that accelerates the inference phase of neural networks (NN). The heart of the TPU is a 65,536 8-bit MAC matrix multiply unit that offers a peak throughput of 92 TeraOps/second (TOPS) and a large (28 MiB) software-managed on-chip memory. The TPU's deterministic execution model is a better match to the 99th-percentile response-time requirement of our NN applications than are the time-varying optimizations of CPUs and GPUs that help average throughput more than guaranteed latency. The lack of such features helps explain why, despite having myriad MACs and a big memory, the TPU is relatively small and low power. We compare the TPU to a server-class Intel Haswell CPU and an Nvidia K80 GPU, which are contemporaries deployed in the same datacenters. 
Our workload, written in the high-level TensorFlow framework, uses production NN applications (MLPs, CNNs, and LSTMs) that represent 95% of our datacenters' NN inference demand. Despite low utilization for some applications, the TPU is on average about 15X -- 30X faster than its contemporary GPU or CPU, with TOPS/Watt about 30X -- 80X higher. Moreover, using the GPU's GDDR5 memory in the TPU would triple achieved TOPS and raise TOPS/Watt to nearly 70X the GPU and 200X the CPU.", "pdfUrls": [ "https://www.systems.ethz.ch/sites/default/files/hadp2017-ali_smesseim.pdf", "http://www.cs.toronto.edu/~pekhimenko/courses/csc2231-f17/Papers/tpu.pdf", "http://doi.acm.org/10.1145/3079856.3080246", "http://arxiv.org/abs/1704.04760", "https://arxiv.org/pdf/1704.04760v1.pdf", "http://www.eecg.toronto.edu/~moshovos/000/lib/exe/fetch.php?media=wiki:aca2017:tpu.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/596df6fb4d50c7886948b08f525c4e3393d05a44", "sources": [ "DBLP" ], "title": "In-datacenter performance analysis of a tensor processing unit", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "598800d4407f1b1f85c8af437574ea9271076e51": { "authors": [ { "ids": [ "3490923" ], "name": "Milad Nasr" }, { "ids": [ "34751507" ], "name": "Hadi Zolfaghari" }, { "ids": [ "1972973" ], "name": "Amir Houmansadr" } ], "doi": "10.1145/3133956.3134075", "doiUrl": "https://doi.org/10.1145/3133956.3134075", "entities": [ "Autonomous system (Internet)", "Border Gateway Protocol", "Censoring (statistics)", "Downstream (software development)", "Internet censorship circumvention", "Routing", "Simulation", "Telephone number", "Traffic analysis", "Waterfall model" ], "id": "598800d4407f1b1f85c8af437574ea9271076e51", "inCitations": [ "1269dc6d8d74f161be003c23880442cb8728ff35" ], "journalName": "", "journalPages": "2037-2052", "journalVolume": "", "outCitations": [ "1b2a56eaa107cb5ca31be45885084feec1953316",
"0c48d5e7fbb256f694383e989b45126947bd0501", "1708eba3482a2bc755f405ef9446914f82a321ad", "705b36626cb7b403344411eebd1473f9382cec07", "605ed83a6d1f4eaf995e85830f373923b11d6c13", "1ff9b151019648eaea901ee3c2b795e921358b21", "05216a1c4507464bd777ccc6a2a30ec44462ec7b", "34e4016ad6362dc1f85c4a755b2b47c4795fbe4d", "2581753d78fcafb4023d4ed24af2df3eba831d8f", "099d12674f378461b54c4472d36f0c867502f338", "542cf75c54a7ba6ca987253f6afc3697b715db24", "11e24cc816f9b874ad04a6e2618e454893d063e3", "2ed69f9aa374af4113f937df1482d9731911d511", "0fa37b92444d8fcbef150470226e216bce15e3a8", "9e414908a7d478d2cc6406810df8eee7eef5cf24", "3b5c6faa99a454499e33d87cfaef9dfcb0d7b796", "3aa79ac04db48c7dd9b4b42f8f9c5d0e4c4a73ba", "187fb09496d6b9cd4141a0917e365de28b4fc0c8", "0af0b8fdde60e8237e55c301007565c53b4d0f1f", "c6f506127aec443d477fde35714cf96f9ff33b39", "54cb447bc7bad09660cdbcf53a956b88cc618053", "281789e224b09970cd25ee988a6f6f898c629bb8", "2e6c55bc293681e65ba6741b28f99002809734be", "2b04ae25edd44fd663687024ba4beda5025e13cf", "0bdf027fee19b67357dc94b5c76049251ff70e75", "00a9446982911cbd96a127f70976d39ecaaaf306", "5b98f19ce2331c228a6877c4c54301e20165f3d1", "4f7d8cf8857ccc0f4a47ba03ec56f25fe5a0af57", "510e0ef071d8c305aaa4d83ce1608476c8c32712", "1e7ddb41095be915ec28bc85d14305df3d02a445", "49cd1030cd8f98b6ec7545750c78d580ca80a43d", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "04c2551de11d386ce6416c26f12bd9b428ac9d7e", "42abcc59847c85c38d3b4afd8a99cd02c9233083", "5d121afe922953dd88407d7cc49bdfb4b1f42be5", "2306fcffb543437059eb2d26d2953c303816911e" ], "paperAbstract": "Decoy routing is an emerging approach for censorship circumvention in which circumvention is implemented with help from a number of volunteer Internet autonomous systems, called decoy ASes. 
Recent studies on decoy routing consider all decoy routing systems to be susceptible to a fundamental attack -- regardless of their specific designs--in which the censors re-route traffic around decoy ASes, thereby preventing censored users from using such systems. In this paper, we propose a new architecture for decoy routing that, by design, is significantly stronger to rerouting attacks compared to all previous designs. Unlike previous designs, our new architecture operates decoy routers only on the downstream traffic of the censored users; therefore we call it downstream-only decoy routing. As we demonstrate through Internet-scale BGP simulations, downstream-only decoy routing offers significantly stronger resistance to rerouting attacks, which is intuitively because a (censoring) ISP has much less control on the downstream BGP routes of its traffic.\n Designing a downstream-only decoy routing system is a challenging engineering problem since decoy routers do not intercept the upstream traffic of censored users. We design the first downstream-only decoy routing system, called Waterfall, by devising unique covert communication mechanisms. We also use various techniques to make our Waterfall implementation resistant to traffic analysis attacks.\n We believe that downstream-only decoy routing is a significant step towards making decoy routing systems practical. This is because a downstream-only decoy routing system can be deployed using a significantly smaller number of volunteer ASes, given a target resistance to rerouting attacks. 
For instance, we show that a Waterfall implementation with only a single decoy AS is as resistant to routing attacks (against China) as a traditional decoy system (e.g., Telex) with 53 decoy ASes.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134075", "https://www.freehaven.net/anonbib/cache/decoy-ccs2017.pdf", "http://people.cs.umass.edu/~amir/papers/CCS17-Waterfall.pdf", "https://people.cs.umass.edu/~milad/papers/decoyrouter.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/598800d4407f1b1f85c8af437574ea9271076e51", "sources": [ "DBLP" ], "title": "The Waterfall of Liberty: Decoy Routing Circumvention that Resists Routing Attacks", "venue": "CCS", "year": 2017 }, "59af95c9bbaeb03a43a994d26451f32f4436e0e3": { "authors": [ { "ids": [ "2669515" ], "name": "Zhengbao Jiang" }, { "ids": [ "2259699" ], "name": "Ji-Rong Wen" }, { "ids": [ "1897235" ], "name": "Zhicheng Dou" }, { "ids": [ "2542603" ], "name": "Wayne Xin Zhao" }, { "ids": [ "1807485" ], "name": "Jian-Yun Nie" }, { "ids": [ "32916805" ], "name": "Ming Yue" } ], "doi": "10.1145/3077136.3080805", "doiUrl": "https://doi.org/10.1145/3077136.3080805", "entities": [ "Artificial neural network", "Convolutional neural network", "Diversification (finance)", "Document Object Model", "Experiment", "FITS", "Information needs", "Meet-me room", "Recurrent neural network", "Relevance", "Relevance feedback", "Supervised learning", "Unsupervised learning" ], "id": "59af95c9bbaeb03a43a994d26451f32f4436e0e3", "inCitations": [], "journalName": "", "journalPages": "545-554", "journalVolume": "", "outCitations": [ "762b63d2eb86f8fd0de98a08561b77527ae8f165", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "ac94075098f25c1935d95cfd78b83ea77e928217", "25f0625a92f6054b11057423111f9285c78376fe", "a3a29254936dde43dec98404956d87d8503ddd1e", "50d53cc562225549457cbc782546bfbe1ac6f0cf", "60b249993cc7245969c6ed16bd0c233593c88cfe", "071b16f25117fb6133480c6259227d54fc2a5ea0", 
"3c452211d3cea26b58b46fefc5c63edb9a0e7b8e", "1b50c8353d9e4a241a5f4a9c0088eb9c5f593e1f", "ce5b0b9a74c1cbe8aa892d0ce6a731e79bff26e5", "1510cf4b8abea80b9f352325ca4c132887de21a0", "ded1fa5432e066c3a9f998bd52c6ed736068c582", "1b1df9f75ee6f27433687dad302387f811dab64d", "15004aadabd967ac722a28a9c3bb39cf5bc32605", "10ce81dadc2e07d69c8a4f0bbdf7d14b3f37882e", "76ca27cdc51e392919e1ebaea543a587ea9a9950", "10205ce087b9190ac18ade8be02a660d92a6ea52", "e3796f39fe2623823a5d48dee2822da9502561c5", "5b9534442f91a87022427b74bca9fd95dd045383", "e192383afbced65cd2babb8d7c1ffc159d0b78a4", "452f7411af7d471dd3ba84c2b06b2aaffc38cdb9", "14f936f5e2e066b36cb7a18ca35099034d391e95", "85b055c4fc2e324e88cdfe1c20e557a33b940d7e", "0633379792f0cfe3365ae5421f3f32d3b9192a52", "40e639954d38e6f7f5cd07a8c1a4c48512d8b199", "900874be4d48f919434a3107d959e57a55bc286c", "09a503095db2d68b439e48d67481399198ed0e5b", "31686da7fd747887672f9f5f4877b93f496c8cf0", "684be9e9bd41d148158c64ba811c08f66b58092a", "517a461a8839733e34c9025154de3d6275543642", "ed5d79e21beccbc3a978ddd2cbd64a9e62df8499", "24f896e584033e5aa9a20327e25de1f387c3f430", "f040f11a89a57eaec99773770a43d886a5efd22c", "65d9ed718c4e86dfb9ac81c298659ed30b02e174" ], "paperAbstract": "Search result diversification aims to retrieve diverse results to satisfy as many different information needs as possible. Supervised methods have been proposed recently to learn ranking functions and they have been shown to produce superior results to unsupervised methods. However, these methods use implicit approaches based on the principle of Maximal Marginal Relevance (MMR). In this paper, we propose a learning framework for explicit result diversification where subtopics are explicitly modeled. Based on the information contained in the sequence of selected documents, we use attention mechanism to capture the subtopics to be focused on while selecting the next document, which naturally fits our task of document selection for diversification. 
The framework is implemented using recurrent neural networks and max-pooling which combine distributed representations and traditional relevance features. Our experiments show that the proposed method significantly outperforms all the existing methods.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080805", "http://rali.iro.umontreal.ca/rali/sites/default/files/publis/dssa.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/59af95c9bbaeb03a43a994d26451f32f4436e0e3", "sources": [ "DBLP" ], "title": "Learning to Diversify Search Results via Subtopic Attention", "venue": "SIGIR", "year": 2017 }, "59afbb0417ffe1a7fdf1cf4cb137321dc3b87317": { "authors": [ { "ids": [ "35002050" ], "name": "Nadav Amit" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Linux", "Linux", "Memory-mapped I/O", "Operating system", "Scalability", "Translation lookaside buffer" ], "id": "59afbb0417ffe1a7fdf1cf4cb137321dc3b87317", "inCitations": [ "044f5a9c7b571f42cb47c7bc82a2aeb9752002f1", "a4710ac80826e48a410b1b9da80c2ca0f4a6a357" ], "journalName": "", "journalPages": "27-39", "journalVolume": "", "outCitations": [ "12b795dd078f0c19f313325380fb25655cae796d", "274e7e576534b3e091f09e801cce807f5fd221c1", "0a2402b036499d4f1d33d56e7f66d94ec81add93", "59ca42e1911be417863d0f7068b89e1e59189cc9", "c65463414285addf1b030d60c3244633767d901e", "48215f3287f7d527dc63c5b504ca8397d3bdef4e", "3890d0d9335b4a39a9fc163af28f94896adfad20", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "de5bd35339e5692002a77145d8b861940429ad77", "be337425916d4e61442269a9bc1cf69169cedb8d", "a869cba9779039e14b0e8b1a14a28d38f2a7d7f9", "6bf62a3aec49e5d128ae024a17628893898fcd8f", "4acdb61098053f38d5500a9ef974d24828696b9d", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "4f7d3fadb180689e8ea2fa48810b6e5be8aa438b", "94783d113951822195d4ba44599a8fcbdef9d4bf", "1c2aed008f7bcb769d0e6f8109434c766fb22bde", "0027600a3ff1d431ee44fcb8b149388ec7bb3ac8", 
"158ebe313a72857c5534a313f3ec0e413593b732", "d3f0d34414a5bd55eee8b762ecd5efe54657aeef", "c121933395049cca10dc9d3d5ea72a0b8c5bde22", "43393a561914f05be312a1dff5a757cbc384d1a1", "102df10591f98830cc3357b47729d6f9e9af3eca", "af2db9f221a472c0bb97260342b8e361778114b3", "2608db8056e1598cf0b0bce8c2e305c3735a7bbe", "2201c6e88e990150fda976fa85d0a55521b792dc", "1bed30d161683d279780aee34619f94a860fa973" ], "paperAbstract": "The operating system is tasked with maintaining the coherency of per-core TLBs, necessitating costly synchronization operations, notably to invalidate stale mappings. As core-counts increase, the overhead of TLB synchronization likewise increases and hinders scalability, whereas existing software optimizations that attempt to alleviate the problem (like batching) are lacking. We address this problem by revising the TLB synchronization subsystem. We introduce several techniques that detect cases whereby soon-to-be invalidated mappings are cached by only one TLB or not cached at all, allowing us to entirely avoid the cost of synchronization. In contrast to existing optimizations, our approach leverages hardware page access tracking. We implement our techniques in Linux and find that they reduce the number of TLB invalidations by up to 98% on average and thus improve performance by up to 78%. 
Evaluations show that while our techniques may introduce overheads of up to 9% when memory mappings are never removed, these overheads can be avoided by simple hardware enhancements.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_amit.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-amit.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/amit" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/a3df/1b5681159b2202dbc7fc3e47cffb186480d9.pdf", "s2Url": "https://semanticscholar.org/paper/59afbb0417ffe1a7fdf1cf4cb137321dc3b87317", "sources": [ "DBLP" ], "title": "Optimizing the TLB Shootdown Algorithm with Page Access Tracking", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "59b0e938a489511a6fe1021f79808ab917911f9c": { "authors": [ { "ids": [ "8297244" ], "name": "Mustafa Abdul Jabbar" }, { "ids": [ "3180418" ], "name": "George S. Markomanolis" }, { "ids": [ "3083117" ], "name": "Huda Ibeid" }, { "ids": [ "2274654" ], "name": "Rio Yokota" }, { "ids": [ "10867102" ], "name": "David Keyes" } ], "doi": "10.1007/978-3-319-58667-0_5", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_5", "entities": [ "Algorithm", "Fast multipole method", "Recursion", "Remote direct memory access", "Scalability", "Space-filling curve", "Sparse matrix" ], "id": "59b0e938a489511a6fe1021f79808ab917911f9c", "inCitations": [ "f92991bab11e7cec59576305e6fd44f44e11aff7", "48e6c7035b35a3ee8b8c2e430c158bfd7102a2fe", "75e5df89fa0b9741a6f2b71f476f26d8aba2e24e" ], "journalName": "", "journalPages": "79-96", "journalVolume": "", "outCitations": [ "044c2e4b6ee04fbc1b5838afd8583dc3d11e8222", "d2652adc1e298877cfeddf93adbf5019364b3c99", "3eb61d9182600f9402b17a1269eba9152f13ada3", "6a110a740a2255f13bf27d199c6aa3a1789b9113", "4b0b00804d6e57574dd5e12c0a1442ace392c58d", "258e8729c918f01644a3f8d9f40aa23d9bea0130", "014fd14d7ec998c5376c420c50abcb852d5924e0", 
"2773ae9947e2f6cb58152824eca994434f1b8322", "6d211d401b1d13eb054efd13b267f19e4c742aee", "212f6bcfb4c1dff1636c29924251ffb20863cb41", "66279f9b7a5123d623b8726567a7ce95b7d83d9c", "2fe1c7a9be48f8ea8feaabf6d0ddd4ae0cee40ba", "21e05bb43446475a2034a62dc8c67dfa368ea0d4", "9766d4ef963ed2711ff6f86388192bcefe2385de", "688384fc5e643445e835435e96b9dfcfb6598d36", "876b16e6d6f2650d794bd13eda37fd15b2010b84", "77e9cab2f965b970669052b634794eb19f377541", "60c4e253965df3bdde1df6450599457bd2c214d3", "aea49287b59299f3c33d29c412e368830da4b208", "4dd07b62a60f095fba853292280df8069508f591" ], "paperAbstract": "Reduction of communication and efficient partitioning are key issues for achieving scalability in hierarchical N -Body algorithms like FMM. In the present work, we propose four independent strategies to improve partitioning and reduce communication. First of all, we show that the conventional wisdom of using space-filling curve partitioning may not work well for boundary integral problems, which constitute about 50% of FMM\u2019s application user base. We propose an alternative method which modifies orthogonal recursive bisection to solve the cell-partition misalignment that has kept it from scaling previously. Secondly, we optimize the granularity of communication to find the optimal balance between a bulk-synchronous collective communication of the local essential tree and an RDMA per task per cell. Finally, we take the dynamic sparse data exchange proposed by Hoefler et al. 
[1] and extend it to a hierarchical sparse data exchange, which is demonstrated at scale to be faster than the MPI library\u2019s MPI Alltoallv that is commonly used.", "pdfUrls": [ "http://arxiv.org/abs/1702.05459", "https://doi.org/10.1007/978-3-319-58667-0_5", "https://arxiv.org/pdf/1702.05459v1.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/59b0/e938a489511a6fe1021f79808ab917911f9c.pdf", "s2Url": "https://semanticscholar.org/paper/59b0e938a489511a6fe1021f79808ab917911f9c", "sources": [ "DBLP" ], "title": "Communication Reducing Algorithms for Distributed Hierarchical N-Body Problems with Boundary Distributions", "venue": "ISC", "year": 2017 }, "59ca5027a9790f336fc1c3b785e9fbca97bad7c0": { "authors": [ { "ids": [ "3294888" ], "name": "Minyoung Jung" }, { "ids": [ "1727224" ], "name": "Jinwoo Park" }, { "ids": [ "3260423" ], "name": "Johann Blieberger" }, { "ids": [ "1894492" ], "name": "Bernd Burgstaller" } ], "doi": "10.1109/ICPP.2017.36", "doiUrl": "https://doi.org/10.1109/ICPP.2017.36", "entities": [ "Algorithm", "Automata theory", "Cache coherence", "Computer data storage", "Computer science", "Data dependency", "Data parallelism", "Deterministic finite automaton", "Feedback arc set", "Finite-state machine", "Hash function", "Hyper-threading", "In-memory database", "Locality of reference", "Network switch", "Non-blocking algorithm", "PROSITE", "Parallel computing", "Pattern matching", "Public key fingerprint", "SIMD", "Sales force management system", "Scalability", "Shared memory", "Thread-local storage", "Work stealing", "X86" ], "id": "59ca5027a9790f336fc1c3b785e9fbca97bad7c0", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "271-281", "journalVolume": "", "outCitations": [ "971301fb398a7ee0a43adbc60d8d2119c9625733", "45010247286ccd7df378b1ad959e08e5cbdbf5ed", "64b6ef88dcb71ca50e2b3b63330da9726f982503", "0764b4e6c5ad1c827c4a3a99f3ca2a3ca3d629da", 
"64a7478762b24ac84b5068754eb26609a61a1884", "4e17a91733892b69033076a04ba6cb150ab0b001", "4f55cddeca52ff55209658d0d4603c00bc283ba8", "d77746dd3f1dcc2f2cfb750f9847313cd4689a6d", "3ca09297ea549605c99a96daf8bc50b23cc54efc", "87de316ea08272afbda356b8d580385dd0d8382f", "80527e7595530951081494d1b98f3f13da3033a2", "33d679e9c3602d22bc1dc08c9c2700b963385e38", "05ca17ffa777f64991a8da04f2fd03880ac51236", "2243b9c47cf56ec17c922b5403e1459821838a7e", "5c592e3444f22ab68dd22e632dbe3aae656109dd", "0cff369abc1673194ea1e61999ad6c8cd1c8bc30", "5f74b8893fbe78e0d6e1d1f58d55b68feae170c6", "5750815fc3230623164fa3cd3a983b6e58bf64f4", "57f710a44ee2397c529865e8123ea3e2d43fb05c", "ffa7f39774aafd21bfd3ec9c0dfda87ec6f2276d", "20cab7d169bf64331d263bbd5f0dc7a07ae10a4a", "f6699544f79aaac77821bd4f6e6907d3abca5372", "3547ac839d02f6efe3f6f76a8289738a22528442", "1291dc27b5e569bfeae7c9d114eed350b31cb8b7", "518ca7ad0d047e4fb193c6b30b17391ce73e5e62", "363d109c3f00026f9ef904dd8cc3c935ee463b65", "683109e0afa9f83c0775dcc6f107e0f358edfa10" ], "paperAbstract": "String pattern matching with finite automata (FAs) is a well-established method across many areas in computer science. Until now, data dependencies inherent in the pattern matching algorithm have hampered effective parallelization. To overcome the dependency-constraint between subsequent matching steps, simultaneous deterministic finite automata (SFAs) have been recently introduced. Although an SFA facilitates parallel FA matching, SFA construction itself is limited by the exponential state-growth problem, which makes sequential SFA construction intractable for all but the smallest problem sizes.In this paper, we propose several optimizations to leverage parallelism, improve cache and memory utilization and greatly reduce the processing steps required to construct an SFA. We introduce fingerprints and hashing for efficient comparisons of SFA states. 
Kernels of x86 SIMD-instructions facilitate cache-locality and leverage data-parallelism with the construction of SFA states. Our parallelization for shared-memory multicores employs lock-free synchronization to minimize cache-coherence overhead. Our dynamic work-partitioning scheme employs work-stealing with thread-local work-queues. The structural properties of FAs allow efficient compression of SFA states. Our construction algorithm dynamically switches to in-memory compression of SFA states for problem sizes which approach the main memory size limit of a given system.We evaluate our approach with patterns from the PROSITE protein database. We achieve speedups of up to 312x on a 64-core AMD system and 193x on a 44-core (88 hyperthreads) Intel system. Our SFA construction algorithm shows scalability on both evaluation platforms.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.36" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/59ca5027a9790f336fc1c3b785e9fbca97bad7c0", "sources": [ "DBLP" ], "title": "Parallel Construction of Simultaneous Deterministic Finite Automata on Shared-Memory Multicores", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "59e117f8e2a66557ebed691cafc0f1993b290fa3": { "authors": [ { "ids": [ "1723281" ], "name": "Lu Wang" }, { "ids": [ "1698571" ], "name": "Yan Li" }, { "ids": [ "5426025" ], "name": "Jiayu Zhou" }, { "ids": [ "39895985" ], "name": "Dongxiao Zhu" }, { "ids": [ "37513601" ], "name": "Jieping Ye" } ], "doi": "10.1109/ICDM.2017.58", "doiUrl": "https://doi.org/10.1109/ICDM.2017.58", "entities": [ "Algorithm", "Bridging (networking)", "Computer multitasking", "Convergence Insufficiency", "Multi-task learning", "Programming paradigm", "Type\u2013token distinction", "Whole genome sequencing" ], "id": "59e117f8e2a66557ebed691cafc0f1993b290fa3", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", 
"journalPages": "485-494", "journalVolume": "", "outCitations": [ "53e28e1c1650133f1e78cb6d985ecf13530319e3", "00cdf25beacb9e46fc1d9a9d9a06d6b5f110168e", "155a9847645c6b56d199506dd388b2844fc2fece", "6efebd4cb8cb39da348e6818b94c680d3cfe198c", "c4806efffa95a727006d2d6284240f2c181f75ab", "aba73f6957bce1648f066935f7ea85a99119be5d", "e79731819ceb4bc0ae51b8e09533e0739e9d9d27", "3bedd45617f8d616d107552103bfc57ed69f6b62", "4f5e2d78128805249cff3075dc7e8e526f0e4fb1", "01cbff216f2888f96151fb490338af40a09a0c30", "00791f2d67f9a6fdb77b669450fa9f25c275fd56", "190adfa68b52edf338bee01391c6bfcab9db4da6", "202f4279436464a5c4b32a0a225a32537a60ef51", "161ffb54a3fdf0715b198bb57bd22f910242eb49", "2c3f808aa10f7d8d26ee5d52c41e2fd6f6a481de", "3a2f8879c4623f1d17d57e47a5516e13ef857e7a", "c8ff85a7cba5c6730096239297e28eafbf0e9f35", "23ecef232566a28dbfbc3fd033eb2c0c87057879", "4ba27b132755200d1ab273a39595d93ad91ec7df", "7e09f96834306e3112131802fc68bc04e2cf554a", "96c5dced4451db71e49a0089a0bb61cb933d5895", "695df73f2f4bca1e406622e8734e720332e4013b", "07d1db388cd489420d40d0edb13e074d86c77dbd", "c57cf2b956b4ec3171a7677767907654a2ddebb5", "65bfab252d56189b0b52f52ff9b800857b71e72f", "4867ede3f6b318d79ca7f72a719788a88fbfd82e", "3fb92efb2a9a075b5ed961b82cc561d71470d1c8", "5832ca2ee45e22c1609bc9834afcfaae53460a44" ], "paperAbstract": "Collecting labeling information of time-to-event analysis is naturally very time consuming, i.e., one has to wait for the occurrence of the event of interest, which may not always be observed for every instance. By taking advantage of censored instances, survival analysis methods internally consider more samples than standard regression methods, which partially alleviates this data insufficiency problem. Whereas most existing survival analysis models merely focus on a single survival prediction task, when there are multiple related survival prediction tasks, we may benefit from the tasks relatedness. 
Simultaneously learning multiple related tasks, multi-task learning (MTL) provides a paradigm to alleviate data insufficiency by bridging data from all tasks and improves generalization performance of all tasks involved. Even though MTL has been extensively studied, there is no existing work investigating MTL for survival analysis. In this paper, we propose a novel multi-task survival analysis framework that takes advantage of both censored instances and task relatedness. Specifically, based on two common used task relatedness assumptions, i.e., low-rank assumption and cluster structure assumption, we formulate two concrete models, COX-TRACE and COX-cCMTL, under the proposed framework, respectively. We develop efficient algorithms and demonstrate the performance of the proposed multi-task survival analysis models on the The Cancer Genome Atlas (TCGA) dataset. Our results show that the proposed approaches can significantly improve the prediction performance in survival analysis and can also discover some inherent relationships among different cancer types.", "pdfUrls": [ "http://www.cs.wayne.edu/dzhu/ICDM2017.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.58" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/59e117f8e2a66557ebed691cafc0f1993b290fa3", "sources": [ "DBLP" ], "title": "Multi-task Survival Analysis", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "59f9d3e4fb8e2c52a2381f6695b07f08918c2c21": { "authors": [ { "ids": [ "1750856" ], "name": "Michael Benedikt" }, { "ids": [ "40404585" ], "name": "George Konstantinidis" }, { "ids": [ "1785690" ], "name": "Giansalvatore Mecca" }, { "ids": [ "1703160" ], "name": "Boris Motik" }, { "ids": [ "1802817" ], "name": "Paolo Papotti" }, { "ids": [ "2292020" ], "name": "Donatello Santoro" }, { "ids": [ "2181980" ], "name": "Efthymia Tsamoura" } ], "doi": "10.1145/3034786.3034796", "doiUrl": "https://doi.org/10.1145/3034786.3034796", "entities": [ 
"Algorithm", "Benchmark (computing)", "Chase (algorithm)", "Experiment", "Sputter cleaning" ], "id": "59f9d3e4fb8e2c52a2381f6695b07f08918c2c21", "inCitations": [ "a59064f8a4cf2bb941040792c0e5378b78c2314b", "6e4682ba700c6926b49aa7b6215ab8d9afdd7b7f", "2482e8dd32ce3b00e5d70941be388c50b2475a60", "aca14454517490ff3820ebe2149146e5154bc58b", "3293396cd84eac710582ede8325e1a9185c6a9ef" ], "journalName": "", "journalPages": "37-52", "journalVolume": "", "outCitations": [ "f3bc6b44f1c5bfce479f5fdf72bd7a9f636c423b", "6a1b0907f052965e8447ce5c54454dad023e0068", "d33feb0daa252ad279dc74449860d14f17cbe61a", "511bf179e362f6b071f0a5fb329dece4f4e7ae8b", "8b3cb3d5dd580bcccd079edd9b47e20e45dfdec3", "065666f3f059b414285998b2dc12cac101879ade", "050fb3d6fb3eafb9d0095927a534fc55ccbe3218", "39483fea0eaf8eafbe171e826508aa713fcc1553", "96feadda4d77a2008a17c0687c19786803244c5b", "18dad09bbacdc6f0592b06b2c4a78bbee2b15d35", "4a981a091c81e73e22eb1923778e63698e0d00eb", "1b7c794a96b53dadc777aaee5a7dab59d302636a", "02b35db726b3f6ef9f6d1084cbb7d4ad0e44daba", "0e3ee9a7147f1bc4d1ab2cf7ce8808e6829be48b", "744eacc689e1be16de6ca1f386ea3088abacad49", "585681b8a7f941ab6bf5b34fa6437fc75f38966b", "84441100dc794fbd05f967824cf3ec769b985e41", "736914265062d8e118e4df97dcd8e011a3b5c863", "44254a445525585a87cab60bb7e5991ec0c43824", "da25e34ee7542f731e684bd2bf0c95b779335e44", "75b7da6af970b0c87999853d30dd54bf3b08fa77", "65a2d3abc36b939d1e2a59faca57befce31fda19", "b6a51df38bc2236037bfd3673815865631edb4e7", "7384777a404da199820e357ab6373182a9743129", "db10480323bd2b9aeb58268678c204053722d6ad", "9377d3c17d7b76294928b5249f8fa065ed19ea06", "2bc7bac7f7cdf20816758fd794909176cc97ed92", "eb329bd5697c2295f9c79cb3c5d46f57bd25f302", "119aa9289ed882fadb3093eaa17b4cc1e6dbf530", "1888bbaf6203ddfacffa4ff70f5e01a102c29895", "910105f020fb34daa183e7644d5213e7ab4dc350", "a97b77c8c3f9e8247c497d1b4c27c958a15f3a62", "3f2776d223f8265d2088f5523fd4c7824015040a", "353e9cd76026d1b5abf1631a5bcf74fbf111b2d5", 
"da025acf3ad60123e29690bf5f6c46b589c70330" ], "paperAbstract": "The chase is a family of algorithms used in a number of data management tasks, such as data exchange, answering queries under dependencies, query reformulation with constraints, and data cleaning. It is well established as a theoretical tool for understanding these tasks, and in addition a number of prototype systems have been developed. While individual chase-based systems and particular optimizations of the chase have been experimentally evaluated in the past, we provide the first comprehensive and publicly available benchmark---test infrastructure and a set of test scenarios---for evaluating chase implementations across a wide range of assumptions about the dependencies and the data. We used our benchmark to compare chase-based systems on data exchange and query answering tasks with one another, as well as with systems that can solve similar tasks developed in closely related communities. Our evaluation provided us with a number of new insights concerning the factors that impact the performance of chase implementations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3034786.3034796", "https://www.cs.ox.ac.uk/boris.motik/pubs/bkmmpst17becnhmarking-chase.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/59f9d3e4fb8e2c52a2381f6695b07f08918c2c21", "sources": [ "DBLP" ], "title": "Benchmarking the Chase", "venue": "PODS", "year": 2017 }, "5a37b117426b1ff41411429fb5687fe8d045757a": { "authors": [ { "ids": [ "1782350" ], "name": "Fred Douglis" }, { "ids": [ "32331661" ], "name": "Abhinav Duggal" }, { "ids": [ "1816098" ], "name": "Philip Shilane" }, { "ids": [ "31934385" ], "name": "Tony Wong" }, { "ids": [ "9765570" ], "name": "Shiqin Yan" }, { "ids": [ "35031086" ], "name": "Fabiano C. 
Botelho" } ], "doi": "", "doiUrl": "", "entities": [ "Computer data storage", "Data deduplication", "Garbage collection (computer science)", "Initialization (programming)", "Liveness", "Run time (program lifecycle phase)", "Sequential access" ], "id": "5a37b117426b1ff41411429fb5687fe8d045757a", "inCitations": [ "f962e285194ec80f584ea25f77dbbe836fa0c63c", "18e74df64bdad475e585b165640c8db3dce00125" ], "journalName": "", "journalPages": "29-44", "journalVolume": "", "outCitations": [ "a48f6d8ef6d2831bf0fd2d720c20adb8ca988550", "b8337665c72596062c70d5809a691f2f452134c2", "2c8dfa703ba8cb907384149820e117d5935d9ae0", "71d848f5633852d3c9a54ca733af6c0103cd8364", "0ce479229630e55e732597cf9b2aeb5018aae4c2", "80fd0f9813d08ef66913aeee95fb674853dbafd7", "d4da5fbf10b696fa691501ec08618aee479ba3ea", "5bb770af1973f929e8622f17ddf378d439245144", "9c046601e01d693c1d36a074c00d226c563c76f2", "26a88fcaf621270af5f5786fdb2df376a2bc00aa", "bb6bb8da52698fc965f368ee64c68beab59737a8", "f8e4c3f5ddc4d208d1c3161a9d9a32c2141e5500", "2f6af58c7905fb8367652fe62fbb1f6ec7e28be0", "2be8894db1a0c4787f36e22fe37d4d99dcd916c1", "4fa3950e7676c7262e62e8d39a4fc9c304e53aa6", "1b753f82e08a04d1442f357edbfd03385ef788f7", "088e3e939ad234b6fdd0e321290fb26937dc2553", "bf6275801e4bac2918f1b8698c2892e1a375808f", "b02f480d56b8a4e462457e3d5fcf765ee9dc2494", "8502fd5a659150e0635973744c4a80138c4e7ca7", "4a106c99bc3dce2c3999211778fe7bde0a3786f0", "04bffc7c4b7e6e40815621c8981f94ba5a3fad8a", "04d7623afc83282bc8d4e3ef3ad2decc86cc237d", "c6d01d9365d7b134ef2efed0063820d1b9be659a", "6662d518878d3eee218462ee4d8b389c64e1b6f7", "46a574413123beb2ba0572c563e1a4883baec997", "185d057d3bce4ea115c4fbe39da65a43b1cc1a0c" ], "paperAbstract": "Most storage systems that write in a log-structured manner need a mechanism for garbage collection (GC), reclaiming and consolidating space by identifying unused areas on disk. In a deduplicating storage system, GC is complicated by the possibility of numerous references to the same underlying data. 
We describe two variants of garbage collection in a commercial deduplicating storage system, a logical GC that operates on the files containing deduplicated data and a physical GC that performs sequential I/O on the underlying data. The need for the second approach arises from a shift in the underlying workloads, in which exceptionally high duplication ratios or the existence of millions of individual small files result in unacceptably slow GC using the file-level approach. Under such workloads, determining the liveness of chunks becomes a slow phase of logical GC. We find that physical GC decreases the execution time of this phase by up to two orders of magnitude in the case of extreme workloads and improves it by approximately 10\u201360% in the common case, but only after additional optimizations to compensate for its higher initialization overheads.", "pdfUrls": [ "https://webcourse.cs.technion.ac.il/236834/Spring2017/ho/WCFiles/FAST17-GarbageCollection.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_duggal.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/douglis", "http://www.usenix.org./system/files/conference/fast17/fast17-douglis.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_duggal.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17-douglis.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/6060/9d159a47e790595af5e62bcbfa1e1996a689.pdf", "s2Url": "https://semanticscholar.org/paper/5a37b117426b1ff41411429fb5687fe8d045757a", "sources": [ "DBLP" ], "title": "The Logic of Physical Garbage Collection in Deduplicating Storage", "venue": "FAST", "year": 2017 }, "5a4cc3e885f9f2a512e2d914928b4e7d82b84446": { "authors": [ { "ids": [ "39922425" ], "name": "Daniel R. 
Thomas" }, { "ids": [ "1794388" ], "name": "Sergio Pastrana" }, { "ids": [ "36106268" ], "name": "Alice Hutchings" }, { "ids": [ "1892117" ], "name": "Richard Clayton" }, { "ids": [ "2619693" ], "name": "Alastair R. Beresford" } ], "doi": "10.1145/3131365.3131389", "doiUrl": "https://doi.org/10.1145/3131365.3131389", "entities": [ "Vulnerability (computing)" ], "id": "5a4cc3e885f9f2a512e2d914928b4e7d82b84446", "inCitations": [ "6972f7e6704a524980d53353c65adbabedc602c0" ], "journalName": "", "journalPages": "445-462", "journalVolume": "", "outCitations": [ "6173558ba7f1cec8c8cee0ddad6b9e81e89bdebb", "3d46dbb0da1b4e0ee5b45c46525d9459fb94222d", "db70ac76519d74f18090229210d682d07858d83d", "1c55777aba716f302e5083f18cfa964711250f63", "4116a6f1cd0efe28f8ab24e277e2ca43c301be90", "f9879006532007705965cba23a12a70801d215d5", "c617c8803fdeb439fe9bdac993c47df1070a818f", "01f2e7f3dee35c2c7b9179d8eb46f4f766c101d1", "5c93aebbc5caf080cc740d7876414e9a06eeb9b8", "462c0a5ab09b983623aea687b36e3e64d47332ab", "0849cec8dbced50deb6c1e36e764d280e9c8df87", "db1bc90df0bdfd3473cd82534e7e7e8ba0d4acd9", "6200d129b2c877929c21c22f28927e85cd47b1e5", "4681a0116597fd0804b07e8176b8761e4f569743", "4e8d6ac43d1f9df6af2f2936edeafbe518e1ac76", "c06c04c65d997264feee6a65a2d7d5006ffef7cc", "588ae052b7831f8aeaaaca8489c1b62bd4824ae6", "c484e351445232ea526c8d73b84bc529ffcddee1", "be20eb62c946dbd03a1ae2623028b16b0d1971af", "9a25143ad5cb06a596eb8bda2dd87c99de36d12c", "0bd506c244cebeca588578b978b23e5f429334da", "3655cf413dfe8a05ea6eecda86ec7b6d6d8425c4", "72bda8b4833efa1b0b0e8be115cacd16163c3bcf", "e34f06634a15790473d6d1d8513924652f1d8b30", "153f2bb7f27d6475692ca6a6700e2609ac64d0bc", "34110f07e10ee4734d8b274f4c12590cd981f1a5", "65f55691cc3bad6ca224e2144083c9deb2b2cd1d", "e555be01f8043e653488c60e0c4d6f36627bd8b2", "79ae1dc11016cab3faf7ef29806e3d0278435efa", "dff46b474057183e9db033ed038aefb693f6b7c7", "387e2538e7df60f59701163e47d7f19e32a152e1", "04645e17a1acb783a2ffb2b9b201624c76d52ae2", 
"54bdf79b699c26dab5047186d533ba10f1fe22a8", "a1ff6ca8cae11e1858a061b04f67e2dbf255f17d", "b9b563efa9ada517da2b0ccebbf7ce44dd431b9d", "3385c2d73d985ac92deef5e6c47584622bc6ae44", "6b7dee7321564106e889e57371af620e3b5f5796", "5c4a6ead27531632b4998d9f330bff0d6fc16a14", "5d4a9e5f0c90a9bae8a85a5a67d82de19d7f2429", "3dfb4699a5121e4b5b034fd063f7c4c93f614822", "48f1145733cbcb1e438271572267ddde5b463702", "456d79275d17e63a77868046703ab072a88da9b5", "0d51fc5764b1d38c4a09bee222a767401ec1987c", "6f3032e94f760f7115571d3b72631741ca5c7290", "9db1ca86b92cb5e0a21263de77e3e266b71637af", "078d2f8c370bce64629f14bfd177f6fd43a10074", "9a4f0bb69dd41906fe2c1a7e4e1a5c5835550606" ], "paperAbstract": "We evaluate the use of data obtained by illicit means against a broad set of ethical and legal issues. Our analysis covers both the direct collection, and secondary uses of, data obtained via illicit means such as exploiting a vulnerability, or unauthorized disclosure. We extract ethical principles from existing advice and guidance and analyse how they have been applied within more than 20 recent peer reviewed papers that deal with illicitly obtained datasets. We find that existing advice and guidance does not address all of the problems that researchers have faced and explain how the papers tackle ethical issues inconsistently, and sometimes not at all. Our analysis reveals not only a lack of application of safeguards but also that legitimate ethical justifications for research are being overlooked. In many cases positive benefits, as well as potential harms, remain entirely unidentified. 
Few papers record explicit Research Ethics Board (REB) approval for the activity that is described and the justifications given for exemption suggest deficiencies in the REB process.", "pdfUrls": [ "http://www.cl.cam.ac.uk/~drt24/papers/2017-ethical-issues.pdf", "http://doi.acm.org/10.1145/3131365.3131389", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final137.pdf", "https://www.cl.cam.ac.uk/~drt24/papers/2017-ethical-issues.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5a4cc3e885f9f2a512e2d914928b4e7d82b84446", "sources": [ "DBLP" ], "title": "Ethical issues in research using datasets of illicit origin", "venue": "IMC", "year": 2017 }, "5a6d6b73083571cce741122ce7ae49939f26b80d": { "authors": [ { "ids": [ "1751385" ], "name": "Li Han" }, { "ids": [ "2180844" ], "name": "Louis-Claude Canon" }, { "ids": [ "1707417" ], "name": "Henri Casanova" }, { "ids": [ "1735015" ], "name": "Yves Robert" }, { "ids": [ "1736346" ], "name": "Fr\u00e9d\u00e9ric Vivien" } ], "doi": "10.1109/CLUSTER.2017.14", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.14", "entities": [ "Algorithm", "Application checkpointing", "Central processing unit", "Directed acyclic graph", "Dynamic programming", "Fail-stop", "Failure rate", "Graph (discrete mathematics)", "Job shop scheduling", "List scheduling", "Makespan", "Order of approximation", "Parallel computing", "Recursion", "Run time (program lifecycle phase)", "Schedule (project management)", "Scheduling (computing)", "Series-parallel graph", "Stable storage" ], "id": "5a6d6b73083571cce741122ce7ae49939f26b80d", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "487-497", "journalVolume": "", "outCitations": [ "6fb9d8334659baa82339cc4af52b59abbc8316ac", "3466c2fd45c0a8f49b897a2b6f195c330c468cb3", "4908fe53a91465eaf95b21c4ca4f05378b90dcc4", "53242679719fbf985d7a1132a6d538f4dded42c6", "512bbff1834aa5f9e346521ce538ef2d09e8b304", 
"174707ad91eebda40bbca68fff2eb15a741196fb", "4224374796da64e17fce96033d4cd42240d80eaf", "16258e60ad93771968f0c74ee18cc4850cbf6946", "9d974bff46b6a4a5a889a30ac37a5fce2c5b634d", "02c125aaea27be981fd9f0012c2c55436aace1ea", "9c8bbee60dac4ab599276815068e11f487ccb69e", "bf502232cc9c04728ad2308fd30c043a1aaee305", "1324f1d5b20f08cac775f10089a788767c56d5a9", "3087a47c1fdb5ebb1b28f3562533e3cce782dd36", "3b4d5b3e93aa6d5ea413c726c17057d8d1eafb2d", "95e91566920af5ea257986daee9356d7beacb478", "7e944c565a5719e054ce4f52f06af06932b4c72d", "02d3739f3d1af8a529fb60366c854b4e207e6e75", "ea1e01aa57774c0adff45dbfe9cd5a47d8b163c4", "050348e54d59952782ace21cac48735bc0d23b8e", "244f2bb3a33b20ac31d53097a767bb317ce0dcaa", "525b50b4ae438d89f2b088c781583bb136f8a083", "12d8599ceee4e88993293669b42482e0be193449", "8fc52ce413863e5b9d78f884912858cd8a1f4ad9", "954e25d80547d6478ad78fce26cce41d1c7a8415", "24c4b220eb8717d1cad7f577db5837005e46f69e" ], "paperAbstract": "We consider the problem of orchestrating the execution of workflow applications structured as Directed Acyclic Graphs (DAGs) on parallel computing platforms that are subject to fail-stop failures. The objective is to minimize expected overall execution time, or makespan. A solution to this problem consists of a schedule of the workflow tasks on the available processors and of a decision of which application data to checkpoint to stable storage, so as to mitigate the impact of processor failures. For general DAGs this problem is hopelessly intractable. In fact, given a solution, computing its expected makespan is still a difficult problem. To address this challenge, we consider a restricted class of graphs, Minimal Series-Parallel Graphs (M-SPGS). It turns out that many real-world workflow applications are naturally structured as M-SPGS. 
For this class of graphs, we propose a recursive list-scheduling algorithm that exploits the M-SPG structure to assign sub-graphs to individual processors, and uses dynamic programming to decide which tasks in these sub-gaphs should be checkpointed. Furthermore, it is possible to efficiently compute the expected makespan for the solution produced by this algorithm, using a first-order approximation of task weights and existing evaluation algorithms for 2-state probabilistic DAGs. We assess the performance of our algorithm for production workflow configurations, comparing it to (i) an approach in which all application data is checkpointed, which corresponds to the standard way in which most production workflows are executed today; and (ii) an approach in which no application data is checkpointed. Our results demonstrate that our algorithm strikes a good compromise between these two approaches, leading to lower checkpointing overhead than the former and to better resilience to failure than the latter.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.14", "http://www.icl.utk.edu/files/publications/2017/icl-utk-964-2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5a6d6b73083571cce741122ce7ae49939f26b80d", "sources": [ "DBLP" ], "title": "Checkpointing Workflows for Fail-Stop Errors", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "5a6dae9da66189d3c15c4017fb441a2a0fd9f2ba": { "authors": [ { "ids": [ "1749517" ], "name": "Michael Backes" }, { "ids": [ "2747035" ], "name": "Pascal Berrang" }, { "ids": [ "4356930" ], "name": "Matthias Bieg" }, { "ids": [ "1805740" ], "name": "Roland Eils" }, { "ids": [ "38189358" ], "name": "Carl Herrmann" }, { "ids": [ "2230149" ], "name": "Mathias Humbert" }, { "ids": [ "34563314" ], "name": "Irina Lehmann" } ], "doi": "10.1109/SP.2017.21", "doiUrl": "https://doi.org/10.1109/SP.2017.21", "entities": [ "Cryptography", "Encryption", 
"Homomorphic encryption", "Overhead (computing)", "Personalization", "Privacy", "Random forest", "Whole genome sequencing" ], "id": "5a6dae9da66189d3c15c4017fb441a2a0fd9f2ba", "inCitations": [], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "957-976", "journalVolume": "", "outCitations": [ "3cfc0b1e3c19ffb422f0c98754c382a9d8fbbc0b", "f98c024e5a08e50445b53758145df6e77c30516a", "c296dc3441dcf28333fdfb64d293930f2d8af58d", "00f960991218995b06fff34f8db2925422c6a2ff", "9512f52337338232e1ed692748d9f069398ca545", "0d9c7eb50760ce4f2d99e8126e0c1c3103e6c98b", "764ace9519283e45664e490a6df581cb68b5250b", "40d68c0011958b9a990c9df65414fcf4fd539c72", "3ab982cb14c3335315203d4ac14c1a21268c6585", "fd2711cfe890675e8d885df88f3f76b5be5b39a6", "61883fbd35396888924520e109355e912337d2b8", "4fba6cf1fec9888feb4477da6d2985194a188d9c", "32f33b2b0801d70a46f23fa719a3e507586cff85", "5c80528d82d0c8dba94a9f41be688008955b0404", "335d90e51ece833c41641abe11de6713a72dbbb2", "3b35f7db6665ee22bc68e6a4d1f9bc9852ce6985", "5486fa766ad5da90873d00216e89185800adb4b6", "5753e70aa633d63a8eb5379d64a828bc3bad343e", "0a7196fcadeb009d5582b02ce4aa59546f6036e4", "828a0e74f3d3442fe4e43f98fdfeacd6e92aaa26", "19e6f3ea035d33590a8598be68ac82e6f00ce518", "5df933d85c2b22bec31ba0e4a11789f7019d072f", "5975eb3e618bdd89dd2bf43deec3d1e14e6d9d0c", "626170a2cfdbadbeaba25b0a76b8a953cf836b03", "89aae9fc0e0f851de0108a1f80c4d4155104fcc7", "8a8db60e5c0f654fe6f7aa58595fd88e547940dd", "f2cb65c86563973a0abf78ba797467eb0a87164d", "e8dc26a55750fddbe40cd5fe1a1cd54ee5defe7c", "fe38ee9944077cd14a0f6f1813af2d3d4b59ce43", "06bf624c5fe39d38e4c063eb43ab3e60a874f101", "3038be90007237e386c7730fc61158000dbc9140", "30ed0801ccab74988cde86cd5d797c2ded55c7fd", "6260e61e8cd98e86a27b22f1b02a3a33bb5920a9", "08b99b8b7a061bb673c0e280ec3180dbe0cbfda2", "2e1999e4869b2105f35a415e0fa31a8ad2c4fac8", "1162fbb4b060ab8af9072fdba1eb191abbabc74e", "6385fe1abf656e588eab5e6453a4f83b27b49ece", "444f05c354a77c062e7321eaa12dae6a28da8608", 
"03583e58838ca97cca1526082bd059080bfbcf16", "038ca12407c170d6661fe8968b3104e611a78145", "193aca52b1b37262a8b9e52db8381d5a94661e6e", "e8e2cb6dc4a2bdf9a2f9bd4873bac4696c2e4a5a", "0bc03232951e35755ef8949623603e74f3d7d5ad" ], "paperAbstract": "Since the first whole-genome sequencing, the biomedical research community has made significant steps towards a more precise, predictive and personalized medicine. Genomic data is nowadays widely considered privacy-sensitive and consequently protected by strict regulations and released only after careful consideration. Various additional types of biomedical data, however, are not shielded by any dedicated legal means and consequently disseminated much less thoughtfully. This in particular holds true for DNA methylation data as one of the most important and well-understood epigenetic element influencing human health. In this paper, we show that, in contrast to the aforementioned belief, releasing one's DNA methylation data causes privacy issues akin to releasing one's actual genome. We show that already a small subset of methylation regions influenced by genomic variants are sufficient to infer parts of someone's genome, and to further map this DNA methylation profile to the corresponding genome. Notably, we show that such re-identification is possible with 97.5% accuracy, relying on a dataset of more than 2500 genomes, and that we can reject all wrongly matched genomes using an appropriate statistical test. We provide means for countering this threat by proposing a novel cryptographic scheme for privately classifying tumors that enables a privacy-respecting medical diagnosis in a common clinical setting. The scheme relies on a combination of random forests and homomorphic encryption, and it is proven secure in the honest-but-curious model. 
We evaluate this scheme on real DNA methylation data, and show that we can keep the computational overhead to acceptable values for our application scenario.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.21", "https://www.ieee-security.org/TC/SP2017/papers/349.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5a6dae9da66189d3c15c4017fb441a2a0fd9f2ba", "sources": [ "DBLP" ], "title": "Identifying Personal DNA Methylation Profiles by Genotype Inference", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "5a9e1c61afe1daafb1d33108b7dc381550ff8ee9": { "authors": [ { "ids": [ "34707388" ], "name": "Dongxiao Yu" }, { "ids": [ "7708260" ], "name": "Yuexuan Wang" }, { "ids": [ "2104997" ], "name": "Tigran Tonoyan" }, { "ids": [ "1717015" ], "name": "Magn\u00fas M. Halld\u00f3rsson" } ], "doi": "10.1109/IPDPS.2017.78", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.78", "entities": [ "Algorithm", "Backoff", "Electron mobility", "Exponential backoff", "Failure rate", "Self-stabilization", "Time complexity" ], "id": "5a9e1c61afe1daafb1d33108b7dc381550ff8ee9", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "337-346", "journalVolume": "", "outCitations": [ "18a4af8dc8b936c87074f98336882d2481452116", "a04c375a6e8c2bc79bf733ab4617473638b2bae1", "488b3221f94f28fc207ca3fdfb975f18382ec811", "03aaef69b9d166a2a803b96e47684e1cc3b470aa", "05d1b66e443353b562a44722faddffcbefeaef53", "6ffa18f69f9d2df56d7165f85e42e46f1aabbc89", "1ec3ef815298824efc0d2d54a6ac5c70974dcfa5", "fe9303074167e5a732ea1357732edbf0eba9a18c", "020fba5d2890211e451321fd9bff957f621d2bc2", "54ab25c6ada94ef605cfe52ad4a49d97bf14cda6", "763b5c4f4c896c0a9b8c6b1249856870f3c21154", "0ba2a78e5985022b6d50d2451f24f698454d19ca", "4371ee22b0029d2ba30a6fb5144e2b89cbb49010", "805b962116cec427b6a7bab6afa63b2f14d6f22a", "02057ddd52f5bdb886f62f72e88746111853c89f", 
"04aaf58d1c4571cab70abf7f6728b932998c4f5b", "1de6abd76b02bdc6e42a0a2d24fffcd967318e54", "365f7419f4189d752343473fbe69218df9c72089", "73948f42e53e2cda63d5121b54a3de20cb3ccb98", "0317c9fdacb0149ff7718f91c9940568635adf1a", "5039da176f98b84730e2c862ed2567e308cc2f56", "78286f1e5df8741f1ef839bd74ab34b1acd4533e", "0967bd75632d959541ee4afef35a5ef37c805cc7", "2312b13cb8fcf5f051f3823acfb934caa63bdf88", "b7937c0005bac2d231ac930dfa3e91bc8b4d963c", "0b66186a3e4a911a4cd15d2eedcebd82093faf12", "19e0c23884da16c8c9b1834d88a10fd247a6f695", "98f3a5fb423be381e2da1c196120ef8a90c4b971", "e7074bf2c2514d11ab85718af10b409c49163213", "4a407a7dc76494d7431b378f80e748a7cef84161", "74cd42bc507aa695bba024042492fe25a3ae2d6f" ], "paperAbstract": "Dynamic behavior is an essential part of wireless networking, due to mobility, environmental changes or failures. We analyze a natural exponential backoff procedure to manage contention in a fading channel, in the presence of both node churn and link changes. We show that it attains a fast convergence, stabilizing contention from any state in logarithmic time. We use it to obtain an optimal algorithm for Local Broadcast that even improves known results for the static case. 
The results illustrate the utility of carrier sensing, a stock feature of wireless nodes.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.78" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5a9e1c61afe1daafb1d33108b7dc381550ff8ee9", "sources": [ "DBLP" ], "title": "Dynamic Adaptation in Wireless Networks Under Comprehensive Interference via Carrier Sense", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "5ad3878ee97398d94e545d329bebe5f99e181e3a": { "authors": [ { "ids": [ "3194663" ], "name": "Ruian Duan" }, { "ids": [ "2237275" ], "name": "Ashish Bijlani" }, { "ids": [ "40444394" ], "name": "Meng Xu" }, { "ids": [ "3254849" ], "name": "Taesoo Kim" }, { "ids": [ "1738428" ], "name": "Wenke Lee" } ], "doi": "10.1145/3133956.3134048", "doiUrl": "https://doi.org/10.1145/3133956.3134048", "entities": [ "Binary file", "C++", "Cognitive dimensions of notations", "Free software license", "Interdependence", "Java", "Mobile app", "Open Sound System", "Open-source license", "Open-source software", "Play Store", "Scalability", "Software development process", "Software license", "Source lines of code" ], "id": "5ad3878ee97398d94e545d329bebe5f99e181e3a", "inCitations": [], "journalName": "", "journalPages": "2169-2185", "journalVolume": "", "outCitations": [ "27ae23bb8d284a1fa8c8ab24e23a72e1836ff5cc", "746c828289ea54423209b9b1afd9ba7d8f14055b", "98e810ed098a651e0ba8cbb63d2d926d4eebdf9b", "8b0b7d8d7d83979b8259b6bab664542b8ecbf4a9", "2b6d234f0a953ca5b2c22172631f2850dac9c0fc", "0e5cbb048a6ad899ba59ef661e53ffd4ad6c29fd", "426272150b80891e76405aa8bce6bbb8a54e93db", "a3f0a2bb5802aa0a8bf6cc076ad64880314ae461", "0fe8165899f6933eb3f8cce2078811e43cd1be78", "11a732848cbbdad81e660c1fc7c4a88d2d9c1d6b", "33f8f2e76d0190905c2bd3a2e611d28504fa4353", "00870c8e8b65baa9306c2fafc3b70a498a16d934", "38e7550cd841ef8251ed5bfc0962ac877b496644", "0314da057cfbf61d752674b7d8527e8be9ddc3b8", 
"8c9c760b2079de26cf2aea31d128ff9054a3e6b1", "56a83c92b25629fe4546a8bb1161af03e093b2df", "171b35cd60d0bdf07a2bfaa43d3c799bb37235b4", "624fcdf19c27cce2a76c8d102e24c65f8b2e3937", "6ceede4549c28a42ac48f0e0f60b3c68ff3205d8", "a706b48534990372be5fd7074acf5a606e271568", "921a7276ed59cdbf1f5b0d504eb893ca14ce7ac7", "1d6a4fd8db10956de79762087e3b783f63258528", "9048746d108e983626c3405ad1775bbcc3933c6d", "2dd6381baae25bab28454bdb716313010b05d61b", "26d2e1ae4bf1c5a0f73e45741bc9d973c866846b", "847fd4428705785972bbf0d3be9575ba9a36f516", "04488816ddf0df0179aceea41fdd07a3b2449fe3", "805743c63a678016cb9b21c074c992dd237c29f1", "30af8702c6c9f69a64d176d61784b4d313eb3e26", "a259d3205fcef2a70d5ceb87a39e2150c058fb81", "a3e1d238d9ba9d3c11ab1a939361620cc109b199", "050b2cb3b23e928d204d2ad1d2f2661fe01f9947", "4ad69f733fb41cca898909e427224b65e53a2082", "738feaca3e125c6c6eae4c160b60fdedd12c89aa", "130633f0653e6ad5766144299aa17938e7a5fca2", "fd6d0674da911bae9fd01c01c931d00c94b81940", "2ec14bc3f03861e750f054727369dd0f9933eef6", "15bb9e2d8579a6901bc9ee4d7a57623da4262f97", "0dc043122228447b6ea7218ae80aab01e21df140", "129a61b59e202a734a3776e97305baeb37c9cd0f", "77462b767a378aa6207cbff5b100379fe8a55f6b", "8cd8298f1d91e92421c83d666669468fb9679840", "6c57b758334576abb98c703eb013ddb36888fa7f", "432ec065b07e59d55b7be30d9d3436b13332c47a", "7661ff0c1cdfeff8ec6344f56c512f34ee558dfa", "9e555463b67662d1c9e78f6cfdc335108ebd7b65", "5e491db0e11e71c6ed798052129712ef145b88bd", "23c63a0b251eb3e11b1f4c2c6733261a2f765f54", "295e307168586d6867e79b3d173af394e974b26f", "5f052c375a4e4884e4fb4b487f272817dc4bfbac", "585f6f6b02e0e56a18745bc9240218ab3bb3bd11", "a411c30fa4acb68b309a21167554bd97632968fa", "d54bf52f090b6a91b9582e89c3dafacb99584790", "c3a39721e079eb4baa3d286b738bf822007c20d7", "42408e1cc75a0b2e50f2ba2a7e9ace81c053ef4c", "4e145225ce8918b36b311c08571e3839214b0604", "0694c33e3ace71a19c9823ddf07c0f4e8743d118", "617da85c485487e48624c6beae735a5dee5a01ca", "79f40de182b73cde669cf2d5fb491fbfff3703bc", 
"2c5a5a2ab4f7b63523981ac790399c3ef2f08014" ], "paperAbstract": "With millions of apps available to users, the mobile app market is rapidly becoming very crowded. Given the intense competition, the time to market is a critical factor for the success and profitability of an app. In order to shorten the development cycle, developers often focus their efforts on the unique features and workflows of their apps and rely on third-party Open Source Software (OSS) for the common features. Unfortunately, despite their benefits, careless use of OSS can introduce significant legal and security risks, which if ignored can not only jeopardize security and privacy of end users, but can also cause app developers high financial loss. However, tracking OSS components, their versions, and interdependencies can be very tedious and error-prone, particularly if an OSS is imported with little to no knowledge of its provenance.\n We therefore propose OSSPolice, a scalable and fully-automated tool for mobile app developers to quickly analyze their apps and identify free software license violations as well as usage of known vulnerable versions of OSS. OSSPolice introduces a novel hierarchical indexing scheme to achieve both high scalability and accuracy, and is capable of efficiently comparing similarities of app binaries against a database of hundreds of thousands of OSS sources (billions of lines of code). We populated OSSPolice with 60K C/C++ and 77K Java OSS sources and analyzed 1.6M free Google Play Store apps. Our results show that 1) over 40K apps potentially violate GPL/AGPL licensing terms, and 2) over 100K of apps use known vulnerable versions of OSS. Further analysis shows that developers violate GPL/AGPL licensing terms due to lack of alternatives, and use vulnerable versions of OSS despite efforts from companies like Google to improve app security. 
OSSPolice is available on GitHub.", "pdfUrls": [ "http://iisp.gatech.edu/sites/default/files/images/identifying_open-source_license_violation_and_1-day_security_risk_at_large_scale.pdf", "http://doi.acm.org/10.1145/3133956.3134048", "https://taesoo.kim/pubs/2017/duan:osspolice.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5ad3878ee97398d94e545d329bebe5f99e181e3a", "sources": [ "DBLP" ], "title": "Identifying Open-Source License Violation and 1-day Security Risk at Large Scale", "venue": "CCS", "year": 2017 }, "5aee715611eed0aa673da0f78f419cf6a1c24df9": { "authors": [ { "ids": [ "2324985" ], "name": "Trung Le" }, { "ids": [ "1685912" ], "name": "Khanh Nguyen" }, { "ids": [ "1727449" ], "name": "Vu Nguyen" }, { "ids": [ "3314511" ], "name": "Tu Dinh Nguyen" }, { "ids": [ "1749657" ], "name": "Dinh Q. Phung" } ], "doi": "10.1109/ICDM.2017.35", "doiUrl": "https://doi.org/10.1109/ICDM.2017.35", "entities": [ "Algorithm", "Baseline (configuration management)", "Gaussian process", "Kriging", "On the fly", "Process modeling", "Program optimization", "Sparse matrix", "Speedup", "Stream (computing)" ], "id": "5aee715611eed0aa673da0f78f419cf6a1c24df9", "inCitations": [ "36137164334dfd029f1ae3d9d91e521fd8e68482" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "257-266", "journalVolume": "", "outCitations": [ "12fa4a3ee546ba8eeb0b88b06bcb571d65d91cc4", "367ddd68191b1cfa89fc4e568e3a782c9fd7ba35", "568afeb1de772be7d9f661a0a51c00052f4c70fd", "8e597460557d44de07ec570738cd2b42cdcc2580", "0e854fdc0574077d75454dec80ad3c5b9ecede7d", "36137164334dfd029f1ae3d9d91e521fd8e68482", "fa5f1a058154dbc36c60259462f1ae5642eba638", "9b9607b78ca1896738ec1fbf0633032bc74fbec2", "3bae80eca92a6e607cacdf03da393a1059c0d062", "145512a08a7cd79a0efb1f0503ddc6a4e4ef02dc", "e17234ec0af7ec8e31dfdfdf8dcd5c083ced9577", "22c2698c15e0244bdda0f263910caec5a67fd2ab", "6da81d06942996cc369f2f5e499b758ea06be37b", 
"43aa597bcadecdbb5739652fa96efaee88887e40", "3e73e107e0f65e5ca81fac14fadeee74cdbaab55", "00ca2e50ae1c5f4499d9271c72466d2f9d4ae137", "8db04123f8004f86b0ac540870ef6aa51f4249e6", "35b76e9610ffdbafffacc5992f97e4efe046cd31", "c5aeb96be4c04b2fd6904e63940b59edf20dcb97", "965d4f77f068b2101d887abcced6b4850dfee597", "2688969848b75336891b1e58a2b6ee433343867c", "d8321c6d9f8ab23cab2f104b692016279926567c", "085e7e7d093164d6a9408e107e0ab56b6d92563b", "09c48a57a8ef043cee0f1961ba88dc0f75a82ff5", "05b3ecf5190d3ea19ba9b7dbe8793118d0238d1b", "4cee84994302702b87c4173a9558fe16587c69cf", "1592fe924114866c1ac559bae33ea789930daa98", "13c5fa3dc1c0c0671138664b119827437513c2a0", "9c13b87b5efb4bb011acc89d90b15f637fa48593", "08743d09f3ec33ab1f188d4c5f8f5550c312ace0", "1aa5a281fddc9e13a38ffd131e955afc09e99dda", "573c2ded84a69d1100f52b2e503977c905141e1e", "1d32803d474761164ce751e3082b4d92217d6fcb" ], "paperAbstract": "One of the most current challenging problems in Gaussian process regression (GPR) is to handle large-scale datasets and to accommodate an online learning setting where data arrive irregularly on the fly. In this paper, we introduce a novel online Gaussian process model that could scale with massive datasets. Our approach is formulated based on alternative representation of the Gaussian process under geometric and optimization views, hence termed geometric-based online GP (GoGP). We developed theory to guarantee that with a good convergence rate our proposed algorithm always produces a (sparse) solution which is close to the true optima to any arbitrary level of approximation accuracy specified a priori. Furthermore, our method is proven to scale seamlessly not only with large-scale datasets, but also to adapt accurately with streaming data. We extensively evaluated our proposed model against state-of-the-art baselines using several large-scale datasets for online regression task. 
The experimental results show that our GoGP delivered comparable, or slightly better, predictive performance while achieving a magnitude of computational speedup compared with its rivals under online setting. More importantly, its convergence behavior is guaranteed through our theoretical analysis, which is rapid and stable while achieving lower errors.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.35" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5aee715611eed0aa673da0f78f419cf6a1c24df9", "sources": [ "DBLP" ], "title": "GoGP: Fast Online Regression with Gaussian Processes", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "5b136bf2b2b354a996b0acfad9daacdfc2cb86dd": { "authors": [ { "ids": [ "1866157" ], "name": "Mathy Vanhoef" }, { "ids": [ "1739936" ], "name": "Frank Piessens" } ], "doi": "10.1145/3133956.3134027", "doiUrl": "https://doi.org/10.1145/3133956.3134027", "entities": [ "AES instruction set", "Adversary (cryptography)", "Android", "Authentication", "Confidentiality", "Cryptographic nonce", "Cryptographic protocol", "Cryptography", "Encryption", "Financial Times", "Forge (software)", "Group key", "Handshaking", "Key (cryptography)", "Network packet", "Replay attack", "Session key", "TCP Wrapper", "Temporal Key Integrity Protocol", "The Fast and the Furious", "Wi-Fi Protected Access" ], "id": "5b136bf2b2b354a996b0acfad9daacdfc2cb86dd", "inCitations": [ "4e00c0c0a7d341f259aa3e6910316bca305c9934", "a0daeace2255f0978954acbced1f6238f17906fb", "52e76bc759f6da650dbfcb5ff4e7f052dcb13c3f", "4a3654c41bffe93fd88d48a31e16f924ebb06c9e", "f70e65a8b62a92966ae93b3f65ce73dfb86a11a5" ], "journalName": "", "journalPages": "1313-1328", "journalVolume": "", "outCitations": [ "8a89b597f605d1d92ffda4803e9b852d780f6f2e", "23eb53170c6de9ff5024db120eda200816fa803f", "2f651857310a9e7b50a941686bdc01e75d5e4a6c", "e5d9d757d86b0eeac2a6a66426a3714a56e2bb0f", 
"ac6a003a4e5d0cc12fd2ae8a57769b79283a7156", "30ec3223339b57bd3401d75b5768197d6d3f9742", "33baec7311974b244bae84739667a719355924b8", "2c55cb36b0c2575ecb81f5d45101d815808a38c3", "c36f863781089428adba70274b8b0ca9e6391d5b", "0037875e7321eb65867ff47b0e22a080b84502da", "3d98d3545f2f952654189ed4f8a127be1b18ae71", "730473b193c56d4996dd11569837700e5e5bc9b4", "ce8fd1280b9c9f461ed403980ae88720fe91b09c", "135406f778603303beab81028e71a5ee3d3b8fe7", "05ae289245b5a9222a1a6fc3f36910c3cb0f4662", "ff23e6f8e2d868fa1649d8ab49d32dfb3aee1296", "5f209fdf869ede1bc82ead79248196f34586ae4f", "16fad84b5cd76c403c94b16353fa6a4d64f19251", "03f4527bc9b89af49819131306551409487d77e9", "d85fa090978d3a257cbd9422c29b67987d27f500", "4bce2a077c5e15f01c936ab78d879748d042a661", "15f53e82bdb947b38cd9b0657fe6f22a6d492dbb", "ee5236447f8dc7cfbc7c2e06c13848112473317f", "d030862b5ab53b3fad21e1f48733b78d4a6e35b2", "2d9bb0e8c84a1d2b984fbe4ad70f8b258a17f025", "0d2b5c3426cc90e405a439d012ca3ea1766784f7", "20cee308639acf53d090ff7c7d639eb64fdca8ad", "b8980f2a605b62c7fbfdf0694ac4faadd1a25eeb", "369a232610b53bdda600c35bf8ae6d1e9195c7e5", "88283271fedd1cf1854551e5ebea54ed308e387f", "1a9d6f6d7760eaf22e438eed3861b52e44069129", "5724de7716a059aece8b5aebbe4d5a92a58e3503", "2aa0e44b8529de8ee75138eade8aba0bfb9f008f", "d78d3b199830d6c8b86610dc2921fcf139225217", "09aaf8b51ffea0816f7b62c448241de474ae65f7", "004c5a4db0b80f41dad0052f549045ecb4bc92a4", "c4639cf655a4ef6a761c2a35ad39ac7e8ec68521", "5438d78814526c949d1034b3264ba171cc038189", "31e4845a40cfa6a953aef78387b34ea3284cdff9", "11886a73a41a8842d6b194c0e675cafd8e79191d", "ff7d3dfc4e0e19ecca03ffc0a6d56307e7e7d449" ], "paperAbstract": "We introduce the key reinstallation attack. This attack abuses design or implementation flaws in cryptographic protocols to reinstall an already-in-use key. This resets the key's associated parameters such as transmit nonces and receive replay counters. Several types of cryptographic Wi-Fi handshakes are affected by the attack. 
All protected Wi-Fi networks use the 4-way handshake to generate a fresh session key. So far, this 14-year-old handshake has remained free from attacks, and is even proven secure. However, we show that the 4-way handshake is vulnerable to a key reinstallation attack. Here, the adversary tricks a victim into reinstalling an already-in-use key. This is achieved by manipulating and replaying handshake messages. When reinstalling the key, associated parameters such as the incremental transmit packet number (nonce) and receive packet number (replay counter) are reset to their initial value. Our key reinstallation attack also breaks the PeerKey, group key, and Fast BSS Transition (FT) handshake. The impact depends on the handshake being attacked, and the data-confidentiality protocol in use. Simplified, against AES-CCMP an adversary can replay and decrypt (but not forge) packets. This makes it possible to hijack TCP streams and inject malicious data into them. Against WPA-TKIP and GCMP the impact is catastrophic: packets can be replayed, decrypted, and forged. Because GCMP uses the same authentication key in both communication directions, it is especially affected. Finally, we confirmed our findings in practice, and found that every Wi-Fi device is vulnerable to some variant of our attacks. 
Notably, our attack is exceptionally devastating against Android 6.0: it forces the client into using a predictable all-zero encryption key.", "pdfUrls": [ "https://papers.mathyvanhoef.com/ccs2017.pdf", "http://doi.acm.org/10.1145/3133956.3134027", "https://acmccs.github.io/papers/p1313-vanhoefA.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5b136bf2b2b354a996b0acfad9daacdfc2cb86dd", "sources": [ "DBLP" ], "title": "Key Reinstallation Attacks: Forcing Nonce Reuse in WPA2", "venue": "CCS", "year": 2017 }, "5b13e903543c4b81ec55389607ee7820ba711ba0": { "authors": [ { "ids": [ "3192287" ], "name": "Akshay Jajoo" }, { "ids": [ "39655582" ], "name": "Rohan Gandhi" }, { "ids": [ "2981910" ], "name": "Y. Charlie Hu" }, { "ids": [ "4093554" ], "name": "Cheng-Kok Koh" } ], "doi": "10.1145/3143361.3143364", "doiUrl": "https://doi.org/10.1145/3143361.3143364", "entities": [ "Central processing unit", "Computational complexity theory", "Data-intensive computing", "FIFO (computing and electronics)", "GiST", "Microsoft Azure", "Schedule", "Scheduling (computing)", "Shortest job next", "Simulation", "Stock and flow", "Testbed" ], "id": "5b13e903543c4b81ec55389607ee7820ba711ba0", "inCitations": [], "journalName": "", "journalPages": "439-450", "journalVolume": "", "outCitations": [ "231ba17921ebd80e95771e28dfb5082e169d5a53", "3e8fd23175bd1aba19785802a8955f0c13ec8753", "1035495902a1372ea30588e7a269422905cbb3d8", "bd87cc38abc992be2d154a522729cddfa90dc4fc", "3338173866c3c85338a5ac26560d5392108c8eac", "33b0e063127c2fdb9e3cc4599336d5ee247770ca", "191c507dbb80a75fd7315e090c46f9f4bf6296e5", "73f246ab0a4f3406d333d8f5e1f29e21c9292148", "764d7de61421968d6b477f0c055d72dcb0893544", "17646cbb7418fc58631ad06f4b4ba39c493b97c7", "230239fb61d7a6996ac9552706363323b34735f2", "62a68d15bbfef566170fc610183eb7ebf8313dce", "0edf4ef1b8e09e4abc994f7d450bc090262e2c9b", "122229239aeba1eb4f1623adb40f1845c582a520", "c3f0ab61a9a34bd47325d6d8ec19fc52a917f1cd", 
"9c9705ba184edbcd2c397f5f2a4921c2faaa7045", "03670ae248e456b67be7e435e86ddb8a9f87c242", "0428e3b146a7849e7ad19143e09e0fa897fe2220", "852da937fedf1d6e15b82454976c432d57c290e3", "1cafaac11664e48bd121695ac1be06b0930d00a5", "3b988049dd8f62f772281e90196bbd793700c86b", "37525b2c3cc16a2fe166708a4f7081b949b1888e", "0d2f3f6abd86368a2eb9a6d0b37d1299ec5939a6", "0faff4fa4347d5369956dbdbea410869fc399bfd", "55c5a2c1f2f6f66646baf64405464357fa7df8a0", "27c04dce51362fcc7531acbe74823a7f0a4e48bf", "2cbb2a4e52a3308e31450345914cac226973b407", "0ea2514931f27fa2b870abc24487d36c8766639a", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a" ], "paperAbstract": "CoFlow scheduling improves data-intensive application performance by improving their networking performance. State-of-the-art CoFlow schedulers in essence approximate the classic online Shortest-Job-First (SJF) scheduling, designed for a single CPU, in a distributed setting, with no coordination among how the flows of a CoFlow at individual ports are scheduled, and as a result suffer two performance drawbacks: (1) The flows of a CoFlow may suffer the out-of-sync problem -- they may be scheduled at different times and become drifting apart, negatively affecting the CoFlow completion time (CCT); (2) FIFO scheduling of flows at each port bears no notion of SJF, leading to suboptimal CCT.\n We propose Saath, an online CoFlow scheduler that overcomes the above drawbacks by explicitly exploiting the spatial dimension of CoFlows. In Saath, the global scheduler schedules the flows of a CoFlow using an all-or-none policy which mitigates the out-of-sync problem. To order the CoFlows within each queue, Saath resorts to a Least-Contention-First (LCoF) policy which we show extends the gist of SJF to the spatial dimension, complemented with starvation freedom. 
Our evaluation using an Azure testbed and simulations of two production cluster traces show that compared to Aalo, Saath reduces the CCT in median (P90) cases by 1.53x (4.5x) and 1.42x (37x), respectively.", "pdfUrls": [ "https://www.cs.purdue.edu/homes/ajajoo/papers/saath.pdf", "https://www.cs.purdue.edu/homes/ajajoo/presentations/saath.pdf", "http://doi.acm.org/10.1145/3143361.3143364" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5b13e903543c4b81ec55389607ee7820ba711ba0", "sources": [ "DBLP" ], "title": "Saath: Speeding up CoFlows by Exploiting the Spatial Dimension", "venue": "CoNEXT", "year": 2017 }, "5b2095d318f8e4693645b3502d10153a1af62d83": { "authors": [ { "ids": [ "3077443" ], "name": "Fatemeh Sheikholeslami" }, { "ids": [ "1702112" ], "name": "Georgios B. Giannakis" } ], "doi": "10.1109/ICDM.2017.22", "doiUrl": "https://doi.org/10.1109/ICDM.2017.22", "entities": [ "Algorithm", "Complex network", "Refinement (computing)", "Synthetic data" ], "id": "5b2095d318f8e4693645b3502d10153a1af62d83", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "127-136", "journalVolume": "", "outCitations": [ "0b19d84d3c44222ab0d3965eedada3a037971bc3", "0f0809e6537e5ffe212190c243a249382f5ffab1", "6d5855cc45c011b81609affdb27f5c772e110567", "35778e259b0ccb37ec8c754852956f26bc8ff51c", "63d440eb606c7aa4ee3c7fcd94d65af3f5c92c96", "1d0f25989452abbbc8feaf00a034ff110fc4b350", "21c36fd16efea75427cb8b4a5f78b8cd658d51c3", "5b72cf570bfcc84cb03a9e310e680363373565cf", "02c1580617a9b1ccd809f06ae57773c00cf96647", "9c38c19e382866587d825afe5336dd9c76cfa02f", "10d3c0a7dd22780562bd76ef9bf4278ca3d4b0b5", "08523c48e1cccf12ba5bd3e7090bc0a3734e735e", "41e7ac9b2901172d088bf3053a7dd736a0c05410", "16dace2a89d39676515dfaea18ca82b90e884200", "07ad62b6b5da5f226c88549378886ca062e207a0", "2af4a96f88ec630c57a28461751af3659ec98dd4", "850ba1f795dc43e642e8e311b28f04522669737a", "9a3153e4176049d743238a4e0be050043f93bd30", 
"40eee455b405becb6d3fda47e23d79404f0f040f", "5f944a6722a6e0f2114e8fd1262eb01ede6d5b9d", "a8dc47e370b17371e57ad070e669360794473efe", "0a27e088c3dd6c3fc9cba97b3dd76fcabd413108", "15973b0bfdbe84d1cb6e2a35fc857a1d125a3923", "8eae36cabdce7cba7c1fc316596002cd84ed5e95", "1437415df29d3927c7851c7a0db0edd4a472d6e1", "95b251202e1f5ff7d7f41dda553a38e395ecf555", "2268879700f9343b0371d4f5b6544e61b1469abe", "064566f30ff4c52e177bbe97001ba48d2b3b3d90", "05b5180b4b39b5d0b3f44dfb531b584c5dddf2c0", "10e44c294a968ca91e361fac44aa9d0f2cdf3bd3", "235d090c8549ff3b353103380313d70e33c47e4e", "48f89655c20088c977af2a936f225081d31636c4", "42716eb17f1628a861770ab32dc0ade943c9b109", "18cb0057b93aaa35453657e0d22562c7c0ccd3b6", "a7951266e1f95cff509c1cb6474a31cd69f73760", "1c5569eb82f074d23431a1e65c29c07cbb8e854b", "4299ba257957b77eec8e063bd133e9c44c1c2a8c" ], "paperAbstract": "The task of community detection over complex networks is of paramount importance in a multitude of applications. The present work puts forward a top-to-bottom community identification approach, termed DC-EgoTen, in which an egonet-tensor (EgoTen) based algorithm is developed in a divide-and-conquer (DC) fashion for breaking the network into smaller subgraphs, out of which the underlying communities progressively emerge. In particular, each step of DC-EgoTen forms a multi-dimensional egonet-based representation of the graph, whose induced structure enables casting the task of overlapping community identification as a constrained PARAFAC decomposition. Thanks to the higher representational capacity of tensors, the novel egonet-based representation improves the quality of detected communities by capturing multi-hop connectivity patterns of the network. In addition, the top-to-bottom approach ensures successive refinement of identified communities, so that the desired resolution is achieved. 
Synthetic as well as real-world tests corroborate the effectiveness of DC-EgoTen.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.22" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5b2095d318f8e4693645b3502d10153a1af62d83", "sources": [ "DBLP" ], "title": "Overlapping Community Detection via Constrained PARAFAC: A Divide and Conquer Approach", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "5b21b3ddf254e86f07c4743eb586369263de86ee": { "authors": [ { "ids": [ "2690018" ], "name": "Changsu Kim" }, { "ids": [ "2563077" ], "name": "Juhyun Kim" }, { "ids": [ "35518579" ], "name": "Juwon Kang" }, { "ids": [ "3091593" ], "name": "Jae W. Lee" }, { "ids": [ "2363315" ], "name": "Hanjun Kim" } ], "doi": "10.1109/HiPC.2017.00045", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00045", "entities": [ "Baseline (configuration management)", "Benchmark (computing)", "Call site", "Central processing unit", "Compiler", "Computer data storage", "Parallel computing", "Profiling (computer programming)", "Profiling (information science)", "SPECint", "Speculative execution", "Static program analysis", "Subroutine" ], "id": "5b21b3ddf254e86f07c4743eb586369263de86ee", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "328-337", "journalVolume": "", "outCitations": [], "paperAbstract": "To expose hidden parallelism from programs with complex dependences, modern compilers employ memory profilers to augment imprecise static analyses. Since dynamic dependence patterns among instructions can vary widely depending on the context, such as function call site stack and loop nest level, context-aware memory profiling is of great value for precise memory profiling. However, recording memory dependences with full context information causes huge overheads in terms of CPU cycles and memory space. 
Existing profilers mitigate this problem by compromising precision, coverage, or both. This paper proposes a new precise Context-Aware Memory Profiling (CAMP) framework that efficiently traces all the memory dependences with full context information. CAMP statically analyzes a context tree of a program that illustrates all the possible dynamic contexts, and simplifies context management during profiling. For 14 programs from SPEC CINT2000 and CINT2006 benchmark suites, CAMP increases speculative parallelism opportunities by 12.6% on average and by up to 63.0% compared to the baseline context-oblivious, loop-aware memory profiler.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00045" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5b21b3ddf254e86f07c4743eb586369263de86ee", "sources": [ "DBLP" ], "title": "Context-Aware Memory Profiling for Speculative Parallelism", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "5b24754758249abf11d2e3f1391b1850c1b22990": { "authors": [ { "ids": [ "32446890" ], "name": "Sonia L\u00f3pez" }, { "ids": [ "19203465" ], "name": "Stavan Satish Karia" } ], "doi": "10.1109/IPDPSW.2017.175", "doiUrl": "https://doi.org/10.1109/IPDPSW.2017.175", "entities": [ "Central processing unit", "Data dependency", "Field-programmable gate array", "Graphics processing unit", "Heterogeneous computing", "Heuristic", "Multi-factor authentication", "Performance per watt", "Run time (program lifecycle phase)", "Scheduling (computing)" ], "id": "5b24754758249abf11d2e3f1391b1850c1b22990", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)", "journalPages": "42-53", "journalVolume": "", "outCitations": [ "259634110eff6b7042492e582b44654fc60d1f23", "9b6ddeb90dac8a828225bd58c9cf2f8ddc232812", "89450edc162692cea533caf753cc691afde81869", "3e2b9fb6ca99d8b399465eaeb68460e05ea4eb46", 
"7fb410b3f7473863b70acc201ddea297047e573d", "01a2241b15ed640ef7c4cc552042cb8a3455bf35", "4fa32bf2e9cddfcff64ee80e95a30368dbc25720", "fddeb44817163d10d2d2e8d03d3fd328d541f5e7" ], "paperAbstract": "Computing systems have become increasingly heterogeneous contributing to higher performance and power efficiency. However, this is at the cost of increasing the overall complexity of designing such systems. One key challenge in the design of heterogeneous systems is the efficient scheduling of computational load. To address this challenge, this paper thoroughly analyzes state of the art scheduling policies and proposes a new dynamic scheduling heuristic: Alternative Processor within Threshold (APT). This heuristic uses a flexibility factor to attain efficient usage of the available hardware resources, taking advantage of the degree of heterogeneity of the system. In a GPU-CPU-FPGA system, tested on workloads with and without data dependencies, this approach improved overall execution time by 16% and 18% when compared to the second best heuristic.", "pdfUrls": [ "http://scholarworks.rit.edu/cgi/viewcontent.cgi?article=10560&context=theses", "https://doi.org/10.1109/IPDPSW.2017.175" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5b24754758249abf11d2e3f1391b1850c1b22990", "sources": [ "DBLP" ], "title": "Alternative Processor Within Threshold: Flexible Scheduling on Heterogeneous Systems", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)", "year": 2017 }, "5b4a19cf26c8c86a62c08cc80ac39945d14eb703": { "authors": [ { "ids": [ "35359605" ], "name": "Harenome Razanajato" }, { "ids": [ "1769333" ], "name": "C\u00e9dric Bastoul" }, { "ids": [ "1686527" ], "name": "Vincent Loechner" } ], "doi": "10.1109/HiPC.2017.00046", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00046", "entities": [ "Benchmark (computing)", "Compiler", "Data parallelism", "Dependence analysis", "Experiment", "Iteration", "Lifting 
scheme", "Locality of reference", "Map (parallel pattern)", "Multi-core processor", "OpenMP", "Parallel computing", "Polyhedral", "Polyhedron", "Polytope model", "Region of interest", "Shared memory" ], "id": "5b4a19cf26c8c86a62c08cc80ac39945d14eb703", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "338-347", "journalVolume": "", "outCitations": [ "67bbc63a7a5d4cd0d272dc20c2467cf7594e6181", "8171c702ae27a120606bcd9a2bf9fc8fa673bff5", "abd39d59ef78964c37237e9a7229ba46c2d8cdef", "233cfa61d01fcf26effc64508250cb90396e8a78", "123d0425c5a0fb8a02e80bcbb9e6491124b9e08e", "16de6f9e2bf6ee1068dbca8c9e5446295c904315", "c78b35807cbc4873b6019c7f27e1d3ad5d61b2a7", "b0fdf1394339f614258a146cfc6a771e45bbefc4", "9afa38c3b54ec57de27440555859a57850f1c365", "b0f9ea06d726935289456a75f739544826bb5c0d", "0e95e0ff4014053ac11fcbcee556eaab4dc1a92d", "04afd9817df2900a02f915203e01657b56a957cd", "760a82a2c23c94f40e2615c3981575dfc78c59fc", "af9aad2775b4b9045af689bdf084f4a4681ac18a", "756beea5a3f1db7debb22a2261cd2e3df52d0714", "cefb0499d006a4c0d105c4c96bf63ffb73753455", "7ee8b7ea7474a84ff9aad2b5d5936d127abfb63e", "3d64988b882541afa8f03f912229fbba49c82e74", "9285e8f7b82bd79a4c435a5a6a3ec47fc687479a", "25fd1c8f0c455c080fb6d2d1ff100e98656194ac", "34eb707be68286ed73f590a7ae68ab599501526a", "486986fb365f072146cb9648ab408b0c567ae019", "4acb52ccf69cd1e8972ac32df6cef4971c7db803", "5a3f0e14112d8cc14ba19e5ab8c33fc42487c4bf", "f0f4757aa2f923a349e8357e73850a78e9b80fee" ], "paperAbstract": "Nowadays best performing automatic parallelizers and data locality optimizers for static control programs rely on the polyhedral model. Polyhedral compilation consists of three phases: (1) abstracting the input code into a mathematical view; (2) analyzing and transforming this representation into an optimized alternative; (3) generating the corresponding code while ensuring it is semantically equivalent to the input code. 
During this last phase, state-of-the-art polyhedral compilers generate only one type of parallelism when targeting multicore shared memory architectures: parallel loops via the OpenMP omp parallel for directive. In this work, we propose to explore how a polyhedral compiler could exploit parallel region constructs. Instead of initializing a new set of threads each time the code enters a parallel loop and synchronizing them when exiting it, the threads are initialized once for all at the entrance of the region of interest, and synchronized only when it is necessary. Technically, we propose to embed the whole region containing parallel loops in an omp parallel construct. Inside the parallel region, the single construct is used when some code needs to be executed sequentially; the for construct is used to distribute loop iterations between threads. Thanks to the power of the polyhedral dependence analysis, we compute when it is valid to add the optional nowait clause, to omit the implicit barrier at the end of a worksharing construct and thus to reduce even more control overhead. 
Through a set of experiments on the PolyBench benchmarks, we show that resulting codes can overwhelm the performance obtained by the Pluto polyhedral compiler.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00046" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5b4a19cf26c8c86a62c08cc80ac39945d14eb703", "sources": [ "DBLP" ], "title": "Lifting Barriers Using Parallel Polyhedral Regions", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "5b55a031cf33f285170568fcf44f4a006d20ecc2": { "authors": [ { "ids": [ "32577227" ], "name": "Rei Thiessen" }, { "ids": [ "3284421" ], "name": "Ondrej Lhot\u00e1k" } ], "doi": "10.1145/3062341.3062359", "doiUrl": "https://doi.org/10.1145/3062341.3062359", "entities": [ "Algorithm", "Context-free grammar", "Context-free language", "F-algebra", "Input/output", "Java", "Pointer (computer programming)", "Pointer analysis", "Reachability" ], "id": "5b55a031cf33f285170568fcf44f4a006d20ecc2", "inCitations": [], "journalName": "", "journalPages": "263-277", "journalVolume": "", "outCitations": [ "9ea49abc003a832776df864a92838b3b51f3e55e", "3ef15e17f7932c6d86eb06e3eb254e6dc621e029", "498abcf07adc6cea445b4a1c270f78041a4d7f43", "2194c3460ab71f3826db00b045b2ae590c753319", "8469007f1d62cac7d7188dbe6b67b24d40d46ca4", "0ca0fe955dc8b7bdea61f03a767f8b8a57ac51ee", "0ffaf68f4399998864d6d9835c7bd8240d322b49", "3e6d92ed139f19418c74cce6697fbd2de609138e", "44daa1fde25be30d21c4a1a32b7af314c9890af8", "00a9ba0063d34ec56792849a67ef57b4601becbb", "187768583aa8fd7dfe64cc88cb2aa831b6b531db", "8bfd64fe8f9192a8b3c801c7d91fd46cabfc5319", "067b7f06fdaa1aceeb3fff534fd1f2649303922f", "6c1fa64a6b5d3565fc4ab9ca97bf530069ce2669", "128985b85556c30ad405863f2a34340049957616", "148be7f718c3ff24491a617bd85bd92db09225e3", "a578530c785b14f54918720ee4acb672ffe3986e", "02584960fcf229817eb69ff5dc942d89ff9381be", "1f32cece629d41929e6913f3b445b93bf2c168ac", 
"33f21e89dd5d0895b8f65d82371be568d3e3d3fb", "d17bc006fe7d87c9f205b75318f6e6fbfc7813eb", "80af0dfde58a4f1e4f7ff35fa2c882a4ab3bbad2", "027eb436c35c7e293e7ebc565163cb54c05fe2e9" ], "paperAbstract": "Points-to analysis for Java benefits greatly from context sensitivity. CFL-reachability and k-limited context strings are two approaches to obtaining context sensitivity with different advantages: CFL-reachability allows local reasoning about data-value flow and thus is suitable for demand-driven analyses, whereas k-limited analyses allow object sensitivity which is a superior calling context abstraction for object-oriented languages. We combine the advantages of both approaches to obtain a context-sensitive analysis that is as precise as k-limited context strings, but is more efficient to compute. Our key insight is based on a novel abstraction of contexts adapted from CFL-reachability that represents a relation between two calling contexts as a composition of transformations over contexts. \nWe formulate pointer analysis in an algebraic structure of context transformations, which is a set of functions over calling contexts closed under function composition. We show that the context representation of context-string-based analyses is an explicit enumeration of all input and output values of context transformations. CFL-reachability-based pointer analysis is formulated to use call-strings as contexts, but the context transformations concept can be applied to any context abstraction used in k-limited analyses, including object- and type-sensitive analysis. 
The result is a more efficient algorithm for computing context-sensitive results for a wide variety of context configurations.", "pdfUrls": [ "http://plg.uwaterloo.ca/~olhotak/pubs/pldi17a.pdf", "http://doi.acm.org/10.1145/3062341.3062359" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5b55a031cf33f285170568fcf44f4a006d20ecc2", "sources": [ "DBLP" ], "title": "Context transformations for pointer analysis", "venue": "PLDI", "year": 2017 }, "5b69e39d4f403e3c39ebee0742216e878fee110f": { "authors": [ { "ids": [ "39572993" ], "name": "Peng Sun" }, { "ids": [ "40096128" ], "name": "Yonggang Wen" }, { "ids": [ "1767765" ], "name": "Ta Nguyen Binh Duong" }, { "ids": [ "33285410" ], "name": "Xiaokui Xiao" } ], "doi": "10.1109/CLUSTER.2017.51", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.51", "entities": [ "Commodity computing", "Computation", "Edge computing", "Graph (abstract data type)", "Graph partition", "In-memory database", "Model of computation", "Out-of-core algorithm", "Server (computing)" ], "id": "5b69e39d4f403e3c39ebee0742216e878fee110f", "inCitations": [ "f84aa869a21f083133b74e23d83ab2dd1378b7ff" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "256-266", "journalVolume": "", "outCitations": [ "1156f60e40548096df49528b1342bb3e88b0f378", "dc23ad6d4eb652718a2674486037454ec509eef5", "0ad8e89091eed09217e66adc98136126addc2619", "ee947a4654479e4098142c0369de7698c2e1475d", "5f3f9223c5c9f896be099bc177929febad508407", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "3486aeaf540c48952120fe853d672af984f40a6a", "ef9d9821df55442f039b128bb5cef2b41ab2cadc", "3726c60552263e648c6856679e672de2e1c110e5", "c0bbb56b4428e9a83d067c07054946293b475fe9", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "0c8ed7f86d881dffb82b24f718bece6cb0e5c76f", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "423befa4222b5b54cf63f0879e99243b0e5139b0", "6de3915df2b9927a78f213629f3bcb052ec21e8b", 
"5b6c248974d8e4c1311863454e5c19b44d2aeb4a", "6a888f3dd0a17b0241be61daa378ba6caffa6617", "859edc821f821b74fc9c818e45bcecb850603d07", "2def083fb7fd8f887c507c0b0b32bd921a26df9b", "5bd9374195809c73157ba876f463ea7c4ec9abb5", "1452f20140dba52b928c9be5f385b5ac35537a2c", "0e33dd74064b3d7659d9ab6301c21c0480cfda72", "0443504ed242c5b5de741785eeccfb3eac576e12", "8d1a4ae1d3a17edd5b16653f2f582f9952b71612", "420a0e5fc398f197bca3dfe40291a82b2c65655a" ], "paperAbstract": "It is common for real-world applications to analyze big graphs using distributed graph processing systems. Popular in-memory systems require an enormous amount of resources to handle big graphs. While several out-of-core approaches have been proposed for processing big graphs on disk, the high disk I/O overhead could significantly reduce performance. In this paper, we propose GraphH to enable high-performance big graph analytics in small clusters. Specifically, we design a two-stage graph partition scheme to evenly divide the input graph into partitions, and propose a GAB (Gather-Apply-Broadcast) computation model to make each worker process a partition in memory at a time. We use an edge cache mechanism to reduce the disk I/O overhead, and design a hybrid strategy to improve the communication performance. GraphH can efficiently process big graphs in small clusters or even a single commodity server. 
Extensive evaluations have shown that GraphH could be up to 7.8x faster compared to popular in-memory systems, such as Pregel+ and PowerGraph when processing generic graphs, and more than 100x faster than recently proposed out-of-core systems, such as GraphD and Chaos when processing big graphs.", "pdfUrls": [ "https://arxiv.org/pdf/1705.05595v4.pdf", "https://arxiv.org/pdf/1705.05595v3.pdf", "https://arxiv.org/pdf/1705.05595v1.pdf", "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.51", "https://arxiv.org/pdf/1705.05595v5.pdf", "http://arxiv.org/abs/1705.05595", "https://arxiv.org/pdf/1705.05595v2.pdf", "https://export.arxiv.org/pdf/1705.05595" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5b69e39d4f403e3c39ebee0742216e878fee110f", "sources": [ "DBLP" ], "title": "GraphH: High Performance Big Graph Analytics in Small Clusters", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "5b73b0524f4d6660acc0c9a4b50590905ed9fa7d": { "authors": [ { "ids": [ "1931413" ], "name": "Srinivas Narayana" }, { "ids": [ "39118448" ], "name": "Anirudh Sivaraman" }, { "ids": [ "2335477" ], "name": "Vikram Nathan" }, { "ids": [ "1688438" ], "name": "Prateesh Goyal" }, { "ids": [ "39685882" ], "name": "Venkat Arun" }, { "ids": [ "2587719" ], "name": "Mohammad Alizadeh" }, { "ids": [ "32317135" ], "name": "Vimalkumar Jeyakumar" }, { "ids": [ "33742176" ], "name": "Changhoon Kim" } ], "doi": "10.1145/3098822.3098829", "doiUrl": "https://doi.org/10.1145/3098822.3098829", "entities": [ "Aggregate function", "Compiler", "Key-value database", "Network performance", "Network switch", "Processor design", "Query language", "Simulation", "Zig-zag in-line package" ], "id": "5b73b0524f4d6660acc0c9a4b50590905ed9fa7d", "inCitations": [ "63efcd0695d3de798e2743739c8b6a32a568fb84", "88b46e17199bfaa4cf65498bcaeced5284279b97", "45cd38b6e3a1915c590edeb5af68b4f12e695fb6", "caba95723cf0ac58091dbddf1edfd3fc485b0e3e", 
"726c2e6b8d7f97d9a3256fc08d17f6fe99cc1a7b", "464e28060b5969df11846da9f67b3f152f98f210", "b3f196f1c0923d9ceec7c43544e5cf468afaa21d", "51c78913dd6acb4c5667c71e188f1ddb3033b85c", "68fdee99eb19e21dc3aeb5b570dd6436e928bbd5", "4c27e01a4496a1af57cc8d757fc34e46cf17e5eb" ], "journalName": "", "journalPages": "85-98", "journalVolume": "", "outCitations": [ "f4fdaaf864ca6f73ced06f937d3af978568998eb", "3dde3fec553b8d24a85d7059a3cc629ab33f7578", "c678e962b158153924bbb24c4900b84375be7e57", "a512812f72321eb4b198cc11cbd6755bfa71aa1c", "7e7b6249b598d9a4c63394e3a2efd008268ae851", "089b10645ee63cd9c5bb4ab661141dd813408e15", "1f0ea586a80833ee7b27ada93cc751449c4a3cdf", "596e57e4ad70c8856391edd3bda70be46bb075ed", "0130c8c2c9bc7f64d9bf0aee5e0704bbeadfe9f3", "4fa181431ddeb9245f8fddfc1b4bdf5138648bbf", "9289860d43896b2d174a136eb56f03bb1b05e8d9", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "1fd4d92a9b44cfe0acc9b0d9bca3366ce134e439", "1dc94ea6beac4be0f1e4327fc81ac5c2f592d934", "06beeda7be321eb0a294af55b7689d22d77a5b2b", "034b937edbff280dfdd7b2e98639655fd3587402", "7a278ee0578f194700cadc3811cdda4ec751f88a", "ca6d4e88bc08a0a7d39ced7db8bb5711eed8f2db", "7efe0dc4cde074bd87089491a6f95dde84397cca", "3f5dc20732d3af093fec0fe2806d3c63d0652682", "6c5462d31a0d0f4e6cb2ff7ae795250957d9fcab", "26a7c792938ae8e369532ce462d0da0ffadcdcca", "4004e51f8f6bb775bd394942007f761d42fdaaad", "044ce2a427c65d53f3d8279339b8eb6f020121c7", "025652412d507a8cf98ecacd8a44d32ce28995e1", "0f35b3fd2ef4638a23ee07db4057cc78365c982a", "2b0c044181e70ee8eacd2db26c31a03d5ec24c9c", "2e4ab1140b454fc6dacf4d23d3663aa34c741577", "0ef88610f4382a9c6e5c8020ab6ccf3dde0fb3eb", "65da29a03c8905cbc0614612d1632864336c4786", "1aafc7066e52f18dee78103822da24a5d85da93c", "f06bb30589a0554b19f1544af0e018b50b974bf7", "047a8db8654292560b5d023a8ef61cd335938822", "0c3efd29012fd6c0df075d46307e55027ce5fd8b", "0847dfb07af6685f96b885a93c2cd4dfb4c3d1a9", "5594c2ddde27f4262a53668ca9b09ad7a9453102" ], "paperAbstract": "Network performance monitoring today is 
restricted by existing switch support for measurement, forcing operators to rely heavily on endpoints with poor visibility into the network core. Switch vendors have added progressively more monitoring features to switches, but the current trajectory of adding specific features is unsustainable given the ever-changing demands of network operators. Instead, we ask what switch hardware primitives are required to support an expressive language of network performance questions. We believe that the resulting switch hardware design could address a wide variety of current and future performance monitoring needs.\n We present a performance query language, Marple, modeled on familiar functional constructs like map, filter, groupby, and zip. Marple is backed by a new programmable key-value store primitive on switch hardware. The key-value store performs flexible aggregations at line rate (e.g., a moving average of queueing latencies per flow), and scales to millions of keys. We present a Marple compiler that targets a P4-programmable software switch and a simulator for high-speed programmable switches. 
Marple can express switch queries that could previously run only on end hosts, while Marple queries only occupy a modest fraction of a switch's hardware resources.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098829", "http://cs.nyu.edu/~anirudh/marple_sigcomm.pdf", "http://nms.csail.mit.edu/papers/marple.pdf", "http://web.mit.edu/~alephtwo/www/papers/marple-sigcomm17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5b73b0524f4d6660acc0c9a4b50590905ed9fa7d", "sources": [ "DBLP" ], "title": "Language-Directed Hardware Design for Network Performance Monitoring", "venue": "SIGCOMM", "year": 2017 }, "5bce1ce2f544f93cb2ad55ffc2f43b588b204584": { "authors": [ { "ids": [ "2467321" ], "name": "Ashiwan Sivakumar" }, { "ids": [ "2107248" ], "name": "Chuan Jiang" }, { "ids": [ "2249019" ], "name": "Yun Seong Nam" }, { "ids": [ "3232007" ], "name": "Shankaranarayanan Puzhavakath Narayanan" }, { "ids": [ "34945576" ], "name": "Vijay Gopalakrishnan" }, { "ids": [ "32782897" ], "name": "Sanjay G. Rao" }, { "ids": [ "35049639" ], "name": "Subhabrata Sen" }, { "ids": [ "1790166" ], "name": "Mithuna Thottethodi" }, { "ids": [ "1717864" ], "name": "T. N. 
Vijaykumar" } ], "doi": "10.1145/3117811.3117827", "doiUrl": "https://doi.org/10.1145/3117811.3117827", "entities": [ "Approximation algorithm", "JavaScript", "Program slicing", "Scalability", "Web page", "Web server", "World Wide Web" ], "id": "5bce1ce2f544f93cb2ad55ffc2f43b588b204584", "inCitations": [], "journalName": "", "journalPages": "448-461", "journalVolume": "", "outCitations": [ "6a656a567097c53a49b1dbeb9e1e77bebf7524ec", "b380ea472bf02fcac9e814a717c61fcea1eb81f9", "4daf1e0aeca142a23f816bd73daf2f86ab2c5c52", "ac596d19aa9d8d129b04f1c0cc9795cd149f64ae", "06706cd6c46bc413bb9c272d6c1c034313ff2742", "67972276329a51525048b9cd10c4649e03efb9b5", "46db0ab63c2d7db3aff7fedcc0fb8f04fb4105e4", "539037fd93c7c14eeb6bb189414906dab30f4c56", "4074cbed01f2a34f6e88cdaf48fadaaecb177500", "065e1b5f59ccd2526117be9ec98c2df9e4172bea", "cb4160990391c9069ef08a262468d21171beae91", "99b2348bc0a4425294dedba612de72cef0b63402", "36275d197118b73f8244828cb3d190617f924dcc", "ef7e8e7fced3035182cd69715b2c23c65128230d", "307514e25f67899c679c8d70f31667d338556ebd", "05cd49dca40332e85ed5f2d4cb8bde7d5970519c", "e68b642704709bf9622d7aad526a57e61b8a5c8d", "430cd2b1c08aa86bb4aef152ee2ca764c5342c3e", "103eef2be0295f4a26e0d5043c95b9a5c8323975", "6566d98a370ee01ad78c12ec4471bb5ffbe7a8ab", "0507b04c131f2244524fda97cd1707af5760216e", "31f87a5e05a6a3bd380bf979ec2db45d7a3ab971", "143481d55d9f9d25e53f06a6afaf15feb7430c62", "16d0a8ee484f4a34e1cdcda8a0c2453e2e962ada", "ae283ece337c29412103d969f56c724c91d424a3", "02f5ad096e6273f4ac67c99a021356377a37e779", "373f39a4defdb668bcfd01ee359d93365328686b", "5fad167381681bb5a53b64311ca0faea550eb4e4", "1aaea3bf77dfa69605cf7d243fc6a8255d11aae9", "6911b420bb4eb3004dbb6a89c94dfd524bcf0074", "3711d6beb5c0c427d3306a3f979ae04968df2cc4", "14d644c617b1bdf57d626d4d14f7210f99d140e6", "0e35b24801406bcd632874134632e353b03850a4", "228c9ef44ed51f4152a5655be54c8d679c54bb01", "dbd37f3a678dad731ca1783e988bea12e715af0a", "08bb5149cc215c0714492b407145bbc93006f44c", 
"337e4b7f57ccbb7485950b93da9c5bb4ec4dc9ad", "03d83007173d5bdd55e5b2894a53352b52e9201e" ], "paperAbstract": "Despite much recent progress, Web page latencies over cellular networks remain much higher than those over wired networks. Proxies that execute Web page JavaScript (JS) and push objects needed by the client can reduce latency. However, a key concern is the scalability of the proxy which must execute JS for many concurrent users. In this paper, we propose to scale the proxies, focusing on a design where the proxy's execution is solely to push the needed objects and the client completely executes the page as normal. Such redundant execution is a simple, yet effective approach to cutting network latencies, which dominate page load delays in cellular settings. We develop whittling, a technique to identify and execute in the proxy only the JS code necessary to identify and push the objects required for the client page load, while skipping other code. Whittling is closely related to program slicing, but with the important distinction that it is acceptable to approximate the program slice in the proxy given the client's complete execution. 
Experiments with top Alexa Web pages show NutShell can sustain, on average, 27\\% more user requests per second than a proxy performing fully redundant execution, while preserving, and sometimes enhancing, the latency benefits.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117827", "https://engineering.purdue.edu/~isl/papers/com110.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5bce1ce2f544f93cb2ad55ffc2f43b588b204584", "sources": [ "DBLP" ], "title": "NutShell: Scalable Whittled Proxy Execution for Low-Latency Web over Cellular Networks", "venue": "MobiCom", "year": 2017 }, "5bf2414cb01317cd1664c896f1f1da735443a0f5": { "authors": [ { "ids": [ "3214124" ], "name": "Jinhong Jung" }, { "ids": [ "2867343" ], "name": "Namyong Park" }, { "ids": [ "3130706" ], "name": "Lee Sael" }, { "ids": [ "1734930" ], "name": "U. Kang" } ], "doi": "10.1145/3035918.3035950", "doiUrl": "https://doi.org/10.1145/3035918.3035950", "entities": [ "Computer data storage", "Data mining", "Data pre-processing", "Experiment", "Iterative method", "Preprocessor", "Running with Rifles", "Scalability", "The Matrix" ], "id": "5bf2414cb01317cd1664c896f1f1da735443a0f5", "inCitations": [ "eba4a67229413f9eb703d26f64ef530899150ad5", "816ff95fe2f1d0e9234348164fa31bcf271e8be9", "33cd7edfd054cf1d724884dc301e96a10be532cf", "40cb86ffbc0c127bcae3137138da10f217716779", "73916c66a9e409f1636935ed37699242f035a0f2" ], "journalName": "", "journalPages": "789-804", "journalVolume": "", "outCitations": [ "eb82d3035849cd23578096462ba419b53198a556", "468c3b2bf358d07cc625b075f91595d825299948", "1a4edf228e648b54fab3f25df82817cfd0a5bb42", "07ad62b6b5da5f226c88549378886ca062e207a0", "9ebf0f5f36d1504e96d147f38e5b9e21d7d28825", "6c15b76f9018e24cbb16909a9d77df24da948ab9", "607c8ae7b868015ea2deb61969d7e38988de8ca1", "4ad0595b7af6c4041bfd89ccf74d94f154a39762", "3ad743e436bb8749b7f750e4b316550a9d124bac", "fec9ce47524e65b89c20d4dc1671c5b1a7a0a41d", "4f9df283f1dac228fbbd3efd50765ecb317efe6c", 
"1b348075d02cc532b1a01955e21ba3062e769113", "3f789c5902f6094fe2db402d73cf81aeb3bacc69", "2eed842c2cf908df5c5d9c1c1e0a160ecd881679", "631e721376e844a016ffe18a8a9af3d75766f91c", "3e1e5a5edd5858d906b49363984a3e3659fb9478", "408cd9103f2d7cdafce2f6b984035b2be0ed9b7d", "62ffd399b939f09430b987d59afcf1c8b5ba28a7", "a1b297de50f1678dd97e36b13559627640354914", "b3ca8fb21ef2e12b4aba555230559d632c1e3ea3", "0d06de003e8ca949b3b39f9a51750c050addb997", "cd73fb6b77c94842e58486cd40cd85d40b18b4a3", "41a70062d260feb62e0ae64acf252a839c0bbd61", "45f615637d9bdbc1c20b5ac2440dd5271be55a9d", "7664b703efab1eb2815b94b02f2ec31e534ca9cb", "7bb9bab74df4d2939bbdf41fc33027b59e0f229e", "14815c67e4d215acf9558950e2762759229fe277", "183c44d2b9ac64e8c795464f91ef98f1e3ba2ea3", "0ecdf0d9e33ae993d5f789c6e1bb410ba2fca0b8", "7fd78d448539d94e250aa9c08fe6a2a031f44f3f", "6f4c0ae207ec857e51cefc049de65c55ad61bb98", "1000dd39d230109762404983eb5666da4a777e72", "237dc6e66cc6691bf2245c02cc19a6e0c077fde9", "41e62b05a64225d209e9cd37775c3303fc6ce3c5", "050befa33b96aeec92f98cfe74b6b10702bc1250", "3105c03f6ee3135ac6b649ed6313ae0e6c0eb8fc", "1781fcd66c25d303a66eb980728b0da6ed1febbf", "f580162b0f1ca6e7cf03eb3c9cab1c10907a3a9c", "080ff994ebb101ac340e446344215f834eae0f6c", "b2de9228510f30df18d53c259ab67c977bdfea87", "735209db0198821865444ebc664d0571f9d9d19a", "13cc53a512ca45d229e9fe514ad0b30441d324d8", "acf2c6c0caa98461ae784a4054aa66968e7b6c58", "318567451fc86c9a02afd1172aba6d0ab2d5d814", "2c29b9a1b511219bc564c9b0deada00b50f5c79a", "871f43a70f3171768734e0c2add1f263b8f47a70", "29efbdf3f95cee97405accafdebd3bd374f1f003", "15244f9eb5af689f7a2ef9f746300db92bf79f39" ], "paperAbstract": "How can we measure similarity between nodes quickly and accurately on large graphs? Random walk with restart (RWR) provides a good measure, and has been used in various data mining applications including ranking, recommendation, link prediction and community detection. 
However, existing methods for computing RWR do not scale to large graphs containing billions of edges; iterative methods are slow in query time, and preprocessing methods require too much memory.\n In this paper, we propose BePI, a fast, memory-efficient, and scalable method for computing RWR on billion-scale graphs. BePI exploits the best properties from both preprocessing methods and iterative methods. BePI uses a block elimination approach, which is a preprocessing method, to enable fast query time. Also, BePI uses a preconditioned iterative method to decrease memory requirement. The performance of BePI is further improved by decreasing non-zeros of the matrix for the iterative method. Through extensive experiments, we show that BePI processes 100 times larger graphs, and requires up to 130 times less memory space than other preprocessing methods. In the query phase, BePI computes RWR scores up to 9 times faster than existing methods.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035950", "http://www3.cs.stonybrook.edu/~sael/paper/Jung%20et%20al.%20-%202017%20-%20BePI.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5bf2414cb01317cd1664c896f1f1da735443a0f5", "sources": [ "DBLP" ], "title": "BePI: Fast and Memory-Efficient Method for Billion-Scale Random Walk with Restart", "venue": "SIGMOD Conference", "year": 2017 }, "5c542a5a7de6c260a0665ffe173a0632249c2507": { "authors": [ { "ids": [ "22709779" ], "name": "Rafa\u00ebl del Pino" }, { "ids": [ "2538633" ], "name": "Vadim Lyubashevsky" }, { "ids": [ "1788020" ], "name": "Gregory Neven" }, { "ids": [ "33308902" ], "name": "Gregor Seiler" } ], "doi": "10.1145/3133956.3134101", "doiUrl": "https://doi.org/10.1145/3133956.3134101", "entities": [ "Amortized analysis", "Commitment scheme", "Privacy", "Quantum", "Quantum computing", "Randomness", "Zero-knowledge proof" ], "id": "5c542a5a7de6c260a0665ffe173a0632249c2507", "inCitations": [ "d343d5aceb3139688ce65fdf6ea989920ffca28c", 
"da55f0f1928aadc371bf0ca5aa7c3d7a6c574935" ], "journalName": "", "journalPages": "1565-1581", "journalVolume": "", "outCitations": [ "f31411fd1fb0078f8e3e277f1df6a462ce7ce6a4", "06c9056243849b320286560f76fffa979381995f", "366cee7fe44effe5ad5c1d5ad398856d89e0c792", "a76ad055c7a5c7bc40982206e70ed08fbebc7608", "e1e819156dc7e54d280dfd4232a516228b83c2c5", "90f26c0f0d04b9c4999b454c35ee1c7603ca9e4b", "9b7e7cbdebb606df1eaa8db31662c0a831f10b80", "d343d5aceb3139688ce65fdf6ea989920ffca28c", "6561db24a8cd65b88e8215d7e94c0f46c7234b23", "8d6650f252d2633f22b726f24336905483cb011c", "85fa3ee32a49584eb9d0d7949f43421aebfdfd72", "39bd1f1f75ca061985833f7f1d339ace60047f45", "7821ac7391c398aa94af65e3be9d01ee7cbfb1c2", "3cdae223c942c0ce876e92890849ceaa85752a6c", "092ec581fdeb47176065592a2706ddf574efdc29", "79a4259799db4239ddedf8b0b7dde1042e21ec80", "1087621ec8df519368110c6e6ee49f6d13a0dcb0", "81a23c602abb08187b34101269379d83a12634c0", "73659d3b1e0992b21dcb0ecb1e1d0b2de5896562", "51b2557aaa360cf5dfffb0765e2029653b2d8cd8", "353989647d2f76abd61a50fa8cc133574db1bcca", "d78491c564d3be43072ee6a6f337ee2b8e2b427e", "6871b95c14dccca7636b498b5d363a743c5288e6", "1d5a4ce2820bebf83006ad1f24bd28236b158af1", "026848a58faaa17a393a50d703257cc2496659ad", "761ad28344eb146f8579f7dc79eacf31422e23be", "a00f57eafa0425c6f4d3f5cc47244ea282a83c6b", "13a4235d952dae805b3614969c89494f53f4dea0", "3e7d36821fa5ab37b3182b21521ef4a376c3bf40" ], "paperAbstract": "We propose a lattice-based electronic voting scheme, EVOLVE (Electronic Voting from Lattices with Verification), which is conjectured to resist attacks by quantum computers. Our protocol involves a number of voting authorities so that vote privacy is maintained as long as at least one of the authorities is honest, while the integrity of the result is guaranteed even when all authorities collude. Furthermore, the result of the vote can be independently computed by any observer. 
At the core of the protocol is the utilization of a homomorphic commitment scheme with strategically orchestrated zero-knowledge proofs: voters use approximate but efficient \"Fiat-Shamir with Aborts\" proofs to show the validity of their vote, while the authorities use amortized exact proofs to show that the commitments are well-formed. We also present a novel efficient zero-knowledge proof that one of two lattice-based statements is true (so-called OR proof) and a new mechanism to control the size of the randomness when applying the homomorphism to commitments. We give concrete parameter choices to securely instantiate and evaluate the efficiency of our scheme. Our prototype implementation shows that the voters require $8$ milliseconds to submit a vote of size about $20$KB to each authority and it takes each authority $0.15$ seconds per voter to create a proof that his vote was valid. The size of the vote share that each authority produces is approximately $15$KB per voter, which we believe is well within the practical bounds for a large-scale election.", "pdfUrls": [ "https://eprint.iacr.org/2017/1235.pdf", "http://doi.acm.org/10.1145/3133956.3134101", "http://eprint.iacr.org/2017/1235" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5c542a5a7de6c260a0665ffe173a0632249c2507", "sources": [ "DBLP" ], "title": "Practical Quantum-Safe Voting from Lattices", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "5c6041a3f1e8e47b5fe92ca4cfc42464e82e89cd": { "authors": [ { "ids": [ "1722599" ], "name": "Tayyar Rzayev" }, { "ids": [ "1752578" ], "name": "David H. 
Albonesi" }, { "ids": [ "2539134" ], "name": "Fran\u00e7ois Guimbreti\u00e8re" }, { "ids": [ "1808537" ], "name": "Rajit Manohar" }, { "ids": [ "1831764" ], "name": "Jaeyeon Kihm" } ], "doi": "10.1109/ISPASS.2017.7975289", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975289", "entities": [ "Display device", "Hot-carrier injection", "Human\u2013computer interaction", "Mobile device", "Requirement", "Simulation", "User interface" ], "id": "5c6041a3f1e8e47b5fe92ca4cfc42464e82e89cd", "inCitations": [], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "173-184", "journalVolume": "", "outCitations": [ "3538739741d7dd27788ef35d9f0900adb070d6f8", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "2a25ac43574a0dc240cd23b4c542493c85cc0be5", "8cfa975a656838356dc4b211b6c2186bc2601a05", "21884ac905304b240ece4321ac2ee05874926ed1", "50f62cfd46df89f34e975e9e2858100f477b652d", "2857fd5657b58701dc6545ae9a1871999b9fdf30", "dc6e7f9d811c5d04cb781685550978a3e0565e08", "373d1d260d90760634832840961f96049f1a545e", "9589b8a1df5b4c8e0e81784185e31265b0d096b7", "743db4cdd9d7c57a8448fdb3417a69590862af90", "020f76e207dc023fea856987add3b7fee7f9c44a", "233cf6ae7b91b7dd11a6bd44c3079b8256905396", "735987e4150c5c208dd2bb9f320093dc65c0be06" ], "paperAbstract": "The advent of high speed input sensor and display technologies and the drive for faster interactive response suggests that human-computer interaction (HCI) task processing deadlines of a few milliseconds or less may be required in future handheld devices. At the same time, users will expect the same, if not better, battery life than today's devices under these more stringent response requirements. In this paper, we present a toolbox for exploring the design space of HCI event processors. We first describe the simulation platform for interactive environments that runs mobile user interface code with inputs recorded from human users. 
We validate it against a hardware platform from prior work. Given system-level constraints on latency, we demonstrate how this toolbox can be used to design a custom heterogeneous event processor that maximizes battery life. We show that our toolbox can pick design points that are 1.5\u20132.5x more energy-efficient than general-purpose big.LITTLE architectures.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975289" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5c6041a3f1e8e47b5fe92ca4cfc42464e82e89cd", "sources": [ "DBLP" ], "title": "Toolbox for exploration of energy-efficient event processors for human-computer interaction", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "5c62dcd5540d7cf8a600f4a49598b63428fd6908": { "authors": [ { "ids": [ "2543728" ], "name": "Suphannee Sivakorn" }, { "ids": [ "1729224" ], "name": "George Argyros" }, { "ids": [ "40428350" ], "name": "Kexin Pei" }, { "ids": [ "1720824" ], "name": "Angelos D. 
Keromytis" }, { "ids": [ "39400201" ], "name": "Suman Jana" } ], "doi": "10.1109/SP.2017.46", "doiUrl": "https://doi.org/10.1109/SP.2017.46", "entities": [ "Algorithm", "Automata theory", "Automaton", "Black box", "Black-box testing", "Code coverage", "Corner case", "Deterministic finite automaton", "Internationalized domain name", "Java", "Library (computing)", "Machine learning", "Man-in-the-middle attack", "Public key certificate", "Python", "Regular expression", "Regular language", "Server (computing)", "Software bug", "Software verification and validation", "Template (C++)", "Transport Layer Security", "Verification and validation", "Wildcard character", "X.509" ], "id": "5c62dcd5540d7cf8a600f4a49598b63428fd6908", "inCitations": [ "d8d064bc613190e43f8c8fa5db6451a97d34e89f", "157810ccca1ab1acb815ca9c77afcd9040ecdd16", "32187449ad863fa01597b1a857ab5dc8677769cc", "f70b85bf33225fee9af21578c9cd70f7014e0b27", "65c6bda16861410915c4b50d2540c9d058a1bb57" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "521-538", "journalVolume": "", "outCitations": [ "eb6b4c9ad52658825947ec4717e901f420f5fa9a", "7eed7154d0590c1e6eb64d4ae7935a526deac4a4", "582302da008255ff515f05c3242f750878725745", "fb22f590e467a16bb90c5a61e7718f25461cf528", "c5ef07a9e8f24019a918ce6d882211e7f507b07b", "03086e02b706e6955735ed15603b1015334bc095", "d96e97876571131761501182e2a1363535b9a440", "5aac8e7cefc388e35a015d6ee551b429e9062429", "48fc8f1aa0b6d1e4266b8017820ff8770fb67b6f", "13315d952a43c391bf4910271fc2582858e86e9e", "208ed7512ea84f22a004920ea0b4c475bc836abc", "114ef9d75f9b4c20853efb1f870d3f1517998056", "e5e1327ef05b629e5015631b562716ea2e024d1f", "0117b76648a294663ed15a3e77e7d80154a4b840", "05ae289245b5a9222a1a6fc3f36910c3cb0f4662", "a1843173909eaa253f5a7f147752c8cd4b0e5d71", "917d962341710f2d4d03659035f57d044a8dc70f", "226242629f3d21b9e86afe76b1849048148351de", "3f88107149efe6956ff3cbf45a1e708e12fc6dce", "1a0ca2cf71616fa492ebe611f1d83261d2ecf052", 
"940ead3fccc58edbf7ee39da25895511de77c451", "16fad84b5cd76c403c94b16353fa6a4d64f19251", "15a8ac900e31326cca89b26305121ee8be724ed0" ], "paperAbstract": "SSL/TLS is the most commonly deployed family of protocols for securing network communications. The security guarantees of SSL/TLS are critically dependent on the correct validation of the X.509 server certificates presented during the handshake stage of the SSL/TLS protocol. Hostname verification is a critical component of the certificate validation process that verifies the remote server's identity by checking if the hostname of the server matches any of the names present in the X.509 certificate. Hostname verification is a highly complex process due to the presence of numerous features and corner cases such as wildcards, IP addresses, international domain names, and so forth. Therefore, testing hostname verification implementations present a challenging task. In this paper, we present HVLearn, a novel black-box testing framework for analyzing SSL/TLS hostname verification implementations, which is based on automata learning algorithms. HVLearn utilizes a number of certificate templates, i.e., certificates with a common name (CN) set to a specific pattern, in order to test different rules from the corresponding specification. For each certificate template, HVLearn uses automata learning algorithms to infer a Deterministic Finite Automaton (DFA) that describes the set of all hostnames that match the CN of a given certificate. Once a model is inferred for a certificate template, HVLearn checks the model for bugs by finding discrepancies with the inferred models from other implementations or by checking against regular-expression-based rules derived from the specification. The key insight behind our approach is that the acceptable hostnames for a given certificate template form a regular language. 
Therefore, we can leverage automata learning techniques to efficiently infer DFA models that accept the corresponding regular language. We use HVLearn to analyze the hostname verification implementations in a number of popular SSL/TLS libraries and applications written in a diverse set of languages like C, Python, and Java. We demonstrate that HVLearn can achieve on average 11.21% higher code coverage than existing black/gray-box fuzzing techniques. By comparing the DFA models inferred by HVLearn, we found 8 unique violations of the RFC specifications in the tested hostname verification implementations. Several of these violations are critical and can render the affected implementations vulnerable to active man-in-the-middle attacks.", "pdfUrls": [ "http://www.nsl.cs.columbia.edu/papers/2017/hvlearn.oakland17.pdf", "http://nsl.cs.columbia.edu/papers/2017/hvlearn.oakland17.pdf", "http://www.cs.columbia.edu/~suphannee/papers/sivakorn.sp2017.hvlearn.pdf", "https://doi.org/10.1109/SP.2017.46", "http://www.cs.columbia.edu/~angelos/Papers/2017/hvlearn.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5c62dcd5540d7cf8a600f4a49598b63428fd6908", "sources": [ "DBLP" ], "title": "HVLearn: Automated Black-Box Analysis of Hostname Verification in SSL/TLS Implementations", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "5c9a7ad7892c08656f54e96c280de25bd40389a9": { "authors": [ { "ids": [ "1856525" ], "name": "Alessio Conte" }, { "ids": [ "1728335" ], "name": "Donatella Firmani" }, { "ids": [ "22598405" ], "name": "Caterina Mordente" }, { "ids": [ "1737726" ], "name": "Maurizio Patrignani" }, { "ids": [ "1718274" ], "name": "Riccardo Torlone" } ], "doi": "10.1145/3097983.3098031", "doiUrl": "https://doi.org/10.1145/3097983.3098031", "entities": [ "Algorithm", "Clique (graph theory)", "Computation", "Experiment", "Objective-C" ], "id": "5c9a7ad7892c08656f54e96c280de25bd40389a9", "inCitations": [ 
"b7502ee674310ee4466b598708851f6caf73e9db" ], "journalName": "", "journalPages": "115-124", "journalVolume": "", "outCitations": [ "46676d4900c55bacb40efab31dd5fb1e3e234f11", "062a926dd4ba41264b27a0f29b06952ae0178d51", "92e76450a7ddc29f14279eb3a233c13ab680ce74", "cf212a42def6ec34bf8e0526df05b45490e3d504", "cb55a6bba52963f98238de3b2aa4655854d61056", "6d34a19e4191ee012854f70c445ea080541689fe", "0abc9eacf942396f4664e83fd84deb0e82983efd", "c846a973b8787a75755e99be946fbf5003684ae7", "a2f399ff1e3eb74cd813c8beb87816fced0af759", "10924940cff0dcfc16c18fde9e4e0cd034bac55c", "5d0d098dd284b74504d27af7c5e0e5c50f1b7fd4", "d41924cd5e725c3ce6d58032de38ed4c21bf2f5b", "1508502719f66259b2e65882c5b91c458cb8ff39", "384474a40381241d60b3728279e1212b68dc0e39", "95b251202e1f5ff7d7f41dda553a38e395ecf555", "c5e8af623ecccdf0dcf2c8a1089c4f77a394f17b", "338cc445a8543fb7279a91de40fb921959e1bbef", "0238ff2cace6044d6453d6591061f6cf08960572" ], "paperAbstract": "K-plexes are a formal yet flexible way of defining communities in networks. They generalize the notion of cliques and are more appropriate in most real cases: while a node of a clique C is connected to all other nodes of C, a node of a k-plex may miss up to k connections. Unfortunately, computing all maximal k-plexes is a gruesome task and state-of-the-art algorithms can only process small-size networks. In this paper we propose a new approach for enumerating large k-plexes in networks that speeds up the search by several orders of magnitude, leveraging on (i) methods for strongly reducing the search space and (ii) efficient techniques for the computation of maximal cliques. 
Several experiments show that our strategy is effective and is able to increase the size of the networks for which the computation of large k-plexes is feasible from a few hundred to several hundred thousand nodes.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098031" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5c9a7ad7892c08656f54e96c280de25bd40389a9", "sources": [ "DBLP" ], "title": "Fast Enumeration of Large k-Plexes", "venue": "KDD", "year": 2017 }, "5cb9f8d8f286cfcfce4be2d6e1ad09ca471199ac": { "authors": [ { "ids": [ "35410424" ], "name": "Rakesh Kumar" }, { "ids": [ "2748712" ], "name": "Jos\u00e9 Cano" }, { "ids": [ "3091010" ], "name": "Aleksandar Brankovic" }, { "ids": [ "1930410" ], "name": "Demos Pavlou" }, { "ids": [ "2781082" ], "name": "Kyriakos Stavrou" }, { "ids": [ "2129996" ], "name": "Enric Gibert" }, { "ids": [ "40200443" ], "name": "Alejandro Mart\u00ednez" }, { "ids": [ "40430154" ], "name": "Antonio Gonzalez" } ], "doi": "10.1109/ISPASS.2017.7975290", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975290", "entities": [ "Booting", "Central processing unit", "Clock rate", "Computer performance", "Enterprise architecture framework", "Instruction pipelining", "Microprocessor", "Pipeline (computing)", "Program optimization", "Project Denver", "Shattered World", "Simulation", "Software quality assurance" ], "id": "5cb9f8d8f286cfcfce4be2d6e1ad09ca471199ac", "inCitations": [], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "185-194", "journalVolume": "", "outCitations": [ "126d141375e17616f88efe8d33db829def39f348", "2017d5fb9bfb756544a2af5c59f8aaad239bb040", "2405ab404bdef20ff89d956fea63675af3f64feb", "3e91c01e72dcfc8579a5debfe7ba41d81c289e61", "d0903e4d0e04ea4809cb92cd9f7ab9367ec5b56f", "0884c53895371aaf2d3f5e5decb150323cbe3f9d", "3bf23f74bf33ed52f7c28587fab315610b27221a", "2d872409fa7d486802e609cfc4bbcce33e6a4b30", 
"423437335211cfd0fd61aad08b822ff349ecef3d", "85af385acead898d262ee01355b9884883a6552f", "761e83b066dd6e982d1b3a05b7ef1931a1d25cb9", "57384146f6b17780acf15abb85d14eff47b1b9b9", "0653e2ed9f683868cb4539eb8718551242834f6b", "dd9d7d5a75f36739420128402fdd2acfa0499b18", "0a65844b2e318305c7031eb53cb306efe7763d22", "28189dfeea9da4ed8e6cfa61b368c88afbbc68c9", "76d71b5b7bf4346b58b42c62d495b500c92c0731", "83287550c054e83dcf4e2b08e59f248803c84184", "2e175e5749bbc955ffc3f76b2217277398b39b41", "2960c89331eb7afa86584792e2e11dbf6a125820", "2549f9b455f75ebaaa3736208e319847140b705e", "989426f537434a690936fa847517914ba72324be", "14b664dd04703d2333bce3cb35641f90a3a8a2ce", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "d167b5c8b21c642662000417f313798d375ff38e", "db39dc9f70d63a117dfc6e89a59fed5483532a5a", "11977fa3fc3a2cff35adb61e96d080a95aa8f290", "f29dac2e26273532c81c933f091c7a60b9480f94", "352a8957005dc5519b15ed1870751ec494d66395", "01ca2e1a1de5b0ceba2d962d19b6d89f36f9e02b", "0e00a3e0b0120dcdb89f0ee03534643090235ff5", "49dc03814c171c08331fe9f1afc34a54951ae8e0", "04db091b542b68806222850daead11788acb6b68", "0856f6f40b889dba559f19654834114e9f469760", "19991b8201f5b447babcca43d699debafc262b52", "321b5800561979966e4f1c8e3a9b3b4880af3f75", "869f0d42d5a397f85bdade0f5086e2e1df602a71", "13a6c714cddeded37a69205f39935da4e7082f43", "bb6cedd67b26fce1f0d8eacb0357658c6831586d", "01ac84ffb4b7f575ea0705181795f4fd2368f519", "5732268aea93dc3aa6b6f4a2db57a609b9714417", "f016d23ffca72cdf1eb584613452720eaacafd9c", "30dae96a2193f4582c7fd1b2113c881b0500b7dd", "7631275e3266f627df6cc29441f69ab9f5f2b1c6" ], "paperAbstract": "Improving single thread performance is a key challenge in modern microprocessors especially because the traditional approach of increasing clock frequency and deep pipelining cannot be pushed further due to power constraints. Therefore, researchers have been looking at unconventional architectures to boost single thread performance without running into the power wall. 
HW/SW co-designed processors like Nvidia Denver, are emerging as a promising alternative. However, HW/SW co-designed processors need to address some key challenges such as startup delay, providing high performance with simple hardware, translation/optimization overhead, etc. before they can become mainstream. A fundamental requirement for evaluating different design choices and trade-offs to meet these challenges is to have a simulation infrastructure. Unfortunately, there is no such infrastructure available today. Building the aforementioned infrastructure itself poses significant challenges as it encompasses the complexities of not only an architectural framework but also of a compilation one. This paper identifies the key challenges that HW/SW codesigned processors face and the basic requirements for a simulation infrastructure targeting these architectures. Furthermore, the paper presents DARCO, a simulation infrastructure to enable research in this domain.", "pdfUrls": [ "http://www.research.ed.ac.uk/portal/files/33061455/DARCO_ISPASS17_1.pdf", "http://homepages.inf.ed.ac.uk/jcanore/pub/2017_arm-summit_poster.pdf", "http://homepages.inf.ed.ac.uk/jcanore/pub/2017_ispass.pdf", "https://doi.org/10.1109/ISPASS.2017.7975290", "http://www.ispass.org/ispass2017/slides/cano_codesign.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5cb9f8d8f286cfcfce4be2d6e1ad09ca471199ac", "sources": [ "DBLP" ], "title": "HW/SW co-designed processors: Challenges, design choices and a simulation infrastructure for evaluation", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "5cc544fe41975ec98844bcf1091193e8081abd50": { "authors": [ { "ids": [ "2538457" ], "name": "Maliheh Shirvanian" }, { "ids": [ "1707411" ], "name": "Nitesh Saxena" } ], "doi": "10.1145/3133956.3134013", "doiUrl": "https://doi.org/10.1145/3133956.3134013", "entities": [ "Checksum", "Crypto phone", "Dictionary", 
"End-to-end principle", "Feature phone", "Man-in-the-middle attack", "Mobile phone", "Real life", "Secure voice", "Usability", "Usability testing" ], "id": "5cc544fe41975ec98844bcf1091193e8081abd50", "inCitations": [], "journalName": "", "journalPages": "1329-1342", "journalVolume": "", "outCitations": [ "29d7c0ed8cd6c4eb5d07e8bfa82a834e7313fe00", "b0509c72b393c0f506c032a0c1778af88eaa96aa", "040c583694cd6b6e956fbfa87b87a4a752d188bb", "130116fec51cf4591a8eedee3955df989b918ff8", "cf7e8408e546c2a95ed33107eace1a72713bf94b", "3e6495d0e7d9f7ae9a0a6ceec887152e09a49df1", "54a8251af7c57d7c62cca3ccbc2e0c2d7295a0b4", "22410d40cc64428cbcd1028bf962dc41eb8a4ea8", "c1403a9424194fa7548ec16daff02cb56e10d55d", "90050f941252bef33bdef26f8627148f6a365556", "3def59613446cc6fe9130cf7e322e7f6eecf3bab", "48522085156d57503a6da09967b62ad4300b50fd", "003e8d2197146d561aa5a8dc343cc55efe5dce8f", "3f770cc7662340485f8fb328b3f2c95403a08e8d", "2b0119a0eec6e627e0fe5c1cd9a4386be6313fdc", "45489883192b2309ee8eaad9daf48ed52e588d8a", "9051225a58fbc88c494e02fe424c89aadb4e3c7c", "dc15a796eb2fabe6cc119b495765cdcdf8e4be98", "18e3fed48f6e7c7f0cbd50c531a9ed78e82857c2", "240eda216f697828783b44bb55a6384235ec18dc", "f6b91bd2d0d0f5d157d925d4427ff0a38792eec2", "6b5052607fa9ae9d48ec677b40ae91672d287985" ], "paperAbstract": "Crypto Phones aim to establish end-to-end secure voice (and text) communications based on human-centric (usually) short checksum validation. They require end users to perform: (1) checksum comparison to detect traditional data-based man-in-the-middle (data MITM) attacks, and, optionally, (2) speaker verification to detect sophisticated voice-based man-in-the-middle (voice MITM) attacks. However, research shows that both tasks are prone to human errors making Crypto Phones highly vulnerable to MITM attacks, especially to data MITM given the prominence of these attacks. 
Further, human errors under benign settings undermine usability since legitimate calls would often need to be rejected.\n We introduce Closed Captioning Crypto Phones (CCCP), that remove the human user from the loop of checksum comparison by utilizing speech transcription. CCCP simply requires the user to announce the checksum to the other party--the system automatically transcribes the spoken checksum and performs the comparison. Automating checksum comparisons offers many key advantages over traditional designs: (1) the chances of data MITM due to human errors and \"click-through\" could be highly reduced (even eliminated); (2) longer checksums can be utilized, which increases the protocol security against data MITM; (3) users' cognitive burden is reduced due to the need to perform only a single task, thereby lowering the potential of human errors.\n As a main component of CCCP, we first design and implement an automated checksum comparison tool based on standard Speech to Text engines. To evaluate the security and usability benefits of CCCP, we then design and conduct an online user study that mimics a realistic VoIP scenario, and collect and transcribe a comprehensive data set spoken by a wide variety of speakers in real-life conditions. Our study results demonstrate that, by using our automated checksum comparison, CCCP can completely resist data MITM, while significantly reducing human errors in the benign case compared to the traditional approach. They also show that CCCP may help reduce the likelihood of voice MITM. 
Finally, we discuss how CCCP can be improved by designing specialized transcribers and carefully selected checksum dictionaries, and how it can be integrated with existing Crypto Phones to bolster their security and usability.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134013", "https://info.cs.uab.edu/saxena/docs/ss-ccs17.pdf", "https://info.cs.uab.edu/saxena/docs/ss-ccs17-PP.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5cc544fe41975ec98844bcf1091193e8081abd50", "sources": [ "DBLP" ], "title": "CCCP: Closed Caption Crypto Phones to Resist MITM Attacks, Human Errors and Click-Through", "venue": "CCS", "year": 2017 }, "5cdc4b7bc36af36166f75454167e7ac1f95f6cf8": { "authors": [ { "ids": [ "22253682" ], "name": "Nimrod Raifer" }, { "ids": [ "2625830" ], "name": "Fiana Raiber" }, { "ids": [ "1708847" ], "name": "Moshe Tennenholtz" }, { "ids": [ "1779654" ], "name": "Oren Kurland" } ], "doi": "10.1145/3077136.3080785", "doiUrl": "https://doi.org/10.1145/3077136.3080785", "entities": [ "Document", "Game theory", "Hoc (programming language)", "Information retrieval", "Minimax", "Ranking (information retrieval)", "Regret (decision theory)", "Relevance", "World Wide Web" ], "id": "5cdc4b7bc36af36166f75454167e7ac1f95f6cf8", "inCitations": [ "e7ca665defaeaefb9836c80e882f1958fc81c287" ], "journalName": "", "journalPages": "465-474", "journalVolume": "", "outCitations": [ "2837e92e73390b9602d42e2848b6cf1f58c5833c", "11c08f3e0b56dab55b5031257e4727504bf688e3", "3ab8a8f9474e20ccac522a6da7b4edb451f12f29", "ca073f144a077e411e2dad36c72f68ef8f2a03d5", "38612e346fdf3158c32c16058f7e8820a8f0325e", "da87cc3aff92351b057ef26c79853feb9754ffe9", "1f0ba28a4123f71ffa045625d29088f8979945da", "7b5575c14916376f616ea18f86935b92228e6874", "d2e91d486ae52e4deb7712726141ecd4ada559cd", "ecbc640756898f20a297df137f1e8def0469f715", "678bafacd7c467d565c1d63d711e09b0fa390b02", "684be9e9bd41d148158c64ba811c08f66b58092a", "03a8cb23b78ae1e8662b226d96e4a0ac2bf5d3fd", 
"18e93fa7d408e9596992f3d63155cb92827839a4", "3dafdfc4d90abd7c6de642deb3afeb754602aef9", "3334a80676fefc486575bd2ddf1b281a640742f1", "9e51babaf392f2e5fc56861166a7f3331614e1fe", "3c50f1652c97fb3f032b864750c2982704727e93", "229b9e80568169ac90357428ed3cfadd3513c823", "18e39e75401ed1f39d8619c4c08afba0a122a267", "653eddac5447381e9a7e221498973296e8eb732c", "609baf0d38324287aea3eb1b01acbb294cf8fa5e", "2c68c7faa89b104b78e2850dbade5a81f0743874", "547dc350950d191130cb84d6dc9368540dd41c7b", "eab5199f4e60d5dac0e7a9aefdef9a753c77c4f1", "655b4421df24798e3aee30223974a6c95f1b9273" ], "paperAbstract": "In competitive search settings as the Web, there is an ongoing ranking competition between document authors (publishers) for certain queries. The goal is to have documents highly ranked, and the means is document manipulation applied in response to rankings. Existing retrieval models, and their theoretical underpinnings (e.g., the probability ranking principle), do not account for post-ranking corpus dynamics driven by this strategic behavior of publishers. However, the dynamics has major effect on retrieval effectiveness since it affects content availability in the corpus. Furthermore, while manipulation strategies observed over the Web were reported in past literature, they were not analyzed as ongoing, and changing, post-ranking response strategies, nor were they connected to the foundations of classical ad hoc retrieval models (e.g., content-based document-query surface level similarities and document relevance priors). We present a novel theoretical and empirical analysis of the strategic behavior of publishers using these foundations. Empirical analysis of controlled ranking competitions that we organized reveals a key strategy of publishers: making their documents (gradually) become similar to documents ranked the highest in previous rankings. 
Our theoretical analysis of the ranking competition as a repeated game, and its minmax regret equilibrium, yields a result that supports the merits of this publishing strategy. We further show that it can be predicted with high accuracy, and without explicit knowledge of the ranking function, whether documents will be promoted to the highest rank in our competitions. The prediction utilizes very few features which quantify changes of documents, specifically with respect to those previously ranked the highest.", "pdfUrls": [ "https://ie.technion.ac.il/~kurland/p465-raifer.pdf", "http://doi.acm.org/10.1145/3077136.3080785" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5cdc4b7bc36af36166f75454167e7ac1f95f6cf8", "sources": [ "DBLP" ], "title": "Information Retrieval Meets Game Theory: The Ranking Competition Between Documents\u2019 Authors", "venue": "SIGIR", "year": 2017 }, "5ceabf961f38caf48684f3ac7fe8e154929a32d0": { "authors": [ { "ids": [ "7604095" ], "name": "Chunhua Liao" }, { "ids": [ "1905021" ], "name": "Pei-Hung Lin" }, { "ids": [ "39557429" ], "name": "Joshua Asplund" }, { "ids": [ "1736921" ], "name": "Markus Schordan" }, { "ids": [ "33950551" ], "name": "Ian Karlin" } ], "doi": "10.1145/3126908.3126958", "doiUrl": "https://doi.org/10.1145/3126908.3126958", "entities": [ "Benchmark (computing)", "Intel Inspector", "Intrusion detection system", "Open-source software", "OpenMP", "Parallel computing", "Parallel programming model", "Program optimization", "Programmer", "Programming model", "Race condition", "Thread (computing)" ], "id": "5ceabf961f38caf48684f3ac7fe8e154929a32d0", "inCitations": [], "journalName": "", "journalPages": "11:1-11:14", "journalVolume": "", "outCitations": [ "0b84e66c171085b9665c2fbca4718bc9f888a4d0", "122379057de5c4ee9dc80fc5cd11925278fdc215", "a45adba59080ad625e3005c669345c3a96ad3e18", "52a244c6b74623f3a9aefc77ced08e6cc4cd0142", "e57c728502b78214c9750fe384e3764301f88430", 
"2346439ece014d5e3ce1564adc2a7ca098a37c8e", "24e6f34e499634393416ea09c1aadd37ec9e8542", "1eee336fbc45c89a45f240611bb706ebfd588126", "182c0524aa353b6f4f4cb75a88ff3f5fc3bd86e0", "00a9ba0063d34ec56792849a67ef57b4601becbb", "4a3e50b5179120339f4bd3136470f6a768841530", "4edd5bfd9d9e846ccfb1d1830fb5fdfa3ab2efce", "05a618847e4f08e5bca29dff732757779722b2e0", "771e3c7146213802ca8c4db0afbde51606293a71", "89dce5f511b06739cbd1e2e2300775106bfe1d17", "79b6d89f92080b1ec9b8f23d7811666e15f6cabe", "4e6037127e85445079272e1cb5574cbcce2e175e", "38466b62bccbdacd3ef1dab4514a7c010e8f45df", "a88e2e8740416f35380fc664fcc201fb1014a08c", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "09d95e0b06d1174e4ac83c7354bb30877320a362", "86c8f8c0cad85b189feade4b31f36d56ebd9f6c8", "92a8e3696a9c0b5d0b225133132db1f8c3c4bed5", "9c665d0c8730b2f8a2a5df5dd2d673c11b665375", "70873b1edbd42334c4418ab83fc8aa179dcc52d9", "0958a63d9c6238b38377f076b487c413bc8642c1", "456e99b5a94abaa977f848528cbc11d3184fb344", "8a81748e4e9476575a2a7c9e353d4f8f133cd786", "36e68c73386109e583ebc5bf6c58fdee3642aa0e", "e557ac7484195f4ea8a40bec6bcf4f7c50809685", "ce6fe37c3d5f9ebbdf220413e15ca8ac9e2d5f62", "6345e0110f4fbb699910c5da100eb03210b23325", "44808fd8f2ffd19bb266708b8de835c28f5b8596", "37d4838d1d320a17af7ec6f54d9fcef6000f6a66" ], "paperAbstract": "Data races in multi-threaded parallel applications are notoriously damaging while extremely difficult to detect. Many tools have been developed to help programmers find data races. However, there is no dedicated OpenMP benchmark suite to systematically evaluate data race detection tools for their strengths and limitations.\n In this paper, we present DataRaceBench, an open-source benchmark suite designed to systematically and quantitatively evaluate the effectiveness of data race detection tools. We focus on data race detection in programs written in OpenMP, the popular parallel programming model for multi-threaded applications. 
In particular, DataRaceBench includes a set of microbenchmark programs with or without data races. These microbenchmarks are either manually written, extracted from real scientific applications, or automatically generated optimization variants.\n We also define several metrics to represent effectiveness and efficiency of data race detection tools. Using DataRaceBench and its metrics, we evaluate four different data race detection tools: Helgrind, ThreadSanitizer, Archer, and Intel Inspector. The evaluation results show that DataRaceBench is effective to provide comparable, quantitative results and discover strengths and weaknesses of the tools being evaluated.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126958" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5ceabf961f38caf48684f3ac7fe8e154929a32d0", "sources": [ "DBLP" ], "title": "DataRaceBench: a benchmark suite for systematic evaluation of data race detection tools", "venue": "SC", "year": 2017 }, "5d05fea2a5db55753bb5c53654bfcf9692c3a6b1": { "authors": [ { "ids": [ "1788404" ], "name": "Swadhin Pradhan" }, { "ids": [ "2573539" ], "name": "Eugene Chai" }, { "ids": [ "1785674" ], "name": "Karthikeyan Sundaresan" }, { "ids": [ "40219427" ], "name": "Lili Qiu" }, { "ids": [ "32715239" ], "name": "Mohammad Amir Khojastepour" }, { "ids": [ "2083839" ], "name": "Sampath Rangarajan" } ], "doi": "10.1145/3117811.3117818", "doiUrl": "https://doi.org/10.1145/3117811.3117818", "entities": [ "Gesture recognition", "Nominal impedance", "Pervasive informatics", "Radio-frequency identification", "SwIPe (protocol)", "User interface" ], "id": "5d05fea2a5db55753bb5c53654bfcf9692c3a6b1", "inCitations": [ "022f3c00d60d7c691bd1ea944e1b15b99739982f", "04b26697e344a9696ab4b48b592f2b95a99cffde" ], "journalName": "", "journalPages": "261-274", "journalVolume": "", "outCitations": [ "c9b83bec6befa8fe8a1823ca44abdbb784377d99", "38c49df6243bb478a7d7ddc65dfa91923767b1e0", 
"8ac621c36d6b4f00160345a6f0bb4fbc7d18030f", "92b09fbf854caefdb465885b2ebd85d76331dcbf", "3317dd57f1a9a29d7f0a3f8cf82403bc775f5c1c", "8505794d1e082fe4a5bfce27e6c907903c73c62b", "1c940efb1e0b966c9c0eb7b335f5759e2d1b74f0", "bea6b5930c509cecac55e89b7632d50eeb9b764e", "82802e411495bbad77fa2415c6d4633dde180764", "7898ebfcd699f102ff4425c9c7c2d6520c3b6f91", "7ee628c1d2d22c041b220908471e768ad34cb892", "109d9276acd04716b9b041e50e4751ac3250a616", "616f79c610673dd45d730494b0070d338736612e", "991e13cf21bf2c7e7204fe2ded5ad29182c4e3e1", "3953ef3b790369ae9a2848bcb364040ffbcfeff7", "095b9569f2730e397ab2ba4c5c78676319d0630b", "5ec4ea65468de9291432eaf8b5f96b01f3dc8aea", "23d2c44cd47c1842f6ff32082462d293a5071d81", "3a50b26964a9b2b50bdf76fd91296d0f590b3d2f", "9515f64fa6c198220d603be2b1e686271ef263af", "a08e60e1e7f281a3990ee2371203a3b8cef1378f", "fcda707938bf666cdda492754c9f17db346713d6", "57cf0c39b85edd602fe5818639289aeb377e4f93", "5a2b49da8b9f883a14cac4e507d43221f282e077", "bd316a09d703c8f34642d7ae69a967aa5f4a8123", "69e6c6aa4a207eb81be43949edc8af7d6b4782d1", "5d394569619773e4275f3009c838e249cb613928", "641bb9c05fbd54bb6f6f24c247581b33a11dafa4", "62427ead0e700c32abcbbb84d4c2b5714296f677", "8aea613645b3cf811d6c37811e34e8316ad972f1", "ed01790bd8a65cfb324eff32531c32e529abcb70", "ce891a64360da7ac164fdfd333c5380dc364e543", "33b0189b59e4a3f2119487358a8e1f028875dd50", "3c552ebdcca7451ff1e4476b331504680d6b829c", "c8df710db701cc08f148cad90dc55ddc5d7f46c0", "101aaa6b7a3ebd049412265a43f8aed414f44db1", "c64d84d6060459761d7f4f958640efa99b2bdac8", "1c409c370b95ce770eccda97563d72d304f46a6c", "98b52004fe8a3cb4b961c6fa52068699b3fa7264", "3bb76c2989cb4aeae3b20f42e619a862f0d871ca", "416312ca32d815aa3ce73e64bf4e8daf1845d74c", "187a7c39124e752edc0094a2783182df7e3d480c", "1b148b263743ae21ba53c52d33ee90c18828bf83", "3e8b2e82d68874c8bc692954b3a61b20c83920f8" ], "paperAbstract": "In this paper, we design and develop RIO, a novel battery-free touch sensing user interface (UI) primitive for future IoT and smart 
spaces. RIO enables UIs to be constructed using off-the-shelf RFID readers and tags, and provides a unique approach to designing smart IoT spaces. With RIO, any surface can be turned into a touch-aware surface by simply attaching RFID tags to them. RIO also supports custom-designed RFID tags, and thus allows specially customized UIs to be easily deployed into a real-world environment. RIO is built using the technique of impedance tracking: when a human finger touches the surface of an RFID tag, the impedance of the antenna changes. This change manifests as a change in the phase of the RFID backscattered signal, and is used by RIO to track fine-grained touch movement over both off-the-shelf and custom built tags. We study this impedance behavior in-depth and show how RIO is a reliable UI primitive that is robust even within a multi-tag environment. We leverage this primitive to build a prototype of RIO that can continuously locate a finger during a swipe movement to within 3 mm of its actual position. We also show how custom-design RFID tags can be built and used with RIO, and provide two example applications that demonstrate its real-world use.", "pdfUrls": [ "http://www.cs.utexas.edu/~swadhin/papers/rio_cameraready_mobicom17_swadhin.pdf", "http://doi.acm.org/10.1145/3117811.3117818", "http://www.cs.utexas.edu/~swadhin/ppts/rio-mobicom17-swadhin.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5d05fea2a5db55753bb5c53654bfcf9692c3a6b1", "sources": [ "DBLP" ], "title": "RIO: A Pervasive RFID-based Touch Gesture Interface", "venue": "MobiCom", "year": 2017 }, "5d2ace0093bca0748a5d4941b105ccd022efbca9": { "authors": [ { "ids": [ "2436656" ], "name": "Daoyuan Wu" }, { "ids": [ "25758828" ], "name": "Rocky K. C. Chang" }, { "ids": [ "1743225" ], "name": "Weichao Li" }, { "ids": [ "28076389" ], "name": "Eric K. T. 
Cheng" }, { "ids": [ "38580953" ], "name": "Debin Gao" } ], "doi": "", "doiUrl": "", "entities": [ "ASEA IRB", "Android", "Crowdsourcing", "Landline", "Network performance", "Programming paradigm", "Smartphone" ], "id": "5d2ace0093bca0748a5d4941b105ccd022efbca9", "inCitations": [ "4ef7bda1225d86eb0cc12a116ae3b24ca3a1d115" ], "journalName": "", "journalPages": "445-457", "journalVolume": "", "outCitations": [ "8729fa0c0217d6a5984e7d1778313e392685fc43", "7dc4eeec47a324d578f86169019b794c2537a344", "062bd67c240a7710225fcaf2e236eebafa94eecb", "475323cde4293723ac53fc3a8a3749bb82432268", "546c0cfed69f188a0ca661c8db9b099f554a63d1", "3502bd2f8bfad6fed7a541c3c78faaf9cc2d1147", "5e817f82f57800095de625c69a2993bf23770e9a", "b11cd7a0ccf98b71c1d9e46fa89a4708c9efdcc6", "841bf3cd10f63737d95979cb5648625cb308c394", "023f23c300804754753cb11db51fb7f582556ab7", "5e360dae24e4ceff57bea7ab59e5aae83b1e2d8a", "573c6cc9ad23fabd855e2a3e50842e3acf18a160", "0c92fc4a70eda75480e8d96630d5e3f7aba830f4", "a3eec7c9315fc077930cdd13850a540a808931b8", "e33b4c25099cc8361c5984da4b3ae95356d9ce01", "098cc8b16697307a241658d69c213954ede76d59", "75324c08d87287bb549456eb716f20b2baae1102", "094cca7a7bbfa274975e58f32d392404871ca2e5", "2f74156349d454fd054f18f23c305613df19d28b", "3e364e301f026a197fde0608481dfa2c09e85b7b", "6f79e8e6ae8c2fbba094d479a7d636cb1f28a614", "1f79775b58072a2ab484aad798aec0c9c7fa8605", "8e22e3677a962e93083513e428701840beb55e5e", "2a90a9212ae78bbbb6c7168f4c22bef04f4e5bc9", "036277d492dd5777e87e5b33ffd809e5c617a37a", "13bbf077589b0741e7f4e6265f1d2be75e3627db", "23f33836c4b24252e4fa30bef6d34ba4a021872e" ], "paperAbstract": "Crowdsourcing mobile user\u2019s network performance has become an effective way of understanding and improving mobile network performance and user quality-of-experience. However, the current measurement method is still based on the landline measurement paradigm in which a measurement app measures the path to fixed (measurement or web) servers. 
In this work, we introduce a new paradigm of measuring per-app mobile network performance. We design and implement MopEye, an Android app to measure network round-trip delay for each app whenever there is app traffic. This opportunistic measurement can be conducted automatically without user intervention. Therefore, it can facilitate a large-scale and long-term crowdsourcing of mobile network performance. In the course of implementing MopEye, we have overcome a suite of challenges to make the continuous latency monitoring lightweight and accurate. We have deployed MopEye to Google Play for an IRB-approved crowdsourcing study in a period of ten months, which obtains over five million measurements from 6,266 Android apps on 2,351 smartphones. The analysis reveals a number of new findings on the per-app network performance and mobile DNS performance.", "pdfUrls": [ "http://arxiv.org/abs/1703.07551", "https://www.usenix.org/system/files/conference/atc17/atc17-wu.pdf", "https://arxiv.org/pdf/1703.07551v2.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/wu", "https://arxiv.org/pdf/1703.07551v1.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/5d2a/ce0093bca0748a5d4941b105ccd022efbca9.pdf", "s2Url": "https://semanticscholar.org/paper/5d2ace0093bca0748a5d4941b105ccd022efbca9", "sources": [ "DBLP" ], "title": "MopEye: Opportunistic Monitoring of Per-app Mobile Network Performance", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "5d4fb00b737479edeb381c88e85d6b8fc34db98d": { "authors": [ { "ids": [ "1721849" ], "name": "Hao Chen" }, { "ids": [ "39763956" ], "name": "Kim Laine" }, { "ids": [ "3433542" ], "name": "Peter Rindal" } ], "doi": "10.1145/3133956.3134061", "doiUrl": "https://doi.org/10.1145/3133956.3134061", "entities": [ "32-bit", "Communication complexity", "Computation", "Computational complexity theory", "Cryptography", "Encryption", "Hash function", "Homomorphic encryption", "Interactivity", "Mobile 
phone", "Oblivious transfer", "Overhead (computing)", "Preprocessor" ], "id": "5d4fb00b737479edeb381c88e85d6b8fc34db98d", "inCitations": [ "5e75d9d75536f8b126c47ae4ce91b47d51c7cc69", "36a4cec20880066c1bee18de9258622b8ab30556", "479edcfc6e36da2ce134d02ddeff56523c85cc45", "0fdbd34a34766300dfe169f67cf419d892f8ea0f" ], "journalName": "", "journalPages": "1243-1255", "journalVolume": "", "outCitations": [ "1890cecdbba895fbcf975c4aef1616e184e69abb", "1554b04035dff3d4c6db8dc3c392e45366db4fdc", "4bd548d75200be6717c75590b6329a2e6cdc04ac", "39bd1f1f75ca061985833f7f1d339ace60047f45", "627d863ce5bb56de50a4cfc36f8f6c526c8eec37", "71e166a85195362cd48311cfb473debc1614602a", "536754e19b8b2850497069a6e9c6b75d368621d4", "7821ac7391c398aa94af65e3be9d01ee7cbfb1c2", "6e77caeabde0e5825e5fd02c43b7f75cab67b689", "2040ae54015f2b241c338d16c8c8a04832b695c5", "14720266a35ced804438cdf06bc8d151e7e9903c", "0e2be1b38467743228f933cfeaac90a2baa3fdf2", "6d1ca1108d9d96e5607571502552ad04464d7f15", "0bb51d5e3a2e779d7515ee553bccd326bfc43912", "3181b9ce21265bbf8175314714e1535f75b3d80f", "17ccac4c338330e6db889fedfc11af15bb109704", "454b276753de1144fab3feb8d9d38b396c2a8b3c", "3c8722737ef9f37b7a1da6ab81b54224a3c64f72", "7e3b803b0521e869b821a38583a2d442a551cdad", "45104cef3fc97f6c92f3fdbba3629ac3b590aae9", "d8c1b48ae4d6e4676d060c06087bb6b1ac81a005", "0b5e491fbeb009949dc40b656b8caea3e039ea55", "72254a4ccde7776abc07c406bacaae308783414e", "3cfc0b1e3c19ffb422f0c98754c382a9d8fbbc0b", "45f4a6c915709c734e034faae7f94683d4bccbcf", "55bd40da846d466f84798b36fe8b2a0f20eca875", "a9d07270be6e48448ef17b348f3455d76ea1d68f", "3e426349f0cf3a65b502be05ebca23e693ec03fd", "3307252aebaf39d3f9f93833e6303d9713577fc7", "62a1092c80a4dc6f93b631f6a87c4836f40c48da", "0faddfa1cc6c74d30b82ba32bcc4a2ee27fe31bf", "03c5ab0f31220b29d3b8eb60637a7a8140fd87fc", "531f8e756ea280f093138788ee896b3fa8ca085a", "03c1711090d76cc9163e238686786a71c028377e", "9682ac586142b1bf676fbf5b43abe51f1f79a8da", "012b8a941e96594783fb10d3a785e91f13384413", 
"6eccb6635eb1d6de72415331794e3b3530811800", "6d6bd93c620885cb5ddd5abfac19efffac132cd5", "16d23baa55835434808a3420e0884e0dc44680f6", "497bc6f8cdd2cb16c52d02208b4f3c82c6e72e61", "8d63532f254c77549b40e232611948d36c85150b", "11f093a54c40d8ea8336d8e575d5ab717e0fbb51", "202991f6f82d3931de13fc0019ef9e3c07bc11e5", "2268405872a4410eb9cc1e9a2ddcbb4c1e81ee58", "0d142cc4e3b721e256848d39f3c54d2f2259f20d", "544282f5b1b23b8273b6332bd09504f7bca5da5d", "9f17818d52ddbb13998097a87964a14731b8849d", "6871b95c14dccca7636b498b5d363a743c5288e6" ], "paperAbstract": "Private Set Intersection (PSI) is a cryptographic technique that allows two parties to compute the intersection of their sets without revealing anything except the intersection. We use fully homomorphic encryption to construct a fast PSI protocol with a small communication overhead that works particularly well when one of the two sets is much smaller than the other, and is secure against semi-honest adversaries.\n The most computationally efficient PSI protocols have been constructed using tools such as hash functions and oblivious transfer, but a potential limitation with these approaches is the communication complexity, which scales linearly with the size of the larger set. This is of particular concern when performing PSI between a constrained device (cellphone) holding a small set, and a large service provider (e.g. WhatsApp), such as in the Private Contact Discovery application.\n Our protocol has communication complexity linear in the size of the smaller set, and logarithmic in the larger set. More precisely, if the set sizes are Ny < Nx, we achieve a communication overhead of O(Ny log Nx). Our running-time-optimized benchmarks show that it takes 36 seconds of online-computation, 71 seconds of non-interactive (receiver-independent) pre-processing, and only 12.5MB of round trip communication to intersect five thousand 32-bit strings with 16 million 32-bit strings. 
Compared to prior works, this is roughly a 38--115x reduction in communication with minimal difference in computational overhead.", "pdfUrls": [ "https://acmccs.github.io/papers/p1243-chenA.pdf", "http://eprint.iacr.org/2017/299", "https://eprint.iacr.org/2017/299.pdf", "http://doi.acm.org/10.1145/3133956.3134061" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5d4fb00b737479edeb381c88e85d6b8fc34db98d", "sources": [ "DBLP" ], "title": "Fast Private Set Intersection from Homomorphic Encryption", "venue": "CCS", "year": 2017 }, "5d5401bd29121730dc78aee0c07bbd64d2a9c65b": { "authors": [ { "ids": [ "3353863" ], "name": "Matthew K. Mukerjee" }, { "ids": [ "2238966" ], "name": "Ilker Nadi Bozkurt" }, { "ids": [ "33878552" ], "name": "Devdeep Ray" }, { "ids": [ "1711252" ], "name": "Bruce M. Maggs" }, { "ids": [ "1730191" ], "name": "Srinivasan Seshan" }, { "ids": [ "1688592" ], "name": "Hui Zhang" } ], "doi": "10.1145/3143361.3143366", "doiUrl": "https://doi.org/10.1145/3143361.3143366", "entities": [ "Change detection and notification", "Cloud computing", "Content delivery network", "Digital distribution", "Owned", "Program optimization", "Simulation", "VDX (library software)" ], "id": "5d5401bd29121730dc78aee0c07bbd64d2a9c65b", "inCitations": [ "3449457403b7aee7a6785e2facdce90161b4bd14" ], "journalName": "", "journalPages": "68-80", "journalVolume": "", "outCitations": [ "80ebcef2602f85b1f513432151abd43640a205c8", "a0a619d0c10ac4658986e60404e44abb83a60613", "ae483cf0be2b76fe61af2155151c1b296c2ab428", "4cead48e2eac91560105871b78268e3164eb382b", "5776373e3e6c3a619252e2c132b5a4414d6c2271", "12e859ef5654e46152daf90118c31f85a422ec84", "0b90433a2df3363d77edf97fd5e998da7c7660de", "be17978d5f2d00c0cbf51ef67732788040384605", "65fd142f37c315cdf892184f8fb21281b88f6269", "35f1dbf5079b2383bc580161fcddbef960e64e2c", "b8a661bb094437e767d75561eeb4a6df5fc05e28", "631118cd04caa1bca7a2010228447f25eb4378a9", "78d0ecb0b55badabed539c81d5cbb480055ba79c", 
"c3c262b8e56536d14826926b69af59eaefc29bc2", "5017593f65d08a4b6072f60af89ff961ec76c9df", "12768416bd0c73f7778e5df02561e7963dc654f8", "144d99900f16711dfd5ca94207d4fc5f5be2eb68", "49cabd99565affd2f35c244f8ab1ce1890ae7efc", "75e0a740fb375524a9d0fc40a79f2c2442e9aaf1", "0155faf33da0d2184bcf18717c6ccac7e3c2527e", "4f2c94a8d689863859ac849ebb83823770cf3d6a", "0860bc34aac8a304674aa4c205ff46e6dbc93295", "0a40663fdcf7c5fb7cfc459693116c41309e7eca", "98be1f7fc3c241185bd5ee8dedf5b35dd0637622", "bf36aca757b661addddda94b2a6e85b122c3d426", "6ba9123deb52e21f3ce94a0f598b2e5307f4a84c", "6b6a5fe05f19bd4322a43b0f688b854db40a2fdd", "707f03bda722d191eb9d830ad37e1ae8fe64e647", "2b42dc17726840c2827993a0fd0f659553c63dc8", "908f7931de8768786d9ef7d64f5a8156860709dd" ], "paperAbstract": "Various trends are reshaping Internet video delivery: exponential growth in video traffic, rising expectations of high video quality of experience (QoE), and the proliferation of varied content delivery network (CDN) deployments (e.g., cloud computing-based, content provider-owned datacenters, and ISP-owned CDNs). More fundamentally though, content providers are shifting delivery from a single CDN to multiple CDNs, through the use of a content broker. Brokers have been shown to invalidate many traditional delivery assumptions (e.g., shifting traffic invalidates short- and long-term traffic prediction) by not communicating their decisions with CDNs. In this work, we analyze these problems using data from a CDN and a broker. We examine the design space of potential solutions, finding that a marketplace design (inspired by advertising exchanges) potentially provides interesting tradeoffs. A marketplace allows all CDNs to profit on video delivery through fine-grained pricing and optimization, where CDNs learn risk-adverse bidding strategies to aid in traffic prediction. 
We implement a marketplace-based system (which we dub Video Delivery eXchange or VDX) in CDN and broker data-driven simulation, finding significant improvements in cost and data-path distance.", "pdfUrls": [ "https://users.cs.duke.edu/~ilker/papers/conference/conext-17.pdf", "https://users.cs.duke.edu/~bmm/assets/pubs/MukerjeeBRMSZ17.pdf", "http://doi.acm.org/10.1145/3143361.3143366" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5d5401bd29121730dc78aee0c07bbd64d2a9c65b", "sources": [ "DBLP" ], "title": "Redesigning CDN-Broker Interactions for Improved Content Delivery", "venue": "CoNEXT", "year": 2017 }, "5d61a43e9a10efae063c8992d0c507ee77086c8b": { "authors": [ { "ids": [ "1962254" ], "name": "Haiyang Sun" }, { "ids": [ "2010862" ], "name": "Robert Birke" }, { "ids": [ "1727830" ], "name": "Walter Binder" }, { "ids": [ "3255902" ], "name": "Mathias Bj\u00f6rkqvist" }, { "ids": [ "2075868" ], "name": "Lydia Y. Chen" } ], "doi": "10.1109/ICAC.2017.37", "doiUrl": "https://doi.org/10.1109/ICAC.2017.37", "entities": [ "Big data", "Event stream processing", "Jumpstart Our Business Startups Act", "Microsoft Windows", "Online and offline", "Replay attack", "Requirement", "Social media", "Stream processing" ], "id": "5d61a43e9a10efae063c8992d0c507ee77086c8b", "inCitations": [], "journalName": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "journalPages": "39-48", "journalVolume": "", "outCitations": [ "38adc6ce214ad89ad6a0c47b489608a0fbeedaaf", "17c8851c47328dad603993f59e25bf67f8e64542", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "558c332a7c739e2c3b52391efae20bc9a8a673ba", "26d72e5c13f7e7d187423f7c2fcfe33194461b6d", "1b535af0d110491eabeedf8323a51327846e55b2", "a21c972077f85d23f769c6ac4e4afa283d38de49", "a27f9c404d79055addcf0f5ec374b011fc263c23", "9b78b15038933a9fc8233c1a6f725673f72944a7", "45b50ed3d33633978964893b3a58ca369f35bf7e", "c179e4992f339c65918fae904f7845afc14d7e0a", "f0350ae6442d2737585fdd73e97c6a1d250ac937", 
"0a22bb17e60fc79520c005bfb105d7fd62a8b12a", "00aa614734a26a19b09a0a3bdee2adc77bdac5e4", "dbe0c864819eaabebd4cdeee226f2987b152a10a", "3af5e48a741634d2572b839ca57b68929cd2d648", "88a44593454773d887fe9b51b20db66ef5d3956b", "89ce57616932e9bf0fcce63a3bae167048f6252a", "478a3809d2d83b0c3b357cc250b725e2aa58f10c", "422456b9b6dfb526bc759bbdfcc35f694189de5f" ], "paperAbstract": "With the rapid growth of social media and Internet-of-Things, real-time processing of big data has become a core operation in various business areas. It is of paramount importance that big-data analyses are executed timely with specified accuracy guarantees. However, workloads in the wild are highly bursty with skewed contents and often present the conundrum of meeting latency and accuracy requirements simultaneously. In this paper we propose AccStream, which selectively samples and processes data tuples and blocks on emerging batch streaming platforms with a special focus on analysis of aggregation, e.g., counts, and top-k. AccStream dynamically learns the latency model of analysis jobs via on-line probing technique and employs sample theory to determine the lower limit of data so as to fulfill given accuracy targets. A unique feature of AccStream ensuring strong latency-accuracy fulfillment even under conflicts is the hybrid windowing that trades off data freshness via a combination of tumbling and rolling windows. We evaluate the prototype of AccStream on Spark Streaming, analyzing Twitter data. Our extensive results confirm that AccStream is able to achieve the latency and accuracy target against a wide range of conditions, i.e., slow and fast dynamic load intensities and content skewnesses, even when facing conflicting latency and accuracy targets. 
All in all, the effectiveness of AccStream in delivering timely, accurate, and (partial) fresh streaming analytics lies in shedding the adequate amount of input data at the right time and place.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICAC.2017.37" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5d61a43e9a10efae063c8992d0c507ee77086c8b", "sources": [ "DBLP" ], "title": "AccStream: Accuracy-Aware Overload Management for Stream Processing Systems", "venue": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "year": 2017 }, "5d95638039eca9d4fc34653c4d8b2aea223317f8": { "authors": [ { "ids": [ "2565726" ], "name": "Zachary Kincaid" }, { "ids": [ "3097337" ], "name": "Jason Breck" }, { "ids": [ "17786569" ], "name": "Ashkan Forouhi Boroujeni" }, { "ids": [ "1703130" ], "name": "Thomas W. Reps" } ], "doi": "10.1145/3062341.3062373", "doiUrl": "https://doi.org/10.1145/3062341.3062373", "entities": [ "Abstract interpretation", "Algorithm", "Context-free language", "Credit bureau", "Experiment", "Graphic art software", "ICRA", "Impedance matching", "Interprocedural optimization", "Model checking", "Nominal impedance", "Path expression", "Recurrence quantification analysis", "Recursion", "Static program analysis" ], "id": "5d95638039eca9d4fc34653c4d8b2aea223317f8", "inCitations": [ "29e060624ca85576f7b6b2703dd186f85d988828", "81d80c7031426343519b3912956837a02262f21c", "295f26fff0468358b4781646b0da2a3c3466d8ca" ], "journalName": "", "journalPages": "248-262", "journalVolume": "", "outCitations": [ "25598f8903873e4e876058a799c31fc932ed04f9", "5da7fb12d38f4112965455f8260ed1fd4800d1ea", "08d284f5b03bb32d9870edb18c1dc91128a79887", "225cf9dbfc991bc45a37adf6aeaaf26907fd03d8", "1bb9bcf15a128cbdc1d7fb549066f77f9492e634", "2c78cf851a81f9546e6993de64afff1964dcc054", "c84086d352be8b87b2266aeb82540602b20ec0c2", "7fd3f5279e008fdaaca4e1bf9f5ceb9c162dda0f", "07a4ae037cef54550810d7870d0e413dfdb6d10a", 
"87750c1aca624e79d41e0b18754926766f3cdd07", "5cb9fa09833772c31f9b071952dceac67eed822e", "5a6682af0ad2eb0e08e6f52c0101119c603b663c", "1d4309e5ecc67f5430183253047ef539bbbd59b2", "954313dcf1b794c441e9dc79cdde5e2ab69e16c5", "164b11b1f9f8432db88424b1e4f9ba6e09e5c894", "11e8ad8d5302e8149cc787de5778b52e7e976ca8", "0562995329886f59c7a0c34049e3be558387acac", "da5b788ff95ca5e5f07ac3e611e01dcd88a027db", "1b10332d1b64530c2967e885dce09e7517834352", "d233d7d5a0ebbbf4d02dbb3d21a64b86863f06ca", "3976f8b6e1a7a4f0125ae1a10a5b68d9b5ee874d", "5fbf739032dd548c1ff189e7333f05e215906a1b", "38e253b5f4c1c353cba216e575db52e92276f034", "8d46b52c205ff26f4732c8d351f7710f71bf9289", "39d846fc1dc4a43b24dbe7246f62317f08183ba0", "02584960fcf229817eb69ff5dc942d89ff9381be", "0f91568cd8219d8365dbfb1373efa5d9cafa37e1", "32e9ea021a0d97474c6e33e2f1afb7a3902651d3", "65d522eb58c16b78c9411216a3178f8671158842", "0ac8787f94890b2069dc9e15bb30f13c4ba515ad", "3fc7cbdb05611be0e0abf0fb090edc655e30d71e" ], "paperAbstract": "Compositional recurrence analysis (CRA) is a static-analysis method based on a combination of symbolic analysis and abstract interpretation. This paper addresses the problem of creating a context-sensitive interprocedural version of CRA that handles recursive procedures. The problem is non-trivial because there is an \u201cimpedance mismatch\u201d between CRA, which relies on analysis techniques based on regular languages (i.e., Tarjan\u2019s path-expression method), and the context-free-language underpinnings of context-sensitive analysis. \nWe show how to address this impedance mismatch by augmenting the CRA abstract domain with additional operations. We call the resulting algorithm Interprocedural CRA (ICRA). 
Our experiments with ICRA show that it has broad overall strength compared with several state-of-the-art software model checkers.", "pdfUrls": [ "http://research.cs.wisc.edu/wpis/papers/pldi17.pdf", "http://doi.acm.org/10.1145/3062341.3062373", "http://research.cs.wisc.edu/wpis/papers/tr1840r1.pdf", "http://research.cs.wisc.edu/wpis/papers/tr1840.pdf", "http://www.cs.princeton.edu/~zkincaid/pub/pldi16.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5d95638039eca9d4fc34653c4d8b2aea223317f8", "sources": [ "DBLP" ], "title": "Compositional recurrence analysis revisited", "venue": "PLDI", "year": 2017 }, "5d9aa2b2f2d270fbe3588eefb313cf63f16406f2": { "authors": [ { "ids": [ "2317634" ], "name": "Yuhwan Ro" }, { "ids": [ "2077514" ], "name": "Hyunyoon Cho" }, { "ids": [ "7266659" ], "name": "Eojin Lee" }, { "ids": [ "2550204" ], "name": "Daejin Jung" }, { "ids": [ "3173425" ], "name": "Young Hoon Son" }, { "ids": [ "2575874" ], "name": "Jung Ho Ahn" }, { "ids": [ "3091593" ], "name": "Jae W. 
Lee" } ], "doi": "10.1109/HPCA.2017.31", "doiUrl": "https://doi.org/10.1109/HPCA.2017.31", "entities": [ "Access time", "Baseline (configuration management)", "Clock signal", "Critical path method", "Dynamic random-access memory", "Electronic data processing", "Inter-process communication", "Locality of reference", "Memory-level parallelism", "Microarchitecture", "Parallel computing", "Random access", "Solution stack" ], "id": "5d9aa2b2f2d270fbe3588eefb313cf63f16406f2", "inCitations": [], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "517-528", "journalVolume": "", "outCitations": [ "468035263afa59095614f26a62e0217da4a1aeed", "9341125876271d46cc25f86dac93f25acb343e8d", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "bb117349638a1d63be1b105bba0e152bd6c031f8", "10db43d5a1b3c1ddf6dc93fa992b8ed42dcf9833", "46eea309204f088ef9dda197d8273465a641f60c", "5b93477e6d7d0c6701052791905300cfd887b5c2", "2b0ca5a307e4c72eff242d8c903c4f0138131d7e", "8b0b2f2605e533c40cac32e1a3a989f7aa759841", "a213b244778e310bc4b27cbd021f964258b4c7a7", "f7b7bcd2083ab27cd6779c36358b88c7fa116a0d", "29f766723ca752138855500084ced04503bfc9c8", "41b24c890ae0ef99ff031c9c8549375af6025fb6", "7815c4243d581d0f96d0dac2c6e90e01d1ce94a3", "675e82b6d0d2257c6aab0965238b0c97928b9f78", "464af3debb8434807ab04eb749d63594e78ee786", "f306e367f714f0ff6b20f8437c311e8c9aae12f8", "40f85cbe67ce1ce89009985e9caed648dd08c12e", "4e27e6f8fa154e63003a840678ede36c3151c9f2", "570445ec044aa24f8894c75130c57a5ed174e200", "dc6e7f9d811c5d04cb781685550978a3e0565e08", "4390f4a06a036b8f04cbb4fe7611fa5af9492797", "13089a313be0836f3fa8911236250e36b970ba2a", "54a15f2c25bec4274d5bb423dbc5002426a506a3", "1dd353938063795c06ef21d8b0b3ef3b45a2fdc1", "174a3d4ad2caba68b55fd3ee863b9471e3786f21", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "115713b2175047e746c8e7cd22ee1b8255866d0f", "8009b1c8cc4af8d3d4b792ac32926487a428172e", "3ed84f2fbdc4dc6450919ec5b017e66440a5833c", 
"18633256bb17ba0744518479c0752ca87f0d03c6", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "a7ba0e006948107582471d9625492246be256fc2", "3370784dacf9df1e54384190dad40b817520ba3a", "1dec8f5106d11047aaaf126121110cbf890f17c3", "692582a30ab174e4b6aa1b59e13e49720643858a", "234049a484dee54d3f9555fe7f50805e783ec432", "6a3b49401ac7120530165d7c18f52d51887e48f2", "434fa04db769935ae61bbcf4d9faa602b9a8c730" ], "paperAbstract": "Memory access latency has a significant impact on application performance. Unfortunately, the random access latency of DRAM has been scaling relatively slowly, and often directly affects the critical path of execution, especially for applications with insufficient locality or memory-level parallelism. The existing low-latency DRAM organizations either incur significant area overhead or burden the software stack with non-uniform access latency. This paper proposes two microarchitectural techniques to provide uniformly low access time over the entire DRAM chip. The first technique is SALAD, a new DRAM device architecture that provides symmetric access latency with asymmetric DRAM bank organizations. Because local regions have lower data transfer time due to their proximity to the I/O pads, SALAD applies high aspect-ratio (i.e., low-latency) mats only to remote regions to offset the difference in data transfer time, resulting in symmetrically low latency across regions. The second technique is SOUP (skewed organization of µ banks with pipelined accesses), which leverages asymmetry in column access latency within a region due to non-uniform distance to the column decoders. By starting I/O transfers as soon as data from near cells arrive, instead of waiting for the entire column data, SOUP further saves two memory clock cycles for column accesses for all regions. 
The resulting design, called SOUP-N-SALAD, improves IPC and EDP by 9.6% (11.2%) and 18.2% (21.8%) over the baseline DDR4 device, respectively, for memory-intensive SPEC CPU2006 workloads without any software modifications, while incurring only 3% (6%) area overhead.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.31" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5d9aa2b2f2d270fbe3588eefb313cf63f16406f2", "sources": [ "DBLP" ], "title": "SOUP-N-SALAD: Allocation-Oblivious Access Latency Reduction with Asymmetric DRAM Microarchitectures", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "5db968c6a65b9b660e88009ebfbb8dd1e65bb0e2": { "authors": [ { "ids": [ "2380767" ], "name": "Shangqing Zhao" }, { "ids": [ "9095564" ], "name": "Zhengping Luo" }, { "ids": [ "1777472" ], "name": "Zhuo Lu" }, { "ids": [ "3113451" ], "name": "Xiang Lu" }, { "ids": [ "1778865" ], "name": "Yao Liu" } ], "doi": "10.1145/3117811.3117846", "doiUrl": "https://doi.org/10.1145/3117811.3117846", "entities": [ "Channel state information", "Complexity", "Digital signal processing", "Frequency offset", "Network packet", "Network performance", "Signal processing", "State (computer science)", "Stateful firewall", "Universal Software Radio Peripheral" ], "id": "5db968c6a65b9b660e88009ebfbb8dd1e65bb0e2", "inCitations": [], "journalName": "", "journalPages": "15-27", "journalVolume": "", "outCitations": [ "c7d305ff0e229de03a9587117b214c1dc6b81fab", "0a841233dde7dca1decce4db257234fe1dbf78f0", "94cea09d200d3c81d562fc353f8697a69c388e72", "5478200dc78e0747f481e3741fd7a90c0b1f0c29", "0d91309360595dec936c51ba5214650e8ee16473", "3db223470c3343fb3099ff3d1a55f1f33791afe1", "0df7428c493e47e370beab835f3ee88abfaa80d5", "159a1838d5488a277c8edadfe8d51f0b50b64cd6", "73e5cc87f4d7487c3ba58ec2e55ea52ea1025b0a", "496d0ecfe9978e0d8d93601d9eb4bd43e2968440", "f954c314460ff1b4c38b4909ddfda9df9d194c5e", 
"e3f73f5ec98d136b434c0b41e377532b066e43e2", "21d0f3b4c847e04be0f3735f5f55bffe32e942e3", "9be819c029921ab4f55fb7a6354081cc959d0a2d", "bd8b4971c22f2b367de17e81bf9d414389304edf", "1b1c8978dc0baa5660cf195c13883a530cfde45a", "ebe828f4b9a29233f58d3945ce875d4d1d52c033", "d3b7f3462da7fe101f93d3c2a5462e13fe37b9ad", "2c14d9e79b8884c3c91100e3b3393074fa30d4ed", "a0cace18e6baa4edda1a548742a7e37841bd086b", "06c44df74a5d079a652248906f58eda06c4a3944", "24f982d203efcebe0e301c1a22d42b847a58d408", "a7e8413633fd975873c12376de85d63b192a49d8", "3f34fdbca980b808748b23c7414a9f428914d05f", "c780171161e06a9055836cb2fdc4f2a8a717ccf1", "5c537b3734b28e4849f17e78454673a142239243", "02c75551123cae6dfbb0c69de96a199c974bcf89", "9b32cf043bfe41184b5205c42240ab8ccc6ff547", "211fb7b179f46d7ffd9d6bc219b7f2d76e1204dc", "f90e67126bd713ebf91d0a38ea1dc585f3a107ba", "076776638cd64861b46b1d237d669ab5ea650d62", "84c171fa0ec8bc29d63796b2858f95f3c3e14e75", "3538f0befbafbe69e54759964043645ade07b11e", "56af4aeb689c23c71c067e88c63e92bb200f418f", "80905bbd07c54d90fe801bce69901882379c202d", "859662e3ea8f0a183159bfe93a7fdd3cc22fe467", "0de7786798e1a5681b51bd8084a88dfa48fa1fc7", "330b1b641b0c31bbf03d51ec26759c96372fa211", "a397506bb65ad211f6e8e646dda30420fb763c41" ], "paperAbstract": "Traditional signal processing design (e.g., frequency offset and channel estimation) at a receiver treats each packet arrival as an independent process to facilitate decoding and interpreting packet data. In this paper, we enhance the performance of this process in the wireless network domain. We propose STAteful inter-Packet signaL procEssing (STAPLE), a framework of stateful signal processing residing between the physical and link layers. STAPLE transforms the signal processing procedure into a lightweight stateful process that caches in a small-sized memory table physical and link layer header fields as packet state information. 
The similarity of such information among packets serves as prior knowledge to further enhance the reliability of signal processing and thus improve the wireless network performance. We implement STAPLE on USRP X300-series devices with adapted configurations for 802.11a/b/g/n/ac and 802.15.4. The STAPLE prototype is of low processing complexity and does not change any wireless standard specification. Comprehensive experimental results show that the benefit from STAPLE is universal in various wireless networks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117846" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5db968c6a65b9b660e88009ebfbb8dd1e65bb0e2", "sources": [ "DBLP" ], "title": "Stateful Inter-Packet Signal Processing for Wireless Networking", "venue": "MobiCom", "year": 2017 }, "5dc51725500b887b38c6a0fb6a849fd74f1696f2": { "authors": [ { "ids": [ "37852689" ], "name": "Snehasish Kumar" }, { "ids": [ "2951954" ], "name": "William N. Sumner" }, { "ids": [ "33624938" ], "name": "Vijayalakshmi Srinivasan" }, { "ids": [ "10680647" ], "name": "Steve Margerm" }, { "ids": [ "1807593" ], "name": "Arrvindh Shriraman" } ], "doi": "10.1109/HPCA.2017.59", "doiUrl": "https://doi.org/10.1109/HPCA.2017.59", "entities": [ "Branch predictor", "Code coverage", "Compiler", "Control flow", "Coprocessor", "Dataflow", "Graphics processing unit", "Hardware acceleration", "LLVM", "Microarchitecture", "Parsec (parser)", "Program analysis", "Speculative execution" ], "id": "5dc51725500b887b38c6a0fb6a849fd74f1696f2", "inCitations": [], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "565-576", "journalVolume": "", "outCitations": [ "0fb53e9325ebf8dffc2ab128ebf1d75a7e26c415", "4d7927261704a8783e330aa7cae9c1fc84c04917", "e55ae3973428baa12b811a66e3195d33c96238df", "3f210a463e6d8054929ee98267b7800e49275e06", "53a0f3e52e18364f7163d0a64fc8659058bc5485", 
"07f3b8cfd59624acf80e16794bd3f2bc69acd8e7", "36d31540ab5977ac72d17cdb1a483dd4f1569048", "0659411ebccf1abca4f9a3a5c8744b8bb153933e", "98ec566b3e7177701cae04921578b2322eafb92e", "6fbb7db25a5a3a2788e2b16a5e54abc1ac36aa76", "ab6a42d9c0001f5ff6245b6e429c57f78317b361", "0dcce77ec3dbbe511e9c53c284cb7f12cbc245e0", "24cba2623f7d9387f74eda81b76cd281486aa540", "6c5462d31a0d0f4e6cb2ff7ae795250957d9fcab", "0502eaea10b67788d74e4f4a635f1723bb29e7db", "63cc9119a1d29ef68c8ae5d1db44f53fe15625e5", "2616c0df5d07bd88356381976243f21b4ddd0344", "0a2af2773ca4fcbd22cd7580d29ac7739bcf028c", "13a6c714cddeded37a69205f39935da4e7082f43", "102fc630c42ee3c73243bd08936aa7e72ebb8daf", "3117c82b6da03bd6ec730a805255c253ec153da7", "12a233efbdd874afdeb8a1e6fe71c4ccff758175", "5646a51461b64be3e9511dff06ea5abef0a399d4", "f632d67c13a113fd468d910078b4be180f92127f", "320a6faa396f27f6f83b22ded48944ffd574fa1e", "04b6aadef5b51bfbefd10c1513313dc4292693eb", "4ab0e888da81ed3f1b891f3c22fdc8e07b12b286", "1087e2e1244665c9574ab5914ae13c6c88bcc096", "352a8957005dc5519b15ed1870751ec494d66395", "269c24a4aad9be622b609a0860f5df80688c2f93", "2a70eafeb6e80070ecfa6cbd05864d2e233c5dce", "0af2605cf1d628d36106770b8e6f97a40909ce43", "358bf7354dae4de25cc9bae898fb5916d98b66d0", "2e5ef3e461eaccf533aaae000ef847ae581d4363", "03b2e534532e9558e560df0bed74976b8f48c1a5", "16019260ae941f58c2887459cb1bebb2d3dd2c57", "18cd7b2c5323b8017e3fd58968826a6d1e419923", "58f76b7456081a7d9bd405348c0ff74134a13121", "253853ba8c6a69ec2898befe924ca17ae01c9127", "bcb288389d4318494887fe20ee68b6b18f39a3a5", "345b6a3e596fc541e766494433146bd251e0a628" ], "paperAbstract": "Technology constraints have increasingly led to the adoption of specialized coprocessors, i.e. hardware accelerators. The first challenge that computer architects encounter is identifying \"what to specialize in the program\". We demonstrate that this requires precise enumeration of program paths based on dynamic program behavior. 
We hypothesize that path-based [4] accelerator offloading leads to good coverage of dynamic instructions and improves energy efficiency. Unfortunately, hot paths across programs demonstrate diverse control flow behavior. Accelerators (typically based on dataflow execution), often lack an energy-efficient, complexity effective, and high performance (e.g. branch prediction) support for control flow. We have developed NEEDLE, an LLVM based compiler framework that leverages dynamic profile information to identify, merge, and offload acceleratable paths from whole applications. NEEDLE derives insight into what code coverage (and consequently energy reduction) an accelerator can achieve. We also develop a novel program abstraction for offload called Braid, that merges common code regions across different paths to improve coverage of the accelerator while trading off the increase in dataflow size. This enables coarse grained offloading, reducing interaction with the host CPU core. To prepare the Braids and paths for acceleration, NEEDLE generates software frames. Software frames enable energy efficient speculative execution on accelerators. They are accelerator microarchitecture independent and support speculative execution including memory operations. NEEDLE is automated and has been used to analyze 225K paths across 29 workloads. It filtered and ranked 154K paths for acceleration across unmodified SPEC, PARSEC and PERFECT workload suites. 
We target NEEDLE's offload regions toward a CGRA and demonstrate 34% performance and 20% energy improvement.", "pdfUrls": [ "https://doi.org/10.1109/HPCA.2017.59", "http://www.cs.sfu.ca/~wsumner/research/papers/hpca2017kumar.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5dc51725500b887b38c6a0fb6a849fd74f1696f2", "sources": [ "DBLP" ], "title": "Needle: Leveraging Program Analysis to Analyze and Extract Accelerators from Whole Programs", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "5ddc7f4d279ea49e7bfc4e3a344c3e5266a9c57a": { "authors": [ { "ids": [ "27054917" ], "name": "Sepideh Roghanchi" }, { "ids": [ "34186349" ], "name": "Jakob Eriksson" }, { "ids": [ "3608529" ], "name": "Nilanjana Basu" } ], "doi": "10.1145/3132747.3132771", "doiUrl": "https://doi.org/10.1145/3132747.3132771", "entities": [ "Benchmark (computing)", "Cache (computing)", "Data structure", "Delegation (computer security)", "Experiment", "Lock (computer science)", "Non-blocking algorithm", "Scalability", "Shared memory", "Software transactional memory", "Thread (computing)", "Throughput", "Transactional memory" ], "id": "5ddc7f4d279ea49e7bfc4e3a344c3e5266a9c57a", "inCitations": [ "132d1c096d74c53960a7511dafb886c73158a7ec" ], "journalName": "", "journalPages": "342-358", "journalVolume": "", "outCitations": [ "20d058f261c6601b31c212550e2ce9ce8e284a34", "3a2df802b68c1d1464d442cb1ec973ef93ce69a0", "35315d63e340b6f8ca3432a5d693c8e9b0538b2a", "5cec4c7d82137333ea7f0166a26d04bba589c7da", "37a1e8411669e29cf8fbf48ec920c97c0066ac7e", "34d33c19d0e893415b570ebdeea993db5b7af509", "46ec0c7cbef89e31f878b8e9716a6c1c1cdedd29", "3872ccb40b44e0a7af38a1e6fdeb35754dfce9d2", "57eaf0036c74895a5e965915c6544041623719e0", "045a975c1753724b3a0780673ee92b37b9827be6", "a9091ef790788c5d252cad94dd6862adf457e073", "59da2533346433c937631ace15200cde2f575a55", "3c1c6dd251ecca2b61190cbb564ec71008173ef6", 
"86e5050f7da865ab224c3a9ae8946f29d6e8d40a", "064c377c070932a12377bf40101bdafdcb17fcf6", "09ed565e84057123c15ab12b885c235d1f241aed", "9bd0f0527d8d2f751c622ec14388017224f4810b", "183742a8abf25ace49c7ede6536abe4031f99d61", "3ac23f666d2d2bb0f6d288d289b38e4b7a57bdf5", "eca7c0f1f5f33434f62970cb9a99cdcea1a0ab2e", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "e4c962d7fc0df9a74fe3530c5208ab44f55be8ee", "55a9dee5db669d741a94e1db3ff13e4c45cb938e", "aaecddf1e0facd414ed67910fe7204fa3bc08dfe", "42d44f2d202b91ddf8a320357bbdac81e6fd35ca", "0fb98ded295c9c5b896de175f0514cc3e9a03a00", "06e3991f0aa199fe6ff9334659f93e81b04c78fd", "1cdedeb9461bdebedc47c7a358769f85dd7683ea", "1ae7993c0c2d795b243354de48dab80bf2000356", "2f925c9b58c384d80e1febfc646900d584dcf515", "ff5e8b9972294f600b4de1a0fbb85df3a5b1bc31", "85546c61e315ea4b9add88869fe6750b28a17ab9", "0a0bf9e017e05d58b85e793e58148d2946259a74", "363b85f61630ebdc1194a59816ad950bf305c40a", "42fd424539fc40536cc44362cd324e5614ad86c6", "90d7ec80c9e49c90913f6553fd2df4f7a54395e2", "00b5fe0e942f292414329a5621a53da4016cdb3e", "2a249d659149642df8e5e2cfa277d994a578b25f", "813b7cdcf6d77ea34b4cf68378e2508db28fdf50", "135772775121ba60b47b9f2f012e682fe4128761", "4e3304e77dd2fecea4086e132981d1470434cf65", "1de5ac65303d92b91f6b822ef992a9717c5c2d2e", "57fe2b6acccbba91df0847442d2634ffef7ccfb4", "bf104ebfbd44924b6b7602e48b0a74e987baaca8", "68a9005a5ec10daece36ca5ecb9cad7be44770b1", "71d584f310f11216d9e5771af58930c5a8f1dd47", "7b1a8647dd6482adee6b774d694e5c85b6d5a9cf", "4f499f49640fb39d16b9c05f42503b5d6b0cb531", "784dd6dbdf59896a42f134ab0bede3329030380c", "0b82470bb9cd233bff6228d3d1b484024b9f9c3b", "8ce872eb6d5d20b13595b165dde1711033f94f45", "3e77a77247734dc918a5723573e1158eee1955f9", "0fca220343f411c7dac67b1f5fc1bcf5790cc030", "49b73fc335cb97c0b52b283ce236a4d4eb2b99c9", "09ad3377c3e24320420b465e6ec70c23da47dd4d", "f20bb59b95d2eb95013d386cde3f8969ffd7f0b7", "afc4931dd371130c3d4c6d6dbfda881140847af1", "1be239a23edc6531c456a8d21b7a7d95a6c0168a", 
"264cd229ac4bbdf655d5e7b44563bf84bd846364", "6756d3e0669430fa6e006754aecb46084818d6b6", "f254fe47a8e00c3c0257304850b8d5deef0a72bc", "e75a155fd802010d677e15345a51fdb0f495c8c3", "30df50d77ef9478a2848626dfe3bf65f3c991991", "415e5008232116e6869caf29c349a2dfe390264e", "cba77292e7f1f271fff1bd28238728f4f18dd13e", "51b67af3659501a8f6e6ca5c13e45b3b2b814cab", "6f090d59bde17b7604985acf38e26785e794bcc0", "5c0eee6dbc2972422f3bb9b109bfce1b137fc751", "bb2e04b70c01a7e1c9b8e9a5fd733c405f5c266c", "b38e08b51564bbab690815119201f487916055e8", "9e281f477b1e6a5f161dbe2fa8926ebe21864c67", "4634313264c26ca00c3b940865dc7c35d393bee7", "97c649dc68ad8818c7e2b7f75b9c164aa840f6a5", "6db9bd41b294a7b45792b8f4ac8864f5d178f35e", "4b289b606bf5135ef73d0224bdb5f12168dd4ef6", "1a16975d1630756772b7d16e220236fe9a2830d3" ], "paperAbstract": "We revisit the question of delegation vs. synchronized access to shared memory, and show through analysis and demonstration that delegation can be much faster than locking under a range of common circumstances. Starting from first principles, we propose fast, fly-weight delegation (ffwd). The highly optimized design of ffwd allows it to significantly outperform prior work on delegation, while retaining the scalability advantage.\n In experiments with 6 benchmark applications, and 6 shared data structures, running on four different multi-socket systems with up to 128 hardware threads, we compare ffwd to a selection of lock, combining, lock-free, software transactional memory and delegation designs. Overall, we find that ffwd often offers a simple and highly competitive alternative to existing work. By definition, the performance of a fully delegated data structure is limited by the single-thread throughput of said data structure. However, due to cache effects, many data structures offer their best performance when confined to a single thread. With an efficient delegation mechanism, we approach this single-threaded performance in a multi-threaded setting. 
In application-level benchmarks, we see improvements up to 100% over the next best solution tested (RCL), and multiple micro-benchmarks show improvements in the 5-10x range.", "pdfUrls": [ "http://doi.acm.org/10.1145/3132747.3132771" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5ddc7f4d279ea49e7bfc4e3a344c3e5266a9c57a", "sources": [ "DBLP" ], "title": "Ffwd: Delegation Is (much) Faster than You Think", "venue": "SOSP", "year": 2017 }, "5df7820d2a79d4658ae1cc305c1cc775966d3754": { "authors": [ { "ids": [ "36937479" ], "name": "Jian Huang" }, { "ids": [ "10032025" ], "name": "Michael Allen-Bond" }, { "ids": [ "2869098" ], "name": "Xuechen Zhang" } ], "doi": "10.1145/3037697.3037743", "doiUrl": "https://doi.org/10.1145/3037697.3037743", "entities": [ "Cognitive dimensions of notations", "Data structure", "Deep learning", "Device driver", "Fast path", "Linux", "Memory management", "Mobile operating system", "Operating system", "PALLAS", "Program optimization", "Software bug", "Software developer", "Software-defined networking", "Static program analysis" ], "id": "5df7820d2a79d4658ae1cc305c1cc775966d3754", "inCitations": [], "journalName": "", "journalPages": "709-722", "journalVolume": "", "outCitations": [ "3533159037bc2c11bde6b314e040ee113ae52bdd", "232ac001bcd047ae90735980c2f913bd0aef9bd9", "37ca8be76f22c291bdd47db6b6051f8396755242", "081ac2d90623b74295bcb9fddbf838f5f5a523b7", "91ec7ef1b6ffeba0a2b19f00501f2f7e52a76077", "69d14d9e18e1f55d83bdeffbb7d821a76be550a4", "2f2941de162a17eda0ffbf7eef8981ee25808df5", "95d886a0d097f0a1a81db8f431e744996ecc3048", "07d50264195a7bebb394cac60866cccfc4689e13", "1d0f2662cca5c859419b78fea468f4bc2f39e87d", "10e0397e08f37ee5d3ba1f9e24ac9eb313c784f4", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "65b6079988ec29ef3c6d62daf88b0f9e2ceee14c", "51dae50f2cf78af88e8d064671b91e6e059ca7a6", "3a33dad8e9d12835fca95deec73e841096c8bec0", "17886b4911ffd50d7e02a574caad34a286458b3a", 
"249db04e89034e021940f241b371fc4aa5799bdf", "535e43b4ebbee6f69d472d1a8d99dff788e8fd2e", "8383ead4e2394e9ec804563a639844a4b8552a6e", "1528e3eb9d1dd0b95cf4622eab00e58c4ac97a46", "128c3e04314e6fca8deed005d74a3d1ba36ad293", "0e422bd90c8be636358d4eb75f05276b361d19d4", "be8b6b5f34b94d126081c0596cf54f8b9cf8a8f4", "63cd9879aa176b414d85a29c0d7c969d764118b3", "154c6789afe8c7cdba41d78d2fac6ba5f5a3bd9d", "2a0d9931c1e794ffe96c4b6c37507e96a3c4cafc", "1339613f90befa4fd36e0373c5c3ba009d6d8d5b" ], "paperAbstract": "Software optimization is constantly a serious concern for developing high-performance systems. To accelerate the workflow execution of a specific functionality, software developers usually define and implement a fast path to speed up the critical and commonly executed functions in the workflow. However, producing a bug-free fast path is nontrivial. Our study on the Linux kernel discloses that a committed fast path can have up to 19 follow-up patches for bug fixing, and most of them are deep semantic bugs, which are difficult to be pinpointed by existing bug-finding tools.\n In this paper, we present such a new category of software bugs based on our fast-path bug study across various system software including virtual memory manager, file systems, network, and device drivers. We investigate their root causes and identify five error-prone aspects in a fast path: path state, trigger condition, path output, fault handling, and assistant data structure. We find that many of the deep bugs can be prevented by applying static analysis incorporating simple semantic information. We extract a set of rules based on our findings and build a toolkit PALLAS to check fast-path bugs. 
The evaluation results show that PALLAS can effectively reveal fast-path bugs in a variety of systems including Linux kernel, mobile operating system, software-defined networking system, and web browser.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037743", "http://www.cc.gatech.edu/grads/j/jhuang95/papers/asplos17-pallas.pdf", "http://www.cc.gatech.edu/~jhuang95/papers/fastpath-asplos17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5df7820d2a79d4658ae1cc305c1cc775966d3754", "sources": [ "DBLP" ], "title": "Pallas: Semantic-Aware Checking for Finding Deep Bugs in Fast Path", "venue": "ASPLOS", "year": 2017 }, "5df8ebd4a12623429bfcb8b2e3f0ebeb4824b748": { "authors": [ { "ids": [ "37613610" ], "name": "Long Cheng" }, { "ids": [ "1687918" ], "name": "Boudewijn F. van Dongen" }, { "ids": [ "1744010" ], "name": "Wil M. P. van der Aalst" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Distributed computing", "Event correlation", "Event partitioning", "MapReduce", "Radio frequency" ], "id": "5df8ebd4a12623429bfcb8b2e3f0ebeb4824b748", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "1-10", "journalVolume": "", "outCitations": [ "9246e25399a1da5c05b6883663d00c735be3c55e", "b224ab17297cf57b34f243f9b55552a17abbc0d3", "0558c94a094158ecd64f0d5014d3d9668054fb97", "4fe6b1352e36192747f9d406939221bbce256933", "5386ad8a18b76c85840385a4d208e239208079cd", "5ac6ae1b1f37d87456843018563b981b32189e48", "37525b2c3cc16a2fe166708a4f7081b949b1888e", "22c74d8be071084ce8812af19548e7bf2bf0c8b6", "ebd00bbf3552fc45b6cf8fa75d8f9e1b007eabdb", "91d989a32874ec04e6a5ca19af87124622462323", "1f231eb8944bf0792a68b1496eec923743dc1cac", "24d88a1e86432746e5e4ca9d4bbaf324adb5a76c", "267c1cfbe3b34132ca2fe13849869c738edecc1b", "80f6b325b2ec08073b1a7af74a1a292aea66dde9", "bf251507c21aec383f91702dca339eec8e721e1d", "176b04302a0adece049fe707de6f3fb72f12de95", 
"49c6928c0fbf6fdfa5d8e2556bea3611daabb0bb", "efa89c68d2df269f8f963134baf98a057cde2132", "14e0d2bdfb3fca202b3fc0e19a12d3082f81b931", "754acef5f3b0bfa97865d17556ef3169ca52124d", "040d45e995ab920588607ebc6977ea19dc781923", "065066a94860279587ecc7c7caaa65303008940f", "322beb11ceeacf72dfc9df8df8cb045efb46d67f", "0541d5338adc48276b3b8cd3a141d799e2d40150" ], "paperAbstract": "Event correlation is a cornerstone for process discovery over event logs crossing multiple data sources. The computed correlation rules and process instances will greatly help us to unleash the power of process mining. However, exploring all possible event correlations over a log could be time consuming, especially when the log is large. State-of-the-art methods based on MapReduce designed to handle this challenge have offered significant performance improvements over standalone implementations. However, all existing techniques are still based on a conventional generating-and-pruning scheme. Therefore, event partitioning across multiple machines is often inefficient. In this paper, following the principle of filtering-and-verification, we propose a new algorithm, called RF-GraP, which provides a more efficient correlation over distributed systems. We present the detailed implementation of our approach and conduct a quantitative evaluation using the Spark platform. Experimental results demonstrate that the proposed method is indeed efficient. 
Compared to the state-of-the-art, we are able to achieve significant performance speedups with obviously less network communication.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101114", "http://wwwis.win.tue.nl/~wvdaalst/publications/p942.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5df8ebd4a12623429bfcb8b2e3f0ebeb4824b748", "sources": [ "DBLP" ], "title": "Efficient Event Correlation over Distributed Systems", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "5dfa599bb884dfe41a4a0dbe62235c4476eb51df": { "authors": [ { "ids": [ "4012116" ], "name": "Anil Shanbhag" }, { "ids": [ "2153832" ], "name": "Alekh Jindal" }, { "ids": [ "2033016" ], "name": "Samuel Madden" }, { "ids": [ "1712430" ], "name": "Jorge-Arnulfo Quian\u00e9-Ruiz" }, { "ids": [ "1787375" ], "name": "Aaron J. Elmore" } ], "doi": "10.1145/3127479.3131613", "doiUrl": "https://doi.org/10.1145/3127479.3131613", "entities": [ "Amoeba", "Apache Spark", "B-tree", "Clustered file system", "Computer data storage", "Curiously recurring template pattern", "Disk partitioning", "Exploratory testing", "Hoc (programming language)", "IBM Tivoli Storage Productivity Center" ], "id": "5dfa599bb884dfe41a4a0dbe62235c4476eb51df", "inCitations": [], "journalName": "", "journalPages": "229-241", "journalVolume": "", "outCitations": [ "1c27eafecd3d6f0008d74ffbe1e7c59a25869407", "57579a5708af144e71de6d41711dab7adac325f8", "63435a2b83504fbfbfe0912bc7935fb34df4c38f", "bfc464efbe615805d386ef5c882e4d0f97071ec2", "29ae100d58ee8c0e6cad3a75402a9f5adf20a335", "bb0dc7f89a8e64aa537e2e2d26e8c44e30bead86", "78f9ce506df537b8c36b49857123d90bf819a860", "0456a5c3b2001465d05e84ce6786ef200184de65", "2f251686d4c7cff6e4bd0164cf96b9fbddb9927b", "98bd61c4b7eb5cc36aa8351286d87f854fdbb663", "1e557937f418accc13f9c5edb33a3d48259d80e5", "bde2b2c50866c95dfd420c68217e39a985d45810", "0138f5e5c6598cec0d64bf42f68a2706ea21bea8", 
"347920406c9a9a3846adf485e2b864d4523a0652", "7bf83dad6a6d3566112d89af3e68eb48a049d397", "0d356d3b790477a5428ec5fb8b5d3e898f549866", "207def18c67fa8024741b7ae3cdc655b57f2053f", "d1c21c34936f587779c216ed79ca33883845caa1", "2074898fb3afab00a44439b33defd5d5f9b7a7c3", "0235fb69431fa5892333eb48a06ede07df6ff4f6", "b2fc430d7606ebc9199b08232dc9c024a303dc55", "1d5cbc071f918143dbedf67a513850eadf30cbae", "0acc31039de608f2ac51f59b6848a48d50c919a5", "53115fffaa36c99a45fb7741fa74d66aa4fb8517", "01815b1f48f8cdd4e78260deaddf4bfe7af26f60", "8de7bef0ebfa65889fbb4751d09017d63a9cd3d9", "133eacaf0ad25b8364cb4510007d9363298e8adf" ], "paperAbstract": "Data partitioning is crucial to improving query performance several workload-based partitioning techniques have been proposed in database literature. However, many modern analytic applications involve ad-hoc or exploratory analysis where users do not have a representative query workload a priori. Static workload-based data partitioning techniques are therefore not suitable for such settings. In this paper, we propose Amoeba, a distributed storage system that uses adaptive multi-attribute data partitioning to efficiently support ad-hoc as well as recurring queries. Amoeba requires zero set-up and tuning effort, allowing analysts to get the benefits of partitioning without requiring an upfront query workload. The key idea is to build and maintain a partitioning tree on top of the dataset. The partitioning tree allows us to answer queries with predicates by reading a subset of the data. The initial partitioning tree is created without requiring an upfront query workload and Amoeba adapts it over time by incrementally modifying subtrees based on user queries using repartitioning. 
A prototype of Amoeba running on top of Apache Spark improves query performance by up to 7x over full scans and up to 2x over range-based partitioning techniques on TPC-H as well as a real-world workload.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3131613", "http://people.cs.uchicago.edu/~aelmore/class/topics17/hyperPartitioning.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5dfa599bb884dfe41a4a0dbe62235c4476eb51df", "sources": [ "DBLP" ], "title": "A robust partitioning scheme for ad-hoc query workloads", "venue": "SoCC", "year": 2017 }, "5e1991fc2cb48c4a3eb235e3c6106cf0c560ba7d": { "authors": [ { "ids": [ "1808412" ], "name": "Chuan-Ju Wang" }, { "ids": [ "22262834" ], "name": "Ting-Hsiang Wang" }, { "ids": [ "22171670" ], "name": "Hsiu-Wei Yang" }, { "ids": [ "22176147" ], "name": "Bo-Sin Chang" }, { "ids": [ "1793168" ], "name": "Ming-Feng Tsai" } ], "doi": "10.1145/3077136.3080807", "doiUrl": "https://doi.org/10.1145/3077136.3080807", "entities": [ "Concept map", "Experiment", "Flow network" ], "id": "5e1991fc2cb48c4a3eb235e3c6106cf0c560ba7d", "inCitations": [ "55c693b35657937d964e9a348299f61a97f9e41b" ], "journalName": "", "journalPages": "85-94", "journalVolume": "", "outCitations": [ "c18c30b9b1090e752031d23d219c1007b9954229", "c2fd72cb2a77941e655b5d949d0d59b01e173c3b", "33efd3ecffca21efaf9d1469b7dc3d2a72a0a05e", "cece7c3047859db7cae3474b665ddbf39ab0073f", "1510cf4b8abea80b9f352325ca4c132887de21a0", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "47a87c2cbdd928bb081974d308b3d9cf678d257e", "0825788b9b5a18e3dfea5b0af123b5e939a4f564", "27211ed68a7a00f1df0121fa1890a1b2acdd1a88", "72728023c99d35fa884062841fd86661d296758b", "64bff6909b0f40a3243a2b4be483f903b906d795", "1a67622ca58aa851afe36ad6c6e78f9fb9d691d2", "37cf46e45777e67676f80c9110bed675a9840590", "1c059493904b2244d2280b8b4c0c7d3ca115be73", "0b35eeb1ca1fcaa24ef456242bb90320afdd1cd2", "0834e74304b547c9354b6d7da6fa78ef47a48fa8" ], "paperAbstract": "This paper proposes an 
item concept embedding (ICE) framework to model item concepts via textual information. Specifically, in the proposed framework there are two stages: graph construction and embedding learning. In the first stage, we propose a generalized network construction method to build a network involving heterogeneous nodes and a mixture of both homogeneous and heterogeneous relations. The second stage leverages the concept of neighborhood proximity to learn the embeddings of both items and words. With the proposed carefully designed ICE networks, the resulting embedding facilitates both homogeneous and heterogeneous retrieval, including item-to-item and word-to-item retrieval. Moreover, as a distributed embedding approach, the proposed ICE approach not only generates related retrieval results but also delivers more diverse results than traditional keyword-matching-based approaches. As our experiments on two real-world datasets show, ICE encodes useful textual information and thus outperforms traditional methods in various item classification and retrieval tasks.", "pdfUrls": [ "https://cfda.citi.sinica.edu.tw/~cjwang/data/SIGIR2017.pdf", "http://doi.acm.org/10.1145/3077136.3080807" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5e1991fc2cb48c4a3eb235e3c6106cf0c560ba7d", "sources": [ "DBLP" ], "title": "ICE: Item Concept Embedding via Textual Information", "venue": "SIGIR", "year": 2017 }, "5e36511b8cab586d69047adfb03971380c17d427": { "authors": [ { "ids": [ "37770113" ], "name": "Sadjad Fouladi" }, { "ids": [ "3031766" ], "name": "Riad S. 
Wahby" }, { "ids": [ "9751328" ], "name": "Brennan Shacklett" }, { "ids": [ "34713726" ], "name": "Karthikeyan Balasubramaniam" }, { "ids": [ "40068050" ], "name": "William Zeng" }, { "ids": [ "39807402" ], "name": "Rahul Bhalerao" }, { "ids": [ "39118448" ], "name": "Anirudh Sivaraman" }, { "ids": [ "1892184" ], "name": "George Porter" }, { "ids": [ "2203149" ], "name": "Keith Winstein" } ], "doi": "", "doiUrl": "", "entities": [ "4K resolution", "Computation", "Data compression", "Encoder", "Functional programming", "Inter-process communication", "Parallel computing", "Programming style", "Thread (computing)", "Video decoder", "Video processing" ], "id": "5e36511b8cab586d69047adfb03971380c17d427", "inCitations": [ "6c5631bb8bb0caa14704ef497d744ffa60675d0f", "193342874858249aed4796cee35a8bec1b70e236", "7a1fd1aebe0618d1ffb7a9819266b9c2039bba17", "86991eb6eed3e12f5b3985340416302a2208bceb", "0657ec025e7097d06a798a4eb2f9253fdaef3e68", "957e98a2084f6c2d22694aadd22f57070b5d7e23", "7e4bc8c54dc01bb8019455a119e3d3666b3162b8", "56df56dbab7e0b022a51e5a038c1119fcd36f137", "0c9bccd940a26e5074165b2ce082db9c4eac0fd8", "db5aa66ec7e20068d4c5d26f6002838f9a49d349", "4e4870cac092580f2e156fcbdabead7d4326b55a", "841dd77064cd38a749c550f85ee1336733eee300" ], "journalName": "", "journalPages": "363-376", "journalVolume": "", "outCitations": [ "be972aeaff3e5eb1de64c02bef0465108f983402", "15c7d3d5cfce46110a5aa5c6a482e359a96082b4", "a69bed21143feab0849f6f5aa88e011dbb24abad", "0541d5338adc48276b3b8cd3a141d799e2d40150", "0d62530196b73076bcc1eb8bbd294aa680fc0e2b", "332f77fd05703c1607e3b57884ad31fb1fad0104", "56df56dbab7e0b022a51e5a038c1119fcd36f137", "4c9cec89a2c9c8173ee53ab4cda2c021421eb7a5", "0558c94a094158ecd64f0d5014d3d9668054fb97" ], "paperAbstract": "We describe ExCamera, a system that can edit, transform, and encode a video, including 4K and VR material, with low latency. The system makes two major contributions. 
First, we designed a framework to run general-purpose parallel computations on a commercial \u201ccloud function\u201d service. The system starts up thousands of threads in seconds and manages inter-thread communication. Second, we implemented a video encoder intended for fine-grained parallelism, using a functional-programming style that allows computation to be split into thousands of tiny tasks without harming compression efficiency. Our design reflects a key insight: the work of video encoding can be divided into fast and slow parts, with the \u201cslow\u201d work done in parallel, and only \u201cfast\u201d work done serially.", "pdfUrls": [ "http://www.cs.utexas.edu/~swadhin/reading_group/slides/exCamera.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-fouladi.pdf", "http://www.cs.cornell.edu/courses/cs6453/2017sp/slides/tinythreads.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/fouladi", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_fouladi.pdf", "http://cseweb.ucsd.edu/~gmporter/papers/nsdi17-excamera.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_fouladi.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-fouladi.pdf", "http://platformlab.stanford.edu/Seminar%20Talks/Sadjad_Fouladi.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/e665/4c70c990b64eb951771ece0a10c8bd001c56.pdf", "s2Url": "https://semanticscholar.org/paper/5e36511b8cab586d69047adfb03971380c17d427", "sources": [ "DBLP" ], "title": "Encoding, Fast and Slow: Low-Latency Video Processing Using Thousands of Tiny Threads", "venue": "NSDI", "year": 2017 }, "5e6453019f2682df2e1ad7a07ffcc87517c4d7af": { "authors": [ { "ids": [ "30901893" ], "name": "Po-An Tsai" }, { "ids": [ "2576892" ], "name": "Nathan Beckmann" }, { "ids": [ "39783437" ], "name": "Daniel S\u00e1nchez" } ], "doi": "10.1109/PACT.2017.42", "doiUrl": 
"https://doi.org/10.1109/PACT.2017.42", "entities": [ "Adaptive filter", "Complex adaptive system", "Limiter", "Nexus S", "Read-only memory", "Self-replication", "Server (computing)" ], "id": "5e6453019f2682df2e1ad7a07ffcc87517c4d7af", "inCitations": [ "6848f36de4d236cbe363a8eda86a039baa4eb50b" ], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "166-179", "journalVolume": "", "outCitations": [ "40ef6e2d7046e7c8de60b4e82711ecfc97a0de8a", "3d6e0b3d54b7f675ac12817f7cdd2da9c2134482", "2cc40e5ca495af9a8e3aea5f357b59cc680c472e", "69652a30c1badf6a4c21006f3ec8da3de976cbaf", "3bf23f74bf33ed52f7c28587fab315610b27221a", "5d999f4a5567e6f4a54e46bbcd6006f75ab0cbac", "6885cd324017c499d44fa3c94fca23c3104e0aed", "429dd28f609d97a883174d3a5a2db3cc936fb062", "6acd75781396e5dedcf2f06a7131ba7f3153bfb5", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "1a36150af44446fe9664005c447a0f0e04667065", "0595bd8014c76b4d2b23e41154f193ec3ba64cc0", "ba0043bd748bb26a1ba2a8297df6320d8df7489d", "5119e4b11132d48d5fa4a5ddaf2ca1a0389b9b0f", "0f8ae60e5e1bcf549017f10883428ef957148b00", "7adc7940bc250bab39c7823fbb1ef3f86fe0625a", "43bf4c7af676fcedc629c2563eb1e9708bbe0db2", "29f766723ca752138855500084ced04503bfc9c8", "1c110c1a5fdd87a99381b796c35f6d301244b4ae", "1072d1f9eab9f69fab598f9d47ad323473b45ce3", "35c348a3663de6387a45dc58b2c85092d247818a", "16ba3e5c5e0084fef0fa4705d639f2ad164f2dbe", "12fcade43d5d01977f712193fe242322dc57ba0e", "ca57798e927b6ac4e77dc3be0522c53c31fdd6aa", "a042f95a307d4f72d2aac95ac5d5e9dbfa24db79", "110c050c6c992d2b956f7b47d717810ac5c91bdc", "88412b002ee39eb121d93c0a2c11ddbb658e9d6b", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "cd8477fa225b6758f8e73ad1252d7556b11cbffe", "78ef173529cae4a19c7718e88cc4107cff99ff0f", "274e7e576534b3e091f09e801cce807f5fd221c1", "5507d741031a1ce2ddc0d6fec9d497192f037eda", "28552ecf4eaedb3461edca97304b29082b02fbab", "3bec21f0f1954d31642537c02b33e280d7e12029", 
"8bf5afa21a0bd74551b261a7399eac4ffe2494e5", "b116227c9782509c1d5a667da3632deb4356727a", "081dec43c2dbe76ff43c810594495f11ab092a10", "53691325ee4a42e0b0cf3e9d463a0bc71f447c99", "2253d6559ae9793b5cfa6e409d1d9de50dafa29a", "bc4fbbaaf1ede8d8aff16a10243226419fc32cf8", "047b0a5b50f996f3d9bcb51aab7f3176d12a69c3", "8c97a156746cf0f9e8db4dde4e76169d589da449", "352a8957005dc5519b15ed1870751ec494d66395", "165528cdf9c76edd98729c142faf50fbd6cfc69e" ], "paperAbstract": "Last-level caches are increasingly distributed, consisting of many small banks. To perform well, most accesses must be served by banks near requesting cores. An attractive approach is to replicate read-only data so that a copy is available nearby. But replication introduces a delicate tradeoff between capacity and latency: too little replication forces cores to access faraway banks, while too much replication wastes cache space and causes excessive off-chip misses. Workloads vary widely in their desired amount of replication, demanding an adaptive approach. Prior adaptive replication techniques only replicate data in each tile's local bank, so they focus on selecting which data to replicate. Unfortunately, data that is not replicated still incurs a full network traversal, limiting the performance of these techniques.We argue that a better strategy is to let cores share replicas and that adaptive schemes should focus on selecting how much to replicate (i.e., how many replicas to have across the chip). This idea fully exploits the latency-capacity tradeoff, achieving qualitatively higher performance than prior adaptive replication techniques. It can be applied to many prior cache organizations, and we demonstrate it on two: Nexus-R extends R-NUCA, and Nexus-J extends Jigsaw. 
We evaluate Nexus on HPC and server workloads running on a 144-core chip, where it outperforms prior adaptive replication schemes and improves performance by up to 90% and by 23% on average across all workloads sensitive to replication.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.42", "http://people.csail.mit.edu/sanchez/papers/2017.nexus.pact.pdf", "http://people.csail.mit.edu/poantsai/talks/2017.nexus.pact.slides.pdf", "http://people.csail.mit.edu/poantsai/papers/2017.nexus.pact.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5e6453019f2682df2e1ad7a07ffcc87517c4d7af", "sources": [ "DBLP" ], "title": "Nexus: A New Approach to Replication in Distributed Shared Caches", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "5e6b560543706ca4c2fc89c5cc7cb6128c77b3d7": { "authors": [ { "ids": [ "32265711" ], "name": "Fan Jing Meng" }, { "ids": [ "2358097" ], "name": "Xiao Zhang" }, { "ids": [ "34650131" ], "name": "Pengfei Chen" }, { "ids": [ "40011770" ], "name": "Jing Min Xu" } ], "doi": "10.1109/CLOUD.2017.37", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.37", "entities": [ "Algorithm", "Cloud computing", "FUJITSU Cloud IaaS Trusted Public S5", "Google Cloud Platform", "Machine learning", "On the fly", "Platform as a service", "Real-time computing", "Service-level agreement" ], "id": "5e6b560543706ca4c2fc89c5cc7cb6128c77b3d7", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "230-237", "journalVolume": "", "outCitations": [ "b91e0aa8cb14d6a9b635ecd4eb75d7c9f1859027", "893e818f5cc7a160befd613131717fd2fd0f2ef8", "4ad4e58fece536a478fe3e6fa62eacb9345b7102", "10da8673314188dd6ab1f16f73c05358771dd8cf", "b83e1b29469b3613081c435284ddabfc9dc91116", "b15171c1e117d4f9209b1ad63d25bbfbc4f1362b", "5fac45d1333efb6438d43fef3cf776855140f013", "8efec73eec4b2b651b162c66db1331dda43a07d2", 
"3073e8f122ed3b984c783581e8690aa70ee0bae5", "03d88407c702b6dffaae48b3d55ee716bcaffb8d", "8681fa540b786a1858a3d429c0fbcaf3aeeb52ee", "3017bb41f18096e34eea94329834f6f8b9372be8", "13a375a84a6c414b85477a401541d3e28db1e11a", "12d4c92f0a3a70538ed609bf6f7b603e44d11abd", "05322b759e48cf98c6810759b947ba9bf71ec4ca" ], "paperAbstract": "Detecting anomalous behaviors of cloud platforms is one of critical tasks for cloud providers. Every anomalous behavior potentially causes incidents, especially some unaware and/or unknown issues, which severely harm their SLA (Service Level Agreement). Existing solutions generally monitor cloud platform at different layers and then detect anomalies based on rules or learning algorithms on monitoring metrics. However, complexity of nowadays cloud platforms, high dynamics of cloud workloads and thousands of various types of metrics make anomalous behavior detection more challenging to be applied in production, especially in large scale cloud production environments. In this paper, we present a practical cloud anomalous behavior detection system called DriftInsight. It firstly analyzes multi-denominational metrics of each component and identifies a set of representative steady components based on the convergences of their states. Then it generates a state model and a state transit model for each steady cloud component. Finally, it detects behavior anomalies of these steady components in near real-time and meanwhile evolve behavior models on the fly. 
The evaluation results of this approach in a commercial large-scale PaaS (Platform-as-a-Service) cloud are demonstrated its capability and efficiency.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.37" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5e6b560543706ca4c2fc89c5cc7cb6128c77b3d7", "sources": [ "DBLP" ], "title": "DriftInsight: Detecting Anomalous Behaviors in Large-Scale Cloud Platform", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "5e75d9d75536f8b126c47ae4ce91b47d51c7cc69": { "authors": [ { "ids": [ "1777029" ], "name": "Vladimir Kolesnikov" }, { "ids": [ "27040395" ], "name": "Naor Matania" }, { "ids": [ "1689531" ], "name": "Benny Pinkas" }, { "ids": [ "2524585" ], "name": "Mike Rosulek" }, { "ids": [ "3474994" ], "name": "Ni Trieu" } ], "doi": "10.1145/3133956.3134065", "doiUrl": "https://doi.org/10.1145/3133956.3134065", "entities": [ "Analysis of algorithms", "Program optimization", "Programming paradigm", "Pseudorandom function family", "Public-key cryptography", "Symmetric-key algorithm" ], "id": "5e75d9d75536f8b126c47ae4ce91b47d51c7cc69", "inCitations": [ "2e93c09ccc4048b1bce1061cb787e152368e5ef7", "fe94997ec905da08756aa1bb80203c0f1e77d538" ], "journalName": "", "journalPages": "1257-1272", "journalVolume": "", "outCitations": [ "52d629ab0111f480be2480ce7e670a5eb8850993", "0f389571396db65e8341d83b40ae68aa28dd5161", "72254a4ccde7776abc07c406bacaae308783414e", "5e29280c2715c63d0c69b7864408165bf30d5439", "ccf71f3ba847af3da05bfe5937109af284ec4d45", "038cde8a5a432261be70d6d8de6879cb268ac1bf", "497bc6f8cdd2cb16c52d02208b4f3c82c6e72e61", "eab6190d247cd1ea3cbecc900aceadafd77efd83", "45104cef3fc97f6c92f3fdbba3629ac3b590aae9", "490b2ab76335de294498bff727c0a25314317c63", "4a29478fb87fea6beb9e95e7aa631d2ca0bdd4b2", "14720266a35ced804438cdf06bc8d151e7e9903c", "e605b71afc410051e6bd62f4e140ead0361b699f", "5161aa950ec876026dfc24b4cbf69ae1e552c0e6", 
"080c40c5510d1d34c2e02edb6fb72826b2f82df3", "1890cecdbba895fbcf975c4aef1616e184e69abb", "1e48fc5e033897f411a11e62adba0495bdab1a0e", "3fab56c42fe6efc0b4febd21596ef89188f5d21a", "5372d6211fe20dea2681816bc6087a5ff8adb3e4", "6d1ca1108d9d96e5607571502552ad04464d7f15", "4ca1281563ddf5c4ec71835ac978811f74fd7fbc", "fe38ee9944077cd14a0f6f1813af2d3d4b59ce43", "0bb51d5e3a2e779d7515ee553bccd326bfc43912", "b03f5db2321aa18deefa44cb980799ca689ff740", "e13f0841ab58e852838f013de8bbe6548da86677", "399f5140a149a58278b364c8ab7b6a3f4745617d", "842eb3de44e0538769f1509d1b8d35161fb212bb", "13cbbcc16b4787d8df1a3fd0c2c2af258e664ca1", "9f17818d52ddbb13998097a87964a14731b8849d", "05c721f47d9a53a5739c88a14cb36baf12d2b0fa", "9f88683188af83315bc133612003672140e0e05a", "5adc94602d07e49cc1e94e2aa2b1bdf3481a47f8", "03c1711090d76cc9163e238686786a71c028377e", "594065ed6261717f4dfc76ba6de4d8f78ff1c2a9", "3519b24de42eeb1b57708e0b53199e465a00210b", "2f9ff0d7ae59304bc9c6088a5664abc85e5f45bc", "46527c14457cf84d1cf26487d6b4c31f4825db71", "a9d07270be6e48448ef17b348f3455d76ea1d68f", "d8c1b48ae4d6e4676d060c06087bb6b1ac81a005", "16d23baa55835434808a3420e0884e0dc44680f6", "b4e05b279990243d09e56a2833a1764e00d5f295", "27a92bd3d6875fff1e3fbb8d18682aa43cdd8ec3", "0303c8686bdab1a3f59bccfa87d2b433faa35d80", "e11448685d4cdc9470bfa615fadd1ebb81fba70a", "1554b04035dff3d4c6db8dc3c392e45366db4fdc", "3fdcd1f6bf4940926f67f32a3e3cc1794e244eb5", "5d4fb00b737479edeb381c88e85d6b8fc34db98d" ], "paperAbstract": "We present a new paradigm for multi-party private set intersection (PSI) that allows $n$ parties to compute the intersection of their datasets without revealing any additional information. We explore a variety of instantiations of this paradigm. Our protocols avoid computationally expensive public-key operations and are secure in the presence of any number of semi-honest participants (i.e., without an honest majority).\n We demonstrate the practicality of our protocols with an implementation. 
To the best of our knowledge, this is the first implementation of a multi-party PSI protocol. For 5 parties with data-sets of $2^{20}$ items each, our protocol requires only 72 seconds. In an optimization achieving a slightly weaker variant of security (augmented semi-honest model), the same task requires only 22 seconds.\n The technical core of our protocol is oblivious evaluation of a programmable pseudorandom function (OPPRF), which we instantiate in three different ways. We believe our new OPPRF abstraction and constructions may be of independent interest.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134065", "http://eprint.iacr.org/2017/799", "https://eprint.iacr.org/2017/799.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5e75d9d75536f8b126c47ae4ce91b47d51c7cc69", "sources": [ "DBLP" ], "title": "Practical Multi-party Private Set Intersection from Symmetric-Key Techniques", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "5e870d5430c1287081f063fb510d7b4256b72bf4": { "authors": [ { "ids": [ "3199368" ], "name": "Yuanfeng Peng" }, { "ids": [ "40475627" ], "name": "Benjamin P. 
Wood" }, { "ids": [ "1739688" ], "name": "Joseph Devietti" } ], "doi": "10.1145/3123939.3123946", "doiUrl": "https://doi.org/10.1145/3123939.3123946", "entities": [ "Arbitrary code execution", "Benchmark (computing)", "Concurrency (computer science)", "Consistency model", "Correctness (computer science)", "Overhead (computing)", "Parsec (parser)", "Race condition", "Software bug", "Strong consistency", "Thread (computing)", "Type safety" ], "id": "5e870d5430c1287081f063fb510d7b4256b72bf4", "inCitations": [ "33445fdc18f34429820342a9290dfdb687158736" ], "journalName": "", "journalPages": "490-502", "journalVolume": "", "outCitations": [ "3ca0b8fe78bb0af6acf52c522983c77424a66c96", "4979b94ae5ca344ac4a7c30e86a4ff10e5ca13b0", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "4e624272a61a228bcf9565b0e48e86ae3936db80", "79c163d6aa3f1a14e64d4288995b0ae76d5e6b4c", "05a518c3b1a6f5c15d77f8829368677a263ff15d", "0180ce1c6e98fd66362b46bd945c503bf7372aa8", "711b89b078ceb7722406c719a6ac1316ade61daf", "65c10f69cefd57c4a94dfb77d630c53e0f763f90", "4136c581d46e0dd2a2b60852e00a826eae4dc969", "012f8e43e7973c8fad3c9a48b4dd7be773c770d1", "430f66819f758f6a84aaac4b5f516f9ee4861482", "7c73b0c0e8a822401077f373d8d1ac5a8eb38507", "0a44e8cd34a110ec4ed7221b0431694172eadda8", "0958a63d9c6238b38377f076b487c413bc8642c1", "1464629646aaf8662b725216ce8f6cb1443e6dc5", "1099d738d6650ca65db0f3dc8de3206d3d90d666", "9d40fcd8a113d9910dc828aa4c68e1a6adf070f0", "dd916d401b90d848dd0c1a99d78c034e3c8bb448", "79b6d89f92080b1ec9b8f23d7811666e15f6cabe", "00a65f74facc3da8b8f352c2f0f5f385b758cc0b", "182c0524aa353b6f4f4cb75a88ff3f5fc3bd86e0", "024ecd71116a7438b3eba7a97de9f428d1933ccd", "8a0af8ae748210ef571d074362b552af571e6d33", "1bf9b9e76c674b4e308bed9281a9195617c88cb8", "05a618847e4f08e5bca29dff732757779722b2e0", "dad0df622554ad578af806cc7b57ff5e210460c0", "3bf23f74bf33ed52f7c28587fab315610b27221a", "44a851e09e72741944ea01f855e5dac3ebbc4568", "b22122f79ec2812fd6b32308b4acff500de0e5dd", 
"2dce9b157f4ea25be0b5b2f6957b958f7c504b41", "0a92088c1cf7463ed5d347d2624976e0126ffced", "0653e2ed9f683868cb4539eb8718551242834f6b", "454b06a17e2f6656e65935cb9d36dcf2b2044bf5", "86ed165adcfd254b511ff1bbb912cad65d45f0d6", "54277b022c16839ee33419744fec9919cb32eb79", "34a97a016e6c419eb4b1005a7306d45a775a407b", "44808fd8f2ffd19bb266708b8de835c28f5b8596", "1d01e37e3f5cbc29f0123ebc8282bd371d7f42db", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "67c64f4e676e1996cca7fd0ec50e453d6c698814", "02140b856a0a946e64645aa232d8e244e5a683fb", "968f8a1d37e7ae479c2534a29d0d9d9225134605", "1ef3d10196d91aed5939009846bd7ab3a5e3f8e8", "89dce5f511b06739cbd1e2e2300775106bfe1d17", "98493b6182dfc19be77f7532bd26fec2ae3d9545", "08cd9a0124faf900e5c155fa853efbc20600784b", "3d5126c5d4f670c1096a669508befe4ec43ac5f7", "dba2b12662b3395a6770c087dfd39977b6d017a2", "0f43560edc881142224f07e4c0f7776e1ccfd9db", "fbfa625cfe2d08529baec42db7d3324d2ad38874", "5f3f15ac33c9197cd444997e374bda879d1804dd", "0881378cc281fe6d8451eedbbb73e9e157d7bf38", "7489f52108431f231c945aceaa30a211b61e74ed", "2346439ece014d5e3ce1564adc2a7ca098a37c8e", "8b28b02af1ba77fff5b08d6dea87ba8b043b479d", "9e562fa998619a05b5f2b43a884b87fab680b762", "0c28172b2f30ac9d41f2cb4b470f926771ac7fa8" ], "paperAbstract": "Data race detection is a useful dynamic analysis for multithreaded programs that is a key building block in record-and-replay, enforcing strong consistency models, and detecting concurrency bugs. Existing software race detectors are precise but slow, and hardware support for precise data race detection relies on assumptions like type safety that many programs violate in practice.\n We propose Parsnip, a fully precise hardware-supported data race detector. Parsnip exploits new insights into the redundancy of race detection metadata to reduce storage overheads. Parsnip also adopts new race detection metadata encodings that accelerate the common case while preserving soundness and completeness. 
When bounded hardware resources are exhausted, Parsnip falls back to a software race detector to preserve correctness. Parsnip does not assume that target programs are type safe, and is thus suitable for race detection on arbitrary code.\n Our evaluation of Parsnip on several PARSEC benchmarks shows that performance overheads range from negligible to 2.6x, with an average overhead of just 1.5x. Moreover, Parsnip outperforms the state-of-the-art Radish hardware race detector by 4.6x.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123946", "https://repository.wellesley.edu/cgi/viewcontent.cgi?article=1168&context=scholarship", "https://cs.wellesley.edu/~bpw/research/files/parsnip-micro2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5e870d5430c1287081f063fb510d7b4256b72bf4", "sources": [ "DBLP" ], "title": "PARSNIP: performant architecture for race safety with no impact on precision", "venue": "MICRO", "year": 2017 }, "5e8a055053c4a5307e930770cb084d79efe8a9f1": { "authors": [ { "ids": [ "1883317" ], "name": "Shilin Zhu" }, { "ids": [ "1690083" ], "name": "Chi Zhang" }, { "ids": [ "1775391" ], "name": "Xinyu Zhang" } ], "doi": "10.1145/3117811.3117820", "doiUrl": "https://doi.org/10.1145/3117811.3117820", "entities": [ "Camera phone", "Experiment", "Fault tolerance", "Image processing", "Interference (communication)", "Waveform" ], "id": "5e8a055053c4a5307e930770cb084d79efe8a9f1", "inCitations": [], "journalName": "", "journalPages": "329-342", "journalVolume": "", "outCitations": [ "1bd00de050cb6ff81e20f91f91fa13767a713247", "54eb39514cc760c3a3479345bf515517ddf0e86f", "8a0a640dee6b2ac4cb92328508d6c4dab4c88591", "d3fafd1011eac2827406f233be709b3f670149fe", "cb8ca0655b0ab1ef3f05d80e7260b9c361dc126a", "cc53a721320b202503050afe38623643f0784f99", "d9fd953e2c9d69dcde2a038250a1cfc0b08a102a", "a0c82bf3a567e4aa88a452fbf734c4072d39c41c", "df4ed13914a8131d7d6b64034c1f2fe24a77ca2d", "36707cbe12d0e060cfc5971a3bfc132f6ae8fde8", 
"bf4937c0e04f674ec3e33f212ffd51353489a9e0", "4e5066f15bbfa36a73e00717ac42cb943a9c05d9", "081b13b853b785dbcb45e89248b1e94009171465", "58dd2533ea9e675a2958d6c303d75e13a9f94de8", "13eb5c34d9c4c2374b982897a3d762c7d58fa3aa", "6ad2df5dafb37e92f22863b680357ed69598ff15", "763cd05a4152fc88ba15d5cccf3768c94df36670", "6b3abd1a6bf9c9564147cfda946c447955d01804", "6572ffedfd1a7ddb53b1d6f1ba8dfeb1c3ca182a", "214eeae2838d82bd8d40d39fbedb74a290567484", "e0492ec0a285400a0ab578b552f99ee1193b1eb7", "2c7aaa61b15ea65b68055125e8bbd0189c464eb8", "b1740626a8f16830c74aa49730f688ed5b82a885", "22edcc20303d13b6641d3b27c5ef8b463e85a4f2", "c57a223aba6d88da95d63c9a93595a7c73256a3d", "5a735552c960badd66ba187fd392ddbb8449cf40", "0400f3a22a8bda6c7698fed1a1fe3c07da291f15", "6189792ae830a39f99b5390ad2d2af7ac571d31d", "119a0f1e64096a3b1b64d867d8f4a21ce79ed26a", "1f911ae809066d4a55598bce939a466de980b13b", "1e37384874c84acc7919176d4e9598e9116da2ee", "2c05fbcb76fd8bc929689159f42cb5b38f73b0bb", "2d27e2d8188743c4e3ca30fda5c25e70775f03e8", "17bc537e71b7edc1ca3248e1a75f0c8b59250d98", "11d3e3d021725b4713cc2117aa849a9a728980e2", "6ef6e6e8b295c90ab9390f07d91c9ef8304a409d", "a1bd227c468843a269f00f33708c6cc453b75136", "1a1837076108104ec4b5422466eba56b8e4fdc29", "315b05f00115424d97c94b916e4287c41a6b2629", "0a1ffbdf850bf85fcae4bfe7ad51b7860d3836eb", "3a5a0eebf7d6ec8bfb8e1d91c3b93e584aba280b", "f62af111e7db946d281de2ac4c2fac61dce9ed8e", "e5532c6958e3305adb328212fd2636968e6c966c", "9cf6ec0b2ac1131511c4bb5d02d50d30b9473f89", "b5d8b259052ffecd1fcf3eae9b08e31b41c24ec0", "d418761850285e0be834de930ab627aee2d32139", "08121f5a9d2d7edd1e9e933f6c6536bfd9f3768b", "35867081685ff40ea0b245d315b2d54e42235b69", "4c9cec89a2c9c8173ee53ab4cda2c021421eb7a5", "5bc6ebbdb387cd4579df3cb850129dbcbdb24418", "5b4e73b5a2b13575d141d3d8ac06708bdc72ca82", "24f988b2e2e552c819efed8cdc9c8094bc1f66e0", "10ab1b48b2a55ec9e2920a5397febd84906a7769", "15d960a4427ef417a399c5f4c9a336aab668da09", "3944192cc9e019a730d4e456427712484124a959", 
"2d8a84a8e661ce3913cb6c05b18984b14ed11dac" ], "paperAbstract": "The ubiquity of mobile camera devices has been triggering an outcry of privacy concerns, whereas privacy protection still relies on the cooperation of the photographer or camera hardware, which can hardly be guaranteed in practice. In this paper, we introduce LiShield, which automatically protects a physical scene against photographing, by illuminating it with smart LEDs flickering in specialized waveforms. We use a model-driven approach to optimize the waveform, so as to ensure protection against the (uncontrollable) cameras and potential image-processing based attacks. We have also designed mechanisms to unblock authorized cameras and enable graceful degradation under strong ambient light interference. Our prototype implementation and experiments show that LiShield can effectively destroy unauthorized capturing while maintaining robustness against potential attacks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117820", "http://xyzhang.ucsd.edu/papers/SZhu_CZhang_MobiCom17_LiShield.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5e8a055053c4a5307e930770cb084d79efe8a9f1", "sources": [ "DBLP" ], "title": "Automating Visual Privacy Protection Using a Smart LED", "venue": "MobiCom", "year": 2017 }, "5ea42c7adfdd94993c0eb6e032dd845d1f5a07fe": { "authors": [ { "ids": [ "40263683" ], "name": "Maxime Schmitt" }, { "ids": [ "2326915" ], "name": "Philippe Helluy" }, { "ids": [ "1769333" ], "name": "C\u00e9dric Bastoul" } ], "doi": "10.1109/HiPC.2017.00028", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00028", "entities": [ "Cobham's thesis", "Compiler", "Computation", "Domain-specific language", "High- and low-level", "Parallel computing", "Polyhedron", "Principle of good enough", "Program optimization", "Programmer", "Run time (program lifecycle phase)", "Self-tuning", "Signal processing", "Simulation", "Speedup", "Time complexity" ], "id": 
"5ea42c7adfdd94993c0eb6e032dd845d1f5a07fe", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "172-181", "journalVolume": "", "outCitations": [ "38adc6ce214ad89ad6a0c47b489608a0fbeedaaf", "15a8e18115e4049de6825fc6eaf790d35b7c2002", "817552da0f513226ee4befa3ab4ef0ed1c94bb19", "219b47356dcd3c02a04837be9be7ae072153a9d1", "138f87cb78d8755c10fb3e024597bc6e5bfc192c", "7073828e40fb4d6757063e65c3b56504f69cdaaa", "5e41884304203908012c4f7b8cae8d0a0c8d8822", "04afd9817df2900a02f915203e01657b56a957cd", "52e99334096f02c9cf386c9391fc68181c058f4c", "f0f4757aa2f923a349e8357e73850a78e9b80fee", "1c5b15587e4034c97610b2017697ad1ea663a8fa", "237a086708ccae0686c7d1995e0a7017650c5740", "5a830ad18ff1a45c197570065b65d212818eaef6", "65996dc3bed6c4d3732db8ba5a327f159b9ab95a", "88a32f0546fccb673225fd2fcc4d9918e7a42298", "82ce7e0db4aaf8c56a0c4c9b7ca5a6985b17601b", "4c725527b426b28f9024a0092a6a09f180b25628", "b0f6f9c81eed44638be11a8277b3400a3d9f1e95", "5127ac7b58e36ffd13ca4437fc123c6a018dc436", "518d38f7ae0e734d0674a61427ccfb5bcbbc88b7", "97cdfd0ee4e163e0f4db1a6964558480ba2f6727", "0e95e0ff4014053ac11fcbcee556eaab4dc1a92d", "664910a74a59dd8055c0d12a270281f78fd7145a", "58ef44a34ef69b880964b0e3527374e8fa8b10c4" ], "paperAbstract": "Compiler high-level automatic optimization and parallelization techniques are well suited for some classes of simulation or signal processing applications, however they usually don't take into account domain-specific knowledge nor the possibility to change or to remove some computations to achieve \"good enough\" results. Differently, production simulation and signal processing codes have adaptive capabilities: they are designed to compute precise results only where it matters if the complete problem is not tractable or if computation time must be short. In this paper, we present a new way to provide adaptive capabilities to compute-intensive codes automatically. 
It relies on domain-specific knowledge provided through special pragmas by the programmer in the input code and on polyhedral compilation techniques to continuously regenerate at runtime a code that performs heavy computations only where it matters. We present experimental results on several applications where our strategy enables significant computation savings and speedup while maintaining a good precision, with a minimal effort from the programmer.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00028" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5ea42c7adfdd94993c0eb6e032dd845d1f5a07fe", "sources": [ "DBLP" ], "title": "Adaptive Code Refinement: A Compiler Technique and Extensions to Generate Self-Tuning Applications", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "5eb2d2ad43056ac8d4d927196d091c1d01014646": { "authors": [ { "ids": [ "3003526" ], "name": "Yves Vanaubel" }, { "ids": [ "1729257" ], "name": "Pascal M\u00e9rindol" }, { "ids": [ "2093701" ], "name": "Jean-Jacques Pansiot" }, { "ids": [ "1896080" ], "name": "Benoit Donnet" } ], "doi": "10.1145/3131365.3131378", "doiUrl": "https://doi.org/10.1145/3131365.3131378", "entities": [ "Cross-validation (statistics)", "Emulator", "Entry point", "Graph property", "Internet topology", "Layer 2 MPLS VPN", "MPLS-TP", "Multiprotocol Label Switching", "Router (computing)", "Traceroute" ], "id": "5eb2d2ad43056ac8d4d927196d091c1d01014646", "inCitations": [], "journalName": "", "journalPages": "29-42", "journalVolume": "", "outCitations": [ "719d045e5d06571c40d6ab699cb673a5223bb6a7", "6d6030e70859795569baceaba58abb17ec62cca8", "5d4445bfa05fc47466dbbc950717a181613e87a0", "32ac1fed2f6f9bf6b8913091f5a6efd40d71b1e1", "4e047ab7e9b2a5a69462e875a1c485c05f024c54", "653a0c33ffea192c988ceb2ba69c97baac8033c0", "32877824e74f8e28bbbd1b78c03c4341f3e20925", "09fdc1e451e5960d9de2c0935f3c372b496a1e4c", 
"4d8fabfe18cddf32767f54d6cc254ebe787cb2d3", "400b2328e1a6f2f4b6743e0448c694acadf7d2d4", "1ba74ad9e90a6f7c74d1a606a457715ab12b80ae", "9f18933d64b31924f6c67d17ae3286a6598af9e4", "2d1f882d3e8118f56e873239190cf3fdf9963f47", "3cd2f248e9c03bde2c2203a7e0cffdb8754926cb", "3a31f220f764d0761fd1ae29ed74296a93a6c417", "2180f8dbf870984d2692128587bab8c72e3545c5", "0d06de003e8ca949b3b39f9a51750c050addb997", "dc07b3ce1e95e261cf8ccb7eadbd5367b580e538", "8e82a69d59c931d27bf1bc57db4480110d3b08b8", "bfc365c5fd5eb3d2358d8ce122c287bb515cb589", "4b9dc359b651398f24d4f98204a5fc3f57af40b1", "039eab95462b2e8b5f3fc8a8a1056b401fa9f03d", "cab3381da304f259fe4de9c4fb72256fc35e0770", "890dd7919b186c4c7ea6be96821da1ab61db4dcb", "0ab4fbcaa0d42f0e672799ae47a2101b713b9b9d", "70969bbc88f608b7c8b43da3ecdfcb021dd6a9cc", "3f5357db483ebda12f89d366a8012b0b727e2108", "053ca5edfe0992ba80a2e0fb80e0bc99a522d67b", "0b85bb5a37cb3f77bfe10a6d6864c015e07b0154", "a5aad5abb32f6b15f31b92312bb3b0f7b6470977" ], "paperAbstract": "For years, Internet topology research has been conducted through active measurement. For instance, Caida builds router level topologies on top of IP level traces obtained with traceroute. The resulting graphs contain a significant amount of nodes with a very large degree, often exceeding the actual number of interfaces of a router. Although this property may result from inaccurate alias resolution, we believe that opaque MPLS clouds made of invisible tunnels are the main cause. Using Layer-2 technologies such as MPLS, routers can be configured to hide internal IP hops from traceroute. Consequently, an entry point of an MPLS network appears as the neighbor of all exit points and the whole Layer-3 network turns into a dense mesh of high degree nodes.\n This paper tackles three problems: the revelation of IP hops hidden by MPLS tunnels, the MPLS deployment underestimation, and the overestimation of high degree nodes. 
We develop new measurement techniques able to reveal the presence and content of invisible MPLS tunnels. We assess them through emulation and cross-validation and perform a large-scale measurement campaign targeting suspicious networks on which we apply statistical analysis. Finally, based on our dataset, we look at basic graph properties impacted by invisible tunnels.", "pdfUrls": [ "http://doi.acm.org/10.1145/3131365.3131378", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final75.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5eb2d2ad43056ac8d4d927196d091c1d01014646", "sources": [ "DBLP" ], "title": "Through the wormhole: tracking invisible MPLS tunnels", "venue": "IMC", "year": 2017 }, "5f014e98da91629dabfede749a07adb522289c25": { "authors": [ { "ids": [ "1727379" ], "name": "Disa Mhembere" }, { "ids": [ "2952749" ], "name": "Da Zheng" }, { "ids": [ "1693972" ], "name": "Carey E. Priebe" }, { "ids": [ "1717958" ], "name": "Joshua T. Vogelstein" }, { "ids": [ "1726784" ], "name": "Randal C. 
Burns" } ], "doi": "10.1145/3078597.3078607", "doiUrl": "https://doi.org/10.1145/3078597.3078607", "entities": [ "Algorithm", "Apache Spark", "Cloud computing", "Computation", "Distributed computing", "Distributed memory", "In-memory database", "K-means clustering", "Machine learning", "Parallel computing", "Scalability", "Social inequality", "Synchronization (computer science)", "Turi" ], "id": "5f014e98da91629dabfede749a07adb522289c25", "inCitations": [], "journalName": "", "journalPages": "67-78", "journalVolume": "", "outCitations": [ "1a5e09f5e9399629f6a3dcfe79d3d70e8e2c5be0", "01b60f2ce6826321296d7ac1ae8f894946d8115c", "76e0cea30381c434d4bbe161b7c58e7231316dc6", "8d651f79df61f6956b029fac7c69d56dbc051250", "3dff11679346f5344af1018cad57fa14cc349f2f", "23db2d9c5a97f36f1b63ea249402b4be0919ebc9", "0558c94a094158ecd64f0d5014d3d9668054fb97", "3784b73a1f392160523400ec0309191c0a96d86f", "0541d5338adc48276b3b8cd3a141d799e2d40150", "400c8f6d1bc0284b887f3f6412e07f9be70650f8", "a62f5a1953764af0021b992b82e7fbdee9def34d", "fd109bac25e8a43c3a0c0730927c5442eba991dd", "71fbbc1675780f2f945073f9d92c09b8d76f80f0", "9b8d8f2fb88e03f8f3ad01efbfef52718b70d104", "6f5c1f3c7015c0e15b28c8a2d2b8178be287fa75", "9241ea3d8cb85633d314ecb74b31567b8e73f6af", "1ee011c73e292fb25e682e79b4219138dc853b70", "02df9340e08e1e5085f1257bb6ddda4cd225e1d3", "31af4b8793e93fd35e89569ccd663ae8777f0072", "68b417e1ab61277f0d600244db0acefe5c5ed74f" ], "paperAbstract": "k-means is one of the most influential and utilized machine learning algorithms. Its computation limits the performance and scalability of many statistical analysis and machine learning tasks. We rethink and optimize k-means in terms of modern NUMA architectures to develop a novel parallelization scheme that delays and minimizes synchronization barriers. 
The k-means NUMA Optimized Routine (knor) library has (i) in-memory (knori), (ii) distributed memory (knord), and (iii) semi-external memory (knors) modules that radically improve the performance of k-means for varying memory and hardware budgets. knori boosts performance for single machine datasets by an order of magnitude or more. knors improves the scalability of k-means on a memory budget using SSDs. knors scales to billions of points on a single machine, using a fraction of the resources that distributed in-memory systems require. knord retains knori's performance characteristics, while scaling in-memory through distributed computation in the cloud. knor modifies Elkan's triangle inequality pruning algorithm such that we utilize it on billion-point datasets without the significant memory overhead of the original algorithm. We demonstrate knor outperforms distributed commercial products like H2O, Turi (formerly Dato, GraphLab) and Spark's MLlib by more than an order of magnitude for datasets of $10^7$ to $10^9$ points.", "pdfUrls": [ "http://arxiv.org/pdf/1606.08905v1.pdf", "https://arxiv.org/pdf/1606.08905v2.pdf", "https://arxiv.org/pdf/1606.08905v1.pdf", "https://arxiv.org/pdf/1606.08905v4.pdf", "https://arxiv.org/pdf/1606.08905v5.pdf", "http://doi.acm.org/10.1145/3078597.3078607", "https://arxiv.org/pdf/1606.08905v6.pdf", "https://arxiv.org/pdf/1606.08905v3.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5f014e98da91629dabfede749a07adb522289c25", "sources": [ "DBLP" ], "title": "knor: A NUMA-Optimized In-Memory, Distributed and Semi-External-Memory k-means Library", "venue": "HPDC", "year": 2017 }, "5f05544e68687b1bea8a15d08c7aff91bd9401c1": { "authors": [ { "ids": [ "1848040" ], "name": "Peter Bailey" }, { "ids": [ "1751760" ], "name": "Alistair Moffat" }, { "ids": [ "1732541" ], "name": "Falk Scholer" }, { "ids": [ "1678526" ], "name": "Paul Thomas" } ], "doi": "10.1145/3077136.3080839", "doiUrl": 
"https://doi.org/10.1145/3077136.3080839", "entities": [ "Algorithm", "Information needs", "Relevance", "Role-based collaboration", "Web search engine" ], "id": "5f05544e68687b1bea8a15d08c7aff91bd9401c1", "inCitations": [ "08f9750b1b53cfd7769fe2735d171295bac18796", "36d9abbbc4735a9dbd75f57f8f5602bd80d6b0b1", "3821bac3ae629cf271a119cda269f508b7f08ad5" ], "journalName": "", "journalPages": "395-404", "journalVolume": "", "outCitations": [ "9533cce453af80638bb2914a929fc5c866b2d2b5", "313921607a8c32cf87a50a005f7f655034e5d015", "3a5066bcd59f81228876b6bf7d5410c63a82f173", "076077a5771747ad7355120f1ba64cfd603141c6", "5881189bdcba6907f2e7f7dbb3143ffbab8c5e90", "93a28b52f0273be80db1c2c9e7fdb2f03bf45797", "978dfe8ac0c1be5160e4c508f921381905e146dc", "f4ee0e688036509aa5e9d10a8abba4acef25e305", "46a3afe1ef550dd29973ac0ce1289162a71642fb", "3bb44af2251b89c83a922778be5ba50cc205cc8c", "28bcb617022a93e2f00cf4f96138bcdf048f5309", "b9d26526def969c055c51d6280bbd7e6aba99342", "ccf72f8c0262a9ccc981be62b126105786a391b0", "57cfe0e71dc1f78a8806e64c3b0c3e8fafbb2112", "88ab65123f2a842ac8f8843dbd76b62e226b91cb", "2f53b548e05776c24c048351e35df15b00642a76", "f7d6d33d98cfb20aee2b2215716fc602f851c447", "0a2d9ec95a7e5b212be1369f2daa110d09f22821", "a72a423367363e33700ac7730e294fc673ea6fb2", "2fa2afbadc78bd53ec0493a9f4c9d1de2757f3c0", "3a8a544153d9761024844d6a05619f535b14dca8", "b16aac8092cec6478a21e48f6ab250f1131eedd0", "14b69114783f65bb9f5245dbfd08323da0cc550e", "b2d26ed1e4658b8bace957b6f4a7d0b2d5e671fc", "9371fae6aec00ec384d3a531fb8197632f0b02e4", "6b3853f08c482fe1bfbe39d656d50a8c73976f3c", "0b271ce0ca450df10c5c71328b913f2ac81b7e48", "07603546b52cfe07b13f64c79af1fda45e47b6f5", "52aa034a1c1413e03e7895dab0a0b67549cf92e1", "5b8d41c7bb3747c4eab6c243210e11a72b5c829a", "5185297b4c2f0fcc00ec183b2dde1565fc963376", "471cb4c2e5039bdaacb0274fee70c7fe2e93493e", "5c444ac251c2a70b8b70494e1c2fb4400c12839b", "2d2c6555c538636531c2c1bdfe30f08693c17b72" ], "paperAbstract": "A search engine that can return the ideal 
results for a person's information need, independent of the specific query that is used to express that need, would be preferable to one that is overly swayed by the individual terms used; search engines should be consistent in the presence of syntactic query variations responding to the same information need. In this paper we examine the retrieval consistency of a set of five systems responding to syntactic query variations over one hundred topics, working with the UQV100 test collection, and using Rank-Biased Overlap (RBO) relative to a centroid ranking over the query variations per topic as a measure of consistency. We also introduce a new data fusion algorithm, Rank-Biased Centroid (RBC), for constructing a centroid ranking over a set of rankings from query variations for a topic. RBC is compared with alternative data fusion algorithms.\n Our results indicate that consistency is positively correlated to a moderate degree with \"deep'' relevance measures. However, it is only weakly correlated with \"shallow'' relevance measures, as well as measures of topic complexity and variety in query expression. 
These findings support the notion that consistency is an independent property of a search engine's retrieval effectiveness.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080839" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5f05544e68687b1bea8a15d08c7aff91bd9401c1", "sources": [ "DBLP" ], "title": "Retrieval Consistency in the Presence of Query Variations", "venue": "SIGIR", "year": 2017 }, "5f375c445c53c6b5aa9ec05684fcf9aae1358f80": { "authors": [ { "ids": [ "2718536" ], "name": "Shripad Nadgowda" }, { "ids": [ "40005837" ], "name": "Sahil Suneja" }, { "ids": [ "1765914" ], "name": "Canturk Isci" } ], "doi": "", "doiUrl": "", "entities": [ "Autoscaling", "Cloud computing", "Docker", "Load balancing (computing)", "OpenVMS", "Platform as a service", "Scalability", "Software deployment", "Substrate (electronics)", "Swarm", "Web container" ], "id": "5f375c445c53c6b5aa9ec05684fcf9aae1358f80", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "05ea86d312ed4a19ad282ad28838e8e87b6ce156", "67f49884d9418bdf4e68796ab4c77be951835e67", "22aa49d83c66122f74f1f966e246dbbeeab28103", "1d84ed02bc65400393723c37a70cc68fe39fbef4", "9e98d529d158e2230d722f497fbc36373eaa8583", "daa32a4ddfe4afb0ac9856a12121a25b51cb3ab6", "490d862480cf30949dce90e832aa292c498ac768", "502e789f4026ab8886e46822e95a26be62f213eb", "b06c7df9404cf6d87b5d552808450b8c226deab9", "a379e5732a2172021ef3e20d1c7b82d5aec50636", "44cc87a72d95f4f0c89d38d4bc9634732f1e8fbb", "78f853271fe69da617d5a14a1e54cbae6a982a50", "0c096642a6142ab41d883a6afc9b9ac4e872842c", "242e2056b47d08e2966350e05ad9adf6fa448699", "08fec93d2c1a5064336e4cafde4c65f80a2e7f74", "1aefeed6a487431dc1c1137d8b18ad299b328d73", "2763cd85a279d9aa28942eb51febaa76c2c852cb", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "fd6b97ea910b1caf424765c50f4fbbff5e795cbf", "0b5c26697d7fe2fd90f337934de63dc973195dfa", "381b272d8c851ca73aa6ff8aa821a1d2393c41bf", 
"abdaeca65df9eefb3603c8a396a599c69d7081de", "9aa0d7253574e50fe3a190ccd924433f048997dd", "0205d4afcbd37f425efc63d05efba2280ceea63a", "dfd68fbe49b9cba44b912a7b43627a0bcb79f3b2", "289f21935b0235b0b143436f896477d71578f66f" ], "paperAbstract": "Applications have commonly been oblivious to their cloud runtimes. This is primarily because they started their journey in IaaS clouds, running on a guestOS inside VMs. Then to increase performance, many guestOSes have been paravirtualized making them virtualization aware, so that they can bypass some of the virtualization layers, as in virtio. This approach still kept applications unmodified. Recently, we are witnessing a rapid adoption of containers due to their packaging benefits, high density, fast start-up and low overhead. Applications are increasingly being on-boarded to PaaS clouds in the form of application containers or appc, where they are run directly on a cloud substrate like Kubernetes or Docker Swarm. This shift in deployment practices present an opportunity to make applications aware of their cloud. 
In this paper, we present Paracloud framework for application containers and discuss the Paracloud interface (PaCI) for three cloud operations namely migration, auto-scaling and load-balancing.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-nadgowda.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/hotcloud17_slides_nadgowda.pdf", "https://www.usenix.org/conference/hotcloud17/program/presentation/nadgowda" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/9653/3279eab5e2629bc6fc940e63cdccf4802400.pdf", "s2Url": "https://semanticscholar.org/paper/5f375c445c53c6b5aa9ec05684fcf9aae1358f80", "sources": [ "DBLP" ], "title": "Paracloud: Bringing Application Insight into Cloud Operations", "venue": "HotCloud", "year": 2017 }, "5f4388e15af381d4c48b6376180a94db10dae0cd": { "authors": [ { "ids": [ "5082188" ], "name": "Zhenhong Liu" }, { "ids": [ "2337644" ], "name": "Syed Zohaib Gilani" }, { "ids": [ "1789661" ], "name": "Murali Annavaram" }, { "ids": [ "1686484" ], "name": "Nam Sung Kim" } ], "doi": "10.1109/HPCA.2017.51", "doiUrl": "https://doi.org/10.1109/HPCA.2017.51", "entities": [ "Baseline (configuration management)", "Graphics processing unit", "Performance per watt", "Power supply", "Register file", "Scalar processor", "Throughput" ], "id": "5f4388e15af381d4c48b6376180a94db10dae0cd", "inCitations": [ "5eb9bb0450cf8ad4b8ee7bf8ceba5553f4fdf137", "fa21c85107516c7f0a341de27856d7ffe4a6c5d9", "9ba97fe20d1042b080b3e2c515e4f9c0ccc6e9e0", "1574575034358cc08e96b4bd5f0145286490ea49" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "601-612", "journalVolume": "", "outCitations": [ "0345b41c8a708048a9f1d27cea06b867b52eead7", "309ad0357af7722a24192781340881390055a3db", "4308295a2eaef30be423520918ad224dc2f3ffe2", "49fb77e166dc26849e37db3d5a53496ab547a545", "3401d10e78ee5afdf5b7b42e7e9751f3d338a3d6", 
"33da3dcba06cf453f74203e3fb2adaa8c1133f3b", "8af1a4b65bd380b8775c15e2a9323ae5184bc8dd", "ce480ccc937a0c4c7c05f231d78fb888f3c9e310", "6635cd62124e589bc56667b31cc295db2fbd22a2", "f9cf47539216a3737f6353dca8a8f3f1e588413e", "9a334c377686d8abe7711abcef58775ee02c0487", "011dbf85e32e317be5c3af5855dd82d6852b0f07", "0d394c72f9d769dfa021796a29fc142db573aec7", "12f1a44eeef9b8a52b89e54ddc227704dbddfb92", "11df018ba5452e7806dd22b6746604ca4ca45f82", "7ea15c138cc72588fa376ff819f4bb8ca0b324da", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "0269cb98b0c91a804326d8f8888c32f01fda8661", "2d6f002477015469075954c6748a1a85af352c94", "13bb71a86db976ffa572407bda5b44cacadac4ca", "0915fa32b78d4df0b33732bafe89b236bc8e37ce", "09ba565ec5dd3816968edaaee8351cf653e26d81", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "284c7fde4bbaf19dd345e3b37d98085d7bfb9a4f", "1d0e6ae33237650f71583c2a5e2cc27c6523fb22", "4946ad542eacf5066d8268a43347e9f788d58f9c", "c592507ecf124838ba95004fdb85f7a1b1e7ee2b", "0114fb72afbd9cc0bca35940beb21eda596aa5e0", "f783cd996b625f2866a8ffe1a5d23f10fd0e9ae9" ], "paperAbstract": "The GPU has provide higher throughput by integrating more execution resources into a single chip without unduly compromising power efficiency. With the power wall challenge, however, increasing the throughput will require significant improvement in power efficiency. To accomplish this goal, we propose G-Scalar, a cost-effective generalized scalar execution architecture for GPUs in this paper. G-Scalar offers two key advantages over prior architectures supporting scalar execution for only non-divergent arithmetic/logic instructions. First, G-Scalar is more power-efficient as it can also support scalar execution of divergent and special-function instructions, the fraction of which in contemporary GPU applications has notably increased. 
Second, G-Scalar is less expensive as it can share most of its hardware resources with register value compression, of which adoption has been strongly promoted to reduce high power consumption of accessing the large register file. Compared with the baseline and previous scalar architectures, G-Scalar improves power efficiency by 24% and 15%, respectively, at a negligible cost.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.51" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5f4388e15af381d4c48b6376180a94db10dae0cd", "sources": [ "DBLP" ], "title": "G-Scalar: Cost-Effective Generalized Scalar Execution Architecture for Power-Efficient GPUs", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "5f8b88be486ff492730ca331c1016364158c1119": { "authors": [ { "ids": [ "1729163" ], "name": "Xiaojun Chang" }, { "ids": [ "40508553" ], "name": "Yaoliang Yu" }, { "ids": [ "1698559" ], "name": "Yi Yang" } ], "doi": "10.1145/3097983.3097991", "doiUrl": "https://doi.org/10.1145/3097983.3097991", "entities": [ "Algorithm", "Convex function", "Experiment", "Machine learning", "Performance", "Rewrite (programming)", "Support vector machine", "Web search engine" ], "id": "5f8b88be486ff492730ca331c1016364158c1119", "inCitations": [ "3fcc497b1692164ca077ae3f44f17d5d3de38bb8" ], "journalName": "", "journalPages": "75-83", "journalVolume": "", "outCitations": [ "450e8bb9c67e731ffd4afb80e11dcf7cad99904a", "87e10591db57b2688dcb9c77412c94a4aa01af88", "19a9f01d9ab894aa699376940c9633e25ed9c102", "6ed3f1f0070e25b455d5d377a6654c641b0b44fe", "5a26ec6568152731ce1667a426307ebccff5a50e", "22782d5a3e9cb2fc575b4fb49ff6bc0af964d4ca", "40e927fdee7517fb7513d03735754af80fb9c7b0", "7f755d620b57acf27a16ff95923c5677ff8198bb", "74fae524d0cb06b88f00a638c9bf2e98f2fab39a", "36ea668bb7617b9c1e6e98aebe96a0aaf90b569e", "308c16ff7d280016ead344a5daee5086cac87944", "1f83987ada783ee073fc6c1776682aaa0514940e", 
"c1d31d01837136222c5a501ea1cb6fa091d521fe", "01dd68192c3e9813d7435d8a414d779fa6dc8b15", "e0f8b400bbe482a4d8b8e28a35048ae236b172e5", "3817fe5d388514dd0113a26fc1c8dfb2de44e5e2", "0c60eebe10b56dbffe66bb3812793dd514865935", "0dfa460a35f7cab4705726b6367557b9f7842c65", "21ffd44e545b82c843c89c1793e90c8a8817fc1d", "2a6336c2055f681be2587f3332c50c96d1d78b5f", "364c79d2d98819b27641c651cf6553142ef747bf", "1e4e89f17f23155b406c8541eb791f76b3d13c60", "0903b956a68073eee3760572059abd5b24b026da", "6d43c41e19d994b802f5cff6fbe4e1feffd0d81f", "1e389040dbdb3057ff510df13808be153c459fd0", "24c9b0b05c5e957e255b854f947472f9181772a4", "2b0000687d5df8e06caa8e84ebd7e49cbaddf44c", "061356704ec86334dbbc073985375fe13cd39088", "2703178e07c4f5cd094449bc7677689f8a1a6196" ], "paperAbstract": "Classification problems with a large number of classes inevitably involve overlapping or similar classes. In such cases it seems reasonable to allow the learning algorithm to make mistakes on similar classes, as long as the true class is still among the top-k (say) predictions. Likewise, in applications such as search engine or ad display, we are allowed to present k predictions at a time and the customer would be satisfied as long as her interested prediction is included. Inspired by the recent work of [15], we propose a very generic, robust multiclass SVM formulation that directly aims at minimizing a weighted and truncated combination of the ordered prediction scores. Our method includes many previous works as special cases. Computationally, using the Jordan decomposition Lemma we show how to rewrite our objective as the difference of two convex functions, based on which we develop an efficient algorithm that allows incorporating many popular regularizers (such as the l2 and l1 norms). 
We conduct extensive experiments on four real large-scale visual category recognition datasets, and obtain very promising performances.", "pdfUrls": [ "https://cs.uwaterloo.ca/~y328yu/mypapers/kdd17.pdf", "http://doi.acm.org/10.1145/3097983.3097991" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5f8b88be486ff492730ca331c1016364158c1119", "sources": [ "DBLP" ], "title": "Robust Top-k Multiclass SVM for Visual Category Recognition", "venue": "KDD", "year": 2017 }, "5f926eab40abdd9895e4eb4c8c07f240b9096427": { "authors": [ { "ids": [ "1703309" ], "name": "Hui Li" }, { "ids": [ "2610571" ], "name": "Tsz Nam Chan" }, { "ids": [ "1722082" ], "name": "Man Lung Yiu" }, { "ids": [ "1718168" ], "name": "Nikos Mamoulis" } ], "doi": "10.1145/3035918.3064009", "doiUrl": "https://doi.org/10.1145/3035918.3064009", "entities": [ "Approximation theory", "Column (database)", "E-commerce", "Experiment", "Full table scan", "Latent variable", "Lossless compression", "Recommender system", "Singular value decomposition", "Social media" ], "id": "5f926eab40abdd9895e4eb4c8c07f240b9096427", "inCitations": [ "245151f44f461eb5153514d5184cae7e6feec22d" ], "journalName": "", "journalPages": "835-850", "journalVolume": "", "outCitations": [ "05cd61c2172e4c25f43ddb0b0927ffec19643d84", "d77f241856426005313e17ff9927c3396cb0d340", "9ba53f53a55f22626e9496f2ddb7e58266640c37", "5fa10e1a3ebaee27ce015f406508f82f8dcf5be8", "8724631b1b16469fb57df1568d41d1039067c717", "3ae4e53bcaa8f949184dfd6118a85c79c01053e7", "9aa88a8a354f1d322e242376d27d0474e50252f8", "1e96b0c0ac74070a984fec94f085109839d842a9", "4e8f82b0741c2151d36f2201fc11b0b148beab60", "c32e8d3d86695d2da36ccfce60d51956e2e4818c", "0d338e5bc70e81ea54fbd2c7535e21728fe7deb1", "63eaeb0c48175065ffd096aad10aed712c6d7bbb", "ebade6ec686beb98fa1e1962442841ee543c6d76", "d5fdc3c0b2049a025091179a73e0e4174105fcd4", "c6c5b2c0eea2912b8a099cfa190875c6e616c217", "44f4cd28486c4730fbfb262f099cb5df30637211", 
"031854648e0688c1bfc991e7597e54947928fb74", "1c799eca7983c62f7815ac5f41787b3e552567b6", "00df20e5bf5d9d645184191a34c43a4108e92723", "14f264a888912d2bb0defc2ba554e784e4e31fe6", "1d03698a46ff12fdfaf4811528b3e7961dfd2fe6", "637ab11fdb719115ccc4e36110d7139435181d20", "24c48b97725d84246f6dbd39c055648a305e1df4", "dc66c34ce9e477f67eaf847fc331244b562d8fea", "530d53d8a1828e73ff0d731cd2d2cf8a8f8ecae4", "1662ea4412e019883ac6c02dce63929808fd73d3", "794157feebd12d23a852ce83c40e851d94559915", "6907047e7cd11e9885ba2deb37d44a92cfce6136", "5ae98595bba7eb02ba95df2989871bfa86fb02cf", "1f2de093c64679c99437c3031ede4fd4e32c66cc", "064fb3a6f2666e17f6d411c0a731d56aae0a785e", "09849ca4b8159ff69721ebb2f25a81025188937e", "091aded505b84cf87c197875ccfde24d98a300c9", "89d27fc4c5bf15762d001a39f0a74f84c89d3681", "582003042f767f22e4119040909823f94f3cbea9", "5d67a05724b0eeba4e3e9d91fd63cfd03548cf23", "1000025264ef85af0b116b3c12d5b283504d36eb", "1697a4188b9f75ff5324eb9957b8317f459bbf59", "2ebf3b1c0084cf6e57dd8ef921c2ca36d72d6a75", "1b7944d00948f4a1826763cecd3452ba2da89873", "91b4a69f89e9b6677c8775f9f84a5924af8e7fa7", "7765adf12035ead4b0e8ab7aece41b3f549f8def" ], "paperAbstract": "Recommender systems have many successful applications in e-commerce and social media, including Amazon, Netflix, and Yelp. Matrix Factorization (MF) is one of the most popular recommendation approaches; the original user-product rating matrix R with millions of rows and columns is decomposed into a user matrix Q and an item matrix P, such that the product QT P approximates R. Each column q (p) of Q (P) holds the latent factors of the corresponding user (item), and qT p is a prediction of the rating to item p by user q. Recommender systems based on MF suggest to a user in q the items with the top-k scores in qT P. For this problem, we propose a Fast and EXact Inner PROduct retrieval (FEXIPRO) framework, based on sequential scan, which includes three elements. 
First, FEXIPRO applies an SVD transformation to P, after which the first several dimensions capture a large percentage of the inner products. This enables us to prune item vectors by only computing their partial inner products with q. Second, we construct an integer approximation version of P, which can be used to compute fast upper bounds for the inner products that can prune item vectors. Finally, we apply a lossless transformation to P, such that the resulting matrix has only positive values, allowing for the inner products to be monotonically increasing with dimensionality. Experiments on real data demonstrate that our framework outperforms alternative approaches typically by an order of magnitude.", "pdfUrls": [ "http://www.cs.uoi.gr/~nikos/SIGMOD17.pdf", "http://doi.acm.org/10.1145/3035918.3064009" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5f926eab40abdd9895e4eb4c8c07f240b9096427", "sources": [ "DBLP" ], "title": "FEXIPRO: Fast and Exact Inner Product Retrieval in Recommender Systems", "venue": "SIGMOD Conference", "year": 2017 }, "5f96af88dfef2bff4ed8a49ceca909efb701d1d5": { "authors": [ { "ids": [ "20614385" ], "name": "Vishakha Gupta-Cledat" }, { "ids": [ "8508418" ], "name": "Luis Remis" }, { "ids": [ "2791948" ], "name": "Christina R. 
Strong" } ], "doi": "", "doiUrl": "", "entities": [ "Computer data storage", "Dark Side", "Data access", "Next-generation network" ], "id": "5f96af88dfef2bff4ed8a49ceca909efb701d1d5", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "2b6c031c61b78a9f9ee958d291d29c8ab359404e", "165d9bd7e9c4a030b09cf21e35ea0bf96090d8cb", "2b26821287fa20ca9924326e08c4041880171ebf", "05e7b0c687b1cc4f669d33be9896edc2f05d7b68", "cddfb34a35924b2958950deac3a6075f450e4519", "1d998e318cb673e883897a0cdbab61fd6dc1e611", "010f0f4929e6a6644fb01f0e43820f91d0fad292", "18a5f443299784479e78d9e77f175af57cb2fa2b", "313e8120c31fda6877ea426d8a3be9bcf1b6e088", "396514fb219879a4a18762cddfae2a6a607f439f", "793f5e737284925a176f8ec82b3bb0d2178bb330", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "892d5068d8200b6d8d7654c1cbe01883cbcb8488", "3aed29136db8f1e5c6a89fc22d3ae4b4926a3555", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "3168681722207c86827e596860115a2977ce761f", "7ec028ace29244cb74c105327a7e4177a34aa6bd", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "39ac2e0fc4ec63753306f99e71e0f38133e58ead" ], "paperAbstract": "Data access is swiftly becoming a bottleneck in visual data processing, providing an opportunity to influence the way visual data is treated in the storage system. To foster this discussion, we identify two key areas where storage research can strongly influence visual processing run-times: efficient metadata storage and new storage formats for visual data. 
We propose a storage architecture designed for efficient visual data access that exploits next generation hardware and give preliminary results showing how it enables efficient vision analytics.", "pdfUrls": [ "https://www.usenix.org/conference/hotstorage17/program/presentation/gupta-cledat", "https://www.usenix.org/sites/default/files/conference/protected-files/hotstorage17_slides_gupta.pdf", "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-gupta-cledat.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/6d3f/b3ef83a5d5a905250a1ec986e720ae422ed4.pdf", "s2Url": "https://semanticscholar.org/paper/5f96af88dfef2bff4ed8a49ceca909efb701d1d5", "sources": [ "DBLP" ], "title": "Addressing the Dark Side of Vision Research: Storage", "venue": "HotStorage", "year": 2017 }, "5fa53fc29e7ba683ec5c3da0d9c8ade3831cb962": { "authors": [ { "ids": [ "40800616" ], "name": "Ziehen Xu" }, { "ids": [ "1690476" ], "name": "Xiaorui Wang" } ], "doi": "10.1109/IGCC.2017.8323577", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323577", "entities": [ "Android", "Best, worst and average case", "Deploy", "Drain device", "Greater Than", "Hardware performance counter", "Less Than", "Scheduling (computing)", "Smartphone", "Smartphone", "Software deployment" ], "id": "5fa53fc29e7ba683ec5c3da0d9c8ade3831cb962", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-6", "journalVolume": "", "outCitations": [], "paperAbstract": "Smartphone users spend more than 80% of their phone time accessing web information, which could cause undesirably large energy drain. To provide web information, a web activity may invoke asynchronous execution in different hardware devices. Thus, traditional energy estimation methods based on system statistics are usually insufficient to capture the secluded energy cost. 
In this paper, we propose REEWA, a runtime energy estimation framework for web activities on smartphones. In sharp contrast to the traditional modeling methods, REEWA features a design to provide highly accurate and low-overhead energy estimation based on hardware performance counters that can accurately record hardware-level events. Specifically, REEWA features (1) a set of energy models for smartphone hardware components involved in web activities, which are built based on their respective performance counters; (2) a correlation study on the counter selection process that provides the best tradeoff between the estimation accuracy and overhead; (3) a performance counter management mechanism for activity deployment. We prototyped and evaluated REEWA in two real android smart-phones. The results show that, compared to traditional estimation methods, REEWA achieves an average 33% higher estimation accuracy with a negligible overhead (less than 1%, worst-case). We applied REEWA to support heterogeneous core scheduling for web activities, which can help reduce 40% energy consumption.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323577" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5fa53fc29e7ba683ec5c3da0d9c8ade3831cb962", "sources": [ "DBLP" ], "title": "REEWA: Runtime energy estimation for web activities on smartphones", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "5fc51880d9b8a75549d4063def7441d7424881bf": { "authors": [ { "ids": [ "2589074" ], "name": "Vasileios Porpodas" } ], "doi": "10.1109/PACT.2017.21", "doiUrl": "https://doi.org/10.1109/PACT.2017.21", "entities": [ "Algorithm", "Automatic vectorization", "Compiler", "Experiment", "Holism", "Line code", "Parallel computing", "SIMD", "Scalar processor", "Successive linear programming", "SuicideGirls", "Superword Level Parallelism", "Top-down and bottom-up design", "Unreachable memory" ], "id": 
"5fc51880d9b8a75549d4063def7441d7424881bf", "inCitations": [ "5cdd10ad78b4d50888c837196839acb15c88298d" ], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "330-342", "journalVolume": "", "outCitations": [], "paperAbstract": "SIMD vectors help improve the performance of certain applications. The code gets vectorized into SIMD form either by hand, or automatically with auto-vectorizing compilers. The Superword-Level Parallelism (SLP) vectorization algorithm is a widely used algorithm for vectorizing straight-line code and is part of most industrial compilers. The algorithm attempts to pack scalar instructions into vectors starting from specific seed instructions in a bottom-up way. This approach, however, suffers from two main problems: (i) the algorithm may not reach instructions that could have been vectorized, and (ii) atomically operating on individual SLP graphs suffers from cost overestimation when consecutive SLP graphs share data. Both issues lead to missed vectorization opportunities even in simple code.In this work we propose SuperGraph-SLP (SG-SLP), an improved vectorization algorithm that overcomes these limitations of the existing algorithm. SG-SLP operates on a larger region, called the SuperGraph. This allows it to reach and successfully vectorize code that was previously unreachable. Moreover, the new region helps eliminate the inaccuracies in the cost-calculation as it allows for a more holistic view of the code. 
Our experiments show that SG-SLP improves the vectorization coverage and outperforms the state-of-the-art SLP across a number kernels by 36% on average, without affecting the compilation time.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.21" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5fc51880d9b8a75549d4063def7441d7424881bf", "sources": [ "DBLP" ], "title": "SuperGraph-SLP Auto-Vectorization", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "5fccae69d79079c92ec1fd1f4bb9f378deba500d": { "authors": [ { "ids": [ "38348272" ], "name": "Erik Krogh Kristensen" }, { "ids": [ "32710366" ], "name": "Anders M\u00f8ller" } ], "doi": "10.1145/3133914", "doiUrl": "https://doi.org/10.1145/3133914", "entities": [ "Debugging", "Java", "JavaScript", "JavaScript library", "Library", "Library (computing)", "Programmer", "Random testing", "Static program analysis", "Test automation", "Test script", "Type system", "TypeScript" ], "id": "5fccae69d79079c92ec1fd1f4bb9f378deba500d", "inCitations": [], "journalName": "PACMPL", "journalPages": "90:1-90:25", "journalVolume": "1", "outCitations": [ "1b4df92d7f0d9393103cafbdbc512c52a90296b8", "26ac3ad840d8d773eec2ab7fc60d441b34c6adc5", "1469b0cbb109c2a788a346dd0480070de8334dea", "0014188b4abf19cf34f6b4b2769528e856cd93c6", "28050faa2ac017c47515ed962c3d12c24553d57a", "f8acc358b299f6db8e080913119be084c4d6e53a", "5f4599513bc71e6c8ef48408bc4e27afb4e76806", "f7d316f7c032934a43c46766921375b6962dbbbb", "466af0d4ddf960fdaf74af66aeb501762a7c916a", "1f2a40fcddffa188811c7c5b46094dfcbd5ce5e4", "554558b662909b628292e56f016549eaeacd2cc8", "0ab393affe9d674ef790be14fdfade368f3e5989", "165290b2ce275f2b23836953cc8c513a7c35785a", "c03512277e95b7055b2fb13b662916d0ebd74cfc", "0b53fab8dea434e1046836159e184d9565ffd401", "144382ef2ee1d00ce3d36c61601afecca5620c7d", "1d441ef596b72af09e3405132e4ba0563b8840a4", 
"98ed78d119be90bf594e43bf4b8e31c27140664c", "021af3b63fbcf5d867a4b27ca161841bf129c759", "f223c334568a4cda11b6c992daab6282b4d40195", "013bf90f472e49c05263b90d9e36f8d2705e7fc7", "182db73d0991f0886c6b70815a8dc7cf7bbd340b", "05f0c383c785f168da8e80c903517ec5fdf71d41", "0c85abd759cbe878b186a8b01f202a38f048f445", "18876ec7dd36d455392c04668c4058175e82f6a9", "6a96d25b8a826159f53365e3f99d5a910cd6604a" ], "paperAbstract": "TypeScript applications often use untyped JavaScript libraries. To support static type checking of such applications, the typed APIs of the libraries are expressed as separate declaration files. This raises the challenge of checking that the declaration files are correct with respect to the library implementations. Previous work has shown that mismatches are frequent and cause TypeScript's type checker to misguide the programmers by rejecting correct applications and accepting incorrect ones. \n This paper shows how feedback-directed random testing, which is an automated testing technique that has mostly been used for testing Java libraries, can be adapted to effectively detect such type mismatches. Given a JavaScript library with a TypeScript declaration file, our tool TSTEST generates a \"type test script\", which is an application that interacts with the library and tests that it behaves according to the type declarations. Compared to alternative solutions that involve static analysis, this approach finds significantly more mismatches in a large collection of real-world JavaScript libraries with TypeScript declaration files, and with fewer false positives. 
It also has the advantage that reported mismatches are easily reproducible with concrete executions, which aids diagnosis and debugging.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133914" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5fccae69d79079c92ec1fd1f4bb9f378deba500d", "sources": [ "DBLP" ], "title": "Type test scripts for TypeScript testing", "venue": "PACMPL", "year": 2017 }, "5fcee9a2d7427c20acd54257c60bf774e01ac3bd": { "authors": [ { "ids": [ "38958783" ], "name": "He Xiao" }, { "ids": [ "3543872" ], "name": "Zhenhua Li" }, { "ids": [ "2438489" ], "name": "Ennan Zhai" }, { "ids": [ "38127194" ], "name": "Tianyin Xu" } ], "doi": "", "doiUrl": "", "entities": [ "Checksum", "Client-side", "Cloud storage", "Computation", "Dropbox", "JavaScript", "Lambda lifting", "Locality of reference", "Mobile app", "Overhead (computing)", "Pervasive informatics", "Server (computing)", "Server-side", "Synchronization (computer science)", "Systems architecture", "Web application" ], "id": "5fcee9a2d7427c20acd54257c60bf774e01ac3bd", "inCitations": [ "619d77922e69c23feab9322e510004cf383be796" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "3437a7e23e3f97b58f4cf73e7e5b711131e6706c", "469657d82343e1280f224ef9e841561d48016c57", "3207fdb643e19e0602757241c303e9fc21953b49", "64b5ce9ff6c7f5396cd1ec6bba8a9f5f27bc8dba", "2c9b6f1a420ecd9e54b7467efd17f203690ef07e", "2fab90b0da9aaaf3825fb8ef70efff9abe7bf57f", "419ac059fa30761dd35cf83e6204c569199da83b", "21318da2ea08c1f7b8c77701f67483882950df96", "002efcf9f0b58af153556b84395a37f6171195da", "04603b77bc61c0d323baf0774b2d88fb2a39677b", "7a376b08534d26fd19c744b8355cdf2c5a7c7991", "4c664c7015285ce14063204d0790dffbb7bbf46c", "836ecc90c9aa5ac3c7ce73d6be63948c76a0185c", "1721e4529c5e222dc7070ff318f7e1d815bfb27b" ], "paperAbstract": "Delta synchronization (sync) is known to be crucial for network-level efficiency of cloud storage services (e.g., Dropbox). 
Practical delta sync techniques are, however, only available for PC clients and mobile apps, but not web browsers\u2014the most pervasive and OSindependent access method. To understand obstacles of web-based delta sync, we implemented a traditional delta sync solution (named WebRsync) for web browsers using JavaScript, and find that WebRsync severely suffers from the inefficiency of JavaScript execution inside web browsers, thus leading to frequent stagnation and even crashing. Given that the computation burden on the web browser mainly stems from data chunk search and comparison, we reverse the traditional delta sync approach by lifting all chunk search and comparison operations from the client side into the server side. Inevitably, this brings enormous computation overhead to the servers. Hence, we further leverage locality matching and a more efficient checksum to reduce the overhead. The resulting solution (called WebR2sync+) outpaces WebRsync by an order of magnitude, and it is able to simultaneously support \u223c7300 web clients\u2019 delta sync using an ordinary VM server based on a Dropbox-like system architecture.", "pdfUrls": [ "https://www.usenix.org/conference/hotstorage17/program/presentation/xiao", "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-xiao.pdf", "http://www.cs.yale.edu/homes/zhai-ennan/xiao17hotstorage.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/5fce/e9a2d7427c20acd54257c60bf774e01ac3bd.pdf", "s2Url": "https://semanticscholar.org/paper/5fcee9a2d7427c20acd54257c60bf774e01ac3bd", "sources": [ "DBLP" ], "title": "Practical Web-based Delta Synchronization for Cloud Storage Services", "venue": "HotStorage", "year": 2017 }, "5fcfc33e55a5a46ecd670f77252dc0ebc1876a1c": { "authors": [ { "ids": [ "2131252" ], "name": "Niccol\u00f2 Meneghetti" }, { "ids": [ "2043556" ], "name": "Oliver Kennedy" }, { "ids": [ "2000882" ], "name": "Wolfgang Gatterbauer" } ], "doi": "10.1145/3035918.3064026", 
"doiUrl": "https://doi.org/10.1145/3035918.3064026", "entities": [ "Algorithm", "Database", "Expectation\u2013maximization algorithm", "Probabilistic Turing machine", "Probabilistic database", "Scalability", "TI-BASIC" ], "id": "5fcfc33e55a5a46ecd670f77252dc0ebc1876a1c", "inCitations": [], "journalName": "", "journalPages": "573-586", "journalVolume": "", "outCitations": [ "67286943f8f2ca5ba482dc02092ce2d7951d19ff", "be7dbfcb5a69b8a20eaf5036a511469e565c6ee7", "2749cb94f92170f79d0e8ad266605a871767f38a", "0b2ba9da35baf58775d1707b48218501cd596bc9", "6bbd273f9f274d5a3e5dade5e202be809e707647", "5a308819821cf81f21cc68ddff3698217be36b6d", "9d7463fff8b50e491662d490752136a3312991fa", "2077cc18da002721390a23392ce4a25d19c3e2a2", "99539f92c634a586a99f583ff1bad2989a9d5f74", "395d96c34a22c3f0a11ac75e95a03663d6f20ea0", "38ec3d6ec09a9a5a3dffd1df4a1c45c7abd79484", "378cd5ccb49d9fedff9c0c926b91192acc300313", "bdda4bfc82d4e07d48020483d0364511b8e4fb34", "752a9f7b0d8e1205d86a21ff8acf46435993dd53", "2f3902d4e4b793c74fd368ce9b6f3f28a2a1206f", "4268cb59768556d29ff4f8586277bee4183c4afc", "2889dbb1e5770e1eb6e5e3087be5be5841b11fa8", "03d9e06a8bbf15edf1e59664456ad95ba6ef6ad1", "54c133bc5b6b372d0b9d9d6acd7b3319b77abe7f", "24b1dff7e0d9bd36b58d192d2a290fde0ac41133", "177f84bedb6b4ca31750ad7cf3a171d1014199c6", "1f9af2489db58ef12d37b0228d60b630fc047a33", "26f1f4512e5228ed162a27a8051cb91160325799", "5a499e1d1b42093e1b8c0bc3d74200e183f32392", "3a910967232526c862418e7b01448c48694624e1", "330b3187fd07339f609f403155c3bddaa5f0e8c8", "04c73e4f13a19a2ce270a0aa391bd7842aa113ae", "1116b96e668c0dbf5a8b539d66baaf61bedc9dba", "ded1ab521c9839145040cb45c1f0c353536de8d5", "d641ce8fe01ba5ae0ade43feaa1e1e2a7f4839b8", "2654970f704c7c450a05c41c8d4adc2f8b0a5028", "0f48c76228e3f17ce5766614800121c767b72cbb", "a97b77c8c3f9e8247c497d1b4c27c958a15f3a62", "8d9274823441be0e76bab445414b6381bcd3ca0a", "1fe41b1240a0eddec736b675e914b4858a955876", "2e49f21a1e38f7def51ab2db26186d0a3759c5e2", 
"f2aed2478e13a76b0b80f0366a9e2bef59f043a7", "1b19eb5c3ed02dcda18b9300f2804bda0a4c94e6", "02df9340e08e1e5085f1257bb6ddda4cd225e1d3", "0a62aa358438f52e36f37836c6f42f4015850e56", "1ec2d02bd12f3a357449cf1bbc67b6adf7cd6296", "29312721c03293237bfb41e4000dcbdbc0486b68", "2a89cb2ff3597b49f4e0927e749652fdc635260b" ], "paperAbstract": "Tuple-independent probabilistic databases (TI-PDBs) handle uncertainty by annotating each tuple with a probability parameter; when the user submits a query, the database derives the marginal probabilities of each output-tuple, assuming input-tuples are statistically independent. While query processing in TI-PDBs has been studied extensively, limited research has been dedicated to the problems of updating or deriving the parameters from observations of query results. Addressing this problem is the main focus of this paper. We introduce Beta Probabilistic Databases (B-PDBs), a generalization of TI-PDBs designed to support both (i) belief updating and (ii) parameter learning in a principled and scalable way. The key idea of B-PDBs is to treat each parameter as a latent, Beta-distributed random variable. We show how this simple expedient enables both belief updating and parameter learning in a principled way, without imposing any burden on regular query processing. 
We use this model to provide the following key contributions: (i) we show how to scalably compute the posterior densities of the parameters given new evidence; (ii) we study the complexity of performing Bayesian belief updates, devising efficient algorithms for tractable classes of queries; (iii) we propose a soft-EM algorithm for computing maximum-likelihood estimates of the parameters; (iv) we show how to embed the proposed algorithms into a standard relational engine; (v) we support our conclusions with extensive experimental results.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064026", "http://odin.cse.buffalo.edu/papers/2017/SIGMOD-BetaPDBs-final.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5fcfc33e55a5a46ecd670f77252dc0ebc1876a1c", "sources": [ "DBLP" ], "title": "Beta Probabilistic Databases: A Scalable Approach to Belief Updating and Parameter Learning", "venue": "SIGMOD Conference", "year": 2017 }, "5fcff06c8d2374f2daa2497eafe455ec37358b73": { "authors": [ { "ids": [ "3431258" ], "name": "Juncheng Yang" }, { "ids": [ "2212890" ], "name": "Reza Karimi" }, { "ids": [ "11754601" ], "name": "Trausti S\u00e6mundsson" }, { "ids": [ "2170018" ], "name": "Avani Wildani" }, { "ids": [ "3295331" ], "name": "Ymir Vigfusson" } ], "doi": "10.1145/3127479.3131210", "doiUrl": "https://doi.org/10.1145/3127479.3131210", "entities": [ "Algorithm", "Association rule learning", "Block (data storage)", "CPU cache", "Cache prefetching", "Digital footprint", "Hit (Internet)", "Scalability", "Software as a service" ], "id": "5fcff06c8d2374f2daa2497eafe455ec37358b73", "inCitations": [], "journalName": "", "journalPages": "66-79", "journalVolume": "", "outCitations": [ "10faf9efd59cb9cd6f66343bb773a5c3d887d037", "2c30cba9e168bfcf2b1b8b0d5072d6d475fa731e", "7f7028c02dc53da4ccda5db9200a3b5001dd27a8", "bb5e43dd30a3a60df42652d56781568f5cd0a99d", "12a0046a1197ae63c3d616c74e367dc583cef196", "c0cdc52345d88e1e03b68abc2f1492393ffa2bb7", 
"7b420218b4e797dcc6ca96f6c6c3ec29a9688c07", "f8aa33900f552f8112d6186d78bc845d2dfc0007", "45c62d2c9a1020b93ba2804602524f535245ee3c", "3a8c79e6df1859c7a9805d1c9d5ba0c6245ccf3e", "0a00a41380c78577398051435e4768dd2c598045", "22778bfc772ee28b4f1309fcf851102262822c11", "352bb19017dd13aa089a908bf6ff30b9287110ce", "01e83db4f3435f90ea1eb10b5ea8657da3b08f2f", "4d86d64c2d2c97663092b3e41749006a271996e5", "0ad3358ffc0d5e44311160767cc0fb65ccd25b00", "005e25861df8dafdd2699264cc501ff00b252b6b", "1dc5c9675b1f9662deac7a9d5f4b38cd13f76dba", "03a10c542aa6f04487fff244bd7053b8ee0e5c4c", "4d5e837f844c7da0893977b0304dcaabd539aaeb", "31ceeced5d23193c369b98170c45e66bae6ff77d", "3c567c0aa3143dbe604866d835a8e3e53d01deae", "3c2873764e9c8d2f287a17aeff0fe4c1b2052f73", "03c7d4a34b96478fa4f33a29976ceb32668531ab", "0a5882fc7600383eb9d6cc119942f48a70f896ad", "1354abf8baa8b6d39c65758072ae9d0b07227d5c", "306d2d924c21aa7a64f27c124b3753bf745b8751" ], "paperAbstract": "The growing pressure on cloud application scalability has accentuated storage performance as a critical bottleneck. Although cache replacement algorithms have been extensively studied, cache prefetching - reducing latency by retrieving items before they are actually requested - remains an underexplored area. Existing approaches to history-based prefetching, in particular, provide too few benefits for real systems for the resources they cost.\n We propose Mithril, a prefetching layer that efficiently exploits historical patterns in cache request associations. Mithril is inspired by sporadic association rule mining and only relies on the timestamps of requests. Through evaluation of 135 block-storage traces, we show that Mithril is effective, giving an average of a 55% hit ratio increase over LRU and Probability Graph, and a 36% hit ratio gain over Amp at reasonable cost. 
Finally, we demonstrate the improvement comes from Mithril being able to capture mid-frequency blocks.", "pdfUrls": [ "http://arxiv.org/abs/1705.07400", "https://arxiv.org/pdf/1705.07400v1.pdf", "http://doi.acm.org/10.1145/3127479.3131210" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5fcff06c8d2374f2daa2497eafe455ec37358b73", "sources": [ "DBLP" ], "title": "Mithril: mining sporadic associations for cache prefetching", "venue": "SoCC", "year": 2017 }, "5fd26b0954a723f1fa22aa0a9fadcb4de498884b": { "authors": [ { "ids": [ "9099711" ], "name": "Jiecao Yu" }, { "ids": [ "2175353" ], "name": "Andrew Lukefahr" }, { "ids": [ "2587404" ], "name": "David J. Palframan" }, { "ids": [ "31613624" ], "name": "Ganesh S. Dasika" }, { "ids": [ "40040123" ], "name": "Reetuparna Das" }, { "ids": [ "1721289" ], "name": "Scott A. Mahlke" } ], "doi": "10.1145/3079856.3080215", "doiUrl": "https://doi.org/10.1145/3079856.3080215", "entities": [ "Artificial neural network", "Computation", "Graphics processing unit", "Microcontroller", "Multiply\u2013accumulate operation", "Parallel computing", "SIMD", "Sparse matrix", "Synergy" ], "id": "5fd26b0954a723f1fa22aa0a9fadcb4de498884b", "inCitations": [ "305806d53240aa523168d5aa59d902fb0c9a1581", "137c11e359096580b08fd4f80a5e80c784d7bcd4", "626f7c268b68a0955f9c7c6cfc2edff4d2e3291f", "381b008b49d04c1bd5ff00649521fa028b9d3ea8", "edd820f44464c636ff32ca50381dacb4d540db37", "4636d53cc1548f2cd7a185c8ae5fe2320b0502da", "7b9339d3b359310ddbaf6caae13d3a65f657bf04", "56257b0804c9c2418b32337d3af0970f7b67b084", "0c6b249d77e998068184e52a2d7fa7a5a867e12f", "191c05aab25b4dd3752c1300f7c8fa6999d3a627", "ee724d0636dcdaee8889665e5ed347aa41097962", "d1be7f6de75dbe350d8d45bb0997e294fd58a985" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "548-560", "journalVolume": "", "outCitations": [ "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", 
"2bb28f6105ca30b2bc1ba91578234ecb12e788a6", "06ce77e4abea63948580340be25d7f2a80369e5a", "5e41307a2f2850f164ad0175f372799ce61e0bf9", "0441224d76250b03beefae64fa6c6d0879db12a8", "326d65827307862ddc3d39b84ebc662e83ff95b3", "74fc396d0b8ec548d600395182f12c9b06cc84e9", "0b99d677883883584d9a328f6f2d54738363997a", "67096e794eaf872989d6cbd26557de55d532ffb4", "58e4491dc48d46f4f47362686e09e6319c01edc0", "01fcae344d2edb715bcc63a40b6052c0331741bd", "0aad30ce6f25ce570acafcf277255db4319ac65c", "399acab2bee6eccbfffe4a2ce688b6b1075e9c5e", "397de65a9a815ec39b3704a79341d687205bc80a", "1990de7c3c1408326a74742d834df310d943dd18", "812c795ce4797b718a2947a9f9bdc5b6965c2b29", "b7cf49e30355633af2db19f35189410c8515e91f", "81b7dcaef4a53daab41658a4d1e97972d04b3384", "061356704ec86334dbbc073985375fe13cd39088", "5bfecd14937da569eabec0afea710db846d3899b", "17c0a7de3c17d31f79589d245852b57d083d386e", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "080aebd2cc1019f17e78496354c37195560b0697", "34f25a8704614163c4095b3ee2fc969b60de4698", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "49b4094f2c313a92da4461572c0bef80b0d7d649", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "1a07186bc10592f0330655519ad91652125cd907", "3ac3cf82ff8f43db2e467f792d22d57d068f7ee0", "87d810fcea61068e8b29f2b75fa1cbb00c190bea", "3dd2f70f48588e9bb89f1e5eec7f0d8750dd920a", "123ae35aa7d6838c817072032ce5615bb891652d", "fbeaa499e10e98515f7e1c4ad89165e8c0677427", "357e97b04375f09e9f4cfd45c69ecd9d7f0a15e1", "545caeb66c815f1a14385db3bf5df55c96cf175e" ], "paperAbstract": "As the size of Deep Neural Networks (DNNs) continues to grow to increase accuracy and solve more complex problems, their energy footprint also scales. Weight pruning reduces DNN model size and the computation by removing redundant weights. However, we implemented weight pruning for several popular networks on a variety of hardware platforms and observed surprising results. 
For many networks, the network sparsity caused by weight pruning will actually hurt the overall performance despite large reductions in the model size and required multiply-accumulate operations. Also, encoding the sparse format of pruned networks incurs additional storage space overhead. To overcome these challenges, we propose Scalpel that customizes DNN pruning to the underlying hardware by matching the pruned network structure to the data-parallel hardware organization. Scalpel consists of two techniques: SIMD-aware weight pruning and node pruning. For low-parallelism hardware (e.g., microcontroller), SIMD-aware weight pruning maintains weights in aligned fixed-size groups to fully utilize the SIMD units. For high-parallelism hardware (e.g., GPU), node pruning removes redundant nodes, not redundant weights, thereby reducing computation without sacrificing the dense matrix format. For hardware with moderate parallelism (e.g., desktop CPU), SIMD-aware weight pruning and node pruning are synergistically applied together. Across the microcontroller, CPU and GPU, Scalpel achieves mean speedups of 3.54x, 2.61x, and 1.25x while reducing the model sizes by 88%, 82%, and 53%. 
In comparison, traditional weight pruning achieves mean speedups of 1.90x, 1.06x, 0.41x across the three platforms.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080215", "http://www.cse.iitd.ernet.in/~rijurekha/course/pruning-architecture.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5fd26b0954a723f1fa22aa0a9fadcb4de498884b", "sources": [ "DBLP" ], "title": "Scalpel: Customizing DNN pruning to the underlying hardware parallelism", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "5fdf836f5ff96b96433f18464926103a29114552": { "authors": [ { "ids": [ "3088916" ], "name": "Tien Tuan Anh Dinh" }, { "ids": [ "5107518" ], "name": "Ji Wang" }, { "ids": [ "39076756" ], "name": "Gang Chen" }, { "ids": [ "1684598" ], "name": "Rui Liu" }, { "ids": [ "1693070" ], "name": "Beng Chin Ooi" }, { "ids": [ "1688848" ], "name": "Kian-Lee Tan" } ], "doi": "10.1145/3035918.3064033", "doiUrl": "https://doi.org/10.1145/3035918.3064033", "entities": [ "Bitcoin", "Cryptocurrency", "Distributed computing", "Ethereum", "Fault tolerance", "Peer-to-peer", "Relational database management system", "Scalability", "Smart contract", "Solution stack", "Synthetic data", "Systems design", "Throughput" ], "id": "5fdf836f5ff96b96433f18464926103a29114552", "inCitations": [ "24bb8328ad26f21ca2e2322ec2c5da16586dccac", "d66add194ef618c4573604261cf6e52758a73fb0", "77d484b0194698366ba118e28287896829cf6dfe", "1c7f5288f4a2ca577d4232be9b8ff6ec5ce6cfd9", "71c5bc722f575665878dc3ca47953f384426899a", "ba83474ac983727ad9436891653fff68ebbd35f2", "3ea55098cf9e8216139f30d64b5c737a726d466c", "4a2816c8311329ad3c438f752b751652408c82f3", "919e32847097416aada92dff7c8274cd9ca55582", "0a9f9398aa1af2a3f61755ed299d6c7d036dd3ba", "fda96fc9a88b4527804aa47a5442767bd99d90f8", "7744387e05e44276a52769cb2a42c1bec1aa293d", "5ef21885ff7a4b29d07d65663bc669a324f46c51", "a22f52c7555f955cfc6720f7c1b89eb4af613a4d", 
"64cf3131b34719f624d3cc47855d859059c4cbaf", "901c0c68e0cf0dc84d3f1f4ac7195b9a667da4bd", "beb6c6bc12e3756046fd5cc911b17d5d214d4a63", "c6724504d239409210284f203706a4600ff3c91b", "5b9c203dbe31cff3ec1c12b7e80e7b4fc5a1994d", "17b84f0e092401b88b2505c145890d1d0db91219", "18f806215ccbf797ad38bbc2ed465d3b1c8ade08" ], "journalName": "", "journalPages": "1085-1100", "journalVolume": "", "outCitations": [ "6fc9cd15134cdd282e25b8ea58b38240e96bfe90", "155ca30ef360d66af571eee47c7f60f300e154db", "9748241beb02ef1e2d0e6dc877c04b354033a838", "35516916cd8840566acc05d0226f711bee1b563b", "401680ef12c04c247c50737b9114c169c660aab9", "75d83792b880757a09e9a72978cc29beb57c4ad5", "226cfb67d2d8eba835f2ec695fe28b78b556a19f", "1f102935cc21d54f91ae70c09d84157b6011e6dd", "1220e4a011c46804d4369b5580dc7fb6e387af54", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "3fad56eb0379f9684af608bd6c9ad4de706b4cad", "165d99c9d30be5d301b998dc23c1a6a28fd0c425", "40a98bed1d10248d30e86304315df07280dad93e", "10551c91b4d36d1009b23b4d2b88a9e1733fe029", "261893f4c8a7c311a97249a8f42071c566372493", "02b1103e592fa6bf0499e27f1519692441fad557", "43fb74fc45ea844ad087c770fa9be747fbd03b19", "17c6f330d854435e8d8faed245f79b94740a45f1", "9aa0d7253574e50fe3a190ccd924433f048997dd", "136eefe33796c388a15d25ca03cb8d5077d14f37", "e0a854efc5b966f2ac2f7bbcda092ef63d627871", "138ab4913b7218c5835527296e87defb8dfaf206", "b02c6b00bd5dbdbd951fddb00b906c82fa80f0b3", "06b7f3156ef8f0d66fe05e504c0bb908ab288c03", "12d854f326b43232d906eb323db5d282786acb9d", "7be14a23d26a19786ed97807fa8bfbf11b299984", "20f5f8733134d87041b95b742d613051a1fb3fdb", "095a3cee30d64d3a6f22caadd58c45c5cd0b83e9", "4ab6b28bb3342cb4f65555a37418b6a25297425e", "5e86853f533c88a1996455d955a2e20ac47b3878", "0d2c4723e9e5925cde74bd879611fda6f6e3980b", "efb1a85cf540fd4f901a78100a2e450d484aebac", "88a603ffe828c503b6818410bdb3dae435f90ebe" ], "paperAbstract": "Blockchain technologies are taking the world by storm. 
Public blockchains, such as Bitcoin and Ethereum, enable secure peer-to-peer applications like crypto-currency or smart contracts. Their security and performance are well studied. This paper concerns recent private blockchain systems designed with stronger security (trust) assumption and performance requirement. These systems target and aim to disrupt applications which have so far been implemented on top of database systems, for example banking, finance and trading applications. Multiple platforms for private blockchains are being actively developed and fine tuned. However, there is a clear lack of a systematic framework with which different systems can be analyzed and compared against each other. Such a framework can be used to assess blockchains' viability as another distributed data processing platform, while helping developers to identify bottlenecks and accordingly improve their platforms.\n In this paper, we first describe BLOCKBENCH, the first evaluation framework for analyzing private blockchains. It serves as a fair means of comparison for different platforms and enables deeper understanding of different system design choices. Any private blockchain can be integrated to BLOCKBENCH via simple APIs and benchmarked against workloads that are based on real and synthetic smart contracts. BLOCKBENCH measures overall and component-wise performance in terms of throughput, latency, scalability and fault-tolerance. Next, we use BLOCKBENCH to conduct comprehensive evaluation of three major private blockchains: Ethereum, Parity and Hyperledger Fabric. The results demonstrate that these systems are still far from displacing current database systems in traditional data processing workloads. Furthermore, there are gaps in performance among the three systems which are attributed to the design choices at different layers of the blockchain's software stack. 
We have released BLOCKBENCH for public use.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064033", "http://www.comp.nus.edu.sg/~ooibc/blockbench.pdf", "http://www.comp.nus.edu.sg/~ooibc/blockbenchp.pdf", "http://arxiv.org/abs/1703.04057", "https://arxiv.org/pdf/1703.04057v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5fdf836f5ff96b96433f18464926103a29114552", "sources": [ "DBLP" ], "title": "BLOCKBENCH: A Framework for Analyzing Private Blockchains", "venue": "SIGMOD Conference", "year": 2017 }, "5fe578eef61426a093b982de7bee0253c9c82265": { "authors": [ { "ids": [ "3083209" ], "name": "Weijie Zhao" }, { "ids": [ "2289824" ], "name": "Florin Rusu" }, { "ids": [ "39131579" ], "name": "Bin Dong" }, { "ids": [ "1773743" ], "name": "Kesheng Wu" }, { "ids": [ "32645680" ], "name": "Peter Nugent" } ], "doi": "10.1145/3035918.3064041", "doiUrl": "https://doi.org/10.1145/3035918.3064041", "entities": [ "Analysis of algorithms", "Array data structure", "Database", "Deployment environment", "Heuristic", "Pipeline (computing)" ], "id": "5fe578eef61426a093b982de7bee0253c9c82265", "inCitations": [ "0a424fcdcfd55ff0f80a193848547b5e5f434614", "4c6f4328f5e0b3474dc611f2e55f7d44c7a577dc" ], "journalName": "", "journalPages": "139-154", "journalVolume": "", "outCitations": [ "12b6044216d1a0849d74d1a7258619279027e8fc", "d0bc3c139c9a0129a87aa5f724e7bf82b4b04ce6", "1c1266008ad7ba6ae42a6c29964616db08193f42", "49c9b55fe6998f51705d824e36ab2b093b9aa552", "eaf2bccd82bf4cffcf1ef85487d6722f6a04716c", "0af1d9eb0c04296b9f6336ae5ee66ed4ac735e53", "d6342d37c3c670b20a52f10ad9538665debb5636", "a6cf03c9d151ec6ea6bc2d9df4144948d67a537e", "03ab9ddbb8701ec42c2ef13c51be0e857f121318", "3f419db6f66c32bbb7ea887b139abd4e088a0405", "1948575f4cedf689f708d1f0880e79de9ec4c4a5", "0d356d3b790477a5428ec5fb8b5d3e898f549866", "3c4776e5f96ebe8a6de1a855f523a28c687eb994", "01180b69272486a66ebd2c46af9383191a38276b", "251cf2745297e77435a17f2c15bf6359d3c57ef7", 
"9cedf29b81c0cf9f568d70887a5af63e9bdf66cf", "129d3d09e2b2cee69f19e3a8cb91d40b19e29557", "d68a46d4627ba8766906800101715bfb79069341", "370e1fcea7074072fe5946d3e728affd582a9a44", "ab9b4a7054f2583d6298954af451126da5c77632", "aae20add6de2f5bb30de6f230fab9112cb50540e", "2ba9612cbb2eeee9b4267639af3ba7a808b774fe", "bc5c4c91d3de210a2120be1b7f2e4d0f229cad8a", "53edf8302b72dc2bdbc878c07f623586853e67d0", "7ec028ace29244cb74c105327a7e4177a34aa6bd", "0d9b6fc89b06a311f35511c12cd70d97f8fa9196", "2321a150c84d771d81fd81759757795dcda25750", "09a9a0fc511ae93f3373dfb98f6e2d5d02ee0957", "9d62a4187b126111aef25a10e6691df9ca66835f", "3851430bb53b09f78880bb3480f835c22ca81a94", "7982bdb498c5efeafddd2ffaf9810a7f0712f162", "11e512701c7a2a5cd48d5435e8d42f292161ceca", "62fc7ff2abfb026aa4353712f3819cdd8b7e24bb", "490f0881f2d922a13644572540e2b0558a717499", "11f17b63b6742495177ef2194d2be11d98dc8b8c", "8d4178d4e62950b13121279a75f80141285e1cad", "0c97fa96d179dec4f5a9349c4e5203205d427fb8", "277cc859f07728aef42b6708a98b296284598590", "318a9d14077d581fd6c23eec2e11c8b62db57265", "05a26b5deeed6f6f7e9584555b73c5af3905063b", "14c0c9ba3e69846db02a6a3df1ef8e99149aa978", "25ac757c975185450da0b49530df3c1471ebc6b0", "18209a97ce6996bdeb09b3329e3865af2a49c9b9", "762756eba9168421d338f0aedd04e0111ca75462", "4563b8386aee41e0575ef6529ee0385839f06c6f", "59ea6b535d9e462ef01fe34b4576252943257870", "24679ccb0586642553a21e9fcd8aa5a57f97cabe", "4dbb40052b3170c3871335550f0506b9ebea6acd" ], "paperAbstract": "Science applications are producing an ever-increasing volume of multi-dimensional data that are mainly processed with distributed array databases. These raw arrays are ``cooked'' into derived data products using complex pipelines that are time-consuming. As a result, derived data products are released infrequently and become stale soon thereafter. In this paper, we introduce materialized array views as a database construct for scientific data products. 
We model the ``cooking'' process as incremental view maintenance with batch updates and give a three-stage heuristic that finds effective update plans. Moreover, the heuristic repartitions the array and the view continuously based on a window of past updates as a side-effect of view maintenance without overhead. We design an analytical cost model for integrating materialized array views in queries. A thorough experimental evaluation confirms that the proposed techniques are able to incrementally maintain a real astronomical data product in a production environment.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064041", "http://faculty.ucmerced.edu/frusu/Papers/Conference/2017-sigmod-array-views.pdf", "http://faculty.ucmerced.edu/frusu/Talks/2017-05-sigmod-array-views.pdf", "http://faculty.ucmerced.edu/frusu/Papers/Poster/2017-04-norcaldb-array-view.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5fe578eef61426a093b982de7bee0253c9c82265", "sources": [ "DBLP" ], "title": "Incremental View Maintenance over Array Data", "venue": "SIGMOD Conference", "year": 2017 }, "5ffc035d46a3ff9126282fe037d91bf16995175d": { "authors": [ { "ids": [ "38799346" ], "name": "Zain Shamsi" }, { "ids": [ "34983248" ], "name": "Daren B. H. 
Cline" }, { "ids": [ "1737609" ], "name": "Dmitri Loguinov" } ], "doi": "10.1145/3133956.3133963", "doiUrl": "https://doi.org/10.1145/3133956.3133963", "entities": [ "Distortion", "Expectation\u2013maximization algorithm", "Fingerprint", "IP address spoofing", "Internet", "TCP/IP stack fingerprinting" ], "id": "5ffc035d46a3ff9126282fe037d91bf16995175d", "inCitations": [], "journalName": "", "journalPages": "971-982", "journalVolume": "", "outCitations": [ "2dbcc7077a01981679007eceac6c6659a1c18200", "22a73c876926b5128673850ad9f1417ea367c4a3", "52c84aa6c4b25fa97a7a5de31c968d87e61da81e", "1055f55a370fcb0e5fde29c178013d0fe9e25a34", "5b629db1000c1836908f7ab32f5165d33c4d2578", "1a3daf44097d519fd60953c00ebb769d2bbe3727", "b37f7af8d485d937b10ccc90d9660b42c3d989b3", "813314b951e5b1bdc406100b7bdd360c9b198f50", "af580f7108f10661eb8bc37b5c9b7402feeb680e", "205bf2aebe24222208ae16a7851481c3727eeb7e", "7cf56adb702bfb1806eac68682415eb5a3a23c0f", "84b0923f2426df9593c98e9e3b2934be6756b015", "791382f7dc39154ec39ea249493d5f653b739df4", "812ca42ef0ffa57be895b11e673b7210392615f7", "726360d750e9f47a8df5058f934a77f168cad00a", "34944a2b185ef2ad607a4c887b415bb0b064a237", "1a3224f9332b4dc074d7a06ea8e4733cedfd6841", "534aa7da91b0891b4113c6a46e008125fe4197fc", "282531e079cdaff86de2f3079951ff7b9d6762b0", "c3d78a5bf8ceac42e441732f4ac6fd307adf42f9", "fc5ac5d8aedc439ea303aef6ea6a2400e13d9dad", "34a534185687eae7e16399ffa9e3769efd3942b3", "4e33e9d1aa91aa78819ec9700d1024e0f0cdef6c", "3e50155c27ee88d82253a554e05667acc5040ece", "adb7069984e3fa48505cd5081ec118ccb95529a3", "02df9340e08e1e5085f1257bb6ddda4cd225e1d3", "3c69ed9cfb2cdea79f08f55c91e47a9b1f083e8d", "390ab7ed9f0f6e2bacb8fa13874aaffdcef9c86f", "0bc19c55efc2e562644cbec924eb63c78b3586b3", "d48c0d1bd2fe5734b53c3267a67c7281d4282d36", "067eaf07784cdab6836395a2b2f34a506d860fee", "16f8e600d26b4f5374af864636c7c5ce3079b63f" ], "paperAbstract": "Recent work in OS fingerprinting has focused on overcoming random distortion in network and user features 
during Internet-scale SYN scans. These classification techniques work under an assumption that all parameters of the profiled network are known a-priori -- the likelihood of packet loss, the popularity of each OS, the distribution of network delay, and the probability of user modification to each default TCP/IP header value. However, it is currently unclear how to obtain realistic versions of these parameters for the public Internet and/or customize them to a particular network being analyzed. To address this issue, we derive a non-parametric Expectation-Maximization (EM) estimator, which we call Faulds, for the unknown distributions involved in single-probe OS fingerprinting and demonstrate its significantly higher robustness to noise compared to methods in prior work. We apply Faulds to a new scan of 67M webservers and discuss its findings.", "pdfUrls": [ "http://irl.cs.tamu.edu/people/zain/papers/ccs2017-tr.pdf", "http://irl.cs.tamu.edu/people/zain/papers/ccs2017.pdf", "http://doi.acm.org/10.1145/3133956.3133963" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/5ffc035d46a3ff9126282fe037d91bf16995175d", "sources": [ "DBLP" ], "title": "Faulds: A Non-Parametric Iterative Classifier for Internet-Wide OS Fingerprinting", "venue": "CCS", "year": 2017 }, "6008f1fc247e98967225ed9c1bf3731115416576": { "authors": [ { "ids": [ "1755192" ], "name": "Kaiwei Li" }, { "ids": [ "2276707" ], "name": "Jianfei Chen" }, { "ids": [ "6301522" ], "name": "Wenguang Chen" }, { "ids": [ "1701941" ], "name": "Jun Zhu" } ], "doi": "10.1145/3037697.3037740", "doiUrl": "https://doi.org/10.1145/3037697.3037740", "entities": [ "Algorithm", "Count data", "DSPACE", "Data structure", "Graphics processing unit", "Latent Dirichlet allocation", "Locality of reference", "Memory bandwidth", "Sparse matrix", "Time complexity" ], "id": "6008f1fc247e98967225ed9c1bf3731115416576", "inCitations": [ "cf32dc5e40f872e5cce30983a866eeb4fd284f46", 
"0a465c15bebccd1500718548b18800fd3c463ed0", "8c25cd40e5c5c7ee03d49739fb8e758eeae533b1" ], "journalName": "", "journalPages": "497-509", "journalVolume": "", "outCitations": [ "280608cef4e07b5c4de82d75c9cc37c6b9478eb0", "79c9bfe65a473a7f7d96ab536162d7eb101576f4", "61202eb74184c0d75276954c93ce774c72f8035d", "02c7904d986759076b6ddae1560ccba0042028bc", "32496087fa004299f7dab5dc0732d7e0509dfbd1", "1423e6caece9c6ad2633158aa7b5b0b879233a49", "7717b438da4ec3ca4247ff7abf6dd603e91fe41d", "c5e8b04a00f8d5dea248375c9b5e60abcecf808b", "38b42b64eca378f056356881005771d54b9cb0f3", "60e801e3dfc9812e294ed9de6d579e0293d61643", "1ba7a3660b02c51ff836cdca507ed92a419b75cb", "91ffac42e3416f0a0a542b2d981636b02271fdbb", "0b9841c3052ae0b40e594580634251ba53087b3f", "0b8ea85478a1a896425b824f421fe4aa9725d653", "07713a3daa1a0d8431b8997f677634f7e84b05a8", "348d1154938aed48c3abc9c07271ad048d7fb81e", "094cbfa06f8374b49b84524a466a63d34c9ef34f", "17ad7d385564833f682db11240ea3d74a5423256", "215aa495b4c860a1e6d87f2c36f34da464376cc4", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "7380c73dd526fc74791be0323239549d3105e9af" ], "paperAbstract": "Latent Dirichlet Allocation (LDA) is a popular tool for analyzing discrete count data such as text and images. Applications require LDA to handle both large datasets and a large number of topics. Though distributed CPU systems have been used, GPU-based systems have emerged as a promising alternative because of the high computational power and memory bandwidth of GPUs. However, existing GPU-based LDA systems cannot support a large number of topics because they use algorithms on dense data structures whose time and space complexity is linear to the number of topics.\n In this paper, we propose SaberLDA, a GPU-based LDA system that implements a sparsity-aware algorithm to achieve sublinear time complexity and scales well to learn a large number of topics. 
To address the challenges introduced by sparsity, we propose a novel data layout, a new warp-based sampling kernel, and an efficient sparse count matrix updating algorithm that improves locality, makes efficient utilization of GPU warps, and reduces memory consumption. Experiments show that SaberLDA can learn from billions-token-scale data with up to 10,000 topics, which is almost two orders of magnitude larger than that of the previous GPU-based systems. With a single GPU card, SaberLDA is able to learn 10,000 topics from a dataset of billions of tokens in a few hours, which is only achievable with clusters with tens of machines before.", "pdfUrls": [ "https://arxiv.org/pdf/1610.02496v1.pdf", "http://doi.acm.org/10.1145/3037697.3037740", "http://arxiv.org/abs/1610.02496", "https://arxiv.org/pdf/1610.02496v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6008f1fc247e98967225ed9c1bf3731115416576", "sources": [ "DBLP" ], "title": "SaberLDA: Sparsity-Aware Learning of Topic Models on GPUs", "venue": "ASPLOS", "year": 2017 }, "600a6810334f46d9f44bec0d0a9927154ded60dd": { "authors": [ { "ids": [ "40271762" ], "name": "Shai Bergman" }, { "ids": [ "20476284" ], "name": "Tanya Brokhman" }, { "ids": [ "39525900" ], "name": "Tzachi Cohen" }, { "ids": [ "2289351" ], "name": "Mark Silberstein" } ], "doi": "", "doiUrl": "", "entities": [ "Aerial photography", "Central processing unit", "Direct memory access", "Disk buffer", "End-to-end principle", "Experiment", "Graphics processing unit", "High- and low-level", "Operating system", "POSIX", "Page cache", "Peer-to-peer", "Peer-to-peer file sharing", "Peripheral", "RAID", "System integration", "Throughput" ], "id": "600a6810334f46d9f44bec0d0a9927154ded60dd", "inCitations": [ "1db445dc54ee1389a14d72ee628da61cd6c10428" ], "journalName": "", "journalPages": "167-179", "journalVolume": "", "outCitations": [ "43f0c099d44a68783a773f91cd03098a5252bf98", "0cbadd3a63ddc748cc30a16a706eda77a4deab8c", 
"a4b0f11334d33de4f41bfeebfc520fb5b034a31e", "2e5132493276714e4cce3b2f64d60da4e47210cb", "54d2b5c64a67f65c5dd812b89e07973f97699552", "2d692211c220f4b16eabb7639108fba88d00cf2f", "1ecbb1f2080029357bba55e3747bfcaac82aee51", "0e191d726c2b15d54f0e4b5d50f6c0d2e426f3ad", "3ca599420f61c0e3961c71e82444acaf4f63856d", "726099036bb32c3fbaf1650d5900eeaa2ecc8fd9" ], "paperAbstract": "Recent GPUs enable Peer-to-Peer Direct Memory Access (P2P) from fast peripheral devices like NVMe SSDs to exclude the CPU from the data path between them for efficiency. Unfortunately, using P2P to access files is challenging because of the subtleties of low-level nonstandard interfaces, which bypass the OS file I/O layers and may hurt system performance. SPIN integrates P2P into the standard OS file I/O stack, dynamically activating P2P where appropriate, transparently to the user. It combines P2P with page cache accesses, re-enables read-ahead for sequential reads, all while maintaining standard POSIX FS consistency, portability across GPUs and SSDs, and compatibility with virtual block devices such as software RAID. We evaluate SPIN on NVIDIA and AMD GPUs using standard file I/O benchmarks, application traces and end-to-end experiments. SPIN achieves significant performance speedups across a wide range of workloads, exceeding P2P throughput by up to an order of magnitude. 
It also boosts the performance of an aerial imagery rendering application by 2.6\u00d7 by dynamically adapting to its input-dependent file access pattern, and enables 3.3\u00d7 higher throughput for a GPU-accelerated log server.", "pdfUrls": [ "https://www.usenix.org/conference/atc17/technical-sessions/presentation/bergman", "https://www.usenix.org/system/files/conference/atc17/atc17-bergman.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/600a/6810334f46d9f44bec0d0a9927154ded60dd.pdf", "s2Url": "https://semanticscholar.org/paper/600a6810334f46d9f44bec0d0a9927154ded60dd", "sources": [ "DBLP" ], "title": "SPIN: Seamless Operating System Integration of Peer-to-Peer DMA Between SSDs and GPUs", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "6010c059e7bdb5b8052a32f5d6d9e8a613f545e8": { "authors": [ { "ids": [ "1861480" ], "name": "Yuhua Guo" }, { "ids": [ "1727669" ], "name": "Qing Liu" }, { "ids": [ "2351391" ], "name": "Weijun Xiao" }, { "ids": [ "1815384" ], "name": "Ping Huang" }, { "ids": [ "1734819" ], "name": "Norbert Podhorszki" }, { "ids": [ "1736095" ], "name": "Scott Klasky" }, { "ids": [ "33367879" ], "name": "Xubin He" } ], "doi": "10.1109/MASCOTS.2017.23", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.23", "entities": [ "Baseline (configuration management)", "Computer data storage", "Dynamic random-access memory", "Gigabyte", "Hit (Internet)", "Instructions per cycle", "Locality of reference", "Memory bandwidth", "Paging", "Random-access memory", "SWAP (instrument)", "Self" ], "id": "6010c059e7bdb5b8052a32f5d6d9e8a613f545e8", "inCitations": [], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "187-197", "journalVolume": "", "outCitations": [ "4c689148ee5e9d6d116f6babbfab21bf2116802e", "7a72a4abd5b855e4d2713f10b93b65d8665166df", "19a3b3ccf8c7c364b8245aa657a98cb976357f3b", 
"8007305d525a0802f09002b7a5bca2bb3f23ed7d", "2af32811c6bf3be891ee84b19248540dfa1aa58f", "3ac6671a0c61544b9dab543b116eccdaccc6469e", "36de396ee9d1c9991e44c01be35e5206d79c3328", "69652a30c1badf6a4c21006f3ec8da3de976cbaf", "e23298e18aa92ac43fa941d0f5eacb339905b685", "1e63acf596fafe0e7099e99767e1bcac0b7600cb", "00ab25c6582d543932fccbb0f15fe93445f95d61", "8009b1c8cc4af8d3d4b792ac32926487a428172e", "1a5ce35fc5ad575c2c9e5f692bf263082d656f80", "745d50eb6b74b191191ce93c6ef1ec9760ce0cb0", "22b4811bb8265e84d53c62a842cac10dda15f6af", "71cbd5b7858785e8946523ca59c051eb0f1347ba", "72af62917a53f7f88f54fc658f3daade61284937", "32dc6016338a2098147e5edbb72c7c5670f78133", "3000b16ee204ffed4c602ed6f93fc7a692850b6e", "18633256bb17ba0744518479c0752ca87f0d03c6", "e4aed18e1b965f90a86b57a787c52af44a8c20a4", "1b62e2f7ac7a8d3c13773dfa23d05f8ab82b1f23", "3b621e9a6b99f32caa518116cb400035d1deed29", "6f93e0325e577f49f4bed46a2adcfee4a649dc83" ], "paperAbstract": "Die-stacked DRAM (a.k.a., on-chip DRAM) provides much higher bandwidth and lower latency than off-chip DRAM. It is a promising technology to break the \"memory wall\". Die-stacked DRAM can be used either as a cache (i.e., DRAM cache) or as a part of memory (PoM). A DRAM cache design would suffer from more page faults than a PoM design as the DRAM cache cannot contribute towards capacity of main memory. At the same time, obtaining high performance requires PoM systems to swap requested data to the die-stacked DRAM. Existing PoM designs fall into two categories \u2013 line-based and page-based. The former ensures low off-chip bandwidth utilization but suffers from a low hit ratio of on-chip memory due to limited temporal locality. 
In contrast, page-based designs achieve a high hit ratio of on-chip memory albeit at the cost of moving large amounts of data between on-chip and off-chip memories, leading to increased off-chip bandwidth utilization and significant system performance degradation. To achieve a similar high hit ratio of on-chip memory as page-based designs, and eliminate excessive off-chip traffic involved, we propose SELF, a high performance and bandwidth efficient approach. The key idea is to SElectively swap Lines in a requested page that are likely to be accessed according to page Footprint, instead of blindly swapping an entire page. In doing so, SELF allows incoming requests to be serviced from the on-chip memory as much as possible, while avoiding swapping unused lines to reduce memory bandwidth consumption. We evaluate a memory system which consists of 4GB on-chip DRAM and 12GB off-chip DRAM. Compared to a baseline system that has the same total capacity of 16GB off-chip DRAM, SELF improves the performance in terms of instructions per cycle by 26.9%, and reduces the energy consumption per memory access by 47.9% on average. 
In contrast, state-of-the-art line-based and page-based PoM designs can only improve the performance by 9.5% and 9.9%, respectively, against the same baseline system.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.23" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6010c059e7bdb5b8052a32f5d6d9e8a613f545e8", "sources": [ "DBLP" ], "title": "SELF: A High Performance and Bandwidth Efficient Approach to Exploiting Die-Stacked DRAM as Part of Memory", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "601aca24c64f75bd5b124ae0d4d0910c5777b2d9": { "authors": [ { "ids": [ "8219357" ], "name": "Franziska Lichtblau" }, { "ids": [ "3014595" ], "name": "Florian Streibelt" }, { "ids": [ "39825245" ], "name": "Thorben Kr\u00fcger" }, { "ids": [ "1891855" ], "name": "Philipp Richter" }, { "ids": [ "1782612" ], "name": "Anja Feldmann" } ], "doi": "10.1145/3131365.3131367", "doiUrl": "https://doi.org/10.1145/3131365.3131367", "entities": [ "Attack patterns", "Bogon filtering", "Border Gateway Protocol", "Denial-of-service attack", "IP address spoofing", "Inter-domain", "Internet", "Routing", "Spoofing attack" ], "id": "601aca24c64f75bd5b124ae0d4d0910c5777b2d9", "inCitations": [], "journalName": "", "journalPages": "86-99", "journalVolume": "", "outCitations": [ "1bef4d26c917f0060814e86cffa9b22bea70a847", "81759e7a864c53b99ae9400f8d7075eb45c03acb", "1085a592932709315fd83f0eceaea0658cc58459", "38639fc61a30c90ec5001f8054f5842f41d6b221", "387077f7f6d6c51cd520162d432ee7b251ad8dda", "884a1fed267162e8659fe9ee5b8a9c161d407c50", "51bfe9723d6da6666025fd0afa81c94eda870d0b", "23eab8551b95795afcc26767fcdc780198278e0e", "11e7e02278725d09d7c6dd67482249453ad0e58e", "7c8e164bc71f57da42613a5243ca370218909e59", "6a06f87d82975b873b3cd6130a60a26c1d0b181c", "0ca7f8a8a1e6468ca9dad4ee0643ca4796a4eade", "5166913d122cbe9894fd537415053fb181400168", 
"8c234d501ef9b945caeea4f6d0cb156597bdba8b", "d7b303e4496be98cbec3087a16cc2b975f60fd2b", "2242e52c2d3c4a90cfe546a6610ae0067afaed99", "1f680405fc4f7975cd1c5f7d8f5a580b55db0ad5", "b739782f3c8113bc183f2fee2a3aff6ff9acd84e", "da5ac1e08937c034588fa4726ea8dd0f5ade5905", "0c41fe23bc2ffb93ee3717fdbc30279bca2c0726", "00c22c76de81faee6de25466ca1bffe329319e03", "45e1a918a631ffdb3b96ebe270fec0a00b502ad6", "0112891050537d4f587529c396c8b9855796d182", "1d17e7c91aaa88d17a6b53f6c4e9da6c61ba1526", "02d0a3def2294d3d9c1ac73465b3c88bf9c61b2e", "9c945bde5a30f9a7514e656c729059e1bf2dace6", "01d260543655d217c9f8b59e85550164e5d1b1d4", "00e362100c39411f4559d6bd5ad13e4c4c582714", "1a68f5963645a30eb5a0bae9f31a4b55fb0ea1a9", "1ba9175f266160f864b4f4eab8d7067e4c17946c", "0649b4021be6d211c8ffef6f824e002223432884", "32ac1fed2f6f9bf6b8913091f5a6efd40d71b1e1", "2d933e29a96228e0a5f66150252a5c1b3e4f8dc6", "6fb3423ca131bc8932cdfafa03080e74a32365fe", "334ca522814ed02d73c62a114ea2026847b81c90", "cf519007cdf241464c619894459929474c4d44b5" ], "paperAbstract": "IP traffic with forged source addresses (i.e., spoofed traffic) enables a series of threats ranging from the impersonation of remote hosts to massive denial-of-service attacks. Consequently, IP address spoofing received considerable attention with efforts to either suppress spoofing, to mitigate its consequences, or to actively measure the ability to spoof in individual networks. However, as of today, we still lack a comprehensive understanding both of the prevalence and the characteristics of spoofed traffic \"in the wild\" as well as of the networks that inject spoofed traffic into the Internet.\n In this paper, we propose and evaluate a method to passively detect spoofed packets in traffic exchanged between networks in the inter-domain Internet. 
Our detection mechanism identifies both source IP addresses that should never be visible in the inter-domain Internet (i.e., unrouted and bogon sources) as well as source addresses that should not be sourced by individual networks, as inferred from BGP routing information. We apply our method to classify the traffic exchanged between more than 700 networks at a large European IXP. We find that the majority of connected networks do not, or not consistently, filter their outgoing traffic. Filtering strategies and contributions of spoofed traffic vary heavily across networks of different types and sizes. Finally, we study qualitative characteristics of spoofed traffic, regarding both application popularity as well as structural properties of addresses. Combining our observations, we identify and study dominant attack patterns.", "pdfUrls": [ "http://doi.acm.org/10.1145/3131365.3131367", "https://conferences.sigcomm.org/imc/2017/slides/imc17_spoofing_slides.pdf", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final24.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/601aca24c64f75bd5b124ae0d4d0910c5777b2d9", "sources": [ "DBLP" ], "title": "Detection, classification, and analysis of inter-domain traffic with spoofed source IP addresses", "venue": "IMC", "year": 2017 }, "602af33f7874ce311ae7150f4e863833a57b6d19": { "authors": [ { "ids": [ "2047998" ], "name": "Yuxiao Dong" }, { "ids": [ "1681386" ], "name": "Nitesh V. 
Chawla" }, { "ids": [ "1703726" ], "name": "Ananthram Swami" } ], "doi": "10.1145/3097983.3098036", "doiUrl": "https://doi.org/10.1145/3097983.3098036", "entities": [ "COMEFROM", "Cluster analysis", "Experiment", "Feature learning", "Machine learning", "N-gram", "Scalability", "Similarity search" ], "id": "602af33f7874ce311ae7150f4e863833a57b6d19", "inCitations": [ "c741be5953f035bd6019b08e27a5da58e9a5d376", "35c67cde5a5d3ccd2655e680a8b371f7eff0a6bb", "b18f628cb949750036fff7bc66e50765532ac2fe", "0ed218b3711004069bf5bddb0c44fca319e6de0a", "2d93b1396bc816b3681ae17961b8b31894555b3f", "5854a866716cedf321e13a8158d87ad55ed3bbb2", "7c28b81dff1899e5a148ff57888faacc9945ab22", "a8133f4e7ff289241d5a5d954d303a7605fc3eb8", "0b1d9718000c210d40984db610b9e4bc5ac50845", "4d23e94d9c56ce2c7ae7adb105f49b2c51b055a5", "cd9b25a3223300aa4c70050b19f6052e09c0be73", "c0240dc23c05f16e11e0bb73eed9f5d03b7ae042", "ffeacdfd523df26d5aa7b3c7317db76f19871222", "5fc370f6bd36390b73d82e23c10547708e6d7421", "6297f2fa76e87b7f073870714e26022966764fc2", "60718ff912b372afb048e9d93eba450ed6b6d715", "2c2091c0e5551961a334370fa8b7f84351e7d089", "eed1d7303988334fe1afd581b9f37a1d2389cd57", "958ae18d84737749d30cb51c8f6ad764773d9f19", "df37bb13b4fc0d89b8c02e25472b5f2bc7fbf30a", "242dcbd21435ad852231b423c7031e7978c9adb5", "21885344e243862755a24d845187a738fb4106c0", "f205c31b2d6b7c3f4ff6045e7d5243b92716e2a3", "28f8bfa8fc43abcdcb28eaefa5854faf70b5e9b7", "1165347d5b40f9135ddb5e4aaf7b8f681e7bcf62", "873bb1d992e55afca552e27d9c58afd329220c7f", "80e213a0c8351da1f6f21314f60a4383c8140b35" ], "journalName": "", "journalPages": "135-144", "journalVolume": "", "outCitations": [ "2c7a50371c618198dfbc96619fcf74e89b604a15", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "33efd3ecffca21efaf9d1469b7dc3d2a72a0a05e", "bda9b098b255b0b05a6d1ada9491900ae18257de", "231d0ecb5a33eae8495f646a985cc14e209fbfb2", "787d56ec5569f1054f490dcf9a9fb4b87b7990e8", "7cb3403411bb562da14757c2ba905258750f65b9", "11078836c7b2f8baeb6d0ee78ad1787da3b8f0be", 
"20dc1890ca65e01856f31edf10126c2ad67e9d04", "2012f32199adc88747d5a1b47c7b4ba1cb3cb995", "10b3b573b4e708dc0a8d5d73997fa6a13a1d26a6", "b7b9196227cdf2f55bd5b6fac373c9127ccb7b57", "199369d8eaff23e00c106ef2ddc4181696600c1f", "3994eccfada16936a73c60c09b96494098a038d9", "1970a644bc8a9fa7340f04785f8b19e9d33778e1", "2be2f5a313792627ae1760c4a308da5ed164d962", "34aec4d172f59c1ffe32f3afe3bf4ffc5cd7559a", "013cd20c0eaffb9cab80875a43086e0c3224fe20", "09ef6b06bf395b86d54e490ad61bf8a808f5a262", "bbac697ae9fcfdee382d3ac0ad046f4a7752c9f3", "44044556dae0e21cab058c18f704b15d33bd17c5", "13c40b32b9f35c8d24a5c00ec16a88382aaf07fe", "839104964d04c505a827ae854e1251271968c7f7", "4afa6c2eb552ceef0e396fbfe449932492873034", "1c059493904b2244d2280b8b4c0c7d3ca115be73", "2275762a28582716db92df6d525ed2481c7d7f14", "00265f2c37a81856dd36b42ccf9e317f93d8c592", "58a63086b209374d5cf625d27617eba1e96288ef", "0710099aee18aabc0605c52628c29ac5ad94ec60", "0834e74304b547c9354b6d7da6fa78ef47a48fa8", "2414283ed14ebb0eec031bb75cd25fbad000687e", "1a67622ca58aa851afe36ad6c6e78f9fb9d691d2" ], "paperAbstract": "We study the problem of representation learning in heterogeneous networks. Its unique challenges come from the existence of multiple types of nodes and links, which limit the feasibility of the conventional network embedding techniques. We develop two scalable representation learning models, namely metapath2vec and metapath2vec++. The metapath2vec model formalizes meta-path-based random walks to construct the heterogeneous neighborhood of a node and then leverages a heterogeneous skip-gram model to perform node embeddings. The metapath2vec++ model further enables the simultaneous modeling of structural and semantic correlations in heterogeneous networks. 
Extensive experiments show that metapath2vec and metapath2vec++ are able to not only outperform state-of-the-art embedding models in various heterogeneous network mining tasks, such as node classification, clustering, and similarity search, but also discern the structural and semantic correlations between diverse network objects.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098036", "http://shichuan.org/hin/topic/Embedding/2017.%20KDD%20metapath2vec%20Scalable%20Representation%20Learning%20for%20Heterogeneous%20Networks.pdf", "https://ericdongyx.github.io/papers/KDD17-dong-chawla-swami-metapath2vec-poster.pdf", "https://www3.nd.edu/~dial/publications/dong2017metapath2vec.pdf", "http://hanj.cs.illinois.edu/cs512/survey_slides/4-5-metapath2vec-KDD17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/602af33f7874ce311ae7150f4e863833a57b6d19", "sources": [ "DBLP" ], "title": "metapath2vec: Scalable Representation Learning for Heterogeneous Networks", "venue": "KDD", "year": 2017 }, "602fe41b74da4d92051f63b5a95831b6ad2b5552": { "authors": [ { "ids": [ "1734058" ], "name": "Hong Zhang" }, { "ids": [ "1908497" ], "name": "Junxue Zhang" }, { "ids": [ "35667277" ], "name": "Wei Bai" }, { "ids": [ "40611817" ], "name": "Kai Chen" }, { "ids": [ "2579531" ], "name": "Mosharaf Chowdhury" } ], "doi": "10.1145/3098822.3098841", "doiUrl": "https://doi.org/10.1145/3098822.3098841", "entities": [ "Certified Digital Radio Broadcast Specialist", "Data center", "Experiment", "Failure rate", "Functional testing (manufacturing)", "Load balancing (computing)", "Network congestion", "Presto", "Simulation", "Telephone exchange", "Testbed" ], "id": "602fe41b74da4d92051f63b5a95831b6ad2b5552", "inCitations": [ "9bbd5be2829e49b1fac7f034baf7499cb069db95", "9cf2db35591b832d78d112ad6e1746c635d1a6ea" ], "journalName": "", "journalPages": "253-266", "journalVolume": "", "outCitations": [ "42f6218131551632370e5e8f88370d04b220002a", 
"58f692e9b03cb973355aab46bb6f867239aeb513", "39300a6bb64f813bd233343b840cb169d8d0527f", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "14c84514d25336223473290fe7c13ad66a68ef64", "1ea2382eccad5744c602634af156101aa9467c09", "35e7b16b618ca6bf63446372afb2a0ca071f2f13", "1434811cbe1c7831f0ee2974e9093d1e57461f0f", "99d0263b2f28fe6856d847610f595d9d51c4f116", "00f6f16f4b76e931d3924e56674a74fca8d94df3", "2e3cc2d55770aac26d3ce0cb6ddd96dbbcfec4cc", "30e5e40cb96c1d15c80ff0aa199298675465c65c", "c678e962b158153924bbb24c4900b84375be7e57", "9edfe7c6166d08eaf0b7dd865537e2c1c0ed082a", "13fbb4c39c2ce0cd9539df2daae1728fc93e01c1", "058f6752d85a517aae298586fdf117acdd7560ea", "663e064469ad91e6bda345d216504b4c868f537b", "234e6be0d4238f76b3ac038ee422be39f391c625", "8e86374859a1d07e049a2c6e1cb11d12302552fb", "025652412d507a8cf98ecacd8a44d32ce28995e1", "12d3952369fd92db86d4a3fff0a6fa8de3509095", "1376bd56c64639af4645625fd9755c83b2bf7cda", "327a02b19a60319cc35be860ad0259a5c1aef920", "0a90d0d00c2cf9fa172abcf5b41a802b69ccad47", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "9a26f0832fa7508f6396cbee7d06db42e026c0c8", "559e4671b87c3f76d3c485ebdaefe734323879f0", "baf4368220d59064026fc3241a51a66ec0f6fa02", "bc890ed386ebd2dc3d2b6f123e1d5983d957e3ab", "65da29a03c8905cbc0614612d1632864336c4786", "6163d31dd51332eec9056e1e88e2f1aa0df947d6", "4973d22ad92fe2999f18cc57dd4a4cad81ba2cfe" ], "paperAbstract": "Production datacenters operate under various uncertainties such as traffic dynamics, topology asymmetry, and failures. Therefore, datacenter load balancing schemes must be resilient to these uncertainties; i.e., they should accurately sense path conditions and timely react to mitigate the fallouts. Despite significant efforts, prior solutions have important drawbacks. On the one hand, solutions such as Presto and DRB are oblivious to path conditions and blindly reroute at fixed granularity. 
On the other hand, solutions such as CONGA and CLOVE can sense congestion, but they can only reroute when flowlets emerge; thus, they cannot always react timely to uncertainties. To make things worse, these solutions fail to detect/handle failures such as blackholes and random packet drops, which greatly degrades their performance.\n In this paper, we introduce Hermes, a datacenter load balancer that is resilient to the aforementioned uncertainties. At its heart, Hermes leverages comprehensive sensing to detect path conditions including failures unattended before, and it reacts using timely yet cautious rerouting. Hermes is a practical edge-based solution with no switch modification. We have implemented Hermes with commodity switches and evaluated it through both testbed experiments and large-scale simulations. Our results show that Hermes achieves comparable performance to CONGA and Presto in normal cases, and well handles uncertainties: under asymmetries, Hermes achieves up to 10% and 20% better flow completion time (FCT) than CONGA and CLOVE; under switch failures, it outperforms all other schemes by over 32%.", "pdfUrls": [ "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/08/hermes-sigcomm17.pdf", "http://www.cse.ust.hk/~kaichen/papers/hermes-sigcomm17.pdf", "http://doi.acm.org/10.1145/3098822.3098841" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/602fe41b74da4d92051f63b5a95831b6ad2b5552", "sources": [ "DBLP" ], "title": "Resilient Datacenter Load Balancing in the Wild", "venue": "SIGCOMM", "year": 2017 }, "6035f4f834c86ecedae4386757c11cb712b163c6": { "authors": [ { "ids": [ "1883252" ], "name": "Rui Miao" }, { "ids": [ "2407352" ], "name": "Hongyi Zeng" }, { "ids": [ "33742176" ], "name": "Changhoon Kim" }, { "ids": [ "6727727" ], "name": "Jeongkeun Lee" }, { "ids": [ "1914822" ], "name": "Minlan Yu" } ], "doi": "10.1145/3098822.3098824", "doiUrl": "https://doi.org/10.1145/3098822.3098824", "entities": [ 
"Application server", "Application-specific integrated circuit", "Compiler", "Data center", "Load balancing (computing)", "Packet switching", "Portable C Compiler", "Server (computing)", "State (computer science)", "Stateful firewall", "Telephone exchange", "Virtual IP address" ], "id": "6035f4f834c86ecedae4386757c11cb712b163c6", "inCitations": [ "0d573d5f27504e51727b8c1f2be2f206e6a9cc18", "17b6784e4070ea6c1d7fcf176e77400bfa692738", "4059d74b7c3de3fa0ed5a22f55e5ac3c21f9975b" ], "journalName": "", "journalPages": "15-28", "journalVolume": "", "outCitations": [ "006cd63664db53494cc61a44d5c6ebc668dc4b6a", "5b999d36d5230eca01532b357c7cf338a5e0d641", "3b988049dd8f62f772281e90196bbd793700c86b", "07367703f587dbc3313cc613289c4330cebe5c8c", "0270a2b35f745f2ed17fbbac950e8086ee9aa1d6", "17650831f1900b849fd1914d02337e1d006aea0c", "0847dfb07af6685f96b885a93c2cd4dfb4c3d1a9", "2fdfb9098803f9b7d523d9bf67dca8b53cd28cf9", "3a2f37d3648592ffb42155c28f71894ad61937fe", "27f4001214ce0d449eb05d33626f444526accc7c", "5cb88831f543d30cc688fedc445d4e358ef73626", "17b6784e4070ea6c1d7fcf176e77400bfa692738", "0d3f85933b6355789588476e491683532c68a906", "22eaa2f2b7abc3604717ffff4461b5cfbbac285e", "11a4744f7f0883c4232a9f5aaca8b9d29bfaa762", "0baf1bef6ee3bcb0b385a4ac303dcf0b406c64f4", "05d94e914db5ef72f51adda7306ae97aa502fbdd" ], "paperAbstract": "In this paper, we show that up to hundreds of software load balancer (SLB) servers can be replaced by a single modern switching ASIC, potentially reducing the cost of load balancing by over two orders of magnitude. Today, large data centers typically employ hundreds or thousands of servers to load-balance incoming traffic over application servers. These software load balancers (SLBs) map packets destined to a service (with a virtual IP address, or VIP), to a pool of servers tasked with providing the service (with multiple direct IP addresses, or DIPs). 
An SLB is stateful, it must always map a connection to the same server, even if the pool of servers changes and/or if the load is spread differently across the pool. This property is called per-connection consistency or PCC. The challenge is that the load balancer must keep track of millions of connections simultaneously.\n Until recently, it was not possible to implement a load balancer with PCC in a merchant switching ASIC, because high-performance switching ASICs typically can not maintain per-connection states with PCC. Newer switching ASICs provide resources and primitives to enable PCC at a large scale. In this paper, we explore how to use switching ASICs to build much faster load balancers than have been built before. Our system, called SilkRoad, is defined in a 400 line P4 program and when compiled to a state-of-the-art switching ASIC, we show it can load-balance ten million connections simultaneously at line rate.", "pdfUrls": [ "http://conferences.sigcomm.org/sigcomm/2017/files/program/topic-preview-1-1.pdf", "http://doi.acm.org/10.1145/3098822.3098824", "http://www.cs.yale.edu/homes/yu-minlan/writeup/sigcomm17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6035f4f834c86ecedae4386757c11cb712b163c6", "sources": [ "DBLP" ], "title": "SilkRoad: Making Stateful Layer-4 Load Balancing Fast and Cheap Using Switching ASICs", "venue": "SIGCOMM", "year": 2017 }, "603a1ccf4d1839547ee8ab1a155dbb35c7770669": { "authors": [ { "ids": [ "39367158" ], "name": "Shilei Cao" }, { "ids": [ "39835284" ], "name": "Buyue Qian" }, { "ids": [ "34856432" ], "name": "Changchang Yin" }, { "ids": [ "1972958" ], "name": "Xiaoyu Li" }, { "ids": [ "39791510" ], "name": "Jishang Wei" }, { "ids": [ "33992264" ], "name": "Qinghua Zheng" }, { "ids": [ "38673135" ], "name": "Ian Davidson" } ], "doi": "10.1109/ICDM.2017.12", "doiUrl": "https://doi.org/10.1109/ICDM.2017.12", "entities": [ "Artificial neural network", "Automatic Transmitter Identification System 
(television)", "Baseline (configuration management)", "Computer multitasking", "Deep learning", "Dictionary", "Document classification", "Knowledge Search", "Long short-term memory", "Recurrent neural network", "Test set", "Text mining", "Whole Earth 'Lectronic Link" ], "id": "603a1ccf4d1839547ee8ab1a155dbb35c7770669", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "31-40", "journalVolume": "", "outCitations": [ "762b63d2eb86f8fd0de98a08561b77527ae8f165", "0fd13512b30201a12947ae96d3fc98399ffa0ff5", "0891ed6ed64fb461bc03557b28c686f87d880c9a", "e80aec2c9ccebbcd1f8c59bdd432970d408052ee", "0a7c04c252621633992810bf0f184f287610c461", "eba36ac75bf22edf9a1bfd33244d459c75b98305", "5bdd28ff6260dd9146be3533ed7376fa60720468", "3a8285f7430e299ba873b0fa8c1bff8e003c3812", "146f6f6ed688c905fb6e346ad02332efd5464616", "398dee13b3aaaefdf14c78cc1e00dcf265795fd3", "5e9fa46f231c59e6573f9a116f77f53703347659", "27e38351e48fe4b7da2775bf94341738bc4da07e", "1384b429ef5c98992043ff7e3193ce16707c9fc7", "2e10643c3759f97b673ff8c297778c0b6c20032b", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "af88ce6116c2cd2927a4198745e99e5465173783", "023cc7f9f3544436553df9548a7d0575bb309c2e", "39d087b65b483d8ffcb1baade55a9296d85da3c7", "5a1fc4283d9317d7573b7c308b8155d67d82d330", "09a503095db2d68b439e48d67481399198ed0e5b", "0c7f52c753a65ceaf3755e20b906ffd0c05c994a", "289e3e6b84982eb65aea8e3a64f2f6916c98e87e", "0651b333c2669227b0cc42de403268a4546ece70", "3727069c3548cf1814d75cb6630fdd414180284e", "3bff03b7b0b0c4e8f6384dbb2a95e4338d156524", "84a9bc5294dded8d597c9d1c958fe21e4614ff8f", "82bb306038446302cedd20fa986d20640ed88a2e", "424561d8585ff8ebce7d5d07de8dbf7aae5e7270", "044b239c207a9decc77a7c2eb6de1f95b92c9fc3", "c9ca8331a20a56082581e203a399563fe2b54d20", "1f9e2d6df1eaaf04aebf428d9fa9a9ffc89e373c", "272216c1f097706721096669d85b2843c23fa77d", "008154be54eefe4734b454c2841ac66877ac8db5", "1dafc98db545a0050b59fcbcddce5be9ad16db3b", 
"1aec04aa64f165bb075cc4ce6ad79d36c89d62b6" ], "paperAbstract": "The need for short-text classification arises in many text mining applications particularly health care applications. In such applications shorter texts mean linguistic ambiguity limits the semantic expression, which in turns would make typical methods fail to capture the exact semantics of the scarce words. This is particularly true in health care domains when the text contains domain-specific or infrequently appearing words, whose embedding can not be easily learned due to the lack of training data. Deep neural network has shown great potentials in boost the performance of such problems according to its strength on representation capacity. In this paper, we propose a bidirectional long short-term memory (BI-LSTM) recurrent network to address the short-text classification problem that can be used in two settings. Firstly when a knowledge dictionary is available we adopt the well-known attention mechanism to guide the training of network using the domain knowledge in the dictionary. Secondly, to address the cases when domain knowledge dictionary is not available, we present a multi-task model to jointly learn the domain knowledge dictionary and do the text classification task simultaneously. We apply our method to a real-world interactive healthcare system and an extensively public available ATIS dataset. 
The results show that our model can positively grasp the key point of the text and significantly outperforms many state-of-the-art baselines.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.12" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/603a1ccf4d1839547ee8ab1a155dbb35c7770669", "sources": [ "DBLP" ], "title": "Knowledge Guided Short-Text Classification for Healthcare Applications", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "606b8763a3ed206716bf61a5b00afd048907f6e3": { "authors": [ { "ids": [ "1725490" ], "name": "Li Chen" }, { "ids": [ "40611817" ], "name": "Kai Chen" }, { "ids": [ "2495971" ], "name": "Zhonghua Zhu" }, { "ids": [ "1914822" ], "name": "Minlan Yu" }, { "ids": [ "1892184" ], "name": "George Porter" }, { "ids": [ "1680749" ], "name": "Chunming Qiao" }, { "ids": [ "16074166" ], "name": "Shan Zhong" } ], "doi": "", "doiUrl": "", "entities": [ "Computer cluster", "Experiment", "Multiplexing", "Non-blocking algorithm", "Optical fiber", "Optical interconnect", "Series and parallel circuits", "Simulation", "Switching time", "Telephone exchange", "Testbed", "Time-sharing" ], "id": "606b8763a3ed206716bf61a5b00afd048907f6e3", "inCitations": [ "33e28ab30ce23a4abeedeae3f4213fcba80d1947" ], "journalName": "", "journalPages": "577-593", "journalVolume": "", "outCitations": [ "015cd839205cbb6534642bbdc05a3e85fe239d4b", "231ba17921ebd80e95771e28dfb5082e169d5a53", "68d3a881fb6ead85f093061a2e4f9e8058dfb738", "2efa784f3271a88ae5f214ad609b8de5840add92", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "0558c94a094158ecd64f0d5014d3d9668054fb97", "08d410ea6f0c3934324467d809e2ea6ffc8a9a73", "a8aeef42aa8387d7a00b143dd9699666a8249380", "288763b8420ef17baf2f0214cf283433fcb4a447", "282c6a3b573051e3e799d73cfc623ccbd68bcd6a", "f6b99e81495bee0fe9ed29809470d1a636a5f128", "8af2b5b01cdf781464c9d4ec5286bfaa8cdb9dc8", "5cc9393e67a5ad6a08bd6e8efb768064b7f22482", 
"5f3f9223c5c9f896be099bc177929febad508407", "9146f5ad699bd4754a272c2e4daf1bc89198af6c", "6cef37401134e650bcf60748c2a8ead42af56b66", "856d255abc8d50aaa3d4e8bb59e3b53311adf900", "058f6752d85a517aae298586fdf117acdd7560ea", "56abb48a526e875551b28b2e430feef241e0b437", "941512cb0e100560fa7db2b5883b06b004ea0a66", "8d60ae4c2df409e4ab1d7e518b39e4d91dc6c6a7", "8ba231591f574c5aeee7d1f5bb8a0b2a54ca0160", "a273ab99c686b59eb06526b5ac9ec06bc811dc36", "0f3bceece9e421adb3a14046a2f0b4baa4c90dfe", "44a7bb35c10f94af012507f6d8eb6e4593d1536e", "0368d2445d3ee4205ee73da933cb8b810a89091c", "11a4744f7f0883c4232a9f5aaca8b9d29bfaa762", "4a60d79b042a6c1d67a9c20c076357a9fd65b670", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "b4aeb961f5f87217d024dd952a3c258fa340c3ad", "2b11bd49d4eba42a74263c1b90709123d31f29f6", "157629dc2a9d2c2c7696e021ff8c1d5a6e7b4197", "a05548af9f54a7cd57a5c3f2d51b9e76f559f04a", "534ee575a6b0c37e03d1dddb92493b57e9271298", "0c44588bdcbb82c7183958abab5ebc89c0e650f4", "5e292a3dca061e0547a7dde8f238532666b25a27", "a5a034c673eb5240cbe074b20704fdf99f80555f", "663e064469ad91e6bda345d216504b4c868f537b", "130d640b53a1d6700b67a4ea4256071ae18e0ee8", "7b5144c88098a183eb2f8395276b0be6196a442b" ], "paperAbstract": "Existing wired optical interconnects face a challenge of supporting wide-spread communications in production clusters. Initial proposals are constrained, as they only support connections among a small number of racks (e.g., 2 or 4) at a time, with switching time of milliseconds. Recent efforts on reducing optical circuit reconfiguration time to microseconds partially mitigate this problem by rapidly time-sharing optical circuits across more nodes, but are still limited by the total number of parallel circuits available simultaneously. In this paper, we seek an optical interconnect that can enable unconstrained communications within a computing cluster of thousands of servers. 
We present MegaSwitch, a multi-fiber ring optical fabric that exploits space division multiplexing across multiple fibers to deliver rearrangeably non-blocking communications to 30+ racks and 6000+ servers. We have implemented a 5-rack 40-server MegaSwitch prototype with commercial optical devices, and used testbed experiments as well as large-scale simulations to explore MegaSwitch\u2019s architectural benefits and tradeoffs.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_chen_li.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/chen", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-chen.pdf", "http://www.cse.ust.hk/~kaichen/papers/megaswitch-nsdi17.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-chen.pdf", "http://cseweb.ucsd.edu/~gmporter/papers/nsdi17-megaswitch.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_chen_li.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/606b/8763a3ed206716bf61a5b00afd048907f6e3.pdf", "s2Url": "https://semanticscholar.org/paper/606b8763a3ed206716bf61a5b00afd048907f6e3", "sources": [ "DBLP" ], "title": "Enabling Wide-Spread Communications on Optical Fabric with MegaSwitch", "venue": "NSDI", "year": 2017 }, "606e89337d18dc78f57ed4e8abc674dabaa86f89": { "authors": [ { "ids": [ "26375461" ], "name": "Sungjoon Koh" }, { "ids": [ "1698586" ], "name": "Jie Zhang" }, { "ids": [ "7198431" ], "name": "Miryeong Kwon" }, { "ids": [ "8163553" ], "name": "Jungyeon Yoon" }, { "ids": [ "1851743" ], "name": "David Donofrio" }, { "ids": [ "1686484" ], "name": "Nam Sung Kim" }, { "ids": [ "36895144" ], "name": "Myoungsoo Jung" } ], "doi": "10.1109/IISWC.2017.8167758", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167758", "entities": [ "Central processing unit", "Clustered file system", "Computer data storage", "Digital footprint", "Erasure code", "Fault tolerance", "IBM System 
i", "Network traffic control", "Open-source software", "Reed\u2013Solomon error correction", "Scalability", "Solid-state drive", "Solid-state electronics" ], "id": "606e89337d18dc78f57ed4e8abc674dabaa86f89", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "76-86", "journalVolume": "", "outCitations": [ "4540a5f0debcd62d0ca418682af78febc54013c7", "29f3f5918946bf0a4d75bf5244f993847d03e26c", "3b2af12a43d06338dd62681328c75a1999fc87fd", "4a82b6e2f1f7d5b63a377695817a55ea06fb571a", "0801f018cb9eeecb2ab4903125b371346305f0b1", "1748a4950413dbeab59c139b16cfb8ec99f21ff4", "09b0a63b74a6adb0959ce7ea88ffda7f75ca7842", "2da760f90c3d2bf6598becdde9063093f488548c", "42a58d723284834d040db43c83e83e58e5fb92a0", "11e074f655be0b729bf83b7dec2a59f78c0721b1", "027733345e1d7df32de48c63cec756dd0ba4828d", "2ec3c95ac99411cc86477480498e3730625bccdb", "0adfb9e2942108c199c9660a95188a7c8a26415e", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "8b1d8d46836a6d5eb4355315b64d85c128cbff27", "935c0927e09582c3644ebdb1d0c13bfe6cc0d542", "4c2332dc4eee8973cf35a147e37646a323b9c868", "5f3f4c6134a1dc58a963e47ef4cfadf207694f17", "7ae26da9b7666812857883536870c315538f7f10", "18cb2beff9ee9ed81b29dd3fec7a94a9823bb4b7", "58b628792d3eb22a034a871ed3cf373afe591928", "91912a461d30035639ddda2b6de97a388823fb4b", "3f4297d2e5ae196f30f9a18dcd8400dfca5d2e47", "3c5cc0f17dc2f956dbc278f24433f57affe49dce" ], "paperAbstract": "Large-scale systems with arrays of solid state disks (SSDs) have become increasingly common in many computing segments. To make such systems resilient, we can adopt erasure coding such as Reed-Solomon (RS) code as an alternative to replication because erasure coding can offer a significantly lower storage cost than replication. 
To understand the impact of using erasure coding on system performance and other system aspects such as CPU utilization and network traffic, we build a storage cluster consisting of approximately one hundred processor cores with more than fifty high-performance SSDs, and evaluate the cluster with a popular open-source distributed parallel file system, Ceph. Then we analyze behaviors of systems adopting erasure coding from the following five viewpoints, compared with those of systems using replication: (1) storage system I/O performance; (2) computing and software overheads; (3) I/O amplification; (4) network traffic among storage nodes; (5) the impact of physical data layout on performance of RS-coded SSD arrays. For all these analyses, we examine two representative RS configurations, which are used by Google and Facebook file systems, and compare them with triple replication that a typical parallel file system employs as a default fault tolerance mechanism. Lastly, we collect 54 block-level traces from the cluster and make them available for other researchers.", "pdfUrls": [ "https://arxiv.org/pdf/1709.05365v2.pdf", "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167758", "http://arxiv.org/abs/1709.05365" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/606e89337d18dc78f57ed4e8abc674dabaa86f89", "sources": [ "DBLP" ], "title": "Understanding system characteristics of online erasure coding on scalable, distributed and large-scale SSD array systems", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "60b035d5c39df79d984b385eb2dd68288adea8fd": { "authors": [ { "ids": [ "39309263" ], "name": "Mahanth Gowda" }, { "ids": [ "3333096" ], "name": "Ashutosh Dhekne" }, { "ids": [ "2211812" ], "name": "Sheng Shen" }, { "ids": [ "1694368" ], "name": "Romit Roy Choudhury" }, { "ids": [ "2197142" ], "name": "Lei Yang" }, { "ids": [ "9753166" ], "name": "Suresh Golwalkar" }, { "ids": [ "9771782" ], 
"name": "Alexander Essanian" } ], "doi": "", "doiUrl": "", "entities": [ "Dragon Ball Z: Budokai Tenkaichi 3", "Ground truth", "Nonlinear system", "Sensor" ], "id": "60b035d5c39df79d984b385eb2dd68288adea8fd", "inCitations": [ "533bb475a9373a233f6fe0881850a200210bc26a", "3cb86d6757d03312754ea655ec97b2b856c34c15", "4d271267c20c4f6a93b7f22728cb65aba36596b4" ], "journalName": "", "journalPages": "499-513", "journalVolume": "", "outCitations": [ "bc77ba8996555dcfa129553b0bafaaabfe962813", "6363fb9caaf28cf6b4b403725d0b8962ed45c00a", "18316f5001c4b5535c2d39f9de6324acaf8b887d", "29e9cd18af650b7e448dea668121a1d98afd3c46", "05fe031e53dd8990e7076a91277cb2b74e22b811", "736899cb551d20199dd04ea9ee0390a46a1a1007", "c04cd3dc971734ebe2ba1aa113e417e8b28b248c", "363bef3ed1e7dc709add30c31afe6824403768e7", "18f45272373ad42f00a6ce1941743e8a76c50a59", "02ee38745baa9b17505a04ea7e4f8b92af798f58", "926862f4facebf5f76dd5f75d178c8d13d5f7468", "1e6a938c749ba5a90aa2a7d921b814e932993f4f", "73254e2eddc19102e42787603b8ab06af650a0d2", "f084662d653f0aea52682da144a7af39c180e86e", "82dbf4eb8e02ca69beaba66f4fddb1a72ea7404b", "16ccb8d307d3f33ebb395b32db23279b409f1228", "9d791c9868915cc27fc69849694551e396695123", "7c037a591ca9ac0c8fa2ff0079d41518731c84d4", "53de0815f11357c707f6f98a0a89b41f8d90f95d", "18b94ae2f53920d884f77e9aa8a32c80f3005759", "c3543736a6f6372dac4cd54b5b5e4acfe5b0f152", "08616ca445012df0e3c982f742d2662bf0f0ce6e", "82802e411495bbad77fa2415c6d4633dde180764", "7bdb1640e8b76880a538e37c4b393b578f3b0f32", "d56397d6b2b255a71642c1e09769e5b83c403cc7", "bfb456caf5e71d426bd3e2fd529ee833a6c3b7e7", "e5edfbdf645a3dbcdaf7d9fcbf350c67fbbadae5" ], "paperAbstract": "This paper explores the possibility of bringing IoT to sports analytics, particularly to the game of Cricket. We develop solutions to track a ball\u2019s 3D trajectory and spin with inexpensive sensors and radios embedded in the ball. Unique challenges arise rendering existing localization and motion tracking solutions inadequate. 
Our system, iBall, mitigates these problems by fusing disparate sources of partial information \u2013 wireless, inertial sensing, and motion models \u2013 into a non-linear error minimization framework. Measured against a mm-level ground truth, the median ball location error is at 8cm while rotational error remains below 12\u25e6 even at the end of the flight. The results do not rely on any calibration or training, hence we expect the core techniques to extend to other sports like baseball, with some domain-specific modifications.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-gowda.pdf", "http://synrg.csl.illinois.edu/papers/cricket-nsdi-2017.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/gowda", "https://courses.engr.illinois.edu/ece598rr/material/cricket-nsdi-2017.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-gowda.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/60b0/35d5c39df79d984b385eb2dd68288adea8fd.pdf", "s2Url": "https://semanticscholar.org/paper/60b035d5c39df79d984b385eb2dd68288adea8fd", "sources": [ "DBLP" ], "title": "Bringing IoT to Sports Analytics", "venue": "NSDI", "year": 2017 }, "60bcb1502fe1509f268b2d08a349088c92048147": { "authors": [ { "ids": [ "1720335" ], "name": "Xiaoyi Lu" }, { "ids": [ "6388029" ], "name": "Haiyang Shi" }, { "ids": [ "35921177" ], "name": "M. Haseeb Javed" }, { "ids": [ "2409719" ], "name": "Rajarshi Biswas" }, { "ids": [ "1731654" ], "name": "Dhabaleswar K. 
Panda" } ], "doi": "10.1109/HOTI.2017.24", "doiUrl": "https://doi.org/10.1109/HOTI.2017.24", "entities": [ "Apache Hadoop", "Big data", "Central processing unit", "Deep learning", "Graphics processing unit", "InfiniBand", "MNIST database", "Math Kernel Library", "Remote direct memory access", "SPARK", "Scalability", "Speedup", "Stack (abstract data type)" ], "id": "60bcb1502fe1509f268b2d08a349088c92048147", "inCitations": [], "journalName": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "journalPages": "87-94", "journalVolume": "", "outCitations": [ "93008eb5924b63846bcb1c93a96d451068a2351c", "32192d744d86e7cde73f0c9aa773214f88619a9e", "0b99d677883883584d9a328f6f2d54738363997a", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "162d958ff885f1462aeda91cd72582323fd6a1f4", "fda87c93b75633caa0f78b6e7f9384e8e775d3f1", "556035beb283652f1dcfeff7ae43851cd4abc85a", "7fdf31d5ebdd293b3027e6555e256a936ff5515a", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "bd6fe117ca35a7ab144408be1771000feb57c7fb", "145088fc0593b2f95168f3ba4693bbc5487e9068", "003e47961411895dead02b040e29de8bb6167e91", "81b7dcaef4a53daab41658a4d1e97972d04b3384", "898634f0e693cb521ad2dd4a7432c11381e6df60", "1e4709c0b8fe3bf759cd64dc1ede695d6e5316f0", "d517b13f2b152c913b81ce534a149493517dbdad", "061356704ec86334dbbc073985375fe13cd39088", "ee05b94d8af5113b3e4d51e957ca66fb7e6aea35" ], "paperAbstract": "Deep Learning over Big Data (DLoBD) is becoming one of the most important research paradigms to mine value from the massive amount of gathered data. Many emerging deep learning frameworks start running over Big Data stacks, such as Hadoop and Spark. With the convergence of HPC, Big Data, and Deep Learning, these DLoBD stacks are taking advantage of RDMA and multi-/many-core based CPUs/GPUs. Even though a lot of activities are happening in the field, there is a lack of systematic studies on analyzing the impact of RDMA-capable networks and CPU/GPU on DLoBD stacks. 
To fill this gap, we propose a systematical characterization methodology and conduct extensive performance evaluations on three representative DLoBD stacks (i.e., CaffeOnSpark, TensorFlowOnSpark, and BigDL) to expose the interesting trends regarding performance, scalability, accuracy, and resource utilization. Our observations show that RDMA-based design for DLoBD stacks can achieve up to 2.7x speedup compared to the IPoIB based scheme. The RDMA scheme can also scale better and utilize resources more efficiently than the IPoIB scheme over InfiniBand clusters. For most cases, GPU-based deep learning can outperform CPU-based designs, but not always. We see that for LeNet on MNIST, CPU + MKL can achieve better performance than GPU and GPU + cuDNN on 16 nodes. Through our evaluation, we see that there are large rooms to improve the designs of current generation DLoBD stacks further.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HOTI.2017.24" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/60bcb1502fe1509f268b2d08a349088c92048147", "sources": [ "DBLP" ], "title": "Characterizing Deep Learning over Big Data (DLoBD) Stacks on RDMA-Capable Networks", "venue": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "year": 2017 }, "60d368d0e9912f383e1215bc9fa1987e820c9c89": { "authors": [ { "ids": [ "2664272" ], "name": "Mark D. 
Corner" }, { "ids": [ "1778473" ], "name": "Brian Neil Levine" }, { "ids": [ "8231911" ], "name": "Omar Ismail" }, { "ids": [ "22117235" ], "name": "Angela Upreti" } ], "doi": "10.1145/3117811.3117844", "doiUrl": "https://doi.org/10.1145/3117811.3117844", "entities": [ "Computer scientist", "Database", "Ground truth", "Population", "Sampling (signal processing)", "Virtual private network" ], "id": "60d368d0e9912f383e1215bc9fa1987e820c9c89", "inCitations": [ "50798a6f0d91afb1c7d44fa0499c6f3c9dbc9b7a", "49582562fb5a4dc02be99cfa2849772d9150da7b" ], "journalName": "", "journalPages": "435-447", "journalVolume": "", "outCitations": [ "33076e6f8638a607a797de2c7503ae768f4c4445", "45e9e3e5356672c8367781ce8a19b3a3e5eb5c97", "2ec14bc3f03861e750f054727369dd0f9933eef6", "0530d1f0d2599be2e274c53c9e39c924615c2d6d", "97018df402c3512285141b37277eede60740876e", "e9c5f7e1ae65054f4bf5444a0823c360627813df", "77462b767a378aa6207cbff5b100379fe8a55f6b", "05ad6c3ab7a0b1ab0c4fc3af9f1622cf6c0fa40e", "3168dcbdce9ba0f8076133da4ffa6e6732484a4a", "34ddb4d6b98a45b24b9645b5c52b1d3424ba24f5", "af9f8945a9411cb0d8e5705f972a99127049baaf", "1f38c11fe8511c77fb7d383126214c9e7dc28e4a", "355a30d7116edc6901c4877fe5c393d73397887e", "1686ef78f90d366f2df992ea8a8cfef23a0e1d42", "a0b9645813181e0e3e048353f628ca32a9a461aa", "161d9c0156656cce193b64434c3264ed94445d59", "ff4505389c2e5e6e747f59749c733f93a9589966", "11e092850860c4839df040d3809d0827f9fd962e", "9a3c791067911d17a79918b1b0b5826beaeb2fe1", "2230030a213267dbf328443c6d70247774860f47", "5d32fcad45d7df2dd1701ad075b7b2c39390d8a7", "b0b784e657d0523238e9ec416576a93384166424", "9ed21eba6e115c9cae58fa4db05d7d297e6b9260", "0ccd81d4cb36a23c817a5df196cb7ef42bf623b3", "5a032460c589a67e7c73b19c93aa591331758139", "5892b9314971e90e32d8bf81ca4e7dcbecb5ef8f", "1149536915922269f11b445304f3d91225bde04b", "0421d11d81eca22b6136bf12670c9a343de4943a", "3208feae829cba6bd319421fe1fea58962da8fd9", "109f3f9d702b95bd912d66f25ac9fccf5f1c0760", 
"1668f3169d5095f9abee84b3145c18848e8e6383", "a4350675e2ee2a01876c799e22c2843bbc38f4b4", "13bbf077589b0741e7f4e6265f1d2be75e3627db", "766df29ab90d678c7d25cb8d08f3d66f6d501a69", "3f62fe7de3bf15af1e5871dd8f623db29d8f0c35", "2833e9958db9721550f2dab609ef7124875dc12a", "40c10518adb442896a4d4fdcd3a7b127fd38c672", "032aef52d232ef05007d1e09bb646672999221c7", "506113a13ec344a2f652413b8702d8a2a95c17e7", "b18cf5ab477aac99d707f611e6f7c0ee5d9bdf0d", "7fe8ffdd7b2211993c58d283c75fa9e6cc93d5f4", "2f3e0d1780affdfcc16f50392b5ef40985fa2c25", "27372d8cac88d7439c4f5c17ad6aa7a97c78ff56", "09ff6b480574a9178da694bd65c91c3ad00a52fc", "0b834f5a22d50478e07893bd78b3b416778a3c31", "403fb35cc461bc90c14a57a1c87fd90da7714c7b", "294f12b3ee69cb5170d3d8ddcea654bff823d9b8", "0d25768f709b3455e0e6c1d526cc41433b86f7f5", "17ead2ea3e5b6128d4cc38a6672398639788f137", "43e2e4963ed8774c8849b682d8630731887bf86d" ], "paperAbstract": "The most important step in an empirical computer scientist's research is gathering sufficient real-world data to validate a system. Unfortunately, it is also one of the most time-consuming and expensive tasks: placing measurement tools in remote networks or end-clients requires one to marshal resources from different administrative domains, devices, populations, and countries. Often such efforts culminate in a trace that is deficient in multiple ways: a small set of test subjects, a short time frame, missing ground truth for device IDs, networking environments lacking in diversity and geographic spread, or highly biased sampling. We present a method of addressing these challenges by leveraging the most open and globally accessible test and measurement platform: digital advertising. Digital advertising instantly provides a window into 7 billion devices spanning every county for an extremely low cost. We propose Advertising as a Platform (AaaP), an ad-based system to perform massive-scale mobile measurement studies. 
In contrast with measurements made by large media companies who own platforms, ad networks, and apps, we concentrate on the opportunities and challenges for researchers that are end-users of advertising systems. We evaluate a prototype system, discuss ethical guidelines, and demonstrate its use in four scenarios: IP2Geo databases, bandwidth measurement, energy management, and the identifiability of mobile users. We show the efficacy and ease-of-use of AaaP, and illuminate key challenges and the great promise of using AaaP to study a wide variety of mobile phenomena.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117844", "https://people.cs.umass.edu/~mcorner/papers/mobicom17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/60d368d0e9912f383e1215bc9fa1987e820c9c89", "sources": [ "DBLP" ], "title": "Advertising-based Measurement: A Platform of 7 Billion Mobile Devices", "venue": "MobiCom", "year": 2017 }, "60d3753019941799c31617d8a5cbf3d9aa483443": { "authors": [ { "ids": [ "2180844" ], "name": "Louis-Claude Canon" }, { "ids": [ "1718549" ], "name": "Loris Marchal" }, { "ids": [ "1736346" ], "name": "Fr\u00e9d\u00e9ric Vivien" } ], "doi": "10.1007/978-3-319-64203-1_17", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_17", "entities": [ "Approximation", "Approximation algorithm" ], "id": "60d3753019941799c31617d8a5cbf3d9aa483443", "inCitations": [ "c2b05dad04399ee2532b490d496ba2aabab00c9c" ], "journalName": "", "journalPages": "232-244", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_17" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/60d3753019941799c31617d8a5cbf3d9aa483443", "sources": [ "DBLP" ], "title": "Low-Cost Approximation Algorithms for Scheduling Independent Tasks on Hybrid Platforms", "venue": "Euro-Par", "year": 2017 }, "60ea4cdd9ceb049c21a26616743e07cca3ada0d3": { "authors": [ { "ids": [ "3211657" ], "name": "James 
Bornholt" }, { "ids": [ "1833123" ], "name": "Emina Torlak" } ], "doi": "10.1145/3062341.3062353", "doiUrl": "https://doi.org/10.1145/3062341.3062353", "entities": [ "Compiler", "Consistency model", "Electron hole", "Litmus", "Memory model (programming)", "PowerPC", "Programming language", "Relational algebra", "Shared memory", "Software bug", "Test suite", "X86" ], "id": "60ea4cdd9ceb049c21a26616743e07cca3ada0d3", "inCitations": [ "9c79e22df657e92d6d895ac424815ea750e6dc0c", "5060b771ee52f9d6c1115b24865401a6f3df4068", "8d778809125172e79d5528de7dd4ab4af11e25d7", "deba49c12c039fbd667277207dbaa812fba2dece", "8a424ab4a90bc2b2888c4f2e32f912f6f5282b7b", "d4d5a5baab3c0418447566724a4fd16c96e53517", "aa2c9ba9f13ebb28b2f9db8958fe4aac96374639" ], "journalName": "", "journalPages": "467-481", "journalVolume": "", "outCitations": [ "a28f4c45ad72a50f56f7f9df13762c739230b646", "011a0f193a4ad6e118abd5a36f705618071891ba", "2cc63859ef826f85f191fabed5e9c1126f307c3a", "47b8b9cd7b064619e6bf25cde95e7af4bf5bc197", "05a618847e4f08e5bca29dff732757779722b2e0", "ad913bd3d95fc9e5f6888974e04726eb441a6fc6", "25929ecf00df179c51b95b7be250a5220d86d6f6", "49f0f6c03f6eec08fe4426706609413fa5fa6f17", "3a66a682ee36cde0738824b152a51df2ccbb80fd", "0249e70c7cf656852a54ef4915e75c55eb5a3abb", "c6004303d1ef175fef4cbae08f2acd94a7eda1af", "1ef301c1b275091b6a50d620b41df4722f2108f0", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "ebc98a5257506c33a684971297983c754ef1624a", "0ed16859ec6e047f6b9aa29dde6ad5cf991c55d9", "13210f969b8b4d1d12d63a9a8361028a6be498bc", "f0ca54ebf208c7ef592b2ccf4e8961ec5524633c", "9ac0c2bb009443cadd571195bd99a3bcddb4e791", "235b9c8f10461a95398e169ecb91cf3e223d3350", "2a3be01de01326eda77adf03428d8bd9356330e6", "2406725a71bd6a4073ba19be0d8a5660d416223f", "7b93d3e42a7498e4de67a76b8f6861875fa74d79", "05a518c3b1a6f5c15d77f8829368677a263ff15d", "52e00689f9e93786b25c388cede905db0cae8701", "3eae0271717f6b4d65024abf04e5d98aef41d748", "15c8550942ee0191bb34d177d7e653b2f3cb6eff", 
"d42b7b536bdd0db06616f3ea7ec95323a7c6d615", "291275c023bb218777d5f2a21f60a1a84d3a60c8", "857726e6c21504e66569e3d61ed6b8710e44db4a", "5ae07f575fb2feb1e03d08af9fe29fafe0a306d7", "0695642b13d9600c524feac5a5900158b0e5e4ed", "370d546ab1ce3988194cbf835ee09e73e3733b41", "3c142ad4ca5ed211a606450801d54b3b30d687e9", "7c8f5897a30b4ea4cbfe73d74302dbe9a125a254", "8dbc653d8194c257dfa198b427523191b3865464", "e2cd72273908651ea11f9cb45d0dd5d755ca3bd0", "4292384b0b798feea238c7f0437d88476e342771", "33879317057d3478699f06594dffd0e7514d7116", "2fb50ff835ad2c4d44292af8f918fc9a5dc996a0", "1aac5c5e6dda36e455bed8af80dd1fd6bb31321e" ], "paperAbstract": "A memory consistency model specifies which writes to shared memory a given read may see. Ambiguities or errors in these specifications can lead to bugs in both compilers and applications. Yet architectures usually define their memory models with prose and litmus tests—small concurrent programs that demonstrate allowed and forbidden outcomes. Recent work has formalized the memory models of common architectures through substantial manual effort, but as new architectures emerge, there is a growing need for tools to aid these efforts. \n This paper presents MemSynth, a synthesis-aided system for reasoning about axiomatic specifications of memory models. MemSynth takes as input a set of litmus tests and a framework sketch that defines a class of memory models. The sketch comprises a set of axioms with missing expressions (or holes). Given these inputs, MemSynth synthesizes a completion of the axioms—i.e., a memory model—that gives the desired outcome on all tests. The MemSynth engine employs a novel embedding of bounded relational logic in a solver-aided programming language, which enables it to tackle complex synthesis queries intractable to existing relational solvers. This design also enables it to solve new kinds of queries, such as checking if a set of litmus tests unambiguously defines a memory model within a framework sketch. 
\n We show that MemSynth can synthesize specifications for x86 in under two seconds, and for PowerPC in 12 seconds from 768 litmus tests. Our ambiguity check identifies missing tests from both the Intel x86 documentation and the validation suite of a previous PowerPC formalization. We also used MemSynth to reproduce, debug, and automatically repair a paper on comparing memory models in just two days.", "pdfUrls": [ "http://homes.cs.washington.edu/~emina/pubs/memsynth.pldi17.pdf", "https://homes.cs.washington.edu/~emina/doc/memsynth.pldi17.pdf", "https://homes.cs.washington.edu/~bornholt/papers/memsynth-pldi17.pdf", "https://homes.cs.washington.edu/~bornholt/papers/memsynth-pldi17.slides.pdf", "http://doi.acm.org/10.1145/3062341.3062353" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/60ea4cdd9ceb049c21a26616743e07cca3ada0d3", "sources": [ "DBLP" ], "title": "Synthesizing memory models from framework sketches and Litmus tests", "venue": "PLDI", "year": 2017 }, "610d61a4543bdb1109de0e5f9760d44e44e6014d": { "authors": [ { "ids": [ "38342948" ], "name": "Rong Ge" }, { "ids": [ "39797395" ], "name": "Pengfei Zou" }, { "ids": [ "1781155" ], "name": "Xizhou Feng" } ], "doi": "10.1109/ICPP.2017.68", "doiUrl": "https://doi.org/10.1109/ICPP.2017.68", "entities": [ "Multi-core processor", "Non-uniform memory access", "Parallel computing", "Power management", "Scalability", "Supercomputer", "Symmetric multiprocessing" ], "id": "610d61a4543bdb1109de0e5f9760d44e44e6014d", "inCitations": [ "66ace9694c92cbfad85ca3fccb79215b44b6d126" ], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "591-600", "journalVolume": "", "outCitations": [ "a9831b6062fb678d4591eee853e81116d038bb05", "9efa7f12bfd9d8ed38c29c5e128b21b07a438cd9", "4a6bf6c38051ec5f81be18de75e8ecb6e5e72c06", "073e26aa7192825a8d872fb0c6f25bc31aca77cf", "676ff3d6d04b5748771f843dbf8dd07ee0a612db", "9a000edf8d478fa3b0d7f74fb966664da5d33354", 
"5b20182516558c9da4b930064272a6d9dde74a3d", "20d6a8a39ebecd21bc8a4df53f248356d38ea6d9", "bce002b1fa120ab391345be98c747ce41aa32cd6", "cedeaf86f2e06e4ab3b218aaf6cdfe65e2d9cbe1", "efee61acb1847de685817b7d9bc1b6b095ef5026", "1a9ddc67539eccccaeb3c12ef947ad2a46bdda47", "a47b408349a8146f71cb54c38226d2f7d92700fe", "1fd674f96ef677bf09d7538673eda576aa8102c9", "35bc9e9d0a8c0451c47131875e88d7c20f28aa92", "0eb75d4bcd9907617b4d27c583ae480405f91721", "c111caed42f318d59e3d30bfd875bcee8581652b", "33ff17faa39706c2609c557736724a90ce272af0", "02ebdcf8200135ec0433e12e4ef2459ac740370b", "89d4dcc25809693fa3505d09b2721c1c2c2559b2", "77f826132cf09ac91ea9c859387a8d52221a019a", "35e911328bada2e9eb915a2424153d9fda4326be", "1585eaffcf9c9836eb1607e279e43ce2793e59a0", "3462fb38042f0bde20c758728d7c8c28a1f47e09" ], "paperAbstract": "Power is a critical factor that limits the performance and scalability of modern high performance computer systems. Considering power as a first-order constraint and a scarce system resource, power-bounded computing represents a new perspective to address the power challenge in HPC.In this work we present an application-aware, multi-dimensional power allocation framework to support power-bounded parallel computing on NUMA-enabled multicore systems. This framework utilizes multiple complementary software and hardware power management mechanisms to manage power distribution among sockets, cores, and NUMA memory nodes under a total power budget. More importantly, this framework implements a hierarchical power coordination method that leverages applications' performance and power scalability to efficiently identify an ideal power distribution.We describe the design of the framework and evaluate its performance on a NUMA-enabled multicore system with 24 cores. 
Experimental results show that the proposed framework performs close to the oracle solution for parallel programs with various power budgets.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.68" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/610d61a4543bdb1109de0e5f9760d44e44e6014d", "sources": [ "DBLP" ], "title": "Application-Aware Power Coordination on Power Bounded NUMA Multicore Systems", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "61215d040bde5e501b37efbe3caccd52209297f6": { "authors": [ { "ids": [ "2365563" ], "name": "V\u00e9ronique Cortier" }, { "ids": [ "2651276" ], "name": "Constantin Catalin Dragan" }, { "ids": [ "1988409" ], "name": "Fran\u00e7ois Dupressoir" }, { "ids": [ "38290160" ], "name": "Benedikt Schmidt" }, { "ids": [ "3105882" ], "name": "Pierre-Yves Strub" }, { "ids": [ "1800136" ], "name": "Bogdan Warinschi" } ], "doi": "10.1109/SP.2017.28", "doiUrl": "https://doi.org/10.1109/SP.2017.28", "entities": [ "Computational model", "Helios", "Privacy", "Program animation" ], "id": "61215d040bde5e501b37efbe3caccd52209297f6", "inCitations": [ "4ab1b93f531dd900d8d98caa18bbf2fd48d37971" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "993-1008", "journalVolume": "", "outCitations": [ "22ba6ba6fa5c25fd86c3dfa169e8bc6020f93303", "a00f57eafa0425c6f4d3f5cc47244ea282a83c6b", "f31411fd1fb0078f8e3e277f1df6a462ce7ce6a4", "9dde09e003f8200400567b0c7be7c60679daaa3c", "c96c8897c0835a7a71e913dda9b20df193b8cc15", "5af56b18071e7adf0d5b9a118e05bc893aace8e3", "0efa9ee4557c8b0cc8f0d329a0dab34c53fd55f2", "e6d0ac36f37643ab15875c3a5a830e9e51dbf08d", "1c2a3b0139a9f086bfa11f64c441108380dfab87", "8328c1a557ca35509c93bb31c3f95dce2ca6403f", "761ad28344eb146f8579f7dc79eacf31422e23be", "38f0fefb18aee8d6a0ab9a722c7b0030dd4bf2d6", "8e208a9025a83bd9df87bc46f69f355af846f55a", "849ff1827fd26b3677bda801a2e173a84c06c263", 
"a089defc1eea22b4d3afaeccf031ae110d7af459", "2949851ab9827fdd334ecc3b392296df2aacaf92", "479f5c87cb4e6a0bf6183d5e0835dd2694ecbba6", "3cb0331e8b8b2c15f97934bd92016efa38a1e2af", "406a37d8ccb6cb1355b7aeded65e50fc00b2977c", "06a1d8fe505a4ee460e24ae3cf2e279e905cc9b0", "1c73dbac913a318fe5899d4bafcda0829217fd8e", "659a13e6c95688ff7ed973984c44b923176219b0", "2c61ecd4fab14e05b0bbe5b5b1d185a787749fb6", "18243474ab49a9046fb921ddfa8742a981ad9297", "d8e22381d67a29d04624ae451165533cc24d0025", "371cea2d63b4c282253b7ff0002b4dd9eb29f5fb", "e89d7b727da4617caf1c7c2dc8523d8565079ecf", "02fd1a072a72d24c5f61d709a1b3ce863da32729", "8cfce0a2eeabc1167ba97bd91630fc7179594dbc", "12af9a83438a8dc0db9e704dc2c1674d7ab1a08d", "5674a10bc55f8487454f0c7753c56ecec693d842", "aa8f74ef9b48a8983344b730a7618c684404da77", "c13a2d6e2f239605d3308b57dae9295fc7baaad9", "53222ae9fcf7bf668fdf585764f21b4f77fa939c", "e95cd876170c1bf0a35f21e7c6d98946698bcaf5", "f9c568cebd52de1fef344872ffc8ff722a4c8ff5", "8eaef162b486b59ba310881d77c1ee2de0eb53ab", "9feb4b268fea8a7f9513dcc9db475f5ee9c7dfde", "db0d1f3ed4a418e126d0281d5b3aadd1fd45c982" ], "paperAbstract": "We provide the first machine-checked proof of privacy-related properties (including ballot privacy) for an electronic voting protocol in the computational model. We target the popular Helios family of voting protocols, for which we identify appropriate levels of abstractions to allow the simplification and convenient reuse of proof steps across many variations of the voting scheme. The resulting framework enables machine-checked security proofs for several hundred variants of Helios and should serve as a stepping stone for the analysis of further variations of the scheme. 
In addition, we highlight some of the lessons learned regarding the gap between pen-and-paper and machine-checked proofs, and report on the experience with formalizing the security of protocols at this scale.", "pdfUrls": [ "https://www.ieee-security.org/TC/SP2017/papers/401.pdf", "https://doi.org/10.1109/SP.2017.28" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/61215d040bde5e501b37efbe3caccd52209297f6", "sources": [ "DBLP" ], "title": "Machine-Checked Proofs of Privacy for Electronic Voting Protocols", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "612cf39494d5ea3db60616ef50836746dd289674": { "authors": [ { "ids": [ "1692609" ], "name": "Shuo Yang" }, { "ids": [ "36960048" ], "name": "Kai Wu" }, { "ids": [ "12052927" ], "name": "Yifan Qiao" }, { "ids": [ "1678390" ], "name": "Dong Li" }, { "ids": [ "2467444" ], "name": "Jidong Zhai" } ], "doi": "10.1109/CLUSTER.2017.61", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.61", "entities": [ "Algorithm", "Application checkpointing", "Application programming interface", "CPU cache", "Computer data storage", "Correctness (computer science)", "Data structure", "Emergence", "Fault tolerance", "Iterative method", "Matrix multiplication", "Monte Carlo method", "Non-volatile memory", "Performance Evaluation", "Simulation", "Sparse matrix", "Volatile memory", "Volatility" ], "id": "612cf39494d5ea3db60616ef50836746dd289674", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "475-486", "journalVolume": "", "outCitations": [ "05ae5121cfc7c101d72f70ee0e7a5f938f8140f1", "345ff2f19178c983f2742b1f3198fa045cca2121", "0642df41e63e4f6223a6f4f9b9bb56c7dbebc34f", "885c666fbcfd1a10c613496d7a041d01b99c7a39", "8b45945681e477c65d1cec5cf8cd73f06fedca4c", "daf0a5b16eb51ae418f18a6324970626a29dcc96", "10419c6f4aa50a36ed0b103c9ddb9aec45f133fe", "736f4a1c6eff51f8206530aca67baa95bafc5f1c", 
"10d8afea57c8f159c4eb2664a40c8fb859acefef", "3e99a917b9a4e89497541bbc3bb72079054644c6", "cd69034e49d8821636c704c14fb6dfe5c78fc3ac", "074d096a54bf6bb33c59f628206848c7724a7cf3", "37c698dcc062596a85291c6138cae2787ef3ca20", "560d637d779eff2e68ae47bf15a7344006bca638", "741a04ef3a0c3953a3d37726bf4d6170eaa68a55", "aeed48f52b510985b75ef060213bfd6731cd081a", "bf5497e15f22233cbc2a4d0c3cc2c36f26738701", "3d2dfe972be7a60937df97bd309b423726375cb4", "15c80ec5104e98d6f84b5ed348ba0276c0739862", "25296a1ee792b8709e037aa3da7ea156c41f5ccf", "3a7417fd3711d6607b16b839fd768021eb09e629", "3af216f371069b57c0dca5448384d052fb490fb4", "8b3235bbd59d3b85081d9c22cf1af494e2d1159a", "4d4c9c4a8ea86c4c25a2f308962ba0231b33af02", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "2657302160775f8766964d013efe242836693f3e", "16653666b0005f91060a3e402566659749b84313", "0dd9623584f0d80071631b9bf899817df2db2e37", "67ddcb550dd8fe57e4567bd7ca78f6e841f25c4c", "f5d58ccdd242c9a1fae93ab8a575cb3220cb89a0", "53f27709883b9aae98b4e960692cd2aadf852a12", "793f5e737284925a176f8ec82b3bb0d2178bb330", "2e663c1047ff14ddc2416229459922757a20edfb", "2fee80acb6f7b4172622e0f40d350339ca4e3dc9", "0653e2ed9f683868cb4539eb8718551242834f6b", "f8f52a402b8833ea1ad8eb34e48f011b25c0d306", "33e64874996ac6d163e4e5a97e28b617de7cc0f5", "94783d113951822195d4ba44599a8fcbdef9d4bf", "1bf91711b94e507c62d91c79e72efcee5d21f627", "9a22ebc2bd52d67fa8b90defb1200891935acfa1", "47b851237f240831abee3971bca6bb8d2a121eb1", "2092d64f8d99ab8cc5b353bbc3dddf4186bcb461", "36572d9cee0979e8787eee44cd077376b780473b", "9376a2d69d06e39fd6fd27c9ce2f0817cc1dd4ef", "0204f40221260d00c5ee63646560a40dcd7d97d1", "318def48f4414636555d44f52da8c0bab16a46bc", "a2f99528a2dd954f38f6e0bd42b686c165f23403", "d7203f317b37d565ab54b6a48ef13ded3777eb78", "82128fe46d22cd8da345701eddebc9ee86c931ca", "0686be9822d3631843a21330aeaa98278c74e647" ], "paperAbstract": "Fault tolerance is one of the major design goals for HPC. 
The emergence of non-volatile memories (NVM) provides a solution to build fault tolerant HPC. Data in NVM-based main memory are not lost when the system crashes because of the non-volatility nature of NVM. However, because of volatile caches, data must be logged and explicitly flushed from caches into NVM to ensure consistence and correctness before crashes, which can cause large runtime overhead.In this paper, we introduce an algorithm-based method to establish crash consistence in NVM for HPC applications. We slightly extend application data structures or sparsely flush cache blocks, which introduce ignorable runtime overhead. Such extension or cache flushing allows us to use algorithm knowledge to reason data consistence or correct inconsistent data when the application crashes. We demonstrate the effectiveness of our method for three algorithms, including an iterative solver, dense matrix multiplication, and Monte-Carlo simulation. Based on comprehensive performance evaluation on a variety of test environments, we demonstrate that our approach has very small runtime overhead (at most 8.2% and less than 3% in most cases), much smaller than that of traditional checkpoint, while having the same or less recomputation cost.", "pdfUrls": [ "https://arxiv.org/pdf/1705.05541v1.pdf", "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.61", "http://arxiv.org/abs/1705.05541" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/612cf39494d5ea3db60616ef50836746dd289674", "sources": [ "DBLP" ], "title": "Algorithm-Directed Crash Consistence in Non-volatile Memory for HPC", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "613cdadb56592f704349bb25a359ebecd8fd9e0f": { "authors": [ { "ids": [ "7456408" ], "name": "Supreeth Shastri" }, { "ids": [ "1697572" ], "name": "David E. 
Irwin" } ], "doi": "", "doiUrl": "", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Cloud computing", "Comparison shopping website", "Server (computing)" ], "id": "613cdadb56592f704349bb25a359ebecd8fd9e0f", "inCitations": [ "4ce8ad1513e84cb464efa68827119295530ebaa4", "2d40bd8a14b429142e487282761a2a6b95d6b96b" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "2862d11cc739a1c6baf6addb56502d869233c11c", "207b2b45ae08fe3171ae12e2def3f57dd316f062", "0935bb723e4071ccd4c2334d3b6d728faa111d11", "12c28dd5ea0b2d0269a67a43c2eb0b1207b2b889", "1804e67480b9c46b377d0a102e26d3a9a1a9139b", "05be0db01d70bcce9530b462ab2368f9e15127d9", "5c6086766fd09dda4de4e8a4fe4ac6bea42157c7", "c7992b82d1e16849727a7d0add13f43162595d20", "5edb4dd1952a63707f1ff73db5e507c21bb962f8", "51694f797b0d4a7e03b1e9a6587a9d4976e92297", "530b3179e8532e87520ccd0daebda3d81ef6319b", "12635bdd3bd32f09c85a9070977a281fcb32ff61" ], "paperAbstract": "Infrastructure-as-a-Service clouds are rapidly evolving into market-like environments that offer a wide range of server contracts. Amazon EC2\u2019s spot market is the clearest example of this trend: it operates over 5000 markets globally where users can rent servers for a variable price. To exploit spot instances, while mitigating the risk of price spikes and revocations, many researchers and startups have developed techniques for modeling and predicting prices to optimize spot server selection. However, prior approaches focus largely on predicting individual server prices, which is akin to predicting the price of a single stock. We argue that researchers should instead focus on \u201cindex-based\u201d modeling and prediction that aggregates prices from many markets in each region and availability zone. 
We show that, for applications flexible enough to select and \u201ctrade\u201d servers globally, making decisions based on broader indices lowers costs and improves availability compared to index-agnostic policies.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-shastri.pdf", "https://www.usenix.org/conference/hotcloud17/program/presentation/shastri", "http://www.ecs.umass.edu/~irwin/trading.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/613c/dadb56592f704349bb25a359ebecd8fd9e0f.pdf", "s2Url": "https://semanticscholar.org/paper/613cdadb56592f704349bb25a359ebecd8fd9e0f", "sources": [ "DBLP" ], "title": "Towards Index-based Global Trading in Cloud Spot Markets", "venue": "HotCloud", "year": 2017 }, "613d57643da1286902a23ed72cce626eebe84275": { "authors": [ { "ids": [ "1885681" ], "name": "Reza Azimi" }, { "ids": [ "33683147" ], "name": "Tyler Fox" }, { "ids": [ "1721621" ], "name": "Sherief Reda" } ], "doi": "10.1109/CLUSTER.2017.86", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.86", "entities": [ "64-bit computing", "ARM architecture", "Branch predictor", "CPU cache", "Central processing unit", "Emergence", "General-purpose computing on graphics processing units", "Gigabit", "Network interface", "Roofline model", "Scalability", "Server (computing)", "System on a chip", "Tegra", "X86" ], "id": "613d57643da1286902a23ed72cce626eebe84275", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "333-343", "journalVolume": "", "outCitations": [ "b669cac8563aae45df3f7b12edf45811acd5a147", "21a0c328f428a1d4694246ed6c44ed472b74133a", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "48ebdcef1587a21bb9fbbd50a15ef9965abf0e81", "3b75d3cd12db703011f24639a3025adb2df5ccab", "4f86cc14eb05db64d8b037833c0b416ea1b138ee", "575fa8aff336047f81df1abf705e3e9ead655d7a", "36275d14731ab7ac192eb4af487f5d34958ad084", "a1b3331c56155d7920481b3f4f3dd30b012b7f84", 
"0b99d677883883584d9a328f6f2d54738363997a", "092217c2267f6e0673590aa151d811e579ff7760", "b04391910d19d2d0c64b62d300927f527417414e", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "bfbf817506fb00be3eab2a1dd372a2149012a987", "1831234db41daae6dfc7cc0efb2a74d93bf0ee62" ], "paperAbstract": "The last few years saw the emergence of 64-bit ARM SoCs targeted for mobile systems and servers. Mobile-class SoCs rely on the heterogeneous integration of a mix of CPU cores, GPGPU cores, and accelerators, whereas server-class SoCs instead rely on integrating a larger number of CPU cores with no GPGPU support and a number of network accelerators. Previous works, such as the Mont-Blanc project, built their prototype ARM cluster out of mobile-class SoCs and compared their work against x86 solutions. These works mainly focused on the CPU performance. In this paper, we propose a novel ARM-based cluster organization that exploits faster network connectivity and GPGPU acceleration to improve the performance and energy efficiency of the cluster. Our custom cluster, based on Nvidia Jetson TX1 boards, is equipped with 10Gb network interface cards and enables us to study the characteristics, scalability challenges, and programming models of GPGPU-accelerated workloads. We also develop an extension to the Roofline model to establish a visually intuitive performance model for the proposed cluster organization. We compare the GPGPU performance of our cluster with discrete GPGPUs. We demonstrate that our cluster improves both the performance and energy efficiency of workloads that scale well and can leverage the better CPU-GPGPU balance of our cluster. We contrast the CPU performance of our cluster with ARM-based servers that use many CPU cores. Our results show the poor performance of the branch predictor and L2 cache are the bottleneck of server-class ARM SoCs. 
Furthermore, we elucidate the impact of using 10Gb connectivity with mobile systems instead of traditional, 1Gb connectivity.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.86", "http://scale.engin.brown.edu/pubs/Cluster17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/613d57643da1286902a23ed72cce626eebe84275", "sources": [ "DBLP" ], "title": "Understanding the Role of GPGPU-Accelerated SoC-Based ARM Clusters", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "613dd12b854e70cfc3710c5d041c7e7e308b1257": { "authors": [ { "ids": [ "2705791" ], "name": "Abhinav Agrawal" }, { "ids": [ "3308405" ], "name": "Gabriel H. Loh" }, { "ids": [ "1694458" ], "name": "James Tuck" } ], "doi": "10.1145/3126908.3126918", "doiUrl": "https://doi.org/10.1145/3126908.3126918", "entities": [ "Application checkpointing", "Interrupt", "Speedup", "Web storage" ], "id": "613dd12b854e70cfc3710c5d041c7e7e308b1257", "inCitations": [], "journalName": "", "journalPages": "60:1-60:12", "journalVolume": "", "outCitations": [ "0f55217987ec25afa0f815e0aa3957e669b0280e", "60b38e8653374454ec44433900b44094080d0ca4", "0d6dffd099c8a3e8d4d5757ce138222837fe8a5a", "67ddcb550dd8fe57e4567bd7ca78f6e841f25c4c", "0108a3544506cc114214b2e30cb3284d2ff8d035", "f6715c2d9d8a76a20f4b857f7377ce63a23f0654", "1ea92529e75fe90ee1923b95d0fa8ad37ac1ed7c", "3fa792fb63f453bd9d492f23ef7662aaaf6f7ca5", "10925faac07bbeeecf7d7d50f6df2d0143c2426f", "812dd3ed5f52e3029986413d0258517397c1a541", "3a10fad57f186e8da3b912ac96e8cfa853734417", "1eb9dc6955b0de81a078c9d6fa937c33f1f04545", "3037024ee9782764cfbe8e5c9c625e2edaaf83fd", "09aaf64113deb89b642fd53cba59dbab43f297de", "6e1ceacdaa0c979cf52af5f478cc2a9891e2b6a0", "b6a75410d9aed0032486664f4afa7a8eaa4c4c70", "34a86b7b24d4a93dbb249fc05f0b7c0f48f90aff", "896f6698a74e656174045dc20840dd7e925f18bd", "35ae271bcc515d61dc113c35f8d3dc0300f8faad", "33062d678a316287ae2fa61ff1226bb95a4a6b51", 
"0e5c646909bb762da0cd325e084655c12445578f", "06154716d6d51256ed2bb014ef65ec8b5d41aa26", "0fdeee8f12f2f1f01e06b3c0c57fe824ff516682", "0b1c6f52c76b441cb2598f3b8ac132d921ec8274", "73b42183793a9143e882ee21847db445da1812f6", "8fd98b77e558d05ee73e3cff0839545927e536be", "34f310dffd51a8f1585b0a6a5ccaf83094d0d663", "2e663c1047ff14ddc2416229459922757a20edfb", "ee8deabdb4df98463bc984adf501f1e8a2228484", "4b6a45073130e8b95895a56ec73d64c70f27d97c", "cb85c0ea860c073bb655e1082c75b9f40b020124", "0374714312277344a281b046d2802c8ac81e4e30", "fc1a1650af9582d52a2d2dace31baac6427c10e1", "249107c2b695dbb2c429f261359bca11beb754f8", "b372f40bafc4a4a4c4dbedb7634e468a5028c97e", "80da10bcac7b831275980d1a5c39fd06811748fd", "601be998f87aff1012b12243dfde7624797936a2", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "0b5de3cf4ac3069dc9a7ee5b4d745e908c218536" ], "paperAbstract": "With the increasing size of HPC systems, the system mean time to interrupt will decrease. This requires checkpoints to be stored in a smaller time when using checkpoint/restart (C/R) for mitigation. Multilevel checkpointing improves C/R efficiency by saving most checkpoints to fast compute-node local storage. But it incurs a high cost for writing a few checkpoints to slow global-I/O. We show that leveraging NDP to offload writing of checkpoints to global-I/O improves C/R efficiency. We explore additional opportunities using NDP to further reduce C/R overhead and evaluate checkpoint compression using NDP as a starting point.\n We evaluate the performance of our novel application of NDP for C/R and compare it to existing C/R optimizations. 
Our evaluation for a projected exascale system using multilevel checkpointing shows that with NDP, the host processor is able to increase its efficiency on an average from 51% to 78% (i.e., a >50% speedup in performance).", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126918" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/613dd12b854e70cfc3710c5d041c7e7e308b1257", "sources": [ "DBLP" ], "title": "Leveraging near data processing for high-performance checkpoint/restart", "venue": "SC", "year": 2017 }, "6144837b78596253906461406df8ef2d3f612a0b": { "authors": [ { "ids": [ "40310370" ], "name": "Trevor Brown" } ], "doi": "10.1145/3087801.3087834", "doiUrl": "https://doi.org/10.1145/3087801.3087834", "entities": [ "(a,b)-tree", "B+ tree", "B-tree", "Concurrency (computer science)", "Data structure", "Experiment", "FLOPS", "Fast path", "HTML", "Non-blocking algorithm", "Overhead (computing)", "Search tree", "Transactional memory", "Vendor lock-in" ], "id": "6144837b78596253906461406df8ef2d3f612a0b", "inCitations": [ "877581fb962f543a03fc5ae83204c8bf06ea8b7a" ], "journalName": "", "journalPages": "293-302", "journalVolume": "", "outCitations": [ "00b3ebd315991e5b5f4e6beec2e1488281368028", "835c0aa9082e7fd7937c802f64e0393e267e6496", "01b029b617b6ac6dcdd3a0622f7ba459ff830f00", "dcfafc13f92e52bb693087db8cd8e0af0954f553", "51e8b3cfca99f4477f7267ea633adb72a442288a", "6479bc321dd7859eb6b6b8cca100bade86940526", "e52fe20fcee68879d78069c8688055f31f8f188e", "0fc3098d4413dd75ef750c8dddf6cbe87ea9d8d7", "e02ec8cd92c8e687b9e343868b07e0898302c72f", "a16d51087e5505b296e2b15a4b5b6fddff194ebf", "3697307709b52af113469831457f3525d389c7cd", "942f2a6df29234c304b69129872835d60cf5e9e9", "0df83d1108c6af842908743473f4cb8c0af7a802", "46e61ad29ab20618fb551afbc00ebb8eb4e9be21", "2900690eb3132a4d1536226d629727de41f38a66", "ab310a105f6d5b04d798c4be0d6890ba385463c4", "cf04ca948a4f1932766613398c6c741c8c2758dc", "88f8ce2d785888c86db1ddcd97dcdc4ee948ab32", 
"3f505df2ea7730fd006987677467ee8c996620a0", "22839816fbd337d77b81a7f3c6430324e057c250", "8a8d3dcf409212fc53c09f74d28abb135acdbc13", "1372e033396fe1a5aa12a1b148c5015a2e09d1d6", "20ccc698045ec67de16517e3af799c9ca4b95596", "d3842f290898211d4f02d66877194876c9a0a3e4", "94cecbcd0530b40a6b2cc0b55c4725d123caa831", "17ccb526085ec88fa2d35d8c8d7dc246b9d1bbe3", "4f499f49640fb39d16b9c05f42503b5d6b0cb531", "bdf2591487539051926341ee52e8c62198a3633c", "01d6463bd6dc85f938edb6e75c96c997c8b06799" ], "paperAbstract": "Algorithms that use hardware transactional memory (HTM) must provide a software-only fallback path to guarantee progress. The design of the fallback path can have a profound impact on performance. If the fallback path is allowed to run concurrently with hardware transactions, then hardware transactions must be instrumented, adding significant overhead. Otherwise, hardware transactions must wait for any processes on the fallback path, causing concurrency bottlenecks, or move to the fallback path. We introduce an approach that combines the best of both worlds. The key idea is to use three execution paths: an HTM fast path, an HTM middle path, and a software fallback path, such that the middle path can run concurrently with each of the other two. The fast path and fallback path do not run concurrently, so the fast path incurs no instrumentation overhead. Furthermore, fast path transactions can move to the middle path instead of waiting or moving to the software path. We demonstrate our approach by producing an accelerated version of the tree update template of Brown et al., which can be used to implement fast lock-free data structures based on down-trees. We used the accelerated template to implement two lock-free trees: a binary search tree (BST), and an (a,b)-tree (a generalization of a B-tree). 
Experiments show that, with 72 concurrent processes, our accelerated (a,b)-tree performs between 4.0x and 4.2x as many operations per second as an implementation obtained using the original tree update template.", "pdfUrls": [ "https://arxiv.org/pdf/1708.04838v1.pdf", "http://arxiv.org/abs/1708.04838", "http://doi.acm.org/10.1145/3087801.3087834" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/6144/837b78596253906461406df8ef2d3f612a0b.pdf", "s2Url": "https://semanticscholar.org/paper/6144837b78596253906461406df8ef2d3f612a0b", "sources": [ "DBLP" ], "title": "A Template for Implementing Fast Lock-free Trees Using HTM", "venue": "PODC", "year": 2017 }, "614bdb9fce7c3088050520fc769376722eebe8e2": { "authors": [ { "ids": [ "20165375" ], "name": "Pulkit A. Misra" }, { "ids": [ "1767703" ], "name": "Jeffrey S. Chase" }, { "ids": [ "1710965" ], "name": "Johannes Gehrke" }, { "ids": [ "1757774" ], "name": "Alvin R. Lebeck" } ], "doi": "10.1145/3037697.3037722", "doiUrl": "https://doi.org/10.1145/3037697.3037722", "entities": [ "Abstraction layer", "Application programming interface", "Benchmark (computing)", "Communications protocol", "Concurrency (computer science)", "Concurrency control", "Data center", "Foreach loop", "Key-value database", "Optimistic concurrency control", "Precision Time Protocol", "Read-only memory", "Snapshot (computer storage)", "Throughput" ], "id": "614bdb9fce7c3088050520fc769376722eebe8e2", "inCitations": [ "33d1a83b1f42b67d860d78550653fe5e1f754206" ], "journalName": "", "journalPages": "779-794", "journalVolume": "", "outCitations": [ "4af63ed343df388b6353b6fc77c7137d27822bf4", "b0b2f180faa09e7bfcb6bb8e57288c3b61f11116", "9e538e389d7805d297f66be47cfae47796ef9123", "098d792d1783b5f6fc098203f71f21f5d053c653", "02b1103e592fa6bf0499e27f1519692441fad557", "27cb0c2229299a82cf767d19dcc68aa1e5f0f233", "0bba65fd5ac1db9a3293e9ebcfba092cf4ae58ee", "05961fc1d02ca30653dd0b4c906113db796df941", "29a1148d75878671dc3663bf480e33d7bd91597d", 
"13d6c568c770ff5a070072e720fb34b0037cdab8", "4827cc74dba0c39172554cf0116eb111797f0d1b", "062c47d2a3afa47f42c6d97d72990b53a48ee9c6", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "148ec401da7d5859a9488c0f9a34200de71cc824", "7e4ecfc13aba74db770378e640d5fbcce7fd3d2e", "0bddbe35fa6e3cf625d15553365a690d3a6bf7aa", "7fe1907e9ecfd87119ba51a035c18dded3a1575c", "01293cc2b2bda3c38c7095d2ea1813fcb0611a3e", "00ac447d02035c26c7e2852c2457fe812e89038f", "663e064469ad91e6bda345d216504b4c868f537b", "ab310a105f6d5b04d798c4be0d6890ba385463c4", "94f918afd161b8251ddee0f648f276b9a94c12c3", "0da73832dee2c9b3d4c0d039d8e714e6ff098e40", "0e5c646909bb762da0cd325e084655c12445578f", "0139dceb6cef21b234e454d53154f30391495862", "38a9120f780602521af9744e31d80ef5cd9593a7", "25a4712c9a06860b4132fc94910f810e45893e85", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "daf0cd0076b388712ea12ec4105572997fc50cdf", "793f5e737284925a176f8ec82b3bb0d2178bb330", "61977858b3eea4f5a6d81393301e7298ade7a2d8", "14da9e91b3f7a94e222d48ff72c71646c3c49046", "6816c447cc4d3d945e0452564ff5d3220e1fdcab", "1820a34042d6371a9e20484b0c63b698eb522a6c", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "206b20f225fc655dfac733b6f0bd8077ed86215e", "f57ac7f53438b2877022125bac957fda2bb2a97b", "49bb5298fb1c6b9bfa122b47ed8ebd7d418e6706", "668948133604d876ee6d2170aeab87fdf9b8e80a", "0903d6b3b5a26fea2cb7b4956f66365d71c78549", "4bbb4e2bed21980cfe9ca7a6e243737705b0fd20", "638c917d981915bc7a00bb0941cdd38111df51de", "07d847f310d5fa9138f461f0a25c5e0024f1c4af", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "948c881ab7f1f62e9c940458e74c3e435320df72", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "8318fa48ed23f9e8b9909385d3560f029c623171", "2ad184901a2f3551df5d0406f244ae655ac8c4d2", "e706b8ae2952740cb95c0182c4c44b0d11cc54c1", "19ffc4f5129ed9d39f498f4eb901024c514263c7", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "400ae82ab2fc2c814033c65854229ecefbddbf67", "9748241beb02ef1e2d0e6dc877c04b354033a838", 
"3bfa50099269ef3ce832bc7f3710ba6484165092", "1220e4a011c46804d4369b5580dc7fb6e387af54", "131e1e1d163a0f49881d7b5ac092892093391015" ], "paperAbstract": "Distributed transactional storage is an important service in today's data centers. Achieving high performance without high complexity is often a challenge for these systems due to sophisticated consistency protocols and multiple layers of abstraction. In this paper we show how to combine two emerging technologies---Software-Defined Flash (SDF) and precise synchronized clocks---to improve performance and reduce complexity for transactional storage within the data center.\n We present a distributed transactional system (called MILANA) as a layer above a durable multi-version key-value store (called SEMEL) for read-heavy workloads within a data center. SEMEL exploits write behavior of SSDs to maintain a time-ordered sequence of versions for each key efficiently and durably. MILANA adds a variant of optimistic concurrency control above SEMEL's API to service read requests from a consistent snapshot and to enable clients to make fast local commit or abort decisions for read-only transactions.\n Experiments with the prototype reveal up to 43% lower transaction abort rates using IEEE Precision Time Protocol (PTP) vs. the standard Network Time Protocol (NTP). 
Under the Retwis benchmark, client-local validation of read-only transactions yields a 35% reduction in latency and 55% increase in transaction throughput.", "pdfUrls": [ "https://users.cs.duke.edu/~alvy/papers/milana_semel_asplos2017.pdf", "http://doi.acm.org/10.1145/3037697.3037722" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/614bdb9fce7c3088050520fc769376722eebe8e2", "sources": [ "DBLP" ], "title": "Enabling Lightweight Transactions with Precision Time", "venue": "ASPLOS", "year": 2017 }, "6156334bcd7eba6cf44c39f6842b546457d56b05": { "authors": [ { "ids": [ "37613610" ], "name": "Long Cheng" }, { "ids": [ "1738352" ], "name": "Ying Wang" }, { "ids": [ "1835531" ], "name": "Yulong Pei" }, { "ids": [ "1776848" ], "name": "Dick H. J. Epema" } ], "doi": "10.1109/ICPP.2017.48", "doiUrl": "https://doi.org/10.1109/ICPP.2017.48", "entities": [ "Big data", "Black box", "Central processing unit", "Distributed database", "Microsoft Customer Care Framework", "Network traffic control", "Program optimization", "Scheduling (computing)", "Simulation", "Speedup" ], "id": "6156334bcd7eba6cf44c39f6842b546457d56b05", "inCitations": [ "530ed0215e096de71110a804cc1d4223718b4cb4", "a12f946aff7eb27a8b7a3a86215e0517e02f1ac2" ], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "392-401", "journalVolume": "", "outCitations": [ "4145991f0918542649ed1d8063c75409671f0a0f", "3a91cf746eeade9718cd7bd8ba1f4b3a5ea59223", "231ba17921ebd80e95771e28dfb5082e169d5a53", "0a8b6d0f90c309e6f92c1fe3ad160786bf99a982", "7bfe8c3f13e5be1310f367cb83720ad335e65c99", "4c44cbcea788cc024b29ddf178249ee1c367464a", "1de15306de89ab834561ef1cd187ec607c2a1b9e", "62a68d15bbfef566170fc610183eb7ebf8313dce", "ce2444bfd9eeda0dc5ce1fb395f3ecbaff2a70d2", "1b311cad6ddaffd66dbb9c7637e9b45ed102c0dd", "2ac0f94e66210ab4266d40983e5b6bf160ed2bc0", "87be7206ec361544f5adc53d14142c74501c7149", "32823c0a07ce1ea3f41b297853d162286bf9c1cc", 
"06722b5bab0d26bccd3c8ec996cf2c0478b557be", "0cd69740a2f0f37b469243d66c39915cb4582097", "1a57b0c9d1b197b5c504a7acee793014d59d0c74", "fb84c6697150e4720c8920c2a215abfa784d3963", "a42c8f5971dea1aec01391ee80fe66721b010fd6", "232e9d2ac80f62500075aa687f55ce21da5f1c90", "2e0057911766d411b7a342c8bae2d6e3d29c47cd", "5ba9d399e35235f4e5f1d66c2481534763446ef7", "1cafaac11664e48bd121695ac1be06b0930d00a5", "66551f711a1f585d944ea3379a51a6ae4bf92a88", "e565993f00296cc66805dd0aea19927d3ed1914b", "4b2070cf8574fd93d69532f6329799dd0ba58eee", "5ac6ae1b1f37d87456843018563b981b32189e48", "058f6752d85a517aae298586fdf117acdd7560ea", "33771ec025f6a47465a2da33ade66110606b591d" ], "paperAbstract": "Efficient execution of distributed database operators such as joining and aggregating is critical for the performance of big data analytics. With the increase of the compute speedup of modern CPUs, reducing the network communication time of these operators in large systems is becoming increasingly important, and also challenging current techniques. Significant performance improvements have been achieved by using state-of-the-art methods, such as reducing network traffic designed in the data management domain, and data flow scheduling in the data communications domain. However, the proposed techniques in both fields just view each other as a black box, and performance gains from a co-optimization perspective have not yet been explored. In this paper, based on current research in coflow scheduling, we propose a novel Coflow-based Co-optimization Framework (CCF), which can co-optimize application-level data movement and network-level data communications for distributed operators, and consequently contribute to their performance in large distributed environments. We present the detailed design and implementation of CCF, and conduct an experimental evaluation of CCF using large-scale simulations on large data joins. 
Our results demonstrate that CCF can always perform faster than current approaches on network communications in large-scale distributed scenarios.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.48", "https://pure.tudelft.nl/portal/files/29616413/ICPP_Cheng_Epema_2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6156334bcd7eba6cf44c39f6842b546457d56b05", "sources": [ "DBLP" ], "title": "A Coflow-Based Co-Optimization Framework for High-Performance Data Analytics", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "6163d31dd51332eec9056e1e88e2f1aa0df947d6": { "authors": [ { "ids": [ "37504857" ], "name": "Arjun Roy" }, { "ids": [ "2407352" ], "name": "Hongyi Zeng" }, { "ids": [ "2875087" ], "name": "Jasmeet Bagga" }, { "ids": [ "2199298" ], "name": "Alex C. Snoeren" } ], "doi": "", "doiUrl": "", "entities": [ "Data center", "Failure rate", "Fault detection and isolation", "Network switch", "Router (computing)", "System call" ], "id": "6163d31dd51332eec9056e1e88e2f1aa0df947d6", "inCitations": [ "0af828156d457bb1c1f4f7e16bf6c0e83d7a5d4d", "63efcd0695d3de798e2743739c8b6a32a568fb84", "df5a202e7e19eb48d59a86fcbdb4ce85629cc43d", "5eb4c0b0c9766ab86b54c01847fc9730fe4ab657", "5ae5348b9558729b98a70a7abed4145adeb45bbe", "602fe41b74da4d92051f63b5a95831b6ad2b5552", "066c934380feb46f38f8f78566f03eb7eb3a1e11" ], "journalName": "", "journalPages": "595-612", "journalVolume": "", "outCitations": [ "332f77fd05703c1607e3b57884ad31fb1fad0104", "00ddc85d502aa4bdc45a3b8b9099fad75938b50a", "5b999d36d5230eca01532b357c7cf338a5e0d641", "310e5383a34e95cc41299ecc6f6329511afc0986", "53abb9ca99f1c9e8038dcc0bfe4ccdf770a55db3", "0ec58ad7dffcc53018a786c069cb604ef1be5aae", "3b57dfb13c0993d573696d9e3696722d92721690", "73a6eb2ae5e9aa37babb95748c4d8ecee7efaf22", "9289860d43896b2d174a136eb56f03bb1b05e8d9", "2860d9a1daa69c65bca16c1aaaf7a98e1b2407c5", "14b82ab954a85cb8b336e86cf536c5701ca722e9", 
"27ff8aa0929f4552ecb64e7200ae717e68446598", "38f0ec358c3f0952927370d314779a9ea7e0f34e", "42f6218131551632370e5e8f88370d04b220002a", "4e34c29eb03589bf17356c01e85f18a0720121cc", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "1e4da813c29a65f19f6e9432cb4efe8b7d45ac1d", "0f35b3fd2ef4638a23ee07db4057cc78365c982a", "46eba995c5371d7966d59549f61c203cecd1d3c7", "2b0c044181e70ee8eacd2db26c31a03d5ec24c9c", "11a4744f7f0883c4232a9f5aaca8b9d29bfaa762", "0541d5338adc48276b3b8cd3a141d799e2d40150", "3386a3417920dd16efec5459b9b48930ece73dd8", "fa48ebc1b3462f133f977d0c2dc737655adea7a6", "0aeb77fb41dc8e863e054fcffea7b8b3011515ce", "170d979cc755c3fc91dfe51eb07e5e0def9ad249", "2c5484ca39ca9c704512cb01a1196239a50b38db", "65da29a03c8905cbc0614612d1632864336c4786", "5b1ba2ca198353b83b50f210de131dc764d9f990" ], "paperAbstract": "Datacenters are characterized by their large scale, stringent reliability requirements, and significant application diversity. However, the realities of employing hardware with small but non-zero failure rates mean that datacenters are subject to significant numbers of failures, impacting the performance of the services that rely on them. To make matters worse, these failures are not always obvious; network switches and links can fail partially, dropping or delaying various subsets of packets without necessarily delivering a clear signal that they are faulty. Thus, traditional fault detection techniques involving end-host or router-based statistics can fall short in their ability to identify these errors. We describe how to expedite the process of detecting and localizing partial datacenter faults using an end-host method generalizable to most datacenter applications. In particular, we correlate transport-layer flow metrics and network-I/O system call delay at end hosts with the path that traffic takes through the datacenter and apply statistical analysis techniques to identify outliers and localize the faulty link and/or switch(es). 
We evaluate our approach in a production Facebook front-end datacenter.", "pdfUrls": [ "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/roy", "http://cseweb.ucsd.edu/~snoeren/papers/fault-nsdi17.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-roy.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_roy.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-roy.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8ccf/2885d3bb874b619ce19f9b2a302dae2a2f54.pdf", "s2Url": "https://semanticscholar.org/paper/6163d31dd51332eec9056e1e88e2f1aa0df947d6", "sources": [ "DBLP" ], "title": "Passive Realtime Datacenter Fault Detection and Localization", "venue": "NSDI", "year": 2017 }, "616f96617ff959737f000d13056f359cdfff3a15": { "authors": [ { "ids": [ "3452695" ], "name": "Giorgi Maisuradze" }, { "ids": [ "35642523" ], "name": "Michael Backes" }, { "ids": [ "1701081" ], "name": "Christian Rossow" } ], "doi": "", "doiUrl": "", "entities": [ "Blinded", "Blinding (cryptography)", "Compiler", "Encode (action)", "Executable", "Google Chrome", "JavaScript", "Just-in-time compilation", "Just-in-time-concept", "Proxy server", "Rewriting", "Shellcode", "dachshund superbreed of dog" ], "id": "616f96617ff959737f000d13056f359cdfff3a15", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "50117210fbb7a8ce28ba503280f3c65fdc029ec7", "116eaac2e498bc2c9bea10ea838309dcf143d764", "23654f2804f9158387eac4743aa18c88fa32a5a7", "ba6ef85a27aaf47dcbedd1d3b21d2027ebeeb20c", "1e37625f382709b06f72e5c3c41aba1328ff66dc", "3fa27974cade47e98993b98798f73594b902583b", "f479c0578156255ce176e75bb13051fbb0f25b98", "0adfaffb6e236d886c6eda579b28f5a8530019c6", "351c6d9b736d8e6b776efdd449776b7c340cd8d7", "65950dfc50eb482d9df1ae11050a9f76fcddbc61", "4b9426a328fb4e379e2fab57fc72a91c5618ae94", "63eca2d9de958abab6a20f0696789ccb6f1b8aa1", 
"3e1ff9416ec1c2f94d0ddbeb6171eea4c336d663", "b0d2e7a274bf7f778f0cd22486ead79d58c859ac", "3738a8045c001c8ffd245e72b0d68382fba27a48", "0988a425689f6f3700e797f4a2c18f73692573c3", "1bb2363ddfec8e12f5408ce6b1538d74570bd865", "377e1ea567fa79fae02a0b38a62916520ef81e2d", "03f827395a17beb941241dbd72322705bdf79791", "48a8e9d8a41009eb6b7733b139eb5eff30d72776", "3875d1d1b623af0d640528efc9e581bc91338e35", "26de23713ac23ed7a952cf56faa8bd23f8fd6575", "f0ac31c2248ef8eb597448395da6f79227ffe916", "67b752aaef2133ec0cda47b2a2c1856f0f2f266f", "569393ee0bbba78af3241e544c347b2e98a1275d", "0ed6a4c8fa2c74ca2e470df6e205797371523edc", "56c111a4ad4ed01804faaefa22dc97dbea797f77", "660ad810c69affa189f567e76ff83af682228703", "05c49820bb35d0b8d7a2168a9124e506a0334b57", "4cd63e0701177f04e377fa9f0857c5b0fa10b07e", "2bec1bda24329497de245e83c3684da271a2a745", "08c3e50a2913da51ed3cdafdcfdfb488e8fa83c3", "48a8b9bd6df63b13093cd67ee61911e7a485be57", "422c2d83a959df1f7c3e99b8a2c77772d8b2e7c3", "23e8236644775fd5d8ff5536ba06b960e19f904b", "412c0be5520e80ab6f8c3662477a28e3b9ccb943", "07848afa4b5f2f09cd2fe28a6cdc25c642da61cd", "70e8eb54e20c504b9a3c52f5a5ea4d8fbb3d985c" ], "paperAbstract": "Modern browsers such as Chrome and Edge deploy constant blinding to remove attacker-controlled constants from the JIT-compiled code. Without such a defense, attackers can encode arbitrary shellcode in constants that get compiled to executable code. In this paper, we review the security and completeness of current constant blinding implementations. We develop DACHSHUND, a fuzzing-driven framework to find user-specified constants in JIT-compiled code. DACHSHUND reveals several cases in which JIT compilers of modern browsers fail to blind constants, ranging from constants passed as function parameters to blinded constants that second-stage code optimizers revert to a non-protected form. To tackle this problem, we then propose a JavaScript rewriting mechanism that removes all constants from JavaScript code. 
We prototype this cross-browser methodology as part of a Web proxy and show that it can successfully remove all constants from JavaScript code.", "pdfUrls": [ "https://www.internetsociety.org/sites/default/files/ndss2017_05B-1_Maisuradze_paper.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/616f/96617ff959737f000d13056f359cdfff3a15.pdf", "s2Url": "https://semanticscholar.org/paper/616f96617ff959737f000d13056f359cdfff3a15", "sources": [], "title": "Dachshund: Digging for and Securing Against (Non-)Blinded Constants in JIT Code", "venue": "", "year": 2016 }, "617da85c485487e48624c6beae735a5dee5a01ca": { "authors": [ { "ids": [ "19185676" ], "name": "Seulbae Kim" }, { "ids": [ "19290522" ], "name": "Seunghoon Woo" }, { "ids": [ "1678348" ], "name": "Heejo Lee" }, { "ids": [ "39476651" ], "name": "Hakjoo Oh" } ], "doi": "10.1109/SP.2017.62", "doiUrl": "https://doi.org/10.1109/SP.2017.62", "entities": [ "Duplicate code", "Ecosystem", "Function-level programming", "Open Sound System", "Open-source software", "Preprocessor", "Scalability", "Software bug", "Software developer", "Software development", "Software system", "Source lines of code", "Ubuntu", "Vulnerability (computing)" ], "id": "617da85c485487e48624c6beae735a5dee5a01ca", "inCitations": [ "aa90b32e33b283a30a86c65996127beef213dbbb", "747b50b767892aadcb77fbe1e47ef83caa9f55d8", "5ad3878ee97398d94e545d329bebe5f99e181e3a", "fcd6b9b604f9faa5fb9a585e2a2dde5733a7ef01" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "595-614", "journalVolume": "", "outCitations": [ "ee2570338f1c00097be793093268336d5ea4c0e7", "4034c2c340dc8cb4548608ed623f6c51894f4f5d", "0fe8165899f6933eb3f8cce2078811e43cd1be78", "b62341942401854a54dad1e2c2273d216092d9be", "08eb5611e24c911b7ee51ea7f600a563867d8b7e", "98e810ed098a651e0ba8cbb63d2d926d4eebdf9b", "479b770d93411cbaaa3888cd77e09e71d8c211f2", "00a513fa28207915c3c9224cf80aae080c04c58c", "fb35f65339c4f1bbe9c0bd25f09f83e65e436d51", 
"7923fa609d3c1f753d4d99086bfadc19f39bd077", "1208fdc24aec2fa74ca7cac91d2aaca9d11d0432", "833e757b29df5a9e760c02d2b3b67b74b397200a", "9f0c016bb12e1567a1d3a460493957ae135a0d40", "23c63a0b251eb3e11b1f4c2c6733261a2f765f54", "ea0d36e8decd5d5b6918cd1692844dedaec823ba", "5bd89dff6f2ade798098987ed2ec648c448ccb22", "1e73c2fa2709d3210c09f19933e99b71905364ab", "0ec830d516363f1917056d699aab6f07af18a053", "11b111cbe79e5733fea28e4b9ff99fe7b4a4585c", "0fb58d6facd50415077ea8a04e1f1ddeddb96c52", "0fdcab45a19b35b0f0f547656a4851317e9f9666", "34a720114bdc38720c9660b0ce2a372e7bfbe59f", "2afb54393570efde9c402c2c41ac0f5495fb7622", "0d7dea74b7344dcd4de965e9535e5cfa8630c94c", "c2ecbb65253048c69a55a0a6929606280a2e0dc7", "79f40de182b73cde669cf2d5fb491fbfff3703bc", "4ad69f733fb41cca898909e427224b65e53a2082", "ab6a8662f342272df53e62a4550075ff64b3c421", "a369ec7cc8a6d84bbae2b1f0fa073a06e7d32ac0", "89c5ea8aa0f208a12dba3a0b4ceb8873ad45e0f3", "0e107312952dc5bc947e15f7046b4a0d935d6385", "1641e3de69af316ad202c74d52ecc6b55537d550", "9bd6a581b1c6b2c306c2184e7ec9b80ce38de5ef", "6b6b1a307c0507e52c2591eb430e1c33a9a23125", "524728ed39af91a158fa04821501efccfeb4bf83", "42227c57c221d655b67a95afd2f62477e08340d9" ], "paperAbstract": "The ecosystem of open source software (OSS) has been growing considerably in size. In addition, code clones - code fragments that are copied and pasted within or between software systems - are also proliferating. Although code cloning may expedite the process of software development, it often critically affects the security of software because vulnerabilities and bugs can easily be propagated through code clones. These vulnerable code clones are increasing in conjunction with the growth of OSS, potentially contaminating many systems. Although researchers have attempted to detect code clones for decades, most of these attempts fail to scale to the size of the ever-growing OSS code base. 
The lack of scalability prevents software developers from readily managing code clones and associated vulnerabilities. Moreover, most existing clone detection techniques focus overly on merely detecting clones and this impairs their ability to accurately find \"vulnerable\" clones. In this paper, we propose VUDDY, an approach for the scalable detection of vulnerable code clones, which is capable of detecting security vulnerabilities in large software programs efficiently and accurately. Its extreme scalability is achieved by leveraging function-level granularity and a length-filtering technique that reduces the number of signature comparisons. This efficient design enables VUDDY to preprocess a billion lines of code in 14 hour and 17 minutes, after which it requires a few seconds to identify code clones. In addition, we designed a security-aware abstraction technique that renders VUDDY resilient to common modifications in cloned code, while preserving the vulnerable conditions even after the abstraction is applied. This extends the scope of VUDDY to identifying variants of known vulnerabilities, with high accuracy. In this study, we describe its principles and evaluate its efficacy and effectiveness by comparing it with existing mechanisms and presenting the vulnerabilities it detected. 
VUDDY outperformed four state-of-the-art code clone detection techniques in terms of both scalability and accuracy, and proved its effectiveness by detecting zero-day vulnerabilities in widely used software systems, such as Apache HTTPD and Ubuntu OS Distribution.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.62" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/617da85c485487e48624c6beae735a5dee5a01ca", "sources": [ "DBLP" ], "title": "VUDDY: A Scalable Approach for Vulnerable Code Clone Discovery", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "62009a8b6d18dd30de1fa6e52a7a018e5a18220e": { "authors": [ { "ids": [ "2793216" ], "name": "Kyungyong Lee" }, { "ids": [ "25132893" ], "name": "Myungjun Son" } ], "doi": "10.1109/CLOUD.2017.21", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.21", "entities": [ "Amazon Web Services", "Application checkpointing", "Cloud computing", "Cost efficiency", "Deep learning", "Experiment", "Fault tolerance", "Graphics processing unit", "Heuristic", "Parallel computing", "Scheduling (computing)", "Simulation" ], "id": "62009a8b6d18dd30de1fa6e52a7a018e5a18220e", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "98-105", "journalVolume": "", "outCitations": [ "5f6904f96c018a10434a7ecd45777aa2eae9a868", "0287a0c19b29b2497fd860b568dbb89cdf1a4813", "4e44046bfb459c5f627ef141786773e2c4591de4", "090030e0d1aa117008e9e9fa4abdee0a95455f4a", "7e74ea151efcdcfecffdbeaec0728f9ac1f80389", "90efc90cfeab53f1bc7495609771e91671560489", "05be0db01d70bcce9530b462ab2368f9e15127d9", "3bc68ebdfc30f1e5df9a80b48bdfde1e20e0ccbe", "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "280863f80b6401bc6d65839ecb3dc7a0febdfa09", "0558c94a094158ecd64f0d5014d3d9668054fb97", "bea5780d621e669e8069f05d0f2fc0db9df4b50f", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "0541d5338adc48276b3b8cd3a141d799e2d40150", "4f86fa28602d9503a8575c5b31082284abc8415c", 
"4954fa180728932959997a4768411ff9136aac81", "12635bdd3bd32f09c85a9070977a281fcb32ff61", "0935bb723e4071ccd4c2334d3b6d728faa111d11", "70e38d47b83261e257bae61dc39ffbf391b30591" ], "paperAbstract": "Cloud computing resources that are equipped with GPU devices are widely used for applications that require extensive parallelism, such as deep learning. When the demand of cloud computing instance is low, the surplus of resources is provided at a lower price in the form of spot instance by AWS EC2. This paper proposes DeepSpotCloud that utilizes GPU-equipped spot instances to run deep learning tasks in a cost efficient and fault-tolerant way. Thorough analysis about spot instance price history logs reveals that GPU spot instances show more dynamic price change pattern than other general types of cloud computing resources. To deal with the price dynamicity of the GPU spot instance, DeepSpotCloud utilizes instances in different regions across continents as a single resource pool. This paper also proposes a task migration heuristic by utilizing a checkpointing mechanism of existing deep learning analysis platform to conduct fast task migration when a running spot instance is interrupted. Extensive experiments using real AWS services prove that the proposed task migration method is effective even in a WAN environment with limited network bandwidth. Comprehensive simulations by replaying AWS EC2 price history logs reveal that DeepSpotCloud can achieve 13% more cost gain than a state-of-the-art interrupt-driven scheduling policy. 
The prototype of DeepSpotCloud is implemented using various cloud computing services provided by AWS to serve real deep learning tasks.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.21" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/62009a8b6d18dd30de1fa6e52a7a018e5a18220e", "sources": [ "DBLP" ], "title": "DeepSpotCloud: Leveraging Cross-Region GPU Spot Instances for Deep Learning", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "620eac63b314969fa88a0e3be1d3d682e5266f3d": { "authors": [ { "ids": [ "3239866" ], "name": "Yuanwei Fang" }, { "ids": [ "2542407" ], "name": "Chen Zou" }, { "ids": [ "1787375" ], "name": "Aaron J. Elmore" }, { "ids": [ "1695232" ], "name": "Andrew A. Chien" } ], "doi": "10.1145/3123939.3123983", "doiUrl": "https://doi.org/10.1145/3123939.3123983", "entities": [ "ASIC", "Ampersand", "Big data", "CMOS", "Central processing unit", "Data rate units", "Dynamic dispatch", "Intrusion detection system", "Memory address", "Performance per watt", "Stream processing" ], "id": "620eac63b314969fa88a0e3be1d3d682e5266f3d", "inCitations": [], "journalName": "", "journalPages": "55-68", "journalVolume": "", "outCitations": [ "cc05fdb70b630138dd9b64a901eec9c36146c371", "8d71fb5efe95801b31d65366ff1ce8c01525e493", "25977aeebe5714a9e727218a0c71d05144cb8eba", "7dcd51ba5f4889ea76674caafef60d53482d7d83", "60cfe41fd68644fb19cba99babae694a2acacc17", "15c7d3d5cfce46110a5aa5c6a482e359a96082b4", "330b46ce848047b13fadc7a63c01abfe02fd4d8b", "7f80ae3a81d063083b049b91cd0299f09bbb4696", "4c0872db1fab8c8b227f0e48e47ed1f6a68f643d", "31eee4a393c60963233cba078dc3b0109d5c3e54", "164bb40bac988ed0b90fe44366cd98c307e57b4b", "20ee61caa108938c2252dbefafb926f3d481465b", "8d67bac352dcd43c9c08f917ba8c4bebb444b55d", "e65b019093e69f53cf6bc985d3dfe5e5bda0c0f9", "0c6f81e60514edbc6a936a5f8593838f14658653", "686f66945a214d0da8778e64ceee0ed15e9fd73d", "d4934496f3e55768220f6ec03a60f242551b04e5", 
"4b96ca859e49ea7d63531b8dc3cce87b137d9abe", "1ec31b63bde561177db851683d19592f3192d476", "f6c62e96e2cec8ea1f73047d4692aafd73dd9dc5", "01a9d45797ad47d1aa36736ce2e5cd60edb2ff24", "03363ed04e9d4d2e8c9348551815e80615969611", "ac99d4048f41c5336b2cb5ac0f6dd060aaa12da4", "252e583d2a412eb3f1a38d81ba77d8d0308dc8e5", "3b6711bd158a375267999ac095b8c1a76d9dc464", "0f35cad96cebf9590c168caf4baa2103af38934d", "a1b067fc941d6727169ec18a882080fa1f074595", "4fe1c15cfd5044e2627faf418dd77462bf48195c", "43dfb8212d7eb7660a81ce66de2405f1acee4638", "757c2dcf92cfeea24ba4360479f58c640a1650ea", "207def18c67fa8024741b7ae3cdc655b57f2053f", "726e306fc3a331c67a521274e795dfe8ebeea614", "b9901eee13a35c612a50cddc3f6e3689fa0d4f3f", "9a4c6f0862a650cabf3007e34f1f17fabbcb1931", "07f62af22fab75b1b8dcc7a5ef45923322e50b57", "ae2bc1599510755ee93eac29b7dc2c66c8bb19ad", "7564661f026abd1d472707c15357494fd79e63c0", "4aa35c9d2240cfe22187617dd2c63e9a5c90958d", "55180f27718451f931bb2768446fe26ccb01ba68", "4f232b9b3834b5671be492b12371fb145de3c7c4", "43057b555c0e9763783d8f24aa286d5d38e2c878", "18a5f443299784479e78d9e77f175af57cb2fa2b", "ceb90588b49f4af3b06af7edd7b81540ffd2ca99", "2a4e3e2c6b3c2d242f5b632657a17bdfade6169a", "80527e7595530951081494d1b98f3f13da3033a2" ], "paperAbstract": "Big data analytic applications give rise to large-scale extract-transform-load (ETL) as a fundamental step to transform new data into a native representation. ETL workloads pose significant performance challenges on conventional architectures, so we propose the design of the unstructured data processor (UDP), a software programmable accelerator that includes multi-way dispatch, variable-size symbol support, Flexible-source dispatch (stream buffer and scalar registers), and memory addressing to accelerate ETL kernels both for current and novel future encoding and compression. 
Specifically, UDP excels at branch-intensive and symbol and pattern-oriented workloads, and can offload them from CPUs.\n To evaluate UDP, we use a broad set of data processing workloads inspired by ETL, but broad enough to also apply to query execution, stream processing, and intrusion detection/monitoring. A single UDP accelerates these data processing tasks 20-fold (geometric mean, largest increase from 0.4 GB/s to 40 GB/s) and performance per watt by a geomean of 1,900-fold. UDP ASIC implementation in 28nm CMOS shows UDP logic area of 3.82mm2 (8.69mm2 with 1MB local memory), and logic power of 0.149W (0.864W with 1MB local memory); both much smaller than a single core.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123983" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/620eac63b314969fa88a0e3be1d3d682e5266f3d", "sources": [ "DBLP" ], "title": "UDP: a programmable accelerator for extract-transform-load workloads and more", "venue": "MICRO", "year": 2017 }, "622f4d1463868b59b547363d13d96f953527ae14": { "authors": [ { "ids": [ "1808863" ], "name": "Vladimir Braverman" }, { "ids": [ "2225550" ], "name": "Stephen R. Chestnut" }, { "ids": [ "2696609" ], "name": "Nikita Ivkin" }, { "ids": [ "1757306" ], "name": "Jelani Nelson" }, { "ids": [ "2574148" ], "name": "Zhengyu Wang" }, { "ids": [ "1727403" ], "name": "David P. 
Woodruff" } ], "doi": "10.1145/3034786.3034798", "doiUrl": "https://doi.org/10.1145/3034786.3034798", "entities": [ "Algorithm", "Euler\u2013Bernoulli beam theory", "Insertion sort", "PSPACE", "Polynomial" ], "id": "622f4d1463868b59b547363d13d96f953527ae14", "inCitations": [ "34ce9b986aa9ce54d569f70907566784d3cac9d5", "7aa3242bef39663da7bf50730c41288e5d3c7d53", "76fb5deb426d037cd259ee5839b6629203b406f6", "00a4e6d53787a6609485b336a0c537cd693e04b4", "5e07a43f5fc03afa1cad25e66f203e65cfe9710a", "8f24d49bb72fa6ba16c6c3bb7cd2931dfb1fc037", "586e52c505910fc585174a57de4bdb50ab95b8bd", "b36cce14d5581394e95bbc0f344a96535506117b" ], "journalName": "", "journalPages": "361-376", "journalVolume": "", "outCitations": [ "1a9e5b6b01e94f5a858d7d1c6042522a683a0a5a", "43d6a1c423e81209851896f9bc1f48d2007f17b2", "19484c258f39e4d2b4b9734556294e46e3ca3ea4", "e4c5a8575a2576c4b9a6df65af6b7d5e657373ac", "5944eecf543d048b3a0359400bc49ea4f461835f", "4465762fac009c8620e5d2ad67e8ffab4b7dc2f5", "39034ba94cc6ad056fb007f5871212ce8a8553c5", "79c9c7645b8936268abf1f9928ec9174554fda98", "04ee1c7ed1b22ce513ce2672b89eb3b2ea371258", "24763030fb1e9813dad51d28bea9c5d1414f9cda", "159a7a03ccb0ef751db1870be1de4d26a02470f3", "4b0d99ad8f7f62b5df8b438722a9a4cb38de3677", "34ce9b986aa9ce54d569f70907566784d3cac9d5", "ab521e91a2bce4fd13326cba9a765b479feefd61", "54be8183b7af3e9a46f15a2d06be852e723e27bf", "6d1a7ee3b7f7953165cdbe378fd85fe6392de4f9", "c84a26f98dd4ced2f937c818245b822f59666f6c", "7a278ee0578f194700cadc3811cdda4ec751f88a", "40952ef7fe2d22daec75a6ab7e0fe030ce447e0a", "59884ad55dab150122111636ae9d76e63a483935", "1df921f9b87ff722306894fe72a282b183adee7c", "028e534092e48aa2435884f0bdbb5d01b46c7821", "04afd5f18d3080c57d4b304dfbd1818da9a02e8e", "174177d1631fa92a746d514ba0210382d231e583", "5683e71711e08bfdc6d8c3bd356a5b69f4fd517e", "68805437a3d48d650ba37694c664004ae6b78076", "a1b9f637796f7366669f3c68dc7459596d1f7fad" ], "paperAbstract": "The task of finding heavy hitters is one of the best known and well studied 
problems in the area of data streams. One is given a list i_1, i_2, ..., i_m ∈ [n] and the goal is to identify the items among [n] that appear frequently in the list. In sub-polynomial space, the strongest guarantee available is the ℓ2 guarantee, which requires finding all items that occur at least ε||ƒ||_2 times in the stream, where the vector ƒ ∈ R^n is the count histogram of the stream with i-th coordinate equal to the number of times i appears, ƒ_i := #{j ∈ [m] : i_j = i}. The first algorithm to achieve the ℓ2 guarantee was the CountSketch of [11], which requires O(ε^{-2} log n) words of memory and O(log n) update time and is known to be space-optimal if the stream allows for deletions. The recent work of [7] gave an improved algorithm for insertion-only streams, using only O(ε^{-2} log ε^{-1} log log n) words of memory. In this work, we give an algorithm BPTree for ℓ2 heavy hitters in insertion-only streams that achieves O(ε^{-2} log ε^{-1}) words of memory and O(log ε^{-1}) update time, which is the optimal dependence on n and m. In addition, we describe an algorithm for tracking ||ƒ||_2 at all times with O(ε^{-2}) memory and update time. 
Our analyses rely on bounding the expected supremum of a Bernoulli process involving Rademachers with limited independence, which we accomplish via a Dudley-like chaining argument that may have applications elsewhere.", "pdfUrls": [ "http://doi.acm.org/10.1145/3034786.3034798", "http://people.seas.harvard.edu/~minilek/papers/bptreev2.pdf", "https://dash.harvard.edu/bitstream/handle/1/34744122/99166508.pdf?sequence=1", "http://arxiv.org/abs/1603.00759" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/622f4d1463868b59b547363d13d96f953527ae14", "sources": [ "DBLP" ], "title": "BPTree: An \u21132 Heavy Hitters Algorithm Using Constant Memory", "venue": "PODS", "year": 2017 }, "625922f62451f1e3b556e6aceca588946880427a": { "authors": [ { "ids": [ "1757205" ], "name": "Vasileios Giotsas" }, { "ids": [ "1891855" ], "name": "Philipp Richter" }, { "ids": [ "2088273" ], "name": "Georgios Smaragdakis" }, { "ids": [ "1782612" ], "name": "Anja Feldmann" }, { "ids": [ "2897689" ], "name": "Christoph Dietzel" }, { "ids": [ "1728325" ], "name": "Arthur W. 
Berger" } ], "doi": "10.1145/3131365.3131379", "doiUrl": "https://doi.org/10.1145/3131365.3131379", "entities": [ "Black hole (networking)", "Border Gateway Protocol", "DDoS mitigation", "Denial-of-service attack", "Forwarding plane", "Internet", "Internet access", "Reachability" ], "id": "625922f62451f1e3b556e6aceca588946880427a", "inCitations": [ "04e6c74b9a8208351285e3e534a4532fe240e9ac" ], "journalName": "", "journalPages": "1-14", "journalVolume": "", "outCitations": [ "0f8ccb1bdba17f8c0843be0eb5fda2e8e2e32e95", "0a7151c200bf97973453ec05a28012cf03cf906b", "1ce45640c601a1b5426f6c5c5ef086374ebebe09", "033fb48ba30c7f40073a7c1c27f3baba1bec218f", "54df502bfd6aac0c13ae111ca46ddc36d645bdfb", "0387c89a21e113eb69fcde8a11c82a072e3a1af1", "0a03b67644a6411ab7ec73551aa27060b8e4ab1d", "265e9a6d2770743fd72bec67cb7884522e6817cb", "0f7a0c691816725080bf508d2830c2b1a074e291", "29c1d53ac3861e1b95da32349c756b349f586ea2", "09b4dae698495e8229171a64cd78b23f106de653", "64934da21ceddf2fe09da84a7fe200ef1abd59b2", "0ecec13abda69472bdee76f60ab3d97601661af7", "7b102e4c57feaa1cb802b58f0f9ff2c934a82db7", "3e52e4d429e0d676f7cb4c5431cef915557062ab", "674087f4cc6f6847f2c9b823faf53f627fb43fde", "41f6d924dd684966a9f6bc25e2266ce08cfdf4f4", "191cef5d1d84b81c8eb77119c7029fa74d23d9bc", "eab0d3bdcb21364883bb55e295e55ab479a90181", "0fecce8b0cc8b52b69167d71921c02a003916e63", "7b5f7276bebae1e1922ad510fccca0b3e7f66122", "1bec0ad7d9c3d62b70261d8358fa85df833c5724", "4e33e9d1aa91aa78819ec9700d1024e0f0cdef6c", "a941c201b62218f30551a04acc130224c54572db", "6ed50af8cde99d0e797173d9fafb97cab4a5fd08", "02df3d50dbd1d15c38db62ff58a5601ebf815d59", "5880497106a0e3b4a16fe5c0026673f8daade248", "66e35b497d679dddefccc50005a5fd6986053667", "89e9e46ec784695d40a1f180679b743a2e5656f8", "40f137ea7a002685fb9cbf0fb04086e96904619c", "50957ff587eeeae3e1f618bd0f9cf4c2924bfd85", "67eb93bb872a06ba9796d1b54e14f14e8d63e5ba", "1d204a744f642af435faefb94897fe53a0ec96b0", "7a0e7065d521e31e74fc367597db41b62b19a789", 
"1d65193c80e49f6ea2ecc0eb8c331965f328df51", "20f38a5d49473d999e3bafd25c9808c3f564154b", "3a1625fd7789714a6a5e2e01b3122a9621b33d27", "1bef4d26c917f0060814e86cffa9b22bea70a847", "00fd3220a51630625be397c9114b71fda62ba3d9", "4cead48e2eac91560105871b78268e3164eb382b", "23fcc8961d750d70b2735c9c810f53821a97fe93" ], "paperAbstract": "The Border Gateway Protocol (BGP) has been used for decades as the de facto protocol to exchange reachability information among networks in the Internet. However, little is known about how this protocol is used to restrict reachability to selected destinations, e.g., that are under attack. While such a feature, BGP blackholing, has been available for some time, we lack a systematic study of its Internet-wide adoption, practices, and network efficacy, as well as the profile of blackholed destinations.\n In this paper, we develop and evaluate a methodology to automatically detect BGP blackholing activity in the wild. We apply our method to both public and private BGP datasets. We find that hundreds of networks, including large transit providers, as well as about 50 Internet exchange points (IXPs) offer blackholing service to their customers, peers, and members. Between 2014--2017, the number of blackholed prefixes increased by a factor of 6, peaking at 5K concurrently blackholed prefixes by up to 400 Autonomous Systems. We assess the effect of blackholing on the data plane using both targeted active measurements as well as passive datasets, finding that blackholing is indeed highly effective in dropping traffic before it reaches its destination, though it also discards legitimate traffic. We augment our findings with an analysis of the target IP addresses of blackholing. 
Our tools and insights are relevant for operators considering offering or using BGP blackholing services as well as for researchers studying DDoS mitigation in the Internet.", "pdfUrls": [ "https://conferences.sigcomm.org/imc/2017/papers/imc17-final90.pdf", "http://people.csail.mit.edu/gsmaragd/publications/IMC2017/IMC2017.pdf", "http://doi.acm.org/10.1145/3131365.3131379", "http://people.csail.mit.edu/awberger/papers/Inferring_BGP_Blackholing_Activity_in_the_Internet.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/625922f62451f1e3b556e6aceca588946880427a", "sources": [ "DBLP" ], "title": "Inferring BGP blackholing activity in the internet", "venue": "IMC", "year": 2017 }, "625b567202a6bef1d15c15d95c4d1cf743fc34c5": { "authors": [ { "ids": [ "2300090" ], "name": "Amrita Mathuriya" }, { "ids": [ "37082461" ], "name": "Ye Luo" }, { "ids": [ "30761222" ], "name": "Raymond C. Clay" }, { "ids": [ "3444715" ], "name": "Anouar Benali" }, { "ids": [ "2692314" ], "name": "Luke Shulenburger" }, { "ids": [ "2718090" ], "name": "Jeongnim Kim" } ], "doi": "10.1145/3126908.3126952", "doiUrl": "https://doi.org/10.1145/3126908.3126952", "entities": [ "Automatic vectorization", "Blue Gene", "Central processing unit", "Compiler", "Monte Carlo", "Monte Carlo method", "On the fly", "Quantum Monte Carlo", "Simulation", "Single-precision floating-point format", "Speedup", "Supercomputer" ], "id": "625b567202a6bef1d15c15d95c4d1cf743fc34c5", "inCitations": [], "journalName": "", "journalPages": "38:1-38:12", "journalVolume": "", "outCitations": [ "6d38d21cda9c38ae0e0277b9f3ea20f0e731ec17", "1c1397ed846bd22c8d95040e5211b0bcac7f9e7b", "4f805391383b20dbc9992796d515029884ba468b", "8b3059c52fe000b567348107381a9f31f49eaed8", "3fef0fa2f218605a6761462e0fb0c57643ba7508", "f84210fdc29c2c535b9e294b037a27d5d29425bc", "c696d0dabdb18d28df0d4c23fed3733622f3ad67", "11e2465af4179b8945ae68bcd42802f8ce27bc45", "3245519444fd3f706bb133f4cf01b093a0816ba5", 
"092217c2267f6e0673590aa151d811e579ff7760", "ac658261691d7d76f9763ab43e0fcedc499898d0", "8865aeb8efaa49a1700230e2cb1dee4c157800c8", "2d71b38bd26f6c58155a9b85d9c0fe7e4f09d942" ], "paperAbstract": "QMCPACK has enabled cutting-edge materials research on supercomputers for over a decade. It scales nearly ideally but has low single-node efficiency due to the physics-based abstractions using array-of-structures objects, causing inefficient vectorization. We present a systematic approach to transform QMCPACK to better exploit the new hardware features of modern CPUs in portable and maintainable ways. We develop miniapps for fast prototyping and optimizations. We implement new containers in structure-of-arrays data layout to facilitate vectorizations by the compilers. Further speedup and smaller memory-footprints are obtained by computing data on the fly with the vectorized routines and expanding single-precision use. All these are seamlessly incorporated in production QMCPACK. We demonstrate upto 4.5x speedups on recent Intel® processors and IBM Blue Gene/Q for representative workloads. Energy consumption is reduced significantly commensurate to the speedup factor. 
Memory-footprints are reduced by up-to 3.8x, opening the possibility to solve much larger problems of future.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126952", "http://arxiv.org/abs/1708.02645", "https://arxiv.org/pdf/1708.02645v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/625b567202a6bef1d15c15d95c4d1cf743fc34c5", "sources": [ "DBLP" ], "title": "Embracing a new era of highly efficient and productive quantum Monte Carlo simulations", "venue": "SC", "year": 2017 }, "628bd6edede6a6604ea3152317c548d6bb5f7a4f": { "authors": [ { "ids": [ "8075036" ], "name": "Riad Akram" }, { "ids": [ "2581019" ], "name": "Abdullah Muzahid" } ], "doi": "10.1109/IISWC.2017.8167765", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167765", "entities": [ "Approximate computing", "Approximation algorithm", "Cognitive dimensions of notations", "Error-tolerant design", "Experiment", "Monte Carlo", "Monte Carlo method", "Programmer", "Scalability", "Traction TeamPage" ], "id": "628bd6edede6a6604ea3152317c548d6bb5f7a4f", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "116-117", "journalVolume": "", "outCitations": [ "1c7deea413bcc6bb509aeb5e0e64006834850b21", "219b47356dcd3c02a04837be9be7ae072153a9d1", "15b275f0421c606f5903532e9964b140cbb2f878", "d777b4177034dec34616bc42293978af995b84a1" ], "paperAbstract": "Approximate computing is getting a lot of traction especially for its potential in improving power, performance, and scalability of a computing system. However, prior work heavily relies upon a programmer to identify code sections where various approximation techniques can be applied. Such an approach is error prone and cannot scale well beyond small applications. In this paper, we contribute with a tool, called Approximeter, to automatically identify and quantify code sections where approximation can be used and to what extent. 
The tool works by first identifying potential approximable functions and then, injecting errors at appropriate locations. The tool runs Monte Carlo experiments to quantify statistical relation between injected error and corresponding output accuracy. The tool also provides a rough estimate of potential performance gain from approximating a certain function. Finally, it ranks the approximable functions based on their error tolerance and performance gain.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167765" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/628bd6edede6a6604ea3152317c548d6bb5f7a4f", "sources": [ "DBLP" ], "title": "Approximeter: Automatically finding and quantifying code sections for approximation", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "62c0af943a259c66b91dc932d3a5611afd014a4c": { "authors": [ { "ids": [ "39049654" ], "name": "Hao Wang" }, { "ids": [ "2266189" ], "name": "Jing Zhang" }, { "ids": [ "1948380" ], "name": "Da Zhang" }, { "ids": [ "1938539" ], "name": "Sarunya Pumma" }, { "ids": [ "1688860" ], "name": "Wu-chun Feng" } ], "doi": "10.1109/IPDPS.2017.119", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.119", "entities": [ "Algorithm", "Attribute\u2013value pair", "BLAST", "Big data", "Computation", "Distributed computing", "MapReduce", "Message Passing Interface", "Partition (database)", "Programmer" ], "id": "62c0af943a259c66b91dc932d3a5611afd014a4c", "inCitations": [ "310ecac3477a51aa303284f0853bd49ae8383ac3" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "605-614", "journalVolume": "", "outCitations": [ "0ad8e89091eed09217e66adc98136126addc2619", "6de2f02cfcc10d514431953a623898bfa61c1580", "5c4d550c5b35a99d0b8a041be8e66d11ff350020", "6474100a17b82d028e7131e8e0769cbc4e110914", "257adee470c54280da48d448a064b35537d51fbd", "63da1eca58997dc4d1b655095fd70edc9996a74a", 
"39bd8e34616ef998d722749a8e20e207f1d69078", "29542c65c237364e339e6789211db0f9e6db3287", "9ee76efb171dbc1264ab4b22933e3deedfd7fde8", "6de3915df2b9927a78f213629f3bcb052ec21e8b", "c737aa8b2c916fe1f13a6fd4e847fa45da1e5434", "11e4d4d00c7b1e3aa9fcf3c490b635df98827dd9", "4d36d376d891d30aca684a397bdd0b924d9b444f", "d6c4c76076efecb15655274adc648af8a445ed3a", "1802e0d4c6fbe1867779f4181826b2b9e5096888", "0f014693b25d9846025219b88f8ca480fac68b0a", "14a80b973aeb96e6f2f8b9e292fc05b0d5f9aad0", "168f2e12ae9fbb6c96146f4a7ded040d73e7b44b", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "9c3cc7337f7d70593a1ff8622de3128e1708b5a2", "601911f388ba3a8b5d666b31afc61aa6dfd1d433", "65b54461a0436e69969b2e2679dcbedcddd40d95", "a2ff5117ccd1eb3e42c6a606b8cecb4358d3ec84", "0a12a179bebdf4bb69d692a1127795b3f536270b", "703c2186db40f01cc7527aa45f5627d877487fe2", "b194c07c1b557f665a7460b515231ff36af6218f", "090030e0d1aa117008e9e9fa4abdee0a95455f4a", "230239fb61d7a6996ac9552706363323b34735f2", "1f9d47906319d0a8fac5c5fdbadf98e9da7966f9", "476b64be7cc0b985c02d69dd0532965924dd1869", "1b535af0d110491eabeedf8323a51327846e55b2", "0541d5338adc48276b3b8cd3a141d799e2d40150", "1760771f154262c26fd263f71066eca04afe00f5", "310ecac3477a51aa303284f0853bd49ae8383ac3", "ab756f4ed89c8e17632befe15c3579f0b9f04800" ], "paperAbstract": "Today, big data applications can generate largescale data sets at an unprecedented rate; and scientists have turned to parallel and distributed systems for data analysis. Although many big data processing systems provide advanced mechanisms to partition data and tackle the computational skew, it is difficult to efficiently implement skew-resistant mechanisms, because the runtime of different partitions not only depends on input data size but also algorithms that will be applied on data. As a result, many research efforts have been undertaken to explore user-defined partitioning methods for different types of applications and algorithms. 
However, manually writing application-specific partitioning methods requires significant coding effort, and finding the optimal data partitioning strategy is particularly challenging even for developers that have mastered sufficient application knowledge. In this paper, we propose PaPar, a Parallel data Partitioning framework for big data applications, to simplify the implementations of data partitioning algorithms. PaPar provides a set of computational operators and distribution strategies for programmers to describe desired data partitioning methods. Taking an input data configuration file and a workflow configuration file as the input, PaPar can automatically generate the parallel partitioning codes by formalizing the user-defined workflow as a sequence of key-value operations and matrix-vector multiplications, and efficiently mapping to the parallel implementations with MPI and MapReduce. We apply our approach on two applications: muBLAST, a MPI implementation of BLAST algorithms for biological sequence search; and PowerLyra, a computation and partitioning method for skewed graphs. 
The experimental results show that compared to the partitioning methods of applications, the codes generated by PaPar can produce the same data partitions with comparable or less partitioning time.", "pdfUrls": [ "http://synergy.cs.vt.edu/pubs/papers/wang-papar-ipdps17.pdf", "https://doi.org/10.1109/IPDPS.2017.119" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/62c0af943a259c66b91dc932d3a5611afd014a4c", "sources": [ "DBLP" ], "title": "PaPar: A Parallel Data Partitioning Framework for Big Data Applications", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "62d105d5f3c63727b056dd9726636eddd74d27da": { "authors": [ { "ids": [ "1701697" ], "name": "James Lin" }, { "ids": [ "19299121" ], "name": "Zhigeng Xu" }, { "ids": [ "2763806" ], "name": "Akira Nukada" }, { "ids": [ "3264280" ], "name": "Naoya Maruyama" }, { "ids": [ "1696166" ], "name": "Satoshi Matsuoka" } ], "doi": "10.1109/ICPP.2017.52", "doiUrl": "https://doi.org/10.1109/ICPP.2017.52", "entities": [ "Algorithm", "BLAS", "Benchmark (computing)", "Bridging (networking)", "Competitive programming", "Computer memory", "Kernel (operating system)", "Manycore processor", "Matrix multiplication", "Memory bandwidth", "Memory bound function", "Methods of computing square roots", "Program optimization", "RLC circuit", "SW26010", "Sunway", "Sunway TaihuLight", "Supercomputer" ], "id": "62d105d5f3c63727b056dd9726636eddd74d27da", "inCitations": [ "608fb6c95c30e5b27ca729e3f9a4a055747abc4a", "e45dea6588d1de0a23618e019031e67eedeeee26" ], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "432-441", "journalVolume": "", "outCitations": [ "b76269bf962989ce271bef7ea863ff4adf9c9de6", "0b47e159ed9a3e5db1adc135620e7526d93abd87", "1ccf5fc4dca56ddb75b33b99ecbde7b1432274c4", "2d21ca41ebdfb2b4e7a145e36dc8321386627e94", "645d9e7e5e3c5496f11e0e303dc4cc1395109773", 
"2900ebddc2dfb1e4bb7d7eac7384d7f4512b2b9a", "7fa2507aec08b080558f8a2e0971e294095756f7", "eb4f23afcc86609d9fc5fe90000d9db44cb3e575", "1a305ffa74845ec1c94f3eca0df846e2d26def5b", "b23604d797999b288c02a930e48e3bfa2dc3bd50", "004eda59c0ffceb2417bee87c95539eae4bdf0cd", "27bdd0a73b3d5f2c83ac7dfae447c20653dffa2d", "092217c2267f6e0673590aa151d811e579ff7760", "ea6ce42d3d5a9771d6f0c84f7d5aa6e28e7d0bc6", "4ae975bd05c504915aaaf7f5fd3ea49337dafb0c", "0edc1a2052c55c7687a08a8f65830c43fc8aed88" ], "paperAbstract": "The home-grown SW26010 many-core processor enabled the production of China’s first independently developed number-one ranked supercomputer – the Sunway TaihuLight. The design of the limited off-chip memory bandwidth, however, renders the SW26010 a highly memory-bound processor. To compensate for this limitation, the processor was designed with a unique hardware feature, "Register Level Communication" (RLC), to share register data among its 8 × 8 computing processing elements (CPEs) via a 2D onchip network. Such a radical architecture has sparked global researchers’ concerns regarding the programming challenges this may cause. To address these concerns, we adopted two compute-bound scientific kernels as benchmarks to identify the potential programming challenges. The first kernel is doubleprecision general matrix-multiplication (DGEMM). An RLCfriendly algorithm was designed for this kernel to reuse the data that already reside in the registers of 64 CPEs. This novel optimization enables the kernel to achieve up to 88.7% efficiency in one core group of the SW26010. This paper reveals, for the first time, the details of how the highly efficient DGEMM is implemented on the home-grown processor. The second kernel that we used is N-body. Due to the inefficient hardware support for transcendental operations on the SW26010, we replaced the reciprocal square root (rsqrt) instruction of N-body with a software routine to tackle the problem. 
Based on the programming challenges identified through these two optimized kernels, we proposed a three-level programming guideline for the SW26010. The paper concludes with our crucial finding that the critical step towards bridging the ninja performance gap on the SW26010 is to design an RLC-friendly algorithm to increase arithmetic intensity.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.52" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/62d105d5f3c63727b056dd9726636eddd74d27da", "sources": [ "DBLP" ], "title": "Optimizations of Two Compute-Bound Scientific Kernels on the SW26010 Many-Core Processor", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "630eb0c8cf211e95afc1696a2c627abe9e779bb3": { "authors": [ { "ids": [ "2238837" ], "name": "Chenhao Xie" }, { "ids": [ "1798309" ], "name": "Shuaiwen Song" }, { "ids": [ "1697912" ], "name": "Jing Wang" }, { "ids": [ "39986347" ], "name": "Weigong Zhang" }, { "ids": [ "2077106" ], "name": "Xin Fu" } ], "doi": "10.1109/HPCA.2017.37", "doiUrl": "https://doi.org/10.1109/HPCA.2017.37", "entities": [ "3D computer graphics", "3D rendering", "Baseline (configuration management)", "Central processing unit", "Computer data storage", "Cube", "Graphics processing unit", "Hybrid Memory Cube", "Pixel", "Random-access memory", "Throughput" ], "id": "630eb0c8cf211e95afc1696a2c627abe9e779bb3", "inCitations": [ "651ae380b5d500c613770dbf55c175c52576d7da", "3ad895a6e4ce6f07b722325613b27decf7aef4bc", "42318f40185f0bcddf60566b8586dacb557ab0cf", "a783558ea3ee37ac73cd920cafdd9f9797c1ad13", "84851b61293a4199c3f9164e21103b417aee49f7" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "637-648", "journalVolume": "", "outCitations": [ "1a85970157ebfe4455c1461f5c18764798cc6f1e", "005de7993edf311f02e4d3ab9ef90563469af7d4", "2af661c676ac486dbab88522161f4d3a57ff1561", 
"010840553dc8d4b22c3fb881d9c83f280bf79a0e", "4c9cec89a2c9c8173ee53ab4cda2c021421eb7a5", "a442c8b749310e46e351045ea199a6de6bc2491e", "2caa7e286803e1a7d49fb6906a7507ca08208751", "cd88fb2e5a6323e750fcc0c2be6e6c58fd684df6", "3788d4a1c152a68702293928bbc2406c1a5a839e", "10b014e882764f5800ecdcbaba1fa08795d0c54d", "a17a159f9852ddae3f8941b8d1eed6045c71a8c0", "04dcd8acdc16e42463e783ea5bc8283607ccee3f", "0763e8bee8d59588ce35705ef3e58b5d601d2ae6", "ada9f6bb88dadabede728be7a8c1a1767a4aa234", "179f80848143cf109fa6aebae6c3844da03b062c", "054e4a6966d54eb9fd207cf0484214201f46424a", "0d8bed4acf7d94a3cf4ea9a57ae0effeae76d408", "3a023541e04eb6aa5bc4d66821405db150c9b3bf", "2362d702b64b2f6a549155fe34a542524693d938", "01299bf5dce79d85aaa0d938670a93ddeeda4d0e", "0862fd4d2274d28fead7dbba2c9dec1acdfeb1e8", "2d680892a7318ab7eff879054ea7ab6aeeb51fe9", "7a1c4673a9d869a2c3b9dfe5a693b10a50d402f9", "b3443b6d941e448e0ac8aa67c44b7aec6bb2ac4b", "352a8957005dc5519b15ed1870751ec494d66395", "84060e060083df2398d57299f07410921f3225ec", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "1c9b763ecf4834c99b4aa475eccd99e61c6414bf", "3e79beb809c21ecce96477f821531014307afb85", "bbebaeb1b00fdcde2aeb192543835d3ec1518f8d", "5be7533f14e93b40dbf31fd01de30084393ea998", "3401d10e78ee5afdf5b7b42e7e9751f3d338a3d6" ], "paperAbstract": "The performance of 3D rendering of GraphicsProcessing Unit that converts 3D vector stream into 2D framewith 3D image effects significantly impacts users gamingexperience on modern computer systems. Due to its hightexture throughput requirement, main memory bandwidthbecomes a critical obstacle for improving the overall renderingperformance. 3D-stacked memory systems such as HybridMemory Cube provide opportunities to significantly overcomethe memory wall by directly connecting logic controllers toDRAM dies. 
Although recent works have shown promising improvement in performance by utilizing HMC to accelerate special-purpose applications, a critical challenge of how to effectively leverage its high internal bandwidth and computing capability in GPU for 3D rendering remains unresolved. Based on the observation that texel fetches greatly impact off-chip memory traffic, we propose two architectural designs to enable Processing-In-Memory based GPU for efficient 3D rendering. Additionally, we employ camera angles of pixels to control the performance-quality tradeoff of 3D rendering. Extensive evaluation across several real-world games demonstrates that our design can significantly improve the performance of texture filtering and 3D rendering by an average of 3.97X (up to 6.4X) and 43% (up to 65%) respectively, over the baseline GPU. Meanwhile, our design provides considerable memory traffic and energy reduction without sacrificing rendering quality.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.37" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/630eb0c8cf211e95afc1696a2c627abe9e779bb3", "sources": [ "DBLP" ], "title": "Processing-in-Memory Enabled Graphics Processors for 3D Rendering", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "6330f075daf847554007b236b57293f8ccebca64": { "authors": [ { "ids": [ "1759838" ], "name": "Antonio Barbalace" }, { "ids": [ "2937172" ], "name": "Robert Lyerly" }, { "ids": [ "3149561" ], "name": "Christopher Jelesnianski" }, { "ids": [ "3361289" ], "name": "Anthony Carno" }, { "ids": [ "1945658" ], "name": "Ho-Ren Chuang" }, { "ids": [ "34791782" ], "name": "Vincent Legout" }, { "ids": [ "1729107" ], "name": "Binoy Ravindran" } ], "doi": "10.1145/3037697.3037738", "doiUrl": "https://doi.org/10.1145/3037697.3037738", "entities": [ "ARM architecture", "Compiler", "Data center", "Operating system", "PowerPC", "Server (computing)", "X86" ], "id": 
"6330f075daf847554007b236b57293f8ccebca64", "inCitations": [ "db5aa66ec7e20068d4c5d26f6002838f9a49d349", "8071c58e2639b796bdd6544a27166b70376bdd4e", "7da5e182742802b64e64858b1a03254ff127abc0", "7cdf63e05545333f10f69317383a3a88c6e29d03" ], "journalName": "", "journalPages": "645-659", "journalVolume": "", "outCitations": [ "26cd9c812c279347ae96db31cee1cbee0f646fa4", "99f53a19e494960288dd302a17cf235ee587e5b4", "b669cac8563aae45df3f7b12edf45811acd5a147", "a1a148fd9578514c43f5a71eab85a263b93ce313", "002517bf3a5321d6b39005b08494ea8ce90c7e37", "6d37e31161d5f7ac35a6598e91270344983236eb", "1d4ac1fd1706a1ab3d93d8fa481d12332068fc30", "066add40724f1022011ef4e17a39c7d66c88397c", "5dc5b799d6d161d5c2805917d680d1eb7314fdf5", "2194c3460ab71f3826db00b045b2ae590c753319", "3e257f01e3ee71545d824a1615c35659525b856a", "d5fc12b8d3516ddda264d1554363d3f7575fa61a", "a85ecf47fe183d70e226ff59c5515f5d695ade31", "2e7647a07fe21c18ab5b7037de3038157338f1db", "1381d9a94f69535d8ea17fd770739bbd4ce7480e", "21ca4e497be1a9983e4ea45e342b58a18f554c75", "94a62be8355bf5be1edcc881a26559e5258e0f1d", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "108c840d5d1847948a2de0250490a327ae069ee6", "0f42b4dc664eb31df423c3de3a2cecf9c6ac83a8", "6d2db3d64131cd4a0f5bb1de46581f36046385f2", "573e9cb890d39c790b58bfa805526d40e8b472ec", "15e62e72ec7dda3f997a23e1342c7dc7f2bb7a6b", "0b35861df3b66533b0a188b411dcc4de6723a5cd", "648dcb76fb59d1f71936f89f5a5e6d5df1b3ea09", "0f44833eb9047158221e7b3128cde1347b58ccd6", "054572f0a9cf49fa9757ce937d097de6200fe942", "6743524c7522ba2a55153be3b40bc8c4bd52366a", "a4766decff2fe987757450d72feb45c7fab36d0f", "95cbaf0d8b70abf07944f36731eadc4e16329392", "30619d6b82612c2df25f436998872dea1858ed5f", "d91ea0b718321e2d30df9c73c88c5658c5a5b56d", "287a8303caef61c38175b629c0d244e80c88a694", "e372255e0e15378210272cc4d719f7949c30f070", "2583d51a7aafc4e4e3c9bdcd1fa8a978f7d81bc5", "1fa22149d8ace516c0dfab1bef9a9d982053cd89", "e9560db7050516b782a2e48ada82500bee7db0a4", "2ea36da7b6cfa6da563fc8997db2905f51b861a4", 
"48ebdcef1587a21bb9fbbd50a15ef9965abf0e81", "41aa7bcef1b7df80f50afc1e907dad6fadaff0fb", "20b400844c150b278346185c35511004fa8f3cda", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "2c2cfbec94307fc92192e5a4be0d0731799f9bf9", "06706cd6c46bc413bb9c272d6c1c034313ff2742", "544afa259d6dfe0214f029a9fa515dd0482dbba2", "352a8957005dc5519b15ed1870751ec494d66395", "0914d1fa86a1a5eeb16dcea904cc226fb010e508", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "3f53c5aa23060d5049b851ea552f248c851aa7b5", "8beda53e2b3cd04434aaaabef463ef8e18706df1", "45ee540d3b9b16ed9b5ad6ee034f3779b9561a73" ], "paperAbstract": "Energy efficiency is one of the most important design considerations in running modern datacenters. Datacenter operating systems rely on software techniques such as execution migration to achieve energy efficiency across pools of machines. Execution migration is possible in datacenters today because they consist mainly of homogeneous-ISA machines. However, recent market trends indicate that alternate ISAs such as ARM and PowerPC are pushing into the datacenter, meaning current execution migration techniques are no longer applicable. How can execution migration be applied in future heterogeneous-ISA datacenters?\n In this work we present a compiler, runtime, and an operating system extension for enabling execution migration between heterogeneous-ISA servers. We present a new multi-ISA binary architecture and heterogeneous-OS containers for facilitating efficient migration of natively-compiled applications. 
We build and evaluate a prototype of our design and demonstrate energy savings of up to 66% for a workload running on an ARM and an x86 server interconnected by a high-speed network.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037738" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6330f075daf847554007b236b57293f8ccebca64", "sources": [ "DBLP" ], "title": "Breaking the Boundaries in Heterogeneous-ISA Datacenters", "venue": "ASPLOS", "year": 2017 }, "634d281ca1acb3bc9bdebe8687142b5e90e3f125": { "authors": [ { "ids": [ "3396942" ], "name": "Alexander Gamero-Garrido" }, { "ids": [ "1727599" ], "name": "Stefan Savage" }, { "ids": [ "1763395" ], "name": "Kirill Levchenko" }, { "ids": [ "2199298" ], "name": "Alex C. Snoeren" } ], "doi": "10.1145/3133956.3134047", "doiUrl": "https://doi.org/10.1145/3133956.3134047", "entities": [ "Computer Fraud and Abuse Act", "Digital Millennium Copyright Act", "End-user license agreement", "Vulnerability (computing)" ], "id": "634d281ca1acb3bc9bdebe8687142b5e90e3f125", "inCitations": [], "journalName": "", "journalPages": "1501-1513", "journalVolume": "", "outCitations": [ "2544e463c3e9ef9d64b605c209f7fa1e91982c79", "07738e922945a8e818de04f329ea3d206012830c", "1e67a2bcfaa347bcae14792e330d8edd3a1f3bbd" ], "paperAbstract": "Product vendors and vulnerability researchers work with the same underlying artifacts, but can be motivated by goals that are distinct and, at times, disjoint. This potential for conflict, coupled with the legal instruments available to product vendors (e.g., EULAs, DMCA, CFAA, etc.) drive a broad concern that there are \"chilling effects\" that dissuade vulnerability researchers from vigorously evaluating product security. Indeed, there are well-known examples of legal action taken against individual researchers. However, these are inherently anecdotal in nature and skeptics of the chilling-effects hypothesis argue that there is no systematic evidence to justify such concerns. 
This paper is motivated by precisely this tussle. We present some of the first work to address this issue on a quantitative and empirical footing, illuminating the sentiments of both product vendors and vulnerability researchers. First, we canvas a range of product companies for explicit permission to conduct security assessments and thus characterize the degree to which the broad software vendor community is supportive of vulnerability research activities and how this varies based on the nature of the researcher. Second, we conduct an online sentiment survey of vulnerability researchers to understand the extent to which they have abstract concerns or concrete experience with legal threats and the extent to which this mindset shapes their choices.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134047", "http://cseweb.ucsd.edu/~snoeren/papers/dmca-ccs17.pdf", "https://cseweb.ucsd.edu/~klevchen/gsls-ccs17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/634d281ca1acb3bc9bdebe8687142b5e90e3f125", "sources": [ "DBLP" ], "title": "Quantifying the Pressure of Legal Risks on Third-party Vulnerability Research", "venue": "CCS", "year": 2017 }, "6366e100e368a42fb3c16a238d06a5e458f52992": { "authors": [ { "ids": [ "1940000" ], "name": "Tarek Elgamal" }, { "ids": [ "40298273" ], "name": "Shangyu Luo" }, { "ids": [ "40237241" ], "name": "Matthias Boehm" }, { "ids": [ "3351526" ], "name": "Alexandre V. 
Evfimievski" }, { "ids": [ "1947100" ], "name": "Shirish Tatikonda" }, { "ids": [ "1698945" ], "name": "Berthold Reinwald" }, { "ids": [ "40655309" ], "name": "Prithviraj Sen" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Baseline (configuration management)", "Code generation (compiler)", "Compiler", "Directed acyclic graph", "Experiment", "High- and low-level", "Holism", "Linear algebra", "Machine learning", "Mathematical optimization", "Memory bandwidth", "Program optimization", "Requirement", "Snapshot (computer storage)", "Sparse matrix", "Spoofing attack", "Symbolic computation" ], "id": "6366e100e368a42fb3c16a238d06a5e458f52992", "inCitations": [ "513cb06433fcdb837aa408b54c97992fed327cf6", "4853a26200889f033c0f509abf0f91d8cafba55b", "db12b1acdf950527ee8eccbdaa99ee9dcf5c1274", "035e49b58f2a873d5ff5cd8a3fdd6f27972708a8", "1156373cdb17608780bd2c00fff26bcbeeb2189c", "34f515d64fdae5f8dd4b036034889c8dd7376590", "31eee4a393c60963233cba078dc3b0109d5c3e54" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "2f9d58eb0c8a4d7384f5160318fff6d735d2c36f", "d0bc3c139c9a0129a87aa5f724e7bf82b4b04ce6", "78b3f0d59de6f43bbb2725cec75d55d6137a88e4", "4954fa180728932959997a4768411ff9136aac81", "6e183b2b67fe664323bcb3c18ce3a4d49dc6b110", "9689ba2d4673a39cb9bdfb9802660d6acf427704", "31eee4a393c60963233cba078dc3b0109d5c3e54", "1537d3387245897a8332f4e6a701beb6ffc0663a", "45619a2b7b41fea02345badf880530519d3d4c8f", "cdb213c2191732607134341e2b211977170ef048", "168de446821c0de9ebfa6d2145ca69837bd92671", "30795447cec18753254edbbd7839f0afa58b2a39", "1eb6dd5c7d7f18f0269cab00dd39e50834d99d58", "092217c2267f6e0673590aa151d811e579ff7760", "7ead08393fa2000ea03bb1cc1e3e6e0ac991aac6", "ad54be29be71e31319235cceef0911b72e06c659", "4512037737ab217cee118a07c5fdb22821eaf3fb", "c57432c8eaa67696b1600c29f0842a0bfd104d79", "57a5c26fb1992819f3e1ea6c9a6d80433fd35a12", "8480353978579de9a6840b6c90f18bd2e6b505b8", "0b147fa5a2e6872dfc34be5554183f0e68398c40", 
"0bb706bdffeab5d11b93cb05af691e0cfaf8e660", "32cdd8f6b3019f25d1b909f26386645896e20282", "43aac4922fc82c1e8062d4d22b670701b93d980a", "1e0d2de1f900aa681c04bd08bc5f6f405e56f18c", "03c78407480b665f1d344b622637badbed4a1d01", "17d6ed86ee2fcce750d424a88fa4f6f297f42ffd", "63dc0896fd3fd7a6c40beff5beca638e778db4fc", "0a267d927cfae039cf0a9c995a59ded563344eb6", "2e37d6b9a02ec31ea22252c37beae410c6b609a8", "27f3bb5ef854c0b0e559fb382114ba24891514b0", "75284ed9e49898e3367ecc99b6cd13948671e078", "e729739e2796348faa50c0e88e38be83b070d3fe", "f2ddfa8141e10b4913bb274d11b22bb4f5ff918e", "bac4169d6b6f713c76271b5ccf3d45293351f785", "001ab484e44bf365eb2c9532e21a42f9cbcbe5f2", "08c370eb9ba13bfb836349e7f3ea428be4697818", "0a9da717999ba687bcbef86cbc996f4b7334f990", "333452f55f403dcb5713a34a12a8beabaef21cdd", "66a6e8434ef51986cdf7669af526f9914c35d3a9", "1c918798bbfd2caa2335c5cd9f15c08e56ed2cde", "213a719cdecdd2e3a449c736db0d4449476ab323", "54a8cc92a2dc47875faded1d324cffc38de5fd38", "61090305ec35de8c3e71f3acbf152db7f8c0f95a", "4483c133f637170fedcb39a971da7e26a3c3f842", "09967ca2af1dad90ece59cf3cb823dbca67426b1", "0bf4d6c9b069660dd23b1698bfcc413235811a65", "638c4b038b884d5b0a86c7a450b739fe06c1f620" ], "paperAbstract": "Systems for declarative large-scale machine learning (ML) algorithms aim at high-level algorithm specification and automatic optimization of runtime execution plans. State-ofthe-art compilers rely on algebraic rewrites and operator selection, including fused operators to avoid materialized intermediates, reduce memory bandwidth requirements, and exploit sparsity across chains of operations. However, the unlimited number of relevant patterns for rewrites and operators poses challenges in terms of development effort and high performance impact. Query compilation has been studied extensively in the database literature, but ML programs additionally require handling linear algebra and exploiting algebraic properties, DAG structures, and sparsity. 
In this paper, we introduce Spoof, an architecture to automatically (1) identify algebraic simplification rewrites, and (2) generate fused operators in a holistic framework. We describe a snapshot of the overall system, including key techniques of sum-product optimization and code generation. Preliminary experiments show performance close to hand-coded fused operators, significant improvements over a baseline without fused operators, and moderate compilation overhead.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p3-elgamal-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/6366/e100e368a42fb3c16a238d06a5e458f52992.pdf", "s2Url": "https://semanticscholar.org/paper/6366e100e368a42fb3c16a238d06a5e458f52992", "sources": [ "DBLP" ], "title": "SPOOF: Sum-Product Optimization and Operator Fusion for Large-Scale Machine Learning", "venue": "CIDR", "year": 2017 }, "63c2432b7f61357ba520e0bdccd07ec7afb4be61": { "authors": [ { "ids": [ "1758267" ], "name": "Xu Zhao" }, { "ids": [ "8024529" ], "name": "Kirk Rodrigues" }, { "ids": [ "38173241" ], "name": "Yu Luo" }, { "ids": [ "1696433" ], "name": "Michael Stumm" }, { "ids": [ "2042324" ], "name": "Ding Yuan" }, { "ids": [ "5799064" ], "name": "Yuanyuan Zhou" } ], "doi": "10.1145/3102980.3103001", "doiUrl": "https://doi.org/10.1145/3102980.3103001", "entities": [ "Algorithm", "Basic block", "Debugging", "Information theory", "Production system (computer science)", "Run time (program lifecycle phase)", "Subroutine", "User behavior analytics", "Value (ethics)" ], "id": "63c2432b7f61357ba520e0bdccd07ec7afb4be61", "inCitations": [ "883a39fe55b14c0b60fea777fc06f271e2966d44" ], "journalName": "", "journalPages": "125-131", "journalVolume": "", "outCitations": [ "4f739534a366799e170599d3ff3d65597f0118db", "1d204d774c134dd7df97a6a83e12387efd0c7a01", "0f6863ca14fc33a87f03ac46051b39fe2541cdf1", "37c04a742561ac2e2fd0069a9e2f92a048df4c0e", "0f28af5e2f0ec33a29c5b12e5e5be78c8f9d14e8", 
"53aabc0ab7bdb22c4bb5b508a4db2fc4a2387060", "6bb4c541452795997f894a16c0c184faf2a673f9", "10da8673314188dd6ab1f16f73c05358771dd8cf", "3420487a5805d2cc8416ed7065568c96f2f26142", "9ee6209432316baf6776838917e06bca4d874747", "0aeb77fb41dc8e863e054fcffea7b8b3011515ce" ], "paperAbstract": "A production system's printed logs are often the only source of runtime information available for postmortem debugging, performance analysis and profiling, security auditing, and user behavior analytics. Therefore, the quality of this data is critically important. Recent work has attempted to enhance log quality by recording additional variable values, but logging statement placement, i.e., where to place a logging statement, which is the most challenging and fundamental problem for improving log quality, has not been adequately addressed so far. This position paper proposes we automate the placement of logging statements by measuring how much uncertainty, i.e., the expected number of possible execution code paths taken by the software, can be removed by adding a logging statement to a basic block. Guided by ideas from information theory, we describe a simple approach that automates logging statement placement. Preliminary results suggest that our algorithm can effectively cover, and further improve, the existing logging statement placements selected by developers. 
It can compute an optimal logging statement placement that disambiguates the entire function call path with only 0.218% of slowdown.", "pdfUrls": [ "http://www.eecg.toronto.edu/~yuan/papers/p125-Zhao.pdf", "http://doi.acm.org/10.1145/3102980.3103001" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/63c2432b7f61357ba520e0bdccd07ec7afb4be61", "sources": [ "DBLP" ], "title": "The Game of Twenty Questions: Do You Know Where to Log?", "venue": "HotOS", "year": 2017 }, "63d9562d2e50c57e684faed416801732a37d39fd": { "authors": [ { "ids": [ "2712837" ], "name": "Benjamin Klenk" }, { "ids": [ "1731123" ], "name": "Holger Fr\u00f6ning" } ], "doi": "10.1007/978-3-319-58667-0_12", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_12", "entities": [ "Message Passing Interface", "Message passing", "Message queue", "Parallel computing", "Program optimization", "Scalability", "Waypoint" ], "id": "63d9562d2e50c57e684faed416801732a37d39fd", "inCitations": [ "97ce2604a719d65ee6ea3b7c7bb21a4b439262e6", "3e1e61d1128dae2038cd9701ba95f348f6d12db1", "798bb03fd4b95f7f5b23ae21893dfacd36c68fcf" ], "journalName": "", "journalPages": "217-236", "journalVolume": "", "outCitations": [ "09db308c41c3107b0fb25e1f61922da2ddc460d3", "e98f988ad47a2c304036c8dcfb1c56ec99b11f85", "03c316a2177d112efb7c64fae4fc10377419610b", "cf60b4d7f37cc74ca7345a579201b89a010a67e8", "33dce868e0d719474b9f822c139628d06eca7eb1", "6f197e5aa64900079d760a397bb6a062df152ea6", "14a3c198a025d77ba502410df5ed9aaadb96f66c", "4849bbb611153b5a7c53894fa1c1314138f5ae89", "51dcb9e45b0ff083ea015ed4be79e660ec5b1e2a", "547014986afdf86ced23cdcce4583ee04f464160", "da538cfbda96a1fe98142fee6f55261c8433f41b", "7f2cbf3dd422dec88f5725700913a1d44c6f5beb", "53211544bc0c9a0303a1380e422dfaf7642312d8", "3e1e61d1128dae2038cd9701ba95f348f6d12db1", "081c32609be4adcf16fe6f3bd6ae35ce2622edaf", "5e555daf8dd73cee02ee54e7eff6cb90f2944ba9", "8ce244596d60478c4c9c4dd5cf43c57e45fccfa2" ], "paperAbstract": "The scale of applications and 
computing systems is tremendously increasing and needs to increase even more to realize exascale systems. As the number of nodes keeps growing, communication has become key to high performance. The Message Passing Interface (MPI) has evolved to the de facto standard for inter-node data transfers. Consequently, MPI is well suited to serve as proxy for an analysis of communication characteristics of exascale proxy applications. This work presents characteristics like time spent in certain operations, point-to-point versus collective communication, and message sizes and rates, gathered from a comprehensive trace analysis. We provide an understanding of how applications use MPI to exploit node-level parallelism, always with respect to scalability, and also locate parts which require more optimization. We emphasize on the analysis of the message matching and report queue lengths and associated matching rates. It is shown that most data is transferred via point-to-point operations, but the most time is spent in collectives. Message matching rates significantly depend on the length of message queues, which tend to increase with the number of processes. As messages are also become smaller, the matching is important to high message rates in large-scale applications.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_12", "http://www.ziti.uni-heidelberg.de/ziti/uploads/ce_group/2017-ISC.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/63d9/562d2e50c57e684faed416801732a37d39fd.pdf", "s2Url": "https://semanticscholar.org/paper/63d9562d2e50c57e684faed416801732a37d39fd", "sources": [ "DBLP" ], "title": "An Overview of MPI Characteristics of Exascale Proxy Applications", "venue": "ISC", "year": 2017 }, "6430973555601cfd810351a33f539bc2f5567f35": { "authors": [ { "ids": [ "2900532" ], "name": "Bangtian Liu" }, { "ids": [ "12181108" ], "name": "Chengyao Wen" }, { "ids": [ "9208982" ], "name": "Anand D. 
Sarwate" }, { "ids": [ "2917750" ], "name": "Maryam Mehri Dehnavi" } ], "doi": "10.1109/CLUSTER.2017.75", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.75", "entities": [ "Algorithm", "Computation", "GeForce 700 series", "Graphics processing unit", "Manycore processor", "Multi-core processor", "Sparse matrix", "Speedup", "Titan" ], "id": "6430973555601cfd810351a33f539bc2f5567f35", "inCitations": [ "934e7c28243a136099a75c5c518bc8bb5a61ca49" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "47-57", "journalVolume": "", "outCitations": [ "6206e90af42990d97e547c08a2fb75447155fd86", "1d0f25989452abbbc8feaf00a034ff110fc4b350", "008a6e4b2763736d2c6363ee6b546b09c0022e53", "53132a1619b13215bcd791cd6b850ff154f4f837", "231f97057e1efed073c20ccdf3aa3c5aaf063ffb", "6074c1108997e0c1f97dc3c199323a162ffe978d", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "374e96d4ce090569323cb63a6dd084f06cb833d5", "a9653a27052d666b7ed47524871dc9c3a9b92cc4", "aca75724674bd0f608086f90f3e229ae2c0f92a7", "07ed71b436b9adf23f0f93c8e4533461b82e769a", "03b6c2f7876cb9f4e8218c5f749b6959bbc3c983", "669508257d4621864011252d0423047f98d9329c", "02c1580617a9b1ccd809f06ae57773c00cf96647", "000b693f3398a7fff3d393ae89f9ca18d8f10856", "280bbaa66095fd6f89999003b802700935fdf77c", "53a225f2843e8544ca9c615ecfcc5fad26083e49", "514514e3f6150d1f36a7820fc5da5a17953d62f7", "1322c225b4e05dc22bbff7c5b9f5464f3cb7754b", "2e8ab628bc9f256c11c898aa44f049143c74d05d", "16d946c1113fcebf79a2d3af2062be37a995d133", "255aeb5c2a8eea15db08c617481ddbb35a41bfe4", "1cd294f3bcd647c8a2b2bbce47e827a8ece8b973", "048bfc88b9f54512304433bb2eeb68a3172159a8", "62dd02837c65b9c90de8d80c493f23ce1116cb3d" ], "paperAbstract": "Sparse tensors appear in many large-scale applications with multidimensional and sparse data. While multidimensional sparse data often need to be processed on manycore processors, attempts to develop highly-optimized GPU-based implementations of sparse tensor operations are rare. 
The irregular computation patterns and sparsity structures as well as the large memory footprints of sparse tensor operations make such implementations challenging. We leverage the fact that sparse tensor operations share similar computation patterns to propose a unified tensor representation called F-COO. Combined with GPU-specific optimizations, F-COO provides highly-optimized implementations of sparse tensor computations on GPUs. The performance of the proposed unified approach is demonstrated for tensor-based kernels such as the Sparse Matricized Tensor-Times-Khatri-Rao Product (SpMTTKRP) and the Sparse Tensor-Times-Matrix Multiply (SpTTM) and is used in tensor decomposition algorithms. Compared to state-of-the-art work we improve the performance of SpTTM and SpMTTKRP up to 3.7 and 30.6 times respectively on NVIDIA Titan-X GPUs. We implement a CANDECOMP/PARAFAC (CP) decomposition and achieve up to 14.9 times speedup using the unified method over state-of-the-art libraries on NVIDIA Titan-X GPUs.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.75", "https://arxiv.org/pdf/1705.09905v1.pdf", "http://arxiv.org/abs/1705.09905" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6430973555601cfd810351a33f539bc2f5567f35", "sources": [ "DBLP" ], "title": "A Unified Optimization Approach for Sparse Tensor Operations on GPUs", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "6450300d1d15ce03ddca2339184fc6b964189498": { "authors": [ { "ids": [ "2911895" ], "name": "Yaochen Hu" }, { "ids": [ "2143543" ], "name": "Yushi Wang" }, { "ids": [ "1806665" ], "name": "Bang Liu" }, { "ids": [ "1714907" ], "name": "Di Niu" }, { "ids": [ "2848503" ], "name": "Cheng Huang" } ], "doi": "10.1145/3127479.3131623", "doiUrl": "https://doi.org/10.1145/3127479.3131623", "entities": [ "Digital footprint", "Durability (database systems)", "Erasure code", "Experiment", "Java HotSpot Virtual Machine", 
"Load balancing (computing)", "Microsoft Azure", "Tails" ], "id": "6450300d1d15ce03ddca2339184fc6b964189498", "inCitations": [], "journalName": "", "journalPages": "365-377", "journalVolume": "", "outCitations": [ "3b1a1c620f193794c88e71af8f81fe0c17325ca7", "5ee3dc5f9343e41d10a092522c05072fe61b2708", "1567ee3c07476ad3111ad03ef0ac20ae8348e602", "87d47502bf40a4bfa7a0ded26c3efb2426250808", "67267066c2da469e0d6fdaa25e9bb43ca92b74ef", "2999a40e21f47fdf7180505c8389a0b07017b649", "58b628792d3eb22a034a871ed3cf373afe591928", "75032b6df2f3988d2cb6988c73cbc01c7b9e80cb", "61e30ac897a50fcfdeb7102c2d582ea88cb586e5", "234e6be0d4238f76b3ac038ee422be39f391c625", "3b547d706d33c110f96bf1c0e805ab8cc82afdbf", "090599a2caf4591c87699ad850c75554cd712937", "3de30c8dafc720bf066e5e3a005d16212dd31149", "3db5c29024481b22c07ca76d3493183de9865575", "3f4297d2e5ae196f30f9a18dcd8400dfca5d2e47", "0437df54051fc7c4d9b5f2941c8488e6a765918f", "a5cd59cf9f116cefc2627e4bba66e78a1ac4187b", "25e5d5a046afa5fcde7be23d087ae69f4b438e13", "581974700d6dfc05e85791675a9be4e5bec75936", "756d1f8f07a83f3cfc0edaa81493a9f109628e1b", "9bc26eae6acfcc9828b14f5695e39b8ca7ed3a8e", "4518e22acf20f27fc8e0271675f45c848452d72e", "29f3f5918946bf0a4d75bf5244f993847d03e26c", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "65fd142f37c315cdf892184f8fb21281b88f6269" ], "paperAbstract": "Erasure coding has been used in storage systems to enhance data durability at a lower storage overhead. However, these systems suffer from long access latency tails due to a lack of flexible load balancing mechanisms and passively launched degraded reads when the original storage node of the requested data becomes a hotspot. We provide a new perspective to load balancing in coded storage systems by proactively and intelligently launching degraded reads and propose a variety of schemes to make optimal decisions either per request or across requests statistically. 
Experiments on a 98-machine cluster based on the request traces of 12 million objects collected from Windows Azure Storage (WAS) show that our schemes can reduce the median latency by 44.7% and the 95th-percentile tail latency by 77.8% in coded storage systems.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3131623" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6450300d1d15ce03ddca2339184fc6b964189498", "sources": [ "DBLP" ], "title": "Latency reduction and load balancing in coded storage systems", "venue": "SoCC", "year": 2017 }, "6473a95e6049b2d3759061d5211aa936c20b3f5a": { "authors": [ { "ids": [ "2896559" ], "name": "Vicent Selfa" }, { "ids": [ "1813689" ], "name": "Julio Sahuquillo" }, { "ids": [ "1717133" ], "name": "Lieven Eeckhout" }, { "ids": [ "5482726" ], "name": "Salvador Petit" }, { "ids": [ "39569410" ], "name": "Mar\u00eda Engracia G\u00f3mez" } ], "doi": "10.1109/PACT.2017.19", "doiUrl": "https://doi.org/10.1109/PACT.2017.19", "entities": [ "Fairness measure" ], "id": "6473a95e6049b2d3759061d5211aa936c20b3f5a", "inCitations": [ "a11e842fdf25256a2ded132db0af76b49fdf6e73" ], "journalName": "", "journalPages": "194-205", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.19" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6473a95e6049b2d3759061d5211aa936c20b3f5a", "sources": [ "DBLP" ], "title": "Application Clustering Policies to Address System Fairness with Intel's Cache Allocation Technology", "venue": "PACT", "year": 2017 }, "64a80e61eb772a2ddcb0fd68ea68f9f2a2dc593c": { "authors": [ { "ids": [ "39233739" ], "name": "Shashank Agrawal" }, { "ids": [ "14043523" ], "name": "Melissa Chase" } ], "doi": "10.1145/3133956.3134014", "doiUrl": "https://doi.org/10.1145/3133956.3134014", "entities": [ "Attribute-based encryption", "Ciphertext", "Conditional access", "Cryptography", "Encryption", "Software deployment" ], "id": 
"64a80e61eb772a2ddcb0fd68ea68f9f2a2dc593c", "inCitations": [], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "807", "journalVolume": "2017", "outCitations": [ "5e2e76770b7f2c525b91c64d31c45fbc49fa9ddd", "2a40a6b0a4f8c2d4aa03764b01afde0bd25bc211", "5cb43e297b4bdd9398deb23a02257c43b2422210", "ccde4f28eac0501c7fa075d06ab3d0f01fbd09af", "1c86660a90f6eaba700d93730f29bfcdb925fa85", "bf25d9ca8eec88f515932fc7645c8ecc4e3e93b2", "083b473552c0b83d31ca89663171bd6223e72731", "35eddc46f29aa698d4edb7f558224750c78b1406", "52183a787693c362d6b58db3489b57172d3856f6", "1fd5b94eb87d028bf590d215e8fed18204f7e812", "0b277244b78a172394d3cbb68cc068fb1ebbd745", "7dd31e0556d46ff00d42f664d01e8510cf449a1d", "4f6c43279ab99dd1e19a1111d21cead981546b2e", "11d728f731bcbd990ac78f43dc9f17b5a1a9f594", "c52dc74424146429aeb6868d130c040e0acff579", "da09bc42bbf5421b119abea92716186a1ca3f02f", "0a7ddb346f432c50476359eb39510c838f739eee", "7f14bca3b6f51d4cfe8084798c2808c08b0214d6", "2dff0f21a23f9e3b6e0c50ce3fec75de4ff00359", "387fa50ce45d9fd844137b191e868f1069363d75", "469b24b0d49f826dfa666ceeed87e091f9703028", "0952b3eb8c214fc380c035c96bc7472ecd09e2ef", "3611b5f7e169f24a9f9c0915ab21a7cc40009ea9", "683d0e1471be57a483fe7448fc3e037fb904677e", "de71bc2b2cd41039864934a4c3999688bdf53516", "1e533e711f6c5905143f8a500843a6fcab150fc0", "a460385322839ada14f20f9a4ec5ac98e26cd2df", "8e6456de7f818f389de9657232d90b046caf5d8f", "4d04f238b965f7d367ff2b6cf18a40a05742d3d2", "627ccf9243443ac1ada40055e4b8b034bd8b16c7", "0aa20fb7c3a5aa0f2af3e2a1f857bf9073ec157f", "1f532d6885c11af4b02995c8d6ba0d67a4574bae", "362d1bf46a749b6c7eb73a87476734c8dee489ed", "4224b8218206c31881369184fe8b5a693d98ef8b", "592745ac8339a0330f13baac973c1f998704ad43", "d68b35a940119117212b6830f8076c0c3c4d6889", "40f430cc1c394f6150adf2f6324726d811d1c72f", "9fc65fef454176ee450fb9012286fca6944b3407", "15f5ce559c8f3ea14a59cf49bacead181545dfb0", "16e3df67a7ffce10b66dc59d247fb30fa31ed272", "02b999b353f30d1c34b5e847e82faf5c5fb77bcc", 
"4db8718a0043cf0f50cc0df6a5fbc534a302ca14", "0658394f2f6d0a4fcacdc92a33ce68c73bd4ebf3", "144c8080bd04780a2db43d6a5230d1fd00b72657", "8b9fe673239166fe8702609922d780e0e847ff4b", "55d77b0216a3adfe6f9ebfbe465cfc8fa1004006", "48de3bda67ce9aecd1118dd825e5663a4c7e0cab", "666ad75199dfac91a3ff25c7b5e50cf253c39325", "b913d1bb258097fc67500b4faa4226252c1135ef", "181b3d195048cabecc52701961e27d6af9955963", "c973614e5befbae1f6217a2ab6540bd2a3960f35" ], "paperAbstract": "Time and again, attribute-based encryption has been shown to be the natural cryptographic tool for building various types of conditional access systems with far-reaching applications, but the deployment of such systems has been very slow. A central issue is the lack of an encryption scheme that can operate on sensitive data very efficiently and, at the same time, provides features that are important in practice.\n This paper proposes the first fully secure ciphertext-policy and key-policy ABE schemes based on a standard assumption on Type-III pairing groups, which do not put any restriction on policy type or attributes. 
We implement our schemes along with several other prominent ones using the Charm library, and demonstrate that they perform better on almost all parameters of interest.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134014", "http://eprint.iacr.org/2017/807", "https://eprint.iacr.org/2017/807.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/64a80e61eb772a2ddcb0fd68ea68f9f2a2dc593c", "sources": [ "DBLP" ], "title": "FAME: Fast Attribute-based Message Encryption", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "64ad3b92f61a441c5b4080b0ac9291109a919886": { "authors": [ { "ids": [ "2421465" ], "name": "Ang Chen" }, { "ids": [ "2549020" ], "name": "Yang Wu" }, { "ids": [ "1719236" ], "name": "Andreas Haeberlen" }, { "ids": [ "35206168" ], "name": "Boon Thau Loo" }, { "ids": [ "33779522" ], "name": "Wenchao Zhou" } ], "doi": "", "doiUrl": "", "entities": [ "Computation", "Database", "Debugging", "Distributed computing", "Experience", "Software deployment", "Systems architecture" ], "id": "64ad3b92f61a441c5b4080b0ac9291109a919886", "inCitations": [ "438bf6c3ddf5d02c744aa6e3ce0526eaf0fa3457", "61abd9cfc3604302209e056abfccf1ec8d8ee09e", "4f5281543eef3da4d5822bcec93123f40e43f480", "6c9cafd316cce50115697b0f933f22a752868463", "0d6051001b340b8ec6b6a2df3940f8314c9abffa", "0240d922d1934db0e79dbfac9721d7870299ff9a", "53c0617eb76ed39f3ba9f3a45374839d7904ef93", "8c7044398d1994b12a9bf7212e11398f59eaf446", "49cfadb861b0742040620009d1f39a4481becc6e", "8007bb3a6409e0ed95afc4691c64085247c382bf" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "1d204d774c134dd7df97a6a83e12387efd0c7a01", "0cece4c9afe5128dec0ad0c556ce6889b94a845d", "0385a0c8b707d70bef33bb308d321b2647da0ca3", "d753029d4c125ae2a4a811cb6085d1f11b88125c", "79aec2093b2a1b0197e7d145b5cf86abc70fee3e", "164d8d8238674cdfb9bbb2583cfc390e178420de", "07c5da91f5a60765d563d01365bc3b15fa2f8e00", "9289860d43896b2d174a136eb56f03bb1b05e8d9", 
"01c4203910cbe98887fcfc7125a2efc34d37dd4e", "51d1cfdf9233cab9cc40f72bf049c2ad2f36082c", "62475a8e0672ed736cbe42403942a268ef0b947a", "36583417faf3d052c415262cca1ba44a6b90d75c", "2afc8d9b3a0d17fb926a6a6dd05b1fb307130a27", "2077579d62fc090d4ddf45f107ffae0468936165", "6fb97734b059f9ca6b0cb67cb5166adaf30b6a49", "2077cc18da002721390a23392ce4a25d19c3e2a2", "2fac216f660ddd5c8eabcaadb342ed117b32bb2b", "0ce67c89b4d7829580c16290a292ebee15507fa5", "ae9fc3f23fdde93e02d35210b062e4f8ed074400", "153506e97f5db120d28f0f4c726cbb5d751baa00", "2f9bb353e06dd0cafa7e287f9b9415c22878645a", "28c5d290a2b044dc4a3352ee9e692de48c82e57d", "98fef2c0314077d23cf2e6dd45bc8bac0180abfd", "0aeb77fb41dc8e863e054fcffea7b8b3011515ce", "1aa94aa0c5eba9fe773f70b1a9c47db401f5cb66", "35339f6f2e99c04920f21883df1db8004436cdc7", "3d95c58c595130caa94cdc1fcdb0f9b9f12cb805", "069103feb2d2d3f1b0115b484d5c2f978a983df0", "258e1ce43022b8e8d1c0eb1aa85d968d3671a816", "04b91495950bfdace18739151325c75e1d20ff9a", "17a23aaab0a713b7863ada44eca0c252a243c6b1", "7b863a5af466aab0f2d61f719d80041950244511", "5848da5058fed3b97bfd801ca19e5265f489abfe", "2e38636cf04fd1fe0439440c7c0cc45f07bc1275", "1746a1f92f97e4d15bbcbca627b8e21ef001adf4", "111864cac232d8a9c170bd63069eb4af155a9f7b", "bb63c68855d42c95623ed9362d0853ea1d4cc858", "24c3330d34d640945e0eb99fe4a0b1c31695a8cb", "0e9df21395c9b6272c42be80d221ec849ae83c6b", "125d586a05b620b60be8cc27161b824d2692af5f", "4350f7972bde4655c29d297b732edb67f36eb827", "2b0c044181e70ee8eacd2db26c31a03d5ec24c9c", "3760a7899bfdf7aa2799cf032980b728be7da032", "9f87fdf3f3f1e2a48c6c21629457cdb3b1873c7d", "197fc5f4e4d5212f5924b239591bbdba4bc5d409", "283ecc8622694c070fa53aee7a1c37dadc603f8d", "50738c1db92b1a12827bd3b0c7f15c987d3240c4", "1d80de948ddcf8f3524763ed3d5f4485f46d70fb" ], "paperAbstract": "Provenance is a way to answer \u201cwhy\u201d questions about computations. It has found a number of uses in the database community, such as debugging query answers or tracing unexpected results to database tuples. 
In fact, the ability to ask \u201cwhy\u201d can be useful for a much broader range of applications. In this paper, we summarize our experiences over the past few years in adapting provenance for diagnostic and forensic uses in networks and distributed systems. Our work draws inspirations from database provenance, yet the deployment scale, use cases, and distributed nature of networks require a significant re-design of traditional data provenance models. We review a number of use cases, ranging from investigating intrusions to diagnosing (and even automatically fixing) softwaredefined networks, and present a unified system architecture that we have designed and implemented for provenance in distributed systems. We conclude with a discussion of open issues in this space.", "pdfUrls": [ "http://www.seas.upenn.edu/~angchen/papers/cidr-2017.pdf", "http://www.cis.upenn.edu/~yangwu6/paper/netprov-cidr2017.pdf", "http://www.cis.upenn.edu/~ahae/papers/provenance-cidr2017.pdf", "http://cidrdb.org/cidr2017/papers/p36-chen-cidr17.pdf", "http://www.cis.upenn.edu/~angchen/papers/cidr-2017.pdf", "http://www.seas.upenn.edu/~yangwu6/paper/netprov-cidr2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8f65/a500099254d76d6154ff6d31c99dc1f73dff.pdf", "s2Url": "https://semanticscholar.org/paper/64ad3b92f61a441c5b4080b0ac9291109a919886", "sources": [ "DBLP" ], "title": "Data Provenance at Internet Scale: Architecture, Experiences, and the Road Ahead", "venue": "CIDR", "year": 2017 }, "64cc548a10a175e2dfb72df3457b56b8c2499925": { "authors": [ { "ids": [ "32052175" ], "name": "Ramyad Hadidi" }, { "ids": [ "3974788" ], "name": "Bahar Asgari" }, { "ids": [ "2167197" ], "name": "Burhan Ahmad Mudassar" }, { "ids": [ "1741842" ], "name": "Saibal Mukhopadhyay" }, { "ids": [ "2933334" ], "name": "Sudhakar Yalamanchili" }, { "ids": [ "8187053" ], "name": "Hyesoon Kim" } ], "doi": "10.1109/IISWC.2017.8167757", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167757", "entities": [ 
"Cube", "Dynamic random-access memory", "Hybrid Memory Cube", "Packet switching", "Three-dimensional integrated circuit" ], "id": "64cc548a10a175e2dfb72df3457b56b8c2499925", "inCitations": [ "9b3664ba2ddaa276f3f2b1212a44dc0b33735841" ], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "66-75", "journalVolume": "", "outCitations": [ "17d14aed5dfe63cd6d42abbb151b9142368f9342", "abbf3919bd951a5fdd0fa5b5a2c448b8380e4c0d", "8b04ea524cb6ced72868c120a00c4679d84be006", "6e9cec5119c9787cb5d8219f19612a21fbc1321e", "2d680892a7318ab7eff879054ea7ab6aeeb51fe9", "fd6bb13962dcef8fd92ede6ed2be8f474eacda7c", "54a15f2c25bec4274d5bb423dbc5002426a506a3", "10b014e882764f5800ecdcbaba1fa08795d0c54d", "abc5b77c91881f76a7ca6fe60f2bc1bffd347c1b", "ec856ee3364a0ce607b4bd7163b38533e52bad6f", "a6cc2def07a1880a81003449e0f0f901da597b18", "72dfed87d3549369ae93dbbbfd371f88c4e344c8", "6e1a98c60bbedbb9b70af9e374795f8e26cc4e8e", "42f174df3876256dd5606bb61b366116e9943beb", "8c3b449ed5e0e32e1e1934176265cec8dbc2bb4f", "4e8505919eb22265f107ebbeeee3fa78bf6d893a" ], "paperAbstract": "Three-dimensional (3D)-stacking technology, which enables the integration of DRAM and logic dies, offers high bandwidth and low energy consumption. This technology also empowers new memory designs for executing tasks not traditionally associated with memories. A practical 3D-stacked memory is Hybrid Memory Cube (HMC), which provides significant access bandwidth and low power consumption in a small area. Although several studies have taken advantage of the novel architecture of HMC, its characteristics in terms of latency and bandwidth or their correlation with temperature and power consumption have not been fully explored. This paper is the first, to the best of our knowledge, to characterize the thermal behavior of HMC in a real environment using the AC-510 accelerator and to identify temperature as a new limitation for this state-of-the-art design space. 
Moreover, besides bandwidth studies, we deconstruct factors that contribute to latency and reveal their sources for high- and low-load accesses. The results of this paper demonstrates essential behaviors and performance bottlenecks for future explorations of packet-switched and 3D-stacked memories.", "pdfUrls": [ "https://arxiv.org/pdf/1706.02725v3.pdf", "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167757", "http://arxiv.org/abs/1706.02725", "https://arxiv.org/pdf/1706.02725v1.pdf", "https://arxiv.org/pdf/1706.02725v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/64cc548a10a175e2dfb72df3457b56b8c2499925", "sources": [ "DBLP" ], "title": "Demystifying the characteristics of 3D-stacked memories: A case study for Hybrid Memory Cube", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "64d2d0450e0100998e5b7f53b59ff51f9cf7a210": { "authors": [ { "ids": [ "40444394" ], "name": "Meng Xu" }, { "ids": [ "40001161" ], "name": "Kangjie Lu" }, { "ids": [ "3254849" ], "name": "Taesoo Kim" }, { "ids": [ "1738428" ], "name": "Wenke Lee" } ], "doi": "", "doiUrl": "", "entities": [ "Compositing", "Diversification (finance)", "Fusebox", "Hardening (computing)", "RAM parity", "Transport Layer Security" ], "id": "64d2d0450e0100998e5b7f53b59ff51f9cf7a210", "inCitations": [], "journalName": "", "journalPages": "271-283", "journalVolume": "", "outCitations": [ "242b43fc76229ce4a3e9182f49267a5ad53ec106", "6a8f65381a627a2db6c756a7185d9106f0acefec", "1003cd805c87467b9a3e8e1dc75f1ceafc390161", "6e8d42ea4e8b88eacc337000c2e0b46d489f8437", "026c84df70942697ae850f9097c1676531a49821", "201b0a185dda51629d7b6fdef3b380a0beaba455", "72eb18b6c1faf83c876031138a75b1cfa2856c0a", "70ae295b9a7696f2d0c2fdb3a7a53f0d0e0a9320", "50bfb732ff36296243832c43936158bc9ba96dfe", "02eb0ffe1c4734f827e42047e8d4876e4e1aacc6", "11496e28e6dd3f1ab7d2f054c46d7587988dfd86", "13cab010c7d25e38397382b567de0198f4f466de", 
"bc44df77508e02b5d2cb0edbef3dfa87625e8a33", "377e1ea567fa79fae02a0b38a62916520ef81e2d", "29146d2b80bc8c56f7e18efe8d2c92354254e947", "09cafb70a4feab144ac1e6994694a2db21c7e656", "8a0af8ae748210ef571d074362b552af571e6d33", "3128d81ec9915faf1195f9ed59b3fd5f0b1b88ad", "229252e83bfa3af97ce4a66eb173ba024728e298", "2a974da13d6f956e37549378e00f86aa54bc5642", "209f4c5dc7c65670473836304f8c478e1e0a0980", "3be816a633ee79b9d734920faee820226c12a5b5", "0ad15428453e6f4962755933bd82f395eaf787b8", "0183d8c6623aaf106a27db72ecec9bb9704ab98c", "7fb20452f8a54ce18eb092b8f518eb2c254e499c", "07848afa4b5f2f09cd2fe28a6cdc25c642da61cd", "01ed0737864ae1530927e59a6c994fbfc73174f5", "79b6d89f92080b1ec9b8f23d7811666e15f6cabe", "f6a02a055a5473b4f4d66bfb4a7aeabf3ee32560", "0c0ff71e1f225312bd24a2d78153f0b3f3816285", "49cae25a1796b6a2898b99b2684b33eed8f58ee9", "12d5c3bcd2dd94b4f07f358be291c5906b16dba4", "2a9d09d8e2390c92cdaa5c8b98d6dd4cb394f638", "0719b9670c8580db76547497df39caabdc20fc32", "3d6395a04f7436b143339bbb7e4e46dfaaaf8afb", "7a9f655133788b2bd23c1171683f81b702b4b5b6", "801d2b0f77ed00b78176418a50785c8d1ec20b41", "b71f68cfebf5447c60e0967f03ced080afa10d2b" ], "paperAbstract": "A number of security mechanisms have been proposed to harden programs written in unsafe languages, each of which mitigates a specific type of memory error. Intuitively, enforcing multiple security mechanisms on a target program will improve its overall security. However, this is not yet a viable approach in practice because the execution slowdown caused by various security mechanisms is often non-linearly accumulated, making the combined protection prohibitively expensive; further, most security mechanisms are designed for independent or isolated uses and thus are often in conflict with each other, making it impossible to fuse them in a straightforward way. 
In this paper, we present BUNSHIN, an N-versionbased system that enables different and even conflicting security mechanisms to be combined to secure a program while at the same time reducing the execution slowdown. In particular, we propose an automated mechanism to distribute runtime security checks in multiple program variants in such a way that conflicts between security checks are inherently eliminated and execution slowdown is minimized with parallel execution. We also present an N-version execution engine to seamlessly synchronize these variants so that all distributed security checks work together to guarantee the security of a target program.", "pdfUrls": [ "http://www.cc.gatech.edu/grads/m/mxu80/pubs/xu:bunshin-slides.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/xu-meng", "http://www.cc.gatech.edu/grads/m/mxu80/pubs/xu:bunshin.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/64d2/d0450e0100998e5b7f53b59ff51f9cf7a210.pdf", "s2Url": "https://semanticscholar.org/paper/64d2d0450e0100998e5b7f53b59ff51f9cf7a210", "sources": [ "DBLP" ], "title": "Bunshin: Compositing Security Mechanisms through Diversification", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "64d41925868149d81e28801743fa0909276756b9": { "authors": [ { "ids": [ "1708869" ], "name": "Jack J. Dongarra" }, { "ids": [ "1776509" ], "name": "Sven Hammarling" }, { "ids": [ "1699285" ], "name": "Nicholas J. Higham" }, { "ids": [ "2643233" ], "name": "Samuel D. 
Relton" }, { "ids": [ "2720713" ], "name": "Mawussi Zounon" } ], "doi": "10.1007/978-3-319-64203-1_37", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_37", "entities": [ "Automatic vectorization", "BLAS", "Blocking (computing)", "Cache (computing)", "Central processing unit", "Cholesky decomposition", "Computer data storage", "Graphics processing unit", "Interleaved memory", "Knights", "LAPACK", "Linear algebra", "Math Kernel Library", "Matrix multiplication", "Multi-core processor", "OpenMP", "Tamper-resistant security module", "Xeon Phi" ], "id": "64d41925868149d81e28801743fa0909276756b9", "inCitations": [], "journalName": "", "journalPages": "511-522", "journalVolume": "", "outCitations": [ "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "19342c8db214ce6f24c444518ad4a2a37582b3b3", "2fe5f8804f8ba2f738b83719b11723fb4a0f8db0", "8d077fd35e3fe86efd989f86d7887f566e904edd", "4a48eb2bf94cd607bd0d80e31e13834594b3ff23", "2e1fb97a000f1d33ac4178de2efc6731b004df93", "31fb73f622fd889980f7a5b44d794c9f45f7c380", "8c6e313b3418e42afe4a852116e18d0f24284f35", "59342eaead2aeee97b36b9f3661aad8f2f73f7a5", "53e7e01482932c3a45b5f67962faec1799d285bf", "6b570069f14c7588e066f7138e1f21af59d62e61" ], "paperAbstract": "Solving large numbers of small linear algebra problems simultaneously is becoming increasingly important in many application areas. Whilst many researchers have investigated the design of efficient batch linear algebra kernels for GPU architectures, the common approach for many/multi-core CPUs is to use one core per subproblem in the batch. When solving batches of very small matrices, 2 \u00d7 2 for example, this design exhibits two main issues: it fails to fully utilize the vector units and the cache of modern architectures, since the matrices are too small. 
Our approach to resolve this is as follows: given a batch of small matrices spread throughout the primary memory, we first reorganize the elements of the matrices into a contiguous array, using a block interleaved memory format, which allows us to process the small independent problems as a single large matrix problem and enables cross-matrix vectorization. The large problem is solved using blocking strategies that attempt to optimize the use of the cache. The solution is then converted back to the original storage format. To explain our approach we focus on two BLAS routines: general matrix-matrix multiplication (GEMM) and the triangular solve (TRSM). We extend this idea to LAPACK routines using the Cholesky factorization and solve (POSV). Our focus is primarily on very small matrices ranging in size from 2\u00d7 2 to 32\u00d7 32. Compared to both MKL and OpenMP implementations, our approach can be up to 4 times faster for GEMM, up to 14 times faster for TRSM, and up to 40 times faster for POSV on the new Intel Xeon Phi processor, code-named Knights Landing (KNL). 
Furthermore, we discuss strategies to avoid data movement between sockets when using our interleaved approach on a NUMA node.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_37", "http://www.maths.manchester.ac.uk/~higham/papers/dhhr17a.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/64d4/1925868149d81e28801743fa0909276756b9.pdf", "s2Url": "https://semanticscholar.org/paper/64d41925868149d81e28801743fa0909276756b9", "sources": [ "DBLP" ], "title": "Optimized Batched Linear Algebra for Modern Architectures", "venue": "Euro-Par", "year": 2017 }, "64e3577ff9b99f6c6ca366153d65b042afc058ab": { "authors": [ { "ids": [ "32042628" ], "name": "Salvatore Di Girolamo" }, { "ids": [ "34920674" ], "name": "Flavio Vella" }, { "ids": [ "1713648" ], "name": "Torsten Hoefler" } ], "doi": "10.1109/IPDPS.2017.92", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.92", "entities": [ "Algorithm", "CPU cache", "Cache (computing)", "Clustering coefficient", "Coefficient", "Communication-avoiding algorithms", "Computation", "Floor and ceiling functions", "Library", "Message Passing Interface", "Revolution in Military Affairs", "Simulation" ], "id": "64e3577ff9b99f6c6ca366153d65b042afc058ab", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1018-1027", "journalVolume": "", "outCitations": [ "0caf691dc2f25a3eee80b3ac0e2aa72f953e18bc", "11f093a54c40d8ea8336d8e575d5ab717e0fbb51", "3ef8f56281ea8deb1a38eb95119e54e0da78d4ec", "11ff641f673812b3ce78d46b13323a5f84393f60", "fce7fd98928ab9bf3e4e919e108c48fc1040f569", "bae1f940475f4be4862425582aa84a24e57e0d46", "a1b9f637796f7366669f3c68dc7459596d1f7fad", "6bad177eb5fc0fd7ea223149cec4a76d8567479a", "24dc8d1de7e78ab100d2d83cbdf1390ddb9234c9", "0371f9e3efbcd4829b5ffbff585155746ef05284", "07745dca3ddfe267ccd7ad30cb6d4877f16389cc", "2ef5647f9bd901a6da89cdaa064fa67ce905b38d", "057a60fbc431ed0aef4e552199cc0ae0b970bb87", 
"f58b22395f9585c3da65bbc948c67eed3377f701", "17ffa6c8c257bf02a23699d226c541ac86af5e48", "38a351a8ac273ff60b5fd712eafa49d6b8414009", "08937c92f31895e16af48de1c7d18eeceef11f6f", "6d1ca1108d9d96e5607571502552ad04464d7f15", "69258ba9b1ace027daa767192698c84bf49b9fb6", "01c1f0e97ce5c74c714dc7aa43cb064f45cc3b04" ], "paperAbstract": "The constantly increasing gap between communication and computation performance emphasizes the importance of communication-avoidance techniques. Caching is a well-known concept used to reduce accesses to slow local memories. In this work, we extend the caching idea to MPI-3 Remote Memory Access (RMA) operations. Here, caching can avoid inter-node communications and achieve similar benefits for irregular applications as communication-avoiding algorithms for structured applications. We propose CLaMPI, a caching library layered on top of MPI-3 RMA, to automatically optimize code with minimum user intervention. We demonstrate how cached RMA improves the performance of a Barnes Hut simulation and a Local Clustering Coefficient computation up to a factor of 1.8x and 5x, respectively. Due to the low overheads in the cache miss case and the potential benefits, we expect that our ideas around transparent RMA caching will soon be an integral part of many MPI libraries.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.92" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/64e3577ff9b99f6c6ca366153d65b042afc058ab", "sources": [ "DBLP" ], "title": "Transparent Caching for RMA Systems", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "6503866cbd0df41bc75536e1e961384d12ff373a": { "authors": [ { "ids": [ "2192182" ], "name": "Simon Garcia De Gonzalo" }, { "ids": [ "2616895" ], "name": "Simon D. Hammond" }, { "ids": [ "5828650" ], "name": "Christian Trott" }, { "ids": [ "39754546" ], "name": "Wen-Mei W. 
Hwu" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.10", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.10", "entities": [ "Auto-Tune", "Iteration", "Kernel (operating system)", "Programmer", "Run time (program lifecycle phase)", "Sparse matrix" ], "id": "6503866cbd0df41bc75536e1e961384d12ff373a", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "72-80", "journalVolume": "", "outCitations": [ "32ef8d891edde06cc01357fa5c4d1ab7fe631720", "060fb53379e9382490a5d737413e3fa4c45ecf53", "602dcccc2bf6af1ca84355d530ff1e0a79391217", "4a2d7bf9937793a648a43c93029353ade10e64da", "0ac7e127033f1534bb2042461c653784dcf29b55", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "f3325ace129dec914966f9894d9f412e5e04bdc2", "6d001a0aaff32ed782e068d437a214e46ab84d95", "bb4cf037d8a5adbb3f08a3405d926d022b8c27c5", "1cd294f3bcd647c8a2b2bbce47e827a8ece8b973", "17c7747e63648d8d8d9d8ac4ac427d06ffe2c186", "47f05344d0d5fd252ebf645dddb8a1c5118cffc6", "28552ecf4eaedb3461edca97304b29082b02fbab" ], "paperAbstract": "Sparse-Matrix Vector products (SpMV) are highly irregular computational kernels that can be found in a diverse collection of high-performance science applications. Performance for this important kernel is often highly correlated with the associated matrix sparsity, which, in turn, governs the computational granularity, and therefore, the efficiency of the memory system. In this paper, we propose to extend the current set of Kokkos profiling tools with an autotuner that can iterate over possible choices for thread-team size and vector width, taking advantage of runtime information, while, choosing the optimal parameters for a particular input. 
This approach allows an iterative application that calls the same kernel multiple times to continue to progress towards a solution while, at the same time, alleviating the burden from the application programmer of knowing details of the underlying hardware and accounting for variable inputs. We compare the autotuner approach against a fixed approach that attempts to use all the hardware resources all the time, and show that the optimal choice made by the autotuner is significantly different among the two latest classes of accelerator architectures. After 100 iterations we identify which subset of the matrices benefit from improved performance, while others are near the break-even point, where the overhead of the tool has been completely hidden. We highlight the properties of sparse matrices that can help determine when autotuning will be of benefit. Finally, we connect the overhead of the autotuner to specific sparsity patterns and hardware resources.", "pdfUrls": [ "http://impact.crhc.illinois.edu/shared/Papers/hpcc17_SDH_camera_ready.pdf", "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.10" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6503866cbd0df41bc75536e1e961384d12ff373a", "sources": [ "DBLP" ], "title": "Revisiting Online Autotuning for Sparse-Matrix Vector Multiplication Kernels on Next-Generation Architectures", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "652640d1226131fbeb66aba6eab681196c2d5222": { "authors": [ { "ids": [ "2734356" ], "name": "Scott Beamer" }, { "ids": [ "1760896" ], "name": "Krste Asanovic" }, { "ids": [ "1701130" ], "name": "David A. 
Patterson" } ], "doi": "10.1109/IPDPS.2017.112", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.112", "entities": [ "Algorithm", "Benchmark (computing)", "Blocking (computing)", "Cache (computing)", "Graph drawing", "Hardware performance counter", "List of algorithms", "Locality of reference", "Loop nest optimization", "PageRank", "Principle of locality", "Program optimization", "Run time (program lifecycle phase)", "Sparse matrix", "Synthetic data", "Vertex (geometry)" ], "id": "652640d1226131fbeb66aba6eab681196c2d5222", "inCitations": [ "c18d8148b38862793f3e319e044e8b46ed8ba585", "d72293a7858d27058eac1690e6a3739db4d9bd97", "854dfd36420497b6aeae18f0178272588497c2b3", "348119d77d127dba6058802c12f98f06c8849f3d" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "820-831", "journalVolume": "", "outCitations": [ "141e35263ab810983c90d47ad62eb4fab5e51717", "b6b6d2504fd57d27a0467654fa62169cc7dedbdd", "3182511c054dac8308a08b408b55ed9520650d27", "6b6ca1041dbcf0ff44992f02826342e99da54996", "4139eedda8717ffd60052f68ed78b996aaebfced", "3339acf7d66a3818bf3eaebdb685ea57d6d62e14", "0c0800259bd40b1ac96cc437629c5ea0ad729f22", "2d7bf91ca184def17e15bf515532651fd5fe5f01", "adafc767bbcb5c196bc7a3e6f252aa67489375c0", "4dc578364f357b993b5554b9181c90c84aa6b4d1", "5f8dbd927b2be3269624f41d13ae10af2245ff7d", "0706356c9ab6014d6b04577d38289ea8328291a5", "71affe0d9489be0ecba667f568b1a0bcd9ee3af3", "b513711621e81d0abd042e0877ca751581a993f5", "05c9330f261ed3f5aecbca28004206d9a029656d", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "eb82d3035849cd23578096462ba419b53198a556", "2c394b418715072c01309b646f2535ad734d8c3e", "1c872c3f1f74a63f2cdca336409e4755e5198ceb", "829bb25d7b86a990232a392d468b0f0999c1939b", "5c0d56404b4e21d0e485c2e08abda2d12ae7b953", "3486aeaf540c48952120fe853d672af984f40a6a", "8572f800eeaae01b7faf7be62e041e3d08ea83ec", "4f3caa5573b4c1ebef7c3ee6b9f7643e689c858e", "5e762186f9710c3e357195d22488b5616d574da6", 
"55b3e22b56599ed8520deb1d7cb9ac460f4fa6bb", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "2b1ec3fdf5b695de2d7ec17393ec0ad9445ceb61", "3b874ce8d1fedd7f1f31a3c5ec495f4907b59da7", "26deee037b221bd05ed34461819f5c067b745445", "0271252d3044a646734988e02e0257c24ff6dcaa", "272550f6745acba4da9a10ab29ba738cb2c19d3b", "28e34059176c36934de116e138dd53cf4ee1dff0", "074d096a54bf6bb33c59f628206848c7724a7cf3" ], "paperAbstract": "Reducing communication is an important objective, as it can save energy or improve the performance of a communication-bound application. The graph algorithm PageRank computes the importance of vertices in a graph, and it serves as an important benchmark for graph algorithm performance. If the input graph to PageRank has poor locality, the execution will need to read many cache lines from memory, some of which may not be fully utilized. We present propagation blocking, an optimization to improve spatial locality, and we demonstrate its application to PageRank. In contrast to cache blocking which partitions the graph, we partition the data transfers between vertices (propagations). If the input graph has poor locality, our approach will reduce communication. Our approach reduces communication more than conventional cache blocking if the input graph is sufficiently sparse or if number of vertices is sufficiently large relative to the cache size. To evaluate our approach, we use both simple analytic models to gain insights and precise hardware performance counter measurements to compare implementations on a suite of 8 real-world and synthetic graphs. We demonstrate our parallel implementations substantially outperform prior work in execution time and communication volume. 
Although we present results for PageRank, propagation blocking could be generalized to SpMV (sparse matrix multiplying dense vector) or other graph programming models.", "pdfUrls": [ "http://www.scottbeamer.net/pubs/beamer-ipdps2017.pdf", "https://doi.org/10.1109/IPDPS.2017.112" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/652640d1226131fbeb66aba6eab681196c2d5222", "sources": [ "DBLP" ], "title": "Reducing Pagerank Communication via Propagation Blocking", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "6557e96f95a8bc860921304de511fc45d3834ae1": { "authors": [ { "ids": [ "1799916" ], "name": "Akshay Venkatesh" }, { "ids": [ "1780048" ], "name": "Khaled Hamidouche" }, { "ids": [ "2407825" ], "name": "Sreeram Potluri" }, { "ids": [ "2428889" ], "name": "Davide Rossetti" }, { "ids": [ "2670855" ], "name": "Ching-Hsiang Chu" }, { "ids": [ "1731654" ], "name": "Dhabaleswar K. Panda" } ], "doi": "10.1109/ICPP.2017.24", "doiUrl": "https://doi.org/10.1109/ICPP.2017.24", "entities": [ "Asynchronous I/O", "Benchmark (computing)", "CUDA", "Central processing unit", "Computation", "Control flow", "Critical path method", "Decoupling (electronics)", "Graph dynamical system", "Graphics processing unit", "Network interface controller", "Point-to-point (telecommunications)", "Scalability", "Simulation", "Strahler number" ], "id": "6557e96f95a8bc860921304de511fc45d3834ae1", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "151-160", "journalVolume": "", "outCitations": [ "43f0c099d44a68783a773f91cd03098a5252bf98", "49943d94c888b54da43a5114d0b3bb29d860e2c3", "46249511a2eccfd8e29e8446d8b895040caab0e9", "4e9b0c8b64c80153906dacd05e5fecf5f1dac826", "497fc8616563777046ecc89c85771b2ab446a518", "3e6f5b5e8b7cb5408da8cd10d0cc625b00910291", "80217b30d278660bc83b19f1f9ce18667a01c20a", "cb842f77af27e472bc68110e2300f47dddb046d0", 
"e44585b020c93b6755fd9637d235d08b72d8fb7c", "5d70bd2207d2d28c9c7c284a8ac3ca5b7a6b016c" ], "paperAbstract": "While GPUs are becoming common in HPC systems, the CPU is still responsible for managing both GPU-side and CPU-side compute, communication, and synchronization operations. For instance, if a result from a GPU-side computation is to be transferred to a remote destination, then the CPU must synchronize on GPU compute completion issuing a communication operation. Both CPU cycles and energy are consumed waiting for synchronization. In turn, this significantly affects overall application time and scalability (eg: strong scaling applications).In this work, we present techniques to decouple communication control flow between CPU and GPU on GPU-enabled systems with MPI+CUDA applications using the novel GPUDirect-aSync (GDS) mechanism. GDS allows the GPU to progress network communication with the goal of placing the CPU away from the critical path. To take advantage of GDS in MPI+CUDA applications, we introduce the notion of offloading MPI operations to CUDA streams (referred as MPI-GDS) which subsequently allow the GPU and the NIC to progress MPI communication in stream-order either before or after a CUDA operation. We also propose efficient designs/protocols to realize point-to-point communication operations that guarantee stream-ordering while achieving good performance. 
The proposed methods show good benefits with micro-benchmarks and up to 30% improvement in application-kernel pattern mimicking benchmark and up to 36% improvement with broadcast application-pattern simulation (in medium message range with 8 GPU nodes) in comparison with a pure MPI+CUDA application.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.24" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6557e96f95a8bc860921304de511fc45d3834ae1", "sources": [ "DBLP" ], "title": "MPI-GDS: High Performance MPI Designs with GPUDirect-aSync for CPU-GPU Control Flow Decoupling", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "6572ffedfd1a7ddb53b1d6f1ba8dfeb1c3ca182a": { "authors": [ { "ids": [ "1690083" ], "name": "Chi Zhang" }, { "ids": [ "1775391" ], "name": "Xinyu Zhang" } ], "doi": "10.1145/3131348.3131356", "doiUrl": "https://doi.org/10.1145/3131348.3131356", "entities": [ "Angle of arrival", "Experiment", "Global Positioning System", "Photogrammetry", "Software deployment", "Sparse matrix", "Ubiquitous computing" ], "id": "6572ffedfd1a7ddb53b1d6f1ba8dfeb1c3ca182a", "inCitations": [ "73f615dc7f8162998016e7d990872087040afd96", "253ef1f6f1416906a79e0f9af2f453b52ed3052b", "5e8a055053c4a5307e930770cb084d79efe8a9f1" ], "journalName": "", "journalPages": "33-35", "journalVolume": "", "outCitations": [ "5b7a6c35b258d2c32c09f0377b0c79ba02c9a9a3", "d49530c0ce4413051dbf4f9309f6a1afcd55123a", "1f911ae809066d4a55598bce939a466de980b13b", "0d9158bd3019a828ee3ebe3a1b502fbde2258297", "1e37384874c84acc7919176d4e9598e9116da2ee", "786b684d577ae57aa2fbc7d1fb0870ad86b998b5", "52b5f461e40d9e14eaef749a6b9b7f0cfdd2bfb9", "a210f466c7e97d969401fb463307d35fa16287fb", "81493baa7e7e62d4e83d80ba667fcc82af6dc20f", "47439480b8a13ea60aeef644f2f4aac0b3329a6f" ], "paperAbstract": "The past decade's research in visible light positioning (VLP) has achieved centimeter location precision. 
However, existing VLP systems either require specialized LEDs which hinder large-scale deployment, or cameras which preclude continuous localization due to power consumption and short coverage. We propose Pulsar, which uses a compact photodiode sensor to discriminate existing ceiling lights based on their intrinsic optical emission features. To overcome the photodiode's lack of spatial resolution, we design a novel sparse photogrammetry mechanism, which resolves the light's angle-of-arrival, and triangulates the device's 3D location and orientation. To facilitate ubiquitous deployment, we further develop a light registration mechanism that automatically registers ceiling lights' locations on a building's floor map. Our experiments demonstrate that Pulsar can reliably achieve decimeter precision with continuous coverage.", "pdfUrls": [ "http://doi.acm.org/10.1145/3131348.3131356", "http://doi.acm.org/10.1145/3117811.3117821", "http://xyzhang.ucsd.edu/papers/CZhang_MobiCom17_Pulsar.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6572ffedfd1a7ddb53b1d6f1ba8dfeb1c3ca182a", "sources": [ "DBLP" ], "title": "Pulsar: Towards Ubiquitous Visible Light Localization", "venue": "S3@MobiCom", "year": 2017 }, "657bc1afb7d045a6a6f9ae3b7a461dafeb045903": { "authors": [ { "ids": [ "3133439" ], "name": "Ozan Tuncer" }, { "ids": [ "16828201" ], "name": "Emre Ates" }, { "ids": [ "3357436" ], "name": "Yijia Zhang" }, { "ids": [ "1804097" ], "name": "Ata Turk" }, { "ids": [ "39920348" ], "name": "Jim M. Brandt" }, { "ids": [ "2667137" ], "name": "Vitus J. 
Leung" }, { "ids": [ "2489809" ], "name": "Manuel Egele" }, { "ids": [ "1809774" ], "name": "Ayse Kivilcim Coskun" } ], "doi": "10.1007/978-3-319-58667-0_19", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_19", "entities": [ "Algorithm", "Cloud computing", "Firmware", "Machine learning", "Overhead (computing)", "Resource contention", "Software bug", "Supercomputer", "System administrator", "System monitoring", "Systems management", "Time series" ], "id": "657bc1afb7d045a6a6f9ae3b7a461dafeb045903", "inCitations": [ "676e8d8260bc251229bec462ad5093d805152573", "2c691ecaf440f21441a79ac09e675eca533bb9a8" ], "journalName": "", "journalPages": "355-373", "journalVolume": "", "outCitations": [ "157c78c9752b50c632965c213ee85115ff426f67", "9f428e6fc51549b12d781ae709054bd64ad741d7", "0a5e8d1390f3ceb851f4a37a7ec8edb95e05f698", "a31c4bf1eeb568ef3db3d425064eebdcbb832330", "8beda53e2b3cd04434aaaabef463ef8e18706df1", "397f311e2bac0a0b0e15dbda7cfb7a1bf9a8edce", "580cd9345085036c200cbf0a75753653ecebfc94", "1ac57524ba2d2a69c1bb6defed7352a06fd7050d", "22be3604de30feae20b26f449caa1d2446445e48", "3635141c7e3ed8190de4d799d06a71b027c32975", "01d62cd850496455ce1616500f491690effa5c98", "ffac367fd502040f71b65b1b4b659bfa83fe84c8", "98234d0709b21a48cee4a0b82babb3d0c63f4145", "0323b626078b11e63509339771c20a7e283a1d70", "df97e1b49b7426b64fdaa01a1ffbfd669f0992f2", "16a4367795dc7fde9bae65de3a5fda8300f27a46", "0d8f9f2db5dd032758ca60cf535c1242d4273f5c", "13deab526e6e0762f500694affe587ed298e5233", "17f70a07e7f50f6f74952dbbbbe8b667f7db7634", "5fac45d1333efb6438d43fef3cf776855140f013", "9072581becff808f58b1f1b60baee233df478a7f", "260368e4b7ddef442bb5c197078e200b3c0ab7b1", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "4be228917846a218ba00d30b42d709a11b7a5311", "20cf8768573bb412bf67e9f627c3216de8fb6cdb", "a25131cc32cfc596afa5dbc463de1231024d5dfc", "2de224c7c963a4ac466c37fa66cd2a4b1e2ec139", "e0bc3395fefd71d81f540d754bc0fd2340c21f6e", "4618c9e130481088e0a9d089728eb40576387f9b" ], "paperAbstract": 
"With the growing complexity and scale of high performance computing (HPC) systems, application performance variation has become a significant challenge in efficient and resilient system management. Application performance variation can be caused by resource contention as well as softwareand firmware-related problems, and can lead to premature job termination, reduced performance, and wasted compute platform resources. To effectively alleviate this problem, system administrators must detect and identify the anomalies that are responsible for performance variation and take preventive actions. However, diagnosing anomalies is often a difficult task given the vast amount of noisy and high-dimensional data being collected via a variety of system monitoring infrastructures. In this paper, we present a novel framework that uses machine learning to automatically diagnose previously encountered performance anomalies in HPC systems. Our framework leverages resource usage and performance counter data collected during application runs. We first convert the collected time series data into statistical features that retain application characteristics to significantly reduce the computational overhead of our technique. We then use machine learning algorithms to learn anomaly characteristics from this historical data and to identify the types of anomalies observed while running applications. 
We evaluate our framework both on an HPC cluster and on a public cloud, and demonstrate that our approach outperforms current state-of-the-art techniques in detecting anomalies, reaching an F-score over 0.97.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_19", "http://www.bu.edu/peaclab/files/2017/04/tuncer_isc2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/657b/c1afb7d045a6a6f9ae3b7a461dafeb045903.pdf", "s2Url": "https://semanticscholar.org/paper/657bc1afb7d045a6a6f9ae3b7a461dafeb045903", "sources": [ "DBLP" ], "title": "Diagnosing Performance Variations in HPC Applications Using Machine Learning", "venue": "ISC", "year": 2017 }, "659133e375c6aff6bc1eda72f6ae2a2e48585817": { "authors": [ { "ids": [ "27082708" ], "name": "Joshua Lawrence Benjamin" }, { "ids": [ "2256210" ], "name": "Adam Funnell" }, { "ids": [ "32493855" ], "name": "Philip Michael Watts" }, { "ids": [ "39700737" ], "name": "Benn Thomsen" } ], "doi": "10.1109/HOTI.2017.22", "doiUrl": "https://doi.org/10.1109/HOTI.2017.22", "entities": [ "Algorithm", "Application-specific integrated circuit", "Clock rate", "Coherence (physics)", "Data center", "Electronic switch", "Graph theory", "Heuristic", "Iteration", "Matching (graph theory)", "Multiplexing", "Network switch", "Optical switch", "Packet switching", "Pipeline (computing)", "Power dividers and directional couplers", "Scalability", "Scheduling (computing)", "Standard cell", "Star coupler", "Telephone exchange", "Terabit", "Throughput", "Time complexity", "Toad Data Modeler", "Transceiver", "Transmitter", "Wavelength-division multiplexing" ], "id": "659133e375c6aff6bc1eda72f6ae2a2e48585817", "inCitations": [], "journalName": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "journalPages": "41-48", "journalVolume": "", "outCitations": [ "7443322265467139d76cd1fbf8eb6ef175f81825", "3fe59568240dc9fd2b14c9a3b44804a5f6808d40", "f0e0e4fd3c3a6661e474878e019f72e7f046ee30", 
"4773f1ec7b8cec7ed08552f1af6d017c452e7787", "dfb7d7e1c2715fab96588ee7934e126fd3ce61ae", "6d23153ad457f1a59974bb29b27332989b6344ac", "134d0ba6484eb1c0974aeff98af9a06ff58b4859", "2ddd6f32e73443ef17dbd48aa037c5964f6369bf", "6201237828488da781592ab58cdbe2c8de9b08cb", "c0d90016d819151a2e4be4422a2f41f368f42ec9", "577a708b00fccf2e55de3f2f1c7f210e05a34bff", "de6649603d469db2377f4e5e1d37971042ee1bb2", "1d8498e1a141ab2ad23d34e6ebebf94f26c18f12", "5b891081d9c004758916c92c551f377842ee9819", "451901f3990e0153386ebc7869a6dc4466b552c0", "1aab45842621ebfb8051f1c874747ad4224d5a36", "94d79b2ffc1d05363ca6e8fb5f2f763691aa6484", "e0264e3b24a00dc9218a67ba87182478a83b871c", "045ff0185f279b2d3e57b0a6ef0929f73ca9df31", "0452dec33d21c759b49a7545e0b7848237df5f66", "18c15c7c6ab7813cfd4f2b68ffe6ecfe86388d61" ], "paperAbstract": "Meeting the exponential increase in the global demand for bandwidth has become a major concern for today's data centers. The scalability of any data center is defined by the maximum capacity and port count of the switching devices it employs, limited by total pin bandwidth on current electronic switch ASICs. Optical switches can provide higher capacity and port counts, and hence, can be used to transform data center scalability. We have recently demonstrated a 1000-port star-coupler based wavelength division multiplexed (WDM) and time division multiplexed (TDM) optical switch architecture offering a bandwidth of 32 Tbit/s with the use of fast wavelength-tunable transmitters and high-sensitivity coherent receivers. However, the major challenge in deploying such an optical switch to replace current electronic switches lies in designing and implementing a scalable scheduler capable of operating on packet timescales.In this paper, we present a pipelined and highly parallel electronic scheduler that configures the high-radix (1000-port) optical packet switch. 
The scheduler can process requests from 1000 nodes and allocate timeslots across 320 wavelength channels and 4000 wavelength-tunable transceivers within a time constraint of 1µs. Using the Opencell NanGate 45nm standard cell library, we show that the complete 1000-port parallel scheduler algorithm occupies a circuit area of 52.7mm2, 4-8x smaller than that of a high-performance switch ASIC, with a clock period of less than 8ns, enabling 138 scheduling iterations to be performed in 1µs. The performance of the scheduling algorithm is evaluated in comparison to maximal matching from graph theory and conventional software-based wavelength allocation heuristics. The parallel hardware scheduler is shown to achieve similar matching performance and network throughput while being orders of magnitude faster.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HOTI.2017.22" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/659133e375c6aff6bc1eda72f6ae2a2e48585817", "sources": [ "DBLP" ], "title": "A High Speed Hardware Scheduler for 1000-Port Optical Packet Switches to Enable Scalable Data Centers", "venue": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "year": 2017 }, "65c708cbfcad01f2410ac68ff0b1315163fd8a37": { "authors": [ { "ids": [ "36960048" ], "name": "Kai Wu" }, { "ids": [ "11833602" ], "name": "Yingchao Huang" }, { "ids": [ "1678390" ], "name": "Dong Li" } ], "doi": "10.1145/3126908.3126923", "doiUrl": "https://doi.org/10.1145/3126908.3126923", "entities": [ "Algorithm", "Computer data storage", "Dynamic random-access memory", "Non-volatile memory", "Scalability", "Volatile memory" ], "id": "65c708cbfcad01f2410ac68ff0b1315163fd8a37", "inCitations": [], "journalName": "", "journalPages": "58:1-58:14", "journalVolume": "", "outCitations": [ "33e64874996ac6d163e4e5a97e28b617de7cc0f5", "2092d64f8d99ab8cc5b353bbc3dddf4186bcb461", "03e93625d185c0ac144c97fdf269b5ae5f38351e", 
"4d4c9c4a8ea86c4c25a2f308962ba0231b33af02", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "2b8f8cf8b74491616dc8734d10b91c9182926c54", "3d2dfe972be7a60937df97bd309b423726375cb4", "e885f898d8417fc7c2bfb030a57d397ff023c41a", "a459d11e7fff61004dd392806f27317c16ce6696", "05ae5121cfc7c101d72f70ee0e7a5f938f8140f1", "d7203f317b37d565ab54b6a48ef13ded3777eb78", "2fc8a439f0f73462f875870c403c90ea1fb99e41", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "ddfe7c78115c3a610c0ad64691791ce463162282", "40718dab3e261c2456c3576d15dd0105f1e2e4e2", "1bf91711b94e507c62d91c79e72efcee5d21f627", "2a660e81e6501ec3489d962fe87448ecf277237f", "99e5ada094ca94d4d6dba82306cd15c1a0b3209a", "f5d58ccdd242c9a1fae93ab8a575cb3220cb89a0", "068d0b393db03678ea1d346ee01871e91e88c560" ], "paperAbstract": "Non-volatile memory (NVM) provides a scalable and power-efficient solution to replace DRAM as main memory. However, because of relatively high latency and low bandwidth of NVM, NVM is often paired with DRAM to build a heterogeneous memory system (HMS). As a result, data objects of the application must be carefully placed to NVM and DRAM for best performance. In this paper, we introduce a lightweight runtime solution that automatically and transparently manage data placement on HMS without the requirement of hardware modifications and disruptive change to applications. Leveraging online profiling and performance models, the runtime characterizes memory access patterns associated with data objects, and minimizes unnecessary data movement. Our runtime solution effectively bridges the performance gap between NVM and DRAM. 
We demonstrate that using NVM to replace the majority of DRAM can be a feasible solution for future HPC systems with the assistance of a software-based data management.", "pdfUrls": [ "https://arxiv.org/pdf/1705.00249v1.pdf", "http://arxiv.org/abs/1705.00249", "http://doi.acm.org/10.1145/3126908.3126923", "https://arxiv.org/pdf/1705.00249v2.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/65c708cbfcad01f2410ac68ff0b1315163fd8a37", "sources": [ "DBLP" ], "title": "Unimem: runtime data management on non-volatile memory-based heterogeneous main memory", "venue": "SC", "year": 2017 }, "65d9a71846c97592d0a614ddee2740a692ba0993": { "authors": [ { "ids": [ "1689461" ], "name": "Joel Alc\u00e2ntara" }, { "ids": [ "1736719" ], "name": "Tiago Oliveira" }, { "ids": [ "1707743" ], "name": "Alysson Neves Bessani" } ], "doi": "10.1145/3135974.3135985", "doiUrl": "https://doi.org/10.1145/3135974.3135985", "entities": [ "Amazon Simple Storage Service", "Amazon Web Services", "Backup", "Cloud computing", "Cloud storage", "Database", "Disaster recovery", "Durability (database systems)", "IBM Tivoli Storage Productivity Center", "Information privacy", "Information system", "Money", "Overhead (computing)", "Virtual machine" ], "id": "65d9a71846c97592d0a614ddee2740a692ba0993", "inCitations": [], "journalName": "", "journalPages": "248-260", "journalVolume": "", "outCitations": [ "134021cfb9f082f4e8b58f31bcbb41eb990ab874", "0b674bcadb832727e56b94bc623944ca8c29cb89", "5c2efd16718cc5b0c8c37bba026f57c6d592d1e0", "108cfdfcf972eea7ef710ea4d31b7ca89bca3c09", "734080f75b9df156539691f628eb9912786893d6", "154daac0565483fc56209fb8272674b295472edf", "40a00e89195903fbaffb364fe410a215faf6715c", "086820e40dc8046c30a8751394df167bec047fe1", "a560e4a8264280ff5c4246d502beb351e564dea2", "b46cb54a87a448212af37f2594a512fec39a059e", "948c881ab7f1f62e9c940458e74c3e435320df72", "7ee123ce7763d3886e8061f29729ecbeab1b4d80", "418e5e5e58cd9cafe802d8b679651f66160d3728", 
"7a765f87d1c67de065539e08cc8eb03ca15c9b9a", "233cb9ba69b476203ea59832e3e4c15029f38ff3", "1901c2280e74b331ec766b26b2af0cf0f648b619", "61b8ade95787896bb16978586e14fdda63149006", "b949a90b33e85e3f92fabe870a1e731d3e72a434", "39e3d058a5987cb643e000bce555676d71be1c80", "27f071ccbea5a4940dcc585ba4cfa9258bf2bcdf", "92c58ed9cd502391078a3eedea21937ba1c8748d", "7c4cf4515091593106242f169dac0dd2208f9d8b", "9b86c7208233bfbc9781ce772dd87072af073eae" ], "paperAbstract": "Disaster Recovery (DR) is a crucial feature to ensure availability and data protection in modern information systems. A common DR approach requires the replication of services in a set of virtual machines running in the cloud as backups. This leads to considerable monetary costs and managing efforts to keep such cloud VMs. We present Ginja, a DR solution for transactional database management systems (DBMS) that uses only cloud storage services such as Amazon S3. Ginja works at file-system level to efficiently capture and replicate data updates to a remote cloud storage service, achieving three important goals: (1) reduces the costs for maintaining a cloud-based DR to less than one dollar per month for relevant databases' sizes and workloads (up to 222 x less than the traditional approach of having a DBMS replica in a cloud VM); (2) allows a precise control of the operational costs, durability and performance trade-offs; and (3) introduces a small performance overhead to the DBMS (e.g., less than 5% overhead for the TPC-C workload with ≈ 10 seconds of data loss in case of disasters).", "pdfUrls": [ "http://doi.acm.org/10.1145/3135974.3135985" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/65d9a71846c97592d0a614ddee2740a692ba0993", "sources": [ "DBLP" ], "title": "Ginja: one-dollar cloud-based disaster recovery for databases", "venue": "Middleware", "year": 2017 }, "65de8b9003a03f58be45ead576260c19e4e549c2": { "authors": [ { "ids": [ "1766680" ], "name": "Yufei Ding" }, { "ids": [ 
"3118258" ], "name": "Lin Ning" }, { "ids": [ "2306471" ], "name": "Hui Guan" }, { "ids": [ "37914192" ], "name": "Xipeng Shen" } ], "doi": "10.1145/3062341.3062377", "doiUrl": "https://doi.org/10.1145/3062341.3062377", "entities": [ "Algorithm", "AngularJS", "Compiler", "Data mining", "Machine learning", "Open-source software", "Optimizing compiler", "Program optimization", "Social inequality", "Speedup", "Strength reduction", "TI-BASIC" ], "id": "65de8b9003a03f58be45ead576260c19e4e549c2", "inCitations": [ "346be8ca282318399c7fb272b8aac913ae5493dc" ], "journalName": "", "journalPages": "33-48", "journalVolume": "", "outCitations": [ "8eaf2504275d4a0755aa5e8a7b7d8f081973ebce", "5d9d1b95d5afd58f6e53512b7ddd04b78d62864c", "38adc6ce214ad89ad6a0c47b489608a0fbeedaaf", "58672feda42401422e8c04973be2780428553473", "bb71f94e48503a0d110ccdcf6ac587b96e242dd8", "2693c442990c55f2785e4142470b047e2095a8b8", "02837c4b66da1288e30e9f5012fbf7cb68e67ad5", "373f76633cc1f6c7a421e31c989842021a52fca4", "b3ce02f281977099e4696b8f4fbca9b1b178fd52", "7ff4b2802141e7444606a080bd9383b4d461ad91", "1a700d6cce09f1711cdaf8f5f21b6ab2a8ce7bae", "204dc0986b512a95a66632556d10c3c162caf7b7", "0dd8ecc9e92dfd7e3447b515561e6b0404402a26", "8d57c162a435a81fa92e337609ef79d4e4aecb10", "c49b212dbe9a58e36ce21c0fe13c8d65ad7a2fdb", "2cfe0f578b9907e98d007e379fd1db28a926b15f", "be6ba01248b50c86c48493f63d675c5e55c083b4", "7dadbf42abc1ecbb963ffba2dafce95e92e04a05", "9b8d8f2fb88e03f8f3ad01efbfef52718b70d104", "484a44a9f07b321a5fc9303d62ba4b342647e9fd", "0456a5c3b2001465d05e84ce6786ef200184de65", "06067266992ca9ace20756f4ac27cb9090f98315", "040678daf6a49a88345ee0c680fccfd134f24d4b", "c0251d764976b9676c24fc33459b2c1842cd3417", "9241ea3d8cb85633d314ecb74b31567b8e73f6af", "178286f3640f9c5c8c129799d6b00f313481d13a", "94bc92a2275894b498cfed61fee1d261d4daf708", "b29bf8f6900b4b0258397f73957eabd1bb977ef4", "3291d89de761058bdf52b7302909318e4c30ee8f", "5ae9cc448189fe783d84bdeab07049930b287061", 
"d45ec41b45caa8686fa1788d9191ab4044a18a83", "51f16256472a334ffb4a579de5eabd371291ca59", "99b2de508b45ecd0c3aa797cb0376b64f9665d1a", "59db29dc12c42c5e3251bbe628c873183918f416", "66a6dde6a6a20f77ce52cb2464a52777837bd81e", "1874063f90bdf435d2dcd02f62a25b646a5c2306" ], "paperAbstract": "Triangular Inequality (TI) has been used in many manual algorithm designs to achieve good efficiency in solving some distance calculation-based problems. This paper presents our generalization of the idea into a compiler optimization technique, named TI-based strength reduction. The generalization consists of three parts. The first is the establishment of the theoretic foundation of this new optimization via the development of a new form of TI named Angular Triangular Inequality, along with several fundamental theorems. The second is the revealing of the properties of the new forms of TI and the proposal of guided TI adaptation, a systematic method to address the difficulties in effective deployments of TI optimizations. The third is an integration of the new optimization technique in an open-source compiler. Experiments on a set of data mining and machine learning algorithms show that the new technique can speed up the standard implementations by as much as 134X and 46X on average for distance-related problems, outperforming previous TI-based optimizations by 2.35X on average. 
It also extends the applicability of TI-based optimizations to vector related problems, producing tens of times of speedup.", "pdfUrls": [ "https://research.csc.ncsu.edu/nc-caps/yding/publication/ATI.pdf", "http://doi.acm.org/10.1145/3062341.3062377" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/65de8b9003a03f58be45ead576260c19e4e549c2", "sources": [ "DBLP" ], "title": "Generalizations of the theory and deployment of triangular inequality for compiler-based strength reduction", "venue": "PLDI", "year": 2017 }, "65fef0cd15b565055f21cf8c489cae39dd569220": { "authors": [ { "ids": [ "1926783" ], "name": "George Prekas" }, { "ids": [ "27072743" ], "name": "Marios Kogias" }, { "ids": [ "1678618" ], "name": "Edouard Bugnion" } ], "doi": "10.1145/3132747.3132780", "doiUrl": "https://doi.org/10.1145/3132747.3132780", "entities": [ "B-tree", "Centralisation", "Data structure", "Data-intensive computing", "Database transaction", "IBM Tivoli Storage Productivity Center", "In-memory database", "In-memory processing", "Inter-processor interrupt", "Interrupt", "Linux", "Linux", "Multi-core processor", "Operating system", "Ralf Brown's Interrupt List", "Remote procedure call", "Scheduling (computing)", "Service-level agreement", "Shared memory", "Silo", "Small multiple", "Speedup", "Throughput", "Work-conserving scheduler" ], "id": "65fef0cd15b565055f21cf8c489cae39dd569220", "inCitations": [ "b053033ad436cd404bb0eb2e75b3aac83b70d62c", "2dc3d8536a8fa4660c6e842ef17c2d1553fceea6" ], "journalName": "", "journalPages": "325-341", "journalVolume": "", "outCitations": [ "a6e8098671ccfc1147870db90e80360654cd92aa", "032dce3c66746661c53d18f8b0dfe0f53e4485fe", "20a44558eed182a971f7add68ecc5931fbca2a65", "46a38fb1e26c9957111d154c6fc2c39101ff0ba1", "0d62acaefd4c4f41fd5814c8d9267d7798de9284", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "1b6cb42480d298cd25e8546d25d930ae44729855", "293a3136995d2108a5b6d806aa15c0841e9ac238", "756d1f8f07a83f3cfc0edaa81493a9f109628e1b", 
"0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "5476797b6be75b27b7e2780a6cd61dab3e3acf87", "2a9d09d8e2390c92cdaa5c8b98d6dd4cb394f638", "2f52cbef51a6a8a2a74119ad821526f9e0b57b39", "08632fe2b934ed15d3499e7321282c81adc2c390", "03fb875d5022a5e98f19c271e2403232acc55318", "13b26d008210fffeb8a77c9e90f1ff837523c536", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "77a3133097ff59bae0b6ac8fae418a58b585dacb", "01f21f3aacb36a425aa9213a10ccc543a11659ab", "0b877aed79939b2ba81b6dc58ce8544c6b532bcb", "377177bb82105c35e6e26ebad1698a20688473bd", "3fbba3719b3e07084cbc85daf2a1a094c9335b6d", "32855fde8a03ae157fb9399708f8f68fb3c7a88f", "09bd66ed15985caa6b0bf1d54a36b508141ed128", "0d2ccae6d37b9e4053fc476887d1565de58e5924", "02d9013e5d370fb79ff1569a59190e18515fa3cd", "225603198cc415d363db8a8a2bd30b0df3c963b1", "7932a4597cec5149c575aa2303fe8f12241e4320", "17650831f1900b849fd1914d02337e1d006aea0c", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "3be9c5fa026b3fa887a8652a752d100b84e57451", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "3574657705475722b6c398c266805f758268778b", "6f5b24e2291e6d829f13f66e1bccedc49f43ff70", "8f8a07137d8b015fb8d3fed6ab0294c05a5a3401", "0852a44c86db434e9b51c67704636791e9940487", "226ca798b529c13605a2aa7fe75d58f4188f850a", "39ac2e0fc4ec63753306f99e71e0f38133e58ead", "4669acc56c3b6750c75263e3d03d6ca6b7705914", "1b2e68064f2bea7de1f73c4c04a061d05cb0f3e8", "065465ac37607a347186ea50873fc63d17cd2c79", "ad73deea37cad9a9b945d929a86d82d781450345", "2c11d5117a8b97ef2ef268e5fb38e8c5ffb1c58c", "136eefe33796c388a15d25ca03cb8d5077d14f37", "6d44790b6d952eff28f302998e8121f90786e3ff" ], "paperAbstract": "This paper focuses on the efficient scheduling on multicore systems of very fine-grain networked tasks, which are the typical building block of online data-intensive applications. 
The explicit goal is to deliver high throughput (millions of remote procedure calls per second) for tail latency service-level objectives that are a small multiple of the task size.\n We present ZYGOS, a system optimized for μs-scale, in-memory computing on multicore servers. It implements a work-conserving scheduler within a specialized operating system designed for high request rates and a large number of network connections. ZYGOS uses a combination of shared-memory data structures, multi-queue NICs, and inter-processor interrupts to rebalance work across cores.\n For an aggressive service-level objective expressed at the 99th percentile, ZYGOS achieves 75% of the maximum possible load determined by a theoretical, zero-overhead model (centralized queueing with FCFS) for 10μs tasks, and 88% for 25μs tasks.\n We evaluate ZYGOS with a networked version of Silo, a state-of-the-art in-memory transactional database, running TPC-C. For a service-level objective of 1000μs latency at the 99th percentile, ZYGOS can deliver a 1.63x speedup over Linux (because of its dataplane architecture) and a 1.26x speedup over IX, a state-of-the-art dataplane (because of its work-conserving scheduler).", "pdfUrls": [ "http://doi.acm.org/10.1145/3132747.3132780", "https://infoscience.epfl.ch/record/231395/files/sosp17-final278.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/65fef0cd15b565055f21cf8c489cae39dd569220", "sources": [ "DBLP" ], "title": "ZygOS: Achieving Low Tail Latency for Microsecond-scale Networked Tasks", "venue": "SOSP", "year": 2017 }, "6612141b58c26278dafec422e1a695387c312d26": { "authors": [ { "ids": [ "2836704" ], "name": "Paolo Grani" }, { "ids": [ "34980091" ], "name": "Roberto Proietti" }, { "ids": [ "3319014" ], "name": "Venkatesh Akella" }, { "ids": [ "30724860" ], "name": "S. J. 
Ben Yoo" } ], "doi": "10.1109/HPCA.2017.17", "doiUrl": "https://doi.org/10.1109/HPCA.2017.17", "entities": [ "2.5D", "Arrayed waveguide grating", "Baseline (configuration management)", "Benchmark (computing)", "Best, worst and average case", "Bisection bandwidth", "ChIP-on-chip", "Die (integrated circuit)", "Focus stacking", "Interconnection", "Interposer", "Network on a chip", "PARSEC Benchmark Suite", "Parsec (parser)", "Router (computing)", "Simulation", "Supercomputer", "Transmitter" ], "id": "6612141b58c26278dafec422e1a695387c312d26", "inCitations": [], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "289-300", "journalVolume": "", "outCitations": [ "137ccb8d44485f4e8b47baea9ac12bf0dd40b4c8", "84a3d1c46fa5d7fbbddce9a1cd91b4d3d4e78987", "3e53ba2fed07cc7e9c2ad13ed606a41656d8dd98", "04d74957a491ba8bc99b99eebd285b0c0f0d1a22", "55b3e22b56599ed8520deb1d7cb9ac460f4fa6bb", "03cab14da0c365ee7900987e4eabe04b2cf0fd5b", "a58499f57d253ccce13151e2d81a75362bd01f31", "081bb04918747f2d43c4fe1dc718ebc185e7fac2", "d9dab715629bb9672886e3f2b833b121d583e6bc", "f0deb61875b8da30eb3bbf665fd3fc49c30171b1", "3d6e0b3d54b7f675ac12817f7cdd2da9c2134482", "6d23153ad457f1a59974bb29b27332989b6344ac", "3859dd7885188d9a579b8069d4a3a05d4498c425", "69c1e05900e8fad09da832cd3a894c446c872c7a", "0108a3544506cc114214b2e30cb3284d2ff8d035", "23b564bfb4e3f84e9676247f90781d04cd8b6c71", "76fc69f9e3615d6294b95c0187cf7bb7169999f2", "06e62419617f17bd0e63623472f529eb8702eb85", "7fc1dddbc4b958d416e8f666737fc6a163eae2b4", "dbc627f52c9dc69a56a7b89ec020634037d4edd8", "95edea61f1af2c1a3b0c723213188d6bda5efda1", "41dfc64125b0ee0568b35c486a663c8825ce07e9", "152c6ea02f20f4347a6c1a70693d0c5357e2460d", "206eb6384babfbe2e985ae8e8ecbe8b81036082f", "6c990d870ed62e5eeafa97dfe6d410429a92ea60", "fa109a5f9b489930882ea593ed6b6d5ed1a7c254", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "ae1bc1fa57bc4f444241f1e4b01cd457a35031d1", 
"c3b64af1a00766a27cab874c35b9eec33eb791c3", "1e35c585eb86fe12574016e6312ba1e5af974f5f", "52190efeaaf2ee1b01d5e70531f5b9549a3823e2", "b872e246d77ec5692a05a5ca0aa35168e202b3e8", "5381b30f9c97e7c699c63e55503f0a036d63aae8", "2428ef570ad3cac2cb90992af043fbc2a49f6fc1", "43034efa432f55931d0ae00fb6a4eda26ac87f1d", "325338d46b376a8fab8beb69207473feb3cf4a2a", "86549a5071a8b361d0de073a730a4c2ac4bf47db", "9bdd74fca923f2e3210e64df8e95bab6e3d5b7d5", "bfb8e3d25d508ee5d1d0636e7735095d98c75118", "8c3b449ed5e0e32e1e1934176265cec8dbc2bb4f", "190ca682f7b22fb81f2e506354c93170e9721e0c", "089b66433f1248ed3adb1c4a27936ccf2ca9a359", "aa5c04d5c6f347f5c860f01c3f2a0d27c0f8958a", "9e581c9572a94d751e31dd7c43e3a84dc1105ee6", "451901f3990e0153386ebc7869a6dc4466b552c0", "29d2b17e18a590e12cd24a5326a508ed603a4b53", "aac5360e08fe19ee26ff21f2d4e80cba8180cdc9", "44e70e8a6d10f59feabdb22e6a000c64a507a0dc", "c40813cb34012455e2c564af590e99005699fd0a", "98075d4a841667a872017a59eeb16ffb69257eaf", "97b41e36f607de371914556f012405a60065070e", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "7e5bad5fb60f43869a3fafa5bce03cffce482e25", "0586e768175bb15669c88b22a507cf7a4eeae634", "2b52c9caea52183a42848c5ac4fe7527942d7b05", "23022632ad418422d4629eb3e77996a6e4545528", "ee056454c194c401c521e9a1d57e28cf071dbf54", "13f2c51f986996787518d01dafb95999c8a4d5a8" ], "paperAbstract": "In future performance improvement of the basic building block of supercomputers has to come through increased integration enabled by 3D (vertical) and 2.5D (horizontal) die-stacking. But to take advantage of this integration we need an interconnection network between the memory and compute die that not only can provide an order of magnitude higher bandwidth but also consume an order of magnitude less power than today's state of the art electronic interconnects. 
We show how Arrayed Waveguide Grating Router-based photonic interconnects implemented on the silicon interposer can be used to realize a 16 × 16 photonic Network-on-Chip (NoC) with a bisection bandwidth of 16 Tb/s. We propose a baseline network, which consumes 2.57 pJ/bit assuming 100% utilization. We show that the power is dominated by the electro-optical interface of the transmitter, which can be reduced by a more aggressive design that improves the energy per bit to 0.454 pJ/bit at 100% utilization. Compared to recently proposed interposer-based electrical NoC's we show an average performance improvement of 25% on the PARSEC benchmark suite on a 64-core system using the Gem5 simulation framework.", "pdfUrls": [ "https://doi.org/10.1109/HPCA.2017.17", "http://sierra.ece.ucdavis.edu:29/2016/HPCA17_Grani.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6612141b58c26278dafec422e1a695387c312d26", "sources": [ "DBLP" ], "title": "Design and Evaluation of AWGR-Based Photonic NoC Architectures for 2.5D Integrated High Performance Computing Systems", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "66175149f593ef37d242da7233be8ea961a4b8b2": { "authors": [ { "ids": [ "35410424" ], "name": "Rakesh Kumar" }, { "ids": [ "27557023" ], "name": "Cheng-Chieh Huang" }, { "ids": [ "2309941" ], "name": "Boris Grot" }, { "ids": [ "2164782" ], "name": "Vijay Nagarajan" } ], "doi": "10.1109/HPCA.2017.53", "doiUrl": "https://doi.org/10.1109/HPCA.2017.53", "entities": [ "Binary file", "Boomerang", "Branch predictor", "Branch target predictor", "CPU cache", "Cache (computing)", "Control flow", "Megabyte", "Prefetcher", "Server (computing)", "Stemming", "Working set" ], "id": "66175149f593ef37d242da7233be8ea961a4b8b2", "inCitations": [ "2cf8e2e68df482672c6d89aa9a62811f2b08c8f1", "7a961b5f6e20773ee0911b580a76fe6da8d69e5b" ], "journalName": "2017 IEEE International Symposium on High Performance 
Computer Architecture (HPCA)", "journalPages": "493-504", "journalVolume": "", "outCitations": [ "6acd75781396e5dedcf2f06a7131ba7f3153bfb5", "313b6d6a2fe071869507ba7530aef10c91aefe11", "45ce66a661eea0e5b5780ff8cfaf6b2085dd7a1e", "416684e46272f1690410aa5f8e49a5afc5359c2c", "17abd522e3f8764f2e8889664c03894ed929f90f", "48536fdbbc79ddf163901c7e63bb70b6f64802e0", "bee7c71499f76a95e68ff6a74c4ad4a455e6c4b1", "4515f226cae120a137ff2fe5bda53fdefdfc053b", "a17c2533b497b461846f0fef89d916a8bd0ab1b0", "110c050c6c992d2b956f7b47d717810ac5c91bdc", "a56ecaf2a44259478acf55d1865ed570d80c921b", "f29dac2e26273532c81c933f091c7a60b9480f94", "17d72a792b35fa9e65499b3e8f9c15a1f6f8b7b6", "f46d87abf6c031bd39b8e5bd36a5c3c98877ef2a", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "3078dba41b8a46ee4da87241f2e1848fde5ab7dd", "04dba1bd51e8f3348c57cb7b5148abd9f4b5aa21", "06d3f1dac818f84fdc3a5bede245b32c5490dd6b", "54b92179ede08158e2cf605f5e9f264ca06c01ff", "43644b8cd34a759e5cda4953c57dba0bb3e25805", "2735d885ecd978ff5b8204be249169e4b824fb6f", "2cbca64fbcd3eb397cf918b2df6ceac3f579efdd" ], "paperAbstract": "Contemporary server workloads feature massive instruction footprints stemming from deep, layered software stacks. The active instruction working set of the entire stack can easily reach into megabytes, resulting in frequent front-end stalls due to instruction cache misses and pipeline flushes due to branch target buffer (BTB) misses. While a number of techniques have been proposed to address these problems, every one of them requires dedicated metadata structures, translating into significant storage and complexity costs. In this paper, we ask the question whether it is possible to achieve high-performance control flow delivery without the metadata costs of prior techniques. 
We revisit a previously proposed approach of branch-predictor-directed prefetching, which leverages just the branch predictor and BTB to discover and prefetch the missing instruction cache blocks by exploring the program control flow ahead of the core front-end. Contrary to conventional wisdom, we find that this approach can be effective in covering instruction cache misses in modern CMPs with long LLC access latencies and multi-MB server binaries. Our first contribution lies in explaining the reasons for the efficacy of branch-predictor-directed prefetching. Our second contribution is in Boomerang, a metadata-free architecture for control flow delivery. Boomerang leverages a branch-predictor-directed prefetcher to discover and prefill not only the instruction cache blocks, but also the missing BTB entries. Crucially, we demonstrate that the additional hardware cost required to identify and fill BTB misses is negligible. Our experimental evaluation shows that Boomerang matches the performance of the state-of-the-art control flow delivery scheme without the latter's high metadata and complexity overheads.", "pdfUrls": [ "http://www.research.ed.ac.uk/portal/files/29959353/BoomerangPreprint_1.pdf", "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.53" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/66175149f593ef37d242da7233be8ea961a4b8b2", "sources": [ "DBLP" ], "title": "Boomerang: A Metadata-Free Architecture for Control Flow Delivery", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "662ae744ba4d16943703b1d57d153ed3e659f938": { "authors": [ { "ids": [ "2824771" ], "name": "Dmitry Kogan" }, { "ids": [ "22066600" ], "name": "Nathan Manohar" }, { "ids": [ "1752788" ], "name": "Dan Boneh" } ], "doi": "10.1145/3133956.3133989", "doiUrl": "https://doi.org/10.1145/3133956.3133989", "entities": [ "Algorithm", "Android", "Authentication", "Hash chain", "One-time password", 
"Password", "S/KEY", "Server (computing)", "Server-side" ], "id": "662ae744ba4d16943703b1d57d153ed3e659f938", "inCitations": [], "journalName": "", "journalPages": "983-999", "journalVolume": "", "outCitations": [ "700a1debdb8c93cdd31b44164027614c45d7c92d", "2c2e30e89e55046f9dcb772b48d1b23a1447ad45", "72d745cbfc9e7b3278afcdbdf1f9282dd54c1371", "a5816e661bcd23d03c71c75a8687b2113061a936", "135774a89961f47118b5c2d77458814021c08c30", "132848893d16f7e62e79910196652c3d4aeec164", "f0ba66072ac10d9898b8a79171ec726d45ec804b", "0103b4a14ab5396b75c2eec39865e0cec66a8e19", "ec8e96b39f1a96d63a8159e8bd297402630c6ac2", "c362289b4a3a7c373d4765d43e6b8dec67b7ee25", "da7190275f46d79e90aa6ef86fffac00bf699cb1", "418e058c0dd22b18994ebdba8bd4713bf92588f7", "0098093f022a27d77ceed592310d8522a4dd9917", "492f6c880740250e4b1457911e1c56b6e9a4cb16", "41f5018cc280099215b5fec28f4ff5ea1e392d0a", "8fe8d2886d3f578d6544146edf2ba8a084b49194", "16624a1b99db5311fb82968e5bd8266634766837", "37e85823a2a1761e489acaa67ced016ae06c6f5b", "10ea51fec6da43703b9a5935a278dca952e20087", "d94d87786fd8cbab2c88d005ecdfce61c2f10f39", "27c2dda2616f7fba3ee98c47ed489c962ed1e223", "80ab83cc65002d89cb2198460dd83f4a8377b04c", "162677cf8b2bd9bc2cfacbc7b83c5d150cbb82ab", "2459b15dcd7c8d383980c0a118c0983d4ec010d5", "2bc0144228ecee16e40dc94a085cd85bb1541c8e", "54cb4579fe00965cc93901e00d731be377fc984d", "7f14bca3b6f51d4cfe8084798c2808c08b0214d6", "0c30ca2885116c5d326afc5b144b088270f1cce2", "1d9d99cae9e60745bf7b7d6a7e6ae1e4eded7cbc", "71bfe597377e58030c231d2cc7565293927701eb", "004c7314e9227275a104f99b6bd7c87c0aff710b", "1f301b4c37e7c30f46014f5999652785a7c61987", "49fe12779c2cb3a6537105a0ce71fe0cc60b253c", "a1836810bddf25f8086d64c15c11369cf3fb7979", "6c03ec6e6a1d1a9183270fb266bd976e191d40b6", "2b4640842aa0e4566840901c0c1fd9d4572d2f1b", "63ba01ca7412b9359554a95f647e00be5064aa05", "0b70652541cb408152c468eaea7b114dc65beab1", "37a420adcf2098662480d09718dfc554289c3d12", "2e8558ca84da6fd30a9800c4eb5f70bb312e9eb4", 
"88dfa31abf1474407f2132ed911f52ec59b49ec6", "4a2fceb2b6355c3023683d0004a94797408e2c61", "beafabdbd7df228bbdd2a3bb5a463d5869b84983", "0e982205396cae6d35a114e1a1e96a9e1766b19e", "217230b8bc6c3515f2a0043b96fbc6bc0b41f7f8", "fabff2ac5b3a15dcce5325e808a6671d0aafc3d1" ], "paperAbstract": "Time-based one-time password (TOTP) systems in use today require storing secrets on both the client and the server. As a result, an attack on the server can expose all second factors for all users in the system. We present T/Key, a time-based one-time password system that requires no secrets on the server. Our work modernizes the classic S/Key system and addresses the challenges in making such a system secure and practical. At the heart of our construction is a new lower bound analyzing the hardness of inverting hash chains composed of independent random functions, which formalizes the security of this widely used primitive. Additionally, we develop a near-optimal algorithm for quickly generating the required elements in a hash chain with little memory on the client. We report on our implementation of T/Key as an Android application. T/Key can be used as a replacement for current TOTP systems, and it remains secure in the event of a server-side compromise. 
The cost, as with S/Key, is that one-time passwords are longer than the standard six characters used in TOTP.", "pdfUrls": [ "http://arxiv.org/abs/1708.08424", "https://arxiv.org/pdf/1708.08424v1.pdf", "https://acmccs.github.io/papers/p983-koganA.pdf", "http://doi.acm.org/10.1145/3133956.3133989" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/662ae744ba4d16943703b1d57d153ed3e659f938", "sources": [ "DBLP" ], "title": "T/Key: Second-Factor Authentication From Secure Hash Chains", "venue": "CCS", "year": 2017 }, "662e9be63ddb50d12e1c42c738efd64aaf94f989": { "authors": [ { "ids": [ "40286771" ], "name": "Ander Galisteo" }, { "ids": [ "1683741" ], "name": "Qing Wang" }, { "ids": [ "5466419" ], "name": "Aniruddha Deshpande" }, { "ids": [ "1955669" ], "name": "Marco Zuniga" }, { "ids": [ "2485577" ], "name": "Domenico Giustiniano" } ], "doi": "10.1145/3143361.3143371", "doiUrl": "https://doi.org/10.1145/3143361.3143371", "entities": [ "Experiment", "Global Positioning System", "Holism", "Indoor positioning system", "Line-of-sight (missile)", "Requirement", "Robot", "Sensor", "Simulation", "Video game localization" ], "id": "662e9be63ddb50d12e1c42c738efd64aaf94f989", "inCitations": [], "journalName": "", "journalPages": "187-198", "journalVolume": "", "outCitations": [ "cf90fe09b2e77a7cf46f99a83f24e7153bba3c16", "0d9158bd3019a828ee3ebe3a1b502fbde2258297", "18b94ae2f53920d884f77e9aa8a32c80f3005759", "559c132d3405ec18dbe6e4ebf5f27eba12ad7208", "8911bfcaf2b80eac51e150b1f1eb2d96e25f1154", "5b7a6c35b258d2c32c09f0377b0c79ba02c9a9a3", "0db91ba1dcaf3b6f0c56074d1336f18ef4294d07", "e5532c6958e3305adb328212fd2636968e6c966c", "52b5f461e40d9e14eaef749a6b9b7f0cfdd2bfb9", "1e37384874c84acc7919176d4e9598e9116da2ee", "786b684d577ae57aa2fbc7d1fb0870ad86b998b5", "b5d8b259052ffecd1fcf3eae9b08e31b41c24ec0", "0bde8e16c46f48030a4084970d1611fc11ca5d55", "1f911ae809066d4a55598bce939a466de980b13b", "1f09df83223633ef096e1b23748f7fbce72bfb26", 
"6ba8c478bca4434e100e209b2204128dc2002703", "b12467f1e41ae037ad9a332a367b5f40b6498cda", "e4b778c4ddbd88b83d74ef2c2ed02bbdbe0823b2" ], "paperAbstract": "Visible light is gaining significant attention as a medium to achieve accurate relative localization. Most of the studies in the area focus on indoor positioning and rely on two important assumptions: (i) lights are static, and (ii) the receiver has line-of-sight with multiple lights. These requirements limit the application of localization methods in scenarios where nodes have a single light and are mobile, such as motorbikes or swarms of robots. In general, this particular type of scenarios (single lights moving on a plane) leads to under-determined localization systems where no unique solution can be found. We follow a holistic approach that includes theory, simulations, and experiments to overcome some of the limitations present in such type of scenarios. Our theoretical and simulation results show that if nodes are enhanced with sensors providing relative directions (such as compasses), we can derive dependencies in the system to obtain unique solutions. 
Our proof-of-concept implementation validates our model by showing that single lights can provide relative localization with high accuracy: an average error below 5 cm.", "pdfUrls": [ "http://wwwtmp.st.ewi.tudelft.nl/marco/files/mobileVLCLoc_CoNEXT17.pdf", "http://doi.acm.org/10.1145/3143361.3143371" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/662e9be63ddb50d12e1c42c738efd64aaf94f989", "sources": [ "DBLP" ], "title": "Follow that Light: Leveraging LEDs for Relative Two-Dimensional Localization", "venue": "CoNEXT", "year": 2017 }, "66551f711a1f585d944ea3379a51a6ae4bf92a88": { "authors": [ { "ids": [ "3384952" ], "name": "Chunliang Hao" }, { "ids": [ "1719912" ], "name": "Jie Shen" }, { "ids": [ "4178789" ], "name": "Celia Chen" }, { "ids": [ "39712796" ], "name": "Heng Zhang" }, { "ids": [ "7860923" ], "name": "Yanjun Wu" }, { "ids": [ "38255368" ], "name": "Mingshu Li" } ], "doi": "", "doiUrl": "", "entities": [ "Baseline (configuration management)", "Centralisation", "Cluster state", "Fan-out", "Gang scheduling", "Jumpstart Our Business Startups Act", "Scheduling (computing)", "Simulation" ], "id": "66551f711a1f585d944ea3379a51a6ae4bf92a88", "inCitations": [ "0784846954b6b162fa60eaa5be28b825748f6ee9", "6156334bcd7eba6cf44c39f6842b546457d56b05" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "599-608", "journalVolume": "", "outCitations": [ "027bd50767a7f61fb0fc3c27051a63b209c10a99", "3e257f01e3ee71545d824a1615c35659525b856a", "4eab97d0d1c75641671aa5b7761978322d904c5c", "be8cb70c82f2dca180c7753590b7a8e6ee576ded", "090599a2caf4591c87699ad850c75554cd712937", "835916e7ad1231d5aa2985340b0ee543cadbb5b6", "11b12a29a9efb60a892b48fc61e70ab63e59b37e", "31b27a3b4ff89993eb92e8b1353edead8d5f2520", "0c83169bf4ebb29979bfe47708cb6b79b6e28755", "11310368999afdce94bca4316eea38216b2446c5", "234e6be0d4238f76b3ac038ee422be39f391c625", "756d1f8f07a83f3cfc0edaa81493a9f109628e1b" 
], "paperAbstract": "As a promising alternative to centralized scheduling, sample-based scheduling is especially suitable for high fan-out workloads that contain a large number of interactive jobs. Compared to centralized schedulers, existing sample-based schedulers do not hold a global view of the cluster's resource status. Instead, the scheduling decisions are made solely based on the status of a small set of randomly sampled workers. Although this simple approach is highly efficient in large clusters, the lack of global knowledge of the cluster can lead to sub-optimal task placement decisions and difficulties in enforcing global scheduling policies. In this paper, we address these challenges in existing sample-based scheduling approaches by allowing the scheduler to maintain an approximate version of the global resource status through caching the worker node's status extracted from reply messages. More specifically, we introduce the private cluster-state technique (PCS) for the scheduler to obtain such global information. We show that the scheduler can make better scheduling decisions by utilizing PCS and the scheduler can become more capable in enforcing global scheduling policies. The use of PCS is of low cost since it does not initiate new communication in sample-based scheduling. Our approach is implemented in PSCSampler, a full distribute sample-based scheduler, which gains global knowledge from PCS. Experiment results from both simulation runs and Amazon cluster runs show that compared to Sparrow, PCSsampler can significantly reduce both 50th percentile and 90th percentile runtime. 
The firsttime success rate of PCSsampler in gang scheduling is closer to an omniscient centralized scheduler than baseline sample based scheduler.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101193" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/66551f711a1f585d944ea3379a51a6ae4bf92a88", "sources": [ "DBLP" ], "title": "PCSsampler: Sample-Based, Private-State Cluster Scheduling", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "66ace9694c92cbfad85ca3fccb79215b44b6d126": { "authors": [ { "ids": [ "39797395" ], "name": "Pengfei Zou" }, { "ids": [ "40278775" ], "name": "Tyler Allen" }, { "ids": [ "11023982" ], "name": "Claude H. Davis IV" }, { "ids": [ "1781155" ], "name": "Xizhou Feng" }, { "ids": [ "38342948" ], "name": "Rong Ge" } ], "doi": "10.1109/CLUSTER.2017.98", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.98", "entities": [ "Computer cluster", "Concurrency (computer science)", "Haswell (microarchitecture)", "Intel Core (microarchitecture)", "Memory module", "Multi-core processor", "Parallel computing", "Power management", "Power supply", "Scalability", "Scheduling (computing)" ], "id": "66ace9694c92cbfad85ca3fccb79215b44b6d126", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "541-551", "journalVolume": "", "outCitations": [ "77f826132cf09ac91ea9c859387a8d52221a019a", "7e48637082584703bae2dfc83953e7ff0c32e9b6", "35bc9e9d0a8c0451c47131875e88d7c20f28aa92", "7e757fff66a63b268da83ffccf464437492ac8b6", "20d6a8a39ebecd21bc8a4df53f248356d38ea6d9", "89d4dcc25809693fa3505d09b2721c1c2c2559b2", "1585eaffcf9c9836eb1607e279e43ce2793e59a0", "a47b408349a8146f71cb54c38226d2f7d92700fe", "cedeaf86f2e06e4ab3b218aaf6cdfe65e2d9cbe1", "efee61acb1847de685817b7d9bc1b6b095ef5026", "7d52953086089b85db2bedb16f56790b9116a2b6", "15860f9f774f19f245f016d9cf479222e4f9a6ba", 
"a9831b6062fb678d4591eee853e81116d038bb05", "b04391910d19d2d0c64b62d300927f527417414e", "4a6bf6c38051ec5f81be18de75e8ecb6e5e72c06", "1ea7d63617a0fdc5eadf37596d00688615565351", "073e26aa7192825a8d872fb0c6f25bc31aca77cf", "00cf571f4060063b79e7f64eab42b1ef064660f4", "610d61a4543bdb1109de0e5f9760d44e44e6014d", "9a000edf8d478fa3b0d7f74fb966664da5d33354", "1f5b507c038b09f017bffd51d4f4e4257bef6ef4", "1031ac970dfc4afd1cda54aca8f6ddce234edc89", "3462fb38042f0bde20c758728d7c8c28a1f47e09", "387eb8909b5527dd2513cfdd2f376a3a1f2973b3", "86abc95269643d9ce18286f896b486bb3026f1ee", "1726f30174b09c0ba28899e81b00a1e3305e52a5", "87a34f805b3316ac75c6b3110d36a4bc576ac063", "9efa7f12bfd9d8ed38c29c5e128b21b07a438cd9", "1fd674f96ef677bf09d7538673eda576aa8102c9", "14bd3627a85b658ea1b8450039df7fe0fb57379e", "02ebdcf8200135ec0433e12e4ef2459ac740370b", "f6ab527a5919b48b66908954a3086947c5bffde6", "0e8e26e9b86b8bc74997e6a28aeb49c0e8a31404" ], "paperAbstract": "High performance computing systems will need to operate with certain power budgets while maximizing performance in the exascale era. Such systems are built with power aware components, whose collective peak power may exceed the specified power budget. Cluster level power bounded computing addresses this power challenge by coordinating power among components within compute nodes and further adjusting the number of participating nodes. It offers more space to increase system performance by utilizing the available power budget more efficiently within and across the nodes.In this paper, we present the design of a hierarchical multi-dimensional power aware allocation framework, CLIP, for power bounded parallel computing on multicore-based computer clusters. The framework satisfies the specified power bound by managing the power distribution among nodes at the cluster level, and among sockets, cores and NUMA memory modules at the node level. 
The power allocation is enforced with multiple complementary power management techniques, including memory power level setting, thread concurrency throttling, and core-thread affinity. We present an application characterization method based on applications' scalability and an associated performance model, which can accurately determine the optimal number of participating compute nodes and components, and their power distribution for given applications. Experimental results on a Haswell-based computer cluster show that the proposed scheduler outperforms compared methods by over 20% on average for various power budgets.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.98" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/66ace9694c92cbfad85ca3fccb79215b44b6d126", "sources": [ "DBLP" ], "title": "CLIP: Cluster-Level Intelligent Power Coordination for Power-Bounded Systems", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "670f07398f57eed1cfac9afddf72491c61c3d4ce": { "authors": [ { "ids": [ "32808878" ], "name": "Hoby Rakotoarivelo" }, { "ids": [ "40446217" ], "name": "Franck Ledoux" }, { "ids": [ "34640015" ], "name": "Franck Pommereau" }, { "ids": [ "2505423" ], "name": "Nicolas Le Goff" } ], "doi": "10.1007/978-3-319-64203-1_43", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_43", "entities": [ "Algorithm", "Computer graphics (computer science)", "Data dependency", "Graph coloring", "Haswell (microarchitecture)", "Heuristic", "Independent set (graph theory)", "Locality of reference", "Manycore processor", "Multi-core processor", "Parallel computing", "Relevance", "Scalability" ], "id": "670f07398f57eed1cfac9afddf72491c61c3d4ce", "inCitations": [], "journalName": "", "journalPages": "594-606", "journalVolume": "", "outCitations": [ "e4b3b9bc7ac1aba7633e1a6e0495109d93fcbc38", "98068ae0dd54ae7faa70a621dbe297247285a038", "006cadd0e54581d34cfde4651ff957572eb395a3", 
"465c5a68c3c62f6fb949acfd7921a9072a29841b", "30d8c6e4b80f8fb1c6487a7442537fd963d1a0e9", "8551e1bb02a6aba901abaa4d917ab3e2a2d86fad", "cd5446a98025167bf08e1aba281490bf2f0a3382", "82d51c5b65901e9bf1f5dca6b900d57d065ca983", "86acce8933fba76c346e767f4baf3b340fb8b4d2", "895f1bb9100279f7bfbc2925176a407112be3691", "db99280e049ba3152d8afb89c00b1265f9c1e6c6", "c51cf8c0b023d7f3a9de5048fb25c6bbe446c3fd" ], "paperAbstract": "In this paper, we present a fine-grained multi-stage metricbased triangular remeshing algorithm on manycore and NUMA architectures. It is motivated by the dynamically evolving data dependencies and workload of such irregular algorithms, often resulting in poor performance and data locality at high number of cores. In this context, we devise a multi-stage algorithm in which a task graph is built for each kernel. Parallelism is then extracted through fine-grained independent set, maximal cardinality matching and graph coloring heuristics. In addition to index ranges precalculation, a dual-step atomic-based synchronization scheme is used for nodal data updates. Despite its intractable latencyboundness, a good overall scalability is achieved on a NUMA dual-socket Intel Haswell and a dual-memory Intel KNL computing nodes (64 cores). The relevance of our synchronization scheme is highlighted through a comparison with the state-of-the-art.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_43" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/670f07398f57eed1cfac9afddf72491c61c3d4ce", "sources": [ "DBLP" ], "title": "Scalable Fine-Grained Metric-Based Remeshing Algorithm for Manycore/NUMA Architectures", "venue": "Euro-Par", "year": 2017 }, "672eb90f2ef434c8c6d67d72789cd76a19fc754f": { "authors": [ { "ids": [ "2679256" ], "name": "Vikram K. Narayana" }, { "ids": [ "2161169" ], "name": "Shuai Sun" }, { "ids": [ "7192427" ], "name": "Armin Mehrabian" }, { "ids": [ "1995822" ], "name": "Volker J. 
Sorger" }, { "ids": [ "1680555" ], "name": "Tarek A. El-Ghazawi" } ], "doi": "10.1109/ICPP.2017.22", "doiUrl": "https://doi.org/10.1109/ICPP.2017.22", "entities": [ "Benchmark (computing)", "Design space exploration", "Emergence", "Hybrid drive", "Mesh networking", "Network on a chip", "Optical interconnect", "Simulation", "Throughput" ], "id": "672eb90f2ef434c8c6d67d72789cd76a19fc754f", "inCitations": [ "65f388c686d5fa56331d09b6ddf380cf6b82f3fe", "779bfd1ab67264fc3e0d9817c8bc30bae569ee3c" ], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "131-140", "journalVolume": "", "outCitations": [ "514de1e332b99d2f0a669e3efb97c4ccb8e3bf66", "0108a3544506cc114214b2e30cb3284d2ff8d035", "620b58a826eac90c4f07c50636d3758e3951ba57", "5f65b94fd282f822155fe45921e005611620f860", "7ec2901a65ab52450f7e3321a8e415878e438f3f", "537e22296e1eead16dfce7386572b9f77e9da2b0", "2f1a0dff0476556c43f2bb6922dfb41696202752", "1f25affeadbf77acd17b2e72e76566d5144d2703", "27c11aafa1bdb48228474f7780c5f22022a1a7b9", "40eeb2b0c99c1b7f5db28611bebb34e3e18f8925", "5d25b4a77268437aa669e272cc81b56ed184e0b6", "c353b7d54324582487506f8670b922ed98a1ba30", "aa5c04d5c6f347f5c860f01c3f2a0d27c0f8958a", "3f4d5667e04958569dff72966a93450decfd256f", "dd06b7da2cb8d1d493de999f5db62568c534b46a", "885568ca0a036a4969eb8dfbc79cca2b75be5c14", "32a93857c0cff135b821cebc3fcca7b3a36e6085", "0c89d5d37a25f693bc31e65c0c5b9bea63148e57", "373b88e34295875fdab7f6cdee1438edbd0571cb", "31d0b90f43a78ccbed0133d72d7c19c4340dfea5" ], "paperAbstract": "As we move towards an era of hundreds of cores, the research community has witnessed the emergence of optoelectronic network on-chip designs based on nanophotonics, in order to achieve higher network throughput, lower latencies, and lower dynamic power. 
However, traditional nanophotonics options face limitations such as large device footprints compared with electronics, higher static power due to continuous laser operation, and an upper limit on achievable data rates due to large device capacitances. Nanoplasmonics is an emerging technology that has the potential for providing transformative gains on multiple metrics due to its potential to increase the light-matter interaction. In this paper, we propose and analyze a hybrid opto-electric NoC that incorporates Hybrid Plasmonics Photonics Interconnect (HyPPI), an optical interconnect that combines photonics with plasmonics. We explore various opto-electronic network hybridization options by augmenting a mesh network with HyPPI links, and compare them with the equivalent options afforded by conventional nanophotonics as well as pure electronics. Our design space exploration indicates that augmenting an electronic NoC with HyPPI gives a performance to cost ratio improvement of up to 1.8×. To further validate our estimates, we conduct trace based simulations using the NAS Parallel Benchmark suite. These benchmarks show latency improvements up to 1.64×, with negligible energy increase. We then further carry out performance and cost projections for fully optical NoCs, using HyPPI as well as conventional nanophotonics. 
These futuristic projections indicate that all-HyPPI NoCs would be two orders more energy efficient than electronics, and two orders more area efficient than all-photonic NoCs.", "pdfUrls": [ "https://arxiv.org/pdf/1703.04646v1.pdf", "http://arxiv.org/abs/1703.04646", "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.22" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/672eb90f2ef434c8c6d67d72789cd76a19fc754f", "sources": [ "DBLP" ], "title": "HyPPI NoC: Bringing Hybrid Plasmonics to an Opto-Electronic Network-on-Chip", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "6730d8ec15d3d5e5c558b7bc85c8bf4dd9e66a44": { "authors": [ { "ids": [ "3172870" ], "name": "Masatoshi Kawai" }, { "ids": [ "2623409" ], "name": "Akihiro Ida" }, { "ids": [ "2741369" ], "name": "Kengo Nakajima" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.18", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.18", "entities": [ "Algorithm", "Automatic parallelization", "Bayesian information criterion", "Conjugate gradient method", "Graph coloring", "Hierarchical clustering", "Numerical analysis", "Parallel computing", "Preconditioner" ], "id": "6730d8ec15d3d5e5c558b7bc85c8bf4dd9e66a44", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "138-145", "journalVolume": "", "outCitations": [ "97ec26b99f10a3b7a87cbf042383a8d4b9a4e4fe", "67bf7dd26a41855f92ef8403a1edfae8823ba82c", "f2c4f4a13e38b906543f4a60278c9ab3641e9c02", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "208c08d11d142441ef2efb601ac0634191bf3053", "4ad0595b7af6c4041bfd89ccf74d94f154a39762", "03cb4d8e458c7e3bbea7ee68b52c7534ed05028e", "1201aa64ab230559db7753bcabf554f47656a4e5", "cfd4d6742259c99da2894deb5fe41029ba2f6495", 
"4f3caa5573b4c1ebef7c3ee6b9f7643e689c858e", "630b514e68c0de62fa3dca5a45e3131f1515c90c", "3e1bfe26ae904c6949111aab499e8ebbb8344cf8" ], "paperAbstract": "The block incomplete Cholesky(BIC) is one of the most useful preconditioners for the conjugate gradient(CG) method. Coloring algorithms are often used for parallelizing the BIC preconditioner, but they are the sequential algorithms. In this paper, we proposed a hierarchical parallelization for coloring methods. Proposed method is versatile. We demonstrate that this method parallelize traditional multi-coloring algorithms. Our numerical results confirm that the proposed method does not change the properties of the coloring methods.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.18" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6730d8ec15d3d5e5c558b7bc85c8bf4dd9e66a44", "sources": [ "DBLP" ], "title": "Hierarchical Parallelization of Multi-coloring Algorithms for Block IC Preconditioners", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "673c4ec59d1d13047199ab19e6d5906d1493bb6a": { "authors": [ { "ids": [ "2953192" ], "name": "Chih-Ya Shen" }, { "ids": [ "2839490" ], "name": "Liang-Hao Huang" }, { "ids": [ "1731140" ], "name": "De-Nian Yang" }, { "ids": [ "2426757" ], "name": "Hong-Han Shuai" }, { "ids": [ "1686360" ], "name": "Wang-Chien Lee" }, { "ids": [ "1691171" ], "name": "Ming-Syan Chen" } ], "doi": "10.1145/3097983.3097995", "doiUrl": "https://doi.org/10.1145/3097983.3097995", "entities": [ "Algorithm", "NP (complexity)", "Polynomial", "Social Networks", "Social network", "TERA", "The Open Group", "Time complexity" ], "id": "673c4ec59d1d13047199ab19e6d5906d1493bb6a", "inCitations": [], "journalName": "", "journalPages": "415-424", "journalVolume": "", 
"outCitations": [ "5da861b2fdb4e786c377c5d64993e09fc0a9719d", "7fda7df47cf271dc9c7c677c0ea2c6316b1c8d03", "fe7eb317edb35f6a495abd615fe0e2ea3ff4a5c5", "9b16dd06ce2d6008ba33de67efda17b57651da36", "12ace251fb0c2cd324f3cfcbb0bfa03028860e33", "1daa0130bb161e24e01f96a05e8959b6b571d4df", "10e44c294a968ca91e361fac44aa9d0f2cdf3bd3", "850ad780b38b11a633f7e1aacc065e19f01b770c", "2caacebad4acfad4809190b0bc9642f027aa2f0d", "d72150c7113614c47a48962110bc7a4f2a8b9bc2", "0eaf38cd3d7c7fb456201d59b6d28b084010d358", "01b0da58206ea2c10b5feca83d315d22f7abb67a", "a5168b3089e563b20f77c85db51f5362374dd74e", "697954414900a0a1618664015d802af9ac68d548", "35a156f757466dbc686e4c75290383443b8efe90", "9bf85d7a33e431d85fbf52ad55645668640f37a1", "34affc64b7f26520fbb96db3d87d177c831cc21b", "04cf0a900a4e9110e788e1f6f5f0bb85ae8ea905", "242cbdb338c47596f1219203af9a3a17209ce268", "c846a973b8787a75755e99be946fbf5003684ae7", "319e439675c2d2f56bd2dfbb9836191a57b9ac36", "007f3290e1b5e3061a8b7089037ee775efc47b83", "a34d24defaef3ecc36883540d2180535f87d11f0", "308858af48dd3dc2248ae18cb74c04462d6ee841", "00b7ffd43e9b6b70c80449872a8c9ec49c7d045a", "6035d6123b94d65ce7cc25d0fa95680f550bdc31", "9cf785889f13260a791f1106fd7b16f1390002f1" ], "paperAbstract": "Existing research on finding social groups mostly focuses on dense subgraphs in social networks. However, finding socially tenuous groups also has many important applications. In this paper, we introduce the notion of k-triangles to measure the tenuity of a group. We then formulate a new research problem, Minimum k-Triangle Disconnected Group (MkTG), to find a socially tenuous group from online social networks. We prove that MkTG is NP-Hard and inapproximable within any ratio in arbitrary graphs but polynomial-time tractable in threshold graphs. Two algorithms, namely TERA and TERA-ADV, are designed to exploit graph-theoretical approaches for solving MkTG on general graphs effectively and efficiently. 
Experimental results on seven real datasets manifest that the proposed algorithms outperform existing approaches in both efficiency and solution quality.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3097995" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/673c4ec59d1d13047199ab19e6d5906d1493bb6a", "sources": [ "DBLP" ], "title": "On Finding Socially Tenuous Groups for Online Social Networks", "venue": "KDD", "year": 2017 }, "677f89fa65ada1f7bfd82b5de2737b6259aee240": { "authors": [ { "ids": [ "37526735" ], "name": "Marcus Carvalho" }, { "ids": [ "1732501" ], "name": "Francisco Vilar Brasileiro" }, { "ids": [ "2977061" ], "name": "Raquel Vigolvino Lopes" }, { "ids": [ "39465215" ], "name": "Giovanni Farias" }, { "ids": [ "16823153" ], "name": "Alessandro Fook" }, { "ids": [ "16251842" ], "name": "Jo\u00e3o Mafra" }, { "ids": [ "2378550" ], "name": "Daniel Turull" } ], "doi": "", "doiUrl": "", "entities": [ "CPU socket", "Central processing unit", "Cloud computing", "Enterprise resource planning", "High availability", "Provisioning", "Service-level agreement" ], "id": "677f89fa65ada1f7bfd82b5de2737b6259aee240", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "160-169", "journalVolume": "", "outCitations": [ "1f1cb93ad429d895ca37b082b901e03f54ef0994", "3e257f01e3ee71545d824a1615c35659525b856a", "2ddd179040a880aa059c1f02d6f49af776e86e69", "8f4820b2d5817994074c92904dfc0ba4065533cd", "c97e16e4c868a684011c53bf93d97da305d5af01", "d8057d514036d51051af78476468fe350cb7488a", "3a043714354fe498752b45e4cf429dbae0fb2558", "08e3ca8c996dae7004aea31c80c73c730fc04314", "0dc346b58cfb5007b7b31d14a80ab9692049767f", "4909a3b5883e38a4264e60cc76f241e197aab0fa", "895f74d91280f865e5d7e2187dd7c5c6913eea25", "53f98e69b914056040ff7a3400690abd41e100be", "85aef1f96c38139cd4fdb035bc105cbfee292154", "000779a4e324aabb1dfe8b76206e201da8b3fbff", 
"3fddb3bfb22b4fdd4668e587ca30c8997f733b5b", "8730033f32fbcca2c82559fa0c218143c707d7f7", "26c5818349f8b79ed3b3ba3341c9ff0b14c28d2f" ], "paperAbstract": "Infrastructure as a Service (IaaS) providers typically offer multiple service classes to deal with the wide variety of users adopting this cloud computing model. In this scenario, IaaS providers need to perform efficient admission control and capacity planning in order to minimize infrastructure costs, while fulfilling the different Service Level Objectives (SLOs) defined for all service classes offered. However, most of the previous work on this field consider a single resource dimension – typically CPU – when making such management decisions. We show that this approach will either increase infrastructure costs due to over-provisioning, or violate SLOs due to lack of capacity for the resource dimensions being ignored. To fill this gap, we propose admission control and capacity planning methods that consider multiple service classes and multiple resource dimensions. Our results show that our admission control method can guarantee a high availability SLO fulfillment in scenarios where both CPU and memory can become the bottleneck resource. Moreover, we show that our capacity planning method can find the minimum capacity required for both CPU and memory to meet SLOs with good accuracy. 
We also analyze how the load variation on one resource dimension can affect another, highlighting the need to manage resources for multiple dimensions simultaneously.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101133" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/677f89fa65ada1f7bfd82b5de2737b6259aee240", "sources": [ "DBLP" ], "title": "Multi-dimensional Admission Control and Capacity Planning for IaaS Clouds with Multiple Service Classes", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "6796b517d20cbfab9129cd7a504ce021d36670b4": { "authors": [ { "ids": [ "2624637" ], "name": "Xiaoyang Qu" }, { "ids": [ "1776259" ], "name": "Jiguang Wan" }, { "ids": [ "2495855" ], "name": "Fengguang Song" }, { "ids": [ "14708599" ], "name": "Xiaozhao Zhuang" }, { "ids": [ "40362296" ], "name": "Fei Wu" }, { "ids": [ "2072948" ], "name": "Changsheng Xie" } ], "doi": "10.1109/ICPP.2017.30", "doiUrl": "https://doi.org/10.1109/ICPP.2017.30", "entities": [ "Algorithm", "Computation", "Data center", "Online and offline", "Optimal matching", "Performance per watt", "Power optimization (EDA)", "Power supply", "Program optimization", "Quality of service", "Scheduling (computing)", "Semiconductor consolidation", "Solar cell", "Spatial variability" ], "id": "6796b517d20cbfab9129cd7a504ce021d36670b4", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "211-220", "journalVolume": "", "outCitations": [ "41e0575161f521fd377c116425ea92515af52330", "7c838b6ef20b12c82074e89b1fd57a81037e7836", "19908640236767427ebf0524dc3a4bb09d65145e", "1f809df0c69e4962caf2fd85a1fc59b60f640a03", "149cf93575957afa16010fd28677ae8b2da214a1", "536095c3b712a60e197cbe136e785a159c697dd7", "68f5570eb032f7276ca1b68d3089802d224770e1", "8d48628223451c494346d0a2c9a0b47eb379a546", "f05231b34690f92cbee73ccf0eb6104725b79d53", 
"2219893dfcd0c9a9c2769530de9898d6868ba25b", "1b5fc099265ca15af90d28d71c5e2ca6c5875571", "1ff586720a08c814b70b174ec47b61567dfb7155", "f44e3cc59eb40659ac704c1009ecb25a484a44e4", "075a63db43b68a76a40ac6bec19416d0c2099b51", "534013446a2e17af52f7bec56195156702563d55", "026a63d57667f92b0f1823aff099f2dc88cf64d4", "ad55a50de2d14e6078fce0bcfc6b69c19dc11890", "8ea2b1904ffca328d982539c3eafd8113325c23a", "5666f54831c0a2181454c2b072e736a8a644fa74", "38b656941f8cf65a6b2ae6e3f672601a2b092bf5", "419b69ce843504543138c2e88b1b5203fd354240", "394cc3b1bfc88d7bbe4b0f120004d95d0c966df1", "0f457b7f93736da38cdcffcbd76ae9cd7a0a6749", "02a4a0e1dad1a474efaf71e52c212f8a8bda1c5b", "a04f070b9ca16bac04ee76b54a1da5eca52e1cc2", "497a80b2813cffb17f46af50e621a71505094528", "2dd1c9744d79604b8728d404fc1a4382022db4a4", "c81f5ae0bbd06040cdcda52084d8647d6b6a60f4", "33c862341042a974979e103d57e1ee135167fbde", "407ca087b7e510e61a2053c6426234b6545cd19d", "5f6ae1d342411bcae2a1dbec79a4ad590f327bb2" ], "paperAbstract": "To reduce energy consumption and carbon emission, many data centers have deployed (or anticipate to build) their own renewable-energy power plants. However, the renewable energy (such as wind, tide, and solar energy) has the serious issues of intermittency and variability that prevent the green energy from being utilized effectively in practice. To cope with the issues, new power-supply management policies and workload scheduling algorithms have been designed. However, most existing work focuses on power optimization on computation only. In this paper, we introduce a novel scheme called OptiMatch to optimize the match between the power supply and the user-workload demand for massive storage systems that are mostly powered by renewable energy sources. OptiMatch has a hierarchical architecture, which consists of a number of heterogeneous storage devices. 
OptiMatch systematically utilizes the performance disparities between heterogeneous storage devices (i.e., performance per watt, IOPS/watt) to split the process for every write request into two stages: an on-line stage and a deferred off-line stage. The deferred off-line requests are used to match the green energy supplies. To maximize green energy utilization and minimize power budget without sacrificing quality of service, the fundamental methodology is to make the aggregate power supplies be proportional to the I/O workload demand at any time. To this end, our OptiMatch employs novel co-design optimizations. (1) We propose a dual-drive power control approach that makes the number of active nodes proportional to the workload demand when the green power supply is insufficient, meanwhile be proportional to the green power supply when green power is sufficient. (2) During periods of insufficient green supplies, we exploit virtualization consolidation schemes which enable a fine-grained power control to minimize the grid budgets. (3) During the periods of sufficient green supplies, we design an intelligent workload scheduling scheme which enables a near-optimal off-line requests assignment to maximize the green utilization. 
The experimental results demonstrate that the new OptiMatch framework can achieve high green utilization (up to 94.9%) with a minor performance degradation (less than 9.8%)", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.30" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6796b517d20cbfab9129cd7a504ce021d36670b4", "sources": [ "DBLP" ], "title": "OptiMatch: Enabling an Optimal Match between Green Power and Various Workloads for Renewable-Energy Powered Storage Systems", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "67f9782a12db9cc99c560e4a41891dfb880f0751": { "authors": [ { "ids": [ "2663780" ], "name": "Zhenning Wang" }, { "ids": [ "1724199" ], "name": "Jun Yang" }, { "ids": [ "1687807" ], "name": "Rami G. Melhem" }, { "ids": [ "1776567" ], "name": "Bruce R. Childers" }, { "ids": [ "1686367" ], "name": "Youtao Zhang" }, { "ids": [ "1697293" ], "name": "Minyi Guo" } ], "doi": "10.1145/3079856.3080203", "doiUrl": "https://doi.org/10.1145/3079856.3080203", "entities": [ "Cycle basis", "Data center", "Graphics processing unit", "Parallel computing", "Quality of service", "Scalability", "Task parallelism", "Throughput" ], "id": "67f9782a12db9cc99c560e4a41891dfb880f0751", "inCitations": [], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "269-281", "journalVolume": "", "outCitations": [ "6e38285609f08477b455e1c5545256e6d29c932a", "064f38e5edef42cb5a37f2a350e4413e17132b11", "3c28d5967db86e8f5e4c37d03518967c285a32bf", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "5d79e0c5e4b531f26de469688668c50f8c1069b2", "58f9fe6efcd2ec6ae334675764ec995a131dc5c7", "7ea15c138cc72588fa376ff819f4bb8ca0b324da", "c3da4e02b8a474d6f094f1e5ac435049e0fe3e49", "96b4b72d1098674750c4a406c93efe43e036568b", "6757659aeba247db2a35691ee3b4c029e1a2dcf4", "132f00de21cee656d00ad6779f1926070ad59544", "1eeb50d5f7937f65a910203ae61430ff8b969012", 
"5fe43f4d79d2829ba062fe9cf1711ee60903dd24", "770ec20523ff6ea96d894b7ff7f618590924a3d5", "68c31b75a8aba5e44504e2f4e829b6ba938a643b", "0c6c30e3052fcc01aa5ee38252d77f75322d7b3f", "15677f10fa26b540ed07922dadb7457b124532ed", "40bb6830ec715b52e0fcbe7566f489dd2d0c25fe", "040bd1162e05c709ac15d937cec485fae3a6af43", "44d6631f8206946ac0a011f75aa2c1a31aa13c52", "2d6f002477015469075954c6748a1a85af352c94", "13a3d0dd8e58a6b094e1bf2f84d165884884ad7b", "a7a24f882aec173c01a9ed1eb52589f71d6c80f8", "39a6aa81ec3e20c1d500b99b560deb039c451b83", "4df2be64491c71b18d948a4b1ccce605fe31e674", "1877a239b623249a57adaaabf2085bc97f19aed6", "3e7fd5ac3fc1ab19c985c97a0614e4109fa91583", "c5b3f0caeba42a532a48adc80e6932c35bb26ac4", "2701d623a4feed0fb1e5e6dc0d127aa34340b936", "7a2804fe421e853ef59abeffa41060ffe700602d", "37b07566d53b8533c57d707c4913aae505a93a66", "10443d5d4f0e5048df514e581a9f364954158d00", "00f355ce566bb51dc70925217c62e437cc7e14e2", "b04c9e851ae605592d693aa65f0d753b8af08feb", "bdc7a60ab9b6182bb53ab76c995ba6a3aa4a696e", "0d075dae4e4ca9cabef40f9bec4c953ccfc31113", "c81e776d24fa2dcb497db553fd9625aa644c009e", "4f615eb48bb9bbb1831112b631b1336fe4c6823f", "1087bbef784e7daecaf13b58bc1480d6dee4929b", "2916fd514c69c1b3141c377c1c97d957bdc86c5e", "5fcbea17a6ce50e9b8d30396239f514796a51b8b", "109b416bdbf1739373638eb7e5b37f5d475fd40e" ], "paperAbstract": "GPUs have been widely adopted in data centers to provide acceleration services to many applications. Sharing a GPU is increasingly important for better processing throughput and energy efficiency. However, quality of service (QoS) among concurrent applications is minimally supported. Previous efforts are too coarse-grained and not scalable with increasing QoS requirements. We propose QoS mechanisms for a fine-grained form of GPU sharing. Our QoS support can provide control over the progress of kernels on a per cycle basis and the amount of thread-level parallelism of each kernel. 
Due to accurate resource management, our QoS support has significantly better scalability compared with previous best efforts. Evaluations show that, when the GPU is shared by three kernels, two of which have QoS goals, the proposed techniques achieve QoS goals 43.8% more often than previous techniques and have 20.5% higher throughput.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080203", "http://people.cs.pitt.edu/~zhangyt/research/isca2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/67f9782a12db9cc99c560e4a41891dfb880f0751", "sources": [ "DBLP" ], "title": "Quality of service support for fine-grained sharing on GPUs", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "67ffec9c10d9594eb9af4afe25b1f0b0bce5f85d": { "authors": [ { "ids": [ "1737553" ], "name": "Jian Xu" }, { "ids": [ "2883723" ], "name": "Lu Zhang" }, { "ids": [ "2138575" ], "name": "Amirsaman Memaripour" }, { "ids": [ "26892337" ], "name": "Akshatha Gangadharaiah" }, { "ids": [ "27030201" ], "name": "Amit Borase" }, { "ids": [ "26969671" ], "name": "Tamires Brito Da Silva" }, { "ids": [ "1760342" ], "name": "Steven Swanson" }, { "ids": [ "16878826" ], "name": "Andy Rudoff" } ], "doi": "10.1145/3132747.3132761", "doiUrl": "https://doi.org/10.1145/3132747.3132761", "entities": [ "Computer data storage", "Dynamic random-access memory", "Fault tolerance", "Non-volatile memory", "Software bug" ], "id": "67ffec9c10d9594eb9af4afe25b1f0b0bce5f85d", "inCitations": [], "journalName": "", "journalPages": "478-496", "journalVolume": "", "outCitations": [ "05a1357946de5eca42a477b7b268db4944219a2e", "24724ad8962a9e04eb496fddaefe9708f6960601", "05dd6cb44124b8a210ac391f15ec25e68918ef22", "4108e4635351d6f2d0916ee19d0a0ef878649c3c", "94783d113951822195d4ba44599a8fcbdef9d4bf", "def29d202e537d026b8d3ed91655b540ef86cceb", "20a108587321823ca9cdd93ac84fc316a0400630", "a2b3b24825e44103cca5ba0a4425c5829751c759", 
"894879db716a843559bb5a6c568ac450b8586df5", "7c62c7e0b4e026f6b5b027735c99cbf033789ba9", "885c666fbcfd1a10c613496d7a041d01b99c7a39", "556f01b6764f866d7bd4a2d955115ca72bd3413f", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "b1ce55ed6cd7fb989787d016e2783e49a66ac431", "4d3c779b5a224133bd5c69e05103fedbd904590a", "33438b1148a84d6e5bf2cad70bf7754d546ca5d7", "3eb7f80bc2bef135b236ab741d1582e4d2e7a050", "517c5cd1dbafb3cfa0eea4fc78d0b5cd085209b2", "6902867509928c0e5c19aff3e62e1def3a19d581", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "0c9fbe0ffae8c874fa826efa6ae3650a151afbff", "549dd5a7c187fbf2a727f84f174e5ed79ade02b1", "32d23ce43877aa8cd385a8e01f366329dd015a5c", "157b439116e0dfb349f175d51c3793489355e08c", "400ae82ab2fc2c814033c65854229ecefbddbf67", "8a7536f311d22bd588c5bc2306d54d13effaee82", "088e3e939ad234b6fdd0e321290fb26937dc2553", "d0b6d2075a653d60452b6df0fced4ee0ae093dd2", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "0c96b3ac2e720448054f1bcebdfd52ee341eac57", "fae8a785260ac5c34be82fca92a4abef4c30d655", "f4f37512b9786dbd9980d343e47fdc41032d0ca6", "4bbd9d77460a14a628119d05332360c5d78df8d3", "5b072f697b2285ef21dc44ff647062d4c4b6e8f8", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "38a9120f780602521af9744e31d80ef5cd9593a7", "d04957ae69caf43707b13fa833e50119724688f1", "108c840d5d1847948a2de0250490a327ae069ee6", "9376a2d69d06e39fd6fd27c9ce2f0817cc1dd4ef", "304a4823e540294704cc624f4e01c50cd1d291ec", "229acac1bd70c57e6a17f2c24f153c06d54de252", "0204f40221260d00c5ee63646560a40dcd7d97d1" ], "paperAbstract": "Emerging fast, persistent memories will enable systems that combine conventional DRAM with large amounts of non-volatile main memory (NVMM) and provide huge increases in storage performance. Fully realizing this potential requires fundamental changes in how system software manages, protects, and provides access to data that resides in NVMM. 
We address these needs by describing an NVMM-optimized file system called NOVA-Fortis that is both fast and resilient in the face of corruption due to media errors and software bugs. We identify and propose solutions for the unique challenges in adding fault tolerance to an NVMM file system, adapt state-of-the-art reliability techniques to an NVMM file system, and quantify the performance and storage overheads of these techniques. We find that NOVA-Fortis' reliability features consume 14.8% of the storage for redundancy and reduce application-level performance by between 2% and 38% compared to the same file system with the features removed. NOVA-Fortis outperforms DAX-aware file systems without reliability features by 1.5x on average. It outperforms reliable, block-based file systems running on NVMM by 3x on average.", "pdfUrls": [ "http://nvmw.ucsd.edu/nvmw18-program/unzip/current/nvmw2018-final51.pdf", "http://cseweb.ucsd.edu/~jix024/papers/final_camera.pdf", "http://doi.acm.org/10.1145/3132747.3132761", "https://www.sigops.org/sosp/sosp17/slides/fortis-sosp17-slides.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/67ffec9c10d9594eb9af4afe25b1f0b0bce5f85d", "sources": [ "DBLP" ], "title": "NOVA-Fortis: A Fault-Tolerant Non-Volatile Main Memory File System", "venue": "SOSP", "year": 2017 }, "6819c75b28dc57c08d1c3bff387416d86987e6b3": { "authors": [ { "ids": [ "1684836" ], "name": "Yi Chen" }, { "ids": [ "1761499" ], "name": "Wei You" }, { "ids": [ "2979147" ], "name": "Yeonjoon Lee" }, { "ids": [ "8336490" ], "name": "Kai Chen" }, { "ids": [ "34989133" ], "name": "XiaoFeng Wang" }, { "ids": [ "1726367" ], "name": "Wei Zou" } ], "doi": "10.1145/3133956.3134009", "doiUrl": "https://doi.org/10.1145/3133956.3134009", "entities": [ "Android", "Antivirus software", "Credential", "Experiment", "Hard coding", "Mobile app", "Program slicing", "Server-side", "Static program analysis", "Traffic analysis", "User interface", "Value (ethics)" ], 
"id": "6819c75b28dc57c08d1c3bff387416d86987e6b3", "inCitations": [], "journalName": "", "journalPages": "815-828", "journalVolume": "", "outCitations": [ "6b95a7f29931281441d35a98126c28707765246a", "4daf1e0aeca142a23f816bd73daf2f86ab2c5c52", "14490c37be179400c86cf89aac7c9272dddf60e7", "11a732848cbbdad81e660c1fc7c4a88d2d9c1d6b", "c826ea6af6cc2a60c85ac5ab1c06df8b3883f137", "aa531e41c4c646285b522cf6f33f82a9d68d5062", "cf6b5797d922678f0f03a8bbad96b0d7482d8c02", "94d0e8b118efecb9f5b2056d84bf22253a2fb63c", "0bae04f86b5302345d18b99801829efc98c90874", "0de787b41d82c59f109de71d7c07e579ee2bc1ff", "2060da9362efd14f9352dde99e7ef33c7c95aef4", "977913334ff5eeee9dbea0d2b80463a18a610581", "c2a5c7c0f505cffe6ca4add871ed476fe1feeb78", "dcddbe6e0bb64d4792610d08bae06f6c11aec0c7", "0cb4ed5d73b4885f05facfa6aee45bdcdec1847e", "4fd04a083c0442879aacbda6b46ad6944a56391f", "2f7e3fb8e49d1cf731115eac2968c3f6af4f47b5", "d017d86db35dab60c4796a5fdce4f7b3112966eb", "cd9a3381186d98b4ec3a2251d2678bf71e9c3e73", "471d674e3506160c53fbfebe9b07e026a19da7ff", "0e28a308465b5c29875912fe72497491b947c774", "12db1174e733c5619a388b4d5b7b7c6098d764d0", "b11cd7a0ccf98b71c1d9e46fa89a4708c9efdcc6", "94699e3a95405a380a942e51d0e063241a6372bd", "6f36ec040624a1083222942dfde657e576afe701", "7d0577772fe06b773d359d1b4060fce92fd4948f" ], "paperAbstract": "Monitoring network behaviors of mobile applications, controlling their resource access and detecting potentially harmful apps are becoming increasingly important for the security protection within today's organizational, ISP and carriers. For this purpose, apps need to be identified from their communication, based upon their individual traffic signatures (called imprints in our research). Creating imprints for a large number of apps is nontrivial, due to the challenges in comprehensively analyzing their network activities at a large scale, for millions of apps on today's rapidly-growing app marketplaces. 
Prior research relies on automatic exploration of an app's user interfaces (UIs) to trigger its network activities, which is less likely to scale given the cost of the operation (at least 5 minutes per app) and its effectiveness (limited coverage of an app's behaviors).\n In this paper, we present Tiger (Traffic Imprint Generator), a novel technique that makes comprehensive app imprint generation possible in a massive scale. At the center of Tiger is a unique instantiated slicing technique, which aggressively prunes the program slice extracted from the app's network-related code by evaluating each variable's impact on possible network invariants, and removing those unlikely to contribute through assigning them concrete values. In this way, Tiger avoids exploring a large number of program paths unrelated to the app's identifiable traffic, thereby reducing the cost of the code analysis by more than one order of magnitude, in comparison with the conventional slicing and execution approach. Our experiments show that Tiger is capable of recovering an app's full network activities within 18 seconds, achieving over 98% coverage of its identifiable packets and 0.742% false detection rate on app identification. Further running the technique on over 200,000 real-world Android apps (including 78.23% potentially harmful apps) leads to the discovery of surprising new types of traffic invariants, including fake device information, hardcoded time values, session IDs and credentials, as well as complicated trigger conditions for an app's network activities, such as human involvement, Intent trigger and server-side instructions. 
Our findings demonstrate that many network activities cannot easily be invoked through automatic UI exploration and code-analysis based approaches present a promising alternative.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134009", "https://www.informatics.indiana.edu/xw7/papers/p815-chen.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6819c75b28dc57c08d1c3bff387416d86987e6b3", "sources": [ "DBLP" ], "title": "Mass Discovery of Android Traffic Imprints through Instantiated Partial Execution", "venue": "CCS", "year": 2017 }, "684615df82dc31784362bfe4e4226a624378164b": { "authors": [ { "ids": [ "1781266" ], "name": "Xinyu Chen" }, { "ids": [ "34773586" ], "name": "Jeremy Benson" }, { "ids": [ "1762267" ], "name": "Trilce Estrada" } ], "doi": "10.1109/CLUSTER.2017.96", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.96", "entities": [ "Algorithm", "Centralisation", "Cluster analysis", "Computation", "Data point", "Machine learning", "MapReduce", "Privacy", "Product binning" ], "id": "684615df82dc31784362bfe4e4226a624378164b", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "572-581", "journalVolume": "", "outCitations": [ "7117704b70919d7458a4aa4c218ae64482a5bc4e", "977a5fd404f742fec71367bb8e2a9fc2e616437f", "34510661d328233aacd53090538bb17fbf759057", "009342aa77a56c46a475fa85e66506219f271526", "7677ab0a2ca00016665e9914ad8b797115d99ffa", "8ecd462bc48a07bb78f8e21a621602c5db0c6c03", "b02d791913d74ee2b3a4449092919d442c786518", "25cc2f1ff16830a6889b93004510db58452b8b7d", "12d49ecc6aa2bf20a850100cafe061a237a4874e", "4aad1756e88dba86399a75891895e00b160f5460", "4c7e9b8857828b3d143576e02406a5a1664c3f74", "48b344de96ce911a78311309c648af006394db4c", "afb6af175c2dc5122fb09b55f48bc13a5fe7d782", "42b6ea0bbc174ce1ecb116aff6280559545dd6db", "edadd09eb8a6aab754ec81917f5ea2304baf2c6d", "10e1e88f9c137d8e350bfc6c9f60242a8f3d22b4", "ae9e3a647a53362b820f42d4a7d57dba992f15a0", 
"1941002f4b3a4c0efd8b23fe0a551d8c8eec9585", "8f3f63773aa801b2cdcfa2e5699e3abb9aed7443", "23c5bb6caadcf9bebb770e71158272b2dce8d4cd", "4fe2bf624e18d71d87ae36824606c42c64446562", "12ff0fe079d93c2f048c960ec657e6613d06c362", "478815622d22d85b0ade98c59b6ac78c3fb1ac21", "ade86d0570c2fddb257ef5400297c4bcc27049f3", "a620d007603111ae263c5769c9dc9ac37efd2ddb", "25fd3466554913010e3221e825b715027742916e", "59f5f73372dfeefbc955851489e0b63c4c13abf7", "52047d6a5096c2951f6bdb11f6d9f57f2718841b", "dc8d0f31c95149af0426ea82f5e082de4b97c2b2", "2a59ad03300072f9e6bfe726f5f459b3df2d5f8e", "1c799eca7983c62f7815ac5f41787b3e552567b6", "d1c523fcf6ada9031ae5a7415b164a65c0641d9f", "498a25a560af8637b22d67511c1c2aacc4d64ede", "4954fa180728932959997a4768411ff9136aac81" ], "paperAbstract": "Traditional machine learning algorithms often require computations on centralized data, but modern datasets are collected and stored in a distributed way. In addition to the cost of moving data to centralized locations, increasing concerns about privacy and security warrant distributed approaches. We propose keybin, a distributed key-based binning clustering algorithm for high-dimensional spaces. keybin locally generates a spatial key for each data point across all dimensions without needing knowledge of other data. Then, it performs a conceptual Map- Reduce procedure in the index space to form a global clustering assignment. 
We present an implementation and a case study on the capabilities and limitations of this approach, showing that this algorithm can learn a global clustering structure with limited communication and can scale with the dimensionality and size of data sets.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.96" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/684615df82dc31784362bfe4e4226a624378164b", "sources": [ "DBLP" ], "title": "keybin: Key-Based Binning for Distributed Clustering", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "684e18703bb7edc1584a3560556e1357c7d2968c": { "authors": [ { "ids": [ "2625573" ], "name": "Xiaorui Pan" }, { "ids": [ "40066762" ], "name": "Xueqiang Wang" }, { "ids": [ "2781659" ], "name": "Yue Duan" }, { "ids": [ "34989133" ], "name": "XiaoFeng Wang" }, { "ids": [ "1975143" ], "name": "Heng Yin" } ], "doi": "", "doiUrl": "", "entities": [ "Android", "Experiment", "Malware", "Mobile app", "Mobile malware", "Personally identifiable information", "Program analysis", "Stock and flow" ], "id": "684e18703bb7edc1584a3560556e1357c7d2968c", "inCitations": [ "6b6299f3e6f0d0f24ba6536d3d6fabc36738765c", "65c6bda16861410915c4b50d2540c9d058a1bb57", "0339584e6c0b073e2f62383a7a76d448766143f1", "f2b5963afef31e1c1b12c84bb3a9d1117916e673", "7647a0ef3bc423067753bea995117d4061f35771", "104220ebed97a333817878c04f4982f3d051a6b7" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "458d73c2f3a5b448f1f66bac8d3235597a8c06c3", "02eabff9cab5cb8cea696b35bbd4888e0a52057c", "c792b346d20eb351eefb127756b422d422ed7bde", "0b7f62a2ac217e035e0cd9cb73d2de4fb6135af5", "a79fe2a4a7ed871631114de2c42a1a969852eac3", "20a88443f069f8b2110fb6531ed4ad0480d0fce9", "3946a5c410ba1980d932cc6d2987b4d935e038d5", "815e02024d754a0c0c656d96cbee31882e1f72cd", "30bc245a8295dac571c58aec0b744e4bf217c287", "be6b283871ec6df396ff00bc2d844a9e4c056000", 
"7998e9e8aeb036f1cbe5235c9c33ff127ad2132d", "09cca9d37140bae6c5a78b7c9ec112bd29ab0b3d", "4a4c0cfc26020d519679a98fe683fce6aab1eefa", "463dec0105456132f921f3075081e1fb824fb784", "0d7dea74b7344dcd4de965e9535e5cfa8630c94c", "6b2ef620ca9363a4b996693c649fddf3c97a91c3", "33f8f2e76d0190905c2bd3a2e611d28504fa4353", "93e390c7dd2f979fbd63e4c46977b791d92c6f41", "32bd7b680830b3e168795ccfe650ceeb0edf7878", "421c305a0d2773d1132d9539e42d1f1337f1600a", "0a7267f1088ee19d3a6d6105d633c0c93ea4cb6b", "2fe33f4b7c75d9e29bf80e7bdd719205cfafc3c9", "4f09377086284ffed61435d4c121e5d93ededbef", "60ff68e69e121e514379d90184c52891931eeabc", "520860dcc2c23dce57ef996d76c6e0b89faf054c", "07fe0ddb6cef7ed8946d416c093452c1d0db0c34", "66c553af068d06eb583f94d640733d43cbd0799f", "09f4aa3a4483c3f790a58d5c987f03d49715788b" ], "paperAbstract": "Hidden sensitive operations (HSO) such as stealing privacy user data upon receiving an SMS message are increasingly utilized by mobile malware and other potentially-harmful apps (PHAs) to evade detection. Identification of such behaviors is hard, due to the challenge in triggering them during an app\u2019s runtime. Current static approaches rely on the trigger conditions or hidden behaviors known beforehand and therefore cannot capture previously unknown HSO activities. Also these techniques tend to be computationally intensive and therefore less suitable for analyzing a large number of apps. As a result, our understanding of real-world HSO today is still limited, not to mention effective means to mitigate this threat. In this paper, we present HSOMINER, an innovative machinelearning based program analysis technique that enables a largescale discovery of unknown HSO activities. Our approach leverages a set of program features that characterize an HSO branch and can be relatively easy to extract from an app. 
These features summarize a set of unique observations about an HSO condition, its paths and the relations between them, and are designed to be general for finding hidden suspicious behaviors. Particularly, we found that a trigger condition is less likely to relate to the path of its branch through data flows or shared resources, compared with a legitimate branch. Also, the behaviors exhibited by the two paths of an HSO branch tend to be conspicuously different (innocent on one side and sinister on the other). Most importantly, even though these individual features are not sufficiently accurate for capturing HSO on their own, collectively they are shown to be highly effective in identifying such behaviors. This differentiating power is harnessed by HSOMINER to classify Android apps, which achieves a high precision (>98%) and coverage (>94%), and is also efficient as discovered in our experiments. The new tool was further used in a measurement study involving 338,354 realworld apps, the largest one ever conducted on suspicious hidden operations. Our research brought to light the pervasiveness of HSO activities, which are present in 18.7% of the apps we analyzed, surprising trigger conditions (e.g., click on a certain region of a view) and behaviors (e.g., hiding operations in a dynamically generated receiver), which help better understand 1A branch, unless otherwise specified, refers to a branching structure, which contains a condition and multiple paths. 
the problem and contribute to more effective defense against this new threat to the mobile platform.", "pdfUrls": [ "http://www.cs.ucr.edu/~heng/pubs/ndss2017.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/dark-hazard-learning-based-large-scale-discovery-hidden-sensitive-operations-android-apps/", "https://www.informatics.indiana.edu/xw7/papers/ndss2017-05a_1-pan_slides.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/684e/18703bb7edc1584a3560556e1357c7d2968c.pdf", "s2Url": "https://semanticscholar.org/paper/684e18703bb7edc1584a3560556e1357c7d2968c", "sources": [ "DBLP" ], "title": "Dark Hazard: Learning-based, Large-Scale Discovery of Hidden Sensitive Operations in Android Apps", "venue": "NDSS", "year": 2017 }, "6852566c1f46713f757164f3a58bb715f4a1a2c6": { "authors": [ { "ids": [ "38760108" ], "name": "Liang Deng" }, { "ids": [ "1687577" ], "name": "Peng Liu" }, { "ids": [ "1688881" ], "name": "Jun Xu" }, { "ids": [ "1711641" ], "name": "Ping Chen" }, { "ids": [ "1694209" ], "name": "Qingkai Zeng" } ], "doi": "10.1145/3050748.3050750", "doiUrl": "https://doi.org/10.1145/3050748.3050750", "entities": [ "Address space", "Cloud computing", "Event-driven programming", "Privilege level", "Protection mechanism", "Virtual Machine Manager", "Virtual machine" ], "id": "6852566c1f46713f757164f3a58bb715f4a1a2c6", "inCitations": [ "df545215b7e7f830adc96f4c72566d22a68a9f06", "377712ef264d63c97b341fb782037d063018305e" ], "journalName": "", "journalPages": "83-96", "journalVolume": "", "outCitations": [ "85d555f7ce19740b4fc656ff797623c6e1513018", "60c2a873958fba5ff0ef2cce20663eec627aa782", "3b06edbeb2adf0de12a6ddbec073fd96e82617c6", "0957332f8beb1ec4071fcb6fc44cb0b5396463d5", "422c2d83a959df1f7c3e99b8a2c77772d8b2e7c3", "080c336698f5d7a15169e5ad98fa62a0bbf6085c", "2e8da51c545cbe8e62a3751a5a2b9a3beca00b43", "3001d2504e1dbc547d12d05f3ed1a671d125c4ce", "b1b3c8f907db6748c373bf1d15ec0c15bb2307dc", "90df476a4070cd797ef682f30a408086899ad16b", 
"686150e2179840ed40a0166cba6c5d507f3aa49c", "9b27ef50a3039c0be52ea58af042b4b7b99d710f", "61504aa2d0cde80429f1c3a7809d0e084e184172", "86013daaae16572bceb755e65ee5fa2fdfb63848", "3c48458d8f77b04c9e69b61f5beddf770e61ec04", "67f961f98d34fea3ab15f473429a5156b62b5c65", "567fbe38b1e63d3e718527b3ea9918440dd703ad", "ce08e1ae1b83a4e53b997982d2178e2fe6372805", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "38aef25ab57851352c97bb7ba610312005e08e3d", "505ee623397666c0ce158e103ffac0c62dbcf2fa", "24748ef2b88e6df370b5dccfb75cba47e132f92d", "1e1a8c185cfc94cb0f26660b7e774de696761394", "74572d07252e2f0b60b16abb931c46e819e2b448", "0829638686dfef02a9ded604952173f06b1ab1aa" ], "paperAbstract": "This paper presents a novel framework that enables practical event-driven monitoring for untrusted virtual machine monitors (VMMs) in cloud computing. Unlike previous approaches for VMM monitoring, our framework neither relies on a higher privilege level nor requires any special hardware support. Instead, we place the trusted monitor at the same privilege level and in the same address space with the untrusted VMM to achieve superior efficiency, while proposing a unique mutual-protection mechanism to ensure the integrity of the monitor. Our security analysis demonstrates that our framework can provide high-assurance for event-driven VMM monitoring, even if the highest-privilege VMM is fully compromised. 
The experimental results show that our framework only incurs trivial performance overhead for enforcing event-driven monitoring policies, exhibiting tremendous performance improvement on previous approaches.", "pdfUrls": [ "http://doi.acm.org/10.1145/3050748.3050750" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6852566c1f46713f757164f3a58bb715f4a1a2c6", "sources": [ "DBLP" ], "title": "Dancing with Wolves: Towards Practical Event-driven VMM Monitoring", "venue": "VEE", "year": 2017 }, "685a52364afb300d38f46c4cbc96e125b3a9cff2": { "authors": [ { "ids": [ "1910886" ], "name": "Diego Lugones" }, { "ids": [ "3334155" ], "name": "Jordi Arjona Aroca" }, { "ids": [ "3098143" ], "name": "Yue Jin" }, { "ids": [ "5373377" ], "name": "Alessandra Sala" }, { "ids": [ "2809994" ], "name": "Volker Hilt" } ], "doi": "10.1145/3127479.3129250", "doiUrl": "https://doi.org/10.1145/3127479.3129250", "entities": [ "Best, worst and average case", "Cloud computing", "Domain-specific language", "Enterprise software", "High availability", "Machine learning", "Provisioning", "Virtual machine" ], "id": "685a52364afb300d38f46c4cbc96e125b3a9cff2", "inCitations": [ "fb973cd5467c9ce056cb37083efc87c74bdae3d0" ], "journalName": "", "journalPages": "466-478", "journalVolume": "", "outCitations": [ "9e94390e67fa2c44188634f6a4e8195b1eb309c8", "0f26454617a133b7a469b6c9e0e8639e1d5dbac6", "0a96ed079dfa8768c4aba0226dd3e014a4f61f2c", "98553ae03bc830d157af8ca866fe9e4225bb67f5", "452e1da8575e7c9390c826d22b7bb23bf025b188", "8461b79f9747a0caee85522c49bd4655c64e10fb", "4c9b5b3ec35b92357936efe9401110e37e2e046c", "082ceb12f88cf6512a097b87853f0d988c3e2c9b", "8260d12f5212a9a654486d86e5467d8a9e9a5bb5", "942ecc61675d81724823b893df0f1c9418b52d90", "2487db7fd646477b45b53344b2fbd7b41092cafb", "749559b87002b720eeed521fe383aa9a13e07c8e", "78f853271fe69da617d5a14a1e54cbae6a982a50", "0cc547cea26938e8c4165059ed0975cabec2c660", "0a54af4fbe8be9c5f760d3520f222cda64de679f", 
"9e98d529d158e2230d722f497fbc36373eaa8583", "0f262e51371f6a51fa77d085d8823873eb505f3e", "1884fc68add9f4a30ce491261266c21b8ce6a563", "2b992d8b0fe1d201fc6b31fd0aff4299db092888", "bdec841129045880265dbda5e047e4d13e9b3aa7", "d6695b9c256fe381a39013c3284922ce1a983c1c", "7251ed0fbf99b44712d438e84dd94a50e4407d3c", "3ab4fcd5fba7d2d7f63377ff36b1c8d190e95360", "1833dee660500dd104ca84d99600b70c2479ba3c", "808585a76d350dbe567c35b74086948cdd95cad4", "08f13e484e7e51831ec13076d14570ced91a50fb", "19d94ccda7fbdd65431854882aa0cc9c0ca5fae7", "ac9a160fa43cfabb375d3264cea6951a934a5a97", "e0b0b8298c40102d8c5d4704d7ffd7f2300b9602", "45ef8ed95bfeeba351b79118b362e183f61cc42d", "2e72178091b2ca445f46200dcba71a53417b69eb", "6b6a5fe05f19bd4322a43b0f688b854db40a2fdd", "21fea4574067c8386820d572cb2e8f7005cd34ff", "998d7172611819ae1dddbf1256bee64d2ba5eba4", "31fbfb42bec8543f2fd8fbf9ea61a878d7a5e54e", "30922a3953ff740486bfd01461cc1f0c5185c39c", "0e1c372e31184fcf1996dcb46192e65d994c04fd", "8441f7234645873c7c9d6f0b87e6df723f2d59dd", "f0181f2548b8557ea35da2e3711d6bdf5eafa138", "5857f9e492f29c75e43bd212edf6b04243c338d1", "1cf68018fdaa2113ff2ea5f5e549ddb0ec2389c3" ], "paperAbstract": "The virtualization of services with high-availability requirements calls to revisit traditional operation and provisioning processes. Providers are realizing services in software on virtual machines instead of using dedicated appliances to dynamically adjust service capacity to changing demands. Cloud orchestration systems control the number of service instances deployed to make sure each service has enough capacity to meet incoming workloads. However, determining the suitable build-out of a service is challenging as it takes time to install new instances and excessive re-configurations (i.e. scale in/out) can lead to decreased stability. 
In this paper we present AidOps, a cloud orchestration system that leverages machine learning and domain-specific knowledge to predict the traffic demand, optimizing service performance and cost. AidOps does not require a conservative provisioning of services to cover for the worst-case demand and significantly reduces operational costs while still fulfilling service quality expectations. We have evaluated our framework with real traffic using an enterprise application and a communication service in a private cloud. Our results show up to 4X improvement in service performance indicators compared to existing orchestration systems. AidOps achieves up to 99.985% availability levels while reducing operational costs at least by 20%.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3129250" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/685a52364afb300d38f46c4cbc96e125b3a9cff2", "sources": [ "DBLP" ], "title": "AidOps: a data-driven provisioning of high-availability services in cloud", "venue": "SoCC", "year": 2017 }, "68716519c7d3d6b2e1fabc0cf40f0a3fcfdecab0": { "authors": [ { "ids": [ "10787402" ], "name": "Romila Pradhan" }, { "ids": [ "2147072" ], "name": "Siarhei Bykau" }, { "ids": [ "1771369" ], "name": "Sunil Prabhakar" } ], "doi": "10.1145/3035918.3035941", "doiUrl": "https://doi.org/10.1145/3035918.3035941", "entities": [ "Algorithm", "Approximation algorithm", "Data item", "Deployment environment", "Feedback", "Social media" ], "id": "68716519c7d3d6b2e1fabc0cf40f0a3fcfdecab0", "inCitations": [ "80c7ad5e1d47a7acca3c886c426bb5f1efeaa7a4" ], "journalName": "", "journalPages": "603-618", "journalVolume": "", "outCitations": [ "cd48760a142830b796b8a85a158cd469f3e5feb0", "04f39720b9b20f8ab990228ae3fe4f473e750fe3", "1b763ec4c21c799eef00ca26f6afc6c01b45a82c", "579e1e9217cfed6d563cedf8f8fdcd1604fc0917", "208b70d2cb06b17b8d48a907dc2ad2f4272a8588", "4fd6f95aca0da395078d63c4f3be5a51f3ffef55", "4f84bcbddad1e931b0328be6e0a96ca731c538f8", 
"762aa12db17ecc3ed8320e1d88bef063214b595b", "09c7a7213053784e2b1487de5a08c22f6a0daef0", "25f9f4f10d8d1818746e57283d7b1a0fb4c44ff0", "561b0881fb83c7182bca4aec70bd287ea0f5be28", "d641ce8fe01ba5ae0ade43feaa1e1e2a7f4839b8", "0a2ca1578c006796da813e9d16ccdd1d3b737565", "6b81212ebb2d83c8c35b00016a7e9e36d99f62ac", "20d90871bc0dc7956bf2557d91d8d96deb0a4520", "422d9b1a05bc33fcca4b9aa9381f46804c6132fd", "af1e1bee41d004a6c1fa608a9fe2a884f48c6e5f", "12653dc3882f76c6933d7543bd5e033dd18b56e0", "4f819589fd2931333326ad7deec58f628f7d2644", "baf499e0b0ebe4985cdf1a4c08a4a4e465ce3e59", "17714c1a50e306227cd5cd56af0bc203c7e43db7", "0ac93570bab7aec97bf01eb5387372ee5cb7f94a", "a12cd3d9ae5530a90302a6e4af477e6e24fa0f95", "dbe98d292cc8d69f0673cc5c524c9bc282067bf8", "1ec2d02bd12f3a357449cf1bbc67b6adf7cd6296", "1b189d721adbf1d2bab93b7ed6ce826e188b0b99", "006336b4082bbba1ab1e5e2e6c633a44971a7dc3", "009b37cf7dbc9da978d4fa604257e2e6020fd478", "e1d2ea5d876a526b915a450e2cb23581600e3750", "371fc532e70dc5a72c49eb3503ae1f707f38ea7d", "2cca87876d142c75626304700fb599e7944cf338", "44cb3d4193fadcea42a3a657ddd67b18d6ddc87e", "0e5ebc2eb31b6c78ee0dee10246efeeaf587f7f4", "e541c475457a731d7d434c4302867fc45af5876f", "af2a807c5e32a35765850c6b6891f471d7bc7aea", "22f516acc61967369ec29d4121c7b517d5f60e08", "796510d962c5abf8c6436b64946506d6234ae506", "453c5a0ff2b97746de65153a2ea39799458585e7", "4359e7b1bba1e7a37a591111cd5c719fdb4857c8", "65821014abe934029310cb10d4e329645acd4817", "4f739534a366799e170599d3ff3d65597f0118db" ], "paperAbstract": "In domains such as the Web, sensor networks and social media, sources often provide conflicting information for the same data item. Several data fusion techniques have been proposed recently to resolve conflicts and identify correct data. The performance of these fusion systems, while quite accurate, is far from perfect. In this paper, we propose to leverage user feedback for validating data conflicts and rapidly improving the performance of fusion. 
To present the most beneficial data items for the user to validate, we take advantage of the level of consensus among sources, and the output of fusion to generate an effective ordering of items. We first evaluate data items individually, and then define a novel decision-theoretic framework based on the concept of value of perfect information (VPI) to order items by their ability to boost the performance of fusion. We further derive approximate formulae to scale up the decision-theoretic framework to large-scale data. We empirically evaluate our algorithms on three real-world datasets with different characteristics, and show that the accuracy of fusion can be significantly improved even while requesting feedback on a few data items. We also show that the performance of the proposed methods depends on the characteristics of data, and assess the trade-off between the amount of feedback acquired, and the effectiveness and efficiency of the methods.", "pdfUrls": [ "https://www.cs.purdue.edu/homes/rpradhan/p603-pradhan.pdf", "https://www.cs.purdue.edu/homes/rpradhan/romila_userFeedback_poster.pdf", "http://doi.acm.org/10.1145/3035918.3035941" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/68716519c7d3d6b2e1fabc0cf40f0a3fcfdecab0", "sources": [ "DBLP" ], "title": "Staging User Feedback toward Rapid Conflict Resolution in Data Fusion", "venue": "SIGMOD Conference", "year": 2017 }, "6895a7a7ef3a9c8e77b5f6ee8c64b49d2dee61cc": { "authors": [ { "ids": [ "32426008" ], "name": "Vlad Nitu" }, { "ids": [ "2127809" ], "name": "Pierre Olivier" }, { "ids": [ "1685006" ], "name": "Alain Tchana" }, { "ids": [ "10399629" ], "name": "Daniel Chiba" }, { "ids": [ "1759838" ], "name": "Antonio Barbalace" }, { "ids": [ "1679417" ], "name": "Daniel Hagimont" }, { "ids": [ "1729107" ], "name": "Binoy Ravindran" } ], "doi": "10.1145/3050748.3050758", "doiUrl": "https://doi.org/10.1145/3050748.3050758", "entities": [ "Autoscaling", "Data center", "Data structure", 
"Elasticity (cloud computing)", "Experiment", "Hypervisor", "Lock (computer science)", "Memory scrubbing", "Non-uniform memory access", "Scalability", "Semiconductor consolidation", "Signal trace", "Speedup", "Swift (programming language)", "Unikernel", "Virtual machine" ], "id": "6895a7a7ef3a9c8e77b5f6ee8c64b49d2dee61cc", "inCitations": [ "df545215b7e7f830adc96f4c72566d22a68a9f06", "70c4f0403d80427e10c7e7167f814ec0bb12d18f" ], "journalName": "", "journalPages": "1-14", "journalVolume": "", "outCitations": [ "0e851f49432767888b6ef4421beb268b9f2fc057", "0327e79a9863898a2dd24a7d1850ec3bedf044d0", "150a59b0904b5725c69302dbfba148fbf67be2b0", "11928ecc96f52e153f6a3bf5143260f15f7c4dfd", "504b4cc991475ebde762d3bb24316b9fe6b92fa6", "067c7857753e21e7317b556c86e30be60aa7cac0", "2763cd85a279d9aa28942eb51febaa76c2c852cb", "3574657705475722b6c398c266805f758268778b", "08832863bc3f041222f381c8ae143f8a66449059", "4650259fb4aadb376fd5994f9ab9dd07a4f83511", "36560510bab4e9e6d8660b91189f9e11e486f1c4", "3bc68ebdfc30f1e5df9a80b48bdfde1e20e0ccbe", "ce8f8e86db523da990507f177c6c6df445cd8d46", "41fca6c199464c983cb6384ae65c83eb7522fb46", "0187493c5cbd9b8bcf2019b8521082aea6db83f1", "0d41dcafa87195ae3c05282b5250bc639d31de87", "54ff8026f68506452bb19585773935b4f1e71aa4", "69bcfe3e8c989166caa93c78637a19793ac43eee", "280863f80b6401bc6d65839ecb3dc7a0febdfa09", "54be24210d49deeed59a0bd53c6f1704d0db9e33", "3cc9d8e25164012d0c1a61d28293b36a4b9d8759", "4e4348913b3198ae51b784db893938ae3afecaf5" ], "paperAbstract": "The ability to quickly set up and tear down a virtual machine is critical for today's cloud elasticity, as well as in numerous other scenarios: guest migration/consolidation, event-driven invocation of micro-services, dynamically adaptive unikernel-based applications, micro-reboots for security or stability, etc.\n In this paper, we focus on the process of setting up/freeing the hypervisor and host control layer data structures at boot/destruction time, showing that it does not scale in 
current virtualization solutions. In addition to the direct overhead of long VM set-up/destruction times, we demonstrate by experimentation the indirect costs on real world auto scaling systems. Focusing on the popular Xen hypervisor, we identify three critical issues hindering the scalability of the boot and destruction processes: serialized boot, unscalable interactions with the Xenstore at guest creation time, and remote NUMA memory scrubbing at destruction time. For each of these issues we present the design and implementation of a solution in the Xen infrastructure: parallel boot with fine-grained locking, caching of Xenstore data, and local NUMA scrubbing. We evaluate these solutions using micro-benchmarks, macro-benchmarks, and real world datacenter traces. Results show that our work improves the current Xen implementation by a significant factor, for example macro-benchmarks indicate a speedup of more than 4X in high-load scenarios.", "pdfUrls": [ "http://doi.acm.org/10.1145/3050748.3050758" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6895a7a7ef3a9c8e77b5f6ee8c64b49d2dee61cc", "sources": [ "DBLP" ], "title": "Swift Birth and Quick Death: Enabling Fast Parallel Guest Boot and Destruction in the Xen Hypervisor", "venue": "VEE", "year": 2017 }, "6897774fa37f13f2d1e4a88c82c626b98fe67951": { "authors": [ { "ids": [ "2208926" ], "name": "Le Shi" }, { "ids": [ "13840873" ], "name": "Yuming Wu" }, { "ids": [ "1708437" ], "name": "Yubin Xia" }, { "ids": [ "1795094" ], "name": "Nathan Dautenhahn" }, { "ids": [ "1716528" ], "name": "Haibo Chen" }, { "ids": [ "7274044" ], "name": "Binyu Zang" }, { "ids": [ "1861817" ], "name": "Jinming Li" } ], "doi": "", "doiUrl": "", "entities": [ "Address space", "Code injection", "Code integrity", "Control flow", "Control-flow integrity", "Hardening (computing)", "Hypervisor", "Kernel (operating system)", "Sandbox (computer security)" ], "id": "6897774fa37f13f2d1e4a88c82c626b98fe67951", "inCitations": 
[ "377712ef264d63c97b341fb782037d063018305e" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "17f19d9ec093ef82a10f1276fc53c10d4667836d", "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "85d555f7ce19740b4fc656ff797623c6e1513018", "1fb49ae43195232f0b3d1c9d534a5aa03bdd8f26", "067c7857753e21e7317b556c86e30be60aa7cac0", "057339544e31d9cb2ef807bddff2b705b8c674dd", "5bddb52a9def1c1330e8139b8496fbb8bb8c5937", "2a173976854f63517cc3eabb0e67aa4760faa6e5", "0829638686dfef02a9ded604952173f06b1ab1aa", "39040e2b60fcb01dfed8d638f2cb66218cfdb144", "567fbe38b1e63d3e718527b3ea9918440dd703ad", "16e85d76e57739da3082ca9dd4868b240c0b3c86", "505ee623397666c0ce158e103ffac0c62dbcf2fa", "0abc3e83ccd6e685f8d0299f24f03ae28f4c2459", "46bc4d7c5605e8468f4355335416e15f0d7e4dcd", "8204d8fef85ddd8c32e7b470c244a50910836263", "24748ef2b88e6df370b5dccfb75cba47e132f92d", "2f484f6f079faefef1a8acf26383ecdb019fd380", "44a2ee04d8b939978bd892249c459aec5672412e", "58156d27f80ee450ba43651a780ebd829b70c363", "a6dee47140750356e33dd29ec2a057ab37b5b455", "2fcdec58c1c0028e07c4823cf082fd6d3abc05dc", "02fdca5fdba792e4f2c70b8b637abe4824343800", "90df476a4070cd797ef682f30a408086899ad16b", "6c2a4fd3bae2ddae3f23558985de58dd7673378e", "c9e069c5064ccdd9dc840cca32096b8dd9445fb7", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "0e851f49432767888b6ef4421beb268b9f2fc057", "452c803f91ab670bf36403ed5412875b13ae9e94", "080c336698f5d7a15169e5ad98fa62a0bbf6085c", "01d50c4063e985a08450fd11e90d853648d9d29d" ], "paperAbstract": "Hypervisors have quickly become essential but are vulnerable to attack. Unfortunately, efficiently hardening hypervisors is challenging because they lack a privileged security monitor and decomposition strategies. In this work we systematically analyze the 191 Xen hypervisor vulnerabilities from Xen Security Advisories, revealing that the majority (144) are in the core hypervisor not Dom0. 
We then use the analysis to provide a novel deconstruction of Xen, called Nexen, into a security monitor, a shared service domain, and per-VM Xen slices that are isolated by a least-privileged sandboxing framework. We implement Nexen using the Nested Kernel architecture, efficiently nesting itself within the Xen address space, and extend the Nested Kernel design by adding services for arbitrarily many protection domains along with dynamic allocators, data isolation, and cross-domain control-flow integrity. The effect is that Nexen confines VM-based hypervisor compromises to single Xen VM instances, thwarts 74% (107/144) of known Xen vulnerabilities, and enforces Xen code integrity (defending against all code injection compromises) while observing negligible overhead (1.2% on average). Overall, we believe that Nexen is uniquely positioned to provide a fundamental need for hypervisor hardening at minimal performance and implementation costs.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/deconstructing-xen/", "http://wp.internetsociety.org/ndss/wp-content/uploads/sites/25/2017/09/NDSS-2017-Program-FINAL.pdf", "http://nathandautenhahn.com/downloads/publications/shi-deconstructing-2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/6897/774fa37f13f2d1e4a88c82c626b98fe67951.pdf", "s2Url": "https://semanticscholar.org/paper/6897774fa37f13f2d1e4a88c82c626b98fe67951", "sources": [ "DBLP" ], "title": "Deconstructing Xen", "venue": "NDSS", "year": 2017 }, "689dd5f20fbe04fe4fb5faaf69792999bbd9d671": { "authors": [ { "ids": [ "39940567" ], "name": "Raphael Bost" }, { "ids": [ "1817082" ], "name": "Brice Minaud" }, { "ids": [ "1697011" ], "name": "Olga Ohrimenko" } ], "doi": "10.1145/3133956.3133980", "doiUrl": "https://doi.org/10.1145/3133956.3133980", "entities": [ "Adversary (cryptography)", "Backward compatibility", "Ciphertext", "Cryptanalysis", "Cryptographic primitive", "Cryptography", "Encryption", "Outsourcing", "Privacy", 
"Pseudorandomness", "Server (computing)", "Streaming SIMD Extensions", "Symmetric-key algorithm" ], "id": "689dd5f20fbe04fe4fb5faaf69792999bbd9d671", "inCitations": [ "53f18a9a84c41ff532302166f4456856f3711830", "3676fa3e426269bc882f8c52c493d1a683e0bfd5" ], "journalName": "", "journalPages": "1465-1482", "journalVolume": "", "outCitations": [ "d878fb5a7d1ea14649f590de5ebb806d1414f0b6", "25c3ee2e736c58eddc7182688e19fa7b65bef83a", "e840cf1f4bd8a32d4fa1ec965fff177d042327d9", "a7d092ce93157c61a6355cf754a0dd1d34a333d2", "bf25d9ca8eec88f515932fc7645c8ecc4e3e93b2", "a94205aed0148ae6d00986aef009e5e05d046f43", "45759d9823b022b5faacac48012ad9037625b6de", "1ab81ae077d6944fbff279a7a8a38df48f75eadf", "1f6cbdee0cd99b74ab2a8ffb381265286a11ea90", "0227e83202440c13c4c2b97b49ef7c64dfbd52c3", "14d206bebb06e961c3b99a22011f81a3e949be7e", "1cb9aa0116af7d9e61ffabfa951153e9f4e43779", "0a7ddb346f432c50476359eb39510c838f739eee", "02beed2e1350a0d0b01bb9622081cb93a965a716", "47564fdfc63a1a36102b8b6c74f978bbc5190c5a", "92eaba06af12761b5c64b84e6028d21cd05af9dd", "be2f737bd30976386b069f6edc61371dcda9fec8", "7169b9b8c40536abc79f0ab22dcf4c9a8f1c3249", "8cfc4d744501403c926f6ef4552dab06d039653e", "10130d16b8ceb9aea868c416df56e929a0631cdc", "3864cfb41db27452cefe3b1f64f05623690201ab", "ad0c881078b2cd3d69b5cc2ef63bcdb72070298e", "ab1419f5e7edf98a1d29658d897aa5b1a63b7191", "61ea7ef665186310a24af134441de0a18b6c351e", "9ea1bbb1d3302aa9504e71ca42e1c19c09e310e0", "0df6726c1d83b1e0d6c6580a1e2594519590e38e", "0aa20fb7c3a5aa0f2af3e2a1f857bf9073ec157f", "b60656fea43ed3389594c167f1837afed862bb62", "4646e3be0ab8ef61846c4ef954677376d0f880fb", "3ae6e3f385f075c2b7b6958122c1e30fb1b54b0e", "32cc3fd437950a098d6e93ae755fc6571554a955", "56d320acfad7f6e8060acb77191c179844fab3cb", "c306e51e9563684a87892ccad0e1b33f235e48e8", "0afae9e394d29aa4f678514e711a43f769fc4f35", "32dc88258734f6c9d8fd1d0151d0c763ae2df75a" ], "paperAbstract": "Using dynamic Searchable Symmetric Encryption, a user with limited storage resources can 
securely outsource a database to an untrusted server, in such a way that the database can still be searched and updated efficiently. For these schemes, it would be desirable that updates do not reveal any information a priori about the modifications they carry out, and that deleted results remain inaccessible to the server a posteriori. If the first property, called forward privacy, has been the main motivation of recent works, the second one, backward privacy, has been overlooked.\n In this paper, we study for the first time the notion of backward privacy for searchable encryption. After giving formal definitions for different flavors of backward privacy, we present several schemes achieving both forward and backward privacy, with various efficiency trade-offs.\n Our constructions crucially rely on primitives such as constrained pseudo-random functions and puncturable encryption schemes. Using these advanced cryptographic primitives allows for a fine-grained control of the power of the adversary, preventing her from evaluating functions on selected inputs, or decrypting specific ciphertexts. In turn, this high degree of control allows our SSE constructions to achieve the stronger forms of privacy outlined above. 
As an example, we present a framework to construct forward-private schemes from range-constrained pseudo-random functions.\n Finally, we provide experimental results for implementations of our schemes, and study their practical efficiency.", "pdfUrls": [ "https://pure.royalholloway.ac.uk/portal/files/28610034/805.pdf", "http://doi.acm.org/10.1145/3133956.3133980", "http://eprint.iacr.org/2017/805" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/689dd5f20fbe04fe4fb5faaf69792999bbd9d671", "sources": [ "DBLP" ], "title": "Forward and Backward Private Searchable Encryption from Constrained Cryptographic Primitives", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "68a75a11482804a8cc3bb6927a31adbd2c28ffc9": { "authors": [ { "ids": [ "3491943" ], "name": "Shiyou Huang" }, { "ids": [ "38745890" ], "name": "Bowen Cai" }, { "ids": [ "32065383" ], "name": "Jeff Huang" } ], "doi": "", "doiUrl": "", "entities": [ "C++", "Central processing unit", "Control flow", "Failure rate", "Heisenbug", "Parsec (parser)" ], "id": "68a75a11482804a8cc3bb6927a31adbd2c28ffc9", "inCitations": [ "aaca858e5d071b7215cd9954371d5911745145b3", "830f6be24ab13dcbc4154bd52469fbb85ff25f0e" ], "journalName": "", "journalPages": "403-415", "journalVolume": "", "outCitations": [ "062008493d48ac414b45e3d989266d0574c1b3e5", "3ce0c351e9cb8c12c6f57bad5b2c0c0de0be8f3b", "c1de36550bf324f964186105d6dd0769e86e3046", "114801eccb5eb0831fd1848f351a138253a42f15", "519404f3a71f5684c405ebbb218aa29fa2028379", "031e76f20897108925c6942e0ba00a76045a2e49", "09d95e0b06d1174e4ac83c7354bb30877320a362", "d361446ba1526fb6eeef0e3d99d7804d680a3b4b", "47b7f413e553f8534b584c51a7cc7903b98d3c48", "3e069cd8d223c0d02d44b9529b5df7d8ca1a7ca6", "11b8ef5da9c8df214859bb41b60001a0abd2b5b2", "113772329678792fc2a3a8cb9322c164547f88a0", "8932c6d523f9f5aba76993c71c0089ba15155a7c", "cfb9bca2096f245642d3acc898a84575986ebc5b", "0694754aca6ee770e8b51fc964aa1109045fb28e", "fb34f663b34a8cb09a75fe98685c003d86f32e15", 
"12161e5c5daa594fdab296356b3424c3bf4c8e9e", "3ca7e9729dd00830ca25396d535295648ea19a81", "64cfec85c8149e802eb9460b0afd1f59df325169", "41e9a9ec93157fcaf8b44a2a67a595316c815966", "15559a9c70af038b56f2576f7233bba2b55fbc5a", "22a713d92a7f2a79f22c71e66b2511937b2a1a8f", "12a233efbdd874afdeb8a1e6fe71c4ccff758175", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "1e4874f3443d191a4f7f3ba63a04a264bd00e364", "0e578433d4e8bb2a571c87a2d22816074902f009", "2035c8f33909ac206c4d1a3bdee611577fb2c5d1", "059476c845d38253efad824010ed15df14941d85", "849821f391234389c232d22940d31d724fd4e4a5" ], "paperAbstract": "We present a new technique, H3, for reproducing Heisenbugs in production runs on commercial hardware. H3 integrates the hardware control flow tracing capability provided in recent Intel processors with symbolic constraint analysis. Compared to a state-of-the-art solution, CLAP, this integration allows H3 to reproduce failures with much lower runtime overhead and much more compact trace. Moreover, it allows us to develop a highly effective core-based constraint reduction technique that significantly reduces the complexity of the generated symbolic constraints. H3 has been implemented for C/C++ and evaluated on both popular benchmarks and real-world applications. 
It reproduces real-world Heisenbugs with overhead ranging between 1.4%-23.4%, up to 8X more efficient than CLAP, and incurs only 4.9% runtime overhead on PARSEC benchmarks.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-huang.pdf", "https://parasol.tamu.edu/groups/huangroup/academic/h3.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/huang" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/68a7/5a11482804a8cc3bb6927a31adbd2c28ffc9.pdf", "s2Url": "https://semanticscholar.org/paper/68a75a11482804a8cc3bb6927a31adbd2c28ffc9", "sources": [ "DBLP" ], "title": "Towards Production-Run Heisenbugs Reproduction on Commercial Hardware", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "68ab59d03f1323a519704afc7f1d70060d668d25": { "authors": [ { "ids": [ "35885602" ], "name": "Tahsin Reza" }, { "ids": [ "3215132" ], "name": "Christine Klymko" }, { "ids": [ "1747805" ], "name": "Matei Ripeanu" }, { "ids": [ "1795587" ], "name": "Geoffrey Sanders" }, { "ids": [ "2157171" ], "name": "Roger A. 
Pearce" } ], "doi": "10.1109/CLUSTER.2017.85", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.85", "entities": [ "Algorithm", "Central processing unit", "Computational complexity theory", "Experiment", "Pattern matching", "Scalability", "Synthetic data" ], "id": "68ab59d03f1323a519704afc7f1d70060d668d25", "inCitations": [ "4ee59a52acd5b5115fede0f466ff5059662f4952" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "1-12", "journalVolume": "", "outCitations": [ "6db8e7dddc951a88cc399c5ffb99ceeeeb870579", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "1dd8db60043f51c04eb7200915ebd253d2fabf64", "9447b4502eed007a117e4ba87278407ca3d7b354", "6cee78f7fef3426824ca5d63a58261ebbbe2e74a", "f3a39750bc525e9a7fb42b130c2ee58f5faa188e", "3c375359b70cf8d96bd586e9cafcd42bd9ef8698", "87507a498558ed6ed23115a42f42376c0884f7f2", "06f75b1b283569baf96f4a65ec7da734b9c840f8", "065066a94860279587ecc7c7caaa65303008940f", "4895aa38cd1d7cc7fa1c1817d57a3aa41f786e21", "dd31b94077f656630348f810607308204d5fe013", "677b78d89b626ddbc8de190f49e07b96f2cb71c1", "09b64bbaeaf557a46c6397830eb09f4318600e34", "28e994b359c9d843c63128a3b54bb5dda7b7b2ac", "420a0e5fc398f197bca3dfe40291a82b2c65655a", "0b2fa2eb6e7a731e08a10bf5766061c61e2a9e9b", "3246e9e2056ff7330c61f5fb368db002b0fbe129", "240ff3ad3f8931f278404ebbd22e13c996b039e0", "30b2dc70b23d7033f58d0307dab0f49d015ae09a", "5d3158674e1a0fedf69299a905151949fb8b01a5", "7d49e994f7feb75efea290f241d9c5122c8b6438", "eb82d3035849cd23578096462ba419b53198a556", "638deeb9efa10f081f74e6c2ee9195716afd2ceb", "039de08a43c1de269652ec991665bca76062a7db", "4066409e187467ae5ee989112b07aa9ce263732a", "1267347de992c524b933040cb96fabb93cba1738", "01c1f0e97ce5c74c714dc7aa43cb064f45cc3b04", "d0df7dba44c1d1d287b1fc5abaa34e39dc7d9a53" ], "paperAbstract": "Subgraph pattern matching is fundamental to graph analytics and has wide applications. 
Unfortunately, high computational complexity limits the robustness guarantees of existing algorithms: they do not scale for modern large graph datasets and/or they have limitations in terms of accuracy or in terms of the intricacy of the patterns supported. We present algorithms, theory, and empirical evidence that iteratively eliminating vertices that do not meet local constraints dramatically reduces the search space for pattern matching in real-world graphs, and demonstrate a scalable implementation of our algorithms. We additionally identify the characteristics of patterns for which every non-eliminated vertex participates in a match. These techniques are an essential step to enable scalable, practical solutions for robust pattern matching in large-scale labeled graphs. We demonstrate the advantages of the proposed approach through strong and weak scaling experiments on massive-scale real-world (up to 257 billion edges) and synthetic (up to 2.2 trillion edges) graphs and at scales (256 compute nodes with 6,144 processors) orders of magnitude larger than those used in the past for similar problems.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.85" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/68ab59d03f1323a519704afc7f1d70060d668d25", "sources": [ "DBLP" ], "title": "Towards Practical and Robust Labeled Pattern Matching in Trillion-Edge Graphs", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "68acde368151954316e22985e394a591d9bb85ed": { "authors": [ { "ids": [ "7541393" ], "name": "Tobias Wicky" }, { "ids": [ "2880213" ], "name": "Edgar Solomonik" }, { "ids": [ "1713648" ], "name": "Torsten Hoefler" } ], "doi": "10.1109/IPDPS.2017.104", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.104", "entities": [ "Algorithm", "Cholesky decomposition", "Computation", "Linear algebra", "Linear system", "Matrix multiplication", "Numerical analysis", "Numerical linear algebra", 
"Numerical stability", "Parallel algorithm", "Scalability", "System of linear equations", "Tamper-resistant security module", "Triangular matrix" ], "id": "68acde368151954316e22985e394a591d9bb85ed", "inCitations": [ "41875eaea7cb58024b1bd46f9d9df80d19208e6b" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "678-687", "journalVolume": "", "outCitations": [ "ad5e1e6c5b48f7f2cdafe306fbcac55b0be755f0", "c640098d8fec542f85bab54fd02d8abc4a9b21ea", "bf980d3cc50ae14ce104207882ee1fbbadf7a5f1", "0777845ab14d61d970354acd0a0ca8aaf57f0041", "4ff7a5d31c2524f62662da67a22560867e025456", "a0d3306999eacc7fab93955eb1223eef10312708", "035c542402de661b544603d84b7ec45bada14e7f", "04373d13bedbf3c4276a8b3b86311a1bff99db75", "3adaacec6270c00060bddce342ef9503ce7c648c", "b582d4a005c3288858eb3910e9233edb35323f49", "c83ced20b5ebc150a5eb0769d45dee5bf28207df", "45c3066f6cc0262a5b5bc56124f4d2187a961c42", "2a0c9f8248aad793810dfc2ec2bc21a8ebdca6f4", "8a269f794c54b62d81ba76d23aaa4bdf12301ec8", "5cfeda94aaa59702e57647045de1488b8258abef", "84ba025c6b28617241274699dccd9e5308fba766", "1aa8ad634d1879af9b5ac34b44ecc3de8debd276", "3e58c1263047ccc126ca0c06dcb150ff9d172512", "b7bb051c2376345f5c5e80f165b15f2f2e68ecc9", "fc00ada92d8b65c4e4599a82b621da5e509c84d0", "6fa09bcca34aae148dbec5cc8aaad283febfe037" ], "paperAbstract": "We present a new parallel algorithm for solving triangular systems with multiple right hand sides (TRSM). TRSM is used extensively in numerical linear algebra computations, both to solve triangular linear systems of equations as well as to compute factorizations with triangular matrices, such as Cholesky, LU, and QR. Our algorithm achieves better theoretical scalability than known alternatives, while maintaining numerical stability, via selective use of triangular matrix inversion. We leverage the fact that triangular inversion and matrix multiplication are more parallelizable than the standard TRSM algorithm. 
By only inverting triangular blocks along the diagonal of the initial matrix, we generalize the usual way of TRSM computation and the full matrix inversion approach. This flexibility leads to an efficient algorithm for any ratio of the number of right hand sides to the triangular matrix dimension. We provide a detailed communication cost analysis for our algorithm as well as for the recursive triangular matrix inversion. This cost analysis makes it possible to determine optimal block sizes and processor grids a priori. Relative to the best known algorithms for TRSM, our approach can require asymptotically fewer messages, while performing optimal amounts of computation and communication in terms of words sent.", "pdfUrls": [ "https://arxiv.org/pdf/1612.01855v1.pdf", "https://doi.org/10.1109/IPDPS.2017.104", "https://arxiv.org/pdf/1612.01855.pdf", "https://arxiv.org/pdf/1612.01855v2.pdf", "https://htor.inf.ethz.ch/publications/img/wicky-commavoiding-trsm.pdf", "http://arxiv.org/abs/1612.01855" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/68acde368151954316e22985e394a591d9bb85ed", "sources": [ "DBLP" ], "title": "Communication-Avoiding Parallel Algorithms for Solving Triangular Systems of Linear Equations", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "68cbf263aa287c1416d8c8485c3b063a38749823": { "authors": [ { "ids": [ "36635262" ], "name": "Avik Chaudhuri" }, { "ids": [ "3072186" ], "name": "Panagiotis Vekris" }, { "ids": [ "33537515" ], "name": "Sam Goldman" }, { "ids": [ "34493238" ], "name": "Marshall Roch" }, { "ids": [ "40305995" ], "name": "Gabriel Levi" } ], "doi": "10.1145/3133872", "doiUrl": "https://doi.org/10.1145/3133872", "entities": [ "Algorithm", "JavaScript", "Parallel computing", "Rewriting", "Software bug", "Source lines of code", "Type inference", "Type system" ], "id": "68cbf263aa287c1416d8c8485c3b063a38749823", "inCitations": [], "journalName": "PACMPL", 
"journalPages": "48:1-48:30", "journalVolume": "1", "outCitations": [ "5ad2b1480e0b27b79bc1d0ee1b2fbe0e2c8d5143", "5c556951a9c6224208b3f1df2db5276fedfa1ca3", "2ca30d7cf4716fe81097472a37deaed3a33de9da", "1b4df92d7f0d9393103cafbdbc512c52a90296b8", "5d1f975aa06739da885c0b00e3644bd190286da4", "0fe7ae64010750abf0566a88686fb9da5a237d45", "16f82bb641104aea5724195f5c1aadf1eb9acc24", "e0aee51fc161452dbf4fa292150df2ed0f0c9d6f", "90d41056b219e11a128595a5c1f3eae93c8a257d", "30cd035c738768c752f29199482d24ec7e3e45b3", "c03512277e95b7055b2fb13b662916d0ebd74cfc", "355c3c748992e37a9c019b9c5f9f3955ce742d24", "37d52d956d02adc1a14bdafbeff11cac79c53e71", "3f9e652b6029723540bb341ee9c839ac04d6e097", "14ba6aae84e2d6b665d48f4b6d265979e9fa03b3", "2c02bb7a26b028b6f0520e1842343c259c0efb26", "6ed8857cf83e495903278e8c820dffc868c084d1", "c19c45531a5f6c114e9da1dbfea8aedc31c31e0d", "2ce02514639493acb31c193a763b111cb2341486", "1469b0cbb109c2a788a346dd0480070de8334dea", "1fb7601a85ad9ad94e3900f7f1602548093cf15d", "53562627fa7714fa16e0fa4d5f3a41ac2d285309", "0c36460e328643f98c09c7175608f51f1df5ccdf", "25703fead579370f38be2859ca586f215c9f8808", "0a080d15acefcc09e608c03e5b1963831181e7c1", "238e26b46501c50de693a8e18feb922c4edf5c58", "6d05179aebdb6c09ef6e56626774a08a55efb652", "26ac3ad840d8d773eec2ab7fc60d441b34c6adc5", "13f74f762bf4840ed5841b6966eb7e9bab6981a1", "884498b7ddce91751bdc90f6d4b14bf2b77697a2", "9a02cf358ff73843573b164806abdec37ceda9d7", "821459a5ebbb24d0202e11d68db4b63ed6088a0b", "9011d0cf905b419419fa2c22500ecd2e9cee3bb2", "6bfba2348b18f93fbad3151969b21d270327149a", "6b634e8a91431f37ec6885e7beb6c5fad5195d08", "cb4160990391c9069ef08a262468d21171beae91" ], "paperAbstract": "In this paper we present the design and implementation of Flow, a fast and precise type checker for JavaScript that is used by thousands of developers on millions of lines of code at Facebook every day. Flow uses sophisticated type inference to understand common JavaScript idioms precisely. 
This helps it find non-trivial bugs in code and provide code intelligence to editors without requiring significant rewriting or annotations from the developer. We formalize an important fragment of Flow's analysis and prove its soundness. Furthermore, Flow uses aggressive parallelization and incrementalization to deliver near-instantaneous response times. This helps it avoid introducing any latency in the usual edit-refresh cycle of rapid JavaScript development. We describe the algorithms and systems infrastructure that we built to scale Flow's analysis.", "pdfUrls": [ "http://arxiv.org/abs/1708.08021", "http://doi.acm.org/10.1145/3133872", "https://arxiv.org/pdf/1708.08021v2.pdf", "https://arxiv.org/pdf/1708.08021v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/68cbf263aa287c1416d8c8485c3b063a38749823", "sources": [ "DBLP" ], "title": "Fast and precise type checking for JavaScript", "venue": "PACMPL", "year": 2017 }, "68d3ada8bc4fb3de685cb870d9e72853d56b5c7d": { "authors": [ { "ids": [ "1777183" ], "name": "Wenfei Fan" }, { "ids": [ "2724949" ], "name": "Jingbo Xu" }, { "ids": [ "2948541" ], "name": "Yinghui Wu" }, { "ids": [ "37681774" ], "name": "Wenyuan Yu" }, { "ids": [ "2969767" ], "name": "Jiaxin Jiang" }, { "ids": [ "34418171" ], "name": "Zeyu Zheng" }, { "ids": [ "2903546" ], "name": "Bohan Zhang" }, { "ids": [ "1740511" ], "name": "Yang Cao" }, { "ids": [ "1692159" ], "name": "Chao Tian" } ], "doi": "10.1145/3035918.3035942", "doiUrl": "https://doi.org/10.1145/3035918.3035942", "entities": [ "Algorithm", "Automatic parallelization", "Binary space partitioning", "Computation", "Dynamic problem (algorithms)", "GRAPE", "Incremental computing", "List of algorithms", "MapReduce", "Parallel algorithm", "Parallel computing", "Partial evaluation", "Programming model", "Real life", "Sequential algorithm", "Simulation", "Synthetic data" ], "id": "68d3ada8bc4fb3de685cb870d9e72853d56b5c7d", "inCitations": [ 
"2514dccd3a63999fc10396ee1866062d7cd4be2f", "3cabd75d1fcbc1ffeddad121cee327abf4223d49", "771610413f3654b8e4f38aab4dd970a481c7196f", "0c7b88c4ea95081e99307700d3bf7eb08e790550", "691e4fcf559e9d19ee6354715a8ccdcc4416c47b" ], "journalName": "", "journalPages": "495-510", "journalVolume": "", "outCitations": [ "1e8c283cedbbceb2a56bf962bc0a86fd40f1cea6", "628b470c664be4eaf9ae3f75ecb630d64353bc4c", "c2977a77e6578b3635ad893c481b50b0636735c4", "1156f60e40548096df49528b1342bb3e88b0f378", "3cabd75d1fcbc1ffeddad121cee327abf4223d49", "ff71759a3efa271670c1e7820873df872b4ca3b9", "30d963e87c462606793d229dbdf0786ac38ede6e", "308002cca6afdfd4f751a382357b027dd94d2de4", "0541d5338adc48276b3b8cd3a141d799e2d40150", "6384234e698f793cfba0cbf890b1c2a2209d06b7", "be94c5051438209816397be214ee8c1bdda94165", "2b9e6181502369199bd89691a27f89bdbaac36e4", "0ad8e89091eed09217e66adc98136126addc2619", "87507a498558ed6ed23115a42f42376c0884f7f2", "3726c60552263e648c6856679e672de2e1c110e5", "b9c5678100693e00b59e58f3368f4797b9f11e77", "87b7f6f406259c03b96412bbecde9711a9b8fccf", "80527e7595530951081494d1b98f3f13da3033a2", "322846e81eaf2b2a0783b54efcd9ea1dad767ccb", "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", "0975baea2e5a34f75c06284ac355af7f2de2499b", "0a3b2d2ddf6e832a0b282380abb9b2eeb7d97177", "f3a39750bc525e9a7fb42b130c2ee58f5faa188e", "569d3fc080c3c2610f28506827d8fa67795524e1", "0e33dd74064b3d7659d9ab6301c21c0480cfda72", "87f931f4d8aad3b71b8261703bbcfa18c1293181", "4ffce047b0189e30e51665f0c8872d05f383a962", "202e33581369f6050fc800ebc31615eb65649e78", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "3793cd493c6b59bdb39593e370a542af84bf8a56", "4066409e187467ae5ee989112b07aa9ce263732a", "9aa88a8a354f1d322e242376d27d0474e50252f8", "35eb3c956949bae34ea041be657920513df1b995", "8fe315a467db4ee7f93d51ef6e32b8213189ed5d", "75e217284d18901ce8b1fc4a389d3c1152b544fb", "159efe23527149666be1b2b1c08853d74d413c1b", "01ce82e98be37424e0456e2653d1ec8a0938b018", "94c20561f102ede79c461c6aeee74eb7a55d5eb4", 
"3bb6d5834bfb355553588e382ac5f9fa8a8d831d", "38f0f2ddccad42c482066fecb4f0440516020241", "c827d2267640a7a913250fa5046a16ff078a5ce4" ], "paperAbstract": "This paper presents GRAPE, a parallel system for graph computations. GRAPE differs from prior systems in its ability to parallelize existing sequential graph algorithms as a whole. Underlying GRAPE are a simple programming model and a principled approach, based on partial evaluation and incremental computation. We show that sequential graph algorithms can be \"plugged into\" GRAPE with minor changes, and get parallelized. As long as the sequential algorithms are correct, their GRAPE parallelization guarantees to terminate with correct answers under a monotonic condition. Moreover, we show that algorithms in MapReduce, BSP and PRAM can be optimally simulated on GRAPE. In addition to the ease of programming, we experimentally verify that GRAPE achieves comparable performance to the state-of-the-art graph systems, using real-life and synthetic graphs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035942", "https://people.csail.mit.edu/jshun/6886-s18/papers/GRAPE.pdf", "http://eecs.wsu.edu/~yinghui/mat/papers/Parallel%20Sequential%20Graph%20Computations.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/68d3ada8bc4fb3de685cb870d9e72853d56b5c7d", "sources": [ "DBLP" ], "title": "Parallelizing Sequential Graph Computations", "venue": "SIGMOD Conference", "year": 2017 }, "68dcc38e3342bd1611dd3248a3668c6aefa597d3": { "authors": [ { "ids": [ "39224389" ], "name": "Anand Padmanabha Iyer" }, { "ids": [ "1716557" ], "name": "Ion Stoica" } ], "doi": "10.1145/3127479.3132254", "doiUrl": "https://doi.org/10.1145/3127479.3132254", "entities": [ "Attribute\u2013value pair", "Big data", "Data store", "Download", "Ecosystem", "Internet", "Key-value database", "Load balancing (computing)", "Open-source software", "Scalability", "Scale-invariant feature transform", "Sensor", "Source data", "Spatial 
analysis", "Spatial database" ], "id": "68dcc38e3342bd1611dd3248a3668c6aefa597d3", "inCitations": [], "journalName": "", "journalPages": "548-560", "journalVolume": "", "outCitations": [ "0831a5baf38c9b3d43c755319a602b15fc01c52d", "29ae100d58ee8c0e6cad3a75402a9f5adf20a335", "5174a1e57243013d90041ed9b559fddfd3248dbc", "7e64142f7e6a881acd5020be463033901561a707", "06ebf28a265cc28033ec76c2c6f35d2db69941ea", "ad585974e2d05758cd36e2ab2e346f38b4de4a0f", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "18a5f443299784479e78d9e77f175af57cb2fa2b", "1594118f2696b573f08510cf837f3b37db87face", "3383c6c042489e729317bac4a76fc58b9c28e811", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "adf1a2d2afe691901348da0fcb1b441466591604", "ab6103b4103947f2a48fdafd69c11a8e2fd4f697", "d543d1f003a0206e59e70e91c3d0217069bb913c", "0401a8c1feeb489f3fa011fe50e00e91a8fd7903", "8b1ab03234e0cf6bae234c1e602517f706d45f3f", "0d557c7a7d6b50c3ccdb6a9f48e4c552541f5930", "e4617874f90b6968fee870d1c67f80e1e78611a6", "03416be8097852a54dd3e309434e5a0806824646", "bfc464efbe615805d386ef5c882e4d0f97071ec2", "0776c3fb6a8beeeda5d0fb5174f9c43506ecc339", "3ac6ccf7d9ba5fb95685a45b2d176c30db23d6fe", "68b95c71e923cdc986b084fd4a6ab70162d8e654", "98012349c0f97c924ca6457d99777232f4fb5e93", "17f417dd13749c9c39d06b176fca43faf3808a8f", "0127e0b604ffe00aa9ee871d9ede2a5b9ec8fdcc", "6fca390fc65c0c46d1f5de81e3f6a4890af5dcc4", "75a4860c9b3b2e95bc3a8056543e7560a1753f2b", "2d22229794c2bd70c5bd1b1e4f004eb5864627a9" ], "paperAbstract": "The increasing interest in the Internet-of-Things (IoT) suggests that a new source of big data is imminent---the machines and sensors in the IoT ecosystem. The fundamental characteristic of the data produced by these sources is that they are inherently geospatial in nature. In addition, they exhibit unprecedented and unpredictable skews. Thus, big data systems designed for IoT applications must be able to efficiently ingest, index and query spatial data having heavy and unpredictable skews. 
Spatial indexing is well explored area of research in literature, but little attention has been given to the topic of efficient distributed spatial indexing.\n In this paper, we propose Sift, a distributed spatial index and its implementation. Unlike systems that depend on load balancing mechanisms that kick-in post ingestion, Sift tries to distribute the incoming data along the distributed structure at indexing time and thus incurs minimal rebalancing overhead. Sift depends only on an underlying key-value store, hence is implementable in many existing big data stores. Our evaluations of Sift on a popular open source data store show promising results---Sift achieves up to 8× reduction in indexing overhead while simultaneously reducing the query latency and index size by over 2× and 3× respectively, in a distributed environment compared to the state-of-the-art.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3132254" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/68dcc38e3342bd1611dd3248a3668c6aefa597d3", "sources": [ "DBLP" ], "title": "A scalable distributed spatial index for the internet-of-things", "venue": "SoCC", "year": 2017 }, "6901aa1b377508980c1ac035da31e6c8ca38973a": { "authors": [ { "ids": [ "35067898" ], "name": "Sepehr Assadi" }, { "ids": [ "1740426" ], "name": "Sanjeev Khanna" } ], "doi": "10.1145/3087556.3087581", "doiUrl": "https://doi.org/10.1145/3087556.3087581", "entities": [ "Algorithm", "Approximation algorithm", "Communications protocol", "Computation", "Graph embedding", "Graph theory", "MapReduce", "Matching (graph theory)", "Randomized algorithm", "Randomness", "Scalability", "Vertex cover" ], "id": "6901aa1b377508980c1ac035da31e6c8ca38973a", "inCitations": [ "fb5467963174490c7650e212a90bd17f5a88e61e", "7e0695d65ad3aedaa30bb7aaf28edc432ac711e7", "6dce6f2503bffa3534a79ad2a40d97348a1f1df1", "297dbe1090536a25b80a2b6d9f113dba90c1da5a", "99cb0285c507acc5d8444b3650f56508ad23aa51", 
"7df5efc8036f4c4a281346ca929cc81db39a091e", "f17c846b4e44207a670993887791f41c42c1120e" ], "journalName": "", "journalPages": "3-12", "journalVolume": "", "outCitations": [ "ce55a4c31e4c08ea540b1e4059e1e531af0aa40d", "7adb8a46e782bb256a59dbd6679819dd70a6620f", "30fc67dfcc25ab3ce1642cb3b4f114940414dee8", "52d953929fa9a8557552cd642b3f35e2ced3d077", "890eb782930be126b1697929de16ebae5dfd20c4", "38a65bb82801138bef74b8928c6f3d83f719f241", "b38270ca237de4740517d2f34b0f16e56826077f", "9bd101ad8faba5ff0ff1f625be773ce0acb697fc", "21792fab7219b556fd216f4ddde6470a2d513e0d", "159a7a03ccb0ef751db1870be1de4d26a02470f3", "32a4545b14cbd94a257ad25e318eb13fab13d5bf", "5be4a65faa4e7fe077ad13f4b0cdbbe68222c49c", "0cf322a5c478d15c2b4899cbdb88656ddffc845b", "88f12ba7a98b145d5122a19511ca99ce64681453", "6b5d280fe46317632725a3930f91e5a23cb9b5e8", "49fa147421d34edf6c0603461820ae4e8f193546", "0df3980457291b7425e37eb686a6fd7b3eb94abe", "31f6546bc55295d10692316f7b44c03a488432ae", "03e66d2e5f428454037f865400691652941ba9f1", "1fd6bacb90edf0f951e89a54bd54294db821c268", "53b91a45f636b787f255c159196a3971ad7adccb", "925c8e61e8c3732af589b6ec590b721fdd491fb6", "9caea8b49997b1f285588361e458f6b2d9e3df37", "80de3c3c745a3214f6de40d1a514236bd4028b39", "aa6ad058dffedcaa0b614b23a7508562a4652855", "1ccf73f5c883a261aa6d8f0fdbec5c0477cc647c", "0285fff215bd498b5cdc63c565eea8b910419ff3", "bf8998d76741f3ee7b4ba1f82524353e7083c3b5", "261b545e2045dccaf4c8edd1f542dd9530e2c78a", "3e31ab485ed989e8ffa198b71623544f22f6097f", "4f65c02184db98cda485201dd5a99168e300075c", "113816aa1c7cd8aeb9204d2e8a99f97932d8887b", "54ebbd984587d4537cdb42b20c467303e107e5f7", "159efe23527149666be1b2b1c08853d74d413c1b", "6decda57f8fbac5eaac7d997dc7dd6eed40ff94c", "632c6bfaa258aae7958bd192b2a00db8a33cc516", "3d25eb8241345f86101fda145d95d89c27844fd1", "1f1c21173020834c1786de98d545df5c77c0e7f7", "5236121265ad38ac79f964db1bb8536b8c5ca888", "b29b445de9b5f07640084bffa971d649c8dde7be", "6d248d20660602f34b87b2e9a597dbc3be06cd3a", 
"9053b4305403b655b9cf13d46a3a5f9684198a92", "0b84b757ba7161df8651b0c49f94d7642c610210", "107344a6a049d629465e9d38f00222872043a2b1", "e8606d1641a25a31c93ae320ed9b0cf84d96f2d0", "c522a12feebcc3793e1c40ccbf9c1d61089d2222", "e89614d46dead70da5a7dc96d8adec52b4ffdc1a", "6094392d07d36c086a988493686b73ebca39169b", "379ef18377d803d87859314c0e110cdf64f2ea73", "02f7b61d3d557da6de3c26178530179492e8f574", "45381d4c4ea253bd307758a6258dbf9375b12bd6", "04311b15b444a0f75ea2bb74fca26cc1aefbf3c1", "052af1757c410fa8b65bf95339c6e4142d723d61", "3d20263ba27604da74fb59465ede07fcac77b8df", "1eb5fce431067ab19a44a7962dfef28ef7127ab4", "c9bb3728b1e2afe09def0733caffcb99a68baef3", "11a80ec20ea9722d1dcec01bc797a54be3e0410d", "134dd16ee1c683561b1dacfc60c46ef1375500d0" ], "paperAbstract": "A common approach for designing scalable algorithms for massive data sets is to distribute the computation across, say k, machines and process the data using limited communication between them. A particularly appealing framework here is the simultaneous communication model whereby each machine constructs a small representative summary of its own data and one obtains an approximate/exact solution from the union of the representative summaries. If the representative summaries needed for a problem are small, then this results in a communication-efficient and \\emph{round-optimal} (requiring essentially no interaction between the machines) protocol. Some well-known examples of techniques for creating summaries include sampling, linear sketching, and composable coresets. These techniques have been successfully used to design communication efficient solutions for many fundamental graph problems. However, two prominent problems are notably absent from the list of successes, namely, the maximum matching problem and the minimum vertex cover problem. 
Indeed, it was shown recently that for both these problems, even achieving a modest approximation factor of \\polylog{(n)} requires using representative summaries of size \\widetilde{\\Omega}(n^2), i.e., essentially no better summary exists than each machine simply sending its entire input graph.\n The main insight of our work is that the intractability of matching and vertex cover in the simultaneous communication model is inherently connected to an adversarial partitioning of the underlying graph across machines. We show that when the underlying graph is randomly partitioned across machines, both these problems admit \\emph{randomized composable coresets} of size \\widetilde{O}(n) that yield an \\widetilde{O}(1)-approximate solution\\footnote{Here and throughout the paper, we use \\widetilde{O}(\\cdot) notation to suppress \\polylog{(n)} factors, where n is the number of vertices in the graph.}. In other words, a small subgraph of the input graph at each machine can be identified as its representative summary and the final answer then is obtained by simply running any maximum matching or minimum vertex cover algorithm on these combined subgraphs. This results in an \\widetilde{O}(1)-approximation simultaneous protocol for these problems with \\widetilde{O}(nk) total communication when the input is randomly partitioned across k machines. We also prove our results are optimal in a very strong sense: we not only rule out existence of smaller randomized composable coresets for these problems but in fact show that our \\widetilde{O}(nk) bound for total communication is optimal for \\emph{any} simultaneous communication protocol (i.e. not only for randomized coresets) for these two problems. 
Finally, by a standard application of composable coresets, our results also imply MapReduce algorithms with the same approximation guarantee in one or two rounds of communication, improving the previous best known round complexity for these problems.", "pdfUrls": [ "http://arxiv.org/abs/1705.08242", "https://arxiv.org/pdf/1705.08242v1.pdf", "http://www.seas.upenn.edu/~sassadi/stuff/papers/randomized-coreset_matching-vc.pdf", "http://doi.acm.org/10.1145/3087556.3087581", "http://www.cis.upenn.edu/~sanjeev/papers/spaa17_randomized_coresets.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6901aa1b377508980c1ac035da31e6c8ca38973a", "sources": [ "DBLP" ], "title": "Randomized Composable Coresets for Matching and Vertex Cover", "venue": "SPAA", "year": 2017 }, "699d82aa82acad84ac326549e91c4b586625cad6": { "authors": [ { "ids": [ "39763339" ], "name": "Yi Dai" }, { "ids": [ "7200817" ], "name": "Kefei Wang" }, { "ids": [ "1743393" ], "name": "Gang Qu" }, { "ids": [ "1771507" ], "name": "Liquan Xiao" }, { "ids": [ "39891717" ], "name": "Dezun Dong" }, { "ids": [ "3204113" ], "name": "Xingyun Qi" } ], "doi": "10.1109/IPDPS.2017.15", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.15", "entities": [ "Application-specific integrated circuit", "Computer", "Crossbar switch", "Interconnection", "Microarchitecture", "Place and route", "Placement (EDA)", "Router (computing)", "Routing", "Scalability", "Simulation", "Speedup", "Supercomputer", "Throughput" ], "id": "699d82aa82acad84ac326549e91c4b586625cad6", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "429-438", "journalVolume": "", "outCitations": [ "f7a6e6822acdfbf496236e14c27874df07a44501", "528628b4d20e6a98130ac12083a8c879aa31c7ad", "194a6fc5de629e4f55c00d0720b8279ac8b494de", "54ade7ae16c9495b238d260de2de79c1a588453f", "1afc2cc0b9ac3140693302704ee44ce1054b6325", "66e0aa17f60779815d5eb35e68d545ae2dc351c3", 
"4110d5ad162fbf43a3418f28b4d46609c2a147be", "19b304df6f13798a0745eeaf8f4573b202a43e5f", "0165efbba4d2c63225cbcb0e217270f2a79b37a7", "981587ba79028bc8770ac6f745ff90baee340fb7", "15f1312866a40e516f0e7f128864013ef6eb2df8" ], "paperAbstract": "High-radix routers with low latency and high bandwidth play an increasingly important role in the design of large-scale interconnection networks such as those used in super-computers and datacenters. The tile-based crossbar approach partitions a single large crossbar into many small tiles and can considerably reduce the complexity of arbitration while providing throughput higher than the conventional switch implementation. However, it is not scalable due to power consumption, placement, and routing problems. In this paper, we propose a truly scalable router microarchitecture called Multiport Binding Tile-based Router (MBTR). By aggregating multiple physical ports into a single tile a high-radix router can be flexibly organized into a different array of tiles, thus the number of tiles and hardware overhead can be considerably reduced. Compared with a hierarchical crossbar, MBTR achieves up to 50%∼75% reduction in memory consumption as well as wire area. Simulation results demonstrate MBTR is indistinguishable from the YARC router in terms of throughput and delay, and can even outperform it by reducing potential contention for output ports. We have fabricated an ASIC MBTR chip with 28nm technology. Internally, it runs at 700MHz and 30ns latency without any speedup. 
We also discuss how the microarchitecture parameters of MBTR can be adjusted based on the power, area, and design complexity constraints of the arbitration logic.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.15" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/699d82aa82acad84ac326549e91c4b586625cad6", "sources": [ "DBLP" ], "title": "A Scalable and Resilient Microarchitecture Based on Multiport Binding for High-Radix Router Design", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "69a56cdc940e53ad5953ad62d14c195950dbbc80": { "authors": [ { "ids": [ "10388641" ], "name": "Raphael R. Campos" }, { "ids": [ "39231033" ], "name": "S\u00e9rgio D. Canuto" }, { "ids": [ "2508452" ], "name": "Thiago Salles" }, { "ids": [ "3430183" ], "name": "Clebson C. A. de S\u00e1" }, { "ids": [ "1686355" ], "name": "Marcos Andr\u00e9 Gon\u00e7alves" } ], "doi": "10.1145/3077136.3080815", "doiUrl": "https://doi.org/10.1145/3077136.3080815", "entities": [ "Experiment", "Focus stacking", "Out-of-bag error", "Parallel computing", "Radio frequency", "Random forest", "Randomized algorithm", "Speedup" ], "id": "69a56cdc940e53ad5953ad62d14c195950dbbc80", "inCitations": [], "journalName": "", "journalPages": "105-114", "journalVolume": "", "outCitations": [ "52c004bc6f75ff0ec3121678f422ed1a2fe61efe", "8a122a280e3d3ffa23bad6ada90327893faeaa70", "24cd014beac97708f5acdc4355ce52dd40bd57e8", "ad52dac8f267c8c75f30ac5b0c6c6bc980217285", "509bf8d7ac8ff4334c5aee5771183da0fd9c7a42", "b37ac371f0cba3744ee2cd9c67ba84f29dd74db9", "f8cfbe539781c7e856ab632a2be46a0218820134", "1d0e9193e4becfbe87e5ba731218cd8ee43a97f2", "5bc8e8631fda4f09c43d1dd68b1cb284d17a7c5c", "fc134ba45bdbbf8f4448e2cdf19819be099e18b7", "646994e3a01c2c9f40e8c5b8a77af7a59c5e1558", "6276fe6a5c90c9d761192fed90777120485b1893", "e845fbb957d739fd9788711710147cf0679d498d", "1cc5e31266aa9d10a19ac0fcb575ef0717b524c4", "20b95661a93a6c91f52925237c97c833078054ce", 
"9c018f1b0aabe84ba61dfc969e273f2a9653c6ec", "4aec57df7a2796e064992d6ff21b26a4d01210b1", "102ed1e9b785caec1cb69c043dbda7b2cfa2d57d" ], "paperAbstract": "Random Forest (RF) is one of the most successful strategies for automated classification tasks. Motivated by the RF success, recently proposed RF-based classification approaches leverage the central RF idea of aggregating a large number of low-correlated trees, which are inherently parallelizable and provide exceptional generalization capabilities. In this context, this work brings several new contributions to this line of research. First, we propose a new RF-based strategy (BERT) that applies the boosting technique in bags of extremely randomized trees. Second, we empirically demonstrate that this new strategy, as well as the recently proposed BROOF and LazyNN_RF classifiers do complement each other, motivating us to stack them to produce an even more effective classifier. Up to our knowledge, this is the first strategy to effectively combine the three main ensemble strategies: stacking, bagging (the cornerstone of RFs) and boosting. Finally, we exploit the efficient and unbiased stacking strategy based on out-of-bag (OOB) samples to considerably speedup the very costly training process of the stacking procedure. Our experiments in several datasets covering two high-dimensional and noisy domains of topic and sentiment classification provide strong evidence in favor of the benefits of our RF-based solutions. We show that BERT is among the top performers in the vast majority of analyzed cases, while retaining the unique benefits of RF classifiers (explainability, parallelization, easiness of parameterization). 
We also show that stacking only the recently proposed RF-based classifiers and BERT using our OOB-based strategy is not only significantly faster than recently proposed stacking strategies (up to six times) but also much more effective, with gains up to 21% and 17% on MacroF1 and MicroF1, respectively, over the best base method, and of 5% and 6% over a stacking of traditional methods, performing no worse than a complete stacking of methods at a much lower computational effort.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080815" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/69a56cdc940e53ad5953ad62d14c195950dbbc80", "sources": [ "DBLP" ], "title": "Stacking Bagged and Boosted Forests for Effective Automated Classification", "venue": "SIGIR", "year": 2017 }, "69cfe330051480618129ed4449112c2ee1fb851f": { "authors": [ { "ids": [ "37824333" ], "name": "Syed M. A. H. Jafri" }, { "ids": [ "9113752" ], "name": "Ahmed Hemani" }, { "ids": [ "1750558" ], "name": "Kolin Paul" }, { "ids": [ "2422864" ], "name": "Naeem Abbas" } ], "doi": "10.1109/IPDPS.2017.59", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.59", "entities": [ "Algorithm", "Artificial neural network", "Computation", "Convolutional neural network", "Embedded system", "Hardware acceleration", "Locality of reference", "Machine learning", "Memory footprint", "Neural Networks", "Parallel computing", "Requirement", "Throughput", "Tiling window manager" ], "id": "69cfe330051480618129ed4449112c2ee1fb851f", "inCitations": [ "4805fa6c66509692ca51434463641ba5d7527341" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "276-286", "journalVolume": "", "outCitations": [ "244c94a54a44ac0b31a2c772d918f66f596fd7ba", "0af203b0112a8564c730a596fe5cf35556537e2e", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "2ffc74bec88d8762a613256589891ff323123e99", "0934508c768ff8ba9744678ad92e51dfdbd5f122", 
"722fcc35def20cfcca3ada76c8dd7a585d6de386", "162d958ff885f1462aeda91cd72582323fd6a1f4", "2d051998f49630013af34ad478de7fa48c5b877d", "0ae151b91793ad57a04b9962a0ea235d21e5e6b0", "399acab2bee6eccbfffe4a2ce688b6b1075e9c5e", "0b99d677883883584d9a328f6f2d54738363997a", "e25f086bfa283498b30821be22f4027b4d9fb447", "309ad0357af7722a24192781340881390055a3db", "c8c266327db328c5489ed6d769f619b5bf09a9de", "233b1774f28c9972df2dfcf20dfbb0df45792bd0", "008154be54eefe4734b454c2841ac66877ac8db5", "32c3d778d8cce464b3ad3de277666295f3a0b02a", "2bc37c57da973aa53c41e9ebb6e1407cd8e3e13b", "772d2ad4dff3ab14f930d05bfb0f2e1993a67e74", "37cd2c2f4e208077cfc3a9a4db6365227cf8b326", "15b275f0421c606f5903532e9964b140cbb2f878", "a24ce3e17c36398759566dd4e3bfd0c923cca77d", "8234d8590e9a053446142c1c4565ed4e4acb78be", "06ce77e4abea63948580340be25d7f2a80369e5a", "2aecacec07de7e636a49511bfa3d7fe89b0e4d99", "04fa47f1d3983bacfea1e3c838cf868f9b73dc58", "bcb288389d4318494887fe20ee68b6b18f39a3a5", "baea5b38ef79158a6f942497b06443ae24f15331", "ce1cd2de3f0f257e7590a42be8fdef5eeb3af2bc", "4ca573dfa4f917417321d7523175a4382646c58f", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "583aab8fc9a3b271843ef1bd020edfd376cc1495", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "02ebdcf8200135ec0433e12e4ef2459ac740370b", "23e19cc9d2318b07eeaf8a9d34245131eb1a58be" ], "paperAbstract": "Today, machine learning based on neural networks has become mainstream, in many application domains. A small subset of machine learning algorithms, called Convolutional Neural Networks (CNN), are considered as state-of-the-art for many applications (e.g. video/audio classification). The main challenge in implementing the CNNs, in embedded systems, is their large computation, memory, and bandwidth requirements. To meet these demands, dedicated hardware accelerators have been proposed. Since memory is the major cost in CNNs, recent accelerators focus on reducing the memory accesses. 
In particular, they exploit data locality using either tiling, layer merging or intra/inter feature map parallelism to reduce the memory footprint. However, they lack the flexibility to interleave or cascade these optimizations. Moreover, most of the existing accelerators do not exploit compression that can simultaneously reduce memory requirements, increase the throughput, and enhance the energy efficiency. To tackle these limitations, we present a flexible accelerator called MOCHA. MOCHA has three features that differentiate it from the state-of-the-art: (i) the ability to compress input/kernels, (ii) the flexibility to interleave various optimizations, and (iii) intelligence to automatically interleave and cascade the optimizations, depending on the dimension of a specific CNN layer and available resources. Post layout Synthesis results reveal that MOCHA provides up to 63% higher energy efficiency, up to 42% higher throughput, and up to 30% less storage, compared to the next best accelerator, at the cost of 26-35% additional area.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.59" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/69cfe330051480618129ed4449112c2ee1fb851f", "sources": [ "DBLP" ], "title": "MOCHA: Morphable Locality and Compression Aware Architecture for Convolutional Neural Networks", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "69dc2dc68160c59b01822876d1c2e2657a79453a": { "authors": [ { "ids": [ "4698435" ], "name": "Anton Burtsev" }, { "ids": [ "1729756" ], "name": "David Johnson" }, { "ids": [ "37157703" ], "name": "Josh Kunz" }, { "ids": [ "1769498" ], "name": "Eric Eide" }, { "ids": [ "2358499" ], "name": "Jacobus E. 
van der Merwe" } ], "doi": "10.1145/3127479.3131209", "doiUrl": "https://doi.org/10.1145/3127479.3131209", "entities": [ "Cloud computing", "Network architecture", "Principle of least privilege", "Software-defined networking", "Substrate (electronics)" ], "id": "69dc2dc68160c59b01822876d1c2e2657a79453a", "inCitations": [ "3f99bb743fa9576f8da7d168f3858dd0acf35e79" ], "journalName": "", "journalPages": "128-141", "journalVolume": "", "outCitations": [ "2b52fa44fc607717b5ceca2069d233baa29e95f9", "64f3a81fff495ac336dccdd63136d451852eb1c9", "043dfe85197bbe64f9929ca620c8315f9c98ee7b", "24c2ed6a87dc09f12e34a7a1d3a1595d7dc31c6e", "2cc08b9f07a889a7c035df438ca99d0ad8c97aa5", "35adc59cbb6eb28fc9c4c810839944b12f3f8c5c", "31d85e0d248024a0b34ffe74d0a720a39af54c88", "0706225eeac0f855b19c365313db61252ecde0d7", "65f7060f75db0b8a0843b65525438599281934f2", "07529c6cac9427efab237cd20f7aa54237e74511", "95a8bfacb4b659f3e4f373dd76a0b0c089cd56ca", "055598e1e221d0758c7d83e311516b44beeb7ced", "f7b2556004f7261481d38a5071387b7f6ab91bb5", "4fd92c8a4094ca4967b00181c29649ebdda47562", "0579a80af74c6c21fcf4648227d4622633930f64", "411aec09ba1b7a2c6939070e46d0b90ee6d7ee0f", "b84322d9f2eefe24c03832b9a701d344f58b869c", "4c53c5d778150c4c734bce4be9844b8d31a9cbd4", "0db9636ace0830b8b5e86b031a7a86d621446bd9", "9b5869da492ca33db7a8ec17776ecdb4dac8d288", "d917a006f1264c20bfdcd6b053835f8686f15afd", "2859be5fbe5b731a7997a86e33a55dc68cd94faa", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "1f639f5ba1bbc808cc1b0562fc46943496c9585a", "1b54f6f1d93b0a409a6c58e8445a471be9c80603", "9a53abcd90ce847ba776bc933b19f77e698b020a", "9f74be4d6f157b3bb057543b13f0092f6e3f52be", "21678be84430f56942cf5172c281b1861b9ac7a0", "8658aa6a59061d9b4bb2f580ef4f6964741345b7", "09bbd2945bc577d0832c2146fa161c5d90c4e52b", "3e651a701c9d814616a67acb4f72532ef976a0b3" ], "paperAbstract": "We present CapNet, a capability-based network architecture designed to enable least authority and secure collaboration in the cloud. 
CapNet allows fine-grained management of rights, recursive delegation, hierarchical policies, and least privilege. To enable secure collaboration, CapNet extends a classical capability model with support for decentralized authority. We implement CapNet in the substrate of a software-defined network, integrate it with the OpenStack cloud, and develop protocols enabling secure multi-party collaboration.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3131209", "http://www.flux.utah.edu/download?uid=254", "https://cross.ucsc.edu/wp-content/uploads/2017/09/cross17-slides.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/69dc2dc68160c59b01822876d1c2e2657a79453a", "sources": [ "DBLP" ], "title": "CapNet: security and least authority in a capability-enabled cloud", "venue": "SoCC", "year": 2017 }, "69e964b8e42a27997daf60529844698121a8f3e6": { "authors": [ { "ids": [ "31833586" ], "name": "Hanna Alam" }, { "ids": [ "1747773" ], "name": "Tianhao Zhang" }, { "ids": [ "2928845" ], "name": "Mattan Erez" }, { "ids": [ "2913809" ], "name": "Yoav Etsion" } ], "doi": "10.1145/3079856.3080209", "doiUrl": "https://doi.org/10.1145/3079856.3080209", "entities": [ "Dynamic video memory technology", "Operating system", "Stock and flow" ], "id": "69e964b8e42a27997daf60529844698121a8f3e6", "inCitations": [ "a4710ac80826e48a410b1b9da80c2ca0f4a6a357" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "457-468", "journalVolume": "", "outCitations": [ "add1023b52ef4fe76a0dfdfe0916256348624da0", "e8b55466fd0563ad80e0534dc2ddf709b7f54dd8", "35dc57e8c8f78956c740135a4f45ad1a00124bc1", "33196b69eeec351efd5178eae5da92979bdc6fd7", "024e39f4185e48a0a692663a0f26dc323de47fed", "1b6262f0533c202c1f140e60053ee3c72f216687", "05a55820da0430f3b7e68f54bcb2cb6427c8cf28", "6bf62a3aec49e5d128ae024a17628893898fcd8f", "671958087f3c24e7b025019476be8918302270e2", "1c2aed008f7bcb769d0e6f8109434c766fb22bde", 
"1ddd08b8610ffe95cb85d2aab2ff08c2722c9772", "6ff096588c6f8f673e83fcc2639a8cc6f450c50a", "1ef9766f75c3c77ccc11f526db7f4894210c0391", "a725204b6d9981f818a88b68ac7498a6261f7dea", "60006af1bbd5355a8784f0c4bbb1aafba2750d9e", "0571492ae2aa6df23ebbfc9f6e12ce6c0eb38845", "2a59eb5eacb88eb893a31fc8bdee2c4385e22d7a", "48215f3287f7d527dc63c5b504ca8397d3bdef4e", "a70c72d011676d58fea4a652d9bd93f915ab26d8", "1bed30d161683d279780aee34619f94a860fa973", "44f779d08d629e915ee7cbe1c0f4f17e9f6a626f", "533d720a8542b707c316d39cf5beeb58738af86d", "1599aa8cd110dc063ccd48d0de3af770a8780c88", "a4b0f11334d33de4f41bfeebfc520fb5b034a31e", "8150bce0f1961ee5d1f40daa3e6edcb81f5439ba", "080b1f2c8316dad80d8c385dfcb82335a64a4d29", "c5a00bd4aca85ea79ee05326ec34efe5cda92510", "211f2beaaf36bb6a920a63dbbef6842cb1d22468", "6ac87f5ef30f787e5dacd59e97d7804e99a7366e", "102fd9c66b2a5f71a4a3890bdb48a813d0650eaf", "daeff61502115efc4b9ee81607a8e5489215ea88", "71a2d8c473f13d0c664f751db97e81128281b1eb", "4a66a0c4137dfdfbb618b1c2d199032f6d719a9c", "343a384d5476ead9496f96559aba5ad09e95e01e", "5d3c30ae77994e16fb2ee486cff96d4c52bccfd9" ], "paperAbstract": "In this paper, we introduce the Do-It-Yourself virtual memory translation (DVMT) architecture as a flexible complement for current hardware-fixed translation flows. DVMT decouples the virtual-to-physical mapping process from the access permissions, giving applications freedom in choosing mapping schemes, while maintaining security within the operating system. Furthermore, DVMT is designed to support virtualized environments, as a means to collapse the costly, hardware-assisted two-dimensional translations. We describe the architecture in detail and demonstrate its effectiveness by evaluating several different DVMT schemes on a range of virtualized applications with a model based on measurements from a commercial system. 
We show that different DVMT configurations preserve the native performance, while achieving speedups of 1.2x to 2.0x in virtualized environments.", "pdfUrls": [ "https://lph.ece.utexas.edu/merez/uploads/MattanErez/isca2017_dvmt.pdf", "http://yoav.net.technion.ac.il/files/2017/07/DVMT.pdf", "http://doi.acm.org/10.1145/3079856.3080209" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/69e964b8e42a27997daf60529844698121a8f3e6", "sources": [ "DBLP" ], "title": "Do-it-yourself virtual memory translation", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "69f9a690eee4081b07244c251328e7d1ac19d4c7": { "authors": [ { "ids": [ "2932353" ], "name": "Beverly A. Sanders" }, { "ids": [ "7301435" ], "name": "Jason N. Byrd" }, { "ids": [ "2712027" ], "name": "Nakul Jindal" }, { "ids": [ "34851048" ], "name": "Victor F. Lotrich" }, { "ids": [ "31938628" ], "name": "Dmitry Lyakh" }, { "ids": [ "36824435" ], "name": "Ajith Perera" }, { "ids": [ "2774852" ], "name": "Rodney J. 
Bartlett" } ], "doi": "10.1109/IPDPS.2017.108", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.108", "entities": [ "Computational chemistry", "Defense Distributed", "Electronic structure", "Parallel computing", "Perturbation theory", "Runtime system", "Scalability", "Sparse matrix" ], "id": "69f9a690eee4081b07244c251328e7d1ac19d4c7", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "555-564", "journalVolume": "", "outCitations": [ "e7f981b829c2febd381a7e90b8b8c85a5f5c4777", "2706db42926e0e58e35336331f6d3b62f0811cf5", "68d80dacf66e2ea556bcdbb4f4792efeeda2122f", "2356b24bae9cd3fc3b42686b129d6e1fb5cc48d9", "a0f60261be52d1467797e8586aadea1986d4044f", "ed632cc68d6087210cdeb6a00317bc5032544e41", "19df5d05b6da98f99619ee4584c5177bd02c8a2a", "a1dd203c6159a1eddaa2d7cc104d4c06a7ffffa4", "0c1ef9519c3dcd4a309650bc24e5d5f906e369ed", "a3bec8c3dd2ac915675db13dd1d64f53588e7aca", "9eaf2d5af34f9a284902d9fb0c1f8b187b2bc3fc", "a814615952a917dcb0b7fb5e22285b62fbe3d6cc" ], "paperAbstract": "Aces4 is a parallel programming platform comprising a DSL for Computational Chemistry and its runtime system. It offers a convenient way to express parallelism together with extensive support for extremely large, possibly sparse, distributed arrays. It aids scientists in the creation of performant, scalable, massively parallel programs that can effectively take advantage of leadership class computing systems to address important scientific questions. Aces4 has enabled the development and implementation of new methods in electronic structure theory which are breaking new ground in their ability to perform highly accurate calculations on ever larger molecular systems. In this paper the design of Aces4, which is based on the Super Instruction Architecture approach, is described. 
Experimental scaling results for Molecular Cluster Perturbation Theory, a new method enabled by Aces4, and CCSD, a widely used computational chemistry method are given.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.108" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/69f9a690eee4081b07244c251328e7d1ac19d4c7", "sources": [ "DBLP" ], "title": "Aces4: A Platform for Computational Chemistry Calculations with Extremely Large Block-Sparse Arrays", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "6a3a3c263e0a985a3cd127799ffe101bfd7da943": { "authors": [ { "ids": [ "2061882" ], "name": "Brown Farinholt" }, { "ids": [ "1853966" ], "name": "Mohammad Rezaeirad" }, { "ids": [ "2070759" ], "name": "Paul Pearce" }, { "ids": [ "2811653" ], "name": "Hitesh Dharmdasani" }, { "ids": [ "19192251" ], "name": "Haikuo Yin" }, { "ids": [ "35011666" ], "name": "Stevens Le Blond" }, { "ids": [ "1703426" ], "name": "Damon McCoy" }, { "ids": [ "1763395" ], "name": "Kirill Levchenko" } ], "doi": "10.1109/SP.2017.48", "doiUrl": "https://doi.org/10.1109/SP.2017.48", "entities": [ "Botnet", "Credential", "Experiment", "Honeypot (computing)", "Malware", "Remote Desktop Protocol", "Remote desktop software" ], "id": "6a3a3c263e0a985a3cd127799ffe101bfd7da943", "inCitations": [ "3509c5617d848ef49113b4adbc7f796ced41c907", "ab3c5dd63e8f89a1a949d9bb4c5d50f8b6d29a19", "9eb432aaaac8a368c7c4464d984ea3e70877657f" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "770-787", "journalVolume": "", "outCitations": [ "34c2b718869354a588ee602f41be77e553346c5d", "8a32009be8ae5c3f61de634256371fe07eab03bd", "11efa6998c2cfd3de59cf0ec0321a9e17418915d", "7aa450d7e2b43175590a1ee2c94f5342152cfc56", "2f48f56e14f105441433ef15e47a3d2af8affa3e", "d201cffd07b3e4a961e80be5bc95c36ce770f538", "2cfbb7b89a5e220b21bbf64161dc880c1b644017", "07cbb544b23a4f0e914863cb17afa4eafd9f59a8", 
"05ac3c0dfd582901ab08a708d10eb50909cd614f", "1b234ab74e1488d29a26f3cd14bbb8424880f95b", "0eff95f6fd369d8f479b895c9e5e5c609d46efa1" ], "paperAbstract": "Remote Access Trojans (RATs) give remote attackers interactive control over a compromised machine. Unlike large-scale malware such as botnets, a RAT is controlled individually by a human operator interacting with the compromised machine remotely. The versatility of RATs makes them attractive to actors of all levels of sophistication: they've been used for espionage, information theft, voyeurism and extortion. Despite their increasing use, there are still major gaps in our understanding of RATs and their operators, including motives, intentions, procedures, and weak points where defenses might be most effective. In this work we study the use of DarkComet, a popular commercial RAT. We collected 19,109 samples of DarkComet malware found in the wild, and in the course of two, several-week-long experiments, ran as many samples as possible in our honeypot environment. By monitoring a sample's behavior in our system, we are able to reconstruct the sequence of operator actions, giving us a unique view into operator behavior. We report on the results of 2,747 interactive sessions captured in the course of the experiment. During these sessions operators frequently attempted to interact with victims via remote desktop, to capture video, audio, and keystrokes, and to exfiltrate files and credentials. 
To our knowledge, we are the first large-scale systematic study of RAT use.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.48", "https://people.eecs.berkeley.edu/~pearce/papers/rats_oakland_2017.pdf", "https://people.eecs.berkeley.edu/~pearce/talks/rats_oakland_2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6a3a3c263e0a985a3cd127799ffe101bfd7da943", "sources": [ "DBLP" ], "title": "To Catch a Ratter: Monitoring the Behavior of Amateur DarkComet RAT Operators in the Wild", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "6a574933a11bcc55c29d7b7f1dcfff4d1f6db0ff": { "authors": [ { "ids": [ "2863294" ], "name": "Daniel Molka" }, { "ids": [ "11468115" ], "name": "Robert Sch\u00f6ne" }, { "ids": [ "3241805" ], "name": "Daniel Hackenberg" }, { "ids": [ "1781970" ], "name": "Wolfgang E. Nagel" } ], "doi": "10.1145/3030207.3030223", "doiUrl": "https://doi.org/10.1145/3030207.3030223", "entities": [ "Cache (computing)", "Central processing unit", "Computer data storage", "Freedom of information laws by country", "Hardware performance counter", "Internet bottleneck", "Memory hierarchy", "Software documentation", "X Window System" ], "id": "6a574933a11bcc55c29d7b7f1dcfff4d1f6db0ff", "inCitations": [], "journalName": "", "journalPages": "27-38", "journalVolume": "", "outCitations": [ "1f2ff98f9413bb36c641e9edcfa79f7b33eeb80a", "d0dac91628415ce1b2135f68c883dc08583e9188", "798737345064b2c19ca1a6537dbaa950d268ba14", "265dc2065146ca50f01cc26092e2b7fdd22338d9", "0ce969b8a274e5b2b569916515499c8151ae4746", "0d776c6b3d19e76a24c8c77bf33a5276294710b9", "bbbe1ce1a11cc28250fe0106bab44b915bb81a8e", "8beda53e2b3cd04434aaaabef463ef8e18706df1", "67cf1189c859d66bac309f9438df434fb651f97a", "13729035e288f71654115470c751baf5df4898db", "2506ecf40a69932426ba76aacfa77a53928cb0ee", "1b2bdbae8451cbb56be2649aba3c818b6d0eb16a", "86685044b78aed871688f4c7e8d95b4b62538570", "17810349765c08963af130efe28b6a6b77b7ec51", 
"00ecfb48c72709de7ea719e658b2a37301136cda", "40d9b9a3b07c6fe1face9587dd7455f393c5d149", "bd5a528e4801bd247580d2a81f183f0ac3dce175", "f4a91972bf1a05b195bce06a24dc33960bff1151", "2bf4e760a778b9d10a78fb48a89013759bfbc037" ], "paperAbstract": "Modern processors incorporate several performance monitoring units, which can be used to count events that occur within different components of the processor. They provide access to information on hardware resource usage and can therefore be used to detect performance bottlenecks. Thus, many performance measurement tools are able to record them complementary to information about the application behavior. However, the exact meaning of the supported hardware events is often incomprehensible due to the system complexity and partially lacking or even inaccurate documentation. For most events it is also not documented whether a certain rate indicates a saturated resource usage. Therefore, it is usually difficult to draw conclusions on the performance impact from the observed event rates. In this paper, we evaluate whether hardware performance counters can be used to measure the capacity utilization within the memory hierarchy and estimate the impact of memory accesses on the achieved performance. The presented approach is based on a small selection of micro-benchmarks that constantly stress individual components in the memory subsystem, ranging from caches to main memory. These workloads are used to identify hardware performance counters that provide good estimates for the utilization of individual components in the memory hierarchy. However, since access latencies can be interleaved with computing instructions, a high utilization of the memory hierarchy does not necessarily result in low performance. 
We therefore also investigate which stall counters provide good estimates for the number of cycles that are actually spent waiting for the memory hierarchy.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030223", "https://research.spec.org/icpe_proceedings/2017/proceedings/p27.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6a574933a11bcc55c29d7b7f1dcfff4d1f6db0ff", "sources": [ "DBLP" ], "title": "Detecting Memory-Boundedness with Hardware Performance Counters", "venue": "ICPE", "year": 2017 }, "6a6f60059462b94f1177176526ad9eb6633e284a": { "authors": [ { "ids": [ "39718171" ], "name": "Rishabh Mehrotra" }, { "ids": [ "1954563" ], "name": "Imed Zitouni" }, { "ids": [ "1977489" ], "name": "Ahmed Hassan Awadallah" }, { "ids": [ "1877430" ], "name": "Ahmed El Kholy" }, { "ids": [ "2072010" ], "name": "Madian Khabsa" } ], "doi": "10.1145/3077136.3080833", "doiUrl": "https://doi.org/10.1145/3077136.3080833", "entities": [ "Clickstream", "Computer user satisfaction", "Cursor (databases)", "Experiment", "Holism", "Human\u2013computer interaction", "Information", "Pointing device", "Process modeling", "Search engine results page", "Utility", "Web search engine" ], "id": "6a6f60059462b94f1177176526ad9eb6633e284a", "inCitations": [ "1e2bc1f2603a34fe5f9199ba3fecf66792cd921e", "a3b0819915289f6b44dc34d46e4e6e43fce718e7" ], "journalName": "", "journalPages": "165-174", "journalVolume": "", "outCitations": [ "c08206b44dd1f0ea54bd073e4effaf2e4483169b", "07facb9dd8b7aebe39decdc680c62333aacd5d39", "6d74c216d8246c2a356b00426af715102af2a172", "3674b7610d922fee4a5ef4b9aabeb342ee0c38f8", "2b0750d16db1ecf66a3c753264f207c2cb480bde", "5ea4873fee47c25a50f33afac0ae4a41e6e5a1c7", "b7dc5aa1c615455321ed52cb78c0a09463839c5c", "c77be34db96695159244723fe9ffa4a88dc4a36d", "a0f96a954ccb478440523ef36e67f01de51e6918", "7e7343a5608fff1c68c5259db0c77b9193f1546d", "29d47e1015d90a3bcde7db84399e34651fb37af7", "1ab758492347723ae8ad20257715f3fd49e75c27", 
"4047d5efd1683bbd3280500c3244149089412024", "12ba2eb20f7d345e4b6ada54f811d3bb26608932", "15aca6d6c880784443528a1812f3bfb39378c1ab", "09a54c9c2624edfb54ea931d310453449b35af1d", "718924736d73b8ee83c4e764daaf4536aeff22d4", "07439e56ffb20dd6837d0febf8d97a84eeab2c37", "6de51cb7a3c00aee4047edb5aba39266828125e3", "3453d79657e8f3e1614e73608f92465ab7f5a291", "328c7b4ce5a0d81326ee2a3befa0f2dd630a48c1", "ab21e24201e6117ee6879a58624b655a52e9dd54", "4c4312a1643c9f96206a2210a2a6bbb27b23f097", "c204c31be826a6793a9b54b3f898fc53005db1b6", "6e3b9188ec3d3f7cff45ef90994ff9cce939f71c", "35996b3a5fb7b28d790160f8217d7ee52f9839ba", "d1255f19bda602e2e837c3b186cb076211beb401", "87113f966642bba168fd145339c5746e64603cc0", "4f604adabc435c04a8227c8a5c0eb1bb0f9acfd9", "6929bfe073c222657e424983d36d567e4c6113a0", "2dfceb046d08b99e4ac05136aa02b9ad26b5500b", "83dc2a2380c898d45c4485d2260f55c424f5a465", "8bf2f063f8c9310261d15d99b7dcc5490b9d8409", "1aae05759f085792596312eda89315145aea794b", "a5203a54528b4ff60c6d1078087e82829b39e365" ], "paperAbstract": "Detecting and understanding implicit measures of user satisfaction are essential for meaningful experimentation aimed at enhancing web search quality. While most existing studies on satisfaction prediction rely on users' click activity and query reformulation behavior, often such signals are not available for all search sessions and as a result, not useful in predicting satisfaction. On the other hand, user interaction data (such as mouse cursor movement) is far richer than just click data and can provide useful signals for predicting user satisfaction. In this work, we focus on considering holistic view of user interaction with the search engine result page (SERP) and construct detailed universal interaction sequences of their activity. We propose novel ways of leveraging the universal interaction sequences to automatically extract informative, interpretable subsequences. 
In addition to extracting frequent, discriminatory and interleaved subsequences, we propose a Hawkes process model to incorporate temporal aspects of user interaction. Through extensive experimentation we show that encoding the extracted subsequences as features enables us to achieve significant improvements in predicting user satisfaction. We additionally present an analysis of the correlation between various subsequences and user satisfaction. Finally, we demonstrate the usefulness of the proposed approach in covering abandonment cases. Our findings provide a valuable tool for fine-grained analysis of user interaction behavior for metric development.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080833", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/04/SIGIR17b.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6a6f60059462b94f1177176526ad9eb6633e284a", "sources": [ "DBLP" ], "title": "User Interaction Sequences for Search Satisfaction Prediction", "venue": "SIGIR", "year": 2017 }, "6a728101f8d24da7d153f5a92e71e94f7a323dae": { "authors": [ { "ids": [ "1744187" ], "name": "Jun He" }, { "ids": [ "33383407" ], "name": "Sudarsun Kannan" }, { "ids": [ "1743175" ], "name": "Andrea C. Arpaci-Dusseau" }, { "ids": [ "1703415" ], "name": "Remzi H. 
Arpaci-Dusseau" } ], "doi": "10.1145/3064176.3064187", "doiUrl": "https://doi.org/10.1145/3064176.3064187", "entities": [ "Simulation", "Solid-state drive", "Solid-state electronics" ], "id": "6a728101f8d24da7d153f5a92e71e94f7a323dae", "inCitations": [ "ad42b4773cd461ba58bda07e1b7b0ff24c4ddba4", "0b393cab00401cb971cf71970e00c2767f881f75", "ec3924af8c1cb428b4f1309b9a9ca3c86abd6631", "4fa7ade25b7bd22ea3357da0516833a318cc72fc", "40f196e21a289394c4354961116587b8accba45e", "7b2be06575567a40314b377827f077ef1a7ec825" ], "journalName": "", "journalPages": "127-144", "journalVolume": "", "outCitations": [ "a04942ae2b468546e5577e9b8e4e25176fbdc146", "a04678ad8398a2579c249fff4b59bfbcfdd7e25b" ], "paperAbstract": "We perform a detailed vertical analysis of application performance atop a range of modern file systems and SSD FTLs. We formalize the \"unwritten contract\" that clients of SSDs should follow to obtain high performance, and conduct our analysis to uncover application and file system designs that violate the contract. 
Our analysis, which utilizes a highly detailed SSD simulation underneath traces taken from real workloads and file systems, provides insight into how to better construct applications, file systems, and FTLs to realize robust and sustainable performance.", "pdfUrls": [ "http://pages.cs.wisc.edu/~jhe/eurosys17-he.pdf", "http://nvmw.ucsd.edu/nvmw18-program/unzip/current/nvmw2018-final25.pdf", "http://doi.acm.org/10.1145/3064176.3064187" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6a728101f8d24da7d153f5a92e71e94f7a323dae", "sources": [ "DBLP" ], "title": "The Unwritten Contract of Solid State Drives", "venue": "EuroSys", "year": 2017 }, "6a7ab215f023bcad781b0d7098f54d22e5ee89a1": { "authors": [ { "ids": [ "2889938" ], "name": "Ya-Shuai L\u00fc" }, { "ids": [ "35103628" ], "name": "Libo Huang" }, { "ids": [ "40391089" ], "name": "Li Shen" }, { "ids": [ "1690770" ], "name": "Zhiying Wang" } ], "doi": "10.1145/3123939.3124532", "doiUrl": "https://doi.org/10.1145/3123939.3124532", "entities": [ "Algorithm", "Amiga Reflections", "Clock rate", "Computer graphics", "Control flow", "Display resolution", "Graphics processing unit", "Path tracing", "Rendering (computer graphics)", "SIMD", "Single instruction, multiple threads", "Visual effects", "Z-buffering" ], "id": "6a7ab215f023bcad781b0d7098f54d22e5ee89a1", "inCitations": [], "journalName": "", "journalPages": "560-573", "journalVolume": "", "outCitations": [ "4308295a2eaef30be423520918ad224dc2f3ffe2", "26ef909381d93060f626231fe7560a5636a947cd", "0f48fd50a9f0cb30d1f0495010a45a0e732e8a12", "90cf1ea079253c4451d4c74a37f0575e8501cdf4", "bb0dc7f89a8e64aa537e2e2d26e8c44e30bead86", "14d98ecba21e404f80daf024a03effe259cf9b88", "08d041581636f8eee888091b5539696d729f2bff", "31b6005f360f989a68398a7b6bc5cd1f6c692fcc", "13a1b9ce5946ba75af176f8bede4d68fd72b6c7e", "6f61bb35530ac58d8d5f5f5598b09cccba013558", "8bd6f67ef03b3c138c52f3e9b1716aebe937d244", "05177b6ee393267db5332729d7f97ddd090e9c35", 
"28cc7453c5f3f9ecb9415e631b0829ec9af8a4c3", "4e93fc3a397144055d707e87c1c82289d1ef77f3", "1d0e6ae33237650f71583c2a5e2cc27c6523fb22", "3ad6c5012cba78f29cb8d0ed556c32b93966076e", "9a334c377686d8abe7711abcef58775ee02c0487", "0c75806bfe62a119e1aa580327c2f8db01b898aa", "9993c073c3ab75ef893d009e3109d23727d4c1ae", "2e33c9a0e0ba542693610f36cabee4d8291f8e09", "2d6f002477015469075954c6748a1a85af352c94", "6318714027d58d7b402314c3c42b466d1307c33e", "6c4e1d757e6bb655950dab25894ecf5e25239b85", "1b50cd4fb7c894a818dc87952ce5fc04d273d939", "6f23e1be35046b8202d356043c49304c0974396d", "512a1ebdcaca56f3ea0c21aa2abe9a5ab7dace06", "1c05733fa16907f235e6400625c2ae88f1942b42", "09ba565ec5dd3816968edaaee8351cf653e26d81", "7e816b82fb92df08d7bc0d9805f8988754e0d8c1", "5fe56eabf8e341a71b87498daccee5039fbdee97", "6f4717d1078a961504e957204db5a9730cc405b4", "0d8570b96e947ff532c5db0587d22a63526beef2" ], "paperAbstract": "Computer graphics is generally divided into two branches: real-time rendering and physically-based rendering. Conventional graphics processing units (GPUs) were designed to accelerate the former which is based on the standard Z-buffer algorithm. However, many applications in entertainment, science, and industry require high quality visual effects such as soft-shadows, reflections, and diffuse lighting interactions which are difficult to achieve with the Z-buffer algorithm, but are straightforward to implement using physically-based rendering methods. Physically-based rendering can already be implemented on present programmable GPUs. However, for physically-based rendering on GPUs, a large portion of the processing power is wasted due to low utilization of SIMD units. This is because the core algorithm of physically-based rendering, ray tracing, suffers from Single Instruction, Multiple Thread (SIMT) control flow divergences. In this paper, we propose the Dynamic Ray Shuffling (DRS) architecture for GPUs to address this problem. 
Our key insight is that the primary control flow divergences are caused by inconsistent ray traversal states of a warp, and can be eliminated by dynamically shuffling rays. Experimental results show that, for an estimated 0.11% area cost, DRS significantly improves the SIMD efficiency for the tested benchmarks from 41.06% to 81.04% on average. With this, the performance of a physically-based rendering method such as path tracing can be improved by 1.67X--1.92X, and 1.79X on average.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3124532" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6a7ab215f023bcad781b0d7098f54d22e5ee89a1", "sources": [ "DBLP" ], "title": "Unleashing the power of GPU for physically-based rendering via dynamic ray shuffling", "venue": "MICRO", "year": 2017 }, "6a9e5d60de6710db7875efd325c439e4bc081e01": { "authors": [ { "ids": [ "3446210" ], "name": "Kun Suo" }, { "ids": [ "3521193" ], "name": "Yong Zhao" }, { "ids": [ "1786877" ], "name": "Jia Rao" }, { "ids": [ "40227618" ], "name": "Luwei Cheng" }, { "ids": [ "1718639" ], "name": "Xiaobo Zhou" }, { "ids": [ "1738937" ], "name": "Francis C. M. 
Lau" } ], "doi": "10.1145/3127479.3127484", "doiUrl": "https://doi.org/10.1145/3127479.3127484", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Data center", "I/O bound", "Linux", "Multiplexing", "Multitenancy", "Operating system", "Priority inversion", "Scheduling (computing)", "Throughput" ], "id": "6a9e5d60de6710db7875efd325c439e4bc081e01", "inCitations": [], "journalName": "", "journalPages": "269-281", "journalVolume": "", "outCitations": [ "43a8e1e54fceff27ebbf4b0a1f52ef130d0fcbf0", "e89b5dd4cfecc8208ccbaf8bfba9c261040a9a54", "34b4ef25ff000b4ad7dcb2ed111b7c2a2bf4990f", "6c34255d4a24356f1856540650123a9dc1cb1c1d", "2129d9f040643983de7dffd1735849076bb2c1f1", "9907cff78b60cb0c63b0978b3408e03116614d59", "6d44790b6d952eff28f302998e8121f90786e3ff", "163247e7ed8db43c9529d85c384d8843e22a136b", "76b73a657ef1cb543790acc99fc8abc80dbe4fc7", "303fab4117468e84d10f426ab3b3e6c92da1159e", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "0e7af537d6eacd9832c90f31a46ced1ac91b573e", "005718daa82f7880b5b1f3b01e0e8f6aad2b5751", "93e6deb8ac31807df341728a469984add00aed27", "765888be0a163ad512885ce8d5eff385e7683894", "0edd896bc82b7fb65ef63cb1e3512db795c7f7d4", "47dc52eeb7bf6efb46c550201cc8d52af71cc1a3", "5fe4eb1749a823469950456a123c77530e33ad73", "0562bc5f82b40e2e9c0ae035aa2dd1da6107017c", "9d0cf10496766ddcaf7f4676b74e0df75b19c30c", "5e1a1b6f70ceadc4a7ca3378e9fced99db711dee" ], "paperAbstract": "While virtualization helps to enable multi-tenancy in data centers, it introduces new challenges to the resource management in traditional OSes. We find that one important design in an OS, prioritizing interactive and I/O-bound workloads, can become ineffective in a virtualized OS. Resource multiplexing between multiple tenants breaks the assumption of continuous CPU availability in physical systems and causes two types of priority inversions in virtualized OSes. In this paper, we present xBalloon, a lightweight approach to preserving I/O prioritization. 
It uses a balloon process in the virtualized OS to avoid priority inversion in both short-term and long-term scheduling. Experiments in a local Xen environment and Amazon EC2 show that xBalloon improves I/O performance in a recent Linux kernel by as much as 136% on network throughput, 95% on disk throughput, and 125x on network tail latency.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3127484" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6a9e5d60de6710db7875efd325c439e4bc081e01", "sources": [ "DBLP" ], "title": "Preserving I/O prioritization in virtualized OSes", "venue": "SoCC", "year": 2017 }, "6aaa54c7f9e2228a8a0c8583ba7e281211e63891": { "authors": [ { "ids": [ "40274299" ], "name": "Manolis Papadakis" }, { "ids": [ "2541837" ], "name": "Gilbert Louis Bernstein" }, { "ids": [ "37529085" ], "name": "Rahul Sharma" }, { "ids": [ "4689402" ], "name": "Alexander Aiken" }, { "ids": [ "1689128" ], "name": "Pat Hanrahan" } ], "doi": "10.1145/3133902", "doiUrl": "https://doi.org/10.1145/3133902", "entities": [ "Compiler", "Computer graphics", "Correctness (computer science)", "Data integrity", "Data model", "Data structure", "Graph (abstract data type)", "High- and low-level", "Memory safety", "Modulo operation", "Referential integrity", "Relational model", "Run time (program lifecycle phase)", "Satisfiability modulo theories", "Seam carving", "Simulation", "Simultaneous multithreading", "Verification and validation" ], "id": "6aaa54c7f9e2228a8a0c8583ba7e281211e63891", "inCitations": [], "journalName": "PACMPL", "journalPages": "78:1-78:29", "journalVolume": "1", "outCitations": [ "0a3cf2777169ef0fb81205fe255eb7260bcd2c52", "2b8de17d3a163489ef7d0814c9033a853b0725cf", "006ca7d3571497c73062b67ab1ab20a4b09b0972", "13ecf9e9ef6fad925d31c9055e073a2323b51f38", "daacf0a675e8fa8a55e42cd5aea529d6b5203f39", "2549e09c1bc2a95476bc2f3e8daa1bd715fdce56", "5a8aafae38aeabd5533a271ab02396529b7b41c9", "4662e89d0e3d3265e894a6e2d0c79c5570475216", 
"030a78598b86ac6a1536af140d9bd978558ae64f", "bfd9f180da6d3ce68f97f28abe7b685e961ec34e", "09bb95408a2cbccf670cb752f107000537c874d2", "2194c3460ab71f3826db00b045b2ae590c753319", "02febb2078bdc8bfe0913c45d3339ea3f62dfc4d", "605ce5fe2f5b1e478a2528f1c514b77fc2607577", "148758f14801b44d78874b662a05561023911807", "3d09b1c2ba9f3c985cbbda778dd53f6f8dd888c5", "5bab310e6c2ffbefca39408ab74c5a04775a72f1", "1d442755ac58e74558e97372a77e39dab0b535a0", "24a963758371e511e3749c865b14f697358f025c", "d8b4164fef65ffc7082a3c95b0a706e5c3aa38f9", "210e3d0418b1cc4f6ecf8fcfcf0f754cb65c1305", "943f74949f0aef37bb76c4f9d06e32144fcdd2a3", "7880786c500bb7b6e5ef7e4c6d135550d5ce3df6", "62f7869436b0719fa676717d6e945d48416a8bb7", "046a9e129fba46d78301ead661949f5290c79989", "0772d5a87825c858c4fd24cb43f0b61efda9db3d", "87b8ce02e57879951b3389d0d655ca242d2e291c", "866a6532f5055dea107aedf1d8b266ac2b9955b1", "42f79406592fd2b98e528614cc8d92c1d9784d65", "170746e36dfe606ca448ac4ca518b91bf6f828d0", "dd1a43b41415dd5a915f63f58d4c9963bbb02cf5", "274730459e051d8967fd682fb2fe0a4917d175c8", "11b7ec4906d4a7d6e5e056f3fc01a42558978fa3", "0b351e934210edc715e099c8258a989bad86ca5d", "0b003a2b6cea934d46546616ad156611ba2a9ac6", "23f03fa6a94c5f7b3dddda96ecf923c5f2e439a3", "1a8ed1cdbe8930598c35fddd9eab688e1d2df886", "3fe2c231e339357384e97ab477f1c278356d5e3b", "7026e8607b80c7e21b8901a4cbaa5a99da1d6656", "2de47fc883ae50e9850ab3ec797f8c6cabd294d1", "8ac1ff0d2ca61d43af9354d9deaaffcc6bfdb751", "ad585974e2d05758cd36e2ab2e346f38b4de4a0f", "50eba68089cf51323d95631c2f59ff916848863f", "a10daf6174ddf8ad8ee59d731db521931a13a162", "582c6f03e766eefb6bd6ef67d314ed31c6e0823d" ], "paperAbstract": "Algorithms that create and mutate graph data structures are challenging to implement correctly. However, verifying even basic properties of low-level implementations, such as referential integrity and memory safety, remains non-trivial. 
Furthermore, any extension to such a data structure multiplies the complexity of its implementation, while compounding the challenges in reasoning about correctness. We take a language design approach to this problem. We propose Seam, a language for expressing local edits to graph-like data structures, based on a relational data model, and such that data integrity can be verified automatically. We present a verification method that leverages an SMT solver, and prove it sound and precise (complete modulo termination of the SMT solver). We evaluate the verification capabilities of Seam empirically, and demonstrate its applicability to a variety of examples, most notably a new class of verification tasks derived from geometric remeshing operations used in scientific simulation and computer graphics. We describe our prototype implementation of a Seam compiler that generates low-level code, which can then be integrated into larger applications. We evaluate our compiler on a sample application, and demonstrate competitive execution time, compared to hand-written implementations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133902", "http://theory.stanford.edu/~aiken/publications/papers/oopsla17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6aaa54c7f9e2228a8a0c8583ba7e281211e63891", "sources": [ "DBLP" ], "title": "Seam: provably safe local edits on graphs", "venue": "PACMPL", "year": 2017 }, "6ab2c1fa831052a09eab22e799f0a065d13580dd": { "authors": [ { "ids": [ "8503165" ], "name": "Kaisheng Ma" }, { "ids": [ "1739207" ], "name": "Xueqing Li" }, { "ids": [ "35423932" ], "name": "Jinyang Li" }, { "ids": [ "2442306" ], "name": "Yongpan Liu" }, { "ids": [ "27905006" ], "name": "Yuan Xie" }, { "ids": [ "39862527" ], "name": "Jack Sampson" }, { "ids": [ "1717494" ], "name": "Mahmut T. 
Kandemir" }, { "ids": [ "1733274" ], "name": "Narayanan Vijaykrishnan" } ], "doi": "10.1145/3123939.3124533", "doiUrl": "https://doi.org/10.1145/3123939.3124533", "entities": [ "Approximation algorithm", "Backup", "Backup and Restore", "Computation", "Non-volatile memory", "Synergy" ], "id": "6ab2c1fa831052a09eab22e799f0a065d13580dd", "inCitations": [ "2fd6522eca6f38239d5b41d01e2d02d036aef850", "92bef85286549db653db4c4d55864195bc02a7b2", "41ea95cc4dca373bf324555b897760054ec4a76e" ], "journalName": "", "journalPages": "204-218", "journalVolume": "", "outCitations": [ "938286fa80fe31fa3e35f450989f27659296f25f", "2a55257ee1723616c8ca89752c07f2c15e37f246", "793b84b732bf257cb842e316a246ccfa3b0ef6a2", "80606b3db39d116d792ba8d2f5f51a5a6392f989", "ab8880e4438ff5e317974ecf5fd1d94fdd7c0ee5", "4b0f7bde293bc2e0c9c35fc191e5106d96cb559c", "fbce9b077f4d6075ef4db500595825c0a36a7538", "403a364ffed800c5abb267c40ef177578e1c6b23", "4ace5fbb66763f82d52d4f5e70c001100847e9b1", "1b61b9a3152da7e29eb6abcccc9dbf2973fe662c", "bc31f9ff642ce0463bd7fd641cd198ebd8e3f20b", "4037fa9ff27be77096ceb2ca14119ed9e076c669", "682b7c3e34922d3cde0359a013195797b43b9309", "9aacb7078279dbea907db697cf5b90758816ba54", "8e2f79d3002ddcd7266f8c6b2ab147dccd0eeb41", "0237b5a56032de7f71182d5565ba6ff63a22292d", "08c44e6f210b2cdaed97ecadaaabc42a23472c33", "18afe37fb52dc00d2b4067d84d94c785da8bda11", "505cd06765bb848e66d23b21ead3428f2a274ea9", "1600c3ed12301b06a1107a68c2de84fb3582a918", "22b7ceabd007203cf79e399136bdbcbc32d82d77", "15c3c1222f17ffd475c2a2c529fad87e14f70fcf", "2f6a6b62fa8a6d9d27a42ca5c1ebea49f333e266", "12404af084b38ee3f35ff3e43a583229039194dc", "497be8bc880ca84d302bd24f34c01e8f9e5951ee", "204dc0986b512a95a66632556d10c3c162caf7b7", "17bd3bde933863354583a6c50cbb85dd73d54a50", "29245de3627432ed56dc33556d1cb55b5366b489", "6f7303c27681b38167b9471cd77b16cce31aa0a6", "f9cf47539216a3737f6353dca8a8f3f1e588413e", "edbef040ef6df0d0db7132baf2d1b3d89703f828", "1212e1615891b005df762b669f396b1dce8a78ee", 
"5260e26d9dc0cb4e18a6434b615c73d22eb2d686", "2c4d4bb7619342eef2a87108562c4ef2fdac586b", "4b94ab4681e75506dda997d71dfd7996970d1a47", "631dbd310d1ae72bc47c54f06e7d1a2b2f01be60", "51ce1af8df64a73caae825d4843aad138aec6cd2", "997c03cced5b07356082a8b121a84d1389c4fd44", "383c4f29050b61087e08abe129369b58b552aea7", "514e626778a94c6781887b3109d646e852d50813", "1f2d6096f7df7ab815765a248d4078666557abcc", "2922edf0152641eed6cfa4f42b9f1ab9b9c71c97", "a21c972077f85d23f769c6ac4e4afa283d38de49", "23ac350f9f48645063e457626de4a36f8f610b59", "0090ee65a8bb632b95026877fb3f497e20b9f12b", "0cbe6121b40be2af869359c3e7b1de8d9a09a787", "59262d95118c90b19ee4fa7a91702d1f14fe8276", "09d97251a2932b6a3c1c2009f820d55f281433b9", "84b4a8b2f2fe58c57223dcb1e839d3106c820380", "10dc03bab236aa58778b74520978ec280ecdf731", "a2f3bb40653499eeb33babacf69579b5ea9d20e1", "ab5eb825e332846c9546adb91654ffd83f08f66b", "0110c80228683bc32879efb1b2f3931421e52eb6", "7ccd86abcdf2abcb86252b29735b24392a818c1f", "17c8851c47328dad603993f59e25bf67f8e64542", "3b2174b73957738d485175c12c3095ac802cdffa", "1de7857a95e851be4a153fc008be168c0c765782", "52137476895005f26098678a9af934f93071b416", "6519bf5580fcdcc9c50fd72c6c8dc5d040d443e8", "8012327465664ca6a64ee4d202536ec6c6d024f1", "6e40d81d8dbf93780c241e79639a860753dade11", "9c006ab6731fb7351d25bcbfe6f3f9a66b553328", "11b6149a68c0a7e5e648c408ce7d965e82a10c39", "469a5bd76639452c652d51761937e97332dc0615", "511549311d69f7c4353cabfeb225bb3f154f8f9c", "1855ffd13ab6b36e59cb9a9558461ffeff1704f7", "52e99334096f02c9cf386c9391fc68181c058f4c", "40138cbd57a4632d6267cff4c91b55e7376a6693", "5a830ad18ff1a45c197570065b65d212818eaef6", "0aaf629dcab6bd6f9947be2390fbd27bf86d5eac", "ec4e20322c940eca3f15b25708052e7fbc52c9d7", "3edaf0de6bbaf152a00cadcb1cb62a52b17fdee5" ], "paperAbstract": "Batteryless IoT devices powered through energy harvesting face a fundamental imbalance between the potential volume of collected data and the amount of energy available for processing that data locally. 
However, many such devices perform similar operations across each new input record, which provides opportunities for mining the potential information in buffered historical data, at potentially lower effort, while processing new data rather than abandoning old inputs due to limited computational energy. We call this approach incidental computing, and highlight synergies between this approach and approximation techniques when deployed on a non-volatile processor platform (NVP). In addition to incidental computations, the backup and restore operations in an incidental NVP provide approximation opportunities and optimizations that are unique to NVPs.\n We propose a variety of incidental approximation approaches suited to NVPs, with a focus on approximate backup and restore, and approximate recomputation in the face of power interruptions. We perform RTL level evaluation for many frequently used workloads. We show that these incidental techniques provide an average of 4.2X more forward progress than precise NVP execution.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3124533", "http://nvmw.ucsd.edu/nvmw18-program/unzip/current/nvmw2018-final4.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6ab2c1fa831052a09eab22e799f0a065d13580dd", "sources": [ "DBLP" ], "title": "Incidental computing on IoT nonvolatile processors", "venue": "MICRO", "year": 2017 }, "6acd2cce7d04bba4af841a5d524a69cc423e671e": { "authors": [ { "ids": [ "2792832" ], "name": "Tao B. Schardl" }, { "ids": [ "16521420" ], "name": "William S. Moses" }, { "ids": [ "1712661" ], "name": "Charles E. 
Leiserson" } ], "doi": "10.1145/3018743.3018758", "doiUrl": "https://doi.org/10.1145/3018743.3018758", "entities": [ "Cilk Plus", "Common subexpression elimination", "Compiler", "Concurrency (computer science)", "Control flow", "Control flow graph", "Intermediate representation", "LLVM", "Loop scheduling", "Loop-invariant code motion", "OpenMP", "Optimizing compiler", "Parallel computing", "Recursion", "Scheduling (computing)", "Spawning networks", "Tail call" ], "id": "6acd2cce7d04bba4af841a5d524a69cc423e671e", "inCitations": [ "e191fb663b1dd33d59a8f2c9048a408ae145fca3", "2d4fdf953dc3f79fd760b317bca228fe80ec9386", "5be83e638fb0bb16d1e3bb51c2f5a5e814d813dc" ], "journalName": "", "journalPages": "249-265", "journalVolume": "", "outCitations": [ "eaf130a181b812e35154f70a5bc05acf1d70b200", "728209cc265fd27f3a4f28265ff051279c747456", "1eb4c18aa99f19a8dc91e6235e2dbdb7fd46d06c", "72ae6c9b296f58cfcc9185ef3fdfd54a81fe03cc", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "1156f60e40548096df49528b1342bb3e88b0f378", "0606b9b2bedb67039eb1615f8ef5b13f42f8339a", "39867c7d684e77377f03d4f83e5dadf4acd8d61b", "13f7df91eb208a387d18fbad192c6f0f834f0b82", "3ee0eec0553ca719f602b777f2b600a7aa5c31aa", "03f5501e776ca19515df15e11f216265f3afc43d", "0fb659af82f2277c8a62ac888f4bfd01570e5470", "358f1efb36a605462288b5b9707e68621887358d", "27a956c44b9beda32618fde7efce542565e06381", "934b34df03bff0c934d2808e52403ec8cf97e094", "b483cabdce5562753abae51c144d26f3eaac75fb", "56e70cd03dfbc9b436b2ffa688ab69ea2fa82fd1", "61f6ae61c4e0b0b4333e9cad1feead72ab73b9d4", "e7351e01e0d41d666dba23d7a2da0a1f7dcae4af", "1f33e83905ee40dfeeacd6c04f64c1af71c2b7fb", "7b83b2cc9b4d5c0ce578b96c2508b0aa926fefda", "7330ddee7271f498926e5598271cb483dfc4caf1", "04b3880ea2ddca5f1b8cba48edee655a4dedbc48", "588f239fd53676109cd7996ce5f0cee4639dc89f", "1ee116d4253bc30c0988c995a3be2c594337f856", "1742aa2092223ca9f78f379ce9f541bac1001aaf", "6005fdb7813e0f07d90d6ed8e7beecd733ef4d04", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", 
"855ebfd962bc26b7b646e3a01983338adf70ce4c", "5752172e2c9d97a544798a97fa17b2a627d7116d", "0d9c39200e541ce7c5a2f3cfa54302c2c9bc631a", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "7673efb3f26da627247838b427ae3980d97689be", "3ccc1e2f8be4c99a38abd10560c606d9256d176b", "47fea97038923902a502403219fc44fd22b5d19f", "a68e2ede5e01997bc7fe0cf597a06650acdac0f5", "950790da1aaa1389327cb35fb65af8ffead25515", "8368d2fc947cf6ac46a1d251d1895f2f87c7d498", "0bd6329e696abf03e763f0b77d253b352c87d9cb", "26999d2b6bd57cf699f9b8b89dd93f3dae982f70", "3dff11679346f5344af1018cad57fa14cc349f2f", "1c9885c43f26f791a26d44c929ae3ab0c6a79f81", "5732268aea93dc3aa6b6f4a2db57a609b9714417", "2194c3460ab71f3826db00b045b2ae590c753319", "3667d48b43140092f552fef31affa1ddba9636de", "145f95174e769bc0195fbf632ffce174f8a1ac1a", "0836859831c6c69412ae633bcf47e96355a92d6b", "36d2c1290591a3e194df392a54149d5c0548f7d6", "2e375fbc7bc0ac5c169056a13839723ae8101055", "b8719183f3579e6f0bdf2d98ee500097a28cb9cf", "077e8c0df834570e7baa110599825059ce5f34f8", "0a89fafea6184b469511ba73735d451da92c18fa", "0003b4b4c35f0384596b6f129f7dda8449cdc79f", "101f10b90ce859135868668478fbde5882c87458", "3486aeaf540c48952120fe853d672af984f40a6a" ], "paperAbstract": "This paper explores how fork-join parallelism, as supported by concurrency platforms such as Cilk and OpenMP, can be embedded into a compiler's intermediate representation (IR). Mainstream compilers typically treat parallel linguistic constructs as syntactic sugar for function calls into a parallel runtime. These calls prevent the compiler from performing optimizations across parallel control constructs. Remedying this situation is generally thought to require an extensive reworking of compiler analyses and code transformations to handle parallel semantics.\n Tapir is a compiler IR that represents logically parallel tasks asymmetrically in the program's control flow graph. 
Tapir allows the compiler to optimize across parallel control constructs with only minor changes to its existing analyses and code transformations. To prototype Tapir in the LLVM compiler, for example, we added or modified about 6000 lines of LLVM's 4-million-line codebase. Tapir enables LLVM's existing compiler optimizations for serial code -- including loop-invariant-code motion, common-subexpression elimination, and tail-recursion elimination -- to work with parallel control constructs such as spawning and parallel loops. Tapir also supports parallel optimizations such as loop scheduling.", "pdfUrls": [ "http://wsmoses.com/tapir.pdf", "http://dl.acm.org/citation.cfm?id=3018758" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6acd2cce7d04bba4af841a5d524a69cc423e671e", "sources": [ "DBLP" ], "title": "Tapir: Embedding Fork-Join Parallelism into LLVM's Intermediate Representation", "venue": "PPOPP", "year": 2017 }, "6aeb1bc8306f0c079d24cded5c4eff7de5da46e8": { "authors": [ { "ids": [ "2341074" ], "name": "Mike O'Connor" }, { "ids": [ "2866959" ], "name": "Niladrish Chatterjee" }, { "ids": [ "15895903" ], "name": "Donghyuk Lee" }, { "ids": [ "33301771" ], "name": "John M. Wilson" }, { "ids": [ "40248185" ], "name": "Aditya Agrawal" }, { "ids": [ "1715863" ], "name": "Stephen W. Keckler" }, { "ids": [ "1696619" ], "name": "William J. 
Dally" } ], "doi": "10.1145/3123939.3124545", "doiUrl": "https://doi.org/10.1145/3123939.3124545", "entities": [ "Baseline (configuration management)", "Central processing unit", "Concurrency (computer science)", "Dynamic random-access memory", "Graphics processing unit", "High Bandwidth Memory", "ISDB", "Locality of reference", "Memory controller", "Terabyte", "Throughput", "Wiring" ], "id": "6aeb1bc8306f0c079d24cded5c4eff7de5da46e8", "inCitations": [ "82d9d4cae6cb82ad516371414cc53da6ec9708b0", "7910d6665da5f1aefc1d151e526ce9fe65a82ce0" ], "journalName": "", "journalPages": "41-54", "journalVolume": "", "outCitations": [ "80bedecd71a7e965b9c2f667b8f42c2705d4c714", "37782ba980effbbb63d8518625d0f795be866822", "540a65f5e2176c4000551f1335a24e0f07500f68", "363f34245c38c45eafa9c1e50e790ed33c69f224", "074a308aa9682041c06cb572698cc32cb73dba46", "8b04ea524cb6ced72868c120a00c4679d84be006", "464af3debb8434807ab04eb749d63594e78ee786", "5bc38d62a09e26105973662c420628810b597750", "1f3611aa60accc2ebd229162b8919b2a7ccbae33", "37e49c57dd4d0849380d177222db53e52ff21347", "942394566ccb9dbf40243dc2bd3c4d7605bbefa2", "570445ec044aa24f8894c75130c57a5ed174e200", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "009e4da527a3518c29c95970efb79733a67979fb", "8f1b17f8f327f6f3856f906a81e9ca19b7f9c5f7", "2dc38b527e91f8cfee6f6c7ba4d079087c293471", "54a15f2c25bec4274d5bb423dbc5002426a506a3", "bfe6157690a837af71c62abc94811ef7faf45fd4", "0b99d677883883584d9a328f6f2d54738363997a", "4390f4a06a036b8f04cbb4fe7611fa5af9492797", "d410f8128bd4efa6adc886259e5d9de4cd7587bc", "3ad6c5012cba78f29cb8d0ed556c32b93966076e", "c8b6e390eb9cf0a3452decfff8461359315416cd", "1dec8f5106d11047aaaf126121110cbf890f17c3", "10db43d5a1b3c1ddf6dc93fa992b8ed42dcf9833", "232790b7531d6ab598ebae0aff2f5e0017bcbf28" ], "paperAbstract": "Future GPUs and other high-performance throughput processors will require multiple TB/s of bandwidth to DRAM. 
Satisfying this bandwidth demand within an acceptable energy budget is a challenge in these extreme bandwidth memory systems. We propose a new high-bandwidth DRAM architecture, Fine-Grained DRAM (FGDRAM), which improves bandwidth by 4× and improves the energy efficiency of DRAM by 2× relative to the highest-bandwidth, most energy-efficient contemporary DRAM, High Bandwidth Memory (HBM2). These benefits are in large measure achieved by partitioning the DRAM die into many independent units, called grains, each of which has a local, adjacent I/O. This approach unlocks the bandwidth of all the banks in the DRAM to be used simultaneously, eliminating shared buses interconnecting various banks. Furthermore, the on-DRAM data movement energy is significantly reduced due to the much shorter wiring distance between the cell array and the local I/O. This FGDRAM architecture readily lends itself to leveraging existing techniques to reducing the effective DRAM row size in an area efficient manner, reducing wasteful row activate energy in applications with low locality. In addition, when FGDRAM is paired with a memory controller optimized to exploit the additional concurrency provided by the independent grains, it improves GPU system performance by 19% over an iso-bandwidth and iso-capacity future HBM baseline. 
Thus, this energy-efficient, high-bandwidth FGDRAM architecture addresses the needs of future extreme-bandwidth memory systems.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3124545", "http://www.cs.utexas.edu/~skeckler/pubs/MICRO_2017_Fine_Grained_DRAM.pdf", "http://www.cs.utah.edu/~nil/pubs/micro17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6aeb1bc8306f0c079d24cded5c4eff7de5da46e8", "sources": [ "DBLP" ], "title": "Fine-grained DRAM: energy-efficient DRAM for extreme bandwidth systems", "venue": "MICRO", "year": 2017 }, "6af9b38c455abd8f694046ccd549ecf6674a0145": { "authors": [ { "ids": [ "3122513" ], "name": "Changyeon Jo" }, { "ids": [ "3305511" ], "name": "Youngsu Cho" }, { "ids": [ "34460642" ], "name": "Bernhard Egger" } ], "doi": "10.1145/3127479.3129262", "doiUrl": "https://doi.org/10.1145/3127479.3129262", "entities": [ "Algorithm", "Data center", "Downtime", "Live Aid", "Machine learning", "Performance per watt", "Service-level agreement", "Virtual machine" ], "id": "6af9b38c455abd8f694046ccd549ecf6674a0145", "inCitations": [], "journalName": "", "journalPages": "351-364", "journalVolume": "", "outCitations": [ "09a8a5cd0e6caa3ffa39afae01bea3575aa0bbf5", "4f88aa229971889e65a7b2ccda47ed7816e5c376", "f93a33630f09e03d7b6fe8963b107e0d1342edef", "f17bf8b1735fcaab55d174d60a01602af113270a", "b53ca4243a5d6a5dfe6fed8ab68d94a43de96716", "3000e77ed7282d9fb27216f3e862a3769119d89e", "9809ad10d68d1c222e26386f26d4090fdd1163fe", "2e72178091b2ca445f46200dcba71a53417b69eb", "cc6a68cfa395d62d4a1d76de062e87012a03e072", "6a1f99f44290d67cda02976e24358dddce24a739", "49864efddfc0cb3cd805d06434befb5642d4ad46", "4f739534a366799e170599d3ff3d65597f0118db", "94a62be8355bf5be1edcc881a26559e5258e0f1d", "316486bada6023816c785c0d4eb401658737be3f", "336c1e3936ce150907b50f624b060bdb43d5e11b", "4205079799d4628a6d1ac6601591dd1c9f7b2ad2", "21843a9de32675bc961bd0929ce6ab50215e1888", "3e1a99137a1cf74d8bffaf42520886499158263a", 
"9c1f92a0d0d0ed692a98739d2156fbfb14710979", "b06c7df9404cf6d87b5d552808450b8c226deab9", "13c1c167dde348be47b0f681bf7df3c469ad6ef9", "1ecd36058e48734213c81728f42ff798a2c52833", "54a534ca4fd52fa9b1c837568ea6a474048cc34d", "ba34e70a31fbca81683e068c712b39ba48539940", "545a2bcb074bdfc1e457ab75af3e3cb19e283c5c", "c6a2ee3a694b7c2bd53388a10d526486fb4988e8", "2960c89331eb7afa86584792e2e11dbf6a125820", "57446e4f6a2d0af8003b1dcf1c9ba440e63c039f", "00a9ba0063d34ec56792849a67ef57b4601becbb", "acec0a12a1946279a35b79828cab4f4cb13761fd", "1c02c55e0d8a9820944fdef364e98542945b445c", "095bb341c31f5fb4df05213ce9557ac89cda466d", "28363c658b7c4ebb0ecfd1bdd137cb7869e90e0c", "10551c91b4d36d1009b23b4d2b88a9e1733fe029", "1b9383d6bc46ffc132fabe94c74b77962a66116a", "791d4f039b74abacc6a20263fedd00ec72a5d01e", "2a34289c09e7f60daf2838dcd55630080f95614d", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "2f2cdd7b0c98b5e43b61272d2ac3ebb5cd29041d", "427a74ca404d76e161dba7e1aa00b2fa78323b1b" ], "paperAbstract": "Live migration is one of the key technologies to improve data center utilization, power efficiency, and maintenance. Various live migration algorithms have been proposed; each exhibiting distinct characteristics in terms of completion time, amount of data transferred, virtual machine (VM) downtime, and VM performance degradation. To make matters worse, not only the migration algorithm but also the applications running inside the migrated VM affect the different performance metrics. With service-level agreements and operational constraints in place, choosing the optimal live migration technique has so far been an open question. In this work, we propose an adaptive machine learning-based model that is able to predict with high accuracy the key characteristics of live migration in dependence of the migration algorithm and the workload running inside the VM. We discuss the important input parameters for accurately modeling the target metrics, and describe how to profile them with little overhead. 
Compared to existing work, we are not only able to model all commonly used migration algorithms but also predict important metrics that have not been considered so far such as the performance degradation of the VM. In a comparison with the state-of-the-art, we show that the proposed model outperforms existing work by a factor 2 to 5.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3129262" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6af9b38c455abd8f694046ccd549ecf6674a0145", "sources": [ "DBLP" ], "title": "A machine learning approach to live migration modeling", "venue": "SoCC", "year": 2017 }, "6afa3c6d8cb3db80947ab5563c7604086403ddb9": { "authors": [ { "ids": [ "1703054" ], "name": "Ye Chen" }, { "ids": [ "1690270" ], "name": "Ke Zhou" }, { "ids": [ "1783406" ], "name": "Yiqun Liu" }, { "ids": [ "38898636" ], "name": "Min Zhang" }, { "ids": [ "8093158" ], "name": "Shaoping Ma" } ], "doi": "10.1145/3077136.3080804", "doiUrl": "https://doi.org/10.1145/3077136.3080804", "entities": [ "Algorithm", "Heuristic evaluation", "Hyperlink", "Information retrieval", "Online and offline", "Relevance", "Search algorithm", "Software metric", "Trust metric", "Web search engine" ], "id": "6afa3c6d8cb3db80947ab5563c7604086403ddb9", "inCitations": [], "journalName": "", "journalPages": "15-24", "journalVolume": "", "outCitations": [ "2e9d862db3e67cb11c2d78c9cd4bcd666d0fb9eb", "5ea4873fee47c25a50f33afac0ae4a41e6e5a1c7", "c77be34db96695159244723fe9ffa4a88dc4a36d", "c77044bbf672707ee8031147a1e9bd03f0250860", "bd11602566b54dc3183af4cfede0dca914a6627b", "019a82e45aeff1c83a227da913eefb47cee371a1", "575a33aa00b7be4c63d5e6b455a6726e84d3f4d4", "de56d11c568cde9fc45b85f1c58515696d22f6b1", "7b845d042a614c15b0412258103009f8c9d042a9", "7ef9f22727abc2c78d5d364995fa4ac75c666f9b", "38cb9bc50282615e0631d47825c5e0c7f691c3f7", "a0f96a954ccb478440523ef36e67f01de51e6918", "0c905be96ad92683b3a459133393de5005adf4bc", "2167054da02b0bb2dddfccfa2c60866858478da2", 
"b0f78fab8f70deaa0b64466b3fe77e2863d02908", "f33cb88a87d2077953f682a6d4fcd57d7149562c", "1686596d24edcc2dd723920ea800a1c0b76c3d9e", "040678daf6a49a88345ee0c680fccfd134f24d4b", "6e3b9188ec3d3f7cff45ef90994ff9cce939f71c", "0883ff294e62403eb5c9bd8d81268ca2a0ea518c", "0e9f79887d29e5fe8455d7cd815f3c44a78ade5e", "328c7b4ce5a0d81326ee2a3befa0f2dd630a48c1", "1b9458181d29fdb3745e3b50f41fe4ef6c2b4492", "1b50c8353d9e4a241a5f4a9c0088eb9c5f593e1f", "0e10dc74083d65ad8f395f9673f5bc33820b9f11", "3453d79657e8f3e1614e73608f92465ab7f5a291", "c41950d7d0ce14e41691c0fcff67dd7aaaada571", "618ee0a69b0955509fb78f2a2263c33386a804cf", "993d394fd5c63f9efa310fba938fba677141801c", "1aae05759f085792596312eda89315145aea794b", "5cb0cd0486518c5474e25b800ee210016f98b1e9", "14895145cc7fe8e4bf133bbe8ce2d3c4fd33fef0", "bd73ec7bac51a332e5833bb1a02bcd3bf7a79474", "94121093a8a8e411513ed6b45335a47f06f8d4bc", "053e5d866889384911ca8778a2ad484bebafd0ca", "abc853bd9a67dd87ebce025a7dc94fb467b067b9", "4047d5efd1683bbd3280500c3244149089412024", "36ea668bb7617b9c1e6e98aebe96a0aaf90b569e", "87113f966642bba168fd145339c5746e64603cc0", "471cb4c2e5039bdaacb0274fee70c7fe2e93493e", "8cc9e53e8bfdcaa37b77b9d838d0db7deadcda04", "342be3d0e7529e63d16d1b22dbddf26d63e14bd6", "e95a1ce95b4f0e7b542b70ef80073b9525646717" ], "paperAbstract": "As in most information retrieval (IR) studies, evaluation plays an essential part in Web search research. Both offline and online evaluation metrics are adopted in measuring the performance of search engines. Offline metrics are usually based on relevance judgments of query-document pairs from assessors while online metrics exploit the user behavior data, such as clicks, collected from search engines to compare search algorithms. Although both types of IR evaluation metrics have achieved success, to what extent can they predict user satisfaction still remains under-investigated. 
To shed light on this research question, we meta-evaluate a series of existing online and offline metrics to study how well they infer actual search user satisfaction in different search scenarios. We find that both types of evaluation metrics significantly correlate with user satisfaction while they reflect satisfaction from different perspectives for different search tasks. Offline metrics better align with user satisfaction in homogeneous search (i.e. ten blue links) whereas online metrics outperform when vertical results are federated. Finally, we also propose to incorporate mouse hover information into existing online evaluation metrics, and empirically show that they better align with search user satisfaction than click-based online metrics.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080804" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6afa3c6d8cb3db80947ab5563c7604086403ddb9", "sources": [ "DBLP" ], "title": "Meta-evaluation of Online and Offline Web Search Evaluation Metrics", "venue": "SIGIR", "year": 2017 }, "6b63b0bd3471d04afb88333d638736a120ce32b0": { "authors": [ { "ids": [ "20568058" ], "name": "Nikhil Hegde" }, { "ids": [ "2698880" ], "name": "Jianqiao Liu" }, { "ids": [ "9996721" ], "name": "Kirshanthan Sundararajah" }, { "ids": [ "1700486" ], "name": "Milind Kulkarni" } ], "doi": "10.1109/ISPASS.2017.7975294", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975294", "entities": [ "Algorithm", "Benchmark (computing)", "Bioinformatics", "Bioinformatics", "Computation", "Computational science", "Computer graphics", "Data mining", "Distributed memory", "Graphics processing unit", "Reference implementation", "Scalability", "Shared memory", "TRAVERSE", "Tree traversal" ], "id": "6b63b0bd3471d04afb88333d638736a120ce32b0", "inCitations": [ "35d8727f5e726177d8f12a4955524804c5d531cf", "f8bdeda858392940eecc837ff4a4f27f9a6d0208" ], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and 
Software (ISPASS)", "journalPages": "227-238", "journalVolume": "", "outCitations": [ "d410f8128bd4efa6adc886259e5d9de4cd7587bc", "fce7fd98928ab9bf3e4e919e108c48fc1040f569", "24f5343d06fd87efe2d78982f2302094bad604ff", "8724631b1b16469fb57df1568d41d1039067c717", "ca2bfcf88873cce70e92b160bf0b6a2472c2fee7", "826a530b835a917200da1b25993b5319021e4551", "044c2e4b6ee04fbc1b5838afd8583dc3d11e8222", "d2f27753960117098a97bd63e0f951af9d094fee", "105ef0aaca927ab66295ce5e9c2a1f4ceba98152", "027e7780dbda48d99f3654e77b4a63063224950e", "947c6bf534ccd620044f77c3bd6068f633b421fb", "177c153341d4ab6bf94aab982a4b4d90608f5ee9", "2b43a525ab4755af63cce0dcc92789fd2dc54d63", "3096595380cfc118bb163b74897e13a84d094432", "f5e5f365acc6f00c014c523c65efd9df6cee2606", "67dc83a15c020b84403f1b6b52140965f11e4588", "f4e959d8b5c09739931a2d9e4a9f27ddd1f31d60", "28437a38fe8a1335fb8cad44c52707367395fdaf", "004646b5f172aa3874e13c282ece7576ede361f9", "a659cfa6f5721ce9b9fd4d90bc6311de888856a6", "9466bb74a149b1bad4f1d9922e8ccb2cde2c3d3f", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "fc7577d182417e2013b702cc962b9f06a3115a20", "658ba5abf3bc748cbc86a2cbfc1a0202ff79cd75", "10ba24b55580a5bd3422824900290bdab5842729", "61cd159f10181b8cbbc25750b4b718009d649982", "0ad64e710d615794af918ad5e037f4d909f97a28", "bb0dc7f89a8e64aa537e2e2d26e8c44e30bead86", "2790284b6a16790d03b0cb5ed46bc6b0fecde1eb", "189a34938acaf3b94cd948b5f574de11d75d4fd5", "128fb51d28256aac3b20a8f6de85598767876fbe", "af58a1221b31f007fe3b4418db66d905df9e8db8", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "17907e18f11b5ab7ae266e87008acead6d1943d8", "36f06481eaae63522dfb61475602584997ebfee8", "94c20561f102ede79c461c6aeee74eb7a55d5eb4", "30ad73786e2fa09fd191de334819784d86a5c55d", "4a744fe29d4658542cd3d692fd6ff92f7f6b9104" ], "paperAbstract": "An interesting class of irregular algorithms is tree traversal algorithms, which repeatedly traverse various trees to perform efficient computations. 
Tree traversal algorithms form the algorithmic kernels in an important set of applications in scientific computing, computer graphics, bioinformatics, and data mining, etc. There has been increasing interest in understanding tree traversal algorithms, optimizing them, and applying them in a wide variety of settings. Crucially, while there are many possible optimizations for tree traversal algorithms, which optimizations apply to which algorithms is dependent on algorithmic characteristics. In this work, we present a suite of tree traversal kernels, drawn from diverse domains, called Treelogy, to explore the connection between tree traversal algorithms and state-of-the-art optimizations. We characterize these algorithms by developing an ontology based on their structural properties. The attributes extracted through our ontology, for a given traversal kernel, can aid in quick analysis of the suitability of platform- and application-specific as well as independent optimizations. We provide reference implementations of these kernels for three platforms: shared memory multicores, distributed memory systems, and GPUs, and evaluate their scalability.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975294" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6b63b0bd3471d04afb88333d638736a120ce32b0", "sources": [ "DBLP" ], "title": "Treelogy: A benchmark suite for tree traversals", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "6b7c74e9d7d334613bf22032ad1184a92c66b643": { "authors": [ { "ids": [ "2042271" ], "name": "Swann Perarnau" }, { "ids": [ "2381933" ], "name": "Judicael A. Zounmevo" }, { "ids": [ "40293316" ], "name": "Matthieu Dreher" }, { "ids": [ "19244667" ], "name": "Brian C. 
Van Essen" }, { "ids": [ "1695375" ], "name": "Roberto Gioiosa" }, { "ids": [ "1700811" ], "name": "Kamil Iskra" }, { "ids": [ "1695549" ], "name": "Maya Gokhale" }, { "ids": [ "1696668" ], "name": "Kazutomo Yoshii" }, { "ids": [ "1709765" ], "name": "Peter H. Beckman" } ], "doi": "10.1109/IPDPS.2017.25", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.25", "entities": [ "Component-based software engineering", "Kernel (operating system)", "Linux", "Linux", "Load balancing (computing)", "Memory hierarchy", "Memory management", "Memory module", "Non-volatile random-access memory", "Operating system", "Operating-system-level virtualization", "Out of the box (feature)", "PCI Express", "Persistent memory", "Programmer", "Scheduling (computing)", "Simulation", "User interface", "User space" ], "id": "6b7c74e9d7d334613bf22032ad1184a92c66b643", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "153-162", "journalVolume": "", "outCitations": [ "2f2614d54cae5e9367a061bc7455a870d9b9a85d", "55c968c46b45f4b0955b31f36494ca6a87b8ddb4", "77d97e17c7129a810d14fb8dfd17fa4ca07e18bc", "24bb5f66906421f42aff2d64dfa35b4beb3ead7a", "e040885cd33f933f356ff8f60783acfd3ed1a7c5", "0e0ece84cd0a6c7d8bbe8e06da1341dcbd87d225", "5232bdf468e907010a0886a63343e54b448780c5", "19bd720b75e5c24dd8c702908222514b8df4d0da", "0d6787f19c7a521784a38d31420dd8da7bd490ef", "8fe10a1189cbd7644f38a2f65df509d9f84893fd", "2fa3b8370363eb07f49ad864c932a2ca3c019a87", "7070f6e7be0ea5c8b80a6d3f9986fcf743860443", "2d846b715e2e6bde38a4c70861a18a4024e11412", "34a86b7b24d4a93dbb249fc05f0b7c0f48f90aff", "4fe2bf624e18d71d87ae36824606c42c64446562", "1e2207b8f1b2ce2f09d2d8bfa5e01324a5d4effa", "1274edf4e40fc3b012ace2a2b74217aa52e6d222", "166eebd00f73b599be246d1897a87d715509d431", "bc382e961d061afb334f015c6293704c58afcc89", "145ad8fff2a7023e68645b1b3fe71fea5edf771e", "273fcf24c3c9c07cde1cc68b23786ff7910e0d47", "04ee1aee71c420c153893e26408a9e7b638ca763", 
"3b7e2038ec22cf637df70c833d473b0f3b43713a", "d36f0cf5375345732339abe77255f024d5a9d05a", "701c90f0593e5675d62fc3882bd5da9b7c296394", "8f1e54c98327edb317225d60464837b6557b247d", "c5d0d547b6a3fa470dcc77f558f6c7c5768edabd", "be337425916d4e61442269a9bc1cf69169cedb8d", "2073266dfb3f034d55cd5a3fca62d230832afd43", "cf60a588acc40f7fbb1a61612cdfd380d6faae6d", "5b5dfbfffeade87035fca8fadca1a7f27f8a72fe", "227e529c08f821d134dd15fb9296419250ab9301", "771156b34f7f4f539ef7289027e2205692206aed", "050b6a5f0e650a12223c27fb133eb5e398df8480", "44da4713fcf0a4ee7a8323737e678b3faec42d2e", "9254ac5860329c41457c1a0f03c7c5f216c56318" ], "paperAbstract": "Exascale systems are expected to feature hundreds of thousands of compute nodes with hundreds of hardware threads and complex memory hierarchies with a mix of on-package and persistent memory modules. In this context, the Argo project is developing a new operating system for exascale machines. Targeting production workloads using workflows or coupled codes, we improve the Linux kernel on several fronts. We extend the memory management of Linux to be able to subdivide NUMA memory nodes, allowing better resource partitioning among processes running on the same node. We also add support for memory-mapped access to node-local, PCIe-attached NVRAM devices and introduce a new scheduling class targeted at parallel runtimes supporting user-level load balancing. These features are unified into compute containers, a containerization approach focused on providing modern HPC applications with dynamic control over a wide range of kernel interfaces. To keep our approach compatible with industrial containerization products, we also identify contentions points for the adoption of containers in HPC settings. Each NodeOS feature is evaluated by using a set of parallel benchmarks, miniapps, and coupled applications consisting of simulation and data analysis components, running on a modern NUMA platform. 
We observe out-of-the-box performance improvements easily matching, and often exceeding, those observed with expert-optimized configurations on standard OS kernels. Our lightweight approach to resource management retains the many benefits of a full OS kernel that application programmers have learned to depend on, at the same time providing a set of extensions that can be freely mixed and matched to best benefit particular application components.", "pdfUrls": [ "http://www.mcs.anl.gov/papers/P7010-0217.pdf", "https://doi.org/10.1109/IPDPS.2017.25" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6b7c74e9d7d334613bf22032ad1184a92c66b643", "sources": [ "DBLP" ], "title": "Argo NodeOS: Toward Unified Resource Management for Exascale", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "6b8cc1c8358b84a55ec2858910adf839928370ef": { "authors": [ { "ids": [ "3120064" ], "name": "Min Si" }, { "ids": [ "2103230" ], "name": "Pavan Balaji" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.27", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.27", "entities": [ "Computation", "Computer performance", "Correctness (computer science)", "Intel Core (microarchitecture)", "Interrupt", "Knights", "Manycore processor", "Message Passing Interface", "Multi-core processor", "Point-to-point (telecommunications)" ], "id": "6b8cc1c8358b84a55ec2858910adf839928370ef", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "206-214", "journalVolume": "", "outCitations": [ "03ce4e186c562c164165ac9a8e73d198e7499944", "38978787ef1ff65158918675b6fcbd27af16ca8c", "29c6dc1ec8af2aae994efafd101a853ab0d054ad", "1a3704ec76ee662093e2bf57135f8230d1fc7e8c", "2faf6e0b2b08be9a3ab46d6e932e2c642b882195", 
"1e6a243e6a916b50da630ebbc252c0c471fa3603", "33bf94c462ecb645ca580f0919014ec98db3d69a", "b4686c5d516bf78735ae586f35853dad3a759b59", "6bb0cde4cad16661cc8694842ffe170c22f14c6f", "62b996c8b0845277f1b8a1459ecae454c054cd7c", "7da4737120dbc08a4f06902df15c0e2569efca30", "17add92eadb85475c55ca747c894aab83012bfeb", "e1d29194ff677c7426d65af3f5b6fec3ffa182b6", "309bd4c9b1b9cf81cbf071b8b2ad80e97acf7c60", "5028918fa934c721700f4c88dcb417d0f4d85349", "adf6c68dd3c839f58d10f503607ba67efa5192b7" ], "paperAbstract": "The MPI two-sided communication model has been widely used in scientific applications for decades. The nonblocking version of the two-sided routines allows the application to potentially improve performance on many systems by overlapping communication and computation. In practice, unfortunately, the overlap is hard to achieve because of the limitations of the MPI internal progress engine and the underlying network. The traditional approach to resolving this issue is to implement an asynchronous progress engine based on either additional threads or hardware interrupts; however, such approaches may result in reduced computing power or expensive overheads. In this paper, we present a portable process-based asynchronous progress approach for two-sided communication in the PMPI-based Casper framework. It allows the user to specify an arbitrary number of cores on a multicore or many-core architecture and offload the point-to-point communication to these cores, thus ensuring asynchronous progress with low overhead. Unlike our previous work that supports asynchronous progress for the MPI one-sided model, a completely new design is needed for the message-matching-based two-sided model in order to ensure comprehensive semantics correctness as defined in the MPI standard. 
We present a detailed design of this two-sided model and compare it with the traditional thread-based approach on both a multicore Intel Xeon cluster and a many-core Knights Landing cluster.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.27", "http://www.mcs.anl.gov/papers/P7070-0717.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6b8cc1c8358b84a55ec2858910adf839928370ef", "sources": [ "DBLP" ], "title": "Process-Based Asynchronous Progress Model for MPI Point-to-Point Communication", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "6bdef4a86108309086de4071c9d28d97565a84a4": { "authors": [ { "ids": [ "1890685" ], "name": "Prashant Pandey" }, { "ids": [ "33877556" ], "name": "Michael A. Bender" }, { "ids": [ "2387399" ], "name": "Rob Johnson" }, { "ids": [ "2620138" ], "name": "Robert Patro" } ], "doi": "10.1145/3035918.3035963", "doiUrl": "https://doi.org/10.1145/3035918.3035963", "entities": [ "Bit array", "Bit manipulation", "Bloom filter", "Central processing unit", "Computational biology", "Concurrency control", "Data structure", "Database", "Experiment", "Hash table", "Haswell (microarchitecture)", "In-memory database", "Locality of reference", "Lookup table", "Quotient filter", "Random-access memory", "Solid-state drive", "X86" ], "id": "6bdef4a86108309086de4071c9d28d97565a84a4", "inCitations": [ "746ff01cf8bf954e15eb6ad9c240205d39dca387", "3a6f98e3cb616dc7fe479282f032975cb898fa5a", "f2bec2b1a69d8db2cb8ada7096be847fb5b83dd4", "8a7b52dd3bf98996bdcafad45b5549ff0f199424", "0030ab76d18084ebae4da1cab829c535a71b984c", "7140dfc69ed2ca65dd8bbdcf5d5b3742f2d839c2", "3e3f8fcf746a8bfa1aa6773ded336bd3ae6245bf", "0821389b330338a5b844080287440c583d05d441" ], "journalName": "", "journalPages": "775-787", "journalVolume": "", 
"outCitations": [ "1688c9bb957395bf7ac05098537c736cfd076382", "f2bec2b1a69d8db2cb8ada7096be847fb5b83dd4", "76e43a0e0ff9e32cfc0223721e13d8d2a0d2bfd0", "6c1f7496580d1169b232c53981f1e63e593be21f", "0ce479229630e55e732597cf9b2aeb5018aae4c2", "e9105f3cdb634572032d99c3a762921a4b2842c7", "fc645620fbf4a6667e11e237c882f276aa2df573", "8f491cc687250f6ae2c3d87dcb256680c25ff1ce", "3946e3a4a4f5b42d55859153e98d3e83151303bb", "41cdc268d9ec5595f40ebf2e457f3f7f87a503de", "d87c6b6a1a5da0502e311e24fb2d2f79dd956623", "da6a07944d97723d6c154d76609b5c20a3636f9a", "34bb8a2c052e358e14b49eb8b03f02826cdfecaf", "339888b357e780c6e80fc135ec48a14c3b524f7d", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "59f0f5f7ee08d4d690d95c96db6af793c139c7a4", "389b618c42c0d5d32a569b9cbaa02a7ff77c6be6", "21eef1690dccbd1aacc8098da4bae7299096792c", "7a278ee0578f194700cadc3811cdda4ec751f88a", "2ebf5570bee564efbf1b782ba23454708d6c251b", "24d8d6225ea758f240dd75a658a1f2957fb18d20", "3a2f37d3648592ffb42155c28f71894ad61937fe", "92a6961f076307d5b4778fd45d5f01f6d5d84e12", "7f49cad23f57a8bc48133b2c599d40c216d1c046", "03a3b5ca18f6482cfee128eb24ddd1a59015fb2d", "3593269a4bf87a7d0f7aba639a50bc74cb288fb1", "8c23432e7c894bcadfd0aed0fa36dc00ba5bd75f", "13bf79b773cc84590d3efeb88187f2675dea4b81" ], "paperAbstract": "Approximate Membership Query (AMQ) data structures, such as the Bloom filter, quotient filter, and cuckoo filter, have found numerous applications in databases, storage systems, networks, computational biology, and other domains. However, many applications must work around limitations in the capabilities or performance of current AMQs, making these applications more complex and less performant. For example, many current AMQs cannot delete or count the number of occurrences of each input item, take up large amounts of space, are slow, cannot be resized or merged, or have poor locality of reference and hence perform poorly when stored on SSD or disk. 
This paper proposes a new general-purpose AMQ, the counting quotient filter (CQF). The CQF supports approximate membership testing and counting the occurrences of items in a data set. This general-purpose AMQ is small and fast, has good locality of reference, scales out of RAM to SSD, and supports deletions, counting (even on skewed data sets), resizing, merging, and highly concurrent access. The paper reports on the structure's performance on both manufactured and application-generated data sets.\n In our experiments, the CQF performs in-memory inserts and queries up to an order of magnitude faster than the original quotient filter, several times faster than a Bloom filter, and similarly to the cuckoo filter, even though none of these other data structures support counting. On SSD, the CQF outperforms all structures by a factor of at least 2 because the CQF has good data locality.\n The CQF achieves these performance gains by restructuring the metadata bits of the quotient filter to obtain fast lookups at high load factors (i.e., even when the data structure is almost full). As a result, the CQF offers good lookup performance even up to a load factor of 95%. Counting is essentially free in the CQF in the sense that the structure is comparable or more space efficient even than non-counting data structures (e.g., Bloom, quotient, and cuckoo filters).\n The paper also shows how to speed up CQF operations by using new x86 bit-manipulation instructions introduced in Intel's Haswell line of processors. The restructured metadata transforms many quotient filter metadata operations into rank-and-select bit-vector operations. 
Thus, our efficient implementations of rank and select may be useful for other rank-and-select-based data structures.", "pdfUrls": [ "http://www3.cs.stonybrook.edu/~ppandey/files/p775-pandey.pdf", "http://doi.acm.org/10.1145/3035918.3035963", "http://www3.cs.stonybrook.edu/~ppandey/files/SIGMOD17_Talk_CQF_long.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6bdef4a86108309086de4071c9d28d97565a84a4", "sources": [ "DBLP" ], "title": "A General-Purpose Counting Filter: Making Every Bit Count", "venue": "SIGMOD Conference", "year": 2017 }, "6c6baac8a7e3ea43cf875da9cafd6f1f06d0cea6": { "authors": [ { "ids": [ "2965669" ], "name": "Francois Tessier" }, { "ids": [ "3348747" ], "name": "Venkatram Vishwanath" }, { "ids": [ "1795494" ], "name": "Emmanuel Jeannot" } ], "doi": "10.1109/CLUSTER.2017.80", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.80", "entities": [ "Algorithm", "Analysis of algorithms", "Benchmark (computing)", "Bottleneck (software)", "Computer data storage", "Cray XC40", "Data aggregation", "Job control (Unix)", "Lustre", "Memory-mapped I/O", "Message Passing Interface", "Multiple buffering", "Network topology", "Simulation", "Supercomputer", "Two-phase commit protocol" ], "id": "6c6baac8a7e3ea43cf875da9cafd6f1f06d0cea6", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "70-80", "journalVolume": "", "outCitations": [ "2da4ab6c02d97fe47b589ddd450a5c41f2b47bb9", "6ebc0c4e3e14a644a71ec7db11707e917693ffcb", "ebc09b04a900afc6c3cf53a4b7ff6035f33f02b2", "e1f02518a257af5af200b77e0518cfe6c6f437f8", "a14e1d1d3eea6803ac34b904a4c619f8f686370c", "25d5f7757ebd0b7a5cde7bf64c83ad0020318f39", "2a11832bb798de3315838c327bdcec6493cd2a5c", "4b0db76df9148e80806e7b45b13e85ee54cd5b6c", "171450cd7ed50d4c50955949c87df08bbb115549", "55224d0eedb75cde4474667ad01417eb502b05cd", "0a564c5117375287c60d3a27a96003f30396f62f", "cc5e8e2b073c41983a76e38183f89ea724307175", 
"36d76c6079f993685354edb10d68df971d7da519", "c0c56908d343d52669e1aee072dd611681dc831f", "2d60d3596490d9999d8433bf41405060779bc11d", "409ed5839cf6d0ba246d91f82d1ac33cbe600c27", "9ca4fb478cfa38ed8a490bcb361dd7631aa3af37", "07f5e78517d0baf8c64ab7b6461cb33a2a1bceed", "f8779e0694bd5f37fda4e8d06572056d09d13d51", "176d712d084112b2e65e385e8220e4679c24f28a", "8b334741506521040da36c23982d071f1e4143da" ], "paperAbstract": "Reading and writing data efficiently from storage system is necessary for most scientific simulations to achieve good performance at scale. Many software solutions have been developed to decrease the I/O bottleneck. One well-known strategy, in the context of collective I/O operations, is the two-phase I/O scheme. This strategy consists of selecting a subset of processes to aggregate contiguous pieces of data before performing reads/writes. In this paper, we present TAPIOCA, an MPI-based library implementing an efficient topology-aware two-phase I/O algorithm. We show how TAPIOCA can take advantage of double-buffering and one-sided communication to reduce as much as possible the idle time during data aggregation. We also introduce our cost model leading to a topology-aware aggregator placement optimizing the movements of data. We validate our approach at large scale on two leadership-class supercomputers: Mira (IBM BG/Q) and Theta (Cray XC40). We present the results obtained with TAPIOCA on a micro-benchmark and the I/O kernel of a large-scale simulation. On both architectures, we show a substantial improvement of I/O performance compared with the default MPI I/O implementation. 
On BG/Q+GPFS, for instance, our algorithm leads to a performance improvement by a factor of twelve while on the Cray XC40 system associated with a Lustre filesystem, we achieve an improvement of four.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.80" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6c6baac8a7e3ea43cf875da9cafd6f1f06d0cea6", "sources": [ "DBLP" ], "title": "TAPIOCA: An I/O Library for Optimized Topology-Aware Data Aggregation on Large-Scale Supercomputers", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "6c94924012ceaa0e85d707f7bd4d1506fea576e6": { "authors": [ { "ids": [ "1703441" ], "name": "Mihir Bellare" }, { "ids": [ "1727493" ], "name": "Wei Dai" } ], "doi": "10.1145/3133956.3133965", "doiUrl": "https://doi.org/10.1145/3133956.3133965", "entities": [ "Block size (cryptography)", "Cryptography", "Encryption", "Public-key cryptography", "Spectral leakage", "Symmetric-key algorithm", "Time complexity" ], "id": "6c94924012ceaa0e85d707f7bd4d1506fea576e6", "inCitations": [], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "217", "journalVolume": "2018", "outCitations": [ "264f6ff74beef068cc136eb8d78a90ac0ae0cc3d", "2459b15dcd7c8d383980c0a118c0983d4ec010d5", "d94ba52bd05fe12926836c3332e2fd895e78ed30", "43f0dbcfc606fd2003035cd2dd4e1f37ebd8b4e6", "7feb5d1d3660d7f6ab5265e3b538ecdee51e70a4", "20312508a8e905d4ecd90e165261c0af16c661d1", "ae651f1ca9eb6051ab4402c94720978dc1740e93", "495674d2e9de951263b0e06143b9a71932e1442e", "3d861eb125e6332414c2e8dd96e92f93ccf50224", "400251fab502adf5a8ecdf6e5ba7d522bfe5cf1a", "90f26c0f0d04b9c4999b454c35ee1c7603ca9e4b", "3b2af12a43d06338dd62681328c75a1999fc87fd", "2946e481bbee1d63103544752b8f06dfb9a044e7", "1249b6c557fbe528329392e0ec4abcd67d1d975d" ], "paperAbstract": "Towards advancing the use of big keys as a practical defense against key exfiltration, this paper provides efficiency improvements for 
cryptographic schemes in the bounded retrieval model (BRM). We identify probe complexity (the number of scheme accesses to the slow storage medium storing the big key) as the dominant cost. Our main technical contribution is what we call the large-alphabet subkey prediction lemma. It gives good bounds on the predictability under leakage of a random sequence of blocks of the big key, as a function of the block size. We use it to significantly reduce the probe complexity required to attain a given level of security. Together with other techniques, this yields security-preserving performance improvements for BRM symmetric encryption schemes and BRM public-key identification schemes.", "pdfUrls": [ "http://eprint.iacr.org/2018/217", "http://doi.acm.org/10.1145/3133956.3133965" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6c94924012ceaa0e85d707f7bd4d1506fea576e6", "sources": [ "DBLP" ], "title": "Defending Against Key Exfiltration: Efficiency Improvements for Big-Key Cryptography via Large-Alphabet Subkey Prediction", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "6c977b4cfc2963165c4ccfaa0fc20e5ad75e2c4b": { "authors": [ { "ids": [ "35572925" ], "name": "Shaikha Saleh Mohamed" }, { "ids": [ "31129267" ], "name": "Nedaa Baker Al Barghuthi" }, { "ids": [ "1943105" ], "name": "Huwida E. 
Said" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.26", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.26", "entities": [ "Global Village (telecommunications)", "Industry 4.0", "Smart city" ], "id": "6c977b4cfc2963165c4ccfaa0fc20e5ad75e2c4b", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "200-205", "journalVolume": "", "outCitations": [ "a895eb6b0627d63e0345d26ea9c6134ca1a94844", "cf03815a70cac050cee343e01c8e86efe518c81b", "67b4d4f68ef2c6687ed05e6019f65dc4f5bd0497", "28c1cb6d11469926197a988ae500f08d33497f5d", "5cdac5b479d0bc6e7fa69b21d14d1463715b1b2a", "d108f4b1300bd7ba5fcd44eae1ae60e61f330883", "8a648bf836bbed27c1a44195aa389d8b8841cf50" ], "paperAbstract": "The world has become a global village and technology has a strong influence in every aspect of our daily lives and the fourth industrial revolution that will alter the way we live and learn. This inter-connectivity brings a need for greater engagement, experience and efficiency. Catch a sign on a day in a life of a student and how connectivity and technology helps enrich his/her daily activities. This research aims to study the beneficiary of adopting Smart Education technology among United Arab Emirates (UAE) Universities. This type of education is becoming a dominant in academia especially within universities around the globe. Since it was introduced, it demonstrates a significant change in educational instructional methods. The objective of this research is to investigate Smart Education instruments much as its significance and demands. This research used different tools for assessment. Surveys and interviews were conducted to group of universities learners and educators within the higher education system in the UAE. 
The outcome of the research has shown a significant support towards the usage of the Smart tools and technologies. The survey has also indicated that 72% of the participants preferred the technology approach within their curriculum. The research highlighted that there is a great impact of using the Smart technology tools. In the Future, the authors would like to investigate the risk of adopting such educational approach.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.26" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6c977b4cfc2963165c4ccfaa0fc20e5ad75e2c4b", "sources": [ "DBLP" ], "title": "An Analytical Study Towards the UAE Universities Smart Education Innovated Approaches", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "6c9cafd316cce50115697b0f933f22a752868463": { "authors": [ { "ids": [ "1747536" ], "name": "Yang Ji" }, { "ids": [ "30685860" ], "name": "Sangho Lee" }, { "ids": [ "31978519" ], "name": "Evan Downing" }, { "ids": [ "2933275" ], "name": "Weiren Wang" }, { "ids": [ "3028582" ], "name": "Mattia Fazzini" }, { "ids": [ "3254849" ], "name": "Taesoo Kim" }, { "ids": [ "8172306" ], "name": "Alessandro Orso" }, { "ids": [ "1738428" ], "name": "Wenke Lee" } ], "doi": "10.1145/3133956.3134045", "doiUrl": "https://doi.org/10.1145/3133956.3134045", "entities": [ "Causality", "Denial-of-service attack", "Information flow", "Information flow (information theory)", "Persistence (computer science)", "Reachability", "Refinable function", "System call" ], "id": "6c9cafd316cce50115697b0f933f22a752868463", "inCitations": [], "journalName": "", "journalPages": "377-390", "journalVolume": "", "outCitations": [ "023f23c300804754753cb11db51fb7f582556ab7", "52ad66e45e0a47a2689810d5d1c88665c9388e4d", 
"0ca4dbd0201c3ce423c5cd64d5cdb420c966b847", "32a7c5e10a09b5532d56af50ef2f60d9776cc56a", "114f664a25c411b708c0f0058c3f455b5a1f3e9b", "1c7e1a0bde89990a9173664d3ff6931542741226", "1a5464cda7cff01d5a0ebb49df17fa8c05882295", "173f9ebbc0be6b591dfa72111aa78d3568e2db87", "67f961f98d34fea3ab15f473429a5156b62b5c65", "5578045657a90d2db6ac86bb4afbe38c035fc6a5", "22a3f0837bd6a913f516ba497469176be641c7d4", "2076ea658f23f76715f0f770b40ffb83969109bd", "41cf0c686eb95e5e09d93a78bb4977d5e7eb5713", "5eea7073acfa8b946204ff681aca192571a1d6c2", "4578afb3d3108a9064f2299b47f2f32cb94926ee", "2784864964db743a1d666704b75e5455b880edeb", "0b1fe1af18e2fda99eedf9e5cf3ac18826e585c1", "173bd678095821c34781c6649ccc7206d346f219", "5f3066829e20358838b307513130129170efe84f", "64ad3b92f61a441c5b4080b0ac9291109a919886", "092b09f0ec09b2b10763f5697ca77099a37ab022", "feb5db279d43f6affb474398f96bb5c910aa2340", "19985cad0eb4010b22c2ae1ef9442e036a924244", "8133926b933e82d29766c042764ae5cac935f830", "537f16973900fbf4e559d64113711d35bf7ca4a2", "56c7be2c2d191395b558aa7b8f629b6606528b68", "1ec96c3938c037982cb75a40d5efd619f487911f", "60107efaca23147b103d6e055281bc2b954d1007", "bbf208de5efa2ac6eea2f82b9775636bbe7ea1b4", "1d80de948ddcf8f3524763ed3d5f4485f46d70fb", "6409e64aed68fcc9e3fdc35b87dd168eeb440d32", "2e43a3895b2f1bde1615f9815feed76698a0d629", "35339f6f2e99c04920f21883df1db8004436cdc7", "6c0562ffc00ba7a4d2734ac039ffd181afe2008d", "0b29324025d4ef8c2277bdd3ad51008ea72a4fae", "18910d76c2f93a0df01cc81598bc3680e5d944f9", "b2a8d677582870ab35983cb9652ec804abe28bbe", "48bb5f38620529dbc0b4b34a25862ed68ff3ffc4", "25e8930dd98a3d9cff1c7154b2874148da597724", "5aea75e13421d95e50612f1a9bbd1a8f785798ec", "111864cac232d8a9c170bd63069eb4af155a9f7b", "90f2e587256b8b3cc7651f257a8066ff9f2f544e" ], "paperAbstract": "As modern attacks become more stealthy and persistent, detecting or preventing them at their early stages becomes virtually impossible. 
Instead, an attack investigation or provenance system aims to continuously monitor and log interesting system events with minimal overhead. Later, if the system observes any anomalous behavior, it analyzes the log to identify who initiated the attack and which resources were affected by the attack and then assess and recover from any damage incurred. However, because of a fundamental tradeoff between log granularity and system performance, existing systems typically record system-call events without detailed program-level activities (e.g., memory operation) required for accurately reconstructing attack causality or demand that every monitored program be instrumented to provide program-level information.\n To address this issue, we propose RAIN, a Refinable Attack INvestigation system based on a record-replay technology that records system-call events during runtime and performs instruction-level dynamic information flow tracking (DIFT) during on-demand process replay. Instead of replaying every process with DIFT, RAIN conducts system-call-level reachability analysis to filter out unrelated processes and to minimize the number of processes to be replayed, making inter-process DIFT feasible. Evaluation results show that RAIN effectively prunes out unrelated processes and determines attack causality with negligible false positive rates. 
In addition, the runtime overhead of RAIN is similar to existing system-call level provenance systems and its analysis overhead is much smaller than full-system DIFT.", "pdfUrls": [ "https://www.cc.gatech.edu/~mfazzini/publications/2017_ccs_ji.pdf", "http://doi.acm.org/10.1145/3133956.3134045", "https://taesoo.kim/pubs/2017/ji:rain-slides.pdf", "http://iisp.gatech.edu/sites/default/files/images/rain.pdf", "http://iisp.gatech.edu/sites/default/files/documents/ccs2017-rain.pdf", "https://taesoo.kim/pubs/2017/ji:rain.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6c9cafd316cce50115697b0f933f22a752868463", "sources": [ "DBLP" ], "title": "RAIN: Refinable Attack Investigation with On-demand Inter-Process Information Flow Tracking", "venue": "CCS", "year": 2017 }, "6cb2d9311bf6109e9f812ea351a3197b90bd1803": { "authors": [ { "ids": [ "35946412" ], "name": "Yu Lin Chen" }, { "ids": [ "14110106" ], "name": "Shuai Mu" }, { "ids": [ "1704032" ], "name": "Jinyang Li" }, { "ids": [ "2848503" ], "name": "Cheng Huang" }, { "ids": [ "35964168" ], "name": "Jin Li" }, { "ids": [ "34447742" ], "name": "Aaron Ogus" }, { "ids": [ "33416866" ], "name": "Douglas Phillips" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Cloud storage", "Computer data storage", "Consensus (computer science)", "Data center", "Erasure code", "Failure rate", "Microsoft Azure", "Software versioning", "Strong consistency" ], "id": "6cb2d9311bf6109e9f812ea351a3197b90bd1803", "inCitations": [], "journalName": "", "journalPages": "539-551", "journalVolume": "", "outCitations": [ "57efc2b9ba2a725af1d66cc43c472d0314190051", "3168681722207c86827e596860115a2977ce761f", "2f6af58c7905fb8367652fe62fbb1f6ec7e28be0", "1cfee3e6bad11c5c92cd06065064c474a00e2412", "3f4297d2e5ae196f30f9a18dcd8400dfca5d2e47", "6a1df9dae902f3d377f9c85ba9732b8d2270bf2b", "7dbce0de554c2adbc28d7ba1d927c9f1cc8b184a", "499b458ddd5d10dc4be158fd89aeed0e31b6b5cf", "b129f84262024128ee64300ab257744b0b5ed8fb", 
"3d019723a6f8678b6adc901e8eae2076263d9089", "7ae26da9b7666812857883536870c315538f7f10", "235043df31e4e0b0fc08458c98f9f867053afde5", "07d847f310d5fa9138f461f0a25c5e0024f1c4af", "458449bcb96ef53a787381264562bbe4ea5ce2bf", "20a44558eed182a971f7add68ecc5931fbca2a65", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "3bb4709c79394d7fe89b8a2741f5100821c95b1e", "09b0a63b74a6adb0959ce7ea88ffda7f75ca7842", "043f655a77831b802f2f10854c0d3b7f3a7d9aa5", "00c181b8b64e824fbe0172339f1e4560b557fab5", "134021cfb9f082f4e8b58f31bcbb41eb990ab874", "0f6a32792d0882db35fe9391445d4322232b619e", "58b628792d3eb22a034a871ed3cf373afe591928", "41c43d0a579339ceaaaa5e95b514e8a955389569" ], "paperAbstract": "Microsoft Azure Storage is a global cloud storage system with a footprint in 38 geographic regions. To protect customer data against catastrophic data center failures, it optionally replicates data to secondary DCs hundreds of miles away. Using Microsoft OneDrive as an example, this paper illustrates the characteristics of typical cloud storage workloads and the opportunity to lower storage cost for geo-redundancy with erasure coding. The paper presents the design, implementation and evaluation of Giza \u2013 a strongly consistent, versioned object store that applies erasure coding across global data centers. The key technical challenge Giza addresses is to achieve single cross-DC round trip latency for the common contention-free workload, while also maintaining strong consistency when there are conflicting access. Giza addresses the challenge with a novel implementation of well-known distributed consensus algorithms tailored for restricted cloud storage APIs. 
Giza is deployed to 11 DCs across 3 continents and experimental results demonstrate that it achieves our design goals.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-chen_yu_lin.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/chen-yu-lin", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_chen.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/ed25/0f6abf457ded656d9afcdbfcabce6d7e9944.pdf", "s2Url": "https://semanticscholar.org/paper/6cb2d9311bf6109e9f812ea351a3197b90bd1803", "sources": [ "DBLP" ], "title": "Giza: Erasure Coding Objects across Global Data Centers", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "6cb7126ffd988d9fc8b71c632269af106865b592": { "authors": [ { "ids": [ "35253924" ], "name": "Juan Deng" }, { "ids": [ "3174431" ], "name": "Hongda Li" }, { "ids": [ "7150554" ], "name": "Hongxin Hu" }, { "ids": [ "29181730" ], "name": "Kuang-Ching Wang" }, { "ids": [ "1720429" ], "name": "Gail-Joon Ahn" }, { "ids": [ "2724702" ], "name": "Ziming Zhao" }, { "ids": [ "1822882" ], "name": "Wonkyu Han" } ], "doi": "", "doiUrl": "", "entities": [ "Buffer overflow", "Buffers", "Controllers", "Elasticity (cloud computing)", "Firewall (computing)", "Nelfinavir", "Network function virtualization", "Scalability", "Software-defined networking", "Virtual machine" ], "id": "6cb7126ffd988d9fc8b71c632269af106865b592", "inCitations": [ "28dd40f411fc91cd9d5b72e6bd8d07de4b36c0ea", "ec6afb55691ed7522c7b0b6662a00149dc49e985" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "9e14559b2b1652eeb2474af3b42e24e75afe1f6c", "022a0317d5bf2b38847b03f7c9bc3bfa35950199", "0433bb657317ac22f7c66d71dfd14c8ead607d73", "3967126afbca6a722d7257cd671fe5e4979358a5", "84211e768343af91b901c2b06b13f5dc2bba329e", "3cc80bddf6f179ad963bec63c951e99ea32f0c73", "19114b7a2f5243a47e80590cc11a2d8ec5b96308", "2f976aa22e08e4233c8d1dd82343bfd3a124d9ac", 
"6e4d333d5e53ee2dd71c8483e5aef59bd5f7f596", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "4006b12cdaa141f867e21a253ab6b2aa71bd8388", "0300ba32e9d71891ea250643db80b6f67dc3f8f9", "39bfff0c76441e0af69f3d61915f2ad930bf6b27", "d717410a0e4da81c30bb4759029d2897895db8cf", "2baa50ceffb972260c877567a5dd513dc79fca21", "449a2e057497d446ec3382bb15de815b956f6195", "4c44cbcea788cc024b29ddf178249ee1c367464a", "397edb026ba0500df10ac813229d900e7880f307", "5692a5398e92ec43703145d512eef4d06a2a2fc8", "4cc4b12f171f78a2d92a34636e6f202eceacc5e4", "08ddde0eaf4925704222135788f79fe293c5894d", "005718daa82f7880b5b1f3b01e0e8f6aad2b5751", "16d95a30bf189a0fe37af03eee9cc8af49709cd2", "3dde3fec553b8d24a85d7059a3cc629ab33f7578", "016ca0cee16190907a911d874e98fbc6dfa5a36c", "6276fdf6db1c4b23cc21191e2e8f14fb51606b5b", "49daabab3341b4cb4ef1a70ab2c5d82c04fbf70a" ], "paperAbstract": "Traditional hardware-based firewall appliances are placed at fixed locations with fixed capacity. Such nature makes them difficult to protect today\u2019s prevailing virtualized environments. Two emerging networking paradigms, Network Function Virtualization (NFV) and Software-Defined Networking (SDN), offer the potential to address these limitations. NFV envisions to implement firewall function as software instance (a.k.a virtual firewall). Virtual firewalls provide great flexibility and elasticity, which are necessary to protect virtualized environments. In this paper, we propose to build an innovative virtual firewall controller, VFW Controller, to enable safe, efficient and costeffective virtual firewall elasticity control. VFW Controller addresses four key challenges with respect to semantic consistency, correct flow update, buffer overflow avoidance, and optimal scaling in virtual firewall scaling. To demonstrate the feasibility of our approach, we implement the core components of VFW Controller on top of NFV and SDN environments. 
Our experimental results demonstrate that VFW Controller is efficient to provide safe elasticity control of virtual firewalls.", "pdfUrls": [ "http://www.public.asu.edu/~zzhao30/publication/HongxinNDSS2017.pdf", "http://web.eecs.utk.edu/~mschucha/netsec/readings/firewallsNdss2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/6cb7/126ffd988d9fc8b71c632269af106865b592.pdf", "s2Url": "https://semanticscholar.org/paper/6cb7126ffd988d9fc8b71c632269af106865b592", "sources": [], "title": "On the Safety and Efficiency of Virtual Firewall Elasticity Control", "venue": "", "year": 2016 }, "6cf98b41109d01519ab138a1280600711442dbef": { "authors": [ { "ids": [ "34199165" ], "name": "David Menendez" }, { "ids": [ "2375915" ], "name": "Santosh Nagarakatte" } ], "doi": "10.1145/3062341.3062372", "doiUrl": "https://doi.org/10.1145/3062341.3062372", "entities": [ "Compiler", "LLVM", "Peephole optimization", "Precondition", "Predicate transformer semantics", "Program optimization", "Software bug", "Superoptimization" ], "id": "6cf98b41109d01519ab138a1280600711442dbef", "inCitations": [ "6d429e159682bc1f2c844fa94ad55619d9c7e48f" ], "journalName": "", "journalPages": "49-63", "journalVolume": "", "outCitations": [ "4f48d20824b18e1cf151eabe0128a79e4cf47bb8", "b9addc8ce998f6892120c2c8b23ae183312bfa6c", "5772d2e620fab2152b83d2943fbc50ce23354d46", "6e15aa5b91a16c025969476a7a873347cd01ed57", "34ff0423915c4513a2997e5b91d8043445223bf8", "32dff46a1fcf439bc389551b510ee89150fed139", "19096e10c13d16dc0afd5bedcd80cb3afc4b671d", "2859e75f421621ff776d55e533dc6ee7cb4b0a92", "85e7266af5751f7e1c5e26b03e71e2f92af7b8c3", "00c08861cfb438d5ff209dfadc2d839641cd3ca9", "28f5b0627ec297302eb2518c3444224a11dcd352", "dfaa11c1db1980e103a3e5079700d52e16720f36", "26b9001cce4a7f2e838ef99d0e7593b18553a7e0", "3235b27709b4c9aaad5d34b4f012ebe8581d9d86", "3811b03c4ebbd93dfc602eef422fc8237fda8654", "0c80eb8588fac0a763a15e1b7a33c6d885ce80a4", "1ef301c1b275091b6a50d620b41df4722f2108f0", 
"2f3edee1d3459096ba1de54450fca4d8406d1ed1", "238be0efe497fc297013ae16109fbbd2ee3d9733", "0c8f20da78ebc7891141c175fecb7a5c026f3e7d", "54cc1e9d38763083aa13dd484ddf48a9653b353b", "6e561bc498224748b4b89c22f4cad193f4cde31e", "1deae7e8531cd6870e741ebe63e471742c2d4658", "043899304f00ef1b544320837633366d472c46db", "194ab7a0e0cc7a31577603d30ebe6b2cb1f279bd", "8e4a0d25306545b498856693990ac396a1642604", "37791336941a0d954e4a98c96b1a66ca7be43eb2", "25a77652204ae3e524a1ca25cca7a44c72d37d6d", "0210336e523726e73d9f26da99eedc5875ff1c12", "9163b75b06884004a0400ec9017820f8872f5412", "b957492be245dac1252c8b46cedc0220aaf99c04", "c10a9d8fa571010cc6125a144388266502b0a991", "5d0c6e456a9b4f858da875a7d758bc6134f643a7", "8b41f2d85262928e417bb24a8302439f42436681", "2daa13833ac64dc9af37901366716ddef9ebf4d7", "1d8e5b1674f94dceea8af0b4a641d2f269956c48", "30f02b5309d2d5c84d2ac76ebb1afe3023220164", "0d9579e31aabd30b752ade4064b965de76e3ce77", "41922cfcfee13411d170e649cdf081e2be5e6891", "05d28dfb947b040b604ccfcef824c44033b122cf", "235b9c8f10461a95398e169ecb91cf3e223d3350", "1796693cac1375a236e657e6115d5b21d84a69db", "ff47af5531e55c7b88879ed10c8b0203e5948d99" ], "paperAbstract": "Peephole optimizations are a common source of compiler bugs. Compiler developers typically transform an incorrect peephole optimization into a valid one by strengthening the precondition. This process is challenging and tedious. This paper proposes Alive-Infer, a data-driven approach that infers preconditions for peephole optimizations expressed in Alive. Alive-Infer generates positive and negative examples for an optimization, enumerates predicates on-demand, and learns a set of predicates that separate the positive and negative examples. Alive-Infer repeats this process until it finds a precondition that ensures the validity of the optimization. Alive-Infer reports both a weakest precondition and a set of succinct partial preconditions to the developer. 
Our prototype generates preconditions that are weaker than LLVM’s preconditions for 73 optimizations in the Alive suite. We also demonstrate the applicability of this technique to generalize 54 optimization patterns generated by Souper, an LLVM IR–based superoptimizer.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062372", "https://www.cs.rutgers.edu/~sn349/papers/pldi2017-alive-infer.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6cf98b41109d01519ab138a1280600711442dbef", "sources": [ "DBLP" ], "title": "Alive-Infer: data-driven precondition inference for peephole optimizations in LLVM", "venue": "PLDI", "year": 2017 }, "6cfe1e553cb48c7087bb61e80031c415978a4ede": { "authors": [ { "ids": [ "2105818" ], "name": "Seong Min Kim" }, { "ids": [ "9765412" ], "name": "Juhyeng Han" }, { "ids": [ "9775571" ], "name": "Jaehyeong Ha" }, { "ids": [ "3254849" ], "name": "Taesoo Kim" }, { "ids": [ "1729324" ], "name": "Dongsu Han" } ], "doi": "", "doiUrl": "", "entities": [ "Adversary (cryptography)", "Ecosystem", "End-to-end encryption", "Identifier", "Overhead (computing)", "Personally identifiable information", "Privacy", "Proxy server", "Relay", "Throughput", "Tor Messenger", "Trusted Execution Technology", "Trusted execution environment" ], "id": "6cfe1e553cb48c7087bb61e80031c415978a4ede", "inCitations": [ "be16f521ebdbe1acea867c9ce8100692d9f56308", "d32d738f04d5c4ae4d12be1f6de88b132a7b12a7", "334ec6e57110ece9f482f9ec2e85412b0be8072a", "2087f336b0eb38fc60a24a32ec0821fc8fe2b2aa", "8ca1436fe1e9bbdb39a92178fa80c7869d92573d", "85741fa2a0fb1060c138b1b11a0906381661fbcb", "a355edbb24d406761407e2728218d2192f2c1fcf", "e41440cff90683629228b308a94e48c7af11ca36", "b053033ad436cd404bb0eb2e75b3aac83b70d62c", "2ee14b821b4084aa1b9237390e280daf450419e5" ], "journalName": "", "journalPages": "145-161", "journalVolume": "", "outCitations": [ "04269ca9938b4d99658a3527cde3eff2b502f269", "06bf84f98e7dd39be8d96eb67bafbf56d61bc715", 
"2d968ef0c5ad0cc6718e2f8b40ce7f4c323dbbdd", "5b2092b54860f134f78b2ec884c910750def71e6", "1648be4fa6a517cdad08c6299e20869427242d02", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "7cbbab21c6f6fb54a968005afa21468b825f1d1e", "21f47e1d9078d12de1bd06341619923e8b9d85bb", "566333376dd1af014555b4cf82cda42c22501013", "11dabc4a43d72c78e0d05b8f8bc599620840ff1a", "d550c945a24211ed94c89f104698f1998b3fcb64", "0a35c32ebc233556d11c2038f5a4362f2c40b2a9", "07f0e56d1c37c213cd5c617dbfba5a0549629a19", "d9ef20252f9d90295460953e8ab78667b66919ad", "6b6fae57882fd193461fca64654107068ce9fd9a", "806df240622f866fd8bb4b31b292822dd2aeaf0a", "b0ac2616034f56ab1469afb935b55fe7e37f8f41", "345947186f190649c582204776071ac9a62e8d67", "5fd8edc50805f796daacb9737e1a7b2a9f5a5c2f", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "a8369392447c083d2be1b74753f68f87811c72f5", "108747579aef6bf029623639a86070feaf5cad41", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "178ab90ac932cdd58eaeffa742e9711430bc9a23", "133e0e83dc6877c6d417431e875cd57876153893", "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "35516916cd8840566acc05d0226f711bee1b563b", "6bd5352a8b42520aad1c03e9940568b6c41978aa", "0f19202d0ad4ddb2041540e6ba4dec4e677224fd", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "48a068fc680131e6c0f215ee41318086a5befec1", "30909df12b1b01760ae4c5406e15f302a6524446", "01fde8698110cf46ff48a17c65f2658dab4c323c", "2633619177fcb13211008c6f8b952933afc01cde", "7f489d9801e0674f4436beb34ea8b8695050d5fe", "0469044c24ac4897fcffc3c7db6a2d9c7fd08848", "7fc6e368df553233b61981dbb34772f5d8013821", "6d15489d862e9399807f8aaac50241b440e98663" ], "paperAbstract": "With Tor being a popular anonymity network, many attacks have been proposed to break its anonymity or leak information of a private communication on Tor. However, guaranteeing complete privacy in the face of an adversary on Tor is especially difficult because Tor relays are under complete control of world-wide volunteers. 
Currently, one can gain private information, such as circuit identifiers and hidden service identifiers, by running Tor relays and can even modify their behaviors with malicious intent. This paper presents a practical approach to effectively enhancing the security and privacy of Tor by utilizing Intel SGX, a commodity trusted execution environment. We present a design and implementation of Tor, called SGX-Tor, that prevents code modification and limits the information exposed to untrusted parties. We demonstrate that our approach is practical and effectively reduces the power of an adversary to a traditional network-level adversary. Finally, SGX-Tor incurs moderate performance overhead; the end-to-end latency and throughput overheads for HTTP connections are 3.9% and 11.9%, respectively.", "pdfUrls": [ "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/kim-seongmin", "https://taesoo.gtisc.gatech.edu/pubs/2017/kim:sgx-tor.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-kim-seongmin.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_kim.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_kim.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-kim-seongmin.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/99de/1c59e2e530a7ed9c27251a00ef5e833ff35b.pdf", "s2Url": "https://semanticscholar.org/paper/6cfe1e553cb48c7087bb61e80031c415978a4ede", "sources": [ "DBLP" ], "title": "Enhancing Security and Privacy of Tor's Ecosystem by Using Trusted Execution Environments", "venue": "NSDI", "year": 2017 }, "6cfe9dc89d0fd4778ec9a42af1a39ed99f605211": { "authors": [ { "ids": [ "2738554" ], "name": "Yanfang Le" }, { "ids": [ "1692142" ], "name": "Hyunseok Chang" }, { "ids": [ "1706584" ], "name": "Sarit Mukherjee" }, { "ids": [ "33345248" ], "name": "Limin Wang" }, { "ids": [ "1713535" ], "name": "Aditya Akella" }, { "ids": [ 
"9833675" ], "name": "Michael M. Swift" }, { "ids": [ "1707547" ], "name": "T. V. Lakshman" } ], "doi": "10.1145/3127479.3132252", "doiUrl": "https://doi.org/10.1145/3127479.3132252", "entities": [ "Central processing unit", "Control plane", "Data center", "Forwarding plane", "Hypervisor", "Management plane", "Microprocessor", "Multi-core processor", "Network interface", "Network interface controller", "Network switch", "New Foundations", "Program optimization", "Software-defined networking" ], "id": "6cfe9dc89d0fd4778ec9a42af1a39ed99f605211", "inCitations": [], "journalName": "", "journalPages": "506-519", "journalVolume": "", "outCitations": [ "11c1af3385b836626f0cc0f3e70ec386c516a424", "42824a703198f8824fa8eb001035f8e9d1c7fdd0", "320dd86417661178837059025c6d1f9a75d08bfd", "0d3f85933b6355789588476e491683532c68a906", "c4f94368cab4575431ca56645ab4688bc907128b", "110ca6f5ef5fb2963b8b987aec358a46c0cb7823", "e9ddae666a8fde158d4b52b157fbed62542a4049", "a1471d43cb75cc9699c392297ecf3693a5e5cf96", "64f3a81fff495ac336dccdd63136d451852eb1c9", "6bbb692392335a67f0187151a34ec5c16400a00b", "7129b305ce45f83127e928e8510da9fae0783905", "0baf1bef6ee3bcb0b385a4ac303dcf0b406c64f4", "aaca14d54be1d34833fd086229ab2f5aa8b70050", "3bc9d643f741343c6eef24363a20e2b483439149", "4534c15b4760cb29a0ce74fcd43297fe83f2f277", "99d473437ea95dddc983a197e96569505be757f2", "36bb67d8fba0c85f2495449a9926018827368df5", "3a86edce75e4d49dec49d799d30e41a0af877fbe", "336b4f3099b8f629adc20a69aba15257e53539f9", "8df62aad18d6de13331479666c3b5d6a32b0ba58", "4bf97ac7427196bff2b9c689b53f34bbe98d52ce", "3027396f89500a973e5cdf93c2aedb8d6a62eee6", "088536c44bcbc67165dc064ba4af0bc015d58a65", "11fe71a02b30a206ca0706391b33c45d3e9d8f37", "1cbbdf58133f763813b3a61b8faf2f5ab74464b7", "1901753d2664473a3758a1c4413421611d7f17bc", "67da5535394a9bfb4e5853d89063db505776db8d", "4f5ef5dfe854a9c9f34af44d306534c9a1606b15", "bea095bd20dc301747b7645ff3ee9de80d16c701", "847955ec5e2777f771e9ad757638b28671fc5f25", 
"424bc408f86bada47693a2fb45369cdfaf5fbdb4", "16d95a30bf189a0fe37af03eee9cc8af49709cd2", "2233d39a189ab2bdf8ad370e61bea79c5c3192dc", "7db303e5171f09dfd25065ba8a70a4d74e9ae345", "b386c93eec61b5af3c137e125cf1471f7131167a", "669c88fa482308d0336249d09f4b3b32edc36aa6", "2077579d62fc090d4ddf45f107ffae0468936165", "1b11d4b0b04e0eb061029b57e1a1c436193f13b1", "225603198cc415d363db8a8a2bd30b0df3c963b1", "bf710d87694f9c49ae16448dfcf7a42aad9b4fa0", "4c5d4902583313fea69033c6e528b95a42353da6", "b46e192c84945528f6029138fdb26a9629f2dc6c", "ec448ceb3e05b9222113366dace9fdd2a62322de", "14d644c617b1bdf57d626d4d14f7210f99d140e6", "dc83b565728f78ad9c49a9b83138022de7a83d1c", "39bfff0c76441e0af69f3d61915f2ad930bf6b27", "163247e7ed8db43c9529d85c384d8843e22a136b", "42d1b52254873ecd0f36eb7342f95dbad9c50187", "9edfe7c6166d08eaf0b7dd865537e2c1c0ed082a", "6068cdb7a834f47a38ffcbfa4128e70ed3e385eb", "38835c6de31855989425eed69451969d8dfafe42", "7ed8dd92f4a174b630836700cf12d0adebd5c708", "2c7e254cfea97f0b10320d723aa906f3f5c9bed6", "10bfd5aed1410b88c5c1b5212f450a1994fc5afe", "2b0cdd2452832a36e15a1c17d8fdf91dfad9352f", "0428e3b146a7849e7ad19143e09e0fa897fe2220" ], "paperAbstract": "Increasingly, smart Network Interface Cards (sNICs) are being used in data centers to offload networking functions (NFs) from host processors thereby making these processors available for tenant applications. Modern sNICs have fully programmable, energy-efficient multi-core processors on which many packet processing functions, including a full-blown programmable switch, can run. However, having multiple switch instances deployed across the host hypervisor and the attached sNICs makes controlling them difficult and data plane operations more complex.\n This paper proposes a generalized SDN-controlled NF offload architecture called UNO. 
It can transparently offload dynamically selected host processors' packet processing functions to sNICs by using multiple switches in the host while keeping the data centerwide network control and management planes unmodified. UNO exposes a single virtual control plane to the SDN controller and hides dynamic NF offload behind a unified virtual management plane. This enables UNO to make optimal use of host's and sNIC's combined packet processing capabilities with local optimization based on locally observed traffic patterns and resource consumption, and without central controller involvement. Experimental results based on a real UNO prototype in realistic scenarios show promising results: it can save processing worth up to 8 CPU cores, reduce power usage by up to 2x, and reduce the control plane overhead by more than 50%.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3132252", "http://pages.cs.wisc.edu/~akella/wisr-webpage/papers/p506-le.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6cfe9dc89d0fd4778ec9a42af1a39ed99f605211", "sources": [ "DBLP" ], "title": "UNO: uniflying host and smart NIC offload for flexible packet processing", "venue": "SoCC", "year": 2017 }, "6d4b0754f02e65a4928eeef40e329cce53c6ea0f": { "authors": [ { "ids": [ "1737486" ], "name": "Qian Zhang" }, { "ids": [ "1683402" ], "name": "Hua Wang" }, { "ids": [ "2372528" ], "name": "Fangjin Zhu" }, { "ids": [ "2032035" ], "name": "Shanwen Yi" }, { "ids": [ "37283093" ], "name": "Kang Feng" }, { "ids": [ "2477329" ], "name": "Linbo Zhai" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.21", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.21", "entities": [ "Algorithm", "OpenVMS", "Program optimization", "Simulation", "Virtual machine" ], "id": "6d4b0754f02e65a4928eeef40e329cce53c6ea0f", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; 
IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "162-169", "journalVolume": "", "outCitations": [ "363bee9652c178ff2c00c80ca309222865183b98", "d8057d514036d51051af78476468fe350cb7488a", "0400b848e608ecd72f7fdb11321b80305359bb20", "400c502c5b75ea0b42e15bd8d22e62a0f6f9a6aa", "1644dd4b25709a00f78f7554a7ccd5a4a1fb0bfc", "5ea1a64924e4a48bf628abee74bb56e936c07a18", "21f1bf63b1ca0fcb43f70a99ddda8f115502b62f", "2f4cfcab9c2f5082fc13110cc26e3fa1bac439dd", "fdf71813a6f4a0377a80d4cd8899e987a77dde8f", "59ab46bfd59cb43876e701389f256b93430e6273", "04ab0c05680d496b99cd7f56125b2ce32a6584ec", "9c43e7d5e43c9d78c487f338f5616619e650a285", "da86cc415db0437686d14337e681c66d8ef1f72b", "6d66fbba26971c94e3a7881aca71b3f54cffebb8", "2d9e62ca99119615436b7d4c6aef76eabac1a4e8" ], "paperAbstract": "In cloud datacenters, energy-efficient Virtual Machine Placement (VMP) mechanism is needed to maximize energy efficiency. Existing virtual machine (VM) allocation strategies based on whether the VMs' resource demands are assumed to be static or dynamic. Apparently, the former fails to fully utilize resources while the latter, which is implemented on shorter timescales, is either complicated or inefficient. Moreover, most prior VMP algorithms place VMs one by one, which lacks an optimization space. To handle these problems, we predict Gaussian distribution patterns of VM demands and propose an ant-colony-system VM placement algorithm (GACO-VMP) which synchronously coordinates the VMs with complementary resource requirements on the same server. The Gaussian distribution pattern is derived from the VMs running the same job. This mechanism minimizes energy consumption, while guaranteeing high resource utilization and also balancing resource utilization across multiple resources. 
In addition, we design two new metrics, called cumulative utilization ratio(CUR) and resource balance distance (RBD), in order to measure the overall resource utilization level and the equilibrium of multi-dimensional resource utilization, respectively. Simulations based on Google Cluster real trace indicate that GACO-VMP can achieve remarkable performance gains over two existing strategies in energy efficiency, VM migrations, resource utilization and resource balance.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.21" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6d4b0754f02e65a4928eeef40e329cce53c6ea0f", "sources": [ "DBLP" ], "title": "Energy-Aware VM Placement with Periodical Dynamic Demands in Cloud Datacenters", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "6dbb14299c49b811ed22fc6dbecc51f5ce9a2733": { "authors": [ { "ids": [ "39413307" ], "name": "Sam Silvestro" }, { "ids": [ "2817973" ], "name": "Hongyu Liu" }, { "ids": [ "26427034" ], "name": "Corey Crosser" }, { "ids": [ "34472423" ], "name": "Zhiqiang Lin" }, { "ids": [ "1870340" ], "name": "Tongping Liu" } ], "doi": "10.1145/3133956.3133957", "doiUrl": "https://doi.org/10.1145/3133956.3133957", "entities": [ "Application security", "Countermeasure (computer)", "Heap spraying", "Linux", "Linux", "Memory management", "Memory protection", "Overhead (computing)" ], "id": "6dbb14299c49b811ed22fc6dbecc51f5ce9a2733", "inCitations": [], "journalName": "", "journalPages": "2389-2403", "journalVolume": "", "outCitations": [ "dd66604a91a98014eb08227526feaed6f208fc6a", "31f86ce173ec40025e4cbe8132b6de459a2c8e12", "14d3104c58ad60e02c3ab9d9433093fe5f21d00c", "5f8bbc28027342b16df77fa1b9a1efe4628d41dc", "4fe68f0468d4e6a47cd7f747f49c307dea87cd89", 
"dbce3d345f3c43c51d8cc71c17d073e716a4d07c", "0ff9371fd3888576a66f44e956f9c10316d12219", "2d412673804cd4934aa93c579b82d609f8a36cae", "73de9120c975c6debd712af0136291ec1d4b2fde", "4dbc47ef441a4c6da733e79fc30c97327d7ad05d", "7a8424572e9545c112884b9961c8b6b2613a5b5e", "e23298e18aa92ac43fa941d0f5eacb339905b685", "28ab79d604962031585fd149941a5c0594e3d0ed", "2ce7803031fcba0fa5a397e85039e762cee59f83", "12d5c3bcd2dd94b4f07f358be291c5906b16dba4", "0261afd40eee66cea4ea682fab322a439a28f37d", "05a8448e87b72e661890940ff3eda53070ed89e6", "9edcfee579bbebfa31f789cfb35ead153af2485b", "4766d59a750d33281ef96c02763a71cdbe946a52", "03e93625d185c0ac144c97fdf269b5ae5f38351e", "9a8bf1a6e4e71f59620a53b0637c38a416966c4b", "c4e77ec0e6e4ac6638b662bfe5342439ad4451de", "1898169191c2030e1c1e442afbb66610281f328f", "71d584f310f11216d9e5771af58930c5a8f1dd47", "ce6fe37c3d5f9ebbdf220413e15ca8ac9e2d5f62", "0e55379d27454c5d9d72e4ba4b3752007b9f886f", "3829df26d4ce686251b9b5030893febd75162539" ], "paperAbstract": "In spite of years of improvements to software security, heap-related attacks still remain a severe threat. One reason is that many existing memory allocators fall short in a variety of aspects. For instance, performance-oriented allocators are designed with very limited countermeasures against attacks, but secure allocators generally suffer from significant performance overhead, e.g., running up to 10x slower. This paper, therefore, introduces FreeGuard, a secure memory allocator that prevents or reduces a wide range of heap-related security attacks, such as heap overflows, heap over-reads, use-after-frees, as well as double and invalid frees. 
FreeGuard has similar performance to the default Linux allocator, with less than 2% overhead on average, but provides significant improvement to security guarantees.", "pdfUrls": [ "http://www.cs.utsa.edu/~tongpingliu/pubs/freeguard-ccs17.pdf", "https://liuhycs.github.io/papers/freeguard-ccs2017.pdf", "http://www.utdallas.edu/~zxl111930/file/CCS17c.pdf", "http://doi.acm.org/10.1145/3133956.3133957", "http://arxiv.org/abs/1709.02746", "https://arxiv.org/pdf/1709.02746v2.pdf", "https://arxiv.org/pdf/1709.02746v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6dbb14299c49b811ed22fc6dbecc51f5ce9a2733", "sources": [ "DBLP" ], "title": "FreeGuard: A Faster Secure Heap Allocator", "venue": "CCS", "year": 2017 }, "6dc0b00e15a8ee5168bae39dabc300002fa6173e": { "authors": [ { "ids": [ "3416132" ], "name": "Ioannis Demertzis" }, { "ids": [ "1790969" ], "name": "Charalampos Papamanthou" } ], "doi": "10.1145/3035918.3064057", "doiUrl": "https://doi.org/10.1145/3035918.3064057", "entities": [ "Big data", "Encryption", "End-to-end principle", "In-memory database", "Locality of reference", "Outsourcing", "Parallel computing", "PowerPC 600", "Server (computing)", "Spectral leakage" ], "id": "6dc0b00e15a8ee5168bae39dabc300002fa6173e", "inCitations": [ "34f32b29277ef6ecc0d49b75460be0dd3645605c", "22e8678bc72e9d75a32a42ef83a94836cf08eb65", "0bd8f0ab2ade3cbb560dca22c2f5dfd203f4cfd3", "f4ed6e5506b62faa5df8bb1407921b9d04052cdf", "812510b7b500eebe7a9a74eaeb331a0160434b2c" ], "journalName": "", "journalPages": "1053-1067", "journalVolume": "", "outCitations": [ "9682341a91f0ea73f3dd9b3548c1e113d7a7f61d", "d878fb5a7d1ea14649f590de5ebb806d1414f0b6", "1c730d368ee9b381907a95bc3638cffbc0968bcd", "25c3ee2e736c58eddc7182688e19fa7b65bef83a", "9ea1bbb1d3302aa9504e71ca42e1c19c09e310e0", "313e8120c31fda6877ea426d8a3be9bcf1b6e088", "0df6726c1d83b1e0d6c6580a1e2594519590e38e", "023364e58d680cabb5bb7b1927e6ee15da3276b2", "415f425606c812f26087d39e50c27f0b7c6bbc57", 
"4af77753e00973f339fd93a27e4131047018e79c", "0471807906db31b7f477caa31a255a651e1e26fd", "1cb9aa0116af7d9e61ffabfa951153e9f4e43779", "56d320acfad7f6e8060acb77191c179844fab3cb", "32dc88258734f6c9d8fd1d0151d0c763ae2df75a", "ad0c881078b2cd3d69b5cc2ef63bcdb72070298e", "20b63210954f7c5a70664f301dcd7196856ccfa7", "1ab81ae077d6944fbff279a7a8a38df48f75eadf", "05c75c61c64d0d38ac00c9be0649ac24dd0d72a3", "a0835c336ccc0e2f6f7cde1ba9c214996a70f1f3", "92eaba06af12761b5c64b84e6028d21cd05af9dd", "3864cfb41db27452cefe3b1f64f05623690201ab", "1c7e79160ab095c2de9d83da8ca3311b3bebb366", "a92d7dee389e0386ae855a6a0a76cde58bb30f63", "6f6e16de3b99c67e9fcdf7a98f283880159ba590", "22a5eeb8608b35e371b7544a54fabeadca8866e3", "6b41ef695bcc8c8816060002851e9be9d385a16b", "02beed2e1350a0d0b01bb9622081cb93a965a716", "4beef78e9b21611a59237b63d512014e47f32d5e" ], "paperAbstract": "Searchable encryption (SE) allows a client to outsource a dataset to an untrusted server while enabling the server to answer keyword queries in a private manner. SE can be used as a building block to support more expressive private queries such as range/point and boolean queries, while providing formal security guarantees. To scale SE to big data using external memory, new schemes with small locality have been proposed, where locality is defined as the number of non-continuous reads that the server makes for each query. Previous space-efficient SE schemes achieve optimal locality by increasing the read efficiency-the number of additional memory locations (false positives) that the server reads per result item. This can hurt practical performance.\n In this work, we design, formally prove secure, and evaluate the first SE scheme with tunable locality and linear space. Our first scheme has optimal locality and outperforms existing approaches (that have a slightly different leakage profile) by up to 2.5 orders of magnitude in terms of read efficiency, for all practical database sizes. 
Another version of our construction with the same leakage as previous works can be tuned to have bounded locality, optimal read efficiency and up to 60x more efficient end-to-end search time. We demonstrate that our schemes work fast in in-memory as well, leading to search time savings of up to 1 order of magnitude when compared to the most practical in-memory SE schemes. Finally, our construction can be tuned to achieve trade-offs between space, read efficiency, locality, parallelism and communication overhead.", "pdfUrls": [ "http://idemertzis.com/Papers/sigmod17.pdf", "http://doi.acm.org/10.1145/3035918.3064057" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6dc0b00e15a8ee5168bae39dabc300002fa6173e", "sources": [ "DBLP" ], "title": "Fast Searchable Encryption With Tunable Locality", "venue": "SIGMOD Conference", "year": 2017 }, "6ded4db9b287b0f988e536008ede70ff731b09d6": { "authors": [ { "ids": [ "2435025" ], "name": "Alin Tomescu" }, { "ids": [ "1695217" ], "name": "Srinivas Devadas" } ], "doi": "10.1109/SP.2017.19", "doiUrl": "https://doi.org/10.1109/SP.2017.19", "entities": [ "Bitcoin", "Double-spending", "Java", "Megabyte", "Mobile phone", "Public-key cryptography", "Server (computing)", "Thin client", "Tor Messenger" ], "id": "6ded4db9b287b0f988e536008ede70ff731b09d6", "inCitations": [ "3dcfbce9ee59859b0ba7cf871ccca75d08fd7c6c", "0308e687336f7e17c78032b90d69f69f37e0d86a", "56ee02f6ca43075036c8d783b76b7ffd0105727f", "02dd8348fbdf459c4ca05245ea6498e9f04bd03f", "7e5a109251149194b61a963e00961d08d97326eb", "1969453d7960eaca8cfbd642877925f5f5028ce5" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "393-409", "journalVolume": "", "outCitations": [ "45f6f2425f3e21abea774fa6261eb6c457efd140", "07d013ed623ee73e010cfd251b474f539a62aa7c", "01f187c3f0390123e70e01f824101bf771e76b8f", "2d89cd8f47434726bfb3649299d3900d400d7d9f", "4c65005c8822c3117bd3c3746e3a9b9e17386328", 
"08f8fbc075c0c0822115315bcffa54964b0599a7", "005b52bd01af724cc1b8f22e3b33545417a3d5fb", "1f42fdecd70a7d72f0f108e80511320f7204316c", "3593269a4bf87a7d0f7aba639a50bc74cb288fb1", "35ff80b9a8ad9b75e2a1509d71de892c02e55a53", "33853565b4dcad38b9b79091a48d3f40409f06d7", "b807d8124c67a9c6985acf824e4af69a3be28c28", "4c9016ff9accd981c248d26b446923eb668e6b9c", "0d5f7a1825bae713cebd66d121d5b01e31d8adab", "79f9267b878445e2239ec639103d30e69e7b5e9f", "632dad60ab2cb1a981144560fb31916a0b5422e1", "3be07a6355a6fa855d6ec3295cf44ce24f061cc2", "f65ee3a9f171da68b57039a5d5f2f1ad70798488", "35516916cd8840566acc05d0226f711bee1b563b", "ac1a918fc933b767d34574ec2cc6a33b4223dc1a", "b722fa7a5c993240f9adf7752fa99b6dc816a49d", "1ee169e1161fbaaea334bd99759015cebe506764", "26e5ba037d25d9d8e99efa5101fe14bbd14607b2", "0cdc035cd66f6d8be594e0e41d4023eacbd58cbb", "2c71d19590b59007f49e9ce04c6aab529fc4b8fd", "2c6d93d96725e44669afe1e1cbec9b5e9910d883", "35fe18606529d82ce3fc90961dd6813c92713b3c", "0d07495964f03dd49bf978c64874fdb3e8dbcb01", "1ccef9fa75e519daa10618fe9f2d7a46a34a7040", "7bed3507a73099b5bb55be17fe3d436c82e39550", "48b029ced7f12780d57c2577def98d65381ff551", "161c24b98ce3af2c0f8a5e96d5055a367b81801e", "e1ecc225690f79d1d51202d6772d3c2e0d0aea2a", "12d854f326b43232d906eb323db5d282786acb9d", "0706b93482fa8aaa4ce534f7cf6971b51c10cd76", "00a34420e9e048689caa44d515efeb5d19e1a6ca", "35629fe630cb4592dd6c69b585bd6aefeeea0fdd", "a1ca1d9c4cf67418db0134803b4fddf897e9f257", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "017aba316f6d8447a4e045d8ddd417456629031e", "9fad725d1cbb33a3a5dbe87878a63e3476f08c36", "08e9542de3cbfe791bf86a0dee6ba5e83bc29ea7", "198ad7cafc43c1cf0271e4daafdc4638775f8b49", "3fad56eb0379f9684af608bd6c9ad4de706b4cad", "562573a079b28e8d6435f082e060e959ab25dbd3", "6570cb9f817714b1380a9717475b75e77e9f2726", "3d049eb62dd331b066df3cd455287ec487a745bb", "744159a1774cafade8c6726e33dd8741b7d27dd0", "1113664b038d0390b061afb80ee214b09a207fc9" ], "paperAbstract": "We present Catena, an 
efficiently-verifiable Bitcoin witnessing scheme. Catena enables any number of thin clients, such as mobile phones, to efficiently agree on a log of application-specific statements managed by an adversarial server. Catena implements a log as an OP_RETURN transaction chain and prevents forks in the log by leveraging Bitcoin's security against double spends. Specifically, if a log server wants to equivocate it has to double spend a Bitcoin transaction output. Thus, Catena logs are as hard to fork as the Bitcoin blockchain: an adversary without a large fraction of the network's computational power cannot fork Bitcoin and thus cannot fork a Catena log either. However, different from previous Bitcoin-based work, Catena decreases the bandwidth requirements of log auditors from 90 GB to only tens of megabytes. More precisely, our clients only need to download all Bitcoin block headers (currently less than 35 MB) and a small, 600-byte proof for each statement in a block. We implement Catena in Java using the bitcoinj library and use it to extend CONIKS, a recent key transparency scheme, to witness its public-key directory in the Bitcoin blockchain where it can be efficiently verified by auditors. 
We show that Catena can secure many systems today, such as public-key directories, Tor directory servers and software transparency schemes.", "pdfUrls": [ "http://people.csail.mit.edu/alinush/files/catena-sp2017-slides.pdf", "https://doi.org/10.1109/SP.2017.19", "http://people.csail.mit.edu/alinush/papers/catena-sp2017.pdf", "http://people.csail.mit.edu/devadas/pubs/catena.pdf", "https://eprint.iacr.org/2016/1062.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6ded4db9b287b0f988e536008ede70ff731b09d6", "sources": [ "DBLP" ], "title": "Catena: Efficient Non-equivocation via Bitcoin", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "6def497b863d6c48e49302580165b086c9d3762a": { "authors": [ { "ids": [ "1875835" ], "name": "Fabien Hermenier" }, { "ids": [ "2374288" ], "name": "Giovanni Giuliani" }, { "ids": [ "30658206" ], "name": "Andre Milani" }, { "ids": [ "3037156" ], "name": "Sophie Demassey" } ], "doi": "10.1007/978-3-319-64203-1_2", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_2", "entities": [], "id": "6def497b863d6c48e49302580165b086c9d3762a", "inCitations": [], "journalName": "", "journalPages": "23-35", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_2" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6def497b863d6c48e49302580165b086c9d3762a", "sources": [ "DBLP" ], "title": "Scaling Energy Adaptive Applications for Sustainable Profitability", "venue": "Euro-Par", "year": 2017 }, "6e569186a67e8b4a6bed42796430b4be6552045b": { "authors": [ { "ids": [ "14834749" ], "name": "Daniel Anderson" }, { "ids": [ "38692447" ], "name": "Pryce Bevan" }, { "ids": [ "1785256" ], "name": "Kevin J. 
Lang" }, { "ids": [ "2941680" ], "name": "Edo Liberty" }, { "ids": [ "39384151" ], "name": "Lee Rhodes" }, { "ids": [ "32523323" ], "name": "Justin Thaler" } ], "doi": "10.1145/3131365.3131407", "doiUrl": "https://doi.org/10.1145/3131365.3131407", "entities": [ "Algorithm", "Amortized analysis", "Common Building Block", "David Gries", "Experiment", "Motor Industry Software Reliability Association", "Open-source software", "Stream (computing)", "Time complexity" ], "id": "6e569186a67e8b4a6bed42796430b4be6552045b", "inCitations": [], "journalName": "", "journalPages": "268-282", "journalVolume": "", "outCitations": [ "bdd9dc7e771b3f940f833f58bdd6c217690f17f0", "08cac9d5dd079a020152830d2e2f8bde931f8293", "e4c5a8575a2576c4b9a6df65af6b7d5e657373ac", "2220feec76a17e509a58abf8c742ea9b7866a99e", "2be10a531f0cc566fea842e8f096dbcbc2f0a1d3", "174177d1631fa92a746d514ba0210382d231e583", "36797a5cad36a29fbc6540590130f99857f3b080", "1e509cf3720a7494005871d6dbeccc79348c81c9", "0dd16e993f715a0c8b8d992d5c6ec1fd5d54eda0", "1ad4a22b9aee64aca6a2fa329749f065f4b7ee6d", "7a278ee0578f194700cadc3811cdda4ec751f88a", "3946e3a4a4f5b42d55859153e98d3e83151303bb", "68805437a3d48d650ba37694c664004ae6b78076", "3b4d5b3e93aa6d5ea413c726c17057d8d1eafb2d", "5d458a1bff91aa598fcc47711e5cfd7a6dfa559d", "59884ad55dab150122111636ae9d76e63a483935", "3b6291825ce69692101fc412bb70bdfe2b6af200", "34ce9b986aa9ce54d569f70907566784d3cac9d5", "24763030fb1e9813dad51d28bea9c5d1414f9cda", "090fedae1eb71295d5505431f4a70485f20ef94f", "4465762fac009c8620e5d2ad67e8ffab4b7dc2f5" ], "paperAbstract": "Estimating frequencies of items over data streams is a common building block in streaming data measurement and analysis. Misra and Gries introduced their seminal algorithm for the problem in 1982, and the problem has since been revisited many times due its practicality and applicability. We describe a highly optimized version of Misra and Gries' algorithm that is suitable for deployment in industrial settings. 
Our code is made public via an open source library called Data Sketches that is already used by several companies and production systems.\n Our algorithm improves on two theoretical and practical aspects of prior work. First, it handles weighted updates in amortized constant time, a common requirement in practice. Second, it uses a simple and fast method for merging summaries that asymptotically improves on prior work even for unweighted streams. We describe experiments confirming that our algorithms are more efficient than prior proposals.", "pdfUrls": [ "http://www.csd.uoc.gr/~hy562/papers/imc17-paper255.pdf", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final255.pdf", "https://arxiv.org/pdf/1705.07001v2.pdf", "http://doi.acm.org/10.1145/3131365.3131407", "http://arxiv.org/abs/1705.07001", "https://arxiv.org/pdf/1705.07001v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6e569186a67e8b4a6bed42796430b4be6552045b", "sources": [ "DBLP" ], "title": "A high-performance algorithm for identifying frequent items in data streams", "venue": "IMC", "year": 2017 }, "6e5c9286657818ba0beac6a9cad4cbbaa814ce2b": { "authors": [ { "ids": [ "24341752" ], "name": "Kamesh Arumugam" }, { "ids": [ "1709179" ], "name": "Desh Ranjan" }, { "ids": [ "1718354" ], "name": "Mohammad Zubair" }, { "ids": [ "11168509" ], "name": "Balsa Terzic" }, { "ids": [ "2993624" ], "name": "Alexander Godunov" }, { "ids": [ "27425015" ], "name": "Tunazzina Islam" } ], "doi": "10.1109/ICPP.2017.55", "doiUrl": "https://doi.org/10.1109/ICPP.2017.55", "entities": [ "Algorithm", "Computation", "Control flow", "Data access", "Double-precision floating-point format", "FLOPS", "Fastest", "Graphics processing unit", "Machine learning", "Nvidia Tesla", "Parallel computing", "Simulation", "Speedup", "Supervised learning" ], "id": "6e5c9286657818ba0beac6a9cad4cbbaa814ce2b", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", 
"journalPages": "462-471", "journalVolume": "", "outCitations": [], "paperAbstract": "Parallel computing architectures like GPUs have traditionally been used to accelerate applications with dense and highly-structured workloads; however, many important applications in science and engineering are irregular and dynamic in nature, making their effective parallel implementation a daunting task. Numerical simulation of charged particle beam dynamics is one such application where the distribution of work and data in the accurate computation of collective effects at each time step is irregular and exhibits control-flow and memory access patterns that are not readily amenable to GPU's architecture. Algorithms with these properties tend to present both significant branch and memory divergence on GPUs which leads to severe performance bottlenecks. We present a novel cache-aware algorithm that uses machine learning to address this problem. The algorithm presented here uses supervised learning to adaptively model and track irregular access patterns in the computation of collective effects at each time step of the simulation to anticipate the future control-flow and data access patterns. Access pattern forecast are then used to formulate runtime decisions that minimize branch and memory divergence on GPUs, thereby improving the performance of collective effects computation at a future time step based on the observations from earlier time steps. Experimental results on NVIDIA Tesla K40 GPU shows that our approach is effective in maximizing data reuse, ensuring workload balance among parallel threads, and in minimizing both branch and memory divergence. 
Further, the parallel implementation delivers up to 485 Gflops of double precision performance, which translates to a speedup of up to 2.5X compared to the fastest known GPU implementation.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.55" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6e5c9286657818ba0beac6a9cad4cbbaa814ce2b", "sources": [ "DBLP" ], "title": "A Machine Learning Approach for Efficient Parallel Simulation of Beam Dynamics on GPUs", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "6e63485bc4754aed5be30a8dc6e73e6f549d4591": { "authors": [ { "ids": [ "3415925" ], "name": "Panagiotis Kintis" }, { "ids": [ "3422391" ], "name": "Najmeh Miramirkhani" }, { "ids": [ "39822150" ], "name": "Charles Lever" }, { "ids": [ "3325283" ], "name": "Yizheng Chen" }, { "ids": [ "26989662" ], "name": "Rosa Romero G\u00f3mez" }, { "ids": [ "2903336" ], "name": "Nikolaos Pitropakis" }, { "ids": [ "1679711" ], "name": "Nick Nikiforakis" }, { "ids": [ "2805955" ], "name": "Manos Antonakakis" } ], "doi": "10.1145/3133956.3134002", "doiUrl": "https://doi.org/10.1145/3133956.3134002", "entities": [ "Phishing", "Social engineering (security)" ], "id": "6e63485bc4754aed5be30a8dc6e73e6f549d4591", "inCitations": [ "fcf9c18a9c2c6460adda0b91209327df94184fa5", "0df5c874f3661a96997d884f555bfe233b0b0d3f" ], "journalName": "", "journalPages": "569-586", "journalVolume": "", "outCitations": [ "6c5d03568e012a95c5a663309c8c21ff1e07e53f", "ba9af0bf228cedfad61daa481a71ed433076ab8d", "124a9b51d1767d9d077602e7075ce3e1393ed400", "83e8d61a3d767e3b6c6b8dce63c35bc709fddc49", "6e9f6466a125587b4eeeb3845a5d8afe8b1e902c", "0a964c5ac7e19cbdc820fd4ee101a5263385733d", "876dae4ee0323da2ae685160e9192cdf605e7299", "83552961f24078d1603813cc2a3c445945c9dc61", "0796bb6c803e4256d7bdc0885c6a26d058da3319", "5a053b3784b4add490ec27257a836e1e2f1d06f8", "325e00509090fafddcf2e53d5bdb81ccd1c5637a", 
"a19aa12ef0726bff27838faf907d9c7e059ef2c3", "11bd3633e2647a205b78f71a7a583f81cbe33b39", "5c0ad4b6e036756e421a08d98202497179fe75d0", "90bbddf70db6cdce63dbcc7b11fd871d230ac54f", "519a022f6103a68331402f499a9bc9447ef70995", "353bc95ea6d720d867489e3d4cf5c9427531c7c7", "95b1bfb4c47b213355a236894d672cb4cc5138f5", "532a45960f622a64a356f543b1cc032d78eb77d5", "0cd78491f578f93ccfcac8957c451850e33afb58", "73d25c29ae231c8e6a3acd283b896ec7225caccd", "649468352e70532e80f68d362bf85fae8277bf22", "2cfbb7b89a5e220b21bbf64161dc880c1b644017", "3032182c47b75d9c1d16877815dab8f8637631a2", "22a78f31395e79cb6c99c3cedd248ecd6568b7f7", "5d6ed1a005c90b1aa7d3fb883be38193dcecb04f", "0bb1bb38263368784df02ca4546fba3ea12c7c0e", "d98a304e44391fbdbc8c7c57248a652b04c14d00", "8714e68bb306f89b3fbce3307833405b6a632487" ], "paperAbstract": "Domain squatting is a common adversarial practice where attackers register domain names that are purposefully similar to popular domains. In this work, we study a specific type of domain squatting called \"combosquatting,\" in which attackers register domains that combine a popular trademark with one or more phrases (e.g., betterfacebook[.]com, youtube-live[.]com). We perform the first large-scale, empirical study of combosquatting by analyzing more than 468 billion DNS records - collected from passive and active DNS data sources over almost six years. We find that almost 60% of abusive combosquatting domains live for more than 1,000 days, and even worse, we observe increased activity associated with combosquatting year over year. Moreover, we show that combosquatting is used to perform a spectrum of different types of abuse including phishing, social engineering, affiliate abuse, trademark abuse, and even advanced persistent threats. 
Our results suggest that combosquatting is a real problem that requires increased scrutiny by the security community.", "pdfUrls": [ "http://arxiv.org/abs/1708.08519", "https://www.securitee.org/files/combosquatting_ccs2017.pdf", "http://doi.acm.org/10.1145/3133956.3134002", "https://arxiv.org/pdf/1708.08519v1.pdf", "http://iisp.gatech.edu/sites/default/files/images/hiding_in_plain_sight-_a_longitudinal_study_of_combosquatting_abuse.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6e63485bc4754aed5be30a8dc6e73e6f549d4591", "sources": [ "DBLP" ], "title": "Hiding in Plain Sight: A Longitudinal Study of Combosquatting Abuse", "venue": "CCS", "year": 2017 }, "6e6d5988a347502d12c803b7e8fe15394571c8a5": { "authors": [ { "ids": [ "40534473" ], "name": "M. Kashif Ilyas" }, { "ids": [ "1835480" ], "name": "Alexandru Calotoiu" }, { "ids": [ "1684034" ], "name": "Felix Wolf" } ], "doi": "10.1007/978-3-319-64203-1_3", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_3", "entities": [], "id": "6e6d5988a347502d12c803b7e8fe15394571c8a5", "inCitations": [], "journalName": "", "journalPages": "36-48", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_3" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6e6d5988a347502d12c803b7e8fe15394571c8a5", "sources": [ "DBLP" ], "title": "Off-Road Performance Modeling - How to Deal with Segmented Data", "venue": "Euro-Par", "year": 2017 }, "6e7640c890edf815eb8a22e5f6b6d625a12676cb": { "authors": [ { "ids": [ "22661353" ], "name": "Arseniy Zaostrovnykh" }, { "ids": [ "8030096" ], "name": "Solal Pirelli" }, { "ids": [ "39995135" ], "name": "Luis Pedrosa" }, { "ids": [ "1702632" ], "name": "Katerina J. 
Argyraki" }, { "ids": [ "2465036" ], "name": "George Candea" } ], "doi": "10.1145/3098822.3098833", "doiUrl": "https://doi.org/10.1145/3098822.3098833", "entities": [ "Automated proof checking", "Correctness (computer science)", "Formal verification", "Network address", "Network address translation", "Reachability", "Scalability", "Separation logic", "Software bug", "State (computer science)", "Symbolic execution", "Toolchain", "Transfer function", "Verification and validation" ], "id": "6e7640c890edf815eb8a22e5f6b6d625a12676cb", "inCitations": [ "22f136c1a906fb12d395a03b59f6be2e34d61cc3", "cd5fdc7ea21293acb52a7af34e01217d54b1c39e", "304ba54357f0ebd37d35d33fe8b3703c979b514e" ], "journalName": "", "journalPages": "141-154", "journalVolume": "", "outCitations": [ "0dfbdd26cda3df93e5916a04d6a279ceeb3c238e", "16a455aeacd14529bee92b0c197619fa2d173151", "614f3b72660eed2ce7b62970fa73ba8eae4d278b", "4866f9428056529a77889569d24397489b77c502", "07595a3a571e09ccaa7727a4659efcb9d9a4f135", "36222f8eb2ccf21ca345e15186cea64506581543", "4111e1e79347ec19b715d043be46a83f56581742", "19114b7a2f5243a47e80590cc11a2d8ec5b96308", "29eb6849b0eb147673f51d6bc8a501131e4e7c52", "1b0c3ef8e2eabf218549e9926b51a51044b7639d", "38b7e9721cc3e326580465deaf0f0028b92afe6a", "04c2388e36d269cd139f25e85b2d4a44f3c8c34c", "7f5fcc6fc5b1b14894292aacbfa9ee1f85d243a2", "119af470b90b725c847c4b1fd25aea9a6c5b2b57", "4c8ad20e8d682d9956dad6a68d2e2a022773a959", "1d8e5b1674f94dceea8af0b4a641d2f269956c48", "15751a87bf3cc204d3ed35325db4d55cd9c7b169", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "0a59166593f0a2fa260f16fd853299c9f0863fdf", "0f2a17851a88571a5313c75fdb0d5d28d4f6fa02", "016ca0cee16190907a911d874e98fbc6dfa5a36c", "cfb5150c52b5699c46c20f0f665ecb7a405653d8", "1deae7e8531cd6870e741ebe63e471742c2d4658", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "07ca726af9c235573654b85e8d478bd7303aa62f", "211b998175c29dd62e7f854301ed88bcb03a1fa5", "36f396b52f93fa52742ce5052a40c1c90ea726e3", 
"61797d1af87a27a89b5bb5fd4a03de061e5dc0e3", "44b3b0573fdf6fc9889dbb3badf134dd092bb2d1", "b41b4305fc24bca16f7a3c9f80b0e867ca033438", "79b3cdbca83235716e3e790e4de7c30f1a50d734", "af73e3d3cd348fcb9288bbbbbac65780c7a313f7", "04b319357d6bab89ec9575f4b044d7609aa4296a", "42fbdb54cdb37b005a66eb50e704219798e927d7", "3be1076f8abd20bc41bdb054fd46b495e3109611", "006194c81c756d4e09b229f2591a5949d471598b", "0433bb657317ac22f7c66d71dfd14c8ead607d73", "0b5b42425deb371d8dc60ac9b090c7232702370a", "168d5cbbc2251d1afd71d9c7f29dfa2a5d597b58", "0719b9670c8580db76547497df39caabdc20fc32", "219c95e028a1a8e2baebdecb8b998e12a03bc33b", "3d6395a04f7436b143339bbb7e4e46dfaaaf8afb", "0f46341f6e6a20b1446757c75e29b27561955447" ], "paperAbstract": "We present a Network Address Translator (NAT) written in C and proven to be semantically correct according to RFC 3022, as well as crash-free and memory-safe. There exists a lot of recent work on network verification, but it mostly assumes models of network functions and proves properties specific to network configuration, such as reachability and absence of loops. Our proof applies directly to the C code of a network function, and it demonstrates the absence of implementation bugs. Prior work argued that this is not feasible (i.e., that verifying a real, stateful network function written in C does not scale) but we demonstrate otherwise: NAT is one of the most popular network functions and maintains per-flow state that needs to be properly updated and expired, which is a typical source of verification challenges. We tackle the scalability challenge with a new combination of symbolic execution and proof checking using separation logic; this combination matches well the typical structure of a network function. We then demonstrate that formally proven correctness in this case does not come at the cost of performance. 
The NAT code, proof toolchain, and proofs are available at [58].", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098833", "https://vignat.github.io/vignat-paper.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6e7640c890edf815eb8a22e5f6b6d625a12676cb", "sources": [ "DBLP" ], "title": "A Formally Verified NAT", "venue": "SIGCOMM", "year": 2017 }, "6e7cd7b318cca957fec0c8056beebb982b5b15ee": { "authors": [ { "ids": [ "1717462" ], "name": "Guy E. Blelloch" }, { "ids": [ "1974678" ], "name": "Phillip B. Gibbons" }, { "ids": [ "2492972" ], "name": "Harsha Vardhan Simhadri" } ], "doi": "10.1109/HiPC.2017.00023", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00023", "entities": [ "Algorithm", "Asymptotically optimal algorithm", "Cache (computing)", "Central processing unit", "Load balancing (computing)", "Memory footprint", "Memory management", "Parallel computing", "Scheduling (computing)", "Static variable", "Time complexity" ], "id": "6e7cd7b318cca957fec0c8056beebb982b5b15ee", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "124-133", "journalVolume": "", "outCitations": [ "52f00ddc4c6537ca981947e7865cab4a4d4c43ec", "69e56e5f52e416e6998b255235c5e92081dadda0", "8431e9dc7004d7aaa2ec2ee89c90a28b2d658275", "3d1e89c91510bfc5e18fbe92b8ed6a8e0b52b436", "0606b9b2bedb67039eb1615f8ef5b13f42f8339a", "32031a0e1df68e86a4be76a82b402fbbde2a572f", "19c8c86f8b04c19d43d5fa7ee41a287543c4ab9a", "03880f1d3faedb37aa51deab3b70a98b939dba28", "467f4406b5c42921e56bbf92524d8ebc8f94c997", "f345ecb8e361107bd6c8ca9b8f712c42a102c4cd", "85f9ce622c0b84e9801489c0a7f25de3a95ef993", "80fcacb46c2c5d986a319c8a9b80b0a0ccbb6ce8", "3eae0271717f6b4d65024abf04e5d98aef41d748", "7bb8469f9461ef1794f7110ea8762312a120f065", "02d0a24f01c02035f0e7ec890094b546e6482e56", "3ee0eec0553ca719f602b777f2b600a7aa5c31aa", "0ecc0caadf47e0eeefdfde624c539ae44f6f5212", "071686697917fd56ae8ace0c4d6bfcf3bef5700a", 
"0e19b564e8e047f0bff5bc3c183f53fcebb0af87", "2042b469be68653afcb2b7b38490c16369b4501a", "615b4206a09b677e64c96d257dfe4d65719206c6", "008b490697d36e43dc2df656efff524bedcf076f", "1dff33cb24cf30be232d02bc48ebdf200480d2f3" ], "paperAbstract": "Thread schedulers are designed to dynamically map parallel programs to processors to optimize performance metrics including memory footprint, number of cache misses at each cache level, and load balance, so as to minimize the total running time of the program. Programs with dynamic memory allocation pose particular challenges for thread schedulers, and indeed prior schedulers that are provably cache- and time-efficient on multi-level cache hierarchies require static memory allocation. Not only do many thread schedulers fail to reuse memory effectively, but there is often an inherent trade-off between parallelism and memory use in algorithms. In this paper, we present the first runtime thread scheduler for multi-level cache hierarchies, called the space-bounded recursive-PDF scheduler, that is provably space-, cache-, and time-efficient for parallel programs that dynamically allocate memory. Our bounds hold for nested parallel programs with good regularity as measured by the effective cache complexity \u2014 a program-centric metric. 
The cache and time bounds are asymptotically optimal, while the space bound is asymptotically optimal for highly parallel and regular programs.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00023", "http://harsha-simhadri.org/pubs/hipc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6e7cd7b318cca957fec0c8056beebb982b5b15ee", "sources": [ "DBLP" ], "title": "Provably Efficient Scheduling of Dynamically Allocating Programs on Parallel Cache Hierarchies", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "6e82d815f9ea3e0a09ab96221843d0edd04ce499": { "authors": [ { "ids": [ "2754784" ], "name": "Sergey Hardock" }, { "ids": [ "40145493" ], "name": "Ilia Petrov" }, { "ids": [ "2252634" ], "name": "Robert Gottstein" }, { "ids": [ "1743524" ], "name": "Alejandro P. Buchmann" } ], "doi": "10.1145/3035918.3035958", "doiUrl": "https://doi.org/10.1145/3035918.3035958", "entities": [ "Adobe Flash", "Adobe Flash Player", "Byte", "Delta encoding", "Emulator", "Flash memory", "Flash memory emulator", "High- and low-level", "IBM Tivoli Storage Productivity Center", "In-place algorithm", "Multi-level cell", "Online transaction processing", "Throughput" ], "id": "6e82d815f9ea3e0a09ab96221843d0edd04ce499", "inCitations": [ "d8552f846701a2758ff2bdd8112370ddd121badc" ], "journalName": "", "journalPages": "1571-1586", "journalVolume": "", "outCitations": [ "7ae91ea70c601145a9c977b8b0419f6ba4c42900", "d4c9dd82ff88ad4a1f26fbe9424e2a81559d417b", "2525c025f11aec60cff428271ca851381b92008f", "0997037e940df06ed7a6d19f7501579aab01e829", "ca6c637d2ef7d0ff5a2d77139b07eb7ee7e7bbdf", "437df9d3c0f294dadddae07ad6fa42e31940e143", "a08fde3d9b36c7e570598010d2c3452b64946097", "5a85a3e51019c7d28136efd5ad17697a61ce200d", "29c8572f3bb27e4dc2f1c05a51e2b8cdaa4e2b15", "ddc3e4501691c41bda5d927628f5f4abb2cfeb7f" ], "paperAbstract": "Under update intensive workloads (TPC, LinkBench) small updates 
dominate the write behavior, e.g. 70% of all updates change less than 10 bytes across all TPC OLTP workloads. These are typically performed as in-place updates and result in random writes in page-granularity, causing major write-overhead on Flash storage, a write amplification of several hundred times and lower device longevity.\n In this paper we propose an approach that transforms those small in-place updates into small update deltas that are appended to the original page. We utilize the commonly ignored fact that modern Flash memories (SLC, MLC, 3D NAND) can handle appends to already programmed physical pages by using various low-level techniques such as ISPP to avoid expensive erases and page migrations. Furthermore, we extend the traditional NSM page-layout with a delta-record area that can absorb those small updates. We propose a scheme to control the write behavior as well as the space allocation and sizing of database pages.\n The proposed approach has been implemented under Shore- MT and evaluated on real Flash hardware (OpenSSD) and a Flash emulator. Compared to In-Page Logging [21] it performs up to 62% less reads and writes and up to 74% less erases on a range of workloads. 
The experimental evaluation indicates: (i) significant reduction of erase operations resulting in twice the longevity of Flash devices under update-intensive workloads; (ii) 15%-60% lower read/write I/O latencies; (iii) up to 45% higher transactional throughput; (iv) 2x to 3x reduction in overall write amplification.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035958" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6e82d815f9ea3e0a09ab96221843d0edd04ce499", "sources": [ "DBLP" ], "title": "From In-Place Updates to In-Place Appends: Revisiting Out-of-Place Updates on Flash", "venue": "SIGMOD Conference", "year": 2017 }, "6e918f3d2e7ea08d7829af2c85b4a98db570d5e6": { "authors": [ { "ids": [ "34862645" ], "name": "Jon Calhoun" }, { "ids": [ "1699887" ], "name": "Marc Snir" }, { "ids": [ "2404759" ], "name": "Luke N. Olson" }, { "ids": [ "1703559" ], "name": "William Gropp" } ], "doi": "10.1145/3078597.3078617", "doiUrl": "https://doi.org/10.1145/3078597.3078617", "entities": [ "Compiler", "Computation", "Jacobi method", "Optimizing compiler", "Propagation of uncertainty", "Smart Data Compression", "Verification and validation" ], "id": "6e918f3d2e7ea08d7829af2c85b4a98db570d5e6", "inCitations": [], "journalName": "", "journalPages": "131-142", "journalVolume": "", "outCitations": [ "67ddcb550dd8fe57e4567bd7ca78f6e841f25c4c", "202aa40aa565854e431254a8478a8f3516c5ae05", "2640471efddd30a2855a2a4d76fde3459d36cdf6", "14e5bbf94dba58ead368cceab1541cff7cbb0170", "674b9ffeb40cd64063bd1a0c69370f83b802ed76", "42452be4c840abd3a4a0fa49c4b8d4aeeb3f2f6e", "0e53de74819527fe602c20e761fc8a7533a6e173", "8343d5ca1ed12d05bda5113f17a061d1b3098540", "8dcbf85cf07327ec5b61d0606d1911841dfede2b", "d720e48a3149e00018431c12fafad922b2eaf7ac", "7edb887ed7f15203eccb614095af001ea74bcfb6", "3abf71e837cb7b1e9fe7e54192d986142d87b1a2", "2657302160775f8766964d013efe242836693f3e", "0a84622ac7743998763aa8f5d1d1c04918bc6230", "b23f060a4574ff126e98b8fe13f8b508b9f82c1f", 
"37bbd889e2e6136b4826e60367e280d103415751", "f3ee81bed49c66cea802f23bfdac4ba23418a305", "abe07474d6bd99d0cb6ecf81cc90a97fd24ba5d5", "28fa4fbdb75bf2c8f12fc1a9783f583790cf092d", "73c9a5beceea745330d7e9d952d13233389c453d", "8cecedf4a44294b51fb092fb70d5392ba9d4ca17", "28540222f0ed31ae930dc329e29eb17d280663f2", "747ad718761b7d848a12e4f3a82aa0f46117a815", "c1a2e9ae8f1de1a2d7057fb7bf26a5b0567c67de", "2194c3460ab71f3826db00b045b2ae590c753319", "01d62cd850496455ce1616500f491690effa5c98", "3d5efe42321620330d8befe8fff4570a86525493", "108c840d5d1847948a2de0250490a327ae069ee6", "18992850afed53b60ce696e20374a1e1b3d9da22", "031567bf5e2a41f08d0f8e25dc4f130c125f2a7e", "18fe996c6f43a8f301cd842507045b679ba3506a", "f733d2efa413506dfc47056e4fe88cca347bd533", "5cd34f3b96b01b4863b4f0b4c873ee0cbe7c4f16", "df05f7c86a9cc6be766e81742babc9f9844d88b4", "27112ce87efb02a44f35d2299e487bfd69ec37ed", "5236160832766c58b1be2bf4f76f33d9d25b4600", "69d62d6464a5ffe31905dfe3e21fb5cdbc02755d", "a0fd149d853373fbc2982cee3b667fd5889be917", "bec080196e283a87d8c410dd5a3328f236ad957b", "73b42183793a9143e882ee21847db445da1812f6", "0256f81e75c34b5aa6f932c29d11807cbd848dfb", "e427cd948c32a1067f29581689a4f2d46ad9b54f", "49222c57619115fd34eeecf63f2579c60b324ca5", "3fa792fb63f453bd9d492f23ef7662aaaf6f7ca5", "d2ca94b978834e8a08ae2933ed3ca060458ab0ba", "925a81e193c963dd781ff4e9ad562cb1487bafbd", "025c101818da34b1b2e7e514c869724c8da81a9f" ], "paperAbstract": "With the rate of errors that can silently effect an application's state/output expected to increase on future HPC machines, numerous application-level detection and recovery schemes have been proposed. Recovery is more efficient when errors are contained and affect only part of the computation's state. Containment is usually achieved by verifying all information leaking out of a statically defined containment domain, which is an expensive procedure. Alternatively, error propagation can be analyzed to bound the domain that is affected by a detected error. 
This paper investigates how silent data corruption (SDC) due to soft errors propagates through three HPC applications: HPCCG, Jacobi, and CoMD. To allow for more detailed view of error propagation, the paper tracks propagation at the instruction and application variable level. The impact of detection latency on error propagation is shown along with an application's ability to recover. Finally, the impact of compiler optimizations are explored along with the impact of local problem size on error propagation.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078597.3078617" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6e918f3d2e7ea08d7829af2c85b4a98db570d5e6", "sources": [ "DBLP" ], "title": "Towards a More Complete Understanding of SDC Propagation", "venue": "HPDC", "year": 2017 }, "6e9c898d036b2cb7e1d50a4a54288b62cfff7cad": { "authors": [ { "ids": [ "38066673" ], "name": "Luis Eduardo de Souza Amorim" }, { "ids": [ "2219597" ], "name": "Michael J. Steindorfer" }, { "ids": [ "2447783" ], "name": "Eelco Visser" } ], "doi": "10.1145/3136014.3136020", "doiUrl": "https://doi.org/10.1145/3136014.3136020", "entities": [ "Context-free grammar", "Context-free language", "Data dependency", "Declarative programming", "LR parser", "Language workbench", "Lazy evaluation", "OCaml", "Open-source software", "Operator-precedence parser", "Order of operations", "Parsing", "Programmer", "Programming language", "Rainbow table", "Shallow minor", "Word-sense disambiguation" ], "id": "6e9c898d036b2cb7e1d50a4a54288b62cfff7cad", "inCitations": [ "0b58636ea52ac55ce61c31f96347000be48158bc" ], "journalName": "", "journalPages": "55-66", "journalVolume": "", "outCitations": [ "fc93c176dcec649a8a15266a667e16766244edaa", "ab1aeb95b493cc132cc1b60ffb96a94ae4326b31", "322a7e6f646b8f54bc766a367f9a570f1acf7658", "18c9c93c56817768a28bd66c329e01fe56b053d4", "543ac7965639667f816214c6c98ec5f9f7cb36e1", "869172852497052bc81d041c914c328bb16561ca", 
"3709d6c71d1003f47fce2989134173bf477802ac", "2bcc56aa8f39ec3d5f16c0064e461e90a6a1764f", "93a6a7319dbbe63fe68f5bac5b5f4518ff5b14f2", "79bb2782756dbcea84ab5431c131edcf226ee1f7", "255989d3825d5d4c158919e11f8205f18b0aa2da", "74a0e8f05c1bd8d69574278764cc41520f3398af", "1ad2fe5d7d1fcd00218266898fe5371ab598639e", "7b81149bae4381377080a6ec9173e0b6c89ae57b", "6bd1a113a0256a4d4fbd7c82f7e4e88f70d720d0", "215ac9b23a9a89ad7c8f22b5f9a9ad737204d820", "fc24ca9db6cd2dd30ab1cb8792aaa9fd14dfd740" ], "paperAbstract": "Context-free grammars are suitable for formalizing the syntax of programming languages concisely and declaratively. Thus, such grammars are often found in reference manuals of programming languages, and used in language workbenches for language prototyping. However, the natural and concise way of writing a context-free grammar is often ambiguous. \n Safe and complete declarative disambiguation of operator precedence and associativity conflicts guarantees that all ambiguities arising from combining the operators of the language are resolved. Ambiguities can occur due to shallow conflicts, which can be captured by one-level tree patterns, and deep conflicts, which require more elaborate techniques. Approaches to solve deep priority conflicts include grammar transformations, which may result in large unambiguous grammars, or may require adapted parser technologies to include data-dependency tracking at parse time. \n In this paper we study deep priority conflicts \"in the wild\". We investigate the efficiency of grammar transformations to solve deep priority conflicts by using a lazy parse table generation technique. On top of lazily-generated parse tables, we define metrics, aiming to answer how often deep priority conflicts occur in real-world programs and to what extent programmers explicitly disambiguate programs themselves. 
By applying our metrics to a small corpus of popular open-source repositories we found that in OCaml, up to 17% of the source files contain deep priority conflicts.", "pdfUrls": [ "https://pure.tudelft.nl/portal/files/34444067/sle17.pdf", "http://doi.acm.org/10.1145/3136014.3136020" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6e9c898d036b2cb7e1d50a4a54288b62cfff7cad", "sources": [ "DBLP" ], "title": "Deep priority conflicts in the wild: a pilot study", "venue": "SLE", "year": 2017 }, "6eccccd95a962f391dd83410ae54cda311e742c8": { "authors": [ { "ids": [ "18109907" ], "name": "Hoda Naghibijouybari" }, { "ids": [ "2874577" ], "name": "Khaled N. Khasawneh" }, { "ids": [ "1687247" ], "name": "Nael B. Abu-Ghazaleh" } ], "doi": "10.1145/3123939.3124538", "doiUrl": "https://doi.org/10.1145/3123939.3124538", "entities": [ "Algorithm", "Central processing unit", "Cloud computing", "Computational resource", "Covert channel", "Data center", "Data rate units", "General-purpose computing on graphics processing units", "Graphics processing unit", "Interference (communication)", "Microarchitecture", "Parallel computing", "Reverse engineering", "Scheduling (computing)", "Trojan horse (computing)" ], "id": "6eccccd95a962f391dd83410ae54cda311e742c8", "inCitations": [ "892f83ce3116be08be29fc5797343711ef8239c0" ], "journalName": "", "journalPages": "354-366", "journalVolume": "", "outCitations": [ "24395c6f447d34f3da3779a102ef265ff12ab49d", "650f94f3a3b06b0d0cd034169d57ca087eec509c", "023f23c300804754753cb11db51fb7f582556ab7", "3c28d5967db86e8f5e4c37d03518967c285a32bf", "1734463e8278c8d2412182a1a15267d3a3aa760b", "064f38e5edef42cb5a37f2a350e4413e17132b11", "1f932f0d49a4c56d9718e8506d6177c6a6848831", "8b3bf0ce97cf1e5c6ff99d76e9961d77ed0bf180", "6158ad0fb4f022b2467915fdf587650a04627c2b", "96ba6f5c06850c009e5b77094c0d4532744dedc2", "6dd8d0d2a7ca2c2717e6cd4ee3c8af65f359b796", "1c1fcd25b14737f24cc7e3e4af8bfe96f298182f", 
"045bbbea384e9d54be38dd207bf237d5208ea599", "160b6782474f687a098a406bb7409c774d0e91ab", "5cea87619b5dc8f13ca6a68ba3c563296b2b368f", "cc8786711f75e957fbe81e798db07c2fefce644f", "08dcda95dcbd1ebe906a1c24507b7814759462aa", "21ddf1f7ab7e2cd2ae07073bf3238ce46314bac9", "222c651bbde2d6ec42dfa148b9b9499ed5119389", "693a8b56a9e961052702ff088131eb553e88d9ae", "934e8d76376f6c78a8b89ef2304f01a8e7099401", "26f3cbecbc636984d57e52191c1d87c9377aff6f", "18336fdfca9e54b4a1a0dc03a0eaa66379778133", "48c4d54228c3ca76a1cf45f2c27e25f5abb384d3", "4008b13cef86ab5f0cb5a5a06fc458a560fe9e34", "a0a33a962062e1abf552dbb964b0097860978fda", "040bd1162e05c709ac15d937cec485fae3a6af43", "b2ca498540a6001dd23146d9c8805839f2a5f557", "52c2c050af5b32d4929b4b193967a3675d03aea0", "29cfc0e4add7a511d039d63570454a04e38a9bd4", "90f2e587256b8b3cc7651f257a8066ff9f2f544e", "454b134e0ad83921cbe13f4e4332c79b93aa7612", "0f9b2e598ee1ddde4fd5a2f3008a6983367cc22c", "c0c14c16813f0083b9e3bf602746a8be1270996a", "404b72fbf63ff8f3f15c26a88384a0d4d7bcfcd7", "23177452df15b652dd54a59324502b92c99687a7", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "923be6c156a050543d1f92d8fce95f35a5bb7525", "1ce1361260c5367eb64392ebe37f29b3fbfc21de", "58427fedfab49fdd5f78d8f8fb1834b3c1dc3ea2", "c5b3f0caeba42a532a48adc80e6932c35bb26ac4" ], "paperAbstract": "General Purpose Graphics Processing Units (GPGPUs) are present in most modern computing platforms. They are also increasingly integrated as a computational resource on clusters, data centers, and cloud infrastructure, making them possible targets for attacks. We present a first study of covert channel attacks on GPGPUs. GPGPU attacks offer a number of attractive properties relative to CPU covert channels. These channels also have characteristics different from their counterparts on CPUs. To enable the attack, we first reverse engineer the hardware block scheduler as well as the warp to warp scheduler to characterize how co-location is established. 
We exploit this information to manipulate the scheduling algorithms to create co-residency between the trojan and the spy. We study contention on different resources including caches, functional units and memory, and construct operational covert channels on all these resources. We also investigate approaches to increase the bandwidth of the channel including: (1) using synchronization to reduce the communication cycle and increase robustness of the channel; (2) exploiting the available parallelism on the GPU to increase the bandwidth; and (3) exploiting the scheduling algorithms to create exclusive co-location to prevent interference from other possible applications. We demonstrate operational versions of all channels on three different Nvidia GPGPUs, obtaining error-free bandwidth of over 4 Mbps, making it the fastest known microarchitectural covert channel under realistic conditions.", "pdfUrls": [ "http://www.cs.ucr.edu/~nael/pubs/micro17-gpu.pdf", "http://doi.acm.org/10.1145/3123939.3124538" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6eccccd95a962f391dd83410ae54cda311e742c8", "sources": [ "DBLP" ], "title": "Constructing and characterizing covert channels on GPGPUs", "venue": "MICRO", "year": 2017 }, "6ecfc6ef1d571637cdd02aae709464dffb268ac8": { "authors": [ { "ids": [ "2786856" ], "name": "Ashish Tapdiya" }, { "ids": [ "1702786" ], "name": "Yuan Xue" }, { "ids": [ "3025858" ], "name": "Daniel Fabbri" } ], "doi": "10.1109/CLUSTER.2017.79", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.79", "entities": [ "ACID", "Commodity computing", "Concurrency (computer science)", "Concurrency control", "Data store", "Database", "Materialized view", "NewSQL", "NoSQL", "Qualitative comparative analysis", "Scalability", "Synergy" ], "id": "6ecfc6ef1d571637cdd02aae709464dffb268ac8", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "384-388", "journalVolume": "", 
"outCitations": [ "0538e05e1ced11b91cda5d1aed88a73969def882", "9aa0d7253574e50fe3a190ccd924433f048997dd", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "18a5f443299784479e78d9e77f175af57cb2fa2b", "2b300024ac736f7181f6d35392ec3a65f49457bd", "412a9e54bbb31e12d008a9579994e009c5b40b46", "2321a150c84d771d81fd81759757795dcda25750", "0c97fa96d179dec4f5a9349c4e5203205d427fb8", "6c523c74fb0b99440982375608d880fdb2752d21", "07d847f310d5fa9138f461f0a25c5e0024f1c4af", "363116c764453d9b740c46d23b1f5a3c5801d76e", "15caf02eadcd5b043e795a60853010c03ebc9246", "10723678d19bab6a52c8ee9b89f9118536044033", "0dafdc7debdcae528b2549489a03509cb4ecb9fe", "d6cd585cdc5bdaf55dffc2752d3c2a086cfc27b1" ], "paperAbstract": "Relational databases are well suited for vertical scaling; however, specialized hardware can be expensive. Conversely, NewSQL and NoSQL data stores are designed to scale horizontally. NewSQL databases provide ACID transaction support; however, joins are limited to the partition keys, resulting in restricted query expressiveness. On the other hand, NoSQL databases are designed to scale out on commodity hardware; however, they are limited by slow join performance. Hence, we consider if the NoSQL join performance can be improved while ensuring ACID semantics and without drastically sacrificing write performance, disk utilization and query expressiveness.This paper presents the Synergy system that leverages schema and workload driven mechanisms to identify materialized views, and a specialized concurrency control system on top of a NoSQL database to enable scalable data management with familiar relational conventions. 
Synergy trades slight write performance degradation and increased disk utilization for faster join performance (compared to standard NoSQL databases) and improved query expressiveness (compared to NewSQL databases).", "pdfUrls": [ "https://arxiv.org/pdf/1710.01792v1.pdf", "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.79", "http://arxiv.org/abs/1710.01792" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6ecfc6ef1d571637cdd02aae709464dffb268ac8", "sources": [ "DBLP" ], "title": "A Comparative Analysis of Materialized Views Selection and Concurrency Control Mechanisms in NoSQL Databases", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "6ed7f6aeedcb88b024791705e77d3e075b92d065": { "authors": [ { "ids": [ "1985874" ], "name": "Oussama Soualah" }, { "ids": [ "1793459" ], "name": "Marouen Mechtri" }, { "ids": [ "2242502" ], "name": "Chaima Ghribi" }, { "ids": [ "1711563" ], "name": "Djamal Zeghlache" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Decision tree", "Monte Carlo", "NP (complexity)", "Network function virtualization", "Simulation", "Testbed" ], "id": "6ed7f6aeedcb88b024791705e77d3e075b92d065", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "579-588", "journalVolume": "", "outCitations": [ "25d28bfbfd9067d9cb5a85f4af0af3a57013baf4", "ba6753cb5b3ea151930e5940fbc5cdb8ef91eb98", "33744dd76bcc3fbd9cd1953a0d2a9b99014666e8", "78f654bfef4c3759af6a10dfa9ff2e848f37ce22", "7f822adf127881926c2fab2401d6e3e381bd9c11", "63a061c70da9ce645de1ad803a06f1595833befb", "95b35e80a2cae533d2d6751f00c5047aedb47df7", "23cba2e1368b68d488877fc6fece9fb3a2f26e74", "9b0955664a743d5849130a5b0f4c1c3a9b5a1e66", "21b8099f8b9b7044793daa848cc109aafe201fd7", "e6e217e9188f0bc44400d0d5d4bf00140143880a", "67f4586e6ef30b3ae4e56a3aa7946a3b1f449a8f", "d15d3650acbdb868d22fb61cd14ad8cb5e57f131", 
"05f35907dd815c9a4d5b431a8367842f19b57830", "1e5027ff533d31513b667cec06f6a650882e1ee0", "ec407a7690c3ef891180500819da94f09b4bc8d9", "3ce2d233cee585ecff73729836918ba87195c18f", "0e2c4ad06ec462a961f195492941bc70afd560ae", "bc2a998888fc30ca6f007653e0631620308f717c", "e15431eabf00e9e976afcfe96f97057dbc826347", "1876d9e94604de357cc658d55d31b020542d745c", "f29bb48ed72c8f651d136c2f5ca01f84a1b748bc", "1294ce3e9dff936d9588f43a457d818b72c4923a", "d3fb5d22083079062952be841ecb9e20c032ec26", "16ccda81a562eee4a5403dab8029c57c30fc2c19", "48e0c98bf5cad1fbc65b401526804c252364b022", "fffdebefecd6a60a841acc8aafb7f0e89a76f996" ], "paperAbstract": "This paper addresses energy efficient VNF placement and chaining over NFV enabled infrastructures. VNF placement and chaining are formulated as a decision tree search to overcome this NP-Hard problem complexity. The proposed approach is an extension of the Monte Carlo Tree Search(MCTS) method to achieve energy savings using physical resourceconsolidation and sharing VNFs between multiple tenants. A realcloud testbed and extensive simulations are used to assessperformance and ability to scale with problem size. Evaluationresults show significant reduction in energy consumption of theproposed placement solution compared to related work. The polynomialcomplexity of our proposal is highlighted by the simulation results.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101191" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6ed7f6aeedcb88b024791705e77d3e075b92d065", "sources": [ "DBLP" ], "title": "Energy Efficient Algorithm for VNF Placement and Chaining", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "6f01ef0fd14d4b7bf99844df7d47edd674625e0f": { "authors": [ { "ids": [ "29719971" ], "name": "Sourav Chakraborty" }, { "ids": [ "1802958" ], "name": "Hari Subramoni" }, { "ids": [ "1731654" ], "name": "Dhabaleswar K. 
Panda" } ], "doi": "10.1109/CLUSTER.2017.106", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.106", "entities": [ "Algorithm", "Central processing unit", "IBM OpenPower", "Library (computing)", "Open MPI", "Run time (program lifecycle phase)", "Shared memory", "Xeon Phi" ], "id": "6f01ef0fd14d4b7bf99844df7d47edd674625e0f", "inCitations": [ "5ceb46a025ecb09fa0f8240c7bef23a5580eaea4" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "13-24", "journalVolume": "", "outCitations": [ "32b873df142242696c13abd1788617d3f643a173", "15ee1b0a68d65fa53aecfe16aec6e8894de95a68", "5d4d8678c7abe4a99fe87cf2d1dd12a62d80dba2", "ad91ce97c65bd785a8a0f7fb7ad8c22360886f42", "5e983f887015d1de9a88a9eea97bc75b3f3f28da", "7605fe626c3598ee68fefaf1f4e1d21fcd2cb3d4", "5a92a26a2550ca38f2b4e7b1252cb678f5d48e55", "8e80783316380bc01d69e9c7551e6a602303ac50", "8e2011a5edab5780aef6303cb95774cf524f4e37", "b472170ec02b9380d5aefdcd64380282c375ea4c", "2a0c9f8248aad793810dfc2ec2bc21a8ebdca6f4", "179264a9a96a7962d7ca6cc22aa605e3b121d9dc", "5b2933791939f0244650daebd3da92904119f69b", "1e2338427ceffd262bc7059c257b144917cf281c", "42f26cd7904dda8843e357c283f323e9101f37c3", "47488c34088ccd8adf78fa24b5c82dcbcf200021", "019153a4a937fddf0f0e24a9e07e3053f6f990b1", "830ad9ce5c06ed1ace24c44df4be9111e296d820", "6aaebcfff03eaf2b278b8c713ace841f39651d02", "3f750233c3e20da134b4427eb6645f877ac0a503", "7e7292367aaa2d36d8d8b641425de15943f7032b" ], "paperAbstract": "Multi-/many-core CPU based architectures are seeing widespread adoption due to their unprecedented compute performance in a small power envelope. With the increasingly large number of cores on each node, applications spend a significant portion of their execution time in intra-node communication. While shared memory is commonly used for intra-node communication, it needs to copy each message once at the sender and once at the receiver side. 
Kernel-assisted mechanisms transfer a message using a single copy but suffer from significant contention with a large number of concurrent accesses. Consequently, naively using Kernel-assisted copy techniques in collectives can lead to severe performance degradation. In this work, we analyze and propose a model to quantify the contention and design collective algorithms to avoid this bottleneck. We evaluate the proposed designs on three different architectures - Xeon, Xeon Phi, and OpenPOWER and compare them against state-of-the-art MPI libraries - MVAPICH2, Intel MPI, and Open MPI. Our designs show up to 50x improvement for One-to-all and All-to-one collectives (Scatter and Gather) and up to 5x improvement for All-to-all collectives (Allgather and Alltoall).", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.106" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6f01ef0fd14d4b7bf99844df7d47edd674625e0f", "sources": [ "DBLP" ], "title": "Contention-Aware Kernel-Assisted MPI Collectives for Multi-/Many-Core Systems", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "6f0c1898575d56d1c1073b1f2eb6cba5bc931005": { "authors": [ { "ids": [ "26965774" ], "name": "Yashwant Marathe" }, { "ids": [ "2390821" ], "name": "Nagendra Dwarakanath Gulur" }, { "ids": [ "1799329" ], "name": "Jee Ho Ryoo" }, { "ids": [ "6970155" ], "name": "Shuang Song" }, { "ids": [ "1703238" ], "name": "Lizy Kurian John" } ], "doi": "10.1145/3123939.3124549", "doiUrl": "https://doi.org/10.1145/3123939.3124549", "entities": [ "Baseline (configuration management)", "Best, worst and average case", "CPU cache", "Cache (computing)", "Context switch", "Network switch", "Page table", "Telephone exchange", "Thread (computing)", "Translation lookaside buffer", "Virtual machine", "Virtual reality" ], "id": "6f0c1898575d56d1c1073b1f2eb6cba5bc931005", "inCitations": [ "0231ffa4b9b095efbf0f302898cd7abd7dd0b764" ], 
"journalName": "", "journalPages": "449-462", "journalVolume": "", "outCitations": [ "06125169a21ef17641d7199544417b21c378eede", "398cc68e6df0cffb5b06da2ab39b004bec8ad9ab", "a70c72d011676d58fea4a652d9bd93f915ab26d8", "eb82d3035849cd23578096462ba419b53198a556", "caacd536fa218ef5218021506ebc041e3f460064", "daeff61502115efc4b9ee81607a8e5489215ea88", "19987f1180f222ca1063b4e55346c902aadf3abc", "44f474a25ee7d1fccfb97bd4e64ffc7ae0df61e8", "0571492ae2aa6df23ebbfc9f6e12ce6c0eb38845", "d52b40ed62b865ee455b0fd7741e83af9353ee3c", "2960c89331eb7afa86584792e2e11dbf6a125820", "99d31c3dafc1683816c28bbe1e12e5352bb0bc36", "1728de7b027e827dfc67ceb3b0e23b841a4b1538", "792aa5a81ac1d344de450ec59eec339aa0e508aa", "5d999f4a5567e6f4a54e46bbcd6006f75ab0cbac", "863bcc82bd61f2199a4f2c4c6bc34a26c217c87a", "8219bf467b82208a98aa7b45e67f35ed740b979f", "1f1a1271f08c7cbf78acc56420665dd538be33c8", "1648469823fab2d5c6387e8f78e5ba4602f1e6df", "333da42f4369dff8ca905ad21ea6ee2f5dc99d55", "bb3961d142c6c734708cc0ac0cc2d1bbcda1a662", "9c001d2546b07f4325dfa32d46f602bdf56ec474", "73dd5dde28119e41dd0f0a07275b7f722c4619d2", "a4f66a9c86992bc22123d13ceb7225b08ae96929", "d875686d4b910315859db0bc477875cc8d1c1acd", "0653e2ed9f683868cb4539eb8718551242834f6b", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "4a58e3066f12bb86d7aef2776e9d8a2a4e4daf3e", "13165fb53e7d93a0c5e3438fc1fd4f77b7a1ab0b", "c7e2001aba02d1a27fef849c4850fbceb625a1c0", "2ddd179040a880aa059c1f02d6f49af776e86e69", "2804bcc9df4352c2da1367f182a54e7c67a160ec", "6db7424e15cf54a4bae5f79b55ee84894723a875", "1c32ad0a42109fab826eb3054df7cfc33b424125", "6ff096588c6f8f673e83fcc2639a8cc6f450c50a", "700ceaf012da1bbe8c8a2ff96f91c98baf7f1505", "0531ddb536a96591e26e1f59dfa59600beace3f9", "1c2aed008f7bcb769d0e6f8109434c766fb22bde", "c0cf6192ba294ee9a7a7edda864a7d0dad5ac35d", "a93cfc7d891492c9bfdfe129b8077b3a2f41fe08", "1ae7be5d55833e6aa53d24f620be5df9006a3558", "2f5215e1d6a2bd44ede8e6519f17ca0bba1fa27d", "03e53dddc865bf688fe313a94ad186a4d96bffe0", 
"2843539cb23c9025329a2c06ca8d92dbd3cb9003", "0018db87e16ba3a3a62aa72e120a58cd09fe014a", "2269889c9085ff518ee9e7f5b2f92e4599dd3ff2", "211f2beaaf36bb6a920a63dbbef6842cb1d22468", "2253d6559ae9793b5cfa6e409d1d9de50dafa29a", "817f2d1e63771c8f8b5316d0edde45de22d6024b", "6201fb6d59a909959edfb661f52470c04799b0e7", "96fd45d3af1c51da300d635b27230c646669d481", "4654217c2e4e46be69f0135a85e484b750791a5d", "01a443750f86a258dd56942a4f136683e1bd77ed", "343a384d5476ead9496f96559aba5ad09e95e01e" ], "paperAbstract": "Computing in virtualized environments has become a common practice for many businesses. Typically, hosting companies aim for lower operational costs by targeting high utilization of host machines maintaining just enough machines to meet the demand. In this scenario, frequent virtual machine context switches are common, resulting in increased TLB miss rates (often, by over 5X when contexts are doubled) and subsequent expensive page walks. Since each TLB miss in a virtual environment initiates a 2D page walk, the data caches get filled with a large fraction of page table entries (often, in excess of 50%) thereby evicting potentially more useful data contents.\n In this work, we propose CSALT - a Context-Switch Aware Large TLB, to address the problem of increased TLB miss rates and their adverse impact on data caches. First, we demonstrate that the CSALT architecture can effectively cope with the demands of increased context switches by its capacity to store a very large number of TLB entries. Next, we show that CSALT mitigates data cache contention caused by conflicts between data and translation entries by employing a novel TLB-Aware Cache Partitioning scheme. 
On 8-core systems that switch between two virtual machine contexts executing multi-threaded workloads, CSALT achieves an average performance improvement of 85% over a baseline with conventional L1-L2 TLBs and 25% over a baseline which has a large L3 TLB.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3124549", "https://lca.ece.utexas.edu/pubs/csalt.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6f0c1898575d56d1c1073b1f2eb6cba5bc931005", "sources": [ "DBLP" ], "title": "CSALT: context switch aware large TLB", "venue": "MICRO", "year": 2017 }, "6f31d49425d355d4eee17622b2efb300c5026887": { "authors": [ { "ids": [ "10244521" ], "name": "Huang Fang" }, { "ids": [ "2424698" ], "name": "Minhao Cheng" }, { "ids": [ "1793529" ], "name": "Cho-Jui Hsieh" } ], "doi": "10.1109/ICDM.2017.19", "doiUrl": "https://doi.org/10.1109/ICDM.2017.19", "entities": [ "Algorithm", "Approximation algorithm", "Constrained clustering", "Constraint logic programming", "Convex optimization", "Dimensionality reduction", "Experiment", "Linkage (software)", "Nearest neighbor search", "Nearest neighbour algorithm", "Optimization problem", "Program optimization", "Semi-supervised learning" ], "id": "6f31d49425d355d4eee17622b2efb300c5026887", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "101-110", "journalVolume": "", "outCitations": [ "038f2b498be7bd71fae9c235facf270a5002bbc0", "46820e8d6aca201bb5cd0d8e7bd685b3c497e12c", "e516d4bf4836ddc09f02aedd1088728244a76b17", "0389a414c5d0ef50e06fe0c15f6102f374ce1b04", "a74534569c5532ea62da572e3b617dedc825b262", "8479a404b73afd6a61d8a872086d9e7d6d2bdf30", "0f16f6f478b5c788dce466eb50e36c612273c36e", "65a1c8293865b1479a807c2eebdc11dc1a697554", "1d43b461fef66d38e506f4aabfd00550218d269d", "4229f467b059188fc7a1234016a3c80557fa7df0", "66a6dde6a6a20f77ce52cb2464a52777837bd81e", "0c7f1d285ce069b2f7a807a4b2750695098bffe6", "2e37d6b9a02ec31ea22252c37beae410c6b609a8", 
"24c9b0b05c5e957e255b854f947472f9181772a4", "0bacca0993a3f51649a6bb8dbb093fc8d8481ad4", "07cd9f122bf94075df0537e0e4cafa40cea2d146", "b5fc68ad44c73037576935979137fc3147be9c26", "754f44238928103b7a8205d9888374199b0bfa9b" ], "paperAbstract": "We consider the semi-supervised dimension reduction problem: given a high dimensional dataset with a small number of labeled data and huge number of unlabeled data, the goal is to find the low-dimensional embedding that yields good classification results. Most of the previous algorithms for this task are linkage-based algorithms. They try to enforce the must-link and cannot-link constraints in dimension reduction, leading to a nearest neighbor classifier in low dimensional space. In this paper, we propose a new hyperplane-based semi-supervised dimension reduction method—the main objective is to learn the low-dimensional features that can both approximate the original data and form a good separating hyperplane. We formulate this as a non-convex optimization problem and propose an efficient algorithm to solve it. The algorithm can scale to problems with millions of features and can easily incorporate non-negative constraints in order to learn interpretable non-negative features. 
Experiments on real world datasets demonstrate that our hyperplane-based dimension reduction method outperforms state-of-art linkage-based methods when very few labels are available.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.19" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6f31d49425d355d4eee17622b2efb300c5026887", "sources": [ "DBLP" ], "title": "A Hyperplane-Based Algorithm for Semi-Supervised Dimension Reduction", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "6f546215728fa94b76344c35cb32253bd9f82bd3": { "authors": [ { "ids": [ "27086859" ], "name": "HyungSeok Han" }, { "ids": [ "3695676" ], "name": "Sang Kil Cha" } ], "doi": "10.1145/3133956.3134103", "doiUrl": "https://doi.org/10.1145/3133956.3134103", "entities": [ "Application programming interface", "Kernel (operating system)", "Operating system", "Software bug", "Superuser", "Value (ethics)", "Vulnerability (computing)" ], "id": "6f546215728fa94b76344c35cb32253bd9f82bd3", "inCitations": [], "journalName": "", "journalPages": "2345-2358", "journalVolume": "", "outCitations": [ "22b22af6c27e6d4348ed9d131ec119ba48d8301e", "a73f2dab1e9caae57bbbffe551dcefdf00e43f3e", "0c216f20a00819d9cb88adb57e478536cc43a13c", "7ae091ea6b9221fa8e7fe4c1295557fc1749a9d2", "1d441ef596b72af09e3405132e4ba0563b8840a4", "1dfa9e8190eb9f67187acb90d033eb7142ac7db8", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "8c8ffe8e4fdadbf42b46944d0339eafc3e4de4c3", "4cde139a7f5f7df6b1e435197b1698ad20811f4d", "7cecd69662651f61d78498a6c3ed9ecd711c5358", "5556995fb630c47805bbba560287ea59ce357fa1", "4f74140cc94d1dd01c81251cfbdb83d84d6491cf", "1db244a916891d16d572e21b1e6e7730f7439bc9", "27a708a398b47a8ab27c4108c6e7a7099ef36d44", "09faa1cc5c8784d811502c5137bf63b5f1ac2934", "0228d60b7a56a3d778e5425c41eaf72cf0b6ec55", "55b7d2e425c5e813ed51780845398ef5d246a4d3", "1de5ae8534fc76323e4d926e10dc0fc76a28a361", "af3be2c532d48644f888811432316837e6d535f6", 
"0daad057a90235279c4c565f8776097f5a7df4de", "66e47de58ae39b384f61242cdae7672d9728876a", "a80cf182c22c5e11d523c8af36732576a1d3d8d4", "5d7dfc48f39117ba409aa59503549a681569e895", "125268a25397dd17fb3c7dbd4018114a972e4acb", "0adfaffb6e236d886c6eda579b28f5a8530019c6", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "19f9cccd47ac99d167eebfec5937e95138d2aed8", "69b7456f3d47fed3745239b5f67996a0b9a1a5c9", "3613ac6a2743ed1fd969bedd4ddcc7ba45893f0c", "d9a19024749993fcba708034907de1e52f1a3e11", "de71e2359995087b4ce7d46e4eb718c341c70ee0" ], "paperAbstract": "Kernel vulnerabilities are critical in security because they naturally allow attackers to gain unprivileged root access. Although there has been much research on finding kernel vulnerabilities from source code, there are relatively few research on kernel fuzzing, which is a practical bug finding technique that does not require any source code. Existing kernel fuzzing techniques involve feeding in random input values to kernel API functions. However, such a simple approach does not reveal latent bugs deep in the kernel code, because many API functions are dependent on each other, and they can quickly reject arbitrary parameter values based on their calling context. In this paper, we propose a novel fuzzing technique for commodity OS kernels that leverages inferred dependence model between API function calls to discover deep kernel bugs. We implement our technique on a fuzzing system, called IMF. 
IMF has already found 32 previously unknown kernel vulnerabilities on the latest macOS version 10.12.3 (16D32) at the time of this writing.", "pdfUrls": [ "http://daramg.gift/paper/han-ccs2017.pdf", "http://doi.acm.org/10.1145/3133956.3134103" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6f546215728fa94b76344c35cb32253bd9f82bd3", "sources": [ "DBLP" ], "title": "IMF: Inferred Model-based Fuzzer", "venue": "CCS", "year": 2017 }, "6f5c0bf2582b7b72812535d72a0215dd3070b822": { "authors": [ { "ids": [ "2874577" ], "name": "Khaled N. Khasawneh" }, { "ids": [ "1687247" ], "name": "Nael B. Abu-Ghazaleh" }, { "ids": [ "1712897" ], "name": "Dmitry V. Ponomarev" }, { "ids": [ "1723562" ], "name": "Lei Yu" } ], "doi": "10.1145/3123939.3123972", "doiUrl": "https://doi.org/10.1145/3123939.3123972", "entities": [ "Central processing unit", "Evasion (network security)", "Head-mounted display", "Helmet-mounted display", "High- and low-level", "Learnability", "Logistic regression", "Malware", "Probably approximately correct learning", "Reverse engineering", "Software bug" ], "id": "6f5c0bf2582b7b72812535d72a0215dd3070b822", "inCitations": [ "9c377cd0ff9a8bac5fb22d8990c8e27cc9f6956a" ], "journalName": "", "journalPages": "315-327", "journalVolume": "", "outCitations": [ "37ef5a307a8d6ea0ba6f5f7e39a0199437c2cf48", "0612fdc261b459b0e84fe8590afd543e3fb08b94", "595a00f0975b5d5c28d904ddba1ae5a493316573", "25a8afdfae0d607fa991e84c20f21646bd779fe3", "3946a5c410ba1980d932cc6d2987b4d935e038d5", "48c4d54228c3ca76a1cf45f2c27e25f5abb384d3", "a7e5e0348e237013e5e8617ec4a75ae4cbed9c8e", "d9976e04d412e49d272e8b1d9e20b4fc9a454511", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "2a035c452ae58575ccd804ca0b58665c6951d90c", "16f3a01cc552e9d24829cfece9ca2974eb1b59be", "1f932f0d49a4c56d9718e8506d6177c6a6848831", "396210c5a655bf1b82a56e6f94c9830aa818c89c", "1f0e9613b1d47bbe8ba5b32a57e89b81ec02aba8", "142f95ba20abb7a964500aef0983a4181be91fd6", 
"4374b8932109c93470d5d68356e6eb1ef91dbbb8", "1e102df57ec826f0afee0dda578551e3da3b7289", "00ed3faeb0aa2150d28832d85216cf05c69a2be6", "6229f5d2f1419c0bb328a60a505b6f1563aad0bc", "0718917ab92761ffee9626057b302464e524a141", "1780f4fc05d87356e923a75a8ab3ff4ce79b9fb0", "008c2c2cf69fd4936a64e67d265b9b173f0d190f", "35734e8724559fb0d494e5cba6a28ad7a3d5dd4d", "2cf3fd84f30e5cae30dd46a3d7ecc0d63583b1a6", "3c1b97dff8f96170f9557319e9d881286aa77c1e", "745fed914e5773cce64de570d9e7381d26b10839", "07848afa4b5f2f09cd2fe28a6cdc25c642da61cd", "b2ca498540a6001dd23146d9c8805839f2a5f557", "45f6957cab31e802934cc761380c1a4a37c66208", "3a0ad57ecb97795a8cc91290484ff9e576728c84", "1c1f65aa4c7229fbbb516348f2e7c4f622155e3b", "7b7016be1c64010a449548feac3a0bb2623737da", "4f3a3922d80f82b1e7a23aa1fbb60c4afaf4f907", "c641fb8f6ff1ac0c6d0d4ad9fbb7d50d8464729b", "22bc7549801fd359f932bbdc11b8ca24b87baadf", "5e4fa9397c18062b970910f8ee168d3297cf098f", "888cb0ac77e16ec98c8e2a5af79e567d8a43dcd1", "d98944b3674fb03368e1479ad30fc69187c3fe3c", "3871a09a4c64716182f2238c4144b223faa39202", "d65c36c535af5bc59605fa04da7a964adbcf952e", "21124f218317e18e69d251ca7c40a129489b26c8", "a4ebb757ea215851146b7a4f78b79278f6d42b33", "63a374bdf9fa51876e134f92065965a0c3f8b19d", "21ddf1f7ab7e2cd2ae07073bf3238ce46314bac9", "0e316f76dac185ee2d922e64d4659b2e36842196", "25ee03dea55ac3137edb13b3e141b19a03deba21", "0653e2ed9f683868cb4539eb8718551242834f6b", "7cdf1c29cb63423c9638dd4f5620956b3fe80d11", "29fa9b903dbd8d19e39b0d7fb06efc6a1907dfdb", "bed7594951af86e1e84d49b1b7ad598d7830a8e5", "173f9ebbc0be6b591dfa72111aa78d3568e2db87" ], "paperAbstract": "Hardware Malware Detectors (HMDs) have recently been proposed as a defense against the proliferation of malware. These detectors use low-level features, that can be collected by the hardware performance monitoring units on modern CPUs to detect malware as a computational anomaly. Several aspects of the detector construction have been explored, leading to detectors with high accuracy. 
In this paper, we explore the question of how well evasive malware can avoid detection by HMDs. We show that existing HMDs can be effectively reverse-engineered and subsequently evaded, allowing malware to hide from detection without substantially slowing it down (which is important for certain types of malware). This result demonstrates that the current generation of HMDs can be easily defeated by evasive malware. Next, we explore how well a detector can evolve if it is exposed to this evasive malware during training. We show that simple detectors, such as logistic regression, cannot detect the evasive malware even with retraining. More sophisticated detectors can be retrained to detect evasive malware, but the retrained detectors can be reverse-engineered and evaded again. To address these limitations, we propose a new type of Resilient HMDs (RHMDs) that stochastically switch between different detectors. These detectors can be shown to be provably more difficult to reverse engineer based on resent results in probably approximately correct (PAC) learnability theory. We show that indeed such detectors are resilient to both reverse engineering and evasion, and that the resilience increases with the number and diversity of the individual detectors. 
Our results demonstrate that these HMDs offer effective defense against evasive malware at low additional complexity.", "pdfUrls": [ "http://www.cs.ucr.edu/~nael/pubs/micro17-rhmd.pdf", "http://doi.acm.org/10.1145/3123939.3123972" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6f5c0bf2582b7b72812535d72a0215dd3070b822", "sources": [ "DBLP" ], "title": "RHMD: evasion-resilient hardware malware detectors", "venue": "MICRO", "year": 2017 }, "6f6806f8dc90f1e1b73ee432602e13698a788827": { "authors": [ { "ids": [ "33651904" ], "name": "Amit Chavan" }, { "ids": [ "2313625" ], "name": "Amol Deshpande" } ], "doi": "10.1145/3035918.3064056", "doiUrl": "https://doi.org/10.1145/3035918.3064056", "entities": [ "Algorithm", "Algorithmic efficiency", "Analysis of algorithms", "Baseline (configuration management)", "Database", "Database engine", "Delta encoding", "Dex", "Dynamic programming", "Heuristic", "Pipeline (computing)", "Query plan", "Synthetic data", "Version control" ], "id": "6f6806f8dc90f1e1b73ee432602e13698a788827", "inCitations": [ "678f12849b2b0ce95a1a01961e73a8397e03c025", "901c0c68e0cf0dc84d3f1f4ac7195b9a667da4bd" ], "journalName": "", "journalPages": "171-186", "journalVolume": "", "outCitations": [ "473fa1c5c66d4a51adbb64c263687d730fc6d217", "5d37dbcead67858f972056555745041250bb1b6a", "c9b91e05b4ad22ddd078b8658b14dd8fcba66d5f", "62ea7fbdc3349f4fe8f12f098f1ce4a746faa5db", "12b6044216d1a0849d74d1a7258619279027e8fc", "75a4860c9b3b2e95bc3a8056543e7560a1753f2b", "87f753c9679a8e06c35d4c5faa50015e8f602f0f", "3bb527f0149baba20caa9a9dbbf5403066a5132f", "844a6ced825c82a1eb31d03f62a0baa45d487406", "3f8d104ebd8b63b761db1dc49747fbbb40681b70", "1d1c3e69ceb5ac354f8a8a245da4dfc404ad266a", "4306478b5205fb34e7a1036db4b714f51e419e45", "09cacb2d068d605e6f8148b173524094a41670d5", "5aefda15f1dcf04529bbf518659a23112cbb5246", "2ff1c5db24059f32a086bc58b2bcfbeaaae23be4", "215aad1520ec1b087ab2ba4043f5e0ecc32e7482", "6616f158581ec7d7871c056a57a834ed2f09f528", 
"07c589765de3d9a37ceb558d37fab03e091321a0", "79e69f6a619c2d088ed8139a569e30c9628c1e1e", "d79411c18f55d86bc9c9c7164108f77fb4f469aa", "f458bd30b0f27959b1147ea8afa08ec8fef94ad5", "d7ebd33290b4f178e1c0e9c294be28e321cd30e9", "238bc5f0a213632d0928c7360d07f002fa7f1d37", "209cd159fba2945dfd4d594998189dbd89e01652", "29cc097c8d9ad13df2f1e02d6ab99a9ca2936178", "363116c764453d9b740c46d23b1f5a3c5801d76e", "3593269a4bf87a7d0f7aba639a50bc74cb288fb1", "74e89eafa75fed08224710ef55007eb7cf349ce2", "2f8da4fea7268c6f846af7453d763d2ec2da6111", "03a666995669ebb66f3a8e7cd3b6e0f07a0f8d6a", "1948575f4cedf689f708d1f0880e79de9ec4c4a5", "b716349a3072afbede9f0fb8f561a8e0f297baf0", "3ed00b246b5f3be959d0335203e59861024ac69d", "c8e9e10457825d908e1a2bd1173c47d9dd3f3dec", "46a574413123beb2ba0572c563e1a4883baec997", "36288858ac21b08b863a7d181dd0430c4c91ab3f", "1ae482036382ecde3ad475626504569a17421396", "2c0e63c99e51fb7eefd96d9f63e5dc5a8709e179", "f5c0978ee0166d19ede8184650dfae64d2784af1", "0a4af05224a4837fe8b1df087528e74388b6fcbd", "0dafdc7debdcae528b2549489a03509cb4ecb9fe", "08b3d43e099f6afe15704cbf8388e01ebd2fd078", "72d6c08256143c4445cfb349e84f2a220f082bf1", "0fb2ab7176f91e34061b128c86ef100401a1b037", "c5e4449a63eea9277672ef2e2d3f387449f6e2b0", "0f41cd1792db9ff879fdfffc746cf5a01adf207f", "5704f6ee368d4d178612d526ccbb300cdcb95d2c" ], "paperAbstract": "The increasing reliance on robust data-driven decision-making across many domains has made it necessary for data management systems to manage many thousands to millions of versions of datasets, acquired or constructed at various stages of analysis pipelines over time. Delta encoding is an effective and widely-used solution to compactly store a large number of datasets, that simultaneously exploits redundancies across them and keeps the average retrieval cost of reconstructing any dataset low. However, supporting any kind of rich retrieval or querying functionality, beyond single dataset checkout, is challenging in such storage engines. 
In this paper, we initiate a systematic study of this problem, and present DEX, a novel stand-alone delta-oriented execution engine, whose goal is to take advantage of the already computed deltas between the datasets for efficient query processing. In this work, we study how to execute checkout, intersection, union and t-threshold queries over record-based files; we show that processing of even these basic queries leads to many new and unexplored challenges and trade-offs. Starting from a query plan that confines query execution to a small set of deltas, we introduce new transformation rules based on the algebraic properties of the deltas, that allow us to explore the search space of alternative plans. For the case of checkout, we present a dynamic programming algorithm to efficiently select the optimal query plan under our cost model, while we design efficient heuristics to select effective plans that vastly outperform the base checkout-then-query approach for other queries. A key characteristic of our query execution methods is that the computational cost is primarily dependent on the size and the number of deltas in the expression (typically small), and not the input dataset versions (which can be very large). We have implemented DEX prototype on top of git, a widely used version control system. 
We present an extensive experimental evaluation on synthetic data with diverse characteristics, that shows that our methods perform exceedingly well compared to the baseline.", "pdfUrls": [ "http://cs.umd.edu/~amitc/pubs/chavanA-dex.pdf", "http://doi.acm.org/10.1145/3035918.3064056" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6f6806f8dc90f1e1b73ee432602e13698a788827", "sources": [ "DBLP" ], "title": "DEX: Query Execution in a Delta-based Storage System", "venue": "SIGMOD Conference", "year": 2017 }, "6fa91727e4e80a6771809eeb65e07bf980aa4e95": { "authors": [ { "ids": [ "35414071" ], "name": "Max Curran" }, { "ids": [ "1819780" ], "name": "Md. Shaifur Rahman" }, { "ids": [ "1719874" ], "name": "Himanshu Gupta" }, { "ids": [ "39656743" ], "name": "Kai Zheng" }, { "ids": [ "3085412" ], "name": "Jon P. Longtin" }, { "ids": [ "1691843" ], "name": "Samir Ranjan Das" }, { "ids": [ "37227016" ], "name": "Thanvir Mohamed" } ], "doi": "10.1145/3117811.3129239", "doiUrl": "https://doi.org/10.1145/3117811.3129239", "entities": [ "Backhaul (telecommunications)", "Gigabit", "Network architecture", "Network planning and design", "Radio frequency", "Steerable filter", "Viz: The Computer Game" ], "id": "6fa91727e4e80a6771809eeb65e07bf980aa4e95", "inCitations": [ "c9a44a37f4fb220a51b3eb37eb7642bb3b97c2cb" ], "journalName": "", "journalPages": "154-166", "journalVolume": "", "outCitations": [ "7b5144c88098a183eb2f8395276b0be6196a442b", "04239e6f0b824266b21396b6042bf57c64e87080", "17d122f143726288da193a767fd0a7634010f0ff", "9b5ff4f20a6c0700c12e4e0c812fac57b1d68232", "3a89bc3c7ae2d2d1b720e3d213668b60e135df5f", "e0207eca0cb2880588915701cf37a85df15f49e5", "4e4b8c8f09e9de2c7eee2aecb748334e0dc44611", "43b39ed60cda5e724c073d8f98074cd93ceb5c85", "0e4c008bf6de673bf4a8246fe09b086fb1b37609", "c0bfa7614955558d835520fccb1e2253b51da021", "766df29ab90d678c7d25cb8d08f3d66f6d501a69", "bda401bc4c8b97418c83c4a1f06774b04713dd3d", "3967126afbca6a722d7257cd671fe5e4979358a5", 
"08e6f96da8e44d6529d29fb2087f5bbf5684404d", "5227d19882c018b745cf542275532ba5675fd251", "9815afac78e7f349a07353eabef66ca935553686", "c96d772cc44965adbff0e81587edb396bd4fedd6", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "a05548af9f54a7cd57a5c3f2d51b9e76f559f04a", "d9a478d61f91031e0e6d208b8311a0d33d56527f", "85e44851b35dfafe1f6f9fcf7e99c8c9e9a9ff1f", "7c31e77aff3a990754e70df271fa66260c1e8272", "1b54f6f1d93b0a409a6c58e8445a471be9c80603", "23dadf25f3efacbc9c66f69093d656ad5b003529", "25bddd2cebba683dc4154a289a739e0a84b5d0b0", "1078fee9fa40fd09e4898edc92cd8ae59fe01bb2", "c4fc46536c2bf8b95425656a081b861c43f0a545", "32f2b7c18f4c8cb924a3c844de009ac75e6341b2", "5638d32bf892532ffe3f2e6f20ed5da5780f2c1a" ], "paperAbstract": "Expected increase in cellular demand has pushed recent interest in picocell networks which have reduced cell sizes (100-200m or less). For ease of deployment of such networks, a wireless backhaul network is highly desired. Since RF-based technologies are unlikely to provide the desired multi-gigabit data rates, we motivate and explore use of free space optics (FSO) for picocell backhaul. In particular, we present a novel network architecture based on steerable links and sufficiently many robust short-range links, to help circumvent the key challenge of outdoor effects in reliable operation of outdoor FSO links. Our architecture is motivated by the fact that, due to the high density of picocells, many short-range links will occur naturally in a picocell backhaul. Moreover, use of steerable FSO links facilitates networks with sufficient redundancy while using only a small number of interfaces per node. We address the key problems that arise in the context of such a backhaul architecture, viz., an FSO link design with desired characteristics, and related network design and management problems. 
We develop and evaluate a robust 100m FSO link prototype, and simulate the proposed architecture in many metro US cities while show its viability via evaluation of key performance metrics.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3129239" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6fa91727e4e80a6771809eeb65e07bf980aa4e95", "sources": [ "DBLP" ], "title": "FSONet: A Wireless Backhaul for Multi-Gigabit Picocells Using Steerable Free Space Optics", "venue": "MobiCom", "year": 2017 }, "6fe2d1a86b3b89585f008c364d6f42de1a495939": { "authors": [ { "ids": [ "1747359" ], "name": "G\u00e1bor N\u00e9meth" }, { "ids": [ "7658422" ], "name": "Daniel Gehberger" }, { "ids": [ "2869045" ], "name": "P\u00e9ter M\u00e1tray" } ], "doi": "", "doiUrl": "", "entities": [ "Data access", "Distributed shared memory", "Locality of reference", "Shared memory" ], "id": "6fe2d1a86b3b89585f008c364d6f42de1a495939", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "87c1d1434f2c93d3a0cd6b0812e7cfd04ee55731", "daf0cd0076b388712ea12ec4105572997fc50cdf", "66f25a8b5b0bf2eceb3490156a9a6aa971c8d8b8", "0b93379135c16cbf8dea68e05a7962d19924953a", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "c41b26e105fce93ff229f7c8677e480a191336bf", "8f369c8e284e27ff73591209cc69a62e8252ff02", "1ac1348938a45e1da84be8caac78e3097acaf0c1", "29a1148d75878671dc3663bf480e33d7bd91597d", "0a4110fda21f0de29824ead1df591d2c5e1da8d0" ], "paperAbstract": "Latency-sensitive applications like virtualized telecom and industrial IoT systems require a service for ultrafast state externalization to become cloud-native. In this paper we propose a distributed shared memory system, called DAL, which achieves the lowest possible latency by transparently co-locating individual data items with applications working on them. 
Upon changes in data access patterns, the system automatically adapts data locations to keep the number of remote operations at a minimum. By avoiding the costs of network transport and using shared memory communication, the system can achieve 1 \u03bcs data access latency. We envision DAL as a platform component which enables latency-sensitive applications to take advantage of the cloud.", "pdfUrls": [ "https://www.usenix.org/conference/hotcloud17/program/presentation/nemeth", "https://www.usenix.org/sites/default/files/conference/protected-files/hotcloud17_slides_nemeth.pdf", "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-nemeth.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/6fe2/d1a86b3b89585f008c364d6f42de1a495939.pdf", "s2Url": "https://semanticscholar.org/paper/6fe2d1a86b3b89585f008c364d6f42de1a495939", "sources": [ "DBLP" ], "title": "DAL: A Locality-Optimizing Distributed Shared Memory System", "venue": "HotCloud", "year": 2017 }, "6fe5102e3998391400875526b3bfa9c448696818": { "authors": [ { "ids": [ "3045749" ], "name": "Zhou Fang" }, { "ids": [ "2264850" ], "name": "Mulong Luo" }, { "ids": [ "1702254" ], "name": "Mani B. Srivastava" }, { "ids": [ "1728598" ], "name": "Rajesh K. 
Gupta" } ], "doi": "10.1109/CLOUD.2017.28", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.28", "entities": [ "Attribute\u2013value pair", "Data store", "Finite-state machine", "Key-value database", "Multi-master replication", "Replication (computing)", "Router (computing)", "Speculative execution", "Testbed", "Throughput" ], "id": "6fe5102e3998391400875526b3bfa9c448696818", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "155-162", "journalVolume": "", "outCitations": [ "223b9e0e1bf2d696458ca0fb7aabb1bb0ea0b639", "1d084659435346b75c85df283369d265db5488e1", "8080073e2f0f8977ccacca10cacd131dfdedbcc9", "155ca30ef360d66af571eee47c7f60f300e154db", "6f2f219a4f6d64843efe35f868ed919ce8b3a031", "976a757a83f2c689978dd3e5018979909809dc4f", "4af63ed343df388b6353b6fc77c7137d27822bf4", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "0a9cc699ed94b4729de3255d76305b91b6a56aaa", "b129f84262024128ee64300ab257744b0b5ed8fb", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "1efbe52fe691584c5eb82cfb9d0380e7a6170862", "02b1103e592fa6bf0499e27f1519692441fad557", "8c8e927057b40bc1dc70b222ccb0c20f8703f617", "3d98d3545f2f952654189ed4f8a127be1b18ae71", "0a974f9a517409cf80c1f38e5d805c2c3da35f6a", "6de80f14aa1aa717e3eafd73c269c50b2a7de390", "af8b04305b92127b468a610b591b07f7897b2446", "42142c121b2dbe48d55e81c2ce198a5639645030", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "010cf83f39ebbbd42080e1491884ac6e9fe4d9aa", "00c181b8b64e824fbe0172339f1e4560b557fab5", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "3dd4f937b4c9922a5c0c4027519c949ebbc7e98b", "01667cd68d259d08807e266e07c4f8eb1c81d2af", "9aa0d7253574e50fe3a190ccd924433f048997dd", "9cd9321b82d573447f08d84e9a8ca31c46fd6b8e" ], "paperAbstract": "We present Timestamp Order Preserving (TOP), a replicated state machine (RSM) protocol that exploits the synchrony of networks to provide high performance. 
TOP uses physical timestamp of synchronized clock as a consistent total order to achieve consensus. It keeps estimating the bounds of network latency and offset of synchronized clock to deduce the commit time for each operation. It adopts speculative processing and reconciliation techniques to improve performance. To demonstrate its advantages, we implement a key-value data store that uses TOP for data replication. Through evaluations in a geo-deployed testbed, by comparing it with Primary-Copy and Quorum-Replication protocols, we demonstrate that TOP has a similar commit latency with a higher sustainable throughput. In addition, it processes operations in the order of submission timestamp, which provides a stricter form of consistency.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.28" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/6fe5102e3998391400875526b3bfa9c448696818", "sources": [ "DBLP" ], "title": "Exploiting Synchrony in Replicated State Machines", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "7016d0244f052049e56a853b204a8ae71ddebb91": { "authors": [ { "ids": [ "34774340" ], "name": "Fan Yao" }, { "ids": [ "2169805" ], "name": "Jingxin Wu" }, { "ids": [ "1734723" ], "name": "Suresh Subramaniam" }, { "ids": [ "2836326" ], "name": "Guru Venkataramani" } ], "doi": "10.1109/CLOUD.2017.30", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.30", "entities": [ "Experiment", "Jumpstart Our Business Startups Act", "Load profile", "Low-power broadcasting", "Naivety", "Quality of service", "Run time (program lifecycle phase)", "Server (computing)", "Server farm", "Simulation", "Testbed", "Timer", "WAsP" ], "id": "7016d0244f052049e56a853b204a8ae71ddebb91", "inCitations": [ "78698dd7ad99ad6808df8c3023617f3f7af363b5" ], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "171-178", "journalVolume": "", "outCitations": [ 
"7317dcd52ea4dee743ca377dc0497397b0df2d15", "47db5a1f12720c47d24b3748801cac0ca61a8e92", "841cebeab490ad455df3e7f7bf2cdff0076e59ca", "7c06a1f6325a6617ba3e446b56c40ee437923a25", "4f51ceb8beed738d5d0244af77053d5d36a553df", "4aec1ba26ca8a42a041dc0ac6a43dc91f1b6abc1", "8dfbd9c224f98f9de1b026b384f5f24c4800dd9b", "5185ac4910e030bc37ce057492604fb78427979a", "c1c71d0b6c0f2705e0e407f6823c928f83d67f73", "287634bfbcc597ce27632f4045c8a5a563d2a086", "19ab850e54aedce25233c73e0143eaf7f50813b9", "7692addeac2ffdcec2aa342cc8faa19221a8502d", "49c7b11728fd191db440a77776eda6b505904c27", "3000e77ed7282d9fb27216f3e862a3769119d89e", "45ee540d3b9b16ed9b5ad6ee034f3779b9561a73", "5db890a9a72c1b047e1bb0be071e888cb17cc7d7", "2f3ab5d53d6c0e2f0395ea3dca096267c332ebdc", "c7849d421956b63577325a38da2381875c428de0", "33da06260d2c420f568793aab04d4677d220e791", "1d035d1d445f5ae5457db98af49ce1b87be1ebd5", "3b7eb28c6f5018e7254fc39abcb22e23f4e89d92", "78e009cc05a6a832106d5ca6802ce56bef6b247f" ], "paperAbstract": "With the growing energy demands from server farms, it becomes necessary to understand the tradeoffs between energy consumption and application performance. Typically, server farms are provisioned for peak load even when they are mostly operating at low utilization levels. This results in wasteful energy consumption. At the same time, application workloads have Quality of Service (QoS) constraints that need to be satisfied. Optimizing server farm energy consumption with QoS constraints is a challenging task since the workload can have variabilities in job sizes, job arrival patterns and system utilization levels. In this paper, we present WASP, where we explore techniques that make smart use of the processor and system low-power states, and orchestrate their use with workload adaptivity for more effective energy management. We perform an extensive study of Energy-Latency tradeoffs with simulations, and evaluate WASP on a testbed with a cluster of servers. 
Our experiments on real systems show that WASP achieves up to 57% energy reduction over a naive policy that uses a shallow processor sleep state when there are no jobs to execute, and 39% over a delay timer based approach while maintaining the 90th percentile job service latency to be under 2x job execution time.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.30", "https://www2.seas.gwu.edu/~guruv/cloud17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7016d0244f052049e56a853b204a8ae71ddebb91", "sources": [ "DBLP" ], "title": "WASP: Workload Adaptive Energy-Latency Optimization in Server Farms Using Server Low-Power States", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "70493c68bc11efbb1ecde4d9f2c5bda9540de47b": { "authors": [ { "ids": [ "19241420" ], "name": "Sandeep M. D'Souza" }, { "ids": [ "1720679" ], "name": "Ragunathan Rajkumar" } ], "doi": "", "doiUrl": "", "entities": [ "Cloudlet", "Cyber-physical system", "Fault tolerance", "Requirement", "Scalability", "Smart city", "Span and div" ], "id": "70493c68bc11efbb1ecde4d9f2c5bda9540de47b", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "b7c15322c7b9ea2ec1126f543d064c8f0d13b21f", "4353966653d17069f2905c1a1578402503bc9d1b", "0cb7a2c3309c15993db73a2ade9eecc7b6dbb9af", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "207c7b23ab24c813ab382f702b4f01e04e76ccef", "1604854444aa3ae01d9c61de64fa21f615d6deba", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "23c55c3823bb9912a5e1e3918722a1f5f9011a82", "3540e345c600b2a3d40c300168182d1393cab248", "0d9aea55a54ccc6ab64995d70bf6ae464af25f0d", "708c4c89e1788e3d9b89cce624cdec3b1f831392", "163d8bb36e82fba6bc1ff53e0987e19f14cc9887", "83684cc2fddbe64f8902d1ee5d5112bf95eaeffe", "4bef7f35800919456555506ed112802c5680b4cd", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "6beef89c6d38aaa26f1ddabfb0ad54d621094d6f", "a7ada1bdaf9bec65338a4775e7328ead2b876863", 
"ae75b562b7817c67e3f741b892dcfffd7d7b21cc", "d12d1289d2384c2ce642f01855637b9f0519e189", "9b1485630ffaaa543acff16741343437cdaae08a", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "0d8301b36e9014763bf52fea8bcc6aec283caa0c", "4e6787c38064b2c747e7cebc47728de9a97e01e2", "d2525961c9cb501036553a285323d520abc37f1b", "207ea0115bf4388d11f0ab4ddbfd9fd00de5e8d1" ], "paperAbstract": "Emerging Cyber-Physical Systems (CPS) such as connected vehicles and smart cities span large geographical areas. These systems are increasingly distributed and interconnected. Hence, a hierarchy of cloudlet and cloud deployments will be key to enable scaling, while simultaneously hosting the intelligence behind these systems. Given that CPS applications are often safety-critical, existing techniques focus on reducing latency to provide real-time performance. While low latency is useful, a shared and precise notion of time is key to enabling coordinated action in distributed CPS. In this position paper, we argue for a global Quality of Time (QoT)-based architecture, centered around a shared virtualized notion of time, based on the timeline abstraction [1]. Our architecture allows applications to specify their QoT requirements, while exposing timing uncertainty to the application. 
The timeline abstraction with the associated knowledge of QoT enables scalable geo-distributed coordination in CPS, while providing avenues for fault tolerance and graceful degradation in the face of adversity.", "pdfUrls": [ "https://www.usenix.org/conference/hotcloud17/program/presentation/dsouza", "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-dsouza.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/7049/3c68bc11efbb1ecde4d9f2c5bda9540de47b.pdf", "s2Url": "https://semanticscholar.org/paper/70493c68bc11efbb1ecde4d9f2c5bda9540de47b", "sources": [ "DBLP" ], "title": "Time-based Coordination in Geo-Distributed Cyber-Physical Systems", "venue": "HotCloud", "year": 2017 }, "705d9d72015edb7d9aec9ff5a0742fdf45fbe6d8": { "authors": [ { "ids": [ "34747583" ], "name": "Peter Ohmann" }, { "ids": [ "40456419" ], "name": "Alexander Brooks" }, { "ids": [ "1806462" ], "name": "Loris D'Antoni" }, { "ids": [ "1697640" ], "name": "Ben Liblit" } ], "doi": "10.1145/3062341.3062368", "doiUrl": "https://doi.org/10.1145/3062341.3062368", "entities": [ "Control flow", "Debugging", "Formal language", "Polynomial", "Regular language", "Scalability", "Time complexity" ], "id": "705d9d72015edb7d9aec9ff5a0742fdf45fbe6d8", "inCitations": [ "ae219ae071fb77bdbd252437a4684816fbea2b36", "0dc147319a8d01b48718b31ed6afe71113702bef", "ab5a3ee384c0dfebe9c6d6ec946f402afb3f3474" ], "journalName": "", "journalPages": "390-405", "journalVolume": "", "outCitations": [ "03e8d39bf0f25ab6ff992b0990a1a36bfffd4662", "0dc147319a8d01b48718b31ed6afe71113702bef", "45c26fb5050664ea07d53932f8c0fefa7ed3c061", "223b72e228f787edda258dcdaaf62ae815a9cba6", "378a7e94ff4f4400e82212c448c2acb463a5ef29", "3f88107149efe6956ff3cbf45a1e708e12fc6dce", "275855235c5aa73f9c34ffa4eed00dfb5cea764e", "97bfbeece4bf1dfcf83dcdef94718bd9c78aa8c2", "58b20e850dd6cff91dcd0b7c0fb9a8c5da4fab38", "ee1eb323416c00e12b4e995394aaf09a08811061", "8960cbad872433aa1c8ef0b2549a573b04faae2e", 
"2194c3460ab71f3826db00b045b2ae590c753319", "18ba3b150bd90bde44fcee6166c08bb069fd5ae2", "5704aba27fd5fdaab8ddf5d7639b4e3209fa04d6", "65d8e0ad805f92d8d9d646f4eb640b1db0274088", "3af74c53a26a468b3be83990be378d529242b874", "1d9f8c792c1cf91a9a66d168cd07e09d2297d602", "2d78d4e22f8c87f1e8f2202aea97e5795e0ab216", "f3389cf47aaf2d3951439265b6458f9078a95c99", "397fd386a0fd0ac00353d35f8b4c9a3ee333111f", "063c698f518758abd140f8424cd7f9497ce7feba", "9e7da2bb2ba456b2bdb423d62d25a0392b972e7e", "7eaedbcd1010f11929104c8e3d63de2122ef04a5", "86d2bd0db494341558683b70714969c1ff75028d", "2cc9b6f61e9575e20ca49dbd04cdd47497119694", "d48631279c3a28bb8b1ea5f3b211b3f73b7e44c4", "1e4874f3443d191a4f7f3ba63a04a264bd00e364", "098d3e1fedbfe870d886c3c56daef13dce2965ce", "1f08055ec22875e55fe2bd893e93a795e942922a", "67943b9ea82c304c728cbd16c2daeb6e54170405", "91a155166f45404af1e0f5fd5532d5cb37ef3f3b", "090203127dc92ba26f7e6df39e0a12aa238e7a58", "9a9d91596965092ea9ee0c8f6fb8fbe088dbc1ff", "56be0a187b6bfdd1afe547cf62c8bdd081547794", "03cb9e54559d503f54b4254a15328427de91d22a", "e1dcf86c08574ff156970ffcb1ed0ea87ccf65da", "1ac60c54d8af5e754b7a2188ba34ad84584cf919", "0205b55e4fcf5710a97d5d561efa66c82e39f0f7", "6dd474a2ce9bc35c9d7518c981715bd394a9cd86", "5732268aea93dc3aa6b6f4a2db57a609b9714417", "3dc2914ac70a61c630eb58c2a33732afdfdcf979", "468bff8de97b7380ff497eea8eec3c9621218a40" ], "paperAbstract": "Debugging is difficult. When software fails in production, debugging is even harder, as failure reports usually provide only an incomplete picture of the failing execution. We present a system that answers control-flow queries posed by developers as formal languages, indicating whether the query expresses control flow that is possible or impossible for a given failure report. We consider three separate approaches that trade off precision, expressiveness for failure constraints, and scalability. 
We also introduce a new subclass of regular languages, the unreliable trace languages, which are particularly suited to answering control-flow queries in polynomial time. Our system answers queries remarkably efficiently when we encode failure constraints and user queries entirely as unreliable trace languages.", "pdfUrls": [ "http://pages.cs.wisc.edu/~liblit/pldi-2017/pldi-2017.pdf", "http://pages.cs.wisc.edu/~ohmann/papers/pldi-2017.pdf", "http://doi.acm.org/10.1145/3062341.3062368", "http://pages.cs.wisc.edu/~loris/papers/pldi17recovery.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/705d9d72015edb7d9aec9ff5a0742fdf45fbe6d8", "sources": [ "DBLP" ], "title": "Control-flow recovery from partial failure reports", "venue": "PLDI", "year": 2017 }, "70745c6514748e8d3764d77abf410edd90a597ac": { "authors": [ { "ids": [ "1711264" ], "name": "Ming Chen" }, { "ids": [ "32401480" ], "name": "Dean Hildebrand" }, { "ids": [ "40033094" ], "name": "Henry Nelson" }, { "ids": [ "4884504" ], "name": "Jasmit Saluja" }, { "ids": [ "9755399" ], "name": "Ashok Sankar Harihara Subramony" }, { "ids": [ "1708491" ], "name": "Erez Zadok" } ], "doi": "10.1145/3116213", "doiUrl": "https://doi.org/10.1145/3116213", "entities": [ "Amortized analysis", "Application programming interface", "File system API", "HTTP/2", "High- and low-level", "POSIX", "Remote procedure call", "Server (computing)", "Unix", "User space", "Virtual private network" ], "id": "70745c6514748e8d3764d77abf410edd90a597ac", "inCitations": [ "247289ef2f00ba86c8e3a28cc692197d5741ae6e", "c7e2c4bea500ea7926a50973d861f01bb8e5e364", "09d446b184787213e45035c7e7657056373fb1b1" ], "journalName": "", "journalPages": "301-314", "journalVolume": "", "outCitations": [ "0e956dc3288b6ff2345238448236045d3033c09f", "44028c00bf3872ae06aa46f569c3b9dceebdd909", "3c693d3a4a40e6e98c0f839205c4c308d4326a8c", "f93b1ccf2a97edd055afa4e6a3e32770cbc002de", "5c6d7d5b165c1037db4434577db1f5155ef8f00e", 
"03eb427813552b2165e5250105e55dbfb7ef151e", "c14abc3126b27e15ca7c50f4b827cec912d00449", "97158a13a871720757114a8dcb8d8f4e104d8693", "5691b8cc2f9b76c5371125137b9410727d393fe3", "45114ae3c9263c2ad9e042d66b224e5de3e649d4", "b46cb54a87a448212af37f2594a512fec39a059e", "af3735131085f326148fa8885ec41bd637d130a2", "04b7525f514eff641e8e0e734ab96675c82c6a06", "1e9f092e114393ba786cb2002b6f1b0dabe875fe", "6d38e49cf1f121712f19805ec779905bc9507e58", "29d45feaa50b0304ab52bd5c6d0381c21c2b42bc", "0641c61c2709ea41536cf78bcc6316fb4951b5ab", "44f474a25ee7d1fccfb97bd4e64ffc7ae0df61e8", "1ea75edc65bf349f9f057213b391ab8be4cc3ab6", "84af1a7cf881369e759e53ab143fc01e66d3b8a8", "6da927dcc24e0550cfba60a4338052370c7892fb", "10fede77f843e9eb5ef1768a17543013616d9243", "627c0d36688b2252ae3ca0b5f68ce97e341d338d", "1ef607f45cb77db12e7a52e6de052a9a0ebb830b", "38a48d914e1a47e617fd5031f5a18388a2ddf4ab" ], "paperAbstract": "Modern systems use networks extensively, accessing both services and storage across local and remote networks. Latency is a key performance challenge, and packing multiple small operations into fewer large ones is an effective way to amortize that cost, especially after years of significant improvement in bandwidth but not latency. To this end, the NFSv4 protocol supports a compounding feature to combine multiple operations. Yet compounding has been underused since its conception because the synchronous POSIX file-system API issues only one (small) request at a time.\n We propose vNFS, an NFSv4.1-compliant client that exposes a vectorized high-level API and leverages NFS compound procedures to maximize performance. We designed and implemented vNFS as a user-space RPC library that supports an assortment of bulk operations on multiple files and directories. We found it easy to modify several UNIX utilities, an HTTP/2 server, and Filebench to use vNFS. 
We evaluated vNFS under a wide range of workloads and network latency conditions, showing that vNFS improves performance even for low-latency networks. On high-latency networks, vNFS can improve performance by as much as two orders of magnitude.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_ming_chen.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/chen", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_ming_chen.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17-chen.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17-chen.pdf", "http://doi.acm.org/10.1145/3116213", "http://www.fsl.cs.stonybrook.edu/docs/nfs4perf/vnfs-fast17.pdf", "http://www.fsl.cs.sunysb.edu/docs/nfs4perf/vnfs-fast17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/70745c6514748e8d3764d77abf410edd90a597ac", "sources": [ "DBLP" ], "title": "vNFS: Maximizing NFS Performance with Compounds and Vectorized I/O", "venue": "FAST", "year": 2017 }, "70799342284f353e3478254bd866cf8e28e4cafc": { "authors": [ { "ids": [ "3482258" ], "name": "Alexandra Ferreron" }, { "ids": [ "3408343" ], "name": "Radhika Jagtap" }, { "ids": [ "12772192" ], "name": "Sascha Bischoff" }, { "ids": [ "2180436" ], "name": "Roxana Rusitoru" } ], "doi": "10.1109/ISPASS.2017.7975275", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975275", "entities": [ "ARM architecture", "Basic block", "Design space exploration", "Simulation", "Thread (computing)" ], "id": "70799342284f353e3478254bd866cf8e28e4cafc", "inCitations": [ "5340e44c9c96ea3220ee12f91cef1c45c75acedf" ], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "109-120", "journalVolume": "", "outCitations": [ "085d5c50730c02a959980b83eae018fbe3fa0c94", "736f4a1c6eff51f8206530aca67baa95bafc5f1c", 
"7631275e3266f627df6cc29441f69ab9f5f2b1c6", "6a74067e23bb3a0c85f2db3b2c1cd198cebc32d7", "0b70018e5f132af24e1682a01854766bfdf08fa6", "0653e2ed9f683868cb4539eb8718551242834f6b", "beb2db2079bbac605fe37d20c530756ebb032830", "0d1458aa38de3f526552b0d3df8ec99824e11f14", "6b406760f69fc0e4b8412ea4b864c345a9540b3b", "de0a9dc7edcb717711cc743165329791d6d5d0e2", "bfbf817506fb00be3eab2a1dd372a2149012a987", "65d1532560a19daa80c95abb6a22cc73d17e3e89", "0524b5c458a3eeda6b3e70fb26ac8f9431de5f93", "947c6bf534ccd620044f77c3bd6068f633b421fb", "17d72a792b35fa9e65499b3e8f9c15a1f6f8b7b6", "009e4da527a3518c29c95970efb79733a67979fb", "8a9112daed1df3caeb8efd7e4b05e4ed05c23dbc", "79ff6d26643770fecefe08d7bf1ec504ae465bc8", "a60b7163b465993dc46d36d82d6c7f988a9b7426", "3ed84f2fbdc4dc6450919ec5b017e66440a5833c", "530d782e0ad7a88a20a1087a26fecc463d8c794b" ], "paperAbstract": "Exascale computing will get mankind closer to solving important social, scientific and engineering problems. Due to high prototyping costs, High Performance Computing (HPC) system architects make use of simulation models for design space exploration and hardware-software co-design. However, as HPC systems reach exascale proportions, the cost of simulation increases, since simulators themselves are largely single-threaded. Tools for selecting representative parts of parallel applications to reduce running costs are widespread, e.g., BarrierPoint achieves this by analysing, in simulation, abstract characteristics such as basic blocks and reuse distances. However, architectures new to HPC have a limited set of tools available. In this work, we provide an independent cross-architectural evaluation on real hardware — across Intel and ARM — of the BarrierPoint methodology, when applied to parallel HPC proxy applications. We present both cases: when the methodology can be applied and when it cannot. 
In the former case, results show that we can predict the performance of full application execution by running shorter representative sections. In the latter case, we dive into the underlying issues and suggest improvements. We demonstrate a total simulation time reduction of up to 178x, whilst keeping the error below 2.3% for both cycles and instructions.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975275", "https://arxiv.org/pdf/1803.09584v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/70799342284f353e3478254bd866cf8e28e4cafc", "sources": [ "DBLP" ], "title": "Crossing the architectural barrier: Evaluating representative regions of parallel HPC applications", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "708f93dfc766e1f3c2624022100a3b549711b9e4": { "authors": [ { "ids": [ "2893721" ], "name": "Suhang Wang" }, { "ids": [ "1682418" ], "name": "Charu C. Aggarwal" }, { "ids": [ "38746648" ], "name": "Huan Liu" } ], "doi": "10.1145/3097983.3098001", "doiUrl": "https://doi.org/10.1145/3097983.3098001", "entities": [ "Anomaly detection", "Cluster analysis", "Computational resource", "Data point", "Ensemble learning", "Experiment", "Feature engineering", "Feature vector", "High-level programming language", "Kernel method", "Locality of reference", "Machine learning", "Overfitting", "Randomness", "Similarity measure", "Statistical classification" ], "id": "708f93dfc766e1f3c2624022100a3b549711b9e4", "inCitations": [ "7a8fd8670b928b474c4dfeb1dc1898d08d545025" ], "journalName": "", "journalPages": "485-494", "journalVolume": "", "outCitations": [ "f0951665942dcb80e795cfbe68b65d0dbda59b17", "0b35eeb1ca1fcaa24ef456242bb90320afdd1cd2", "3b2bf65ebee91249d1045709200a51d157b0176e", "427b168f490b56716f22b129ac93aba5425ea08f", "b1f06867f0aa6a637e8b21283b71cf3e73d580ae", "56486a35a2a62f5f6eba1e0d50a3c82de4f5dbdb", "8b08191e4fdf0d34a853dc72aabaf408d4e2ebd7", 
"5392f3f48d7b8cbc9c221ccd8bf6554a17c2761d", "41809d7fc7c41cf4d0afd5823034b5c0ac2949aa", "127316fbe268c78c519ceb23d41100e86639418a", "e50f4d3316d13841c287dcdf5479d7820d593571", "e2b7f37cd97a7907b1b8a41138721ed06a0b76cd", "1ffcb27536ab5436e6d753919ab27ac1a44b4b69", "8bd01b1c44c7ff8b1ccf5987fa85fedef49d1daa", "fc8cda36a0972e7de1ac3a7bcb81dc32da79bee4", "0e93d05017b495f205fbf5d27188bba7be9be5e4", "a675cef5b434619a71ec2a30ac63d108f3d98a63", "013cd20c0eaffb9cab80875a43086e0c3224fe20", "77cf1b068da9adf55ae84115f7206747368c4198", "205a4bc5e53e8db52190c72e581e709139acf57b", "10e1fb949e10d5fe99d5f1b32bb48d625149bce8", "141e6c1dd532504611266d08458dbe2a0dbb4e98", "2ba4222ae97c71dad57c2fa6a55c9f4d9eaad971", "0e730f8ed7cfdeb458cab36e8495e2cc0ee6d6a6", "3fdb0e91027e0d3cfc39220db021b838f68b90e8", "1696cbf7da0ee845c50591843993e6605adec177", "284db8df66ef94594ee831ff2b36f546e023953a" ], "paperAbstract": "Feature engineering has found increasing interest in recent years because of its ability to improve the effectiveness of various machine learning models. Although tailored feature engineering methods have been designed for various domains, there are few that simulate the consistent effectiveness of kernel methods. At the core, the success of kernel methods is achieved by using similarity functions that emphasize local variations in similarity. Unfortunately, this ability comes at the price of the high level of computational resources required and the inflexibility of the representation as it only provides the similarity of two data points instead of vector representations of each data point; while the vector representations can be readily used as input to facilitate various models for different tasks. Furthermore, kernel methods are also highly susceptible to overfitting and noise and it cannot capture the variety of data locality. In this paper, we first analyze the inner working and weaknesses of kernel method, which serves as guidance for designing feature engineering. 
With the guidance, we explore the use of randomized methods for feature engineering by capturing multi-granular locality of data. This approach has the merit of being time and space efficient for feature construction. Furthermore, the approach is resistant to overfitting and noise because the randomized approach naturally enables fast and robust ensemble methods. Extensive experiments on a number of real world datasets are conducted to show the effectiveness of the approach for various tasks such as clustering, classification and outlier detection.", "pdfUrls": [ "http://www.public.asu.edu/~swang187/publications/RandLocal.pdf", "http://doi.acm.org/10.1145/3097983.3098001" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/708f93dfc766e1f3c2624022100a3b549711b9e4", "sources": [ "DBLP" ], "title": "Randomized Feature Engineering as a Fast and Accurate Alternative to Kernel Methods", "venue": "KDD", "year": 2017 }, "70c4f0403d80427e10c7e7167f814ec0bb12d18f": { "authors": [ { "ids": [ "2297493" ], "name": "Filipe Manco" }, { "ids": [ "32056813" ], "name": "Costin Lupu" }, { "ids": [ "40279556" ], "name": "Florian Schmidt" }, { "ids": [ "8627779" ], "name": "Jose Mendes" }, { "ids": [ "2388918" ], "name": "Simon Kuenzer" }, { "ids": [ "26961754" ], "name": "Sumit Sati" }, { "ids": [ "2613133" ], "name": "Kenichi Yasukata" }, { "ids": [ "1758591" ], "name": "Costin Raiciu" }, { "ids": [ "2963590" ], "name": "Felipe Huici" } ], "doi": "10.1145/3132747.3132763", "doiUrl": "https://doi.org/10.1145/3132747.3132763", "entities": [ "Central processing unit", "Centralisation", "Control plane", "Docker", "Hypervisor", "Interaction", "Linux", "Linux", "Unikernel", "Virtual machine", "z/VM" ], "id": "70c4f0403d80427e10c7e7167f814ec0bb12d18f", "inCitations": [ "4df9fbffb5580023cae2ae22970606d7ae9f76f4", "5a25bb5c4809d22536ce0ec3ac32615ed125ee5e" ], "journalName": "", "journalPages": "218-233", "journalVolume": "", "outCitations": [ 
"0e851f49432767888b6ef4421beb268b9f2fc057", "95de86d0a9c82966c0edb8ad322a1bae6dec7a78", "a93cfc7d891492c9bfdfe129b8077b3a2f41fe08", "434553e2a9b6048f1eb7780ec2cd828dc2644013", "005718daa82f7880b5b1f3b01e0e8f6aad2b5751", "6895a7a7ef3a9c8e77b5f6ee8c64b49d2dee61cc", "3574657705475722b6c398c266805f758268778b", "60212872aa40b660de117ba751542988bcfc406d", "59d45d685e35f5a84768c029ea09b9c48765251e", "86c78adba2c8791d3d3e70c927504b2c7e259528", "16d95a30bf189a0fe37af03eee9cc8af49709cd2", "a659692ed5031c5410668d644a60d7806bb2efbb", "6a656a567097c53a49b1dbeb9e1e77bebf7524ec", "043029ff68d0449eacae8a67fc62ed4ee03215a2", "6d37e31161d5f7ac35a6598e91270344983236eb", "5bddb52a9def1c1330e8139b8496fbb8bb8c5937", "3f6f619fea4e9241d9fa5d39be4e985757e571de", "048d6cee0d6764932f886d93e641235e7e7a60fa", "9747efb725620e0767597b923f174057f7d9e82b", "78ce88433728d9c27828353bc9aecef6d884ec13", "080b1f2c8316dad80d8c385dfcb82335a64a4d29", "4650259fb4aadb376fd5994f9ab9dd07a4f83511", "2f48f56e14f105441433ef15e47a3d2af8affa3e" ], "paperAbstract": "Containers are in great demand because they are lightweight when compared to virtual machines. On the downside, containers offer weaker isolation than VMs, to the point where people run containers in virtual machines to achieve proper isolation. In this paper, we examine whether there is indeed a strict tradeoff between isolation (VMs) and efficiency (containers). We find that VMs can be as nimble as containers, as long as they are small and the toolstack is fast enough.\n We achieve lightweight VMs by using unikernels for specialized applications and with Tinyx, a tool that enables creating tailor-made, trimmed-down Linux virtual machines. By themselves, lightweight virtual machines are not enough to ensure good performance since the virtualization control plane (the toolstack) becomes the performance bottleneck. 
We present LightVM, a new virtualization solution based on Xen that is optimized to offer fast boot-times regardless of the number of active VMs. LightVM features a complete redesign of Xen's control plane, transforming its centralized operation to a distributed one where interactions with the hypervisor are reduced to a minimum. LightVM can boot a VM in 2.3ms, comparable to fork/exec on Linux (1ms), and two orders of magnitude faster than Docker. LightVM can pack thousands of LightVM guests on modest hardware with memory and CPU usage comparable to that of processes.", "pdfUrls": [ "http://cnp.neclab.eu/projects/lightvm/lightvm.pdf", "http://doi.acm.org/10.1145/3132747.3132763" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/70c4f0403d80427e10c7e7167f814ec0bb12d18f", "sources": [ "DBLP" ], "title": "My VM is Lighter (and Safer) than your Container", "venue": "SOSP", "year": 2017 }, "70d5eae0007dd8f116f53850918f3e6eca9ef339": { "authors": [ { "ids": [ "15737370" ], "name": "Jake Corina" }, { "ids": [ "2275943" ], "name": "Aravind Machiry" }, { "ids": [ "3425110" ], "name": "Christopher Salls" }, { "ids": [ "3260361" ], "name": "Yan Shoshitaishvili" }, { "ids": [ "2710468" ], "name": "Shuang Hao" }, { "ids": [ "1715189" ], "name": "Christopher Kr\u00fcgel" }, { "ids": [ "1711242" ], "name": "Giovanni Vigna" } ], "doi": "10.1145/3133956.3134069", "doiUrl": "https://doi.org/10.1145/3133956.3134069", "entities": [ "Android", "Arbitrary code execution", "Argument (complex analysis)", "Bluetooth", "Data structure", "Device driver", "Digital camera", "Hard disk drive", "Mobile device", "Smartphone", "Software bug", "Sparse matrix", "Static program analysis", "Unix", "Unix-like", "User space", "Vulnerability (computing)" ], "id": "70d5eae0007dd8f116f53850918f3e6eca9ef339", "inCitations": [], "journalName": "", "journalPages": "2123-2138", "journalVolume": "", "outCitations": [ "2c13dcfdc5ea2d355a46fe326c371038a00ba7f5", 
"f5b5973ff8bf9fd17527ea2b084db0266502a875", "0ab393affe9d674ef790be14fdfade368f3e5989", "1096be3f6700b0d33746781b650e51155f36ef29", "3cfca42b42a22a23d5f13d27b9bf3e8a1dec98bb", "02838cb6982e67992ae54fa616162b16ce5110c6", "341d33498388711a5303c5f51433b3d5739a21d2", "37d11b005125bacba9939a34d35b22ab9c987aa0", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "ad80cc9e5caf510752d9ab74edf20165c67657bd", "d78d3b199830d6c8b86610dc2921fcf139225217", "6ea63d09993b9a268689790ea8d25bc36345497e", "51a10bc2d3966dfcf82060e9c94fa7436e98023e", "55b7d2e425c5e813ed51780845398ef5d246a4d3", "a21921eb0c5600562c8dad8e2bc40fff1ec8906b", "1f7e5e582663868ed2f6763f98066ca278177a61", "9090142233801801411a28b30c653aae5408182a", "eec80d671ce951843f3791fd4bc36d9c0b9295eb", "27145fe45450babe306513efb97ae0ec8590c246", "b8719183f3579e6f0bdf2d98ee500097a28cb9cf", "bdeaac660facf421f3e29ec2595bd14a1ed88112", "b1922eec5cf87809a697814baa82144593f0a6ee", "0616cc5097e0706a5f219283acddeae1ba27a57e", "479949999394d7db736d7088a746e5159bee5894", "de71e2359995087b4ce7d46e4eb718c341c70ee0", "0adfaffb6e236d886c6eda579b28f5a8530019c6", "9ccdb55f5555f77799eb9673d7f632bc79998028", "a80cf182c22c5e11d523c8af36732576a1d3d8d4", "b444e56f31694b3ba4b14f91b4b3d1a8ebc19830", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "479bf197a3a9c5f6f7b06e64eefa0a90fa0c8b41", "08e2d172e510ab14713b28cc71a37e7c78cc7b13", "0a5eacf219767f05c35f509b3e37c1ae75ae107b" ], "paperAbstract": "Device drivers are an essential part in modern Unix-like systems to handle operations on physical devices, from hard disks and printers to digital cameras and Bluetooth speakers. The surge of new hardware, particularly on mobile devices, introduces an explosive growth of device drivers in system kernels. Many such drivers are provided by third-party developers, which are susceptible to security vulnerabilities and lack proper vetting. 
Unfortunately, the complex input data structures for device drivers render traditional analysis tools, such as fuzz testing, less effective, and so far, research on kernel driver security is comparatively sparse. In this paper, we present DIFUZE, an interface-aware fuzzing tool to automatically generate valid inputs and trigger the execution of the kernel drivers. We leverage static analysis to compose correctly-structured input in the userspace to explore kernel drivers. DIFUZE is fully automatic, ranging from identifying driver handlers, to mapping to device file names, to constructing complex argument instances. We evaluate our approach on seven modern Android smartphones. The results show that DIFUZE can effectively identify kernel driver bugs, and reports 32 previously unknown vulnerabilities, including flaws that lead to arbitrary code execution.", "pdfUrls": [ "https://csaw.engineering.nyu.edu/application/files/4215/0825/7193/CSAW17_paper_51.pdf", "http://doi.acm.org/10.1145/3133956.3134069", "http://sefcom.asu.edu/publications/difuze-interface-aware-fuzzing-for-kernel-drivers-ccs2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/70d5eae0007dd8f116f53850918f3e6eca9ef339", "sources": [ "DBLP" ], "title": "DIFUZE: Interface Aware Fuzzing for Kernel Drivers", "venue": "CCS", "year": 2017 }, "711745113c95bb9ac69d9895da968d3d6f49d2b3": { "authors": [ { "ids": [ "3204268" ], "name": "Robayet Nasim" }, { "ids": [ "17767400" ], "name": "Andreas J. 
Kassier" } ], "doi": "", "doiUrl": "", "entities": [ "Heuristic", "Linear programming", "Online optimization", "Optimization problem", "Program optimization", "Resource contention", "Semiconductor consolidation", "Tabu search", "Thunderbolt (interface)", "Virtual machine" ], "id": "711745113c95bb9ac69d9895da968d3d6f49d2b3", "inCitations": [ "ef44c582066f5f6b601aab4be7db541859de3ba1" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "170-180", "journalVolume": "", "outCitations": [ "3bb902100289a7af154d18372d08aca7f1ce45e0", "025b61d89518562f0d42b58de20ffbf6398c4c18", "2214854b62c6f0816bfe042bdf28d91df57440d9", "c7ea13ddd27c5a5a0280ea222df8c584d9e3c434", "c6f5facdf3ba848e663fb5d53299c8dcd61f8891", "814155d944d8e42902b1a9855bed3d02668d1076", "14a2c1c8657a5ea7b9efe7f5f09353aedaa37fdd", "6d37e31161d5f7ac35a6598e91270344983236eb", "31edc8557cfc66925e6f1bdfaf2ad82380827e1e", "b80d4589cfb3a54dd42cec78f71e48455f0dc03e", "708945b02f687f4487a246f4127faa8d6d7fee9f", "e78f7f604b30e6c0e82975f1ffa83dd5e9d16e4f", "7741cd1a7a7548a9f355e3e194fb5b68391ed76a", "6bc873b69b320195911a61073e689c564d91ba7b", "f71ec13a4471b57380bddeff035acac749a35ff8", "7a627c9b10826d0978c7584317e4911982ccea95", "08f2a999098930ed8d473ee01e5338a706a7920e", "2f4cfcab9c2f5082fc13110cc26e3fa1bac439dd", "2b7febcfdec074620c26d373f877d04e7fb24443" ], "paperAbstract": "In virtualized datacenters (vDCs), dynamic consolidation of virtual machines (VMs) is used as one of the most common techniques to achieve both energy-and resource-utilization efficiency. Live migrations of VMs are used for dynamic consolidation but due to dynamic resource demand variation of VMs may lead to frequent and non-optimal migrations. Assuming deterministic workload of the VMs may ensure the most energy/resource-efficient VM allocations but eventually may lead to significant resource contention or under-utilization if the workload varies significantly over time. 
On the other hand, adopting a conservative approach by allocating VMs depending on their peak demand may lead to low utilization, if the peak occurs infrequently or for a short period of time. Therefore, in this work we design a robust VM migration scheme that strikes a balance between protection for resource contention and additional energy costs due to powering on more servers while considering uncertainties on VMs resource demands. We use the theory of Gamma robustness and derive a robust Mixed Integer Linear programming (MILP) formulation. Due to the complexity, the problem is hard to solve for online optimization and we propose a novel heuristic based on Tabu Search. Using several scenarios, we show that that the proposed heuristic can achieve near optimal solution qualities in a short time and scales well with the instance sizes. Moreover, we quantitatively analyze the trade-off between energy cost versus protection level and robustness.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101134" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/711745113c95bb9ac69d9895da968d3d6f49d2b3", "sources": [ "DBLP" ], "title": "A Robust Tabu Search Heuristic for VM Consolidation under Demand Uncertainty in Virtualized Datacenters", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "7154a4ba30ed4b8f2f97bd91b5e9809df0fa2ad6": { "authors": [ { "ids": [ "3143218" ], "name": "Kishori M. Konwar" }, { "ids": [ "1740804" ], "name": "N. Prakash" }, { "ids": [ "1739044" ], "name": "Nancy A. 
Lynch" }, { "ids": [ "1697407" ], "name": "Muriel M\u00e9dard" } ], "doi": "10.1145/3087801.3087832", "doiUrl": "https://doi.org/10.1145/3087801.3087832", "entities": [ "Algorithm", "Atomicity (database systems)", "Client (computing)", "Clustered file system", "Computer data storage", "Concurrent computing", "Edge computing", "Erasure code", "Failure rate", "Fault tolerance", "International Standard Book Number", "Liveness", "Message passing", "PODC", "Persistence (computer science)", "Reed\u2013Solomon error correction", "Server (computing)" ], "id": "7154a4ba30ed4b8f2f97bd91b5e9809df0fa2ad6", "inCitations": [ "42773206f25cf6f2696a83836775aab39559335f", "9b96fd02ed79512eb0efbdf6654c32d83745c3e4" ], "journalName": "", "journalPages": "63-72", "journalVolume": "", "outCitations": [ "2dfa0cb9e6c5b0a35438d6d42bc8847471070c92", "6f69d0cf92b071c7fa0f2bd9abaeb5937e5b12ae", "4cead48e2eac91560105871b78268e3164eb382b", "6ee44bf140a6d4bcccaeea0340592eae6dffc63e", "136f8675c0af31185fa69c3919f7c2307826fecb", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "07159000917af52dae4ecb9612dba682d0661c03", "0fc8c684ea6d28c828da984b1a931d43afe20756", "a92499e9da6d97ac2afd83f87fccedefe80259a3", "58b628792d3eb22a034a871ed3cf373afe591928", "1407182795058cbd556d1ce7751db777f129fd3e", "8c367ede3dc0ccb855d88d0d1f32024497a81551", "00996f92050947b57e10c4082a5ba82c368a78ed", "32de355572d2a1317c616a3b55b084ea96389354", "0aa4b9a1ff7970b3ee600e4391d689aca8fa723d", "725733bee83a8a07f60157a3d83c6b85de7c5159", "3d88c20644a16100cb6d0a16226b53a966d12418", "0e69ee351252fd09a50e6baae53b4776009825ae", "207ea0115bf4388d11f0ab4ddbfd9fd00de5e8d1", "0d77bb6ef2bb6d165f58bf0251bf3d7cf29f1491", "0ec668e12ff9f7c5c968bac1b4c441320d190fa6", "1748a4950413dbeab59c139b16cfb8ec99f21ff4", "19f7267e097f942087bbb0ec500bcc2959e24dae", "015222399f281321db43114b86a5b8b8dd38cf83", "4d14bade1c16331bf27c5da45c747cd1869faab5", "7015b8c40955c5ee1ca23deb07c5f8600e381e85" ], "paperAbstract": "Motivated by emerging applications to the 
edge computing paradigm, we introduce a two-layer erasure-coded fault-tolerant distributed storage system offering atomic access for read and write operations. In edge computing, clients interact with an edge-layer of servers that is geographically near; the edge-layer in turn interacts with a back-end layer of servers. The edge-layer provides low latency access and temporary storage for client operations, and uses the back-end layer for persistent storage. Our algorithm, termed Layered Data Storage (LDS) algorithm, offers several features suitable for edge-computing systems, works under asynchronous message-passing environments, supports multiple readers and writers, and can tolerate f1 < n1/2 and f2 < n2/3 crash failures in the two layers having n1 and n2 servers, respectively. We use a class of erasure codes known as regenerating codes for storage of data in the back-end layer. The choice of regenerating codes, instead of popular choices like Reed-Solomon codes, not only optimizes the cost of back-end storage, but also helps in optimizing communication cost of read operations, when the value needs to be recreated all the way from the back-end. The two-layer architecture permits a modular implementation of atomicity and erasure-code protocols; the implementation of erasurecodes is mostly limited to interaction between the two layers. We prove liveness and atomicity of LDS, and also compute performance costs associated with read and write operations. In a system with n1 = \u0398(n2), f1 = \u0398(n1), f2 = \u0398(n2), the write and read costs are respectively given by \u0398(n1) and \u0398(1) + n1I(\u03b4 > 0). Here \u03b4 is a parameter closely related to the number of write operations that are concurrent with the read operation, and I(\u03b4 > 0) is 1 if \u03b4 > 0, and 0 if \u03b4 = 0. The cost of persistent storage in the back-end layer is \u0398(1). 
The impact of temporary storage is minimally felt in a multiobject system running N independent instances of LDS, where only a small fraction of the objects undergo concurrent accesses at any point during the execution. For the multi-object system, we identify a condition on the rate of concurrent writes in the system such that Permission to make digital or hard copies of all or part of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for components of this work owned by others than ACM must be honored. Abstracting with credit is permitted. To copy otherwise, or republish, to post on servers or to redistribute to lists, requires prior specific permission and/or a fee. Request permissions from permissions@acm.org. PODC \u201917, July 25-27, 2017, Washington, DC, USA \u00a9 2017 Association for Computing Machinery. ACM ISBN 978-1-4503-4992-5/17/07. . . 
$15.00 https://doi.org/10.1145/3087801.3087832 the overall storage cost is dominated by that of persistent storage in the back-end layer, and is given by \u0398(N ).", "pdfUrls": [ "http://arxiv.org/abs/1703.01286", "https://arxiv.org/pdf/1703.01286v1.pdf", "https://arxiv.org/pdf/1703.01286v2.pdf", "http://groups.csail.mit.edu/tds/papers/Konwar/PODC_final_v7_pnm.pdf", "http://doi.acm.org/10.1145/3087801.3087832" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/7154/a4ba30ed4b8f2f97bd91b5e9809df0fa2ad6.pdf", "s2Url": "https://semanticscholar.org/paper/7154a4ba30ed4b8f2f97bd91b5e9809df0fa2ad6", "sources": [ "DBLP" ], "title": "A Layered Architecture for Erasure-Coded Consistent Distributed Storage", "venue": "PODC", "year": 2017 }, "715f72c533b18495a2f8a23811f1e0b6034e42c6": { "authors": [ { "ids": [ "1859608" ], "name": "Anshuj Garg" }, { "ids": [ "2484837" ], "name": "Debadatta Mishra" }, { "ids": [ "1749860" ], "name": "Purushottam Kulkarni" } ], "doi": "10.1145/3050748.3050760", "doiUrl": "https://doi.org/10.1145/3050748.3050760", "entities": [ "Central processing unit", "Data deduplication", "Graphics processing unit", "Kernel same-page merging", "Linux", "Refresh rate", "Virtual machine" ], "id": "715f72c533b18495a2f8a23811f1e0b6034e42c6", "inCitations": [], "journalName": "", "journalPages": "44-59", "journalVolume": "", "outCitations": [ "3eecc905a579d435068bf7df7280abf5f606ba33", "04704080ae469d24797ee6369f2e2a72ffcca828", "b5ff5af19f737db653f5e14690967166f2a4beea", "a205801dbd56f93f3b98fd6d9a535ed1961806fa", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "c934afd425f7c0aeacf75c7f0aec71d70e1d0e79", "5b33afa07e0c428724073de51200c8b8051825c1", "43cf61960c85339deeeeeb2b75cdf9595565afa8", "045729ec838ecc50be166fe4511506ac4a08226d", "7d3fe94b51306badb8ae65ec11fc89a1e4782e02", "0be302437cec82b9200d61d13d3125e62a8ef499", "5e59d68dd32830d465ae2d739a8f7f5fba52f8e5", "07042865b10297ca4fc9164829d6330db2f60b4c", "5fe4eb1749a823469950456a123c77530e33ad73", 
"45472bef11491245ad51dde6963e3cc40c5f3b79", "86337138bb6dfabef8e1d45ec3c4e30d64c3ce36", "6debd9d773c7aca19f18f3b4640c45f8ae12b254", "0967839ec5595b6379df1d4d494d8535dffc6575", "1ddd08b8610ffe95cb85d2aab2ff08c2722c9772", "2ee01ab9aca4163d391bd29c2123d9be44b0e986", "7d729347a96424bbbd1a114b78e65ea391be3160", "10551c91b4d36d1009b23b4d2b88a9e1733fe029", "3bfe2c5558224de412c3dafcefe2b80a5fc78cf5", "445728ecb0eabed9f7433b0c96bd36d53cb312c9", "1506e49a71ffcc4d201928dbc76a881608c9c6c4", "d985c93917cd0a145451ec2c02c9e25d988ac368" ], "paperAbstract": "Content based page sharing techniques improve memory efficiency in virtualized systems by identifying and merging identical pages. Kernel Same-page Merging (KSM), a Linux kernel utility for page sharing, sequentially scans memory pages of virtual machines to deduplicate pages. Sequential scanning of pages has several undesirable side effects---wasted CPU cycles when no sharing opportunities exist, and rate of discovery of sharing being dependent on the scanning rate and corresponding CPU availability. In this work, we exploit presence of GPUs on modern systems to enable rapid memory sharing through targeted scanning of pages. Our solution, Catalyst, works in two phases, the first where pages of virtual machines are processed by the GPU to identify likely pages for sharing and a second phase that performs page-level similarity checks on a targeted set of shareable pages. Opportunistic usage of the GPU to produce sharing hints enables rapid and low-overhead duplicate detection, and sharing of memory pages in virtualization environments. 
We evaluate Catalyst against various benchmarks and workloads to demonstrate that Catalyst can achieve higher memory sharing in lesser time compared to different scan rate configurations of KSM, at lower or comparable compute costs.", "pdfUrls": [ "https://www.cse.iitb.ac.in/~puru/research/pubs/paper/vee2017-catalyst.pdf", "http://doi.acm.org/10.1145/3050748.3050760" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/715f72c533b18495a2f8a23811f1e0b6034e42c6", "sources": [ "DBLP" ], "title": "Catalyst: GPU-assisted rapid memory deduplication in virtualization environments", "venue": "VEE", "year": 2017 }, "71662b0b173e160f4f994461552b65a3eace2f83": { "authors": [ { "ids": [ "1715657" ], "name": "Ali Abedi" }, { "ids": [ "1867025" ], "name": "Tim Brecht" } ], "doi": "10.1145/3030207.3030229", "doiUrl": "https://doi.org/10.1145/3030207.3030229", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Benchmark (computing)", "Central processing unit", "Cloud computing", "Experiment", "Network performance" ], "id": "71662b0b173e160f4f994461552b65a3eace2f83", "inCitations": [ "d5bf554144b4483abc75e45316f293d956b67811" ], "journalName": "", "journalPages": "287-292", "journalVolume": "", "outCitations": [ "91f580f4ab65ed1b7ad6bd97af2bd67d43e7678b", "438110dc02f39f221896847a4d0e24f88e130598", "52a29a226d5c0e4858bedcaa2ca8323fcb4c5cf0", "1999881614aed9295f4359cf4761926bc23fcd82", "8900b40061708168197c034c9e16af6031e28235", "62183550749e7a1bd483246c03441b3be7c06335", "121df43546bdfa846751c75ca667013f4904e7a1", "05326f1d3a8af8cc15b88c6338e12c092e98ef38", "19d8f8af2d774bda8b95bbeacf62c85811c37b3c", "357c28cac5b8ffa1928d834557909ef6d6b9a2a7", "0d24c8ffa3a5f800c3c1b3146b96a955a84216b6" ], "paperAbstract": "Previous work has shown that benchmark and application performance in public cloud computing environments can be highly variable. 
Utilizing Amazon EC2 traces that include measurements affected by CPU, memory, disk, and network performance, we study commonly used methodologies for comparing performance measurements in cloud computing environments. The results show considerable flaws in these methodologies that may lead to incorrect conclusions. For instance, these methodologies falsely report that the performance of two identical systems differ by 38% using a confidence level of 95%. We then study the efficacy of the Randomized Multiple Interleaved Trials (RMIT) methodology using the same traces. We demonstrate that RMIT could be used to conduct repeatable experiments that enable fair comparisons in this cloud computing environment despite the fact that changing conditions beyond the user's control make comparing competing alternatives highly challenging.", "pdfUrls": [ "https://cs.uwaterloo.ca/~brecht/papers/icpe-rmit-2017.pdf", "https://cs.uwaterloo.ca/~brecht/papers/icpe-rmit-talk-2017.pdf", "http://doi.acm.org/10.1145/3030207.3030229" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/71662b0b173e160f4f994461552b65a3eace2f83", "sources": [ "DBLP" ], "title": "Conducting Repeatable Experiments in Highly Variable Cloud Computing Environments", "venue": "ICPE", "year": 2017 }, "716cba488d780d9bfd336d46a79dbc39d5a12369": { "authors": [ { "ids": [ "1786032" ], "name": "Masahiro Tanaka" }, { "ids": [ "1724468" ], "name": "Kenjiro Taura" }, { "ids": [ "1768754" ], "name": "Kentaro Torisawa" } ], "doi": "10.1109/IPDPS.2017.89", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.89", "entities": [ "Autonomic computing", "Foreach loop", "Linear programming", "Memory footprint", "Middleware", "Multi-core processor", "Performance", "Pool (computer science)", "Remote procedure call", "Scalability", "Self-tuning", "Subroutine", "Throughput" ], "id": "716cba488d780d9bfd336d46a79dbc39d5a12369", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed 
Processing Symposium (IPDPS)", "journalPages": "1088-1097", "journalVolume": "", "outCitations": [ "3d84f3e43358e8fad5386f5bf352d77286a655fa", "8c96cb744fe91463b01c7ea4054f157f450862cc", "c26ac45ae1f3507b874e72249ea5ab4873259f78", "52eca5f1b35e0534e1ae79cbf0e2583b89b0bf8c", "ef908391380d142769fdb94d675ccdbcf6bf8d4d", "6cd06c0795981ac155c3f80bef5f465427cf01a3", "8ac4a4a0ff7c8541a83534fe964ab5e886c65fb7", "355c48837d01d8396b8f5c3c8594609e5144d782", "aad38df3218c21b448e517a3e03599e18c6b1605", "24e227084e9ea05de2cbd58b981bd29bad06178c", "9c8be71b4a2ef441b2893bd82911d3afb11585c7", "1d84ed02bc65400393723c37a70cc68fe39fbef4", "03bd506f69cd60299c5a3e1c6532dd65b72df621", "1e2cac2b76508474dade617205e496007706523f", "212fef528f932200dd0ba04b11f70dca5524e775", "1f1ca237ca06a05c36d1fa1877593bcf508258a9", "da5cd00115f7ec108de8eebf071c5f3f19807df4", "2b7fe50f8212a40b9ccba6b9ec8da4ec2e4d41d5" ], "paperAbstract": "Large-scale data analysis applications are becoming more and more prevalent in a wide variety of areas. These applications are composed of many currently available programs called analysis components. Thousands of analysis component processes are orchestrated on many compute nodes. This paper proposes a novel self-tuning framework for optimizing an application's throughput in large-scale data analysis. One challenge is developing efficient orchestration that takes into account the diversity of analysis components and the varying performances of compute nodes. In our previous work, we achieved such an orchestration to a certain degree by introducing our own middleware, which wraps each analysis component as a remote procedure call (RPC) service. The middleware also pools the processes to reduce startup overhead, which is a serious obstacle to achieving high throughput. This work tackles the remaining task of tuning the size of the analysis components' process pools to maximize the application's throughput. 
This is challenging because analysis components differ drastically in turnaround times and memory footprints. The size of the process pool for each type of analysis component should be set by giving consideration to these properties as well as the constraints on both the memory capacity and the processor core counts. In this work, we formulate this task as a linear programming problem and obtain the optimal pool sizes by solving it. Compared to our previous work, we significantly improved the scalability of our framework by reformulating the performance model to work on hundreds of heterogeneous nodes. We also extended the service allocation mechanism to manage the computational load on each compute node and reduce communication overhead. The experimental results show that our approach is scalable to thousands of analysis component processes running on 200 compute nodes across three clusters. Moreover, our approach significantly reduces memory footprint.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.89" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/716cba488d780d9bfd336d46a79dbc39d5a12369", "sources": [ "DBLP" ], "title": "Autonomic Resource Management for Program Orchestration in Large-Scale Data Analysis", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "71c2733900d3cdd0d4fda9191c083db1125cc809": { "authors": [ { "ids": [ "2110340" ], "name": "Yiwen Li" }, { "ids": [ "3337066" ], "name": "Brendan Dolan-Gavitt" }, { "ids": [ "38703414" ], "name": "Sam Weber" }, { "ids": [ "2030168" ], "name": "Justin Cappos" } ], "doi": "", "doiUrl": "", "entities": [ "Kernel (operating system)", "Linux", "Lock (computer science)", "Operating system", "Security bug", "Software bug", "Systems design", "z/OS" ], "id": "71c2733900d3cdd0d4fda9191c083db1125cc809", "inCitations": [ "9b788c4d0ae70097f353ae54e272cbf6b34b3569" ], "journalName": "", "journalPages": "1-13", "journalVolume": "", 
"outCitations": [ "82f6d9469abb2c2935d5f5da9244887ed596d78b", "565a174a24e7f47dcd7a21f57cabc252b5692a0f", "79e14bf42535966db5e056af6d8a0e0ec1f522b2", "48b5dd4b43e403a17c3a94688efa666b554b8882", "70d3ba23d33a05fc865046e8fb539fbe09ce1686", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "01ee7db67463b53143e0b2c126363ed6d3c8a532", "2f484f6f079faefef1a8acf26383ecdb019fd380", "059170b316ecb882014beced829b682a04758dd5", "4d140605c191c27eba745654e84302da125b23a6", "08832863bc3f041222f381c8ae143f8a66449059", "067c7857753e21e7317b556c86e30be60aa7cac0", "9156a8a04250a6eab18f1bd63c30a7227fa2051d", "12e9e5119ae70c83810c03a93bb60f7687aa8d11", "3ab0873bc36d98b0084eb024c1830b157a205cb4", "2eb054f5bad84acc87d1cbc618a55140010ad8e1", "49d50f1d0bb2a03388f3cb60f9f1ca01974d371c", "7f489d9801e0674f4436beb34ea8b8695050d5fe" ], "paperAbstract": "Virtual machines (VMs) that try to isolate untrusted code are widely used in practice. However, it is often possible to trigger zero-day flaws in the host Operating System (OS) from inside of such virtualized systems. In this paper, we propose a new security metric showing strong correlation between \u201cpopular paths\u201d and kernel vulnerabilities. We verify that the OS kernel paths accessed by popular applications in everyday use contain significantly fewer security bugs than less-used paths. We then demonstrate that this observation is useful in practice by building a prototype system which locks an application into using only popular OS kernel paths. 
By doing so, we demonstrate that we can prevent the triggering of zero-day kernel bugs significantly better than three other competing approaches, and argue that this is a practical approach to secure system design.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_li.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/li-yiwen", "https://www.usenix.org/system/files/conference/atc17/atc17-li_yiwen.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/77af/af8e922209941082da0a61dcaf18026633f4.pdf", "s2Url": "https://semanticscholar.org/paper/71c2733900d3cdd0d4fda9191c083db1125cc809", "sources": [ "DBLP" ], "title": "Lock-in-Pop: Securing Privileged Operating System Kernels by Keeping on the Beaten Path", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "71eb41fe1532b648ca654d23a0fcc66f566f443d": { "authors": [ { "ids": [ "2557896" ], "name": "Xiaohui Duan" }, { "ids": [ "31638960" ], "name": "Kai Xu" }, { "ids": [ "2812433" ], "name": "Yuandong Chan" }, { "ids": [ "2540484" ], "name": "Christian Hundt" }, { "ids": [ "38613433" ], "name": "Bertil Schmidt" }, { "ids": [ "2103230" ], "name": "Pavan Balaji" }, { "ids": [ "3429925" ], "name": "Weiguo Liu" } ], "doi": "10.1109/CLUSTER.2017.16", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.16", "entities": [ "256-bit", "Algorithm", "Automatic vectorization", "Bioinformatics", "Bioinformatics", "Bowtie (sequence analysis)", "Central processing unit", "Communications satellite", "High-throughput computing", "Intel Core (microarchitecture)", "Manycore processor", "Memory bound function", "Multithreading (computer architecture)", "Next-generation network", "Parallel computing", "Performance Evaluation", "Program optimization", "SIMD", "SW26010", "Scalability", "Sunway", "Supercomputer", "Thread (computing)", "Throughput" ], "id": "71eb41fe1532b648ca654d23a0fcc66f566f443d", "inCitations": [], "journalName": "2017 IEEE 
International Conference on Cluster Computing (CLUSTER)", "journalPages": "36-46", "journalVolume": "", "outCitations": [ "4cad8f2a31b3c72742a761fe90a372d4a4717ebf", "38978787ef1ff65158918675b6fcbd27af16ca8c", "40c5441aad96b366996e6af163ca9473a19bb9ad", "b0312b82ee0017f7bbfc78ff50fcb0561d70bc9b", "627aa62eec8edda82481f429785b10ccc1818416", "86a2f114ea28dd2c6debb0457f363776d53571a7", "65a3022b84914ffc0b19a119dcb44146e0c1ecda", "f168f5e9b77e627aeb08a5073902c6a212992d77", "f5a88d8561bf6a64b43aa7e88beff8220e792bee", "684ef2aaa7e41d2d382113cc0e1b092c265e0c41", "9c465b7d37024f6afe8a7063590c38fb69ec815c", "689a5359b472de774bbdd1dc64fdad4152ef7c67", "eec6d4664256c49a9e312b17f7455121cac90b25", "8e58170f1093ea47372bcf00f987cbf906775057", "25f017efd2905c6d0c6a92f2dfe19113ee42938e", "70c02b56d4e4120f1090cff1f88902518f3cd19b", "0c7f839b4a5c7771cfeb628f70d37e10b32b8368", "b76269bf962989ce271bef7ea863ff4adf9c9de6", "71150718ec7affbc4f9130f55f925af0dd956651", "000a6f63c588697d6ae8db6cb6ffd6394d961cb7", "1ef38c80b1bc4352ce0df0ef7c05249fb64bf78d", "b9d8829b5d1836f5ec1babc15d2832bb97ef3d8e" ], 
"paperAbstract": "The availability and amount of sequenced genomes have been rapidly growing in recent years because of the adoption of next-generation sequencing (NGS) technologies that enable high-throughput short-read generation at highly competitive cost. Since this trend is expected to continue in the foreseeable future, the design and implementation of efficient and scalable NGS bioinformatics algorithms are important to research and industrial applications. In this paper, we introduce S-Aligner – a highly scalable read mapper designed for the Sunway Taihu Light supercomputer and its fourth-generation ShenWei many-core architecture (SW26010). S-Aligner employs a combination of optimization techniques to overcome both the memory-bound and the compute-bound bottlenecks in the read mapping algorithm. In order to make full use of the compute power of Sunway Taihu Light, our design employs three levels of parallelism: (1) internode parallelism using MPI based on a task-grid pattern, (2) intranode parallelism using multithreading and asynchronous data transfer to fully utilize all 260 cores of the SW26010 many-core processor, and (3) vectorization to exploit the available 256-bit SIMD vector registers. Moreover, we have employed asynchronous access patterns and data-sharing strategies during file I/O to overcome bandwidth limitations of the network file system. Our performance evaluation demonstrates that S-Aligner scales almost linearly with approximately 95% efficiency for up to 13,312 nodes (concurrently harnessing more than 3 million compute cores). Furthermore, our implementation on a single node outperforms the established RazerS3 mapper running on a platform with eight Intel Xeon E7-8860v3 CPUs while achieving highly competitive alignment accuracy.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.16" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/71eb41fe1532b648ca654d23a0fcc66f566f443d", "sources": [ "DBLP" ], "title": "S-Aligner: Ultrascalable Read Mapping on Sunway Taihu Light", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "7206aead5a341f361e6571d607f3c032e65e2f7e": { "authors": [ { "ids": [ "26410174" ], "name": "Yizhou Shan" }, { "ids": [ "2984487" ], "name": "Shin-Yeh Tsai" }, { "ids": [ "2290416" ], "name": "Yiying Zhang" } ], "doi": "10.1145/3127479.3128610", "doiUrl": "https://doi.org/10.1145/3127479.3128610", "entities": [ "Byte", "Cache (computing)", "Data center", "Distributed memory", "Distributed shared memory", "Dynamic random-access memory", "High availability", "Linux", "NoSQL", "Non-volatile memory", "Persistence (computer science)", "Persistent data structure", "Persistent memory", "Replication (computing)", "Shared memory" ], "id": 
"7206aead5a341f361e6571d607f3c032e65e2f7e", "inCitations": [ "a6b0d5a4b19d9a8e133e2fb30b40a6b9eae7283d", "1b1dda022e899b2d922adf330c96a8c9f7ad2abe" ], "journalName": "", "journalPages": "323-337", "journalVolume": "", "outCitations": [ "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "05a1357946de5eca42a477b7b268db4944219a2e", "0e9741bc1e0c80520a8181970cd4f61caa00055a", "8f19dd31786cf38e3edbfaa2d079087776ef8e4d", "b45e7e96e35169fe8f68a1d14a85c6503eac79bc", "605277f87ee483cfd04f986780514c26160d2e87", "20960a5cec02eccebd7a14273c5521074f6fcb80", "006cd63664db53494cc61a44d5c6ebc668dc4b6a", "13960cd52f532dd94f8eb2ecef7302967abeb00b", "32a01a917bc310388002e7c7231ba2c07416bed6", "dc2e2b794a784782d7d9860f1358aa107f71c1bf", "4e27f44ade4545931a99eee2dc8011b44f5db4b6", "0c96b3ac2e720448054f1bcebdfd52ee341eac57", "1c8195cadc7ad4a8b59b16fe77574dd6d160d7d2", "29a1148d75878671dc3663bf480e33d7bd91597d", "bf5497e15f22233cbc2a4d0c3cc2c36f26738701", "9376a2d69d06e39fd6fd27c9ce2f0817cc1dd4ef", "10419c6f4aa50a36ed0b103c9ddb9aec45f133fe", "c7d6ee693eb72e274aa8702ea579902996e4f3d5", "3dff11679346f5344af1018cad57fa14cc349f2f", "01815b1f48f8cdd4e78260deaddf4bfe7af26f60", "4827cc74dba0c39172554cf0116eb111797f0d1b", "60ddf74dd5b443c3bfb59fe876b42f9d6112c4fb", "1cfee3e6bad11c5c92cd06065064c474a00e2412", "94783d113951822195d4ba44599a8fcbdef9d4bf", "823d6f218fb9cc061d401ed85b3032512c5b18d6", "21326de7444f0df72937a03876732016c59d9c9d", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "03621901aaa2d77270478326b3a25350508a2b93", "3b97b637430e4f799cd458ad1f86bbfa31ebf53d", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "1b1dda022e899b2d922adf330c96a8c9f7ad2abe", "2fc8a439f0f73462f875870c403c90ea1fb99e41", "7dab366ccff16430aa3bf863302ae1242b0c44e3", "57c823b3b07b98233394bf15cfbbaed6a84809df", "b85195f9fec553b9b1be88a6dd7ddb49ff2cef7b", "793f5e737284925a176f8ec82b3bb0d2178bb330", "0653e2ed9f683868cb4539eb8718551242834f6b", "1156f60e40548096df49528b1342bb3e88b0f378", "e4a29d953e0bbd0b7adb06429481cdc47338ad12", 
"46b2f5742ed26cd098ea025e310f169bb3ff734a", "0270c2056eb50b5d4597afa722c50abf21e67a82", "75032b6df2f3988d2cb6988c73cbc01c7b9e80cb", "3bb6d5834bfb355553588e382ac5f9fa8a8d831d", "71c2deb5c3b4b0fd1ed68bdda534ec7ea76e845b", "6a1df9dae902f3d377f9c85ba9732b8d2270bf2b", "693a93f8b8c7317ce34c947c06b3a6220155a2b0", "263f588edb69272ccbf9f1b78a9625b914235f6d", "514a5c15e8cf3f681febecad954a4508d9189c99", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "5758cebc129de7537537e44c4e295a11a2de137d", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "0706356c9ab6014d6b04577d38289ea8328291a5", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "9aa0d7253574e50fe3a190ccd924433f048997dd", "d5fc12b8d3516ddda264d1554363d3f7575fa61a", "893448e800e13eb022cc2b3c6328ff1a85bdbe87", "3915310247bf33ec432a774a2e650aa46e01b290", "0ad8e89091eed09217e66adc98136126addc2619", "2fb179c0b0ad6cf37f6272db05ccfad708a82f2d", "2eff823bdecb1a506ba88e1127fa3cdb1a263682", "08fa5e5935f8108340f1583845292cfa9401ccc1", "2f2cdd7b0c98b5e43b61272d2ac3ebb5cd29041d", "2d59bc0e0498086d5dcbba6e23e4b04d879f5953", "3f8948a91ecb86e1dce17c876684d2e4b55c7569", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "5227ecb47812d09b60f5b414294a1042fcbd21c9", "60cadcec3c159ca4ce00fe12da61b9ec00c59b70", "9183cde02e4306828089fb8adae74736a9df3ceb", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "99723365fc9fe6960201bf9d246a90ccbb6396fa", "0494a1ab6f0dd764fb9039772818b8f269ed70b4", "83d89c2785cb82496a0259d9982dd69d286317de", "205cf007cf77bbf81e55b74635017087585f7b7c", "0558c94a094158ecd64f0d5014d3d9668054fb97", "885c666fbcfd1a10c613496d7a041d01b99c7a39", "abf6512c047a5e109815bbbff9bf0a6b498e5c69", "73e997ecf8b862b8a0cd0990455971390d2a3c3d", "337e4b7f57ccbb7485950b93da9c5bb4ec4dc9ad", "637dd73813b6a01b6e81e5710fd0a99737d65b96", "eb82d3035849cd23578096462ba419b53198a556" ], "paperAbstract": "Next-generation non-volatile memories (NVMs) will provide byte addressability, persistence, high density, and DRAM-like 
performance. They have the potential to benefit many datacenter applications. However, most previous research on NVMs has focused on using them in a single machine environment. It is still unclear how to best utilize them in distributed, datacenter environments.\n We introduce Distributed Shared Persistent Memory (DSPM), a new framework for using persistent memories in distributed data-center environments. DSPM provides a new abstraction that allows applications to both perform traditional memory load and store instructions and to name, share, and persist their data.\n We built Hotpot, a kernel-level DSPM system that provides low-latency, transparent memory accesses, data persistence, data reliability, and high availability. The key ideas of Hotpot are to integrate distributed memory caching and data replication techniques and to exploit application hints. We implemented Hotpot in the Linux kernel and demonstrated its benefits by building a distributed graph engine on Hotpot and porting a NoSQL database to Hotpot. Our evaluation shows that Hotpot outperforms a recent distributed shared memory system by 1.3× to 3.2× and a recent distributed PM-based file system by 1.5× to 3.0×.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3128610", "https://engineering.purdue.edu/WukLab/hotpot-socc17.pdf", "http://nvmw.ucsd.edu/nvmw18-program/unzip/current/nvmw2018-final19.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7206aead5a341f361e6571d607f3c032e65e2f7e", "sources": [ "DBLP" ], "title": "Distributed shared persistent memory", "venue": "SoCC", "year": 2017 }, "721e64bfd3158a77c55d59dd6415570594a72e9c": { "authors": [ { "ids": [ "3437947" ], "name": "Hassan Halawa" }, { "ids": [ "18135403" ], "name": "Hazem A. 
Abdelhafez" }, { "ids": [ "23196233" ], "name": "Andrew Boktor" }, { "ids": [ "1747805" ], "name": "Matei Ripeanu" } ], "doi": "10.1007/978-3-319-64203-1_7", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_7", "entities": [ "Central processing unit", "Clock rate", "Frequency scaling", "Graphics processing unit", "Matrix multiplication", "Memory bus", "Multi-core processor", "Performance per watt", "Program optimization", "Requirement", "System on a chip", "Tegra" ], "id": "721e64bfd3158a77c55d59dd6415570594a72e9c", "inCitations": [], "journalName": "", "journalPages": "92-105", "journalVolume": "", "outCitations": [ "092217c2267f6e0673590aa151d811e579ff7760", "1df217c386d56afe8c8f10a6da5fab4075caa2e2", "13b9a0f935d11105e7cbebe75187568372a9c34b", "23177452df15b652dd54a59324502b92c99687a7", "07563644cae0d3d03b37724efe084b6510220103" ], "paperAbstract": "This study characterizes the NVIDIA Jetson TK1 and TX1 Platforms, both built on a NVIDIA Tegra System on Chip and combining a quad-core ARM CPU and an NVIDIA GPU. Their heterogeneous nature, as well as their wide operating frequency range, make it hard for application developers to reason about performance and determine which optimizations are worth pursuing. This paper attempts to inform developers\u2019 choices by characterizing the platforms\u2019 performance using Roofline models obtained through an empirical measurement-based approach as well as through a case study of a heterogeneous application (matrix multiplication). Our results highlight a difference of more than an order of magnitude in compute performance between the CPU and GPU on both platforms. Given that the CPU and GPU share the same memory bus, their Roofline models\u2019 balance points are also more than an order of magnitude apart. We also explore the impact of frequency scaling: build CPU and GPU Roofline profiles and characterize both platforms\u2019 balance point variation, power consumption, and performance per watt as frequency is scaled. 
The characterization we provide can be used in two main ways. First, given an application, it can inform the choice and number of processing elements to use (i.e., CPU/GPU and number of cores) as well as the optimizations likely to lead to high performance gains. Secondly, this characterization indicates that developers can use frequency scaling to tune the Jetson Platform to suit the requirements of their applications. Third, given a required power/performance budget, application developers can identify the appropriate parameters to use to tune the Jetson platforms to their specific workload requirements. We expect that this optimization approach can lead to overall gains in performance and/or power efficiency without requiring application changes.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_7", "http://www.ece.ubc.ca/~matei/papers/europar17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/721e/64bfd3158a77c55d59dd6415570594a72e9c.pdf", "s2Url": "https://semanticscholar.org/paper/721e64bfd3158a77c55d59dd6415570594a72e9c", "sources": [ "DBLP" ], "title": "NVIDIA Jetson Platform Characterization", "venue": "Euro-Par", "year": 2017 }, "7233d3bd82ade69235873870f929f2ffc46a1327": { "authors": [ { "ids": [ "2124920" ], "name": "Zafar Ayyub Qazi" }, { "ids": [ "39362520" ], "name": "Melvin Walls" }, { "ids": [ "3169583" ], "name": "Aurojit Panda" }, { "ids": [ "1732751" ], "name": "Vyas Sekar" }, { "ids": [ "1699297" ], "name": "Sylvia Ratnasamy" }, { "ids": [ "1753148" ], "name": "Scott Shenker" } ], "doi": "10.1145/3098822.3098848", "doiUrl": "https://doi.org/10.1145/3098822.3098848", "entities": [ "Event-driven process chain", "Interaction", "Next-generation network", "Open-source software", "Pipeline (computing)", "Scalability", "Schema (genetic algorithms)", "Systems architecture", "Throughput" ], "id": "7233d3bd82ade69235873870f929f2ffc46a1327", "inCitations": [ "3af0534b0d73dab52f26d2442594264e953edb6a", 
"7a4c1b65cb73268b5beb4eb815a618c08b229aca" ], "journalName": "", "journalPages": "348-361", "journalVolume": "", "outCitations": [ "262999cfbfd121fc51930eb61493d35e3f6fbe7b", "873170e5da3fbd9f759ca34d86ae73f7f6597501", "c6159999ccb3b9261234b10dd1cd006a7b733e89", "6e4d333d5e53ee2dd71c8483e5aef59bd5f7f596", "0a59166593f0a2fa260f16fd853299c9f0863fdf", "57cfb44be82575569275dc58e887acbca4ad7fa8", "3c4a87e0bc2b06eab1ce0e80a56230da470194be", "4728bda27d89d524f0751ef0dddb5da0bffe0826", "24e13c33e8ac68f6eae9784052e8e1ee70feff98", "6f4fb0e7c154add94208112a77cbec2a3334bb77", "1d940e5b26f56ec93d91d6d61a29f88c5e61507e", "10050f52b7422ca8cb2fc40a52ef11108a9b0cc4", "2445d07ed9574fc8c27bcfdcdfaacdb9e66afb1e", "336b4f3099b8f629adc20a69aba15257e53539f9", "b3bf6ac7bd450eccec62fa45182924a5106eddd3", "6bcf05e4dcff1ff71945e1f282aebe295c8b13f4", "97018df402c3512285141b37277eede60740876e", "409e7463291ff20c3704a67c2ce8a3ae8838171f", "0433bb657317ac22f7c66d71dfd14c8ead607d73", "02c76f7d61f1ff47609a19f46aec3e6d0c8a9425", "6800646e8de9b08e6a2174a927b50bb0e28fbb76", "17650831f1900b849fd1914d02337e1d006aea0c", "28f2ae875b37ce38d5e6c7209e6a705a39a53a47", "077d6198039dea3f60ef9f3d3ef9f128fd3edf71" ], "paperAbstract": "Cellular traffic continues to grow rapidly making the scalability of the cellular infrastructure a critical issue. However, there is mounting evidence that the current Evolved Packet Core (EPC) is ill-suited to meet these scaling demands: EPC solutions based on specialized appliances are expensive to scale and recent software EPCs perform poorly, particularly with increasing numbers of devices or signaling traffic.\n In this paper, we design and evaluate a new system architecture for a software EPC that achieves high and scalable performance. 
We postulate that the poor scaling of existing EPC systems stems from the manner in which the system is decomposed which leads to device state being duplicated across multiple components which in turn results in frequent interactions between the different components. We propose an alternate approach in which state for a single device is consolidated in one location and EPC functions are (re)organized for efficient access to this consolidated state. In effect, our design \"slices\" the EPC by user.\n We prototype and evaluate PEPC, a software EPC that implements the key components of our design. We show that PEPC achieves 3-7x higher throughput than comparable software EPCs that have been implemented in industry and over 10x higher throughput than a popular open-source implementation (OpenAirInterface). Compared to the industrial EPC implementations, PEPC sustains high data throughput for 10-100x more users devices per core, and a 10x higher ratio of signaling-to-data traffic. In addition to high performance, PEPC's by-user organization enables efficient state migration and customization of processing pipelines. We implement user migration in PEPC and show that state can be migrated with little disruption, e.g., migration adds only up to 4μs of latency to median per packet latencies.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098848", "https://people.eecs.berkeley.edu/~zaqazi/sigcomm17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7233d3bd82ade69235873870f929f2ffc46a1327", "sources": [ "DBLP" ], "title": "A High Performance Packet Core for Next Generation Cellular Networks", "venue": "SIGCOMM", "year": 2017 }, "7269ad16f06579abc8f87206ffe1881d6b94c302": { "authors": [ { "ids": [ "38918519" ], "name": "Opeoluwa Matthews" }, { "ids": [ "2172933" ], "name": "Daniel J. 
Sorin" } ], "doi": "10.1145/3123939.3123971", "doiUrl": "https://doi.org/10.1145/3123939.3123971", "entities": [ "Cache coherence", "Communications protocol", "Formal verification", "Push-button", "Systems architect" ], "id": "7269ad16f06579abc8f87206ffe1881d6b94c302", "inCitations": [], "journalName": "", "journalPages": "477-489", "journalVolume": "", "outCitations": [ "472806cf3acbef93dfa559253a2ff928d346037f", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "c6a994363c51cba1a6edd9f70d31a45c41c2028f", "0af8c168f4423535773afea201c05a9e63ee9515", "2d5c0b31e7684d7923fe1cbca26f2d87be2d0106", "10f1faeec4ee2158b8535b249a20de5419998153", "2291663e7f3cdb88ce88e995f50ad8438edb639d", "3ab168addfd8ae356e2db48cb5713beec2da8fc1", "8680354c6231ee6861c95e594f4316b1ed34e215", "2c3f8c60aa4ddccc9e737cc04a6bfb7761ced8ea", "be47b4f2b2a1e85923a8c574880b4b35013cd712", "2ca16a4b561ce75be6639d24bc2d93eeb8df86bc", "99d4c73c44f1049bf1e31f4f11f561d67eb5524d", "1449f296ecd8748c51b7819ba86482a33d68296d", "7fc3cbfecd9c5d342181996d3fe753963ea0fdf9", "39e345371784fcc2ad45f78433bb312e01a39fa5", "5d1209675ffa1bea82a206bf33c061b9b127daa5", "78ef558e04209af5c1243c640f6aa71e5b211bf3", "74b7b3323ada32a114b0a819458f65404a3c93ec", "905f7a231a80fa00c87b41d6b4a85561e54f56a4", "78186ba44a0e98017832b6b5bf49bd73be40a3bc", "31095a910cd60eddba72e67d85fa146c93f2e3e2", "15881fb2a115bd8f2f87965b0d42f4abcc0129bd", "0f985bbdc6144889ad3073dec2079144f6e3d8dd", "61e977e6c68c2acc73bdd9d94522ddb38e4b898d", "b071ec32cc1bc4a7c041e26c3ca3c1343ba69b66", "26a5f66c7de4af3467eee4ab9a39cbe917e157d0", "378782a827933059f9f91e6e29aac84bd0857828", "189f9aaa1e8a0ce369d14dfd1723e7df7e4e5d2d", "0101197fc89e9db09eade5e3adec52a295bb424e", "d6ea6e5309f9e5b7ce4b75dffa2766fd3d0c371f", "43e3b9f52eb2fc59ca35d87e0c452ff2f9909efa", "81e5ebd32b063356406a60cc5d6c650c1c0f48bd" ], "paperAbstract": "Recent work in formal verification theory and verification-aware design has sought to bridge the divide between the class of protocols architects want to 
design and the class of protocols that are verifiable with state of the art tools. Particularly the recent Neo work in formal verification theory, for the first time, formalizes how to compose flat subprotocols with an arbitrary number of nodes into a hierarchy while maintaining correct behavior. However, it is unclear if this theory scales to realistic systems. Moreover, there is a diversity of systems architects would be interested in, to which it is not clear if the theory applies.\n In this paper, we show how the abstract Neo theory can be leveraged to design a realistic hierarchical coherence protocol. As such, we present the first realistic hierarchical coherence protocol verified with fully-automated (push-button) verification tools for all scales and tree configurations. We explore the practical limitations posed by both the theory and the verification tools in designing this verifiable hierarchical protocol. We experimentally evaluate our protocol, comparing it to more complex protocols that have optimizations prohibited by the theory and verification tool. 
Finally, we discuss how a variety of system configurations and protocols architects might be interested in can be adapted to the Neo theory, which we hope opens up the theory to future work in verification-aware protocol design.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123971", "http://web.eecs.umich.edu/~luwa/papers/MICRO17/Neo_MICRO17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7269ad16f06579abc8f87206ffe1881d6b94c302", "sources": [ "DBLP" ], "title": "Architecting hierarchical coherence protocols for push-button parametric verification", "venue": "MICRO", "year": 2017 }, "727d43e3c80a1149c3a505b7c67bce690fcfb99c": { "authors": [ { "ids": [ "1709145" ], "name": "Raghu Ramakrishnan" }, { "ids": [ "1731050" ], "name": "Arvinder Kaur" } ], "doi": "10.1145/3030207.3044533", "doiUrl": "https://doi.org/10.1145/3030207.3044533", "entities": [ "Closed system", "Nautical chart", "Production support", "Production system (computer science)", "Real-time computing", "Response time (technology)", "Software system", "Swift (programming language)", "System monitoring", "Technical support", "Throughput", "Unavailability" ], "id": "727d43e3c80a1149c3a505b7c67bce690fcfb99c", "inCitations": [], "journalName": "", "journalPages": "213-222", "journalVolume": "", "outCitations": [ "4e10e053686755a7247de06ede0ae5ae71e00ffb", "27cbcec780455b0440fb4024eedd285f2b90a87d", "46791475fdacabca114719f6676c217091b7676c", "5f54c3c57e3bc3f9821314150f81ee9e18f716b8", "8daa9607850aad4749f1203eed2a78ad8cd08089", "56f11e159e7a2690ab6e4fea18ece014392d7352", "42b27a24138962a57bdf1e345d845e52f3de4abc", "2acbd7681ac7c06cae1541b8925f95294bc4dc45", "7535288a3a8754df21327fd17f2b7bda66667663", "0d2ccae6d37b9e4053fc476887d1565de58e5924", "00794ee8c2231b5e42d792dcb8c4a5b858c01464", "b011815f586b70bf92e5e535e9a6403ef6319c8d", "965b11c4e2327bf0220649b0b4cbee76e4dd7bb0", "2f2e3d4f0a6fffd3eae8cde797ed9f4c4fe31a27", "5f0f9b1757f80356ef2f7551ffa64e2e589a202b", 
"6c8c4d50d49a8c53f4296f23ad0540593916a799", "aed0f7aa62787f9ffcf23b67694b536c02df5cc6", "996ca46fc91047b74ec0c9f3ff21ef47c9fc341e", "f73ae2fd526b63d5b04aa02f2b23eb747b768607" ], "paperAbstract": "The detection of early-warning signals of performance deterioration can help technical support teams in taking swift remedial actions, thus ensuring rigor in production support operations of large scale software systems. Performance anomalies or deterioration, if left unattended, often result in system slowness and unavailability. In this paper, we present a simple, intuitive and low-overhead technique for recognizing the early warning signs in near real time before they impact the system. The technique is based on the inverse relationship which exists between throughput and average response time in a closed system. Because of this relationship, a significant increase in the average system response time causes an abrupt fall in system throughput. To identify such occurrences automatically, Individuals and Moving Range (XmR) control charts are used. We also provide a case study from a real-world production system, in which the technique has been successfully used. The use of this technique has reduced the occurrence of performance related incidents significantly in our daily operations. 
The technique is tool agnostic and can also be easily implemented in popular system monitoring tools by building custom extensions.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3044533", "https://research.spec.org/icpe_proceedings/2017/proceedings/p213.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/727d43e3c80a1149c3a505b7c67bce690fcfb99c", "sources": [ "DBLP" ], "title": "Technique for Detecting Early-Warning Signals of Performance Deterioration in Large Scale Software Systems", "venue": "ICPE", "year": 2017 }, "72880d15db2282512e5d3f0a3796b397d68cc7db": { "authors": [ { "ids": [ "32939696" ], "name": "Shweta Shinde" }, { "ids": [ "33275317" ], "name": "Dat Le Tien" }, { "ids": [ "2855848" ], "name": "Shruti Tople" }, { "ids": [ "1750032" ], "name": "Prateek Saxena" } ], "doi": "", "doiUrl": "", "entities": [ "Business logic", "Central processing unit", "Classless Inter-Domain Routing", "Dataflow", "Hypervisor", "Interaction", "Library", "Linux", "Linux", "Multiprocessing", "Network enclave", "OpenSSL", "Operating system", "POSIX", "Software feature", "Synchronization (computer science)", "System call", "Task Control Block", "Thread (computing)", "Tor Messenger", "Trust anchor", "User space", "Web service" ], "id": "72880d15db2282512e5d3f0a3796b397d68cc7db", "inCitations": [ "38a54f9bbbfc46599770a28999365144a273783f", "0b5c1b6c8f40926aa04c34591f5453a3f61619ec", "788b9e288c8db9decbbb2668fdee3737e386e143", "3ff39b5db2527b702d826461157fdfe553d715be", "7ab74b4e4c11626c2642fcb95342c9c318dbfdca", "d06fa7a63b595b968176656ae0946078987933ff", "345533e1f72f3f9e215e1fc468a3131a90481414", "0a85b3afc89958583642b7fd39b37e745a053190", "19aeb06b3ba5b454fc462254c178acdf233d955b", "334ec6e57110ece9f482f9ec2e85412b0be8072a", "477bbcb5655a9c64893207bb49032e87c06a05f2", "8569785f80712b5787e12b86a3870a28c0182b2c", "e41440cff90683629228b308a94e48c7af11ca36", "8ca1436fe1e9bbdb39a92178fa80c7869d92573d", "6f8fe3cbacb8436615e886b6188e2e62fd1a5b3c", 
"43242753ae89aca56a92fb1f65870ea0c7b7d6d7", "2106bb4ef13e35ad7640376b0ba3b6261b82fe22", "4e6841a87f67a39d5b145f1dbc88000cca3b213d", "85741fa2a0fb1060c138b1b11a0906381661fbcb", "33ae35cc24ef4303979b479671c2065256e1b3a7", "2d8795b4e1fcacb73d11da6c834b87dc83212535", "34fe0c6e91d2a6a2325f5057222c3fbf22224fe5", "a355edbb24d406761407e2728218d2192f2c1fcf", "2087f336b0eb38fc60a24a32ec0821fc8fe2b2aa", "b897c4c09b480f9934d5e9e4cfa2d540aaed522f", "50ba271c1e0ddd814b6e79348a8963c788d9ddf9" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "17f19d9ec093ef82a10f1276fc53c10d4667836d", "3ffc94c7066b4856b1cfae99ab66cd20310e41dd", "0af2e5c772b26963e3a756225a9fab5654f640f9", "5358608f29d567b1a3b4c8adccdfaaf24b379cd0", "3cae67dde8b20aa58ebd12def02c7fa8ad844de4", "1521e801e8e08ecec3b0baabb07f9a6ce0a67a85", "5b2092b54860f134f78b2ec884c910750def71e6", "07f0e56d1c37c213cd5c617dbfba5a0549629a19", "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "1bb07c114cb447552d36a95445cc207f496d85aa", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "d937dd67265c2ac0ba5ffa8022323d37c2344188", "01d1575116b8aaacde1fd0e164a932b1ceffa04d", "46e46c77423fcaf6e4a435fecca4430b1e78bd5d", "b0ac2616034f56ab1469afb935b55fe7e37f8f41", "104eb3716d8b2d7e6d69805357d5b7fb87caff3e", "20f5f8733134d87041b95b742d613051a1fb3fdb", "01fde8698110cf46ff48a17c65f2658dab4c323c", "0a289fd7b14345822b1acda6d82750b15d59663e", "06bf84f98e7dd39be8d96eb67bafbf56d61bc715", "5e4e0daea223658f8c96d7728bd32398680ebef3", "0e98262b7d5d42ad0f7f7f83a5be5107f6f24df5", "abf20d55fe1d05b41006a565fc152157bd06933f", "d296252ddf0e2c6b7422008d703843c1863bd15b", "1b80ae882afb809686f20765e4a42a5b99aa55de", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "39d4af99edc754d829afaf5c1e02ea17f5a93fc2", "7f489d9801e0674f4436beb34ea8b8695050d5fe", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "6b6fae57882fd193461fca64654107068ce9fd9a", "7578c7af43ec815923f92618b84f241f65658cd2", "08832863bc3f041222f381c8ae143f8a66449059", 
"30909df12b1b01760ae4c5406e15f302a6524446", "2087f336b0eb38fc60a24a32ec0821fc8fe2b2aa" ], "paperAbstract": "Intel SGX, a new security capability in emerging CPUs, allows user-level application code to execute in hardware-isolated enclaves. Enclave memory is isolated from all other software on the system, even from the privileged OS or hypervisor. While being a promising hardware-rooted building block, enclaves have severely limited capabilities, such as no native access to system calls and standard OS abstractions. These OS abstractions are used ubiquitously in real-world applications. In this paper, we present a new system called PANOPLY which bridges the gap between the SGX-native abstractions and the standard OS abstractions which feature-rich, commodity Linux applications require. PANOPLY provides a new abstraction called a micro-container (or a \u201cmicron\u201d), which is a unit of code and data isolated in SGX enclaves. Microns expose the standard POSIX abstractions to application logic, including access to filesystems, network, multi-threading, multi-processing and thread synchronization primitives. Further, PANOPLY enforces a strong integrity property for the inter-enclave interactions, ensuring that the execution of the application follows the legitimate control and data-flow even if the OS misbehaves. Thus, commodity Linux applications can enhance security by splitting their application logic in one or more microns, or by importing micron-libraries, with little effort. In contrast to previous systems that enable comparable richness, PANOPLY offers two orders of magnitude lower TCB (about 20 KLOC in total), more than half of which is boiler-plate and can be automatically verified in the future. 
We demonstrate how PANOPLY enables much stronger security in 4 real-world applications \u2014 including Tor, OpenSSL, and web services \u2014 which can base security on hardware-root of trust.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/panoply-low-tcb-linux-applications-sgx-enclaves/", "https://www.comp.nus.edu.sg/~shweta24/publications/panoply_ndss17.pdf", "http://www.internetsociety.org/sites/default/files/ndss2017_07-5_Shinde_paper.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/72ac/f6830064ee89b955e554013420def6154c12.pdf", "s2Url": "https://semanticscholar.org/paper/72880d15db2282512e5d3f0a3796b397d68cc7db", "sources": [ "DBLP" ], "title": "Panoply: Low-TCB Linux Applications With SGX Enclaves", "venue": "NDSS", "year": 2017 }, "728cb61e78d55bc5039ed78920b20259a1135cb3": { "authors": [ { "ids": [ "3202124" ], "name": "Aaron Gember" }, { "ids": [ "1713535" ], "name": "Aditya Akella" }, { "ids": [ "38981616" ], "name": "Ratul Mahajan" }, { "ids": [ "1925072" ], "name": "Hongqiang Harry Liu" } ], "doi": "10.1145/3132747.3132753", "doiUrl": "https://doi.org/10.1145/3132747.3132753", "entities": [ "Computer Networks (journal)", "Control plane", "Data center", "Digraphs and trigraphs", "Router (computing)", "Routing", "TRAVERSE", "Traffic analysis", "Waypoint" ], "id": "728cb61e78d55bc5039ed78920b20259a1135cb3", "inCitations": [ "04e214856e0b24f32bffb209ed803b0ecf546e0a" ], "journalName": "", "journalPages": "359-373", "journalVolume": "", "outCitations": [ "4f21cbaa02e89b7aed6c405232ca9b804ca748cb", "9603f51112fb9a142dbb6aeb49ce38000543a715", "ec277878ef7504a13d1839fc43ce5fee8c51d574", "a4711e14e292520440add5b32515799da80bbe8f", "0b2e3f503ad87b5da5a6cdc2ba2b42ce1abb28d3", "0355a7b4c66e42b73fa3d0d7198ce68b2dbcd5be", "0dd046fd2f1ba04690c1f41be83326cbf6c4897b", "1049b09d5a0be6e49e0a4a991273abd38ca445af", "f3c9dbb2446e33bebde44ba0b373760a42d28f72", "267aa4a091dd43f7eb4ffad4c63405229fd31f1d", 
"61797d1af87a27a89b5bb5fd4a03de061e5dc0e3", "273de61c65c39e0e55942ea166a473e63ddaa02c", "24e10a0f77ef92eb86d26108ebe725178bc0bf94", "cd32028cc8de270115f32a1a81f0d416f3f75673", "7177bb51200fc0ca4b78dd3f36a4286d213da151", "306a2933e91fdf8971bd160c5bbe365e48ec7fdd", "7542f9b6b4adce3774547c8af3ba983524538542", "17059e939aa051d7db57f4af959b2af21fa3dd18", "211b998175c29dd62e7f854301ed88bcb03a1fa5", "36f396b52f93fa52742ce5052a40c1c90ea726e3", "5e601d023575b3877b33ccee8bbe67606ef8e6d9", "0e0f7fa2de3f757a51e747399d93c570249e72ac", "0becd1254be3dcc6f64d418bc0e9e8b5ab73e6bd", "28c5d290a2b044dc4a3352ee9e692de48c82e57d", "b48c018c1ed359fdcbe8a11b03ef14363280cb6f", "3ec1a36e9e12a85d02adaa8ed682ee04f73ae332", "4ad98b30cbee2fc78e50a3d4d149f0847f093956", "62f17216621696ad4ef8d8efc537d28a48a4a93b", "663e064469ad91e6bda345d216504b4c868f537b", "05607111cf79330d56164a10d351dbf94e2cfa44", "f69412d8c00780b66ec14b09c5045a5c2ec8250e", "33c65aace24e26979cc9ba6242310d06710ad8d2", "a07ee0c6f7e3989c6c76e1bbeb090394c231a980", "c24809e301b30cb1dcc1da4ee14e4e1f87dd742b", "625ae31ca2ee2666e2e9cccef5524d42266401db", "19114b7a2f5243a47e80590cc11a2d8ec5b96308", "fb32f21ce7699c914c336226f7ef78fe9c84f7d0", "240b650045b4f59c58220e9cbac7f2f91bdaa3ec", "3b506ead7aff20082a5cd55330d12f92f4296dc8", "119af470b90b725c847c4b1fd25aea9a6c5b2b57", "3a7c5e7529d583cdcf87a549a512052699834cb3", "1b5188ba7183288356b8bfff288efbc08d111c9d", "5361b0205d13431a149b22af8c62a11c9556fd77", "c9d38dcba3376a0027741a1b75018a9c1dd65d9c" ], "paperAbstract": "The forwarding behavior of computer networks is governed by the configuration of distributed routing protocols and access filters---collectively known as the network control plane. Unfortunately, control plane configurations are often buggy, causing networks to violate important policies: e.g., specific traffic classes (defined in terms of source and destination endpoints) should always be able to reach their destination, or always traverse a waypoint. 
Manually repairing these configurations is daunting because of their inter-twined nature across routers, traffic classes, and policies.\n Inspired by recent work in automatic program repair, we introduce CPR, a system that automatically computes correct, minimal repairs for network control planes. CPR casts configuration repair as a MaxSMT problem whose constraints are based on a digraph-based representation of a control plane's semantics. Crucially, this representation must capture the dependencies between traffic classes arising from the cross-traffic-class nature of control plane constructs. The MaxSMT formulation must account for these dependencies whilst also accounting for all policies and preferring repairs that minimize the size (e.g., number of lines) of the configuration changes. Using configurations from 96 data center networks, we show that CPR produces repairs in less than a minute for 98% of the networks, and these repairs require changing the same or fewer lines of configuration than hand-written repairs in 79% of cases.", "pdfUrls": [ "http://doi.acm.org/10.1145/3132747.3132753", "http://aaron.gember-jacobson.com/docs/gember-jacobson2017cpr.pdf", "http://pages.cs.wisc.edu/~akella/wisr-webpage/papers/p359-gember-jacobson.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/728cb61e78d55bc5039ed78920b20259a1135cb3", "sources": [ "DBLP" ], "title": "Automatically Repairing Network Control Planes Using an Abstract Representation", "venue": "SOSP", "year": 2017 }, "728f4d2633f221ed84f8ff83b282c15e94b164db": { "authors": [ { "ids": [ "39897019" ], "name": "Jason Li" }, { "ids": [ "1734975" ], "name": "Ryan O'Donnell" } ], "doi": "10.1145/3087556.3087557", "doiUrl": "https://doi.org/10.1145/3087556.3087557", "entities": [ "Constraint satisfaction", "Constraint satisfaction problem", "Interactive proof system", "Polylogarithmic function", "Polynomial", "Relaxation (approximation)", "Semidefinite programming", "Time complexity" 
], "id": "728f4d2633f221ed84f8ff83b282c15e94b164db", "inCitations": [], "journalName": "", "journalPages": "95-100", "journalVolume": "", "outCitations": [ "3a8442bb9e9493ad3be698d4541b1413b95d716c", "46e6ee845ea713d30db2da616f99f9b2410d7062", "4e18614bff014b1c0db4ea1e11bca0f25a98ceb2", "7c7d34484159d153d1f918fc0c6d58e92668c0d2", "2a0cbc355a584d695610d9e4b1f72c5c56ae56f6", "83fa064f76f807f43887bdf056099f37be3aae74", "4c3a78661fd920b4116afd0ad88247bbd00160ce", "2edb1048ebb27432792eec261ea6b2d9728d383a", "1af3a2ac718a54d413e0ffa436260797a5515fed", "26eb8f750e648c0f364844b9e8556c427530ed65", "7989aa599aeb88810114d03b9eeaa4508e3e8187", "d2e7a761fd13a1a3990226cc1e43d2f222bb893a", "c1ddb2c9ffdb046f3a9bda0c3be3c2b1b1d5ec21", "c6643f0d95a1b99a5a81a0538a52e298eca57915", "ae2eecbe2d5a4365107dc2e4b8a2dcbd0b3938b7", "2e5e0429968b0eab878bf0b4fa8a6e7552ecd902", "6d9d763263e593bbc1194dad5a30206a2bbe208f", "22e04c7babfe92737745cc128e2b118a5fd75bdb", "0fdda242c8bc15390cf6ff7669cb3c274e522b9f", "32bd62191f501753b8307bef23758adb50d95627", "1b8248cd38e4a36c2be81f720d9109c1cf283b85", "7fcbb21c00a46291e921f7eb49a70021594d84cb" ], "paperAbstract": "We show that the basic semidefinite programming relaxation value of any constraint satisfaction problem can be computed in NC; that is, in parallel polylogarithmic time and polynomial work. As a complexity-theoretic consequence we get that \\MIPone[k,c,s] \\subseteq \\PSPACE provided s/c \\leq (.62-o(1))k/2^k, resolving a question of Austrin, H\\aa stad, and Pass. 
Here \\MIPone[k,c,s] is the class of languages decidable with completeness c and soundness s by an interactive proof system with k provers, each constrained to communicate just 1 bit.", "pdfUrls": [ "http://eccc.hpi-web.de/report/2016/142", "https://eccc.weizmann.ac.il/report/2016/142/download/", "http://www.cs.cmu.edu/~odonnell/papers/parallel-csp-mip.pdf", "https://eccc.weizmann.ac.il/report/2016/142/revision/1/download/", "http://doi.acm.org/10.1145/3087556.3087557" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/728f4d2633f221ed84f8ff83b282c15e94b164db", "sources": [ "DBLP" ], "title": "Bounding Laconic Proof Systems by Solving CSPs in Parallel", "venue": "SPAA", "year": 2016 }, "729c911b6ea94e81cb7b993d7ce24222a49e2e60": { "authors": [ { "ids": [ "29372698" ], "name": "Nikolai Baudis" }, { "ids": [ "36787033" ], "name": "Florian Jacob" }, { "ids": [ "2658382" ], "name": "Philipp Andelfinger" } ], "doi": "10.1109/MASCOTS.2017.15", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.15", "entities": [ "Binary heap", "Circular buffer", "Computation", "Control flow", "Core Data", "Data structure", "Graphics", "Graphics processing unit", "Parallel computing", "Performance Evaluation", "Priority queue", "Profiling (information science)", "Simulation", "Splay tree" ], "id": "729c911b6ea94e81cb7b993d7ce24222a49e2e60", "inCitations": [], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "1-11", "journalVolume": "", "outCitations": [ "8b5608e0bc9e9d344a6efedbd5d4658aa2fa506a", "ca8c1cc5eb4a1de0b463bbe475d30a4600f8c541", "686765ea4aea86b52eb843216ab34ec56e877bbf", "1696dc3a9b567b235019f53fd0b79db364100a8e", "1612819ba65ca6ca514c1df5a617af3998aa4cff", "5d7df03344ec2d89057633a0ca13776a976b38b2", "26a35f807b277501182994db7c75c02b7cb25555", "ea8347c7aa01bc5e2c86af8ec28660dfab697c23", "1134ac0613544aa3a61ee4a9edf11a83b83dcf8a", 
"0d2e12a4f0882394526bcc8dd4e35384b3b6c809", "0a9587098b06df2a59a14dc5e0f80d08724522f4", "88e9a1f9d21989d440d4dd05537deda09229d74e", "3c84f22df1948dfa8b1b14bbd4c850baf9c5b632", "e1489ec329e1c0e55b6d5a0509c99d32515305cd", "3ebf3857a60c3e224284bbbe6c7127d0a12c546d", "b3ff18402d20e8ab976b2b295baa0c41ce0dc874", "6b2a23349099f95c1c4850bb1d4731612a7046e6", "0aba1af911d193b4918eb25638db78eaffd71f7a", "1a3bf65285f1e7b295ff0dec13e9a0c367e82b29", "487f2d08fcb1019958e924566f8c71d4a7799d1c", "1008e1b5d8d30a7d2f3a3113521e840c58d7b4ac", "3ce939a537e65e2edde00ee5f5884fedfd9c345f", "284c30863b50b8c0985032a93fdc8fe910de34dd", "9f60935a3c9f2d11b03a71793d12c351871414eb", "20c84c2599abd7c4dd85851608a665278215cb95", "c271f7400a02d2a4e7fe6dc832a3851eea54c4a0", "5d55860c0fe3408bad2d54add57ec97f9199b367", "63d4406bfde3670ef03d0a386438db755f632a24", "51733043a79cba9635d0fdf1506667ebe2390737", "2916fd514c69c1b3141c377c1c97d957bdc86c5e", "c3008dd707e4dfd43606a544d4cac4bf1f081f2b", "7837b6d733790b2eef0d9eee23405912cfec0827", "a1f4d268b3ddf5f657cb8ccd17bab4e03d36420c", "2462dc12b9ffda182ab894ee55938249420b81af", "08104146873817cc35cbd96d7ca3e5169cb72296", "0a030c2142dec882fea5f33b3f562e04d66d287a", "4ac639f092b870ebe72e4b366afb90f3073d6223", "85ec6e95b22101403a069b88d3663229582fcd52", "1e682a2cf91450db746ad2fc5ee4ca7a5b7b573b", "1a8bb9a1f5530dee48c7f30266c66f001063a908", "5b8869bb7afa5d8d3c183dfac0d0f26c2e218593", "404480f20069c176992986af17178d86ccae47a1", "0689c8056abadaa8c7df8498e511e56bd59094e4", "3c2acc1e9b78a4df6a65ac4181882467605dfb05", "0c17805ab324006d40a8dd37d3550815824498fb", "08d041581636f8eee888091b5539696d729f2bff", "37d3e85a8d99a756bcd8b93e12619dc84f9e877e" ], "paperAbstract": "Graphics processing units (GPUs) are increasingly applied to accelerate tasks such as graph problems and discrete-event simulation that are characterized by irregularity, i.e., a strong dependence of the control flow and memory accesses on the input. 
The core data structures in many of these irregular tasks are priority queues that guide the progress of the computations and which can easily become the bottleneck of an application. To our knowledge, currently no systematic comparison of priority queue implementations on GPUs exists in the literature. We close this gap by a performance evaluation of GPU-based priority queue implementations for two applications: discrete-event simulation and parallel A* path searches on grids. We focus on scenarios requiring large numbers of priority queues holding up to a few thousand items each. We present performance measurements covering linear queue designs, implicit binary heaps, splay trees, and a GPU-specific proposal from the literature. The measurement results show that up to about 500 items per queue, circular buffers frequently outperform tree-based queues for the considered applications, particularly under a simple parallelization of individual item enqueue operations. We analyze profiling metrics to explore classical queue designs in light of the importance of high hardware utilization as well as homogeneous computations and memory accesses across GPU threads.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.15" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/729c911b6ea94e81cb7b993d7ce24222a49e2e60", "sources": [ "DBLP" ], "title": "Performance Evaluation of Priority Queues for Fine-Grained Parallel Tasks on GPUs", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "72b671d4415201f4e428179929202eeb3236438d": { "authors": [ { "ids": [ "40407899" ], "name": "Joseph McMahan" }, { "ids": [ "35129633" ], "name": "Michael Christensen" }, { "ids": [ "2382138" ], "name": "Lawton Nichols" }, { "ids": [ "24961975" ], "name": "Jared Roesch" }, { "ids": [ "10012224" ], "name": "Sung-Yee Guo" }, { "ids": [ "1735761" ], "name": 
"Ben Hardekopf" }, { "ids": [ "1784473" ], "name": "Timothy Sherwood" } ], "doi": "10.1145/3037697.3037733", "doiUrl": "https://doi.org/10.1145/3037697.3037733", "entities": [ "Algorithm", "Arbitrary code execution", "Binary file", "Control register", "Correctness (computer science)", "Embedded system", "End system", "End-to-end principle", "Field-programmable gate array", "Formal verification", "Functional programming", "Imperative programming", "Interference (communication)", "Lambda calculus", "Non-interference (security)", "Proof assistant", "Requirement", "Side effect (computer science)", "Trusted Computing" ], "id": "72b671d4415201f4e428179929202eeb3236438d", "inCitations": [ "8820bc7bd377c33f8b5fcea27eca16cd56a3faad" ], "journalName": "", "journalPages": "177-191", "journalVolume": "", "outCitations": [ "1eb6978391e96947ff0d3cfb25b82e66a211e691", "f797999b8213b277f726c27199602e9fde4181cf", "2f484f6f079faefef1a8acf26383ecdb019fd380", "1b0c3ef8e2eabf218549e9926b51a51044b7639d", "0fda9bbccd6908637e2ead1cef69f091bfda75d4", "5b88be1b60ccb2a5fe93a7fa5c32766959beb5d8", "001dc0e96172432d40f6a514e1b623e20d0381f8", "0fdbb387a89b5b461b8a7d410a91f7cf60c24fbd", "a0f11fca9446a364985cea2c449fcf2f9255ad41", "0025870ef15a8f2858ff4186329d4bde316e9e01", "5bfce0ba237cd3e48452299ba3c750c1ef870567", "8c822ab13d880b3cf1a8373887ed41a311399c67", "0215915cc57ab4a725d96f853eade1d80f8fc7d3", "61aad95b2a8741d16cc32963697bc52b003f5825", "0ed793ff37c5938ac5fe2630bdb52b564994c359", "9f658ec588acc8acbcad4ac45eaad65987f2760c", "2ae20abb4b3a02814d3a5f1c06474e8429ee51a8", "0e40421e9a839b5d51d584f3f821401239edfb50", "38063a333087ce7fb19fb4604c5df35498d59c52", "40f7030000e3c31501030cbb1ddd9bd1657bd64f", "0c515587e546ea2bdf9ac77eaf0d8bc578954443", "2769c203102a875c10bc11affc161891472176d1", "1d01390129ce2738a3529e442dac8d2c51fe499f", "1724ed039193961c572928c613f7163dbfd09b70", "0f5bd2edf5b1ce8815e34f6090d726c35d9331d5", "10493e0654d3ddf61bae0aeef5f0702f73aa186d", 
"0a35c32ebc233556d11c2038f5a4362f2c40b2a9", "4c8ad20e8d682d9956dad6a68d2e2a022773a959", "4c3bf7d26f162c45428c9e5e0b54342bb37bbf3c", "1eb2cff1424202b628c509ada98d6e9f2144e9b0", "4730b7c91ba7fac1e392d5f635ed4592f6ad969a", "257fbdaba74d41aa0ec86781b2d307d01f1a0728", "e170759a280b86f92113ce801b8980ed13247aca", "641b9d0b1733e2c3a6989d6e555c9150432c56c1", "7f2210ff39ef9669f2a84db611c80c4b28f9fffc", "0737bb34b65af08f0e2e59f958858cc3c5a4385a", "4271680ae4d95b130426e165ad9e9d9b81d938cd", "0b39c9d81ed3eb6409259a797b4423f9a321906b", "fc64117e5d5ed5947a0c85c55597e4116d6e55c6", "141dcba8c52100e4ce036beda59e36e4570bdf52", "9ed99d39d78a04fc854f9f457b1cbab963801fbd", "15aaa56f06eca80760943e47f1781591209f2860", "1796693cac1375a236e657e6115d5b21d84a69db", "011f7da0095ac8c0d4477eeda2728e5f80a35767", "c4d8a849fcd89d6eef9a72a8d01cb25ed6946d97", "2c6533d714d8dc4d3f7faf418db93c38df642fea", "9cdd6da72a90e4fd786012b33f85efc16cdac0d3", "1191bd92762d5ac7c1d50f7591896a8eadab0a66", "5ac46b7c320aabe83eacb1a91c055939c1941dac", "2d6004a048de3d4a7d162345284f455960d2727f", "2c8f14ae08b39c3f71e3102db499e9fc87435d41", "195a49ba713beccf68ef358c8430333dba6cbc61", "12eeed063c2fe705463a3063afaccf83007bed17", "2245246f7866d4468e570cc1bc982fd45d20d031", "2b6df21137f30d25494bb58521a6062f93e915f8", "0796bc5e45078961e96614ee697a30787be7ac11" ], "paperAbstract": "Building a trustworthy life-critical embedded system requires deep reasoning about the potential effects that sequences of machine instructions can have on full system operation. Rather than trying to analyze complete binaries and the countless ways their instructions can interact with one another --- memory, side effects, control registers, implicit state, etc. --- we explore a new approach. 
We propose an architecture controlled by a thin computational layer designed to tightly correspond with the lambda calculus, drawing on principles of functional programming to bring the assembly much closer to myriad reasoning frameworks, such as the Coq proof assistant. This approach allows assembly-level verified versions of critical code to operate safely in tandem with arbitrary code, including imperative and unverified system components, without the need for large supporting trusted computing bases. We demonstrate that this computational layer can be built in such a way as to simultaneously provide full programmability and compact, precise, and complete semantics, while still using hardware resources comparable to normal embedded systems. To demonstrate the practicality of this approach, our FPGA-implemented prototype runs an embedded medical application which monitors and treats life-threatening arrhythmias. Though the system integrates untrusted and imperative components, our architecture allows for the formal verification of multiple properties of the end-to-end system, including a proof of correctness of the assembly-level implementation of the core algorithm, the integrity of trusted data via a non-interference proof, and a guarantee that our prototype meets critical timing requirements.", "pdfUrls": [ "http://cs.ucsb.edu/~sherwood/pubs/ASPLOS-17-zarf.pdf", "http://www.cs.ucsb.edu/~sherwood/pubs/ASPLOS-17-zarf.pdf", "http://www.cs.ucsb.edu/~jmcmahan/research/ASPLOS_2017_Zarf.pdf", "http://doi.acm.org/10.1145/3037697.3037733" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/72b671d4415201f4e428179929202eeb3236438d", "sources": [ "DBLP" ], "title": "An Architecture Supporting Formal and Compositional Binary Analysis", "venue": "ASPLOS", "year": 2017 }, "72b8219baf03b5a18653eadd5d724499d422ec29": { "authors": [ { "ids": [ "2321965" ], "name": "Guoyang Chen" }, { "ids": [ "40007793" ], "name": "Yue Zhao" }, { "ids": [ "37914192" ], 
"name": "Xipeng Shen" }, { "ids": [ "38067595" ], "name": "Huiyang Zhou" } ], "doi": "", "doiUrl": "", "entities": [ "Computer multitasking", "Data center", "Entity Name Part Qualifier - adopted", "Graphics Processing Unit", "Graphics processing unit", "Preemption (computing)", "Programs - Publication Format", "Scheduling (computing)", "Smartphone", "Smartphone", "Software framework", "Throughput" ], "id": "72b8219baf03b5a18653eadd5d724499d422ec29", "inCitations": [ "1883eb486e44c4a61864f538d2f0e90dca8f45f9" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "14724c356106ae50746318b1bdd27d9b684c7d11", "cffc3c4e332287ba5b67a20c0bf0fbcb8a6e5725", "b04c9e851ae605592d693aa65f0d753b8af08feb", "23686b1e875f885631425cf5fa07cf5cbc31b705", "1a3bf65285f1e7b295ff0dec13e9a0c367e82b29", "68073f621072d793e95b9562bf9a9245415d5a96", "1b91fcb25a395a12e7b6bc49473f223ad47f869f", "34e2b75fd5717029fc9da92dd6160eb6e2d19ad9", "63af4355721f417bc405886f383af096fbfe51b2", "08d041581636f8eee888091b5539696d729f2bff", "16e57efb869966c49ad37dd56508a3b60f0f2985", "a7a24f882aec173c01a9ed1eb52589f71d6c80f8", "0e191d726c2b15d54f0e4b5d50f6c0d2e426f3ad", "10443d5d4f0e5048df514e581a9f364954158d00", "00156e79606084497789662dfaf59c3b54a10722", "755e4ad5468747b31b9d6994885b17ad957dc9d7", "5fe43f4d79d2829ba062fe9cf1711ee60903dd24", "40bb6830ec715b52e0fcbe7566f489dd2d0c25fe", "2954071739e1df663ee207e130465cb1789ae982", "4cc504da30fd273e12f28bc0cf573ff37f829f89", "347a08cd9ada1cee83713d24ec84ed49ab121987", "7a0cdc6a29b230908df2c54e584af62a7eed8d52", "1618f89bc0936ab14b8ec38905120d658014ed48", "064f38e5edef42cb5a37f2a350e4413e17132b11", "0c75806bfe62a119e1aa580327c2f8db01b898aa", "21e5ea3c252c84137efcb45cef1437bdcc15c773", "c5b3f0caeba42a532a48adc80e6932c35bb26ac4", "109b416bdbf1739373638eb7e5b37f5d475fd40e", "cfeb833da2d3ca20adfc05a762b3f68cffa13416", "5c9fa798a510b66a37c1b0852582fd7735ed088e" ], "paperAbstract": "Modern GPUs are broadly adopted in many multitasking 
environments, including data centers and smartphones. However, the current support for the scheduling of multiple GPU kernels (from different applications) is limited, forming a major barrier for GPU to meet many practical needs. This work for the first time demonstrates that on existing GPUs, efficient preemptive scheduling of GPU kernels is possible even without special hardware support. Specifically, it presents EffiSha, a pure software framework that enables preemptive scheduling of GPU kernels with very low overhead. The enabled preemptive scheduler offers flexible support of kernels of different priorities, and demonstrates significant potential for reducing the average turnaround time and improving the system overall throughput of programs that time share a modern GPU.", "pdfUrls": [ "https://people.engr.ncsu.edu/xshen5/Publications/ppopp17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/72b8/219baf03b5a18653eadd5d724499d422ec29.pdf", "s2Url": "https://semanticscholar.org/paper/72b8219baf03b5a18653eadd5d724499d422ec29", "sources": [], "title": "EffiSha: A Software Framework for Enabling Efficient Preemptive Scheduling of GPU", "venue": "", "year": 2016 }, "72e1f260a83f48eaca7428a7bc417066d83af61e": { "authors": [ { "ids": [ "4631835" ], "name": "Mingzhe Li" }, { "ids": [ "1720335" ], "name": "Xiaoyi Lu" }, { "ids": [ "1802958" ], "name": "Hari Subramoni" }, { "ids": [ "1731654" ], "name": "Dhabaleswar K. 
Panda" } ], "doi": "10.1109/HiPC.2017.00017", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00017", "entities": [ "Address space", "Benchmark (computing)", "Cache (computing)", "Communications protocol", "High memory", "Implicit parallelism", "InfiniBand", "Memory address", "Overhead (computing)", "Paging", "Programming complexity", "RM-ODP" ], "id": "72e1f260a83f48eaca7428a7bc417066d83af61e", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "62-71", "journalVolume": "", "outCitations": [ "f271c36891cf15f9fda3d49b46b187ca8685bf48", "c02d7f5976d15df75c9c75f70240446a542e41dd", "192fec9d330de17828caba1d2a44983ca414c900", "029e03cd045b1fcda76e4c469eedfa0470c79624", "62b757b1a924f3386c33b8a988327e3749ab8a54", "5a9d21dafb62f5eb7bcd1b199b05247bbec26e0b", "09eee808ba9adeefa287324e7becac83a5827081", "5825199bfdacf86d2e1b10b5602b66e4e64240d6", "054aff204bfa1e499e2005ce1538224af3ad0506", "7ee529c7a72f7f228ba1e60011d5e1d5078730d6", "8c1eb7aed124413525731af39d0249ce3663588e", "184b12b7a3d1da8c6d4f59af2ba7e36cc5e2ef03", "9d4b56f72719ea4839f48ace91f4852778bd8209", "1e5ec3d788da1dbf7e02bc27024b43727b33bb6b", "cd1b19d016ef46429a868d99cda27d3565996a21", "7dda6789b0db46a2a985017e414ea778196bc180", "27e0a35c057d8e80a28ea5551e6b3757f7d491b0", "4224686929991f07f21ddcb1442dceb2df91913f", "59f043d927b7effc02e351e86e027fd2c997851e", "d120c8dbee3d94ff05d493b3a01a1e4be4f7d41e", "075d412b9c4f2cc6a8457c1ccacbf8f269dea60e", "2d4846b8cac3b3be04aa30ab95454568a1821257", "5d6167a03f4935d74cae680abed27800cf2fb364", "947c6bf534ccd620044f77c3bd6068f633b421fb", "544e01941a237ea8993d98cd11cf4a6af0ef40aa", "116fe21cdd8d72277061e16f4c6e243bc7061681", "287271a5073c4189a74c9709575ddb5e1a819da5", "51eb2dbd85db7dcf97937fe11ff3885b1598d1d6", "ca9ced836a12c546d6b542a2e7ffe0fc91f90557", "155997067af4570041162a4b95e4ce7621e0d022", "39979b40856636900eeed1fca84cc7b97b38d4c3", "366b90b6929c600d683c698c1c559579a5a08ce2", 
"29d9c1724230cbdfa635bf7ee969b813a843a2fd", "069bccf15b31ae944f960559ee272dfd309d24a9" ], "paperAbstract": "Modern high-performance communication runtime systems have taken advantage of advanced features on highperformance networks (e.g. InfiniBand) to deliver optimal performance. High-performance communication over InfiniBand typically requires the communication buffers to be registered first. However, buffer registration and deregistration are costly operations, which leads to performance degradation if they happen frequently. To hide this overhead, many existing communication runtime choose to design a high-performance registration cache to reduce the number of buffer registrations, but such type of designs still need some amount of buffers to be registered and cached, which leads to multiple issues such as performance overhead, high memory consumption for bookkeeping, and code complexity for maintaining the registration cache. To solve these issues, a recently introduced feature for InfiniBand called Implicit OnDemand Paging (ODP) is getting momentum. This feature enables one process to register its complete memory address space for I/O accesses. To fully take advantage of Implicit-ODP, it is critical to fully understand the behavior and benefits of Implicit-ODP on InfiniBand and performance/memory trade-offs it presents. This paper first presents an analysis of the Implicit-ODP feature and studies its basic performance with InfiniBand verbs-level micro-benchmarks. Then, we describe the design tradeoffs with Implicit-ODP and the various optimizations at MPI runtime. We propose and design communication protocols that can leverage the Implicit-ODP feature at the MPI level. The experimental results at the micro-benchmark level and application level show that our proposed design can deliver comparable performance to the existing pin-down scheme, while it does not need registration cache in the MPI runtime. 
To the best of our knowledge, this is the first work to study and analyze the Implicit-ODP feature and design a registration caching free MPI library with it.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00017" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/72e1f260a83f48eaca7428a7bc417066d83af61e", "sources": [ "DBLP" ], "title": "Designing Registration Caching Free High-Performance MPI Library with Implicit On-Demand Paging (ODP) of InfiniBand", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "72e809af6871830faf4b4d1d8060d0dd206c4097": { "authors": [ { "ids": [ "2473791" ], "name": "Buddhika Chamith" }, { "ids": [ "1897982" ], "name": "Bo Joel Svensson" }, { "ids": [ "1946692" ], "name": "Luke Dalessandro" }, { "ids": [ "31778078" ], "name": "Ryan Newton" } ], "doi": "10.1145/3062341.3062344", "doiUrl": "https://doi.org/10.1145/3062341.3062344", "entities": [ "ARM architecture", "Branch (computer science)", "Offset (computer science)", "Scalability", "System call", "X86", "X86-64" ], "id": "72e809af6871830faf4b4d1d8060d0dd206c4097", "inCitations": [], "journalName": "", "journalPages": "320-332", "journalVolume": "", "outCitations": [ "8b8d9dbe3e755cbbab950b6133b1cc11d8e08943", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "6fddbb1d2f1aff3a1103a633a67393491bbc0488", "4a250eef9f4897c47d28547b9e88327fb23dc7e9", "0e00a3e0b0120dcdb89f0ee03534643090235ff5", "e39fcd5ba07f0d703e7f2e589fc7c61ca4c4206d", "91607d7bc71823360de59b894ae37b4f1738bca0", "9a64e0c322f007af90fd61356bde7bc6de3642e2", "c9c7127fe315cc27c351dd3525fcba8356f0eda9", "3f6e88b1eb1e06cdd742b6037c4ef61477a41aaf", "0651d1863f3edac83e574e223f301be9ed564dd4", "31673f10c275afeb96dc7a5873da6f66b338f11a" ], "paperAbstract": "Existing techniques for injecting probes into running applications are limited; \n they either fail to support probing arbitrary locations, or to support scalable, \n rapid toggling of 
probes. We introduce a new technique on x86-64, called \n instruction punning, which allows scalable probes at any instruction. The key \n idea is that when we inject a jump instruction, the relative address of the jump \n serves simultaneously as data and as an instruction sequence. We show that this \n approach achieves probe invocation overheads of only a few dozen cycles, and \n probe activation/deactivation costs that are cheaper than a system call, even \n when all threads in the system are both invoking probes and toggling them.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062344" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/72e809af6871830faf4b4d1d8060d0dd206c4097", "sources": [ "DBLP" ], "title": "Instruction punning: lightweight instrumentation for x86-64", "venue": "PLDI", "year": 2017 }, "730892c721389d7d28cf35d8034f061172106d5f": { "authors": [ { "ids": [ "3021644" ], "name": "Ryan E. Grant" }, { "ids": [ "21573963" ], "name": "James H. Laros" }, { "ids": [ "1694952" ], "name": "Michael Levenhagen" }, { "ids": [ "2216287" ], "name": "Stephen Olivier" }, { "ids": [ "1785427" ], "name": "Kevin T. Pedretti" }, { "ids": [ "1769023" ], "name": "Lee Ward" }, { "ids": [ "2041092" ], "name": "Andrew J. 
Younge" } ], "doi": "10.1109/IGCC.2017.8323587", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323587", "entities": [ "Classification", "Hereditary pancreatitis", "Level of detail", "Out-of-band agreement", "Perf (Linux)", "Profiling (information science)", "RASSF5 wt Allele", "Recommender system", "Taxonomy (general)", "Workload" ], "id": "730892c721389d7d28cf35d8034f061172106d5f", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ "008aa3bf67964c6058df797e8cd64ae909f53984", "ecebe33b29bfe4822c43c7485339e68309799914", "0ea8f85d507ab8220f920dc1ffe6574820dd0027", "efee61acb1847de685817b7d9bc1b6b095ef5026", "290d131b55046f5a4bbb908db8995ead8243e75a", "cfeac65a550b5c3b0fd8dfb373b53ed3784ae30f", "a4b5f11f2971a155d8c7a2d87430ec03be11a8ff", "ab602d672875244c42a75559ba98f5606c6c1554", "ccf4217174f87a05a947a054638c8de84371fe89", "152d12dc9f79a9c8b959bf0b1a8c22988ee7412d", "84939572147e3991706eaf5ccfcb07799620299f", "e60d1171e74c87495f5c6cffede388af271f9584", "23f4b96752fdb29b74b25f491e0df05a0701e6db", "b6b9ee1bfecc15143556fdf1933462e37c2ac0dc", "974a66c416f0b4ce0d0d28242ab767ce36757b90", "a320d0eae237b03a49386bddd65de07b23c0f89e", "00de82b9bbc7528ca6d089cb69f01a4fd3d64301", "6e5c8c274850d5e7e1a4d01a13b4d3d96d037227", "1585eaffcf9c9836eb1607e279e43ce2793e59a0", "415c7835aa18984d92086edbb9d9937fcdd0a6eb", "24bb5f66906421f42aff2d64dfa35b4beb3ead7a", "20d6a8a39ebecd21bc8a4df53f248356d38ea6d9", "18405589bf710c17bf645c874f8e728f70f41691", "05c079fd3b0892f5e5f39676f4dafac38a13b8c5", "7f4b805160cfcbd546c70c5e781b64b85c2b4850", "10710091dc04a2831ab9fbd45554e0ccf8089d93" ], "paperAbstract": "Advanced power measurement capabilities are becoming available on large scale High Performance Computing (HPC) deployments. There exist several approaches to providing power measurements today, primarily through in-band (e.g. RAPL) and out-of-band measurements (e.g. 
power meters). Both types of measurement can be augmented with application-level profiling, however it can be difficult to assess the type and detail of measurement needed to obtain insight from the application power profile. This paper presents a taxonomy for classifying power profiling techniques on modern HPC platforms. Three HPC mini-applications are analyzed across three production HPC systems to examine the level of detail, scope, and complexity of these power profiles. We demonstrate that a combination of out-of-band measurement with in-band application region profiling can provide an accurate, detailed view of power usage without introducing overhead. This work also provides a set of recommendations for how to best profile HPC workloads.", "pdfUrls": [ "https://cfwebprod.sandia.gov/cfdocs/CompResearch/docs/1503001054936-paper.pdf", "https://doi.org/10.1109/IGCC.2017.8323587" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/730892c721389d7d28cf35d8034f061172106d5f", "sources": [ "DBLP" ], "title": "Evaluating energy and power profiling techniques for HPC workloads", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "731a8751977510e2b26359ceec7005092a806cb3": { "authors": [ { "ids": [ "3080400" ], "name": "Abhinandan Majumdar" }, { "ids": [ "3295421" ], "name": "Leonardo Piga" }, { "ids": [ "2862953" ], "name": "Indrani Paul" }, { "ids": [ "2492556" ], "name": "Joseph L. Greathouse" }, { "ids": [ "39338994" ], "name": "Wei Huang" }, { "ids": [ "1752578" ], "name": "David H. 
Albonesi" } ], "doi": "10.1109/HPCA.2017.34", "doiUrl": "https://doi.org/10.1109/HPCA.2017.34", "entities": [ "Central processing unit", "Dynamic frequency scaling", "Dynamic voltage scaling", "Frequency scaling", "General-purpose computing on graphics processing units", "Graphics processing unit", "High-throughput computing", "Iteration", "Power management", "Run time (program lifecycle phase)", "Throughput" ], "id": "731a8751977510e2b26359ceec7005092a806cb3", "inCitations": [ "d0f85e1f8821df9444e0549d0333c5f3bc5fd304", "168f1f10f25a50916c161ed870e9d58e23cffa14" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "613-624", "journalVolume": "", "outCitations": [ "03be84163a3eea346a85f0de3297b23ebac477fb", "04a8986ea5df3d6c29fb21627ac1f51ccf68eb15", "5d79e0c5e4b531f26de469688668c50f8c1069b2", "40c642c5631acead90651940b790ac48d23eb139", "1d286a264b233125b681e522e8f5fed596a8608c", "a5a95ad4b217cf5b2f1038753ba76fae94da1bec", "e4d25afc26388e74e74cfda91b004fae9452b7b8", "136ffe66f6bb69c5ad2537531373220c2c704b57", "67bf737ceccf387cdd05c379487da8301f55e93d", "62dab95957108bb92760f64e77a92293d3b80b2d", "296e9ce6735e1d78bb4e5a02b626329a09f00a39", "cf64cdc889a4edaf641a307aa2b11d89d4d10a09", "3198f80cef91c613231fe6ceb329f49562c71660", "1108af609469e420aeae551ba8a893c3200e07fa", "13d4c2f76a7c1a4d0a71204e1d5d263a3f5a7986", "6ac0c44e4e56583914de316346977c8461716141", "5c295e63d2ec53f2a9bd6a4539c27e93ac5957a6", "9dd84c0fb9f5b8863917c658349a061d08e3b141", "4e91c55bb217af534042941b0c2f5fb744758285", "cbcb3af4df4a23e06074a6fa52e532ccf3574257", "facd5eda3258e0f2ac90a611be625e9a10aa8a6a", "03385e04bf3df318ee9a94237e6b5e96b8663a0d", "7ea15c138cc72588fa376ff819f4bb8ca0b324da", "15e22435b37d2cb33422f7e35ac141cfe4a23c1a", "2c9662101750dd471c49176bd8ccf01fd6cb4ffb", "160343fe69c9e07083683b18b5315704a9891d3a", "1d035d1d445f5ae5457db98af49ce1b87be1ebd5", "7232af3c4a3c4641ece9818d1c484260ce33a266", 
"3ee47780011ee618bd5a64624a662375e1958e0a", "251efd4676f5c14c5bcabf7eb480fb3a4d0a80cd", "1ae2386555655188154b1f89d0fef17d362f474b", "06d18be06791722e5efb82093a982e6cd3298618", "372e1535886f31b4e4b24a1365867e93f45545b7", "00156e79606084497789662dfaf59c3b54a10722", "4416052fca95270b50a29e9e3cc245cca8962861", "6d9bc0191062b39581a3d9f3f33f61c96d78f471", "58f9fe6efcd2ec6ae334675764ec995a131dc5c7", "6abf9d33fc37651238d5144413bb3ae280f4a29f", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "f359d33a1c09d2f626217e21f722508968c7057b", "14505c2bdd3822d7a62385121d28ba3eb36fea1d" ], "paperAbstract": "Modern processors can greatly increase energy efficiency through techniques such as dynamic voltage and frequency scaling. Traditional predictive schemes are limited in their effectiveness by their inability to plan for the performance and energy characteristics of upcoming phases. To date, there has been little research exploring more proactive techniques that account for expected future behavior when making decisions. This paper proposes using Model Predictive Control (MPC) to attempt to maximize the energy efficiency of GPU kernels without compromising performance. We develop performance and power prediction models for a recent CPU-GPU heterogeneous processor. Our system then dynamically adjusts hardware states based on recent execution history, the pattern of upcoming kernels, and the predicted behavior of those kernels. We also dynamically trade off the performance overhead and the effectiveness of MPC in finding the best configuration by adapting the horizon length at runtime. Our MPC technique limits performance loss by proactively spending energy on the kernel iterations that will gain the most performance from that energy. This energy can then be recovered in future iterations that are less performance sensitive. Our scheme also avoids wasting energy on low-throughput phases when it foresees future high-throughput kernels that could better use that energy. 
Compared to state-of-the-practice schemes, our approach achieves 24.8% energy savings with a performance loss (including MPC overheads) of 1.8%. Compared to state-of-the-art history-based schemes, our approach achieves 6.6% chip-wide energy savings while simultaneously improving performance by 9.6%.", "pdfUrls": [ "http://www.computermachines.org/joe/publications/pdfs/hpca2017_gpgpu_mpc.pdf", "http://www.csl.cornell.edu/~albonesi/research/papers/hpca17.pdf", "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.34" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/731a8751977510e2b26359ceec7005092a806cb3", "sources": [ "DBLP" ], "title": "Dynamic GPGPU Power Management Using Adaptive Model Predictive Control", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "73371e0dd70b45d93cb50f27812fb5d971d34ff5": { "authors": [ { "ids": [ "2876310" ], "name": "Michihiro Koibuchi" }, { "ids": [ "26349492" ], "name": "Tomohiro Totoki" }, { "ids": [ "2567723" ], "name": "Hiroki Matsutani" }, { "ids": [ "34575333" ], "name": "Hideharu Amano" }, { "ids": [ "1766280" ], "name": "Fabien Chaix" }, { "ids": [ "1888437" ], "name": "Ikki Fujiwara" }, { "ids": [ "1707417" ], "name": "Henri Casanova" } ], "doi": "10.1109/CLUSTER.2017.33", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.33", "entities": [ "Deadlock", "End-to-end principle", "Error-tolerant design", "Experiment", "Flow control (data)", "Hot-potato and cold-potato routing", "Network topology", "Routing", "Simulation", "Soft error", "Telephone exchange", "Throughput" ], "id": "73371e0dd70b45d93cb50f27812fb5d971d34ff5", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "178-187", "journalVolume": "", "outCitations": [ "ab724df417d8913f053d01aa8e10b3267f0ab7d3", "0736d68aad2c198a8f6dda851c27bd180421c2aa", "010a2d16eef8be8773ee2a73600f685ec0b2e371", 
"8384167f2b55c0f941eb2460c6a63bfcaa8165f3", "09944142043338de285575751861728d4212fd1d", "3e089d9b3669c213d35172e63a433774cabfe499", "e8ab15351038e776d9b8feaa96d9e3c74c9952e6", "fdc97d588371369d6207c3ed3d3be4106cbf3faa", "59c6c3ce034e5dadbc378604c7294eccec4ec47f", "0dd57dbc7e47ed7e27affd8d289585005d4d62a5", "0371f9e3efbcd4829b5ffbff585155746ef05284", "14b82ab954a85cb8b336e86cf536c5701ca722e9", "c4367a8a5ef066026bc7b6bb29cf4790752ae7b6", "8dae031e5163375f857e222e90478bf668cfff35", "2dd8c1b9473bfe7a4a23902b78669d6a762f928c", "2c701d28226ff573c48ccaeb2fc355b5f76638d0", "5f8991828def57d2f0cda942566afff56740d150", "068238b4d2818acd5b2c5ee74b86c5bd90d33ba1" ], "paperAbstract": "Designing low-latency network topologies of switches is a key objective for next-generation large-scale clusters. Low latency is preconditioned on low hop counts, but existing network topologies have hop counts much larger than theoretical lower bounds. To alleviate this problem, we propose building network topologies based on uni-directional graphs that are known to have hop counts close to theoretical lower bounds. A practical difficulty with uni-directional topologies is switch-by-switch flow control, which we resolve by using hot-potato routing. Cycle-accurate network simulation experiments for various traffic patterns on uni-directional topologies show that hot-potato routing achieves performance comparable to that of conventional deadlock-free routing. Similar experiments are used to compare several uni-directional topologies to bi-directional topologies, showing that the former achieve significantly lower latency and higher throughput. We quantify end-to-end application performance for parallel application benchmarks via discrete-even simulation, showing that uni-directional topologies can lead to large application performance improvements over their bi-directional counterparts. 
Finally, we discuss practical issues for uni-directional topologies such as cabling complexity and cost, power consumption, and soft-error tolerance. Our results make a compelling case for considering uni-directional topologies for upcoming large-scale clusters.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.33" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/73371e0dd70b45d93cb50f27812fb5d971d34ff5", "sources": [ "DBLP" ], "title": "A Case for Uni-directional Network Topologies in Large-Scale Clusters", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "73395a3afc1ccb139d959b3b9f3c7db40c4235e7": { "authors": [ { "ids": [ "32523236" ], "name": "Armin Moharrer" }, { "ids": [ "1776006" ], "name": "Stratis Ioannidis" } ], "doi": "10.1109/ICDM.2017.41", "doiUrl": "https://doi.org/10.1109/ICDM.2017.41", "entities": [ "AdaBoost", "Algorithm", "Convex hull", "Convex optimization", "Data mining", "Design of experiments", "Distributed computing", "Frank\u2013Wolfe algorithm", "Machine learning", "MapReduce", "Optimization problem", "Parallel computing", "Program optimization", "SPARK", "Software deployment" ], "id": "73395a3afc1ccb139d959b3b9f3c7db40c4235e7", "inCitations": [ "12cd2f5720902ff2366e1dec44b0b8c782b25b48" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "317-326", "journalVolume": "", "outCitations": [ "0648cde87ed6b799ab9ae1146dc9a1d5a77f10b7", "627a3b416f3c49f29f2846b8b5a5a82e0858fd0a", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "ca15802f99d3cac8d910e145163099fd91fa1be3", "15b233873adbdc3a296bf041fb9df8ddd6a81216", "276ebc620a8976026bd2d03582b9ecfa3738d43c", "8015f2aa20a9f75802ec13cd8f24881a929d20e2", "4fd399297f50035d66ad6907f34c79e09561b429", "32b88a14a317c63e367d0f22cb38ee38767db3a2", "0df2b754298a40ddd26351a155ea8c66b7f66513", "d611db34db098729a6550d2c5c3fede87c745909", "1b5695d37e8e10ee0a2657550e98bf9e6719c42a", 
"abbdb6177b4408c5885a569dc24e6361f91cf169", "1b65af0b2847cf6edb1461eda659f08be27bc76d", "0541d5338adc48276b3b8cd3a141d799e2d40150", "013f1b7d6163b980ed33a9439471b4f1a92c3365", "5d3a0eba8e82853c28e315003e131811ebd18143", "7f52958e275b9744e3680d2f1b1b913cad185a7c", "481f5d0e7ac29a509891e40aa6644519aa20cd34", "3cd9b0a61bdfa1bb8a0a1bf0369515a76ecd06e3", "16b3f9790d37035faf5837ac68661c6df13a9dcb", "1e4370ac3d25151cc1f358fbd329e3864b990bd3", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "30e94e24d67994c5a8e2f20f852a51d28a720de2", "59cdcd315df6a03927222367c77635e19e21abe7", "f4dc9be2d4469d70a6394616f5778964b0c94181", "42b6ea0bbc174ce1ecb116aff6280559545dd6db", "630f710aece5e64563de7fb27ff8a7c6a20384d1", "0ac9a4ac173521873d31d1edd5d00836cfb07297", "2f099e793bbeeb1750be87f11e03f9b7462f5c63", "0144941d255dad89d3d90c2d131a15cc01df9829", "2d66dac85b819503ce6d311d37770e31bdf36692", "1d958756dd4e69ee44e8307add6b4786e8aab1d6", "0f3fd2233b51ec5cbbb46451f1f76996d7493450", "4954fa180728932959997a4768411ff9136aac81", "280d632ef3234c5ab06018c6eaccead75bc173b3", "8650a37500346c52cae22f9180af67a50c715f89" ], "paperAbstract": "Large-scale optimization problems abound in data mining and machine learning applications, and the computational challenges they pose are often addressed through parallelization. We identify structural properties under which a convex optimization problem can be massively parallelized via map-reduce operations using the Frank-Wolfe (FW) algorithm. The class of problems that can be tackled this way is quite broad and includes experimental design, AdaBoost, and projection to a convex hull. Implementing FW via map-reduce eases parallelization and deployment via commercial distributed computing frameworks. 
We demonstrate this by implementing FW over Spark, an engine for parallel data processing, and establish that parallelization through map-reduce yields significant performance improvements: we solve problems with 10 million variables using 350 cores in 44 minutes; the same operation takes 133 hours when executed serially.", "pdfUrls": [ "http://www.ece.neu.edu/fac-ece/ioannidis/static/pdf/2017/fw.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.41" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/73395a3afc1ccb139d959b3b9f3c7db40c4235e7", "sources": [ "DBLP" ], "title": "Distributing Frank-Wolfe via Map-Reduce", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "733f49039d1ce077a116d03633e3338125f33196": { "authors": [ { "ids": [ "39071506" ], "name": "Christopher Morris" }, { "ids": [ "1746871" ], "name": "Kristian Kersting" }, { "ids": [ "1724469" ], "name": "Petra Mutzel" } ], "doi": "10.1109/ICDM.2017.42", "doiUrl": "https://doi.org/10.1109/ICDM.2017.42", "entities": [ "Algorithm", "Database", "Experiment", "Graph database", "Graph kernel", "Graph property", "Kernel (operating system)", "Load (computing)", "Provable prime", "Rademacher complexity", "Time complexity" ], "id": "733f49039d1ce077a116d03633e3338125f33196", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "327-336", "journalVolume": "", "outCitations": [ "114ea414b025a68d641efad9b74295a5625b9e7e", "c53062136a610a9a2e8e9d8b3d7fd1f7ad701e03", "54573093a5188f5970a140c8014ffc1c498c3120", "16a0ff94b980d0ad9cd5ae3b9a3769c6f358842a", "1845a560ef99f127418aa42acdd737a1975875b6", "1a9154d9718ae255a614ba32bcd5d248c0df4924", "03cf66a2ea4f407447f28ab12b7d262a92a39460", "9f5c802e44c1076c418f1bff7f266983fe1da577", "1efdad6f91e830fd64306e4625f74191b05ef9c4", "7e688b260483badfed211273b7da2a4bfea290cd", "3ce05152dbedab572167e031b90d677c13b49767", 
"14e5699840eccea7fdda22f4824ccce311d828fd", "2a1471dd22a2585b5855b02b6886958aa827c941", "2dfd92c808487049ab4c9b45db77e9055b9da5a2", "5caae7552b47f8ff02eb56c596df770a4477accc", "619cdd400f94702638fbb64eca63f36289b78d81", "207e83d1deb0db8e6f60a63cb1b5ebdf64c05a15", "67871f8153926f0b665f1779430c58f361e4a573", "c6559b982277c352a357d28e492e5c38d65489bb", "18cc6e16a148d48138621534088c4f26303c09cc", "b117e3ab62ce5a405428bd3b9f76afd1293e0c85", "b484849ccbe1b1d4c9f4e8c4654fa9b94c9536c1", "0f16f6f478b5c788dce466eb50e36c612273c36e", "83b8b21842bf4257f55f75d86aadfb964ec41096", "e8151a322b62a37b03c105d0033c3775b00f1ee1", "0752ddea24948711028077604eabd6c16fe972d0", "6a92a251be77883b26a535c69ebbc872d0479edc", "5987ac0ae8e468a402e3c2513e7779af1ab19591", "1b7d19d5d1922afe48b37eac46e98abd8ed807ea", "b6050484c97a6ff077fba0bb5eb066214a0df193", "26d51c7b5d666abda8b45eb1c942bbc3e0ea05f2", "24c9b0b05c5e957e255b854f947472f9181772a4", "7a10f6a406b664d1159e7c4fefbdd6ac275aee53", "007665be9c8d19d1d67ba466577953ae071d03b9", "7e1874986cf6433fabf96fff93ef42b60bdc49f8", "7cd2eacc36df106e2be7757872ef5d06f6ec36f4", "b86802dea6052b827d40dc55d2e34bc76e9125ae", "3e615ef61863110fcfc1a4c97dff923df66d3476", "8aaedff7c085b376b17d273a4fbd7b6c5b5953b7", "66a78637403a0f8f606c2ba24454a140ff1de3fe", "06824a3be85f5881d85cb1cc578a6cd4d54ee3a8", "79ba8bcfbf9496834fdc22a1f7c96d26d776cd6c", "34e89bf1beb0897e851a7c79bb71aa6fc8a7d3fb", "2c972f5a08e3dedf37f45b35ce17ae80c2139c1c", "e8f16ec1024a6cffbb4e0d57529e9432207d4a5c", "10e970f9747d98f79e2557ceba178dc4ca9ed754", "0b7e7da8149733462998992aad3254abadd3c97c", "716b168d1a6e1ea62b9cd534e601772b89962a7c", "0f3d2a17809f999cd4ab9d97fd5eb71086580685", "427b168f490b56716f22b129ac93aba5425ea08f", "855733917a479176f106581d1ad64d5bef85ff74", "20460b11db27bb41b3504044c334cb5b1e42c9b5", "21559ee898e0969f21ed4c0381b48e6f40002040", "94ec7959601e9da5e5ba5d7da8ebeabb1f2d613b" ], "paperAbstract": "Most state-of-the-art graph kernels only take local graph properties into account, 
i.e., the kernel is computed with regard to properties of the neighborhood of vertices or other small substructures. On the other hand, kernels that do take global graph properties into account may not scale well to large graph databases. Here we propose to start exploring the space between local and global graph kernels, so called glocalized graph kernels, striking the balance between both worlds. Specifically, we introduce a novel graph kernel based on the k-dimensional Weisfeiler-Lehman algorithm. Unfortunately, the k-dimensional Weisfeiler-Lehman algorithm scales exponentially in k. Consequently, we devise a stochastic version of the kernel with provable approximation guarantees using conditional Rademacher averages. On bounded-degree graphs, it can even be computed in constant time. We support our theoretical results with experiments on several graph classification benchmarks, showing that our kernels often outperform the state-of-the-art in terms of classification accuracies.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.42" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/733f49039d1ce077a116d03633e3338125f33196", "sources": [ "DBLP" ], "title": "Glocalized Weisfeiler-Lehman Graph Kernels: Global-Local Feature Maps of Graphs", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "73d6eab602594684d5626fee5b2c8b170c3aa076": { "authors": [ { "ids": [ "2367833" ], "name": "Yongqiang Wang" }, { "ids": [ "2069412" ], "name": "Chase Qishi Wu" }, { "ids": [ "2423828" ], "name": "Aiqin Hou" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.63", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.63", "entities": [ "Algorithm", "Big data", "Heuristic", "Profiling (information science)", "Provisioning", "Scheduling (computing)", "Simulation", "Throughput" ], "id": "73d6eab602594684d5626fee5b2c8b170c3aa076", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High 
Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "482-489", "journalVolume": "", "outCitations": [ "895e17fe6dc4e47c2bd722366f4c1425a552a6ae", "8f12fb737b938a388c298b73ba50173dfd349fef", "cead53d1ef22c3451e3b770ab8b3472bb6b5249e", "5bb86870fcee82b40ed2a260d6b4a2347d57ac5e", "4f42d1fc1a2e822d08e99f97c8bf1b4429919be9", "3051270ff39347dee663ade150d52a3ac2a7be21", "9e86cf0478d79b92b8a0e859129d68d3455ac263", "ab6998c7d96214915007184b88db232441493664", "ddf4b9d001b5cb3d9416bbb6b308ac94e011daaf", "c32f62c591f50b0c1ca9dce965218fcd41383687", "23dadf25f3efacbc9c66f69093d656ad5b003529" ], "paperAbstract": "An increasing number of scientific applications carry out big data transfer through dedicated networks for global collaboration, where bandwidth scheduling plays a critical role in improving the utilization of network resources and meeting diverse user requests. In this paper, we formulate a periodic bandwidth scheduling problem to maximize the number of satisfied user requests for profiling-based floating-window bandwidth reservations under deadline constraint on a network path, referred to as PFWR-DC. We prove that PFWR-DC is NP-complete, and propose an integrated bandwidth scheduling algorithm based on the product of floating-window size and dynamic transport throughput, referred to as ProductWT-BS. Extensive simulation results shed light on the performance superiority of ProductWT-BS in terms of scheduling success ratio over five heuristic algorithms designed for performance comparison. 
The proposed scheduling algorithm has great potential to improve the performance of collaborative scientific applications that require the floating-window bandwidth provisioning service for coordinated network-based operations.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.63" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/73d6eab602594684d5626fee5b2c8b170c3aa076", "sources": [ "DBLP" ], "title": "Periodic Scheduling of Profiling-Based Floating-Window Bandwidth Reservations for Scientific Collaboration", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "73d7a34d7303e47bf87badd70d3fa102d23fed9f": { "authors": [ { "ids": [ "37264036" ], "name": "Ari Rasch" }, { "ids": [ "2676488" ], "name": "Michael Haidl" }, { "ids": [ "1707325" ], "name": "Sergei Gorlatch" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.9", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.9", "entities": [ "Application domain", "Auto-Tune", "Interdependence", "Program optimization", "Programmer", "Programming language", "Search engine optimization", "Self-tuning", "Spring Framework" ], "id": "73d7a34d7303e47bf87badd70d3fa102d23fed9f", "inCitations": [ "fac8fb5456729f8b7747e3574761d7d7920d6789" ], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "64-71", "journalVolume": "", "outCitations": [ "f888140129fa54b5d3f4d75fe63f3c4b9ea6dfd3", "1ccaac0fdcc5ab37a45d0cc616feeaa67a3d4ca1", "0996181a15b977e4801bc4d65636a5f97e295032", "1601727e9d919af14b4319b156657b08e73fab0b", "6a4620589f63f3385707d2d590f7b7dc8ee4d74f", 
"722fcc35def20cfcca3ada76c8dd7a585d6de386", "220f5b0e74c7f1e71d6e23da672dcffbc9e6520a", "1ac19f434c742202451da7c44591c52ad3f9e9fd", "1e375b7bd9b02336371dbbb06bee4a94b2a93fc8", "782d8591afd432a9b2bfe21553a4158a39cb9d1f", "12f1a2a510a4e86ecd75c8081a78620c71822f99", "2a4423b10725e54ad72f4f1fcf77db5bc835f0a6", "1c6477bc1b1c7b3767624be6d286d382ce05c211" ], "paperAbstract": "We describe the Auto-Tuning Framework (ATF) — a novel generic approach for automatic program optimization by choosing the most suitable values of program parameters, such as number of parallel threads, tile sizes, etc. Our framework combines four advantages over the state-of-the-art autotuning: i) it is generic regarding the programming language, application domain, tuning objective (e.g., high performance and/or low energy consumption), and search technique; ii) it can auto-tune a broader class of applications by allowing tuning parameters to be interdependent, e.g., when one parameter is divisible by another parameter; iii) it allows tuning parameters with substantially larger ranges by implementing an optimized search space generation process; and iv) its interface is arguably simpler than the interfaces of current auto-tuning frameworks. 
We demonstrate ATF's efficacy by comparing it to the state-of-the-art auto-tuning approaches OpenTuner and CLTune, showing better tuning results with less programmer's effort.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.9" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/73d7a34d7303e47bf87badd70d3fa102d23fed9f", "sources": [ "DBLP" ], "title": "ATF: A Generic Auto-Tuning Framework", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "73f0b0a0f82825fe5129cfc1115df98b2d5acdf8": { "authors": [ { "ids": [ "39738118" ], "name": "Xiaoli Li" }, { "ids": [ "2538581" ], "name": "Jun Huan" } ], "doi": "10.1145/3097983.3097994", "doiUrl": "https://doi.org/10.1145/3097983.3097994", "entities": [ "Algorithm", "Data assimilation", "Machine learning", "Programming paradigm", "Social constructivism", "Synthetic data", "Theory" ], "id": "73f0b0a0f82825fe5129cfc1115df98b2d5acdf8", "inCitations": [ "6c39ded46031a904e3448d29e71a8482e0aa2801", "014ed5f17c896288d23b968854df4d4bcff3e96b" ], "journalName": "", "journalPages": "285-294", "journalVolume": "", "outCitations": [ "2968ad0b5d53b9bbe44b516304fda2d6145f9a5d", "4273e56d1c82a555815b84aff28cdf79fe42e8f9", "6103a6435b84f43b6e8ba335717c74f8cb5f4960", "df8ba4aa40f84572f5aeb93bca0a76530bd2856c", "096f2c35689b8f1850695b614ae3bb7f95060944", "6efebd4cb8cb39da348e6818b94c680d3cfe198c", "0cc681296eba487fce17892d0fb31dbe4f6a631e", "7995537d2eb717a9eb3cea6a0a37ac8ad238d248", "b1337f8ea9870856ab831960918a9fa4b9ca047c", "5636dca44384240ce9aff2b10b78458cd3c2f450", "8efbe7027ad7ddbf88213f8fadd508ab41385115", "1bb662deceaf9fa941030987675cd98251b51a70", "d96c99d961bfda7704062369169a6279546bda0b", "51194f3d980d05cf25c34cec9367a7605703692b", "7c04d879bf37ace4c4b9e151dc5de3928d9dd698", 
"147570a4736ddf6167d471d2bf43db1f78703812", "2111cbedefd934ebe61fb86e1d7cb8edae85110c", "28e0707528f78fecb26fe7f003d94f6a5de32b98", "1fd2ec347cd440edec4b59fc863c95065a9e9a27", "07aecf899e441d88ae94a19d87f72203c5d5eaca", "96be6d38cfacdb416ac792428374d84875c41e81", "89c808af926ecb20870b2521fbaa7dcbb85be106", "39d1a98ecfeefc6d37cc86c8d281bc1cb7734939", "342bf4460eb3f5e7494c1ca8d6c4cfd4b3fca33b", "6e7933eaa84ed6deedaf43aa6e4e67b786e8f7b4", "d7c028b2937387addb55acba6f0dd686f0d91624" ], "paperAbstract": "Developing transparent predictive analytics has attracted significant research attention recently. There have been multiple theories on how to model learning transparency but none of them aims to understand the internal and often complicated modeling processes. In this paper we adopt a contemporary philosophical concept called \"constructivism\", which is a theory regarding how human learns. We hypothesize that a critical aspect of transparent machine learning is to \"reveal\" model construction with two key process: (1) the assimilation process where we enhance our existing learning models and (2) the accommodation process where we create new learning models. With this intuition we propose a new learning paradigm, constructivism learning, using a Bayesian nonparametric model to dynamically handle the creation of new learning tasks. 
Our empirical study on both synthetic and real data sets demonstrate that the new learning algorithm is capable of delivering higher quality models (as compared to base lines and state-of-the-art) and at the same time increasing the transparency of the learning process.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3097994" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/73f0b0a0f82825fe5129cfc1115df98b2d5acdf8", "sources": [ "DBLP" ], "title": "Constructivism Learning: A Learning Paradigm for Transparent Predictive Analytics", "venue": "KDD", "year": 2017 }, "73f5ef6bfee715505726aae6f4330b5372215a71": { "authors": [ { "ids": [ "1765729" ], "name": "Hovhannes A. Harutyunyan" }, { "ids": [ "3088722" ], "name": "Meghrig Terzian" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.20", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.20", "entities": [ "Algorithm", "End-to-end encryption", "Multi-core processor", "Multicast", "Routing", "Simulation", "Single-core" ], "id": "73f5ef6bfee715505726aae6f4330b5372215a71", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "154-161", "journalVolume": "", "outCitations": [ "e7d9b9f7e48bef4a53c7cf60e3df67f8782307ff", "b8f713cd75dc87c68b5b33e33a0154fc47840754", "231a16918701b907cd5dbb42931cd422c08a32da", "82f39a0c3b5d2ef45253520dbb6c70710e9ef972", "7b042258f3b8d4f32cb200e714e9279c4a377cf5", "3be60e25d2c1a250aa00972c1991832b45d20c89", "6ce56d9bfb1be20fe0962548e13e93e195e45ece", "ef20f7d580ace329654763f2c81fac843eba0f1e", "4509711a991665e8dbb5cae730592471553d0e3d", "44ceb02ab6dc1fe1e9f480dd00e3228256459022", "e56b0c2cf71c30568f0c781464e0f4901d7db238", "2a4c5b19731a648f5a21b3c3b2dfe9b6270bccd8", "78b7daa5eb063a13dab9ff82fe67598d04ddbc7d", 
"e74551b8449ac0a8e9aa696c38a4843b4d131466", "04eede31709515229b0dccf3a67d563de982e198", "91f8de8de04cc4fce3521bb168a2bf0160619635", "3728c927d6599c6521100821038dab8acfa8555a" ], "paperAbstract": "Multicast communication constrained by end-to-end delay and inter-destination delay variation is known as Delay and Delay Variation Bounded Multicast (DVBM). These constraints make the multicast communication real-time. In this paper, we propose a multi-core multicast approach to solve the DVBM problem. The proposed three-phase algorithm, Multi-core DVBM Trees (MCDVBMT), semi-matches group members to core nodes. The message is disseminated to group members using trees rooted at the designated core nodes. Simulation results show that when existing single-core based algorithms fail to construct a tree satisfying both constraints, MCDVBMT succeeds using multiple cores with better inter-destination delay variation and traffic concentration. However, the cost and the end-to-end delay of the communication is higher than that of single-core algorithms.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.20" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/73f5ef6bfee715505726aae6f4330b5372215a71", "sources": [ "DBLP" ], "title": "A Multi-core Multicast Approach for Delay and Delay Variation Multicast Routing", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "742ab97db9e924e0bc7f3a822e5c9d6ab74af0d6": { "authors": [ { "ids": [ "1683962" ], "name": "Gianluca De Marco" }, { "ids": [ "1678996" ], "name": "Grzegorz Stachowiak" } ], "doi": "10.1145/3087801.3087831", "doiUrl": "https://doi.org/10.1145/3087801.3087831", "entities": [], "id": "742ab97db9e924e0bc7f3a822e5c9d6ab74af0d6", "inCitations": [], "journalName": "", "journalPages": "391-400", 
"journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087831" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/742ab97db9e924e0bc7f3a822e5c9d6ab74af0d6", "sources": [ "DBLP" ], "title": "Asynchronous Shared Channel", "venue": "PODC", "year": 2017 }, "7481121f28a7277b2ed204af1e819e60a813af63": { "authors": [ { "ids": [ "2621201" ], "name": "Apoorv Gupta" }, { "ids": [ "25017988" ], "name": "Aman Bansal" }, { "ids": [ "31925906" ], "name": "Rishab Gupta" }, { "ids": [ "15082586" ], "name": "Deepika Naryani" }, { "ids": [ "2236810" ], "name": "Apoorvi Sood" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.6", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.6", "entities": [ "Artificial neural network", "Denial-of-service attack", "Electron mobility", "Enigma machine", "Hall effect", "Neural Networks", "Smart city", "Water cooling" ], "id": "7481121f28a7277b2ed204af1e819e60a813af63", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "42-49", "journalVolume": "", "outCitations": [ "85f87d2ba03835cb489b4606ddcdf76fa2646cfa", "f22f6972e66bdd2e769fa64b0df0a13063c0c101", "a5dc8a7844825ea41a6298c69b7de1df9106669d", "ec8aa0edfb1a85874dc9e7184c2145ad5597ab75", "2b00e526490d65f2ec00107fb7bcce0ace5960c7", "05b556b0573373020adcb2ba1c71d72d9dcf95a5", "01f78bfd5c2136629f369e4b495bbfd9221ea7c1", "6e9b6b380ec19dacb99f0a438bb79c7d1abcf078" ], "paperAbstract": "In many developing countries, rapid urbanisation of cities coupled with the lack of proper urban planning has made various streets susceptible to waterlogging during heavy rains. This severely affects the traffic movement across an entire city and leads to disruption in work. 
Since an important component of smart cities is developing an efficient urban mobility system, the authors have developed a method which helps in detection of areas prone to waterlogging and prediction of severity of waterlogging in these areas in the future. The areas susceptible to waterlogging are detected with the help of elevation of the area and the past travel time data. Elevation of an area is an indicator of the level or height of an area, so the low-lying areas are more prone to accumulation of water when it rains. Similarly, the past travel time data of an area also serves as a measure to find out the intensity of water logging as the larger the accumulation of water in an area, the more is the travel time. The past data pertaining to waterlogging severity in an area with respect to parameters such as the amount of rainfall and day of the week is used to train a neural network, which is then used to predict the possibility of waterlogging and its intensity in that area in the future.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.6" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7481121f28a7277b2ed204af1e819e60a813af63", "sources": [ "DBLP" ], "title": "Urban Waterlogging Detection and Severity Prediction Using Artificial Neural Networks", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "74944a6253c74d0f160996db5233960257c0815b": { "authors": [ { "ids": [ "12212141" ], "name": "MohammadReza HoseinyFarahabady" }, { "ids": [ "9392149" ], "name": "Albert Y. 
Zomaya" }, { "ids": [ "1699399" ], "name": "Zahir Tari" } ], "doi": "10.1109/CLUSTER.2017.21", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.21", "entities": [ "Best-effort delivery", "Central processing unit", "Interference (communication)", "Limiter", "Optimization problem", "Parallel computing", "Particle swarm optimization", "Program optimization", "Provisioning", "Quality of service", "Requirement", "Resource contention", "Stream processing", "Swarm intelligence" ], "id": "74944a6253c74d0f160996db5233960257c0815b", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "137-146", "journalVolume": "", "outCitations": [ "ce91b0ca3b88bd0464f5e7ca7564ee84ed7b371c", "fcd61f5508b15f7369258ae0d0db01eacf399f7b", "fbafa098c81a5c5b7f5dd5ef0d985f96009c91a0", "2831b316e1f669f456aad81d1a51f0fd0bd44fde", "478fbef8568a021c3d91c13128efa19ad719dd88", "2f7f5d0e989c74d6279e2620e10e8d0b0c021cb7", "15a51835f498825a82cfd8604e81d5cf41462e4d", "2f919f99bf5b6d5667968c318b62d7335814ceff", "04afd5f18d3080c57d4b304dfbd1818da9a02e8e", "1fad99d25da3a4894915941752c6fb50dcd2d8ae", "7ec834a37986fd8206a16a4f0143ad68cb0e58bb", "3e19046c665867bbe557685da60738a40738010a", "9655b25a85ea2fd1b50cd9eb3c4e298aa15bb012", "ad62c65d2c5d626f32ae9c5214d3d4b88348950b", "d72515ff5c534a919bc55a1770d69b1c8298b3f7", "6109c80c0314e458c426e63e2971221fd3108c91", "544afa259d6dfe0214f029a9fa515dd0482dbba2", "bf82f0b0cf448b18fec979d25368c6cd9c04ce0c", "8ff24b26e23ced3f93893d58687c74282f18d73a", "b1582e2a952924fecb86a8baa7f5e90902c3bbfb", "4f17119eaa541f64f6ae9be2a0b6e30de70fe421", "6560d5255cfa785b89b5b7e851b78db5fefeb2fd", "4a520c1818fc7ca560331234f6bee68d4d8bd302", "0ef1dd03db41de69165075562a051021a186c230", "0fc6c9dcd7a850e7a0d3796d32e6771353154fd9", "034e026c17a03107ff1bd92f90413e7911108df6", "1833dee660500dd104ca84d99600b70c2479ba3c", "689daac32ba52ad5d72178fd4d5e093fb9501132", "070f03f5e8b016628903c56a0011e810d0914d73", 
"dc2df2048d8da76627d7c821b72d6a306037fd25", "8e4b845da0ad9d106a0a7c46a9335b972ef2b187", "7a978f2902460e732c50c36a171deb11733df1fc" ], "paperAbstract": "This paper addresses the shared resource contention problem associated with the auto-parallelization of running queries in distributed stream processing engines. In such platforms, analyzing a large amount of data often requires executing user-defined queries over continuous raw inputs in a parallel fashion at each single host. However, previous studies showed that the collocated applications can fiercely compete for shared resources, resulting in a severe performance degradation among applications. This paper presents an advanced resource allocation strategy for handling scenarios in which the target applications have different quality of service (QoS) requirements while shared-resource interference is considered as a key performance-limiting parameter. To properly allocate the best possible resource to each query, the proposed controller predicts the performance degradation of the running pane-level as well as the window-level queries when co-running with other queries. This is addressed as an optimization problem where a set of cost functions is defined to achieve the following goals: a) reduce the sum of QoS violation incidents over all machines; b) keep the CPU utilization level within an accepted range; and c) avoid fierce shared resource interference among collocated applications. Particle swarm optimization is used to find an acceptable solution at each round of the controlling period. The performance of the proposed solution is benchmarked with Round-Robin and best-effort strategies, and the experimental results clearly demonstrate that the proposed controller has the following advantages over its opponents: it increases the overall resource utilization by 15% on average while reducing the average tuple latencies by 14%.
It also achieves an average 123% improvement in preventing QoS violation incidents", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.21" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/74944a6253c74d0f160996db5233960257c0815b", "sources": [ "DBLP" ], "title": "QoS- and Contention- Aware Resource Provisioning in a Stream Processing Engine", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "74c88d63b94e3f24d1ee97b8bd8c69cbdd297725": { "authors": [ { "ids": [ "2724204" ], "name": "Colin Cooper" }, { "ids": [ "2526035" ], "name": "Tomasz Radzik" }, { "ids": [ "2242239" ], "name": "Nicolas Rivera" } ], "doi": "10.1145/3087556.3087564", "doiUrl": "https://doi.org/10.1145/3087556.3087564", "entities": [ "Conductance (graph)", "Connectivity (graph theory)", "Degree (graph theory)", "Graph (discrete mathematics)", "Moore neighborhood", "PODC", "SPAA" ], "id": "74c88d63b94e3f24d1ee97b8bd8c69cbdd297725", "inCitations": [], "journalName": "", "journalPages": "305-312", "journalVolume": "", "outCitations": [ "2145b40e0095d376854d61780278b1d0ebf3f0c1", "24d4c9c35b14a5e48dfce323da5c0803afbca1b5", "37030e618f7caa7a8c3fec3454fb0d43915002a4", "b8112bde00dcf02eb7b44fb2d22e648ff8006873", "13f008360c48e279afbaa9335155a4ea54b9da31", "7423137dd23b0044698fe9f3554fea8a6beb776a", "1552ae1bbacc56f0bfc7d43ade48746ef7c88386", "25ba3600de62f0a83bd330326284b757fce60abe" ], "paperAbstract": "We present improved bounds on the cover time of the coalescing-branching random walk process COBRA. The COBRA process, introduced in [Dutta et al., SPAA 2013], can be viewed as spreading a single item of information throughout an undirected graph in synchronised rounds. In each round, each vertex which has received the information in the previous round (possibly simultaneously from more than one neighbour and possibly not for the first time), 'pushes' the information to b randomly selected neighbours. 
The COBRA process is typically studied for integer branching rates b \\ge 2 (with the case b=1 corresponding to a random walk). The aim of the process is to propagate the information quickly, but with a limited number of transmissions per vertex per round.\n The cover time of COBRA is defined as the expected number of rounds until each vertex has received the information at least once. Our main results are a bound of O(m + (d_{\\max})^2\\log n) = O(n^2\\log n) on the COBRA cover time for an arbitrary connected graph with n vertices, m edges and the maximum vertex degree d_{\\max}, and a bound of O((r^2 + r/(1-\\lambda)) \\log n) for r-regular connected graphs with the second eigenvalue \\lambda. Our bounds improve the O(n^{11/4}\\log n) and O((r^4/\\phi^2)\\log^2 n) bounds shown in [Mitzenmacher et al., SPAA 2016], where \\phi is the conductance of the graph, and complement the O((1/(1-\\lambda))^3 \\log n) bound shown in [Cooper et al., PODC 2016]. We obtain our bounds by analysing the process called Biased Infection with Persistent Source (BIPS), which was introduced in [Cooper et al., PODC 2016] as a dual process for COBRA.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087556.3087564", "https://kclpure.kcl.ac.uk/portal/files/73244955/COBRA_SPAA2017_proceedings.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/74c88d63b94e3f24d1ee97b8bd8c69cbdd297725", "sources": [ "DBLP" ], "title": "Improved Cover Time Bounds for the Coalescing-Branching Random Walk on Graphs", "venue": "SPAA", "year": 2017 }, "74c9c9e827dd8b96b85190c3b5c813cbbda666d0": { "authors": [ { "ids": [ "1742725" ], "name": "\u00d6zg\u00fc Alay" }, { "ids": [ "2254522" ], "name": "Andra Lutu" }, { "ids": [ "31707265" ], "name": "Miguel Pe\u00f3n Quir\u00f3s" }, { "ids": [ "2210210" ], "name": "Vincenzo Mancuso" }, { "ids": [ "34512849" ], "name": "Thomas Hirsch" }, { "ids": [ "1853904" ], "name": "Kristian Evensen" }, { "ids": [ "1803494" ], "name": "Audun Fosselie Hansen" }, {
"ids": [ "2010350" ], "name": "Stefan Alfredsson" }, { "ids": [ "40528628" ], "name": "Jonas Karlsson" }, { "ids": [ "1691426" ], "name": "Anna Brunstrom" }, { "ids": [ "3203460" ], "name": "Ali Safari Khatouni" }, { "ids": [ "1703450" ], "name": "Marco Mellia" }, { "ids": [ "1727779" ], "name": "Marco Ajmone Marsan" } ], "doi": "10.1145/3117811.3117812", "doiUrl": "https://doi.org/10.1145/3117811.3117812", "entities": [ "Exemplification", "Experiment", "Open platform", "Switch", "Testbed" ], "id": "74c9c9e827dd8b96b85190c3b5c813cbbda666d0", "inCitations": [ "d4ba7c62211b9763250d7d23003284465adebd37", "005cfb6668f48bf56fbeaceaf8bbf5eb9ca7e0ee", "44882fc3d6647d37c155cec9a47699dca087f6d1" ], "journalName": "", "journalPages": "70-78", "journalVolume": "", "outCitations": [ "5a694baba8b3a3abd69cbb689122c59590e049da", "ee14ef4456830a8d4fdadb9e9cb76160f96df240", "0892739439c0b4a92f78da0577e7b012996ae00e", "84939e1947f8e757a0d5b8356eb36e8092b9ec89", "9423c2b986ad6a7967d74855470f0339b5598c1d", "42948d0b9a8b5d9bc6d3c3d48ac18efc991706c1", "d1949d7c3f22d8a140ed94ce026071bf0a1aee57", "46d1792e3c8e45868b1399c3e8c7d2a6675fdd43", "0d998c8d61131a3854532b1168edce19c76ddf95", "9038061c1f4487b5be19cdbb46de9261a7d84e0d", "03a264cb2394da393689ec120209cd7583510ac0", "06901df85fa95c8dbc8c0e03b1dfe4a8a5c31f0d", "3b0c70696bd29e190e6cd7383e6f9012ce90d404", "1374d4342ac5e426d7b7755d6b7968af192c3705", "622d537a108d34013db2d379a391a3ccac2ab8fb" ], "paperAbstract": "Open experimentation with operational Mobile Broadband (MBB) networks in the wild is currently a fundamental requirement of the research community in its endeavor to address the need of innovative solutions for mobile communications. Even more, there is a strong need for objective data about stability and performance of MBB (e.g., 3G/4G) networks, and for tools that rigorously and scientifically assess their status. 
In this paper, we introduce the MONROE measurement platform: an open access and flexible hardware-based platform for measurements and custom experimentation on operational MBB networks. The MONROE platform enables accurate, realistic and meaningful assessment of the performance and reliability of 11 MBB networks in Europe. We report on our experience designing, implementing and testing the solution we propose for the platform. We detail the challenges we overcame while building and testing the MONROE testbed and argue our design and implementation choices accordingly. We describe and exemplify the capabilities of the platform and the wide variety of experiments that external users already perform using the system.", "pdfUrls": [ "http://porto.polito.it/2689436/2/Mobicom2017.pdf", "http://doi.acm.org/10.1145/3117811.3117812" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/74c9c9e827dd8b96b85190c3b5c813cbbda666d0", "sources": [ "DBLP" ], "title": "Experience: An Open Platform for Experimentation with Commercial Mobile Broadband Networks", "venue": "MobiCom", "year": 2017 }, "74d23cb8751120849bc908477b28c886c6a76252": { "authors": [ { "ids": [ "1756753" ], "name": "Sangwook Kim" }, { "ids": [ "2968918" ], "name": "Hwanju Kim" }, { "ids": [ "6064655" ], "name": "Joonwon Lee" }, { "ids": [ "1782453" ], "name": "Jinkyu Jeong" } ], "doi": "", "doiUrl": "", "entities": [ "Application checkpointing", "Data-intensive computing", "Database", "Holism", "Linux", "Linux", "MongoDB", "PostgreSQL", "Priority inversion", "Redis", "Throughput" ], "id": "74d23cb8751120849bc908477b28c886c6a76252", "inCitations": [ "a94bee6b9f3c9dc19465ac4c6c503c0c17ce846b", "3c320a4d53946087ba6f29f109c17bdf270efff9", "d1ab2cd6a008fa9fde2311f26dec32b9cfbf0aaf" ], "journalName": "", "journalPages": "345-358", "journalVolume": "", "outCitations": [ "012ab4527d6aee2387c243d304c624f3b9cf03f3", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "6e90c995cc9caa0f7d9d68d536f5e16e9bcbbcd6", 
"a30de973f68640b5032d07e2ee3ee80f03d292c5", "9c046601e01d693c1d36a074c00d226c563c76f2", "65fa329956f69119c4da6afa2ee2ed634ba9e464", "01c933428cc95b901cc19da06d7ac5dbcb31e4f6", "0340b5830450f7e94023af098d4e9af37a33fdcd" ], "paperAbstract": "In data-intensive applications, such as databases and key-value stores, reducing the request handling latency is important for providing better data services. In such applications, I/O-intensive background tasks, such as checkpointing, are the major culprit in worsening the latency due to the contention in shared I/O stack and storage. To minimize the contention, properly prioritizing I/Os is crucial but the effectiveness of existing approaches is limited for two reasons. First, statically deciding the priority of an I/O is insufficient since high-priority tasks can wait for low-priority I/Os due to I/O priority inversion. Second, multiple independent layers in modern storage stacks are not holistically considered by existing approaches, which thereby fail to effectively prioritize I/Os throughout the I/O path. In this paper, we propose a request-centric I/O prioritization that dynamically detects and prioritizes I/Os delaying request handling at all layers in the I/O path. The proposed scheme is implemented on Linux and is evaluated with three applications, PostgreSQL, MongoDB, and Redis. The evaluation results show that our scheme achieves up to 53% better request throughput and 42\u00d7 better 99th percentile request latency (84 ms vs.
3581 ms), compared to the default configuration in Linux.", "pdfUrls": [ "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_kim_0.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/kim-sangwook", "http://www.usenix.org./system/files/conference/fast17/fast17-kim-sangwook.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_kim_0.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17-kim-sangwook.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/9629/7c9a920b1c6cbd9712d00368a947b5062a50.pdf", "s2Url": "https://semanticscholar.org/paper/74d23cb8751120849bc908477b28c886c6a76252", "sources": [ "DBLP" ], "title": "Enlightening the I/O Path: A Holistic Approach for Application Performance", "venue": "FAST", "year": 2017 }, "74ee421d480875d5fefac2c7b7d14d5d326c581c": { "authors": [ { "ids": [ "1717169" ], "name": "Artur Czumaj" }, { "ids": [ "5354501" ], "name": "Peter Davies" } ], "doi": "10.1145/3087801.3087825", "doiUrl": "https://doi.org/10.1145/3087801.3087825", "entities": [ "Algorithm", "Collision detection", "Discrete mathematics", "Graph (discrete mathematics)", "Leader election", "Polynomial", "Randomized algorithm", "Spontaneous order", "Subroutine", "With high probability" ], "id": "74ee421d480875d5fefac2c7b7d14d5d326c581c", "inCitations": [ "8fd0d90b60bd62030bc87676d003208342b7d51c", "de7c723d17e836a0af5634b4d0dbe10016c0ae31" ], "journalName": "", "journalPages": "3-12", "journalVolume": "", "outCitations": [ "0baf45f76d12e6e0c31866e85bb1c45b9469a21f", "1f5efaebc753983321854f9c086ec3030487ecc6", "3c000326500a63a105d6350c43facb3833b702ab", "0688b5456386ef9073c55182c1de3cc09f7f32ab", "66bdd6d45572ac64d5a5f2d0a7939b3432c1487d", "3d5a593771a4ee3e7843ba21a72a526b4ebb4667", "48e4dd20c5c8fa110c31a4c3409c92d91f2f3e5f", "28b8586be449d1aa91412fff0d16cd6a130d06c6", "29a402cd8922e08072c0404a3080a0447e9a2fb2", 
"2cb8eab690dfd468307ac248c663159499671586", "0a819ed9886d82772ccf719695c6b485082ec2a8", "47b5da30deeea0bb1ecac41a5891d72264a8729a", "e839d7c40fe80cecb00eb495210dd93a3fdf973e", "9e3d7e20c27c7df8625f32b0338f60dc0a64295b", "ac1cea9296ddba70d2151932a84eab4795820d1b", "304e26cd875160d59858972a51dff391cce63dc6", "0157dc0404cb6b31a1beef7e6855980220849654", "e576f9a0997c6389fda98efe18671eca1a6bd195", "2f77eff3f1bb26ea4aabe7ef6317dd15724e5feb" ], "paperAbstract": "We study two fundamental communication primitives: broadcasting and leader election in the classical model of multi-hop radio networks with unknown topology and without collision detection mechanisms. It has been known for almost 20 years that in undirected networks with n nodes and diameter D, randomized broadcasting requires \u03a9(D \\log(n/D) + \\log^2 n) rounds in expectation, assuming that uninformed nodes are not allowed to communicate (until they are informed). Only very recently, Haeupler and Wajc (PODC\u20192016) showed that this bound can be slightly improved for the model with spontaneous transmissions, providing an O(D \\frac{\\log n \\log\\log n}{\\log D} + \\log^{O(1)} n)-time broadcasting algorithm. In this paper, we give a new and faster algorithm that completes broadcasting in O(D \\frac{\\log n}{\\log D} + \\log^{O(1)} n) time, with high probability. This yields the first optimal O(D)-time broadcasting algorithm whenever D is polynomial in n. Furthermore, our approach can be applied to design a new leader election algorithm that matches the performance of our broadcasting algorithm. Previously, all fast randomized leader election algorithms have been using broadcasting as their subroutine and their complexity have been asymptotically strictly bigger than the complexity of broadcasting. In particular, the fastest previously known randomized leader election algorithm of Ghaffari and Haeupler (SODA\u20192013) requires O(D \\log(n/D) \\min\\{\\log\\log n, \\log(n/D)\\} + \\log^{O(1)} n) time with high probability.
Our new algorithm requires O(D \\frac{\\log n}{\\log D} + \\log^{O(1)} n) time with high probability, and it achieves the optimal O(D) time whenever D is polynomial in n. Research partially supported by the Centre for Discrete Mathematics and its Applications (DIMAP). Contact information: {A.Czumaj, P.W.Davies}@warwick.ac.uk. Phone: +44 24 7657 3796.", "pdfUrls": [ "https://arxiv.org/pdf/1703.01859v1.pdf", "http://doi.acm.org/10.1145/3087801.3087825", "http://arxiv.org/abs/1703.01859" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/74ee/421d480875d5fefac2c7b7d14d5d326c581c.pdf", "s2Url": "https://semanticscholar.org/paper/74ee421d480875d5fefac2c7b7d14d5d326c581c", "sources": [ "DBLP" ], "title": "Exploiting Spontaneous Transmissions for Broadcasting and Leader Election in Radio Networks", "venue": "PODC", "year": 2017 }, "7510c9408b5f90e46289ae9d885776c281c19411": { "authors": [ { "ids": [ "3079223" ], "name": "Hangyu Li" }, { "ids": [ "1735632" ], "name": "Hong Xu" }, { "ids": [ "2304090" ], "name": "Sarana Nutanong" } ], "doi": "", "doiUrl": "", "entities": [ "Amazon Web Services", "Bohr\u2013Einstein debates", "Data center", "OLAP cube", "Online analytical processing", "Power dividers and directional couplers", "Quantitative computed tomography" ], "id": "7510c9408b5f90e46289ae9d885776c281c19411", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "035109378c724a5db763f5b7ff3c94b7949ce5a2", "73f512de77dad7d0abe8076a856727021b9493d3", "b113895fdc1d62da873db332e5d63f11cfcff0eb", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "3bd6bc388dea99b023c6695bd287eac8f5d28c0a", "336e3bb6c20367881e11fd33037defbf36db024d", "0a68c6226e04180671a474c73fa0a2b4a154d129", "b1e3b19f9917012e6ca14afc5322c7d5d63e0653", "133eacaf0ad25b8364cb4510007d9363298e8adf", "1c799eca7983c62f7815ac5f41787b3e552567b6", "9771e382794af067f7360f1cac7b6d2a1e6dd1c4", "5eb14aca4a0a1a68960bc8d59801ed76a82d84ad" ], "paperAbstract": "We propose Bohr, a similarity aware
geo-distributed data analytics system that minimizes query completion time. The key idea is to exploit similarity between data in different data centers (DCs), and transfer similar data from the bottleneck DC to other sites with more WAN bandwidth. Though these sites have more input data to process, these data are more similar and can be more efficiently aggregated by the combiner to reduce the intermediate data that needs to be shuffled across the WAN. Thus our similarity aware approach reduces the shuffle time and in turn the query completion time (QCT). We design and implement Bohr based on OLAP data cubes to perform efficient similarity checking among datasets in different sites. Evaluation across ten sites of AWS EC2 shows that Bohr decreases the QCT by 30% compared to state-of-the-art solutions.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/hotcloud17_slides_li.pdf", "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-li.pdf", "https://www.usenix.org/conference/hotcloud17/program/presentation/li" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/cfa0/c4a998738983562352eaa20b0adc0840684f.pdf", "s2Url": "https://semanticscholar.org/paper/7510c9408b5f90e46289ae9d885776c281c19411", "sources": [ "DBLP" ], "title": "Bohr: Similarity Aware Geo-distributed Data Analytics", "venue": "HotCloud", "year": 2017 }, "755345c6622c8a6972af3e56975e80fc515de108": { "authors": [ { "ids": [ "7938294" ], "name": "Huasong Shan" }, { "ids": [ "34660837" ], "name": "Qingyang Wang" }, { "ids": [ "1682055" ], "name": "Calton Pu" } ], "doi": "10.1145/3133956.3133968", "doiUrl": "https://doi.org/10.1145/3133956.3133968", "entities": [ "Benchmark (computing)", "Burst mode (computing)", "Denial-of-service attack", "Experiment", "Feedback", "Kalman filter", "Long tail", "Multitier architecture", "Network model", "Numerical analysis", "Queueing theory", "Resource contention", "Response time (technology)", "Simulation", 
"Video game bot", "Web application" ], "id": "755345c6622c8a6972af3e56975e80fc515de108", "inCitations": [], "journalName": "", "journalPages": "1725-1739", "journalVolume": "", "outCitations": [ "0831a5baf38c9b3d43c755319a602b15fc01c52d", "d2fe3f26505c106cb2f61c86ba0a2dc316b0868f", "619cbddb1f728000358be665596ab2a4e60d1106", "11c030c097edc9651814804d913205ce374da585", "39c80e494f44505c80a345c5228e6ec0e74673f6", "268aa8e7e58c180b6ef3ab1c4440fd5633f4ac79", "14ce949181d829a9874aa598646d9ca63fe1ade0", "1dc23940f830a1aede366ac9654b5d499c14bcef", "11ab2d9ddeee678d1080f5825c9c45ba9eef8f05", "0b109f74f54038b7b7bf433578392b5069823a39", "a7cb7ee58a980b8a6606f8330c899fcc7be4e452", "3606622037f30ac044c7e49d796352ae9c9f172c", "078c9d55f33adfe2215bdbc565f7bbb2cff74312", "6560af8ca02fd6187bddb9dadd38ae863993a1c2", "d6695b9c256fe381a39013c3284922ce1a983c1c", "1391f1edb5c79af532c4922d75f0d57f87cc01cf", "274ef9fe8b908071397b6d21096738739ffe0e81", "38b246c73c28b67054338fe4cc0cb19551d02537", "a6a1b2a62676ccaa714c53f20cf4c6bf629e0102", "aa5b74993af58febc566cd067b94425e495b1a17", "acfcf1049e74108942694d14c5b5570821cab3de", "4be9eb96cb53b411d9f6a41b445c385aea2c5059", "371d64572d5e2f6af298a42d84aca5807cd19946", "14df23e853b72e1fa103a213f526de6d4f464367", "2b769624424cac167271a882f9bb0ebc538922e2", "24349c15add8fab491d8f34c3e430c870bd89406", "b6d87c6fcaf01a8ad54fdb85725f02db30bde099", "fcd61f5508b15f7369258ae0d0db01eacf399f7b", "a77a8536ea318d2f2e470571017e53f39c74d364", "7bc74e0ef523d905083c87dfee78d8ea724597e3", "ad2de3163f647cdb400a39c8742e4c3ba19a1919", "8972083ff96e9cfda3b004d78e0f8fcbe5e0209c", "234d05c7522379a91f594b94c1cd0c32f3922413", "1adcf0e652877c55b70d68fd67a22f271fa61089", "0b5303775072512c6fc8457255f2b3041b18c14d" ], "paperAbstract": "As the extension of Distributed Denial-of-Service (DDoS) attacks to application layer in recent years, researchers pay much interest in these new variants due to a low-volume and intermittent pattern with a higher level of stealthiness, 
invaliding the state-of-the-art DDoS detection/defense mechanisms. We describe a new type of low-volume application layer DDoS attack--Tail Attacks on Web Applications. Such attack exploits a newly identified system vulnerability of n-tier web applications (millibottlenecks with sub-second duration and resource contention with strong dependencies among distributed nodes) with the goal of causing the long-tail latency problem of the target web application (e.g., 95th percentile response time > 1 second) and damaging the long-term business of the service provider, while all the system resources are far from saturation, making it difficult to trace the cause of performance degradation.\n We present a modified queueing network model to analyze the impact of our attacks in n-tier architecture systems, and numerically solve the optimal attack parameters. We adopt a feedback control-theoretic (e.g., Kalman filter) framework that allows attackers to fit the dynamics of background requests or system state by dynamically adjusting attack parameters. To evaluate the practicality of such attacks, we conduct extensive validation through not only analytical, numerical, and simulation results but also real cloud production setting experiments via a representative benchmark website equipped with state-of-the-art DDoS defense tools. 
We further proposed a solution to detect and defense the proposed attacks, involving three stages: fine-grained monitoring, identifying bursts, and blocking bots.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133968", "http://www.csc.lsu.edu/~qywang/papers/CCS17-Tail-Attack.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/755345c6622c8a6972af3e56975e80fc515de108", "sources": [ "DBLP" ], "title": "Tail Attacks on Web Applications", "venue": "CCS", "year": 2017 }, "7554f4f2a390d4989b5d34c3404a916594e3a86a": { "authors": [ { "ids": [ "1829047" ], "name": "Patrick Dubroy" }, { "ids": [ "38957454" ], "name": "Alessandro Warth" } ], "doi": "10.1145/3136014.3136022", "doiUrl": "https://doi.org/10.1145/3136014.3136022", "entities": [ "Experiment", "JavaScript", "Memoization", "Parsing", "Parsing expression grammar", "Time complexity", "Top-down and bottom-up design" ], "id": "7554f4f2a390d4989b5d34c3404a916594e3a86a", "inCitations": [], "journalName": "", "journalPages": "14-25", "journalVolume": "", "outCitations": [ "8e21d12dadcc957043e0e70332297965fdc6872c", "85683fc3b99f3b1c682a5614f607054b39e10d41", "8ded2ccf3baf49a9025330abb14d41f58141a746", "12cae02d8dcc8ba2a96b9726dc8585b2f748fd3b", "1abb835694f93afe6335aa7a5fd6effe075b99d5", "ce04d969db34332bea481bfcec4de2313821b756", "b24bc7db63aa058f7a135c071830c82066af1780", "83e77fb03c91fb9f57a664db705a475f467d70ff", "6d5a3e0b92a121bbc85558601d4c6704e0f6aa88", "7495f88b1d337f037fd4c917d408d356b69ea21b", "82dc4ab10024a5000376c35847ee7b0201524792", "82f8fbc66004ac438ac742c1ad6016d07d1ae037", "303b122551f37383a43acc1229f6e57dcde20f40", "d2d8b1e5fc32f54980a3bc8b67b95e8bff2bda0a", "4d22fab95c78b3c23fa9dff88fb82976edc213c2", "395c5eb8c6a519bfc75a9afdfdc1759e8f950cc2" ], "paperAbstract": "Packrat parsing is a popular technique for implementing top-down, unlimited-lookahead parsers that operate in guaranteed linear time. 
In this paper, we describe a method for turning a standard packrat parser into an incremental parser through a simple modification to its memoization strategy. By \u201cincremental\u201d, we mean that the parser can perform syntax analysis without completely reparsing the input after each edit operation. This makes packrat parsing suitable for interactive use in code editors and IDEs \u2014 even with large inputs. Our experiments show that with our technique, an incremental packrat parser for JavaScript can outperform even a hand-optimized, non-incremental parser.", "pdfUrls": [ "https://ohmlang.github.io/pubs/sle2017/incremental-packrat-parsing.pdf", "http://doi.acm.org/10.1145/3136014.3136022" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7554f4f2a390d4989b5d34c3404a916594e3a86a", "sources": [ "DBLP" ], "title": "Incremental packrat parsing", "venue": "SLE", "year": 2017 }, "75649a9bea754256ba49afa2ed500782bb805954": { "authors": [ { "ids": [ "3204215" ], "name": "Ghufran Baig" }, { "ids": [ "3311387" ], "name": "Dan Alistarh" }, { "ids": [ "31978478" ], "name": "Thomas Karagiannis" }, { "ids": [ "2757057" ], "name": "Bozidar Radunovic" }, { "ids": [ "34803943" ], "name": "Matthew Balkwill" }, { "ids": [ "40219427" ], "name": "Lili Qiu" } ], "doi": "10.1145/3143361.3143367", "doiUrl": "https://doi.org/10.1145/3143361.3143367", "entities": [ "Compaq LTE", "Digital rights management", "Interference (communication)", "Network architecture", "Simulation", "TV tuner card", "Throughput", "White spaces (radio)" ], "id": "75649a9bea754256ba49afa2ed500782bb805954", "inCitations": [ "a41ee4560a38a320566ec80eaf7397cdbe7ea4ff" ], "journalName": "", "journalPages": "2-14", "journalVolume": "", "outCitations": [ "2b5dc07b5a74b378b6bd4f0187d6b6867a1a0b24", "227f9fc73fae69b2722635778362b1941c713368", "833566fd48df01236f7411f91f40c455feddd8be", "cf2dc87c62b097ac14ea3680fa323e1ae11fbb6d", "df0d45fb16e69c79593b2d7f5394686470774f24", 
"479c299cd40fe8f4848433d7a021a485338e21be", "17be93206193d6e6d0c88016bdaa8212807dc84b", "18336fdfca9e54b4a1a0dc03a0eaa66379778133", "ff0d44450e64565488f494dfb45d08c2b713101a", "6d69ae6a0a48878571652daf6a4acc7f24b1b8ab", "08bb5149cc215c0714492b407145bbc93006f44c", "03809a85789f7aeb39002fdcd7c3cdf33cc7370f", "5bc87181b6b4227fd3e60b39929e0030ee8665db", "6e88adca4dd428da4ba9637297408937478f5d3e" ], "paperAbstract": "In this paper we study network architecture for unlicensed cellular networking for outdoor coverage in TV white spaces. The main technology proposed for TV white spaces is 802.11af, a Wi-Fi variant adapted for TV frequencies. However, 802.11af is originally designed for improved indoor propagation. We show that long links, typical for outdoor use, exacerbate known Wi-Fi issues, such as hidden and exposed terminal, and significantly reduce its efficiency.\n Instead, we propose CellFi, an alternative architecture based on LTE. LTE is designed for long-range coverage and throughput efficiency, but it is also designed to operate in tightly controlled and centrally managed networks. CellFi overcomes these problems by designing an LTE-compatible spectrum database component, mandatory for TV white space networking, and introducing an interference management component for distributed coordination. CellFi interference management is compatible with existing LTE mechanisms, requires no explicit communication between base stations, and is more efficient than CSMA for long links.\n We evaluate our design through extensive real world evaluation on off-the-shelf LTE equipment and simulations. 
We show that, compared to 802.11af, it increases coverage by 40% and reduces median flow completion times by 2.3x.", "pdfUrls": [ "http://doi.acm.org/10.1145/3143361.3143367" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/75649a9bea754256ba49afa2ed500782bb805954", "sources": [ "DBLP" ], "title": "Towards unlicensed cellular networks in TV white spaces", "venue": "CoNEXT", "year": 2017 }, "758634417e6b2e20f30cf43691935ceb0f9a2633": { "authors": [ { "ids": [ "32045924" ], "name": "Martin Kucera" }, { "ids": [ "2164812" ], "name": "Petar Tsankov" }, { "ids": [ "2959734" ], "name": "Timon Gehr" }, { "ids": [ "14066540" ], "name": "Marco Guarnieri" }, { "ids": [ "1736447" ], "name": "Martin T. Vechev" } ], "doi": "10.1145/3133956.3134079", "doiUrl": "https://doi.org/10.1145/3133956.3134079", "entities": [ "Application domain", "Greedy algorithm", "Privacy", "Time complexity" ], "id": "758634417e6b2e20f30cf43691935ceb0f9a2633", "inCitations": [ "35c4aa1791cc396141a9d503556ebd37ac247a16" ], "journalName": "", "journalPages": "391-408", "journalVolume": "", "outCitations": [ "47b4ed69f4788130bd192291f30da65e38091ce1", "1b22be246cae34d946a2cfe68dd50ec103f3e257", "552626d9ab480d8b40052d35e16d516ffb1772ff", "946404319f1898c0ec0d4725748392972a3c164c", "025d9bd934d677432a040453d465f04a7cc01fca", "1dc0e966375242346f5bd544ff498a7b855737aa", "575e151ff358f8dadd92b29174af0a0d3be6f9b1", "9c188b3291a7e83d667764be3377a99e15b4d988", "b0e6bf7a7f508e4e1fcc84a27722f306c9449008", "0bec492d110c0746cb3e4dbdf411007ec0bc8772", "71c2f07fb65c4350674f01fccbdfba57cefdc295", "c4b2717d17db19e55bb31219e56ef176eb275d0e", "5fbf739032dd548c1ff189e7333f05e215906a1b", "11baf128445828df62e8d5606533c476feb1da33", "96c43f6f174ff94245ca049bc94031301678aab0", "ca919e747bcfc3f50306ad711bd71207a790ade6", "0f890546c00ee8b35c96bc712a2ecfe574af3754", "9fb35946a52007b708851f42b10b429e0611cb4d", "fd2711cfe890675e8d885df88f3f76b5be5b39a6", "e50486863a0e3debd34bd0fffa7335191e8c3160", 
"233dea485e13b067d70fd45512266092e65d2dd5", "0b89c0c8096b60d939da90da5fcb989447fdfbd5", "3a6db9085a191b3f73c1ef0eeea977297a03f8ea", "66cc5efd9be8dae922a6a7946dee88154592a25a", "71e091c35026944318457182cbe55672a98ef6a2", "83e3e3c781f54d5876fd6ac0884a2597dee3c31e", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "9263789ba999bb726c9c7fdf0bbc77844ee03272", "0f99d17278536e003cc0770a1668afbcc23726dc", "15ca4ce99f7bf74ccb53877d47726853288c5160", "b955d9bc420389523eb1682dc947b94e998635ce", "b532099ff8b67049f292cd62700dca37fc2be623", "7df12c498fecedac4ab6034d3a8032a6d1366ca6", "4afc353a68ce5cc9e17febaa3199da43ba549840", "21171cafc8977f727e6bc4b63d6ad381d2c28c28", "30573448c8859053a267f5a7b933b628d6ed3729", "149fd1141ad8d141824b69146a89ea65d668c4f4", "609d55884a2b3f16c309a7102541a645fe736004", "c3eba5fcba83f9637e83c1ad8be15944f22b15c1", "803aef81c1d6e82a536f27133b5dad421a0051ce", "17fac85921a6538161b30665f55991f7c7e0f940", "1a40ddec6d64cc7eda66760cf8bbd225565c5db8", "8e943ff3efefa3d8377c63184dec6c163b5dab0c", "3dfce4601c3f413605399267b3314b90dc4b3362", "53486f288b34857846f5be392415ba5652d67210", "607dd75812ecf8e81b79663d05bd21824f4efdf7", "6ef12f4e6f120ba7fc8a83a610e3f362e0a133a7", "5f2b00ed42130fd97c79d3c3f041199de8efcdb9", "66c9a41a13f83bc8bf2a28177c4f1864a1a4dd57", "4455594931964097198bc324b561b27611c945f5" ], "paperAbstract": "Existing probabilistic privacy enforcement approaches permit the execution of a program that processes sensitive data only if the information it leaks is within the bounds specified by a given policy. Thus, to extract any information, users must manually design a program that satisfies the policy. In this work, we present a novel synthesis approach that automatically transforms a program into one that complies with a given policy. Our approach consists of two ingredients. First, we phrase the problem of determining the amount of leaked information as Bayesian inference, which enables us to leverage existing probabilistic programming engines. 
Second, we present two synthesis procedures that add uncertainty to the program's outputs as a way of reducing the amount of leaked information: an optimal one based on SMT solving and a greedy one with quadratic running time. We implemented and evaluated our approach on 10 representative programs from multiple application domains. We show that our system can successfully synthesize a permissive enforcement mechanism for all examples.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134079", "http://www.srl.inf.ethz.ch/papers/spire-ccs17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/758634417e6b2e20f30cf43691935ceb0f9a2633", "sources": [ "DBLP" ], "title": "Synthesis of Probabilistic Privacy Enforcement", "venue": "CCS", "year": 2017 }, "758a9afd3b717ee47b566bb37ffe3254195be28d": { "authors": [ { "ids": [ "40024834" ], "name": "Lei Yang" }, { "ids": [ "1780398" ], "name": "Jiannong Cao" }, { "ids": [ "2960930" ], "name": "Zhenyu Wang" }, { "ids": [ "1740306" ], "name": "Weigang Wu" } ], "doi": "10.1109/ICPP.2017.39", "doiUrl": "https://doi.org/10.1109/ICPP.2017.39", "entities": [ "Algorithm", "Benchmark (computing)", "Cloud computing", "Computation", "Computer", "Heuristic", "Knapsack problem", "Mobile device", "Multi-user", "Partition problem", "Simulation" ], "id": "758a9afd3b717ee47b566bb37ffe3254195be28d", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "302-311", "journalVolume": "", "outCitations": [], "paperAbstract": "Mobile edge cloud has been increasingly concerned by researchers due to its closer distance to mobile users than the traditional cloud on Internet. Offloading computations from mobile devices to the nearby edge cloud is an effective technique to accelerate the applications and/or save energy on the mobile devices. 
However, the mobile edge cloud usually has limited computation resources and constrained access bandwidth shared by multiple users in its proximity. Thus, allocation of resources and bandwidth among the users is significant to the overall application performance. In this paper, we study network aware multi-user computation partitioning problem in mobile edge clouds, i.e., to decide for each user which parts of the application should be offload onto the edge cloud, and which others should be executed locally, and meanwhile to allocate the access bandwidth among the users, such that the average application performance of the users is maximized.This problem is novel in that we consider the competition among users for both computing resources and bandwidth, and jointly optimizes the partitioning decisions with the allocation of resources and bandwidths among users, while most existing works either focus on the single user computation partitioning or study the multiple user computation partitioning without regard of the constrained network bandwidth. We first formulate the problem, and then transform it into the classic Multi-class Multi-dimensional Knapsack Problem and develop an effective algorithm, namely Performance Function Matrix based Heuristic (PFM-H), to solve it. Comprehensive simulations show that our proposed algorithm outperforms the benchmark algorithms significantly in the average application performance.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.39" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/758a9afd3b717ee47b566bb37ffe3254195be28d", "sources": [ "DBLP" ], "title": "Network Aware Multi-User Computation Partitioning in Mobile Edge Clouds", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "75d60809b9ac769a4a7e2a9907b3bc028ac58935": { "authors": [ { "ids": [ "40040203" ], "name": "Abutalib Aghayev" }, { "ids": [ "2395925" ], "name": "Theodore Y. 
Ts'o" }, { "ids": [ "30367882" ], "name": "Garth A. Gibson" }, { "ids": [ "2083064" ], "name": "Peter Desnoyers" } ], "doi": "", "doiUrl": "", "entities": [ "Benchmark (computing)", "Disk storage", "File server", "Linux", "Linux", "Magnetic storage", "Plug compatible", "Server (computing)", "Shingled magnetic recording", "Throughput" ], "id": "75d60809b9ac769a4a7e2a9907b3bc028ac58935", "inCitations": [ "ec3924af8c1cb428b4f1309b9a9ca3c86abd6631", "2fe51b5c34484b5fb8f0ec54483750ffc842fd4a", "c2b8a1485ad43085b80ccf8d29f029edcba08529", "40f196e21a289394c4354961116587b8accba45e", "0ae03e097cd936f564a60017b864beeb12635b09", "a377d5f506a411c5d95361188c0b7f500fc2ca09", "18e93539fe6163a0b56f3427fc562733f89449a6", "537d37be13687758d01e35fc6a62be118ec48ea1", "8e4cdaa006bce928ed7a6d37b9bfbfdffe2a6367" ], "journalName": "", "journalPages": "105-120", "journalVolume": "", "outCitations": [ "4468cbc8a9ad13ebeaa210424e842f158415ab07", "11fe43dfcf43802595c2076c7641aff6f025e1ec", "b30cdebe589719c9780dbb7034fee48c109a1716", "27c1898a013f0df5b5d1fb8a7edde5ce435c9d46", "b84f8f1e8da2494d1bd5f3a065228d097d3cfe1f", "2da760f90c3d2bf6598becdde9063093f488548c", "b81b73725e1e037ffa1a935e54f67ea5e7703b86", "1b0eace707f6b86e94793d1a7c83b7d065e604fa", "093d1d23500d65e99c7d0cd5569ce0f0d4c37076", "8dee6c0a8438a995b1d2452b84c7544be5f00578", "9d5e9f98f85629d9dae20d181ff2c9fcdcdb5520", "088e3e939ad234b6fdd0e321290fb26937dc2553", "ba356329a7c6672eca15815ed622dac2c71b4513", "5c06564087db9e53a72ef1eb5865696b0dddd8ca", "081fa9cfe750d58db2cd1f5e53e728c5de636910", "0ad098eca1a5cc262da3d8f99229efbaee88b02e", "7a08e5fda1d9d1e0a8cc9bc4e62dfb74471fc32c", "0420266f84cc95d6b7a8100e601f67d1118d4965", "2018f3fc13cd38122abdf37bf939b5011cd2e3c9", "aeef17ecae6f2469f68f9f3b709f640949a1f438", "499b458ddd5d10dc4be158fd89aeed0e31b6b5cf", "158ebe313a72857c5534a313f3ec0e413593b732", "f1780c4ebcba175f8985058dcad9fe407f526587", "bc0c53752004832ff9e6e0f56539fb63ae1df154", "0bf50c9aff7d5182504dd18b7cc0f6041b5e520b", 
"6d7569ecf4455d3f736dffdc3770213b59a07b44", "2167c708155dac4bb63d29a4bcc960dd320d8e2a", "12a0046a1197ae63c3d616c74e367dc583cef196", "b8ddec47f9fab1eddb5c9cacf703781dd5337b87", "4acdb61098053f38d5500a9ef974d24828696b9d", "501f491dd60ea26bcb8152bfd3f9ac2456e69da8", "006cb2c8713bff9e97a8c68c65e66b98379731f7" ], "paperAbstract": "Drive-Managed SMR (Shingled Magnetic Recording) disks offer a plug-compatible higher-capacity replacement for conventional disks. For non-sequential workloads, these disks show bimodal behavior: After a short period of high throughput they enter a continuous period of low throughput. We introduce ext4-lazy, a small change to the Linux ext4 file system that significantly improves the throughput in both modes. We present benchmarks on four different drive-managed SMR disks from two vendors, showing that ext4-lazy achieves 1.7-5.4\u00d7 improvement over ext4 on a metadata-light file server benchmark. On metadata-heavy benchmarks it achieves 2-13\u00d7 improvement over ext4 on drive-managed SMR disks as well as on conventional disks.", "pdfUrls": [ "http://www.pdl.cmu.edu/PDL-FTP/Storage/ext4-lazy.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_aghayev_0.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_aghayev_0.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/aghayev", "https://www.usenix.org/system/files/conference/fast17/fast17-aghayev.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17-aghayev.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/c0d0/bcbd802e8957b9220a92e78478f4f8cc670f.pdf", "s2Url": "https://semanticscholar.org/paper/75d60809b9ac769a4a7e2a9907b3bc028ac58935", "sources": [ "DBLP" ], "title": "Evolving Ext4 for Shingled Disks", "venue": "FAST", "year": 2017 }, "761bb7b9f388304ac3c635665d7aca41f244f806": { "authors": [ { "ids": [ "3142657" ], "name": "Rajat Kateja" }, { "ids": [ 
"1783539" ], "name": "Anirudh Badam" }, { "ids": [ "1721840" ], "name": "Sriram Govindan" }, { "ids": [ "39807362" ], "name": "Bikash Sharma" }, { "ids": [ "17031505" ], "name": "Greg Ganger" } ], "doi": "10.1145/3079856.3080236", "doiUrl": "https://doi.org/10.1145/3079856.3080236", "entities": [ "Backup", "Computer cooling", "Data center", "Data-intensive computing", "Decoupling (electronics)", "Digital footprint", "Durability (database systems)", "Dynamic random-access memory", "Non-volatile memory", "Provisioning", "Solid-state drive", "Terabyte", "Uninterruptible power supply" ], "id": "761bb7b9f388304ac3c635665d7aca41f244f806", "inCitations": [ "264a5e7a5230b228b86f63a75546738a66454c56" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "613-626", "journalVolume": "", "outCitations": [ "290211989fc5392703365b6e2d3d95082277f640", "a43f2375fc8ac9fadbab91d5c10e61ef88a0525d", "27cc332571aa00e892d7e094a3ee7b9e44b12c75", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "bb5e43dd30a3a60df42652d56781568f5cd0a99d", "242eb56f480c631e362e379a7f37ef76f499d05f", "0ce479229630e55e732597cf9b2aeb5018aae4c2", "2832dd8e2707d7817620075fe340818179d5ff6d", "4228d3fee9d28b3b3a7dc8e9585d02652109029f", "193e98a45b0e4eb7f85595d3c8b572b59242ed68", "e0508499b4cf5794d5aeaf717e7ad9541e9c2bba", "073e26aa7192825a8d872fb0c6f25bc31aca77cf", "0ad3358ffc0d5e44311160767cc0fb65ccd25b00", "5d01bdfd8e5651df4124b6852ffd05364cd7526e", "094b881edab3f5833c4ff2f38d4ed207af141bcd", "2f079a108c75637f2de4f9af2de1ee5d3a3cb0de", "2db3fcbc192ccc04682a5c528f86190b30e1d11e", "514a5c15e8cf3f681febecad954a4508d9189c99", "24724ad8962a9e04eb496fddaefe9708f6960601", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "9aa0d7253574e50fe3a190ccd924433f048997dd", "05a1357946de5eca42a477b7b268db4944219a2e", "1220e4a011c46804d4369b5580dc7fb6e387af54", "5a49c1c694028fd6ca7acc6af601c0f54efa0700", "234e356dec0b8fbecfe2b3ab3297fd8a6786daf1", 
"417ab9b8b003982222017ef585e19680366609f3", "205cf007cf77bbf81e55b74635017087585f7b7c", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "94783d113951822195d4ba44599a8fcbdef9d4bf", "1354abf8baa8b6d39c65758072ae9d0b07227d5c", "0256aa3aba5908e46e5209255dbdb13069ac36d5", "157352f39ed5ba9a571dd4c27b48786ac8be90cf", "793f5e737284925a176f8ec82b3bb0d2178bb330", "4b4ee1ee9bbfd9527fba0bbd761bd61a59f96a48", "0277480cced7d09474dda4dc4f7b9381bd4c895a", "0541d5338adc48276b3b8cd3a141d799e2d40150", "3d1beb48ba561888fdfc458a6b9c396c5175aa86", "4dfdd7cd8abbd68675ea19c5902e5a7d14709799", "9c48179c07963a9fad69a359362c0aee87f9fe18", "088e3e939ad234b6fdd0e321290fb26937dc2553" ], "paperAbstract": "Non-Volatile Memories (NVMs) can significantly improve the performance of data-intensive applications. A popular form of NVM is Battery-backed DRAM, which is available and in use today with DRAMs latency and without the endurance problems of emerging NVM technologies. Modern servers can be provisioned with up-to 4 TB of DRAM, and provisioning battery backup to write out such large memories is hard because of the large battery sizes and the added hardware and cooling costs. We present Viyojit, a system that exploits the skew in write working sets of applications to provision substantially smaller batteries while still ensuring durability for the entire DRAM capacity. Viyojit achieves this by bounding the number of dirty pages in DRAM based on the provisioned battery capacity and proactively writing out infrequently written pages to an SSD. Even for write-heavy workloads with less skew than we observe in analysis of real data center traces, Viyojit reduces the required battery capacity to 11% of the original size, with a performance overhead of 7-25%. 
Thus, Viyojit frees battery-backed DRAM from stunted growth of battery capacities and enables servers with terabytes of battery-backed DRAM.", "pdfUrls": [ "http://www.pdl.cmu.edu/PDL-FTP/NVM/ISCA17_Viyojit.pdf", "http://nvmw.ucsd.edu/nvmw18-program/unzip/current/nvmw2018-final52.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/03/ISCA17_Viyojit.pdf", "http://doi.acm.org/10.1145/3079856.3080236" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/761bb7b9f388304ac3c635665d7aca41f244f806", "sources": [ "DBLP" ], "title": "Viyojit: Decoupling battery and DRAM capacities for battery-backed DRAM", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "7625915bf372217d21fd2e24efdc6d6bb806fa80": { "authors": [ { "ids": [ "2661238" ], "name": "Massimo Marchiori" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.25", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.25", "entities": [ "Smart city" ], "id": "7625915bf372217d21fd2e24efdc6d6bb806fa80", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "192-199", "journalVolume": "", "outCitations": [ "c5fce332b6c701041273b57d7e27268261df8089", "8136368261536e4d6b468bc737147ad51872b97f", "8818a7ffad39306c8e1b48028d1e0eea6cba8681", "f1031d8ca5ec5c54d39b9379cb5fdbae403e21de", "09bbcaf19beabd7dec1f5b547d8ab227a01ba71d", "e6c4448c3fd9c385a0b9f30a1355dd7ae9922b3e", "e17a79c14f2adcdbb71b698b20332e7aca6becfa", "abfd30e1025b1e43898d0b08b1f1c3305407fbb1", "0ab63c23d7d850dfb96b1d0ba6f3784a844d23e9", "91f5d4916ea02a3a4bf7c4437bc574a2dc0268b0", "09ba5efa9a96406a87d07f7086c2b22a44eedd18", "6cb074f980943519aa29184094853d5b6ffea657", "5ff3d1e22a1a2a617c95aa07966c0deca0047f6f", 
"f84f0364ecc95369e50840ef81c6a977f67c4dd6" ], "paperAbstract": "Smart cities are a natural evolution of the recent wave of technology, allowing both to offer better services to citizens and also to save costs. Nevertheless the adoption of smart city solutions have to face major problems, not due to technology but to society: economy and inertia. Even if the adoption of smart city technologies can bring to savings, new solutions need to face the general inertia of public administrations, that would have to face new expenses, without being familiar with these new kinds of options, therefore opting out of the perceived risk. For this reason we feel it is helpful to also consider a variant of smart city technologies, dubbed “Smart Cheap City” (SCC for short). A Smart Cheap City solution does not necessarily aim to maximize benefits to citizen, or to produce the biggest savings, but instead has a primary focus: minimizing the cost of implementation. This leads to lower the entry barrier, facilitating early adoption by cities, and so acting as a gentle introduction to the benefits of smart cities, fostering their wider and more effective adoption. 
We report on a project based on the SCC paradigm, tackling the waste management of a city, and show how smart cheap city solutions can be effectively used to provide both immediate benefit, and to overcome the barriers that have so far slowed down their adoption.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.25" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7625915bf372217d21fd2e24efdc6d6bb806fa80", "sources": [ "DBLP" ], "title": "The Smart Cheap City: Efficient Waste Management on a Budget", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "7626ba5ea8754f99699509784251e49d1e700d86": { "authors": [ { "ids": [ "2118466" ], "name": "Mansour Shafaei" }, { "ids": [ "2083064" ], "name": "Peter Desnoyers" } ], "doi": "", "doiUrl": "", "entities": [ "In-place algorithm", "Locality of reference", "Magnetic storage", "Shingled magnetic recording", "Sputter cleaning", "Synthetic data", "W-shingling" ], "id": "7626ba5ea8754f99699509784251e49d1e700d86", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "f8aa33900f552f8112d6186d78bc845d2dfc0007", "8063715f1f0fe581a58f7659fde9d5a3d60b9147", "3a6a2a2a5904469af04d266995dc84b0eec89ee2", "081fa9cfe750d58db2cd1f5e53e728c5de636910", "31ceeced5d23193c369b98170c45e66bae6ff77d", "006cb2c8713bff9e97a8c68c65e66b98379731f7", "0a60f56f159c47baab19464528cba224e6eaac7a", "0bf50c9aff7d5182504dd18b7cc0f6041b5e520b", "075f51b0aeaac7ebed18d5fbf67e64a14c8943f1", "3fc68bf55557ac4b377bd97bed8b28f3e201d775", "8dee6c0a8438a995b1d2452b84c7544be5f00578", "2018f3fc13cd38122abdf37bf939b5011cd2e3c9" ], "paperAbstract": "Virtual Guard (Vguard) is a track-based static mapping translation layer for shingled magnetic recording (SMR) drives. 
Data is written in-place by caching data from the next track in the shingling direction, allowing direct overwrite of sectors in the target track. This enables Vguard to take advantage of track-level locality, nearly eliminating cleaning for many workloads. We compare performance of Vguard to an available drive-managed SMR drive analyzed and modeled in previous research. Vguard reduces the 99.9% latency by 15\u00d7 for real-world traces, and maximum latency by 32% for synthetic random write workloads.", "pdfUrls": [ "https://www.usenix.org/conference/hotstorage17/program/presentation/shafaei", "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-shafaei.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/7626/ba5ea8754f99699509784251e49d1e700d86.pdf", "s2Url": "https://semanticscholar.org/paper/7626ba5ea8754f99699509784251e49d1e700d86", "sources": [ "DBLP" ], "title": "Virtual Guard: A Track-Based Translation Layer for Shingled Disks", "venue": "HotStorage", "year": 2017 }, "762a3f63c14b597019b2b42399416f4e71be74aa": { "authors": [ { "ids": [ "1688592" ], "name": "Hui Zhang" }, { "ids": [ "1754659" ], "name": "Jeffrey K. 
Hollingsworth" } ], "doi": "10.1109/IPDPS.2017.37", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.37", "entities": [ "Address space", "Benchmark (computing)", "Chapel", "Computer performance", "Correctness (computer science)", "Local variable", "Locality of reference", "Parallel computing", "Partitioned global address space", "Program optimization" ], "id": "762a3f63c14b597019b2b42399416f4e71be74aa", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "377-386", "journalVolume": "", "outCitations": [ "398aaf00253e2c29e6238dd0499aa3a75c76914c", "2a9968defc94dff7f0f40beaf54941dca1a7d342", "0062b2153532a78b3aac817806b8be7a760414f6", "3bf4dcdbd8787a7ded95af0fc22ba87f26532c2e", "3218bbfd89deae4134d6c6d7f8f3ceb5c3a361f7", "1f2ff98f9413bb36c641e9edcfa79f7b33eeb80a", "4721ad0db596f3f78ddb31b4305ddbde35f8f181", "b1f907cd8c25d0acab070a36a9426519a9e669df", "5bc26f5871f4b83006e5262810848aafdad00ef5", "178599e5e976e82528e71cb2e1b812d588fa0e44", "07780846047d6aa2781868b4cdfe13ae8eda21af", "6cef271ffa332f9570fc1d21fcc99e7b35c6a825", "d32d4ff33b1b2665d6081194eb6acdc3c7dd6891", "c5c9f19531d607699f7f8f1f3c8b0105ad023996", "1a8d1bb7364022fbda2e5da92f034f729198ac01", "5f1c5e9481893362449da2270f5cf751875e406a" ], "paperAbstract": "Chapel is an emerging PGAS (Partitioned Global Address Space) language whose design goal is to make parallel programming more productive and generally accessible. To date, the implementation effort has focused primarily on correctness over performance. We present a performance measurement technique for Chapel and the idea is also applicable to other PGAS models. The unique feature of our tool is that it associates the performance statistics not to the code regions (functions), but to the variables (including the heap allocated, static, and local variables) in the source code. 
Unlike code-centric methods, this data-centric analysis capability exposes new optimization opportunities that are useful in resolving data locality problems. This paper introduces our idea and implementations of the approach with three benchmarks. We also include a case study optimizing benchmarks based on the information from our tool. The optimized versions improved the performance by a factor of 1.4x for LULESH, 2.3x for MiniMD, and 2.1x for CLOMP with simple modifications to the source code.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.37" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/762a3f63c14b597019b2b42399416f4e71be74aa", "sources": [ "DBLP" ], "title": "Data Centric Performance Measurement Techniques for Chapel Programs", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "764439c3ba54e88cb440326267d9e718614681e0": { "authors": [ { "ids": [ "40238765" ], "name": "Joe DeBlasio" }, { "ids": [ "39967853" ], "name": "Saikat Guha" }, { "ids": [ "1739245" ], "name": "Geoffrey M. Voelker" }, { "ids": [ "2199298" ], "name": "Alex C. 
Snoeren" } ], "doi": "10.1145/3131365.3131393", "doiUrl": "https://doi.org/10.1145/3131365.3131393", "entities": [ "Ecosystem", "Online advertising", "Search advertising", "Web search engine" ], "id": "764439c3ba54e88cb440326267d9e718614681e0", "inCitations": [ "9b5bc50b465192247d89c0ca3c540938ed0fb5d8" ], "journalName": "", "journalPages": "157-170", "journalVolume": "", "outCitations": [ "d6275e955c24fe5f1d8051cfe6ebf87033b6a595", "b0d2e7a274bf7f778f0cd22486ead79d58c859ac", "2e832347a1cdb1eeceda71a2db27cef6a182cfda", "523b3c576021827efbc54be8794953a216e77225", "363bd45d8d847dded5b0459b19ce3a22d6dbb189", "4826d50f9a3cdff3923e2785c00b2c72da0e9fa2", "38bea412ab3bc9cd5f73d6c2209252f6a1f14aad", "283b0864062319c567d9f77fbdff66b66aa6e293", "9b3b8e6b6a737ddc7b6e665473cec394d44b26af", "0b85f0a61d045cb6b1bb8287f9555e3f3c8bd719", "862d2b3316f699e746931732f768b8a5bbfedbf7", "b2fe06f3bfd3ad73e256872368bf4b02c6cdee31", "cc5baac045812b8530c1c4318a0927b75445ad0c", "22e949f45669eac8c80aa98be3e6b6177b41fbc4", "56459adf8db0849c4f49cfec3fb8b8f55a59cc0b", "fc41d7ad66bea7e801c0478296616c35402aa2b4", "1fe53d5e6bbbc0ca3408a545a17c1fd65623b8de", "5efcb873c4fdfb91bd501c5de2a97cc06a4a4e30", "05fa252ef7e5f659011f4ca0b99b880c4bb1f9e0", "0bbdf88081b98daa8dbf71832ec299924a360aac", "0c63babe1df598d041e26c072195fe8a6f1ad389", "75e0a740fb375524a9d0fc40a79f2c2442e9aaf1", "108f8bad22a541aa53f0c6aff8a4c061f3d4d465", "8ee7be6ec7fe9588c1b7389e7864208230cd7d32", "0bec9e9fee4bee287ed2ea1ef9059b573fb0b711", "318acfafaa66c5b1f1fe93caaa5c435fb637db9d", "635ba31fdabf376078fefe998f7ac2a3e4dcafcd" ], "paperAbstract": "Most search engines generate significant revenue through search advertising, wherein advertisements are served alongside traditional search results. 
These advertisements are attractive to advertisers because ads can be targeted and prominently presented to users at the exact moment that the user is searching for relevant topics.\n Deceptive advertising is harmful to all legitimate actors in the search ad ecosystem: Users are less likely to find what they are looking for and may lose trust in ads or the search engine, advertisers lose potential revenue and face unfair competition from advertisers who are not playing by the rules, and the search engine's ecosystem suffers when both users and advertisers are unhappy.\n This paper explores search advertiser fraud on Microsoft's Bing search engine platform. We characterize three areas: the scale of search advertiser fraud, the targeting and bidding behavior of fraudulent advertisers, and how fraudulent advertisers impact other advertisers in the ecosystem.", "pdfUrls": [ "https://conferences.sigcomm.org/imc/2017/papers/imc17-final177.pdf", "http://cseweb.ucsd.edu/~snoeren/papers/bingads-imc17.pdf", "http://doi.acm.org/10.1145/3131365.3131393", "https://conferences.sigcomm.org/imc/2017/slides/177-DeBlasio-SearchAdFraud.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/764439c3ba54e88cb440326267d9e718614681e0", "sources": [ "DBLP" ], "title": "Exploring the dynamics of search advertiser fraud", "venue": "IMC", "year": 2017 }, "7667d3b352358134d46554400ebb0297102a9de2": { "authors": [ { "ids": [ "32130106" ], "name": "Yasuhiro Fujiwara" }, { "ids": [ "3049487" ], "name": "Naoki Marumo" }, { "ids": [ "27257992" ], "name": "Mathieu Blondel" }, { "ids": [ "2775065" ], "name": "Koh Takeuchi" }, { "ids": [ "2144822" ], "name": "Hideaki Kim" }, { "ids": [ "2664600" ], "name": "Tomoharu Iwata" }, { "ids": [ "1735221" ], "name": "Naonori Ueda" } ], "doi": "10.1145/3035918.3064021", "doiUrl": "https://doi.org/10.1145/3035918.3064021", "entities": [ "Computation", "Cubic function", "Data point", "Dimensionality reduction", "Inverse iteration", 
"Kernel (operating system)", "LU decomposition", "Lagrange multiplier", "Nonlinear dimensionality reduction", "Nonlinear system", "Power iteration", "Ripple" ], "id": "7667d3b352358134d46554400ebb0297102a9de2", "inCitations": [], "journalName": "", "journalPages": "1479-1492", "journalVolume": "", "outCitations": [ "06a934d2f0102d1a011e3f0e2d13bfe3a4cd9559", "244e27bf8c5a5277a733c5271fdeb306f98115d5", "1c718faf70f8285e7093c23d78a740a918a9a9fd", "702049aa4b4255a7c40acde1cbc15787f38140f5", "4f62d84f1171ec81519ecbe573fc617e174dd753", "79b59321cfe90c3835a7b74d7e15de862614be5e", "2059be0aa4a57d00d204c9ccdf4deeed2c984e07", "a039592b7a86ce1d928aa12fa3d475f35c2a6e77", "195e2a1e5e6ad0c3352730527723efa164e5dc34", "ec975f959cc47a78dffc22d0219f3bf0b95de80e", "ad612f9c4b3219120ad0c715614e06fd1caa6eec", "1dbc30663ff1284534baf992ba82774817b2f36d", "7419cb6342f5dba08790465dded2e9a79ef79ff5", "708c6a0020f2c56ab225a748c3f90eb3faddfecc", "27909aa46f74c23eae41912e27b0c9dc0bfacb0f", "66dbec672b044eb917efb0c0d3238290688fd47f", "be37eaa99bee96dec9c20964b79c37a07806843c", "9c018f1b0aabe84ba61dfc969e273f2a9653c6ec", "5a87d1c6cf8fc2737f1f8dedfb31ab364872cfe0", "128fd1c7a051eb456480096b78d52e5f89533dc5", "18bdc78571825b2cf66f18a0f52f3d218d02fb16", "bf4f79fd31493648d80d0a4a8da5edeeaba74055", "5a2d20868eddd4312c19c0144f2d651dc9141163", "3339f78b2d27233d22fd806cfcdc6800a2a2d945", "0ca73582f2c773ecbeeea6932bfc26494e965058", "0c4867f11c9758014d591381d8b397a1d38b04a7", "2d32885d9ba6a67598c9161a5ffbe5544e098d15", "2ef606258486d6c32fd0b9ca54244273c21331b9", "bf229459f3b3e617dafb5315262283eac2735634", "456bdfe889ea06bc1a1c8891e65f57b5549b6e92", "eede0832ebef207c2c55415294f44ce2fdc0b905", "408cd9103f2d7cdafce2f6b984035b2be0ed9b7d", "3b8a4cc6bb32b50b29943ceb7248f318e589cd79", "c11169fbf077d8e762a476d3b38b14fc84edd8ab", "6d73c9947d840cd84a8eee79c224add5fbbb929e", "31181e73befea410e25de462eccd0e74ba8fea0b", "0479b7e8c433e3f18a2b6c5dedd328f0229c1566", "122eddb0391a84eb40bca0370975229919e2e10b" ], 
"paperAbstract": "Locally Linear Embedding (LLE) is a popular approach to dimensionality reduction as it can effectively represent nonlinear structures of high-dimensional data. For dimensionality reduction, it computes a nearest neighbor graph from a given dataset where edge weights are obtained by applying the Lagrange multiplier method, and it then computes eigenvectors of the LLE kernel where the edge weights are used to obtain the kernel. Although LLE is used in many applications, its computation cost is significantly high. This is because, in obtaining edge weights, its computation cost is cubic in the number of edges to each data point. In addition, the computation cost in obtaining the eigenvectors of the LLE kernel is cubic in the number of data points. Our approach, Ripple, is based on two ideas: (1) it incrementally updates the edge weights by exploiting the Woodbury formula and (2) it efficiently computes eigenvectors of the LLE kernel by exploiting the LU decomposition-based inverse power method. 
Experiments show that Ripple is significantly faster than the original approach of LLE by guaranteeing the same results of dimensionality reduction.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064021", "https://bigdata.nii.ac.jp/eratokansyasai4/wp-content/uploads/2017/09/a3e433715ef245279f3b644d59265792.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7667d3b352358134d46554400ebb0297102a9de2", "sources": [ "DBLP" ], "title": "Scaling Locally Linear Embedding", "venue": "SIGMOD Conference", "year": 2017 }, "769d69b52e5e45cabf0ea529da435cfa0ea2e447": { "authors": [ { "ids": [ "7530678" ], "name": "Chongsheng Yu" }, { "ids": [ "1685323" ], "name": "Xin Li" }, { "ids": [ "39350835" ], "name": "Lei Ju" }, { "ids": [ "1682848" ], "name": "Yu Zhang" }, { "ids": [ "34278336" ], "name": "Jian Qin" }, { "ids": [ "40651849" ], "name": "Lei Dou" }, { "ids": [ "1699746" ], "name": "Jie Liu" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.76", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.76", "entities": [ "Algorithm", "Embedded system", "Global Positioning System", "Mobile phone", "Star Trek:" ], "id": "769d69b52e5e45cabf0ea529da435cfa0ea2e447", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "586-593", "journalVolume": "", "outCitations": [], "paperAbstract": "Position prediction of moving object has become a reality utilizing the vast amount of location data acquired by positioning devices embedded in mobile phones and cars. In this paper, we proposed a position prediction system which focuses on the time regularity of object moving. Historical location data of the object is used to extract personal trajectory patterns to obtain candidate next positions. 
Each of the candidate positions is scored by the proposed Time Mode-based Prediction (TMP) algorithm according to the proximity between the time component of patterns and current time. The position with the highest score is regarded as predicted next position. Furthermore, a hybrid B/S and C/S architecture is employed to perform the real-time prediction and results display. An evaluation based on a public trajectory data set of 12 objects demonstrates that the proposed TMP algorithm can realize position prediction with high accuracy. Moreover, the average accuracy rate of our prediction algorithm is about 85.5%, which is 33.7% greater than the Markov-based algorithm with one known position.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.76" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/769d69b52e5e45cabf0ea529da435cfa0ea2e447", "sources": [ "DBLP" ], "title": "Time Mode Based Next Position Prediction System", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "76a4074a630e7742034ec23e41a0eb4c4ac9ab19": { "authors": [ { "ids": [ "40518047" ], "name": "Ofer Freedman" }, { "ids": [ "1683786" ], "name": "Pawel Gawrychowski" }, { "ids": [ "2970238" ], "name": "Patrick K. 
Nicholson" }, { "ids": [ "1681527" ], "name": "Oren Weimann" } ], "doi": "10.1145/3087801.3087804", "doiUrl": "https://doi.org/10.1145/3087801.3087804", "entities": [ "Adjacency matrix", "Approximation algorithm", "B-tree", "ICALP", "Long line (telecommunications)", "Lowest common ancestor", "PODC", "SIAM Journal on Computing", "Symposium on Foundations of Computer Science", "Tree (set theory)" ], "id": "76a4074a630e7742034ec23e41a0eb4c4ac9ab19", "inCitations": [ "55dc778b004b4a07eeb879361c040e5e84293fe1" ], "journalName": "", "journalPages": "185-194", "journalVolume": "", "outCitations": [ "b312cf1d457652ff4b8cfe00f204ad6c0e5bc464", "2b1e4c2171da16b564d32cfa92361b1546829212", "754130318d6b1283495d87ec3b191a52f075647f", "0e557bf922da1f2ed6b1b32b3d13e7d6b5a192c8", "abbcce9298ffa0f93fadf9c33dec035faaa49611", "f31248216a301760ea5d2f007d2bbcc384d5fafe", "5cb928ba3da05fe38b6f7cc101dc79951d22f9fd", "85d3b35ce8617503754da901ed5fdb4076354b55", "0badcbdc0404e385342cfa09add4a6be50b2c1ed", "67d6983066cf88ac916f9dabedf5872495b1445b", "4d2b4e6901c831911418b4cf0eb3edef3fca81ea", "c8ae5cd80bbce1749e7f83c9c200a9333932590c", "0234fefbfe64eaa8f560b1640df40de1bcd2c20b", "aa88dac90ee7b66dad6f101d66b05c82c73ee134", "24927a463e19c4348802f7f286acda31a035715b", "002f4a1d1b5aa35b5b2bf7229320abdcd554f20c", "13d545b8997581c192159fd8c204a6ceb5b4ff93", "26cb1dae1bf001d3631108b39c873643e309e364", "5c27accddec3708500f8f6fcc0aebf0085ce3bf0", "3c0b574b98e2fae687f021d77637e24e2d2d641f", "880243e65f83cabfcc3e96ec131ff525e8d74b2a", "7fe0d988db286872295d216da719f5a110adc3ae", "a031ca31eaa68431ac80e5b592586f48fb560cf3", "6f82be8e4e9ae1eb7c004f2c562c9e79c37ad3c6", "a04df36569ffe1fe67d7c96aac2903b78fbb337d", "a02280cbe1653a835eea2d0a1a038420243fd77f" ], "paperAbstract": "Labeling schemes seek to assign a short label to each node in a network, so that a function on two nodes (such as distance or adjacency) can be computed by examining their labels alone. 
For the particular case of trees, following a long line of research, optimal bounds (up to low order terms) were recently obtained for adjacency labeling [FOCS \u201915], nearest common ancestor labeling [SODA \u201914], and ancestry labeling [SICOMP \u201906]. In this paper we obtain optimal bounds for distance labeling. We present labels of size 1{4 log n` oplog nq, matching (up to low order terms) the recent 1{4 log n \u0301Oplog nq lower bound [ICALP \u201916]. Prior to our work, all distance labeling schemes for trees could be reinterpreted as universal trees. A tree T is said to be universal if any tree on n nodes can be found as a subtree of T . A universal tree with |T | nodes implies a distance labeling scheme with label size log |T |. In 1981, Chung et al. proved that any distance labeling scheme based on universal trees requires labels of size 1{2 log n \u0301 log n \u0308 log log n`Oplog nq. Our scheme is the first to break this lower bound, showing a separation between distance labeling and universal trees. The \u0398plog nq barrier for distance labeling in trees has led researchers to consider distances bounded by k. The size of such labels was improved from log n`Opk ? log nq [WADS \u201901] to log n ` Opk logpk log nqq [SODA \u201903] and finally to log n ` Opk logpk logpn{kqqq [PODC \u201907]. We show how to construct labels whose size is the minimum between log n ` Opk logpplog nq{kqq and Oplog n \u0308 logpk{ log nqq. We complement this with almost tight lower bounds of log n ` \u03a9pk logplog n{pk log kqqq and \u03a9plog n \u0308 logpk{ log nqq. Finally, we consider p1` \u03b5q-approximate distances. We show that the recent labeling scheme of [ICALP \u201916] can be easily modified to obtain an Oplogp1{\u03b5q \u0308 log nq upper bound and we prove a matching \u03a9plogp1{\u03b5q \u0308 log nq lower bound. 
\u030aThe research was supported in part by Israel Science Foundation grant 794/13.", "pdfUrls": [ "https://arxiv.org/pdf/1608.00212v1.pdf", "http://doi.acm.org/10.1145/3087801.3087804", "http://www.cs.haifa.ac.il/~oren/Publications/caterpillars.pdf", "http://arxiv.org/abs/1608.00212", "https://arxiv.org/pdf/1608.00212v2.pdf", "http://arxiv.org/pdf/1608.00212v1.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/845c/5fd688aad6697d32540ac806196b45a2874b.pdf", "s2Url": "https://semanticscholar.org/paper/76a4074a630e7742034ec23e41a0eb4c4ac9ab19", "sources": [ "DBLP" ], "title": "Optimal Distance Labeling Schemes for Trees", "venue": "PODC", "year": 2017 }, "76a87e2ebd5393cdb18b68d38039231b02a64718": { "authors": [ { "ids": [ "16893749" ], "name": "Mehmet Fatih Aktas" }, { "ids": [ "2459687" ], "name": "Javier Diaz Montes" }, { "ids": [ "1709070" ], "name": "Ivan Rodero" }, { "ids": [ "1750983" ], "name": "Manish Parashar" } ], "doi": "10.1109/ICPP.2017.34", "doiUrl": "https://doi.org/10.1109/ICPP.2017.34", "entities": [ "CPU cache", "Computation", "Computational complexity theory", "Context of computational complexity", "Data access", "Data-intensive computing", "Dataspaces", "Deployment environment", "Non-uniform memory access", "Predictive modelling" ], "id": "76a87e2ebd5393cdb18b68d38039231b02a64718", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "251-260", "journalVolume": "", "outCitations": [ "34c40cf24b3897683e1b3231ef80ffadbe61f901", "0fbed4dbe1ee87479c12330df89e4ef8540f0156", "ed8453876bbd36f2aa37257bcbe985f80e0bcf38", "1e770d2259a641269343aca023ca690fbf11b8a1", "7717cb7fbbf26557238c2ef847d0a48def176d0b", "a7c584c93fe8d48d44262f449081d8dc66d3c705", "11c7d538faa06f5cfde52dae687cdc25dd26c3f1", "9e0e22ae4c7bc19a9128a93be9954f5b0e078791", "61ad98f7f693221bf2149897955aa93eac8950ba", "57d791eb2cd5fe8ed9cfe8a7167f7a4439e3b11e", "f51026239f5786b31ee28dbb6dee4024a2f6dcd7", 
"f733aeafc308ffa62d606098dcdd42b1a348d2a0", "25a0b0eb9ca8966e34bbaece280e34445124ad93", "e75e0921c25aa34ed07d13e93e6c87099e702fc2", "0368d2445d3ee4205ee73da933cb8b810a89091c", "33b7ee84455dc9e0c9af29bc565f13ca53bf1be7", "5e8045347e39748456d163d7726cb3598be8e207", "9180aa7b7978c62363e4af3a9053371775fbcbdc", "64d4f6759b32697e6cbebf901624c93c0a0c1744", "bd3fa54f6c515bbdd48128ae1a0f45ad987fc32f", "95ec136b0abaa9c0882c3ab17581ed6a9c78ac90", "4ff82f6f7eb86216ddbad4f38c3b06650ad082d0", "13992a8c7704b8913f91094fa795884768ff332e", "89a18521ca5dcc926198ad627c072d071683a5f7", "1324f1d5b20f08cac775f10089a788767c56d5a9", "4e055f0ce6220e6d75aa2c6d7de50455dea572ef" ], "paperAbstract": "Data staging has been shown to be very effective for supporting data intensive in-situ workflows and coupling of applications. Experimental sciences are increasingly becoming collaborative among geographically distributed teams, and include experimental instruments and HPC facilities. This new way of doing science poses new challenges due to data sizes, complexity of computation, and the use of wide area networks between couplings. In this paper, we explore how the staging abstraction can be extended to support such workflows. Specifically, we develop a NUMA-like abstraction that orchestrates multiple distributed local-area staging abstractions, and provides asynchronous data put/get semantics to enable data sharing across them. To mask data movement overhead and provide in-time data access, we propose the use of predictive prefetching approaches that leverage the iterative nature of the coupling. We evaluate our prototype implementation using a fusion workflow and show that our design can effectively and transparently support widearea coupled workflows. 
Additionally, results show that the use of prefetching techniques leads to significant gains in data access times of data that needs to be moved over the wide area network.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.34" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/76a87e2ebd5393cdb18b68d38039231b02a64718", "sources": [ "DBLP" ], "title": "WA-Dataspaces: Exploring the Data Staging Abstractions for Wide-Area Distributed Scientific Workflows", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "76bcb2112024eb75549d3781d1799ffee82e58f1": { "authors": [ { "ids": [ "40027070" ], "name": "Junjie Wang" }, { "ids": [ "1837117" ], "name": "Bihuan Chen" }, { "ids": [ "1761382" ], "name": "Lei Wei" }, { "ids": [ "38057149" ], "name": "Yang Liu" } ], "doi": "10.1109/SP.2017.23", "doiUrl": "https://doi.org/10.1109/SP.2017.23", "entities": [ "Abstract family of languages", "Bug bounty program", "Code coverage", "Context-sensitive grammar", "Deep learning", "Denial-of-service attack", "Heuristic", "Internet Explorer", "JavaScript", "Knowledge Search", "Memory corruption", "Mutation (genetic algorithm)", "Open-source software", "Parsing", "Rendering (computer graphics)", "Seed", "Software bug", "Text corpus", "XML", "XSLT", "libxml2", "libxslt" ], "id": "76bcb2112024eb75549d3781d1799ffee82e58f1", "inCitations": [ "2cf43b8bc82f063e257bf21c92e5b038eacd34d3", "18ec9aa174db18f94c4bab972d8631b3fee26881", "b6ade209fba99c0bb17b979252bdbaf354ca743b", "b0fd7a0f70b64c06031bb915d9aedd44b6550b16", "d9b98ccc75ec1a464e84121916fb39838dc61862" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "579-594", "journalVolume": "", "outCitations": [ "8eeff5e62ad0dd4073eb80377db29e7cd7b8a24f", "7ae091ea6b9221fa8e7fe4c1295557fc1749a9d2", "95baae72c5fcca4038339c350556dd6143d9a263", "2c13dcfdc5ea2d355a46fe326c371038a00ba7f5", "1d014a13de863f6f6f12aadd4df15e44f4527acf", 
"55b7d2e425c5e813ed51780845398ef5d246a4d3", "08e2d172e510ab14713b28cc71a37e7c78cc7b13", "479bf197a3a9c5f6f7b06e64eefa0a90fa0c8b41", "5556995fb630c47805bbba560287ea59ce357fa1", "2859e75f421621ff776d55e533dc6ee7cb4b0a92", "18e965d40f7dacb88bca7b0a231eca5adbfb6201", "0ff3bc9d452f44ab93b5feb1763118837c77fec3", "0228d60b7a56a3d778e5425c41eaf72cf0b6ec55", "2a5c00109e15c68e8d8f479a2657c2889cc69cc5", "0adfaffb6e236d886c6eda579b28f5a8530019c6", "96ff84f5ac7aaacad8c131fd8d9e5697337cbe4f", "36800d797c927b1be9437a789eaa30e90d0b7c87", "6ea63d09993b9a268689790ea8d25bc36345497e", "15883946146cf0dde6799e5ee5147f2ec839bf9f", "29146d2b80bc8c56f7e18efe8d2c92354254e947", "3fb4c74305c237147567a687857839ba0be2b4ab", "341d33498388711a5303c5f51433b3d5739a21d2", "27145fe45450babe306513efb97ae0ec8590c246" ], "paperAbstract": "Programs that take highly-structured files as inputs normally process inputs in stages: syntax parsing, semantic checking, and application execution. Deep bugs are often hidden in the application execution stage, and it is non-trivial to automatically generate test inputs to trigger them. Mutation-based fuzzing generates test inputs by modifying well-formed seed inputs randomly or heuristically. Most inputs are rejected at the early syntax parsing stage. Differently, generation-based fuzzing generates inputs from a specification (e.g., grammar). They can quickly carry the fuzzing beyond the syntax parsing stage. However, most inputs fail to pass the semantic checking (e.g., violating semantic rules), which restricts their capability of discovering deep bugs. In this paper, we propose a novel data-driven seed generation approach, named Skyfire, which leverages the knowledge in the vast amount of existing samples to generate well-distributed seed inputs for fuzzing programs that process highly-structured inputs. Skyfire takes as inputs a corpus and a grammar, and consists of two steps. 
The first step of Skyfire learns a probabilistic context-sensitive grammar (PCSG) to specify both syntax features and semantic rules, and then the second step leverages the learned PCSG to generate seed inputs. We fed the collected samples and the inputs generated by Skyfire as seeds of AFL to fuzz several open-source XSLT and XML engines (i.e., Sablotron, libxslt, and libxml2). The results have demonstrated that Skyfire can generate well-distributed inputs and thus significantly improve the code coverage (i.e., 20% for line coverage and 15% for function coverage on average) and the bug-finding capability of fuzzers. We also used the inputs generated by Skyfire to fuzz the closed-source JavaScript and rendering engine of Internet Explorer 11. Altogether, we discovered 19 new memory corruption bugs (among which there are 16 new vulnerabilities and received 33.5k USD bug bounty rewards) and 32 denial-of-service bugs.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.23", "https://users.cs.fiu.edu/~carbunar/teaching/cen5079/cen5079.2017/presentations/fuzzing.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/76bcb2112024eb75549d3781d1799ffee82e58f1", "sources": [ "DBLP" ], "title": "Skyfire: Data-Driven Seed Generation for Fuzzing", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "76d9ab5822fce4d1eca85f9e816242dc632d88f4": { "authors": [ { "ids": [ "34994156" ], "name": "Robert Nishihara" }, { "ids": [ "29912342" ], "name": "Philipp Moritz" }, { "ids": [ "2117867" ], "name": "Stephanie Wang" }, { "ids": [ "40044088" ], "name": "Alexey Tumanov" }, { "ids": [ "38516379" ], "name": "William Paul" }, { "ids": [ "2115957" ], "name": "Johann Schleier-Smith" }, { "ids": [ "3393220" ], "name": "Richard Liaw" }, { "ids": [ "2748967" ], "name": "Mehrdad Niknami" }, { "ids": [ "1694621" ], "name": "Michael I. 
Jordan" }, { "ids": [ "1716557" ], "name": "Ion Stoica" } ], "doi": "10.1145/3102980.3102998", "doiUrl": "https://doi.org/10.1145/3102980.3102998", "entities": [ "Computation", "Feedback", "Machine learning", "Requirement", "Throughput" ], "id": "76d9ab5822fce4d1eca85f9e816242dc632d88f4", "inCitations": [ "fdc54fe24f3acee66f0fe17d9b3d9a928ef98b0a", "9cb3de2ba7c63f7a9b0c312799b9a31e2e4cce59", "081fdeea36d4b56a71e87b5b0de191aa368261c8", "0e8f5cd8d8dbbe4a55427e90ed35977e238b1eed", "bcf9f2de8134a725d220caf44e225b8325870e19" ], "journalName": "", "journalPages": "106-110", "journalVolume": "", "outCitations": [ "19fd2c2c9d4eecb3cf1befa8ac845a860083e8e7", "1740eb993cc8ca81f1e46ddaadce1f917e8000b5", "41e71c53ca2a7be0ba90919af8f3049d957e665e", "7ff303e7c450aee82b6fff5cc64be54e5604da01", "06154716d6d51256ed2bb014ef65ec8b5d41aa26", "bbb9c3119edd9daa414fd8f2df5072587bfa3462", "60f22ad725f041fff81d6371242485bbe5c3ebb6", "0608d9937c074520cdc93cc444cc1c77039c5332", "080aebd2cc1019f17e78496354c37195560b0697", "4c44cbcea788cc024b29ddf178249ee1c367464a", "0541d5338adc48276b3b8cd3a141d799e2d40150", "6f54a7933235ced5684e3bff18f7e5dc40510018", "1464776f20e2bccb6182f183b5ff2e15b0ae5e56", "4954fa180728932959997a4768411ff9136aac81", "583aa7cfcacdab3df24b9b3281d34763a22accef", "8e0eacf11a22b9705a262e908f17b1704fd21fa7", "332f77fd05703c1607e3b57884ad31fb1fad0104", "0236335b815ef41e86f0fe41e53a2acc1d4742f6" ], "paperAbstract": "Machine learning applications are increasingly deployed not only to serve predictions using static models, but also as tightly-integrated components of feedback loops involving dynamic, real-time decision making. 
These applications pose a new set of requirements, none of which are difficult to achieve in isolation, but the combination of which creates a challenge for existing distributed execution frameworks: computation with millisecond latency at high throughput, adaptive construction of arbitrary task graphs, and execution of heterogeneous kernels over diverse sets of resources. We assert that a new distributed execution framework is needed for such ML applications and propose a candidate approach with a proof-of-concept architecture that achieves a 63x performance improvement over a state-of-the-art execution framework for a representative application.", "pdfUrls": [ "https://arxiv.org/pdf/1703.03924v1.pdf", "http://doi.acm.org/10.1145/3102980.3102998", "https://arxiv.org/pdf/1703.03924v2.pdf", "http://arxiv.org/abs/1703.03924" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/76d9ab5822fce4d1eca85f9e816242dc632d88f4", "sources": [ "DBLP" ], "title": "Real-Time Machine Learning: The Missing Pieces", "venue": "HotOS", "year": 2017 }, "76f427542150941395ef7b1c8a8043b7a3a5ea5f": { "authors": [ { "ids": [ "3461396" ], "name": "Mohammad-Parsa Hosseini" }, { "ids": [ "1917915" ], "name": "Tuyen X. Tran" }, { "ids": [ "1735828" ], "name": "Dario Pompili" }, { "ids": [ "2304632" ], "name": "Kost V. 
Elisevich" }, { "ids": [ "1682711" ], "name": "Hamid Soltanian-Zadeh" } ], "doi": "10.1109/ICAC.2017.41", "doiUrl": "https://doi.org/10.1109/ICAC.2017.41", "entities": [ "Autonomic computing", "Big data", "Decision support system", "Deep learning", "Discharger", "Edge computing", "Electrocorticography", "Electroencephalography", "Emergence", "Feature extraction", "Multimodal interaction", "Neurostimulation", "Residue number system", "Resting state fMRI", "Self-discharge", "Simulation", "Spontaneous order" ], "id": "76f427542150941395ef7b1c8a8043b7a3a5ea5f", "inCitations": [ "6cae9ad284a73471a8ed9e483b1673a60d61d946", "70401ee7cf17cc6848a60b37a1fd500ee26c2291", "03a0e8563f1a65c7fdc68d21ddc4aaced677de41", "96adad0a36a41349729565c6057826df5681f290", "a1096e811dac3dfb6ea515a04e694dd901cf7ca4", "411492940b0766923b4384b03ccbb5ed9a71c500" ], "journalName": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "journalPages": "83-92", "journalVolume": "", "outCitations": [ "8d08d4286d978935d710ccb92f242297923d0395", "d0a18f442d32eb2ccafd3773d458a191d230c690", "0b75c7b08726857be88341120971329382dc8c50", "272065b6a1548f51f8509903a3b082464e776fe5", "b64c938fc399999ca738501224fc7d5ee33c4673", "b07524896eca854351a03f6885e276eb3afb3883", "128cefbf4b3c8945fac6947fabf1981a15d74250", "a14dfa4684a6935f8f2f0264bd3112bdb2e65454", "8d2f5e7b6ae46aa7d0e6f3aa89f122478ffc76cb", "6938d305ee3a51b4197580d1174c355248d2c0ff", "ab99bcd4e4571b1903b0e03f2efb3c5892949b83", "95530415c2dd716093121950aee5a49e5eac6816", "9a1daf4248a260b9df0a335829e151aba174dbbe", "912c084010703706632e58a67195158e62acb22b", "6452216674d141d4361028b7e8a015c152129852", "ac2610e783457541ec12f146055a71e7826be37d", "16778143b4f6dfc5f5727d266a3f809901e3145c", "042180000d256454446fdcd3dbac8cf7c0093ead", "0789fe98af2f2ac6b0ff952742309edf62154a6e", "45beb6d732293c80e33d37f100564afd19262072", "18c5814a064748ea2559e205f02dbbdfe7655cd4", "de7848ff6949602dc8f1479929153c04168f8a08", 
"85c1730b5a310dfbc060f8c42d29081986f90169", "536bdd662448aeb14636e18ee2012398c5c74004", "2cbb401c2c452f28aaea23d1dacdfac416ca4077", "9f5a828ad6a2b8bac21f7343a653d2f613254ac9", "726ee1abe8df46b36ad08b6d16c712510da7a8aa", "db62dfe38f0deb4ed72adc60b2b326fe7d1892e4", "226575b9b189989965d44538c70b710b526d818f", "a7765cc0ba64c209dd9d2666a8db8a049831949f", "5eeb52bfffd537c60708cac5c44ca20987ecb400", "e7214c5b6a24d23bd762c036a52df32584d1c969", "0f7ec1772b263e1f361b37ac61f953da57116396", "f81da11f8ee74b26393b84e6bed0f8275311c3c1" ], "paperAbstract": "Epilepsy is a chronic brain disorder characterized by the occurrence of spontaneous seizures of which about 30 percent of patients remain medically intractable and may undergo surgical intervention; despite the latter, some may still fail to attain a seizure-free outcome. Functional changes may precede structural ones in the epileptic brain and may be detectable using existing noninvasive modalities. Functional connectivity analysis through electroencephalography (EEG) and resting state-functional magnetic resonance imaging (rs-fMRI), complemented by diffusion tensor imaging (DTI), has provided such meaningful input in cases of temporal lobe epilepsy (TLE). Recently, the emergence of edge computing has provided competent solutions enabling context-aware and real-time response services for users. By leveraging the potential of autonomic edge computing in epilepsy, we develop and deploy both noninvasive and invasive methods for the monitoring, evaluation and regulation of the epileptic brain, with responsive neurostimulation (RNS; Neuropace). First, an autonomic edge computing framework is proposed for processing of big data as part of a decision support system for surgical candidacy. Second, an optimized model for estimation of the epileptogenic network using independently acquired EEG and rs-fMRI is presented. 
Third, an unsupervised feature extraction model is developed based on a convolutional deep learning structure for distinguishing interictal epileptic discharge (IED) periods from nonIED periods using electrographic signals from electrocorticography (ECoG). Experimental and simulation results from actual patient data validate the effectiveness of the proposed methods.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICAC.2017.41", "http://nsfcac.rutgers.edu/sites/all/files/Biblio%20Papers/08005336.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/76f427542150941395ef7b1c8a8043b7a3a5ea5f", "sources": [ "DBLP" ], "title": "Deep Learning with Edge Computing for Localization of Epileptogenicity Using Multimodal rs-fMRI and EEG Big Data", "venue": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "year": 2017 }, "777b13a1efbc4e6fbacdc1bc4c2cf4987880bfa9": { "authors": [ { "ids": [ "1779678" ], "name": "Calvin C. Newport" } ], "doi": "10.1145/3087801.3087813", "doiUrl": "https://doi.org/10.1145/3087801.3087813", "entities": [ "1-bit architecture", "Algorithm", "Communication complexity", "Mobile operating system", "Mobile phone", "Network topology", "Operating system", "Peer-to-peer", "Polynomial", "Pseudorandomness", "Randomness", "Smartphone", "Time complexity" ], "id": "777b13a1efbc4e6fbacdc1bc4c2cf4987880bfa9", "inCitations": [], "journalName": "", "journalPages": "43-52", "journalVolume": "", "outCitations": [ "254a19a4c7b374d507de6d4c0aae709d3abdd733", "e70f108961a7b250d3a77ca4a16c8e65626e96e6", "4ef3133817267e0829bbff3e2024af93403dddc7", "52e3d54c7fe011413a4f1cb555e6374d10765a22", "fa7ff5e70d3fb5f57088edd537fa693fa3229538", "13dff6a28d24e4fe443161fcb7d96b68a085a3d4", "a9ea13a34553e3c4fce7abf46dd5999f8bc73cd9", "84521e6d27c705ca3f97e20bbcc3e80a29779f78", "5928dd51e1d7d940d528ffc0455cab8248c551bc", "a609ee7d499abfe207dd3c0e4861201ea327a433", "6f8c546b574ff16a800d202d51900cc1e56e4e94", 
"19ae27ba71869cc4328fe428eddec223a5cb2a7d", "f7734fc62ec55d5ffa62d74f461bf725c45dfed4", "0967bd75632d959541ee4afef35a5ef37c805cc7", "bb92a3071b138f9e7c21e11e475ee3b3ab715da3", "718bba0f9b305c9bcbb332d1e12f87949d97cf95" ], "paperAbstract": "In this paper, we study the fundamental problem of gossip in the mobile telephone model: a recently introduced variation of the classical telephone model modified to better describe the local peer-to-peer communication services implemented in many popular smartphone operating systems. In more detail, the mobile telephone model differs from the classical telephone model in three ways: (1) each device can participate in at most one connection per round; (2) the network topology can undergo a parameterized rate of change; and (3) devices can advertise a parameterized number of bits about their state to their neighbors in each round before connection attempts are initiated. We begin by describing and analyzing new randomized gossip algorithms in this model under the harsh assumption of a network topology that can change completely in every round. We prove a significant time complexity gap between the case where nodes can advertise 0 bits to their neighbors in each round, and the case where nodes can advertise 1 bit. For the latter assumption, we present two solutions: the first depends on a shared randomness source, while the second eliminates this assumption using a pseudorandomness generator we prove to exist with a novel generalization of a classical result from the study of two-party communication complexity. We then turn our attention to the easier case where the topology graph is stable, and describe and analyze a new gossip algorithm that provides a substantial performance improvement for many parameters. We conclude by studying a relaxed version of gossip in which it is only necessary for nodes to each learn a specified fraction of the messages in the system. 
We prove that our existing algorithms for dynamic network topologies and a single advertising bit solve this relaxed version up to a polynomial factor faster (in network size) for many parameters. These are the first known gossip results for the mobile telephone model, and they significantly expand our understanding of how to communicate and coordinate in this increasingly relevant setting.", "pdfUrls": [ "http://people.cs.georgetown.edu/~cnewport/pubs/gossipmobile-full.pdf", "http://doi.acm.org/10.1145/3087801.3087813", "https://arxiv.org/pdf/1705.09609v1.pdf", "http://arxiv.org/abs/1705.09609" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/c4ff/5315a17389e170ecede1cd66190dd29a9b59.pdf", "s2Url": "https://semanticscholar.org/paper/777b13a1efbc4e6fbacdc1bc4c2cf4987880bfa9", "sources": [ "DBLP" ], "title": "Gossip in a Smartphone Peer-to-Peer Network", "venue": "PODC", "year": 2017 }, "77866d8b193fdc69917e5afc6542d9e31ce8f145": { "authors": [ { "ids": [ "2895933" ], "name": "Donald Nguyen" }, { "ids": [ "1776186" ], "name": "Keshav Pingali" } ], "doi": "10.1145/3037697.3037750", "doiUrl": "https://doi.org/10.1145/3037697.3037750", "entities": [ "BASIC Stamp", "Benchmark (computing)", "Blue Gene", "Central processing unit", "Data structure", "HTML", "Haswell (microarchitecture)", "Nehalem (microarchitecture)", "Programming model", "Scalability", "Software transactional memory", "Speedup", "Transactional memory", "Westmere (microarchitecture)" ], "id": "77866d8b193fdc69917e5afc6542d9e31ce8f145", "inCitations": [ "340deebfcc6d4cc2a56ef4f5262e9cd7496e96a2" ], "journalName": "", "journalPages": "105-118", "journalVolume": "", "outCitations": [ "993e21ed73fc39048a42d06855bc85236ffd1063", "769792475cc90f1c8794d61c55f91307617a221b", "10ac3bfb06f169675c20359c3319a656ec494334", "57fe2b6acccbba91df0847442d2634ffef7ccfb4", "6aa7427f9dcd89ed9aba1c8433b43ea5741c0816", "46ec0c7cbef89e31f878b8e9716a6c1c1cdedd29", "12fab45bab0c56ac8bb29f83f431d7233ee9232c", 
"512a1ebdcaca56f3ea0c21aa2abe9a5ab7dace06", "29f56a7f34879033bc6ecf52e03099fb55277e0d", "2e5b84bb9aee003f3895060e27490dcb289facf7", "1e3778a9aa1283ca322bd5b60262f09595b3e0c0", "4721ad0db596f3f78ddb31b4305ddbde35f8f181", "0948c0acfb779e551e5c2420081eab206f57f396", "46e61ad29ab20618fb551afbc00ebb8eb4e9be21", "6756d3e0669430fa6e006754aecb46084818d6b6", "1c5ab8e3d63cd979419ef51b7f05526defdd7d49", "92734d3844dd7a6afbde724ac770c65f748a4510", "0425f1e7e8651b5ba3c9e2eb98a3c50a07146972", "0b1c3b2554a683bab2db9100ed53a7de545e7b98", "dad318fb2f3ecf39063e5484a95893085c440850", "0003b4b4c35f0384596b6f129f7dda8449cdc79f", "171695dfdb42ea09ea3207f0f5fd11985c02e671", "317aa3281b6ccb8c02f3076a63a08e5429bf37cc", "94c20561f102ede79c461c6aeee74eb7a55d5eb4", "200609035711763e162096cc010cc3e00895c6c6", "05ef51cba0404893c1d13259b049890bfe06366a", "5dba3105fc05e6ba918106cb3f96d482c1a092f8", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "1e0d2de1f900aa681c04bd08bc5f6f405e56f18c", "323292fad95a1bce506e100ac8d622019a2012d2", "1bb2f9e63b68038843ef1a59295f057167ec7e1f", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "917392fb11729b5b522d1ce5a00d3f23f4594e3c", "22839816fbd337d77b81a7f3c6430324e057c250", "37b26ffe6b8768adcad426004edba7f26b770063", "5b75c61e3183ea6228d08b2f6c00fd2cd74baada", "76057a3c7b489290afd4a4dccf09b623502619fd", "3150e68dccebd9d8e371143270f6bc3942b7d69c", "01f21f3aacb36a425aa9213a10ccc543a11659ab", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "562e90563e3f3ea2142eccc33aef18e52f9e4921", "1d45ebf71db9dc87da0716286c0985093f882bec", "43252ba020a456768700880f1e10eff3b30d4526", "1fffb35160cf06ddbbfa3dad44fc293ad9b29b87", "40cb40b7812e019c1051e3a457a8643400b81d51", "6a5d6dc6dbc413a95f56fb97dee732659bb30e38", "0d5a5a3235281ad2b9cca9bae3a2e8c0c7594470", "485f73d4a1875a1875b3fefc4fce4739a7acf796", "4adadc82e4f6db798164438ca655d0fc0252cf17" ], "paperAbstract": "Transactional memory (TM) has been the focus of numerous studies, and it is supported in processors such as the IBM Blue Gene/Q and 
Intel Haswell. Many studies have used the STAMP benchmark suite to evaluate their designs. However, the speedups obtained for the STAMP benchmarks on all TM systems we know of are quite limited; for example, with 64 threads on the IBM Blue Gene/Q, we observe a median speedup of 1.4X using the Blue Gene/Q hardware transactional memory (HTM), and a median speedup of 4.1X using a software transactional memory (STM).\n What limits the performance of these benchmarks on TMs? In this paper, we argue that the problem lies with the programming model and data structures used to write them. To make this point, we articulate two principles that we believe must be embodied in any scalable program and argue that STAMP programs violate both of them. By modifying the STAMP programs to satisfy both principles, we produce a new set of programs that we call the Stampede suite. Its median speedup on the Blue Gene/Q is 8.0X when using an STM. The two principles also permit us to simplify the TM design. Using this new STM with the Stampede benchmarks, we obtain a median speedup of 17.7X with 64 threads on the Blue Gene/Q and 13.2X with 32 threads on an Intel Westmere system.\n These results suggest that HTM and STM designs will benefit if more attention is paid to the division of labor between application programs, systems software, and hardware.", "pdfUrls": [ "http://iss.ices.utexas.edu/Publications/Papers/asplos426-nguyen.pdf", "http://doi.acm.org/10.1145/3037697.3037750" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/77866d8b193fdc69917e5afc6542d9e31ce8f145", "sources": [ "DBLP" ], "title": "What Scalable Programs Need from Transactional Memory", "venue": "ASPLOS", "year": 2017 }, "77956a4289b1c62a8639ddf073590de945c88007": { "authors": [ { "ids": [ "2021341" ], "name": "Himchan Park" }, { "ids": [ "31959062" ], "name": "Min-Soo Kim" } ], "doi": "10.1145/3035918.3064014", "doiUrl": "https://doi.org/10.1145/3035918.3064014", "entities": [ "Experiment", 
"Graph (abstract data type)", "Graph database", "Recursion", "Scalability", "Synthetic data", "Time complexity" ], "id": "77956a4289b1c62a8639ddf073590de945c88007", "inCitations": [ "60fd8b9ce88b7550174119fcdb3a19d5663d2cc9" ], "journalName": "", "journalPages": "913-928", "journalVolume": "", "outCitations": [ "e2462bde978023a9069cc08326f626135a95cb89", "5f3f9223c5c9f896be099bc177929febad508407", "2b1ec3fdf5b695de2d7ec17393ec0ad9445ceb61", "3c7beb9cc66257a1434a0ce662fc74ba25919871", "09050b5922d97e98c385c7dcfd2ce12f94c291a2", "420a0e5fc398f197bca3dfe40291a82b2c65655a", "1e557937f418accc13f9c5edb33a3d48259d80e5", "71affe0d9489be0ecba667f568b1a0bcd9ee3af3", "ef9d9821df55442f039b128bb5cef2b41ab2cadc", "34aff134ba0d25b146721b35ec58a9c1c1abe2ac", "62bfdedb87d1fed25eb5aa1bc6ff546c70a0ba6a", "741f4bd8a0d1d01ff4be38bbaba10fc9ea6412c6", "40eb1f990ac292b14b56ea06e61d9aeb9bfa28c3", "01c1f0e97ce5c74c714dc7aa43cb064f45cc3b04", "18808d821b83148f09eca1c426ac6fa86b3665c9", "08d4da77c489d550c3e215725551481310719893", "49c841a57970309a31c439b1b30262487f6068d2", "1f0612de1f191abadf250b78cd78f884203cca5e", "d1c21c34936f587779c216ed79ca33883845caa1", "31b63d505dbf6f2b9a60d0c45976f2cbd5cd9619", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d", "e3ef3a0d4207048faa1936780ae51042eb5620c5", "0acc31039de608f2ac51f59b6848a48d50c919a5", "ce18973fb7c23cb4fc1c1a61c1c1c4333f4abad1", "ddc843dcccd5e1ebcc01c7b3dba5ccebd0a1cc06", "c3fbbd9c1fc5e53c6a9e3fe27e1bfce4755c8ef3", "2def083fb7fd8f887c507c0b0b32bd921a26df9b", "23f5a0dc63c467b1259ebe6319979a492ca55fc0", "6a888f3dd0a17b0241be61daa378ba6caffa6617" ], "paperAbstract": "As many applications encounter exponential growth in graph sizes, a fast and scalable graph generator has become more important than ever before due to lack of large-scale realistic graphs for evaluating the performance of graph processing methods. 
Although there have been proposed a number of methods to generate synthetic graphs, they are not very efficient in terms of space and time complexities, and so, cannot generate even trillion-scale graphs using a moderate size cluster of commodity machines. Here, we propose an efficient and scalable disk-based graph generator, TrillionG that can generate massive graphs in a short time only using a small amount of memory. It can generate a graph of a trillion edges following the RMAT or Kronecker models within two hours only using 10 PCs. We first generalize existing graph generation models to the scope-based generation model, where RMAT and Kronecker correspond to two extremes. Then, we propose a new graph generation model called the recursive vector model, which compromises two extremes, and so, solves the space and time complexity problems existing in RMAT and Kronecker. We also extend the recursive vector model so as to generate a semantically richer graph database. Through extensive experiments, we have demonstrated that TrillionG outperforms the state-of-the-art graph generators by up to orders of magnitude.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064014" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/77956a4289b1c62a8639ddf073590de945c88007", "sources": [ "DBLP" ], "title": "TrillionG: A Trillion-scale Synthetic Graph Generator using a Recursive Vector Model", "venue": "SIGMOD Conference", "year": 2017 }, "77bcddfbf299243d696491e69603d0dff6fbd5a8": { "authors": [ { "ids": [ "9553169" ], "name": "Aniruddh Ramrakhyani" }, { "ids": [ "2526541" ], "name": "Tushar Krishna" } ], "doi": "10.1109/HPCA.2017.44", "doiUrl": "https://doi.org/10.1109/HPCA.2017.44", "entities": [ "Algorithm", "Classical XY model", "Deadlock", "Electronic data processing", "Failure rate", "Mesh networking", "Microarchitecture", "Network on a chip", "Power gating", "Program lifecycle phase", "Router (computing)", "Routing", "Run time (program lifecycle 
phase)", "Stock and flow", "Throughput" ], "id": "77bcddfbf299243d696491e69603d0dff6fbd5a8", "inCitations": [], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "253-264", "journalVolume": "", "outCitations": [ "2771787fa43c087058eb6b5817541d7ac61919b2", "156e96821d30f3a16f00feb0dd20f06807e552d8", "3b52e7e351d2ee2f8a0ba48703d70d3aed6dd087", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "00bdbb3c0f457a226b5c04dd52a5e5fbe5830925", "5676da9f4854aba9c0cab007c3e517cd81acf81d", "15769b5fdd35e872157c770ec36b87e2253c8570", "0c89d5d37a25f693bc31e65c0c5b9bea63148e57", "1a1dedd953da52903494b00f08dd159c05f95375", "42f95692eaaf42825ca424206331edc607aeb67e", "40ee22cf1ac9ce95e905730e044354af93be7192", "4bad51c7685254155733ee8def6a1294378aa1af", "10995500254d0023cc0173654a997be9ad4e67f3", "854fc26b3fe9fb5d4e643be81df063d45415a993", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "0d01b42384dd92c400052a05e3d24cebaecd4056", "4b81f4c961ed0e3cd64738236bf09da0643e3886", "5398448921a865c9f0438f76842b261a3a8f2c2e", "36ce17dd3f7734b7578fd6580c886b3d1b2a475b", "1add375f9522cbb2633f2de06793e16cb8818d48", "960904bf8dd1de618d606a95fbca8d345d1e769a", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "3191f28b942bd428fd4df250afc15bf68b402362", "002cca80b9deaa3c1e9d46ae0bfdc9fd79079907", "fb315084da1fe348bd340331af86c45f2f79cbaa", "063700ef01aad15a1981553fde02e8d162a553e7", "245c9703304dc058417f5503ea324bcde8b2eefd", "0a569200aeb678b4420cb1025ee9dc6097f60567", "3ec18371eed24707fb16bf7cc258f3043088207f", "1e157cfbd2fa3ca1b786bdf6bcf3f6cf40ed39bb", "0fae80846df9733b4d9e6ac31fd9539e312e47e3", "6d0b81ea278511824a5f6676908c77e24f43413e", "7b78e5d363e74f1a4e0686259df72ba8f681b8b5", "41236387e01eacb63cefad6318dc48fc60e9829e" ], "paperAbstract": "Future SoCs are expected to have irregular on-chip topologies, either at design time due to heterogeneity in the size of core/accelerator tiles, or at runtime due to link/node failures or power-gating of 
network elements such as routers/router datapaths. A key challenge with irregular topologies is that of routing deadlocks (cyclic dependence between buffers), since conventional XY or turn-model based approaches are no longer applicable. Most prior works in heterogeneous SoC design, resiliency, and power-gating, have addressed the deadlock problem by constructing spanning trees over the physical topology, messages are routed via the root removing cyclic dependencies. However, this comes at a cost of tree construction at runtime, and increased latency and energy for certain flows as they are forced to use non-minimal routes. In this work, we sweep the design space of possible topologies as the number of disconnected components (links/routers) increase, and demonstrate that while most of the resulting topologies are deadlock prone (i.e., have cycles), the injection rates at which they deadlock are often much higher than the injection rates of real applications, making the current solutions highly conservative. We propose a novel framework for deadlock-freedom called Static Bubble, that can be applied at design time to the underlying mesh topology, and guarantees deadlock-freedom for any runtime topology derived from this mesh due to power-gating or failure of router/link. We present an algorithm to augment a subset of routers in any n × m mesh (21 routers in a 64-core mesh) with an additional buffer called static bubble, such that any dependence chain has at least one static bubble. We also present the microarchitecture of a low-cost (less than 1% overhead) FSM at every router to activate one static bubble for deadlock recovery. 
Static Bubble enhances existing solutions for NoC resiliency and power-gating by providing up to 30% less network latency, 4x more throughput and 50% less EDP.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.44", "http://synergy.ece.gatech.edu/wp-content/uploads/sites/332/2016/09/staticbubble_hpca2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/77bcddfbf299243d696491e69603d0dff6fbd5a8", "sources": [ "DBLP" ], "title": "Static Bubble: A Framework for Deadlock-Free Irregular On-chip Topologies", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "77c780ed48a64c335966aa9f780f04faa548881d": { "authors": [ { "ids": [ "35105064" ], "name": "Ignacio Cano" }, { "ids": [ "38735026" ], "name": "Srinivas Aiyar" }, { "ids": [ "39409085" ], "name": "Varun Arora" }, { "ids": [ "32091152" ], "name": "Manosiz Bhattacharyya" }, { "ids": [ "2160171" ], "name": "Akhilesh Chaganti" }, { "ids": [ "20379251" ], "name": "Chern Cheah" }, { "ids": [ "2293758" ], "name": "Brent N. 
Chun" }, { "ids": [ "1859843" ], "name": "Karan Gupta" }, { "ids": [ "34874615" ], "name": "Vinayak Khot" }, { "ids": [ "1689594" ], "name": "Arvind Krishnamurthy" } ], "doi": "", "doiUrl": "", "entities": [ "Cluster manager", "Clustered file system", "Computer data storage", "Durability (database systems)", "Experience", "Failure rate", "Hard disk drive", "Machine learning", "MapReduce" ], "id": "77c780ed48a64c335966aa9f780f04faa548881d", "inCitations": [ "87cd0d02a934acfd7a24a0c0915b901100fa21ac" ], "journalName": "", "journalPages": "51-66", "journalVolume": "", "outCitations": [ "54017300f8d0b1e2f51a5088dda84df6fc91c1fb", "02b1103e592fa6bf0499e27f1519692441fad557", "5f3f9223c5c9f896be099bc177929febad508407", "be658efd0bd00b9385890dc4159654f71ade648f", "224b0f2731b3177e683ae20433a1b19000c326ef", "1d07b7d897cfa631aa67f482af9514b1306803c9", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "f282e51e81705730a514d4ec691268fc11ae8234", "88dfee10842bbfd2ebc74980ab64c1cac5753883", "1655cde00456867e6f12de9952fe3a78170fe7bb", "bae3eda9605700b14237f4d04652ab6759c68eef", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "2d8450ec56926a34fe1c25180f0b303c5cc67f2d", "6c4f2f853848cfec0bfad945ebfdce709742ae44", "094ca99cc94e38984823776158da738e5bc3963d", "46fa3ec8f2fa7d0683ffaeeb438af76c6627823d", "f52fb7ca4bddd1d9479ebe5a8c9b83469c812869", "32d211cb69ccc211fb48837b33ae8830244ac3de", "590bd345ef4b8f274af3363a52b7d8f518cdc08a", "471271dfcd33ceb2553b4bd3b3431983fd6ec888", "30a064a35cfabadb7487e8aee200a01bf417a294", "0dfea0be65456d1caf66704d145efec685d80f6e", "29b82a4cbaab5585c802020eed7f25c9c80ebfc9", "28be6c2ed074a7fa63818a1730b04219d8a01c02", "2d60d3596490d9999d8433bf41405060779bc11d", "061316b7516e20a4d66e7d95b3543eded514ef5d", "5ff311923cd8f80057b2cfc15cf7ec3ac0a6fdbc", "48534b21548e3692ad7d866387f1dc7f543109e1", "88ffcdcd0627253903408214f8a3b4549197340c", "88f74334a81e0f25b74c789eaf856f4b1593340d", "255736180aaa9adf84839834d99e23b1eec47b2d", "402da07a0ac4645e26370ff5ac8ab3540257a8ab", 
"0541d5338adc48276b3b8cd3a141d799e2d40150", "a6797456656d072808909a79af517e5ed4bc9ab3" ], "paperAbstract": "Modern cluster storage systems perform a variety of background tasks to improve the performance, availability, durability, and cost-efficiency of stored data. For example, cleaners compact fragmented data to generate long sequential runs, tiering services automatically migrate data between solid-state and hard disk drives based on usage, recovery mechanisms replicate data to improve availability and durability in the face of failures, cost saving techniques perform data transformations to reduce the storage costs, and so on. In this work, we present Curator, a background MapReduce-style execution framework for cluster management tasks, in the context of a distributed storage system used in enterprise clusters. We describe Curator\u2019s design and implementation, and evaluate its performance using a handful of relevant metrics. We further report experiences and lessons learned from its five-year construction period, as well as thousands of customer deployments. 
Finally, we propose a machine learning-based model to identify an efficient execution policy for Curator\u2019s management tasks that can adapt to varying workload characteristics.", "pdfUrls": [ "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/cano", "http://homes.cs.washington.edu/~icano/papers/nsdi-curator-final.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-cano.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-cano.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/c4d4/91d5160222ce8fa16c06e20fa313dfed90df.pdf", "s2Url": "https://semanticscholar.org/paper/77c780ed48a64c335966aa9f780f04faa548881d", "sources": [ "DBLP" ], "title": "Curator: Self-Managing Storage for Enterprise Clusters", "venue": "NSDI", "year": 2017 }, "781114cbbf88ae150113df14121648f94bc594c6": { "authors": [ { "ids": [ "40001161" ], "name": "Kangjie Lu" }, { "ids": [ "38875860" ], "name": "Marie-Therese Walter" }, { "ids": [ "2862928" ], "name": "David Pfaff" }, { "ids": [ "31659184" ], "name": "Stefan N\u00fcmberger" }, { "ids": [ "1738428" ], "name": "Wenke Lee" }, { "ids": [ "1749517" ], "name": "Michael Backes" } ], "doi": "", "doiUrl": "", "entities": [ "Address space layout randomization", "Call stack", "Compiler", "Heap spraying", "Kernel (operating system)", "Linux", "Memory corruption", "Memory safety", "OpenSSL", "Pointer (computer programming)", "Privilege escalation", "Protection ring", "RAM parity", "Randomness", "Stack machine", "Symbolic execution", "Undefined behavior", "Undefined value", "User space", "Vector (malware)", "Vulnerability (computing)" ], "id": "781114cbbf88ae150113df14121648f94bc594c6", "inCitations": [ "831950908fcc713595a6b64b80c0aea60072da24", "2968fdf952edd08d2e7b2f303cba2339e4ee8c40", "903340e11ddfee6bbc0d8f39180c6e1256392578" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "4f0655f1dfba053b70fbe33c4c78f2c2bea06cbf", 
"0719b9670c8580db76547497df39caabdc20fc32", "3d6395a04f7436b143339bbb7e4e46dfaaaf8afb", "2c21f9488edfb2586327528bb59461a41363fc42", "1677bf5c635ef0e81b6c6cdfce30727f83959132", "c465cddd1a514d75d5e21775a4ae972d90e81902", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "72eb18b6c1faf83c876031138a75b1cfa2856c0a", "06567663b31f7b8cf1de3d5f2ca6c79422ef60c8", "8c8ffe8e4fdadbf42b46944d0339eafc3e4de4c3", "e81ee8f90fa0e67d2e40dc794809dd1a942853aa", "38cb7372f8f1cfc72d06a1d2bf9224453dfd3e8e", "3c48458d8f77b04c9e69b61f5beddf770e61ec04", "6ebad4c8330e2804e31d9d95c7874670e68d3295", "36f4666c5c294548d2a9a536ed44e926172639f3", "3c3d3f4752252f6c47ef857a0b2fbec09fca2a1f", "07e7a136224b2230679385c2dc6f1ddfbde449f1", "aaeb37b9eba724271965a67d15ad86ba0fed04ed", "0e55379d27454c5d9d72e4ba4b3752007b9f886f", "b439340d07516687ab69765f7facfd31cf98544f", "480d4a756381f7aec1ffda84a3d7f1ef2695252a", "546146767ffcbb5f7e12f11838d5ecbeafa6d14c", "8cd67da41459950f2c9fdb579d7b197058aaddb4", "24b2f987c6a1b633df3a8ca970ce59a7259fd482", "217742089058db1572042a0cebfcecdec8ce215e", "4fe68f0468d4e6a47cd7f747f49c307dea87cd89", "11a7e426012fcec35c32fdea0b60b874783c682f", "144382ef2ee1d00ce3d36c61601afecca5620c7d", "03c85aa6d213f56cbd3602d9eaf2cc72de9f9a7e", "479bf197a3a9c5f6f7b06e64eefa0a90fa0c8b41", "082b2a75e94cf1142a5c3a301418e6e05568d8b0", "50117210fbb7a8ce28ba503280f3c65fdc029ec7", "5f6a808bedd3dbfd1290063b3cd8221132ce5c95", "d4914de7dbb5080d5c83004cab22df9100fb37d0" ], "paperAbstract": "A common type of memory error in the Linux kernel is using uninitialized variables (uninitialized use). Uninitialized uses not only cause undefined behaviors but also impose a severe security risk if an attacker takes control of the uninitialized variables. However, reliably exploiting uninitialized uses on the kernel stack has been considered infeasible until now since the code executed prior to triggering the vulnerability must leave an attacker-controlled pattern on the stack. 
Therefore, uninitialized uses are largely overlooked and regarded as undefined behaviors, rather than security vulnerabilities. In particular, full memorysafety techniques (e.g., SoftBound+CETS) exclude uninitialized use as a prevention target, and widely used systems such as OpenSSL even use uninitialized memory as a randomness source. In this paper, we propose a fully automated targeted stackspraying approach for the Linux kernel that reliably facilitates the exploitation of uninitialized uses. Our targeted stack-spraying includes two techniques: (1) a deterministic stack spraying technique that suitably combines tailored symbolic execution and guided fuzzing to identify kernel inputs that user-mode programs can use to deterministically guide kernel code paths and thereby leave attacker-controlled data on the kernel stack, and (2) an exhaustive memory spraying technique that uses memory occupation and pollution to reliably control a large region of the kernel stack. We show that our targeted stack-spraying approach allows attackers to reliably control more than 91% of the Linux kernel stack, which, in combination with uninitialized-use vulnerabilities, suffices for a privilege escalation attack. As a countermeasure, we propose a compiler-based mechanism that initializes potentially unsafe pointer-type fields with almost no performance overhead. 
Our results show that uninitialized use is a severe attack vector that can be readily exploited with targeted stack-spraying, so future memory-safety techniques should consider it a prevention target, and systems should not use uninitialized memory as a randomness source.", "pdfUrls": [ "http://iisp.gatech.edu/sites/default/files/images/kangjie-ndss17.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/unleashing-use-initialization-vulnerabilities-linux-kernel-using-targeted-stack-spraying/", "http://www.cc.gatech.edu/grads/k/klu38/publications/ubi-ndss17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/e912/c7ba9c1904f4317149b79abbd8235eeaf40c.pdf", "s2Url": "https://semanticscholar.org/paper/781114cbbf88ae150113df14121648f94bc594c6", "sources": [ "DBLP" ], "title": "Unleashing Use-Before-Initialization Vulnerabilities in the Linux Kernel Using Targeted Stack Spraying", "venue": "NDSS", "year": 2017 }, "7820021d8a3eace969cea5f4865c5aeef4673596": { "authors": [ { "ids": [ "3747964" ], "name": "Zhao Zhang" }, { "ids": [ "2540073" ], "name": "Evan R. Sparks" }, { "ids": [ "1712149" ], "name": "Michael J. 
Franklin" } ], "doi": "10.1145/3078597.3078603", "doiUrl": "https://doi.org/10.1145/3078597.3078603", "entities": [ "Application programming interface", "Baseline (configuration management)", "Computation", "Data lineage", "Database normalization", "Debugging", "Machine learning", "Pipeline (computing)", "Software bug", "Storage efficiency" ], "id": "7820021d8a3eace969cea5f4865c5aeef4673596", "inCitations": [ "8c7044398d1994b12a9bf7212e11398f59eaf446" ], "journalName": "", "journalPages": "143-153", "journalVolume": "", "outCitations": [ "d21eaf787e553d404098ae03c22c06af7a35367e", "2009b3c05dd9084a0a1c609abcedd81713ed7150", "0a30411ad3f537fe026bc6335adbde75a6da3a8e", "5f3f9223c5c9f896be099bc177929febad508407", "3784b73a1f392160523400ec0309191c0a96d86f", "2b41f8d57eb0830f841d5cffd67c7776de5dcccd", "1bbffba0be1ab923c63c6fb0b4b8d2b87f04f545", "6561b6a55114cc1da9f0f09d0396bb6b508a6fe9", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "9cea29601e72fd8e6ef8419aa31ddc103eceb7f8", "2a85b683073c2c8b762079c52a0d54392b243afb", "75a4860c9b3b2e95bc3a8056543e7560a1753f2b", "7ec028ace29244cb74c105327a7e4177a34aa6bd", "0558c94a094158ecd64f0d5014d3d9668054fb97", "7fa0bbff98f08eeeadb927c8c3e486772d138bd8", "0964ac250b81a2caa85dd172527f07a9ffc8230b", "06fb8f5e59bac8b014e7ab70851b3568ea5a0a46", "54158c17a5306acc3e4a5134e57cfc2da82c1ba4", "1289d49693f147bdc28904353fcbb3d50b9fce95", "abe9f3b91fd26fa1b50cd685c0d20debfb372f73", "07bcb9e4bb95c975bd3f5b09d73b5df35642b6c1", "3deb0363b9414648a3339479adab3bdf99d2eda0", "14a2658e3f25184631b9bd29fc2eeb5cf2beacf7", "0cd8a0c73a35cf8557959e453f8a96de74f9b963", "2809d4876e34b8c64fc1783fe6a0a278770505b0", "8552df7893426c2dfd75e7f87506ccb37b2cb266", "0e4106aaab16fe1b75318196b9b02dd51bca1a56", "a718b85520bea702533ca9a5954c33576fd162b0", "4f3e75fcfdbe9a961bea2948578fc20a814b1084", "5a820264a175bb71380e0dd96d3ce49e46551f29", "0667441114b2a46d92d840b8c549ce1af7c9e9ef", "0d557c7a7d6b50c3ccdb6a9f48e4c552541f5930", "486b80ee409dc0e4173834b88b25069443c82714", 
"3a052feb019328487068c8efc4c5dced8eb51a87", "0bad61e613f9e3b780174cab8be2d5786a70c6bc", "57e9d60fd6ab7c0b0f7fd2e19533a333ea911194", "34953ebdcf96d4499472039312bda86a0ba4e7ca", "9d16a98ce8e9cfb74afb3aa87a9a12eea75e77f6", "65ad9178eb3e3bb6ef01df6ced56e389e911376c", "8e42a6f643ff2f2b120a4f29ab320ee240c49fec" ], "paperAbstract": "We present the Hippo system to enable the diagnosis of distributed machine learning (ML) pipelines by leveraging fine-grained data lineage. Hippo exposes a concise yet powerful API, derived from primitive lineage types, to capture fine-grained data lineage for each data transformation. It records the input datasets, the output datasets and the cell-level mapping between them. It also collects sufficient information that is needed to reproduce the computation. Hippo efficiently enables common ML diagnosis operations such as code debugging, result analysis, data anomaly removal, and computation replay. By exploiting the metadata separation and high-order function encoding strategies, we observe an O(10^3)x total improvement in lineage storage efficiency vs. the baseline of cell-wise mapping recording while maintaining the lineage integrity. 
Hippo can answer the real use case lineage queries within a few seconds, which is low enough to enable interactive diagnosis of ML pipelines.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078597.3078603" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7820021d8a3eace969cea5f4865c5aeef4673596", "sources": [ "DBLP" ], "title": "Diagnosing Machine Learning Pipelines with Fine-grained Lineage", "venue": "HPDC", "year": 2017 }, "782acff3ad2c1f21159c55dec5b19b32b8d3e4df": { "authors": [ { "ids": [ "2949392" ], "name": "Rahul Boyapati" }, { "ids": [ "3406930" ], "name": "Jiayi Huang" }, { "ids": [ "39665992" ], "name": "Pritam Majumder" }, { "ids": [ "1680392" ], "name": "Ki Hwan Yum" }, { "ids": [ "1692009" ], "name": "Eun Jung Kim" } ], "doi": "10.1145/3079856.3080241", "doiUrl": "https://doi.org/10.1145/3079856.3080241", "entities": [ "Approximate computing", "Approximation algorithm", "Big data", "ChIP-on-chip", "Computation", "Computational RAM", "Data compression", "Data-intensive computing", "Error detection and correction", "Graph (abstract data type)", "In-memory database", "Machine learning", "Memory bandwidth", "Multi-core processor", "Network on a chip", "Onset (audio)", "Regular expression", "Software bug", "Throughput", "Video processing" ], "id": "782acff3ad2c1f21159c55dec5b19b32b8d3e4df", "inCitations": [ "ebc77ac0428966da41fe3993f568484108989399" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "666-677", "journalVolume": "", "outCitations": [ "52e99334096f02c9cf386c9391fc68181c058f4c", "682b7c3e34922d3cde0359a013195797b43b9309", "5a830ad18ff1a45c197570065b65d212818eaef6", "0693ff4b3a8d1452b897a876d3ffe6b2074e98e4", "757d8c525b32a6e5cc6eefc410ea8c1f0b210584", "41944de4e2fa6379092ba76ce43e152a12292295", "0653e2ed9f683868cb4539eb8718551242834f6b", "15b275f0421c606f5903532e9964b140cbb2f878", "44b5e20418e0657efd49daa993dd3881c22f826d", 
"52137476895005f26098678a9af934f93071b416", "de24c9686cd00fb208bdfd65e3e6257c6d7fee10", "a21c972077f85d23f769c6ac4e4afa283d38de49", "17c8851c47328dad603993f59e25bf67f8e64542", "0cd5a228cbebb830adac40b0b225daa9a116dda9", "54f3331b575b2d451c2d716f86496cada23d596d", "0090ee65a8bb632b95026877fb3f497e20b9f12b", "78a8809fcf3cdb0e0346a8f8b0d1f6255905fbba", "5ad36ad05fca05d5ac1a49c898cd87efa0853999", "793b84b732bf257cb842e316a246ccfa3b0ef6a2", "813ab7b5aa6ae1fd523196970510f8c68fd57124", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "34575d258392298a871fcf58d9ed2b7ac6f5d4ad", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "006662a19c6383e8ee15616c90be206cd08867f0", "a58445c48c3402305e92ff7cb7eaa9641a56ca6f", "3af0a17e79061462dbc2823f64ea188136271713", "219b47356dcd3c02a04837be9be7ae072153a9d1", "55cf549a13817cb929c483b0b7631175fbd38368", "3d642e3b0bfc0e931c59e68b8f7975f304c2f6cb", "d4a9bd884e2aee900f27251f575bd018d5ee18f4", "0cbe6121b40be2af869359c3e7b1de8d9a09a787", "e7c6f67a70b5cf0842a7a2fc497131a79b6ee2c5", "0110c80228683bc32879efb1b2f3931421e52eb6", "13bb71a86db976ffa572407bda5b44cacadac4ca", "f6e5e70860080a69e232d14a98bf20128957b9b5", "0061ee3d39fe6b9c8946cdca0996c941a3fddd1b" ], "paperAbstract": "The trend of unsustainable power consumption and large memory bandwidth demands in massively parallel multicore systems, with the advent of the big data era, has brought upon the onset of alternate computation paradigms utilizing heterogeneity, specialization, processor-in-memory and approximation. Approximate Computing is being touted as a viable solution for high performance computation by relaxing the accuracy constraints of applications. This trend has been accentuated by emerging data intensive applications in domains like image/video processing, machine learning and big data analytics that allow inaccurate outputs within an acceptable variance. 
Leveraging relaxed accuracy for high throughput in Networks-on-Chip (NoCs), which have rapidly become the accepted method for connecting a large number of on-chip components, has not yet been explored. We propose APPROX-NoC, a hardware data approximation framework with an online data error control mechanism for high performance NoCs. APPROX-NoC facilitates approximate matching of data patterns, within a controllable value range, to compress them thereby reducing the volume of data movement across the chip.\n Our evaluation shows that APPROX-NoC achieves on average up to 9% latency reduction and 60% throughput improvement compared with state-of-the-art NoC data compression mechanisms, while maintaining low application error. Additionally, with a data intensive graph processing application we achieve a 36.7% latency reduction compared to state-of-the-art compression mechanisms.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080241", "http://faculty.cs.tamu.edu/ejkim/HPC_WEB/docs/isca17_approxnoc.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/782acff3ad2c1f21159c55dec5b19b32b8d3e4df", "sources": [ "DBLP" ], "title": "APPROX-NoC: A data approximation framework for Network-on-Chip architectures", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "782b2fbc15b50c15f12baa126817466ec5e8251c": { "authors": [ { "ids": [ "2827061" ], "name": "Yao Zhou" }, { "ids": [ "37395525" ], "name": "Jingrui He" } ], "doi": "10.1109/ICDM.2017.78", "doiUrl": "https://doi.org/10.1109/ICDM.2017.78", "entities": [ "Algorithm", "Crowdsourcing", "Gradient", "Program optimization", "Randomized algorithm", "Regular Language description for XML" ], "id": "782b2fbc15b50c15f12baa126817466ec5e8251c", "inCitations": [ "7c28b81dff1899e5a148ff57888faacc9945ab22" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "685-694", "journalVolume": "", "outCitations": [ 
"278841ab0cb24c1abcb75e363aeed1fa741c8cc4", "727fa1f55462f732bcc8e3ae41a119d24e38bd69", "658d97c2ea8a6ed1d9de4ef0f85da21a8816d29a", "27e2af1d9775974485ff0b642aec57326a791411", "27c25930e4efc42bb9475532530c4dcf2b72b5b1", "023f6fc69fe1f6498e35dbf85932ecb549d36ca4", "263f77020e45c0bdcc306d196613c692a8a5f479", "0b65d7a55a1541f99e0686844e2129527e7702e0", "0a91832df36f7dd029646059e7090fa4b29b8bca", "1b0644187f24bcffd8aae376cbf188ecdfed172d", "1d0f25989452abbbc8feaf00a034ff110fc4b350", "56436d67863a81fd52f670b7c9d77e8c6526a4c4", "86d2add9aa90014a6330e3eb59277562adaeeda4", "4a261b8e566fe3a5dd7ea9e57d189e7350498057", "059e06ebab437b61a9b2f34f75629a5bc6d39e3e", "56c3c82077885275fd91c768b4d963d1bf1bf013", "66d87d19a9e7d559aedcb843127fda9cda5ce417", "4104d7e3473173bd2ded1cef30fcfb8305af770f", "3dd92f999bea0d5fa2a679b0f6556e5911fe0a49", "271e7b664d72f3f77b7bae7d7b64710067d3d1d4", "12d8b675b6bc49313764f89b5e64d721af0ec1ae", "0e8d3013012025a51128a96c2f15db26a0ac95d3", "067f9fae7475e1b9a8fc78268c20f3886e82650f", "736e8deabcae7e2f9eb6c41a1bfae1b5270a8dbd", "3d8c9e6af31a0f4cd3cd47706a8735167ca95b0b", "0059cfac9c5b7811866f0729d0917b7478148fc5", "67b3b5a1eb77893ae4f443176f5c3245a89761b6", "e9c22584df6369dc443712aa02b6749c025996cf", "8c8337d10e2d71e3d7da43c8bc8d9abe5c631e9c" ], "paperAbstract": "Driven by the dramatic growth of data both in terms of the size and sources, learning from heterogeneous data is emerging as an important research direction for many real applications. One of the biggest challenges of this type of problem is how to meaningfully integrate heterogeneous data to considerably improve the generality and quality of the learning model. In this paper, we first present a unified learning framework that aims to leverage the structural information from two types of data heterogeneity: view heterogeneity (as in multi-view learning) and worker heterogeneity (as in crowdsourcing). 
The objective follows the principles of view consistency and worker consensus by minimizing the loss term with a regularized prediction tensor. We then propose to relax and solve the optimization framework with an iterative updating method. We also prove that the gradient of the most time-consuming updating block is separable with respect to the workers, which leads to a randomized algorithm with faster speed and better convergence. Finally, we compare the proposed method with several state-of-the-arts and demonstrate its effectiveness on various data sets.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.78", "http://www.public.asu.edu/~yzhou174/papers/ICDM17-slides.pdf", "http://www.public.asu.edu/~yzhou174/papers/ICDM17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/782b2fbc15b50c15f12baa126817466ec5e8251c", "sources": [ "DBLP" ], "title": "A Randomized Approach for Crowdsourcing in the Presence of Multiple Views", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "78522d5ab004d27241bc4e34e5cf96d0e5e2630b": { "authors": [ { "ids": [ "3134457" ], "name": "Saurabh Gupta" }, { "ids": [ "28293642" ], "name": "Tirthak Patel" }, { "ids": [ "1686571" ], "name": "Christian Engelmann" }, { "ids": [ "34966505" ], "name": "Devesh Tiwari" } ], "doi": "10.1145/3126908.3126937", "doiUrl": "https://doi.org/10.1145/3126908.3126937", "entities": [ "Failure rate", "IBM WebSphere eXtreme Scale", "Supercomputer" ], "id": "78522d5ab004d27241bc4e34e5cf96d0e5e2630b", "inCitations": [ "60efd4296aa4855f6470cee2be0caaaea6013eec" ], "journalName": "", "journalPages": "44:1-44:12", "journalVolume": "", "outCitations": [ "2398278a25035cfeefa3dd4aba91b16d48f540ba", "186d6c8ebd79be2226a43eadaff4e95ca34d9c00", "3b2c8bb9471bd52a40b72a61bfede076f4d414b5", "0290313cb69e7bb7327af7392da04ffc8a75d0a5", "8a7536f311d22bd588c5bc2306d54d13effaee82", "39ef5d362200126497b2f74c33338383dcc9589c", 
"f268085629fee4946fb71def02c6fa856246782d", "4936d512f9c9fae6d6ad9a6d517930ef410915bf", "517c5cd1dbafb3cfa0eea4fc78d0b5cd085209b2", "01b5c01835a57f63c250b4eed923b7f736707624", "896f6698a74e656174045dc20840dd7e925f18bd", "06230d13e276bd871a378ca932a41b5cff94e29f", "50b04724feb83fc729ccc56896ed80310e6cad76", "502366e7dd9bb70f81e931b5a4cb6bd25651143e", "01d62cd850496455ce1616500f491690effa5c98", "20bd9e51b0a95cfe03afdb00337e1c95c290e473", "4158af458c17f19d463ce3174aaa13fa23316833", "1ea2fd80993c826a22de284fa8365bd34cb8cd92", "186e56c99b6392919e10734e8e9c174892663131", "750fcd66bca7941f0950455f79b4b1b4e3821e5b", "429d28998216da5648f40248bf4bc9e508edd2fd", "580cd9345085036c200cbf0a75753653ecebfc94", "168b8cbbbacd234f23b70b952ef58b5b56e67529", "34f310dffd51a8f1585b0a6a5ccaf83094d0d663", "7c62c7e0b4e026f6b5b027735c99cbf033789ba9", "b39b8b5be74498b90ae59297a6883e3fd57b1eb8", "61c45bc4978d927f5a4ff44868fa239409bf4290", "6e1ceacdaa0c979cf52af5f478cc2a9891e2b6a0", "ddcb1cb84899b2d0d6aee3b95fe2f2b3018c252a", "73b343b073bbc0b9660b5c500f994a5e61f53132", "01a136d82c63f6e8eec7fe5cffc27e91ca5b1f84", "3be52767a60d0f6659f8d0ae5dc23dc5e4dc4ec7", "1e8fa3399883d288483c145741156c77d80a4278", "710b3d324b07197a705683af18fc417ef712d042", "96d860caedf7731e2f598a768e85d04e26753868", "07a66e0f2777bb0005384defd228d5aa0bd7f9a4" ], "paperAbstract": "Resilience is one of the key challenges in maintaining high efficiency of future extreme scale supercomputers. Researchers and system practitioners rely on field-data studies to understand reliability characteristics and plan for future HPC systems. In this work, we compare and contrast the reliability characteristics of multiple large-scale HPC production systems. Our study covers more than one billion compute node hours across five different systems over a period of 8 years. 
We confirm previous findings which continue to be valid, discover new findings, and discuss their implications.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126937" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/78522d5ab004d27241bc4e34e5cf96d0e5e2630b", "sources": [ "DBLP" ], "title": "Failures in large scale systems: long-term measurement, analysis, and implications", "venue": "SC", "year": 2017 }, "7857c88a380055d83a1a291c056185dbf944169e": { "authors": [ { "ids": [ "39073765" ], "name": "David R. Matos" }, { "ids": [ "38279896" ], "name": "Miguel L. Pardal" }, { "ids": [ "40003160" ], "name": "Miguel Correia" } ], "doi": "10.1145/3135974.3135978", "doiUrl": "https://doi.org/10.1145/3135974.3135978", "entities": [ "Backup", "Black box", "Hypertext Transfer Protocol", "Machine learning", "MediaWiki", "Platform as a service", "Remote backup service", "State (computer science)", "System administrator", "Web application", "WordPress" ], "id": "7857c88a380055d83a1a291c056185dbf944169e", "inCitations": [], "journalName": "", "journalPages": "209-221", "journalVolume": "", "outCitations": [ "24d12f3d74b459ea0c0272c7cd4c2ec87b465340", "522efdd5adda8c7aecc04318c82db684b4041c59", "3d2101ca5c3a583f5aa709d9753664d00dc20cf0", "13bdea7a56e87b0060c5e87ccc9a6818dfc74e5b", "313eceae658fb6c132a448c86f3abcc37931df09", "6438ee542a37d8caadae7a56300f81da9b39b4a4", "b5158966daa803118c8900d153324c79def1fff1", "eb758f78f582f259a2386a8d99721b70603751e3", "020e04cd70f4c878069fe9e714d0258bb30a8035", "81a502b52485e52713ccab6d260f15871c2acdcb", "707e0d71111787a85260f5146ee6ee5232f7b117", "ad585974e2d05758cd36e2ab2e346f38b4de4a0f", "4dbe55cdf630bd364a2ab19768c23f8545292c83", "436c36e10d9241b1d62c7ead9f74f919bd14125c", "2f6ac431e923495be42d3e2018908e30fd3f7b41", "8133926b933e82d29766c042764ae5cac935f830", "fd0ac0adc980f1d1a819bad6adf3d2fee27a85bc", "24b80730cb9cfad11651d3a09e28e35c50cc4838", "03e65455887d857d4f078eda30aba02bbee50aa4", 
"31ebdb718261f2b7df900c88ec34d3e6776327b2", "8a4747aad0ecbbb729ccd0c27148d5f298b97b83", "088a382a5af6a44ccb69c2f49517bf8d99ab6759", "2b9b4a646c5b86b0fbf0e18cd3bd0f52e06fa980", "54ccf5ca4adb89bbd775cc81bb391183f9729a84", "3dab3dbc5b7fd9e32d6da6cd9d0930a89b229ce0", "266edad9a7cb024fd6a4128488de1485bfb664d8", "1cb50385143b931cea0df7400619e035919b27a4", "6d8c9fcce8177d6f8d122d653c7d32d7624d6714", "1c284b5f98385127ee083220c5ac3f09fab1a52b", "a1d095de33a3889de814fbd16b8512a1a8c620ee", "1cbef02d99729cd8195466a89a2cb79cb2253dff", "b79ad1b9a28385fc27a32220267fc46f98ba200e", "035e7e03ac7da20e63cc2ee6d4dcf895e9c5c567" ], "paperAbstract": "Web applications hosted on the cloud are exposed to cyberattacks and can be compromised by HTTP requests that exploit vulnerabilities. Platform as a Service (PaaS) offerings often provide a backup service that allows restoring application state after a serious attack, but all valid state changes since the last backup are lost. We propose Rectify, a new approach to recover from intrusions on applications running in a PaaS. Rectify is a service designed to be deployed alongside the application in a PaaS container. It does not require modifications to the software and the recovery can be performed by a system administrator. Machine learning techniques are used to associate the requests received by the application to the statements issued to the database. 
Rectify was evaluated using three widely used web applications - Wordpress, LimeSurvey and MediaWiki - and the results show that the effects of malicious requests can be removed whilst preserving the valid application data.", "pdfUrls": [ "http://www.gsd.inesc-id.pt/~mpc/pubs/rectify_black_box.pdf", "http://www.gsd.inesc-id.pt/~dmatos/papers/rectify_black_box.pdf", "http://doi.acm.org/10.1145/3135974.3135978" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7857c88a380055d83a1a291c056185dbf944169e", "sources": [ "DBLP" ], "title": "Rectify: black-box intrusion recovery in PaaS clouds", "venue": "Middleware", "year": 2017 }, "786eea6221dc6bf93c1b84f1f3c68d0f2d08bff9": { "authors": [ { "ids": [ "2096387" ], "name": "Sinong Wang" }, { "ids": [ "1750487" ], "name": "Ness B. Shroff" } ], "doi": "10.1145/3084450", "doiUrl": "https://doi.org/10.1145/3084450", "entities": [ "Combinatorial optimization", "Complex network", "Ellipsoid method", "NP (complexity)", "Optimization problem", "Polynomial", "Program optimization" ], "id": "786eea6221dc6bf93c1b84f1f3c68d0f2d08bff9", "inCitations": [ "0dab34a0bb73b2b966a7edfddfce414646fe6986" ], "journalName": "POMACS", "journalPages": "13:1-13:32", "journalVolume": "1", "outCitations": [ "a5c69522be528655ae661c277f28beb5c981ce9c", "34edf0328cfcee2bc2e8d1ba7362b4aada08f964", "25581e20615317e298d9194e68d634e8ca22d6fc" ], "paperAbstract": "There has been significant interest in studying security games for modeling the interplay of attacks and defenses on various systems involving critical infrastructure, financial system security, political campaigns, and civil safeguarding. However, existing security game models typically either assume additive utility functions, or that the attacker can attack only one target. Such assumptions lead to tractable analysis, but miss key inherent dependencies that exist among different targets in current complex networks. 
In this paper, we generalize the classical security game models to allow for non-additive utility functions. We also allow attackers to be able to attack multiple targets. We examine such a general security game from a theoretical perspective and provide a unified view. In particular, we show that each security game is equivalent to a combinatorial optimization problem over a set system ε, which consists of defender's pure strategy space. The key technique we use is based on the transformation, projection of a polytope, and the ellipsoid method. This work settles several open questions in security game domain and significantly extends the state-of-the-art of both the polynomial solvable and NP-hard class of the security game.", "pdfUrls": [ "http://newslab.ece.ohio-state.edu/research/resources/POMACS2017.pdf", "http://doi.acm.org/10.1145/3084450", "http://arxiv.org/abs/1701.08644", "https://arxiv.org/pdf/1701.08644v1.pdf", "http://doi.acm.org/10.1145/3078505.3078519" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/786eea6221dc6bf93c1b84f1f3c68d0f2d08bff9", "sources": [ "DBLP" ], "title": "Security Game with Non-additive Utilities and Multiple Attacker Resources", "venue": "SIGMETRICS", "year": 2017 }, "788bdfe4dbd2228dca0f7ef48eda469af3cb1347": { "authors": [ { "ids": [ "2588506" ], "name": "Houjun Tang" }, { "ids": [ "9271904" ], "name": "Suren Byna" }, { "ids": [ "39131579" ], "name": "Bin Dong" }, { "ids": [ "1685638" ], "name": "Jialin Liu" }, { "ids": [ "3239875" ], "name": "Quincey Koziol" } ], "doi": "10.1109/CLUSTER.2017.53", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.53", "entities": [ "Clustered file system", "Computer data storage", "Experiment", "Fault tolerance", "Hoc (programming language)", "Lustre", "MongoDB", "Scalability", "SciDB", "Simulation", "Speedup", "Supercomputer", "User space" ], "id": "788bdfe4dbd2228dca0f7ef48eda469af3cb1347", "inCitations": [], "journalName": "2017 IEEE International Conference on 
Cluster Computing (CLUSTER)", "journalPages": "359-369", "journalVolume": "", "outCitations": [ "5f3f9223c5c9f896be099bc177929febad508407", "e5134de3c15b8a8a7c40371ca5c9a8a306ec375b", "34677603cc90efe1d420aab63100e09caa1d1a5a", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "bc432fd1491c352413f635b4dc949f4e62f5ce53", "2da760f90c3d2bf6598becdde9063093f488548c", "57ebf9f30b57b3f0c5895540e1332b4b299e59f2", "6d58155f4be0615ec688a6e6c21eccf12ad3a7da", "305bad959b93960ff2ac468884424935e6158cfd", "7ec028ace29244cb74c105327a7e4177a34aa6bd", "02c7714e034a832ce25bf0bf563cf0a789ad7342", "1e7be30c6b2cd522083183913a0ca820a036342c", "779f67bf325e182d4b16c0bac1b085d34a7fbdb4", "0b31698fc626cce6ad3ef899f56c39d1ebff2df0", "2d2255446fa2c5d5e96c4635ba75ca1741c82f7e", "05227501b3727de9b117907ecd77b0bff694869f", "8563bd4361a3a5f7ee8b6c68c1019040f32fbbab", "24b1ab8ed36586c78fae6aa19ee3f41379e35129", "8cd63388eb68ede942d27644c7ef629e358764da", "ba625fb8f294a5003a0880096695a92bc9bb843a", "79a7d3c29ce5b210b3558fda3a315c9b524b4477", "0377d33df698ca17c03c9bdca5388dbbcd925161", "37617b02017b7912ad4d977ba420ab3fa232e445", "13bf79b773cc84590d3efeb88187f2675dea4b81", "b6ef7b9cd4d2c883f92c2a9b764fb2ae2a7c1c00", "1104b0b3f147fbf6101534bf73b4d6a1e08fcc1a", "4e63fd64ac90778b8c431e37f54b5cfd01e0f379", "2566acc500a8f013610d306bea7a8f548930dfed", "499b458ddd5d10dc4be158fd89aeed0e31b6b5cf", "1fcb1c5595b4518b0e8bab042f32605c367588c2", "2d60d3596490d9999d8433bf41405060779bc11d", "da30f9be5550ba3f0c96eba6a2ad7de28f2efeb2", "108ffa868b6dc5e8b4987342c90c79c8ccf841c2", "05dd4149b422b56cc037cd35eb041496b656d671" ], "paperAbstract": "Scientific data sets, which grow rapidly in volume, are often attached with plentiful metadata, such as their associated experiment or simulation information. Thus, it becomes difficult for them to be utilized and their value is lost over time. Ideally, metadata should be managed along with its corresponding data by a single storage system, and can be accessed and updated directly. 
However, existing storage systems in high-performance computing (HPC) environments, such as Lustre parallel file system, still use a static metadata structure composed of non-extensible and fixed amount of information. The burden of metadata management falls upon the end-users and require ad-hoc metadata management software to be developed. With the advent of \"object-centric\" storage systems, there is an opportunity to solve this issue. In this paper, we present SoMeta, a scalable and decentralized metadata management approach for object-centric storage in HPC systems. It provides a flat namespace that is dynamically partitioned, a tagging approach to manage metadata that can be efficiently searched and updated, and a light-weight and fault tolerant management strategy. In our experiments, SoMeta achieves up to 3.7X speedup over Lustre in performing common metadata operations, and up to 16X faster than SciDB and MongoDB for advanced metadata operations, such as adding and searching tags. 
Additionally, in contrast to existing storage systems, SoMeta offers scalable user-space metadata management by allowing users with the capability to specify the number of metadata servers depending on their workload.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.53" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/788bdfe4dbd2228dca0f7ef48eda469af3cb1347", "sources": [ "DBLP" ], "title": "SoMeta: Scalable Object-Centric Metadata Management for High Performance Computing", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "78969e6f35454c154f746ed9c2fcfd7e07db6c0a": { "authors": [ { "ids": [ "6938895" ], "name": "Chunyi Liu" }, { "ids": [ "2571439" ], "name": "Fan Ni" }, { "ids": [ "2073481" ], "name": "Xingbo Wu" }, { "ids": [ "2358097" ], "name": "Xiao Zhang" }, { "ids": [ "1804354" ], "name": "Song Jiang" } ], "doi": "10.1145/3078468.3078471", "doiUrl": "https://doi.org/10.1145/3078468.3078471", "entities": [ "Attribute\u2013value pair", "B-tree", "Block code", "Data deduplication", "Experiment", "Garbage collection (computer science)", "Hard disk drive", "Key-value database", "LevelDB", "Log-structured merge-tree", "Solid-state drive", "Throughput" ], "id": "78969e6f35454c154f746ed9c2fcfd7e07db6c0a", "inCitations": [], "journalName": "", "journalPages": "4:1-4:6", "journalVolume": "", "outCitations": [ "b0b2f180faa09e7bfcb6bb8e57288c3b61f11116", "1860428ac473bbe38da909c0ba8c882b07deb8df", "3a2dd13e3ddedbc705b2fe00d5c74c53bfd64479", "92bc53a3a28a2cc02e02d959c439c80fce1846f1", "77ee477b0e6d0b6245eca5865ba95b55ed3db434", "7019d566d10fcdb836aa338c344de4f0ed2131b6", "18a5f443299784479e78d9e77f175af57cb2fa2b", "d40caa395d6f0b6a2638bb57ea05aac9f1f64ac0", "16444e411e3e33bf7cb3b813f76834fc3dd87d72", "1820a34042d6371a9e20484b0c63b698eb522a6c", "1539172c6a44fbf2e6012da535932b059393da1e", "64a7478762b24ac84b5068754eb26609a61a1884", "898b60ae12a855ac9ad91f93543d82ce00ee76ff", 
"70ce10f47aafa0994627a9575565b5c98af58d98", "d4da5fbf10b696fa691501ec08618aee479ba3ea", "da90d0265ef675763e8b51efb3c2efba8f809c54", "182cb3740940f403ff6f311fa54c5c1c9d7edc3f", "607a678b5648121de7f0c8bfef619a60646bb8af", "627b93073977b7b7c5ae0cf610f41ee0ed27669c", "3033df10e73f3061d21e58de6c141383815c0420", "61977858b3eea4f5a6d81393301e7298ade7a2d8", "441e5c174786110eb4c53f89873f471cd5165c88", "2c9b6f1a420ecd9e54b7467efd17f203690ef07e", "0139dceb6cef21b234e454d53154f30391495862", "4bbb4e2bed21980cfe9ca7a6e243737705b0fd20" ], "paperAbstract": "While flash-based SSDs have much higher access speed than hard disks, they have an Achilles heel, which is the service of write requests. Not only is writing slower than reading, but also it can incur expensive garbage collection operations and reduce SSDs' lifetime. The deduplication technique can help to avoid writing data objects whose contents have been on the disk. A typical object is the disk block, for which a block-level deduplication scheme can help identify duplicate ones and avoid their writing. For the technique to be effective, data written to the disk must not only be the same as those currently on the disk but also be block-aligned.\n In this work, we will show that many deduplication opportunities are lost due to block misalignment, leading to a substantially large number of unnecessary writes. As case studies, we develop a scheme to retain alignments of the data that are read from the disk in the file modifications by using small additional spaces for two important applications, a log-based key-value store (e.g., FAWN) and an LSM-tree based key-value store (e.g., LevelDB). 
Our experiments show that the proposed scheme can achieve up to 4.5X and 26% of throughput improvement for FAWN and LevelDB systems, respectively, with a less than 5% space overhead.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078468.3078471", "https://omega.uta.edu/~xxw4571/papers/freewrite.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/78969e6f35454c154f746ed9c2fcfd7e07db6c0a", "sources": [ "DBLP" ], "title": "Freewrite: creating (almost) zero-cost writes to SSD in applications", "venue": "SYSTOR", "year": 2017 }, "78b2292abc46172d621b5303f3f8b166337114a9": { "authors": [ { "ids": [ "12199006" ], "name": "Abdulaziz Tabbakh" }, { "ids": [ "1789661" ], "name": "Murali Annavaram" }, { "ids": [ "2064331" ], "name": "Xuehai Qian" } ], "doi": "10.1109/IPDPS.2017.106", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.106", "entities": [ "CPU cache", "Dynamic random-access memory", "Graphics processing unit", "Memory bandwidth", "Multiprocessing", "Parallel computing", "Replication (computing)", "Scheduling (computing)", "Task parallelism" ], "id": "78b2292abc46172d621b5303f3f8b166337114a9", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "698-707", "journalVolume": "", "outCitations": [ "1a850fbc5d86a91d882eec88290425fbdff57cf6", "5d79e0c5e4b531f26de469688668c50f8c1069b2", "0612811b3ed9fc7ef8300e65cb70360613dab01d", "1eeb50d5f7937f65a910203ae61430ff8b969012", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "70b1f5b927e03b1800f71c58198b5547dd0f83a4", "72ba3d541afe35920aad310c32a5e7acd1347022", "ac2c02a5073fb36701af2ecacc596c18db96e2da", "2d6f002477015469075954c6748a1a85af352c94", "da620c71ca24a493ebd9a96ab05ca116d72eb46e", "67bf737ceccf387cdd05c379487da8301f55e93d", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "28cc7453c5f3f9ecb9415e631b0829ec9af8a4c3", "2dc38b527e91f8cfee6f6c7ba4d079087c293471", "7ea15c138cc72588fa376ff819f4bb8ca0b324da", 
"03d832219a7cf933db0ef1f686fec730c09acd55" ], "paperAbstract": "The power consumed by memory system in GPUs is a significant fraction of the total chip power. As thread level parallelism increases, GPUs are likely to stress cache and memory bandwidth even more, thereby exacerbating power consumption. We observe that neighboring concurrent thread arrays (CTAs) within GPU applications share considerable amount of data. However, the default GPU scheduling policy spreads these CTAs to different streaming multiprocessor cores (SM) in a round-robin fashion. Since each SM has a private L1 cache, the shared data among CTAs are replicated across L1 caches of different SMs. Data replication reduces the effective L1 cache size which in turn increases the data movement and power consumption. The goal of this paper is to reduce data movement and increase effective cache space in GPUs. We propose a sharing-aware CTA scheduler that attempts to assign CTAs with data sharing to the same SM to reduce redundant storage of data in private L1 caches across SMs. We further enhance the scheduler with a sharing-aware cache allocation and replacement policy. The sharing-aware cache management approach dynamically classifies private and shared data. Private blocks are given higher priority to stay longer in L1 cache, and shared blocks are given higher priority to stay longer in L2 cache. Essentially, this approach increases the lifetime of shared blocks and private blocks in different cache levels. 
The experimental results show that the proposed scheme reduces the off-chip traffic by 19% which translates to an average DRAM power reduction of 10% and performance improvement of 7%.", "pdfUrls": [ "http://alchem.usc.edu/portal/static/download/share_aware_gpu.pdf", "https://doi.org/10.1109/IPDPS.2017.106" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/78b2292abc46172d621b5303f3f8b166337114a9", "sources": [ "DBLP" ], "title": "Power Efficient Sharing-Aware GPU Data Management", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "78b6cbcceca106c039c9dc2d757376956882ac64": { "authors": [ { "ids": [ "12488214" ], "name": "Yunhan Jia" }, { "ids": [ "39645110" ], "name": "Qi Alfred Chen" }, { "ids": [ "1705047" ], "name": "Shiqi Wang" }, { "ids": [ "37407858" ], "name": "Amir Rahmati" }, { "ids": [ "35064352" ], "name": "Earlence Fernandes" }, { "ids": [ "3895596" ], "name": "Zhuoqing Morley Mao" }, { "ids": [ "1704708" ], "name": "Atul Prakash" } ], "doi": "", "doiUrl": "", "entities": [ "Access control", "Adversary (cryptography)", "Backward compatibility", "Categorization", "Denial-of-service attack", "Malware", "Mobile app", "Performance Evaluation", "Smartphone" ], "id": "78b6cbcceca106c039c9dc2d757376956882ac64", "inCitations": [ "b4a018531870e3436f359440ecdb725f22ae6d3e", "32067fd66db125407ac709bae8a6c5f860efc7f2", "9b2a3cad73b2c086220ea9e4b82e059e4e65aa79", "1fe3c451b28a595624202d4e9cce4cc681c5594a", "879a7fc87b9855b77786ac3bf5f97300a486df8b", "5179759df98dfd30bd0fde3ce77d1b04230a88bd", "39e7d1e0a89fe3105619eed6e84d2be6dafe5198", "0b1571450fcadf9d6dac5de0667f558573154a1f" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "6f2b2af90d49942f1d75f070cf176811a7a05bd8", "a851b38c914b5c005e2be8b43c2d17c98e09f7c1", "4a4c0cfc26020d519679a98fe683fce6aab1eefa", "0a5192954c6229694bcfa6963887b60436d394d9", "034b8a0b698036b210f53ce0290fb1bc13c7f1db", 
"12ef153d9c7ccc374d56acf34b59fb2eaec6f755", "29898e452f80ba09357a2fb716c7b14d75eb3bd6", "0c364deb5f061d17dfa86e0ede98bb0077e3ee16", "2fe33f4b7c75d9e29bf80e7bdd719205cfafc3c9", "2897894d1da23a794c424acdedc2b33363838d8d", "30bc245a8295dac571c58aec0b744e4bf217c287", "3343392fe056b45692252ad18278e10020ee3d8e", "0cb4ed5d73b4885f05facfa6aee45bdcdec1847e", "7f9bbe985ccf6c16b6ef60ccb9ef04e4219b54cb", "8ffc32565380d35bcc68f175cb803918b56517a7", "25c77cd67d275f66e9eb776e885887c0ab7abd9d", "0bae04f86b5302345d18b99801829efc98c90874", "26e6b1675e081a514f4fdc0352d6cb211ba6d9c8", "0c0e6a70a41a5574f06b60b5567013571b071233", "e6305e00746f75401fde3f4719f037a9fd183d7c", "9e5db350ba34f2b4c662cdea7acb6e906484ada9", "27a7497a46c9597b35d2120c224061423ff3f311", "16d53aec6fe8f4fb08a956208cfec4cabfa6ece1", "2cc82a4971c957a7e5654e078e5087312f097316", "3c7cf150b5fbbdce6d937eccc1ab05aeb77d0566", "63328498806cb2515ca726f0a8d79a4520857ab2", "8835620c6ee3ac40b357c98a73548fadd39b479f", "0b0f2ee9d7b67f2b4a1d10462bb34d798967be9e", "708beb6b5638b4abc57082af2e58161699712323", "29039350ed46383cc5570bc9274f63153b7f8678", "bb4bffbc7507b82adb0a5035e78b5639a7df1b56", "dcddbe6e0bb64d4792610d08bae06f6c11aec0c7", "35716646b2ce09a1dd0dd584b5adc7242df0beff", "25864f75b479c2019498314cdcfdd12ec68bfa3f", "51b39f5e1b01abc6dffc3e777131649b3f5516e8", "8e9d4966d49ebd648d520e25128bb03383f6ae22", "6be8efbb2c051b357931bf27f2a2316db7728edd", "34fce3b85f830879ecf6dd9e4acacaebed52fc9d", "57db04988af0b65c217eaf3271afc40927d1c72f", "2dd7aca522990255ce9185a20d46fabd2796314b", "361d0121f49d526602ddd9bf369a4bc402e5e0fb", "a30936a60327076a939a580c8176eff034e96235", "06c291ac85e9297707f74e13f90f041c77e9eed4", "1a6218a174f5be297cfa4a16fd52a1b814ac4261", "6697bd267ccf363bc1b8ab7cb971b880495ff3f1", "023f23c300804754753cb11db51fb7f582556ab7", "38a5609c6cd26f39609ea877868f4d4a41886e52", "537f16973900fbf4e559d64113711d35bf7ca4a2", "e00bbf19c0b2ca6b07ab688664515e33cc7a0216", "0d0d0dfa38ca86e711dc4279f486f89c6d901cfe", 
"31780e2147fb5e99127c6345784e7e85aa96326c", "75e9c33a771b11dad87d9a3222a307463e30d784", "2c5ce9b00da45e7b5e63b299cfeadd9af000f446", "14a23ef72ceb106e4c95df6e396c6291c11a87c9" ], "paperAbstract": "The Internet-of-Things (IoT) has quickly evolved to a new appified era where third-party developers can write apps for IoT platforms using programming frameworks. Like other appified platforms, e.g., the smartphone platform, the permission system plays an important role in platform security. However, design flaws in current IoT platform permission models have been reported recently, exposing users to significant harm such as break-ins and theft. To solve these problems, a new access control model is needed for both current and future IoT platforms. In this paper, we propose ContexIoT, a context-based permission system for appified IoT platforms that provides contextual integrity by supporting fine-grained context identification for sensitive actions, and runtime prompts with rich context information to help users perform effective access control. Context definition in ContexIoT is at the inter-procedure control and data flow levels, that we show to be more comprehensive than previous context-based permission systems for the smartphone platform. ContexIoT is designed to be backward compatible and thus can be directly adopted by current IoT platforms. We prototype ContexIoT on the Samsung SmartThings platform, with an automatic app patching mechanism developed to support unmodified commodity SmartThings apps. To evaluate the system\u2019s effectiveness, we perform the first extensive study of possible attacks on appified IoT platforms by reproducing reported IoT attacks and constructing new IoT attacks based on smartphone malware classes. We categorize these attacks based on lifecycle and adversary techniques, and build the first taxonomized IoT attack app dataset. 
Evaluating ContexIoT on this dataset, we find that it can effectively distinguish the attack context for all the tested apps. The performance evaluation on 283 commodity IoT apps shows that the app patching adds nearly negligible delay to the event triggering latency, and the permission request frequency is far below the threshold that is considered to risk user habituation or annoyance.", "pdfUrls": [ "http://web.eecs.umich.edu/~earlence/assets/papers/contexiot_ndss17.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/contexlot-towards-providing-contextual-integrity-appified-iot-platforms/" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/78b6/cbcceca106c039c9dc2d757376956882ac64.pdf", "s2Url": "https://semanticscholar.org/paper/78b6cbcceca106c039c9dc2d757376956882ac64", "sources": [ "DBLP" ], "title": "ContexIoT: Towards Providing Contextual Integrity to Appified IoT Platforms", "venue": "NDSS", "year": 2017 }, "78bafdffa38ebf78233397ca81834f47607443b0": { "authors": [ { "ids": [ "1777264" ], "name": "Gregory V. 
Chockler" }, { "ids": [ "3234311" ], "name": "Alexander Spiegelman" } ], "doi": "10.1145/3087801.3087824", "doiUrl": "https://doi.org/10.1145/3087801.3087824", "entities": [ "Cloud storage", "DSPACE", "Emulator", "Object type (object-oriented programming)", "Read-modify-write", "Server (computing)" ], "id": "78bafdffa38ebf78233397ca81834f47607443b0", "inCitations": [ "9b96fd02ed79512eb0efbdf6654c32d83745c3e4", "21333794dd301ef0ac96e55a9aca16f5bba69f9e" ], "journalName": "", "journalPages": "83-92", "journalVolume": "", "outCitations": [ "015222399f281321db43114b86a5b8b8dd38cf83", "0fc8c684ea6d28c828da984b1a931d43afe20756", "045a975c1753724b3a0780673ee92b37b9827be6", "976a757a83f2c689978dd3e5018979909809dc4f", "563dcd02639b70782b2b152e4f5f046b03cbff90", "1f27c2b85727f21a5aa541f6bc46603935b0b6ed", "73fd6c57527c7c9d037e8781aa8c0cbd0f75c1fe", "00996f92050947b57e10c4082a5ba82c368a78ed", "069342ee7ced1b925d1ad5c260f1142cc7ad44ec", "37e57c471f5884dde8b1f596e61414a468538cc1", "07159000917af52dae4ecb9612dba682d0661c03", "24f9c44fc774332793d72228ba4962487d4b2400", "0e92d8c5255325d5b6cd1d11be6f540aae5455bd", "0032d13e96645bbf124269fd9e3659c1845b0cc6", "443f89754162b0a9f97f46f80eb74e9cdd416ec1", "136f8675c0af31185fa69c3919f7c2307826fecb", "0c8030095703f5052aa653ccd6e3d46bbb2dbed7", "4af63ed343df388b6353b6fc77c7137d27822bf4", "f3018e7589af851341e6b40affb12d0ebdfa7db1", "206b20f225fc655dfac733b6f0bd8077ed86215e", "3a26331d29c8839f17f557d089e9e8d817bf8c9a", "58c1e11d6ee2e36f80625231ad384448769d7e27", "81209d8d761c05f4ce0fcf7c14b267764ddc246d", "0270a2b35f745f2ed17fbbac950e8086ee9aa1d6", "42142c121b2dbe48d55e81c2ce198a5639645030", "06abe3ef45a6b5d72a247fee066c523777e9e03d", "1d6e7be863a53b8e5ca8c43f99a4064541d520cd", "eee4661c127fa48874950777e52dedf410d2f430", "024ef6de37650977dd5d05d6e2cc586868b22406", "99faa20f39ee93a6564ac52ec996273902f0939d", "252844376fd0319359cc6fc0f512f744a3c9362d" ], "paperAbstract": "Driven by the rising popularity of cloud storage, the costs associated 
with implementing reliable storage services from a collection of fault-prone servers have recently become an actively studied question. The well-known ABD result shows that an f -tolerant register can be emulated using a collection of 2f + 1 fault-prone servers each storing a single read-modify-write object, which is known to be optimal. In this paper we generalize this bound: we investigate the inherent space complexity of emulating reliable multi-writer registers as a function of the type of the base objects exposed by the underlying servers, the number of writers to the emulated register, the number of available servers, and the failure threshold. We establish a sharp separation between registers, and both max-registers (the base object type assumed by ABD) and CAS in terms of the resources (i.e., the number of base objects of the respective types) required to support the emulation; we show that no such separation exists between max-registers and CAS. Our main technical contribution is lower and upper bounds on the resources required in case the underlying base objects are fault-prone read/write registers. 
We show that the number of required registers is directly proportional to the number of writers and inversely proportional to the number of servers.", "pdfUrls": [ "https://arxiv.org/pdf/1705.07212v1.pdf", "https://pure.royalholloway.ac.uk/portal/files/28328790/main.pdf", "http://arxiv.org/abs/1705.07212", "http://doi.acm.org/10.1145/3087801.3087824" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/fa7d/734a15d826242569927355e03ef61a4dfde3.pdf", "s2Url": "https://semanticscholar.org/paper/78bafdffa38ebf78233397ca81834f47607443b0", "sources": [ "DBLP" ], "title": "Space Complexity of Fault-Tolerant Register Emulations", "venue": "PODC", "year": 2017 }, "78d4fc136ffcfa192e2bdb4c08cdd56a0a0e2f64": { "authors": [ { "ids": [ "2960286" ], "name": "John Meehan" }, { "ids": [ "3137330" ], "name": "Cansu Aslantas" }, { "ids": [ "2031287" ], "name": "Stanley B. Zdonik" }, { "ids": [ "1773620" ], "name": "Nesime Tatbul" }, { "ids": [ "40444545" ], "name": "Jiang Du" } ], "doi": "", "doiUrl": "", "entities": [ "Apache Kafka", "Big data", "Inter-process communication", "Scalability", "Stream processing", "Streaming media", "Time series" ], "id": "78d4fc136ffcfa192e2bdb4c08cdd56a0a0e2f64", "inCitations": [ "1fb1fb1017dde4d9088f50c43d22eeea3cfaece9", "65085beceabd9d4e6d30aef58ca7812b84ad787b", "569bcf7b700af193c7f59ba150a41a4671317aba", "7352c0c3e4217188ebc73430d12d4ce240e35c79", "213b5f30cd84c80c1f53e46553fa221fdcc226dd" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "7ec028ace29244cb74c105327a7e4177a34aa6bd", "36fce6f27746e6b210abd55a61479c5c101142d5", "0b7490e35779ee8d224773617f62cad4781cefd0", "a073767c85a53084b64639097ee26475f0ecbf2e", "a11b243c571ade72c1be5bbb4105b00388174bd6", "0ef88610f4382a9c6e5c8020ab6ccf3dde0fb3eb", "cc7f5a7e69a83bf5aa4ad0448708a36f0953e5b8", "20ed54badb1fd2bb311815c2eb322f7a959fcddd", "022e936d46bf435f73faf9ca03a5a150eb90ce9b", "9f948448e7a5f0cc94cd53656410face8b31b18a", 
"e3886987fa31e5e9388fbc76d9122213f200f4d3", "393f85195b5dc97619613e4048b584838bb439e4", "0cc7cdf507b7a22e0455ecd9b09815dc1873453d", "73f31354cc9058ddc2e47a1c585b753e1592c1bf", "8c5e7962c11fcb4ef9084a75a2a2fd5bd3fd74cc", "e847c3ec130da57328db79a7fea794b07dbccdd9", "2dd52b31c8f0aa236853062f5ad18d5c686c9e40", "412a9e54bbb31e12d008a9579994e009c5b40b46" ], "paperAbstract": "In this paper, we argue that in many \u201cBig Data\u201d applications, getting data into the system correctly and at scale via traditional ETL (Extract, Transform, and Load) processes is a fundamental roadblock to being able to perform timely analytics or make real-time decisions. The best way to address this problem is to build a new architecture for ETL which takes advantage of the push-based nature of a stream processing system. We discuss the requirements for a streaming ETL engine and describe a generic architecture which satisfies those requirements. We also describe our implementation of streaming ETL using a scalable messaging system (Apache Kafka), a transactional stream processing system (S-Store), and a distributed polystore (Intel\u2019s BigDAWG), as well as propose a new time-series database optimized to handle ingestion internally.", "pdfUrls": [ "http://sstore.cs.brown.edu/papers/ingestion-cidr-2017.pdf", "http://cs.brown.edu/courses/csci2270/papers/brown-data-ingest.pdf", "http://cs.brown.edu/courses/cs227/papers/brown-data-ingest.pdf", "http://cidrdb.org/cidr2017/papers/p124-meehan-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/78d4/fc136ffcfa192e2bdb4c08cdd56a0a0e2f64.pdf", "s2Url": "https://semanticscholar.org/paper/78d4fc136ffcfa192e2bdb4c08cdd56a0a0e2f64", "sources": [ "DBLP" ], "title": "Data Ingestion for the Connected World", "venue": "CIDR", "year": 2017 }, "78e952ca5a744e8cae3bb20e2637af88d41b41bf": { "authors": [ { "ids": [ "2229509" ], "name": "Masatomo Hashimoto" }, { "ids": [ "36304207" ], "name": "Masaaki Terai" }, { "ids": [ "35219029" ], 
"name": "Toshiyuki Maeda" }, { "ids": [ "1962559" ], "name": "Kazuo Minami" } ], "doi": "10.1145/3030207.3030217", "doiUrl": "https://doi.org/10.1145/3030207.3030217", "entities": [ "Central processing unit", "Computation", "Kernel (operating system)", "Memory bound function", "Performance tuning", "Program optimization", "Scalar processor" ], "id": "78e952ca5a744e8cae3bb20e2637af88d41b41bf", "inCitations": [], "journalName": "", "journalPages": "361-372", "journalVolume": "", "outCitations": [ "c78f374bbfaa85df22a24c9ef9e028e8bd7fe397", "3cbb63051d0927b8f40853184c4788d47cd853b4", "38f8d8f68072c6eed23dd2d6bdba3ff72b1fd981", "0394f8e5b8cd881048981c7a5e1a866471adb580", "2f54ca897f9805c9dc27afd8ae2a7a63ad798e0c", "a8559e6c955a43ecbde0ce05b928346691dfb89f", "092217c2267f6e0673590aa151d811e579ff7760", "05393361e6d9e56ee7dbabb1e5ef6c1c212fc34d", "338ec0476191e19ba96d38b34db9a752f0f138a8", "ea23e16062dc7513f101ad0a7a4e8473764c33fd", "230bc9e1b5671dc435c1a084621a8fee61c0200a", "e23da0346eef46def598562009fd7a7c84932953", "98e2a7057ca438e0838a5c036c87798d5ffee408", "1e375b7bd9b02336371dbbb06bee4a94b2a93fc8", "0f16f6f478b5c788dce466eb50e36c612273c36e", "24e63240320f77654a9c3673f66f6bf6be3e91a1", "94c1818648ceaf538c342651175d09534946f17c", "b13a68dc72c0a6dafacc018317e9c96aa6ebbd75", "0e24c9eeb4c02ef4c1d9f5db7eea3e3766f4e581", "295f4ffa651675b22ae8e2f3f30b400330da0c69" ], "paperAbstract": "The process of performance tuning is time consuming and costly even if it is carried out automatically. It is crucial to learn from the experience of experts. 
Our long-term goal is to construct a database of facts extracted from specific performance tuning histories of computation-intensive applications such that we can search the database for promising optimization patterns that fit a given kernel.\n In this study, as a significant step toward our goal, we explored a thousand computation-intensive applications in terms of the distribution of kernel classes, each of which is related to expected efficiency and specific tuning patterns. To statistically estimate the distribution of the kernel classes, 100 loops were randomly sampled and then manually classified by experienced performance engineers. The result indicates that 50-70% of the kernels are memory-bound and hence difficult to run efficiently on modern scalar processors. In addition, based on the classification results, we constructed experimental classifiers for identifying loop kernels and for predicting kernel classes, which achieved cross-validated classification accuracy of 81% and 65%, respectively.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030217" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/78e952ca5a744e8cae3bb20e2637af88d41b41bf", "sources": [ "DBLP" ], "title": "An Empirical Study of Computation-Intensive Loops for Identifying and Classifying Loop Kernels: Full Research Paper", "venue": "ICPE", "year": 2017 }, "78ea9dea4915d5b2a86eb1854145c91ac2c8aba5": { "authors": [ { "ids": [ "32628418" ], "name": "Muhammad Asim Jamshed" }, { "ids": [ "2313756" ], "name": "YoungGyoun Moon" }, { "ids": [ "6190270" ], "name": "Donghwi Kim" }, { "ids": [ "1729324" ], "name": "Dongsu Han" }, { "ids": [ "1944837" ], "name": "KyoungSoo Park" } ], "doi": "", "doiUrl": "", "entities": [ "Application programming interface", "Business logic", "Firewall (computing)", "HOOD method", "High- and low-level", "Internet protocol suite", "Intrusion detection system", "Middlebox", "Multi-commodity flow problem", "Protocol stack", "Source lines of 
code", "State (computer science)", "Stateful firewall" ], "id": "78ea9dea4915d5b2a86eb1854145c91ac2c8aba5", "inCitations": [ "be16f521ebdbe1acea867c9ce8100692d9f56308", "8ca1436fe1e9bbdb39a92178fa80c7869d92573d", "bf006dc952ff01f6daaa87933bb812859127ed1c", "304ba54357f0ebd37d35d33fe8b3703c979b514e", "e41440cff90683629228b308a94e48c7af11ca36" ], "journalName": "", "journalPages": "113-129", "journalVolume": "", "outCitations": [ "04f6a5dc6c2aac0586f8f1e83b434ea96fffcd66", "0433bb657317ac22f7c66d71dfd14c8ead607d73", "225603198cc415d363db8a8a2bd30b0df3c963b1", "4859d7eb90e7a662536cb0ae8272898239cf396b", "6354917580770e081618c4cab9de04a007fef167", "3547ac839d02f6efe3f6f76a8289738a22528442", "25f855c968af75e4617f25c71aee3cedec1dedaf", "73e30965ab41161ef7ecbed00133a931dbc2efba", "0a59166593f0a2fa260f16fd853299c9f0863fdf", "17c18f25346fddc11bb5f2448e9b705ef8585d21", "0cbd8cc50dd35561b2adf2194715f58fc7e7651d", "b85df0212d624cbcf52108969ba722fe5d24cb2e", "363d109c3f00026f9ef904dd8cc3c935ee463b65", "06c9511a3a29f1afa3971b1885ad56b5a890dbdc", "239f4e993a97984c1e0dde4032c5c2f6ab05dd22", "6a7fa6f50777fb8783e5c9d1e9685eb153cddd45", "5cb88831f543d30cc688fedc445d4e358ef73626", "e298114c5abac2b048020d658ecfb7d73d6a422a", "336b4f3099b8f629adc20a69aba15257e53539f9", "4d03b65744ce64166457436b24d6db23d3c3e493", "8e226c40a8c056dc4c348eef256b711902e1d943", "46a38fb1e26c9957111d154c6fc2c39101ff0ba1", "6694a3e4351562cc810a7e14fc6f19647b7f8c71", "09bd66ed15985caa6b0bf1d54a36b508141ed128", "005718daa82f7880b5b1f3b01e0e8f6aad2b5751", "7129b305ce45f83127e928e8510da9fae0783905", "5e8145c020e23f89c2377fdea4a4b06016d3101b", "08b9e9756a5da46f9bbeb9286eaff5b3640705c8", "6debd9d773c7aca19f18f3b4640c45f8ae12b254", "afa6dee5c3439e28bc2cfb216f43c21f1f586fbc" ], "paperAbstract": "Stateful middleboxes, such as intrusion detection systems and application-level firewalls, have provided key functionalities in operating modern IP networks. 
However, designing an efficient middlebox is challenging due to the lack of networking stack abstraction for TCP flow processing. Thus, middlebox developers often write the complex flow management logic from scratch, which is not only prone to errors, but also wastes efforts for similar functionalities across applications. This paper presents the design and implementation of mOS, a reusable networking stack for stateful flow processing in middlebox applications. Our API allows developers to focus on the core application logic instead of dealing with low-level packet/flow processing themselves. Under the hood, it implements an efficient event system that scales to monitoring millions of concurrent flow events. Our evaluation demonstrates that mOS enables modular development of stateful middleboxes, often significantly reducing development efforts represented by the source lines of code, while introducing little performance overhead in multi-10Gbps network environments.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-jamshed.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_jamshed.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_jamshed.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-jamshed.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/jamshed" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/78ea/9dea4915d5b2a86eb1854145c91ac2c8aba5.pdf", "s2Url": "https://semanticscholar.org/paper/78ea9dea4915d5b2a86eb1854145c91ac2c8aba5", "sources": [ "DBLP" ], "title": "mOS: A Reusable Networking Stack for Flow Monitoring Middleboxes", "venue": "NSDI", "year": 2017 }, "78fd9cba20a513f65274378e3521165dc476369e": { "authors": [ { "ids": [ "32190895" ], "name": "Jason A. 
Donenfeld" } ], "doi": "", "doiUrl": "", "entities": [ "Kernel (operating system)", "Next-generation network", "Tunneling protocol" ], "id": "78fd9cba20a513f65274378e3521165dc476369e", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/wireguard-next-generation-kernel-network-tunnel/" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/78fd9cba20a513f65274378e3521165dc476369e", "sources": [ "DBLP" ], "title": "WireGuard: Next Generation Kernel Network Tunnel", "venue": "NDSS", "year": 2017 }, "7902365152dd93d2b200e3adb14a97169dc78e89": { "authors": [ { "ids": [ "3264651" ], "name": "Chenyan Xiong" }, { "ids": [ "2475437" ], "name": "Zhuyun Dai" }, { "ids": [ "1686498" ], "name": "James P. Callan" }, { "ids": [ "31114233" ], "name": "Zhiyuan Liu" }, { "ids": [ "39071178" ], "name": "Russell Power" } ], "doi": "10.1145/3077136.3080809", "doiUrl": "https://doi.org/10.1145/3077136.3080809", "entities": [ "End-to-end principle", "Hoc (programming language)", "Kernel (operating system)", "Learning to rank", "Ranking (information retrieval)", "Web search engine" ], "id": "7902365152dd93d2b200e3adb14a97169dc78e89", "inCitations": [ "32e7f0863e7c56cfced89abedaee46e2288bc127", "01c2d1a5a52a686ab1ad42ff1dd623390c0a47f2", "1ba89581155ebde06e6a24e7098d9d5c945fdac6", "311ac3902cd07a590fc3b92d8e2dfc7b6b53201a", "1603d035494395812a78f2cde1b9e1db8b2cc5a2", "432b36c1bec275c2778c66f9897f9e02f7d8b579", "577a2f547d1abeff1c3308c10198160bf44ee089", "3c8063179345af107834671bfc5453b940ce07d7", "0acf2f684c0d0e2e3aba215dffab84b2cc175c31" ], "journalName": "", "journalPages": "55-64", "journalVolume": "", "outCitations": [ "299a008d8221b572c25ccf581e723ababb364fbb", "22c19d97e560a3937f1625f63830c90bf125d359", "90fbffb3486e7e9668eb4d42bbc9e94d0b665256", "7161eb8d3b1cb01769a36528f9c6bddd663545a9", 
"214adc2dfdc2160cdf5be54001daf2b2304a03b3", "617279b61a0a2b1e60d73dd799068dfdb7de15cd", "fc3098cff5469c55c3e81dc127563afe6dbadf22", "0b19b37da5e438e6355418c726469f6a00473dc3", "4e3571c4d195381518dfced7c36a78b581041737", "11a370a2f99127de2ff8d2bcbb784658ec8cfb3e", "df6d4ee85c6a286d23c0ef11a0d6e7c4eb1a5490", "5b9534442f91a87022427b74bca9fd95dd045383", "8478c0f46dd30ef7f4052145983d6d315c2e1f17", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "4c9fafa3b1bed97bb00b8bc68db39a9ad48490f1", "07f3f736d90125cb2b04e7408782af411c67dd5a", "42cf161894f4b9ebb86a9109dc2af45d9eee8916", "02121b35aa2c543e6afbd193afb9c929705b7aee", "44a19978896dba74d8d1047b0b2219aeb916535e", "0aedae19e94d76520cb92932b6506389799051e2", "3d8d8da2c0bc8f709b3728680ea521ebbfcb1a7c", "67e5630eed803d6f480c0a151f33f39d56739f1f", "234dc7984668f72ee4917755915040386be4eb1f", "15281680463698dca403697bd627af4efebc98a2" ], "paperAbstract": "This paper proposes K-NRM, a kernel based neural model for document ranking. Given a query and a set of documents, K-NRM uses a translation matrix that models word-level similarities via word embeddings, a new kernel-pooling technique that uses kernels to extract multi-level soft match features, and a learning-to-rank layer that combines those features into the final ranking score. The whole model is trained end-to-end. The ranking layer learns desired feature patterns from the pairwise ranking loss. The kernels transfer the feature patterns into soft-match targets at each similarity level and enforce them on the translation matrix. The word embeddings are tuned accordingly so that they can produce the desired soft matches. 
Experiments on a commercial search engine's query log demonstrate the improvements of K-NRM over prior feature-based and neural-based states-of-the-art, and explain the source of K-NRM's advantage: Its kernel-guided embedding encodes a similarity metric tailored for matching query words to document words, and provides effective multi-level soft matches.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080809", "http://www.cs.cmu.edu/~callan/Papers/sigir17-Chenyan-Xiong-a.pdf", "http://arxiv.org/abs/1706.06613", "https://arxiv.org/pdf/1706.06613v1.pdf", "http://ai2-website.s3.amazonaws.com/publications/SIGIR2017End-to-end.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7902365152dd93d2b200e3adb14a97169dc78e89", "sources": [ "DBLP" ], "title": "End-to-End Neural Ad-hoc Ranking with Kernel Pooling", "venue": "SIGIR", "year": 2017 }, "791453489be6112655b3049d9ae0a403bb31678b": { "authors": [ { "ids": [ "7860942" ], "name": "Yingjun Wu" }, { "ids": [ "3393323" ], "name": "Wentian Guo" }, { "ids": [ "39712651" ], "name": "Chee Yong Chan" }, { "ids": [ "1688848" ], "name": "Kian-Lee Tan" } ], "doi": "10.1145/3035918.3064011", "doiUrl": "https://doi.org/10.1145/3035918.3064011", "entities": [ "Compile time", "Compiler", "Crash (computing)", "Database", "Degree of parallelism", "E-commerce", "In-memory database", "Multi-core processor", "Online transaction processing", "Parallel computing", "Parallel database", "Relational database management system", "Server (computing)", "Stored procedure", "Transaction processing" ], "id": "791453489be6112655b3049d9ae0a403bb31678b", "inCitations": [ "64451ec9c182374a0ce6a3503946692f674569e2", "264a5e7a5230b228b86f63a75546738a66454c56" ], "journalName": "", "journalPages": "267-281", "journalVolume": "", "outCitations": [ "2520cfc29a521f2333fda020d7ae41860f8dfebd", "5a36f17e0560750a956064ff06b63bcd57c6145f", "5046a718f92447642939f5c93414dc97225d726a", "e75c5d1b7ecd71cd9f1fdc3d07f56290517ef1e5", 
"ca61d435baaaa92dc06333bb7a54676e482283fb", "3702d6e0c78050f3261fdbf0eb1aefbac59fb8cf", "97498df02888b55d3a48648aadf7ce0769d7b358", "514a5c15e8cf3f681febecad954a4508d9189c99", "65106d0a26da1352faa5a32a8016f8504d10b1c4", "7bc692b8d73d506dc7585eaecab30af3acf59459", "016cc168aaf437843c65cb3ebdade0330007c2e4", "12e1c4ebf2464570d461d26407c87e7439b99ff5", "ef47742e72bd64fb1ae5359cd6d5dd6dfad34dc8", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "6e90c995cc9caa0f7d9d68d536f5e16e9bcbbcd6", "6c079493f0d1f7df6613f00edc985a753e75a5c0", "0d29a696d8c66d795336ab34aff0b6fb8decb06d", "10551c91b4d36d1009b23b4d2b88a9e1733fe029", "4b14389e3ed8bdf2c470e69cc0eff3be6fdbe254", "11620e0e8d0224d4401439a1c6774f5bd750b847", "9baa88ccbc656e0774ae1bc01d5698f7801919d6", "3ae8993ebc28dd9b99d415d04d2b766dc99212d9", "e8b3ecb8f19b638603ab296deac8aa3fbd303f90", "1b3a8d042ed8569d027cb120774f50521709abb2", "136eefe33796c388a15d25ca03cb8d5077d14f37", "512e19f69011155c9618c650a7d4e2e525a4d1dd", "66702084eca2b6ada4526b81fdc3d3c53b02535d", "39e3d058a5987cb643e000bce555676d71be1c80", "4ab55cf927d366b9307f4f5d1a705948b9f7ea02", "412a9e54bbb31e12d008a9579994e009c5b40b46", "56f6aec0132e56769e2036bbeff791dfa137d107", "37d543efda665556815dc45af537a3400fb106c7", "0997037e940df06ed7a6d19f7501579aab01e829", "08d1cedbbaa798855e30fa7dc9ddbf88060b1399", "71691ee2dbe001d599334e5389d80dd32c44a74e", "3bb3f2b1e3642cbe3885cc616f576621d3f36fe4", "549dd5a7c187fbf2a727f84f174e5ed79ade02b1", "afb3bfadd61e2613d179ce3310581da883c66898", "15c80ec5104e98d6f84b5ed348ba0276c0739862", "5121709bf42b13a93c70b45a456c82db92850a02", "9748241beb02ef1e2d0e6dc877c04b354033a838", "2525c025f11aec60cff428271ca851381b92008f", "2543a986d875f86119cb4ad9b1e287873ac4bce2" ], "paperAbstract": "Main-memory database management systems (DBMS) can achieve excellent performance when processing massive volume of on-line transactions on modern multi-core machines. 
But existing durability schemes, namely, tuple-level and transaction-level logging-and-recovery mechanisms, either degrade the performance of transaction processing or slow down the process of failure recovery. In this paper, we show that, by exploiting application semantics, it is possible to achieve speedy failure recovery without introducing any costly logging overhead to the execution of concurrent transactions. We propose PACMAN, a parallel database recovery mechanism that is specifically designed for lightweight, coarse-grained transaction-level logging. PACMAN leverages a combination of static and dynamic analyses to parallelize the log recovery: at compile time, PACMAN decomposes stored procedures by carefully analyzing dependencies within and across programs; at recovery time, PACMAN exploits the availability of the runtime parameter values to attain an execution schedule with a high degree of parallelism. As such, recovery performance is remarkably increased. We evaluated PACMAN in a fully-fledged main-memory DBMS running on a 40-core machine. 
Compared to several state-of-the-art database recovery mechanisms, can significantly reduce recovery time without compromising the efficiency of transaction processing.", "pdfUrls": [ "https://arxiv.org/pdf/1604.03226v2.pdf", "https://arxiv.org/pdf/1604.03226v1.pdf", "http://arxiv.org/pdf/1604.03226v1.pdf", "http://doi.acm.org/10.1145/3035918.3064011" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/791453489be6112655b3049d9ae0a403bb31678b", "sources": [ "DBLP" ], "title": "Fast Failure Recovery for Main-Memory DBMSs on Multicores", "venue": "SIGMOD Conference", "year": 2017 }, "791771488f91db90c4f6593152bc0737bd597342": { "authors": [ { "ids": [ "1766422" ], "name": "Arvind Arasu" }, { "ids": [ "2747732" ], "name": "Ken Eguro" }, { "ids": [ "1774634" ], "name": "Raghav Kaushik" }, { "ids": [ "1691108" ], "name": "Donald Kossmann" }, { "ids": [ "39504108" ], "name": "Pingfan Meng" }, { "ids": [ "2001538" ], "name": "Vineet Pandey" }, { "ids": [ "1708341" ], "name": "Ravishankar Ramamurthy" } ], "doi": "10.1145/3035918.3064030", "doiUrl": "https://doi.org/10.1145/3035918.3064030", "entities": [ "Attribute\u2013value pair", "Concerto", "Concurrency (computer science)", "Key-value database", "Verification and validation" ], "id": "791771488f91db90c4f6593152bc0737bd597342", "inCitations": [ "f65907515ab7b0ca87a3b7640869c9680e95d392", "02a46ed5ea2bc0d8e59c0f876c99306e11a22f0a" ], "journalName": "", "journalPages": "251-266", "journalVolume": "", "outCitations": [ "17f19d9ec093ef82a10f1276fc53c10d4667836d", "0832933a67612dc18e0f70cb0ed949fef1a830a5", "0cccb12cbd39a77868edb7f6e0f0de30c2f6a3bb", "9eb446e0cc04bcbcec4cf29d9bf0f80f52fd5369", "007520b26bc1d71911ce613b9de83d8b2bc47c5d", "2e2ba3ed517ab712b1c3ac399c167f72ad23ba04", "8502fd5a659150e0635973744c4a80138c4e7ca7", "05a618847e4f08e5bca29dff732757779722b2e0", "570856c20451c9866cc9b94611b73c452fffd145", "29abdb6e6645e71121e85ffd27488bccdc4d0d8b", "4c788fdb543d2749a38589a800f36426fd14a236", 
"600081cefad09cf337b978bb6e1ec53309ee69b3", "6ed9a1b9e5db5e35f8045b057a3edb03372351c1", "47ed26fc42f926445ccd7017cc1bfcf185aa35a5", "08952d434a9b6f1dc9281f2693b2dd855edcda6b", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "92afeb122100993cc1409a101d9e5a0559b820d0", "017aba316f6d8447a4e045d8ddd417456629031e", "2ada97f3a01282a53966a92af7072d9040cb6472", "9682341a91f0ea73f3dd9b3548c1e113d7a7f61d", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "0ad5ed5a0a7b9210993753d7594c7285d5e9b179", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "1695d1b240b75a1e468cf4fc37e52403073b6345", "62bd72d7a4160bd1a35191c51137d11cfbe30cf7", "1965474a1e45f2e6ddda8b8a219c5e6eb0c5b0fe", "2564946f8a489fff7f3fa92542371f96499e312c", "13c479acb6a0703d5577e89b3f08677e9cff017f", "f6e03edfceb05787bdcb50119834e5a2b4a814e2", "9aa0d7253574e50fe3a190ccd924433f048997dd", "2af05d57b0c2f1ec0271f7fb1289e2fe7e0e1440", "1a2380dde8892683be4b1a23289d2001a1f707e8", "0b491803fa7697b2712d8a8c3c766ac39f8764cf", "42c9fee7b266da770e296229f5ce0d7b0ddc3a61", "18fb0f29720e46c455578e6b0b3953a4eb3f1614", "39512313225665cfbee041ad4045fcdd9c52d944", "041b6be90c7abb4d1db99b1a56074ff5bc816392" ], "paperAbstract": "Verifying the integrity of outsourced data is a classic, well-studied problem. However current techniques have fundamental performance and concurrency limitations for update-heavy workloads. In this paper, we investigate the potential advantages of deferred and batched verification rather than the per-operation verification used in prior work. We present Concerto, a comprehensive key-value store designed around this idea. Using Concerto, we argue that deferred verification preserves the utility of online verification and improves concurrency resulting in orders-of-magnitude performance improvement. 
On standard benchmarks, the performance of Concerto is within a factor of two when compared to state-of-the-art key-value stores without integrity.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064030" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/791771488f91db90c4f6593152bc0737bd597342", "sources": [ "DBLP" ], "title": "Concerto: A High Concurrency Key-Value Store with Integrity", "venue": "SIGMOD Conference", "year": 2017 }, "793cb6678e81917535650a8389ef24a9ef7dfd9d": { "authors": [ { "ids": [ "35441897" ], "name": "Simon Oya" }, { "ids": [ "2130458" ], "name": "Carmela Troncoso" }, { "ids": [ "1684979" ], "name": "Fernando P\u00e9rez-Gonz\u00e1lez" } ], "doi": "10.1145/3133956.3134004", "doiUrl": "https://doi.org/10.1145/3133956.3134004", "entities": [ "Adversary (cryptography)", "Adversary model", "Best, worst and average case", "Conditional entropy", "Correctness (computer science)", "Information theory", "Privacy" ], "id": "793cb6678e81917535650a8389ef24a9ef7dfd9d", "inCitations": [ "6b2cc009bf31007e558a096ab1b1a9c63abce665", "35947a30316c1f4df45be67cea8bbd52a20775d4" ], "journalName": "", "journalPages": "1959-1972", "journalVolume": "", "outCitations": [ "05a926a71b0d36610360317d01d048dd2bbdad92", "04bd64577c1f66486825c4ef9132cb94bb5334b4", "d921036a6cb7e340b019afa557a19bc65586a1ad", "04c93e29fa41d3289d134bfa1b672ac4384e7496", "4910a76ab2b6cd73f3841a03d74d5917c8f73a4e", "08a8c653b4f20f2b63ac6734f24fa5f5f819782a", "d79ed41122871e18760166a91771b8e33651ef0c", "1d571349f656da1b6fc1838b3b0c5b9cffcffe32", "14a48cbcb93867ff91ceb4db5e3628a4eb0bc457", "ae5e1b0941153ebc0de90b4830893618b81a7169", "fab4fbe093117b2dd65826497defb669dcc3984b", "54234446dd8991441336600931bac9b99882a526", "1065f1c73c538a8d4b017af1825967e1fab1bf52", "590a3950d3c356b3f058e1d7e2516376d2b92b40", "15cd71ac0333ade954201db6979abb39bde3d181", "523c226a32361acf9c7f856c8d7e4eb8d59fe786", "70fda5147aedd42c64143a464117b5ffde18a2e4", 
"2e34b5314279a2512e63e4c54aa8f933f9478c61", "01a29e319e2afa2d29cab62ef1f492a953e8ca70", "2d474083f3c9e8dab83b9f6d572743dd5cdd52a7", "97dcb1979ad773f9d902caad7bd0f6bb6c6c5e0d", "bdacfa6fc5dc122cb220fe547d705511b4731bf4", "37bcd8bbe2cddd48f0ec152fc5ffa4fca93f3828", "461ea2ebc14861c0b9c6c07ba61eaad93cc65980", "46ba61649de35403b04c581f61e7df399f573ec1", "283ecc8622694c070fa53aee7a1c37dadc603f8d", "1b1ee9348713c415dad2bd9832bf3110203a46ae", "5bf1f8c2fa288784956715ee1fbffad57c16fdb6", "465043a6ea2c9aff2fff449359a2f4203731a2f4" ], "paperAbstract": "In the last years we have witnessed the appearance of a variety of strategies to design optimal location privacy-preserving mechanisms, in terms of maximizing the adversary's expected error with respect to the users' whereabouts. In this work, we take a closer look at the defenses created by these strategies and show that, even though they are indeed optimal in terms of adversary's correctness, not all of them offer the same protection when looking at other dimensions of privacy. To avoid \"bad\" choices, we argue that the search for optimal mechanisms must be guided by complementary criteria. We provide two example auxiliary metrics that help in this regard: the conditional entropy, that captures an information-theoretic aspect of the problem; and the worst-case quality loss, that ensures that the output of the mechanism always provides a minimum utility to the users. We describe a new mechanism that maximizes the conditional entropy and is optimal in terms of average adversary error, and compare its performance with previously proposed optimal mechanisms using two real datasets. 
Our empirical results confirm that no mechanism fares well on every privacy criteria simultaneously, making apparent the need for considering multiple privacy dimensions to have a good understanding of the privacy protection a mechanism provides.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134004", "https://arxiv.org/pdf/1705.08779v1.pdf", "https://arxiv.org/pdf/1705.08779v2.pdf", "https://software.imdea.org/~carmela.troncoso/papers/Oya-CCS17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/793cb6678e81917535650a8389ef24a9ef7dfd9d", "sources": [ "DBLP" ], "title": "Back to the Drawing Board: Revisiting the Design of Optimal Location Privacy-preserving Mechanisms", "venue": "CCS", "year": 2017 }, "796b61d401037a9c5c91d7f0d3bd62697acbb208": { "authors": [ { "ids": [ "29858363" ], "name": "Scott Votke" }, { "ids": [ "31960069" ], "name": "Seyyed Ahmad Javadi" }, { "ids": [ "2044504" ], "name": "Anshul Gandhi" } ], "doi": "10.1109/MASCOTS.2017.11", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.11", "entities": [ "Cloud computing", "Colocation centre", "Interference (communication)", "Resource contention" ], "id": "796b61d401037a9c5c91d7f0d3bd62697acbb208", "inCitations": [], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "232-243", "journalVolume": "", "outCitations": [ "01fbd0dd3229278905b9a124aaf20280c64f56bc", "3000e77ed7282d9fb27216f3e862a3769119d89e", "2899e535980411e873e7d709d2b0e78973cc24cc", "04e24be4c25539e4b4fa0498c85b3a3e2d026c02", "4205079799d4628a6d1ac6601591dd1c9f7b2ad2", "3cda09fdc91d7f85a138a4d56848a3a0708df76f", "1ff2a26cf246fc7c390e907426fb2bce8026bb38", "5233d7195acccd2681f20b9f60e9f12ec1cbba70", "3c0bc4e9d30719269b0048d4f36752ab964145dd", "02cbb22e2011938d8d2c0a42b175e96d59bb377f", "f37ff5eda851bcbba803abcadd12864d6019c87f", "0d65b64286550dbe68301474916bca661584c2ec", 
"8242f42f077b59ff239e8cab19b99d94c190c608", "aa75d07d5a7c240c9ae84ec0f27d99c37021e202", "5848da5058fed3b97bfd801ca19e5265f489abfe", "9c96514250c4a35deba5ae3ffb93e9731fe23a79", "91638410ad42da9bf391ad670d87026e06855697", "f5fed3c82ae151f6e04af84f25c38c31a21e39f3", "4c767b5bef5f2d6d22ed2b342852cb3743326e29", "1ecd36058e48734213c81728f42ff798a2c52833", "590bd345ef4b8f274af3363a52b7d8f518cdc08a", "f24b702c16849ba88da1df30cc3f1e126a487ede", "093f488e41a142e981c395f69f4946ed2b1983a7", "f73ae2fd526b63d5b04aa02f2b23eb747b768607", "a2882eb115f70abc3cfa4d173c3c6aec47f0a366", "5f9983c58b38c9cb76497d574150737378a8508c", "0822500c8f7246f5abb6a57836e946d564a1f5ea" ], "paperAbstract": "One of the key performance challenges in cloud computing is the problem of interference, or resource contention, among colocated VMs. While prior work has empirically analyzed interference for specific workloads under specific settings, there is a need for a generic approach to estimate application performance under any interference condition.In this paper, we present an analytical model to estimate performance as a function of various workload, system, and interference conditions, including the intensity and length ofinterference, for single- and multi-VM systems. Comparisons with empirical results under various scenarios show that our model can provide accurate latency estimations (less than 5% error). 
We employ our model to analyze systems under interference, and derive useful results to aid practitioners.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.11", "http://www3.cs.stonybrook.edu/~anshul/mascots17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/796b61d401037a9c5c91d7f0d3bd62697acbb208", "sources": [ "DBLP" ], "title": "Modeling and Analysis of Performance Under Interference in the Cloud", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "798d05714f7520100073ec81e8426a21c6ca1973": { "authors": [ { "ids": [ "1686967" ], "name": "Rui Pereira" }, { "ids": [ "39534603" ], "name": "Marco Couto" }, { "ids": [ "35851129" ], "name": "Francisco Ribeiro" }, { "ids": [ "37685890" ], "name": "Rui Rua" }, { "ids": [ "1689398" ], "name": "J\u00e1come Cunha" }, { "ids": [ "1715196" ], "name": "Jo\u00e3o Paulo Fernandes" }, { "ids": [ "1733627" ], "name": "Jo\u00e3o Saraiva" } ], "doi": "10.1145/3136014.3136031", "doiUrl": "https://doi.org/10.1145/3136014.3136031", "entities": [ "Programming language", "Run time (program lifecycle phase)", "Software engineering" ], "id": "798d05714f7520100073ec81e8426a21c6ca1973", "inCitations": [], "journalName": "", "journalPages": "256-267", "journalVolume": "", "outCitations": [ "531190c5faae2437c5f8b1e2c7ee403cdf272305", "d1a2bb461b37913659e8bb4ec359e4f07d471ceb", "e487a90095cc8dd9b783391a5c4ce91b175b6d23", "407a9722d948e2a8f53316e33f04b949e5bf9f32", "1a112d3bad8b19a998908fee9af2657a48219f35", "06b2c1f8301a98e268271cc2e3d33c50932b09d4", "7e73399ec22ff2e89745f754c71ae1829801ad19", "0f95717ff75d4e02cc7c981765cfb41115fadf89", "a0d4f558071bf3dea7e4f2367b133f0e7e2764e1", "8f6111c692b996cc06ac59b2b09f0f538fe6693a", "3b9a02e717cae12d3762afe5fb56cc25464df2f0", "2276f1c307064a110c3274258d4dab3665aec71a", "07f639d1d93d18664151e683765e96ac7f95a67b", 
"322f4b2b7c86f022496e3a3c6a9a2da5c7dccc6b", "30ffc7c6aab3bbd1f5af69fb97a7d151509d0a52", "780c44a2a7a262a23b2c8ea37e86cecef0752dfb", "7b6df9b9f0975e4cc5f76966c00a6fd4eac26876", "2e2289d350ee7b0627f5e54785d21fc07c1e9d3f", "5371dcee6d9a55805dcc66dbcaf6b210ac7c654f", "2438e5da4708053e4233dfa2402fa1ed0ce3335f", "c26ff79189d1c06080a53427857dab3556f9d6fb", "1560a93ea151d6e130f5ec845a33ad65cf62d467", "fe222f8044272610ebd7de690d3de728276a6a36", "da6353163c1fa35a9a117bba885cefdd9de9f51a", "eed647284425aad525f0babecee51b53f06009b2", "29f2a4b0e812c459ad2f9ed3781067cb83a1f7c9", "748f7f8a7ed6111dc624d3e4f22810c1a010beb4", "53e04e3bd9b3589747eca3c8156b6d414a4cd1d2", "3ee47780011ee618bd5a64624a662375e1958e0a", "23f4b96752fdb29b74b25f491e0df05a0701e6db", "dc9338d735ebcb4bb3e2665db0ee216ffe68d0cd" ], "paperAbstract": "This paper presents a study of the runtime, memory usage and energy consumption of twenty seven well-known software languages. We monitor the performance of such languages using ten different programming problems, expressed in each of the languages. Our results show interesting findings, such as, slower/faster languages consuming less/more energy, and how memory usage influences energy consumption. 
Finally, we show how to use our results to provide software engineers support to decide which language to use when energy efficiency is a concern.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136031" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/798d05714f7520100073ec81e8426a21c6ca1973", "sources": [ "DBLP" ], "title": "Energy efficiency across programming languages: how do energy, time, and memory relate?", "venue": "SLE", "year": 2017 }, "79ad32c7ead7527afb3ddb9b8dbe7033a0bb0025": { "authors": [ { "ids": [ "2313853" ], "name": "Zechao Shang" }, { "ids": [ "1718849" ], "name": "Jeffrey Xu Yu" } ], "doi": "", "doiUrl": "", "entities": [ "Consistency (database systems)", "Eventual consistency", "Jumpstart Our Business Startups Act", "Snapshot (computer storage)", "Snapshot isolation", "Transaction processing", "Weak consistency" ], "id": "79ad32c7ead7527afb3ddb9b8dbe7033a0bb0025", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "1cfc1cce7ec6c199a2e43f7c312c398820778e6c", "17f70b9d1fcf3b31948ffa578ac89399751fe73d", "5bd9374195809c73157ba876f463ea7c4ec9abb5", "0d35a84b3fade29ca52d2462d024da1ac313c800", "4e16f47f413554ae4ee71206abb2f29d5fc7cb49", "0760550d3830230a05191766c635cec80a676b7e", "043afbd936c95d0e33c4a391365893bd4102f1a7", "01b6e91d3ebeacc8b543ab9acb95d3c9fe6ad57b", "17356d327c5040417ce9ac8e993ca026961c17f2", "a1894966c9c7090f9bdcff87aaa85924be40f18b", "10917992856076a69a2e408fb9b276d8651779b9", "0cd328c7e014bb0326b4a7744df5d267ebf040cb", "1854568d1111aeeb6132db3e3384c4f927e4a7b6", "190b1d3ba9a402de641f099f402a850261a366c6", "72d1af150ad1419a1b0fce04aa35b71b83200915", "1452f20140dba52b928c9be5f385b5ac35537a2c", "b39ea949bd5351c9516012be35d86558e70d5fa7", "0e83f61bb7afa6a6981c20c6ff440ef9076d209f", "0804ed47a40fbe6deb5ce93efe551086695ae393", "de18d5e1946b5e7f46c6dd5eeaf92abb64ba8a21", "065e808aa05fe23de00ab4510d1607ddff04c232", "09df8dce865358101fab0e23b970822aa5c895f9", 
"05aba481e8a221df5d8775a3bb749001e7f2525e", "22d3fc87f5d9ea17a3bb21f885655a1f9f2deb65", "655abf918e5ebc49bec229ecc29d4e7dda512698", "8d1c0ae7bbe138bc19abf66ca918f46b244b1f5d", "2888c136064ff5527a0bb370ac1d9bf71939e066", "c911a39f2b5d0a5d5962010685d30d7f6381a7ac", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "92c661404330dd1bc9ad9b6cdfc25ebd782999aa", "2c37666634d2e50b998f68dcbef42aae16e02645", "61011eb60b242f529f58eecaf7029524920cd6cf", "6babe6becc5e13ae72d19dde27dc7f80a9642d59", "7960dcacda423270883a0939fa6b4ba293f6f629", "1e8c283cedbbceb2a56bf962bc0a86fd40f1cea6", "c18ed4756d270536253607ac7d93bdec8556f1cb", "62f41341d9ba292877e9e299d6eb70b5435ee8c8", "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "15ad785d44ff34ad028426c31a1e8d43b2b44ab6", "12078fd9bee79fd2e9fae055c4cc33db382272af", "802a857a47452ed01101e75708009e5024a8be68", "fd2fb6e618153e08a3b16e1a4f82934a73eb7932", "0541d5338adc48276b3b8cd3a141d799e2d40150", "ba240fac7a9af2e5e0bf8016d7e7fd039c5207f0", "13a94e9847ceb7c55d38bd6567a6252f23caa406", "990567f3348bf749c23ca627bad82bfbc7032e6a", "082e054aa9997ab58638eaca4531a328106d67d1", "874c59d4801ad55a8eda16c045e34721c09169d8", "0ad8e89091eed09217e66adc98136126addc2619", "0122e063ca5f0f9fb9d144d44d41421503252010", "9359fa64a59105e93dd6ca9f5aa35e0d9f9055be", "0ee5abec0c7002c759d70e4d75921b65a6d8666a", "047565a5b15fbebc78e0bc7d8ca823237dac9de2", "d31e35241bcd649892c7601e10716b02b2837a74", "140e2320331dc06aeaefa3d1c0be6bff777f3994", "5c36d8bb0815fd4ff5daa8351df4a7e2d1b32934", "92a8e3696a9c0b5d0b225133132db1f8c3c4bed5", "456a8840265fbdb1c252ef5d55170d8ea50c6aba" ], "paperAbstract": "It is expensive to maintain strong data consistency during concurrent execution. However, weak consistency levels, which are considered harmful, have been widely applied in analytical jobs. Their success challenges our belief: data consistency, which is believed to be an essential to precise computing, does not always need to be preserved. 
In this paper, we tackle one of the core questions related to the application of weak consistency: When does weak consistency work well? We propose an effective explanation for the success of weak consistency. We name it bad things do not come in threes, or BN3. It is based on the observation that the volume of data is far larger than the number of workers. If all workers are operating concurrently, the probability that two workers access the same data at the same time is relatively low. Although it is not small enough to be neglected, the chance that three or more workers access the same data at the same time is even lower. Based on the BN3 conjecture, we analyze different consistency levels. We show that a weak consistency level in transaction processing is equivalent to snapshot isolation (SI) under reasonable assumptions. Although the BN3 is an oversimplification of real scenarios, it explains why weak consistency often achieves results that are accurate enough. It also serves as a quality promise for the future wide application of weak consistency in analytical tasks. We verify our results in experimental studies.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p115-shang-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/79ad/32c7ead7527afb3ddb9b8dbe7033a0bb0025.pdf", "s2Url": "https://semanticscholar.org/paper/79ad32c7ead7527afb3ddb9b8dbe7033a0bb0025", "sources": [ "DBLP" ], "title": "My Weak Consistency is Strong", "venue": "CIDR", "year": 2017 }, "79d380f14a4e117e1b7f66a4a2b1304717718f61": { "authors": [ { "ids": [ "8568662" ], "name": "Se Kwon Lee" }, { "ids": [ "38611538" ], "name": "K. Hyun Lim" }, { "ids": [ "9763760" ], "name": "Hyunsub Song" }, { "ids": [ "1739708" ], "name": "Beomseok Nam" }, { "ids": [ "1719212" ], "name": "Sam H. 
Noh" } ], "doi": "", "doiUrl": "", "entities": [ "B+ tree", "B-tree", "Memcached", "Persistent memory", "Radix tree", "Spatial database", "Synthetic data", "Tree structure" ], "id": "79d380f14a4e117e1b7f66a4a2b1304717718f61", "inCitations": [ "e3a66a2e79c2a5f7a52bf4e8089bf522fdc04abe", "ec62b73a97016f09d5b9859d31ed991ae84e55ad", "cb2a018979184f87692d423322e367cc42a215d2", "4994eb0dfa2d15d7b5013563d018e8c16b71b039", "433143d5a065cbc4a127362aec99002a1421e322" ], "journalName": "", "journalPages": "257-270", "journalVolume": "", "outCitations": [ "6abf5107efc723c655956f027b4a67565b048799", "314919c141024c71cb17d525ecd8016138335002", "24724ad8962a9e04eb496fddaefe9708f6960601", "fdc8271d01d35673f82dbe8c230b581ca19f6623", "81778c0996c46c77a66597e782ec0eb558f054f2", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "4228d3fee9d28b3b3a7dc8e9585d02652109029f", "7efeb43699d31e8ae365b1e4f7e56c066083a159", "3afdc9f3ff5ce37bf204a4f92f4ab1a1bd0e7b5f", "2e3bda19a2ed88e8a6e5cc415e27da551653ff1d", "05a1357946de5eca42a477b7b268db4944219a2e", "3d2dfe972be7a60937df97bd309b423726375cb4", "642dd27ce62d51b042e134b0d0aec2f2e7cc4d29", "9183cde02e4306828089fb8adae74736a9df3ceb", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "5a49c1c694028fd6ca7acc6af601c0f54efa0700", "295521cfe1a56458d53a58613de5fb92c97c5c23", "202a362049618c8c485a235536e1540d3ce9265b", "bf5497e15f22233cbc2a4d0c3cc2c36f26738701", "94783d113951822195d4ba44599a8fcbdef9d4bf" ], "paperAbstract": "Recent interest in persistent memory (PM) has stirred development of index structures that are efficient in PM. Recent such developments have all focused on variations of the B-tree. In this paper, we show that the radix tree, which is another less popular indexing structure, can be more appropriate as an efficient PM indexing structure. This is because the radix tree structure is determined by the prefix of the inserted keys and also does not require tree rebalancing operations and node granularity updates. 
However, the radix tree as-is cannot be used in PM. As another contribution, we present three radix tree variants, namely, WORT (Write Optimal Radix Tree), WOART (Write Optimal Adaptive Radix Tree), and ART+CoW. Of these, the first two are optimal for PM in the sense that they only use one 8-byte failure-atomic write per update to guarantee the consistency of the structure and do not require any duplicate copies for logging or CoW. Extensive performance studies show that our proposed radix tree variants perform considerable better than recently proposed B-tree variants for PM such NVTree, wB+Tree, and FPTree for synthetic workloads as well as in implementations within Memcached.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/fast17/fast17-lee.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17-lee.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/lee-se-kwon", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_kwon_lee.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_kwon_lee.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/79d3/80f14a4e117e1b7f66a4a2b1304717718f61.pdf", "s2Url": "https://semanticscholar.org/paper/79d380f14a4e117e1b7f66a4a2b1304717718f61", "sources": [ "DBLP" ], "title": "WORT: Write Optimal Radix Tree for Persistent Memory Storage Systems", "venue": "FAST", "year": 2017 }, "79d68db415c56f5641cd645173f7d3f0b5307035": { "authors": [ { "ids": [ "39173720" ], "name": "Changwan Hong" }, { "ids": [ "2342667" ], "name": "Aravind Sukumaran-Rajam" }, { "ids": [ "3099014" ], "name": "Jinsung Kim" }, { "ids": [ "1750948" ], "name": "P. 
Sadayappan" } ], "doi": "10.1109/PACT.2017.48", "doiUrl": "https://doi.org/10.1109/PACT.2017.48", "entities": [ "Benchmark (computing)", "Data (computing)", "Edge detection", "Graph (abstract data type)", "Graphics processing unit", "High- and low-level", "Load balancing (computing)", "Multigraph", "Sparse matrix", "Two-phase commit protocol" ], "id": "79d68db415c56f5641cd645173f7d3f0b5307035", "inCitations": [], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "27-40", "journalVolume": "", "outCitations": [ "586414efa54ba9f4a7def0dc5322b7723f22c552", "175d795f44037ef60dd9df341701cd5fdc449f1f", "2b9e6181502369199bd89691a27f89bdbaac36e4", "3ebf3857a60c3e224284bbbe6c7127d0a12c546d", "191fd33f17c2a79b3825d4cc2105c47a8f16ba44", "00b8d047cad54ac03ac5d1d919a5d4a09ea4bbbb", "3c84f22df1948dfa8b1b14bbd4c850baf9c5b632", "3c2cc49ee044d3b5815e9d5ad9c6010e94484d92", "2916fd514c69c1b3141c377c1c97d957bdc86c5e", "94c20561f102ede79c461c6aeee74eb7a55d5eb4", "1156f60e40548096df49528b1342bb3e88b0f378", "0f34ea8535dc5833a1a3692ffc7abc6740d2406a", "d6c4c76076efecb15655274adc648af8a445ed3a", "0074e55e67c74420b725fbb09a8f2f351d6947a9", "123b7a6282243af8a81f693f0cd9ac6263946dbe", "c4f83b436c164005bde22f6997de18c3fc1cb725", "512a1ebdcaca56f3ea0c21aa2abe9a5ab7dace06", "2724de31317b1b9e026b5f90251829ee02f3fa3f", "46f3bb6751419b87856c4db0193e7a72ef3fa17c", "c3008dd707e4dfd43606a544d4cac4bf1f081f2b", "3dff11679346f5344af1018cad57fa14cc349f2f" ], "paperAbstract": "High-level GPU graph processing frameworks are an attractive alternative for achieving both high productivity and high performance. Hence, several high-level frameworks for graph processing on GPUs have been developed. In this paper, we develop an approach to graph processing on GPUs that seeks to overcome some of the performance limitations of existing frameworks. 
It uses multiple data representation and execution strategies for dense versus sparse vertex frontiers, dependent on the fraction of active graph vertices. A two-phase edge processing approach trades off extra data movement for improved load balancing across GPU threads, by using a 2D blocked representation for edge data. Experimental results demonstrate performance improvement over current state-of-the-art GPU graph processing frameworks for many benchmark programs and data sets.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.48" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/79d68db415c56f5641cd645173f7d3f0b5307035", "sources": [ "DBLP" ], "title": "MultiGraph: Efficient Graph Processing on GPUs", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "79e1c4967aaa7917c7d5d24bf34404eb13f90c37": { "authors": [ { "ids": [ "34397071" ], "name": "Hyogi Sim" }, { "ids": [ "2379012" ], "name": "Youngjae Kim" }, { "ids": [ "1730937" ], "name": "Sudharshan S. Vazhkudai" }, { "ids": [ "34657090" ], "name": "Geoffroy R. 
Vall\u00e9e" }, { "ids": [ "8600116" ], "name": "Seung-Hwan Lim" }, { "ids": [ "3287952" ], "name": "Ali Raza Butt" } ], "doi": "10.1145/3126908.3126929", "doiUrl": "https://doi.org/10.1145/3126908.3126929", "entities": [ "Clustered file system", "Database", "Distributed File System (Microsoft)", "Electronic filter", "Preprocessor", "Scalability", "Shared nothing architecture", "Web search engine" ], "id": "79e1c4967aaa7917c7d5d24bf34404eb13f90c37", "inCitations": [ "9a397280f7e809008ebe027b0d53e0a8701933d3" ], "journalName": "", "journalPages": "5:1-5:12", "journalVolume": "", "outCitations": [ "25d8626df87f31cc182ae529f42ec49616ee9bd0", "5f3f9223c5c9f896be099bc177929febad508407", "2c84daae142c5b0f4ca6a6772ca7e8cac7d7afca", "ffdfb8f4e351c43af4183d3601fb5a2fad6583c5", "05227501b3727de9b117907ecd77b0bff694869f", "cfa035a41201f8c85d1049ec56004e6ed2d19dda", "abe7c167e44b1198fb5bea572ece4e53e35e8732", "499b458ddd5d10dc4be158fd89aeed0e31b6b5cf", "988d1a223e2ee40f2474f729ac3ac53e012d8337", "1a7f47ab856e8d3e153966b125d75ea6d692ebff", "3c03e217aeaf6734b5471d5f8930436e009d60af", "2d60d3596490d9999d8433bf41405060779bc11d", "0a368cdcb14046f6371f77f65d900d52c4da5486", "3d11c0ce702f416401ec383e7cecd82802bc81b2", "2da760f90c3d2bf6598becdde9063093f488548c", "a05d78d262792b22a66b7b15b2330e380318c830", "1104b0b3f147fbf6101534bf73b4d6a1e08fcc1a", "12a0046a1197ae63c3d616c74e367dc583cef196", "091778f43d947affb69dbccc2c3251abfa852ad2", "1f171f0f6bb88b9145722f15c83e414906202632", "483b2f4c7dbc72f7969b60cff0984f2062f02956", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "2d2255446fa2c5d5e96c4635ba75ca1741c82f7e" ], "paperAbstract": "Data services such as search, discovery, and management in scalable distributed environments have traditionally been decoupled from the underlying file systems, and are often deployed using external databases and indexing services. 
However, modern data production rates, looming data movement costs, and the lack of metadata, entail revisiting the decoupled file system-data services design philosophy.\n In this paper, we present TagIt, a scalable data management service framework aimed at scientific datasets, which is tightly integrated into a shared-nothing distributed file system. A key feature of TagIt is a scalable, distributed metadata indexing framework, using which we implement a flexible tagging capability to support data discovery. The tags can also be associated with an active operator, for pre-processing, filtering, or automatic metadata extraction, which we seamlessly offload to file servers in a load-aware fashion. Our evaluation shows that TagIt can expedite data search by up to 10X over the extant decoupled approach.", "pdfUrls": [ "http://people.cs.vt.edu/~butta/docs/sc2017-tagit.pdf", "http://doi.acm.org/10.1145/3126908.3126929" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/79e1c4967aaa7917c7d5d24bf34404eb13f90c37", "sources": [ "DBLP" ], "title": "Tagit: an integrated indexing and search service for file systems", "venue": "SC", "year": 2017 }, "7a096ec74a76a483dcd5ee0f019d1afce7d9b910": { "authors": [ { "ids": [ "2021576" ], "name": "Stephanos Matsumoto" }, { "ids": [ "2662834" ], "name": "Raphael M. 
Reischuk" } ], "doi": "10.1109/SP.2017.57", "doiUrl": "https://doi.org/10.1109/SP.2017.57", "entities": [ "Computer simulation", "Ethereum", "HTTPS", "Man-in-the-middle attack", "Public key infrastructure", "Rogue", "Smart contract", "Stock and flow" ], "id": "7a096ec74a76a483dcd5ee0f019d1afce7d9b910", "inCitations": [ "36e81b745b1122de2440be3a25920860f8287147", "58c0217720b83732daa5040512e1b621c491ecd8", "5720ae66a166ce4d0dd50665e4212e04244f63c1" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "410-426", "journalVolume": "", "outCitations": [ "3a14db6df0a0b5c01e091e169ea6a07bc9a6821f", "3d049eb62dd331b066df3cd455287ec487a745bb", "36400061d8cf4620069eb372c82fe86d0cc56bcb", "0641830054d30adf5c115adc0fd369f3ecdc6d73", "1f42fdecd70a7d72f0f108e80511320f7204316c", "2c71d19590b59007f49e9ce04c6aab529fc4b8fd", "1113664b038d0390b061afb80ee214b09a207fc9", "bdfd34769911b3fb40eadf71bfb34a0ec98fe160", "084da7c90567476907522d91d22a8a8a6f818447", "019b5b8e54b10860d39dac8f449c9d3db173527b", "1ee169e1161fbaaea334bd99759015cebe506764", "73209bd8fbd87e50a6d6396808fe08c33a64bd6a", "64814163f51d19ea85008a77e81f4cccf2d863dd", "150fd4e8e40126477e0683608531593829fdbfff", "8992bb749cb88b5e4aeb195d8649952301dad01d", "e1ecc225690f79d1d51202d6772d3c2e0d0aea2a", "27a8f66219047eb41900f12bd5813b4f52b829e1", "563239b0eaa3aa7003e8e8e66ba3e789f7cee265", "f65ee3a9f171da68b57039a5d5f2f1ad70798488", "3e2e7f25cea8ff585c4a64a0ddbfaba480de939e", "35fe18606529d82ce3fc90961dd6813c92713b3c", "a58e5388358da913ede1ac7ca0807c66fb871f00", "0be5af9e009e0e077b2eac9dc8b1e816f46d2fa7", "b790b2f67c09bf5d4f957e4e1e55818b1801cb57", "bdd6c1397d2cd6fe0e8055e02897b1ab2b236556", "1542f3fe7bf34c7cff7c747f59bdbbae777c90cd", "226cfb67d2d8eba835f2ec695fe28b78b556a19f", "0e98262b7d5d42ad0f7f7f83a5be5107f6f24df5", "57855fea0eea38a503ae58cbb024a2606002f677", "540f48b41fbf307e48dcba0760c91b571213d1d0", "2f7bb6613154e1b3580c0114bf2cfb3c8ceb477e", "67b13f67689d9c0254fa28d58dd5d20cfcf82984", 
"04b09b16da4b3feba818cb2aac65cfeb36b2662a", "3591be0ccd08c80c0048ebaa0e7005556f49cf5e", "52210124ac84b31b855f481b25c6ac5e80afab97", "2c6d93d96725e44669afe1e1cbec9b5e9910d883", "2dbcc7077a01981679007eceac6c6659a1c18200", "6a74a8573cb1bd15c5f4fa4e047613d2340e61b9", "2079dd422917c3dca4f323a55069f514da931bd4", "08e9542de3cbfe791bf86a0dee6ba5e83bc29ea7", "5de2f3e2ccfb52188028b01ab1f686a0ebcdfad0", "33853565b4dcad38b9b79091a48d3f40409f06d7", "8502fd5a659150e0635973744c4a80138c4e7ca7", "12d854f326b43232d906eb323db5d282786acb9d", "4298f3b94a1ea146bec61a955edd4ed9ef0227fe", "1d4abd83093f1343ee1f5b8ffb3c5999e3754c90", "4ea466a79c3fbdfce4d5916481a484aa3e22860b" ], "paperAbstract": "Despite a great deal of work to improve the TLS PKI, CA misbehavior continues to occur, resulting in unauthorized certificates that can be used to mount man-in-the-middle attacks against HTTPS sites. CAs lack the incentives to invest in higher security, and the manual effort required to report a rogue certificate deters many from contributing to the security of the TLS PKI. In this paper, we present IKP, a platform that automates responses to unauthorized certificates and provides incentives for CAs to behave correctly and for others to report potentially unauthorized certificates. Domains in IKP specify criteria for their certificates, and CAs specify reactions such as financial penalties that execute in case of unauthorized certificate issuance. By leveraging smart contracts and blockchain-based consensus, we can decentralize IKP while still providing automated incentives. 
We describe a theoretical model for payment flows and implement IKP in Ethereum to show that decentralizing and automating PKIs with financial incentives is both economically sound and technically viable.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.57", "https://www.ieee-security.org/TC/SP2017/papers/290.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7a096ec74a76a483dcd5ee0f019d1afce7d9b910", "sources": [ "DBLP" ], "title": "IKP: Turning a PKI Around with Decentralized Automated Incentives", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "7a2918f9f0192e9a83c46c1ee58742dd6bd98b87": { "authors": [ { "ids": [ "7223774" ], "name": "Rami Khalil" }, { "ids": [ "39000631" ], "name": "Arthur Gervais" } ], "doi": "10.1145/3133956.3134033", "doiUrl": "https://doi.org/10.1145/3133956.3134033", "entities": [ "B-tree", "Bitcoin", "Duplex (telecommunications)", "Entity", "Ethereum", "Scalability", "Throughput" ], "id": "7a2918f9f0192e9a83c46c1ee58742dd6bd98b87", "inCitations": [], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "823", "journalVolume": "2017", "outCitations": [ "6fc9cd15134cdd282e25b8ea58b38240e96bfe90", "51b27a41ca1a33445a1041fcea84341fcf0b8c4c", "4392166a1194010c844ec915694fd5c56da94301", "00ddc47f6c54dcdfe96d41e0961d2d378c827e16", "261893f4c8a7c311a97249a8f42071c566372493", "ac72566bbc7628255002a70ca5bec0874929eba4", "75d83792b880757a09e9a72978cc29beb57c4ad5", "f65ee3a9f171da68b57039a5d5f2f1ad70798488", "195f2d9d2e6282b31ffd320c2ceda9661d806927", "3786397daec7374a048bb9dba24d3cdb198bae95", "8992bb749cb88b5e4aeb195d8649952301dad01d", "3624eaf7c2f05612429ed1579cc82102c11f4d65", "259603d8d1c2a6d439eb8fa5038659a94aac08e1", "35fe18606529d82ce3fc90961dd6813c92713b3c", "12df6611d9fff192fa09e1da60310d7485190c1c", "d94ba52bd05fe12926836c3332e2fd895e78ed30", "8502fd5a659150e0635973744c4a80138c4e7ca7", "43fb74fc45ea844ad087c770fa9be747fbd03b19", "fb9567e958ae22114b663f26aef8bf00fa190e0d", 
"40a98bed1d10248d30e86304315df07280dad93e", "2079dd422917c3dca4f323a55069f514da931bd4", "aec843c0f38aff6c7901391a75ec10114a3d60f8" ], "paperAbstract": "Scaling the transaction throughput of decentralized blockchain ledgers such as Bitcoin and Ethereum has been an ongoing challenge. Two-party duplex payment channels have been designed and used as building blocks to construct linked payment networks, which allow atomic and trust-free payments between parties without exhausting the resources of the blockchain.\n Once a payment channel, however, is depleted (e.g., because transactions were mostly unidirectional) the channel would need to be closed and re-funded to allow for new transactions. Users are envisioned to entertain multiple payment channels with different entities, and as such, instead of refunding a channel (which incurs costly on-chain transactions), a user should be able to leverage his existing channels to rebalance a poorly funded channel.\n To the best of our knowledge, we present the first solution that allows an arbitrary set of users in a payment channel network to securely rebalance their channels, according to the preferences of the channel owners. Except in the case of disputes (similar to conventional payment channels), our solution does not require on-chain transactions and therefore increases the scalability of existing blockchains. In our security analysis, we show that an honest participant cannot lose any of its funds while rebalancing. 
We finally provide a proof of concept implementation and evaluation for the Ethereum network.", "pdfUrls": [ "https://eprint.iacr.org/2017/823.pdf", "http://doi.acm.org/10.1145/3133956.3134033", "http://eprint.iacr.org/2017/823" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7a2918f9f0192e9a83c46c1ee58742dd6bd98b87", "sources": [ "DBLP" ], "title": "Revive: Rebalancing Off-Blockchain Payment Networks", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "7a3127be85c4c8122c8bdd99e1da1d009a162e71": { "authors": [ { "ids": [ "35111509" ], "name": "Kimio Kuramitsu" } ], "doi": "10.1145/3136014.3136025", "doiUrl": "https://doi.org/10.1145/3136014.3136025", "entities": [ "Backtracking", "C++", "Context-free grammar", "Context-sensitive grammar", "Parsing", "Parsing expression grammar", "Programming language", "Scripting language", "Symbol table", "Time complexity", "Typedef" ], "id": "7a3127be85c4c8122c8bdd99e1da1d009a162e71", "inCitations": [], "journalName": "", "journalPages": "26-37", "journalVolume": "", "outCitations": [ "85683fc3b99f3b1c682a5614f607054b39e10d41", "36c8cd0d4228256810ccca7a945a3ed72153b02a", "64a7441aeecab0e8d14b91ffa3959906ebd39693", "ce04d969db34332bea481bfcec4de2313821b756", "049065e15e288983965240f7f33f4953a622a5e2", "828b44771ff28d8fea10cdfabd8772cecf042bb1", "1705d0254fbd8990dc9f3fbe3e926d2ebb230663", "543ac7965639667f816214c6c98ec5f9f7cb36e1", "6713ff1746e7f1ef12e7231acf6310007079f443", "69357b4197140e1df9e65aeb466462839ce1fbb4", "4e37893e503466fe3673ae117f6d7c364c21779a", "72e40e4bef8906055fadc245c4773d520334f7f6", "303b122551f37383a43acc1229f6e57dcde20f40", "b3aa9613a3e9fb7a1da38a1aa08d95a6984282dc", "69e11fb5797c29db798678c4182b2359c371d026", "b6dfd533a19982fc1efd5722d6cad394f4b89a94", "6d5a3e0b92a121bbc85558601d4c6704e0f6aa88", "0f428a57b38a5876b412c8374f279fe86a7f2f42", "7381052ac9aed7c209f6d7cc47f0d5f856f3cb77" ], "paperAbstract": "Parsing expression grammars (PEGs) are a powerful and popular 
foundation for describing syntax. Despite PEGs' expressiveness, they cannot recognize many syntax patterns of popular programming languages. Typical examples include typedef-defined names in C/C++ and here documents appearing in many scripting languages. We use a single unified state representation, called a symbol table, to capture various context-sensitive patterns. Over the symbol table, we design a small set of restricted semantic predicates and actions. The extended PEGs are called SPEGs, and are designed to be safe in contexts of backtracking and the linear time guarantee of packrat parsing. This paper will show that SPEGs have improved the expressive power in such ways that they recognize practical context-sensitive grammars, including back referencing, indentation-based code layout, and contextual keywords.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136025" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7a3127be85c4c8122c8bdd99e1da1d009a162e71", "sources": [ "DBLP" ], "title": "A symbol-based extension of parsing expression grammars and context-sensitive packrat parsing", "venue": "SLE", "year": 2017 }, "7a3eef2bb0c599464d74929a0fbf2babfd259466": { "authors": [ { "ids": [ "34955824" ], "name": "Cristina V. 
Lopes" }, { "ids": [ "32514549" ], "name": "Petr Maj" }, { "ids": [ "1974431" ], "name": "Pedro Martins" }, { "ids": [ "40188003" ], "name": "Vaibhav Saini" }, { "ids": [ "34237363" ], "name": "Di Yang" }, { "ids": [ "27018374" ], "name": "Jakub Zitny" }, { "ids": [ "1906848" ], "name": "Hitesh Sajnani" }, { "ids": [ "1729644" ], "name": "Jan Vitek" } ], "doi": "10.1145/3133908", "doiUrl": "https://doi.org/10.1145/3133908", "entities": [ "C++", "Data deduplication", "Duplicate code", "Ecosystem", "Java", "JavaScript", "Open-source software", "Python", "Text corpus", "Video game clone" ], "id": "7a3eef2bb0c599464d74929a0fbf2babfd259466", "inCitations": [ "76d28b31ad49f1b76afdeeab10d38e571151b8d3", "e65aae0062d6d0bbcdcc07660b388c3c1c531a43", "88e7e944405c95119174860d1714a2b271f99dd9", "67ba1c7159bdd4440cbe6f446189978e732d81af", "2c0e0537ecf9f9b3b12527daba73b1bf98df10b4" ], "journalName": "PACMPL", "journalPages": "84:1-84:28", "journalVolume": "1", "outCitations": [ "2209b7af4d41bc60f1b044a8305a49df77da1637", "1d6e9e63884187048eff828eb7ae77d58ce09b09", "e20d848578a93ae1b9998d7fa13f19bcf15f1cb3", "479b770d93411cbaaa3888cd77e09e71d8c211f2", "73b718e508fa943dfb22a9cb5fb17f888239ad0e", "4a73f9f530f167a3ebaba91875f1549090952cc4", "0bfca74dd16aad83216742115231d400d60a9f0d", "29afbfd2f362c0173613ff82cf96d8495de51878", "de7293a0137fefe92412d61d3db93e22c0988136", "54ae528e6b251ce681c3c0442f8231822e05772b", "129a61b59e202a734a3776e97305baeb37c9cd0f", "0ec830d516363f1917056d699aab6f07af18a053", "93870564a98011b07a09ba142ca6d5ef119873e4", "2582b486305ce677224d8632a3f24b880b533dcd", "6b3436d1626970915f690294970b61aa214ba136", "0eb6211964149bc326c0e682971c982ab3cc1cb5", "30b30b2da89e9a287f235cdec1d346de163e50c5", "98e810ed098a651e0ba8cbb63d2d926d4eebdf9b", "1312398bd0377beada6d6ab5252221b0464c9892", "0a8317fc7cd3fb07e69aa2a11342898e22fc2f47", "bf66c481f47dbe1fb64abfa9d247ceb4699a24c2", "9385a37454676209720be40b0cf8d37cacb482a2", "701d11fa3afc3b6b586913db74b6011dc2fb8200", 
"040b57078101f4fbc3395ba19a040ff7f5b60bcd", "2d9ef02d447795622bf732b64ef22b1c25093868", "f78384cac7ca1ac95fa23be5bafa43757fe0da70", "00a9ba0063d34ec56792849a67ef57b4601becbb", "c78f374bbfaa85df22a24c9ef9e028e8bd7fe397" ], "paperAbstract": "Previous studies have shown that there is a non-trivial amount of duplication in source code. This paper analyzes a corpus of 4.5 million non-fork projects hosted on GitHub representing over 428 million files written in Java, C++, Python, and JavaScript. We found that this corpus has a mere 85 million unique files. In other words, 70% of the code on GitHub consists of clones of previously created files. There is considerable variation between language ecosystems. JavaScript has the highest rate of file duplication, only 6% of the files are distinct. Java, on the other hand, has the least duplication, 60% of files are distinct. Lastly, a project-level analysis shows that between 9% and 31% of the projects contain at least 80% of files that can be found elsewhere. These rates of duplication have implications for systems built on open source software as well as for researchers interested in analyzing large code bases. 
As a concrete artifact of this study, we have created D\u00e9j\u00e0Vu, a publicly available map of code duplicates in GitHub repositories.", "pdfUrls": [ "http://janvitek.org/pubs/oopsla17b.pdf", "http://doi.acm.org/10.1145/3133908" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7a3eef2bb0c599464d74929a0fbf2babfd259466", "sources": [ "DBLP" ], "title": "D\u00e9j\u00e0Vu: a map of code duplicates on GitHub", "venue": "PACMPL", "year": 2017 }, "7a3f1da4f346d3f6968f5b38c6cdb0c01ee4c6ca": { "authors": [ { "ids": [ "2222195" ], "name": "Younghwan Go" }, { "ids": [ "32628418" ], "name": "Muhammad Asim Jamshed" }, { "ids": [ "2313756" ], "name": "YoungGyoun Moon" }, { "ids": [ "9767431" ], "name": "Changho Hwang" }, { "ids": [ "1944837" ], "name": "KyoungSoo Park" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Apu Nahasapeemapetilon", "CPU cache", "Central processing unit", "Computation", "Data rate units", "Graphics processing unit", "Memory bandwidth", "PCI Express", "Parallel computing", "Pipeline (computing)", "Program optimization", "Software pipelining" ], "id": "7a3f1da4f346d3f6968f5b38c6cdb0c01ee4c6ca", "inCitations": [ "e7c997bb82031f3fe4125b1fe000aa4215763cad", "8fe193cdf4a263139110f7c39db8246e19f22d84", "83a31c52bed8d3845201acb7a5b4603212b9e8b6", "5a8cd841f59a68c948c7aa05359c7df32dbc8d5c", "2e7c1a2953e737ea43237c313751d3e5c5f73250" ], "journalName": "", "journalPages": "83-96", "journalVolume": "", "outCitations": [ "274b913658674eb107f9edc0714937fd5f1fcbcf", "25f855c968af75e4617f25c71aee3cedec1dedaf", "73e30965ab41161ef7ecbed00133a931dbc2efba", "5ca076b1bd1bca64e5029627111e49ce0e4c4f5a", "6debd9d773c7aca19f18f3b4640c45f8ae12b254", "7ebb9fad71ce8e08d5284b7644a5452cff6c75b3", "b85df0212d624cbcf52108969ba722fe5d24cb2e", "6074c1108997e0c1f97dc3c199323a162ffe978d", "0c9a56eb4f45d3969943e8cff74593e9c6c5f549", "363d109c3f00026f9ef904dd8cc3c935ee463b65", "239f4e993a97984c1e0dde4032c5c2f6ab05dd22", 
"d14509b6b012b4856ef9cd55e89065893dda8c95", "080aebd2cc1019f17e78496354c37195560b0697", "5c36d8bb0815fd4ff5daa8351df4a7e2d1b32934", "e32f5699635c0733106e52f5d2e3965268080be6", "2900ebddc2dfb1e4bb7d7eac7384d7f4512b2b9a", "399e1c9da99634c30350651e9001eec806c4ebd3", "8480af6ef75446b1d4046127f723c4d065c64917", "0433bb657317ac22f7c66d71dfd14c8ead607d73", "3547ac839d02f6efe3f6f76a8289738a22528442", "0a59166593f0a2fa260f16fd853299c9f0863fdf", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1" ], "paperAbstract": "Many research works have recently experimented with GPU to accelerate packet processing in network applications. Most works have shown that GPU brings a significant performance boost when it is compared to the CPUonly approach, thanks to its highly-parallel computation capacity and large memory bandwidth. However, a recent work argues that for many applications, the key enabler for high performance is the inherent feature of GPU that automatically hides memory access latency rather than its parallel computation power. It also claims that CPU can outperform or achieve a similar performance as GPU if its code is re-arranged to run concurrently with memory access, employing optimization techniques such as group prefetching and software pipelining. In this paper, we revisit the claim of the work and see if it can be generalized to a large class of network applications. Our findings with eight popular algorithms widely used in network applications show that (a) there are many compute-bound algorithms that do benefit from the parallel computation capacity of GPU while CPU-based optimizations fail to help, and (b) the relative performance advantage of CPU over GPU in most applications is due to data transfer bottleneck in PCIe communication of discrete GPU rather than lack of capacity of GPU itself. To avoid the PCIe bottleneck, we suggest employing integrated GPU in recent APU platforms as a cost-effective packet processing accelerator. 
We address a number of practical issues in fully exploiting the capacity of APU and show that network applications based on APU achieve multi-10 Gbps performance for many compute/memory-intensive algorithms.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-go.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/nsdi17_slides_go.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/go", "http://www.usenix.org./sites/default/files/conference/protected-files/nsdi17_slides_go.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-go.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/7a3f/1da4f346d3f6968f5b38c6cdb0c01ee4c6ca.pdf", "s2Url": "https://semanticscholar.org/paper/7a3f1da4f346d3f6968f5b38c6cdb0c01ee4c6ca", "sources": [ "DBLP" ], "title": "APUNet: Revitalizing GPU as Packet Processing Accelerator", "venue": "NSDI", "year": 2017 }, "7a423500bdc9825d6d02162728c126ccdc58f0af": { "authors": [ { "ids": [ "2820066" ], "name": "Manolis Kaliorakis" }, { "ids": [ "1718647" ], "name": "Dimitris Gizopoulos" }, { "ids": [ "3220061" ], "name": "Ramon Canal" }, { "ids": [ "1747103" ], "name": "Antonio Gonz\u00e1lez" } ], "doi": "10.1145/3079856.3080225", "doiUrl": "https://doi.org/10.1145/3079856.3080225", "entities": [ "CPU cache", "Experiment", "Fault injection", "Microarchitecture", "Microprocessor", "Processor design", "Register file", "Simulation" ], "id": "7a423500bdc9825d6d02162728c126ccdc58f0af", "inCitations": [], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "241-254", "journalVolume": "", "outCitations": [ "0f450ad16b7f09869bc456475a790bd876023173", "117b373a595a484eea3c3748d079851a40a5ee2f", "35e3643eb7060f30ef408c4910fc6448eecde6e6", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "53e84426865ca499e7902f4401262f124229c46e", "51327c46f6db01ab9bde8aad63ac7d5ba2b94066", 
"87a013dff0f9ab089cbfdaf350b94722cb240688", "e761c87518476fd30308c57e7ab5806ecc6664e4", "7848ea4660a27f6d05f8f5dfc33cfa55f999e975", "e61283985b604a6a907f21326a6d136e72628fe2", "6d2db5f608b74d77fd01341dfd7a564d1a3d259b", "ed333ba67317b10684ebc77986c1bf6300d4e9f4", "7bdbd4ae0c3e4503cc42f20afa673f255d04aa00", "1d55f921999b3fcc55e73d8b73f633156b11937c", "11a3cacd4e3f11d61203aa4c68b124ab5fe54ba3", "1144956f60e04e0839dd2fc5b8031fb4a4599072", "0256f81e75c34b5aa6f932c29d11807cbd848dfb", "3ed84f2fbdc4dc6450919ec5b017e66440a5833c", "406fe23ae820e770b2c27890fa03379f5be45630", "5eb0d58f4d0f68b9b596474cdd5e2372a357a923", "0a43cabe7ca27b66cb902f16150bec9fb7e023b1", "31ddb6bfd53445f2b16a89d7dffedeee235d5f77", "b821cd8806d8e1054dbf79ab5a62245f5196d442", "656b047c4d7a830c876530bc8f8fe38c9d3e8a2f", "738a102562a662031039df7723da16d25627f2e2", "289a8de10b6cc7e7a94ea0091f32d3630c7c484b", "becc7bd74e9a91223b080c8d9cef2d454482fb19", "ece321434f2ae4766b2c33b4160269eb166a4bba", "0c8919aeb81fa8b7851585ea4190570873efa628", "00a6a1818ca95c25835226a5e1f84f1e77e0776d", "17c5972f45561c705bce6d8e0174d94cc8ad4adf", "424adf2456825dfb961d98b8475354bc8c4a98d4", "99f520a8a467a26ba4099efd2fc0c85c3189bd4f", "0eacd1b47786f740b723d906d46e160f143c0378", "37ed4f9684e774157f38655768b996b6b875e80a", "1b61f857c29d1e0a56dbc65ff1dcefec66e73cbe", "b4907b8a9d4c597acf947246f401e3752f30ff0b", "831bb5b664d8750ee074a5a85e5470ca33f407a3", "69d62d6464a5ffe31905dfe3e21fb5cdbc02755d", "246be658a2ce791070a440cfc965a3ddac325c18", "ad908749e636e4bc543c474fb070977959397dae", "51a63777dfeec2635401a40d51c89269824c80f3", "526f66cfd97a9374d7827c78a9dae9eaec471878", "023abef0f3f56cda13bcb5adeb28dd4c7241c261", "ab2c7421a3f7fda4a95b6db79261f499d55bca32", "938286fa80fe31fa3e35f450989f27659296f25f", "3186aead0cac0a94a8bf909a5023eae7afa8426b", "5037ba6bcd3b391a8cad4abeae7b6a39ca850c72", "007630d085e968b552b0baf7406d0a9b1695f553", "affc4c930884275acc9c136e68901dbd3473632e" ], "paperAbstract": "Early reliability assessment of hardware 
structures using microarchitecture level simulators can effectively guide major error protection decisions in microprocessor design. Statistical fault injection on microarchitectural structures modeled in performance simulators is an accurate method to measure their Architectural Vulnerability Factor (AVF) but requires excessively long campaigns to obtain high statistical significance.\n We propose MeRLiN, a methodology to boost microarchitecture level injection-based reliability assessment by several orders of magnitude and keep the accuracy of the assessment unaffected even for large injection campaigns with very high statistical significance. The core of MeRLiN is the grouping of faults of an initial list in equivalent classes. All faults in the same group target equivalent vulnerable intervals of program execution ending up to the same static instruction that reads the faulty entries. Faults in the same group occur in different times and entries of a structure and it is extremely likely that they all have the same effect in program execution; thus, fault injection is performed only on a few representatives from each group.\n We evaluate MeRLiN for different sizes of the physical register file, the store queue and the first level data cache of a contemporary microarchitecture running MiBench and SPEC CPU2006 benchmarks. For all our experiments, MeRLiN is from 2 to 3 orders of magnitude faster than an extremely high statistical significant injection campaign, reporting the same reliability measurements with negligible loss of accuracy. 
Finally, we theoretically analyze MeRLiN's statistical behavior to further justify its accuracy.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080225" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7a423500bdc9825d6d02162728c126ccdc58f0af", "sources": [ "DBLP" ], "title": "MeRLiN: Exploiting dynamic instruction behavior for fast and accurate microarchitecture level reliability assessment", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "7a4c1b65cb73268b5beb4eb815a618c08b229aca": { "authors": [ { "ids": [ "2864629" ], "name": "Yuanjie Li" }, { "ids": [ "3013173" ], "name": "Zengwen Yuan" }, { "ids": [ "1798566" ], "name": "Chunyi Peng" } ], "doi": "10.1145/3117811.3117838", "doiUrl": "https://doi.org/10.1145/3117811.3117838", "entities": [ "Compaq LTE", "Data access", "Forwarding plane", "Mobile device", "Parallel computing" ], "id": "7a4c1b65cb73268b5beb4eb815a618c08b229aca", "inCitations": [ "9a47688fdf7a2f899ceb8c5470bed8247dadfdd3", "067f92dd54cbea2c65c2944254660defaf989fa7" ], "journalName": "", "journalPages": "56-69", "journalVolume": "", "outCitations": [ "fa68b657d5f959bfbc3deff02ab79c8b2c7f149f", "41318936e9bfd1530e008fa658b55198a6fc1270", "08b12c65fa237ed030954efcdc42094f4bc7ecf1", "d75dd24cbd8df096544d3f8695a941dc191858ef", "077d6198039dea3f60ef9f3d3ef9f128fd3edf71", "297c7c5beec23a972470bb0bbc0ea289066c8b55", "e0d5adef03788e99833a674e587c742da985255e", "522a16a41c33f8cb0f4a8bf51c9f3cd13cd2f05e", "73e5cc87f4d7487c3ba58ec2e55ea52ea1025b0a", "062bd67c240a7710225fcaf2e236eebafa94eecb", "9a6ae3f7ae420d9ac4355ae0c277339dfd7300c5", "2445d07ed9574fc8c27bcfdcdfaacdb9e66afb1e", "07037e3febe36179acbc4a2730d41d051078aac5", "27a70914fe1d0e0b345620e60c2c0b38bc2bd5f1", "a45ac5215bc436d0adf6b4ba5c6dc6107444af1c", "205443fb07540cfca5e581476617589b2a60d996", "17d122f143726288da193a767fd0a7634010f0ff", "1943466070019e48204ebbee0914d87ced4ba09a", 
"89f2fb283de7e3f6738f852a25d6b90b9c1a1033", "7233d3bd82ade69235873870f929f2ffc46a1327", "6800646e8de9b08e6a2174a927b50bb0e28fbb76", "0179d342a07191731a4194297c9b2a8b3eba522a", "02c76f7d61f1ff47609a19f46aec3e6d0c8a9425", "3b50af00a37195726ede36cb080e3f44bb3a3f08", "41758f25a78f4223fefb7ac00cc70a9e6ba949af", "a9bbe0f977fd440511c3277058fb287652e6181a", "517356abd7f660229da7c379085490a804a32754", "5fa82acf3761b76f18b71931ff13c3b723f4c43e", "1fee1ef3ce1e4f9321a24d519c10234787206cfa", "2c7e254cfea97f0b10320d723aa906f3f5c9bed6", "5b2998bcedbd184a836b8533b62860264be54442", "39bfff0c76441e0af69f3d61915f2ad930bf6b27", "2e4fc61c4801129d251c968a4df453781b8d04b7", "1de54c9f1e490090ffcacfe9a35cd40d3fcb235f", "22bb280dffb450f6f65798c529309770a853aee3", "8be586e7d44183f4745fc4685d2e68a5ba7879b4", "8f078271d8bd6b9ff21818de0dc3b4294e5fac12", "8ca277147aac642bd26a1b2220f89e12574d268f", "2f85f20a076cb91dcdf4b3e5b16886ee9b6b3543" ], "paperAbstract": "Control-plane operations are indispensable to providing data access to mobile devices in the 4G LTE networks. They provision necessary control states at the device and network nodes to enable data access. However, the current design may suffer from long data access latency even under good radio conditions. The fundamental problem is that, data-plane packet delivery cannot start or resume until all control-plane procedures are completed, and these control procedures run sequentially by design. We show both are more than necessary under popular use cases. We design DPCM, which reduces data access latency through parallel processing approaches and exploiting device-side state replica. 
We implement DPCM and validate its effectiveness with extensive evaluations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117838", "https://www.cs.purdue.edu/homes/chunyi/pubs/mobicom17-li.pdf", "http://metro.cs.ucla.edu/papers/mobicom17-dpcm.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7a4c1b65cb73268b5beb4eb815a618c08b229aca", "sources": [ "DBLP" ], "title": "A Control-Plane Perspective on Reducing Data Access Latency in LTE Networks", "venue": "MobiCom", "year": 2017 }, "7a543d5970cdd8e7a574c42cf7ab4c7811784b68": { "authors": [ { "ids": [ "17876815" ], "name": "Longjie Ma" }, { "ids": [ "8264925" ], "name": "Wu Jigang" }, { "ids": [ "39361461" ], "name": "Long Chen" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Cloudlet", "Cluster analysis", "Defense of the Ancients", "Dota 2", "Heuristic", "Integer programming", "K-medoids", "Linear programming", "Simulation", "Software deployment", "Storage area network", "Wireless access point" ], "id": "7a543d5970cdd8e7a574c42cf7ab4c7811784b68", "inCitations": [ "3c611d78623a0b4ff21e60b6517341992821c0bd" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "196-203", "journalVolume": "", "outCitations": [ "076a06bda55555ba0a2cf75af5fe2ba265d76c76", "31181e73befea410e25de462eccd0e74ba8fea0b", "946437981815374aae4272b1dfc1b5067edcef56", "95addf2c9e556d436f853c6eb870d1f47a539bac", "09d7a6126120458d3988676d4f0a1ffada7d0a55", "0278408c86f221b3db5724213ea5fb87844dd148", "6a656a567097c53a49b1dbeb9e1e77bebf7524ec", "4ffa3dccb5a4564987e243b8ca09244bbc07f16d", "341a5ae25aa5277ee56c3d65a79d252fab0cb366", "1b76c259fdf9150cc72b5be0db0d255c984e2fc9", "2fffe5649b701046445e59f10f08d050f263440c", "39ff1cb9dedd983d299e6f1ef8fef14eb18f77c5", "a04682f6eda735cfda5692bfc10cf40410674bb8", "373f39a4defdb668bcfd01ee359d93365328686b" ], "paperAbstract": "In the large-scale Wireless Metropolitan Area Network (WMAN) consisting 
of many wireless Access Points (APs), choosing the appropriate position to place cloudlet is very important for reducing the user's access delay. For service provider, it is always very costly to deployment cloudlets. How many cloudlets should be placed in a WMAN and how much resource each cloudlet should have is very important for the service provider. In this paper, we study the cloudlet placement and resource allocation problem in a large-scale Wireless WMAN, we formulate the problem as an novel cloudlet placement problem that given an average access delay between mobile users and the cloudlets, place K cloudlets to some strategic locations in the WMAN with the objective to minimize the number of use cloudlet K. We then propose an exact solution to the problem by formulating it as an Integer Linear Programming (ILP). Due to the poor scalability of the ILP, we devise a clustering algorithm K-Medoids (KM) for the problem. For a special case of the problem where all cloudlets computing capabilities have been given, we propose an efficient heuristic for it. We finally evaluate the performance of the proposed algorithms through experimental simulations. 
Simulation result demonstrates that the proposed algorithms are effective.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101138" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7a543d5970cdd8e7a574c42cf7ab4c7811784b68", "sources": [ "DBLP" ], "title": "DOTA: Delay Bounded Optimal Cloudlet Deployment and User Association in WMANs", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "7a5cf32d06c3b2e4f27bee372a53bdc2e8fcfbce": { "authors": [ { "ids": [ "1919355" ], "name": "Andrew Baumann" } ], "doi": "10.1145/3102980.3103002", "doiUrl": "https://doi.org/10.1145/3102980.3103002", "entities": [ "Central processing unit", "Interaction", "Memory protection", "Moore's law", "Protection ring", "User space" ], "id": "7a5cf32d06c3b2e4f27bee372a53bdc2e8fcfbce", "inCitations": [ "b3f2a11d45757e675be123d55ec0eb192bcca990", "e9ecf9568b3c504bb1e5cdfefbd5a567b284ce9e", "03e89626cbb864fb1243b4ee8b4037020a9250eb", "0f627e7b85e9d0c59d9acde72464afa702795536", "7cdf63e05545333f10f69317383a3a88c6e29d03" ], "journalName": "", "journalPages": "132-137", "journalVolume": "", "outCitations": [ "2343df8034837c982fe77492480a7bb1ea908c02", "17886b4911ffd50d7e02a574caad34a286458b3a", "b0ac2616034f56ab1469afb935b55fe7e37f8f41", "69b7456f3d47fed3745239b5f67996a0b9a1a5c9", "71a2d8c473f13d0c664f751db97e81128281b1eb", "4c891cc807e701ba31a378a1e672d26bbac22cdc", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "7aaede70f5efcb1542a80707c1f0f8b01955a7d2", "06bf84f98e7dd39be8d96eb67bafbf56d61bc715", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "7927d7f25e2ca4ca23c1a95d671d4dde36f9fbc7", "07f0e56d1c37c213cd5c617dbfba5a0549629a19", "eeb5ec8d23124c4b352aa4168cb03f87f9480c92", "104eb3716d8b2d7e6d69805357d5b7fb87caff3e", "77cab38bef9d1b8b7bcfd3bb382c9d55541465e1", "0a289fd7b14345822b1acda6d82750b15d59663e", "1bd2d9fb62832737735d011154834b7c80c7e50a", "6a8f65381a627a2db6c756a7185d9106f0acefec", 
"3367eaf02789f5dcf741318fcc18c0dea8fcbb76", "0a65844b2e318305c7031eb53cb306efe7763d22", "0038ce3977245eb24664222903fc17699f3636e7", "8eaaf9e6a63b8d7562d27ec73aca36931815b83a", "d296252ddf0e2c6b7422008d703843c1863bd15b", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd" ], "paperAbstract": "Moore's Law may be slowing, but, perhaps as a result, other measures of processor complexity are only accelerating. In recent years, Intel's architects have turned to an alphabet soup of instruction set extensions such as MPX, SGX, MPK, and CET as a way to sell CPUs through new security features. Unlike prior extensions, which mostly focused on accelerating user-mode data processing, these new features exhibit complex interactions and give system designers plenty to think about.\n This calls for a rethink of how we approach the instruction set. In this paper we highlight some of the challenges arising from recent security-focused extensions, and speculate about the longer-term implications.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3103002", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/05/baumann-hotos17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7a5cf32d06c3b2e4f27bee372a53bdc2e8fcfbce", "sources": [ "DBLP" ], "title": "Hardware is the new Software", "venue": "HotOS", "year": 2017 }, "7ac874f9c15f5cf48aaf6dacd4107f99ac7f5b5c": { "authors": [ { "ids": [ "1717410" ], "name": "James Aspnes" } ], "doi": "10.1145/3087801.3087836", "doiUrl": "https://doi.org/10.1145/3087801.3087836", "entities": [ "Clock signal", "Fairness measure", "Network switch", "Population protocol", "Standard model (cryptography)", "Theory", "Transfinite induction" ], "id": "7ac874f9c15f5cf48aaf6dacd4107f99ac7f5b5c", "inCitations": [ "863346ac258c4339d982a7b7386adef945401dab" ], "journalName": "", "journalPages": "431-440", "journalVolume": "", "outCitations": [ "0100fcefa1b5bb437a9da15065c32cbf9eb96aaa", "024ccc30279c1e1424a8df0de792f38e09d31037", 
"789510db3e7b06781b366b98bcd63c83f23ff1a4", "9fa01f138564424de5b94f1ce63eb45206ea2bd4", "4866ced610bea69c2f7d3fb754acda959e6c9274", "28d9dbee97eba9f11a87edf3b44fabb4a8db082e", "026a0f721c6e95ca2db9e52df215ab1078b1e7fa", "c1cb2c003ab53c6e0795ae7faf81ec2fb091a250", "34f7a08863613de09fe16ae8514018ba4840931b", "7cd2f0c7a90350b36fe75f04c262ed42e9ea6818", "0c6456937a3424ce8574b55ca454b52417182293", "46aebf6842058dcb9a30ef4df4f965839e4ad06c", "6b4c30ea4f78776783c0ff58b4aa7cbccf56bf69", "44e1a20d80a29a9491b574ad9fac68f7fb8c745b", "3b0d96ae2dedbe88ae13eaba040a080a1c769ecf", "543936d5d012bb90ba440c9b01db87176cd12fda" ], "paperAbstract": "Population protocols are required to converge to the correct answer, and are subject to a fairness condition that guarantees eventual progress, but generally have no internal mechanism for detecting when this progress has occurred. We define an extension to the standard population protocol that provides each agent with a clock signal that indicates when the agent has waited long enough. To simplify the model, we represent \u201clong enough\u201d as an infinite time interval, and treat a clocked population protocol as operating over transfinite time. This gives a clean theoretical model that we show how to translate back into finite real-world executions where the clock ticks whenever the underlying protocol is looping or stuck. Over finite time intervals, the protocol behaves as in the standard model. At nonzero limit ordinals\u03c9,\u03c9 \u00b72, etc., corresponding to clock ticks, the protocol switches to a limit of previous configurations supplemented by an signal registering in an extra component in some of the agents\u2019 states. Using transfinite timesmeans that we can represent fairness over sequences of transitions that may include clock ticks with the same definition as over smaller intervals. 
Using arbitrary ordinals allows using times like \u03c92 or \u03c93 to represent convergence that depends on detecting convergence repeatedly at", "pdfUrls": [ "http://cs-www.cs.yale.edu/homes/aspnes/papers/podc2017-clocked-proceedings.pdf", "http://doi.acm.org/10.1145/3087801.3087836", "http://cs-www.cs.yale.edu/homes/aspnes/papers/podc2017-clocked-full.pdf", "http://www.cs.yale.edu/homes/aspnes/papers/podc2017-clocked-full.pdf", "http://cs.yale.edu/homes/aspnes/papers/podc2017-clocked-full.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/7ac8/74f9c15f5cf48aaf6dacd4107f99ac7f5b5c.pdf", "s2Url": "https://semanticscholar.org/paper/7ac874f9c15f5cf48aaf6dacd4107f99ac7f5b5c", "sources": [ "DBLP" ], "title": "Clocked Population Protocols", "venue": "PODC", "year": 2017 }, "7ae7621edcb6bf0e20ad891220602380c87dfa6b": { "authors": [ { "ids": [ "3309629" ], "name": "Mennan Selimi" }, { "ids": [ "2769268" ], "name": "Lloren\u00e7 Cerd\u00e0-Alabern" }, { "ids": [ "1749419" ], "name": "Marc S\u00e1nchez Artigas" }, { "ids": [ "3247322" ], "name": "Felix Freitag" }, { "ids": [ "1718231" ], "name": "Lu\u00eds Veiga" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Cloud computing", "Heuristic", "Heuristic (computer science)", "Microservices", "Owned", "Provisioning", "Software deployment", "Web 2.0" ], "id": "7ae7621edcb6bf0e20ad891220602380c87dfa6b", "inCitations": [ "399944e0c32772d86f07568f4e00af264b3dc650", "0f3ce2f2df9c8366ec63ba9e7577d2e657c6035d", "2f55b49c20fef423709bbbb8e346c211924e00b4", "9d2690c6cee455406aaff66d7f9b287c9a979a4a", "7291694b278fc9a7525009f1b732ff84886bd348", "9459bc2a3f3c276d2c9430c164dee6b0aecb073d" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "401-410", "journalVolume": "", "outCitations": [ "278fd34a73a16e0d4bce28dc97329dae5166799b", "57aca1569e3eb72604973e2a024f590cb70eed06", "c9efc4c885d1827738fbd01af99c3466676c9c49", 
"702c71975469ea9b9a81bee5ac282a1d278edaec", "312a2edbec5fae34beaf33faa059d37d04cb7235", "04b0efaf6982c350523c0e9183c91f1bc5b64ea9", "0bd7fbf28e63db7139bd8995948ea7004fe56dbe", "097ec320bb82712203fc18ef60111b03dafa937f", "0c14a1d38c4e22ac3ec8c87b64411822f2bd46d3", "134f827f61a4c2f4003b229af4c3678ab52ae18b", "8b9ae31b198626959a088cd7e4f992672cd29479", "1b7a5fd487fad151da54250578a8f962208fb267", "04aac0c9ee376b731d5449b823f48923373541c4", "1ffa399603456862138cc518319ca6a667468a34", "0a4110fda21f0de29824ead1df591d2c5e1da8d0", "d8a60c31994e12ceaa0f199fe572b215dada8c7e", "3e522ad5ac774102d0efe32e3686e62d5f000d7f", "36274af7994ea907fe5c13c79b84f09d8c20bf61", "568f9422fb2a19f3427630faf64764d187e6b0e0", "04d729a8660ca5b6b46c1df2b014f97fbea31c85", "cb9e5fb860f9a22a94ac723ccfd6bb54422d4447", "0261d45beed77ee3f9066f465ca39085fc066d96", "03216f453b13c6fb844855b93cbcd6e23a7c7086", "a217fb1d865c456e986b92cf6345e890d1956bbc" ], "paperAbstract": "Community networks (CNs) have gained momentum in the last few years with the increasing number of spontaneously deployed WiFi hotspots and home networks. These networks, owned and managed by volunteers, offer various services to their members and to the public. To reduce the complexity of service deployment, community micro-clouds have recently emerged as a promising enabler for the delivery of cloud services to community users. By putting services closer to consumers, micro-clouds pursue not only a better service performance, but also a low entry barrier for the deployment of mainstream Internet services within the CN. Unfortunately, the provisioning of the services is not so simple. Due to the large and irregular topology, high software and hardware diversity of CNs, it requires of a \"careful\" placement of micro-clouds and services over the network. 
To achieve this, this paper proposes to leverage state information about the network to inform service placement decisions, and to do so through a fast heuristic algorithm, which is vital to quickly react to changing conditions. To evaluate its performance, we compare our heuristic with one based on random placement in Guifi.net, the biggest CN worldwide. Our experimental results show that our heuristic consistently outperforms random placement by 211% in terms of bandwidth gain. We quantify the benefits of our heuristic on a real live video-streaming service, and demonstrate that video chunk losses decrease significantly, attaining a 37% decrease in the loss packet rate. Further, using a popular Web 2.0 service, we demonstrate that the client response times decrease up to an order of magnitude when using our heuristic.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101167" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7ae7621edcb6bf0e20ad891220602380c87dfa6b", "sources": [ "DBLP" ], "title": "Practical Service Placement Approach for Microservices Architecture", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "7b042b03feb5f397c30713fdbe160ba27a892f61": { "authors": [ { "ids": [ "3435201" ], "name": "Pierre Matri" }, { "ids": [ "26420353" ], "name": "Yevhen Alforov" }, { "ids": [ "40135279" ], "name": "Alvaro Brandon" }, { "ids": [ "1772075" ], "name": "Michael Kuhn" }, { "ids": [ "2797656" ], "name": "Philip H. 
Carns" }, { "ids": [ "1697513" ], "name": "Thomas Ludwig" } ], "doi": "10.1109/CLUSTER.2017.63", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.63", "entities": [ "Attribute\u2013value pair", "Big data", "Database", "Object storage", "POSIX", "Principle of abstraction", "Time series" ], "id": "7b042b03feb5f397c30713fdbe160ba27a892f61", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "81-86", "journalVolume": "", "outCitations": [ "40c2cc3ff3afbd12bc8af32636ddc9e367801266", "6b1146e51b2c4d5b8433d4d9c6dbf87c6c484196", "09062511a5de7fb31afae813c85df737f0881673", "05227501b3727de9b117907ecd77b0bff694869f", "9c0b4daa08cf295de210757dbe3c4861b3618893", "a29a8cc403fac037b200a4e2814f756e6e1fecea", "0ffcc6dfc6fd43d06bcb71c4a247cac7483fc145", "5f3f9223c5c9f896be099bc177929febad508407", "92c65a1b4dda537f8c47861852218afbe317575c", "e4e26822589aa524c79b529e43cbb10da8dba46c", "3daa7ee9493c38e0e7d110b0db439a533f155121", "2a5d9c7afcf6b70ad83bca0c4262b66ef654415a", "5568df48a03cd16e286025c812f1912a7d1c1766", "ba40f8d4c4769daa1d831b868ab56cd338b8d40b", "fb46380c4e749e654da4c11d1391cfa35d6f0899", "42fee5b7c0d96f93172ac64bfef5a888874f3ab6", "2580328f9d3b747e2a72e6ea51cc618de2d45699", "da30f9be5550ba3f0c96eba6a2ad7de28f2efeb2", "5a2d0cb52a3ad39f47e4267331b545bce4cb022f", "5c0f77d10bc13cab68b85ff03db7da154efd7ac7", "f6fb49344735d8b135a86e49e58913babe50b8f7", "1e9f092e114393ba786cb2002b6f1b0dabe875fe", "6049062a3a73d22c914e7fa8951b3b0e5f09b309", "bcfbbe50b4b2b9b5c24b0628d31b2b03bf6cb274", "21854a5fb77a45f411865652a63663bb9ff3cde9", "4e63fd64ac90778b8c431e37f54b5cfd01e0f379", "a776115d6567d38ed345c8c93fb23c7ff335cb1a", "6a810c09686de4879bac3fec5c99f0b8833098bd", "7e5c548cc3a2f9180301bc9b44cf4524aabdcbec", "8b1d8d46836a6d5eb4355315b64d85c128cbff27", "92a060d80eb7bb7f41005c7eda8884ee4de601d5", "06cfb22d7d8804257fe1a789470015c649ec71eb", "2da760f90c3d2bf6598becdde9063093f488548c", 
"071b9e96b5e266af39e5d08fe6571a14472c6d7d", "0558c94a094158ecd64f0d5014d3d9668054fb97", "1a26009ceaf8e482e2aaff71af3dabb2a73e0dda", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "d56257d8d0139133532b50cc45c27ed542d449d6", "28b626bd09e65ab1f711ecb1853b0fae52e384be", "75e3b00fe158ffced55b3dcf299907ff811120ab", "2680e43fff9b16200106702e0c5165685312d52d" ], "paperAbstract": "The increasingly growing data sets processed on HPC platforms raise major challenges for the underlying storage layer. A promising alternative to POSIX-IO-compliant file systems are simpler blobs (binary large objects), or object storage systems. They offer lower overhead and better performance at the cost of largely unused features such as file hierarchies or permissions. Similarly, blobs are increasingly considered for replacing distributed file systems for big data analytics or as a base for storage abstractions like key-value stores or time-series databases. This growing interest in such object storage on HPC and big data platforms raises the question: Are blobs the right level of abstraction to enable storage-based convergence between HPC and Big Data? 
In this paper we take a first step towards answering the question by analyzing the applicability of blobs for both platforms.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.63" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7b042b03feb5f397c30713fdbe160ba27a892f61", "sources": [ "DBLP" ], "title": "Could Blobs Fuel Storage-Based Convergence Between HPC and Big Data?", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "7b0d3331717729f0b03077575ad2798e69073736": { "authors": [ { "ids": [ "3178312" ], "name": "Baris Kasikci" }, { "ids": [ "5998467" ], "name": "Weidong Cui" }, { "ids": [ "3298489" ], "name": "Xinyang Ge" }, { "ids": [ "1887661" ], "name": "Ben Niu" } ], "doi": "10.1145/3132747.3132767", "doiUrl": "https://doi.org/10.1145/3132747.3132767", "entities": [ "Commodity computing", "Concurrency (computer science)", "Concurrency control", "Failure rate", "Forward error correction", "Interleaved memory", "Lazy evaluation", "Memcached", "MySQL", "Pointer (computer programming)", "Program optimization", "Shared memory", "Software bug" ], "id": "7b0d3331717729f0b03077575ad2798e69073736", "inCitations": [], "journalName": "", "journalPages": "582-598", "journalVolume": "", "outCitations": [ "05b1011b95859cf353263e0ed2b4c87c3ab84585", "60430eadf9f9e2eab06d4bcebb26f5cc9e32fddf", "36a0e9c2be06f5ec6a89d8cc7e2793e33e0f6efb", "452b7f1eb4899fb83d6bc21a180643c4433684bb", "59ee76587fde9f3fef3b44872c58fb076165c6e5", "21161c8efa04cd2ec2e4f121fc720d7e2ffc4e38", "98ad840c92a4fe779725d2b0e5638733244bb040", "6b07dd22a109afd5e8b71b17449457b38858a870", "2194c3460ab71f3826db00b045b2ae590c753319", "41e9a9ec93157fcaf8b44a2a67a595316c815966", "03c40bad2ff5a077f5949ec211b807fa36c63cf0", "8c2b2fb1d4c44d1e1b63be4e5ef3bbb8d37dbfb5", "18ba3b150bd90bde44fcee6166c08bb069fd5ae2", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "05a618847e4f08e5bca29dff732757779722b2e0", 
"212cf7f493c6a46e35d799a359c2e4c5b82ea72f", "b7138e89931107fa6ff32143faea2d2f29bd40ff", "151a0887f0f202a0d93c6ff773717797860df723", "0c5c03b4ea2b9c3bcff5efac10fef337d594c417", "74714c8b171bb8f016b48bd48ed21876784de2dc", "2e292d10d668c4b4ba92f1b5272ebc82d4bc5f35", "1e4874f3443d191a4f7f3ba63a04a264bd00e364", "1e3d74387b8ccafe78b509b9a39414ab77ebe909", "b7efe971a34a0f2482e0b2520ffb31062dcdde62", "0e578433d4e8bb2a571c87a2d22816074902f009", "89bf31c076dde66c5ab4541e5b7388489bd7868a", "0d29a696d8c66d795336ab34aff0b6fb8decb06d", "585706dc56e146c8fb42228fc5cbe1de0bb0a69d", "855af0ff0a3b2837402a191304e0465b65e0a36b", "2035c8f33909ac206c4d1a3bdee611577fb2c5d1", "19892128d0d545b2a9bc8a6eff86caa282908bc4", "29c67a6407b8f0d30e5b30ab9b84851bc8e820b8", "0bc25a572260f0b726ff6681d264c5684a462c24", "7bab43eb94430943a6883acced0453a98e1e62c6", "0180ce1c6e98fd66362b46bd945c503bf7372aa8", "16acc7543d5057767c163abaadfecdcf42f2c19a", "418002dea14ec4e90be51daad3f5ee38b3c5574f", "00a65f74facc3da8b8f352c2f0f5f385b758cc0b", "35cf7f54d5648a71dae3619b8293ca23a2ee795f", "fb34f663b34a8cb09a75fe98685c003d86f32e15", "172e53475249525093594009251e7c4f60795b88", "64cfec85c8149e802eb9460b0afd1f59df325169", "114801eccb5eb0831fd1848f351a138253a42f15", "1f157f2b144528924eec46d9316bd5517352b89a", "0653e2ed9f683868cb4539eb8718551242834f6b", "357ac5cc0fada997d486e7fb585177933fac6457", "113772329678792fc2a3a8cb9322c164547f88a0", "3a33dad8e9d12835fca95deec73e841096c8bec0", "12161e5c5daa594fdab296356b3424c3bf4c8e9e", "09de90384bacfdd82e4503dc155ab6868f953eb3", "2a1791cdc6983d5a23d9d4ee68f6d70d8225696a", "49e8721bd4821eff0f147d73bea970f2de3aab8a" ], "paperAbstract": "Diagnosing concurrency bugs---the process of understanding the root causes of concurrency failures---is hard. Developers depend on reproducing concurrency bugs to diagnose them. Traditionally, systems that attempt to reproduce concurrency bugs record fine-grained thread schedules of events (e.g., shared memory accesses) that lead to failures. 
Recording schedules incurs high runtime performance overhead and scales poorly, making existing techniques unsuitable in production.\n In this paper, we formulate the coarse interleaving hypothesis, which states that the events leading to many concurrency bugs are coarsely interleaved. Therefore, a fine-grained and expensive recording is unnecessary for diagnosing such concurrency bugs. We test the coarse interleaving hypothesis by studying 54 bugs in 13 systems and find that it holds in all cases. In particular, the time elapsed between events leading to concurrency bugs is on average 5 orders of magnitude greater than what is used today in fine-grained recording.\n Using the coarse interleaving hypothesis, we develop Lazy Diagnosis, a hybrid dynamic-static interprocedural pointer and type analysis to diagnose the root causes of concurrency bugs. Our Lazy Diagnosis prototype, Snorlax, relies on commodity hardware to track thread interleavings at a coarse granularity. Snorlax does not require any source code changes and can diagnose complex concurrency bugs in real large-scale systems (MySQL, httpd, memcached, etc.) with full accuracy and an average runtime performance overhead of below 1%. 
Broadly, we believe that our findings can be used to build more efficient in-production bug detection and record/replay techniques.", "pdfUrls": [ "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/09/snorlax-sosp17.pdf", "http://doi.acm.org/10.1145/3132747.3132767" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7b0d3331717729f0b03077575ad2798e69073736", "sources": [ "DBLP" ], "title": "Lazy Diagnosis of In-Production Concurrency Bugs", "venue": "SOSP", "year": 2017 }, "7b155e582fe8af284dac58ca10de41068459dceb": { "authors": [ { "ids": [ "3372542" ], "name": "Ester Livshits" }, { "ids": [ "1679226" ], "name": "Benny Kimelfeld" } ], "doi": "10.1145/3034786.3056107", "doiUrl": "https://doi.org/10.1145/3034786.3056107", "entities": [ "Algorithm", "British undergraduate degree classification", "Functional dependency", "Maximal set" ], "id": "7b155e582fe8af284dac58ca10de41068459dceb", "inCitations": [ "65a241bb7cf1599765462556a40958a2b8faedca" ], "journalName": "", "journalPages": "289-301", "journalVolume": "", "outCitations": [ "05cb693dfcad830afe6f120f30894f9ab84efe1d", "086da3000dee0f42d719407af9ef2a8f39836623", "5415182a2a36a614721bfd385310f2a4f2f2a58a", "9c8bbee60dac4ab599276815068e11f487ccb69e", "379a861b13efa6572fa747edd3af0cd457214abe", "2def02ffe888c52cbfe1a02ab143b856fdcf6033", "1d712b192f43d4edb924c779e6ca91d4106094a5", "277716adb9c012aa895c952bd537cb810fa97880", "367de07e5cdb397e8d219c9b51f0e4c6d3317998", "b410b6599005d77833f17a490ecbdc79741c9300", "6743a1fd0a1198adf8abe3fb851abf603762f404", "2b45fd0062cdd6059e2951dd355768708e6e5e38", "a960702680bbf3371a72e2a5c2d9efd2f43dc787", "c0483ad5cc94ecc6bc4adb7cf89c5ee47da12548", "34ae5e1ba049b81139868641a0dc5de1f733be13", "4229b702b33cca32ce0f13976373a4950daf5beb", "015b1cb7d086387bdbf86d5546c955edd6c0c781", "bf6af5a2d45964c7daf45a22976d6f2ffe205c16", "a7018d72473daadce315a6da5d4801c12143a4e2", "178d3dc56e69e85adc373564a440dee91d7505bb", 
"04326ef1c61df8504bb1005364c532a3186d8a79", "bd958d346852af7a3a153b9a762423b1c950e0b5", "a784bf35675a1af1fdc9df66c3986ea9bec77b7b", "74feb1f64b3773173e296c93d1e72b9734914c37", "17ff074ae373cb95575ce8ca579f35b4088f02d6", "3e475331fd261b578c6422c48f2fb327827f7b68", "25f9f4f10d8d1818746e57283d7b1a0fb4c44ff0", "2057029e758fd42f882a1108b8d8d269603a9439", "23745e462f946a472a8e9ba1dca209efc5fce11f", "86791725002ae6adb1e257c2a8c4acadc9d5bcce", "14e59a1b2b58ac8d3549672bf377a41a6602dd7f", "f963a360ea0e2a9f6f7acc0c2eaf0eb66bd69701", "5eda7032bc75d046390f381158ace4703e34ffb6", "1ed8636cdd2892833a367a3ffb6a43cf0a8d7d76", "823abbfa7b6cd6921da89e49c84d4ee6799daf3a", "072f30530dbec74eff91243b08587cd2df14ab77", "14c0a4d868a250abae88e19400f66690e8480f5c" ], "paperAbstract": "In the traditional sense, a subset repair of an inconsistent database refers to a consistent subset of facts (tuples) that is maximal under set containment. Preferences between pairs of facts allow to distinguish a set of preferred repairs based on relative reliability (source credibility, extraction quality, recency, etc.) of data items. Previous studies explored the problem of categoricity, where one aims to determine whether preferences suffice to repair the database unambiguously, or in other words, whether there is precisely one preferred repair. In this paper we study the ability to quantify ambiguity, by investigating two classes of problems. The first is that of counting the number of subset repairs, both preferred (under various common semantics) and traditional. We establish dichotomies in data complexity for the entire space of (sets of) functional dependencies. The second class of problems is that of enumerating (i.e., generating) the preferred repairs. 
We devise enumeration algorithms with efficiency guarantees on the delay between generated repairs, even for constraints represented as general conflict graphs or hypergraphs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3034786.3056107" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7b155e582fe8af284dac58ca10de41068459dceb", "sources": [ "DBLP" ], "title": "Counting and Enumerating (Preferred) Database Repairs", "venue": "PODS", "year": 2017 }, "7b298aecb67246843571ee19557bff0ef80647bc": { "authors": [ { "ids": [ "1697183" ], "name": "Bing Xie" }, { "ids": [ "1962156" ], "name": "Yezhou Huang" }, { "ids": [ "1767703" ], "name": "Jeffrey S. Chase" }, { "ids": [ "32485139" ], "name": "Jong Youl Choi" }, { "ids": [ "1736095" ], "name": "Scott Klasky" }, { "ids": [ "1772467" ], "name": "Jay F. Lofstead" }, { "ids": [ "1770398" ], "name": "Sarp Oral" } ], "doi": "10.1145/3078597.3078614", "doiUrl": "https://doi.org/10.1145/3078597.3078614", "entities": [ "Fastest", "Interference (communication)", "Lustre", "Mean squared error", "Nonlinear system", "Performance prediction", "Petascale computing", "Supercomputer", "TOP500", "Titan" ], "id": "7b298aecb67246843571ee19557bff0ef80647bc", "inCitations": [ "e7ae25cedd398e91acb65f8a211a4fd524785b86" ], "journalName": "", "journalPages": "181-192", "journalVolume": "", "outCitations": [ "9119dbf004aacd27a1da3ff1be6bc05c2ee03675", "884fc7d1c8353a6ca2f0830a9f0f840a985afa7e", "9da1d06e9afe37b3692a102022f561e2b6b25eaf", "5df2fd5f6181673851e5f8d50740448d33a21edc", "265da96369ea4988aac2fd98a69e66f553cc07cf", "59ba9f62728b6231f982ea3b59f9ba7422182f28", "02e0bc77460469aefec5bd794ee6c4efc15e6adb", "8fb16c8a2b97af7d5059df24f858e285d051c31a", "4be2b9ac4bf468bb1f0bd62ac170806fef3e93bf", "52578625d290592864e94669e0deb7278eb577f9", "3c03e217aeaf6734b5471d5f8930436e009d60af", "bc0c2db8a648b408b07b68a1d03f728214a19f2f", "aeae0567deda241a5a2aeb992f41e68089e58030", "05e0dd9ba23f99acf5537b51f3a3263d3febe6dc", 
"988d1a223e2ee40f2474f729ac3ac53e012d8337", "486ddb3c2fe8a74807f14d58f9704b34a97949f8", "ebc86cb634accc52cc5b895e42f4fdc1f964c964", "590bd345ef4b8f274af3363a52b7d8f518cdc08a", "0a564c5117375287c60d3a27a96003f30396f62f", "9edab79d681bae0071aa784328b0ce134d909c10", "986a6ee08281e59df422b55cd577b342ff56fa9c", "f760618157047f6399a315d4fed20967adb41c6a", "113edba34ed7221a0d98d57761b205af46eda099", "0090bfad54c63cb7db4e545c441732eeebbfd573" ], "paperAbstract": "In this paper, we develop a predictive model useful for output performance prediction of supercomputer file systems under production load. Our target environment is Titan---the 3rd fastest supercomputer in the world---and its Lustre-based multi-stage write path. We observe from Titan that although output performance is highly variable at small time scales, the mean performance is stable and consistent over typical application run times. Moreover, we find that output performance is non-linearly related to its correlated parameters due to interference and saturation on individual stages on the path. These observations enable us to build a predictive model of expected write times of output patterns and I/O configurations, using feature transformations to capture non-linear relationships. We identify the candidate features based on the structure of the Lustre/Titan write path, and use feature transformation functions to produce a model space with 135,000 candidate models. 
By searching for the minimal mean square error in this space we identify a good model and show that it is effective.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078597.3078614", "https://users.cs.duke.edu/~bingxie/bing-hpdc17/bing-hpdc.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7b298aecb67246843571ee19557bff0ef80647bc", "sources": [ "DBLP" ], "title": "Predicting Output Performance of a Petascale Supercomputer", "venue": "HPDC", "year": 2017 }, "7b5a157e8bae64a805d7c25bdb65974dcab9924e": { "authors": [ { "ids": [ "1812799" ], "name": "Dingxiong Deng" }, { "ids": [ "1773086" ], "name": "Cyrus Shahabi" }, { "ids": [ "2478322" ], "name": "Ugur Demiryurek" }, { "ids": [ "2663662" ], "name": "Linhong Zhu" } ], "doi": "10.1109/ICDM.2017.17", "doiUrl": "https://doi.org/10.1109/ICDM.2017.17", "entities": [ "Algorithm", "Cluster analysis", "Holism", "Lasso", "Matrix regularization", "Multi-task learning", "Risk management", "Sensor", "Traffic analysis", "Traffic exchange" ], "id": "7b5a157e8bae64a805d7c25bdb65974dcab9924e", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "81-90", "journalVolume": "", "outCitations": [ "1cb0c6573195aeb933e9ff663dad71d8ad1b0e13", "490c4b8d1afd46ec71cf34d2832b3237410762d1", "0dd58bd61d4d82f566db8edcc18999d53fe6bfc1", "2e37d6b9a02ec31ea22252c37beae410c6b609a8", "0479353510fc33adcc7acf69211a455b518de477", "1af0851efa40686b6d06e8678967d8140fd7bc68", "79930435480cd9a15402d60db69171413628a4f7", "0e601d009fd118cc165bfa2825c70b01940bdd9c", "8fc6facc4a77536877d04c663c1f8a7c330c0c64", "47ec3073d28b8a8713ffc41cebba57398ef723d4", "55acbcb7a7158ad4cb0d7ca0d654012a721ea45f", "833852a2305a93927408e4339d435ee6669ab0d0", "02d843e3a008e76cf6a4c23bd01023d264b05686", "7acfbd623bf203d9b075c205883c56acc3649140", "573ae3286d050281ffe4f6c973b64df171c9d5a5", "0d5619cf83cc951ced77a481b6497a558e61107f", "6efebd4cb8cb39da348e6818b94c680d3cfe198c", 
"44d811b066187865cce71d9acecdda8c3f9c8eee", "ccf223ab8a184f2d9f387fdb5bbd241010ee2cf6", "0f8f6c2e8c41b835407ba4b07d48944252a41400", "a6f53eddf6779b7e27e66c89cbf5c79d85905c7a", "3605b9befd5f1b53019b8edb3b3d227901e76c89", "2bbbc937de355cc2971433d5c67cd984d5472fe2", "07d1db388cd489420d40d0edb13e074d86c77dbd", "3f926aa4adfe7634d6c3350e699f20a5e38c3b52", "2c5135a0531bc5ad7dd890f018e67a40529f5bcb", "00791f2d67f9a6fdb77b669450fa9f25c275fd56", "3f9f761e697c14ae4f2bd4d5ad46db84879f118d", "d0c8bb3e73cf7a2779ec82e26afd987895df8a93", "bb27d2897e90cd73e54300ef9b5a454b72e05d93", "73c57d714521c32e7c4ab03a54c19616e159ef4c", "4fef1c8a91b87d313b6032ead69f13f6d7130520", "dd43dc772b81f14e1c0acd2e734500940376d6c4", "434013939dcb6bd1ddd6eccf404fe0646fda0251" ], "paperAbstract": "Due to the recent vast availability of transportation traffic data, major research efforts have been devoted to traffic prediction, which is useful in many applications such as urban planning, traffic management and navigations systems. Current prediction methods that independently train a model per traffic sensor cannot accurately predict traffic in every situation (e.g., rush hours, constructions and accidents) because there may not exist sufficient training samples per sensor for all situations. To address this shortcoming, our core idea is to explore the commonalities of prediction tasks across multiple sensors who behave similarly in a specific traffic situation. Instead of building a model independently per sensor, we propose a Multi-Task Learning (MTL) framework that aims to first automatically identify the traffic situations and then simultaneously build one forecasting model for similar-behaving sensors per traffic situation. The key innovation here is that instead of the straightforward application of MTL where each "task" corresponds to a sensor, we relate each MTL's "task" to a traffic situation. 
Specifically, we first identify these traffic situations by running clustering algorithms on all sensors' data. Subsequently, to enforce the commonalities under each identified situation, we use the group Lasso regularization in MTL to select a common set of features for the prediction tasks, and we adapt efficient FISTA algorithm with guaranteed convergence rate. We evaluated our methods with a large volume of real-world traffic sensor data; our results show that by incorporating traffic situations, our proposed MTL framework performs consistently better than naively applying MTL per sensor. Moreover, our holistic approach, under different traffic situations, outperforms all the best traffic prediction approaches for a given situation by up to 18% and 30% in short and long term predictions, respectively.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.17", "http://www-scf.usc.edu/~dingxiod/Papers/sa_mtl_ding.pdf", "http://infolab.usc.edu/DocsDemos/ding-icdm-17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7b5a157e8bae64a805d7c25bdb65974dcab9924e", "sources": [ "DBLP" ], "title": "Situation Aware Multi-task Learning for Traffic Prediction", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "7b74c0f2bbf98dd7ae7153fbb3525139f5a1a3b6": { "authors": [ { "ids": [ "3435078" ], "name": "Dylan Machovec" }, { "ids": [ "1681003" ], "name": "Sudeep Pasricha" }, { "ids": [ "1728645" ], "name": "Anthony A. Maciejewski" }, { "ids": [ "1744243" ], "name": "Howard Jay Siegel" }, { "ids": [ "1727497" ], "name": "Gregory A. 
Koenig" }, { "ids": [ "1731296" ], "name": "Michael Wright" }, { "ids": [ "33529455" ], "name": "Marcia Hilton" }, { "ids": [ "2789975" ], "name": "Jendra Rambharos" }, { "ids": [ "35428304" ], "name": "Thomas Naughton" }, { "ids": [ "1794961" ], "name": "Neena Imam" } ], "doi": "10.1109/IPDPSW.2017.158", "doiUrl": "https://doi.org/10.1109/IPDPSW.2017.158", "entities": [ "Heterogeneous computing", "Heuristic", "Overselling", "Preemption (computing)", "Simulation", "Supercomputer", "Utility" ], "id": "7b74c0f2bbf98dd7ae7153fbb3525139f5a1a3b6", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)", "journalPages": "54-64", "journalVolume": "", "outCitations": [ "9b6ddeb90dac8a828225bd58c9cf2f8ddc232812", "14a7aff39b4869d464b6f8ea4a93e53d41839b24", "4c050ee67e590a1f102a4b605ff7d13f4d746cc1", "b94d6bb4506dbb02244467f989b8aa1f06389988", "d0cd9b350eb0e1c21c2674727374b59a83265bdc", "1a98bf1f950d824773af27d32d3da8db4169451b", "32cb689d61f0e2d071c6a111bae8f2557662cd40", "4a1590c8b8dc28be9d0aaef56e298be67da5f07a", "c6203cd94b06fbcfac861c9bf33c0dd80fe488a3", "d51f4ab7389b96e990fa050adb99d518a7f88c39", "f17a85b6e883bd2c28b22d74968d87bf8980b6c2", "7a0906c8fac46ed86c0c89df16662145324933f6", "297938b9bc0d175a7fdaaf15a6ae530f0a9be9c0", "e99553b4cc9c88084c14ec718c5011882b253a8c", "b66f1f7ddcf058a0bd67b8c24bbdffe9f2398f5d", "03670ae248e456b67be7e435e86ddb8a9f87c242", "6a04c5b1d8dbc1319c43a0d50f652dcb70f4154f", "2ce1680064b577cccc2874ebe5362fb1b7484918", "a7c2b3f34ad9adaba912a3fdd5cffdfd99231c98", "c49a198b149497b2979bbb831331e337d23488cd", "113796a205e0b53456999761b3dcc582edde4623", "1402005ec32f2883b61df1b0f5ff6ae47c15d676", "77ffb473344575b11a4e8d94ed883466074b572b", "964ed7297596a98497495ff5cce27306ffdc61e5", "563debb9959f0e1742bb83d38f03383b611ae56d", "516d4035575368bcc8629285f72a727b085381a7" ], "paperAbstract": "We design resource management heuristics that assign serial tasks to the nodes of a 
heterogeneous high performance computing (HPC) system. The value of completing these tasks is modeled using monotonically decreasing utility functions that represent the time-varying importance of the task. The value of completing a task is equal to its utility function at the time of its completion. The overall performance of this system is measured using the total utility earned by all tasks during some interval of time. To maximize the performance of such a system where the preemption of tasks is possible, we have designed, analyzed, and compared a set of resource allocation heuristic techniques. We combine two utility-aware heuristics with three different preemption techniques to create six preemption-capable heuristics. We also consider the two utility-aware heuristics without preemption. We use simulation studies to evaluate this set of eight heuristics and compare them with an FCFS heuristic, which is often used in real systems, and random assignments. In general, our set of eight heuristics is able to significantly outperform the comparison heuristics, and the preemption-capable heuristics are able to significantly increase the utility earned compared to the heuristics that do not use preemption. 
We analyze the performance tradeoffs among the different preemption-capable heuristics under a variety of oversubscribed workload environments.", "pdfUrls": [ "http://www.engr.colostate.edu/~sudeep/wp-content/uploads/c104.pdf", "https://doi.org/10.1109/IPDPSW.2017.158" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7b74c0f2bbf98dd7ae7153fbb3525139f5a1a3b6", "sources": [ "DBLP" ], "title": "Preemptive Resource Management for Dynamically Arriving Tasks in an Oversubscribed Heterogeneous Computing System", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)", "year": 2017 }, "7b7d467755abf363753c1b386de5152190136890": { "authors": [ { "ids": [ "2623828" ], "name": "Sonia Ben Mokhtar" }, { "ids": [ "2498594" ], "name": "Antoine Boutet" }, { "ids": [ "1743906" ], "name": "Pascal Felber" }, { "ids": [ "1970140" ], "name": "Marcelo Pasin" }, { "ids": [ "14211471" ], "name": "Rafael Pires" }, { "ids": [ "2106027" ], "name": "Valerio Schiavoni" } ], "doi": "10.1145/3135974.3135987", "doiUrl": "https://doi.org/10.1145/3135974.3135987", "entities": [ "Adversary model", "Cryptography", "Dolev\u2013Yao model", "Information retrieval", "Personally identifiable information", "Privacy", "Private information retrieval", "Throughput", "Tor Messenger", "Web search engine", "Web search query" ], "id": "7b7d467755abf363753c1b386de5152190136890", "inCitations": [], "journalName": "", "journalPages": "198-208", "journalVolume": "", "outCitations": [ "c68e267df3d11bc60f6e7b405160ceeefd8bd7d3", "6871b95c14dccca7636b498b5d363a743c5288e6", "35a588f4400bad13eb6caa2c2a660172bd2aff9e", "5b2092b54860f134f78b2ec884c910750def71e6", "452c803f91ab670bf36403ed5412875b13ae9e94", "43242753ae89aca56a92fb1f65870ea0c7b7d6d7", "23864b54c1ee459fc39664ce947c0595794ea0b6", "f0569efef9069572a2958b59dbf43ba01fe2cfae", "212bb4024f25a1dab631fb5dcdff0cf7721ff6fe", "14d19771bc69f1d41f63052e56e134f9ed569c1e", 
"09af9108cb5c196d5c15a6f3d26e604434203bea", "eeb5ec8d23124c4b352aa4168cb03f87f9480c92", "154e504956cbbc8ddfd2e0aa420333f81f1183c9", "66ce71277f7dd9ac21040f098c5b4be670b6a2ff", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "06bf84f98e7dd39be8d96eb67bafbf56d61bc715", "1b80ae882afb809686f20765e4a42a5b99aa55de", "8397c13450edf3ff11866d8c81dc14aaa47f0555", "113c9ce94472e8304ce8e21f08d0e1bbecce1e1e", "557d8b988bca3d0033189723d11102e04c0c67c0", "706051dc89d80b8a517ddc8db886b50b53a1e0eb", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "3e0080a34eca4eabb9b371c2b3c369dc4dc90112", "d7ca9676c8e6c534e527451463b86ec7da2d5a78", "1a0b083eb8353772bd1d57f7429cfb452fc97dcd", "32b7799ca55025ae6ebf2863e02404623f27f094", "03c5ab0f31220b29d3b8eb60637a7a8140fd87fc", "7517aee88ff9db3a27e4061fd42f0878d4865fce", "df805da2bb2a7e830b615636ee7cd22368a63563", "bac41b59697da3ca5c80ca08f2bbbc97a3576248", "6d18de28809c5220a1ac1303d93fa972ffa9dd9c", "13cc53a512ca45d229e9fe514ad0b30441d324d8", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "5938e67f7dc657a5fbff20448da5b5f07d643be3", "79b11e4d19c4f682211d69c0743ff24c981597c7" ], "paperAbstract": "The exploitation of user search queries by search engines is at the heart of their economic model. As consequence, offering private Web search functionalities is essential to the users who care about their privacy. Nowadays, there exists no satisfactory approach to enable users to access search engines in a privacy-preserving way. Existing solutions are either too costly due to the heavy use of cryptographic mechanisms (e.g., private information retrieval protocols), subject to attacks (e.g., Tor, TrackMeNot, GooPIR) or rely on weak adversarial models (e.g., PEAS). This paper introduces X-Search, a novel private Web search mechanism building on the disruptive Software Guard Extensions (SGX) proposed by Intel. We compare X-Search to its closest competitors, Tor and PEAS, using a dataset of real web search queries. 
Our evaluation shows that: (1) X-Search offers stronger privacy guarantees than its competitors as it operates under a stronger adversarial model; (2) it better resists state-of-the-art re-identification attacks; and (3) from the performance perspective, X-Search outperforms its competitors both in terms of latency and throughput by orders of magnitude.", "pdfUrls": [ "http://doi.acm.org/10.1145/3135974.3135987" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7b7d467755abf363753c1b386de5152190136890", "sources": [ "DBLP" ], "title": "X-search: revisiting private web search using intel SGX", "venue": "Middleware", "year": 2017 }, "7b91e84ddb8a8c3004189838014ee8a073a4ebb6": { "authors": [ { "ids": [ "3136303" ], "name": "Guojing Cong" }, { "ids": [ "3031526" ], "name": "Onkar Bhardwaj" }, { "ids": [ "2521552" ], "name": "Minwei Feng" } ], "doi": "10.1109/ICPP.2017.10", "doiUrl": "https://doi.org/10.1109/ICPP.2017.10", "entities": [ "Algorithm", "Amortized analysis", "Deep learning", "Distributed computing", "Gloss (annotation)", "Gradient", "Gradient descent", "Silent Hill: Downpour", "Sparse matrix", "Speedup", "Stochastic gradient descent" ], "id": "7b91e84ddb8a8c3004189838014ee8a073a4ebb6", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "11-20", "journalVolume": "", "outCitations": [ "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "5cd6811e6383a7329b3baeabd4647c3d92216872", "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "0ec7e022a8848fb41940983addeeb76471227f30", "0b99d677883883584d9a328f6f2d54738363997a", "4b197d60de05e14781d67a318b29a4d4600a7460", "043afbd936c95d0e33c4a391365893bd4102f1a7", "0122e063ca5f0f9fb9d144d44d41421503252010", "58c45859350b7e9fc2dc6676e318e8f526073f5f", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "1ffca389d203b1cb4ac0389f85e39b0cc5babc0c", "2672ba89286367fe312f167d85a75b3fbe64b2ab", "7371a9331123bf7974626b57b5e3c7570122ad87", 
"43c96ccaa90b3875ce2912063b9949716f8d5824", "34f25a8704614163c4095b3ee2fc969b60de4698", "a058935fd019c2367fd32c16cd1ce6983a29aafb", "6a97adbfaeecd5c1eeb3ae9c76a3842d4858cc06", "5d90f06bb70a0a3dced62413346235c02b1aa086", "96167ed3ebc9a2c3270f6ae96043e6f086eed4de", "0760550d3830230a05191766c635cec80a676b7e", "515dab07e990cdaf0a60bb009949c8686d109750", "ba461c849065809acf7f8d48ccced77004c155ca", "0144941d255dad89d3d90c2d131a15cc01df9829" ], "paperAbstract": "Parallel and distributed processing is employed to accelerate training for many deep-learning applications with large models and inputs. As it reduces synchronization and communication overhead by tolerating stale gradient updates, asynchronous stochastic gradient descent (ASGD), derived from stochastic gradient descent (SGD), is widely used. Recent theoretical analyses show ASGD converges with linear asymptotic speedup over SGD.Oftentimes glossed over in theoretical analysis are communication overhead and practical learning rates that are critical to the performance of ASGD. After analyzing the communication performance and convergence behavior of ASGD using the Downpour algorithm as an example, we demonstrate the challenges for ASGD to achieve good practical speedup over SGD. We propose a distributed, bulk-synchronous stochastic gradient descent algorithm that allows for sparse gradient aggregation from individual learners. The communication cost is amortized explicitly by a gradient aggregation interval, and global reductions are used instead of a parameter server for gradient aggregation. 
We prove its convergence and show that it has superior communication performance and convergence behavior over popular ASGD implementations such as Downpour and EAMSGD for deep-learning applications.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.10" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7b91e84ddb8a8c3004189838014ee8a073a4ebb6", "sources": [ "DBLP" ], "title": "An Efficient, Distributed Stochastic Gradient Descent Algorithm for Deep-Learning Applications", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "7bab43eb94430943a6883acced0453a98e1e62c6": { "authors": [ { "ids": [ "38144094" ], "name": "Tong Zhang" }, { "ids": [ "39916306" ], "name": "Changhee Jung" }, { "ids": [ "2006849" ], "name": "Dongyoon Lee" } ], "doi": "10.1145/3037697.3037708", "doiUrl": "https://doi.org/10.1145/3037697.3037708", "entities": [ "Central processing unit", "Dynamic data", "Intrusion detection system", "Power Management Unit", "Race condition", "Run time (program lifecycle phase)", "Sampling (signal processing)" ], "id": "7bab43eb94430943a6883acced0453a98e1e62c6", "inCitations": [ "7b0d3331717729f0b03077575ad2798e69073736", "f9a0f90b36d6c3e6cf2d0f8b9c519fa5be8543fd", "101b5462ce25c0a6f3102577a42029ed9c5e200f", "334cb2afe4d4ab0ca9c731a12da1e3112cfea699", "e712211bf8628d16d55ec7c6f99b03309e455d4f" ], "journalName": "", "journalPages": "149-162", "journalVolume": "", "outCitations": [ "79c163d6aa3f1a14e64d4288995b0ae76d5e6b4c", "c8bbe1b7a791a21ea6c7db852d240d457243b1ac", "711b89b078ceb7722406c719a6ac1316ade61daf", "3a33dad8e9d12835fca95deec73e841096c8bec0", "5732268aea93dc3aa6b6f4a2db57a609b9714417", "89cbc439c3ba58bcb8a05ffa0ffd22791f02d860", "11bd8d0769616d29dbb7662db28cd179219b2a0a", "46d7318c84b6e34b7927a9738f4e22eb9d38541f", "dd916d401b90d848dd0c1a99d78c034e3c8bb448", "a45adba59080ad625e3005c669345c3a96ad3e18", "d3630df8179b704a516b6ed01cfcad24ae50ade7", 
"ba4357cc6c13cf12849d9bfe6ab0b0b9b728c2bb", "771e3c7146213802ca8c4db0afbde51606293a71", "0881378cc281fe6d8451eedbbb73e9e157d7bf38", "8b28b02af1ba77fff5b08d6dea87ba8b043b479d", "89dce5f511b06739cbd1e2e2300775106bfe1d17", "0e578433d4e8bb2a571c87a2d22816074902f009", "9e562fa998619a05b5f2b43a884b87fab680b762", "29c67a6407b8f0d30e5b30ab9b84851bc8e820b8", "efa12d71ca43f924a6e8be04cd1df2c22355af86", "23679522a504aed5d1ace3917d723b1aabec8c71", "22050b3ee9c69c64dc796358c7f0ba247d4adce3", "5a0a225bf9260e49123dd1e7847917953a9dced9", "4136c581d46e0dd2a2b60852e00a826eae4dc969", "8b8d9dbe3e755cbbab950b6133b1cc11d8e08943", "79b6d89f92080b1ec9b8f23d7811666e15f6cabe", "8932c6d523f9f5aba76993c71c0089ba15155a7c", "32ef8d891edde06cc01357fa5c4d1ab7fe631720", "0180ce1c6e98fd66362b46bd945c503bf7372aa8", "1c7e1a0bde89990a9173664d3ff6931542741226", "8c2b2fb1d4c44d1e1b63be4e5ef3bbb8d37dbfb5", "855af0ff0a3b2837402a191304e0465b65e0a36b", "84f13db8f0e50ca97fe3fe0bc391e974724d4b87", "182c0524aa353b6f4f4cb75a88ff3f5fc3bd86e0", "2035c8f33909ac206c4d1a3bdee611577fb2c5d1", "024ecd71116a7438b3eba7a97de9f428d1933ccd", "2a10aedda169c9ae4e2b6e478d47a36f43f3a489", "c28251c4824163b037092a01d19e912216e697f2", "22a713d92a7f2a79f22c71e66b2511937b2a1a8f", "14064a1095bcfa1b8e8b43a384a1d815a8ac5bbc", "44808fd8f2ffd19bb266708b8de835c28f5b8596", "cb0da1ed189087c9ba716cc5c99c75b52430ec06", "114801eccb5eb0831fd1848f351a138253a42f15", "0a92088c1cf7463ed5d347d2624976e0126ffced", "0653e2ed9f683868cb4539eb8718551242834f6b", "6a5b52bbe5be23b73f3874c448de17163e09bd16", "15559a9c70af038b56f2576f7233bba2b55fbc5a", "4979b94ae5ca344ac4a7c30e86a4ff10e5ca13b0", "08d8c62df23a5f6f8b79cc3639cc179938a48ba4", "737b778da405a9a48412ebfccff867fc054f0a28", "0a44e8cd34a110ec4ed7221b0431694172eadda8", "151a0887f0f202a0d93c6ff773717797860df723", "849821f391234389c232d22940d31d724fd4e4a5" ], "paperAbstract": "This paper presents ProRace, a dynamic data race detector practical for production runs. 
It is lightweight, but still offers high race detection capability. To track memory accesses, ProRace leverages instruction sampling using the performance monitoring unit (PMU) in commodity processors. Our PMU driver enables ProRace to sample more memory accesses at a lower cost compared to the state-of-the-art Linux driver. Moreover, ProRace uses PMU-provided execution contexts including register states and program path, and reconstructs unsampled memory accesses offline. This technique allows ProRace to overcome inherent limitations of sampling and improve the detection coverage by performing data race detection on the trace with not only sampled but also reconstructed memory accesses. Experiments using racy production software including apache and mysql show that, with a reasonable offline cost, ProRace incurs only 2.6% overhead at runtime with 27.5% detection probability with a sampling period of 10,000.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037708", "http://people.cs.vt.edu/~dongyoon/papers/ASPLOS-17-ProRace.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7bab43eb94430943a6883acced0453a98e1e62c6", "sources": [ "DBLP" ], "title": "ProRace: Practical Data Race Detection for Production Use", "venue": "ASPLOS", "year": 2017 }, "7bb8c35f65a21a714cd9fde30149abcc562c77fe": { "authors": [ { "ids": [ "1887403" ], "name": "Humayun Kabir" }, { "ids": [ "2421239" ], "name": "Kamesh Madduri" } ], "doi": "10.1109/HiPC.2017.00012", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00012", "entities": [ "Algorithm", "Clique (graph theory)", "Degeneracy (graph theory)", "Dense subgraph", "OpenMP", "Parallel algorithm", "Relaxation (approximation)", "Shared memory", "Sparse matrix" ], "id": "7bb8c35f65a21a714cd9fde30149abcc562c77fe", "inCitations": [ "c4e4852b60ea151ec38e0edc75a69ad1e846a44a", "4bc90e9257d7d2eb3154b95231d8a65f5ec7cc8b" ], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", 
"journalPages": "13-22", "journalVolume": "", "outCitations": [ "a15b91e5197ebe643ced790f8de5e8519a7ceba0", "c4e4852b60ea151ec38e0edc75a69ad1e846a44a", "348cd08a60d29968e319f4ee80291527d5ead903", "f98348cfecc440ebe49d762e0ce1fe570d29b4a4", "17ae4c0e57e868e2648781023a143d29d348f0a6", "7c1a0f7054d496e71cc697202b84d8a5c652328a", "8c671a8bb36514ea82d0fc782553b6a1adb1fb5f", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "9e667140817d961ef48d0269a8a6f3281e9824e3", "be86376aa17040cb82090d250f53782b05fd2486", "2dc4a4694c1e7a479ff955cc11fdc4a56b0600e4", "fe69891ccbca5e18587dd0e0c0b48b836ae38848", "74a45374231073dc0cc8fd91e9c9181673126fb7", "c2d13137c73cbc4795ed7dc5be43ef19fe464b12", "04fe2b1ba2cf79caef3ad683bc8b05d46be8bd71", "1f33001bbc25d00cbcf48f3a210d64eaab9296c5", "5b5b1e5bcc2ed0ab7cce68919ee3ab948422d093", "7805de482edfbec3a736bb6b3d1bb5163435752d", "2c67c63ce0c972bddc15dd26ace3f04905be51bc", "10924940cff0dcfc16c18fde9e4e0cd034bac55c", "35e3d06f62d784be6d0161e507d215098bf5620d", "9cf785889f13260a791f1106fd7b16f1390002f1", "3c4194f25bda9d2ebdea8d91e8d7c13a5f8b485a", "72b2e9bd1d780106797767e82133a69e1a8e8a78", "00a57850e14320bb41d58696cc409151466b98b2", "af5e63ef8591010d509e0eaf826422523a810cd9", "83f6c4cf7f8a05dc25811d72a6277ed2a0067bdf", "1156f60e40548096df49528b1342bb3e88b0f378", "cbce38ea34c90b8a53f35f10b2235e65fc5fe167", "4af605a89bd4e52565b2c99c9fa290f39b7880f8", "18b4e5d8e497bcb1049547300dd2e1053dd3e5e1", "cd8cc4bff28cf6ba9d7fa584e348b37db93ede76", "5623dec3a4fe0e6c45f3422d1840bc463cbad3c9", "edefa6a9dee97598b66bdf131aeb48ffa5ce2dd5", "4bc90e9257d7d2eb3154b95231d8a65f5ec7cc8b", "29db10ae32a3728821335e6a2b9e43cb374dbc03" ], "paperAbstract": "We present PKT, a new shared-memory parallel algorithm and OpenMP implementation for the truss decomposition of large sparse graphs. A k-truss is a dense subgraph definition that can be considered a relaxation of a clique. 
Truss decomposition refers to a partitioning of all the edges in the graph based on their k-truss membership. The truss decomposition of a graph has many applications. We show that our new approach PKT consistently outperforms other truss decomposition approaches for a collection of large sparse graphs and on a 24-core shared-memory server. PKT is based on a recently proposed algorithm for k-core decomposition.", "pdfUrls": [ "http://www.cse.psu.edu/~kxm85/papers/PKT_HiPC17_slides.pdf", "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00012", "http://arxiv.org/abs/1707.02000", "https://arxiv.org/pdf/1707.02000v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7bb8c35f65a21a714cd9fde30149abcc562c77fe", "sources": [ "DBLP" ], "title": "Shared-Memory Graph Truss Decomposition", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "7bbd1c3872e679cf4ea8ec2e57dc17506af8c291": { "authors": [ { "ids": [ "1728624" ], "name": "Wei Chen" }, { "ids": [ "1786877" ], "name": "Jia Rao" }, { "ids": [ "1718639" ], "name": "Xiaobo Zhou" } ], "doi": "10.1109/IPDPS.2017.28", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.28", "entities": [ "Apache Hadoop", "Data redundancy", "Jumpstart Our Business Startups Act", "Load balancing (computing)", "MapReduce", "Name binding" ], "id": "7bbd1c3872e679cf4ea8ec2e57dc17506af8c291", "inCitations": [ "98ac6ab21def7f6e1b80c0be62ccfc0b594bfc57" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1078-1087", "journalVolume": "", "outCitations": [ "257adee470c54280da48d448a064b35537d51fbd", "3d658c5c758b2a567c7d9150759b2b6d6ddda50b", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "0ea4380ff8bb30e6bd5fd888268d6f8f38229fb7", "a2bf3581cf421211c4cafdcb295610b7aa7e8991", "0a12a179bebdf4bb69d692a1127795b3f536270b", "c737aa8b2c916fe1f13a6fd4e847fa45da1e5434", "67a16b2945c33eabd17eb314c58c8e3eb7d2334d", 
"52bfb3aa30ec06784d839ab431287a657d0d7907", "6973083bca583e26a0d8e7709ce7b9888cf3ee69", "78f246756811e924825a03909952d2c32c593a52", "0541d5338adc48276b3b8cd3a141d799e2d40150", "443b8c56d7300f61b825d1dbafe06afdda23c3e1", "090030e0d1aa117008e9e9fa4abdee0a95455f4a", "70bd563d00fcb402eb7d9f251bea544ecb08f213", "ae24289a0ed3152de528f863c96279382b14ae61", "2f88dcf1e9abaa0b0f8c63820548c98b2da61220", "d65f897b7cea2761f88411e757e9587c0282cb41", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "1f9d47906319d0a8fac5c5fdbadf98e9da7966f9", "0867e38682f5cdbcfb53588ab1315f4cc8595582" ], "paperAbstract": "MapReduce applications, which require access to a large number of computing nodes, are commonly deployed in heterogeneous environments. The performance discrepancy between individual nodes in a heterogeneous cluster present significant challenges to attain good performance in MapReduce jobs. MapReduce implementations designed and optimized for homogeneous environments perform poorly on heterogeneous clusters. We attribute suboptimal performance in heterogeneous clusters to significant load imbalance between map tasks. We identify two MapReduce designs that hinder load balancing: (1) static binding between mappers and their data makes it difficult to exploit data redundancy for load balancing; (2) uniform map sizes is not optimal for nodes with heterogeneous performance. To address these issues, we propose FlexMap, a user-transparent approach that dynamically provisions map tasks to match distinct machine capacity in heterogeneous environments. We implemented FlexMap in Hadoop-2.6.0. 
Experimental results show that it reduces job completion time by as much as 40% compared to stock Hadoop and 30% to SkewTune.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.28" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7bbd1c3872e679cf4ea8ec2e57dc17506af8c291", "sources": [ "DBLP" ], "title": "Addressing Performance Heterogeneity in MapReduce Clusters with Elastic Tasks", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "7c036d5a4b79a735b279423358af4e8df6f7ec81": { "authors": [ { "ids": [ "40313606" ], "name": "Akbar Sharifi" }, { "ids": [ "1738893" ], "name": "Wei Ding" }, { "ids": [ "34933560" ], "name": "Diana Guttman" }, { "ids": [ "38708956" ], "name": "Hui Zhao" }, { "ids": [ "8573809" ], "name": "Xulong Tang" }, { "ids": [ "1717494" ], "name": "Mahmut T. Kandemir" }, { "ids": [ "8948708" ], "name": "Chita R. Das" } ], "doi": "10.1109/MASCOTS.2017.16", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.16", "entities": [ "CAS latency", "Computer cluster", "Computer data storage", "Data center", "Dynamic voltage scaling", "Modulation", "Multi-core processor", "Operating system", "Server (computing)" ], "id": "7c036d5a4b79a735b279423358af4e8df6f7ec81", "inCitations": [ "2d1b2392585b09297dd79a14ca3fb853133d64e3", "884e104c13102e1353e85a6a91e41d3cff2c80f5" ], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "210-220", "journalVolume": "", "outCitations": [ "5f40b3daa73881e3fc006d17c28736014620a006", "77f826132cf09ac91ea9c859387a8d52221a019a", "31c299532c42106b71e909c2fc0fc7472c39ce90", "4ebbbeab6e0f4ba9815889854441548fa414e16b", "ef22b2c93c5c720a2b010f1280db8f8c7114c287", "938574649516c7690ce05891ef499760b9a0553b", "3b34d78a7716d6048731bc034a162eede6078038", "c3da4e02b8a474d6f094f1e5ac435049e0fe3e49", "367d34d830482b349c73f373717a079d335c03e5", 
"387eb8909b5527dd2513cfdd2f376a3a1f2973b3", "8814ba7515481fe16afc79cb41b05d4aa58f2df2", "40138cbd57a4632d6267cff4c91b55e7376a6693", "53356bd1d40e9c9aeaeb352f0f74ad83bb1650eb", "8fb808a890a099896e34851179daba15659df11a", "69743194ca177ef816d31a99475c3ba3ff97808c", "370baef5b5f9e2933a195bc025c93feb02baf494", "a3eb0826dd5d88669d506c0cbfb0f3dc90937fed", "2608db8056e1598cf0b0bce8c2e305c3735a7bbe", "3ef44315caebb6169b7ffbf9886cd782193ab40e", "1ae3f4cdaaf12ddc6f7bf1a24588af58c54e7930", "10db43d5a1b3c1ddf6dc93fa992b8ed42dcf9833", "85f5b05e7eba438f1fc4bc6e21cc1af00c424fbc", "2644b8562292e2492459db3ed214d3330ef7712d", "bce002b1fa120ab391345be98c747ce41aa32cd6", "01ab8a70840cefc0c5e545bc76f4b5195bb37333", "49dc03814c171c08331fe9f1afc34a54951ae8e0", "89d4dcc25809693fa3505d09b2721c1c2c2559b2", "8a81748e4e9476575a2a7c9e353d4f8f133cd786", "0edf4ef1b8e09e4abc994f7d450bc090262e2c9b", "1de7a8de961624bfd482744c6be24fb15ae14776", "061a6b94ebecd2a8af8b901d41f15700543b2bd9", "157d5b2488d953b7c88abc36791c2e897c152395", "87a34f805b3316ac75c6b3110d36a4bc576ac063", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "054e4a4132a3463e820c548035d9abf73ffbda57", "26e72340c47b7348e1b1de285f89dd96cc925b27", "f5b5fccb492754e021bad1b6fc850946595c8d68", "ad702abe479b86ed043f83d6475a2e82c4718c9a", "9a000edf8d478fa3b0d7f74fb966664da5d33354", "4aec1ba26ca8a42a041dc0ac6a43dc91f1b6abc1", "0def751f534a8b2d012137e477238eab3a36c79b" ], "paperAbstract": "Since main memory system contributes to a large and increasing fraction of server/datacenter energy consumption, there have been several efforts to reduce its power and energy consumption. DVFS schemes have been used to reduce the memory power, but they come with a performance penalty. In this work, we propose DEMM, an OS-based, high performance DVFS mechanism that reduces memory power by dynamically scaling individual memory channel frequencies/voltages. 
Our strategy also involves clustering the running applications based on their sensitivities to memory latency, and assigning memory channels to the application clusters. We introduce a new metric called Discrete Misses per Kilo Cycle (DMPKC) to capture the performance sensitivities of the applications to memory frequency modulation. DEMM allows us to save power in the memory system with negligible impact on performance. We demonstrate around 25% savings in the memory system energy and 10% savings in the total system energy, with only a 4% loss in workload performance.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.16", "http://xzt102.github.io/publications/2017_MASCOTS_Xulong.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7c036d5a4b79a735b279423358af4e8df6f7ec81", "sources": [ "DBLP" ], "title": "DEMM: A Dynamic Energy-Saving Mechanism for Multicore Memories", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "7c11b349296003d6406c10c96aa223cfa8f5f542": { "authors": [ { "ids": [ "39833854" ], "name": "Marco Serafini" }, { "ids": [ "35168570" ], "name": "Gianmarco De Francisci Morales" }, { "ids": [ "1737053" ], "name": "Georgos Siganos" } ], "doi": "10.1145/3127479.3131625", "doiUrl": "https://doi.org/10.1145/3127479.3131625", "entities": [ "Algorithm", "Computer data storage", "Distributed computing", "Distributed web crawling", "Embarrassingly parallel", "Graph traversal", "Load balancing (computing)", "MapReduce", "Message Passing Interface", "SPARK", "Search algorithm", "Subgraph isomorphism problem" ], "id": "7c11b349296003d6406c10c96aa223cfa8f5f542", "inCitations": [], "journalName": "", "journalPages": "214-228", "journalVolume": "", "outCitations": [ "6713ee6b1c9ba14e525f12958898e99eeb3003bc", "3ee0eec0553ca719f602b777f2b600a7aa5c31aa", "17a36f29960ed525a46837c15b6e056c7965b1d0", 
"09147715a90046f31bf713e033f22a7a84484711", "aaf7d70839488859a90f623957dcfc2fdbe836f1", "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", "51d959bcdb280ead8363bc814455536a401196a8", "178ff3ab1afcd6fb348a9805babe0a5c814be5af", "8cca529e651867e5ac2a30ceca4e661ef0900ef7", "f2d4b6b2048279f5c56e8febfd3473fcbf294710", "230239fb61d7a6996ac9552706363323b34735f2", "c25b5ebd34f111db504907b8fb0bac001c28a6db", "007758ab121a7e03181da56f135ea15ea7fb7576", "743376fa0ba57f5f9a46ca45e0315bb264833113", "2a88a3cc678ae1229ce894070bb22bd0df5ac3ab", "0e3253e5ca318e70d4968a45f8d41f88dbffd9e3", "87507a498558ed6ed23115a42f42376c0884f7f2", "8d7e87e4d027aaf794835a8137364f56bd378a59", "a27c3f0a249dc122104b937c5783f83b3585bb53", "7df587e7efe8f35c3232b7729d9fbf25f5227b05", "090030e0d1aa117008e9e9fa4abdee0a95455f4a", "36e59e71a19fba9f61012e8653a9eee884eac93c", "0541d5338adc48276b3b8cd3a141d799e2d40150", "0d06de003e8ca949b3b39f9a51750c050addb997", "b02aba65fb2792cee4fb0cbae12858195f383764", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "011f7f9ba9e6f9bc7f05994271725bc0fc9c3b94", "9447b4502eed007a117e4ba87278407ca3d7b354", "c2459e76da560a69cb90c7077e3bcee1701bb7f3", "d9cf136153bd4a941eee95174c9d2c6ffa5a6688", "a571746b0ee622caa7d69b46202d251de93f35e9", "744eacc689e1be16de6ca1f386ea3088abacad49", "1558a06fb4f0473f76792e830b0b07c79f7decc0", "26deee037b221bd05ed34461819f5c067b745445", "25dcb849cb146a2afc51ad092fba70570bd4de42", "6c3c8229760e57e381aece8176943a997d4a9953", "3a9564179380cb03a01abbc30e4c7ed4d80efb32", "63115442310908b876aa1e81d877813ebee8b247", "5d3158674e1a0fedf69299a905151949fb8b01a5", "26b4bb74dc87eefeba587fe2aceda0789174e476", "2c688c40374fee862e0f0038696f2951f1927337" ], "paperAbstract": "This paper introduces QFrag, a distributed system for graph search on top of bulk synchronous processing (BSP) systems such as MapReduce and Spark. Searching for patterns in graphs is an important and computationally complex problem. 
Most current distributed search systems scale to graphs that do not fit in main memory by partitioning the input graph. For analytical queries, however, this approach entails running expensive distributed joins on large intermediate data.\n In this paper we explore an alternative approach: replicating the input graph and running independent parallel instances of a sequential graph search algorithm. In principle, this approach leads us to an embarrassingly parallel problem, since workers can complete their tasks in parallel without coordination. However, the skew present in natural graphs makes this problem a deceitfully parallel one, i.e., an embarrassingly parallel problem with poor load balancing. We therefore introduce a task fragmentation technique that avoids stragglers but at the same time minimizes coordination. Our evaluation shows that QFrag outperforms BSP-based systems by orders of magnitude, and performs similar to asynchronous MPI-based systems on simple queries. Furthermore, it is able to run computationally complex analytical queries that other systems are unable to handle.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3131625" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7c11b349296003d6406c10c96aa223cfa8f5f542", "sources": [ "DBLP" ], "title": "QFrag: distributed graph search via subgraph isomorphism", "venue": "SoCC", "year": 2017 }, "7c147d62562f06142141fd07af1708979d06c580": { "authors": [ { "ids": [ "34929522" ], "name": "Isuru Dilanka Fernando" }, { "ids": [ "1971912" ], "name": "Sanath Jayasena" }, { "ids": [ "23546490" ], "name": "Milinda Fernando" }, { "ids": [ "2666202" ], "name": "Hari Sundar" } ], "doi": "10.1109/ICPP.2017.60", "doiUrl": "https://doi.org/10.1109/ICPP.2017.60", "entities": [ "Algorithm", "CUDA", "Computation", "Distributed memory", "Graphics processing unit", "IP Multimedia Subsystem", "Linear system", "Matrix multiplication", "Message Passing Interface", "Numerical analysis", "Parallel 
computing", "Scalability", "Semi-supervised learning", "Sparse matrix" ], "id": "7c147d62562f06142141fd07af1708979d06c580", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "513-522", "journalVolume": "", "outCitations": [ "6a110a740a2255f13bf27d199c6aa3a1789b9113", "d9b4d4ffa5bf9e9e376b0bdeba19d4090d653feb", "a3b4133fb1a65f35b9b7950da9786d23fe5723b4", "c0a29917ec3f8c497cbe99607703b6bec7c335e8", "65b7c62555d2c3f5763651bff19ec6bf040ef5db", "a4f8cb111b904e9b28268fcdc505d3413bb56d9c", "445c01f4ecdf8559d150bb1aa656e800306e04a3", "086dbe1d8574b35d5b33ca9e70e76890517262b4", "0c6ab1d7631aec1faf4493352293bf19d06ee40f", "6d73c9947d840cd84a8eee79c224add5fbbb929e", "829261562b8c26114153a972fb38f15615aa1aea", "47934a9f73042acef86daa1f5f71459d5929bc48", "688384fc5e643445e835435e96b9dfcfb6598d36", "21465b9dc1450989df35b1c66a0499f5bc433a97", "77eee519041bcb7281d8bc544ae69b8af386b6cc", "1c6477bc1b1c7b3767624be6d286d382ce05c211", "ebc09b04a900afc6c3cf53a4b7ff6035f33f02b2", "cb13d6831bbf5a40c7caf6edf5fead628c076b4b" ], "paperAbstract": "We present a scalable distributed memory library for generating and computations involving structured dense matrices, such as those produced by boundary integral equation formulations. Such matrices are dense, but have special structure that can be exploited to obtain efficient storage and matrix-vector product evaluations and consequently the fast solution of linear systems. At the core of the methods we use is the observation that off-diagonal matrix blocks of such matrices have a low numerical rank, and that this property can be exploited in a multi-level fashion. In this work we focus on the Hierarchically Semi-Separable (HSS) representation. We present algorithms for building and using HSS representations that are parallelized using MPI and CUDA to leverage state-of-the-art heterogeneous clusters. 
The efficiency of our methods and implementation is demonstrated on large dense matrices obtained from a boundary integral equation formulation of the Laplace equation with Dirichlet boundary conditions. We demonstrate excellent (linear) scalability on up to 128 GPUs on 128 nodes. Our codes will lay the foundation for fast direct solvers for elliptic problems.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.60" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7c147d62562f06142141fd07af1708979d06c580", "sources": [ "DBLP" ], "title": "A Scalable Hierarchical Semi-Separable Library for Heterogeneous Clusters", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "7c2436a08942fc28d5c8ae58d692b515f03ec0fc": { "authors": [ { "ids": [ "2959404" ], "name": "Kiron Lebeck" }, { "ids": [ "19225377" ], "name": "Kimberly Ruth" }, { "ids": [ "1769675" ], "name": "Tadayoshi Kohno" }, { "ids": [ "3268360" ], "name": "Franziska Roesner" } ], "doi": "10.1109/SP.2017.13", "doiUrl": "https://doi.org/10.1109/SP.2017.13", "entities": [ "Augmented reality", "Experience", "Head-mounted display", "Microsoft HoloLens", "Stemming" ], "id": "7c2436a08942fc28d5c8ae58d692b515f03ec0fc", "inCitations": [ "62da278aab472d41561c6d4bda0200c3368960fd", "6c8b210cf0db3da132bb07bda07323f7c0270ad5", "8f47acc4306ebbd8bc4f34d03eb10e0fcef0c46a", "f65827989a5c13f9fccdcc746799063b24eb11c3", "8ff0d2097be6eb4511e19e2023741c7d3fc8d380", "db23293bb40b38c17afeb3d5dd69eb44fb851326", "39c746f8f73b6682055259bb6d87bfe9a7db4f7a" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "320-337", "journalVolume": "", "outCitations": [ "419619225a73b115be0834b27f8e88ae38e0e451", "79ce55cca0ace7bcadec5e777a9b4fec2aecf007", "2c347993f71c77e8bbb6c40e5caa6ba2f6a7097f", "7f681c3cfad622388f955be7ad6b00a05eeadd2c", "ac0d88ca5f75a4a80da90365c28fa26f1a26d4c4", "36367dc5f5f84fbfdeff7695ff454a0611ca011f", 
"8e1993d6feca285bfbc2f6a1d209e4146f88098d", "6dd21b1a93311e853858b48517e4ccbbad756fdb", "cce7600bdabc13dfc7c88e5e24b7fa94c6740653", "2c0c5c40f98d9b645549f235a680be5b729ebe48", "1e98c65ab6821d01ede567b9d9e766ad357b3702", "1423b4a807f7c0921e33075f016e93b3e8a1dba2", "6189792ae830a39f99b5390ad2d2af7ac571d31d", "cabcc02dafb581b404934597a61a40cc51038552", "2a3f581da9c91c57f4bed1cfecbaaa4499f5c023", "013a2d879fcc30e581b9f503dcc03360afbb6b15", "6bf63a7c3aa05b3b651640b7e23d3821cabcc2e8", "7aea059e3ae71d990f92b976c533de85d1681e23", "2405bc04bd3e4532e205a683d7425c675dca1436", "48b7b474af1e86ee6e9db66972155c10cbbdace6", "8c9c760b2079de26cf2aea31d128ff9054a3e6b1", "35867081685ff40ea0b245d315b2d54e42235b69", "a27e903267e25959a10bdeac496e651bd1a0ddc3", "1dfc5ed6e7ea8771643c996c10fcaf03558570da", "614e8fb15a682caf18b7dba0fc018c65c48de8cf" ], "paperAbstract": "Augmented reality (AR) technologies, such as Microsoft's HoloLens head-mounted display and AR-enabled car windshields, are rapidly emerging. AR applications provide users with immersive virtual experiences by capturing input from a user's surroundings and overlaying virtual output on the user's perception of the real world. These applications enable users to interact with and perceive virtual content in fundamentally new ways. However, the immersive nature of AR applications raises serious security and privacy concerns. Prior work has focused primarily on input privacy risks stemming from applications with unrestricted access to sensor data. However, the risks associated with malicious or buggy AR output remain largely unexplored. For example, an AR windshield application could intentionally or accidentally obscure oncoming vehicles or safety-critical output of other AR applications. In this work, we address the fundamental challenge of securing AR output in the face of malicious or buggy applications. 
We design, prototype, and evaluate Arya, an AR platform that controls application output according to policies specified in a constrained yet expressive policy framework. In doing so, we identify and overcome numerous challenges in securing AR output.", "pdfUrls": [ "http://homes.cs.washington.edu/~kklebeck/lebeck-sp17.pdf", "https://doi.org/10.1109/SP.2017.13" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7c2436a08942fc28d5c8ae58d692b515f03ec0fc", "sources": [ "DBLP" ], "title": "Securing Augmented Reality Output", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "7c3c5b282948121244d330651e36b05f31c382cb": { "authors": [ { "ids": [ "34854131" ], "name": "Shaden Smith" }, { "ids": [ "1681616" ], "name": "George Karypis" } ], "doi": "10.1007/978-3-319-64203-1_47", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_47", "entities": [ "Algorithm", "Compressed data structure", "Computation", "Computational resource", "Data compression", "Data structure", "Iteration", "Memoization", "Parallel algorithm", "Singular value decomposition", "Sparse matrix", "Speedup", "Synthetic data", "Tucker decomposition" ], "id": "7c3c5b282948121244d330651e36b05f31c382cb", "inCitations": [ "0f0bcf003e7de278514dff084487873762b9ffb3", "29fce39d7c39ac71d11aa78f9f49ef2132cb8c8f" ], "journalName": "", "journalPages": "653-668", "journalVolume": "", "outCitations": [ "255aeb5c2a8eea15db08c617481ddbb35a41bfe4", "231f97057e1efed073c20ccdf3aa3c5aaf063ffb", "53132a1619b13215bcd791cd6b850ff154f4f837", "1de6ac748387859f43bc15e15ff5380df05bae34", "1d0f25989452abbbc8feaf00a034ff110fc4b350", "2d03baec8ac1568e6813aa43d625d552524f977e", "62dd02837c65b9c90de8d80c493f23ce1116cb3d", "5a3c8589d63fcee5dd40ef43aea6ef38e2fda9a8", "53c156d2d0e9446b8d8ea1457215e1a608bb63c6", "38b389580d774ce513284e671ff3bbcef0258de2", "669508257d4621864011252d0423047f98d9329c", "008a6e4b2763736d2c6363ee6b546b09c0022e53", "158415ae990b78023a3e1af851c418a94da91325", 
"280bbaa66095fd6f89999003b802700935fdf77c", "ac0a0828c17c040c065a9285264094ba2560497d", "1322c225b4e05dc22bbff7c5b9f5464f3cb7754b", "31af4b8793e93fd35e89569ccd663ae8777f0072", "338d08904c1372fa7e2a9fa88124c04ce0ef138b", "8526f7d58b58294521636d4709a08272e6f1f3c8", "f6a30c63ff1f5ce2c3361627e6b0d47d90908f96", "44ccdebc83766fb6a2016fa58c3c3a337356b79b", "0a5aef2da6166c9b26ecc0a421f6bb5fd586ff97", "53a225f2843e8544ca9c615ecfcc5fad26083e49", "1ac425def5f0de754c0a738cc8a528eaf9ab3381", "274bc40268671fa3fff54d2ef89454b13fb026da" ], "paperAbstract": "The Tucker decomposition is a higher-order analogue of the singular value decomposition and is a popular method of performing analysis on multi-way data (tensors). Computing the Tucker decomposition of a sparse tensor is demanding in terms of both memory and computational resources. The primary kernel of the factorization is a chain of tensor-matrix multiplications (TTMc). State-of-the-art algorithms accelerate the underlying computations by trading off memory to memoize the intermediate results of TTMc in order to reuse them across iterations. We present an algorithm based on a compressed data structure for sparse tensors and show that many computational redundancies during TTMc can be identified and pruned without the memory overheads of memoization. In addition, our algorithm can further reduce the number of operations by exploiting an additional amount of user-specified memory. 
We evaluate our algorithm on a collection of real-world and synthetic datasets and demonstrate up to 20.7\u00d7 speedup while using 28.5\u00d7 less memory than the state-of-the-art parallel algorithm.", "pdfUrls": [ "http://glaros.dtc.umn.edu/gkhome/fetch/papers/smith2017tucker.pdf", "https://doi.org/10.1007/978-3-319-64203-1_47", "https://www.cs.umn.edu/sites/cs.umn.edu/files/tech_reports/17-010.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/f379/ac269ab07eaf965c5c55ca341ecf28857091.pdf", "s2Url": "https://semanticscholar.org/paper/7c3c5b282948121244d330651e36b05f31c382cb", "sources": [ "DBLP" ], "title": "Accelerating the Tucker Decomposition with Compressed Sparse Tensors", "venue": "Euro-Par", "year": 2017 }, "7c4fff12ca7b14edadb44d16a396c7ffff66bcf7": { "authors": [ { "ids": [ "1815209" ], "name": "Guoming Zhang" }, { "ids": [ "1752301" ], "name": "Chen Yan" }, { "ids": [ "2384727" ], "name": "Xiaoyu Ji" }, { "ids": [ "5640972" ], "name": "Tianchen Zhang" }, { "ids": [ "24037472" ], "name": "Taimin Zhang" }, { "ids": [ "39533577" ], "name": "Wenyuan Xu" } ], "doi": "10.1145/3133956.3134052", "doiUrl": "https://doi.org/10.1145/3133956.3134052", "entities": [ "Airplane mode", "Cortana (Halo)", "Google Now", "Huawei E220", "Human\u2013computer interaction", "Microphone", "Modulation", "Nonlinear system", "S-Voice", "Siri", "Speech recognition", "Synchronous optical networking" ], "id": "7c4fff12ca7b14edadb44d16a396c7ffff66bcf7", "inCitations": [ "170839d31da1f85b66bd08e055213bac126b2a22", "cb85f14d3c9685cad65c95087f8e8f505eab24a7", "b618c47ea67d36ac48e2a19bdb093f130a284317", "1180a22488c285ada5b05d67a7fa7dd6b70b03a5", "9123089825cad35ab5a9bc45452d67fb722be529", "679f1322ae8c37bb67ca6bdea2ac6ac91f290d70", "911c8a8ad9bdebb845176bb0d5af44879c5ef9d1", "01b387cb20e492d993dbb3a5ca8e0404a6f2bd0e", "64900c18432091b6a7239cfb4922f070a446b80a", "8e4808e71c9b9f852dc9558d7ef41566639137f3", "5ce1cdd95b3977e66a5c22fb6cab577a8a65597d", 
"bd6a623fba9de19eab105df3651ec79c474844ae" ], "journalName": "", "journalPages": "103-117", "journalVolume": "", "outCitations": [ "0dfc79f36528411a9f1350545ca6f1754b18dfc7", "0fa16e7eb8679e1f4233cd75264e0ccf0746f19f", "b1e7d0bf688dddac7b00d49e1dda8778f5514109", "2b5544c666b7d593422c3daea54a7844feb858fc", "6603030e6032d008af8377233a4c9a11aa42a247", "54a8251af7c57d7c62cca3ccbc2e0c2d7295a0b4", "9e6d681b0ec6640956d2cc3ac3d8c092f5adf9a5", "e1267dffc8b4c090e3c8283c1f6ab1e20444c09b", "6b3a004de158c8c1af6d010ac64489d4929d2346", "05d3774554e6a957df0c7ae1f78173f427385c74", "b0b6ecd1f7220392b35c356a1c616b7d785a42c6", "238b3cd3d89ab169f97c2a5bc6c0c9cc5d0965b8", "190c25f9f564b4703dc362b21c7fe3996a098ef4", "3439454a00ef811b3a244f2b0ce770e80f7bc3b6", "d87041ae6d2e5c91b9eee0c2418a66aed094012f", "433a6b4c8c3b121c7522bb84fbd550799f00e62b", "a105fdd213d8089d1b23acdbabfbe330208be468", "c57be0b654d6ca624dd08aa49842db5cb35b127d", "182417ab0197edb519254c8f35e4f70020e2a49c", "cfecc6fd703de0ec72399aedd181c2bae6db6329", "4e751b8493030dbbcf7d313cafb829130f95d714", "9545077902d9ea3444894b43b6d14ff16f5dc06c", "26e6b1675e081a514f4fdc0352d6cb211ba6d9c8", "738720c65a0e740e9ab93a8bb0b05753668aa516", "4c7b8cb31a9be2b168c5882f022120f97140ce3e", "03526d32b88b798c916cb62264b073ae44163c36", "46e79dc282b5c42e9e8e78c956c3bb0b9768b451", "1ba2eb7f01b3917755cdc3047d1ba7c29a95cc6e", "22410d40cc64428cbcd1028bf962dc41eb8a4ea8", "03967dc536786d93b1c5a17d9f04a39c46532094", "d6fffb3c44e2ab0c5ba08788ba40e49954046096", "7095ea5eb985d8a06f3d9aa65698c2b916313064" ], "paperAbstract": "Speech recognition (SR) systems such as Siri or Google Now have become an increasingly popular human-computer interaction method, and have turned various systems into voice controllable systems (VCS). Prior work on attacking VCS shows that the hidden voice commands that are incomprehensible to people can control the systems. Hidden voice commands, though \"hidden\", are nonetheless audible. 
In this work, we design a totally inaudible attack, DolphinAttack, that modulates voice commands on ultrasonic carriers (e.g., f > 20 kHz) to achieve inaudibility. By leveraging the nonlinearity of the microphone circuits, the modulated low-frequency audio commands can be successfully demodulated, recovered, and more importantly interpreted by the speech recognition systems. We validated DolphinAttack on popular speech recognition systems, including Siri, Google Now, Samsung S Voice, Huawei HiVoice, Cortana and Alexa. By injecting a sequence of inaudible voice commands, we show a few proof-of-concept attacks, which include activating Siri to initiate a FaceTime call on iPhone, activating Google Now to switch the phone to the airplane mode, and even manipulating the navigation system in an Audi automobile. We propose hardware and software defense solutions, and suggest to re-design voice controllable systems to be resilient to inaudible voice command attacks.", "pdfUrls": [ "http://www.usslab.org/papers/CCS2017_DolphinAttack_CameraReady.pdf", "https://acmccs.github.io/papers/p103-zhangAemb.pdf", "http://doi.acm.org/10.1145/3133956.3134052", "https://endchan.xyz/.media/50cf379143925a3926298f881d3c19ab-applicationpdf.pdf", "http://arxiv.org/abs/1708.09537", "https://arxiv.org/pdf/1708.09537v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7c4fff12ca7b14edadb44d16a396c7ffff66bcf7", "sources": [ "DBLP" ], "title": "DolphinAttack: Inaudible Voice Commands", "venue": "CCS", "year": 2017 }, "7c60a110d0a07d00027180f2fa6bf7ffa16acd00": { "authors": [ { "ids": [ "2704719" ], "name": "Philippe Tillet" }, { "ids": [ "2332855" ], "name": "David Cox" } ], "doi": "10.1145/3126908.3126939", "doiUrl": "https://doi.org/10.1145/3126908.3126939", "entities": [ "BLAS", "Convolution", "Experiment", "LAPACK", "Library (computing)", "Maxwell (microarchitecture)", "Predictive modelling", "Self-tuning" ], "id": "7c60a110d0a07d00027180f2fa6bf7ffa16acd00", 
"inCitations": [], "journalName": "", "journalPages": "43:1-43:12", "journalVolume": "", "outCitations": [ "4184aa51b3ccd19eeddf16e0b92f1efae814c48e", "0c76a904b28c775eb5f33cd982f0bfeddab353e3", "1b7fcedcbbc7d8e64a38fcf1d6bc8f67afb356a0", "8d2fb424ccd5ae011dd444ac4fa8282bad9e76ab", "15570f630b16137005315de9530055f471a979fe", "5672ce28f2927b81b01303e4926643c55a4c8133", "6472cab2678c39e2273673968c6d7d3cfe2a62c9", "12f1a2a510a4e86ecd75c8081a78620c71822f99", "b81fb53cc1dff847804279275ce1e3238ffe8766", "f8e9b050c93af6dea582563f61b6460b590bc3af", "346ca00b2b2825e639d01ca218a53e4304c17bc0", "f6f2216c4172748e8ca7c423d447e5804174e1df", "6ab10dc02fdd0e8808981e95969017b3d12db6ab", "c658634494efda05dfac80a6fc2e6770f4d46bd1" ], "paperAbstract": "Efficient implementations of HPC applications for parallel architectures generally rely on external software packages (e.g., BLAS, LAPACK, CUDNN). While these libraries provide highly optimized routines for certain characteristics of inputs (e.g., square matrices), they generally do not retain optimal performance across the wide range of problems encountered in practice. In this paper, we present an input-aware auto-tuning framework for matrix multiplications and convolutions, ISAAC, which uses predictive modeling techniques to drive highly parameterized PTX code templates towards not only hardware-, but also application-specific kernels. 
Numerical experiments on the NVIDIA Maxwell and Pascal architectures show up to 3x performance gains over both cuBLAS and cuDNN after only a few hours of auto-tuning.", "pdfUrls": [ "https://arxiv.org/pdf/1802.05371v1.pdf", "http://arxiv.org/abs/1802.05371", "http://doi.acm.org/10.1145/3126908.3126939" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7c60a110d0a07d00027180f2fa6bf7ffa16acd00", "sources": [ "DBLP" ], "title": "Input-Aware Auto-Tuning of Compute-Bound HPC Kernels", "venue": "SC", "year": 2017 }, "7c7fbfe80184e1f0572427657d3ca8464a8d50cf": { "authors": [ { "ids": [ "39287913" ], "name": "Juan Salamanca" }, { "ids": [ "1694548" ], "name": "Jos\u00e9 Nelson Amaral" }, { "ids": [ "2006159" ], "name": "Guido Araujo" } ], "doi": "10.1007/978-3-319-64203-1_44", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_44", "entities": [ "Performance Evaluation", "Speculative multithreading" ], "id": "7c7fbfe80184e1f0572427657d3ca8464a8d50cf", "inCitations": [], "journalName": "", "journalPages": "607-621", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_44" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7c7fbfe80184e1f0572427657d3ca8464a8d50cf", "sources": [ "DBLP" ], "title": "Performance Evaluation of Thread-Level Speculation in Off-the-Shelf Hardware Transactional Memories", "venue": "Euro-Par", "year": 2017 }, "7c8822e4bff8eaa454a7a89bf13c443a16a85dd6": { "authors": [ { "ids": [ "2692077" ], "name": "Shubhra Kanti Karmaker Santu" }, { "ids": [ "3267191" ], "name": "Parikshit Sondhi" }, { "ids": [ "1736467" ], "name": "ChengXiang Zhai" } ], "doi": "10.1145/3077136.3080838", "doiUrl": "https://doi.org/10.1145/3077136.3080838", "entities": [ "Crowdsourcing", "E-commerce", "Experiment", "Information retrieval", "Learning to rank", "Mathematical optimization", "Relevance", "Sparse matrix", "Web search engine" ], "id": 
"7c8822e4bff8eaa454a7a89bf13c443a16a85dd6", "inCitations": [], "journalName": "", "journalPages": "475-484", "journalVolume": "", "outCitations": [ "350f92b121d0086682e6ffc6d0742d6f88038ea3", "1f6d9b75e40305f1c46c62ea0283d241294d9fc7", "84e62d38d2efc55e69b9094a17064d60824f64ee", "3d7efdb458ed4ceac72afad879960fcc853ff6ba", "8490234d79b47e459824dcf87c1e288211a3c964", "ec6012530267dc9d3e59e4bc87d5a65def3e1873", "0dc505b3d140d9349a2f4dd87cebfa97687f0c08", "40707bd624bc789d26f13734de41fa41c866a332", "ed6c4254cbb518866b3bc3950db8f1b7568309ca", "cb9cce27523fe7450dfc8a2f46eef558c471a2cb", "6ca125d2d81d9ba95928986ec2693e5b6cad2ef1", "de56d11c568cde9fc45b85f1c58515696d22f6b1", "684be9e9bd41d148158c64ba811c08f66b58092a", "3bd42cfb7e633320bbeec7f6d361e92abec60b07", "c6fad6b317527162aac8703bc8cb405d5661d806", "a9b7b1b218ea1ff2ba39a618e6dc5b481e8e1129", "14b4f0e8e2a12eb10cfa5e6aed7cff8df8637757", "28dbdf5c9ccd4a784c3bef0672837b29c8892b81", "23606ec8a507e74f5a8acadb66f253f1d5047718", "a489d95fb930401c1f4b7d92bb139d271d49abbf", "8c300bf6090c427631d772b875fdabf57af4257a", "f189f55077d0fe9e8d0b9586ffb3b6f33682b844", "3282994d8829a983c94c0dc04f1fb1d931fb0f19", "760b8efc52271fd453f92132de847e9bebd81636", "c66b6aa41812e00facea7b5de249b9670c602fd3", "48886ea4ee14f0151f186207e1b9ad1d947e83ef", "0df9c70875783a73ce1e933079f328e8cf5e9ea2", "17ce7734904a6162caa24e13a9454c1239924744", "0b12d60a76c8c9816ae77ee5c7c6cbdaa337ae05", "1d81f37488a6c492d5dd2733d485c9b014e9eeb4", "628d0fbc738d4707961426b9794f63966ddb71e0", "24c9b0b05c5e957e255b854f947472f9181772a4" ], "paperAbstract": "E-Commerce (E-Com) search is an emerging important new application of information retrieval. Learning to Rank (LETOR) is a general effective strategy for optimizing search engines, and is thus also a key technology for E-Com search. While the use of LETOR for web search has been well studied, its use for E-Com search has not yet been well explored. 
In this paper, we discuss the practical challenges in applying learning to rank methods to E-Com search, including the challenges in feature representation, obtaining reliable relevance judgments, and optimally exploiting multiple user feedback signals such as click rates, add-to-cart ratios, order rates, and revenue. We study these new challenges using experiments on industry data sets and report several interesting findings that can provide guidance on how to optimally apply LETOR to E-Com search: First, popularity-based features defined solely on product items are very useful and LETOR methods were able to effectively optimize their combination with relevance-based features. Second, query attribute sparsity raises challenges for LETOR, and selecting features to reduce/avoid sparsity is beneficial. Third, while crowdsourcing is often useful for obtaining relevance judgments for Web search, it does not work as well for E-Com search due to difficulty in eliciting sufficiently fine grained relevance judgments. 
Finally, among the multiple feedback signals, the order rate is found to be the most robust training objective, followed by click rate, while add-to-cart ratio seems least robust, suggesting that an effective practical strategy may be to initially use click rates for training and gradually shift to using order rates as they become available.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080838" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7c8822e4bff8eaa454a7a89bf13c443a16a85dd6", "sources": [ "DBLP" ], "title": "On Application of Learning to Rank for E-Commerce Search", "venue": "SIGIR", "year": 2017 }, "7cb0713f62f23f9ef49bd681da9ac7870d059875": { "authors": [ { "ids": [ "1983423" ], "name": "Hamid Tabani" }, { "ids": [ "3084705" ], "name": "Jose-Maria Arnau" }, { "ids": [ "1737344" ], "name": "Jordi Tubella" }, { "ids": [ "1747103" ], "name": "Antonio Gonz\u00e1lez" } ], "doi": "10.1109/PACT.2017.11", "doiUrl": "https://doi.org/10.1109/PACT.2017.11", "entities": [ "Acoustic cryptanalysis", "Acoustic model", "CUDA", "Central processing unit", "Cluster analysis", "Computation", "Google Map Maker", "Graphics processing unit", "Hardware acceleration", "Lazy evaluation", "Memoization", "Memory bandwidth", "Mixture model", "Mobile processor", "Run time (program lifecycle phase)", "Speech recognition", "Speedup", "User (computing)" ], "id": "7cb0713f62f23f9ef49bd681da9ac7870d059875", "inCitations": [ "e3fc67dfcf8e194f452fd734e4dfd99a53f2afeb", "932eea3ef02f39c591f747eca8b92db56d59834b", "2017ff3ee3a45a954ff58c7c6ed2b41a26c7ded7" ], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "41-52", "journalVolume": "", "outCitations": [ "96072f148ac85c7c47773fc89ced419954d2baa8", "243711ef080c4c5086694716f4557e81a0fc23df", "47dd53e91cf252d1047a03d59b223b5c88996c4d", "0c7d7b4c546e38a4097a97bf1d16a60012916758", "433a1c7a7cb260d04b513f63a8e0f81908da6707", 
"b158d06d2ace84222899fa504c4eb4395fe31f0c", "3364bc50921a9566d61ef8cb73baa82341725e4b", "1851d801a9aeab61b487a71687bcaff3a45318ea", "058bb1ed9d3a72fec581f17790e77fb8ef880939", "0f415803e4b94cfbefa0bf118b04e9f7f79dcf36", "d33cffa5fcf0f84de8495f25bdcc9f7c49fc336f", "6b9f6d644abc8143edab837e7490480ac2b02c17", "1dec63e2a929bb3be57906bfef94f38e969cfbd9", "330e5970507a1a62047ed737abd24e88207724d4", "2ad29134da93304e72dd047ca99ec6cfef2b4990", "7aaf60bea9efc340c57a7dae667abd138270cff3", "87980d8fac11638dabfae23e2b2e54afef5d58da", "1638c010f5e95918df21ae1c9096da9f09dff060", "19b00c51e7fe255a5e7c2361900ec4dfac14047d", "10d3a56e1f858ebf25f5fb931f2ec77698cef154", "6873a4db9703c9bf38ddabf9abed17ac5b673b59", "c00930140f49b543ae99bbdfa2bc977e60e91cdd", "d7cf0d7208a465ed04ac44e8ce210cfe91b154b2" ], "paperAbstract": "Accurate, real-time Automatic Speech Recognition (ASR) comes at a high energy cost, so accuracy has often to be sacrificed in order to fit the strict power constraints of mobile systems. However, accuracy is extremely important for the end-user, and today's systems are still unsatisfactory for many applications. The most critical component of an ASR system is the acoustic scoring, as it has a large impact on the accuracy of the system and takes up the bulk of execution time. The vast majority of ASR systems implement the acoustic scoring by means of Gaussian Mixture Models (GMMs), where the acoustic scores are obtained by evaluating multidimensional Gaussian distributions.In this paper, we propose a hardware accelerator for GMM evaluation that reduces the energy required for acoustic scoring by three orders of magnitude compared to solutions based on CPUs and GPUs. Our accelerator implements a lazy evaluation scheme where Gaussians are computed on demand, avoiding 50% of the computations. Furthermore, it employs a novel clustering scheme to reduce the size of the acoustic model, which results in 8x memory bandwidth savings with a negligible impact on accuracy. 
Finally, it includes a novel memoization scheme that avoids 74.88% of floating-point operations. The end design provides a 164x speedup and 3532x energy reduction when compared with a highly-tuned implementation running on a modern mobile CPU. Compared to a state-of-the-art mobile GPU, the GMM accelerator achieves 5.89x speedup over a highly optimized CUDA implementation, while reducing energy by 241x.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.11" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7cb0713f62f23f9ef49bd681da9ac7870d059875", "sources": [ "DBLP" ], "title": "An Ultra Low-Power Hardware Accelerator for Acoustic Scoring in Speech Recognition", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "7cb20916c6f3cc7c552f2e4cf15801b9a60f4dab": { "authors": [ { "ids": [ "3320568" ], "name": "Arash Molavi Kakhki" }, { "ids": [ "2360210" ], "name": "Samuel Jero" }, { "ids": [ "2450059" ], "name": "David R. 
Choffnes" }, { "ids": [ "3271869" ], "name": "Cristina Nita-Rotaru" }, { "ids": [ "1729928" ], "name": "Alan Mislove" } ], "doi": "10.1145/3131365.3131368", "doiUrl": "https://doi.org/10.1145/3131365.3131368", "entities": [ "Finite-state machine", "Hoc (programming language)", "Mobile device", "Multiplexing", "Rewriting" ], "id": "7cb20916c6f3cc7c552f2e4cf15801b9a60f4dab", "inCitations": [ "5e0077867ea32cfadaa8885206635348cf2f9b64", "f829bb1adffb7d4eed8918a6cf420e8dd43c6a97", "715f202fc11670ea3063d0e55d61ca3777814cc2" ], "journalName": "", "journalPages": "290-303", "journalVolume": "", "outCitations": [ "3f200c41618d0c3d75c4cd287b4730aadcf596f7", "1c68a88cae1e486e14925637a3acb82164927ac5", "940989cf71be558d09c47aab670a0485c77cd216", "dda7b731f7a5aec049414371ca0d65518a74cb96", "0d998c8d61131a3854532b1168edce19c76ddf95", "122c4345cc86666a5221704c90fa74390f99654b", "58747ebab2e2231634b5a4707ce56c63b81a9019", "065e1b5f59ccd2526117be9ec98c2df9e4172bea", "546c0cfed69f188a0ca661c8db9b099f554a63d1", "077b23a64c80039a9d36da0cab766262edc89af2", "7f9ad9212ccb9ab9b5614bef93347a4b05266a77", "02adbca269b534eed78dfdb8e52b45b86894a406", "2d8a132fd622b6b8e46507911f7ab24cbd37e667", "26a2e427654d6f2f40a8f5d59773c70677662376", "9f95eb7ce7ce190c7c8e6fca26de1a283f7007b1", "1f55399204e4499942d5a78f2a1d95945d264314", "37dcb659ef4a441cec259eef98ce8fb17c53297b", "16d0a8ee484f4a34e1cdcda8a0c2453e2e962ada" ], "paperAbstract": "Google's QUIC protocol, which implements TCP-like properties at the application layer atop a UDP transport, is now used by the vast majority of Chrome clients accessing Google properties but has no formal state machine specification, limited analysis, and ad-hoc evaluations based on snapshots of the protocol implementation in a small number of environments. 
Further frustrating attempts to evaluate QUIC is the fact that the protocol is under rapid development, with extensive rewriting of the protocol occurring over the scale of months, making individual studies of the protocol obsolete before publication.\n Given this unique scenario, there is a need for alternative techniques for understanding and evaluating QUIC when compared with previous transport-layer protocols. First, we develop an approach that allows us to conduct analysis across multiple versions of QUIC to understand how code changes impact protocol effectiveness. Next, we instrument the source code to infer QUIC's state machine from execution traces. With this model, we run QUIC in a large number of environments that include desktop and mobile, wired and wireless environments and use the state machine to understand differences in transport- and application-layer performance across multiple versions of QUIC and in different environments. QUIC generally outperforms TCP, but we also identified performance issues related to window sizes, re-ordered packets, and multiplexing large number of small objects; further, we identify that QUIC's performance diminishes on mobile devices and over cellular networks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3131365.3131368" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7cb20916c6f3cc7c552f2e4cf15801b9a60f4dab", "sources": [ "DBLP" ], "title": "Taking a long look at QUIC: an approach for rigorous evaluation of rapidly evolving transport protocols", "venue": "IMC", "year": 2017 }, "7cbbab21c6f6fb54a968005afa21468b825f1d1e": { "authors": [ { "ids": [ "3204078" ], "name": "Jaebaek Seo" }, { "ids": [ "2767582" ], "name": "Byoungyoung Lee" }, { "ids": [ "2105818" ], "name": "Seong Min Kim" }, { "ids": [ "34033392" ], "name": "Ming-Wei Shih" }, { "ids": [ "1694991" ], "name": "Insik Shin" }, { "ids": [ "1729324" ], "name": "Dongsu Han" }, { "ids": [ "3254849" ], "name": "Taesoo Kim" } ], "doi": "", 
"doiUrl": "", "entities": [ "Address space", "Address space layout randomization", "Adversary (cryptography)", "Algorithm", "Compiler", "Computer data storage", "Data structure", "Executable space protection", "HTTPS", "Intel Developer Zone", "LLVM", "Memory corruption", "Overhead (computing)", "Protection mechanism", "Randomness", "Relocation (computing)", "Server (computing)", "Trusted Computing", "Trusted execution environment" ], "id": "7cbbab21c6f6fb54a968005afa21468b825f1d1e", "inCitations": [ "d32d738f04d5c4ae4d12be1f6de88b132a7b12a7", "50ba271c1e0ddd814b6e79348a8963c788d9ddf9", "7ab74b4e4c11626c2642fcb95342c9c318dbfdca", "b0ac2616034f56ab1469afb935b55fe7e37f8f41", "b897c4c09b480f9934d5e9e4cfa2d540aaed522f", "873d3d4efb797a4fc3ead8bfa7ab5fde906306aa", "0d45681a313e37dd9f716f50bfa4d178eb16c64a", "26edca5c337b6b6ec4416356f270c35dc074057d", "659bc8a947a4ea64b2ca38b7fa5a27233de2c1f8", "068a58434b02a80eef0834b7c14911994f0fe382", "6cfe1e553cb48c7087bb61e80031c415978a4ede", "187e2d1c888c5c0529e5a50c8c90efe9889cbd69", "334ec6e57110ece9f482f9ec2e85412b0be8072a", "5bad4ddf00f7a0fd082d335fff346f2ab11d021c", "287da0ab3c169c41433b0e5504161dfd1afbfa6c", "54be4148c4ebb985505664516ca0004718086c0a", "19aeb06b3ba5b454fc462254c178acdf233d955b", "2106bb4ef13e35ad7640376b0ba3b6261b82fe22", "2087f336b0eb38fc60a24a32ec0821fc8fe2b2aa", "8e3270ef5a0afc293021f8d594979ed059e29d5e", "377712ef264d63c97b341fb782037d063018305e", "345533e1f72f3f9e215e1fc468a3131a90481414", "be16f521ebdbe1acea867c9ce8100692d9f56308", "0c0994b08b790dc467b892d538321f9dbd10a3c2", "408ba239cece0308dbd180d86ee217d3c8d0b855", "2a7056e53f29bc73471048a77b0c55ea4e92b897", "38a54f9bbbfc46599770a28999365144a273783f" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "116eaac2e498bc2c9bea10ea838309dcf143d764", "2d968ef0c5ad0cc6718e2f8b40ce7f4c323dbbdd", "1d08bb92568d98319634fe2409a9eab085d68b60", "635f3a25ca8626072d1eedc6aebddcb429de4b4e", "412c0be5520e80ab6f8c3662477a28e3b9ccb943", 
"07f0e56d1c37c213cd5c617dbfba5a0549629a19", "01ee7db67463b53143e0b2c126363ed6d3c8a532", "30909df12b1b01760ae4c5406e15f302a6524446", "0957332f8beb1ec4071fcb6fc44cb0b5396463d5", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "50117210fbb7a8ce28ba503280f3c65fdc029ec7", "71da01051534d46fb3becd0a7506b64db56efc7a", "07848afa4b5f2f09cd2fe28a6cdc25c642da61cd", "377e1ea567fa79fae02a0b38a62916520ef81e2d", "70e8eb54e20c504b9a3c52f5a5ea4d8fbb3d985c", "188847872834a63fb435cf3a51eef72046464317", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "8a6d19bea6f04e2bf2277c7ccd61becdf2bb48e7", "387e571981a8ee2bd49b1f30563e3a3a215e3b65", "6588630854bf12b190ae8f95ed8763f7cdb945f6", "74572d07252e2f0b60b16abb931c46e819e2b448", "0ed6a4c8fa2c74ca2e470df6e205797371523edc", "05c49820bb35d0b8d7a2168a9124e506a0334b57", "0ba384496a466b8f33f0adfdf4b4b2b106ec6c45", "a8ad2ce828a3af6f59c6c3dd7f5fb08dbf9d3e3b", "01fde8698110cf46ff48a17c65f2658dab4c323c", "b0ac2616034f56ab1469afb935b55fe7e37f8f41", "85d555f7ce19740b4fc656ff797623c6e1513018", "4931ec923c348c2f0adf9e8cba7ad239075048bd", "acf32e644db8c3ac54834d294bba4cf46551480a", "08c3e50a2913da51ed3cdafdcfdfb488e8fa83c3", "53396c842bc8a94575470fab3acb4aef91c5073d", "0a289fd7b14345822b1acda6d82750b15d59663e", "3ffc94c7066b4856b1cfae99ab66cd20310e41dd", "5b2092b54860f134f78b2ec884c910750def71e6", "7c8c9bdb30ae9b40365c355504bdb457a51e108c", "4b9426a328fb4e379e2fab57fc72a91c5618ae94", "05f70f429a7bf38efa9e457fd486cb862bd495be", "4d75cd2764c45baf46c72fddc5c676fdfce6f60e", "f0ac31c2248ef8eb597448395da6f79227ffe916", "1495c7daaba55dd2e68e026fc6c1848eee1ee710", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "48a8e9d8a41009eb6b7733b139eb5eff30d72776", "6b6fae57882fd193461fca64654107068ce9fd9a" ], "paperAbstract": "Traditional execution environments deploy Address Space Layout Randomization (ASLR) to defend against memory corruption attacks. 
However, Intel Software Guard Extension (SGX), a new trusted execution environment designed to serve security-critical applications on the cloud, lacks such an effective, well-studied feature. In fact, we find that applying ASLR to SGX programs raises non-trivial issues beyond simple engineering for a number of reasons: 1) SGX is designed to defeat a stronger adversary than the traditional model, which requires the address space layout to be hidden from the kernel; 2) the limited memory uses in SGX programs present a new challenge in providing a sufficient degree of entropy; 3) remote attestation conflicts with the dynamic relocation required for ASLR; and 4) the SGX specification relies on known and fixed addresses for key data structures that cannot be randomized. This paper presents SGX-Shield, a new ASLR scheme designed for SGX environments. SGX-Shield is built on a secure in-enclave loader to secretly bootstrap the memory space layout with a finer-grained randomization. To be compatible with SGX hardware (e.g., remote attestation, fixed addresses), SGX-Shield is designed with a software-based data execution protection mechanism through an LLVM-based compiler. We implement SGX-Shield and thoroughly evaluate it on real SGX hardware. It shows a high degree of randomness in memory layouts and stops memory corruption attacks with a high probability. 
SGX-Shield shows 7.61% performance overhead in running common microbenchmarks and 2.25% overhead in running a more realistic workload of an HTTPS server.", "pdfUrls": [ "http://iisp.gatech.edu/sites/default/files/images/ndss17_sgx_sheild.pdf", "https://taesoo.gtisc.gatech.edu/pubs/2017/seo:sgx-shield.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/sgx-shield-enabling-address-space-layout-randomization-sgx-programs/" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/7cbb/ab21c6f6fb54a968005afa21468b825f1d1e.pdf", "s2Url": "https://semanticscholar.org/paper/7cbbab21c6f6fb54a968005afa21468b825f1d1e", "sources": [ "DBLP" ], "title": "SGX-Shield: Enabling Address Space Layout Randomization for SGX Programs", "venue": "NDSS", "year": 2017 }, "7cd2fb8276d2df637185b17db9673297460415de": { "authors": [ { "ids": [ "1727558" ], "name": "Rachid Guerraoui" }, { "ids": [ "1723331" ], "name": "Anne-Marie Kermarrec" }, { "ids": [ "3035974" ], "name": "Mahsa Taziki" } ], "doi": "10.1145/3077136.3080783", "doiUrl": "https://doi.org/10.1145/3077136.3080783", "entities": [ "MovieLens", "Nash equilibrium", "Privacy", "Recommender system" ], "id": "7cd2fb8276d2df637185b17db9673297460415de", "inCitations": [], "journalName": "", "journalPages": "665-674", "journalVolume": "", "outCitations": [ "745449bd7ad5d32224685e9188591e53395f376e", "bac41b59697da3ca5c80ca08f2bbbc97a3576248", "9aa88a8a354f1d322e242376d27d0474e50252f8", "6b74ec27d76ae42c2faa9211e2640141595838b6", "d3f6eb8ba989df230a03a16f5540f9c2e7f3d41d", "94a62f470aeea69af436e2dd0b54cd50eaaa4b23", "a67429474820bd56c30bc36618ed43fba3908bbe", "71134014e08ccd9190f91b8213370e7f3b774d82", "0586ede7472076870ed65277253e6e19d61105e5", "0156091af83cd6ec1e981ff4607b3e0dfe48ee9e", "616420b5e1e999d84fe0fcc1992a36124fbca64d", "e071c8a5a9ea01896de8cea8230ee521559125c0", "c86c176a023905dfbcba683278e8356ccb189496", "385d67feedab147c6c2ac649aa960ad27b311edc", "7f5d603ecd6387500aa7e7781a6ae03b0cd214bf", 
"2824b6a3d0096b0b522f4b7a7659b5f792f93d8f", "d5fdc3c0b2049a025091179a73e0e4174105fcd4", "3edce4a9017aa823bad7c12716c67ef3248b1066", "52a7ef9ca296d5b8470e680511d8ea9e8fdd10ee", "3c80a9cc4f9a5f5ea0b458cde677ff8f7b28e797", "a346b26eaccae174498e8f7dd4c11db247d8a5cf", "2275762a28582716db92df6d525ed2481c7d7f14", "2ab47454f59d9d8e55d4d8a69530562a3690794a", "f9d6530181804d6c1539685395b3207bcb9fed24", "ea4bbc79d1d95b61b10d768531a083e26c2dab94", "077c648efef8a2bf8c0164f3d8141256090da41b", "10245ff960453cac74528c205129d3e26a84960c", "16c029e8f4bc3b662b0ad89d15dd57ff567f3726", "c47bbef14c5f055805f76a78c0f1c7d4ddd87cee", "ad0748dde92e52d5e6f7eb9df3975f976bac714e", "b5df5479d6e3f94941dbc88298a93e19fe1c1c0e", "6c007a258b8e35ee7408e9babd9d842cb576ecc2" ], "paperAbstract": "Recommenders are becoming one of the main ways to navigate the Internet. They recommend appropriate items to users based on their clicks, i.e., likes, ratings, purchases, etc. These clicks are key to providing relevant recommendations and, in this sense, have a significant utility. Since clicks reflect the preferences of users, they also raise privacy concerns. At first glance, there seems to be an inherent trade-off between the utility and privacy effects of a click. Nevertheless, a closer look reveals that the situation is more subtle: some clicks do improve utility without compromising privacy, whereas others decrease utility while hampering privacy.\n In this paper, for the first time, we propose a way to quantify the exact utility and privacy effects of each user click. More specically, we show how to compute the privacy effect (disclosure risk) of a click using an information-theoretic approach, as well as its utility, using a commonality-based approach. We determine precisely when utility and privacy are antagonist and when they are not. To illustrate our metrics, we apply them to recommendation traces from Movielens and Jester datasets. 
We show, for instance, that, considering the Movielens dataset, 5.94% of the clicks improve the recommender utility without loss of privacy, whereas 16.43% of the clicks induce a high privacy risk without any utility gain.\n An appealing application of our metrics is what we call a click-advisor, a visual user-aware clicking platform that helps users decide whether it is actually worth clicking on an item or not (after evaluating its potential utility and privacy effects using our techniques). Using a game-theoretic approach, we evaluate several user clicking strategies. We highlight in particular what we define as a smart strategy, leading to a Nash equilibrium, where every user reaches the maximum possible privacy while preserving the average overall recommender utility for all users (with respect to the case where user clicks are based solely on their genuine preferences, i.e., without consulting the click-advisor).", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080783" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7cd2fb8276d2df637185b17db9673297460415de", "sources": [ "DBLP" ], "title": "The Utility and Privacy Effects of a Click", "venue": "SIGIR", "year": 2017 }, "7cfd14cec177b2ea04ff2855b94009cf739d42b0": { "authors": [ { "ids": [ "8102722" ], "name": "Tae Joon Jun" }, { "ids": [ "10426008" ], "name": "Myong Hwan Yoo" }, { "ids": [ "38004716" ], "name": "Daeyoung Kim" }, { "ids": [ "2015360" ], "name": "Kyu Tae Cho" }, { "ids": [ "6300489" ], "name": "Seung Young Lee" }, { "ids": [ "8095175" ], "name": "Kyuoke Yeun" } ], "doi": "10.1145/3030207.3044531", "doiUrl": "https://doi.org/10.1145/3030207.3044531", "entities": [ "Application programming interface", "Data Distribution Service", "Desktop virtualization", "Graphics processing unit", "InfiniBand", "Interlock (engineering)", "Mission critical", "Operating system", "Performance Evaluation", "Quad data rate", "Radar", "Scalability", "Virtual machine", "X86 virtualization" 
], "id": "7cfd14cec177b2ea04ff2855b94009cf739d42b0", "inCitations": [ "b33b795533155637ceaa2c89da8bd20794a34d51" ], "journalName": "", "journalPages": "223-232", "journalVolume": "", "outCitations": [ "e19edcf8bcdab98e75fd4db6470aaa798ac841e2", "04704080ae469d24797ee6369f2e2a72ffcca828", "4bf0de201b5fc9121bbc5c9378fecc016d127ab7", "00156e79606084497789662dfaf59c3b54a10722", "3dde3fec553b8d24a85d7059a3cc629ab33f7578", "0532ccc88f4f96168b2ca18730718635a89093de", "784e63d7b367c31f6c88010798b223a914fbff2f", "0be302437cec82b9200d61d13d3125e62a8ef499", "17f1ff82aca7a592a8815e8169b6e2210bf6ae7a", "8785083d32191fc633f1b30904cc52dda76ad4a1", "cd05da83e028cf3b1ad9ddbe39d78343b2deaf19", "9bc397cfd1e150149a1de08133a9036613a8be64", "51cf2e326bdfceb1fb77163db5a2433776c0c20f", "2bbbfcc7484e14cb80910e8f582d1b6a34b2d7f9", "d65f3570a709a2bb6cfbc540d19da029a63e2661", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "45472bef11491245ad51dde6963e3cc40c5f3b79" ], "paperAbstract": "Tactical Operations Center (TOC) system in military field is an advanced computer system composed of multiple servers and desktops to interlock internal/external weapon systems processing mission-critical applications in combat situation. However, the current TOC system has several limitations such as difficulty of integrating tactical weapon systems including missile launch system and radar system into the single TOC system due to the heterogeneity of HW and SW between systems, and an inefficient computing resource management for the weapon systems.\n In this paper, we proposed a novel HPC supported mission-critical Cloud architecture as TOC for Surface-to-Air-Missile (SAM) system with OpenStack Cloud OS, Data Distribution Service (DDS), and GPU virtualization techniques. 
With this approach, our system provides elastic resource management over the weapon systems with virtual machines, integration of heterogeneous systems with different kinds of guest OS, real-time, reliable, and high-speed communication between the virtual machines and virtualized GPU resource over the virtual machines. Evaluation of our TOC system includes DDS performance measurement over 10Gbps Ethernet and QDR InfiniBand networks on the virtualized environment with OpenStack Cloud OS, and GPU virtualization performance evaluation with two different methods, PCI pass-through and remote-API. With the evaluation results, we conclude that our system provides reasonable performance in the combat situation compared to the previous TOC system while additionally supports scalable and elastic use of computing resource through the virtual machines.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3044531" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7cfd14cec177b2ea04ff2855b94009cf739d42b0", "sources": [ "DBLP" ], "title": "HPC Supported Mission-Critical Cloud Architecture", "venue": "ICPE", "year": 2017 }, "7cfebf75c82fdf08d21ea29751a39e6d2291b2ca": { "authors": [ { "ids": [ "1725917" ], "name": "Shen Liu" }, { "ids": [ "1719733" ], "name": "Gang Tan" }, { "ids": [ "1699210" ], "name": "Trent Jaeger" } ], "doi": "10.1145/3133956.3134066", "doiUrl": "https://doi.org/10.1145/3133956.3134066", "entities": [ "Aliasing", "C++", "Data dependency", "Dependence analysis", "Executable", "Marshalling (computer science)", "Pointer (computer programming)", "Pointer analysis", "Program Dependence Graph", "StumbleUpon", "Unmarshalling" ], "id": "7cfebf75c82fdf08d21ea29751a39e6d2291b2ca", "inCitations": [], "journalName": "", "journalPages": "2359-2371", "journalVolume": "", "outCitations": [ "274537bd5a77326d44bae3f99da8908a7f57c3f3", "0560fc4924bbbe7e920122dc25c1ecfc3e59e374", "0af2e5c772b26963e3a756225a9fab5654f640f9", 
"be7536d9baaef7ccdbff845f8e98c136b4c80bb3", "125268a25397dd17fb3c7dbd4018114a972e4acb", "0719b9670c8580db76547497df39caabdc20fc32", "39a59c25e8fcdbb6304e70c86e263539af25e62d", "2cc08b9f07a889a7c035df438ca99d0ad8c97aa5", "14bf0f3d15a053fed6aae540d22d8096c643b308", "23400d3b5efed8ab1015d9aed496dabad707c991", "29f56a7f34879033bc6ecf52e03099fb55277e0d", "0a90f6db154f6a9f3565ee596d23da3196361454", "529d3d3fb82afe905c410e8a7b3fc9d09ca623c5", "28aa980f5f91a0d9731facb2781693992e2d7bc5", "0482608bdcc55c42b49f9a8e01585b4482f29fb3", "4ced5380095c3f659a15e0f5b16061713f630c2e", "3cae67dde8b20aa58ebd12def02c7fa8ad844de4", "822c270ac7682f5bd9b3c361e46cf8d28a4d2283", "3df4834a33d06f300eaa357974be1f5780cee27c", "d937dd67265c2ac0ba5ffa8022323d37c2344188", "41babfa6bc6e757433d9497644de553eab33ef95", "61e80faa61e60c487b07b38f09e0210fe508f32f", "565919855788bfcc7fbaad3006fe0f42c735b333", "a23094f0ec3ea80481cbeb9484829a7fa8173d6c", "65192f3d0ffb066a4c47a09fc11fdfad47dd192e", "3201bf85bee9995aafa569c47669db463551e6cc", "c7ba68dbff13e84cbaee3b0d57e597ba2eb60c5f", "157153c5c5b3fb42bd4de60d2cd8380e7038a82b", "046a9e129fba46d78301ead661949f5290c79989", "2cef6527284b58e5820748e72191cc4b9d6e1112" ], "paperAbstract": "Partitioning a security-sensitive application into least-privileged components and putting each into a separate protection domain have long been a goal of security practitioners and researchers. However, a stumbling block to automatically partitioning C/C++ applications is the presence of pointers in these applications. Pointers make calculating data dependence, a key step in program partitioning, difficult and hard to scale; furthermore, C/C++ pointers do not carry bounds information, making it impossible to automatically marshall and unmarshall pointer data when they are sent across the boundary of partitions. In this paper, we propose a set of techniques for supporting general pointers in automatic program partitioning. 
Our system, called PtrSplit, constructs a Program Dependence Graph (PDG) for tracking data and control dependencies in the input program and employs a parameter-tree approach for representing data of pointer types; this approach is modular and avoids global pointer analysis. Furthermore, it performs selective pointer bounds tracking to enable automatic marshalling/unmarshalling of pointer data, even when there is circularity and arbitrary aliasing. As a result, PtrSplit can automatically generate executable partitions for C applications that contain arbitrary pointers.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134066", "http://www.cse.psu.edu/~gxt29/paper/ptrsplit.pdf", "https://acmccs.github.io/papers/p2359-liuAemb.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7cfebf75c82fdf08d21ea29751a39e6d2291b2ca", "sources": [ "DBLP" ], "title": "PtrSplit: Supporting General Pointers in Automatic Program Partitioning", "venue": "CCS", "year": 2017 }, "7d312139c903396efeb7dd38c6ad9f0e6ff04366": { "authors": [ { "ids": [ "2708722" ], "name": "Siva Kumar Sastry Hari" }, { "ids": [ "40489216" ], "name": "Timothy Tsai" }, { "ids": [ "2032635" ], "name": "Mark Stephenson" }, { "ids": [ "1715863" ], "name": "Stephen W. Keckler" }, { "ids": [ "1775477" ], "name": "Joel S. 
Emer" } ], "doi": "10.1109/ISPASS.2017.7975296", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975296", "entities": [ "Assembly language", "Embedded system", "Experiment", "Fault injection", "Graphics processing unit", "High- and low-level", "Pervasive informatics", "Processor register", "Scalability", "Soft error", "Supercomputer" ], "id": "7d312139c903396efeb7dd38c6ad9f0e6ff04366", "inCitations": [ "3b8c500ca7fe4e2f8c8cc02bea053b269eccf442", "26fef351a3a671a64b32b3c673a332e912cfed24", "d72db57aeadc0ffcb3225d1711dbf9160fd92969", "06e4905bef810f8a2f089974c5291fb7dd84be46" ], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "249-258", "journalVolume": "", "outCitations": [ "87a013dff0f9ab089cbfdaf350b94722cb240688", "0bae66c26c7af51b78f8110d44f45325275049d8", "0a43cabe7ca27b66cb902f16150bec9fb7e023b1", "7f6c49645686f4814c01aca621341a0b244898b6", "17c5972f45561c705bce6d8e0174d94cc8ad4adf", "5037ba6bcd3b391a8cad4abeae7b6a39ca850c72", "88e3c9faad4294d56fc20b768ceb6a25debf4ae6", "b6c223f20a192c57fd0c78c4f2f0bb125b09eea3", "0f3b99e14ad40fbe21ef25438718cb8988ff1b0e", "8ced61f98ffd5c8ccc4a7083c3c43d21d4067db9", "5e3fb6a4514550dbdb1bfeb4e5705e4a7ffcc84f", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "e38b5a044a68bc1e9326acf6e5ae4db1ac2eaaaa", "b3aa0590b54b4f23723a7986f94806bea77a2392", "1d55f921999b3fcc55e73d8b73f633156b11937c", "26a635be4fc593b5fb6ec6bf11b03634e803f311", "738a102562a662031039df7723da16d25627f2e2", "b916d07d2ed850c118a6f4a8903df34b4b9cb0a5", "04ec5964a08a2ad62a30fea1fb9eff1e484a4524", "28540222f0ed31ae930dc329e29eb17d280663f2", "3b3d0b71196ce5dae90146473721d5863524a1c1" ], "paperAbstract": "As GPUs become more pervasive in both scalable high-performance computing systems and safety-critical embedded systems, evaluating and analyzing their resilience to soft errors caused by high-energy particle strikes will grow increasingly important. 
GPU designers must develop tools and techniques to understand the effect of these soft errors on applications. This paper presents an error injection-based methodology and tool called SASSIFI to study the soft error resilience of massively parallel applications running on state-of-the-art NVIDIA GPUs. Our approach uses a low-level assembly-language instrumentation tool called SASSI to profile and inject errors. SASSI provides efficiency by allowing instrumentation code to execute entirely on the GPU and provides the ability to inject into different architecture-visible state. For example, SASSIFI can inject errors in general-purpose registers, GPU memory, condition code registers, and predicate registers. SASSIFI can also inject errors into addresses and register indices. In this paper, we describe the SASSIFI tool, its capabilities, and present experiments to illustrate some of the analyses SASSIFI can be used to perform.", "pdfUrls": [ "http://www.cs.utexas.edu/~skeckler/pubs/ISCA_2017_SASSIFI.pdf", "https://doi.org/10.1109/ISPASS.2017.7975296" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7d312139c903396efeb7dd38c6ad9f0e6ff04366", "sources": [ "DBLP" ], "title": "SASSIFI: An architecture-level fault injection tool for GPU application resilience evaluation", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "7d358d23c0412eda1a87bd1f604bb467b4deb8f7": { "authors": [ { "ids": [ "2635901" ], "name": "Pigi Kouki" }, { "ids": [ "2634786" ], "name": "Jay Pujara" }, { "ids": [ "11403782" ], "name": "Christopher Marcum" }, { "ids": [ "2744403" ], "name": "Laura M. 
Koehly" }, { "ids": [ "1746034" ], "name": "Lise Getoor" } ], "doi": "10.1109/ICDM.2017.32", "doiUrl": "https://doi.org/10.1109/ICDM.2017.32", "entities": [ "Collective intelligence", "Experiment", "Family tree", "Probabilistic soft logic", "Scalability" ], "id": "7d358d23c0412eda1a87bd1f604bb467b4deb8f7", "inCitations": [ "7bf53f276b19c330b678fde84795ea6cb0252677", "7e59154a0446de99a18400a43e65c0905f248cd4" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "227-236", "journalVolume": "", "outCitations": [ "8dd0b4e5fcbab552f0ef1460b2c97cec926cdb58", "2d28eb6a7b105b1a7bbf353f2bd4b6f1f70e5c01", "346df16c47d619147d5a64788da826717e79ecbe", "c090682e5b64d126d924dd4a8781ba60bda56972", "ee37ed59f89dba6dc7c6912f3cd3b86a4ae6e0c9", "36315e5abd752d7419647409c3b7f2199b48fafd", "aa0848ac8c287afcbfe0e1de7b5d2b163a13c16c", "0e544a06caeacb0b4160e85a4cc1ac5d606c5152", "eee6aa28d2b2fbe728c773da69d392a324810efe", "d6bb3fba168a9883300c2c1469d5deaa7cff9666", "2dec6e69f42b22d03208ce01afc103a7d702f276", "3bbfc62fc13ca27c6e58e42167a6aef593a1365e", "0a7412b9b00e61886b21bbdb8eb521f23e215e54", "0f16f6f478b5c788dce466eb50e36c612273c36e", "1b9fb7623ee4a717664bc145c7e03722b9a63e24", "8bc23235070ce181d34002e2a44e4b233beaa732", "05393361e6d9e56ee7dbabb1e5ef6c1c212fc34d", "4f819589fd2931333326ad7deec58f628f7d2644", "35970fb36604a7aee36a691446e34f9734e27a25", "1c555cc696520469ca6b14351952b194ada0bb28", "142857bb3fb5139eb9b49ff168b9d8cd4ae0db8e", "3e3bcc0d1a23f02e76b5ec09042552248e50fdf6", "4b38f5105dbc756ffdbb14519286aa23e5889c1b" ], "paperAbstract": "Entity resolution in settings with rich relational structure often introduces complex dependencies between co-references. Exploiting these dependencies is challenging - it requires seamlessly combining statistical, relational, and logical dependencies. One task of particular interest is entity resolution in familial networks. 
In this setting, multiple partial representations of a family tree are provided, from the perspective of different family members, and the challenge is to reconstruct a family tree from these multiple, noisy, partial views. This reconstruction is crucial for applications such as understanding genetic inheritance, tracking disease contagion, and performing census surveys. Here, we design a model that incorporates statistical signals, such as name similarity, relational information, such as sibling overlap, and logical constraints, such as transitivity and bijective matching, in a collective model. We show how to integrate these features using probabilistic soft logic, a scalable probabilistic programming framework. In experiments on real-world data, our model significantly outperforms state-of-the-art classifiers that use relational features but are incapable of collective reasoning.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.32", "https://linqspub.soe.ucsc.edu/basilic/web/Publications/2017/kouki:icdm17/kouki-icdm17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7d358d23c0412eda1a87bd1f604bb467b4deb8f7", "sources": [ "DBLP" ], "title": "Collective Entity Resolution in Familial Networks", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "7d44fbcded948afdbc9c264272225c1a5628a98f": { "authors": [ { "ids": [ "2494078" ], "name": "Andreas Sembrant" }, { "ids": [ "3083590" ], "name": "Trevor E. 
Carlson" }, { "ids": [ "1759749" ], "name": "Erik Hagersten" }, { "ids": [ "1780873" ], "name": "David Black-Schaffer" } ], "doi": "10.1109/IISWC.2017.8167756", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167756", "entities": [], "id": "7d44fbcded948afdbc9c264272225c1a5628a98f", "inCitations": [], "journalName": "", "journalPages": "54-65", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167756" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7d44fbcded948afdbc9c264272225c1a5628a98f", "sources": [ "DBLP" ], "title": "A graphics tracing framework for exploring CPU+GPU memory systems", "venue": "IISWC", "year": 2017 }, "7d6160f01a4fec2cfc55f56fa1fff108a8f116ac": { "authors": [ { "ids": [ "40371232" ], "name": "Bo Li" }, { "ids": [ "32757734" ], "name": "Edgar A. Le\u00f3n" }, { "ids": [ "1717511" ], "name": "Kirk W. Cameron" } ], "doi": "10.1145/3078597.3078601", "doiUrl": "https://doi.org/10.1145/3078597.3078601", "entities": [ "CAS latency", "COS", "Clock rate", "Computation", "Concurrency (computer science)", "Distributed computing", "Emergence", "Job control (Unix)", "Memory bandwidth", "Run time (program lifecycle phase)", "Scalability", "Time complexity", "X86" ], "id": "7d6160f01a4fec2cfc55f56fa1fff108a8f116ac", "inCitations": [], "journalName": "", "journalPages": "155-166", "journalVolume": "", "outCitations": [ "d6be948f6efd5960f6a65f3b56524011e2a411e7", "5eb1030c17ce1e2a5248033bcd0183da0c687b67", "c8bbe1b7a791a21ea6c7db852d240d457243b1ac", "31c299532c42106b71e909c2fc0fc7472c39ce90", "24a56ea63d3a6518e9fa2f9a9380bf4620760101", "244030cb8e73144251ef3701ac758168031d17f9", "086699da0528ed47463cea3108851bd3dc5ba715", "019c029c54edbc9253d5766fabdd1ea0623e8981", "77f826132cf09ac91ea9c859387a8d52221a019a", "1f5b507c038b09f017bffd51d4f4e4257bef6ef4", "e98f988ad47a2c304036c8dcfb1c56ec99b11f85", "ae0fe4626a829c43e33771677f88ed2fb36669b4", 
"3bda118f9e1382ff3ea5547d57e2396203c0186d", "48001896819760ce9e188ff33dbb4921e64332f4", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "387eb8909b5527dd2513cfdd2f376a3a1f2973b3", "6fd344c359faa7fcd15d1adf76da58ce328a4b45", "256774b46b3265ae950ea3717e5a2d0c51ab2b55", "e60d1171e74c87495f5c6cffede388af271f9584", "a320d0eae237b03a49386bddd65de07b23c0f89e", "2c9662101750dd471c49176bd8ccf01fd6cb4ffb", "5f43c772f4f06ee0d36a1b12d99f04db0d10b655", "69743194ca177ef816d31a99475c3ba3ff97808c", "7e757fff66a63b268da83ffccf464437492ac8b6", "5a04a10af20300d5c9400f15357b559fb184affa", "3f1e00eefae3bde81ec8faa4f5725abc125b10b0", "346ee93e610a95c60394900f857d398bc2ae74df", "06d18be06791722e5efb82093a982e6cd3298618", "1108af609469e420aeae551ba8a893c3200e07fa", "efa12d71ca43f924a6e8be04cd1df2c22355af86" ], "paperAbstract": "Highly-parallel, high-performance scientific applications must maximize performance inside of a power envelope while maintaining scalability. Emergent parallel and distributed systems offer a growing number of operating modes that provide unprecedented control of processor speed, memory latency, and memory bandwidth. Optimizing these systems for performance and power requires an understanding of the combined effects of these modes and thread concurrency on execution time. In this paper, we describe how an analytical performance model that separates pure computation time (C) and pure stall time (S) from computation-memory overlap time (O) can accurately capture these combined effects. We apply the COS model to predict the performance of thread and power mode combinations to within 7% and 17% for parallel applications (e.g. LULESH) on Intel x86 and IBM BG/Q architectures, respectively. The key insight of the COS model is that the combined effects of processor and memory throttling and concurrency on overlap trend differently than the combined effects on pure computation and pure stall time. 
The COS model is novel in that it enables independent approximation of overlap which leads to capabilities and accuracies that are as good or better than the best available approaches.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078597.3078601" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7d6160f01a4fec2cfc55f56fa1fff108a8f116ac", "sources": [ "DBLP" ], "title": "COS: A Parallel Performance Model for Dynamic Variations in Processor Speed, Memory Speed, and Thread Concurrency", "venue": "HPDC", "year": 2017 }, "7d77afec74c6458344bec55d759c6f60f9b969a9": { "authors": [ { "ids": [ "2622807" ], "name": "Richard Cole" }, { "ids": [ "1678824" ], "name": "Vijaya Ramachandran" } ], "doi": "10.1145/3087556.3087572", "doiUrl": "https://doi.org/10.1145/3087556.3087572", "entities": [ "Algorithm", "CPU cache", "Cache (computing)", "Representational state transfer", "Scheduling (computing)", "Thread (computing)", "Work stealing" ], "id": "7d77afec74c6458344bec55d759c6f60f9b969a9", "inCitations": [], "journalName": "", "journalPages": "351-362", "journalVolume": "", "outCitations": [ "31181e73befea410e25de462eccd0e74ba8fea0b", "836d9fa4aa7753d5c15381b20cdb6c650c7b9d0e", "429326f33b2097b4ff729752bcd5ae5e40ddc881", "cd5446a98025167bf08e1aba281490bf2f0a3382", "467f4406b5c42921e56bbf92524d8ebc8f94c997", "e376db9da7b1b25b0b0db374a02a80542768a152", "fc55d75ca1b6b7fd9497eb62fdcaacaf52ab689a", "52f00ddc4c6537ca981947e7865cab4a4d4c43ec", "79818ab3f303ec9ba2838b63de1dbb48dc6924d2", "1417c585009393cc2b45a38939a5e818738c62ea", "58c14442dd22ad5151f1c416e8b75b608e9212e2", "202116387ccf060e90867468f18041561fbc7735" ], "paperAbstract": "We analyze the caching overhead incurred by a class of multithreaded algorithms when scheduled by an arbitrary scheduler. 
We obtain bounds that match or improve upon the well-known O(Q+S · (M/B)) caching cost for the randomized work stealing (RWS) scheduler, where S is the number of steals, Q is the sequential caching cost, and M and B are the cache size and block (or cache line) size respectively.", "pdfUrls": [ "https://arxiv.org/pdf/1705.08350v1.pdf", "https://arxiv.org/pdf/1705.08350v2.pdf", "http://doi.acm.org/10.1145/3087556.3087572", "http://arxiv.org/abs/1705.08350" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7d77afec74c6458344bec55d759c6f60f9b969a9", "sources": [ "DBLP" ], "title": "Bounding Cache Miss Costs of Multithreaded Computations Under General Schedulers: Extended Abstract", "venue": "SPAA", "year": 2017 }, "7da5e182742802b64e64858b1a03254ff127abc0": { "authors": [ { "ids": [ "2127809" ], "name": "Pierre Olivier" }, { "ids": [ "1786348" ], "name": "Sang-Hoon Kim" }, { "ids": [ "1729107" ], "name": "Binoy Ravindran" } ], "doi": "10.1145/3102980.3103009", "doiUrl": "https://doi.org/10.1145/3102980.3103009", "entities": [ "Central processing unit", "Computer cluster", "Data center", "Distributed shared memory", "Operating system", "Process migration", "Programmer", "Scheduling (computing)", "Shared memory", "Single system image", "System image" ], "id": "7da5e182742802b64e64858b1a03254ff127abc0", "inCitations": [], "journalName": "", "journalPages": "174-179", "journalVolume": "", "outCitations": [ "c251fc6c99d8b515f3f0844604a21af92cce647f", "2960c89331eb7afa86584792e2e11dbf6a125820", "26cd9c812c279347ae96db31cee1cbee0f646fa4", "2583d51a7aafc4e4e3c9bdcd1fa8a978f7d81bc5", "0081c1fcb079f87147a68565764b59923c918d9c", "3ed84f2fbdc4dc6450919ec5b017e66440a5833c", "5b9e70db26c4981315ea83ecdaef24fffaebdee0", "beee9a013c359d9745c9de8ed7c1ee38d824882d", "263f588edb69272ccbf9f1b78a9625b914235f6d", "6330f075daf847554007b236b57293f8ccebca64", "5dc5b799d6d161d5c2805917d680d1eb7314fdf5", "57d05ed8b82a86dfa31ecdea1f581c7de2e49a5d", 
"7515af5d111ded65acd5a192bcffc64819a769ec", "573e9cb890d39c790b58bfa805526d40e8b472ec", "d91ea0b718321e2d30df9c73c88c5658c5a5b56d", "1d4ac1fd1706a1ab3d93d8fa481d12332068fc30", "42765ef8d032b9febab4bda43517b66cb8fba801", "b8b8d587cdc6bd98515fe760ae9b34da335d94de", "1cd81ebacba40806b9dce8eb1a89ad7009705575", "0270c2056eb50b5d4597afa722c50abf21e67a82", "691797cd3473df4470c653e643072a9063eb0504" ], "paperAbstract": "The datacenter is becoming fully heterogeneous, integrating multiple OS-capable CPUs of different Instruction Set Architectures in separate machines. These machines present diverse performance and power consumption profiles and we show that significant potential benefits for both metrics can be expected, should these machines be able to cooperate in the processing of datacenter, multi-programmed workloads. We advocate that this cooperation should be enabled at the level of the OS, relieving the programmer from any effort related to the heterogeneity of the managed machines. We propose a distributed OS architecture running on a fully heterogeneous computer cluster, enabling this cooperation through three main components: the abstraction of the entire cluster in a single system image, a distributed shared memory system, and a heterogeneous scheduler.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3103009", "http://www.ssrg.ece.vt.edu/papers/hotOS_2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7da5e182742802b64e64858b1a03254ff127abc0", "sources": [ "DBLP" ], "title": "OS Support for Thread Migration and Distribution in the Fully Heterogeneous Datacenter", "venue": "HotOS", "year": 2017 }, "7db7a5152c5dec4a5871ea64cf246c9076cb98a3": { "authors": [ { "ids": [ "1698602" ], "name": "Yu Chen" }, { "ids": [ "1683893" ], "name": "Ke Yi" } ], "doi": "10.1145/3035918.3035921", "doiUrl": "https://doi.org/10.1145/3035918.3035921", "entities": [ "Algorithm", "Foreign key", "Many-to-many", "Program optimization", "Query 
optimization", "Sampling (signal processing)", "Selection bias", "Unique key" ], "id": "7db7a5152c5dec4a5871ea64cf246c9076cb98a3", "inCitations": [ "5911afa17128e62c1acd352cd7e943566601755a", "6072cf5f9e7f1b4ed590717cee1d62d437e2e5f4", "2b6a2ec50b841f435a89b1711001ee8bf776a760", "20a60403555b21eadc38bfe95ad1f0baf13ac13b" ], "journalName": "", "journalPages": "759-774", "journalVolume": "", "outCitations": [ "06e49a2331745a33eca328b6883fed63ae2ac5b3", "11eb8c2027e0422f3ef8de7720525bbe4897ff36", "280b697a7acd66f2835ef4220984fafd79b8e96e", "ca0b88a7f00fb55448436abe01a725df7517f060", "4b163245cdc7a1d80ded5e26424fb382910965b9", "7a278ee0578f194700cadc3811cdda4ec751f88a", "77ec1a0ec386e019022933e8a19da59715f39e29", "6922e7fce434b841c1666fc6f7f5fc029269f7ad", "dbe3720c170f39241ccf07a8f64dd0a176c7ec83", "27341f28a56ec5d37ac81835d9ab2a8bb8864be3", "00aa614734a26a19b09a0a3bdee2adc77bdac5e4", "24763030fb1e9813dad51d28bea9c5d1414f9cda", "2a365b450f8610a25d32b966612dcf0359b39a72", "076354ae1ff33fc9efbe916d3ca5b6463d1533f0", "23c5939aaf3828f5201120543dc0da4227b5a77d", "043cf6a9e18cb499da540ffd58b37086158ffd68", "0026eee31421ce11c665c8a5de319f1f492f4060", "c8bd4caf0fc9c8a4fbffc7e05416901d4fd7a41b", "37e0d25940bd49022c41e63909532acd88eb16b9", "da01f7fcc5c7eeba75bc09a41fdd946e65210090" ], "paperAbstract": "Join size estimation is a critical step in query optimization, and has been extensively studied in the literature. Among the many techniques, sampling based approaches are particularly appealing, due to their ability to handle arbitrary selection predicates. In this paper, we propose a new sampling algorithm for join size estimation, called two-level sampling, which combines the advantages of three previous sampling methods while making further improvements. Both analytical and empirical comparisons show that the new algorithm outperforms all the previous algorithms on a variety of joins, including primary key-foreign key joins, many-to-many joins, and multi-table joins. 
The new sampling algorithm is also very easy to implement, requiring just one pass over the data. It only relies on some basic statistical information about the data, such as the ℓk-norms and the heavy hitters.", "pdfUrls": [ "http://home.cse.ust.hk/~yike/sigmod17.pdf", "http://www.cse.ust.hk/~yike/sigmod17.pdf", "http://doi.acm.org/10.1145/3035918.3035921" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7db7a5152c5dec4a5871ea64cf246c9076cb98a3", "sources": [ "DBLP" ], "title": "Two-Level Sampling for Join Size Estimation", "venue": "SIGMOD Conference", "year": 2017 }, "7dd462e1dcb0d348e6d35d2f76ab22a812b1da34": { "authors": [ { "ids": [ "2338627" ], "name": "Jiarui Fang" }, { "ids": [ "1711877" ], "name": "Haohuan Fu" }, { "ids": [ "2694567" ], "name": "Wenlai Zhao" }, { "ids": [ "6464022" ], "name": "Bingwei Chen" }, { "ids": [ "2793777" ], "name": "Weijie Zheng" }, { "ids": [ "1689072" ], "name": "Guangwen Yang" } ], "doi": "10.1109/IPDPS.2017.20", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.20", "entities": [ "Artificial neural network", "Convolution", "Convolutional neural network", "Deep learning", "Double-precision floating-point format", "Graphics processing unit", "Pipeline (computing)", "Program optimization", "SW26010", "Speedup", "Sunway", "Sunway TaihuLight", "Supercomputer" ], "id": "7dd462e1dcb0d348e6d35d2f76ab22a812b1da34", "inCitations": [ "2505deb6860ef0af17eab6f5f7d2161f0c9db2d0", "e45dea6588d1de0a23618e019031e67eedeeee26" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "615-624", "journalVolume": "", "outCitations": [ "39f63dbdce9207b87878290c0e3983e84cfcecd9", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "4fe1c707a48869cbbdf3eb0384e526d1d294f7e2", "c382406fd8db2744b2a609837395e5da05e1d2ed", "402da07a0ac4645e26370ff5ac8ab3540257a8ab", "81b7dcaef4a53daab41658a4d1e97972d04b3384", "1740eb993cc8ca81f1e46ddaadce1f917e8000b5", 
"4788873f23fbfbca24744f0fa0d8e602c9403fba", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "c2c10045880d31dc011fb2ff2935f910f9fcd182", "14b5e8ba23860f440ea83ed4770e662b2a111119", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "68837728232463651283edbb7ef0c93b2f502b2b", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "b76269bf962989ce271bef7ea863ff4adf9c9de6", "b4eac8295c90dbfb7d8d22ba560e025621287c58", "092217c2267f6e0673590aa151d811e579ff7760", "2ffc74bec88d8762a613256589891ff323123e99", "1d696a1beb42515ab16f3a9f6f72584a41492a03", "31868290adf1c000c611dfc966b514d5a34e8d23", "326d65827307862ddc3d39b84ebc662e83ff95b3", "061356704ec86334dbbc073985375fe13cd39088" ], "paperAbstract": "To explore the potential of training complex deep neural networks (DNNs) on other commercial chips rather than GPUs, we report our work on swDNN, which is a highly-efficient library for accelerating deep learning applications on the newly announced world-leading supercomputer, Sunway TaihuLight. Targeting SW26010 processor, we derive a performance model that guides us in the process of identifying the most suitable approach for mapping the convolutional neural networks (CNNs) onto the 260 cores within the chip. By performing a systematic optimization that explores major factors, such as organization of convolution loops, blocking techniques, register data communication schemes, as well as reordering strategies for the two pipelines of instructions, we manage to achieve a double-precision performance over 1.6 Tflops for the convolution kernel, achieving 54% of the theoretical peak. 
Compared with Tesla K40m with cuDNNv5, swDNN results in 1.91-9.75x performance speedup in an evaluation with over 100 parameter configurations.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.20" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7dd462e1dcb0d348e6d35d2f76ab22a812b1da34", "sources": [ "DBLP" ], "title": "swDNN: A Library for Accelerating Deep Learning Applications on Sunway TaihuLight", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "7dd98badf2d3accb703f0358761f2d86e5d85194": { "authors": [ { "ids": [ "2416829" ], "name": "Yaohui Chen" }, { "ids": [ "2581467" ], "name": "Dongli Zhang" }, { "ids": [ "2770380" ], "name": "Ruowen Wang" }, { "ids": [ "1750724" ], "name": "Rui Qiao" }, { "ids": [ "32115586" ], "name": "Ahmed M. Azab" }, { "ids": [ "6952630" ], "name": "Long Lu" }, { "ids": [ "2765428" ], "name": "Hayawardh Vijayakumar" }, { "ids": [ "2464334" ], "name": "Wenbo Shen" } ], "doi": "10.1109/SP.2017.30", "doiUrl": "https://doi.org/10.1109/SP.2017.30", "entities": [ "ARM architecture", "Address space", "Address space layout randomization", "Binary file", "Binary hardening", "Code reuse", "Commercial software", "Compiler", "Debugging", "In-place algorithm", "InterPro", "Memory footprint", "Mobile device", "Software incompatibility", "XML Object Model" ], "id": "7dd98badf2d3accb703f0358761f2d86e5d85194", "inCitations": [ "565d52ee2df2e8bc9ae5e05b416c9aaa596cbac4", "723931de6d91a965bc2fa24ac649291c9f1a4639", "c06c2a045db65cb1e86e87f4b547a8dd2571bcdb" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "304-319", "journalVolume": "", "outCitations": [ "23e8236644775fd5d8ff5536ba06b960e19f904b", "0ed6a4c8fa2c74ca2e470df6e205797371523edc", "4acdc975e3507a6e26a47f3858a74ec0de75ae38", "2b004e0484f4940fca341fa97ecb8ac94fe780a5", "7b2cf50a197888a3eb273d0ef056e93c581aa272", "07f9aa35346107785ad6b6cd07537b24fa7bf0d2", 
"3fa27974cade47e98993b98798f73594b902583b", "c06c2a045db65cb1e86e87f4b547a8dd2571bcdb", "2947959aa2cfc45719fac7a54812614d1fa8707f", "0988a425689f6f3700e797f4a2c18f73692573c3", "684be7334614fd40f1a00c29d9a45838b6534714", "0fc7f3a21359665c456853e3fe09c9a5c4a24f37", "412c0be5520e80ab6f8c3662477a28e3b9ccb943", "6a8f65381a627a2db6c756a7185d9106f0acefec", "116eaac2e498bc2c9bea10ea838309dcf143d764", "3e1ff9416ec1c2f94d0ddbeb6171eea4c336d663", "50117210fbb7a8ce28ba503280f3c65fdc029ec7", "4931ec923c348c2f0adf9e8cba7ad239075048bd", "1de15306de89ab834561ef1cd187ec607c2a1b9e", "05c49820bb35d0b8d7a2168a9124e506a0334b57", "0639adf09bea98bef72d77c3671ccd8ef32ff542", "4b9426a328fb4e379e2fab57fc72a91c5618ae94", "63eca2d9de958abab6a20f0696789ccb6f1b8aa1", "b58a85e46d365e47ce937ccc09d60fbcd0fc22d4", "48a8e9d8a41009eb6b7733b139eb5eff30d72776", "70e8eb54e20c504b9a3c52f5a5ea4d8fbb3d985c", "77cab38bef9d1b8b7bcfd3bb382c9d55541465e1", "03f827395a17beb941241dbd72322705bdf79791", "53396c842bc8a94575470fab3acb4aef91c5073d", "1798b9bc347ca826724b6d80766200ebaad8dfb0", "67b086caacc543b7d30b2f006f77a315bc9572e0", "638297d9b5c8e0e83ca5acfbf1325196ea0bbb3c", "67ca2402dc696119d9bf5cd4c30d9d32becdd8d0", "12d5c3bcd2dd94b4f07f358be291c5906b16dba4", "48a8b9bd6df63b13093cd67ee61911e7a485be57", "9b2585f7248c8b5a22e9c816506e01060213ca85", "387e571981a8ee2bd49b1f30563e3a3a215e3b65", "422c2d83a959df1f7c3e99b8a2c77772d8b2e7c3", "0e039df712774fcea67f214d9b5780c1dc250747" ], "paperAbstract": "Code reuse attacks exploiting memory disclosure vulnerabilities can bypass all deployed mitigations. One promising defense against this class of attacks is to enable execute-only memory (XOM) protection on top of fine-grained address space layout randomization (ASLR). However, recent works implementing XOM, despite their efficacy, only protect programs that have been (re)built with new compiler support, leaving commercial-off-the-shelf (COTS) binaries and source-unavailable programs unprotected. 
We present the design and implementation of NORAX, a practical system that retrofits XOM into stripped COTS binaries on AArch64 platforms. Unlike previous techniques, NORAX requires neither source code nor debugging symbols. NORAX statically transforms existing binaries so that during runtime their code sections can be loaded into XOM memory pages with embedded data relocated and data references properly updated. NORAX allows transformed binaries to leverage the new hardware-based XOM support—a feature widely available on AArch64 platforms (e.g., recent mobile devices) yet virtually unused due to the incompatibility of existing binaries. Furthermore, NORAX is designed to co-exist with other COTS binary hardening techniques, such as in-place randomization (IPR). We apply NORAX to the commonly used Android system binaries running on SAMSUNG Galaxy S6 and LG Nexus 5X devices. The results show that NORAX on average slows down the execution of transformed binaries by 1.18% and increases their memory footprint by 2.21%, suggesting NORAX is practical for real-world adoption.", "pdfUrls": [ "http://finallyjustice.github.io/norax.pdf", "http://seclab.cs.sunysb.edu/seclab/pubs/norax.pdf", "https://doi.org/10.1109/SP.2017.30", "https://csaw.engineering.nyu.edu/application/files/5915/0825/7164/CSAW17_paper_83.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7dd98badf2d3accb703f0358761f2d86e5d85194", "sources": [ "DBLP" ], "title": "NORAX: Enabling Execute-Only Memory for COTS Binaries on AArch64", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "7df587e7efe8f35c3232b7729d9fbf25f5227b05": { "authors": [ { "ids": [ "2056584" ], "name": "Ruslan Mavlyutov" }, { "ids": [ "1692732" ], "name": "Carlo Curino" }, { "ids": [ "7821366" ], "name": "Boris Asipov" }, { "ids": [ "1680925" ], "name": "Philippe Cudr\u00e9-Mauroux" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithmic efficiency", "Big data", "Brute-force attack", 
"Curiously recurring template pattern", "Data infrastructure", "Jumpstart Our Business Startups Act", "Open research", "Petabyte" ], "id": "7df587e7efe8f35c3232b7729d9fbf25f5227b05", "inCitations": [ "08f13e484e7e51831ec13076d14570ced91a50fb", "2274f61d00020b0e596b61e113ed16f23f8c0403", "7c11b349296003d6406c10c96aa223cfa8f5f542" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "0fb2ab7176f91e34061b128c86ef100401a1b037", "2009b3c05dd9084a0a1c609abcedd81713ed7150", "239ffe260e5a13c43d7131200a891194e94ff767", "0f5c9968fe2cdb0f52c55b2d5b3dec7accf91306", "53071eb97aec4ce20d6be5769063806c48b264b5", "0964ac250b81a2caa85dd172527f07a9ffc8230b", "1359d01962b882c95607a75aeafeb532188cb159", "1df05b37ab38851a7537f5a7d1cc31d60ab819dd", "063227819c189992e1c579c8a5b9a68d2b8e50a4", "6857346e57e49509dd6d8bf9f95d2dcca378903c", "35339f6f2e99c04920f21883df1db8004436cdc7", "4dfdd7cd8abbd68675ea19c5902e5a7d14709799", "64a098e7de5b3200ec7513d068bf5760871025f8" ], "paperAbstract": "In this paper, we predict the rise of Dependency-Driven Analytics (DDA), a new class of data analytics designed to cope with growing volumes of unstructured data. DDA drastically reduces the cognitive burden of data analysis by systematically leveraging a compact dependency graph derived from the raw data. The computational cost associated with the analysis is also reduced substantially, as the graph acts as an index for commonly accessed data items. We built a system supporting DDA using off-the-shelf Big Data and graph DB technologies, and deployed it in production at Microsoft to support the analysis of the exhaust of our Big Data infrastructure producing petabytes of system logs daily. The dependency graph in this setting captures lineage information among jobs and files and is used to guide the analysis of telemetry data. 
We qualitatively discuss the improvement over the brute-force analytics our users used to performed by considering a series of practical applications, including: job auditing and compliance, automated SLO extraction of recurring tasks, and global job ranking. We conclude by discussing the shortcomings of our current implementation and by presenting some of the open research challenges for Dependency-Driven Analytics that we plan to tackle next.", "pdfUrls": [ "https://exascale.info/assets/pdf/cidr2017_dependency-driven-analytics.pdf", "http://cidrdb.org/cidr2017/papers/p59-mavlyutov-cidr17.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2016/10/Provenance-2016-TR.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/a815/35434090ee0d8d1fa160f5b639b3fb5077cb.pdf", "s2Url": "https://semanticscholar.org/paper/7df587e7efe8f35c3232b7729d9fbf25f5227b05", "sources": [ "DBLP" ], "title": "Dependency-Driven Analytics: A Compass for Uncharted Data Oceans", "venue": "CIDR", "year": 2017 }, "7dfd6cbb18a8c14b10dba08a7323b82583596eb6": { "authors": [ { "ids": [ "2635891" ], "name": "Lucas Waye" }, { "ids": [ "1985762" ], "name": "Pablo Buiras" }, { "ids": [ "2208884" ], "name": "Owen Arden" }, { "ids": [ "1761830" ], "name": "Alejandro Russo" }, { "ids": [ "34674218" ], "name": "Stephen Chong" } ], "doi": "10.1145/3133956.3134036", "doiUrl": "https://doi.org/10.1145/3133956.3134036", "entities": [ "Confidentiality", "Cryptography", "Information flow", "Information flow (information theory)", "Non-interference (security)", "Programming language" ], "id": "7dfd6cbb18a8c14b10dba08a7323b82583596eb6", "inCitations": [], "journalName": "", "journalPages": "1893-1907", "journalVolume": "", "outCitations": [ "7230f80b75c774b878c70de2290ceea5d624d353", "74c378daae64645ae3bde445908e053a147686a5", "6a74a8573cb1bd15c5f4fa4e047613d2340e61b9", "627f4311a61f0601e2e484e1526b91825ae7c171", "090145f00e1db235c5b6762b300bd6295ef7e582", 
"0d2b12d9ffc0bd889fe632721f58a7c6152a0efc", "2342738aab04922f8e5128a4ea0b3e4d387d22e4", "0f35529ec8233263a7242110d7a940bbb2c41bf8", "4b23b5946ae3027c7b13a3fa20102641596def40", "0070255ef5a64ee13e3460c55ef68ab650fd4f75", "1cf87af22b3b4dd0ff1144d861e0573121d8de2e", "612abb8a9bb99f981e4f287cea8f656cb2d4cd8a", "84ced55aa48d1caaba050e59943db60d54ed5a07", "03c506f2952aae528039e9108a35a13243dfcc8b", "88397fcb33165c16b6d6ba91bbcde41304fa7ec1", "008a307e2531033fbd4435cb57972f195bd40e58", "e6370f30fb09a5606c70c12b8c909969baed3fed", "600ce036648f6d92b2a4aa7ac5e20407afba7e9b", "a30f58f9c7c2c96ec8569cf6cfb7eaf71490bb12", "75dabcc0ad68c5cd24f727414ff465ee204ed407", "2668a13443ce950199a1f7219b83024a6fa41ac3", "2178c148c525dad8ad9e2d666fd5771f6f74ac01", "32f33b2b0801d70a46f23fa719a3e507586cff85", "11a68b5de90fc3f0b56f1acdfe688b91eff1b1ba", "59928e58a7f2f898346618b4fba512fe9157a7b1", "a18c51ae4419a12b884b0fbf520b2ffc713f12c1", "2fedd16fcfdd6361f23b66a2b572f02856ed28b6", "08f3d15318d657323136cb185805876f608325b5", "9e7f61ba4926be2c6185fdac3acc76cef30b4739", "6db178ae20979e4a1c45c8993efd3f44a2bf3df2", "0888c688ff388897ab5dd8574e0496c07bdb78cc", "bf83639acb00f5169a834d0585c826c9bfe6f8ec", "31b73eaf5f3f36323f7373b38c550f0f74d520e2", "6678d04a83e18d8937ab666a82619a8354d9d4e9", "304c2b0b6e8e1cc746117a14257dbc024d5135e9", "4d04f238b965f7d367ff2b6cf18a40a05742d3d2", "389f55c5c376db4ce1c88161dca98c329614faa8", "4be77d8e77c95deac350e757220c45c051ace4f5", "29e45dd3c4a986f2b2d8036994e693ec332752d8", "29711dc4c5ddbebcb09b1409289d856ab12e1bf4", "48b9b7893d90b54845e116364b277af5f071be03", "09e45129e1997d0d074cebc87759079d79a6cd1c", "0eed9ea4963fcd4cd7384085b7c4804810d948d5", "52bf4f178bfa5ece1d779efa6233dd4a06bf936b", "1909d80df329a636fd91990e5c7f33a4f5ef482b", "2b6df21137f30d25494bb58521a6062f93e915f8", "109ee1e1d4f4a9595a78d1edb38862cdfc3b08bf", "2baf1c8a26ad2fe930ae79712eb08913107d77fe", "5c79e3282d45f3836c60f89b93ff2512c507dd7b", "5f4268edf3d28aacfe928af6719cdd3082207a5e", 
"545476e378e77049052d0b2a7b43f35b9bfb93be", "6b83d668c5006615d871329107627dc4e5cb3ea1", "a89018b5e1bd0866922842ff33952b7d04ecc2ec" ], "paperAbstract": "We present Clio, an information flow control (IFC) system that transparently incorporates cryptography to enforce confidentiality and integrity policies on untrusted storage. Clio insulates developers from explicitly manipulating keys and cryptographic primitives by leveraging the policy language of the IFC system to automatically use the appropriate keys and correct cryptographic operations. We prove that Clio is secure with a novel proof technique that is based on a proof style from cryptography together with standard programming languages results. We present a prototype Clio implementation and a case study that demonstrates Clio's practicality.", "pdfUrls": [ "http://arxiv.org/abs/1708.08895", "https://users.soe.ucsc.edu/~owen/publications/pdfs/clio_ccs17.pdf", "http://lucaswaye.com/papers/clio-ccs17.pdf", "https://arxiv.org/pdf/1708.08895v1.pdf", "http://doi.acm.org/10.1145/3133956.3134036" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7dfd6cbb18a8c14b10dba08a7323b82583596eb6", "sources": [ "DBLP" ], "title": "Cryptographically Secure Information Flow Control on Key-Value Stores", "venue": "CCS", "year": 2017 }, "7e2920998cf5070b89b4ba86f4d902746a41c36e": { "authors": [ { "ids": [ "1730909" ], "name": "Jeremy S. Logan" }, { "ids": [ "32485139" ], "name": "Jong Youl Choi" }, { "ids": [ "4003076" ], "name": "Matthew Wolf" }, { "ids": [ "1781276" ], "name": "George Ostrouchov" }, { "ids": [ "39341300" ], "name": "Lipeng Wan" }, { "ids": [ "1734819" ], "name": "Norbert Podhorszki" }, { "ids": [ "40513398" ], "name": "William Godoy" }, { "ids": [ "1736095" ], "name": "Scott Klasky" }, { "ids": [ "38949055" ], "name": "Erich Lohrmann" }, { "ids": [ "2315880" ], "name": "Greg Eisenhauer" }, { "ids": [ "39117045" ], "name": "Chad Wood" }, { "ids": [ "1754335" ], "name": "Kevin A. 
Huck" } ], "doi": "10.1109/CLUSTER.2017.30", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.30", "entities": [ "Domain-specific language", "HPC Challenge Benchmark", "Memory hierarchy", "Middleware", "Program optimization" ], "id": "7e2920998cf5070b89b4ba86f4d902746a41c36e", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "563-571", "journalVolume": "", "outCitations": [ "a9efb0d5dde11d1b58602b2fc97b64d8caeebaf0", "64d4f6759b32697e6cbebf901624c93c0a0c1744", "4aa70e2060966dfb90d9073526f23f101191d7a7", "139ddc4090cbbfaa0bf8721916336605c0551f11", "1a01d6c6fe6af72880865f6061ccaddf9893371b", "589e89d77f689ebfc3f36bc1f76fd518ae4a237c", "3475f3916ef2a424c0945e329fae80d38e05c0a4", "33f73088b95e3d6eed31e8ea9048b1a373f0bd75", "5d25b4a77268437aa669e272cc81b56ed184e0b6", "5121837e40f54742fbd26503c7ca76e68ced467a", "4e63fd64ac90778b8c431e37f54b5cfd01e0f379", "7777d299e7b4217fc4b80234994b5a68b3031199", "1b5f6f1ba2f54d339b20dda3bcf5ecd90b878940", "32d54e4c8a18f3e0e32df291fe00f60a05573258", "3273a1140a0e57508203c366f053e9c3153812ab", "81e64ec92c0dfe8e93b77e22155e3fbd637d5dce", "cb0af764ccf08ee29934d704ff03be338289e1a6", "7717cb7fbbf26557238c2ef847d0a48def176d0b", "66b8e04aa277b17daaa1c2d1929d6f0714a5b968", "0d98b995638e1aa0de2f4a66cb727b05fea99b89", "093fc19d440f33247e545ec6c047e0aa0afb0863", "adfa760f21654a2a508b4b6646314010e1eeb039", "83f2087f3c602d043277927380e35885879210f5", "7af6c88b8ac93a14b324a17e8417f2c75dab58f5" ], "paperAbstract": "As the memory and storage hierarchy get deeper and more complex, it is important to have new benchmarks and evaluation tools that allow us to explore the emerging middleware solutions to use this hierarchy. Skel is a tool aimed at automating and refining this process of studying HPC I/O performance. It works by generating application I/O kernel/benchmarks as determined by a domain-specific model. 
This paper provides some techniques for extending Skel to address new situations and to answer new research questions. For example, we document use cases as diverse as using Skel to troubleshoot I/O performance issues for remote users, refining an I/O system model, and facilitating the development and testing of a mechanism for runtime monitoring and performance analytics. We also discuss data oriented extensions to Skel to support the study of compression techniques for Exascale scientific data management.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.30" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7e2920998cf5070b89b4ba86f4d902746a41c36e", "sources": [ "DBLP" ], "title": "Extending Skel to Support the Development and Optimization of Next Generation I/O Systems", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "7e32e068c7471c5dfd139c8a563e4644dd3d54eb": { "authors": [ { "ids": [ "14043523" ], "name": "Melissa Chase" }, { "ids": [ "3151466" ], "name": "David Derler" }, { "ids": [ "2642595" ], "name": "Steven Goldfeder" }, { "ids": [ "34593606" ], "name": "Claudio Orlandi" }, { "ids": [ "8206995" ], "name": "Sebastian Ramacher" }, { "ids": [ "2747546" ], "name": "Christian Rechberger" }, { "ids": [ "2773945" ], "name": "Daniel Slamanig" }, { "ids": [ "1774608" ], "name": "Gregory M. 
Zaverucha" } ], "doi": "10.1145/3133956.3133997", "doiUrl": "https://doi.org/10.1145/3133956.3133997", "entities": [ "Algorithmic efficiency", "Benchmark (computing)", "Cipher", "Digital signature", "Interactivity", "Key (cryptography)", "Non-interactive zero-knowledge proof", "One-way function", "Post-quantum cryptography", "Public-key cryptography", "Quantum", "Quantum mechanics", "Random oracle", "S transform", "Symmetric-key algorithm", "Type signature", "Zero", "Zero-knowledge proof" ], "id": "7e32e068c7471c5dfd139c8a563e4644dd3d54eb", "inCitations": [ "6db9824d4667b22310c51fe638403238f873e9f2", "2318052be839a9d6c3ebeaf097f571afed135525", "133eba30fbd96f0551d692c76f4c851d4d2f9f27", "7cf2491021da0f120845cf24fba23bc43ec692ad", "2c4cc18223fec4b06cb8ea50dae1e6b2ebce0971", "2f7b4ee46d284664fd1a4a679d1e610e2954ca8b", "80621d09c3d3dd896c7e2bff083b9e702dc2ed29" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "279", "journalVolume": "2017", "outCitations": [ "3cdae223c942c0ce876e92890849ceaa85752a6c", "068f5d6def4a5e28fd57d1b6672b598dc22e75a9", "020ff6532a58ee426217cf4e75ed64af4658bd6a", "3d67ccdbcdda14a7fee21261884cff4bd9ca966f", "4ff0de33c6c9e63055413f0ec3e8b75e1a23ac45", "f67989d0b37d10543a6352eb7173621568d32557", "0da99da07bfac01a7daa38ac7742e6c539261e3b", "a5048bfa4f2a7ff43c4ca38c03788ced98010fd3", "710c227f1da46ba3cfc3ff323af1e7fe237ce61b", "696c7e456d94e1af6a2ebf127520023774e7cfac", "743d648bbea4d75ae189ef8b4a4aa0b00218b1ce", "1144078fe05a113c02d068962be9d17d0f2b9e53", "679f0e91fa2ec7579aa0fd667191ccb45ecf86b7", "26e47313c3aa208026a4e9a92e76e53aa7fc8205", "f69b2781cfcccf5c6803b5ba0e7f9180650bd2a7", "394257e099e25217b57e3527e3d00e9411ef1872", "00ae47997f1770f738e4d7bfb6917eaac89c18ce", "59f68160ff43ac19b318639e879d4b1372f42caa", "79d47283082099b16f8a501bf4a397334b603442", "7f14bca3b6f51d4cfe8084798c2808c08b0214d6", "740bdeb418fbf60a87cb92edf3cd9ef77112d922", "0df5aebb4563cbcf1d9b02cec4742aea3bea78c2", 
"79a4259799db4239ddedf8b0b7dde1042e21ec80", "3ad556181c59e22dd8138e43102b7daab7b1546f", "b0cdfd144c83a6f4c3966e1b071f3f6f5f579e5c", "20d1c524fd1557e76a687ffab57f96364beb6fe7", "1249b6c557fbe528329392e0ec4abcd67d1d975d", "16d916b69aa99a32d999068a376a5b4e96bfbd07", "ae6530e9c04f67f00027bb0705071cbf1d1f8198", "b26de63ff444be172e48c05aae7dd01e1e975c91", "8c43726d7229a6dea25d3f510ca4b8e681117b60", "0c272cc1fe6f3613ba8dba35d377aaacd7ef44fd", "ed1de67a614f64c629127c60f0793aa036557ed2", "c6de221f652e0978d2d1890f4542486da1c61c3a", "d94ba52bd05fe12926836c3332e2fd895e78ed30", "4284153a0bf0aa3d0f94ad3113f4d117e4767bef", "444e66dc4dea22b2e61203ed237d771feff93457", "88af46daa6ee6e09a9e7fc17fccf509e589fa183", "3a36dd56044a0924db09d8e90bce2b5324df905f", "262650ebd04f86b919f84b80d18b748bd0d1ba12", "026848a58faaa17a393a50d703257cc2496659ad", "4cac89a16695cf2c876f55ee19b0f1c3261f4d93", "f0ba66072ac10d9898b8a79171ec726d45ec804b", "13c26e30558f739815bcd64d021733952aaa8dd0", "c5cfd7772efd1ece94837828d17f66732e48f7d2", "017ef8514a1bdc1fa0dbff8deb0d3342c85a84a2", "9c9dc6e15bedebf021415fa28e1c4571b6ac0083", "f7bdf9eb72a5de39579ab44886defe0116af6397", "3624eaf7c2f05612429ed1579cc82102c11f4d65", "0e7276629b06ed57fb240cfc1c9bda19145f4663", "c3ab907bacb9426a0ff79151bded4aacb61b21cf", "4036e9eeaf14c4ef3c386debcdc59164efe53b00", "17ab1207ba042433064befb730f465ffa31b8121", "43fb8926f0fce498351c442cc49beab0e6f7151f", "1b6c1efb9725a3ba0b88a22bf048b2b207898b44", "9eff11dca715a931d40cafd8c61673d8b1faf08a", "092ec581fdeb47176065592a2706ddf574efdc29", "b9f07d4251f12464c0f4cdd06b8e4ec6a091e592", "4162b084682391b0a328f470f40d0f8f4aff13fb", "17e432a1b350f4db7b4d1bf146a61110f64a5a44", "4dc3dc96281fee8c676578546c98c295219aa7fc", "101bb77cb2c8b2bfcde41973ac3473db325d7e6d", "1d02b4a41f21e94d5530bcb2df72fbeea5d310e0", "d02395e591790fec4d9cf7ae293b12faf806942f", "069e80c561872ed678c1b2adf10d75ce66e4ff78", "0f84edc6778b53deb2be9a8b6b6fcafd3f8a9c09", "be08d4216f66b27d578e0dfc48573495d41934e9", 
"1792b1b298148d5dd628dec7769be8c1a460be6a", "3a11477d546d5cdd921732766526f6b4a65c42f1", "90f26c0f0d04b9c4999b454c35ee1c7603ca9e4b", "d4673ffd554b2233345c2a931903ecbdaf32d8bb", "03347b06cc7639533d1da36003a8a658accf3f9d", "1b4a4a28e3fd5c1712f538b8ef5f0a2dfb6f0601", "0cf7b49040a2027044f2bdc2b3c9ca7a974f17b4", "b1a0bc84c8b00ac071ad9fba74f36d6f295106ad" ], "paperAbstract": "We propose a new class of post-quantum digital signature schemes that: (a) derive their security entirely from the security of symmetric-key primitives, believed to be quantum-secure, and (b) have extremely small keypairs, and, (c) are highly parameterizable.\n In our signature constructions, the public key is an image y=f(x) of a one-way function f and secret key x. A signature is a non-interactive zero-knowledge proof of x, that incorporates a message to be signed. For this proof, we leverage recent progress of Giacomelli et al. (USENIX'16) in constructing an efficient Σ-protocol for statements over general circuits. We improve this Σ-protocol to reduce proof sizes by a factor of two, at no additional computational cost. While this is of independent interest as it yields more compact proofs for any circuit, it also decreases our signature sizes.\n We consider two possibilities to make the proof non-interactive: the Fiat-Shamir transform and Unruh's transform (EUROCRYPT'12, '15,'16). The former has smaller signatures, while the latter has a security analysis in the quantum-accessible random oracle model. 
By customizing Unruh's transform to our application, the overhead is reduced to 1.6x when compared to the Fiat-Shamir transform, which does not have a rigorous post-quantum security analysis.\n We implement and benchmark both approaches and explore the possible choice of f, taking advantage of the recent trend to strive for practical symmetric ciphers with a particularly low number of multiplications and end up using Low MC (EUROCRYPT'15).", "pdfUrls": [ "https://eprint.iacr.org/2017/279.pdf", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/04/eprint-CDGORRSZ17-279.pdf", "http://doi.acm.org/10.1145/3133956.3133997", "http://ramacher.at/_static/talks/cryptosymposium.pdf", "http://eprint.iacr.org/2017/279" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7e32e068c7471c5dfd139c8a563e4644dd3d54eb", "sources": [ "DBLP" ], "title": "Post-Quantum Zero-Knowledge and Signatures from Symmetric-Key Primitives", "venue": "CCS", "year": 2017 }, "7e3fdbbad04a39b2f44436287668f1a682e26ab0": { "authors": [ { "ids": [ "1746771" ], "name": "Gokcen Kestor" }, { "ids": [ "1679176" ], "name": "Sriram Krishnamoorthy" }, { "ids": [ "33955807" ], "name": "Wenjing Ma" } ], "doi": "10.1109/IPDPS.2017.75", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.75", "entities": [ "Algorithm", "Fail-stop", "Failure rate", "Overhead (computing)", "Scheduling (computing)", "Work stealing" ], "id": "7e3fdbbad04a39b2f44436287668f1a682e26ab0", "inCitations": [ "1130a13b74e11b99d5233dce7f157d54cfea4ed1", "b3811891b083090da650459bcee017594d794389", "36f83f372ec018a79ad563e5a78f3cf7f4bad292" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "397-408", "journalVolume": "", "outCitations": [ "a8171f306b021d52d9af9f6f474748ef75492210", "5c0e8af36e20b8ea213561e8c3d706b4e2f2cc8d", "dc12b7531e42ffed5ce81398388cf04cb4ca539c", "029d525dd48347fa4b8a48dbf4b41b4b37199a6a", 
"455d253c61379bce5626fba8ef9897d3ac1307dc", "03fb875d5022a5e98f19c271e2403232acc55318", "345ff2f19178c983f2742b1f3198fa045cca2121", "01d62cd850496455ce1616500f491690effa5c98", "8a317f50aadff9853a9796a2235d11b1471af7f3", "34f310dffd51a8f1585b0a6a5ccaf83094d0d663", "0f6a32792d0882db35fe9391445d4322232b619e", "81b7a5dc0e03d1f1a0e01fde3af8684f610db591", "41cb3498cc12fbf2145e023e43f4d0b220e6a2f9", "ee8ee09169e9fb633c86e6ec3ff4c9bd4585eb66", "0fb659af82f2277c8a62ac888f4bfd01570e5470", "0ab6d7acaf684285fc256fff262f3e185b266c6c", "504986a7de84bc763bcce07c70ec7527b9ffe494", "700534b13da5b314e9735ad5fb3ae5fe543fa2f3", "2a88cb605d1fbc7dfa15aae9041c69bf03be85a1", "0df37799cedef8c3625cc554aee51e65cbcedd51", "238c566910b81c09e18b5b6812d067420500d9d9", "1a661e3daa3b5f1e04df994ca3afcf7a258bdaa6", "6e1ceacdaa0c979cf52af5f478cc2a9891e2b6a0", "292919bc42727aea7d8e22671b7d624874022854", "a0b51ccf50b61d58047aa73e3fb33df8bde410e9", "19df5d05b6da98f99619ee4584c5177bd02c8a2a" ], "paperAbstract": "Nested fork-join programs scheduled using work stealing can automatically balance load and adapt to changes in the execution environment. In this paper, we design an approach to efficiently recover from faults encountered by these programs. Specifically, we focus on localized recovery of the task space in the presence of fail-stop failures. We present an approach to efficiently track, under work stealing, the relationships between the work executed by various threads. This information is used to identify and schedule the tasks to be re-executed without interfering with normal task execution. The algorithm precisely computes the work lost, incurs minimal re-execution overhead, and can recover from an arbitrary number of failures. 
Experimental evaluation demonstrates low overheads in the absence of failures, recovery overheads on the same order as the lost work, and much lower recovery costs than alternative strategies.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.75" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7e3fdbbad04a39b2f44436287668f1a682e26ab0", "sources": [ "DBLP" ], "title": "Localized Fault Recovery for Nested Fork-Join Programs", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "7e4c7ed7c772ef43eda726afe68c25cb2e2357f3": { "authors": [ { "ids": [ "6100194" ], "name": "Ricardo Koller" }, { "ids": [ "30904419" ], "name": "Dan Williams" } ], "doi": "10.1145/3102980.3103008", "doiUrl": "https://doi.org/10.1145/3102980.3103008", "entities": [ "Battle of Midway", "Cloud computing", "Kernel (operating system)", "Linux", "Linux", "Multitenancy", "Operating system", "Peer-to-peer", "Serverless computing", "The Daily Dot", "Throughput" ], "id": "7e4c7ed7c772ef43eda726afe68c25cb2e2357f3", "inCitations": [ "33442a58af2978e0d8af8c513530474f8bed6109", "2b24b48418ea265d1666cac509e8634bba2e5461", "0657ec025e7097d06a798a4eb2f9253fdaef3e68" ], "journalName": "", "journalPages": "169-173", "journalVolume": "", "outCitations": [ "225603198cc415d363db8a8a2bd30b0df3c963b1", "6d496d510f867274473a01dcb0a1a7bf45d0904f", "9747efb725620e0767597b923f174057f7d9e82b", "22fd20f23c40ecb9044cae7ee58b76d39fcf45b6", "184c5be1a8931fa88b7da10448b4c2fb58f4e150", "5cfc936d12bbd8a0f100687b12b20e406215f30a", "aa931bfc67b3e7b56671e14facdfe7a85d26992a", "08832863bc3f041222f381c8ae143f8a66449059", "005718daa82f7880b5b1f3b01e0e8f6aad2b5751", "0d2ccae6d37b9e4053fc476887d1565de58e5924", "080b1f2c8316dad80d8c385dfcb82335a64a4d29", "f0b6e6d3da2853e3a0ea8b11ca68f5ef59ea3b93" ], "paperAbstract": "From the inception of the cloud, running multi-tenant workloads has put strain on the Linux kernel's abstractions. 
After years of having its abstractions bypassed via virtualization, the kernel has responded with a native container abstraction that is eagerly being applied in the cloud. In this paper, we point out that history is repeating itself: with the introduction of serverless computing, even the native container abstraction is ill-suited. We show that bypassing the kernel with unikernels can yield at least a factor of 6 better latency and throughput. Facing a more complex kernel than ever and a relatively undemanding computing model, we must revisit the question of whether the kernel should try to adapt, we should continue bypassing the kernel, or if it is finally time to try a new native OS for this important future cloud workload.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3103008", "https://www.sigops.org/hotos/hotos17/papers/hotos17-final99.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7e4c7ed7c772ef43eda726afe68c25cb2e2357f3", "sources": [ "DBLP" ], "title": "Will Serverless End the Dominance of Linux in the Cloud?", "venue": "HotOS", "year": 2017 }, "7e635fe196a1ee9154349d66685e9b07fd39dc43": { "authors": [ { "ids": [ "21145493" ], "name": "Zecheng He" }, { "ids": [ "1791359" ], "name": "Ruby B. 
Lee" } ], "doi": "10.1145/3123939.3124546", "doiUrl": "https://doi.org/10.1145/3123939.3124546", "entities": [ "Cache (computing)", "Computer", "Computer security", "Information flow (information theory)", "Information security", "Side-channel attack", "Simulation" ], "id": "7e635fe196a1ee9154349d66685e9b07fd39dc43", "inCitations": [], "journalName": "", "journalPages": "341-353", "journalVolume": "", "outCitations": [ "96ba6f5c06850c009e5b77094c0d4532744dedc2", "47e98574f432ec6a7a224b807a0bbd08a2a66148", "2294d7eb67a46c8f8b7a802d4711554d347cbefc", "4682c1fe28fd25a7f989535190e5c57cd8aa2667", "0dada24bcee09e1f3d556ecf81f628ecaf4659d1", "08dcda95dcbd1ebe906a1c24507b7814759462aa", "52c2c050af5b32d4929b4b193967a3675d03aea0", "0e5aded3f3a38a39882062e6204f1d672f797eb1", "5000601ff7fa91306bddb13b8da9f787cdbf5c2a", "b2fe67109faf9da943ded021b0cebaef97107ff9", "3b7e821532a852d27eacd89bcaa869a6263eb144", "25b24d6cc547c80e3a1ad037082694a5f6a2b8a9", "3401c236126f4cd50a988bb36949e40519e724dc", "4d67f221c595dbfc448e49b1b6e6bf9bfed40f7b", "934e8d76376f6c78a8b89ef2304f01a8e7099401", "bf5de374c5dd7011295ea5884ae5f3120df7214a", "12fbc18477afc5dec741a7ad630feaf8eda777f4", "008ff29ee4dbee79028e1017d6459347ad8f45d6", "c409e33e6f6b1e37632d6c0b0d5786a530392a29", "0a36afb6a8bd3fbf1267134e84a27845bbdc6aa2", "c57be0b654d6ca624dd08aa49842db5cb35b127d", "89de1d99430a6adc28b4b65da9c769d72253ff9c", "9d63945e8e83b24cdc9d281536a159b957402e65", "fe4e9863bf994fc5e98b90b10a9bdd7258f66a81", "1481ee2e4cbf51d6fdd2fd411d3405a118d037d4", "656e782fe23364e36a49aeef2d8a74126a38ea04", "16e334b961341a783eaad96891f4bdbb44b98667", "1f932f0d49a4c56d9718e8506d6177c6a6848831", "0494ff3ac38ba7f29be90c945e9fb11c76a49dbf", "57a41bdc3d1d51c0422287bbbce371f01ad42c19", "29621133de70a2769470c13a2d9c27d3a5ed9587", "c0c14c16813f0083b9e3bf602746a8be1270996a", "11c6fddeff9e2f95c8cf238ea9f12f8ffae7cf8c", "027c0969d21de0d52af6c8c7e8d63f12245382ae", "f474f64fa3302b1dcc66e7c2c0961fffac0109b0", 
"3f88373acaba2214a4462e58ee8951b66f113615" ], "paperAbstract": "Security-critical data can leak through very unexpected side channels, making side-channel attacks very dangerous threats to information security. Of these, cache-based side-channel attacks are some of the most problematic. This is because caches are essential for the performance of modern computers, but an intrinsic property of all caches - the different access times for cache hits and misses - is the property exploited to leak information in time-based cache side-channel attacks. Recently, different secure cache architectures have been proposed to defend against these attacks. However, we do not have a reliable method for evaluating a cache's resilience against different classes of cache side-channel attacks, which is the goal of this paper.\n We first propose a novel probabilistic information flow graph (PIFG) to model the interaction between the victim program, the attacker program and the cache architecture. From this model, we derive a new metric, the Probability of Attack Success (PAS), which gives a quantitative measure for evaluating a cache's resilience against a given class of cache side-channel attacks. We show the generality of our model and metric by applying them to evaluate nine different cache architectures against all four classes of cache side-channel attacks. 
Our new methodology, model and metric can help verify the security provided by different proposed secure cache architectures, and compare them in terms of their resilience to cache side-channel attacks, without the need for simulation or taping out a chip.", "pdfUrls": [ "http://palms.ee.princeton.edu/system/files/Micro-camera-ready-final.pdf", "http://doi.acm.org/10.1145/3123939.3124546" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7e635fe196a1ee9154349d66685e9b07fd39dc43", "sources": [ "DBLP" ], "title": "How secure is your cache against side-channel attacks?", "venue": "MICRO", "year": 2017 }, "7e7eca04d0e44563b5278189be53ebdf7714e85c": { "authors": [ { "ids": [ "1688105" ], "name": "Jie Huang" }, { "ids": [ "1956037" ], "name": "Oliver Schranz" }, { "ids": [ "2214494" ], "name": "Sven Bugiel" }, { "ids": [ "1749517" ], "name": "Michael Backes" } ], "doi": "10.1145/3133956.3134064", "doiUrl": "https://doi.org/10.1145/3133956.3134064", "entities": [ "Android", "Compartmentalization (information security)", "Compile time", "Compiler", "Firmware", "Library", "Monetization", "Play Store", "Privilege separation", "Sandbox (computer security)", "Software development kit" ], "id": "7e7eca04d0e44563b5278189be53ebdf7714e85c", "inCitations": [], "journalName": "", "journalPages": "1037-1049", "journalVolume": "", "outCitations": [ "7697637258eafd0529febd59cd921f1196b357b5", "f32b96ab4853031295e71c868ae2654a89aba321", "ba9aea6a06ffee76dc8b974c751cd2102c747a91", "bb4bffbc7507b82adb0a5035e78b5639a7df1b56", "9ffc6ab3ef9fc73e502e562f96b4a0516912b915", "914dbdd8f539bdb054c257fb046238d56727c0f1", "a0b9645813181e0e3e048353f628ca32a9a461aa", "3dc6ab6c6187a809d9843fe6a164589dc63cccfb", "4701edf14bb55a3865a335901310694162b19da8", "0aa2244ecb601734c1bb22c1bdad152292a19be4", "3f8851fae74e2ee53ab0ad84c83d24756eaaebcc", "9061a3802910b71cf5d840473d7b9989649af94a", "847fd4428705785972bbf0d3be9575ba9a36f516", "16d53aec6fe8f4fb08a956208cfec4cabfa6ece1", 
"77462b767a378aa6207cbff5b100379fe8a55f6b", "07720ce6c546695fd39110b11bcaf5182dc5a66c", "432ec065b07e59d55b7be30d9d3436b13332c47a", "9a5c5e7f30c1db4aa91b55829e8fe1669213f65e", "642e0646013dadd1f8f49f88901a109cdb6f2984", "6d59f58f7408362036196048c9ba11f399dd9bc2", "16acd56ff683614e8f08ea95d63d5f28b0a01cba", "130633f0653e6ad5766144299aa17938e7a5fca2", "9e5db350ba34f2b4c662cdea7acb6e906484ada9", "915e3f65bdc5d46a7e1b1bd6d79337c8b00fe02c", "ea855ef23af8b0f52634be6544534de3a9671b3d", "0c364deb5f061d17dfa86e0ede98bb0077e3ee16", "f924c4f1b7fa1d160e6f769c84c015f24853ad4b", "2ec14bc3f03861e750f054727369dd0f9933eef6", "0530d1f0d2599be2e274c53c9e39c924615c2d6d", "746c828289ea54423209b9b1afd9ba7d8f14055b" ], "paperAbstract": "Third-party libraries are commonly used by app developers for alleviating the development efforts and for monetizing their apps. On Android, the host app and its third-party libraries reside in the same sandbox and share all privileges awarded to the host app by the user, putting the users' privacy at risk of intrusions by third-party libraries. In this paper, we introduce a new privilege separation approach for third-party libraries on stock Android. Our solution partitions Android applications at compile-time into isolated, privilege-separated compartments for the host app and the included third-party libraries. A particular benefit of our approach is that it leverages compiler-based instrumentation available on stock Android versions and thus abstains from modification of the SDK, the app bytecode, or the device firmware. A particular challenge for separating libraries from their host apps is the reconstruction of the communication channels and the preservation of visual fidelity between the now separated app and its libraries. We solve this challenge through new IPC-based protocols to synchronize layout and lifecycle management between different sandboxes. 
Finally, we demonstrate the efficiency and effectiveness of our solution by applying it to real world apps from the Google Play Store that contain advertisements.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134064" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7e7eca04d0e44563b5278189be53ebdf7714e85c", "sources": [ "DBLP" ], "title": "The ART of App Compartmentalization: Compiler-based Library Privilege Separation on Stock Android", "venue": "CCS", "year": 2017 }, "7ef478cd2ebac26e73a0411c46eb7afe3a60a9bb": { "authors": [ { "ids": [ "1718428" ], "name": "Jiawei Zhang" }, { "ids": [ "3343330" ], "name": "Congying Xia" }, { "ids": [ "2418496" ], "name": "Chenwei Zhang" }, { "ids": [ "3122003" ], "name": "Limeng Cui" }, { "ids": [ "2274395" ], "name": "Yanjie Fu" }, { "ids": [ "1703117" ], "name": "Philip S. Yu" } ], "doi": "10.1109/ICDM.2017.70", "doiUrl": "https://doi.org/10.1109/ICDM.2017.70", "entities": [ "Algorithm", "Autoencoder", "BL (logic)", "Centrality", "Complex network", "Computer data storage", "Direct Internet Message Encapsulation", "Experiment", "Feature vector", "Machine learning", "Social network", "Sparse matrix", "Web application" ], "id": "7ef478cd2ebac26e73a0411c46eb7afe3a60a9bb", "inCitations": [ "db68dccdac472cc6a445ba97a414044765e55656", "7c28b81dff1899e5a148ff57888faacc9945ab22" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "605-614", "journalVolume": "", "outCitations": [ "762b63d2eb86f8fd0de98a08561b77527ae8f165", "81c88f962ea46c856f289bb50f79f40e2df815de", "13c40b32b9f35c8d24a5c00ec16a88382aaf07fe", "b1ee115765dde65bcc4789630aae1b729aa62fe1", "14f2bc1234ed1418790262f56488dc4447c78bc8", "61745e0a984e8fd9c45f545c3c83f7b99b110505", "b7b9196227cdf2f55bd5b6fac373c9127ccb7b57", "01385ee1a7bdc0a74085fb6271441faaeb9f32fd", "2a3f862199883ceff5e3c74126f0c80770653e05", "141e35263ab810983c90d47ad62eb4fab5e51717", "1a67622ca58aa851afe36ad6c6e78f9fb9d691d2", 
"40a2a398862f5c62555ffaf6d8421dea9f1bbcd3", "aba09ce7859301e46a163d015a4ca4c5aba8c040", "6dfc1b3c75005393e0a179ea902b9f7beb12a58f", "ab208d4fe216c95d9ae43163c2337a277269f45e", "03cb609fcfce6c60cbe3eb0dd8254069bf6d7573", "244b062abd07764089e67dfbe5709bb92174535f", "2fa9c668ecf6204832f994307f527395fdd79ef4", "1c059493904b2244d2280b8b4c0c7d3ca115be73", "e2e81a917c194e27261ee5e02a6c804191e63485", "10f272220cbca311c8379a72f9d5ba387b18c437", "89953feb5e3127c9997ae896fe0ccf81540661f1", "23682ad5e9977dc4c3485d937a80ec0199436b92", "994afdf0db0cb0456f4f76468380822c2f532726", "04b52c8230c3f9f4f4032b06458069d81c8f07b2", "0834e74304b547c9354b6d7da6fa78ef47a48fa8", "5c69418969d8d5a286306f1087108dcceb50d39e", "8edcc2d165e72e59d47eb994013b6a87da5af504", "08eeaae7108e35a9639ef750a75132d0c71b2dd1" ], "paperAbstract": "Network embedding aims at projecting the network data into a low-dimensional feature space, where the nodes are represented as a unique feature vector and network structure can be effectively preserved. In recent years, more and more online application service sites can be represented as massive and complex networks, which are extremely challenging for traditional machine learning algorithms to deal with. Effective embedding of the complex network data into low-dimension feature representation can both save data storage space and enable traditional machine learning algorithms applicable to handle the network data. Network embedding performance will degrade greatly if the networks are of a sparse structure, like the emerging networks with few connections. In this paper, we propose to learn the embedding representation for a target emerging network based on the broad learning setting, where the emerging network is aligned with other external mature networks at the same time. To solve the problem, a new embedding framework, namely \"Deep alIgned autoencoder based eMbEdding\" (DIME), is introduced in this paper. 
DIME handles the diverse link and attribute in a unified analytic based on broad learning, and introduces the multiple aligned attributed heterogeneous social network concept to model the network structure. A set of meta paths are introduced in the paper, which define various kinds of connections among users via the heterogeneous link and attribute information. The closeness among users in the networks are defined as the meta proximity scores, which will be fed into DIME to learn the embedding vectors of users in the emerging network. Extensive experiments have been done on real-world aligned social networks, which have demonstrated the effectiveness of DIME in learning the emerging network embedding vectors.", "pdfUrls": [ "https://arxiv.org/pdf/1711.09409v1.pdf", "http://arxiv.org/abs/1711.09409", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.70" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7ef478cd2ebac26e73a0411c46eb7afe3a60a9bb", "sources": [ "DBLP" ], "title": "BL-MNE: Emerging Heterogeneous Social Network Embedding Through Broad Learning with Aligned Autoencoder", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "7f00084278521e3e77f88a4d7559b13790e2c549": { "authors": [ { "ids": [ "2768173" ], "name": "Yiting Xia" }, { "ids": [ "3157401" ], "name": "Xiaoye Sun" }, { "ids": [ "1895237" ], "name": "Simbarashe Dzinamarira" }, { "ids": [ "7463928" ], "name": "Dingming Wu" }, { "ids": [ "40568048" ], "name": "Xin Sunny Huang" }, { "ids": [ "8051543" ], "name": "T. S. 
Eugene Ng" } ], "doi": "10.1145/3098822.3098837", "doiUrl": "https://doi.org/10.1145/3098822.3098837", "entities": [ "Apache Hadoop", "Clos network", "Data center", "Digital footprint", "Hierarchical control system", "Jumpstart Our Business Startups Act", "Network architecture", "Network topology", "Random graph", "Run time (program lifecycle phase)", "Simulation", "Testbed", "Throughput", "Tree network" ], "id": "7f00084278521e3e77f88a4d7559b13790e2c549", "inCitations": [ "98149e1123a528fe177ffe3bf7d5ef275220a798" ], "journalName": "", "journalPages": "295-308", "journalVolume": "", "outCitations": [ "7b5144c88098a183eb2f8395276b0be6196a442b", "0c44588bdcbb82c7183958abab5ebc89c0e650f4", "695dad4f57dcbcc0e0c5d1987b3c66cb7b8d196a", "08d410ea6f0c3934324467d809e2ea6ffc8a9a73", "177dee5388017f7119100bb283f946ad92722a6b", "288763b8420ef17baf2f0214cf283433fcb4a447", "caf5726018e4eeac01e86c824e41fe25c0bab059", "663e064469ad91e6bda345d216504b4c868f537b", "56f74c032ead21ab27727e77ff2043744e70f796", "fca01f072bddcafb4f97e6e778dd9c2e1221b477", "21c039e563ec0ca023a5b9c729e92a2fd611946a", "42e5e97272ad8728749f861ed7a920707e698778", "83a1a478c68e21ee83dd0225091bf9d3444a8120", "6cef37401134e650bcf60748c2a8ead42af56b66", "1d912b67ba7cda4d341d834c1c6de96db01888fc", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "3f965097faf0cf310de82c41c992199646774287", "559e4671b87c3f76d3c485ebdaefe734323879f0", "76533f914cb2ff6c050b003cb20cb0c4ead8bb99", "bd1323c9312298a366c60ca75186293a3c21f51f", "11a4744f7f0883c4232a9f5aaca8b9d29bfaa762", "14b82ab954a85cb8b336e86cf536c5701ca722e9", "2bce718b77e8ff9c733f3b03b78a8a0246a864a7", "aa6a64afc25f48ad44e510d0055405836c8cc325", "534ee575a6b0c37e03d1dddb92493b57e9271298", "065c8bfcb45e8c342d26aa1855cf292f9a5cbeff", "8af2b5b01cdf781464c9d4ec5286bfaa8cdb9dc8", "0f8b04cb89e455ceadf0c88fd5dd9f9a7f338ba9" ], "paperAbstract": "This paper promotes convertible data center network architectures, which can dynamically change the network topology to combine the 
benefits of multiple architectures. We propose the flat-tree prototype architecture as the first step to realize this concept. Flat-tree can be implemented as a Clos network and later be converted to approximate random graphs of different sizes, thus achieving both Clos-like implementation simplicity and random-graph-like transmission performance. We present the detailed design for the network architecture and the control system. Simulations using real data center traffic traces show that flat-tree is able to optimize various workloads with different topology options. We implement an example flat-tree network on a 20-switch 24-server testbed. The traffic reaches the maximal throughput in 2.5s after a topology change, proving the feasibility of converting topology at run time. The network core bandwidth is increased by 27.6% just by converting the topology from Clos to approximate random graph. This improvement can be translated into acceleration of applications as we observe reduced communication time in Spark and Hadoop jobs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098837", "https://www.cs.rice.edu/~eugeneng/papers/SIGCOMM17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7f00084278521e3e77f88a4d7559b13790e2c549", "sources": [ "DBLP" ], "title": "A Tale of Two Topologies: Exploring Convertible Data Center Network Architectures with Flat-tree", "venue": "SIGCOMM", "year": 2017 }, "7f078eecf62d783ebffbf686202ff71439e1df97": { "authors": [ { "ids": [ "2990124" ], "name": "Nico D\u00f6ttling" }, { "ids": [ "2561310" ], "name": "Satrajit Ghosh" }, { "ids": [ "1733482" ], "name": "Jesper Buus Nielsen" }, { "ids": [ "3205358" ], "name": "Tobias Nilges" }, { "ids": [ "28084654" ], "name": "Roberto Trifiletti" } ], "doi": "10.1145/3133956.3134024", "doiUrl": "https://doi.org/10.1145/3133956.3134024", "entities": [ "Arithmetic circuit complexity", "Black box", "Boolean circuit", "Communication complexity", "Computation", "Linear 
function", "Linear function (calculus)", "Oblivious transfer", "Polynomial", "Secure two-party computation", "Symposium on Theory of Computing", "Two-phase commit protocol" ], "id": "7f078eecf62d783ebffbf686202ff71439e1df97", "inCitations": [ "5fc9b11bfd1c57c733f4175e20676de410494589" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "790", "journalVolume": "2017", "outCitations": [ "23ec68ed03b485b645478a3f6905615617d905a6", "9888dff0c68e01d8d1ec5cef5033e3f3b896ea3d", "50b72015f192a1c4a6422b813fd3ace29b29d634", "e1d27dc62897a1a00a09756af7b16760650c12fa", "19de1229db1c2e62367a3d1459e24848064dfd02", "19c3736da5116e0e80a64db35afe421663c4b4a8", "0e0427aedfed65c8dd688c094b181feacf4eaab4", "04948723dec0e6724777ee56f0d10168cce44921", "3f40a5b0bcf4401c3f8efdbb539deec2763ad916", "58bb949ed93e2f21288638508e8818e58329cc41", "6f70632a51dd43b8a37d95051cbdb5e9bb02b1ec", "05543dfa89c4e89ed1f78a1b83c2e172cd8f6321", "a797a0346e106e0d1d1d2db778aa509031c7bf8c", "1db2265e3ce510fee6d4d9b39c135bddb4040949", "0435dddec1c8b1011bd5e3ea07c45d1cdf0b520b", "129db5ec39a453ea53c94ad529cf13dccafe4167", "42333e3f231bbfe508f6da6bad2feff9ae223113", "470d8902d1e250923e9fb0289b484cc9b2149abb", "245a28316264b23073a144278bd46fc4f7ef9852", "e1a0dd0b0b193d87b83bc970a5a81387ea951a00", "937cb0795bf3bfb5c9cbb7387ed68301bb6995ce", "6cba388f07e05f39aebd6da74d860584d642cd4e" ], "paperAbstract": "We introduce a new approach to actively secure two-party computation based on so-called oblivious linear function evaluation (OLE), a natural generalisation of oblivious transfer (OT) and a special case of the notion of oblivious polynomial evaluation introduced by Naor and Pinkas at STOC 1999. OLE works over a finite field F. In an OLE the sender inputs two field elements a ∈ F and b ∈ F, and the receiver inputs a field element x ∈ F and learns only f(x) = ax + b. Our protocol can evaluate an arithmetic circuit over a finite field F given black-box access to OLE for F. 
The protocol is unconditionally secure and consumes only a constant number of OLEs per multiplication gate. An OLE over a field F of size O(2^κ) can be implemented with communication O(κ). This gives a protocol with communication complexity O(|C| κ) for large enough fields, where C is an arithmetic circuit computing the desired function.\n This asymptotically matches the best previous protocols, but our protocol at the same time obtains significantly smaller constants hidden by the big-O notation, yielding a highly practical protocol. Conceptually our techniques lift the techniques for basing practical actively secure 2PC of Boolean circuits on OT introduced under the name TinyOT by Nielsen, Nordholt, Orlandi and Burra at Crypto 2012 to the arithmetic setting. In doing so we develop several novel techniques for generating various flavours of OLE and combining these.\n We believe that the efficiency of our protocols, both in asymptotic and practical terms, establishes OLE and its variants as an important foundation for efficient actively secure 2PC.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134024", "https://eprint.iacr.org/2017/790.pdf", "http://eprint.iacr.org/2017/790" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7f078eecf62d783ebffbf686202ff71439e1df97", "sources": [ "DBLP" ], "title": "TinyOLE: Efficient Actively Secure Two-Party Computation from Oblivious Linear Function Evaluation", "venue": "CCS", "year": 2017 }, "7f802c5353290a4934ee595951d35f1b2605cc32": { "authors": [ { "ids": [ "8748529" ], "name": "MingWei Yang" }, { "ids": [ "5962804" ], "name": "Ivan B. 
Djordjevic" }, { "ids": [ "2940282" ], "name": "Cihan Tunc" }, { "ids": [ "1712474" ], "name": "Salim Hariri" }, { "ids": [ "1799510" ], "name": "Ali Akoglu" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.39", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.39", "entities": [ "Data center", "Data rate units", "Just-in-time compilation", "Maximum throughput scheduling", "Network architecture", "Optical interconnect", "Optical switch", "Signal-to-noise ratio", "Simulation", "Software-defined networking", "Terabit", "Throughput" ], "id": "7f802c5353290a4934ee595951d35f1b2605cc32", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "299-302", "journalVolume": "", "outCitations": [ "d2254f9fdf4aca4e50f4cbc22bf7661b8e801700", "065c8bfcb45e8c342d26aa1855cf292f9a5cbeff", "41c61aab65d7d212f91c000052cf5f3648b31e3e", "671d716520eceebcebffc3e7c5d0180ea2d03de6", "36e619251ffe1834adb2ae735b3b8aafcb7cf150", "9a93f8c5a97b7340a0d8ed3915be372bbae84e78", "b671fecb876789699b0ec1a2e7eb533b3a89a182", "5c852e0a3a1ca7beae37e120818c2b09bd68847b", "11a4744f7f0883c4232a9f5aaca8b9d29bfaa762", "663e064469ad91e6bda345d216504b4c868f537b", "8b29022ed0a20eb46e9da616eb6883a27d0c12f2" ], "paperAbstract": "Virtual Data Center (VDC) is becoming the real world tendency of IT model that offers flexible and on-demand service, most of which adopt Software-Defined Networking (SDN) as their network architecture to simplify network management and ensure network isolation. In our paper, by following our Just-in-Time Architecture (JITA) design, we further studied the composable architecture in optical domain. 
For this architecture to work efficiently, Terabit per second (Tbps) bandwidth and negligible latency (i.e., less than microsecond latency) are needed to support disaggregated infrastructures. Therefore, in this paper, we present an all-optical packets switch solution with proposed SDN controlling scheme. The modeling of the all-optical switch is also proposed, implemented and studied. Based on the modeling of the switch, the output Optical Signal-to-Noise Ratio (OSNR) and latency performance are studied. With the result of the simulation, we find the limits and dominated factors for designing the large scale all-optical switch in the composable data centers. An example of analyzing the maximum throughput and the latency of the all-optical switch is given at the end.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.39" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7f802c5353290a4934ee595951d35f1b2605cc32", "sources": [ "DBLP" ], "title": "An Optical Interconnect Network Design for Dynamically Composable Data Centers", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "7f93f814459f18008ab8521c447d024ffab5c6a9": { "authors": [ { "ids": [ "2670855" ], "name": "Ching-Hsiang Chu" }, { "ids": [ "1720335" ], "name": "Xiaoyi Lu" }, { "ids": [ "2942686" ], "name": "Ammar Ahmad Awan" }, { "ids": [ "1802958" ], "name": "Hari Subramoni" }, { "ids": [ "8798733" ], "name": "Jahanzeb Maqbool Hashmi" }, { "ids": [ "2963343" ], "name": "Bracy Elton" }, { "ids": [ "1731654" ], "name": "Dhabaleswar K. 
Panda" } ], "doi": "10.1109/ICPP.2017.25", "doiUrl": "https://doi.org/10.1109/ICPP.2017.25", "entities": [ "Benchmark (computing)", "Deep learning", "Graphics processing unit", "IEEE-488", "InfiniBand", "Multi-source", "Multicast", "Remote direct memory access", "Scalability" ], "id": "7f93f814459f18008ab8521c447d024ffab5c6a9", "inCitations": [ "3d80f420b87bf16eabac6142275e71bf48aa61a5", "d771ce5fefb6e853ab176a09204556ae663e682f" ], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "161-170", "journalVolume": "", "outCitations": [ "8205d403963582c7fb2e6ddf64c858320afe9a9f", "2bc795d795c95625531428dfb294e18913d07b23", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "497fc8616563777046ecc89c85771b2ab446a518", "6fbd9834cb888b84db1f25756cb6173b3622e4a0", "4af7f2b680893a7e254a844f2392d4569423a64c", "77830356061996b6fe062759f3e61a3805b25d04", "07292f16bf63f6a3d4596e4231c7bc5af47bb9f1", "bd6fe117ca35a7ab144408be1771000feb57c7fb", "b1871f19c057a0e90aec4b2b6317d15a9cce3242", "061356704ec86334dbbc073985375fe13cd39088", "d683f824fcb7fde88cf2aa376330c54a70fd02d7", "7ee23bd4979aff5374ffa58800d4204e1f4714a2", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "faed70ccbdab1858045096d1d306f56257b36b4d" ], "paperAbstract": "Broadcast operations (e.g. MPI_Bcast) have been widely used in deep learning applications to exchange a large amount of data among multiple graphics processing units (GPUs). Recent studies have shown that leveraging the InfiniBand hardware-based multicast (IB-MCAST) protocol can enhance scalability of GPU-based broadcast operations. However, these initial designs with IB-MCAST are not optimized for multi-source broadcast operations with large messages, which is the common communication scenario for deep learning applications. In this paper, we first model existing broadcast schemes and analyze their performance bottlenecks on GPU clusters. 
Then, we propose a novel broadcast design based on message streaming to better exploit IB-MCAST and NVIDIA GPUDirect RDMA (GDR) technology for efficient large message transfer operation. The proposed design can provide high overlap among multi-source broadcast operations. Experimental results show up to 68% reduction of latency compared to state-of-the-art solutions in a benchmark-level evaluation. The proposed design also shows near-constant latency for a single broadcast operation as a system grows. Furthermore, it yields up to 24% performance improvement in the popular deep learning framework, Microsoft CNTK, which uses multi-source broadcast operations; notably, the performance gains are achieved without modifications to applications. Our model validation shows that the proposed analytical model and experimental results match within a 10% range. Our model also predicts that the proposed design outperforms existing schemes for multi-source broadcast scenarios with increasing numbers of broadcast sources in large-scale GPU clusters.", "pdfUrls": [ "http://web.cse.ohio-state.edu/~chu.368/slides/17_ICPP.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.25" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7f93f814459f18008ab8521c447d024ffab5c6a9", "sources": [ "DBLP" ], "title": "Efficient and Scalable Multi-Source Streaming Broadcast on GPU Clusters for Deep Learning", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "7fc9e1eb679ed67ce3bc8682dbeb2e723b24064a": { "authors": [ { "ids": [ "1730937" ], "name": "Sudharshan S. 
Vazhkudai" }, { "ids": [ "15669188" ], "name": "Ross Miller" }, { "ids": [ "34966505" ], "name": "Devesh Tiwari" }, { "ids": [ "2689136" ], "name": "Christopher Zimmer" }, { "ids": [ "1706743" ], "name": "Feiyi Wang" }, { "ids": [ "1770398" ], "name": "Sarp Oral" }, { "ids": [ "9441826" ], "name": "Raghul Gunasekaran" }, { "ids": [ "39774655" ], "name": "Deryl Steinert" } ], "doi": "10.1145/3126908.3126946", "doiUrl": "https://doi.org/10.1145/3126908.3126946", "entities": [ "Consumability", "Directory service", "Federated identity", "Preprocessor", "Scalability" ], "id": "7fc9e1eb679ed67ce3bc8682dbeb2e723b24064a", "inCitations": [], "journalName": "", "journalPages": "45:1-45:12", "journalVolume": "", "outCitations": [ "3b2c8bb9471bd52a40b72a61bfede076f4d414b5", "06230d13e276bd871a378ca932a41b5cff94e29f", "94e09f8f9a88ffebcacfb298fa737eed0debaab4", "42b9cea730b4cd3d7c5780981ad85b5eca1c5417", "769072c06b62956f955b713005282ce075691841", "2398278a25035cfeefa3dd4aba91b16d48f540ba", "20bd9e51b0a95cfe03afdb00337e1c95c290e473", "9edab79d681bae0071aa784328b0ce134d909c10", "502366e7dd9bb70f81e931b5a4cb6bd25651143e", "7c8f5cfe90578324da0fc7815075e610fc1161ff", "572dd2d5d75227bb878430c9375b9be92cc7e6e9", "483b2f4c7dbc72f7969b60cff0984f2062f02956", "1e8fa3399883d288483c145741156c77d80a4278" ], "paperAbstract": "In this paper, we describe the GUIDE framework used to collect, federate, and analyze log data from the Oak Ridge Leadership Computing Facility (OLCF), and how we use that data to derive insights into facility operations. We collect system logs and extract monitoring data at every level of the various OLCF subsystems, and have developed a suite of pre-processing tools to make the raw data consumable. The cleansed logs are then ingested and federated into a central, scalable data warehouse, Splunk, that offers storage, indexing, querying, and visualization capabilities. 
We have further developed and deployed a set of tools to analyze these multiple disparate log streams in concert and derive operational insights. We describe our experience from developing and deploying the GUIDE infrastructure, and deriving valuable insights on the various subsystems, based on two years of operations in the production OLCF environment.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126946" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7fc9e1eb679ed67ce3bc8682dbeb2e723b24064a", "sources": [ "DBLP" ], "title": "GUIDE: a scalable information directory service to collect, federate, and analyze logs for operational insights into a leadership HPC facility", "venue": "SC", "year": 2017 }, "7fed5e7a7fe5800ce403c75b7320fcf27d9de053": { "authors": [ { "ids": [ "1691421" ], "name": "Amir Nakib" }, { "ids": [ "3258784" ], "name": "Thibaud Rohmer" }, { "ids": [ "1733230" ], "name": "El-Ghazali Talbi" }, { "ids": [ "1921279" ], "name": "Abdelhamid Nafaa" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.51", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.51", "entities": [ "Algorithm", "Dynamic programming", "Online and offline", "Optimization problem", "Program optimization", "Quality of service", "Rejection sampling" ], "id": "7fed5e7a7fe5800ce403c75b7320fcf27d9de053", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "388-395", "journalVolume": "", "outCitations": [ "09cacef0c5f1bf67616b371b5a628e33dc926b80", "0c1761bb534da524300580ff72a3865f75f1de3e", "6e80932db731638d852df4b41d336411b40fbf6f", "eaaf533a85e0d8cc293efc599e1de075b4a99033", "337e4b7f57ccbb7485950b93da9c5bb4ec4dc9ad", "4a5bf64fab7eadb01a022035c6eaf3874fb2ade5", "2b9ab02a58c895c29404b5a7d0828ad120fdf78f", 
"0f72abea4de4d3b1a47bfa114163b318c9507827", "86ae19c62c2f96b043a7c557dc6b3a964601252b" ], "paperAbstract": "Nowadays Video-on-Demand are widely used and number of customers increases, consequently, many network problems must be solved to ensure the quality of service. One of the solutions that has proved its efficiency consists in using Peer-to Peers architecture. However, they raise challenges such as peers resource allocation. Most literature on tackle the problem with optimal static rules found at off-line study of the system. In this paper, we propose to formulate this problem as a dynamic optimization problem, then, solve it using a dynamic learning based optimization algorithm. The obtained results show that using a dynamic resource allocation reduces the rejection rate while enhancing the entropy of the system, in the face of a dynamically changing title demand.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.51" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/7fed5e7a7fe5800ce403c75b7320fcf27d9de053", "sources": [ "DBLP" ], "title": "Dynamic Learning Optimization Algorithm for P2P-VoD Systems", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "8007bb3a6409e0ed95afc4691c64085247c382bf": { "authors": [ { "ids": [ "2438489" ], "name": "Ennan Zhai" }, { "ids": [ "2869954" ], "name": "Ruzica Piskac" }, { "ids": [ "3409635" ], "name": "Ronghui Gu" }, { "ids": [ "6357485" ], "name": "Xun Lao" }, { "ids": [ "31825486" ], "name": "Xi Wang" } ], "doi": "10.1145/3133921", "doiUrl": "https://doi.org/10.1145/3133921", "entities": [ "Cloud computing", "Cloud management", "Code generation (compiler)", "Data center", "Domain-specific language", "Failure rate", "Network switch", "Source lines of code" ], "id": 
"8007bb3a6409e0ed95afc4691c64085247c382bf", "inCitations": [], "journalName": "PACMPL", "journalPages": "97:1-97:28", "journalVolume": "1", "outCitations": [ "9e778ebe9a4b0955b802f7e46323dcffc6174f94", "488420924de0af51a5ae7f86d3b29392c29601bc", "16107efd38ec1af09f1f6d908f8212b66138ee38", "05bd2c6e82a96e7bb3d7d7262f953fc53ead3d1a", "44b3b0573fdf6fc9889dbb3badf134dd092bb2d1", "040980f7892c9b562a3847cb97f0808858665070", "1d80de948ddcf8f3524763ed3d5f4485f46d70fb", "c62e795562e019330ea9dd8c67b4cec6de98e194", "06590f3b461056de7a61d8fa14a56aaa8ad3a1e6", "39425e747dbd401d08246d2bfd75b01d41e289c4", "2d71e521198371ab970c402ddf76eea350569ae8", "3edfc29b8a4f4fb1e245087cd1c59498f2255fe8", "3b9725ad0e3a078e9add5a37ea1ac323f7322d75", "4c59d131a5b221c2729384ce4235ee29c172c80f", "1e4da813c29a65f19f6e9432cb4efe8b7d45ac1d", "640af017aa8d11f9f31480155c8d5d1a0d8865d7", "164d8d8238674cdfb9bbb2583cfc390e178420de", "114dc57340496512f2e026b8ecdf7c746af21025", "0aeb77fb41dc8e863e054fcffea7b8b3011515ce", "64ad3b92f61a441c5b4080b0ac9291109a919886", "2c0f180286fd64470bf5b7e42f9107fddf955a47", "811d08c2b2620b6f87e774fdd73cd708f4cb20c5", "04c724bad0963d1e6e7a3743be08c08810402582", "069103feb2d2d3f1b0115b484d5c2f978a983df0", "0cf1bc4a4d8362b5a675df3fd32c9ba450935a7a", "3b44e2e3bff20cd3828f0e757cb073392331c1ec", "17da1c9b36bb34bb7ea65774888648cc99c88ce5", "4423433d2364b9d112f7ede56a9a6df571bc2a8f", "277a2794d9ea67cc0657b019c2e77faa3b37eddf", "0ec58ad7dffcc53018a786c069cb604ef1be5aae", "58f692e9b03cb973355aab46bb6f867239aeb513", "673832d6aa0f4e802a3786f8bb962987293bce6d", "0e0427aedfed65c8dd688c094b181feacf4eaab4", "46eba995c5371d7966d59549f61c203cecd1d3c7", "111864cac232d8a9c170bd63069eb4af155a9f7b", "fb007952ae4f9d13b4d3aba3541ddcaf1e9f8d79", "2e38636cf04fd1fe0439440c7c0cc45f07bc1275", "559e4671b87c3f76d3c485ebdaefe734323879f0", "5cc17a34d23dea74c8da124fcd73a65b8df9a5ba", "05a436f059c3897c3509dc059903364eff4a79af", "53abb9ca99f1c9e8038dcc0bfe4ccdf770a55db3", 
"7b1fbcd0d09b381ad9433a85587c373f4f1b77cf", "9f87fdf3f3f1e2a48c6c21629457cdb3b1873c7d", "53aabc0ab7bdb22c4bb5b508a4db2fc4a2387060", "1f8699f5d99a0fa362bbc9e5071ac3cbbaf8e4da", "086820e40dc8046c30a8751394df167bec047fe1", "105e451347d3f60dae466555304aa218f2b3bde2", "022805188d0bf2e9e2a141d56bdc40645a9fea7f", "7ae26da9b7666812857883536870c315538f7f10", "141727ec87084aea5e0d5bffaa63b4fc2d9bf478", "134021cfb9f082f4e8b58f31bcbb41eb990ab874", "16b0a02e2db3e3beb3dd0ce83e610549f271c9f1", "dec3ff8cf104d347dadc85e4fb4f8f13a835cb62", "ab1f0b1d1b21771a1a639d4702440331eb421a64" ], "paperAbstract": "Today's cloud services extensively rely on replication techniques to ensure availability and reliability. In complex datacenter network architectures, however, seemingly independent replica servers may inadvertently share deep dependencies (e.g., aggregation switches). Such unexpected common dependencies may potentially result in correlated failures across the entire replication deployments, invalidating the efforts. Although existing cloud management and diagnosis tools have been able to offer post-failure forensics, they, nevertheless, typically lead to quite prolonged failure recovery time in the cloud-scale systems. In this paper, we propose a novel language framework, named RepAudit, that manages to prevent correlated failure risks before service outages occur, by allowing cloud administrators to proactively audit the replication deployments of interest. In particular, RepAudit consists of three new components: 1) a declarative domain-specific language, RAL, for cloud administrators to write auditing programs expressing diverse auditing tasks; 2) a high-performance RAL auditing engine that generates the auditing results by accurately and efficiently analyzing the underlying structures of the target replication deployments; and 3) an RAL-code generator that can automatically produce complex RAL programs based on easily written specifications. 
Our evaluation result shows that RepAudit uses 80x less lines of code than state-of-the-art efforts in expressing the auditing task of determining the top-20 critical correlated-failure root causes. To the best of our knowledge, RepAudit is the first effort capable of simultaneously offering expressive, accurate and efficient correlated failure auditing to the cloud-scale replication systems.", "pdfUrls": [ "http://www.cs.yale.edu/homes/zhai-ennan/oopsla17-final.pdf", "http://doi.acm.org/10.1145/3133921" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8007bb3a6409e0ed95afc4691c64085247c382bf", "sources": [ "DBLP" ], "title": "An auditing language for preventing correlated failures in the cloud", "venue": "PACMPL", "year": 2017 }, "80504579e7242f4a9ce24c420ccb5b91f2f9e005": { "authors": [ { "ids": [ "1734519" ], "name": "Alastair F. Donaldson" }, { "ids": [ "32078908" ], "name": "Hugues Evrard" }, { "ids": [ "3017460" ], "name": "Andrei Lascu" }, { "ids": [ "3130099" ], "name": "Paul Thomson" } ], "doi": "10.1145/3133917", "doiUrl": "https://doi.org/10.1145/3133917", "entities": [ "Compiler", "Experiment", "Graphics processing unit", "Metamorphic testing", "Microsoft Windows", "OpenGL", "OpenGL Shading Language", "Oracle Database", "Security bug", "Shader", "Shading", "Shading language", "Software bug", "Test automation", "Web page", "WebGL", "Windows 10" ], "id": "80504579e7242f4a9ce24c420ccb5b91f2f9e005", "inCitations": [], "journalName": "PACMPL", "journalPages": "93:1-93:29", "journalVolume": "1", "outCitations": [ "915e5ef596066e765a420be81d33d25f079d97d2", "06705b1003b26ef589cf02d5df01c8097eefb276", "2bbda639ab417ca79d5948e393171e62ccdd7367", "71f8f0820db5dd77e3525e0481542102ffd83741", "cfb173786a34157e727504ff6c38a500d7d1085a", "05d28dfb947b040b604ccfcef824c44033b122cf" ], "paperAbstract": "We present an automated technique for finding defects in compilers for graphics shading languages. 
A key challenge in compiler testing is the lack of an oracle that classifies an output as correct or incorrect; this is particularly pertinent in graphics shader compilers where the output is a rendered image that is typically under-specified. Our method builds on recent successful techniques for compiler validation based on metamorphic testing, and leverages existing high-value graphics shaders to create sets of transformed shaders that should be semantically equivalent. Rendering mismatches are then indicative of shader compilation bugs. Deviant shaders are automatically minimized to identify, in each case, a minimal change to an original high-value shader that induces a shader compiler bug. We have implemented the approach as a tool, GLFuzz, targeting the OpenGL shading language, GLSL. Our experiments over a set of 17 GPU and driver configurations, spanning the main 7 GPU designers, have led to us finding and reporting more than 60 distinct bugs, covering all tested configurations. As well as defective rendering, these issues identify security-critical vulnerabilities that affect WebGL, including a significant remote information leak security bug where a malicious web page can capture the contents of other browser tabs, and a bug whereby visiting a malicious web page can lead to a ``blue screen of death'' under Windows 10. 
Our findings show that shader compiler defects are prevalent, and that metamorphic testing provides an effective means for detecting them automatically.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133917" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/80504579e7242f4a9ce24c420ccb5b91f2f9e005", "sources": [ "DBLP" ], "title": "Automated testing of graphics shader compilers", "venue": "PACMPL", "year": 2017 }, "8071c58e2639b796bdd6544a27166b70376bdd4e": { "authors": [ { "ids": [ "1759838" ], "name": "Antonio Barbalace" }, { "ids": [ "39517894" ], "name": "Anthony Iliopoulos" }, { "ids": [ "1750962" ], "name": "Holm Rauchfuss" }, { "ids": [ "2963523" ], "name": "Goetz Brasche" } ], "doi": "10.1145/3102980.3102990", "doiUrl": "https://doi.org/10.1145/3102980.3102990", "entities": [ "Operating system" ], "id": "8071c58e2639b796bdd6544a27166b70376bdd4e", "inCitations": [ "213b5f30cd84c80c1f53e46553fa221fdcc226dd" ], "journalName": "", "journalPages": "56-61", "journalVolume": "", "outCitations": [ "46742c000a65f676c00ec4e33d19d535a1c29dd7", "07c3c9f1be15b52e259309b2e0a9ca71fa7dc76f", "866bce77ca5201d182c0c43090eb75bf126efba6", "7370316302bfff5ef6ff760cbfaae668e4760275", "0693ff4b3a8d1452b897a876d3ffe6b2074e98e4", "566781ba2b0c139f4f1f15701e8c9d6e497f944a", "7dfccc1c1babded51f525213c687707d52428567", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "43f0c099d44a68783a773f91cd03098a5252bf98", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "da978af48937a768fffbd0faea35820eb40ae6c8", "339aeb3b948ab2da7eeb308a2783238690d0618c", "0e5c646909bb762da0cd325e084655c12445578f", "0f42b4dc664eb31df423c3de3a2cecf9c6ac83a8", "7ff303e7c450aee82b6fff5cc64be54e5604da01", "7a9bca5fb9b1775dd3c7a38d9e34e9233e7419cf", "6d2db3d64131cd4a0f5bb1de46581f36046385f2", "4678cdcf7e57c1563379ac7cc344254f01ace572", "2037e142f3b45da72d5c99c0c0de2bb506d4a829", "1ea92529e75fe90ee1923b95d0fa8ad37ac1ed7c", "6330f075daf847554007b236b57293f8ccebca64", 
"25fd64c30726f131cc1399845f62531549e9bc75", "6d60c734fe59f78cdbb47b505d41b449e9392a4b", "1d31aceaa4fbf3a8afca3139675482b1cdf84495", "3fd342adbc95e243de00e0ca0e315eb023b23d99", "8964497eef0b88462213f152a776d260388cff36", "138a9c2a9579435cd8cb0f24e7ec135821074557", "5dc5b799d6d161d5c2805917d680d1eb7314fdf5", "0e191d726c2b15d54f0e4b5d50f6c0d2e426f3ad", "5f3e6fd1668e0732c91ff72061ece073224f1745", "0038ce3977245eb24664222903fc17699f3636e7", "2e5132493276714e4cce3b2f64d60da4e47210cb", "3ac6671a0c61544b9dab543b116eccdaccc6469e", "0d6787f19c7a521784a38d31420dd8da7bd490ef", "5635aeef49e9678d0a5eaa3f17d247a059a19cb0", "054572f0a9cf49fa9757ce937d097de6200fe942", "a3edc4f28c3d0bb89e99c26082147656a809bcf4", "93726e7e258eceacf51a823361113f5792f50f86", "02ebdcf8200135ec0433e12e4ef2459ac740370b" ], "paperAbstract": "", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3102990" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8071c58e2639b796bdd6544a27166b70376bdd4e", "sources": [ "DBLP" ], "title": "It's Time to Think About an Operating System for Near Data Processing Architectures", "venue": "HotOS", "year": 2017 }, "80a5e5dc09a5b1c14f3bfd4561f840cd99c2edbf": { "authors": [ { "ids": [ "2758173" ], "name": "Yufei Ren" }, { "ids": [ "2073481" ], "name": "Xingbo Wu" }, { "ids": [ "1712838" ], "name": "Li Zhang" }, { "ids": [ "1845933" ], "name": "Yandong Wang" }, { "ids": [ "1726357" ], "name": "Wei Zhang" }, { "ids": [ "5758171" ], "name": "Zijun Wang" }, { "ids": [ "2166550" ], "name": "Michel Hack" }, { "ids": [ "1804354" ], "name": "Song Jiang" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.30", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.30", "entities": [ "Application programming interface", "Computation", "Computer vision", "Deep learning", "Distributed computing", "Fault tolerance", "Graphics processing unit", "Load balancing (computing)", "Message Passing Interface", "POSIX", "Remote direct memory access", "Server (computing)", "Test 
set" ], "id": "80a5e5dc09a5b1c14f3bfd4561f840cd99c2edbf", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "231-238", "journalVolume": "", "outCitations": [ "5d90f06bb70a0a3dced62413346235c02b1aa086", "0122e063ca5f0f9fb9d144d44d41421503252010", "1cfc1cce7ec6c199a2e43f7c312c398820778e6c", "5c36d8bb0815fd4ff5daa8351df4a7e2d1b32934", "31bd159b02068fbe1994b7a5e9d7b91adab0d142", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "043afbd936c95d0e33c4a391365893bd4102f1a7", "0760550d3830230a05191766c635cec80a676b7e", "08153c4cdc75b094adfcff939e5ed6652b1b8c51", "6074c1108997e0c1f97dc3c199323a162ffe978d", "81b7dcaef4a53daab41658a4d1e97972d04b3384", "60f22ad725f041fff81d6371242485bbe5c3ebb6", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "14cf64fec34946c88c71bbc4b6c606010696bc82", "45dd7be0cf09bfc8b97522890e3ed703defd9995", "0144941d255dad89d3d90c2d131a15cc01df9829", "029e03cd045b1fcda76e4c469eedfa0470c79624", "13f66fb89aefadf21718bb436dba4bdd8de73463", "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "8e0eacf11a22b9705a262e908f17b1704fd21fa7", "722fcc35def20cfcca3ada76c8dd7a585d6de386" ], "paperAbstract": "Distributed deep learning systems place stringent requirement on communication bandwidth in its model training with large volumes of input data under user-time constraint. The communications take place mainly between cluster of worker nodes for training data and parameter servers for maintaining a global trained model. For fast convergence the worker nodes and parameter servers have to frequently exchange billions of parameters to quickly broadcast updates and minimize staleness. Demand on the bandwidth becomes even higher with the introduction of dedicated GPUs in the computation. 
While RDMA-capable network has a great potential to provide sufficiently high bandwidth, its current use over TCP/IP or tied to particular programming models, such as MPI, limits its capability to break the bandwidth bottleneck. In this work we propose iRDMA, an RDMA-based parameter server architecture optimized for high-performance network environment supporting both GPU- and CPU-based training. It utilizes native asynchronous RDMA verbs to achieve network line speed while minimizing the communication processing cost on both worker and parameter-server sides. Furthermore, iRDMA exposes the parameter server system as a POSIX-compatible file API for convenient support of load balance and fault tolerance as well as its easy use. We have implemented iRDMA at IBM's deep learning platform. Experiment results show that our design can help deep learning applications, including image recognition and language classification, to achieve near-linear improvement on convergence speed and training accuracy acceleration by using distributed computing resources. 
From the system perspective, iRDMA can efficiently utilize about 95% network bandwidth of fast networks to synchronize models among distributed training processes.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.30" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/80a5e5dc09a5b1c14f3bfd4561f840cd99c2edbf", "sources": [ "DBLP" ], "title": "iRDMA: Efficient Use of RDMA in Distributed Deep Learning Systems", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "80ed3b859801edc6a4cab2c0d1db4a6273de352c": { "authors": [ { "ids": [ "1710634" ], "name": "Edward Chuah" }, { "ids": [ "1682292" ], "name": "Arshad Jhumka" }, { "ids": [ "31693355" ], "name": "Samantha Alt" }, { "ids": [ "7422827" ], "name": "Theo Damoulas" }, { "ids": [ "2025208" ], "name": "Nentawe Gurumdimma" }, { "ids": [ "1825061" ], "name": "Marie-Christine Sawley" }, { "ids": [ "31625246" ], "name": "William L. Barth" }, { "ids": [ "1830876" ], "name": "Tommy Minyard" }, { "ids": [ "1703576" ], "name": "James C. 
Browne" } ], "doi": "10.1109/HiPC.2017.00044", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00044", "entities": [ "Dependability", "Error detection and correction", "Error message", "Failure rate", "Log analysis", "Propagation of uncertainty", "Software deployment", "Supercomputer", "UNIX System III", "ranger" ], "id": "80ed3b859801edc6a4cab2c0d1db4a6273de352c", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "317-327", "journalVolume": "", "outCitations": [ "6b17c5def4aa387175cf830837ea1290bbe3ca92", "9826daa08e5e4d73a1878fd3383e37472064f23f", "62de5cc9ab426b8161de120208fc514d26cc19ef", "a20def90994cab53b1e5202147848bb5bd4891a4", "3635141c7e3ed8190de4d799d06a71b027c32975", "2d1c16f01c89136e171d5421ec303a695de7748b", "33282ae3f2929d70d37234e763f40bac5bbbbc6f", "73b343b073bbc0b9660b5c500f994a5e61f53132", "9d416d1c24d04fdc3b9b200cfa587103982c7795", "b9eeae3c51c66874902c82e5c9cc4007cb0388ac", "c178021cdb78a71022d0c3ace17afc31f1dca9a1", "33765080cd39654f606710d111bcb9b92e0b1d5b", "cedda598732ea3c959cc5bc6a596c1b303c5c54b", "30f68de4ac3af105c73167beda796a8d6071e083", "580cd9345085036c200cbf0a75753653ecebfc94", "9fdde5b04351bde57e805da89fb6ef93cc1a5aa2", "42c8e66c80292519d3a38915316981a16fb097c8", "bd1ed675f2fb2d47b7bd9ba8d0bdf71a99699693", "233b43f13c17fc5c45d6dd67a46a18d5e7d95d57", "230d4b4485201598878f81d0a965a225b77a2ece", "f91c0bcbfdca7efcef7b474d1050b656ad6f524c", "3a1e9f17e1a98e17d3bcc5dbee09f920bfee2eee", "0ec40760465c0e7cb0f5d25179d1e8b44049a1e3", "63d914b396095617b147f8f17d7a6fe8be570f44", "7ba8f8fb815f87e98bca62b88d756a25a72a3b1f", "03d88407c702b6dffaae48b3d55ee716bcaffb8d", "96d860caedf7731e2f598a768e85d04e26753868", "4618c9e130481088e0a9d089728eb40576387f9b", "f0ba9dfcc0d3de1c1c941c9d42435350ed662557", "350a4b5cecfffb6a0e88c349b84e56df8829da44", "7ac49153fae03ee4f658ce7107b3eb743cecfa59", "808fadaaa7d7091e95809f419959917bb6ce4a6d", "bda0fb0cde6a1120c721a6caa9f58f6d049ddb7a", 
"2eb161fb37474aeb4139b3849fd90b57492629fc", "473a60bd901dad0544450d8be706df7e770211db", "4c67bc55689e66f9fcfdcef15663b0de2cb1d4ad", "f8fc7487f592966beb70dae75f9e6ab3ff48ed03" ], "paperAbstract": "Recent work have used both failure logs and resource use data separately (and together) to detect system failureinducing errors and to diagnose system failures. System failure occurs as a result of error propagation and the (unsuccessful) execution of error recovery mechanisms. Knowledge of error propagation patterns and unsuccessful error recovery is important for more accurate and detailed failure diagnosis, and knowledge of recovery protocols deployment is important for improving system reliability. This paper presents the CORRMEXT framework which carries failure diagnosis another significant step forward by analyzing and reporting error propagation patterns and degrees of success and failure of error recovery protocols. CORRMEXT uses both error messages and resource use data in its analyses. Application of CORRMEXT to data from the Ranger supercomputer have produced new insights. CORRMEXT has: (i) identified correlations between resource use counters that capture recovery attempts after an error, (ii) identified correlations between error events to capture error propagation patterns within the system, (iii) identified error propagation and recovery paths during system execution to explain system behaviour, (iv) showed that the earliest times of change in system behaviour can only be identified by analyzing both the correlated resource use counters and correlated errors. 
CORRMEXT will be installed on the HPC clusters at the Texas Advanced Computing Center in Autumn 2017.", "pdfUrls": [ "https://warwick.ac.uk/fac/sci/dcs/people/research/u1664603/hipc-2017_slides-v2.pdf", "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00044" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/80ed3b859801edc6a4cab2c0d1db4a6273de352c", "sources": [ "DBLP" ], "title": "Enabling Dependability-Driven Resource Use and Message Log-Analysis for Cluster System Diagnosis", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "80f4c0293b5d1f522191aa60e1ff9705ce0cdf01": { "authors": [ { "ids": [ "3111708" ], "name": "Ankica Barisic" }, { "ids": [ "39118564" ], "name": "Dominique Blouin" }, { "ids": [ "1698810" ], "name": "Vasco Amaral" }, { "ids": [ "1729288" ], "name": "Miguel Goul\u00e3o" } ], "doi": "10.1145/3136014.3136027", "doiUrl": "https://doi.org/10.1145/3136014.3136027", "entities": [ "Digital subscriber line", "Domain-specific language", "GYRO", "Requirement", "Requirements engineering", "Reverse engineering", "Robot", "Usability" ], "id": "80f4c0293b5d1f522191aa60e1ff9705ce0cdf01", "inCitations": [ "89c6b254b83a4e5d96c030f95c9caa0aa94fadba" ], "journalName": "", "journalPages": "115-128", "journalVolume": "", "outCitations": [ "b043388c5f630687047c767bc02bf1716f508b0d", "9e9a5685925c56ee7064413dd78040ea0806b256", "39e960e2dd424bc0bfa1abc0d88bf04eff83180e", "6f33820540da74474edb4468b53d034108223142", "e5186f327e6d6b855298624227ae684bb65fdeac", "38aa9f63c638104b3f2b068e51a0a7dc5c710558", "7ef49902b1ce1e820a6b42eae890fdced4d71df6", "6a30730a3f67e4a1d918d4724b2df121a304769f", "4d2d14daf3ca1dafd4c87daa56c020e2c84af1c7", "152cc02fdbdfb7d490721a2dc1d7fc79a5cd65fe", "5c7b902358a1a76bf87d4c32c2192343d501fd26", "910648e5228cb4ad3458a5de1b70ebd8a974fd25", "d059b5514879f854e4ba3487ea7923cc8224521b", "8ed07c82e3b7c9b0c6b77695910454bde79dd3a8", 
"061ba5044771e48b01d0d7f5e51ba6d6fbcf15fa", "d69ef23fb76e8e1eafb10aea26e7a55eaf8c7928", "8cb147d7ebd97efc27448d5bd0c44268a87bfabd", "ee4b62cfd6063393f944cd0d9b1137aadf9eb88d", "49d856781adce81dbbc39da9cf73f00059ff85f2", "70e36a26214525db78f972f6a36d8d980296892a", "e8a1c42e252016c2e2f0efd6efec419f657d20d3", "54327565dc687ec8ac74202cf38227c689ca62dc", "815a654d7f19bcfacabe1aa4486bf4502b46906a", "6ff839a9b1a04f7eb697b7b3fcf22eb27b6a6d22", "0e2444472d67f9ffbefc4f965140dc8f3ecb2d4c", "de9f790fd0289fb4282701186dbe12e3345c6a6c", "cf364741398fb7489bc62ce99eeb20c65eb94407", "637eb667e5255e679b22bb3bc3a48dbdeb4dfe6b", "0ae05f98c8bc62268fc94f5b52f5ed425a88f1d4", "fbc60c004d84d6ee55aaa02f34a823da4f898b2b", "76f165ea8f51b58b6e2a55e274f5156491d4f4eb", "1dac0f46a9f8ed65276faa35d8556362d671ec16", "e95040410b1f1fe2cbd8af4ed6529166f955dcec" ], "paperAbstract": "There is currently a lack of Requirements Engineering (RE) approaches applied to, or supporting, the development of a Domain-Specific Language (DSL) taking into account the environment in which it is to be used. We present a model-based RE approach to support DSL development with a focus on usability concerns. RDAL is a RE fragment language that can be complemented with other languages to support RE and design. USE-ME is a model driven approach for DSLs usability evaluation which is integrable with a DSL development approach. We combine RDAL and a new DSL, named DSSL, that we created for the specification of DSL-based systems. Integrated with this combination we add USE-ME to support usability evaluation. This combination of existing languages and tools provides a comprehensive RE approach for DSL development. 
We illustrate the approach with the development of the Gyro DSL for programming robots.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136027" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/80f4c0293b5d1f522191aa60e1ff9705ce0cdf01", "sources": [ "DBLP" ], "title": "A requirements engineering approach for usability-driven DSL development", "venue": "SLE", "year": 2017 }, "8123fe7c1220c2dcf326e74f77b4a018e2e19fc1": { "authors": [ { "ids": [ "39348488" ], "name": "Till Smejkal" }, { "ids": [ "33601888" ], "name": "Marcus H\u00e4hnel" }, { "ids": [ "2889090" ], "name": "Thomas Ilsche" }, { "ids": [ "2615012" ], "name": "Michael Roitzsch" }, { "ids": [ "1781970" ], "name": "Wolfgang E. Nagel" }, { "ids": [ "1731688" ], "name": "Hermann H\u00e4rtig" } ], "doi": "", "doiUrl": "", "entities": [ "Electronic billing", "Perf (Linux)", "Scheduling (computing)" ], "id": "8123fe7c1220c2dcf326e74f77b4a018e2e19fc1", "inCitations": [ "103879846a547f5d04bf7fefe58f2f6959007779" ], "journalName": "", "journalPages": "589-601", "journalVolume": "", "outCitations": [ "13ca3cb654d67afdc3ba62e47afee6a72139e622", "40019227e3800aa4ba303242dbe769afd3779436", "9845063317e8d1dc2a596010cea501f25f0245d1", "1adc5f0eea495fa5bf7394574af776dd8e5af633", "271f053d2561dbd339713c87322f8ad994d2099b", "9b242777d34fa5d66598771702fc12c48dd6620a", "48b22e82dba733f04a5af3408ecab872ba38f0c3", "de5bd35339e5692002a77145d8b861940429ad77", "31aeea96898c7591f90953ff5992f0619a0c53cc", "2516524a25fdba2c54f9a1d80b26300d896f2c9e", "438e22ee516ecd66ade08aa6d5a9af1dd16d5716", "70ae2ba890bf6ce2ff645ca2fd4262480f8048db", "1b1ff7f94430f47d109d0deb6856c98d9df518e8", "01efff2d9fb655d7bc6532581857f8dfa27cd790", "377175d109126aea51714e8ef0e4324d28eb6fcc", "7f4b805160cfcbd546c70c5e781b64b85c2b4850", "55512fc0be51166c06fbde0eda8c1e4cdccd298c", "137afccd3ecd925327cd7289ced5f309ff4e61de", "169b8204089d683536dff9ec9b8f259b907be3a8", "12111b92dea58acf3fa2d71d572106f307719343", 
"252273319f51f95113099e370f42110d49355453", "23f4b96752fdb29b74b25f491e0df05a0701e6db", "2cac6e84d3d7fed13ec9a5d39fd2bd6e75423578", "c3cc6cfc9c36f25ace6980eac9a7fb4b2a3a460b", "3201e8efdb6ebd31eab9f14d77d15e6db9056782", "3ee47780011ee618bd5a64624a662375e1958e0a", "f4a91972bf1a05b195bce06a24dc33960bff1151", "07425384622afb86c0e43564c09fb3295fa7357c", "466e69619631b4ea74833036f55c88b9b5f6e251", "b6581c4e8a74128adacd064e6b5ea4b1ca40c772", "3685a773ab05b40e22bbb73b2b5e801dfd95f747", "9044a1b27037e6de691effbffb2f34c3a6c3b1c4", "d99707139c3013c7e3a74ecc74f50a32391a06e3" ], "paperAbstract": "Energy-based billing as well as energy-efficient software require accurate knowledge of energy consumption. Model-based energy accounting and external measurement hardware are the main methods to obtain energy data, but cost and the need for frequent recalibration have impeded their large-scale adoption. Running Average Power Limit (RAPL) by Intel R \u00a9 enables non-intrusive, off-the-shelf energy monitoring, but only on a per-socket level. To enable apportioning of energy to individual applications we present E-Team, a non-intrusive, scheduler-based, easy-to-use energy-accounting mechanism. By leveraging RAPL, our method can be used on any Intel system built after 2011 without the need for external infrastructure, application modification, or model calibration. E-Team allows starting and stopping measurements at arbitrary points in time while maintaining a low performance overhead. 
E-Team provides high accuracy, compared to external instrumentation, with an error of less than 3.5 %.", "pdfUrls": [ "https://www.usenix.org/conference/atc17/technical-sessions/presentation/smejkal", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_smejkal.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-smejkal.pdf", "https://os.inf.tu-dresden.de/papers_ps/atc2017-e-team.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/ef86/cadaf7526f5146b7ce11594f646ecb791e0c.pdf", "s2Url": "https://semanticscholar.org/paper/8123fe7c1220c2dcf326e74f77b4a018e2e19fc1", "sources": [ "DBLP" ], "title": "E-Team: Practical Energy Accounting for Multi-Core Systems", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "812be1f6d8a2f55d7b36718b34a333f67541e72b": { "authors": [ { "ids": [ "2447516" ], "name": "Vignesh T. Ravi" }, { "ids": [ "40303192" ], "name": "James Erwin" }, { "ids": [ "31799139" ], "name": "Pradeep Sivakumar" }, { "ids": [ "40802965" ], "name": "C. Q. 
Tang" }, { "ids": [ "2819266" ], "name": "Jianxin Xiong" }, { "ids": [ "39322919" ], "name": "Ravindra Babu Ganapathi" }, { "ids": [ "3071651" ], "name": "Mark Debbage" } ], "doi": "10.1109/HOTI.2017.20", "doiUrl": "https://doi.org/10.1109/HOTI.2017.20", "entities": [ "Big data", "Cloud computing", "High- and low-level", "Interleaving (disk storage)", "Machine learning", "Mathematical optimization", "Message Passing Interface", "On-premises software", "Pipeline (computing)", "Program optimization", "Solution stack", "Throughput" ], "id": "812be1f6d8a2f55d7b36718b34a333f67541e72b", "inCitations": [], "journalName": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "journalPages": "95-102", "journalVolume": "", "outCitations": [ "7605fe626c3598ee68fefaf1f4e1d21fcd2cb3d4", "81bb091f4c7c95b98269bbba976767d4dc2bb853", "1ce3217ca5607757d89b05e9876c4bcda8aba16d", "a125c8cf7410568947317a9718cbc5ad14218ddd", "5cfeda94aaa59702e57647045de1488b8258abef", "1a7a3c3945947358e99257b3461d3eff8a06946d", "0b41487e1acb31ceabc84068fc97e2fa6619ae5e", "5f8991828def57d2f0cda942566afff56740d150", "3e6c3166e1d0a84f87279ab8268eb86b6357bb84", "8b2d14367dc51cee8d80ba31bc3acaa70d2e17dc", "2c237e20e2ed8f0a1a7c052864389f4166ee26c5", "2368a00495ea5d2ed318a8921366bb40d036d0b3", "198cdaeb0c0572f2327d73f95964fd2567bd3916" ], "paperAbstract": "Scientific HPC applications along with the emerging class of Big Data and Machine Learning workloads are rapidly driving the fabric scale both on premises and in the cloud. Achieving high aggregate fabric throughput is paramount to the overall performance of the application. 
However, achieving high fabric throughput at scale can be challenging - that is, the application communication pattern will need to map well on to the target fabric architecture, and the multi-layered host software stack in the middle will need to orchestrate that mapping optimally to unleash the full performance. In this paper, we investigate low-level optimizations to the host software stack with the goal of improving the aggregate fabric throughput, and hence, application performance. We develop and present a number of optimization and tuning techniques that are key driving factors to the fabric performance at scale - such as, Fine-grained interleaving, improved pipelining, and careful resource utilization and management. We believe that these low-level optimizations can be commonly leveraged by several programming models and their runtime implementations making these optimizations broadly applicable. Using a set of well-known MPI-based scientific applications, we demonstrate that these optimizations can significantly improve the overall fabric throughput and the application performance. 
Interestingly, we also observe that some of these optimizations are inter-related and can additively contribute to the overall performance.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HOTI.2017.20" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/812be1f6d8a2f55d7b36718b34a333f67541e72b", "sources": [ "DBLP" ], "title": "Host Software Stack Optimizations to Maximize Aggregate Fabric Throughput", "venue": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "year": 2017 }, "8152e621012339cd950dd082ff711b6f1e325f1b": { "authors": [ { "ids": [ "13413642" ], "name": "Umar Iqbal" }, { "ids": [ "34616778" ], "name": "Zubair Shafiq" }, { "ids": [ "1794927" ], "name": "Zhiyun Qian" } ], "doi": "10.1145/3131365.3131387", "doiUrl": "https://doi.org/10.1145/3131365.3131387", "entities": [ "AdBlock", "Antivirus software", "Archive", "Crowdsourcing", "JavaScript", "Machine learning", "Span and div", "Speedup", "Static program analysis", "Wayback Machine" ], "id": "8152e621012339cd950dd082ff711b6f1e325f1b", "inCitations": [ "30ec687df1a951d0e68d469f602e091981401987", "287750d6ec07d5867be4bf489dba7bba14d51fe4" ], "journalName": "", "journalPages": "171-183", "journalVolume": "", "outCitations": [ "504e6398379fcc254d42e844b1afd88f5707a9e1", "1fd6ef68f78e4f149d45e6af1ed1319759e194c6", "a5adc8c3bbcc46e75c9300be64c8334d752e9601", "02ef09e439f8f268ffdec9887a9ac576180ca6c8", "400078e734d068b8986136ca9e3bbaa01c2bd52b", "2235aa3e3607a73cdd5c88eff4f7e6bedea6952d", "79914eef0ca54f18e746c7dd5cc04953840a277a", "05ad6c3ab7a0b1ab0c4fc3af9f1622cf6c0fa40e", "9a3c791067911d17a79918b1b0b5826beaeb2fe1", "6840d33437499bbbe4b9f1d92b93e5818b519875", "4781b899447abc3439eb785281aa754126f1d818", "0efb5dafbc3b79b133015981b136b4bd87c4ea35", "3957270267c2bba0ac00ab3c4461f0c47cfd95c3", "1c32b6ce44bd647090cf64e6e82c421c4c0c2f35", "2bfbb6b8da453f91f5860ea936588fddef6c80e0", "1660f24384ea0c0f4d811f1967e0d231bb422443", 
"1d0e9193e4becfbe87e5ba731218cd8ee43a97f2", "1ac30f626837e58d02340a3b3656ec99f97468fd", "307514e25f67899c679c8d70f31667d338556ebd", "250740ddb532b216e39d2f101ac36bd24107dbb7", "2cbd80746d765a99bedb976c60c2830f937cefd9", "65f3413bf79303535bece155f353c9ce193a473a", "7f238553410099e1a98e3039412b233ebd300899" ], "paperAbstract": "The increasing popularity of adblockers has prompted online publishers to retaliate against adblock users by deploying anti-adblock scripts, which detect adblock users and bar them from accessing content unless they disable their adblocker. To circumvent anti-adblockers, adblockers rely on manually curated anti-adblock filter lists for removing anti-adblock scripts. Anti-adblock filter lists currently rely on informal crowdsourced feedback from users to add/remove filter list rules. In this paper, we present the first comprehensive study of anti-adblock filter lists to analyze their effectiveness against anti-adblockers. Specifically, we compare and contrast the evolution of two popular anti-adblock filter lists. We show that these filter lists are implemented very differently even though they currently have a comparable number of filter list rules. We then use the Internet Archive's Wayback Machine to conduct a retrospective coverage analysis of these filter lists on Alexa top-5K websites over the span of last five years. We find that the coverage of these filter lists has considerably improved since 2014 and they detect anti-adblockers on about 9% of Alexa top-5K websites. 
To improve filter list coverage and speedup addition of new filter rules, we also design and implement a machine learning based method to automatically detect anti-adblock scripts using static JavaScript code analysis.", "pdfUrls": [ "http://doi.acm.org/10.1145/3131365.3131387", "https://conferences.sigcomm.org/imc/2017/papers/imc17-final113.pdf", "http://www.cs.ucr.edu/~zhiyunq/pub/imc17_antiadblocker_measurement.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8152e621012339cd950dd082ff711b6f1e325f1b", "sources": [ "DBLP" ], "title": "The ad wars: retrospective measurement and analysis of anti-adblock filter lists", "venue": "IMC", "year": 2017 }, "81532badf6f1a7da5c1b495c2c91243ef7968a37": { "authors": [ { "ids": [ "2869488" ], "name": "Boris Teabe" }, { "ids": [ "32426008" ], "name": "Vlad Nitu" }, { "ids": [ "1685006" ], "name": "Alain Tchana" }, { "ids": [ "1679417" ], "name": "Daniel Hagimont" } ], "doi": "10.1145/3064176.3064180", "doiUrl": "https://doi.org/10.1145/3064176.3064180", "entities": [ "Busy waiting", "Central processing unit", "Critical section", "Hardware virtualization", "Hypervisor", "Library for WWW in Perl", "Linux", "Linux", "Lock (computer science)", "Loop heat pipe", "Preemption (computing)", "Scheduling (computing)", "Spinlock", "Virtual machine", "Whole Earth 'Lectronic Link" ], "id": "81532badf6f1a7da5c1b495c2c91243ef7968a37", "inCitations": [ "8410828bc05c0e6109d866d3b1ccee075b127924" ], "journalName": "", "journalPages": "286-297", "journalVolume": "", "outCitations": [ "371c5cc544d758eacd08ee2e9333b1b94f26689e", "6a285b0a2243223ee6905692d79b4a8d39f5af5e", "ca6e70cca64c928872a8cd137515d72708b58a69", "067c7857753e21e7317b556c86e30be60aa7cac0", "014f2fde05dc7c6f0b71c13f98e4a0d0c4969e25", "f47de67129a525a73fd9b15ed53f0746c594c290", "3c3a05197123ab4b2ec3d402c2d9d2777c63c712", "008f16f7af27244b507659be26bebb8bb07aded3", "693770a65bf0183c9bca10e5fde5e3848bbbb40c", "04e954c5afc21447cf43ba1420c9905d359eefd9", 
"7c833a334df551456885bd8b55d63753afac1fae", "136c75e41eb66c85aab922c7fdf62820d63b139f", "1749ad1f7adb7c7bc984a2713277d71ae21ac28e", "97c649dc68ad8818c7e2b7f75b9c164aa840f6a5", "a3021aba46ea0b09bac5a6f9f1e5449b13da9c05", "0e7af537d6eacd9832c90f31a46ced1ac91b573e", "09cef59336519ce93d15841bc2756a79ce13477d" ], "paperAbstract": "In native Linux systems, spinlock's implementation relies on the assumption that both the lock holder thread and lock waiter threads cannot be preempted. However, in a virtualized environment, these threads are scheduled on top of virtual CPUs (vCPU) that can be preempted by the hypervisor at any time, thus forcing lock waiter threads on other vCPUs to busy wait and to waste CPU cycles. This leads to the well-known Lock Holder Preemption (LHP) and Lock Waiter Preemption (LWP) issues.\n In this paper, we propose I-Spinlock (for Informed Spinlock), a new spinlock implementation for virtualized environments. Its main principle is to only allow a thread to acquire a lock if and only if the remaining time-slice of its vCPU is sufficient to enter and leave the critical section. This is possible if the spinlock primitive is aware (informed) of its time-to-preemption (by the hypervisor).\n We implemented I-Spinlock in the Xen virtualization system. We show that our solution is compliant with both para-virtual and hardware virtualization modes. We performed extensive performance evaluations with various reference benchmarks and compared our solution to previous solutions. 
The evaluations demonstrate that I-Spinlock outperforms other solutions, and more significantly when the number of core increases.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064180" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/81532badf6f1a7da5c1b495c2c91243ef7968a37", "sources": [ "DBLP" ], "title": "The lock holder and the lock waiter pre-emption problems: nip them in the bud using informed spinlocks (I-Spinlock)", "venue": "EuroSys", "year": 2017 }, "81b972c639a5e87db673a61ec86c34993654a6fc": { "authors": [ { "ids": [ "32289854" ], "name": "Tsuyoshi Id\u00e9" }, { "ids": [ "1796492" ], "name": "Dzung T. Phan" }, { "ids": [ "1682581" ], "name": "Jayant Kalagnanam" } ], "doi": "10.1109/ICDM.2017.27", "doiUrl": "https://doi.org/10.1109/ICDM.2017.27", "entities": [ "Algorithm", "Anomaly detection", "Calculus of variations", "Complex systems", "Computer multitasking", "Generic group model", "Graphical model", "Instability", "Iterative method", "Lasso", "Mixture model", "Modal logic", "Multi-task learning", "Numerical stability", "Relevance", "Selection algorithm", "Software bug", "Sparse matrix" ], "id": "81b972c639a5e87db673a61ec86c34993654a6fc", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "177-186", "journalVolume": "", "outCitations": [ "3406288a450dee3dd8514f2d1c4418c3d64f7c4f", "2b9ac00b2a424e5240b393c67bf4cb21b7e82414", "fb6f6feae242567dd0867948dd7da03ab1f48986", "fc51412058a7559d2f439068d42d7f993b4ee0c0", "dc6369546864f6693a5a2bc3ae02d3011115e68d", "343ab523f4117dc38f690fb4afb2231e2b684b9d", "8b3d4c64d3e55b026bdc644a11f01d9328bc8583", "0f9e4c5e55310ef558bd867c90bba8dcd74000cf", "4aff89685217453b161c6b1ef2e1b89b07261993", "2a1c6183bfe457620fada87577833395387ac423", "0c4867f11c9758014d591381d8b397a1d38b04a7", "6fd867375e5fc91f0afa31bb6d0ae9098c7098e9", "c77e1a137b376de86d968a450714eab9231ae70b", "17e2decd3876ed2e43a2435e1f0eda7a0832f146", 
"2679a2729924ac9eaf27c08a8fac3d36f4cb3ae0", "3e0156082a534a27b9dffac524a7e05f7a20a992", "d39d1b9c812a85a06d5e2e109cae7794fd88fab0", "1caaaf71ee767696052ce5db6036057b29a37efe", "161ffb54a3fdf0715b198bb57bd22f910242eb49", "37753f4c81f27cd685f769fe3e9ef879f22f035e", "d7737fc7ce08fb6389326b9d43be1d977b66d09d", "25c760c11c7803b2aefd6b6ae36f15908f76b544" ], "paperAbstract": "This paper proposes a new framework for anomaly detection when collectively monitoring many complex systems. The prerequisite for condition-based monitoring in industrial applications is the capability of (1) capturing multiple operational states, (2) managing many similar but different assets, and (3) providing insights into the internal relationship of the variables. To meet these criteria, we propose a multi-task learning approach based on a sparse mixture of sparse Gaussian graphical models (GGMs). Unlike existing fused- and group-lasso-based approaches, each task is represented by a sparse mixture of sparse GGMs, and can handle multi-modalities. We develop a variational inference algorithm combined with a novel sparse mixture weight selection algorithm. To handle issues in the conventional automatic relevance determination (ARD) approach, we propose a new ℓ0-regularized formulation that has guaranteed sparsity in mixture weights. We show that our framework eliminates well-known issues of numerical instability in the iterative procedure of mixture model learning. We also show better performance in anomaly detection tasks on real-world data sets. 
To the best of our knowledge, this is the first proposal of multi-task GGM learning allowing multi-modal distributions.", "pdfUrls": [ "http://ide-research.net/papers/2017_ICDM_Ide.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.27" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/81b972c639a5e87db673a61ec86c34993654a6fc", "sources": [ "DBLP" ], "title": "Multi-task Multi-modal Models for Collective Anomaly Detection", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "81d1d095afeada52d673eebffe16f9d4d1eff5d3": { "authors": [ { "ids": [ "2295608" ], "name": "Yang Wang" }, { "ids": [ "2099857" ], "name": "Shuibing He" }, { "ids": [ "1800904" ], "name": "Xiaopeng Fan" }, { "ids": [ "38623820" ], "name": "Chengzhong Xu" }, { "ids": [ "4437808" ], "name": "Joseph Culberson" }, { "ids": [ "37401425" ], "name": "Joseph Horton" } ], "doi": "10.1109/ICPP.2017.50", "doiUrl": "https://doi.org/10.1109/ICPP.2017.50", "entities": [ "Algorithm", "Analysis of algorithms", "Cache (computing)", "Cloud computing", "Data item", "Dynamic programming", "Mobile cloud computing", "Money", "Next-generation network", "Online algorithm", "Online and offline", "Speculative execution", "Time complexity" ], "id": "81d1d095afeada52d673eebffe16f9d4d1eff5d3", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "412-421", "journalVolume": "", "outCitations": [ "5275daefa085b9480f839e39c68dd777b611be4e", "67c7ae0ece7e568ddbbd4c49d8aab22b45494b7c", "2e6e51dbef0899e8ff92f0617235bba97e5fa56a", "3b67014e2c7c3542b5f17e69edb0c2c479c68b04", "2b015d4541225eebdb281e3e8194145757887d21", "9f2a7e78069fcf873d630e9c09006c32f9fd1af7", "0ad3358ffc0d5e44311160767cc0fb65ccd25b00", "31da7495d2299d718d4102b896fc98587def9966", "d949c8adb08a00918b303f0e14746ca3239d4ee4", "587572975420ba72bc024f59c03451f5db2b04ad", "16661dffe51873efba14cdc728523fbef0e22289", 
"1728de7b027e827dfc67ceb3b0e23b841a4b1538", "f345ecb8e361107bd6c8ca9b8f712c42a102c4cd", "01cbfe34eec6a1d1f3b971449a9ab2911b2ceb4d", "300130e8d199184c8c7921f44f338fae47150152", "114555b5443b956c59510ca0d8581801210be78e", "3c02593a7f5ea8a22583c507c20a65c0244d385e", "40aaca4840dc77e64b34f67c6c649dd97166b308", "7779c10dfa1f84953016b6292844815c5faf84f5" ], "paperAbstract": "In this paper we consider the data caching problem in next generation data services in the cloud, which is characterized by using monetary cost and access trajectory information to control cache replacements, instead of exploiting capacityoriented strategies as in traditional research. In particular, given a stream of requests to a shared data item with respect to a homogeneous cost model, we first propose a fast off-line algorithm using dynamic programming techniques. The proposed algorithm can generate optimal schedule within O(mn) timespace complexity to cache, migrate as well as replicate the shared data item to serve an n-length request sequence with minimum cost in a fully connected m-node network, substantially improving the previous results. Additionally, we also study this problem in its online form, and present a 3-competitive online algorithm by leveraging a speculative caching idea. The algorithm can serve an online request in constant time, and is space efficient in O(m) as well, rendering it to be more practical in reality. Our research complements the shortage of similar research in literature on this problem.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.50" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/81d1d095afeada52d673eebffe16f9d4d1eff5d3", "sources": [ "DBLP" ], "title": "Data Caching in Next Generation Mobile Cloud Services, Online vs. 
Off-Line", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "8210f6a9b276f954b4bb852c5f9ee73e0f23d3b7": { "authors": [ { "ids": [ "39470595" ], "name": "Paul Marinescu" }, { "ids": [ "17072147" ], "name": "Chad Parry" }, { "ids": [ "2522299" ], "name": "Marjori Pomarole" }, { "ids": [ "10680263" ], "name": "Yuan Tian" }, { "ids": [ "2331637" ], "name": "Patrick Tague" }, { "ids": [ "2763385" ], "name": "Ioannis Papagiannis" } ], "doi": "10.1109/SP.2017.33", "doiUrl": "https://doi.org/10.1109/SP.2017.33", "entities": [ "Authorization", "Cross-site request forgery", "Cross-site scripting", "Data model", "Graph database", "Machine learning", "Social Networks", "Social network", "Software bug", "Web application" ], "id": "8210f6a9b276f954b4bb852c5f9ee73e0f23d3b7", "inCitations": [ "402c35e8d3478edeb6aa7e60b04e761cccc266a9" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "1094-1109", "journalVolume": "", "outCitations": [ "5f3f9223c5c9f896be099bc177929febad508407", "90b478a7704530ef722befa81fdd8ee74a31af2c", "a1c6a7817891703ff1d103a23ab01961dda598cb", "148edd9ac0ed0485f14f470949f64a9d92cbbc10", "4af63ed343df388b6353b6fc77c7137d27822bf4", "08b038790fe2bec7e166dca7f21f06a4ea9da839", "0a9febbf3daff2db95accc73eb74d5dd05b54cb8", "0b1fe1af18e2fda99eedf9e5cf3ac18826e585c1", "0c80eb8588fac0a763a15e1b7a33c6d885ce80a4", "39ac2e0fc4ec63753306f99e71e0f38133e58ead", "1455be30b0bb364ffc4c35d2cf1ee05cfb32595f", "d809e003f28f2f8de8dac00f0e0c3c8c830e9fa0", "81653b68eb28329b05e2337381a3d78d6d5b53df", "28573c2d17dd0fcb57c6d2171e7a2761d47c6ed0", "363d109c3f00026f9ef904dd8cc3c935ee463b65", "1edb070e3530f1a02ecd76f6621f7719d13b2109", "1a7160058a87a2a7dedd2f6e95f25892ec4f3d35", "07e70cec6ef8359c5801eb94d5954276045c0dd7", "53c7eaf1773172ee693887b6f6148b682f5be026", "590b1fda209259f3502018bb2dfc4b80191c842e", "5b9472af7e61fc25b426253259b5fde1c2344cba", "132f37dc511812013e4d0fab686fd4274d40db05", 
"845ff91a8f0bab59e53cb9b5784de3b66e04e5eb", "51c37c5e1ea925c93e4e03d1007f117582cd2844", "ab0458e4e7dcad49c9aae98dab5bd2dc8c099af4", "3f1d3a2463d437aa21b2253473f72f7b9a9c58a3", "2b284c8b71a6da2469e8bb3ca52264be228ad269", "62293752a4f433617758d3b698780b66fd596987", "154e4aaf1da9e5ed6ebf5e216adbaaa85324d39c", "8db5fd6c8b016d3dfc3d2e8761ceb65e14cd2405", "afa1b511c12ef6b20cc23f095561c9423fe645bf", "07ea2d63219ce749d61b7a5b3d752b548c6ed05f", "6994d810bddae500fd5bc4e9866316fa44a516c6" ], "paperAbstract": "Authorization bugs, when present in online social networks, are usually caused by missing or incorrect authorization checks and can allow attackers to bypass the online social network's protections. Unfortunately, there is no practical way to fully guarantee that an authorization bug will never be introduced—even with good engineering practices—as a web application and its data model become more complex. Unlike other web application vulnerabilities such as XSS and CSRF, there is no practical general solution to prevent missing or incorrect authorization checks. In this paper we propose Invariant Detector (IVD), a defense-in-depth system that automatically learns authorization rules from normal data manipulation patterns and distills them into likely invariants. These invariants, usually learned during the testing or pre-release stages of new features, are then used to block any requests that may attempt to exploit bugs in the social network's authorization logic. IVD acts as an additional layer of defense, working behind the scenes, complementary to privacy frameworks and testing. We have designed and implemented IVD to handle the unique challenges posed by modern online social networks. 
IVD is currently running at Facebook, where it infers and evaluates daily more than 200,000 invariants from a sample of roughly 500 million client requests, and checks the resulting invariants every second against millions of writes made to a graph database containing trillions of entities. Thus far IVD has detected several high impact authorization bugs and has successfully blocked attempts to exploit them before code fixes were deployed.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.33", "https://yia.nnis.gr/publications/63-ivd-camera-ready-sp17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8210f6a9b276f954b4bb852c5f9ee73e0f23d3b7", "sources": [ "DBLP" ], "title": "IVD: Automatic Learning and Enforcement of Authorization Rules in Online Social Networks", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "8219bf467b82208a98aa7b45e67f35ed740b979f": { "authors": [ { "ids": [ "2781365" ], "name": "Chang Hyun Park" }, { "ids": [ "34836938" ], "name": "Taekyung Heo" }, { "ids": [ "3207141" ], "name": "Jungi Jeong" }, { "ids": [ "36595712" ], "name": "Jaehyuk Huh" } ], "doi": "10.1145/3079856.3080217", "doiUrl": "https://doi.org/10.1145/3079856.3080217", "entities": [ "Address space", "Fragmentation (computing)", "Hybrid system", "IP fragmentation", "Memory management", "Memory-mapped I/O", "Operating system", "Page (computer memory)", "Page table", "Scalability", "Software quality assurance", "Translation lookaside buffer", "Two-hybrid screening" ], "id": "8219bf467b82208a98aa7b45e67f35ed740b979f", "inCitations": [ "6f0c1898575d56d1c1073b1f2eb6cba5bc931005" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "444-456", "journalVolume": "", "outCitations": [ "caacd536fa218ef5218021506ebc041e3f460064", "daeff61502115efc4b9ee81607a8e5489215ea88", "8314d58a250867e083838d177a40946039903e7b", "8ac1d5487352962ac9e116e68b620d7259f5a5df", 
"33196b69eeec351efd5178eae5da92979bdc6fd7", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "4c689148ee5e9d6d116f6babbfab21bf2116802e", "40c5050e470fa0890e85487e4679197e07a91c09", "1c860ede7e14e4cd9210cbad8d0c8619673f87ca", "05a55820da0430f3b7e68f54bcb2cb6427c8cf28", "1bed30d161683d279780aee34619f94a860fa973", "44f779d08d629e915ee7cbe1c0f4f17e9f6a626f", "5ece19ddc8abc5454426deece280d0750972c2da", "671958087f3c24e7b025019476be8918302270e2", "89f4842ef627eb667691b5329e1eaac9bd66a0bf", "1c2aed008f7bcb769d0e6f8109434c766fb22bde", "c5a00bd4aca85ea79ee05326ec34efe5cda92510", "6017dd9b32e3c58b8a85c44971d6d82a1e3560cc", "72dfed87d3549369ae93dbbbfd371f88c4e344c8", "9c001d2546b07f4325dfa32d46f602bdf56ec474", "0653e2ed9f683868cb4539eb8718551242834f6b", "0571492ae2aa6df23ebbfc9f6e12ce6c0eb38845", "48a7323c4894de3afb90ef2135160205ebb55011", "48215f3287f7d527dc63c5b504ca8397d3bdef4e", "533d720a8542b707c316d39cf5beeb58738af86d", "0a934c1fa360491bebaa6fb4d0348179b9713b2d", "33e64874996ac6d163e4e5a97e28b617de7cc0f5", "6ac87f5ef30f787e5dacd59e97d7804e99a7366e", "19de90c933c20849c85d5428c8a643210b97ec83", "776846c6a922e3a9ae25d03e66dda5bba772f576", "0027600a3ff1d431ee44fcb8b149388ec7bb3ac8", "109f26c285d48ba8f7b5e259364fecef0b3273f6", "343a384d5476ead9496f96559aba5ad09e95e01e", "54a15f2c25bec4274d5bb423dbc5002426a506a3", "5d3c30ae77994e16fb2ee486cff96d4c52bccfd9", "28af524636137424ad574afa38463b4771e6f006" ], "paperAbstract": "To mitigate excessive TLB misses in large memory applications, techniques such as large pages, variable length segments, and HW coalescing, increase the coverage of limited hardware translation entries by exploiting the contiguous memory allocation. However, recent studies show that in non-uniform memory systems, using large pages often leads to performance degradation, or allocating large chunks of memory becomes more difficult due to memory fragmentation. 
Although each of the prior techniques favors its own best chunk size, diverse contiguity of memory allocation in real systems cannot always provide the optimal chunk of each technique. Under such fragmented and diverse memory allocations, this paper proposes a novel HW-SW hybrid translation architecture, which can adapt to different memory mappings efficiently. In the proposed hybrid coalescing technique, the operating system encodes memory contiguity information in a subset of page table entries, called anchor entries. During address translation through TLBs, an anchor entry provides translation for contiguous pages following the anchor entry. As a smaller number of anchor entries can cover a large portion of virtual address space, the efficiency of TLB can be significantly improved. The most important benefit of hybrid coalescing is its ability to change the coverage of the anchor entry dynamically, reflecting the current allocation contiguity status. By using the contiguity information directly set by the operating system, the technique can provide scalable translation coverage improvements with minor hardware changes, while allowing the flexibility of memory allocation. 
Our experimental results show that across diverse allocation scenarios with different distributions of contiguous memory chunks, the proposed scheme can effectively reap the potential translation coverage improvement from the existing contiguity.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080217" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8219bf467b82208a98aa7b45e67f35ed740b979f", "sources": [ "DBLP" ], "title": "Hybrid TLB coalescing: Improving TLB translation coverage under diverse fragmented memory allocations", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "8225fec5d29815399796dfc8117b7a677cbde9c2": { "authors": [ { "ids": [ "1784637" ], "name": "Michael LeBeane" }, { "ids": [ "1780048" ], "name": "Khaled Hamidouche" }, { "ids": [ "40379015" ], "name": "Brad Benton" }, { "ids": [ "1768054" ], "name": "Mauricio Breternitz" }, { "ids": [ "1783873" ], "name": "Steven K. Reinhardt" }, { "ids": [ "1703238" ], "name": "Lizy Kurian John" } ], "doi": "10.1145/3126908.3126950", "doiUrl": "https://doi.org/10.1145/3126908.3126950", "entities": [ "Central processing unit", "Collective operation", "Critical path method", "Data parallelism", "Graphics processing unit", "Helper class", "Jacobi method", "Kernel (operating system)", "Machine learning", "Network interface", "Networking hardware", "Speedup", "Stencil buffer" ], "id": "8225fec5d29815399796dfc8117b7a677cbde9c2", "inCitations": [], "journalName": "", "journalPages": "22:1-22:12", "journalVolume": "", "outCitations": [ "59857e2857df6d69a12e3cbaa720648b5c299159", "791a3e9eca83dd161ea372a97ca9fd5bf4f7854a", "43498db7de27abf14e5d2903a8318c62b3c4c0e9", "6335be42a352d1d4daa907533854410f57269926", "9fb2c0811b9099829ff3ba91cb533378701139ab", "56b559ed5a2000305e88bf3382f623a0272eb072", "fad6c07cb914becb0905d63362e8bdd900eed3f3", "e822721972693e064e092b474a3c6d3706fdcfb3", "fe4d9ea30324f73eb05cf14cecac6797724a84c6", 
"7ca15f2fd831b6920310c70f5bd18c3a9c00cb4f", "02ebdcf8200135ec0433e12e4ef2459ac740370b", "46249511a2eccfd8e29e8446d8b895040caab0e9", "cefab52aace077144d0e17dbc500a2c1c60cbbf5", "0451388ae4562c2833dd5e39f897208c9c1d7bb9", "4849bbb611153b5a7c53894fa1c1314138f5ae89", "43f0c099d44a68783a773f91cd03098a5252bf98", "c8c9620a52ad2017369da8e18b665dc0612e57f0", "240ffbd7eead669292472c857c822b1880463929", "3e6f5b5e8b7cb5408da8cd10d0cc625b00910291" ], "paperAbstract": "GPUs are widespread across clusters of compute nodes due to their attractive performance for data parallel codes. However, communicating between GPUs across the cluster is cumbersome when compared to CPU networking implementations. A number of recent works have enabled GPUs to more naturally access the network, but suffer from performance problems, require hidden CPU helper threads, or restrict communications to kernel boundaries.\n In this paper, we propose GPU Triggered Networking, a novel, GPU-centric networking approach which leverages the best of CPUs and GPUs. In this model, CPUs create and stage network messages and GPUs trigger the network interface when data is ready to send. GPU Triggered Networking decouples these two operations, thereby removing the CPU from the critical path. We illustrate how this approach can provide up to 25% speedup compared to standard GPU networking across microbenchmarks, a Jacobi stencil, an important MPI collective operation, and machine-learning workloads.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126950" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8225fec5d29815399796dfc8117b7a677cbde9c2", "sources": [ "DBLP" ], "title": "GPU triggered networking for intra-kernel communications", "venue": "SC", "year": 2017 }, "822ed5a4e89171fefabcc658df7475e7e1fcde71": { "authors": [ { "ids": [ "3237687" ], "name": "Tarun Kathuria" }, { "ids": [ "1743429" ], "name": "S. 
Sudarshan" } ], "doi": "10.1145/3034786.3034792", "doiUrl": "https://doi.org/10.1145/3034786.3034792", "entities": [ "Algorithm", "Approximation", "Greedy algorithm", "Heuristic", "Jumpstart Our Business Startups Act", "Loss function", "Multi-objective optimization", "Program optimization", "Provable security", "Query optimization" ], "id": "822ed5a4e89171fefabcc658df7475e7e1fcde71", "inCitations": [ "db12b1acdf950527ee8eccbdaa99ee9dcf5c1274" ], "journalName": "", "journalPages": "53-67", "journalVolume": "", "outCitations": [], "paperAbstract": "Complex queries for massive data analysis jobs have become increasingly commonplace. Many such queries contain common subexpressions, either within a single query or among multiple queries submitted as a batch. Conventional query optimizers do not exploit these subexpressions and produce sub-optimal plans. The problem of multi-query optimization (MQO) is to generate an optimal combined evaluation plan by computing common subexpressions once and reusing them. Exhaustive algorithms for MQO explore an O(nn) search space. Thus, this problem has primarily been tackled using various heuristic algorithms, without providing any theoretical guarantees on the quality of their solution.\n In this paper, instead of the conventional cost minimization problem, we treat the problem as maximizing a linear transformation of the cost function. We propose a greedy algorithm for this transformed formulation of the problem, which under weak, intuitive assumptions, provides an approximation factor guarantee for this formulation. We go on to show that this factor is optimal, unless P = NP. An- other noteworthy point about our algorithm is that it can be easily incorporated into existing transformation-based optimizers. 
We finally propose optimizations which can be used to improve the efficiency of our algorithm.", "pdfUrls": [ "https://www.cse.iitb.ac.in/~sudarsha/Pubs-dir/mqo-kathuria-pods2017.pdf", "http://doi.acm.org/10.1145/3034786.3034792" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/822ed5a4e89171fefabcc658df7475e7e1fcde71", "sources": [ "DBLP" ], "title": "Efficient and Provable Multi-Query Optimization", "venue": "PODS", "year": 2017 }, "822f38101358c3030a18461877fe376d02be85e9": { "authors": [ { "ids": [ "8081324" ], "name": "Simon Birnbach" }, { "ids": [ "1692868" ], "name": "Richard Baker" }, { "ids": [ "1697125" ], "name": "Ivan Martinovic" } ], "doi": "", "doiUrl": "", "entities": [ "Privacy" ], "id": "822f38101358c3030a18461877fe376d02be85e9", "inCitations": [ "a6630ee1a9eb6fc184c326530ee8eca1181aecbe", "94a91c87daee820105e2069ddeea5773325fff92" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/wi-fly-detecting-privacy-invasion-attacks-consumer-drones/" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/822f38101358c3030a18461877fe376d02be85e9", "sources": [ "DBLP" ], "title": "Wi-Fly?: Detecting Privacy Invasion Attacks by Consumer Drones", "venue": "NDSS", "year": 2017 }, "823e1ac97e3216e73ae3676afd29f3a2b7b2469c": { "authors": [ { "ids": [ "1837948" ], "name": "Ugljesa Milic" }, { "ids": [ "35404220" ], "name": "Alejandro Rico" }, { "ids": [ "2410518" ], "name": "Paul M. 
Carpenter" }, { "ids": [ "3094183" ], "name": "Alex Ram\u00edrez" } ], "doi": "10.1109/ISPASS.2017.7975265", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975265", "entities": [ "Asymmetric multiprocessing", "CPU cache", "Graphics processing unit", "High-throughput computing", "Low-power broadcasting", "Parallel computing", "Simultaneous multithreading", "Supercomputer", "Throughput" ], "id": "823e1ac97e3216e73ae3676afd29f3a2b7b2469c", "inCitations": [], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "3-12", "journalVolume": "", "outCitations": [ "039ad1ad259a9bd98e24b0738ba048282188d184", "5054cd49d8c7f36d9385ebdc613460f72d47d176", "c75fb2158ab82bbc347bcd6f04ba2bf86f448296", "0a12d25544d2a65d911a88710e4880356b4398c5", "9e68eb128e6b44462a9d8d0dcb6a228c80bc4ec6", "5a22d24ca3d0f205bf6e82cd3d0422af88c369e9", "24251f02c34f32b1dd96572a1d984c4463a26a10", "6f090d59bde17b7604985acf38e26785e794bcc0", "66b67e87398aedc52c34bbf1eaecf0e677b7ebbe", "04de290864f6a756e3c55dc32d75ff510d0a19a5", "5b3a9980f3512cb0ceb63093479beadeceddc2be", "482db358c4fec90e4662b3be5536ca8cc35a1e79", "c7ea0774356e4b10f0a49f9c3feeb0a7713fe0ad", "6b5dfbc9752c181c3e41422ec57078e452a67a34", "06bab0a18bad4b8532d9fbb85f0e55e4594e505b", "ca8572319cda59106a8f1e1d4db89f32bdf0e7d3", "0653e2ed9f683868cb4539eb8718551242834f6b", "d130d9ea828d38ce8f9089e0e4624fa5a1bd16fa", "d32d4ff33b1b2665d6081194eb6acdc3c7dd6891", "21a0c328f428a1d4694246ed6c44ed472b74133a", "352a8957005dc5519b15ed1870751ec494d66395", "0b2f5fb96e4b243407a69b2e39b5e1024d4e06af", "370488843f80120797e1f0af22e9fdb0152ff657", "01d32e62828315a140a5db4010431cac3d6868c6", "5be692c2839e91e8ec6f98006c0d30372deb72c6", "26941a2e1a04e0464f98ecaeb105c889f2f0ee3c", "191254adb67fde809f32fdc3e1a1377350c26490", "41965de84461726a0e12296acd667a442c8eba25", "6dc69032224e2bb4493a386b97879ca73e617792" ], "paperAbstract": "High performance computing (HPC) applications have parallel code 
sections that must scale to large numbers of cores, which makes them sensitive to serial regions. Current supercomputing systems with heterogeneous or asymmetric CMPs (ACMP) combine few high-performance big cores for serial regions, together with many low-power lean cores for throughput computing. The low requirements of HPC applications in the core front-end lead some designs, such as SMT and GPU cores, to share front-end structures including the instruction cache (I-cache). However, little work exists to analyze the benefit of sharing the I-cache among full cores, which seems compelling as a solution to reduce silicon area and power. This paper analyzes the performance, power and area impact of such a design on an ACMP with one high-performance core and multiple low-power cores. Having identified that multiple cores run the same code during parallel regions, the lean cores share the I-cache with the intent of benefiting from mutual prefetching, without increasing the average access latency. Our exploration of the multiple parameters finds the sweet spot on a wide interconnect to access the shared I-cache and the inclusion of a few line buffers to provide the required bandwidth and latency to sustain performance. 
The projections with McPAT and a rich set of HPC benchmarks show 11% area savings with a 5% energy reduction at no performance cost.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975265" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/823e1ac97e3216e73ae3676afd29f3a2b7b2469c", "sources": [ "DBLP" ], "title": "Sharing the instruction cache among lean cores on an asymmetric CMP for HPC applications", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "824c05ae636ed07823b3a71ca1b7abf32676e437": { "authors": [ { "ids": [ "34775038" ], "name": "Marcel B\u00f6hme" }, { "ids": [ "2123149" ], "name": "Van-Thuan Pham" }, { "ids": [ "26944338" ], "name": "Manh-Dung Nguyen" }, { "ids": [ "1789700" ], "name": "Abhik Roychoudhury" } ], "doi": "10.1145/3133956.3134020", "doiUrl": "https://doi.org/10.1145/3133956.3134020", "entities": [ "Critical system", "Grammatical Framework", "Graph (discrete mathematics)", "Open Sound System", "Simulated annealing", "Simulation", "Software bug", "Software testing", "Stack trace", "System call", "libxml2" ], "id": "824c05ae636ed07823b3a71ca1b7abf32676e437", "inCitations": [ "67ff9c425b17b78eaf7e3be970833aef41262cc8", "c764afe6f7f792b42499ff6a6a46f920ca7ea325", "2cf43b8bc82f063e257bf21c92e5b038eacd34d3", "0ba3f46d3d3dd75e9966188a96cd952c8706f2d6", "47287dbf82800e92b7fc599ef518d10272e81282" ], "journalName": "", "journalPages": "2329-2344", "journalVolume": "", "outCitations": [ "8eeff5e62ad0dd4073eb80377db29e7cd7b8a24f", "05cf2988ea3ac5e697fc51f85e7dd2031dd8af01", "2c13dcfdc5ea2d355a46fe326c371038a00ba7f5", "72eb18b6c1faf83c876031138a75b1cfa2856c0a", "6ea63d09993b9a268689790ea8d25bc36345497e", "29146d2b80bc8c56f7e18efe8d2c92354254e947", "064b79ea7254df88805b489b9493d71b6a8b3dad", "0b1fe1af18e2fda99eedf9e5cf3ac18826e585c1", "2c21f9488edfb2586327528bb59461a41363fc42", "015dccaa1231403fdcda42e0b053d7a50bb6fffd", 
"0c3f6413b362992b5e7b6d20a6bd057ad18cf92f", "2c3d491a3bea2c1016587aa8f9fee21293a84856", "2a4423b10725e54ad72f4f1fcf77db5bc835f0a6", "4d6b68ba16f6c05129f6dd56fdbd00e0ec8215ea", "65d8e0ad805f92d8d9d646f4eb640b1db0274088", "08d2058c254047361856c64cccddc5a54697dad6", "0e2860ebe5bf1fe3fadb17dfe0c4c6d08ab13014", "80d90b11973bff4522aca3a85fd7923c65eb8b3b", "1d014a13de863f6f6f12aadd4df15e44f4527acf", "3ab44c2c7221375f8ee0672597c8d4f470d4c98a", "488ab1e313f5109153f2c74e3b5d86d41e9b4b71", "fac3a6f272428e9d6879227ef76c1fa9397b317f", "95baae72c5fcca4038339c350556dd6143d9a263", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "47287dbf82800e92b7fc599ef518d10272e81282", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "34ebfcf4c815a081dcde964f7a73ba4a5a0641dc", "178be9685c2941edacc99dc964a3d977d62c3f1e", "905cec78bc8ab8ba3a263ff3749544ef85c22ec0", "0205b55e4fcf5710a97d5d561efa66c82e39f0f7", "03d24a02d01b2ce1697c7bf47ab4f8c84b93ff1f", "de71e2359995087b4ce7d46e4eb718c341c70ee0", "18e965d40f7dacb88bca7b0a231eca5adbfb6201", "285147a82c8f46a680826fe0308242ad190af75d", "5373091df23dec67c3e295757a165dc630a97637", "479bf197a3a9c5f6f7b06e64eefa0a90fa0c8b41", "27145fe45450babe306513efb97ae0ec8590c246" ], "paperAbstract": "Existing Greybox Fuzzers (GF) cannot be effectively directed, for instance, towards problematic changes or patches, towards critical system calls or dangerous locations, or towards functions in the stack-trace of a reported vulnerability that we wish to reproduce. In this paper, we introduce Directed Greybox Fuzzing (DGF) which generates inputs with the objective of reaching a given set of target program locations efficiently. We develop and evaluate a simulated annealing-based power schedule that gradually assigns more energy to seeds that are closer to the target locations while reducing energy for seeds that are further away. 
Experiments with our implementation AFLGo demonstrate that DGF outperforms both directed symbolic-execution-based whitebox fuzzing and undirected greybox fuzzing. We show applications of DGF to patch testing and crash reproduction, and discuss the integration of AFLGo into Google's continuous fuzzing platform OSS-Fuzz. Due to its directedness, AFLGo could find 39 bugs in several well-fuzzed, security-critical projects like LibXML2. 17 CVEs were assigned.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134020", "http://www.comp.nus.edu.sg/~thuanpv/publications/AFLGo_CCS17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/824c05ae636ed07823b3a71ca1b7abf32676e437", "sources": [ "DBLP" ], "title": "Directed Greybox Fuzzing", "venue": "CCS", "year": 2017 }, "829677ca5af4daba5a1619457869462db075905b": { "authors": [ { "ids": [ "2445536" ], "name": "Junhao Gan" }, { "ids": [ "1802067" ], "name": "Yufei Tao" } ], "doi": "10.1145/3035918.3064050", "doiUrl": "https://doi.org/10.1145/3035918.3064050", "entities": [ "Algorithm", "Cluster analysis", "DBSCAN", "Relaxation (approximation)" ], "id": "829677ca5af4daba5a1619457869462db075905b", "inCitations": [ "1f3fa5b6b7b74cb585132c6cbfd5dbc708fef30c", "b6b1e61a25d080570b0295d84d96266af0293188", "d5bcb5d7198e8936d5d5946c7feb3364eadba542" ], "journalName": "", "journalPages": "1493-1507", "journalVolume": "", "outCitations": [ "1375c722eee6e58041f9e295042d42e43ac3428c", "77e9256276e2014b6d801d16f896653f5bc6d5d4", "057249d12815a5b01cca41e808704954ac616a22", "1a6715bc96b4661424ffc1218c2bd65fcd76a844", "02e0bc77460469aefec5bd794ee6c4efc15e6adb", "5869b3d5607bff1a079aa24c8a241d656fe683b7", "39a73f94c33cf8f0268930b7ec538f3d95152f5e", "757effee0314f1b81386ffb7c48c11a7aedf546c", "1f63499a9cb43f0f4d6a56b37de551c7e0c94971", "13a375a84a6c414b85477a401541d3e28db1e11a", "6d0e181087547f6306468560e2b3f8242892bf46", "92f47daf7dedc1443729d711a54a5be21129cb61", "1a92a74bd60ba49c26958eda0cfedef12ef61171", 
"371db75f3f668317af075767b0630c6c93288212", "75a4860c9b3b2e95bc3a8056543e7560a1753f2b", "c923be743ee3990e1ac0f29bd53531a8d54832e1", "0d557c7a7d6b50c3ccdb6a9f48e4c552541f5930", "0157f142bee7b462897424908cd6c73d84f225cc", "0597d7259c0a61fa13f2f9aa852525d127a7fe16", "c5a17da1916bd3a7f2dc5fb3339374c1988389a7", "309d445a15ce2e71c20bd069e4c655888ae501e5" ], "paperAbstract": "Dynamic clustering---how to efficiently maintain data clusters along with updates in the underlying dataset---is a difficult topic. This is especially true for density-based clustering, where objects are aggregated based on transitivity of proximity, under which deciding the cluster(s) of an object may require the inspection of numerous other objects. The phenomenon is unfortunate, given the popular usage of this clustering approach in many applications demanding data updates.\n Motivated by the above, we investigate the algorithmic principles for dynamic clustering by DBSCAN, a successful representative of density-based clustering, and ρ-approximate DBSCAN, proposed to bring down the computational hardness of the former on static data. Surprisingly, we prove that the ρ-approximate version suffers from the very same hardness when the dataset is fully dynamic, namely, when both insertions and deletions are allowed. We also show that this issue goes away as soon as tiny further relaxation is applied, yet still ensuring the same quality---known as the ``sandwich guarantee''---of ρ-approximate DBSCAN. 
Our algorithms guarantee near-constant update processing, and outperform existing approaches by a factor over two orders of magnitude.", "pdfUrls": [ "http://www.cse.cuhk.edu.hk/~taoyf/paper/sigmod17.pdf", "http://doi.acm.org/10.1145/3035918.3064050" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/829677ca5af4daba5a1619457869462db075905b", "sources": [ "DBLP" ], "title": "Dynamic Density Based Clustering", "venue": "SIGMOD Conference", "year": 2017 }, "829aca8797d7587be04f99a0fd3061bcab1b4569": { "authors": [ { "ids": [ "39345318" ], "name": "Tian Zhang" }, { "ids": [ "3087426" ], "name": "Ryan Stutsman" } ], "doi": "", "doiUrl": "", "entities": [ "Attribute\u2013value pair", "Computation", "Direct memory access", "Extensibility", "FLOPS", "In-memory database", "JavaScript", "Just-in-time compilation", "Machine code", "Network interface controller", "Server (computing)", "USB flash drive" ], "id": "829aca8797d7587be04f99a0fd3061bcab1b4569", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "1023b268d9f8d399f76dd34da8719a10d6439017", "bfc464efbe615805d386ef5c882e4d0f97071ec2", "0706225eeac0f855b19c365313db61252ecde0d7", "39ac2e0fc4ec63753306f99e71e0f38133e58ead", "65fbaf682da11c714110c95ab9a05562358ff040", "0541d5338adc48276b3b8cd3a141d799e2d40150", "3cae67dde8b20aa58ebd12def02c7fa8ad844de4", "560bdd9aa5411b514662dc31ffda0a434e4176d1", "2a9d09d8e2390c92cdaa5c8b98d6dd4cb394f638", "0538e05e1ced11b91cda5d1aed88a73969def882", "1220e4a011c46804d4369b5580dc7fb6e387af54", "1ac1348938a45e1da84be8caac78e3097acaf0c1", "284c50888bc0a85d871d65d18f0509a11f663907", "0558c94a094158ecd64f0d5014d3d9668054fb97", "7ead08393fa2000ea03bb1cc1e3e6e0ac991aac6", "4acdb61098053f38d5500a9ef974d24828696b9d", "a0031fc8c94eb2e18276cb8350e6d89f3926f606", "5fe3090971c9fb42ca0bdb67141040017d9a3f8e", "8318fa48ed23f9e8b9909385d3560f029c623171", "742c641506ac9efc3281af2effb31f2fb31b2dd4", 
"39a59c25e8fcdbb6304e70c86e263539af25e62d", "2db2cd448948663f9bd4397dce4bff0caba3b97b", "514a5c15e8cf3f681febecad954a4508d9189c99", "7e4921a43378b2b7b9cf950604fe434e4b07da58" ], "paperAbstract": "Large scale in-memory key-value stores like RAMCloud can perform millions of operations per second per server with a few microseconds of access latency. However, these systems often only provide simple feature sets, and the lack of extensibility is an obstacle for building higher-level services. We evaluate the possibility of using JavaScript for shipping computation to data and for extending database functionality by comparing against other possible approaches. Microbenchmarks are promising; the V8 JavaScript runtime provides near native performance with reduced isolation costs when compared with native code and hardware-based protections. We conclude with initial thoughts on how this technology can be deployed for fast procedures that operate on in-memory data, that maximize gains from JIT, and that exploit the kernelbypass DMA capabilities of modern network cards.", "pdfUrls": [ "https://www.usenix.org/conference/hotcloud17/program/presentation/zhang", "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-zhang.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/hotcloud17_slides_zhang.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8678/56f061d8cab3ab8de84337f9a7a5034b7ee0.pdf", "s2Url": "https://semanticscholar.org/paper/829aca8797d7587be04f99a0fd3061bcab1b4569", "sources": [ "DBLP" ], "title": "JavaScript for Extending Low-latency In-memory Key-value Stores", "venue": "HotCloud", "year": 2017 }, "829cec29978198e5e55ecc4d03dc461dddc5b4d6": { "authors": [ { "ids": [ "9905902" ], "name": "Adeesha Wijayasiri" }, { "ids": [ "2477515" ], "name": "Tania Banerjee-Mishra" }, { "ids": [ "1726027" ], "name": "Sanjay Ranka" }, { "ids": [ "1722128" ], "name": "Sartaj Sahni" }, { "ids": [ "2014508" ], "name": "Mark S. 
Schmalz" } ], "doi": "10.1109/HiPC.2017.00031", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00031", "entities": [ "Algorithm", "Central processing unit", "Desktop metaphor", "Dynamic data", "Flip-flop (electronics)", "Frequency scaling", "Graphics processing unit", "Load balancing (computing)", "Multi-core processor", "Next-generation network", "Parallel algorithm", "Pareto efficiency", "Requirement", "Synthetic data" ], "id": "829cec29978198e5e55ecc4d03dc461dddc5b4d6", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "193-202", "journalVolume": "", "outCitations": [ "ce9c78aee91f491c5e724f637e1e4542f02877de", "88522226e881ac5eff6122a1d8d187800e38bfbf", "1f6ebf99877c4214cc8a7b3fcf7b4f827c0fd37a", "26a2d7fdc08aefa4f752acb9d9ca8991439bc44a", "544b6f8386e4b521c3db0c731c1c18c9f300a8be", "f4c946e493ac96bc600fe1f40c57c2df73a75397", "3d9c2e764e70bce36d741cdd030662b5f0701690", "eeb80b90b577febe960df1d61b7a2df89c3f9a02", "4d8bebe9f9ceeddb261185c43d4c04180c90e448", "eeab80afc1857d1f0fd7440e901881b0fa1bbe27", "e3bcfe3cc7d7e8a7119c7b1769f04d0771394927", "2b550251323d541dd5d3f72ab68073e05cd485c5" ], "paperAbstract": "Hybrid multicore processors (HMPs) are poised to dominate the landscape of the next generation of computing on the desktop as well as on exascale systems. HMPs consist of general purpose CPU cores along with specialized co-processors and can provide high performance for a wide spectrum of applications at significantly lower energy requirements per FLOP. In this paper, we develop parallel algorithms and software for constructing multi-resolution SAR images on HMPs. We develop several load balancing algorithms for optimizing time performance and energy on HMPs. We also present a systematic approach for deriving the energy-time performance trade-offs on HMPs in the presence of Dynamic Voltage Frequency Scaling. 
Pareto-optimal curves are presented on a system consisting of 24 traditional cores and a GPU.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00031" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/829cec29978198e5e55ecc4d03dc461dddc5b4d6", "sources": [ "DBLP" ], "title": "Parallel Dynamic Data Driven Approaches for Synthetic Aperture Radar", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "82b1fb7f2ec23346eec33db680576fb563400f49": { "authors": [ { "ids": [ "2274456" ], "name": "Omid Mashayekhi" }, { "ids": [ "35858504" ], "name": "Hang Qu" }, { "ids": [ "23174062" ], "name": "Chinmayee Shah" }, { "ids": [ "1721681" ], "name": "Philip Levis" } ], "doi": "", "doiUrl": "", "entities": [ "Cache (computing)", "Centralisation", "Control flow", "Control plane", "Fluid animation", "Operating system", "Scalability", "Scheduling (computing)", "Simulation" ], "id": "82b1fb7f2ec23346eec33db680576fb563400f49", "inCitations": [ "8213e1441d60fe48371d31f44ef9eb0e9f7c5bee", "4c3ec58f03b6ade10712361c17602be37b3613be" ], "journalName": "", "journalPages": "513-526", "journalVolume": "", "outCitations": [ "e6e95a733cd5e4234e2bc38cba2e52ab5e561af6", "09c5293b647fca40fde28ac6c38737f07e873e41", "332f77fd05703c1607e3b57884ad31fb1fad0104", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "9da1d06e9afe37b3692a102022f561e2b6b25eaf", "28a9dca6faeead651539c700bef413203b2b876e", "43776b15c034076a36b7143d58af8e04715e41d0", "027bd50767a7f61fb0fc3c27051a63b209c10a99", "0558c94a094158ecd64f0d5014d3d9668054fb97", "080ed793c12d97436ae29851b5e34c54c07e3816", "835916e7ad1231d5aa2985340b0ee543cadbb5b6", "27bd83d70eb241319362e2eeb4917f2b6148f376", "5e50ffa96fa021c85ccedb1bb8b84b59ee268de8", "4954fa180728932959997a4768411ff9136aac81", "0541d5338adc48276b3b8cd3a141d799e2d40150", "0608d9937c074520cdc93cc444cc1c77039c5332", "262c123c6325d7b4c2edc904d6e85d352ab266b9", 
"0ecad2b630fce029c1b7b577ed56e18fbba001ce", "31b27a3b4ff89993eb92e8b1353edead8d5f2520", "41e71c53ca2a7be0ba90919af8f3049d957e665e", "2988e34168fa91398fa397baf823af2063893e9c", "3b6dd340fb5442e0c31d73f40e241fdd73d42330", "41e0ef45114deff41d92c8b235393cfe4d9a94f4", "11310368999afdce94bca4316eea38216b2446c5", "b6571efa4483aa00d23bbcd36930c4877548ba38", "4cf0f575677146eeb002487e56abc2cf7cafe591", "dc69b14f1dfaa0d4d4949268bfb8a2e20135036e", "5e0c8ba57734361b4564b75ab55af1d2b5ccde82", "be5f6a576cda7e56bf2beef996685d111e9ebd7b", "73eeeb984a15bb83fac23d276c973312b4bf6dd1", "0c0f35f0a3b89359f7fa84553a3e7dbde766940f", "3dff11679346f5344af1018cad57fa14cc349f2f", "234e6be0d4238f76b3ac038ee422be39f391c625", "756d1f8f07a83f3cfc0edaa81493a9f109628e1b" ], "paperAbstract": "Control planes of cloud frameworks trade off between scheduling granularity and performance. Centralized systems schedule at task granularity, but only schedule a few thousand tasks per second. Distributed systems schedule hundreds of thousands of tasks per second but changing the schedule is costly. We present execution templates, a control plane abstraction that can schedule hundreds of thousands of tasks per second while supporting fine-grained, per-task scheduling decisions. Execution templates leverage a program\u2019s repetitive control flow to cache blocks of frequently-executed tasks. Executing a task in a template requires sending a single message. Large-scale scheduling changes install new templates, while small changes apply edits to existing templates. Evaluations of execution templates in Nimbus, a data analytics framework, find that they provide the fine-grained scheduling flexibility of centralized control planes while matching the strong scaling of distributed ones. 
Execution templates support complex, real-world applications, such as a fluid simulation with a triply nested loop and data dependent branches.", "pdfUrls": [ "https://arxiv.org/pdf/1705.01662v1.pdf", "http://arxiv.org/abs/1705.01662", "https://www.usenix.org/system/files/conference/atc17/atc17-mashayekhi.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/mashayekhi" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/82b1/fb7f2ec23346eec33db680576fb563400f49.pdf", "s2Url": "https://semanticscholar.org/paper/82b1fb7f2ec23346eec33db680576fb563400f49", "sources": [ "DBLP" ], "title": "Execution Templates: Caching Control Plane Decisions for Strong Scaling of Data Analytics", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "82c875e112555f21319123f1b8f61338feb6d183": { "authors": [ { "ids": [ "1756372" ], "name": "Yi Han" }, { "ids": [ "3375595" ], "name": "Sriharsha Etigowni" }, { "ids": [ "1700415" ], "name": "Hua Liu" }, { "ids": [ "1800447" ], "name": "Saman A. Zonouz" }, { "ids": [ "1679058" ], "name": "Athina P. Petropulu" } ], "doi": "10.1145/3133956.3134081", "doiUrl": "https://doi.org/10.1145/3133956.3134081", "entities": [ "Artificial neural network", "Computer hardware", "Contactless payment", "Control flow", "Control system", "David W. 
Bradley", "Embedded controller", "Embedded system", "Hall effect", "Power-line communication", "Programmable logic device", "Public key fingerprint", "Run time (program lifecycle phase)", "Trust (emotion)", "Trusted Computing", "Trusted computing base", "User interface" ], "id": "82c875e112555f21319123f1b8f61338feb6d183", "inCitations": [], "journalName": "", "journalPages": "1095-1108", "journalVolume": "", "outCitations": [ "39c0c8312d4e7476e4852d6b681bbb434382f815", "4803d1810d1f1a4980c55cdf2820a3c203b7a53e", "3f5e13e951b58c1725250cb60afc27f08d8bf02c", "48631e988b0fc362b0b7c8a17d674b70ad196755", "f8f8062ca99714e7fff67c300bb5743d50aab10a", "b10cb04fd45f968d29ce0bdc17c4d29d12e05b67", "599f3b766913703916751168e6a6fbba139431b3", "593eb268bd3d04c700115a7e31f45eb48d8a7aeb", "2b538bdcc64653c27135aff994f946d47b180982", "65c282400ce9cf4934ac6e575d66893a7a0c4747", "2e0b1160e981bc251f794c51448675f451f4bf63", "4a5f1a340e9215023e1ad2bf5b2676594d37bd0e", "f9dbb3e729034fc509580d21605d22471a43b8d7", "056a2fc1b3cd022bd4dd4e002444af66bb532ce1", "17bb76f79a0aca5abc36096bcb36c2611c0d1d71", "ea0d2e22439c0dac5c667bdb9b8344e281cc7dac", "52f23a0e428acc4348a815eb964c6a99287cd304", "ee9dc1d95b69a07919408a5847a8694fd9ae8e09", "817ea3169d3b33ff069c988c5448442783189880", "5e9c36a1ac9618864c43a47b0072a36e539fc848", "6aefb421473f7da8f282e309092c03a5464fd8c3", "81168342c34845bca79c2187c5f8da2ff7a970a0", "379b242fcc606c2a43278630a97430f750654896", "3e5a6e6a2779c4ab1f15ff36611ebaa8d54508e8", "3dc01f7eae98301b19be0e1a76b337701e3b8232", "1b5c9d1fbef10f8fac349ad2037a011630fcb9ce", "3a34e028eac4bf96cb4725f4bfb33f750d133b31", "09cb97bc213961d855cb812d1d8ab0cf42e22659", "14e51709779527f9114d80f7ac0c8389933a547d", "ea9b23bd5d1ab922bd92378ccb6c9ccff86bab91", "6873a4db9703c9bf38ddabf9abed17ac5b673b59", "d47df16183cf956ccedb320d82705462b109f554", "0c00a328fa7cd56ee60338c54e89bd48310db80b", "aafb55eb35d7c5923a1c267c3cc1a8f000314947", "6c6181a0854e057f3b96c4fcd48465979bcc6dc6", 
"ead6f1dc139955564624c99a874f233afb8deff2", "67800cdddf13c1d746da7a8d62be56550cf20dfb", "11540131eae85b2e11d53df7f1360eeb6476e7f4", "085f6f07c4a3786dcb4cf82d18ed12bd91385d31", "affdfef38b7a6ddd50de567b743c9590da082b8d", "6d8c9fcce8177d6f8d122d653c7d32d7624d6714", "27d0b64d7aae31ffff7276ec3d940188498f1d4e" ], "paperAbstract": "Trustworthy operation of industrial control systems depends on secure and real-time code execution on the embedded programmable logic controllers (PLCs). The controllers monitor and control the critical infrastructures, such as electric power grids and healthcare platforms, and continuously report back the system status to human operators. We present Zeus, a contactless embedded controller security monitor to ensure its execution control flow integrity. Zeus leverages the electromagnetic emission by the PLC circuitry during the execution of the controller programs. Zeus's contactless execution tracking enables non-intrusive monitoring of security-critical controllers with tight real-time constraints. Those devices often cannot tolerate the cost and performance overhead that comes with additional traditional hardware or software monitoring modules. Furthermore, Zeus provides an air-gap between the monitor (trusted computing base) and the target (potentially compromised) PLC. This eliminates the possibility of the monitor infection by the same attack vectors.\n Zeus monitors for control flow integrity of the PLC program execution. Zeus monitors the communications between the human machine interface and the PLC, and captures the control logic binary uploads to the PLC. Zeus exercises its feasible execution paths, and fingerprints their emissions using an external electromagnetic sensor. Zeus trains a neural network for legitimate PLC executions, and uses it at runtime to identify the control flow based on PLC's electromagnetic emissions. 
We implemented Zeus on a commercial Allen Bradley PLC, which is widely used in industry, and evaluated it on real-world control program executions. Zeus was able to distinguish between different legitimate and malicious executions with 98.9% accuracy and with zero overhead on PLC execution by design.", "pdfUrls": [ "http://arxiv.org/abs/1708.09099", "https://acmccs.github.io/papers/p1095-hanA.pdf", "https://arxiv.org/pdf/1708.09099v1.pdf", "http://doi.acm.org/10.1145/3133956.3134081" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/82c875e112555f21319123f1b8f61338feb6d183", "sources": [ "DBLP" ], "title": "Watch Me, but Don't Touch Me! Contactless Control Flow Monitoring via Electromagnetic Emanations", "venue": "CCS", "year": 2017 }, "82d6bad4242c42c9552cc1a1bc3623817aabc86d": { "authors": [ { "ids": [ "2201537" ], "name": "Chui-Hui Chiu" }, { "ids": [ "11009181" ], "name": "Dipak Kumar Singh" }, { "ids": [ "34660837" ], "name": "Qingyang Wang" }, { "ids": [ "2794114" ], "name": "Seung-Jong Park" } ], "doi": "10.1109/CLOUD.2017.10", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.10", "entities": [ "Bandwidth (signal processing)", "Cloud computing", "Data center", "Dynamic circuit network", "Network congestion", "Network switch", "Program optimization", "Scheduling (computing)", "Simulation", "Software-defined networking" ], "id": "82d6bad4242c42c9552cc1a1bc3623817aabc86d", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ "37525b2c3cc16a2fe166708a4f7081b949b1888e", "231ba17921ebd80e95771e28dfb5082e169d5a53", "663e064469ad91e6bda345d216504b4c868f537b", "62a68d15bbfef566170fc610183eb7ebf8313dce", "2e0057911766d411b7a342c8bae2d6e3d29c47cd", "025652412d507a8cf98ecacd8a44d32ce28995e1", "1cafaac11664e48bd121695ac1be06b0930d00a5", "0541d5338adc48276b3b8cd3a141d799e2d40150", "058f6752d85a517aae298586fdf117acdd7560ea" ], 
"paperAbstract": "Existing coflow scheduling frameworks effectively shorten communication time and completion time of cluster applications. However, existing frameworks only consider available bandwidth on hosts and overlook congestion in the network when making scheduling decisions. Through extensive simulations using the realistic workload probability distribution from Facebook, we observe the performance degradation of the state-of-the-art coflow scheduling framework, Varys, in the cloud environment on a shared data center network (DCN) because of the lack of network congestion information. We propose Coflourish, the first coflow scheduling framework that exploits the congestion feedback assistances from the software-defined-networking(SDN)-enabled switches in the networks for available bandwidth estimation. Our simulation results demonstrate that Coflourish outperforms Varys by up to 75.5% in terms of average coflow completion time under various workload conditions. The proposed work also reveals the potentials of integration with traffic engineering mechanisms in lower levels for further performance optimization.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.10", "http://csc.lsu.edu/~qywang/papers/Coflourish-CLOUD2017-CameraReady.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/82d6bad4242c42c9552cc1a1bc3623817aabc86d", "sources": [ "DBLP" ], "title": "Coflourish: An SDN-Assisted Coflow Scheduling Framework for Clouds", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "82f9ad557bf09b5cf488430f6b52670a5f750f99": { "authors": [ { "ids": [ "1727038" ], "name": "Xu Ji" }, { "ids": [ "1722340" ], "name": "Chao Wang" }, { "ids": [ "40127965" ], "name": "Nosayba El-Sayed" }, { "ids": [ "35790236" ], "name": "Xiaosong Ma" }, { "ids": [ "2379012" ], "name": "Youngjae Kim" }, { "ids": [ "1730937" ], "name": "Sudharshan S. 
Vazhkudai" }, { "ids": [ "1712301" ], "name": "Wei Xue" }, { "ids": [ "39783437" ], "name": "Daniel S\u00e1nchez" } ], "doi": "10.1145/3126908.3126917", "doiUrl": "https://doi.org/10.1145/3126908.3126917", "entities": [ "Computation", "Computational science", "Data center", "Data structure", "Graph (abstract data type)", "Holism", "Interaction", "Personal computer", "Profiling (information science)", "Program optimization", "Scalability", "Systems design" ], "id": "82f9ad557bf09b5cf488430f6b52670a5f750f99", "inCitations": [], "journalName": "", "journalPages": "25:1-25:12", "journalVolume": "", "outCitations": [ "89684ff812933341bd6e1921ad75f40f49211674", "1f2ff98f9413bb36c641e9edcfa79f7b33eeb80a", "0455a164fdd31c24c37104853544a66191660659", "28920825e8d112a90299bdfea483d0d75f0da807", "74274d42b6a60c8d35e6dc107c88d78a4df388b6", "4e5b07ece60254133f6a15c4cebc2ebe9c8b7ec9", "12d8ead802196d498877ffe92ff4c42ca1ce7694", "36e1b02a66ed928ef13e3a2ba6852e90a8713036", "5119e4b11132d48d5fa4a5ddaf2ca1a0389b9b0f", "792aa5a81ac1d344de450ec59eec339aa0e508aa", "b116227c9782509c1d5a667da3632deb4356727a", "1b65277f50406900a475a68856df8fe8835c19be", "398cc68e6df0cffb5b06da2ab39b004bec8ad9ab", "1eb4c18aa99f19a8dc91e6235e2dbdb7fd46d06c", "33e64874996ac6d163e4e5a97e28b617de7cc0f5", "53ad6a3c95c8be67bddbae3ef76c938adcd9775d", "a042f95a307d4f72d2aac95ac5d5e9dbfa24db79", "c9a37fb935d6460c240149e4aa9daf27551ac61b", "607e98b19885a08c20e948f1b00387876a983fd1", "8c8b44029fbdac1572ae47b8eaab3929c9987098", "27bcb72519d77192da2b30eca4e1442c8f3637b1", "0d075dae4e4ca9cabef40f9bec4c953ccfc31113", "261ba8eead07e19398936953b1839eec6f9f29b0", "0558c94a094158ecd64f0d5014d3d9668054fb97", "094b881edab3f5833c4ff2f38d4ed207af141bcd", "e412b5b0160fca763dae6103cff2c4f22e872aa2", "51dd07564a6ae6f5aa28c8f078fed2a816caafc6", "863bcc82bd61f2199a4f2c4c6bc34a26c217c87a", "48d708dbd76f2ffda3ae35728f46f651de22290d", "4182a6dcb35a96a56c70ed637fff58e694f98780", "a4ba733aee85f774b336ff4b6616e7fa00c87f77", 
"03416be8097852a54dd3e309434e5a0806824646", "89ff8d6f32c129dc3c333d4092dcbfac4dbbe516", "398aaf00253e2c29e6238dd0499aa3a75c76914c", "ed83425357e98154d1369f76105d092f0b2cb34f", "4766d59a750d33281ef96c02763a71cdbe946a52", "f5a4344be21a9f33f4a87ffd365898e3e0fec81c", "19fd1e65138021f9406ee119bba81af564dc5edd", "4b2aee3a9fa95772db22494a8f83dffe789bffe4", "2790284b6a16790d03b0cb5ed46bc6b0fecde1eb", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "5f049497fab24c39cb69eff6defc5731dcf217a9", "3486aeaf540c48952120fe853d672af984f40a6a", "0653e2ed9f683868cb4539eb8718551242834f6b", "00ecfb48c72709de7ea719e658b2a37301136cda", "5507d741031a1ce2ddc0d6fec9d497192f037eda", "023abef0f3f56cda13bcb5adeb28dd4c7241c261", "1728de7b027e827dfc67ceb3b0e23b841a4b1538", "6e8d0f9e066c00e375aeb033fa131df03a850927", "db3380a3221093d9a60d393875b01e410c6da9cd", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "e34671e16f34c92f61f8424d5a7c6166e47bf820", "8bb515c3b9974ce580fb63d16ae7f590a7c2dee6", "298c14f1afc65a9c58b8ae5abe16a27ea4f13a71", "6ed0ca19b2413daeef61864c706fddbd711dfc7e" ], "paperAbstract": "Memory accesses limit the performance and scalability of countless applications. Many design and optimization efforts will benefit from an in-depth understanding of memory access behavior, which is not offered by extant access tracing and profiling methods.\n In this paper, we adopt a holistic memory access profiling approach to enable a better understanding of program-system memory interactions. We have developed a two-pass tool adopting fast online and slow offline profiling, with which we have profiled, at the variable/object level, a collection of 38 representative applications spanning major domains (HPC, personal computing, data analytics, AI, graph processing, and datacenter workloads), at varying problem sizes. We have performed detailed result analysis and code examination. 
Our findings provide new insights into application memory behavior, including insights on per-object access patterns, adoption of data structures, and memory-access changes at different problem sizes. We find that scientific computation applications exhibit distinct behaviors compared to datacenter workloads, motivating separate memory system design/optimizations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126917", "http://ds.qcri.org/publications/2017-ji-sc.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/82f9ad557bf09b5cf488430f6b52670a5f750f99", "sources": [ "DBLP" ], "title": "Understanding object-level memory access patterns across the spectrum", "venue": "SC", "year": 2017 }, "830f69708281e6febdc7a1e1564187c72acde0c5": { "authors": [ { "ids": [ "3379338" ], "name": "Daniele Rogora" }, { "ids": [ "35815960" ], "name": "Steffen Smolka" }, { "ids": [ "1807579" ], "name": "Antonio Carzaniga" }, { "ids": [ "1735104" ], "name": "Amer Diwan" }, { "ids": [ "1762445" ], "name": "Robert Soul\u00e9" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud computing", "Complex systems", "Debugging", "Random oracle", "Scalability", "Software deployment", "Software performance testing", "Viva Pi\u00f1ata: Trouble in Paradise", "Web service" ], "id": "830f69708281e6febdc7a1e1564187c72acde0c5", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "1b31c65d8b5023dabcdd18fd57241488834c7206", "78ce3c4d29e325a953dd622ca2c008519acdb21d", "20f3fcd714230fbcb88661ba0f623d9e6217a717", "a5ade56a2f37f3f5f5b956b0c5546de9a3428537", "032f1a16ad4cd815ca5cbf3dbfca2714007a1a2e", "5630cbd4e0a0f00ecc2fab7001e424f23adbf0a2", "04c724bad0963d1e6e7a3743be08c08810402582", "d65f897b7cea2761f88411e757e9587c0282cb41", "43de5136309e262007d3f14893959af69749caf8", "6d04b5a9559199c483b696abac683c6d720cc61d", "0e639ae7d0caae09489f7fbfb6f4739d96f626e8", "5a6682af0ad2eb0e08e6f52c0101119c603b663c", 
"8e52bc3b6b61452825599cb4bbfd5de4f8d0123d" ], "paperAbstract": "Web services and applications are complex systems. Layers of abstraction and virtualization allow flexible and scalable deployment. But they also introduce complications if one wants predictable performance and easy trouble-shooting. We propose to support the designers, testers, and maintainers of such systems by annotating system components with performance models. Our goal is to formulate annotations that can be used as oracles in performance testing, that can provide valuable guidance for debugging, and that can also inform designers by predicting the performance profile of an assembly of annotated components. We present an initial formulation of such annotations together with their concrete derivation from the execution of a complex web service.", "pdfUrls": [ "http://www.people.usi.ch/rogord/hotcloud_pres.pdf", "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-rogora.pdf", "https://www.usenix.org/conference/hotcloud17/program/presentation/rogora", "http://www.inf.usi.ch/faculty/soule/hotcloud2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/830f/69708281e6febdc7a1e1564187c72acde0c5.pdf", "s2Url": "https://semanticscholar.org/paper/830f69708281e6febdc7a1e1564187c72acde0c5", "sources": [ "DBLP" ], "title": "Performance Annotations for Cloud Computing", "venue": "HotCloud", "year": 2017 }, "833c3f38216e46473ab7768bbff915284b5d4836": { "authors": [ { "ids": [ "40466466" ], "name": "Kai Herrmann" }, { "ids": [ "3320376" ], "name": "Hannes Voigt" }, { "ids": [ "3193518" ], "name": "Andreas Behrend" }, { "ids": [ "2765285" ], "name": "Jonas Rausch" }, { "ids": [ "7337091" ], "name": "Wolfgang Lehner" } ], "doi": "10.1145/3035918.3064046", "doiUrl": "https://doi.org/10.1145/3035918.3064046", "entities": [ "Cognitive dimensions of notations", "Database", "Database schema", "Database trigger", "End-to-end principle", "Program optimization", "SQL", "Source lines 
of code" ], "id": "833c3f38216e46473ab7768bbff915284b5d4836", "inCitations": [ "26d00fe71ae527cebac04a68616dfeddb1f4c6e1", "27e58fdbb6240613d9d647138dc26d134c954f36" ], "journalName": "", "journalPages": "1101-1116", "journalVolume": "", "outCitations": [ "752263c767d5fa584bb32392201a7273522202e7", "73cdfc071d8600a3ec6ca837643c8cefd3be842d", "25216ad30291a9b1277f6ff67f4573fbddb77870", "25c96cbb6b0a392d3e1ab07ad0326b13ed0548d5", "01322d3f7039eaed09ce4fa77dc4efb122e400a2", "0cedba45412f78d37cb2575fd5b8346f9afb53d3", "310c0b3199fb14dc300b78db5f138a7aeab3d4d5", "4974924bf648a56d555079c37774dd31649a1145", "0ea3bca1222c33163c165779c01213b7cd1c22b9", "e8f1c51ab34f0b9e068aee00e4d4e822302f140c", "87875a394b46ac31e6530725b9d3a66ba003477e", "fab53cb4ac0a110fe650fc11eb352251db51d82e", "c11b8fc2c942ee227138754be07ece269b94aec2", "0711279d98e1304591140cf009dd60a3303c50c8", "5fec451ab851aada6379a0f9d868ee740b5d2fb3" ], "paperAbstract": "We introduce end-to-end support of co-existing schema versions within one database. While it is state of the art to run multiple versions of a continuously developed application concurrently, it is hard to do the same for databases. In order to keep multiple co-existing schema versions alive -- which are all accessing the same data set -- developers usually employ handwritten delta code (e.g. views and triggers in SQL). This delta code is hard to write and hard to maintain: if a database administrator decides to adapt the physical table schema, all handwritten delta code needs to be adapted as well, which is expensive and error-prone in practice. In this paper, we present InVerDa: developers use the simple bidirectional database evolution language BiDEL, which carries enough information to generate all delta code automatically. Without additional effort, new schema versions become immediately accessible and data changes in any version are visible in all schema versions at the same time. 
InVerDa also allows for easily changing the physical table design without affecting the availability of co-existing schema versions. This greatly increases robustness (orders of magnitude less lines of code) and allows for significant performance optimization. A main contribution is the formal evaluation that each schema version acts like a common full-fledged database schema independently of the chosen physical table design.", "pdfUrls": [ "https://arxiv.org/pdf/1608.05564v2.pdf", "http://doi.acm.org/10.1145/3035918.3064046", "https://arxiv.org/pdf/1608.05564v1.pdf", "http://arxiv.org/pdf/1608.05564v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/833c3f38216e46473ab7768bbff915284b5d4836", "sources": [ "DBLP" ], "title": "Living in Parallel Realities: Co-Existing Schema Versions with a Bidirectional Database Evolution Language", "venue": "SIGMOD Conference", "year": 2017 }, "833dd2477b9f783434121f9d07a91349fad4d5d4": { "authors": [ { "ids": [ "2322230" ], "name": "Michael Vollmer" }, { "ids": [ "17853037" ], "name": "Ryan G. 
Scott" }, { "ids": [ "1702346" ], "name": "Madan Musuvathi" }, { "ids": [ "31778078" ], "name": "Ryan Newton" } ], "doi": "10.1145/3018743.3018746", "doiUrl": "https://doi.org/10.1145/3018743.3018746", "entities": [ "APL", "Benchmark (computing)", "Compiler", "Computation", "Consistency model", "Haskell", "Immutable object", "Imperative programming", "Overhead (computing)", "Process calculus", "Programmer", "Programming idiom", "Programming language", "Race condition", "Sequential consistency", "Shared memory", "The Glorious Glasgow Haskell Compilation System", "Thread-local storage", "Type system", "X86" ], "id": "833dd2477b9f783434121f9d07a91349fad4d5d4", "inCitations": [ "070771bdc55490cdcdadc63f815faf0cf23224fb", "08de23da3ed2240cce1b6f48e1096cfe806d90d2" ], "journalName": "", "journalPages": "283-298", "journalVolume": "", "outCitations": [ "4e0ee850f7e8323fbb0fbb3591c671926cf22f4d", "7bd7375a41c279d15caeb435e1f67cac0bc7510d", "62bd72d7a4160bd1a35191c51137d11cfbe30cf7", "d4c5e14e27b45266532c74f6aa0d51a1a4280e7c", "2bf4940710deb2571e93b1c922e8e7452e854afd", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "0aff06e25dd081211e39771f2aeb41aff7b2fcd6", "3371781698dbd3d3e78477af7528530024b828f8", "0ed62848d5c9e01f692c0c0b3851848ac7bb0764", "33dcafd805a3b44fd64270028633032ff0bb6fac", "4683861d7bf2f5a8197ed71f419a9b50cee1e0d6", "2d4f01286cb0c8cc81cfba1dfc52e9c0f1da5122", "60d73856a1b51913e9179377356bdd63e270a199", "16a04050353b741974c7d0448e8b0149831bfdc0", "0478cf7e8f0262aad69c2c375bd151fc0e8bbf1e", "09cb251072ef19e125ec5d94de5777584af68db5", "17d2fac94e9368788d7f53832483f66057c577d7", "028a4bab0dc19548b942947ac0d5160d3de0655c", "09d95e0b06d1174e4ac83c7354bb30877320a362", "16dc592aa326ecd1f8d46ca7e3485a7311af3dba", "454b06a17e2f6656e65935cb9d36dcf2b2044bf5", "0d69f96cf1927ea2993608f839752de0314c0347", "3eae0271717f6b4d65024abf04e5d98aef41d748", "362e9b5afe5934a9d8046d758c17c5bada0652b3", "39fd614e47450e49bdfa94175489b3115a2f3ecd", 
"b44a4cfd880ecd47978fda1738479179651304f8", "182c0524aa353b6f4f4cb75a88ff3f5fc3bd86e0", "09056887aefcb6a089bb931e2b75bc67f8807e27", "771b52e7c7d0a4ac8b8ee0cdeed209d1c4114480", "7e40209617935569a12a104c354eabf029a3b537", "5ac46b7c320aabe83eacb1a91c055939c1941dac", "4a3f0c1b983315c863dd6f4820dc147b50ab6109", "05a518c3b1a6f5c15d77f8829368677a263ff15d", "336d2c8a1afff240d601294a9ec1702f04f0b5b2", "857726e6c21504e66569e3d61ed6b8710e44db4a", "1500d857a550fe0dbcc37b0f35139f5ba8c2059e", "2e3058c0c279a5ff59b7fd65b7f73fab2fe0d0b3", "1b9e9bfdc66140d2eb192e4ac8aca9281f0239b8", "413d938109026fb513083a3b3f1c616da005639c", "51081c97ac4f894dd63d537028b7f752b212f6d6", "129aea79d23b3295999332bf336c4aa8804ebfc5" ], "paperAbstract": "A core, but often neglected, aspect of a programming language design is its memory (consistency) model. Sequential consistency~(SC) is the most intuitive memory model for programmers as it guarantees sequential composition of instructions and provides a simple abstraction of shared memory as a single global store with atomic read and writes. Unfortunately, SC is widely considered to be impractical due to its associated performance overheads. \n Perhaps contrary to popular opinion, this paper demonstrates that SC is achievable with acceptable performance overheads for mainstream languages that minimize mutable shared heap. In particular, we modify the Glasgow Haskell Compiler to insert fences on all writes to shared mutable memory accessed in nonfunctional parts of the program. For a benchmark suite containing 1,279 programs, SC adds a geomean overhead of less than 0.4\\% on an x86 machine. \n The efficiency of SC arises primarily due to the isolation provided by the Haskell type system between purely functional and thread-local imperative computations on the one hand, and imperative computations on the global heap on the other. 
We show how to use new programming idioms to further reduce the SC overhead; these create a virtuous cycle of less overhead and even stronger semantic guarantees (static data-race freedom).", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018746", "http://recurial.com/wp-content/uploads/2017/03/ppopp17-sc-haskell.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/833dd2477b9f783434121f9d07a91349fad4d5d4", "sources": [ "DBLP" ], "title": "SC-Haskell: Sequential Consistency in Languages That Minimize Mutable Shared Heap", "venue": "PPOPP", "year": 2017 }, "8346f424b4d07919e81581f405a983cda43a8de5": { "authors": [ { "ids": [ "40429127" ], "name": "Jen-Cheng Huang" }, { "ids": [ "2144577" ], "name": "Lifeng Nai" }, { "ids": [ "39708396" ], "name": "Pranith Kumar" }, { "ids": [ "3194681" ], "name": "Hyojong Kim" }, { "ids": [ "8187053" ], "name": "Hyesoon Kim" } ], "doi": "10.1109/IPDPS.2017.118", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.118", "entities": [ "Apache Hadoop", "Apache Spark", "Sampling (signal processing)", "Simulation", "Social media", "Social simulation" ], "id": "8346f424b4d07919e81581f405a983cda43a8de5", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "595-604", "journalVolume": "", "outCitations": [ "43e2dd9cacfc25761d95fa9fb2a0f28aa768efc8", "5d405f87571807066aed687baed4d4a3c2a85172", "6b406760f69fc0e4b8412ea4b864c345a9540b3b", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "0e968f665c764a5bb636bb817e0a8b85762ba206", "17d72a792b35fa9e65499b3e8f9c15a1f6f8b7b6", "6b1146e51b2c4d5b8433d4d9c6dbf87c6c484196", "79ff6d26643770fecefe08d7bf1ec504ae465bc8", "eecbaaaab769c62e8c24d98b9bc7ca955b41fbda", "bb6cedd67b26fce1f0d8eacb0357658c6831586d", "a81c73e2e277f290bdf4dc2b0e34a61a2920afc8", "82adcea5d233d776b78224998984e7adf2268fe0", "8009b1c8cc4af8d3d4b792ac32926487a428172e", "1ae0a2b3677b5b99441b4829a94e4577d6786de5", 
"dd61bd4a3a147663df5a6be5ba7e7473dd3fc960", "2cdcddb08ae6060e94cba6c9b2b58b87324e686f", "38e31e68af9b260c51d5abc03b27041780e81e4b", "0c4867f11c9758014d591381d8b397a1d38b04a7", "40eb1f990ac292b14b56ea06e61d9aeb9bfa28c3", "3ed84f2fbdc4dc6450919ec5b017e66440a5833c", "1be5ca1c9a94bd29c64d358e677b699e16c58f55", "10aa9ee7caaf9381b6a0468ae899a9729824a6b7", "313b6d6a2fe071869507ba7530aef10c91aefe11", "2c545dc62362253220285bf521fdf73c0eeba975", "f29dac2e26273532c81c933f091c7a60b9480f94" ], "paperAbstract": "Today, there is a steep rise in the amount of data being collected from diverse applications. Consequently, data analytic workloads are gaining popularity to gain insight that can benefit the application, e.g., financial trading, social media analysis. To study the architectural behavior of the workloads, architectural simulation is one of the most common approaches. However, because of the long-running nature of the workloads, it is not trivial to identify which parts of the analysis to simulate. In the current work, we introduce SimProf, a sampling framework for data analytic workloads. Using this tool, we are able to select representative simulation points based on the phase behavior of the analysis at a method level granularity. This provides a better understanding of the simulation point and also reduces the simulation time for different input sets. 
We present the framework for Apache Hadoop and Apache Spark frameworks, which can be easily extended to other data analytic workloads.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.118" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8346f424b4d07919e81581f405a983cda43a8de5", "sources": [ "DBLP" ], "title": "SimProf: A Sampling Framework for Data Analytic Workloads", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "83508bca98a0b7159bc22dfea399f405575fabdb": { "authors": [ { "ids": [ "1708134" ], "name": "Ju Wang" }, { "ids": [ "1784158" ], "name": "Jie Xiong" }, { "ids": [ "2466164" ], "name": "Xiaojiang Chen" }, { "ids": [ "1792087" ], "name": "Hongbo Jiang" }, { "ids": [ "9140912" ], "name": "Rajesh Krishna Balan" }, { "ids": [ "2068791" ], "name": "Dingyi Fang" } ], "doi": "10.1145/3117811.3117830", "doiUrl": "https://doi.org/10.1145/3117811.3117830", "entities": [ "Multipath propagation", "RSS", "Radio frequency", "Radio-frequency identification", "Real life" ], "id": "83508bca98a0b7159bc22dfea399f405575fabdb", "inCitations": [ "ac53a4947d5d947f34d4b68e408fce221ae404e9" ], "journalName": "", "journalPages": "288-300", "journalVolume": "", "outCitations": [ "62efd4e8913ac5c4cd3a771fdd02e5abb51afc30", "3317dd57f1a9a29d7f0a3f8cf82403bc775f5c1c", "666fbcf4697fc64d576b9e007af6ee612d10a9f8", "00f324e77f618eb32f9f5b26f2943f287f596f80", "5ec4ea65468de9291432eaf8b5f96b01f3dc8aea", "c58103fba20614d9b0d29c44fccfad9ebbcc1176", "476d1877acb919fa79cb917de6309989074ad36a", "fa499200c8ad0ddc383ad4a105712eea798718c7", "38c49df6243bb478a7d7ddc65dfa91923767b1e0", "26ca9c0c988870e6b171957c8901b3fa080c8f53", "1879bf3d2e843155056344a8f6a6cd27b10e0668", "b8971c007282d98a19e5fa213e794d30f35aa7bc", "db9ba8a02aa72c5881e1114b23d1e4f4f6720442", "29e9cd18af650b7e448dea668121a1d98afd3c46", "a7e2814ec5db800d2f8c4313fd436e9cf8273821", "2d8b03affff51cbc803135c42b95c68ef31e17a5", 
"422b68115ce86e87b8c54600f21631588fc4711d", "b40c3bfbad4f350d584bb34150eb69de511f7079", "471a1e1dc78b8da317dd5cfaf21494af1fb5d60f", "82802e411495bbad77fa2415c6d4633dde180764", "01fb0eb6a8a8055342d1e85d5e01a80fc98b842a", "101aaa6b7a3ebd049412265a43f8aed414f44db1", "0d8dba43dfe0d165804d9fa0098ed0ada6a9c402", "5a0e84b72d161ce978bba66bfb0e337b80ea1708", "0e81a72ada8dd4f455d42f436baa135f63821ff7", "d1451fc7db68d40dba360200ba6c29da928122e4", "1260c16ac61b614e89112963ebb4de58227f9a1c", "4d7d4349a81b781a7b6be868d093060a98b3d960", "7a72943beae792e7631f41ec6bc6fb5375977a78", "d610f6b0b01b67af90be3e475b1eb188fa53ac85", "01b20d091baef770f7aebb632daa6c8187c744f4", "052b36fd8bde6035c11eb316c3f9a3665c0110f0", "e39c17f6c5a83581890640049b075badff0cd34d", "2b2d03f8b96aa1e306fb941e0318d403efbde4be", "6d3106eeeb2fa5531d2d82fc1687f0a7ced25b68", "35d14e4dc1d95074d3d4ac06cdb0e933a64f83b7", "b6554c0f9f2f67a862b3cae8f0f1660780769372", "151831fc041a3fc19ed56bacdd8bf330d2a93eeb", "994d5fa869cbe7dc520df94e878ea2c954d1b8e3", "05fe031e53dd8990e7076a91277cb2b74e22b811", "69e6c6aa4a207eb81be43949edc8af7d6b4782d1", "8aea613645b3cf811d6c37811e34e8316ad972f1", "9a28babd714a2c936075b24ec9a54e3ed5b61411" ], "paperAbstract": "Target imaging and material identification play an important role in many real-life applications. This paper introduces TagScan, a system that can identify the material type and image the horizontal cut of a target simultaneously with cheap commercial off the-shelf (COTS) RFID devices. The key intuition is that different materials and target sizes cause different amounts of phase and RSS (Received Signal Strength) changes when radio frequency (RF) signal penetrates through the target. 
Multiple challenges need to be addressed before we can turn the idea into a functional system including (i) indoor environments exhibit rich multipath which breaks the linear relationship between the phase change and the propagation distance inside a target; (ii) without knowing either material type or target size, trying to obtain these two information simultaneously is challenging; and (iii) stitching pieces of the propagation distances inside a target for an image estimate is non-trivial. We propose solutions to all the challenges and evaluate the system's performance in three different environments. TagScan is able to achieve higher than 94% material identification accuracies for 10 liquids and differentiate even very similar objects such as Coke and Pepsi. TagScan can accurately estimate the horizontal cut images of more than one target behind a wall.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117830" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/83508bca98a0b7159bc22dfea399f405575fabdb", "sources": [ "DBLP" ], "title": "TagScan: Simultaneous Target Imaging and Material Identification with Commodity RFID Devices", "venue": "MobiCom", "year": 2017 }, "83a2ced84ac91333b734c62897c9dee2ba94ee15": { "authors": [ { "ids": [ "31074563" ], "name": "Alfredo Gim\u00e9nez" }, { "ids": [ "33289410" ], "name": "Todd Gamblin" }, { "ids": [ "1823585" ], "name": "Abhinav Bhatele" }, { "ids": [ "39117045" ], "name": "Chad Wood" }, { "ids": [ "19265505" ], "name": "Kathleen Shoga" }, { "ids": [ "36210022" ], "name": "Aniruddha Marathe" }, { "ids": [ "1718183" ], "name": "Peer-Timo Bremer" }, { "ids": [ "1748473" ], "name": "Bernd Hamann" }, { "ids": [ "1772965" ], "name": "Martin Schulz" } ], "doi": "10.1145/3126908.3126935", "doiUrl": "https://doi.org/10.1145/3126908.3126935", "entities": [ "Analysis of algorithms", "Computer cooling", "Logical Domains / Oracle VM Server for SPARC", "Scalability", "Sensor" ], "id": 
"83a2ced84ac91333b734c62897c9dee2ba94ee15", "inCitations": [], "journalName": "", "journalPages": "35:1-35:12", "journalVolume": "", "outCitations": [ "37a1140778e8db6202fd66acd77d790ee34d9a16", "2dbcd5f0076acf5a909e76f9dc2b897d4e0d4243", "1f2ff98f9413bb36c641e9edcfa79f7b33eeb80a", "86dd6cffcb498c282c22966507fb533ae8901dd6", "6af3228141a9891e57f879c6ea2b48787e56e17f", "1c236ad95d923d819169ab6904ad580f454a7f10", "582389c37f27dd69b39e949257f7fe83a6fee8d9", "a937ce408728ba75adeeafe0c0a9849627782311", "09af1a0185955c3aea1692972296c697f0c5b7ee", "03f6441c5a5200d061be0e4314eb236fcb9fb736", "7e2ddd55ccc0012fca5515edd5482eda853b69d7", "8e07693e2d6b1a9949f7d3b3e81060e69f4bb420", "0323b626078b11e63509339771c20a7e283a1d70", "fb2fa455cb0cdb4bf46c4990e0d1681a62179ea4", "566707209e3ace646b3b0cb1a3bc7d7215b1ec55", "3679eeccf323ff83d68ad539f055da8939d26d01", "3218bbfd89deae4134d6c6d7f8f3ceb5c3a361f7", "06b5ce2fc7e86f2b4ee61dee4ff564dfa576d2d9", "48e9abb2745b9903e984feac3ac1776ce0a629a0", "1382b9e97f88050d65c3aa19842a76252cdf9621", "3fcb48efe8489061deeea37ceafdabe115ca4789", "d1a6a735e72ababac022d0d5a35fcb620742ca98", "450ed5307db8343752999f3f606bdfb6230494ef", "442c9bc99ba0eadcc44eb5d01ac1411ec7b4fdbc", "298c14f1afc65a9c58b8ae5abe16a27ea4f13a71" ], "paperAbstract": "Modern HPC centers comprise clusters, storage, networks, power and cooling infrastructure, and more. Analyzing the efficiency of these complex facilities is a daunting task. Increasingly, facilities deploy sensors and monitoring tools, but with millions of instrumented components, analyzing collected data manually is intractable. Data from an HPC center comprises different formats, granularities, and semantics, and handwritten scripts no longer suffice to transform the data into a digestible form.\n We present ScrubJay, an intuitive, scalable framework for automatic analysis of disparate HPC data. ScrubJay decouples the task of specifying data relationships from the task of analyzing data. 
Domain experts can store reusable transformations that describe relations between domains. ScrubJay also automates performance analysis. Analysts provide a query over logical domains of interest, and ScrubJay automatically derives needed steps to transform raw measurements. ScrubJay makes large-scale analysis tractable, reproducible, and provides insights into HPC facilities.", "pdfUrls": [ "http://graphics.cs.ucdavis.edu/~hamann/GimenezGamblinBhateleWoodShogaMaratheBremerHamannSchulzSC17PaperAsSubmitted04032017.pdf", "http://graphics.idav.ucdavis.edu/~hamann/GimenezGamblinBhateleWoodShogaMaratheBremerHamannSchulzSC17PaperFinal08092017.pdf", "http://doi.acm.org/10.1145/3126908.3126935" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/83a2ced84ac91333b734c62897c9dee2ba94ee15", "sources": [ "DBLP" ], "title": "ScrubJay: deriving knowledge from the disarray of HPC performance data", "venue": "SC", "year": 2017 }, "83c1a9d880807154c131cc3103d5a76f99433e18": { "authors": [ { "ids": [ "2015383" ], "name": "Shoji Nishimura" }, { "ids": [ "1749641" ], "name": "Haruo Yokota" } ], "doi": "10.1145/3035918.3035934", "doiUrl": "https://doi.org/10.1145/3035918.3035934", "entities": [ "Analysis of algorithms", "Data access", "Experiment", "FITS", "Geographic information system", "Information system", "Partition (database)", "Space-filling curve" ], "id": "83c1a9d880807154c131cc3103d5a76f99433e18", "inCitations": [], "journalName": "", "journalPages": "1525-1537", "journalVolume": "", "outCitations": [ "0b53d23584071656e88ca2943ed61857c20a26d0", "7fb5d07836f38186a4385c5e4a9816b8de2914a6", "1c27eafecd3d6f0008d74ffbe1e7c59a25869407", "c128c60423daf4b3cc6fa24f01b712575b5828da", "e467a52667aa6faa87ff27f0e5480981832044c2", "351f57001290e6cce272b5d80d776a8b57801765", "2dd6cd544f8c397927d18dab8c6f2ab5a3fc8e36", "32a17985afca711e9bd09ab202c44ceeb5d2be64", "bfc464efbe615805d386ef5c882e4d0f97071ec2", "d758056c8d90543d6caff2262c648792caebafa5", 
"3a134bc11a5805bcf45fdcb88a91321a1b1b63c3", "b8862d944ac03fd018b282703301341cc5122a56", "e80402ac18fc2d59a9a518d1fdeeae9767ba2971", "823709fd1d9a4f98cea876650727dd17818409a1", "b4b55fbedea644e6336937cb827c1251f11479dc", "271bb32d09a4096e29ee936c1fd5cc144931bc76", "566e63917526cc083b103985f96cf0c65ce7a4a5", "0a2721cd57f436e6f1ca5b7055b81ccf0e17a0cf", "71e62800c297987e4ef53c5d11f05fe2ebb00ab3", "41c60f57772bc5440cf9e569e55d50f437b469ff", "6350f382e814d4b2f888f5a2a8bd6dd0e9362d81" ], "paperAbstract": "Recently, massive data management plays an increasingly important role in data analytics because data access is a major bottleneck. Data skipping is a promising technique to reduce the number of data accesses. Data skipping partitions data into pages and accesses only pages that contain data to be retrieved by a query. Therefore, effective data partitioning is required to minimize the number of page accesses. However, it is an NP-hard problem to obtain optimal data partitioning given query pattern and data distribution.\n We propose a framework that involves a multidimensional indexing technique based on a space-filling curve. A space-filling curve is a way to define which portion of data can be stored in the same page. Therefore, the problem can be interpreted as selecting a curve that distributes data to be accessed by a query to minimize the number of page accesses. To solve this problem, we analyzed how different space-filling curves affect the number of page accesses. We found that it is critical for a curve to fit a query pattern and be robust against any data distribution. We propose a cost model for measuring how well a space-filling curve fits a given query pattern and tolerates data skew. Also we propose a method for designing a query-aware and skew-tolerant curve for a given query pattern.\n We prototyped our framework using the defined query-aware and skew-tolerant curve. 
We conducted experiments using a skew data set, and confirmed that our framework can reduce the number of page accesses by an order of magnitude for data warehousing (DWH) and geographic information systems (GIS) applications with real-world data.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035934" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/83c1a9d880807154c131cc3103d5a76f99433e18", "sources": [ "DBLP" ], "title": "QUILTS: Multidimensional Data Partitioning Framework Based on Query-Aware and Skew-Tolerant Space-Filling Curves", "venue": "SIGMOD Conference", "year": 2017 }, "83d89c2785cb82496a0259d9982dd69d286317de": { "authors": [ { "ids": [ "2138575" ], "name": "Amirsaman Memaripour" }, { "ids": [ "1783539" ], "name": "Anirudh Badam" }, { "ids": [ "3078275" ], "name": "Amar Phanishayee" }, { "ids": [ "2389316" ], "name": "Yanqi Zhou" }, { "ids": [ "31817919" ], "name": "Ramnatthan Alagappan" }, { "ids": [ "1718508" ], "name": "Karin Strauss" }, { "ids": [ "1760342" ], "name": "Steven Swanson" } ], "doi": "10.1145/3064176.3064215", "doiUrl": "https://doi.org/10.1145/3064176.3064215", "entities": [ "Atomicity (database systems)", "Byte", "Byte addressing", "Critical path method", "Data structure", "In-place algorithm", "Non-volatile memory", "Persistent data structure", "Throughput" ], "id": "83d89c2785cb82496a0259d9982dd69d286317de", "inCitations": [ "cb2a018979184f87692d423322e367cc42a215d2", "e3a66a2e79c2a5f7a52bf4e8089bf522fdc04abe", "952267605cf8e7ace81576632ed3b9d70f6e3334", "7206aead5a341f361e6571d607f3c032e65e2f7e", "db57257e6b051e0f97d35209cc5aee0909cde1f1" ], "journalName": "", "journalPages": "499-512", "journalVolume": "", "outCitations": [ "24724ad8962a9e04eb496fddaefe9708f6960601", "4228d3fee9d28b3b3a7dc8e9585d02652109029f", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "05a1357946de5eca42a477b7b268db4944219a2e", "a43f2375fc8ac9fadbab91d5c10e61ef88a0525d", 
"0204f40221260d00c5ee63646560a40dcd7d97d1", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "205cf007cf77bbf81e55b74635017087585f7b7c", "d04957ae69caf43707b13fa833e50119724688f1", "5a49c1c694028fd6ca7acc6af601c0f54efa0700", "157352f39ed5ba9a571dd4c27b48786ac8be90cf", "243c522b56809292f1f50117a9915053d32bf4fb", "793f5e737284925a176f8ec82b3bb0d2178bb330", "478e885ab5fe8324a3ed29dce2734684197d2c8c", "2ef08ccb970632bb8ada93ea70078eac54ce92d3", "0139dceb6cef21b234e454d53154f30391495862", "76eea8436996c7e9c8f7ad3dac34a12865edab24", "1594118f2696b573f08510cf837f3b37db87face", "9aa0d7253574e50fe3a190ccd924433f048997dd", "589fa768b4a51211d93250669391803869cb2941", "6c079bc20b4dd76e25687f979fdabda227f5fa8c", "75032b6df2f3988d2cb6988c73cbc01c7b9e80cb", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "15c80ec5104e98d6f84b5ed348ba0276c0739862", "7a9abc36f336750f4c0679f0b4ef87c9dc12133c", "1220e4a011c46804d4369b5580dc7fb6e387af54", "94783d113951822195d4ba44599a8fcbdef9d4bf", "39e3d058a5987cb643e000bce555676d71be1c80", "bf5497e15f22233cbc2a4d0c3cc2c36f26738701" ], "paperAbstract": "Data structures for non-volatile memories have to be designed such that they can be atomically modified using transactions. Existing atomicity methods require data to be copied in the critical path which significantly increases the latency of transactions. These overheads are further amplified for transactions on byte-addressable persistent memories where often the byte ranges modified for data structure updates are significantly smaller compared to the granularity at which data can be efficiently copied and logged. We propose Kamino-Tx that provides a new way to perform transactional updates on non-volatile byte-addressable memories (NVM) without requiring any copying of data in the critical path. Kamino-Tx maintains an additional copy of data off the critical path to achieve atomicity. But in doing so Kamino-Tx has to overcome two important challenges of safety and minimizing NVM storage overhead. 
We propose a more dynamic approach to maintaining the additional copy of data to reduce storage overheads. To further mitigate the storage overhead of using Kamino-Tx in a replicated setting, we develop Kamino-Tx-Chain, a variant of Chain Replication where replicas perform in-place updates and do not maintain data copies locally; replicas in Kamino-Tx-Chain leverage other replicas as copies to roll back or forward for atomicity. Our results show that using Kamino-Tx increases throughput by up to 9.5x for unreplicated systems and up to 2.2x for replicated settings.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064215", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/01/paper-2.pdf", "http://pages.cs.wisc.edu/~ra/kamino.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/83d89c2785cb82496a0259d9982dd69d286317de", "sources": [ "DBLP" ], "title": "Atomic In-place Updates for Non-volatile Main Memories with Kamino-Tx", "venue": "EuroSys", "year": 2017 }, "83fb6276431a40d8b3fed09eca59cbd6d8e7b307": { "authors": [ { "ids": [ "2944920" ], "name": "Shaomeng Li" }, { "ids": [ "26783785" ], "name": "Sudhanshu Sane" }, { "ids": [ "2160727" ], "name": "Leigh Orf" }, { "ids": [ "1823535" ], "name": "Pablo D. 
Mininni" }, { "ids": [ "1773970" ], "name": "John Clyne" }, { "ids": [ "2405249" ], "name": "Hank Childs" } ], "doi": "10.1109/CLUSTER.2017.15", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.15", "entities": [ "Data buffer", "Data compression", "Memory hierarchy", "Run time (program lifecycle phase)", "Simulation", "Solid-state drive", "Solid-state electronics", "Supercomputer", "Wavelet", "Wavelet transform" ], "id": "83fb6276431a40d8b3fed09eca59cbd6d8e7b307", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "216-227", "journalVolume": "", "outCitations": [ "46db3ada0980b2d513b73fcef5383d6b6c63cffe", "d07a7772a5e8b101fc93027c4ab172c5967e09d1", "fdaa9a15aa97e385cc240c517e1d255ae92f4fa9", "01fbae01b5cba998995180bfd03136f75c2dd4a5", "03b70c782df4806b0e2bc27d6ca7085fca47d7e1", "ba625fb8f294a5003a0880096695a92bc9bb843a", "de4b00d227e41344edf1d7c1a9b77b7b2ef5a9bd", "c750b9288ed25777e5b7129139e01c143177324c", "59a902d3a87001aaf091752773e8b4679651499c", "892a63a99e7c5cede6093e809e0c350da56ab8cc", "962f449c4edd4ef723b48a936a168fd43c9f1ec6", "44860f86dc2d5b526298890f407c8d253a621cea", "3f9edf67472607344b451acf8a6f2e9cc38ae730", "29b0ff8ef513b3c4ab3c8882c8f61f906eb249fa", "9c0b153ea0b741107301c2a13dc0e0f2f92c863c", "8832d189d0d5d3886bb2fcda809beba409af7c8e", "230dfb469a5cd733eba745815cf25e968bd4ac1c", "5c027b4c0ab42649eb5778b4153240184a644c95", "f15ca310a4519e98185d22b3b73fd2509c6ef032", "44d2c8bec79fb924f7de9507bbf63a5cfbd255ee", "c6c7fb1a8da010ce9aa660c47e3399cc43fa58ab", "dac1b3293224845695062b178a5caa1e76f87503", "0c274baed7507a4fb400c6919069ff3191fc4d13", "d38be5c5894355a9d2bb4c84b5680a7a2be89924", "bbd6c87a25d320f851b24c964c3725e1428d642a", "8e4dad48cbc091ac08b81d273dfd7b22b84a266f", "7d68de2e79c25ec0c69b58e72cc2054343dae1ed", "aa64b802ad1b89a27d8f5db849d41a33a519475f", "59ba9f62728b6231f982ea3b59f9ba7422182f28", "20cc5fdba0915a3958c31d7b18763e82a5418856", "5ae8c2924636b3ac45ce5d272f61da0927d597ae" 
], "paperAbstract": "Data reduction through compression is emerging as a promising approach to ease I/O costs for simulation codes on supercomputers. Typically, this compression is achieved by techniques that operate on individual time slices. However, as simulation codes advance in time, outputting multiple time slices as they go, the opportunity for compression incorporating the time dimension has not been extensively explored. Moreover, recent supercomputers are increasingly equipped with deeper memory hierarchies, including solid state drives and burst buffers, which creates the opportunity to temporarily store multiple time slices and then apply compression to them all at once, i.e., spatiotemporal compression. This paper explores the benefits of incorporating the time dimension into existing wavelet compression, including studying its key parameters and demonstrating its benefits in three axes: storage, accuracy, and temporal resolution. Our results demonstrate that temporal compression can improve each of these axes, and that the impact on performance for real systems, including tradeoffs in memory usage and execution time, is acceptable. We also demonstrate the benefits of spatiotemporal wavelet compression with real-world visualization use cases and tailored evaluation metrics.", "pdfUrls": [ "http://cdux.cs.uoregon.edu/pubs/LiCluster.pdf", "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.15" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/83fb6276431a40d8b3fed09eca59cbd6d8e7b307", "sources": [ "DBLP" ], "title": "Spatiotemporal Wavelet Compression for Visualization of Scientific Simulation Data", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "8403534001a885206e7507c212188c9944b5378c": { "authors": [ { "ids": [ "2151313" ], "name": "Athena Elafrou" }, { "ids": [ "2825685" ], "name": "Georgios I. 
Goumas" }, { "ids": [ "1774783" ], "name": "Nectarios Koziris" } ], "doi": "10.1109/ICPP.2017.38", "doiUrl": "https://doi.org/10.1109/ICPP.2017.38", "entities": [ "Assistive technology", "Compressed sensing", "Math Kernel Library", "Mathematical optimization", "Matrix multiplication", "Preprocessor", "Profiling (computer programming)", "Program optimization", "Sparse matrix", "Test suite" ], "id": "8403534001a885206e7507c212188c9944b5378c", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "292-301", "journalVolume": "", "outCitations": [ "aad3fc32b5f88366cedc4f1f518b80fc98b629c7", "e10bc70182dce9ef5fa4852d222375df936ae4b3", "136ffe66f6bb69c5ad2537531373220c2c704b57", "577412cf4fb567fa3dfd2c8cde337590122cb34a", "074d096a54bf6bb33c59f628206848c7724a7cf3", "66ba6dd1b746aa0e10c3a4f62a5c4dd6b955b503", "bc31e8f6e35852934b6019d7a59a82074f224a01", "16a7bf1a7f73252f597912e5661d6d8ba5ce4216", "8b80f37566b85ca022d6a8949c295b8477ebcc07", "acc30d21e9d835d91444e3a94158e0771c9b2f48", "ca8b54c677b3996d73d54452724b4a6a2a79621f", "449ad15148b56012dd4c76e68c7e12f12f86ac82", "1a93bb7c711abaae5f6388477d7daef9ba0f89c9", "796777acff940e622ac80810961e9c1033db14c1", "02544882276ff1a35f4b6f1a8504a972b8df4087", "3d1090cf93b35336ec950750659f017df9a9954c", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "04cab4528a50ba956cc8fdf46110545169da285c", "12ab8a6105bd353898ee8dffd3410d3c2ed53f24", "11fa55df451335b846a56c6b295738c32506adeb", "2b85f7d3cc58dad3cf913e4a85b4e7108dc2ebbc", "32ef8d891edde06cc01357fa5c4d1ab7fe631720", "b3d98b1fbbc03e4dee1557cf6ee10c37f8c1cbc2", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "0707e2b5ac86497c7af9494fe3670298fadf9ea6", "5672ce28f2927b81b01303e4926643c55a4c8133", "d6c4c76076efecb15655274adc648af8a445ed3a", "0c0800259bd40b1ac96cc437629c5ea0ad729f22", "092217c2267f6e0673590aa151d811e579ff7760", "4670c000c84fd55184cb33c736ba2c32f170a825", "4a61a6368aa881cccf115a12280b72e1c39be8c5" ], "paperAbstract": 
"This paper presents a low-overhead optimizer for the ubiquitous sparse matrix-vector multiplication (SpMV) kernel. Architectural diversity among different processors together with structural diversity among different sparse matrices lead to bottleneck diversity. This justifies an SpMV optimizer that is both matrix- and architecture-adaptive through runtime specialization. To this direction, we present an approach that first identifies the performance bottlenecks of SpMV for a given sparse matrix on the target platform either through profiling or by matrix property inspection, and then selects suitable optimizations to tackle those bottlenecks. Our optimization pool is based on the widely used Compressed Sparse Row (CSR) sparse matrix storage format and has low preprocessing overheads, making our overall approach practical even in cases where fast decision making and optimization setup is required. We evaluate our optimizer on three x86-based computing platforms and demonstrate that it is able to distinguish and appropriately optimize SpMV for the majority of matrices in a representative test suite, leading to significant speedups over the CSR and Inspector-Executor CSR SpMV kernels available in the latest release of the Intel MKL library.", "pdfUrls": [ "https://arxiv.org/pdf/1711.05487v1.pdf", "http://export.arxiv.org/pdf/1711.05487", "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.38", "http://arxiv.org/abs/1711.05487" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8403534001a885206e7507c212188c9944b5378c", "sources": [ "DBLP" ], "title": "Performance Analysis and Optimization of Sparse Matrix-Vector Multiplication on Modern Multi- and Many-Core Processors", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "8410828bc05c0e6109d866d3b1ccee075b127924": { "authors": [ { "ids": [ "10441993" ], "name": "Gauthier Voron" }, { "ids": [ "2848926" ], "name": "Ga\u00ebl Thomas" }, { "ids": [ 
"1679769" ], "name": "Vivien Qu\u00e9ma" }, { "ids": [ "1798389" ], "name": "Pierre Sens" } ], "doi": "10.1145/3064176.3064196", "doiUrl": "https://doi.org/10.1145/3064176.3064196", "entities": [ "Heuristic", "Hypervisor", "Non-uniform memory access", "Uniform memory access" ], "id": "8410828bc05c0e6109d866d3b1ccee075b127924", "inCitations": [ "459e2d2df50421fd7647d860f1a3b7fa88b417dc" ], "journalName": "", "journalPages": "453-467", "journalVolume": "", "outCitations": [ "36e1b02a66ed928ef13e3a2ba6852e90a8713036", "0e7af537d6eacd9832c90f31a46ced1ac91b573e", "294ad206a120a519cfd99294c8b5e004dcc06abf", "045729ec838ecc50be166fe4511506ac4a08226d", "7b1a8647dd6482adee6b774d694e5c85b6d5a9cf", "6142d3f595563f37efa41b672abecd4e5074fde0", "81532badf6f1a7da5c1b495c2c91243ef7968a37", "86e5050f7da865ab224c3a9ae8946f29d6e8d40a", "0852a44c86db434e9b51c67704636791e9940487", "093d1d23500d65e99c7d0cd5569ce0f0d4c37076", "303fab4117468e84d10f426ab3b3e6c92da1159e", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "0cb4b930159a456cd3ab7e253e0cab5c5b28c8c4", "371c5cc544d758eacd08ee2e9333b1b94f26689e", "a00c1beed819cc5be93bea423505ac1ce020d961", "2d5200c9aedc7069b843c6a9a0ba122abebeabe5", "146139716c9e8ec4f57475b9673171761ac34074", "136c75e41eb66c85aab922c7fdf62820d63b139f", "2129d9f040643983de7dffd1735849076bb2c1f1", "6a285b0a2243223ee6905692d79b4a8d39f5af5e", "7de2ed992aae322333c14e4ffad5b347f7a7016a", "2813b5741442ca4910e456576dbbd48bc2cb58e6", "3417d3ea8c58e3101f1d8a76bd7453c951588b1c", "ad439f70f5a0ed761b3a7bf169f38a790a342e4d", "9aa0d7253574e50fe3a190ccd924433f048997dd", "264cd229ac4bbdf655d5e7b44563bf84bd846364", "3c1e0e9c5b774f8d1b7522e7b7ea90634b1e252a", "09cef59336519ce93d15841bc2756a79ce13477d", "1ca27025fea3aeede2831870718036977fce0ae8", "51e878ed0979919041030f871f6e34531ca39750", "af6e9a07ced1b59f82ea2fb15dd9f00b5a4aa24f", "3c3a05197123ab4b2ec3d402c2d9d2777c63c712", "7a978f2902460e732c50c36a171deb11733df1fc", "71a2d8c473f13d0c664f751db97e81128281b1eb", 
"09e7f72d628322eff987f1e0b44e66a19d546028", "ad2bd653e1b898db00e8a87f92b190049217125a", "04e954c5afc21447cf43ba1420c9905d359eefd9", "6cdc292cd1674a5348789e3e6ecee239ccc940c8", "117c8dca0918376176e7bc8c0432103ed8e9c34f", "0562bc5f82b40e2e9c0ae035aa2dd1da6107017c", "9872bf81d8559bfb5fcf4dc65674afba98dec470" ], "paperAbstract": "While virtualization only introduces a small overhead on machines with few cores, this is not the case on larger ones. Most of the overhead on the latter machines is caused by the Non-Uniform Memory Access (NUMA) architecture they are using. In order to reduce this overhead, this paper shows how NUMA placement heuristics can be implemented inside Xen. With an evaluation of 29 applications on a 48-core machine, we show that the NUMA placement heuristics can multiply the performance of 9 applications by more than 2.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064196", "https://portail.telecom-bretagne.eu/publi/public/fic_download.jsp?id=79020" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8410828bc05c0e6109d866d3b1ccee075b127924", "sources": [ "DBLP" ], "title": "An interface to implement NUMA policies in the Xen hypervisor", "venue": "EuroSys", "year": 2017 }, "84521e6d27c705ca3f97e20bbcc3e80a29779f78": { "authors": [ { "ids": [ "1779678" ], "name": "Calvin C. 
Newport" } ], "doi": "10.1109/IPDPS.2017.11", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.11", "entities": [ "Algorithm", "Leader election", "Mobile operating system", "Mobile phone", "Network topology", "Operating system", "Polylogarithmic function", "Push\u2013pull output", "Smartphone", "Time complexity" ], "id": "84521e6d27c705ca3f97e20bbcc3e80a29779f78", "inCitations": [ "777b13a1efbc4e6fbacdc1bc4c2cf4987880bfa9" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "172-181", "journalVolume": "", "outCitations": [ "718bba0f9b305c9bcbb332d1e12f87949d97cf95", "3045ab5550d9a5d1cd30f37a0547b956f570f14c", "13dff6a28d24e4fe443161fcb7d96b68a085a3d4", "4ef3133817267e0829bbff3e2024af93403dddc7", "52e3d54c7fe011413a4f1cb555e6374d10765a22", "3006fbb09724024d98a9c59b70bc03ac14ba3193", "a9ea13a34553e3c4fce7abf46dd5999f8bc73cd9", "10298ef457eb38f0775326b6f2d245f1b8121bb5", "5928dd51e1d7d940d528ffc0455cab8248c551bc", "0967bd75632d959541ee4afef35a5ef37c805cc7", "f0b657949ff1014e0a69b6c985ba9605b76792bd", "48adc076d7c2c5a74323f8dd61ffa32be706d982", "2ce42a99cf15fffc3babe6aa35e520deb37f212e", "6f8c546b574ff16a800d202d51900cc1e56e4e94", "19ae27ba71869cc4328fe428eddec223a5cb2a7d", "e70f108961a7b250d3a77ca4a16c8e65626e96e6", "bb92a3071b138f9e7c21e11e475ee3b3ab715da3", "cdfd5de78df6a2b97b05001de962c7112c736a51" ], "paperAbstract": "In this paper, we study the fundamental problem of leader election in the mobile telephone model: a recently introduced variation of the classical telephone model modified to better describe the local peer-to-peer communication services implemented in many popular smartphone operating systems. 
In more detail, the mobile telephone model differs from the classical telephone model in three ways: (1) each device can participate in at most one connection per round; (2) the network topology can undergo a parameterized rate of change; and (3) devices can advertise a parameterized number of bits to their neighbors in each round before connection attempts are initiated. We begin by describing and analyzing a new leader election algorithm in this model that works under the harshest possible parameter assumptions: maximum rate of topology changes and no advertising bits. We then apply this result to resolve an open question from [Ghaffari, 2016] on the efficiency of PUSH-PULL rumor spreading under these conditions. We then turn our attention to the slightly easier case where devices can advertise a single bit in each round. We demonstrate a large gap in time complexity between these zero bit and one bit cases. In more detail, we describe and analyze a new algorithm that solves leader election with a time complexity that includes the parameter bounding topology changes. For all values of this parameter, this algorithm is faster than the previous result, with a gap that grows quickly as the parameter increases (indicating lower rates of change). We conclude by describing and analyzing a modified version of this algorithm that does not require the assumption that all devices start during the same round. 
This new version has a similar time complexity (the rounds required differ only by a polylogarithmic factor), but now requires slightly larger advertisement tags.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.11", "http://people.cs.georgetown.edu/~cnewport/pubs/le-IPDPS2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/84521e6d27c705ca3f97e20bbcc3e80a29779f78", "sources": [ "DBLP" ], "title": "Leader Election in a Smartphone Peer-to-Peer Network", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "8476714438669d5703690d4bbee9bfe751f61144": { "authors": [ { "ids": [ "2644814" ], "name": "Akhil Arora" }, { "ids": [ "2663974" ], "name": "Sainyam Galhotra" }, { "ids": [ "1699732" ], "name": "Sayan Ranu" } ], "doi": "10.1145/3035918.3035924", "doiUrl": "https://doi.org/10.1145/3035918.3035924", "entities": [ "Benchmark (computing)", "Common Platform", "Computer science", "Expectation\u2013maximization algorithm", "Instant messaging", "Social network" ], "id": "8476714438669d5703690d4bbee9bfe751f61144", "inCitations": [ "4788acab3002f6943e213e6a519dc9e0b6704641", "b4d849c28c91c3b664e996020b6f8517f78d8266", "c51924c480547768355ed8ca814f9200a36cbab4", "97ed889c7b5bc5786001863b4763c64e62f5ddfa", "560fa3d7c66d8d02a88d9f447a3e051ee2bb654f", "1f0d3c04153e41ea2865437fcd9c05875aa4a7e0", "a31964b8281008ad2611845654462240e9688a89", "42b0fb834260a07be0a358ecaabd822a148a5f3c" ], "journalName": "", "journalPages": "651-666", "journalVolume": "", "outCitations": [ "1da59fb230f7fb495befb9d04e4338fd55757cd3", "6eb45da9c8582c2e426a4896ff399987b73558b3", "637aa0c0214cb6783e4ae4d076eb5fadcfcd82e7", "36c0b81a2ef2505e5c1c763c1abc25cdd72903f2", "b790f9ae49cdee5354eacfc7897ab9752acd5e2a", "1b82d160a3b69345f88879ca59454e1cd230c848", "eb82d3035849cd23578096462ba419b53198a556", "4bb0f607c1f6be38ca720ad6913577a778cc2f15", "048a42699d9991ec18b34bdb484ef244830e1d71", 
"706c83309fa09454a136d4e607364b27be66172c", "28bf0df09f97e7ef9108e71b45fe1b9a7aa201e2", "23d85a0008429845870780c6db3640c05165acaf", "4cd73382dc17561cd276f276c61d5ebf39bf69ad", "7febcbdf1b86ff38d19d65927f83bab185f44e1d", "72465eb427490619a5a625a45dda81c92a8cfe14", "3d84a02dfe4fc904aa729b8b7ca51fd8c97a9dc7", "b9e43395663f74c581982e9ca97a0d7057a0008c", "184cbf767d2f5501ddab3babe7be8ba93321e14c", "21968ae000669eb4cf03718a0d97e23a6bf75926", "4b983a1a0b370a4ea87c2be4f2c7c53b13edb4f6", "abb152802d5b4686a394e221abe951187ea06158", "2d49ad12e22313a82e7f14dd41efe20ecd5daf43", "18d215f2c5a4b372a7d2d71cc115a1e808bfa437", "d7f9c3253552e13f24c3b73bc055ef60388af57c", "a6d73877be2b91e8b6c9c0896e58942c93086ff8", "8cdafae1951cc054421361a444cf6401038f473e" ], "paperAbstract": "Influence maximization (IM) on social networks is one of the most active areas of research in computer science. While various IM techniques proposed over the last decade have definitely enriched the field, unfortunately, experimental reports on existing techniques fall short in validity and integrity since many comparisons are not based on a common platform or merely discussed in theory. In this paper, we perform an in-depth benchmarking study of IM techniques on social networks. Specifically, we design a benchmarking platform, which enables us to evaluate and compare the existing techniques systematically and thoroughly under identical experimental conditions. Our benchmarking results analyze and diagnose the inherent deficiencies of the existing approaches and surface the open challenges in IM even after a decade of research. More fundamentally, we unearth and debunk a series of myths and establish that there is no single state-of-the-art technique in IM. 
At best, a technique is the state of the art in only one aspect.", "pdfUrls": [ "http://people.cs.umass.edu/~sainyam/papers/SIGMOD17_im_benchmarking.pdf", "http://doi.acm.org/10.1145/3035918.3035924" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8476714438669d5703690d4bbee9bfe751f61144", "sources": [ "DBLP" ], "title": "Debunking the Myths of Influence Maximization: An In-Depth Benchmarking Study", "venue": "SIGMOD Conference", "year": 2017 }, "84851b61293a4199c3f9164e21103b417aee49f7": { "authors": [ { "ids": [ "2567514" ], "name": "Youngsok Kim" }, { "ids": [ "2865235" ], "name": "Jae-Eon Jo" }, { "ids": [ "2885986" ], "name": "Hanhwi Jang" }, { "ids": [ "1998820" ], "name": "Minsoo Rhu" }, { "ids": [ "2363315" ], "name": "Hanjun Kim" }, { "ids": [ "2287483" ], "name": "Jangwoo Kim" } ], "doi": "10.1145/3123939.3123968", "doiUrl": "https://doi.org/10.1145/3123939.3123968", "entities": [ "Computation", "GeForce", "Graphics", "Graphics pipeline", "Graphics processing unit", "NVLink", "Network traffic control", "Pure Data", "Runahead", "Scalability", "Simulation", "Speedup" ], "id": "84851b61293a4199c3f9164e21103b417aee49f7", "inCitations": [], "journalName": "", "journalPages": "574-586", "journalVolume": "", "outCitations": [ "71f5f14e56f53385763f2fd00228f6b449b2cb23", "015d4dd0e334146b05baf201696d0ea205f8c04c", "b55a35d009ae07832ad9cc91704172c3db45ee17", "40f357eb1e7010f841b986219df7ad40afdaee99", "46690cdff60ef7f35c2c19d6eaac89964a6b4f79", "630eb0c8cf211e95afc1696a2c627abe9e779bb3", "343ead469fff618b6f8c4adba7866caec8caca81", "5b87d90e7fe1f7521d42acdc39dd01922680ae2e", "2362d702b64b2f6a549155fe34a542524693d938", "0b8409b9d5b95bc72bf73960cdb0a3ba12d97f63", "598f974b963f5236fc92468470d1113f7cc71279", "b52e5a9308c8c8ed150265797d077da092d1c131", "84175abc1b5a4ef36b92ffd27a068e867d13a3f2", "2d3cb488c6f2491a187a2fd1cb0e51eae9c4fe23", "2af661c676ac486dbab88522161f4d3a57ff1561", "51c050e77be367fd4902b08eabe9ef4a288ad01f", 
"1449f296ecd8748c51b7819ba86482a33d68296d", "02ebdcf8200135ec0433e12e4ef2459ac740370b", "dc24c4c43b34ba93f4b32982d57532f9f892a03a", "368d487828b0d8ec17c26fdcac82ce47d6a6d6e7", "1c1805ea457142e0e5ad547f04ae04d5ac671ffc", "265800ef1842840d3042ed88121898c681e206a2", "05e01e3cae5a7395e7a8c406ccdbff27bce0d646", "a17a159f9852ddae3f8941b8d1eed6045c71a8c0", "054e4a6966d54eb9fd207cf0484214201f46424a", "0ed9efdbb5925d9c312b58328db2d21140e69508", "03826b283660a4028bd5576feeed0c256225e722", "0862fd4d2274d28fead7dbba2c9dec1acdfeb1e8", "65e2820a4be57c9abdd8e687a2a2241a184d79bf", "e801b7f2a5ba972a90679968ad9e908109c4fac7", "0c5730af8b994a6a4b273615e0391a09507c3db2", "90d12d4979c0b432ae52ec76500648b1dc205ab2", "34bb12a64b69699eadbbc2af48552d1d80f5b6cb" ], "paperAbstract": "Graphics Processing Unit (GPU) vendors have been scaling single-GPU architectures to satisfy the ever-increasing user demands for faster graphics processing. However, as it gets extremely difficult to further scale single-GPU architectures, the vendors are aiming to achieve the scaled performance by simultaneously using multiple GPUs connected with newly developed, fast inter-GPU networks (e.g., NVIDIA NVLink, AMD XDMA). With fast inter-GPU networks, it is now promising to employ split frame rendering (SFR) which improves both frame rate and single-frame latency by assigning disjoint regions of a frame to different GPUs. Unfortunately, the scalability of current SFR implementations is seriously limited as they suffer from a large amount of redundant computation among GPUs.\n This paper proposes GPUpd, a novel multi-GPU architecture for fast and scalable SFR. With small hardware extensions, GPUpd introduces a new graphics pipeline stage called Cooperative Projection & Distribution (C-PD) where all GPUs cooperatively project 3D objects to 2D screen and efficiently redistribute the objects to their corresponding GPUs. 
C-PD not only eliminates the redundant computation among GPUs, but also incurs minimal inter-GPU network traffic by transferring object IDs instead of mid-pipeline outcomes between GPUs. To further reduce the redistribution overheads, GPUpd minimizes inter-GPU synchronizations by implementing batching and runahead-execution of draw commands. Our detailed cycle-level simulations with 8 real-world game traces show that GPUpd achieves a geomean speedup of 4.98X in single-frame latency with 16 GPUs, whereas the current SFR implementations achieve only 3.07X geomean speedup which saturates on 4 or more GPUs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123968" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/84851b61293a4199c3f9164e21103b417aee49f7", "sources": [ "DBLP" ], "title": "GPUpd: a fast and scalable multi-GPU architecture using cooperative projection and distribution", "venue": "MICRO", "year": 2017 }, "84a3c5d479cb1fc43fd839492ad33dcc997b7e00": { "authors": [ { "ids": [ "2919642" ], "name": "Maciej Besta" }, { "ids": [ "18356904" ], "name": "Michal Podstawski" }, { "ids": [ "10673368" ], "name": "Linus Groner" }, { "ids": [ "2880213" ], "name": "Edgar Solomonik" }, { "ids": [ "1713648" ], "name": "Torsten Hoefler" } ], "doi": "10.1145/3078597.3078616", "doiUrl": "https://doi.org/10.1145/3078597.3078616", "entities": [ "Algorithm", "Backup", "Distributed memory", "Graph (abstract data type)", "Library (computing)", "Lock (computer science)", "Programming complexity", "Rate of convergence", "Shared memory" ], "id": "84a3c5d479cb1fc43fd839492ad33dcc997b7e00", "inCitations": [ "f9a8b38b7f208f9a03ca04022227b1c2cb6b735c", "12d3a826eaf6a53bcd299dde7e3de700d387ce17", "1f0572f47be66c2c0fbf3fd0f98f25e5b5f88361" ], "journalName": "", "journalPages": "93-104", "journalVolume": "", "outCitations": [ "1156f60e40548096df49528b1342bb3e88b0f378", "7f1d7ef36f5664da349a3e7fc3276e30f6f871fd", "3231d62bec8e8cc1d837e85893889855767c3b13", 
"b9ef5daaa31ccee9ba239a3a60c6b7c552aec5dc", "62e316005b84675a4229c13813f50be2cfd6a9a9", "b997070b0cc16bdcd62e982834b9899d3b8ea921", "947c6bf534ccd620044f77c3bd6068f633b421fb", "1ef8c8c815b7268d7f7d4fe76af78aaa8df3e6da", "259e93de2f10d395a1bdfb2dc6da72b6a3998572", "0ad8e89091eed09217e66adc98136126addc2619", "0558c94a094158ecd64f0d5014d3d9668054fb97", "a3b9dcc5d35f622f279f5fdeaac8d6d4b38a1b0c", "141e35263ab810983c90d47ad62eb4fab5e51717", "40eb1f990ac292b14b56ea06e61d9aeb9bfa28c3", "1d2744b83519657f5f2610698a8ddd177ced4f5c", "adafc767bbcb5c196bc7a3e6f252aa67489375c0", "0c88eb0d571a91d5c6675b4f30330cf8fb575382", "9207a7356d90343b5107e3e445fa7de86f3078eb", "98da568adc7858630e26d0563304a495f90608d0", "746085d18a41041bdc13a206aae72a191c2d7d05", "02605b0527820be0cade7e507048ff3fccd7857e", "31699c35f42e4d9d108b4c595f9cea9655f5022e", "67ca97141193b812015cfd8047f89a0a5f76d0e1", "70279bddb1fb7f5a032a7856640f8dbc7b08edf9", "0425f1e7e8651b5ba3c9e2eb98a3c50a07146972", "d7f449c199ce86d3b8039899caabb31b54ced7f2", "7bcc53f1baf3358517a602d856192faea9442c91", "27d3c3dfb69d734eb762344d446ff622205da001", "87ee99d4cc4e0601cbb519f6ddbac85772bdc49e", "780a55bf4daf8af8e0211d429f245ecf6393e92f", "3486aeaf540c48952120fe853d672af984f40a6a", "5f8991828def57d2f0cda942566afff56740d150", "08937c92f31895e16af48de1c7d18eeceef11f6f", "2984638090457cf02d82715d9834314448efa878", "e9fa7e93d2f4e958cde16c6cf9d5bc966f8e7ffc", "3c4194f25bda9d2ebdea8d91e8d7c13a5f8b485a", "17ad1361dfabc1c50b506813d0f5d54df159fc36", "254b99cac6c010d46f9651d46674a7f69c82f3b1", "74e6b114822b712c100c7ffd1b01f4fb1564bd28", "3dff11679346f5344af1018cad57fa14cc349f2f", "e4a02cc0e9e158b22b6b0d69372285b2e4d19c69", "0d0e3c418fa2dc31a021a6caa73c9efce5aa8b7b", "2a2eb0e00483288a8b3d2b561dd98e013c5c0275", "ba240fac7a9af2e5e0bf8016d7e7fd039c5207f0", "0ad664799302bcc4d6304c740fb226027ad4c250", "a5aad5abb32f6b15f31b92312bb3b0f7b6470977" ], "paperAbstract": "We reduce the cost of communication and synchronization in graph processing by 
analyzing the fastest way to process graphs: pushing the updates to a shared state or pulling the updates to a private state. We investigate the applicability of this push-pull dichotomy to various algorithms and its impact on complexity, performance, and the amount of used locks, atomics, and reads/writes. We consider 11 graph algorithms, 3 programming models, 2 graph abstractions, and various families of graphs. The conducted analysis illustrates surprising differences between push and pull variants of different algorithms in performance, speed of convergence, and code complexity; the insights are backed up by performance data from hardware counters. We use these findings to illustrate which variant is faster for each algorithm and to develop generic strategies that enable even higher speedups. Our insights can be used to accelerate graph processing engines or libraries on both massively-parallel shared-memory machines as well as distributed-memory systems.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078597.3078616", "https://htor.inf.ethz.ch/publications/img/pushpull.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/84a3c5d479cb1fc43fd839492ad33dcc997b7e00", "sources": [ "DBLP" ], "title": "To Push or To Pull: On Reducing Communication and Synchronization in Graph Computations", "venue": "HPDC", "year": 2017 }, "84aa5f46d4c5de90e9819840299cd9ef8555f923": { "authors": [ { "ids": [ "1885300" ], "name": "Jingpei Yang" }, { "ids": [ "3464209" ], "name": "Rajinikanth Pandurangan" }, { "ids": [ "36885897" ], "name": "Changho Choi" }, { "ids": [ "34866701" ], "name": "Vijay Balakrishnan" } ], "doi": "10.1145/3078468.3078469", "doiUrl": "https://doi.org/10.1145/3078468.3078469", "entities": [ "Algorithm", "Device driver", "Digital rights management", "Garbage collection (computer science)", "Linux", "Overhead (computing)", "Overhead projector", "Performance Evaluation", "Run time (program lifecycle phase)", "Solid-state drive", "Stream 
processing", "Streaming media", "Time-sharing", "z/VM" ], "id": "84aa5f46d4c5de90e9819840299cd9ef8555f923", "inCitations": [ "0bf4140e5a1d1d2669e8f30877d180d824475dd1", "8f849c0051edc612327e1121ccfa70a4ec0bacea", "1d08d231ec66645ec56d2210c1a7c6b44c6ff041" ], "journalName": "", "journalPages": "3:1-3:11", "journalVolume": "", "outCitations": [ "f9a345390e3fd4802a19efc49cfcb64dd0f023a5", "957ae212c16ea9a70a53d1143e0f8a908a496648", "1da48d8173e34eb7825870248c4c12b6bbe7d9c1", "3589a4305a32801c5185b27f4de210f0eff80341", "3a8dd763354946d6cf044a0be711052178233ffb", "424a0f460b4f261b386787bdec37a2b01347a930", "73e85836599b5ab4f83afa2ae10fea99cb5d29d7", "60a488e29b5b64c44f6ce124bce7ced9602636d4", "91912a461d30035639ddda2b6de97a388823fb4b", "0903d6b3b5a26fea2cb7b4956f66365d71c78549", "a04678ad8398a2579c249fff4b59bfbcfdd7e25b", "72722e7602138e3896e5576d3f3ef730e7b7c4b4", "33fc2e3e200b5aacfa666215357f3d8c6d821dc8", "05eee6c2859966911a8053e6fa27fccb8648b594" ], "paperAbstract": "Multi-stream SSDs can isolate data with different life time to disparate erase blocks, thus reduce garbage collection overhead and improve overall SSD performance. Applications are responsible for management of these device-level steams such as stream open/close and data-to-stream mapping. This requires application changes, and the engineer deploying the solution needs to be able to individually identify the streams in their workload. Furthermore, when multiple applications are involved, such as in VM or containerized environments, stream management becomes more complex due to the limited number of streams a device can support, for example, allocating streams to applications or sharing streams across applications will cause additional overhead.\n To address these issues and reduce the overhead of stream management, this paper proposes automatic stream management algorithms that operate under the application layer. 
Our stream assignment techniques, called AutoStream, is based on run time workload detection and independent of the application(s). We implement our AutoStream prototype in NVMe Linux device driver and our performance evaluation shows up to 60% reduction on WAF(Write Amplification Factor) and up to 237% improvement on performance compared to a conventional SSD device.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078468.3078469" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/84aa5f46d4c5de90e9819840299cd9ef8555f923", "sources": [ "DBLP" ], "title": "AutoStream: automatic stream management for multi-streamed SSDs", "venue": "SYSTOR", "year": 2017 }, "84b7a2fb52a618f0129ac324406fa5ab121cb168": { "authors": [ { "ids": [ "12353823" ], "name": "Matthew Parkinson" }, { "ids": [ "1757457" ], "name": "Dimitrios Vytiniotis" }, { "ids": [ "1796965" ], "name": "Kapil Vaswani" }, { "ids": [ "38731436" ], "name": "Manuel Costa" }, { "ids": [ "2874604" ], "name": "Pantazis Deligiannis" }, { "ids": [ "29680958" ], "name": "Dylan McDermott" }, { "ids": [ "40373059" ], "name": "Aaron Blankstein" }, { "ids": [ "2481591" ], "name": "Jonathan Balkind" } ], "doi": "10.1145/3141879", "doiUrl": "https://doi.org/10.1145/3141879", "entities": [ "Crash (computing)", "Garbage collection (computer science)", "Heapsort", "Manual memory management", "Memory management", "Memory safety", "Non-blocking algorithm", "Pointer (computer programming)", "Programmer", "Programming model", "Programming productivity", "Thread (computing)", "Vulnerability (computing)" ], "id": "84b7a2fb52a618f0129ac324406fa5ab121cb168", "inCitations": [], "journalName": "PACMPL", "journalPages": "95:1-95:25", "journalVolume": "1", "outCitations": [ "8af43ca7e7d2748f1eaffa854a4bb8a5ed75c178", "c4e77ec0e6e4ac6638b662bfe5342439ad4451de", "22a3110123362412f91ae44c2b15e2234324f6fd", "3593269a4bf87a7d0f7aba639a50bc74cb288fb1", "2815e28b533f8770a3ef84618e189c325c2ece61", 
"0e55379d27454c5d9d72e4ba4b3752007b9f886f", "4256339f61d809e5092b68a505f7d37099cbd341", "14d3104c58ad60e02c3ab9d9433093fe5f21d00c", "3b29957c1624402e2a6fc5754f7c4ae9a0692fe5", "1591e9cf9c5d5fa42e7b5e48bd76f43a0a6e8f0b", "dbce3d345f3c43c51d8cc71c17d073e716a4d07c", "942f2a6df29234c304b69129872835d60cf5e9e9", "43fb7b102ea54ce51b6fcd42005698ae1399e25e", "1ba0e9c31bca601cfb31152bf8520042641cf639", "6973083bca583e26a0d8e7709ce7b9888cf3ee69", "6434aa10f3745dcf959cfca9c379aae120396724", "28c3b2e9cd7bead2f908871f3f5f6f9a5d914c27", "4e0ee850f7e8323fbb0fbb3591c671926cf22f4d", "042f443418ff2ff98a1dccbf49df9fa258dab707", "d6a9a192352c78215127c09edf18e7a329e50942" ], "paperAbstract": "Garbage collection greatly improves programmer productivity and ensures memory safety. Manual memory management on the other hand often delivers better performance but is typically unsafe and can lead to system crashes or security vulnerabilities. We propose integrating safe manual memory management with garbage collection in the .NET runtime to get the best of both worlds. In our design, programmers can choose between allocating objects in the garbage collected heap or the manual heap. All existing applications run unmodified, and without any performance degradation, using the garbage collected heap. \n Our programming model for manual memory management is flexible: although objects in the manual heap can have a single owning pointer, we allow deallocation at any program point and concurrent sharing of these objects amongst all the threads in the program. 
Experimental results from our .NET CoreCLR implementation on real-world applications show substantial performance gains especially in multithreaded scenarios: up to 3x savings in peak working sets and 2x improvements in runtime.", "pdfUrls": [ "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/07/snowflake-extended.pdf", "http://doi.acm.org/10.1145/3141879" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/84b7a2fb52a618f0129ac324406fa5ab121cb168", "sources": [ "DBLP" ], "title": "Project snowflake: non-blocking safe manual memory management in .NET", "venue": "PACMPL", "year": 2017 }, "84f7ef8aa654ccb46244dde02c2bb705d6abb484": { "authors": [ { "ids": [ "3058378" ], "name": "Dingwen Tao" }, { "ids": [ "1699598" ], "name": "Sheng Di" }, { "ids": [ "1756221" ], "name": "Zizhong Chen" }, { "ids": [ "1721552" ], "name": "Franck Cappello" } ], "doi": "10.1109/IPDPS.2017.115", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.115", "entities": [ "Algorithm", "Computer data storage", "Data compression", "Data point", "Encoder", "Lossy compression", "Mean squared error", "NetBSD Gzip / FreeBSD Gzip", "Peak signal-to-noise ratio", "Variable-length code" ], "id": "84f7ef8aa654ccb46244dde02c2bb705d6abb484", "inCitations": [ "2f84f0296b1151bd7062431d34673babbe97b1af", "038aad8409d8d00d82cc96da6b571de7a93a7f4a", "042381199e946fa918bb8354210c68c477fe7bf0", "d6f159da21bc00dc57bb6994ac3e70f0df698413", "ffa491eb990b98a2bbf444b85229dc83a62647c4", "953a64af385e2da82f0af8661c42c0224666b45a", "20554f3028a9641e0796506b1501cb57681ef1e0", "8ce594615e0f448e9b6b498b56b9e48924bca238" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1129-1139", "journalVolume": "", "outCitations": [ "4d3b26551b351ce690d3ce6d323a9e98c8d039ef", "4d0b12bef1f99d117f43d46de35682b320a4b9fe", "7777d299e7b4217fc4b80234994b5a68b3031199", "6e5ad4686eb73092b7a2d532506fa61b5a796d29", 
"09b71bc8d83e2583319b5bd42838e6c4ffa0bd70", "39d2b916719a48d262690f57aab91c56aa2ab72c", "44607270754f8521d6c4d42297aa881393f4f8e0", "9c0b153ea0b741107301c2a13dc0e0f2f92c863c", "f3ee81bed49c66cea802f23bfdac4ba23418a305", "81ad3cc01deab05be0bf9add2d08e043f28cd55c", "0484496ff92137f89796b87766e19c6378ee3e76", "093fc19d440f33247e545ec6c047e0aa0afb0863", "49c8f4db70b9446da52a9250db26e53e88fc1605" ], "paperAbstract": "Today's HPC applications are producing extremely large amounts of data, such that data storage and analysis are becoming more challenging for scientific research. In this work, we design a new error-controlled lossy compression algorithm for large-scale scientific data. Our key contribution is significantly improving the prediction hitting rate (or prediction accuracy) for each data point based on its nearby data values along multiple dimensions. We derive a series of multilayer prediction formulas and their unified formula in the context of data compression. One serious challenge is that the data prediction has to be performed based on the preceding decompressed values during the compression in order to guarantee the error bounds, which may degrade the prediction accuracy in turn. We explore the best layer for the prediction by considering the impact of compression errors on the prediction accuracy. Moreover, we propose an adaptive error-controlled quantization encoder, which can further improve the prediction hitting rate considerably. The data size can be reduced significantly after performing the variable-length encoding because of the uneven distribution produced by our quantization encoder. We evaluate the new compressor on production scientific data sets and compare it with many other state-of-the-art compressors: GZIP, FPZIP, ZFP, SZ-1.1, and ISABELA. Experiments show that our compressor is the best in class, especially with regard to compression factors (or bit-rates) and compression errors (including RMSE, NRMSE, and PSNR). 
Our solution is better than the second-best solution by more than a 2x increase in the compression factor and 3.8x reduction in the normalized root mean squared error on average, with reasonable error bounds and user-desired bit-rates.", "pdfUrls": [ "http://arxiv.org/abs/1706.03791", "https://arxiv.org/pdf/1706.03791v1.pdf", "https://doi.org/10.1109/IPDPS.2017.115" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/84f7ef8aa654ccb46244dde02c2bb705d6abb484", "sources": [ "DBLP" ], "title": "Significantly Improving Lossy Compression for Scientific Data Sets Based on Multidimensional Prediction and Error-Controlled Quantization", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "85086f14c5047dfa726f5e2c4adff609e0daf357": { "authors": [ { "ids": [ "2726254" ], "name": "Chunghan Lee" }, { "ids": [ "3099840" ], "name": "Tatsuo Kumano" }, { "ids": [ "3167733" ], "name": "Tatsuma Matsuki" }, { "ids": [ "2365691" ], "name": "Hiroshi Endo" }, { "ids": [ "3358242" ], "name": "Naoto Fukumoto" }, { "ids": [ "2689677" ], "name": "Mariko Sugawara" } ], "doi": "10.1145/3078468.3078479", "doiUrl": "https://doi.org/10.1145/3078468.3078479", "entities": [ "Desktop metaphor", "Desktop virtualization", "Digital footprint", "Fibre Channel", "Kibibyte", "Next-generation network", "OpenVMS", "Response time (technology)", "Virtual desktop", "Virtual machine" ], "id": "85086f14c5047dfa726f5e2c4adff609e0daf357", "inCitations": [], "journalName": "", "journalPages": "13:1-13:11", "journalVolume": "", "outCitations": [ "4e8839416133588c10cc56d6325db55a42fe2215", "cd4e4ce1be12594f27b86645a01395eb965bf405", "002efcf9f0b58af153556b84395a37f6171195da", "4a6b319bdd6a58132280e3a3d295289a78a7547b", "33438b1148a84d6e5bf2cad70bf7754d546ca5d7", "92e536c1789bf301f456b01590006c9a3eff6cd8", "282c6a3b573051e3e799d73cfc623ccbd68bcd6a", "39c61999f2d164fa5bcecd16968a337ccb02ce2c", "0c60a639dc9cd8014f685ec986c29bf55a10bb5a", 
"02777215df9cf14ef59580e943c7e93226452ded", "7a2274412948765bf872b765dafd8139e51000ff", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "75e74a0f013e9028c69df3addc0d161ef35d0c51", "7a8fbd4ca84a7b79e44521ef6d32302b6658c2f5", "e00b6d9ac717fc6539dffbb2032b7237c1e78a1e", "4939aabe46ad048ef5df1409c5c02d04def1d0c8", "0f0caece2fed6fc689b01f6d4521dca9f44c5dde", "375b0a1d23cdcbed617338d46f0aec89a1b02cef", "057d21830cde5b3be2fdb3a74ee69a3c7e9109f8" ], "paperAbstract": "Despite the growing popularity of enterprise virtual desktop infrastructure (VDI), little is known about its storage traffic characteristics. In addition, no prior work has considered the detailed characteristics of virtual machine (VM) behavior on VDI. In this paper, we analyze the enterprise storage traffic on commercial office VDI using designated VMs. For 28 consecutive days, we gathered various types of traces, including a usage questionnaire and active and passive measurements. To characterize the storage traffic, we focused on two perspectives: fibre channel (FC) traffic and VM behavior. From the FC traffic perspective, we found that read traffic is dominant, although the applications are similar to those in a previous small-scale VDI. In particular, the write response time of large transactions, e.g.,128 KiB, is strongly affected by a slight decrease in cache hits during an update storm. From the VM behavior, we found that all active user VMs generate only 25% of traffic. Although a few VMs generate massive traffic, their impact is small. These characteristics are unique in comparison with the small-scale VDI. 
Our results have significant implications for designing the next generation of VDI and improving its performance.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078468.3078479" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/85086f14c5047dfa726f5e2c4adff609e0daf357", "sources": [ "DBLP" ], "title": "Understanding storage traffic characteristics on enterprise virtual desktop infrastructure", "venue": "SYSTOR", "year": 2017 }, "851ea615172c87e9c9b0aa548529183809c16760": { "authors": [ { "ids": [ "22172105" ], "name": "Bora Edizel" }, { "ids": [ "3105979" ], "name": "Amin Mantrach" }, { "ids": [ "3042223" ], "name": "Xiao Bai" } ], "doi": "10.1145/3077136.3080811", "doiUrl": "https://doi.org/10.1145/3077136.3080811", "entities": [ "Artificial neural network", "Baseline (configuration management)", "Convolutional neural network", "Experiment", "Machine learning", "Online advertising", "Production system (computer science)", "Search engine marketing", "Web search engine", "Word2vec" ], "id": "851ea615172c87e9c9b0aa548529183809c16760", "inCitations": [ "da23721162b66e0a12e8823c8144f9394004a94a" ], "journalName": "", "journalPages": "305-314", "journalVolume": "", "outCitations": [ "0aedae19e94d76520cb92932b6506389799051e2", "0825788b9b5a18e3dfea5b0af123b5e939a4f564", "3bd42cfb7e633320bbeec7f6d361e92abec60b07", "950f1a292449254520584601fe5fe88721da6952", "c61f196f4d1aa3043a5bcc7dffe018a193f3d934", "5c53c7101b530eae80417beeba16684d789056f2", "6938196e63ff09c25d1e1366aaec7135a6720216", "81bb8626560fdd0ae35a8aab1f115022a38499d3", "caecd3cd42ac98ff5bba0438e16fe1d6afbbf23e", "033eb044ef6a865a53878397633876827b7a8f20", "83174a52f38c80427e237446ccda79e2a9170742", "239bf6d2204c54718aaa3748e73c8e3109ea75ac", "2e10643c3759f97b673ff8c297778c0b6c20032b", "3d18a13c3ccfdcc87797cc4e033e32dcc6578c9b", "07f3f736d90125cb2b04e7408782af411c67dd5a", "5b9534442f91a87022427b74bca9fd95dd045383", "3ba179bceb9692d4d21109d0b87b120195761148", 
"f70391f9a6aeec75bc52b9fe38588b9d0b4d40c6", "462aeffc0d9490463b14c864cef9c77dea09e4b6", "425c90a1aae5be2d335f4791d9936ebf67e147de", "1827de6fa9c9c1b3d647a9d707042e89cf94abf0", "272216c1f097706721096669d85b2843c23fa77d", "23ba229478df2f6cba5279d4bb4d746c37c47c56", "2d3844224e93787c190f65458a775f77088947b5", "22562a15409361f776e4c82d98a8fc51317c50fd", "162d958ff885f1462aeda91cd72582323fd6a1f4", "1ff88585ce3fd5fdaab6573722d4874641359951", "a098579783a15701c9b187a8ddb5e31746bd1f79", "ef74c2c33dbe0e63e8668138fb2167ab9417de2f", "13e650e1297dd7c037bb7b81743aa78aa5aea9b2", "505253630ab7e8f35e26e27904bd3c8faea3c5ce", "165dd9fda3e6ab9976b45f01425acfbee4fea995" ], "paperAbstract": "Predicting the click-through rate of an advertisement is a critical component of online advertising platforms. In sponsored search, the click-through rate estimates the probability that a displayed advertisement is clicked by a user after she submits a query to the search engine. Commercial search engines typically rely on machine learning models trained with a large number of features to make such predictions. This inevitably requires a lot of engineering efforts to define, compute, and select the appropriate features. In this paper, we propose two novel approaches (one working at character level and the other working at word level) that use deep convolutional neural networks to predict the click-through rate of a query-advertisement pair. Specifically, the proposed architectures only consider the textual content appearing in a query-advertisement pair as input, and produce as output a click-through rate prediction. By comparing the character-level model with the word-level model, we show that language representation can be learnt from scratch at character level when trained on enough data. 
Through extensive experiments using billions of query-advertisement pairs of a popular commercial search engine, we demonstrate that both approaches significantly outperform a baseline model built on well-selected text features and a state-of-the-art word2vec-based approach. Finally, by combining the predictions of the deep models introduced in this study with the prediction of the model in production of the same commercial search engine, we significantly improve the accuracy and the calibration of the click-through rate prediction of the production system.", "pdfUrls": [ "https://arxiv.org/pdf/1707.02158v1.pdf", "http://doi.acm.org/10.1145/3077136.3080811", "http://arxiv.org/abs/1707.02158" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/851ea615172c87e9c9b0aa548529183809c16760", "sources": [ "DBLP" ], "title": "Deep Character-Level Click-Through Rate Prediction for Sponsored Search", "venue": "SIGIR", "year": 2017 }, "85389c54465b7785df01469d618189d8de993623": { "authors": [ { "ids": [ "3129259" ], "name": "Evangelos Pournaras" }, { "ids": [ "2392298" ], "name": "Jovan Nikolic" } ], "doi": "10.1109/ICAC.2017.30", "doiUrl": "https://doi.org/10.1109/ICAC.2017.30", "entities": [ "Agent-based model", "Aggregate function", "Agile software development", "Autonomic computing", "Big data", "Computation", "Computational resource", "Failure rate", "Redundancy (engineering)", "Reverse engineering" ], "id": "85389c54465b7785df01469d618189d8de993623", "inCitations": [ "2c9bfc8a995ade40bc7ba48a9bccc699fd475130" ], "journalName": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "journalPages": "11-20", "journalVolume": "", "outCitations": [ "f0bf995f22957c08043ac4c06b5cb29155e3fcd4", "2e348661406eeb9f2d415d1f6df0fed26bcb271e", "7d78f8edef81e41b9ed88e606dacab39fd371785", "8f825a8f0c264f9d046e2a958b150600fb2e82d3", "6104d27ba0e434b64284ff265b5106cccd70236c", "2e05a6809adce315ec26546fb598f76a51d739b4", 
"79f84dd84fa396378ba597b63b13f11fb8c5d3c4", "af5ffe9752771e299a8b6c57dd7b8be3226f6644", "18b31639918833b26bb4adad361cfe09d33fe6ba", "363e4c1fc27f9b3744e333961cbac01cfb35b987", "5575fb3219a049c5b69593eebcd2d1169c46cfde", "24019386432f4b60842b86a4892d5d424e4c5e2e", "2e956342f9fc7164145bb625f246fd56c20605de", "339888b357e780c6e80fc135ec48a14c3b524f7d", "d160c1fd020e3f96ca703883dc168f640cf04b45", "442686f068097d4cd27680a0ee08272aea19c922", "004d00c5eb23d6bf6bc8e206172dfb47a8e4a828", "1bd38d5308732cd7d7565a3b56ea892eb6a6df37", "b2229379386b3a2835c4a1459ee5356b732163db", "6ff6dda73e27496843d8940fbbcc934283a027cf", "309ea2926d0e87919b928635685e0828a7425e9e", "2a6317ff42de9e59ce3c08611ca6e5c537ba9e71", "64533dbddc95edf3dacf5de7a115bc41f858ecc4", "3ffd2ad9ac1bf6d0595fe1a121a0b0c71560b0c4", "b76aaec8c8b60689ddc7aa8f381b7bd9cd34a94f", "56d3d2304a5651f0038523ad3624735627c9ea4d", "b15323ffde8b2d16d156c876590f74bca879b5bf", "04b871408dd461467455e68e53307a1c4c45084c", "0d5d097d6a117d4a2dad1488c9062921b62787fb", "1e918182da45a4ca5a4e7b8e0ca24800a231c471", "7f340838f7797db17c08477db5eb25267f4984da", "7ede5ecf9027b5e2e39bd59eafafa324e1eca2eb", "1840276632de25372960e8d7207957ea4250006a", "23679522a504aed5d1ace3917d723b1aabec8c71", "0ecb8fd980c141f469b97d58b852f8ac0f2eae77" ], "paperAbstract": "The feasibility of large-scale decentralized networks for local computations, as an alternative to big data systems that are often privacy-intrusive, expensive and serve exclusively corporate interests, is usually questioned by network dynamics such as node leaves, failures and rejoins in the network. This is especially the case when decentralized computations performed in a network, such as the estimation of aggregation functions, e.g. summation, are linked to the actual nodes connected in the network, for instance, counting the sum using input values from only connected nodes. Reverse computations are required to maintain a high aggregation accuracy when nodes leave or fail. 
This paper introduces an autonomic agent-based model for highly dynamic self-corrective networks using decentralized reverse computations. The model is generic and equips the nodes with the capability to disseminate connectivity status updates in the network. Highly resilient agents to the dynamic network migrate to remote nodes and orchestrate reverse computations for each node leave or failure. In contrast to related work, no other computational resources or redundancy are introduced. The self-corrective model is experimentally evaluated using real-world data from a smart grid pilot project under highly dynamic network adjustments that correspond to catastrophic events with up to 50% of the nodes leaving the network. The model is highly agile and modular and is applied to the large-scale decentralized aggregation network of DIAS, the Dynamic Intelligent Aggregation Service, without major structural changes in its design and operations. Results confirm the outstanding improvement in the aggregation accuracy when self-corrective actions are employed with a minimal increase in communication overhead.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICAC.2017.30" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/85389c54465b7785df01469d618189d8de993623", "sources": [ "DBLP" ], "title": "Self-Corrective Dynamic Networks via Decentralized Reverse Computations", "venue": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "year": 2017 }, "8542fdcb42804a31fedb86874e3c75cd03830d4d": { "authors": [ { "ids": [ "1778504" ], "name": "Oana Balmau" }, { "ids": [ "33215555" ], "name": "Diego Didona" }, { "ids": [ "1727558" ], "name": "Rachid Guerraoui" }, { "ids": [ "1711100" ], "name": "Willy Zwaenepoel" }, { "ids": [ "3186268" ], "name": "Huapeng Yuan" }, { "ids": [ "2620857" ], "name": "Aashray Arora" }, { "ids": [ "1859843" ], "name": "Karan Gupta" }, { "ids": [ "20911093" ], "name": "Pavan Konka" } ], "doi": "", "doiUrl": 
"", "entities": [ "Amplifier", "Attribute\u2013value pair", "Central processing unit", "Holism", "Kinetic Void", "Memory-mapped I/O", "RocksDB", "Synergy", "Synthetic data", "Throughput", "Tree structure" ], "id": "8542fdcb42804a31fedb86874e3c75cd03830d4d", "inCitations": [ "e5c8095c2a87b474cb76e0d1d7dc9a8a37f3a80e", "27a36203f14d73b95dfffec857b4ff923d9ef430", "2dc3d8536a8fa4660c6e842ef17c2d1553fceea6", "c1447c4c07721e4e444aaa7ad5bb6a661c742bd2", "eb4d7f3c536af50ae5d6828d9bb43138c2578dc7" ], "journalName": "", "journalPages": "363-375", "journalVolume": "", "outCitations": [ "7ae71ff8a5f950c12305f27d8584a9c9203717a9", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "9b90568faad1fd394737b79503571b7f5f0b2f4b", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "389b618c42c0d5d32a569b9cbaa02a7ff77c6be6", "269a701af1aba00837838030c9a862be26acd1da", "b05f104f5a28a1a2c2fdb216d3d0959a5786f0ad", "2986f9db238c57b638d54248c4ed1fcb5e4f459f", "0bba65fd5ac1db9a3293e9ebcfba092cf4ae58ee", "28fb425b2d2cc3287628f9f6e8b31b7665ba47a7", "423549d9e46a26474bc9554530c84c244481d881", "199ac28b6bc68bf05c77645ffae7640df114bca5", "40f04909aaa24b09569863aa71e76fe3d284cdb0", "6ede07b71f0427359286e347be421513a093ed97", "1e29246acdc73e27899352e3d1862e2af4b74ac2", "40f13c06aadfd6e9d2c96f716de2b2454d5b854e", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "77ee477b0e6d0b6245eca5865ba95b55ed3db434", "2b56f0ca7e74a43a54b70a7bb3507855c653a85b", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "f4147b82166813bbe5dc01e9486664c273d1556c", "1772fdf329f526d4f6c9e62b99bc65eac0ff31b5", "098d792d1783b5f6fc098203f71f21f5d053c653", "3a26331d29c8839f17f557d089e9e8d817bf8c9a", "d4f8d2b9f7fad337d9ad22a92555e159c9543836", "7035c0d21a48e2d2f5c6323650854b033853bb89", "18a5f443299784479e78d9e77f175af57cb2fa2b", "8cded4cc565f8b7c41b40de6fe8d20231a7e8652", "d67adb456a315aee244babf4f20e318cc14d13f3", "9e86e9a332be800d7420aa0a394cd1b348d93e48", "1860428ac473bbe38da909c0ba8c882b07deb8df", "46c0f934ef0705b953ba8b14c5dee79b4df724db" ], 
"paperAbstract": "We present TRIAD, a new persistent key-value (KV) store based on Log-Structured Merge (LSM) trees. TRIAD improves LSM KV throughput by reducing the write amplification arising in the maintenance of the LSM tree structure. Although occurring in the background, write amplification consumes significant CPU and I/O resources. By reducing write amplification, TRIAD allows these resources to be used instead to improve user-facing throughput. TRIAD uses a holistic combination of three techniques. At the LSM memory component level, TRIAD leverages skew in data popularity to avoid frequent I/O operations on the most popular keys. At the storage level, TRIAD amortizes management costs by deferring and batching multiple I/O operations. At the commit log level, TRIAD avoids duplicate writes to storage. We implement TRIAD as an extension of Facebook\u2019s RocksDB and evaluate it with production and synthetic workloads. With these workloads, TRIAD yields up to 193% improvement in throughput. 
It reduces write amplification by a factor of up to 4x, and decreases the amount of I/O by an order of magnitude.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-balmau.pdf", "https://infoscience.epfl.ch/record/228863/files/atc17-final224.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/balmau" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8542/fdcb42804a31fedb86874e3c75cd03830d4d.pdf", "s2Url": "https://semanticscholar.org/paper/8542fdcb42804a31fedb86874e3c75cd03830d4d", "sources": [ "DBLP" ], "title": "TRIAD: Creating Synergies Between Memory, Disk and Log in Log Structured Key-Value Stores", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "85c2c5aa74624fbfc324f636a2786cfc1a16e6e4": { "authors": [ { "ids": [ "30901893" ], "name": "Po-An Tsai" }, { "ids": [ "2576892" ], "name": "Nathan Beckmann" }, { "ids": [ "1708256" ], "name": "Daniel Sanchez" } ], "doi": "", "doiUrl": "", "entities": [ "Architecture as Topic", "Bank (environment)", "CPU cache", "Cache", "Cache (computing)", "Computer Systems", "Distributed cache", "Dynamic random-access memory", "Gigabyte", "Multi-core processor", "Muscle Rigidity", "Operating system", "Working set" ], "id": "85c2c5aa74624fbfc324f636a2786cfc1a16e6e4", "inCitations": [ "04984fc1683186a526917575c435733e9311ff6e" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "6acd75781396e5dedcf2f06a7131ba7f3153bfb5", "2cc40e5ca495af9a8e3aea5f357b59cc680c472e", "3bf23f74bf33ed52f7c28587fab315610b27221a", "2253d6559ae9793b5cfa6e409d1d9de50dafa29a", "5d999f4a5567e6f4a54e46bbcd6006f75ab0cbac", "0598e498f7174d2138f43ffeaf3539018b3219b3", "251ea4c57e71bb951ff6f9fe0ff63897a298402f", "18633256bb17ba0744518479c0752ca87f0d03c6", "32dc6016338a2098147e5edbb72c7c5670f78133", "333da42f4369dff8ca905ad21ea6ee2f5dc99d55", "1a36150af44446fe9664005c447a0f0e04667065", "267bd5da4c7c0d7bcb3ffa0d860f3842a7c59aa2", 
"11b0e5ee27e7fdf989632f157ff204fc8962918c", "429dd28f609d97a883174d3a5a2db3cc936fb062", "5119e4b11132d48d5fa4a5ddaf2ca1a0389b9b0f", "234049a484dee54d3f9555fe7f50805e783ec432", "e4aed18e1b965f90a86b57a787c52af44a8c20a4", "792aa5a81ac1d344de450ec59eec339aa0e508aa", "6e1a98c60bbedbb9b70af9e374795f8e26cc4e8e", "f8e9b050c93af6dea582563f61b6460b590bc3af", "7adc7940bc250bab39c7823fbb1ef3f86fe0625a", "48a7323c4894de3afb90ef2135160205ebb55011", "2269889c9085ff518ee9e7f5b2f92e4599dd3ff2", "047b0a5b50f996f3d9bcb51aab7f3176d12a69c3", "8009b1c8cc4af8d3d4b792ac32926487a428172e", "6885cd324017c499d44fa3c94fca23c3104e0aed", "ab39ddb0c2599f0c50f279aa7fb2350ff18dcd54", "a213b244778e310bc4b27cbd021f964258b4c7a7", "78ef173529cae4a19c7718e88cc4107cff99ff0f", "35c348a3663de6387a45dc58b2c85092d247818a", "0aaca867a9b1eb10628fc28cc1b4d1c19f2ac3e3", "16ba3e5c5e0084fef0fa4705d639f2ad164f2dbe", "014ba063a3721973ba6af6503232d4d21d1456bb", "a58445c48c3402305e92ff7cb7eaa9641a56ca6f", "29f766723ca752138855500084ced04503bfc9c8", "306d589976b94d385fcaef6924edd6f4c7fd840e", "5507d741031a1ce2ddc0d6fec9d497192f037eda", "88412b002ee39eb121d93c0a2c11ddbb658e9d6b", "0717371b254df3e466a11d1965c2c9541a43b7a3", "745d50eb6b74b191191ce93c6ef1ec9760ce0cb0", "35c3882db9e1b2bdf838122787968679595f61de", "6ee973a185abb0a11398bdefa0cd97e41bcb0b00", "b116227c9782509c1d5a667da3632deb4356727a", "0ab9f989ffff6389059ee5267db014d1b211d51e", "03880f1d3faedb37aa51deab3b70a98b939dba28", "55043afbb87e38627778a323dfdc35a55357e47d", "28552ecf4eaedb3461edca97304b29082b02fbab", "36de396ee9d1c9991e44c01be35e5206d79c3328", "01299bf5dce79d85aaa0d938670a93ddeeda4d0e", "3bec21f0f1954d31642537c02b33e280d7e12029", "49e56b0a0b4b2a99658e5dac2f5e5b5d0ecf5044", "2b0ca5a307e4c72eff242d8c903c4f0138131d7e", "43bf4c7af676fcedc629c2563eb1e9708bbe0db2", "ca57798e927b6ac4e77dc3be0522c53c31fdd6aa", "352a8957005dc5519b15ed1870751ec494d66395", "165528cdf9c76edd98729c142faf50fbd6cfc69e", "128089d3f880b3a205e236c22767e40ff8f8cfe0", 
"b872e246d77ec5692a05a5ca0aa35168e202b3e8", "3370784dacf9df1e54384190dad40b817520ba3a", "32c8c7949a6efa2c114e482c830321428ee58d70" ], "paperAbstract": "Caches are traditionally organized as a rigid hierarchy, with multiple levels of progressively larger and slower memories. Hierarchy allows a simple, fixed design to benefit a wide range of applications, since working sets settle at the smallest (i.e., fastest and most energy-efficient) level they fit in. However, rigid hierarchies also add overheads, because each level adds latency and energy even when it does not fit the working set. These overheads are expensive on emerging systems with heterogeneous memories, where the differences in latency and energy across levels are small. Significant gains are possible by specializing the hierarchy to applications. We propose Jenga, a reconfigurable cache hierarchy that dynamically and transparently specializes itself to applications. Jenga builds virtual cache hierarchies out of heterogeneous, distributed cache banks using simple hardware mechanisms and an OS runtime. In contrast to prior techniques that trade energy and bandwidth for performance (e.g., dynamic bypassing or prefetching), Jenga eliminates accesses to unwanted cache levels. Jenga thus improves both performance and energy efficiency. On a 36-core chip with a 1GB DRAM cache, Jenga improves energy-delay product over a combination of state-of-the-art techniques by 23% on average and by up to 85%. 
CCS CONCEPTS \u2022 Computer systems organization\u2192 Multicore architectures;", "pdfUrls": [ "http://people.csail.mit.edu/poantsai/papers/2017.jenga.isca.pdf", "http://people.csail.mit.edu/sanchez/papers/2017.jenga.isca.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/85c2/c5aa74624fbfc324f636a2786cfc1a16e6e4.pdf", "s2Url": "https://semanticscholar.org/paper/85c2c5aa74624fbfc324f636a2786cfc1a16e6e4", "sources": [], "title": "Jenga: Software-Defined Cache Hierarchies", "venue": "", "year": 2017 }, "85ddb741036d9db38bdee5a19b8141307dc0ba6f": { "authors": [ { "ids": [ "2597368" ], "name": "Lucas Davi" }, { "ids": [ "2213320" ], "name": "David Gens" }, { "ids": [ "2279415" ], "name": "Christopher Liebchen" }, { "ids": [ "8415280" ], "name": "Ahmad-Reza Sadeghi" } ], "doi": "", "doiUrl": "", "entities": [ "Rand index" ], "id": "85ddb741036d9db38bdee5a19b8141307dc0ba6f", "inCitations": [ "c355211372607f6c5851fe20c22b545db74fd688" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/pt-rand-practical-mitigation-data-only-attacks-against-page-tables/" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/85ddb741036d9db38bdee5a19b8141307dc0ba6f", "sources": [ "DBLP" ], "title": "PT-Rand: Practical Mitigation of Data-only Attacks against Page Tables", "venue": "NDSS", "year": 2017 }, "85ef0c9033d18795f0fc87bca6ace531efc9f70b": { "authors": [ { "ids": [ "2215883" ], "name": "Byung-Chul Tak" }, { "ids": [ "1765914" ], "name": "Canturk Isci" }, { "ids": [ "40095466" ], "name": "Sastry S. 
Duri" }, { "ids": [ "2606437" ], "name": "Nilton Bila" }, { "ids": [ "2718536" ], "name": "Shripad Nadgowda" }, { "ids": [ "11194779" ], "name": "James Doran" } ], "doi": "", "doiUrl": "", "entities": [ "Conformance testing", "DevOps", "Download", "Immutable object", "Software deployment", "Software portability", "Vulnerability (computing)" ], "id": "85ef0c9033d18795f0fc87bca6ace531efc9f70b", "inCitations": [ "d97a25138711755569a5ceee228c15a7381eb062", "2b24b48418ea265d1666cac509e8634bba2e5461", "0240d922d1934db0e79dbfac9721d7870299ff9a", "513fc5135fa76557a30de7058f95d4e011b8daa3" ], "journalName": "", "journalPages": "313-319", "journalVolume": "", "outCitations": [ "c6c41739b2a685d1b4bb6d611a499cc545fe6ef0", "285c580ef6b98a238fd2d93a0dae902e6fc115a5", "213d01d532afc296455b10d2ec1eb76792523850", "0653e2ed9f683868cb4539eb8718551242834f6b", "4964875b14c78dba826a7f0485778c026c91bcc1", "6b60cde77cd6cfe352ac33427c75193f4de9ddd4", "1edb070e3530f1a02ecd76f6621f7719d13b2109" ], "paperAbstract": "Container technology is being adopted as a mainstream platform for IT solutions because of high degree of agility, reusability and portability it offers. However, there are challenges to be addressed for successful adoption. First, it is difficult to establish the full pedigree of images downloaded from public registries. Some might have vulnerabilities introduced unintentionally through rounds of updates by different users. Second, non-conformance to the immutable software deployment policies, such as those promoted by the DevOps principles, introduces vulnerabilities and the loss of control over deployed software. In this study, we investigate containers deployed in a production cloud to derive a set of recommended approaches to address these challenges. 
Our analysis reveals evidences that (i), images of unresolved pedigree have introduced vulnerabilities to containers belonging to third parties; (ii), updates to live public containers are common, defying the tenet that deployed software is immutable; and (iii), scanning containers or images alone is insufficient to eradicate vulnerabilities from public containers. We advocate for better systems support for tracking image provenance and resolving disruptive changes to containers, and propose practices that container users should adopt to limit the vulnerability of their containers.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-tak-paper.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-tak.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/tak", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_tak.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/85ef/0c9033d18795f0fc87bca6ace531efc9f70b.pdf", "s2Url": "https://semanticscholar.org/paper/85ef0c9033d18795f0fc87bca6ace531efc9f70b", "sources": [ "DBLP" ], "title": "Understanding Security Implications of Using Containers in the Cloud", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "85f2691b1ef5f1b75502a925f1bddb8c20660a37": { "authors": [ { "ids": [ "39970946" ], "name": "Halit Dogan" }, { "ids": [ "3345702" ], "name": "Farrukh Hijaz" }, { "ids": [ "38337485" ], "name": "Masab Ahmad" }, { "ids": [ "3080282" ], "name": "Brian Kahne" }, { "ids": [ "2013672" ], "name": "Peter Wilson" }, { "ids": [ "1767824" ], "name": "Omer Khan" } ], "doi": "10.1109/IPDPS.2017.116", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.116", "entities": [ "Central processing unit", "Computation", "Machine learning", "Multi-core processor", "Parallel computing", "Shared memory", "Simulation", "Synchronization (computer science)" ], "id": "85f2691b1ef5f1b75502a925f1bddb8c20660a37", "inCitations": [], 
"journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "254-264", "journalVolume": "", "outCitations": [ "5f0a21670abe1634780d27f60228a83a6a8232ad", "0a569200aeb678b4420cb1025ee9dc6097f60567", "52d2a6110e3bc2215d0347a04c421fb094044557", "a38ceeba4afb200a365fb53d9262b525f36a2f40", "01ac84ffb4b7f575ea0705181795f4fd2368f519", "1f33e83905ee40dfeeacd6c04f64c1af71c2b7fb", "22fe619996b59c09cb73be40103a123d2e328111", "1be969b6bc855741434eea9cfb9700d99398e146", "128fb51d28256aac3b20a8f6de85598767876fbe", "38211dc39e41273c0007889202c69f841e02248a", "288ae8945f3ee4b76cdca26cbf8148d030e90383", "8aee0297903089ade77a6a45a103149140f702a3", "5ced6a0aab1350ef1dba574e1faa05a726d9517e", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "566c89a1ace18f57ac3212e6d62634b501990b31", "245c9703304dc058417f5503ea324bcde8b2eefd", "9ab0de951cc9cdf16887b1f841f8da6affc9c0de", "34d2db88f259d69022e7492225301ffd6e0f55c0", "c32c76490c506722ede871dc3b92725908000bbd", "0c205f91402984905e1bcf5f05f973c5588c1325", "078e00be20bfdc4eeae762f6170ccded05f452c6", "80c593a0668f4eb157a525831b7daad3bdb44381", "352a8957005dc5519b15ed1870751ec494d66395", "3f4d5667e04958569dff72966a93450decfd256f", "6f090d59bde17b7604985acf38e26785e794bcc0", "b82fa45559b1c211667a90acb0d07402bdb54a76", "1521d39088b203ddac981d10d214f463449ae95b" ], "paperAbstract": "Shared Memory stands out as a sine qua non for parallel programming of many commercial and emerging multicore processors. It optimizes patterns of communication that benefit common programming styles. As parallel programming is now mainstream, those common programming styles are challenged with emerging applications that communicate often and involve large amount of data. Such applications include graph analytics and machine learning, and this paper focuses on these domains. 
We retain the shared memory model and introduce a set of lightweight in-hardware explicit messaging instructions in the instruction set architecture (ISA). A set of auxiliary communication models are proposed that utilize explicit messages to accelerate synchronization primitives, and efficiently move computation towards data. The results on a 256-core simulated multicore demonstrate that the proposed communication models improve performance and dynamic energy by an average of 4x and 42% respectively over traditional shared memory.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.116" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/85f2691b1ef5f1b75502a925f1bddb8c20660a37", "sources": [ "DBLP" ], "title": "Accelerating Graph and Machine Learning Workloads Using a Shared Memory Multicore Architecture with Auxiliary Support for In-hardware Explicit Messaging", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "861f27c06ecfb7c97fbf52930768e90727801f8e": { "authors": [ { "ids": [ "12343940" ], "name": "Asa Dan" }, { "ids": [ "1808537" ], "name": "Rajit Manohar" }, { "ids": [ "1688502" ], "name": "Yoram Moses" } ], "doi": "10.1145/3087801.3087839", "doiUrl": "https://doi.org/10.1145/3087801.3087839", "entities": [ "Asynchronous circuit", "Causality" ], "id": "861f27c06ecfb7c97fbf52930768e90727801f8e", "inCitations": [], "journalName": "", "journalPages": "241-250", "journalVolume": "", "outCitations": [], "paperAbstract": "Even in the absence of clocks, time bounds on the duration of actions enable the use of time for distributed coordination. This paper initiates an investigation of coordination in such a setting. A new communication structure called a zigzag pattern is introduced, and shown to guarantee bounds on the relative timing of events in this clockless model. 
Indeed, zigzag patterns are shown to be necessary and sufficient for establishing that events occur in a manner that satisfies prescribed bounds. We capture when a process can know that an appropriate zigzag pattern exists, and use this to provide necessary and sufficient conditions for timed coordination of events using a full-information protocol in the clockless model.", "pdfUrls": [ "http://arxiv.org/abs/1705.08627", "http://doi.acm.org/10.1145/3087801.3087839", "https://arxiv.org/pdf/1705.08627v1.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/861f/27c06ecfb7c97fbf52930768e90727801f8e.pdf", "s2Url": "https://semanticscholar.org/paper/861f27c06ecfb7c97fbf52930768e90727801f8e", "sources": [ "DBLP" ], "title": "On Using Time Without Clocks via Zigzag Causality", "venue": "PODC", "year": 2017 }, "864076ad279e8b7c41ce476ff95c25913973ab03": { "authors": [ { "ids": [ "31519886" ], "name": "Daniel Goodman" }, { "ids": [ "2979630" ], "name": "Georgios Varisteas" }, { "ids": [ "1695968" ], "name": "Timothy L. 
Harris" } ], "doi": "10.1145/3064176.3064177", "doiUrl": "https://doi.org/10.1145/3064176.3064177", "entities": [ "Degree of parallelism", "Fastest", "Haswell (microarchitecture)", "In-memory database", "Limiter", "Parallel computing", "Profiling (information science)", "Sandy Bridge" ], "id": "864076ad279e8b7c41ce476ff95c25913973ab03", "inCitations": [], "journalName": "", "journalPages": "254-269", "journalVolume": "", "outCitations": [ "4e5dbcefad7aaab899bda76910096075033b4432", "5848da5058fed3b97bfd801ca19e5265f489abfe", "5d999f4a5567e6f4a54e46bbcd6006f75ab0cbac", "2beb38e6e831790082b0578bcddd1b9f73714822", "6782ce678403201194d9603c54e03ac58b99c95f", "79c2dda134031672160bfcf9333bfcc92128f8aa", "516081ef1d32c90a07a64c8060b82bf10df26f4c", "24251f02c34f32b1dd96572a1d984c4463a26a10", "6918681cd06dce0eb7fc15cbd8cf11ecf8322a94", "58537d88aad8249dfd25c3e859fd278e39f382b8", "035b237ae69122e07cd3e017e4eeac2621fb5171", "45ac0e85b3ff21bc12a7147df167be38f0d24b9c", "95f4d53119f101dd5a35d962d98c6b61760fbd33", "17810349765c08963af130efe28b6a6b77b7ec51", "38628d26d4f624378f4303b61ae93c5d34d007c3", "1478b551944362489d14395c78f6488f8b5e3d06", "ae731e4a725cf73b2f2347e8d8437e6221c8449e", "b2a7d7c2073d4f512af89208b83a5bb54f91a24c", "0c7631b10f3e5c8d6dd083ec6ffcbfdda92d1f15", "25c4dcffc6bc69b0885587aff9acb9f2dd949c07", "1ab74d44982409beeca21efb2dbcb97a5c7de4b2", "490d862480cf30949dce90e832aa292c498ac768", "2361474f4a3b524a2761a4eee6046f956ed3c430", "9f7c92a1f7572bfe4758f709cc3821424fb5fdc9", "c1c9ea7e5baae0d42deeb27d1be7cbee274d439b", "7b998ea95c57d6dcd56fa27d6f56b1a0c437ced4", "60f5f4f7ddc994664792dfc2f6790107d36bc5a0", "07afa1ea6934df5d325b07754f9eda290981735d", "8beda53e2b3cd04434aaaabef463ef8e18706df1", "4cbd7800701981b58b4c1f6d53c7a66a5fb15633", "0c65d05478483a294701d38c98e111d8a4b033f5", "2f4b9ac4a0694f0b1681348334befba0bfe9d897", "100c700767dba2f1460139c0128fecfc48bedd70" ], "paperAbstract": "Pandia is a system for modeling the performance of in-memory parallel workloads. 
It generates a description of a workload from a series of profiling runs, and combines this with a description of the machine's hardware to model the workload's performance over different thread counts and different placements of those threads.\n The approach is \"comprehensive\" in that it accounts for contention at multiple resources such as processor functional units and memory channels. The points of contention for a workload can shift between resources as the degree of parallelism and thread placement changes. Pandia accounts for these changes and provides a close correspondence between predicted performance and actual performance. Testing a set of 22 benchmarks on 2 socket Intel machines fitted with chips ranging from Sandy Bridge to Haswell we see median differences of 1.05% to 0% between the fastest predicted placement and the fastest measured placement, and median errors of 8% to 4% across all placements.\n Pandia can be used to optimize the performance of a given workload---for instance, identifying whether or not multiple processor sockets should be used, and whether or not the workload benefits from using multiple threads per core. 
In addition, Pandia can be used to identify opportunities for reducing resource consumption where additional resources are not matched by additional performance---for instance, limiting a workload to a small number of cores when its scaling is poor.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064177", "https://timharris.uk/papers/2017-eurosys.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/864076ad279e8b7c41ce476ff95c25913973ab03", "sources": [ "DBLP" ], "title": "Pandia: comprehensive contention-sensitive thread placement", "venue": "EuroSys", "year": 2017 }, "8647718acbc7fc9f6e2e7006d5320d6110fec734": { "authors": [ { "ids": [ "1691730" ], "name": "Claude Castelluccia" }, { "ids": [ "2432288" ], "name": "Markus D\u00fcrmuth" }, { "ids": [ "3367361" ], "name": "Maximilian Golla" }, { "ids": [ "9734274" ], "name": "Fatma Deniz" } ], "doi": "", "doiUrl": "", "entities": [ "Authentication" ], "id": "8647718acbc7fc9f6e2e7006d5320d6110fec734", "inCitations": [ "7fb403e822e2287d2e485b2bdc842003f0952d9b", "3716902eb894dc5bfa5d8e5ce908d566fd434c35", "6d1e1d5298777903337fe09ab8275711dfffb9e6", "c9ff90684b71463cea5343e4a346d189c95de655" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/towards-implicit-visual-memory-based-authentication/" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8647718acbc7fc9f6e2e7006d5320d6110fec734", "sources": [ "DBLP" ], "title": "Towards Implicit Visual Memory-Based Authentication", "venue": "NDSS", "year": 2017 }, "86629bfdedcacc4bd9047bb51a1dd024f8a1e609": { "authors": [ { "ids": [ "3006292" ], "name": "Mohammad Dashti" }, { "ids": [ "23412010" ], "name": "Sachin Basil John" }, { "ids": [ "2366196" ], "name": "Amir Shaikhha" }, { "ids": [ "3123617" ], "name": "Christoph Koch" } ], "doi": "10.1145/3035918.3035919", "doiUrl": 
"https://doi.org/10.1145/3035918.3035919", "entities": [ "Algorithm", "Computation", "Concurrency control", "Database", "Feedback", "In-memory database", "Multiversion concurrency control", "Negative feedback", "Overhead (computing)", "Throughput", "Transaction processing" ], "id": "86629bfdedcacc4bd9047bb51a1dd024f8a1e609", "inCitations": [], "journalName": "", "journalPages": "235-250", "journalVolume": "", "outCitations": [ "0bddbe35fa6e3cf625d15553365a690d3a6bf7aa", "9baa88ccbc656e0774ae1bc01d5698f7801919d6", "6460e782a12649a478bbaeb9c149f59e206d9540", "016cc168aaf437843c65cb3ebdade0330007c2e4", "35f751e46799e3a91425267819f40dce273abec1", "8b4e64bfd039982b0115ffa56f17926532664b07", "2769c203102a875c10bc11affc161891472176d1", "8222f4df5029fa4838cee76aa4bcc54cc8d09dd2", "6e90c995cc9caa0f7d9d68d536f5e16e9bcbbcd6", "190b1d3ba9a402de641f099f402a850261a366c6", "1ae507f38fbe2301f4f7fbcd64e2f49afe00a59c", "28ecdc50beb098d9176d992fed80eb2bac5963a4", "00ac447d02035c26c7e2852c2457fe812e89038f", "c6e80969c6af9ee93e2483fde66e68cb43ea61aa", "645f46933f49aa0ee730d7cac4af77c537a45950", "1a4be3b3071a8e7a294709da33ae400c0101b590", "2520cfc29a521f2333fda020d7ae41860f8dfebd", "e61d3b0e540c5791087f7435ae5ce0b00a6a7160", "1155490b99d6a2501f7bf79e4456a5c6c2bc153a", "2e50af2320dab632d8046b6d4c130ae6cce8903f", "1f102935cc21d54f91ae70c09d84157b6011e6dd", "3560302f54c1658161e2b866fb3809eff22f6459", "4d61705b2fc05f55d5e6ed062a37664df185a1cc", "01094798b20e96e1d029d6874577167f2214c7b6", "10eb9cfb2cea0d6a256e436becd8f0f5494dc5a0", "68a9005a5ec10daece36ca5ecb9cad7be44770b1", "9748241beb02ef1e2d0e6dc877c04b354033a838", "a42327b7f77448e75a3d299f1eeacc33b490e591", "0997037e940df06ed7a6d19f7501579aab01e829", "604517b1ba44203bf1efcc860183d1e413b9ff80", "023e3bcd1c1d374f894836dc7dce688bdb406817", "10551c91b4d36d1009b23b4d2b88a9e1733fe029", "0560fc4924bbbe7e920122dc25c1ecfc3e59e374", "09ecdb904eb7ae8a12d0c6c04ae531617a30eafa", "e75c5d1b7ecd71cd9f1fdc3d07f56290517ef1e5", 
"58eec4589bb965b2bb23a940cb00393e48fa329b", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5" ], "paperAbstract": "The optimistic variants of Multi-Version Concurrency Control (MVCC) avoid blocking concurrent transactions at the cost of having a validation phase. Upon failure in the validation phase, the transaction is usually aborted and restarted from scratch. The \"abort and restart\" approach becomes a performance bottleneck for use cases with high contention objects or long running transactions. In addition, restarting from scratch creates a negative feedback loop in the system, because the system incurs additional overhead that may create even more conflicts.\n In this paper, we propose a novel approach for conflict resolution in MVCC for in-memory databases. This low overhead approach summarizes the transaction programs in the form of a dependency graph. The dependency graph also contains the constructs used in the validation phase of the MVCC algorithm. Then, when encountering conflicts among transactions, our mechanism quickly detects the conflict locations in the program and partially re-executes the conflicting transactions. 
This approach maximizes the reuse of the computations done in the initial execution round, and increases the transaction processing throughput.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035919" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/86629bfdedcacc4bd9047bb51a1dd024f8a1e609", "sources": [ "DBLP" ], "title": "Transaction Repair for Multi-Version Concurrency Control", "venue": "SIGMOD Conference", "year": 2017 }, "867db294f4688031d0068a7730bc51505642e21f": { "authors": [ { "ids": [ "3204841" ], "name": "Cris Cecka" } ], "doi": "10.1145/3126908.3126919", "doiUrl": "https://doi.org/10.1145/3126908.3126919", "entities": [ "Algorithm", "Communication-avoiding algorithms", "Distributed memory", "Fast multipole method", "Graphics processing unit", "Nvidia Tesla", "Run time (program lifecycle phase)", "Throughput" ], "id": "867db294f4688031d0068a7730bc51505642e21f", "inCitations": [ "518e087d5fe54e7de71bc2ca55e7d58f179d5997" ], "journalName": "", "journalPages": "54:1-54:11", "journalVolume": "", "outCitations": [ "55ffdcab5daf1b87b870c62bfe70929694f7e189", "0ee449d5f053020bea71176bbfbb7e7bd9106655", "0d89a25fda460a5e3b2cd6bd476e80fcacce4fd5", "4283df7e17919d3ef4c0eff0391a850ceaf4fbdb", "086e763b63dc1d81f801773575cc1eaf9dbd2d04", "eee905e8cd6f3e36523bc16b9b25bfd01608111a", "b15074880271c36a552a125604523b023a30a044", "414b0fb3e72689d2148798e15f4df35b5baa62fd", "7ef7ea5ccd726b3ea49feadf132eb29cc9c5bc40", "8f2a63bb001ad488b56326926f22debe1439608c", "2ec5aff6404f6f8cd28f83acaaff00ab4c5063a7", "34360863d08ddf5546a27def78430b2addfcb8e2", "258e8729c918f01644a3f8d9f40aa23d9bea0130", "0c6ab1d7631aec1faf4493352293bf19d06ee40f", "4a48eb2bf94cd607bd0d80e31e13834594b3ff23", "c0edf82a33c3c4beb5bf613b7a07040233b3f0b3", "6855fe6b8e8d4954e2ae737737d7abc2ef6814a5", "dc6ce87367e6fcb849a65dfaba8d327210f6c731", "2db6399ced87a5b1952f0618c6756c1c148422cd", "2fe5f8804f8ba2f738b83719b11723fb4a0f8db0" ], "paperAbstract": "Communication-avoiding algorithms 
have been a subject of growing interest in the last decade due to the growth of distributed memory systems and the disproportionate increase of computational throughput to communication bandwidth. For distributed 1D FFTs, communication costs quickly dominate execution time as all industry-standard implementations perform three all-to-all transpositions of the data. In this work, we reformulate an existing algorithm that employs the Fast Multipole Method to reduce the communication requirements to approximately a single all-to-all transpose. We present a detailed and clear implementation strategy that relies heavily on existing library primitives, demonstrate that our strategy achieves consistent speed-ups between 1.3x and 2.2x against cuFFTXT on up to eight NVIDIA Tesla P100 GPUs, and develop an accurate compute model to analyze the performance and dependencies of the algorithm.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126919" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/867db294f4688031d0068a7730bc51505642e21f", "sources": [ "DBLP" ], "title": "Low communication FMM-accelerated FFT on GPUs", "venue": "SC", "year": 2017 }, "868034790a4ee284f4a125e0f068d4b8fc6d9768": { "authors": [ { "ids": [ "2465596" ], "name": "Mark Santolucito" }, { "ids": [ "2438489" ], "name": "Ennan Zhai" }, { "ids": [ "38186814" ], "name": "Rahul Dhodapkar" }, { "ids": [ "6437995" ], "name": "Aaron Shim" }, { "ids": [ "2869954" ], "name": "Ruzica Piskac" } ], "doi": "10.1145/3133888", "doiUrl": "https://doi.org/10.1145/3133888", "entities": [ "Algorithm", "Association rule learning", "Error detection and correction", "Failure rate", "Intermediate representation", "Machine learning", "Preprocessor", "Software system", "Type system", "Verification and validation" ], "id": "868034790a4ee284f4a125e0f068d4b8fc6d9768", "inCitations": [ "513fc5135fa76557a30de7058f95d4e011b8daa3" ], "journalName": "PACMPL", "journalPages": "64:1-64:20", "journalVolume": "1", 
"outCitations": [ "3113fba078dd3d4f4fc722786f0c268549f90c07", "5790f3d5cb60df78918cde1980797d0a14d67e3b", "1521c2c45835a1dd3d29f9886010c652063b7bad", "2d71e521198371ab970c402ddf76eea350569ae8", "1a55371d325cd1fd477ad42a9fab02048c70f5d9", "99388a0d3beb526a13421ed088743b30a75e2b29", "4fa0029515a0cb4842fc82e29a98aa8683ef4853", "36ed5e0ca6d2023137094d04d56a34fe56915fa3", "d67cd96b0a30c5d2b2703491e02ec0f7d1954bb8", "626d66d41ebf9a126f0127796f5d81414905c31a", "04e64e4045ce40b27960d8d764eaf73ce19c9a53", "06321731aa73daa44b049a4d5357ca8569dc428e", "a6526df1d9b18fd3542fad7fdd95e93a5edce909", "06d11bdd79c002f7cfdf9bcfa181f25c96f6009a", "7e4d3ca41adc598e4a8b71df2d5c040ccb59be87", "02fd15933e45ae55920d1ce5a93d21d2f9376968", "42225f36bb06c749414e66adc6be027a7cb82f5f", "6c725d2a7e88515c5f7c877936f90b0184c4fe8f", "ba8dc7a05eccbd32b282a1e367503cf13fea8641", "9f94aadf6b2b57d6131e50af232a05e6dea1e48d", "62003c6c0b6bfdccb7aae83c04ac5aacaf8ffc59", "22a3f0837bd6a913f516ba497469176be641c7d4", "1291f7862c50cb2257400720a3cf6a9992a87f22", "f6229c101fbc5760d4439bd5289aa479c63302e8", "2a24a89da99de1fe7e59409f93997e0fe1c40301" ], "paperAbstract": "System failures resulting from configuration errors are one of the major reasons for the compromised reliability of today's software systems. Although many techniques have been proposed for configuration error detection, these approaches can generally only be applied after an error has occurred. Proactively verifying configuration files is a challenging problem, because 1) software configurations are typically written in poorly structured and untyped “languages”, and 2) specifying rules for configuration verification is challenging in practice. This paper presents ConfigV, a verification framework for general software configurations. Our framework works as follows: in the pre-processing stage, we first automatically derive a specification. Once we have a specification, we check if a given configuration file adheres to that specification. 
The process of learning a specification works through three steps. First, ConfigV parses a training set of configuration files (not necessarily all correct) into a well-structured and probabilistically-typed intermediate representation. Second, based on the association rule learning algorithm, ConfigV learns rules from these intermediate representations. These rules establish relationships between the keywords appearing in the files. Finally, ConfigV employs rule graph analysis to refine the resulting rules. ConfigV is capable of detecting various configuration errors, including ordering errors, integer correlation errors, type errors, and missing entry errors. We evaluated ConfigV by verifying public configuration files on GitHub, and we show that ConfigV can detect known configuration errors in these files.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133888", "http://www.cs.yale.edu/homes/zhai-ennan/configv-oopsla17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/868034790a4ee284f4a125e0f068d4b8fc6d9768", "sources": [ "DBLP" ], "title": "Synthesizing configuration file specifications with association rule learning", "venue": "PACMPL", "year": 2017 }, "86991eb6eed3e12f5b3985340416302a2208bceb": { "authors": [ { "ids": [ "39637073" ], "name": "Josep Samp\u00e9" }, { "ids": [ "1749419" ], "name": "Marc S\u00e1nchez Artigas" }, { "ids": [ "1695568" ], "name": "Pedro Garc\u00eda L\u00f3pez" }, { "ids": [ "34976581" ], "name": "Gerard Par\u00eds" } ], "doi": "10.1145/3135974.3135980", "doiUrl": "https://doi.org/10.1145/3135974.3135980", "entities": [ "Cloud storage", "Computation", "Computer data storage", "Locality of reference", "Middleware", "Object storage", "Peer-to-peer", "Resource contention", "Runtime system", "Scalability", "Server (computing)", "Serverless computing", "Stateful firewall", "Stock and flow", "Telephone number" ], "id": "86991eb6eed3e12f5b3985340416302a2208bceb", "inCitations": [ 
"7bc3c800f29d9050dce723d3f425f019ba3fa436" ], "journalName": "", "journalPages": "121-133", "journalVolume": "", "outCitations": [ "9583ab2af5f28da6fb08f79476d14214d20b9848", "5777bc96b4f09e62e5b1283eb5f67ffb056e7820", "8275794599fbdb276339e47620188eca33c7bb8c", "80399c4ad123e27f3d9d7fdfccd141920918ff51", "d16225c38fa25ad3b3378a8049a10579543c5c35", "015f34fa2e0d280428d59483db354562187b0c35", "651fc53366936136b12a1a09afbf86189f033251", "1ddd410257b00370c1fe58377f02f608ff16c3bf", "d7ce1f5e34ca1baf2b4ef2be941dd569a6f89bf6", "6d496d510f867274473a01dcb0a1a7bf45d0904f", "56df56dbab7e0b022a51e5a038c1119fcd36f137", "92e536c1789bf301f456b01590006c9a3eff6cd8", "6bb4ee559d7bafbdd387f2c64435fe4994ea016c", "807df0de011be333fc1dd06ac58c426e8b3437ef", "5e36511b8cab586d69047adfb03971380c17d427", "33c1c2454e55f5daf69533f20ab19f0fad0604a2", "6088230ede570bf552d16c8781ee7a9c77c6a1a1", "e2880778365b3ff25902a0a481e1f0f3ecb9f32d", "030c5d1b06de23942fbfc2f5eb3572f8dad24a90", "95b9d6e9bf31c2125d825e9d482ca26034b31756", "006cd63664db53494cc61a44d5c6ebc668dc4b6a" ], "paperAbstract": "Traditionally, active storage techniques have been proposed to move computation tasks to storage nodes in order to exploit data locality. However, we argue in this paper that active storage is ill-suited for cloud storage for two reasons: 1. Lack of elasticity: Computing can only scale out with the number of storage nodes; and 2. Resource Contention: Sharing compute resources can produce interferences in the storage system. Serverless computing is now emerging as a promising alternative for ensuring painless scalability, and also, for simplifying the development of disaggregated computing tasks.\n Here we present an innovative data-driven serverless computing middleware for object storage. It is a lightweight compute solution that allows users to create small, stateless functions that intercept and operate on data flows in a scalable manner without the need to manage a server or a runtime environment. 
We demonstrate through different use cases how our solution scales with minimal overhead, while getting rid of the resource contention problems incurred by active storage tasks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3135974.3135980" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/86991eb6eed3e12f5b3985340416302a2208bceb", "sources": [ "DBLP" ], "title": "Data-driven serverless functions for object storage", "venue": "Middleware", "year": 2017 }, "86c78adba2c8791d3d3e70c927504b2c7e259528": { "authors": [ { "ids": [ "2388918" ], "name": "Simon Kuenzer" }, { "ids": [ "3118329" ], "name": "Anton Ivanov" }, { "ids": [ "2297493" ], "name": "Filipe Manco" }, { "ids": [ "8627779" ], "name": "Jose Mendes" }, { "ids": [ "10414012" ], "name": "Yuri Volchkov" }, { "ids": [ "40279556" ], "name": "Florian Schmidt" }, { "ids": [ "2613133" ], "name": "Kenichi Yasukata" }, { "ids": [ "1895414" ], "name": "Michio Honda" }, { "ids": [ "2963590" ], "name": "Felipe Huici" } ], "doi": "10.1145/3050748.3050757", "doiUrl": "https://doi.org/10.1145/3050748.3050757", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Central processing unit", "Change detection and notification", "GB-PVR", "Hypervisor", "Simulation", "Uptime", "X86" ], "id": "86c78adba2c8791d3d3e70c927504b2c7e259528", "inCitations": [ "a46da31446c98c59d88e8cf626d0dd917b12d93e", "c01f0c08545d3abf85b59c15f0bb7ce0dc03ebcb", "70c4f0403d80427e10c7e7167f814ec0bb12d18f", "d5f1b82f43c51b0ba5aeb7132457a58b54b8308d" ], "journalName": "", "journalPages": "15-29", "journalVolume": "", "outCitations": [ "592527217d62cf196df1545a555165dc9bc8fa3d", "0387c89a21e113eb69fcde8a11c82a072e3a1af1", "0719fda19d92123715c98f872fb857fec7803aba", "c7f3a75479a42b242e9c9ebcd17b521d850e836a", "7da24bcadf4df972057c208372293e2da68fd8ac", "0a7d32cd957055bcc475edaa97d5e4890eec0446", "5017593f65d08a4b6072f60af89ff961ec76c9df", "38fb66c959ef2bdfb11c6ac7c7ca37d635d1dafc", "4cead48e2eac91560105871b78268e3164eb382b", 
"4ab925447eda3627162919aee0b4863f0cd75c80", "60141685abac0d670eac915a0acbc258ccf235a4", "380645605e14aa824bdbd50bab86566def7221d1", "35f1dbf5079b2383bc580161fcddbef960e64e2c", "048d6cee0d6764932f886d93e641235e7e7a60fa", "a93cfc7d891492c9bfdfe129b8077b3a2f41fe08", "03d1048cff1e12d46f6aa382bb438b06f6c3a2ad", "7129b305ce45f83127e928e8510da9fae0783905", "5b7561f44f95ab68dbedb839849cbe72313aef20", "266c9e256ee30065765aba238cabd8c5b029372a", "65fd142f37c315cdf892184f8fb21281b88f6269", "c20e9557baa6333832e83e5b4cbbb6a359fa6885", "2f57f5aa940bd000c45e12431e258dc6d5de9acd", "76ca7b8a3c66a2381ee3d58d5e6e05e89cb1ca2a", "22bd3a35b9550bc5b570a0beee5648eb9033be3b", "0860bc34aac8a304674aa4c205ff46e6dbc93295", "05e00789898c6d964d8aaedd5c3acd4f9c7c4d6f", "5cfc936d12bbd8a0f100687b12b20e406215f30a" ], "paperAbstract": "Video streaming dominates the Internet's overall traffic mix, with reports stating that it will constitute 90% of all consumer traffic by 2019. Most of this video is delivered by Content Delivery Networks (CDNs), and, while they optimize QoE metrics such as buffering ratio and start-up time, no single CDN provides optimal performance. In this paper we make the case for elastic CDNs, the ability to build virtual CDNs on-the-fly on top of shared, third-party infrastructure at a scale. To bring this idea closer to reality we begin by large-scale simulations to quantify the effects that elastic CDNs would have if deployed, and build and evaluate MiniCache, a specialized, minimalistic virtualized content cache that runs on the Xen hypervisor. 
MiniCache is able to serve content at rates of up to 32 Gb/s and handle up to 600K reqs/sec on a single CPU core, as well as boot in about 90 milliseconds on x86 and around 370 milliseconds on ARM32.", "pdfUrls": [ "http://doi.acm.org/10.1145/3050748.3050757" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/86c78adba2c8791d3d3e70c927504b2c7e259528", "sources": [ "DBLP" ], "title": "Unikernels Everywhere: The Case for Elastic CDNs", "venue": "VEE", "year": 2017 }, "870a7cddc838d827d997d92536387a09bdbc4377": { "authors": [ { "ids": [ "1680158" ], "name": "Glauber D. Gon\u00e7alves" }, { "ids": [ "1743723" ], "name": "Alex Borges Vieira" }, { "ids": [ "2553058" ], "name": "Idilio Drago" }, { "ids": [ "2357626" ], "name": "Ana Paula Couto da Silva" }, { "ids": [ "1714923" ], "name": "Jussara M. Almeida" } ], "doi": "10.1109/MASCOTS.2017.32", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.32", "entities": [ "Backup", "Cloud computing", "Cloud storage", "Personal cloud", "Type of service" ], "id": "870a7cddc838d827d997d92536387a09bdbc4377", "inCitations": [], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "32-42", "journalVolume": "", "outCitations": [ "92e536c1789bf301f456b01590006c9a3eff6cd8", "b582db57d2f9311633e2c4ed9a08518b08d4ef3d", "002efcf9f0b58af153556b84395a37f6171195da", "9aa049a9fe55bc8302543f0e7ed5515103b9b147", "5e19bba3330339df1c82ddb729b3984cef3e110b", "a6a8313f30420c60e7eaa9f34ea5a41833695af1", "6fc719f39cd4d26d6712220f93c3d35cd188cc1e", "e20557f44f9a2b5f6f7f0039397b8111d56583b7", "df58f969ff76d4cd710e0214a7e01e3ae2e3f8fb", "74879c098d285c5a5b08789bd737b2991fbea178", "a09a0aa9b8b88b0c6e26259cb5b3a09a5d7d67f0", "61d9c952d8f4079b86bae2f2e27ecaabca7433a2", "419ac059fa30761dd35cf83e6204c569199da83b", "3e38a20d5f7d9897a2b6d382db3595589c6ec9f1", "26c9c9f0cbcb45d2d399913d3157c6f49f4f788d", 
"bda8c614298ae73d612709d9cd42fcd33cf78aa6", "056cc5ef214b4e7d3c258878e4346ce7b14949ca", "d9d37fa441d9c1a5832658306886380f67abac12", "9902e03feb04a8710ed6022275e9b964b1efe146" ], "paperAbstract": "Personal Cloud Storage (PCS) is a very popular Internet service. It allows users to backup data to the cloud as well as to perform collaborative work while sharing content. Notably, content sharing is a key feature for PCS users. It however comes with extra costs for service providers, as shared files must be synchronized to multiple user devices, generating more downloads from cloud servers. Despite the increasing interest in this type of service, a thorough investigation on the costs and benefits of PCS for service providers and end users has not been conducted yet. To that end, we propose a model to analyze cost-benefit tradeoffs for both parties. We develop utility functions that capture, in an abstract level, the satisfaction of the service provider and users in various scenarios. Then, we apply our model to evaluate alternative policies for content sharing in PCS. We consider two alternative policies for the current PCS sharing architecture, which count on user collaboration to reduce providers' costs. 
Our results show that such policies are advantageous for providers and users, leading to 39% utility improvements for both parties, while requiring low commitment of resources from participating users.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.32" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/870a7cddc838d827d997d92536387a09bdbc4377", "sources": [ "DBLP" ], "title": "Cost-Benefit Tradeoffs of Content Sharing in Personal Cloud Storage", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "87233e22b61c41b541e50e6a37bdcafead11ef73": { "authors": [ { "ids": [ "1779294" ], "name": "Riko Jacob" }, { "ids": [ "1731269" ], "name": "Nodari Sitchinava" } ], "doi": "10.1145/3087556.3087583", "doiUrl": "https://doi.org/10.1145/3087556.3087583", "entities": [ "Algorithm", "Non-volatile memory", "Sorting", "Sorting algorithm", "Sparse matrix", "Volatile memory" ], "id": "87233e22b61c41b541e50e6a37bdcafead11ef73", "inCitations": [ "55d8957711bedf5968864cacb61e44d7fc4d96ab", "5f44b649b3411fd1e10b517f50f86a7dbd6e0302" ], "journalName": "", "journalPages": "247-254", "journalVolume": "", "outCitations": [ "24aeb80a7c397a0e147ad700fcf578c0324551d5", "1619e740a4e1b98f26cc55a81f3a183fb59d2c7b", "8149e88266b636bef2f6998c3e9361e6fdea7692", "0e216e95f17f64ff18cd50463dd8ec023aa08248", "3339acf7d66a3818bf3eaebdb685ea57d6d62e14", "e94cd229f39aec66a9602c05225b87bc8925810d", "039124197fac7a16e36611d8beed94524dd5fed5", "377084771036256909a7032b80aeeca8c268ce67", "007a3dfb2ae0778b99fdb21a6911d80a8c59dbca", "29b8666fd06384a14b096191814a8530f46c4147", "8eaa45df0a85bf7fda455cf7f1699cdfe0de1288", "b95d5b63733f2ee9287d2e1a78fbd8e331cfd3cf", "3e69317455f7db9b1325239c6f6f52cbe29a5491", "ef387050a4d0b8e870464514c2311413e6c4ac95", "175b2231faf668bae01706bb357218f99de39099" ], "paperAbstract": "Motivated by the asymmetric read and write costs 
of emerging non-volatile memory technologies, we study lower bounds for the problems of sorting, permuting and multiplying a sparse matrix by a dense vector in the asymmetric external memory model (AEM). Given an AEM with internal (symmetric) memory of size M, transfers between symmetric and asymmetric memory in blocks of size B and the ratio ω between write and read costs, we show Ω(min (N, ωN/B logω M/B N/B) lower bound for the cost of permuting N input elements. This lower bound also applies to the problem of sorting N elements. This proves that the existing sorting algorithms in the AEM model are optimal to within a constant factor for reasonable ranges of parameters N, M, B, and ω. We also show a lower bound of Ω(min {H, ω H/B logω M/B N/ max{δ ,M}}) for the cost of multiplying an N x N matrix with at most H= δ N non-empty entries by a vector with N elements.", "pdfUrls": [ "http://www2.hawaii.edu/~nodari/pubs/17-spaa.pdf", "http://doi.acm.org/10.1145/3087556.3087583" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/87233e22b61c41b541e50e6a37bdcafead11ef73", "sources": [ "DBLP" ], "title": "Lower Bounds in the Asymmetric External Memory Model", "venue": "SPAA", "year": 2017 }, "8755fd59b74028b0bd45e9b5c355c64c0c70af04": { "authors": [ { "ids": [ "2459613" ], "name": "Jonathan Eastep" }, { "ids": [ "29779938" ], "name": "Steve Sylvester" }, { "ids": [ "1866337" ], "name": "Christopher Cantalupo" }, { "ids": [ "16916763" ], "name": "Brad Geltz" }, { "ids": [ "3042670" ], "name": "Federico Ardanaz" }, { "ids": [ "40039182" ], "name": "Asma Al-Rawi" }, { "ids": [ "2142084" ], "name": "Kelly Livingston" }, { "ids": [ "1705061" ], "name": "Fuat Keceli" }, { "ids": [ "2975537" ], "name": "Matthias Maiterth" }, { "ids": [ "38308455" ], "name": "Siddhartha Jana" } ], "doi": "10.1007/978-3-319-58667-0_21", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_21", "entities": [ "Advanced Configuration and Power Interface", "Control system", 
"Dynamic voltage scaling", "Open-source software", "Perf (Linux)", "Plug-in (computing)", "Power management", "Power supply", "Procurement", "Program optimization", "Runtime system", "Scalability", "Scheduling (computing)", "Software system", "Xeon Phi" ], "id": "8755fd59b74028b0bd45e9b5c355c64c0c70af04", "inCitations": [ "b4c32173842274974b5724d2fefd14efa0a67077", "204ed869f69468d2c88ff64f67300d810f686c1a", "c828d212e897354f926e4c1a724bd01c20ccebba", "ca83bbc1d1ea5b5a829eed9acd638c626af2aa1d" ], "journalName": "", "journalPages": "394-412", "journalVolume": "", "outCitations": [ "1e8233a8c8271c3278f1b84bed368145c0034a35", "68eec7c5cd770a7d0af62f6856263bc675998fb0", "12f815cc078020d26f57cfd86d1e4cd18fbc158a", "244030cb8e73144251ef3701ac758168031d17f9", "dd97355244bb2e1b369be0b2617e8452710ca44b", "14bd3627a85b658ea1b8450039df7fe0fb57379e", "81c4e99059104b00adc14f6797758aff998c066d", "025b0273eb6ccd57e6a949fe44225ca5d8041cf9", "8beda53e2b3cd04434aaaabef463ef8e18706df1", "5175688633b7c22fdd0b1bec4f042c30d1650a15", "51afdc23a72d19f7e6b1d46d25f7c7bb1814e85b", "0acc688828bc8397d885f61cf9b7d4e4dcd7a119", "429d28998216da5648f40248bf4bc9e508edd2fd", "f103c1775462f4409ae15818cfa0a761e282d324", "346ee93e610a95c60394900f857d398bc2ae74df", "3875d86a9dc765b5ae2e66dc46d0da58ded6d75c", "59b709fe6377d2332fe396bf25e9b65a10c1062c", "7e757fff66a63b268da83ffccf464437492ac8b6" ], "paperAbstract": "Performance of future large-scale HPC systems will be limited by costs associated with scaling power. Some HPC centers are reaching the limits of their existent site power delivery infrastructure and are facing prohibitive upgrade costs. Others are reaching budgetary limits on their energy operating costs. Without a breakthrough in energy efficiency, the HPC industry may fail to maintain historical performance scaling rates and fall short of 2018-2020 Exascale performance goals by an estimated 23x margin. 
Overcoming this gap will require co-designed hardware and software system energy management solutions and increased collaboration between hardware vendors and the HPC software community. In this work, we introduce the Global Extensible Open Power Manager (GEOPM): a tree-hierarchical, plug-in extensible, open source runtime framework that we are contributing to the HPC community to accelerate collaboration and research toward co-designed energy management solutions. First results with an experimental power rebalancing optimization demonstrate up to 32% improvements in the runtime of CORAL system procurement benchmarks like miniFE and Nekbone in a power-limited Xeon Phi cluster. These promising initial results motivate further work with the community to extend GEOPM to new optimization strategies to achieve further speedups. Keywords\u2014runtime systems; scalability; control systems; tuning; power management; RAPL; P-states; DVFS; resource management; power-aware scheduling; performance optimization", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_21", "http://www.dcs.warwick.ac.uk/pmbs/pmbs/PMBS/papers/paper6.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8755/fd59b74028b0bd45e9b5c355c64c0c70af04.pdf", "s2Url": "https://semanticscholar.org/paper/8755fd59b74028b0bd45e9b5c355c64c0c70af04", "sources": [ "DBLP" ], "title": "Global Extensible Open Power Manager: A Vehicle for HPC Community Collaboration on Co-Designed Energy Management Solutions", "venue": "ISC", "year": 2017 }, "876186bfd05bdd01c1f2ea288c532b16b8a0694f": { "authors": [ { "ids": [ "17804514" ], "name": "Wonbae Kim" }, { "ids": [ "1716765" ], "name": "Young-ri Choi" }, { "ids": [ "1739708" ], "name": "Beomseok Nam" } ], "doi": "10.1109/CLOUD.2017.35", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.35", "entities": [ "Apache Hadoop", "Curiously recurring template pattern", "Distributed computing", "Experiment", "Fairness measure", "Initialization (programming)", "Job scheduler", 
"Load balancing (computing)", "Overhead projector", "Scheduling (computing)" ], "id": "876186bfd05bdd01c1f2ea288c532b16b8a0694f", "inCitations": [ "17d77d5e2db5b9aaf54b8240f829b1d4f077df29" ], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "214-221", "journalVolume": "", "outCitations": [ "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "42b7e5b7337b2e0f0a440d90b40c4085e128c0fa", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "16139ba6fa6ad2828c20abdf5d9f34687836f932", "8f634a90ffa17c2fe32834828b0b8d10ef551744", "7c3e88b0c762065bd0d974cb3d67a1e61479f647", "35e8655b2c8845d607fc14ca12a42311dc30c379", "41e0ef45114deff41d92c8b235393cfe4d9a94f4", "036d544defb7f8e6297bd4c57a3b430d04a269e8", "702ba56d3f4aa529b8b1ccec4b47a0a81130d5fa", "1d27d04e8cef4d32cb4e022c9f493a40a019f59f", "47947ed7d4c12855b1b5a4c4ec3123528761d64b", "0541d5338adc48276b3b8cd3a141d799e2d40150" ], "paperAbstract": "Hadoop clusters have been transitioning from a dedicated cluster environment to a shared cluster environment. This trend has resulted in the YARN container abstraction that isolates computing tasks from physical resources. With YARN containers, Hadoop has expanded to support various distributed frameworks. However, it has been reported that Hadoop tasks suffer from a significant overhead of container relaunch. In order to reduce the container overhead without making significant changes to the existing YARN framework, we propose leveraging the input split, which is the logical representation of physical HDFS blocks. Our assorted block coalescing scheme combines multiple HDFS blocks and creates large input splits of various sizes, reducing the number of containers and their initialization overhead. 
Our experimental study shows the assorted block coalescing scheme reduces the container overhead by a large margin while it achieves good load balance and job scheduling fairness without impairing the degree of overlap between map phase and reduce phase.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.35" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/876186bfd05bdd01c1f2ea288c532b16b8a0694f", "sources": [ "DBLP" ], "title": "Coalescing HDFS Blocks to Avoid Recurring YARN Container Overhead", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "877c0f4dfcd789ea602a0fdfcc86d63def74aea1": { "authors": [ { "ids": [ "2190980" ], "name": "Jaewon Lee" }, { "ids": [ "2885986" ], "name": "Hanhwi Jang" }, { "ids": [ "2865235" ], "name": "Jae-Eon Jo" }, { "ids": [ "20862008" ], "name": "Gyu-hyeon Lee" }, { "ids": [ "2287483" ], "name": "Jangwoo Kim" } ], "doi": "10.1109/ISPASS.2017.7975292", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975292", "entities": [ "Code::Blocks", "Configuration management", "Emulator", "Memcached", "Parsec (parser)", "Simulation", "Speedup", "Systems modeling", "Throughput" ], "id": "877c0f4dfcd789ea602a0fdfcc86d63def74aea1", "inCitations": [], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "205-216", "journalVolume": "", "outCitations": [ "01ac84ffb4b7f575ea0705181795f4fd2368f519", "103d493e5c7bd28b5a8cf604f62877a96ff25bfb", "54f3331b575b2d451c2d716f86496cada23d596d", "a422d4fe122fb878394fc79c332a38535842e58c", "6b406760f69fc0e4b8412ea4b864c345a9540b3b", "4ff8d44338a2be192db0303b04c65214fdee0fb9", "17d72a792b35fa9e65499b3e8f9c15a1f6f8b7b6", "09cbad69073a35028379ffc3da50e129ab29a523", "41965de84461726a0e12296acd667a442c8eba25", "9fffbc5a4ceba903bf91443405814e28dbcddd64", "79ff6d26643770fecefe08d7bf1ec504ae465bc8", "5c40cdb6386021d68288e7158e1330ad3b8c223e", 
"338e0e131b48d214e9995e85237a539b44d03367", "1f3a55ec43dc506c95f263506a3d74248c4c3fc7", "bb6cedd67b26fce1f0d8eacb0357658c6831586d", "246be658a2ce791070a440cfc965a3ddac325c18", "14b664dd04703d2333bce3cb35641f90a3a8a2ce", "5370b45fb252c6a31497dd56f3d82afff7b62281", "3c66994ac5c16064132e3f241b0fec97092e6164", "35a331c7be9808df2e0086c2bf17d9b65b518aae", "686f66945a214d0da8778e64ceee0ed15e9fd73d", "3ed84f2fbdc4dc6450919ec5b017e66440a5833c", "60a45695845e3f1e5dd8d7a886b23fff89c295bc", "171fa4b28a6d01971df4df46d4274e518c507906", "02514ff22c82354b0ebb065dcb604c016e67a15e", "c3e38e83f34265c0c4e2dd4f539e3d4bbf7bc661", "2960c89331eb7afa86584792e2e11dbf6a125820", "0653e2ed9f683868cb4539eb8718551242834f6b", "047a8db8654292560b5d023a8ef61cd335938822", "b77e7ae60aed8f307075c5a261274938da41e1e8", "6b1793a272eb7920bc29e32a707ec65ab3b1eaa6", "0884c53895371aaf2d3f5e5decb150323cbe3f9d", "3bf23f74bf33ed52f7c28587fab315610b27221a", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "f29dac2e26273532c81c933f091c7a60b9480f94" ], "paperAbstract": "Computer architects use a variety of workloads to measure system performance. For many workloads, the workload configuration determines the stress applied to the system and the corresponding performance behavior. Therefore, architects must make great efforts to explore and find the correct workload configuration before performing detailed analysis. However, such explorations become impossible for indevelopment systems which exist only as a software model. The existing system modeling platforms are either accurate but too slow, or fast but inaccurate to get workload-reported performance metrics (e.g., latency and throughput) which are necessary for configuring workloads. In this paper, we propose StressRight, a method to quickly model the first-order performance of full-system workloads and reconstruct the workload-reported performance metrics. Stress-Right allows to explore how the workload configurations affect the stress and performance. 
The key idea is to execute workloads on a fast but timing-agnostic platform (e.g. emulators), and efficiently reconstruct the timing/performance details by analyzing only the unique code blocks. Our evaluation using memcached and PARSEC shows that StressRight achieves 8∼45x speedup compared to a cycle-level simulator while maintaining good accuracy.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975292" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/877c0f4dfcd789ea602a0fdfcc86d63def74aea1", "sources": [ "DBLP" ], "title": "StressRight: Finding the right stress for accurate in-development system evaluation", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "8792056eb7d2c917347747a8135d80ab0ebd12f7": { "authors": [ { "ids": [ "1699666" ], "name": "Stefan Dobrev" }, { "ids": [ "2652176" ], "name": "Manuel Lafond" }, { "ids": [ "1704652" ], "name": "Lata Narayanan" }, { "ids": [ "1759125" ], "name": "Jaroslav Opatrny" } ], "doi": "10.1145/3087556.3087577", "doiUrl": "https://doi.org/10.1145/3087556.3087577", "entities": [ "Algorithm", "Centralisation", "Graph (discrete mathematics)", "Greedy algorithm", "Routing", "Shortest path problem" ], "id": "8792056eb7d2c917347747a8135d80ab0ebd12f7", "inCitations": [ "b7bf2d75bdf2087af19b1455decd2f7160e74a6f", "f4e2990ff8f44da04a7379baba6db85139d04700" ], "journalName": "", "journalPages": "265-274", "journalVolume": "", "outCitations": [ "f4e2990ff8f44da04a7379baba6db85139d04700", "2ed3b3b398740f33be36783ece0694190257040d", "661f02a2e9497cb0a9163a20b79dfab3a359ee56", "396c28e14c8197f2302ff50ee25385efe1dd9bd1", "49fa7db35a62a0ada3a84ee08bc83a7f86443dfb", "94da6d584da40f8a1816f202097a6a618bb06360", "4fb7122fa65ae1e6ae1844c5ef092f03110a67b3", "aa0c4e33ebbf3e6e41da7d5d2b7c713cd0a8a006", "b7bf2d75bdf2087af19b1455decd2f7160e74a6f", "9a6211226f356f13c81824c3afc45a764a6cb9d5", "0aaa66cb549734575b2a0ad77a10c34de29f7199" ], 
"paperAbstract": "We consider a problem of routing on directed paths and trees to a single destination, with rate-limited, adversarial traffic. In particular, we focus on local buffer management algorithms that ensure no packet loss, while minimizing the size of the required buffers. While a centralized algorithm for the problem that uses constant-sized buffers has been recently shown [21], there is no known local algorithm that achieves a sub-linear buffer size. In this paper we show tight bounds for the maximum buffer size needed by l-local algorithms for information gathering on directed paths and trees, where an algorithm is called l-local if the decision made by each node v depends only on the sizes of the buffers at most l hops away from v.\n We show three main results: A lower bound of Ω(c log n/l) for all l-local algorithms on both directed and undirected paths, where c is an upper bound on the link capacity and injection rate. A surprisingly simple 1-local algorithm for directed paths that uses buffers of size O(log n), when c=1. A natural 2-local extension of this algorithm to directed trees, for c=1, with the same asymptotic bound.\n Our Ω(log n) lower bound is significantly lower than the Ω(n) lower bound for greedy algorithms, and perhaps surprisingly, there is a matching upper bound. The algorithm that achieves it can be summarized in two lines: If the size of your buffer is odd, forward a message if your successor's buffer size is equal or lower. If your buffer size is even, forward a message only if your successor's buffer size is strictly lower. 
For trees, a simple arbitration between siblings is added.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087556.3087577" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8792056eb7d2c917347747a8135d80ab0ebd12f7", "sources": [ "DBLP" ], "title": "Optimal Local Buffer Management for Information Gathering with Adversarial Traffic", "venue": "SPAA", "year": 2017 }, "87c6db39af058f542183ad7f126f46c38366aba6": { "authors": [ { "ids": [ "1800925" ], "name": "Xiang Lian" }, { "ids": [ "5570760" ], "name": "Dongchul Kim" } ], "doi": "10.1145/3035918.3035929", "doiUrl": "https://doi.org/10.1145/3035918.3035929", "entities": [ "Algorithm", "Bioinformatics", "Bioinformatics", "Biological network", "Database", "Experiment", "Gene regulatory network", "Hoc (programming language)", "Microarray", "Network analysis (electrical circuits)", "Synthetic data" ], "id": "87c6db39af058f542183ad7f126f46c38366aba6", "inCitations": [], "journalName": "", "journalPages": "359-373", "journalVolume": "", "outCitations": [ "20b5fc20821968a2e990183ee4613c591951597c", "3a19613b19af47305a73d35830773bef47e606ed", "774db16a3f25a73ceda9e6ab4d5a8b8f3c40605d", "11e77315e127fa7196173cfa8296ebe0de83f026", "36c0b81a2ef2505e5c1c763c1abc25cdd72903f2", "36898244a91ddbae7bece6aad471889c0f2a2c83", "786a1ef5a7aebc0b50767802a0ef73c20838b6c0", "1e7c43bff2a19aeecab9ec0871e527ebb0a38f09", "262fe7be566e77c22ca09ee7b48201075d548293", "5198b8b18713f831e321b3b9ac9bbb21c9a03e86", "fe1fa4d0d0fcc0dee27f203a5ecf49debe94945e", "0d557c7a7d6b50c3ccdb6a9f48e4c552541f5930", "ded1ab521c9839145040cb45c1f0c353536de8d5", "023d91d9a451523b550c60f12490d832e313b08f", "a80df730dbe0f90c752a9a4411bb4e4f375fda1d", "37af9a2655a6ecddfda65be1fbcf7ddd43e21446", "3fe077f1aba99d12c48c2b60de4736a65bff771c", "11fc1be0025f006a41304a1c2973358483c860e6", "26096f25cc8ebfefd71194644fa9fe7ae84f9b74", "362922ae8ce306b8f3fc3465072dd703b8bd5edb", "92e6e67b81528c2c4e1b70803847907d6b251ecd", "17ae114d87f8483f4cb4d7443614e9fe07c0b89b", 
"13145221ed525c4c77325db4377a6818e2f41e1f", "6eccbba04f448fa5bc93ed94bc63bb03d36e114c", "e49269783b1561bf936a3767747c3b2cf059533a", "bbac86e074504ca060553bfdb7953b0531ab4f2a", "71fb410156fd52066e00eb1d0670bee3720b4b72", "4f20828f487b093898c989f1043cb0d81b583d5e" ], "paperAbstract": "In many real applications such as bioinformatics and biological network analysis, it has always been an important, yet challenging, topic to accurately infer/reconstruct gene regulatory networks (GRNs) from microarray data, and efficiently identify those matching GRNs with similar interaction structures for potential disease analysis and treatment tasks. Motivated by this, in this paper, we formalize the problem of ad-hoc inference and matching over gene regulatory networks (IM-GRN), which deciphers ad-hoc GRN graph structures online from gene feature databases (without full GRN materializations), and retrieves the inferred GRNs that are subgraph-isomorphic to a query GRN graph with high confidences. Specifically, we propose a novel probabilistic score to measure the possible interaction between any two genes (inferred from gene feature vectors), and thus model GRNs by probabilistic graphs, containing edge existence probabilities. In order to efficiently process IM-GRN queries, we propose effective reduction, pruning, and embedding strategies to significantly reduce the search space of GRN inference and matching, without materializing all GRNs. We also present an effective indexing mechanism and an efficient IM-GRN query processing algorithm by the index traversal. 
Finally, extensive experiments have been conducted to verify the efficiency and effectiveness of our proposed IM-GRN query answering approaches over real/synthetic GRN data sets.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035929" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/87c6db39af058f542183ad7f126f46c38366aba6", "sources": [ "DBLP" ], "title": "Efficient Ad-Hoc Graph Inference and Matching in Biological Databases", "venue": "SIGMOD Conference", "year": 2017 }, "87cd0d02a934acfd7a24a0c0915b901100fa21ac": { "authors": [ { "ids": [ "9511562" ], "name": "Joy Rahman" }, { "ids": [ "34718613" ], "name": "Palden Lama" } ], "doi": "10.1109/MASCOTS.2017.18", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.18", "entities": [ "Apache Hadoop", "Big data", "Cloud computing", "Cloud storage", "Data-intensive computing", "Job scheduler", "Jumpstart Our Business Startups Act", "Machine learning", "Multiplexing", "Out-of-order execution", "Provisioning", "Response time (technology)", "Scalability", "Scheduling (computing)", "Service-level agreement", "Swift (programming language)", "Testbed", "Throughput" ], "id": "87cd0d02a934acfd7a24a0c0915b901100fa21ac", "inCitations": [], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "43-52", "journalVolume": "", "outCitations": [ "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "0c83169bf4ebb29979bfe47708cb6b79b6e28755", "dbd187b75b231bc4bb28efee74d5841a9fdd2bcf", "e2aa118e2deb14e9d99e8b6faa683a04f7ea8fa0", "c313e177616625b6c5adf1dbaf96f95abf68b042", "ca08b8b63dd2c04a92a259cc60f9e50400833dd4", "77c780ed48a64c335966aa9f780f04faa548881d", "38b656941f8cf65a6b2ae6e3f672601a2b092bf5", "054590ce7e8826e018982aac10214d9947959784", "09f0751d7452cd0480d572171593d07996325fcb", "5e0c7138ca73b265c6d822ae1f93f9bb101da107", "2da760f90c3d2bf6598becdde9063093f488548c", 
"830ee8d87a3f2ef969d34c1dc7224d1b3dca6c1b", "2a30361f6cdc7a0636ae4a5fd29132b2053f5f0e", "3bf64462fc3558ab7e9329d084a1af4cf0c87ebf", "4168306ef07c1f8997c2001af0e3ae0569dfda6d", "6d44790b6d952eff28f302998e8121f90786e3ff", "044604ae6a0ba104f6ad5bc18a4f1dede23c17fe", "536095c3b712a60e197cbe136e785a159c697dd7", "91509396b1f7f250a777cbf8db17149bb58f2cb2", "12481927d7d78e6f231c24a708406943fa3f863d", "28eb88b180674f43381ede3e9573689496cfd321" ], "paperAbstract": "Cloud-based services are increasingly popular for big data analytics due to the flexibility, scalability, and cost-effectiveness of provisioning elastic resources on-demand. However, data analytics-as-a-service suffers from the overheads of data movement between compute and storage clusters, due to their decoupled architecture in existing cloud infrastructure. In this work, we propose a novel approach of in-situ big data processing on cloud storage by dynamically offloading data-intensive jobs from compute cluster to storage cluster, and improve job throughput. However, it is challenging to achieve this goal since introducing additional workload on the storage cluster can significantly impact interactive web requests that fetch cloud storage data, with strict SLA (service-level agreement) for tail latency. In this work, we present MPLEX, a system that augments data analytics-as-a-service by efficiently multiplexing compute and storage cluster to improve job throughput without violating the SLA of cloud storage service in terms of tail response time. It applies an SLA-aware opportunistic job scheduling technique supported by a machine learning based prediction model to exploit the dynamic workload conditions in the compute, and storage cluster. 
Performance evaluations on an OpenStack Swift cluster, and an OpenStack based virtual cluster of Hadoop VMs built atop NSFCloud's Chameleon testbed show that MPLEX improves the Hadoop job throughput by up to 1.7X, while maintaining the SLA for cloud storage service requests.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.18" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/87cd0d02a934acfd7a24a0c0915b901100fa21ac", "sources": [ "DBLP" ], "title": "MPLEX: In-Situ Big Data Processing with Compute-Storage Multiplexing", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "87f1b5f55dec0e782513491aa5baf3eb43bbed5f": { "authors": [ { "ids": [ "1804361" ], "name": "Dong Dai" }, { "ids": [ "3519489" ], "name": "Yong Chen" }, { "ids": [ "2797656" ], "name": "Philip H. Carns" }, { "ids": [ "37234038" ], "name": "John Jenkins" }, { "ids": [ "40211322" ], "name": "Robert B. 
Ross" } ], "doi": "10.1109/PACT.2017.14", "doiUrl": "https://doi.org/10.1109/PACT.2017.14", "entities": [ "Business process", "High availability", "Jumpstart Our Business Startups Act", "Lightweight Portable Security", "Requirement", "Supercomputer", "Usability" ], "id": "87f1b5f55dec0e782513491aa5baf3eb43bbed5f", "inCitations": [], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "117-129", "journalVolume": "", "outCitations": [ "114f664a25c411b708c0f0058c3f455b5a1f3e9b", "49cd4b3efdc10090150523e14b36de7ec9ff8755", "47ea1f35e4ba08444628a3393bc58aafda9666df", "157d932bfc401b28b94e2bf642445b2b951237b7", "0323b626078b11e63509339771c20a7e283a1d70", "2077cc18da002721390a23392ce4a25d19c3e2a2", "67f1ddc1d044a9c68457514e67ff28a7e087dfd0", "6561b6a55114cc1da9f0f09d0396bb6b508a6fe9", "4577a47c29ce9f137eb642074d1f07c65709fb3c", "4181c0d8284d8737f61232365747690297e31c82", "05a618847e4f08e5bca29dff732757779722b2e0", "589e89d77f689ebfc3f36bc1f76fd518ae4a237c", "3f6e88b1eb1e06cdd742b6037c4ef61477a41aaf", "625272057c97231155f75dc622312f5d32978d4a", "6409e64aed68fcc9e3fdc35b87dd168eeb440d32", "5121837e40f54742fbd26503c7ca76e68ced467a", "9c7566f3820b0ae59adadc7249bb972668d0845d", "0c027f312b84ec40d0920dc912c4b591ab78d15e", "4578afb3d3108a9064f2299b47f2f32cb94926ee", "6beef89c6d38aaa26f1ddabfb0ad54d621094d6f", "1b4f194321f8f3219306a040c0d6d8c70dca1dcb", "48f7dfaafe60d40a115fda6bc143e3ca181e4657", "4612278f5ff220edf2a46404978626faf2ef4dbb", "e16f7aa1b63e29255a2fe8f9fb1159fb50f90e61", "2809d4876e34b8c64fc1783fe6a0a278770505b0", "a15412c4e8f4bf55abfbec4d19aeea604c5749da", "35339f6f2e99c04920f21883df1db8004436cdc7", "0bae160fd88c8422b303e6db2d5b99e17bf61bb9", "ddd526d70a299c782d31d8625d789b083c32dd8b" ], "paperAbstract": "Provenance describes detailed information about the history of a piece of data, containing the relationships among elements such as users, processes, jobs, and workflows that contribute to 
the existence of data. Provenance is key to supporting many data management functionalities that are increasingly important in operations such as identifying data sources, parameters, or assumptions behind a given result; auditing data usage; or understanding details about how inputs are transformed into outputs. Despite its importance, however, provenance support is largely underdeveloped in highly parallel architectures and systems. One major challenge is the demanding requirements of providing provenance service in situ. The need to remain lightweight and to be always on often conflicts with the need to be transparent and offer an accurate catalog of details regarding the applications and systems. To tackle this challenge, we introduce a lightweight provenance service, called LPS, for high-performance computing (HPC) systems. LPS leverages a kernel instrument mechanism to achieve transparency and introduces representative execution and flexible granularity to capture comprehensive provenance with controllable overhead. Extensive evaluations and use cases have confirmed its efficiency and usability. 
We believe that LPS can be integrated into current and future HPC systems to support a variety of data management needs.", "pdfUrls": [ "http://myweb.ttu.edu/ddai/papers/dong-pact-lps-talk-2017.pdf", "http://myweb.ttu.edu/ddai/papers/dong-pact-lps-2017.pdf", "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.14" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/87f1b5f55dec0e782513491aa5baf3eb43bbed5f", "sources": [ "DBLP" ], "title": "Lightweight Provenance Service for High-Performance Computing", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "8814ba7515481fe16afc79cb41b05d4aa58f2df2": { "authors": [ { "ids": [ "8573809" ], "name": "Xulong Tang" }, { "ids": [ "40211216" ], "name": "Ashutosh Pattnaik" }, { "ids": [ "2127651" ], "name": "Huaipan Jiang" }, { "ids": [ "2163220" ], "name": "Onur Kayiran" }, { "ids": [ "2111543" ], "name": "Adwait Jog" }, { "ids": [ "33403976" ], "name": "Sreepathi Pai" }, { "ids": [ "1707927" ], "name": "Mohamed Ibrahim" }, { "ids": [ "1717494" ], "name": "Mahmut T. Kandemir" }, { "ids": [ "8948708" ], "name": "Chita R. 
Das" } ], "doi": "10.1109/HPCA.2017.14", "doiUrl": "https://doi.org/10.1109/HPCA.2017.14", "entities": [ "Baseline (configuration management)", "Dynamic programming", "Graphics processing unit", "Kernel (operating system)", "Parallel computing", "Scheduling (computing)", "Speedup" ], "id": "8814ba7515481fe16afc79cb41b05d4aa58f2df2", "inCitations": [ "2d1b2392585b09297dd79a14ca3fb853133d64e3", "4d811c5cc0b805cd8b6ffe3a4e82bca9e1487e74", "bf811d9008d469c72f7e75f277d82136d553cc2b", "e967dbdb1236627b440e7fa2256c5ed27f5e0bb2", "7c036d5a4b79a735b279423358af4e8df6f7ec81" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "649-660", "journalVolume": "", "outCitations": [ "28cc7453c5f3f9ecb9415e631b0829ec9af8a4c3", "938574649516c7690ce05891ef499760b9a0553b", "157d5b2488d953b7c88abc36791c2e897c152395", "2379c027e7376bb76978602a7b185dfa73a9cd35", "40138cbd57a4632d6267cff4c91b55e7376a6693", "2462dc12b9ffda182ab894ee55938249420b81af", "30c0accd81025eb203d63fce539ff0a99be1b87f", "ad702abe479b86ed043f83d6475a2e82c4718c9a", "08104146873817cc35cbd96d7ca3e5169cb72296", "174b4cb435c87e421c973ce59ccf5b06e09aa8af", "0425f1e7e8651b5ba3c9e2eb98a3c50a07146972", "e91c7049e519a3d43fac7c1e75f71a5de5165938", "b298696bb75c3eec7a64746eacfa8fb262b38be6", "1fd8d8ffd5d478e4fbb530551de1a8e096fbc263", "387d5b24317395ae7a86c8ecc9403ac62ed6febe", "85f5b05e7eba438f1fc4bc6e21cc1af00c424fbc", "3c84f22df1948dfa8b1b14bbd4c850baf9c5b632", "367d34d830482b349c73f373717a079d335c03e5", "5236710748ceb864a91e9fb4efac905114d8f1ef", "4339f17c10b91d2def6e16ab981d7b5428e6d82c", "5db195c9157a8178c89e69d413d08c1725a11267", "1d809d4ea4f22d9e0df6ba1549d87d8aa45512af", "eed632d5672f6445952c8dad122b4746846ef34b", "2d6f002477015469075954c6748a1a85af352c94", "14724c356106ae50746318b1bdd27d9b684c7d11", "eec6d4664256c49a9e312b17f7455121cac90b25", "a425a436ac21c8f913cc59a3ad7f8c17f57ed192", "2724de31317b1b9e026b5f90251829ee02f3fa3f", 
"f359d33a1c09d2f626217e21f722508968c7057b", "347a08cd9ada1cee83713d24ec84ed49ab121987" ], "paperAbstract": "Dynamic parallelism (DP) is a promising feature for GPUs, which allows on-demand spawning of kernels on the GPU without any CPU intervention. However, this feature has two major drawbacks. First, the launching of GPU kernels can incur significant performance penalties. Second, dynamically-generated kernels are not always able to efficiently utilize the GPU cores due to hardware-limits. To address these two concerns cohesively, we propose SPAWN, a runtime framework that controls the dynamically-generated kernels, thereby directly reducing the associated launch overheads and queuing latency. Moreover, it allows a better mix of dynamically-generated and original (parent) kernels for the scheduler to effectively hide the remaining overheads and improve the utilization of the GPU resources. Our results show that, across 13 benchmarks, SPAWN achieves 69% and 57% speedup over the flat (non-DP) implementation and baseline DP, respectively.", "pdfUrls": [ "http://www.cse.psu.edu/hpcl/docs/2017_HPCA_Xulong.pdf", "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.14", "http://xzt102.github.io/publications/2017_HPCA_Xulong.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8814ba7515481fe16afc79cb41b05d4aa58f2df2", "sources": [ "DBLP" ], "title": "Controlled Kernel Launch for Dynamic Parallelism in GPUs", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "883a39fe55b14c0b60fea777fc06f271e2966d44": { "authors": [ { "ids": [ "1758267" ], "name": "Xu Zhao" }, { "ids": [ "8024529" ], "name": "Kirk Rodrigues" }, { "ids": [ "38173241" ], "name": "Yu Luo" }, { "ids": [ "1696433" ], "name": "Michael Stumm" }, { "ids": [ "2042324" ], "name": "Ding Yuan" }, { "ids": [ "25462708" ], "name": "Yuanyuan Zhou" } ], "doi": "10.1145/3132747.3132778", "doiUrl": 
"https://doi.org/10.1145/3132747.3132778", "entities": [ "Algorithm", "Apache HBase", "Apache Hadoop", "Debugging", "Deployment environment", "Information theory", "Overhead projector", "Printing", "Programmer", "Word-sense disambiguation" ], "id": "883a39fe55b14c0b60fea777fc06f271e2966d44", "inCitations": [ "68f78b6630d89319ec17ec1e9178e739f7deec68" ], "journalName": "", "journalPages": "565-581", "journalVolume": "", "outCitations": [ "32a7c5e10a09b5532d56af50ef2f60d9776cc56a", "468bff8de97b7380ff497eea8eec3c9621218a40", "114801eccb5eb0831fd1848f351a138253a42f15", "027eb436c35c7e293e7ebc565163cb54c05fe2e9", "53aabc0ab7bdb22c4bb5b508a4db2fc4a2387060", "12a233efbdd874afdeb8a1e6fe71c4ccff758175", "0f28af5e2f0ec33a29c5b12e5e5be78c8f9d14e8", "63c2432b7f61357ba520e0bdccd07ec7afb4be61", "10da8673314188dd6ab1f16f73c05358771dd8cf", "8932c6d523f9f5aba76993c71c0089ba15155a7c", "2a0d9931c1e794ffe96c4b6c37507e96a3c4cafc", "0f6863ca14fc33a87f03ac46051b39fe2541cdf1", "28f7f43774bce41023f9912a24219e33612a3842", "7e411405e8fbf47a6b585bbb310ff03cd4eb51ce", "2a85b683073c2c8b762079c52a0d54392b243afb", "0e578433d4e8bb2a571c87a2d22816074902f009", "959cfe05045e1c7e80406209244d3346061ca4e6", "29b4f606b66b3ab22d04a3fb9db1120d79123fb7", "9e819f5cd459026bcbe97ad42db0647b1dc7c6a9", "1d15930cd9e4ececf22fd96bf9ba52f12dc0665e", "6bb4c541452795997f894a16c0c184faf2a673f9", "4f739534a366799e170599d3ff3d65597f0118db", "55cde409703468c3b7bb1797738affb5f44c8f3f", "808fadaaa7d7091e95809f419959917bb6ce4a6d", "059476c845d38253efad824010ed15df14941d85", "3420487a5805d2cc8416ed7065568c96f2f26142", "4af63ed343df388b6353b6fc77c7137d27822bf4", "113772329678792fc2a3a8cb9322c164547f88a0", "37c04a742561ac2e2fd0069a9e2f92a048df4c0e", "9ee6209432316baf6776838917e06bca4d874747", "28227157274b1d2fa8f2b67ba459de24c15fd765" ], "paperAbstract": "When systems fail in production environments, log data is often the only information available to programmers for postmortem debugging. 
Consequently, programmers' decision on where to place a log printing statement is of crucial importance, as it directly affects how effective and efficient postmortem debugging can be. This paper presents Log20, a tool that determines a near optimal placement of log printing statements under the constraint of adding less than a specified amount of performance overhead. Log20 does this in an automated way without any human involvement. Guided by information theory, the core of our algorithm measures how effective each log printing statement is in disambiguating code paths. To do so, it uses the frequencies of different execution paths that are collected from a production environment by a low-overhead tracing library. We evaluated Log20 on HDFS, HBase, Cassandra, and ZooKeeper, and observed that Log20 is substantially more efficient in code path disambiguation compared to the developers' manually placed log printing statements. Log20 can also output a curve showing the trade-off between the informativeness of the logs and the performance slowdown, so that a developer can choose the right balance.", "pdfUrls": [ "http://www.eecg.toronto.edu/~yuan/papers/p125-Zhao.pdf", "http://doi.acm.org/10.1145/3132747.3132778", "http://log20.dsrg.utoronto.ca/log20_sosp17_paper.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/883a39fe55b14c0b60fea777fc06f271e2966d44", "sources": [ "DBLP" ], "title": "Log20: Fully Automated Optimal Placement of Log Printing Statements under Specified Overhead Threshold", "venue": "SOSP", "year": 2017 }, "884e104c13102e1353e85a6a91e41d3cff2c80f5": { "authors": [ { "ids": [ "1706667" ], "name": "Haibo Zhang" }, { "ids": [ "39786262" ], "name": "Prasanna Venkatesh Rengasamy" }, { "ids": [ "1896477" ], "name": "Shulin Zhao" }, { "ids": [ "2456813" ], "name": "Nachiappan Chidambaram Nachiappan" }, { "ids": [ "1743609" ], "name": "Anand Sivasubramaniam" }, { "ids": [ "1717494" ], "name": "Mahmut T. 
Kandemir" }, { "ids": [ "2049950" ], "name": "Ravi Iyer" }, { "ids": [ "8948708" ], "name": "Chita R. Das" } ], "doi": "10.1145/3123939.3123948", "doiUrl": "https://doi.org/10.1145/3123939.3123948", "entities": [ "Baseline (configuration management)", "Cache (computing)", "End-to-end principle", "Handheld game console", "Macroblock", "Mobile device", "Stock and flow", "Streaming media", "Systems design", "Video decoder", "Video display controller", "Video processing" ], "id": "884e104c13102e1353e85a6a91e41d3cff2c80f5", "inCitations": [ "651ae380b5d500c613770dbf55c175c52576d7da", "2d1b2392585b09297dd79a14ca3fb853133d64e3", "0ec62e028c2088f1aef01f2e65d167faf1d3569e", "957e98a2084f6c2d22694aadd22f57070b5d7e23" ], "journalName": "", "journalPages": "517-531", "journalVolume": "", "outCitations": [ "85f5b05e7eba438f1fc4bc6e21cc1af00c424fbc", "4ebbbeab6e0f4ba9815889854441548fa414e16b", "32820f7d69f1ac020d2ce8bc5254725a7797f447", "d4f1440424385311516b6426aee852c9852f9bb7", "3aace85faabe82a9f4136db22a08a0fb67aa98e7", "8cfa975a656838356dc4b211b6c2186bc2601a05", "14d1e5f11fc26dc63145fc665a2567e816326114", "7c036d5a4b79a735b279423358af4e8df6f7ec81", "dbf51cedf0c1a31dbfae837c00157f2948d3b300", "96613e18a42af8184cfcd75dc44dbfcda26a402f", "086699da0528ed47463cea3108851bd3dc5ba715", "53b402418835e6f34b8a9e5ea51440bbdd02581e", "0d6ec6020d8e9e51bf7aa0dc6812c6c03d7fbf6d", "3af0a17e79061462dbc2823f64ea188136271713", "a3eb0826dd5d88669d506c0cbfb0f3dc90937fed", "5cdba218d49d0e7977c370e4e667fdb0925acc85", "19324b3315dd6649d3232d999d9cf71e76b0d43d", "082caa1f44381aa93a59b8ad257cea4e9657974a", "7068187c2c0f3804a9adcb399131da56ad8fcde5", "309ad0357af7722a24192781340881390055a3db", "013b529f4ec9c1d9ddcef88a1a1f4b0efcc0c9c5", "2c83f60dd38199526d155b5e5a3f8a1dcfc60543", "6fac8e881b5d754fc80bd3ad855a2a03511572f9", "29235e0e2406ba49ca29e02265d53991234f0113", "57ecf98b0dfd305fe058fdb43e7675f139c754e7", "abf7957411b47667f5044810a192f7b57deeb5f9", "221e8f04737a2ae514603df0bbf622d66dcdb4bf", 
"1b938edfde3b3b04c13599c2db87c72b7962f383", "482713333307098f425f563906413de1915f700d", "0ec62e028c2088f1aef01f2e65d167faf1d3569e", "174b4cb435c87e421c973ce59ccf5b06e09aa8af", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "02febacc509320c0390a18e4975132e3a44018e7", "1cb9820481d1974b7ef1eb9ee3eaa3fae381fffb", "b8dc9473470ab786a7ec425171d1f49992a571b5", "1914bdccc0b3c87fb92b47cf035acb6a2df84dbe", "33708fe61813316f2810616d28f94c4eac4036bb", "cf0547f03ead7ce36f24a9f055f87d81533b21ac", "14d7568e2095059cef48c1e31ffd9b65f06a1964", "c9675b105100e44caee24d721185b2a0022d8ada", "0fed2cde05ba2cad24909d4034ebcf0372192400", "12203385fbe8e26aefa1d82c9effaacb44f27a98", "6fc11128ac6b106280d76bf57eacc00236b3c517", "11f4defb917bd9eda6a87ac3fefc0c1b95724891", "71ba04c22515152b06f0fef4d785f8b8056e4a82", "2d1b2392585b09297dd79a14ca3fb853133d64e3", "83587c76d3486a94c4744479ce7b37f06d454ec1", "0653e2ed9f683868cb4539eb8718551242834f6b", "82ba8bacafb288f1e9c4bf375e41000fe2a3eef6", "cf64cdc889a4edaf641a307aa2b11d89d4d10a09", "87a4156fc53e76450b4766cea45edd4bb7e84b7d", "3364bc50921a9566d61ef8cb73baa82341725e4b", "00ab25c6582d543932fccbb0f15fe93445f95d61", "b4d2a829abb4a8ae8aac85eb7f2c506b666bfeb6", "7df306b302b529e2c8b49169e4885678e2bb6bc0", "094de78aeb52744f28b0c244bb2bf9ab1550cd35", "390ddad3234caab99c97cf23a6715e2bad5a637d", "549cca620961e5093e315a4b0f9e670da3ff258f" ], "paperAbstract": "Video streaming has become the most common application in handhelds and this trend is expected to grow in future to account for about 75% of all mobile data traffic by 2021. Thus, optimizing the performance and energy consumption of video processing in mobile devices is critical for sustaining the handheld market growth. In this paper, we propose three complementary techniques, race-to-sleep, content caching and display caching, to minimize the energy consumption of the video processing flows. 
Unlike the state-of-the-art frame-by-frame processing of a video decoder, the first scheme, race-to-sleep, uses two approaches, called batching of frames and frequency boosting to prolong its sleep state for saving energy, while avoiding any frame drops. The second scheme, content caching, exploits the content similarity of smaller video blocks, called macroblocks, to design a novel cache organization for reducing the memory pressure. The third scheme, in turn, takes advantage of content similarity at the display controller to facilitate display caching further improving energy efficiency. We integrate these three schemes for developing an end-to-end video processing framework and evaluate our design on a comprehensive mobile system design platform with a variety of video processing workloads. Our evaluations show that the proposed three techniques complement each other in improving performance by avoiding frame drops and reducing the energy consumption of video streaming applications by 21%, on average, compared to the current baseline design.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123948", "http://www.cse.psu.edu/hpcl/docs/2017_MICRO_Zhang.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/884e104c13102e1353e85a6a91e41d3cff2c80f5", "sources": [ "DBLP" ], "title": "Race-to-sleep + content caching + display caching: a recipe for energy-efficient video streaming on handhelds", "venue": "MICRO", "year": 2017 }, "885f97b8c11ceae6a17e9a5027997b8c582579af": { "authors": [ { "ids": [ "1890969" ], "name": "Wen Sun" }, { "ids": [ "32194204" ], "name": "V\u00e9ronique Simon" }, { "ids": [ "2102057" ], "name": "S\u00e9bastien Monnet" }, { "ids": [ "2282672" ], "name": "Philippe Robert" }, { "ids": [ "1798389" ], "name": "Pierre Sens" } ], "doi": "10.1145/3084462", "doiUrl": "https://doi.org/10.1145/3084462", "entities": [ "Algorithm", "Apache Hadoop", "Balanced line", "Clustered file system", "Distributed computing", "Durability 
(database systems)", "Google File System", "Hard disk drive", "Persistence (computer science)", "Server (computing)", "Simulation", "Spatial variability" ], "id": "885f97b8c11ceae6a17e9a5027997b8c582579af", "inCitations": [], "journalName": "POMACS", "journalPages": "24:1-24:21", "journalVolume": "1", "outCitations": [ "57a313d10cab7b33bae04eab8a44a27dd0af3ec8", "a2f9c297485360bc46e3cd9cfd7561176290a7f3", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "415b66c3e229a7f4c6f19e0595e385f522c09153", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "ad18f5c7adc0072b2f1d2dfafabf44a67c4cffe2", "18a5f443299784479e78d9e77f175af57cb2fa2b", "1886edb4e771c1c0aa7bae360d7f3de23ac4ac8e", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "07745dca3ddfe267ccd7ad30cb6d4877f16389cc", "69fe4e4710c7a53b1dd36a5900b3a824707529e8" ], "paperAbstract": "Distributed storage systems such as Hadoop File System or Google File System (GFS) ensure data availability and durability using replication. Persistence is achieved by replicating the same data block on several nodes, and ensuring that a minimum number of copies are available on the system at any time. Whenever the contents of a node are lost, for instance due to a hard disk crash, the system regenerates the data blocks stored before the failure by transferring them from the remaining replicas. This paper is focused on the analysis of the efficiency of replication mechanism that determines the location of the copies of a given file at some server. The variability of the loads of the nodes of the network is investigated for several policies. Three replication mechanisms are tested against simulations in the context of a real implementation of a such a system: Random, Least Loaded and Power of Choice.\n The simulations show that some of these policies may lead to quite unbalanced situations: if β is the average number of copies per node it turns out that, at equilibrium, the load of the nodes may exhibit a high variability. 
It is shown in this paper that a simple variant of a power of choice type algorithm has a striking effect on the loads of the nodes: at equilibrium, the distribution of the load of a node has a bounded support, most of nodes have a load less than 2β which is an interesting property for the design of the storage space of these systems. Stochastic models are introduced and investigated to explain this interesting phenomenon.", "pdfUrls": [ "https://arxiv.org/pdf/1701.00335v2.pdf", "http://arxiv.org/abs/1701.00335", "https://arxiv.org/pdf/1701.00335v1.pdf", "http://doi.acm.org/10.1145/3078505.3078531", "http://doi.acm.org/10.1145/3084462" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/885f97b8c11ceae6a17e9a5027997b8c582579af", "sources": [ "DBLP" ], "title": "Analysis of a Stochastic Model of Replication in Large Distributed Storage Systems: A Mean-Field Approach", "venue": "SIGMETRICS", "year": 2017 }, "8862841f91bea97d39872eb2aa954bd6e6b570da": { "authors": [ { "ids": [ "2661057" ], "name": "Andrew Ferraiuolo" }, { "ids": [ "1741005" ], "name": "Rui Xu" }, { "ids": [ "2968310" ], "name": "Danfeng Zhang" }, { "ids": [ "1732157" ], "name": "Andrew C. Myers" }, { "ids": [ "2419859" ], "name": "G. 
Edward Suh" } ], "doi": "10.1145/3037697.3037739", "doiUrl": "https://doi.org/10.1145/3037697.3037739", "entities": [ "ARM architecture", "Central processing unit", "Computer security", "Data-flow analysis", "Experiment", "Hardware description language", "Information flow", "Information flow (information theory)", "Multi-core processor", "Open-source hardware", "Programmer", "Type system", "Verification and validation" ], "id": "8862841f91bea97d39872eb2aa954bd6e6b570da", "inCitations": [ "2125b5c2ce173e25a3faa37985439455578a1438", "9ca7bf8864df3ae5b340e4f5e8cc837bd270f9a0", "43ef02647ad28e9b31572dd47cce39a965336776", "b57e082caeb64bdaf30759287dfed6cb3fa6b484", "882a9148b069df982bc08e7b1a71523ee2010872", "5ec75be1f4e79eaadeb748ace4eccb310b26d8b0", "49e88c6bcaea88ddeccd6fb19fee950137819d3e", "210a8fcd5ceb4ebc3bd5778f5aec5d4ad1cae3f3", "5f675d1b2c9598b2ede2561421ecce7a0a9b7580" ], "journalName": "", "journalPages": "555-568", "journalVolume": "", "outCitations": [ "4204fad49d84c19156fa8b08bbf7942cde8f5aa1", "2cb02a9e43dfb7c6f4d33cd774eec7d94d2db680", "81983cc18985dc200316f3633445add123d0bda5", "03f02ace66cd558fa4dd465bc988edbe6cee8d5e", "09dce8e6947261600ec145f4544ede7ae5dc437e", "1fb49ae43195232f0b3d1c9d534a5aa03bdd8f26", "0e4b25d4b2a542ce7d459d7ae7bf8e5eb0367e25", "194f92ac984c8ec7d354c50ece491b3ae2e3bb51", "0025870ef15a8f2858ff4186329d4bde316e9e01", "3ffc94c7066b4856b1cfae99ab66cd20310e41dd", "082a3d4886a28e046c92796f86dd7ec7f7e77d25", "2ed8c0aacd2d905a550a184fe861bfff6576281c", "83773eb750de5b5e126adec959dc9905aff2c232", "104eb3716d8b2d7e6d69805357d5b7fb87caff3e", "3457738844208e1b4aeb2f1a0971ec39216e3000", "01fde8698110cf46ff48a17c65f2658dab4c323c", "19ca5f86807610a7aed1008155c7105e43808d4f", "092b09f0ec09b2b10763f5697ca77099a37ab022", "b4d9a259db32a92c162ed44e89d3a0cfdacd3f67", "5f2b22b77559ddb4f3734459d1ff66c58d22df12", "03ad81f6276792a78312471429fc9495b89a1ffc", "24e74d608559e176ad01f95b3b1f3221e474f357", "4ea1a23b31a0c3c6c63edb6d5e22943f3a214739", 
"b57d19d648800779764498c663d42c2ffc2264a9", "0547bd2bd244a16437e73080cce81a62ac30142b", "9396371baa0f755a6e766c12eb102a97a3bc5562", "0003ce240eb8c05cee9c56c54e16c0e3b84390dd", "2b6df21137f30d25494bb58521a6062f93e915f8", "18a41be780aa642c84a2c6850397fab147e0ac27", "454b134e0ad83921cbe13f4e4332c79b93aa7612", "24706cbf8c48414ed66db6fbf223c47452f7cbdc", "2b3cdf37bff57e29fb5aecc136603f16c855366b", "1924732c99bfdf5252b4220b7e5f98d744856661", "182bc7c29d1a96604121c1e2466eeb74a9ddca1f", "12a1c765683c65b4c63b0e2be0813ebe3552d7ab" ], "paperAbstract": "Hardware-based mechanisms for software isolation are becoming increasingly popular, but implementing these mechanisms correctly has proved difficult, undermining the root of security. This work introduces an effective way to formally verify important properties of such hardware security mechanisms. In our approach, hardware is developed using a lightweight security-typed hardware description language (HDL) that performs static information flow analysis. We show the practicality of our approach by implementing and verifying a simplified but realistic multi-core prototype of the ARM TrustZone architecture. To make the security-typed HDL expressive enough to verify a realistic processor, we develop new type system features. Our experiments suggest that information flow analysis is efficient, and programmer effort is modest. 
We also show that information flow constraints are an effective way to detect hardware vulnerabilities, including several found in commercial processors.", "pdfUrls": [ "https://cseweb.ucsd.edu/~dstefan/cse291-winter18/papers/ferraiuolo:verification-annotated.pdf", "http://doi.acm.org/10.1145/3037697.3037739", "https://tsg.ece.cornell.edu/lib/exe/fetch.php?media=pubs:secverilog-asplos17.pdf", "http://www.cs.cornell.edu/andru/papers/trustzone/asplos17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8862841f91bea97d39872eb2aa954bd6e6b570da", "sources": [ "DBLP" ], "title": "Verification of a Practical Hardware Security Architecture Through Static Information Flow Analysis", "venue": "ASPLOS", "year": 2017 }, "88646d19e0111eed813e5cd326889556b0379d29": { "authors": [ { "ids": [ "1711920" ], "name": "Qi Zhu" }, { "ids": [ "40485705" ], "name": "Bo Wu" }, { "ids": [ "37914192" ], "name": "Xipeng Shen" }, { "ids": [ "40391089" ], "name": "Li Shen" }, { "ids": [ "1690770" ], "name": "Zhiying Wang" } ], "doi": "10.1109/IPDPS.2017.124", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.124", "entities": [ "Algorithm", "Graphics processing unit", "Java Caps", "Jumpstart Our Business Startups Act", "Scheduling (computing)", "Throughput" ], "id": "88646d19e0111eed813e5cd326889556b0379d29", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "967-977", "journalVolume": "", "outCitations": [ "2ae2d80ffb19521bcd7fdbf26e9ed2a5d9641bb0", "cdf26f11e3280497b02d168aaa6edd7b820e7800", "7f4ae7c055c638a07ab1e4d5030be9007fcca3a9", "96def001f76a9254345ccc7ced9d1b81bbeed1b1", "34d67f1f2578013ec828f2840e28bc3961d8a98c", "aaa59e57b2f96ead6a61592e968c0b9907c6d128", "8823b1c6aee66ed0a36240a884a8a71578c5eacb", "60f068dea641df784a379411c57aa8f2b23d1a98", "c111caed42f318d59e3d30bfd875bcee8581652b", "02b141ddc423469afde9c99cf76028095ef28127", "45de7137919e20513db715056da4d853caec603a", 
"83bbec4d4f56b5631f48607b7b6c75a505a8b448", "f016d23ffca72cdf1eb584613452720eaacafd9c", "2f4b9ac4a0694f0b1681348334befba0bfe9d897", "8bd32accc1244ba9add521ca5605f397374de518", "3462fb38042f0bde20c758728d7c8c28a1f47e09", "4b30df44b074595ee73b67ea901e44c7c440ef73", "13cf7f6f047ab5bed7eb419a604dd1668f0ee9c1", "3ee47780011ee618bd5a64624a662375e1958e0a", "66f47514ca03fd4a52121836bdb767bb5fac95cc", "8db3c11cd85195f459b8ba82fe3326e8f86f1d52", "441f99e6b0e40ce36716252a0b0a1c97690170d1", "832573e996403371beb2403821df33d8082c1121", "0e935b1f654b5e3fa12f7dc6277bdc7f2ca36c9a", "96d2e5456b8d7b8ad763781a16b61beabf2d7fcf", "1108af609469e420aeae551ba8a893c3200e07fa", "839e7a4fdc95da41c34eafb4f5b0517ba8c759a6", "b04c9e851ae605592d693aa65f0d753b8af08feb", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "02308b39a7a53b6e313a7acbd1f1896292cebe01" ], "paperAbstract": "This paper presents the first systematic study on co-scheduling independent jobs on integrated CPU-GPU systems with power caps considered. It reveals the performance degradations caused by the co-run contentions at the levels of both memory and power. It then examines the problem of using job co-scheduling to alleviate the degradations in this less understood scenario. It offers several algorithms and a lightweight co-run performance and power predictive model for computing the performance bounds of the optimal co-schedules and finding appropriate schedules. 
Results show that the method can efficiently find co-schedules that significantly improve the system throughput (9-46% on average over the default schedules).", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.124", "https://people.engr.ncsu.edu/xshen5/Publications/ipdps17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/88646d19e0111eed813e5cd326889556b0379d29", "sources": [ "DBLP" ], "title": "Co-Run Scheduling with Power Cap on Integrated CPU-GPU Systems", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "88697967428bfbe8cc36f2c9ba0e44b87495fe7f": { "authors": [ { "ids": [ "2423655" ], "name": "Petr Stefan" }, { "ids": [ "1881944" ], "name": "Vojtech Hork\u00fd" }, { "ids": [ "2605317" ], "name": "Lubom\u00edr Bulej" }, { "ids": [ "1763212" ], "name": "Petr Tuma" } ], "doi": "10.1145/3030207.3030226", "doiUrl": "https://doi.org/10.1145/3030207.3030226", "entities": [ "Java", "Performance Evaluation", "Software performance testing", "Unit testing" ], "id": "88697967428bfbe8cc36f2c9ba0e44b87495fe7f", "inCitations": [ "ee0b3bad0aa922146b57e82048f3a2b5dd12f991", "31dc16067f6733ac00d80bea78a065177396f8f1", "7c096c79fa4932c283cbc86732ef9cf2d724097e", "260fd481ee3896bc89e1b8e26d3cca2973bbbfd9" ], "journalName": "", "journalPages": "401-412", "journalVolume": "", "outCitations": [ "c6ae67e4be57f02b492fe12abf9776657f371cc4", "905e3daea4c2fb235c6cf840c48bcd94ef6b278f", "66869075d20858a2e9af144b2749a055c6b03177", "6c8a6d07fa5fa555aeb2864e32380bd17c36c6fd", "4ef336dc70511a5a1eb2d806c5f076936e2403fa", "d6a515ef291c09c53687c0e681292593804cdb86", "083d9e6c3dc08692ab35123ca4f16348a935f8c6", "d53abdc1a13158c7c1133c123d86288b2c33ddc1", "4fa8bacddbbb5f0bdd54ed61320ed883ec15bbe8", "3f6eb56461bb589604a0aeefc355ce7ea3345280", "96d6ababe5f2a407252adbd9f811a9a982e7b6ca", "7b1592460f7282039e0cd9b369adbd9f63a99e2e", "4bd1944714ca025237957053170206c14f940cbc", 
"4dcf5e7eed29c6707a8e1a415c5a6713a23c1d91", "3544e9c91b42485e0f1e45849173ab0a421045f6", "4e8709545e56f2caed2923a8a4cc8b2433758a99", "50f2a2413124910eb735f4a0ba38f6b73c4f38d6", "c141ffa34f6f298052cbb40e7e22f1482c2024a8", "ad2fbcd08d0b1bb33e2b21dd0d14a54857c08e0f", "0a8317fc7cd3fb07e69aa2a11342898e22fc2f47", "47fcd425e6e2a2c8ca059acf5c151a9da115c14c", "72935e6f33c37de679323b3e23793e87f67f3e05", "1609c49786d105376b0a31e4faeeeaa17a6940a7", "51bbe69041420c58ca00dd5241ae4757ce6241da", "730c13a11bd311ea8b2d43751f8806f424799240", "032f1a16ad4cd815ca5cbf3dbfca2714007a1a2e", "21edcc4fc055c2613c23673245d19dff17e73cdf" ], "paperAbstract": "Although methods and tools for unit testing of performance exist for over a decade, anecdotal evidence suggests unit testing of performance is not nearly as common as unit testing of functionality. We examine this situation in a study of GitHub projects written in Java, looking for occurrences of performance evaluation code in common performance testing frameworks. 
We quantify the use of such frameworks, identifying the most relevant performance testing approaches, and describe how we adjust the design of our SPL performance testing framework to follow these conclusions.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030226" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/88697967428bfbe8cc36f2c9ba0e44b87495fe7f", "sources": [ "DBLP" ], "title": "Unit Testing Performance in Java Projects: Are We There Yet?", "venue": "ICPE", "year": 2017 }, "888985e15d2bd58baac82097b2f64f1c1d03fff9": { "authors": [ { "ids": [ "3299496" ], "name": "Jeff Rasley" }, { "ids": [ "1772774" ], "name": "Yuxiong He" }, { "ids": [ "38768999" ], "name": "Feng Yan" }, { "ids": [ "2537545" ], "name": "Olatunji Ruwase" }, { "ids": [ "9430958" ], "name": "Rodrigo Fonseca" } ], "doi": "10.1145/3135974.3135994", "doiUrl": "https://doi.org/10.1145/3135974.3135994", "entities": [ "Algorithm", "Deep learning", "Machine learning", "Scheduling (computing)", "Sparse matrix", "Speedup", "Statistical model" ], "id": "888985e15d2bd58baac82097b2f64f1c1d03fff9", "inCitations": [ "7a0b1f7fe39629360f1766a480dd8903065a2854", "0cc35eb6cf8c0405219089f94b1bb7453453cbe2", "a636fdb0028bd0bc8bd5c4980191c07a95f1d795", "ceb3a9decbe8d910299751831fb1b07be85d7cb8", "662d788d5c75dc789186440656c7693912c4b1b6" ], "journalName": "", "journalPages": "1-13", "journalVolume": "", "outCitations": [ "54cd614a15ce790e2144e45e160596efd36e6316", "04fe6b11280c79b91c060934be66856877e532c6", "464d94b3dc9a109dd64008a41a00181830f285aa", "52d97890dbc290108136739ec2afe0f2b6c4f570", "044f0b1d5d0b421abbc7569ba4cc4bf859fd9801", "3d25e227fc8c0e7247d7b4209d1c0820e2076341", "7950620b31a66aa8fd226a66c85686c72c4e137a", "007ab5528b3bd310a80d553cccad4b78dc496b02", "210b3ccdc5d43ff218f894695a6ee8f1ff71a32f", "217135d666e8349ba6d7312a37bd1dd166c098ec", "dba141eddbbaa86f86a9831c83641ff5a7a28861", "63936fa32f9e75ab2a864daae6791ce02112183d", "4bb6263d482d8f8f9fc8aa0146b70ddca971a671", 
"35cd36289610df4f221c309c4420036771fcb274", "d7a4fb554d070fb548a25aec8d1205f766c184a2", "7b2ce6eb25ab840936ae6bf2cd49bc7f20785bc4", "9e18015bffe5e5f0ed7240e7af7ed19a934ae32f", "043afbd936c95d0e33c4a391365893bd4102f1a7", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "4954fa180728932959997a4768411ff9136aac81", "26f58c28de7469dc6b6846d37953bbbe3f4fc0e9", "5d90f06bb70a0a3dced62413346235c02b1aa086", "722fcc35def20cfcca3ada76c8dd7a585d6de386" ], "paperAbstract": "The quality of machine learning (ML) and deep learning (DL) models are very sensitive to many different adjustable parameters that are set before training even begins, commonly called hyperparameters. Efficient hyperparameter exploration is of great importance to practitioners in order to find high-quality models with affordable time and cost. This is however a challenging process due to a huge search space, expensive training runtime, sparsity of good configurations, and scarcity of time and resources. We develop a scheduling algorithm POP that quickly identifies among promising, opportunistic and poor configurations of hyperparameters. It infuses probabilistic model-based classification with dynamic scheduling and early termination to jointly optimize quality and cost. We also build a comprehensive hyperparameter exploration infrastructure, HyperDrive, to support existing and future scheduling algorithms for a wide range of usage scenarios across different ML/DL frameworks and learning domains. We evaluate POP and HyperDrive using complex and deep models. 
The results show that we speedup the training process by up to 6.7x compared with basic approaches like random/grid search and up to 2.1x compared with state-of-the-art approaches while achieving similar model quality compared with prior work.", "pdfUrls": [ "http://cs.brown.edu/~jeffra/papers/middleware17-final141.pdf", "http://doi.acm.org/10.1145/3135974.3135994" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/888985e15d2bd58baac82097b2f64f1c1d03fff9", "sources": [ "DBLP" ], "title": "HyperDrive: exploring hyperparameters with POP scheduling", "venue": "Middleware", "year": 2017 }, "88f408ffdf6eb960662a1a0dd409a13476f6790f": { "authors": [ { "ids": [ "2811961" ], "name": "Baijun Wu" }, { "ids": [ "26971044" ], "name": "John Peter Campora" }, { "ids": [ "37473728" ], "name": "Sheng Chen" } ], "doi": "10.1145/3133930", "doiUrl": "https://doi.org/10.1145/3133930", "entities": [ "Compiler", "Debugger", "Display resolution", "Error message", "Functional programming", "Machine learning", "Programmer", "Type inference", "Type safety", "Typing", "Usability" ], "id": "88f408ffdf6eb960662a1a0dd409a13476f6790f", "inCitations": [ "44eb1eb1013a35940fc1ab424dd1aad411d205f0" ], "journalName": "PACMPL", "journalPages": "106:1-106:29", "journalVolume": "1", "outCitations": [ "76a75933e5b13a94ddbe22d60d08b7d8940f246f", "5ab5f7d873819f63a5eefe91a8afcc65b84e7007", "58ceeb151558c1f322b9f6273b47e90e9c04e6b1", "bfcf14ae04a9a326f9263dcdd30e475334a96d39", "8efcddf353187bab640df05c7168802321d20606", "dcab98a689c5751c8d8716dd10a1f72751267781", "176a8e6017bba9b5041888f35e72565e593b6d41", "8e9ea8d195b535e5edd3c23acf2dbe4ec875dd29", "62f3eaec30521360445110584271e36880563f69", "18fc2aa116bf0d6a54eb658932146857ebe229cb", "3860ba33f0896bc6de2c8c4ccbd4bc119b10bcf9", "9cb40bbd6183d6fa7aee981386c4df694ba474e1", "0e5d9ca8c876c6881109445d6d96010c388752db", "5ae82d62ceba02154696ed1f5a6bc84596e8c9e6", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", 
"2987a79c48d5159051b102df2bead22bca5400e5", "2dd3fde015c45b0b4cf5b4fc3bc2b10be9d60a20", "48dd7dfade1813dc96af76eae5e8b79329074f35", "eb6b4c9ad52658825947ec4717e901f420f5fa9a", "1c2c08047cd8f7a56c707883077d6ed04b9e7bad", "2c57d368743217e65efe05d9e9413d022245ce87", "2d7551bb6127067d469a810d412c7b149b7d54cc", "61cd30bbfc5c630dfb8d07c2f1f3ba45daa494f1", "4bedc66519ffa38854483698aaa65d601584763e", "95ba3f28d01d721175a66dd3c0352ef9c6470a19", "8ac6be0e3ea62e9819d5a25da645f2d350474693", "2c18ec63b49ba81b4eb2b67ba30a9607b0662979", "7159b9525aa47e5d98f8e528bb40d5760de93bc5", "7b743215e4819b8e2100dde9ebf4cdde8b50347c", "0fba3766c7d613da8f35a2872f728c0c9e081092", "44eb1eb1013a35940fc1ab424dd1aad411d205f0", "01e949d4d5f0b869e906dd26c3564703bf0b7a97", "c85086843e6c90a6f7382334a965be546b0748fe", "e62009e4e87c38aa62907827babd10180fb45121", "7403fa3e56cee44e0f48185bb4a79d935eb9b01c" ], "paperAbstract": "Type inference is convenient by allowing programmers to elide type annotations, but this comes at the cost of often generating very confusing and opaque type error messages that are of little help to fix type errors. Though there have been many successful attempts at making type error messages better in the past thirty years, many classes of errors are still difficult to fix. In particular, current approaches still generate imprecise and uninformative error messages for type errors arising from errors in grouping constructs like parentheses and brackets. Worse, a recent study shows that these errors usually take more than 10 steps to fix and occur quite frequently (around 45% to 60% of all type errors) in programs written by students learning functional programming. We call this class of errors, nonstructural errors. \nWe solve this problem by developing Learnskell, a type error debugger that uses machine learning to help diagnose and deliver high quality error messages, for programs that contain nonstructural errors. 
While previous approaches usually report type errors on typing constraints or on the type level, Learnskell generates suggestions on the expression level. We have performed an evaluation on more than 1,500 type errors, and the result shows that Learnskell is quite precise. It can correctly capture 86% of all nonstructural errors and locate the error cause with a precision of 63%/87% with the first 1/3 messages, respectively. This is several times more than the precision of state-of-the-art compilers and debuggers. We have also studied the performance of Learnskell and found out that it scales to large programs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133930" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/88f408ffdf6eb960662a1a0dd409a13476f6790f", "sources": [ "DBLP" ], "title": "Learning user friendly type-error messages", "venue": "PACMPL", "year": 2017 }, "8935940fe5077f5862ddc5fc32ce7f396ce85a9a": { "authors": [ { "ids": [ "2052055" ], "name": "Sarat Sreepathi" }, { "ids": [ "1892474" ], "name": "Jitendra Kumar" }, { "ids": [ "2275691" ], "name": "Richard Tran Mills" }, { "ids": [ "7642805" ], "name": "Forrest M. Hoffman" }, { "ids": [ "33884461" ], "name": "Vamsi Sripathi" }, { "ids": [ "1791950" ], "name": "William W. 
Hargrove" } ], "doi": "10.1109/CLUSTER.2017.88", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.88", "entities": [ "Airborne Ranger", "Algorithm", "Big data", "CUDA", "Central processing unit", "Cluster analysis", "Computational problem", "Computer cooling", "Data mining", "Distributed memory", "Ecology", "GeForce 700 series", "Graphics processing unit", "K-means clustering", "Memory hierarchy", "Message Passing Interface", "Numerical analysis", "OpenACC", "Scalability", "Sensor", "Simulation", "Software deployment", "Supercomputer", "Titan", "Titan (supercomputer)", "Unmanned aerial vehicle", "Unsupervised learning" ], "id": "8935940fe5077f5862ddc5fc32ce7f396ce85a9a", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "267-277", "journalVolume": "", "outCitations": [ "b7ddb9b89874e517d0c6eee558a9f90bceb14ae7", "1ca0eb31f37606cdf77851b29153fbebedcefafc", "f67cb6025de5ee7991765c25a445304becd18221", "95bcb5593a8916440df60caf5730640d766046d6", "03efd91b6a7cab2746d0172eb57e987a5e18f3b7", "40fb14ce84e1f1f774de99cae59a06713f5f4155", "f2cb09c934265c04a517d8b1ef526378bd13814d", "273c7905de2108cd02300927b667248c8ca10035", "8c0b0e80514f70b4eef3a274315168c7a5a66335", "3e49142a9c4c027f824d57cd830ee408db526a8f", "0585f10992496cd80128e82503499042dcdea2ab", "408e9cb5000565e5ee1baae673d6e08fbbc55d48", "0638dc0565cb11191ab1e2b91cd19b630cfa8c34", "3359a6c6d574d81cbf361faf65c0ede7f1fa0c4b", "3c7466c1a76c6bf1d08ee3b9361b88c599257e51", "abd2344f82da918c1dddb23e0dbe60cdd0e89289", "e8e70b147c619c88ccac78479005a5cdf8b54079", "488d2a77874ff8b98f6464df1d93d68c211f1f73", "badb2fb3c8792d5b70aa27ae1ae231208ba4253f", "c62d0aede7ca7d4aa19099dc646f4cc7584a00ea", "0653e2ed9f683868cb4539eb8718551242834f6b" ], "paperAbstract": "A proliferation of data from vast networks of remote sensing platforms (satellites, unmanned aircraft systems (UAS), airborne etc.), observational facilities (meteorological, eddy covariance etc.), 
state-of-the-art sensors, and simulation models offer unprecedented opportunities for scientific discovery. Unsupervised classification is a widely applied data mining approach to derive insights from such data. However, classification of very large data sets is a complex computational problem that requires efficient numerical algorithms and implementations on high performance computing (HPC) platforms. Additionally, increasing power, space, cooling and efficiency requirements has led to the deployment of hybrid supercomputing platforms with complex architectures and memory hierarchies like the Titan system at Oak Ridge National Laboratory. The advent of such accelerated computing architectures offers new challenges and opportunities for big data analytics in general and specifically, large scale cluster analysis in our case. Although there is an existing body of work on parallel cluster analysis, those approaches do not fully meet the needs imposed by the nature and size of our large data sets. Moreover, they had scaling limitations and were mostly limited to traditional distributed memory computing platforms. We present a parallel Multivariate Spatio-Temporal Clustering (MSTC) technique based on k-means cluster analysis that can target hybrid supercomputers like Titan. We developed a hybrid MPI, CUDA and OpenACC implementation that can utilize both CPU and GPU resources on computational nodes. 
We describe performance results on Titan that demonstrate the scalability and efficacy of our approach in processing large ecological data sets.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.88" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8935940fe5077f5862ddc5fc32ce7f396ce85a9a", "sources": [ "DBLP" ], "title": "Parallel Multivariate Spatio-Temporal Clustering of Large Ecological Datasets on Hybrid Supercomputers", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "894f2e901ba2c3e3df11c6473eaef60c838e572d": { "authors": [ { "ids": [ "2999829" ], "name": "Pieter Ghysels" }, { "ids": [ "1773637" ], "name": "Xiaoye S. Li" }, { "ids": [ "40474825" ], "name": "Christopher Gorman" }, { "ids": [ "1805296" ], "name": "Fran\u00e7ois-Henry Rouet" } ], "doi": "10.1109/IPDPS.2017.21", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.21", "entities": [ "64-bit computing", "Algorithm", "BSD", "Discretization", "FLOPS", "Message Passing Interface", "Multigrid method", "OpenMP", "PETSc", "Preconditioner", "Randomized algorithm", "Sampling (signal processing)", "Sparse matrix", "Unstructured grid" ], "id": "894f2e901ba2c3e3df11c6473eaef60c838e572d", "inCitations": [ "d9c99d37370e5e712cc6d04c1679cd91993445e0" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "897-906", "journalVolume": "", "outCitations": [ "82703fe8dd01150caef7657974624ca10f8f951b", "576029e642aa251538a65d92406dae755fe5ec88", "5c3bc41b39d9f0a452a9631d06f657ff2f6c8f36", "ee34b6e840a875fe1d91736c5b24d78abb56e739", "0f82a8d1aa4762e19059c735f7f840e7bd60ac48", "596201b3938685fe215eee88accf771bd9e07fd3", "6995179345701e6f690ba7703190c7bd964a27a0", "8c6e313b3418e42afe4a852116e18d0f24284f35", "a87ad38b648cf2759150715f32723ae8a092142b", "282f9082cd3a4565dbc5c4507c092ed18244c512", "177b78214dc553f41b84dea72d9d02589a85d921", 
"30133bcbbfe13bef0f5db0e05cabc436ac598558", "952896a6656080d1a0e021733bfaa237dd53f832", "566d4c9bf9c5687ab53337a2b217c8b5befb1d36", "d90c10fa741e73a146067e661130666275471519", "8b9ee021d0d76ae4b78ab22b73ac47c7eed52064", "d9b4d4ffa5bf9e9e376b0bdeba19d4090d653feb", "0354aff91dd843e01e396fbd635129bea73977b4", "da638625c1ce4a5bb3b2808b2dc28675ea7eb0f2", "66d6ee943e5713a1a04358c2389cf7443a27f75f", "284605c1ffc8aa65b8bb3bdbc3a53e69c069cde8", "cb13d6831bbf5a40c7caf6edf5fead628c076b4b", "6d73c9947d840cd84a8eee79c224add5fbbb929e", "8d36cd7f57804453b0675d9dfef7e7ecd25627b8", "68af1e7cd763bee646f31b1df4f1c7f84785bc2c", "0a70273286d8e90bb0b101232c6ecd129de7df5e", "d9b2d29e6f969589d341fe346868bd86526ff865", "87ea76767d9c0a6ee3b68c2d2dafa01ce5db3d4f", "45a7a21ed645ba444b0173a059812920495fc522", "a3b4133fb1a65f35b9b7950da9786d23fe5723b4", "61aaffc396c17521e13d1fd137433d815519759c", "1b2eee4571a0427847adc6d983d961d26dd7dc70", "0fd82e4ff009aadd492d10c889370c73308a3ca2", "a3ed13e1318144425d550dfaf5b2cf5fbce16d9d" ], "paperAbstract": "We present the design and implementation of a parallel and fully algebraic preconditioner based on an approximate sparse factorization using low-rank matrix compression. The sparse factorization uses a multifrontal algorithm with fill-in occurring in dense frontal matrices. These frontal matrices are approximated as hierarchically semi-separable matrices, which are constructed using a randomized sampling technique. The resulting preconditioner has (close to) optimal complexity in terms of flops and memory usage for many discretized partial differential equations. We illustrate the robustness and performance of this new preconditioner for a number of unstructured grid problems. Initial results show that the rank-structured preconditioner could be a viable alternative to algebraic multigrid and incomplete LU, for instance. Our implementation uses MPI and OpenMP and supports real and complex arithmetic and 32 and 64 bit integers. 
We present a detailed performance analysis. The code is released as the STRUMPACK library with a BSD license, and a PETSc interface is available to allow for easy integration in existing applications.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.21" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/894f2e901ba2c3e3df11c6473eaef60c838e572d", "sources": [ "DBLP" ], "title": "A Robust Parallel Preconditioner for Indefinite Systems Using Hierarchical Matrices and Randomized Sampling", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "89c612db72dfa0c956034fcdae1afca41c2a26ce": { "authors": [ { "ids": [ "2856285" ], "name": "Xun Jian" }, { "ids": [ "1770635" ], "name": "Pavan Kumar Hanumolu" }, { "ids": [ "8153371" ], "name": "Rakesh Kumar" } ], "doi": "10.1109/HPCA.2017.60", "doiUrl": "https://doi.org/10.1109/HPCA.2017.60", "entities": [ "Big data", "Computer data storage", "Control system", "Data center", "Digital data", "Dynamic voltage scaling", "High memory", "Memory management", "Memory module", "Memory-mapped I/O", "Power management" ], "id": "89c612db72dfa0c956034fcdae1afca41c2a26ce", "inCitations": [], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "229-240", "journalVolume": "", "outCitations": [ "67f881ebc6df47f140dbf99308f1846851a9b434", "77296530878f5f3e94d59499ed5603fb724302d9", "2644b8562292e2492459db3ed214d3330ef7712d", "c140b5f0166fc0d2bd01fdfc9a866c2d9bfbf898", "43e2e66122932f6d67dad607a631a4e93f4fa0a1", "6069f23cf3e413a6ce60bec60acb60952d88cc95", "2d0988d60e21766f5357e57e30384909c6a7494f", "19a3b3ccf8c7c364b8245aa657a98cb976357f3b", "8b04ea524cb6ced72868c120a00c4679d84be006", "26e72340c47b7348e1b1de285f89dd96cc925b27", "32a0f72fb9fb6e3614f7312702f23f2d241c1101", "10db43d5a1b3c1ddf6dc93fa992b8ed42dcf9833", "de7d933ed07ba0fe9275efbd6251f19f0883ce0f", "f088374812301ed93fbfe8c5b72bf1351c084c01", 
"3cf1a5732c0e0cb82a2da9c9c1c6f2e59ae8f994", "1ae3f4cdaaf12ddc6f7bf1a24588af58c54e7930", "54a15f2c25bec4274d5bb423dbc5002426a506a3", "0e2efda23894526e869e57cb81c76de22f6a8d20" ], "paperAbstract": "As the amount of digital data the world generates explodes, data centers and HPC systems that process this big data will require high bandwidth and high capacity main memory. Unfortunately, conventional memory technologies either provide high memory capacity (e.g., DDRx memory) or high bandwidth (GDDRx memory), but not both. Memory networks, which provide both high bandwidth and high capacity memory by connecting memory modules together via a network of point-to-point links, are promising future memory candidates for data centers and HPCs. In this paper, we perform the first exploration to understand the power characteristics of memory networks. We find idle I/O links to be the biggest power contributor in memory networks. Subsequently, we study idle I/O power in more detail. We evaluate well-known circuit-level I/O power control mechanisms such as rapid on off, variable link width, and DVFS. We also adapt prior works on memory power management to memory networks. The adapted schemes together reduce I/O power by 32% and 21%, on average, for big and small networks, respectively. 
We also explore novel power management schemes specifically targeting memory networks, which yield another 29% and 17% average I/O power reduction for big and small networks, respectively.", "pdfUrls": [ "http://rakeshk.crhc.illinois.edu/hpca_17_cam1.pdf", "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.60" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/89c612db72dfa0c956034fcdae1afca41c2a26ce", "sources": [ "DBLP" ], "title": "Understanding and Optimizing Power Consumption in Memory Networks", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "89d2bc625848e028935efb56824f7188880d183d": { "authors": [ { "ids": [ "2093805" ], "name": "Mehdi Azarmi" }, { "ids": [ "1712275" ], "name": "Bharat K. Bhargava" } ], "doi": "10.1109/CLOUD.2017.17", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.17", "entities": [ "Computer security", "Correctness (computer science)", "End-to-end encryption", "End-to-end principle", "Interaction", "Run time (program lifecycle phase)", "SOA Security", "Service composability principle", "Service-oriented architecture", "Taint checking", "Telephone number", "Trusted third party" ], "id": "89d2bc625848e028935efb56824f7188880d183d", "inCitations": [ "6c7db5b65a4101fd5e45c7b9f87fc8c0019fbd03" ], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "58-65", "journalVolume": "", "outCitations": [ "ba9aea6a06ffee76dc8b974c751cd2102c747a91", "7787750a43880a065bd25e8cddbf653f6363aa68", "2ff8f4b3a7f18ab258bbdc1f0968c6e9dfd72a2b", "382591224df8b8b2eb39712f282860424575754e", "0b1fe1af18e2fda99eedf9e5cf3ac18826e585c1", "6c7db5b65a4101fd5e45c7b9f87fc8c0019fbd03", "1a4edc1a25674e0e8032c34b8d52c84764d12811", "5f2b22b77559ddb4f3734459d1ff66c58d22df12", "18f2d484c7722f4fcd21e1e2a3ae6ea5641dd104", "1327a7420af5804ecf94c5666dfbac04cc2f89f3", "5f0aee4c2a0a3ddfadb86619ec544382074b00aa", 
"282f88f3e89813bba06aa0b23987955b987e9af3" ], "paperAbstract": "A service-oriented architecture (SOA)-based application is composed of a number of distributed and loosely-coupled services which are interconnected to accomplish a more complex functionality. The main security challenge in SOA is that we cannot trust the participating services in a service composition to behave as expected all the time. Moreover, the chain of all services involved in an end-to-end invocation may not be visible to the clients. As a result, any violation of the client's policies could remain undetected. To address these challenges in SOA, we propose the following contributions. First, we propose a new end-to-end security architecture for SOA based on a dynamic composite trust model. To maintain the dynamic trust, we designed a trusted-third party service called trust manager component, which collects and processes feedbacks from the actual execution of services. Second, we developed an end-to-end inter-service policy monitoring and enforcement framework (PME framework), which is able to dynamically intercept the interactions between services at runtime and react to the potentially malicious activities according to the client's policies. Third, we design an intra-service policy monitoring and enforcement framework based on taint analysis mechanism to monitor the flow of information within services and detect and prevent information disclosure attacks. These two frameworks together can provide an end-to-end visibility and security in SOA. Finally, we have extensively studied the correctness and performance of the proposed security frameworks based on a realistic SOA case study in a cloud environment. 
All experimental studies validate that the practicality and effectiveness of the presented solutions.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.17" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/89d2bc625848e028935efb56824f7188880d183d", "sources": [ "DBLP" ], "title": "End-to-End Policy Monitoring and Enforcement for Service-Oriented Architecture", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "89d7dcee8bc1786cd2a83e404ebc2a3243de1a4c": { "authors": [ { "ids": [ "17799643" ], "name": "Ruohuang Zheng" }, { "ids": [ "14252027" ], "name": "Michael C. Huang" } ], "doi": "10.1145/3079856.3080213", "doiUrl": "https://doi.org/10.1145/3079856.3080213", "entities": [ "Dependability", "Error detection and correction", "Memory protection", "Overhead (computing)", "Receiver autonomous integrity monitoring", "Redundant array of independent memory", "Very-large-scale integration" ], "id": "89d7dcee8bc1786cd2a83e404ebc2a3243de1a4c", "inCitations": [], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "214-227", "journalVolume": "", "outCitations": [ "fa8e2a8d32263c33359fa9f83a176b68c2461b4f", "373dd8489687b53581ce1bc8c91bc1f97b71f929", "4dd69c412369b729aeb1e9aee37d3f41c5a20e14", "fdd03390fe821a1595bb5af057e1e3fcf94a08f9", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "3abf71e837cb7b1e9fe7e54192d986142d87b1a2", "656b047c4d7a830c876530bc8f8fe38c9d3e8a2f", "828c504705841dee0031e52bf9acb016fcec45de", "89d9709aea044f9ca12fd75993dc8ea3fc124db3", "8cb4b89e49faf8754485851f9e4572016afd51b0", "224c89d4276027d7548e793136272cb213cd3c3e", "128fb51d28256aac3b20a8f6de85598767876fbe", "4eab21a23b77cf823fc2257e2d2771f8ad8374af", "cc317639665a6ad1df22ca2ef0abfcaff3052d0b", "128466dbf1ac66c0a368fc5eab87131ff1c0c80c", "5c5dd0ae1d8035eadbf2fd411663dd062a922941", "5457a891ed0bb1015b110098d621bdeebcc8e21e", "3b5657c7ed08519bba5903af01ba6d42dfcc8848", 
"03d55467b20e662fbaa8416e853f57c93834a9fb", "406fe23ae820e770b2c27890fa03379f5be45630", "ff0e22e5691547a088d28a09a62dd742a2d4ba3d", "012d556d67acedc6898930b4c93f54b87aabf5ee", "e8a566fe85f7187f14f0b345847207509a4c274c", "0e77b8d3d0a7c6c4b5e09b2a90eb4c1e4279bdce", "894879db716a843559bb5a6c568ac450b8586df5", "10db43d5a1b3c1ddf6dc93fa992b8ed42dcf9833", "108c840d5d1847948a2de0250490a327ae069ee6", "33da3dcba06cf453f74203e3fb2adaa8c1133f3b", "318c86751f018b5d7415dafc58e20c0ce06c68b6", "80977b5b42a99aae81e4b8415b9c35d26fb1d35a", "31751d2c4916bde3d05029106a09cb25cf17f6f6", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "fd03ce30e57e95965c4468945b8377e09bf7890d", "4f9f2734116dc4e334409238aba96a7f8d0f650d", "239e046347d5075b3eeef5439050e9f2ca760b7b", "35b92289fe9c19e5baf462ffe91bdd2d25768b00", "43de2dd6c008e27c9d9f425b59f50fb634422ff6", "288f5d268d6d3c5b546f972847014db6a612575d", "288e29f06895be73f25253a91f12184aa38800d6", "11377c548fa4eac91002657be699866acb034aac", "747ad718761b7d848a12e4f3a82aa0f46117a815", "ae705ea9428baedc1a2de4539a75f6aed444c096", "0eacd1b47786f740b723d906d46e160f143c0378", "15e63d368aa803c73b8f5d1315a51ebd7ceea3c3", "5236160832766c58b1be2bf4f76f33d9d25b4600", "517c5cd1dbafb3cfa0eea4fc78d0b5cd085209b2", "337fa81caa00ddd73ec504cffd9510f66a458b9c" ], "paperAbstract": "Memory hardware errors may result from transient particle-induced faults as well as device defects due to aging. These errors are an important threat to computer system reliability as VLSI technologies continue to scale. Managing memory hardware errors is a critical component in developing an overall system dependability strategy. Memory error detection and correction are supported in a range of available hardware mechanisms. However, memory protections (particularly the more advanced ones) come at substantial costs in performance and energy usage. 
Moreover, the protection mechanisms are often a fixed, system-wide choice and can not easily adapt to different protection demand of different applications or memory regions.\n In this paper, we present a new RAIM (redundant array of independent memory) design that compared to the state-of-the-art implementation can easily provide high protection capability and the ability to selectively protect a subset of the memory. A straightforward implementation of the design can incur a substantial memory traffic overhead. We propose a few practical optimizations to mitigate this overhead. With these optimizations the proposed RAIM design offers significant advantages over existing RAIM design at lower or comparable costs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080213", "http://www.ece.rochester.edu/~mihuang/PAPERS/isca17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/89d7dcee8bc1786cd2a83e404ebc2a3243de1a4c", "sources": [ "DBLP" ], "title": "Redundant memory array architecture for efficient selective protection", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "89e34c8645469977a2b3eac99400a1134189740e": { "authors": [ { "ids": [ "1714005" ], "name": "Trishank Karthik Kuppusamy" }, { "ids": [ "38090423" ], "name": "Vladimir Diaz" }, { "ids": [ "2030168" ], "name": "Justin Cappos" } ], "doi": "", "doiUrl": "", "entities": [ "Bandwidth (signal processing)", "Delta encoding", "Docker", "Ethernet hub", "Exploit (computer security)", "Extensible Authentication Protocol", "Haskell", "Mercury", "OCaml", "Python", "Python Package Index", "RubyGems", "Software repository", "Time-utility function" ], "id": "89e34c8645469977a2b3eac99400a1134189740e", "inCitations": [ "122ab8b1ac332bceacf556bc50268b9d80552bb3" ], "journalName": "", "journalPages": "673-688", "journalVolume": "", "outCitations": [ "473fa1c5c66d4a51adbb64c263687d730fc6d217", 
"10425a28c7f053d0939e8c74ad81f59d7008138c", "9436b2e2c095edab3bdd12d49abf177ca62342d9", "bb52ff840b1b6e2144268e57c72118a49460d6f4", "282ce026f019c4635fce1d308029fa4d0ddc05cf", "0706b93482fa8aaa4ce534f7cf6971b51c10cd76", "1627ce7f1429366829df3d49e28b8ecd7f7597b5", "8502fd5a659150e0635973744c4a80138c4e7ca7", "0436a45833b62ffbec8d3104fb221675967e1a7c", "0609c475dd31632b705e9fb9a603060a3ff2c46a", "9401e4d6a572a6d56d9c41d078b11fde97f6e059", "ba6924250d679f75dbc80fdf94dc22a60857b00b", "024735cc8a42fe4d9fa7c4c1c097b1f7a71286a3", "28022c2d8b0b2e7d70c138638472c525058c33be", "017aba316f6d8447a4e045d8ddd417456629031e", "3e0080a34eca4eabb9b371c2b3c369dc4dc90112", "0ba16e5cd9c81282386362c8db0adcd4a203741f", "45b5429e1f70fe6924b6651885e47fe79bd33f9d", "06b7f3156ef8f0d66fe05e504c0bb908ab288c03", "c01ebf8b9dd5d4ed1d0db22e9a2ba954a46fbf35", "0a9cc699ed94b4729de3255d76305b91b6a56aaa", "f3f4e8b03e337a7fb41bcd66f04a0d0bbc8fe02f", "20f5f8733134d87041b95b742d613051a1fb3fdb", "0b074432c900b2c36c4a9143d34c42da43e108b1", "342dd09a25f2835fa5f95ff8e27bab95bf9ca83c", "4b44f86d55352155b469356de8050e64c3be500d", "1c15eab4374afd112496389d6fcd495d517f5788", "1fedc07f1d12347d38274353a84678e98c006cf0", "9e3c28e2857af8dd27242a48991c921efd4c32f0", "0bafcffb9274aafe39830da451e6f44f38f434a4", "7b86134253e48d97acb3a4a7af928587d1825aca", "31e07d9add20cc678ddc3ae5d79299bc4653bd38", "48b029ced7f12780d57c2577def98d65381ff551", "36beed570d47f5ae4302f670c4a9096bc0027afc", "07c5da91f5a60765d563d01365bc3b15fa2f8e00", "46dc49d1db07399ae8f60f635b5d8bdf52446f65", "08952d434a9b6f1dc9281f2693b2dd855edcda6b", "60831aa582eef4df8026403864044a2fa2b14ac8", "9872e06b366e3520c61d2346a20066892e567e80", "9c592eb42573518f54e162708f33c5bec735b132", "877bd179d1e31e74145e3eb4232e25d63a5c7233", "1e214e456dc1874ae58d835b5e5f327f12498f21", "d12d1289d2384c2ce642f01855637b9f0519e189" ], "paperAbstract": "A popular community repository such as Docker Hub, PyPI, or RubyGems distributes tens of thousands of software projects to millions 
of users. The large number of projects and users make these repositories attractive targets for exploitation. After a repository compromise, a malicious party can launch a number of attacks on unsuspecting users, including rollback attacks that revert projects to obsolete and vulnerable versions. Unfortunately, due to the rapid rate at which packages are updated, existing techniques that protect against rollback attacks would cause each user to download 2\u20133 times the size of an average package in metadata each month, making them impractical to deploy. In this work, we develop a system called Mercury that uses a novel technique to compactly disseminate version information while still protecting against rollback attacks. Due to a different technique for dealing with key revocation, users are protected from rollback attacks, even if the software repository is compromised. This technique is bandwidth-efficient, especially when delta compression is used to transmit only the differences between previous and current lists of version information. An analysis we performed for the Python community shows that once Mercury is deployed on PyPI, each user will only download metadata each month that is about 3.5% the size of an average package. 
Our work has been incorporated into the latest versions of TUF, which is being integrated by Haskell, OCaml, RubyGems, Python, and CoreOS, and is being used in production by LEAP, Flynn, and Docker.", "pdfUrls": [ "https://www.usenix.org/conference/atc17/technical-sessions/presentation/kuppusamy", "https://www.usenix.org/system/files/conference/atc17/atc17-kuppusamy.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/89e3/4c8645469977a2b3eac99400a1134189740e.pdf", "s2Url": "https://semanticscholar.org/paper/89e34c8645469977a2b3eac99400a1134189740e", "sources": [ "DBLP" ], "title": "Mercury: Bandwidth-Effective Prevention of Rollback Attacks Against Community Repositories", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "8a1a0335500abde19a441d33214555dd1d4b7602": { "authors": [ { "ids": [ "3103778" ], "name": "Kenneth S. B\u00f8gh" }, { "ids": [ "1738624" ], "name": "Sean Chester" }, { "ids": [ "3132288" ], "name": "Darius Sidlauskas" }, { "ids": [ "1713664" ], "name": "Ira Assent" } ], "doi": "10.1145/3035918.3035962", "doiUrl": "https://doi.org/10.1145/3035918.3035962", "entities": [ "Algorithm", "Central processing unit", "Data parallelism", "Database", "Experiment", "Graphics processing unit", "Multi-core processor", "Parallel computing", "Pareto efficiency", "Programming paradigm", "Threaded code" ], "id": "8a1a0335500abde19a441d33214555dd1d4b7602", "inCitations": [], "journalName": "", "journalPages": "447-462", "journalVolume": "", "outCitations": [ "4439b5ce4506099452a2b2c30939bfa9f6020eb9", "4a7960e5f9e16aefbed71ae5d437cd0ccfcfc48a", "05a99a31ed4a5a25e21859fca562b7c0f70ab0dc", "34fa41ccb6e548612886623916d502fce17fd3a8", "335606158cd0e80d353c0820aa7f3519c0edef87", "43d794b0c7e8fbed5d65a71cd2e06816233ee6e4", "339358fda0285c6760edce559105b97fdabae02d", "3ece2061c7c94e406c2ca28845f76e050ba81877", "6130cd6b45f26d58ce9255b2dce9d82095ab1d36", "93cccf4711ae0cfbf1bbdf048c089eacb333f742", "410e117f9fb9c89f5edc9c8c2490bb8fada4f148", 
"041969f14a8deb754f5162194d917bdcaf782319", "99593995f1772301413463a64a165dcae583ca7f", "6d59fd2b815de3c5836b362cd177cda0b115ac71", "5adece4919d359441c506260dc22ea6e7489e9fd", "4701832beac42eb0b6262157966314c9e02d9dc0", "6e21a27e457c25fa6158c5c78757c5b7bc0017a6", "6c15df6e5195b05083fc05d80d74a7f595a5bb93", "534b6dee5e86587676047d7ff921cea970b7d95b", "88333eaec5057661e1306c875248e9c8cab07c2b", "2a4cf7df272cb2b7d684c1e630594b5368d67b7d", "65848c1e1e7f04350b30176f504826f4ee04dd39", "85d542ce705eb3fcb923ca93d781d93c4a6ee061", "893167546c870eac602d81874c6473fd3cd8bd21", "97669a145aa4011e1cabd1a2ca269256774c240e", "4279521956426d1fb4fb71270e0ed95e292163c7", "9fb8f081e203cc0114ea93a1b19123aa164438b9", "6247338483718ef2876cbb21e483ee4bec66c51d", "772ddb61b7c96033c4ccae4303d35d74ccf15470", "a6725be97c70a3f9c7719f23d4d8233e4771cdf1", "be87b87400d68221be036d8c7b36b1dc09d2775b", "9d62524ce36b4ecdefe847ef5c95c040569991f6", "6c27f5ba435733960ed6624f9a2b7a7785c8cfef", "a40cb295b6c1633c549d5846cd9fe59c381e6920", "1be11ffbcc103c070e9de3a28eeca2de5f8f8369", "5dbe84872b42cb6167d3b601fdf68b4eb2d7f5d9", "a1f2fb587abcab6cca51abc3c9fd28bd8b06737c", "00689648ecb99287550c99479d221088327f8692", "8865aeb8efaa49a1700230e2cb1dee4c157800c8", "8e2fe6add0e4f6643409202da05e69cb4bfd20f2" ], "paperAbstract": "Multicore CPUs and cheap co-processors such as GPUs create opportunities for vastly accelerating database queries. However, given the differences in their threading models, expected granularities of parallelism, and memory subsystems, effectively utilising all cores with all co-processors for an intensive query is very difficult. This paper introduces a novel templating methodology to create portable, yet architecture-aware, algorithms. We apply this methodology on the very compute-intensive task of calculating the *skycube*, a materialisation of exponentially many skyline query results, which finds applications in data exploration and multi-criteria decision making. 
We define three parallel templates, two that leverage insights from previous skycube research and a third that exploits a novel point-based paradigm to expose more data parallelism. An experimental study shows that, relative to the state-of-the-art that does not parallelise well due to its memory and cache requirements, our algorithms provide an order of magnitude improvement on either architecture and proportionately improve as more GPUs are added.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035962", "https://sean-chester.github.io/assets/preprints/sigmod_boegh_2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8a1a0335500abde19a441d33214555dd1d4b7602", "sources": [ "DBLP" ], "title": "Template Skycube Algorithms for Heterogeneous Parallelism on Multicore and GPU Architectures", "venue": "SIGMOD Conference", "year": 2017 }, "8a2a2a3616b03743a2435f4e41eb607272ff5be4": { "authors": [ { "ids": [ "1715491" ], "name": "Long Gong" }, { "ids": [ "1790319" ], "name": "Paul Tune" }, { "ids": [ "2374501" ], "name": "Liang Liu" }, { "ids": [ "2812772" ], "name": "Sen Yang" }, { "ids": [ "28686573" ], "name": "Jun Xu" } ], "doi": "10.1145/3084440", "doiUrl": "https://doi.org/10.1145/3084440", "entities": [ "Algorithm", "Computational complexity theory", "Crossbar switch", "Data center", "Electronic switching system", "Input/output", "Network switch", "Packet switching", "Simulation", "Throughput" ], "id": "8a2a2a3616b03743a2435f4e41eb607272ff5be4", "inCitations": [ "0d938a0ea855bca4b5ccbefc5bbc9d649ba4a6bd", "bf90034d3ad0339e02b0ac2b905ad4cbecb7be4c" ], "journalName": "POMACS", "journalPages": "3:1-3:33", "journalVolume": "1", "outCitations": [ "2514adbeb41458a80e1583f3c6ef17891f769874", "3cbece44a5a2dda117e835885f13beb02c9c5a9f", "b7d8d2891d83f023bcd1d30f75540c8b89f24575", "711cb0d4522bd9503e494a4c3137a26dffb2a54a", "6a54ffae79385e085eb5ba22ba7ef9094374ad5d", "367512018c30ee87a2cb4ce71bb73d1d05a6f42d", 
"0b22e134cfb5709888bef5dffc1a8d37f3cbfa35", "35882c21ea18bb2d6833559a78f699ba73c259ab", "150fdaf81a42057476ab35aa30db73a028abdf4a", "b06207038b178fcbc10785567d8f56f7edf75b9f", "02cd35fe566b9cd6e0b7af1ee8f88135b36c9eba", "9ab5c60dd2f4297249fa4d934f019fca0c88580f", "455bb755e2dfade1901bf106afe85003e3cf694f", "60065ebe7674e35899c354702f3eb4d8dee413fa", "39a4f2a84089545f24e009d18152b79b63de5819", "4ff890e3aedf2fb5115ef0176b39b172dc42a0e3", "71865ea50aef7c61dfdc8c61900d9dd24cfaafe2", "d323e57593ea9aaa7a7f9dd211fae301e7e64350", "389227f2fc1aedf402d541eadf929ea4bb34fb24", "3e52c2161ff344efe625ea8f961ec679e72ab906", "2c0a24f7731c1b8f74049e17fde64f51d29defb1", "00e65a04c790bde33d03cede767c270dc4f39889", "716a235c0095dccd5ab72f6edb65c80ac131f70f", "09f84b582832e873f899d1640a10892e060ec304", "19b304df6f13798a0745eeaf8f4573b202a43e5f", "0a46107dfedab33d3ec919385df2a9a7dd781463", "bce3bcc08c069f4cfc784b2be133d8a4fb23ccaa", "795fa4775f5249d1a3fce0998b18c58a0a3801e0", "552e9a91551e1e7ff284c21c46a4b7d5dea69bfb" ], "paperAbstract": "Most present day switching systems, in Internet routers and data-center switches, employ a single input-queued crossbar to interconnect input ports with output ports. Such switches need to compute a matching, between input and output ports, for each switching cycle (time slot). The main challenge in designing such matching algorithms is to deal with the unfortunate tradeoff between the quality of the computed matching and the computational complexity of the algorithm. In this paper, we propose a general approach that can significantly boost the performance of both SERENA and iSLIP, yet incurs only O(1) additional computational complexity at each input/output port. Our approach is a novel proposing strategy, called Queue-Proportional Sampling (QPS), that generates an excellent starter matching. 
We show, through rigorous simulations, that when starting with this starter matching, iSLIP and SERENA can output much better final matching decisions, as measured by the resulting throughput and delay performance, than they otherwise can.", "pdfUrls": [ "http://doi.acm.org/10.1145/3084440", "http://doi.acm.org/10.1145/3078505.3078509" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8a2a2a3616b03743a2435f4e41eb607272ff5be4", "sources": [ "DBLP" ], "title": "Queue-Proportional Sampling: A Better Approach to Crossbar Scheduling for Input-Queued Switches", "venue": "SIGMETRICS", "year": 2017 }, "8a32009be8ae5c3f61de634256371fe07eab03bd": { "authors": [ { "ids": [ "35011666" ], "name": "Stevens Le Blond" }, { "ids": [ "20630818" ], "name": "C\u00e9dric Gilbert" }, { "ids": [ "10515643" ], "name": "Utkarsh Upadhyay" }, { "ids": [ "1719959" ], "name": "Manuel Gomez-Rodriguez" }, { "ids": [ "2450059" ], "name": "David R. Choffnes" } ], "doi": "", "doiUrl": "", "entities": [ "Antivirus software", "Ecosystem", "Machine learning", "Malware", "Social engineering (security)", "VirusTotal" ], "id": "8a32009be8ae5c3f61de634256371fe07eab03bd", "inCitations": [ "79bcad91ac8c3ace49e126a94ae4cd3327ab18e4", "6a3a3c263e0a985a3cd127799ffe101bfd7da943" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "073daaf4f6fa972d3bdee3c4e4510d21dc934dfb", "34c2b718869354a588ee602f41be77e553346c5d", "12424c2a9655284d1db8f40b93f14e34504ff6a3", "1f0fbdf41eee36e2459edb3ba7f77e191725c783", "b0d2e7a274bf7f778f0cd22486ead79d58c859ac", "35060a869ea38caf58426ea0c6aa65e59021f12a", "023555bdd427c20fb77a795c3a15e77ca885142d", "4b973014da15720c65d0f74430e9980fae74b847", "3befa34619a695fd34f72f21683f1131c2c379b2", "f457a06163cbdc1be98d3b314c576eba6ed8c9cb", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "9ed18f6b2c55138b2d8f6d62bde136621fe84829", "70d687ca03cb47731bf4424c36adc09b11847e68", "261360b612878b3e249b5357cef159385024e637", 
"04e5b276da90c8181d6ad8397f763a181baae949", "7e61bd6abdcb68ed9b3871311cabe09753de88ff", "2cfbb7b89a5e220b21bbf64161dc880c1b644017", "2ec814320c8ee12cd002d9b5f40facbeffd05ecb", "7aa450d7e2b43175590a1ee2c94f5342152cfc56" ], "paperAbstract": "Our understanding of exploit documents as a vector to deliver targeted malware is limited to a handful of studies done in collaboration with the Tibetans, Uyghurs, and political dissidents in the Middle East. In this measurement study, we present a complementary methodology relying only on publicly available data to capture and analyze targeted attacks with both greater scale and depth. In particular, we detect exploit documents uploaded over one year to a large anti-virus aggregator (VirusTotal) and then mine the social engineering information they embed to infer their likely targets and contextual information of the attacks. We identify attacks against two ethnic groups (Tibet and Uyghur) as well as 12 countries spanning America, Asia, and Europe. We then analyze the exploit documents dynamically in sandboxes to correlate and compare the exploited vulnerabilities and malware families targeting different groups. Finally, we use machine learning to infer the role of the uploaders of these documents to VirusTotal (i.e., attacker, targeted victim, or third-party), which enables their classification based only on their metadata, without any dynamic analysis. 
We make our datasets available to the academic community.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/broad-view-ecosystem-socially-engineered-exploit-documents/", "https://people.mpi-sws.org/~stevens/pubs/ndss17.pdf", "https://www.internetsociety.org/sites/default/files/ndss2017_03B-4_LeBlond_paper.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/a764/44b96f4fbc8dee28bade399e37b83ea4aee7.pdf", "s2Url": "https://semanticscholar.org/paper/8a32009be8ae5c3f61de634256371fe07eab03bd", "sources": [ "DBLP" ], "title": "A Broad View of the Ecosystem of Socially Engineered Exploit Documents", "venue": "NDSS", "year": 2017 }, "8a3c11d6caf121136916ae7287063a256796a697": { "authors": [ { "ids": [ "3225184" ], "name": "Ajay Panyala" }, { "ids": [ "1773557" ], "name": "Omer Subasi" }, { "ids": [ "3285377" ], "name": "Mahantesh Halappanavar" }, { "ids": [ "1803382" ], "name": "Anantharaman Kalyanaraman" }, { "ids": [ "2349294" ], "name": "Daniel G. Chavarr\u00eda-Miranda" }, { "ids": [ "1679176" ], "name": "Sriram Krishnamoorthy" } ], "doi": "10.1109/HiPC.2017.00013", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00013", "entities": [ "Algorithm", "Approximate computing", "Approximation algorithm", "Emergence", "Graph coloring", "Heuristic", "List of algorithms", "Loop perforation", "PageRank", "Scalability" ], "id": "8a3c11d6caf121136916ae7287063a256796a697", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "23-32", "journalVolume": "", "outCitations": [ "6ba949c2b63975a05572c757c0379a7a6547dd4e", "4d6f48d2645f3bd986b6c0f1fc2968c18642d3bc", "6dab20f55bf33ed2aec142c59080243e136101e7", "8785c3fa5e12c3951219e02cbc8f6c42a194af12", "21a89861b346b6a0bd5bf9b22b5785b2df22b9e6", "40a2a398862f5c62555ffaf6d8421dea9f1bbcd3", "1cf83f719f781a04cc0aba64483ce78de9f6b0bb", "7ed0ddad026bcd08f862b74acfa81e3490305989", "c567523c5a98be4fbfb464ef2d21bfdfa9a5425f", 
"eb82d3035849cd23578096462ba419b53198a556", "1d8051898e35f78dca00ff1b37106b028dd0ac4a", "e741b677759b94aaa5f3162e3a3b01d396a43aaa", "11c6394327a3130e6aba130c86ffa0942a759d89", "88795e75f1504e81a32ab8daf236495e76f01f6f", "1922676eadd756060dcb6c70bfca18bc789b8a48", "66022e895798f1689f709bf35cba737476d3defc", "006cadd0e54581d34cfde4651ff957572eb395a3", "654e303e59b75876d53b5184e3096805791f7c77", "3a34d9a271d2060d5d75b8198f88adfa67c990b9", "95b251202e1f5ff7d7f41dda553a38e395ecf555", "453d73995c98d6677a77bf547fe569ab7b1b02a8", "1ed8886fb8a9ba6b57b8742d0e5b76a32daa8fad", "93b4c12793cf71fda00c3b9e0e3e737d4623a6d3", "7e36674b63ab1c05579b26af6f30c6b0aa17e057" ], "paperAbstract": "Approximate computing enables processing of large-scale graphs by trading off quality for performance. Approximate computing techniques have become critical not only due to the emergence of parallel architectures but also due to the availability of large scale datasets enabling data-driven discovery. Using two prototypical graph algorithms, PageRank and community detection, we present several approximate computing heuristics to scale the performance with minimal loss of accuracy. We present several heuristics including loop perforation, data caching, incomplete graph coloring and synchronization, and evaluate their efficiency. We demonstrate performance improvements of up to 83% for PageRank and up to 450x for community detection, with low impact on accuracy for both the algorithms. 
We expect the proposed approximate techniques will enable scalable graph analytics on data of importance to several applications in science and their subsequent adoption to scale similar graph algorithms.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00013" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8a3c11d6caf121136916ae7287063a256796a697", "sources": [ "DBLP" ], "title": "Approximate Computing Techniques for Iterative Graph Algorithms", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "8b12a237f1845086337926742810259b8e87fed0": { "authors": [ { "ids": [ "1713648" ], "name": "Torsten Hoefler" }, { "ids": [ "2780481" ], "name": "Amnon Barak" }, { "ids": [ "40399772" ], "name": "Amnon Shiloh" }, { "ids": [ "1682521" ], "name": "Zvi Drezner" } ], "doi": "10.1109/IPDPS.2017.36", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.36", "entities": [ "Algorithm", "Las Vegas algorithm", "Monte Carlo", "Parallel computing", "Parallel programming model", "Simulation" ], "id": "8b12a237f1845086337926742810259b8e87fed0", "inCitations": [ "d0556be65e8564ab8bb3e26b6a0146a62027bc40" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "357-366", "journalVolume": "", "outCitations": [ "08c3532ede4089a290e114dfa0e7d29ccac9c1bb", "a3dcc1d2d083f6ed8371e6619557cf7ffdc4b410", "5b3f43a02fe5bce776833d95d5a2b8afc904b375", "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", "3515804fcd5368b4eb8af3c66ba75c01f2c9e871", "550285725684e2d286ffd9fa5cebdc52d7c4f860", "569de2eececd3adb7219d63eb85e4bdc63486c42", "f27070fa68d10ac71d82c6f0184cb7c6fc111f79", "f3018e7589af851341e6b40affb12d0ebdfa7db1", "6fbd9834cb888b84db1f25756cb6173b3622e4a0", "5ce262f8c816009dc859d476d1850fba0c516e12", "07b66a85083291d2b702a3bcc30f32854d4a6d29", "24f310878b013ce02e9c046fa1bac611d66868a8", "0ca1e465dd85b8254bcdd7053032d7eab6e2d4b4", 
"10b44b914a35142eb7c1cff7a33e5527715561ee", "1822b56cea223cedf501fa10bd3795767ab80a9e", "1861776e08d4ce30ac63bd99b03501a80b98bf87", "b6571efa4483aa00d23bbcd36930c4877548ba38", "a524c99eab404a707fdffe28ff8a83f865cb3d61", "b7c2e68743af169ae14dc2f2a0534e0ea2666ba8" ], "paperAbstract": "Large-scale parallel programming environments and algorithms require efficient group-communication on computing systems with failing nodes. Existing reliable broadcast algorithms either cannot guarantee that all nodes are reached or are very expensive in terms of the number of messages and latency. This paper proposes Corrected-Gossip, a method that combines Monte Carlo style gossiping with a deterministic correction phase, to construct a Las Vegas style reliable broadcast that guarantees reaching all the nodes at low cost. We analyze the performance of this method both analytically and by simulations and show how it reduces the latency and network load compared to existing algorithms. Our method improves the latency by 20% and the network load by 53% compared to the fastest known algorithm on 4,096 nodes. 
We believe that the principle of corrected-gossip opens an avenue for many other reliable group communication operations.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.36", "http://htor.inf.ethz.ch/publications/img/hoefler-corrected-gossip.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8b12a237f1845086337926742810259b8e87fed0", "sources": [ "DBLP" ], "title": "Corrected Gossip Algorithms for Fast Reliable Broadcast on Unreliable Systems", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "8b1bb7e6fa59e87906c125831ea9b90d67260b74": { "authors": [ { "ids": [ "1993213" ], "name": "Valeria Bartsch" }, { "ids": [ "2045923" ], "name": "Rui Machado" }, { "ids": [ "40539501" ], "name": "Dirk Merten" }, { "ids": [ "37389026" ], "name": "Mirko Rahn" }, { "ids": [ "1918587" ], "name": "Franz-Josef Pfreundt" } ], "doi": "10.1007/978-3-319-64203-1_36", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_36", "entities": [ "Application checkpointing", "IEEE-488" ], "id": "8b1bb7e6fa59e87906c125831ea9b90d67260b74", "inCitations": [], "journalName": "", "journalPages": "497-508", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_36" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8b1bb7e6fa59e87906c125831ea9b90d67260b74", "sources": [ "DBLP" ], "title": "GASPI/GPI In-memory Checkpointing Library", "venue": "Euro-Par", "year": 2017 }, "8b338e925ae623adbc4cf387d3dd7ccf839d66e6": { "authors": [ { "ids": [ "2228713" ], "name": "Savvas Savvides" }, { "ids": [ "2489328" ], "name": "Julian James Stephen" }, { "ids": [ "3086775" ], "name": "Masoud Saeida Ardekani" }, { "ids": [ "1795767" ], "name": "Vinaitheerthan Sundaram" }, { "ids": [ "1697599" ], "name": "Patrick Th. 
Eugster" } ], "doi": "10.1145/3127479.3129256", "doiUrl": "https://doi.org/10.1145/3127479.3129256", "entities": [ "Algorithm", "Client-side", "Cloud computing", "Confidentiality", "Cost efficiency", "Encryption", "Homomorphic encryption", "IBM Tivoli Storage Productivity Center", "Plaintext", "Programmer" ], "id": "8b338e925ae623adbc4cf387d3dd7ccf839d66e6", "inCitations": [], "journalName": "", "journalPages": "479-492", "journalVolume": "", "outCitations": [ "225c357ee5490febc4fe9ca002fbf08b29adec46", "18e704e31d06f955f39955cd4c785c4731e5fbd7", "63271da9b91a3b3639a3d0c4f05cdfb48cd44756", "010ab443478bbfbecb03be9c250a49ae3b19b4d5", "228c9ef44ed51f4152a5655be54c8d679c54bb01", "1bf87acc82ea32cddd929514228b5e9de0233dc0", "0fda9bbccd6908637e2ead1cef69f091bfda75d4", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "0471807906db31b7f477caa31a255a651e1e26fd", "5f2b22b77559ddb4f3734459d1ff66c58d22df12", "1e3822536527f98c53b716c26fa05da5bf729f17", "48106c17d9af30b5eafb75f2fdcf50c6dc1502d7", "197107b7ec65a623b59987cf7243921908068751", "0558c94a094158ecd64f0d5014d3d9668054fb97", "496882385f4f8fc703a77de669528ac2f9a9d07c", "28a9dca6faeead651539c700bef413203b2b876e", "fb7f27650f30aefa7786c874da4b7cc27a11b9df", "85a3c518ae3f0d77a2a16e3a45761be2c8517b19", "1948575f4cedf689f708d1f0880e79de9ec4c4a5", "2004123b3b9698abe916116910b2c46a712a5585", "795112d3cc48f401c3787ca370a88d2ec0f1ad51", "7cc6a150b1ba5b40c8e5aca6e94c817d5bebbc5c", "26f3cbecbc636984d57e52191c1d87c9377aff6f", "0c515587e546ea2bdf9ac77eaf0d8bc578954443", "1bd2d9fb62832737735d011154834b7c80c7e50a", "415012ec86c7a6acebd34bf7eb02eff46dd96e68", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "2420f3bd82b9b9a4fc99fa1e3b79b4cb6d6c3fef", "02beed2e1350a0d0b01bb9622081cb93a965a716", "a0835c336ccc0e2f6f7cde1ba9c214996a70f1f3", "7c8a4abc7624802783bde0688969fcdf373d01e7", "bfeec4b4af543b6e54a8a9d44ce23ddadb2fca74", "6d6bd93c620885cb5ddd5abfac19efffac132cd5", "4af77753e00973f339fd93a27e4131047018e79c", 
"22a5eeb8608b35e371b7544a54fabeadca8866e3", "0832933a67612dc18e0f70cb0ed949fef1a830a5", "4beef78e9b21611a59237b63d512014e47f32d5e", "1b0968a9e392383a770c8b40255f970600108f66", "ee62379ffb236569f73538ff7621e64a7892907a", "0a289fd7b14345822b1acda6d82750b15d59663e", "06a1d8fe505a4ee460e24ae3cf2e279e905cc9b0", "33736e956a5c4703fb5f215bd3ad686eeeedf2de", "198caed043e560f768c187d5f8f83f0af56f3e6b", "49e72b668dcde9fe57a8ed60e6890a5622733f19", "2a7d3b967a356c2a42f729048b0d3511b0005351", "06fb8f5e59bac8b014e7ab70851b3568ea5a0a46", "19aeb06b3ba5b454fc462254c178acdf233d955b", "19e6f3ea035d33590a8598be68ac82e6f00ce518" ], "paperAbstract": "Cloud computing offers a cost-efficient data analytics platform. However, due to the sensitive nature of data, many organizations are reluctant to analyze their data in public clouds. Both software-based and hardware-based solutions have been proposed to address the stalemate, yet all have substantial limitations. We observe that a main issue cutting across all solutions is that they attempt to support confidentiality in data queries in a way transparent to queries. We propose the novel abstraction of secure data types with corresponding annotations for programmers to conveniently denote constraints relevant to security. These abstractions are leveraged by novel compilation techniques in our system Cuttlefish to compute data analytics queries in public cloud infrastructures while keeping sensitive data confidential. Cuttlefish encrypts all sensitive data residing in the cloud and employs partially homomorphic encryption schemes to perform operations securely, resorting however to client-side completion, re-encryption, or secure hardware-based re-encryption based on Intel's SGX when available based on a novel planner engine. 
Our evaluation shows that our prototype can execute all queries in standard benchmarks such as TPC-H and TPC-DS with an average overhead of 2.34× and 1.69× respectively compared to a plaintext execution that reveals all data.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3129256" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8b338e925ae623adbc4cf387d3dd7ccf839d66e6", "sources": [ "DBLP" ], "title": "Secure data types: a simple abstraction for confidentiality-preserving data analytics", "venue": "SoCC", "year": 2017 }, "8b3bf0ce97cf1e5c6ff99d76e9961d77ed0bf180": { "authors": [ { "ids": [ "2547065" ], "name": "Cl\u00e9mentine Maurice" }, { "ids": [ "5743763" ], "name": "Manuel Weber" }, { "ids": [ "40413271" ], "name": "Michael Schwarz" }, { "ids": [ "32399671" ], "name": "Lukas Giner" }, { "ids": [ "2015792" ], "name": "Daniel Gruss" }, { "ids": [ "2143859" ], "name": "Carlo Alberto Boano" }, { "ids": [ "1743786" ], "name": "Stefan Mangard" }, { "ids": [ "1790951" ], "name": "Kay R\u00f6mer" } ], "doi": "", "doiUrl": "", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Cache (computing)", "Cloud computing", "Covert channel", "Data rate units", "Error detection and correction", "Forward error correction", "High-throughput computing", "Interrupt", "Kilobit", "Microarchitecture", "Privilege (computing)", "Throughput", "Virtual machine" ], "id": "8b3bf0ce97cf1e5c6ff99d76e9961d77ed0bf180", "inCitations": [ "b181777c520457db8a0c555b970cdb349d22e559", "a59fa020903b80dbd1f7a5d927e7caada963f25a", "15426b6cb856252d967c7962c7dc3ef32b04ef98", "7ca0a5c249f1af0c2bd50d53bdfc2628e7bf5e39", "6eccccd95a962f391dd83410ae54cda311e742c8", "41c2c11acde144ccf62cb6eff30731195d22775b", "5394541bffc1715962c9e1e7f5bbeb85a5322d68", "cd2f62b6218613b7e3f13b70b1410db610ca3c6a", "548f7faddd750a642f95536a83ab5c2279c8bf33", "2e89f0285eff5efdac884f753f416f796867716d", "e766cb4ebdaaadb6e1d4c9022bedbc4100f91506", "c3407b18b527c1bce4188f9309b1e03e3e10ccc5", 
"06bacb1e3297c0f1be8c26f180fbf585cd1afa8b" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "77a1532cb64eab28162a0277cde52b4b7eceda49", "8c581854139c628a8c16e36bf48dc5b65d3e26d0", "1aa4ae6a1575a8551a4265bec6e1912c401d0d75", "171c25d7c2b1fd87c32951cceb480f8525cd37dc", "705e4ef0714782574ed245107c1bc5df4d65bc1f", "2fc84ea4ffbee661ce90c5804101887abe8268a8", "0dada24bcee09e1f3d556ecf81f628ecaf4659d1", "4f739534a366799e170599d3ff3d65597f0118db", "26f3cbecbc636984d57e52191c1d87c9377aff6f", "44121b31751a79ccc97107ea39751b813fc70b51", "1734463e8278c8d2412182a1a15267d3a3aa760b", "1f932f0d49a4c56d9718e8506d6177c6a6848831", "47e98574f432ec6a7a224b807a0bbd08a2a66148", "bd79772a58dd4bf040ac9f9c1946614b6a51cc4c", "70fb3cea8335aefdf849597e9d9dd7512d722d88", "1513be7178d849dc4363b31c9ea19a2baea076e7", "094c85f01677702613b032cc2c307255a5d94ec8", "68bcbdfe611ad7429ac9fc5b4b4303c8af1b0c7d", "4e669d0b4a796843f1eca7341467af2476abf6da", "912dfc6b3224efb06019269652c06147bf7c6df8", "161d57d92fdec2d758ff12ab43e0fd52f8891fc6", "d921036a6cb7e340b019afa557a19bc65586a1ad", "97808b34532bf110ddcb3f5a3202c2fa718042a6", "371f651d1408d43a3dda1c5fe813734ed6938963", "85384a03972cfada708456ac7231876ce8fcb6ed", "52c2c050af5b32d4929b4b193967a3675d03aea0", "0204b810b62ced1b96d337d215572f5a56e05440", "8c05a04012696061f5e4015d19e68dcd91f95f12", "765c5d29bce0617e78b2ec3e918e31f6e543645e", "907e4972815c0fcd484d335a9c3fd4cccc9a081e", "656e782fe23364e36a49aeef2d8a74126a38ea04", "ed90710f170f136af15cdd058a59949af56c4457", "277c68718ebdf79be1d79cb957ac468fa4519277", "1018da2c264e5cd2501ac60f17a9b71bd28acd14", "7fdb842336e905df1043a2f347351a22b2e244eb", "793e7bb65f8fd58b957f7c6dad6032eb29cb7683" ], "paperAbstract": "Covert channels evade isolation mechanisms between multiple parties in the cloud. Especially cache covert channels allow the transmission of several hundred kilobits per second between unprivileged user programs in separate virtual machines. 
However, caches are small and shared and thus cache-based communication is susceptible to noise from any system activity and interrupts. The feasibility of a reliable cache covert channel under a severe noise scenario has not been demonstrated yet. Instead, previous work relies on either of the two contradicting assumptions: the assumption of direct applicability of error-correcting codes, or the assumption that noise effectively prevents covert channels. In this paper, we show that both assumptions are wrong. First, error-correcting codes cannot be applied directly, due to the noise characteristics. Second, even with extraordinarily high system activity, we demonstrate an error-free and high-throughput covert channel. We provide the first comprehensive characterization of noise on cache covert channels due to cache activity and interrupts. We build the first robust covert channel based on established techniques from wireless transmission protocols, adapted for our use in microarchitectural attacks. Our error-correcting and error-handling high-throughput covert channel can sustain transmission rates of more than 45 KBps on Amazon EC2, which is 3 orders of magnitude higher than previous covert channels demonstrated on Amazon EC2. 
Our robust and error-free channel even allows us to build an SSH connection between two virtual machines, where all existing covert channels fail.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/hello-other-side-ssh-over-robust-cache-covert-channels-cloud/", "https://cmaurice.fr/pdf/ndss17_maurice.pdf", "http://wp.internetsociety.org/ndss/wp-content/uploads/sites/25/2017/09/ndss2017_06A-1_Maurice_paper.pdf", "https://csaw.engineering.nyu.edu/application/files/5015/0825/9219/CSAW17_paper_120.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8b3b/f0ce97cf1e5c6ff99d76e9961d77ed0bf180.pdf", "s2Url": "https://semanticscholar.org/paper/8b3bf0ce97cf1e5c6ff99d76e9961d77ed0bf180", "sources": [ "DBLP" ], "title": "Hello from the Other Side: SSH over Robust Cache Covert Channels in the Cloud", "venue": "NDSS", "year": 2017 }, "8b71dd605de1ac1504c98f58425e582ebcfc6a34": { "authors": [ { "ids": [ "1933019" ], "name": "Yuanyuan Sun" }, { "ids": [ "40172713" ], "name": "Yu Hua" }, { "ids": [ "1804354" ], "name": "Song Jiang" }, { "ids": [ "2252906" ], "name": "Qiuyu Li" }, { "ids": [ "3296960" ], "name": "Shunde Cao" }, { "ids": [ "3020732" ], "name": "Pengfei Zuo" } ], "doi": "", "doiUrl": "", "entities": [ "Benchmark (computing)", "Cloud storage", "Computer data storage", "Cuckoo hashing", "Failure rate", "Hash function", "Infinite loop", "Insertion sort", "Open addressing", "Pseudoforest", "Usability", "YCSB" ], "id": "8b71dd605de1ac1504c98f58425e582ebcfc6a34", "inCitations": [], "journalName": "", "journalPages": "553-565", "journalVolume": "", "outCitations": [ "c0b438eee7bd423606da9335229602b9c77c10d4", "583aa7cfcacdab3df24b9b3281d34763a22accef", "2c8dfa703ba8cb907384149820e117d5935d9ae0", "a54aa9d822854f265ab0f2264fdea87d75852dcc", "cf24180161075ecb51e20bafa089bc869ca807b1", "3437a7e23e3f97b58f4cf73e7e5b711131e6706c", "24f641c3987721be01f2c484198608b4c53f0208", "65a6947843e78e424b1103b972648476087e2c21", 
"0b6d88342563acaf5f7ac34bec19cfdef6c77eff", "7a7ef06b3007ad58fdfe294e7332d232c0d53ad8", "027485f716ca4f6d9ee2e189790d6560e37fcab2", "1d998e318cb673e883897a0cdbab61fd6dc1e611", "655b7a3fdf6139f0e35e540e486144e9bfd49ef5", "00e1cc1cdd5e928d2f446fe255fd6e98720b12fb", "6479c756e597c38e57aa45e2eae8550fd738418b", "18e9a7eea9c714c24152b9c6dd5cd12fb2c4b495", "2c9b77a063a3459ed8f3be0c0066724a38e225e5", "ab233f9e9e567b71a24eb5291168f3e3c1e749bf", "41fca6c199464c983cb6384ae65c83eb7522fb46", "0a368cdcb14046f6371f77f65d900d52c4da5486", "023fbe8879f4728506b339ffee1b49515c128670", "098d5792ffa43e9885f9fc644ffdd7b6a59b0922", "1594118f2696b573f08510cf837f3b37db87face", "2da760f90c3d2bf6598becdde9063093f488548c", "69265965579d23c6290732837a19a484fb1d28b1", "2bdf7087d706ec519c2d9ff9517d44c4dcde3768", "f96a59bb3e30829ed2c5c352852b5b7c38842128", "92a6961f076307d5b4778fd45d5f01f6d5d84e12", "0e882baf8b351e7d6edb064ff5d8077bc4300097", "01094798b20e96e1d029d6874577167f2214c7b6", "27f8ac77b89986f7a24f929b200b6a358b8f7d01", "9aa0d7253574e50fe3a190ccd924433f048997dd", "611945de1a6abb689b0f27aaf31724de539a645c", "1cb0679ae82be093268747da0f634281ea6a41df", "f3ca4e289497aa25fab3b7f3d6af4f8b63a30273", "202991f6f82d3931de13fc0019ef9e3c07bc11e5", "2ca4b5e877a52ef9681e112a2d5b1308dab3f237", "6d1ca1108d9d96e5607571502552ad04464d7f15", "7188dfabff76f19286382e198d2047740e7174a8", "9e6f82dd30532e3c4d98190577e8cdf75ca12432", "24d8d6225ea758f240dd75a658a1f2957fb18d20", "ec62b73a97016f09d5b9859d31ed991ae84e55ad", "3a2f37d3648592ffb42155c28f71894ad61937fe", "0faddfa1cc6c74d30b82ba32bcc4a2ee27fe31bf" ], "paperAbstract": "Fast query services are important to improve overall performance of large-scale storage systems when handling a large number of files. Open-addressing cuckoo hash schemes have been widely used to support query services due to the salient features of simplicity and ease of use. 
Conventional schemes are unfortunately inadequate to address the potential problem of having endless loops during item insertion, which degrades the query performance. To address the problem, we propose a cost-efficient cuckoo hashing scheme, named SmartCuckoo. The idea behind SmartCuckoo is to represent the hashing relationship as a directed pseudoforest and use it to track item placements for accurately predetermining the occurrence of endless loop. SmartCuckoo can efficiently predetermine insertion failures without paying a high cost of carrying out step-by-step probing. We have implemented SmartCuckoo in a large-scale cloud storage system. Extensive evaluations using three real-world traces and the YCSB benchmark demonstrate the efficiency and efficacy of SmartCuckoo. We have released the source code of SmartCuckoo for public use.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_sun.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/sun", "https://www.usenix.org/system/files/conference/atc17/atc17-sun.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8b71/dd605de1ac1504c98f58425e582ebcfc6a34.pdf", "s2Url": "https://semanticscholar.org/paper/8b71dd605de1ac1504c98f58425e582ebcfc6a34", "sources": [ "DBLP" ], "title": "SmartCuckoo: A Fast and Cost-Efficient Hashing Index Scheme for Cloud Storage Systems", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "8b88a0e1ed527f1c600589a64e96e29e6e919bc9": { "authors": [ { "ids": [ "3333838" ], "name": "Guolei Yang" }, { "ids": [ "1990973" ], "name": "Neil Zhenqiang Gong" }, { "ids": [ "26382519" ], "name": "Ying Cai" } ], "doi": "", "doiUrl": "", "entities": [ "Recommender system" ], "id": "8b88a0e1ed527f1c600589a64e96e29e6e919bc9", "inCitations": [ "52cfb9ad5f9951bb71e915c6362bc034b3177ba6", "6bc565939f5ff4d96cbfe502dd5fa539098d309a", "71f375aa056ab0b63c4ee678ba2e51d54f745d35", "69092affc3461a38eb05cf7982f104eb30b0492c" 
], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/fake-co-visitation-injection-attacks-recommender-systems/" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8b88a0e1ed527f1c600589a64e96e29e6e919bc9", "sources": [ "DBLP" ], "title": "Fake Co-visitation Injection Attacks to Recommender Systems", "venue": "NDSS", "year": 2017 }, "8bb657d36766e21154863ae4bf25aece50fc4ff9": { "authors": [ { "ids": [ "40029726" ], "name": "Jiang Zhou" }, { "ids": [ "1787506" ], "name": "Wei Xie" }, { "ids": [ "1804361" ], "name": "Dong Dai" }, { "ids": [ "3519489" ], "name": "Yong Chen" } ], "doi": "", "doiUrl": "", "entities": [ "Computer data storage", "Data access", "Data center", "Emergence", "Hard disk drive", "Procedural reasoning system", "Pseudorandomness", "Replication (computing)", "Solid-state drive", "Solid-state electronics" ], "id": "8bb657d36766e21154863ae4bf25aece50fc4ff9", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "645-648", "journalVolume": "", "outCitations": [ "0d13346bf40457f0344915f463ad842ecbd40d79", "638c917d981915bc7a00bb0941cdd38111df51de", "780729b2fd5169c2c7a4df956a38d7df15317ca9", "039124197fac7a16e36611d8beed94524dd5fed5", "2da760f90c3d2bf6598becdde9063093f488548c", "625272057c97231155f75dc622312f5d32978d4a", "c41f931657c53ee0cec2c906dfd6e1abd1fc5e54" ], "paperAbstract": "Data replication is a key technique to achieve data availability, reliability, and optimized performance in distributed storage systems and data centers. 
In recent years, with the emergence of new storage devices, heterogeneous object-based storage system, such as a storage system with the co-existence of hard disk drives and solid state drives, have become increasingly attractive as they combine merits of different storage devices to deliver better promise. However, existing data replication schemes do not place data based on heterogeneous device characteristics as well as considering distinct data access patterns. In this paper, we introduce a novel data replication scheme PRS to achieve efficient data replication for heterogeneous storage systems. Different from traditional schemes, the PRS groups objects according to data access patterns and distributes replicas to heterogeneous devices with their features. It uses a pseudo random algorithm to optimize replica layout by considering storage device performance and capacity. The experimental results confirm that PRS is a highly efficient replication scheme for heterogeneous storage systems.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101201", "http://discl.cs.ttu.edu/lib/exe/fetch.php?media=wiki:papers:pattern-directed_replication_scheme_for_heterogeneous_object-based_storage.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8bb657d36766e21154863ae4bf25aece50fc4ff9", "sources": [ "DBLP" ], "title": "Pattern-Directed Replication Scheme for Heterogeneous Object-Based Storage", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "8bb9df23104df66000c6fcdb2608e24b1e22d7fa": { "authors": [ { "ids": [ "1727505" ], "name": "Bin Gu" }, { "ids": [ "2617763" ], "name": "Guodong Liu" }, { "ids": [ "1748032" ], "name": "Heng Huang" } ], "doi": "10.1145/3097983.3098010", "doiUrl": "https://doi.org/10.1145/3097983.3098010", "entities": [ "Algorithm", "Benchmark (computing)", "Cluster analysis", "Clustering high-dimensional data", "Data mining", "Feature selection", "Matrix regularization", 
"Sparse matrix" ], "id": "8bb9df23104df66000c6fcdb2608e24b1e22d7fa", "inCitations": [ "b0846ae67df104b7ffea8cff8a5ab8f7d55de245" ], "journalName": "", "journalPages": "185-193", "journalVolume": "", "outCitations": [ "2cc6ce2ef79a293fb19f75fa524e163d720c732b", "0910c5dba9fc02ae9c2912e38a66e2648c63a17a", "ed89f5d136d299e46b89f7f18c9c10ba7c5335db", "a911e90fd4c4999f7944ffeff3cffca18d1bf05c", "9a5eaf528d75e31acf31a52279c49821dbb1b3d2", "49e60d3c7e2932db0f90433e4299ab170b9b007b", "5d6aa71ab6c5b4e7cc6538b15dc34e41aea68c23", "72af45375b1a0e29e9b635e4334e9b9ba9d15802", "a9fc0062c33c0f5e678276b6d6872152b2979c6b", "1d89516427c0d91653b70171a0e8998af9d5960b", "223841a71f5bce4cb03040e229d13e9a71b78ec3", "46217f372a75dddc2254fdbc6b9418ba3554e453", "055538aed1d84b1217b7d084adf6a3be6c5cebda", "9ee15b9fdf291591208ecc109dc5dfff678cfc19", "1b65af0b2847cf6edb1461eda659f08be27bc76d", "7bf216e7b5449f91f3e3ddfd3c7b44f88ae96b19", "47103a6460ddd27c5f502b378501308d31d3c4e8", "246737333a8e2e42b92be7a79f9508700b64c290", "4feb07b897d85105f3a58aaadc6c485a5aef9566", "472c3d480f6ea65e56f0e219bf3a8b4fde93b838", "9c6385f12fb0acf1d5912d137d6f405584672411", "696fb67fa8bcdaa2a6af710a2d334ab496d98086", "333bcb212334b3c77e9347b3cdeeb935936e130b" ], "paperAbstract": "Feature selection is one of the most important data mining research topics with many applications. In practical problems, features often have group structure to effect the outcomes. Thus, it is crucial to automatically identify homogenous groups of features for high-dimensional data analysis. Octagonal shrinkage and clustering algorithm for regression (OSCAR) is an important sparse regression approach with automatic feature grouping and selection by ℓ1 norm and pairwise ℓ norm. However, due to over-complex representation of the penalty (especially the pairwise ℓ norm), so far OSCAR has no solution path algorithm which is mostly useful for tuning the model. 
To address this challenge, in this paper, we propose a groups-keeping solution path algorithm to solve the OSCAR model (OscarGKPath). Given a set of homogenous groups of features and an accuracy bound ε, OscarGKPath can fit the solutions in an interval of regularization parameters while keeping the feature groups. The entire solution path can be obtained by combining multiple such intervals. We prove that all solutions in the solution path produced by OscarGKPath can strictly satisfy the given accuracy bound ε. The experimental results on benchmark datasets not only confirm the effectiveness of our OscarGKPath algorithm, but also show the superiority of our OscarGKPath in cross validation compared with the existing batch algorithm.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098010" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8bb9df23104df66000c6fcdb2608e24b1e22d7fa", "sources": [ "DBLP" ], "title": "Groups-Keeping Solution Path Algorithm for Sparse Regression with Automatic Feature Grouping", "venue": "KDD", "year": 2017 }, "8bbba8c51e79b4ec86d95141a24b6c9a3c6eac6b": { "authors": [ { "ids": [ "31610797" ], "name": "Timothy Zhu" }, { "ids": [ "2366265" ], "name": "Michael A. 
Kozuch" }, { "ids": [ "1738043" ], "name": "Mor Harchol-Balter" } ], "doi": "10.1145/3127479.3132245", "doiUrl": "https://doi.org/10.1145/3127479.3132245", "entities": [ "Data center", "Experiment", "Hoc (programming language)", "Network congestion", "Server (computing)", "Service-level agreement", "Tracing (software)" ], "id": "8bbba8c51e79b4ec86d95141a24b6c9a3c6eac6b", "inCitations": [], "journalName": "", "journalPages": "598-610", "journalVolume": "", "outCitations": [ "0831a5baf38c9b3d43c755319a602b15fc01c52d", "7b6e453e08717cfdcb66349ac184996e43ed85b3", "06db78ece7ba41bccab5df77240541e32cffd623", "0c34e00dcd7f15126110b9d430306157a0aae769", "3ec219ec5f6a1fb2e02fa657d34314c2c48d6f15", "ebd241b481a90b52cef6463211b76ba1dc46c44a", "b08af27a9b4c1e8732681918229cd6e4a525c68e", "39300a6bb64f813bd233343b840cb169d8d0527f", "3b988049dd8f62f772281e90196bbd793700c86b", "0abe5211e209b272890ba6820a33b72e938b0b3b", "0c575d220d8bc125d6a2290984c8e2b87011631d", "7019d566d10fcdb836aa338c344de4f0ed2131b6", "118da4d571ee02b4f31b5c4c078857472e77ba1e", "47d5357957cabb610131db1b228e58b70860ee8d", "3de30c8dafc720bf066e5e3a005d16212dd31149", "726099036bb32c3fbaf1650d5900eeaa2ecc8fd9", "8969f883979ac45fe24cecde39c15ddc4bd756d3", "9cd87e9a36ebd7686a758c0d28ba3a494f237d60", "01dca7c7612aa71e5da87087c97a8dfffe94d43b", "65a2cb8a02795015b398856327bdccc36214cdc6", "7b420218b4e797dcc6ca96f6c6c3ec29a9688c07", "4ccbc02229c96d7208ec273e858ad43bc3b84feb", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "122229239aeba1eb4f1623adb40f1845c582a520", "057d21830cde5b3be2fdb3a74ee69a3c7e9109f8" ], "paperAbstract": "Service providers want to reduce datacenter costs by consolidating workloads onto fewer servers. At the same time, customers have performance goals, such as meeting tail latency Service Level Objectives (SLOs). Consolidating workloads while meeting tail latency goals is challenging, especially since workloads in production environments are often bursty. 
To limit the congestion when consolidating workloads, customers and service providers often agree upon rate limits. Ideally, rate limits are chosen to maximize the number of workloads that can be co-located while meeting each workload's SLO. In reality, neither the service provider nor customer knows how to choose rate limits. Customers end up selecting rate limits on their own in some ad hoc fashion, and service providers are left to optimize given the chosen rate limits.\n This paper describes WorkloadCompactor, a new system that uses workload traces to automatically choose rate limits simultaneously with selecting onto which server to place workloads. Our system meets customer tail latency SLOs while minimizing datacenter resource costs. Our experiments show that by optimizing the choice of rate limits, WorkloadCompactor reduces the number of required servers by 30--60% as compared to state-of-the-art approaches.", "pdfUrls": [ "http://www.pdl.cmu.edu/PDL-FTP/CloudComputing/SOCC17-zhu.pdf", "http://doi.acm.org/10.1145/3127479.3132245" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8bbba8c51e79b4ec86d95141a24b6c9a3c6eac6b", "sources": [ "DBLP" ], "title": "WorkloadCompactor: reducing datacenter cost while providing tail latency SLO guarantees", "venue": "SoCC", "year": 2017 }, "8bdf9d4c4fca5a89f8980e5b17e4025ffc462996": { "authors": [ { "ids": [ "2179606" ], "name": "Christopher Haine" }, { "ids": [ "1729212" ], "name": "Olivier Aumage" }, { "ids": [ "1718247" ], "name": "Denis Barthou" } ], "doi": "10.1007/978-3-319-64203-1_19", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_19", "entities": [ "Binary file", "Rewriting" ], "id": "8bdf9d4c4fca5a89f8980e5b17e4025ffc462996", "inCitations": [], "journalName": "", "journalPages": "260-272", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_19" ], "pmid": "", "s2PdfUrl": "", "s2Url": 
"https://semanticscholar.org/paper/8bdf9d4c4fca5a89f8980e5b17e4025ffc462996", "sources": [ "DBLP" ], "title": "Rewriting System for Profile-Guided Data Layout Transformations on Binaries", "venue": "Euro-Par", "year": 2017 }, "8be55825f346e7114b1df14b09cd9e767d7a663c": { "authors": [ { "ids": [ "2183875" ], "name": "Xiao He" }, { "ids": [ "32474239" ], "name": "Thomas Gumbsch" }, { "ids": [ "2733826" ], "name": "Damian Roqueiro" }, { "ids": [ "1704422" ], "name": "Karsten M. Borgwardt" } ], "doi": "10.1109/ICDM.2017.25", "doiUrl": "https://doi.org/10.1109/ICDM.2017.25", "entities": [ "Algorithm", "Cluster analysis", "Ground truth", "Kernel (operating system)", "Korea Computer Center", "Linear separability", "Simulation" ], "id": "8be55825f346e7114b1df14b09cd9e767d7a663c", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "157-166", "journalVolume": "", "outCitations": [ "2d37c82d7b8c338a18f356410ab60ecc9deaef7d", "7abd4a611ce093f6089d36406e2a0fa774048a5f", "6cc33fba4ffdf8e54522a0623403ab4843e75a6a", "555c061d713293fe404f9ef6e0f7e921e552bf35", "82544d04b642931ae6233b9f37c8e8c44fc95c35", "848d1844b8b4923544ff2146cbac1655684f61fb", "5d76202ebe37936b4c3796d9cbbe7be5b60f93d0", "47f5f740e225281c02c8a2ae809be201458a854f", "1a8f0886649ebda38dd09a81a06a18ceef3c3833", "10df69205ff0e95bc0720d285165ce5d7d693805", "291393c0ef39323726c121b2260658e93e7a5b2b", "e0d2861a9022667a93a8a0573d44f238f7c3a027", "1f319dcfd70b5d8afdd2ebf5551ae3fbdee01ff8", "286a3bd498eb680b2bee37c7bc447486d1263ca9", "3b1d68c036947f17def72a11e9f7c9f2200ae791", "5db4e06712ad4dfa8a6a9f23e8268c76cc046282", "01c7b980daabb6b8e4ea1ff4527f968ec937c044", "f6f75ba91b1bb1a5049b34a7e3e2527bf45b2fb0", "0fddb1a0317b51183f7f045432861ec5170f63db", "397306cada03c29ab9c3d5a7991a343cae92f2e3", "073daaf4f6fa972d3bdee3c4e4510d21dc934dfb", "12811b1225262d4c04e79f2e2fb1c51a83762574", "45b319c92c5ab92875205b41eae72a8e7cc56453", "87907b823bfbfb4e3f52aae32c66bb4691ef1eff" 
], "paperAbstract": "Clustering results are often affected by covariates that are independent of the clusters one would like to discover. Traditionally, Alternative Clustering algorithms can be used to solve such a problem. However, these suffer from at least one of the following problems: i) continuous covariates or non-linearly separable clusters cannot be handled; ii) assumptions are made about the distribution of the data; iii) one or more hyper-parameters need to be set. Here we propose a novel algorithm, named Kernel Conditional Clustering (KCC), whose objective is derived from a kernel based conditional dependence measure. KCC is parameter-light and makes no assumptions about the cluster structure, the covariates, or the distribution of the data. On both simulated and real-world datasets, the proposed KCC algorithm detects the ground truth cluster structures more accurately than state-of-the-art alternative clustering methods.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.25" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8be55825f346e7114b1df14b09cd9e767d7a663c", "sources": [ "DBLP" ], "title": "Kernel Conditional Clustering", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "8bfe88931eda9652a7507573eddfcd8ca0a1ea59": { "authors": [ { "ids": [ "39071799" ], "name": "Benjamin Marks" }, { "ids": [ "2255031" ], "name": "Tia Newhall" } ], "doi": "10.1109/IPDPSW.2017.29", "doiUrl": "https://doi.org/10.1109/IPDPSW.2017.29", "entities": [ "Application checkpointing", "Auxiliary memory", "Cache (computing)", "Computer data storage", "Data-intensive computing", "Device driver", "Hard disk drive", "Linux", "Linux", "Memory-mapped I/O", "Operating system", "Peripheral", "Persistence (computer science)", "Random-access memory", "Request for proposal", "Snapshot (computer storage)", "Software modernization", "Solid-state drive" ], "id": "8bfe88931eda9652a7507573eddfcd8ca0a1ea59", 
"inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)", "journalPages": "30-41", "journalVolume": "", "outCitations": [ "225129e1c1626d7e1c2b5fd6146a90ffd137ba02", "dd360ec1e6f6b7d068989566f831db70e81a3a31", "6e468c8087da488ac437367a2498e8117398371d", "fda87c93b75633caa0f78b6e7f9384e8e775d3f1", "38a37ecb6e7a8d15d5010b8a53f313ad6a1d6442", "65ad5a20fa3458a204fa0b50fbf72445cdf1cc3d", "126b535adfb45dedc4232530aa1496eb6dfc9633", "13ecbbe73caa5ee0a14bd2fb8ee2fb30f2c6e735", "075ed574602ef2c7f128c3a4e2bf07c173d4b117", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "1ac1348938a45e1da84be8caac78e3097acaf0c1", "65ad9178eb3e3bb6ef01df6ced56e389e911376c", "47518ea0e23e10daa00819d123025526b0e98522", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "206b086db0cc1c807a9fdbf7bbc9c261a50bfd34", "173d3a3093c2629196ea8f51bc11b15296c151e9", "badeb810f55e9a4bcd30e8cdb401b2f8faeaa3b5", "4ec9fdae4d3e6b27a8ad8a3387877b71dbaf52c8", "686b4318f875216720d8d9e8216c8c6be086ac7d", "0541d5338adc48276b3b8cd3a141d799e2d40150" ], "paperAbstract": "We present Nswap2L-FS, a fast, adaptable, and heterogeneous storage system for backing file data in clusters. Nswap2L-FS particularly targets backing temporary files, such as those created by data-intensive applications for storing intermediate results. Our work addresses the problem of how to efficiently and effectively make use of heterogeneous storage devices that are increasingly common in clusters. Nswap2L-FS implements a two-layer device design. The top layer transpar- ently manages a set of bottom layer physical storage devices, which may include SSD, HDD, and its own implementation of network RAM. Nswap2L-FS appears to node operating systems as a single, fast backing storage device for file systems, hiding the complexity of heterogeneous storage management from OS subsystems. 
Internally, it implements adaptable and tunable policies that specify where data should be placed and whether data should be migrated from one underlying physical device to another based on resource usage and the characteristics of different devices. We present solutions to challenges that are specific to supporting backing filesystems, including how to efficiently support a wide range of I/O request sizes and balancing fast storage goals with expectations of persistence of stored file data. Nswap2L-FS defines relaxed persistence guarantees on individual file writes to achieve faster I/O accesses; less stringent persistence semantics allow it to make use of network RAM to store file data, resulting in faster file I/O to applications. Relaxed persistence guarantees are acceptable in many situations, particularly those involving short-lived data such as temporary files. Nswap2L-FS provides a persistence snapshot mechanism that can be used by applications or checkpointing systems to ensure that file data are persistent at certain points in their execution. Nswap2L-FS is implemented as a Linux block device driver that can be added as a file partition on individual cluster nodes. Experimental results show that file-intensive applications run faster when using Nswap2L-FS as backing store. 
Additionally, its adaptive data placement and migration policies, which make effective use of different underlying physical storage devices, result in performance exceeding that of any single device.", "pdfUrls": [ "https://doi.org/10.1109/IPDPSW.2017.29", "https://www.cs.swarthmore.edu/~newhall/hcw17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8bfe88931eda9652a7507573eddfcd8ca0a1ea59", "sources": [ "DBLP" ], "title": "Transparent Heterogeneous Backing Store for File Systems", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)", "year": 2017 }, "8c17cb64a2153ed38d7a2517ac6b57083e0a0eff": { "authors": [ { "ids": [ "2174285" ], "name": "Natacha Crooks" }, { "ids": [ "33960217" ], "name": "Youer Pu" }, { "ids": [ "2445753" ], "name": "Lorenzo Alvisi" }, { "ids": [ "32961575" ], "name": "Allen Clement" } ], "doi": "10.1145/3087801.3087802", "doiUrl": "https://doi.org/10.1145/3087801.3087802", "entities": [ "Black box", "Black\u2013Scholes model", "Bridging (networking)", "Isolation (database systems)", "Observable", "PODC", "Snapshot isolation" ], "id": "8c17cb64a2153ed38d7a2517ac6b57083e0a0eff", "inCitations": [ "528391ed092a671cf9f02ce0c2134bbdf8d23700", "c1447c4c07721e4e444aaa7ad5bb6a661c742bd2", "bfb813fb8f6b2a2aea7ffe9d72fd94e829fd0313" ], "journalName": "", "journalPages": "73-82", "journalVolume": "", "outCitations": [ "aae102355133753c4b7fe673ab33f634effeffa1", "96d2a84e57ff1475394b7702473f3e8e868feb68", "08b254714a3ef30108809eb9f712c45240aa1884", "5c9793fa07fcaaae864eb89fd1c1b9f6905ec546", "592950c96053778e9b58ce4c461c1cfa8d32e334", "00ac447d02035c26c7e2852c2457fe812e89038f", "05885dbd3ccbbe744a2ee1c39126bd263140e741", "43fe3ad9ce1c3dbe4f905068ae2adc7bcb7fc9fb", "13f7c5807452ae602046582a385c0fb544ec5de1", "5dd350cee6ecfd097b57772f89e6341ff05b5725", "ab310a105f6d5b04d798c4be0d6890ba385463c4", "5787330b0fb8d73d60e17ec462fa6022a6c62399", "55bef5db971deed1358bcb2b375d6832b9ba6a1b", 
"7119bb1433b8905c4dd71e86f9f9040f31604982", "eeae90e80ff72c7757ed1c3bd9e1c0f2a8f3c356", "2a42fb5ad895ed993219836b6b4ff93670c9db71", "32257d8d2b08c87e58c7b7f4b2430d58e4b51a81", "e7ab23d011e5183db78cfea48e303210f6e57e2e", "2146a0384f58500ad7c0865c8518b15bb84918a2", "344beb13d24f286da268f66acbfecc9b9da77ca5", "13c27125584651329f66461981cbb20fa63e9023", "3702d6e0c78050f3261fdbf0eb1aefbac59fb8cf", "499c8e2ace075745a697728c1a43011875684cf2", "d8da1fae13be213d8627e6e4f3451115db7b1e03", "61011eb60b242f529f58eecaf7029524920cd6cf", "71c0dd6bd1dd57716b6797043e9f09b951c88a22", "137347d6adf1698cfb9733b6237db709e097422b", "42142c121b2dbe48d55e81c2ce198a5639645030", "6f164cc777efdf08748c96e5be185f69a8f01cd8", "7ed6b9c5a7e73b3dd5036f2dd0ff19a1a2cba45f", "22d9bd7d4d4e071ae573ac56fca7b58824c50801", "8d1c0ae7bbe138bc19abf66ca918f46b244b1f5d", "937353eebacc1ab07071280291294629dce5422a", "08b8009cc59c035bdbcb69fdd76f1a58363d3da5", "21d99ad2fecf7d82cdc632752b06d6c532ddfe21", "0a9cc699ed94b4729de3255d76305b91b6a56aaa", "35c2f7e0454adc0130c4279fce84a31701cebc67", "01667cd68d259d08807e266e07c4f8eb1c81d2af", "d5229a1f0e3111bc9feaccb018eedc647e03cf5f", "f5c63f9f0264fe9eb4fc1e10995dfc60bc09a969", "bc631e10de057f1ae6f65cb1b6f4baac1024e449", "068e59b88a1230d709d99c83a45d3a5b91260810", "35f69e58d32139a78d8e395bcd9d7efc0a61903c", "4593ae644f04d76f582dedc4cc32d2acd33c9a93" ], "paperAbstract": "This paper introduces the first state-based formalization of isolation guarantees. Our approach is premised on a simple observation: applications view storage systems as black-boxes that transition through a series of states, a subset of which are observed by applications. Defining isolation guarantees in terms of these states frees definitions from implementation-specific assumptions. It makes immediately clear what anomalies, if any, applications can expect to observe, thus bridging the gap that exists today between how isolation guarantees are defined and how they are perceived. 
The clarity that results from definitions based on client-observable states brings forth several benefits. First, it allows us to easily compare the guarantees of distinct, but semantically close, isolation guarantees. We find that several well-known guarantees, previously thought to be distinct, are in fact equivalent, and that many previously incomparable flavors of snapshot isolation can be organized in a clean hierarchy. Second, freeing definitions from implementation-specific artefacts can suggest more efficient implementations of the same isolation guarantee. We show how a client-centric implementation of parallel snapshot isolation can be more resilient to slowdown cascades, a common phenomenon in large-scale datacenters. ACM Reference format: Natacha Crooks, Youer Pu, Lorenzo Alvisi, and Allen Clement. 2017. Seeing is Believing: A Client-Centric Specification of Database Isolation. In Proceedings of PODC \u201917, Washington, DC, USA, July 25-27, 2017, 30 pages. http://dx.doi.org/10.1145/3087801.3087802", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087802", "http://www.cs.cornell.edu/~lorenzo/papers/Crooks17Seeing.pdf", "https://apps.cs.utexas.edu/apps/sites/default/files/tech_reports/paper_extendedtr.pdf", "http://www.cs.utexas.edu/~ncrooks/2017-podc-seeing.pdf", "http://www.cs.utexas.edu/~ncrooks/2017-podc-seeing-tr.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/bf45/a6a969c9b55f86b370e978153c3fddc162bb.pdf", "s2Url": "https://semanticscholar.org/paper/8c17cb64a2153ed38d7a2517ac6b57083e0a0eff", "sources": [ "DBLP" ], "title": "Seeing is Believing: A Client-Centric Specification of Database Isolation", "venue": "PODC", "year": 2017 }, "8c1eb7aed124413525731af39d0249ce3663588e": { "authors": [ { "ids": [ "1698586" ], "name": "Jie Zhang" }, { "ids": [ "1720335" ], "name": "Xiaoyi Lu" }, { "ids": [ "1731654" ], "name": "Dhabaleswar K. 
Panda" } ], "doi": "10.1109/IPDPS.2017.43", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.43", "entities": [ "Benchmark (computing)", "Cloud computing", "Computation", "Emergence", "GIOVE", "High availability", "Hypervisor", "InfiniBand", "LU decomposition", "Message Passing Interface", "Overhead (computing)", "Provisioning", "Single-root input/output virtualization", "Virtual machine", "X86 virtualization" ], "id": "8c1eb7aed124413525731af39d0249ce3663588e", "inCitations": [ "0351f22135e61de52250654b1f8b277cd8c7a173", "4509e808f3f4a015de41758c5724a8c029d924c8", "72e1f260a83f48eaca7428a7bc417066d83af61e", "8a702304f6964ad5abdb1c4b19e6a645738a4474" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "143-152", "journalVolume": "", "outCitations": [ "97f355e50deffa3416b34dba7f2e3ab505ac8b2d", "26b45b0df49e63d612a3a8ab3c89fcf53a343111", "0007f817593d82a859f2aa2ba1d1ee7e9199e190", "03ed30028164bd7b5215da3fb431f4402071a49f", "04e9d7b1544ec76e3e5c24b46ccae5d5096b638b", "4cd04e87d5db64fb72037c34af2b3f315b31df08", "a7298ad92e7f58242f3e43007fe12389e19b29a3", "433b9d8aff0749ce61a3601eced49ec404942dd4", "4deadb9a1abb36bbe9cf412217e242e07a2e6418", "2f2cdd7b0c98b5e43b61272d2ac3ebb5cd29041d", "71dec883ad8effe7d6075512138625080d32ab47", "6678b17fc8758efea8d32c2d47f9924f8a0cdc6d", "ac8189a722de5e25a4723b74d5d5980a0abb3122", "62b757b1a924f3386c33b8a988327e3749ab8a54", "25f3dc99f25edac5787dc65db8dd210c37e294b1", "ca701980a9394f55c6bc471dda6b4765afa60527", "078a3183d87878156f172cb220f77bda0e1f9aed", "b69382e95bbf9bd9f141bbb7e0d9ab2bd8353e2b", "e565993f00296cc66805dd0aea19927d3ed1914b", "4352a6357d5759f28a945bbef2c8d5c16ea89dfe", "5f93c0524269ba2f4a2558f5b222fd68730e3bba", "0b3650864c1f4c5d7eb996e620d35239b507ccc7", "2a0c9f8248aad793810dfc2ec2bc21a8ebdca6f4" ], "paperAbstract": "High-speed interconnects (e.g. InfiniBand) have been widely deployed on modern HPC clusters. 
With the emergence of HPC in the cloud, high-speed interconnects have paved their way into the cloud with recently introduced Single Root I/O Virtualization (SR-IOV) technology, which is able to provide efficient sharing of high-speed interconnect resources and achieve near-native I/O performance. However, recent studies have shown that SR-IOV-based virtual networks prevent virtual machine migration, which is an essential virtualization capability towards high availability and resource provisioning. Although several initial solutions have been pro- posed in the literature to solve this problem, our investigations show that there are still many restrictions on these proposed approaches, such as depending on specific network adapters and/or hypervisors, which will limit the usage scope of these solutions on HPC environments. In this paper, we propose a high-performance virtual machine migration framework for MPI applications on SR-IOV enabled InfiniBand clusters. Our proposed method does not need any modification to the hypervisor and InfiniBand drivers and it can efficiently handle virtual machine (VM) migration with SR-IOV IB device. Our evaluation results indicate that the proposed design is able to not only achieve fast VM migration speed but also guarantee the high performance for MPI applications during the migration in the HPC cloud. At the application level, for NPB LU benchmark running inside VM, our proposed design is able to completely hide the migration overhead through the computation and migration overlapping. 
Furthermore, our proposed design shows good scaling when migrating multiple VMs.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.43" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8c1eb7aed124413525731af39d0249ce3663588e", "sources": [ "DBLP" ], "title": "High-Performance Virtual Machine Migration Framework for MPI Applications on SR-IOV Enabled InfiniBand Clusters", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "8c2174bf1aad2aea23f27d7aba76814839cdbe0d": { "authors": [ { "ids": [ "31804143" ], "name": "Chenhan D. Yu" }, { "ids": [ "39902876" ], "name": "James Levitt" }, { "ids": [ "4095509" ], "name": "Severin Reiz" }, { "ids": [ "2395747" ], "name": "George Biros" } ], "doi": "10.1145/3126908.3126921", "doiUrl": "https://doi.org/10.1145/3126908.3126921", "entities": [ "Approximation algorithm", "Computation", "Fast multipole method", "Haswell (microarchitecture)", "Knights", "Low-rank approximation", "Matrix multiplication", "Pascal (microarchitecture)", "Shared memory", "Singular value decomposition", "Synchronization (computer science)", "The Matrix" ], "id": "8c2174bf1aad2aea23f27d7aba76814839cdbe0d", "inCitations": [], "journalName": "", "journalPages": "53:1-53:14", "journalVolume": "", "outCitations": [ "df3f9a7b1ffdf1030b09cad4dfba439aabfdb78b", "36b43cb996ffacc7ccdf53c7c2ec97a0321d5e98", "4a85424b802a65b1e2c48e6e6c48117191c5644c", "0f16f6f478b5c788dce466eb50e36c612273c36e", "be06046737ffef02b1b0da2f84db9a908f76c9d8", "fc8cda36a0972e7de1ac3a7bcb81dc32da79bee4", "a3b4133fb1a65f35b9b7950da9786d23fe5723b4", "2e939434c9bae1fefd9e543ae67398ef99528480", "0fd55b8a6d5c4823e53005e74277683f0a9a7f7a", "37d29194f097d4d698529bf70e0bc659a8fe3720", "6d73c9947d840cd84a8eee79c224add5fbbb929e", "b6fff8b8ea77f157913986e7af53951d9fc1128e", "31b72979ef41e97c96da9a80a6a21f661320d107", "47934a9f73042acef86daa1f5f71459d5929bc48", "6ef0b43cf897f527540c29cae0618aabb7329072", 
"61aaffc396c17521e13d1fd137433d815519759c", "9466bb74a149b1bad4f1d9922e8ccb2cde2c3d3f", "8f6b90b7066a1e89f52f1a9b84b8a5e7298a9502", "34360863d08ddf5546a27def78430b2addfcb8e2", "09c5931307cba3f80d3ecc14d02eecfa46463cfe", "7dc37aafb25554b6b523551174ce7fc2774e98e2", "001bd2afb6a81840b5aa9971bfa9bb7066f6a6c2", "630b514e68c0de62fa3dca5a45e3131f1515c90c", "58251a940550cfee8229be86f1dd2c54807276c5", "2730606a9d29bb52bcc42124393460503f736d74", "4e969aebed59674f29f638bdefff446197f285d8", "69ae1f76b0c8b915589e4b541cba223a4fb465d9", "00508c963aa6b57be777255e5ca4f966a35b86b2", "6995179345701e6f690ba7703190c7bd964a27a0", "93e1dae94601e6f9fe37f4b63a0061be6d6acfe5", "d9b4d4ffa5bf9e9e376b0bdeba19d4090d653feb", "20a5ad96fac2eb585d73db495176ef25ff7f6465", "59e03eba4b9c7d848c71bbaa14bc80959e9311e7", "788394577818baa8aa395d6bdd2b96ccb648e853", "08a66903d4341996d8943facc24d75ac77d267b7" ], "paperAbstract": "We present GOFMM (geometry-oblivious FMM), a novel method that creates a hierarchical low-rank approximation, or \"compression,\" of an arbitrary dense symmetric positive definite (SPD) matrix. For many applications, GOFMM enables an approximate matrix-vector multiplication in N log N or even N time, where N is the matrix size. Compression requires N log N storage and work. In general, our scheme belongs to the family of hierarchical matrix approximation methods. In particular, it generalizes the fast multipole method (FMM) to a purely algebraic setting by only requiring the ability to sample matrix entries. Neither geometric information (i.e., point coordinates) nor knowledge of how the matrix entries have been generated is required, thus the term \"geometry-oblivious.\" Also, we introduce a shared-memory parallel scheme for hierarchical matrix computations that reduces synchronization barriers. 
We present results on the Intel Knights Landing and Haswell architectures, and on the NVIDIA Pascal architecture for a variety of matrices.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126921", "http://arxiv.org/abs/1707.00164", "https://arxiv.org/pdf/1707.00164v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8c2174bf1aad2aea23f27d7aba76814839cdbe0d", "sources": [ "DBLP" ], "title": "Geometry-oblivious FMM for compressing dense SPD matrices", "venue": "SC", "year": 2017 }, "8c398b9e760faafc45c78122ae23d07c60985ea0": { "authors": [ { "ids": [ "38590383" ], "name": "Lei Yu" }, { "ids": [ "1685610" ], "name": "Ling Liu" }, { "ids": [ "1682055" ], "name": "Calton Pu" } ], "doi": "", "doiUrl": "", "entities": [ "Adversary (cryptography)", "Adversary model", "Ciphertext indistinguishability", "Computation", "Differential privacy", "Experiment", "Geo warping", "Location inference", "Mobile device", "Personalization", "Privacy", "Requirement", "Smart device", "Two-phase commit protocol" ], "id": "8c398b9e760faafc45c78122ae23d07c60985ea0", "inCitations": [ "80885aba8ece12b094878e03087a60071ba1f9b7", "8297ab6128366e95b400d51ccf58e4965ac9707d", "c3e0a0b5872781aea4e913ede8b3511c8c368633" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "54234446dd8991441336600931bac9b99882a526", "2d474083f3c9e8dab83b9f6d572743dd5cdd52a7", "63b88452574095639ef9a1f692eef3c1ec386b0a", "ae5e1b0941153ebc0de90b4830893618b81a7169", "32e1d2b395538bb9af876580374aa86915c659f3", "383b6f7cce3075e9f512cde2db60d6cbfe797c53", "49c392c9e5a50394c7479c7a837ebcdab7662317", "076bd9264480b622b01d0aefd6729905a1188211", "08a8c653b4f20f2b63ac6734f24fa5f5f819782a", "1065f1c73c538a8d4b017af1825967e1fab1bf52", "37bcd8bbe2cddd48f0ec152fc5ffa4fca93f3828", "566e63917526cc083b103985f96cf0c65ce7a4a5", "15cd71ac0333ade954201db6979abb39bde3d181", "04bd64577c1f66486825c4ef9132cb94bb5334b4", "17fac85921a6538161b30665f55991f7c7e0f940", 
"c6cc3d48168ab50313d477164170526d28136020", "461ea2ebc14861c0b9c6c07ba61eaad93cc65980", "24ccdcba118ff9a72de4840efb848c7c852ef247", "84834a49ee6cd83806a9e64fcd26b6d6fe8da4bc", "283ecc8622694c070fa53aee7a1c37dadc603f8d", "01a29e319e2afa2d29cab62ef1f492a953e8ca70", "5bf1f8c2fa288784956715ee1fbffad57c16fdb6", "1450346361a5d16a8b4fa19ef52770518dcb81d7" ], "paperAbstract": "Location privacy continues to attract significant attentions in recent years, fueled by the rapid growth of locationbased services (LBSs) and smart mobile devices. Location obfuscation has been the dominating location privacy preserving approach, which transforms the exact location of a mobile user to a perturbed location before its public release. The notion of location privacy has evolved from user-defined location kanonymity to two statistical quantification based privacy notions: geo-indistinguishability and expected inference error. The former promotes differential location privacy but does not protect location against inference attacks of Bayesian adversary with using prior information, whereas the latter promotes the background inference resilient location privacy but does not guarantee differential location privacy with respect to geo-indistinguishability. In this paper we argue that geo-indistinguishability and expected inference error are two complementary notions for location privacy. We formally study the relationship between two privacy notions. By leveraging this relationship and a personalized error bound, we can effectively combine the two privacy notions. We develop PIVE, a two-phase dynamic differential location privacy framework. In Phase I, we take into account the user-defined inference error threshold and the prior knowledge about the user\u2019s location to determine a subset of locations as the protection location set for protecting the actual location by increasing adversary\u2019s expected location inference error. 
In Phase II, we generate pseudo-locations (i.e., perturbed locations) in the way that achieves differential privacy over the protection location set. This two-phase location obfuscation is constructed dynamically by leveraging the relationship between two privacy notions based on adversary\u2019s current prior information and user-specific privacy requirements on different locations and at different times. Experiments with real-world datasets demonstrate that our PIVE approach effectively guarantees the two privacy notions simultaneously and outperforms the existing mechanisms in terms of adaptive privacy protection in presence of skewed locations and computation efficiency.", "pdfUrls": [ "http://iisp.gatech.edu/sites/default/files/images/ling_ndss_locationprivacy_ndss_2017_v0.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/dynamic-differential-location-privacy-personalized-error-bounds/", "https://www.internetsociety.org/sites/default/files/ndss2017_06A-2_Yu_paper.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8c39/8b9e760faafc45c78122ae23d07c60985ea0.pdf", "s2Url": "https://semanticscholar.org/paper/8c398b9e760faafc45c78122ae23d07c60985ea0", "sources": [ "DBLP" ], "title": "Dynamic Differential Location Privacy with Personalized Error Bounds", "venue": "NDSS", "year": 2017 }, "8c409b668f76f720d2b4efb11129c161b2dc46cf": { "authors": [ { "ids": [ "1720147" ], "name": "Marco Brambilla" }, { "ids": [ "1705375" ], "name": "Jordi Cabot" }, { "ids": [ "39443934" ], "name": "Javier Luis C\u00e1novas Izquierdo" }, { "ids": [ "34544046" ], "name": "Andrea Mauri" } ], "doi": "10.1145/3136014.3136033", "doiUrl": "https://doi.org/10.1145/3136014.3136033", "entities": [ "Crowdsourcing", "Curve fitting", "Directory Services Markup Language", "Domain-specific language", "Domain-specific modeling", "Modeling language", "Programming paradigm" ], "id": "8c409b668f76f720d2b4efb11129c161b2dc46cf", "inCitations": [ 
"d3c7378a568b81c45759698a0548080dbd7c656b" ], "journalName": "", "journalPages": "129-138", "journalVolume": "", "outCitations": [ "5eb61b4544780757235a10bc384d72b2ebb5270c", "87d95dc96c90e1e371ffc484ac6bfa83a3be75b5", "0be1ab2398a7cdf1b8d2d4b496f4efd898e89e74", "024b6bc78fb1f97e3056e3873cc47043780e220a", "395d93ea7d00cb3655f3346afadef98ff38d636f", "a82a10b8c59f330910bdd0e0391c46e2eb89d8d6", "0e37876d4c7b60c2c2c748f723d304af72a3de03", "fbc60c004d84d6ee55aaa02f34a823da4f898b2b", "042a1d7e76d8a24851451099c32ede833e540572", "f034e925e992f7cc080086087a95305a18b9745b", "77ed3dce60ec3b67ccdbdf099b3c90b2fe003223", "b043388c5f630687047c767bc02bf1716f508b0d", "0afde1f62b74f13998b59b6ea9a27aa95863331a", "dda8fe1f73559b7c17cfddc084f3271fb9d6f5e1", "0a5543ee4d4fec9b7da1773399347d3c5519fabe", "59970eba7298666165c54ba678d5900a9ecfc2d6", "385cf0d0b528510e09ccfc2bb7670d335cee61e5", "6522603ed4648394cc33246ff3e46d0dc0bf4708", "421599bc84af4f5779c21a613aa93013751255b2" ], "paperAbstract": "Crowdsourcing has emerged as a novel paradigm where humans are employed to perform computational tasks. In the context of Domain-Specific Modeling Language (DSML) development, where the involvement of end-users is crucial to assure that the resulting language satisfies their needs, crowdsourcing tasks could be defined to assist in the language definition process. By relying on the crowd, it is possible to show an early version of the language to a wider spectrum of users, thus increasing the validation scope and eventually promoting its acceptance and adoption. We propose a systematic method for creating crowdsourcing campaigns aimed at refining the graphical notation of DSMLs. The method defines a set of steps to identify, create and order the questions for the crowd. As a result, developers are provided with a set of notation choices that best fit end-users' needs. 
We also report on an experiment validating the approach.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136033" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8c409b668f76f720d2b4efb11129c161b2dc46cf", "sources": [ "DBLP" ], "title": "Better call the crowd: using crowdsourcing to shape the notation of domain-specific languages", "venue": "SLE", "year": 2017 }, "8c781998307778cb598d12487a0063ed6b933ec4": { "authors": [ { "ids": [ "2381030" ], "name": "Oded Padon" }, { "ids": [ "2738261" ], "name": "Giuliano Losa" }, { "ids": [ "1702872" ], "name": "Shmuel Sagiv" }, { "ids": [ "1746745" ], "name": "Sharon Shoham" } ], "doi": "10.1145/3140568", "doiUrl": "https://doi.org/10.1145/3140568", "entities": [ "Bernays\u2013Sch\u00f6nfinkel class", "Communications protocol", "EPR paradox", "Fast Lane (E-ZPass)", "Finite model property", "First-order logic", "Formal verification", "Invariant (computer science)", "Paxos (computer science)", "Propositional calculus", "Software bug", "Software verification and validation", "Undecidable problem", "Universal quantification", "Verification and validation" ], "id": "8c781998307778cb598d12487a0063ed6b933ec4", "inCitations": [ "2ebf38ec360bc0a6e67c6ac20e0d779d3f99ac7a", "2525e086a6838cdd7ce33fe134507a41b47dd0bd", "e8467daba6c10cf19794092b518be121d741232e", "1c188301588c9e77f33c3879d1c82ca62f33ab50", "882401bd454cd322380497621d577826599109ce", "22319101f4599b00e93c94e9b5adb2a7d825ece4", "8ec98549facb9e8dc1516da5a26a7d6878a3979c" ], "journalName": "PACMPL", "journalPages": "108:1-108:31", "journalVolume": "1", "outCitations": [ "16a455aeacd14529bee92b0c197619fa2d173151", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "030a78598b86ac6a1536af140d9bd978558ae64f", "9c0d566165cb5922c983d15c96299dd33bcf6d37", "233e8fe63722202ac9da45d76df7e25eaa8ad3bf", "3b1de2ff85fa53c86fecbb33770f4598f3cba905", "0ce5504d9be22dfe444821584eac187d07e4556d", "36222f8eb2ccf21ca345e15186cea64506581543", 
"08b05adf14978e822ca3982610aa9e25eb9d028a", "d12d1289d2384c2ce642f01855637b9f0519e189", "2a86811d68d9b3a5eec996bfaf032d0dfbe96e8b", "277f3ec379d9ac15eb4ae295f0928d98b23555ec", "1d0b3845e4ac36480c4c41c914200e4c14a145ff", "7b4ee87e5fa2b9a1cbb13455f94e32d4162d1776", "010738235c0f589663ac09cf6dc45aede7f616bc", "8dbc653d8194c257dfa198b427523191b3865464", "0124dc671fb7c36efc5162230d3b59f26d2e4051", "165f3688f38f91da7bb642cf4fdeaf0fc95b0c95", "9dbf255ab9ab57e983bb0893ca58709e3fc86312", "a60d00ba42a4bed7adb3dc40cd1c32cbaffda5df", "c5c63714d0fd233b54462646e4b5c54f8519f6d8", "5a8aafae38aeabd5533a271ab02396529b7b41c9", "ad7547782acc173be49f59246350b549ba739cfd", "02b1103e592fa6bf0499e27f1519692441fad557", "754df916a1227263fee7b58e9b4ce87263d67cc2", "4144c65e70ed4795f0c41dca0119483775219163", "00c181b8b64e824fbe0172339f1e4560b557fab5", "104119350eed6afeabfc1977281af19800791207", "36a1d747fd861604fd50c8e2251990d6ade98a3c", "a0681c1c883c200da5a913368a7928fb34ff51bf", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "2930e792d5e3e7958290d14626f0bdb502d36964", "5ae7e8272702c0ed9f0530271de685de2f6a9bf5", "616d461180ce3c2c99c7bbf5907ff872431e1c29", "5518d5853b694c865a55be5628e7205007806563", "51f16256472a334ffb4a579de5eabd371291ca59", "6c725d2a7e88515c5f7c877936f90b0184c4fe8f", "a7674ccadef1307269e96b81fb616603e4e246fe", "0ca377db38dfb9089b32ae67095788d01d376e6a", "316bfd53ade09b43b2722014b5518f35b70d6621", "578c2b170aaab7ea88ea0cca472f123287c5b6b5", "155ca30ef360d66af571eee47c7f60f300e154db", "46e7ccccebe5f3d3ba8dc06ebb0be7980994fba4", "a4040afec1edb557889e097a6bc9e721f8088698", "62f7869436b0719fa676717d6e945d48416a8bb7", "92cca3d3d1b2d9025a5ba60ad41dac79d761eecf", "8bedd1bd60713787249250e1360810395bf2d32c", "3081a06a1a99cff963ba9d68304b2362c253a4f0", "0e08139163a1bd5dcbd890942e629803134cab9f", "fe9b19732e41c2c3b0e3d2c4f2d90be2f5129708", "5acc6e0d4011d81419b81d7cd383bed48c4cb22c" ], "paperAbstract": "Distributed protocols such as Paxos play an important role in many computer systems. 
Therefore, a bug in a distributed protocol may have tremendous effects. Accordingly, a lot of effort has been invested in verifying such protocols. However, checking invariants of such protocols is undecidable and hard in practice, as it requires reasoning about an unbounded number of nodes and messages. Moreover, protocol actions and invariants involve both quantifier alternations and higher-order concepts such as set cardinalities and arithmetic. \nThis paper makes a step towards automatic verification of such protocols. We aim at a technique that can verify correct protocols and identify bugs in incorrect protocols. To this end, we develop a methodology for deductive verification based on effectively propositional logic (EPR)\u2014a decidable fragment of first-order logic (also known as the Bernays-Sch\u00f6nfinkel-Ramsey class). In addition to decidability, EPR also enjoys the finite model property, allowing to display violations as finite structures which are intuitive for users. Our methodology involves modeling protocols using general (uninterpreted) first-order logic, and then systematically transforming the model to obtain a model and an inductive invariant that are decidable to check. The steps of the transformations are also mechanically checked, ensuring the soundness of the method. We have used our methodology to verify the safety of Paxos, and several of its variants, including Multi-Paxos, Vertical Paxos, Fast Paxos, Flexible Paxos and Stoppable Paxos. 
To the best of our knowledge, this work is the first to verify these protocols using a decidable logic, and the first formal verification of Vertical Paxos, Fast Paxos and Stoppable Paxos.", "pdfUrls": [ "https://arxiv.org/pdf/1710.07191v1.pdf", "http://www.tau.ac.il/~sharonshoham/papers/oopsla17.pdf", "http://doi.acm.org/10.1145/3140568", "http://arxiv.org/abs/1710.07191" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8c781998307778cb598d12487a0063ed6b933ec4", "sources": [ "DBLP" ], "title": "Paxos made EPR: decidable reasoning about distributed protocols", "venue": "PACMPL", "year": 2017 }, "8c7cf9c759dcca3195dea6e27c2e25ee9a05671c": { "authors": [ { "ids": [ "1838891" ], "name": "Chenglong Wang" }, { "ids": [ "39866566" ], "name": "Alvin Cheung" }, { "ids": [ "1991345" ], "name": "Rastislav Bod\u00edk" } ], "doi": "10.1145/3062341.3062365", "doiUrl": "https://doi.org/10.1145/3062341.3062365", "entities": [ "Algorithm", "Programming by example", "Programming paradigm", "SQL", "Scalability", "Stack Overflow" ], "id": "8c7cf9c759dcca3195dea6e27c2e25ee9a05671c", "inCitations": [ "33de4502da805dd10769d2412fd04ba5ad7867f7", "94f3d2bc772f74b3742e097d8822b14db33d93b9", "791714728fefcb067fb6b56c7f4de093d536cf00", "69f53559815f4645ec3e358b40e611ad3ad36a8c", "1699209b90d425ce0840c13e871242846a13a944", "debd9e33f34b367008357c91c2c70cb85cfc532c", "6b89be108313f5650cf58b23ade0d5f312b37190", "984766d01b3427168167785584f5cd91c8cd8ac2", "47f65c165f7ccedd4c18189d4690eec5369dd9c5" ], "journalName": "", "journalPages": "452-466", "journalVolume": "", "outCitations": [ "3457e1ec12f37e2a3655d617ab7914ccb0a80801", "4acb56cfcb1e346fe6f87e5ea2583e33945f38cf", "24d9ff1f55620f22e188b1650859088190975bf9", "379b242fcc606c2a43278630a97430f750654896", "12f37f340f4762c9edc9ee36d2b910be42889983", "4015eafeea8ec3a900984230ebb39688d417de2d", "11256a3695e1313bc0989935a94ee80342e25cd1", "7149d00b10c8865a455d151595dd82a4880e3303", 
"334f4b4540eff561957e86aa55d150094782d93c", "42414b70fc61def8adcf5c159604c72e4508e9c1", "35a8bd9f56806f203b7fa47831bb3dde174a06e7", "020e287d79d0d96abc5026b9af4a4f8820fc0b1d", "1ca07e7040660d6194cac1c1d8d2200953c9a7a8", "19096e10c13d16dc0afd5bedcd80cb3afc4b671d", "3e68d730b678eb4994e46fb5b4edeaa2c5740ad8", "cb5ac87043a4925eaee2e432993d12586b62994b", "f3318491a55590e00dfe45d68708f515822e343a", "e3809570d6c2483b57b3bce921ec410938e67e18", "1749fc7daf90f1cd7a299117ea6a6db19c4cb514", "29c985f36e30085df0b959e7e792a3af1c8d6556", "26b9001cce4a7f2e838ef99d0e7593b18553a7e0", "079866b2ed52cc0a27b2ac96b1489dfd3e7b40df", "00c08861cfb438d5ff209dfadc2d839641cd3ca9", "208e7934d900055b43b8b60e4a807ac00674ec4a", "67d18339ed72b7fc2152cb42b63362b570c11946", "15e3d493cffef853e478d188245a2b6eff4bd6c6", "93048dc9441985260fdebaf3a9d2654696e98f87", "0af1c24e00dbf342517df2f50698502e3d793ea8", "18b8ef71bc01b8658b4ef2c8b9a9e4e6e5c2a07b", "121f7e8a3c80cab6bf48d12e2c914d2c7a301890", "1e21bac710633f7f57e38188ce3a56d37d9cd5b2", "0fa8a4cbb7cacfe161280e5b6a1f780929ddc743", "1d63a9e3751293eda942b0db2891919b3b92996c", "02ea2faa6190bc14f4244386996054ef11b0d89c", "235b9c8f10461a95398e169ecb91cf3e223d3350", "0b5c867bc6201b0b2af7cd42dadaa18a52c404a2", "6bbaf76d82968a4349f7f043ece649c8ac1fbc0c", "0fed8c28b74eaca6cb2fb4233ce11955acf1ba8c" ], "paperAbstract": "SQL is the de facto language for manipulating relational data. Though powerful, many users find it difficult to write SQL queries due to highly expressive constructs. \n While using the programming-by-example paradigm to help users write SQL queries is an attractive proposition, as evidenced by online help forums such as Stack Overflow, developing techniques for synthesizing SQL queries from given input-output (I/O) examples has been difficult, due to the large space of SQL queries as a result of its rich set of operators. \n \n In this paper, we present a new scalable and efficient algorithm for synthesizing SQL queries based on I/O examples. 
The key innovation of our algorithm is development of a language for abstract queries, i.e., queries with uninstantiated operators, that can be used to express a large space of SQL queries efficiently. Using abstract queries to represent the search space nicely decomposes the synthesis problem into two tasks: 1) searching for abstract queries that can potentially satisfy the given I/O examples, and 2) instantiating the found abstract queries and ranking the results. \n \n We have implemented this algorithm in a new tool called Scythe and evaluated it using 193 benchmarks collected from Stack Overflow. Our evaluation shows that Scythe can efficiently solve 74% of the benchmarks, most in just a few seconds, and the queries range from simple ones involving a single selection to complex queries with 6 nested subqueries.", "pdfUrls": [ "https://scythe.cs.washington.edu/media/scythe-pldi.pdf", "http://doi.acm.org/10.1145/3062341.3062365" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8c7cf9c759dcca3195dea6e27c2e25ee9a05671c", "sources": [ "DBLP" ], "title": "Synthesizing highly expressive SQL queries from input-output examples", "venue": "PLDI", "year": 2017 }, "8cb66bdb8e523c4a047e206571f535bb3268b1d1": { "authors": [ { "ids": [ "1731715" ], "name": "Olivier Beaumont" }, { "ids": [ "2380259" ], "name": "Lionel Eyraud-Dubois" }, { "ids": [ "39703729" ], "name": "Suraj Kumar" } ], "doi": "10.1109/IPDPS.2017.71", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.71", "entities": [ "Algorithm", "Approximation", "Central processing unit", "Computation", "Graphics processing unit", "List scheduling", "Runtime system", "Scheduling (computing)" ], "id": "8cb66bdb8e523c4a047e206571f535bb3268b1d1", "inCitations": [ "e9687c7f101aab7488d40174a14210dc0bc70e60", "c2b05dad04399ee2532b490d496ba2aabab00c9c" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "768-777", "journalVolume": "", 
"outCitations": [ "91c8d42a946110db6ba10587c1d40d10c12661f9", "04717b51d7c7dd598d732fb8a293d69d45c0fa9f", "2bdb4b4dfec4bcf6d0c26c7ba5d540221a854c00", "43b9e9e2e566a30b0b3ade63616ec923b93ff9a5", "2e7f407e359890219570e29f8e4d842c4c977c11", "ff764a8113534931e3f607cbbda60a2f3f57ad37", "a547c92c55a7774fc3416831ecbf04227e444420", "7059ceba09965f0950dbd71a825a3e78915edf11", "5bfdff20904b5a7d2893a30ca8ffa383d54cbf99", "6d208bd8d02f3a0eced5575433733f36eae8b684", "877c9c040821cefc2cbe87aa2e42c8e197bb553d", "2726565619d893410d936f789418f06ea3ca6287", "4b0b1fd123ec9c43e82bc60d7fa0b9254d60a28d", "2730606a9d29bb52bcc42124393460503f736d74", "bce56b023105e6fadf8f05e450c872a3840a34db" ], "paperAbstract": "In High Performance Computing, heterogeneity is now the normwith specialized accelerators like GPUs providing efficientcomputational power. The added complexity has led to the developmentof task-based runtime systems, which allow complex computations to beexpressed as task graphs, and rely on scheduling algorithms to performload balancing between all resources of the platforms. Developing goodscheduling algorithms, even on a single node, and analyzing them canthus have a very high impact on the performance of current HPCsystems. The special case of two types of resources (namely CPUs andGPUs) is of practical interest. HeteroPrio is such an algorithm whichhas been proposed in the context of fast multipole computations, andthen extended to general task graphs with very interesting results. Inthis paper, we provide a theoretical insight on the performance ofHeteroPrio, by proving approximation bounds compared to the optimalschedule in the case where all tasks are independent and for differentplatform sizes. Interestingly, this shows that spoliation allows toprove approximation ratios for a list scheduling algorithm on twounrelated resources, which is not possible otherwise. We also establishthat almost all our bounds are tight. 
Additionally, we provide an experimental evaluation of HeteroPrio on real task graphs from dense linear algebra computation, which highlights the reasons explaining its good practical performance.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.71" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8cb66bdb8e523c4a047e206571f535bb3268b1d1", "sources": [ "DBLP" ], "title": "Approximation Proofs of a Fast and Efficient List Scheduling Algorithm for Task-Based Runtime Systems on Multicores and GPUs", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "8cd63388eb68ede942d27644c7ef629e358764da": { "authors": [ { "ids": [ "1783874" ], "name": "Teng Wang" }, { "ids": [ "2216937" ], "name": "Adam Moody" }, { "ids": [ "2606966" ], "name": "Yue Zhu" }, { "ids": [ "3270933" ], "name": "Kathryn Mohror" }, { "ids": [ "2605713" ], "name": "Kento Sato" }, { "ids": [ "2219526" ], "name": "Tanzima Zerin Islam" }, { "ids": [ "1709886" ], "name": "Weikuan Yu" } ], "doi": "10.1109/IPDPS.2017.39", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.39", "entities": [ "Attribute\u2013value pair", "Buffer overflow", "Burst mode (computing)", "Cluster analysis", "Experiment", "Key-value database", "Scalability", "Value (ethics)" ], "id": "8cd63388eb68ede942d27644c7ef629e358764da", "inCitations": [ "788bdfe4dbd2228dca0f7ef48eda469af3cb1347", "d09b94ca965f63d1687cff8437da6aae7f7b7005" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1174-1183", "journalVolume": "", "outCitations": [ "7717cb7fbbf26557238c2ef847d0a48def176d0b", "478cd69ab5de77a7dc8d2419e49b14a8cac82e73", "0c3a060886185b59322bbd1296e08a63d84d2ad8", "589e89d77f689ebfc3f36bc1f76fd518ae4a237c", "4fbe8c8ace7546e3a10bfd8e151bc09a41fd3f9a", "44607270754f8521d6c4d42297aa881393f4f8e0", "483b2f4c7dbc72f7969b60cff0984f2062f02956", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", 
"0973e45d3eeb9641d3de34d48f8d0432f1113dcf", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "0f55217987ec25afa0f815e0aa3957e669b0280e", "569de2eececd3adb7219d63eb85e4bdc63486c42", "11c136aa1136ccf6ebbb23c3b3e1fbdd8447bb00", "5d25b4a77268437aa669e272cc81b56ed184e0b6", "3288d37f1929d15a26da7a5c09a89258b5d4366c", "aeae0567deda241a5a2aeb992f41e68089e58030", "a8b429845ac951b0fe6cdb071ae862c7d305e36e", "9028fe4e30d51bfdf494a36d2b8c4bcfb10cfe8a", "1fcb1c5595b4518b0e8bab042f32605c367588c2", "3609a17555a6c6757f8ff0297fc046e6dc623a57", "00fd1e42e5c5ce6ab7fb9c7eb7952ab3ae642de5", "04b3aaf58a91557e15c8064660baa1cc5e8db14e", "721c5be47c923d9c0303a3eefd3d42a57e0add03", "2d2255446fa2c5d5e96c4635ba75ca1741c82f7e", "337d5988addf1cf6db2233ef357b19000c7b8616" ], "paperAbstract": "Distributed burst buffers are a promising storage architecture for handling I/O workloads for exascale computing. Their aggregate storage bandwidth grows linearly with system node count. However, although scientific applications can achieve scalable write bandwidth by having each process write to its node-local burst buffer, metadata challenges remain formidable, especially for files shared across many processes. This is due to the need to track and organize file segments across the distributed burst buffers in a global index. Because this global index can be accessed concurrently by thousands or more processes in a scientific application, the scalability of metadata management is a severe performance-limiting factor. In this paper, we propose MetaKV: a key-value store that provides fast and scalable metadata management for HPC metadata workloads on distributed burst buffers. MetaKV complements the functionality of an existing key-value store with specialized metadata services that efficiently handle bursty and concurrent metadata workloads: compressed storage management, supervised block clustering, and log-ring based collective message reduction. 
Our experiments demonstrate that MetaKV outperforms the state-of-the-art key-value stores by a significant margin. It improves put and get metadata operations by as much as 2.66× and 6.29×, respectively, and the benefits of MetaKV increase with increasing metadata workload demand.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.39" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8cd63388eb68ede942d27644c7ef629e358764da", "sources": [ "DBLP" ], "title": "MetaKV: A Key-Value Store for Metadata Management of Distributed Burst Buffers", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "8cdbedc491cf77b0a57008108ea7f4c05cc5660e": { "authors": [ { "ids": [ "1737944" ], "name": "Philip A. Bernstein" }, { "ids": [ "3006292" ], "name": "Mohammad Dashti" }, { "ids": [ "32019086" ], "name": "Tim Kiefer" }, { "ids": [ "1740962" ], "name": "David Maier" } ], "doi": "", "doiUrl": "", "entities": [ "Actor model", "Application programming interface", "Cloud storage", "Fault tolerance", "In-memory database", "Programming model", "Relational database management system", "Scalability", "Server (computing)" ], "id": "8cdbedc491cf77b0a57008108ea7f4c05cc5660e", "inCitations": [ "ca5dff2607ef06fdbb19a5b0fc39965b2529e588", "529cd5bd015c52012a4e22b4f7ac8d56a1fe92de", "777db975964f08b514cd05249cb2c029da7bea02" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "1a133e61010294b0cd77fa851dfeea7292e49439", "529cd5bd015c52012a4e22b4f7ac8d56a1fe92de", "aacfbf0d34bc24dc3b72e56719ec083759a072ce", "1eb4e754d9b79f0806667302801fc9a0b50f88af", "268d3a230f4cec9b9cd072fce8e9aaa5e37b9a11", "208aee805004d575035284b1c232209e9fa26b0e", "b8ab674f963a03f2664e6b8abe42a9f17975aa10" ], "paperAbstract": "Many of today\u2019s interactive server applications are implemented using actor-oriented programming frameworks. Such applications treat actors as a distributed in-memory object-oriented database. 
However, actor programming frameworks offer few if any database system features, leaving application developers to fend for themselves. It is challenging to add such features because the design space is different than traditional database systems. The system must be scalable to a large number of servers, it must work well with a variety of cloud storage services, and it must integrate smoothly with the actor programming model. We present the vision of an actor-oriented database. We then describe one component of such a system, to support indexed actors, focusing especially on details of the fault tolerance design. We implemented the indexing component in the Orleans actor-oriented programming framework and present the result of initial performance measurements.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p29-bernstein-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8cdb/edc491cf77b0a57008108ea7f4c05cc5660e.pdf", "s2Url": "https://semanticscholar.org/paper/8cdbedc491cf77b0a57008108ea7f4c05cc5660e", "sources": [ "DBLP" ], "title": "Indexing in an Actor-Oriented Database", "venue": "CIDR", "year": 2017 }, "8d22127355d1f128eb82b32aab0cfd3139b22766": { "authors": [ { "ids": [ "40380763" ], "name": "Thorsten Kurth" }, { "ids": [ "1679327" ], "name": "Jian Zhang" }, { "ids": [ "1759942" ], "name": "Nadathur Satish" }, { "ids": [ "3159503" ], "name": "Evan Racah" }, { "ids": [ "3168518" ], "name": "Ioannis Mitliagkas" }, { "ids": [ "8176660" ], "name": "Md. Mostofa Ali Patwary" }, { "ids": [ "39016464" ], "name": "Tareq M. 
Malas" }, { "ids": [ "1789372" ], "name": "Narayanan Sundaram" }, { "ids": [ "2154054" ], "name": "Wahid Bhimji" }, { "ids": [ "23989913" ], "name": "Mikhail Smorkalov" }, { "ids": [ "2846805" ], "name": "Jack Deslippe" }, { "ids": [ "24013958" ], "name": "Mikhail Shiryaev" }, { "ids": [ "3433231" ], "name": "Srinivas Sridharan" }, { "ids": [ "1764912" ], "name": "Prabhat" }, { "ids": [ "1719384" ], "name": "Pradeep Dubey" } ], "doi": "10.1145/3126908.3126916", "doiUrl": "https://doi.org/10.1145/3126908.3126916", "entities": [ "Deep learning", "FLOPS", "Heterogeneous Element Processor", "High- and low-level", "Manycore processor", "Statistical classification", "Supervised learning", "Terabyte", "Xeon Phi" ], "id": "8d22127355d1f128eb82b32aab0cfd3139b22766", "inCitations": [ "b8fb260da9d74b67acd6776e9fa1b55cb5756c8c", "d0556be65e8564ab8bb3e26b6a0146a62027bc40", "90423cbc33e340927c0cf040f1605b34ea04b0fd", "9e31f112d029d1d87f263db46679cc5d79cf6798", "3ad0ef33ebddc26ca3df92a3677fd30a705e1ef3" ], "journalName": "", "journalPages": "7:1-7:11", "journalVolume": "", "outCitations": [ "1121ff5cdeaa470521b8dff084ba1424dd613cc1", "1792758cc7e555ff31a8b8222ac89f409d79a06a", "2e2d56d03636f104d63914fea267231b08250984", "138d59afbfcce68e7c25dd223828b1cd97042866", "ac473f1674f14253da0e50c25b8cb86f8801a808", "3ba179bceb9692d4d21109d0b87b120195761148", "8e0eacf11a22b9705a262e908f17b1704fd21fa7", "3f1c1427b175140e7f725a155096a4e73c1b8509", "361ccdfcbeef1bf639c5528fe4d8cb5fa00453f3", "838c9137e6fd807c871c80976b4f75c8c8bfcffc", "81b7dcaef4a53daab41658a4d1e97972d04b3384", "066f55164cccf2f44c6235ebfed12dc9761f1f45", "11540131eae85b2e11d53df7f1360eeb6476e7f4", "12bf156b71ed9aacbb640d5cbef709626b560e71", "c71cd00da482cf87bdbf7e4307ec11761f308321", "f0253f122b19ff3c7f1403b97dabbe5da93c6ec8", "50889cfa110efbf011db409ffd40ded25374b16f", "64bad9c3e4d8ed38c16b0086da865ccd574e836a", "b7cf49e30355633af2db19f35189410c8515e91f", "043afbd936c95d0e33c4a391365893bd4102f1a7", 
"8c89bd06ab5d35060a5d296b0a3b824bc5a4ade7", "01fcae344d2edb715bcc63a40b6052c0331741bd", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "272216c1f097706721096669d85b2843c23fa77d", "6435805ebe3abd7c02fae390edad37c1a5c7c5a6", "48bb1b6e4ccd29dfb8e2461bc48fcbec4e2e7fb4", "424561d8585ff8ebce7d5d07de8dbf7aae5e7270", "0790c77c1eaf2368b55c6a0def09a43690eeb848", "03cfc5a64fdfddaa66ced60a995219721cc41da4", "26f58c28de7469dc6b6846d37953bbbe3f4fc0e9", "0122e063ca5f0f9fb9d144d44d41421503252010", "15a7e48c3ac32ba79d427dd3c0bb673ecca0777e", "12a714d01dfef0dae2a113f0da3df223d6a5289c", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", "140e2320331dc06aeaefa3d1c0be6bff777f3994" ], "paperAbstract": "This paper presents the first, 15-PetaFLOP Deep Learning system for solving scientific pattern classification problems on contemporary HPC architectures. We develop supervised convolutional architectures for discriminating signals in high-energy physics data as well as semi-supervised architectures for localizing and classifying extreme weather in climate data. Our Intelcaffe-based implementation obtains ~2TFLOP/s on a single Cori Phase-II Xeon-Phi node. We use a hybrid strategy employing synchronous node-groups, while using asynchronous communication across groups. We use this strategy to scale training of a single model to ~9600 Xeon-Phi nodes; obtaining peak performance of 11.73-15.07 PFLOP/s and sustained performance of 11.41-13.27 PFLOP/s. At scale, our HEP architecture produces state-of-the-art classification accuracy on a dataset with 10M images, exceeding that achieved by selections on high-level physics-motivated features. Our semi-supervised architecture successfully extracts weather patterns in a 15TB climate dataset. 
Our results demonstrate that Deep Learning can be optimized and scaled effectively on many-core, HPC systems.", "pdfUrls": [ "https://arxiv.org/pdf/1708.05256v1.pdf", "http://arxiv.org/abs/1708.05256", "http://doi.acm.org/10.1145/3126908.3126916" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8d22127355d1f128eb82b32aab0cfd3139b22766", "sources": [ "DBLP" ], "title": "Deep learning at 15PF: supervised and semi-supervised classification for scientific data", "venue": "SC", "year": 2017 }, "8d2d0fae94d6c8a1f229d7bf242b003ce0f8c245": { "authors": [ { "ids": [ "1706874" ], "name": "Chao Zheng" }, { "ids": [ "1699045" ], "name": "Benjam\u00edn Tovar" }, { "ids": [ "1686699" ], "name": "Douglas Thain" } ], "doi": "", "doiUrl": "", "entities": [ "Bioinformatics", "Bioinformatics", "Distributed computing", "MapleStory", "Scheduling (computing)", "Throughput" ], "id": "8d2d0fae94d6c8a1f229d7bf242b003ce0f8c245", "inCitations": [ "6df1c4bbc69ecb79a8ec32ea0873e0c313d616e3" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "130-139", "journalVolume": "", "outCitations": [ "3a043714354fe498752b45e4cf429dbae0fb2558", "4224374796da64e17fce96033d4cd42240d80eaf", "1a231ee4857da2d9588e020f4aca4cb2d8cd2265", "dda22bbd8e22e50f9c827bf1b1f34b66be17a5eb", "466486c7459f333523ec8fccfba7e8b797506097", "332f77fd05703c1607e3b57884ad31fb1fad0104", "702ba56d3f4aa529b8b1ccec4b47a0a81130d5fa", "525b50b4ae438d89f2b088c781583bb136f8a083", "0d9aea55a54ccc6ab64995d70bf6ae464af25f0d", "8fc52ce413863e5b9d78f884912858cd8a1f4ad9", "0811b44051398d81c89ba579814fb5834a093b07", "3b7e2038ec22cf637df70c833d473b0f3b43713a", "006636b7c78d3ddf9d3f0fb91a588d33e789d151", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "3087a47c1fdb5ebb1b28f3562533e3cce782dd36", "4af63ed343df388b6353b6fc77c7137d27822bf4", "41c97a6b41aefc6b0e0a3c702db080fd5aeef6f5", 
"18e059eda8c61358b889945d57eff8ed6542dfe6", "07ea35f84c64e9d4694ea827ddb4a186b3fda7f4" ], "paperAbstract": "Workflows are a widely used abstraction for describing large scientific applications and running them on distributed systems. However, most workflow systems have been silent on the question of what execution environment each task in the workflow is expected to run in. Consequently, a workflow may run successfully in the environment it was created, but fail on other platforms due to the differences in execution environment. Container-based schedulers have recently arisen as a potential solution to this problem, adopting containers to distribute computing resources and deliver well-defined execution environments to applications. In this paper, we consider how to connect workflow system to container schedulers with minimal performance loss and higher system efficiency. As an example of current technology, we use Makeflow and Mesos. We present five design challenges, and address them by using four configurations that connecting workflow system to container scheduler from different level of the infrastructure. In order to take full advantage of the resource sharing schema of Mesos, we enable the resource monitor of Makeflow to dynamically update the task resource requirement. 
We explore the performance of a large bioinformatics workflow, and observe that using Makeflow, Work Queue and the Resource monitor together not only increase the transfer throughput but also achieves highest resource usage rate.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101130", "http://ccl.cse.nd.edu/research/papers/makeflow-mesos-ccgrid17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8d2d0fae94d6c8a1f229d7bf242b003ce0f8c245", "sources": [ "DBLP" ], "title": "Deploying High Throughput Scientific Workflows on Container Schedulers with Makeflow and Mesos", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "8d353e7711ee4fadcb338f491c4d850084d4a194": { "authors": [ { "ids": [ "1785299" ], "name": "Keren Censor-Hillel" }, { "ids": [ "1736596" ], "name": "Bernhard Haeupler" }, { "ids": [ "33352347" ], "name": "D. Ellis Hershkowitz" }, { "ids": [ "1719383" ], "name": "Goran Zuzic" } ], "doi": "10.1145/3087801.3087808", "doiUrl": "https://doi.org/10.1145/3087801.3087808", "entities": [ "Algorithm", "BMC Remedy Action Request System", "Best, worst and average case", "Fault (technology)", "Linear network coding", "Network model", "Routing", "Throughput" ], "id": "8d353e7711ee4fadcb338f491c4d850084d4a194", "inCitations": [ "f86260036a3dbf5e8380030e4f30fa2bd0773664", "1be1f2e57dce1380756abe4dd1cd60dbd5e7f8b1" ], "journalName": "", "journalPages": "33-42", "journalVolume": "", "outCitations": [ "2a2be1145a23eb1e707ad0ed50c5e75b61a3818a", "9e3d7e20c27c7df8625f32b0338f60dc0a64295b", "0baf45f76d12e6e0c31866e85bb1c45b9469a21f", "2f038da39880318c25ff7a80803b2072af9df6c9", "0551531dd21cd2ac36be70b3b6994adb5d81ed28", "083e8b76f4081281530364a0df992ff5c7b99290", "6ffa18f69f9d2df56d7165f85e42e46f1aabbc89", "38ac5acdf2c3a9baba6813adf584087867c71e7c", "8d802645dc1a0b52745d792ef387a4e3744400c5", "304e26cd875160d59858972a51dff391cce63dc6", 
"33026fa96efe550f525fc0d8bc698a184fcab721", "1ec3ef815298824efc0d2d54a6ac5c70974dcfa5", "86f665e871de8fb91d697484aa567112d732cd41", "387588025d7b09aa8245308576b31af850a93a10", "05bacc626ef59eef0763480f14b948a34d6dc65a", "0f434e0319f6d2c93642302af595e141abfc9896", "00e275f42faa4c934f6a7023c6a564f9c9772c60", "2d95db35ec42c2de1a381c327f2e9123ff5e06c9", "066907935b61c73d1e408ee01ae31a7844336353", "29a402cd8922e08072c0404a3080a0447e9a2fb2", "e4a98cbb88e61f1a7858f5e3aa31a612fe403d3f", "2312b13cb8fcf5f051f3823acfb934caa63bdf88", "60ccf9ca452c3260d024f06381a344f5617d709c", "19e0c23884da16c8c9b1834d88a10fd247a6f695", "2515154629b4827649621a28fced9454d3a6e3eb", "20cdeb6c17f087c64882193cefb229b8df991fa8", "2cb8eab690dfd468307ac248c663159499671586", "06ac167f68a2e85ff4feb8ef9e70c1cb13d998c0", "3544236ab3d4e33ef1a6bc2f9e45a07a55dbe72d", "3679e43c953deb094d78854f2bd0994a8e52a115", "01e881cc8b7c5a82b4ab7eeffd1d924c5d85bb62", "5bfe3e745ddaf29d1e1cd7f59e185182cda854ed", "dcef94ce662a2a07fb16db625d0133af50693446", "0157dc0404cb6b31a1beef7e6855980220849654" ], "paperAbstract": "The widely-studied radio network model [Chlamtac and Kutten, 1985] is a graph-based description that captures the inherent impact of collisions in wireless communication. In this model, the strong assumption is made that node v receives a message from a neighbor if and only if exactly one of its neighbors broadcasts. We relax this assumption by introducing a new noisy radio network model in which random faults occur at senders or receivers. Specifically, for a constant noise parameter p \u2208 [0, 1), either every sender has probability p of transmitting noise or every receiver of a single transmission in its neighborhood has probability p of receiving noise. We first study single-message broadcast algorithms in noisy radio networks and show that the Decay algorithm [Bar-Yehuda et al., 1992] remains robust in the noisy model while the diameter-linear algorithm of Gasieniec et al., 2007 does not. 
We give a modified version of the algorithm of Gasieniec et al., 2007 that is robust to sender and receiver faults, and extend both this modified algorithm and the Decay algorithm to robust multi-message broadcast algorithms, broadcasting \u03a9 ( 1 log n log log n ) and \u03a9 ( 1 log n ) messages per round, respectively. We next investigate the extent to which (network) coding improves throughput in noisy radio networks. In particular, we study the coding cap \u2013 the ratio of the throughput of coding to that of routing \u2013 in noisy radio networks. We address the previously perplexing result of Alon et al. 2014 that worst case coding throughput is no better than worst case routing throughput up to constants: we show that the worst case throughput performance of coding is, in fact, superior to that of routing \u2013 by a \u0398(log(n)) gap \u2013 provided receiver faults are introduced. However, we show that sender faults have little effect on throughput. In particular, we show that any coding or routing scheme for the noiseless setting can be transformed to be robust to sender faults with only a constant throughput overhead. These transformations imply that the results of Alon et al., 2014 carry over to noisy radio networks with sender faults as well. As a result, if sender faults are introduced then there exist topologies for which there is a \u0398(log logn) gap, but the worst case throughput across all topologies is \u0398 ( 1 log n ) for both coding and routing. \u2217Supported in part by the Israel Science Foundation (grant 1696/14) and the Binational Science Foundation (grant 2015803). \u2020Supported in part by the National Science Foundation through grants CCF-1527110 and CCF-1618280. ar X iv :1 70 5. 
07 36 9v 1 [ cs .D C ] 2 1 M ay 2 01 7", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087808", "https://arxiv.org/pdf/1705.07369v1.pdf", "http://arxiv.org/abs/1705.07369" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8d35/3e7711ee4fadcb338f491c4d850084d4a194.pdf", "s2Url": "https://semanticscholar.org/paper/8d353e7711ee4fadcb338f491c4d850084d4a194", "sources": [ "DBLP" ], "title": "Broadcasting in Noisy Radio Networks", "venue": "PODC", "year": 2017 }, "8dcf0ac839e6cc0f6099416d38f387de52692c45": { "authors": [ { "ids": [ "1884304" ], "name": "Xiang Pan" }, { "ids": [ "26831180" ], "name": "Anys Bacha" }, { "ids": [ "3084320" ], "name": "Radu Teodorescu" } ], "doi": "10.1109/IPDPS.2017.109", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.109", "entities": [ "Cache coherence", "Multiprocessing", "Non-volatile memory", "Random-access memory", "Spectral leakage", "Static random-access memory", "Volatile memory" ], "id": "8dcf0ac839e6cc0f6099416d38f387de52692c45", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "265-275", "journalVolume": "", "outCitations": [ "745809d0bc52a838f3734eb936956332240bce1d", "e20690c72ca01578407133fcc30dfa364a4cd789", "40eb2f5a97298da40838388700b097f82adff167", "614bdb722ea07158b4a0ee30bf1829b8f1b5ddff", "560d19ec2fe5dd078cd5cb98cc912267054c5133", "1144956f60e04e0839dd2fc5b8031fb4a4599072", "6edb9033d4d8578ddd2663fb4d109d28e3ae1ab3", "a2f3bb40653499eeb33babacf69579b5ea9d20e1", "3364bc50921a9566d61ef8cb73baa82341725e4b", "022a7c4ad4e3df2820eb6bddaaada5710f451fa8", "204e4cef7acf5ca52cb7cdd4aab3be6403cd6120", "754e449be96f3c84d7dd93a19759f3094c8f8300", "1e341922ecec838f83505224d5cbf018f21f8b5c", "09c6f94f84784fed6d222b3d7b31977eda214d63", "58873ddb897824494ac3d5f6ba63a28f93ebf6e4", "4225402b417543e6626ce0cf9c408d5cb65b9511", "579b08fba42753f045d7e1c29f177213ca3905ed", "29321bfe08b81e47ecc814a3bd9a1a10e95fb5f9", 
"02ebdcf8200135ec0433e12e4ef2459ac740370b", "352a8957005dc5519b15ed1870751ec494d66395", "25a902f0cfc6049feadf54fbccf86ff6f7508291", "1600c3ed12301b06a1107a68c2de84fb3582a918", "7cd29ed1da71593bfb79b553ba6c5ee39ccf7a7b", "bbd677f51628791eb44d64fb9744ea0e610c357b" ], "paperAbstract": "Near-threshold computing is emerging as a promising energy-efficient alternative for power-constrained environments. Unfortunately, aggressive reduction in supply voltage to the near-threshold range, albeit effective, faces a host of challenges. This includes higher relative leakage power and high error rates, particularly in dense SRAM structures such as on-chip caches. This paper presents an architecture that rethinks the cache hierarchy in near-threshold multiprocessors. Our design uses STT-RAM to implement all on-chip caches. STT-RAM has several advantages over SRAM at low voltages including low leakage, high density, and reliability. The design consolidates the private caches of near-threshold cores into shared L1 instruction/data caches organized in clusters. We find that our consolidated cache design can service more than 95% of incoming requests within a single cycle. We demonstrate that eliminating the coherence traffic associated with private caches results in a performance boost of 11%. In addition, we propose a hardware-based core management system that dynamically consolidates virtual cores into variable numbers of physical cores to increase resource efficiency. 
We demonstrate that this approach can save up to 33% in energy.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.109" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8dcf0ac839e6cc0f6099416d38f387de52692c45", "sources": [ "DBLP" ], "title": "Respin: Rethinking Near-Threshold Multiprocessor Design with Non-volatile Memory", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "8dd1b01c68aec42dcb13224f6d9516b041a77ff0": { "authors": [ { "ids": [ "2681026" ], "name": "Hamed Omidvar" }, { "ids": [ "8904504" ], "name": "Massimo Franceschetti" } ], "doi": "10.1007/s10955-017-1942-4", "doiUrl": "https://doi.org/10.1007/s10955-017-1942-4", "entities": [ "Agent-based model", "American Cryptogram Association", "Cellular automaton", "Computer engineering", "Computer science", "GeForce 9 series", "Glauber", "Ising model", "Jolla", "Percolation", "Self-organization", "The Globe and Mail", "Theoretical computer science", "Time complexity", "With high probability" ], "id": "8dd1b01c68aec42dcb13224f6d9516b041a77ff0", "inCitations": [], "journalName": "", "journalPages": "401-410", "journalVolume": "", "outCitations": [ "9e7bdf5f90cd7d1ec55706956253d0e46a177e88", "68738053c219c28e3a1fb3e74c373c4c6c465062", "209f3dcce7c44bdaa735ecb8254ea8d1a043ae91", "3a2e0083c1a15f1eda81729b395c4add6d5a4854", "003e6db0b662f93a677785b81e5a87a8276dfe92", "89e42adbaa1a5ec94515bf5df6047ae4dc2bd6cf", "786e580bedc80f8e3083a137112f8270ca0666a1", "e24e723eae7db728531653f485a7ce73581bd2f6", "5bccc18973e5f2577a79462c4d23a82ca347a2e6", "31f259dcd04223f63973f988009fcfb85b5fde5d", "26bcdb674141c575eaefa169e0d5c21db36cc850", "069e836a4c6b0eda55e1ad41bc386540b731a983", "7134b6f6a087a8d07aebc24edf80ed6f2954e5f8", "093e76716ba8c386f468f29b4552d56b4bed1cc5", "6dd6412d7db94b586a3211dfa48f6be9db3633a8", "66dc1f957a8a8c1048974130cd242e7d5afd6bf9", "51a0c2668220f6e7f57ea0650e2b7c9c524416ce", "d44423675b4e8670043d70a9090d45a3c75bd575", 
"30449c3ad0f2da65c1b1174660643557cc2de931", "37722bc96c8de8a5283b447d3924eddfd944e613", "34ee11059d693642fa7a5376e96e147539a997b1", "12b15f757fe3eff230483de86fdabf6710911b8f", "00b61c59b792b0d7f1427c85fe2cf62e68cff239", "089a2a2f88d5aae1bc5139a5188f30d0f2765eaf", "923e7d7a40e2ee73678643abd84ab287adc33074", "5e989c7956749eb86027a3a684e3b8c428979bfc" ], "paperAbstract": "We consider an agent-based model in which two types of agents interact locally over a graph and have a common intolerance threshold \u03c4 for changing their types with exponentially distributed waiting times. The model is equivalent to an unperturbed Schelling model of self-organized segregation, an Asynchronous Cellular Automata (ACA) with extended Moore neighborhoods, or a zero-temperature Ising model with Glauber dynamics, and has applications in the analysis of social and biological networks, and spin glasses systems. Some rigorous results were recently obtained in the theoretical computer science literature, and this work provides several extensions. We enlarge the intolerance interval leading to the formation of large segregated regions of agents of a single type from the known size > 0 to size \u2248 0.134. Namely, we show that for 0.433 < \u03c4 < 1/2 (and by symmetry 1/2 < \u03c4 < 0.567), the expected size of the largest segregated region containing an arbitrary agent is exponential in the size of the neighborhood. We further extend the interval leading to large segregated regions to size \u2248 0.312 considering \u201calmost segregated\u201d regions, namely regions where the ratio of the number of agents of one type and the number of agents of the other type vanishes quickly as the size of the neighborhood grows. In this case, we show that for 0.344 < \u03c4 \u2264 0.433 (and by symmetry for 0.567 \u2264 \u03c4 < 0.656) the expected size of the largest almost segregated region containing an arbitrary agent is exponential in the size of the neighborhood. 
This behavior is reminiscent of supercritical percolation, where small clusters of empty sites can be observed within any sufficiently large region of the occupied percolation cluster. The exponential bounds that we provide also imply that complete segregation, where agents of a single type cover the whole grid, does not occur with high probability for p = 1/2 and the range of tolerance considered. * An extended abstract of this paper with most proofs and some results omitted, has appeared in the Proceeding of PODC\u201917. This work was partially supported by Army Research Office (ARO), award number W911NF-15-1-0253. \u2020 PhD student, Department of Electrical and Computer Engineering, University of California, San Diego. E-mail address: homidvar@ucsd.edu. Address: 9500 Gilman Drive, Mail Code 0018, La Jolla, CA 92093. Phone: (858) 333-2933 \u2021 Professor, Department of Electrical and Computer Engineering, University of California, San Diego. E-mail address: mfrances@ucsd.edu. Address: 9500 Gilman Drive, Mail Code 0407, La Jolla, CA 92093. Phone: (858) 822-2284 ar X iv :1 70 5. 08 58 6v 1 [ cs .S I] 2 4 M ay 2 01 7", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087826", "http://arxiv.org/abs/1705.08586", "https://arxiv.org/pdf/1705.08586v3.pdf", "https://arxiv.org/pdf/1705.08586v1.pdf", "https://arxiv.org/pdf/1705.08586v2.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/b142/689fd8ae3926d8621952fbe2b1bdeea8b969.pdf", "s2Url": "https://semanticscholar.org/paper/8dd1b01c68aec42dcb13224f6d9516b041a77ff0", "sources": [ "DBLP" ], "title": "Self-organized Segregation on the Grid", "venue": "PODC", "year": 2017 }, "8df39cca6481ff0f6fa790d9129a25338042216b": { "authors": [ { "ids": [ "7820558" ], "name": "Grant Hernandez" }, { "ids": [ "3433592" ], "name": "Farhaan Fowze" }, { "ids": [ "27917663" ], "name": "Dave Tian" }, { "ids": [ "34885053" ], "name": "Tuba Yavuz" }, { "ids": [ "1784947" ], "name": "Kevin R. B. 
Butler" } ], "doi": "10.1145/3133956.3134050", "doiUrl": "https://doi.org/10.1145/3133956.3134050", "entities": [ "Algorithm", "Control system", "Embedded system", "Firmware", "Microcontroller", "Openness", "Symbolic execution", "USB", "Wireless USB" ], "id": "8df39cca6481ff0f6fa790d9129a25338042216b", "inCitations": [ "1961c82250cf02079c34d3f4b990ae8f81c06e15" ], "journalName": "", "journalPages": "2245-2262", "journalVolume": "", "outCitations": [ "ca918fa48aea2057051d7d461c1092b688bdae30", "32c9b3a4990fdb26da19a4e0817936369a5e91bc", "490c8a9940021512db7d56f441780a7e7668fb58", "0b5b42425deb371d8dc60ac9b090c7232702370a", "2c21f9488edfb2586327528bb59461a41363fc42", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "53e80c7164482105bd62595c05a8ad9a8579b513", "2c2a304ae470cf5e2ed3e586806679d807bd583b", "bdeaac660facf421f3e29ec2595bd14a1ed88112", "4970c315f38851e50b8fa49e13c8e68dbce143a0", "03c85aa6d213f56cbd3602d9eaf2cc72de9f9a7e", "39d846fc1dc4a43b24dbe7246f62317f08183ba0", "6d2cb21b26297616eca97eacdf5cf350cfb6302e", "8bc2ba6e97e359e5282a6c7c6eef814ea72d16a1", "61819919af096dc949ebd8eb36c41e303e26a87c", "0b6e1e4d69648afb6b0691c37f92b70c42016953", "531fa3815b674e917821c9d2de02fb756ba17598", "2b8f6813be29f884dc021e9cd2b7f06678574ee5", "1a4c7185626d0f2acebf7f05a29fa2073a2fa841", "6727b2c1d938db5f36857b7b43910ec87dd96756", "d4c99aafee6fdd8d12c030e63a929d4c424bf986", "699b07b89e0f5ab1b6c76d370d95f23d204d3211", "73f770633c70d8f4814273ed9731017b8f586698", "74e1dbfcc636eb87d06b9e5891cec632e84e6f7e", "500ed24d8c840ce493e7154f4371c0cc1c897fda", "b00672fc5ff99434bf5347418a2d2762a3bb2639", "2b1288f7ff7b66841e14b6bba1c06c87841d006c", "1f7e5e582663868ed2f6763f98066ca278177a61", "abae6da8186a864fe206b322366a65e95595e3e3", "96fefaab85d6b201f98f833c77bc8a9bc56062d6", "ad27ad437bfa9c2e9d81b6f5c0137bbcdfe2ce9f", "46b444dbd774cedb8d35ac9b94b5e3aca9a0873c", "a465ba2fdc04aa9c461ef6f93c1aeae37a8fad32", "7937387460c274bbf40cd0cedddcdf9c20c94a00", "0dd44a60b4c08175dd5909e4836b9a3b8531933d", 
"b642d797f87d6a5c875acb7c43e2f057daf386a9", "855960bc7a09c5f0df85206da9e9253ba1293d3d", "01777aec28f659cb65b831e3ba84d45fd808cc2e", "20582dec866b1494480492b6257d4010d6ea3113", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "2a43e20d932a0da61edcfdf5373ba4ecbc294f7d", "479bf197a3a9c5f6f7b06e64eefa0a90fa0c8b41", "a8a73b74d24249d5d8c90dd8250a7bab34442d9f", "22ce1db5efe1b56017cbb883784bd013864ca999", "6ea63d09993b9a268689790ea8d25bc36345497e", "9cba7563c2a1390a4c3283f64134ac732b465077", "8d0a150bd390ba3f9f32f7b12cda58edf436aa3f", "16a8e0646724f730eb52216f9bc1284ddb630fd6", "d47f2fc2dc9005e5480a3a36f9641d61dd8afdf4", "158ebb18074ca6c40edfde16ee729b0970f003b5", "125c5130a8cdb9f5cca09e47701d1ef9f3d731a3", "f043e3efe5f2241acd8b4cf41acec89a41edb6c8" ], "paperAbstract": "The USB protocol has become ubiquitous, supporting devices from high-powered computing devices to small embedded devices and control systems. USB's greatest feature, its openness and expandability, is also its weakness, and attacks such as BadUSB exploit the unconstrained functionality afforded to these devices as a vector for compromise. Fundamentally, it is virtually impossible to know whether a USB device is benign or malicious. This work introduces FirmUSB, a USB-specific firmware analysis framework that uses domain knowledge of the USB protocol to examine firmware images and determine the activity that they can produce. Embedded USB devices use microcontrollers that have not been well studied by the binary analysis community, and our work demonstrates how lifters into popular intermediate representations for analysis can be built, as well as the challenges of doing so. We develop targeting algorithms and use domain knowledge to speed up these processes by a factor of 7 compared to unconstrained fully symbolic execution. We also successfully find malicious activity in embedded 8051 firmwares without the use of source code. 
Finally, we provide insights into the challenges of symbolic analysis on embedded architectures and provide guidance on improving tools to better handle this important class of devices.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134050", "http://arxiv.org/abs/1708.09114", "https://arxiv.org/pdf/1708.09114v1.pdf", "http://www.tuba.ece.ufl.edu/firmUSB.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8df39cca6481ff0f6fa790d9129a25338042216b", "sources": [ "DBLP" ], "title": "FirmUSB: Vetting USB Device Firmware using Domain Informed Symbolic Execution", "venue": "CCS", "year": 2017 }, "8e4ff2179aacf754887bedb46b954eb804b8cee1": { "authors": [ { "ids": [ "3383674" ], "name": "Miguel Ambrona" }, { "ids": [ "1737231" ], "name": "Gilles Barthe" }, { "ids": [ "2906122" ], "name": "Romain Gay" }, { "ids": [ "32122736" ], "name": "Hoeteck Wee" } ], "doi": "10.1145/3133956.3134088", "doiUrl": "https://doi.org/10.1145/3133956.3134088", "entities": [ "Access control", "Attribute-based encryption", "Cryptographic primitive", "Cryptography", "Encryption", "Generic group model", "Master theorem" ], "id": "8e4ff2179aacf754887bedb46b954eb804b8cee1", "inCitations": [ "92880b163ad99d4063295d77cde7d571fcd5106c" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "983", "journalVolume": "2017", "outCitations": [ "567911628a5d1656027ecafcb8384e16c0b8e8cc", "15edf156d144f9b3c690f85d28b77825f1b39f9c", "2bcf79cbeafa58024ddfb990e0d76411ce34a696", "2dc6b23839d32bddba95bb6846e4ef849029d1e1", "1c86660a90f6eaba700d93730f29bfcdb925fa85", "0658394f2f6d0a4fcacdc92a33ce68c73bd4ebf3", "387fa50ce45d9fd844137b191e868f1069363d75", "8289821325781e2f0ce83cfbfc1b62c44be799ee", "3cb11014e822ba8b47c0a119206ad454e14c97a4", "5ac0a370e94222e2ae87a8b78ca3f28b90e23574", "3ea4b131c65dea824ac72269c1dbcdee09869cff", "5af56b18071e7adf0d5b9a118e05bc893aace8e3", "de71bc2b2cd41039864934a4c3999688bdf53516", "586d274e9416931fd5ea3f631a832a9336df19d4", 
"27f4943e11adefc5e7654cae3a8718a8d22dbe05", "5281536f3d07af0074666f48884b9d8b860dd046", "46f018df7221cdc1babbdff88c0ec642c61a98bd", "ab65a5d4342a25e9d2e0474e5d1c33a5b7abda08", "93c35767aadb444fe3de4263a88971abcdfa40c4", "2dff0f21a23f9e3b6e0c50ce3fec75de4ff00359", "bd5b0a8eafec1ccf1516e4a969ecc6e3948a1869", "4e151782d4d1b8c9401b81375ebf3f3a5feaaa9d", "8e6456de7f818f389de9657232d90b046caf5d8f", "5d9723e30c1eabd31a15a1010c1c87ab1ff13523", "16e3df67a7ffce10b66dc59d247fb30fa31ed272", "592745ac8339a0330f13baac973c1f998704ad43", "66f4416508bb55750221494923f1a62c2845af89", "684f6b6b3bd8da14f0e9f4a59e0c748b55a44120", "33015e314346674df0c4fbb2629c6e2422c08c31", "40f430cc1c394f6150adf2f6324726d811d1c72f", "c239b21487d9ecaf3b0e2a598806ab9273af55cc", "1f532d6885c11af4b02995c8d6ba0d67a4574bae", "ca86f06cc01a43a6092dc6a76ac70dfeea3226fc", "5191878146f4c564fa6d413c08325bf32b823bb3", "3e09055c5940a4656ac0f8ee85c10242ea8aece6", "35eddc46f29aa698d4edb7f558224750c78b1406", "627ccf9243443ac1ada40055e4b8b034bd8b16c7", "d40d6eaa5020db9d161ef6ee2fa980f40bf23a76", "da09bc42bbf5421b119abea92716186a1ca3f02f", "683d0e1471be57a483fe7448fc3e037fb904677e", "8ea8da6af639e0cf81ea4deca856eabbb84c0e69", "0b277244b78a172394d3cbb68cc068fb1ebbd745", "55d77b0216a3adfe6f9ebfbe465cfc8fa1004006", "4987aaf293f1715aeda9387f832e3630a79fe74b", "0aeb21de164e5c4567bfaa7f787fff8b42670429", "cec9731efd9dde5c7d3d71ba31c34c0a865be757", "5897578e7e24f2868701a6b9eae39565d05f9a26", "f6a9941d346c9fe0250746de709423278d2abbf8", "6b1fb3d4a3da850db2ebd8f95b90da3d19b9518c", "8b9fe673239166fe8702609922d780e0e847ff4b", "136a5e50af7f75aca1a28db1a337bacc2cbdde5c", "11d728f731bcbd990ac78f43dc9f17b5a1a9f594", "9d0f8c61b60fe27feb0da4bf13162ad3d024b371", "0a9ce8889505a151eea2515b7eec741a16fcee3a" ], "paperAbstract": "Attribute-based encryption (ABE) is a cryptographic primitive which supports fine-grained access control on encrypted data, making it an appealing building block for many applications. 
In this paper, we propose, implement, and evaluate fully automated methods for proving security of ABE in the Generic Bilinear Group Model (Boneh, Boyen, and Goh, 2005, Boyen, 2008), an idealized model which admits simpler and more efficient constructions, and can also be used to find attacks. Our method is applicable to Rational-Fraction Induced ABE, a large class of ABE that contains most of the schemes from the literature, and relies on a Master Theorem, which reduces security in the GGM to a (new) notion of symbolic security, which is amenable to automated verification using constraint-based techniques. We relate our notion of symbolic security for Rational-Fraction Induced ABE to prior notions for Pair Encodings. Finally, we present several applications, including automated proofs for new schemes.", "pdfUrls": [ "https://eprint.iacr.org/2017/983.pdf", "http://doi.acm.org/10.1145/3133956.3134088", "http://www.di.ens.fr/~rgay/ABGW17.pdf", "http://eprint.iacr.org/2017/983" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8e4ff2179aacf754887bedb46b954eb804b8cee1", "sources": [ "DBLP" ], "title": "Attribute-Based Encryption in the Generic Group Model: Automated Proofs and New Constructions", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "8e967cb870e5da7830a8d05030e0dc1389298bbf": { "authors": [ { "ids": [ "10328573" ], "name": "Kok-Kiong Yap" }, { "ids": [ "2788273" ], "name": "Murtaza Motiwala" }, { "ids": [ "35109390" ], "name": "Jeremy Rahe" }, { "ids": [ "34709352" ], "name": "Steve Padgett" }, { "ids": [ "1744203" ], "name": "Matthew J. 
Holliman" }, { "ids": [ "22651598" ], "name": "Gary Baldus" }, { "ids": [ "40583445" ], "name": "Marcus Hines" }, { "ids": [ "40375056" ], "name": "Taeeun Kim" }, { "ids": [ "28182410" ], "name": "Ashok Narayanan" }, { "ids": [ "1791577" ], "name": "Ankur Jain" }, { "ids": [ "40691104" ], "name": "Victor Lin" }, { "ids": [ "39996266" ], "name": "Colin Rice" }, { "ids": [ "17092819" ], "name": "Brian Rogan" }, { "ids": [ "37248999" ], "name": "Arjun Singh" }, { "ids": [ "32594711" ], "name": "Bert Tanaka" }, { "ids": [ "33790088" ], "name": "Manish Verma" }, { "ids": [ "2814532" ], "name": "Puneet Sood" }, { "ids": [ "1795809" ], "name": "Muhammad Mukarram Bin Tariq" }, { "ids": [ "3234564" ], "name": "Matt Tierney" }, { "ids": [ "22684603" ], "name": "Dzevad Trumic" }, { "ids": [ "2591679" ], "name": "Vytautas Valancius" }, { "ids": [ "40199101" ], "name": "Calvin Ying" }, { "ids": [ "1740988" ], "name": "Mahesh Kallahalla" }, { "ids": [ "34915044" ], "name": "Bikash Koley" }, { "ids": [ "1718754" ], "name": "Amin Vahdat" } ], "doi": "10.1145/3098822.3098854", "doiUrl": "https://doi.org/10.1145/3098822.3098854", "entities": [ "Internet", "Network switch", "Peering", "Router (computing)", "Routing", "Scalability", "Software-defined networking" ], "id": "8e967cb870e5da7830a8d05030e0dc1389298bbf", "inCitations": [ "2cceb34b8908fa9c89a9f4df54a2ba838a69d717", "d977c9a1b18191c924bc9e5529eb6fb25b75489d", "00fd0d48da95aa33d361ff65c9a888ab8503b8c1", "2426ec69b4b9f4a37b944b75c9942bd4eac88e8e", "4aa4861d4205ef0885d5c5d04fe80617d3c099da", "4f529a014523fc1a54c03f781d49309f847bbfea", "0e47ab1cfcacf5bbdedfff83cde0a45b0f3b3bab", "b30584ba33c8c2b1b8c54b2421538bfd1cbb32da" ], "journalName": "", "journalPages": "432-445", "journalVolume": "", "outCitations": [ "4aa4861d4205ef0885d5c5d04fe80617d3c099da", "06cc4d5643610c790a5b1bbcd063e1d0c45d377b", "3520db5efa10ca6d00449c68d87bb80f5e44aa53", "2fdfb9098803f9b7d523d9bf67dca8b53cd28cf9", "0f1a89bd89497587049eef69534cd15feb3c620b", 
"2a628e4a9c5f78bc6dcdf16514353336547846cc", "7aff15f99cc4b47f0174b2e2dcba9a52c33c2caa", "64a6dd2a598d5f012a54fe6ca06d4f7235f66626", "024b1d3c33196953c831ba432cb12f88f36ad40e", "55ef72fe52990f491ab939b91d75b7899a66180f", "22bd3a35b9550bc5b570a0beee5648eb9033be3b", "729f190c1ee6776bed4a0e34ac72fe0df17737ae", "0a24cbe80fe2fbdb701c43903b1b187691d8b6d6", "1b11d4b0b04e0eb061029b57e1a1c436193f13b1", "267aa4a091dd43f7eb4ffad4c63405229fd31f1d", "088536c44bcbc67165dc064ba4af0bc015d58a65", "1b54f6f1d93b0a409a6c58e8445a471be9c80603", "3ff93ff1ccbfce995067a4617d73ea30115318b6", "de9c49aee2b7c7104f9065dfbafe4b478dec2967", "4687fdf3c77ef00700fdf1399f7dd81bfe87ef97", "6981e8f529553be35001d5d2b494e004793d5327", "00c181b8b64e824fbe0172339f1e4560b557fab5" ], "paperAbstract": "We present the design of Espresso, Google's SDN-based Internet peering edge routing infrastructure. This architecture grew out of a need to exponentially scale the Internet edge cost-effectively and to enable application-aware routing at Internet-peering scale. Espresso utilizes commodity switches and host-based routing/packet processing to implement a novel fine-grained traffic engineering capability. Overall, Espresso provides Google a scalable peering edge that is programmable, reliable, and integrated with global traffic systems. Espresso also greatly accelerated deployment of new networking features at our peering edge. 
Espresso has been in production for two years and serves over 22% of Google's total traffic to the Internet.", "pdfUrls": [ "http://conferences.sigcomm.org/sigcomm/2017/files/program/ts-10-2-espresso.pdf", "http://doi.acm.org/10.1145/3098822.3098854", "http://www.cs.princeton.edu/courses/archive/fall17/cos561/papers/espresso17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8e967cb870e5da7830a8d05030e0dc1389298bbf", "sources": [ "DBLP" ], "title": "Taking the Edge off with Espresso: Scale, Reliability and Programmability for Global Internet Peering", "venue": "SIGCOMM", "year": 2017 }, "8ea7ee81210afdb230f0c98314a91676269929a2": { "authors": [ { "ids": [ "1729212" ], "name": "Olivier Aumage" }, { "ids": [ "2350676" ], "name": "Julien Bigot" }, { "ids": [ "1802900" ], "name": "H\u00e9l\u00e8ne Coullon" }, { "ids": [ "37434914" ], "name": "Christian P\u00e9rez" }, { "ids": [ "40536492" ], "name": "J\u00e9r\u00f4me Richard" } ], "doi": "", "doiUrl": "", "entities": [ "Comet (programming)", "Component-based software engineering", "OpenMP", "Programming model", "Separation of concerns", "Use-case analysis" ], "id": "8ea7ee81210afdb230f0c98314a91676269929a2", "inCitations": [ "555beb3901b450c4f35849a72da11543f1c7e7a4" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "635-644", "journalVolume": "", "outCitations": [ "2f93ba2f49d1c2c0c4e668dc4955d384fca80354", "01d91fdad6d64a24a236b48347987af101e48841", "262c123c6325d7b4c2edc904d6e85d352ab266b9", "2bd338ef8751b62d23e53fbb44d67042d634da2f", "29e78a4483c3a020d1081e53ab3126c1e3ac38ed", "5ff8127a6f81633464e6ebe07b53dcc0cf970da6", "04717b51d7c7dd598d732fb8a293d69d45c0fa9f", "8879c1cfefa0ad6f5c578a5f16ae7100b2717472", "336d736e780a3cef2f664046f9e529b143c5ddf5", "36316d741c31ac88604796628aea327ebe4e2bbf", "6a668dfe4fa05408a5f752201ad83e02181ed6e2", "03daf2d17337f000538d9d4727fa49d52bdb922c", 
"7a44468c0e8dd87a1e653541d4dfb13329526bf3", "5dea147b41ccefa69eee8c32ec0f4e830a799f3e", "2993f093b92d59334fd13ecfbddf8cfefbcfdc41", "87ff0bf1197ed7ac24338ba243836c778c67c881", "7c83ebbfa0666fd9242c5f2f44e1079e65446f58" ], "paperAbstract": "This paper studies the feasibility of efficiently combining both a software component model and a task-based model. Task based models are known to enable efficient executions on recent HPC computing nodes while component models ease the separation of concerns of application and thus improve their modularity and adaptability. This paper describes a prototype version of the COMET programming model combining concepts of task-based and component models, and a preliminary version of the COMET runtime built on top of StarPU and L2C. Evaluations of the approach have been conducted on a real-world use-case analysis of a subpart of the production application GYSELA. Results show that the approach is feasible and that it enables easy composition of independent software codes without introducing overheads. Performance results are equivalent to those obtained with a plain OpenMP based implementation.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101199" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8ea7ee81210afdb230f0c98314a91676269929a2", "sources": [ "DBLP" ], "title": "Combining Both a Component Model and a Task-Based Model for HPC Applications: A Feasibility Study on GYSELA", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "8ead750dcd97a007d104a56954d5a18700f6b187": { "authors": [ { "ids": [ "1914760" ], "name": "Kyong-Tak Cho" }, { "ids": [ "1730051" ], "name": "Kang G. 
Shin" } ], "doi": "10.1145/3133956.3134001", "doiUrl": "https://doi.org/10.1145/3133956.3134001", "entities": [ "Acknowledgement (data networks)", "CAN bus", "Control unit", "Electronic control unit", "Engine control unit", "Fingerprint", "Identity document forgery", "Patch (computing)", "Public key fingerprint", "Transmitter" ], "id": "8ead750dcd97a007d104a56954d5a18700f6b187", "inCitations": [ "8d5a870b783bf289694e077d6db2fe0ea8d75aa6" ], "journalName": "", "journalPages": "1109-1123", "journalVolume": "", "outCitations": [ "f00b214ac4e02f60ff455e092a66e0f153bc2cc8", "9b78cc3a68f38d86489113204a45f063b6671a74", "b96328f7bb3ef862b1b78dd85335e0bfbda24e57", "4065a5844eba1a120bef16e1c943338ee0393fcf", "a3bfe87159938a96d3f2037ff0fe10adca0d21b0", "a34d6f64bf042862c74266b7ecf907920d6392a7", "cdbb46785f9b9acf8d03f3f8aba58b201f06639f", "90c63c1285dbea2e9e8a8bfb843ecdc933e1b281", "56c3e3e4259f57b960748277eada19e3a387b31b", "007ee2559d4a2a8c661f4f5182899f03736682a7", "41ec1bc9a81de1f10d067cf4d2d8024eebd3aeeb", "7e0e23218cfad13c0efb6a506bbc44a6ddbace3a", "9104647755fb7576af94133e2737a680442f6ab1", "39153fd796d154f84d30f4ab5f19fd40253b6c06", "af544c1b1c4dd04a3362a5a4b88326861f27044b" ], "paperAbstract": "Various defense schemes --- which determine the presence of an attack on the in-vehicle network --- have recently been proposed. However, they fail to identify which Electronic Control Unit (ECU) actually mounted the attack. Clearly, pinpointing the attacker ECU is essential for fast/efficient forensic, isolation, security patch, etc. To meet this need, we propose a novel scheme, called Viden (Voltage-based attacker identification), which can identify the attacker ECU by measuring and utilizing voltages on the in-vehicle network. The first phase of Viden, called ACK learning, determines whether or not the measured voltage signals really originate from the genuine message transmitter. 
Viden then exploits the voltage measurements to construct and update the transmitter ECUs' voltage profiles as their fingerprints. It finally uses the voltage profiles to identify the attacker ECU. Since Viden adapts its profiles to changes inside/outside of the vehicle, it can pinpoint the attacker ECU under various conditions. Moreover, its efficiency and design-compliance with modern in-vehicle network implementations make Viden practical and easily deployable. Our extensive experimental evaluations on both a CAN bus prototype and two real vehicles have shown that Viden can accurately fingerprint ECUs based solely on voltage measurements and thus identify the attacker ECU with a low false identification rate of 0.2%.", "pdfUrls": [ "http://arxiv.org/abs/1708.08414", "https://kabru.eecs.umich.edu/wordpress/wp-content/uploads/ktcho_CCS17.pdf", "https://arxiv.org/pdf/1708.08414v1.pdf", "http://doi.acm.org/10.1145/3133956.3134001" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8ead750dcd97a007d104a56954d5a18700f6b187", "sources": [ "DBLP" ], "title": "Viden: Attacker Identification on In-Vehicle Networks", "venue": "CCS", "year": 2017 }, "8f1579e54d6c6c621dc2b84d57efc198af867b7c": { "authors": [ { "ids": [ "3543872" ], "name": "Zhenhua Li" }, { "ids": [ "33830008" ], "name": "Yuanyuan Yang" } ], "doi": "10.1109/HOTI.2017.15", "doiUrl": "https://doi.org/10.1109/HOTI.2017.15", "entities": [ "Algorithm", "Data center", "Heuristic", "Heuristic (computer science)", "Integer programming", "Linear programming", "Requirement", "Scheduling (computing)", "Simulation", "Virtual machine" ], "id": "8f1579e54d6c6c621dc2b84d57efc198af867b7c", "inCitations": [], "journalName": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "journalPages": "73-79", "journalVolume": "", "outCitations": [ "a1894966c9c7090f9bdcff87aaa85924be40f18b", "5eb3ac3f7aa60a5b354c489ced10f42c8f381eb2", "5a682b068759efc65c0bcae8b1162a04536ac408", 
"7b5144c88098a183eb2f8395276b0be6196a442b", "3b8c2360ca905b19bf193493ae44ea102767f04c", "7f822adf127881926c2fab2401d6e3e381bd9c11", "299556b20c4846d5430b391550c427081b4b1abb", "21b8099f8b9b7044793daa848cc109aafe201fd7", "ba6753cb5b3ea151930e5940fbc5cdb8ef91eb98", "534ee575a6b0c37e03d1dddb92493b57e9271298", "63a061c70da9ce645de1ad803a06f1595833befb", "3369350838fbf5dd53265da784eacf721924bb2d", "30a784df832fbeed18100e8d32232cc846be770e", "f57ac7f53438b2877022125bac957fda2bb2a97b", "16ccda81a562eee4a5403dab8029c57c30fc2c19" ], "paperAbstract": "Hybrid data center networks (HDCNs), where each ToR switch is installed with a directional antenna, emerge as a candidate helping alleviate the over-subscription problem in traditional data centers. Meanwhile, as virtualization techniques develop rapidly, there is a trend that traditional network functions that are implemented in hardware will also be virtualized into virtual machines. However, how to place virtual network functions (VNFs) into data centers to meet the customer requirements in a hybrid data center network environment is a challenging problem. In this paper, we study the VNF placement in hybrid data center networks, and provide a joint VNF placement and antenna scheduling model. We further simplify it to a mixed integer programming (MIP) problem. Due to the hardness of a MIP problem, we develop a heuristic algorithm to solve it. To the best of our knowledge, this is the first work concerning VNF placement in the context of HDCNs. 
Our extensive simulations demonstrate the effectiveness of the proposed algorithm, which make it a very suitable and promising solution for VNF placement in HDCN environment.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HOTI.2017.15" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8f1579e54d6c6c621dc2b84d57efc198af867b7c", "sources": [ "DBLP" ], "title": "Placement of Virtual Network Functions in Hybrid Data Center Networks", "venue": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "year": 2017 }, "8f2e0440ab4d1d9ebad2b7d4103b6609a93a92b9": { "authors": [ { "ids": [ "40129231" ], "name": "Peng Cheng" }, { "ids": [ "1850222" ], "name": "Hao Xin" }, { "ids": [ "37833805" ], "name": "Lei Chen" } ], "doi": "10.1145/3035918.3064008", "doiUrl": "https://doi.org/10.1145/3035918.3064008", "entities": [ "Algorithm", "Approximation algorithm", "Bilateral filter", "Experiment", "Greedy algorithm", "Knapsack problem", "NP (complexity)", "Polynomial", "Scheduling (computing)", "Synthetic data", "Time complexity", "Ultima Ratio Regum", "Value (ethics)" ], "id": "8f2e0440ab4d1d9ebad2b7d4103b6609a93a92b9", "inCitations": [], "journalName": "", "journalPages": "1197-1210", "journalVolume": "", "outCitations": [ "7d7658ad5b65f1438ba7e8217c72565fc89fa6c1", "12bcd428a4d47a9f3c775d84e9d73033740f8bd6", "5420175febf6d9d94e6d4967cf06a6a74e5b3f12", "57bc59cbb94711a7945e2d8984e4e9afd49e1849", "14836d573c94dc8db699cd3f0a08373c50d73fc2", "4d6ea75cc13599cedb03f6a606d6154a8f424074", "3b2fc7d952bd2a0dafdbf4e13617d70581e7955d", "fed4ffd31ccb68162ff52d5ad97f92dfc0e558ea", "18a7403cce25ab6351193ca43a5b1020ca8a69cf", "1146a1fb2a35880d205160ad62511194505cc3f7", "7909dc85b5d3016100bd6cba675f70de3a5e9413", "118ccb22e4ffed2259f661013c3d1d626c85db17", "e392f3143c86ec40b20b5b4a5a29cf209f2572e8", "24c48b97725d84246f6dbd39c055648a305e1df4", "a3c0a1617f640339350358745106fff590130a45", "49bfad37cb0c2d82b85fa3c4cb333767ac5c49a6", 
"1e58e490a84e5526258b43818e5b5e0dfb27852a", "6bf018c9ce81b17834c8573888340000322e7650", "5c6e4bba1ca3071d24bdffea78d441e0a4afb895", "78b4271480ba3b143022773b9e9b5e2342db41dc", "06501b7ea604a8b8ffff402ee492955e6892daad", "7cbb5c100298818e260bed8a5996918a612e3545", "14d73480e38599a6997a37d871353e92c71e9503", "57236a1873e831e2442ec4cc2106c692c35432bb" ], "paperAbstract": "Ridesharing enables drivers to share any empty seats in their vehicles with riders to improve the efficiency of transportation for the benefit of both drivers and riders. Different from existing studies in ridesharing that focus on minimizing the travel costs of vehicles, we consider that the satisfaction of riders (the utility values) is more important nowadays. Thus, we formulate the problem of utility-aware ridesharing on road networks (URR) with the goal of providing the optimal rider schedules for vehicles to maximize the overall utility, subject to spatial-temporal and capacity constraints. To assign a new rider to a given vehicle, we propose an efficient algorithm with a minimum increase in travel cost without reordering the existing schedule of the vehicle. We prove that the URR problem is NP-hard by reducing it from the 0-1 Knapsack problem and it is unlikely to be approximated within any constant factor in polynomial time through a reduction from the DENS k-SUBGRAPH problem. Therefore, we propose three efficient approximate algorithms, including a bilateral arrangement algorithm, an efficient greedy algorithm and a grouping-based scheduling algorithm, to assign riders to suitable vehicles with a high overall utility. 
Through extensive experiments, we demonstrate the efficiency and effectiveness of our URR approaches on both real and synthetic data sets.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064008" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8f2e0440ab4d1d9ebad2b7d4103b6609a93a92b9", "sources": [ "DBLP" ], "title": "Utility-Aware Ridesharing on Road Networks", "venue": "SIGMOD Conference", "year": 2017 }, "8f422aa51611bee4905ba431fbd7778651d3f930": { "authors": [ { "ids": [ "2572064" ], "name": "Kadir Akbudak" }, { "ids": [ "1754635" ], "name": "Hatem Ltaief" }, { "ids": [ "39625255" ], "name": "Aleksandr Mikhalev" }, { "ids": [ "1794014" ], "name": "David E. Keyes" } ], "doi": "10.1007/978-3-319-58667-0_2", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_2", "entities": [], "id": "8f422aa51611bee4905ba431fbd7778651d3f930", "inCitations": [ "8deaa1b91a446d77728ee61189ec5fc5234277c7", "434025cbbe35130e4a8efdcc9f5d53eb42156fb7" ], "journalName": "", "journalPages": "22-40", "journalVolume": "", "outCitations": [ "da683799144f0b4db8b41d0bd0e0c09414883bab", "9b87e0c65c7d517d81d6422849f927dc66b0f059", "5425e73a772c68b4be27b6886d7ea8efe2c6f38c", "d9b4d4ffa5bf9e9e376b0bdeba19d4090d653feb", "0962ac3ffefd3b3ad5b9a0d25862d95f347be82b", "ce97ce67ad3f5644601bd2bde69de62ab56be1c7", "04717b51d7c7dd598d732fb8a293d69d45c0fa9f", "47309382ed0b272a47a48e0b6dc2eaca12fcbe94", "db81eb62f54ff80033c6123e7d3c524574bcc458", "0fd82e4ff009aadd492d10c889370c73308a3ca2", "952896a6656080d1a0e021733bfaa237dd53f832", "4c9e7233cc630109709d9ccf9814140fd1d28c9f", "917fd12162c12c0fd2cb6409de1dd438531c553a", "9498bb96328c3e9498772241b55031570e6577d1" ], "paperAbstract": "Covariance matrices are ubiquitous in computational science and engineering. In particular, large covariance matrices arise from multivariate spatial data sets, for instance, in climate/weather modeling applications to improve prediction using statistical methods and spatial data. 
One of the most time-consuming computational steps consists in calculating the Cholesky factorization of the symmetric, positive-definite covariance matrix problem. The structure of such covariance matrices is also often data-sparse, in other words, effectively of low rank, though formally dense. While not typically globally of low rank, covariance matrices in which correlation decays with distance are nearly always hierarchically of low rank. While symmetry and positive definiteness should be, and nearly always are, exploited for performance purposes, exploiting low rank character in this context is very recent, and will be a key to solving these challenging problems at large-scale dimensions. The authors design a new and flexible tile row rank Cholesky factorization and propose a high performance implementation using OpenMP task-based programming model on various leading-edge manycore architectures. Performance comparisons and memory footprint saving on up to 200K\u00d7200K covariance matrix size show a gain of more than an order of magnitude for both metrics, against state-of-the-art open-source and vendor optimized numerical libraries, while preserving the numerical accuracy fidelity of the original model. 
This research represents an important milestone in enabling large-scale simulations for covariance-based scientific applications.", "pdfUrls": [ "http://www.springer.com/cda/content/document/cda_downloaddocument/9783319586663-c2.pdf?SGWID=0-0-45-1606804-p180852894", "https://doi.org/10.1007/978-3-319-58667-0_2" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/8f42/2aa51611bee4905ba431fbd7778651d3f930.pdf", "s2Url": "https://semanticscholar.org/paper/8f422aa51611bee4905ba431fbd7778651d3f930", "sources": [ "DBLP" ], "title": "Tile Low Rank Cholesky Factorization for Climate/Weather Modeling Applications on Manycore Architectures", "venue": "ISC", "year": 2017 }, "8f49ae0e014262cc5fb60a06e8e2bab0651100d4": { "authors": [ { "ids": [ "1998879" ], "name": "Daniel S. Berger" }, { "ids": [ "1719320" ], "name": "Ramesh K. Sitaraman" }, { "ids": [ "1738043" ], "name": "Mor Harchol-Balter" } ], "doi": "", "doiUrl": "", "entities": [ "CPU cache", "Change detection and notification", "Content delivery network", "Digital distribution", "Digital video", "Distributed computing", "Hidden Markov model", "Hill climbing", "Hit (Internet)", "Server (computing)" ], "id": "8f49ae0e014262cc5fb60a06e8e2bab0651100d4", "inCitations": [ "076538d1114ea07d2ed373bce513ec566aed7d5c", "e9a1f0bb78f2746b18cec0514805ed91f23ccc10", "237d67f89d957068b83b3b3930ab7bf13b8d0e4f", "0357d8655c6788bf0041b8446c038a808c209338", "157295da1d2e67b8286bff1ff03ae0dd248b7a17", "2dc3d8536a8fa4660c6e842ef17c2d1553fceea6", "0f35d1156d2667232855578b50b8fb02ea5bbf51", "e593411ab69421d57e7fb69d53662e3c78340512" ], "journalName": "", "journalPages": "483-498", "journalVolume": "", "outCitations": [ "23a9e1f8cefc76b71f0cf5e1ccf5a6485c19cadf", "01a326535590a1c4c19367cc1b5e6ed827330ab8", "0da3e7bf3125534a7fe08c1d630b3cf32259c5fa", "301f660f8cb2336829aef07dd534a51e22f22017", "fe416dfe577e6d77bd5c9c6af514e665f90af94a", "1c7d0f188a8033d8a14ab3ae30662f7e85fa65b6", "13a4ac8f6000812c712702c73683ffc1e3e7c577", 
"f220a5165d73c8829d4d852f294c2894ac290162", "19c3fcffda8e6e5870b3a533c483bca024501ab5", "1d1d26cdf8f923a1114aaf120ae2947170207142", "fe20c9537ea0be2f2a79388a53e1f2fb4b7fe771", "060d964a36ffa603c4f21f76188b4845835086eb", "f8aa33900f552f8112d6186d78bc845d2dfc0007", "a20097a100edc22dcf685605dd935f4ae8234ae3", "a1edd2178bb0bf2061aab69d5e866f928b6e0292", "bde2b660b1fa21da4d483b2ac1ba169f0a57f21b", "73ba74bfbdd56c291f8619019350ebfdb8416c5e", "532787be1a70aebc1c0b975e31a81c538927f0af", "bb5e43dd30a3a60df42652d56781568f5cd0a99d", "1d5d0550c1def1293a5a965db6de866156e6e359", "4bc9cb4e174ac934cba417400f61df28b07753f2", "15a1e04faa63dae8e8cef5d8cb1478ea2655d124", "e0508499b4cf5794d5aeaf717e7ad9541e9c2bba", "3523903cefcf1f06798e6ea398870fc6bfe23924", "13c1a3b9ae98870859e49596102318019174073a", "0c577868739d3938aeb8dd9cbb679603026b8ad1", "32f6ded4e88667f34fe49a0ee80d9a9093b00547", "0b6e205fbbd179291c3ed198db6f403ca74624a1", "0ad3358ffc0d5e44311160767cc0fb65ccd25b00", "dfbaf25ce830248b52cc504b690086fa88971525", "0e2303bc719e965bc09cab0d486bb3553cdd2d21", "0fd4a1b1b92a65b70fad60ad6e95ed54e8f6e86a", "82248b6188d24e884820161fcc37f6ce4d3fbbbe", "26cc13fd4b68aabd7181111dce93b3249ce570c4", "4cead48e2eac91560105871b78268e3164eb382b", "813007c4ca412b1b3d8eba4aad66e53ce3cd77a8", "1354abf8baa8b6d39c65758072ae9d0b07227d5c", "0720cfa5330462593b20ea0bbb7d8b5862a6b730", "4a58e3066f12bb86d7aef2776e9d8a2a4e4daf3e", "0a40663fdcf7c5fb7cfc459693116c41309e7eca", "0552f48a63628c52c0f34e61067b77a87bc7379b", "0d39a7260dbe21cf6498187824fa0b1de92c1a47", "3d5b229981595a5864270d9336e941e0ae374fec", "029ef1c1c1646e5650260d99e0b837d4e524d559", "1594118f2696b573f08510cf837f3b37db87face", "ad74b4fd7c3dfeef100d9497a8a43c201fb05077", "95676cb40e90e80cd09d25836da0f69b164f27df", "1fc74811ceb2bb88aa852ecaa3484155ce7d210f", "0277480cced7d09474dda4dc4f7b9381bd4c895a", "235ffbe72353aaa49d38fd973fa67cc2a15310fb", "4b4ee1ee9bbfd9527fba0bbd761bd61a59f96a48", "1dd353938063795c06ef21d8b0b3ef3b45a2fdc1", 
"e60d043d8eeeefd399131f5ddfc606ef45549d50", "61d3de9ef68f4878d1a3ba48e278d067c3834d58", "39838d42b98b2c7b65a440cc6a8c50caecdd7855", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "17567c8bcf41cc189b4f8e5f9c8cd666bb4d3da9", "e5ff40f9b7f7ea5d047cd27cc889cbeb1790cb53", "c5d954d13c1c620d78ebaba9afa120733e90ed09", "1ecb0e58cb0e5331ee3e0082673a2a981284397e" ], "paperAbstract": "Most major content providers use content delivery networks (CDNs) to serve web and video content to their users. A CDN is a large distributed system of servers that caches and delivers content to users. The first-level cache in a CDN server is the memory-resident Hot Object Cache (HOC). A major goal of a CDN is to maximize the object hit ratio (OHR) of its HOCs. But, the small size of the HOC, the huge variance in the requested object sizes, and the diversity of request patterns make this goal challenging. We propose AdaptSize, the first adaptive, size-aware cache admission policy for HOCs that achieves a high OHR, even when object size distributions and request characteristics vary significantly over time. At the core of AdaptSize is a novel Markov cache model that seamlessly adapts the caching parameters to the changing request patterns. Using request traces from one of the largest CDNs in the world, we show that our implementation of AdaptSize achieves significantly higher OHR than widelyused production systems: 30-48% and 47-91% higher OHR than Nginx and Varnish, respectively. AdaptSize also achieves 33-46% higher OHR than state-of-the-art research systems. 
Further, AdaptSize is more robust to changing request patterns than the traditional tuning approach of hill climbing and shadow queues studied in other contexts.", "pdfUrls": [ "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/berger", "http://www.pdl.cmu.edu/PDL-FTP/associated/nsdi17-berger.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-berger.pdf", "https://people.cs.umass.edu/~ramesh/Site/PUBLICATIONS_files/nsdi17-berger.pdf", "http://www.cs.cmu.edu/~harchol/Papers/NSDI17.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-berger.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/e7bb/88d0bd83b9b81e93e987787f2366dce5f313.pdf", "s2Url": "https://semanticscholar.org/paper/8f49ae0e014262cc5fb60a06e8e2bab0651100d4", "sources": [ "DBLP" ], "title": "AdaptSize: Orchestrating the Hot Object Memory Cache in a Content Delivery Network", "venue": "NSDI", "year": 2017 }, "8f53bef92f25071acd73b18393a436e3c05e57c1": { "authors": [ { "ids": [ "1963938" ], "name": "David Chisnall" }, { "ids": [ "2730702" ], "name": "Brooks Davis" }, { "ids": [ "3036326" ], "name": "Khilan Gudka" }, { "ids": [ "9999566" ], "name": "David Brazdil" }, { "ids": [ "5233254" ], "name": "Alexandre Joannou" }, { "ids": [ "38444468" ], "name": "Jonathan Woodruff" }, { "ids": [ "1833858" ], "name": "A. Theodore Markettos" }, { "ids": [ "9990026" ], "name": "J. Edward Maste" }, { "ids": [ "2122139" ], "name": "Robert Norton" }, { "ids": [ "1893166" ], "name": "Stacey D. Son" }, { "ids": [ "1954071" ], "name": "Michael Roe" }, { "ids": [ "1926335" ], "name": "Simon W. Moore" }, { "ids": [ "34607009" ], "name": "Peter G. Neumann" }, { "ids": [ "8656151" ], "name": "Ben Laurie" }, { "ids": [ "2750619" ], "name": "Robert N. M. 
Watson" } ], "doi": "10.1145/3037697.3037725", "doiUrl": "https://doi.org/10.1145/3037697.3037725", "entities": [ "Computer security model", "Direct memory access", "Field-programmable gate array", "File system permissions", "High- and low-level", "Java", "Java virtual machine", "Machine code", "Memory protection", "Memory safety", "Microprocessor", "Read-only memory", "Standard library" ], "id": "8f53bef92f25071acd73b18393a436e3c05e57c1", "inCitations": [], "journalName": "", "journalPages": "569-583", "journalVolume": "", "outCitations": [ "1810f70bdcb6f50ff70bed2c165918046e6a8aef", "5fcc0219ea8ab9ab9f38eb27ce4ec31662366902", "565919855788bfcc7fbaad3006fe0f42c735b333", "d937dd67265c2ac0ba5ffa8022323d37c2344188", "12d5c3bcd2dd94b4f07f358be291c5906b16dba4", "70b4132f2be9a588f86687d319a159cdcf71ad95", "01ee7db67463b53143e0b2c126363ed6d3c8a532", "858d676cdd76fb7cea413ab0c869bbf988663350", "0719b9670c8580db76547497df39caabdc20fc32", "49d50f1d0bb2a03388f3cb60f9f1ca01974d371c", "a4aad71df81971ee0255ca7c21fd22e5d37e92b5", "0e76004790865d4a69b9a520f6284b79e05c5446", "449ac3e3912c25f907b962da65a9d8a715b1e507", "4c3a84729bd09db6a90a862846bb29e937ec2ced", "3e651a701c9d814616a67acb4f72532ef976a0b3", "e7d317d9ea5d16d2105a791618d21d5746577404", "29146d2b80bc8c56f7e18efe8d2c92354254e947", "2cc08b9f07a889a7c035df438ca99d0ad8c97aa5", "c7ba68dbff13e84cbaee3b0d57e597ba2eb60c5f", "43393a561914f05be312a1dff5a757cbc384d1a1", "b114a2a66775995ebefa0059fde453bec92fc08c", "240937d8d2b34a2f8007a858016422e1cc3d1442", "3dfe89c342568a7987d5c619536623c0d8f62d39", "106c0924c389a9883dcbfdb6b3cc37e9e9b9f898", "1f333ade9620ad695556353d5a052f1c71ae297b", "4acdb61098053f38d5500a9ef974d24828696b9d", "0657eb7e069c2c2c7cae6636704e0f7fb3bcd9fc", "c6c795b96ee140c01d5acac3ae8df7727bb31d95", "847fd4428705785972bbf0d3be9575ba9a36f516" ], "paperAbstract": "Java provides security and robustness by building a high-level security model atop the foundation of memory protection. 
Unfortunately, any native code linked into a Java program -- including the million lines used to implement the standard library -- is able to bypass both the memory protection and the higher-level policies. We present a hardware-assisted implementation of the Java native code interface, which extends the guarantees required for Java's security model to native code.\n Our design supports safe direct access to buffers owned by the JVM, including hardware-enforced read-only access where appropriate. We also present Java language syntax to declaratively describe isolated compartments for native code.\n We show that it is possible to preserve the memory safety and isolation requirements of the Java security model in C code, allowing native code to run in the same process as Java code with the same impact on security as running equivalent Java code. Our approach has a negligible impact on performance, compared with the existing unsafe native code interface. We demonstrate a prototype implementation running on the CHERI microprocessor synthesized in FPGA.", "pdfUrls": [ "https://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201704-asplos-cherijni.pdf", "http://doi.acm.org/10.1145/3037697.3037725", "http://www.cl.cam.ac.uk/research/security/ctsrd/pdfs/201704-asplos-cherijni.pdf", "https://www.repository.cam.ac.uk/bitstream/handle/1810/264315/cheri-jni.preprint.pdf?isAllowed=y&sequence=1" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8f53bef92f25071acd73b18393a436e3c05e57c1", "sources": [ "DBLP" ], "title": "CHERI JNI: Sinking the Java Security Model into the C", "venue": "ASPLOS", "year": 2017 }, "8f64b96f89b96bfadbc836660a918d05080f2553": { "authors": [ { "ids": [ "2681426" ], "name": "Brian Friesen" }, { "ids": [ "8176660" ], "name": "Md. 
Mostofa Ali Patwary" }, { "ids": [ "1913660" ], "name": "Brian Austin" }, { "ids": [ "1759942" ], "name": "Nadathur Satish" }, { "ids": [ "23873862" ], "name": "Zachary Slepian" }, { "ids": [ "1789372" ], "name": "Narayanan Sundaram" }, { "ids": [ "39875293" ], "name": "Deborah Bard" }, { "ids": [ "5947136" ], "name": "Daniel J. Eisenstein" }, { "ids": [ "2846805" ], "name": "Jack Deslippe" }, { "ids": [ "1719384" ], "name": "Pradeep Dubey" }, { "ids": [ "1764912" ], "name": "Prabhat" } ], "doi": "10.1145/3126908.3126927", "doiUrl": "https://doi.org/10.1145/3126908.3126927", "entities": [ "Algorithm", "Analysis of algorithms", "CPU cache", "Concurrency (computer science)", "Galaxy", "Load balancing (computing)", "Observable", "Parallel computing", "SIMD", "Xeon Phi" ], "id": "8f64b96f89b96bfadbc836660a918d05080f2553", "inCitations": [], "journalName": "", "journalPages": "20:1-20:11", "journalVolume": "", "outCitations": [ "3d162455812809333f079dd60afcf59209c7d043", "9624a222d48df6bd0f1f708223b2e7fa0fd2dfa7", "f89ea960e98d2167602a4d51361eb669f9a8ee4c", "54a14e97282913f5d842eca745c75ecf84590f69" ], "paperAbstract": "The nature of dark energy and the complete theory of gravity are two central questions currently facing cosmology. A vital tool for addressing them is the 3-point correlation function (3PCF), which probes deviations from a spatially random distribution of galaxies. However, the 3PCF's formidable computational expense has prevented its application to astronomical surveys comprising millions to billions of galaxies. We present Galactos, a high-performance implementation of a novel, O(N^2) algorithm that uses a load-balanced k-d tree and spherical harmonic expansions to compute the anisotropic 3PCF. Our implementation is optimized for the Intel Xeon Phi architecture, exploiting SIMD parallelism, instruction and thread concurrency, and significant L1 and L2 cache reuse, reaching 39% of peak performance on a single node. 
Galactos scales to the full Cori system, achieving 9.8 PF (peak) and 5.06 PF (sustained) across 9636 nodes, making the 3PCF easily computable for all galaxies in the observable universe.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126927", "https://arxiv.org/pdf/1709.00086v1.pdf", "http://arxiv.org/abs/1709.00086" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8f64b96f89b96bfadbc836660a918d05080f2553", "sources": [ "DBLP" ], "title": "Galactos: computing the anisotropic 3-point correlation function for 2 billion galaxies", "venue": "SC", "year": 2017 }, "8f6b90b7066a1e89f52f1a9b84b8a5e7298a9502": { "authors": [ { "ids": [ "31804143" ], "name": "Chenhan D. Yu" }, { "ids": [ "1769646" ], "name": "William B. March" }, { "ids": [ "2395747" ], "name": "George Biros" } ], "doi": "10.1109/IPDPS.2017.10", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.10", "entities": [ "Algorithm", "Approximation algorithm", "Gramian matrix", "Iterative method", "Kernel (operating system)", "Linear system", "Low-rank approximation", "Machine learning", "X86" ], "id": "8f6b90b7066a1e89f52f1a9b84b8a5e7298a9502", "inCitations": [ "d9c99d37370e5e712cc6d04c1679cd91993445e0", "4853301ba9cfac0e351f34f2e38075a45b95d442", "8c2174bf1aad2aea23f27d7aba76814839cdbe0d", "a48f508d94330811d76912051fb374f375107560" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "886-896", "journalVolume": "", "outCitations": [ "89ba1fccbf764bbc464796eb546338315c810570", "7d5f5ccd2cf22a836b172aa016d9350c1444a561", "ff614894b12ab932c13b000a161930413e469675", "7dc37aafb25554b6b523551174ce7fc2774e98e2", "29f141f6f8eeaca9ae8477e60d3e1c8d8ef45461", "6d73c9947d840cd84a8eee79c224add5fbbb929e", "34014a26baa56d7da05991547e52c43a10d4aff6", "4e969aebed59674f29f638bdefff446197f285d8", "0f16f6f478b5c788dce466eb50e36c612273c36e", "696152f0d39dc268c2bb7f132d133020af801143", "7cda32aeefdd3cabd76871b8ee06bd1a1ea2ba10", 
"61aaffc396c17521e13d1fd137433d815519759c", "28552ecf4eaedb3461edca97304b29082b02fbab", "4d00a2663b67857947bab2b304e152caf023f2b7", "509bcb8ec0e1f0376d55340d3f05364ef3fc0c52", "02be82b6567135493b9bb51573496114465c1533", "23a78128e4cd7c51573659e02ae5cdde0bc851b9", "72cff71a6026c15102d1c23d8c429ca60fe2d048", "1592fe924114866c1ac559bae33ea789930daa98", "d9b4d4ffa5bf9e9e376b0bdeba19d4090d653feb", "9466bb74a149b1bad4f1d9922e8ccb2cde2c3d3f", "59e03eba4b9c7d848c71bbaa14bc80959e9311e7", "788394577818baa8aa395d6bdd2b96ccb648e853", "355e35184d084abc712c5bfcceafc0fdfe78ceef", "0fd55b8a6d5c4823e53005e74277683f0a9a7f7a", "ad8ad670e07c075bc3bc0197008b3fe62333c33c", "12180009bf7626ea6d5a4c58739e3449e104d4c5", "865acaeda503b7b575ff04b9ed21b3d1c897f692", "131dbf4013ee6cea60f44f265eb57c80158f9afb", "b6fff8b8ea77f157913986e7af53951d9fc1128e" ], "paperAbstract": "Kernel matrices appear in machine learning and non-parametric statistics. Given N points in d dimensions and a kernel function that requires O(d) work to evaluate, we present an O(dN log N)-work algorithm for the approximate factorization of a regularized kernel matrix, a common computational bottleneck in the training phase of a learning task. With this factorization, solving a linear system with a kernel matrix can be done with O(N log N) work. Our algorithm only requires kernel evaluations and does not require that the kernel matrix admits an efficient global low rank approximation. Instead, our factorization only assumes low-rank properties for the off-diagonal blocks under an appropriate row and column ordering. We also present a hybrid method that, when the factorization is prohibitively expensive, combines a partial factorization with iterative methods. 
As a highlight, we are able to approximately factorize a dense 11M-by-11M kernel matrix in 2 minutes on 3,072 x86 \"Haswell\" cores and a 4.5M-by-4.5M matrix in 1 minute using 4,352 \"Knights Landing\" cores.", "pdfUrls": [ "http://arxiv.org/abs/1701.02324", "https://arxiv.org/pdf/1701.02324v1.pdf", "https://doi.org/10.1109/IPDPS.2017.10" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8f6b90b7066a1e89f52f1a9b84b8a5e7298a9502", "sources": [ "DBLP" ], "title": "An N log N Parallel Fast Direct Solver for Kernel Matrices", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "8f8ea49d393a57467a08fcc867d925ad42de2565": { "authors": [ { "ids": [ "2218907" ], "name": "Lena E. Olson" }, { "ids": [ "1736221" ], "name": "Mark D. Hill" }, { "ids": [ "32548473" ], "name": "David A. Wood" } ], "doi": "10.1145/3037697.3037715", "doiUrl": "https://doi.org/10.1145/3037697.3037715", "entities": [ "Address space", "Cache (computing)", "Cache coherence", "Central processing unit", "Coherence (physics)", "Crash (computing)", "Hardware acceleration", "Software bug" ], "id": "8f8ea49d393a57467a08fcc867d925ad42de2565", "inCitations": [], "journalName": "", "journalPages": "163-176", "journalVolume": "", "outCitations": [ "9f86a4c99327319812b31d343244f1ebad4c1533", "7c452c52ea03a4fac7578c8de13174334e8e0ee9", "6bee7fdbaaf15941023c73a78288f2c5bb331a60", "a1da20a5814d65623505440a2a18121b8b4b6b5f", "39d23367587109f3b27d51c3455af4a272df4915", "15b275f0421c606f5903532e9964b140cbb2f878", "c975df11ad3d58032608e32aa5dd41146ffff90c", "c08dc9ef237fd370ad28dc11834874dd47b6c169", "4a3a13d7da024a37cd88ee9fc0a4e0e0e909211e", "b786466e00593d8889cb4e34c3e516f9104de271", "0c56edc4c7ccce002417a283ca607a633ab2460d", "68c3d3fca5c7bd172832f480a92e98d106da5c34", "c8b9255ad616c5dd0b9952b106a246e6e967743c", "222c651bbde2d6ec42dfa148b9b9499ed5119389", "9b10f9a29167b3350b01f00db84410f40a066fdd", "e54dc5fecf100ba175953700ab4c537288b6bce8", 
"14505c2bdd3822d7a62385121d28ba3eb36fea1d", "10f1faeec4ee2158b8535b249a20de5419998153", "10a0ab781e94a75fdcbde819f3f4cddcab768bbd", "db919926a8b1505c26537bab7e53bf8c59d70f95", "89378530ae5895d0ca90ca54f15b6257aa2aaaa9", "7c3e5cabe11faf285435f7cacf2930e71d880b8a", "18e9cd28be46edec0f3ecd39b78b8b7434db85d6", "2c13dcfdc5ea2d355a46fe326c371038a00ba7f5", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "0a9527067ecf9471bcc8c9cfea7145777daf4586", "ade263c815bf995916a2fa56c89f70efacf6ec5f", "230fdde877c43710a5cee0bb26dcd17eb1e24e7a", "1e89ddb6dad3fe80ac4c9b3a9e7518de545960df" ], "paperAbstract": "Specialized hardware accelerators have performance and energy-efficiency advantages over general-purpose processors. To fully realize these benefits and aid programmability, accelerators may share a physical and virtual address space and full cache coherence with the host system. However, allowing accelerators -- particularly those designed by third parties -- to directly communicate with host coherence protocols poses several problems. Host coherence protocols are complex, vary between companies, and may be proprietary, increasing burden on accelerator designers. Bugs in the accelerator implementation may cause crashes and other serious consequences to the host system.\n We propose Crossing Guard, a coherence interface between the host coherence system and accelerators. The Crossing Guard interface provides the accelerator designer with a standardized set of coherence messages that are simple enough to aid in design of bug-free coherent caches. At the same time, they are sufficiently complex to allow customized and optimized accelerator caches with performance comparable to using the host protocol. 
The Crossing Guard hardware is implemented as part of the trusted host, and provides complete safety to the host coherence system, even in the presence of a pathologically buggy accelerator cache.", "pdfUrls": [ "http://research.cs.wisc.edu/multifacet/papers/asplos17_crossing_guard_lightning.pdf", "http://research.cs.wisc.edu/multifacet/papers/asplos17_crossing_guard_talk.pdf", "http://research.cs.wisc.edu/multifacet/papers/asplos17_crossing_guard.pdf", "http://doi.acm.org/10.1145/3037697.3037715" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8f8ea49d393a57467a08fcc867d925ad42de2565", "sources": [ "DBLP" ], "title": "Crossing Guard: Mediating Host-Accelerator Coherence Interactions", "venue": "ASPLOS", "year": 2017 }, "8f9c0dc3dbb44fa69eecfcbfd0bcd7e947b4fa39": { "authors": [ { "ids": [ "36911504" ], "name": "Manojit Ghose" }, { "ids": [ "35389416" ], "name": "Pratyush Verma" }, { "ids": [ "1987809" ], "name": "Sushanta Karmakar" }, { "ids": [ "32151627" ], "name": "Aryabartta Sahu" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.22", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.22", "entities": [ "Dynamic voltage scaling", "Scheduling (computing)" ], "id": "8f9c0dc3dbb44fa69eecfcbfd0bcd7e947b4fa39", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "170-177", "journalVolume": "", "outCitations": [ "49290acfae08ed5430b8883d0df7cfb89a5d5cd4", "1267be2d80f9a864d85e8798876be1d7df9663b2", "8c509bec24d666b7a1227a25aebb91f995837ea2", "228af90bbd22f12743d65b2abb45ce407323d2ab", "d53e4d704f92bf5a5c4d3a65198c047c64e0dbc7", "02cbb22e2011938d8d2c0a42b175e96d59bb377f", "844d601df904d76907b246585ff5030908467c20", "728dfc52a2f13570a29c9d5bf257f02bf98682bc", "ca03fa1762abbd3ea1ba560233b6fc714ceb3120", 
"381f236aefb620f873878ea6bbd63d31f1b2b2ca", "3740004dd0231de6dd07abed6dffdeda71ae8b7c", "6bfc468ebb287a5e65f12cfc91f0e5b4e82fc245", "68af531d1d215dc86230ac940aa84bee9b217c24", "5753171bedc9fdbc2023a398f80890b5e68fb8b9", "3905a97f8d23d8f1ed4d69d4dffe547ec63faa79", "31abcf70a3a118269d4b5707a7f06b0ef8cdaab9", "e28cdeacba7ff818db0db3174938fc5ca646df71", "1909482b518858b77606b0c91b701296e8dabfee", "50624eceb4f17c6ce35ffbdc7c2fc495c2dfbd31", "07e6ca15ba66473976f6b5d462a235c92cfc82d6", "95a8c5f25652cc573c557965548c62d09890592e", "79bc19231b448044aa91335e7804dc1401a8080c", "7e944c565a5719e054ce4f52f06af06932b4c72d", "e09c6bed29d86def161ba5c23328a9a4586b9afa", "d881e2aae1fb23348f942a807df3e39954d7789c", "2730606a9d29bb52bcc42124393460503f736d74" ], "paperAbstract": "High end scientific applications in the form of workflows are being executed in cloud for various benefits. But with an increase in the processing capabilities of the cloud system, the energy consumption has also increased significantly. Thus energy efficient execution of these scientific workflows in cloud becomes essential. Existing research on energy efficient scheduling of scientific workflows in cloud mainly focus on reducing only the dynamic energy consumption of the compute nodes and uses DVFS technique. In this paper, we have proposed six different energy efficient scheduling approaches for a set of online scientific workflows in a cloud system considering both static and dynamic energy consumption of the compute nodes. These approaches are divided into two categories: non-splittable allocation of VMs on single host, and splittable allocation on multiple hosts. We have compared the performance of our proposed policies with state-of-art energy efficient scheduling policy, EnReal and found that the policies perform better than EnReal. All three scheduling policies with non-splittable VM allocation perform at par with EnReal in energy consumption but they do not require any migration of VMs. 
And all policies under splittable VM category perform significantly better than EnReal with an average energy reduction of 70%.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.22" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8f9c0dc3dbb44fa69eecfcbfd0bcd7e947b4fa39", "sources": [ "DBLP" ], "title": "Energy Efficient Scheduling of Scientific Workflows in Cloud Environment", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "8fc351a96549280a5df3b3671af1d37ffeb52782": { "authors": [ { "ids": [ "2229502" ], "name": "Tao Gao" }, { "ids": [ "1794267" ], "name": "Yanfei Guo" }, { "ids": [ "1788289" ], "name": "Boyu Zhang" }, { "ids": [ "1758014" ], "name": "Pietro Cicotti" }, { "ids": [ "35150586" ], "name": "Yutong Lu" }, { "ids": [ "2103230" ], "name": "Pavan Balaji" }, { "ids": [ "1737658" ], "name": "Michela Taufer" } ], "doi": "10.1109/IPDPS.2017.31", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.31", "entities": [ "MapReduce", "Program optimization", "Supercomputer" ], "id": "8fc351a96549280a5df3b3671af1d37ffeb52782", "inCitations": [ "8ec36ef8d8ddfa6b6f842336b1f0d93d8dd21da0" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1098-1108", "journalVolume": "", "outCitations": [ "0cd87f8454774bf494bf62a58c137ca9b848d0b4", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "3aa1bc5f67254b4e2d86170b70adfacf937008f6", "500b80adc7e25dfffa9a05d25bdffce81b1b0031", "2cdcddb08ae6060e94cba6c9b2b58b87324e686f", "261acc48d66031bec58d95623e89d298349937a8", "1c0a70a3e34b7071a896be66a923ed776c4c3e97", "5d5e93cf6e4a595ba6abe97c852ca7298639b6cd", "947c6bf534ccd620044f77c3bd6068f633b421fb", "436373807a0a9dc8660e7739e018d18cc18dacd7", "0541d5338adc48276b3b8cd3a141d799e2d40150", 
"94ff8cd9e59ec747bdad91835f089a33819c0cb5", "1f0e31b52967090e265218ae77b7fd332621a627", "32ec06c8cd5cea328af79660c188c56c0b01b5b3", "56a3854d0d3a60cf10289724ead0c254df6e6836", "911bac2a9205ed5d1178460d269ff0ab109635cc", "1087bbef784e7daecaf13b58bc1480d6dee4929b", "8dd97ace0d9bddaaa7004c7325f30c2145fbe41f", "898634f0e693cb521ad2dd4a7432c11381e6df60", "9c3cc7337f7d70593a1ff8622de3128e1708b5a2", "7a0868597edad12564839bf0fcbf6b8f3ad36818", "70bd563d00fcb402eb7d9f251bea544ecb08f213", "145088fc0593b2f95168f3ba4693bbc5487e9068", "169d5a4c6281ed5cb0b37b51cb80cc730d7731a0" ], "paperAbstract": "In this paper we present Mimir, a new implementation of MapReduce over MPI. Mimir inherits the core principles of existing MapReduce frameworks, such as MR-MPI, while redesigning the execution model to incorporate a number of sophisticated optimization techniques that achieve similar or better performance with significant reduction in the amount of memory used. Consequently, Mimir allows significantly larger problems to be executed in memory, achieving large performance gains. We evaluate Mimir with three benchmarks on two highend platforms to demonstrate its superiority compared with that of other frameworks.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.31" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8fc351a96549280a5df3b3671af1d37ffeb52782", "sources": [ "DBLP" ], "title": "Mimir: Memory-Efficient and Scalable MapReduce for Large Supercomputing Systems", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "8feb09cc758a2bb79f20bd5dc67f027951eefabe": { "authors": [ { "ids": [ "3224778" ], "name": "Yasemin Acar" }, { "ids": [ "1749517" ], "name": "Michael Backes" }, { "ids": [ "2200198" ], "name": "Sascha Fahl" }, { "ids": [ "1682642" ], "name": "Simson L. Garfinkel" }, { "ids": [ "1999637" ], "name": "Doowon Kim" }, { "ids": [ "37176218" ], "name": "Michelle L. 
Mazurek" }, { "ids": [ "3447368" ], "name": "Christian Stransky" } ], "doi": "10.1109/SP.2017.52", "doiUrl": "https://doi.org/10.1109/SP.2017.52", "entities": [ "Application programming interface", "Correctness (computer science)", "Cryptocurrency", "Cryptography", "Library", "Library (computing)", "Public-key cryptography", "Python", "Software documentation", "Usability" ], "id": "8feb09cc758a2bb79f20bd5dc67f027951eefabe", "inCitations": [ "b1400438b4822d59a64fba31d0dc590306418ac3", "34b871ba7fe283a69fd28b641866251fa3c5921e", "d3a53dd08418d08ca9860b89291581b1e5fca0a9", "adf6e0d0a7223fc2c7a5829b224c4687e910caa4", "00ab3faccde927281a07764f229afd7be736138c", "9ba789d232617f63513833ced810a314bda45306", "0236e578d5e250ac41ab4678b5f759efce8bfba4", "5d04270da4f8e5b2417a8160fb979374f3015235", "80dc1bcd0d0479b271021a1aa481258669bbc173", "06d3bbb293f088c4b0e06938a79c1e6d01de2f44", "37996e5017fba1a736b3deac1ed4e091c6739f29", "8e349cf73397edbda17404352736f7ffadee3592", "0fa5a928a8b43cf5531694119abfb0691554ab03" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "154-171", "journalVolume": "", "outCitations": [ "16cdcedec9a5fc51d89591034908b8580b911b7c", "0d730e6890948934604677a5899472389112ae9d", "a21ae6a2c42c6e4a2ea9f7d00eed43685ddea48f", "81918728ce8897bcf8c5e216fa55e23ec582fa8e", "28fd36a24a38de08532efe7594b2d29f4035fd14", "36e57fef0ec1580ad8d509b0ebc2071753c1f81e", "f0987b07fe295b47b1d9a825daca9dde152450a3", "cb706f6488b6960bfb742676ac4bfb372a3e055d", "71f8163801980fbaa494cb8c149bd7388034c2ba", "046ef4a32005eb18f56facfd334226870d3402ab", "ffb2003551fa97cde9d82f8219fb2754090e37b3", "8cbb012d528ae623f9df3bac25d341ed02a82cf1", "5e9593606a03e450f19b6128d2a3a76deb52c687", "1a0ca2cf71616fa492ebe611f1d83261d2ecf052", "347b8a473c838c0c64a443001ed02d4903826c0b", "7245bae934efc1a3a67d7dc3c69ec2b11cd87d19", "9b43db76ca42d59078d71345df98fc921e2215ad", "983b547e23fb1093876ab2c594a0d9e0259b1b9c", "563239b0eaa3aa7003e8e8e66ba3e789f7cee265", 
"a851b38c914b5c005e2be8b43c2d17c98e09f7c1", "5570835de31f18f294733cb5d1c4e8dbb6e7b582", "13fadf9e3fc927e9e7df14132feecc1899c68d63", "7f2eae392eb9c697bc39f6581770c7ad05ae5ef3", "19fbe8155ce3004e4979f6a97ec219456c6ed338", "41dbaa15a59c35ed3df64ee26cba6b501a6c425f", "292da27473d1f860519107b90d6f0b0f316df7c5", "0162cd54b5e0ddbc793545fc0b15176e85ebb358", "00c9bef0807447d9e9d977b7bc07da367ba3ad8a", "525f0ad1a68e337564d0e2a0c9d247f14b168aba", "48fc8f1aa0b6d1e4266b8017820ff8770fb67b6f", "8d284eadda0aab6c94174b36191ab869019070ed", "70c614dc9351ec807fc6f31fd2744d799e8011f2", "f60628636b64c187db1f106823f5af5730b973cd", "3088d37df9db3607d7aa4f60d15019fb4a981274" ], "paperAbstract": "Potentially dangerous cryptography errors are well-documented in many applications. Conventional wisdom suggests that many of these errors are caused by cryptographic Application Programming Interfaces (APIs) that are too complicated, have insecure defaults, or are poorly documented. To address this problem, researchers have created several cryptographic libraries that they claim are more usable, however, none of these libraries have been empirically evaluated for their ability to promote more secure development. This paper is the first to examine both how and why the design and resulting usability of different cryptographic libraries affects the security of code written with them, with the goal of understanding how to build effective future libraries. We conducted a controlled experiment in which 256 Python developers recruited from GitHub attempt common tasks involving symmetric and asymmetric cryptography using one of five different APIs. We examine their resulting code for functional correctness and security, and compare their results to their self-reported sentiment about their assigned library. Our results suggest that while APIs designed for simplicity can provide security benefits – reducing the decision space, as expected, prevents choice of insecure parameters – simplicity is not enough. 
Poor documentation, missing code examples, and a lack of auxiliary features such as secure key storage, caused even participants assigned to simplified libraries to struggle with both basic functional correctness and security. Surprisingly, the availability of comprehensive documentation and easy-to-use code examples seems to compensate for more complicated APIs in terms of functionally correct results and participant reactions, however, this did not extend to security results. We find it particularly concerning that for about 20% of functionally correct tasks, across libraries, participants believed their code was secure when it was not. Our results suggest that while new cryptographic libraries that want to promote effective security should offer a simple, convenient interface, this is not enough: they should also, and perhaps more importantly, ensure support for a broad range of common tasks and provide accessible documentation with secure, easy-to-use code examples.", "pdfUrls": [ "https://obj.umiacs.umd.edu/papers_for_stories/CryptoAPIs_Python_camera_ready%5B1%5D.pdf", "https://www.ieee-security.org/TC/SP2017/papers/161.pdf", "https://doi.org/10.1109/SP.2017.52" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/8feb09cc758a2bb79f20bd5dc67f027951eefabe", "sources": [ "DBLP" ], "title": "Comparing the Usability of Cryptographic APIs", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "90063731a10c66175d564349ee18c9d6101c7a13": { "authors": [ { "ids": [ "1715363" ], "name": "Wenyan Lu" }, { "ids": [ "8598330" ], "name": "Guihai Yan" }, { "ids": [ "1772334" ], "name": "Jiajun Li" }, { "ids": [ "10745963" ], "name": "Shijun Gong" }, { "ids": [ "2276854" ], "name": "Yinhe Han" }, { "ids": [ "1793856" ], "name": "Xiaowei Li" } ], "doi": "10.1109/HPCA.2017.29", "doiUrl": "https://doi.org/10.1109/HPCA.2017.29", "entities": [ "Big data", "Convolutional neural network", "Dataflow", "Dataflow architecture", "Neural 
Networks", "Neuron", "Parallel computing", "Performance per watt", "Scalability", "Speedup", "Synapse" ], "id": "90063731a10c66175d564349ee18c9d6101c7a13", "inCitations": [ "6f537c85b5160a6375306f6eca1a3e8558e7dbd9", "381b008b49d04c1bd5ff00649521fa028b9d3ea8", "0f3056a84ea59a9b976163f28002401fa88ba80f", "12b31a84d5e8e26972131b221a3bb6725e92bd24", "5ce80b41443518a14d800f6b93b4057bbb007432" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "553-564", "journalVolume": "", "outCitations": [ "e14d4145a911646e208b58207bb8f4749fad7fc2", "fbeaa499e10e98515f7e1c4ad89165e8c0677427", "0934508c768ff8ba9744678ad92e51dfdbd5f122", "51b4ba88f8011cd56f40cf258a4c81d3b3454c2c", "2ffc74bec88d8762a613256589891ff323123e99", "399acab2bee6eccbfffe4a2ce688b6b1075e9c5e", "8478c0f46dd30ef7f4052145983d6d315c2e1f17", "4360dbd31da8fbae0c8c69fe2737c84f35ca20cd", "218fa09cfcda71e722920e9f6cbd94f8f04ffda2", "29c7d959f322f7006186e3c344ddb9e41e3a8d1f", "c382406fd8db2744b2a609837395e5da05e1d2ed", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "e1c4e2fa071046569a05e9cfdf13496d094025dd", "c37315fe1243e58c4d7034987509e16baac591f8", "061356704ec86334dbbc073985375fe13cd39088", "b8842c51dab2607506a4ace521d5eb7dc61d3e81", "437b11128948f92e1139c555cf1326922ee36b39", "703c9b8d73b207c059a555e96b94d956c4e25339", "174930cac7174257515a189cd3ecfdd80ee7dd54", "4f40ea0248653d4ffb6ef4857cd23f0f713d8c69", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "162d958ff885f1462aeda91cd72582323fd6a1f4", "4b7ce0fc27cd84c2b44f16c16b1c5c6b612c6881", "c3eb002c120bc0bd09a1bfe1f208c5bb84bc294a", "6ce45f2f20c5368f8ed927d6d0c626e1d06af5e2", "8b793918bdf8e20e218d64cbe9f1657b47bb9ac0", "e44e17c5fc8702b82b25dbae9d415b4e8cec5695" ], "paperAbstract": "Convolutional Neural Networks (CNN) are verycomputation-intensive. 
Recently, a lot of CNN accelerators based on the CNN intrinsic parallelism are proposed. However, we observed that there is a big mismatch between the parallel types supported by computing engine and the dominant parallel types of CNN workloads. This mismatch seriously degrades resource utilization of existing accelerators. In this paper, we propose aflexible dataflow architecture (FlexFlow) that can leverage the complementary effects among feature map, neuron, and synapse parallelism to mitigate the mismatch. We evaluated our design with six typical practical workloads, it acquires 2-10x performance speedup and 2.5-10x power efficiency improvement compared with three state-of-the-art accelerator architectures. Meanwhile, FlexFlow is highly scalable with growing computing engine scale.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.29", "http://www.carch.ac.cn/~yan/download/LuW_HPCA_2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/90063731a10c66175d564349ee18c9d6101c7a13", "sources": [ "DBLP" ], "title": "FlexFlow: A Flexible Dataflow Accelerator Architecture for Convolutional Neural Networks", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "903340e11ddfee6bbc0d8f39180c6e1256392578": { "authors": [ { "ids": [ "39686402" ], "name": "Alyssa Milburn" }, { "ids": [ "3053948" ], "name": "Herbert Bos" }, { "ids": [ "1744275" ], "name": "Cristiano Giuffrida" } ], "doi": "", "doiUrl": "", "entities": [ "C++", "Compiler", "Hardening (computing)", "Initialization (programming)", "Linux", "Optimizing compiler", "Server (computing)", "Undefined behavior", "Undefined value", "Vulnerability (computing)" ], "id": "903340e11ddfee6bbc0d8f39180c6e1256392578", "inCitations": [ "ca9c4ef54efbb1602febc3944ef352faca138c98", "fb9c164a53194117fbd163e25b97f9b0fb3c8fbb" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ 
"a699b046ae4f1fdd717c3261dcdbc80c7f2be151", "2790284b6a16790d03b0cb5ed46bc6b0fecde1eb", "8c39c5d022d066e708a1eb5cd41d5db3b64bfdfe", "8cdbab26fa0dee8f165b6680e59e8966679fd068", "0b0917c1080b1cae8a80c41d38f903aa55baea70", "295f4ffa651675b22ae8e2f3f30b400330da0c69", "078f0317c057bb9aff55f22cd856b8b2b5f3cef0", "377e1ea567fa79fae02a0b38a62916520ef81e2d", "0e55379d27454c5d9d72e4ba4b3752007b9f886f", "11496e28e6dd3f1ab7d2f054c46d7587988dfd86", "54cc1e9d38763083aa13dd484ddf48a9653b353b", "3c48458d8f77b04c9e69b61f5beddf770e61ec04", "9b2585f7248c8b5a22e9c816506e01060213ca85", "72eb18b6c1faf83c876031138a75b1cfa2856c0a", "c4e77ec0e6e4ac6638b662bfe5342439ad4451de", "5f6a808bedd3dbfd1290063b3cd8221132ce5c95", "425ec6e7d42945cc21e2aa6916c2078b3ae917bb", "07e7a136224b2230679385c2dc6f1ddfbde449f1", "49cae25a1796b6a2898b99b2684b33eed8f58ee9", "06567663b31f7b8cf1de3d5f2ca6c79422ef60c8", "1a646f79beae9a6768edd81290c92efaf48f21d4", "188847872834a63fb435cf3a51eef72046464317", "50117210fbb7a8ce28ba503280f3c65fdc029ec7", "1ddcc37ae33b4dea4fe74a0b83f48809f2ea01d8", "5c896f1fd08a3475140a9e5dede850cc543c190e", "5680b51c34fae849be22a0a59ab828a1d9cde403", "ce06ccbb2deb604333661ecae010c54c1faaaa19", "38cb7372f8f1cfc72d06a1d2bf9224453dfd3e8e", "33619233fbf58abf86a861af324fb9ae4e293d04", "056a150af91bfcc67848d9d2606c577b58952c41", "70e8eb54e20c504b9a3c52f5a5ea4d8fbb3d985c", "b4c5c1db5a371228efd819843857d09de5dc525b", "d4914de7dbb5080d5c83004cab22df9100fb37d0", "2859e75f421621ff776d55e533dc6ee7cb4b0a92", "ab2177167b09f9be086d44188b845fc9b5458d66", "781114cbbf88ae150113df14121648f94bc594c6", "12d5c3bcd2dd94b4f07f358be291c5906b16dba4", "17dc880085035d4355b3ea57b5b5d6d84e9dc59a", "5ddc6a439cdc9b4eaebdad8c20976f1f0be4523f", "6588630854bf12b190ae8f95ed8763f7cdb945f6", "4fe68f0468d4e6a47cd7f747f49c307dea87cd89", "11a7e426012fcec35c32fdea0b60b874783c682f", "64422f2b4620da8090d073ed5cbcb1a49078868f", "4b9426a328fb4e379e2fab57fc72a91c5618ae94", "9ff3c58d60625aa7da9151e79ff5009ae863802f", 
"2194c3460ab71f3826db00b045b2ae590c753319", "605ce5fe2f5b1e478a2528f1c514b77fc2607577", "36f4666c5c294548d2a9a536ed44e926172639f3" ], "paperAbstract": "Usage of uninitialized values remains a common error in C/C++ code. This results not only in undefined and generally undesired behavior, but is also a cause of information disclosure and other security vulnerabilities. Existing solutions for mitigating such errors are not used in practice as they are either limited in scope (for example, only protecting the heap), or incur high runtime overhead. In this paper, we propose SafeInit, a practical protection system which hardens applications against such undefined behavior by guaranteeing initialization of all values on the heap and stack, every time they are allocated or come into scope. Doing so provides comprehensive protection against this class of vulnerabilities in generic programs, including both information disclosure and re-use/logic vulnerabilities. We show that, with carefully designed compiler optimizations, our implementation achieves sufficiently low overhead (<5% for typical server applications and SPEC CPU2006) to serve as a standard hardening protection in practical settings. 
Moreover, we show that we can effortlessly apply it to harden non-standard code, such as the Linux kernel, with low runtime overhead.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/safelnit-comprehensive-and-practical-mitigation-uninitialized-read-vulnerabilities/", "http://www.cs.vu.nl/~giuffrida/papers/safeinit-ndss-2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/9033/40e11ddfee6bbc0d8f39180c6e1256392578.pdf", "s2Url": "https://semanticscholar.org/paper/903340e11ddfee6bbc0d8f39180c6e1256392578", "sources": [ "DBLP" ], "title": "Safelnit: Comprehensive and Practical Mitigation of Uninitialized Read Vulnerabilities", "venue": "NDSS", "year": 2017 }, "9079c1a6a76c233202d734fcd8fbecc1e1c7df5c": { "authors": [ { "ids": [ "3181518" ], "name": "Zhice Yang" }, { "ids": [ "3176991" ], "name": "Qianyi Huang" }, { "ids": [ "1737486" ], "name": "Qian Zhang" } ], "doi": "10.1145/3117811.3117814", "doiUrl": "https://doi.org/10.1145/3117811.3117814", "entities": [ "Covert channel", "Data security", "Excalibur: Morgana's Revenge", "Experiment", "Laptop", "Malware", "Mobile device", "Mobile operating system", "Mobile phone", "Network interface", "Network interface controller", "Nominal impedance", "Radio frequency", "Transmitter" ], "id": "9079c1a6a76c233202d734fcd8fbecc1e1c7df5c", "inCitations": [], "journalName": "", "journalPages": "356-367", "journalVolume": "", "outCitations": [ "26df904b8efc8ecc129d4ba5410b4676b713c8a3", "c70e4a09a00c302f26ce60ac15e4e208af3b0621", "7a012d0eb6e19e2a40529314f58f4a31f4d5f8ff", "02c75551123cae6dfbb0c69de96a199c974bcf89", "69926408eb54d78f0d2e748f422adb4791ade674", "023f23c300804754753cb11db51fb7f582556ab7", "498d2ed40427eeb78799fa96ac0f5a58c6648d05", "89dd6c7900370055f7afb12dd3eba9d363e5b2b4", "0de7786798e1a5681b51bd8084a88dfa48fa1fc7", "e1267dffc8b4c090e3c8283c1f6ab1e20444c09b", "052b36fd8bde6035c11eb316c3f9a3665c0110f0", "1c1fcd25b14737f24cc7e3e4af8bfe96f298182f", 
"27d4dc8b28a22edd561b9fd38d481adb4e1504f4", "5a0e84b72d161ce978bba66bfb0e337b80ea1708", "75599aaa50bd14438edbcef5a803e38076eed853", "5e5fed2a65407c805e1cdeaae674f91ef7fe947e", "1ad6819ab62788a132f4d0773717fdacdd55af6a", "151831fc041a3fc19ed56bacdd8bf330d2a93eeb", "de7661a8df52b761d6f1cb73bcb4ad777939bfa7", "4e0a3114459ad385aac4d694a86f1d241608c415", "7e52a62fd1bbbe70ca1d4e0eeb9c4b993f137d09", "8347fa4ad280baf119580cc680fd85ddb16d7236", "3326e87fe5815d085000935acd63ff5d365c6a19", "d04fc13945f772ca41e15931e5ce9e0fe9687c00", "881c376276588f0887a9fb24d31c00dc41ed1f12", "154fd28b125ddbfa28f59138907e74862c2f8ab2", "21a31f7510bae44af944809520c1a04c575abbaf", "3aa7fa1563467801db724b046df439dc33de2407", "d7ae2ea551e43f22bd525f999c7b6ca0e0c0f23e" ], "paperAbstract": "Today's mobile devices contain sensitive data, which raises concerns about data security. This paper discusses a covert channel threat on existing mobile systems. Through it, malware can wirelessly leak information without making network connections or emitting signals, such as sound, EMR, vibration, etc., that we can feel or are aware of. The covert channel is built on a communication method that we call NICScatter. NICScatter transmitter malware forces mobile devices, such as mobile phones, tablets or laptops, to reflect surrounding RF signals to covertly convey information. The operation is achieved by controlling the impedance of a device's wireless network interface card (NIC). Importantly, the operation requires no special privileges on current mobile OSs, which allows the malware to stealthily pass sensitive data to an attacker's nearby mobile device, which can then decode the signal and thus effectively gather the guarded data. Our experiments with different mobile devices show that the covert channel can achieve 1.6 bps and transmit as far as 2 meters. 
In a through-the-wall scenario, it can transmit up to 70 cm.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117814" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9079c1a6a76c233202d734fcd8fbecc1e1c7df5c", "sources": [ "DBLP" ], "title": "NICScatter: Backscatter as a Covert Channel in Mobile Devices", "venue": "MobiCom", "year": 2017 }, "90b264a142777e1527a35b01fab4052d5ac31da4": { "authors": [ { "ids": [ "1713648" ], "name": "Torsten Hoefler" }, { "ids": [ "32042628" ], "name": "Salvatore Di Girolamo" }, { "ids": [ "26434660" ], "name": "Konstantin Taranov" }, { "ids": [ "3021644" ], "name": "Ryan E. Grant" }, { "ids": [ "1705033" ], "name": "Ron Brightwell" } ], "doi": "10.1145/3126908.3126970", "doiUrl": "https://doi.org/10.1145/3126908.3126970", "entities": [ "CUDA", "Central processing unit", "Direct memory access", "Ecosystem", "Imperative programming", "In-memory database", "Network interface controller", "OpenCL API", "Programming model", "Remote direct memory access", "Scalability", "Simulation" ], "id": "90b264a142777e1527a35b01fab4052d5ac31da4", "inCitations": [], "journalName": "", "journalPages": "59:1-59:16", "journalVolume": "", "outCitations": [ "1c6477bc1b1c7b3767624be6d286d382ce05c211", "0d3f85933b6355789588476e491683532c68a906", "3cc2336cb701ab40273d0b5603064a70a209b4c6", "f99c251c349c9084d63a890ff39a1aa433af4b87", "a2514450633e633b6213c71cd3d153f8e8bdbe04", "97d96b2aa923a67676e5174993d36c1cbb063826", "10bc77207d0508bd52411f2ceebf4e66bd832e53", "7717cb7fbbf26557238c2ef847d0a48def176d0b", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "550285725684e2d286ffd9fa5cebdc52d7c4f860", "e116690dfdfb44f9e350cb36fbf8e8b9b3da1d3f", "6bad177eb5fc0fd7ea223149cec4a76d8567479a", "5e41c7a61334341b5f2b8a60257b44be1255fe1b", "497f9bde384cb99b926f5a82e66cd164101432a3", "bf5ad3d39cef9073e57835bdc0f4302d0d026ec2", "d120c8dbee3d94ff05d493b3a01a1e4be4f7d41e", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", 
"bb4cf037d8a5adbb3f08a3405d926d022b8c27c5", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "d0dac91628415ce1b2135f68c883dc08583e9188", "e7ce6c7d4a7c0a6182fdb549a848c295257785a6", "6cb47a1990da9cdf712a8562645a41f06e931e3c", "2b8b76dc2e01726ee1331eb52bdb6e3d13710de9", "7dda6789b0db46a2a985017e414ea778196bc180", "4535a96bbd868cbc578da3c512f0db2c3e4ccc2a", "283e9cb02bde34038d10822bd7d16adbd8d750ea", "4110d5ad162fbf43a3418f28b4d46609c2a147be", "00d00e482b32252398a6177eed1ee867a384402e", "c46c6f403dfc406e7dca4406ca5370657b621ec4", "7ca15f2fd831b6920310c70f5bd18c3a9c00cb4f", "65c93d5e98a5905e29f69058d7912521371bca90", "2c373d9ced8c71896e393d0accb4b57fbca4649e", "569de2eececd3adb7219d63eb85e4bdc63486c42", "7734021e1d62a8ee2e363a7222cc7c5f2852bb77", "7536a348dacf1de7cc921214c07a87b8345d9996", "1220e4a011c46804d4369b5580dc7fb6e387af54", "0c205f91402984905e1bcf5f05f973c5588c1325", "3e73f7d4046b116563dd98b91b0326b068e2bc3c", "a12abf6ef0503d4288d64611ca221a7a8fc043ed", "0c59eeef638d13932129847acd8204c85a1abc13" ], "paperAbstract": "Optimizing communication performance is imperative for large-scale computing because communication overheads limit the strong scalability of parallel applications. Today's network cards contain rather powerful processors optimized for data movement. However, these devices are limited to fixed functions, such as remote direct memory access. We develop sPIN, a portable programming model to offload simple packet processing functions to the network card. To demonstrate the potential of the model, we design a cycle-accurate simulation environment by combining the network simulator Log-GOPSim and the CPU simulator gem5. We implement offloaded message matching, datatype processing, and collective communications and demonstrate transparent full-application speedups. Furthermore, we show how sPIN can be used to accelerate redundant in-memory filesystems and several other use cases. 
Our work investigates a portable packet-processing network acceleration model similar to compute acceleration with CUDA or OpenCL. We show how such network acceleration enables an eco-system that can significantly speed up applications and system services.", "pdfUrls": [ "https://arxiv.org/pdf/1709.05483v2.pdf", "https://arxiv.org/pdf/1709.05483v1.pdf", "http://doi.acm.org/10.1145/3126908.3126970", "http://arxiv.org/abs/1709.05483" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/90b264a142777e1527a35b01fab4052d5ac31da4", "sources": [ "DBLP" ], "title": "sPIN: high-performance streaming processing in the network", "venue": "SC", "year": 2017 }, "90ce807027aaf19f1a8011f625ce1352d20816ce": { "authors": [ { "ids": [ "3284845" ], "name": "Kyungjoo Kim" }, { "ids": [ "24372348" ], "name": "Timothy B. Costa" }, { "ids": [ "39632372" ], "name": "Mehmet Deveci" }, { "ids": [ "2984433" ], "name": "Andrew M. Bradley" }, { "ids": [ "2616895" ], "name": "Simon D. Hammond" }, { "ids": [ "2150394" ], "name": "Murat Efe Guney" }, { "ids": [ "2234380" ], "name": "Sarah Knepper" }, { "ids": [ "12909372" ], "name": "Shane Story" }, { "ids": [ "1750699" ], "name": "Sivasankaran Rajamanickam" } ], "doi": "10.1145/3126908.3126941", "doiUrl": "https://doi.org/10.1145/3126908.3126941", "entities": [ "Automatic vectorization", "BLAS", "Knights", "LAPACK", "Library (computing)", "Machine learning", "Math Kernel Library", "OpenMP", "SIMD", "Secure copy", "Service control point", "Simulation", "Speedup" ], "id": "90ce807027aaf19f1a8011f625ce1352d20816ce", "inCitations": [], "journalName": "", "journalPages": "55:1-55:12", "journalVolume": "", "outCitations": [ "139a9c6e9e6c3601a2d5dfcabe71e5ec98d81e21", "58666ceb74e6b4eb570e64f313d0b6c46e5d62c0", "355e35184d084abc712c5bfcceafc0fdfe78ceef", "585f61a9d3cdac4ea53890f334d803ab58650dea", "ac473f1674f14253da0e50c25b8cb86f8801a808", "9170c6e348ee82c4c9dfa66e074698461347ebc2", "ef62e60b81317a24dbeb8ded6dc4a8ed89b776a8", 
"c95e56e73fbd1985cbc38636629ba7156f9ae758", "004eda59c0ffceb2417bee87c95539eae4bdf0cd", "092217c2267f6e0673590aa151d811e579ff7760", "2fe5f8804f8ba2f738b83719b11723fb4a0f8db0", "0653e2ed9f683868cb4539eb8718551242834f6b", "46957f67c3887a5548affc05d97c8ce8636ca4a0" ], "paperAbstract": "Many applications, such as PDE based simulations and machine learning, apply blas/lapack routines to large groups of small matrices. While existing batched blas APIs provide meaningful speedup for this problem type, a non-canonical data layout enabling cross-matrix vectorization may provide further significant speedup. In this paper, we propose a new compact data layout that interleaves matrices in blocks according to the SIMD vector length. We combine this compact data layout with a new interface to blas/lapack routines that can be used within a hierarchical parallel application. Our layout provides up to 14X, 45X, and 27X speedup against OpenMP loops around optimized dgemm, dtrsm and dgetrf kernels, respectively, on the Intel Knights Landing architecture. We discuss the compact batched blas/lapack implementations in two libraries, KokkosKernels and Intel® Math Kernel Library. We demonstrate the APIs in a line solver for coupled PDEs. 
Finally, we present detailed performance analysis of our kernels.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126941" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/90ce807027aaf19f1a8011f625ce1352d20816ce", "sources": [ "DBLP" ], "title": "Designing vector-friendly compact BLAS and LAPACK kernels", "venue": "SC", "year": 2017 }, "90e648f2beee1f09b6bb57d59979dbc238fdd589": { "authors": [ { "ids": [ "13704144" ], "name": "Vaibhav Arora" }, { "ids": [ "39613914" ], "name": "Tanuj Mittal" }, { "ids": [ "1724045" ], "name": "Divyakant Agrawal" }, { "ids": [ "1709353" ], "name": "Amr El Abbadi" }, { "ids": [ "1983944" ], "name": "Xun Xue" }, { "ids": [ "11729408" ], "name": "Yanan Zhi" }, { "ids": [ "3256550" ], "name": "Jianfeng Zhu" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Benchmark (computing)", "Coherence (physics)", "Distributed computing", "Fault tolerance", "Institute for Operations Research and the Management Sciences", "Multi-objective optimization", "Open-source software", "SQL", "Scalability", "Strong consistency", "Throughput", "YCSB" ], "id": "90e648f2beee1f09b6bb57d59979dbc238fdd589", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "f4912128cb16480a18ad3f6f0fa3c9dcb836cf7d", "e29ac26c08189f01f23cfa154dd431dbec40ad70", "082bc77513862f8d709322916f44d6fe2f2d06d7", "89ab15cebdfaacda5e5dd4206410d1e492d65b18", "6816c447cc4d3d945e0452564ff5d3220e1fdcab", "38acf01a412d4bfeb810ab9fc5a7a1f1c8643c6b", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "07d847f310d5fa9138f461f0a25c5e0024f1c4af", "af8b04305b92127b468a610b591b07f7897b2446", "9aa0d7253574e50fe3a190ccd924433f048997dd" ], "paperAbstract": "Consensus protocols are used to provide consistency guarantees over replicated data in a distributed system, and allow a set of replicas to work together as a coherent group. Raft is a consensus protocol that is designed to be easy to understand and implement. 
It is equivalent to Multi-Paxos in fault-tolerance and performance. It uses a leader based approach for coordinating replication to a majority. The leader regularly informs the followers of its existence using heartbeats. All reads and writes go through the leader to ensure strong consistency. However, read-heavy workloads increase load on the leader since the followers in Raft are maintained as cold standbys. Since the algorithm itself guarantees replication to at least a majority, why not exploit this fact to serve strongly consistent reads without a leader? We propose mechanisms to use quorum reads in Raft to offload the leader and better utilize the cluster. We integrate our approach in CockroachDB, an open-source distributed SQL database which uses Raft and leader leases, to compare our proposed changes. The evaluation results with the YCSB benchmark illustrate that quorum reads result in an increased throughput of the system under read-heavy workloads, as well as lower read/write latencies.", "pdfUrls": [ "http://www.cs.ucsb.edu/~vaibhavarora/improving-Raft-Read-Scalability-HotCloud2017", "https://www.usenix.org/conference/hotcloud17/program/presentation/arora" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/90e6/48f2beee1f09b6bb57d59979dbc238fdd589.pdf", "s2Url": "https://semanticscholar.org/paper/90e648f2beee1f09b6bb57d59979dbc238fdd589", "sources": [ "DBLP" ], "title": "Leader or Majority: Why have one when you can have both? 
Improving Read Scalability in Raft-like consensus protocols", "venue": "HotCloud", "year": 2017 }, "90ff17d99ff394638e894a772247d1f8e88e03b5": { "authors": [ { "ids": [ "2340927" ], "name": "David Maxwell" }, { "ids": [ "1716332" ], "name": "Leif Azzopardi" }, { "ids": [ "2678472" ], "name": "Yashar Moshfeghi" } ], "doi": "10.1145/3077136.3080824", "doiUrl": "https://doi.org/10.1145/3077136.3080824", "entities": [ "Document", "Information", "Kullback\u2013Leibler divergence", "Search engine results page", "User experience" ], "id": "90ff17d99ff394638e894a772247d1f8e88e03b5", "inCitations": [ "fc288ce65c904de44edea5ab119eb4646cd2f51d" ], "journalName": "", "journalPages": "135-144", "journalVolume": "", "outCitations": [ "3fec6927bc6f0a1ef40e10a9c1a297d6764ab599", "677b92ce1e423c52393df2f4ae5ea909bd3065f4", "fea75f39a5e22ca23834e6bc575d0f32a2f36d75", "99fa792e90e805c45d8759073d2278d6f3709046", "5ce66759edcc12a7aed136936f3feacee113cecb", "bd33adcd86e12cd4471b0712d042335d7f19ebfd", "90d272b5a1e76995a205013353957a39eb04b027", "3398bb71a344fc14d666b90b5828f40401b5b89c", "55355c58481d2d1360e0fdc08a7dcb8f52e807a9", "ab84443f7f56f3c422ba0c7b29c8d2dcbd9522fb", "0353d56503f0418629f8306987d7a99dead41864", "3ce4e4df850d8aeb85d68b3a2bcf1937ec49d74b", "2226a21bea78f21761a53d2d1328bfde18b59aea", "9a68661452f34ad18d40846c365ac332dd34566c", "20df3751ed74984bed7b4c364fc775f58b2d4cbe", "29a183494372d07fddbe33f829960e5b24ebff85", "08b7e13edf9022717f5cd1cf033e52e31ccd3db5", "3ac30b0438af478ef24dfadb647a62c7fd3baba7", "0b5c188b15cd28b74702ca3071acd0df894b42d6", "500e872f2de25469543120b58ec60efb09487e24", "de79e2cea3527c0921b7ff78011658003f3ae244", "63182ffe8d92c2b4e9a2c32d6a5ff715febedd8d", "6e2e87a19b867dbf5d22650fe1f21389522880cd", "4b0771ee0a3417aae94643fcaaabfd922367f8f5", "3cea4e14ad83ae5e23eb8dc743d7f5728bfe8502", "dda232b51cbede591eb722ebdb328ed004fb211e", "4a516095f91364db4d0dea01e9c2576000dd7a02", "25ba19e8ce4537e45f328b8cad11f9fb9bb87c53", 
"2ec7302489c7441f340303160baac6b4fa9ebe46", "5ec94a45641704109b267170f299724073d6cd6d", "3fc3e0697359e67edb3519f2d1717606b1061894", "88add04a9ed81118ecbcdf3a73c33f0dddeb6d91", "7a43cdd0e8e4b628af6619812c73b2f2d524d0bb", "2c56d3a6e0c5260614ebd8bc843a29e18c4afbbc", "e18ffd573fc73298881c7a930269039f372950f2", "d4f2f0b971984fa5235ccd76a8bb1441a736bfa5", "6c8ed3912d9dacf77c1fe33bc948b5638be32d9e", "13d72ef522b405c18f7d228c5744687609b4c3a4", "342be3d0e7529e63d16d1b22dbddf26d63e14bd6", "4722ac1fbb0c3c97f24e219732bb6c8adc913f32" ], "paperAbstract": "The design and presentation of a Search Engine Results Page (SERP) has been subject to much research. With many contemporary aspects of the SERP now under scrutiny, work still remains in investigating more traditional SERP components, such as the result summary. Prior studies have examined a variety of different aspects of result summaries, but in this paper we investigate the influence of result summary length on search behaviour, performance and user experience. To this end, we designed and conducted a within-subjects experiment using the TREC AQUAINT news collection with 53 participants. Using Kullback-Leibler distance as a measure of information gain, we examined result summaries of different lengths and selected four conditions where the change in information gain was the greatest: (i) title only; (ii) title plus one snippet; (iii) title plus two snippets; and (iv) title plus four snippets. Findings show that participants broadly preferred longer result summaries, as they were perceived to be more informative. However, their performance in terms of correctly identifying relevant documents was similar across all four conditions. Furthermore, while the participants felt that longer summaries were more informative, empirical observations suggest otherwise; while participants were more likely to click on relevant items given longer summaries, they also were more likely to click on non-relevant items. 
This shows that longer is not necessarily better, though participants perceived that to be the case - and second, they reveal a positive relationship between the length and informativeness of summaries and their attractiveness (i.e. clickthrough rates). These findings show that there are tensions between perception and performance when designing result summaries that need to be taken into account.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080824" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/90ff17d99ff394638e894a772247d1f8e88e03b5", "sources": [ "DBLP" ], "title": "A Study of Snippet Length and Informativeness: Behaviour, Performance and User Experience", "venue": "SIGIR", "year": 2017 }, "911b73ae86d61d6919f16cc2f538a6548766bf64": { "authors": [ { "ids": [ "2321965" ], "name": "Guoyang Chen" }, { "ids": [ "1684635" ], "name": "Lei Zhang" }, { "ids": [ "26647905" ], "name": "Richa Budhiraja" }, { "ids": [ "37914192" ], "name": "Xipeng Shen" }, { "ids": [ "2570113" ], "name": "Youfeng Wu" } ], "doi": "10.1145/3123939.3124543", "doiUrl": "https://doi.org/10.1145/3123939.3124543", "entities": [ "Address space", "Byte", "Byte addressing", "Computer data storage", "Convergence Insufficiency", "Data structure", "Dynamic data", "Dynamization", "Experiment", "Linked list", "Non-volatile memory", "Persistence (computer science)", "Pointer (computer programming)", "Volatile memory" ], "id": "911b73ae86d61d6919f16cc2f538a6548766bf64", "inCitations": [ "c8d937e3abc6c78b0d7358a1231280904428d946", "41ea95cc4dca373bf324555b897760054ec4a76e" ], "journalName": "", "journalPages": "191-203", "journalVolume": "", "outCitations": [ "10419c6f4aa50a36ed0b103c9ddb9aec45f133fe", "3af216f371069b57c0dca5448384d052fb490fb4", "05a1357946de5eca42a477b7b268db4944219a2e", "bf5497e15f22233cbc2a4d0c3cc2c36f26738701", "47b851237f240831abee3971bca6bb8d2a121eb1", "2d45779437516ee55e5f9f4e7a7d8803fa795443", "a459d11e7fff61004dd392806f27317c16ce6696", 
"dc2e2b794a784782d7d9860f1358aa107f71c1bf", "377e1ea567fa79fae02a0b38a62916520ef81e2d", "24724ad8962a9e04eb496fddaefe9708f6960601", "ad585974e2d05758cd36e2ab2e346f38b4de4a0f", "9183cde02e4306828089fb8adae74736a9df3ceb", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "0204f40221260d00c5ee63646560a40dcd7d97d1", "c8d937e3abc6c78b0d7358a1231280904428d946", "209c2347a28bc0af9f8ace63ebbdf056729f41dc", "42c70d64890726f60556caf3eec3f06e85642dd9", "5d539c01a4d377558309b10465ee156f81a7346d", "129f11028220d87525b37b4605a2c04eb26f3e73", "885c666fbcfd1a10c613496d7a041d01b99c7a39", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "157352f39ed5ba9a571dd4c27b48786ac8be90cf", "fae8a785260ac5c34be82fca92a4abef4c30d655", "94783d113951822195d4ba44599a8fcbdef9d4bf", "41219a03eca08286b2592ae727f5ec9a1f58e73a", "823116269044ab4c713373c66c7da3fcb495b459" ], "paperAbstract": "This paper explores solutions for enabling efficient supports of position independence of pointer-based data structures on byte-addressable None-Volatile Memory (NVM). When a dynamic data structure (e.g., a linked list) gets loaded from persistent storage into main memory in different executions, the locations of the elements contained in the data structure could differ in the address spaces from one run to another. As a result, some special support must be provided to ensure that the pointers contained in the data structures always point to the correct locations, which is called position independence.\n This paper shows the insufficiency of traditional methods in supporting position independence on NVM. It proposes a concept called implicit self-contained representations of pointers, and develops two such representations named off-holder and Region ID in Value (RIV) to materialize the concept. 
Experiments show that the enabled representations provide much more efficient and flexible support of position independence for dynamic data structures, alleviating a major issue for effective data reuses on NVM.", "pdfUrls": [ "https://people.engr.ncsu.edu/xshen5/Publications/micro17b.pdf", "http://doi.acm.org/10.1145/3123939.3124543" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/911b73ae86d61d6919f16cc2f538a6548766bf64", "sources": [ "DBLP" ], "title": "Efficient support of position independence on non-volatile memory", "venue": "MICRO", "year": 2017 }, "91b87b4f8bde059a425f5c70c0237f6c184678d2": { "authors": [ { "ids": [ "1755091" ], "name": "Cheng Luo" }, { "ids": [ "1783406" ], "name": "Yiqun Liu" }, { "ids": [ "1725544" ], "name": "Tetsuya Sakai" }, { "ids": [ "1726798" ], "name": "Fan Zhang" }, { "ids": [ "1700777" ], "name": "Min Zhang" }, { "ids": [ "8093158" ], "name": "Shaoping Ma" } ], "doi": "10.1145/3077136.3080795", "doiUrl": "https://doi.org/10.1145/3077136.3080795", "entities": [ "Behavior model", "Heuristic evaluation", "Landing page", "Search engine results page", "Television", "Usability testing", "User (computing)", "Web search engine" ], "id": "91b87b4f8bde059a425f5c70c0237f6c184678d2", "inCitations": [ "cb0fdaaa58ce9bb1e2a5f13bf1398af9667082b9" ], "journalName": "", "journalPages": "435-444", "journalVolume": "", "outCitations": [ "f237e72b2d5265da4c213e3c69979395728137dc", "357b1b911696fc636a1a1d59bc7d847655474825", "355146c49d983f5c35c6033374a7252ac0141fd8", "2fcc5e388b7fce46da2eb1a1fd2f82c3fd7ec419", "ac94075098f25c1935d95cfd78b83ea77e928217", "64693b8c3b3340a4055e336b9a2be0d81b80c064", "623bcee50f773a5356687c94f742b8779cada95a", "6203c2bff16e4fefd31012d2003e973e741eba9a", "471cb4c2e5039bdaacb0274fee70c7fe2e93493e", "0788fb1109b1db8c72776005c9c0af926a278017", "8490234d79b47e459824dcf87c1e288211a3c964", "f7fcc97be18be855f0b337972b740a036606d7c0", "69c507ac4106861fad210afcd45e8352eaf76eec", 
"62aaab0d71bf4046c0509ae3f5bbade781524c31" ], "paperAbstract": "Mobile search engine result pages (SERPs) are becoming highly visual and heterogenous. Unlike the traditional ten-blue-link SERPs for desktop search, different verticals and cards occupy different amounts of space within the small screen. Hence, traditional retrieval measures that regard the SERP as a ranked list of homogeneous items are not adequate for evaluating the overall quality of mobile SERPs. Specifically, we address the following new problems in mobile search evaluation: (1) Different retrieved items have different heights within the scrollable SERP, unlike a ten-blue-link SERP in which results have similar heights with each other. Therefore, the traditional rank-based decaying functions are not adequate for mobile search metrics. (2) For some types of verticals and cards, the information that the user seeks is already embedded in the snippet, which makes clicking on those items to access the landing page unnecessary. (3) For some results with complex sub-components (and usually a large height), the total gain of the results cannot be obtained if users only read part of their contents. The benefit brought by the result is affected by user's reading behavior and the internal gain distribution (over the height) should be modeled to get a more accurate estimation. To tackle these problems, we conduct a lab-based user study to construct suitable user behavior model for mobile search evaluation. From the results, we find that the geometric heights of user's browsing trails can be adopted as a good signal of user effort. Based on these findings, we propose a new evaluation metric, Height-Biased Gain, which is calculated by summing up the product of gain distribution and discount factors that are both modeled in terms of result height. 
To evaluate the effectiveness of the proposed metric, we compare the agreement of evaluation metrics with side-by-side user preferences on a test collection composed of four mobile search engines. Experimental results show that HBG agrees with user preferences 85.33% of the time, which is better than all existing metrics.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080795" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/91b87b4f8bde059a425f5c70c0237f6c184678d2", "sources": [ "DBLP" ], "title": "Evaluating Mobile Search with Height-Biased Gain", "venue": "SIGIR", "year": 2017 }, "91fd133adf2bd15ab814351b3a9e9f13f2951e38": { "authors": [ { "ids": [ "8775916" ], "name": "Da Cao" }, { "ids": [ "1743245" ], "name": "Liqiang Nie" }, { "ids": [ "7792071" ], "name": "Xiangnan He" }, { "ids": [ "7621447" ], "name": "Xiaochi Wei" }, { "ids": [ "7472429" ], "name": "Shunzhi Zhu" }, { "ids": [ "1684968" ], "name": "Tat-Seng Chua" } ], "doi": "10.1145/3077136.3080779", "doiUrl": "https://doi.org/10.1145/3077136.3080779", "entities": [ "Algorithm", "Cold start", "Interaction", "Latent variable", "Recommender system", "Relevance" ], "id": "91fd133adf2bd15ab814351b3a9e9f13f2951e38", "inCitations": [ "5e3257540faa7bf220d0dda97085ceff18674f19", "05f44cddc0884c5ae7ce6502a247c502d63c922f", "322d8a9449f2ea654036ea6be3cba4458eebf207" ], "journalName": "", "journalPages": "585-594", "journalVolume": "", "outCitations": [ "1c2cae04e0e00dcec3fe6840b2daa00ae9e8a0a3", "1510cf4b8abea80b9f352325ca4c132887de21a0", "1960c7d4365c1165283fc2304be7d09a853fb33d", "0e730f8ed7cfdeb458cab36e8495e2cc0ee6d6a6", "48b0a08bf96b8e28ee982867ce0a9f568b788a0a", "17a41aa18b8987ed87d6fc19b87d36faf8a4240c", "07b01fa102195e4a69383d21f15faadf28bae714", "66b7ad291f7ad0724fd8fabfa84da65794b43696", "32d957abf2cfb2ef2a099596d7309c6ee39e7a9c", "1d9b302a5a004e279b984f35d01190cb59658c50", "1a327c69a7565d041407675c1004d84def61d7ac", "35193f0a1c64fd4c2f5088329aeaddf08774f12d", 
"212fc5ddeb4416aa7e1435f4c69391d0ad4fb18d", "1683ffc189d16b616131c300f45af87602d211f7", "26f753a7d8304922dff1f1b52f8f5fc30451497a", "89a16eb847e5039fe5d9c6372ab45145400c9aa1", "32f57bb6a476e60e2f4432375534b9247656d5f9", "2275762a28582716db92df6d525ed2481c7d7f14", "ad49936242755a6fdb8b75ce28c8ed6e66c3f31b", "e0e1a359c0617be201d4e9ddfd021ed950dc0940", "4f3417e73528025a5429547814e5a2fd91deb818", "52c0876b25a5721c4c6930d94d5308f0779734ec", "54736f0e0489f1022efe7b0c680ce04f59b3c525", "47f54a7953b3d167c6d94f2e8e035c2e798b3f18", "37d3ddb0723edb194d9de489ca6615cbe792ebc8", "122d981dd8c3eb0bf16affcd3efa21652ea3090d", "2744288f090192987e980274999065ad2d6e45d6", "6ba4e10d06d9842765a4350bf5abbd3dd095045c", "9e412d1b457a8e81f23b663041ddcfc6e426e549", "ab58d6f6b2d7858761a4077d35a02d8609bd111f", "36b33d88b16186d136f2258c433e7619b7c83421", "080b40bb7c719a494f8aa78a2455f0adf42c403b", "ab6a97598d7b3905338ca0f4d3796be64eff0eb4", "5720a5015ca67400fadd0ff6863519f4b030e731", "5f3f5ccc3e926bea62dc2ca20dcc45377587e9b3", "1535d5db7078c85f0e2d565860a0fb4053a6090c", "762b63d2eb86f8fd0de98a08561b77527ae8f165" ], "paperAbstract": "Existing recommender algorithms mainly focused on recommending individual items by utilizing user-item interactions. However, little attention has been paid to recommend user generated lists (e.g., playlists and booklists). On one hand, user generated lists contain rich signal about item co-occurrence, as items within a list are usually gathered based on a specific theme. On the other hand, a user's preference over a list also indicate her preference over items within the list. 
We believe that 1) if the rich relevance signal within user generated lists can be properly leveraged, an enhanced recommendation for individual items can be provided, and 2) if user-item and user-list interactions are properly utilized, and the relationship between a list and its contained items is discovered, the performance of user-item and user-list recommendations can be mutually reinforced.\n Towards this end, we devise embedding factorization models, which extend traditional factorization method by incorporating item-item (item-item-list) co-occurrence with embedding-based algorithms. Specifically, we employ factorization model to capture users' preferences over items and lists, and utilize embedding-based models to discover the co-occurrence information among items and lists. The gap between the two types of models is bridged by sharing items' latent factors. Remarkably, our proposed framework is capable of solving the new-item cold-start problem, where items have never been consumed by users but exist in user generated lists. 
Overall performance comparisons and micro-level analyses demonstrate the promising performance of our proposed approaches.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080779", "http://www.comp.nus.edu.sg/~xiangnan/papers/sigir17-EmbeddingMF.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/91fd133adf2bd15ab814351b3a9e9f13f2951e38", "sources": [ "DBLP" ], "title": "Embedding Factorization Models for Jointly Recommending Items and User Generated Lists", "venue": "SIGIR", "year": 2017 }, "924915ae02e621c10f257fb09a1dcc0202d9aad4": { "authors": [ { "ids": [ "39722092" ], "name": "Christian DeLozier" }, { "ids": [ "3409732" ], "name": "Ariel Eizenberg" }, { "ids": [ "2810634" ], "name": "Shiliang Hu" }, { "ids": [ "10019813" ], "name": "Gilles Pokam" }, { "ids": [ "1739688" ], "name": "Joseph Devietti" } ], "doi": "10.1145/3123939.3123947", "doiUrl": "https://doi.org/10.1145/3123939.3123947", "entities": [ "Cache (computing)", "Consistency model", "False sharing", "Key-value database", "LevelDB", "Linux", "Linux", "Memory protection", "Parallel computing", "Ptrace", "Service control point", "Speedup", "Strong consistency", "User space" ], "id": "924915ae02e621c10f257fb09a1dcc0202d9aad4", "inCitations": [], "journalName": "", "journalPages": "639-650", "journalVolume": "", "outCitations": [ "1898169191c2030e1c1e442afbb66610281f328f", "7cba91f31115e3962a6e75062e8db921ed452804", "71d584f310f11216d9e5771af58930c5a8f1dd47", "274e7e576534b3e091f09e801cce807f5fd221c1", "6fddbb1d2f1aff3a1103a633a67393491bbc0488", "54f3331b575b2d451c2d716f86496cada23d596d", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "aaecddf1e0facd414ed67910fe7204fa3bc08dfe", "093d1d23500d65e99c7d0cd5569ce0f0d4c37076", "0881378cc281fe6d8451eedbbb73e9e157d7bf38", "0e00a3e0b0120dcdb89f0ee03534643090235ff5", "5a53da97349d5ba5c612bc39ee6ceb8dc6e16934", "07f14998bd81048b73471934ca62295953361e65", "3795b4928d3ed0750071fbef8c0ac8d5f094c97a", 
"8d190e781507f9aea91cead8ac0d02b664649070", "41baa7d9a6e1040b2eef8728d38706d8fdcbb06a", "fcae2fcef595059529ebe553431ab41b44062ae4", "326d1495d5288ce7fbe548809df56a8ac11da544", "49b73fc335cb97c0b52b283ce236a4d4eb2b99c9", "1d15930cd9e4ececf22fd96bf9ba52f12dc0665e", "3b62c1f19254820c75dd0011f038d7aae04b3414", "0a0bf9e017e05d58b85e793e58148d2946259a74", "13210f969b8b4d1d12d63a9a8361028a6be498bc", "47b8b9cd7b064619e6bf25cde95e7af4bf5bc197", "ad913bd3d95fc9e5f6888974e04726eb441a6fc6", "045bbbea384e9d54be38dd207bf237d5208ea599" ], "paperAbstract": "Cache contention in the form of false sharing and true sharing arises when threads overshare cache lines at high frequency. Such oversharing can reduce or negate the performance benefits of parallel execution. Prior systems for detecting and repairing cache contention lack efficiency in detection or repair, contain subtle memory consistency flaws, or require invasive changes to the program environment.\n In this paper, we introduce a new way to combat cache line oversharing via the Thread Memory Isolation (Tmi) system. Tmi operates completely in userspace, leveraging performance counters and the Linux ptrace mechanism to tread lightly on monitored applications, intervening only when necessary. Tmi's compatible-by-default design allows it to scale to real-world workloads, unlike previous proposals. Tmi introduces a novel code-centric consistency model to handle cross-language memory consistency issues. Tmi exploits the flexibility of code-centric consistency to efficiently repair false sharing while preserving strong consistency model semantics when necessary.\n Tmi has minimal impact on programs without oversharing, slowing their execution by just 2% on average. We also evaluate Tmi on benchmarks with known false sharing, and manually inject a false sharing bug into the leveldb key-value store from Google. 
For these programs, Tmi provides an average speedup of 5.2x and achieves 88% of the speedup possible with manual source code fixes.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123947", "http://www.cis.upenn.edu/~delozier/docs/tmi_micro_2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/924915ae02e621c10f257fb09a1dcc0202d9aad4", "sources": [ "DBLP" ], "title": "TMI: thread memory isolation for false sharing repair", "venue": "MICRO", "year": 2017 }, "92553beba2145fca43b2e4a25318433843b74a74": { "authors": [ { "ids": [ "35531065" ], "name": "Qingyao Ai" }, { "ids": [ "1739818" ], "name": "Yongfeng Zhang" }, { "ids": [ "2112104" ], "name": "Keping Bi" }, { "ids": [ "1734498" ], "name": "Xu Chen" }, { "ids": [ "1704390" ], "name": "W. Bruce Croft" } ], "doi": "10.1145/3077136.3080813", "doiUrl": "https://doi.org/10.1145/3077136.3080813", "entities": [ "Artificial neural network", "Bag-of-words model", "Benchmark (computing)", "Deep learning", "Entity", "Extensibility", "Generative model", "Online shopping", "Personalization" ], "id": "92553beba2145fca43b2e4a25318433843b74a74", "inCitations": [ "322d8a9449f2ea654036ea6be3cba4458eebf207", "1b9fe7cef87f59dfc2f6cf3f144bfe6032222022", "6a1d1c100ad3ebcace07048876414673c9c07613", "d05ee44f524e48a5e112fad0fc1ab4c20594d1d7" ], "journalName": "", "journalPages": "645-654", "journalVolume": "", "outCitations": [ "22ae02d81c21cb90b0de071550cfb99e6a623e62", "0dc505b3d140d9349a2f4dd87cebfa97687f0c08", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "cca93fbe0534b8eb7bfe1602196aba78ad1bc184", "7a7d290d2feda000027831cbba76b7e38bf18c90", "2d7f7c9dd68e86ef977a0294a35abb63d9c7bceb", "6ff38efe7537b1f64c9c12c4781fc6f71a7b10ee", "36cb4ed29f9f0b6ea37343b3d98154293a374ec2", "4afa6c2eb552ceef0e396fbfe449932492873034", "17ce7734904a6162caa24e13a9454c1239924744", "1145859ba17172d517cdffe2a5f00a16366c5765", "7161eb8d3b1cb01769a36528f9c6bddd663545a9", "f9dd22e0a3be85c2e721b3a0c4a735268bff0932", 
"9eb67ca57fecc691853636507e2b852de3f56fac", "d8b3bafeed46a7b5105a17a6507536c524ed8dcf", "8b40b159c2316dbea297a301a9c561b1d9873c4a", "4ae093d626e17670ede21200f9cf1790e8c23dc8", "0b544dfe355a5070b60986319a3f51fb45d1348e", "2b8859eb4774d2597cd68d5615f2fbaf408a1e6c", "4d4d53a82d105f6d1b5c4a12778dd5667641c387", "87d907a114409755ecd3c6886585de26a4e17ffe", "2bb2ba7c96d40e269fc6a2d5384c739ff9fa16eb", "1510cf4b8abea80b9f352325ca4c132887de21a0", "0ece9e2827d0af7f47bef2e14e3d8986556e1bad", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "33151c9905102c47d431f59fc9a5a7667960507a", "328b00f1baaf08dedba3a788b4ce0a4b26003f18", "e912e59930acd9f63c9e0c0613620f28468eca0e", "01d96ab81684227b98ef374e2ffb80e76a668cb4" ], "paperAbstract": "Product search is an important part of online shopping. In contrast to many search tasks, the objectives of product search are not confined to retrieving relevant products. Instead, it focuses on finding items that satisfy the needs of individuals and lead to a user purchase. The unique characteristics of product search make search personalization essential for both customers and e-shopping companies. Purchase behavior is highly personal in online shopping and users often provide rich feedback about their decisions (e.g. product reviews). However, the severe mismatch found in the language of queries, products and users make traditional retrieval models based on bag-of-words assumptions less suitable for personalization in product search. In this paper, we propose a hierarchical embedding model to learn semantic representations for entities (i.e. words, products, users and queries) from different levels with their associated language data. 
Our contributions are three-fold: (1) our work is one of the initial studies on personalized product search; (2) our hierarchical embedding model is the first latent space model that jointly learns distributed representations for queries, products and users with a deep neural network; (3) each component of our network is designed as a generative model so that the whole structure is explainable and extendable. Following the methodology of previous studies, we constructed personalized product search benchmarks with Amazon product data. Experiments show that our hierarchical embedding model significantly outperforms existing product search baselines on multiple benchmark datasets.", "pdfUrls": [ "https://ciir-publications.cs.umass.edu/pub/web/getpdf.php?id=1260", "http://doi.acm.org/10.1145/3077136.3080813" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/92553beba2145fca43b2e4a25318433843b74a74", "sources": [ "DBLP" ], "title": "Learning a Hierarchical Embedding Model for Personalized Product Search", "venue": "SIGIR", "year": 2017 }, "9277d342b22fb55238ef3811e07ac2b226c9db78": { "authors": [ { "ids": [ "2389316" ], "name": "Yanqi Zhou" }, { "ids": [ "21287668" ], "name": "Sameer Wagh" }, { "ids": [ "2030711" ], "name": "Prateek Mittal" }, { "ids": [ "1752172" ], "name": "David Wentzlaff" } ], "doi": "10.1109/HPCA.2017.36", "doiUrl": "https://doi.org/10.1109/HPCA.2017.36", "entities": [ "Adversary (cryptography)", "Cloud computing", "Data center", "Dynamic random-access memory", "Memory timings", "Multi-user", "Network on a chip", "Privacy", "Schedule (project management)", "Scheduling (computing)", "Side-channel attack", "Simulation", "Time of arrival", "Timing channel", "Traffic shaping" ], "id": "9277d342b22fb55238ef3811e07ac2b226c9db78", "inCitations": [ "0d8952e0a65caf480228ede7e632201d5420e7b7" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "337-348", 
"journalVolume": "", "outCitations": [ "0a679d9d08231b2856fe648e6b331d8e6e46a1fa", "7e4bf4bce26804987fabf9a8cca182b5dd550a7e", "6e0684230dd2b436417e71731692baacd1c29dd1", "007394c2bae389cf43e46db4567dafe206355c25", "0e5aded3f3a38a39882062e6204f1d672f797eb1", "08dcda95dcbd1ebe906a1c24507b7814759462aa", "21ddf1f7ab7e2cd2ae07073bf3238ce46314bac9", "2b004e0484f4940fca341fa97ecb8ac94fe780a5", "188ca8bb2ec2ed38a055a78b48d46f5991298754", "c9c818f6572b7c9a87992fcfbdb4cea39a96514f", "2d34910db66225f2c7089a32aa2c4df7b72e57ae", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "d3216e8805687c458a82bea952ca4b1c6f4548b9", "17e49d6850b9fd3d888720fa23bde2194a7785b9", "077c648efef8a2bf8c0164f3d8141256090da41b", "2ab47454f59d9d8e55d4d8a69530562a3690794a", "078b855c40fefabd766a09f23280c59feef21634", "24706cbf8c48414ed66db6fbf223c47452f7cbdc", "092b09f0ec09b2b10763f5697ca77099a37ab022", "249cfb8ae490b8c334649aea9d6d3a2efe7caf5f", "2762c266378290594e1715104ef8e98a8cb60d35", "16278e2ffc0f005fa2477c5fa71e9c6710333de6", "82bcb524a2036676bfa4ebd3324fe76013dced54", "6bf2c022fcacf2f80c6ea3f44dcc4e4e1331abc4", "d2607df2b01d1123cae2716b876776510c41f836", "07b0b5d59ef09f33a40f30d3a2dec880029a5002", "00ab25c6582d543932fccbb0f15fe93445f95d61", "5ced6a0aab1350ef1dba574e1faa05a726d9517e", "1ce1361260c5367eb64392ebe37f29b3fbfc21de", "1d26d405ddc1c72e3ffd76506a1286071ad67197", "20b63210954f7c5a70664f301dcd7196856ccfa7", "5f88b3e85617ade9fa7dbbfad907c005a3d8897b", "0d8524a1eca5e41ee755acd30a0c28a782d05331", "19218913ef99ba9acd2491d8bab1d154cb375fa3" ], "paperAbstract": "Information leaks based on timing side channels in computing devices have serious consequences for user security and privacy. In particular, malicious applications in multi-user systems such as data centers and cloud-computing environments can exploit memory timing as a side channel to infer a victim's program access patterns/phases. Memory timing channels can also be exploited for covert communications by an adversary. 
We propose Camouflage, a hardware solution to mitigate timing channel attacks not only in the memory system, but also along the path to and from the memory system (e.g. NoC, memory scheduler queues). Camouflage introduces the novel idea of shaping memory requests' and responses' inter-arrival time into a pre-determined distribution for security purposes, even creating additional fake traffic if needed. This limits untrusted parties (either cloud providers or co-scheduled clients) from inferring information from another security domain by probing the bus to and from memory, or analyzing memory response rate. We design three different memory traffic shaping mechanisms for different security scenarios by having Camouflage work on requests, responses, and bi-directional (both) traffic. Camouflage is complementary to ORAMs and can be optionally used in conjunction with ORAMs to protect information leaks via both memory access timing and memory access patterns. Camouflage offers a tunable trade-off between system security and system performance. 
We evaluate Camouflage's security and performance both theoretically and via simulations, and find that Camouflage outperforms state-of-the-art solutions in performance by up to 50%.", "pdfUrls": [ "http://parallel.princeton.edu/papers/camouflage.pdf", "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.36" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9277d342b22fb55238ef3811e07ac2b226c9db78", "sources": [ "DBLP" ], "title": "Camouflage: Memory Traffic Shaping to Mitigate Timing Attacks", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "927db6add0917c3c9a6e57d36f47d9a3f5f927b1": { "authors": [ { "ids": [ "3099583" ], "name": "Xiaodong Yu" }, { "ids": [ "1971458" ], "name": "Kaixi Hou" }, { "ids": [ "39049654" ], "name": "Hao Wang" }, { "ids": [ "1688860" ], "name": "Wu-chun Feng" } ], "doi": "10.1109/IISWC.2017.8167767", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167767", "entities": [ "Amdahl's law", "Automata theory", "Automaton", "Central processing unit", "High- and low-level", "Markup language", "Programmer", "Speedup", "State transition table" ], "id": "927db6add0917c3c9a6e57d36f47d9a3f5f927b1", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "120-121", "journalVolume": "", "outCitations": [ "164bb40bac988ed0b90fe44366cd98c307e57b4b", "20ee61caa108938c2252dbefafb926f3d481465b", "d807a9baf8a027420b2efb95897cdb35832dcb14", "039a5c7b3e685964031bf12461cade1421e2b2a9", "102090e6e2363e094439a41ef0439dfac5da0126", "7666859ba1c7c9a0bae972cc6e7f04e2a4f728aa" ], "paperAbstract": "Programming Micron's Automata Processor (AP) requires expertise in both automata theory and the AP architecture, as programmers have to manually manipulate state transition elements (STEs) and their transitions with a low-level Automata Network Markup Language (ANML). 
When the required STEs of an application exceed the hardware capacity, multiple reconfigurations are needed. However, most previous AP-based designs limit the dataset size to fit into a single AP board and simply neglect the costly overhead of reconfiguration. This results in unfair performance comparisons between the AP and other processors. To address this issue, we propose a framework for the fast and fair evaluation of AP devices. Our framework provides a hierarchical approach that automatically generates automata for large datasets through user-defined paradigms and allows the use of cascadable macros to achieve highly optimized reconfigurations. We highlight the importance of counting the configuration time in the overall AP performance, which in turn, can provide better insight into identifying essential hardware features, specifically for large-scale problem sizes. Our framework shows that the AP can achieve up to 461x overall speedup fairly compared to CPU counterparts.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167767", "http://people.cs.vt.edu/~xdyu/html/AP_paper.pdf", "http://people.cs.vt.edu/~xdyu/html/AP_poster.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/927db6add0917c3c9a6e57d36f47d9a3f5f927b1", "sources": [ "DBLP" ], "title": "A framework for fast and fair evaluation of automata processing hardware", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "92c9197a53aad179f0b0ff4cfa52a9707e380a0a": { "authors": [ { "ids": [ "1775841" ], "name": "Anne Benoit" }, { "ids": [ "3080619" ], "name": "Laurent Lef\u00e8vre" }, { "ids": [ "3164170" ], "name": "Anne-C\u00e9cile Orgerie" }, { "ids": [ "8197772" ], "name": "Issam Ra\u00efs" } ], "doi": "10.1007/978-3-319-64203-1_10", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_10", "entities": [ "Denial-of-service attack", "Distributed computing", "Frequency capping", "Shutdown (computing)", 
"Signal trace", "Simulation", "Testbed" ], "id": "92c9197a53aad179f0b0ff4cfa52a9707e380a0a", "inCitations": [ "3b80109403756c28e83ec01d66a149d36dc795c7" ], "journalName": "", "journalPages": "134-146", "journalVolume": "", "outCitations": [ "19d94ccda7fbdd65431854882aa0cc9c0ca5fae7", "54fb43214ba4ce5a915f16cf230d9c7593685084", "bedf36489d9d7ffc6bc1127c71e0029568f0d4c0", "05c7f31410420fbd9ad14bbad9e81ff5c44f1464", "38a0bced15718230eeec1f5ffd29ada0f4f10a7a", "85a7352a5f69ccf62b8e27c47f345f75092f4fb8", "108437fd06da184ef8bd79bb676c5db8fa8a8401", "750a893064474de2b422f7299eac7484dace94cd", "031b446fcfeacf069dd7cff4fe6bac38de29f510", "6603b862dfc4ec409f213f4da00cb4850851c945", "5341bdf934b3a99af6685b5564af8e03d1b780a7", "3e19046c665867bbe557685da60738a40738010a", "378b6432287bfee0e1d02d2c9454fdc8de184680", "3af9c6335a5966bd6a19f255dc210f59bf06fabd", "5175688633b7c22fdd0b1bec4f042c30d1650a15", "81c4e99059104b00adc14f6797758aff998c066d", "1c2d36194a5c9acb83f1247005dbb08aa8cc1965", "09e614ae422152db33ea2577166b68792b90016d" ], "paperAbstract": "Large scale distributed systems are expected to consume huge amounts of energy. To solve this issue, shutdown policies constitute an appealing approach able to dynamically adapt the resource set to the actual workload. However, multiple constraints have to be taken into account for such policies to be applied on real infrastructures, in particular the time and energy cost of shutting down and waking up nodes, and power capping to avoid disruption of the system. In this paper, we propose models translating these various constraints into different shutdown policies that can be combined. 
Our models are validated through simulations on real workload traces and power measurements on real testbeds.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_10" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/92c9197a53aad179f0b0ff4cfa52a9707e380a0a", "sources": [ "DBLP" ], "title": "Shutdown Policies with Power Capping for Large Scale Computing Systems", "venue": "Euro-Par", "year": 2017 }, "92cc8cdaa0a414b7ed0641225ce58a4c07fc8063": { "authors": [ { "ids": [ "2276804" ], "name": "Omid Salehi-Abari" } ], "doi": "10.1145/3130242.3131494", "doiUrl": "https://doi.org/10.1145/3130242.3131494", "entities": [ "Data rate units", "Electron mobility", "HDMI", "Headset (audio)", "Lifting scheme", "Line-of-sight (missile)", "Transmitter", "Uncompressed video", "Virtual reality" ], "id": "92cc8cdaa0a414b7ed0641225ce58a4c07fc8063", "inCitations": [ "88afa5c97259b6342457072ccf866abbbf027fd6", "9ff85ebfdb8c4fd22a7a75003b054bf051cee4e8", "8e071ad183c69841d3448ab5b254299a1779b56f", "c9a44a37f4fb220a51b3eb37eb7642bb3b97c2cb" ], "journalName": "", "journalPages": "49", "journalVolume": "", "outCitations": [ "1943466070019e48204ebbee0914d87ced4ba09a", "8e8ecfef9b375ec6660477ba21e71327c745b9c4", "534ee575a6b0c37e03d1dddb92493b57e9271298", "4b2f3372baef782618daf54e59782f251c58b97d", "21c039e563ec0ca023a5b9c729e92a2fd611946a", "7c3311642157c39e4afef58934cfa5cd3585af9d", "c20eb6df3fea85baebc529fe7e77d03f7aa1a86b", "1ec1e55baaabfbf4283d0151dd590519c4d574df", "47240e17ac8fa393ec6e2db2dac68454e96c8495", "839d13983d55f3aeeb8e644447fd9a4b5665fc56", "26bfee054866d9a2d6e1c2d55d9b1dca408a916c", "092b58f5cf8eb1de4f1de470d781ed0d91d65d1a", "ad6a85c685fbcf089cfd5a87c98557d7f4328e6a", "08e6f96da8e44d6529d29fb2087f5bbf5684404d", "5c9f2dc4df03ced1cd41e9d342e461a2c6efd6d6", "a2ea288f10dbf019811f69917f2f2a3d07d46374", "58392cd42505bf2bc0675610188f6465bc20fd6f", "2d994df27d5c171e641289ad4dc3a1074996d3de", "59bd84d84a3a448836bb8eb3e41c5d309dfa9597" ], "paperAbstract": 
"Today's high-end virtual reality (VR) systems require a cable connection to stream high-definition videos from a PC or game console to the headset. This cable significantly limits the player's mobility and, hence, the user's VR experience. The high data rate requirement of this link (multiple Gbps) precludes its replacement by today's wireless systems, such as Wi-Fi. In this talk, I present MoVR, a system that creates a high data rate, millimeter wave (mmWave) link between the PC and the headset. Specifically, I will explain how we address the two key problems that prevent existing mmWave links from being used in VR systems. First, mmWave signals suffer from a blockage problem. i.e., they operate mainly in line-of-sight and can be blocked by simple obstacles such as the player lifting her hand in front of the headset. Second, mmWave radios use highly directional antennas with very narrow beams; they work only when the transmitter\u2019s beam is aligned with the receiver\u2019s beam. Any small movement of the headset can break the alignment and stall the data stream.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-abari.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/abari", "http://doi.acm.org/10.1145/3130242.3131494", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-abari.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/92cc8cdaa0a414b7ed0641225ce58a4c07fc8063", "sources": [ "DBLP" ], "title": "Enabling High-Quality Untethered Virtual Reality", "venue": "mmNets@MobiCom", "year": 2017 }, "92f6dbb1e9e950c354a411d8ce7b5b504c584a26": { "authors": [ { "ids": [ "35214896" ], "name": "Raj Parihar" }, { "ids": [ "14252027" ], "name": "Michael C. 
Huang" } ], "doi": "10.1109/PACT.2017.35", "doiUrl": "https://doi.org/10.1109/PACT.2017.35", "entities": [ "Arbitrary code execution", "Baseline (configuration management)", "Computer performance", "Heuristic", "Holism", "Intel Turbo Boost", "Load balancing (computing)", "Look and feel", "Multi-core processor", "Parallel computing", "Program slicing", "Thread (computing)" ], "id": "92f6dbb1e9e950c354a411d8ce7b5b504c584a26", "inCitations": [], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "91-104", "journalVolume": "", "outCitations": [ "0fc3098d4413dd75ef750c8dddf6cbe87ea9d8d7", "0fca03c476d869660dec04fb83f54161767a4ba7", "01d1981ec1a0d265ce271bcb0b99df5538bc7d1e", "0ac4387b663cae2542bd92576977dd15292618ad", "0afece5039c2522cc7ff60b89407381be3b6f3a0", "b28443d1b145e781edd3a1e332fa6eaa10dade2b", "012953dd18ef0ad32600fc145b7b7b46cf447164", "74ee49d678c751d2b5d13e957a59dc4bab6409bb", "33f410aed92033ea9180b7175ca24b63dd5b9792", "410b3bbd06c4f2f0b6f53a1444f43cb589a0cae5", "786d2c81ac5f795806b16a1cf292387261ae3f1a", "7e2335eef12e0be759086dc0112dde8eda6e3061", "0ea8f85d507ab8220f920dc1ffe6574820dd0027", "2bbdb8e4817baf603634e2c1480d251b0e75e54f", "29d42e6d55bd74eaf1ff2e86778cc11eee6c8f4f", "fd9dc505e3cf0b6a828ae67f1850658540ec9179", "1e365e63d5001819f11eb14e84057e8b85b4b138", "023a4e74dd473ac37a4032ef4f3d928ca5b134e3", "bf2dcea1628710d1a867f5ab5b99c23a075b9067", "22f41ab797f0b37343b5624004c79221fb4414ab", "4fb0a129c06bd8752f965b27a19ea3255e4b771f", "61e977e6c68c2acc73bdd9d94522ddb38e4b898d", "0a338a0ce79d077473d84486aca55298adac7cfe", "4d6240f20d7b0748a83cabb663e49a70031b4ae9", "270c331ba47d3cd524d66602884e6445358f65ca", "26249d13395b7dad2079a47623ad169010ece061", "5616fe711d3134f0f336a82548891ba86562fcbc", "d67f67e2a8d2caf5ff04f315c21611571f7779c6", "10d0597d5fde94b6f8879af4cb799ac1dc1c764b", "0d2353f1cdc2bcc150d142bb9d7eeb80fc4444fe", "3321dc8decbe94be9a7ad4a7fd56da2df492dcb0", 
"1754d5e097ab2c22f808596baf01016561785bbb", "9f4f53fd6a1919663c7be4ae9c6e7b8b25eb01df", "8ff917891a496a5955fe01765d096e1fc39fb031", "3e12f870184708de704dc6df3dc88334cbe80a1a", "24e5cd0aea2d67823a4d0fd1606542bc3b85b8fc", "06a03880a9667b30eddbd80c6f8bc1aa5d0bb393", "15b275f0421c606f5903532e9964b140cbb2f878", "61f484d5b1b5763fd96d00622b2476540c8c4c16", "5adde02d5fe7fc072ad9ad2ed9a641380149c39f", "06c5d36fbdaf3cb39c136618c8765c471566afbc", "206d4ebdf93ae6c9b530efc94fa408ccca2b402f", "dd509dbca64e80ea80a077c93ff1ca1d19932a28", "18bd379dc60f7d5f8fc0b49165f17f5c33e5a58b", "b4a508bb316a7d3ee7c8b348f2c8b66cffb9cbf8", "7ef867dcd48dfc45086bbd76580404cd3a788d0a", "4213eb763ecf02792cd47fe64752d83c70d40b98", "5da7aa9efd6035641fc13d6c74e0068dcfe4406a", "3a5e93a329af9d5f801e80f792f1f1573ed4ac30", "9725bae2781603477ff77fd9d02bb2f01e7a741c", "2706c60d5809d4d1ab75693a56a5d8ca5afb3144", "03fc198adf79731c92070b8aa839c46ebf9b3c14", "144df383c80cfde170a7bcd24361e168e46dc938", "43f8e4f54c9b28911164ebe3af8e11362f9a8b04", "12dadba338c016512858cfbc41051791cce532c9", "0ff68df0328a446ab90a6357d0569e024e4647c9", "056aea9d5e4961533ea849f05478856a09fb367d", "2373b801838e095ab2c2e85f8acce8898caf46bb" ], "paperAbstract": "In spite of the multicore revolution, high single thread performance still plays an important role in ensuring a decent overall gain. Look-ahead is a proven strategy in uncovering implicit parallelism; however, a conventional out-of-order core quickly becomes resource-inefficient when looking beyond a short distance. An effective approach is to use an independent look-ahead thread running on a separate context guided by a program slice known as the skeleton. We observe that fixed heuristics to generate skeletons are often suboptimal. 
As a consequence, look-ahead agent is not able to target sufficient bottlenecks to reap all the benefits it should. In this paper, we present DRUT, a holistic hardware-software solution, which achieves good single thread performance by tuning the look-ahead skeleton efficiently. First, we propose a number of dynamic transformations to branch based code modules (we call them Do-It-Yourself or DIY) that enable a faster look-ahead thread without compromising the quality of the look-ahead. Second, we extend our tuning mechanism to any arbitrary code region and use a profile-driven technique to tune the skeleton for the whole program. Assisted by the aforementioned techniques, look-ahead thread improves the performance of a baseline decoupled look-ahead by up to 1.93× with a geometric mean of 1.15×. Our techniques, combined with the weak dependence removal technique, improve the performance of a baseline look-ahead by up to 2.12× with a geometric mean of 1.20×. This is an impressive performance gain of 1.61× over the single-thread baseline, which is much better compared to conventional Turbo Boost with a comparable energy budget.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.35" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/92f6dbb1e9e950c354a411d8ce7b5b504c584a26", "sources": [ "DBLP" ], "title": "DRUT: An Efficient Turbo Boost Solution via Load Balancing in Decoupled Look-Ahead Architecture", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "934f786eebd19e7d5c5d86a8240c266175d40828": { "authors": [ { "ids": [ "1786206" ], "name": "Parikshit Shah" }, { "ids": [ "39016863" ], "name": "Akshay Soni" }, { "ids": [ "36850916" ], "name": "Troy Chevalier" } ], "doi": "10.1145/3097983.3098025", "doiUrl": "https://doi.org/10.1145/3097983.3098025", "entities": [ "Algorithm", "Duality (optimization)", "Experiment", "Hungarian algorithm", "Linear programming", 
"Numerical analysis", "Online algorithm", "Online and offline", "Program optimization", "Rank (J programming language)", "Ranking (information retrieval)", "Scalability", "Traffic shaping", "Web application", "Web traffic" ], "id": "934f786eebd19e7d5c5d86a8240c266175d40828", "inCitations": [ "c03de4a1a2586a02e40fe9f64cb3b907f5f609fa", "b22af4f74746b2575cb1024ca473f9b797540b21" ], "journalName": "", "journalPages": "405-414", "journalVolume": "", "outCitations": [ "4b69669db26d1fbcbf54dfc6d1a35d48b58bc34f", "10ceb668f84860bb09fca364125cae4b1ee2e760", "032b91157dd0f048f546f2d52fbc519686d075ef", "8d3a9360b3b3f42165370fda3db6700b0f9becc4", "71afa5576efde850f0cdd2d2504831e1dc2b5d54", "075f328ef87a076151feb4d5b1f97b66aa597a90", "f885482b6de7eaacbc3cbbd322232b7f95a55621", "d784a4d365223e6c42d959f0e757ae5dedc1a53a", "2e635989e232816546ef352edc38881580b04c1e", "2c2eff1c8e30064d3c208f8f85360476af7ab815", "1bc1c886e8bafe2dc2226edd3fb07e8b9d75610b", "4b0ca81f043e82a1421c4ec3ae95ddf1058a73e8", "44c3044613ba5af9afe959c8db8143c582adb835", "22562a15409361f776e4c82d98a8fc51317c50fd", "396030e4838ab208638fc7e466223ab666fb6284" ], "paperAbstract": "We study the online constrained ranking problem motivated by an application to web-traffic shaping: an online stream of sessions arrive in which, within each session, we are asked to rank items. The challenge involves optimizing the ranking in each session so that local vs. global objectives are controlled: within each session one wishes to maximize a reward (local) while satisfying certain constraints over the entire set of sessions (global). A typical application of this setup is that of page optimization in a web portal. We wish to rank items so that not only is user engagement maximized in each session, but also other business constraints (such as the number of views/clicks delivered to various publishing partners) are satisfied.\n We describe an online algorithm for performing this optimization. 
A novel element of our approach is the use of linear programming duality and connections to the celebrated Hungarian algorithm. This framework enables us to determine a set of shadow prices for each traffic-shaping constraint that can then be used directly in the final ranking function to assign near-optimal rankings. The (dual) linear program can be solved off-line periodically to determine the prices. At serving time these prices are used as weights to compute weighted rank-scores for the items, and the simplicity of the approach facilitates scalability to web applications. We provide rigorous theoretical guarantees for the performance of our online algorithm and validate our approach using numerical experiments on real web-traffic data from a prominent internet portal.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098025" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/934f786eebd19e7d5c5d86a8240c266175d40828", "sources": [ "DBLP" ], "title": "Online Ranking with Constraints: A Primal-Dual Algorithm and Applications to Web Traffic-Shaping", "venue": "KDD", "year": 2017 }, "936c1a51552cc39a24861af88d32a92827692eb3": { "authors": [ { "ids": [ "3222192" ], "name": "Moritz Contag" }, { "ids": [ "1990374" ], "name": "Guo Li" }, { "ids": [ "37758552" ], "name": "Andre Pawlowski" }, { "ids": [ "19218885" ], "name": "Felix Domke" }, { "ids": [ "1763395" ], "name": "Kirill Levchenko" }, { "ids": [ "1713890" ], "name": "Thorsten Holz" }, { "ids": [ "1727599" ], "name": "Stefan Savage" } ], "doi": "10.1109/SP.2017.66", "doiUrl": "https://doi.org/10.1109/SP.2017.66", "entities": [ "Black box", "Diesel", "Evasion (network security)", "Firmware", "Limiter", "Microsoft Software Assurance", "Software assurance", "Static program analysis" ], "id": "936c1a51552cc39a24861af88d32a92827692eb3", "inCitations": [ "3509c5617d848ef49113b4adbc7f796ced41c907", "71487c5365198acfa866c08090481f0fa2fa308f" ], "journalName": "2017 IEEE Symposium on Security and 
Privacy (SP)", "journalPages": "231-250", "journalVolume": "", "outCitations": [ "cdc7ac99f16729c84f01eea8e5d538c500f44eeb", "13b57d45994bac28870d85a848d65b0ef2f760a1", "dcaf96c2777c51861c8d018e4066116d9194fc04", "73062e44e8a4b3d80c0a98e009c9604dc90d3911" ], "paperAbstract": "Modern vehicles are required to comply with a range of environmental regulations limiting the level of emissions for various greenhouse gases, toxins and particulate matter. To ensure compliance, regulators test vehicles in controlled settings and empirically measure their emissions at the tailpipe. However, the black box nature of this testing and the standardization of its forms have created an opportunity for evasion. Using modern electronic engine controllers, manufacturers can programmatically infer when a car is undergoing an emission test and alter the behavior of the vehicle to comply with emission standards, while exceeding them during normal driving in favor of improved performance. While the use of such a defeat device by Volkswagen has brought the issue of emissions cheating to the public's attention, there have been few details about the precise nature of the defeat device, how it came to be, and its effect on vehicle behavior. In this paper, we present our analysis of two families of software defeat devices for diesel engines: one used by the Volkswagen Group to pass emissions tests in the US and Europe, and a second that we have found in Fiat Chrysler Automobiles. To carry out this analysis, we developed new static analysis firmware forensics techniques necessary to automatically identify known defeat devices and confirm their function. We tested about 900 firmware images and were able to detect a potential defeat device in more than 400 firmware images spanning eight years. We describe the precise conditions used by the firmware to detect a test cycle and how it affects engine behavior. 
This work frames the technical challenges faced by regulators going forward and highlights the important research agenda in providing focused software assurance in the presence of adversarial manufacturers.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.66", "http://cseweb.ucsd.edu/~klevchen/diesel-sp17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/936c1a51552cc39a24861af88d32a92827692eb3", "sources": [ "DBLP" ], "title": "How They Did It: An Analysis of Emission Defeat Devices in Modern Automobiles", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "93b58f721de046dacada133902e6d07c6f46501f": { "authors": [ { "ids": [ "3201819" ], "name": "Daejun Park" }, { "ids": [ "3149588" ], "name": "Dongkun Shin" } ], "doi": "", "doiUrl": "", "entities": [ "Desktop computer", "Durability (database systems)", "Interference (communication)", "Manycore processor", "Multi-core processor", "Relevance", "Scalability", "Smartphone", "Sync (Unix)", "System call", "Transaction log" ], "id": "93b58f721de046dacada133902e6d07c6f46501f", "inCitations": [ "41da20c0fb04dd4769f3772e392362acd893af57", "4e731dfc4eee0006865d131b384f46b29965f42e" ], "journalName": "", "journalPages": "787-798", "journalVolume": "", "outCitations": [ "08b5b8270713bbbc0b5b1a31c8625b0bf87d674d", "664df84bcca3aa2703a7458b71e4cf1f53dcdcc0", "13c27125584651329f66461981cbb20fa63e9023", "13f80c8de4078d3cbb22afa93f92d6bb660cb0d1", "120c8504b4290920309165d48bb032f2c724a161", "47b78e7eb12859a141aed6a28a4e301eb0352629", "1af5f199dbe6f03aef7bd404a4236e9b29ba4410", "5f2ab212c91472016f64fb888dcd8ee8a0949b9a", "7ef137faca4da278382ccdcb90da8fcd19faca36", "2e48c19a8da173cfd2a6c929919964f68577e737", "9aa0d7253574e50fe3a190ccd924433f048997dd", "175a3360ff5bb2f0777dff1e688f3f90f20e5fcf", "012ab4527d6aee2387c243d304c624f3b9cf03f3", "061944ca83bb46fac511394dca642f7af2d2858a", "265d18ced11e2e64d98afa97b0e86965e68101f7", "b8ddec47f9fab1eddb5c9cacf703781dd5337b87", 
"6e90c995cc9caa0f7d9d68d536f5e16e9bcbbcd6", "4468cbc8a9ad13ebeaa210424e842f158415ab07" ], "paperAbstract": "For data durability, many applications rely on synchronous operations such as an fsync() system call. However, latency-sensitive synchronous operations can be delayed under the compound transaction scheme of the current journaling technique. Because a compound transaction includes irrelevant data and metadata, as well as the data and metadata of fsynced file, the latency of an fsync call can be unexpectedly long. In this paper, we first analyze various factors that may delay an fsync operation, and propose a novel hybrid journaling technique, called ijournaling, which journals only the corresponding file-level transaction for an fsync call, while recording a normal journal transaction during periodic journaling. The file-level transaction journal has only the related metadata updates of the fsynced file. By removing several factors detrimental to fsync latency, the proposed technique can reduce the fsync latency, mitigate the interference between fsync-intensive threads, and provide high manycore scalability. 
Experiments using a smartphone and a desktop computer showed significant improvements in fsync latency through the use of ijournaling.", "pdfUrls": [ "https://www.usenix.org/conference/atc17/technical-sessions/presentation/park", "https://www.usenix.org/system/files/conference/atc17/atc17-park.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/93b5/8f721de046dacada133902e6d07c6f46501f.pdf", "s2Url": "https://semanticscholar.org/paper/93b58f721de046dacada133902e6d07c6f46501f", "sources": [ "DBLP" ], "title": "iJournaling: Fine-Grained Journaling for Improving the Latency of Fsync System Call", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "93da61cac343fb2b4cbda7c4ed7a2f28cfaf1956": { "authors": [ { "ids": [ "39805430" ], "name": "Parker Hill" }, { "ids": [ "2158304" ], "name": "Animesh Jain" }, { "ids": [ "37784857" ], "name": "Mason Hill" }, { "ids": [ "2004936" ], "name": "Babak Zamirai" }, { "ids": [ "2496402" ], "name": "Chang-Hong Hsu" }, { "ids": [ "2672197" ], "name": "Michael Laurenzano" }, { "ids": [ "1721289" ], "name": "Scott A. 
Mahlke" }, { "ids": [ "2235128" ], "name": "Lingjia Tang" }, { "ids": [ "3348715" ], "name": "Jason Mars" } ], "doi": "10.1145/3123939.3123970", "doiUrl": "https://doi.org/10.1145/3123939.3123970", "entities": [ "Apache Synapse", "Artificial neural network", "Computation", "Deep learning", "Graphics processing unit", "Impedance bridging", "Program optimization", "Singlet fission", "Speedup", "Synapse", "Web service" ], "id": "93da61cac343fb2b4cbda7c4ed7a2f28cfaf1956", "inCitations": [ "e9d3582659584547b09d20af92be57d8d2907dc9", "626f7c268b68a0955f9c7c6cfc2edff4d2e3291f" ], "journalName": "", "journalPages": "786-799", "journalVolume": "", "outCitations": [ "4fe1c707a48869cbbdf3eb0384e526d1d294f7e2", "4308295a2eaef30be423520918ad224dc2f3ffe2", "2c2fa29dfbbab106f1b94cdfdc67939d3355bd30", "02b28f3b71138a06e40dbd614abf8568420ae183", "2bb28f6105ca30b2bc1ba91578234ecb12e788a6", "766b21e4984729a17d2d826691affa25f855a38b", "2329a46590b2036d508097143e65c1b77e571e8c", "e1c4e2fa071046569a05e9cfdf13496d094025dd", "209932cd2e3f5da071c4f6341a3b8b29cf50cc4a", "11a9625badce3b83229cc0c4916f11cd4ceb8bef", "49b4094f2c313a92da4461572c0bef80b0d7d649", "87e037c174b9896de6a2afa209a57306dc661a64", "fbeaa499e10e98515f7e1c4ad89165e8c0677427", "0c6c30e3052fcc01aa5ee38252d77f75322d7b3f", "092a1cf971fb8359d3293004c6f1de82f05f3afb", "081651b38ff7533550a3adfc1c00da333a8fe86c", "0892a4477f02af4a9b47e456627497435e5d8159", "d4078329aafb7d93b27bcb937330346044ca2b63", "b7cf49e30355633af2db19f35189410c8515e91f", "32c3d778d8cce464b3ad3de277666295f3a0b02a", "a1e0d801a0fd064a64810a0eb6291ab52e4a96c1", "357e97b04375f09e9f4cfd45c69ecd9d7f0a15e1", "053912e76e50c9f923a1fc1c173f1365776060cc", "09b8120cbc52e7df46122e8e608146289fddbdfa", "326a98860619a7dc25cc22c3979188e73ec1188a", "5d90f06bb70a0a3dced62413346235c02b1aa086", "963d8068fa312bef3e68833ce3656fdac2f692a4", "114aa26f7539c27bfbed43c18938dfa0c76251dc", "1b61b9a3152da7e29eb6abcccc9dbf2973fe662c", "40ea5abe3aac04e316d5628159f9082b7213d8e4", 
"104dd4963f7f0ef03fe09d505d31966666f9281d", "812c795ce4797b718a2947a9f9bdc5b6965c2b29", "45b50ed3d33633978964893b3a58ca369f35bf7e", "46f74231b9afeb0c290d6d550043c55045284e5f", "6d45a2ab08d4c4331cc582852dccd052c6b91938", "29c7d959f322f7006186e3c344ddb9e41e3a8d1f", "9f1f065bf08cd90431cc051267a708f56436cd82", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "399acab2bee6eccbfffe4a2ce688b6b1075e9c5e", "4af785bf8a5959d7e8eb37ca87c45db2ac6a544c", "2e33c9a0e0ba542693610f36cabee4d8291f8e09", "52e99334096f02c9cf386c9391fc68181c058f4c", "c084c846c667f0c81322028c5e94da4b782bd91d", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "0daf79a3cdab6ecbbd8e1c140ad94bbf67037356", "534f6ea4ce0127e5da7f1cafb6334b59ad15b83f", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "19dc39725884eee2c9415d57814a360289a07648" ], "paperAbstract": "Deep neural networks (DNNs) are key computational building blocks for emerging classes of web services that interact in real time with users via voice, images and video inputs. Although GPUs have gained popularity as a key accelerator platform for deep learning workloads, the increasing demand for DNN computation leaves a significant gap between the compute capabilities of GPU-enabled datacenters and the compute needed to service demand.\n The state-of-the-art techniques to improve DNN performance have significant limitations in bridging the gap on real systems. Current network pruning techniques remove computation, but the resulting networks map poorly to GPU architectures, yielding no performance benefit or even slowdowns. Meanwhile, current bandwidth optimization techniques focus on reducing off-chip bandwidth while overlooking on-chip bandwidth, a key DNN bottleneck.\n To address these limitations, this work introduces DeftNN, a GPU DNN execution framework that targets the key architectural bottlenecks of DNNs on GPUs to automatically and transparently improve execution performance. 
DeftNN is composed of two novel optimization techniques - (1) synapse vector elimination, a technique that identifies non-contributing synapses in the DNN and carefully transforms data and removes the computation and data movement of these synapses while fully utilizing the GPU to improve performance, and (2) near-compute data fission, a mechanism for scaling down the on-chip data movement requirements within DNN computations. Our evaluation of DeftNN spans 6 state-of-the-art DNNs. By applying both optimizations in concert, DeftNN is able to achieve an average speedup of 2.1X on real GPU hardware. We also introduce a small additional hardware unit per GPU core to facilitate efficient data fission operations, increasing the speedup achieved by DeftNN to 2.6X.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123970" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/93da61cac343fb2b4cbda7c4ed7a2f28cfaf1956", "sources": [ "DBLP" ], "title": "DeftNN: addressing bottlenecks for DNN execution on GPUs via synapse vector elimination and near-compute data fission", "venue": "MICRO", "year": 2017 }, "93ee8f084498d1f667a4e9c1ea2671c4dd4e5f91": { "authors": [ { "ids": [ "2062558" ], "name": "Daniel Genkin" }, { "ids": [ "2172728" ], "name": "Luke Valenta" }, { "ids": [ "2133826" ], "name": "Yuval Yarom" } ], "doi": "10.1145/3133956.3134029", "doiUrl": "https://doi.org/10.1145/3133956.3134029", "entities": [ "Chosen-ciphertext attack", "Ciphertext", "Computer security", "Email", "Encryption", "Libgcrypt", "Mathematical structure", "Microarchitecture", "Montgomery modular multiplication", "Pretty Good Privacy", "Public-key cryptography", "SWAP (instrument)", "Side-channel attack", "Spectral leakage" ], "id": "93ee8f084498d1f667a4e9c1ea2671c4dd4e5f91", "inCitations": [ "57b211bc0bd6d8375073107517ee35ed7b7b7e78", "c47f1c156ea127aa985e715a61fc9f4b246a415a", "c18e47de658ca1c3ca349fe803309ce45284d49f" ], "journalName": "IACR Cryptology ePrint Archive", 
"journalPages": "806", "journalVolume": "2017", "outCitations": [ "a0c6bd744a82dca055731df419d552578058b7d6", "0dada24bcee09e1f3d556ecf81f628ecaf4659d1", "11257763984a9239f40b377b1c8d154f88d9ac85", "7b597c9e9bd10964e2ddddd5e5e865c221e58ee6", "13fadf9e3fc927e9e7df14132feecc1899c68d63", "1513be7178d849dc4363b31c9ea19a2baea076e7", "ea73c6594ab0fe97a4054c9d61b4d669766011f1", "85364f539eb3315ef2e173bc607ab5fa026b1d16", "32590f3c546b930f8f2b81ef75d2dc5165ce8266", "83737e1e9b94a86834badf529a309cd1202e4cb5", "87d49f253a0e623e0255afa06d63e9b5a9fb09d0", "085bb66a2f7b307760a3ddf035df96285fea51a3", "b86e4b55df36b5313b8892ced4f4aafe2b52dbfa", "38edc4410c2557bf98feebf214d8bc774fe602b5", "3dc01f7eae98301b19be0e1a76b337701e3b8232", "fe125cc563227d4307112419b0b66a84d8d0e1b9", "09b0508faccfbc115a9c5540032da5dfb71c48e3", "3cc86ff94309bb58b2125eea173b23ab89f26a3b", "9fd6442ce3cbbd20242e9ae2d0e739d3230542a6", "889b726b0964d2cf155323e314043ec34df5b3a8", "6c484d52c1c58fb7c1d6dca551db2e1291072360", "1f932f0d49a4c56d9718e8506d6177c6a6848831", "63e21447b7098b41c4ff16e7e2be257fbb24e49a", "cef9aef9b73c94eacc55670f3aa8f70329cd4bc6", "588b3ecacf1acd3e9a736ef1286d63c91f35ca1a", "6f8d4a59cfe5db779a099d099ca199edd66cd263", "70fb3cea8335aefdf849597e9d9dd7512d722d88", "9a449203b6582f3656e4325bf0e841226cbf447f", "64a3b5b28ca86f3088cff3ec2b48296ae56db842", "4207ebe6f2656c1a40149ec446ca99885ce5b2ad", "aaeb99920069fad63f5dbbf37f8c4ebe2b180e95", "8395b33eb645c545fa51a5778ce6e2c927560283", "0352873da0c142fcd99fdcf6853fc0878710e472", "e7e7fbfa7693d27e8f0887e2ba2fe57385e2a9d8", "b56bec459de1a4875520775b46979c226cbbeb9c", "226242629f3d21b9e86afe76b1849048148351de", "135d6f50f43dc278d20026352f0051ac368ce315", "c569925bc8972c2c8a640a57513de6c904981d54", "7272e04a354d3bc54b1a8119e868090996d3bf1d", "0e42df3278356643d63dcdd33b159ae265602c42", "9f50019aa8161577e4fc62f79da41083ba03f70b", "704a3fcbddf7045cf522d125031d5f009c9abb02", "fd1d864a95d7231eaf133b00a1757ee5d0bf0e07", 
"4d5d6dfdb582c0d695953e92c408f2377a6c9039", "25b24d6cc547c80e3a1ad037082694a5f6a2b8a9", "4d624b942a58818f8d425460638cb4b65ed84e1c", "39c8baedf47623a837feb0351abb323cff760d56", "3c1f11a1da88c8237842a246ed1a5dbe230737be", "47e98574f432ec6a7a224b807a0bbd08a2a66148", "4ad23e9e5745e8f3ee19317c54844b58d93513df", "52c2c050af5b32d4929b4b193967a3675d03aea0" ], "paperAbstract": "In recent years, applications increasingly adopt security primitives designed with better countermeasures against side channel attacks. A concrete example is Libgcrypt's implementation of ECDH encryption with Curve25519. The implementation employs the Montgomery ladder scalar-by-point multiplication, uses the unified, branchless Montgomery double-and-add formula and implements a constant-time argument swap within the ladder. However, Libgcrypt's field arithmetic operations are not implemented in a constant-time side-channel-resistant fashion.\n Based on the secure design of Curve25519, users of the curve are advised that there is no need to perform validation of input points. In this work we demonstrate that when this recommendation is followed, the mathematical structure of Curve25519 facilitates the exploitation of side-channel weaknesses.\n We demonstrate the effect of this vulnerability on three software applications---encrypted git, email and messaging---that use Libgcrypt. In each case, we show how to craft malicious OpenPGP files that use the Curve25519 point of order 4 as a chosen ciphertext to the ECDH encryption scheme. 
We find that the resulting interactions of the point at infinity, order-2, and order-4 elements in the Montgomery ladder scalar-by-point multiplication routine create side channel leakage that allows us to recover the private key in as few as 11 attempts to access such malicious files.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134029", "https://obj.umiacs.umd.edu/papers_for_stories/genkin_ACMCCS2017.pdf", "https://eprint.iacr.org/2017/806.pdf", "http://eprint.iacr.org/2017/806" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/93ee8f084498d1f667a4e9c1ea2671c4dd4e5f91", "sources": [ "DBLP" ], "title": "May the Fourth Be With You: A Microarchitectural Side Channel Attack on Several Real-World Applications of Curve25519", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "93f45cdc7bb6d291d77ce1f987cd724e06d5dd3c": { "authors": [ { "ids": [ "33877556" ], "name": "Michael A. Bender" }, { "ids": [ "1680147" ], "name": "Martin Farach-Colton" }, { "ids": [ "2387399" ], "name": "Rob Johnson" }, { "ids": [ "10673138" ], "name": "Simon Mauras" }, { "ids": [ "40223468" ], "name": "Tyler Mayer" }, { "ids": [ "2652269" ], "name": "Cynthia A. 
Phillips" }, { "ids": [ "3245149" ], "name": "Helen Xu" } ], "doi": "10.1145/3034786.3056117", "doiUrl": "https://doi.org/10.1145/3034786.3056117", "entities": [ "Amortized analysis", "Block size (cryptography)", "Data dictionary", "Data structure", "Database", "Graph coloring", "Insertion sort", "Random-access memory", "Range query (data structures)", "Skip list", "With high probability" ], "id": "93f45cdc7bb6d291d77ce1f987cd724e06d5dd3c", "inCitations": [ "27a36203f14d73b95dfffec857b4ff923d9ef430" ], "journalName": "", "journalPages": "69-78", "journalVolume": "", "outCitations": [ "7035c0d21a48e2d2f5c6323650854b033853bb89", "1688c9bb957395bf7ac05098537c736cfd076382", "b38bfeba50bc0adde29082d7bc5ffbc390a215ec", "2df3ce51d09faab51d8f5cf2fade92929e0d3643", "2ebe1ffec53e63c2799cba961503f0a6abafccd3", "1bf3bad98ffedc59413c965a7b3e969eaaa7edbd", "99c13596ef7e0095e6091a1a597e6484b807940e", "5780152288f2a9de673176c85d721708f4964b3a", "1ae7993c0c2d795b243354de48dab80bf2000356", "ae4eb2a8278c05dd631c0665d4bd882c641f805e", "1a54f03255d567e1593d9a876dcc7fe555de09e6", "5b0206dd59d8a70788c11933d8409938b4ea3fd0", "1e29246acdc73e27899352e3d1862e2af4b74ac2", "196320bcff718f371d44e97af121e395c24edd10", "06bd4d2d21624c7713d7f10ccb7df61bf6b9ee71", "1b44e2b8cc1010c4fef018d04acca0b485383a5f", "0db70f733101cdc77a280b30d65cb5ade46ccc8c", "f98062567adb3c98bfabeb99172f8bca026a0102", "04f020a4ab2134db6f9e98eadf216d94d440414a", "327cbb1da2652b430a52171d510cf72235b890b6", "f4147b82166813bbe5dc01e9486664c273d1556c", "5ecb33f280a9117f882192b0f5e06f93a1745c6c", "c0d4c5713dba6472edb651334d25c3a5aa78d6f3", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "8289104e679bd0b71558521cd44d0d5aef726b8d", "9210f0d3e8a0bb680e7144db7b425527b924f352", "9a8ff6073b183de6940bba457fb6f996736c39a2", "2faec6517248ac5679c2adcf83dd5d8672f9cd4f", "0fca220343f411c7dac67b1f5fc1bcf5790cc030", "3ec900e462c0faf6c131fa6fc1488d03954780e7", "f345ecb8e361107bd6c8ca9b8f712c42a102c4cd", "a4faf114f72676c3cd7f75b8070da6be4be48c77", 
"43aabfa094d030308a7f2c4c5b2858dcc1131994", "56f2beface9bf100b2be028259a8f1d987ccc6f0", "1cdedeb9461bdebedc47c7a358769f85dd7683ea", "5ad1dd1aa78ba772c969aa01ee5e8ee0d255ce3d", "6fbfc4fe76e152e4d371930a0069cf204e482528", "68f24a01660117b11e3c44cfb8ed001ee187b291", "905ce0fe513230b33fced8512050c637174735d2", "4933940eda1fb15b0d277e7e24fb787e33993de6", "ca6798d44dda5be7051aed8ca05446c5af69b634", "0e1f56f21aa0545ac3e2840eeb21757027d14b4b" ], "paperAbstract": "The skip list is an elegant dictionary data structure that is commonly deployed in RAM. A skip list with N elements supports searches, inserts, and deletes in O(log N) operations with high probability (w.h.p.) and range queries returning K elements in O(log N + K) operations w.h.p.\n A seemingly natural way to generalize the skip list to external memory with block size B is to \"promote\" with probability 1/B, rather than 1/2. However, there are practical and theoretical obstacles to getting the skip list to retain its efficient performance, space bounds, and high-probability guarantees.\n We give an external-memory skip list that achieves write-optimized bounds. That is, for 0 < ε < 1, range queries take O(log_{B^ε} N + K/B) I/Os w.h.p. and insertions and deletions take O((log_{B^ε} N) / B^{1-ε}) amortized I/Os w.h.p.\n Our write-optimized skip list inherits the virtue of simplicity from RAM skip lists. Moreover, it matches or beats the asymptotic bounds of prior write-optimized data structures such as the B^ε-tree or LSM trees, which are deployed in high-performance databases and file systems.\n The main technical challenge in proving our bounds comes from the fact that there are so few levels in the skip list, an aspect of the data structure that is essential to getting strong external-memory bounds. We use extremal-graph coloring to show that it is possible to decompose paths in the skip list into uncorrelated groups, regardless of the insertion/deletion pattern. 
Thus, we achieve our bounds by averaging over these uncorrelated paths rather than by averaging over uncorrelated levels, as in the standard skip list.", "pdfUrls": [ "http://supertech.csail.mit.edu/papers/BenderFaJo17.pdf", "http://doi.acm.org/10.1145/3034786.3056117" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/93f45cdc7bb6d291d77ce1f987cd724e06d5dd3c", "sources": [ "DBLP" ], "title": "Write-Optimized Skip Lists", "venue": "PODS", "year": 2017 }, "945ddbcbda534c6a3a135ce3de71c5fd7e3ec069": { "authors": [ { "ids": [ "2073481" ], "name": "Xingbo Wu" }, { "ids": [ "2571439" ], "name": "Fan Ni" }, { "ids": [ "1804354" ], "name": "Song Jiang" } ], "doi": "10.1145/3127479.3127483", "doiUrl": "https://doi.org/10.1145/3127479.3127483", "entities": [ "Attribute\u2013value pair", "CPU cache", "Cache (computing)", "Central processing unit", "Commodity computing", "Computer", "Computer data storage", "Data access", "Data item", "Data structure", "Database index", "Dynamic random-access memory", "Experiment", "Hash table", "Locality of reference", "Throughput", "Web search engine" ], "id": "945ddbcbda534c6a3a135ce3de71c5fd7e3ec069", "inCitations": [], "journalName": "", "journalPages": "27-39", "journalVolume": "", "outCitations": [ "0437e781bf22d47f3a13cca1e27eca6ae91d3f41", "817f8243fcf7c130914131f1b02e8302d564293b", "27f81148ecbcd04dd97cebd717c8921e5f2a4373", "4a7b00b3dbd9b923eddeea26017771c97021599b", "6479c756e597c38e57aa45e2eae8550fd738418b", "8bc100f1b5e313278a09d04ca210506d95c3b4f5", "6de2f02cfcc10d514431953a623898bfa61c1580", "20984289dadab9f2a356a535d5ccaba87dc90a42", "77aa217da1e3a078f39d9a84f8627cf1ee0d3a22", "6bf62a3aec49e5d128ae024a17628893898fcd8f", "0a59166593f0a2fa260f16fd853299c9f0863fdf", "1bed30d161683d279780aee34619f94a860fa973", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "655b7a3fdf6139f0e35e540e486144e9bfd49ef5", "60a488e29b5b64c44f6ce124bce7ced9602636d4", "2b8de17d3a163489ef7d0814c9033a853b0725cf", 
"22a449d60f34753f68cbe8f7180cb19a12a3bc83", "03416be8097852a54dd3e309434e5a0806824646", "08c02a36b7ad8f1ab4337562f67c60192b9b333e", "9a75d6e22681237e3258876909bdf1db3365d15b", "d0b6d2075a653d60452b6df0fced4ee0ae093dd2", "3fc27e335c054b3ea1e35565d0e8697092e895b6", "2a19d31a818be868bf39dcf258b36a86ef2ece3f", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "2c1a0af30cc12ec108ce8bffde856ced8b759022", "6d1ca1108d9d96e5607571502552ad04464d7f15", "354e53bdd739801b27a7ce43673321fd3519e484", "ade263c815bf995916a2fa56c89f70efacf6ec5f", "c01d5136874e5f5b46bed068bd2da87f162718e9", "8f369c8e284e27ff73591209cc69a62e8252ff02", "412a9e54bbb31e12d008a9579994e009c5b40b46", "15b7c7dbfece993f16054d1feaa8e141f59bc427", "c5cc6243f070d80f5edef24608694c39195e2d1a", "12b71edea3a3cd18b507e4f0e20297d77f0f986f", "560a18eb5a4ba13429853ec35c71f9eac1acddb5", "63f5dd078fb20cbcb018800f044ae740db74bd45" ], "paperAbstract": "With the ever increasing DRAM capacity in commodity computers, applications tend to store large amount of data in main memory for fast access. Accordingly, efficient traversal of index structures to locate requested data becomes crucial to their performance. The index data structures grow so large that only a fraction of them can be cached in the CPU cache. The CPU cache can leverage access locality to keep the most frequently used part of an index in it for fast access. However, the traversal on the index to a target data during a search for a data item can result in significant false temporal and spatial localities, which make CPU cache space substantially underutilized. In this paper we show that even for highly skewed accesses the index traversal incurs excessive cache misses leading to suboptimal data access performance. To address the issue, we introduce Search Lookaside Buffer (SLB) to selectively cache only the search results, instead of the index itself. 
SLB can be easily integrated with any index data structure to increase utilization of the limited CPU cache resource and improve throughput of search requests on a large data set. We integrate SLB with various index data structures and applications. Experiments show that SLB can improve throughput of the index data structures by up to an order of magnitude. Experiments with real-world key-value traces also show up to 73% throughput improvement on a hash table.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3127483" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/945ddbcbda534c6a3a135ce3de71c5fd7e3ec069", "sources": [ "DBLP" ], "title": "Search lookaside buffer: efficient caching for index data structures", "venue": "SoCC", "year": 2017 }, "94689cd14cd6772acb5df2273f196802b1097b86": { "authors": [ { "ids": [ "1778378" ], "name": "Sudhanshu Shukla" }, { "ids": [ "6226754" ], "name": "Mainak Chaudhuri" } ], "doi": "10.1109/HPCA.2017.24", "doiUrl": "https://doi.org/10.1109/HPCA.2017.24", "entities": [ "Active Directory", "CPU cache", "Critical path method", "Last mile", "Manycore processor", "Naivety", "Operating system", "Overhead projector", "Scalability", "Shared memory", "Simulation", "Sparse matrix", "Thread (computing)" ], "id": "94689cd14cd6772acb5df2273f196802b1097b86", "inCitations": [ "7a961b5f6e20773ee0911b580a76fe6da8d69e5b", "cf1a6552c530708565322e399c0c02225733a427" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "205-216", "journalVolume": "", "outCitations": [ "0d5a5be365e222b32767ae6ca66d2832199a4870", "378782a827933059f9f91e6e29aac84bd0857828", "475444844bc8c47c0e8c682ed7e493dc0af494f1", "6c1743c8b25269bc0ccb46dc865681011932d472", "133b8ac84cd8824a068bcbbc98fe3ed922374fff", "374a06fd4f3e5ba7c809006b718fc17db49c6818", "5c55d7719a188eb121388844d42d079d8f6680c7", "8383b7f6f4f9556e522f735a0fd7b8c9e11e613b", "8a842eacf4e200237a979f74f41838d68cbdccb2", 
"7af884e9cb3e8ff5edeac5a720ee6571d7aff74b", "cd2c848dd86bc46752587d34ea7b765d417ec06d", "3f4d5667e04958569dff72966a93450decfd256f", "167a9340e19d7fb74af04be55ee29dd49c3d5abe", "25896cfb61a5b88fb0c6acd83d49f5a1a01e5039", "046ecf8fe9d36cd6c45b8c0a5b96f57b5db5650f", "9b66e8a097f95ec0d8509561881f040500dec878", "358e6e95c7359b87bdca56391332e1612e5ebb23", "00ab25c6582d543932fccbb0f15fe93445f95d61", "014ba063a3721973ba6af6503232d4d21d1456bb", "0b72532a85ce8f141adf2ecbb5701d41dd9b3095", "295e5e971f6d7955fb442f5bb31f6601fece0202", "67ce921291a99d30be490e556293305534359101", "a85e1d290159f0eaddd69eca8c2f792d8c799dc8", "0e804d8784687fbcc9a4ee2a4ff8cf19ee9ae80c", "199267c2389b31ed3caaaddaa6293c1a6c4d2589", "10326fe8830644f6e218335d50a757d5e0ce7825", "6b193874f280c431f1070c8b9d75c11a01b4be1e", "1e04fdd737dfb543f294f65cb80a8cc286da7a00", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "2b1415ed444d67e78439e63baa73c05d7246f8f2", "65c30178a48dc4ce04768e58bdcc0b03fa419eda", "28f5de9effd9bf21220e81984710489b78608cc6", "fde8989241eb491c0c90ffbacabd333433c7884d", "7b468a23bffb2ce9eb74181471f365aa6701a370", "1f3300369f5caa3885f6ca661ff985a5853a4f42", "7016db09eb0834dd326a912d13267447518aadf1", "a2462243470f05971e5ba8b1568faa81c640863e", "35c348a3663de6387a45dc58b2c85092d247818a", "33d8743ad609524c5f7949ac44a4dcaffe228dde", "0d91d95b4696b32b3d496a93ca44425d8685132c", "ae7591221ea3c0ea98d6dec3242aed5c7f515aee", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "b7ae1193f965e79d137aea7defc3df143aacbbe6", "62b10ab049eb3a6bf8003b08e0b93871a776ebfd" ], "paperAbstract": "The sparse directory has emerged as a critical component for supporting the shared memory abstraction in multi-and many-core chip-multiprocessors. Recent research efforts have explored ways to reduce the number of entries in the sparse directory. 
These include tracking coherence of private regions at a coarse grain, not tracking blocks that belong to pages identified as private by the operating system (OS), and not tracking a subset of blocks that are speculated to be private by the hardware. These techniques require support for multi-grain coherence, assistance of OS, or broadcast-based recovery on sharing an untracked block that is wrongly speculated as private. In this paper, we design a robust minimally-sized sparse directory that can offer adequate performance while enjoying the simplicity, scalability, and OS-independence of traditional broadcast-free block-grain coherence. We begin our exploration with a naive design that does not have a sparse directory and the location/sharers of a block are tracked by borrowing a portion of the block's last-level cache (LLC) data way. Such a design, however, lengthens the critical path from two transactions to three transactions (two hops to three hops) for the blocks that experience frequent shared read accesses. We address this problem by architecting a tiny sparse directory that dynamically identifies and tracks a selected subset of the blocks that experience a large volume of shared accesses. We augment the tiny directory proposal with an option of selectively spilling into the LLC space for tracking the coherence of the critical shared blocks that the tiny directory fails to accommodate. 
Detailed simulation-based study on a 128-core system with a large set of multi-threaded applications spanning scientific, general-purpose, and commercial computing shows that our coherence tracking proposal operating with (1/32)x to (1/256)x sparse directories offers performance within a percentage of a traditional 2x sparse directory.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.24", "http://www.cse.iitk.ac.in/users/mainakc/pub/hpca2017tinydirectory.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/94689cd14cd6772acb5df2273f196802b1097b86", "sources": [ "DBLP" ], "title": "Tiny Directory: Efficient Shared Memory in Many-Core Systems with Ultra-Low-Overhead Coherence Tracking", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "946b28871cfb00627445accf619f995e9dc50184": { "authors": [ { "ids": [ "10993830" ], "name": "Huan Chen" }, { "ids": [ "2387791" ], "name": "Theophilus Benson" } ], "doi": "10.1145/3143361.3143391", "doiUrl": "https://doi.org/10.1145/3143361.3143391", "entities": [ "Border Gateway Protocol", "Content-addressable memory", "Control plane", "Experiment", "Hermes2D", "Insertion sort", "Network switch", "Simulation", "Software-defined networking", "Telephone exchange", "Transport Chemical Aerosol Model" ], "id": "946b28871cfb00627445accf619f995e9dc50184", "inCitations": [], "journalName": "", "journalPages": "283-295", "journalVolume": "", "outCitations": [ "3f62529507f5f32c6f34aa8abf384c766ca0982f", "2e8259f5d1ffc038238b51fb7f089596b7b31101", "231ba17921ebd80e95771e28dfb5082e169d5a53", "202612db6ba261122decf31cc8a44944b89e17d7", "08b12c65fa237ed030954efcdc42094f4bc7ecf1", "9a2a7e68fc04d504ae4dcd79f96c14ec914526d5", "06beeda7be321eb0a294af55b7689d22d77a5b2b", "0c02b5bc72b99023511ca3cad840c2bb96034e1b", "00c2708f6a9d5af145ddd361fd8d5b1d17548d71", "2e4ab1140b454fc6dacf4d23d3663aa34c741577", "3967126afbca6a722d7257cd671fe5e4979358a5", 
"695dad4f57dcbcc0e0c5d1987b3c66cb7b8d196a", "0847dfb07af6685f96b885a93c2cd4dfb4c3d1a9", "caf5726018e4eeac01e86c824e41fe25c0bab059", "1aefeed6a487431dc1c1137d8b18ad299b328d73", "9849f77eaeda52f28a7046d35ad78e632f51fafb", "0849e21a444d4a3bbea735a788628bea5543f900", "45284cb75a68d7c331a1fe426043b0d95d4df59f", "663e064469ad91e6bda345d216504b4c868f537b", "4687fdf3c77ef00700fdf1399f7dd81bfe87ef97", "3c92e739842e82bf03d32b1e7820c2c85f386d6f", "2a628e4a9c5f78bc6dcdf16514353336547846cc", "1a3a747402ae2e46e4795a84517d6658a73893e8", "7db38afdf8a4650d0f97bb09685dc34f05cd5472", "034b937edbff280dfdd7b2e98639655fd3587402", "153d74de9a8a53108a3dd5b6f5f97ff4e6179a49", "4d03b65744ce64166457436b24d6db23d3c3e493", "aaca14d54be1d34833fd086229ab2f5aa8b70050", "1ee9bd0b45d00c62ce7f22b6883f80a7fe759cdf", "563f9bcbe6dea8c5a205418f9794dbdeb276ce16", "653a0c33ffea192c988ceb2ba69c97baac8033c0" ], "paperAbstract": "SDN controllers demand tight performance guarantees over the control plane actions performed by switches. For example, traffic engineering techniques that frequently reconfigure the network require guarantees on the speed of reconfiguring the network. Initial experiments show that poor performance of Ternary Content-Addressable Memory (TCAM) control actions (e.g., rule insertion) can inflate application performance by a factor of 2x! Yet, modern switches provide no guarantees for these important control plane actions -- inserting, modifying, or deleting rules.\n In this paper, we present the design and evaluation of Hermes, a practical and immediately deployable framework that offers a novel method for partitioning and optimizing switch TCAM to enable performance guarantees. Hermes builds on recent studies on switch performance and provides guarantees by trading-off a nominal amount of TCAM space for assured performance. We evaluated Hermes using large-scale simulations. 
Our evaluations show that with less than 5% overheads, Hermes provides 5ms insertion guarantees that translates into an improvement of application level metrics by up to 80%. Hermes is more than 50% better than existing state of the art techniques and provides significant improvement for traditional networks running BGP.", "pdfUrls": [ "http://cs.brown.edu/~tab/papers/CoNEXT17.pdf", "http://doi.acm.org/10.1145/3143361.3143391" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/946b28871cfb00627445accf619f995e9dc50184", "sources": [ "DBLP" ], "title": "Hermes: Providing Tight Control over High-Performance SDN Switches", "venue": "CoNEXT", "year": 2017 }, "9475bb3acf8483cac4c2f928c94d014d3d0167d2": { "authors": [ { "ids": [ "38797515" ], "name": "Peter Steinbach" }, { "ids": [ "33656588" ], "name": "Matthias Werner" } ], "doi": "10.1007/978-3-319-58667-0_11", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_11", "entities": [ "Benchmark (computing)" ], "id": "9475bb3acf8483cac4c2f928c94d014d3d0167d2", "inCitations": [ "8691c7ec9b9a22b0aba686f0bf8d04df9707cd6b" ], "journalName": "", "journalPages": "199-216", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "http://arxiv.org/abs/1702.00629", "https://doi.org/10.1007/978-3-319-58667-0_11" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9475bb3acf8483cac4c2f928c94d014d3d0167d2", "sources": [ "DBLP" ], "title": "gearshifft - The FFT Benchmark Suite for Heterogeneous Platforms", "venue": "ISC", "year": 2017 }, "947dbf2e58babb4dbe7c6359080152e299644372": { "authors": [ { "ids": [ "2121860" ], "name": "Konstantinos Mamouras" }, { "ids": [ "2780408" ], "name": "Mukund Raghothaman" }, { "ids": [ "1710176" ], "name": "Rajeev Alur" }, { "ids": [ "1804315" ], "name": "Zachary G. 
Ives" }, { "ids": [ "1740426" ], "name": "Sanjeev Khanna" } ], "doi": "10.1145/3062341.3062369", "doiUrl": "https://doi.org/10.1145/3062341.3062369", "entities": [ "Algorithm", "Approximation algorithm", "Combinatory logic", "Decision Making", "Greater Than", "High- and low-level", "Increment", "Java", "Linguistics", "Memory footprint", "Programmer", "Programmer Device Component", "Programming Languages", "Programming paradigm", "Query language", "Question (inquiry)", "Regular expression", "Relational database", "Specification", "Stream (computing)", "Streaming algorithm", "Streaming media", "Throughput", "Type system", "algorithm" ], "id": "947dbf2e58babb4dbe7c6359080152e299644372", "inCitations": [ "0ff505ed0d46ae274eeda195e30c1f856706370f", "8a9fbd05b90a1070e83fd93b6d467b55d1eb8ca7", "0fc010550a3f1a12d964e62d8bdf352933018837", "d3991c46d3f6943766a9e26502304156113b28d6", "e3174b0258f983ce45c49f192f3ddb4502f3a4a1", "bde1d3ae41fd1a3056dbafd01ebf9641658844df", "ab5a3ee384c0dfebe9c6d6ec946f402afb3f3474", "9a438d76daba978f9b6336ed1fba6eb082933c7c", "a9e743b1318a78d55c77c570914a2d6ee241ce2e" ], "journalName": "Proceedings of the ACM SIGPLAN ... Conference on Programming Language Design and Implementation. 
ACM SIGPLAN Conference on Programming Language Design and Implementation", "journalPages": "693-708", "journalVolume": "52 6", "outCitations": [ "527f8653ec2768758f066ef6dbf3a131d4482277", "9a438d76daba978f9b6336ed1fba6eb082933c7c", "a512812f72321eb4b198cc11cbd6755bfa71aa1c", "526ca04dc8ba3a02d99735174b0a7b797bd9bbae", "645980227447c84ab90cec97020abbbdc85734b3", "ac853fee48dd655536f1fab285d95427de34de6d", "05ff1efc30a0938e0bced05119932438d7054e54", "b1582e2a952924fecb86a8baa7f5e90902c3bbfb", "e5514c75d9cbd8c971d21051085a3d1f7e278cb6", "4b8fbe5e18af87ce47b728bf7b4e644c9de0c95e", "0c504d0c8319802c9f63eeea0d7b437cded2f4ef", "507e8e8b48f6bce4c136dec692b27bba3b9da640", "159a7a03ccb0ef751db1870be1de4d26a02470f3", "43dfb8212d7eb7660a81ce66de2405f1acee4638", "7224d949cd34082b1249e8be84fde65b2c6b34fd", "22dc8ea117cfba40cf514be421e475adbe130dcf", "44dc0f4fdc648050f2a76e0fff19e4a2340bf847", "88e5b8347e00e81475061182c60974930b6fb4e2", "d9c2586e599816defa98d04056fcb8e490d1eba4", "8375f40706943a50094acf909849a6bc611fe5e9", "17f66d7a69bf20b29602d943069eedcd1c07abff", "0fccec96f9d55e68c1b542a40883bbdf7d06b0f3", "29d0199f1ef68881af6e2985293d3c3f581a0b12", "0ef1dd03db41de69165075562a051021a186c230", "97bfbeece4bf1dfcf83dcdef94718bd9c78aa8c2", "5c94454722d8d4fb43bfd4e8449211267ab1d086", "086cde207a125816091c34a076b473a465b5388f", "4b9f7c233a105eafd23cc3afdd39e6cd36993fa1", "04ee1c7ed1b22ce513ce2672b89eb3b2ea371258", "37c9a357b512c9e373fd12359fcff525fdbf5d4d", "11f32b8188b044ad61666d0435b0285fa8fff04a", "2f708dc8de91f08dba286a5e582e58c6f98e7f2c", "2dfa1c860e63f69012ba8b016d4d098e1dcf86e4", "022e936d46bf435f73faf9ca03a5a150eb90ce9b", "2dcab9e92c6006e0e123d076ff2ab28aa859805a", "3b06fd86abb6986e2c07ae64184db7e79b501bd7", "0f040247e5aa1735fef94ae5da02c29df3d9b2d9", "2cade71564be9a3cd2e406eb209631e0c345f5f5", "2c552f7d28e1d113554dc4f9289d9ef25c094ae9", "0541d5338adc48276b3b8cd3a141d799e2d40150", "0455148206bd0e7975b8c65a5fd6ba0b18583152", "1fcc527c54e692ab6db69a8a6b5f5ee9118e0dd6" ], 
"paperAbstract": "Real-time decision making in emerging IoT applications typically relies on computing quantitative summaries of large data streams in an efficient and incremental manner. To simplify the task of programming the desired logic, we propose StreamQRE, which provides natural and high-level constructs for processing streaming data. Our language has a novel integration of linguistic constructs from two distinct programming paradigms: streaming extensions of relational query languages and quantitative extensions of regular expressions. The former allows the programmer to employ relational constructs to partition the input data by keys and to integrate data streams from different sources, while the latter can be used to exploit the logical hierarchy in the input stream for modular specifications. \n We first present the core language with a small set of combinators, formal semantics, and a decidable type system. We then show how to express a number of common patterns with illustrative examples. Our compilation algorithm translates the high-level query into a streaming algorithm with precise complexity bounds on per-item processing time and total memory footprint. We also show how to integrate approximation algorithms into our framework. We report on an implementation in Java, and evaluate it with respect to existing high-performance engines for processing streaming data. 
Our experimental evaluation shows that (1) StreamQRE allows more natural and succinct specification of queries compared to existing frameworks, (2) the throughput of our implementation is higher than comparable systems (for example, two-to-four times greater than RxJava), and (3) the approximation algorithms supported by our implementation can lead to substantial memory savings.", "pdfUrls": [ "http://www.seas.upenn.edu/~mamouras/papers/StreamQRE-PLDI'17.pdf", "http://www.cis.upenn.edu/~alur/PLDI17.pdf", "https://www.cis.upenn.edu/~alur/PLDI17.pdf", "http://doi.acm.org/10.1145/3062341.3062369" ], "pmid": "29151821v1", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/947dbf2e58babb4dbe7c6359080152e299644372", "sources": [ "DBLP", "Medline" ], "title": "StreamQRE: modular specification and efficient evaluation of quantitative queries over streaming data", "venue": "PLDI", "year": 2017 }, "94bfd6e99c3e1adbe5c97b04dfed04393d5d371a": { "authors": [ { "ids": [ "11787967" ], "name": "Nicolas Michael" }, { "ids": [ "10427140" ], "name": "Nitin Ramannavar" }, { "ids": [ "3263774" ], "name": "Yixiao Shen" }, { "ids": [ "34298418" ], "name": "Sheetal Patil" }, { "ids": [ "2995591" ], "name": "Jan-Lung Sung" } ], "doi": "10.1145/3030207.3044530", "doiUrl": "https://doi.org/10.1145/3030207.3044530", "entities": [ "Cloud computing", "Distributed computing", "Experiment", "High- and low-level", "Multitenancy", "Parallel computing", "Perceived performance", "Performance Evaluation", "Provisioning", "Quality of service", "Service-level agreement", "Software deployment", "Software performance testing", "Steady state", "Test automation", "Throughput", "User interface" ], "id": "94bfd6e99c3e1adbe5c97b04dfed04393d5d371a", "inCitations": [ "61a1afa693442d829072114910b1775a8e4ceefa", "bdbd57ea557992f7f054b37b7d6af7a93d6b1a9f", "1c5ac4839d31063bc20466ca023f399916bc2c21" ], "journalName": "", "journalPages": "189-200", "journalVolume": "", "outCitations": [ 
"34ddc3da70f5b17ae0a73266ad1e4f9ae155811f", "0d2ccae6d37b9e4053fc476887d1565de58e5924", "1393b0233321538385ed771255c16a244f7d7859", "4dd26aa1151b7338679f97015349f3a76e92f565", "ef87997f141b383fc3cf51cb0c93ff70d31442d7", "30a82a63a339c1e69aac36b23900544fe9ec97bb", "a471e945f0d29871edba692cc35f32d8ed4eb865", "9edc3150a1cdf9f827abc98fc6f82a9966bdd290", "1fccfd32a286b709124b9d642fcd85fc5fcec79b", "9aa0d7253574e50fe3a190ccd924433f048997dd", "10551c91b4d36d1009b23b4d2b88a9e1733fe029" ], "paperAbstract": "The evolution of cloud-computing imposes many challenges on performance testing and requires not only a different approach and methodology of performance evaluation and analysis, but also specialized tools and frameworks to support such work. In traditional performance testing, typically a single workload was run against a static test configuration. The main metrics derived from such experiments included throughput, response times, and system utilization at steady-state. While this may have been sufficient in the past, where in many cases a single application was run on dedicated hardware, this approach is no longer suitable for cloud-based deployments. Whether private or public cloud, such environments typically host a variety of applications on distributed shared hardware resources, simultaneously accessed by a large number of tenants running heterogeneous workloads. The number of tenants as well as their activity and resource needs dynamically change over time, and the cloud infrastructure reacts to this by reallocating existing or provisioning new resources. Besides metrics such as the number of tenants and overall resource utilization, performance testing in the cloud must be able to answer many more questions: How is the quality of service of a tenant impacted by the constantly changing activity of other tenants? How long does it take the cloud infrastructure to react to changes in demand, and what is the effect on tenants while it does so? 
How well are service level agreements met? What is the resource consumption of individual tenants? How can global performance metrics on application- and system-level in a distributed system be correlated to an individual tenant's perceived performance?\n In this paper we present CloudPerf, a performance test framework specifically designed for distributed and dynamic multi-tenant environments, capable of answering all of the above questions, and more. CloudPerf consists of a distributed harness, a protocol-independent load generator and workload modeling framework, an extensible statistics framework with live-monitoring and post-analysis tools, interfaces for cloud deployment operations, and a rich set of both low-level as well as high-level workloads from different domains.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3044530" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/94bfd6e99c3e1adbe5c97b04dfed04393d5d371a", "sources": [ "DBLP" ], "title": "CloudPerf: A Performance Test Framework for Distributed and Dynamic Multi-Tenant Environments", "venue": "ICPE", "year": 2017 }, "94cad31969e8279ee356f9048907d999100444ee": { "authors": [ { "ids": [ "2306471" ], "name": "Hui Guan" }, { "ids": [ "37914192" ], "name": "Xipeng Shen" }, { "ids": [ "1769928" ], "name": "Hamid Krim" } ], "doi": "10.1145/3126908.3126961", "doiUrl": "https://doi.org/10.1145/3126908.3126961", "entities": [ "CUDA", "Code refactoring", "Document", "Graphics processing unit", "Interactivity", "Natural language processing", "OpenCL API", "Program optimization", "Programmer", "Supercomputer", "Utility", "Xeon Phi" ], "id": "94cad31969e8279ee356f9048907d999100444ee", "inCitations": [], "journalName": "", "journalPages": "10:1-10:14", "journalVolume": "", "outCitations": [ "2538e3eb24d26f31482c479d95d2e26c0e79b990", "99d12ad1ab72cbe9ce0ddf7909bac437a7224d44", "1ecbb2fa4684f8994f90dd0fd5d16c4037439799", "2f5102ec3f70d0dea98c957cc2cab4d15d83a2da", 
"1f2ff98f9413bb36c641e9edcfa79f7b33eeb80a", "dab1f8ed933e29bd9913be50263e9f5a2cafb421", "1851c5a5664d1977ed15cbae4e257fbfa49a3708", "15367f1e036e47a2539f9a5ea6fe80b56bfade12", "9a296c206ef66031684152cbd3764a0f642cb688", "83a6cacc126d85c45605797406262677c256a6af", "f5532ed3440c8ae58f7cad105dee2999cf7e7668", "0dce085fc4f8bd429a614ac042aaac3ae95b73e1", "bc6d746a5fc534bd22a6e7ffb3096d075aa993ef", "2aa01a458835656b968654991a8f67e5037f4aea", "02df3d50dbd1d15c38db62ff58a5601ebf815d59", "ce6783c962a887b21197610e80e80afcb150a1d1" ], "paperAbstract": "Achieving high performance on modern systems is challenging. Even with a detailed profile from a performance tool, writing or refactoring a program to remove its performance issues is still a daunting task for application programmers: it demands lots of program optimization expertise that is often system specific.\n Vendors often provide some detailed optimization guides to assist programmers in the process. However, these guides are frequently hundreds of pages long, making it difficult for application programmers to master and memorize all the rules and guidelines and properly apply them to a specific problem instance.\n In this work, we develop a framework named Egeria to alleviate the difficulty. Through Egeria, one can easily construct an advising tool for a certain high performance computing (HPC) domain (e.g., GPU programming) by providing Egeria with a optimization guide or other related documents for the target domain. An advising tool produced by Egeria provides a concise list of essential rules automatically extracted from the documents. At the same time, the advising tool serves as a question-answer agent that can interactively offers suggestions for specific optimization questions. 
Egeria is made possible through a distinctive multi-layered design that leverages natural language processing techniques and extends them with knowledge of HPC domains and how to extract information relevant to code optimization. Experiments on CUDA, OpenCL, and Xeon Phi programming guides demonstrate, both qualitatively and quantitatively, the usefulness of Egeria for HPC.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126961", "https://people.engr.ncsu.edu/xshen5/Publications/sc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/94cad31969e8279ee356f9048907d999100444ee", "sources": [ "DBLP" ], "title": "Egeria: a framework for automatic synthesis of HPC advising tools through multi-layered natural language processing", "venue": "SC", "year": 2017 }, "94d96a4e255519b75b2d1f8aaa322aba1cb77822": { "authors": [ { "ids": [ "2501652" ], "name": "Junguk Cho" }, { "ids": [ "1692142" ], "name": "Hyunseok Chang" }, { "ids": [ "1706584" ], "name": "Sarit Mukherjee" }, { "ids": [ "1707547" ], "name": "T. V. Lakshman" }, { "ids": [ "2358499" ], "name": "Jacobus E. 
van der Merwe" } ], "doi": "10.1145/3143361.3143398", "doiUrl": "https://doi.org/10.1145/3143361.3143398", "entities": [ "AMD FireStream", "Big data", "Control plane", "One-to-many (data model)", "Pipeline (computing)", "Routing", "Software-defined networking", "Stream processing", "Typhoon" ], "id": "94d96a4e255519b75b2d1f8aaa322aba1cb77822", "inCitations": [], "journalName": "", "journalPages": "310-322", "journalVolume": "", "outCitations": [ "4af63ed343df388b6353b6fc77c7137d27822bf4", "320dd86417661178837059025c6d1f9a75d08bfd", "56df56dbab7e0b022a51e5a038c1119fcd36f137", "3d4d19a68a9ee57f0c4bb5f692b488ac4ce2bb8e", "478fbef8568a021c3d91c13128efa19ad719dd88", "e5514c75d9cbd8c971d21051085a3d1f7e278cb6", "2f7f5d0e989c74d6279e2620e10e8d0b0c021cb7", "6d496d510f867274473a01dcb0a1a7bf45d0904f", "503688e85f7d5c28da04a35c3fe35abd955456f3", "3a3056b9a2369e9bf792d97405cf4b11a3155302", "186e56c99b6392919e10734e8e9c174892663131", "178ff3ab1afcd6fb348a9805babe0a5c814be5af", "603a54c6c715851482ebc15090ee0e0b99b8f6d9", "9e074f3d1c0e6212282818c8fb98cc35fe03f4d0", "0706225eeac0f855b19c365313db61252ecde0d7", "2488408f5620bf9dac6fdc972fd3de753197ce80", "1cbbdf58133f763813b3a61b8faf2f5ab74464b7", "7ae26da9b7666812857883536870c315538f7f10", "10dcd5574dca03395b507fbb4d0c90f804cbbf19", "4f58b0e258d6f50770b97a72b2bff4bcc7caf504", "a27c3f0a249dc122104b937c5783f83b3585bb53", "2568017b22f63b44df251a4d4b59dc7977aff44e", "72000109547f17c849c2ee6e2825784e64b70cea", "e847c3ec130da57328db79a7fea794b07dbccdd9", "b386c93eec61b5af3c137e125cf1471f7131167a", "25dcb849cb146a2afc51ad092fba70570bd4de42", "0ef1dd03db41de69165075562a051021a186c230", "076bc832c2121eca1107d198bec668487c9947b1", "eb74c5c80b5698c2253bdff7ac9f228c76da1cf3", "405544638e4a7b3d944ba4596066d09bffa06f45", "64f3a81fff495ac336dccdd63136d451852eb1c9", "1ab5b1bb9f802477c80f861f8dd8b3f2f8ad5bd8", "40e3de7d94569d4da0e5351f3249d2a3c932f0ac" ], "paperAbstract": "Stream processing pipelines operated by current big data streaming frameworks 
present two problems. First, the pipelines are not flexible, controllable, and programmable enough to accommodate dynamic streaming application needs. Second, the application-level data routing over the pipelines do not exhibit optimal performance for increasingly common one-to-many communication. To address these problems, we propose an SDN-based real-time big data streaming framework called Typhoon, that tightly integrates SDN functionality into a real-time stream framework. By partially offloading application-layer data routing and control to the network layer via SDN interfaces and protocols, Typhoon provides on-the-fly programmability of both the application and network layers, and achieve high-performance data routing. In addition, Typhoon SDN controller exposes cross-layer information, from both the application and the network, to SDN control plane applications to extend the framework's functionality. We introduce several SDN control plane applications to illustrate these benefits.", "pdfUrls": [ "http://doi.acm.org/10.1145/3143361.3143398", "https://www.flux.utah.edu/download?uid=258", "http://www.cs.utah.edu/~jmanbal/paper/typhoon-slides.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/94d96a4e255519b75b2d1f8aaa322aba1cb77822", "sources": [ "DBLP" ], "title": "Typhoon: An SDN Enhanced Real-Time Big Data Streaming Framework", "venue": "CoNEXT", "year": 2017 }, "94da42c5635232a14ba4b9bfaadab9456886f04b": { "authors": [ { "ids": [ "37931567" ], "name": "Arpith Chacko Jacob" }, { "ids": [ "1985764" ], "name": "Alexandre E. 
Eichenberger" }, { "ids": [ "2946535" ], "name": "Hyojin Sung" }, { "ids": [ "2404184" ], "name": "Samuel Ant\u00e3o" }, { "ids": [ "2043410" ], "name": "Gheorghe-Teodor Bercea" }, { "ids": [ "3065632" ], "name": "Carlo Bertolli" }, { "ids": [ "34071538" ], "name": "Alexey Bataev" }, { "ids": [ "1734057" ], "name": "Tian Jin" }, { "ids": [ "34520676" ], "name": "Tong Chen" }, { "ids": [ "2937166" ], "name": "Zehra Sura" }, { "ids": [ "2108362" ], "name": "Georgios Rokos" }, { "ids": [ "3728768" ], "name": "Kevin O'Brien" } ], "doi": "10.1109/HiPC.2017.00048", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00048", "entities": [ "Baseline (configuration management)", "Central processing unit", "Clang", "Compiler", "Graphics", "Graphics processing unit", "Join (SQL)", "Open-source software", "OpenACC", "OpenMP", "Parallel computing", "Programming model", "Robustness (computer science)", "Thread (computing)", "Threaded code" ], "id": "94da42c5635232a14ba4b9bfaadab9456886f04b", "inCitations": [ "1c309a9e19541a6ed40aa9a58123ba8be4784abb" ], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "358-367", "journalVolume": "", "outCitations": [ "7926d0b9dfc36c13910a1850cd91a7db862f0014", "c9ae9df2ac7a203241cb039081720c74c88f1dee", "0a9b6d3b7f1acab362ee264edc084104331d14d0", "871d9641582562f9a83ed785ce3051f3e9e95483", "f3e7ab9ba13f9efa2f6681fad01f88ca35b619e4", "14724c356106ae50746318b1bdd27d9b684c7d11", "2a07fffbe5eb67d1a75b03d8a9e2d38054b19ea5", "271be72b0c57686a3e77d0f794ef08db1b39a28f", "1b800033b24f44112035c96c47a55a95afe83724", "6710769f8fb90d6fe30dc8e27183f19a6cb31faa", "b0f9ea06d726935289456a75f739544826bb5c0d", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "b575f0d8b3eb38bcf0a1b99bad144002e96ffa18", "1c6477bc1b1c7b3767624be6d286d382ce05c211", "679541d90bcfb71019c7407b4c408a80e88db99d", "7e045aaacd735daf206db9cb35b50d694dcae166" ], "paperAbstract": "Graphics Processing Units (GPUs) are increasingly used to 
accelerate portions of general-purpose applications. Higher level language extensions have been proposed to help non-experts bridge the gap between a host and the GPU's threading model. Recent updates to the OpenMP standard allow a user to parallelize code on a GPU using the well known fork-join programming model for CPUs. Mapping this model to the architecturally visible threading model of typical GPUs has been challenging. In this work we propose a novel approach using the technique of Warp Specialization. We show how to specialize one warp (a unit of 32 GPU threads) to handle sequential code on a GPU. When this master warp reaches a user-specified parallel region, it awakens unused GPU warps to collectively execute the parallel code. Based on this method, we have implemented a Clang-based, OpenMP 4.5 compliant, open source compiler for GPUs. Our work achieves a 3.6x (and up to 32x) performance improvement over a baseline that does not exploit fork-join parallelism on an NVIDIA k40m GPU across a set of 25 kernels. Compared to state-of-the-art compilers (Clang-ykt, GCC-OpenMP, GCC-OpenACC) our work is 2.1 - 7.6x faster. Our proposed technique is simpler to implement, robust, and performant.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00048" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/94da42c5635232a14ba4b9bfaadab9456886f04b", "sources": [ "DBLP" ], "title": "Efficient Fork-Join on GPUs Through Warp Specialization", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "95228c931949b3d6cb709ba36a53f419fa7b8253": { "authors": [ { "ids": [ "1698790" ], "name": "Ehsan Totoni" }, { "ids": [ "3410386" ], "name": "Subramanya R. 
Dulloor" }, { "ids": [ "14610544" ], "name": "Amitabha Roy" } ], "doi": "10.1145/3102980.3103004", "doiUrl": "https://doi.org/10.1145/3102980.3103004", "entities": [ "Application checkpointing", "Automatic parallelization", "Big data", "Compiler", "Iteration", "Logistic regression", "Machine learning", "Message Passing Interface", "Parallel computing", "Systems design" ], "id": "95228c931949b3d6cb709ba36a53f419fa7b8253", "inCitations": [], "journalName": "", "journalPages": "144-149", "journalVolume": "", "outCitations": [ "80b2059edd71d1aa5f1669da93ea6cca43b02188", "2f88dcf1e9abaa0b0f8c63820548c98b2da61220", "61710e89b38424dc3d74b405e7a7633ef8291779", "0910af95b5d45964060d62162ebedba0dcc9a4c1", "2c858958bf1a28979043c7d5cb7429b3fb1332e9", "0ef88610f4382a9c6e5c8020ab6ccf3dde0fb3eb", "756d1f8f07a83f3cfc0edaa81493a9f109628e1b", "5e999cf950878415c018dce12b9e6e15f77e0a00", "050348e54d59952782ace21cac48735bc0d23b8e", "0558c94a094158ecd64f0d5014d3d9668054fb97", "0c36ce0ea8ec9070edef08d833e2bd18ff919b20", "26deee037b221bd05ed34461819f5c067b745445", "42f26cd7904dda8843e357c283f323e9101f37c3", "0933c5842c5d9fd4ff647ab6e3e5f19cae12475f", "28a9dca6faeead651539c700bef413203b2b876e", "4d57536c05aaa1c24ba4c42b38fa1f7b0631f534", "2f559253dc7031108626630b7c5002033f029e49", "8f381d2b69ce376330ce4316b455cbe56265a0c1", "2a0c9f8248aad793810dfc2ec2bc21a8ebdca6f4", "416a3348da1114e26171a50694b66f8c35024571", "4416236e5ee4239e86e3cf3db6a2d1a2ff2ae720" ], "paperAbstract": "Big data systems such as Spark are built around the idea of splitting an iterative parallel program into tiny tasks with other aspects of system design built around this basic design principle. Unfortunately, in spite of immense engineering effort, tiny tasks have unavoidably large overheads. We use the example of logistic regression -- a common machine learning primitive -- to compare the performance of Spark to different designs that converge to a hand-coded parallel MPI-based implementation. 
We conclude that Spark leaves orders of magnitude performance on the table, due to its insistence on setting the granularity of a task to a single iteration. We counter a common argument for the tiny task approach --namely better resilience to faults -- by demonstrating that optimum job checkpoint intervals are far longer than the duration of the tiny tasks favored in Spark's design. We propose an alternative approach that relies on an auto-parallelizing compiler tightly integrated with the MPI runtime, illustrating the opposite end of the spectrum where task granularities are as large as possible.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3103004" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/95228c931949b3d6cb709ba36a53f419fa7b8253", "sources": [ "DBLP" ], "title": "A Case Against Tiny Tasks in Iterative Analytics", "venue": "HotOS", "year": 2017 }, "958c9819d33d55c698af7d9ed4a16bb81769e717": { "authors": [ { "ids": [ "11189226" ], "name": "Kanchan Joshi" }, { "ids": [ "35401583" ], "name": "Kaushal Yadav" }, { "ids": [ "20418047" ], "name": "Praval Choudhary" } ], "doi": "", "doiUrl": "", "entities": [ "Ecosystem", "Linux", "Linux", "Memory-mapped I/O", "Weighted round robin" ], "id": "958c9819d33d55c698af7d9ed4a16bb81769e717", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "7f713eeef50a87ec595c64832fdaf25ffa38b5bd", "57d2df84a585f96ddc874898977cfe2fbe02a68f", "30419d0e0fcaebbfbcdb88f702bd01306d14fb15" ], "paperAbstract": "There is need of differentiated I/O service when applications with diverse performance-needs share a storage device. NVMe specification provides a method called Weighted-Round-Robin-with-urgent-priority (WRR) which can help in providing such differentiated I/O service. In Round-Robin arbitration all I/O queues are treated to be of equal priority, leading to symmetric I/O processing. 
While in WRR arbitration, queues can be marked urgent, high, medium or low, with provision for different weightage for each category. Onus is on host to associate priority with I/O queues and define weights. We find that very little has been done in current Linux ecosystem when it comes to supporting WRR and making benefits reach to application. In this paper we propose a method that introduces WRR support in Linux NVMe driver. This method delivers WRR capability to applications without the need of rebuilding them. Unlike affinity-based approach, it does not limit compute ability of application. Our results demonstrate that modified driver indeed provides differentiated I/O performance among applications. Proposed work modifies only NVMe driver and is generic enough to be included in mainstream Linux kernel for supporting WRR.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-joshi.pdf", "https://www.usenix.org/conference/hotstorage17/program/presentation/joshi", "https://www.usenix.org/sites/default/files/conference/protected-files/hotstorage17_slides_joshi.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/fd3e/63f4c2ce6ad2969f34931f4591aa29e7008e.pdf", "s2Url": "https://semanticscholar.org/paper/958c9819d33d55c698af7d9ed4a16bb81769e717", "sources": [ "DBLP" ], "title": "Enabling NVMe WRR support in Linux Block Layer", "venue": "HotStorage", "year": 2017 }, "959a93bca92aafba2d1fc33fda530a50bf95fa69": { "authors": [ { "ids": [ "1897378" ], "name": "Darko Makreshanski" }, { "ids": [ "2811717" ], "name": "Jana Giceva" }, { "ids": [ "3188030" ], "name": "Claude Barthels" }, { "ids": [ "1687400" ], "name": "Gustavo Alonso" } ], "doi": "10.1145/3035918.3035959", "doiUrl": "https://doi.org/10.1145/3035918.3035959", "entities": [ "Database engine", "High-level programming language", "IBM Tivoli Storage Productivity Center", "In-memory database", "Online analytical processing", "Online transaction processing", 
"Replay attack" ], "id": "959a93bca92aafba2d1fc33fda530a50bf95fa69", "inCitations": [ "5bd6f82d3d61270401d8284dc95437512e77d02f", "264a5e7a5230b228b86f63a75546738a66454c56", "077eb0de78522a25420fe0c0ab4e864a82ac7177" ], "journalName": "", "journalPages": "37-50", "journalVolume": "", "outCitations": [ "cd4b958bf9dda5f44fbb457f7bf0eca96d6563e7", "3c6be4c9ea5c56d4ab97aabb4e7c9d5ced57bea8", "2e50af2320dab632d8046b6d4c130ae6cce8903f", "92e0243e1a73c77ef8b90292e3798f765b38f269", "05eddbc2d37cf994fb7c6a4558e2679e5f40d23e", "cf855ba4a09c2181d0166705717b5788454fcfa5", "9748241beb02ef1e2d0e6dc877c04b354033a838", "2610745375939fee88e556858344ab9344c47871", "09ecdb904eb7ae8a12d0c6c04ae531617a30eafa", "24c3330d34d640945e0eb99fe4a0b1c31695a8cb", "00ac447d02035c26c7e2852c2457fe812e89038f", "e9664a7b5b35be268180e45563cc5971df86404d", "e75c5d1b7ecd71cd9f1fdc3d07f56290517ef1e5", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "817da34922f767e11715f5aca7743e8d2388c68d", "08150fd93c906fd8a882af537ea184d709e98264", "97f18a7fd4a91a28c93545930a913e481425d57e", "5046a718f92447642939f5c93414dc97225d726a", "3542254ecc9f57d19f00e9fcc645b3d44469a6ba", "145b46a0b1f8dc8f2182290655b9597fba087e78", "14390fd81841cc4bb3d3764042481fc0a0e89e7b", "463bec3d0298e96e3702e071e241e3898f76eff2", "1ab74d44982409beeca21efb2dbcb97a5c7de4b2", "41f8af6e2fafbf65f4f84534cb905c8824d7854d", "8865ba2b6b169246ed0a1600bd7f9d447c4bd47f", "ddf313f6fcc0520c716c54873164ded8e31703da", "33ef71ebd67ed2dfffe14d73408173e6e8e94e5b", "86daff3998005e12c963124e61f7c9cfd598a3cf", "0997037e940df06ed7a6d19f7501579aab01e829", "2525c025f11aec60cff428271ca851381b92008f", "6033797f241a3687aab939db1d88b5184d32c0fb", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "afda6470dd16dc0a865dbb6fc291e5806132379b", "ef47742e72bd64fb1ae5359cd6d5dd6dfad34dc8", "0eb1eba6a5bcf54391fcceecd431591d03a6df3a", "12b71edea3a3cd18b507e4f0e20297d77f0f986f", "9e5f18b528416025473bcab6082b51cf7ab60dcf", "29f6129a8b8f80b547ed3394d2ada9ba10b95952", 
"12c356a40e63af45aa6b0a63af38676a0dfe2b3e", "afc4e448edba15519b5ea054d7b47af1a5aab93d", "c8e6dd5bffa931f1252fd6a62dd267283cd85f69", "35f751e46799e3a91425267819f40dce273abec1", "b8d04f5a56f72aa39c1fd58db33c850c477862c5", "13d350d63b88061db3b63355e90a05ef270972d6", "45ac2218b74fd28ff170dc93cf4390649466c491", "5c9d62f348c7ed09ee51e1d56643ced039ec1121", "73f31354cc9058ddc2e47a1c585b753e1592c1bf", "5ad1dd1aa78ba772c969aa01ee5e8ee0d255ce3d", "1f4fac99af2d8a6d9471eb3cad7b5ae0365c0933", "174148018456e391ee06adc21ea0535c825e8df3", "0ad5ed5a0a7b9210993753d7594c7285d5e9b179", "4f05a78c2e2abf932915c33c6a2bb9c726ce4ac2", "028101c16b995cae0430fd52707cb91878a3b1d3", "6479bc321dd7859eb6b6b8cca100bade86940526", "06545f48a6b25a3cafd76e514b2310254972888b", "412a9e54bbb31e12d008a9579994e009c5b40b46", "8c6631f7a857e167406207c42ec47d7610a365b2" ], "paperAbstract": "In this paper we present BatchDB, an in-memory database engine designed for hybrid OLTP and OLAP workloads. BatchDB achieves good performance, provides a high level of data freshness, and minimizes load interaction between the transactional and analytical engines, thus enabling real time analysis over fresh data under tight SLAs for both OLTP and OLAP workloads.\n BatchDB relies on primary-secondary replication with dedicated replicas, each optimized for a particular workload type (OLTP, OLAP), and a light-weight propagation of transactional updates. 
The evaluation shows that for standard TPC-C and TPC-H benchmarks, BatchDB can achieve competitive performance to specialized engines for the corresponding transactional and analytical workloads, while providing a level of performance isolation and predictable runtime for hybrid workload mixes (OLTP+OLAP) otherwise unmet by existing solutions.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035959" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/959a93bca92aafba2d1fc33fda530a50bf95fa69", "sources": [ "DBLP" ], "title": "BatchDB: Efficient Isolated Execution of Hybrid OLTP+OLAP Workloads for Interactive Applications", "venue": "SIGMOD Conference", "year": 2017 }, "95cfdd551be21dcb1745489f19bdf40bd7435d74": { "authors": [ { "ids": [ "1975156" ], "name": "Wojciech M. Golab" }, { "ids": [ "1718098" ], "name": "Danny Hendler" } ], "doi": "10.1145/3087801.3087819", "doiUrl": "https://doi.org/10.1145/3087801.3087819", "entities": [ "Algorithm", "Cache coherence", "Coherence (physics)", "Compare-and-swap", "Crash (computing)", "Fetch-and-add", "Mutual exclusion", "Mutual information", "Schedule (computer science)", "Shared memory", "TRAVERSE", "Time complexity" ], "id": "95cfdd551be21dcb1745489f19bdf40bd7435d74", "inCitations": [], "journalName": "", "journalPages": "211-220", "journalVolume": "", "outCitations": [ "077b751c42995c11de1b14875544690c08ced364", "a4721c5760267dad631d1f6f16db931910ccf6c7", "62f7869436b0719fa676717d6e945d48416a8bb7", "4f0a7a76fdd4a97cd69a1034e020cf7cc3f14fe2", "835c0aa9082e7fd7937c802f64e0393e267e6496", "18b864ae7aec09f3975e7235ad8ee2b98a02cf57", "8838fbcb10c4322ade8c9286d930cb016e803b68", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "934c1463eb015a99fb481db2290f841c45f9e6e1", "d3ec16a81f030a6003fb91393084fcac26bf68d5", "eaa33bed90105b2eb65d0ed187493031fc7f487d", "f20bb59b95d2eb95013d386cde3f8969ffd7f0b7", "7da89dea35d519edab9d6c7f26db93bd0d12145e", "b9bfc2fe4f070678da14a5b56e5d3ef0de3ca531", 
"448eeb724de8ff146eee7f40aa689d74b56f3601", "3e77a77247734dc918a5723573e1158eee1955f9", "4bd009ce33cbea440b4f464a33416687de5f9573", "2239a7eb74b5264ff95f6231e4fbe829877fec9e", "32d99461d75b42ff53f56c8fa562af92c0ebaddc", "f338c63cf2d0e79333f7ca2db221c73483e29cdf", "3f74f300d4da8859d7f38cd69ec551de2a6c3297", "74ef7861ef7834852c87ed5dc3073edd7de6129b", "035941ac151141dbfb4303ee6bd941e885082207", "ed4a7d0cf586e216ccd75a16ea6759df9d4c1947", "3cf43c6e7e812512a11ee95bd5543d170592d58e", "264cd229ac4bbdf655d5e7b44563bf84bd846364", "092d56ca4dc2dc7f278c03f91bc8a52e55f6868b", "1e53d00664a459ad7c56a8f085724c3ce9d6853f" ], "paperAbstract": "Recoverable mutual exclusion (RME) is a variation on the classic mutual exclusion (ME) problem that allows processes to crash and recover. The time complexity of RME algorithms is quantified in the same way as for ME, namely by counting remote memory references \u2013 expensive memory operations that traverse the processor-to-memory interconnect. Prior work on the RME problem established an upper bound of O(log N) RMRs in an asynchronous shared memory model with N processes that communicate using atomic read and write operations, prompting the question whether sub-logarithmic RMR complexity is attainable using common read-modify-write primitives. We answer this question positively in the cache-coherent model by presenting an RME algorithm that incurs O(log N / log log N) RMRs and uses read, write, Fetch-And-Store, and Compare-And-Swap instructions. We also present an O(1) RMRs algorithm that relies on double-word Compare-And-Swap and a double-word variation of Fetch-And-Store. 
Both algorithms are inspired by Mellor-Crummey and Scott\u2019s queue lock.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087819" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/95cfdd551be21dcb1745489f19bdf40bd7435d74", "sources": [ "DBLP" ], "title": "Recoverable Mutual Exclusion in Sub-logarithmic Time", "venue": "PODC", "year": 2017 }, "965692b5e6302869584d1c2e0abc671bfcf375d5": { "authors": [ { "ids": [ "3391899" ], "name": "Xiaoke Wang" }, { "ids": [ "1726963" ], "name": "Chuan Wu" }, { "ids": [ "1678308" ], "name": "Franck Le" }, { "ids": [ "40618241" ], "name": "Francis Chi-Moon Lau" } ], "doi": "10.1109/CLOUD.2017.34", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.34", "entities": [ "Algorithm", "Data center", "Multi-armed bandit", "Network congestion", "Network function virtualization", "Online algorithm", "Program optimization", "Provisioning", "Scalability", "Service composability principle", "Software deployment", "Transfer function", "USB On-The-Go" ], "id": "965692b5e6302869584d1c2e0abc671bfcf375d5", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "205-213", "journalVolume": "", "outCitations": [ "7f822adf127881926c2fab2401d6e3e381bd9c11", "2c7e254cfea97f0b10320d723aa906f3f5c9bed6", "2f2cdd7b0c98b5e43b61272d2ac3ebb5cd29041d", "48473f87ffd3f86c27dd9e6ee77a7d695323ad3b", "38a0bced15718230eeec1f5ffd29ada0f4f10a7a", "31b10fed7b29aaf9c250ae8ca8b804457b389480", "bdf524f747f3c7b4b38753671aafe63f1c03ecb6", "4faec72a59bfe377f5e5b8286f0b75217421ffb6", "5eb3ac3f7aa60a5b354c489ced10f42c8f381eb2", "336b4f3099b8f629adc20a69aba15257e53539f9", "45877ca79aec909a8e6c64de21850e21c8e55717", "17f7424f3d469b6436e6d3c5158926759fbe822b", "1b3c86ad6c149941750d97bd72b6b0122c1d8b5e", "4b83b18ecbb46a0e03b1cf7009c7f2244a729f66", "63a061c70da9ce645de1ad803a06f1595833befb", "1fbfa8b590ce4679367d73cb8e4f2d169ae5c624", "0538e399046c74d95124c715760aa51ab4716dce", 
"25d28bfbfd9067d9cb5a85f4af0af3a57013baf4", "022a0317d5bf2b38847b03f7c9bc3bfa35950199", "e9d783d483a0dbe3c2482cf835cb693260db8dd8", "0e1f55c288d25d7b83a17b200f9dab5ab74d43f6", "31ceeced5d23193c369b98170c45e66bae6ff77d", "26e17f6b62a7caec660b3356d49e879e6e0eeabc" ], "paperAbstract": "Network function virtualization has emerged as a promising technology to enable rapid network service composition/innovation, energy conservation and cost minimization for network operators. To optimally operate a virtualized network service, it is of key importance to optimally deploy a VNF (virtualized network function) service chain within the provisioning infrastructure (e.g., servers and the network within a cloud datacenter), and dynamically scale it in response to flow traffic changes. Most of the existing work on VNF scaling assume access to precise network bandwidth information for placement decisions, while in reality, network bandwidth typically fluctuates following an unknown pattern and an effective way to adapt to it is to do trials. In this paper, we address dynamic VNF service chain deployment and scaling by a novel combination of an online provisioning algorithm and a multi-armed bandit optimization framework, which exploits online learning of the available bandwidths to enable optimal deployment of a scaled service chain. Specifically, we adopt the online algorithm to minimize the cost for provisioning VNF instances on the go, and a bandit-based online learning algorithm to place the VNF instances which minimizes the congestion in a datacenter network. 
We demonstrate effectiveness of our algorithms using solid theoretical analysis and trace-driven evaluation.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.34" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/965692b5e6302869584d1c2e0abc671bfcf375d5", "sources": [ "DBLP" ], "title": "Online Learning-Assisted VNF Service Chain Scaling with Network Uncertainties", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "966c2df4f4b1036a2a7d7d7d9971fe8575d174e3": { "authors": [ { "ids": [ "1761472" ], "name": "Hao Yang" }, { "ids": [ "19192306" ], "name": "Xiulin Ma" }, { "ids": [ "2146838" ], "name": "Kun Du" }, { "ids": [ "1709780" ], "name": "Zhou Li" }, { "ids": [ "39999006" ], "name": "Hai-Xin Duan" }, { "ids": [ "34809315" ], "name": "XiaoDong Su" }, { "ids": [ "38431717" ], "name": "Guang Liu" }, { "ids": [ "19207195" ], "name": "Zhifeng Geng" }, { "ids": [ "1693935" ], "name": "Jianping Wu" } ], "doi": "10.1109/SP.2017.11", "doiUrl": "https://doi.org/10.1109/SP.2017.11", "entities": [ "Black hat", "Black\u2013Scholes model", "Dictionary", "Distortion", "Email", "Evasion (network security)", "Instant messaging", "Long tail", "Mobile phone", "Program optimization", "Purify", "Search engine optimization", "Spamdexing", "Telephone number", "The Inner Circle", "Tweaking", "Underground", "Vocabulary", "Web search engine", "Web search query" ], "id": "966c2df4f4b1036a2a7d7d7d9971fe8575d174e3", "inCitations": [], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "751-769", "journalVolume": "", "outCitations": [ "297c46edcc9730e9177e28d7824c1cccba655fb5", "c719eca361a854a69addd7f39537b7c412702eca", "450dcb127df546d73710701560b64d984382fef2", "d055444c58fffebebe2cfba521fb73ad80bf7991", "c65d758636ac96d08263800924991bc3320e35f6", "01f2e7f3dee35c2c7b9179d8eb46f4f766c101d1", "5bcc42ad16cf4518684dccc03f64d12387615108", "283b0864062319c567d9f77fbdff66b66aa6e293", 
"41ba47e06798d10ce6ed602facffdd4eee6cfb8c", "6b6d357fb4ef19f2330596183ce00d2f3797740d", "30e761464e7637bd406c0b86f4eaef9da1fb8a70", "88a0a5f7dab7ca987d17d3bbdb4e49d3fbf439bc", "23017ad78de9debb8c37ec3309dc5cba9fcd12e5", "3871a09a4c64716182f2238c4144b223faa39202", "3334a80676fefc486575bd2ddf1b281a640742f1", "39fe4349bec2efb024faf5c8e22d07105940ca5d", "d8648bbe6945171131281772209357a2f862c41a", "d7c97b150edaf75f8ee9a20246e54eff353d9a26", "88dfde5ec95e70eb6f9831010122fd00cfc8afd4", "22e949f45669eac8c80aa98be3e6b6177b41fbc4", "2152f9f91e798c23715fdce699b6a8f0f8d43170", "c484e351445232ea526c8d73b84bc529ffcddee1", "0be628988f8ee0beaad57d68cbae8b635f0b555c", "0808d953d48391b32709bdce0482d9ffd296ee05", "3de7ef76a010e624b47255b93bffa1a7c48e35a9", "1028db69758c561602bc6251cb0019e1b712d5dd", "b19c2aa855c247461d9caa48ea94b0b39d650001", "5a6777d8bc03d6a60cbd2dd5029acaf2a5197230", "06240b5f5a928d49b53e848bcbaa4bf7c6beab35", "523b3c576021827efbc54be8794953a216e77225", "a74cd295bacf1b61ffd0eb7fb4f64487347bf43f", "ecce2d601a45d7c6e63444f85ffe5212dde76e38", "53852d69c008f9ebfb05939b4eb7c1f3279437e6", "0cd78491f578f93ccfcac8957c451850e33afb58", "2aa7d52dc9f3fd61cf50bc951ffcd0914123d61b", "c40fe4a4b663e14b7ffd84a8c0496d5e48a96242", "837b071d4c83e0f90d3e6b6edc1a2d4ca35711fb", "dfe0c8336e2026fad6682bbc84466e4fbdaaff37", "13e00ac021b75f895bfc963f5ecac42ed3dae86c" ], "paperAbstract": "Online underground economy is an important channel that connects the merchants of illegal products and their buyers, which is also constantly monitored by legal authorities. As one common way for evasion, the merchants and buyers together create a vocabulary of jargons (called \"black keywords\" in this paper) to disguise the transaction (e.g., \"smack\" is one street name for \"heroin\" [1]). Black keywords are often \"unfriendly\" to the outsiders, which are created by either distorting the original meaning of common words or tweaking other black keywords. 
Understanding black keywords is of great importance to track and disrupt the underground economy, but it is also prohibitively difficult: the investigators have to infiltrate the inner circle of criminals to learn their meanings, a task both risky and time-consuming. In this paper, we make the first attempt towards capturing and understanding the ever-changing black keywords. We investigated the underground business promoted through blackhat SEO (search engine optimization) and demonstrate that the black keywords targeted by the SEOers can be discovered through a fully automated approach. Our insights are two-fold: first, the pages indexed under black keywords are more likely to contain malicious or fraudulent content (e.g., SEO pages) and alarmed by off-the-shelf detectors, second, people tend to query multiple similar black keywords to find the merchandise. Therefore, we could infer whether a search keyword is \"black\" by inspecting the associated search results and then use the related search queries to extend our findings. To this end, we built a system called KDES (Keywords Detection and Expansion System), and applied it to the search results of Baidu, China's top search engine. So far, we have already identified 478,879 black keywords which were clustered under 1,522 core words based on text similarity. We further extracted the information like emails, mobile phone numbers and instant messenger IDs from the pages and domains relevant to the underground business. Such information helps us gain better understanding about the underground economy of China in particular. In addition, our work could help search engine vendors purify the search results and disrupt the channel of the underground market. 
Our co-authors from Baidu compared our results with their blacklist, found many of them (e.g., long-tail and obfuscated keywords) were not in it, and then added them to Baidu's internal blacklist.", "pdfUrls": [ "https://users.cs.fiu.edu/~carbunar/teaching/cen5079/cen5079.2017/presentations/klingon.pdf", "https://doi.org/10.1109/SP.2017.11" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/966c2df4f4b1036a2a7d7d7d9971fe8575d174e3", "sources": [ "DBLP" ], "title": "How to Learn Klingon without a Dictionary: Detection and Measurement of Black Keywords Used by the Underground Economy", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "9684122fec2450323b1201a7df799ddee0313979": { "authors": [ { "ids": [ "1933019" ], "name": "Yuanyuan Sun" }, { "ids": [ "40172713" ], "name": "Yu Hua" }, { "ids": [ "40466061" ], "name": "Xue Liu" }, { "ids": [ "3296960" ], "name": "Shunde Cao" }, { "ids": [ "3020732" ], "name": "Pengfei Zuo" } ], "doi": "10.1145/3127479.3127485", "doiUrl": "https://doi.org/10.1145/3127479.3127485", "entities": [ "Approximation algorithm", "Cloud computing", "Computation", "Experiment", "Hash function", "Hash table", "Locality of reference", "Locality-sensitive hashing", "Nearest neighbor search", "Overhead (computing)", "Principal component analysis", "Result set", "Testbed", "lsh" ], "id": "9684122fec2450323b1201a7df799ddee0313979", "inCitations": [], "journalName": "", "journalPages": "242-255", "journalVolume": "", "outCitations": [ "54cd614a15ce790e2144e45e160596efd36e6316", "0146a878a13fc69cc1f19a6a4b1d36c41834050e", "36977673ce003f08f14ea7981bfb451414a564f6", "061ea54bed6dd31ad7dafa7c9e9213e0ccabdaec", "65fdc2897fcf0cbbdb076a69a2ac6451a9772760", "8b064b42921ca5ddda7e048a43dd30f611e42e24", "07f64c359a48f865f6d0dcc767425f1e2e0beb96", "3f1e54ed3bd801766e1897d53a9fc962524dd3c2", "91f3ace2dd9b7f8e9c12b73ff099cda6ecc22f4b", "2a43f9e8e9e4d6011a0287b5ef315bd24fd308e1", 
"268cd46a06e8e3052bbd64e96fac73d600430281", "1379ad7fe27fa07419b7f6956af754bdb6d49558", "51bb6450e617986d1bd8566878f7693ffd03132d", "397bad00f65c36d7015afa8c52a8a3862e6cf4ab", "9d66de2a59ec20ca00a618481498a5320ad38481", "3e1887ffd2431607ef2dbb75291cfde326f7f9e1", "456bc56efe30b4a24a7fdb39b74eadb0e0e0f8db", "1d32f29a9880998264286633e66ea92915cc557a", "0d06191defeb5fe96c7504979520dd8626eb8fd5", "1115301036fd165c59ab4ba81187bb04e0819cb0", "6e7254740a4017d74659b157589b391233d84775", "759b28cb6527f8820f1cffc3581884c5caa19091", "0a68c6226e04180671a474c73fa0a2b4a154d129", "09136517bb853b7e9ff3be93036ea10f5333c969", "027bac258f2306d8a6035117653c6ccd0f0cd5f2", "d7ccf8266caa85ec8d65ef828d21f1c161466427", "1b20afbd2d2a349737ed3dc246e44bbdba203190", "007771ff5d0e5bff99cb8b30bbcbb2028a17e8a7", "1eb4e4121ce5d74a5b2718487343b62f86d2e84f", "8f3f63773aa801b2cdcfa2e5699e3abb9aed7443", "5d58e5bc8161a277f423c2ecf3f3fca6318d6996", "abe7c167e44b1198fb5bea572ece4e53e35e8732", "228c25717727234204c924032037ba8e8656ba0a", "245cae919bc3621336843c84236822cd912dc0f4", "1d0d1b10384e4459fb49d1ba86ccaa45627dff2b", "026050f71175d235f3f91ca0e99e994c00f9b5a6", "5f893acccfcc421e46cd7ea069cd03c49cf44fe7", "01b5e334a4c6c65803f7b49594502b6f75c03401", "4fdbd442a8b5d74cc3c8c3437cf4acd68c2ee60c", "4721c09dbe5f1544f1ca35b1cf83c924817e0c0f", "023fbe8879f4728506b339ffee1b49515c128670", "5c498f8b35c0aa00b50ca324d80ace6650bb3931", "b9becc31d2f335dc2d12964c55bed7d1cf4dc7dc", "ff860f9c61f0d783d48893f671e986a01e5c3026", "276285e996c08a26706979be838fd94abae2fed9", "0faff4fa4347d5369956dbdbea410869fc399bfd" ], "paperAbstract": "Cloud computing needs to process and analyze massive high-dimensional data in a real-time manner. Approximate queries in cloud computing systems can provide timely queried results with acceptable accuracy, thus alleviating the consumption of a large amount of resources. Locality Sensitive Hashing (LSH) is able to maintain the data locality and support approximate queries. 
However, due to randomly choosing hash functions, LSH has to use too many functions to guarantee the query accuracy. The extra computation and storage overheads exacerbate the real performance of LSH. In order to reduce the overheads and deliver high performance, we propose a distribution-aware scheme, called DLSH, to offer cost-effective approximate nearest neighbor query service for cloud computing. The idea of DLSH is to leverage the principal components of the data distribution as the projection vectors of hash functions in LSH, further quantify the weight of each hash function and adjust the interval value in each hash table. We then refine the queried result set based on the hit frequency to significantly decrease the time overhead of distance computation. Extensive experiments in a large-scale cloud computing testbed demonstrate significant improvements in terms of multiple system performance metrics. We have released the source code of DLSH for public use.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3127485" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9684122fec2450323b1201a7df799ddee0313979", "sources": [ "DBLP" ], "title": "DLSH: a distribution-aware LSH scheme for approximate nearest neighbor query in cloud computing", "venue": "SoCC", "year": 2017 }, "96d40ea825ee21617b24732ad956f9b7307ea254": { "authors": [ { "ids": [ "1927364" ], "name": "Youngseok Yang" }, { "ids": [ "6338202" ], "name": "Geon-Woo Kim" }, { "ids": [ "31948265" ], "name": "Won Wook Song" }, { "ids": [ "1783243" ], "name": "Yunseong Lee" }, { "ids": [ "33984867" ], "name": "Andrew Chung" }, { "ids": [ "2003626" ], "name": "Zhengping Qian" }, { "ids": [ "39839886" ], "name": "Brian Cho" }, { "ids": [ "1704157" ], "name": "Byung-Gon Chun" } ], "doi": "10.1145/3064176.3064181", "doiUrl": "https://doi.org/10.1145/3064176.3064181", "entities": [ "Application checkpointing", "Cascading Style Sheets", "Computation", "Data center", "Forth", "Jumpstart Our 
Business Startups Act" ], "id": "96d40ea825ee21617b24732ad956f9b7307ea254", "inCitations": [ "13e388ab3495d313ae6838b26e8d34517a67e698", "4ce8ad1513e84cb464efa68827119295530ebaa4", "c253c6de9d8dd50560a3bc05d6d077bd3c403bff", "1804e67480b9c46b377d0a102e26d3a9a1a9139b", "2d40bd8a14b429142e487282761a2a6b95d6b96b" ], "journalName": "", "journalPages": "575-588", "journalVolume": "", "outCitations": [ "01b54e85d5b02ad6af205106739a409a105fee93", "7f7c515809f676bceac2cd178ddbc360a4a15a3c", "9aa88a8a354f1d322e242376d27d0474e50252f8", "0371b65e431972dfe0f81573274d10eb8c9eec7f", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "332f77fd05703c1607e3b57884ad31fb1fad0104", "17f70b9d1fcf3b31948ffa578ac89399751fe73d", "0558c94a094158ecd64f0d5014d3d9668054fb97", "3a043714354fe498752b45e4cf429dbae0fb2558", "7e74ea151efcdcfecffdbeaec0728f9ac1f80389", "0541d5338adc48276b3b8cd3a141d799e2d40150", "8f8a07137d8b015fb8d3fed6ab0294c05a5a3401", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "042fdf16f6b9dd28b282dfceee5e8278ab7b6289", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "2499dd8e12466c5427872242f638d3464ad4c1f5", "9c018f1b0aabe84ba61dfc969e273f2a9653c6ec", "5e0c8ba57734361b4564b75ab55af1d2b5ccde82", "12635bdd3bd32f09c85a9070977a281fcb32ff61", "3e257f01e3ee71545d824a1615c35659525b856a" ], "paperAbstract": "Datacenters are under-utilized, primarily due to unused resources on over-provisioned nodes of latency-critical jobs. Such idle resources can be used to run batch data analytic jobs to increase datacenter utilization, but these transient resources must be evicted whenever latency-critical jobs require them again. Resource evictions often lead to cascading recomputations, which is usually handled by checkpointing intermediate results on stable storages of eviction-free reserved resources. However, checkpointing has major shortcomings in its substantial overhead of transferring data back and forth. 
In this work, we step away from such approaches and focus on observing the job structure and the relationships between computations of the job. We carefully mark the computations that are most likely to cause a large number of recomputations upon evictions, to run them reliably using reserved resources. This lets us retain corresponding intermediate results effortlessly without any additional checkpointing. We design Pado, a general data processing engine, which carries out our idea with several optimizations that minimize the number of additional reserved nodes. Evaluation results show that Pado outperforms Spark 2.0.0 by up to 5.1×, and checkpoint-enabled Spark by up to 3.8×.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064181" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/96d40ea825ee21617b24732ad956f9b7307ea254", "sources": [ "DBLP" ], "title": "Pado: A Data Processing Engine for Harnessing Transient Resources in Datacenters", "venue": "EuroSys", "year": 2017 }, "96f23d87015bdb919cde92f038ca5a76c7b6bb8c": { "authors": [ { "ids": [ "34827976" ], "name": "Praveen Alapati" }, { "ids": [ "9342263" ], "name": "Venkata Kalyan Tavva" }, { "ids": [ "1702478" ], "name": "Madhu Mutyam" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.47", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.47", "entities": [ "Algorithm", "Locality of reference", "Multi-core processor", "Principle of locality", "Scalability", "Search tree", "Throughput", "Tree structure" ], "id": "96f23d87015bdb919cde92f038ca5a76c7b6bb8c", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "356-363", "journalVolume": "", "outCitations": [ "31181e73befea410e25de462eccd0e74ba8fea0b", "38611b424808954be2c1375da1a873b1e2487ace", 
"9043d0e94ab9c4884682098e73dd61c1e289934a", "f2bd972d7d516d9c1854174b8ca0f8aa51200428", "500adfb955f443c9fb0b8a44a5a03887fa4e9729", "42142c121b2dbe48d55e81c2ce198a5639645030", "73569727e735c4c5628b8410cf6b971a9ef07ef5", "6075f8bfc541841270f223de64b577e17a748b75", "663eda36657a10a2ab0d1e6482b0844efb1291cc", "a24b1525f5385836231c32867d871ffe2effe002", "00b3ebd315991e5b5f4e6beec2e1488281368028", "58da996efd7320d1e484263c97c930c8979c474f" ], "paperAbstract": "Effective design of concurrent tree implementation plays a major role in improving the scalability of applications in a multicore environment. We introduce a concurrent binary search tree with fatnodes (FatCBST) and present algorithms to perform basic operations on it. Unlike a simple node with single value, a fatnode consists of a set of values. FatCBST concept allows a thread to perform update operations on an existing fatnode without changing the tree structure, making it less disruptive to other threads' operations. Fatnodes help to take advantage of the spatial locality in the cache hierarchy and also reduce the height of the concurrent binary search tree. Our FatCBST implementation allows multiple threads to perform update operations on the same existing fatnode at the same time. 
Experimental results show that the FatCBST implementations that have small fatnode sizes provide better throughput for high and medium contention workloads; and large fatnode sizes provide better throughput for low contention workloads, as compared to the current state-of-the-art implementations.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.47" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/96f23d87015bdb919cde92f038ca5a76c7b6bb8c", "sources": [ "DBLP" ], "title": "FatCBST: Concurrent Binary Search Tree with Fatnodes", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "970364730c038572ee9b7b69eef86d97a5488a2d": { "authors": [ { "ids": [ "40205336" ], "name": "Pengcheng Li" }, { "ids": [ "1804605" ], "name": "Dhruva R. Chakrabarti" }, { "ids": [ "1716493" ], "name": "Chen Ding" }, { "ids": [ "32967785" ], "name": "Liang Yuan" } ], "doi": "10.1109/IPDPS.2017.83", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.83", "entities": [ "Algorithm", "Benchmark (computing)", "CPU cache", "Cache (computing)", "Central processing unit", "Computer architecture", "Computer data storage", "Emulator", "Non-volatile memory", "Non-volatile random-access memory", "Persistence (computer science)", "Persistent data structure", "Persistent memory", "Time complexity" ], "id": "970364730c038572ee9b7b69eef86d97a5488a2d", "inCitations": [ "aa0fb8802532106dcb78c62065258b8e4683ec94", "e3a66a2e79c2a5f7a52bf4e8089bf522fdc04abe" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "112-122", "journalVolume": "", "outCitations": [ "37a1e8411669e29cf8fbf48ec920c97c0066ac7e", "f8aa33900f552f8112d6186d78bc845d2dfc0007", "2e8ab636f544007408884dc6fafafdb00a4cd62a", 
"24f45805614b7e74346e56e5d151fc460432f886", "0261afd40eee66cea4ea682fab322a439a28f37d", "998376a2b4e53acab8dd12f7137dad992c13a8ea", "4209919a9b9618d69a145b15927b5c455f9d05d4", "5341ce4822eb9cdb614615d57cea02edc8d33c05", "0558aeb941b46e7b588e5d81bdc01c11a53ba45d", "47b851237f240831abee3971bca6bb8d2a121eb1", "78f75d24f58c69386d3d10c27fe55c9bbccb95dd", "2d45779437516ee55e5f9f4e7a7d8803fa795443", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "47ccfd0c9dc218f5496783310a28c581730b9ca7", "0c0ff71e1f225312bd24a2d78153f0b3f3816285", "7765baefdd1d404fc31b372094cdebbcb1b63a9e", "a7e1330976e46e7a48986f2648381f8876ac653e", "23773ffc679a8d9ebfd73810dec3e6fe6aa278ab", "209c2347a28bc0af9f8ace63ebbdf056729f41dc", "512a8925693d5f4b8e4cfde32bcd3c846a14b71e", "9225ce5b4359748953cb1de088da5b8a63397490", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "2f1993d2c6c82e6a794adf19d1b5cec9fc593602", "0332013fc380ca283d3afc457c430c513d19cc51", "60b85b7ee655397a4d2202f9cdf6dd5e3f04f6fd", "43a7ea9a745da76f5fcf74d1b2ae4786a9f37664", "72734685215d1a65a8b2b0f7e9e8f6c3e89fce3e", "b8735a449f0a1f1889c6b744061360aa85afaa6b", "2f42558a0b49b56ef706e8435eeb7bb480f58aad", "4dd4e6f0806001700b0b310172f8161ac47e9389", "ca4564d556b03eeee755fad7a89475072424ea56", "0cc19393203cd41fc29e3cb940ee468039cd0158", "4a58e3066f12bb86d7aef2776e9d8a2a4e4daf3e" ], "paperAbstract": "Non-volatile main memory (NVRAM) enables data persistence in memory. However, the existence of transient CPU caches in modern computer architectures brings a serious performance issue. In particular, cache lines have to be flushed frequently to guarantee consistent persistent program states. Hence, persistence and performance cannot be easily obtained simultaneously. In this paper, we optimize data persistence by proposing a software cache. The software cache first buffers lines that need to be flushed, and then flushes them out at an appropriate later time. The software cache aims to maximize the combination of cache line flushes. 
We designed a new linear-time algorithm to calculate cache miss ratio curve (MRC) so as to adaptively select the best cache capacity at run-time based on program behavior. We evaluated the software cache on a real-world memory-based database benchmark, the SPLASH2 benchmark suite and four micro-benchmarks. Results indicate that the software cache solution reduces cache write backs to persistent memory by 12× and improves performance over the state-of- the-art methods by 2.1× on average, measured on a real system emulator.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.83", "https://doi.org/10.1007/978-3-319-52709-3_8" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/970364730c038572ee9b7b69eef86d97a5488a2d", "sources": [ "DBLP" ], "title": "Adaptive Software Caching for Efficient NVRAM Data Persistence", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2016 }, "97215c33155ce8c826f91c08823766ed999acc3f": { "authors": [ { "ids": [ "37319881" ], "name": "Jan Friedrich" }, { "ids": [ "1792619" ], "name": "Christoph Lindemann" }, { "ids": [ "2869919" ], "name": "Michael Petrifke" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.50", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.50", "entities": [ "Contextual advertising", "MovieLens", "Recommender system", "Whole Earth 'Lectronic Link" ], "id": "97215c33155ce8c826f91c08823766ed999acc3f", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "380-387", "journalVolume": "", "outCitations": [ "329c5ee99db8ce14d2b68d9a3ca760e1619d19c6", "d272ce4a41f29188d18c711c24cec2ec0eb23989", "00ec0923a12e6ee12591ab8c1ce941b540e419a8", "e7d53f538f5239739d1f943c81d17e4a167c65c6", "33bb3b15729dba3f74dd6bbc36e792f1903a6207", 
"41245aa31780116ae14cab0290841070779bf4a5", "6572b014c44d7067afd77708b34a074f3497bdbe", "0f929c5bcb8599d4f9f4071f676478ff44284fd5", "276ebc620a8976026bd2d03582b9ecfa3738d43c" ], "paperAbstract": "In this paper, we show how to integrate user-item scoring into a graph-based tag-aware item recommender system. Building upon the ProbS and PLIERS methods, we introduce refined formulas for affinity and similarity scoring taking into account user-item preference in the mass diffusion of recommender systems. Additionally, we propose a two-step similarity score that recommends items based on a repeated mass diffusion on the item-tag graph. We denote the proposed method as User Preference-based Probability Spreading for content recommendation, UPPS. UPPS relies on the notion that the influence of current user items on the recommendation process should depend on the user item preference. To evaluate the proposed approach, we employ the well-known MovieLens dataset. In comparison to ProbS and PLIERS, UPPS yields an improvement in both the NDCG@10 and P@10 measures by more than 25% over ProbS and more than 100% over PLIERS.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.50" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/97215c33155ce8c826f91c08823766ed999acc3f", "sources": [ "DBLP" ], "title": "User Preference-Based Probability Spreading for Tag-Aware Content Recommendation", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "9722bc0ae3d0ab8ef7f3efd3945423c1364cef64": { "authors": [ { "ids": [ "1734275" ], "name": "Yi Wang" }, { "ids": [ "35423240" ], "name": "Ye Xia" }, { "ids": [ "38125149" ], "name": "Shigang Chen" } ], "doi": "10.1109/CLOUD.2017.26", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.26", "entities": [ "Algorithm", "Cloud 
computing", "Experiment", "Heuristic", "Integer programming", "Linear programming", "Numerical analysis", "Requirement", "Scheduling (computing)" ], "id": "9722bc0ae3d0ab8ef7f3efd3945423c1364cef64", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "138-146", "journalVolume": "", "outCitations": [ "32a11508fcbbb64a9b427b57403dc1ad54b6d718", "1d84ed02bc65400393723c37a70cc68fe39fbef4", "813e836347456fb2d61c8feb6a71b0a4165c5c6d", "00da51b59055b8915c1d186206127d5b152c5949", "3905a97f8d23d8f1ed4d69d4dffe547ec63faa79", "520c257fe4636bf6791d6db0210cca79d8a4c107", "bd11a56ef36908d78c0e76cbe7979ed5cf44de19", "00d99332631cda8eba4592305d72e9a8ae2ebed2", "1324f1d5b20f08cac775f10089a788767c56d5a9", "067c137c8340f5a05c8215235e2a2824f667aa06", "21fea4574067c8386820d572cb2e8f7005cd34ff", "62183550749e7a1bd483246c03441b3be7c06335", "0f7502716508bdac40207f51be2e7623e8823088", "5673d837c41eeb199b4a1948d26ef22b86c2e6ed", "001759eb648b12ec121a22c1bedc27f02b1feba8" ], "paperAbstract": "We study a fundamental problem of how to schedule complex workflows in the cloud for applications such as data analytics. One of the main challenges is that such workflow scheduling problems involve many constraints, requirements and varied objectives and it is extremely difficult to find high-quality solutions. To meet the challenge, we explore using mixed integer programming (MIP) to formulate and solve complex workflow scheduling problems. To illustrate the MIP-based method, we formulate three related workflow scheduling problems in MIP. They are fairly generic, comprehensive and are expected to be useful for a wide range of workflow scheduling scenarios. 
Using results from numerical experiments, we demonstrate that, for problems up to certain size, the MIP approach is entirely applicable and more advantageous over heuristic algorithms.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.26", "https://www.cise.ufl.edu/~sgchen/Publications/2017%20Using%20Integer%20Programming%20for%20Workflow%20Scheduling%20in%20the%20Cloud.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9722bc0ae3d0ab8ef7f3efd3945423c1364cef64", "sources": [ "DBLP" ], "title": "Using Integer Programming for Workflow Scheduling in the Cloud", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "97255d2c710ed3a0be0b1c761429ced58dd7382c": { "authors": [ { "ids": [ "2267110" ], "name": "Shirin Nilizadeh" }, { "ids": [ "10375926" ], "name": "Francois Labreche" }, { "ids": [ "24000497" ], "name": "Alireza Sedighian" }, { "ids": [ "1934617" ], "name": "Ali Zand" }, { "ids": [ "6683839" ], "name": "Jos\u00e9 M. 
Fernandez" }, { "ids": [ "1715189" ], "name": "Christopher Kr\u00fcgel" }, { "ids": [ "2350947" ], "name": "Gianluca Stringhini" }, { "ids": [ "1711242" ], "name": "Giovanni Vigna" } ], "doi": "10.1145/3133956.3134055", "doiUrl": "https://doi.org/10.1145/3133956.3134055", "entities": [ "Adversarial machine learning", "Anti-spam techniques", "Enterprise social networking", "Machine learning", "Simulation", "Social network", "Spamming", "Trust metric" ], "id": "97255d2c710ed3a0be0b1c761429ced58dd7382c", "inCitations": [], "journalName": "", "journalPages": "1159-1174", "journalVolume": "", "outCitations": [ "5c371701115dee38149b29400c10eeeb2bd00a35", "5dd778727cbd82023db0892dc9d489b2f5f9bb3e", "5afc96145b117108dbfcdbb73d379adf040200e8", "1bd10813ade534b5500e92600d909bacb514138d", "6f006a3895dd8fb24f83235a67f2fe72418aa800", "a515fd3bb97782f14ed7690d78e3a9b7addf6054", "0bfee412a7d9c4b8107623341695be7e62442d87", "5f72c4f0bb0d75bc731ad83ee14e0f25b098a271", "a5ce1a22c2c824f5151a08c4c0a5e42fe5eb7fab", "376ffb536c3dc5675e9ab875b10b9c4a1437da5d", "0a645cbb39ac37929a2e6ba55fc248dc4ca967e4", "71fbbc1675780f2f945073f9d92c09b8d76f80f0", "52d3d9c8c07eccb23dbd2c133176c07291560da1", "04d7370bea4dc5f7369a1dd7bff177529026a439", "258c8adfba357ed20cc03b5c2229eb773924bc08", "1597409a71b61c1ab4b2230b80759e0cc83037c4", "98009627fe1aea92640428648ff824bcf244bceb", "224f2b4ba00b61ed5898d7da69669a92f2a82bdd", "182b7e3e4d00fb1e60da7bc2739b78f123e485e6", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "58b1b0a22469fadaee0339d779333542f0ff4939", "f356ec2afdde63b923a848b8fbf91f8d6f83873f", "32d16e35ac8801cf39a6a996429780d8c1a695fd", "73e001d5aeeec47d7eda1f5e0d9a73e45d2f2764", "cc88ed7fee917555f13be25c6e6e60380a15ecee", "254687dec6e5456fa289826da6558186ef2cc24f", "33a3ad9983a4879f00d6ed38c5891bb67ad461e7", "6cf06d1abc900c29bbc090af2467f3c8a60593d8", "edbddebd82ea25917bdb01d311af2b8c5d7523d2", "2d2d84beafcf36d38eda7f5a3497011df1d83825", "312a2edbec5fae34beaf33faa059d37d04cb7235", 
"57c7afc5e85ba03936e2881e401325b613169fbb", "2830246be09bbd376cd7f2ed9ae150110dcb08cf", "eb1382db4796fb74f8f91f9ac00c05d077740fbd", "27392f65b6aa29d76c0a1bb4d68d39251c9b6357", "38ea8361ab3bf672e575d0424986e78f109c1bb5", "653fbfbad9d565dd5e5e0d48b6bb32dd02e8f157", "8385a2213f42ce6750588c2891a0b9bb5d68ae96", "04bb4d96c94ff9f6f19fe12be38532c8c4fb35d4", "5d9c00f2ca7f7145425e60830b9582b4fc866a61", "6470930ff36cde541c837bedcf17c20490fedbbc", "06501b7ea604a8b8ffff402ee492955e6892daad", "46dcba93715fb8246c6d7aee96c9fba00fa902a8", "039b08ab33e94c18e4976bad3bc363a9f8d24213", "026972d6a94755c4a3ddf9ab0d463fd0416c4262", "1e63600b5906a6e18c2b9540155b9adb85c4d437", "164f7422f2dfa481179ed34f6e180b0bb60f4174", "6ff68637a09d89c4aa3bb29a4d47391fb0000d06", "c0c7f7e4cb004a03f09e985166b6fb25aaa0b428", "bfcf14ae04a9a326f9263dcdd30e475334a96d39", "4a00ebe98d455d61dc1b708265c237fe2ee6ec64", "b7a78492e56fc00aefef47c8f776bda0f872c0cd", "1ed8886fb8a9ba6b57b8742d0e5b76a32daa8fad", "0706356c9ab6014d6b04577d38289ea8328291a5", "3036a8349138b8a2987aadd6ad1b536d1b593d19", "6dd9bb6b38e5b84616e207f00a181dbadce06937", "162edd6986f0a58d22b6dda1d476764bbdeeb86c", "3f1e9ac89edc17273325ededc47b2252ae583d3a", "1d35171758a147e1bf3f0b2bc30d6d3a2077ae0c", "136d0daa5d918dfebe8a26a6991053ef372892ea", "71e5a6c95ff476c303c3adeae0c1a4387485f733", "1c62b4ea4657526083414b19d5ba2073cdac6e5a", "2a91c8ff11a828209f10714cfc46fd929a51e9dc", "22ba0d428dc3935bb466ef5ae6414473b86327b0", "8862fb588a13dbf2222343dbf88d2ec58c7f91fb", "bc3b26d7e794a60e5fe54b15ad8695299bbf162d", "935331d53a4f2f19b3e4e862a183eca6dc61203d", "9cf785889f13260a791f1106fd7b16f1390002f1", "8f0860ea50da735ed3d0c5afffcdb51834606ca0", "72442ec6c473e236e7529fbf6e8b3e56866fc9e5", "2cf3fd84f30e5cae30dd46a3d7ecc0d63583b1a6", "3e656e08d2b8d1bf84db56090f4053316b01c10f", "393da513897cc819b195223b6407414da2fef023", "3531e0eb9ec8a6ac786146c71ead7f8d624fd2ca", "c430de8ea7ccb9d983c35cf874368a64e39bde31", "37092f9c79cb6220fd6f01882e9e3c1b41d0750e", 
"30593ad852efa6b0f7b21c0413041f9ae9d06e51", "3858543e01af258499ad17c12a3ea4dc1da08a70", "0cdef05699f3119eb197a4ce5a96112ba358da61", "a59f46c2ee905fabc79885189c1c6781def6e45b", "0088523c4bf74b35c2e7471e740d6758ab36f7b9", "34ead52ee8def3f07ad5ee1fea6b5e06fed3385c", "5becff7d8db7907df2b29b3e9a9c3b8cafe2caf7", "98dec3483b92540b3d89c625f00f9b07ffbf196d", "6ae5d06fd366d44107752b903d0ec22b41321e3d", "33159f5183e9af53fb1ddb3435559d2d6f01723e", "f38a69c657bbb7927ef032049c9ea9ba17322b15", "05880074c6e3a88afee0c33e899f8ad473c6cdee" ], "paperAbstract": "Cybercriminals have found in online social networks a propitious medium to spread spam and malicious content. Existing techniques for detecting spam include predicting the trustworthiness of accounts and analyzing the content of these messages. However, advanced attackers can still successfully evade these defenses.\n Online social networks bring people who have personal connections or share common interests to form communities. In this paper, we first show that users within a networked community share some topics of interest. Moreover, content shared on these social network tend to propagate according to the interests of people. Dissemination paths may emerge where some communities post similar messages, based on the interests of those communities. Spam and other malicious content, on the other hand, follow different spreading patterns.\n In this paper, we follow this insight and present POISED, a system that leverages the differences in propagation between benign and malicious messages on social networks to identify spam and other unwanted content. We test our system on a dataset of 1.3M tweets collected from 64K users, and we show that our approach is effective in detecting malicious messages, reaching 91% precision and 93% recall. 
We also show that POISED's detection is more comprehensive than previous systems, by comparing it to three state-of-the-art spam detection systems that have been proposed by the research community in the past. POISED significantly outperforms each of these systems. Moreover, through simulations, we show how POISED is effective in the early detection of spam messages and how it is resilient against two well-known adversarial machine learning attacks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134055", "http://arxiv.org/abs/1708.09058", "http://www.cs.ucsb.edu/~vigna/publications/2017_CCS_POISED.pdf", "http://www0.cs.ucl.ac.uk/staff/G.Stringhini/papers/poised-ccs2017.pdf", "https://arxiv.org/pdf/1708.09058v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/97255d2c710ed3a0be0b1c761429ced58dd7382c", "sources": [ "DBLP" ], "title": "POISED: Spotting Twitter Spam Off the Beaten Paths", "venue": "CCS", "year": 2017 }, "97273833e54498bcfb44de5bc9c3755a29e0af54": { "authors": [ { "ids": [ "2964163" ], "name": "Bertil Chapuis" }, { "ids": [ "1722985" ], "name": "Beno\u00eet Garbinato" }, { "ids": [ "1739619" ], "name": "Periklis Andritsos" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.35", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.35", "entities": [ "Data center", "Data deduplication", "GPS navigation device", "Inverted index", "Performance", "Relevance", "Spatial analysis", "Spatial database", "Text corpus" ], "id": "97273833e54498bcfb44de5bc9c3755a29e0af54", "inCitations": [ "184e3516e6a77db3b4164d628cfcf08546e2e5da" ], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "270-277", "journalVolume": "", "outCitations": [ "01320190d1c2ecd869adea4c1b8faafe39ecb4b2", "02cc5b5ad2d7ca9f83c9e566bcb5f9b608ab0619", 
"9b6df30a4ad0cfc03ffa0602f72801119076b800", "31037314ad53a20ab92af6d3301849daeb1f6421", "15f839c51b9820b6180ee3b4b198c00e680d478a", "296ceb35d69589451c011d4c088e49a6b3bf1e02", "0d99a8787bd3abe24c7737775da4d842bb86e4ab", "99a4b339424b1d2eabc95b4dc9954ddc6c3702bb", "a10f3a5e1fc4c607a4169442468c57ede8ffd370", "766f66c31bea550f42ec8c936311d227fcf46064", "351df512735096126454f5d4bc8e9ae56f4cd288", "bb90aa0bd362d615e3598f52504d06b20125512d", "46f766c11df69808453e14c900bcb3f4e081fcae", "569b6724588f23bb5d68961a8351c442e1481a44", "182cb3740940f403ff6f311fa54c5c1c9d7edc3f", "c47b53a4e80f1d8f9709a17b9836db53594a307a", "19f87081d3abb0ab127e387405a881da6aae20fe", "8daeecc84fcaf42172cba7ef58e5068fae7bbcbc", "044a9cb24e2863c6bcaaf39b7a210fbb11b381e9" ], "paperAbstract": "In this paper, we present an efficient type-agnostic approach for finding sub-sequences in data, such as text documents or GPS trajectories. Our approach relies on data deduplication for creating an inverted index. In contrast with existing data deduplication techniques that split raw sequences of characters arbitrarily, our approach preserves the semantics of the original sequence via the notion of token and can be used to index normalized data. When compared to indexing methods that preserve the semantics and operate on normalized data, our method increases the relevance of the inverted index, reduces its size and improves its performances. As data normalization is generally not used beyond the scope of textual data, we introduce a framework that helps identify the extent to which data should be normalized regardless of its type. On this basis, we demonstrate with a dataset made of GPS trajectories that our method can be used agnostically: it can be used to index and query data of a completely different type. 
Finally, we show that the resulting spatial-index is characterized by a better discrimination than classic spatial-indexing approaches.", "pdfUrls": [ "http://www.cs.toronto.edu/~periklis/pubs/dss17.pdf", "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.35" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/97273833e54498bcfb44de5bc9c3755a29e0af54", "sources": [ "DBLP" ], "title": "An Efficient Type-Agnostic Approach for Finding Sub-sequences in Data", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "9731df0fc65f957ff34cc04322efa19de74b9d60": { "authors": [ { "ids": [ "3192665" ], "name": "Jesper \u00d6qvist" }, { "ids": [ "1693048" ], "name": "G\u00f6rel Hedin" } ], "doi": "10.1145/3136014.3136032", "doiUrl": "https://doi.org/10.1145/3136014.3136032", "entities": [ "Algorithm", "Attribute grammar", "Compiler", "Concurrent computing", "Declarative programming", "Executable", "Experiment", "Fixed-point arithmetic", "Java", "Lock (computer science)", "Metacompiler", "Non-blocking algorithm", "Speedup" ], "id": "9731df0fc65f957ff34cc04322efa19de74b9d60", "inCitations": [], "journalName": "", "journalPages": "151-162", "journalVolume": "", "outCitations": [ "941070f2b066c064dbf8ab022a58e381af3c0184", "0954212d0d60a1053de84760d96df2f5dea6c208", "7ef221d33b50067333a24076f17a3186847d97bc", "d970bf63dd49f5bfe151bce63ee79729b8042b68", "476952721ead5b7ea2fa2ff2d4f39d5440fc9144", "11d9279db49a444cc3be3f7564c3d8cb47e39cc9", "23f9005fac3568c4af0c4beadc97a27ae18583f6", "c562fa866f01c8c86a68c5108111c48364ff4283", "ad5c1d55da3a2e47315c531f995f7414e9d556ea", "3089127f8ab8b6e22c7956754b34904cb2794c7e", "45bf5f54280f999d078197cf0658f48a847f4bea", "5531807b753a46a6b7f945cb6bd6fb898e097e18", "457e62e93d81b1aee73e543f1bc19b5fb4ca1416", 
"9a08568179c22f2fa6259027af661abf3f1d2547", "6d2712a243246434750317f1e2f05d3e31f2d717", "306457fed6b7b5b6158f5993f6506b52ab687779", "fcefe5d41194ef8a646132e6317b10c53f1edff2", "4d380250dbdf5f3e7a0a9cd2f4539993b1589bca", "7c460946452b43a98f924a08e7d7861d75cc359b", "0b61a17906637ece5a9c5e7e3e6de93378209706", "8c0f865cba4d4e298c623252dad8ac9cd77e50b1", "796159144cef7d93d3210ef56eabe524f8651079", "8ff72a6b9664a89e228f42ab8f673bcff64b4e9a", "8e4ca1be8aa81880668ef4f760c8cb500355d19a", "6968869f4d6a4dd09ceee59c694931f9696f40c2", "59c5ffed41637ce335f70685d0e5970183e7b367" ], "paperAbstract": "Reference Attribute Grammars (RAGs) is a declarative executable formalism used for constructing compilers and related tools. Existing implementations support concurrent evaluation only with global evaluation locks. This may lead to long latencies in interactive tools, where interactive and background threads query attributes concurrently. \n We present lock-free algorithms for concurrent attribute evaluation, enabling low latency in interactive tools. Our algorithms support important extensions to RAGs like circular (fixed-point) attributes and higher-order attributes. \n We have implemented our algorithms in Java, for the JastAdd metacompiler. We evaluate the implementation on a JastAdd-specified compiler for the Java language, demonstrating very low latencies for interactive attribute queries, on the order of milliseconds. 
Furthermore, initial experiments show a speedup of about a factor 2 when using four parallel compilation threads.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136032" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9731df0fc65f957ff34cc04322efa19de74b9d60", "sources": [ "DBLP" ], "title": "Concurrent circular reference attribute grammars", "venue": "SLE", "year": 2017 }, "978b209bead0d121b28b7005119bf64f0998ce86": { "authors": [ { "ids": [ "3330305" ], "name": "Zhan Qin" }, { "ids": [ "1689202" ], "name": "Ting Yu" }, { "ids": [ "3018083" ], "name": "Yin Yang" }, { "ids": [ "1783739" ], "name": "Issa M. Khalil" }, { "ids": [ "33285410" ], "name": "Xiaokui Xiao" }, { "ids": [ "1713938" ], "name": "Kui Ren" } ], "doi": "10.1145/3133956.3134086", "doiUrl": "https://doi.org/10.1145/3133956.3134086", "entities": [ "Baseline (configuration management)", "Cluster analysis", "Contact list", "Differential privacy", "Experiment", "Graph property", "Internet privacy", "Population", "Recommender system", "Social graph", "Social network", "Synthetic data" ], "id": "978b209bead0d121b28b7005119bf64f0998ce86", "inCitations": [], "journalName": "", "journalPages": "425-438", "journalVolume": "", "outCitations": [ "40eb1f990ac292b14b56ea06e61d9aeb9bfa28c3", "3671338dc8c84d51b285bee79f85e7f3937a5078", "3819fddcae6cd9550dcb1d28dd78480924e688b6", "842be98042d0884e7a449ec0e262627148eab5f8", "0db4d7a384c7feb5832ff3563c24cb0f6140e0ef", "01c2e2eff7daaa9c1ea2a329519d0e838ebc6c14", "1ed8886fb8a9ba6b57b8742d0e5b76a32daa8fad", "c73287153c0a50102a40800c1ada626a410c63cc", "2173406c4ca5fff0de66e8cbed4cb01ca959cb31", "526b43dc4c046c7505a277be2bddf0af97fc5be2", "8726139a30434175795fe924188bd5c6e0b0740d", "0fb0781cac8b3caac879f19d53cce72bd3de2397", "8490234d79b47e459824dcf87c1e288211a3c964", "23d1255f1a0453ba96c6e6ef054903086f3656df", "3cec3624644e4ce4fd1d51a66e8e741531a6925a", "7e7713af700613492c56074c3f73aab9a222a825", "0d0b9664d28be6b212e721d77288814df638d5cf", 
"249d2e15cfcd531e3f91d561877d5b23d31ec2e8", "0908a2e14f5e53f762e70271f9c4519f87b83f98", "b532099ff8b67049f292cd62700dca37fc2be623", "1cf08cc1e0aafd6d783eec70add7e0875b7cd32a", "259da70238f076c670c6cc2901b82b3f20d472df", "5aaf311172b9778d78f6904fbe40124c63463b57", "0041adeb171b36f7ef568b6934ea3f4263163c05", "53bd1357a20550caf1317803e7bc88d3440a6984", "1871ea4cf23441d0297c99d9115f664a6ba0efda", "1521d39088b203ddac981d10d214f463449ae95b", "1eddbbbdec587b0906013de1d377346cb7f8884b", "4a6f84890570a4558f4679b58860505d58d729b9", "253cc4744e6b2b87f88e46188fe527982b19542e", "d7d7d6867b98dfbc5196034edb7340132326772b", "6227544195ed3cb30e411b31507e330ac2397398", "0371f9e3efbcd4829b5ffbff585155746ef05284", "283ecc8622694c070fa53aee7a1c37dadc603f8d", "4a4ba3a1fefa9b9551dcb0953fc8168e23b319be", "3696da952ee2f16793094455fdff394023b80e5c", "7a160de26bd8c83273394cce0dd2a7ac66a80a33", "815cc46e7e9f2a58f743e787df3471984f1e757a", "19c83d150727f832362103ff4b7551356abaa69f", "2005b01f44dfc7b7861f37540f6f4fc8fc8a18ee", "336237fd41c22a697fb7cab88679ac0ea0b3fa52", "8cd9aa720a3a2f9dcb52ad9eb1bf258a80ce0648", "15075f37c428827209a2bc8b1595a04d5a21dc8b", "64028c85cd7b7e42f208e29734028572d7735c61", "684cbdc64df41f30e0f6ba4f9b442285519f605b", "f75d683b62493bf5d1a166547115bbf83e3c1fa5" ], "paperAbstract": "A large amount of valuable information resides in decentralized social graphs, where no entity has access to the complete graph structure. Instead, each user maintains locally a limited view of the graph. For example, in a phone network, each user keeps a contact list locally in her phone, and does not have access to other users' contacts. The contact lists of all users form an implicit social graph that could be very useful to study the interaction patterns among different populations. 
However, due to privacy concerns, one could not simply collect the unfettered local views from users and reconstruct a decentralized social network.\n In this paper, we investigate techniques to ensure local differential privacy of individuals while collecting structural information and generating representative synthetic social graphs. We show that existing local differential privacy and synthetic graph generation techniques are insufficient for preserving important graph properties, due to excessive noise injection, inability to retain important graph structure, or both. Motivated by this, we propose LDPGen, a novel multi-phase technique that incrementally clusters users based on their connections to different partitions of the whole population. Every time a user reports information, LDPGen carefully injects noise to ensure local differential privacy.We derive optimal parameters in this process to cluster structurally-similar users together. Once a good clustering of users is obtained, LDPGen adapts existing social graph generation models to construct a synthetic social graph.\n We conduct comprehensive experiments over four real datasets to evaluate the quality of the obtained synthetic graphs, using a variety of metrics, including (i) important graph structural measures; (ii) quality of community discovery; and (iii) applicability in social recommendation. 
Our experiments show that the proposed technique produces high-quality synthetic graphs that well represent the original decentralized social graphs, and significantly outperform those from baseline approaches.", "pdfUrls": [ "https://acmccs.github.io/papers/p425-qinAemb.pdf", "http://doi.acm.org/10.1145/3133956.3134086" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/978b209bead0d121b28b7005119bf64f0998ce86", "sources": [ "DBLP" ], "title": "Generating Synthetic Decentralized Social Graphs with Local Differential Privacy", "venue": "CCS", "year": 2017 }, "97d8304dfa81f7a3b6ee3bfdb6c6d46d7a216b4d": { "authors": [ { "ids": [ "34428485" ], "name": "Andrew Rice" }, { "ids": [ "3184719" ], "name": "Edward Aftandilian" }, { "ids": [ "2220990" ], "name": "Ciera Jaspan" }, { "ids": [ "13987128" ], "name": "Emily Johnston" }, { "ids": [ "1884064" ], "name": "Michael Pradel" }, { "ids": [ "26976764" ], "name": "Yulissa Arroyo-Paredes" } ], "doi": "10.1145/3133928", "doiUrl": "https://doi.org/10.1145/3133928", "entities": [ "ASM", "Algorithm", "Angular defect", "Application programming interface", "Best practice", "Exception handling", "Identifier", "Internal code", "JDBC", "Java", "Method (computer programming)", "MySQL", "Open-source software", "OpenJDK", "Precision and recall", "Programmer", "Programming language", "Sensitivity and specificity" ], "id": "97d8304dfa81f7a3b6ee3bfdb6c6d46d7a216b4d", "inCitations": [ "2ddad233613450bdcb7287b13a7b15356cab2ef5" ], "journalName": "PACMPL", "journalPages": "104:1-104:22", "journalVolume": "1", "outCitations": [ "1224d34a3d31b80ec4df903b6885a3672afa1ab1", "02f9fc2f52a45f05c24e2ac2e4353d4f8631d7ba", "09ad9778b7d8ef3a9a6953a988dd3aacdc3e85ae", "53c96fead0dc9307809c57e428d60665483ada9a", "5eab6c72ba39e0fea5c3aac0c2f5f9cc0a03eb0c", "080419ded5f843f4018d7dcfe2e8dd4c2880a68c", "1565c1f47707dde7bb5cd3145f03ea2fe5d305e6", "7eb04479154dac633c5b71e4d59804e76dc5d809", "2be31918ca1c0cbd7b572a8429ce20d25b765862", 
"597a37c1282639a3f5920455bed38dacc5d1aa71", "7f013f172a45824d907f68481e92a22e0188ea0b", "9a56a9cea19b83bf46ab2d47b59bc1ea3020a2b1", "517f88cf48c82490ea1838d4901ff435c3633c0f", "d20b417ffd14c30e1fa1db9dfd781e8b28d33934", "0ed6455b996d66f83ae40345156caac316b0a9c6", "1c6dcc84016679fe36f5664662bc6b49169900af", "33aeeab47f547da8ff4b95c2328826a8d92f260a", "de9c49aee2b7c7104f9065dfbafe4b478dec2967", "04086dc692fa8e565bbf6d4a248dddaf52ab7a37", "ad94b595fbe9994e9cae80f7f161e8a92065b1d9", "8eec72b01833bead6567e5a7d0cf8e66b398ef4b", "cda3a5e9a75d71d4fabac3c7916c3a0d0c01d4c2", "1e688be9f4554aa981fe3db9e2a66388b05bd167", "7a4567c922705938d74f09ddec5e3ef0c51693e6", "7d0577772fe06b773d359d1b4060fce92fd4948f", "0b60581a4d1c277467daf22e7172d948c39edc64", "23eb55812d5c478ec677a03aeccb0bd99d6db671", "a8414a566b7c40a4a0c679b775ef55cf1723d979", "295f4ffa651675b22ae8e2f3f30b400330da0c69", "3fab5fa0ba3e9dc91c73377b90e49f433ebdbc50", "17d24cb36625d5aaa1370c43f877b5ede2c908ea", "1d3fc81f0c195d3fbb99ff511837ac4dfdadf290", "64fa5b71eb2c77c03f41b0a46d27d86719bc4dfd", "f5c5ad347c31e7c033efffb956a2de851dc93971", "9769e24c45c87e0daa5cff39991e0313882213fd", "00c9bef0807447d9e9d977b7bc07da367ba3ad8a", "129707a3e577c8fe0f491a63ee628700874e3ed5" ], "paperAbstract": "Identifier names are often used by developers to convey additional information about the meaning of a program over and above the semantics of the programming language itself. We present an algorithm that uses this information to detect argument selection defects, in which the programmer has chosen the wrong argument to a method call in Java programs. We evaluate our algorithm at Google on 200 million lines of internal code and 10 million lines of predominantly open-source external code and find defects even in large, mature projects such as OpenJDK, ASM, and the MySQL JDBC. The precision and recall of the algorithm vary depending on a sensitivity threshold. 
Higher thresholds increase precision, giving a true positive rate of 85%, reporting 459 true positives and 78 false positives. Lower thresholds increase recall but lower the true positive rate, reporting 2,060 true positives and 1,207 false positives. We show that this is an order of magnitude improvement on previous approaches. By analyzing the defects found, we are able to quantify best practice advice for API design and show that the probability of an argument selection defect increases markedly when methods have more than five arguments.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133928", "https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46317.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/97d8304dfa81f7a3b6ee3bfdb6c6d46d7a216b4d", "sources": [ "DBLP" ], "title": "Detecting argument selection defects", "venue": "PACMPL", "year": 2017 }, "9809bc2847bc9274564c6c3545561d920c5e44f3": { "authors": [ { "ids": [ "6891291" ], "name": "Siyang Li" }, { "ids": [ "1709817" ], "name": "Youyou Lu" }, { "ids": [ "2684311" ], "name": "Jiwu Shu" }, { "ids": [ "38639927" ], "name": "Yang Hu" }, { "ids": [ "39429972" ], "name": "Tao Li" } ], "doi": "10.1145/3126908.3126928", "doiUrl": "https://doi.org/10.1145/3126908.3126928", "entities": [ "Attribute\u2013value pair", "Clustered file system", "Directory (computing)", "Distributed File System (Microsoft)", "Key-value database", "Limiter", "Loose coupling", "Scalability", "Throughput", "Tree structure", "Webserver directory index" ], "id": "9809bc2847bc9274564c6c3545561d920c5e44f3", "inCitations": [], "journalName": "", "journalPages": "4:1-4:12", "journalVolume": "", "outCitations": [ "0c60a639dc9cd8014f685ec986c29bf55a10bb5a", "2625c985d89abbe62938e911db55b60f0c94e710", "b1ec820da48f69a4652ddf08f00e2e991126cf4b", "9c51bc1ef1097e2e2dad83997be737125e63d438", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", "37617b02017b7912ad4d977ba420ab3fa232e445", 
"9183cde02e4306828089fb8adae74736a9df3ceb", "ad43b820f35a18a45438a295b2c546b689a35e0c", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "2da760f90c3d2bf6598becdde9063093f488548c", "542300cbe9f77c015981d20b8d6113cb22d11bbf", "1cfee3e6bad11c5c92cd06065064c474a00e2412", "55e62284924ce63b0fcccf102a3551ae7396a145", "e9852418b28b3d1990ce787193ed1deb2cbc406a", "40f13c06aadfd6e9d2c96f716de2b2454d5b854e", "2fc2a466c79269e317541dee95dcb489c997f9c4", "d2ba07d52730a0a9eeca8ebe0042f4f66d1a7b94", "c7d6ee693eb72e274aa8702ea579902996e4f3d5", "3ba95775ac7ff5d6883d8f4d2859c215221e6a2c", "05227501b3727de9b117907ecd77b0bff694869f", "257c1c169dd0ae98e273efd0d0948f2a028d4c3f", "04b3aaf58a91557e15c8064660baa1cc5e8db14e", "5bb770af1973f929e8622f17ddf378d439245144", "8c06fb59a79b3b47dff8588302d8e6514a7f7a4a", "19f3c85feddc4c65409cbb73941f63a98d39fec0", "199ac28b6bc68bf05c77645ffae7640df114bca5", "4fbe8c8ace7546e3a10bfd8e151bc09a41fd3f9a", "77ee477b0e6d0b6245eca5865ba95b55ed3db434", "1b0eace707f6b86e94793d1a7c83b7d065e604fa", "55340cad246848be8a1b124036ca82fc1db5c396", "da30f9be5550ba3f0c96eba6a2ad7de28f2efeb2", "220d17b570a2355454730fb561cb602f72301a5a", "6bb60403e540318a07ed9842fedd594b33648767", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "131e1e1d163a0f49881d7b5ac092892093391015", "9f8942efd7eb663bc176c285d230b01e48ae94dc", "87eb6044798792bb4fffd2dcb477bc8ad0982268" ], "paperAbstract": "Key-Value stores provide scalable metadata service for distributed file systems. However, the metadata's organization itself, which is organized using a directory tree structure, does not fit the key-value access pattern, thereby limiting the performance. To address this issue, we propose a distributed file system with a loosely-coupled metadata service, LocoFS, to bridge the performance gap between file system metadata and key-value stores. LocoFS is designed to decouple the dependencies between different kinds of metadata with two techniques. 
First, LocoFS decouples the directory content and structure, which organizes file and directory index nodes in a flat space while reversely indexing the directory entries. Second, it decouples the file metadata to further improve the key-value access performance. Evaluations show that LocoFS with eight nodes boosts the metadata throughput by 5 times, which approaches 93% throughput of a single-node key-value store, compared to 18% in the state-of-the-art IndexFS.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126928" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9809bc2847bc9274564c6c3545561d920c5e44f3", "sources": [ "DBLP" ], "title": "LocoFS: a loosely-coupled metadata service for distributed file systems", "venue": "SC", "year": 2017 }, "9891046f5d3a6dee76d2417f8acd98b9c6de6aa2": { "authors": [ { "ids": [ "2691614" ], "name": "Craig MacDonald" }, { "ids": [ "2783910" ], "name": "Nicola Tonellotto" }, { "ids": [ "1698205" ], "name": "Iadh Ounis" } ], "doi": "10.1145/3077136.3080827", "doiUrl": "https://doi.org/10.1145/3077136.3080827", "entities": [ "Computer user satisfaction", "Experiment", "Inverted index", "Language Integrated Query", "Response time (technology)", "Rewriting", "Run time (program lifecycle phase)", "Web search engine" ], "id": "9891046f5d3a6dee76d2417f8acd98b9c6de6aa2", "inCitations": [ "1872c5f4bbd4e8c233363d70bc64c55196039bd2" ], "journalName": "", "journalPages": "495-504", "journalVolume": "", "outCitations": [ "3eae360c6ee52950f27f577aedd5f9934a04e137", "00539f3da855c8f36660f5121c280e5860fc63bb", "b66bb4396c15915e4c19d29b2b5d86a510de03f7", "3f53bbebf00bac61b89dfbba4c91505a7d6c783a", "00ab6bb0df7fd605038d64eb5798b31481a39dd0", "05bdc58374f15c2077f3e0ba1152b5a150f91027", "124401836dc6224bd987aea4da92b1d7f0d98428", "1b7ac6c0bfc15c42f65cc6532cfa58df0bcf2f9c", "8f308a14b34bcccc9be48169bc5d20adab265236", "7859071375af210096a2003f355df17817297173", "0221b18d99c15f8e045f8d42653a3a3fee9f44f2", 
"2ead7199ef67931c822b3dc414b12b08e76a6b7b", "b2d26ed1e4658b8bace957b6f4a7d0b2d5e671fc", "2bb585c4b9d89b095e9938f7d1d3286e4ac2076f", "bbd8dc2a7cafa3232b5171fd28d7dc6dc5aa45c9", "fc3272302461b74217662085a8a05a5e500dbf05", "4c9fafa3b1bed97bb00b8bc68db39a9ad48490f1", "95dddaae82aac40aab010c1778f647538e4b4aa4", "0df9c70875783a73ce1e933079f328e8cf5e9ea2", "f7a34579f122b240707874b6b76dc71af23ddb0f", "89d27fc4c5bf15762d001a39f0a74f84c89d3681", "15cb35b14344611a1849a7bf9b872d457ffbe15a", "39373a86c51539555c0b1f3eacc38b4a379f7c4e", "dc75b2811d2cd01b30fa850351aae2e0fa092cfe", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "35f3c8659274b67030d12b46ff466f7b3e543037", "16a66728e5550a9c1b1127da3b429c66a25540d1", "1b1ea9f3f15f5160b77aa2177e7fdeb6eeed911a", "40707bd624bc789d26f13734de41fa41c866a332", "91a353974741cdcac274f8dfeabde87430fbc05b" ], "paperAbstract": "To enhance effectiveness, a user's query can be rewritten internally by the search engine in many ways, for example by applying proximity, or by expanding the query with related terms. However, approaches that benefit effectiveness often have a negative impact on efficiency, which has impacts upon the user satisfaction, if the query is excessively slow. In this paper, we propose a novel framework for using the predicted execution time of various query rewritings to select between alternatives on a per-query basis, in a manner that ensures both effectiveness and efficiency. In particular, we propose the prediction of the execution time of ephemeral (e.g., proximity) posting lists generated from uni-gram inverted index posting lists, which are used in establishing the permissible query rewriting alternatives that may execute in the allowed time. 
Experiments examining both the effectiveness and efficiency of the proposed approach demonstrate that a 49% decrease in mean response time (and 62% decrease in 95th-percentile response time) can be attained without significantly hindering the effectiveness of the search engine.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080827" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9891046f5d3a6dee76d2417f8acd98b9c6de6aa2", "sources": [ "DBLP" ], "title": "Efficient & Effective Selective Query Rewriting with Efficiency Predictions", "venue": "SIGIR", "year": 2017 }, "9893f69672ac3188e25c05a4d79de31423d9462c": { "authors": [ { "ids": [ "17822345" ], "name": "Sangkuen Lee" }, { "ids": [ "1730937" ], "name": "Sudharshan S. Vazhkudai" }, { "ids": [ "9441826" ], "name": "Raghul Gunasekaran" } ], "doi": "10.1109/HiPC.2017.00041", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00041", "entities": [ "Big data", "Entity", "Jumpstart Our Business Startups Act", "Petascale computing", "Scalability", "Structure mining", "Supercomputer", "Titan", "Titan (supercomputer)" ], "id": "9893f69672ac3188e25c05a4d79de31423d9462c", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "294-305", "journalVolume": "", "outCitations": [ "7b0192f4b832cdcffab4f3243ca1b756f763778f", "eb82d3035849cd23578096462ba419b53198a556", "c150763ae0bee77c6d51e3a7eb8cd3fd9ec0aac1", "84bb64f5b935818163c668dcb5025214e7334a73", "322a6e1f464330751dea2eb6beecac24466322ad", "7ce9b4d45e49b5b46a159cca91e376f39655ea31", "3ad21b253652d1778308b1531e5ba775084d150e", "c76e5f49cbe87de17a94cb0526cadf7d68fa5d3f", "b3bf6373ff41a115197cb5b30e57830c16130c2c", "f8d3a145a02960fb15cbed3b260cddbc7ef2937f", "bbe4d6ea1b6b369d85c58c0bcc6c0e8aec8b8494", "0897928b81412750f37a19b18710022942f52ec1", "ef82501ce2722202cff3d62830d9923118aedcff", "07a180e29b5332f0e8a140daf88a85bcc63bb7b1", "2bf973df141a57270457187d0d2c070a0f43b55d", 
"2b211f9553ec78ff17fa3ebe16c0a036ef33c54b", "1dd8db60043f51c04eb7200915ebd253d2fabf64" ], "paperAbstract": "The Oak Ridge Leadership Computing Facility (OLCF) runs Titan, the No. 4 supercomputer in the world, to deliver over four billion compute core hours every year to several scientific domains, in their pursuit of leadership science. In this paper, we analyze four years worth of heterogeneous log data sources from the OLCF resource fabric, capturing metadata on entities such as users (2,546), scientific project allocations (674), jobs (1,352,402) and publications (1,146), to derive insights into the trends in core hour usage and publications, across 35 science domains. We have constructed a scalable graph to represent the OLCF entities and apply rich graph analytics for our analysis. Based on this, we have analyzed the metadata across five dimensions, namely (1) quantitative analysis of Titan system usage, (2) quantitative analysis of OLCF publications, (3) correlation analysis between system usage and publications, (4) text analysis to derive OLCF research trends, and (5) utilization of graph mining for association analysis. To the best of our knowledge, our work is the first of its kind to apply graph- based big data techniques to provide comprehensive insights on an HPC center's core hour usage and users' publication trends. 
Our results provide valuable details into an HPC center's core allocation program, measuring the productivity of scientific domains, the interplay between core usage and research output, accelerating collaboration, and in predicting new connections between resource entities.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00041" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9893f69672ac3188e25c05a4d79de31423d9462c", "sources": [ "DBLP" ], "title": "Applying Graph Analytics to Understand Compute Core Usage and Publication Trends in a Petascale Supercomputing Facility", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "98bd1ddddb3f63e81c8e75574b1d3ea656db9037": { "authors": [ { "ids": [ "3311387" ], "name": "Dan Alistarh" }, { "ids": [ "2925872" ], "name": "Justin Kopinsky" }, { "ids": [ "2800851" ], "name": "Jerry Li" }, { "ids": [ "3202209" ], "name": "Giorgi Nadiradze" } ], "doi": "10.1145/3087801.3087810", "doiUrl": "https://doi.org/10.1145/3087801.3087810", "entities": [ "Best, worst and average case", "Power of two", "Priority queue", "Round-robin scheduling", "Schedule", "Scheduling (computing)", "Stochastic process" ], "id": "98bd1ddddb3f63e81c8e75574b1d3ea656db9037", "inCitations": [ "3ccfb61bd268766477cf39bb0ec9d068bd3d1bdc" ], "journalName": "", "journalPages": "283-292", "journalVolume": "", "outCitations": [ "34fe2764de8ed746e90cb95071cf9d713f3c3d66", "a24b1525f5385836231c32867d871ffe2effe002", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "8723d777680b773525141fe7c5865aa476955239", "3b19f4e941730bac88f282c61d8097ac8abd36ea", "178b92c9d7438aa44949a4f5441e83f8a9de3ccb", "8cf7ac0b732d9f8f12e2fe0d4d8b37a2a976d8e0", "33da45838d0b6c082cc71e603fd802bac4d56713", "48c2af3d559fb2c7ef5e71efd24ab5ae217c1fee", "e438d6f2a62d00d2a3e1c9baf61b5ea4397c28a4", "718fbd8cc924463bebce3e410b65d734523cebe9", "0fca220343f411c7dac67b1f5fc1bcf5790cc030", 
"234e6be0d4238f76b3ac038ee422be39f391c625", "655abf918e5ebc49bec229ecc29d4e7dda512698", "0ad5ed5a0a7b9210993753d7594c7285d5e9b179", "946abaa6ceb50439db1811acf759828b59d46e6f", "529462c0d6d8aab16b4cd76317439d1545f8b7fa", "68a9005a5ec10daece36ca5ecb9cad7be44770b1", "3c1e0e9c5b774f8d1b7522e7b7ea90634b1e252a", "51152159525753b76ac96a23b8d3fb172d7727d0", "6db9bd41b294a7b45792b8f4ac8864f5d178f35e", "1372e033396fe1a5aa12a1b148c5015a2e09d1d6", "b022f7981d06e24c9ac03e954a4ab21746e7086b", "f98062567adb3c98bfabeb99172f8bca026a0102", "1ae7993c0c2d795b243354de48dab80bf2000356", "737c8404b74096707fb4e8b856e37581389c071b", "1e682a2cf91450db746ad2fc5ee4ca7a5b7b573b", "5c0eee6dbc2972422f3bb9b109bfce1b137fc751", "67e6e6af6d0a36f8ef573c4c7b6786fa0fe16f1a", "368e091fdcfd86cc0c433af9cb3208db8cb56031", "7e37fa3e0a6b695582df654efc92237a6c6b9759", "327cbb1da2652b430a52171d510cf72235b890b6" ], "paperAbstract": "Consider the following random process: we are given n queues, into which elements of increasing labels are inserted uniformly at random. To remove an element, we pick two queues at random, and remove the element of lower label (higher priority) among the two. The cost of a removal is the rank of the label removed, among labels still present in any of the queues, that is, the distance from the optimal choice at each step. Variants of this strategy are prevalent in state-of-the-art concurrent priority queue implementations. Nonetheless, it is not known whether such implementations provide any rank guarantees, even in a sequential model. We answer this question, showing that this strategy provides surprisingly strong guarantees: Although the single-choice process, where we always insert and remove from a single randomly chosen queue, has degrading cost, going to infinity as we increase the number of steps, in the two choice process, the expected rank of a removed element is O (n) while the expected worst-case cost is O (n logn). 
These bounds are tight, and hold irrespective of the number of steps for which we run the process. The argument is based on a new technical connection between \u201cheavily loaded\" ballsinto-bins processes and priority scheduling. Our analytic results inspire a new concurrent priority queue implementation, which improves upon the state of the art in terms of practical performance.", "pdfUrls": [ "https://people.csail.mit.edu/jshun/6886-s18/papers/AKLN17.pdf", "http://doi.acm.org/10.1145/3087801.3087810", "http://arxiv.org/abs/1706.04178", "https://arxiv.org/pdf/1706.04178v1.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/98bd/1ddddb3f63e81c8e75574b1d3ea656db9037.pdf", "s2Url": "https://semanticscholar.org/paper/98bd1ddddb3f63e81c8e75574b1d3ea656db9037", "sources": [ "DBLP" ], "title": "The Power of Choice in Priority Scheduling", "venue": "PODC", "year": 2017 }, "98cb1044adc0b3b967b2cf0869913de52b76ee19": { "authors": [ { "ids": [ "2282089" ], "name": "Patrick MacArthur" } ], "doi": "10.1109/HOTI.2017.19", "doiUrl": "https://doi.org/10.1109/HOTI.2017.19", "entities": [ "Centralisation", "Commodity computing", "DPDK / dpdk.org", "Debugging", "Direct memory access", "Forwarding plane", "Network interface", "Operating system", "Remote direct memory access", "Server (computing)", "Software development kit", "User space", "Zero-copy" ], "id": "98cb1044adc0b3b967b2cf0869913de52b76ee19", "inCitations": [], "journalName": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "journalPages": "103-110", "journalVolume": "", "outCitations": [ "f1c6796e8cb057ec23b964318c0f0f6013751e7d", "5b0b950692f5becbce981c6176cecc58439e5d0c", "1bfec476e03d3dd9b4b07ca6afcbeb0d8acdd734", "77f429051c7e069f7eb5e56d12d4391f24eb5aab", "b47f692948689d7d3fb9d902b722a52144ea5059", "c790c036ba874be05c0c9ee29187811070d67f28", "ce55e9e292cb20c90ec65d16e181f66f19898692", "14c9c04973ca9bd1cecf0892a9b90a54aa930098", "9f4e7291a0121a2db5f6920b3df731790fd9198b", 
"b31563ab9e1e173fb6f946ef8abf335aa26c7d5e" ], "paperAbstract": "RDMA (Remote Direct Memory Access) is a technology that enables user applications to perform direct data transfer between the virtual memory of processes on remote endpoints, without operating system involvement or intermediate data copies. Achieving zero intermediate data copies using RDMA requires specialized network interface hardware. Software RDMA drivers emulate RDMA semantics in software to allow the use of RDMA without investing in such hardware, although they cannot perform zero-copy transfers. Nonetheless, software RDMA drivers are useful for research, application development, testing, debugging, or as a less expensive desktopclient for a centralized RDMA server application running on RDMA-capable hardware.Existing software RDMA drivers perform data transfer in the kernel. Data Plane Development Kit (DPDK) provides a framework for mapping Ethernet interface cards into userspace and performing bulk packet transfers. This in turn allows a software RDMA driver to perform data transfer in userspace. 
We present our software RDMA driver, urdma, that performs data transfer in userspace, discuss its design and implementation, and demonstrate that it can achieve lower small message latency than existing kernel-based implementations while maintaining high bandwidth utilization for large messages.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HOTI.2017.19" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/98cb1044adc0b3b967b2cf0869913de52b76ee19", "sources": [ "DBLP" ], "title": "Userspace RDMA Verbs on Commodity Hardware Using DPDK", "venue": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "year": 2017 }, "9905d9e816d7106bed6496eb8a3ad90947342afe": { "authors": [ { "ids": [ "4043890" ], "name": "Teng Wei" }, { "ids": [ "1775391" ], "name": "Xinyu Zhang" } ], "doi": "10.1145/3117811.3117832", "doiUrl": "https://doi.org/10.1145/3117811.3117832", "entities": [ "Algorithm", "Ambient calculus", "Data rate units", "Downtime", "Electron mobility", "Experiment", "Network architecture", "Peripheral Interface Adapter", "Wireless access point" ], "id": "9905d9e816d7106bed6496eb8a3ad90947342afe", "inCitations": [ "56ee03a70a10d1a0dfcff621ae60aef76fc43ad7", "c4810a223438a7d5f6cdd915b210b44873637943" ], "journalName": "", "journalPages": "42-55", "journalVolume": "", "outCitations": [ "badc9305a8d6851513409d55e3da2a8b73fb442f", "a354a659d40e8167f52c451b3a191635da730063", "70eaf561beddde532dcab8e49bd5886fd3a984d7", "cd9a5f86817f8356b5dfe4c84a2ce99f6e87a22c", "02ee38745baa9b17505a04ea7e4f8b92af798f58", "2418cfde238efec1f86babf0787f2d25319f5c3c", "29e9cd18af650b7e448dea668121a1d98afd3c46", "58392cd42505bf2bc0675610188f6465bc20fd6f", "27644a68d3a0dd999b040ec47f08560bbce71773", "9823f8c8c43b64cc6c0c7fd09e9380d908122148", "8ee2e4cdc964b0dad050112ddd28f20c6827fa5e", "063ca005b4714603f907bbc8b89b27e3af535caf", "1b58c0f204231664fa27f1206fe0a9abe7ba4594", "60798a6e3b3010aeee53a4d13add65136e69ac6a", 
"7f604273216f5941ad72a830ff0840f0b63f9fbd", "86a120c1f61ced176097d7ec1fc4f6d3acd754bd", "56ee03a70a10d1a0dfcff621ae60aef76fc43ad7", "18f6daed30adf74d88b6cc8267e47f431f4cd7ce", "40416933853c5733696f90914ac394e52ab94d7f", "fc32f882e0ed37ae786ac9a2063418d92f5b52d5", "0d8dba43dfe0d165804d9fa0098ed0ada6a9c402", "073d430644693ae43270a1e10ff3f2b76c45fc24", "32364ba3ea0049cec1be32bde2d42549efe4d324", "38ec3a01b51c8b533544a05b3188b67cf38cfbfd", "b5d8b259052ffecd1fcf3eae9b08e31b41c24ec0", "671ef43e50af2bd00cb91b4aad6815c1b95083dc", "81ed14364300805954f948abd7f2df397df233bf", "300831b4ae35b20d0ef179fc9677f32f447fa43a", "25d8eda7ef56c83d4f997f8534dd102ea9e8e06c", "1943466070019e48204ebbee0914d87ced4ba09a", "4995aeb811015d6a0c079b41d1df83fb03600b26", "e6b9e45f1bdd48f9574c7372c6e93ca0e0a26d7f", "08e6f96da8e44d6529d29fb2087f5bbf5684404d", "c2134a8fd527924922842d979e28b7796d1a09bb", "8c01928baaff8a76b082fa2a248410c0856defa4", "47240e17ac8fa393ec6e2db2dac68454e96c8495", "d3e9db81de54297142c3625bbed06b022c7dae25", "171b90e82ba0a587dd6d9986334e79ad32f299f0", "02794bb25484e9b42b5589caf14f5ccdb4e9ab28", "048050777395f86219216960e8eadab6ebd476eb", "3478fcb26ff970c3c84a1a941aee106819f4f5f6", "980773ca869fc17562e4fbcf4202a8f21893b114", "1f2d4bea662e1442fe87fb84d949a4bcf903ca21", "144f233814d2ea63ffa8f14ecc690335763e90f7", "1f911ae809066d4a55598bce939a466de980b13b", "d3e2d9772228bbbd1c36d8997870b4a0dc2ec01a", "c49bca6ca8035435dbb7ac73c18d71b37a0da39f", "460464955cee59f610c94c9360cad879edb5d880" ], "paperAbstract": "60 GHz millimeter-wave networking has emerged as the next frontier technology to provide multi-Gbps wireless connectivity. However, the intrinsic directionality and limited field-of-view of 60 GHz antennas make the links extremely sensitive to user mobility and orientation change. Hence, seamless coverage, even at room level, becomes challenging. 
In this paper, we propose Pia, a robust 60 GHz network architecture that can provide seamless coverage and mobility support at multi-Gbps bitrate. Pia comprises multiple cooperating access points (APs). It leverages the pose information on mobile clients to proactively select the AP and manage multi-link spatial reuse. These decisions require a model of the pose/location of the APs and ambient reflectors. We address these challenges through a set of AP-pose sensing and compressive angle estimation algorithms that fuse the pose measurement with link quality measurement on the client. We have implemented Pia using commodity 60 GHz platforms. Our experiments show that Pia reduces the occurrence of link outage by 6.3x and improves the spatial sharing capacity by 76%, compared to conventional schemes that only use in-band information for adaptation.", "pdfUrls": [ "http://xyzhang.ucsd.edu/papers/TWei_MobiCom17_Pia.pdf", "http://doi.acm.org/10.1145/3117811.3117832" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9905d9e816d7106bed6496eb8a3ad90947342afe", "sources": [ "DBLP" ], "title": "Pose Information Assisted 60 GHz Networks: Towards Seamless Coverage and Mobility Support", "venue": "MobiCom", "year": 2017 }, "99201dc46fb530777d6645a6f376d554c270fb88": { "authors": [ { "ids": [ "3449317" ], "name": "Raghavendra Pradyumna Pothukuchi" }, { "ids": [ "2186447" ], "name": "Amin Ansari" }, { "ids": [ "3384083" ], "name": "Bhargava Gopireddy" }, { "ids": [ "1695950" ], "name": "Josep Torrellas" } ], "doi": "10.1109/PACT.2017.23", "doiUrl": "https://doi.org/10.1109/PACT.2017.23", "entities": [ "Heuristic", "Network on a chip", "Router (computing)", "Simulation", "Span and div", "Value-driven design" ], "id": "99201dc46fb530777d6645a6f376d554c270fb88", "inCitations": [], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "260-272", "journalVolume": "", "outCitations": [ 
"4d2bdb8d9da5b1b20708b8f41f93e7b85cd8ad51", "50de0f6a952131dfe562c5b3836e5d934b39b939", "621f06195844f960c7afca4aa04fc39dc12ba559", "7a09870c68862177d92892474700fd34b335f71a", "3ed84f2fbdc4dc6450919ec5b017e66440a5833c", "4f59ff5b79482f498bbb178cc6d73ae4b8eadd9b", "e7c1e6ece41d2a4db49354cc5166ead63cd250f6", "339ae2f3507728126b32fb2e077f3e5707dae774", "5facba913915834c37f340eca9df5618cc350547", "3966eb5cabdef5b437708a958d0480b34a87bbaf", "932d6f2725ac81019dcab28386871ebe93115c9b", "fc8da95a002aa377778d9d06afcd937ebe414340", "27c3c07363666d5dd7a28d8c329ed3154148ae76", "bfa696236c766973328bdfe3f7fd3ffd7ac9a607", "19d38434e785e3e27ed1bb4bf8cc119c0032407c", "f644e5ed34af1a47b80a8428c8e812082f1c3dae", "6e31fffc237af47fc0f09064d38465f28aa99941", "90ab2cdb38a005f887227a0904f4c4483fcc5871", "10dc03bab236aa58778b74520978ec280ecdf731", "184e36291edee5e9d850219fc07f61d2ef7bf00f", "6a9773de27440377ca480fae7c3f1a4324f0fc0a", "1c5e8319f4497d829049d8ea05dca782eed200ef", "46921fd25823cb6a7d0a5310f39667fe19baac65", "5f049497fab24c39cb69eff6defc5731dcf217a9", "5236160832766c58b1be2bf4f76f33d9d25b4600", "3acc24a9954a323149209c2a49a5a4e3603d2ae3", "47a356bf5462a495fd2240a11665a23969e1202c", "6bad4549c14a39dee0cdd3a29455fba584d61e0a", "3315112da21a8806581d473b9168b50648bf65f0", "481309c85ade5f4ff5770613ce6acb49acb12396", "e1be591e0bd8eda4ef109166db0b5edeb9ad934b", "0b0b8f1c3ffa887bf8080a3d44348c3d98a11272", "0573066c173d07f01d543392d311430a015845fa", "0c89d5d37a25f693bc31e65c0c5b9bea63148e57", "42d96591e5583c2001c100d979a8f180e1a4e6b1", "2444d6e88c9f18ea35c329fc78a2cb22688076ae", "650954f9731cdcdf75beb3fb33765815828c598f", "009400317f5bd193079715b821aecd7f1ac45704", "066c073180c003718a8b667db823c6fcd23cd30a", "b7e033561f964c657491b8f25fecc8480764bf80", "1260dbc45978a1d9ed57dc2e96e239b9a23f013e", "35c1c691ece3069baf5712a1b5cce4bbd345953e", "120490f2ed5937bbec70fcc4d8bf0a13cce9fba5", "ffecbd183dff55bd98e9cfc8c11ffb4f29031b78", "16a7dba3aeae984e32183bda49c4c5612d51bb8a", 
"329756f2d29829e1b2e713360016995855d0ea26", "3446d6833234bf1924b3b82fec84b766b65a72cb", "b1162cf8d956f02acecb40e1ca457a62619c4d8a", "5175688633b7c22fdd0b1bec4f042c30d1650a15", "f57ac7f53438b2877022125bac957fda2bb2a97b", "485b9204a1df97fceb4aedbc62e9efbc09a4525a", "63cc9119a1d29ef68c8ae5d1db44f53fe15625e5" ], "paperAbstract": "Networks-on-Chip (NoCs) in chip multiprocessors are prone to within-die process variation as they span the whole chip. To tolerate variation, their voltages (Vdd) carry over-provisioned guardbands. As a result, prior work has proposed to save energy by operating at reduced Vdd while occasionally suffering and fixing errors. Unfortunately, these proposals use heuristic controller designs that provide no error bounds guarantees.In this work, we develop a scheme that dynamically minimizes the Vdd of groups of routers in a variation-prone NoC using formal control-theoretic methods. The scheme, called Sthira, saves substantial energy while guaranteeing the stability and convergence of error rates. We also enhance the scheme with a low-cost secondary network that retransmits erroneous packets for higher energy efficiency. The enhanced scheme is called Sthira+. We evaluate Sthira and Sthira+ with simulations of NoCs with 64-100 routers. In an NoC with 8 routers per Vdd domain, our schemes reduce the average energy consumptionof the NoC by 27%; in a futuristic NoC with one router per Vdd domain, Sthira+ and Sthira reduce the average energy consumption by 36% and 32%, respectively. The performance impact is negligible. These are significant savings over the state-of-the-art. 
We conclude that formal control is essential, and that the cheaper Sthira is more cost-effective than Sthira+.", "pdfUrls": [ "http://iacoma.cs.uiuc.edu/iacoma-papers/pact17.pdf", "http://iacoma.cs.uiuc.edu/iacoma-papers/PRES/present_pact17.pdf", "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.23" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/99201dc46fb530777d6645a6f376d554c270fb88", "sources": [ "DBLP" ], "title": "Sthira: A Formal Approach to Minimize Voltage Guardbands under Variation in Networks-on-Chip for Energy Efficiency", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "994c742264337459411320aeb4b804aef89f59bc": { "authors": [ { "ids": [ "21975961" ], "name": "Abhishek Bhattacharjee" } ], "doi": "10.1145/3123939.3123943", "doiUrl": "https://doi.org/10.1145/3123939.3123943", "entities": [ "Brain\u2013computer interface", "Branch predictor", "Central processing unit", "Clock rate", "Electroencephalography", "Embedded system", "Perceptron" ], "id": "994c742264337459411320aeb4b804aef89f59bc", "inCitations": [ "554e79cfbebe0c9cbd3a1a49f8035d26bd0bf23b" ], "journalName": "", "journalPages": "409-422", "journalVolume": "", "outCitations": [ "19c825b6708cbf7b7f0b6234926a057d99efa2e7", "3364bc50921a9566d61ef8cb73baa82341725e4b", "06f925ec192ced76a38c6079da89f1be2a28e5d1", "daa5538192e0058e12a83bd64fd19866c01adcf6", "51a091a8566f1d748954a96604565d89d350c788", "6165e0b12efe4e58aec366a9be9839d6e46432d0", "62ae4296700d2365640249716e0ad096bb5204d3", "25e0dcb0e7b3446fbf16c48e9a6a4ad36f645f3b", "e1071eedf1201381f1f2be1ef44164934aa1232a", "02c78232075ac431834e3442dcb2954d4e708def", "2f9ad9610cd9989338148c4b1751ca90edc9ab42", "8d7111dfbb365fcc86804394f327be8abb736f3c", "56828bb7ad555eed8d43e6d3eba4ee39e862defe", "24d6f6b931b84f0057ab5322e5f6f1e27b21699c", "9b141de80bf24ffcade1fa858b0c03de98083dd0", "4aa35c9d2240cfe22187617dd2c63e9a5c90958d", 
"352a8957005dc5519b15ed1870751ec494d66395", "2804673b49ef3ca43691e3e1262d5b826618d6f3", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "00fc4cfb76847f94610e52268197417cf8ed73b4", "69743194ca177ef816d31a99475c3ba3ff97808c", "1bf070d27dbbbe133c1498cf480c595515425784", "2d2bb7e9f65b7f0a485edd6c3c84ed73b3ab71ab", "20797ed16fb882a74ecd2bca9d9b21ce82f38479", "07c823c0378e90c8682c3356457e6ba63cc1a80a", "e55682e75c87f7fe335202fdd06012b13cb6d8bb", "3092cb9e010ef448fdf647d4d863664144ee03ab", "68837728232463651283edbb7ef0c93b2f502b2b", "08b5659be3d6a5178766a58c6f4a76cd385f444d", "01547bfc60daabea8aad4c6f4a656a8cc0c59c82", "53356bd1d40e9c9aeaeb352f0f74ad83bb1650eb", "103e439ff264fad88e9777c7183615907dfac45c", "19b141eb87e5a5592d82ab4539235bfcf153c554", "4c0872db1fab8c8b227f0e48e47ed1f6a68f643d", "4aec1ba26ca8a42a041dc0ac6a43dc91f1b6abc1", "23a1ba8d395984f54b257f7352980d20e919ddb0", "03a36ba9567ae9cb3e544b0e34db4e4ff1b01497", "387eb8909b5527dd2513cfdd2f376a3a1f2973b3", "f032f145f06ffef9e60833e4f85700fb8a98a1d0", "a7af778715b179f5bfeb8bfe0944ca9fdba5ea0e", "c48d3ce4560fc2deca54b54c75f6485a8785e41c", "0d77ff97789f5545c6de58115cef599b28e7dd90", "73d5dd350098daf2a8055684f704c9b5ac83c56a", "e13fd04725aa09b3c540f4e30e138cb97dbbd183", "31c299532c42106b71e909c2fc0fc7472c39ce90", "64f523020668b9c138b608d5a5344e36ef8ab223" ], "paperAbstract": "A key problem with implantable brain-machine interfaces is that they need extreme energy efficiency. One way of lowering energy consumption is to use the low power modes available on the processors embedded in these devices. We present a technique to predict when neuronal activity of interest is likely to occur so that the processor can run at nominal operating frequency at those times, and be placed in low power modes otherwise. To achieve this, we discover that branch predictors can also predict brain activity. 
We perform brain surgeries on awake and anesthetized mice, and evaluate the ability of several branch predictors to predict neuronal activity in the cerebellum. We find that perceptron branch predictors can predict cerebellar activity with accuracies as high as 85%. Consequently, we co-opt branch predictors to dictate when to transition between low power and normal operating modes, saving as much as 59% of processor energy.", "pdfUrls": [ "https://www.cs.rutgers.edu/~abhib/abhib-micro17.pdf", "http://doi.acm.org/10.1145/3123939.3123943" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/994c742264337459411320aeb4b804aef89f59bc", "sources": [ "DBLP" ], "title": "Using branch predictors to predict brain activity in brain-machine implants", "venue": "MICRO", "year": 2017 }, "994f209aa17986a29b13b641ab1c6ee76073707c": { "authors": [ { "ids": [ "2964052" ], "name": "Shun Yao" }, { "ids": [ "33830021" ], "name": "Dantong Yu" } ], "doi": "10.1109/IPDPS.2017.32", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.32", "entities": [ "Cloud computing", "Computation", "Coprocessor", "Cryptography", "Library", "Montgomery modular multiplication", "OpenSSL", "Public-key cryptography", "RSA (cryptosystem)", "SIMD", "Symmetric-key algorithm", "Thread (computing)", "Throughput", "Transport Layer Security", "Xeon Phi" ], "id": "994f209aa17986a29b13b641ab1c6ee76073707c", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "565-574", "journalVolume": "", "outCitations": [ "79234d33b0d53ad6bd507949806a96acaece3a3b", "31c05ee157fb7cd55c48e9363a44c64a298602db", "1892133530acab096b4ea4bf281377c3eec760b9", "1f527fed31971e07093695c128c10b4f3c20d109", "047edaacc9f2f107e747dad735b85b83c19246df", "3f4b5703f44970649551c96c6891465339e78ee4", "b725016a4e7b7fa0cb4b334f6e185c3479fd3b9b", "8da6ba33d9b392024f03b92a2bfe963f7dd402a8", "6b9f7f1e8a602ff83126d087c5a08aa9c8c12f16", 
"248e7c9b9f60868f95accdd2fe90053edd84ce6c", "9a59ec7a2153a9b87c384f9d32a7b0b1d2d436cf", "9a97d2ff61a2bbe6d72e32633d7bdb1750dfe31b", "92376650e204612f54ae9023bcb748e38f2852ae", "490f3679089cec9b848b49c11e841b41fda9df27", "6debd9d773c7aca19f18f3b4640c45f8ae12b254", "0f9b2e598ee1ddde4fd5a2f3008a6983367cc22c", "22ed33b9108ee432bf2155d5b474f70960fe3d3f", "6f7afc7542e18a0956c6387322a79249ff886e22", "6b69f7503ae2d1222407fba6b0b9a52ffa9e1ee5", "3cb97904fecafcd1394495b1552325b4e9e9ac15" ], "paperAbstract": "The Secure Sockets Layer (SSL) is the main protocol used to secure Internet traffic and cloud computing. It relies on the computation-intensive RSA cryptography, which primarily limits the throughput of the handshake process. In this paper, we design and implement an OpenSSL library, termed PhiOpenSSL, which targets the Intel Xeon Phi (KNC) coprocessor, and utilizes Intel Phi's SIMD and multi-threading capability to reduce the SSL computation latency. In particular, PhiOpenSSL vectorizes all big integer multiplications and Montgomery operations involved in RSA calculations and employs the Chinese Remainder Theorem and fixed-window exponentiation in its customized library. In an experiment involving the computation of Montgomery exponentiation, PhiOpenSSL was as much as 15.3 times faster than the two other reference libcrypto libraries, one from the Intel Many-core Platform Software Stack (MPSS) and the other from the default OpenSSL. 
Our RSA private key cryptography routines in PhiOpenSSL are 1.6-5.7 times faster than those in these two reference systems.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.32" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/994f209aa17986a29b13b641ab1c6ee76073707c", "sources": [ "DBLP" ], "title": "PhiOpenSSL: Using the Xeon Phi Coprocessor for Efficient Cryptographic Calculations", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "9a54f130cfed3c89f579097df76a7445e93746d4": { "authors": [ { "ids": [ "32050020" ], "name": "Tom Hope" }, { "ids": [ "2939123" ], "name": "Joel Chan" }, { "ids": [ "1717650" ], "name": "Aniket Kittur" }, { "ids": [ "1805894" ], "name": "Dafna Shahaf" } ], "doi": "10.1145/3097983.3098038", "doiUrl": "https://doi.org/10.1145/3097983.3098038", "entities": [ "Artificial neural network", "Crowdsourcing", "Database", "First-order logic", "Information retrieval", "Natural language", "Precision and recall", "Recurrent neural network", "Sparse matrix", "Structural similarity" ], "id": "9a54f130cfed3c89f579097df76a7445e93746d4", "inCitations": [ "4cd340466577fe29caa963f213f5c7b68dcf4934", "ad64649f20cc20a2d1584cbc4b859d9fa9920538", "4385f387693ab985848405aa21e7842334aef6de" ], "journalName": "", "journalPages": "235-243", "journalVolume": "", "outCitations": [ "0825788b9b5a18e3dfea5b0af123b5e939a4f564", "154728875d4668065ca6ba9fa2f5d2a1bcfc4a6e", "fcea2bdfa53e5d98c50a86a846be70d865575635", "ae3bef16200d002273018a6ffcfac42ac55e3fbd", "4afa6c2eb552ceef0e396fbfe449932492873034", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "6498e11d48b8ccf641848e77f83d6af051f7e2c2", "071b16f25117fb6133480c6259227d54fc2a5ea0", "7030688c1bd73740c5b588dc75a74c4db0bd4972", "267b0fec6dbd9e2a8afe1bcf722186df3aaf9373", "8fbf69a4d1263bc55335872b4430ad5f38e618e0", "a7b7e102955fa334efcd6c516058b70d193696e7", "38bfb182de181738eb50e142e215c67f4cbb4fe3", 
"30ce8a22a7d0990cea9e1117d4a4d58848636456", "8a9b3bc63ee9d0af9d494e0a574e65c4c2f323eb", "8c84cfc3416855aea7a46841640bd9ed5aacfa9f", "3fc97768dc0b36449ec377d6a4cad8827908d5b4", "2194b06c55349f7580491b347a0f54bcb7b3543a", "70aaf258cab562550197b92143a46e26f1e757f2", "8c2298201250b98afdeea5536f6d67ba2e2c2984", "26a2a2f683b6e6d93c510a2f8065870c54b05f05", "53ced1bf79a5ff0db025fee76a67a33f37e4286d", "1145859ba17172d517cdffe2a5f00a16366c5765", "15d7aa9ce648e8d5ed950d082f10e743fe2401b4", "696c6a3a344f6be426501e5f81d0de8374784fcd", "1634b7d3dd67daf1067d97185319c44f8a7d227e", "3c11f833000f2988b71c20a61645c3e1ce2f124d" ], "paperAbstract": "The availability of large idea repositories (e.g., the U.S. patent database) could significantly accelerate innovation and discovery by providing people with inspiration from solutions to analogous problems. However, finding useful analogies in these large, messy, real-world repositories remains a persistent challenge for either human or automated methods. Previous approaches include costly hand-created databases that have high relational structure (e.g., predicate calculus representations) but are very sparse. Simpler machine-learning/information-retrieval similarity metrics can scale to large, natural-language datasets, but struggle to account for structural similarity, which is central to analogy. In this paper we explore the viability and value of learning simpler structural representations, specifically, \"problem schemas\", which specify the purpose of a product and the mechanisms by which it achieves that purpose. Our approach combines crowdsourcing and recurrent neural networks to extract purpose and mechanism vector representations from product descriptions. We demonstrate that these learned vectors allow us to find analogies with higher precision and recall than traditional information-retrieval methods. 
In an ideation experiment, analogies retrieved by our models significantly increased people's likelihood of generating creative ideas compared to analogies retrieved by traditional methods. Our results suggest a promising approach to enabling computational analogy at scale is to learn and leverage weaker structural representations.", "pdfUrls": [ "http://arxiv.org/abs/1706.05585", "https://arxiv.org/pdf/1706.05585v1.pdf", "http://doi.acm.org/10.1145/3097983.3098038" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9a54f130cfed3c89f579097df76a7445e93746d4", "sources": [ "DBLP" ], "title": "Accelerating Innovation Through Analogy Mining", "venue": "KDD", "year": 2017 }, "9abb0f36cb95b64c4ceaf47c9dace28fb42ecd72": { "authors": [ { "ids": [ "1719935" ], "name": "Xin Chen" }, { "ids": [ "3295331" ], "name": "Ymir Vigfusson" }, { "ids": [ "1731482" ], "name": "Douglas M. Blough" }, { "ids": [ "1724559" ], "name": "Fang Zheng" }, { "ids": [ "9082182" ], "name": "Kun-Lung Wu" }, { "ids": [ "2303604" ], "name": "Liting Hu" } ], "doi": "10.1109/ICAC.2017.31", "doiUrl": "https://doi.org/10.1109/ICAC.2017.31", "entities": [ "Application checkpointing", "Crash (computing)", "Fault tolerance", "Run time (program lifecycle phase)", "Snapshot (computer storage)", "State (computer science)", "Stream processing", "Throughput" ], "id": "9abb0f36cb95b64c4ceaf47c9dace28fb42ecd72", "inCitations": [], "journalName": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "journalPages": "145-154", "journalVolume": "", "outCitations": [ "9e074f3d1c0e6212282818c8fb98cc35fe03f4d0", "a19b139a2bf717e41ce537b90d987796e83e3410", "43f3092709dd8c89c18218928a2d91d341902198", "37601bb6e655f2392ba1ca2086da0d1e03e19edc", "e847c3ec130da57328db79a7fea794b07dbccdd9", "3af5e48a741634d2572b839ca57b68929cd2d648", "7e1cbbbc5cd9c0d735556562e3d1b5ab77598850", "3ede9f94ee174dae45f3e95cf3a45df2dbe05307", "0608d9937c074520cdc93cc444cc1c77039c5332", 
"48f7cfc2634144282358d745730cfd90b74aa0e3", "030b2ba8af0c2695f65f556aefb7efdad2e5d1bf", "4b14389e3ed8bdf2c470e69cc0eff3be6fdbe254", "daf0a5b16eb51ae418f18a6324970626a29dcc96", "385e1a0916c67c2c290c98e282e86f02e10b239a", "0133fedb4bfd837f519a6f7078b8dd35d951549f", "3aba1891b50f493c414f1ef4aeb43d0b628af922", "a2578fc0352d7932f81554c1a42264999bc6f86f", "705e44ed277553e02181e82b1532a1c5b95a9b5c", "a37154c4e4ce0f213c1c8e317cb16cc5cb468c88", "8eecc13f2f1c51856eaac199e5f5b7c20b62789c", "18bee1154bb4549c0788a71351dc4e07e4f07aaf", "0ef88610f4382a9c6e5c8020ab6ccf3dde0fb3eb" ], "paperAbstract": "Distributed micro-batch streaming systems, such as Spark Streaming, employ backpressure mechanisms to maintain a stable, high throughput stream of results that is robust to runtime dynamics. Checkpointing in stream processing systems is a process that creates periodic snapshots of the data flow for fault tolerance. These checkpoints can be expensive to produce and add significant delay to the data processing. The checkpointing latencies are also variable at runtime, which in turn compounds the challenges for the backpressure mechanism to maintain stable performance. Consequently, the interferences caused by the checkpointing may degrade system performance significantly, even leading to exhaustion of resources or system crash. This paper describes GOVERNOR, a controller that factors the checkpointing costs into the backpressure mechanism. It not only guarantees a smooth execution of the stream processing but also reduces the throughput loss caused by interferences of the checkpointing. 
Our experimental results on four stateful streaming operators with real-world data sources demonstrate that Governor implemented in Spark Streaming can achieve 26% throughput improvement, and lower the risk of system crash, with negligible overhead.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICAC.2017.31", "http://users.cis.fiu.edu/~lhu/doc/governor.pdf", "http://blough.ece.gatech.edu/research/papers/icac2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9abb0f36cb95b64c4ceaf47c9dace28fb42ecd72", "sources": [ "DBLP" ], "title": "GOVERNOR: Smoother Stream Processing Through Smarter Backpressure", "venue": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "year": 2017 }, "9afe4b008fa2e867d49369ddbb2f073368d14f5a": { "authors": [ { "ids": [ "1694182" ], "name": "Erik van der Kouwe" }, { "ids": [ "10377685" ], "name": "Vinod Nigade" }, { "ids": [ "1744275" ], "name": "Cristiano Giuffrida" } ], "doi": "10.1145/3064176.3064211", "doiUrl": "https://doi.org/10.1145/3064176.3064211", "entities": [ "Dangling pointer", "Log-structured file system", "Memory management", "Overhead (computing)", "Pointer (computer programming)", "Strong consistency", "Thread (computing)" ], "id": "9afe4b008fa2e867d49369ddbb2f073368d14f5a", "inCitations": [ "46c699ca4e1574e65205d3246efc15b851e48e32", "38b127fa89540dc6b617a36bf40fa1bcc70fbf85", "ca9c4ef54efbb1602febc3944ef352faca138c98" ], "journalName": "", "journalPages": "405-419", "journalVolume": "", "outCitations": [ "3886c40229b3de318de668e0c0f4202079eb6f55", "09faa1cc5c8784d811502c5137bf63b5f1ac2934", "3829df26d4ce686251b9b5030893febd75162539", "9a8bf1a6e4e71f59620a53b0637c38a416966c4b", "151caa8e687fbdeeef71723ca4eabbc07d6fa272", "dbce3d345f3c43c51d8cc71c17d073e716a4d07c", "2f2d69b165b8dbda97a70137fbe43ad80573b949", "4fe68f0468d4e6a47cd7f747f49c307dea87cd89", "0ff9371fd3888576a66f44e956f9c10316d12219", "2811354f6f13b12176f81bc989d2e80534effa80", 
"088e3e939ad234b6fdd0e321290fb26937dc2553", "e9304fda58d1539ca6041d971e76ffe1dc63e1bb", "2194c3460ab71f3826db00b045b2ae590c753319", "49d50f1d0bb2a03388f3cb60f9f1ca01974d371c", "3d6395a04f7436b143339bbb7e4e46dfaaaf8afb", "acf32e644db8c3ac54834d294bba4cf46551480a", "94ea21e1bfcd08318926950749c0a363f0fc5412", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "17dc880085035d4355b3ea57b5b5d6d84e9dc59a", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "79473986fe994d4aeb9d662e0b8e572758a4511b", "2c3d491a3bea2c1016587aa8f9fee21293a84856", "3013fc25ace9eca344cb936124a42171d72b95ec", "7a8424572e9545c112884b9961c8b6b2613a5b5e", "4220dad5850abe8c6051cc80686fd8727d5d64a0", "8c8ffe8e4fdadbf42b46944d0339eafc3e4de4c3", "3a49f77c00c4397d3a88c9465dfb94e8caf6d5e7", "217742089058db1572042a0cebfcecdec8ce215e", "29146d2b80bc8c56f7e18efe8d2c92354254e947", "821c8d329eb4db75d8c58d8d442ba5eba6b8e720", "4e5a2301412529e2b27d9ac32428d8561a4c490d", "592be7266ac5e1a423703242a5f976bdf05627af", "0d939c3826455ca42310a92d5c00a956c4630b0e", "14d3104c58ad60e02c3ab9d9433093fe5f21d00c", "617588d9255cd9be96a0adbc1be69809941de09b", "0e55379d27454c5d9d72e4ba4b3752007b9f886f" ], "paperAbstract": "Use-after-free vulnerabilities due to dangling pointers are an important and growing threat to systems security. While various solutions exist to address this problem, none of them is sufficiently practical for real-world adoption. Some can be bypassed by attackers, others cannot support complex multithreaded applications prone to dangling pointers, and the remainder have prohibitively high overhead. One major source of overhead is the need to synchronize threads on every pointer write due to pointer tracking.\n In this paper, we present DangSan, a use-after-free detection system that scales efficiently to large numbers of pointer writes as well as to many concurrent threads. To significantly reduce the overhead of existing solutions, we observe that pointer tracking is write-intensive but requires very few reads. 
Moreover, there is no need for strong consistency guarantees as inconsistencies can be reconciled at read (i.e., object deallocation) time. Building on these intuitions, DangSan's design mimics that of log-structured file systems, which are ideally suited for similar workloads. Our results show that DangSan can run heavily multithreaded applications, while introducing only half the overhead of previous multithreaded use-after-free detectors.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064211" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9afe4b008fa2e867d49369ddbb2f073368d14f5a", "sources": [ "DBLP" ], "title": "DangSan: Scalable Use-after-free Detection", "venue": "EuroSys", "year": 2017 }, "9b13f378b047d1445720d3d91680c68e7991ce8f": { "authors": [ { "ids": [ "39504478" ], "name": "Harshil Shah" }, { "ids": [ "35480289" ], "name": "Siddharth Kamaria" }, { "ids": [ "35618845" ], "name": "Riddhesh Markandeya" }, { "ids": [ "31715963" ], "name": "Miral Shah" }, { "ids": [ "30578421" ], "name": "Bhaskar Chaudhury" } ], "doi": "10.1109/HiPC.2017.00050", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00050", "entities": [ "Algorithm", "CUDA", "Central processing unit", "Double-precision floating-point format", "Graphics", "Graphics processing unit", "Kepler (microarchitecture)", "Maxwell (microarchitecture)", "Monte Carlo", "Numerical analysis", "Nvidia Tesla", "Parallel computing", "Particle-in-cell", "Position-independent code", "PowerPC 600", "Program optimization", "Simulation", "Speedup", "Testbed", "Velocity", "Video card" ], "id": "9b13f378b047d1445720d3d91680c68e7991ce8f", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "378-387", "journalVolume": "", "outCitations": [ "8d63b44ea043fc3c2b0ec90b2ffbbf84ba446674", "8bc77b016e09e67ce88bc187ff1179aaa3d73ae5", "60e7f7f9367e952f53b8545ef441886a84e3ff58", "4cea67678804f9f3fb9ac042b8e711eeb133a2fd", 
"38353caa2ed03feb19e114297103bc55e1f31f0a", "1891724061a44a3e5aeb00734da2eb8601c41753", "65c25688b386fa5dd21887c18fcb7f65d5c0ce3b", "07a344a7cb9f381c69c0843f213292678e71ed1f" ], "paperAbstract": "The implementation of 2D-3v (2D in space and 3D in velocity space) PIC-MCC (Particle-In-Cell Monte Carlo Collision) method described in this paper involves the computational solution of Vlasov-Poisson equations, which provides the spatial and temporal evolution of the charged-particle velocity distribution functions in plasmas under the effect of self-consistent electromagnetic (EM) fields and collisions. Stringent numerical constraints associated with a PIC code makes it computationally prohibitive on CPUs in case of large problem sizes (total number of particles, number of grid points and simulation time-scale). We present the design and implementation of a Graphics Processing Unit (GPU) based 2D-3v PIC code using the CUDA C APIs for Kepler architecture. Several parallelization and optimization techniques have been presented in this paper with special emphasis on shuffle intrinsic specific to Nvidia Kepler architecture (or later), which significantly improves the performance compared to existing GPU implementations in the literature. On a test bed comprising of a serial implementation on Xeon E5 CPU and parallel implementations on Nvidia Tesla K40 graphics card, we have achieved a speedup of up to 60x in double precision mode. Effect of important numerical parameters on speedup has been investigated. 
Finally, we compare the performance of our best parallel implementation on different GPUs (Kepler as well as Maxwell) and analyze the effect of hardware architecture on the performance of the PIC code.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00050" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9b13f378b047d1445720d3d91680c68e7991ce8f", "sources": [ "DBLP" ], "title": "A Novel Implementation of 2D3V Particle-in-Cell (PIC) Algorithm for Kepler GPU Architecture", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "9b230d8deed48458ed3f450a172f1f32aea25f2c": { "authors": [ { "ids": [ "25137425" ], "name": "Chungheon Yi" }, { "ids": [ "7378917" ], "name": "Wonik Choi" }, { "ids": [ "1685610" ], "name": "Ling Liu" }, { "ids": [ "4346918" ], "name": "Youngjun Jeon" } ], "doi": "10.1109/CLOUD.2017.24", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.24", "entities": [ "Algorithm", "Cloud computing", "Deep learning", "Experiment", "Fingerprint", "Global Positioning System", "Image map", "Positioning system", "Region of interest", "Smartphone", "Virtual private server", "Visual analytics" ], "id": "9b230d8deed48458ed3f450a172f1f32aea25f2c", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "122-129", "journalVolume": "", "outCitations": [], "paperAbstract": "This paper presents a cloud-based positioning method that leverages visualized signal images through visual analytics and deep learning. At a mobile client, such as a smart phone, this approach transforms multidimensional signals captured at a known location and at a given time into a signal image and transmits such visual signal images to the Cloud. 
By collecting and storing many such visual signal images as fingerprints, we can build a visual signal image cloud and produce a signal image map for the geographical region of interest and utilize such signal image map to serve the positioning requests of mobile clients on the move. When a user Alice wants to know her current position, her mobile client will generate a signal image from the multiple signals it receives with timestamp and send this query image to a Cloud server. The server searches the existing signal images stored in the cloud to find those that are similar to this query signal image and estimates the positioning of Alice based on the locations of those similar signal images collected by the server. We evaluate our visual signal images based positioning system on the entire two floors of a large department store and on the street and shops outside the department store, with the signal images collected over 30 minutes before serving positioning queries. The mean error of up to 4 meters is observed. To further verify the applicability of the proposed method, extensive experiments were conducted to distinguish whether a user is indoor or outdoor by applying a deep learning algorithm with 60% of signal images collected for training and 40% signal images for testing. 
This experiment shows that the proposed method is able to distinguish indoor and outdoor with accuracy of about 95%.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.24" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9b230d8deed48458ed3f450a172f1f32aea25f2c", "sources": [ "DBLP" ], "title": "Cloud-Based Positioning Method with Visualized Signal Images", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "9b4f7a1a1a749cce6f797bbaa9719db4e3569c51": { "authors": [ { "ids": [ "40351860" ], "name": "Bo Tang" }, { "ids": [ "1715843" ], "name": "Shi Han" }, { "ids": [ "1722082" ], "name": "Man Lung Yiu" }, { "ids": [ "1879091" ], "name": "Rui Ding" }, { "ids": [ "1711277" ], "name": "Dongmei Zhang" } ], "doi": "10.1145/3035918.3035922", "doiUrl": "https://doi.org/10.1145/3035918.3035922", "entities": [ "Computation", "Experiment", "Online analytical processing", "Program optimization", "Scoring functions for docking", "Synthetic data" ], "id": "9b4f7a1a1a749cce6f797bbaa9719db4e3569c51", "inCitations": [ "389f93511963d4e0032bc79be3ce1a340aedbeed", "7f75690619d23375011b658e67994284fa030477", "d9f320601fc7bcff3aed567f06c0bb4014efaf72", "4f34a05ba6ab9cdaab97656ed1cb44fb2e112f4a" ], "journalName": "", "journalPages": "1509-1524", "journalVolume": "", "outCitations": [ "62a29c2179d244c38b7b5fd4e53d760c32fec860", "a0203b4a547a6d172a053d39d1d618ee47ce3e31", "9833c197cc28947d9ad411849a9fecf9c4f57a01", "b6eb482777d1686f6ea2b0e2d05cb0ec9431a6b3", "e87a36d7455ef16fb17752f02c305d696dee9e15", "a6078ad365012c1f527c192894a9184daab8e597", "4e3c1f3904d4b5404a03b6101370841f7c4798d5", "8a394cff2d0ee0607cbcf3d94bf65743fd858731", "ac091df499518a3fe5918ad66a74a9e7b5b260f4", "3b86fe77c59c0ad615773b3898cf9f17df4fa3a3", "2962a76dc7ffb53e7fa8a6a251b07be206c86476", "7b35afd47433b635ce090e87997cb2dfdc79da5a", "3f419db6f66c32bbb7ea887b139abd4e088a0405", "88bc09e1505da44f05a96cdffcbee232000af549", 
"a401596b7c337afefe0ea228ef9cd4908429b43a", "8631ae4a69f409ed09f451867c3ef4bf17129b79", "475266810bb4aacd984b3863eb0ee4bd20e8dc55", "ae9192b19dc64d2dd3954674d1c33b2f61fe993b", "fd37acd0204d55738acb7cf28b32fb56f9ebb6db", "10e4cd0d4f1dbf0accdedc5261555499a2e0c34d", "1317f7e3d1de6ffd0888303ca95d9c8c6bae2af3", "6f4db26ef56717c7cfe3baad61667a82dc70a6f6", "008b18ee86a04bf65e461c9d77f56bcbc28c3788", "a24d39e7c504a5705e4a480f99c1461992931934", "71fcfabb8c284be5acd1bd3aea0d7d602fb1d03d", "65e0af21793f0dc748a1755b736db4fbeb9bb4e8", "4dcce6c317aedc617533710f40c084ca86ee8386", "299613bc57efd6e4bb590878220ecdd1d222d7b1", "08639cd6b89ac8f375cdc1076b9485ac9d657083", "e5208097b88a4f0d73c28c3c669b17ed8f369c22", "794fd8c71e8d70f2d74b7877729b6f6966053c3a" ], "paperAbstract": "OLAP tools have been extensively used by enterprises to make better and faster decisions. Nevertheless, they require users to specify group-by attributes and know precisely what they are looking for. This paper takes the first attempt towards automatically extracting top-k insights from multi-dimensional data. This is useful not only for non-expert users, but also reduces the manual effort of data analysts. In particular, we propose the concept of insight which captures interesting observation derived from aggregation results in multiple steps (e.g., rank by a dimension, compute the percentage of measure by a dimension). An example insight is: ``Brand B's rank (across brands) falls along the year, in terms of the increase in sales''. Our problem is to compute the top-k insights by a score function. It poses challenges on (i) the effectiveness of the result and (ii) the efficiency of computation. We propose a meaningful scoring function for insights to address (i). Then, we contribute a computation framework for top-k insights, together with a suite of optimization techniques (i.e., pruning, ordering, specialized cube, and computation sharing) to address (ii). 
Our experimental study on both real data and synthetic data verifies the effectiveness and efficiency of our proposed solution.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035922" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9b4f7a1a1a749cce6f797bbaa9719db4e3569c51", "sources": [ "DBLP" ], "title": "Extracting Top-K Insights from Multi-dimensional Data", "venue": "SIGMOD Conference", "year": 2017 }, "9b7555cbb3ddb09da518dc43063e939bfee1f675": { "authors": [ { "ids": [ "2002359" ], "name": "Sarah Chasins" }, { "ids": [ "1991345" ], "name": "Rastislav Bod\u00edk" } ], "doi": "10.1145/3133875", "doiUrl": "https://doi.org/10.1145/3133875", "entities": [ "Capacitor plague", "Data science", "Database", "Experience", "Failure rate", "Language construct", "Programmer", "Scripting language", "Server (computing)", "Skip list", "Speedup", "Usability testing", "Web scraping" ], "id": "9b7555cbb3ddb09da518dc43063e939bfee1f675", "inCitations": [], "journalName": "PACMPL", "journalPages": "51:1-51:28", "journalVolume": "1", "outCitations": [ "0fa8a4cbb7cacfe161280e5b6a1f780929ddc743", "7dab5efa797a4f2ad54ef390fbd59000b163189d", "4156c049c3dbe68033f68b2a66a5b8fb0e8088ed", "6bbaf76d82968a4349f7f043ece649c8ac1fbc0c", "26f60a765181c1bc46f8abd832a1ae05867eb79f", "0f0d8d56b696f9077ca2e853870def0d1a7a41d0", "22eca7b8a3ab6ea577790eefbefa1e0f3da9e2bb", "657e8867bee3a1c35e73468a1b1ca28144a1a969", "5f227cd78454e7c30a716b3897c9695afccca8e0", "1cc263c84b85027164bd39db169f5d5959ef6822", "4e068c978a66c48de248ae136b7a361bf0d80ca0", "11dd765aededa856fba40938a5bc0a8b4cf3f9ca", "1a897305bb27172ea2736d82960884b60960d524", "2f052f40a3307de1e45e11a3007a7552b36ebfc8", "1e0dd351c97c09a6027aadd787e73a51ce1ecc66", "97e453d119e9352f4c0e64e515a89ebab6bcde3a", "9888ced21520e0d1222c8828ce63ccc169575dff" ], "paperAbstract": "With more and more web scripting languages on offer, programmers have access to increasing language support for web scraping tasks. 
However, in our experiences collaborating with data scientists, we learned that two issues still plague long-running scraping scripts: i) When a network or website goes down mid-scrape, recovery sometimes requires restarting from the beginning, which users find frustratingly slow. ii) Websites do not offer atomic snapshots of their databases; they update their content so frequently that output data is cluttered with slight variations of the same information \u2014 e.g., a tweet from profile 1 that is retweeted on profile 2 and scraped from both profiles, once with 52 responses then later with 53 responses. \nWe introduce the skip block, a language construct that addresses both of these disparate problems. Programmers write lightweight annotations to indicate when the current object can be considered equivalent to a previously scraped object and direct the program to skip over the scraping actions in the block. The construct is hierarchical, so programs can skip over long or short script segments, allowing adaptive reuse of prior work. After network and server failures, skip blocks accelerate failure recovery by 7.9x on average. Even scripts that do not encounter failures benefit; because sites display redundant objects, skipping over them accelerates scraping by up to 2.1x. For longitudinal scraping tasks that aim to fetch only new objects, the second run exhibits an average speedup of 5.2x. 
Our small user study reveals that programmers can quickly produce skip block annotations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133875" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9b7555cbb3ddb09da518dc43063e939bfee1f675", "sources": [ "DBLP" ], "title": "Skip blocks: reusing execution history to accelerate web scripts", "venue": "PACMPL", "year": 2017 }, "9b90568faad1fd394737b79503571b7f5f0b2f4b": { "authors": [ { "ids": [ "3389473" ], "name": "Siying Dong" }, { "ids": [ "35651670" ], "name": "Mark Callaghan" }, { "ids": [ "2087881" ], "name": "Leonidas Galanis" }, { "ids": [ "1823828" ], "name": "Dhruba Borthakur" }, { "ids": [ "2526663" ], "name": "Tony Savor" }, { "ids": [ "38233464" ], "name": "Michael Strum" } ], "doi": "", "doiUrl": "", "entities": [ "Amplifier", "B-tree", "Central processing unit", "Database", "Database engine", "IBM Tivoli Storage Productivity Center", "InnoDB", "Log-structured file system", "MySQL", "Online transaction processing", "Requirement", "Response time (technology)", "RocksDB", "Storage efficiency", "Throughput" ], "id": "9b90568faad1fd394737b79503571b7f5f0b2f4b", "inCitations": [ "46c0f934ef0705b953ba8b14c5dee79b4df724db", "0da73832dee2c9b3d4c0d039d8e714e6ff098e40", "8542fdcb42804a31fedb86874e3c75cd03830d4d", "3f81f14a369d635b08c3e0bc2406a41801d9e109", "44c216b53c1f5a7091618c6b7ba9a32a35323dad" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "2b56f0ca7e74a43a54b70a7bb3507855c653a85b", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "f4147b82166813bbe5dc01e9486664c273d1556c", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "389b618c42c0d5d32a569b9cbaa02a7ff77c6be6", "bce1901805ec6e07993cb248bd1a9279c1800971", "59ca22b032e7e1fb4467a3b2ad577cbf9d8f0ea8", "a30de973f68640b5032d07e2ee3ee80f03d292c5", "cb8fb0fdaeeb7ef7c44fffdee5a49009271e0bcb", "0e658882a7d8e4c54eef5aa7f97a449d1ed7e3e2", "4ce25286205c62fffda7d685a916cf4508149245", "18a5f443299784479e78d9e77f175af57cb2fa2b" 
], "paperAbstract": "RocksDB is an embedded, high-performance, persistent key-value storage engine developed at Facebook. Much of our current focus in developing and configuring RocksDB is to give priority to resource efficiency instead of giving priority to the more standard performance metrics, such as response time latency and throughput, as long as the latter remain acceptable. In particular, we optimize space efficiency while ensuring read and write latencies meet service-level requirements for the intended workloads. This choice is motivated by the fact that storage space is most often the primary bottleneck when using Flash SSDs under typical production workloads at Facebook. RocksDB uses log-structured merge trees to obtain significant space efficiency and better write throughput while achieving acceptable read performance. This paper describes methods we used to reduce storage usage in RocksDB. We discuss how we are able to trade off storage efficiency and CPU overhead, as well as read and write amplification. Based on experimental evaluations of MySQL with RocksDB as the embedded storage engine (using TPC-C and LinkBench benchmarks) and based on measurements taken from production databases, we show that RocksDB uses less than half the storage that InnoDB uses, yet performs well and in many cases even better than the B-tree-based InnoDB storage engine. 
To the best of our knowledge, this is the first time a Log-structured merge tree-based storage engine has shown competitive performance when running OLTP workloads at large scale.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p82-dong-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/9b90/568faad1fd394737b79503571b7f5f0b2f4b.pdf", "s2Url": "https://semanticscholar.org/paper/9b90568faad1fd394737b79503571b7f5f0b2f4b", "sources": [ "DBLP" ], "title": "Optimizing Space Amplification in RocksDB", "venue": "CIDR", "year": 2017 }, "9b9077f5cccbdfe5cebe1a7a3a43bc1665dcb8b7": { "authors": [ { "ids": [ "2865352" ], "name": "Amanieu D'Antras" }, { "ids": [ "2751492" ], "name": "Cosmin Gorgovan" }, { "ids": [ "1778171" ], "name": "Jim D. Garside" }, { "ids": [ "1706226" ], "name": "Mikel Luj\u00e1n" } ], "doi": "10.1145/3062341.3062371", "doiUrl": "https://doi.org/10.1145/3062341.3062371", "entities": [ "32-bit", "64-bit computing", "ARM Cortex-M", "ARM architecture", "Benchmark (computing)", "Binary file", "Binary translation", "Central processing unit", "Linux", "Linux", "Operating system", "Shadow Copy", "Time complexity", "Tracing (software)", "X86-64" ], "id": "9b9077f5cccbdfe5cebe1a7a3a43bc1665dcb8b7", "inCitations": [ "9bb1f695e399d030ba188b29b546cd9b26039715", "31284eda208c225f41987d33c77f547f1e1135ce" ], "journalName": "", "journalPages": "333-346", "journalVolume": "", "outCitations": [ "50bfb732ff36296243832c43936158bc9ba96dfe", "f016d23ffca72cdf1eb584613452720eaacafd9c", "58f1d8e4c3588fd4bb7d58276ef14bafa603aaa9", "2549f9b455f75ebaaa3736208e319847140b705e", "0653e2ed9f683868cb4539eb8718551242834f6b", "9bb1f695e399d030ba188b29b546cd9b26039715", "a49c9057fc3912d3e9bea3d6e2cd39e57561cec3", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "da33e45c803199b6c5ed2acf76d1b6ea4dcaa66a", "54f3331b575b2d451c2d716f86496cada23d596d", "04a953ba760845232c0f3c6e4dc3ca7b1fb8da4e", "0856f6f40b889dba559f19654834114e9f469760", 
"71978728170fff7b66d34f452881066a5ff9bb4c", "1bb55fbb37be9d48ecf62c5b61caaa874553f737", "01c056c6c08b7331e93bc7cffc7bbbd683201317", "489222ded537277b00537a190db1c0fda5783765", "888379af9776b296a2c6e5501fc73cc60b43a830", "0a65844b2e318305c7031eb53cb306efe7763d22", "fbd7f49a8df330e17f7fdff08a59e93d32f50502", "0ef6f7fa8f12e408c928c371c07ccbc3fcade91d", "6d12aea56165acf3715e2c82b5f560e48359366d", "326cdcbce0831d873ef41ad56e98eddfa6dff235" ], "paperAbstract": "The ARMv8 architecture introduced AArch64, a 64-bit execution mode with a new instruction set, while retaining binary compatibility with previous versions of the ARM architecture through AArch32, a 32-bit execution mode. Most hardware implementations of ARMv8 processors support both AArch32 and AArch64, which comes at a cost in hardware complexity. \n We present MAMBO-X64, a dynamic binary translator for Linux which executes 32-bit ARM binaries using only the AArch64 instruction set. We have evaluated the performance of MAMBO-X64 on three existing ARMv8 processors which support both AArch32 and AArch64 instruction sets. The performance was measured by comparing the running time of 32-bit benchmarks running under MAMBO-X64 with the same benchmark running natively. On SPEC CPU2006, we achieve a geometric mean overhead of less than 7.5% on in-order Cortex-A53 processors and a performance improvement of 1% on out-of-order X-Gene 1 processors. 
\n MAMBO-X64 achieves such low overhead by novel optimizations to map AArch32 floating-point registers to AArch64 registers dynamically, handle overflowing address calculations efficiently, generate traces that harness hardware return address prediction, and handle operating system signals accurately.", "pdfUrls": [ "https://www.research.manchester.ac.uk/portal/files/56078084/pldi_16.pdf", "http://doi.acm.org/10.1145/3062341.3062371" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9b9077f5cccbdfe5cebe1a7a3a43bc1665dcb8b7", "sources": [ "DBLP" ], "title": "Low overhead dynamic binary translation on ARM", "venue": "PLDI", "year": 2017 }, "9ba97fe20d1042b080b3e2c515e4f9c0ccc6e9e0": { "authors": [ { "ids": [ "1691086" ], "name": "Ang Li" }, { "ids": [ "2872273" ], "name": "Wenfeng Zhao" }, { "ids": [ "1798309" ], "name": "Shuaiwen Song" } ], "doi": "10.1145/3123939.3123944", "doiUrl": "https://doi.org/10.1145/3123939.3123944", "entities": [ "Boundary vector field", "Central processing unit", "Dynamic voltage scaling", "Graphics processing unit", "Power supply", "Scheduling (computing)", "Static random-access memory", "Throughput" ], "id": "9ba97fe20d1042b080b3e2c515e4f9c0ccc6e9e0", "inCitations": [], "journalName": "", "journalPages": "532-545", "journalVolume": "", "outCitations": [ "4f27cd8d8cb86879798be18c373d6bf82ecfa75f", "36e46139ac2d2f3242cfe49469ce09403b5df852", "4fa62de1bf9ed8b2543489ae1be7b08007a1dd76", "184e13ee9330c4c7055a1dea27b5cc245109d68b", "70c4ef7c1aad74d0fbe362ce4260e94f99fc4aee", "485b9204a1df97fceb4aedbc62e9efbc09a4525a", "0345b41c8a708048a9f1d27cea06b867b52eead7", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "10dc03bab236aa58778b74520978ec280ecdf731", "b2bbb08faa8018bcb41ef3039f4010bb17176d01", "2c0dc8ea2b2dba866f54ec6b42a7ecc823e33997", "03e3a481d9713ad4d39dc608959d87b3f8d8144e", "64335c55eb06c6cc5372f91b6feaf192550e36fd", "3d642e3b0bfc0e931c59e68b8f7975f304c2f6cb", "1d286a264b233125b681e522e8f5fed596a8608c", 
"8af1a4b65bd380b8775c15e2a9323ae5184bc8dd", "5f4388e15af381d4c48b6376180a94db10dae0cd", "72166f3aa4cdbc91d65b890b83c8c084dec846b0", "b37cdf43ff9c85693e335c04086003819a7aa4f9", "83c6974c1692b930833ed8f8a7f0419a122c1545", "6635cd62124e589bc56667b31cc295db2fbd22a2", "e7c6f67a70b5cf0842a7a2fc497131a79b6ee2c5", "34e22b27e9ad9421f8fa541da3a28b9909823fc8", "61cd159f10181b8cbbc25750b4b718009d649982", "65a3e6a90c904af04af86f4bce5228085588bb57", "352a8957005dc5519b15ed1870751ec494d66395", "57977d94ac12da26117a7fa8e927362660c77184", "55b5e730062d6e1c56b31d89dcf3a0b239a3951e", "d16d79b87e56337d17b573ded3c02d52a92955ef", "232d892b423c24aaefcec9eb2ae211316be0f025", "64da7b96d107f54c91c0c68b587f2ecaed356a63", "7132859e2843f7adb82ec89daf0eb2bdb1da590b", "cb6beb68971de81435aca0356987fcff8fad176d", "01d35bd1e86619cad0d7399278a392edee45d396", "1f1a1f0cd075cef63083c8ec15321021dbff2cfc", "02ed6039c45bf92926497560d0ace1d29fc9cb24", "1f3611aa60accc2ebd229162b8919b2a7ccbae33", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "c2605782274b212ded8fd136f0acf5af83998f1d", "0ae24644ca8866321ce6c117c5823ad9f149bef9", "11a3cacd4e3f11d61203aa4c68b124ab5fe54ba3", "797a4ebb3a729d677fd9190edc03c9e3a433188b", "f26de3e8203d9ceb8b32e9246e6fb11209b58754", "a2a3f6f5a502222c0c93d3ea6b66d94b86d48d03", "0d394c72f9d769dfa021796a29fc142db573aec7", "19245c59b858d89171defc372de801a179679e0f", "1d55a4505bebb74ab47ec2365a660fda39c40d14", "15edb4bbf5382ea9282281ea82b343e0c1684b78", "49fb77e166dc26849e37db3d5a53496ab547a545", "3364bc50921a9566d61ef8cb73baa82341725e4b", "b21bec027820ae04e1e7b9c31369b2341057189d", "4845474141b68b3b36e614b69c3682d064bc9a57", "2d6f002477015469075954c6748a1a85af352c94", "d539a270d13f51511a7af0a518bc2db26a3ec3d7", "7dc52b76909594dd62ed4e19910a6529acba4ed9", "d9851d64a523843f5671dc085bf4c27ddad611b2", "d4e153d0ff33cb15cd6c13570599c6c36cc78db5", "e287691fd15c0f46b452f49d030137093f531970", "2e33c9a0e0ba542693610f36cabee4d8291f8e09", "d608cc5c549c610c24e4f88dec7b9fff75c61203", 
"28055aaeb478fd09f5a042408cd6b63cbf707d1e", "db6e414480be82e495a6c5e684bfc2d3dcbcf115", "10b3a5ed39d3a164374c4bbbe70385bd0b211d9f", "00156e79606084497789662dfaf59c3b54a10722", "4f02395639cfdac14ea42e99b18fde92f8288b63", "f9cf47539216a3737f6353dca8a8f3f1e588413e", "309ad0357af7722a24192781340881390055a3db", "03385e04bf3df318ee9a94237e6b5e96b8663a0d", "2dc38b527e91f8cfee6f6c7ba4d079087c293471", "157d5b2488d953b7c88abc36791c2e897c152395", "7ea15c138cc72588fa376ff819f4bb8ca0b324da", "21a5376ec9836da95c7facc629874f61bd6e6927" ], "paperAbstract": "Power reduction is one of the primary tasks for designing modern processors, especially for high-performance throughput processors such as GPU due to their high power budget. In this paper, we propose a novel circuit-architecture co-design scheme to harvest enormous power savings for GPU on-chip SRAM and interconnects. We propose a new 8T SRAM that exhibits asymmetric energy consumption for bit value 0/1, in terms of read, write and standby. We name this feature Bit-Value-Favor (BVF). To harvest the power benefits from BVF on GPUs, we propose three coding methods at architectural level to maximize the occurrence of bit-1s over bit-0s in the on-chip data and instruction streams, leading to substantial chip-level power reduction. Experimental results across a large spectrum of 58 representative GPU applications demonstrate that our proposed BVF design can bring an average of 21% and 24% chip power reduction under 28nm and 40nm process technologies, with negligible design overhead. 
Further sensitivity studies show that the effectiveness of our design is robust to DVFS, warp scheduling policies and different SRAM capacities.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123944" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9ba97fe20d1042b080b3e2c515e4f9c0ccc6e9e0", "sources": [ "DBLP" ], "title": "BVF: enabling significant on-chip power savings via bit-value-favor for throughput processors", "venue": "MICRO", "year": 2017 }, "9bb1f695e399d030ba188b29b546cd9b26039715": { "authors": [ { "ids": [ "2865352" ], "name": "Amanieu D'Antras" }, { "ids": [ "2751492" ], "name": "Cosmin Gorgovan" }, { "ids": [ "1778171" ], "name": "Jim D. Garside" }, { "ids": [ "2940048" ], "name": "John Goodacre" }, { "ids": [ "1706226" ], "name": "Mikel Luj\u00e1n" } ], "doi": "10.1145/3050748.3050756", "doiUrl": "https://doi.org/10.1145/3050748.3050756", "entities": [ "32-bit", "64-bit computing", "ARM architecture", "Binary translation", "Computer architecture", "Hypervisor", "Memory management", "Operating system", "PowerPC", "SPARC", "Single user mode", "User space", "X86", "X86-64" ], "id": "9bb1f695e399d030ba188b29b546cd9b26039715", "inCitations": [ "9b9077f5cccbdfe5cebe1a7a3a43bc1665dcb8b7", "31284eda208c225f41987d33c77f547f1e1135ce" ], "journalName": "", "journalPages": "228-241", "journalVolume": "", "outCitations": [ "078d90580b1d01ac8f4adc2b14cfa8661b0af8e4", "e1ac84461475bb2c7c89f63bdced533556b8671f", "898636295f0cb2a1cd96e83796d4e2b5a05d7124", "0856f6f40b889dba559f19654834114e9f469760", "66639a71451c34cc01bb3bca2bf029ebe7d5d48e", "152c1893389e244ddaa7dd47c247b9c4c72ca7d8", "0a65844b2e318305c7031eb53cb306efe7763d22", "5f6a808bedd3dbfd1290063b3cd8221132ce5c95", "02e9d10ea0e3414146d946e8d4b0d196cef6e875", "f016d23ffca72cdf1eb584613452720eaacafd9c", "da1b0f589e925481ef74ec778c2b597be0b7b54e", "47dc52eeb7bf6efb46c550201cc8d52af71cc1a3", "9b9077f5cccbdfe5cebe1a7a3a43bc1665dcb8b7", "34eb2d9daf746f32409b88fcd974f0cf19c4341c", 
"55517b0c1f9755691ab471fa79ecf25d1928d540", "da33e45c803199b6c5ed2acf76d1b6ea4dcaa66a", "067c7857753e21e7317b556c86e30be60aa7cac0", "109df0e8e5969ddf01e073143e83599228a1163f", "0b0422b5864ca1a25d6af274bad11c1b2fef1ed5", "50bfb732ff36296243832c43936158bc9ba96dfe", "ffdec1d45b9395ccb074fc0de65a799aa937ac50", "2549f9b455f75ebaaa3736208e319847140b705e", "1898169191c2030e1c1e442afbb66610281f328f", "0317e2e18d66839df7348298e80aff7f3ad5ab28", "6d12aea56165acf3715e2c82b5f560e48359366d", "80ab97b9c2f8f74795b11fa50aa0ad7f37975c0e", "fbd7f49a8df330e17f7fdff08a59e93d32f50502", "0653e2ed9f683868cb4539eb8718551242834f6b", "650c4aa6fcfec595f8bb7b3f6015f1f31a4320be", "0ef6f7fa8f12e408c928c371c07ccbc3fcade91d", "07ebe9df86f0e6eb19fcdd03bbe9dd7f64ff887f", "dcb54653a7f348c16f110a9f5de7533fa3476495", "71a2d8c473f13d0c664f751db97e81128281b1eb" ], "paperAbstract": "Current computer architectures --- ARM, MIPS, PowerPC, SPARC, x86 --- have evolved from a 32-bit architecture to a 64-bit one. Computer architects often consider whether it could be possible to eliminate hardware support for a subset of the instruction set as to reduce hardware complexity, which could improve performance, reduce power usage and accelerate processor development. This paper considers the scenario where we want to eliminate 32-bit hardware support from the ARMv8 architecture.\n Dynamic binary translation can be used for this purpose and generally comes in one of two forms: application-level translators that translate a single user mode process on top of a native operating system, and system-level translators that translate an entire operating system and all its processes.\n Application-level translators can have good performance but is not totally transparent; system-level translators may be 100% compatible but performance suffers. 
HyperMAMBO-X64 uses a new approach that gets the best of both worlds, being able to run the translator as an application under the hypervisor but still react to the behavior of guest operating systems. It works with complete transparency with regards to the virtualized system whilst delivering performance close to that provided by hardware execution.\n A key factor in the low overhead of HyperMAMBO-X64 is its deep integration with the virtualization and memory management features of ARMv8. These are exploited to support caching of translations across multiple address spaces while ensuring that translated code remains consistent with the source instructions it is based on. We show how these attributes are achieved without sacrificing either performance or accuracy.", "pdfUrls": [ "http://doi.acm.org/10.1145/3050748.3050756" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9bb1f695e399d030ba188b29b546cd9b26039715", "sources": [ "DBLP" ], "title": "HyperMAMBO-X64: Using Virtualization to Support High-Performance Transparent Binary Translation", "venue": "VEE", "year": 2017 }, "9bce69cb320f770ba693ee0313514e49cc4ca1a0": { "authors": [ { "ids": [ "4143280" ], "name": "Dimitrios Skarlatos" }, { "ids": [ "1686484" ], "name": "Nam Sung Kim" }, { "ids": [ "1695950" ], "name": "Josep Torrellas" } ], "doi": "10.1145/3123939.3124540", "doiUrl": "https://doi.org/10.1145/3123939.3124540", "entities": [ "Address space", "Computer data storage", "Data deduplication", "ECC memory", "Graphic art software", "Hash function", "Hypervisor", "Kernel same-page merging", "Memory address", "Memory controller", "Memory footprint", "Simulation", "Virtual machine" ], "id": "9bce69cb320f770ba693ee0313514e49cc4ca1a0", "inCitations": [], "journalName": "", "journalPages": "302-314", "journalVolume": "", "outCitations": [ "9d30381c49afa033eacc04fb68975762eb7bafab", "445728ecb0eabed9f7433b0c96bd36d53cb312c9", "05c56f4abc527fbf384ad011dc9c0a613955641a", 
"b3995fc1dee8a1e4fe07fb7c6caf6c3584bd13da", "9341125876271d46cc25f86dac93f25acb343e8d", "045729ec838ecc50be166fe4511506ac4a08226d", "747ad718761b7d848a12e4f3a82aa0f46117a815", "1ddd08b8610ffe95cb85d2aab2ff08c2722c9772", "097904d7691fb6d5cd15cea9ee0ed8d02ba1ce41", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "86337138bb6dfabef8e1d45ec3c4e30d64c3ce36", "46d63d1b3ea2cab49a863b56b97f263f75c956f3", "5843e36547498373da1f64039d7145ac6d4853ee", "211f2beaaf36bb6a920a63dbbef6842cb1d22468", "08632fe2b934ed15d3499e7321282c81adc2c390", "48534b21548e3692ad7d866387f1dc7f543109e1", "7a280c6cdc0ab18f8809b7101330702bfd5bb759", "9aa0d7253574e50fe3a190ccd924433f048997dd", "11bbc477d14d1c945f203f1a83a530856a89d28f", "85588e89039ee1cbc2a0df0a0316f53a25a824e7", "24f56717022075157ba4ad01e84f65ab6ebce505", "1b6262f0533c202c1f140e60053ee3c72f216687", "a422d4fe122fb878394fc79c332a38535842e58c", "3230c6025956c2d3fd11971e0d30b690e3078a1e", "0edf4ef1b8e09e4abc994f7d450bc090262e2c9b", "352a8957005dc5519b15ed1870751ec494d66395", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "85398d5f19157c91bf00da3d36210e72d57887e4", "a042f95a307d4f72d2aac95ac5d5e9dbfa24db79", "d167b5c8b21c642662000417f313798d375ff38e", "28055aaeb478fd09f5a042408cd6b63cbf707d1e", "343a384d5476ead9496f96559aba5ad09e95e01e", "00ab25c6582d543932fccbb0f15fe93445f95d61", "0a934c1fa360491bebaa6fb4d0348179b9713b2d", "205ff590dc7881db74d766c43e3509ddfbe24d81", "b448d62161c48de4d2100c811b115af647c0fa4a" ], "paperAbstract": "To reduce the memory requirements of virtualized environments, modern hypervisors are equipped with the capability to search the memory address space and merge identical pages --- a process called page deduplication. 
This process uses a combination of data hashing and exhaustive comparison of pages, which consumes processor cycles and pollutes caches.\n In this paper, we present a lightweight hardware mechanism that augments the memory controller and performs the page merging process with minimal hypervisor involvement. Our concept, called PageForge, is effective. It compares pages in the memory controller, and repurposes the Error Correction Codes (ECC) engine to generate accurate and inexpensive ECC-based hash keys. We evaluate PageForge with simulations of a 10-core processor with a virtual machine (VM) on each core, running a set of applications from the TailBench suite. When compared with RedHat's KSM, a state-of-the-art software implementation of page merging, PageForge attains identical savings in memory footprint while substantially reducing the overhead. Compared to a system without same-page merging, PageForge reduces the memory footprint by an average of 48%, enabling the deployment of twice as many VMs for the same physical memory. Importantly, it keeps the average latency overhead to 10%, and the 95th percentile tail latency to 11%. 
In contrast, in KSM, these latency overheads are 68% and 136%, respectively.", "pdfUrls": [ "http://skarlat2.web.engr.illinois.edu/publications/pageForge_micro17.pdf", "http://iacoma.cs.uiuc.edu/iacoma-papers/PRES/present_micro17_1.pdf", "http://doi.acm.org/10.1145/3123939.3124540", "http://iacoma.cs.uiuc.edu/iacoma-papers/micro17_1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9bce69cb320f770ba693ee0313514e49cc4ca1a0", "sources": [ "DBLP" ], "title": "Pageforge: a near-memory content-aware page-merging architecture", "venue": "MICRO", "year": 2017 }, "9bd40ca73af180ed147c097a2d017c4334c692ff": { "authors": [ { "ids": [ "2032633" ], "name": "Vinay Vasista" }, { "ids": [ "28311535" ], "name": "Kumudha Narasimhan" }, { "ids": [ "9355467" ], "name": "Siddharth Bhat" }, { "ids": [ "1751936" ], "name": "Uday Bondhugula" } ], "doi": "10.1145/3126908.3126968", "doiUrl": "https://doi.org/10.1145/3126908.3126968", "entities": [ "Benchmark (computing)", "Computation", "Digital subscriber line", "Haswell (microarchitecture)", "High- and low-level", "High-level programming language", "Multigrid method", "Numerical analysis", "Parallel computing", "Polyhedron", "Program optimization", "Smoothing", "Symmetric multiprocessing" ], "id": "9bd40ca73af180ed147c097a2d017c4334c692ff", "inCitations": [ "acbfaf8c31964c3e91ef71d46125c94c5457efc3" ], "journalName": "", "journalPages": "15:1-15:13", "journalVolume": "", "outCitations": [ "fe7bd2137955540edc81e84c5051ae32daf1703d", "319b688af1e668dd68c9a26f3d9234b006477a8a", "39dc510d600fb5b1df1cbdfb3b131de045fc6e88", "1f08718472459d5eb3dffd69a1b0cafc6ec280d1", "4fafd03a57348374f894823b0c7cfe6c85e5bd93", "1c5b15587e4034c97610b2017697ad1ea663a8fa", "ec2d148481abc50865d0960b748d3992b85fef6f", "1bec73cbc0ffb8eb32d6da63895f5319b7409386", "731b34f20df18fd0992f23cfe8031990cded2c0d", "7c2beb7b8770b40caf55dc10437023e60a8fed35", "3c31999730ef19007df71909f1ae5223825e0ec9", "627be34198e7f10072ee2f4d2b99346b05b759b9", 
"4a2d7bf9937793a648a43c93029353ade10e64da", "1cda9f98189bd303d9163cdc1a57bfab274c8b2f", "0b9aaee517e0cabb274f5d7cfb01d8f58e51d76e", "74bef8b800ba9d31892365612463b8727366b667", "7a1f79599b3d45354883e46b176a0bbd46020f11" ], "paperAbstract": "The Geometric Multigrid (GMG) method is widely used in numerical analysis to accelerate the convergence of partial differential equations solvers using a hierarchy of grid discretizations. Multiple grid sizes and recursive expression of multigrid cycles make the task of program optimization tedious. A high-level language that aids domain experts for GMG with effective optimization and parallelization support is thus valuable.\n We demonstrate how high performance can be achieved along with enhanced programmability for GMG, with new language/optimization support in the PolyMage DSL framework. We compare our approach with (a) hand-optimized code, (b) hand-optimized code in conjunction with polyhedral optimization techniques, and (c) the existing PolyMage optimizer adapted to multigrid. We use benchmarks varying in multigrid cycle structure and smoothing steps for evaluation. 
On a 24-core Intel Xeon Haswell multicore system, our automatically optimized codes achieve a mean improvement of 3.2x over straightforward parallelization, and 1.31x over the PolyMage optimizer.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126968" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9bd40ca73af180ed147c097a2d017c4334c692ff", "sources": [ "DBLP" ], "title": "Optimizing geometric multigrid method computation using a DSL approach", "venue": "SC", "year": 2017 }, "9bd79f92726523a8beb891a5efe51aa7bd8c5d6b": { "authors": [ { "ids": [ "34138485" ], "name": "Feilong Liu" }, { "ids": [ "2335541" ], "name": "Lingyan Yin" }, { "ids": [ "3264757" ], "name": "Spyros Blanas" } ], "doi": "10.1145/3064176.3064202", "doiUrl": "https://doi.org/10.1145/3064176.3064202", "entities": [ "Database", "Datagram", "IBM Tivoli Storage Productivity Center", "InfiniBand", "Internet protocol suite", "Network interface", "Parallel database", "Relational database management system", "Remote direct memory access", "Response time (technology)" ], "id": "9bd79f92726523a8beb891a5efe51aa7bd8c5d6b", "inCitations": [ "264a5e7a5230b228b86f63a75546738a66454c56" ], "journalName": "", "journalPages": "48-63", "journalVolume": "", "outCitations": [ "ee05b94d8af5113b3e4d51e957ca66fb7e6aea35", "7409f3c2a052df862a5ba4cfc484db2ba0abcd35", "05eddbc2d37cf994fb7c6a4558e2679e5f40d23e", "54e4f8d356c304425ee0f48ee52f0057672d0b4f", "2cfbdfcf3f590cbd7a6c9c4299eb42569e77697c", "f4acec66b5a69d67932ecccc459714ad5538cf45", "029e03cd045b1fcda76e4c469eedfa0470c79624", "14390fd81841cc4bb3d3764042481fc0a0e89e7b", "463bec3d0298e96e3702e071e241e3898f76eff2", "893a6c56d5da331808cb49d6af7c4af22ff5aed5", "359760cb0df73b9d8e6501b5ab5ff93a6c12b44f", "10ca6fc3a9adf282073defda372355bfd668b31e", "3a2a82a103e55d58eb481df31c69edb113947010", "25191c846bb8361148828131c9e3ecb85ad90935", "205cf007cf77bbf81e55b74635017087585f7b7c", "893448e800e13eb022cc2b3c6328ff1a85bdbe87", 
"742c641506ac9efc3281af2effb31f2fb31b2dd4", "023bdfe7bfef7ee6611f8a0165dd89c87221f353", "daf0cd0076b388712ea12ec4105572997fc50cdf", "7730f057ceda3ab75d50ebf0ee10942938b4f8df", "1f4fac99af2d8a6d9471eb3cad7b5ae0365c0933", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "12c356a40e63af45aa6b0a63af38676a0dfe2b3e", "4a64eda713f9473424b9e5b450f27bfdcd10b550", "afc4e448edba15519b5ea054d7b47af1a5aab93d", "62be31097d51acb6530dd933a7f0ff8741019937", "284c50888bc0a85d871d65d18f0509a11f663907", "433207f45ac2c9dbe3876ab53af28ed569e94da9", "2ab305079385594badd4233ebb9512d52ecaccfb", "0235fb69431fa5892333eb48a06ede07df6ff4f6", "7ead08393fa2000ea03bb1cc1e3e6e0ac991aac6", "9f3c12e98d7173afd86ef25fb77a702f180cfdd4", "0a8b6d0f90c309e6f92c1fe3ad160786bf99a982", "04177653956b94de7b1ecb5c7c65b70c56f0b940", "0cd69740a2f0f37b469243d66c39915cb4582097", "ef9d9821df55442f039b128bb5cef2b41ab2cadc", "2dfc9ced962652476d6e5843a839c37b2578a102", "29a1148d75878671dc3663bf480e33d7bd91597d" ], "paperAbstract": "The commoditization of high-performance networking has sparked research interest in the RDMA capability of this hardware. One-sided RDMA primitives, in particular, have generated substantial excitement due to the ability to directly access remote memory from within an application without involving the TCP/IP stack or the remote CPU. This paper considers how to leverage RDMA to improve the analytical performance of parallel database systems. To shuffle data efficiently using RDMA, one needs to consider a complex design space that includes (1) the number of open connections, (2) the contention for the shared network interface, (3) the RDMA transport function, and (4) how much memory should be reserved to exchange data between nodes during query processing. We contribute six designs that capture salient trade-offs in this design space. We comprehensively evaluate how transport-layer decisions impact the query performance of a database system for different generations of InfiniBand. 
We find that a shuffling operator that uses the RDMA Send/Receive transport function over the Unreliable Datagram transport service can transmit data up to 4× faster than an RDMA-capable MPI implementation in a 16-node cluster. The response time of TPC-H queries improves by as much as 2×.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064202", "http://web.cse.ohio-state.edu/~blanas.2/files/eurosys2017_rdmashuffling.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9bd79f92726523a8beb891a5efe51aa7bd8c5d6b", "sources": [ "DBLP" ], "title": "Design and Evaluation of an RDMA-aware Data Shuffling Operator for Parallel Database Systems", "venue": "EuroSys", "year": 2017 }, "9bf288b77d60224de0524eb3ed483446b1a4a35a": { "authors": [ { "ids": [ "1745299" ], "name": "Lu Chen" }, { "ids": [ "28342797" ], "name": "Fei Wu" }, { "ids": [ "3119010" ], "name": "Jiaqi Xu" }, { "ids": [ "1959851" ], "name": "Kannan Srinivasan" }, { "ids": [ "1750487" ], "name": "Ness B. 
Shroff" } ], "doi": "10.1145/3117811.3117826", "doiUrl": "https://doi.org/10.1145/3117811.3117826", "entities": [ "Algorithm", "Channel capacity", "Duplex (telecommunications)", "Experiment", "Frequency band", "Hoc (programming language)", "Interference (communication)", "Loopback", "One-way function", "Relay", "Routing", "Simulation", "Strategic management", "Throughput" ], "id": "9bf288b77d60224de0524eb3ed483446b1a4a35a", "inCitations": [], "journalName": "", "journalPages": "114-126", "journalVolume": "", "outCitations": [ "46c219bfe7f30809cffe3a1117df541719ab1075", "5d6170014151852ff4748f52e4fb06b14fd8e7d8", "4b2f3372baef782618daf54e59782f251c58b97d", "378b8f01ae617393fa7a54783767d1004571a403", "387588025d7b09aa8245308576b31af850a93a10", "05ca83fe3b178e2ea0eb8caad6521c003860c9e8", "e895a0c2b989221a665868331eafbca5967436b7", "31d59b54ece81587110ec058dd90aa12fce23349", "21c0da421b70c02d592837b0168dbcd1174c05fc", "61d425a00fc56b374342dff111320953b42c8395", "0928e5df5e4a1ec003b82347c5f034ef93bed8d8", "8f98501c5e676daee0387d60d43f7854541eab2c", "13e89b8808dde54d75ddad6d4a076a225de268c2", "3567f2f850f7bdfcb90ea3c835dadcf68eef2d87", "cc70e0f4e152952ae07684b0efdacf33a54abc5b", "55fb41ed4d51bda02f07ee063525947eff0f4f0e", "0bca1a737517ccb3f0b0da83b108d4d474e04d8c", "47240e17ac8fa393ec6e2db2dac68454e96c8495", "3538f0befbafbe69e54759964043645ade07b11e" ], "paperAbstract": "Full duplex techniques can potentially double the channel capacity and achieve lower delays by empowering two radios to simultaneously transmit in the same frequency band. However, full duplex is only available between two adjacent nodes within the communication range. 
In this paper, we present BiPass to break this limitation. With the help of full duplex capable relays, we enable simultaneous bidirectional in-band cut-through transmissions between two far apart nodes, so they can do full duplex communications as if they were within each other's transmission range. To design such a system, we analyze interference patterns and propose a loop-back interference cancellation strategy. We identify the power amplification problem at relay nodes and develop an algorithm to solve it. We also develop a routing algorithm, an opportunistic forwarding scheme, and a real-time feedback strategy to leverage this system in ad-hoc networks. To evaluate the real world performance of BiPass, we build a prototype and conduct experiments using software defined radios. We show that BiPass can achieve 1.6x median throughput gain over state-of-the-art one-way cut-through systems, and 4.09x gain over the decode-and-forward scheme. Our simulations further reveal that even when the data traffic is not bidirectional, BiPass has 1.36x throughput gain and 47% delay reduction over one-way cut-through systems in large networks.", "pdfUrls": [ "http://newslab.ece.ohio-state.edu/research/resources/mobicom17.pdf", "http://doi.acm.org/10.1145/3117811.3117826" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9bf288b77d60224de0524eb3ed483446b1a4a35a", "sources": [ "DBLP" ], "title": "BiPass: Enabling End-to-End Full Duplex", "venue": "MobiCom", "year": 2017 }, "9bf8cf95f21a9923fc1fad938868893802f36c7d": { "authors": [ { "ids": [ "1702108" ], "name": "Taisuke Izumi" }, { "ids": [ "14027261" ], "name": "Fran\u00e7ois Le Gall" } ], "doi": "10.1145/3087801.3087811", "doiUrl": "https://doi.org/10.1145/3087801.3087811", "entities": [ "Algorithm", "Computer engineering", "Distributed computing", "Graph theory", "Informatics", "Ku band", "Randomized algorithm", "Topology (electrical circuits)", "Uniform Resource Identifier" ], "id": 
"9bf8cf95f21a9923fc1fad938868893802f36c7d", "inCitations": [ "a78a1624328bce76350d5bc981f56ee859923cad", "aeffe9bb3aba6a192465ef9f86d50365805fe4ec", "4a0963249a012d38ae8169bd2f552a7a59121105", "cc2c4065befdb2554eeae8650e0f5c7785fc0507", "e1a9ba1bb980ad2e5930e55627fd1f10842582c6" ], "journalName": "", "journalPages": "381-389", "journalVolume": "", "outCitations": [ "063521610699b966f1bd0157d124feae4f3ac515", "cf5907dd5032f0c1584405286c7447a8ebad00c5", "25186afb27fd7d50b2f6b0e03487b6020e1e439e", "0cb8e1e585da63b76c5796258c3a514b88fcc33f", "29a5e1ee50bdcc2648559efacc7e54f00852fd73", "3f084898e62b5824cf70100b91a63f1c2450a467", "20f1dfc912bcacfc8c12b75cbc024b9890437f64", "94c899462170819e0a68fc56f49f8a36e023a5e6", "68262f8e325440a25ee50e6ed4478b4098c74281", "037fa2c0495830857ed0e93ca05af754723c4165", "c49004f490050a8263be89a07ba4f14542e64579", "98b1ba120603786dcc41e7b628b5e95d54146c82", "6faa7a1dc1bb32220e0153f109e397ff567a43cb", "a73740f7dfec1cb431b373d78b6a69b9776840b5", "023892c5ccce1559cc6c3741e8548b50dff7c804", "8b30695cb60ecc2d7b08cd6d2242885ad57ad5fb", "50146c3fb11e45f9072a6bc0d4a6c336756a6537", "4e0df13191a558fb619d3fcad1d7bd2c3668f844", "0b32f09816f43cfe36735304f8db8072df3f50e7", "aa052149b40abaabbcdf637b85c7bc3c6ed570cc", "145c3ca2ea0faebcdc42de8fa24dc57ecdca341d", "45e0b544421fec82f71e196db60258f0f71b7786", "4a4a140bd777bd0b681ad951bf9f532c03b954e0", "c45360475f99e2f8f9ec494482971f9db91b25e2", "345e83dd58f26f51d75e2fef330c02c9aa01e61b", "b73a1f71aeaa60fdf4f1a06f0eb5fb46e452e007", "df9594d5ef9a8f14c81453f6b9cef959e024b35d", "5ce79c2ae7145c608090804133760f57e74c5a07", "1f912ac1e1f8a9bfda1cf7664648a74b6559a407", "0c38ced8d5f1abcce633bb347e332dce99a6ad16", "092cfae8c147126dffff703b6bbcb4f552ab5a27", "18c90d5a0c40c21b1d5169235d8264cccc0c39dc", "44039a59510de5cfb055285acbf9273143acabc7", "222a8b02a0f81b485c26ba71138e3ed726877d3a", "fdbc86c139599783a8defd00989df1c39c605602", "5674293866d738cdb7fea2f14a8935fb04725f33", "5e4675c46cf333cb2e0ae191cc31db259e1c4054" ], 
"paperAbstract": "Triangle-free graphs play a central role in graph theory, and triangle detection (or triangle finding) as well as triangle enumeration (triangle listing) play central roles in the field of graph algorithms. In distributed computing, algorithms with sublinear round complexity for triangle finding and listing have recently been developed in the powerful CONGEST clique model, where communication is allowed between any two nodes of the network. In this paper we present the first algorithms with sublinear complexity for triangle finding and triangle listing in the standard CONGEST model, where the communication topology is the same as the topology of the network. More precisely, we give randomized algorithms for triangle finding and listing with round complexity O(n(log n)) and O(n logn), respectively, where n denotes the number of nodes of the network. We also show a lower bound \u03a9(n/ logn) on the round complexity of triangle listing, which also holds for the CONGEST clique model. \u2217Postal address: Nagoya Institute of Technology, Gokiso-cho, Showa-ku, Nagoya, Aichi, 466-8555, Japan. Tel: (+81)-52-735-5567. \u2020Postal address: Department of Communications and Computer Engineering, Graduate School of Informatics, Kyoto University, Yoshida-Honmachi, Sakyo-ku, Kyoto 606-8501, Japan. 
Tel: (+81)-75-753-5382.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087811", "http://arxiv.org/abs/1705.09061", "https://arxiv.org/pdf/1705.09061v1.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/9bf8/cf95f21a9923fc1fad938868893802f36c7d.pdf", "s2Url": "https://semanticscholar.org/paper/9bf8cf95f21a9923fc1fad938868893802f36c7d", "sources": [ "DBLP" ], "title": "Triangle Finding and Listing in CONGEST Networks", "venue": "PODC", "year": 2017 }, "9c185fcd40e7ec12dfefdd98d7a2a52fb47308fc": { "authors": [ { "ids": [ "1694978" ], "name": "Cheng Wang" }, { "ids": [ "1684443" ], "name": "Bhuvan Urgaonkar" }, { "ids": [ "2741241" ], "name": "Aayush Gupta" }, { "ids": [ "1809899" ], "name": "George Kesidis" }, { "ids": [ "10375507" ], "name": "Qianlin Liang" } ], "doi": "10.1145/3064176.3064220", "doiUrl": "https://doi.org/10.1145/3064176.3064220", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Backup", "Burstable billing", "Cloud computing", "Emergence", "Experiment", "In-memory database", "Memcached", "Program optimization", "Simulation", "Single-instance storage" ], "id": "9c185fcd40e7ec12dfefdd98d7a2a52fb47308fc", "inCitations": [ "7011fa4d814667a71b7b8c4a6a59fc20a6720680", "13e388ab3495d313ae6838b26e8d34517a67e698", "0357d8655c6788bf0041b8446c038a808c209338" ], "journalName": "", "journalPages": "620-634", "journalVolume": "", "outCitations": [ "4af63ed343df388b6353b6fc77c7137d27822bf4", "8d40857945efdc98733565bcc484d90cfd353efa", "1a3263471a40f829c657ac30eaa66c5bb0675c2c", "0935bb723e4071ccd4c2334d3b6d728faa111d11", "51694f797b0d4a7e03b1e9a6587a9d4976e92297", "0c93b691888a64857612296950cf31585460a41a", "12c28dd5ea0b2d0269a67a43c2eb0b1207b2b889", "680df3638610bc5e3ddff23bc1afb0fc056039ce", "3a33424cd2ad63cc056a2d9a06b8794d78ba5214", "b31cfdf7387663d0fb0c6f30778c99dc16861096", "c1c71d0b6c0f2705e0e407f6823c928f83d67f73", "0a96ed079dfa8768c4aba0226dd3e014a4f61f2c", "71de39ceaaa0efecc2c84ce8fe0af8ceb5ed79e7", 
"0ae0d64b98498dcb80d6169f65333c979e4bd0c0", "ec5f0f8d5b7176cb2e88271ec948f935ea0346a2", "b0447d4880d2b35c25350fe0a5283afbad82c7f7", "d608a95490b02839fdf71a412aab46ad20a70596", "05be0db01d70bcce9530b462ab2368f9e15127d9", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "55b5f88ba09e4f2f53aec5418835f2a6498cd289", "a6a8313f30420c60e7eaa9f34ea5a41833695af1", "5e3da1f0bef9cfb5b517ac210542ed69aca76106", "ce8f8e86db523da990507f177c6c6df445cd8d46", "1354abf8baa8b6d39c65758072ae9d0b07227d5c", "90b75afbf77307096c536a219dd23047059ff9a9", "70e38d47b83261e257bae61dc39ffbf391b30591", "7f9d43c76bb5c077aabf1ef6ef20d35763e34291", "1bfaed6a3ae32c39f5b9a61fa528a8d23ff48801", "18d7dfa2d905fc7815784ce1f653667ab9404434", "39879b7becd8ab4662008f8da92538fe368f939c", "045943438dd45f25f0127d97ed9116b3b05914a7", "7a278ee0578f194700cadc3811cdda4ec751f88a", "5175688633b7c22fdd0b1bec4f042c30d1650a15", "6168919f450a8ed906051f2562abbfe51aa4d97d", "51098280164dcc12b1ef69632430a8a362b70452", "94859f850f345629c23526e1155aa9deb1852491", "1da8852aa591d82f6dab3d93c8aba923e69a45d4", "cbf3d5f867297f1551539d8b43ede65851b590cc", "120ade88aecba9157eb1ab7bc0464a0215c46ccc", "2316b4b4735ae39ddebce63c22644c93a855c262", "0742cebd319c73d45a72d5d0303e33472a16a64b", "27f1c44006f5b7756b1295af6884e6477df4eea6", "00c181b8b64e824fbe0172339f1e4560b557fab5", "07add9c98a979e732cfa215c901adb1975f3f43a" ], "paperAbstract": "In order to keep the costs of operating in-memory storage on the public cloud low, we devise novel ideas and enabling modeling and optimization techniques for combining conventional Amazon EC2 instances with the cheaper spot and burstable instances. Whereas a naturally appealing way of using failure-prone spot instances is to selectively store unpopular (\"cold\") content, we show that a form of \"hot-cold mixing\" across regular and spot instances might be more cost-effective. 
To overcome performance degradation resulting from spot instance revocations, we employ a highly available passive backup using the recently emergent burstable instances. We show how the idiosyncratic resource allocations of burstable instances make them ideal candidates for such a backup. We implement all our ideas in an EC2-based memcached prototype. Using simulations and live experiments on our prototype, we show that (i) our hot-cold mixing, informed by our modeling of spot prices, helps improve cost savings by 50-80% compared to only using regular instances, and (ii) our burstable-based backup helps reduce performance degradation during spot revocation, e.g., the 95% latency during failure recovery improves by 25% compared to a backup based on regular instances.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064220" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9c185fcd40e7ec12dfefdd98d7a2a52fb47308fc", "sources": [ "DBLP" ], "title": "Exploiting Spot and Burstable Instances for Improving the Cost-efficacy of In-Memory Caches on the Public Cloud", "venue": "EuroSys", "year": 2017 }, "9c655cc39beefb867fb9848a5eba6d2c4e9185cc": { "authors": [ { "ids": [ "4755641" ], "name": "Xiaolong Cui" }, { "ids": [ "1711470" ], "name": "Taieb Znati" }, { "ids": [ "1687807" ], "name": "Rami G. 
Melhem" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.71", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.71", "entities": [ "Application checkpointing", "Benchmark (computing)", "Failure rate", "Fault tolerance", "In-memory database", "Mean time between failures", "Message Passing Interface", "Run time (program lifecycle phase)", "Scalability" ], "id": "9c655cc39beefb867fb9848a5eba6d2c4e9185cc", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "547-554", "journalVolume": "", "outCitations": [ "3fa792fb63f453bd9d492f23ef7662aaaf6f7ca5", "cd69034e49d8821636c704c14fb6dfe5c78fc3ac", "a21e7d0f189f9b6065770b4591be74107cb513b2", "01f312d41dd0e24277acb788424def8c20abe08c", "580cd9345085036c200cbf0a75753653ecebfc94", "34f310dffd51a8f1585b0a6a5ccaf83094d0d663", "19601c8db1f2c750f468f1ecbfa3b258c25be472", "202aa40aa565854e431254a8478a8f3516c5ae05", "14362c62f3dda3aa0b6ebb4fdf4fcabd79665f0e", "67ddcb550dd8fe57e4567bd7ca78f6e841f25c4c", "54b4765a71fbb131fc37b75aa40b1c4b39947bf9", "9a22ebc2bd52d67fa8b90defb1200891935acfa1", "8969d1d058f3602a449d9c5cb6eceb41068b19ec", "18fe996c6f43a8f301cd842507045b679ba3506a", "5ed5c5662441c7b3a746259751100845fcefe6ef", "d719e60e2b26a549f2daa2839f7b724264a910fc", "411dbebc11cf3360e4f9cc6cb9fe9188c0331e22", "553cf4279c6d01e1da08a8e6511efb9c7354c5d0", "abe07474d6bd99d0cb6ecf81cc90a97fd24ba5d5", "311e55ffac576c2903da694adcf5dab78304bf29", "daf0a5b16eb51ae418f18a6324970626a29dcc96", "2706db42926e0e58e35336331f6d3b62f0811cf5", "455d253c61379bce5626fba8ef9897d3ac1307dc" ], "paperAbstract": "In today's large-scale High Performance Computing (HPC) systems, an increasing portion of the computing capacity is wasted due to failures and recoveries. 
It is expected that exascale machines will decrease the mean time between failures to a few hours, making fault tolerance a major challenge. This work explores novel methodologies to fault tolerance that achieve forward recovery, power-awareness, and scalability. The proposed model, referred to as Rejuvenating Shadows, is able to deal with multiple types of failure and maintain consistent level of resilience. An implementation is provided for MPI, and empirically evaluated with various benchmark applications that represent a wide range of HPC workloads. The results demonstrate Rejuvenating Shadows' ability to tolerate high failure rates, and to outperform in-memory checkpointing/restart in both execution time and resource utilization.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.71" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9c655cc39beefb867fb9848a5eba6d2c4e9185cc", "sources": [ "DBLP" ], "title": "Rejuvenating Shadows: Fault Tolerance with Forward Recovery", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "9c65f2f661cfb496a32a1fed6d9e815055e80bc5": { "authors": [ { "ids": [ "8322375" ], "name": "S. Karen Khatamifard" }, { "ids": [ "3090595" ], "name": "Longfei Wang" }, { "ids": [ "3341623" ], "name": "Weize Yu" }, { "ids": [ "37962891" ], "name": "Sel\u00e7uk K\u00f6se" }, { "ids": [ "1695310" ], "name": "Ulya R. 
Karpuzcu" } ], "doi": "10.1145/3079856.3080250", "doiUrl": "https://doi.org/10.1145/3079856.3080250", "entities": [ "Best, worst and average case", "Electric power conversion", "Gradient", "Integrated circuit", "Performance per watt", "Voltage regulation", "Voltage regulator" ], "id": "9c65f2f661cfb496a32a1fed6d9e815055e80bc5", "inCitations": [ "05eac77f2e5723a153025c401865c973728a8d08" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "120-132", "journalVolume": "", "outCitations": [ "2aa39fe57111c11157f3b003da254252cb1bacf9", "00f04673dce8d9dd42e65ea871a59bcfdac7846d", "4c5eedaff0823e8d4c5a390562dcdef02ed19e77", "5ef621fd77140f17b4e44dce9cfa61f88d61b2ca", "91ddf6620d7231992ef5a3b29b2c93540b5259af", "167f78125336294e184773d1469f816944af7e11", "6bae18a5f9f8c71e64aa9e19a0b66353e7063661", "0a80e3dce25d865e9fdf69da4d09cc8ac3398ff4", "75a10915b6edbd901df7aafdd5249cd8fcbf6402", "6528a8e4913c727a9cd9b6fef60a9d11d405935b", "31fb2b92f92968fcd60112f86b2201e874cfba19", "a2cb02b91a2b3b1f3cbc7e073d41abbc037821f6", "514ca0a5fa05bd5862a509f42da479dc12442d93", "aff00508b54357ee5cf766401c8677773c833dba", "c529bcbcd193e704d487a1889f262030c35a8a97", "6681c5cecb6efb15f170786e04e05fc77820be50", "14b664dd04703d2333bce3cb35641f90a3a8a2ce", "f5006e4c45fbf4d489b23331764b9fbd1f0e39e4", "a2b95a2da409f929425cba5a35a1aee3e606fe44", "5ee8ca0327bb21806d375ccd6cfab89516447eb6", "4e72b218559d8cf0b0c91554bdd088f30a40c1d6", "2d309ebcee68c04c4b224503e48ed0e7e8dc5da6", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "3a486d7cdbfd346a505ab0e333ef24c436b25448", "7c4df095f298650690ac4f41cf40fba0cf08e3af", "1dd93cde3a1e475ac636307511d32a9d80866d40", "68218fc8a53cafa2f8a646927dfdc455f2b4dd26", "4ba046d77597cb99454ce0e4afa2c82ea318bd79", "7b55f57c5e071cf9f2163cc87916a68a5ac540f9", "1892b4c0e5e08fa92d7de217cdfbb7e288bb1222", "42509c08694e9836c613404ca9b4e5f5be299edd", "5a6b1dd420c151b8d307c9627960175975805b9a", 
"115991a75dff714531218dea0da0fac266dd685f", "352a8957005dc5519b15ed1870751ec494d66395", "5ea27946526481238eef69a5bbea285881c5a906", "ae70d1a24fc2a21d0b9b395d753ef81244d041f3", "401281cd4a69e2900393823c55d3b120d0fcbd5b" ], "paperAbstract": "Tailoring the operating voltage to fine-grain temporal changes in the power and performance needs of the workload can effectively enhance power efficiency. Therefore, power-limited computing platforms of today widely deploy integrated (i.e., on-chip) voltage regulation which enables fast fine-grain voltage control. Voltage regulators convert and distribute power from an external energy source to the processor. Unfortunately, power conversion loss is inevitable and projected integrated regulator designs are unlikely to eliminate this loss even asymptotically. Reconfigurable power delivery by selective shut-down, i.e., gating, of distributed on-chip regulators in response to spatio-temporal changes in power demand can sustain operation at the minimum conversion loss. However, even the minimum conversion loss is sizable, and as conversion loss gets dissipated as heat, on-chip regulators can easily cause thermal emergencies due to their small footprint.\n Although reconfigurable distributed on-chip power delivery is emerging as a new design paradigm to enforce sustained operation at minimum possible power conversion loss, thermal implications have been overlooked at the architectural level. This paper hence provides a thermal characterization. We introduce ThermoGater, an architectural governor for a collection of practical, thermally-aware regulator gating policies to mitigate (if not prevent) regulator-induced thermal emergencies, which also consider potential implications for voltage noise. 
Practical ThermoGater policies can not only sustain minimum power conversion loss throughout execution effectively, but also keep the maximum temperature (thermal gradient) across chip within 0.6°C (0.3°C) on average in comparison to thermally-optimal oracular regulator gating, while the maximum voltage noise stays within 1.0% of the best case voltage noise profile.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080250", "http://people.ece.umn.edu/~ukarpuzc/Karpuzcu_files/thermoGaterTalk.pdf", "http://altai.ece.umn.edu/Main_files/isca17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9c65f2f661cfb496a32a1fed6d9e815055e80bc5", "sources": [ "DBLP" ], "title": "ThermoGater: Thermally-aware on-chip voltage regulation", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "9c7218c8effa7691d507b08d4b222c403ce26c4a": { "authors": [ { "ids": [ "9959698" ], "name": "Jean Karim Zinzindohou\u00e9" }, { "ids": [ "1736343" ], "name": "Karthikeyan Bhargavan" }, { "ids": [ "2975058" ], "name": "Jonathan Protzenko" }, { "ids": [ "2443895" ], "name": "Benjamin Beurdouche" } ], "doi": "10.1145/3133956.3134043", "doiUrl": "https://doi.org/10.1145/3133956.3134043", "entities": [ "64-bit computing", "Algorithm", "Application programming interface", "Assembly language", "Authentication", "Clang", "CompCert", "Compiler", "Correctness (computer science)", "Cryptographic primitive", "Cryptography", "Drop-in replacement", "Encryption", "Fastest", "GNU Compiler Collection", "Hash function", "Hash-based message authentication code", "High- and low-level", "Library", "Memory safety", "Message authentication", "Microsoft CryptoAPI", "Novell Storage Services", "OpenSSL", "Poly1305", "Programming language", "SHA-2", "Transport Layer Security", "Type signature" ], "id": "9c7218c8effa7691d507b08d4b222c403ce26c4a", "inCitations": [ "47a97d0c6d0ee4313fe8a0380c857e50fe93f7d4", 
"38876d86e5e7851181efc9ed3bf15765c0b59bb1", "18307d7fea0fed1067a5704f9aa13c93541e0142", "9cee090f05d008d61f79eec49bc25fa1a46fcd89" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "536", "journalVolume": "2017", "outCitations": [ "04402122e2fb065ed1280000981f7626496f0afb", "57774604456cffa77fcf57087bbede72a23994b6", "d4611529e2ac02c2a58cb526a566d68ae6fe330e", "615168555150d80752a1c195229642acbe6fb3d9", "6e90354c3a6b52117b89c6c53aaa388eb282a148", "47a97d0c6d0ee4313fe8a0380c857e50fe93f7d4", "614f3b72660eed2ce7b62970fa73ba8eae4d278b", "605ce5fe2f5b1e478a2528f1c514b77fc2607577", "db0d1f3ed4a418e126d0281d5b3aadd1fd45c982", "265f7bdcb3e6bb6d32146113e3929e2365bbd5af", "13fadf9e3fc927e9e7df14132feecc1899c68d63", "3cc86ff94309bb58b2125eea173b23ab89f26a3b", "17886b4911ffd50d7e02a574caad34a286458b3a", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "194f7d8647009dea5f4867ae27d340c84c46f51b", "81be3e82bc4ea1903b1e35375db949eca1f10cb7", "38f9d9b25c3e923c2c68be54285cee4404dbd7e9", "1daf34029b8390040c9d93d9037075fde2749994", "1fccfaca3c964e1943ac24db8ff472c78d27682b", "a4b1ebe5e56c1b19a73cd6de79ea805cb39bb4ca", "2e37af19b69f12699279e3dc754cfac681555d1f", "b3d82bcd6c8d14b9f0e6c8b2b556aae1c9ea28b5", "5af56b18071e7adf0d5b9a118e05bc893aace8e3" ], "paperAbstract": "HACL* is a verified portable C cryptographic library that implements modern cryptographic primitives such as the ChaCha20 and Salsa20 encryption algorithms, Poly1305 and HMAC message authentication, SHA-256 and SHA-512 hash functions, the Curve25519 elliptic curve, and Ed25519 signatures.\n HACL* is written in the F* programming language and then compiled to readable C code. The F* source code for each cryptographic primitive is verified for memory safety, mitigations against timing side-channels, and functional correctness with respect to a succinct high-level specification of the primitive derived from its published standard. 
The translation from F* to C preserves these properties and the generated C code can itself be compiled via the CompCert verified C compiler or mainstream compilers like GCC or CLANG. When compiled with GCC on 64-bit platforms, our primitives are as fast as the fastest pure C implementations in OpenSSL and libsodium, significantly faster than the reference C code in TweetNaCl, and between 1.1x-5.7x slower than the fastest hand-optimized vectorized assembly code in SUPERCOP.\n HACL* implements the NaCl cryptographic API and can be used as a drop-in replacement for NaCl libraries like libsodium and TweetNaCl. HACL* provides the cryptographic components for a new mandatory ciphersuite in TLS 1.3 and is being developed as the main cryptographic provider for the miTLS verified implementation. Primitives from HACL* are also being integrated within Mozilla's NSS cryptographic library. Our results show that writing fast, verified, and usable C cryptographic libraries is now practical.", "pdfUrls": [ "https://eprint.iacr.org/2017/536.pdf", "http://doi.acm.org/10.1145/3133956.3134043", "http://eprint.iacr.org/2017/536" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9c7218c8effa7691d507b08d4b222c403ce26c4a", "sources": [ "DBLP" ], "title": "HACL*: A Verified Modern Cryptographic Library", "venue": "CCS", "year": 2017 }, "9cf7699e39fa3aad6b60aa69822dda6d06f600d7": { "authors": [ { "ids": [ "2914356" ], "name": "Junfeng Zhou" }, { "ids": [ "1818568" ], "name": "Shijie Zhou" }, { "ids": [ "1718849" ], "name": "Jeffrey Xu Yu" }, { "ids": [ "1725477" ], "name": "Hao Wei" }, { "ids": [ "1871595" ], "name": "Ziyang Chen" }, { "ids": [ "35676781" ], "name": "Xian Tang" } ], "doi": "10.1145/3035918.3035927", "doiUrl": "https://doi.org/10.1145/3035918.3035927", "entities": [ "Algorithm", "Connected component (graph theory)", "Database", "Directed acyclic graph", "Directed graph", "Graph (discrete mathematics)", "Graph operations", "Reachability", "Strongly 
connected component", "Time complexity", "Transitive reduction" ], "id": "9cf7699e39fa3aad6b60aa69822dda6d06f600d7", "inCitations": [ "f077dabe280ab3ed9956a5db54f7162fa82b44ce", "0ed6e7d571fed36bd705675ce9261af440a7a7bb", "c9b7ce8aae0e9ba537847fab95b4e4511c148679", "aa8c9f7dc4695e960b61341e4ed81015ecf63248" ], "journalName": "", "journalPages": "375-390", "journalVolume": "", "outCitations": [ "0c2ff25bf68074214a960666bff2a7b0d5e29fcc", "13bda7e7a6d3ae153ff1b8c546174d2bfac6c5aa", "ea1e01aa57774c0adff45dbfe9cd5a47d8b163c4", "15ad785d44ff34ad028426c31a1e8d43b2b44ab6", "b503111efb0560aaea70e950a6928116df2862a7", "e26a832a1588b7e38212a5c1e31c6d9dfb81e713", "0420abce5921d7e80838ab746da96c1b4bc71474", "1ad8410d0ded269af4a0116d8b38842a7549f0ae", "480dabdf58171f02358fbcc39a011680f6ba91c3", "3030fa2aecda339d593b86a260bfab9988b42df7", "c17014959370282e7c3efc96d9d831c1663c919f", "95eb1efa27358aa89dda77545758bc58b53ede4d", "229c3cb87d18538315b18c5ad6796ac367adf45a", "385742fffcf113656f0d3cf6c06ef95cb8439dc6", "24ec656fa6eb25b7ddaba66697dd660a0ddc283b", "d3a7853dabbd406bb803fbec7de7292e5e65947b", "29b3fdcdda43e08534d6cae560b08dd2fbe88427", "b9c5678100693e00b59e58f3368f4797b9f11e77", "3615c522a69d613491a780a47571a84c476c3712", "26a2b94a118334585f5d717b24ef06b6f9014ba8", "d153c2177e8058f82a45c492ddc6f60ea5d58ab6", "2fbeba55935a0fa9597d1ef4cc1cbdac7e41e39e", "d0be1e20643e7e15bd4669f1c3ef0c2287852566", "421241a61dc5332237c84116422043c3503f6577", "81a2883e9a0396b2eda737d048b85281766f58af", "c2070e3e1fce3c420d4e3c2d310fb63f5aa7ccb6", "6384234e698f793cfba0cbf890b1c2a2209d06b7", "737e43bd36ac3cc785915fa2930997976137ef35", "d59baf77e392539565c286de8a716f624bf3336f", "a3d5e5d2fae76af242e40e35989237a8c3e6385f" ], "paperAbstract": "Answering reachability queries is one of the fundamental graph operations. 
The existing approaches build indexes and answer reachability queries on a directed acyclic graph (DAG) G, which is constructed by coalescing each strongly connected component of the given directed graph G into a node of G. Considering that G can still be large to be processed efficiently, there are studies to further reduce G to a smaller graph. However, these approaches suffer from either inefficiency in answering reachability queries, or cannot scale to large graphs.\n In this paper, we study DAG reduction to accelerate reachability query processing, which reduces the size of G by computing transitive reduction (TR) followed by computing equivalence reduction (ER). For ER, we propose a divide-and-conquer algorithm, namely linear-ER. Given the result Gt of TR, linear-ER gets a smaller DAG Gε in linear time based on equivalence relationship between nodes in G. Our DAG reduction approaches (TR and ER) significantly improve the cost of time and space, and can be scaled to large graphs. We confirm the efficiency of our approaches by extensive experimental studies for TR, ER, and reachability query processing using 20 real datasets.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035927" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9cf7699e39fa3aad6b60aa69822dda6d06f600d7", "sources": [ "DBLP" ], "title": "DAG Reduction: Fast Answering Reachability Queries", "venue": "SIGMOD Conference", "year": 2017 }, "9cff27c2c79e0532174e9b2b145cf6cffe10b0e8": { "authors": [ { "ids": [ "1695071" ], "name": "Bo Wan" }, { "ids": [ "4523297" ], "name": "Haizhao Luo" }, { "ids": [ "9368082" ], "name": "Kaiqi Zhou" }, { "ids": [ "6916241" ], "name": "Xi Li" }, { "ids": [ "1722340" ], "name": "Chao Wang" }, { "ids": [ "2798613" ], "name": "Xianglan Chen" }, { "ids": [ "8453780" ], "name": "Xuehai Zhou" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.75", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.75", "entities": [ "Autopilot", "Best, worst 
and average case", "Performance", "Programming paradigm", "Real-time computing", "Real-time operating system", "Responsiveness" ], "id": "9cff27c2c79e0532174e9b2b145cf6cffe10b0e8", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "578-585", "journalVolume": "", "outCitations": [ "19c9f71ebe33e56ee7c477e2768108496ba4fa56", "458c4abd999e93a16beb09b8b3b2ca739d96df4b", "667c394fac1ba48cbe2837c736378258b9d34c1a", "c13c18d3e9b16b47577a6d79b0c7ab5a73594c8e", "1716c4b9a49c151a950c0d87d6a186ba3974c229", "be030ea2a62ba210a0f8323f6643a07efb4c4078", "3689419decf85a3d1a5f4ea172df4a56464b4931", "a29880ebaef563b7f2ac56888303e3b6d2f58b02", "00ee88a13c13d2477ce6ce042fa252cca626692c", "8172187e24fda22359156eba222570d17f974bac", "6c3a903f77d965d6306156ab1c619d5780c4f88c", "125c6eefb1b21b4a15d0efe133b4168a4194308c", "3397a2047ba1481d100868743ae71f023a7f0cc3", "67bb7f4f5789cf7dd90d8daca90829f2e444a66f", "3a33dad8e9d12835fca95deec73e841096c8bec0" ], "paperAbstract": "Real-time systems need reliable guarantees for the satisfaction of their timing constraints. However, novel speed-up hardware architectures and software mechanism, which target improving average-case performances, ignore and sometimes worsen the ability to obtain guarantees. An alternative approach is the Logical Execution Time (LET) model, but there are some deficiencies in existing LET-based development tools. In this paper, we propose a novel LET-based time-aware programming framework called TipFrame. The framework introduces Servants to improve the responsiveness of LET-based periodic tasks further. The runtime makes behaviors in the system level consistent with the semantics of LET model for predictability. 
TipFrame implements in C language providing time-aware programming interfaces called TipFrame-C. The programming paradigm of TipFrame-C is described using an autopilot avionic control system. Evaluation results demonstrate that our approach is effective and efficient to construct LET-based real-time systems.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.75" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9cff27c2c79e0532174e9b2b145cf6cffe10b0e8", "sources": [ "DBLP" ], "title": "A Time-Aware Programming Framework for Constructing Predictable Real-Time Systems", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "9d6ef820108deea58af84ef1385fa50f1687bfcb": { "authors": [ { "ids": [ "1749477" ], "name": "Pengjie Ren" }, { "ids": [ "1721165" ], "name": "Zhumin Chen" }, { "ids": [ "2780667" ], "name": "Zhaochun Ren" }, { "ids": [ "2517592" ], "name": "Furu Wei" }, { "ids": [ "35729719" ], "name": "Jun Ma" }, { "ids": [ "1696030" ], "name": "Maarten de Rijke" } ], "doi": "10.1145/3077136.3080792", "doiUrl": "https://doi.org/10.1145/3077136.3080792", "entities": [ "Artificial neural network", "Benchmark (computing)", "Connection pool", "Convolutional neural network", "Experiment", "Feature vector", "Network model", "Recurrent neural network", "The Sentence" ], "id": "9d6ef820108deea58af84ef1385fa50f1687bfcb", "inCitations": [ "a483c62f661f57a883c06daa6ac25b7f80cad661", "a38ded8b5b29f3e3d72e55b4cc97b681f0768fd0", "fd9be0b084c88ea094faf742af8d61e284deb604" ], "journalName": "", "journalPages": "95-104", "journalVolume": "", "outCitations": [ "0678c73ff855c93f238538735fcecc1ee6bc8379", "229b9e80568169ac90357428ed3cfadd3513c823", "0e246be7a97a76cf94c93a9ee9835a5f189da8db", "7a67159fc7bc76d0b37930b55005a69b51241635", 
"11f3474414d9c3fe3d9f85777b3292a5b31dd6f4", "1bad3e9f15df77f06ae449bba17f9e85a3bb9187", "0c838bbfce01f1b274e4ead67f8b62f2f3f1eb84", "c694b3fe19f691f2c60fff953025f7f320f0aa80", "eef4b9c83bc543991777081ad166e2cbe6017ebf", "2f33457e932b83a9b9bf7b8c36b0fa362e731b29", "05aba481e8a221df5d8775a3bb749001e7f2525e", "d6e4b66add3723afe58cd79eee03fc4d26fa33a8", "eec3debe5d352f6e83731388b94319c4aa23cd0d", "747112c833fbd5e60825d1ed005230808dc5da33", "047d078f6cfed4cc067643b5e15f4e8f65a97c0b", "fdc279eddcd4ebdaa42a46bfa20a6aba8d32e597", "34f25a8704614163c4095b3ee2fc969b60de4698", "0f6d8e5d9b243eaf1d3968d8cd272db64e536beb", "0528d266f13933f5ea09de200f4a401f6ab9020c", "04fa53cfb61af83163258b5c4cbe0883a3fb43e2", "2c56d3a6e0c5260614ebd8bc843a29e18c4afbbc", "22e944359a59de287e3cc4de41f917d06ac3b00d", "27e4b65121d3c88643d86dc91a9bdafdf223b988", "586efcb57828d1d68c3d75a5598fa902d3efcea1", "40e42907bb2f0a0a1a7aa7101ce433ac4fae3346", "5a0dd4bd20b1518bc2ee32e319615b2d86682838", "fe6685dac1680be4aaa5a3c42f12127fd492a78e", "44fca068eecce2203d111213e3691647914a3945", "7b95d389bc6affe6a127d53b04bcfd68138f1a1a", "88dfb8867ed7c2aba461d7d87be54fbd63688d95", "8e2cd5369db9242574740e0d2739c755f8f61c92", "b9e43395663f74c581982e9ca97a0d7057a0008c", "f12192424dfd1a2df308346520556a1a7ed6094c", "71dff5e1548e1e2f1aeee46d626454216364c3a5", "d66d960f7344a2b6ec07e6de3a80fcaca3e48662", "84cfd498d393f4a1ea4115e56c2a986c90423c83", "363c448602d0eb666e34fa06576e1ed9614fd1fc", "4d36e239f21ae8d1553e38a050e465347e73d8f8", "79300b0ab31523ca4518a3a3b8623e2b0b53d79b", "014ff1eac046518a57ba2f1a75b1aa50579cc594", "178631e0f0e624b1607c7a7a2507ed30d4e83a42", "255e97d82f528b613dbe8883727abfd14f3f9f39", "3981b1c4fe4edba811bcbfcf743af82c768d3753", "b217788dd6d274ad391ee950e6f6a34033bd2fc7", "0ce17d4d41b1291e942ec3bf6c102f3ae3034b3d", "775ec4c7ef4635c194cdef8dee3241b3c7cccde7", "e374ffb49d8492ece0f54697499a7c78e2201f70", "ed2e60f027b03bec109d5cd047c9178b8f8259e1", "0af737eae02032e66e035dfed7f853ccb095d6f5", 
"4dbf34d4604935af29eeca2d5a5ea4f17df0e379", "4e8e560328f8d7358535913d56b35f0ac89606be" ], "paperAbstract": "As a framework for extractive summarization, sentence regression has achieved state-of-the-art performance in several widely-used practical systems. The most challenging task within the sentence regression framework is to identify discriminative features to encode a sentence into a feature vector. So far, sentence regression approaches have neglected to use features that capture contextual relations among sentences.\n We propose a neural network model, Contextual Relation-based Summarization (CRSum), to take advantage of contextual relations among sentences so as to improve the performance of sentence regression. Specifically, we first use sentence relations with a word-level attentive pooling convolutional neural network to construct sentence representations. Then, we use contextual relations with a sentence-level attentive pooling recurrent neural network to construct context representations. Finally, CRSum automatically learns useful contextual features by jointly learning representations of sentences and similarity scores between a sentence and sentences in its context. Using a two-level attention mechanism, CRSum is able to pay attention to important content, i.e., words and sentences, in the surrounding context of a given sentence.\n We carry out extensive experiments on six benchmark datasets. 
CRSum alone can achieve comparable performance with state-of-the-art approaches; when combined with a few basic surface features, it significantly outperforms the state-of-the-art in terms of multiple ROUGE metrics.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080792", "https://staff.fnwi.uva.nl/m.derijke/wp-content/papercite-data/pdf/ren-leveraging-2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9d6ef820108deea58af84ef1385fa50f1687bfcb", "sources": [ "DBLP" ], "title": "Leveraging Contextual Sentence Relations for Extractive Summarization Using a Neural Attention Model", "venue": "SIGIR", "year": 2017 }, "9d84066054cb0d2faf71c59e83a60fb7194a2b1c": { "authors": [ { "ids": [ "2706342" ], "name": "Pengfei Xuan" }, { "ids": [ "40520239" ], "name": "Feng Luo" }, { "ids": [ "38342948" ], "name": "Rong Ge" }, { "ids": [ "1713512" ], "name": "Pradip K. Srimani" } ], "doi": "", "doiUrl": "", "entities": [ "Apache Flink", "Apache Spark", "Computer data storage", "Data-intensive computing", "Dynamic random-access memory", "HPCC", "In-memory database", "In-memory processing", "Jumpstart Our Business Startups Act", "Memory management", "Throughput" ], "id": "9d84066054cb0d2faf71c59e83a60fb7194a2b1c", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "549-558", "journalVolume": "", "outCitations": [ "5653a0e51c48e03be15182a0f00fdb387702f10d", "c6dd5763ce70176c6850a32affa273660b73b887", "1ccc78a4ccbebf07d464fa2b0d7d7fb50f76d9b3", "53a886ccfb6b2b64ad42d79c110853ef78c344c1", "15869be2a649ac2c82a9e3f1fb57317d21b95fcc", "0558c94a094158ecd64f0d5014d3d9668054fb97", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "96cfd8eb3d72fa464d222eca5f65aa7e8995ac37", "62a0dc7c9c45a5a0a016386100f35537bfe1707d", "3aa1bc5f67254b4e2d86170b70adfacf937008f6", "d81c5a462f5b2fd04897453333b6a3cb660cc47d", 
"9c378565a0b510890b474df039caab1f2d58bded", "3ea19afaddac7778540ea86f0609a99cbc1d18d8", "9f948448e7a5f0cc94cd53656410face8b31b18a", "0ad3358ffc0d5e44311160767cc0fb65ccd25b00", "00fddb9852f5ebbb195d4f7fc2339ee29de78541", "65ad9178eb3e3bb6ef01df6ced56e389e911376c", "da30f9be5550ba3f0c96eba6a2ad7de28f2efeb2", "334782999a2ea3cf4616d8c5df80f6b98fc2df2b", "067c7857753e21e7317b556c86e30be60aa7cac0", "8dae2c51620cc8980726a466605d2deac52c144b", "83b25db931334a9b649e100eb19270a381fad85b", "267ed9139427b2d30b17cfc91880b04fb910983a", "208843384ddd6c85cd867c1d38a3731ef8aa0bcd", "0fa21a92b4f55fb235a803d7e203ecee7face136", "1bc50f26cbb045936612be9afb0f48c6d7bad913" ], "paperAbstract": "In order to boost the performance of data-intensive computing on HPC systems, in-memory computing frameworks, such as Apache Spark and Flink, use local DRAM for data storage. Optimizing the memory allocation to data storage is critical to delivering performance to traditional HPC compute jobs and throughput to data-intensive applications sharing the HPC resources. Current practices that statically configure in-memory storage may leave inadequate space for compute jobs or miss the opportunity to utilize available space for data-intensive applications. In this paper, we explore techniques to dynamically adjust in-memory storage allocation and provide optimum memory to compute jobs. We have developed a dynamic in-memory storage controller, DynIMS, which monitors memory demands of compute tasks in real time and employs a feedback-based control mechanism to adapt the allocation of in-memory storage. We test DynIMS using HPCC and Spark workloads on a HPC cluster. 
Experimental results show that DynIMS can achieve up to 5X performance improvement compared to systems with static memory allocations.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101187" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9d84066054cb0d2faf71c59e83a60fb7194a2b1c", "sources": [ "DBLP" ], "title": "Dynamic Management of In-Memory Storage for Efficiently Integrating Compute-and Data-Intensive Computing on HPC Systems", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "9ddf07a9a3ad68788ed9ff67d5a754d10f3f7cb5": { "authors": [ { "ids": [ "3543872" ], "name": "Zhenhua Li" }, { "ids": [ "40560512" ], "name": "Weiwei Wang" }, { "ids": [ "35497150" ], "name": "Christo Wilson" }, { "ids": [ "20276147" ], "name": "Jian Chen" }, { "ids": [ "35150378" ], "name": "Chen Qian" }, { "ids": [ "8645024" ], "name": "Taeho Jung" }, { "ids": [ "1718343" ], "name": "Lan Zhang" }, { "ids": [ "1918582" ], "name": "Kebin Liu" }, { "ids": [ "31637221" ], "name": "Xiangyang Li" }, { "ids": [ "10258874" ], "name": "Yunhao Liu" } ], "doi": "", "doiUrl": "", "entities": [ "Cellular organizational structure", "Communications protocol", "Crowdsourcing", "Function-Behaviour-Structure ontology", "Software deployment", "Spamming" ], "id": "9ddf07a9a3ad68788ed9ff67d5a754d10f3f7cb5", "inCitations": [ "2ab7da2ac8da95a24a688ec7a90df3908ae93a09", "39575aa9f6131aebbbd93fcd9a51e7d972529674", "258ac89a3f607c4880a25d81c403b8a3dd199e31", "4e6ee960798304341e1c2e10e5915cb3b9aa3a0b", "948e78ac4630fe7225394ca8a19f8db5d0f191bc" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "75528f7fe90b380b9b883aa707bb7a248b0fe6f0", "0acf1a74e6ed8c323192d2b0424849820fe88715", "19a45a3d02a259447911af6130e5f151480c04d5", "8e1f883e3c72739efa6202a5f1ba55106c01e49d", "2c55d843ae5603ef3d5bbdffb3980e2d388b04a8", "24c9b0b05c5e957e255b854f947472f9181772a4", 
"a4d4e91f5315ad524e415baffddbe6483123951f", "13e00ac021b75f895bfc963f5ecac42ed3dae86c", "824d52696145cac75223b67abf4f17d85bd79310", "2cb251202b00311eb246bfa974cf73c6bc7f1cd4", "23f33836c4b24252e4fa30bef6d34ba4a021872e", "266dbc45f3567e8db16ff28828a5a67629bb74cc", "33fad977a6b317cfd6ecd43d978687e0df8a7338", "84cff988bc784ee1a397fe661a6936b4bc735cae", "0d965a1e16c4bcef6a709ae909d91488d348fa20", "4781b899447abc3439eb785281aa754126f1d818", "6f5d94254c255a3296c1043549c7bc96e6373540", "c246602943b168dfde115bed8fcd55be3f1de6cf", "13a375a84a6c414b85477a401541d3e28db1e11a", "22e949f45669eac8c80aa98be3e6b6177b41fbc4" ], "paperAbstract": "Base stations constitute the basic infrastructure of today\u2019s cellular networks. Unfortunately, vulnerabilities in the GSM (2G) network protocol enable the creation of fake base stations (FBSes) that are not authorized by network operators. Criminal gangs are using FBSes to directly attack users by sending spam and fraud SMS messages, even if the users have access to 3G/4G networks. In this paper, we present the design, deployment, and evolution of an FBS detection system called FBS-Radar, based on crowdsourced data of nearly 100M users. In particular, we evaluate five different metrics for identifying FBSes in the wild, and find that FBSes can be precisely identified without sacrificing user privacy. Additionally, we present a novel method for accurately geolocating FBSes while incurring negligible impact on end-user devices. 
Our system protects users from millions of spam and fraud SMS messages per day, and has helped the authorities arrest hundreds of FBS operators.", "pdfUrls": [ "http://www.ccs.neu.edu/home/cbw/static/pdf/li-ndss17.pdf", "http://www.internetsociety.org/sites/default/files/ndss2017_08-3_Li_paper.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/fbs-radar-uncovering-fake-base-stations-scale-wild/" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/e512/89ebcb855a4d3eb80f9d786cde11fd0c98f2.pdf", "s2Url": "https://semanticscholar.org/paper/9ddf07a9a3ad68788ed9ff67d5a754d10f3f7cb5", "sources": [ "DBLP" ], "title": "FBS-Radar: Uncovering Fake Base Stations at Scale in the Wild", "venue": "NDSS", "year": 2017 }, "9e070fa0b4d2eb0732415c5e4f8f6536b01af548": { "authors": [ { "ids": [ "40617487" ], "name": "Kamala Ramasubramanian" }, { "ids": [ "36006864" ], "name": "Kathryn Dahlgren" }, { "ids": [ "10164386" ], "name": "Asha Karim" }, { "ids": [ "24485994" ], "name": "Sanjana Maiya" }, { "ids": [ "40072467" ], "name": "Sarah Borland" }, { "ids": [ "2913364" ], "name": "Boaz Leskes" }, { "ids": [ "3064226" ], "name": "Peter Alvaro" } ], "doi": "", "doiUrl": "", "entities": [ "Communications protocol", "Correctness (computer science)", "Distributed computing", "Fault injection", "Fault tolerance", "Lazy evaluation", "Programmer", "Regression testing", "Replication (computing)", "Software quality" ], "id": "9e070fa0b4d2eb0732415c5e4f8f6536b01af548", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "2c13dcfdc5ea2d355a46fe326c371038a00ba7f5", "51dae50f2cf78af88e8d064671b91e6e059ca7a6", "cd8d45ad0f78577596779b3efc6dfd7673a7e369", "3e784381615e6dbe5b9ef0885212252382f523a4", "2c7bfc8d75dab44aeab34b1bf5243b192112f502", "58a78af40f40dd248b5f3deb038d3d2b32788b57", "4d05c293c4e31978fd8726b83899237a54f0771d", "513bd1e5ec39f711f212d2105af3ee03dea4b53d", "16077ad3c95a5e8f100f0b2ae7bd8f18329e917f", 
"239ffe260e5a13c43d7131200a891194e94ff767", "39cd87e8d9b0a4f9f0ad84ecfe1323f9372cdb4b", "00e3756119a91432622f6982b59ecd24a1340fbe", "003d5a65de0ac72daaf105ded903cb3eb88585b3", "1d204d774c134dd7df97a6a83e12387efd0c7a01", "1c4e5204fb912760aadb939b25908d4075d0204d", "0baef8105721efa00de1c34875550bf2c99dec20", "65776abca3a9ead1ea72702e5b2f3334c48b884d", "ca20d2b54b1a786e21db6925cd24ce844d742ebe", "382501ee3b61ba6ac7ea9c7662a5a5015ea86b14", "1b4f194321f8f3219306a040c0d6d8c70dca1dcb", "36222f8eb2ccf21ca345e15186cea64506581543", "8dbc653d8194c257dfa198b427523191b3865464", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "f26d1263d92e5036e8b2e6b9eb7986da69b20170", "0368d2445d3ee4205ee73da933cb8b810a89091c" ], "paperAbstract": "Verification is often regarded as a one-time procedure undertaken after a protocol is specified but before it is implemented. However, in practice, protocols continually evolve with the addition of new capabilities and performance optimizations. Existing verification tools are illsuited to \u201ctracking\u201d protocol evolution and programmers are too busy (or too lazy?) to simultaneously co-evolve specifications manually. This means that the correctness guarantees determined at verification time can erode as protocols evolve. Existing software quality techniques such as regression testing and root cause analysis, which naturally support system evolution, are poorly suited to reasoning about fault tolerance properties of a distributed system because these properties require a search of the execution schedule rather than merely replaying inputs. 
This paper advocates that our community should explore the intersection of testing and verification to better ensure quality for distributed software and presents our experience evolving a data replication protocol at Elastic using a novel bug-finding technology called Lineage Driven Fault Injection (LDFI) as evidence.", "pdfUrls": [ "https://www.usenix.org/conference/hotcloud17/program/presentation/ramasubramanian", "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-ramasubramanian.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/9e07/0fa0b4d2eb0732415c5e4f8f6536b01af548.pdf", "s2Url": "https://semanticscholar.org/paper/9e070fa0b4d2eb0732415c5e4f8f6536b01af548", "sources": [ "DBLP" ], "title": "Growing a protocol", "venue": "HotCloud", "year": 2017 }, "9e1d75ff8ada7ba76b640359590acca7ec762b5d": { "authors": [ { "ids": [ "3278547" ], "name": "Young Wn Song" }, { "ids": [ "1721119" ], "name": "Yann-Hang Lee" } ], "doi": "10.1109/IPDPS.2017.87", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.87", "entities": [ "Address space", "Algorithm", "Correctness (computer science)", "Data access", "FastTrack", "Intrusion detection system", "Line Printer Daemon protocol", "Locality of reference", "Lock (computer science)", "Multi-core processor", "Multiprocessing", "Principle of locality", "Race condition", "Symmetric multiprocessing", "Thread (computing)" ], "id": "9e1d75ff8ada7ba76b640359590acca7ec762b5d", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "387-396", "journalVolume": "", "outCitations": [ "0180ce1c6e98fd66362b46bd945c503bf7372aa8", "54f3331b575b2d451c2d716f86496cada23d596d", "0653e2ed9f683868cb4539eb8718551242834f6b", "059697e0824d06a43321a9f9d7450da9cc4dc0a8", "a45adba59080ad625e3005c669345c3a96ad3e18", "0e578433d4e8bb2a571c87a2d22816074902f009", "2346439ece014d5e3ce1564adc2a7ca098a37c8e", "8c2b2fb1d4c44d1e1b63be4e5ef3bbb8d37dbfb5", 
"182c0524aa353b6f4f4cb75a88ff3f5fc3bd86e0", "d3630df8179b704a516b6ed01cfcad24ae50ade7", "8b28b02af1ba77fff5b08d6dea87ba8b043b479d", "711b89b078ceb7722406c719a6ac1316ade61daf", "5bc7a761cb77abe5aa964191d501385198b7f79d", "7905c7e24bbd5c987dca90dc690e8a11ed4d122f", "0958a63d9c6238b38377f076b487c413bc8642c1", "131672edb4fa10458d1b5d9d047dde18f33d997f", "05a618847e4f08e5bca29dff732757779722b2e0", "771e3c7146213802ca8c4db0afbde51606293a71", "8b662bf3287f779f9f5d367e7b34d2fe1e3efde2", "968f8a1d37e7ae479c2534a29d0d9d9225134605", "86ed165adcfd254b511ff1bbb912cad65d45f0d6", "60a45695845e3f1e5dd8d7a886b23fff89c295bc", "44808fd8f2ffd19bb266708b8de835c28f5b8596", "5b89866789a58f374a4ae83c555dd20e67e80ac5" ], "paperAbstract": "Detecting data races in multithreaded programs is critical to ensure the correctness of the programs. To discover data races precisely without false alarms, dynamic detection approaches are often applied. However, the overhead of the existing dynamic detection approaches, even with recent innovations, is still substantially high. In this paper, we present a simple but efficient approach to parallelize data race detection in multicore SMP (Symmetric Multiprocessing) machines. In our approach, data access information needed for dynamic detection is collected at application threads and passed to de-tection threads. The access information is distributed in a way that the operation performed by each detection thread is inde-pendent of that of other detection threads. As a consequence, the overhead caused by locking operations in data race detection can be alleviated and multiple cores can be fully utilized to speed up and scale up the detection. Furthermore, each detection thread deals with only its own assigned memory access region rather than the whole address space. The executions of detection threads can exploit the spatial locality of accesses leading to an improved cache performance. 
We have applied our parallel approach on the FastTrack algorithm and demon-strated the validity of our approach on an Intel Xeon machine. Our experimental results show that the parallel FastTrack detector, on average, runs 2.2 times faster than the original FastTrack detector on the 8 core machine.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.87" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9e1d75ff8ada7ba76b640359590acca7ec762b5d", "sources": [ "DBLP" ], "title": "A Parallel FastTrack Data Race Detector on Multi-core Systems", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "9e86e9a332be800d7420aa0a394cd1b348d93e48": { "authors": [ { "ids": [ "1778504" ], "name": "Oana Balmau" }, { "ids": [ "1727558" ], "name": "Rachid Guerraoui" }, { "ids": [ "1989618" ], "name": "Vasileios Trigonakis" }, { "ids": [ "1680380" ], "name": "Igor Zablotchi" } ], "doi": "10.1145/3064176.3064193", "doiUrl": "https://doi.org/10.1145/3064176.3064193", "entities": [ "Algorithm", "Application programming interface", "Attribute\u2013value pair", "Component-based software engineering", "Data buffer", "Data store", "Data structure", "In-memory database", "Key-value database", "LevelDB", "Log-structured file system", "Multi-core processor", "Thread (computing)", "Throughput" ], "id": "9e86e9a332be800d7420aa0a394cd1b348d93e48", "inCitations": [ "a6ca37aeeef5911e4f36b904088479bea999cc81", "8cfa25e85c2c6c9305f696819d764ed5490f3faf", "e5c8095c2a87b474cb76e0d1d7dc9a8a37f3a80e", "8542fdcb42804a31fedb86874e3c75cd03830d4d" ], "journalName": "", "journalPages": "80-94", "journalVolume": "", "outCitations": [ "d9956fa98c56db50b6f3d84a9837fecc252c3cad", "7ae71ff8a5f950c12305f27d8584a9c9203717a9", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "03416be8097852a54dd3e309434e5a0806824646", "3a8c90ab13adb55e3610a020c69f03d72dfae274", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "f4147b82166813bbe5dc01e9486664c273d1556c", 
"1594118f2696b573f08510cf837f3b37db87face", "b4087345c63a7b2412eeb31066b5e4bceadbbcb2", "911c224cc262ab25c2726aeb4774ca40ca52223a", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "206b20f225fc655dfac733b6f0bd8077ed86215e", "0334a7d886ba288f88427a48f7a27997b44675e7", "56cd3ff6758b55fd95df82cf877934167589b281", "04f020a4ab2134db6f9e98eadf216d94d440414a", "a30de973f68640b5032d07e2ee3ee80f03d292c5", "42142c121b2dbe48d55e81c2ce198a5639645030", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "e7ab23d011e5183db78cfea48e303210f6e57e2e", "28fb425b2d2cc3287628f9f6e8b31b7665ba47a7", "21e51da40ab080ca2b71ad36094e2b686008b6cc", "30df50d77ef9478a2848626dfe3bf65f3c991991", "31181e73befea410e25de462eccd0e74ba8fea0b", "18a5f443299784479e78d9e77f175af57cb2fa2b", "1cb0679ae82be093268747da0f634281ea6a41df" ], "paperAbstract": "Log-structured merge (LSM) data stores enable to store and process large volumes of data while maintaining good performance. They mitigate the I/O bottleneck by absorbing updates in a memory layer and transferring them to the disk layer in sequential batches. Yet, the LSM architecture fundamentally requires elements to be in sorted order. As the amount of data in memory grows, maintaining this sorted order becomes increasingly costly. Contrary to intuition, existing LSM systems could actually lose throughput with larger memory components.\n In this paper, we introduce FloDB, an LSM memory component architecture which allows throughput to scale on modern multicore machines with ample memory sizes. The main idea underlying FloDB is essentially to bootstrap the traditional LSM architecture by adding a small in-memory buffer layer on top of the memory component. This buffer offers low-latency operations, masking the write latency of the sorted memory component. Integrating this buffer in the classic LSM memory component to obtain FloDB is not trivial and requires revisiting the algorithms of the user-facing LSM operations (search, update, scan). 
FloDB's two layers can be implemented with state-of-the-art, highly-concurrent data structures. This way, as we show in the paper, FloDB eliminates significant synchronization bottlenecks in classic LSM designs, while offering a rich LSM API.\n We implement FloDB as an extension of LevelDB, Google's popular LSM key-value store. We compare FloDB's performance to that of state-of-the-art LSMs. In short, FloDB's performance is up to one order of magnitude higher than that of the next best-performing competitor in a wide range of multi-threaded workloads.", "pdfUrls": [ "https://infoscience.epfl.ch/record/227333/files/flodb-infoscience.pdf", "http://doi.acm.org/10.1145/3064176.3064193", "https://infoscience.epfl.ch/record/227333/files/flodb-eurosys-presentation-conference.compressed.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9e86e9a332be800d7420aa0a394cd1b348d93e48", "sources": [ "DBLP" ], "title": "FloDB: Unlocking Memory in Persistent Key-Value Stores", "venue": "EuroSys", "year": 2017 }, "9ebbffde784b5aef699ceebe57b04f491ac0cdc2": { "authors": [ { "ids": [ "2965136" ], "name": "Sebastian Werner" }, { "ids": [ "2664393" ], "name": "Javier Navaridas" }, { "ids": [ "1706226" ], "name": "Mikel Luj\u00e1n" } ], "doi": "10.1109/HPCA.2017.23", "doiUrl": "https://doi.org/10.1109/HPCA.2017.23", "entities": [ "Algorithm", "Low-power broadcasting", "Mathematical optimization", "Microarchitecture", "Network on a chip", "Network switch", "Performance per watt", "Router (computing)", "Routing", "Serialization", "Synthetic data", "Throughput" ], "id": "9ebbffde784b5aef699ceebe57b04f491ac0cdc2", "inCitations": [ "61b641aa64042043ea14a5fd3f792660402ddd55", "c74b2e2e6ed90e1034559aee12fc3b62451e8b68", "7c948a5548f077b66f6a162210aa616e5e5a4a18", "c686c5c0746938b32efcc28bcaa2054e8e65d9e8", "b65ae1d2f8d2cfe058fdc2dbbf72052fe1d95bfe" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": 
"265-276", "journalVolume": "", "outCitations": [ "86eb50849e24c989c793b2e65ecbb16b51d44b78", "4375a7de96101ba4dbc95f08774fd18d3f4f3e9d", "14f4d1bfc69d144cbf0a4cfc7c708fa95b6fbb8b", "c4a90335f644e4123d065c37f2197ae80992c06e", "aac5360e08fe19ee26ff21f2d4e80cba8180cdc9", "051c67273bb170429a88bc995050a8521ee1bae2", "48b96c5e3181f6bb114d078a840015522b40cb69", "c3b64af1a00766a27cab874c35b9eec33eb791c3", "1843d5dd795a470fd990ed680b582ac27373660b", "0c89d5d37a25f693bc31e65c0c5b9bea63148e57", "10289d16e1336fe57b3b52137af4678c229eef54", "0709328c35748941bae7074dc0ef010cc3d198d4", "3f4d5667e04958569dff72966a93450decfd256f", "06bab0a18bad4b8532d9fbb85f0e55e4594e505b", "7ec2901a65ab52450f7e3321a8e415878e438f3f", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "29a0fbe270445e8b0cb6b18d7a0c05bae656b4e2", "b33c35f64c2f24e024c8e03beac90f47eb2b351e", "f76d00da3a825dcd78c261c5bde2dc13eb9aeb23", "3966af775134902da24da6ab125fba7c2f2429bf", "e941a27d1c8d066a6d2e59e421bdb6c9231debe1", "0c2aedcde307e0c08966e6c1d15f754f2e3113b6", "6589dc0b5dd9484d9a206990c82d56928994c7bc", "0108a3544506cc114214b2e30cb3284d2ff8d035", "438a428c7fc8800d96cad3dca78c8e5d6af9e146", "0b1c6f52c76b441cb2598f3b8ac132d921ec8274", "06e62419617f17bd0e63623472f529eb8702eb85", "06525545eb94e24cdcbca739597023c9996d228a", "41789d53d8b9b8d956815890f6b0a528fe9a6ab1", "27732e2d5bc50654050bfcdd32f8ad3d9ba5d115", "0cfca98cc8e16534668dc8e18f962dd8013ff661", "9221b56812fc2b81621ca040b48b6e6d8ae6f23b", "08c0c5bdc1bc0c4e3fb0067133ef3f6ae59df197", "632da165cc701848bfb34bfccaeea6cfa2ee94d7", "da25c9cf27b5b3726f8a00437f08f33c37ca350c", "1e35c585eb86fe12574016e6312ba1e5af974f5f", "59239b1dc602ee2a3a0dbf7d32aecb8fe5187594", "1f854aba5723b0a9e200b72a9c3a41024d52eaec", "6b1ea96e7b1ea82c217b1d348c7231deb104e34a", "83000c6227326bbaae66308e89414b219719fff9", "29ba9706af6a076d786e336f5aebf169aba46bcb", "a3edc4f28c3d0bb89e99c26082147656a809bcf4", "f53494818b4ba2e6f5ba9e8fbb1ebdd490db17f0", "932d5c303dfd32a012000cdd976b2c25f4fbf0ca", 
"c7ae87b4e5952560362e24274a3e9f4e78a666f6", "dbc627f52c9dc69a56a7b89ec020634037d4edd8" ], "paperAbstract": "Optical on-chip communication is considered a promising candidate to overcome latency and energy bottlenecks of electrical interconnects. Although recently proposed hybrid Networks-on-chip (NoCs), which implement both electrical and optical links, improve power efficiency, they often fail to combine these two interconnect technologies efficiently and suffer from considerable laser power overheads caused by high-bandwidth optical links. We argue that these overheads can be avoided by inserting a higher quantity of low-bandwidth optical links in a topology, as this yields lower optical loss and in turn laser power. Moreover, when optimally combined with electrical links for short distances, this can be done without trading off latency. We present the effectiveness of this concept with Lego, our hybrid, mesh-based NoC that provides high power efficiency by utilizing electrical links for local traffic, and low-bandwidth optical links for long distances. Electrical links are placed systematically to outweigh the serialization delay introduced by the optical links, simplify router microarchitecture, and allow to save optical resources. Our routing algorithm always chooses the link that offers the lowest latency and energy. Compared to state-of-the-art proposals, Lego increases throughput-per-watt by at least 40%, and lowers latency by 35% on average for synthetic traffic. 
On SPLASH-2/PARSEC workloads, Lego improves power efficiency by at least 37% (up to 3.5x).", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.23" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9ebbffde784b5aef699ceebe57b04f491ac0cdc2", "sources": [ "DBLP" ], "title": "Designing Low-Power, Low-Latency Networks-on-Chip by Optimally Combining Electrical and Optical Links", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "9ee5ba7057e1d04311a4add0123217dd98a4b93f": { "authors": [ { "ids": [ "1772752" ], "name": "Mohsen Ghaffari" }, { "ids": [ "1738955" ], "name": "Fabian Kuhn" }, { "ids": [ "39262594" ], "name": "Hsin-Hao Su" } ], "doi": "10.1145/3087801.3087827", "doiUrl": "https://doi.org/10.1145/3087801.3087827", "entities": [ "Algorithm", "Best, worst and average case", "Distributed algorithm", "Minimum spanning tree", "Polynomial", "Random graph", "Randomized algorithm", "Routing", "Span and div", "Spanning tree", "Time complexity" ], "id": "9ee5ba7057e1d04311a4add0123217dd98a4b93f", "inCitations": [ "7f65a4ab85a9293476c0235ddc1b870a8eaebbc5" ], "journalName": "", "journalPages": "131-140", "journalVolume": "", "outCitations": [ "9871fd28db4458acb0e82e9807c40a06c366f468", "eb82d3035849cd23578096462ba419b53198a556", "26093f353ac3956ecc1de56fe9fd9cdfc448b01f", "117e8e6cdddbc7c99da4dd37e513ad0051c1097a", "45e0b544421fec82f71e196db60258f0f71b7786", "6776a89b92b845ac591a945795ae1ff7d1540b1b", "a73740f7dfec1cb431b373d78b6a69b9776840b5", "023892c5ccce1559cc6c3741e8548b50dff7c804", "0e174be8a3335121b26009696dfa6403d53ce2b9", "a8d4b20e75f1755c8ff0d576d2d0e9674fed80bb", "6eeef952611552ff0b8a4fe1daa9c492cd68d1bb", "4018a9578fd25afbdc3b1a1e1b2ebfe99528043e", "86fb6d3152a9849444f2301c91ddce5b97ce611b", "1565669b4e162eb0e8790613364bee17a3048ab6", "a2cf1792a861746fa99f05125440709035ad6dc2", "4a855ccaab477c53efc847fc9372639065776288", 
"7c74be3367d23ed30559e527caf4d5f8996fc1f3", "6563799018bdd2d8eae9cf486cb21a3ca397c727", "ca8450d56a5fc7d407ffb001b0ca9f5ad296be08", "5674293866d738cdb7fea2f14a8935fb04725f33", "80f2486716b3c567d6c8a0dde35cfa7f5f76c825", "cdfd5de78df6a2b97b05001de962c7112c736a51", "6b16bb7aa73739de089357a572c5327d464f26c8", "db32b0fde36679286b2406dad2926ab81a6b4020", "ec2544af2429518eefffdd094cbebd5e994661c3", "5e4675c46cf333cb2e0ae191cc31db259e1c4054", "37030e618f7caa7a8c3fec3454fb0d43915002a4", "b97487eb448295db655f87d6884b66d07e71f2a7", "52e12834033e373ffdb4aff7f1d88ed995c69c6e", "0aea7c981f3c0bf140bbe30b135ad1d87eab3503", "1bc474099e193ecdc7ca24b1d7faf8d8b3d081e4", "18049ba7a59f881d7d23e57ebf729aa81a44f541", "2806b9025260a9a0b2c6d311419c4b22ab807c37", "0d34d16f8ed9395184952402b25d06f75106e03c", "6bf984001617f7912ad3e25a17e795e4fdf5020f", "0368d2445d3ee4205ee73da933cb8b810a89091c", "44039a59510de5cfb055285acbf9273143acabc7", "f562d5471143e525b83e24859e0422fa21215ba3", "073a1729adfd962364743ab160bb5f211b6c63ad", "23a19cc949fb43f5420c7b1ab7ea5be2187b3bc9", "4b8cabab34aca3eb66abd34250fb1b8029fbf3c1", "d7d1bfd55973cef5fd93d4398cf997e584d75e69", "ae51cb991ea1820115cc2e42dbe1b2468bcaff05", "68c5797320e4272e4d0a96440eba5558be3923e7", "5a91a360f4c59d65ff1265e5660c97cc8d1fc643", "4951c568443fb3071518e6ecfdb4b57d452659c7", "8c5f19a8e17a2ba5bf654d33e2189a0ee561b1ad", "4778e290172bd63889ff5af963895c3526308477", "5b7cc45f49bc4b4ee9c17da716d03e644da63816", "86c5c7f63ae001e8714244f7037fca03f3cf25f9", "02d2ef2976785afa2935ff3c30e01cb5c380c473", "43e65e636db1da08a6659a0a8cb0c96d2213655f", "54070d11e4424b925348846d975a6f00d61dcf90", "4afb309b5f3d195e805ac86dc5f7f49dd6e79859", "0b291c329178573c7c78a653ed3282a5350cae58", "13f008360c48e279afbaa9335155a4ea54b9da31", "93d50e630715123d33a4e05d1ff5ffa1e221d355", "b2e31dac80a70a1ecddb47b09556003e3ece6928", "1685f6fb2f162ffd995b4aafcaf96b5cc5bf08fc", "25fb8bd206e42ce525ee81f6fe091ab04a901bd3", "222a8b02a0f81b485c26ba71138e3ed726877d3a", 
"fa7e9af14a46e07db867d9d01cd885e02a06fd62", "d49b5c88f0775329445f4be7054936e8cf382d1d", "1f17efff82e1c54207e29317e44d93696b7cbef9", "6faa7a1dc1bb32220e0153f109e397ff567a43cb" ], "paperAbstract": "We present a randomized distributed algorithm that computes a minimum spanning tree in \u03c4mix (G ) \u00b7 2O ( \u221a logn log logn) ) rounds, in any n-node graphG with mixing time \u03c4mix (G ). This result provides a sub-polynomial complexity for a wide range of graphs of practical interest, and goes below the celebrated \u03a9\u0303(D + \u221a n) lower bound of Das Sarma et al. [STOC\u201911] which holds for some worst-case general graphs. The core novelty in this result is a distributed method for permutation routing. In this problem, one is given a number of sourcedestination pairs, and we should deliver one packet from each source to its destination, all in parallel, in the shortest span of time possible. Our algorithm allows us to route and deliver all these packets in \u03c4mix (G ) \u00b7 2O ( \u221a logn log logn) rounds, assuming that each node v is the source or destination for at most dG (v ) packets. 
The main technical ingredient in this routing result is a certain hierarchical embedding of good-expansion random graphs on the base graph, which we believe can be of interest well beyond this work.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087827", "http://groups.csail.mit.edu/tds/papers/Ghaffari/podc117.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/9ee5/ba7057e1d04311a4add0123217dd98a4b93f.pdf", "s2Url": "https://semanticscholar.org/paper/9ee5ba7057e1d04311a4add0123217dd98a4b93f", "sources": [ "DBLP" ], "title": "Distributed MST and Routing in Almost Mixing Time", "venue": "PODC", "year": 2017 }, "9f48a66ebf7a5cdcd990a62b2373c2f279cfd62b": { "authors": [ { "ids": [ "1733482" ], "name": "Jesper Buus Nielsen" }, { "ids": [ "37636206" ], "name": "Thomas Schneider" }, { "ids": [ "28084654" ], "name": "Roberto Trifiletti" } ], "doi": "", "doiUrl": "", "entities": [ "Amortized analysis", "Benchmark (computing)", "Computation", "Data pre-processing", "Preprocessor", "Secure multi-party computation", "Secure two-party computation", "Two-phase commit protocol", "XOR", "XOR gate", "Yao graph" ], "id": "9f48a66ebf7a5cdcd990a62b2373c2f279cfd62b", "inCitations": [ "3fdcd1f6bf4940926f67f32a3e3cc1794e244eb5", "369756d09b28a70979483f5d786c35ec336c3b45", "39f6571e188f704eb84f9a6d682be7c0483fd443", "14dd45aee2b1047b07e87757cf4e541b7afd1ff0", "0130d3428065bf7830263fdce79cc0192113af4e", "3a956b0ff66f4acda00a91e4ba731569515e8806", "5fc9b11bfd1c57c733f4175e20676de410494589", "411e4ecb35e5385ed0c88a36f0b2821c42af8f70", "3dbb3dcaff97a0db797d01def0f96b6e37289daa", "e0957a2d15cad958cc3cc90ae791f369ca5f426a", "ddda84420c2a5391c95de096fd7504a0aec5edf7", "046cd9bcecd662abad5e7e7707c043b8e2578cc8", "b6bedb1b62df6f29fb56e2fa62478f80d97ec0ef", "3ee83944675cd0bc505529ab1823f72c2272ad7e", "3fab56c42fe6efc0b4febd21596ef89188f5d21a" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "1069", "journalVolume": "2016", "outCitations": [ 
"02178fc50f9c3d965613d81205ad1c6c4e98f339", "5adc94602d07e49cc1e94e2aa2b1bdf3481a47f8", "362246709de205ec0ac5b34e07306839c38d5a3a", "bcb49a06e4fb7ea831257e146073d84234f4d238", "260ff9a2bebd8663a1e2c46e41e10681185af793", "68678e6e0b066fd410163114cbd67078a3c0eb94", "796ff7cef7dcd8b9c577a86473fc1067e1078144", "218bbd0efffc2ee63edffb8c5220f06155e23578", "8c5e81a2badc7ed7c03914a8c12773084a96155a", "abb28a53d8d425fa052ec18c11996882406ddfe6", "69dc0fe412f974a595abe6d7052d8fdf2304ba3b", "588972fccb475cfaafdbb6efeef592eacadbe5f0", "26b4199c18d93495c34b29be6f96f220f85e38da", "f24813873a971abae25f0507204cbe415b6ed140", "2a30e769b5b1787dc2b8dce5eaab253e9a4c4a76", "19c3736da5116e0e80a64db35afe421663c4b4a8", "a9ca6a9079bcb5c513ebf63a029d7cdbb8245fa3", "4c27a4a70e9997a8a50003762d855f24b836daa1", "15c76f461543c44a8b9d8b32b2bbd18c595aea52", "db0f82a419f89cda64fcbec2c58137862cd04475", "937cb0795bf3bfb5c9cbb7387ed68301bb6995ce", "1c07a74467c912602b33f28e90abd6eeaa60af6d", "2eb315952f6a2e342b19cf95287c8a0b1f2c36fa", "ada825ba76ae506dd30092c99af702ec3859272a", "eacc0f9a8646de38020d781438c68cc23ec47b05", "33148623fc14ea5735e73dd716d030ab17118299", "0bbf79585b6f427cee59282d5b3dee3b0ea9c1b4", "13ca5e20283085e1c2854325665bd7fd6497a62c", "0e0427aedfed65c8dd688c094b181feacf4eaab4", "55aafd6b8e9816203006c7e5c37e2e2fb8451113", "0356d1e56d1fbb2ca881e542186f247149294c0b", "cb1e3138e99816dd9b87c3f66889f60fbc0521a5", "15509fdb7fc7bf065fcdf776b38cf3d72d10c113", "ad0564d120af0e7471cd32d4c0438b8c25f33a0d", "47b8fd6ee8b07bd14de3c91df515b11180121de9", "3fb1b878daafbd54989438e4fb778380a03226e6", "3ff4a7bcfa42348102cd49f6bf33c8ca85c94472", "2285498561f10a3bdc244529bf7a4d4cd13584c8", "3f40a5b0bcf4401c3f8efdbb539deec2763ad916", "05543dfa89c4e89ed1f78a1b83c2e172cd8f6321", "a797a0346e106e0d1d1d2db778aa509031c7bf8c", "46527c14457cf84d1cf26487d6b4c31f4825db71", "2f9c590bb2df7fe3e4caffaaa709fa6840d02d62", "04948723dec0e6724777ee56f0d10168cce44921", "5161aa950ec876026dfc24b4cbf69ae1e552c0e6", 
"b57aec9b611817d5272c8f97ec8211ecd33dca6d", "f3aac8b66f9b569862dca2434b424111e94f7ed8", "07b46f998a147ef221ebfdf5f1fb8db6cdf3f1c1", "21769cdf2b40157b064a4e353493b92a077309f2", "0166c8b5c6445043b94fc7b62d145d0c3c8b6483", "842eb3de44e0538769f1509d1b8d35161fb212bb", "7f14bca3b6f51d4cfe8084798c2808c08b0214d6", "1a68d1bbb2eab66239e51b26b7636c453f505b3b", "d89c91e556c9ebc345931547f579a8494a573391", "9c04890c31c3fd10dcc3593454e2a2700923e19a", "12bdab344f43ae37e4796b262e696523516444e6", "27e9745fc94ccf6039dd1804cbb99760544fc59b", "c3b6d1b083f132d6f40f354fce32453410b6f942", "42333e3f231bbfe508f6da6bad2feff9ae223113", "18f5d7663632c92c84f89151823dff2120ae43cf", "1eb0b401e7dbd8a4e638243713b39fffc991fe9f", "0435dddec1c8b1011bd5e3ea07c45d1cdf0b520b", "25636d7ba1cf7cf43ad3c94f25c9d37432753549", "15fc5f92da22ecb1761be6adccd7c858288c40ab" ], "paperAbstract": "Secure two-party computation (S2PC) allows two parties to compute a function on their joint inputs while leaking only the output of the function. At TCC 2009 Orlandi and Nielsen proposed the LEGO protocol for maliciously secure 2PC based on cut-and-choose of Yao\u2019s garbled circuits at the gate level and showed that this is asymptotically more efficient than on the circuit level. Since then the LEGO approach has been improved upon in several theoretical works, but never implemented. In this paper we describe further concrete improvements and provide the first implementation of a protocol from the LEGO family. Our protocol has a constant number of rounds and is optimized for the offline/online setting with function-independent preprocessing. We have benchmarked our prototype and find that our protocol can compete with all existing implementations and that it is often more efficient. As an example, in a LAN setting we can evaluate an AES-128 circuit with online latency down to 1.13ms, while if evaluating 128 AES-128 circuits in parallel the amortized cost is 0.09ms per AES-128. 
This online performance does not come at the price of offline inefficiency as we achieve comparable performance to previous, less general protocols, and significantly better if we ignore the cost of the function-independent preprocessing. Also, as our protocol has an optimal 2-round online phase it is significantly more efficient than previous protocols when considering a high latency network. Keywords\u2014Secure Two-party Computation, Implementation, LEGO, XOR-Homomorphic Commitments, Selective OT-Attack", "pdfUrls": [ "http://eprint.iacr.org/2016/1069.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/constant-round-maliciously-secure-2pc-function-independent-preprocessing-using-lego/", "http://www.cs.au.dk/~jbn/ndss2017.pdf", "http://eprint.iacr.org/2016/1069" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/9f48/a66ebf7a5cdcd990a62b2373c2f279cfd62b.pdf", "s2Url": "https://semanticscholar.org/paper/9f48a66ebf7a5cdcd990a62b2373c2f279cfd62b", "sources": [ "DBLP" ], "title": "Constant Round Maliciously Secure 2PC with Function-independent Preprocessing using LEGO", "venue": "NDSS", "year": 2016 }, "9fb72d48e20450fb42448fe30d26f96a36b5db6a": { "authors": [ { "ids": [ "34999391" ], "name": "James Gleeson" }, { "ids": [ "6164169" ], "name": "Daniel Kats" }, { "ids": [ "11065117" ], "name": "Charlie Mei" }, { "ids": [ "1879216" ], "name": "Eyal de Lara" } ], "doi": "10.1145/3078468.3078478", "doiUrl": "https://doi.org/10.1145/3078468.3078478", "entities": [ "Application programming interface", "Bare machine", "Divergence (computer science)", "General-purpose computing on graphics processing units", "Graphics processing unit", "OpenCL API", "Passthrough", "Remote procedure call", "Virtual machine" ], "id": "9fb72d48e20450fb42448fe30d26f96a36b5db6a", "inCitations": [ "47734696239c5090f81cf821a0962a0ab47d2376" ], "journalName": "", "journalPages": "11:1-11:13", "journalVolume": "", "outCitations": [ "00156e79606084497789662dfaf59c3b54a10722", 
"6074c1108997e0c1f97dc3c199323a162ffe978d", "ec97e16a9ee20c90e7c65fc6dd4ddcd0098c6b3e", "9411dff66d0321c63d85fe84614d64b997383bd6", "32a548e5089703f7f6d775c8927889dd12abf35b", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "71a2d8c473f13d0c664f751db97e81128281b1eb", "9bc397cfd1e150149a1de08133a9036613a8be64", "0c1a9a3ff88fbd026407a26039abfadd6ccea047", "4719002c81cced8fa11fa228d4aa44777eabd944", "4954fa180728932959997a4768411ff9136aac81", "04e9d7b1544ec76e3e5c24b46ccae5d5096b638b", "6a15184024955d133c168488c989fba6b7449741", "ca701980a9394f55c6bc471dda6b4765afa60527", "483d18fb39b8655c80a1e93c740928762cc32a3b", "a8bf87e5fda4d881ecee66aa6ad058e67e25510a", "b69382e95bbf9bd9f141bbb7e0d9ab2bd8353e2b", "6bdacf836b47e40f1e8d5d8e9e1c8224d74a1cef", "1c37285eb427f9d5bae45dc0624cb5ee78abe827", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "091780ccc35385b75bdf58b86a8342ae92d66049", "067c7857753e21e7317b556c86e30be60aa7cac0", "010c1b6fad2e47868b22f3787e2f355875f10cec", "433b9d8aff0749ce61a3601eced49ec404942dd4", "28d0b30592d994a7a736f81cfb7f7237e4f364f8", "584ba882d9023aaad8985de81c8088c038c24137", "2f2cdd7b0c98b5e43b61272d2ac3ebb5cd29041d", "0be302437cec82b9200d61d13d3125e62a8ef499", "cffc3c4e332287ba5b67a20c0bf0fbcb8a6e5725", "5cdb8b8e13f90ec7974565372a6b772faf6d611f", "061356704ec86334dbbc073985375fe13cd39088", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "7370316302bfff5ef6ff760cbfaae668e4760275", "335504b014e48069c6dcf227645ae61830d6cf27", "45472bef11491245ad51dde6963e3cc40c5f3b79", "49c68d4376a01ab024735804bc2b7fa1bbcc11be" ], "paperAbstract": "General purpose GPU (GPGPU) computing in virtualized environments leverages PCI passthrough to achieve GPU performance comparable to bare-metal execution. However, GPU passthrough prevents service administrators from performing virtual machine migration between physical hosts.\n Crane is a new technique for virtualizing OpenCL-based GPGPU computing that achieves within 5.25% of passthrough GPU performance while supporting VM migration. 
Crane interposes a virtualization-aware OpenCL library that makes it possible to reclaim and subsequently reassign physical GPUs to a VM without terminating the guest or its applications. Crane also enables continued GPU operation while the VM is undergoing live migration by transparently switching between GPU passthrough operation and API remoting.", "pdfUrls": [ "http://sysweb.cs.toronto.edu/publication_files/0000/0307/a11-gleeson.pdf", "http://doi.acm.org/10.1145/3078468.3078478" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9fb72d48e20450fb42448fe30d26f96a36b5db6a", "sources": [ "DBLP" ], "title": "Crane: fast and migratable GPU passthrough for OpenCL applications", "venue": "SYSTOR", "year": 2017 }, "9fe5c41f28af93b1789a44fc9f232cfff146e870": { "authors": [ { "ids": [ "2810695" ], "name": "Daniel Reiter Horn" }, { "ids": [ "2964002" ], "name": "Ken Elkabany" }, { "ids": [ "3237897" ], "name": "Chris Lesniewski-Laas" }, { "ids": [ "2203149" ], "name": "Keith Winstein" } ], "doi": "", "doiUrl": "", "entities": [ "Arithmetic coding", "Baseline (configuration management)", "Byte", "Dropbox", "Fault tolerance", "Huffman coding", "JPEG", "JPEG 2000", "Lossless compression", "Open-source software", "Parallel algorithm", "Pebibyte", "Petabyte", "Software deployment" ], "id": "9fe5c41f28af93b1789a44fc9f232cfff146e870", "inCitations": [], "journalName": "", "journalPages": "1-15", "journalVolume": "", "outCitations": [ "26d98ce36db100aee0867c397a331253ac7cd573", "65ae594f1243b8239cabf35764198443614c4325", "04d0add7245a1ab6c49498f8ae2e308794369b36", "9b96c86217891961f877d938718f2f31943a3ea7", "11a7e426012fcec35c32fdea0b60b874783c682f", "94aa01189f997afd56241ac4130e2c32e3420b66", "e711b68e3fe1964aeedde33b38cb43c5a7fee743", "508c2172d12bbc2b749ee79c8fea158bfac0f1c6", "10925faac07bbeeecf7d7d50f6df2d0143c2426f", "44607270754f8521d6c4d42297aa881393f4f8e0", "29146d2b80bc8c56f7e18efe8d2c92354254e947" ], "paperAbstract": "We report the design, 
implementation, and deployment of Lepton, a fault-tolerant system that losslessly compresses JPEG images to 77% of their original size on average. Lepton replaces the lowest layer of baseline JPEG compression\u2014a Huffman code\u2014with a parallelized arithmetic code, so that the exact bytes of the original JPEG file can be recovered quickly. Lepton matches the compression efficiency of the best prior work, while decoding more than nine times faster and in a streaming manner. Lepton has been released as open-source software and has been deployed for a year on the Dropbox file-storage backend. As of February 2017, it had compressed more than 203 PiB of user JPEG files, saving more than 46 PiB.", "pdfUrls": [ "http://arxiv.org/abs/1704.06192", "https://arxiv.org/pdf/1704.06192v1.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-horn-daniel.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/horn", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-horn-daniel.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/9fe5/c41f28af93b1789a44fc9f232cfff146e870.pdf", "s2Url": "https://semanticscholar.org/paper/9fe5c41f28af93b1789a44fc9f232cfff146e870", "sources": [ "DBLP" ], "title": "The Design, Implementation, and Deployment of a System to Transparently Compress Hundreds of Petabytes of Image Files for a File-Storage Service", "venue": "NSDI", "year": 2017 }, "9ff85ebfdb8c4fd22a7a75003b054bf051cee4e8": { "authors": [ { "ids": [ "3342263" ], "name": "Zeqi Lai" }, { "ids": [ "2981910" ], "name": "Y. 
Charlie Hu" }, { "ids": [ "1789035" ], "name": "Yong Cui" }, { "ids": [ "2662418" ], "name": "Linhui Sun" }, { "ids": [ "2096679" ], "name": "Ningwei Dai" } ], "doi": "10.1145/3117811.3117815", "doiUrl": "https://doi.org/10.1145/3117811.3117815", "entities": [ "Central processing unit", "Data compression", "Data rate units", "Floating Point Systems", "Immersion (virtual reality)", "Interaction", "Mobile device", "Refresh rate", "Server (computing)", "Smartphone", "Virtual reality" ], "id": "9ff85ebfdb8c4fd22a7a75003b054bf051cee4e8", "inCitations": [ "655ec9ae84c329b03ee16fc0da4a3d2edc069f55", "33f97956275a78b18059453b7e9f102b1d9dc1a5", "651ae380b5d500c613770dbf55c175c52576d7da", "067f92dd54cbea2c65c2944254660defaf989fa7", "88afa5c97259b6342457072ccf866abbbf027fd6", "552ec9f70427528f8a7522073e22c477504f3668" ], "journalName": "", "journalPages": "409-421", "journalVolume": "", "outCitations": [ "9e9df334bd9ebfcf4f972568ce59aed4b84269d7", "10fb02d7d304d6337c008d97ffceef79ceb593e5", "7f1d542b436e423ccac18b7b0dc220b9b9a5418c", "4433c9f0899699f17f06b5046de7a6886e5dd15c", "92cc8cdaa0a414b7ed0641225ce58a4c07fc8063", "0340c1a2086cf0c896687e60bd15c264b461dfc9", "47240e17ac8fa393ec6e2db2dac68454e96c8495", "44f0b63ff297fede8486942b3d74a42be53c8049", "264eb62645951a6bc59997e1050a2a81df725541", "19c95d23781b1370ef8fafb2db6e6cb6aa443118", "4c9cec89a2c9c8173ee53ab4cda2c021421eb7a5", "21092ca3e1928c7a800447c09d6153bf9f022d2c", "e7bb09876ce8eb2eb401e4c06cf2344633261cbd", "2bdd7fc4daca795236021b76ec1ac44901d2b900", "1a9b72ebef4c73b95b6db6e2948aa1305e246f61", "0cf5b1e3b4e8133d275b4dadcc81cbe01b7e237d", "a2cad4e4fd946adf6cc87e483b2ba18579de1264", "3d8ef812b3f953aaf9cb2baccf65b81374e3a204", "06706cd6c46bc413bb9c272d6c1c034313ff2742", "4383ac7d42480c9547587025908522c291436665", "14d644c617b1bdf57d626d4d14f7210f99d140e6", "00436cbe55c70c28cb689b16a4c7bb6f6b7b16a2", "4407e6eb2694d513d444454a5f6ae94281322a90", "7bd19f37bd85824d52ecdd9a4141c841508dcb24", 
"0abe0d0d6ecfce98dc954270f6c7c26bc5195840", "c2fff2b4bf11c02e63b557e1dd408650bd3c9929", "8729fa0c0217d6a5984e7d1778313e392685fc43", "4841f2f732b05a3b05576934cb941d3fdbbee2e7", "381dbb898c39ac2ffe608b2699fc7201dc32a599", "a698b351e21113b23890020409b1868631dd925e", "caf31edd457cbcd21c43d37109588799fa66c984", "2c2cfbec94307fc92192e5a4be0d0731799f9bf9" ], "paperAbstract": "In this paper, we perform a systematic design study of the \"elephant in the room\" facing the VR industry -- is it feasible to enable high-quality VR apps on untethered mobile devices such as smartphones? Our quantitative, performance-driven design study makes two contributions. First, we show that the QoE achievable for high-quality VR applications on today's mobile hardware and wireless networks via local rendering or offloading is about 10X away from the acceptable QoE, yet waiting for future mobile hardware or next-generation wireless networks (e.g. 5G) is unlikely to help, because of power limitation and the higher CPU utilization needed for processing packets under higher data rate. Second, we present Furion, a VR framework that enables high-quality, immersive mobile VR on today's mobile devices and wireless networks. Furion exploits a key insight about the VR workload that foreground interactions and background environment have contrasting predictability and rendering workload, and employs a split renderer architecture running on both the phone and the server. 
Supplemented with video compression, use of panoramic frames, and parallel decoding on multiple cores on the phone, we demonstrate Furion can support high-quality VR apps on today's smartphones over WiFi, with under 14ms latency and 60 FPS (the phone display refresh rate).", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117815", "http://www.yongcui.org/lunwen/Furion.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/9ff85ebfdb8c4fd22a7a75003b054bf051cee4e8", "sources": [ "DBLP" ], "title": "Furion: Engineering High-Quality Immersive Virtual Reality on Today's Mobile Devices", "venue": "MobiCom", "year": 2017 }, "a027a5a5d021f8236678d730c74a71ab43ffebd3": { "authors": [ { "ids": [ "34975877" ], "name": "Jin Tack Lim" }, { "ids": [ "1933752" ], "name": "Christoffer Dall" }, { "ids": [ "2175344" ], "name": "Shih-Wei Li" }, { "ids": [ "1700208" ], "name": "Jason Nieh" }, { "ids": [ "26924262" ], "name": "Marc Zyngier" } ], "doi": "10.1145/3132747.3132754", "doiUrl": "https://doi.org/10.1145/3132747.3132754", "entities": [ "ARM architecture", "Cloud computing", "Hypervisor", "Null (SQL)", "Virtual machine", "X86", "X86 virtualization" ], "id": "a027a5a5d021f8236678d730c74a71ab43ffebd3", "inCitations": [], "journalName": "", "journalPages": "201-217", "journalVolume": "", "outCitations": [ "3e239cdc9eb2d33f3eabb01f552a12aa0bf98537", "60c2a873958fba5ff0ef2cce20663eec627aa782", "905b27d6fe624a28fd6fdd04cf7139333e052030", "07ebe9df86f0e6eb19fcdd03bbe9dd7f64ff887f", "067c7857753e21e7317b556c86e30be60aa7cac0", "acb418dd5b4f7095cc3ebe60920c7d14a4a14298", "85d555f7ce19740b4fc656ff797623c6e1513018", "08b11d0812f6cc3c9b954c116d36bd983ead6241", "17f1ff82aca7a592a8815e8169b6e2210bf6ae7a", "47dc52eeb7bf6efb46c550201cc8d52af71cc1a3", "c8e7d53d48c06a5b8a9f5b9b3920f33d3b1b5e93", "0abc3e83ccd6e685f8d0299f24f03ae28f4c2459", "164b6a0a9ab4a32b5fbcb519cd598f9a121b747d", "71a2d8c473f13d0c664f751db97e81128281b1eb", "0b0422b5864ca1a25d6af274bad11c1b2fef1ed5", 
"5e7567dc5c9922527e7ce5e4fd62981488a09829", "3c4ae51452823afafabe8d33d51218d1d95c2795", "73bded14fb8c3b4bd5c2ae554d704d3ad3ff907e", "611cce8f8236c1de04c3217f4341c9e03cd8a1eb", "03a97821f3f77490f1c775501762985f10cd7be8", "57eea2cb6cda923e9403b17fc04c1a494141149b", "6111f1a9ab657910f5a11a95de117b3c5181565a", "043029ff68d0449eacae8a67fc62ed4ee03215a2" ], "paperAbstract": "Nested virtualization, the ability to run a virtual machine inside another virtual machine, is increasingly important because of the need to deploy virtual machines running software stacks on top of virtualized cloud infrastructure. As ARM servers make inroads in cloud infrastructure deployments, supporting nested virtualization on ARM is a key requirement, which has been met recently with the introduction of nested virtualization support to the ARM architecture. We build the first hypervisor to use ARM nested virtualization support and show that despite similarities between ARM and x86 nested virtualization support, performance on ARM is much worse than on x86. This is due to excessive traps to the hypervisor caused by differences in non-nested virtualization support. To address this problem, we introduce a novel paravirtualization technique to rapidly prototype architectural changes for virtualization and evaluate their performance impact using existing hardware. Using this technique, we propose Nested Virtualization Extensions for ARM (NEVE), a set of simple architectural changes to ARM that can be used by software to coalesce and defer traps by logging the results of hypervisor instructions until the results are actually needed by the hypervisor or virtual machines. We show that NEVE allows hypervisors running real application workloads to provide an order of magnitude better performance than current ARM nested virtualization support and up to three times less overhead than x86 nested virtualization. 
NEVE will be included in ARMv8.4, the next version of the ARM architecture.", "pdfUrls": [ "https://www.cs.columbia.edu/~nieh/pubs/sosp2017_neve.pdf", "http://doi.acm.org/10.1145/3132747.3132754" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a027a5a5d021f8236678d730c74a71ab43ffebd3", "sources": [ "DBLP" ], "title": "NEVE: Nested Virtualization Extensions for ARM", "venue": "SOSP", "year": 2017 }, "a059c48208c458955449d6a33985add48c3d61ad": { "authors": [ { "ids": [ "34629911" ], "name": "Marquita Ellis" }, { "ids": [ "2418537" ], "name": "Evangelos Georganas" }, { "ids": [ "40108011" ], "name": "Rob Egan" }, { "ids": [ "1736072" ], "name": "Steven A. Hofmeyr" }, { "ids": [ "2238795" ], "name": "Aydin Bulu\u00e7" }, { "ids": [ "2673141" ], "name": "Brandon Cook" }, { "ids": [ "1757847" ], "name": "Leonid Oliker" }, { "ids": [ "1731111" ], "name": "Katherine A. Yelick" } ], "doi": "10.1007/978-3-319-64203-1_6", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_6", "entities": [ "Algorithm", "Assembly language", "Computational problem", "De novo transcriptome assembly", "Manycore processor", "Multi-core processor", "Scalability", "Self-assembly", "Simulation", "Software transactional memory", "Speedup", "Whole genome sequencing", "Xeon Phi" ], "id": "a059c48208c458955449d6a33985add48c3d61ad", "inCitations": [ "8daaabe92e8032ff509bc9a9bce095c17d848a0e" ], "journalName": "", "journalPages": "79-91", "journalVolume": "", "outCitations": [ "d795a755b311195803a03b4f1945299e0c699bcb", "25f017efd2905c6d0c6a92f2dfe19113ee42938e", "5271196259ef99378d992db18daf35674851d53d", "6c1f7496580d1169b232c53981f1e63e593be21f", "40c5441aad96b366996e6af163ca9473a19bb9ad", "309e8e0f88015d7b22b406e43f13bf0a1115bc1a", "579d5c4b23445de009d9cc894556420b49682f9e", "38410204a2977a9484e6d1205697b4103b5482f7", "f17fb68a4c6b93167dddc8290bc76f5f90d326ab", "4a033b4544d4af982ebb88bc39d05f9a4c5b6d8d", "6aa080787b51c11f27734e2fa440635e2ed24b1a", 
"3f6ec7c3909ee1f88b45fa6dfecc95024546c679", "32a2940bbb3aa60b786dcaf42e810eca831a16a4", "a9997aac5f4733ffcae221f67638503179733ad2", "0faadb88cfebdd7967f2d6f422429e0af634774d", "8fa08913694b934c6a66ad2c31b7165f1938f6b8" ], "paperAbstract": "De novo genome assembly is one of the most important and challenging computational problems in modern genomics; further, it shares algorithms and communication patterns important to other graph analytic and irregular applications. Unlike simulations, it has no floating point arithmetic and is dominated by small memory transactions within and between computing nodes. In this work, we focus on the highly scalable HipMer assembler and identify the dominant algorithms and communication patterns, also using microbenchmarks to capture the workload. We evaluate HipMer on a variety of platforms from the latest HPC systems to ethernet clusters. HipMer performs well on all single node systems, including the Xeon Phi manycore architecture. Given large enough problems, it also demonstrates excellent scaling across nodes in an HPC system, but requires a high speed network with low overhead and high injection rates. Our results shed light on the architectural features that are most important for achieving good parallel efficiency on this and related problems. 
AQ1", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_6", "http://gauss.cs.ucsb.edu/~aydin/HipMer-EuroPar17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/a059/c48208c458955449d6a33985add48c3d61ad.pdf", "s2Url": "https://semanticscholar.org/paper/a059c48208c458955449d6a33985add48c3d61ad", "sources": [ "DBLP" ], "title": "Performance Characterization of De Novo Genome Assembly on Leading Parallel Systems", "venue": "Euro-Par", "year": 2017 }, "a0979efcf14f23a510aff548db1da66f3ecac705": { "authors": [ { "ids": [ "2609934" ], "name": "Geumhwan Cho" }, { "ids": [ "1805420" ], "name": "Jun Ho Huh" }, { "ids": [ "2942667" ], "name": "Junsung Cho" }, { "ids": [ "1925273" ], "name": "Seongyeol Oh" }, { "ids": [ "2259283" ], "name": "Youngbae Song" }, { "ids": [ "1802864" ], "name": "Hyoungshick Kim" } ], "doi": "10.1109/SP.2017.61", "doiUrl": "https://doi.org/10.1109/SP.2017.61", "entities": [ "Android", "Lock (computer science)", "Real life", "Usability" ], "id": "a0979efcf14f23a510aff548db1da66f3ecac705", "inCitations": [ "49ad378e88e508b4d7818f5d247dc559d8e3d168" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "338-356", "journalVolume": "", "outCitations": [ "49140401a7a4290ec5da00f76601ca115c007b74", "894f0e12c5578864bd5f3b1608a25707c277af2f", "7095ea5eb985d8a06f3d9aa65698c2b916313064", "c4b3ba5d7960eaa488490b01f98442b23c85b16d", "e8b16e99dd0b01bd897e11d58ecf4f8085755335", "8650b37db13b8352da0b31711edab817e63cd281", "2c61a7a2ec8ac2178812fab42a222f35918f47ce", "62bf472a70a99aa7af1280e23c95250132ba34e7", "7f7b85864f511a472aec88828296eb558606189f", "d3ad7225152481fd283d9c1b29f86cfcc38b46ea", "2548bd5294aa11e7ddddffd816cde7397d1b03ed", "9b749a89ff99eb4eca609724916b118a6d2f49eb", "5f4f110872661bcb10ebc03ae851ab976d64b351", "0065cfac2f343795a7911214c70a578e6bf36083", "6f9f136280fe41de21faca8a5349b37546358b5b", "47d72cc544b3396c498e9d8eee53c875fa89f4e1", "88c89fbffbf9e14e3b82cceab14408c6d368d59c", 
"197350a9fa144d04129b152314d746da365ed916", "5e32b2242241de92d6f20fd315c2bebc1f251cb7", "486020d807340c26d0b7155a6948383a01ffff43", "1e4927abe09777f7062c99db28036cc18b2312c0", "0273d4de8fe5388b236d959616a1f4fdaf86bad6", "46f158aaace8a5cd2ba99320b43f201278b169cc", "4f954a4b99131763635d27ee425f2c6eadf1b575", "0d7f6c69656df44280cdc1c709f1ea7f62c8bb61", "bf87bea0e0feee8a684b47be63cbaa34122568a1", "0a5e6f8370b7b499fc2354e064646bd6e677e41c", "8bba0a42134ea5df896cdde59e62bbd90e9f4dd4", "0b864a2b27c3842c014279be6bb297e11daebfe3" ], "paperAbstract": "To improve the security of user-chosen Android screen lock patterns, we propose a novel system-guided pattern lock scheme called \"SysPal\" that mandates the use of a small number of randomly selected points while selecting a pattern. Users are given the freedom to use those mandated points at any position. We conducted a large-scale online study with 1,717 participants to evaluate the security and usability of three SysPal policies, varying the number of mandatory points that must be used (upon selecting a pattern) from one to three. Our results suggest that the two SysPal policies that mandate the use of one and two points can help users select significantly more secure patterns compared to the current Android policy: 22.58% and 23.19% fewer patterns were cracked. Those two SysPal policies, however, did not show any statistically significant inferiority in pattern recall success rate (the percentage of participants who correctly recalled their pattern after 24 hours). In our lab study, we asked participants to install our screen unlock application on their own Android device, and observed their real-life phone unlock behaviors for a day. 
Again, our lab study did not show any statistically significant difference in memorability for those two SysPal policies compared to the current Android policy.", "pdfUrls": [ "http://seclab.skku.edu/wp-content/uploads/2013/05/syspal.pdf", "https://doi.org/10.1109/SP.2017.61" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a0979efcf14f23a510aff548db1da66f3ecac705", "sources": [ "DBLP" ], "title": "SysPal: System-Guided Pattern Locks for Android", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "a0bf0e6baedc197199d684ff9c294ba9261cbd8d": { "authors": [ { "ids": [ "3478513" ], "name": "Yuchen Bian" }, { "ids": [ "2090567" ], "name": "Jingchao Ni" }, { "ids": [ "1707763" ], "name": "Wei Cheng" }, { "ids": [ "38822242" ], "name": "Xiang Zhang" } ], "doi": "10.1109/ICDM.2017.11", "doiUrl": "https://doi.org/10.1109/ICDM.2017.11", "entities": [ "Algorithm", "Amiga Walker", "Cluster analysis", "Clustering coefficient", "Multiply-with-carry", "Network analysis (electrical circuits)" ], "id": "a0bf0e6baedc197199d684ff9c294ba9261cbd8d", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "21-30", "journalVolume": "", "outCitations": [ "7cb3403411bb562da14757c2ba905258750f65b9", "1dd8db60043f51c04eb7200915ebd253d2fabf64", "141e35263ab810983c90d47ad62eb4fab5e51717", "b48d230e170db57dcaa4f204a6f8695f52fb28de", "23de9f75103a62fbffe76fd66a3a3f0089c87489", "3baecc04e1341cbae7999e8f61a3946c76504828", "262811f8a8633687e4a71b01b224f8c62b38f07e", "07ad62b6b5da5f226c88549378886ca062e207a0", "1c8e9f9fa4a030ef03d6b5f912a8ff1bc1cb2c47", "13c347cff81bdb5753ff8f626d13d603d469592b", "2ec587a3b1a3ff5544fda95f00e1681be0c0e8b9", "2d343fd4a6c88503700b24821114c20d62605407", "1b348075d02cc532b1a01955e21ba3062e769113", "1fbd00f1e28a1b8afe71db7842d9f2b0de6df8df", "a9cf10a92e9f4dc0f3486bd800051cf0150ebe63", "237dc6e66cc6691bf2245c02cc19a6e0c077fde9", 
"da5156036f113207d50654c6e7097ee84733766f", "0a3b29c96cc1786971bdc701a7b77dd41067998f", "2743c9b33d463a5568e54a01dea566724b88c342", "2340ff1620b00f8cae0c66ea7504201895dde1ff", "009dbf3187862352aac542bf7d61e27bce6b27f5", "66549f785d13a44171fcc21899802325e7d923cd", "1ef8c8c815b7268d7f7d4fe76af78aaa8df3e6da", "37030e618f7caa7a8c3fec3454fb0d43915002a4", "eba76826b8812ae81f476ea3e4d3916a82e757d8", "4343fd5edb47bf39ed0dca8085a22097584ed597", "2eefcfc0bc0792baa80c4b229699359d421e3850" ], "paperAbstract": "Local community detection (or local clustering) is of fundamental importance in large network analysis. Random walk based methods have been routinely used in this task. Most existing random walk methods are based on the single-walker model. However, without any guidance, a single-walker may not be adequate to effectively capture the local cluster. In this paper, we study a multi-walker chain (MWC) model, which allows multiple walkers to explore the network. Each walker is influenced (or pulled back) by all other walkers when deciding the next steps. This helps the walkers to stay as a group and within the cluster. We introduce two measures based on the mean and standard deviation of the visiting probabilities of the walkers. These measures not only can accurately identify the local cluster, but also help detect the cluster center and boundary, which cannot be achieved by the existing single-walker methods. We provide rigorous theoretical foundation for MWC, and devise efficient algorithms to compute it. 
Extensive experimental results on a variety of real-world networks demonstrate that MWC outperforms the state-of-the-art local community detection methods by a large margin.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.11" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a0bf0e6baedc197199d684ff9c294ba9261cbd8d", "sources": [ "DBLP" ], "title": "Many Heads are Better than One: Local Community Detection by the Multi-walker Chain", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "a0d9267a0bacc12729126719451967f6a41c9955": { "authors": [ { "ids": [ "1900699" ], "name": "George Michelogiannakis" }, { "ids": [ "1746446" ], "name": "John Shalf" } ], "doi": "10.1109/HiPC.2017.00018", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00018", "entities": [ "Algorithm", "CPU cache", "Climate model", "Collective intelligence", "Data parallelism", "Dynamic random-access memory", "Dynamical simulation", "Facial recognition system", "Image processing", "Link prefetching", "Memory address", "Multi-core processor", "Parallel algorithm", "Parallel computing", "Prefetcher", "Run time (program lifecycle phase)", "Simulation", "Supercomputer" ], "id": "a0d9267a0bacc12729126719451967f6a41c9955", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "72-83", "journalVolume": "", "outCitations": [ "1a5ce35fc5ad575c2c9e5f692bf263082d656f80", "019ebe0205a759f8dab80b617f9f8ccd179c5c62", "43c07ad30e0ce39430a9339672c1f25d35a0c924", "14a477cf712ad5647180e6233dd0638c6c269fdd", "2973052248a903ef2033f800c66979262d9ab718", "002e5d1003a5d8192f43419350cee2c94562478f", "1eeb50d5f7937f65a910203ae61430ff8b969012", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "2af32811c6bf3be891ee84b19248540dfa1aa58f", "8b4682a90b39d0b95d92098be48f05687cb23086", "1c2aed008f7bcb769d0e6f8109434c766fb22bde", "0110c80228683bc32879efb1b2f3931421e52eb6", 
"0036adadc90e4826b2f7fc157752eea459070c32", "12d8ead802196d498877ffe92ff4c42ca1ce7694", "83903c5244bf45fa0a69076bf49a4dfad8bad7fd", "738b1253c656db5c82aad1838867ed7ab629677d", "886f29f247fd49084fbf25fdd60049b47db4f4ea", "55feee96f99106b0299fef657adb75e334f7ff8a", "78afa9e11c7bdabecb1b83b7c7a0d71fb88873f0", "9bb6b8e4005e6842b705d0adbc1bbd0e0af8351c", "3ce662e1663456ce2a5b5d240112721c0d0a4582", "2957200e36b7cf809553c706e17ffadd722bed2a", "8d71fb5efe95801b31d65366ff1ce8c01525e493", "0455a164fdd31c24c37104853544a66191660659", "1262176518bb210bd46f120d3782f1677af180cd", "5984d89301db24fe9bf6d45679a996b3f54ec857", "837be6dc51f0ccf8135ebaea8a48afc3faf5b14b", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "3729ff8f0465348fb5a347de612219b86909b686", "84af439f436046933363acc01a721fcb65d78dbb", "385ecc6897c593486c79a17dd4e538a4fe19b1b5", "0efc0fcd1a3e4e6131997f353e43b6ebf56f45bf", "3f7065233b11b4c5ec00e6054c8c634de9832d5e", "28c14bbd81e89b29ca1d00b109d1188ed9af3c9a", "455a1900007ee309a70799abfe82696eeff4a976", "007394c2bae389cf43e46db4567dafe206355c25", "0624bfdc9e4dd6ff07c66c1c0251f1c78804256e", "10db43d5a1b3c1ddf6dc93fa992b8ed42dcf9833", "04b99244652d2aeba265acdd94ea0bd294d7a5f5", "d4d64409cb3a0f84223aea17909c76b159490e6d", "54f3331b575b2d451c2d716f86496cada23d596d", "ba0043bd748bb26a1ba2a8297df6320d8df7489d", "724fb8f5a10b1fef00bf8604cfb4eb4fe92756c0", "1d59804e5e433d654d9005f49a010cb508cd81b7", "2679adf7b58a4c8dc722c6e3b23d0d2b194a7189", "5050a42d589f8d6d75a7deabb42a32a1e8a370df", "170ac68f69b17fc306c814782d1fcec16f01b6e2", "082573e4dc88f38628242d193c966725ab355026", "624bd22fcd5746d84fb8e07a0f0234200726ec8d", "73dd5dde28119e41dd0f0a07275b7f722c4619d2", "60a1389c827f9f706c9dc1639e2584f0f3de878e", "785f69fbf3ca670bc082f1e669b9b433100a0596", "0ceaf6426740fdb13ea51ec26e7fa05a436f96b5", "3933d3380710680865441c87a22d691ba751a2ef", "107bab67aa1f40a91689376f633aedcad34f114a", "36634852b6ae5e3ce40e031c730b82881c585fba", "312279ba476f0f7900999f61c98454a574ecd0b0", 
"264a8c0de024dde8aef4d0320dff7639f59cf09b", "0e2efda23894526e869e57cb81c76de22f6a8d20", "403c69de1f30094a545eeb08adeb872f61865f34", "3087eeb39c88b1fc9bdc72812930451fc98cedec", "9f9e5ab13cb63ca38d9c95f7725115afafb11134", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "c120ba78c0a09f4a97f2979f3e9ff9710c3932bf", "48e6cb0ce1b6d7ed665613664b65a4a9c124c6bf", "429d28998216da5648f40248bf4bc9e508edd2fd", "85398d5f19157c91bf00da3d36210e72d57887e4", "510eff441d0ef057efbaad1428cd0ea92329265c", "4831977f158c3627b66a2b736b6ea60e9267f1a3", "19ee7afaf66d895eca9cb2ee0c0a87ad6398984d", "54bf1d3d222a08ef6380c44a8af33a69a95b554b", "198f1ecd14b376006445aebec84df7e3e79fc149", "470f6f2d46247282158b29c706a02fa7e465b059", "7bee024cfab6e16be7c57e2ddbe13618d2a2968c", "56470b57284ebc284bd20a681d2912f0dfe6bbe3", "497cf2a63c2b6e41719f47ec130f5bca897ad6b7", "1739fd145ef1d327ab301cacc017af2a87f33086", "36b0217d5f07cdd3d63c5656434c26bfc5b18fe7", "8c34cdd2bab66623d2831004fbd1fa1cdf8a0366", "b5f9262a17961f1ce5db7b1130a0e951f067388e", "a77965261272eb3e37226c07cd2c3402f3bae879", "078e00be20bfdc4eeae762f6170ccded05f452c6", "71b2d85dc0ad8524daafe540a47c68ea8411d711", "8ec6fe079e23c92ba2dee50932ffb3abe5e10912", "180189c3e8b0f783a8df6a1887a94a5e3f82148b", "20338bf6e95c532cceaa57087fa6436af17dc005", "72af62917a53f7f88f54fc658f3daade61284937", "2aed1e5dab68713bf8ed0fb4b35a0ec67b087c30" ], "paperAbstract": "With rapidly increasing parallelism, DRAM performance and power have surfaced as primary constraints from consumer electronics to high performance computing (HPC) for a variety of applications, including bulk-synchronous data-parallel applications which are key drivers for multi-core, with examples including image processing, climate modeling, physics simulation, gaming, face recognition, and many others. 
We present the last-level collective prefetcher (LLCP), a purely hardware last-level cache (LLC) prefetcher that exploits the highly correlated prefetch patterns of data-parallel algorithms that would otherwise not be recognized by a prefetcher that is oblivious to data parallelism. LLCP generates prefetches on behalf of multiple cores in memory address order to maximize DRAM efficiency and bandwidth, and can prefetch from multiple memory pages without expensive translations. Compared to well-established other prefetchers, LLCP improves execution time by 5.5% on average (10% maximum), increases DRAM bandwidth by 9% to 18%, decreases DRAM rank energy by 6%, produces 27% more timely prefetches, and increases coverage by 25% at minimum.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00018" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a0d9267a0bacc12729126719451967f6a41c9955", "sources": [ "DBLP" ], "title": "Last Level Collective Hardware Prefetching For Data-Parallel Applications", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "a146ceab16c2b0743c0759e0fcce12a9dfd54636": { "authors": [ { "ids": [ "2864704" ], "name": "Shahin Tajik" }, { "ids": [ "3371903" ], "name": "Heiko Lohrke" }, { "ids": [ "4885298" ], "name": "Jean-Pierre Seifert" }, { "ids": [ "1681989" ], "name": "Christian Boit" } ], "doi": "10.1145/3133956.3134039", "doiUrl": "https://doi.org/10.1145/3133956.3134039", "entities": [ "Adversary (cryptography)", "Bitstream", "Contactless smart card", "Countermeasure (computer)", "Encryption", "Field-programmable gate array", "Gate array", "Hall effect", "Plaintext", "Reverse engineering", "Unified Model" ], "id": "a146ceab16c2b0743c0759e0fcce12a9dfd54636", "inCitations": [], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "822", "journalVolume": "2017", "outCitations": [ "a1c941b1daa5d2e8f0bfdf72a15efcaed6a97c1e", 
"6a69c10fdfb819dbf0c45075ac570704ada41d57", "430a15e9d8b8e8f1cdb19a500fa0769b986565eb", "18fde05984e680354aee4af6ebcbe3ad8668f63a", "c82a74c8e86850a7a40ab4605cef626c2125344b", "9edcedf41b2edfb46d09fcc8ef9506f78c19da01", "14457309fcf3483f52b1c0ae253784d91f56ed5f", "b6940c8c98f4754bae5a19b55f398486b906801d", "e452c672df7d52ec271364d39b09b3e91b8ecfff", "c7625d8a3ce49358cb4308f1875bded9e39c82af", "45f23c12d2457cbcaaf6eaca3448145fa2e15a13", "3cb2ed78b9d0b3b12dd1def159348938cfea6200", "bd8a4ac6f64dad917f0ea1ece6300bab13284c84", "b4d9a259db32a92c162ed44e89d3a0cfdacd3f67", "2605a4ef5b3464f8b31c191534a45a761cfe5f6b" ], "paperAbstract": "Modern Integrated Circuits (ICs) employ several classes of countermeasures to mitigate physical attacks. Recently, a powerful semi-invasive attack relying on optical contactless probing has been introduced, which can assist the attacker in circumventing the integrated countermeasures and probe the secret data on a chip. This attack can be mounted using IC debug tools from the backside of the chip. The first published attack based on this technique was conducted against a proof-of-concept hardware implementation on a Field Programmable Gate Array (FPGA). Therefore, the success of optical probing techniques against a real commercial device without any knowledge of the hardware implementation is still questionable. The aim of this work is to assess the threat of optical contactless probing in a real attack scenario. To this end, we conduct an optical probing attack against the bitstream encryption feature of a common FPGA. We demonstrate that the adversary is able to extract the plaintext data containing sensitive design information and intellectual property (IP). In contrast to previous optical attacks from the IC backside, our attack does not require any device preparation or silicon polishing, which makes it a non-invasive attack. 
Additionally, we debunk the myth that small technology sizes are unsusceptible to optical attacks, as we use an optical resolution of about 1 um to successfully attack a 28 nm device. Based on our time measurements, an attacker needs less than 10 working days to conduct the optical analysis and reverse-engineer the security-related parts of the hardware. Finally, we propose and discuss potential countermeasures, which could make the attack more challenging.", "pdfUrls": [ "https://csaw.engineering.nyu.edu/application/files/1415/0825/8652/CSAW17_paper_157.pdf", "https://eprint.iacr.org/2017/822.pdf", "http://doi.acm.org/10.1145/3133956.3134039", "http://eprint.iacr.org/2017/822" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a146ceab16c2b0743c0759e0fcce12a9dfd54636", "sources": [ "DBLP" ], "title": "On the Power of Optical Contactless Probing: Attacking Bitstream Encryption of FPGAs", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "a155264f143aafd380f40fd0167c9b7960f64ea2": { "authors": [ { "ids": [ "3139121" ], "name": "Yinzhi Cao" }, { "ids": [ "1806013" ], "name": "Song Li" }, { "ids": [ "8405939" ], "name": "Erik Wijmans" } ], "doi": "", "doiUrl": "", "entities": [ "Device fingerprint", "Fingerprint", "Operating system", "Video card" ], "id": "a155264f143aafd380f40fd0167c9b7960f64ea2", "inCitations": [ "91639e55d0cc4e46ecf9cae172a80e831edf1d18", "fe7f6f26d0ed371c1f625027600c20260f9e0622", "051961468e3e7a3855eaff8ac9ec35e0235a4a38", "38574a13b6626e0c5a63e4298fc6a314e848789e", "5dbdc0b0c527335d5efff8477ec3679facd333db", "29696aad37a2e83480a4a071f7b463ddac072669", "5763961c699ba40007ea0b8c2508590378a7fdfd", "a23b1f45227150506c43afbe9eb4edf711c7e9c2", "8d3f70c046acbaa20cfc1c4e914a087e1279d53e", "d8aa74c325787147d3b3623dff7784c639591969", "58149610a57cb4626918bf003b8bad25e740b1f4", "d758efcfeeaff10e2dd3de0f7174d5c57a79d769" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ 
"05ad6c3ab7a0b1ab0c4fc3af9f1622cf6c0fa40e", "075cac5b7d31c2a5c8fe1fbb0ffc28b806a9d89a", "01dbc5466cce6abd567cc5b34a481f5c438fb15a", "820a83807b6530afde5ddbccb81af9794780993f", "0eaa75079b43ebb1a6a3fcb0f57f9c9e7e742aff", "5a032460c589a67e7c73b19c93aa591331758139", "51b0ce84988e083d6253af098542f905e1fea0a8", "598848aaa4aa40bb6b7ab51490821a173cf18800", "cfd26a17234696593919df3f880a235d6ac5871d", "02ef09e439f8f268ffdec9887a9ac576180ca6c8", "0d2f693901fba451ede4d388724b0e3f57029cd3", "1f38c11fe8511c77fb7d383126214c9e7dc28e4a", "38720bc0d2c36c156bd2be9b472be6f83169e41a", "08a11b3b2850f422979e724c971dd8d2003180d2", "42cf18892910afd15b0d6872f16384a7bb6cf915", "2235aa3e3607a73cdd5c88eff4f7e6bedea6952d", "fe2f4faec5cf209ae7d8a73100db9cce46ce53d4", "54c4cfcc2914f7f976cd08c7acc5fc05cfe22f52", "0fd652356d1ecbc457504a17f594187ccab2fe76", "3957270267c2bba0ac00ab3c4461f0c47cfd95c3", "9a3c791067911d17a79918b1b0b5826beaeb2fe1", "a1b2b348e71569ca9b1663b045cae7c194b34044", "3208feae829cba6bd319421fe1fea58962da8fd9", "bdb430d06f3e134b2df8965dd62c77ac87710064", "311480d3924bbbebafa8e1fa86b662056a4ad532", "3c46e697b706390f463874aa0e5e990235401439", "834cdbde6e7800f9f50d4884858bb093fc3b65f6" ], "paperAbstract": "In this paper, we propose a browser fingerprinting technique that can track users not only within a single browser but also across different browsers on the same machine. Specifically, our approach utilizes many novel OS and hardware level features, such as those from graphics cards, CPU, and installed writing scripts. We extract these features by asking browsers to perform tasks that rely on corresponding OS and hardware functionalities. Our evaluation shows that our approach can successfully identify 99.24% of users as opposed to 90.84% for state of the art on single-browser fingerprinting against the same dataset. 
Further, our approach can achieve higher uniqueness rate than the only cross-browser approach in the literature with similar stability.", "pdfUrls": [ "http://www.yinzhicao.org/TrackingFree/crossbrowsertracking_NDSS17.pdf", "https://www.internetsociety.org/sites/default/files/ndss2017_02B-3_Cao_paper.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/cross-browser-fingerprinting-os-and-hardware-level-features/" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/a155/264f143aafd380f40fd0167c9b7960f64ea2.pdf", "s2Url": "https://semanticscholar.org/paper/a155264f143aafd380f40fd0167c9b7960f64ea2", "sources": [ "DBLP" ], "title": "(Cross-)Browser Fingerprinting via OS and Hardware Level Features", "venue": "NDSS", "year": 2017 }, "a15839ce22c2a681b442caf965aace8fd9b4f11a": { "authors": [ { "ids": [ "2392899" ], "name": "Jennifer B. Sartor" }, { "ids": [ "1941512" ], "name": "Kristof Du Bois" }, { "ids": [ "2070187" ], "name": "Stijn Eyerman" }, { "ids": [ "1717133" ], "name": "Lieven Eeckhout" } ], "doi": "10.1109/ISPASS.2017.7975267", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975267", "entities": [ "Benchmark (computing)", "Delimiter", "Garbage collection (computer science)", "Initialization (programming)", "Java", "Memory management", "Multi-core processor", "Operating system", "Program comprehension", "Program optimization", "Run time (program lifecycle phase)", "Scalability", "Scheduling (computing)", "Speedup", "Thread (computing)", "Tracing garbage collection", "z/OS" ], "id": "a15839ce22c2a681b442caf965aace8fd9b4f11a", "inCitations": [], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "23-32", "journalVolume": "", "outCitations": [ "38928932df7520bb86d87d52b06e9702858ff5b9", "0a964585f4a3f9801c79a8a9f34907413ee6a4b5", "047ce7ad6d9d4f674e1775da86723c7e869e375b", "426b58973d650a5d76cae091be16e97fdbad198b", "24251f02c34f32b1dd96572a1d984c4463a26a10", 
"1fce9ad71fe8654fbb093735501ff713bb9ee735", "26039c7a845cd8712dedad4e4b02226f1296e84d", "84cb332f6e388f218357128793a57b42b296f1e6", "56917004d3cc3aca5668305fb33ddda4855dd519", "9c3857a69a7eec7777418627cfc87ad5e79572ca", "0bf0a5ba7045e7faab3546da103f0d69a5e91e72", "5128bfa2e95e165b3e70d0bc5062f190909079d2", "00a9ba0063d34ec56792849a67ef57b4601becbb", "b64158ca108facfaaf57e94d3a7458b6a4ce5aa8", "3fd85d5f5217b7df40e8fd6a8ef7d285fc4bb7e8", "4a4ff9f2bf4e993471f8b7fb82f048182c751892", "daa5538192e0058e12a83bd64fd19866c01adcf6", "0f11d823f6c2eb9cd327c858e7259047d14e5cf8", "87bf9c81ed472f79e067d59db244f7c8870735d6", "620bb971f3b71e8cc146e5a3748f090aaa03e839", "28076b479b83c83a98b8a8ff925b1e1446314b03" ], "paperAbstract": "Understanding the reasons why multi-threaded applications do not achieve perfect scaling on modern multicore hardware is challenging. Furthermore, more and more modern programs are written in managed languages, which have extra service threads (e.g., to perform memory management), which may retard scalability and complicate performance analysis. In this paper, we extend speedup stacks, a previously-presented visualization tool to analyze multi-threaded program scalability, to managed applications. Speedup stacks are comprehensive bar graphs that break down an application's execution to explain the main causes of sublinear speedup, i.e., when some threads are not allowing the application to progress, and thus increasing the execution time. We not only expand speedup stacks to analyze how the managed language's service threads affect overall scalability, but also implement speedup stacks while running on native hardware. We monitor the application and service threads' scheduling behavior using light-weight OS kernel modules, incurring under 1% overhead running unmodified Java benchmarks. We add two performance delimiters targeting managed applications: garbage collection and main initialization activities. 
We analyze the scalability limitations of these benchmarks and the impact of using both a stop-the-world and a concurrent garbage collector with speedup stacks. Our visualization tool facilitates the identification of scalability bottlenecks both between application threads and of service threads, pointing developers to whether optimization should be focused on the language runtime or the application. Speedup stacks provide better program understanding for both program and system designers, which can help optimize multicore processor performance.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975267" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a15839ce22c2a681b442caf965aace8fd9b4f11a", "sources": [ "DBLP" ], "title": "Analyzing the scalability of managed language applications with speedup stacks", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "a1635f941451d7d378c00b8188eeb06c15a9af5b": { "authors": [ { "ids": [ "2666202" ], "name": "Hari Sundar" }, { "ids": [ "1777719" ], "name": "Parmeshwar Khurd" } ], "doi": "10.1109/ICPP.2017.28", "doiUrl": "https://doi.org/10.1109/ICPP.2017.28", "entities": [ "Algorithm", "Computation", "Cray XK7", "Delaunay triangulation", "Image analysis", "Parallel algorithm", "Routing", "Scalability", "Supercomputer", "Titan (supercomputer)" ], "id": "a1635f941451d7d378c00b8188eeb06c15a9af5b", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "191-200", "journalVolume": "", "outCitations": [ "cdd2e4f1edc9409bdfe18bea2077d1d6a747aa21", "1991ec48e1ff4b273b96d3bf1f039c3b17b0a5c2", "c04ff62fd8366fa57fb9a039a52e590470066f43", "064da94eda53212c23db85261c9df55b01b9a9f1", "a6ba7bf4a5ac756ebafb54dad644adbc7e30b76e", "7ef754dae667c5a4c3ab8e51ffb66c7821e6cf4f", "1817b7392a8ca41a4c16311ee0efd7676fb05747", "bb99d9702431f286c5b324c4243edba8eabe5b36", 
"b4611aadfb3d08a85a220e2b061a2c22c03b6bbb", "949f0d4ea6d730f29aa11d42c061f3ddbd68888d", "49e998c509fc536628c7fc9d28ba55693b8e82a0", "445c01f4ecdf8559d150bb1aa656e800306e04a3", "5faf588c2cee6aa549a5d7c88b67366ae2a49691", "3329c9855e4743742062781d427a526df0a4a6b6", "7bcc53f1baf3358517a602d856192faea9442c91", "1d6e55cfb0a61ddd6cc326aeeac513f553e565c9", "65b7c62555d2c3f5763651bff19ec6bf040ef5db", "5aaf882f18185072b45c2bae0f8c1becea4d184d", "f88d65fedc709e006289d905915952db26f0c8c6", "f517454bea81d117ad9c9cecc68b21204aae881b", "67dd1137c7c1704cba8514431b24fd5e831ba4e7", "25a33619091b68865a8859c075c20d2a2da5fb3a", "299bbe1f4f9b38e0308c81fe20b357a8908405e6" ], "paperAbstract": "We present parallel algorithms for computing cycle orders and cycle perimeters in relative neighborhood graphs. This parallel algorithm has wide-ranging applications from microscopic to macroscopic domains, e.g., in histopathological image analysis and wireless network routing. Our algorithm consists of the following steps (sub-algorithms): (1) Uniform partitioning of the graph vertices across processes, (2) Parallel Delaunay triangulation and (3) Parallel computation of the relative neighborhood graph and the cycle orders and perimeters. We evaluated our algorithm on a large dataset with 6.5 Million points and demonstrate excellent fixed-size scalability. We also demonstrate excellent isogranular scalability up to 131K processes. 
Our largest run was on a dataset with 13 billion points on 131K processes on ORNL's Cray XK7 Titan supercomputer.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.28" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a1635f941451d7d378c00b8188eeb06c15a9af5b", "sources": [ "DBLP" ], "title": "Parallel Algorithms for the Computation of Cycles in Relative Neighborhood Graphs", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "a1a4af9e2d0f115a84a23887b8bb63537ce91bc7": { "authors": [ { "ids": [ "2390548" ], "name": "Brent Carmer" }, { "ids": [ "1720644" ], "name": "Alex J. Malozemoff" }, { "ids": [ "1702744" ], "name": "Mariana Raykova" } ], "doi": "10.1145/3133956.3133983", "doiUrl": "https://doi.org/10.1145/3133956.3133983", "entities": [ "64-bit computing", "Arithmetic circuit complexity", "Arithmetic logic unit", "Compiler", "Cryptography", "Digital rights management", "Encryption", "Experiment", "Functional encryption", "Mobile payment", "Obfuscation (software)", "Primitive recursive function", "Pseudorandom function family", "Pseudorandom number generator", "Universal instantiation" ], "id": "a1a4af9e2d0f115a84a23887b8bb63537ce91bc7", "inCitations": [ "6db9824d4667b22310c51fe638403238f873e9f2", "24231c0452dae6e8f91e14bc0decbffd1e96db65" ], "journalName": "", "journalPages": "747-764", "journalVolume": "", "outCitations": [ "197107b7ec65a623b59987cf7243921908068751", "1cf4f34f6060d1f398c8235679b97a2a989f7861", "1084e53f970e3296ae70b3fb8061e322107a2a3c", "3193029ccf35c55e494cc77c626152293aa27ff3", "fd4a86772418e95ea7c2568cb43893cde8a94a72", "94eca1a90226bf3b73ad63bc7993721c64815c81", "5c07dadf28f3cfdd67ab60a12d3ea9860bcd8b24", "8a37efc82e54353d387cfb073f9379c053988aef", "aa5276b19af47ed73fa1200d4fe6839b0229a276", "0003c342fd0b3e48a483901bd3b731b974fc1f37", "16672ac21ffc44665d2561682d4f65dc43984823", "0c2cbc336b113a81933dce0e45736316d647eabe", 
"8d501cb3aacc8a6350f17d7166a22867e0fcec3d", "1d8f6bd257f1433241aef52d5432eda6f6edb78e", "cc9223aca4d24b7d60c550b2527e25d676e2d28d", "069e80c561872ed678c1b2adf10d75ce66e4ff78", "0b87fcc3ae4653f3ad8a7c5bb6fe4b5923e0c021", "35308cb49c955cb827a398e35e9d25a71f2a28cb", "636c76dca24289ad9c47195bc4a574b6e780be9a", "3bb4b29c05db7c31eb68f3a7681f735fd7794051", "f36a8d4ea696a0d192a43c04158bae65bb472fb7", "2fb3c68ac20704fcda5b6ec91a3e166ec41f6c13", "02354c6603a58a5ccdc4ffa952782e6d296b4321", "504625252472061d7b0df7dfad63cfa6301262dc", "01a14a8c67335c1cfef255455126a85e53c0eeb6", "cf1351bf3cc7e9f5d61f793b784eca7bec7616fb", "0a7d54b4b43a9f2057b323c9e60be5e40cbaf26c", "b273f47f97fc3f1ed922c3effda9ab88c52a1680", "6ad3a228de26f3eccddb2123538d4362dcfeb302", "220afbdf387d4819564aa7ca6b141c2a2d2bb8f1", "0677d17466f47dc8ef5fb89221ff3007c6196c33", "2f7c97e82641e4f0f8c7b508b75af4952b0cc07d", "1c1ee7b39616c52e96d91e243dc8996cfed11027", "7fd4476dad0bed0e9fa07a11c1a3ebd6bda31787", "66385752e5ed336b2e247ef473f9077898426b60", "6e6dc4e2534146c276d3fc31732a5361ef1458da" ], "paperAbstract": "Program obfuscation is a powerful security primitive with many applications. White-box cryptography studies a particular subset of program obfuscation targeting keyed pseudorandom functions (PRFs), a core component of systems such as mobile payment and digital rights management. Although the white-box obfuscators currently used in practice do not come with security proofs and are thus routinely broken, recent years have seen an explosion of cryptographic techniques for obfuscation, with the goal of avoiding this build-and-break cycle.\n In this work, we explore in detail cryptographic program obfuscation and the related primitive of multi-input functional encryption (MIFE). In particular, we extend the 5Gen framework (CCS 2016) to support circuit-based MIFE and program obfuscation, implementing both existing and new constructions. 
We then evaluate and compare the efficiency of these constructions in the context of PRF obfuscation.\n As part of this work we (1) introduce a novel instantiation of MIFE that works directly on functions represented as arithmetic circuits, (2) use a known transformation from MIFE to obfuscation to give us an obfuscator that performs better than all prior constructions, and (3) develop a compiler for generating circuits optimized for our schemes. Finally, we provide detailed experiments, demonstrating, among other things, the ability to obfuscate a PRF with a 64-bit key and 12 bits of input (containing 62k gates) in under 4 hours, with evaluation taking around 1 hour. This is by far the most complex function obfuscated to date.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133983", "http://eprint.iacr.org/2017/826", "https://eprint.iacr.org/2017/826.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a1a4af9e2d0f115a84a23887b8bb63537ce91bc7", "sources": [ "DBLP" ], "title": "5Gen-C: Multi-input Functional Encryption and Program Obfuscation for Arithmetic Circuits", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "a1a68124a923bf681415998ff69e9e73e3295ad8": { "authors": [ { "ids": [ "39662889" ], "name": "Antonio Mallia" }, { "ids": [ "3230478" ], "name": "Giuseppe Ottaviano" }, { "ids": [ "22269520" ], "name": "Elia Porciani" }, { "ids": [ "2783910" ], "name": "Nicola Tonellotto" }, { "ids": [ "1797933" ], "name": "Rossano Venturini" } ], "doi": "10.1145/3077136.3080780", "doiUrl": "https://doi.org/10.1145/3077136.3080780", "entities": [ "Algorithm", "Approximation algorithm", "Compressed data structure", "Data compression", "Data structure", "Inverted index", "Optimization problem", "Program optimization", "Provable prime", "Web search engine" ], "id": "a1a68124a923bf681415998ff69e9e73e3295ad8", "inCitations": [ "6e2ecfde192df2d3e9f25f858776a7336965b03d" ], "journalName": "", "journalPages": "625-634", "journalVolume": 
"", "outCitations": [ "237f8ae5cd9dd51cb7f09d1ea09c5cbd2fbfb084", "738f8c4daa3090ed0c08e8b012014f757be8f168", "21c6caa930b2e76fe70e427c77a3e0a1b86c2514", "c463f666033883327eb43c6df504173dd6d950b3", "48fe4acbe59e450e3a86cf709c217493b743a87a", "aef1f8de55aa3db14bffc119e6529c4ed6886167", "20548990990c447ab54a3ecba82af2b5443a01d6", "fc3272302461b74217662085a8a05a5e500dbf05", "91a353974741cdcac274f8dfeabde87430fbc05b", "09d75defdd4c9632e3f8a569d8193c51ea66ca8e", "1719d5d60933883ce21c822a6f94bf33ef6066fa", "393d1f3b81410536944dff9514a2b787094e846d", "dc75b2811d2cd01b30fa850351aae2e0fa092cfe", "040678daf6a49a88345ee0c680fccfd134f24d4b", "a50970770c8804febb8755f65787a3a0411174f9", "0221b18d99c15f8e045f8d42653a3a3fee9f44f2", "66713fbcb8d5e48a9eb6425bd7fdbb53751e60b1", "16a66728e5550a9c1b1127da3b429c66a25540d1", "b648925030b450502ca5fa956561c243a7d01ba9", "04e2d423b03334d0a91b551016ed8e84c228fd5a", "d4ace73d54ffca372d79136c9811728704a5e74d", "7e39821fc23fcb78108995839994e4ab0442142e", "7321b709e5489f59f1124ca2f41ba17cdbcc1ad9", "0a717685158723040c56b42d67fc8988fab9fc73", "89d27fc4c5bf15762d001a39f0a74f84c89d3681", "c458b2718b1cbcb94794432632df8aa03becda3d" ], "paperAbstract": "Query processing is one of the main bottlenecks in large-scale search engines. Retrieving the top k most relevant documents for a given query can be extremely expensive, as it involves scoring large amounts of documents. Several dynamic pruning techniques have been introduced in the literature to tackle this problem, such as BlockMaxWAND, which splits the inverted index into constant-sized blocks and stores the maximum document-term scores per block; this information can be used during query execution to safely skip low-score documents, producing many-fold speedups over exhaustive methods.\n We introduce a refinement for BlockMaxWAND that uses variable-sized blocks, rather than constant-sized. 
We set up the problem of deciding the block partitioning as an optimization problem which maximizes how accurately the block upper bounds represent the underlying scores, and describe an efficient algorithm to find an approximate solution, with provable approximation guarantees. Through an extensive experimental analysis we show that our method significantly outperforms the state of the art roughly by a factor 2×. We also introduce a compressed data structure to represent the additional block information, providing a compression ratio of roughly 50%, while incurring only a small speed degradation, no more than 10% with respect to its uncompressed counterpart.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080780" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a1a68124a923bf681415998ff69e9e73e3295ad8", "sources": [ "DBLP" ], "title": "Faster BlockMax WAND with Variable-sized Blocks", "venue": "SIGIR", "year": 2017 }, "a1ae1215793bbec1f98c94639224002333e93b25": { "authors": [ { "ids": [ "17830134" ], "name": "Qinheping Hu" }, { "ids": [ "1806462" ], "name": "Loris D'Antoni" } ], "doi": "10.1145/3062341.3062345", "doiUrl": "https://doi.org/10.1145/3062341.3062345", "entities": [ "Algorithm", "Binary decoder", "Encoder", "Model checking", "Program synthesis", "Transducer", "Undecidable problem" ], "id": "a1ae1215793bbec1f98c94639224002333e93b25", "inCitations": [ "ab5a3ee384c0dfebe9c6d6ec946f402afb3f3474" ], "journalName": "", "journalPages": "376-389", "journalVolume": "", "outCitations": [ "443402448370bf0397bacb6fd35ca5eaa47042b8", "2c552f7d28e1d113554dc4f9289d9ef25c094ae9", "39d09723b83a7e647b4a188ad1e935254f409b40", "0f384b45ee96f84a20783fbbe0c11c942ba1073b", "1da09f4ed6a83640d21401d24e9c58bef0c90b16", "19096e10c13d16dc0afd5bedcd80cb3afc4b671d", "d235dd24f805c92188df5c860bc2a1d4863ca402", "507e8e8b48f6bce4c136dec692b27bba3b9da640", "bb9486a1e6c430ccac918bad6087729d4bcb1502", "5bc3a89d6b8c1daf3d5598356c0f89b991ef5b36", 
"3f259f7bab6ebce1071e1aafb808a8948907eed2", "5e12c7b1be9c7d8cad5532695ba4e02a6934ab23", "06f4a1d4b7347ebf53bd8c6436d379ac810c0ed8", "0135c9de75c3ff60182c6d9f6cbe39c293c18cab", "478794d7c6e709497ae3f36bba055a5269ecb93b", "2781b6984d3d5711d74acd2697807c18e52e96a7", "4fa58f063b9c77115a387b5556117acea39523e8", "037a4f9b65e5a5751318ccf80e33a2b55113020a", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "bffc85055ebdd10fb5bc7a15a1cb22757ab2e9b6", "2e7c2ad71b88156fb23f1d097beeda22549fb620", "6dd474a2ce9bc35c9d7518c981715bd394a9cd86", "5d8ce28c6c305b84345c4ba5370b9753109bcaad", "5b92d54f7fb2f28ed1c183fa9592ef79f8aef9f5" ], "paperAbstract": "We propose a fully-automated technique for inverting functional programs that operate over lists such as string encoders and decoders. We consider programs that can be modeled using symbolic extended finite transducers (s-EFTs), an expressive model that can describe complex list-manipulating programs while retaining several decidable properties. Concretely, given a program P expressed as an s-EFT, we propose techniques for: (1) checking whether P is injective and, if that is the case, (2) building an s-EFT P\u22121 describing its inverse. We first show that it is undecidable to check whether an s-EFT is injective and propose an algorithm for checking injectivity for a restricted, but a practical class of s-EFTs. We then propose an algorithm for inverting s-EFTs based on the following idea: if an s-EFT is injective, inverting it amounts to inverting all its individual transitions. We leverage recent advances in program synthesis and show that the transition inversion problem can be expressed as an instance of the syntax-guided synthesis framework. 
Finally, we implement the proposed techniques in a tool called Genic and show that Genic can invert 13 out of 14 real complex string encoders and decoders, producing inverse programs that are substantially identical to manually written ones.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062345", "http://pages.cs.wisc.edu/~loris/papers/pldi17inversion.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a1ae1215793bbec1f98c94639224002333e93b25", "sources": [ "DBLP" ], "title": "Automatic program inversion using symbolic transducers", "venue": "PLDI", "year": 2017 }, "a210e2ebc38857177b5180787f5cda62e3bd6ef4": { "authors": [ { "ids": [ "2295608" ], "name": "Yang Wang" }, { "ids": [ "20650555" ], "name": "Wuji Chen" }, { "ids": [ "1698907" ], "name": "Wei Zheng" }, { "ids": [ "36766903" ], "name": "He Huang" }, { "ids": [ "2151884" ], "name": "Wen Zhang" }, { "ids": [ "2661589" ], "name": "Hengchang Liu" } ], "doi": "10.1109/ICDM.2017.59", "doiUrl": "https://doi.org/10.1109/ICDM.2017.59", "entities": [ "Closed-circuit television", "GPS navigation device", "Recursion (computer science)", "Sampling (signal processing)", "Sparse matrix", "Upload", "Vehicle tracking system", "Video clip" ], "id": "a210e2ebc38857177b5180787f5cda62e3bd6ef4", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "495-504", "journalVolume": "", "outCitations": [ "44c2a6cb800676e42d4f137b4058ab05420d92d8", "52909610fec7764573e8ed0b63e1d283c6900ad0", "9a3510be1c94436128b66780ff6d8b43365732fa", "dbe217b10bdb05d0e6e218fb64519ea3df28d4ca", "fb8a881364dbc61c4c4ea0400fade9ad9728db1b", "b340e9afc0737cfe5265606a4e28d0e1a0a64e82", "0e80a2bdc44f4b89747ad93709b52a611dbd5bbe", "7634d98b9d288ed555a2fc81a3b12d2adc5866ed", "4aad1756e88dba86399a75891895e00b160f5460", "45a33bddf460554b7c1f550aa382d63345a20704", "3764717e9e2ed08d2a9f8d096adf2730cfba3902", "4451dbe084bc21eed915b183df8181b6f381f760", 
"0d5189f97a83623a88839796a94a9ebd9bd9fb73", "5ce04fb478a8643994e1b869cca72aac31172ccf" ], "paperAbstract": "Due to the sparse distribution of road video surveillance cameras, precise trajectory tracking for hit-and-run vehicles remains a challenging task. Previous research on vehicle trajectory recovery mostly focuses on recovering trajectory with low-sampling-rate GPS coordinates by retrieving road traffic flow patterns from collected GPS information. However, to the best of our knowledge, none of them considered using on-road taxicabs as mobile video surveillance cameras as well as the time-varying characteristics of vehicle traveling and road traffic flow patterns, therefore not suitable for recovering trajectories of hit-and-run vehicles. With this insight, we model the travel time-cost of a road segment during various time periods precisely with LNDs (Logarithmic Normal Distributions), then use LSNDs (Log Skew Normal Distributions) to approximate the time-cost of an urban trip during various time periods. We propose a novel approach to calculate possible location and time distribution of the hit-and-run vehicle in parallel, select the optimal taxicab to verify the distribution by uploading and checking video clips of this taxicab, finally refine the restoring trajectory in a parallel and recursive manner. We evaluate our solution on real-world taxicab and road surveillance system datasets. 
Experimental results demonstrate that our approach outperforms alternative solutions in terms of accuracy ratio of vehicle tracking.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.59" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a210e2ebc38857177b5180787f5cda62e3bd6ef4", "sources": [ "DBLP" ], "title": "Tracking Hit-and-Run Vehicle with Sparse Video Surveillance Cameras and Mobile Taxicabs", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "a21fd20aac6c75284e210c6d623407832d1ba7b1": { "authors": [ { "ids": [ "2613133" ], "name": "Kenichi Yasukata" }, { "ids": [ "2963590" ], "name": "Felipe Huici" }, { "ids": [ "2390550" ], "name": "Vincenzo Maffione" }, { "ids": [ "34588217" ], "name": "Giuseppe Lettieri" }, { "ids": [ "1895414" ], "name": "Michio Honda" } ], "doi": "10.1145/3127479.3127489", "doiUrl": "https://doi.org/10.1145/3127479.3127489", "entities": [ "Central processing unit", "Hypervisor", "Network function virtualization", "New Foundations", "No Silver Bullet", "Server (computing)", "Software deployment", "Throughput", "Vendor lock-in" ], "id": "a21fd20aac6c75284e210c6d623407832d1ba7b1", "inCitations": [], "journalName": "", "journalPages": "157-169", "journalVolume": "", "outCitations": [ "5476797b6be75b27b7e2780a6cd61dab3e3acf87", "0acdc80c65ad254cd4eba59d4df1bacbadc08a5d", "00cf4b13a1bd202ccebe1e7bd0587f11e98ec3d6", "067c7857753e21e7317b556c86e30be60aa7cac0", "860e931383fbc92f635644bc7abc3f395ba34179", "04f6a5dc6c2aac0586f8f1e83b434ea96fffcd66", "4c2be7d70e8e521e6e845dfe1a4dfc22f60af7b7", "11408f01e2c7c2395ccfa372c559686dec241545", "117c8dca0918376176e7bc8c0432103ed8e9c34f", "2c7e254cfea97f0b10320d723aa906f3f5c9bed6", "46a38fb1e26c9957111d154c6fc2c39101ff0ba1", "42d1b52254873ecd0f36eb7342f95dbad9c50187", "87064d58ef49df1b47c4ac74258fda1aecab2b68", "0433bb657317ac22f7c66d71dfd14c8ead607d73", "ec448ceb3e05b9222113366dace9fdd2a62322de", 
"0852a44c86db434e9b51c67704636791e9940487", "fb881b01f93f16d3912bb2dd2ce74dfb18c00d74", "7932a4597cec5149c575aa2303fe8f12241e4320", "2960c89331eb7afa86584792e2e11dbf6a125820", "4728bda27d89d524f0751ef0dddb5da0bffe0826", "1e1449da1472b6f0b9cb8ccd8c576bc47f9df348", "c4f94368cab4575431ca56645ab4688bc907128b", "85f9776a1ebee3e6cbe2ab3b11c370ecdcadf98d", "44d666999ca078e0fce6b5f2642a1c3e72ac87a1", "537efab803156da5b16885ad32270a5ebce16e6e", "29c324788b83463aa707784210edbca894694f20", "60212872aa40b660de117ba751542988bcfc406d", "d0ffa9adf6219a4afbbe388ce0c7102d2cae5eb8", "005718daa82f7880b5b1f3b01e0e8f6aad2b5751", "303fab4117468e84d10f426ab3b3e6c92da1159e", "73bded14fb8c3b4bd5c2ae554d704d3ad3ff907e", "16c6f58d9514a2360908a4aa2339079e1b0c7dd1", "2cad33b44ce1c51a47f25f199f5b90ad927fc010", "424bc408f86bada47693a2fb45369cdfaf5fbdb4", "56a42c71388e80f68efd350afe38175510aaa915", "163247e7ed8db43c9529d85c384d8843e22a136b" ], "paperAbstract": "Network Function Virtualization has been touted as the silver bullet for tackling a number of operator problems, including vendor lock-in, fast deployment of new functionality, converged management, and lower expenditure since packet processing runs on inexpensive commodity servers. The reality, however, is that, in practice, it has proved hard to achieve the stable, predictable performance provided by hardware middleboxes, and so operators have essentially resorted to throwing money at the problem, deploying highly underutilized servers (e.g., one NF per CPU core) in order to guarantee high performance during peak periods and meet SLAs.\n In this work we introduce HyperNF, a high performance NFV framework aimed at maximizing server performance when concurrently running large numbers of NFs. To achieve this, HyperNF implements hypercall-based virtual I/O, placing packet forwarding logic inside the hypervisor to significantly reduce I/O synchronization overheads. 
HyperNF improves throughput by 10%-73% depending on the NF, is able to closely match resource allocation specifications (with deviations of only 3.5%), and to efficiently cope with changing traffic loads.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3127489" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a21fd20aac6c75284e210c6d623407832d1ba7b1", "sources": [ "DBLP" ], "title": "HyperNF: building a high performance, high utilization and fair NFV platform", "venue": "SoCC", "year": 2017 }, "a22b9b10712ec321e63ec1a93ca65e3772b5e907": { "authors": [ { "ids": [ "3097667" ], "name": "Ghada Dessouky" }, { "ids": [ "3018662" ], "name": "Farinaz Koushanfar" }, { "ids": [ "8415280" ], "name": "Ahmad-Reza Sadeghi" }, { "ids": [ "37636206" ], "name": "Thomas Schneider" }, { "ids": [ "2811044" ], "name": "Shaza Zeitouni" }, { "ids": [ "1744880" ], "name": "Michael Zohner" } ], "doi": "", "doiUrl": "", "entities": [ "Computation", "Lookup table", "Secure multi-party computation" ], "id": "a22b9b10712ec321e63ec1a93ca65e3772b5e907", "inCitations": [ "369756d09b28a70979483f5d786c35ec336c3b45", "ddda84420c2a5391c95de096fd7504a0aec5edf7" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/pushing-communication-barrier-secure-computation-using-lookup-tables/" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a22b9b10712ec321e63ec1a93ca65e3772b5e907", "sources": [ "DBLP" ], "title": "Pushing the Communication Barrier in Secure Computation using Lookup Tables", "venue": "NDSS", "year": 2017 }, "a22e10038ad6146c93595122120cb3396044087b": { "authors": [ { "ids": [ "40437828" ], "name": "Grisha Weintraub" }, { "ids": [ "1795510" ], "name": "Ehud Gudes" } ], "doi": "", "doiUrl": "", "entities": [ "Attribute\u2013value pair", "Cloud computing", "Cloud database", "Crowdsourcing", "Data integrity", "Database", 
"High availability", "Programming paradigm", "Scalability", "Usability" ], "id": "a22e10038ad6146c93595122120cb3396044087b", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "498-503", "journalVolume": "", "outCitations": [ "447333c07b91dacf26a786afc0fd6db26e9e2f1a", "2e2ba3ed517ab712b1c3ac399c167f72ad23ba04", "34e87a802ec9f5daa9de523c4f41d59a9ea70661", "395d93ea7d00cb3655f3346afadef98ff38d636f", "51d6588ff7c1994f035a5a3be8d2e8ca62b78f22", "6cf8ec34a008031b018c8a3a4640a87f476d0925", "1f527fed31971e07093695c128c10b4f3c20d109", "33736e956a5c4703fb5f215bd3ad686eeeedf2de", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "58efda5a28e5791adfde9ef6e330caf7b89349c6", "9aa0d7253574e50fe3a190ccd924433f048997dd", "2bc188270ba4b54f04baa882130bddaba75f98a6", "3d48f3bc44b34a4d5977cebef014ae126582515b", "b26de63ff444be172e48c05aae7dd01e1e975c91", "1be4238ce7d2e12137c40ac7b7a2f15e9b09cdad", "48cb29ced65a4121989a6fc91423f1f32aae18f8", "9eb446e0cc04bcbcec4cf29d9bf0f80f52fd5369", "007520b26bc1d71911ce613b9de83d8b2bc47c5d", "4cbaac7455ac02f2b5d4d266d3dc6788ee56cc83", "096d66a3ecfefb24ff8aece012dfd706615a5d3a", "67f243b7946c389f1e851cff3d4bd5810337f07c" ], "paperAbstract": "Thanks to their high availability, scalability, and usability, cloud databases have become one of the dominant cloud services. However, since cloud users do not physically possess their data, data integrity may be at risk. In this paper, we present a novel protocol that utilizes crowdsourcing paradigm to provide practical data integrity assurance in key-value cloud databases. The main advantage of our protocol over previous work is its high applicability - as opposed to existing approaches, our scheme does not require any system changes on the cloud side and thus can be applied directly to any existing system. 
We demonstrate the feasibility of our scheme by a prototype implementation and its evaluation.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101179" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a22e10038ad6146c93595122120cb3396044087b", "sources": [ "DBLP" ], "title": "Crowdsourced Data Integrity Verification for Key-Value Stores in the Cloud", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "a22e548e8b13bf117434398aa26f17441f30df13": { "authors": [ { "ids": [ "9774630" ], "name": "Anubhavnidhi Abhashkumar" }, { "ids": [ "2103536" ], "name": "Joon-Myung Kang" }, { "ids": [ "1733676" ], "name": "Sujata Banerjee" }, { "ids": [ "1713535" ], "name": "Aditya Akella" }, { "ids": [ "1720117" ], "name": "Ying Zhang" }, { "ids": [ "1752668" ], "name": "Wenfei Wu" } ], "doi": "10.1145/3143361.3143380", "doiUrl": "https://doi.org/10.1145/3143361.3143380", "entities": [ "Access control", "Access control list", "Algorithm", "Electron mobility", "Experiment", "Heuristic", "High-level programming language", "Janus Recognition Toolkit (JRTk)", "Network topology", "Optimization problem", "Program optimization", "Quality of service", "Reachability" ], "id": "a22e548e8b13bf117434398aa26f17441f30df13", "inCitations": [], "journalName": "", "journalPages": "296-309", "journalVolume": "", "outCitations": [ "6ae27ff737c6379298edf6aa069df1e2565feb7c", "3dde3fec553b8d24a85d7059a3cc629ab33f7578", "2827d635444c744217d3f292b6ed0d750bc7c6fa", "4360de181b891b93b8d6dca3006389a3a6f05793", "4afcc064027d1cb8f837a8b30c52555e1702f74d", "4ff670483f43c4f0036960a5dd5c109f82d556ec", "7ed8dd92f4a174b630836700cf12d0adebd5c708", "39b242891e7eb2dbac63bd1eec858b5db71225ac", "0be133617dfb5fe8fe35cf7cdfb7c2f0c3e672cd", "056f1d66700d33f5e95de5cb571deb28a1706aef", "eccbc8b66c51c8f6bfc0196ee8dc0b475d3ba1b5", "603a54c6c715851482ebc15090ee0e0b99b8f6d9", "622cd2bc2e93a49be7008e3a14a4978212a3b16e", 
"00a9888c49e640897e458d0857acf07b83df4abd", "93096ce5920f4cf8bc406456e3fd6ee88b2b428e", "0d7ecd90182b2d28433209897650427c5de10bb6", "47d5357957cabb610131db1b228e58b70860ee8d", "089b10645ee63cd9c5bb4ab661141dd813408e15", "9bba824cb7eb7b5b5fc19353468dc0e62878bd89", "19cb1104626b3d8bb75a781925542ed9dc1db224", "0abc01db8648fa379369432d32638b47eddb9b70", "0235c2edb213160404f48384d5ea2353ce6fbf00", "3d95c58c595130caa94cdc1fcdb0f9b9f12cb805", "516cf9cc4d0886e1cd91832230a2d7645426a3ec", "30a7bba8d47d7eca9f7826a721e62032a5c8e77a", "16b0a02e2db3e3beb3dd0ce83e610549f271c9f1", "05380d21cd612184e89e82005c9392ac16554352", "6408a141436b1511883b3747551c638798a9af53", "2d1c0b3b79d618bb50e111360fc4211f6f262bcb", "17059e939aa051d7db57f4af959b2af21fa3dd18", "6c2b5f6bea40a0a10a712be80e17247978fcb72b", "92aab4a0d77c53b06e1dc9a5ddf5b0a5d9998b77", "07ca726af9c235573654b85e8d478bd7303aa62f", "39bfff0c76441e0af69f3d61915f2ad930bf6b27", "49f3023cd6ff7356c741328ee64d5e74c8de9a64", "050319a3aff4d911fca7e3ee63ffc1a99a0b1595", "3c92e739842e82bf03d32b1e7820c2c85f386d6f", "1ee9bd0b45d00c62ce7f22b6883f80a7fe759cdf", "1d706d2adf2a66f956e37b363be2c19c8aad79fe", "eea31ad577018a6f15edda67f6a14bca18b666a6", "267aa4a091dd43f7eb4ffad4c63405229fd31f1d", "0c0481e79a1078d69b6d37bca50e9313aa223ccc", "52ece1e929758e9d282e818e8e9985f88570f2dd", "22630a79f1c50603c1356f6ac9dc8524a18d4061", "69af536f38f32a985f228f4ed8948f7a8442b1ae", "58099048c6dd8b6a7dcfac0855deb546e50024b3", "94546cd187a5818811f6efec14c1360ad41cdd9d", "1121085a1556152558df3297bab860afcbacf185" ], "paperAbstract": "Existing network policy abstractions handle basic group based reachability and access control list based security policies. However, QoS policies as well as dynamic policies are also important and not representing them in the high level policy abstraction poses serious limitations. 
At the same time, efficiently configuring and composing group based QoS and dynamic policies present significant technical challenges, such as (a) maintaining group granularity during configuration, (b) dealing with network-bandwidth contention among policies from distinct writers and (c) dealing with multiple path changes corresponding to dynamically changing policies, group membership and end-point mobility. In this paper we propose Janus, a system which makes two major contributions. First, we extend the prior policy graph abstraction model to represent complex QoS and dynamic stateful/temporal policies. Second, we convert the policy configuration problem into an optimization problem with the goal of maximizing the number of satisfied and configured policies, and minimizing the number of path changes under dynamic environments. To solve this, Janus presents several novel heuristic algorithms. We evaluate our system using a diverse set of bandwidth policies and network topologies. Our experiments demonstrate that Janus can achieve near-optimal solutions in a reasonable amount of time.", "pdfUrls": [ "http://doi.acm.org/10.1145/3143361.3143380" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a22e548e8b13bf117434398aa26f17441f30df13", "sources": [ "DBLP" ], "title": "Supporting Diverse Dynamic Intent-based Policies using Janus", "venue": "CoNEXT", "year": 2017 }, "a22eb623224133c5966aadc2b11d9618d8270b3b": { "authors": [ { "ids": [ "33117655" ], "name": "Junjie Qian" }, { "ids": [ "9280383" ], "name": "Hong Jiang" }, { "ids": [ "2478252" ], "name": "Witawas Srisa-an" }, { "ids": [ "1787028" ], "name": "Sharad C.
Seth" }, { "ids": [ "2063309" ], "name": "Stan Skelton" }, { "ids": [ "2722260" ], "name": "Joseph Moore" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Central processing unit", "Computer-aided design", "Echo state network", "Linux", "Non-volatile memory", "PCI Express", "Parallel I/O", "Profiling (information science)", "Scheduling (computing)", "Throughput", "Volatile memory" ], "id": "a22eb623224133c5966aadc2b11d9618d8270b3b", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "569-578", "journalVolume": "", "outCitations": [ "01c08039d1cb1ea58a1b8380c7bd38ca045a093d", "adc4cda0bdbf38641da4e1c711260ff58e094a91", "1332c7cea4e586d064f1ab3b908d9d438cdbdaf0", "500bdbe7c083020bc56a596c2f987d9ed6213ec5", "d582f634687f20e5343cd81ffbb92c4009b2b10f", "2f0614c9a3b78b38eebf004288d842cdcc74ad50", "1a0af07c26d30548f2bd40c769f3961547a78179", "613b99a70e418550fcffd07c3df1a24d4884a121", "3eb7f80bc2bef135b236ab741d1582e4d2e7a050", "a7ec195ea9d26ada0a4e0ba8a53c4c8d7e32a5cd", "30419d0e0fcaebbfbcdb88f702bd01306d14fb15", "29f97d7142302d7ca5c876a396fefb154549013d", "854e5edb79631f5c7fdb5d477afaafdb9c76203a", "18cc8c1902a5c0ae35c75d9cc647c04a679e520d", "83fcaf1952e367733842ca06031b69b23e5c81a7", "a1d4e106e2981ce33aea7658667ea199ecf54907", "3d2dfe972be7a60937df97bd309b423726375cb4", "388587b903aaf56791e786522246883aeaf89892", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "377175d109126aea51714e8ef0e4324d28eb6fcc", "99723365fc9fe6960201bf9d246a90ccbb6396fa", "3fc93257ac94aa8d6505c19077058e68622345b6", "261b6bd9fe783e7f34e4a752ce92568d513761f2", "4cbd7800701981b58b4c1f6d53c7a66a5fb15633" ], "paperAbstract": "Non-volatile memory express (NVMe) based SSDs and the NUMA platform are widely adopted in servers to achieve faster storage speed and more powerful processing capability. 
As of now, very little research has been conducted to investigate the performance and energy efficiency of the state-of-the-art NUMA architecture integrated with NVMe SSDs, an emerging technology used to host parallel I/O threads. As this technology continues to be widely developed and adopted, we need to understand the runtime behaviors of such systems in order to design software runtime systems that deliver optimal performance while consuming only the necessary amount of energy. This paper characterizes the runtime behaviors of a Linux-based NUMA system employing multiple NVMe SSDs. Our comprehensive performance and energy-efficiency study using massive numbers of parallel I/O threads shows that the penalty due to CPU contention is much smaller than that due to remote access of NVMe SSDs. Based on this insight, we develop a dynamic \"lesser evil\" algorithm called ESN, to minimize the impact of these two types of penalties. ESN is an energy-efficient profiling-based I/O thread scheduler for managing I/O threads accessing NVMe SSDs on NUMA systems.
Our empirical evaluation shows that ESN can achieve optimal I/O throughput and latency while consuming up to 50% less energy and using fewer CPUs.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101189" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a22eb623224133c5966aadc2b11d9618d8270b3b", "sources": [ "DBLP" ], "title": "Energy-Efficient I/O Thread Schedulers for NVMe SSDs on NUMA", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "a23094f0ec3ea80481cbeb9484829a7fa8173d6c": { "authors": [ { "ids": [ "2336901" ], "name": "Joshua Lind" }, { "ids": [ "36724905" ], "name": "Christian Priebe" }, { "ids": [ "2213059" ], "name": "Divya Muthukumaran" }, { "ids": [ "39533740" ], "name": "Dan O'Keeffe" }, { "ids": [ "3102052" ], "name": "Pierre-Louis Aublin" }, { "ids": [ "2625265" ], "name": "Florian Kelbert" }, { "ids": [ "7147879" ], "name": "Tobias Reiher" }, { "ids": [ "7243998" ], "name": "David Goltzsche" }, { "ids": [ "1784358" ], "name": "David M. Eyers" }, { "ids": [ "1740458" ], "name": "R\u00fcdiger Kapitza" }, { "ids": [ "2314032" ], "name": "Christof Fetzer" }, { "ids": [ "1809586" ], "name": "Peter R. 
Pietzuch" } ], "doi": "", "doiUrl": "", "entities": [ "Backward compatibility", "Bitcoin", "Central processing unit", "Confidentiality", "Cryptography", "Data integrity", "Data-flow analysis", "Dataflow", "LibreSSL", "Memcached", "Task Control Block", "Trusted Computing", "Trusted computing base" ], "id": "a23094f0ec3ea80481cbeb9484829a7fa8173d6c", "inCitations": [ "5bad4ddf00f7a0fd082d335fff346f2ab11d021c", "34fe0c6e91d2a6a2325f5057222c3fbf22224fe5", "33ae35cc24ef4303979b479671c2065256e1b3a7", "287da0ab3c169c41433b0e5504161dfd1afbfa6c", "7cfebf75c82fdf08d21ea29751a39e6d2291b2ca", "334ec6e57110ece9f482f9ec2e85412b0be8072a" ], "journalName": "", "journalPages": "285-298", "journalVolume": "", "outCitations": [ "3ffc94c7066b4856b1cfae99ab66cd20310e41dd", "39a59c25e8fcdbb6304e70c86e263539af25e62d", "79e14bf42535966db5e056af6d8a0e0ec1f522b2", "2cef6527284b58e5820748e72191cc4b9d6e1112", "58cc45cea54540051a58a76c7d334da54d877f6a", "141dcba8c52100e4ce036beda59e36e4570bdf52", "8920b02f86372a3af13a10ca57243411a3c8792a", "5940b5a888378b6c40ad43b09c5d85a32846cbd6", "d3f7fa7419306da789d32b12e0c6a622651d0dea", "2194c3460ab71f3826db00b045b2ae590c753319", "08832863bc3f041222f381c8ae143f8a66449059", "12e557e882613b9dd240776e7486d067463f977f", "11c6fddeff9e2f95c8cf238ea9f12f8ffae7cf8c", "9aa0d7253574e50fe3a190ccd924433f048997dd", "34526b1e2d616b95c4eab774c9857090b991ffae", "0183d8c6623aaf106a27db72ecec9bb9704ab98c", "88ef8274eade7517b520f3315ed6b07ea0b7dd7a", "7f489d9801e0674f4436beb34ea8b8695050d5fe", "abf20d55fe1d05b41006a565fc152157bd06933f" ], "paperAbstract": "Trusted execution support in modern CPUs, as offered by Intel SGX enclaves, can protect applications in untrusted environments. While prior work has shown that legacy applications can run in their entirety inside enclaves, this results in a large trusted computing base (TCB). 
Instead, we explore an approach in which we partition an application and use an enclave to protect only security-sensitive data and functions, thus obtaining a smaller TCB. We describe Glamdring, the first source-level partitioning framework that secures applications written in C using Intel SGX. A developer first annotates security-sensitive application data. Glamdring then automatically partitions the application into untrusted and enclave parts: (i) to preserve data confidentiality, Glamdring uses dataflow analysis to identify functions that may be exposed to sensitive data; (ii) for data integrity, it uses backward slicing to identify functions that may affect sensitive data. Glamdring then places security-sensitive functions inside the enclave, and adds runtime checks and cryptographic operations at the enclave boundary to protect it from attack. Our evaluation of Glamdring with the Memcached store, the LibreSSL library, and the Digital Bitbox bitcoin wallet shows that it achieves small TCB sizes and has acceptable performance overheads.", "pdfUrls": [ "https://www.usenix.org/conference/atc17/technical-sessions/presentation/lind", "https://www.usenix.org/system/files/conference/atc17/atc17-lind.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/a230/94f0ec3ea80481cbeb9484829a7fa8173d6c.pdf", "s2Url": "https://semanticscholar.org/paper/a23094f0ec3ea80481cbeb9484829a7fa8173d6c", "sources": [ "DBLP" ], "title": "Glamdring: Automatic Application Partitioning for Intel SGX", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "a2c11997c1db4ce0551a7b86876cd4829037914c": { "authors": [ { "ids": [ "2368358" ], "name": "Georgios Chatzopoulos" }, { "ids": [ "1727558" ], "name": "Rachid Guerraoui" }, { "ids": [ "1695968" ], "name": "Timothy L.
Harris" }, { "ids": [ "1989618" ], "name": "Vasileios Trigonakis" } ], "doi": "10.1145/3064176.3064194", "doiUrl": "https://doi.org/10.1145/3064176.3064194", "entities": [ "Algorithm", "Backoff", "Cache coherence", "High- and low-level", "Locality of reference", "Lock (computer science)", "MapReduce", "Merge sort", "Multi-core processor", "OpenMP", "Performance Rating", "Program optimization" ], "id": "a2c11997c1db4ce0551a7b86876cd4829037914c", "inCitations": [], "journalName": "", "journalPages": "544-559", "journalVolume": "", "outCitations": [ "f8e9b050c93af6dea582563f61b6460b590bc3af", "e7dd4cd96fa2c2588221e95de672d774ca43d7e1", "10f1faeec4ee2158b8535b249a20de5419998153" ], "paperAbstract": "Portability and efficiency are usually antagonists in multi-core computing. In order to develop efficient code, one needs to take into account the topology of the target multi-cores (e.g., for locality). This clearly hampers code portability. In this paper, we show that you can have the cake and eat it too.\n We introduce MCTOP, an abstraction of multi-core topologies augmented with important low-level hardware information, such as memory bandwidths and communication latencies. We show how to automatically generate MCTOP using libmctop, our library that leverages the determinism of cache-coherence protocols to infer the topology of multi-cores using only latency measurements.\n MCTOP enables developers to accurately and portably define high-level performance optimization policies. We illustrate several such policies through four examples: (i-ii) thread placement in OpenMP and in a MapReduce library, (iii) a topology-aware mergesort algorithm, as well as (iv) automatic backoff schemes for locks. 
We illustrate the portability of these optimizations on five processors from Intel, AMD, and Oracle, with low effort.", "pdfUrls": [ "https://infoscience.epfl.ch/record/227458/files/MCTOP_EuroSys17.pdf", "http://doi.acm.org/10.1145/3064176.3064194", "https://timharris.uk/papers/2017-eurosys-mctop.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a2c11997c1db4ce0551a7b86876cd4829037914c", "sources": [ "DBLP" ], "title": "Abstracting Multi-Core Topologies with MCTOP", "venue": "EuroSys", "year": 2017 }, "a3121498f5e8dc81a9209a03f13759fed67e8aa4": { "authors": [ { "ids": [ "40351860" ], "name": "Bo Tang" }, { "ids": [ "1753946" ], "name": "Kyriakos Mouratidis" }, { "ids": [ "1722082" ], "name": "Man Lung Yiu" } ], "doi": "10.1145/3035918.3064044", "doiUrl": "https://doi.org/10.1145/3035918.3064044", "entities": [ "Algorithm", "Benchmark (computing)", "FOCAL (programming language)", "Row (database)", "Synthetic data", "Tree rearrangement", "User (computing)", "User interface", "User profile", "Weight function" ], "id": "a3121498f5e8dc81a9209a03f13759fed67e8aa4", "inCitations": [ "1d08cbe6da0910787ccba32a264c5e1ef9903e21" ], "journalName": "", "journalPages": "805-820", "journalVolume": "", "outCitations": [ "3557d22cd663e4939a86f43d044b8cb55cd97e16", "5432324996087b0081acc2637eba3455bf021bc0", "74eaae49b235cee87bca9098f8a8c3fc3f8de1f4", "5dbe84872b42cb6167d3b601fdf68b4eb2d7f5d9", "10feecb5eebbb958439aabb2e10bd56739e315c9", "42f582f01479787622357ac90ea33d813c700f85", "5f814d8d2fa48cc9c29f8c94d6ae97236e6e259d", "04ff502fe2cd481a151230fb890d7ae9a093f561", "084916725a430a16e8469d257ebbb5ea8b0c5bbc", "806bdc15cf8a85738f3e4c193780b426b2c1637a", "d203fe567346510b3de7da57e974a5aff2b21b20", "1beeaf5818ba40ea64972b1b3b617f7467cf6eb6", "a11db1672f9bbf8c2c967b4d41b6264284882fff", "2f4b3836e40fe54412cf77a002b8ab9827d5d8c3", "4325e0fbf8e6e3a9574e1f070a649d8d1528abcb", "0d557c7a7d6b50c3ccdb6a9f48e4c552541f5930", 
"28e702e1a352854cf0748b9a6a9ad6679b1d4e83", "4b633bec0b93f5175e45aa17b966846eaae983f9", "ed56740bdab41806b41bcb9deaedf91f2f7c2f82", "279c6d90821d46703fe6c17daa9c64064cc2044a", "38a5150d236872d43610ded32ca0714fe0c68f7c", "3dcc1a9a3d08413e680cb764988c424720400867", "8826c500c659f33c76d7341105f0b7816769abda", "03d09018b004d899fba9d9794c67246ed766ab1d", "fc30bfb07da885dbe27c3acb50a600781c727c97", "3ceaf000e836f3601d78ebfe058c5106525c2e0b", "57db55f3555e9821b64ad8dd941fa87ad3f06e92", "6f8660e7fa35001dbf75b60199af24b9d3482d7d", "146e33f6ac7ee643af0a6a10f78a5273e6dfad86", "a401596b7c337afefe0ea228ef9cd4908429b43a", "0999cb9f10d1308564098246ecc48094e670045c", "1c5441a63edfe3a0553e31a51bcb87e049c6a192", "03d2efcaf662140acbafa4b0b743b17c755b2fda", "8ee9cda65b4b28e002e416045c1e84f5d22e5df5" ], "paperAbstract": "In rank-aware processing, user preferences are typically represented by a numeric weight per data attribute, collectively forming a weight vector. The score of an option (data record) is defined as the weighted sum of its individual attributes. The highest-scoring options across a set of alternatives (dataset) are shortlisted for the user as the recommended ones. In that setting, the user input is a vector (equivalently, a point) in a d-dimensional preference space, where d is the number of data attributes. In this paper we study the problem of determining in which regions of the preference space the weight vector should lie so that a given option (focal record) is among the top-k score-wise. In effect, these regions capture all possible user profiles for which the focal record is highly preferable, and are therefore essential in market impact analysis, potential customer identification, profile-based marketing, targeted advertising, etc. We refer to our problem as k-Shortlist Preference Region identification (kSPR), and exploit its computational geometric nature to develop a framework for its efficient (and exact) processing. 
Using real and synthetic benchmarks, we show that our most optimized algorithm outperforms by three orders of magnitude a competitor we constructed from previous work on a different problem.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064044" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a3121498f5e8dc81a9209a03f13759fed67e8aa4", "sources": [ "DBLP" ], "title": "Determining the Impact Regions of Competing Options in Preference Space", "venue": "SIGMOD Conference", "year": 2017 }, "a32f7cdd2ac43369b3d043baf235d9fee3b7c8b5": { "authors": [ { "ids": [ "40459348" ], "name": "Meng Luo" }, { "ids": [ "2448867" ], "name": "Oleksii Starov" }, { "ids": [ "1968758" ], "name": "Nima Honarmand" }, { "ids": [ "1679711" ], "name": "Nick Nikiforakis" } ], "doi": "10.1145/3133956.3133987", "doiUrl": "https://doi.org/10.1145/3133956.3133987", "entities": [ "Common Vulnerabilities and Exposures", "Desktop metaphor", "Internet security", "Mobile device", "Mobile security", "Out There", "User interface", "World Wide Web" ], "id": "a32f7cdd2ac43369b3d043baf235d9fee3b7c8b5", "inCitations": [ "7cfcce2a3731e6f359b67e2683b4e37572f17dbb" ], "journalName": "", "journalPages": "149-162", "journalVolume": "", "outCitations": [ "52d7ed9fab4024b0d95e057ff0372054b3deb3e2", "1668a1d1196f988b497d2e9465e365b7fabb804e", "880de1bc6989cb1971451de4ce47671fd7732683", "5a25c0d6e57d843705cb14380d0a5892aedc1aaa", "d8cc44f872bc18aad25415ea2209da1caf6560d2", "39702b34b0c42660db3a68aec07115e27ffdb2ac", "0708567ce2291383dd59ec0c6d47c30ea5b6199b", "9e83eba1bd0a6f08c3c93bf7cb726f487f7ce430", "02d6ca3691d60f4d006f5e26abe43ba0c613c8e8", "5aac597aa92bdd4cc91a5a2d7408e9bf3f4805e0", "699334161bf2d0ca394236a2a57755e1d8d92269", "321a935b029a82fdacbba34274c801574a84ac09", "94b244c518f431f84d2e00317709c98771a91eca", "0694c33e3ace71a19c9823ddf07c0f4e8743d118", "0045a78372c5a299b3135923ca8e5bfb2c9b6175", "a9977198d07f5fede50a81236d7806868f4c9a27", 
"e76f92a4fe497250684555c46e3826ceb7d88086", "1006c70eff5b3967030635c800c3ad9c0494b7c3", "6f751ec41bdc551a6744e10b0b2b915a5ac383fb", "22652399c7fb219a093344b9b47028b5d0069711", "591336480dbb7a0e71bff9c201c73544c4615381" ], "paperAbstract": "Much of recent research on mobile security has focused on malicious applications. Although mobile devices have powerful browsers that are commonly used by users and are vulnerable to at least as many attacks as their desktop counterparts, mobile web security has not received the attention that it deserves from the community. In particular, there is no longitudinal study that investigates the evolution of mobile browser vulnerabilities over the diverse set of browsers that are available out there. In this paper, we undertake the first such study, focusing on UI vulnerabilities among mobile browsers. We investigate and quantify vulnerabilities to 27 UI-related attacks---compiled from previous work and augmented with new variations of our own---across 128 browser families and 2,324 individual browser versions spanning a period of more than 5 years. In the process, we collect an extensive dataset of browser versions, old and new, from multiple sources. We also design and implement a browser-agnostic testing framework, called Hindsight, to automatically expose browsers to attacks and evaluate their vulnerabilities. We use Hindsight to conduct the tens of thousands of individual attacks that were needed for this study. We discover that 98.6% of the tested browsers are vulnerable to at least one of our attacks and that the average mobile web browser is becoming less secure with each passing year. 
Overall, our findings support the conclusion that mobile web security has been ignored by the community and must receive more attention.", "pdfUrls": [ "https://acmccs.github.io/papers/p149-luoA.pdf", "http://doi.acm.org/10.1145/3133956.3133987", "https://www.securitee.org/files/hindsight_ccs2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a32f7cdd2ac43369b3d043baf235d9fee3b7c8b5", "sources": [ "DBLP" ], "title": "Hindsight: Understanding the Evolution of UI Vulnerabilities in Mobile Browsers", "venue": "CCS", "year": 2017 }, "a342dd884378431dc6944a22fc283a6e771abea5": { "authors": [ { "ids": [ "8095209" ], "name": "Ryan Spring" }, { "ids": [ "4260659" ], "name": "Anshumali Shrivastava" } ], "doi": "10.1145/3097983.3098035", "doiUrl": "https://doi.org/10.1145/3097983.3098035", "entities": [ "Algorithm", "Algorithmic efficiency", "Artificial neural network", "Backpropagation", "Computation", "Deep learning", "Dropout (neural networks)", "Embedded system", "Gradient", "Low-power broadcasting", "Matrix multiplication", "Scalability", "Sparse matrix", "Speedup", "The Matrix" ], "id": "a342dd884378431dc6944a22fc283a6e771abea5", "inCitations": [ "6cae9ad284a73471a8ed9e483b1673a60d61d946", "58b8467e930d51211d11778b2fbe4375474f7a51", "6a202f300f1906477eb90962eee4d7dc0dea00ee", "75c603fe09308a36a0dc55e0c347b5d71e855b1b", "4a94e4d8f4d4776b3094794051f0f7e7e82e79bf", "5b96a2cc7f3a7920b67959d8ccd87e24e54c80db", "2b9bf9fc91ab3ede6eadde378740f1bfb97bc656", "535c4647c9304c9c7202bc430a6aaedb36d8e31a", "0375dbfedebc209b1f276f629c8958d37b48cac6", "08e4600c4cce18ca8069b067d8757229a34b8796", "059ae9f412826ea0b2be5d9379981fd1e97f13ee", "36807811c6607fb56612d5c6410e9ae4bafaa9d9", "416b26c91ec13b05bc1abd11aebc56321ab22a8c", "0ae6e70027e3f16ba1144731495b0d9d23413e73", "71ea4bdcd1210829fd66fcf62d3bb80ded0a8cde", "0e5a77429015050dde9983e5a8725150f7806adc" ], "journalName": "", "journalPages": "445-454", "journalVolume": "", "outCitations": [ 
"44f4cd28486c4730fbfb262f099cb5df30637211", "b94043a133e3d07ed0b1cfc036829e619ea0ba22", "0ecc96d6566880b4426e3d8a26638f423d98b8cf", "34f25a8704614163c4095b3ee2fc969b60de4698", "4388c3910a165c01a251ea7d50b18f37fac5a642", "534f6ea4ce0127e5da7f1cafb6334b59ad15b83f", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "31868290adf1c000c611dfc966b514d5a34e8d23", "8f3f63773aa801b2cdcfa2e5699e3abb9aed7443", "1b503c21d865d3c57e14bfa2f4fddfc29536625d", "02fdc2743f6c5ddddc39af8d3af1f04e301e17ef", "b9becc31d2f335dc2d12964c55bed7d1cf4dc7dc", "096e07ced8d32fc9a3617ff1f725efe45507ede8", "01fcae344d2edb715bcc63a40b6052c0331741bd", "0122e063ca5f0f9fb9d144d44d41421503252010", "1b16571a4bbdcb916b250d3640e683700de143b8", "594d2e123ecb8ec0bc781aec467007d65ab5464d", "2176fb10204cc018b07ff248e21aa6e3d32ff1c0", "3157ed1fbad482520ca87045b308446d8adbdedb", "122eddb0391a84eb40bca0370975229919e2e10b", "28135fd3e80dda50a673cd556f10b9b972005d27", "1c799eca7983c62f7815ac5f41787b3e552567b6", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "4064696e69b0268003879c0bcae6527d3b786b85", "ff860f9c61f0d783d48893f671e986a01e5c3026", "5c534f0c84c8d4cdf8618645043da421b7682753", "1d32f29a9880998264286633e66ea92915cc557a", "a5943976e3ecdb920903adb52616849ab1261375", "db9b52b7890e165554e513006f1a44a9a2ffe41c", "87c8eccb221db9f33fdf9c8a6a17498e18045b4a" ], "paperAbstract": "Current deep learning architectures are growing larger in order to learn from complex datasets. These architectures require giant matrix multiplication operations to train millions of parameters. Conversely, there is another growing trend to bring deep learning to low-power, embedded devices. The matrix operations, associated with the training and testing of deep networks, are very expensive from a computational and energy standpoint. We present a novel hashing-based technique to drastically reduce the amount of computation needed to train and test neural networks. 
Our approach combines two recent ideas, Adaptive Dropout and Randomized Hashing for Maximum Inner Product Search (MIPS), to select the nodes with the highest activations efficiently. Our new algorithm for deep learning reduces the overall computational cost of the forward and backward propagation steps by operating on significantly fewer nodes. As a consequence, our algorithm uses only 5% of the total multiplications, while keeping within 1% of the accuracy of the original model on average. A unique property of the proposed hashing-based back-propagation is that the updates are always sparse. Due to the sparse gradient updates, our algorithm is ideally suited for asynchronous, parallel training, leading to near-linear speedup, as the number of cores increases. We demonstrate the scalability and sustainability (energy efficiency) of our proposed algorithm via rigorous experimental evaluations on several datasets.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098035", "https://arxiv.org/pdf/1602.08194v1.pdf", "https://arxiv.org/pdf/1602.08194.pdf", "https://arxiv.org/pdf/1602.08194v2.pdf", "http://arxiv.org/abs/1602.08194", "http://arxiv.org/pdf/1602.08194v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a342dd884378431dc6944a22fc283a6e771abea5", "sources": [ "DBLP" ], "title": "Scalable and Sustainable Deep Learning via Randomized Hashing", "venue": "KDD", "year": 2017 }, "a38c6933d5ea67a33a679a2d925cdf82045b5c50": { "authors": [ { "ids": [ "38561978" ], "name": "Tao Guo" }, { "ids": [ "2324195" ], "name": "Xin Cao" }, { "ids": [ "1737379" ], "name": "Gao Cong" }, { "ids": [ "1680607" ], "name": "Jiaheng Lu" }, { "ids": [ "2873542" ], "name": "Xuemin Lin" } ], "doi": "10.1145/3035918.3035920", "doiUrl": "https://doi.org/10.1145/3035918.3035920", "entities": [ "Algorithm", "Algorithmic efficiency", "Computation", "Distributed algorithm", "Distributed computing", "Experiment", "Graph partition", "Load balancing (computing)", 
"PageRank", "Personalization", "Scalability" ], "id": "a38c6933d5ea67a33a679a2d925cdf82045b5c50", "inCitations": [ "7528af5e921212f3e338951bdabf653977eabb72" ], "journalName": "", "journalPages": "479-494", "journalVolume": "", "outCitations": [ "462408be7dea21a28232a72dd0d1fed122872f26", "631e721376e844a016ffe18a8a9af3d75766f91c", "3e1e5a5edd5858d906b49363984a3e3659fb9478", "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", "3ad743e436bb8749b7f750e4b316550a9d124bac", "1dd8db60043f51c04eb7200915ebd253d2fabf64", "2e9755294bfcebbe2d6bbdc7937cf76f25d605fc", "02903a2d438b242d87904d18c20f191ec98f9b2b", "41a70062d260feb62e0ae64acf252a839c0bbd61", "04b55cb152457e741aeffcc06108963a019c8cc6", "408cd9103f2d7cdafce2f6b984035b2be0ed9b7d", "183c44d2b9ac64e8c795464f91ef98f1e3ba2ea3", "18141e44f720c49dde29ebe4ad16a481796fd422", "233084c0d1c818c842be6a9bb50f5dd2d1d1682f", "1606f7d2634e593d617ee67985fbdd9915bd7190", "2eed842c2cf908df5c5d9c1c1e0a160ecd881679", "95b251202e1f5ff7d7f41dda553a38e395ecf555", "0ecdf0d9e33ae993d5f789c6e1bb410ba2fca0b8", "87f931f4d8aad3b71b8261703bbcfa18c1293181", "eb82d3035849cd23578096462ba419b53198a556", "9fdfb79b25450f42434d0baf39362052e1192acd", "51ea20dc4f688af41f9840a854d15bac49db1be6", "010a3f35d8d245ad0dcc87fc5f598ed0cda31ebf", "39eb6f355b0dc5fbd7115b93d389cc8fa3c5b1c3", "2a622720d4021259a6f6d3c6298559d1b56e7e62", "468c3b2bf358d07cc625b075f91595d825299948", "47e6565146580f4550e496bd9bfc70053bf748d6", "09b4276e4c1bd3621de3830b8ee1ebfc4876136e", "47c4651a707a91db806a6034897e42e2530bb681", "423befa4222b5b54cf63f0879e99243b0e5139b0", "87ee99d4cc4e0601cbb519f6ddbac85772bdc49e", "6d30db4bdb14d5a23320970407e1fa5bb514b7c2", "009dbf3187862352aac542bf7d61e27bce6b27f5", "040d45e995ab920588607ebc6977ea19dc781923", "630b514e68c0de62fa3dca5a45e3131f1515c90c", "29efbdf3f95cee97405accafdebd3bd374f1f003", "543bb14ed41e66a4a964a81110417facd7c20744", "b3ca8fb21ef2e12b4aba555230559d632c1e3ea3", "32452c6be455b2ec18403866e4fc48ef842f2587", 
"607c8ae7b868015ea2deb61969d7e38988de8ca1", "3726c60552263e648c6856679e672de2e1c110e5", "3105c03f6ee3135ac6b649ed6313ae0e6c0eb8fc", "0b3292b79721cda54001902d54b3142d6ec3012f", "1156f60e40548096df49528b1342bb3e88b0f378", "4f9df283f1dac228fbbd3efd50765ecb317efe6c", "05881ca4901379f7e9221d6ee3c0c5921e8f24be", "138a0de40dcaacf258e5e02499cb960ae549aa8f" ], "paperAbstract": "As one of the most well known graph computation problems, Personalized PageRank is an effective approach for computing the similarity score between two nodes, and it has been widely used in various applications, such as link prediction and recommendation. Due to the high computational cost and space cost of computing the exact Personalized PageRank Vector (PPV), most existing studies compute PPV approximately. In this paper, we propose novel and efficient distributed algorithms that compute PPV exactly based on graph partitioning on a general coordinator-based share-nothing distributed computing platform. Our algorithms takes three aspects into account: the load balance, the communication cost, and the computation cost of each machine. The proposed algorithms only require one time of communication between each machine and the coordinator at query time. The communication cost is bounded, and the work load on each machine is balanced. 
Comprehensive experiments conducted on five real datasets demonstrate the efficiency and the scalability of our proposed methods.", "pdfUrls": [ "https://www.cs.helsinki.fi/u/jilu/documents/SIGMOD2017.pdf", "http://doi.acm.org/10.1145/3035918.3035920" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a38c6933d5ea67a33a679a2d925cdf82045b5c50", "sources": [ "DBLP" ], "title": "Distributed Algorithms on Exact Personalized PageRank", "venue": "SIGMOD Conference", "year": 2017 }, "a397059ac9f927f07c46436d1375f599744d7c38": { "authors": [ { "ids": [ "4353261" ], "name": "Hung Dang" }, { "ids": [ "1781271" ], "name": "Ee-Chien Chang" } ], "doi": "10.1109/CLOUD.2017.18", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.18", "entities": [ "Client-side", "Cloud storage", "Confidentiality", "Data deduplication", "Encryption", "Experiment", "Multitier architecture", "Privacy", "Server (computing)", "Server-side", "Server-side scripting" ], "id": "a397059ac9f927f07c46436d1375f599744d7c38", "inCitations": [ "95c2aa7e4e6d81ed754749a00376ebc0257908b1", "84973e62fdfc24aba7ec4a79610a7550d34860bc" ], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "66-73", "journalVolume": "", "outCitations": [ "22092491c0f3f4ab6e5f0568640e2f92bfe08769", "708a3c03556b5bc20b5bd8e58ef2f47f6a9fc7d2", "bf83639acb00f5169a834d0585c826c9bfe6f8ec", "4aa28d4cf57fecfaa23723748af547d25c44e794", "0832933a67612dc18e0f70cb0ed949fef1a830a5", "9837a70c231c0ef3d33c2c9f5b56afd40548acce", "489b78ce5b8184e37e296db3213b1b11b40f865e", "07a5809436a9ade7bee9fdc9a970c23263a580d0", "2d52f69dd4686a3e66f5a8a1650a24bcea43530e", "f474f64fa3302b1dcc66e7c2c0961fffac0109b0", "02cc5b5ad2d7ca9f83c9e566bcb5f9b608ab0619", "14ef57d70a10373ddd7970e2a5d9789e41d1e97d", "e67410d5ef6a064afd20d93650f39129d00f1a32", "08c2649dee7ba1ab46106425a854ca3af869c2f0", "20b63210954f7c5a70664f301dcd7196856ccfa7", "5febaf151b020809d2f2c780511854d1b5cdfb0e", 
"c6893bda51266222708b31c46fb7d620d52c2f80", "10ea68194568919015f1e6686ba5d70462b07616", "446961b27f6c14413ae6cc2f78ad7d7c53ede26c", "0b52c0dddb4b37abfd6fb3657c81342777ff62bc", "33223f6bddb07fc5035e675516f5578125f597ef", "796ff51394bb51a5054d23b77c6dec33952d8462", "fb7f27650f30aefa7786c874da4b7cc27a11b9df", "70fda5147aedd42c64143a464117b5ffde18a2e4", "17fac85921a6538161b30665f55991f7c7e0f940", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "9209c11ec4a63c5e6b4c967e49e6fc9ae3e169f2" ], "paperAbstract": "Cloud storage providers can reduce storage costs by detecting identical files and storing only one instance of them. While appealing to the storage providers, this deduplication set-up raises various privacy concerns among clients. Various techniques to retrofit content confidentiality in deduplication have been studied in the literature. Nevertheless, data encryption alone is insufficient to protect users' privacy, for the ownership and equality information of the outsourced data left unprotected may have serious privacy implications. In this paper, we investigate a three-tier architecture that saves bandwidth otherwise incurred by server-side deduplication solutions, yet does not admit the client-side deduplication's leakage on file existence. Leveraging trusted SGX-enabled processors, we construct the first privacy-preserving data deduplication protocol that protects not only the confidentiality, but also the ownership and equality information of the outsourced data, offering better privacy guarantees in comparison with existing works on secure data deduplication. 
Our experiments show that the proposed protocol incurs low performance overhead over conventional solutions that provide weaker level of privacy protection.", "pdfUrls": [ "http://www.comp.nus.edu.sg/~hungdang/papers/PDeDup.pdf", "https://doi.org/10.1109/CLOUD.2017.18" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a397059ac9f927f07c46436d1375f599744d7c38", "sources": [ "DBLP" ], "title": "Privacy-Preserving Data Deduplication on Trusted Processors", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "a3c9c6a14c96a12ca2c71a2034251d6f01709af7": { "authors": [ { "ids": [ "1742868" ], "name": "Yang Hu" }, { "ids": [ "2147744" ], "name": "Junchao Wang" }, { "ids": [ "3133941" ], "name": "Huan Zhou" }, { "ids": [ "31869154" ], "name": "Paul Martin" }, { "ids": [ "2515461" ], "name": "Arie Taal" }, { "ids": [ "1765828" ], "name": "Cees T. A. M. de Laat" }, { "ids": [ "1897259" ], "name": "Zhiming Zhao" } ], "doi": "10.1007/978-3-319-64203-1_25", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_25", "entities": [ "Algorithm", "Cloud computing", "Component-based software engineering", "Earliest deadline first scheduling", "Elasticity (cloud computing)", "Failure rate", "Scheduling (computing)", "Software deployment" ], "id": "a3c9c6a14c96a12ca2c71a2034251d6f01709af7", "inCitations": [ "5097612ef9f53375ce36e41caefd4ac9e9b7cb00" ], "journalName": "", "journalPages": "345-357", "journalVolume": "", "outCitations": [ "0c885c52410e3005f5f18f024d5a03c8272cafb8", "35444cb32ca5883a584e050f7df64209e47ee555", "659184bbb3d5624dc6a6db6736af18e5a7d3ca79", "3b988049dd8f62f772281e90196bbd793700c86b", "39300a6bb64f813bd233343b840cb169d8d0527f", "3d174575d397b5d70ed6ffa15c3bf94dca526c09", "3bd537c64184312d3d391389ecec061a32699850", "162fd0ea5cf77e1b71754f29e0a509f649d4443b", "2bb9c543664d14c77b0b5ad4f8af3c6d1468b819", "602311b7c7c6a6c1e2c0120537c0013a059980f7", "25be4335206df53c769244cfce81ebc5914a46cf", 
"9760de335c316ee7b22a945eb03bcda2cb3f727a", "0143402248bb0b52858c661592212872be9131fe", "203442c8d1322daff9ebeba0337bf99cd771d8e1", "c839df17c5527664cf5338f1cedba625e8d24967", "668948133604d876ee6d2170aeab87fdf9b8e80a", "5a8b7b33956e083f9600eed14cd3e80a01dee212", "11040f24714857941c569df70b21c4c8655e074a" ], "paperAbstract": "Time critical applications are appealing to deploy in clouds due to the elasticity of cloud resources and their on-demand nature. However, support for deploying application components with strict deadlines on their deployment is lacking in current cloud providers. This is particularly important for adaptive applications that must automatically and seamlessly scale, migrate, or recover swiftly from failures. A common deployment procedure is to transmit application packages from the application provider to the cloud, and install the application there. Thus, users need to manually deploy their applications into clouds step by step with no guarantee regarding deadlines. In this work, we propose a Deadline-aware Deployment System (DDS) for time critical applications in clouds. DDS enables users to automatically deploy applications into clouds. We design bandwidth-aware EDF scheduling algorithms in DDS that minimize the number of deployments that miss their deadlines and maximize the utilization of network bandwidth. 
In the evaluation, we show that DDS leverages network bandwidth sufficiently, and significantly reduces the number of missed deadlines during deployment.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_25" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a3c9c6a14c96a12ca2c71a2034251d6f01709af7", "sources": [ "DBLP" ], "title": "Deadline-Aware Deployment for Time Critical Applications in Clouds", "venue": "Euro-Par", "year": 2017 }, "a3e1152774fd2f083d9ca0bb18bc76fec56dfa8b": { "authors": [ { "ids": [ "20941204" ], "name": "Byungseok Kim" }, { "ids": [ "2162549" ], "name": "Jaeho Kim" }, { "ids": [ "1719212" ], "name": "Sam H. Noh" } ], "doi": "", "doiUrl": "", "entities": [ "3D XPoint", "Bandwidth (signal processing)", "Central processing unit", "Computer performance", "DBpedia", "Disk array", "Gigabit", "Solid-state drive" ], "id": "a3e1152774fd2f083d9ca0bb18bc76fec56dfa8b", "inCitations": [ "ca2d15589cc4dba414932b3955f0335e3e99ebb3" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "139e967aa7e44408783baecc83bee893976f7ba3", "4e3bae36e660f82f98aad204f753baeca91208c8", "2bcdf51bdd0d9eb77591263a48859d484a22def0", "850943a92b899aafa9052869a75a61960bd6529f", "0b5de3cf4ac3069dc9a7ee5b4d745e908c218536", "5271d6693ba950c389921ccc21110664f25a83db", "009d8914ca7ca1ec459f6c35a772f85c602eb052", "26a88fcaf621270af5f5786fdb2df376a2bc00aa", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", "2e46f9074bd81ea4ec29ecec7e0231c16fb2e8db", "3aff5fb3d1e23dfc0c45989f71b4aa99b3a5784b", "05961fc1d02ca30653dd0b4c906113db796df941", "3b2af12a43d06338dd62681328c75a1999fc87fd", "1f0c405f9fa2cc9de23a45710fa85b9e7330a958", "335dcf9242c48b83a9985fc13de0fc567a59d78f", "48fd1ad4d7b235ff8a5604f579ed169a2444f4c0", "def29d202e537d026b8d3ed91655b540ef86cceb", "1eb9dc6955b0de81a078c9d6fa937c33f1f04545", "61977858b3eea4f5a6d81393301e7298ade7a2d8", "1ea92529e75fe90ee1923b95d0fa8ad37ac1ed7c", "d58cc242fd70227cff98376a914e0b42b1b79db8", 
"0e5c646909bb762da0cd325e084655c12445578f", "a04678ad8398a2579c249fff4b59bfbcfdd7e25b", "3cf9039fa2fc01f711870e33d868669caf5c4df4" ], "paperAbstract": "With the advent of high performing NVMe SSDs, the bottleneck of system performance is shifting away from the traditional storage device. In particular, the I/O stack software layers have already been recognized as a heavy burden on the overall I/O. Efforts to alleviate this burden have been considered [1, 2, 3, 4]. Recently, the spotlight has been on the CPU. With computing capacity as well as the means to get the data to the processor now being limited, recent studies have suggested that processing power be pushed into where the data is residing [5, 6, 7, 8]. With devices such as 3D XPoint [9, 10, 11] in the horizon, this phenomenon is expected to be aggravated. In this paper, we focus on another component related to such changes. In particular, it has been observed that the bandwidth of the network that connects clients to storage servers is now being surpassed by storage bandwidth [12, 13]. Figure 1 shows the changes that are happening. We observe that the changes in the storage interface is allowing storage bandwidth to surpass that of the network. As shown in Table 1, recent developments in SSDs have resulted in individual SSDs providing read and write bandwidth in the 5GB/s and 3GB/s range, respectively, which surpasses or is close to that of 10/25/40GbE (Gigabit Ethernet) that comprise the majority of networks being supported today. Based on this observation, in this paper, we revisit the organization of disk arrays. Specifically, we target write performance in all-flash arrays, which we interchangeably refer to as SSD arrays, that are emerging as a solution for high-end storage [14, 15, 16, 17, 18, 19, 20]. As shown in Table 2, most major storage vendors carry such a solution and these products employ plenty of SSDs to achieve large capacity and high performance [16, 17, 18, 19]. 
Figure 2 shows how typical all-flash arrays would be connected to the network and the host. Our goal is to provide high, sustained, and consistent write performance in such a storage environment. Figure 1: Network and storage bandwidth growth trend", "pdfUrls": [ "https://www.usenix.org/conference/hotstorage17/program/presentation/kim", "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-kim.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/hotstorage17_slides_kim_b.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/a3e1/152774fd2f083d9ca0bb18bc76fec56dfa8b.pdf", "s2Url": "https://semanticscholar.org/paper/a3e1152774fd2f083d9ca0bb18bc76fec56dfa8b", "sources": [ "DBLP" ], "title": "Managing Array of SSDs When the Storage Device Is No Longer the Performance Bottleneck", "venue": "HotStorage", "year": 2017 }, "a429c9e82fa8f18cd128e2193f8f772ab5151b3d": { "authors": [ { "ids": [ "38797620" ], "name": "Shoaib Jameel" }, { "ids": [ "1871846" ], "name": "Zied Bouraoui" }, { "ids": [ "2265382" ], "name": "Steven Schockaert" } ], "doi": "10.1145/3077136.3080803", "doiUrl": "https://doi.org/10.1145/3077136.3080803", "entities": [ "Bag-of-words model", "Bag-of-words model in computer vision", "Entity", "Experiment" ], "id": "a429c9e82fa8f18cd128e2193f8f772ab5151b3d", "inCitations": [ "5bbe05cbe5ec5411dceedc170af973fd5d62f8f8" ], "journalName": "", "journalPages": "783-792", "journalVolume": "", "outCitations": [ "328b00f1baaf08dedba3a788b4ce0a4b26003f18", "0825788b9b5a18e3dfea5b0af123b5e939a4f564", "27211ed68a7a00f1df0121fa1890a1b2acdd1a88", "318b558717ff9a4a996e45368b26a1233f03d1d7", "2a3f862199883ceff5e3c74126f0c80770653e05", "34d0d27e63ae5ac8fc8d00809973d5baf13ce927", "3a0e788268fafb23ab20da0e98bb578b06830f7d", "36edb135176366f011dbbbaf1543bf2cb0f5300a", "45cc0801e1949728bc98f9e95129d1a48c36afbc", "7878cf925311291f53e45a2e0bdffe478b236682", "021aac664fc1a2f5d3c4d833dec96289d77e2b5f", 
"3734d31548756bb315e5b613cd6e9245c00b1430", "1b9483efbac9894715344bf1ece48d65519223bb", "16524adee515692a50dd67a170b8e605e4b00b29", "4cf090824b89b4fddebc659f09ddca4aafafcdd7", "f3999b9749d5f0b71f638bcc998dc14f2e0a6cbe", "607a834558b16c318be9c735bea048ae6638841d", "448345e5386edac7942b26227610053be7563f23", "5cee4bb66f208f8fc20f7ed8cfc9aed0573a022a", "994afdf0db0cb0456f4f76468380822c2f532726", "1081a81d93c74d83c3fa0f7a78d3f987c117739f", "38a417e821f4d8001329f20ee28b7cc5ef9e5af7", "44d442bf3acf0fb1ad87ffbe7dd1683e08c5952b", "9092bf4d0716ebd8c3ec931814aba2b84da9029d", "9b087350cfa1d07bc97329ddbf73b8cd0dc616ab", "5936c2e590b18cd57e83b2f91363da0bfea1538c", "543a68fcc4d48726abdef66a2befcd23f154eb3a", "23cb9cb90a31ca7c899beeae5dac22be0ffdaaf9", "588ccc8d75b050b692fd5d47a0b4ae7082d473ce", "2e42bf8747363161851dc04b85aedb1ada50daaf", "36a181c75ecf77be863cea34adb21398fadd7b5f", "04b52c8230c3f9f4f4032b06458069d81c8f07b2", "4752c1ee6ef02a122af7ecb60aa2f044f840e004", "68a33a3afac65eb6e0fb3726c1f9c8b727f32a42", "0826c98d1b1513aa2f45e6654bb5075a58b64649", "3d73d4b69fcf11da0e461507b48d02425aeffada", "4c967da691861cd41cb9b24d8f085257bdca8e69", "bcae9da4151d4accf3d90ef8bab883fbec35cb85", "1145859ba17172d517cdffe2a5f00a16366c5765", "b78f9987638719b714a19202a33114bb46335a4d", "48a12a320ec4751096e9bb298bf16b4dfb29a8bf" ], "paperAbstract": "We propose a new class of methods for learning vector space embeddings of entities. While most existing methods focus on modelling similarity, our primary aim is to learn embeddings that are interpretable, in the sense that query terms have a direct geometric representation in the vector space. Intuitively, we want all entities that have some property (i.e. for which a given term is relevant) to be located in some well-defined region of the space. This is achieved by imposing max-margin constraints that are derived from a bag-of-words representation of the entities. 
The resulting vector spaces provide us with a natural vehicle for identifying entities that have a given property (or ranking them according to how much they have the property), and conversely, to describe what a given set of entities have in common. As we show in our experiments, our models lead to a substantially better performance in a range of entity-oriented search tasks, such as list completion and entity ranking.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080803", "http://orca.cf.ac.uk/100910/1/MEmbER___SIGIR_2017-10.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a429c9e82fa8f18cd128e2193f8f772ab5151b3d", "sources": [ "DBLP" ], "title": "MEmbER: Max-Margin Based Embeddings for Entity Retrieval", "venue": "SIGIR", "year": 2017 }, "a46dc8a6feb06d691e1dceaa70a432d31405af7b": { "authors": [ { "ids": [ "1991587" ], "name": "Xueqi Li" }, { "ids": [ "1930057" ], "name": "Guangming Tan" }, { "ids": [ "1782022" ], "name": "Chunming Zhang" }, { "ids": [ "1732849" ], "name": "Xu Li" }, { "ids": [ "3503115" ], "name": "Zhonghai Zhang" }, { "ids": [ "40463872" ], "name": "Ninghui Sun" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.34", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.34", "entities": [ "Big data", "Computation", "Concurrency (computer science)", "Garbage collection (computer science)", "In-memory database", "In-memory processing", "Memory hierarchy", "Microarchitecture", "Parallel computing", "Programming model", "Run time (program lifecycle phase)", "Scalability", "Speedup", "Test case", "World Geodetic System" ], "id": "a46dc8a6feb06d691e1dceaa70a432d31405af7b", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "262-269", "journalVolume": "", "outCitations": [ 
"2228b4208c5ea6754df6edcae805038f3e47857c", "3adb3c7292ca9493eb947931624765df61aa72e5", "17810349765c08963af130efe28b6a6b77b7ec51", "f45b777d8591a1e7d3e24f0a9746328d6c5c41fa", "f5a88d8561bf6a64b43aa7e88beff8220e792bee", "0558c94a094158ecd64f0d5014d3d9668054fb97", "196514ca53f505dec7a8a2b446fc599e8de3f0cc", "0b72a5e4bec54e9f0a4d77db5b484d27886b49fe", "0ee0212af4454c7dfdbed09ccb63edecf29f6ea5", "2c9b77a063a3459ed8f3be0c0066724a38e225e5", "279beb332fa6e158f32742b7dfafe83f12a97110", "5235c7b89e2ca9ebada925bb486ef03bca1c9596", "6d948fbf56977e8aa8927cd3d101b32382977837", "04ecc752b775f934ca04a09e9bbc67bbb5f31c27" ], "paperAbstract": "In this paper, we performed a comprehensive study of quantifying and mitigating computational inefficiency of current genomic analysis approaches. First, we found current parallelization approaches that have limited scalability due to either unexploited parallelism or low utilization of system resource. Thus, we proposed Spark-Gene, which is on the basis of Spark in-memory programming model. To test the performance of our Spark-Gene, we used WGS in the GATK as the test case. We show that Spark-Gene reduces the execution time of WGS analysis from 19 hours to 30 minutes with a speedup in excess of 37-fold at 256 CPU cores. Furthermore, we identified that garbage collection is the major scalable bottleneck of better parallel efficiency for native in-memory computing model. Second, we quantified microarchitectural inefficiency for typical genomic applications and uncovered opportunities for microarchitectural optimizations for the design of genomic domain-specific accelerator, especially on specializing concurrency, computation and memory hierarchy. 
This paper is to leverage state-of-art big-data technologies to improve parallelization for genomics analysis and motivate the integration of accelerators into the genomic analysis computing system.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.34" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a46dc8a6feb06d691e1dceaa70a432d31405af7b", "sources": [ "DBLP" ], "title": "Quantifying and Mitigating Computational Inefficiency of Genomics Data Analysis", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "a474ad232ebea0f30cbad37052fe34e9daef39a8": { "authors": [ { "ids": [ "1703636" ], "name": "Alexander Russell" }, { "ids": [ "10808021" ], "name": "Qiang Tang" }, { "ids": [ "1699719" ], "name": "Moti Yung" }, { "ids": [ "3578131" ], "name": "Hong-Sheng Zhou" } ], "doi": "10.1145/3133956.3133993", "doiUrl": "https://doi.org/10.1145/3133956.3133993", "entities": [ "Adversary (cryptography)", "Adversary model", "Algorithm", "Black box", "Ciphertext indistinguishability", "Cryptography", "Cryptosystem", "Encryption", "Kleptography", "Modular programming", "Semantic security", "Software engineering", "Subversion" ], "id": "a474ad232ebea0f30cbad37052fe34e9daef39a8", "inCitations": [ "f0c993c7aa8b197579cb75d20d822a14d4e3187f" ], "journalName": "", "journalPages": "907-922", "journalVolume": "", "outCitations": [ "7c8006c29c7659a70c6d175f66ae72d016fc57aa", "44db5fa4c9409fafaea23778852d525909470961", "a658f8bd1abaf8f06183266d13de25f0559c9592", "0bd0c57495f89e3b57115876794532ecf803681b", "a931feced4e9b76db14d48e0aeb7555b6dc90d98", "42b58b197534300ad27f3ef7fed17f7d11e8b2ce", "0f084f2e975c3f54bbf5740b6d5d261066bfab16", "f9c777b5a9e98df91b7fef648676b101ca9ee742", "7937e5708e05d2203f2d19928757c8afe2463049", 
"a6b020f4511566b81caf164c319071093215c138", "18dcb2eefd5da64253d413aa4095b86cff22cea5", "0bbe80fc59381c08c4c72b59c24d86cbcb4b4d81", "611f8a4217467e45fae660a61ee7d7951570d0a3", "032653f0212d0995487885808265b317680bb19f", "70b117f9e9150698cf8f5dc2d175801f12a27908", "5513593daa8b8a52c5808590f0975e4c80c5c71a", "769ef515f691dab237375dc34c313e5892eff564", "93bf73e1aa95894410ea7cf07dbb0ef2fa291852", "40cac3e514dbb61c64d56685a519ef93f3e10f46", "e55348cd947f0b8ce7e380b0561b2b518486d65d" ], "paperAbstract": "Notable recent security incidents have generated intense interest in adversaries which attempt to subvert\u2014perhaps covertly\u2014cryptographic algorithms. In this paper we develop (IND-CPA) Semantically Secure encryption in this challenging setting. This fundamental encryption primitive has been previously studied in the \"kleptographic setting,\" though existing results must relax the model by introducing trusted components or otherwise constraining the subversion power of the adversary: designing a Public Key System that is kleptographically semantically secure (with minimal trust) has remained elusive to date. In this work, we finally achieve such systems, even when all relevant cryptographic algorithms are subject to adversarial (kleptographic) subversion. To this end we exploit novel inter-component randomized cryptographic checking techniques (with an offline checking component), combined with common and simple software engineering modular programming techniques (applied to the system's black box specification level). 
Moreover, our methodology yields a strong generic technique for the preservation of any semantically secure cryptosystem when incorporated into the strong kleptographic adversary setting.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133993" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a474ad232ebea0f30cbad37052fe34e9daef39a8", "sources": [ "DBLP" ], "title": "Generic Semantic Security against a Kleptographic Adversary", "venue": "CCS", "year": 2017 }, "a4e37fcbb75a5453b9e26229dfd1185a1f7346dd": { "authors": [ { "ids": [ "1688607" ], "name": "Fei Xia" }, { "ids": [ "2302653" ], "name": "Dejun Jiang" }, { "ids": [ "37089621" ], "name": "Jin Xiong" }, { "ids": [ "40463872" ], "name": "Ninghui Sun" } ], "doi": "", "doiUrl": "", "entities": [ "Attribute\u2013value pair", "B+ tree", "B-tree", "Concurrency (computer science)", "Dynamic random-access memory", "Hash table", "Key-value database", "Non-volatile memory", "Thread (computing)", "Throughput", "Volatile memory", "YCSB" ], "id": "a4e37fcbb75a5453b9e26229dfd1185a1f7346dd", "inCitations": [ "db57257e6b051e0f97d35209cc5aee0909cde1f1", "433143d5a065cbc4a127362aec99002a1421e322" ], "journalName": "", "journalPages": "349-362", "journalVolume": "", "outCitations": [ "1cac40347773f012d908ac5ef578da940dd840ee", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "4d3409792c7b9639c483d24b9add187a6e5fa9c9", "05a1357946de5eca42a477b7b268db4944219a2e", "7efeb43699d31e8ae365b1e4f7e56c066083a159", "18642fd39dd265cbc149b937f2f5ca2e925e3484", "10419c6f4aa50a36ed0b103c9ddb9aec45f133fe", "33ca2a8ce7dc9211534c9d6856636c7f8cf054eb", "24724ad8962a9e04eb496fddaefe9708f6960601", "99723365fc9fe6960201bf9d246a90ccbb6396fa", "235b8ae9b189cdbeb6032df259fc7e1777ac57d9", "2ecd95f47bb682de959bc80c5b1fc2315a98b1bc", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "8cded4cc565f8b7c41b40de6fe8d20231a7e8652", "396514fb219879a4a18762cddfae2a6a607f439f", "098d792d1783b5f6fc098203f71f21f5d053c653", 
"0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "13be0864308b3b88cbebd296c5c338b5a72b4554", "199ac28b6bc68bf05c77645ffae7640df114bca5", "7e4921a43378b2b7b9cf950604fe434e4b07da58", "2da760f90c3d2bf6598becdde9063093f488548c", "81778c0996c46c77a66597e782ec0eb558f054f2", "642dd27ce62d51b042e134b0d0aec2f2e7cc4d29", "52f43d03f290f113615117cea3eb17d23476038a", "18a5f443299784479e78d9e77f175af57cb2fa2b", "77ee477b0e6d0b6245eca5865ba95b55ed3db434", "9183cde02e4306828089fb8adae74736a9df3ceb", "5a49c1c694028fd6ca7acc6af601c0f54efa0700", "2c1a0af30cc12ec108ce8bffde856ced8b759022", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "620e264481f778cc32ddd11ee311de61fca0e3b6", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "1b0eace707f6b86e94793d1a7c83b7d065e604fa", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "0ad5ed5a0a7b9210993753d7594c7285d5e9b179", "03416be8097852a54dd3e309434e5a0806824646", "54a882bc5f15877097dfb1aab8c480323036e48c", "0645f60331e8dd88a1d0183e2bfb3b9da21c07f6", "389b618c42c0d5d32a569b9cbaa02a7ff77c6be6", "9aa0d7253574e50fe3a190ccd924433f048997dd", "2d45779437516ee55e5f9f4e7a7d8803fa795443", "13b26d008210fffeb8a77c9e90f1ff837523c536", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "efbf7c8762f2e3e300146cc837c457e0bd912a07", "8301c813277cc59b47a84d25dc1e307eee8ce310", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "ec62b73a97016f09d5b9859d31ed991ae84e55ad", "07d847f310d5fa9138f461f0a25c5e0024f1c4af", "04b3aaf58a91557e15c8064660baa1cc5e8db14e", "2e3bda19a2ed88e8a6e5cc415e27da551653ff1d", "4bbb4e2bed21980cfe9ca7a6e243737705b0fd20", "7ae71ff8a5f950c12305f27d8584a9c9203717a9", "ca61d435baaaa92dc06333bb7a54676e482283fb" ], "paperAbstract": "Hybrid memory systems consisting of DRAM and Non-Volatile Memory are promising to persist data fast. 
The index design of existing key-value stores for hybrid memory fails to utilize its specific performance characteristics: fast writes in DRAM, slow writes in NVM, and similar reads in DRAM and NVM. This paper presents HiKV, a persistent key-value store with the central idea of constructing a hybrid index in hybrid memory. To support rich key-value operations efficiently, HiKV exploits the distinct merits of hash index and B-Tree index. HiKV builds and persists the hash index in NVM to retain its inherent ability of fast index searching. HiKV builds the B-Tree index in DRAM to support range scan and avoids long NVM writes for maintaining consistency of the two indexes. Furthermore, HiKV applies differential concurrency schemes to hybrid index and adopts ordered-write consistency to ensure crash consistency. For single-threaded performance, HiKV outperforms the state-of-the-art NVM-based key-value stores by reducing latency up to 86.6%, and for multi-threaded performance, HiKV increases the throughput by up to 6.4x under YCSB workloads.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-xia.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/xia", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_xia.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/a4e3/7fcbb75a5453b9e26229dfd1185a1f7346dd.pdf", "s2Url": "https://semanticscholar.org/paper/a4e37fcbb75a5453b9e26229dfd1185a1f7346dd", "sources": [ "DBLP" ], "title": "HiKV: A Hybrid Index Key-Value Store for DRAM-NVM Memory Systems", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "a4ef2f986173fcad3d052b71574356a8264fe629": { "authors": [ { "ids": [ "1881544" ], "name": "Sudip K. Seal" }, { "ids": [ "3462045" ], "name": "Mark R. Cianciosa" }, { "ids": [ "35039469" ], "name": "Steven P. Hirshman" }, { "ids": [ "31845812" ], "name": "Andreas Wingen" }, { "ids": [ "3008629" ], "name": "Robert S. 
Wilcox" }, { "ids": [ "3459357" ], "name": "Ezekial A. Unterberg" } ], "doi": "10.1109/ICPP.2017.37", "doiUrl": "https://doi.org/10.1109/ICPP.2017.37", "entities": [ "Cray XC30", "Data parallelism", "Parallel computing", "Plasma display", "Scalability", "Speedup", "Supercomputer" ], "id": "a4ef2f986173fcad3d052b71574356a8264fe629", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "282-291", "journalVolume": "", "outCitations": [ "10777b463156ec55b4068fe9ab63aa69b54c09af", "919e817eb2a71b29b6ac23caea6a3f5eff718b24", "860b76188fd65e8031e2e50f20f3c7dcc8acf8b0" ], "paperAbstract": "Fast, accurate three dimensional reconstructions of plasma equilibria, crucial for physics interpretation of fusion data generated within confinement devices like stellarators/tokamaks, are computationally very expensive and routinely require days, even weeks, to complete using serial approaches. Here, we present a parallel implementation of the three dimensional plasma reconstruction code, V3FIT. A formal analysis to identify the performance bottlenecks and scalability limits of this new parallel implementation, which combines both task and data parallelism, is presented. The theoretical findings are supported by empirical performance results on several thousands of processor cores of a Cray XC30 supercomputer. 
Parallel V3FIT is shown to deliver over 40X speedup, enabling fusion scientists to carry out three dimensional plasma equilibrium reconstructions at unprecedented scales in only a few hours (instead of in days/weeks) for the first time.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.37" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a4ef2f986173fcad3d052b71574356a8264fe629", "sources": [ "DBLP" ], "title": "Parallel Reconstruction of Three Dimensional Magnetohydrodynamic Equilibria in Plasma Confinement Devices", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "a517b78d2ce866d4f46cd37c5070c6cd57771b57": { "authors": [ { "ids": [ "26396121" ], "name": "Unnikrishnan Cheramangalath" }, { "ids": [ "1696918" ], "name": "Rupesh Nasre" }, { "ids": [ "1692577" ], "name": "Y. N. Srikant" } ], "doi": "10.1109/CLUSTER.2017.72", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.72", "entities": [ "Algorithm", "Compiler", "Computer cluster", "Domain-specific language", "Falcon", "Falcon", "Graph (abstract data type)", "Graphics processing unit", "Information system", "Parallel algorithm", "Speedup" ], "id": "a517b78d2ce866d4f46cd37c5070c6cd57771b57", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "439-450", "journalVolume": "", "outCitations": [ "15ad785d44ff34ad028426c31a1e8d43b2b44ab6", "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", "21f35a5ecc0faf0c5f760e20cb9ce9e63a30a768", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "09a180d9d410d8e551f42401d6453d57406b6d29", "632997c4b10ce669ecac5b913c66bd2ee155b35f", "d7f449c199ce86d3b8039899caabb31b54ced7f2", "3486aeaf540c48952120fe853d672af984f40a6a", "3c2cc49ee044d3b5815e9d5ad9c6010e94484d92", "d65c96df1d220fc9d8284a7c61d283a842517e81", "45a916500ce98c8d018c13de4c1d5c53130e8a72", "0706356c9ab6014d6b04577d38289ea8328291a5", "4e88d42dc5f5efe565d87af3b999c43165e42dce", 
"0d8fadb88666b1137e9e767b5c82d2a98f807f2d", "c79ae0af0d1cc663e50d3f443639569c02afba1b", "0f014693b25d9846025219b88f8ca480fac68b0a", "0e3253e5ca318e70d4968a45f8d41f88dbffd9e3", "0ad8e89091eed09217e66adc98136126addc2619", "62329d96c5e7681bf11fe13510d4ee489008170f", "914f287d6e83ac8e525d4c0e643cee6a1dce6fb4", "141e35263ab810983c90d47ad62eb4fab5e51717", "0f34ea8535dc5833a1a3692ffc7abc6740d2406a", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "0be9827857bfd79a00a9b1e64d59e8c34534362c", "c85c784038d6f4f4845842bdc41877f8581ac796", "3726c60552263e648c6856679e672de2e1c110e5", "2d8be5e1b88ac9919984b9369f7045fbb0af0d08", "2ef3e0589031df3369445d62a3d1df83530b903a", "2cdcddb08ae6060e94cba6c9b2b58b87324e686f", "1156f60e40548096df49528b1342bb3e88b0f378", "6de3915df2b9927a78f213629f3bcb052ec21e8b", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "17cdbe2b0205a6b74168a07489e05e498232b2e1" ], "paperAbstract": "Graph models of social information systems typically contain trillions of edges. Such big graphs cannot be processed on a single machine. The graph object must be partitioned and distributed among machines and processed in parallel on a computer cluster. Programming such systems is very challenging. In this work, we present DH-Falcon, a graph DSL (domain-specific language) which can be used to implement parallel algorithms for large-scale graphs, targeting Distributed Heterogeneous (CPU and GPU) clusters. DH-Falcon compiler is built on top of the Falcon compiler, which targets single node devices with CPU and multiple GPUs. An important facility provided by DH-Falcon is that it supports mutation of graph objects, which allows programmer to write dynamic graph algorithms. 
Experimental evaluation shows that DH-Falcon matches or outperforms state-of-the-art frameworks and gains a speedup of up to 13×.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.72" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a517b78d2ce866d4f46cd37c5070c6cd57771b57", "sources": [ "DBLP" ], "title": "DH-Falcon: A Language for Large-Scale Graph Processing on Distributed Heterogeneous Systems", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "a518074cccb6a4f45622714827aef6da40f7e987": { "authors": [ { "ids": [ "3160580" ], "name": "Sanjib Das" }, { "ids": [ "2304912" ], "name": "Paul Suganthan G. C." }, { "ids": [ "3030274" ], "name": "AnHai Doan" }, { "ids": [ "5151034" ], "name": "Jeffrey F. Naughton" }, { "ids": [ "27032261" ], "name": "Ganesh Krishnan" }, { "ids": [ "2349874" ], "name": "Rohit Deep" }, { "ids": [ "2506563" ], "name": "Esteban Arcaute" }, { "ids": [ "39709126" ], "name": "Vijay Raghavendra" }, { "ids": [ "33733502" ], "name": "Youngchoon Park" } ], "doi": "10.1145/3035918.3035960", "doiUrl": "https://doi.org/10.1145/3035918.3035960", "entities": [ "Apache Hadoop", "Cloud computing", "Crowdsourcing", "Entity\u2013relationship model", "Experiment", "Falcon", "Plan", "Program optimization", "Scalability" ], "id": "a518074cccb6a4f45622714827aef6da40f7e987", "inCitations": [ "72a06022fcd4b5d97c1832808f124b655916a74e", "ee17a651f243b1bd8a05c10d79d593f61bfaa154", "0bcb4ab04e1ebabbf67a38bd7b20b8c660018add", "02befacf3720b4db0dda8616d6b0f143080362cf", "d011a9ff9d3262d9b15d595fa359298b35063b43", "0dad2fc42f6733f76819f7e83678b587db2feaae" ], "journalName": "", "journalPages": "1431-1446", "journalVolume": "", "outCitations": [ "29eb9e82b7a3aa02b9af5f0f1b6d7c6a4c6e2917", "7c94374bb22de9d1a1f6bff984bfca5bd293e503", "59ea6b535d9e462ef01fe34b4576252943257870", "6274f60dd41b0078a978d76a7ff7ad8db0f4d829", "04a390ef582a42ad83641a341ccb45c7d36add5b", 
"abb8d9e2826d48846596614d2522236a7cb19e5d", "007071191023201553995d3c57c889b5ff8690a7", "151673abe01271dc3fc37725c02e95e7970f3bed", "13d4c2f76a7c1a4d0a71204e1d5d263a3f5a7986", "4359e7b1bba1e7a37a591111cd5c719fdb4857c8", "20d90871bc0dc7956bf2557d91d8d96deb0a4520", "50c9a75513a4da3446642d6e6a397081c97baac2", "00af4d7a9de6f01b9b4e468bd8d63c4d5da6bebd", "d125e153b571992a6c9c783edc3cb83e64052f44", "7a84a82a45c0e10e485c4fcf3afa9158caf20b1c", "5144ce3943d6dce07db0fa7469b54e1f3aea80ab", "0ee10b695a90d7699b0518436d9bac53410c2d0b", "599f9b53971128732db638c9c19f1a8239bfe6aa", "3ad85267dcb8f81b4381295cdf9293f57f9c2c23", "200a3e75626bc3a1f1a99cbc9799cd784cd1bb12", "632eca15ed20f87490c60a6005c4c58f06bee61b", "08f51a9138458f667f0c00d40b6a820c451c7d36", "18d4a77f0d5e20f90d61ad0a8209e1d6aca4f569", "4614382c4d8ca41c6d145c61dba29c2cc39301bd", "ad5255bfe9c6f2b55814e8a1e2a2a0f9813d4e5a", "24a9b9128495e972d29619596b8e66428af19ab4", "0894c223858b5b40ed6ee256d5378030a82bded2", "76db421d3ff172ca679f2c680d3b4f161718c194", "8a0b267493ac9510e47ceb4bcebb6d202b2f89a5", "1d593074bd02f2792f36adcf579e23a7626d0603", "70c02b56d4e4120f1090cff1f88902518f3cd19b", "c921482cb0048738284aa66e531b83f64ce46bf5", "2c46c17fedb951e6321c8c31b09d01861c8142a9", "000953906a54de7c81b56b25f08c24bd83567d57", "422d9b1a05bc33fcca4b9aa9381f46804c6132fd", "289fa221f73c1740df420769d9ad6ac05e7008c9", "7ac5425b837a5e889847afdaf04c0241b9b0b4ba", "676e50a4d2141ae66a0d2aafcf79c8c989fcce33", "49ceba7f32b3d440f20b7b35d4c7462016666ef9", "6a35c6aead0796373bbdcc6c8c3371d8c25146b0", "152ae230ea49aba046aaa1dcefd7f7e4be0185b5", "194fb3dc0b8772bcf8aede8f4a64f68cf61a952a", "88cd4becf3587a8378e450a99ded801fbdb264e1", "4033104e3a37324df023fec7e95d852e962617de", "40bc01f4d62958d0138bb367521a8e867a16b4e6", "882ea179c692c9ed7bbeee115302631dc014b199", "c33592b7ae635cb797b242e1b5c7aa7232c6f645", "64591361f2f39a1c0418141c4795e43573637739" ], "paperAbstract": "Many works have applied crowdsourcing to entity matching (EM). 
While promising, these approaches are limited in that they often require a developer to be in the loop. As such, it is difficult for an organization to deploy multiple crowdsourced EM solutions, because there are simply not enough developers. To address this problem, a recent work has proposed Corleone, a solution that crowdsources the entire EM workflow, requiring no developers. While promising, Corleone is severely limited in that it does not scale to large tables. We propose Falcon, a solution that scales up the hands-off crowdsourced EM approach of Corleone, using RDBMS-style query execution and optimization over a Hadoop cluster. Specifically, we define a set of operators and develop efficient implementations. We translate a hands-off crowdsourced EM workflow into a plan consisting of these operators, optimize, then execute the plan. These plans involve both machine and crowd activities, giving rise to novel optimization techniques such as using crowd time to mask machine time. Extensive experiments show that Falcon can scale up to tables of millions of tuples, thus providing a practical solution for hands-off crowdsourced EM, to build cloud-based EM services.", "pdfUrls": [ "http://pages.cs.wisc.edu/~anhai/papers/falcon-tr.pdf", "http://pages.cs.wisc.edu/~anhai/papers/falcon-sigmod17.pdf", "http://doi.acm.org/10.1145/3035918.3035960" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a518074cccb6a4f45622714827aef6da40f7e987", "sources": [ "DBLP" ], "title": "Falcon: Scaling Up Hands-Off Crowdsourced Entity Matching to Build Cloud Services", "venue": "SIGMOD Conference", "year": 2017 }, "a557cc474f8238c78195463e72850e573e872410": { "authors": [ { "ids": [ "2170071" ], "name": "Muhammad Shafiq" }, { "ids": [ "7933066" ], "name": "Xiangzhan Yu" }, { "ids": [ "16488638" ], "name": "Dawei Wang" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.31", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.31", "entities": [ "Duplex 
(telecommunications)", "Eval", "Feature selection", "Instant messaging", "Machine learning", "Mutual information", "Network traffic control", "Traffic classification" ], "id": "a557cc474f8238c78195463e72850e573e872410", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "239-245", "journalVolume": "", "outCitations": [ "049db5d2f0af69de36f5bad47b3762dc2a787edc", "9892ab8cd696ca0d37965dc4e5e4ae5f7cd005e5", "0e935de5c678c7f01dce3f206fedc97a88be4e80", "2d3bf208d1d5a0a2e324cc74b965604de9fae46b", "00c57bb8c7d2ce7c8f32aef062ef3d61b9961711", "447c91fafe70f82f259b01440b620140b6dd019f", "3ea19dcbe7c8fcde728f546d96543ae9e2aa8d07", "740410296b173061c8fee358e31a1dba53a59c46", "8f58aa24adced52c40c3c27cf680ea6338de590f", "10bc4f5d7cb220455999625384f7c530d39b1f28", "0c91d5305ad34814b631d4a642bb0535a2e066ea", "f458197be4441e9d2591df4797c1fa22ec4a8007", "89416499ef3cf1735efa7d475fa2d5344ee5cc32", "fc04e5de952c2f504226b052a41b9dee85cec621", "3a46082b837508eb7a6cc38100f7194cbe08d54e", "58c7ffc47fde0f5120e1ba32dfe8b30db7d65c64", "0e8fd670a427b4c14e39feb94a8d91700ce134d2", "c58bc8ddf6ea3ebe9f12b3463eca2e9369831737" ], "paperAbstract": "Identification of network traffic accurately at its early stage is very important for network traffic management and application traffic classification. In recent years, this becomes very hot topic to identify traffic at its early stage. Unidirectional and bidirectional statistical features are effective features and widely used in Internet traffic classification. However, it is important to evaluate and select effective features for Instant Messaging (IM) application traffic classification at early stage. In this paper we are interested to find out robust and effective features at early stage. 
We firstly extract 22 statistical features of the first flow on two different network environment traffic datasets include on HIT and NIMS datasets. Then mutual information is conducted between the extract statistical features to select the effective features. Additionally to select robust features, we execute attribute selection cfsSubsetEval with Best search evaluator that select the robust and stable features from the result achieved by mutual information. And then, we execute 10 well-known machine learning classifiers. Our experimental results show that max_fpktl, std_bpktl, max_biat, mean_fpktl, mean_bpktl and min_biat feature are robust features at early stage traffic classification.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.31" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a557cc474f8238c78195463e72850e573e872410", "sources": [ "DBLP" ], "title": "Robust Feature Selection for IM Applications at Early Stage Traffic Classification Using Machine Learning Algorithms", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "a5b285544ea04d219d8ab148328189d40222e7c7": { "authors": [ { "ids": [ "3133111" ], "name": "Changhyun Park" }, { "ids": [ "2317713" ], "name": "Seongjin Lee" }, { "ids": [ "1773263" ], "name": "Youjip Won" }, { "ids": [ "2076810" ], "name": "Soohan Ahn" } ], "doi": "10.1145/3030207.3030219", "doiUrl": "https://doi.org/10.1145/3030207.3030219", "entities": [ "Amplifier", "Markov model", "Solid-state drive", "USB flash drive" ], "id": "a5b285544ea04d219d8ab148328189d40222e7c7", "inCitations": [ "ce14ff3b9a139629e699882ca26434a29b5c07b3" ], "journalName": "", "journalPages": "257-262", "journalVolume": "", "outCitations": [ "2bc0144228ecee16e40dc94a085cd85bb1541c8e", 
"858d18d809e43b35f3366c639b10abe42ffe51eb", "1820a34042d6371a9e20484b0c63b698eb522a6c", "91912a461d30035639ddda2b6de97a388823fb4b", "957ae212c16ea9a70a53d1143e0f8a908a496648", "0d5a77a52118f67438fd07c4eebaabcded7b6fd8", "7d40d2dbb78a38d8ba0489a8e14cef6b59a14b86", "a86bebe56c45d161c92df54e4887b085b77480a6", "0d8661da67acbd346eacb196a8f7ef21bdc3ccf6", "e7bf34b9439581b130e8282b172e98fc1b51fc80", "90ecc0422e8ba70ceff4461d05e7028ce8882f84", "6ede07b71f0427359286e347be421513a093ed97", "fbc3e87009ac8c044f28eb4c197e0612b025a34c", "1624c3b2b05fdf751835fff81f05a794f1e22fff" ], "paperAbstract": "A number of analytical models have been proposed to estimate the write amplification of the Flash storage to obtain the expected lifespan. This work is dedicated to examining the practical implication of the four existing analytical models for estimating the write amplification: Coupon Collector, Uniform Distribution, Expected Value and Markov model. Since the models assume uniform random workload in full utilization of an SSD to predict write amplification, they are not applicable in predicting write amplification in general workload. Moreover, the existing models have not been verified with the real SSD. In this work, we compare the write amplification of the models with that of a real SSD. When we use 0.147 as the overprovisioning factor of an SSD while running uniform random workload, the write amplification of Uniform Distribution, Expected Value, Markov model is 3.90, 4.08, and 4.08, respectively. However, write amplification of the real SSD shows 1.19, which is very different from that of the prediction models. Through experiment, we found that write amplification is closely related to the value of overprovisioning factor. To improve the accuracy of existing prediction models, we update the overprovisioning factor to take account of the ratio of a hot file and the utilization of the storage. 
We also find that by setting the overprovisioning factor to 1.15, we can obtain write amplification of 1.2 which is close to the write amplification of general workload in a real SSD.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030219" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a5b285544ea04d219d8ab148328189d40222e7c7", "sources": [ "DBLP" ], "title": "Practical Implication of Analytical Models for SSD Write Amplification", "venue": "ICPE", "year": 2017 }, "a5b652e177083805ab1ee7569a3fc350490d3ede": { "authors": [ { "ids": [ "40598941" ], "name": "Donald Kline" }, { "ids": [ "10092636" ], "name": "Nikolas Parshook" }, { "ids": [ "2190728" ], "name": "Alex Johnson" }, { "ids": [ "3242455" ], "name": "James E. Stine" }, { "ids": [ "9343501" ], "name": "William Stanchina" }, { "ids": [ "2730907" ], "name": "Erik Brunvand" }, { "ids": [ "1678617" ], "name": "Alex K. Jones" } ], "doi": "10.1109/IGCC.2017.8323572", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323572", "entities": [ "Anatomic Node", "CMOS", "Emission - Male genitalia finding", "Extracranial-Intracranial Arterial Bypass", "Integrated circuit", "Mobile computing", "Semiconductor device fabrication", "Top-down and bottom-up design", "anatomical layer" ], "id": "a5b652e177083805ab1ee7569a3fc350490d3ede", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ "30a74b804f7f25e817793e40bf17241d64c2f431", "421bef45c5db3454759fd2ce3174fb41f64c6486", "47a7db9689718544299d764da30afef77dc18b85", "c3d3715b72a388fc37249be13d18b847310c4c7e", "375eaaaa933705b057574ec5ce564074a8f4ef5f", "edb46e8223d7084a4e94388d4e9ed9df35187116", "80b831b14ccdf795db35c9c03c72e6e6529606b7", "4a3b9531ec94aaf2c6aa40badaad09a5fd0b41f3", "274468bbd956518390e0de4be0ed6654878e393b", "24fef0f46083e967c54fa988fae932270b8c8900", "be3687a877616217042de0a9669e75a5eeefaa22", 
"ad379a38731294e5d32f3068565f9db28bddaf95" ], "paperAbstract": "Low-energy computing in the use phase is compelling because it helps to address thermal density issues of deeply scaled CMOS, maximizes battery-life of mobile computing platforms, while also addressing sustainability. Unfortunately, environmental impacts of fabricating CMOS integrated circuits (ICs) is increasing and rapidly catching the operational phase of computing systems, particularly for low-energy and mobile computing products. This is due to trends in fabrication techniques for increasingly small geometries, such as increasing photo-lithography and metrology costs. Without attention, IC fabrication will likely become the dominant energy consumer and source of carbon emissions over an IC's lifetime. We propose a scaled parameterized model for evaluating the environmental impacts of IC fabrication, which can scale from 130nm to 32nm technology and account for stepwise changes in process technologies. As an example of the type of analysis possible using this model we demonstrate the environmental impacts of changing the metal stack at these technology nodes. 
Our results indicate that based on the die area calculated from a commercial design flow and our parameterized model, changing the number of metal layers from eight to six layers results in an average savings in manufacturing energy of 9.5%, 13.8%, and 13% for 130nm, 90nm, and 65nm technologies, respectively, and, depending on scenario, it can take years for operational energy savings to makeup this difference.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323572" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a5b652e177083805ab1ee7569a3fc350490d3ede", "sources": [ "DBLP" ], "title": "Sustainable IC design and fabrication", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "a5ded2131a7f90b726ca70f323d0e42a63370ec2": { "authors": [ { "ids": [ "23153807" ], "name": "Jos\u00e9 Bravo Ferreira" }, { "ids": [ "2661184" ], "name": "Marco Cello" }, { "ids": [ "23120433" ], "name": "Jes\u00fas Omana Iglesias" } ], "doi": "10.1007/978-3-319-64203-1_26", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_26", "entities": [ "Duplicate code", "Expectation\u2013maximization algorithm", "Jumpstart Our Business Startups Act", "Library", "Library (computing)", "Linear programming", "Requirement", "Scalability", "Scheduling (computing)", "Simulation" ], "id": "a5ded2131a7f90b726ca70f323d0e42a63370ec2", "inCitations": [ "7bc3c800f29d9050dce723d3f425f019ba3fa436" ], "journalName": "", "journalPages": "358-371", "journalVolume": "", "outCitations": [ "3e257f01e3ee71545d824a1615c35659525b856a", "43776b15c034076a36b7143d58af8e04715e41d0", "47b10d0afece1e971485f4f259beca506a566b5c", "31b27a3b4ff89993eb92e8b1353edead8d5f2520", "594710511ce2177ff7dbbc62fa75dbf14fc7ca26", "6d496d510f867274473a01dcb0a1a7bf45d0904f", "2988e34168fa91398fa397baf823af2063893e9c" ], "paperAbstract": "Container-based infrastructures have surged in popularity, offering advantages in agility and scaling, while also presenting new 
challenges in resource utilization due to unnecessary library duplication. In this paper, we consider sharing libraries across containers, and study the impact of such a strategy on overall resource requirements, scheduling, and utilization. Our analysis and simulations suggest significant benefits arising from library sharing. Furthermore, a small fraction of libraries shared between any two containers, on average, is enough to reap most of the benefits, and even na\u00efve schedulers, such as a First Fit scheduler, succeed at doing so. We also propose a score maximization, mixed-integer linear-programming scheduler for handling bulk request arrivals (such as large jobs composed of many smaller tasks), which compares favorably against state-of-the-art schedulers in these scenarios.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_26", "https://marcocello.github.io/pubs/Euro-Par-2017-LibrarySharing.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/a5de/d2131a7f90b726ca70f323d0e42a63370ec2.pdf", "s2Url": "https://semanticscholar.org/paper/a5ded2131a7f90b726ca70f323d0e42a63370ec2", "sources": [ "DBLP" ], "title": "More Sharing, More Benefits? 
A Study of Library Sharing in Container-Based Infrastructures", "venue": "Euro-Par", "year": 2017 }, "a60a940786b61cd4322d8385ce85a0b871979bf9": { "authors": [ { "ids": [ "1890055" ], "name": "Jiwoong Park" }, { "ids": [ "39716981" ], "name": "Taejeong Kim" } ], "doi": "10.1109/ICDM.2017.47", "doiUrl": "https://doi.org/10.1109/ICDM.2017.47", "entities": [ "Cluster analysis", "Doubly stochastic model", "Graph theory", "Perturbation theory", "Perturbation theory (quantum mechanics)", "Similarity measure", "Spectral graph theory", "Stochastic process" ], "id": "a60a940786b61cd4322d8385ce85a0b871979bf9", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "377-384", "journalVolume": "", "outCitations": [ "b8c3f5b58788f6460799f9b0a3a740f089210d9e", "12424c2a9655284d1db8f40b93f14e34504ff6a3", "3fdb0e91027e0d3cfc39220db021b838f68b90e8", "28967d2b5abc65856b17b4c24f85065f7e46817d", "115f7c6eb074cb71575dce430161ae551efe5dc4", "c4d1de29c8bb9df264f4738a1da3cd71738fd738", "14f2bc1234ed1418790262f56488dc4447c78bc8", "102bf2b9d3e8712e6a2af6afc41ed050b398efd4", "2f1c3e89c16d5c3592aa6abd9a5b7d9e16b7eeb5", "8941a98cde5c090305dba64ac46fc025d29ad0da", "8eaa5fde4d2b8209962d366e2ed9fa719c368aa3", "0c400ad35f5e7563275015cd3fdf78db95b563c8", "90bd16caa44086db6f0e4bbc1dde7063cb71b7b8", "254fcf4b2faeb6b9c06d3770c23f33a5ea114fea", "333aa1225a0364a46185aa19ec99c34b37555258", "a4eaaae93fb6454b5f1d922f9e71bba46a4a9dd5", "1ff2f56ea61919f4782694f1589f64282dfa85d7", "36278bf6919c6dced7d16dc0c02d725e1ed178f8", "145794cc2ae0986b1efb68f4b978deef8c6ab5e1", "1437415df29d3927c7851c7a0db0edd4a472d6e1" ], "paperAbstract": "Building an ideal graph which reveals the exact intrinsic structure of the data is critical in graph-based clustering. There have been a lot of efforts to construct an affinity matrix satisfying such a need in terms of a similarity measure. 
A recent approach attracting attention is on using doubly stochastic normalization of the affinity matrix to improve the clustering performance. In this paper, we propose a novel method to build a high-quality affinity matrix via incorporating Davis-Kahan theorem of matrix perturbation theory in the doubly stochastic normalization problem. We interpret the goal of the doubly stochastic normalization problem as minimizing the relative distance between the eigenspaces of the corresponding matrices. Also, for the doubly stochastic normalization problem we include an additional constraint that each eigenvalue be on the unit interval to fully conform to the spectral graph theory. Experiments on our framework present superior performance over various datasets.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.47" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a60a940786b61cd4322d8385ce85a0b871979bf9", "sources": [ "DBLP" ], "title": "Learning Doubly Stochastic Affinity Matrix via Davis-Kahan Theorem", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "a6472fe7fbc978de8597c2f783891aa1eb1f87a5": { "authors": [ { "ids": [ "2809737" ], "name": "Bojie Li" }, { "ids": [ "23221554" ], "name": "Zhenyuan Ruan" }, { "ids": [ "9754946" ], "name": "Wencong Xiao" }, { "ids": [ "3426371" ], "name": "Yuanwei Lu" }, { "ids": [ "2142901" ], "name": "Yongqiang Xiong" }, { "ids": [ "3651513" ], "name": "Andrew Putnam" }, { "ids": [ "1703319" ], "name": "Enhong Chen" }, { "ids": [ "38645922" ], "name": "Lintao Zhang" } ], "doi": "10.1145/3132747.3132756", "doiUrl": "https://doi.org/10.1145/3132747.3132756", "entities": [ "Attribute\u2013value pair", "Central processing unit", "Commodity computing", "Computation", "Computer data storage", "Data center", "FLOPS", "In-memory database", "Internet bottleneck", "Key-value database", "Kinetic Void", "Network interface controller", "Network processor", "PCI Express", 
"Performance per watt", "Remote direct memory access", "Scalability", "Server (computing)", "Throughput", "USB flash drive", "Value (ethics)" ], "id": "a6472fe7fbc978de8597c2f783891aa1eb1f87a5", "inCitations": [ "ad42b4773cd461ba58bda07e1b7b0ff24c4ddba4", "2dc3d8536a8fa4660c6e842ef17c2d1553fceea6" ], "journalName": "", "journalPages": "137-152", "journalVolume": "", "outCitations": [ "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "0e3253e5ca318e70d4968a45f8d41f88dbffd9e3", "eb82d3035849cd23578096462ba419b53198a556", "46a38fb1e26c9957111d154c6fc2c39101ff0ba1", "406a93739fb99f806bdbf4612fb4af2aa7537581", "a1a858f2d7bf4fc2ea6976197cbde74d5725fc71", "0847dfb07af6685f96b885a93c2cd4dfb4c3d1a9", "205cf007cf77bbf81e55b74635017087585f7b7c", "33f95f238e12e1790ad880ec40cf6c63ea4a70dc", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "122229239aeba1eb4f1623adb40f1845c582a520", "623547a8e0bf856a181faeb6a6856c1517101ee3", "6479c756e597c38e57aa45e2eae8550fd738418b", "5cb88831f543d30cc688fedc445d4e358ef73626", "32d355a7a20f92ccda0608f83d7456870231c570", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "9ad46f6da8cc9fafef6d1dbf17d2a85c944e9184", "3a8c90ab13adb55e3610a020c69f03d72dfae274", "0d3f85933b6355789588476e491683532c68a906", "13b26d008210fffeb8a77c9e90f1ff837523c536", "55416b8613af06855bd94059c3d0305adc58057b", "74ee16d728e7c1834928416b84e859162eaf4e78", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "25f855c968af75e4617f25c71aee3cedec1dedaf", "060fb58c595197a4acc345961ef3cb3f772eee49", "02ebdcf8200135ec0433e12e4ef2459ac740370b", "0d165b784e824521667fa42444061516015aa500", "2c11d5117a8b97ef2ef268e5fb38e8c5ffb1c58c", "145691f9541b9c0f792cd309b063b79fd2fcccfc", "cf8004fdef3bb60f51665d2b36757bcf4fc274c2", "29a1148d75878671dc3663bf480e33d7bd91597d", "e7f3a439a130fd1035bbdb3c60ad960d7d533c5e", "8df62aad18d6de13331479666c3b5d6a32b0ba58", "1594118f2696b573f08510cf837f3b37db87face", "165cf5e471b32122ba3a38709873cecf9b1b9a58", "ef9d9821df55442f039b128bb5cef2b41ab2cadc", 
"7e4921a43378b2b7b9cf950604fe434e4b07da58", "269f4b06d268fe2c4b5347bea9d5103bbab97136", "ebd8073af1f66f2d00034cf9fc88da9f2a00ee62", "09bd66ed15985caa6b0bf1d54a36b508141ed128", "4ccb2d0f62dfd174f2fe9551a4d2a32c2424c618", "daf0cd0076b388712ea12ec4105572997fc50cdf", "18a5f443299784479e78d9e77f175af57cb2fa2b", "56f6aec0132e56769e2036bbeff791dfa137d107", "4e595957047360ce23310150566f228d6fa4507e", "007ac3f2c9b2cd9fbf0ef668128a8dd2f12bcbbb", "893448e800e13eb022cc2b3c6328ff1a85bdbe87", "24c6e70c583daed1852637ec42d4589556ac59d3", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "433207f45ac2c9dbe3876ab53af28ed569e94da9", "034b937edbff280dfdd7b2e98639655fd3587402", "3b72a180f6e6b264c6b91e8e5dde0e27f172c9ed", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "cca9f7e4d9dcc0368934026f1fe2d6590870fa68", "679511fd4d0fa7fc889de0c3c50ecee80d9996d3", "01094798b20e96e1d029d6874577167f2214c7b6", "03416be8097852a54dd3e309434e5a0806824646", "9aa0d7253574e50fe3a190ccd924433f048997dd" ], "paperAbstract": "Performance of in-memory key-value store (KVS) continues to be of great importance as modern KVS goes beyond the traditional object-caching workload and becomes a key infrastructure to support distributed main-memory computation in data centers. Recent years have witnessed a rapid increase of network bandwidth in data centers, shifting the bottleneck of most KVS from the network to the CPU. RDMA-capable NIC partly alleviates the problem, but the primitives provided by RDMA abstraction are rather limited. Meanwhile, programmable NICs become available in data centers, enabling in-network processing. In this paper, we present KV-Direct, a high performance KVS that leverages programmable NIC to extend RDMA primitives and enable remote direct key-value access to the main host memory.\n We develop several novel techniques to maximize the throughput and hide the latency of the PCIe connection between the NIC and the host memory, which becomes the new bottleneck. 
Combined, these mechanisms allow a single NIC KV-Direct to achieve up to 180 M key-value operations per second, equivalent to the throughput of tens of CPU cores. Compared with CPU based KVS implementation, KV-Direct improves power efficiency by 3x, while keeping tail latency below 10 μs. Moreover, KV-Direct can achieve near linear scalability with multiple NICs. With 10 programmable NIC cards in a commodity server, we achieve 1.22 billion KV operations per second, which is almost an order-of-magnitude improvement over existing systems, setting a new milestone for a general-purpose in-memory key-value store.", "pdfUrls": [ "http://doi.acm.org/10.1145/3132747.3132756", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/12/kv-direct.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a6472fe7fbc978de8597c2f783891aa1eb1f87a5", "sources": [ "DBLP" ], "title": "KV-Direct: High-Performance In-Memory Key-Value Store with Programmable NIC", "venue": "SOSP", "year": 2017 }, "a6735253c13b7084ef852588344a29770a01ddf6": { "authors": [ { "ids": [ "38736958" ], "name": "Lijun Chang" }, { "ids": [ "1688012" ], "name": "Wei Li" }, { "ids": [ "19262604" ], "name": "Wenjie Zhang" } ], "doi": "10.1145/3035918.3035939", "doiUrl": "https://doi.org/10.1145/3035918.3035939", "entities": [ "Algorithm", "Approximation algorithm", "Baseline (configuration management)", "Heuristic", "Independent set (graph theory)", "Iterated local search", "Iteration", "Local search (optimization)", "Search algorithm", "Speedup", "Time complexity", "Vertex (geometry)" ], "id": "a6735253c13b7084ef852588344a29770a01ddf6", "inCitations": [ "b7502ee674310ee4466b598708851f6caf73e9db", "6a94ff28e78b6953ec8fd06a3850110ec428604f" ], "journalName": "", "journalPages": "1181-1196", "journalVolume": "", "outCitations": [ "17b4fddfa17610d764e667ffe237f978dde9462f", "1a4edf228e648b54fab3f25df82817cfd0a5bb42", "88c8983ee14198c318ca948deccb5b6ee0abb40f", 
"fb39b3a5dd7eac30066036603ff4f5129186b193", "3b487f66bfcdb4569aa9c77bbeed382d1d1390a2", "ee3869ae9a27a10f8f50f221a54a2c5674ba67f1", "a5de2180bb83fe9e7b2633f0bfefa3c50942aecb", "7aa85143fb0aad0e6c81e157776774384a63c405", "cbce38ea34c90b8a53f35f10b2235e65fc5fe167", "482c0f4d4b72a499d0f56d046f4e05469dc0f037", "7c710aafc93a57d937eb3244994ad513b84c7ebe", "37999ab3c76d82a990e28fb499cc1ffdbdacd401", "5611b606c6680cb5b99e7a41965934b01b0641e9", "ca1c2798e187c2b411b37c2b49a020fdbe6581c0", "850ad780b38b11a633f7e1aacc065e19f01b770c", "c9e80e72f7de24ff01276348a7eceadcf7cf0823", "0050d4029afcdca251d96462ef6d3a98129bc957", "927b7fbfb53fafb56a4097d6193c7cdd625794e6", "b4932747aaa90cc201135dabe88ea45c0236f4ae", "6035d6123b94d65ce7cc25d0fa95680f550bdc31", "8858200fb69a050304e74e59df91d46a6013d3df", "1f0612de1f191abadf250b78cd78f884203cca5e", "acff8ef872ae2e143449bd2bed41eedf4d1b2f87", "23dadf25f3efacbc9c66f69093d656ad5b003529", "ff1a1fcbc579287fcb2add2d1e9aaf1220f188a9", "2ea4562a7e69e5306fdf3c5a3ae40ac2242b4096", "9c7252e8cc8896d58885847b5d14d27489300c1c", "3f7dd60fa461fecf9e34013c81ccf50a3e5397d2", "7bcc53f1baf3358517a602d856192faea9442c91", "97ea45e5be07159ace0da7420ebee343d7b1f0cc", "265a01450168f186348640003f55b11fbd0c4f2b", "159483560ab822a0a8fbea69278e40c81348e847", "1e92b690b17d7f590adbb3fd7d28bbf5c193aae5", "3ddac15bd47bc0745db4297d30be71af43adf0bb", "c4188fcd1f383608e55e8c17e830dc75f079017e", "636fc4193126dd7810a90ad6c0cdf22f4b03d747", "f2d4b6b2048279f5c56e8febfd3473fcbf294710", "07d78543ea3987754113c23d1d49f0d7d36a2c5d" ], "paperAbstract": "This paper studies the problem of efficiently computing a maximum independent set from a large graph, a fundamental problem in graph analysis. 
Due to the hardness results of computing an exact maximum independent set or an approximate maximum independent set with accuracy guarantee, the existing algorithms resort to heuristic techniques for approximately computing a maximum independent set with good performance in practice but no accuracy guarantee theoretically. Observing that the existing techniques have various limits, in this paper, we aim to develop efficient algorithms (with linear or near-linear time complexity) that can generate a high-quality (large-size) independent set from a graph in practice. In particular, firstly we develop a Reducing-Peeling framework which iteratively reduces the graph size by applying reduction rules on vertices with very low degrees (Reducing) and temporarily removing the vertex with the highest degree (Peeling) if the reduction rules cannot be applied. Secondly, based on our framework we design two baseline algorithms, BDOne and BDTwo, by utilizing the existing reduction rules for handling degree-one and degree-two vertices, respectively. Both algorithms can generate higher-quality (larger-size) independent sets than the existing algorithms. Thirdly, we propose a linear-time algorithm, LinearTime, and a near-linear time algorithm, NearLinear, by designing new reduction rules and developing techniques for efficiently and incrementally applying reduction rules. In practice, LinearTime takes similar time and space to BDOne but computes a higher quality independent set, similar in size to that of an independent set generated by BDTwo. Moreover, in practice NearLinear has a good chance to generate a maximum independent set and it often generates near-maximum independent sets. Fourthly, we extend our techniques to accelerate the existing iterated local search algorithms. 
Extensive empirical studies show that all our algorithms output much larger independent sets than the existing linear-time algorithms while having a similar running time, as well as achieve significant speedup against the existing iterated local search algorithms.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035939" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a6735253c13b7084ef852588344a29770a01ddf6", "sources": [ "DBLP" ], "title": "Computing A Near-Maximum Independent Set in Linear Time by Reducing-Peeling", "venue": "SIGMOD Conference", "year": 2017 }, "a674803e67ef3c696d259d4aec1b38113cd636ff": { "authors": [ { "ids": [ "21302949" ], "name": "Sameer G. Kulkarni" }, { "ids": [ "1726357" ], "name": "Wei Zhang" }, { "ids": [ "38348772" ], "name": "Jinho Hwang" }, { "ids": [ "2640561" ], "name": "Shriram Rajagopalan" }, { "ids": [ "1706474" ], "name": "K. K. Ramakrishnan" }, { "ids": [ "2297346" ], "name": "Timothy Wood" }, { "ids": [ "2913151" ], "name": "Mayutan Arumaithurai" }, { "ids": [ "1799074" ], "name": "Xiaoming Fu" } ], "doi": "10.1145/3098822.3098828", "doiUrl": "https://doi.org/10.1145/3098822.3098828", "entities": [ "Central processing unit", "Experiment", "Network function virtualization", "Operating system", "Proportionally fair", "Scheduling (computing)", "Throughput", "User space", "cgroups" ], "id": "a674803e67ef3c696d259d4aec1b38113cd636ff", "inCitations": [ "cd5fdc7ea21293acb52a7af34e01217d54b1c39e", "5a8cd841f59a68c948c7aa05359c7df32dbc8d5c", "e703f605afa0bb4eb5192c382cc367f76434f3b9", "83a31c52bed8d3845201acb7a5b4603212b9e8b6" ], "journalName": "", "journalPages": "71-84", "journalVolume": "", "outCitations": [ "25d28bfbfd9067d9cb5a85f4af0af3a57013baf4", "07595a3a571e09ccaa7727a4659efcb9d9a4f135", "0acdc80c65ad254cd4eba59d4df1bacbadc08a5d", "327a02b19a60319cc35be860ad0259a5c1aef920", "8feddaae81eb260f7e3f716ee76f58de895b98e5", "d6b2fdd44c9655d9193735ce6ef934076265ea59", 
"f1aba2d8893d0b11cdaeeca73e5c5c2b0502d537", "de17cf40a4db13315c631c597959ae26f691f2fa", "2613e7d1b36e751f66402acd6aaf8d8501b91b19", "531957a3e9e47f1993e99bab2391cd828393e2d2", "0a1fc6f5d6955ec24f07902813f6e403db35ce1c", "13fbb4c39c2ce0cd9539df2daae1728fc93e01c1", "156fa936f4c46972245c0720e30b11593e934574", "4387210b77368c2735568d6d9af8d587cc6af68f", "159cb5220c0881ceac85f84b8158918003a2c1b4", "e9af96fbbacb4268c3c5ff974cc44990b12294e5", "7365135511b7510ac59c47725ab45ecb3e69f748", "42d1b52254873ecd0f36eb7342f95dbad9c50187", "314041de58e3aae21e463c01193ea26cbc38b478", "2c153242eb1a02a3417f2eddb8373d109d309fe0", "2c7e254cfea97f0b10320d723aa906f3f5c9bed6", "005718daa82f7880b5b1f3b01e0e8f6aad2b5751", "3b8c2360ca905b19bf193493ae44ea102767f04c", "0aa4cacf6a60125961f1dac4afca63a8dcf706f9", "1aafc7066e52f18dee78103822da24a5d85da93c", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "46a38fb1e26c9957111d154c6fc2c39101ff0ba1", "980773ca869fc17562e4fbcf4202a8f21893b114", "56a42c71388e80f68efd350afe38175510aaa915", "48421ba55cec9d7ecb9dce0962dcef45eed9aa65", "336b4f3099b8f629adc20a69aba15257e53539f9", "3e7fd5ac3fc1ab19c985c97a0614e4109fa91583", "16702514f859c5de5a7a9432ce2bae7527362c75", "3bb723980b8eaee73acfe39ba74934245051575d" ], "paperAbstract": "Managing Network Function (NF) service chains requires careful system resource management. We propose NFVnice, a user space NF scheduling and service chain management framework to provide fair, efficient and dynamic resource scheduling capabilities on Network Function Virtualization (NFV) platforms. The NFVnice framework monitors load on a service chain at high frequency (1000Hz) and employs backpressure to shed load early in the service chain, thereby preventing wasted work. Borrowing concepts such as rate proportional scheduling from hardware packet schedulers, CPU shares are computed by accounting for heterogeneous packet processing costs of NFs, I/O, and traffic arrival characteristics. 
By leveraging cgroups, a user space process scheduling abstraction exposed by the operating system, NFVnice is capable of controlling when network functions should be scheduled. NFVnice improves NF performance by complementing the capabilities of the OS scheduler but without requiring changes to the OS's scheduling mechanisms. Our controlled experiments show that NFVnice provides the appropriate rate-cost proportional fair share of CPU to NFs and significantly improves NF performance (throughput and loss) by reducing wasted work across an NF chain, compared to using the default OS scheduler. NFVnice achieves this even for heterogeneous NFs with vastly different computational costs and for heterogeneous workloads.", "pdfUrls": [ "http://faculty.cs.gwu.edu/timwood/papers/17-SIGCOMM-NFVNice.pdf", "http://www.net.informatik.uni-goettingen.de/publications/2023/Kulkarni_SIGCOMM17.pdf", "http://doi.acm.org/10.1145/3098822.3098828" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a674803e67ef3c696d259d4aec1b38113cd636ff", "sources": [ "DBLP" ], "title": "NFVnice: Dynamic Backpressure and Scheduling for NFV Service Chains", "venue": "SIGCOMM", "year": 2017 }, "a68d1074562788a73e4e45508f2c80104be4f021": { "authors": [ { "ids": [ "2907351" ], "name": "Swagata Sharma" }, { "ids": [ "30547537" ], "name": "Venkat Durvasulu" }, { "ids": [ "32733962" ], "name": "Berk Celik" }, { "ids": [ "32341502" ], "name": "Siddharth Suryanarayanan" }, { "ids": [ "2904279" ], "name": "Timothy M. Hansen" }, { "ids": [ "1728645" ], "name": "Anthony A. 
Maciejewski" }, { "ids": [ "1744243" ], "name": "Howard Jay Siegel" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.17", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.17", "entities": [ "Computation", "FOSSIL", "Simulation", "Supercomputer", "Time complexity" ], "id": "a68d1074562788a73e4e45508f2c80104be4f021", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "130-137", "journalVolume": "", "outCitations": [ "346c5896ff2032d7c7a8400cbbd3bd2f61c72f1a", "3cc6814bdfb95255cf906400b13a730b2054843b", "582fef1623a2c3d8ade583de24431fd1058fac70", "4b7d9b0322ebc4874d5baaae14661aa634885c19", "92460447efdb0c8687e99ff25b40a747f059252d", "021d2b357c2d4fb0000f93970732bdef961ac6d4", "89b4e1c894522a5044d8e61cae4e5d1dc02c6b30", "8259ef01aef9dd37755c914cbedbe946966a410a", "95e51fb982088b3692b0469ce4ccdbbd69d7965b", "b9ed142da67ad39863692695b5f4b57fbc3e402b", "daa0b258d3d70c19937091ce74d8b72701a6ff60", "4b0f79d8d1fa2cfa8e4146a5343cd3817b92bd18", "53ae638b06e897bbe0b2c8565595dde6b4de76a9", "4d3d300a7cbd734d6f0b595337626ea2c87501ba", "80c861535df3e93b572736f1d3b4aa17cd390b8e", "26e4e43e1f31a112767eb0ed85a00050483a0457", "f8e3e7b00ecb80feb4132d1e4deed5d961080a3c", "134b801ad01c7310fadbcb81b0abfd52a9c26523", "00b13e9fbf7cdc83f9121f75f2cbe35f40ccd2f2", "91db3d1b6de4e4dc0759530b2b37fb95db7e8a04", "5e32caf09a5a101ae0c65dcd807de9862f6ee14f", "fce84ac43f721f2aeb5d3f139c28f6bb5febaa55", "423beb3c062f8cf1ec5a53f494ddc37d5697ac90" ], "paperAbstract": "Assessing the effectiveness of a demand response (DR) program requires appropriate metrics of performance. In this paper, we propose the assessment of an aggregator-based residential DR program using two newly developed metrics addressing the economic and environmental aspects of sustainability. 
The economic sustainability metric of the DR method is quantified by the economic savings of the customers on electricity charges and the aggregator's profit. The environmental sustainability is quantified by measuring the reduction in capacity factors of fossil-fueled peaking power plants and the subsequent reduction in CO2 emissions. A simulation study is performed for a large-scale power system consisting of 5,555 users and 56,659 schedulable assets using real pricing data from a utility and a bulk electricity market for a 31-day period. Finally, we apply high-performance computing methods to the month-long study to yield a faster computation time.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.17" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a68d1074562788a73e4e45508f2c80104be4f021", "sources": [ "DBLP" ], "title": "Metrics-Based Assessment of Sustainability in Demand Response", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "a6a906ae9727a33409eaa207253a6ad32a871c11": { "authors": [ { "ids": [ "12418191" ], "name": "Kewen Meng" }, { "ids": [ "1763308" ], "name": "Boyana Norris" } ], "doi": "10.1109/CLUSTER.2017.43", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.43", "entities": [ "Binary code", "Experiment", "Parsing", "Performance prediction", "Performance tuning", "Program analysis", "Software development", "Static program analysis", "Supercomputer", "Usability" ], "id": "a6a906ae9727a33409eaa207253a6ad32a871c11", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "103-113", "journalVolume": "", "outCitations": [ "138a4fa853bd6b02052085fffbaacab38ee1473a", "9d30381c49afa033eacc04fb68975762eb7bafab", "6800600e6451d0bf0a4e866a483cac8c8617da88", 
"1657da35049f5ade21c169b274c3c0adee288f46", "51ec4530e2b6d73f410568952db220c05865e073", "f0f4757aa2f923a349e8357e73850a78e9b80fee", "3218bbfd89deae4134d6c6d7f8f3ceb5c3a361f7", "fef1056a69be6f597b4866bc3ee306bf01a4df0d", "60d4b2c4d9630e4905748e7d3565a013d2304906", "15708276fbb98a7d3f8835a2c51cb522eeab9967", "42be8c9380613754c82782ae86291d3c379f2ead", "0a89b4a34c86cb65689f5e79a88fd4bc7c8f63c5", "d32d4ff33b1b2665d6081194eb6acdc3c7dd6891", "a0b1b8ee4a9e6ae68ce6a712ad0a66ddb4a12117", "0e95e0ff4014053ac11fcbcee556eaab4dc1a92d", "092217c2267f6e0673590aa151d811e579ff7760", "b04391910d19d2d0c64b62d300927f527417414e", "c3745b88bff2e49543056666816d130e9d2d1baa", "1f2ff98f9413bb36c641e9edcfa79f7b33eeb80a", "3dc60796f2b423dfd089f79e54bd6caf764a41e6", "91e413ece54c911087295f1d9c06397e961e361f", "91607d7bc71823360de59b894ae37b4f1738bca0" ], "paperAbstract": "The performance model of an application can provide understanding about its runtime behavior on particular hardware. Such information can be analyzed by developers for performance tuning. However, model building and analyzing is frequently ignored during software development until performance problems arise because they require significant expertise and can involve many time-consuming application runs. In this paper, we propose a fast, accurate, flexible and user-friendly tool, Mira, for generating performance models by applying static program analysis, targeting scientific applications running on supercomputers. We parse both the source code and binary to estimate performance attributes with better accuracy than considering just source or just binary code. Because our analysis is static, the target program does not need to be executed on the target architecture, which enables users to perform analysis on available machines instead of conducting expensive experiments on potentially expensive resources. Moreover, statically generated models enable performance prediction on nonexistent or unavailable architectures. 
In addition to flexibility, because model generation time is significantly reduced compared to dynamic analysis approaches, our method is suitable for rapid application performance analysis and improvement. We present empirical validation results to demonstrate the current capabilities of our approach on small benchmarks and a mini application.", "pdfUrls": [ "https://arxiv.org/pdf/1705.07575v1.pdf", "http://arxiv.org/abs/1705.07575", "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.43" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a6a906ae9727a33409eaa207253a6ad32a871c11", "sources": [ "DBLP" ], "title": "Mira: A Framework for Static Performance Analysis", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "a6bec07b5b3fbbd434b30b388235f236c94aecbd": { "authors": [ { "ids": [ "3297280" ], "name": "Seyed Hessam Mirsadeghi" }, { "ids": [ "1739480" ], "name": "Jesper Larsson Tr\u00e4ff" }, { "ids": [ "2103230" ], "name": "Pavan Balaji" }, { "ids": [ "1754477" ], "name": "Ahmad Afsahi" } ], "doi": "10.1109/HiPC.2017.00047", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00047", "entities": [ "Algorithm", "Distributed algorithm", "Message Passing Interface", "Message passing", "Network topology", "Scalability", "Sparse matrix" ], "id": "a6bec07b5b3fbbd434b30b388235f236c94aecbd", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "348-357", "journalVolume": "", "outCitations": [ "b617857ea85e80cb3d299773002eb2b9401d4c7a", "4aaff0a4237ae178d06001e33834cc7bd4573925", "a2cb1c5ca5308a6926662a923f7a0acd329b6207", "14db638cc8784cb1f861cc1134c4832d1d23b677", "5c3354c4cef92629657ea3f327737baffbb76c38", "5a3cd8c65ffcc25bef346174d1f0bc3f83c5cbbb", "3081f02021c619b87155ecc6379ab037c0e36dea", "98025d4ee9cf6c4eea8ba6aae51916bd205afdd4", "3fce58dc087a4a050a39f1c9ec6e8aadb427f29a", "302d316453b600ea16da667e43539fd5af8c322d", 
"4ff7a5d31c2524f62662da67a22560867e025456", "66ebf1350896fa1d743f1acfdc4289b8f4d8154e", "50b017733d4860932276442de6eab5c09675d9cf", "b148119329dbcedc01ecafb47218939e1f50d47a", "7da4737120dbc08a4f06902df15c0e2569efca30", "3f15ec50a233f495ddeaea45bf00f9c695c91df0", "1179120ff478240669b790d152f3cbcac2100a99" ], "paperAbstract": "Neighborhood collectives were added to the Message Passing Interface (MPI) to better support sparse communication patterns found in many applications. These new collectives encourage more scalable programming styles, and greatly extend the scope of MPI collectives by allowing users to define their own collective communication patterns. In this paper, we describe a new, distributed algorithm for computing improved communication schedules for neighborhood collectives. We show how to discover common process neighborhoods in fully general MPI distributed graph topologies, and how to exploit this information to build message-combining communication schedules for the MPI neighborhood collectives. Our experimental results show considerable performance improvements for application communication topologies of various shapes and sizes. 
On average, the performance gain is around 50%, but it can also be as much as 71% for topologies with larger numbers of neighbors.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00047" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a6bec07b5b3fbbd434b30b388235f236c94aecbd", "sources": [ "DBLP" ], "title": "Exploiting Common Neighborhoods to Optimize MPI Neighborhood Collectives", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "a6ca37aeeef5911e4f36b904088479bea999cc81": { "authors": [ { "ids": [ "2646773" ], "name": "Zhiyu Liu" }, { "ids": [ "1801923" ], "name": "Irina Calciu" }, { "ids": [ "1744502" ], "name": "Maurice Herlihy" }, { "ids": [ "1734461" ], "name": "Onur Mutlu" } ], "doi": "10.1145/3087556.3087582", "doiUrl": "https://doi.org/10.1145/3087556.3087582", "entities": [ "Algorithm", "Central processing unit", "Concurrent data structure", "Data structure", "Data-intensive computing", "Die (integrated circuit)", "Embarrassingly parallel", "FIFO (computing and electronics)", "In-memory database", "Naivety", "Pipeline (computing)", "Pointer (computer programming)", "Scalability", "Server (computing)", "Throughput" ], "id": "a6ca37aeeef5911e4f36b904088479bea999cc81", "inCitations": [ "0c4fe1f1a8043e8f4175b21faca1b72bff8033e6", "983e87929eeb3f77c2ddb02d17d6efe978c80667", "e45dea6588d1de0a23618e019031e67eedeeee26", "ecf5efd5fe18860b42a1abd198e94a868dbf944c", "00cc482570d739e7b733f45b6f8f1836b24056bd" ], "journalName": "", "journalPages": "235-245", "journalVolume": "", "outCitations": [ "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "588fd53a6cbdb2f2d7f2bd676944d7b5fdfafcb9", "11be4655461d518b6de5b90fbc816f5e1d74ba91", "60f9d8874d8679b94896160bd3a8bf4b02d8b883", "33da45838d0b6c082cc71e603fd802bac4d56713", "a5bd15d203c6aa740aba16776b422db010e66b58", "e5d2b364140071f2ab20a942b855d4599775faea", "8b04ea524cb6ced72868c120a00c4679d84be006", 
"3c89345bb88a440096f7a057c28857cc4baf3695", "069eafae5ee9df25ff5c457bb636f73b98d8f6e9", "48a7323c4894de3afb90ef2135160205ebb55011", "205ff590dc7881db74d766c43e3509ddfbe24d81", "2b082f9606aa5c1beb7c5704f4b8338205a9994e", "4f70e1583f5d31d29ceed2998c52b2bf6c01e2ec", "5e41307a2f2850f164ad0175f372799ce61e0bf9", "42f174df3876256dd5606bb61b366116e9943beb", "0fdeee8f12f2f1f01e06b3c0c57fe824ff516682", "5c71f2e8ab879bf508002d8f2e29c0f21317f3e9", "4e3304e77dd2fecea4086e132981d1470434cf65", "2394c6644efa856f0da160a0f0031d74cd3b5000", "468035263afa59095614f26a62e0217da4a1aeed", "37b5850e3e75a3462f3991491ca26674925f233b", "a0280c69589951383ea0dbcd06f11bc4c595eff1", "012d556d67acedc6898930b4c93f54b87aabf5ee", "2cfa2068d49fc0e9cc5d96bc498c63e782f7478f", "6db9bd41b294a7b45792b8f4ac8864f5d178f35e", "0693ff4b3a8d1452b897a876d3ffe6b2074e98e4", "5906fc1d9cc56d31b9373cdb868cb90aa613d90d", "2263e2beda96efcff8ff19efaefcb85f5136aeca", "813b7cdcf6d77ea34b4cf68378e2508db28fdf50", "e75a155fd802010d677e15345a51fdb0f495c8c3", "30df50d77ef9478a2848626dfe3bf65f3c991991", "60aa9510638d4d9739ebfc3a0042187988482346", "5baaeed2b180d8b9886eca113ae0c86196c8bdaf", "24b300b2395e4d0b0d2c8b9797fc9f8e735a58ef", "0d0b89fa3caa4b403a5dca4e6ea02cba82e8d293", "1697056663684522a89fbc838ac03512122f0ea8", "06902cb95ede2c305db4000852014f276b25c082", "159052c1920d03ac534980af5343dc6eb0c41e3b", "0add1d832f492523dbf87618f6726237c1a57221", "6f93e0325e577f49f4bed46a2adcfee4a649dc83", "264cd229ac4bbdf655d5e7b44563bf84bd846364", "10b014e882764f5800ecdcbaba1fa08795d0c54d", "19554445f1f3ea7b54be06a74a0d0840ade02be5", "30bb582c2c09abc7eb9dda7d9f80804eeb89f9d7", "9e86e9a332be800d7420aa0a394cd1b348d93e48", "42142c121b2dbe48d55e81c2ce198a5639645030" ], "paperAbstract": "The performance gap between memory and CPU has grown exponentially. 
To bridge this gap, hardware architects have proposed near-memory computing (also called processing-in-memory, or PIM), where a lightweight processor (called a PIM core) is located close to memory. Due to its proximity to memory, a memory access from a PIM core is much faster than that from a CPU core. New advances in 3D integration and die-stacked memory make PIM viable in the near future. Prior work has shown significant performance improvements by using PIM for embarrassingly parallel and data-intensive applications, as well as for pointer-chasing traversals in sequential data structures. However, current server machines have hundreds of cores, and algorithms for concurrent data structures exploit these cores to achieve high throughput and scalability, with significant benefits over sequential data structures. Thus, it is important to examine how PIM performs with respect to modern concurrent data structures and understand how concurrent data structures can be developed to take advantage of PIM.\n This paper is the first to examine the design of concurrent data structures for PIM. 
We show two main results: (1) naive PIM data structures cannot outperform state-of-the-art concurrent data structures, such as pointer-chasing data structures and FIFO queues, (2) novel designs for PIM data structures, using techniques such as combining, partitioning and pipelining, can outperform traditional concurrent data structures, with a significantly simpler design.", "pdfUrls": [ "http://cs.brown.edu/people/irina/papers/spaa2017-final.pdf", "https://cs.brown.edu/people/irina/slides/spaa2017-slides.pdf", "http://doi.acm.org/10.1145/3087556.3087582" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a6ca37aeeef5911e4f36b904088479bea999cc81", "sources": [ "DBLP" ], "title": "Concurrent Data Structures for Near-Memory Computing", "venue": "SPAA", "year": 2017 }, "a6e9bec3612179eef7acf3bc5af41340db9a28cf": { "authors": [ { "ids": [ "1678924" ], "name": "Jian Liu" }, { "ids": [ "2405565" ], "name": "Chen Wang" }, { "ids": [ "40432206" ], "name": "Yingying Chen" }, { "ids": [ "1707411" ], "name": "Nitesh Saxena" } ], "doi": "10.1145/3133956.3133964", "doiUrl": "https://doi.org/10.1145/3133956.3133964", "entities": [ "Algorithm", "Authentication", "Biometrics", "Denial-of-service attack", "Experiment", "Fingerprint", "Fingerprint recognition", "Lock (computer science)", "Password", "Touchscreen" ], "id": "a6e9bec3612179eef7acf3bc5af41340db9a28cf", "inCitations": [], "journalName": "", "journalPages": "73-87", "journalVolume": "", "outCitations": [ "61409ec7cc5f286f9b08e9b9095fa4ccabd00b79", "7d75125b35c0b65906f237088d11ccc0f348df76", "2c02be3998a11fec62070304c63e3c6ad33c4b86", "2c3a0644741b39b7bdb1c5477dc7324f7afb8ee7", "616cc19f392720107a5215b479a7b2541b366b6a", "9bea4b47f5272bf7cc2173d9c160fc37c133bad9", "7500a3b8a9b8a9c85f508757484c1880e298e42a", "19fe0c0c7fee6dbe877ac0978a0b8a4b08ffee27", "99b75bfa6465aa60f5d3a6b6850d438e0919ac23", "3a6938407456499ebc33ae86a8b9370d913fbd77", "cd339c35df2131cb1939fb6e7691fd921c8d84bc", 
"289436d56b5fb8cd476eea40bccedce5a25d8122", "c806c45a5e7e1810782b36c63f57c7ea02e002b1", "44a8de5f4104b99c46b3d9c9d6fc058824bc2966", "733e563845d78ad1085122848cb7ab21752c91ca", "0103b4a14ab5396b75c2eec39865e0cec66a8e19", "d1d88cc0d2edda0d0564f6702a87ba415f53729d", "a345789306801724af1287a96e8a8136b5f72be5", "0f16f6f478b5c788dce466eb50e36c612273c36e", "13e4dcd1b5306eec2436f2515f83eae1ab974f64", "8afd80726c54ed7b95d30d1230bef633d128c930", "4aaadcaf19c774ea8f7dd87254c932bdf23ec4c7", "96e81b654a5adce68e22c9070389895ba0848b05", "83ca1ac19e12a19179dcde4867a90c3a690acce0", "0f069706535b78d31c7b56dc9d9cb86759025f56", "421bd0ca198cfa900bb3b0775b1666d57ce16b13", "8ed4812d0b97212c1629257bb56763a983444f80", "12060bd60d55b33fe64b7f6cfb058b247268d1cf", "89feb0d7d2cb4ba125ce2a776b03f5c451051e1f", "1837ca0c85ceaaaa91ee6ec174c5066fcfbb58f6", "f9d25c9333081bec5aa40d5aa0bd087c41bd5a1d", "2d09cf5a9329887bb7bb05fffe15439ea89261b1", "88e03ea8fcd0549f1cfd537247de4e2740c52dc6", "1ffb5a089f027ec392cd1df7cfa861f6d92172f2", "b02ec0e94c6b65d3f7740737c3b62723fe9bcb8f", "24a5ba3221b6882477a2fee10d3de6ef592fe400" ], "paperAbstract": "The goal of this work is to enable user authentication via finger inputs on ubiquitous surfaces leveraging low-cost physical vibration. We propose VibWrite that extends finger-input authentication beyond touch screens to any solid surface for smart access systems (e.g., access to apartments, vehicles or smart appliances). It integrates passcode, behavioral and physiological characteristics, and surface dependency together to provide a low-cost, tangible and enhanced security solution. VibWrite builds upon a touch sensing technique with vibration signals that can operate on surfaces constructed from a broad range of materials. 
It is significantly different from traditional password-based approaches, which only authenticate the password itself rather than the legitimate user, and the behavioral biometrics-based solutions, which usually involve specific or expensive hardware (e.g., touch screen or fingerprint reader), incurring privacy concerns and suffering from smudge attacks. VibWrite is based on new algorithms to discriminate fine-grained finger inputs and supports three independent passcode secrets including PIN number, lock pattern, and simple gestures by extracting unique features in the frequency domain to capture both behavioral and physiological characteristics such as contacting area, touching force, and etc. VibWrite is implemented using a single pair of low-cost vibration motor and receiver that can be easily attached to any surface (e.g., a door panel, a desk or an appliance). Our extensive experiments demonstrate that VibWrite can authenticate users with high accuracy (e.g., over 95% within two trials), low false positive rate (e.g., less 3%) and is robust to various types of attacks.", "pdfUrls": [ "http://www.winlab.rutgers.edu/~yychen/papers/vibwrite.pdf", "https://info.cs.uab.edu/saxena/docs/lwcs-ccs17.pdf", "http://doi.acm.org/10.1145/3133956.3133964" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a6e9bec3612179eef7acf3bc5af41340db9a28cf", "sources": [ "DBLP" ], "title": "VibWrite: Towards Finger-input Authentication on Ubiquitous Surfaces via Physical Vibration", "venue": "CCS", "year": 2017 }, "a6fae22e8e40e350f005c4133256fe68cbcf5356": { "authors": [ { "ids": [ "2599242" ], "name": "Kay Ousterhout" }, { "ids": [ "20978225" ], "name": "Christopher Canel" }, { "ids": [ "39814781" ], "name": "Max Wolffe" }, { "ids": [ "1699297" ], "name": "Sylvia Ratnasamy" }, { "ids": [ "1753148" ], "name": "Scott Shenker" } ], "doi": "10.1145/3102980.3102981", "doiUrl": "https://doi.org/10.1145/3102980.3102981", "entities": [ "Apache Spark", "First-class 
citizen", "Jumpstart Our Business Startups Act" ], "id": "a6fae22e8e40e350f005c4133256fe68cbcf5356", "inCitations": [ "284b7631a9961f69eae1e0bac49438aee34edaa0", "301d189e85def6eaddbc7152416df1511b55e82b", "c206dd5b90104df0fd12a2c1f3fb0f913ee08c0b", "372a2383891257520ad6dea816d3f14ddff8f003" ], "journalName": "", "journalPages": "1-6", "journalVolume": "", "outCitations": [ "0558c94a094158ecd64f0d5014d3d9668054fb97", "0aeb77fb41dc8e863e054fcffea7b8b3011515ce", "9da1d06e9afe37b3692a102022f561e2b6b25eaf", "2f88dcf1e9abaa0b0f8c63820548c98b2da61220", "332f77fd05703c1607e3b57884ad31fb1fad0104", "0254e7809ea94c30adedd5e853bdd0014b6521c9", "28a9dca6faeead651539c700bef413203b2b876e", "0541d5338adc48276b3b8cd3a141d799e2d40150", "53aabc0ab7bdb22c4bb5b508a4db2fc4a2387060" ], "paperAbstract": "Users often struggle to reason about the performance of today's systems. Without an understanding of what factors are most important to performance, users do not know how to tune their system's hardware and software configuration to improve performance. We argue that performance clarity -- making it easy to understand where bottlenecks lie and the performance implications of various system changes -- should be a first class design goal. To illustrate that this is possible, we propose an architecture for data analytics frameworks in which jobs are decomposed into schedulable units called monotasks that each consume a single resource. By untangling the use of different resources, using monotasks allows the system to trivially report time used on each resource and the resource bottleneck. 
Our prototype implementation of monotasks for Apache Spark is API-compatible and achieves performance parity with Spark, and yields a simple performance model that can predict the effects of future hardware and software changes.", "pdfUrls": [ "http://kayousterhout.org/publications/hotos17.pdf", "http://doi.acm.org/10.1145/3102980.3102981" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a6fae22e8e40e350f005c4133256fe68cbcf5356", "sources": [ "DBLP" ], "title": "Performance clarity as a first-class design principle", "venue": "HotOS", "year": 2017 }, "a76a3e5acaca9531a0b8628a9ed7a946ca717131": { "authors": [ { "ids": [ "2636545" ], "name": "Rob F. Van der Wijngaart" }, { "ids": [ "2418537" ], "name": "Evangelos Georganas" }, { "ids": [ "2682150" ], "name": "Timothy G. Mattson" }, { "ids": [ "3313944" ], "name": "Andrew M. Wissink" } ], "doi": "10.1007/978-3-319-58667-0_14", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_14", "entities": [ "Kernel (operating system)" ], "id": "a76a3e5acaca9531a0b8628a9ed7a946ca717131", "inCitations": [], "journalName": "", "journalPages": "256-274", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_14" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a76a3e5acaca9531a0b8628a9ed7a946ca717131", "sources": [ "DBLP" ], "title": "A New Parallel Research Kernel to Expand Research on Dynamic Load-Balancing Capabilities", "venue": "ISC", "year": 2017 }, "a7c7c7d036b55bd9a3882def0d14e31d78931b15": { "authors": [ { "ids": [ "2375706" ], "name": "Junhua Fang" }, { "ids": [ "2252093" ], "name": "Rong Zhang" }, { "ids": [ "1686139" ], "name": "Tom Z. J. 
Fu" }, { "ids": [ "1723364" ], "name": "Zhenjie Zhang" }, { "ids": [ "1696626" ], "name": "Aoying Zhou" }, { "ids": [ "2849144" ], "name": "Junhua Zhu" } ], "doi": "10.1145/3078597.3078613", "doiUrl": "https://doi.org/10.1145/3078597.3078613", "entities": [ "Algorithm", "Attribute\u2013value pair", "B-tree", "Distributed computing", "Download", "Equivalence partitioning", "Hash function", "Heuristic", "Key-based routing", "Logical connective", "NP-hardness", "Optimization problem", "Program optimization", "Routing", "Routing table", "State (computer science)", "Stream processing", "Synthetic data", "Utility" ], "id": "a7c7c7d036b55bd9a3882def0d14e31d78931b15", "inCitations": [ "c48ca16a8ef45337d077a99e49b03d86859363dd", "9429348ca570d752f354cfe3b47437a023ba3f90" ], "journalName": "", "journalPages": "15-26", "journalVolume": "", "outCitations": [ "a6d6dad952d35658d5e9a5c481401f7d3a5d7a7d", "178ff3ab1afcd6fb348a9805babe0a5c814be5af", "0137ce40aabafe0e2ec2c4cec221f4e66ceb261d", "4aa70e2060966dfb90d9073526f23f101191d7a7", "2c688c40374fee862e0f0038696f2951f1927337", "f6ccaa02a3cdc8c9160c979903eaacbccdf582f2", "1dfed536f743300240ad9921ba57304ed883ce3c", "3f4cedff46e2fd542899ad9cfac286bf9976e022", "0ef88610f4382a9c6e5c8020ab6ccf3dde0fb3eb", "10dcd5574dca03395b507fbb4d0c90f804cbbf19", "007758ab121a7e03181da56f135ea15ea7fb7576", "3a91cf746eeade9718cd7bd8ba1f4b3a5ea59223", "3abca96006f8a6c014635b6a111368f459110e83", "ce91b0ca3b88bd0464f5e7ca7564ee84ed7b371c", "e847c3ec130da57328db79a7fea794b07dbccdd9", "16287f07e76eaec9cfe06c76c859161b2607e7ef", "1b535af0d110491eabeedf8323a51327846e55b2", "edd661ca12ef8ef988679f7b399f2f846ef01fbd", "bef5bdc4d49c9da09d125f4d86b15509ebff52cd", "02f2f61ea5f6bbf797521f0f2f4c59b8db677645", "88aa5523bb6690faf449edb4e1c9c6a19abf7122", "0ef1dd03db41de69165075562a051021a186c230", "0a12a179bebdf4bb69d692a1127795b3f536270b", "0f751490a4c5c441124a8af8276a80450a3cf5b5", "07e1818b1e816d3ec56cc1016a227387ddc22204", 
"4a578eaa97df4a2eb99d79da215fbafe084e26eb", "2bc73c30bd107cc1a57c7e13982fc27a5c6aa579", "638c917d981915bc7a00bb0941cdd38111df51de", "689daac32ba52ad5d72178fd4d5e093fb9501132", "06213941a0cb8c4870d68e1ad6598ba43bba6bd7", "a42c8f5971dea1aec01391ee80fe66721b010fd6" ], "paperAbstract": "Key-based workload partitioning is a common strategy used in parallel stream processing engines, enabling effective key-value tuple distribution over worker threads in a logical operator. It is likely to generate poor balancing performance when workload variance occurs on the incoming data stream. This paper presents a new key-based workload partitioning framework, with practical algorithms to support dynamic workload assignment for stateful operators. The framework combines hash-based and explicit key-based routing strategies for workload distribution, which specifies the destination worker threads for a handful of keys and assigns the other keys with the hash function. When short-term distribution fluctuations occur to the incoming data stream, the system adaptively updates the routing table containing the chosen keys, in order to rebalance the workload with minimal migration overhead within the stateful operator. We formulate the rebalance operation as an optimization problem, with multiple objectives on minimizing state migration costs, controlling the size of the routing table and breaking workload imbalance among worker threads. Despite of the NP-hardness nature behind the optimization formulation, we carefully investigate and justify the heuristics behind key (re)routing and state migration, to facilitate fast response to workload variance with ignorable cost to the normal processing in the distributed system. 
Empirical studies on synthetic data and real-world stream applications validate the usefulness of our proposals.", "pdfUrls": [ "https://arxiv.org/pdf/1610.05121v1.pdf", "https://arxiv.org/pdf/1610.05121v2.pdf", "http://arxiv.org/abs/1610.05121", "http://doi.acm.org/10.1145/3078597.3078613" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a7c7c7d036b55bd9a3882def0d14e31d78931b15", "sources": [ "DBLP" ], "title": "Parallel Stream Processing Against Workload Skewness and Variance", "venue": "HPDC", "year": 2017 }, "a810c97eb8aee081747bda8af7749cc6ecc28769": { "authors": [ { "ids": [ "19231918" ], "name": "Ryan D. Friese" }, { "ids": [ "3247453" ], "name": "Nathan R. Tallent" }, { "ids": [ "1692279" ], "name": "Abhinav Vishnu" }, { "ids": [ "1715527" ], "name": "Darren J. Kerbyson" }, { "ids": [ "1753153" ], "name": "Adolfy Hoisie" } ], "doi": "10.1109/IPDPS.2017.61", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.61", "entities": [ "Cache (computing)", "Central processing unit", "Critical path method", "Dynamic program analysis", "Locality of reference", "Microarchitecture", "OSI model", "Performance prediction", "Scalability" ], "id": "a810c97eb8aee081747bda8af7749cc6ecc28769", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "317-326", "journalVolume": "", "outCitations": [ "147ed9f73bff0513be17a6026cad14dea68186f7", "48f59efbc20fef3e571b9fb81d039bfa9619a9bf", "6800600e6451d0bf0a4e866a483cac8c8617da88", "bf33756bc551581e0f7a1d43e148b305ff2296d7", "1824405dd805a02dccc85ce4341abfe0799282a8", "15708276fbb98a7d3f8835a2c51cb522eeab9967", "69cd319c93692acc0822eeec743706515c693471", "85a96bb5d97a1f8aafb65d88848bc7ebb0a0e3be", "705c20122d0f139e747c14a9879f9bb5ae65387a", "7a4a6c94aa2edb834ef19bd4568a4e84673fd8d6", "e240f361ee577a1662b719a23c6117ecad3d307c", "0aa48caa248c27ed8905a0123cd1c29ff0dc4968", "0f9080d297fc22dcf24dfd8ffcd3de5cea04c689", 
"e1fe8bda7373de0a8bc4382de18ec086c10de3b3", "aae636bd99bc4bae4cd4afcfa4621ef573a55c26", "a0ba323d58a1879fb877cc92293ed0f631317af4", "3d19057951ec74bd5b7ad8e4fe09eaea1dfca3b1", "178599e5e976e82528e71cb2e1b812d588fa0e44", "e9fa7e93d2f4e958cde16c6cf9d5bc966f8e7ffc", "05941c054493f241523dc6545c825ee38df3959a", "021d7ca29cea1a55e5095e17ddb658e54e054793", "28d43fffcbaf2eb7e2d9b931a9cb2082399e8409", "42be8c9380613754c82782ae86291d3c379f2ead" ], "paperAbstract": "Many applications have irregular behavior — e.g., input-dependent solvers, irregular memory accesses, or unbiased branches — that cannot be captured using today's automated performance modeling techniques. We describe new hierarchical critical path analyses for the Palm model generation tool. To obtain a good tradeoff between model accuracy, generality, and generation cost, we combine static and dynamic analysis. To create a model's outer structure, we capture tasks along representative MPI critical paths. We create a histogram of critical tasks with parameterized task arguments and instance counts. To model each task, we identify hot instruction-level paths and model each path based on data flow, data locality, and microarchitectural constraints. We describe application models that generate accurate predictions for strong scaling when varying CPU speed, cache and memory speed, microarchitecture, and (with supervision) input data class. 
Our models' errors are usually below 8%; and always below 13%.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.61" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a810c97eb8aee081747bda8af7749cc6ecc28769", "sources": [ "DBLP" ], "title": "Generating Performance Models for Irregular Applications", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "a826646a8e4e8a746111d3a6915c8f0fcfcc3a00": { "authors": [ { "ids": [ "34223410" ], "name": "Hossein Sayadi" }, { "ids": [ "1747542" ], "name": "Houman Homayoun" } ], "doi": "10.1109/IGCC.2017.8323570", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323570", "entities": [ "ARM architecture", "Architecture as Topic", "Central processing unit", "Congenital contractural arachnodactyly", "FBN2 wt Allele", "Interdependence", "Multithreading (computer architecture)", "Predictive modelling", "Scheduling (computing)", "Thread (computing)", "voltage" ], "id": "a826646a8e4e8a746111d3a6915c8f0fcfcc3a00", "inCitations": [ "11ca91ee2c9974fbc8577207893381a46d79fd3d" ], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ "91b1c1107fa62f319137184232493e625570fa87", "cbaa525322aa172c94b78a8c8affcd38c6cbfbdf", "1bed9dbc346fcc7c39ac42c8a3be089a76f4d11a", "352a8957005dc5519b15ed1870751ec494d66395", "5ce74f1e96630ce71c23e065e987529033912263", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "30455ed559efdac68f2675e278548d27453b8c85", "c762fe9d42cc53e7f5e62ec94e34bc1164739c84", "4aeb08b5f9b03cab4cf6b2ac1dc9dda9a236ab52", "14a2a931ad8f2b6baaa9797b0e9bf74e7d0426be", "167c651a235cf567ee8ca19b8d0e4d2f19e01b42", "e23298e18aa92ac43fa941d0f5eacb339905b685", "03d6143cffcc8cf96abf78d23e74cb1083f54d1b", "07ccd8f48c5067fff6f50e6654ff410195ff0a91", "1491e934ee736e8219c2922ab1629322dac753a0", "1fcec27437d40285684aad5c68d2db076b27a195", 
"71aa0f7b09d9c5890f73ecb60d1b59c0694de808", "7ce25a0852e2345be1a1bd02b8eb4cefb9d47073", "17dea513763c57dcd0e62085045fb5be6770c600", "27c66ba59c76e737f863ba05b7099ad5788af836", "109df0e8e5969ddf01e073143e83599228a1163f" ], "paperAbstract": "Composite Cores Architecture (CCA), a class of dynamic heterogeneous architectures, enables the system to construct the right core at run-time for each application by composing cores together to build larger core or decomposing a large core into multiple smaller cores. While this architecture provides more flexibility for the running application to find the best run-time settings to maximize energy-efficiency, due to interdependence of various tuning parameters such as the type of the core, run-time voltage and frequency and the number of threads, it makes it more challenging for scheduling. Past research mainly addressed the scheduling problem in composite cores architecture by looking at one or two of these tuning parameters. However, as we will show in this paper, it is important to concurrently optimize and fine-tune these parameters to harness the power of heterogeneity in this emerging class of architecture. In addition, most previous work on CCA mainly studied traditional single threaded CPU applications. In this work, we investigate the scheduling challenges for multithreaded applications in CCA. First, through methodical investigation of power and performance results, we characterize various multithreaded applications on a CCA which can be composed into few big or many little cores and demonstrate how the interplay among various application, system, and architecture level parameters affect the performance and energy-efficiency. Furthermore, based on characterization results, a highly accurate regression-based model for energy-efficiency prediction is developed to guide the scheduling decision. Using the predictive model, we developed a scheduling scheme for effective mapping of multithreaded applications onto CCA. 
The results show that the proposed scheduling scheme on average achieves close to 94% efficiency as compared to the Oracle scheduling.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323570", "http://ece.gmu.edu/~hhomayou/files/IGSC17-Hossein.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a826646a8e4e8a746111d3a6915c8f0fcfcc3a00", "sources": [ "DBLP" ], "title": "Scheduling multithreaded applications onto heterogeneous composite cores architecture", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "a8992d84bf98a4dfb7a8ecbecf75eb07b7846a02": { "authors": [ { "ids": [ "6415958" ], "name": "Dipanjan Sengupta" }, { "ids": [ "1798309" ], "name": "Shuaiwen Song" } ], "doi": "10.1007/978-3-319-58667-0_6", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_6", "entities": [ "Algorithm", "Graph (abstract data type)", "Graphics processing unit", "Parallel computing", "Program optimization", "Programming model", "Real-time computing", "Scalability", "Social network", "Speedup", "Usability", "Velocity", "World Wide Web" ], "id": "a8992d84bf98a4dfb7a8ecbecf75eb07b7846a02", "inCitations": [], "journalName": "", "journalPages": "97-119", "journalVolume": "", "outCitations": [ "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "4e88d42dc5f5efe565d87af3b999c43165e42dce", "5dbbc4fe19a41441bdcce3395167e97df2d0d812", "420a0e5fc398f197bca3dfe40291a82b2c65655a", "22a26f40877cbd7ce0fb6c8c94e061332469d071", "5bfb81407ab5102ba0369e86ca674eac081a4d0a", "835914163add576b4c6c92ae0fcd34f661544ce2", "2ae3f1fccf9a184b60585870ac839430f44cd201", "75e217284d18901ce8b1fc4a389d3c1152b544fb", "947c6bf534ccd620044f77c3bd6068f633b421fb", "deaf88487bc3c0f24a809c40f88f393543579015", "12809396d9e314df0c8f8e7ec9691bb69571b80d", "1041d3f00afb5f5a53196813ceb2ebfab6d0a6ee", "00dbf46a7a4ba6222ac5d44c1a8c09f261e5693c", "9a8f16a532173bac3aece77d9451bdb3d7f2e57c", "105de19ab71db0a38bc0d734c8fd0efeba2faab7", 
"6f7cd29a3dfdcb2f6880a022e13054542020c5ce", "3ebf3857a60c3e224284bbbe6c7127d0a12c546d", "191fd33f17c2a79b3825d4cc2105c47a8f16ba44", "0ad8e89091eed09217e66adc98136126addc2619", "2a17c90ed723d6a14415cc1f677a5c0aa512f501", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "1156f60e40548096df49528b1342bb3e88b0f378", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "061c4eab625847e26a8fffeecde830bc0ddd22e1", "41bdd5eee51158a292c91ca02930ed55ea3aa907", "b513711621e81d0abd042e0877ca751581a993f5" ], "paperAbstract": "With the prevalence of the World Wide Web and social networks, there has been a growing interest in high performance analytics for constantly-evolving dynamic graphs. Modern GPUs provide massive amount of parallelism for efficient graph processing, but the challenges remain due to their lack of support for the near real-time streaming nature of dynamic graphs. Specifically, due to the current high volume and velocity of graph data combined with the complexity of user queries, traditional processing methods by first storing the updates and then repeatedly running static graph analytics on a sequence of versions or snapshots are deemed undesirable and computational infeasible on GPU. We present EvoGraph, a highly efficient and scalable GPUbased dynamic graph analytics framework that incrementally processes graphs on-the-fly using fixed-sized batches of updates. The runtime realizes this vision with a user friendly programming model, along with a vertex property-based optimization to choose between static and incremental execution; and efficient utilization of all hardware resources using GPU streams, including its computational and data movement engines. 
Extensive experimental evaluations for a wide variety of graph inputs and algorithms demonstrate that EvoGraph achieves up to 429 million updates/sec and over 232x speedup compared to the competing frameworks such as STINGER.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_6", "https://people.csail.mit.edu/jshun/6886-s18/papers/EvoGraph.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/a899/2d84bf98a4dfb7a8ecbecf75eb07b7846a02.pdf", "s2Url": "https://semanticscholar.org/paper/a8992d84bf98a4dfb7a8ecbecf75eb07b7846a02", "sources": [ "DBLP" ], "title": "EvoGraph: On-the-Fly Efficient Mining of Evolving Graphs on GPU", "venue": "ISC", "year": 2017 }, "a8c50ddef01202c897b090868fcd44aa7bd8fdf6": { "authors": [ { "ids": [ "2682150" ], "name": "Timothy G. Mattson" }, { "ids": [ "1867298" ], "name": "Vijay Gadepally" }, { "ids": [ "7946702" ], "name": "Zuohao She" }, { "ids": [ "7485473" ], "name": "Adam Dziedzic" }, { "ids": [ "33282330" ], "name": "Jeff Parkhurst" } ], "doi": "", "doiUrl": "", "entities": [ "Big data", "Database engine", "Metagenomics", "Reference implementation" ], "id": "a8c50ddef01202c897b090868fcd44aa7bd8fdf6", "inCitations": [ "528bdbe171ca7ed4d0ec722a3fb773610e250788", "14609528f7adb5a43bf75338308b823cc2a68335", "343d9c74d1465f884039b2145ed2fcc6bfcc26ca", "3845947052d99e45f0f3e7789b66c6582d098e48", "b3f6118a882901c692c2c14e7a468a4cf2ba598c" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "1c8f17fcacf017e6b087c40f311cba6e6f63f542", "75cbe27efe1c8b255102f641feba1871176c6c20", "daf208ad61c0ba239439ab46f1d1d4bbac5b69f0", "265efd16c16dc942705246a57337d323722003cb", "47a2f8acc3cb3c14dcade010cfe0824d0c1eeec1", "7ec028ace29244cb74c105327a7e4177a34aa6bd", "a1b067fc941d6727169ec18a882080fa1f074595", "18c021c9cce95ed5615a060f590b8388b604e7c5", "73f31354cc9058ddc2e47a1c585b753e1592c1bf", "34aed48c74e8b1f29ce85e03801f8a2a49806091", "93885ebdf313be3c4d5e099f0dbb1ee7eb1eede0", 
"581aef2a2a9301e8fe23cf3cc8bf687696779705", "a89b0ea03da49d8b340231385c6abf437399f410", "26a48f0cc3421cd56d13d4e3325230c40372e2a6", "2a664fcf10c1ff5890966c52621a7638d1ac9b04", "a5d560ef928d0b9f6df2710c4ce66240ed6802b7", "354a6faf144407ab11cce8274992ecfe94ccfc4c", "35b7492ff025d4b9412508504c97d8545c8d8a3f", "07641ebcb7726102c37f00525a0a7a3c859bf036", "60eacdc4ec8c1ebf6c8f343214f36d2ced7b21c3" ], "paperAbstract": "In most Big Data applications, the data is heterogeneous. As we have been arguing in a series of papers, storage engines should be well suited to the data they hold. Therefore, a system supporting Big Data applications should be able to expose multiple storage engines through a single interface. We call such systems, polystore systems. Our reference implementation of the polystore concept is called BigDAWG (short for the Big Data Analytics Working Group). In this demonstration, we will show the BigDAWG system and a number of polystore applications built to help ocean metagenomics researchers handle their heterogenous Big Data.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p120-mattson-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/a8c5/0ddef01202c897b090868fcd44aa7bd8fdf6.pdf", "s2Url": "https://semanticscholar.org/paper/a8c50ddef01202c897b090868fcd44aa7bd8fdf6", "sources": [ "DBLP" ], "title": "Demonstrating the BigDAWG Polystore System for Ocean Metagenomics Analysis", "venue": "CIDR", "year": 2017 }, "a8f43c1075174205515b3070b8251d08b54af986": { "authors": [ { "ids": [ "35030227" ], "name": "Giancarlo Pellegrino" }, { "ids": [ "1935526" ], "name": "Martin Johns" }, { "ids": [ "35198615" ], "name": "Simon Koch" }, { "ids": [ "1749517" ], "name": "Michael Backes" }, { "ids": [ "1701081" ], "name": "Christian Rossow" } ], "doi": "10.1145/3133956.3133959", "doiUrl": "https://doi.org/10.1145/3133956.3133959", "entities": [ "Countermeasure (computer)", "Cross-site request forgery", "Cross-site scripting", "Experiment", "Open-source 
software", "Programming paradigm", "Security testing", "Stock and flow", "Structured Query Language Interface", "Tracing (software)", "User (computing)", "Web application" ], "id": "a8f43c1075174205515b3070b8251d08b54af986", "inCitations": [], "journalName": "", "journalPages": "1757-1771", "journalVolume": "", "outCitations": [ "e35ca2444455aa2d58e1e232cd83e77190c57528", "6600ece66ff490bea3c5feae99aeabef492474b0", "23cbb38a3da69c710ea630c417db7e8256ff183a", "6a76d403770404ed286521bd5f973ad8e7ea7a36", "59a7ca6350c561e6faaed0ae099093420d3f7964", "63e73545063bddda32ebb0ab46e54bb5ee379dac", "05c76c9792b80d0f4669e94c990c722d1327d63f", "9b8b6ad7c3bbbdec2cb41d95fc8262138607abe2", "2f7275b4f7f06fa56849138b310543a6b0fcdd07", "3ca09297ea549605c99a96daf8bc50b23cc54efc", "3a14ea1fc798843bec6722e6f7997d1ef9714922", "62f285b3230ceb17fb6f409964d3f0e69584903e", "154f2ef6fcc98ebc1af23a31201fcf80b9393b05", "49850ae42f00dcb535c68fc4c6e99fec78ab972b", "5535033d514b0da3eac6a5f27c507fdfb77c05bc", "bf640cdccd94870da585f5cc6f489d15e29030af", "3b532950ded354ff3d657f8061aec210e9059da7", "ed2229a068b5a6d520fb7ea6de378c9bd6d4667e", "199997a280b374f9500204b8eaca68a0ac653db3", "79bb2782756dbcea84ab5431c131edcf226ee1f7", "1c126c0ddc80c1fa177adb9ef32bdf84e0306846", "0f88cf1e22a83cbaf19a3f496c9c944cb16ad37f", "0049ba742328950007d0c009be1850a7b79cfd05", "30af8702c6c9f69a64d176d61784b4d313eb3e26", "2d9ec144833f04b83b858fd744407a9203284e3e", "b37705f58fc21f6d35fdc83c848810d6f5aee2b2", "5b9472af7e61fc25b426253259b5fde1c2344cba", "64593a93cc57effc72fe6ae909db384be96b75d4", "66694ae34a01c91cc8a33ca5a4340d3de167c37e", "b645f19ed52b4315a82bf3564b8db5ce230cd49e", "108ee099052937197909ed61541702e7f8d0c216" ], "paperAbstract": "Cross-Site Request Forgery (CSRF) vulnerabilities are a severe class of web vulnerabilities that have received only marginal attention from the research and security testing communities. 
While much effort has been spent on countermeasures and detection of XSS and SQLi, to date, the detection of CSRF vulnerabilities is still performed predominantly manually.\n In this paper, we present Deemon, to the best of our knowledge the first automated security testing framework to discover CSRF vulnerabilities. Our approach is based on a new modeling paradigm which captures multiple aspects of web applications, including execution traces, data flows, and architecture tiers in a unified, comprehensive property graph. We present the paradigm and show how a concrete model can be built automatically using dynamic traces. Then, using graph traversals, we mine for potentially vulnerable operations. Using the information captured in the model, our approach then automatically creates and conducts security tests, to practically validate the found CSRF issues. We evaluate the effectiveness of Deemon with 10 popular open source web applications. Our experiments uncovered 14 previously unknown CSRF vulnerabilities that can be exploited, for instance, to take over user accounts or entire websites.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133959", "http://arxiv.org/abs/1708.08786", "https://arxiv.org/pdf/1708.08786v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a8f43c1075174205515b3070b8251d08b54af986", "sources": [ "DBLP" ], "title": "Deemon: Detecting CSRF with Dynamic Analysis and Property Graphs", "venue": "CCS", "year": 2017 }, "a90b8faaf327dc06dd1df2bf5cc64b4af234b6bc": { "authors": [ { "ids": [ "1718101" ], "name": "Sergio Rajsbaum" }, { "ids": [ "38307043" ], "name": "Armando Casta\u00f1eda" }, { "ids": [ "2913070" ], "name": "David Flores-Pe\u00f1aloza" }, { "ids": [ "13121022" ], "name": "Manuel Alcantara" } ], "doi": "10.1109/IPDPS.2017.70", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.70", "entities": [ "Autonomous car", "Autonomous robot", "Computation", "Crash (computing)", "Directed acyclic graph", "Fault 
tolerance", "Mobile robot", "Non-blocking algorithm", "Robot", "Shared memory", "Snapshot (computer storage)", "Vertex (geometry)", "Vertex (graph theory)" ], "id": "a90b8faaf327dc06dd1df2bf5cc64b4af234b6bc", "inCitations": [ "357a8059b8fdabc4c281a7cb2e03dce22c4ef2a4", "765731abac777fdd772837c58b43a775c99784d0" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "493-502", "journalVolume": "", "outCitations": [ "60f8c9937c769f51486baa6b2e3c2faa50d96a43", "f3018e7589af851341e6b40affb12d0ebdfa7db1", "e9b1ae133ae34f8348c19c1c1066439eddbf9ab0", "b7faa8135d1159d0356c0710ea267adf43852dab", "50b862c0552c18a72632580807572ea4999624ce", "35813f84719c20e315268e12a7edfab38ce90418", "bb0d3409e2cc104a4ffe062e1a860c537a9f61fb", "410a17b2f1120698005cf16829089cf83fdc688e", "4dee28e31d82612438c19639cc3d78957ccc858b", "6afc4972cc6e40716480c4bc827225b9da7d3046", "908b2bf7b27827343ba1d508489238ea2963e4e7", "3c9f845e65a1d996a9cc0df88cd39099437f875b", "00e35ff42644e5d068634165d77d2e44e78f8679", "7ab9de59820edc16d34429ac2ec77c8ff60b1486", "40af88c7982cd15e18f21a66897eb938fd98866b", "c7f0190371409c846b430d9ed528a95788cbbf56", "214baa0f37921ce21a9705e81514cde8e28e1ce9", "daefc7b798a846028f6b7a702f56b20c59f144e2", "9ef8edd4009f9bc5766391ea737d0ebac1faea6e", "296bf65141caa95e7701f5efc2c030a345b20a53", "a71d4757e2194ca691b0464dc5d17f9c1d4ef6d7", "00e3756119a91432622f6982b59ecd24a1340fbe", "01a100dadf3d776e7f2fee97b42966c47aa65fb8", "22bb39eebb11649ab5a55b789d5b677ac97eab49", "a937641fa59c41ecc1b6ce25ff0a5695285a28f3", "c0705da825e9baa2da4232c6a3f283b1990e4e64", "000ff9478eb220d1c520b6a2be733178357da64f" ], "paperAbstract": "The LOOK-COMPUTE-MOVE model for a set of autonomous robots has been thoroughly studied for over two decades. Each robot repeatedly LOOKS at its surroundings and obtains a snapshot containing the positions of all robots; based on this information, the robot COMPUTES a destination and then MOVES to it. 
Previous work assumed all robots are present at the beginning of the computation. What would be the effect of robots appearing asynchronously? This paper studies this question, for problems of bringing the robots close together, and exposes an intimate connection with combinatorial topology. A central problem in the mobile robots area is the gathering problem. In its discrete version, the robots start at vertices in some graph G known to them, move towards the same vertex and stop. The paper shows that if robots are asynchronous and may crash, then gathering is impossible for any graph G with at least two vertices, even if robots can have unique IDs, remember the past, know the same names for the vertices of G and use an arbitrary number of lights to communicate with each other. Next, the paper studies two weaker variants of gathering: edge gathering and 1-gathering. For both problems we present possibility and impossibility results. The solvability of edge gathering is fully characterized: it is solvable for three or more robots on a given graph if and only if the graph is acyclic. Finally, general robot tasks in a graph are considered. 
A combinatorial topology characterization for the solvable tasks is presented, by a reduction of the asynchronous fault-tolerant LOOK-COMPUTE-MOVE model to a wait-free read/write shared-memory computing model, bringing together two areas that have been independently studied for a long time into a common theoretical foundation.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.70" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a90b8faaf327dc06dd1df2bf5cc64b4af234b6bc", "sources": [ "DBLP" ], "title": "Fault-Tolerant Robot Gathering Problems on Graphs With Arbitrary Appearing Times", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "a9224aaff526781a0461eb8b7fd8594ebee02132": { "authors": [ { "ids": [ "9555567" ], "name": "William McDoniel" }, { "ids": [ "3161552" ], "name": "Markus H\u00f6hnerbach" }, { "ids": [ "2522777" ], "name": "Rodrigo Canales" }, { "ids": [ "32193821" ], "name": "Ahmed E. Ismail" }, { "ids": [ "1766804" ], "name": "Paolo Bientinesi" } ], "doi": "10.1007/978-3-319-58667-0_4", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_4", "entities": [ "Automatic vectorization", "Large-scale Atomic/Molecular Massively Parallel Simulator", "Molecular dynamics", "Program optimization", "Simulation", "Supercomputer", "Xeon Phi" ], "id": "a9224aaff526781a0461eb8b7fd8594ebee02132", "inCitations": [], "journalName": "", "journalPages": "61-78", "journalVolume": "", "outCitations": [ "013506e35e0d12a104b88008855d1f5276226b20", "57d131b3917555f252283906bacc46318699e6a0", "925f5c2797e05eb0e5f37649190194109b5971e6", "3f1e4a6c488ca3dde71f32f150b2bb131a7ae1ee", "6b5319282d300cd2534e01b53054b90151ab7d03", "a4dac5f7a35fb4f480ea94880097fa4cf25ca963", "9c92838a8da9dd2bb8729f274a312fdc52d9f099", "b452135a11ff0afd2a11332cde390bf13215aad5", "b58b1327d4c427de652346b8c00600315b984463", "a4a49e65f59fcbb400ddb6778428ca9a81f34c79", "3fbca995c07fa6b3742cdb129e68273c9ab3ed67", 
"9c09dfaa4b72e59e4e2fa51181df4009e5f7344f", "415c7835aa18984d92086edbb9d9937fcdd0a6eb", "70302e07a5d51a7e330c11e61424f6a3beffa0ab", "293ca9dec5a2eefc9ad0c4c157bcbdf04b836c3a" ], "paperAbstract": "Molecular Dynamics is an important tool for computational biologists, chemists, and materials scientists, consuming a sizable amount of supercomputing resources. Many of the investigated systems contain charged particles, which can only be simulated accurately using a longrange solver, such as PPPM. We extend the popular LAMMPS molecular dynamics code with an implementation of PPPM particularly suitable for the second generation Intel Xeon Phi. Our main target is the optimization of computational kernels by means of vectorization, and we observe speedups in these kernels of up to 12x. These improvements carry over to LAMMPS users, with overall speedups ranging between 2-3x, without requiring users to retune input parameters. Furthermore, our optimizations make it easier for users to determine optimal input parameters for attaining top performance.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_4", "http://arxiv.org/abs/1702.04250", "https://arxiv.org/pdf/1702.04250v1.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/a922/4aaff526781a0461eb8b7fd8594ebee02132.pdf", "s2Url": "https://semanticscholar.org/paper/a9224aaff526781a0461eb8b7fd8594ebee02132", "sources": [ "DBLP" ], "title": "LAMMPS' PPPM Long-Range Solver for the Second Generation Xeon Phi", "venue": "ISC", "year": 2017 }, "a94bee6b9f3c9dc19465ac4c6c503c0c17ce846b": { "authors": [ { "ids": [ "1765238" ], "name": "Adam Manzanares" }, { "ids": [ "1745378" ], "name": "Filip Blagojevic" }, { "ids": [ "40304283" ], "name": "Cyril Guyot" } ], "doi": "", "doiUrl": "", "entities": [ "Data center", "Enterprise resource planning", "Linux" ], "id": "a94bee6b9f3c9dc19465ac4c6c503c0c17ce846b", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ 
"f1373c425406de273d4f8288abddde7fb52bfaff", "8b1769e57834d559246e2d619293072a7e63f59c", "13b925352e4ee3066a6d38ef9f16efdfa967cabb", "6125daf94c0df8fbfa04675a61e499cfa870b0bc", "74d23cb8751120849bc908477b28c886c6a76252", "ebd241b481a90b52cef6463211b76ba1dc46c44a", "65a2cb8a02795015b398856327bdccc36214cdc6", "16e367708e50a9ed6228334c9d49f4db0dab4cd8", "57994f93ef7d965e827a9d1210104473da758da7", "0b6adc0dbc55076dc9c9a8931f4a4df58fd291b6", "23015f1b4df6d84f73db0f31fa42992c18a5fff8", "022c9386551ac2f24e0718eaf60937aa1a7e1b5a", "0286bfcbcd7bf03774b6a6cf59bcf487760f18d7", "0eb7670a6fb74672ec78426bb4712f1afe05a418" ], "paperAbstract": "In large scale data centers, controlling tail latencies of IO requests keeps storage performance bounded and predictable, which is critical for infrastructure resource planning. This work provides a transparent mechanism for applications to pass prioritized IO commands to storage devices. As a consequence, we observe much shorter tail latencies for prioritized IO while impacting nonprioritized IO in a reasonable manner. We also provide a detailed description of the changes we made to the Linux Kernel that enable applications to pass IO priorities to a storage device. 
Our results show that passing priorities to the storage device is capable of decreasing tail latencies by a factor of 10x while decreasing IOPS minimally.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/hotstorage17_slides_manzanares.pdf", "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-manzanares.pdf", "https://www.usenix.org/conference/hotstorage17/program/presentation/manzanares" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/a94b/ee6b9f3c9dc19465ac4c6c503c0c17ce846b.pdf", "s2Url": "https://semanticscholar.org/paper/a94bee6b9f3c9dc19465ac4c6c503c0c17ce846b", "sources": [ "DBLP" ], "title": "IOPriority: To The Device and Beyond", "venue": "HotStorage", "year": 2017 }, "a96ccc23fc1d7677a251ab24b22d01ed7635eabc": { "authors": [ { "ids": [ "26319452" ], "name": "Yuping Fan" }, { "ids": [ "39548154" ], "name": "Paul Rich" }, { "ids": [ "2854349" ], "name": "William E. Allcock" }, { "ids": [ "1857295" ], "name": "Michael E. 
Papka" }, { "ids": [ "1773347" ], "name": "Zhiling Lan" } ], "doi": "10.1109/CLUSTER.2017.11", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.11", "entities": [ "Censoring (statistics)", "Experiment", "Job scheduler", "Machine learning", "Random forest", "Runtime system", "Scheduling (computing)", "Tobit model" ], "id": "a96ccc23fc1d7677a251ab24b22d01ed7635eabc", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "530-540", "journalVolume": "", "outCitations": [ "125e09b09635b7d8a67b6caadfa4c88e98193406", "4b4bb2adf568615c3b337e554c437da080cc57e7", "68e012b1ea42244e531b346a2828256488808856", "1b65af0b2847cf6edb1461eda659f08be27bc76d", "8a41bf2ef9a77fa5d47c9b482eb38f9f6d636300", "04cfedb0c6135dc7d43c8884592d55a3e95d094f", "7b0db6135b8dd3e2a9efa86163e91c0cd0fdf660", "ec4e0235cbd1e925032de12f0c9a031ba0d7876f", "f38a23575e04ed211f42dcacf0c6f87efe004f43", "5341bdf934b3a99af6685b5564af8e03d1b780a7", "24e6cf0796237f21c780a3f0c996817f57b3a1bd", "46217f372a75dddc2254fdbc6b9418ba3554e453", "9b122ef696acf3c7eefd20807c079bc8b650edd9", "9a46e4da40afc0278ae078ff77f543493646c6a2", "b94d6bb4506dbb02244467f989b8aa1f06389988", "13d4c2f76a7c1a4d0a71204e1d5d263a3f5a7986", "152ad4d33270f2a9273e0686d479339a4f58bf92", "6903bf1673befb449a100747e380272af86e17ce", "5d462b30c3847c6ba8fd4b02b45ba36a8e001701", "2690a9c9398de966e28e4ee0817a5672454846a9", "66e4e386ddd2e7b1c7a218479eb0de131a6090a2", "2b0b6f3530a6d91c2bf7d5a7af53db7a56457786", "7d2dea02f6cf1c1d0d7db44eef3c1b8b37411e5a", "c5560fcd390764e3bca699284162d983ac4a662f", "03670ae248e456b67be7e435e86ddb8a9f87c242" ], "paperAbstract": "Job runtime estimates provided by users are widely acknowledged to be overestimated and runtime overestimation can greatly degrade job scheduling performance. 
Previous studies focus on improving accuracy of job runtime estimates by reducing runtime overestimation, but fail to address the underestimation problem (i.e., the underestimation of job runtimes). Using an underestimated runtime is catastrophic to a job as the job will be killed by the scheduler before completion. We argue that both the improvement of runtime accuracy and the reduction of underestimation rate are equally important. To address this problem, we propose an online runtime adjustment framework called TRIP. TRIP explores the data censoring capability of the Tobit model to improve prediction accuracy while keeping a low underestimation rate of job runtimes. TRIP can be used as a plugin to job scheduler for improving job runtime estimates and hence boosting job scheduling performance. Preliminary results demonstrate that TRIP is capable of achieving high accuracy of 80% and low underestimation rate of 5%. This is significant as compared to other well-known machine learning methods such as SVM, Random Forest, and Last-2 which result in a high underestimation rate (20%-50%). 
Our experiments further quantify the amount of scheduling performance gain achieved by the use of TRIP.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.11" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a96ccc23fc1d7677a251ab24b22d01ed7635eabc", "sources": [ "DBLP" ], "title": "Trade-Off Between Prediction Accuracy and Underestimation Rate in Job Runtime Estimates", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "a99320ad6d1334197adc90ae70bf27ee6f7785fa": { "authors": [ { "ids": [ "2015053" ], "name": "Alexandru Agache" }, { "ids": [ "2122068" ], "name": "Mihai Ionescu" }, { "ids": [ "1758591" ], "name": "Costin Raiciu" } ], "doi": "10.1145/3064176.3064185", "doiUrl": "https://doi.org/10.1145/3064176.3064185", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Apache Hadoop", "Application programming interface", "Best, worst and average case", "Cloud computing", "Distributed computing", "Experiment", "Network topology", "OpenVMS" ], "id": "a99320ad6d1334197adc90ae70bf27ee6f7785fa", "inCitations": [], "journalName": "", "journalPages": "605-619", "journalVolume": "", "outCitations": [ "16dc5417e1a558895a9b9561d31480bdc4abe295", "31b27a3b4ff89993eb92e8b1353edead8d5f2520", "2997435fe9f0e646e6a37d9783b520b9cdbdd38b", "058f6752d85a517aae298586fdf117acdd7560ea", "3b988049dd8f62f772281e90196bbd793700c86b", "37525b2c3cc16a2fe166708a4f7081b949b1888e", "58a17426e5b999634c2c8df8767095ad1ded7a7d", "120ade88aecba9157eb1ab7bc0464a0215c46ccc", "1376bd56c64639af4645625fd9755c83b2bf7cda", "0bd7fbf28e63db7139bd8995948ea7004fe56dbe", "42f6218131551632370e5e8f88370d04b220002a", "bf36aca757b661addddda94b2a6e85b122c3d426", "1aefeed6a487431dc1c1137d8b18ad299b328d73", "2d9e62ca99119615436b7d4c6aef76eabac1a4e8", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "1cafaac11664e48bd121695ac1be06b0930d00a5", "0f6f717d198ab1b99a63814facaf2fceace6b0fe", 
"0935bb723e4071ccd4c2334d3b6d728faa111d11", "19d8f8af2d774bda8b95bbeacf62c85811c37b3c", "231ba17921ebd80e95771e28dfb5082e169d5a53", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "323ce440a3c8fc1e5f3377ea07b3675c18b51d22", "764d7de61421968d6b477f0c055d72dcb0893544" ], "paperAbstract": "Clouds offer an opaque I/O API to their customers: details of the underlying resources (network topology, disk drives) or their current load are kept hidden. Tenants can profile the I/O performance in their VMs and optimise accordingly, but the side effect is increased load. Certain cloud providers try to discourage profiling by enforcing strict I/O isolation, at the cost of reduced utilisation in the average case. In this paper we challenge this status quo and propose CloudTalk, an API that allows tenants to communicate with the cloud provider and receive hints used to optimise their workloads.\n We have built a distributed implementation of CloudTalk that scales to hundreds of machines and provides significant performance benefits in many cases. Further, we have implemented changes to Hadoop and HDFS that use CloudTalk to decide which machines to use for task placement and replica selection. 
Our experiments in a local cluster and on Amazon EC2 show that CloudTalk helps improve performance by as much as two times for a wide range of scenarios.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064185" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a99320ad6d1334197adc90ae70bf27ee6f7785fa", "sources": [ "DBLP" ], "title": "CloudTalk: Enabling Distributed Application Optimisations in Public Clouds", "venue": "EuroSys", "year": 2017 }, "a9c3fd9dd71040038f18c0e014a7787c0dc23088": { "authors": [ { "ids": [ "3053170" ], "name": "Ronan-Alexandre Cherrueau" }, { "ids": [ "3023568" ], "name": "Dimitri Pertin" }, { "ids": [ "20477658" ], "name": "Anthony Simonet" }, { "ids": [ "7682100" ], "name": "Adrien L\u00e8bre" }, { "ids": [ "2122228" ], "name": "Matthieu Simonin" } ], "doi": "", "doiUrl": "", "entities": [ "Holism in science", "Linux", "Linux", "Relevance", "Run time (program lifecycle phase)", "Software project management", "Testbed" ], "id": "a9c3fd9dd71040038f18c0e014a7787c0dc23088", "inCitations": [ "8ef5cbf8c0a30d1f26e8b478014f656b706dffd6" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "544-548", "journalVolume": "", "outCitations": [ "6049062a3a73d22c914e7fa8951b3b0e5f09b309" ], "paperAbstract": "By massively adopting OpenStack for operating small to large private and public clouds, the industry has made it one of the largest running software project, overgrowing the Linux kernel. However, with success comes increased complexity, facing technical and scientific challenges, developers are in great difficulty when testing the impact of individual changes on the performance of such a large codebase, which will likely slow down the evolution of OpenStack. Thus, we claim it is now time for the scientific community to join the effort and get involved in the development of OpenStack, like it has been once done for Linux. 
In this spirit, we developed Enos, an integrated framework that relies on container technologies for deploying and evaluating OpenStack on any testbed. Enos allows researchers to easily express different configurations, enabling fine-grained investigations of OpenStack services. Enos collects performance metrics at runtime and stores them for post-mortem analysis and sharing. The relevance of the Enos approach to reproducible research is illustrated by evaluating different OpenStack scenarios on the Grid'5000 testbed.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101185" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a9c3fd9dd71040038f18c0e014a7787c0dc23088", "sources": [ "DBLP" ], "title": "Toward a Holistic Framework for Conducting Scientific Evaluations of OpenStack", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "a9d08ed96a8bfd3ae7e683cba23af777213c37b1": { "authors": [ { "ids": [ "2900344" ], "name": "Gustavo A. 
Chaparro-Baquero" }, { "ids": [ "35678780" ], "name": "Shi Sha" }, { "ids": [ "2286739" ], "name": "Soamar Homsi" }, { "ids": [ "35420329" ], "name": "Wujie Wen" }, { "ids": [ "1714867" ], "name": "Gang Quan" } ], "doi": "10.1109/IGCC.2017.8323573", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323573", "entities": [ "Best-effort delivery", "CPU (central processing unit of computer system)", "Central processing unit", "Content management system", "Deterministic algorithm", "Dynamic random-access memory", "Memory bandwidth", "Multi-core processor", "Real-time computing", "Scheduling (computing)", "Simulation", "Thermal management (electronics)", "anatomical layer", "heat dissipation" ], "id": "a9d08ed96a8bfd3ae7e683cba23af777213c37b1", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [], "paperAbstract": "Designing 3D systems with on-chip DRAM is a promising solution to improve memory bandwidth and reduce memory access latency. However, 3D chips exacerbate the chip thermal problem due to their longer heat dissipation path, as well as the tight thermal coupling between logic and memory layers. In this paper, we are interested in studying thermal aware resource management strategies for both CPUs and memory systems when realizing hard real-time systems on 3D platforms under given peak temperature constraints. Given the dramatically increased power density not only from CPUs but also from memory systems as well, we believe that a joint CPU and memory system resource management is highly desired for 3D platforms to effectively deal with the heat dissipation confined in a small package. In addition, different from many existing thermal management strategies, which are reactive and best-effort in nature, we are more interested in ones that can ensure the strong guarantee for real-time applications. 
To this end, we introduce a novel approach that incorporates the periodic resource model to guarantee timing constraints for hard real-time systems under thermal constraints. In the meantime, by periodically (deterministically) throttling the accesses of CPUs and memory resources, our approach can effectively guarantee the thermal constraints imposed on both CPUs and memory systems. We use simulation results to demonstrate the effectiveness of our proposed approach in guaranteeing both the timing and temperature constraints for hard real-time tasks on 3D platforms.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323573" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/a9d08ed96a8bfd3ae7e683cba23af777213c37b1", "sources": [ "DBLP" ], "title": "Thermal-aware joint CPU and memory scheduling for hard real-time tasks on multicore 3D platforms", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "aa03a69f3f7c3d0dd423288fdf86427a0270d53a": { "authors": [ { "ids": [ "37625386" ], "name": "He Zhao" }, { "ids": [ "1723019" ], "name": "Lan Du" }, { "ids": [ "1854608" ], "name": "Wray L. 
Buntine" }, { "ids": [ "1697913" ], "name": "Gang Liu" } ], "doi": "10.1109/ICDM.2017.73", "doiUrl": "https://doi.org/10.1109/ICDM.2017.73", "entities": [ "Algorithm", "Experiment", "Gibbs sampling", "Information theory", "Perplexity", "Sparse matrix", "Test set", "Topic model" ], "id": "aa03a69f3f7c3d0dd423288fdf86427a0270d53a", "inCitations": [ "9b9e7c9784c420d7c6803350785833c1ead19976" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "635-644", "journalVolume": "", "outCitations": [ "813651fe9edfafc124bb3f8e7b26384c21395d07", "268b37db3acb544de18348575b051a18e32033ab", "49b09f12b073497b7b839979ad7eddd16069c48a", "c964ced85cbbd7dee82107d649afa8b1ed4f2cd0", "0825788b9b5a18e3dfea5b0af123b5e939a4f564", "b3625e35fe7de551d628dd4b3875d2ba847f1097", "29f391a4da5c199d7949bfca9fb7fd388c57948d", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "01a7995cf0b1c89ec6322cc2b734f70e6b18e222", "d504a5bf60d33c02a63b94f511fef83ed7ce9a9e", "25c731735a77a0a26589f5ae35d2c09a56edb0f6", "10aa22af53cf53fa9c28ca9ed77dd63d69ac301a", "2d2ff73b66d36093abcaa249ab278e819f5b6f01", "11c379218bd8b4f6aef76ff5b25d6b973d865b30", "bcd1c98f58aa580a628cf4c690efca947f89dbae", "1f656b9c686c1e5db2a4d41f1ce7e270965def3e", "6cd9aa2648d06bd1bddc2cf1a7f7939cc519716c", "e25c478391e2f90de683ff64c83e801d1b728cfa", "124eabfdffb3e29bb0d34878f47bdb0d3a382d7d", "1e63600b5906a6e18c2b9540155b9adb85c4d437", "e7eee4d652786a202ef9cda9a96359e7996f5eca", "9208ecbd7244040ba6ee59a067b527c8b095fe0a", "244a152b82401d1619ef4ac88c51672b1662e3a2", "c75cea408d0fd38f0a48242024d6a72f63fcdf46", "280608cef4e07b5c4de82d75c9cc37c6b9478eb0", "39067f1866edf7fab9ceb15fd5263bf5ef9a782c", "1122ab4fbf7f15cdf7440e01f60eb1d44192dd6c", "4987baca7365133a03b8a0849bae337d8bf0c3c4", "2cf98ee58e57320a25b72a8da31aa3277cdf236e", "62f991612959e6fff29128b1d1a54596f6504029", "058ad0815ce350f0e7538e00868c762be78fe5ef" ], "paperAbstract": "Besides the text content, documents and their associated words usually come 
with rich sets of meta information, such as categories of documents and semantic/syntactic features of words, like those encoded in word embeddings. Incorporating such meta information directly into the generative process of topic models can improve modelling accuracy and topic quality, especially in the case where the word-occurrence information in the training data is insufficient. In this paper, we present a topic model, called MetaLDA, which is able to leverage either document or word meta information, or both of them jointly. With two data argumentation techniques, we can derive an efficient Gibbs sampling algorithm, which benefits from the fully local conjugacy of the model. Moreover, the algorithm is favoured by the sparsity of the meta information. Extensive experiments on several real world datasets demonstrate that our model achieves comparable or improved performance in terms of both perplexity and topic quality, particularly in handling sparse texts. In addition, compared with other models using meta information, our model runs significantly faster.", "pdfUrls": [ "https://arxiv.org/pdf/1709.06365v1.pdf", "http://arxiv.org/abs/1709.06365", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.73" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/aa03a69f3f7c3d0dd423288fdf86427a0270d53a", "sources": [ "DBLP" ], "title": "MetaLDA: A Topic Model that Efficiently Incorporates Meta Information", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "aa4ec46d5aa39d3799ec7610a88dcef871d7a48e": { "authors": [ { "ids": [ "8304919" ], "name": "Kwonsoo Chae" }, { "ids": [ "39476651" ], "name": "Hakjoo Oh" }, { "ids": [ "2647251" ], "name": "Kihong Heo" }, { "ids": [ "34704380" ], "name": "Hongseok Yang" } ], "doi": "10.1145/3133925", "doiUrl": "https://doi.org/10.1145/3133925", "entities": [ "Heuristic", "Program analysis", "Source lines of code", "Static program analysis" ], "id": 
"aa4ec46d5aa39d3799ec7610a88dcef871d7a48e", "inCitations": [], "journalName": "PACMPL", "journalPages": "101:1-101:25", "journalVolume": "1", "outCitations": [ "8469007f1d62cac7d7188dbe6b67b24d40d46ca4", "220d40ff0f681c2dc87ca468099629127db8b938", "15993a512329867192b5d46c3c067c2bb1562b24", "0ca0fe955dc8b7bdea61f03a767f8b8a57ac51ee", "0f3b8eba6e536215b6f6c727a009c8b44cda0a91", "2e10643c3759f97b673ff8c297778c0b6c20032b", "13d4c2f76a7c1a4d0a71204e1d5d263a3f5a7986", "2cb861f65b53594baa78ae0556a843615f768be0", "76a75933e5b13a94ddbe22d60d08b7d8940f246f", "8bfd64fe8f9192a8b3c801c7d91fd46cabfc5319", "5459cd31ff0f182da81d5c58026546b995118676", "04d29724eede0f4e06a18b2909daa8f41488bc67", "5abd03095061c25ab7c4fab6b33a6ceb999c78e3", "10e6739668f5c81d0607d2068eaab77ef93991ed", "052662aaa5e351e194bcd9bf34810f9bb3f45e93", "013cd20c0eaffb9cab80875a43086e0c3224fe20", "b7efe971a34a0f2482e0b2520ffb31062dcdde62", "6e15aa5b91a16c025969476a7a873347cd01ed57", "7b9b7d97b361a29d59a430e2d4871f39222e2ab3", "1e688be9f4554aa981fe3db9e2a66388b05bd167", "532be02930ca307815eb4fca7ecfb311553deaa6", "96c3e0d366ba91767e9742eadd84d3d0e884bfb9", "0694a216294941f9c486cb8e6f1ec6ea50b29562", "c0274ceaeec3ea83bcfb755893f55b69add39f94", "72338dfa9f2f54bffce94055876beab16439202f", "5d4927ab2a6a43f0789c8008d9cff191a745d800", "ff47af5531e55c7b88879ed10c8b0203e5948d99", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "3811b03c4ebbd93dfc602eef422fc8237fda8654", "310a7d8d88e7691222e6be1a55b1f46fb5772931", "1186ffa7792dc094737566c6448d9a0ca965fead", "61cd30bbfc5c630dfb8d07c2f1f3ba45daa494f1", "00524d47e1f2f62ac514c1f8eb47accd93b7d350", "b03bd79859fab03ccb0e8fcd79e8be98da16ff22", "1224d34a3d31b80ec4df903b6885a3672afa1ab1", "187768583aa8fd7dfe64cc88cb2aa831b6b531db", "2c0ddf9ba2413de5deb2071bc26e6d68fe28a513", "75b8c0abfd45fd77d7a61da7d12bdf516e3139c7", "c65ce591b42b816279e741bb9612b832d21288e0", "123c4108dcb409121f3b758b26d273699d053fea", "4f48d20824b18e1cf151eabe0128a79e4cf47bb8", 
"129707a3e577c8fe0f491a63ee628700874e3ed5", "d859b6698969b788bd25bc2abd96e6e57e341bea", "263b587800f6160379b33deefa0bc66c3f44d08f", "a6526df1d9b18fd3542fad7fdd95e93a5edce909", "07b338fada8dd2560cefd48bb57d076a169bc5ce", "3b1a125ab13ce7235bd3f1dbf08016e77a7e35d8", "153d144f411f7054b0c4bbd6b829a3d8c2b2df31", "286a16a6266f16839534e17d03bf7eb3340dadbd", "1deae7e8531cd6870e741ebe63e471742c2d4658", "3ab9ff2bd88cd97d729eec96541b99859a4f4d4e", "2bbda639ab417ca79d5948e393171e62ccdd7367", "1ae85cfe32316d52bd823c184fd725d2c87a0cfe", "4c12c8eccd17d63543f747c22ba0521b8c8a692e", "6b41ea91e149ecb228af142c5e8af93b6a2d6982", "50358e3f30ab8c99f9d383e5683b31c2311e5651" ], "paperAbstract": "We present a technique for automatically generating features for data-driven program analyses. Recently data-driven approaches for building a program analysis have been developed, which mine existing codebases and automatically learn heuristics for finding a cost-effective abstraction for a given analysis task. Such approaches reduce the burden of the analysis designers, but they do not remove it completely; they still leave the nontrivial task of designing so called features to the hands of the designers. Our technique aims at automating this feature design process. The idea is to use programs as features after reducing and abstracting them. Our technique goes through selected program-query pairs in codebases, and it reduces and abstracts the program in each pair to a few lines of code, while ensuring that the analysis behaves similarly for the original and the new programs with respect to the query. Each reduced program serves as a boolean feature for program-query pairs. This feature evaluates to true for a given program-query pair when (as a program) it is included in the program part of the pair. We have implemented our approach for three real-world static analyses. 
The experimental results show that these analyses with automatically-generated features are cost-effective and consistently perform well on a wide range of programs.", "pdfUrls": [ "http://www.seas.upenn.edu/~kheo/home/paper/oopsla17-chohheya.pdf", "http://doi.acm.org/10.1145/3133925" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/aa4ec46d5aa39d3799ec7610a88dcef871d7a48e", "sources": [ "DBLP" ], "title": "Automatically generating features for learning program analysis heuristics for C-like languages", "venue": "PACMPL", "year": 2017 }, "aaa11576d4ebd0b17dd3750189cbe7785b2fa188": { "authors": [ { "ids": [ "2836493" ], "name": "Seikwon Kim" }, { "ids": [ "8569719" ], "name": "Seonyoung Lee" }, { "ids": [ "1837923" ], "name": "Taehoon Kim" }, { "ids": [ "36595712" ], "name": "Jaehyuk Huh" } ], "doi": "10.1109/PACT.2017.12", "doiUrl": "https://doi.org/10.1109/PACT.2017.12", "entities": [ "Address space", "Algorithm", "Baseline (configuration management)", "Big data", "Cache (computing)", "Cube", "Data compression", "Dynamic Markov compression", "Hybrid Memory Cube", "Locality of reference", "Multiple granularity locking", "OpenVMS", "Page (computer memory)", "Server (computing)", "Translation lookaside buffer" ], "id": "aaa11576d4ebd0b17dd3750189cbe7785b2fa188", "inCitations": [], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "206-218", "journalVolume": "", "outCitations": [ "309ad0357af7722a24192781340881390055a3db", "d4e153d0ff33cb15cd6c13570599c6c36cc78db5", "1121c104e9d2951462f67f8eb364f043fe9a65e5", "4c689148ee5e9d6d116f6babbfab21bf2116802e", "94973810b159138f16577179daf63fc3c19f3224", "297c981cc41ac8073f4a9aeda6f4a1039ce3a0f0", "41b24c890ae0ef99ff031c9c8549375af6025fb6", "44607270754f8521d6c4d42297aa881393f4f8e0", "33da3dcba06cf453f74203e3fb2adaa8c1133f3b", "7f23db6b9ea34556e90f1e49b0e91ade82c9d2d5", "59d426dda9e2d2db7b887440a77adebb1227631b", 
"4f02395639cfdac14ea42e99b18fde92f8288b63", "bd64635ee260c3fec8589f6af402b92db8142c15", "11a3cacd4e3f11d61203aa4c68b124ab5fe54ba3", "8b04ea524cb6ced72868c120a00c4679d84be006", "87a4156fc53e76450b4766cea45edd4bb7e84b7d", "a7bf5729876a0499cd4a1fa428622e3fa47c5016", "54a15f2c25bec4274d5bb423dbc5002426a506a3", "12bc20a1963859e9f76afb4b308b90ded1cff1fe", "6785219a4e5901962137299bded495dd5c841729", "56017a274989ce08c331a5bf6b7f435665d69c97", "49dc03814c171c08331fe9f1afc34a54951ae8e0", "12d6da762b2a5d512d383f3b587bd30c23c3df97", "3d951ad1164c17217a24e46b57f9b31cea1b2b96", "64320211fdc563ebf03c1ed972d6eee5661ac876", "72166f3aa4cdbc91d65b890b83c8c084dec846b0", "e7c6f67a70b5cf0842a7a2fc497131a79b6ee2c5", "052095bb131a0942053be4ff4097b41c429c7e65", "bc4ebb16afeb93a8ebbce34eb0e759b4fb4e80d7" ], "paperAbstract": "The increasing memory requirements of big data applications have been driving the precipitous growth of memory capacity in server systems. To maximize the efficiency of external memory, HW-based memory compression techniques have been proposed to increase effective memory capacity. Although such memory compression techniques can improve the memory efficiency significantly, a critical trade-off exists in the HW-based compression techniques. As the memory blocks need to be decompressed as quickly as possible to serve cache misses, latency-optimized techniques apply compression at the cacheline granularity, achieving the decompression latency of less than a few cycles. However, such latency-optimized techniques can lose the potential high compression ratios of capacity-optimized techniques, which compress larger memory blocks with longer latency algorithms.Considering the fundamental trade-off in the memory compression, this paper proposes a transparent dual memory compression (DMC) architecture, which selectively uses two compression algorithms with distinct latency and compression characteristics. 
Exploiting the locality of memory accesses, the proposed architecture compresses less frequently accessed blocks with a capacity-optimized compression algorithm, while keeping recently accessed blocks compressed with a latency-optimized one. Furthermore, instead of relying on the support from the virtual memory system to locate compressed memory blocks, the study advocates a HW-based translation between the uncompressed address space and compressed physical space. This OS-transparent approach eliminates conflicts between compression efficiency and large page support adopted to reduce TLB misses. The proposed compression architecture is applied to the Hybrid Memory Cube (HMC) with a logic layer under the stacked DRAMs. The experimental results show that the proposed compression architecture provides 54% higher compression ratio than the state-of-the-art latency-optimized technique, with no performance degradation over the baseline system without compression.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.12" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/aaa11576d4ebd0b17dd3750189cbe7785b2fa188", "sources": [ "DBLP" ], "title": "Transparent Dual Memory Compression Architecture", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "aacca992d4a318f13c5d93345a6af923033730e8": { "authors": [ { "ids": [ "9732929" ], "name": "Manel Mrabet" }, { "ids": [ "1728443" ], "name": "Yosra Ben Saied" }, { "ids": [ "1809801" ], "name": "Le\u00efla Azouz Sa\u00efdane" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud computing", "Computation", "Model\u2013view\u2013controller", "Naive Bayes classifier", "Quality of service", "Requirement", "Trust (emotion)" ], "id": "aacca992d4a318f13c5d93345a6af923033730e8", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "488-497", 
"journalVolume": "", "outCitations": [ "91549f524d37dc08316ab3f98483386d790ecbfe", "280f2f488342b69db3a5e2d2cd2872b17cd093b4", "08bcacf72e2d66b2eb9f637d2901ce56a70dff83", "d36671397f0bb927036de3b56cd605e2227b2043", "261693c0671a0c52bbef78658e4a1b072ae89c1b", "d136e5c10e5e18a05528a8680089263ddf60247e", "10efcbc5d90e07ee860ebea57a6eb3425c89a4ea", "1c29ed645ed7019e6225276394f980b4e4ecb011", "d2321ef3a05400fad6ade4cccd84e1341adf9c09", "22622c17e9c64e49a848c9048311a949f433e306", "34fa2d6e09ca31f10d4faf8da53e428805efc134", "3a75dd169aa999d147d4dc06c96f688ef6689e7b", "e0032ac8dfa6d855623d18e508acd20aa225dd50", "499437ee2e68a4c13b7f1dba8b77a70805560f7c", "082f4733a7b0a6cd6bfa8daf813757d00a12d93d", "304ed638d9bf07760ae398e133b400a34cad9787" ], "paperAbstract": "This paper introduces an approach that handleswith the trustworthy cloud service selection issue in Cloudcomputing environments. Despite the fact that most of theexisting trust systems consider several QoS attributes for trustcomputing, none of them did consider the correlation that mayexist among these attributes. However, we demonstrate in thispaper that the integration of correlation between QoS attributesin trust computing significantly helps to solve many issues suchas predicting missing assessment, detecting malicious feedbacksand improving the accuracy of trust values. The main goal ofthis paper is to show the significance of correlation betweenthose attributes for trust computing. To do that, we proposecombining both the popular Naïve Bayes model and the ngramMarkov model to design a more efficient trust model forcloud environments. Our proposed trust model also takes intoaccount the user's requirements, aggregates both the qualitativeand quantitative assessment, and considers several sources whencomputing cloud services' trust values. 
Experimental results showthat our proposed approach outperforms the traditional Naïve Bayes trust models.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101178" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/aacca992d4a318f13c5d93345a6af923033730e8", "sources": [ "DBLP" ], "title": "Modeling Correlation between QoS Attributes for Trust Computation in Cloud Computing Environments", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "aaedefd104d2dc326116a285801493fd13e07882": { "authors": [ { "ids": [ "2242990" ], "name": "Iyswarya Narayanan" }, { "ids": [ "1726699" ], "name": "Di Wang" }, { "ids": [ "1782355" ], "name": "Abdullah Al Mamun" }, { "ids": [ "1743609" ], "name": "Anand Sivasubramaniam" }, { "ids": [ "2782126" ], "name": "Hosam K. Fathy" }, { "ids": [ "40115643" ], "name": "Sean James" } ], "doi": "10.1109/IISWC.2017.8167752", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167752", "entities": [ "Data center", "Power management", "Provisioning" ], "id": "aaedefd104d2dc326116a285801493fd13e07882", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "12-21", "journalVolume": "", "outCitations": [ "7d4960df4f413ab228da2b528986bd2f2ca784ad", "78e009cc05a6a832106d5ca6802ce56bef6b247f", "11fcb18b8a83b7874bbbf00bc7f6b0e2e3f8c8d7", "6ac0002b174440dd498f2a7ecefadb33c4a8fb50", "d33e95ce920a8de37addbb1c2269c133922a0fca", "2219893dfcd0c9a9c2769530de9898d6868ba25b", "075a63db43b68a76a40ac6bec19416d0c2099b51", "90dbac1a0386d5c44abcceb4413fe7470daf284e", "073e26aa7192825a8d872fb0c6f25bc31aca77cf", "00b8e8cefae77424b0ed13ab784ed806296642ac", "aaed4ba940e68624f5f4be574e34a75418d7bd9f", "193e98a45b0e4eb7f85595d3c8b572b59242ed68", "09ed9cb47e09f56608bdbe6dffaa527c8ca0cd73", "0c83169bf4ebb29979bfe47708cb6b79b6e28755", "17a27f479755d4cf1b2100ef6e39d05919383828", "085e4d63d38d4ad219173077894f6befa7aeecd6", 
"4f6fea85be94a1c545ed9c42212cd3841260b42f", "2012a38d77e214e893680cfed26e7da6cdd67b15", "118c97ed0ff45bcbda0040d2acb8615a13c2d5fb", "0206a78a269f8bb23e9986a4881cb8adb03a8946", "6e7149b952045ae2ff6de230c90f82c95f8e835d", "f44e3cc59eb40659ac704c1009ecb25a484a44e4", "26099477c9bf5da89cf1d4db91cb581bca3d25a3", "7c67b077528f688d4cde9af36366fe2e611782fe", "4d59ee22a805cc99028241abdb22b774a93fb959", "a444d014d060796908507bc334a791ca407bbc10", "54754cbd5011c059af8358b162ffd9ffbcb51f39", "8ea2b1904ffca328d982539c3eafd8113325c23a", "09b5988ace31c5e3f99f89c768404b66744ffa31" ], "paperAbstract": "Datacenters often are a power utility's largest consumers, and are expected to participate in several power management scenarios with diverse characteristics in which Energy Storage Devices (ESDs) are expected to play important roles. Different ESD technologies exist, including little explored technologies such as flow batteries, that offer different performance characteristics in cost, size, and environmental impact. While prior works in datacenter ESD literature have considered one of usage aspect, technology, performance metric (typically cost), the whole three-dimensional space is little explored. Towards understanding this design space, this paper presents first such study towards joint characterization of ESD usages based on their provisioning and operating demands, under ideal and realistic ESD technologies, and quantify their impact on datacenter performance. 
We expect our work can help datacenter operators to characterize this three-dimensional space in a systematic manner, and make design decisions targeted towards cost-effective and environmental impact aware datacenter energy management.", "pdfUrls": [ "http://sites.psu.edu/iyswarya/files/2017/10/paper71_iiswc-232tiue.pdf", "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167752" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/aaedefd104d2dc326116a285801493fd13e07882", "sources": [ "DBLP" ], "title": "Evaluating energy storage for a multitude of uses in the datacenter", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "aafcba1c7ecd9fd2f247bf921ad15e79fc105279": { "authors": [ { "ids": [ "4793807" ], "name": "Fei Zhang" }, { "ids": [ "1799074" ], "name": "Xiaoming Fu" }, { "ids": [ "1874456" ], "name": "Ramin Yahyapour" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud computing", "Data center", "Network traffic control", "Software deployment", "Virtual machine" ], "id": "aafcba1c7ecd9fd2f247bf921ad15e79fc105279", "inCitations": [ "2e91495c19320b420b94029766dfa6dade3ec551", "f1ab4f42ee9a9cd82c7623f3207c94a2ad58d5d6" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "284-293", "journalVolume": "", "outCitations": [ "acec0a12a1946279a35b79828cab4f4cb13761fd", "1a4162bdf0ae312d6fb04c1d3e30bbee7a111042", "5f192e4b543662f36d3b70427959f555c20900c4", "b13ea783a9090fba3bc345b0ed595b39c0bf7281", "27a35b13625a3f767c178d0a8443a1002a35842b", "1b2fd58a6880a5a2b31c06262a6560461999a954", "180310f5adcf9378c88e01a4a8b500f9b1a21e70", "2263579da02cf7af2edf26f45ae0c9104ba4fc8b", "09a8a5cd0e6caa3ffa39afae01bea3575aa0bbf5", "2f2cdd7b0c98b5e43b61272d2ac3ebb5cd29041d", "2a86965cb89d6936aceb272295eb98794f58ceb8", "03ed30028164bd7b5215da3fb431f4402071a49f", "7a280c6cdc0ab18f8809b7101330702bfd5bb759", 
"33fba8ac45a0997e441a5f781ae792156e590f28", "bc6661a49bb739a2be258f0d5681eca46340807d", "5bc690391cb140731f88c8a68b4dee6dacd7097d", "2ac0f94e66210ab4266d40983e5b6bf160ed2bc0", "1ca60fc9e0cb610805da40fc09ab496ad5fce436", "03f1a7b5f65096c7515665df1fc38f5f07f58b31", "5dc26364e8c64868a40ef00004fc62d12c0e4264", "7fe414a5202749d1bf959d40bae4d2ca28d872c7", "94a62be8355bf5be1edcc881a26559e5258e0f1d", "c5772d4aef7a2b490e2bff0a77588a67f81ffdf4", "2bef12742683926a29888fda5798ac32d12a30fd", "9de98cf4a428f34be78eae3c1f2434c825278ddd", "307df3c8ce84c61534981a3059791664ed04ba90", "64351b55908f4c460519589c1e8f1d4f0dc922bb", "082a25b906aa716ca3c2439b8c1889449ecac44c", "608c25084161670f8c646196c1094453f0860dd2", "20a6849e0bde65f97cb4f150d6030c90aa72a0b8", "0989862438a85b97d795e2008536fa654177b5c8" ], "paperAbstract": "Live Virtual Machine (VM) migration offers a couple of benefits to cloud providers and users, but it is limited within a data center. With the development of cloud computing and the cooperation between data centers, live VM migration is also desired across data centers. Based on a detailed analysis of VM deployment models and the nature of VM image data, we design and implement a new migration framework called CBase. The key concept of CBase is a newly introduced central base image repository for reliable and efficient data sharing between VMsand data centers. With this central base image repository, liveVM migration and further performance optimizations are madepossible. 
The results from an extensive experiment show thatCBase is able to support VM migration efficiently, outperformingexisting solutions in terms of total migration time and network traffic.", "pdfUrls": [ "http://www.net.informatik.uni-goettingen.de/publications/2007/FZhang-CCGrid2017.pdf", "http://dl.acm.org/citation.cfm?id=3101152" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/aafcba1c7ecd9fd2f247bf921ad15e79fc105279", "sources": [ "DBLP" ], "title": "CBase: A New Paradigm for Fast Virtual Machine Migration across Data Centers", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "aafef27fe0f20f6f1bc5566a708c40f02ba541f9": { "authors": [ { "ids": [ "3373415" ], "name": "Tien-Dat Phan" }, { "ids": [ "2629067" ], "name": "Shadi Ibrahim" }, { "ids": [ "2494447" ], "name": "Amelie Chi Zhou" }, { "ids": [ "1717000" ], "name": "Guillaume Aupy" }, { "ids": [ "1779139" ], "name": "Gabriel Antoniu" } ], "doi": "10.1007/978-3-319-64203-1_28", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_28", "entities": [ "MapReduce" ], "id": "aafef27fe0f20f6f1bc5566a708c40f02ba541f9", "inCitations": [], "journalName": "", "journalPages": "385-398", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_28" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/aafef27fe0f20f6f1bc5566a708c40f02ba541f9", "sources": [ "DBLP" ], "title": "Energy-Driven Straggler Mitigation in MapReduce", "venue": "Euro-Par", "year": 2017 }, "ab2dd895c0d8e567071caf8704e6078d33cd6e22": { "authors": [ { "ids": [ "1815539" ], "name": "Xingda Wei" }, { "ids": [ "11431365" ], "name": "Sijie Shen" }, { "ids": [ "40576767" ], "name": "Rong Chen" }, { "ids": [ "1716528" ], "name": "Haibo Chen" } ], "doi": "", "doiUrl": "", "entities": [ "Backup", "Denial-of-service attack", "Disk partitioning", "Distributed transaction", "Downtime", "IBM 
Tivoli Storage Productivity Center", "In-memory database", "Online transaction processing", "Relational database management system", "Remote direct memory access", "Transaction processing", "Transaction processing system", "Transactions per second" ], "id": "ab2dd895c0d8e567071caf8704e6078d33cd6e22", "inCitations": [ "274f3d90c1585b8f6e999cbda8bc0b05d3125d0a", "1f896f601fc53038d0bbc28fde31ff84b12d06d9" ], "journalName": "", "journalPages": "335-347", "journalVolume": "", "outCitations": [ "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "1220e4a011c46804d4369b5580dc7fb6e387af54", "108937f6a7220ae9370511bbcaa44674c48b1a65", "a53e550b1c9282dc79ae920c12b62358bdb6e193", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "1e557937f418accc13f9c5edb33a3d48259d80e5", "412a9e54bbb31e12d008a9579994e009c5b40b46", "893448e800e13eb022cc2b3c6328ff1a85bdbe87", "783af0c84ff2dc13118bb8877d200024c1649e24", "04c692a690333b377022b873c13fb58edb598b27", "205cf007cf77bbf81e55b74635017087585f7b7c", "3abca96006f8a6c014635b6a111368f459110e83", "104119350eed6afeabfc1977281af19800791207", "2f2cdd7b0c98b5e43b61272d2ac3ebb5cd29041d", "00945bd2fe73a6f617010009d621b23b1e1303c6", "f00d2dfb39b3b1b114220dba32a0fbccc2368c66", "d1c21c34936f587779c216ed79ca33883845caa1", "1b26a72abdddd205d83a0a3f77da72e18f3d7e4b", "3ae8993ebc28dd9b99d415d04d2b766dc99212d9", "19197540fcab68bc15531c90c6103a3f836a1791", "9a11bbaf9af5ce7988386e6da8d6d3acb587f5ef", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "0742cebd319c73d45a72d5d0303e33472a16a64b", "0b9c6fe7beb3971b27aff8c5aa5e133de74316a4", "535c8fe6680bb97ff5839d751c10594dcbbcb0fe", "872a4386ee70317a48a99ec64f5e66ddc16dea2f", "76eea8436996c7e9c8f7ad3dac34a12865edab24" ], "paperAbstract": "Recent in-memory database systems leverage advanced hardware features like RDMA to provide transactional processing at millions of transactions per second. Distributed transaction processing systems can scale to even higher rates, especially for partitionable workloads. 
Unfortunately, these high rates are challenging to sustain during partition reconfiguration events. In this paper, we first show that state-of-the-art approaches would cause notable performance disruption under fast transaction processing. To this end, this paper presents DrTM+B, a live reconfiguration approach that seamlessly repartitions data while causing little performance disruption to running transactions. DrTM+B uses a pre-copy based mechanism, where excessive data transfer is avoided by leveraging properties commonly found in recent transactional systems. DrTM+B\u2019s reconfiguration plans reduce data movement by preferring existing data replicas, while data is asynchronously copied from multiple replicas in parallel. It further reuses the log forwarding mechanism in primary-backup replication to seamlessly track and forward dirty database tuples, avoiding iterative copying costs. To commit a reconfiguration plan in a transactionally safe way, DrTM+B designs a cooperative commit protocol to perform data and state synchronizations among replicas. Evaluation on a working system based on DrTM+R with 3-way replication using typical OLTP workloads like TPC-C and SmallBank shows that DrTM+B incurs only very small performance degradation during live reconfiguration. 
Both the reconfiguration time and the downtime are also minimal.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_wei.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-wei.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/wei" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/c9cf/13122cc6e5bfeacc6e2cd23ac7cb2d8d4728.pdf", "s2Url": "https://semanticscholar.org/paper/ab2dd895c0d8e567071caf8704e6078d33cd6e22", "sources": [ "DBLP" ], "title": "Replication-driven Live Reconfiguration for Fast Distributed Transaction Processing", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "ab5c82679d36ebbde1553923f77660a2afaaa357": { "authors": [ { "ids": [ "39042249" ], "name": "Govert G. Brinkmann" }, { "ids": [ "2415279" ], "name": "Kristian F. D. Rietveld" }, { "ids": [ "1731520" ], "name": "Frank W. Takes" } ], "doi": "10.1109/ICPP.2017.47", "doiUrl": "https://doi.org/10.1109/ICPP.2017.47", "entities": [ "Algorithm", "Amdahl's law", "CUDA", "Central processing unit", "Computation", "Data structure", "Directed graph", "Display resolution", "Force-directed graph drawing", "Graph drawing", "Graphics", "Graphics processing unit", "List of toolkits", "Network analysis (electrical circuits)", "Shared memory", "Speedup", "Time complexity", "Workstation" ], "id": "ab5c82679d36ebbde1553923f77660a2afaaa357", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "382-391", "journalVolume": "", "outCitations": [ "004646b5f172aa3874e13c282ece7576ede361f9", "788ddd99b67b9ee2d469f91f3eec6485d3765def", "81c2a5fcdb3b192790d484ea822cce888b77f66b", "4410f0c48f982f960a54500df7bd88e4cab88927", "052715e9292df2bb62e95616ac6486fba7cbf72f", "50cb181318757562a0eea97d07fce376e402290a", "30f9ce2feb3b08b79859beefdbef00e98f9c0003", "d0061c4d795f17e51f17d4b5472125587f382f8b", "fce7fd98928ab9bf3e4e919e108c48fc1040f569", 
"3a389251f7e3879622eff52da5493cdc56a0ace4", "ca6473f1f56af4e50de5b1f335de855ef004c827", "5ce64de5b87da6365b7a718d3bfdae62f3930286", "959258cb7ff636fee908e6f6877388081ca706b6", "6241886034d9a8cec981f6857a1dd1066b8290a0", "0e5d7003928e6f1876b51d66bbb3e6da99ae8172", "bfaf9f138b54a6e8f1093078672ce0f8368bc280", "73edb0be8aeb6ad1eafb0d3be803b2e59df995b0", "00c4db9c5b8b8bb2285a4649de75ba3580cd0e35", "f359d33a1c09d2f626217e21f722508968c7057b", "4d100834e51448da922e16718955206018a39f81", "6b39843f539e8f0fdb557a126c14b051433975e6" ], "paperAbstract": "Network analysis software relies on graph layout algorithms to enable users to visually explore network data. Nowadays, networks easily consist of millions of nodes and edges, resulting in hours of computation time to obtain a readable graph layout on a typical workstation. Although these machines usually do not have a very large number of CPU cores, they can easily be equipped with Graphics Processing Units (GPUs), opening up the possibility of exploiting hundreds or even thousands of cores to counter the aforementioned computational challenges. In this paper we introduce a novel GPU framework for visualizing large real-world network data. The main focus is on a GPU implementation of force-directed graph layout algorithms, which are known to create high quality network visualizations. The proposed framework is used to parallelize the well-known ForceAtlas2 algorithm, which is widely used in many popular network analysis packages and toolkits. The different procedures and data structures of the algorithm are adjusted to the CUDA GPU architecture's specifics in terms of memory coalescing, shared memory usage and thread workload balance. To evaluate its performance, the GPU implementation is tested using a diverse set of 38 different large-scale real-world networks. 
This allows for a thorough characterization of the parallelizable components of both force-directed layout algorithms in general as well as the proposed GPU framework as a whole. Experiments demonstrate how the approach can efficiently process very large real-world networks, showing overall speedup factors between 40x and 123x compared to existing CPU implementations. In practice, this means that a network with 4 million nodes and 120 million edges can be visualized in 14 minutes rather than 9 hours.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.47" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ab5c82679d36ebbde1553923f77660a2afaaa357", "sources": [ "DBLP" ], "title": "Exploiting GPUs for Fast Force-Directed Visualization of Large-Scale Networks", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "ab6747cd68631eeecee9668e87ad5707c67c3599": { "authors": [ { "ids": [ "1962725" ], "name": "Mohsen Imani" }, { "ids": [ "1816396" ], "name": "Abbas Rahimi" }, { "ids": [ "9353618" ], "name": "Deqian Kong" }, { "ids": [ "3560620" ], "name": "Tajana Simunic" }, { "ids": [ "1714291" ], "name": "Jan M. 
Rabaey" } ], "doi": "10.1109/HPCA.2017.28", "doiUrl": "https://doi.org/10.1109/HPCA.2017.28", "entities": [ "Autoassociative memory", "CMOS", "Cognition", "Content-addressable memory", "Discharger", "Emulator", "Hamming distance", "Scalability" ], "id": "ab6747cd68631eeecee9668e87ad5707c67c3599", "inCitations": [ "bedbfacbf1f051594178f76fe8bf26729c69acf5", "6036ed313813c15a1da611b8c06220e769a2670f", "681841070932d7de009aa15e9b8bc7593839b66b", "d134975e44c47786278865eda04070f5e963a7a2", "58dce821908f2381b5a78e641a2ac2623ad0627d", "3479e9dfd300db71dedf00f076e1773ac745bec3", "219d5ba4fe929ed739527f06ec052e0a56b843dc", "9bedc74cf098d8a40b158851948e4c3ba5bbd12f", "4431efadb482fbc2619e9dbd50d5ce707efa6396", "e7b7a4274eb7e000025a3453988f0f8b900c397d", "12175bb22aad0af8c398018ff6eda1b10ffe12ab", "9e0b654aa6c87657c8ff872abcc086a7c6a8c908", "24d46636b389101b935cf88a0dc062b91bb416a3", "3a815a81a820fa187979f78fcd896735bd6903a8", "dc2452ced0eecd3e4458e3e5d48fb66657ad7146", "e592da53cdb320410df8dcb62d131bdf2a35e098", "7970cd4e9dac6ae848eb193a8807c749d39365ae", "051b35f9572525381a3c2802acd918dd4c4c49e3", "2feb40d9b3dcc9439e6c7c4b8ef22d7ed5f94c68", "0a9deba882998bf964dfab48c7899823147b189d" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "445-456", "journalVolume": "", "outCitations": [ "32a01a917bc310388002e7c7231ba2c07416bed6", "1018693848b64234fb23440b0c29d989edb606ad", "4bd921f38965faf21f4824589c3ad3aabe838cad", "25ff450eb9fbcd99d541363ab0cdcef2b2cebeb2", "23567eb140757d026cb3f5d25419386b52a5623b", "19e5ac8e0951d8561d3984ba79798db16c8d9306", "9359cc0fbbc4adce2cd93e8a96f87d74eb098943", "fec860f1991ce44b6d6e4aa29f0690687f9a697b", "425931e434f6b370cc6cdd2db58873843def7d7f", "eb3424491e81620dddb06b6cca1e74306c2f71c8", "023a9f4e3bc35da5a4dba6fe11a2cf08d6d0a3e5", "5f6cd28eeec077d51f25f53fc3152732f80caa30", "91708bf1810f1d850aadf01d4511ca5804a2b992", "7a278bbd952b05b9c65a0c32e2cdff5490df7f93", 
"7529f2e9238ab5e5af3b8f2619b37995133dc9c4", "f477232c0a0835dcbc4fc6b6283db484695482f9", "75e165fa374abf73b40d2ab4949f93dd7f920923", "45b50ed3d33633978964893b3a58ca369f35bf7e", "e6298dd550a4a72f9819702ce6bee578b888d316", "27b92df79c638fc74bc4e8b2bd27d272d512c399", "96531e1936fd37e3690d47a67f4fd44ee41ad4b9", "09d97251a2932b6a3c1c2009f820d55f281433b9", "7977965428de966e12f337f9409369d5766fcd2f", "2c9088e08296d4ac316a1b4721acc7ad44eb017b" ], "paperAbstract": "Brain-inspired hyperdimensional (HD) computing emulates cognition tasks by computing with hypervectors as an alternative to computing with numbers. At its very core, HD computing is about manipulating and comparing large patterns, stored in memory as hypervectors: the input symbols are mapped to a hypervector and an associative search is performed for reasoning and classification. For every classification event, an associative memory is in charge of finding the closest match between a set of learned hypervectors and a query hypervector by using a distance metric. Hypervectors with the i.i.d. components qualify a memory-centric architecture to tolerate massive number of errors, hence it eases cooperation of various methodological design approaches for boosting energy efficiency and scalability. This paper proposes architectural designs for hyperdimensional associative memory (HAM) to facilitate energy-efficient, fast, and scalable search operation using three widely-used design approaches. These HAM designs search for the nearest Hamming distance, and linearly scale with the number of dimensions in the hypervectors while exploring a large design space with orders of magnitude higher efficiency. First, we propose a digital CMOS-based HAM (D-HAM) that modularly scales to any dimension. Second, we propose a resistive HAM (R-HAM) that exploits timing discharge characteristic of nonvolatile resistive elements to approximately compute Hamming distances at a lower cost. 
Finally, we combine such resistive characteristic with a currentbased search method to design an analog HAM (A-HAM) that results in faster and denser alternative. Our experimental results show that R-HAM and A-HAM improve the energy-delay product by 9.6× and 1347× compared to D-HAM while maintaining a moderate accuracy of 94% in language recognition.", "pdfUrls": [ "http://people.eecs.berkeley.edu/~abbas/papers/HPCA17.pdf", "http://moimani.weebly.com/uploads/2/3/8/6/23860882/hpca16_imani.pdf", "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.28" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ab6747cd68631eeecee9668e87ad5707c67c3599", "sources": [ "DBLP" ], "title": "Exploring Hyperdimensional Associative Memory", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "ab811cf9bdc9b1c7aca3cec965d212d4d4ddf1bc": { "authors": [ { "ids": [ "2827559" ], "name": "Zoi Kaoudi" }, { "ids": [ "1712430" ], "name": "Jorge-Arnulfo Quian\u00e9-Ruiz" }, { "ids": [ "2934941" ], "name": "Saravanan Thirumuruganathan" }, { "ids": [ "1756323" ], "name": "Sanjay Chawla" }, { "ids": [ "1724045" ], "name": "Divyakant Agrawal" } ], "doi": "10.1145/3035918.3064042", "doiUrl": "https://doi.org/10.1145/3035918.3064042", "entities": [ "Algorithm", "Experiment", "Gradient", "Gradient descent", "High- and low-level", "Iteration", "Machine learning", "Mathematical optimization", "Program optimization", "Synthetic data" ], "id": "ab811cf9bdc9b1c7aca3cec965d212d4d4ddf1bc", "inCitations": [ "05233cf6194ddee6427f0bb76cb8749cc220d2bb" ], "journalName": "", "journalPages": "977-992", "journalVolume": "", "outCitations": [ "17e1bb7fc17b45fe5ad8724a635d285ed000efa8", "4954fa180728932959997a4768411ff9136aac81", "51463d7d66e2c703561225efc9ae2b9abd768db6", "45619a2b7b41fea02345badf880530519d3d4c8f", "36d858eb19bba43244b92f7faabfce47b13f2403", "67d9fe9856d590e566c9b3aa549541129bb92117", 
"0f5c9968fe2cdb0f52c55b2d5b3dec7accf91306", "0790c77c1eaf2368b55c6a0def09a43690eeb848", "01e1fa7924b3eb76b73f1828c93805f3ba028bae", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "5b1c8b1ad5d7f18df6124e5dcb7833bfba027901", "75284ed9e49898e3367ecc99b6cd13948671e078", "065e808aa05fe23de00ab4510d1607ddff04c232", "17f70b9d1fcf3b31948ffa578ac89399751fe73d", "290c735c8e3e2ffe896d80ea379e48b8177a7f39", "43aac4922fc82c1e8062d4d22b670701b93d980a", "244f7c483b1035144f04f7c58f20c3f806d3f1b3", "2ea7b6224ebd9267cfc37ef21f3761452d24cc93", "341a08d1854b5ecf871bbb4c7833a435927abbda", "8a7e4164d954eb55617362dcb18ca1359b4b753b" ], "paperAbstract": "As the use of machine learning (ML) permeates into diverse application domains, there is an urgent need to support a declarative framework for ML. Ideally, a user will specify an ML task in a high-level and easy-to-use language and the framework will invoke the appropriate algorithms and system configurations to execute it. An important observation towards designing such a framework is that many ML tasks can be expressed as mathematical optimization problems, which take a specific form. Furthermore, these optimization problems can be efficiently solved using variations of the gradient descent (GD) algorithm. Thus, to decouple a user specification of an ML task from its execution, a key component is a GD optimizer. We propose a cost-based GD optimizer that selects the best GD plan for a given ML task. To build our optimizer, we introduce a set of abstract operators for expressing GD algorithms and propose a novel approach to estimate the number of iterations a GD algorithm requires to converge. 
Extensive experiments on real and synthetic datasets show that our optimizer not only chooses the best GD plan but also allows for optimizations that achieve orders of magnitude performance speed-up.", "pdfUrls": [ "https://arxiv.org/pdf/1703.09193v1.pdf", "http://arxiv.org/abs/1703.09193", "http://doi.acm.org/10.1145/3035918.3064042" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ab811cf9bdc9b1c7aca3cec965d212d4d4ddf1bc", "sources": [ "DBLP" ], "title": "A Cost-based Optimizer for Gradient Descent Optimization", "venue": "SIGMOD Conference", "year": 2017 }, "ab9d01de8bb09ece4e4097c735bf0d6079ae5077": { "authors": [ { "ids": [ "8549365" ], "name": "Jianping Kelvin Li" }, { "ids": [ "2383364" ], "name": "Misbah Mubarak" }, { "ids": [ "40211322" ], "name": "Robert B. Ross" }, { "ids": [ "1759102" ], "name": "Christopher D. Carothers" }, { "ids": [ "1707383" ], "name": "Kwan-Liu Ma" } ], "doi": "10.1109/CLUSTER.2017.26", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.26", "entities": [ "Catastrophic interference", "Data aggregation", "Interference (communication)", "Network congestion", "Network performance", "Network topology", "Next-generation network", "Routing", "Scalability", "Simulation", "Tree network", "Utility", "Visual analytics" ], "id": "ab9d01de8bb09ece4e4097c735bf0d6079ae5077", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "193-203", "journalVolume": "", "outCitations": [ "3c3fcd7a025f50bf598de03d41bc0fd00660f11f", "9d30381c49afa033eacc04fb68975762eb7bafab", "251544e7c508771ab34cb2d6b97800960cde1f1e", "8fc0623a28cc193927cd012bd8daac5e6cad75d3", "5f8991828def57d2f0cda942566afff56740d150", "dc39c68a00e38f2993b450eb01c96e1d032ab850", "56d1d13d74407f25516ab28140256f2a67cc1b4d", "9c4b6c885bfc6038cdac56763663880e0f2624e6", "b29373bf0d480561d668f302fe447de4d7c9a405", "86dd6cffcb498c282c22966507fb533ae8901dd6", "663e064469ad91e6bda345d216504b4c868f537b", 
"83f2087f3c602d043277927380e35885879210f5", "0417ed50c871b0f41782705112eea8936241ec91", "3c3abe3d8519b3637223ab9e518b459d5218a903", "c39c26d510c1a965c5f132edc989a598ca92b700", "cf44c98bc058e690d2434efe3355995aaf6d03c9", "0736d68aad2c198a8f6dda851c27bd180421c2aa", "ece84a462853368c21a570a73707d857bf0e10b4", "8cf9e252c8314e26f20b619acb6392d52abac647", "04232e5f25ed9df85dd755dd991baae5caeecd2a", "5c8f8cc934949901555c80777488eeff1596dc1a", "3ec4cf958f6ee00dc00aa14840c96268c4c3f9c9", "15c0ff1ace0798e8ef9767a76f90a32d1ee3ee8b", "d6be948f6efd5960f6a65f3b56524011e2a411e7", "89199b87c710d654c7285afa0eab5c88ee0427ff", "a15bc58fa496b6cca937713723f19f45380fc2fe", "582389c37f27dd69b39e949257f7fe83a6fee8d9", "b040e7de49f4b4e8e9e007d7e4149d7ef277c609", "09a941005b6b60d1cf013f992a6012b9d2e41b47", "3834b27a7c684afabf4464a15cb5d3a0c4d4918d", "f0982dfd3071d33296c22a4c38343887dd5b2a9b", "18fbcb1de113f5d60c8e81566231a0ecea46f3fe" ], "paperAbstract": "High-radix, low-diameter, hierarchical networks based on the Dragonfly topology are common picks for building next generation HPC systems. However, effective tools are lacking for analyzing the network performance and exploring the design choices for such emerging networks at scale. In this paper, we present visual analytics methods that couple data aggregation techniques with interactive visualizations for analyzing large-scale Dragonfly networks. We create an interactive visual analytics system based on these techniques. To facilitate effective analysis and exploration of network behaviors, our system provides intuitive, scalable visualizations that can be customized to show various traffic characteristics and correlate between different performance metrics. Using high-fidelity network simulation and HPC applications communication traces, we demonstrate the usefulness of our system with several case studies on exploring network behaviors at scale with different workloads, routing strategies, and job placement policies. 
Our simulations and visualizations provide valuable insights for mitigating network congestion and inter-job interference.", "pdfUrls": [ "http://www.mcs.anl.gov/papers/P7079-0717.pdf", "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.26" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ab9d01de8bb09ece4e4097c735bf0d6079ae5077", "sources": [ "DBLP" ], "title": "Visual Analytics Techniques for Exploring the Design Space of Large-Scale High-Radix Networks", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "aba9a321a91fec94a60e5b21ed71099a91e68acc": { "authors": [ { "ids": [ "34934599" ], "name": "Usama Naseer" }, { "ids": [ "2387791" ], "name": "Theophilus Benson" } ], "doi": "", "doiUrl": "", "entities": [ "Change detection and notification", "Cubic function", "FITS", "Flip-flop (electronics)", "Protocol stack", "SPDY", "Throughput", "Web service" ], "id": "aba9a321a91fec94a60e5b21ed71099a91e68acc", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "0a62c0b8647804bb4247f6563f24488101952e9a", "3d8fa14143e26d534a09a26b3e4dd3c6a83455c7", "088a6aa4196456fc3d6415ddf7d5474bd4102f7a", "b534337da8e86539f3e14eedabad7d473fdf3bde", "0a6103c182ede82da9e726b1b50d8a2383bf418a", "3bbffc0103e0dab4dd22c9234a831e17926fa1c6", "45f8bf3c6bdf24aac09c45d114441f3b6fe23cbd", "36ed5e0ca6d2023137094d04d56a34fe56915fa3", "6aba884d517ab35a63e74da36d20207965eb7e0d", "16d0a8ee484f4a34e1cdcda8a0c2453e2e962ada", "143481d55d9f9d25e53f06a6afaf15feb7430c62", "0ad4e891484031164b0f96f36874856d0ba5d532", "065e1b5f59ccd2526117be9ec98c2df9e4172bea", "5f1d959f1ea180d0612d5f0c8599d8b5e8c5d36d", "533268c5e53e03760c0b0c39ce5ff6d3cb0da5e5", "3711d6beb5c0c427d3306a3f979ae04968df2cc4", "287e60321a08f8601161b8e79a6d9a3e5093d621", "151a71cd659abdf5431ce1437968b72844055c7f", "484db31fbaa7e71e33a82d3d5afe54c7c875feaf", "08bb5149cc215c0714492b407145bbc93006f44c", 
"2bce0f3e815c471702fb9db657914b6169098c2b", "940989cf71be558d09c47aab670a0485c77cd216", "7a59672b52480f653ab430284b91094e9093bd69", "114f268992fcd5dccad6f55091445de06880bdb2" ], "paperAbstract": "The web serving protocol stack is constantly changing and evolving to tackle technological shifts in networking infrastructure and website complexity. For example, Cubic to tackle high throughput, SPDY to tackle loss and QUIC to tackle security issues and lower connection setup time. Accordingly, there are a plethora of protocols and configuration parameters that enable the web serving protocol stack to address a variety of realistic conditions. Yet, despite the diversity in end-user networks and devices, today, most content providers have adopted a \u201cone-size-fits-all\u201d approach to configuring user facing web stacks (CDN servers). In this paper, we illustrate the drawbacks through empirical evidence that this \u201cone-size-fits-all\u201d approach results in sub-optimal performance and argue for a novel framework that extends existing CDN architectures to provide programmatic control over the configuration options of the CDN serving stack.", "pdfUrls": [ "https://www.usenix.org/conference/hotcloud17/program/presentation/naseer", "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-naseer_052317.pdf", "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-naseer.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/cc98/82a64d6ff9af83c1eb558c251d447122b0fb.pdf", "s2Url": "https://semanticscholar.org/paper/aba9a321a91fec94a60e5b21ed71099a91e68acc", "sources": [ "DBLP" ], "title": "Configtron: Tackling network diversity with heterogeneous configurations", "venue": "HotCloud", "year": 2017 }, "abbb22faa83910e0f087c217a5a7a84ce9bcc1eb": { "authors": [ { "ids": [ "3388401" ], "name": "Rohail Syed" }, { "ids": [ "2582990" ], "name": "Kevyn Collins-Thompson" } ], "doi": "10.1145/3077136.3080835", "doiUrl": 
"https://doi.org/10.1145/3077136.3080835", "entities": [ "Algorithm", "Approximation algorithm", "Baseline (configuration management)", "Crowdsourcing", "Information needs", "Information retrieval", "Optimization problem", "Personalization", "Program optimization", "Reinforcement learning", "Relevance", "Usability testing", "Vocabulary", "Web search engine" ], "id": "abbb22faa83910e0f087c217a5a7a84ce9bcc1eb", "inCitations": [ "bfca58d69b80ead742b46298852b1205537c8c04", "65b926aebeb4e9f53ea69d3979bb2c68945e01f6", "dea298b4ba2934a30955fc804c000750e0157ec4" ], "journalName": "", "journalPages": "555-564", "journalVolume": "", "outCitations": [ "5abca53c6c57f5264df6e40bb5ea2f09602bf506", "a29851811df8435a8d686a9c43c912f43dc06ccc", "3e25ad2980ec097ec5b0761a61b6588d33d548df", "f7d4359b0090a775a1251e6c640701b16e25147a", "8d7c2404fd67574a3b07602aad90e89141eb9622", "012927ab94f0e542ce137e032b0288bbfbc2f9ca", "616865ec6762ec8a49ad83a084c7039272f6d31a", "0708037f8b6af7ca166a8c53ad362de063fb9809", "4afa6c2eb552ceef0e396fbfe449932492873034", "8ba3534e58e10998cb1686de596c013bd6a4803a", "42771aede47980ae8eeebac246c7a8b941d11414", "dea266419175e64b5b543bd3b9ef11b52243f4c8", "31c6b7b6b8a4ca6de749d874b29b07fbb2290ff5", "40b42731dbb4c8ed6a03fbee44b945b55c00f19a", "3a3679965ab4ef93409097be5898c68a51d81ea8", "38cb9bc50282615e0631d47825c5e0c7f691c3f7", "dea298b4ba2934a30955fc804c000750e0157ec4", "8faacb2148f97ab6e417d871ebc0614bf6fc2cc3" ], "paperAbstract": "While search technology is widely used for learning-oriented information needs, the results provided by popular services such as Web search engines are optimized primarily for generic relevance, not effective learning outcomes. As a result, the typical information trail that a user must follow while searching to achieve a learning goal may be an inefficient one involving unnecessarily easy or difficult content, or material that is irrelevant to actual learning progress relative to a user's existing knowledge. 
We address this problem by introducing a novel theoretical framework, algorithms, and empirical analysis of an information retrieval model that is optimized for learning outcomes instead of generic relevance. We do this by formulating an optimization problem that incorporates a cognitive learning model into a retrieval objective, and then give an algorithm for an efficient approximate solution to find the search results that represent the best 'training set' for a human learner. Our model can personalize results for an individual user's learning goals, as well as account for the effort required to achieve those goals for a given set of retrieval results. We investigate the effectiveness and efficiency of our retrieval framework relative to a commercial search engine baseline ('Google') through a crowdsourced user study involving a vocabulary learning task, and demonstrate the effectiveness of personalized results from our model on word learning outcomes.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080835" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/abbb22faa83910e0f087c217a5a7a84ce9bcc1eb", "sources": [ "DBLP" ], "title": "Retrieval Algorithms Optimized for Human Learning", "venue": "SIGIR", "year": 2017 }, "abbf56a715fc32b9ef04103bac7759bbaa509e4d": { "authors": [ { "ids": [ "40481129" ], "name": "Chen Chen" }, { "ids": [ "10741766" ], "name": "Harshal Tushar Lehri" }, { "ids": [ "31682740" ], "name": "Lay Kuan Loh" }, { "ids": [ "10761550" ], "name": "Anupam Alur" }, { "ids": [ "1758542" ], "name": "Limin Jia" }, { "ids": [ "35206168" ], "name": "Boon Thau Loo" }, { "ids": [ "33779522" ], "name": "Wenchao Zhou" } ], "doi": "10.1145/3035918.3035926", "doiUrl": "https://doi.org/10.1145/3035918.3035926", "entities": [ "Linear programming", "Routing", "Run time (program lifecycle phase)" ], "id": "abbf56a715fc32b9ef04103bac7759bbaa509e4d", "inCitations": [ "53c0617eb76ed39f3ba9f3a45374839d7904ef93", 
"0240d922d1934db0e79dbfac9721d7870299ff9a", "99eb70eac5c1458beaa8fd9230fd45d05d1169ec" ], "journalName": "", "journalPages": "203-218", "journalVolume": "", "outCitations": [ "35347e426812f44addc5885ba54c9d48ca14fb72", "153506e97f5db120d28f0f4c726cbb5d751baa00", "1960fb12c5e7de808f64db14f7316b562bae6b2c", "111864cac232d8a9c170bd63069eb4af155a9f7b", "313d4b7583d730c2864422d9d7d4e06db59f1527", "94d44e9c456b755524e5d28dddaa918584cd755b", "6a338178e088868e5253914ed4363b73ce81a0f4", "36583417faf3d052c415262cca1ba44a6b90d75c", "51d1cfdf9233cab9cc40f72bf049c2ad2f36082c", "17d122f143726288da193a767fd0a7634010f0ff", "1289116bbdfeef7070507dd18688753d8ca32d52", "0a30411ad3f537fe026bc6335adbde75a6da3a8e", "26b0a144690bf20d8ab06ea69ac3d49ce870979c", "000c557e5bcb9def56479751f06fc8eec3f8acda", "22fe15c47aa7f8adfe733aeb6fe9956932a2a98d", "1d80de948ddcf8f3524763ed3d5f4485f46d70fb", "231793a731604a9f756fe8453098ea814c1c0ef3", "28c15499e6175f26966ac57a07fbe1b79a1d4c18", "c07928641bba0e42199a19b63763e96dd48183e4", "ad9b5bfc8371836c5499d5525493935d8adda5f9", "481d75d413e61440db177b0c954a650f9506f331", "1fe41b1240a0eddec736b675e914b4858a955876", "2bd43ff1b3bb7db0e430e8578e913c2787dd6532", "069103feb2d2d3f1b0115b484d5c2f978a983df0" ], "paperAbstract": "Network provenance, which records the execution history of network events as meta-data, is becoming increasingly important for network accountability and failure diagnosis. For example, network provenance may be used to trace the path that a message traversed in a network, or to reveal how a particular routing entry was derived and the parties involved in its derivation. A challenge when storing the provenance of a live network is that the large number of the arriving messages may incur substantial storage overhead. In this paper, we explore techniques to dynamically compress distributed provenance stored at scale. 
Logically, the compression is achieved by grouping equivalent provenance trees and maintaining only one concrete copy for each equivalence class. To efficiently identify equivalent provenance, we (1) introduce distributed event-based linear programs (DELP) to specify distributed network applications, and (2) statically analyze DELPs to allow for quick detection of provenance equivalence at runtime. Our experimental results demonstrate that our approach leads to significant storage reduction and query latency improvement over alternative approaches.", "pdfUrls": [ "http://people.cs.georgetown.edu/~wzhou/publication/provcompress-sigmod17.pdf", "http://doi.acm.org/10.1145/3035918.3035926" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/abbf56a715fc32b9ef04103bac7759bbaa509e4d", "sources": [ "DBLP" ], "title": "Distributed Provenance Compression", "venue": "SIGMOD Conference", "year": 2017 }, "ac265a5fbea3fde88456b5c78496567a2edec8e8": { "authors": [ { "ids": [ "20645435" ], "name": "Joel Edward Denny" }, { "ids": [ "8568681" ], "name": "Seyong Lee" }, { "ids": [ "7553591" ], "name": "Jeffrey S. 
Vetter" } ], "doi": "10.1109/IPDPS.2017.60", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.60", "entities": [ "Byte", "Byte addressing", "Computer data storage", "Data structure", "Non-volatile memory", "Nonvolatile BIOS memory", "Null (SQL)", "Persistence (computer science)", "Program optimization", "Run time (program lifecycle phase)", "Testbed", "Undo" ], "id": "ac265a5fbea3fde88456b5c78496567a2edec8e8", "inCitations": [ "e3a66a2e79c2a5f7a52bf4e8089bf522fdc04abe", "cb2a018979184f87692d423322e367cc42a215d2" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1163-1173", "journalVolume": "", "outCitations": [ "dc2e2b794a784782d7d9860f1358aa107f71c1bf", "94783d113951822195d4ba44599a8fcbdef9d4bf", "0645f0f88e9a3cd6e9b1d0c21bc24666a7377666", "b575f0d8b3eb38bcf0a1b99bad144002e96ffa18", "c516d505dcee2faa0eea6b6a456fefa9451af12e", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "11ef7c142295aeb1a28a0e714c91fc8d610c3047", "2e663c1047ff14ddc2416229459922757a20edfb", "0858a3b2b393ae083b3dbc4ded61c046d5ee04d2", "0204f40221260d00c5ee63646560a40dcd7d97d1", "27bcb72519d77192da2b30eca4e1442c8f3637b1", "3a751a26108511e43d4130284ebed785e4c440ed", "c44b97f870b862f7f6f8aebc9ffde4565dd64380", "5dba3105fc05e6ba918106cb3f96d482c1a092f8", "f8f52a402b8833ea1ad8eb34e48f011b25c0d306", "6e0ade8e4c0948e47b7e1ad78eacf42e5f9d8d0f", "243c522b56809292f1f50117a9915053d32bf4fb", "05a1357946de5eca42a477b7b268db4944219a2e", "165d99c9d30be5d301b998dc23c1a6a28fd0c425", "bf5497e15f22233cbc2a4d0c3cc2c36f26738701", "da8f5c3e65e2eb398dc5a4866023ef51e4056905", "d7203f317b37d565ab54b6a48ef13ded3777eb78", "565c290e4aa268619ecbbc27ea584de0f3525020", "24724ad8962a9e04eb496fddaefe9708f6960601" ], "paperAbstract": "Substantial advances in nonvolatile memory (NVM) technologies have motivated wide-spread integration of NVM into mobile, enterprise, and HPC systems. 
Recently, considerable research has focused on architectural integration of NVM and respective programming systems, exploiting NVM's trait of persistence correctly and efficiently. In this regard, we design several novel language-based optimization techniques for programming NVM and demonstrate them as an extension of our NVL-C system. Specifically, we focus on optimizing the performance of atomic updates to complex data structures residing in NVM. We build on two variants of automatic undo logging: canonical undo logging, and shadow updates. We show these techniques can be implemented transparently and efficiently, using dynamic selection and other logging optimizations. Our empirical results on several applications gathered on an NVM testbed illustrate that our cost-model-based dynamic selection technique can accurately choose the best logging variant across different NVM modes and input sizes. In comparison to statically choosing canonical undo logging, this improvement reduces execution time to as little as 53% for block-addressable NVM and 73% for emulated byte-addressable NVM on a Fusion-io ioScale device.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.60" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ac265a5fbea3fde88456b5c78496567a2edec8e8", "sources": [ "DBLP" ], "title": "Language-Based Optimizations for Persistence on Nonvolatile Main Memory Systems", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "acaa6136ce98b086a7bcf5bed71ac34165939a08": { "authors": [ { "ids": [ "2865037" ], "name": "Eli Cortez C. 
Vilarinho" }, { "ids": [ "27089047" ], "name": "Anand Bonde" }, { "ids": [ "9329228" ], "name": "Alexandre Muzio" }, { "ids": [ "3196342" ], "name": "Mark Russinovich" }, { "ids": [ "1765528" ], "name": "Marcus Fontoura" }, { "ids": [ "2118138" ], "name": "Ricardo Bianchini" } ], "doi": "10.1145/3132747.3132772", "doiUrl": "https://doi.org/10.1145/3132747.3132772", "entities": [ "Branch predictor", "Client-side", "Cloud research", "Job scheduler", "Machine learning", "Management system", "Microsoft Azure", "Power management", "Resource Central", "Resource exhaustion attack", "Scheduling (computing)", "Server (computing)", "Software deployment", "Virtual machine" ], "id": "acaa6136ce98b086a7bcf5bed71ac34165939a08", "inCitations": [ "3105cd78fb5f9c62ccf0346e061579e2bcd130c6", "5a25bb5c4809d22536ce0ec3ac32615ed125ee5e", "8e34c6ca528e9977064921ac2f4b36143d4f217d", "210c130a29ce04a6529d2545ef07b9adce1d96f9", "53cc6bf305539b4bd8829df42996e0eb12512434" ], "journalName": "", "journalPages": "153-167", "journalVolume": "", "outCitations": [ "05ea86d312ed4a19ad282ad28838e8e87b6ce156", "6eb4369ac471b21006feea2a49e2cc647db9a318", "3e257f01e3ee71545d824a1615c35659525b856a", "3000e77ed7282d9fb27216f3e862a3769119d89e", "8a86f143b8bb22b6fea93c8c70c35cfb5b490a17", "3784b73a1f392160523400ec0309191c0a96d86f", "5dc26364e8c64868a40ef00004fc62d12c0e4264", "e0b0b8298c40102d8c5d4704d7ffd7f2300b9602", "54754cbd5011c059af8358b162ffd9ffbcb51f39", "1ecd36058e48734213c81728f42ff798a2c52833", "6f54a7933235ced5684e3bff18f7e5dc40510018", "9e94390e67fa2c44188634f6a4e8195b1eb309c8", "0a5ff7336879c99513dca6fce6ef44984ebf3f55", "4954fa180728932959997a4768411ff9136aac81", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "2e7647a07fe21c18ab5b7037de3038157338f1db", "808585a76d350dbe567c35b74086948cdd95cad4", "581b0e29991ffd8396e2d91b9c53ad483e72d9b8", "40528e881a5a896466970650a7c8d7a41b2004ff", "7a978f2902460e732c50c36a171deb11733df1fc", 
"d8057d514036d51051af78476468fe350cb7488a", "59ab46bfd59cb43876e701389f256b93430e6273" ], "paperAbstract": "Cloud research to date has lacked data on the characteristics of the production virtual machine (VM) workloads of large cloud providers. A thorough understanding of these characteristics can inform the providers' resource management systems, e.g. VM scheduler, power manager, server health manager. In this paper, we first introduce an extensive characterization of Microsoft Azure's VM workload, including distributions of the VMs' lifetime, deployment size, and resource consumption. We then show that certain VM behaviors are fairly consistent over multiple lifetimes, i.e. history is an accurate predictor of future behavior. Based on this observation, we next introduce Resource Central (RC), a system that collects VM telemetry, learns these behaviors offline, and provides predictions online to various resource managers via a general client-side library. As an example of RC's online use, we modify Azure's VM scheduler to leverage predictions in oversubscribing servers (with oversubscribable VM types), while retaining high VM performance. Using real VM traces, we then show that the prediction-informed schedules increase utilization and prevent physical resource exhaustion. 
We conclude that providers can exploit their workloads' characteristics and machine learning to improve resource management substantially.", "pdfUrls": [ "http://doi.acm.org/10.1145/3132747.3132772", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/10/Resource-Central-SOSP17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/acaa6136ce98b086a7bcf5bed71ac34165939a08", "sources": [ "DBLP" ], "title": "Resource Central: Understanding and Predicting Workloads for Improved Resource Management in Large Cloud Platforms", "venue": "SOSP", "year": 2017 }, "accf1dc20245eefe8dc53050c40259cbb0e9de46": { "authors": [ { "ids": [ "1883004" ], "name": "Marcin Bienkowski" }, { "ids": [ "40014671" ], "name": "Jan Marcinkowski" }, { "ids": [ "2152904" ], "name": "Maciej Pacut" }, { "ids": [ "1691890" ], "name": "Stefan Schmid" }, { "ids": [ "3371612" ], "name": "Aleksandra Spyra" } ], "doi": "10.1145/3087556.3087558", "doiUrl": "https://doi.org/10.1145/3087556.3087558", "entities": [ "Algorithm", "B-tree", "Cache (computing)", "Competitive analysis (online algorithm)", "Deterministic algorithm", "Online algorithm", "Program optimization", "Routing" ], "id": "accf1dc20245eefe8dc53050c40259cbb0e9de46", "inCitations": [ "1c0224f2bf52629f4f709d26597c5c29d3dfe1ca" ], "journalName": "", "journalPages": "329-338", "journalVolume": "", "outCitations": [ "459b132d45c0a54f18c163128803d6d3cbaa8d09", "5b5f778f9b990860716e62380deb351a7bf935e1", "3f38007a82b66c89fc108b988701977083774f00", "016912c4def2f6abae3fee637aac4d178e2ae613", "6405d1d2abfb0f0e90f79f38e59d7016de543723", "59b5348572f7a6514ab47df87545029da4805010", "385e4a73df6e88109e2138079e112e6c213e3252", "48a2ab32dc400e408879300216966be78db67b13", "0deb57cc07a36cf1ea5ba0b3482bf14f2e8bb60d", "39e5ea76467fe006b7f32ac4d958a402e8be3403", "d19edb6ba6f22091393fc8010976a02a95828764", "c76e6bbab954d9e4f1a78c3564ad41eb7ff4b488", "056be77d0ed7762577154d08b3806e663db32dae", 
"d0eb35b57b0bf83a13e70fc41e96aacd5dd7b2eb", "141032077b5d133b70fdf895c783acfc4f9f6afe", "89d51acc9e6256deb0c4ddc009b2baf308c75c70", "574d82810dbf8603afbb3e77875405ace30c56c5", "88b1e21e1803bc134b43035b552b62be30659e39", "5d2bb138461df8cd1b6c754a294173ff0d38e98d", "729e6516a13268970cc2b21763825ab169b9b096", "3038eeaf3743b5dcd097be5711def3c61bc231b3", "23c59835af89599cc630797e0c13bb0da3ed53bf", "3dd516ae5f4d738c9211baebe7680b60f35e8488", "196eee779d8acd2b789bf4a28486af264357ac85", "f345ecb8e361107bd6c8ca9b8f712c42a102c4cd", "2104eb14b36370ae0df0ef846e3a6b2cd5e9d331", "f8bbcfb18bdbf9aefc0d712a8e476f1760929244", "73de95988381f8fd71c34e2893d1bc1773716d93", "1e989eee55fdf5ebe871ba7adcf3c484f98f368a", "0f1889762d8acb58fef61b2fa9b73a070329e4bc", "0566141713514822ac26448018a2208ae628f24f", "fc1f8838c115ed1b9bd7e8475b3d58ab40ea2e0c" ], "paperAbstract": "We initiate the study of a natural and practically relevant new variant of online caching where the to-be-cached items can have dependencies. We assume that the universe is a tree T and items are tree nodes; we require that if a node v is cached then the whole subtree T(v) rooted at v is cached as well. This theoretical problem finds an immediate application in the context of forwarding table optimization in IP routing and software-defined networks. We present an elegant online deterministic algorithm TC for this problem, and rigorously prove that its competitive ratio is O(height(T) * k_ALG/(k_ALG-k_OPT+1)), where k_ALG and k_OPT denote the cache sizes of an online and the optimal offline algorithm, respectively. 
The result is optimal up to a factor of O(height(T)).", "pdfUrls": [ "https://export.arxiv.org/pdf/1602.08563", "https://arxiv.org/pdf/1602.08563v1.pdf", "http://arxiv.org/abs/1602.08563", "https://arxiv.org/pdf/1602.08563v2.pdf", "http://arxiv.org/pdf/1602.08563v1.pdf", "http://doi.acm.org/10.1145/3087556.3087558", "https://www.net.t-labs.tu-berlin.de/~stefan/spaa17caching.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/accf1dc20245eefe8dc53050c40259cbb0e9de46", "sources": [ "DBLP" ], "title": "Online Tree Caching", "venue": "SPAA", "year": 2017 }, "acd2f5ba26a7e53db6c2a5f8575a8d3f3b356a6f": { "authors": [ { "ids": [ "2068030" ], "name": "Joonmoo Huh" }, { "ids": [ "1694458" ], "name": "James Tuck" } ], "doi": "10.1145/3123939.3124554", "doiUrl": "https://doi.org/10.1145/3123939.3124554", "entities": [ "Algorithm", "Automatic vectorization", "Best, worst and average case", "Central processing unit", "Compiler", "Heuristic", "LLVM", "Microprocessor", "NAS Parallel Benchmarks", "Parallel computing", "SIMD", "Speedup", "Successive linear programming", "Superword Level Parallelism" ], "id": "acd2f5ba26a7e53db6c2a5f8575a8d3f3b356a6f", "inCitations": [], "journalName": "", "journalPages": "718-729", "journalVolume": "", "outCitations": [ "9acfc2f0fe088c59ded490f685211f5509757587", "6d499b33fd52179dd92b533317e485e08b990d0e", "a207985b7828224f5a1d3fb10ba94e8e7bdd25e5", "3f50c04634e0b04a6e63eec1ec2679d98ecb8d82", "4a7bbb5718449555f63eb45a1ab2c71fd212a75c", "15bccb4ffd4f2f44fa0fae5cdbe85afc362855f6", "c9f5932a9b0155bf949d5c5c3bcc0e548cb57abf", "e113e30a8db1d3ee73f341436fdbbff542aba03b", "039449e900b28ae63ef515063e59642fe501aed7", "263286281bb576d353fecebd6023b05effc15fbf", "4d73ab0a5b410f73f2c2498327033e7bc7c05b78", "3ad8e1308849167d96355b9b1906994eb92283a3", "2194c3460ab71f3826db00b045b2ae590c753319", "77bf9e4c570759266a5d7ad4a20b8895ff264576", "5332810c642f5e9e6985525d3f8efa3c645cb161", "3e74ae88cdaa33bf89136800258bde97ab397ec9", 
"ef6e38c3fed47236d90b61e36cfea98990c07bbc", "6c5462d31a0d0f4e6cb2ff7ae795250957d9fcab", "0e65407ea4bea5f92860752c3056a82b7ed58cd1", "3ab98cab824062228713b1278ad1e4f026ec346e", "4a53afac820fabe9bbe2ed5ec21f76ccd5c228d5", "0653e2ed9f683868cb4539eb8718551242834f6b" ], "paperAbstract": "Most high-performance microprocessors come equipped with general-purpose Single Instruction Multiple Data (SIMD) execution engines to enhance performance. Compilers use auto-vectorization techniques to identify vector parallelism and generate SIMD code so that applications can enjoy the performance benefits provided by SIMD units. Superword Level Parallelism (SLP), one such vectorization technique, forms vector operations by merging isomorphic instructions into a vector operation and linking many such operations into long isomorphic chains. However, effective grouping of isomorphic instructions remains a key challenge for SLP algorithms.\n In this work, we describe a new hierarchical approach for SLP. We decouple the selection of isomorphic chains and arrange them in a hierarchy of choices at the local and global levels. First, we form small local chains from a set of preferred patterns and rank them. Next, we form long global chains from the local chains using a few simple heuristics. Hierarchy allows us to balance the grouping choices of individual instructions more effectively within the context of larger local and global chains, thereby finding better opportunities for vectorization.\n We implement our algorithm in LLVM, and we compare it against prior work and the current SLP implementation in LLVM. A set of applications that benefit from vectorization are taken from the NAS Parallel Benchmarks and SPEC CPU 2006 suite to compare our approach and prior techniques. We demonstrate that our new algorithm finds better isomorphic chains. Our new approach achieves an 8.6% speedup, on average, compared to non-vectorized code and 2.5% speedup, on average, over LLVM-SLP. 
In the best case, the BT application has 11% fewer total dynamic instructions and achieves a 10.9% speedup over LLVM-SLP.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3124554" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/acd2f5ba26a7e53db6c2a5f8575a8d3f3b356a6f", "sources": [ "DBLP" ], "title": "Improving the effectiveness of searching for isomorphic chains in superword level parallelism", "venue": "MICRO", "year": 2017 }, "ad4f5c7bf59d630b3366aeaeda19e365999f8cf5": { "authors": [ { "ids": [ "1691935" ], "name": "Rami Sheikh" }, { "ids": [ "3100338" ], "name": "Harold W. Cain" }, { "ids": [ "2676797" ], "name": "Raguram Damodaran" } ], "doi": "10.1145/3123939.3123951", "doiUrl": "https://doi.org/10.1145/3123939.3123951", "entities": [ "Branch predictor", "CPU cache", "Data dependency", "Instruction-level parallelism", "Memory address", "Microsoft Windows", "Parallel computing", "Password Authentication Protocol", "Requirement", "Value (ethics)" ], "id": "ad4f5c7bf59d630b3366aeaeda19e365999f8cf5", "inCitations": [], "journalName": "", "journalPages": "423-435", "journalVolume": "", "outCitations": [ "4f105edc6d373f41b998871962189ab9b2adb601", "3f47b134f541253daf2b82dd6677b1c6980715bc", "00062dfaf2b2c72d623a5f5b726c82db5f5b5a70", "caba1c8333b45910a237ea3843d5d8cf7836e5b4", "02ae896b9c2037476424bcdbdd6b9753b931550f", "963b12d9620f9af1b9bd97a7164dc899ce8af6ce", "0f1509bfcf6130e932c8cb338ecd12f9b8490e56", "1ba080e79f78f16f4843895fe5faefa482d2d7b0", "c93c0038834f40d1ac776ca985daa610c25068e7", "1ea33a0ba2ded13492a4afa6817f953eede0e037", "196cb273f787d5684ad968b49003ae0e9df56e4a", "2697a32e94a4e1a0e375c1360371708576171549", "0258bcc9277250fe98e6eb9a0dbd7b7595250158", "1ab9ca1e2950c03b9e14abf4a0d7c0ce7486d260", "3a95620ade477b089ae392395a0d19653e822c38", "2fc7fc0f89451c78b250212ecf9ef5b0b133d400", "f31c2dfb0ab1a5843de8df6b340c19f50b76a49d", "2685b8937b2c76bc2bc67a8b7c925682e6224811", "33fe4a236c886461fe141e90904ed8f041c0f9c3", 
"370488843f80120797e1f0af22e9fdb0152ff657", "180189c3e8b0f783a8df6a1887a94a5e3f82148b", "1f2c1195440a3c1c57fca2556da2cd79a49324de", "542d7ddb6a2efa4a9a55c63bdc1e5fbae129df56", "3082d9ff0a7356b7414a5c6f0521e43dbcb9b2f8", "487bf98f99d95a69d525b13236f1fbb61f4d3dd0", "81710e200ffa25c9bca41cbd35fdd6992ce79b6c", "0f8895e31d6bd3a40d9429346e440b0c1d073ba1", "b7c9efd81382765b3cc82ec756e86ccbb5f39d1e" ], "paperAbstract": "Current flagship processors excel at extracting instruction-level-parallelism (ILP) by forming large instruction windows. Even then, extracting ILP is inherently limited by true data dependencies. Value prediction was proposed to address this limitation. Many challenges face value prediction, in this work we focus on two of them. Challenge #1: store instructions change the values in memory, rendering the values in the value predictor stale, and resulting in value mispredictions and a retraining penalty. Challenge #2: value mispredictions trigger costly pipeline flushes. To minimize the number of pipeline flushes, value predictors employ stringent, yet necessary, high confidence requirements to guarantee high prediction accuracy. Such requirements can negatively impact training time and coverage.\n In this work, we propose Decoupled Load Value Prediction (DLVP), a technique that targets the value prediction challenges for load instructions. DLVP mitigates the stale state caused by stores by replacing value prediction with memory address prediction. Then, it opportunistically probes the data cache to retrieve the value(s) corresponding to the predicted address(es) early enough so value prediction can take place. Since the values captured in the data cache mirror the current program data (except for in-flight stores), this addresses the first challenge. 
Regarding the second challenge, DLVP reduces pipeline flushes by using a new context-based address prediction scheme that leverages load-path history to deliver high address prediction accuracy (over 99%) with relaxed confidence requirements. We call this address prediction scheme Path-based Address Prediction (PAP). With a modest 8KB prediction table, DLVP improves performance by up to 71%, and 4.8% on average, without increasing the core energy consumption.", "pdfUrls": [ "http://www4.ncsu.edu/~rmalshei/i/micro2017.pdf", "http://doi.acm.org/10.1145/3123939.3123951" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ad4f5c7bf59d630b3366aeaeda19e365999f8cf5", "sources": [ "DBLP" ], "title": "Load value prediction via path-based address prediction: avoiding mispredictions due to conflicting stores", "venue": "MICRO", "year": 2017 }, "ad806d9c69ab834d814865958cd3ded4df4f12f9": { "authors": [ { "ids": [ "2834533" ], "name": "Alex Horn" }, { "ids": [ "4314485" ], "name": "Ali Kheradmand" }, { "ids": [ "5755087" ], "name": "Mukul R. 
Prasad" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Amortized analysis", "Border Gateway Protocol", "Classless Inter-Domain Routing", "Datalog", "Experiment", "Forwarding plane", "Insertion sort", "Reachability", "Real-time data", "Software-defined networking" ], "id": "ad806d9c69ab834d814865958cd3ded4df4f12f9", "inCitations": [ "48b486667e0b99dbe874bde113b0520d5168c17d", "c60960f93f657205b0f6247e00c79c97203e9a51", "3efbf0223dc54fa48bcfd9652333911c4d796ecc", "0e0f7fa2de3f757a51e747399d93c570249e72ac", "b74d2874646d36b36eee5c836adf6b29d9173425" ], "journalName": "", "journalPages": "735-749", "journalVolume": "", "outCitations": [ "1f0ea586a80833ee7b27ada93cc751449c4a3cdf", "20f63c31f73ce4b110acd21554824a40d62516aa", "119af470b90b725c847c4b1fd25aea9a6c5b2b57", "5692a5398e92ec43703145d512eef4d06a2a2fc8", "0e74750ccdd195fa55369a68c7cc0e354ffee9d8", "ab435079fd1e6b39a71cc4ec682494afec1ffd32", "b5aea92d2d5d5037572745be233ae581b4ce0cc5", "37179bfc4836890a32950ea2fb74795823284362", "7af6489d51eb9d55a0298b778bb822c7f75e9936", "1396d46b92058a1834757f208058014be8e39219", "2827d635444c744217d3f292b6ed0d750bc7c6fa", "60cdcebff3ea7032443ce2de76b37dc1512682cc", "00cf4b13a1bd202ccebe1e7bd0587f11e98ec3d6", "29620a4f3b46dc7bbc70d3cde868b0c687d46ed7", "4004e51f8f6bb775bd394942007f761d42fdaaad", "d65504b3b0e66ed41cf98f381c6071748b65b340", "24a963758371e511e3749c865b14f697358f025c", "121180b8c56026deb122eb738547944417239c66", "9c6d7d0537d2011fd61cc9fd8cda5a21c939b52a", "016ca0cee16190907a911d874e98fbc6dfa5a36c", "0355a7b4c66e42b73fa3d0d7198ce68b2dbcd5be", "0d7ecd90182b2d28433209897650427c5de10bb6", "2a0d9931c1e794ffe96c4b6c37507e96a3c4cafc", "0ea5ac1eb04bcf16a8856d886be45ec90044a4c3", "5c627f0a893f19012dc9183dddd2925219cb02a5", "0385a0c8b707d70bef33bb308d321b2647da0ca3", "28ed63405cc70fbcef04b04fd3e61fd7b23b59bc", "08a572c06bdaa78d85a287111832d188e8e07f0b", "211b998175c29dd62e7f854301ed88bcb03a1fa5", "36f396b52f93fa52742ce5052a40c1c90ea726e3", 
"2f032f3faecd3bdacfa34db26c49c0688d818af8", "07ca726af9c235573654b85e8d478bd7303aa62f", "73966d417bdfe0fd2f1bfd82e7dddf51ccbda961", "8dd84ffc12f27531d0a5ff2aba1ec326139d194a", "9e778ebe9a4b0955b802f7e46323dcffc6174f94", "44b3b0573fdf6fc9889dbb3badf134dd092bb2d1", "1164ec0b8d2bd8e95b9fc07e9669ff9d4d379c64", "04b319357d6bab89ec9575f4b044d7609aa4296a", "61797d1af87a27a89b5bb5fd4a03de061e5dc0e3", "507b5fe36714eb6aa8acd96d1eef14212eddb82b", "fca51a1d1275524ebb39037edb3b79c830a9faf3", "2f976aa22e08e4233c8d1dd82343bfd3a124d9ac", "156d17c97a2955f0fcd5b3360129bb23f2ed9538", "19114b7a2f5243a47e80590cc11a2d8ec5b96308" ], "paperAbstract": "Real-time network verification promises to automatically detect violations of network-wide reachability invariants on the data plane. To be useful in practice, these violations need to be detected in the order of milliseconds, without raising false alarms. To date, most real-time data plane checkers address this problem by exploiting at least one of the following two observations: (i) only small parts of the network tend to be affected by typical changes to the data plane, and (ii) many different packets tend to share the same forwarding behaviour in the entire network. This paper shows how to effectively exploit a third characteristic of the problem, namely: similarity among forwarding behaviour of packets through parts of the network, rather than its entirety. We propose the first provably amortized quasi-linear algorithm to do so. We implement our algorithm in a new real-time data plane checker, Delta-net. Our experiments with SDN-IP, a globally deployed ONOS software-defined networking application, and several hundred million IP prefix rules generated using topologies and BGP updates from realworld deployed networks, show that Delta-net checks a rule insertion or removal in approximately 40 microseconds on average, a more than 10\u00d7 improvement over the state-of-the-art. 
We also show that Delta-net eliminates an inherent bottleneck in the state-of-the-art that restricts its use in answering Datalog-style \u201cwhat if\u201d queries.", "pdfUrls": [ "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/horn-alex", "https://arxiv.org/pdf/1702.07375v1.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-horn-alex.pdf", "http://arxiv.org/abs/1702.07375", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-horn-alex.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/ad80/6d9c69ab834d814865958cd3ded4df4f12f9.pdf", "s2Url": "https://semanticscholar.org/paper/ad806d9c69ab834d814865958cd3ded4df4f12f9", "sources": [ "DBLP" ], "title": "Delta-net: Real-time Network Verification Using Atoms", "venue": "NSDI", "year": 2017 }, "ad9b28f8c1c693b8531ff33524450f207530b6d6": { "authors": [ { "ids": [ "8807566" ], "name": "Mustafa Kemal Tas" }, { "ids": [ "2793301" ], "name": "Kamer Kaya" }, { "ids": [ "1728260" ], "name": "Erik Saule" } ], "doi": "10.1109/ICPP.2017.59", "doiUrl": "https://doi.org/10.1109/ICPP.2017.59", "entities": [ "Algorithm", "Computational science", "Data point", "Graph coloring", "Heuristic", "Manycore processor", "Mathematical optimization", "Multi-core processor", "Non-blocking algorithm", "Numerical analysis", "Parallel algorithm", "Parallel computing", "Program optimization", "Real life", "Shared memory", "Thread (computing)" ], "id": "ad9b28f8c1c693b8531ff33524450f207530b6d6", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "503-512", "journalVolume": "", "outCitations": [ "7f47c4a00c0cbbfc6899141d745f9a8b7dcbcfcf", "e68a9314b92e644327c35ea2a7143b91466a553c", "6c2a93d49e4082ab4b5ff4e8c67554654f79468c", "203a7b17267a28a06808bfb3b0b9571e32d15503", "fbf8150f45d4358926e40f7005a2f493f39803ae", "7c1a0f7054d496e71cc697202b84d8a5c652328a", "5da16f56eb095530a44446fbd156af4c85235527", 
"35a458f3bc6c54bf24436a51fefee75d0aeb01b5", "0c88eb0d571a91d5c6675b4f30330cf8fb575382", "ae4d65769b6551a51d6fc6be2f021515bffa0798", "adc87f84e2f7e49ec15bc5afd1faaa9b552b9d70", "6dab20f55bf33ed2aec142c59080243e136101e7", "71ea8afad9c502632aa0ac98ec62a99f6050429d", "88795e75f1504e81a32ab8daf236495e76f01f6f", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "9b49c5be16de40b36e2a1e324440ab68bb802f0c", "134fb9229407b0941e716fdd22a2f96b978c5bb6", "2b837611dd905c3726111bd7877334a8eb5a935f", "f2c4f4a13e38b906543f4a60278c9ab3641e9c02", "1c830ff14342dbefb0fe0fbcb939ea6190a68780", "1d001e3dee27b846f712d345dc072b4f15d2c527", "9f609c9c7cd3e8fbea58311a7a6365b6b7a2f386", "d606df54382b7afe5e2e3fa50e9122795b6d04cb" ], "paperAbstract": "In parallel computing, a valid graph coloring yields a lock-free processing of the colored tasks, data points, etc., without expensive synchronization mechanisms. However, coloring is not free and the overhead can be significant. In particular, for the bipartite-graph partial coloring (BGPC) and distance-2 graph coloring (D2GC) problems, which have various use-cases within the scientific computing and numerical optimization domains, the coloring overhead can be in the order of minutes with a single thread for many real-life graphs.In this work, we propose parallel algorithms for bipartite-graph partial coloring on shared-memory architectures. Compared to the existing shared-memory BGPC algorithms, the proposed ones employ greedier and more optimistic techniques that yield a better parallel coloring performance. In particular, on 16 cores, the proposed algorithms are more than 4x faster than their counterparts in the ColPack library which is, to the best of our knowledge, the only publicly-available coloring library for multicore architectures. In addition to BGPC, the proposed techniques are employed to devise parallel distance-2 graph coloring algorithms and similar performance improvements have been observed. 
Finally, we propose two costless balancing heuristics for BGPC that can reduce the skewness and imbalance on the cardinality of color sets (almost) for free. The heuristics can also be used for the D2GC problem and in general, they will probably yield a better color-based parallelization performance especially on many-core architectures.", "pdfUrls": [ "https://webpages.uncc.edu/~esaule/public-website/papers/icpp17-TKS.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.59" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ad9b28f8c1c693b8531ff33524450f207530b6d6", "sources": [ "DBLP" ], "title": "Greed Is Good: Parallel Algorithms for Bipartite-Graph Partial Coloring on Multicore Architectures", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "adbe2e7f4c58d978cba63554def44f8f6e7d3b17": { "authors": [ { "ids": [ "1890055" ], "name": "Jiwoong Park" }, { "ids": [ "11018159" ], "name": "Cheolgi Min" }, { "ids": [ "1696005" ], "name": "Heon Young Yeom" } ], "doi": "", "doiUrl": "", "entities": [ "Attribute\u2013value pair", "CPU cache", "Cache (computing)", "Hit (Internet)", "IBM System i", "In-memory database", "InnoDB", "Kernel (operating system)", "Key-value database", "Linux", "MySQL", "Online transaction processing", "User space" ], "id": "adbe2e7f4c58d978cba63554def44f8f6e7d3b17", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "649-658", "journalVolume": "", "outCitations": [ "2ee01ab9aca4163d391bd29c2123d9be44b0e986", "27bcb72519d77192da2b30eca4e1442c8f3637b1", "08fa5e5935f8108340f1583845292cfa9401ccc1", "540de0c91d7d834a41832eca28f0c5a7fd3885e4", "2da760f90c3d2bf6598becdde9063093f488548c", "43f1a17266420ec48edbacd0101dc607bf644ce2", "0bcea4e03620eb323d990119a9d35c45a278023f", "144b1ba05cb64b493858b665cc38374f3ef7e332", "f02ae995020a9bf4d9ca251d0f35a9e9eb09cc8d" ], "paperAbstract": "A large 
number of cloud datastores have been developed to handle the cloud OLTP workload. Double caching problem where the same data resides both at the user buffer and the kernel buffer has been identified as one of the problems and has been largely solved by using direct I/O mode to bypass the kernel buffer. However, maintaining the caching layer only in user-level has the disadvantage that the user process may monopolize memory resources and that it is difficult to fully utilize the system memory due to the risks of the forced termination of the process or the unpredictable performance degradation in case of memory pressure. In this paper, we propose a new I/O mode, DBIO, to efficiently exploit OS kernel buffer as a victim cache for user-level file content cache, enjoying the strengths of kernel-level cache rather than just skipping it. DBIO provides the new file read/write function calls, which enable user programs to dynamically choose the right I/O behavior based on their context when issuing I/Os instead of when opening the file. On the cloud key-value store workloads and the traditional OLTP workloads with the modified version of MySQL/InnoDB, DBIO improves the in-memory cache hit ratio and the transaction performance compared to both buffered and direct I/O mode, fully utilizing the user buffer and the kernel buffer without double caching.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101202" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/adbe2e7f4c58d978cba63554def44f8f6e7d3b17", "sources": [ "DBLP" ], "title": "A New File System I/O Mode for Efficient User-Level Caching", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "add350d0c5605c98d285b87493fc77c1d68281df": { "authors": [ { "ids": [ "2193705" ], "name": "Dibakar Gope" }, { "ids": [ "7216768" ], "name": "David J. Schlais" }, { "ids": [ "1704076" ], "name": "Mikko H. 
Lipasti" } ], "doi": "10.1145/3079856.3080234", "doiUrl": "https://doi.org/10.1145/3079856.3080234", "entities": [ "Dynamic web page", "Function-level programming", "Graphic art software", "Hardware acceleration", "Hash table", "HipHop for PHP", "Just-in-time compilation", "Memory management", "Microarchitecture", "PHP", "Regular expression", "Run time (program lifecycle phase)", "Scripting language", "Server (computing)", "Server-side", "Server-side scripting", "String (computer science)", "Web content" ], "id": "add350d0c5605c98d285b87493fc77c1d68281df", "inCitations": [ "0343ae9ab99d0cbd719baf0d2cc1b82425f3664a" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "507-520", "journalVolume": "", "outCitations": [ "0f35cad96cebf9590c168caf4baa2103af38934d", "cc05fdb70b630138dd9b64a901eec9c36146c371", "29f2a4b0e812c459ad2f9ed3781067cb83a1f7c9", "0335d8a77f4c93b7972b694c9c828301dd193911", "16a0bbb27f7cbd8647a7811eb003cf772a1d8d5f", "6b2866fe86c31a6ce23446c6daeafb09a5ba0243", "1dd353938063795c06ef21d8b0b3ef3b45a2fdc1", "2020e67ccc611ee8b79c300b2b71dec0632cb164", "056255e00e0319bf6320d5cc7a2038162f4e861f", "07f62af22fab75b1b8dcc7a5ef45923322e50b57", "3bc180e00cb21933223785f70abc5509852dfa00", "22c9661e8ba4ca1312bdc794e92cf97c9870f472", "df6736a35ec677c8ee9d6d2863bd34f365ca3a41", "110c050c6c992d2b956f7b47d717810ac5c91bdc", "8c9a529502244ebbd7d734053acc1c5b4719ca57", "5284651b0025be9de74064ba52b9e245aa022d62", "0904bcfc6bac02fccaaae307cfec2c7c30d5b620", "6068cdb7a834f47a38ffcbfa4128e70ed3e385eb", "c630a97772d9b5d2380d409b2ef2c28dced5e392", "5fda732aae5f0d845c8ff2e72f144f3d69e362d9", "a3f3d0f41d0f914f0a7edaccb3d80cc69388cb59", "342a1b8bfdc86461ae9048cc43539198aabbe853", "eb812b50012c1eb368b488f0e644453ef1db0e21", "2bb1d6a5ece860bd9ec8c19c81ba171cada60d3e", "0d882911151738be130a2f63e3d3e81f543b3bd9", "bc42584c1d74f96d2e03dfcc487af642527a62fe", "9c6ee3cb97fac0108731a93472b81dfd3d2f2520", 
"3fd85d5f5217b7df40e8fd6a8ef7d285fc4bb7e8", "2f4002755b309cdb91e18116b8028005497d8400", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "33fe4a236c886461fe141e90904ed8f041c0f9c3", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "18bd7fc45fb824e7efa0b062e1e8f58c3d64cd02", "6f48f470bf45fa4401ee0accef1931d6e492b730", "352a8957005dc5519b15ed1870751ec494d66395", "01543ab5a761b28b12d9ddf856d8603aeda2b0d7", "4c372083685ab8b8cfc6ff984310c6d078580897", "62f0aaa7146794f52d10a71c4ef28d64f7c77670", "060fb58c595197a4acc345961ef3cb3f772eee49", "6f090d59bde17b7604985acf38e26785e794bcc0", "48536fdbbc79ddf163901c7e63bb70b6f64802e0", "25977aeebe5714a9e727218a0c71d05144cb8eba", "82cbd7160140333445e1be04bcee570627a37560", "e69e30d7df7bb0b91b44c78e0906fd143f2808ac", "0789af812af3aebac3853dc2745f3847d503fa02", "5e3f8c2ba2fb225c29ba343565d52b9661e7198e", "6abd933f15bb853aebd129e313474865b53a5fed", "3e1e30975f56ed863f8819115ba25d7142b2c793", "99e4d7f26140f2b31b440882e1684600a62b042c", "0d281938d3ff2377541704cab6ba1c4408420733", "03b2e534532e9558e560df0bed74976b8f48c1a5", "dd5f64f4b0a83ce5655d64f5955c7deec91073c8" ], "paperAbstract": "PHP is the dominant server-side scripting language used to implement dynamic web content. Just-in-time compilation, as implemented in Facebook's state-of-the-art HipHopVM, helps mitigate the poor performance of PHP, but substantial overheads remain, especially for realistic, large-scale PHP applications. This paper analyzes such applications and shows that there is little opportunity for conventional microarchitectural enhancements. Furthermore, prior approaches for function-level hardware acceleration present many challenges due to the extremely flat distribution of execution time across a large number of functions in these complex applications. In-depth analysis reveals a more promising alternative: targeted acceleration of four fine-grained PHP activities: hash table accesses, heap management, string manipulation, and regular expression handling. 
We highlight a set of guiding principles and then propose and evaluate inexpensive hardware accelerators for these activities that accrue substantial performance and energy gains across dozens of functions. Our results reflect an average 17.93% improvement in performance and 21.01% reduction in energy while executing these complex PHP workloads on a state-of-the-art software and hardware platform.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080234", "http://pharm.ece.wisc.edu/papers/isca17_dgope.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/add350d0c5605c98d285b87493fc77c1d68281df", "sources": [ "DBLP" ], "title": "Architectural support for server-side PHP processing", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "adeb9da8d810cc3a7ef7887857a8ec8d96e20cdd": { "authors": [ { "ids": [ "1703465" ], "name": "Yuan Zhang" }, { "ids": [ "8862299" ], "name": "Tianshu Lyu" }, { "ids": [ "36124320" ], "name": "Yan Zhang" } ], "doi": "10.1145/3077136.3080784", "doiUrl": "https://doi.org/10.1145/3077136.3080784", "entities": [ "Algorithm", "Experiment", "Gibbs sampling", "Information system", "Interaction", "Personalization", "Sampling (signal processing)", "Social Networks", "Social network", "Social search" ], "id": "adeb9da8d810cc3a7ef7887857a8ec8d96e20cdd", "inCitations": [], "journalName": "", "journalPages": "753-762", "journalVolume": "", "outCitations": [ "6cad4d9529e6a6a8a27e41af1860e8cf96848338", "2cf379819632deb93b2cd9250da25bf21fa25171", "1339b888fb739b8a1194fa94fbe848e01d93456a", "1491084572f77d15cb1367395a4ab8bb8b3cbe1a", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "adc003765629b9b554db53163a9ccaaf0d9e47f4", "0a8221f744e8ba86d69a07295b35b05531523e49", "4cd73382dc17561cd276f276c61d5ebf39bf69ad", "3e5755b6d4e1b21f2a90544c43c2f57d953cdfc3", "5c6005ea3753b8a5209b402e2ba1e64f466141c5", "226569e5064221563774fb78422cc10ba2b4611d", 
"8c03ea8dfe9f2aa5f806d3ce4a7f83671a5db3f5", "9610cc619682585aa05902656f2bf35b03a04f92", "a151fcaa3d003321d6e09602a927fc434d19b032", "10d3c0a7dd22780562bd76ef9bf4278ca3d4b0b5", "62bb7ce6ae6ed38f0ae4d304d56e8edfba1870d0", "21968ae000669eb4cf03718a0d97e23a6bf75926", "0fa69f27691baf2d635c3b3c579325fcd32b6daf", "6250d0b4821f51ca8aa08730eabf1caad8402e48", "3e8f926ef29c50196619b4019588738d640a92f9", "5baadbbb240bd3da9b4cb3c283f1fa3622bff005", "58a63086b209374d5cf625d27617eba1e96288ef", "3bddba3214fe9e7248a934455a2624aa5b781778", "bab268857baf555ea6d2c1f638857e28b4fd5aa5", "254721f4be7cc778231ad3437f856f30ea643cba", "063ac7f23c65bd97633d4f6bf4c31eb70879aa7c", "00bf724dbf055f3559eb64273000f448f5342fa1", "15973b0bfdbe84d1cb6e2a35fc857a1d125a3923", "167da2ad2b01a43566b21a4715d1315ff7ac45ef", "17ad7d385564833f682db11240ea3d74a5423256", "3cc69c7926bfef4662a8ce406cf8029346441838", "b527f54dc237721af64b3d952ac1798b358d3bb5" ], "paperAbstract": "Recently, online social networks are becoming increasingly popular platforms for social interactions. Understanding how information propagates in such networks is important for personalization and recommendation in social search.\n In this paper, we propose a Hierarchical Community-level Information Diffusion (HCID) model to capture the information diffusion process in social networks. We introduce the notion of users' topic popularity as to enable our model to depict the information diffusion process which is both topic-aware (which topic the information is concerned with) and source-aware (where the information comes from). Instead of assuming homogeneity of social communities, we propose the notion of community hierarchy, where information diffusion across inter-level communities is uni-directional from the higher levels to the lower ones.\n We design a Gibbs sampling algorithm to infer model parameters and propose prediction methods for two information diffusion prediction tasks, the retweet prediction and the cascade prediction. 
Comparison experiments are conducted on two real datasets. Results show that our model achieves substantial improvement compared with the existing work.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080784" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/adeb9da8d810cc3a7ef7887857a8ec8d96e20cdd", "sources": [ "DBLP" ], "title": "Hierarchical Community-Level Information Diffusion Modeling in Social Networks", "venue": "SIGIR", "year": 2017 }, "adebb94c3e62a47297bbf685e103cb89b7580b46": { "authors": [ { "ids": [ "2652547" ], "name": "Gaurav Saxena" }, { "ids": [ "1742197" ], "name": "Peter K. Jimack" }, { "ids": [ "33230769" ], "name": "Mark A. Walkley" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.48", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.48", "entities": [ "Adaptive Multi-Rate audio codec", "Adaptive mesh refinement", "Cubic function", "Discretization", "High Productivity Computing Systems", "Load balancing (computing)", "Message passing", "Numerical analysis", "Open-source software" ], "id": "adebb94c3e62a47297bbf685e103cb89b7580b46", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "364-371", "journalVolume": "", "outCitations": [ "186cb16fbcd8506091ae0b1bae4025caa7900dd4", "0deccedc40a40fbd2c395f2d6cbee0abd81ebc0c", "77eee519041bcb7281d8bc544ae69b8af386b6cc", "88a32f0546fccb673225fd2fcc4d9918e7a42298", "6251f8602db7d6263b4e1805461caf68ee859669", "140db6d24179f6d6efa590e23465464d18ccc7a1", "190023789a1cfe4317011c0fb28277a0d1626ba4", "c156cfedd88a18734131492f54d2b7ddc0adeafb" ], "paperAbstract": "In prior-research the authors have demonstrated that, for stencil-based numerical solvers for Partial Differential Equations (PDEs), the parallel performance can be significantly improved by 
selecting sub-domains that are not cubic in shape (Saxena et. al., HPCS 2016, pp. 875-885). This is achieved through accounting for cache utilization in both the message passing and the computational kernel, where it is demonstrated that the optimal domain decompositions not only depend on the communication and load balance but also on the cache-misses, amongst other factors. In this work we demonstrate that those conclusions may also be extended to more advanced numerical discretizations, based upon Adaptive Mesh Refinement (AMR). In particular, we show that when basing our AMR strategy on the local refinement of patches of the mesh, the optimal patch shape is not typically cubic. We provide specific examples, with accompanying explanation, to show that communication minimizing strategies are not necessarily the best choice when applying AMR in parallel. All numerical tests undertaken in this work are based upon the open source BoxLib library.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.48" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/adebb94c3e62a47297bbf685e103cb89b7580b46", "sources": [ "DBLP" ], "title": "A Cache-Aware Approach to Adaptive Mesh Refinement in Parallel Stencil-Based Solvers", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "ae02715ffec920ec94913ec655f9166cc1f1c644": { "authors": [ { "ids": [ "2042672" ], "name": "Ruichuan Chen" }, { "ids": [ "2282441" ], "name": "Istemi Ekin Akkus" }, { "ids": [ "34824488" ], "name": "Bimal Viswanath" }, { "ids": [ "2086940" ], "name": "Ivica Rimac" }, { "ids": [ "2809994" ], "name": "Volker Hilt" } ], "doi": "10.1145/3143361.3143388", "doiUrl": "https://doi.org/10.1145/3143361.3143388", "entities": [ "Cloud computing", "Failure rate", "Software as a service", 
"Software deployment" ], "id": "ae02715ffec920ec94913ec655f9166cc1f1c644", "inCitations": [], "journalName": "", "journalPages": "464-477", "journalVolume": "", "outCitations": [ "e70ca4cd5560a2e81795564bde278f6334b16de6", "040980f7892c9b562a3847cb97f0808858665070", "058f6752d85a517aae298586fdf117acdd7560ea", "55edb24987e86cf48eab59a4b9de814b73b8931c", "98d36cb98573543117e4748516aa85ef5a3c1093", "5dc26364e8c64868a40ef00004fc62d12c0e4264", "c62e795562e019330ea9dd8c67b4cec6de98e194", "53aabc0ab7bdb22c4bb5b508a4db2fc4a2387060", "7ae26da9b7666812857883536870c315538f7f10", "70182c452b98ad726bb19a53cddbb73687841a40", "41153ea83e44cc920dff0169b4d42664d2a5bc9d", "1e4da813c29a65f19f6e9432cb4efe8b7d45ac1d", "045a50ec31973fee15ff967f18e016fae77fd1f3", "0f8b04cb89e455ceadf0c88fd5dd9f9a7f338ba9", "0aeb77fb41dc8e863e054fcffea7b8b3011515ce", "1e2b8872ebf9a06265015757002d1bedec19e16e", "2a4423b10725e54ad72f4f1fcf77db5bc835f0a6", "05a436f059c3897c3509dc059903364eff4a79af", "068f7e719b394f5b098fd29f1ad190c8f65bc3ee", "1d912b67ba7cda4d341d834c1c6de96db01888fc", "2d20da962fe29ad45ee2d4dbc71c1e114fddf3dc", "47d5357957cabb610131db1b228e58b70860ee8d", "330de12472ed98642e1ed28944ff94d3d6eee8de", "4423433d2364b9d112f7ede56a9a6df571bc2a8f", "91f580f4ab65ed1b7ad6bd97af2bd67d43e7678b", "06faf0e1f1b6c7bf786cdeb474e3dc6a3f5435c3", "2bd0c2267e6722b00417578d0ee38a14dc12cae9", "2bce718b77e8ff9c733f3b03b78a8a0246a864a7", "1ee3e65a3e5cb1b814a39258aa0f7cb60a51f955", "811d08c2b2620b6f87e774fdd73cd708f4cb20c5", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "0cf1bc4a4d8362b5a675df3fd32c9ba450935a7a", "9e778ebe9a4b0955b802f7e46323dcffc6174f94", "086820e40dc8046c30a8751394df167bec047fe1", "168b8cbbbacd234f23b70b952ef58b5b56e67529", "0ec58ad7dffcc53018a786c069cb604ef1be5aae", "1b25e0b915ecb646101bc9464476591dfbe07ff5", "58f692e9b03cb973355aab46bb6f867239aeb513", "73a6eb2ae5e9aa37babb95748c4d8ecee7efaf22", "53abb9ca99f1c9e8038dcc0bfe4ccdf770a55db3", "46eba995c5371d7966d59549f61c203cecd1d3c7", 
"157629dc2a9d2c2c7696e021ff8c1d5a6e7b4197", "8a7536f311d22bd588c5bc2306d54d13effaee82", "663e064469ad91e6bda345d216504b4c868f537b", "d6571d5908f567a1dcf11bcc221e8213cfba1d3d", "05bd2c6e82a96e7bb3d7d7262f953fc53ead3d1a", "640af017aa8d11f9f31480155c8d5d1a0d8865d7", "25ac76af7c179a73c1280056f48d8afa59fe9802", "559e4671b87c3f76d3c485ebdaefe734323879f0", "e3c561049eb532e328fc2b8288c490986cd9403f", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "164d8d8238674cdfb9bbb2583cfc390e178420de", "235da9c0f828b60300f7e5cfa2ca6aaa116dd14c", "114dc57340496512f2e026b8ecdf7c746af21025", "ab1f0b1d1b21771a1a639d4702440331eb421a64", "d1ef3f5c15f978c1e8466d6e2a275bc9d5701b03", "7b1fbcd0d09b381ad9433a85587c373f4f1b77cf", "3e8ee62447587d3a875dc2f77a6df4dfda9be2de", "09f65af23ab2cf7b212de8ebd5105a9b9c3f7a06", "11a4744f7f0883c4232a9f5aaca8b9d29bfaa762", "0541d5338adc48276b3b8cd3a141d799e2d40150", "1f8699f5d99a0fa362bbc9e5071ac3cbbaf8e4da" ], "paperAbstract": "A common practice to increase the reliability of a cloud application is to deploy redundant instances. Unfortunately such redundancy efforts can be undermined if the application's instances share common dependencies. This paper presents ReCloud, a novel system that can efficiently find a reliable deployment plan for cloud applications. ReCloud considers and avoids common dependencies shared across application instances that may lead to correlated failures, and works with applications that even have complex internal structures. ReCloud utilizes various pieces of available dependency information (e.g., hardware, software and/or network dependencies) about the cloud infrastructure to quantitatively assess the reliability of the application's deployment plan with rigorous error bounds. This assessment further enables ReCloud to find a deployment plan that balances between reliability and other criteria such as application performance and resource utilization. We implemented a fully functional system. 
The experimental results show that, even in a large cloud environment with more than 27K hosts, ReCloud needs only 30 seconds to find a deployment plan that is one order of magnitude more reliable than the common practice.", "pdfUrls": [ "https://iakkus.github.io/papers/2017-conext-chen.pdf", "http://doi.acm.org/10.1145/3143361.3143388" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ae02715ffec920ec94913ec655f9166cc1f1c644", "sources": [ "DBLP" ], "title": "Towards Reliable Application Deployment in the Cloud", "venue": "CoNEXT", "year": 2017 }, "ae39cff83d4850476855c06d02a8dc80ae55ad42": { "authors": [ { "ids": [ "10041544" ], "name": "Jagadish B. Kotra" }, { "ids": [ "2813780" ], "name": "Seongbeom Kim" }, { "ids": [ "2421239" ], "name": "Kamesh Madduri" }, { "ids": [ "1717494" ], "name": "Mahmut T. Kandemir" } ], "doi": "10.1109/IISWC.2017.8167772", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167772", "entities": [ "CAS latency", "Desktop computer", "Haswell (microarchitecture)", "Hypervisor", "Internet bottleneck", "Memory management", "Nehalem (microarchitecture)", "Network congestion", "Non-uniform memory access", "Semiconductor consolidation", "Server (computing)", "Software system", "Uniform memory access", "Westmere (microarchitecture)" ], "id": "ae39cff83d4850476855c06d02a8dc80ae55ad42", "inCitations": [ "ce14ff3b9a139629e699882ca26434a29b5c07b3" ], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "146-155", "journalVolume": "", "outCitations": [ "6b1146e51b2c4d5b8433d4d9c6dbf87c6c484196", "4cb9325b008e1551404c526d5ba0b7b3b559f4ab", "105a4b77923df05b1514f700ea6f37f80b72c831", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "f69cf820714d69406bba646ca3e9ace7c444da0e", "53e11fc15261cc5e3a47bfda9eeb4c3355053b6d", "59ca42e1911be417863d0f7068b89e1e59189cc9", "0eff3eb68ae892012f0d478444f8bb6f50361be5", "109f26c285d48ba8f7b5e259364fecef0b3273f6", 
"146139716c9e8ec4f57475b9673171761ac34074", "8fb808a890a099896e34851179daba15659df11a", "4ebbbeab6e0f4ba9815889854441548fa414e16b", "549cca620961e5093e315a4b0f9e670da3ff258f", "18cc8c1902a5c0ae35c75d9cc647c04a679e520d", "4cbd7800701981b58b4c1f6d53c7a66a5fb15633", "a725204b6d9981f818a88b68ac7498a6261f7dea", "3efa068494a91a825b9744c1ee4b83663f363533", "274d7d0415ad8fc787f15b244339f8d0b37e6956", "2adcff217adf82fb5dd43d97e88503e4893f07c7" ], "paperAbstract": "He VMware ESXi hypervisor attracts a wide range of customers and is deployed in domains ranging from desktop computing to server computing. While the software systems are increasingly moving towards consolidation, hardware has already transitioned into multi-socket Non-Uniform Memory Access (NUMA)-based systems. The marriage of increasing consolidation and the multi-socket based systems warrants low-overhead, simple and practical mechanisms to detect and address performance bottlenecks, without causing additional contention for shared resources such as performance counters. In this paper, we propose a simple, practical and highly accurate, dynamic memory latency probing mechanism to detect memory congestion in a NUMA system. Using these dynamic probed latencies, we propose congestion-aware memory allocation, congestion-aware memory migration, and a combination of these two techniques. 
These proposals, evaluated on Intel Westmere (8 nodes) and Intel Haswell (2 nodes) using various workloads, improve the overall performance on an average by 7.2% and 9.5% respectively.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167772" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ae39cff83d4850476855c06d02a8dc80ae55ad42", "sources": [ "DBLP" ], "title": "Congestion-aware memory management on NUMA platforms: A VMware ESXi case study", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "ae6a5bc5286b91f77b5c608a507d1067043870fe": { "authors": [ { "ids": [ "1896753" ], "name": "Marco Brocanelli" }, { "ids": [ "1690476" ], "name": "Xiaorui Wang" } ], "doi": "10.1109/ICAC.2017.15", "doiUrl": "https://doi.org/10.1109/ICAC.2017.15", "entities": [ "Daemon", "Daemon (computing)", "Digital footprint", "Feature phone", "Low-power broadcasting", "Microcontroller", "Mobile app", "Multi-core processor", "Smartphone", "User interface" ], "id": "ae6a5bc5286b91f77b5c608a507d1067043870fe", "inCitations": [], "journalName": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "journalPages": "93-102", "journalVolume": "", "outCitations": [ "14859760785dc78f575a2023c97bbb9b83ee6467", "06ea03222b06bb86a75c41f6ce6d4fe00d533d42", "db2f7427b166fe9ac3e07eade5541b48426a842d", "e5e6f88a46c956fb76b6ac8c8363c9262db38bb8", "5892b9314971e90e32d8bf81ca4e7dcbecb5ef8f", "20d0b7473429464fc2f9bfd59d513d63c844551c", "120ae5f165e779b4a1c9921ab76e3a462404b8d1", "0f42b4dc664eb31df423c3de3a2cecf9c6ac83a8", "1d035d1d445f5ae5457db98af49ce1b87be1ebd5", "233cf6ae7b91b7dd11a6bd44c3079b8256905396", "87b99f3e8632e5915717d184bc8309ac9debfe03", "13dcb87a54110b02cc4a8a7780962cb3981bc037", "125f9d30a697b41999168390397eb6a6b899378d", "3c29ccdcebd8520731a58c5c4aa23cfa8c221665", "8af41e16436c0c8f6e6cc1301913d89f9d923634", "06706cd6c46bc413bb9c272d6c1c034313ff2742", 
"5dc445cae4224e93a701c9f78cc1da6d48e8aa9e", "5c47194e256009260746d9182b1723e3a3b8f379", "6f976295ebc0386fe6b60748d29bdfb2db809073", "824a7fd43e7b89def09eb9696d81164617c483b8" ], "paperAbstract": "A major concern for today’s smartphones is their much faster battery drain than traditional feature phones, despite their greater battery capacities. The difference is mainly contributed by those more powerful but also much more powerconsuming smartphone components, such as the multi-core application processor. While the application processor must be active when any smart apps are being used, it is also unnecessarily waken up, even during idle periods, to perform operations related to basic phone functions (i.e., incoming calls and text messages).In this paper, we investigate how to increase the battery life of smartphones by minimizing the use of the application processor during idle periods.We find that the application processor is often waken up by a process running on it, called the Radio Interface Layer Daemon (RILD), which interfaces the user and apps to the GSM/LTE cellular network. In particular, we demonstrate that a great amount of energy could be saved if RILD is stopped, such that the application processor can sleep more often. Based on this key finding, we design a Smart On Demand (SOD) configuration that reduces smartphone idle energy consumption by running RILD operations on a secondary low-power microcontroller. As a result, RILD operations can be handled at much lower energy costs and the application processor is waken up only when one needs to use any smart apps, in an on-demand manner. We have built a hardware prototype of SOD and evaluated it with real user traces. 
Our results show that SOD can increase its battery life by up to 2.5 more days.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICAC.2017.15" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ae6a5bc5286b91f77b5c608a507d1067043870fe", "sources": [ "DBLP" ], "title": "Smartphone Radio Interface Management for Longer Battery Lifetime", "venue": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "year": 2017 }, "ae876b07435880d0359ca68b30af90b2e3193ad4": { "authors": [ { "ids": [ "1804361" ], "name": "Dong Dai" }, { "ids": [ "1726357" ], "name": "Wei Zhang" }, { "ids": [ "3519489" ], "name": "Yong Chen" } ], "doi": "10.1145/3078597.3078606", "doiUrl": "https://doi.org/10.1145/3078597.3078606", "entities": [ "Algorithm", "Computational science", "Database", "Degree (graph theory)", "Graph database", "Graph partition", "Locality of reference", "Online transaction processing", "Parallel computing", "Requirement", "Social network", "Synthetic data", "Transaction processing" ], "id": "ae876b07435880d0359ca68b30af90b2e3193ad4", "inCitations": [], "journalName": "", "journalPages": "219-230", "journalVolume": "", "outCitations": [ "947c6bf534ccd620044f77c3bd6068f633b421fb", "2b9e6181502369199bd89691a27f89bdbaac36e4", "4714cd9a2c38a4590ca6802a076009a09e49f7e9", "22c74d8be071084ce8812af19548e7bf2bf0c8b6", "19cadcb4e7439bc525c604771ab4872ec93a5b53", "87ea76767d9c0a6ee3b68c2d2dafa01ce5db3d4f", "05dd4149b422b56cc037cd35eb041496b656d671", "3cd5bd08eb59e6f3d181a8e3fa958e77ef44e07e", "5a2e3b99774e88b4e26ef6f7c87ee424146d0482", "4612278f5ff220edf2a46404978626faf2ef4dbb", "1141ff370d51c25ae17709ae9131097313215e18", "09a487a94365b0a6862597213226ad04816185f4", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "0975baea2e5a34f75c06284ac355af7f2de2499b", "568f669def3d39edc89d850ce8ab09eb43899b06", "282f9082cd3a4565dbc5c4507c092ed18244c512", "3118495675d98e05e13e70af338467c4011b2816", "105de19ab71db0a38bc0d734c8fd0efeba2faab7", 
"01c1f0e97ce5c74c714dc7aa43cb064f45cc3b04", "58ba34f71bafffcd120112f97a55cebb656b8bab", "261fe21f58d1fa709a9c81e63d1dde4cc3d09158", "e2462bde978023a9069cc08326f626135a95cb89", "4cee38d9d088cf021bc5f5b9fda6764feeb1806a", "97f54490e9abb765042514546d89a6c36ba279cc", "1753c2dc85cc40e0a2e8b4a405c1690eab066d8d", "1156f60e40548096df49528b1342bb3e88b0f378" ], "paperAbstract": "Graphs have become increasingly important in many applications and domains such as querying relationships in social networks or managing rich metadata generated in scientific computing. Many of these use cases require high-performance distributed graph databases for serving continuous updates from clients and, at the same time, answering complex queries regarding the current graph. These operations in graph databases, also referred to as online transaction processing (OLTP) operations, have specific design and implementation requirements for graph partitioning algorithms. In this research, we argue it is necessary to consider the connectivity and the vertex degree changes during graph partitioning. Based on this idea, we designed an Incremental Online Graph Partitioning (IOGP) algorithm that responds accordingly to the incremental changes of vertex degree. IOGP helps achieve better locality, generate balanced partitions, and increase the parallelism for accessing high-degree vertices of the graph. 
Over both real-world and synthetic graphs, IOGP demonstrates as much as 2x better query performance with a less than 10% overhead when compared against state-of-the-art graph partitioning algorithms.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078597.3078606", "http://www.myweb.ttu.edu/ddai/papers/iogp-hpdc.pdf", "http://discl.cs.ttu.edu/lib/exe/fetch.php?media=p219-dai.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ae876b07435880d0359ca68b30af90b2e3193ad4", "sources": [ "DBLP" ], "title": "IOGP: An Incremental Online Graph Partitioning Algorithm for Distributed Graph Databases", "venue": "HPDC", "year": 2017 }, "aec72c125e6fe148455121ddee5bdff63af7016e": { "authors": [ { "ids": [ "1850125" ], "name": "Zhi Chen" }, { "ids": [ "40384319" ], "name": "Zhangxiaowen Gong" }, { "ids": [ "30436131" ], "name": "Justin Josef Szaday" }, { "ids": [ "31201708" ], "name": "David C. Wong" }, { "ids": [ "1729097" ], "name": "David A. Padua" }, { "ids": [ "4315458" ], "name": "Alexandru Nicolau" }, { "ids": [ "1764886" ], "name": "Alexander V. 
Veidenbaum" }, { "ids": [ "35222148" ], "name": "Neftali Watkinson" }, { "ids": [ "2937166" ], "name": "Zehra Sura" }, { "ids": [ "2596015" ], "name": "Saeed Maleki" }, { "ids": [ "1695950" ], "name": "Josep Torrellas" }, { "ids": [ "1802807" ], "name": "Gerald DeJong" } ], "doi": "10.1109/IISWC.2017.8167779", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167779", "entities": [ "Automatic vectorization", "Compiler", "For loop", "Library", "Loop optimization", "Program optimization", "Utility" ], "id": "aec72c125e6fe148455121ddee5bdff63af7016e", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "219-228", "journalVolume": "", "outCitations": [ "8df8598664263e9b7c1f5c39d5f28b082e6fb87b", "49e30610c99b8b6c5f38541cccd68613d6c7f0d7", "48b22e82dba733f04a5af3408ecab872ba38f0c3", "518d38f7ae0e734d0674a61427ccfb5bcbbc88b7", "17b60d466abfb8a86eeb45b5b1a1b429a3b6b30a", "3020f7f8381227c90ac58466ec116f470d0b63ec", "d259e252d9042af5f39f23c08814863e6d884b01", "0653e2ed9f683868cb4539eb8718551242834f6b", "3e2480d7136fe5c6fa7213ea834566b93570c3ca", "5240aedc03d3a203da7548c1efcf4e42dcb6e5c7", "05c5eaabc93f37021260904cec8d8f25c4afebdd", "da01f7fcc5c7eeba75bc09a41fdd946e65210090", "383aec58bdf09e4549c4df2c984214838c5cb7f6", "0cbb62679982bce62a6fe6963bc0399fb35aeaa0", "8e7ad8c633c757304181010c39d037d0ec14fe57", "7715a8f043d9a8af5cdb8c44eab92a15bf2162db" ], "paperAbstract": "Although numerous loop optimization techniques have been designed and deployed in commercial compilers in the past, virtually no common experimental infrastructure nor repository exists to help the compiler community evaluate the effectiveness of these techniques. This paper describes a repository, LORE, that maintains a large number of C language for loop nests extracted from popular benchmarks, libraries, and real applications. It also describes the infrastructure that builds and maintains the repository. 
Each loop nest in the repository has been compiled, transformed, executed, and measured independently. These loops cover a variety of properties that can be used by the compiler community to evaluate loop optimizations using a broad and representative collection of loops. To illustrate the usefulness of the repository, we also present two example applications. One is assessing the capabilities of the auto-vectorization features of three widely used compilers. The other is measuring the performance difference of a compiler across different versions. These applications prove that the repository is valuable for identifying the strengths and weaknesses of a compiler and for quantitatively measuring the evolution of a compiler.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167779", "http://iacoma.cs.uiuc.edu/iacoma-papers/PRES/present_iiswc17.pdf", "http://iacoma.cs.uiuc.edu/iacoma-papers/iiswc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/aec72c125e6fe148455121ddee5bdff63af7016e", "sources": [ "DBLP" ], "title": "LORE: A loop repository for the evaluation of compilers", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "af0e2e50fc623d12b089740a2bfa11c5ee97f523": { "authors": [ { "ids": [ "34393940" ], "name": "Gavin Baker" }, { "ids": [ "2526280" ], "name": "Chris Lupo" } ], "doi": "10.1145/3030207.3030230", "doiUrl": "https://doi.org/10.1145/3030207.3030230", "entities": [ "Benchmark (computing)", "Central processing unit", "Channel memory", "Coprocessor", "Graphics processing unit", "Hardware acceleration", "Heterogeneous computing", "Intel QuickPath Interconnect", "Non-uniform memory access", "Open-source software", "PCI Express", "Parallel computing", "Performance per watt", "Profiling (information science)", "Resource contention", "Uniform memory access", "Usability" ], "id": "af0e2e50fc623d12b089740a2bfa11c5ee97f523", "inCitations": [], "journalName": "", 
"journalPages": "305-316", "journalVolume": "", "outCitations": [ "388587b903aaf56791e786522246883aeaf89892", "3af5d2164fdbcbb47f64044e62445ed5dec0c245", "43054be153cccd589fa417751431d71e1b4d4a19", "5cc3780286a223c5b27df8e21b20f0c8f538f571", "f3e9db1605922044ca4506dbee54841caf821a0a", "107dffd9177fb9cc7d62c7fd3369f12d47861540" ], "paperAbstract": "Computer architects have increased hardware parallelism and power efficiency by integrating massively parallel hardware accelerators (coprocessors) into compute systems. Many modern HPC clusters now consist of multi-CPU nodes along with additional hardware accelerators in the form of graphics processing units (GPUs). Each CPU and GPU is integrated with system memory via communication links (QPI and PCIe) and multi-channel memory controllers. The increasing density of these heterogeneous computing systems has resulted in complex performance phenomena including non-uniform memory access (NUMA) and resource contention that make application performance hard to predict and tune. This paper presents the Topology Aware Resource Usability and Contention (TARUC) benchmark. TARUC is a modular, open-source, and highly configurable benchmark useful for profiling dense heterogeneous systems to provide insight for developers who wish to tune application codes for specific systems. 
Analysis of TARUC performance profiles from a multi-CPU, multi-GPU system is also presented.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030230" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/af0e2e50fc623d12b089740a2bfa11c5ee97f523", "sources": [ "DBLP" ], "title": "TARUC: A Topology-Aware Resource Usability and Contention Benchmark", "venue": "ICPE", "year": 2017 }, "af357a6e5077ec1f7a6b67d69afb647d407aa736": { "authors": [ { "ids": [ "2123452" ], "name": "Jiajun Wang" }, { "ids": [ "2130920" ], "name": "Reena Panda" }, { "ids": [ "1703238" ], "name": "Lizy Kurian John" } ], "doi": "10.1109/ISPASS.2017.7975288", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975288", "entities": [ "Big data", "CPU cache", "Cache (computing)", "Cloud computing", "Computer memory", "Data mining", "Link prefetching", "Software as a service", "Web search engine", "Working set" ], "id": "af357a6e5077ec1f7a6b67d69afb647d407aa736", "inCitations": [ "e2a43a6f01a08c980aa0ed36d22924d9b3bb550e", "b1737410b565b760da5cff99572a1349f501667a" ], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "163-172", "journalVolume": "", "outCitations": [ "bc2bba7e1bb4e7d8307aa36bdc5ee86cdd61cc58", "beb3c64ee6d00b89ab5d5c06d85f5433f2689413", "02fe9b425b78a0211ccfaa2710f949fa2a769406", "2af32811c6bf3be891ee84b19248540dfa1aa58f", "d35be2b3f5860b60fa427688b46c8f348fff50ae", "59680d7d7feef0286605cb81d35bc8fa2292a608", "6521c3812eb43ef082dda19cd5961a67ca608f33", "0d8dfa6ec2fdbb432e1ce76a3e7e542336b270c2", "34c327c6b1bde6a3f9c595a38b14612c6fcb7c2d", "d167b5c8b21c642662000417f313798d375ff38e", "2d1e64c74c612b882afde5e1ca219ae2cee8253e", "40f85cbe67ce1ce89009985e9caed648dd08c12e", "006d9d8e348f68d2e3353981c3a770385d71858a", "34ddc3da70f5b17ae0a73266ad1e4f9ae155811f", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "1c32ad0a42109fab826eb3054df7cfc33b424125", "082573e4dc88f38628242d193c966725ab355026", 
"c53c3ccbdc359fd7f4279d37c9f8acfce3d8c579", "46b444dbd774cedb8d35ac9b94b5e3aca9a0873c", "3b095e04f13a487c0b8679e64098d7929c1d7db7", "9aa0d7253574e50fe3a190ccd924433f048997dd", "5984d89301db24fe9bf6d45679a996b3f54ec857", "1237b20fa7afa553118cde32294c481db15286c6", "4a58e3066f12bb86d7aef2776e9d8a2a4e4daf3e" ], "paperAbstract": "Cloud computing is gaining popularity due to its ability to provide infrastructure, platform and software services to clients on a global scale. Using cloud services, clients reduce the cost and complexity of buying and managing the underlying hardware and software layers. Popular services like web search, data analytics and data mining typically work with big data sets that do not fit into top level caches. Thus performance efficiency of last-level caches and the off-chip memory becomes a crucial determinant of cloud application performance. In this paper we use CloudSuite as an example and we study how prefetching schemes affect cloud workloads. We conduct detailed analysis on address patterns to explore the correlation between prefetching performance and intrinsic workload characteristics. Our work focuses particularly on the behavior of memory accesses at the last-level cache and beyond. We observe that cloud workloads in general do not have dominant strides. State-of-the-art prefetching schemes are only able to improve performance for some cloud applications such as web search. 
Our analysis shows that cloud workloads with long temporal reuse patterns often get negatively impacted by prefetching, especially if their working set is larger than the cache size.", "pdfUrls": [ "http://lca.ece.utexas.edu/pubs/ispass_jiajun.pdf", "https://doi.org/10.1109/ISPASS.2017.7975288" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/af357a6e5077ec1f7a6b67d69afb647d407aa736", "sources": [ "DBLP" ], "title": "Prefetching for cloud workloads: An analysis based on address patterns", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "af5073e253ba61efd5baee66a25a163debb09a69": { "authors": [ { "ids": [ "34617972" ], "name": "Artur Boronat" } ], "doi": "10.1145/3136014.3136026", "doiUrl": "https://doi.org/10.1145/3136014.3136026", "entities": [ "Eclipse Modeling Framework", "FOSD metamodels", "Is-a", "Mathematical model", "Metamodeling", "Model-driven engineering", "Multiple inheritance", "Object Constraint Language", "Type system" ], "id": "af5073e253ba61efd5baee66a25a163debb09a69", "inCitations": [], "journalName": "", "journalPages": "194-205", "journalVolume": "", "outCitations": [ "a5e8b08583d8c235ba949fefc06b4cdbde6a6868", "c51fa7747bbdc0b7ffe815b93cfe6cb056b7b1ec", "7866fbc1db4323985b3e69946917ab53085a0ce0", "0ed3480d40fa2eae822c64855c8970d611b2da1f", "10b206ff108069c460bf1623f40ca4521393292e", "4075d84411e7e75b1507f9baa2310a46b0e2e062", "d2e11ed4964fda0e2181b34c56195e67993fccf5", "01c37126c8a38d55d629d3e2f6aa5eb4aeb44943", "9a5740918f1e3768575999b02459516c83416e4c", "b489a1341d7d585ae873db095e4b76c98e34fa2a", "07eb0080630f63d9d7688f28fc57892b2d7c6ca6", "22e76cd11d915292d6517b9499fef264d4345b75", "24c5a5898c0217c10fb08e845d2957b23ca8dfd9", "c5b476c1f6065741ad2b2804f6aa36a4c776efb8", "c08f035e3a837e760e8087de64521b74b970ce6d", "9b119169aeedc1a3fa76d4d0e2c6289ee6230519", "d19dda65e2db1e5855cfcd21758e4317e151030f", "06a2b8c07471a7c37d9b241f78525078d225f8cb", 
"7e5f9f33e8eee5b917a1bbfae644dfb88088cdf8", "046f36f4ec00d86fcf036eddf02dc336e7777c8a", "3bf29c230c334aef7663f1aa8f8fa7f6bb98af0a", "dead1bab20b25a6d2f04b060919df45d250d800d", "b4e55a9e9b9411a5df607935c8f1b5e805662d71", "390c344cb04f5a720dd7bf21f2c4d39e9940937a", "7b766217aacc85175e62220be7c047c4b1de6f0b", "07d6f0c0f97491271506e681fb6dc24b5fed4e22", "3c5b9eca9024631cd44fed2b8d0169f25a9da1eb", "24b976fb0fc029ea56e2bf4e666d43d84a763a89", "4722a4e3e4113bba637548854f3f72f0e95ff62a", "21f68cccdd3c3ccf718f5d52dc1368373bcec121" ], "paperAbstract": "In model-driven engineering (MDE), models abstract the relevant features of software artefacts and model management operations, including model transformations, act on them automating large tasks of the development process. Flexible reuse of such operations is an important factor to improve productivity when developing and maintaining MDE solutions. In this work, we revisit the traditional notion of object subtyping based on subsumption, discarded by other approaches to model subtyping. We refine a type system for object-oriented programming, with multiple inheritance, to support model types in order to analyse its advantages and limitations with respect to reuse in MDE. Specifically, we extend type expressions with referential constraints and with OCL constraints. Our approach has been validated with a tool that extracts model types from (EMF) metamodels, paired with their OCL constraints, automatically and that exploits the extended subtyping relation to reuse model management operations. We show that structural model subtyping is expressive enough to support variants of model subtyping, including multiple, partial and dynamic model subtyping. 
The tool has received the ACM badge \"Artifacts Evaluated \u2212 Functional\".", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136026", "https://lra.le.ac.uk/bitstream/2381/41180/6/Boronat_sle17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/af5073e253ba61efd5baee66a25a163debb09a69", "sources": [ "DBLP" ], "title": "Structural model subtyping with OCL constraints", "venue": "SLE", "year": 2017 }, "af857f7324cef45e3cdc290e63fc5ac25ca06ef7": { "authors": [ { "ids": [ "36931447" ], "name": "Luis Garcia" }, { "ids": [ "6639898" ], "name": "Ferdinand Brasser" }, { "ids": [ "3007033" ], "name": "Mehmet Hazar Cintuglu" }, { "ids": [ "8415280" ], "name": "Ahmad-Reza Sadeghi" }, { "ids": [ "2365695" ], "name": "Osama A. Mohammed" }, { "ids": [ "1800447" ], "name": "Saman A. Zonouz" } ], "doi": "", "doiUrl": "", "entities": [ "Adversary (cryptography)", "Control system", "David W. Bradley", "Embedded system", "Failure rate", "Firmware", "Malware", "Physical plant", "Power-line communication", "Programmable logic device", "Rootkit", "Sensor", "Software deployment", "System safety", "Testbed", "Trust (emotion)" ], "id": "af857f7324cef45e3cdc290e63fc5ac25ca06ef7", "inCitations": [ "fe25125d8ef245ea2f32ad01b498eea2acb0c3e0", "59b645579d7ac9983951b9b6b51610b2b2e1ad01", "8dda341fe79e8eaf2b43c0ab60e22da46ce9c761", "67c9dccba7dee85fd0d0c5b703bc47cc6259ab09", "d4fd2ce667d849f1ef064db90ff003e793f045fd", "9b2a3cad73b2c086220ea9e4b82e059e4e65aa79", "7887b47bae82e31861df084db2e365d563e430d6", "a493c723794c0d3bcd0b00cb78b0e24c417b5056", "42168d6f3df61ec701098da4268c755e1e481304", "c3118a4791458619e4ff6986991378164e36b1fe", "29aa4e03d46dd46d278e62f0e6d1af9c56368c31" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "3f5e13e951b58c1725250cb60afc27f08d8bf02c", "3e5a6e6a2779c4ab1f15ff36611ebaa8d54508e8", "277f21e099ce9a85b938b7bb9c09133f7b23c0a0", "2a7f77384a45dc8ac86faa265f92a5b3c1d048da", 
"1af84c9c62fb85590c41b7cfc9357919747842b2", "d4ddcb4c6b4b57c5aaed15c49bab1c5bf9238cba", "24e9c86f6aba84a1a4b1e266cd60f689a6d0f98e", "05c7cffede9dbf1624bfa4d658c457fa33499307", "d47df16183cf956ccedb320d82705462b109f554", "f4241fc37a32ffce5aa20f504cb7654675f22569", "2dd486e7a35e161d6cfec66b74ee1fc48dde4455", "4ccd740d05526ca9a1d76ade93043f7173591f18", "300fff001ec0164e0f119d1b0e8dbe735f0bebaa", "17bb76f79a0aca5abc36096bcb36c2611c0d1d71", "1f527fed31971e07093695c128c10b4f3c20d109", "47da0dc2a0dfcf08ed451452845c1ae2410fa4e0", "bdfd34769911b3fb40eadf71bfb34a0ec98fe160", "80cd13219ebcf0a15956979f2abed02630b32a2b", "ed3513c95fb93d174f0a84273ea16c877fa828a5", "d8f03d77418167fd2c1b40173786e1faec1ca9bc", "10272f29fff747d7efccab3b58d64ffd1112c811", "c265ea208212d0f49ba93ce32c38b282b6982e5c", "739459f31bef7d20a46f615d0af314b184ba7531", "00e87244ed9e7025536e108d89d2ea1a73ef217e", "12349e8f09a4a1f0c2dd46e94320cd9dccc4fede", "26d15bf2e248a5c5df2da0acd8c1bbb73a2726fd", "16b3bbfce587386ef0dca21460df1cef2706596b", "94917e2c031ea8abd8eb4986009d470598c561d8", "357abbad242e3222940fa05d05d5261bafc3cf5c", "31c44a8131c07465d4d0f97956bdbffea344e5aa", "9d04c008698849430bf627b12fe1b815c1e39518", "1a72812452454d01f5afea10a108b693742c956d", "56afbe673f14b4dfa0c4894043713bf16f9492b4", "52f23a0e428acc4348a815eb964c6a99287cd304" ], "paperAbstract": "Trustworthy operation of industrial control systems (ICS) depends on secure code execution on the embedded programmable logic controllers (PLCs). The controllers monitor and control the underlying physical plants such as electric power grids and continuously report back the system status to human operators. We present HARVEY, 1 a PLC rootkit that implements a physics-aware stealthy attack against cyberphysical power grid control systems. HARVEY sits within the PLC\u2019s firmware below the control logic and modifies control commands before they are sent out by the PLC\u2019s output modules to the physical plant\u2019s actuators. 
HARVEY replaces legitimate control commands with malicious, adversary-optimal commands to maximize the damage to the physical power equipment and cause large-scale failures. To ensure system safety, the operators observe the status of the power system by fetching system parameter values from PLC devices. To conceal the maliciously caused anomalous behavior from operators, HARVEY intercepts the sensor measurement inputs to the PLC device. HARVEY simulates the power system with the legitimate control commands (which were intercepted/replaced with malicious ones), and calculates/injects the sensor measurements that operators would expect to see. We implemented HARVEY on the widely spread Allen Bradley PLC and evaluated it on a real-world electric power grid test-bed. The results empirically prove HARVEY\u2019s deployment feasibility in practice nowadays.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/hey-my-malware-knows-physics-attacking-plcs-physical-model-aware-rootkit/", "http://wp.internetsociety.org/ndss/wp-content/uploads/sites/25/2017/09/ndss2017_08-1_Garcia_paper.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/af85/7f7324cef45e3cdc290e63fc5ac25ca06ef7.pdf", "s2Url": "https://semanticscholar.org/paper/af857f7324cef45e3cdc290e63fc5ac25ca06ef7", "sources": [ "DBLP" ], "title": "Hey, My Malware Knows Physics! 
Attacking PLCs with Physical Model Aware Rootkit", "venue": "NDSS", "year": 2017 }, "af8cbf190af96f7e3efb5bc6eda7eec247edab0e": { "authors": [ { "ids": [ "1717000" ], "name": "Guillaume Aupy" }, { "ids": [ "19274839" ], "name": "Clement Brasseur" }, { "ids": [ "1718549" ], "name": "Loris Marchal" } ], "doi": "10.1109/IPDPS.2017.58", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.58", "entities": [ "Algorithm", "Central processing unit", "Data dependency", "Heuristic", "Input/output", "Memory bound function", "Multiprocessing", "NP-completeness", "Parallel computing", "Scheduling (computing)", "Sparse matrix", "Synthetic data" ], "id": "af8cbf190af96f7e3efb5bc6eda7eec247edab0e", "inCitations": [ "82032bf46de30412e2c6bfb42cbc48029d701109" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "758-767", "journalVolume": "", "outCitations": [ "148b6e0d808ff89e70f3c5e9d1ee9f7b88f0549a", "0f82a8d1aa4762e19059c735f7f840e7bd60ac48", "1f02f71a302bc9b1c45eb6125eabbc5960fb0d2d", "6bd91c5e9fcf66496dc93ba4690e3fe112959156", "cd954d71d103504d246b39b016ecbb957c20858b", "c4b854d185adf0caf10d4da0a567ea728a4b7c68", "0578cda4294fae60ac2b0600daf0d1aec30eb1b8", "3d03b4548ce01cbed75c61a74c26b19cc2b746c0", "1f2e5225d333781601bd3d19ba7f1184796e4255", "52b03acb17591fe2c62a6440b4527f65c3bd9714" ], "paperAbstract": "Factorizing sparse matrices using direct multifrontal methods generates directed tree-shaped task graphs, where edges represent data dependency between tasks. This paper revisits the execution of tree-shaped task graphs using multiple processors that share a bounded memory. A task can only be executed if all its input and output data can fit into the memory. The key difficulty is to manage the order of the task executions so that we can achieve high parallelism while staying below the memory bound. 
In particular, because input data of unprocessed tasks must be kept in memory, a bad scheduling strategy might compromise the termination of the algorithm. In the single processor case, solutions that are guaranteed to be below a memory bound are known. The multi-processor case (when one tries to minimize the total completion time) has been shown to be NP-complete. We present in this paper a novel heuristic solution that has a low complexity and is guaranteed to complete the tree within a given memory bound. We compare our algorithm to state of the art strategies, and observe that on both actual execution trees and synthetic trees, we always perform better than these solutions, with average speedups between 1.25 and 1.45 on actual assembly trees. Moreover, we show that the overhead of our algorithm is negligible even on deep trees (10^5), and would allow its runtime execution.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.58", "https://hal.inria.fr/hal-01390107v1/document" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/af8cbf190af96f7e3efb5bc6eda7eec247edab0e", "sources": [ "DBLP" ], "title": "Dynamic Memory-Aware Task-Tree Scheduling", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "af8ddd263336b129e7be43e4fda7e110302b2d86": { "authors": [ { "ids": [ "6246197" ], "name": "Maria Malik" }, { "ids": [ "1740142" ], "name": "Dean M. 
Tullsen" }, { "ids": [ "1747542" ], "name": "Houman Homayoun" } ], "doi": "10.1109/IISWC.2017.8167753", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167753", "entities": [ "Big data", "Cost efficiency", "Data center", "Electronic data processing", "MapReduce", "Program optimization", "Programming model", "Scheduling (computing)", "Server (computing)" ], "id": "af8ddd263336b129e7be43e4fda7e110302b2d86", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "22-31", "journalVolume": "", "outCitations": [ "776fd831d08a955ac85e07dc6aa821c6614e8345", "7317dcd52ea4dee743ca377dc0497397b0df2d15", "8cbec3e5cf6e73433d2ac3098c831dae5814caa7", "c3f443d86a5f1b34c4cbcbf18ec6c08a37d5a649", "2a19d31a818be868bf39dcf258b36a86ef2ece3f", "71fcd2988e8453fe788aa40ac37160c197d778ae", "4725d50b5f56103888a213dbc51f988aaf06bf4c", "54111c92d666fe9cb22224503a6781c590ef5ad1", "516081ef1d32c90a07a64c8060b82bf10df26f4c", "32aa639d91607c10520d163726dbafbcd79ea46e", "173c39f15841ab5ee2976c34f6b4c3808c386d94", "7a978f2902460e732c50c36a171deb11733df1fc", "2db9f8767a6da83bd0186b4835f9e329c258bdca", "54f3331b575b2d451c2d716f86496cada23d596d", "581b0e29991ffd8396e2d91b9c53ad483e72d9b8", "8d67bac352dcd43c9c08f917ba8c4bebb444b55d", "035d92a0286d7eef6179c9eb09530082eb291827", "1d8465c3f5aee1b7a790f6eeb44637343861ba47", "67b9072f4c7d0b8e7e05983e3532aebddbe5098f", "3c0bc4e9d30719269b0048d4f36752ab964145dd", "59ca22b032e7e1fb4467a3b2ad577cbf9d8f0ea8", "8a9112daed1df3caeb8efd7e4b05e4ed05c23dbc", "38628d26d4f624378f4303b61ae93c5d34d007c3", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "1bed9dbc346fcc7c39ac42c8a3be089a76f4d11a", "4bcc9f705fed158ab33b01477337dda86abdc62d", "e6899ce6a4cc6c3153874222871f619b5c512047", "6b1146e51b2c4d5b8433d4d9c6dbf87c6c484196", "728dfc52a2f13570a29c9d5bf257f02bf98682bc", "590bd345ef4b8f274af3363a52b7d8f518cdc08a", "58537d88aad8249dfd25c3e859fd278e39f382b8", "e59e81579734b7746f082a6ca4c95c216344474d", 
"a751f6f93c46d72a4f8201a0a5df3432243a0991", "9ee6209432316baf6776838917e06bca4d874747", "3000e77ed7282d9fb27216f3e862a3769119d89e", "34d67f1f2578013ec828f2840e28bc3961d8a98c", "de0f08295b2f3778dd3ce5ae147aa921e49bf6a1" ], "paperAbstract": "Datacenters provide flexibility and high performance for users and cost efficiency for operators. However, the high computational demands of big data and analytics technologies such as MapReduce, a dominant programming model and framework for big data analytics, mean that even small changes in the efficiency of execution in the data center can have a large effect on user cost and operational cost. Fine-tuning configuration parameters of MapReduce applications at the application, architecture, and system levels plays a crucial role in improving the energy-efficiency of the server and reducing the operational cost. In this work, through methodical investigation of performance and power measurements, we demonstrate how the interplay among various MapReduce configurations as well as application and architecture level parameters create new opportunities to co-locate MapReduce applications at the node level. We also show how concurrently fine-tuning optimization parameters for multiple scheduled MapReduce applications improves energy-efficiency compared to fine-tuning parameters for each application separately. In this paper, we present Co-Located Application Optimization (COLAO) that co-schedules multiple MapReduce applications at the node level to enhance energy efficiency. 
Our results show that through co-locating MapReduce applications and fine-tuning configuration parameters concurrently, COLAO reduces the number of nodes by half to execute MapReduce applications while improving the EDP by 2.2X on average, compared to fine-tuning applications individually and run them serially for a broad range of studied workloads.", "pdfUrls": [ "http://ece.gmu.edu/~hhomayou/files/Malik-IISWC-2017.pdf", "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167753" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/af8ddd263336b129e7be43e4fda7e110302b2d86", "sources": [ "DBLP" ], "title": "Co-locating and concurrent fine-tuning MapReduce applications on microservers for energy efficiency", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "afb92062975dd4ddbabf7b7e40ed23b7b238f5b0": { "authors": [ { "ids": [ "2368347" ], "name": "Sudsanguan Ngamsuriyaroj" }, { "ids": [ "9341917" ], "name": "Kittirat Thepsutum" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.37", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.37", "entities": [ "Cluster analysis", "Interaction", "K-means clustering" ], "id": "afb92062975dd4ddbabf7b7e40ed23b7b238f5b0", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "286-291", "journalVolume": "", "outCitations": [ "45d72eac2ad2c8ac55d653072503f8f40ea6a296", "183898655299b70cf7fd198b373c93137337b31a", "fa266a070a2f431976fb62e9064fa60921742080", "977fe5853db16e320917a43fb00f334456625a1e", "3d7112ad58bc7806d77fe669215679ce0c9e4837", "5cfa934c904ef06728b1517da75b01a5eef3d92e", "773dafb377c6c2340b231522c43563380ff5c622", "49b60b92201710d095684b6df1b1d93f92122dd0" ], "paperAbstract": "Biological functions in all living cells are 
performed by protein-protein interactions since they form cells and control function mechanisms. Thus, identifying pairs of protein-protein interactions would be very useful, but it is not an easy task. But, doing a wet lab consumes huge amount of resources whereas using computational methods is highly challenging since they may introduce high false positives. Since a protein is a sequence of amino acids, a protein interaction would be influenced by some interactions of amino acids, and the identification of outstanding interacting pairs would give insightful meaning into how a pair of proteins interacts. This paper proposes a novel method to analyze a set of well-known protein-protein interactions for identifying a set of strong amino acid pairs that may influence the interaction. We calculate amino acid correlation values via Pearson's correlation, and use K-means clustering to group a set of outstanding amino acid pairs based on correlation values. The experimental results for 10 sets of protein interaction networks can identify a number of strong amino acid pairs among them.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.37" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/afb92062975dd4ddbabf7b7e40ed23b7b238f5b0", "sources": [ "DBLP" ], "title": "Identifying Dominant Amino Acid Pairs of Known Protein-Protein Interactions via K-Means Clustering", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "afc17c4a6769b548c380269450fb83cdf5137921": { "authors": [ { "ids": [ "34284140" ], "name": "J\u00f6rn Schumacher" } ], "doi": "10.1109/HOTI.2017.25", "doiUrl": "https://doi.org/10.1109/HOTI.2017.25", "entities": [ "ATLAS", "Client\u2013server model", "Data acquisition", "End system", "Experiment", "InfiniBand", "Message Passing 
Interface", "Message queue", "Publish\u2013subscribe pattern", "Requirement", "Server (computing)", "Throughput" ], "id": "afc17c4a6769b548c380269450fb83cdf5137921", "inCitations": [], "journalName": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "journalPages": "57-64", "journalVolume": "", "outCitations": [ "7d771b20731969fe10c267465582ee60e9383db3", "0641c61c2709ea41536cf78bcc6316fb4951b5ab", "77d1e3ddc48f354545b1fa6201b67ed118750686", "de695a5d60badd66339fdd1e6c614eeadc98e4a8", "8785083d32191fc633f1b30904cc52dda76ad4a1", "29014c1f9edcdcaabfbd49f8c27db11ba775a85b", "33cfd1dabf1dfddaa55a32a2542fff68d094c8ea", "4f604eb3b4d92bbd7563a853674392abe0e5f4d9", "7605fe626c3598ee68fefaf1f4e1d21fcd2cb3d4", "dc3ceea37d4bea4332287717b943d2b363070d9a", "1f1223fe0db94c8a6a380f98308844cb9dda8254" ], "paperAbstract": "Because of their performance characteristics, high-performance fabrics like Infiniband or OmniPath are interesting technologies for many local area network applications, including data acquisition systems for high-energy physics experiments like the ATLAS experiment at CERN. This paper analyzes existing APIs for high-performance fabrics and evaluates their suitability for data acquisition systems in terms of performance and domain applicability. The study finds that existing software APIs for high-performance interconnects are focused on applications in high-performance computing with specific workloads and are not compatible with the requirements of data acquisition systems. To evaluate the use of high-performance interconnects in data acquisition systems, a custom library called NetIO has been developed and is compared against existing technologies. NetIO has a message queue-like interface which matches the ATLAS use case better than traditional HPC APIs like MPI. The architecture of NetIO is based on an interchangeable back-end system which supports different interconnects. 
A libfabric-based back-end supports a wide range of fabric technologies including Infiniband. On the front-end side, NetIO supports several high-level communication patterns that are found in typical data acquisition applications like client/server and publish/subscribe. Unlike other frameworks, NetIO distinguishes between high-throughput and low-latency communication, which is essential for applications with heterogeneous traffic patterns. This feature of NetIO allows experiments like ATLAS to use a single network for different traffic types like physics data or detector control. Benchmarks of NetIO in comparison with the message queue implementation ØMQ are presented. NetIO reaches up to 2x higher throughput on Ethernet and up to 3x higher throughput on FDR Infiniband compared to ØMQ on Ethernet. The latencies measured with NetIO are comparable to ØMQ latencies.", "pdfUrls": [ "https://cds.cern.ch/record/2273702/files/ATL-DAQ-PROC-2017-020.pdf", "http://doi.ieeecomputersociety.org/10.1109/HOTI.2017.25" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/afc17c4a6769b548c380269450fb83cdf5137921", "sources": [ "DBLP" ], "title": "Utilizing HPC Network Technologies in High Energy Physics Experiments", "venue": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "year": 2017 }, "afee592460f76fba10418dd723f4c99039a2203d": { "authors": [ { "ids": [ "27083500" ], "name": "Scott J. H. Buckley" }, { "ids": [ "3100464" ], "name": "Anthony M. 
Sloane" } ], "doi": "10.1145/3136014.3136024", "doiUrl": "https://doi.org/10.1145/3136014.3136024", "entities": [ "Attribute grammar", "Coq (software)", "Correctness (computer science)", "Meta-process modeling", "Operational semantics" ], "id": "afee592460f76fba10418dd723f4c99039a2203d", "inCitations": [], "journalName": "", "journalPages": "139-150", "journalVolume": "", "outCitations": [ "8c3ecc7a560a8c8923659a5f592a43310f2de954", "fcefe5d41194ef8a646132e6317b10c53f1edff2", "8c0f865cba4d4e298c623252dad8ac9cd77e50b1", "309e1722b8f1b963d8372e2991926302680c0c24", "39b4f940daf238be80e5ed87f437a03399451a9b", "eef0e0820ca3ef8cee957c89373527e8a73dcaaf", "457e62e93d81b1aee73e543f1bc19b5fb4ca1416", "3089127f8ab8b6e22c7956754b34904cb2794c7e", "4f66059c1dfbc763fd7329eb7d7d7700dc344b83", "6d2712a243246434750317f1e2f05d3e31f2d717", "fa31aa034d286289d9739b5e579d3ff9901733cf", "0b61a17906637ece5a9c5e7e3e6de93378209706", "4592e4249010d8353746e07aa23d606da8ba3897" ], "paperAbstract": "The similarities and differences between attribute grammar systems are obscured by their implementations. A formalism that captures the essence of such systems would allow for equivalence, correctness, and other analyses to be formally framed and proven. We present Saiga, a core language and small-step operational semantics that precisely captures the fundamental concepts of the specification and execution of parameterised reference attribute grammars. We demonstrate the utility of Saiga by a) proving a meta-theoretic property about attribute caching, and b) by specifying two attribute grammars for a realistic name analysis problem and proving that they are equivalent. 
The language, semantics and associated tests have been mechanised in Coq; we are currently mechanising the proofs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136024" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/afee592460f76fba10418dd723f4c99039a2203d", "sources": [ "DBLP" ], "title": "A formalisation of parameterised reference attribute grammars", "venue": "SLE", "year": 2017 }, "aff5e855936208786f5e21e164b93a0d82279a70": { "authors": [ { "ids": [ "13798887" ], "name": "Xiangqi Li" }, { "ids": [ "1697043" ], "name": "Matthew Flatt" } ], "doi": "10.1145/3136014.3136019", "doiUrl": "https://doi.org/10.1145/3136014.3136019", "entities": [ "Debugger", "Debugging", "Digital subscriber line", "Domain-specific language", "Extensible programming", "Wiring" ], "id": "aff5e855936208786f5e21e164b93a0d82279a70", "inCitations": [], "journalName": "", "journalPages": "91-102", "journalVolume": "", "outCitations": [ "163ddeafaadc460d343567a299e9b880ef29fac5", "43ef8552b8ef0fbc49e68dc56636cc7701b56485", "1b6fd37f92ded89c0751bafd3f709d0a349cf3d2", "49c305336dd81efa672ae651c88042eaf923f12f", "85185566af3d958c0ab55c7d3ecec50963ca2453", "ca9f4626a54c70539f2a8be5441222940fb7451f", "b443fc0f924f61f42dde2caf4ca974c5deabede9", "d18e91ddfd00b2a04cdbbf800f25b3ce12e1c982", "8def424c5d4e6c9ae2c1bac5eefd32e8d9159b6e", "3fc09eaf1840a07ebf365ec73181f6e383b8b69b", "a8892f3b2ec5895c3a880b9387d223e0dd8a6544", "2199a23550e9695c1785128d588cfcb0b6f14dd7", "d8983ef55c23afcea469560ed78c1a81f184cf17", "049a3d0082810d287652976f459e961ca173923d", "7edb8fe0d66f18bc90ad0ae39c26a4ec03d00cab", "142b2c95a396cbc816692269187f553a6ec5a1e7", "d71b8b7193fd56cbc6d40adfaba9102042026254", "087e2339566f7477c319f7291d34cc2c2728694a", "59e145dfef77797b57b28d71b019c2e8b6dd9f01", "1da0b10ba41a613f76843e22b332fc019aa4ff9e" ], "paperAbstract": "Extensible languages enable the convenient construction of many kinds of domain-specific languages (DSLs) by mapping domain-specific surface 
syntax into the host language's core forms in a layered and composable way. The host language's debugger, however, reports evaluation and data details in ways that reflect the host language, instead of the DSL in its own terms, and closing the gap may require more than correlating host evaluation steps to the original DSL source. In this paper, we describe an approach to DSL construction with macros that pairs the mapping of DSL terms to host terms with a mapping to convert primitive events back to domain-specific concepts. Domain-specific events are then suitable for presenting to a user or wiring into a domain-specific visualization. We present a core model of evaluation and events, and we present a language design---analogous to pattern-based notations for macros, but in the other direction---for describing how events in a DSL's expansion are mapped to events at the DSL's level.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136019" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/aff5e855936208786f5e21e164b93a0d82279a70", "sources": [ "DBLP" ], "title": "Debugging with domain-specific events via macros", "venue": "SLE", "year": 2017 }, "affd921fb9b4eb946c1c427c0f81fbec8e551464": { "authors": [ { "ids": [ "1809899" ], "name": "George Kesidis" }, { "ids": [ "1729535" ], "name": "Uday V. 
Shanbhag" }, { "ids": [ "2772186" ], "name": "Neda Nasiriani" }, { "ids": [ "1684443" ], "name": "Bhuvan Urgaonkar" } ], "doi": "10.1109/MASCOTS.2017.24", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.24", "entities": [], "id": "affd921fb9b4eb946c1c427c0f81fbec8e551464", "inCitations": [ "8e34c6ca528e9977064921ac2f4b36143d4f217d" ], "journalName": "", "journalPages": "244-254", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.24" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/affd921fb9b4eb946c1c427c0f81fbec8e551464", "sources": [ "DBLP" ], "title": "Competition and Peak-Demand Pricing in Clouds Under Tenants' Demand Response", "venue": "MASCOTS", "year": 2017 }, "b03dea069a81251f3cacc7bb7404826cda28d849": { "authors": [ { "ids": [ "32113594" ], "name": "Raja Appuswamy" }, { "ids": [ "1943226" ], "name": "Manos Karpathiotakis" }, { "ids": [ "1686933" ], "name": "Danica Porobic" }, { "ids": [ "1728318" ], "name": "Anastasia Ailamaki" } ], "doi": "", "doiUrl": "", "entities": [ "CPU cache", "Cache coherence", "Database engine", "General-purpose computing on graphics processing units", "Heterogeneous System Architecture", "Message passing", "Multi-core processor", "Online analytical processing", "Online transaction processing", "Parallel computing", "Replay attack", "Shared memory" ], "id": "b03dea069a81251f3cacc7bb7404826cda28d849", "inCitations": [ "209443f19f5742a18efb19d4d8fea307e6e6d56a", "2394ac9566e43eda9b2b72fc3bfb18993f1a3eaf", "cf99bc5412e2513b97cf6d4cbbb0e427a973c528" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "6de2f02cfcc10d514431953a623898bfa61c1580", "378782a827933059f9f91e6e29aac84bd0857828", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "70931098d5df188787d28d65c28fe61839d7e3c3", "67cf1189c859d66bac309f9438df434fb651f97a", "5046a718f92447642939f5c93414dc97225d726a", 
"213a719cdecdd2e3a449c736db0d4449476ab323", "5dc5b799d6d161d5c2805917d680d1eb7314fdf5", "8d134a1afb17073f524bb38dc1e509018f89e96f", "b6f83d871948e3f4216026b0455ddebfb1cf3b1a", "ab4e0d6c59196243ab8d7f8644ecff86708fbefe", "e75c5d1b7ecd71cd9f1fdc3d07f56290517ef1e5", "3542254ecc9f57d19f00e9fcc645b3d44469a6ba", "94ff1a9056b41596e69f58a3a0524fd277aecd62", "0109b8d4f75feed4ffbd4b5d555bac1e2d27815d", "5646b458317f717e66af06ae57b6284f07ec43f8", "1d726b10b4fefeab892b209c9d04392996cf22ed", "195e70a6bb011623ac568c3ff36adb473ee27628", "54a8cc92a2dc47875faded1d324cffc38de5fd38", "4483c133f637170fedcb39a971da7e26a3c3f842", "30c0accd81025eb203d63fce539ff0a99be1b87f", "15e07d192d360652e68e38b4ba267e160f972390", "97f18a7fd4a91a28c93545930a913e481425d57e", "0f09c5a706eae3fa3f90875524ebaf3b3747c5f9", "142de4622470a98017db345c669edc1f1f832574", "5adece4919d359441c506260dc22ea6e7489e9fd", "7ff303e7c450aee82b6fff5cc64be54e5604da01", "0b147fa5a2e6872dfc34be5554183f0e68398c40", "6623d2c5f5772d6cd5ac8d7c77a4bd3f5d480d24", "7afe6271e3c345f0a1b60ab458c0de2e3ef940d5", "97c4de1676ffe66ec38b5213e6d45f74a5a60d7e", "32cdd8f6b3019f25d1b909f26386645896e20282", "60a488e29b5b64c44f6ce124bce7ced9602636d4", "51c0791f3ccc3e3d8e811004f6b448d14b1a61f4", "4ce795cc102abe9eff1df27b731cd90e369cb36a", "21843ea8918c628b1785e5c9e0fddf0a4de7ee9a", "9f2a6fc20fb292a5d33eb6bd930e1de9d527ee6b", "4439b5ce4506099452a2b2c30939bfa9f6020eb9", "813f433ad44b4b2f66b0ba7d4152ef5123840e2c", "2d68faca85e7970b5903f7ec39e6b607cc2c016a", "9aa0d7253574e50fe3a190ccd924433f048997dd", "c788b1d20e0d9d9eac57fdaf9a5b3f5b6b30b5df", "17e1036e3681a0da3361ae56cfa77d523ce51d88", "8db5d8f4bf055bbe64ccfe29c5fd778ef24ade5b", "0f42b4dc664eb31df423c3de3a2cecf9c6ac83a8", "7ead08393fa2000ea03bb1cc1e3e6e0ac991aac6" ], "paperAbstract": "Modern database engines balance the demanding requirements of mixed, hybrid transactional and analytical processing (HTAP) workloads by relying on i) global shared memory, ii) system-wide cache coherence, and iii) 
massive parallelism. Thus, database engines are typically deployed on multi-socket multi-cores, which have been the only platform to support all three aspects. Two recent trends, however, indicate that these hardware assumptions will be invalidated in the near future. First, hardware vendors have started exploring alternate non-cache-coherent shared-memory multi-core designs due to escalating complexity in maintaining coherence across hundreds of cores. Second, as GPGPUs overcome programmability, performance, and interfacing limitations, they are being increasingly adopted by emerging servers to expose heterogeneous parallelism. It is thus necessary to revisit database engine design because current engines can neither deal with the lack of cache coherence nor exploit heterogeneous parallelism. In this paper, we make the case for Heterogeneous-HTAP (HTAP), a new architecture explicitly targeted at emerging hardware. HTAP engines store data in shared memory to maximize data freshness, pair workloads with ideal processor types to exploit heterogeneity, and use message passing with explicit processor cache management to circumvent the lack of cache coherence. 
Using Caldera, a prototype HTAP engine, we show that the HTAP architecture can be realized in practice and can offer performance competitive with specialized OLTP and OLAP engines.", "pdfUrls": [ "https://pdfs.semanticscholar.org/b03d/ea069a81251f3cacc7bb7404826cda28d849.pdf", "http://cidrdb.org/cidr2017/papers/p21-appuswamy-cidr17.pdf", "https://infoscience.epfl.ch/record/224447/files/21-Appuswamy.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/b03d/ea069a81251f3cacc7bb7404826cda28d849.pdf", "s2Url": "https://semanticscholar.org/paper/b03dea069a81251f3cacc7bb7404826cda28d849", "sources": [ "DBLP" ], "title": "The Case For Heterogeneous HTAP", "venue": "CIDR", "year": 2017 }, "b03f5db2321aa18deefa44cb980799ca689ff740": { "authors": [ { "ids": [ "1777029" ], "name": "Vladimir Kolesnikov" }, { "ids": [ "1733482" ], "name": "Jesper Buus Nielsen" }, { "ids": [ "2524585" ], "name": "Mike Rosulek" }, { "ids": [ "3474994" ], "name": "Ni Trieu" }, { "ids": [ "28084654" ], "name": "Roberto Trifiletti" } ], "doi": "10.1145/3133956.3133991", "doiUrl": "https://doi.org/10.1145/3133956.3133991", "entities": [ "Compiler", "Computation", "Garbled circuit", "Two-phase commit protocol", "Value (ethics)", "Yao graph" ], "id": "b03f5db2321aa18deefa44cb980799ca689ff740", "inCitations": [ "5e75d9d75536f8b126c47ae4ce91b47d51c7cc69", "fe94997ec905da08756aa1bb80203c0f1e77d538" ], "journalName": "", "journalPages": "3-20", "journalVolume": "", "outCitations": [ "1fcf965b8e477cde3b2d854958c0418c435eb5a0", "0130d3428065bf7830263fdce79cc0192113af4e", "db0f82a419f89cda64fcbec2c58137862cd04475", "b273f47f97fc3f1ed922c3effda9ab88c52a1680", "046cd9bcecd662abad5e7e7707c043b8e2578cc8", "07b46f998a147ef221ebfdf5f1fb8db6cdf3f1c1", "04948723dec0e6724777ee56f0d10168cce44921", "55aafd6b8e9816203006c7e5c37e2e2fb8451113", "bcf72f7422bd9d24aa70dfa6c9511e8626482792", "4c27a4a70e9997a8a50003762d855f24b836daa1", "1bfb684e591020caec338fd631ff5397be3cff0d", 
"19c3736da5116e0e80a64db35afe421663c4b4a8", "ad0564d120af0e7471cd32d4c0438b8c25f33a0d", "f3aac8b66f9b569862dca2434b424111e94f7ed8", "15509fdb7fc7bf065fcdf776b38cf3d72d10c113", "25636d7ba1cf7cf43ad3c94f25c9d37432753549", "31100ccd0867d6d5338612a62b2cde11be75f1b8", "5f1622916e69b5bf985b339191ab1d3dd3517038", "42333e3f231bbfe508f6da6bad2feff9ae223113", "11484e276a27191b043c2ccab243addcdf4c965a", "260ff9a2bebd8663a1e2c46e41e10681185af793", "05543dfa89c4e89ed1f78a1b83c2e172cd8f6321", "842eb3de44e0538769f1509d1b8d35161fb212bb", "0affd3f06d26de268d81c288454dd7880e518f9e", "33148623fc14ea5735e73dd716d030ab17118299", "305e8126f7f07f4ced3a29b7c1464a0e58a00856", "218bbd0efffc2ee63edffb8c5220f06155e23578", "e5302edfa2fa077525008333fcb56d9c2f3451ef", "2d2581b990fd8b2df020cea5a6392b15f771bf0a", "cf81847f2e274502192705e98e332d393e13a7b1", "3fb1b878daafbd54989438e4fb778380a03226e6", "796ff7cef7dcd8b9c577a86473fc1067e1078144", "05dfe536310bc0176ad23cc40fdc8e501811f4be", "d89c91e556c9ebc345931547f579a8494a573391", "cd7d6df7ec98254301674c6d3a1401d2336db00a", "91c9299d4beb955196149e3a6b02ff4790343c98" ], "paperAbstract": "Cut-and-choose (CC) is the standard approach to making Yao's garbled circuit two-party computation (2PC) protocol secure against malicious adversaries. Traditional cut-and-choose operates at the level of entire circuits, whereas the LEGO paradigm (Nielsen & Orlandi, TCC 2009) achieves asymptotic improvements by performing cut-and-choose at the level of individual gates. In this work we propose a unified approach called DUPLO that spans the entire continuum between these two extremes. The cut-and-choose step in our protocol operates on the level of arbitrary circuit \"components,\" which can range in size from a single gate to the entire circuit itself.\n With this entire continuum of parameter values at our disposal, we find that the best way to scale 2PC to computations of realistic size is to use CC components of intermediate size, and not at the extremes. 
On computations requiring several millions of gates or more, our more general approach to CC gives between 4-7x improvement over existing approaches.\n In addition to our technical contributions of modifying and optimizing previous protocol techniques to work with general CC components, we also provide an extension of the recent Frigate circuit compiler (Mood et al, Euro S&P 2016) to effectively express any C-style program in terms of components which can be processed efficiently using our protocol.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133991", "http://eprint.iacr.org/2017/344", "https://eprint.iacr.org/2017/344.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b03f5db2321aa18deefa44cb980799ca689ff740", "sources": [ "DBLP" ], "title": "DUPLO: Unifying Cut-and-Choose for Garbled Circuits", "venue": "CCS", "year": 2017 }, "b04e1a22b8a50c30a87bab3424ac4e3541c5eff2": { "authors": [ { "ids": [ "40135389" ], "name": "Adriano Augusto" }, { "ids": [ "34515205" ], "name": "Raffaele Conforti" }, { "ids": [ "2019552" ], "name": "Marlon Dumas" }, { "ids": [ "1691344" ], "name": "Marcello La Rosa" } ], "doi": "10.1109/ICDM.2017.9", "doiUrl": "https://doi.org/10.1109/ICDM.2017.9", "entities": [ "Business process", "Causality", "Concurrency (computer science)", "Deadlock", "Directed acyclic graph", "Real life", "Scalability" ], "id": "b04e1a22b8a50c30a87bab3424ac4e3541c5eff2", "inCitations": [ "faeb64abfd19c3a4a1dee6a2b62cd141c8e241d5" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "1-10", "journalVolume": "", "outCitations": [ "0e3e7a3a3b8d8e04bd0b4397b9e51fbe64ac035b", "4f0ae5c9752d67ffc04906ddee4ea12d5a1a6069", "9f4b3463fb6d2f4fe68db6a45e5b4c6b562b0c68", "4d4299bfd0ef670b2f913103b853f6394ed026a7", "d84e354bcf0d3a3e895d548f59ed8b2a90dfbda5", "1a49511ab6abd07d9039ea8bab2f0fd6e75649d1", "2ca252b056f8facbdb1088a47c39890fac47108d", "a36ddb33df71772e0f532dc4101b0d4fe75f45b7", 
"7e6bac4de2adda5f5e993644126d8cbdf6839f39", "2ee1507c7cffdadd7228bce1bb697bad8b7d63f0", "608d21702d10aabb0eb9fae08e2392d4f55bb7e0", "99d2c85d4047cfbce0c5407c16b799be7a97c6b1", "0151b96a9f96a370ef08fffb8c56c0b32534f839", "32a306b6511c58ec70746c6495237b577fa595fb", "02d29c76019d689734ed012269c851a5196c1ae8", "57b5a6a4f6f2ef04e9575af49d6e1539e33220c6", "322beb11ceeacf72dfc9df8df8cb045efb46d67f", "f6c6477bde2b2816d395eee79e2d765ba15016fb", "1e1ef9dacab7e65aa9cad30625e1be7c47100cc8", "23fe2c696c96ea2549b76c11327c83be1f2c2bd8", "ae26a88fa393558a181a93a5aa8e5eef20421f05" ], "paperAbstract": "The problem of automated discovery of process models from event logs has been intensively researched in the past two decades. Despite a rich field of proposals, state-of-the-art automated process discovery methods suffer from two recurrent deficiencies when applied to real-life logs: (i) they produce large and spaghetti-like models; and (ii) they produce models that either poorly fit the event log (low fitness) or highly generalize it (low precision). Striking a tradeoff between these quality dimensions in a robust and scalable manner has proved elusive. This paper presents an automated process discovery method that produces simple process models with low branching complexity and consistently high and balanced fitness, precision and generalization, while achieving execution times 2-6 times faster than state-of-the-art methods on a set of 12 real-life logs. Further, our approach guarantees deadlock-freedom for cyclic process models and soundness for acyclic. 
Our proposal combines a novel approach to filter the directly-follows graph induced by an event log, with an approach to identify combinations of split gateways that accurately capture the concurrency, conflict and causal relations between neighbors in the directly-follows graph.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.9" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b04e1a22b8a50c30a87bab3424ac4e3541c5eff2", "sources": [ "DBLP" ], "title": "Split Miner: Discovering Accurate and Simple Business Process Models from Event Logs", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "b0a064eb9437ba23b13c6886bf6a0ba49d6d31d5": { "authors": [ { "ids": [ "2321666" ], "name": "Christoph Berkholz" }, { "ids": [ "9560380" ], "name": "Jens Keppeler" }, { "ids": [ "1691736" ], "name": "Nicole Schweikardt" } ], "doi": "10.1145/3034786.3034789", "doiUrl": "https://doi.org/10.1145/3034786.3034789", "entities": [ "Conjunctive query", "Counting problem (complexity)", "Data pre-processing", "Data structure", "Database", "EXPTIME", "Exponential time hypothesis", "Join (SQL)", "Matrix multiplication", "Preprocessor", "Relational database", "Time complexity" ], "id": "b0a064eb9437ba23b13c6886bf6a0ba49d6d31d5", "inCitations": [ "86ffd2eeef909416825df85974ee04ab72be6193", "bd3bd2584901ab5db64f94fe48c0f063aa20fadb", "2734e7f6da7d841d7d8d3fd6aff857443083ef9c", "3f661734b6ba8d3b18b8b5e9198f334b80239a3c", "e43667d7aaaa69e9f96cf01bebbe826d2ead7af9", "d29f2dbb39936e9be8e32421919d48b7601c4ee7", "0a424fcdcfd55ff0f80a193848547b5e5f434614", "7b845167dd41349cafc9aab147822b9abcedeb2c", "2c8c7bf1b4a0c0adde5a033763dae5622992a2f8", "3e01af0f860a62d930ceb6c7abe50f5629de97ab", "dd3dbd2d008d93d92e1fdb6b9e321762282b85c2", "4276c04ecd4afbda69101e79c951293bd38f575d", "d26f3b3e9c3039852ad949a3af808db28dedd56d" ], "journalName": "", "journalPages": "303-318", "journalVolume": "", "outCitations": [ 
"08a3eda5d7af40d9d036deb28ca63c60e6870441", "076aee8e0b13704adb60ae212df22fea64613861", "eaed7286bba82a3adc56dc17623d82cebe4b34c6", "dd3dbd2d008d93d92e1fdb6b9e321762282b85c2", "3dad3d035648bcc7f7855486a3dd8ee8c1e3ae97", "c935242e20209b3e89aad72838a702d33efbf356", "380d7d4863a57bc6a5109edcc77c352d91a485da", "006b5b58e8d8475ab9295b3336beae102416444f", "1e675e52d982b70e3f056ad6d48d6b5c4de4c992", "0d7ea2da62cde74955acff50fd877331e250ee0a", "2f896bbe19229c4650f8f118c2604532ff2cecfc", "80e535eeff690c2d157cbe9a93cb1bafb0ad2326", "3ba66b7716d2f93ae39b2bb79427038e449f5a7c", "d62470be07e32896f75bf6495f99a2bf1faa039b", "98603da344a268630868adb671ac07a720df3d45", "31dfc4dc7e4bb4f72eb41f06826f19bf8b4b5693", "1767d57de855aedc924d1a7cbae66fc7c81e1bc8", "021764d0804445e0869c57314c069d07c874cb4b", "6b349f9b962f14e0366d59d759200829e20dfe19", "0a0299fe4ab7ad973dd3be6527c47a05c3cc3d93", "3b20298819690a3ac92e60901207191f1d281bb2", "161d632b1fb9c447a0930630c1fdf586e629b5ce", "25c58a8925ade1a8746b7c24d9a11f04617c4684", "431f89d5a702be51916016861b9d053044006064", "051f9b56bd0ff2507b3c0d366a6631569194b4fa", "03d9e06a8bbf15edf1e59664456ad95ba6ef6ad1", "00e59ae369ad6801c1a9dcb4493c990cb08544de", "00d3501889da08bed1214961dd4373fc69469a04", "3a1fefcceaaf21908388fa6197939f09ce8420e6", "3ec92f5dbde8163bd277531b10f58c1bc5a1fc38", "2c54a88235637dc950c4e799f9d24a0b34585df8", "1adb361e20ca38f1e358e969fdd9e62d6b63598b", "27d0efb05b770a1080b7d4a91d6079905a15fb3e" ], "paperAbstract": "We consider the task of enumerating and counting answers to k-ary conjunctive queries against relational databases that may be updated by inserting or deleting tuples. We exhibit a new notion of q-hierarchical conjunctive queries and show that these can be maintained efficiently in the following sense. 
During a linear time pre-processing phase, we can build a data structure that enables constant delay enumeration of the query results; and when the database is updated, we can update the data structure and restart the enumeration phase within constant time. For the special case of self-join free conjunctive queries we obtain a dichotomy: if a query is not q-hierarchical, then query enumeration with sublinear *) delay and sublinear update time (and arbitrary preprocessing time) is impossible.\n For answering Boolean conjunctive queries and for the more general problem of counting the number of solutions of k-ary queries we obtain complete dichotomies: if the query's homomorphic core is q-hierarchical, then size of the the query result can be computed in linear time and maintained with constant update time. Otherwise, the size of the query result cannot be maintained with sublinear update time.\n All our lower bounds rely on the OMv-conjecture, a conjecture on the hardness of online matrix-vector multiplication that has recently emerged in the field of fine-grained complexity to characterise the hardness of dynamic problems. 
The lower bound for the counting problem additionally relies on the orthogonal vectors conjecture, which in turn is implied by the strong exponential time hypothesis.*) By sublinear we mean O(n(1-ε) for some ε > 0, where n is the size of the active domain of the current database.", "pdfUrls": [ "https://arxiv.org/pdf/1702.06370v1.pdf", "http://doi.acm.org/10.1145/3034786.3034789", "http://arxiv.org/abs/1702.06370" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b0a064eb9437ba23b13c6886bf6a0ba49d6d31d5", "sources": [ "DBLP" ], "title": "Answering Conjunctive Queries under Updates", "venue": "PODS", "year": 2017 }, "b0ac2616034f56ab1469afb935b55fe7e37f8f41": { "authors": [ { "ids": [ "34033392" ], "name": "Ming-Wei Shih" }, { "ids": [ "30685860" ], "name": "Sangho Lee" }, { "ids": [ "3254849" ], "name": "Taesoo Kim" }, { "ids": [ "3798388" ], "name": "Marcus Peinado" } ], "doi": "", "doiUrl": "", "entities": [ "Benchmark (computing)", "Cloud computing", "Compiler", "Exception handling", "FreeType", "Haswell (microarchitecture)", "High- and low-level", "Hypervisor", "Intel Developer Zone", "Interrupt", "Malware", "Microsoft Research", "NBench", "Operating system", "Page fault", "Transactional Synchronization Extensions", "Transactional memory", "Trust (emotion)", "Trusted execution environment", "Usability", "libjpeg" ], "id": "b0ac2616034f56ab1469afb935b55fe7e37f8f41", "inCitations": [ "a355edbb24d406761407e2728218d2192f2c1fcf", "0c0994b08b790dc467b892d538321f9dbd10a3c2", "c924eaf7ed2e119449fb57b16d2216bf93edf8e2", "b3f2a11d45757e675be123d55ec0eb192bcca990", "8569785f80712b5787e12b86a3870a28c0182b2c", "345533e1f72f3f9e215e1fc468a3131a90481414", "06278774c1cd06ea9eb3cc7e1200434ea27f25c1", "8e3270ef5a0afc293021f8d594979ed059e29d5e", "091c3ab3e30621efc6326c4438b3300d203d8ddc", "0d45681a313e37dd9f716f50bfa4d178eb16c64a", "54be4148c4ebb985505664516ca0004718086c0a", "e41440cff90683629228b308a94e48c7af11ca36", 
"50ba271c1e0ddd814b6e79348a8963c788d9ddf9", "873d3d4efb797a4fc3ead8bfa7ab5fde906306aa", "6cfe1e553cb48c7087bb61e80031c415978a4ede", "0b23e4be50e710dd9d339fc64f025ba89cc002d3", "3ca5880e4fe23ec2ee8025ff6c121ebb5348c6fc", "548f7faddd750a642f95536a83ab5c2279c8bf33", "03e89626cbb864fb1243b4ee8b4037020a9250eb", "6f8fe3cbacb8436615e886b6188e2e62fd1a5b3c", "18e7ac5664caece8f826e7251673fd3946e653d0", "7cbbab21c6f6fb54a968005afa21468b825f1d1e", "e96fb5da6540f0b7bed8760fe81bcb116ac31240", "2a7056e53f29bc73471048a77b0c55ea4e92b897", "659bc8a947a4ea64b2ca38b7fa5a27233de2c1f8", "334ec6e57110ece9f482f9ec2e85412b0be8072a", "b643e5ce4ad3d2f674fc3f13e89bdaabf75fa066", "19aeb06b3ba5b454fc462254c178acdf233d955b", "b897c4c09b480f9934d5e9e4cfa2d540aaed522f", "8e3f04c9936949d13b9b1157857e66dd291c45d5", "2087f336b0eb38fc60a24a32ec0821fc8fe2b2aa", "377712ef264d63c97b341fb782037d063018305e", "2fea0c41dbd7878d6b285b9f3dc62e32adba94d6", "6db9824d4667b22310c51fe638403238f873e9f2", "2106bb4ef13e35ad7640376b0ba3b6261b82fe22", "7a5cf32d06c3b2e4f27bee372a53bdc2e8fcfbce", "415012ec86c7a6acebd34bf7eb02eff46dd96e68", "72880d15db2282512e5d3f0a3796b397d68cc7db", "26edca5c337b6b6ec4416356f270c35dc074057d", "33ae35cc24ef4303979b479671c2065256e1b3a7", "67b380be262b074323f050d59d6e4ca2e2b958ee", "7ab74b4e4c11626c2642fcb95342c9c318dbfdca", "287da0ab3c169c41433b0e5504161dfd1afbfa6c", "38a54f9bbbfc46599770a28999365144a273783f" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "06bf84f98e7dd39be8d96eb67bafbf56d61bc715", "1b80ae882afb809686f20765e4a42a5b99aa55de", "2d968ef0c5ad0cc6718e2f8b40ce7f4c323dbbdd", "0609c475dd31632b705e9fb9a603060a3ff2c46a", "104eb3716d8b2d7e6d69805357d5b7fb87caff3e", "3ffc94c7066b4856b1cfae99ab66cd20310e41dd", "bb52ff840b1b6e2144268e57c72118a49460d6f4", "0dc26ac58f3d59b99b225fedc906be0a637e6596", "5b2092b54860f134f78b2ec884c910750def71e6", "1bb07c114cb447552d36a95445cc207f496d85aa", "2065450d96aca38c79cad5172b58660765533650", 
"07f0e56d1c37c213cd5c617dbfba5a0549629a19", "f8eb4724ff1241a728786d30f6cf1bbb9f413e74", "3d22300111d1002612626834b38e956f2011a21f", "eeb5ec8d23124c4b352aa4168cb03f87f9480c92", "01fde8698110cf46ff48a17c65f2658dab4c323c", "4f91a5354dae88cdf38c54c658ed634580cae96a", "0f8bf827ce4d5468ad9353af96dffadbb31a0b7d", "1d08bb92568d98319634fe2409a9eab085d68b60", "5bddb52a9def1c1330e8139b8496fbb8bb8c5937", "a931091182afa9680d415c5c9e5f61ff3bcc48fe", "0a35c32ebc233556d11c2038f5a4362f2c40b2a9", "054bca7f2fa00c3d55f0e028b37513bebb9c4ea5", "30ba0dd406a6f22e2ff30a0bfd7d1377e672c1ba", "77a1532cb64eab28162a0277cde52b4b7eceda49", "70e77041b17af5b26146bc34003175d7cd389434", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "017aba316f6d8447a4e045d8ddd417456629031e", "4ea80cc42b2140a3bce7e64f49225323eaf56912", "30f52a79ff53f8969ffcba19013b4a43e629875f", "7cbbab21c6f6fb54a968005afa21468b825f1d1e", "178fc755cef313f3231f1fba183570c02d5e471f", "0790cd3d852a21b190c0d20593aa9293bc18f745", "06f16d9430d5f6213cf5399b167a3d989c3ff798", "6871b95c14dccca7636b498b5d363a743c5288e6", "6b6fae57882fd193461fca64654107068ce9fd9a", "c5dc96463e5ad4378277550f95aa86ee070d93ef", "1495c7daaba55dd2e68e026fc6c1848eee1ee710", "1bd2d9fb62832737735d011154834b7c80c7e50a", "52c2c050af5b32d4929b4b193967a3675d03aea0", "7f489d9801e0674f4436beb34ea8b8695050d5fe", "0cff564ecbf954a61327a944f605019ae38a0da5", "5ac7a4dca5509c9dee49d96b4c3c62cc1d0bb9dd", "76d58199db1ed2477d82b643f6b77db456138ca2", "4ea47f63c8b2a026a66566dd3f733d45e692d369", "0a289fd7b14345822b1acda6d82750b15d59663e", "f4ed7fb35916bd0d36a53198384bb0ed2ff34c3f", "304c2b0b6e8e1cc746117a14257dbc024d5135e9", "403e90c1b1037587d0deacf42dcf1a32da9f6c24", "05f70f429a7bf38efa9e457fd486cb862bd495be", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "0e7c0199bbb4533e8f074d914a45351d80e5cb55", "f1ff27bb802661483035f73020366eb72976eed4", "79e14bf42535966db5e056af6d8a0e0ec1f522b2", "30909df12b1b01760ae4c5406e15f302a6524446", 
"3b5b9cbd2f0cfd390eeb968c99266115cb2c9597" ], "paperAbstract": "Intel Software Guard Extensions (SGX) is a hardware-based trusted execution environment (TEE) that enables secure execution of a program in an isolated environment, an enclave. SGX hardware protects the running enclave against malicious software, including an operating system (OS), a hypervisor, and even low-level firmwares. This strong security property allows the trustworthy execution of programs in a hostile environment, such as a public cloud, without trusting anyone (e.g., a cloud provider) between the enclave and the SGX hardware. However, recent studies have demonstrated that enclave programs are vulnerable to an accurate controlled-channel attack: Since enclaves rely on the underlying OS, a curious or potentially malicious OS can observe a sequence of accessed addresses by intentionally triggering page faults. In this paper, we propose T-SGX, a complete mitigation solution to the controlled-channel attack in terms of compatibility, performance, and ease of use. T-SGX relies on a commodity component of the Intel processor (since Haswell), Transactional Synchronization Extensions (TSX), which implements a restricted form of hardware transactional memory. As TSX is implemented as an extension (i.e., snooping the cache protocol), any unusual event, such as an exception or interrupt, that should be handled in its core component, results in an abort of the ongoing transaction. One interesting property is that the TSX abort suppresses the notification of errors to the underlying OS, which means that the OS cannot know whether a page fault has occurred during the transaction. T-SGX, by utilizing such property, can carefully isolate effects of attempts to tap running enclaves, thereby completely eradicating the known controlled-channel attack. We have implemented T-SGX as a compiler-level scheme that automatically transforms a normal enclave program into a secured one. 
We not only evaluate the security properties of T-SGX, but also demonstrate that it applies to all the previously demonstrated attack targets including libjpeg, Hunspell, and FreeType. In addition, we evaluate the performance of T-SGX by porting ten benchmark programs of nbench to the SGX environment. The results are promising; that is, T-SGX incurs on average 50% runtime overhead, which is an order of magnitude faster than state-of-the-art mitigation schemes. \u2020 The two lead authors contributed equally to this work. \u22c6 The author did part of this work during an intership at Microsoft Research.", "pdfUrls": [ "https://www.internetsociety.org/sites/default/files/ndss2017_07-2_Shih_paper.pdf", "https://taesoo.gtisc.gatech.edu/pubs/2017/shih:tsgx.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/t-sgx-eradicating-controlled-channel-attacks-against-enclave-programs/", "http://iisp.gatech.edu/sites/default/files/images/ndss17_eradicating_controlled-channel_attacks.pdf", "https://taesoo.gtisc.gatech.edu/pubs/2017/shih:tsgx-slides.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/fac7/fba7aad263c7d4cf5ce67b1d629935043e7a.pdf", "s2Url": "https://semanticscholar.org/paper/b0ac2616034f56ab1469afb935b55fe7e37f8f41", "sources": [ "DBLP" ], "title": "T-SGX: Eradicating Controlled-Channel Attacks Against Enclave Programs", "venue": "NDSS", "year": 2017 }, "b0e479c5dbc05b9eb4ed63747a5271c3e10ade1c": { "authors": [ { "ids": [ "26373868" ], "name": "Zhongqi An" }, { "ids": [ "2638943" ], "name": "Zhengyu Zhang" }, { "ids": [ "20638185" ], "name": "Qiang Li" }, { "ids": [ "2032859" ], "name": "Jing Xing" }, { "ids": [ "1684572" ], "name": "Hao Du" }, { "ids": [ "1718667" ], "name": "Zhan Wang" }, { "ids": [ "2462515" ], "name": "Zhigang Huo" }, { "ids": [ "1686247" ], "name": "Jie Ma" } ], "doi": "10.1109/CLUSTER.2017.69", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.69", "entities": [ "Attribute\u2013value pair", "Benchmark (computing)", 
"Communications protocol", "Data access", "Datapath", "Deployment environment", "Dynamic random-access memory", "Forwarding plane", "Java virtual machine", "Key-value database", "Memcached", "Middleware", "Pipeline (computing)", "Remote direct memory access", "Solid-state drive", "USB flash drive", "User space" ], "id": "b0e479c5dbc05b9eb4ed63747a5271c3e10ade1c", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "582-586", "journalVolume": "", "outCitations": [ "742c641506ac9efc3281af2effb31f2fb31b2dd4", "029e03cd045b1fcda76e4c469eedfa0470c79624", "78e042b06806df839e0b87d98a0eba9891ab3634", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "fd54293ccb8c629a5fd8c17584cd37121f399149", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "11c136aa1136ccf6ebbb23c3b3e1fbdd8447bb00", "7129b305ce45f83127e928e8510da9fae0783905", "29a1148d75878671dc3663bf480e33d7bd91597d", "30419d0e0fcaebbfbcdb88f702bd01306d14fb15", "0276440f721b17ff77165f2b1ed24e029b9a2432", "1c82d6dd3fde20878f9500c31351a3ceb9c05a46", "f992322459aceac1567529e8091e5acae3fcb07f", "225603198cc415d363db8a8a2bd30b0df3c963b1" ], "paperAbstract": "In-memory key-value store is a crucial building block of large-scale web architecture. Given the growth of the data volume and the need for low-latency responses, cost-effective storage expansion and fast large-message processing are the major challenges. In this paper, we explore the design of key-value middleware that takes advantage of modern NVMe SSDs and RDMA interconnects to achieve high performance without excessive DRAM deployment. We propose an all-in-userland approach to improve the data plane efficiency. Both NVMe and RDMA are interfaced directly from the user-space for effective data access and tailored data management. We present a low-latency storage extension framework based on NVMe and a new design of JVM-aware Memcache protocol based on RDMA. 
To further accelerate large-message transfer, we provide a hybrid communication protocol fusing Eager and Rendezvous schemas, and a united I/O staging approach to achieve maximum latency hiding through pipelining. As the benchmarking results indicate, with the non-negligible JVM overhead taken into account, our solution obtains comparable communication performance with the RDMA-Memcached released by the OSU. For SSD-involved operations, the latency decreases by up to 31% compared to the kernel-based I/O processing.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.69" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b0e479c5dbc05b9eb4ed63747a5271c3e10ade1c", "sources": [ "DBLP" ], "title": "Optimizing the Datapath for Key-value Middleware with NVMe SSDs over RDMA Interconnects", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "b0fd7a0f70b64c06031bb915d9aedd44b6550b16": { "authors": [ { "ids": [ "1761499" ], "name": "Wei You" }, { "ids": [ "9957254" ], "name": "Peiyuan Zong" }, { "ids": [ "8336490" ], "name": "Kai Chen" }, { "ids": [ "34989133" ], "name": "XiaoFeng Wang" }, { "ids": [ "39591879" ], "name": "Xiaojing Liao" }, { "ids": [ "3400196" ], "name": "Pan Bian" }, { "ids": [ "36786892" ], "name": "Bin Liang" } ], "doi": "10.1145/3133956.3134085", "doiUrl": "https://doi.org/10.1145/3133956.3134085", "entities": [ "Common Vulnerabilities and Exposures", "Dangling pointer", "Data validation", "End-to-end principle", "Information extraction", "Linux", "Linux", "Memory corruption", "Natural language processing", "Reverse engineering", "Sanitization (classified information)", "Software bug", "Vulnerability (computing)" ], "id": "b0fd7a0f70b64c06031bb915d9aedd44b6550b16", "inCitations": [], "journalName": "", "journalPages": "2139-2154", "journalVolume": "", "outCitations": [ "6c0961e06bd10fd940513e7e385a91eb5681da61", "177356d35e6d2db31760d487614ecaf856f398bd", 
"2c13dcfdc5ea2d355a46fe326c371038a00ba7f5", "703dbb18e5c24dc546c66679deb677c66fd7b88d", "421ccb793d042f186e2031bd5a07a05b5a6f8162", "4b58bf3b886997d79ba469099e44c2fd5ef8c770", "6dc75e8cd10a1ee4ced29b779a1753bef574a45b", "2c067e092c35d71d23c09d9c09376aa5b684152c", "627bd11712f87bfa4a3668a717e72b237ab9e701", "0c3b9e8b3d75a914a0add39d017e2455e97d6cf1", "aad8c5ae265e8f645101245afb9d9c9cdf40b4ca", "479bf197a3a9c5f6f7b06e64eefa0a90fa0c8b41", "71691ee2dbe001d599334e5389d80dd32c44a74e", "2c115c1d0d73ec6dd19e048c7eec8970a643ba34", "4799e97e3e450259918f52df6bbe8d4a353bcbbe", "76bcb2112024eb75549d3781d1799ffee82e58f1", "5556995fb630c47805bbba560287ea59ce357fa1", "37089d3acfd7a4ffa7def419a39ebe663db39fda", "7d0577772fe06b773d359d1b4060fce92fd4948f", "85f5741fe838ed5de13031b9048a4953f1e3a62f", "c271535aa3e9a5cc7839543667017cb32ec9b94c", "5eab6c72ba39e0fea5c3aac0c2f5f9cc0a03eb0c", "1641e3de69af316ad202c74d52ecc6b55537d550", "dcddbe6e0bb64d4792610d08bae06f6c11aec0c7", "de71e2359995087b4ce7d46e4eb718c341c70ee0", "a1ce532bdbd81541680fb5390c09586feb23f5d9", "e80005855bffe792ffe4059779407ca7d291e02e", "537afee84424ade9e9e319dfb7efef12565e303b", "0c5de0e5cb46e862b933c6bd543cc15695506034", "1e73c2fa2709d3210c09f19933e99b71905364ab", "605d7b6721d46c8edbca63453d340a37f67c2ee5", "0b5b42425deb371d8dc60ac9b090c7232702370a", "84fceec78ecd77d3134bd8839b3eb2ea1ad474a5" ], "paperAbstract": "Patches and related information about software vulnerabilities are often made available to the public, aiming to facilitate timely fixes. Unfortunately, the slow paces of system updates (30 days on average) often present to the attackers enough time to recover hidden bugs for attacking the unpatched systems. Making things worse is the potential to automatically generate exploits on input-validation flaws through reverse-engineering patches, even though such vulnerabilities are relatively rare (e.g., 5% among all Linux kernel vulnerabilities in last few years). 
Less understood, however, are the implications of other bug-related information (e.g., bug descriptions in CVE), particularly whether utilization of such information can facilitate exploit generation, even on other vulnerability types that have never been automatically attacked.\n In this paper, we seek to use such information to generate proof-of-concept (PoC) exploits for the vulnerability types never automatically attacked. Unlike an input validation flaw that is often patched by adding missing sanitization checks, fixing other vulnerability types is more complicated, usually involving replacement of the whole chunk of code. Without understanding of the code changed, automatic exploit becomes less likely. To address this challenge, we present SemFuzz, a novel technique leveraging vulnerability-related text (e.g., CVE reports and Linux git logs) to guide automatic generation of PoC exploits. Such an end-to-end approach is made possible by natural-language processing (NLP) based information extraction and a semantics-based fuzzing process guided by such information. Running over 112 Linux kernel flaws reported in the past five years, SemFuzz successfully triggered 18 of them, and further discovered one zero-day and one undisclosed vulnerabilities. These flaws include use-after-free, memory corruption, information leak, etc., indicating that more complicated flaws can also be automatically attacked. 
This finding calls into question the way vulnerability-related information is shared today.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134085", "https://www.informatics.indiana.edu/xw7/papers/p2139-you.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b0fd7a0f70b64c06031bb915d9aedd44b6550b16", "sources": [ "DBLP" ], "title": "SemFuzz: Semantics-based Automatic Generation of Proof-of-Concept Exploits", "venue": "CCS", "year": 2017 }, "b11b0b6542d3cbc67f091603e89a7a24b4dc4126": { "authors": [ { "ids": [ "2702753" ], "name": "Sharad Chole" }, { "ids": [ "30305439" ], "name": "Andy Fingerhut" }, { "ids": [ "1781758" ], "name": "Sha Ma" }, { "ids": [ "39118448" ], "name": "Anirudh Sivaraman" }, { "ids": [ "3431317" ], "name": "Shay Vargaftik" }, { "ids": [ "40194347" ], "name": "Alon Berger" }, { "ids": [ "37939768" ], "name": "Gal Mendelson" }, { "ids": [ "2587719" ], "name": "Mohammad Alizadeh" }, { "ids": [ "1897595" ], "name": "Shang-Tse Chuang" }, { "ids": [ "1785739" ], "name": "Isaac Keslassy" }, { "ids": [ "1705969" ], "name": "Ariel Orda" }, { "ids": [ "2112077" ], "name": "Tom Edsall" } ], "doi": "10.1145/3098822.3098823", "doiUrl": "https://doi.org/10.1145/3098822.3098823", "entities": [ "Central processing unit", "Centralisation", "Compile time", "Compiler", "Crossbar switch", "Network switch", "Processor design", "RateMyTeachers.com", "TRAVERSE", "Throughput" ], "id": "b11b0b6542d3cbc67f091603e89a7a24b4dc4126", "inCitations": [ "571a253f7c5ed3517657ce8a49c25f0ebccc3d79", "a4e4aec44dd4b205ff2befb8af8c5a25db720681" ], "journalName": "", "journalPages": "1-14", "journalVolume": "", "outCitations": [ "0130c8c2c9bc7f64d9bf0aee5e0704bbeadfe9f3", "1447be3d899115a834874e585256360911036a4d", "0a59166593f0a2fa260f16fd853299c9f0863fdf", "03e38df6f8924b4efa28808768239ae953004df3", "2077579d62fc090d4ddf45f107ffae0468936165", "0847dfb07af6685f96b885a93c2cd4dfb4c3d1a9", "f535a5fcb9bfb79ce987b20fbdb80973672720f2", 
"52e66a4eff2ee35b147d18c6bd5ba31737aedeff", "022a90fa7d2b9b02c0a4888a8a9c3dbfa6767a12" ], "paperAbstract": "We present dRMT (disaggregated Reconfigurable Match-Action Table), a new architecture for programmable switches. dRMT overcomes two important restrictions of RMT, the predominant pipeline-based architecture for programmable switches: (1) table memory is local to an RMT pipeline stage, implying that memory not used by one stage cannot be reclaimed by another, and (2) RMT is hardwired to always sequentially execute matches followed by actions as packets traverse pipeline stages. We show that these restrictions make it difficult to execute programs efficiently on RMT.\n dRMT resolves both issues by disaggregating the memory and compute resources of a programmable switch. Specifically, dRMT moves table memories out of pipeline stages and into a centralized pool that is accessible through a crossbar. In addition, dRMT replaces RMT's pipeline stages with a cluster of processors that can execute match and action operations in any order.\n We show how to schedule a P4 program on dRMT at compile time to guarantee deterministic throughput and latency. We also present a hardware design for dRMT and analyze its feasibility and chip area. Our results show that dRMT can run programs at line rate with fewer processors compared to RMT, and avoids performance cliffs when there are not enough processors to run a program at line rate. 
dRMT's hardware design incurs a modest increase in chip area relative to RMT, mainly due to the crossbar.", "pdfUrls": [ "http://inat.lcs.mit.edu/papers/drmt_sigcomm_17.pdf", "http://www-users.cselabs.umn.edu/classes/Fall-2017/csci8211/Papers/SDN%20Data%20Plane%20dRMT-%20Disaggregated%20Programmable%20Switching.pdf", "http://doi.acm.org/10.1145/3098822.3098823" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b11b0b6542d3cbc67f091603e89a7a24b4dc4126", "sources": [ "DBLP" ], "title": "dRMT: Disaggregated Programmable Switching", "venue": "SIGCOMM", "year": 2017 }, "b19aa8763db92410c9b75e2993cb42890506cfbb": { "authors": [ { "ids": [ "1776730" ], "name": "Shigeru Imai" }, { "ids": [ "35092032" ], "name": "Stacy Patterson" }, { "ids": [ "1723989" ], "name": "Carlos A. Varela" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud computing", "Cost efficiency", "Data rate units", "Download", "Experiment", "Operability", "Provisioning", "Scheduling (computing)", "Service-level agreement", "Simulation", "Stream processing", "Throughput", "Virtual machine" ], "id": "b19aa8763db92410c9b75e2993cb42890506cfbb", "inCitations": [ "8a03185e29cf5d2dc0121228235d84a8becb1bfc", "3dde82d20d4d1d5c6553a423576f2f5cd495f1bd" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "504-513", "journalVolume": "", "outCitations": [ "16287f07e76eaec9cfe06c76c859161b2607e7ef", "2afc4a74b85eb9e0330562f9d8690e896d5e1e92", "69884f09be947c43e1029bb3ddc95db5edc2a03d", "edd661ca12ef8ef988679f7b399f2f846ef01fbd", "0ef88610f4382a9c6e5c8020ab6ccf3dde0fb3eb", "4588d8b534b836fe7a2ae87a2626d7689012a204", "24251f02c34f32b1dd96572a1d984c4463a26a10", "a6c6027b69591000cab174221634cd916697286c", "ac6cdf984e241a2a924cea4f16f6c30139a6d859", "04afd5f18d3080c57d4b304dfbd1818da9a02e8e", "9da1d06e9afe37b3692a102022f561e2b6b25eaf", "37553adac0c8ca2a52f4a0fd5a400c9c41225183", "1f1b9d8c567145d541126d266dbf7b577f52e706", 
"e0b3d5095ca65792b0ae77417c66578c0253d1aa", "9cac99ee880b6e06a105bbe2327a34f5d2f86ad4", "7d21b0e467734154519df84a08d18967597397bd", "9f948448e7a5f0cc94cd53656410face8b31b18a", "04bda1bfbb271b4d132fd326c79cd7e0a6961fd3", "aedfcd490ec52bb1e7d31dd3d24cbf8d7e4da4a3", "2a59510b3adea158588928cd813915dcd7bc0fad", "30a82a63a339c1e69aac36b23900544fe9ec97bb", "b4520f4254516768ce4c62a735b2ffd389084358", "e847c3ec130da57328db79a7fea794b07dbccdd9", "0ec59b7fae15a7caa3256ba31b21802d455618c2", "4a520c1818fc7ca560331234f6bee68d4d8bd302" ], "paperAbstract": "In cloud-based stream processing services, the maximum sustainable throughput (MST) is defined as the maximum throughput that a system composed of a fixed number of virtual machines (VMs) can ingest indefinitely. If the incoming data rate exceeds the system's MST, unprocessed data accumulates, eventually making the system inoperable. Thus, it is important for the service provider to keep the MST always larger than the incoming data rate by dynamically changing the number of VMs used by the system. In this paper, we identify a common data processing environment used by modern data stream processing systems, and we propose MST prediction models for this environment. We train the models using linear regression with samples obtained from a few VMs and predict MST for a larger number of VMs. To minimize the time and cost for model training, we statistically determine a set of training samples using Intel's Storm benchmarks with representative resource usage patterns. Using typical use-case benchmarks on Amazon's EC2 public cloud, our experiments show that, training with up to 8 VMs, we can predict MST for streaming applications with less than 4% average prediction error for 12 VMs, 9% for 16 VMs, and 32% for 24 VMs. Further, we evaluate our prediction models with simulation based elastic VM scheduling on a realistic workload. 
These simulation results show that with 10% over provisioning, our proposed models' cost efficiency is on par with the cost of an optimal scaling policy without incurring any service level agreement violations.", "pdfUrls": [ "http://wcl.cs.rpi.edu/papers/ccgrid2017.pdf", "http://dl.acm.org/citation.cfm?id=3101181" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b19aa8763db92410c9b75e2993cb42890506cfbb", "sources": [ "DBLP" ], "title": "Maximum Sustainable throughput Prediction for Data Stream Processing over Public Clouds", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "b19cb8fb5bcc8e91b630cb5a216a763e53b46cbc": { "authors": [ { "ids": [ "2717314" ], "name": "Zhipeng Li" }, { "ids": [ "7869811" ], "name": "Yinlong Xu" }, { "ids": [ "1904670" ], "name": "Yongkun Li" }, { "ids": [ "3450470" ], "name": "Chengjin Tian" }, { "ids": [ "24707565" ], "name": "Youhui Bai" } ], "doi": "10.1109/ICPP.2017.49", "doiUrl": "https://doi.org/10.1109/ICPP.2017.49", "entities": [ "4chan", "Algorithm", "Digital footprint", "Experiment", "Linux", "Multidimensional scaling", "Parity bit", "RAID", "Response time (technology)", "Scalability" ], "id": "b19cb8fb5bcc8e91b630cb5a216a763e53b46cbc", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "402-411", "journalVolume": "", "outCitations": [ "03e9c5c4e1ae4fa725c1b10035741be4e65aac33", "9fb3e1391772ffc3bfbab3c5728a14466ca771b2", "130d811ffc2daff43c5203471b70f3eada0f57d1", "499b458ddd5d10dc4be158fd89aeed0e31b6b5cf", "0f5f084e7936796f2e7e284ec2b42b252a6af776", "5d796dce6ad1ab03522026f08b78a864470c51de", "1886edb4e771c1c0aa7bae360d7f3de23ac4ac8e", "1f1269db397595f5b5a08eb2e65022e9a8759648", "effbc4dd4d75631c80baa33da728f95abcf1aab0", "93f7ae572cb220afbc0b45e64f5e6bd17bf564b9", "162f1c3495c80e49246666986508e5829e269cc3", "b4a12be0d7ba62e364227791db38021c488a758c", 
"b4201b4f8d323990012f97814a4fbd7cf6ecd000", "fe18587396fe3107db23626ba91a9b358e5b41c9", "ec45237211873cc7b833d74535778e62070db737", "0f6a32792d0882db35fe9391445d4322232b619e" ], "paperAbstract": "Parity declustering is widely deployed in erasure coded storage systems so as to provide fast recovery and high data availability. However, to perform scaling on such RAIDs, it is necessary to preserve the parity declustered data layout so as to guarantee the RAID performance after scaling. Unfortunately, existing scaling algorithms fail to achieve this goal so they can not be applied for scaling RAIDs which have deployed parity declustering. To address this challenge, we develop an efficient scaling algorithm called PDS (Parity Declustering Scaling). In particular, we first employ an auxiliary Balanced Incomplete Block Design (BIBD) to define the data migrations during scaling so as to preserve parity declustered data layout, and then define the addressing algorithm in the scaled system based on the migrations. We provide theoretical proofs to show that PDS preserves the parity declustered data layout, which is the basis for scaling RAIDs with parity declustering, and also theoretically prove that PDS achieves the even distribution of data/parity blocks after scaling and requires only the minimal data migrations. To show the performance of PDS, we implement it in MD in Linux Kernel, and conduct experiments with real-world traces. 
Results show PDS can reduce 89.70% of data migration time and 24.44% of user response time during scaling on average, compared with the round-robin scheme.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.49" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b19cb8fb5bcc8e91b630cb5a216a763e53b46cbc", "sources": [ "DBLP" ], "title": "PDS: An I/O-Efficient Scaling Scheme for Parity Declustered Data Layout", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "b1a90d1ce4403aeaec632f38588ed7f8ee271813": { "authors": [ { "ids": [ "2729527" ], "name": "Bharat Sukhwani" }, { "ids": [ "2477992" ], "name": "Thomas Roewer" }, { "ids": [ "2809535" ], "name": "Charles L. Haymes" }, { "ids": [ "2306673" ], "name": "Kyu-hyoun Kim" }, { "ids": [ "16853118" ], "name": "Adam J. McPadden" }, { "ids": [ "2299788" ], "name": "Daniel M. Dreps" }, { "ids": [ "35138890" ], "name": "Dean Sanner" }, { "ids": [ "2513955" ], "name": "Jan van Lunteren" }, { "ids": [ "2321017" ], "name": "Sameh W. 
Asaad" } ], "doi": "10.1145/3123939.3124535", "doiUrl": "https://doi.org/10.1145/3123939.3124535", "entities": [ "CAS latency", "Computation", "DIMM", "Data buffer", "End system", "End-to-end encryption", "Field-programmable gate array", "Flash memory", "Gigabyte", "Magnetoresistive random-access memory", "Memory bus", "NVDIMM", "PCI Express", "Pin compatibility", "Prototype", "Registered memory", "Server (computing)" ], "id": "b1a90d1ce4403aeaec632f38588ed7f8ee271813", "inCitations": [], "journalName": "", "journalPages": "15-26", "journalVolume": "", "outCitations": [ "7c3ac5c755873cab6ae1120efa8606f93033b259", "9071886ada746122ec49ef0338c011f4ce1aa060", "8e8e622d5fab4c1d2a5bc7783db84e62cc570f9a", "9725f9d16b4dca9f6c558c87adc6a10125013077", "2960c89331eb7afa86584792e2e11dbf6a125820" ], "paperAbstract": "We demonstrate the use of an FPGA as a memory buffer in a POWER8® system, creating a novel prototyping platform that enables innovation in the memory subsystem of POWER-based servers. Our platform, called ConTutto, is pin-compatible with POWER8 buffered memory DIMMs and plugs into a memory slot of a standard POWER8 processor system, running at aggregate memory channel speeds of 35 GB/s per link. ConTutto, which means \"with everything\", is a platform to experiment with different memory technologies, such as STT-MRAM and NAND Flash, in an end-to-end system context. Enablement of STT-MRAM and NVDIMM using ConTutto shows up to 12.5x lower latency and 7.5x higher bandwidth compared to the respective technologies when attached to the PCIe bus. Moreover, due to the unique attach-point of the FPGA between the processor and system memory, ConTutto provides a means for in-line acceleration of certain computations on-route to memory, and enables sensitivity analysis for memory latency while running real applications. 
To the best of our knowledge, ConTutto is the first ever FPGA platform on the memory bus of a server class processor.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3124535" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b1a90d1ce4403aeaec632f38588ed7f8ee271813", "sources": [ "DBLP" ], "title": "Contutto: a novel FPGA-based prototyping platform enabling innovation in the memory subsystem of a server class processor", "venue": "MICRO", "year": 2017 }, "b1ad9de21353e10c0ebfabbbde6259cc045b367f": { "authors": [ { "ids": [ "36737765" ], "name": "Mahmoud Awad" }, { "ids": [ "1758079" ], "name": "Daniel A. Menasc\u00e9" } ], "doi": "10.1145/3030207.3030208", "doiUrl": "https://doi.org/10.1145/3030207.3030208", "entities": [ "Apache OFBiz\u00ae", "Black box", "Box modeling", "Computer", "Concurrency (computer science)", "Estimation theory", "Linear programming", "Nonlinear programming", "Nonlinear system", "Numerical analysis", "Operational system", "Optimization problem", "Program optimization", "Value (ethics)" ], "id": "b1ad9de21353e10c0ebfabbbde6259cc045b367f", "inCitations": [], "journalName": "", "journalPages": "127-138", "journalVolume": "", "outCitations": [ "39beb15a4ce5e02a4ca40596cc7a1713dc2d069c", "44b7abdcaf8534746c19778e3a5dd433f82fb684", "37537b391f1b5530838561e773bd6ca54ab091f3", "7136aa5e991b31b5b05c67a8e2497e6a4923a335", "15b07fc3bdcc7c98ed247604d270fed0cdf0d7e0", "727f947e8f1db1eb30df2a0fed5f004d5c2b6761", "195dd12dd6c13a23a0e963988f269b578179ad3c", "30383844c77e557458e56fdb840d05d21e3d3563", "b11d03358596970e599ec768ce3228dc1f06fc38", "0a679469a275d81b1851d6293476cffc3855a76f", "8d5be262a6b469fa6ce94db09cbdabb5ecdce488", "1833dee660500dd104ca84d99600b70c2479ba3c", "be238ca0134fb703da3314a5f04d883dceb2b0d3", "3ecde367b1dcb682449fe8ab72a30a80567d2475", "27e1d6f1af27b1ffdea30860180da7cbbecc8135", "5df0b27aa354ae871fb91403167c2716870426e0", "72f6c921afd9056c03dc6db0151b539e3ae699a3", 
"e0b0b8298c40102d8c5d4704d7ffd7f2300b9602", "e51ad61e6b9b6e9f56af8c331f50ec2c936c26b1", "0019a2539997a8c38523268b4c03d0316457654d", "05d0f7c0778e60098a5d13168596d0609d670e8c" ], "paperAbstract": "Black-box modeling techniques are used when modeling computer systems with unknown internal structure or behavior and/or when it is not feasible or too time consuming to monitor a running computer system. The main challenge in these situations lies in estimating values for the parameters of these models, especially the values of service demands at the various devices for each transaction class. These estimates have to be compliant with the input-output relationships observed through measurements. This means that solving a model of the system with the estimated parameters should yield the same outputs (e.g., response times) for the same inputs (e.g., arrival rates or concurrency level). This paper presents a method for automatically estimating service demands for open, closed, single and multiclass queuing networks (QN). The method is based on casting the estimation problem as a non-linear optimization problem. However, because the solution of closed QNs does not have a closed form, we need to resort to black-box optimization techniques. The parameter estimation method presented here is part of iModel, a framework for automatically deriving performance models of systems whose detailed characteristics (structure and behavior) are unknown. Other portions of the framework were discussed in detail in previous publications by the authors. This paper illustrates the ideas through several numerical examples and then applies them to a multi-tiered operational system running OFBiz. 
The estimated service demands closely satisfy the input-output relationships at various workload intensity levels and can be used for prediction purposes.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030208" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b1ad9de21353e10c0ebfabbbde6259cc045b367f", "sources": [ "DBLP" ], "title": "Deriving Parameters for Open and Closed QN Models of Operational Systems Through Black Box Optimization", "venue": "ICPE", "year": 2017 }, "b21d5b0e2610a06054f16b2baade8a765ade1623": { "authors": [ { "ids": [ "1837620" ], "name": "Ate Penders" }, { "ids": [ "1784556" ], "name": "Ana Lucia Varbanescu" }, { "ids": [ "2051828" ], "name": "Gregor Pavlin" }, { "ids": [ "1754743" ], "name": "Henk J. Sips" } ], "doi": "10.1145/3030207.3044532", "doiUrl": "https://doi.org/10.1145/3030207.3044532", "entities": [ "Decision support system", "Hoc (programming language)", "Requirement", "Scalability" ], "id": "b21d5b0e2610a06054f16b2baade8a765ade1623", "inCitations": [], "journalName": "", "journalPages": "179-187", "journalVolume": "", "outCitations": [ "35f00eb1d7662ebb298d9128b3f9eb8263e8578f", "062d84d3b29c07bb936a17ccf7cc6ac17bccbebe", "a63b5fd83511a0081521d27af31687f94d7ee04a", "a11c69a6331cd1c1e13dc43335c22bdf6f6aab24", "208aee805004d575035284b1c232209e9fa26b0e", "a8385230a613ee42aa927b41b121bb2cf2ba9305", "8a33c47c2a3f0e46dbb30f5203b6a1c6d8fefd8f" ], "paperAbstract": "Many situations in the security domain require decision-making based on complex data, i.e., many variables which need to be taken into account before adequate decisions can be made. For example, in a surveillance scenario, the size and complexity of the area of interest, the mix of objects, and the unexpected behavior of suspects are just a few examples of complex variables to be analyzed in the process. Existing decision support systems provide some analysis, but are typically limited in the complexity they can handle. 
Therefore, users end up with simplified models which often suffer in the accuracy of their decisions and, ultimately, may lead to incorrect decisions. In this work, we present a framework that can scale to cope with the complexity and time requirements of real-world scenarios, while remaining flexible to handle the ad-hoc adaptation to the situation. We discuss the challenges and solutions for such a scalable and flexible system, and validate it using a target tracking scenario in urban environments of different sizes.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3044532" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b21d5b0e2610a06054f16b2baade8a765ade1623", "sources": [ "DBLP" ], "title": "A Performance-centric Approach for Complex Decision Support", "venue": "ICPE", "year": 2017 }, "b22ca0c29797cedb2274efe35d3bcf98364856c6": { "authors": [ { "ids": [ "7353330" ], "name": "Jalil Boudjadar" }, { "ids": [ "1943522" ], "name": "Simin Nadjm-Tehrani" } ], "doi": "10.1145/3030207.3030233", "doiUrl": "https://doi.org/10.1145/3030207.3030233", "entities": [ "Cache (computing)", "Computer performance", "Dynamic random-access memory", "Embedded system", "File system permissions", "Interference (communication)", "Multi-core processor", "Preemption (computing)", "Real-time computing", "Scheduling (computing)", "Shared memory", "Symmetric multiprocessing", "UPPAAL" ], "id": "b22ca0c29797cedb2274efe35d3bcf98364856c6", "inCitations": [ "c5c063230cf6464c449a0ead22f8575af7028bd1" ], "journalName": "", "journalPages": "263-274", "journalVolume": "", "outCitations": [ "01eb0a3e704fd47e9d578dd6c75173fc41a6b400", "d93cb11736a6d3633aad4fede0db0b7293dc2e4b", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "23c8c65aa8104f04b03f954d1e5b34a542a7aac5", "007394c2bae389cf43e46db4567dafe206355c25", "2b85286357f0f1de32fcc18d180961f81be75891", "b736680f159329169fe4e146b52410117e142f94", "23474d9b1d7bd02c14a17888f19e51286aed84b0", 
"2cd283ba2b8472f960c1a9925610c96ff24a9ea3", "363a463edc875aaaf39a4ba2b20e8ee24ece88dc", "867822e5a25ae6d17e2499f27d728a8bfef14303", "a77e185375d865c390a07456e4ffb629cf17b0ae", "0593d5d8fc1ad2eb37f89a7d87065b38a280e00d", "5dcd9d16915e9339875da0b3fc270aa508d5eeac", "4c58a77eae672246f59409895d751f59dfbd8538", "10be1fd8bbccd883fd9bacf147cd68c10542892b", "15af93f7b51abbc21a985c878bd5e2559f4c5de7", "0ac1db0e0fd8401612418d2c77e6f3b64017d175", "65a6478b45537293fc7eef6925b05df7e7b23db3", "0fb953b5765af690e1e0e308d19e42733c96b798", "23f4f3430cd97f034563dc0a41039c5fbc58f6a3", "4c078fd1f07b9e8b65e27ecd9790e9f4fc579af1", "9914d716a03721e20f6fa085f20e271a111259c4", "aef4edfc05fbe978cc558398cf8de95e06f22f50", "5dfca9908c4e10dbea480bacc5142b83cae2c061", "9de8612ea665496df9986abc02d06d7d4b0d5cf7", "4e7c251022412959a32679353bd0b90963338a05", "1a7444cc5327f7b48ada3219b641dbf0b5d6706c", "85398d5f19157c91bf00da3d36210e72d57887e4", "1ed99e4224e9d914d8ec1b70796f7470f58d87d3", "13311075ba146d37bfc68a3ac8c436614ee4551b" ], "paperAbstract": "Today's embedded systems demand increasing computing power to accommodate the ever-growing software functionality. Automotive and avionic systems aim to leverage the high performance capabilities of multicore platforms, but are faced with challenges with respect to temporal predictability. Multicore designers have achieved much progress on improvement of memory-dependent performance in caching systems and shared memories in general. However, having applications running simultaneously and requesting the access to the shared memories concurrently leads to interference. The performance unpredictability resulting from interference at any shared memory level may lead to violation of the timing properties in safety-critical real-time systems. In this paper, we introduce a formal analysis framework for the schedulability and memory interference of multicore systems with shared caches and DRAM. 
We build a multicore system model with a fine grained application behavior given in terms of periodic preemptible tasks, described with explicit read and write access numbers for shared caches and DRAM. We also provide a method to analyze and recommend candidates for task-to-core reallocation with the goal to find schedulable configurations if a given system is not schedulable. Our model-based framework is realized using Uppaal and has been used to analyze a case study.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030233", "https://research.spec.org/icpe_proceedings/2017/proceedings/p263.pdf", "http://www.ida.liu.se/labs/rtslab/publications/2017/Boudjadar_ICPE.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b22ca0c29797cedb2274efe35d3bcf98364856c6", "sources": [ "DBLP" ], "title": "Schedulability and Memory Interference Analysis of Multicore Preemptive Real-time Systems", "venue": "ICPE", "year": 2017 }, "b243c5c5fe90cb485d06ba6e89c2b977d51bcc39": { "authors": [ { "ids": [ "27024140" ], "name": "Felix Fischer" }, { "ids": [ "29905536" ], "name": "Konstantin Bottinger" }, { "ids": [ "36227053" ], "name": "Huang Xiao" }, { "ids": [ "3447368" ], "name": "Christian Stransky" }, { "ids": [ "3224778" ], "name": "Yasemin Acar" }, { "ids": [ "40387446" ], "name": "Michael Backes" }, { "ids": [ "2200198" ], "name": "Sascha Fahl" } ], "doi": "", "doiUrl": "", "entities": [ "Android", "Application security", "Code reuse", "Computer security", "Cut, copy, and paste", "Gradient", "Gradient", "Gradient descent", "Information security", "Information source", "Paste", "Paste substance", "Philosophy of mind", "Play Store", "Scanning", "Software developer", "Solutions", "Stack Overflow", "Static program analysis", "Stochastic gradient descent", "benefit" ], "id": "b243c5c5fe90cb485d06ba6e89c2b977d51bcc39", "inCitations": [ "5ea9f80d7fabe983237f1d1228beb4aa8d7ad960", "cf5927dded491e20fc140215577c5d25957d169b", 
"4b8c9eec7737918e3897f7ebcbc66cff2f3087b2", "d3a53dd08418d08ca9860b89291581b1e5fca0a9", "12036e4300ec545866efa64bd97492983cb81ab0", "2c0e0537ecf9f9b3b12527daba73b1bf98df10b4", "80dc1bcd0d0479b271021a1aa481258669bbc173", "2ae81140776144829d4065e148874bded8d1e130", "27149f77c76ba61388a41853835c903a7d4d14fb", "faf43dc5d23621bfe89a98082f88263686b4d0b5", "558eab42b1c2c85846b5f8cef857019a98748528", "0620e2b10f91951b0399f4c17cdd3fa7be5516ac" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "11a732848cbbdad81e660c1fc7c4a88d2d9c1d6b", "f34682e88b66ea5aa75481cfb6ce8700f4115ec4", "03a613951421cf67237d5278d6bf3702a26da9aa", "6209474964e02ba34ed539cf3cee8044048a1bda", "4b672d2682dd9ee92aa0c26b27827ec327b55adb", "023f23c300804754753cb11db51fb7f582556ab7", "7fdd4a76b2fcf90a4173a1c70ae75cf51c6525c9", "565675505369308ca04468628cf9663a9d92931c", "77122542dfb5772b394cfc95acef885583f6414d", "582302da008255ff515f05c3242f750878725745", "71f8163801980fbaa494cb8c149bd7388034c2ba", "00df34b25525997c3cba77c6c84ee9c859cdaae9", "0a4d4d227997b70933bf4af8016062caa048d985", "6a51d267dbd1567c5913c171b2d9f960407a1ae8", "7f2eae392eb9c697bc39f6581770c7ad05ae5ef3", "691bb92ffd229e2cce7c42c1ede818915afa73ee", "451b61b390b86ae5629a21461d4c619ea34046e0", "f6bbbf2cc785cf96019dcd9c41ab1801aad962dd", "24c223b3ee1b3198a021a2dcbf437136c80442ae", "f0987b07fe295b47b1d9a825daca9dde152450a3", "16cdcedec9a5fc51d89591034908b8580b911b7c", "b6f5251a67c5cf8539c0213c387a583cdcefd493", "0fa3eabd538d777556f3e87399959d05cefa1f69", "a851b38c914b5c005e2be8b43c2d17c98e09f7c1", "23fa7b866a1b1fee7bb71c8b5a9235cca7120bbc", "f60628636b64c187db1f106823f5af5730b973cd", "a411c30fa4acb68b309a21167554bd97632968fa", "6d59f58f7408362036196048c9ba11f399dd9bc2", "0774d20b08ba0ea6cc8f98ccf9caed5f337fcf22", "fbe2afdb72bd3b37c8aee4f3d209f7143ec9484d", "c93057aec13d8b1557ef1f6f68331cd3aef280c8", "d67fb709dda8c56f43463a432e15a61d04dc56c8", "48fc8f1aa0b6d1e4266b8017820ff8770fb67b6f", 
"6a74a8573cb1bd15c5f4fa4e047613d2340e61b9", "ea93642d3394b0ba1d5144835ce747a74457fe4f", "771821636c87f9b338e20c35674116e1b99bade6", "23c63a0b251eb3e11b1f4c2c6733261a2f765f54", "1a0ca2cf71616fa492ebe611f1d83261d2ecf052", "9048746d108e983626c3405ad1775bbcc3933c6d", "cf5dfdbb0b8d1c673726178f37f499b77fcc7f03" ], "paperAbstract": "Online programming discussion platforms such as Stack Overflow serve as a rich source of information for software developers. Available information include vibrant discussions and oftentimes ready-to-use code snippets. Previous research identified Stack Overflow as one of the most important information sources developers rely on. Anecdotes report that software developers copy and paste code snippets from those information sources for convenience reasons. Such behavior results in a constant flow of community-provided code snippets into production software. To date, the impact of this behaviour on code security is unknown. We answer this highly important question by quantifying the proliferation of security-related code snippets from Stack Overflow in Android applications available on Google Play. Access to the rich source of information available on Stack Overflow including ready-to-use code snippets provides huge benefits for software developers. However, when it comes to code security there are some caveats to bear in mind: Due to the complex nature of code security, it is very difficult to provide ready-to-use and secure solutions for every problem. Hence, integrating a security-related code snippet from Stack Overflow into production software requires caution and expertise. Unsurprisingly, we observed insecure code snippets being copied into Android applications millions of users install from Google Play every day. To quantitatively evaluate the extent of this observation, we scanned Stack Overflow for code snippets and evaluated their security score using a stochastic gradient descent classifier. 
In order to identify code reuse in Android applications, we applied state-of-the-art static analysis. Our results are alarming: 15.4% of the 1.3 million Android applications we analyzed, contained security-related code snippets from Stack Overflow. Out of these 97.9% contain at least one insecure code snippet.", "pdfUrls": [ "https://arxiv.org/pdf/1710.03135v1.pdf", "https://www.aisec.fraunhofer.de/content/dam/aisec/Dokumente/Publikationen/Studien_TechReports/englisch/stackoverflow.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/b243/c5c5fe90cb485d06ba6e89c2b977d51bcc39.pdf", "s2Url": "https://semanticscholar.org/paper/b243c5c5fe90cb485d06ba6e89c2b977d51bcc39", "sources": [], "title": "Stack Overflow Considered Harmful? The Impact of Copy&Paste on Android Application Security", "venue": "", "year": 2017 }, "b2b0c5e447f4a6c30e0517ea0c12359afd704a02": { "authors": [ { "ids": [ "40202773" ], "name": "Fulya Kaplan" }, { "ids": [ "3133439" ], "name": "Ozan Tuncer" }, { "ids": [ "2667137" ], "name": "Vitus J. Leung" }, { "ids": [ "3490717" ], "name": "Scott K. 
Hemmert" }, { "ids": [ "1809774" ], "name": "Ayse Kivilcim Coskun" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Bisection bandwidth", "Circulant matrix", "Distributed computing", "DragonFly BSD", "Memory management", "Message Passing Interface", "Message passing", "Network planning and design", "Network topology", "Routing", "Simulation" ], "id": "b2b0c5e447f4a6c30e0517ea0c12359afd704a02", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "325-334", "journalVolume": "", "outCitations": [ "37f8740b10aba04ddb3bf0f59d78b6690810f737", "f3f06def344f00f23a59400d0d4b57ba4b62532f", "2d9df0c9ae1f4e120e11c6b201b11d6156123188", "9d30381c49afa033eacc04fb68975762eb7bafab", "5f8991828def57d2f0cda942566afff56740d150", "039104de40663d64bd46f4a2a227f741f1b835e6", "6bad177eb5fc0fd7ea223149cec4a76d8567479a", "0fdc552feb374a899b627b7e32b468f5f0d2dd3f", "15c0ff1ace0798e8ef9767a76f90a32d1ee3ee8b", "29c1001fbf50322f756ec35282859755e177fe8d", "31cfefc79d64ede4c13f231b8b30ebfd45666d3d", "197181eabcad2e6a481d7e5a4ba836f00ff6ecac", "4654de106f5fd7caf1aab17468fad46a525c9da2", "54ade7ae16c9495b238d260de2de79c1a588453f", "317b05a4e53fa3e7150bcc5bd65e2bdd6502b0ec", "ec78bf9c78935b02212803858527546654ce8a90", "18a8ab664b3ee23504c302640e5792202bafe401", "628aa212662145d5a5bdb517151285057a4b1e78", "75d69c48f94d16eccf1f8c253ac1ab31fa8c0c35", "cd9b00b82be34d0a408cf7389b00f31c5d061b32", "3a532ca7b36b90e3057d159419e10f565be11cc5", "34a86b7b24d4a93dbb249fc05f0b7c0f48f90aff", "0736d68aad2c198a8f6dda851c27bd180421c2aa", "c96d3e9fef5f39b4fc720f4d4d4da13b82472c5b", "f271c36891cf15f9fda3d49b46b187ca8685bf48", "e772ca79418b5f5bae353bef93ec3064443224b1", "6069f23cf3e413a6ce60bec60acb60952d88cc95", "415c7835aa18984d92086edbb9d9937fcdd0a6eb", "3ec4cf958f6ee00dc00aa14840c96268c4c3f9c9", "6270e8d66431132aac41cbc7ace0f91248c716ab", "295f89863a7d6a4d8a71ec5c98dec4c039ba9200", 
"4110d5ad162fbf43a3418f28b4d46609c2a147be" ], "paperAbstract": "Network messaging delay historically constitutes a large portion of the wall-clock time for High Performance Computing (HPC) applications, as these applications run on many nodes and involve intensive communication among their tasks. Dragonfly network topology has emerged as a promising solution for building exascale HPC systems owing to its low network diameter and large bisection bandwidth. Dragonfly includes local links that form groups and global links that connect these groups via high bandwidth optical links. Many aspects of the dragonfly network design are yet to be explored, such as the performance impact of the connectivity of the global links, i.e., global link arrangements, the bandwidth of the local and global links, or the job allocation algorithm. This paper first introduces a packet-level simulation framework to model the performance of HPC applications in detail. The proposed framework is able to simulate known MPI (message passing interface) routines as well as applications with custom-defined communication patterns for a given job placement algorithm and network topology. Using this simulation framework, we investigate the coupling between global link bandwidth and arrangements, communication pattern and intensity, job allocation and task mapping algorithms, and routing mechanisms in dragonfly topologies. We demonstrate that by choosing the right combination of system settings and workload allocation algorithms, communication overhead can be decreased by up to 44%. 
We also show that circulant arrangement provides up to 15% higher bisection bandwidth compared to the other arrangements, but for realistic workloads, the performance impact of link arrangements is less than 3%.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101158" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b2b0c5e447f4a6c30e0517ea0c12359afd704a02", "sources": [ "DBLP" ], "title": "Unveiling the Interplay Between Global Link Arrangements and Network Management Algorithms on Dragonfly Networks", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "b30a10f682301c03a282129ada222ed048888481": { "authors": [ { "ids": [ "1812494" ], "name": "Nikhil Jain" }, { "ids": [ "1823585" ], "name": "Abhinav Bhatele" }, { "ids": [ "3209205" ], "name": "Xiang Ni" }, { "ids": [ "33289410" ], "name": "Todd Gamblin" }, { "ids": [ "1731961" ], "name": "Laxmikant V. Kal\u00e9" } ], "doi": "10.1109/IPDPS.2017.91", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.91", "entities": [ "Fat tree", "Interference (communication)", "Jumpstart Our Business Startups Act", "Mesh networking", "Router (computing)", "Spatial variability", "Supercomputer" ], "id": "b30a10f682301c03a282129ada222ed048888481", "inCitations": [ "df76dfe0724ab41d98673d13f472d9b27eada485" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "439-448", "journalVolume": "", "outCitations": [ "75d69c48f94d16eccf1f8c253ac1ab31fa8c0c35", "42e5e97272ad8728749f861ed7a920707e698778", "c64582aec1819679d48f0acf5a8d4edca5ce74cb", "59c6c3ce034e5dadbc378604c7294eccec4ec47f", "5f8991828def57d2f0cda942566afff56740d150", "6bad177eb5fc0fd7ea223149cec4a76d8567479a", "33715194bf741fe17d6f6b9559af694907c26d2a", "0a9c8fef61634e392f9de6f34361cc1c690f7a00", "5341bdf934b3a99af6685b5564af8e03d1b780a7", "c39c26d510c1a965c5f132edc989a598ca92b700", "18fbcb1de113f5d60c8e81566231a0ecea46f3fe", 
"4110d5ad162fbf43a3418f28b4d46609c2a147be", "426c9d912caaed6c1781103c3700415ce35b2853", "f57ac7f53438b2877022125bac957fda2bb2a97b", "16a4367795dc7fde9bae65de3a5fda8300f27a46", "0736d68aad2c198a8f6dda851c27bd180421c2aa", "9c3ea3815b3747718cf498fb4904302fc38a4549", "15c0ff1ace0798e8ef9767a76f90a32d1ee3ee8b" ], "paperAbstract": "On most supercomputers, except some torus network based systems, resource managers allocate nodes to jobs without considering the sharing of network resources by different jobs. Such network-oblivious resource allocations result in link sharing among multiple jobs that can cause significant performance variability and performance degradation for individual jobs. In this paper, we explore low-diameter networks and corresponding node allocation policies that can eliminate inter-job interference. We propose a variation to n-dimensional mesh networks called express mesh. An express mesh is denser than the corresponding mesh network, has a low diameter independent of the number of routers, and is easily partitionable. We compare structural properties and performance of express mesh with other popular low-diameter networks. We present practical node allocation policies for express mesh and fat-tree networks that not only eliminate inter-job interference and performance variability, but also improve overall performance.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.91" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b30a10f682301c03a282129ada222ed048888481", "sources": [ "DBLP" ], "title": "Partitioning Low-Diameter Networks to Eliminate Inter-Job Interference", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "b31b56e0cf35e8cdbe7c56ce1d22e6534db659fd": { "authors": [ { "ids": [ "34595336" ], "name": "Ajai V. George" }, { "ids": [ "19066514" ], "name": "Sankar Manoj" }, { "ids": [ "32175108" ], "name": "Sanket R. 
Gupte" }, { "ids": [ "2586955" ], "name": "Sayantan Mitra" }, { "ids": [ "40490806" ], "name": "Santonu Sarkar" } ], "doi": "10.1109/HiPC.2017.00049", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00049", "entities": [ "CUDA", "Graphics processing unit", "Mathematical optimization", "Memory hierarchy", "Shared memory", "Software developer", "Speedup", "Supercomputer", "Thrust" ], "id": "b31b56e0cf35e8cdbe7c56ce1d22e6534db659fd", "inCitations": [ "d205b3e2d8d30dca9737e3838decf28205ea91e3" ], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "368-377", "journalVolume": "", "outCitations": [ "1b76da37e041d106e0007f7147510411eafa3271", "1e2bfc4119468e9a950c281caa1d46bdfd3b78e0", "464d94b3dc9a109dd64008a41a00181830f285aa", "0ef82bbfdec840663026dc2fa9e3db111add7efa", "670e46907f047b5969fb312f9da3743616570588", "34b44a9e55184b48c94a15f29f052941b342e8bf", "7370316302bfff5ef6ff760cbfaae668e4760275", "59ea950568acd366a521efdb9749ad4d8f54afbb", "055b3aaa7ac3e5007235e32d229de30f961fcaa0", "259cbdb97385a3db5cca79f11f9d589f3b4c324d", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "230bb35cda12bea4fbffb8e12d4ece1c183f8342", "77d44b1177dafea2b9ed93dcf15ef2e3202e3ed7", "4954fa180728932959997a4768411ff9136aac81" ], "paperAbstract": "A good design abstraction framework for high performance computing should provide a higher level programming abstraction that strikes a balance between the abstraction and visibility over the hardware so that the software developer can write a portable software without having to understand the hardware nuances, yet exploit the compute power optimally. In this paper we have analyzed a popular design abstraction framework called \"Thrust\" from NVIDIA, and proposed an extension called Thrust++ that provides abstraction over the memory hierarchy of an NVIDIA GPU. 
Thrust++ allows developers to make efficient use of shared memory and overall, provides better control over the GPU memory hierarchy while writing applications in Thrust style for the CUDA backend. We have shown that when applications are written for the CUDA backend using Thrust++, they have minimal performance degradation when compared to their equivalent CUDA versions. Further, Thrust++ provides almost 4x speedup when compared to Thrust, for certain compute intensive kernels that repeatedly use the reduce operation.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00049" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b31b56e0cf35e8cdbe7c56ce1d22e6534db659fd", "sources": [ "DBLP" ], "title": "Thrust++: Extending Thrust Framework for Better Abstraction and Performance", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "b31bdf325374eb060ef8c14887c41931229c60f1": { "authors": [ { "ids": [ "38492872" ], "name": "Scott Schneider" }, { "ids": [ "9082182" ], "name": "Kun-Lung Wu" } ], "doi": "10.1145/3062341.3062366", "doiUrl": "https://doi.org/10.1145/3062341.3062366", "entities": [ "Algorithm", "Elasticity (cloud computing)", "Lock (computer science)", "Non-blocking algorithm", "Program optimization", "Programmer", "Programming language", "STREAMS", "Scalability", "Scheduling (computing)", "Software deployment", "Stream processing" ], "id": "b31bdf325374eb060ef8c14887c41931229c60f1", "inCitations": [ "f08eeeb559f4c644e0e3cfaa9f2b4a359c25e073" ], "journalName": "", "journalPages": "648-661", "journalVolume": "", "outCitations": [ "0003b4b4c35f0384596b6f129f7dda8449cdc79f", "ca2c1d226cb8e38ad0e3457ccb3703da04c8bec0", "ce91b0ca3b88bd0464f5e7ca7564ee84ed7b371c", "05a0ee8cfb5752aaa8f6b5e879136e6cb820a6b2", "4ece92843e5014e0c4d819811ef29e3128edd43f", "0608d9937c074520cdc93cc444cc1c77039c5332", "34b800320b332f624827c1a4507f9d8144efbb1c", 
"2dd7d97ca3b007aff337b717d27e6f7f155e820c", "1fcc527c54e692ab6db69a8a6b5f5ee9118e0dd6", "f631c394680833a05a39b00e9e5242a4759adc36", "0fb659af82f2277c8a62ac888f4bfd01570e5470" ], "paperAbstract": "We present the scalable, elastic operator scheduler in IBM Streams 4.2. Streams is a distributed stream processing system used in production at many companies in a wide range of industries. The programming language for Streams, SPL, presents operators, tuples and streams as the primary abstractions. A fundamental SPL optimization is operator fusion, where multiple operators execute in the same process. Streams 4.2 introduces automatic submission-time fusion to simplify application development and deployment. However, potentially thousands of operators could then execute in the same process, with no user guidance for thread placement. We needed a way to automatically figure out how many threads to use, with arbitrarily sized applications on a wide variety of hardware, and without any input from programmers. Our solution has two components. The first is a scalable operator scheduler that minimizes synchronization, locks and global data, while allowing threads to execute any operator and dynamically come and go. The second is an elastic algorithm to dynamically adjust the number of threads to optimize performance, using the principles of trusted measurements to establish trends. 
We demonstrate our scheduler's ability to scale to over a hundred threads, and our elasticity algorithm's ability to adapt to different workloads on an Intel Xeon system with 176 logical cores, and an IBM Power8 system with 184 logical cores.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062366" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b31bdf325374eb060ef8c14887c41931229c60f1", "sources": [ "DBLP" ], "title": "Low-synchronization, mostly lock-free, elastic scheduling for streaming runtimes", "venue": "PLDI", "year": 2017 }, "b31df172403f21e7d79b393c0a1478d67c7e7b78": { "authors": [ { "ids": [ "1714689" ], "name": "Weizhong Zhao" }, { "ids": [ "1690235" ], "name": "Gang Chen" }, { "ids": [ "6721743" ], "name": "Xiaowei Xu" } ], "doi": "10.1109/ICDM.2017.76", "doiUrl": "https://doi.org/10.1109/ICDM.2017.76", "entities": [ "Algorithm", "Anytime algorithm", "Cluster analysis", "Experiment", "Synthetic data" ], "id": "b31df172403f21e7d79b393c0a1478d67c7e7b78", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "665-674", "journalVolume": "", "outCitations": [ "0c2e3ea1e403cbd72cb834090df1284f83e17a1e", "13a375a84a6c414b85477a401541d3e28db1e11a", "1dbc30663ff1284534baf992ba82774817b2f36d", "beec7b544eeb2389dd3843feafe261867ed886c5", "818826f356444f3daa3447755bf63f171f39ec47", "b5794326ef0fe0be3323c449d5c4818cc00abcf0", "2a381e829258d36597a89e70874661f960f462d1", "13c47ed140cc36191678a48b3115939cd0aa8314", "c8325c4b3e2fc58dac7fb86104ca20e1854d40fe", "1521d39088b203ddac981d10d214f463449ae95b", "a9652fa03a112d2d1d792af8ca01165680f49122", "fc6794a6569549d1fd63abf795bf0546567e6c01", "e0d2861a9022667a93a8a0573d44f238f7c3a027", "40a2a398862f5c62555ffaf6d8421dea9f1bbcd3", "0156d249cf86304bd16254ea95ad33c6a3006823", "c5a70f3011de3cb06efcd8b52170f758f4d0bca8", "2af4a96f88ec630c57a28461751af3659ec98dd4", "040d02525b07bf807d7efa05d3556431de99282b", 
"ecf9adafc610cd417be2aa4092e809446e0f361f", "71e5a6c95ff476c303c3adeae0c1a4387485f733", "3f8d3e62bc5747f0f31bb9c2dc61094ea82be706" ], "paperAbstract": "Network clustering is an essential approach to finding latent clusters in real-world networks. As the scale of real-world networks becomes increasingly larger, the existing network clustering algorithms fail to discover meaningful clusters efficiently. In this paper, we propose a framework called AnySCAN, which applies anytime theory to the structural clustering algorithm for networks (SCAN). Moreover, an active learning strategy is proposed to advance the refining procedure in AnySCAN framework. AnySCAN with the active learning strategy is able to find the exactly same clustering result on large-scale networks as the original SCAN in a significantly more efficient manner. Extensive experiments on real-world and synthetic networks demonstrate that our proposed method outperforms existing network clustering approaches.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.76" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b31df172403f21e7d79b393c0a1478d67c7e7b78", "sources": [ "DBLP" ], "title": "AnySCAN: An Efficient Anytime Framework with Active Learning for Large-Scale Network Clustering", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "b33b795533155637ceaa2c89da8bd20794a34d51": { "authors": [ { "ids": [ "25106267" ], "name": "Daeyoun Kang" }, { "ids": [ "8102722" ], "name": "Tae Joon Jun" }, { "ids": [ "2525559" ], "name": "Dohyeun Kim" }, { "ids": [ "2155208" ], "name": "Jaewook Kim" }, { "ids": [ "38004716" ], "name": "Daeyoung Kim" } ], "doi": "10.1109/CLUSTER.2017.17", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.17", "entities": [ "Algorithm", "Best, worst and average case", "Central processing unit", "Deadlock", "Docker", "Graphics", "Graphics processing unit", "Middleware", "Operating-system-level virtualization", "Parallel 
computing", "Scheduling (computing)", "Supercomputer" ], "id": "b33b795533155637ceaa2c89da8bd20794a34d51", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "301-309", "journalVolume": "", "outCitations": [ "04704080ae469d24797ee6369f2e2a72ffcca828", "58c26f09f59a47b89a82c9e4f82708e3798e694d", "ae69aa2a18530be63f789e6c4399dbdd6a3790f5", "7f01ded4bc1d3e658e7969a4ba7d262a6f7d2ed9", "6b2c12c91f904781019f187681833d35f5c06e57", "0be302437cec82b9200d61d13d3125e62a8ef499", "45472bef11491245ad51dde6963e3cc40c5f3b79", "01f0204e33faa4f3524f8fe652c9ec42955891b9", "7cfd14cec177b2ea04ff2855b94009cf739d42b0" ], "paperAbstract": "Nowadays, Graphics Processing Unit (GPU) is essential for general-purpose high-performance computing, because of its dominant performance in parallel computing compare to that of CPU. There have been many successful trials on the use of GPU in virtualized environment. Especially, NVIDIA Docker obtained a most practical way to bring GPU into the container-based virtualized environment. However, most of these trials did not consider sharing GPU among multiple containers. Without the above consideration, a system will experience a program failure or a deadlock situation in the worst case. In this paper, we propose ConVGPU, a solution to share the GPU in multiple containers. With ConVGPU, the system can guarantee the required GPU memory which the container needs to execute. To achieve it, we introduce four scheduling algorithms that manage the GPU memory to be taken by the containers. 
These algorithms can prevent the system from falling into deadlock situations between containers during execution.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.17" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b33b795533155637ceaa2c89da8bd20794a34d51", "sources": [ "DBLP" ], "title": "ConVGPU: GPU Management Middleware in Container Based Virtualized Environment", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "b34fa14b96a9058d5d273fadce33f6d7fdb152fe": { "authors": [ { "ids": [ "40545209" ], "name": "Gonzalo Pedro Rodrigo \u00c1lvarez" }, { "ids": [ "1685517" ], "name": "Erik Elmroth" }, { "ids": [ "3117458" ], "name": "Per-Olov \u00d6stberg" }, { "ids": [ "1792683" ], "name": "Lavanya Ramakrishnan" } ], "doi": "10.1145/3078597.3078604", "doiUrl": "https://doi.org/10.1145/3078597.3078604", "entities": [ "Algorithm", "Baseline (configuration management)", "Job scheduler", "Jumpstart Our Business Startups Act", "Requirement", "Scheduling (computing)", "Simulation", "Slurm", "Supercomputer", "Synthetic data" ], "id": "b34fa14b96a9058d5d273fadce33f6d7fdb152fe", "inCitations": [], "journalName": "", "journalPages": "3-14", "journalVolume": "", "outCitations": [ "5f3f9223c5c9f896be099bc177929febad508407", "6a190f5699d13f849e8d0cb9626c96dc4f64b2f7", "f524244c8e974a986ec15d05d3b4d422c936edd5", "bf6071b74ce327a8e9c32ca2989bd3516844a5e5", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "0784356b46a1345b352ab634bda835c07ff04af2", "3cce658ddd549b0d2338d0742c979ea58559eacd", "1dee6413f13c8dc3b736a2a842d65b0438295f9d", "31b27a3b4ff89993eb92e8b1353edead8d5f2520", "170e745007d2baf2fd566afe360d26500290640e", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "b6571efa4483aa00d23bbcd36930c4877548ba38", "05c6db23f18ab2228baac3a7a1ba8b1df031320a", "1f2c123e0cabefe1576b8491697db07140e98783", "1324f1d5b20f08cac775f10089a788767c56d5a9", "cefb2ec6d1e856dcba0836b88f6b12b3c455379a", 
"835916e7ad1231d5aa2985340b0ee543cadbb5b6", "2730606a9d29bb52bcc42124393460503f736d74", "11310368999afdce94bca4316eea38216b2446c5", "9d607cb1c8479e009a1304af8f494f09ffa412dc", "0fc5af2a00652db220ba711333a02644562d3077", "7be3b202518bcce34c1b0b5224508bed7fca2294", "4e36915f4c27e7770cf59a9985f4b603f6b23637" ], "paperAbstract": "Scientific workflows are increasingly common in the workloads of current High Performance Computing (HPC) systems. However, HPC schedulers do not incorporate workflow-specific mechanisms beyond the capacity to declare dependencies between their jobs. Thus, workflows are run as sets of batch jobs with dependencies, which induces long intermediate wait times and, consequently, long workflow turnaround times. Alternatively, to reduce their turnaround time, workflows may be submitted as single pilot jobs that are allocated their maximum required resources for their entire runtime. Pilot jobs achieve shorter turnaround times but reduce the HPC system's utilization because resources may idle during the workflow's execution. We present a workflow-aware scheduling (WoAS) system that enables existing scheduling algorithms to exploit fine-grained information on a workflow's resource requirements and structure without modification. The current implementation of WoAS is integrated into Slurm, a widely used HPC batch scheduler. We evaluate the system using a simulator using real and synthetic workflows and a synthetic baseline workload that captures job patterns observed over three years of workload data from Edison, a large supercomputer hosted at the National Energy Research Scientific Computing Center. 
Our results show that WoAS reduces workflow turnaround times and improves system utilization without significantly slowing down conventional jobs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078597.3078604" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b34fa14b96a9058d5d273fadce33f6d7fdb152fe", "sources": [ "DBLP" ], "title": "Enabling Workflow-Aware Scheduling on HPC Systems", "venue": "HPDC", "year": 2017 }, "b359adacfa2dd6ab15c8fe930f99ac0db0be30a8": { "authors": [ { "ids": [ "1685757" ], "name": "Song Wu" }, { "ids": [ "1714812" ], "name": "Chao Niu" }, { "ids": [ "1786877" ], "name": "Jia Rao" }, { "ids": [ "2156156" ], "name": "Hai Jin" }, { "ids": [ "19208996" ], "name": "Xiaohai Dai" } ], "doi": "10.1109/IPDPS.2017.47", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.47", "entities": [ "Android", "Booting", "CPU cache", "Cloud computing", "Computation", "Computation offloading", "Computer data storage", "FUJITSU Cloud IaaS Trusted Public S5", "Memory footprint", "Mobile app", "Mobile cloud computing", "Mobile device", "Operating system", "Operating-system-level virtualization", "Runtime system", "Smartphone", "Speedup", "Virtual machine" ], "id": "b359adacfa2dd6ab15c8fe930f99ac0db0be30a8", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "123-132", "journalVolume": "", "outCitations": [ "a36bcf48132a1e115020822740e5077d5dcf73e4", "6a656a567097c53a49b1dbeb9e1e77bebf7524ec", "4e27f44ade4545931a99eee2dc8011b44f5db4b6", "24459d006abcf0afd80a33b32884da80236d7a84", "59d45d685e35f5a84768c029ea09b9c48765251e", "5892b9314971e90e32d8bf81ca4e7dcbecb5ef8f", "e65305a3a0bda61bff11d7f70ccc9057039f26b3", "1914d07f940dcae3d82642513718858925a26fc3", "1e126cee4c1bddbfdd4e36bf91b8b1c2fe8d44c2", "594710511ce2177ff7dbbc62fa75dbf14fc7ca26", "08b11d0812f6cc3c9b954c116d36bd983ead6241", "06706cd6c46bc413bb9c272d6c1c034313ff2742", "31d8a5c682486e27eae512fce092d0d458e05760", 
"2c2cfbec94307fc92192e5a4be0d0731799f9bf9", "3760de53eec6bff96f2cca1365271b97e1118179", "0a289fd7b14345822b1acda6d82750b15d59663e", "38208e29960623a60d0f34079c2641319ac21bc9", "14d644c617b1bdf57d626d4d14f7210f99d140e6", "99b2348bc0a4425294dedba612de72cef0b63402" ], "paperAbstract": "With the explosive growth of smartphones and cloud computing, mobile cloud, which leverages cloud resource to boost the performance of mobile applications, becomes attrac- tive. Many efforts have been made to improve the performance and reduce energy consumption of mobile devices by offloading computational codes to the cloud. However, the offloading cost caused by the cloud platform has been ignored for many years. In this paper, we propose Rattrap, a lightweight cloud platform which improves the offloading performance from cloud side. To achieve such goals, we analyze the characteristics of typical of- floading workloads and design our platform solution accordingly. Rattrap develops a new runtime environment, Cloud Android Container, for mobile computation offloading, replacing heavy- weight virtual machines (VMs). Our design exploits the idea of running operating systems with differential kernel features inside containers with driver extensions, which partially breaks the limitation of OS-level virtualization. With proposed resource sharing and code cache mechanism, Rattrap fundamentally improves the offloading performance. Our evaluation shows that Rattrap not only reduces the startup time of runtime environments and shows an average speedup of 16x, but also saves a large amount of system resources such as 75% memory footprint and at least 79% disk capacity. 
Moreover, Rattrap improves offloading response by as high as 63% over the cloud platform based on VM, and thus saving the battery life.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.47" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b359adacfa2dd6ab15c8fe930f99ac0db0be30a8", "sources": [ "DBLP" ], "title": "Container-Based Cloud Platform for Mobile Computation Offloading", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "b36ab08a93f8dd4a68cd3cd4f525b29b2ccea7d1": { "authors": [ { "ids": [ "3434099" ], "name": "Tzu-Hsien Wu" }, { "ids": [ "31959777" ], "name": "Jerry Chi-Yuan Chou" }, { "ids": [ "28330127" ], "name": "Shyng Hao" }, { "ids": [ "39131579" ], "name": "Bin Dong" }, { "ids": [ "1736095" ], "name": "Scott Klasky" }, { "ids": [ "1773743" ], "name": "Kesheng Wu" } ], "doi": "10.1145/3126908.3126934", "doiUrl": "https://doi.org/10.1145/3126908.3126934", "entities": [ "Bitmap", "Block size (cryptography)", "Computer simulation", "Program optimization", "R+ tree", "R-tree", "Requirement", "SciDB", "Search engine indexing" ], "id": "b36ab08a93f8dd4a68cd3cd4f525b29b2ccea7d1", "inCitations": [], "journalName": "", "journalPages": "12:1-12:10", "journalVolume": "", "outCitations": [ "cb83cfecc9b763c06a1809d7c791b676ba170e3e", "6682b1c98899d1b13e7f7b9e795ff0a1ec24b970", "6c01ba8145ab0849ea510bb9ec7e7503c1bc3151", "7a97193878b61fbf2698ad08c5741257f8230c6e", "285c2a7ca03336c0e0e0261fae2dbf619b52d05e", "8e4dad48cbc091ac08b81d273dfd7b22b84a266f", "0f2c3c81f95fe3be5e285c63b5c8a3fc5fba4a0c", "5850f683a28095d0a6223f2f4d7d81cac8c7aba7", "5cd26c491a274db155b647d8cc4591159e4b0fc3", "337d5988addf1cf6db2233ef357b19000c7b8616", "32c1ece816a8b5efa08e3ddd339345f88326be28", "4ac29283ea51b3987caeaa165fc2e2366cf17738", "690bb8ffd1f78cfb1a58cd7b8dc4b8a7b6ae02e0", "7eb77dc9a8475097f0c0f95226df4aa757704a67", "29845cab369ca85cb2c21d756f96123f01d38e7c", "0c325c32039656541760b2d8f02be4636e026785", 
"f83160a6405c89b005079786d6f4d4633e996513", "821a27d442b8524a4667ab7a02a982b1c7b140a5", "638a83425954679136467a55bc940d7de86fb511", "4523a15a22bcabad38c81e1eba13a1bddd6704c5", "9e0e22ae4c7bc19a9128a93be9954f5b0e078791" ], "paperAbstract": "Indexing technique has become an efficient tool to enable scientists to directly access the most relevant data records. But, the time and space requirements of building and storing indexes are expensive in the traditional approaches, such as R-tree and bitmaps. Recently, we started to address this issue by using the idea of \"block index\", and our previous work has shown promising results from comparing it against other well-known solutions, including ADIOS, SciDB, and FastBit. In this work, we further improve the technique from both theoretical and implementation perspectives. Driven by an extensive effort in characterizing scientific datasets and modeling I/O systems, we presented a theoretical model to analyze its query performance with respect to a given block size configuration. 
We also introduced three optimization techniques to achieve a 2.3x query time reduction comparing to the original implementation.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126934" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b36ab08a93f8dd4a68cd3cd4f525b29b2ccea7d1", "sources": [ "DBLP" ], "title": "Optimizing the query performance of block index through data analysis and I/O modeling", "venue": "SC", "year": 2017 }, "b37cdf43ff9c85693e335c04086003819a7aa4f9": { "authors": [ { "ids": [ "1973658" ], "name": "Mohammad Abdel-Majeed" }, { "ids": [ "1718163" ], "name": "Alireza Shafaei" }, { "ids": [ "33191825" ], "name": "Hyeran Jeon" }, { "ids": [ "1691311" ], "name": "Massoud Pedram" }, { "ids": [ "1789661" ], "name": "Murali Annavaram" } ], "doi": "10.1109/HPCA.2017.47", "doiUrl": "https://doi.org/10.1109/HPCA.2017.47", "entities": [ "Cache (computing)", "Graphics processing unit", "Instructions per cycle", "Network switch", "Packet switching", "Radio frequency", "Register file", "Spectral leakage" ], "id": "b37cdf43ff9c85693e335c04086003819a7aa4f9", "inCitations": [ "9ba97fe20d1042b080b3e2c515e4f9c0ccc6e9e0", "3325110b9d6bc05f084579688bb34fb99b5aa122", "a68b183c2993257aee6b745ecf2e655538df669b", "43fa95a77875672a00c126c4cc293290c46782bd", "fa21c85107516c7f0a341de27856d7ffe4a6c5d9", "51b367eb90a51a8382da0ffa15fad7308c7e7ce7", "014d28ef6ad36b22c1a4edb43c1b34bc7981b2e3" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "589-600", "journalVolume": "", "outCitations": [ "894e653cc5a9a4cbe7511619f6e82d22ca496910", "7ea15c138cc72588fa376ff819f4bb8ca0b324da", "1618ab4395c1fba3cd106d0f6bc4b09f767e2be2", "65e871d1d320c00ea955312ffce11f80455393eb", "0d394c72f9d769dfa021796a29fc142db573aec7", "ce1a8d5e1a7abe5585f43d8c4084ee02ee638893", "3b137da82bd3df572d02234437c0a4c56618f3ae", "3add4fb0c7d28c0365b8aa0c7567fa45f1583eb8", "36e46139ac2d2f3242cfe49469ce09403b5df852", 
"c4c15f8a24fee4b0b3cf225fd62147d3fb5dfe03", "04a0a3c43641ab6144be3aa8f08cb9c3dd08fda8", "3372b654932c6deea28a49f9a3453a29c4199705", "2e33c9a0e0ba542693610f36cabee4d8291f8e09", "4845474141b68b3b36e614b69c3682d064bc9a57", "2d6f002477015469075954c6748a1a85af352c94", "76184f353ae5cded4b0537742fa6b90e2bb4832d", "5319433572068b41ce8692956cb9ba39fd5eb923", "29321bfe08b81e47ecc814a3bd9a1a10e95fb5f9", "35e8751d090e18e55d91511a47b37a81736c35fc", "d835686dbf6f0a783de91f61af48d96c5a847b91", "55b5e730062d6e1c56b31d89dcf3a0b239a3951e", "210a73f3b36b32ed3b005d2ac1bd3a0d89be1d19", "064803c0399c71d8a3142079d41e06be0ae5195b", "94ae24a76dcf76b88f83338991fd12e399936f37", "6e48171fbd0836d0e56be3a254d3cfed48a16fce", "d65fa4cb118527ee236e25805fb7acd3b7f46d8d", "35258f9d100845721e47bc2888731eb9bf0d4ebe", "018a2fbb3c6c2b712e1b65bee9f3b513c9f27754", "579b08fba42753f045d7e1c29f177213ca3905ed", "0be50e19466b8676186e372a0c9803a256d3eef1" ], "paperAbstract": "GPU adoption for general purpose computing hasbeen accelerating. To support a large number of concurrentlyactive threads, GPUs are provisioned with a very large registerfile (RF). The RF power consumption is a critical concern. Oneoption to reduce the power consumption dramatically is touse near-threshold voltage(NTV) to operate the RF. However, operating MOSFET devices at NTV is fraught with stabilityand reliability concerns. The adoption of FinFET devices inchip industry is providing a promising path to operate theRF at NTV while satisfactorily tackling the stability andreliability concerns. However, the fundamental problem of NTVoperation, namely slow access latency, remains. To tackle thischallenge in this paper we propose to build a partitioned RFusing FinFET technology. The partitioned RF design exploitsour observation that applications exhibit strong preference toutilize a small subset of their registers. One way to exploitthis behavior is to cache the RF content as has been proposedin recent works. 
However, caching leads to unnecessary area overheads since a fraction of the RF must be replicated. Furthermore, we show that caching is not efficient as we increase the number of issued instructions per cycle, which is the expected trend in GPU designs. The proposed partitioned RF splits the registers into two partitions: the highly accessed registers are stored in a small RF that switches between high and low power modes. We use the FinFET's back gate control to provide low overhead switching between the two power modes. The remaining registers are stored in a large RF partition that always operates at NTV. The assignment of the registers to the two partitions will be based on statistics collected by a hybrid profiling technique that combines the compiler based profiling and the pilot warp profiling technique proposed in this paper. The partitioned FinFET RF is able to save 39% and 54% of the RF leakage and the dynamic energy, respectively, and suffers less than 2% performance overhead.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.47" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b37cdf43ff9c85693e335c04086003819a7aa4f9", "sources": [ "DBLP" ], "title": "Pilot Register File: Energy Efficient Partitioned Register File for GPUs", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "b393c03cee690271d3aa3401c300cdfc42a81c3c": { "authors": [ { "ids": [ "1747199" ], "name": "Jonathan Wang" }, { "ids": [ "3091035" ], "name": "Wucherl Yoo" }, { "ids": [ "1734816" ], "name": "Alex Sim" }, { "ids": [ "32645680" ], "name": "Peter Nugent" }, { "ids": [ "1773743" ], "name": "Kesheng Wu" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Approximation algorithm", "Brute-force search", "Feature selection", "Optimization problem", "Overfitting", "Parallel algorithm", "Parallel computing", "Performance prediction", "Principal component analysis", "Selection algorithm" ], 
"id": "b393c03cee690271d3aa3401c300cdfc42a81c3c", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "208-217", "journalVolume": "", "outCitations": [ "3b82541375943e4abf3d91a398c929778b28743d", "b69db5d3db9470e83242d46b617fd33cb0591177", "de3b02b459a37cfc71f9675647eba8d71aa57476", "28e23034ea2fbfa57b7e3e078f3f990df01b7c3f", "298f55e2cc2860fd53d5d5fa37f8fee86f10e4d3", "8d35c8fdea71f65b47de1fb8f83bd06472bd1d82", "705c20122d0f139e747c14a9879f9bb5ae65387a", "aaa720f79798accd33c89c63579da88fdc0b5cfc", "1df6b97feca654665857f6c444cf1b8548fd585b", "0d97ee4888506beb30a3f3b6552d88a9b0ca11f0", "13d4c2f76a7c1a4d0a71204e1d5d263a3f5a7986", "4a718f3d9966b21122c31c98f29675532e5c9eea", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "4940a6e514aad00af660337c41db21f92ae322f3", "212920487d809415d5420da57169e9f4556765b7" ], "paperAbstract": "Large data analysis problems often involve a large number of variables, and the corresponding analysis algorithms may examine all variable combinations to find the optimal solution. For example, to model the time required to complete a scientific workflow, we need to consider the impact of dozens of parameters. To reduce the model building time and reduce the likelihood of overfitting, we look to variable selection methods to identify the critical variables for the performance model. In this work, we create a combination of variable selection and performance prediction methods that is as effective as the exhaustive search approach when the exhaustive search could be completed in a reasonable amount of time. To handle the cases where the exhaustive search is too time consuming, we develop the parallelized variable selection algorithm. 
Additionally, we develop a parallel grouping mechanism that further reduces the variable selection time by 70%. As a case study, we exercise the variable selection technique with the performance measurement data from the Palomar Transient Factory (PTF) workflow. The application scientists have determined that about 50 variables and parameters are important to the performance of the workflows. Our tests show that the Sequential Backward Selection algorithm is able to approximate the optimal subset relatively quickly. By reducing the number of variables used to build the model from 50 to 4, we are able to maintain the prediction quality while reducing the model building time by a factor of 6. Using the parallelization and grouping techniques we developed in this work, the variable selection process was reduced from over 18 hours to 15 minutes while ending up with the same variable subset.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101141" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b393c03cee690271d3aa3401c300cdfc42a81c3c", "sources": [ "DBLP" ], "title": "Parallel Variable Selection for Effective Performance Prediction", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "b3a0d5803d2f6bebcbd38cde2b2138ec4f3aa7b1": { "authors": [ { "ids": [ "2390252" ], "name": "Tingzhe Zhou" }, { "ids": [ "2182186" ], "name": "Victor Luchangco" }, { "ids": [ "1687335" ], "name": "Michael F. 
Spear" } ], "doi": "10.1145/3087556.3087587", "doiUrl": "https://doi.org/10.1145/3087556.3087587", "entities": [ "Data structure", "Dynamic data", "Lock (computer science)", "Scalability", "Transactional memory" ], "id": "b3a0d5803d2f6bebcbd38cde2b2138ec4f3aa7b1", "inCitations": [], "journalName": "", "journalPages": "255-264", "journalVolume": "", "outCitations": [ "03e93625d185c0ac144c97fdf269b5ae5f38351e", "13f7c5807452ae602046582a385c0fb544ec5de1", "05927a36ff88960d1624a95aabc25bd781ad1275", "38611b424808954be2c1375da1a873b1e2487ace", "94cecbcd0530b40a6b2cc0b55c4725d123caa831", "68a9005a5ec10daece36ca5ecb9cad7be44770b1", "1cd7b4c3a93e3260c4c57efcecd3282e68f475f9", "09ed565e84057123c15ab12b885c235d1f241aed", "217d408f60f749aab6705ff3056b8e77640f2948", "0b9a4c5ca3530089edcd7e9ac2c718d2317718e3", "942f2a6df29234c304b69129872835d60cf5e9e9", "15f43d2b38b60a1ae8f03e818c1532031be4fc18", "58da996efd7320d1e484263c97c930c8979c474f", "51225f24b4bfb922bc9ed9738566de0b3cae5393", "0fca220343f411c7dac67b1f5fc1bcf5790cc030", "072cad08a6886c1800cb6144a8cfec4bced6f7d9", "042f443418ff2ff98a1dccbf49df9fa258dab707", "2900690eb3132a4d1536226d629727de41f38a66", "2ac3c4537be12b52f9e60d140ccf5621dc43cb75", "cba77292e7f1f271fff1bd28238728f4f18dd13e", "27650fee2ed40f0e6c214ca112bffd9164b2e0b5", "f4d2ec012d2484ba693c63a009f5dd66dafe9b4b", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "76057a3c7b489290afd4a4dccf09b623502619fd" ], "paperAbstract": "In this paper, we introduce revocable reservations, a transactional memory mechanism to reserve locations in one transaction and check whether they are unchanged in a subsequent transaction without preventing reserved locations from being reclaimed in the interim. We describe several implementations of revocable reservations, and show how to use revocable reservations to implement lists and trees with a transactional analog to hand-over-hand locking. 
Our evaluation of these data structures shows that revocable reservations allow precise and immediate reclamation within transactional data structures, without sacrificing scalability or introducing excessive latency.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087556.3087587" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b3a0d5803d2f6bebcbd38cde2b2138ec4f3aa7b1", "sources": [ "DBLP" ], "title": "Hand-Over-Hand Transactions with Precise Memory Reclamation", "venue": "SPAA", "year": 2017 }, "b3ab2efd87e6ecbb4c491e34c332a648e6d13f51": { "authors": [ { "ids": [ "39279009" ], "name": "Jens Kehne" }, { "ids": [ "2442755" ], "name": "Marius Hillenbrand" }, { "ids": [ "3012199" ], "name": "Jonathan Metter" }, { "ids": [ "3263996" ], "name": "Mathias Gottschlag" }, { "ids": [ "33962997" ], "name": "Martin Merkel" }, { "ids": [ "3258127" ], "name": "Frank Bellosa" } ], "doi": "10.1145/3078468.3078474", "doiUrl": "https://doi.org/10.1145/3078468.3078474", "entities": [ "Cloud computing", "Fairness measure", "Graphics", "Graphics processing unit", "Locality of reference", "Overhead (computing)", "Paging", "Virtualize" ], "id": "b3ab2efd87e6ecbb4c491e34c332a648e6d13f51", "inCitations": [], "journalName": "", "journalPages": "10:1-10:10", "journalVolume": "", "outCitations": [ "6debd9d773c7aca19f18f3b4640c45f8ae12b254", "1618f89bc0936ab14b8ec38905120d658014ed48", "404b72fbf63ff8f3f15c26a88384a0d4d7bcfcd7", "080b1f2c8316dad80d8c385dfcb82335a64a4d29", "3be74a71c59c0e5e925aa84090fc1b1988ea6095", "04704080ae469d24797ee6369f2e2a72ffcca828", "0be302437cec82b9200d61d13d3125e62a8ef499", "6b2c12c91f904781019f187681833d35f5c06e57", "5cdb8b8e13f90ec7974565372a6b772faf6d611f", "a7a24f882aec173c01a9ed1eb52589f71d6c80f8", "25f855c968af75e4617f25c71aee3cedec1dedaf", "6bdacf836b47e40f1e8d5d8e9e1c8224d74a1cef", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "3d54eeca362711623f88d57b81446cd93a9202e7", "a5a8859edd97f0e0666abc8b8f98a2f9b320d5b6", 
"cffc3c4e332287ba5b67a20c0bf0fbcb8a6e5725", "c630a97772d9b5d2380d409b2ef2c28dced5e392", "39a6aa81ec3e20c1d500b99b560deb039c451b83", "145ba8f0070a5e2ad061b358a66762ce1765c241", "45472bef11491245ad51dde6963e3cc40c5f3b79", "0f04a0b658f00f329687d8ba94d9fca25269b4b7", "5cdf290c839ba8753876bf255ed8c99fb4ba1299", "00f355ce566bb51dc70925217c62e437cc7e14e2", "cfeb833da2d3ca20adfc05a762b3f68cffa13416", "6e38285609f08477b455e1c5545256e6d29c932a", "cf240f39325ab7437be077ce503887b7732bed54", "30ba0dd406a6f22e2ff30a0bfd7d1377e672c1ba", "091780ccc35385b75bdf58b86a8342ae92d66049" ], "paperAbstract": "Over the last few years, Graphics Processing Units (GPUs) have become popular in computing, and have found their way into a number of cloud platforms. However, integrating a GPU into a cloud environment requires the cloud provider to efficiently virtualize the GPU. While several research projects have addressed this challenge in the past, few of these projects attempt to properly enable sharing of GPU memory between multiple clients: To date, GPUswap is the only project that enables sharing of GPU memory without inducing unnecessary application overhead, while maintaining both fairness and high utilization of GPU memory. However, GPUswap includes only a rudimentary swapping policy, and therefore induces a rather large application overhead.\n In this paper, we work towards a practicable swapping policy for GPUs. To that end, we analyze the behavior of various GPU applications to determine their memory access patterns. Based on our insights about these patterns, we derive a swapping policy that includes a developer-assigned priority for each GPU buffer in its swapping decisions. 
Experiments with our prototype implementation show that a swapping policy based on buffer priorities can significantly reduce the swapping overhead.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078468.3078474" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b3ab2efd87e6ecbb4c491e34c332a648e6d13f51", "sources": [ "DBLP" ], "title": "GPrioSwap: towards a swapping policy for GPUs", "venue": "SYSTOR", "year": 2017 }, "b414a89387b688fd0217a87e7b089ce47f91270d": { "authors": [ { "ids": [ "7181851" ], "name": "Jaehyun Han" }, { "ids": [ "10691462" ], "name": "Donghun Koo" }, { "ids": [ "8404768" ], "name": "Glenn K. Lockwood" }, { "ids": [ "1737967" ], "name": "Jaehwan Lee" }, { "ids": [ "1738654" ], "name": "Hyeonsang Eom" }, { "ids": [ "34987672" ], "name": "Soonwook Hwang" } ], "doi": "10.1109/CLUSTER.2017.60", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.60", "entities": [ "Amplifier", "Buffer overflow", "Burst mode (computing)", "Data buffer", "Garbage collection (computer science)", "Non-volatile memory", "Open-source software", "Solid-state drive", "Streaming media", "Supercomputer", "Throughput", "User interface", "User space" ], "id": "b414a89387b688fd0217a87e7b089ce47f91270d", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "245-255", "journalVolume": "", "outCitations": [ "2728758f576a8ed1aca65005c12363533436a1e1", "9edab79d681bae0071aa784328b0ce134d909c10", "3037024ee9782764cfbe8e5c9c625e2edaaf83fd", "abfd25e8e8878f28f26ca9b7303e06688bbb2722", "ba625fb8f294a5003a0880096695a92bc9bb843a", "91912a461d30035639ddda2b6de97a388823fb4b", "054b0e0c107b550f99c5c23db99b64254b95bbfa", "b3bd374752659aa65c8b5a59b7cd25a94153f593", "20cc5fdba0915a3958c31d7b18763e82a5418856", "b460a8c552ba24afa9b05cf551f5f55db1985e56", "59ba9f62728b6231f982ea3b59f9ba7422182f28", "6cfe53d6b16e0c8303462265eeb3913707ac5d5d", "2657302160775f8766964d013efe242836693f3e", 
"26f092b034364f2af58df7918e965590a2d85b88", "72722e7602138e3896e5576d3f3ef730e7b7c4b4" ], "paperAbstract": "Burst buffers tolerate I/O spikes in High-Performance Computing environments by using a non-volatile flash technology. Burst buffers are commonly located between parallel file systems and compute nodes, handling bursty I/Os in the middle. In this architecture, burst buffers are shared resources. The performance of an SSD is significantly reduced when it is used excessively because of garbage collection, and we have observed that SSDs in a burst buffer become slow when many users simultaneously use the burst buffer. To mitigate the performance problem, we propose a new user-level I/O isolation framework in a High-Performance Computing environment using a multi-streamed SSD. The multi-streamed SSD allocates the same flash block for I/Os in the same stream. We assign a different stream to each user; thus, the user can use the stream exclusively. To evaluate the performance, we have used open-source supercomputing workloads and I/O traces from real workloads in the Cori supercomputer at the National Energy Research Scientific Computing Center. Via user-level I/O isolation, we have obtained up to a 125% performance improvement in terms of I/O throughput. In addition, our approach reduces the write amplification in the SSDs, leading to improved SSD endurance. 
This user-level I/O isolation framework could be applied to deployed burst buffers without having to make any user interface changes.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.60" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b414a89387b688fd0217a87e7b089ce47f91270d", "sources": [ "DBLP" ], "title": "Accelerating a Burst Buffer Via User-Level I/O Isolation", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "b41906610edba5cc559dd2820fdf3d6e31caba9e": { "authors": [ { "ids": [ "2815518" ], "name": "Ivan Tanasic" }, { "ids": [ "2750421" ], "name": "Isaac Gelado" }, { "ids": [ "36683684" ], "name": "Marc Jord\u00e0" }, { "ids": [ "1744495" ], "name": "Eduard Ayguad\u00e9" }, { "ids": [ "12813015" ], "name": "Nacho Navarro" } ], "doi": "10.1145/3123939.3123950", "doiUrl": "https://doi.org/10.1145/3123939.3123950", "entities": [ "Baseline (configuration management)", "Benchmark (computing)", "Central processing unit", "Classic RISC pipeline", "Context switch", "Exception handling", "Graphics processing unit", "Lazy evaluation", "Memory management", "Page fault", "Preemption (computing)", "Speedup" ], "id": "b41906610edba5cc559dd2820fdf3d6e31caba9e", "inCitations": [], "journalName": "", "journalPages": "109-122", "journalVolume": "", "outCitations": [ "a38901fd5b7143b30407435a373edafe96145561", "128dad9a25e99affd9c87101c4c2e1ac9988df61", "3364bc50921a9566d61ef8cb73baa82341725e4b", "72d8bdfc07fc35a38c5abb7e20bbd2b5bf8b76fb", "264c8bcc515dc4f79893cd6b8cf552efb6c4aea9", "65965d18adafdbb5d61c3dbb34e294953c4e797d", "1ee083021ae43aaa1366df838e53cfdae66cc91d", "109b416bdbf1739373638eb7e5b37f5d475fd40e", "279d36a76c6ca9475dea260f26e497a9a728f9aa", "5cdf290c839ba8753876bf255ed8c99fb4ba1299", "064f38e5edef42cb5a37f2a350e4413e17132b11", "c251fc6c99d8b515f3f0844604a21af92cce647f", "219590564347b3be990c7f9e8a49b153ace74f37", "cfeb833da2d3ca20adfc05a762b3f68cffa13416", 
"5f3cce1bc739ebfc03e003010d3438bb318efc14", "054e4a6966d54eb9fd207cf0484214201f46424a", "145ba8f0070a5e2ad061b358a66762ce1765c241", "d609ae840b3ead89300e9e2d2cc8b8cc8772a6d8", "79289a4c1f27fb04b9bb42e8a66c6c39dab9c2a9", "a4b0f11334d33de4f41bfeebfc520fb5b034a31e", "1618f89bc0936ab14b8ec38905120d658014ed48", "009b98f3ec7c28f3c87acdcc60fb7a5f90e24f9c", "56aec6967fead1b1f5c69dc4df7ed0e9c278f231", "07a63423cc46ec67ff18f707379b77ebdfbc1eb9", "624168bb99821e7c9fef722c1758ceda42eba33f", "5fe43f4d79d2829ba062fe9cf1711ee60903dd24", "126fb7c266d753bd624976c90d276f9f3238e4aa", "2037e142f3b45da72d5c99c0c0de2bb506d4a829", "61d13a9a4a6cb66e2d5fcf4f75d97570dca8f3fe", "4dd1e87d1c0a680cd38a1c4b99cdc970a07029bf", "82e5923883ad13d2a604a9c046fb09a79d28a5ed", "3cdbfb36647b8fcccc0cb2847b62becac8f71bf6", "284c7fde4bbaf19dd345e3b37d98085d7bfb9a4f", "15bbc0f1de2a85d31ca627687742110742eea0bf", "200289443394949728b9af6f2125d2d435783c68", "024f3e0ea6a49e536f3d135e73d77323a924498d", "3037617a223f0fc3c63ef91013e3c85227e43672", "64b24a656e3eb0202e2b5350c96b92387cd63cdc", "1bd49b63345448116b155718c5fa45471d223509", "eccb7c83f1951116f85612e3b86634d9d141e144", "774ec7516c1194172d52ee1a1d7431f2af1218ab", "cbd9a9e99d78748e36b82f818df08b2e6fc1e631", "8e921012d27bd0a5bc238be2bf1ea8c2def713c3", "1dd6cd96a834d2a49858a4058a4605d7e2a550cf", "27ee92f60f650feda893a853d4e552a1e9dc2979", "0d8ce242e9f49d219c1c03a61cec9b71a4da0581", "595dda2e545af953c45d2dd63ae28c102e2e1b03", "2194c3460ab71f3826db00b045b2ae590c753319", "de5bd35339e5692002a77145d8b861940429ad77", "38200ced576adf86996f712cf40de3ec67510e41", "777afb96054596762d864bbd6a7f46840c53b3fb" ], "paperAbstract": "Operating systems have long relied on the exception handling mechanism to implement numerous virtual memory features and optimizations. However, today's GPUs have a limited support for exceptions, which prevents implementation of such techniques. 
The existing solution forwards GPU memory faults to the CPU while the faulting instruction is stalled in the GPU pipeline. This approach prevents preemption of the faulting threads, and results in underutilized hardware resources while the page fault is being resolved by the CPU.\n In this paper, we present three schemes for supporting GPU exceptions that allow the system software to preempt and restart the execution of the faulting code. There is a trade-off between the performance overhead introduced by adding exception support and the additional complexity. Our solutions range from 90% of the baseline performance with no area overheads, to 99.2% of the baseline performance with less than 1% area and 2% power overheads. Experimental results also show 10% performance improvement on some benchmarks when using this support to context switch the GPU during page migrations, to hide their latency. We further observe up to 1.75x average speedup when implementing lazy memory allocation on the GPU, also possible thanks to our exception handling support.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123950" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b41906610edba5cc559dd2820fdf3d6e31caba9e", "sources": [ "DBLP" ], "title": "Efficient exception handling support for GPUs", "venue": "MICRO", "year": 2017 }, "b45f690c4375a36e376a987de32ad1dad885838d": { "authors": [ { "ids": [ "1696504" ], "name": "Davide Ancona" }, { "ids": [ "38247594" ], "name": "Francesco Dagnino" }, { "ids": [ "31553246" ], "name": "Elena Zucca" } ], "doi": "10.1145/3133905", "doiUrl": "https://doi.org/10.1145/3133905", "entities": [ "Coinduction", "Computation", "Divergence (computer science)", "Imperative programming", "Java", "Lambda calculus", "Operational semantics", "Type safety", "Type system" ], "id": "b45f690c4375a36e376a987de32ad1dad885838d", "inCitations": [], "journalName": "PACMPL", "journalPages": "81:1-81:26", "journalVolume": "1", "outCitations": [ 
"c3909e1d26a92b6878ade310745858d93e1aa657", "2d71213783432890238b5f8223cc7335c8a5eba7", "0aed58ed7cc1dfd27bcd1d8823a7ae58d8de26a2", "0e2e604c1b6c3709dfdab95fffa3a62d67fb0687", "ccc85b32ac90fb2c07732195c687962dc84da4fb", "4aaf12b209797e091b39dca420f8415355d6dc0a", "77956db63aeafb91fb0ecbc08fd8d319177e74da", "787c71b64b523fea5b41ad5b72c6c8894d6727ef", "a55f1855603e0ed79ab641fdd00f4b91f933a44f", "544c433c7711b5d6d44f85fe342d9ff09eb00c25", "c85e03ec3d13480a316ecc87e365761b95b61689", "18d52120630561d558879ad31963702e557b159c", "7426514fb23d28e77691300edb8b59a44988fb9e", "2e4634a06e48ab8db369dc7d501998075e39c924", "83d4c225582fb093d7f72eede544efd468c22d96", "0ec4f158c17b6aa244e61add57eb492c27525625", "839f3e32b1c11c5e5eb076eb2ca487375c529fa0", "165b3fabbf1da294c56c772e0cd63e5f5eadb78b", "2ed15e1e2ca75e7be88487d31009eea38c4bf816", "679a413c1b05f471083f3818f147f62bc1edbcac", "c8e19570ef0065446a412dacfb12c1afd472c087", "5917366e731c5556f74d39dfd5f91631807d83a6", "c19c45531a5f6c114e9da1dbfea8aedc31c31e0d", "6977cce0cfea6a7c6742e663ce383e072ab13400", "afd116d04968c3ba3169965168c4037148c3f0cd" ], "paperAbstract": "Coaxioms have been recently introduced to enhance the expressive power of inference systems, by supporting interpretations which are neither purely inductive, nor coinductive. This paper proposes a novel approach based on coaxioms to capture divergence in semantic definitions by allowing inductive and coinductive semantic rules to be merged together for defining a unique semantic judgment. In particular, coinduction is used to derive a special result which models divergence. In this way, divergent, terminating, and stuck computations can be properly distinguished even in semantic definitions where this is typically difficult, as in big-step style. 
We show how the proposed approach can be applied to several languages; in particular, we first illustrate it on the paradigmatic example of the λ-calculus, then show how it can be adopted for defining the big-step semantics of a simple imperative Java-like language. We provide proof techniques to show classical results, including equivalence with small-step semantics, and type soundness for typed versions of both languages.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133905" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b45f690c4375a36e376a987de32ad1dad885838d", "sources": [ "DBLP" ], "title": "Reasoning on divergent computations with coaxioms", "venue": "PACMPL", "year": 2017 }, "b46e108dcecf012909fe3366e35e31a4d3bbc2b7": { "authors": [ { "ids": [ "37112455" ], "name": "Kshitij Mehta" }, { "ids": [ "40389252" ], "name": "Maxime R. Hugues" }, { "ids": [ "32288862" ], "name": "Oscar R. Hernandez" }, { "ids": [ "1785240" ], "name": "David E. Bernholdt" }, { "ids": [ "3329218" ], "name": "Henri Calandra" } ], "doi": "10.1109/IPDPS.2017.82", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.82", "entities": [ "Algorithm", "Directive (programming)", "FLOPS", "Graphics processing unit", "I/O bound", "Message Passing Interface", "One-way function", "OpenACC", "Sandy Bridge", "Speedup", "Supercomputer", "Titan (supercomputer)" ], "id": "b46e108dcecf012909fe3366e35e31a4d3bbc2b7", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "224-233", "journalVolume": "", "outCitations": [ "aef1c6ce4518775e948c7a906bdf6aaa85dcb6b2", "47f05344d0d5fd252ebf645dddb8a1c5118cffc6", "2566acc500a8f013610d306bea7a8f548930dfed", "2f3c8e4bb0a738aecfad51bdff7dc75a4fea28fd", "6386e607ca48fbbf37f059482497bb552c1a121a", "3656e1298cdadf5bdb17b06d541f79c9bc40ef06", "d2ae293327f4fd6fb33553fdc30238f77def1ff1", "548c89d5c18e8845598a7c117484abe86dfb795d", "9edab79d681bae0071aa784328b0ce134d909c10", 
"2bb29fce377e1ec9024ea7c45fd40fa178922602", "069ec88e2d30784746ab2224bc096e494c745382" ], "paperAbstract": "One-Way Wave Equation Migration (OWEM) is a depth migration algorithm used for seismic imaging. A parallel version of this algorithm is widely implemented using MPI. Heterogenous architectures that use GPUs have become popular in the Top 500 because of their performance/power ratio. In this paper, we discuss the methodology and code transformations used to port OWEM to GPUs using OpenACC, along with the code changes needed for scaling the application up to 18,400 GPUs (more than 98%) of the Titan leadership class supercomputer at Oak Ridge National Laboratory. For the individual OpenACC kernels, we achieved an average of 3X speedup on a test dataset using one GPU as compared with an 8-core Intel Sandy Bridge CPU. The application was then run at large scale on the Titan supercomputer achieving a peak of 1.2 petaflops using an average of 5.5 megawatts. After porting the application to GPUs, we discuss how we dealt with other challenges of running at scale such as the application becoming more I/O bound and prone to silent errors. 
We believe this work will serve as valuable proof that directive-based programming models are a viable option for scaling HPC applications to heterogenous architectures.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.82" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b46e108dcecf012909fe3366e35e31a4d3bbc2b7", "sources": [ "DBLP" ], "title": "One-Way Wave Equation Migration at Scale on GPUs Using Directive Based Programming", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "b47b72e1f1adb0114215a1efc3a3a9b79c87712d": { "authors": [ { "ids": [ "3427420" ], "name": "Runtian Ren" }, { "ids": [ "1736579" ], "name": "Xueyan Tang" } ], "doi": "10.1145/3087556.3087562", "doiUrl": "https://doi.org/10.1145/3087556.3087562", "entities": [ "Competitive analysis (online algorithm)", "Interruption science", "Job scheduler", "Job shop scheduling", "Jumpstart Our Business Startups Act", "Scheduling (computing)", "Span and div", "Time of arrival" ], "id": "b47b72e1f1adb0114215a1efc3a3a9b79c87712d", "inCitations": [ "fdc1eae0fceab126cad15886958b507d879a758b" ], "journalName": "", "journalPages": "55-66", "journalVolume": "", "outCitations": [ "a3e35498381eac5f978cfdefbe2ee51a21aecff5", "10236985b28470951de73f76d6fba5343d5f788f", "792ede245f0b88763bd50f0402cf5a59de0c2ce4", "0329b00550a7e8a0d3a6a98822d24b45c49ada50", "11eeb124cffcd749f46f8d25ede967d1a6d997f6", "74dedf3bab9d64648b955f3d85ea79a20ca3960b", "48e992a734ef6ecbc9d5aeb3fc9135bbee531e07", "ced3017c2f19bc57bf9b04b45a8fda227ebdf226", "30922d037747b192676a10849b7071dc7862520d", "1eaba5c850f0281e1536de6167a1e99fd6081b75", "0377e15b01a6659001641e86e58ec03ca338f8e9", "30af21e6ac6a0826071251c8a247e5dbfca472e7", "920e76833771c417615376b2b25b91a3aeaf11f3", "ca481f7e534d2cb907c2dc524b2fbcbd75b1dfc1", "a5f8f2eb86e89452850e9f19b2f276882d103367", "0baba373f3f5cef60d6c9959cd2b4b793818ff3d", "16cb1ee9c0ba437f49b51f58efa76d45f049a31b", 
"0603d6a8773529d5605bedbb019f0161d65cc354", "0625d37c46641b7488198168f2d1f626d2a01b06", "0f44833eb9047158221e7b3128cde1347b58ccd6", "d4d63b0e36932b5b95e4b92536620fa21aaf6928", "1c477be8c1ebd05ed07e8c0764a295932f3733b0", "3ce4ce0c5e8121dbcf01cfe080db60647b93525d" ], "paperAbstract": "In this paper, we study an online Flexible Job Scheduling (FJS) problem. The input of the problem is a set of jobs, each having an arrival time, a starting deadline and a processing length. Each job has to be started by the scheduler between its arrival and its starting deadline. Once started, the job runs for a period of the processing length without interruption. The target is to minimize the span of all the jobs --- the time duration in which at least one job is running. We study online FJS under both the non-clairvoyant and clairvoyant settings. In the non-clairvoyant setting, the processing length of each job is not known for scheduling purposes. We first establish a lower bound of μ on the competitive ratio of any deterministic online scheduler, where μ is the max/min job processing length ratio. Then, we propose two O(μ)-competitive schedulers: Batch and Batch+. The Batch+ scheduler is proved to have a tight competitive ratio of (μ+1). In the clairvoyant setting, the processing length of each job is known at its arrival and can be used for scheduling purposes. We establish a lower bound of (√5+1)/2 on the competitive ratio of any deterministic online scheduler, and propose two O(1)-competitive schedulers: Classify-by-Duration Batch+ and Profit. The Profit scheduler can achieve a competitive ratio of 4+2√2. 
Our work lays the foundation for extending several online job scheduling problems in cloud and energy-efficient computing to jobs that have laxity in starting.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087556.3087562" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b47b72e1f1adb0114215a1efc3a3a9b79c87712d", "sources": [ "DBLP" ], "title": "Online Flexible Job Scheduling for Minimum Span", "venue": "SPAA", "year": 2017 }, "b495f9c7e18734b5349471f0d5d745d73efb57d3": { "authors": [ { "ids": [ "40540677" ], "name": "Shweta Agrawal" }, { "ids": [ "2035856" ], "name": "Sanjay Bhattacherjee" }, { "ids": [ "1680071" ], "name": "Duong Hieu Phan" }, { "ids": [ "1803138" ], "name": "Damien Stehl\u00e9" }, { "ids": [ "1689211" ], "name": "Shota Yamada" } ], "doi": "", "doiUrl": "", "entities": [ "Black box", "Encryption", "Functional encryption", "Learning with errors", "Naruto Shippuden: Clash of Ninja Revolution 3", "Quadratic function", "Traceability" ], "id": "b495f9c7e18734b5349471f0d5d745d73efb57d3", "inCitations": [ "b61563bac2a6aade6903e3455a2c7515f1699c4a" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "650", "journalVolume": "2017", "outCitations": [ "d4d37fcde3ed7a99845ab3f51e3a69519e4f87b1", "76421de7130dcff1a359c8ed70411ed09f71f1cb", "91e55f174f8eb5b88736b89e5a0235014e3b1ea3", "becddb754d41fa2055be5d50eac0cade5b4004ce", "4f68a80dec7eebe30dade51c975f491ee52e7b18", "60d8af8746d97abb0d4342ee98081d595d5e758d", "6a06c062d995a841e10a15ed5ea809a9ab6e8381", "0bd21967574e1971aff7fb70f9dd7ecbed4118c5", "02bc3fa288d50fde1745eb149947fd4b5f940769", "591772092ce9d7a2b078d499b3a9193416f59eb1", "35bd1fddb880a7a59893b915a663dc19b4498e23", "3d16773d6b7a183cff002f484b66d55bb99b2e48", "1cbdeeb78685e896fe88916d3e74c01c5d8c7775", "a81af39a193cc9a087b392d6d49620b96b4f8eb3", "28da48429ea84c2e0db1a673f2f2319eb2f23431", "6315150466122890a010beca4e5def5891851be7", "26c9a6bf0cfcf56d5e9da607d5a02ed49b5c4ac2", 
"25518a206a45b3af9fbb68d11aa905480cf8f61d", "8645bc6360dd3705398c984e8ebe83cb3e38c648", "382892b73562f6ace340e181300d701f32987409", "020ff6532a58ee426217cf4e75ed64af4658bd6a", "1b82a2619e4500b908bf89a8a140cd4631b87e52", "1e608e5e6693f9ae384ce03c79244ed6406b7ca9", "94b248ff6a5639ba077f82650d3f8d760fa3d33f", "627976735aaa8f079d6018a79615ce454d3e97da", "4a94ad7d52370542090be52da30cc819fb403d47", "0ec8116340335a94b09e1f09586ffb945181f4db", "29aef15822b7f0fef84e7ac56e6e252cd03ae0b3", "0d6cf2ddd2f49a4b513a4e5e1f8876ccfe95e475", "186d506bcd042bfad20c83a67d8b59fd5235acb2", "32680871e6fd42a028316f334fc35e2dbb92384a", "da09bc42bbf5421b119abea92716186a1ca3f02f", "0f3f6efbc665124043bc5637fc7c1219d416b9c4", "3885a073038bc64266b641fa68ac397058b55be3", "19e6f3ea035d33590a8598be68ac82e6f00ce518", "2ca36cfea0aba89e19b3551c325e999e0fb6607c", "093090c975a71e22cec1948c29da282f30f920f8", "370e5b4ec00e883c294ff0628002dae57e206423", "87153d5d6d4047c0612748531778a1bb6bb2f90a" ], "paperAbstract": "We provide efficient constructions for trace-and-revoke systems with public traceability in the black-box confirmation model. Our constructions achieve adaptive security, are based on standard assumptions and achieve significant efficiency gains compared to previous constructions. Our constructions rely on a generic transformation from inner product functional encryption (IPFE) schemes to trace-and-revoke systems. Our transformation requires the underlying IPFE scheme to only satisfy a very weak notion of security -- the attacker may only request a bounded number of random keys -- in contrast to the standard notion of security where she may request an unbounded number of arbitrarily chosen keys. We exploit the much weaker security model to provide a new construction for bounded collusion and random key IPFE from the learning with errors assumption (LWE), which enjoys improved efficiency compared to the scheme of Agrawal et al. [CRYPTO'16]. 
Together with IPFE schemes from Agrawal et al., we obtain trace and revoke from LWE, Decision Diffie Hellman and Decision Quadratic Residuosity.", "pdfUrls": [ "http://eprint.iacr.org/2017/650", "https://eprint.iacr.org/2017/650.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/b495/f9c7e18734b5349471f0d5d745d73efb57d3.pdf", "s2Url": "https://semanticscholar.org/paper/b495f9c7e18734b5349471f0d5d745d73efb57d3", "sources": [ "DBLP" ], "title": "Efficient Public Trace and Revoke from Standard Assumptions", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "b4b7f7a6c668be9d966567a2de9a50eb83986fd5": { "authors": [ { "ids": [ "2173915" ], "name": "Johannes Behl" }, { "ids": [ "3187537" ], "name": "Tobias Distler" }, { "ids": [ "1740458" ], "name": "R\u00fcdiger Kapitza" } ], "doi": "10.1145/3064176.3064213", "doiUrl": "https://doi.org/10.1145/3064176.3064213", "entities": [ "Byzantine fault tolerance", "Central processing unit", "Correctness (computer science)", "FLOPS", "Fault model", "Formal specification", "Merge sort", "Multi-core processor", "State machine replication", "Trusted Execution Technology" ], "id": "b4b7f7a6c668be9d966567a2de9a50eb83986fd5", "inCitations": [ "60a72da351f9e706b1417c5ce531aa661f314456", "24bb8328ad26f21ca2e2322ec2c5da16586dccac", "77d484b0194698366ba118e28287896829cf6dfe", "03e89626cbb864fb1243b4ee8b4037020a9250eb" ], "journalName": "", "journalPages": "222-237", "journalVolume": "", "outCitations": [ "0a8d3569e9d3359ab9612e1e55775524242e5532", "b3dbcfeb9be84800260eff5bc8089ff2c97c10f0", "4af63ed343df388b6353b6fc77c7137d27822bf4", "1521e801e8e08ecec3b0baabb07f9a6ce0a67a85", "6c13854da6c526513956dc811dcee2f1bfef6a81", "07bab5c94c3d096b21f81d05be081ea36812f3ef", "efb1a85cf540fd4f901a78100a2e450d484aebac", "4fc59e699d973c90f1827ce4cbaaffaba8a99ded", "0e43a1ba5afce2d87fed72e0a34e3e4fbf1cefa5", "4be1be822928a0aeb277412bad1f20f350deb609", "4f43825eb0e31bc9c58f2ed305cf4d45f6f12be5", 
"1474f41f3fc537feebc051164bf1e4459fe7dd3c", "321cb9fcd2dbc691fb2a002fd1d973e646f103b8", "0e6f25ca2e9dbcca8a630ac5924470aafa3fbcac", "1e976af1585511474f38ca3b0b46c941da4848d8", "430e80edcd6f2d1e1d5156739d5d2f42dcc663ac", "00e3756119a91432622f6982b59ecd24a1340fbe", "004f89a6f17439084be233c4918f46ca7d3ae96c", "4f5ec6b6a4ea92d2222d7138912adee86f0297c5", "f6f83ccf5329a4f10e77c3558bd0ed5b85ef024d", "a1c704b281e939d343219edffbc84b379ab8a571", "296eaf328fb74c62c1704c5ac8a1b5d75243b100", "bb52ff840b1b6e2144268e57c72118a49460d6f4", "446f4603485cb159e45ebf617b93a0a725c5fd3f", "04f0ec1c2b4d96bb8aa5f5b64f6298edbe8b26c3", "dda1eb7aabbda6ab5ee8df31b20a7e1becb75df7", "032d39d927b2fda54193569bd0fb6c03fbf28e0d", "62f0dad70860a933f95d49eafcbbc58a229d7ba8", "10cc6cc6b82e553f3b52e88d7d019161edd23615", "d12d1289d2384c2ce642f01855637b9f0519e189", "0abb096e5575a7e6e3d3d7eba4765003ae7132ef", "06b7f3156ef8f0d66fe05e504c0bb908ab288c03", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "738c71d77bf3041e4f051a87b1f314738a05a4d3", "ba665f1c32653c963b6b6c8a370bf2c82619341c", "0a9cc699ed94b4729de3255d76305b91b6a56aaa", "1cc7563ce502712d94e3104a175bb67d7713690b", "20f5f8733134d87041b95b742d613051a1fb3fdb", "07a152ad1c17b35396d8b372cbde16e89705c7ec", "17007f5e69efcfeff03853a33121943e2b4df422", "3e43f6f886908b47e9e710a71554f4caacc9b756" ], "paperAbstract": "With the advent of trusted execution environments provided by recent general purpose processors, a class of replication protocols has become more attractive than ever: Protocols based on a hybrid fault model are able to tolerate arbitrary faults yet reduce the costs significantly compared to their traditional Byzantine relatives by employing a small subsystem trusted to only fail by crashing. Unfortunately, existing proposals have their own price: We are not aware of any hybrid protocol that is backed by a comprehensive formal specification, complicating the reasoning about correctness and implications. 
Moreover, current protocols of that class have to be performed largely sequentially. Hence, they are not well-prepared for just the modern multi-core processors that bring their very own fault model to a broad audience. In this paper, we present Hybster, a new hybrid state-machine replication protocol that is highly parallelizable and specified formally. With over 1 million operations per second using only four cores, the evaluation of our Intel SGX-based prototype implementation shows that Hybster makes hybrid state-machine replication a viable option even for today's very demanding critical services.", "pdfUrls": [ "https://www4.cs.fau.de/Publications/2017/behl_17_eurosys.pdf", "http://doi.acm.org/10.1145/3064176.3064213" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b4b7f7a6c668be9d966567a2de9a50eb83986fd5", "sources": [ "DBLP" ], "title": "Hybrids on Steroids: SGX-Based High Performance BFT", "venue": "EuroSys", "year": 2017 }, "b4c32173842274974b5724d2fefd14efa0a67077": { "authors": [ { "ids": [ "36210022" ], "name": "Aniruddha Marathe" }, { "ids": [ "2860488" ], "name": "Rushil Anirudh" }, { "ids": [ "1812494" ], "name": "Nikhil Jain" }, { "ids": [ "1823585" ], "name": "Abhinav Bhatele" }, { "ids": [ "1744175" ], "name": "Jayaraman J. 
Thiagarajan" }, { "ids": [ "1749353" ], "name": "Bhavya Kailkhura" }, { "ids": [ "37698592" ], "name": "Jae-Seung Yeom" }, { "ids": [ "2173458" ], "name": "Barry Rountree" }, { "ids": [ "33289410" ], "name": "Todd Gamblin" } ], "doi": "10.1145/3126908.3126969", "doiUrl": "https://doi.org/10.1145/3126908.3126969", "entities": [ "Deep learning", "Transfer-based machine translation" ], "id": "b4c32173842274974b5724d2fefd14efa0a67077", "inCitations": [], "journalName": "", "journalPages": "31:1-31:12", "journalVolume": "", "outCitations": [ "3d826c295b0f1b974a4bf515e8a61a5e2d57f424", "3d25e227fc8c0e7247d7b4209d1c0820e2076341", "12fcbe4957c1c4bc6c0d753284e944d16cfc089a", "0c76a904b28c775eb5f33cd982f0bfeddab353e3", "19abc4f7bae2025fe832ba5cba4863f0b01c1308", "4ea58939c0829a726f7e79709dbc7698b0232937", "b81fb53cc1dff847804279275ce1e3238ffe8766", "f580162b0f1ca6e7cf03eb3c9cab1c10907a3a9c", "d6be948f6efd5960f6a65f3b56524011e2a411e7", "ba30a8edccafe1e0f3b8b180e68f66d7a813fded", "1e8233a8c8271c3278f1b84bed368145c0034a35", "244030cb8e73144251ef3701ac758168031d17f9", "fb98aa11fe3042c6e529de9afd53076c21f2df40", "8755fd59b74028b0bd45e9b5c355c64c0c70af04", "0354aff91dd843e01e396fbd635129bea73977b4", "ee537bb55917518a5d96ca0355da7deee0ca1284", "1108af609469e420aeae551ba8a893c3200e07fa", "3462fb38042f0bde20c758728d7c8c28a1f47e09", "20d6a8a39ebecd21bc8a4df53f248356d38ea6d9", "6472cab2678c39e2273673968c6d7d3cfe2a62c9", "0811d2683b4a181da82774f09dfc9f6fdc382628", "6664681b2e833e6d3a7cf364a135ea7cb0150a5e", "58e3107fcc7dff4b00edafd3becc9df9c875eae6", "693cb000198eafd44a6f02d96e2dc2126bd645e5", "1585eaffcf9c9836eb1607e279e43ce2793e59a0", "959b314bfbe49bd2b0a9dd5e60c2c0036529b494", "025b0273eb6ccd57e6a949fe44225ca5d8041cf9" ], "paperAbstract": "Tuning application parameters for optimal performance is a challenging combinatorial problem. 
Hence, techniques for modeling the functional relationships between various input features in the parameter space and application performance are important. We show that simple statistical inference techniques are inadequate to capture these relationships. Even with more complex ensembles of models, the minimum coverage of the parameter space required via experimental observations is still quite large. We propose a deep learning based approach that can combine information from exhaustive observations collected at a smaller scale with limited observations collected at a larger target scale. The proposed approach is able to accurately predict performance in the regimes of interest to performance analysts while outperforming many traditional techniques. In particular, our approach can identify the best performing configurations even when trained using as few as 1% of observations at the target scale.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126969" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b4c32173842274974b5724d2fefd14efa0a67077", "sources": [ "DBLP" ], "title": "Performance modeling under resource constraints using deep transfer learning", "venue": "SC", "year": 2017 }, "b516ead4f4a713b5c85524356d60b6836d07ca33": { "authors": [ { "ids": [ "40412858" ], "name": "Ryan Marcus" }, { "ids": [ "1967562" ], "name": "Olga Papaemmanouil" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud computing", "Computer performance", "Enterprise resource planning", "Machine learning", "Money", "Onset (audio)", "Performance prediction", "Provisioning", "Query string", "Reinforcement learning", "Virtual machine" ], "id": "b516ead4f4a713b5c85524356d60b6836d07ca33", "inCitations": [ "09c1ec4361d9069b9a0bd9699a0c619e3fb8d027", "0bdb6e2cb1d8960ecf754fd4d28ea11714178fdf", "4fcb1e0a25a5617ddee8174b48af80d88b4881f4", "0fb3400d39c08b6dddbbfa8689711ca36a87afd0" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [], 
"paperAbstract": "The onset of cloud computing has brought about computing power that can be provisioned and released on-demand. This capability has drastically increased the complexity of workload and resource management for database applications. Existing solutions rely on query latency prediction models, which are notoriously inaccurate in cloud environments. We argue for a substantial shift away from query performance prediction models and towards machine learning techniques that directly model the monetary cost of using cloud resources and processing query workloads on them. Towards this end, we sketch the design of a learning-based service for IaaS-deployed data management applications that uses reinforcement learning to learn, over time, low-cost policies for provisioning virtual machines and dispatching queries across them. Our service can effectively handle dynamic workloads and changes in resource availability, leading to applications that are continuously adaptable, cost effective, and performance aware. 
In this paper, we discuss several challenges involved in building such a service, and we present results from a proof-of-concept implementation of our approach.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p85-marcus-cidr17.pdf", "http://www.cs.brandeis.edu/~olga/publications/cidr2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/b516/ead4f4a713b5c85524356d60b6836d07ca33.pdf", "s2Url": "https://semanticscholar.org/paper/b516ead4f4a713b5c85524356d60b6836d07ca33", "sources": [ "DBLP" ], "title": "Releasing Cloud Databases for the Chains of Performance Prediction Models", "venue": "CIDR", "year": 2017 }, "b51cfc0298e4e3c80426c9a5ffca7a709c610ba3": { "authors": [ { "ids": [ "1738234" ], "name": "Yuichi Tsujita" }, { "ids": [ "17074822" ], "name": "Tatsuhiko Yoshizaki" }, { "ids": [ "1833664" ], "name": "Keiji Yamamoto" }, { "ids": [ "2795389" ], "name": "Fumichika Sueyasu" }, { "ids": [ "3594678" ], "name": "Ryoji Miyazaki" }, { "ids": [ "39083557" ], "name": "Atsuya Uno" } ], "doi": "10.1007/978-3-319-58667-0_17", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_17", "entities": [ "Data striping", "Load balancing (computing)" ], "id": "b51cfc0298e4e3c80426c9a5ffca7a709c610ba3", "inCitations": [], "journalName": "", "journalPages": "315-333", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_17" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b51cfc0298e4e3c80426c9a5ffca7a709c610ba3", "sources": [ "DBLP" ], "title": "Alleviating I/O Interference Through Workload-Aware Striping and Load-Balancing on Parallel File Systems", "venue": "ISC", "year": 2017 }, "b52a02df3a8e79f2430958e189f8ad64cdc5b588": { "authors": [ { "ids": [ "1804307" ], "name": "Xiaopeng Li" }, { "ids": [ "11856684" ], "name": "James She" } ], "doi": "10.1145/3097983.3098077", "doiUrl": "https://doi.org/10.1145/3097983.3098077", "entities": [ "Autoencoder", "Bayesian network", 
"Calculus of variations", "Cold start", "Collaborative filtering", "Generative model", "Modality (human\u2013computer interaction)", "Noise reduction", "Recommender system", "Sparse matrix", "Unsupervised learning" ], "id": "b52a02df3a8e79f2430958e189f8ad64cdc5b588", "inCitations": [ "23b0a959d07a744a80734119e94262f14f2d83e1", "1392911ca727b37cfc46e60fbb25ea44134a98c7", "55d30f35d0da3a76c4fef697c464a36783316097", "299bfd0ca5349f700f914fdab312e16aef429cdc", "081ad92ce0e71541646218f11061c86414a960c2", "630843b2dd2a6b1315e1fc71af5e47c1669906ba", "c92403e2b6b1a9d4aa10d2ba0d87ad30cfa02153" ], "journalName": "", "journalPages": "305-314", "journalVolume": "", "outCitations": [ "cfb5c4c8c19bbac3c40c8f37c33695fcffce7202", "9699a0989108fafbcaf6cfa5fce4b08610493bc0", "6ee64bb183afba0d14a286509332983497a576c8", "261a056f8b21918e8616a429b2df6e1d5d33be41", "e2b7f37cd97a7907b1b8a41138721ed06a0b76cd", "075d50e6f11b95eb5ab8b56e28a82f568737198d", "04f39720b9b20f8ab990228ae3fe4f473e750fe3", "64c871cd7e0af7e1aeb94d98c6214eb8d8e8b989", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "e6f4625a3b2c0999174cdfb23d131e022f8545a0", "69d85d6841d94f44065ffac4dff53ebaae0605b8", "39afbfe64d83b17368948c6cb3567431580b2a29", "92eb167f30ad59f6949667021760eb41078cf85c", "714544b7cf35a3b8bdc12fb1967624a38f257a42", "1c2cae04e0e00dcec3fe6840b2daa00ae9e8a0a3", "bff8252c3d7a2557e8a4bbbc94079d23c7c8d9fd", "530d53d8a1828e73ff0d731cd2d2cf8a8f8ecae4", "663045dd96cff694dcf837111f93e724b69ac694", "3d1427961edccf8940a360d203e44539db58a60f", "18057a63db389861fac5f270b6f67ca56fc737d2", "09b8120cbc52e7df46122e8e608146289fddbdfa", "073daaf4f6fa972d3bdee3c4e4510d21dc934dfb", "4f6e61d2ab1e2f468cc4bc8fdd8d6f13efaba468", "2fac6c403631eaa1658e0405aed7980786a001ab", "0f88de2ae3dc2ec1371d1e9f675b9670902b289f", "7361a3cd96a05d3733a52e92ae90f7c921ebb773", "2744288f090192987e980274999065ad2d6e45d6", "3eaf79589dbb9bce5a502e867a8f03917e52de26" ], "paperAbstract": "Modern recommender systems usually employ collaborative filtering 
with rating information to recommend items to users due to its successful performance. However, because of the drawbacks of collaborative-based methods such as sparsity, cold start, etc., more attention has been drawn to hybrid methods that consider both the rating and content information. Most of the previous works in this area cannot learn a good representation from content for recommendation task or consider only text modality of the content, thus their methods are very limited in current multimedia scenario. This paper proposes a Bayesian generative model called collaborative variational autoencoder (CVAE) that considers both rating and content for recommendation in multimedia scenario. The model learns deep latent representations from content data in an unsupervised manner and also learns implicit relationships between items and users from both content and rating. Unlike previous works with denoising criteria, the proposed CVAE learns a latent distribution for content in latent space instead of observation space through an inference network and can be easily extended to other multimedia modalities other than text. 
Experiments show that CVAE is able to significantly outperform the state-of-the-art recommendation methods with more robust performance.", "pdfUrls": [ "http://cseweb.ucsd.edu/classes/fa17/cse291-b/reading/p305-li.pdf", "http://doi.acm.org/10.1145/3097983.3098077" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b52a02df3a8e79f2430958e189f8ad64cdc5b588", "sources": [ "DBLP" ], "title": "Collaborative Variational Autoencoder for Recommender Systems", "venue": "KDD", "year": 2017 }, "b532c625457aede2f11ef0eae40de38e4b5a8ab6": { "authors": [ { "ids": [ "2468154" ], "name": "Marco Oliverio" }, { "ids": [ "2072347" ], "name": "Kaveh Razavi" }, { "ids": [ "3053948" ], "name": "Herbert Bos" }, { "ids": [ "1744275" ], "name": "Cristiano Giuffrida" } ], "doi": "10.1145/3132747.3132781", "doiUrl": "https://doi.org/10.1145/3132747.3132781", "entities": [ "Computer data storage", "Copy-on-write", "HTTPS", "Hardware random number generator", "Hypervisor", "Linux", "Memory management", "Modern Operating Systems", "Operating system", "Overhead (computing)", "Page (computer memory)", "Pointer (computer programming)", "Randomness", "Row hammer" ], "id": "b532c625457aede2f11ef0eae40de38e4b5a8ab6", "inCitations": [], "journalName": "", "journalPages": "531-545", "journalVolume": "", "outCitations": [ "531fa3815b674e917821c9d2de02fb756ba17598", "0852a44c86db434e9b51c67704636791e9940487", "3a980f0f85524fef7a3ae0e0d25e7a46ab4e7302", "daeff61502115efc4b9ee81607a8e5489215ea88", "5bc690391cb140731f88c8a68b4dee6dacd7097d", "44d886f89cdbd4fdf5dd25d83b2d37deb7541bf7", "2612541a89857949bc512b6fb2ad7f0c153cb97c", "026fa4a12e769ae6eb40438618703c7975d2c7f6", "2ba9c60483ce810cc001aa620598cda98001af7e", "0027600a3ff1d431ee44fcb8b149388ec7bb3ac8", "482fcc1057c6ed9ea21f71c990088eeb092ec243", "4c106a45b6d1e1d9749121c02016da85e0f5b17c", "0d939c3826455ca42310a92d5c00a956c4630b0e", "11bbc477d14d1c945f203f1a83a530856a89d28f", "2d4ef2f1ceeaba4acc46dec6c48dc18deb9ddb5f", 
"615168555150d80752a1c195229642acbe6fb3d9", "46d63d1b3ea2cab49a863b56b97f263f75c956f3", "6aba2b1785bd26eb6d85820a734ddaa262d20571", "045bbbea384e9d54be38dd207bf237d5208ea599", "1db11a76fa33ca81970aa345fe4bc150ae846ce0", "47e98574f432ec6a7a224b807a0bbd08a2a66148", "2c9bbdd35ee20c6528c6409b2ddbe35289866712", "52c2c050af5b32d4929b4b193967a3675d03aea0", "531847b1e582c5353ff436744ba0c60682cbd022", "0c7b18190730db1887b2fae8d4474e9c49a9fa46", "33cb4013c7cc36a173e7fb4e541133056e8e43cf", "217742089058db1572042a0cebfcecdec8ce215e", "cb5ab4d277627b1d275ffbcd11379f5a3bacb68f", "450298590f15f3586098b29fc6be44edee51a25d", "4d624b942a58818f8d425460638cb4b65ed84e1c" ], "paperAbstract": "To reduce memory pressure, modern operating systems and hypervisors such as Linux/KVM deploy page-level memory fusion to merge physical memory pages with the same content (i.e., page fusion). A write to a fused memory page triggers a copy-on-write event that unmerges the page to preserve correct semantics. While page fusion is crucial in saving memory in production, recent work shows significant security weaknesses in its current implementations. Attackers can abuse timing side channels on the unmerge operation to leak sensitive data such as randomized pointers. Additionally, they can exploit the predictability of the merge operation to massage physical memory for reliable Rowhammer attacks. In this paper, we present VUsion, a secure page fusion system. VUsion can stop all the existing and even new classes of attack, where attackers leak information by side-channeling the merge operation or massage physical memory via predictable memory reuse patterns. To mitigate information disclosure attacks, we ensure attackers can no longer distinguish between fused and non-fused pages. To mitigate memory massaging attacks, we ensure fused pages are always allocated from a high-entropy pool. 
Despite its secure design, our comprehensive evaluation shows that VUsion retains most of the memory saving benefits of traditional memory fusion with negligible performance overhead while maintaining compatibility with other advanced memory management features.", "pdfUrls": [ "http://doi.acm.org/10.1145/3132747.3132781" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b532c625457aede2f11ef0eae40de38e4b5a8ab6", "sources": [ "DBLP" ], "title": "Secure Page Fusion with VUsion: https: //www.vusec.net/projects/VUsion", "venue": "SOSP", "year": 2017 }, "b59740eec39f90cb53e46b93d01f43023a8822a4": { "authors": [ { "ids": [ "1977696" ], "name": "Bas Ketsman" }, { "ids": [ "9246931" ], "name": "Dan Suciu" } ], "doi": "10.1145/3034786.3034788", "doiUrl": "https://doi.org/10.1145/3034786.3034788", "entities": [ "Algorithm", "Best, worst and average case", "Computation", "Conjunctive query", "Cycle (graph theory)", "Edge cover", "Parallel computing", "Server (computing)", "Vertex cover" ], "id": "b59740eec39f90cb53e46b93d01f43023a8822a4", "inCitations": [ "980fccf2e450d850cc757c3d3afda70a84c21f14", "0c7b88c4ea95081e99307700d3bf7eb08e790550", "e57b6b5ba04a188591c900083ccbc76f9f6f52ee", "28b904c0af913c8593a7808b52dee38dff51dafa", "6d71d47b469d74e59a8eb71ce6980d11005cecf2", "4a790de4588c147903a61a50d484444bdb4d8822" ], "journalName": "", "journalPages": "417-428", "journalVolume": "", "outCitations": [ "a25f6ee864f0c4fd95d9ceb2f4868e9e3fe51786", "021764d0804445e0869c57314c069d07c874cb4b", "0238c4cdb18ab4df78e76a2718eec4bc42cd826f", "3e86097fbb3cda8efadb42e1175edc817138d9c3", "1476456265553d489ec85c6f3c78a092ee8ee681", "44b2dd390f32a6a77d4e2416351df0fa08a323c1", "97fe4f19b3074756214e3d7cc03f5b40a2a48cf2", "076aee8e0b13704adb60ae212df22fea64613861", "5b8b0ca444c9efffb82d221ac01197730ebf58e6", "c0d4c5713dba6472edb651334d25c3a5aa78d6f3", "465f02fb10fff812a5270e1f8027f8511045477e", "5c10b1be8eafd816f931a43edc448054b55a5c2e", 
"02ad38ab3cf54fc3f2be4f6b0dd898cb45d7437f", "eaed7286bba82a3adc56dc17623d82cebe4b34c6" ], "paperAbstract": "We study the optimal communication cost for computing a full conjunctive query Q over p distributed servers. Two prior results were known. First, for one-round algorithms over skew-free data the optimal communication cost per server is m/p^(1/tau*), where m is the size of the largest input relation, and tau* is the fractional vertex covering number of the query hypergraph. Second, for multi-round algorithms and unrestricted database instances, it was shown that any algorithm requires at least m/p^(1/rho*) communication cost per server, where rho* is the fractional edge covering number of the query hypergraph; but no matching algorithms were known for this case (except for two restricted queries: chains and cycles).\n In this paper we describe a multi-round algorithm that computes any query with load m/p^(1/rho*) per server, in the case when all input relations are binary. Thus, we prove this to be the optimal load for all queries over binary input relations. 
Our algorithm represents a non-trivial extension of previous algorithms for chains and cycles, and exploits some unique properties of graphs, which no longer hold for hyper-graphs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3034786.3034788", "https://webcourse.cs.technion.ac.il/236803/Winter2017-2018/ho/WCFiles/DolevAdas.pdf?9775=", "http://alpha.uhasselt.be/~lucp8038/papers/pods2017.pdf", "https://homes.cs.washington.edu/~suciu/bas-pods-2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b59740eec39f90cb53e46b93d01f43023a8822a4", "sources": [ "DBLP" ], "title": "A Worst-Case Optimal Multi-Round Algorithm for Parallel Computation of Conjunctive Queries", "venue": "PODS", "year": 2017 }, "b5d3b961e43d8be64aef0a6d8fbdd83c710f12e9": { "authors": [ { "ids": [ "2925953" ], "name": "Moustafa AbdelBaky" }, { "ids": [ "2459687" ], "name": "Javier Diaz Montes" }, { "ids": [ "2682919" ], "name": "Merve Unuvar" }, { "ids": [ "3001630" ], "name": "Melissa Romanus" }, { "ids": [ "1709070" ], "name": "Ivan Rodero" }, { "ids": [ "1697111" ], "name": "Malgorzata Steinder" }, { "ids": [ "1750983" ], "name": "Manish Parashar" } ], "doi": "", "doiUrl": "", "entities": [ "Constraint programming", "Distributed computing", "Dynamic infrastructure", "Enterprise resource planning", "Formal language", "Service composability principle", "Software deployment" ], "id": "b5d3b961e43d8be64aef0a6d8fbdd83c710f12e9", "inCitations": [ "461db9779b118d7d4fb961f89a709bf9a5c6375a", "663aef84d4fb2d534b4bce9d5a2309c3be5099c6" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "274-283", "journalVolume": "", "outCitations": [ "ab72b755ca6ba237af1bb15910569b2c723dd2e1", "deecff8369c776b9f0e945df71e625fe421ad23b", "8958ca26b52acdcb7d6647f2db43424f11221b72", "2194026d40ee6e3ffd66b58a2882c191f0af0c38", "e514f6eb0f1262f33f4330e5cd0c767ee45cd368", "3b34a92a3e3acfcae972ddb1edcca4a28986d0ae", 
"26dc3b6cfde460064f2198837752074b8267ab99", "0562bddede669f31d9467861617607efa354e089", "0364d9b50978071565a1abc6206daaa0b6178899", "6a656a567097c53a49b1dbeb9e1e77bebf7524ec", "37bb2f99f60fe5bac0d69f647319ca6e7e238a75", "80cff648d0852b5f7b917408c38ad3253adb6e9b", "064924e496995e5ec3e0ed7a44280ae077c961ac", "352629d4394f630a321a80e6ef25a116d96513a5", "2549e2ae61649e8e481346b14e9895c715f52dde", "10da56ca968d4e862d9071246790e930660438d4", "ddb764b7cf45e85dbc1375af3aa7faee7ed3d81c", "7f51f3926f74b9bb6f9b69939027f339b4ecefa2", "791d4f039b74abacc6a20263fedd00ec72a5d01e", "b3e7baff31a8ddca645061ff3b6a8206936a5154" ], "paperAbstract": "Service-based access models coupled with emerging application deployment technologies are enabling opportunities for realizing highly customized software-defined environments, which can support dynamic and data-driven applications. However, this requires rethinking traditional resource federation models to support dynamic resource compositions, which can adapt to evolving application needs and the dynamic state of underlying resources. In this paper, we present a programmable approach that leverages software-defined techniques to create a dynamic space-time infrastructure service composition. We propose the use of Constraint Programming as a formal language to allow users, applications, and service providers to define the desired state of the execution environment. The resulting distributed software-defined environment continually adapts to meet objectives/constraints set by the users, applications, and/or resource providers. We present the design and prototype implementation of such distributed software-defined environment. 
We use a cancer informatics workflow to demonstrate the operation of our framework using resources from five different cloud providers, which are aggregated on-demand based on dynamic user and resource provider constraints.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101151" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b5d3b961e43d8be64aef0a6d8fbdd83c710f12e9", "sources": [ "DBLP" ], "title": "Enabling Distributed Software-Defined Environments Using Dynamic Infrastructure Service Composition", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "b5e781ec05b89255bcbf62f57e95641602e85bbe": { "authors": [ { "ids": [ "1959671" ], "name": "Long Xia" }, { "ids": [ "39474114" ], "name": "Jun Xu" }, { "ids": [ "37510256" ], "name": "Yanyan Lan" }, { "ids": [ "1777025" ], "name": "Jiafeng Guo" }, { "ids": [ "1687907" ], "name": "Wei Zeng" }, { "ids": [ "1717004" ], "name": "Xueqi Cheng" } ], "doi": "10.1145/3077136.3080775", "doiUrl": "https://doi.org/10.1145/3077136.3080775", "entities": [ "Benchmark (computing)", "Browsing", "Diversification (finance)", "Document", "Gradient", "Heuristic", "Markov chain", "Markov decision process", "Recurrent neural network", "Relevance", "Top-down and bottom-up design" ], "id": "b5e781ec05b89255bcbf62f57e95641602e85bbe", "inCitations": [ "59b51782dcf4287d1f47a42422ad500d14d3cd6c" ], "journalName": "", "journalPages": "535-544", "journalVolume": "", "outCitations": [ "10486f0102297696f51da9f8f7c4ded4c0c49fb3", "76a50e38d7713b6853f8d0626467cdbfdb25a1d5", "229b9e80568169ac90357428ed3cfadd3513c823", "040678daf6a49a88345ee0c680fccfd134f24d4b", "10205ce087b9190ac18ade8be02a660d92a6ea52", "6c37a8d20ed8fbf2f5cb7dcd039b4f45c6f3498a", "517a461a8839733e34c9025154de3d6275543642", "1f6bbd555ba4cbec2be94781242c97ac2c33ddd3", "2d1cfc9e81fb159967c2be8446a8e3e7b50fe36b", "621af515b66c8b1e88aac10756879458e1856b34", 
"035bd1607d664b0c10143ca055f1c645b7d04a11", "1510cf4b8abea80b9f352325ca4c132887de21a0", "96582ccdece746962ca3e44858943ee4363f140e", "64693b8c3b3340a4055e336b9a2be0d81b80c064", "15004aadabd967ac722a28a9c3bb39cf5bc32605", "56757c983518b0604d54719df85fcd0adf789044", "e3796f39fe2623823a5d48dee2822da9502561c5", "0f490e121d3dd3cf65ae45542ebab5049f85544b", "85b055c4fc2e324e88cdfe1c20e557a33b940d7e", "900874be4d48f919434a3107d959e57a55bc286c", "09a503095db2d68b439e48d67481399198ed0e5b", "07a02c6cdf6fabff13fe42f5309bf55c91a087fb", "ed5d79e21beccbc3a978ddd2cbd64a9e62df8499", "ded1fa5432e066c3a9f998bd52c6ed736068c582", "375f771832f671ae1ca63ad4dba11fe082097fd6", "ce5b0b9a74c1cbe8aa892d0ce6a731e79bff26e5", "65810ccd8be5010a6a4b9e51cf4b229761b15e99", "88dfee10842bbfd2ebc74980ab64c1cac5753883", "c3eb86975f7fc0a4aa99b8a19bf3e0cf7a3a6646" ], "paperAbstract": "In this paper we address the issue of learning diverse ranking models for search result diversification. Typical methods treat the problem of constructing a diverse ranking as a process of sequential document selection. At each ranking position, the document that can provide the largest amount of additional information to the users is selected, because the search users usually browse the documents in a top-down manner. Thus, to select an optimal document for a position, it is critical for a diverse ranking model to capture the utility of information the user have perceived from the preceding documents. Existing methods usually calculate the ranking scores (e.g., the marginal relevance) directly based on the query and the selected documents, with heuristic rules or handcrafted features. The utility the user perceived at each of the ranks, however, is not explicitly modeled. In this paper, we present a novel diverse ranking model on the basis of continuous state Markov decision process (MDP) in which the user perceived utility is modeled as a part of the MDP state. 
Our model, referred to as MDP-DIV, sequentially takes the actions of selecting one document according to current state, and then updates the state for the chosen of the next action. The transition of the states are modeled in a recurrent manner and the model parameters are learned with policy gradient. Experimental results based on the TREC benchmarks showed that MDP-DIV can significantly outperform the state-of-the-art baselines.", "pdfUrls": [ "http://www.bigdatalab.ac.cn/~junxu/publications/SIGIR2017_MDPDIV.pdf", "http://doi.acm.org/10.1145/3077136.3080775" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b5e781ec05b89255bcbf62f57e95641602e85bbe", "sources": [ "DBLP" ], "title": "Adapting Markov Decision Process for Search Result Diversification", "venue": "SIGIR", "year": 2017 }, "b5f70ba562bf28aa297a8a865d6f1cdd74e77a44": { "authors": [ { "ids": [ "40186860" ], "name": "Esteban Stafford" }, { "ids": [ "40334130" ], "name": "Borja P\u00e9rez" }, { "ids": [ "1724085" ], "name": "Jos\u00e9 Luis Bosque" }, { "ids": [ "1762103" ], "name": "Ram\u00f3n Beivide" }, { "ids": [ "1741016" ], "name": "Mateo Valero" } ], "doi": "10.1007/978-3-319-64203-1_51", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_51", "entities": [ "Central processing unit", "Experiment", "Load balancing (computing)", "Programmer" ], "id": "b5f70ba562bf28aa297a8a865d6f1cdd74e77a44", "inCitations": [], "journalName": "", "journalPages": "710-722", "journalVolume": "", "outCitations": [ "5c07241363026c26cf5ead2e4396a772fd2380b0", "2d0af1fb1b535cb9bf5ece0b93d48982a8672e04", "1e54f8de08f64aaa3018872681254d3c8327edaa", "9f14902ba961ad5710ae8ee6f20fd477b75000fa", "a164a1f106220efd97d8776da0576c064d8b28fd", "092175c7bfa7d879ec44f864e856b61a492f7b51", "9f0756259aff27f7c7a22cdfcbf0d35ffcb8bec1", "b0e74b5139423d3ede96a241040dd32650ca1db3", "352edd2c567a202f2e5d3f5a022740a3fac23986", "3ce125d8a88fd0833bc9c8d8d14324f3e1b9e63b", "1d286a264b233125b681e522e8f5fed596a8608c", 
"8db3c11cd85195f459b8ba82fe3326e8f86f1d52", "ec5d0fbaa1221b029be37afc31da7f043085ee81", "804df33cbda438274e1ae2d6d9e7609238a8bb27" ], "paperAbstract": "Heterogeneous systems are nowadays a common choice in the path to Exascale. Through the use of accelerators they offer outstanding energy efficiency. The programming of these devices employs the host-device model, which is suboptimal as CPU remains idle during kernel executions, but still consumes energy. Making the CPU contribute computing effort might improve the performance and energy consumption of the system. This paper analyses the advantages of this approach and sets the limits of when its beneficial. The claims are supported by a set of models that determine how to share a single data-parallel task between the CPU and the accelerator for optimum performance, energy consumption or efficiency. Interestingly, the models show that optimising performance does not always mean optimum energy or efficiency as well. The paper experimentally validates the models, which represent an invaluable tool for programmers when faced with the dilemma of whether to distribute their workload in these systems.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_51", "http://upcommons.upc.edu/bitstream/handle/2117/107547/To+distribute+or+not+to+distribute.pdf;jsessionid=EC18689FCF2BDA1227D746629E3340AA?sequence=4" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/b5f7/0ba562bf28aa297a8a865d6f1cdd74e77a44.pdf", "s2Url": "https://semanticscholar.org/paper/b5f70ba562bf28aa297a8a865d6f1cdd74e77a44", "sources": [ "DBLP" ], "title": "To Distribute or Not to Distribute: The Question of Load Balancing for Performance or Energy", "venue": "Euro-Par", "year": 2017 }, "b618f7dfbc83371c5b7790af69ba7cf13af864ab": { "authors": [ { "ids": [ "21023008" ], "name": "Debopam Bhattacherjee" }, { "ids": [ "15828032" ], "name": "Muhammad Tirmazi" }, { "ids": [ "34891793" ], "name": "Ankit Singla" } ], "doi": "", "doiUrl": "", "entities": 
[ "Change detection and notification", "Cloud computing", "Last mile", "Loader (computing)", "Loading screen", "Web page", "Web service" ], "id": "b618f7dfbc83371c5b7790af69ba7cf13af864ab", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "2bce0f3e815c471702fb9db657914b6169098c2b", "103eef2be0295f4a26e0d5043c95b9a5c8323975", "36275d197118b73f8244828cb3d190617f924dcc", "405354fa27b9535415e23e2e02c0c82bbdc7b4f7", "0507b04c131f2244524fda97cd1707af5760216e", "2f14cec71c4b41b36ee4b90cf900dd8aea32400c", "b257f56ac990dde691e29c3697001904ff959057", "5dc26364e8c64868a40ef00004fc62d12c0e4264", "065e1b5f59ccd2526117be9ec98c2df9e4172bea", "79a959159bb856995402b4487c1eab7404852742" ], "paperAbstract": "Many popular Web services use CDNs to host their content closer to users and thus improve page load times. While this model\u2019s success is beyond question, it has its limits: for users with poor last-mile latency even to a nearby CDN node, the many RTTs needed to fetch a Web page add up to large delays. Thus, in this work, we explore a complementary model of speeding up Web page delivery \u2013 a content gathering network (CGN), whereby users establish their own geo-distributed presence, and use these points of presence to proxy content for them. We show that deploying only 14 public cloud-based CGN nodes puts the closest node within a median RTT of merely 4.8 ms (7.2 ms) from servers hosting the top 10k (100k) most popular Web sites. The CGN node nearest to a server can thus obtain content from it rapidly, and then transmit it to the client over fewer (limited by available bandwidth) high-latency interactions using aggressive transport protocols. 
This simple approach reduces the median page load time across 100 popular Web sites by as much as 53%, and can be deployed immediately without depending on any changes to Web servers at an estimated cost of under $1 per month per user.", "pdfUrls": [ "https://www.usenix.org/conference/hotcloud17/program/presentation/bhattacherjee", "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-bhattacherjee.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/b618/f7dfbc83371c5b7790af69ba7cf13af864ab.pdf", "s2Url": "https://semanticscholar.org/paper/b618f7dfbc83371c5b7790af69ba7cf13af864ab", "sources": [ "DBLP" ], "title": "A Cloud-based Content Gathering Network", "venue": "HotCloud", "year": 2017 }, "b6263576b4477fe3b5a86b1f18ec0949b8f52517": { "authors": [ { "ids": [ "2801189" ], "name": "Gabriel Poesia" }, { "ids": [ "12620313" ], "name": "Breno Campos Ferreira Guimar\u00e3es" }, { "ids": [ "3441463" ], "name": "Fabricio Ferracioli" }, { "ids": [ "39907070" ], "name": "Fernando Magno Quint\u00e3o Pereira" } ], "doi": "10.1145/3133874", "doiUrl": "https://doi.org/10.1145/3133874", "entities": [ "Algorithm", "Analysis of algorithms", "Call graph", "Central processing unit", "Computation", "Computer", "Graphics processing unit", "OpenACC", "OpenMP", "Parallel computing", "Simulated annealing", "Simulation", "Smartphone", "Static program analysis", "Supercomputer" ], "id": "b6263576b4477fe3b5a86b1f18ec0949b8f52517", "inCitations": [], "journalName": "PACMPL", "journalPages": "50:1-50:28", "journalVolume": "1", "outCitations": [ "4c92a542084b151cdd33d06bc91c952568828065", "8d39df40831d9dd239ccd9da60de93292ea894a3", "348ba95c8168b20af8b15f60dd214174b12cea26", "a8559e6c955a43ecbde0ce05b928346691dfb89f", "e572e28a20d9b201ccf5c8670878629104557429", "0b7c1bc9636d8cc66c36fb7e676d3badfe5df696", "8df62aad18d6de13331479666c3b5d6a32b0ba58", "4021aec95d6b1a090e26e75f8bb25957d0f7297e", "1a249a77a2024638788fc512f049e558d85b3aa5", 
"990893b26bf52167b806c23dd18f8d2632e0fa01", "4d8bebe9f9ceeddb261185c43d4c04180c90e448", "3e05ad2c59eede46625f42a244063d13de1cf1a2", "7f0d4bf7e566b9ab6b71e57d6d2ad36f282e1bab", "7e045aaacd735daf206db9cb35b50d694dcae166", "41fca6c199464c983cb6384ae65c83eb7522fb46", "3a8606d35e419a68171e420c8454a98b2fb62c44", "4ffd50725b9cdff4ab0f13c9182cf3fdb671e76c", "363ce870e54ba6b98b29df8ce053f5fced330525", "24e370e5739a58644e5a261ff47e93633522fd3a", "03b95c1c6859ce4f792ac6995137a6cfab60670c" ], "paperAbstract": "Heterogeneous architectures characterize today hardware ranging from super-computers to smartphones. However, in spite of this importance, programming such systems is still challenging. In particular, it is challenging to map computations to the different processors of a heterogeneous device. In this paper, we provide a static analysis that mitigates this problem. Our contributions are two-fold: first, we provide a semi-context-sensitive algorithm, which analyzes the program\u2019s call graph to determine the best processor for each calling context. This algorithm is parameterized by a cost model, which takes into consideration processor\u2019s characteristics and data transfer time. Second, we show how to use simulated annealing to calibrate this cost model for a given heterogeneous architecture. We have used our ideas to build Etino, a tool that annotates C programs with OpenACC or OpenMP 4.0 directives. Etino generates code for a CPU-GPU architecture without user intervention. Experiments on classic benchmarks reveal speedups of up to 75x. 
Moreover, our calibration process lets avoid slowdowns of up to 720x which trivial parallelization approaches would yield.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133874" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b6263576b4477fe3b5a86b1f18ec0949b8f52517", "sources": [ "DBLP" ], "title": "Static placement of computation on heterogeneous devices", "venue": "PACMPL", "year": 2017 }, "b6fbff6d287c6f80429bc6c250b292ba15d43343": { "authors": [ { "ids": [ "2526892" ], "name": "Davide Quarta" }, { "ids": [ "1952394" ], "name": "Marcello Pogliani" }, { "ids": [ "1922766" ], "name": "Mario Polino" }, { "ids": [ "2047556" ], "name": "Federico Maggi" }, { "ids": [ "2650655" ], "name": "Andrea Maria Zanchettin" }, { "ids": [ "1750264" ], "name": "Stefano Zanero" } ], "doi": "10.1109/SP.2017.20", "doiUrl": "https://doi.org/10.1109/SP.2017.20", "entities": [ "Correctness (computer science)", "Cyber-physical system", "Industrial robot", "Industry 4.0", "Logistics", "Requirement", "Robot", "Robotics", "Software deployment", "Vulnerability (computing)" ], "id": "b6fbff6d287c6f80429bc6c250b292ba15d43343", "inCitations": [ "42168d6f3df61ec701098da4268c755e1e481304", "501e04b226e9ade5af436deec80585c578ca8cca", "6c72446f41ed40410885924bb509b813f6438d43", "8267cc512d360f5859bec8d149bb4d017fa8519c", "420460a27afd6a97a01e1ea3bc1a2db2ede7e353", "a493c723794c0d3bcd0b00cb78b0e24c417b5056" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "268-286", "journalVolume": "", "outCitations": [ "924b7a60e4a64b0e78a3c3d55d804f5e223b9872", "04f956d7f0acb76a4716a5cd5f75ee634f2f393d", "08c4ec414fc74b242aa7e3700fe2b169291cc6bf", "d80e729157ae8693f677473fda4282f927a0406f", "a608bd857a131fe0d9e10c2219747b9fa03c5afc", "240061b40d87d54d4f3a5958878b8dd96b5d3128", "2b8f6813be29f884dc021e9cd2b7f06678574ee5", "90c63c1285dbea2e9e8a8bfb843ecdc933e1b281", "d2f36a061a26c87014f0be3473b9a85c5aa8d494", "ebd4e5d7166eea9a7589703cf2b3bd9a98ad5096", 
"1b48cd2429ca786cfcc354069f3c24d4f994eef5", "48b656ad3ee567417b68879ff503d0ef933413b2", "2a5a6d2990f0d91abc9e63eb35a6e713e0f97e11" ], "paperAbstract": "Industrial robots, automated manufacturing, and efficient logistics processes are at the heart of the upcoming fourth industrial revolution. While there are seminal studies on the vulnerabilities of cyber-physical systems in the industry, as of today there has been no systematic analysis of the security of industrial robot controllers. We examine the standard architecture of an industrial robot and analyze a concrete deployment from a systems security standpoint. Then, we propose an attacker model and confront it with the minimal set of requirements that industrial robots should honor: precision in sensing the environment, correctness in execution of control logic, and safety for human operators. Following an experimental and practical approach, we then show how our modeled attacker can subvert such requirements through the exploitation of software vulnerabilities, leading to severe consequences that are unique to the robotics domain. We conclude by discussing safety standards and security challenges in industrial robotics.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.20", "http://robosec.org/downloads/paper-robosec-sp-2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b6fbff6d287c6f80429bc6c250b292ba15d43343", "sources": [ "DBLP" ], "title": "An Experimental Security Analysis of an Industrial Robot Controller", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "b70099e9c7306aa03a1073faeba0fa441a322c6b": { "authors": [ { "ids": [ "3758909" ], "name": "Seth Gilbert" }, { "ids": [ "1779678" ], "name": "Calvin C. 
Newport" } ], "doi": "10.1145/3087801.3087814", "doiUrl": "https://doi.org/10.1145/3087801.3087814", "entities": [ "Symmetry breaking" ], "id": "b70099e9c7306aa03a1073faeba0fa441a322c6b", "inCitations": [], "journalName": "", "journalPages": "273-282", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087814" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b70099e9c7306aa03a1073faeba0fa441a322c6b", "sources": [ "DBLP" ], "title": "Symmetry Breaking with Noisy Processes", "venue": "PODC", "year": 2017 }, "b7437111bf04a803878ebacbc275ba3715bccb18": { "authors": [ { "ids": [ "38709123" ], "name": "Alexander Conway" }, { "ids": [ "35300553" ], "name": "Ainesh Bakshi" }, { "ids": [ "2889496" ], "name": "Yizheng Jiao" }, { "ids": [ "3354088" ], "name": "William Jannen" }, { "ids": [ "31800663" ], "name": "Yang Zhan" }, { "ids": [ "1746449" ], "name": "Jun Yuan" }, { "ids": [ "33877556" ], "name": "Michael A. Bender" }, { "ids": [ "2387399" ], "name": "Rob Johnson" }, { "ids": [ "1871661" ], "name": "Bradley C. Kuszmaul" }, { "ids": [ "1755646" ], "name": "Donald E. 
Porter" }, { "ids": [ "1680147" ], "name": "Martin Farach-Colton" } ], "doi": "", "doiUrl": "", "entities": [ "Dictionary", "Experiment", "Heuristic", "Locality of reference", "Synthetic data", "ZFS" ], "id": "b7437111bf04a803878ebacbc275ba3715bccb18", "inCitations": [ "27a36203f14d73b95dfffec857b4ff923d9ef430", "8cfa25e85c2c6c9305f696819d764ed5490f3faf", "1ee52dc91368b925a15bc2448d9e1ea4a1643dc4", "3e8f2997d81682e3bfbfdc2ea35be9ce6838a057", "75c3f38c4268097b45212b8c67b028f6bf4ecc2d", "8ec17bf5cf4a4acd88455e05fc2f17bc5b29e78f" ], "journalName": "", "journalPages": "45-58", "journalVolume": "", "outCitations": [ "0af5cc5352264a890a22b6e910c183b64679dc0e", "2538812c0f7922329f2c86c6c5b7190313976fb3", "5bb770af1973f929e8622f17ddf378d439245144", "1547503c340ee99713cad4516d059e01d349707e", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", "12a0046a1197ae63c3d616c74e367dc583cef196", "b8ddec47f9fab1eddb5c9cacf703781dd5337b87", "1772fdf329f526d4f6c9e62b99bc65eac0ff31b5", "de8f972df6b7bfd32692db268ec54bb031b1ef3c", "3b2af12a43d06338dd62681328c75a1999fc87fd", "14f03cc21d8eb6a4b6498b46e9780d60784356ee", "012ab4527d6aee2387c243d304c624f3b9cf03f3", "1d6ded112a3462c748d45f7ad11f594c79e6fc59", "b1ec820da48f69a4652ddf08f00e2e991126cf4b", "a04678ad8398a2579c249fff4b59bfbcfdd7e25b", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "d0b6d2075a653d60452b6df0fced4ee0ae093dd2", "5ecd441bf4a54c7500cd6fc185d8dd09638e12cb" ], "paperAbstract": "File systems must allocate space for files without knowing what will be added or removed in the future. Over the life of a file system, this may cause suboptimal file placement decisions which eventually lead to slower performance, or aging. Traditional file systems employ heuristics, such as collocating related files and data blocks, to avoid aging, and many file system implementors treat aging as a solved problem. 
However, this paper describes realistic as well as synthetic workloads that can cause these heuristics to fail, inducing large performance declines due to aging. For example, on ext4 and ZFS, a few hundred git pull operations can reduce read performance by a factor of 2; performing a thousand pulls can reduce performance by up to a factor of 30. We further present microbenchmarks demonstrating that common placement strategies are extremely sensitive to file-creation order; varying the creation order of a few thousand small files in a real-world directory structure can slow down reads by 15\u2212175\u00d7, depending on the file system. We argue that these slowdowns are caused by poor layout. We demonstrate a correlation between read performance of a directory scan and the locality within a file system\u2019s access patterns, using a dynamic layout score. In short, many file systems are exquisitely prone to read aging for a variety of write workloads. We show, however, that aging is not inevitable. BetrFS, a file system based on write-optimized dictionaries, exhibits almost no aging in our experiments. BetrFS typically outperforms the other file systems in our benchmarks; aged BetrFS even outperforms the unaged versions of these file systems, excepting Btrfs. 
We present a framework for understanding and predicting aging, and identify the key features of BetrFS that avoid aging.", "pdfUrls": [ "http://aineshbakshi.com/pubs/Filesystems.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17-conway.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_conway.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_conway.pdf", "https://www.usenix.org/system/files/conference/fast17/fast17-conway.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/conway" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/d6f8/84639f7599d0e772a54da7cdf986a4302fa2.pdf", "s2Url": "https://semanticscholar.org/paper/b7437111bf04a803878ebacbc275ba3715bccb18", "sources": [ "DBLP" ], "title": "File Systems Fated for Senescence? Nonsense, Says Science!", "venue": "FAST", "year": 2017 }, "b74d2874646d36b36eee5c836adf6b29d9173425": { "authors": [ { "ids": [ "1925072" ], "name": "Hongqiang Harry Liu" }, { "ids": [ "2438403" ], "name": "Yibo Zhu" }, { "ids": [ "1695132" ], "name": "Jitendra Padhye" }, { "ids": [ "3321566" ], "name": "Jiaxin Cao" }, { "ids": [ "26918820" ], "name": "Sri Tallapragada" }, { "ids": [ "37101082" ], "name": "Nuno P. 
Lopes" }, { "ids": [ "1733578" ], "name": "Andrey Rybalchenko" }, { "ids": [ "1915468" ], "name": "Guohan Lu" }, { "ids": [ "8072948" ], "name": "Lihua Yuan" } ], "doi": "10.1145/3132747.3132759", "doiUrl": "https://doi.org/10.1145/3132747.3132759", "entities": [ "Biological network", "Correctness (computer science)", "Emulator", "Failure rate", "Networking hardware", "Online service provider", "Virtual machine" ], "id": "b74d2874646d36b36eee5c836adf6b29d9173425", "inCitations": [ "c60960f93f657205b0f6247e00c79c97203e9a51" ], "journalName": "", "journalPages": "599-613", "journalVolume": "", "outCitations": [ "f99ba413a13ba199679ea9a96aebe7c978ddec33", "07d40084599302c18ef4498ac18e90162098b146", "17059e939aa051d7db57f4af959b2af21fa3dd18", "267aa4a091dd43f7eb4ffad4c63405229fd31f1d", "4c28645e6f959aa2c5ebb6cd78e9b62f22e6fd30", "1fcdfe9d2554ed0c5c315edd4f19f9a21dff341d", "0e0f7fa2de3f757a51e747399d93c570249e72ac", "a7d0b7105de6bd84392bfddce2ed3f059c1484f6", "85cacb208b2e3aa16fd39f75dc858d44092782b6", "663e064469ad91e6bda345d216504b4c868f537b", "19114b7a2f5243a47e80590cc11a2d8ec5b96308", "0355a7b4c66e42b73fa3d0d7198ce68b2dbcd5be", "016ca0cee16190907a911d874e98fbc6dfa5a36c", "211b998175c29dd62e7f854301ed88bcb03a1fa5", "c24809e301b30cb1dcc1da4ee14e4e1f87dd742b", "44b3b0573fdf6fc9889dbb3badf134dd092bb2d1", "05a436f059c3897c3509dc059903364eff4a79af", "7510ebabd461e786d6b8a51c1d358b1ffe7bc0f7", "0f35b3fd2ef4638a23ee07db4057cc78365c982a", "ad806d9c69ab834d814865958cd3ded4df4f12f9", "9e778ebe9a4b0955b802f7e46323dcffc6174f94", "e862c6c7f519ce69d96035988980429fc737dead" ], "paperAbstract": "Network reliability is critical for large clouds and online service providers like Microsoft. Our network is large, heterogeneous, complex and undergoes constant churns. In such an environment even small issues triggered by device failures, buggy device software, configuration errors, unproven management tools and unavoidable human errors can quickly cause large outages. 
A promising way to minimize such network outages is to proactively validate all network operations in a high-fidelity network emulator, before they are carried out in production. To this end, we present CrystalNet, a cloud-scale, high-fidelity network emulator. It runs real network device firmwares in a network of containers and virtual machines, loaded with production configurations. Network engineers can use the same management tools and methods to interact with the emulated network as they do with a production network. CrystalNet can handle heterogeneous device firmwares and can scale to emulate thousands of network devices in a matter of minutes. To reduce resource consumption, it carefully selects a boundary of emulations, while ensuring correctness of propagation of network changes. Microsoft's network engineers use CrystalNet on a daily basis to test planned network operations. Our experience shows that CrystalNet enables operators to detect many issues that could trigger significant outages.", "pdfUrls": [ "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/10/p599-liu.pdf", "http://doi.acm.org/10.1145/3132747.3132759" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b74d2874646d36b36eee5c836adf6b29d9173425", "sources": [ "DBLP" ], "title": "CrystalNet: Faithfully Emulating Large Production Networks", "venue": "SOSP", "year": 2017 }, "b74f6e7c1321f86f84988a06cd4dc1c16ad20c55": { "authors": [ { "ids": [ "39708150" ], "name": "Eugene Wu" }, { "ids": [ "2493657" ], "name": "Fotis Psallidas" }, { "ids": [ "2594274" ], "name": "Zhengjie Miao" }, { "ids": [ "3209058" ], "name": "Haoci Zhang" }, { "ids": [ "34011720" ], "name": "Laura Rettig" } ], "doi": "", "doiUrl": "", "entities": [ "Concurrency (computer science)", "Concurrency control", "Data visualization", "End-to-end principle", "Interaction", "Interaction technique", "Lurker", "Management system", "Method of analytic tableaux", "SQL", "Systems design", "User 
interface", "View (SQL)" ], "id": "b74f6e7c1321f86f84988a06cd4dc1c16ad20c55", "inCitations": [ "8995a40dbe8d976a04c055fc39f201d6443abdcf", "8c7044398d1994b12a9bf7212e11398f59eaf446", "e1f0efbb16f736ad7643edd0d29b191d89975d28", "c7dbf823311ea7e06b85db2f3125b6c5ceb20975" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "921e27dd44595b0f4353ee6b981aa53251073e55", "1df05b37ab38851a7537f5a7d1cc31d60ab819dd", "17fbc08eeefbf75e3966190bd2cc103abc90c0e3", "77f7389a3e03fb0b0d5708badd55af5e2e383f21", "06315ad1188c2607a84c359abe6c534c60516f8b", "b41761392859fdc7a4345e8b5d259c3c62c94740", "0c0cfae57d32de295292fff6e67b2a22001bde23", "7cecd69662651f61d78498a6c3ed9ecd711c5358", "65f46d2b21db5bc30cdbdd8d47c07596dd6bd8b9", "cf130b04f13bca531e9cbcc53bac3632c99e4bdc", "005addf2ff421ec7aa1294662e3001a856f25d0b", "2a5d197efb2ddfa69bf8bf925068ee196f6d63b9", "1e371829912dc15dd84d180fec1801af3766dc84", "f0c8d99c0932fe90871423290dab4f1578cc49c4", "0c504d0c8319802c9f63eeea0d7b437cded2f4ef", "5208060771fd213eefd827e3e1260b939f1aed6d", "7fad6a11246f4b9e554716ca69a2741c3629e260", "dae2c3968fa06688cd99b2e024416dfcd50a36f7", "2053bff848529aa4ff7c2d79af718f44d606694b", "2009b3c05dd9084a0a1c609abcedd81713ed7150", "bcc3c02af8efae5a710f4bb53dbdd6067237773a", "efa976a80b19185f8c6d035601ea7067d1a46949", "c305af4358b8c136175287f50edb308ccd49eaed", "01093148286c8bb7c59dd430952b4913d652bf16", "bd6b9bf3f19ecc79393451ca00ae8e7904e5c758", "20aeb2357e9e215787c7e0d0acfe7a6b598c9103", "1274c1f7a556f968cec299b8b8ba279e46e6f8eb", "45832e819d092865c3dc3cb75b17d6970f8af24d", "43278fae276acb01099493ac2750df565b31b432", "9f0bca4ad46a2ee9f03da23bba619d6edc453cb1", "14a706e83de8b9a824b12ebeec10533585ee82f5", "30febe8b5ea6e6e55c545d5cc45d1ca437b0021f", "18e638bfdb99a596cf03ac4c4a34616fbf4fd430", "aae20add6de2f5bb30de6f230fab9112cb50540e", "3de74c9b623165120e62963ee6506612433fb915", "80fcba2d1c8fa1357ddf9c1bdea62179eca9c292", "277292edf896632da575d2b8925643b5ce14613b", 
"1126ceee34acd741396c493c84d8b6072a18bfd7", "01966ce1d04fdf5f5944679cc83f617e60c40c14", "a5d560ef928d0b9f6df2710c4ce66240ed6802b7", "418f49d4a4b58f8aa7ba610ee474420fec4f4a71", "dc39c68a00e38f2993b450eb01c96e1d032ab850", "f9997ec7ff48f4504579e367f336c55e10044f69", "402f68ac3e61bea7b15c82d7290a57c44f4b9afa", "3db2bbf681841f2da838134d6d9cda4b4b5511db", "a3f706941ada1a6420db9855bf8c3fe680f50cb7", "4ca02ebd64464db740a1560509fec3180cc3d8f9", "53a5485c197ec44f6fb9f0308cac7716e50d1584", "0aefab978eefb6f9612a5199af291587ffc1c36a", "d90911f0f4c60d5a720d7967bc6b83b2f62ab81c" ], "paperAbstract": "Interactive data visualizations have emerged as a prominent way to bring data exploration and analysis capabilities to both technical and non-technical users. Despite their ubiquity and importance across applications, multiple design- and performance-related challenges lurk beneath the visualization creation process. To meet these challenges, application designers either use visualization systems (e.g., Endeca, Tableau, and Splunk) that are tailored to domain-specific analyses, or manually design, implement, and optimize their own solutions. Unfortunately, both approaches typically slow down the creation process. In this paper, we describe the status of our progress towards an end-to-end relational approach in our data visualization management system (DVMS). We introduce DeVIL, a SQL-like language to express static as well as interactive visualizations as database views that combine user inputs modeled as event streams and database relations, and we show that DeVIL can express a range of interaction techniques across several taxonomies of interactions. We then describe how this relational lens enables a number of new functionalities and system design directions and highlight several of these directions. 
These include (a) the use of provenance queries to express and optimize interactions, (b) the application of concurrency control ideas to interactions, (c) a streaming framework to improve near-interactive visualizations, and (d) techniques to synthesize interactive interfaces tailored to end-users.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p95-wu-cidr17.pdf", "http://www.cs.columbia.edu/~fotis/pubs/papers/dvms-cidr17.pdf", "http://www.cs.columbia.edu/~fotis/papers/dvms-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/b74f/6e7c1321f86f84988a06cd4dc1c16ad20c55.pdf", "s2Url": "https://semanticscholar.org/paper/b74f6e7c1321f86f84988a06cd4dc1c16ad20c55", "sources": [ "DBLP" ], "title": "Combining Design and Performance in a Data Visualization Management System", "venue": "CIDR", "year": 2017 }, "b757b44dd7391dbedff734e6fb4d0457ed68c0cc": { "authors": [ { "ids": [ "1690420" ], "name": "Gadi Taubenfeld" } ], "doi": "10.1145/3087801.3087807", "doiUrl": "https://doi.org/10.1145/3087801.3087807", "entities": [ "Algorithm", "Consensus (computer science)", "Key-agreement protocol", "Mutual exclusion", "Point of View (computer hardware company)", "Shared memory", "Shared register" ], "id": "b757b44dd7391dbedff734e6fb4d0457ed68c0cc", "inCitations": [], "journalName": "", "journalPages": "325-334", "journalVolume": "", "outCitations": [ "443f89754162b0a9f97f46f80eb74e9cdd416ec1", "a9b8a37dd89965ee26bf422f948169b250d6a4b3", "25430112e7f697dd583236994b6ebb4ab82c491e", "00e3756119a91432622f6982b59ecd24a1340fbe", "11951e83ab610abda6d5ddd578831ad113d00683", "27a86220c18535a08231a3bcdc618804d0d837ab", "07a152ad1c17b35396d8b372cbde16e89705c7ec", "81209d8d761c05f4ce0fcf7c14b267764ddc246d", "5a68cd9a59b28c6cd51f78fd39a375227eb95d92", "eb2da62557d480f6661ecbeafcfc650901219446", "cb41e11632f9b45362431dcfd18b1dbbde4863a2", "a32d8262d45e1a56419c8e0e90d5eac13291840f", "045a975c1753724b3a0780673ee92b37b9827be6", "252844376fd0319359cc6fc0f512f744a3c9362d", 
"73fd6c57527c7c9d037e8781aa8c0cbd0f75c1fe", "112db00847a1e4c153cf9a76b6a5f55c8831b9ea", "7402d22cc5ae2b9318e294130ee0c0e43db85012", "7da89dea35d519edab9d6c7f26db93bd0d12145e", "135772775121ba60b47b9f2f012e682fe4128761", "5a75d1dc283d1e29ed5efbdd805d3bd42fd1a3ef" ], "paperAbstract": "Assuming that there is an a priori agreement between processes on the names of shared memory locations, as done in almost all the publications on shared memory algorithms, is tantamount to assuming that agreement has already been solved at the lower-level. From a theoretical point of view, it is intriguing to figure out how coordination can be achieved without relying on such lower-level agreement. In order to better understand the new model, we have designed new algorithms without relying on such a priori lower-level agreement, and proved space lower bounds and impossibility results for several important problems, such as mutual exclusion, consensus, election and renaming. Using these results, we identify fundamental differences between the standard model where there is a lower-level agreement about the shared register\u2019s names and the strictly weaker model where there is no such agreement.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087807", "http://www.faculty.idc.ac.il/gadi/MyPapers/2017T-withoutPriorAgreement.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/b757/b44dd7391dbedff734e6fb4d0457ed68c0cc.pdf", "s2Url": "https://semanticscholar.org/paper/b757b44dd7391dbedff734e6fb4d0457ed68c0cc", "sources": [ "DBLP" ], "title": "Coordination Without Prior Agreement", "venue": "PODC", "year": 2017 }, "b78a9ef43b956142639f37d14df1099b8d194729": { "authors": [ { "ids": [ "2772186" ], "name": "Neda Nasiriani" }, { "ids": [ "1809899" ], "name": "George Kesidis" }, { "ids": [ "1726699" ], "name": "Di Wang" } ], "doi": "10.1109/MASCOTS.2017.27", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.27", "entities": [ "Backup", "Curiously recurring template 
pattern", "Data center", "Emergency power system", "Load management", "Markov chain", "Markov decision process", "Online optimization", "Program optimization", "Randomness", "Stochastic optimization", "Value at risk" ], "id": "b78a9ef43b956142639f37d14df1099b8d194729", "inCitations": [ "8e34c6ca528e9977064921ac2f4b36143d4f217d" ], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "164-174", "journalVolume": "", "outCitations": [ "78e009cc05a6a832106d5ca6802ce56bef6b247f", "381c7853690a0fee6f00d2608a7779737f1365f9", "11fcb18b8a83b7874bbbf00bc7f6b0e2e3f8c8d7", "6ac0002b174440dd498f2a7ecefadb33c4a8fb50", "0125e9060834b8f23f43461dd468e23f98eeb8a5", "80edb3beef6cc2528ec16ec3d84b6a78cb458dec", "09ed9cb47e09f56608bdbe6dffaa527c8ca0cd73", "4f6fea85be94a1c545ed9c42212cd3841260b42f", "5a1bd2fe1724f5de0d6ab14959a5dd8600723d97", "434bfa445da86c7be719ad7c3a3072b2ab075d03", "33048ebbc9564a5ec289a9abd6cba04fc5b68e47", "4b5feba81544cf04ff5906f01aabf68863e2889b", "5d52cb62b7441478d879ff631f8abe84616abc3f", "5cf67ba15bf2c28ce72508ed7eb4ff98ca1caffc", "0c83169bf4ebb29979bfe47708cb6b79b6e28755", "6c36fa5af94012ee5cb4081f753a817e2125a929", "9b69edc3ce3020de2f96139846ac27aae68dfde9", "3cc2608fd77b9b65f5bd378e8797b2ab1b8acde7", "280f2ed9d2a7505c377251bb623daee4eebfcec0", "130fa6ab2014a3096ec5e603eafc8c83f9c4945e", "07edbb1e253e57715f08c45546f83e2eba31aa07", "59a545d722fd3250d3cf2dc163f3ded2d347974b", "26099477c9bf5da89cf1d4db91cb581bca3d25a3", "68f5570eb032f7276ca1b68d3089802d224770e1", "334f70c5fbba2bd0a3d24e4311ca8480c78e32cc", "73379047f0bc12e9d4a2873e577edd276dbf2d8a", "193e98a45b0e4eb7f85595d3c8b572b59242ed68", "a51264fe065470286145c1abd77e398a31f3fcbc", "03d1325e8b7472d141bb8351cd482554c42d17a9", "e78f7f604b30e6c0e82975f1ffa83dd5e9d16e4f", "42ce7c38ed8795275633e50240d25dce2e08a7f5", "154e96866b304324446085d541072182152d276f", "137969575ae017525e3f2004352ebea720969f14" ], 
"paperAbstract": "A datacenter's power consumption is a major contributor to its operational expenditures (op-ex) and one-time capital expenditures (cap-ex). The recurring electricity cost is often in large determined by datacenter peak-demand under peak-based pricing which is employed by major electric utility providers. There is a growing interest in reducing a datacenter's electricity costs by using throttling techniques and/or energy storage devices (batteries) which are readily available at most datacenters as a backup energy source. A datacenter's power-demand uncertainty makes this a challenging problem, which is largely neglected in existing work, by assuming perfect predictability of power demand. We model this inherent uncertainty as a Markov chain and also evaluate the risk of over/under charging batteries as a result of the randomness in power demand. We design an online optimization framework for peak shaving which considers Conditional Value at Risk and allows for navigating cost-risk trade-offs of datacenters based on their energy infrastructure and workload characteristics. We show that this framework offers significantly higher (up to 2X) cost-savings with small risks of over/under charging batteries, compared to existing stochastic optimization techniques. 
This framework leverages Markov Decision Processes to perform online dynamic peak shaving, considering battery degradation costs under peak-based pricing.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.27" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b78a9ef43b956142639f37d14df1099b8d194729", "sources": [ "DBLP" ], "title": "Optimal Peak Shaving Using Batteries at Datacenters: Characterizing the Risks and Benefits", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "b7934c4daebbae363518d1c3a4d2df6a229c3817": { "authors": [ { "ids": [ "26933585" ], "name": "Mats Stijlaart" }, { "ids": [ "1714951" ], "name": "Vadim Zaytsev" } ], "doi": "10.1145/3136014.3136035", "doiUrl": "https://doi.org/10.1145/3136014.3136035", "entities": [ "Automatic taxonomy construction", "Code refactoring", "Taxonomy (general)", "Transformational grammar" ], "id": "b7934c4daebbae363518d1c3a4d2df6a229c3817", "inCitations": [ "23debceb49b8a343e97887a4e32e71c75758ab48" ], "journalName": "", "journalPages": "43-54", "journalVolume": "", "outCitations": [ "31605146b5e0b8f14bf33a356bc0ef570e2e5632", "25498d34a3cb6549cf8227802f8f2d168b3f42f4", "68022d495c81d0be421acfebb016eba724f4b77d", "9ee9772054aae204f56bded941c914b2f1a4f61e", "974bdaf9446d3087aee5cea2e2687e083100a204", "88a9a4c23973e9597ff36e7a16548118d583247b", "575f904555f4b6eb7d89ab84e2f4e4ca677ce17b", "74c648d53b64266c1677693a0dfcea3db2db2fb6", "b09b9587cac25291f70942c07da09537c3558e49", "3b719d2447c81ebbcdb732ea8a917790ac49d059", "4a022cefcf1c3980c18ded9e542229565f010eed", "530f97dd31db9edf07a1be1c6685d77373ef3178", "4ca19e0a40917bd25b56a8bfc3b05a66f141b58b", "5381ee1f77502c7b9bf3f522944ff974c9f1b8b9", "05eaa5bab12bb06b08a4e7c9e57d60db3d85094f", "aaab5fd2e81a412720a170feff20d53be9766174", "35411353ce8486ce416f46264c84ad3e315ec2f8", "04517fe9cf850668a128c21c960122f8016a92ca", 
"0c226de37c80821a66f69446d67a8a51cabeec36", "c506d966e4f395b814aa192f92b0966de3365f0a", "7f39b9065b97bb486604202ac20b9c4094f80e5d", "aac75306a8f30cf3912bd091ac88840c07de4bc7", "1af286a37494250d70d7a8bd8cc1b229a572fdcb", "c2a5c7c0f505cffe6ca4add871ed476fe1feeb78", "99b711e6323e2a3f3aa0d0c8657b4e988a79908b", "cc6bc05230ff33da576738daf3cf1f8207d8c663", "4ab7a08299ac8a526a4829783c69ff79bcf4418a", "488233501fde6e0ab4438fef62be80b5cfc4f30c", "aee5139118b2a8cc2af163df62a4c49e93b5fcfb", "5683372a2fbc192931ed78b8b995b26678906706", "39e658adddd568601fa8c79499cb4b486cb5d53a", "1dfb76b9279b658deda4dc70a5a879a1122a33a8", "fb35f65339c4f1bbe9c0bd25f09f83e65e436d51", "222f631a8f8318bd206da5c3ef31c82f942a7dc9", "d892689d309b7d86d895d9c8f6af0e33972c6790", "5116d4d5e511e74b74f78c5df68fd63776a8bf1d", "a5cab1abf77e44ac7359572f4ccf435fb4d69c74", "11346a25a0e3967fd0012f572fafdbb4e1195108", "a95c2d4a3659730678262459c7e72dca9210af4d", "da09f5d2b91e9089b89b750ce7728a9bf5a1b133", "56c48e12b7fdee28f30311af786c3fa5ec0b7536", "fc5952ba34b653b7028384228055341619ac9df9", "4f8489f2672c71efb97be9029d8d8e75a3044bbf", "5515747b9c7b35773b97d0d5f77c1de210464d1b", "579aadb6e1a18a43aca88aef9e420d656cc9c3ab", "054c50bdd2932e548365f0b294dc7236cf3c6c77", "1d45968bbe9c4b2f5c1fafa0cfde999fd4fcee20", "0f73e74af8fdcc62e4d1d8c058bf8c9594f12fca", "2f09a866bc7cb521227dc5ab2c4092096077c1ac", "a6b439d8973ecc6d93ee1a2379c5d76e3525a68d", "7f013f172a45824d907f68481e92a22e0188ea0b", "c44358acb602b3f6e81c7799dd56431555590133", "88962fd7ca00c2eb19df59498d93ab3cff56d9ea", "71f0aae61b46094bf84b281907e3a2ee14e14352", "914ef585fda6da809c427c4c18a5bb450d6aa239", "32c07700e7ff4a86a91002cfbf16de0a90a44797", "53c96fead0dc9307809c57e428d60665483ada9a", "790ea9d7f910ef2def2418b44a69b111d9f8ceec", "2685cdfbb74c0aa5453dd0685a9d1a14cd7fb942" ], "paperAbstract": "Any grammar engineer can tell a good grammar from a bad one, but there is no commonly accepted taxonomy of indicators of required grammar refactorings. 
One of the consequences of this lack of general smell taxonomy is the scarcity of tools to assess and improve the quality of grammars. By combining two lines of research — on smell detection and on grammar transformation — we have assembled a taxonomy of smells in grammars. As a pilot case, the detectors for identified smells were implemented for grammars in a broad sense and applied to the 641 grammars of the Grammar Zoo.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136035" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b7934c4daebbae363518d1c3a4d2df6a229c3817", "sources": [ "DBLP" ], "title": "Towards a taxonomy of grammar smells", "venue": "SLE", "year": 2017 }, "b793a7f8a934b6f67949b1b46b32c930c840fdc2": { "authors": [ { "ids": [ "2628935" ], "name": "Naoto Ohsaka" }, { "ids": [ "2811963" ], "name": "Tomohiro Sonobe" }, { "ids": [ "33208854" ], "name": "Sumio Fujita" }, { "ids": [ "1743527" ], "name": "Ken-ichi Kawarabayashi" } ], "doi": "10.1145/3035918.3064045", "doiUrl": "https://doi.org/10.1145/3035918.3064045", "entities": [ "Algorithm", "Approximation theory", "Bell test experiments", "Computation", "Expectation\u2013maximization algorithm", "Graph reduction", "Line graph", "Scalability", "Simulation", "Social network", "Time complexity" ], "id": "b793a7f8a934b6f67949b1b46b32c930c840fdc2", "inCitations": [ "a31964b8281008ad2611845654462240e9688a89", "1d94c13b1911b9f0269769089faed868d4e9bbd6" ], "journalName": "", "journalPages": "635-650", "journalVolume": "", "outCitations": [ "130271241a3718323e439440d897ec26acfebf06", "420a0e5fc398f197bca3dfe40291a82b2c65655a", "706c83309fa09454a136d4e607364b27be66172c", "03231c89ecfd4790914bba49f188ac5ce69230f1", "4cd73382dc17561cd276f276c61d5ebf39bf69ad", "3dd577a7e4a48f4cec07dbf378269931bb5235bd", "6a5ae0e083ab69153ce395874c8dddcd830dfcfd", "abb152802d5b4686a394e221abe951187ea06158", "480b1e3419d38363604016a2746f3f9b3bdc7de6", "51ea20dc4f688af41f9840a854d15bac49db1be6", 
"4dc578364f357b993b5554b9181c90c84aa6b4d1", "6a3146b4f60e66a3aaa06842e747099f5c735510", "5c8a66d49a3e7a294706b412804c07467dbc906c", "b790f9ae49cdee5354eacfc7897ab9752acd5e2a", "385742fffcf113656f0d3cf6c06ef95cb8439dc6", "048a42699d9991ec18b34bdb484ef244830e1d71", "712485b1dca37377a4b94cdd814b04f73df23ecd", "b9e43395663f74c581982e9ca97a0d7057a0008c", "d538c50907e9b936150d00e1205ac9a0b3dffd9a", "737e43bd36ac3cc785915fa2930997976137ef35", "d1a7ec5b32601f15b0fcf6021e36b7193fe09e63", "4f4e1fab4ae110423e0c1236fcf5ffd35a2845fb", "50d3bd1ea64259d6de6a5a1fc9f4bc6b45d6f502", "28bf0df09f97e7ef9108e71b45fe1b9a7aa201e2", "0fedb3f3459bb591c3c2e30beb7dd0a9d3af3bcc", "34affc64b7f26520fbb96db3d87d177c831cc21b", "23d85a0008429845870780c6db3640c05165acaf", "141004dee9e799b40bfaf50b4a72618613137250", "d7f9c3253552e13f24c3b73bc055ef60388af57c", "4de0edf82aa7a5158dcca157eb9777545a989947", "a6d73877be2b91e8b6c9c0896e58942c93086ff8", "376ead26a0e0a87ea9a177fc683b0bedf161fbd9", "98a401d55dc8089eed37e016074d048e178c67c7", "1f559f2eb174d05a912b2ec39a48eadfd0160b74", "4bb0f607c1f6be38ca720ad6913577a778cc2f15", "460a9dbc3bc1434d1999362427f70e96be741b08", "21968ae000669eb4cf03718a0d97e23a6bf75926", "cf8ac5240c023c0196b36ee8af030eb000c0d6b6", "9d974bff46b6a4a5a889a30ac37a5fce2c5b634d", "15013a2155c9295103d836166bc3c944f90cbf07", "233084c0d1c818c842be6a9bb50f5dd2d1d1682f", "2d49ad12e22313a82e7f14dd41efe20ecd5daf43", "20c88e23020ea3c42e640f8ae3dc2a1f8569892d", "35a156f757466dbc686e4c75290383443b8efe90", "35fc15f9ed1b249145ccd1e928bae4a25ff0df67" ], "paperAbstract": "Fueled by the increasing popularity of online social networks, social influence analysis has attracted a great deal of research attention in the past decade. The diffusion process is often modeled using influence graphs, and there has been a line of research that involves algorithmic problems in influence graphs. 
However, the vast size of today's real-world networks raises a serious issue with regard to computational efficiency.\n In this paper, we propose a new algorithm for reducing influence graphs. Given an input influence graph, the proposed algorithm produces a vertex-weighted influence graph, which is compact and approximates the diffusion properties of the input graph. The central strategy of influence graph reduction is coarsening, which has the potential to greatly reduce the number of edges by merging a vertex set into a single weighted vertex. We provide two implementations; a speed-oriented implementation which runs in linear time with linear space and a scalability-oriented implementation which runs in practically linear time with sublinear space. Further, we present general frameworks using our compact graphs that accelerate existing algorithms for influence maximization and influence estimation problems, which are motivated by practical applications, such as viral marketing. Using these frameworks, we can quickly obtain solutions that have accuracy guarantees under a reasonable assumption. Experiments with real-world networks demonstrate that the proposed algorithm can scale to billion-edge graphs and reduce the graph size to up to 4%. 
In addition, our influence maximization framework achieves four times speed-up of a state-of-the-art D-SSA algorithm, and our influence estimation framework cuts down the computation time of a simulation-based method to 3.5%.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064045" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b793a7f8a934b6f67949b1b46b32c930c840fdc2", "sources": [ "DBLP" ], "title": "Coarsening Massive Influence Networks for Scalable Diffusion Analysis", "venue": "SIGMOD Conference", "year": 2017 }, "b7bb44560f9328ff01347221528c3633b3ec2373": { "authors": [ { "ids": [ "39341300" ], "name": "Lipeng Wan" }, { "ids": [ "4003076" ], "name": "Matthew Wolf" }, { "ids": [ "1706743" ], "name": "Feiyi Wang" }, { "ids": [ "32485139" ], "name": "Jong Youl Choi" }, { "ids": [ "1781276" ], "name": "George Ostrouchov" }, { "ids": [ "1736095" ], "name": "Scott Klasky" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.1", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.1", "entities": [ "Computation", "Computational science", "End-to-end principle", "Hidden Markov model", "Markov model", "Simulation", "Spatial variability", "Supercomputer", "Three-state logic", "Titan", "Titan (supercomputer)" ], "id": "b7bb44560f9328ff01347221528c3633b3ec2373", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "1-9", "journalVolume": "", "outCitations": [ "2d1118f2c18e7629f29c8143c3f639649c59907b", "85ba17686a149618fc971c6d60007bbfaf376461", "865e20c6f2a2b09c21a73792c746270acbb64f46", "8d594caed78dd607fe4ae8979cb73ee395d173c6", "0a564c5117375287c60d3a27a96003f30396f62f", "18b3af592d37fab4f48633a599a2ee9ce78c8ee9", "9edab79d681bae0071aa784328b0ce134d909c10", "3475f3916ef2a424c0945e329fae80d38e05c0a4", 
"3c03e217aeaf6734b5471d5f8930436e009d60af", "691d9c3f621c1d29c2b0b32fbd66cf9ecfae0d63", "20cc5fdba0915a3958c31d7b18763e82a5418856", "6a4105c2e444bf4a164c498126bc35f45e497286", "309ea2926d0e87919b928635685e0828a7425e9e", "1afc2cc0b9ac3140693302704ee44ce1054b6325", "02e56828951df7fbc42853071af175aad103517d", "f8acd93963ed622f4c8adb005daa097ac56de9f5", "f28b85c02fac0e442a1bb3f53357a873a7866a6b", "d004de96c6c1712e77a802534c339628e626945d", "885f7657cd858f3c48707946083f2a9aa7ee7aee", "9a4fa742850b7874b1ee7ab944bf92c0a29a1cbc", "bc7181d1c6bbd503c4561931b7bda074a1184393", "b2d80dde51c121c996ad7fbe2de7a7bce87dec54", "5121837e40f54742fbd26503c7ca76e68ced467a", "4be2b9ac4bf468bb1f0bd62ac170806fef3e93bf", "1352ac51d6ab2fc36905413fb34b45df5c95b8e7" ], "paperAbstract": "With the increase of scale and complexity seen in a variety of leadership-class scientific computation and simulation applications, it has become more important to understand their I/O performance characteristics. The user-observed performance is a combination of properties of how the application is using the HPC facility, as well as how others' use of the facility causes variability in the static machine capabilities. Our work leverages statistical analysis of I/O performance data gathered with fine time resolution over a full week from Titan supercomputer. Based on observed properties of the distribution of I/O latencies, we build a three-state hidden Markov model (HMM) to characterize the end-to-end I/O performance on Titan. We parameterize our model using part of the field-gathered I/O performance data and validate it against the rest. 
The validation results demonstrate that our model can capture the dynamics of end-to-end I/O performance on Titan accurately.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.1" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b7bb44560f9328ff01347221528c3633b3ec2373", "sources": [ "DBLP" ], "title": "Analysis and Modeling of the End-to-End I/O Performance on OLCF's Titan Supercomputer", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "b7bf2d75bdf2087af19b1455decd2f7160e74a6f": { "authors": [ { "ids": [ "1707013" ], "name": "Boaz Patt-Shamir" }, { "ids": [ "1951548" ], "name": "Will Rosenbaum" } ], "doi": "10.1145/3087801.3087803", "doiUrl": "https://doi.org/10.1145/3087801.3087803", "entities": [ "Centralisation", "Directed acyclic graph", "Queueing theory", "Shortest path problem" ], "id": "b7bf2d75bdf2087af19b1455decd2f7160e74a6f", "inCitations": [ "8792056eb7d2c917347747a8135d80ab0ebd12f7" ], "journalName": "", "journalPages": "13-22", "journalVolume": "", "outCitations": [ "381d82788c212de6dfac034b1c55df42a3af4a44", "a9698cc1c5e556f1d5c92da539c25fa77fc53dc6", "8792056eb7d2c917347747a8135d80ab0ebd12f7", "f74426a89eb90621c80c3db9310033825d4b94b7", "1264e1ba60818e06644f734fe071ebb40d33944c", "3ea8a60ef9c17ebd9a53f0114db2bd7ea312a198", "fa2fad4993a43f8ebd503ddedb70822294588b7a", "20dcf7e79ce8007a896a5d808acec770b1fe86dd", "0967bd75632d959541ee4afef35a5ef37c805cc7", "f4e2990ff8f44da04a7379baba6db85139d04700", "138f8450e884e066f3d4e75111d2bc9e3bf9de39", "122e2e6f7b564e6a58a19619e049fa1b304d8d8b", "29677eaa721a68a84913268b7ed43fb249392bc6", "2d832fdd8e87291fffb167638a5409e39d1e5a03", "249f8dde75a41351ee718abe3ed7469343522055", "1bb5295a79dc329fd271b5f2cf67509fc9ea3f93", "0c864a767d4e6e82304a2bd03b30fdc16f69b8ad", 
"2a5be7761aaa184fa6796e58edb93ed3d6c5dd0b", "54ebb025c9b55df0ab1a2ceac5bbb05704d70845", "6f06b350af52a3b8a6c02aed8d7a1a142282073d", "2cc6cdd232e781281707a01f6679c0051781ef69", "661f02a2e9497cb0a9163a20b79dfab3a359ee56", "b0b8b83adc3ee07815b0fec0cf8e9c2794ed772e" ], "paperAbstract": "We consider packet forwarding in acyclic networks with bounded adversarial packet injections. We focus on the model of adversarial queuing theory, where each packet is injected into the network with a prescribed path to its destination, and both the long-range average rate and the short-range burst size are bounded. Each edge has an associated buffer that stores packets while they wait to cross the edge. Our goal is to minimize the buffer space required to avoid overflows. Previous results for local forwarding protocols required buffers of size \u03a9(n). In the case of single destination trees, it is known that for centralized protocols, buffers of sizeO (1) are sufficient.We show that for local protocols, buffers of size \u0398(logn) are necessary and sufficient for single destination trees. The upper bound is achieved by a novel protocol which we call Odd-Even Downhill forwarding (OED). 
We also show that even slightly more general networks\u2014 such as path graphs with multiple destinations, or DAGs with a single destination\u2014require buffers of size \u03a9(n) to avoid overflows, even if forwarding is done by centralized, offline protocols.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087803" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b7bf2d75bdf2087af19b1455decd2f7160e74a6f", "sources": [ "DBLP" ], "title": "The Space Requirement of Local Forwarding on Acyclic Networks", "venue": "PODC", "year": 2017 }, "b80efd014171185080e84b72cbdc038066c396c9": { "authors": [ { "ids": [ "3240730" ], "name": "Alireza Haghdoost" }, { "ids": [ "23473919" ], "name": "Weiping He" }, { "ids": [ "9765678" ], "name": "Jerry Fredin" }, { "ids": [ "1717128" ], "name": "David Hung-Chang Du" } ], "doi": "", "doiUrl": "", "entities": [ "Action Replay", "Heuristic", "Operating system", "Performance Evaluation", "Replay value", "Scalability" ], "id": "b80efd014171185080e84b72cbdc038066c396c9", "inCitations": [ "8bb173e48e7d3a6a083eef6b9aa3bc7f715ea550", "7e126097bd676e496e7e4675e9d46fcef9220253" ], "journalName": "", "journalPages": "315-328", "journalVolume": "", "outCitations": [ "5ecd441bf4a54c7500cd6fc185d8dd09638e12cb", "11e44206984ce4186fd4b6181a5d902056e50e64", "02acd9c204a2f21087b1ea268aca3a90eb885c74", "972f0a8fcebe4fe91f6dce31577a45f72654b494", "a054aa76bdea18dbd20525ccb876de029dc2cc8a" ], "paperAbstract": "We introduce a replay tool that can be used to replay captured I/O workloads for performance evaluation of highperformance storage systems. We study several sources in the stock operating system that introduce the uncertainty of replaying a workload. Based on the remedies of these findings, we design and develop a new replay tool called hfplayer that can more accurately replay intensive block I/O workloads in a similar unscaled environment. 
However, to replay a given workload trace in a scaled environment, the dependency between I/O requests becomes crucial. Therefore, we propose a heuristic way of speculating I/O dependencies in a block I/O trace. Using the generated dependency graph, hfplayer is capable of replaying the I/O workload in a scaled environment. We evaluate hfplayer with a wide range of workloads using several accuracy metrics and find that it produces better accuracy when compared with two exiting available replay tools.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/fast17_slides_haghdoost_.pdf", "http://www.usenix.org./sites/default/files/conference/protected-files/fast17_slides_haghdoost_.pdf", "https://www.usenix.org/conference/fast17/technical-sessions/presentation/haghdoost", "https://www.usenix.org/system/files/conference/fast17/fast17-haghdoost.pdf", "http://www.usenix.org./system/files/conference/fast17/fast17-haghdoost.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/eb7c/5e816eb53a4dd310c50039773d9950de4792.pdf", "s2Url": "https://semanticscholar.org/paper/b80efd014171185080e84b72cbdc038066c396c9", "sources": [ "DBLP" ], "title": "On the Accuracy and Scalability of Intensive I/O Workload Replay", "venue": "FAST", "year": 2017 }, "b827eed4941374f53b249e450865ecf079e4c637": { "authors": [ { "ids": [ "1707465" ], "name": "Susan B. 
Davidson" }, { "ids": [ "1682639" ], "name": "Daniel Deutch" }, { "ids": [ "1702212" ], "name": "Tova Milo" }, { "ids": [ "1688094" ], "name": "Gianmaria Silvello" } ], "doi": "", "doiUrl": "", "entities": [ "Book", "Database", "Materialized view", "Relational database", "Result set", "Rewriting" ], "id": "b827eed4941374f53b249e450865ecf079e4c637", "inCitations": [ "bc70cb93a457c0b62d3ed11faef167a3b2738943", "898e5b4ad9cbb86e5165bccc4f52deb520efc73f" ], "journalName": "", "journalPages": "17", "journalVolume": "", "outCitations": [ "239ffe260e5a13c43d7131200a891194e94ff767", "1948575f4cedf689f708d1f0880e79de9ec4c4a5", "a97b77c8c3f9e8247c497d1b4c27c958a15f3a62", "1fe41b1240a0eddec736b675e914b4858a955876", "89bb85feef6886292d9f800c4ae069bcae140ea0", "35cedfbd97da5f0e80f78067a694be84594c066d", "3e30e934e9a5c2b47931fb4277741664406f25fe" ], "paperAbstract": "An increasing amount of information is being collected in structured, evolving, curated databases, driving the question of how information extracted from such datasets via queries should be cited. Unlike traditional research products, such books and journals, which have a fixed granularity, data citation is a challenge because the granularity varies. Different portions of the database, with varying granularity, may have different citations. Furthermore, there are an infinite number of queries over a database, each accessing and generating different subsets of the database, so we cannot hope to explicitly attach a citation to every possible result set and/or query. We present the novel problem of automatically generating citations for general queries over a relational database, and explore a solution based on a set of citation views, each of which attaches a citation to a view of the database. Citation views are then used to automatically construct citations for general queries. 
Our approach draws inspiration from results in two areas, query rewriting using views and database provenance and combines them in a robust model. We then discuss open issues in developing a practical solution to this challenging problem.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p19-davidson-cidr17.pdf", "http://ceur-ws.org/Vol-2037/paper_4.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/b827/eed4941374f53b249e450865ecf079e4c637.pdf", "s2Url": "https://semanticscholar.org/paper/b827eed4941374f53b249e450865ecf079e4c637", "sources": [ "DBLP" ], "title": "A Model for Fine-Grained Data Citation", "venue": "CIDR", "year": 2017 }, "b889b1d6944213bc2ca29e3ad07ee65ede20892d": { "authors": [ { "ids": [ "2741023" ], "name": "Miyuru Dayarathna" }, { "ids": [ "35709316" ], "name": "Sathya Bandara" }, { "ids": [ "35433878" ], "name": "Nandula Jayamaha" }, { "ids": [ "9159004" ], "name": "Mahen Herath" }, { "ids": [ "35367497" ], "name": "Achala Madhushan" }, { "ids": [ "1971912" ], "name": "Sanath Jayasena" }, { "ids": [ "2231831" ], "name": "Toyotaro Suzumura" } ], "doi": "10.1109/HiPC.2017.00036", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00036", "entities": [ "Algorithm", "Auxiliary memory", "Computer data storage", "Data mining", "Database engine", "Database server", "Experiment", "Graph (abstract data type)", "Graph database", "Greedy algorithm", "Neo4j", "Programming language", "Scalability", "Server (computing)", "Stream (computing)", "Streaming media", "Structure mining", "Upload", "X Window System", "X10" ], "id": "b889b1d6944213bc2ca29e3ad07ee65ede20892d", "inCitations": [ "e2b0e4811aea30fc1c791956a7b480b7f14a06c2" ], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "243-252", "journalVolume": "", "outCitations": [ "586414efa54ba9f4a7def0dc5322b7723f22c552", "3231d62bec8e8cc1d837e85893889855767c3b13", "3746511ef9ba685f34ceec9a3e94795be5836953", 
"e2b0e4811aea30fc1c791956a7b480b7f14a06c2", "71e317a53399440cb0c7e6589b41d0565f9ac0b7", "1753c2dc85cc40e0a2e8b4a405c1690eab066d8d", "4b808e7b2176d95596603937abf11bc3cfd3f6b5", "4aad76cf012f306e4304c605bc24ccbcadbbf7ee", "bdc97d4982782a2b636b367708f0ce9a17a5f28b", "04311b15b444a0f75ea2bb74fca26cc1aefbf3c1", "60f3dbc4f9a585e0d70a8359f9d5b6cfd0849177", "862cb6bf1f03e495ed5c6c9be16392712f7eb7cd", "41d7a4cb6c804945a7c6a0976a3dd85b9fe37677", "22c74d8be071084ce8812af19548e7bf2bf0c8b6", "45c81db51c128e6255288a2942cf075e7dc70c21", "95f658b8f7a47935bbe1007eed678324a1c3f7ba", "34a86b7b24d4a93dbb249fc05f0b7c0f48f90aff", "05370a6cc820ffe5393fcc948d7d600b5949a217", "01c1f0e97ce5c74c714dc7aa43cb064f45cc3b04", "3794abdc47d51b72cb7b7779085cd7e672966dc2" ], "paperAbstract": "Streaming graph data mining has become a significant issue in high performance graph mining due to the increasing appearance of graph data sets as streams. In this paper we propose Acacia-Stream which is a scalable distributed streaming graph database engine developed with X10 programming language. Graph streams are partitioned using a streaming graph partitioner algorithm in Acacia-Stream and streaming graph processing queries are run on the graph streams. The partitioned data sets are persisted on secondary storage across X10 places. We investigate on the use of three different streaming graph partitioner algorithms called hash, Linear Deterministic Greedy, and Fennel algorithms and report their performance. Furthermore, to demonstrate Acacia-Stream's streaming graph processing capabilities we implement streaming triangle counting with Acacia-Stream. We present performance results gathered from Acacia-Stream with different large scale streaming data sets in both horizontal and vertical scalability experiments. Furthermore, we compare streaming graph loading performance of Acacia-Stream with Neo4j and Oracle's PGX graph database servers. 
From these experiments we observed that Acacia-Stream's Fennel partitioner based graph uploader can upload a 948MB rmat22 graph in 1283.42 seconds which is 38% faster than PGX graph database server and 12.8 times faster than Neo4j database server. Acacia-Stream's Streaming Partitioner's batch size adjustments based optimizations reduced the time used by the network communications almost by half.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00036" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b889b1d6944213bc2ca29e3ad07ee65ede20892d", "sources": [ "DBLP" ], "title": "An X10-Based Distributed Streaming Graph Database Engine", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "b947b86f4a2e06e3a8dee5cbbe218fd605d0d9aa": { "authors": [ { "ids": [ "39005989" ], "name": "Lin Jiang" }, { "ids": [ "35356126" ], "name": "Zhijia Zhao" } ], "doi": "10.1145/3018743.3018772", "doiUrl": "https://doi.org/10.1145/3018743.3018772", "entities": [ "Computation", "Context switch", "Correctness (computer science)", "Data model", "Data structure", "Enumerated type", "Network switch", "Parallel computing", "Scalability", "Semi-structured data", "Speculative execution", "Speedup", "Static program analysis", "Web analytics", "XPath" ], "id": "b947b86f4a2e06e3a8dee5cbbe218fd605d0d9aa", "inCitations": [], "journalName": "", "journalPages": "371-383", "journalVolume": "", "outCitations": [ "43715cdc52b75ffaaff701852deafc4736a89081", "3710d16919bf3a0bc7c3c5647d377ab449964ff9", "286ae280436ccbd5586674c8509e73a4d3e2ae4d", "80de640b8afa03e4b2b0dbf1fab38063070c81c4", "7f00907026e8ed51f46e3226134c111baf796a85", "44e2f41f3e1db1e4f2bf9a6e9cd231f8fc397663", "744bd83dcfa9891a8a160a200014fcb5e6c69f82", "5f774c7a086a64491a3f54d3f105247109c2d5aa", "21aab44117fffd20c12b59809976e49abeae7585", "181b467cc650bcb22f291fe87a3fd7dc4e2053c8", "0148cfa4e12e6e8d69116eb82f49308ab1b2a32f", 
"bea1c8b954a9338c9069f3881a6c36b7434c7401", "26113546bcc7dd7af185c33d788ccd9647ac3728", "04295f8de661f9a929035e8c9cf64683bcb6fcb6", "13ac34ea9e6aef2acf44df66860d8091e349102d", "214615f9c8d45da7c622e27a9ccac03c76035e4e", "3b6711bd158a375267999ac095b8c1a76d9dc464", "6e9f2a48fbf0587ff52cd98d57d1d61defcbae83", "445ce8215bfa2e39df197fadadba2fda38457ee6", "db8a235c21a65c6633ecc65a218341444be0646f", "350556becaee76dc023c335fd9badd4da2629e01", "0728b8937c519d9ab2bd50111c242d3fb04bf60c", "10694e092fb4230f429dda84c6a2888c8665b0cb", "5ab0679924eb4dbd380ad3aa74681f786e352fa9", "2bd34e597d1d173e04e0557f6cef125b264bb58f", "27b1d02ab9edf212682fdfc7f8478aab471e6183", "2e8aab059c2096ff9f87002b48141ed0cf1e9838", "3f72e8fa64daed78c88c28b0d2cc51e0921c7a67", "4c8d5c4f1b9451a546492ecbb0bd75aae74cb840", "80527e7595530951081494d1b98f3f13da3033a2", "552832bc61bffa7a07f55070a295f14d280244d3", "363914217982d11cfff73dca2b46791938e4f707", "33e3f48f14ef0a5fdb0d892ebc826c98d1f3045c", "06ff7d98e56a3cc1860693647eb575bac2bd1138", "01ff121210be73903b4aac954b661ba33dd0a09d", "a05958c88b2df9ff6763b370777a503bdfe47dc4" ], "paperAbstract": "Semi-structured data emerge in many domains, especially in web analytics and business intelligence. However, querying such data is inherently sequential due to the nested structure of input data. Existing solutions pessimistically enumerate all execution paths to circumvent dependencies, yielding sub-optimal performance and limited scalability.\n This paper presents GAP, a parallelization scheme that, for the first time, leverages the grammar of the input data to boost the parallelization efficiency. GAP leverages static analysis to infer feasible execution paths for specific con- texts based on the grammar of the semi-structured data. It can eliminate unnecessary paths without compromising the correctness. 
In the absence of a pre-defined grammar, GAP switches into a speculative execution mode and takes potentially incomplete grammar extracted either from prior inputs. Together, the dual-mode GAP reduces the execution paths from all paths to a minimum, therefore maximizing the parallelization efficiency and scalability. The benefits of path elimination go beyond reducing extra computation -- it also enables the use of more efficient data structures, which further improves the efficiency. An evaluation on a large set of standard benchmarks with diverse queries shows that GAP yields significant efficiency increase and boosts the speedup of the state-of-the-art from 2.9X to 17.6X on a 20-core ma- chine for a set of 200 queries.", "pdfUrls": [ "http://www.cs.ucr.edu/~zhijia/papers/ppopp17.pdf", "http://dl.acm.org/citation.cfm?id=3018772" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b947b86f4a2e06e3a8dee5cbbe218fd605d0d9aa", "sources": [ "DBLP" ], "title": "Grammar-aware Parallelization for Scalable XPath Querying", "venue": "PPOPP", "year": 2017 }, "b9696bb5f23f7a449afce7b53c538237a9a87ff6": { "authors": [ { "ids": [ "40601731" ], "name": "Tianqi Zhao" }, { "ids": [ "1726357" ], "name": "Wei Zhang" }, { "ids": [ "21047414" ], "name": "Haiyan Zhao" }, { "ids": [ "1700880" ], "name": "Zhi Jin" } ], "doi": "10.1109/ICAC.2017.47", "doiUrl": "https://doi.org/10.1109/ICAC.2017.47", "entities": [ "Adaptive system", "Case-based reasoning", "E-commerce", "Logic programming", "Real-time data", "Reinforcement learning", "Run time (program lifecycle phase)", "Stationary process", "Web application" ], "id": "b9696bb5f23f7a449afce7b53c538237a9a87ff6", "inCitations": [], "journalName": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "journalPages": "103-112", "journalVolume": "", "outCitations": [ "19feac00388f197872fc9f1f4f0cddf500bcd296", "be461255af4c8c31e88461e685ca4f1bef10ea91", "08d9a641c4bf70074dcbacd2d283bd9122b17d80", 
"3c111e31a8a971982520adf51945a402d13ce4da", "15dc663b6761d53e90415427d5a24cce1e0e38da", "12b412c14084d2317f8e11d709ef6dfcd44f003c", "198d1ebf7717d46be8477d9b1ee5f8e93814341b", "ad4adc973900878c1b52466794f4a887f996762c", "12d1d070a53d4084d88a77b8b143bad51c40c38f", "9e90eaf2739cc727bfe50d6372250e5d76bf64de", "41b380539d15a733e78c2b29388ffa8bef4bb370", "342b66e1f2a7d8b94c7940dd1e053a67cdb32909", "05b4396a639eb4a13131e407b6678528f6578296", "69b48d0063bfd462a7d02172510b64794a479643", "00b1f05ebf0f82b500c21d0be6d2a25244938229", "34ced4a55cad83c7321c7643e9c0df79de0af144", "a17d32dca875566c7bedadb41b885524c9ae142e", "88dfee10842bbfd2ebc74980ab64c1cac5753883", "248bdda09f6c83460fd8aa2f12d5e82cc000dba4", "b100263f12f494adf235466df48cf3f635e3e05b", "61eff4ad67da3606c8f63c3d398f06aa3e01d9f3", "c6f2bc51c9d259ee72c64c66bcc294b2b43ddab0", "a5343e665cab197cf483a833c52a9ec74c470505", "ef0bcf69c2900bde36b7aafffc4e579d532b06d7", "e598bcab38a993ac37660fdf5881860b121a4a9a", "870139a40186ef5c227d772a4038b18b14af86d6", "087d207c5c0d4d349897b01c070538a0b66809e4", "2d3678323f832bad5921477b695df362ebf4e5cc", "639d1771204cdcb0a6f396472d262a90cc661d1f", "1865136615175cc0437f63c8f4a2a9db488d2430", "47a0fe9d66f81859a6d28e2e277e332efa9828f3" ], "paperAbstract": "One of the challenges in self-adaptive systems concerns how to make adaptation to themselves at runtime in response to possible and even unexpected changes from the environment and/or user goals. A feasible solution to this challenge is rule-based adaptation, in which, adaptation decisions are made according to predefined rules that specify what particular actions should be performed to react to different changing events from the environment. Although it has the characteristic of highly- efficient decision making for adaptation, rule-based adaptation has two limitations: 1. no guarantee that those predefined rules will lead to optimal or nearly-optimal adaptation results; 2. 
weak support to evolve these rules to cope with non-stationary environment and changeable user goals at runtime. In this paper, we propose a reinforcement learning-based framework to the generation and evolution of software adaptation rules. This framework manifests two key capabilities for self-adaptation: 1. the capability of automatically learning adaptation rules from different goal settings at the offline phase; 2. the capability of automatically evolving adaptation rules from real-time information about the environment and user goals at the online phase. The two capabilities are built on the combination of reinforcement learning and case-based reasoning techniques. This framework improves the existing rule-based adaptation from two points: the flexibility of adaptation logic, and the quality of adaptation rules. We evaluate this framework through a case study of an E-commerce web application, which shows that this framework improves both the efficiency and effectiveness of self-adaptation.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICAC.2017.47" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b9696bb5f23f7a449afce7b53c538237a9a87ff6", "sources": [ "DBLP" ], "title": "A Reinforcement Learning-Based Framework for the Generation and Evolution of Adaptation Rules", "venue": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "year": 2017 }, "b9b3423e30356bf5335ae91c500a4f333f47a2fc": { "authors": [ { "ids": [ "39279009" ], "name": "Jens Kehne" }, { "ids": [ "2346919" ], "name": "Stanislav Spassov" }, { "ids": [ "2442755" ], "name": "Marius Hillenbrand" }, { "ids": [ "3064906" ], "name": "Marc Rittinghaus" }, { "ids": [ "3258127" ], "name": "Frank Bellosa" } ], "doi": "10.1145/3078468.3078473", "doiUrl": "https://doi.org/10.1145/3078468.3078473", "entities": [ "ARM architecture", "Cloud computing", "Computation", "Context switch", "Graphics processing unit", "Multi-user", "Multiplexing", "Network switch", 
"Overhead (computing)", "Round-robin scheduling", "Scheduling (computing)", "Time complexity" ], "id": "b9b3423e30356bf5335ae91c500a4f333f47a2fc", "inCitations": [], "journalName": "", "journalPages": "6:1-6:11", "journalVolume": "", "outCitations": [ "2361474f4a3b524a2761a4eee6046f956ed3c430", "3be74a71c59c0e5e925aa84090fc1b1988ea6095", "cffc3c4e332287ba5b67a20c0bf0fbcb8a6e5725", "770ec20523ff6ea96d894b7ff7f618590924a3d5", "cfeb833da2d3ca20adfc05a762b3f68cffa13416", "5cdb8b8e13f90ec7974565372a6b772faf6d611f", "a7a24f882aec173c01a9ed1eb52589f71d6c80f8", "00c581e956843c6e93009ea9146d69201a928888", "dba6ad28c88a6477c9b0ee3d624bf76986f0a51b", "0e191d726c2b15d54f0e4b5d50f6c0d2e426f3ad", "9c6b7e5f4c3233e282facd97fb7e812cc2816126", "4ee94360be7639024a0be01a5d05c1bdc3e6cd46", "39a6aa81ec3e20c1d500b99b560deb039c451b83", "69c801f327cca60723b870caab92114da164ac99", "6bdacf836b47e40f1e8d5d8e9e1c8224d74a1cef", "00f355ce566bb51dc70925217c62e437cc7e14e2", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "6e38285609f08477b455e1c5545256e6d29c932a", "2992b8985e094c3943e29dffc550862791fae147", "43f0c099d44a68783a773f91cd03098a5252bf98" ], "paperAbstract": "Over the last few years, GPUs have become common in computing. However, current GPUs are not designed for a shared environment like a cloud, creating a number of challenges whenever a GPU must be multiplexed between multiple users. In particular, the round-robin scheduling used by today's GPUs does not distribute the available GPU computation time fairly among applications. Most of the previous work addressing this problem resorted to scheduling all GPU computation in software, which induces high overhead. 
While there is a GPU scheduler called NEON which reduces the scheduling overhead compared to previous work, NEON's accounting mechanism frequently disables GPU access for all but one application, resulting in considerable overhead if that application does not saturate the GPU by itself.\n In this paper, we present LoGA, a novel accounting mechanism for GPU computation time. LoGA monitors the GPU's state to detect GPU-internal context switches, and infers the amount of GPU computation time consumed by each process from the time between these context switches. This method allows LoGA to measure GPU computation time consumed by applications while keeping all applications running concurrently. As a result, LoGA achieves a lower accounting overhead than previous work, especially for applications that do not saturate the GPU by themselves. We have developed a prototype which combines LoGA with the pre-existing NEON scheduler. Experiments with that prototype have shown that LoGA induces no accounting overhead while still delivering accurate measurements of applications' consumed GPU computation time.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078468.3078473" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b9b3423e30356bf5335ae91c500a4f333f47a2fc", "sources": [ "DBLP" ], "title": "LoGA: low-overhead GPU accounting using events", "venue": "SYSTOR", "year": 2017 }, "b9cd558c4a8215a99c4fa8091140f3d412430e2c": { "authors": [ { "ids": [ "1870110" ], "name": "Arun Subramaniyan" }, { "ids": [ "3239775" ], "name": "Jingcheng Wang" }, { "ids": [ "27058303" ], "name": "Ezhil R. M. 
Balasubramanian" }, { "ids": [ "1687117" ], "name": "David Blaauw" }, { "ids": [ "1692316" ], "name": "Dennis Sylvester" }, { "ids": [ "40040123" ], "name": "Reetuparna Das" } ], "doi": "10.1145/3123939.3123986", "doiUrl": "https://doi.org/10.1145/3123939.3123986", "entities": [ "Alphabet (formal languages)", "Amplifier", "Automata theory", "Automaton", "Benchmark (computing)", "CPU cache", "Central processing unit", "Compiler", "Dynamic random-access memory", "Finite-state machine", "Locality of reference", "Memory bandwidth", "Nondeterministic finite automaton", "Parsing", "Pattern matching", "Pipeline (computing)", "Principle of locality", "Sense amplifier", "Speedup", "X86", "XML" ], "id": "b9cd558c4a8215a99c4fa8091140f3d412430e2c", "inCitations": [], "journalName": "", "journalPages": "259-272", "journalVolume": "", "outCitations": [ "6408891eea6af794dadd16f35485bc1c73473adc", "9fac7fd52435a4f4658308d75d833d337bc90aa6", "38adc6ce214ad89ad6a0c47b489608a0fbeedaaf", "8c9a529502244ebbd7d734053acc1c5b4719ca57", "22c74d8be071084ce8812af19548e7bf2bf0c8b6", "0f35cad96cebf9590c168caf4baa2103af38934d", "7666859ba1c7c9a0bae972cc6e7f04e2a4f728aa", "93c25da1b96dba6a83defeb05ebd5bd3c66feb87", "64b6ef88dcb71ca50e2b3b63330da9726f982503", "9f634fd29de624626a78c7d4657a28cb5677a5b8", "1697fa905771172ab29b0d83ed6f44961444e03e", "0381580ffcedc732c02711f7800f7f325389b46f", "5e2a75065a3e80a04d4aa9cc0987d1d51232961b", "3547ac839d02f6efe3f6f76a8289738a22528442", "e2a9e2a84f4ad4d3f8e2ced96c5699e28b0c3e28", "3f8a4239c14e7210d356e23503f6418e687b1b86", "27b1d02ab9edf212682fdfc7f8478aab471e6183", "20cee308639acf53d090ff7c7d639eb64fdca8ad", "942394566ccb9dbf40243dc2bd3c4d7605bbefa2", "44da8226d8aedc3e57432c33fe1a2795eb6442e5", "41338a3f79028c0a24f7786d6e9a01ce8d2e90a4", "3710d16919bf3a0bc7c3c5647d377ab449964ff9", "f311412463c33223947df56ae04644e8c68cdd5d", "3a6fc87533e77528cc6d48aa7b0bdd74412c5095", "8d0bb67313c489aa90116c0c7df367a6ce46616d", "80527e7595530951081494d1b98f3f13da3033a2", 
"984273d01be0d66506ccf7d6bd5d260dfe8d9f30", "02762188684ff2208ada7fa10d6e60bd04f44a12", "07f62af22fab75b1b8dcc7a5ef45923322e50b57", "0595aa314ec597f1935f2a600461ae2581d2f8c8", "3b6711bd158a375267999ac095b8c1a76d9dc464", "7d96382d517690ab7fb2dfefbc4ad9167f4f03d1", "90a72bdeac51650e2711ffd785a152c89c97dc4b", "947ab3927c41622a8bdafe3327158cd16eba21fd", "1bad7f22b1e75ce4535471a9722fbae6c35c46a0", "25977aeebe5714a9e727218a0c71d05144cb8eba", "0c621bbd45b8a5e0cdac61ac960bc7cc2bdaa0c5", "4510a798d1f3001a6ba82ba27d7809ff6c5b9b6e", "296754083c4368a10a818c2632e3614cc317ca34", "330b46ce848047b13fadc7a63c01abfe02fd4d8b", "289431393a4d2db657bb2d5109f60602b26013a2", "2272e0efd155fb0862f251e168fd47e1ec9363ad", "6f48f470bf45fa4401ee0accef1931d6e492b730" ], "paperAbstract": "Finite State Automata are widely used to accelerate pattern matching in many emerging application domains like DNA sequencing and XML parsing. Conventional CPUs and compute-centric accelerators are bottlenecked by memory bandwidth and irregular memory access patterns in automata processing.\n We present Cache Automaton, which repurposes last-level cache for automata processing, and a compiler that automates the process of mapping large real world Non-Deterministic Finite Automata (NFAs) to the proposed architecture. Cache Automaton extends a conventional last-level cache architecture with components to accelerate two phases in NFA processing: state-match and state-transition. State-matching is made efficient using a sense-amplifier cycling technique that exploits spatial locality in symbol matches. State-transition is made efficient using a new compact switch architecture. By overlapping these two phases for adjacent symbols we realize an efficient pipelined design.\n We evaluate two designs, one optimized for performance and the other optimized for space, across a set of 20 diverse benchmarks. 
The performance optimized design provides a speedup of 15× over DRAM-based Micron's Automata Processor and 3840× speedup over processing in a conventional x86 CPU. The proposed design utilizes on an average 1.2MB of cache space across benchmarks, while consuming 2.3nJ of energy per input symbol. Our space optimized design can reduce the cache utilization to 0.72MB, while still providing a speedup of 9× over AP.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123986" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/b9cd558c4a8215a99c4fa8091140f3d412430e2c", "sources": [ "DBLP" ], "title": "Cache automaton", "venue": "MICRO", "year": 2017 }, "ba27b89b7f1aaf5ec2327644e110276d7be83ed1": { "authors": [ { "ids": [ "2359472" ], "name": "Bao Nguyen" }, { "ids": [ "36812091" ], "name": "Hua Tan" }, { "ids": [ "2869098" ], "name": "Xuechen Zhang" } ], "doi": "10.1145/3126908.3126944", "doiUrl": "https://doi.org/10.1145/3126908.3126944", "entities": [ "Adaptive mesh refinement", "Algorithm", "Byte", "Byte addressing", "Central processing unit", "Computer memory", "Data structure", "Failure rate", "Gerris", "In-memory database", "Non-volatile memory", "Octree", "Out-of-core algorithm", "Persistence (computer science)", "Simulation", "Supercomputer" ], "id": "ba27b89b7f1aaf5ec2327644e110276d7be83ed1", "inCitations": [], "journalName": "", "journalPages": "27:1-27:12", "journalVolume": "", "outCitations": [ "24724ad8962a9e04eb496fddaefe9708f6960601", "f6715c2d9d8a76a20f4b857f7377ce63a23f0654", "26ba0637c23b0ff904b6b6c7237b58ad5470300c", "10966f549a671aa1d04ba134557c11ad2d0d0269", "642dd27ce62d51b042e134b0d0aec2f2e7cc4d29", "65b7c62555d2c3f5763651bff19ec6bf040ef5db", "9183cde02e4306828089fb8adae74736a9df3ceb", "05ff7d78d2de5f2314a826ce470fa40704e797ab", "0f55217987ec25afa0f815e0aa3957e669b0280e", "5a49c1c694028fd6ca7acc6af601c0f54efa0700", "2201c6e88e990150fda976fa85d0a55521b792dc", "7e24ffb444ea7d606c720621c758c8916e66a04e", 
"94783d113951822195d4ba44599a8fcbdef9d4bf", "129f11028220d87525b37b4605a2c04eb26f3e73", "154f36b741e8252c7eb8b71f8ceb9b909ab33a12", "d26e3b4771a43822cfed79d44e3da7003e3e94db", "14dc05a51866b6832990fc7fe8c8f6b85730bb84", "05a1357946de5eca42a477b7b268db4944219a2e", "1c743b19515158ceb96422e1d8e94a0275eaa04d", "6434aa10f3745dcf959cfca9c379aae120396724", "2e3bda19a2ed88e8a6e5cc415e27da551653ff1d", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "5e84fce1b0f18ff21b5a35ae1c3a223fd0720a75", "1ecbbab4f277b13a1ec41f80e487cdd4c81f8ebb", "445c01f4ecdf8559d150bb1aa656e800306e04a3", "10419c6f4aa50a36ed0b103c9ddb9aec45f133fe", "da8f5c3e65e2eb398dc5a4866023ef51e4056905", "556dba2cb571badf6d6c8cc33d62ab997b8f5e18", "bf5497e15f22233cbc2a4d0c3cc2c36f26738701", "0645f60331e8dd88a1d0183e2bfb3b9da21c07f6", "07019437d9cf95d633d68b9820125590cf2551b6", "39183aaad56e3259dfb9c28b070cf2d4fd2035a6", "0d08856c7806d4693b091e358bae094e5ec6e483", "341310508d285539ebd5a9a2e2a5f0556ac3d875", "627791ec29c6c6b1eb57941507d5c9fc999a2bb0", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "57c823b3b07b98233394bf15cfbbaed6a84809df", "79213b84bafa4376a3c9379fcfa28dcbbbbfcabc", "81778c0996c46c77a66597e782ec0eb558f054f2", "31fcd061632d15567dbcbf4f9c5f7b781141a88a", "336572dcb5299083a98aea27fcf2f04470f0750d", "acffdaf62539826a2c38b0b47848656593b68168", "26f820aa9e782f5d6ba8bcb272a31c32094dfd59", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "4a454a7a361c6ae1bda7594444cc45ac4a8e91ad", "3609a17555a6c6757f8ff0297fc046e6dc623a57", "c56e3392d53a5e19f5ea8468ed64830bf0542bff" ], "paperAbstract": "Octree-based mesh adaptation has enabled simulations of complex physical phenomena. Existing meshing algorithms were proposed with the assumption that computer memory is volatile. Consequently, for failure recovery, the in-core algorithms need to save memory states as snapshots with slow file I/Os. The out-of-core algorithms store octants on disks for persistence. 
However, neither of them was designed to leverage unique characteristics of non-volatile byte-addressable memory (NVBM). In this paper, we propose a novel data structure Persistent Merged octree (PM-octree) for both meshing and in-memory storage of persistent octrees using NVBM. It is a multi-version data structure and can recover from failures using its earlier persistent version stored in NVBM. In addition, we design a feature-directed sampling approach to help dynamically transform the PM-octree layout for reducing NVBM-induced memory write latency. PM-octree has been successfully integrated with Gerris software for simulation of fluid dynamics. Our experimental results with real-world scientific workloads show that PM-octree scales up to 1.1 billion mesh elements with 1000 processors on the Titan supercomputer.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126944" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ba27b89b7f1aaf5ec2327644e110276d7be83ed1", "sources": [ "DBLP" ], "title": "Large-scale adaptive mesh simulations through non-volatile byte-addressable memory", "venue": "SC", "year": 2017 }, "ba4731f339a2f1518af38d1bbacfd70d7d9d81aa": { "authors": [ { "ids": [ "1766680" ], "name": "Yufei Ding" }, { "ids": [ "37914192" ], "name": "Xipeng Shen" } ], "doi": "10.1145/3133898", "doiUrl": "https://doi.org/10.1145/3133898", "entities": [ "Algorithm", "Computation", "Multiprotocol Label Switching", "Program optimization" ], "id": "ba4731f339a2f1518af38d1bbacfd70d7d9d81aa", "inCitations": [ "585cec9677e5cdb04e882cb47cc491c54ecbeb80" ], "journalName": "PACMPL", "journalPages": "74:1-74:28", "journalVolume": "1", "outCitations": [ "51f16256472a334ffb4a579de5eabd371291ca59", "7dadbf42abc1ecbb963ffba2dafce95e92e04a05", "3e2480d7136fe5c6fa7213ea834566b93570c3ca", "18337728b1cb095b514fcea9d006f27ef942182b", "c0251d764976b9676c24fc33459b2c1842cd3417", "05b54e71af10df8cdf84dd5046df5f647e4609aa", 
"23a4855de3d3f34efa4c9f243b417d0373f5b13e", "9b3ce3fd1da5a81887b241aa46753bbbdedd11e9", "a381a60a0fb76182bc345174a1bfeaf7f9d38aaa", "9b8d8f2fb88e03f8f3ad01efbfef52718b70d104", "c49b212dbe9a58e36ce21c0fe13c8d65ad7a2fdb" ], "paperAbstract": "This paper presents GLORE, a novel approach to enabling the detection and removal of large-scoped redundant computations in nested loops. GLORE works on LER-notation, a new representation of computations in both regular and irregular loops. Together with a set of novel algorithms, it makes GLORE able to systematically consider computation reordering at both the expression level and the loop level in a unified manner. GLORE shows an applicability much broader than prior methods have, and frequently lowers the computational complexities of some nested loops that are elusive to prior optimization techniques, producing significantly larger speedups.", "pdfUrls": [ "https://cs.ucsb.edu/~yufeiding/publication/GLORE.pdf", "https://research.csc.ncsu.edu/nc-caps/yding/publication/GLORE.pdf", "http://doi.acm.org/10.1145/3133898" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ba4731f339a2f1518af38d1bbacfd70d7d9d81aa", "sources": [ "DBLP" ], "title": "GLORE: generalized loop redundancy elimination upon LER-notation", "venue": "PACMPL", "year": 2017 }, "ba4fb708be68ef023ce8024ad92a481d756e1f58": { "authors": [ { "ids": [ "1762800" ], "name": "Pejman Lotfi-Kamran" }, { "ids": [ "3899445" ], "name": "Mehdi Modarressi" }, { "ids": [ "1712754" ], "name": "Hamid Sarbazi-Azad" } ], "doi": "10.1109/HPCA.2017.16", "doiUrl": "https://doi.org/10.1109/HPCA.2017.16", "entities": [ "CPU cache", "Central processing unit", "ChIP-on-chip", "Enterprise resource planning", "Manycore processor", "Network on a chip", "Parallel computing", "Physical Review A", "Quality of service", "Requirement", "Router (computing)", "Server (computing)" ], "id": "ba4fb708be68ef023ce8024ad92a481d756e1f58", "inCitations": [ 
"a078293864ade1c51348d2bd05e1d293d0c1d20d", "59513da4464c04ab3605940e2f1bbf191032841b", "7a961b5f6e20773ee0911b580a76fe6da8d69e5b" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "277-288", "journalVolume": "", "outCitations": [ "2cba84a71e7a7949ccdc238fd3ef6b039066d793", "0d282d1f91e423dc3347a9ee338e929f9427f3ba", "03771342b2ec13944bc7f34c65af9a2d843bc428", "14bb08611e09f2e85b2987df33d9a520227ddd89", "621f06195844f960c7afca4aa04fc39dc12ba559", "345e97f26bb24246b81061a784f90815607ee357", "155c819dde1910b2b912ade74096881f552a3158", "0df4e0e7d49cb49c4c1a2204a715bf0784e49981", "54ee30efe09b9e817ce9ec83d46553e4bded58ed", "33d8743ad609524c5f7949ac44a4dcaffe228dde", "dca113c06fed05d2a10ff7ce1afece6b07d30f70", "07df6b054f3734acdcd0f45e028ec376a18126c9", "6acd75781396e5dedcf2f06a7131ba7f3153bfb5", "0d60537e54e10cd0fdf678532b4a41c86b0a485c", "1c9c952ddcdb3677cc9e391b6d899c4954c94f25", "3ab1c2d1cc4763c3290a68fe01eb176b43ec8199", "66e0aa17f60779815d5eb35e68d545ae2dc351c3", "6f73820a6ef96c21ac6ae6f82d42b5b187b34138", "368a19c178e01641ca3c294e3bb87d5282458510", "a0b2f960c354788422eda4b0f7ed7143ee0cea54", "f29dac2e26273532c81c933f091c7a60b9480f94", "0f5277a91fd980b55178107ae76f3658c9bc4379", "014ba063a3721973ba6af6503232d4d21d1456bb", "134f49d5079b5f2f5421950e5850a37edb4cfd3a", "1b216be638fbb0a9099bbc1aabe4319676d5f573", "be52447fdd58877e7dc94e70d2658b39184c63e8", "f3325ace129dec914966f9894d9f412e5e04bdc2", "6813f13990e0553c7cadf2e0a3ffab217bc4e396", "50de0f6a952131dfe562c5b3836e5d934b39b939", "3b183fc192d9082ec460d24264aeccc1abb1975d", "654db98cf76ecc6bd2bd5d63952fff1d6365ad4f", "313b6d6a2fe071869507ba7530aef10c91aefe11", "5ad36ad05fca05d5ac1a49c898cd87efa0853999", "e2ead53ecbb03a3ea86b5e5e97b05b7e8d121f56", "bfd039474a05ae508f366ba63129044afa41e8b0", "1e157cfbd2fa3ca1b786bdf6bcf3f6cf40ed39bb", "0c89d5d37a25f693bc31e65c0c5b9bea63148e57", "2c9b77a063a3459ed8f3be0c0066724a38e225e5", 
"f054bef6f62ae1c227540c628e7e85d2db9b766e", "a58445c48c3402305e92ff7cb7eaa9641a56ca6f", "f0a058970f030a1e786945794be0db39e456d693", "373b88e34295875fdab7f6cdee1438edbd0571cb", "42cd36ab88ef628430f8274293390c48a3faf368", "a1471d43cb75cc9699c392297ecf3693a5e5cf96", "0eadb23fd9a83b28c6c861e8a40620b3a832ce50", "27c11aafa1bdb48228474f7780c5f22022a1a7b9", "6dc69032224e2bb4493a386b97879ca73e617792", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "0ab9f989ffff6389059ee5267db014d1b211d51e", "47a356bf5462a495fd2240a11665a23969e1202c", "2c2e32267c43161f80241a2e1ba21d1f0f871dd4", "2da3512258d27f90eadfceab33566c41a5ba1124", "1c799051edc9d2758717da9935ee318dcce4d0d3", "2282c7d713135a208144f65de726f703dd947f3f", "42aa176a43cc2399d7f3a2351b1e55edd06abdf7", "ab82581f2225072865c1bf49c0044b05e5afca30", "274673e58c56bdbdd953135bd626d9508d65fd27", "18b204d596a2526ac96d3d7fc88352c6b125f013" ], "paperAbstract": "Server workloads benefit from execution on many-core processors due to their massive request-level parallelism. A key characteristic of server workloads is the large instruction footprints. While a shared last-level cache (LLC) captures the footprints, it necessitates a low-latency network-on-chip (NOC) to minimize the core stall time on accesses serviced by the LLC. As strict quality-of-service requirements preclude the use of lean cores in server processors, we observe that even state-of-the-art single-cycle multi-hop NOCs are far from ideal because they impose significant NOC-induced delays on the LLC access latency, and diminish performance. Most of the NOC delay is due to per-hop resource allocation. In this paper, we take advantage of proactive resource allocation (PRA) to eliminate per-hop resource allocation time in single-cycle multi-hop networks to reach a near-ideal network for servers. 
PRA is undertaken during (1) the time interval in which it is known that LLC has the requested data, but the data is not yet ready, and (2) the time interval in which a packet is stalled in a router because the required resources are dedicated to another packet. Through detailed evaluation targeting a 64-core processor and a set of server workloads, we show that our proposal improves system performance by 12% over the state-of-the-art single-cycle multi-hop mesh NOC.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.16" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ba4fb708be68ef023ce8024ad92a481d756e1f58", "sources": [ "DBLP" ], "title": "Near-Ideal Networks-on-Chip for Servers", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "ba65e9d69cd8510095db28b421ca4ec96c3059d5": { "authors": [ { "ids": [ "3164907" ], "name": "Tim Kaler" }, { "ids": [ "1772774" ], "name": "Yuxiong He" }, { "ids": [ "1767761" ], "name": "Sameh Elnikety" } ], "doi": "10.1145/3087556.3087566", "doiUrl": "https://doi.org/10.1145/3087556.3087566", "entities": [ "Algorithm", "Attribute\u2013value pair", "Experiment", "Key-value database", "Operator overloading", "Randomness", "Redis", "Response time (technology)", "Server (computing)", "Simulation" ], "id": "ba65e9d69cd8510095db28b421ca4ec96c3059d5", "inCitations": [], "journalName": "", "journalPages": "195-206", "journalVolume": "", "outCitations": [ "2821db8962fce43265215a9c4b8d66af02e16ae7", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "1021a40384a9adf9e85aa99ea587135197a9ca1b", "065465ac37607a347186ea50873fc63d17cd2c79", "8e597e69610a3ee22cb11fcd83edd8a2d46bfed1", "87ec26d1622d0293173b3b695d71a17e425d68c5", "0437df54051fc7c4d9b5f2941c8488e6a765918f", "2308ec37e6bc4d59aedbfe13994d905dc72385e8", "18a5f443299784479e78d9e77f175af57cb2fa2b", "00ab6bb0df7fd605038d64eb5798b31481a39dd0", 
"37c7b481a82a98fdba98dba8240014c770102343", "53a5485c197ec44f6fb9f0308cac7716e50d1584", "64a6dd2a598d5f012a54fe6ca06d4f7235f66626", "09f0751d7452cd0480d572171593d07996325fcb", "6b7e2de8bb1b67f4487e0efa3551538f9e126458", "50f4b86e497b5a0d4f2b576238babf99a5c0f661", "2ae7c9a89c7f6c105b68343dda9a0159eb2f118d", "17bef89290a3054ecfdd68d2fe5be8066d59b856", "6e669e90a34c4179f9364406d8a7a7f855745086", "c03998fba80568419db43e7e9fd78f8f7d1798dc" ], "paperAbstract": "Interactive services send redundant requests to multiple different replicas to meet stringent tail latency requirements. These additional (reissue) requests mitigate the impact of non-deterministic delays within the system and thus increase the probability of receiving an on-time response.\n There are two existing approaches of using reissue requests to reduce tail latency. (1) Reissue requests immediately to one or more replicas, which multiplies the load and runs the risk of overloading the system. (2) Reissue requests if not completed after a fixed delay. The delay helps to bound the number of extra reissue requests, but it also reduces the chance for those requests to respond before a tail latency target.\n We introduce a new family of reissue policies, Single-Time / Random (SingleR), that reissue requests after a delay d with probability q. SingleR employs randomness to bound the reissue rate, while allowing requests to be reissued early enough so they have sufficient time to respond, exploiting the benefits of both immediate and delayed reissue of prior work. We formally prove, within a simplified analytical model, that SingleR is optimal even when compared to more complex policies that reissue multiple times.\n To use SingleR for interactive services, we provide efficient algorithms for calculating optimal reissue delay and probability from response time logs through data-driven approach. We apply iterative adaptation for systems with load-dependent queuing delays. 
The key advantage of this data-driven approach is its wide applicability and effectiveness to systems with various design choices and workload properties.\n We evaluated SingleR policies thoroughly. We use simulation to illustrate its internals and demonstrate its robustness to a wide range of workloads. We conduct system experiments on the Redis key-value store and Lucene search server. The results show that for utilizations ranging from 40-60%, SingleR reduces the 99th-percentile latency of Redis by 30-70% by reissuing only 2% of requests, and the 99th-percentile latency of Lucene by 15-25% by reissuing 1% only.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087556.3087566" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ba65e9d69cd8510095db28b421ca4ec96c3059d5", "sources": [ "DBLP" ], "title": "Optimal Reissue Policies for Reducing Tail Latency", "venue": "SPAA", "year": 2017 }, "ba6edc3836bc4b130e6aab956cb5935bad33a1ad": { "authors": [ { "ids": [ "2264984" ], "name": "Bahareh Sarrafzadeh" }, { "ids": [ "1788496" ], "name": "Edward Lank" } ], "doi": "10.1145/3077136.3080829", "doiUrl": "https://doi.org/10.1145/3077136.3080829", "entities": [ "Algorithm", "Data (computing)", "Exploratory search", "Information retrieval", "Information visualization", "Knowledge Graph", "Layer (electronics)", "Sensemaking", "Testbed" ], "id": "ba6edc3836bc4b130e6aab956cb5935bad33a1ad", "inCitations": [], "journalName": "", "journalPages": "145-154", "journalVolume": "", "outCitations": [ "2479e4abb2eda3ed189cd7e161eb0a651825f41a", "317585f67316e2c445e05a3615f19b4c33403e34", "4e0609b8d2442d2ed46c1b0bfaad8edd3ba9855d", "bb84b3c533510ca1e91227d1eb19cdbe0daebc8d", "28e6049b85af685c3ffbb96a0e33165a3b84825f", "8fbc8880696464e05561e5f5737c2df4b9f53e35", "c045601b1872a4dd6f3019a3bf01d19170124dea", "f9e9fa78774817479ee3acbd02111b314e275763", "b5c6f2e66cc7cc6736befeb2efa33ea3f9338d02", "b3ed8c38130a444c5bb0949e7b3446ec2c3b5b30", 
"38363a3d8c1e97e2a95f1f34b897cc68b291bef2", "dea266419175e64b5b543bd3b9ef11b52243f4c8", "b226e156559125dd4d4f854e2f5d95220420f501", "77f0f01a43a1b4023af3df02d3ada7f96f6334d5", "92e5197bc63b2ba352e9577b5b11549225114170", "93377a8638315e9768130fab3e7c225cea839e07", "27246a384c9449d3c8d4e4696f24c9a45a52c01c", "2cd0bed0de740dbb200364eba2919ff5ce37adc1", "dda232b51cbede591eb722ebdb328ed004fb211e", "80f5fa0d10d4ff3498ee090d9fbd2b7c4361de3f", "95381d9d0a6afd2c76afff028bf60c63d60f5b61", "38cb9bc50282615e0631d47825c5e0c7f691c3f7", "a137fec1054014c04c52debef9389cc4aab2d86e", "4b3a7db625a1a4eb1ab3bbe6f9ef98e5084392ca", "5a30b6d52e7b80c38a52b7767b8cae7ea29ac68a", "d38e6ab2e4ccbabcd20eafec415ff79f20cad7b9", "c65fc8f21b247c005f3cc060506b98e91d4b8778", "4344b174b92162b5d156b5b6f22b8c09bac6b252", "e9b906b3067bfbf72e6e234385ce15adcc1a8336", "45958fc457c72ca69a855c66b11b8a2d7b8aa829", "13a7eefe3c5ed582f91973c2838bf092d0910677" ], "paperAbstract": "In information retrieval and information visualization, hierarchies are a common tool to structure information into topics or facets, and network visualizations such as knowledge graphs link related concepts within a domain. In this paper, we explore a multi-layer extension to knowledge graphs, hierarchical knowledge graphs (HKGs), that combines hierarchical and network visualizations into a unified data representation. Through interaction logs, we show that HKGs preserve the benefits of single-layer knowledge graphs at conveying domain knowledge while incorporating the sense-making advantages of hierarchies for knowledge seeking tasks. 
Specially, this paper describes our algorithm to construct these visualizations, analyzes interaction logs to quantitatively demonstrate performance parity with networks and performance advantages over hierarchies, and synthesizes data from interaction logs, interviews, and thinkalouds on a testbed data set to demonstrate the utility of the unified hierarchy+network structure in our HKGs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080829" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ba6edc3836bc4b130e6aab956cb5935bad33a1ad", "sources": [ "DBLP" ], "title": "Improving Exploratory Search Experience through Hierarchical Knowledge Graphs", "venue": "SIGIR", "year": 2017 }, "ba873c4f05bd0b33d99f81417e0017ce77dd5c95": { "authors": [ { "ids": [ "2835355" ], "name": "Yentl Van Tendeloo" }, { "ids": [ "2405710" ], "name": "Simon Van Mierlo" }, { "ids": [ "33464207" ], "name": "Bart Meyers" }, { "ids": [ "1762640" ], "name": "Hans Vangheluwe" } ], "doi": "10.1145/3136014.3136017", "doiUrl": "https://doi.org/10.1145/3136014.3136017", "entities": [ "Abstract syntax", "Domain-specific language", "Graphical user interface", "Parse tree", "Programming paradigm", "Usability" ], "id": "ba873c4f05bd0b33d99f81417e0017ce77dd5c95", "inCitations": [], "journalName": "", "journalPages": "182-193", "journalVolume": "", "outCitations": [ "e7ecfdec41b3e940eac35b545a23630bd8d03f49", "30c8678ffc18878c0e65811e27182aa84dff4140", "ea21e947f7ab629cff0945ee6f28f8285b0860b1", "ab606224a9a214475da53d943f392ba378e53a67", "def43a264d287429d352b5a2c61b2038bed1683c", "fa4c3232b1c536be9230b6919bb6ead3171b4b9b", "6705d00eef4ca398f6e239379abde175108cf0c2", "b60f8b0b67a321defb3ac511bbfd8afb53b929f7", "a01b64e3c57df5ca9da2b6e6cd051b46c69cc54f", "54bc64789cb0d4061a869dd6b45b5148c9a82ea8", "7590ce062c8834b463c642d48d773adb36b02770", "fa36540973c34b016f7be404205559b9a21ca031", "db2e1a632b3e39095cfab8767e45bffc015652b6", "9955b33f62a23e793953fe3d1884ed744a6ecb40", 
"70fd4e838e270229cba4a0a9a2f9af090b110443", "f2a96848a4c76503ab460a3b28ff9d127f70a8ee", "3c9cba4cee7ebde38045cc719f100840cad443cd", "58690c1c73bda9df20e11e1574719d710ffeed4b", "3b16e770e4d5c214eba15395e15348858c3d9f8c", "4ac6fae400b55a4651a3576af8d2eec8a278f4d6", "924c5e3ac9a2e2c76fbfa2b6ecd44d1f2f629525", "0f73e74af8fdcc62e4d1d8c058bf8c9594f12fca", "957b7aefc91e068067cc90fa9a252594764c1dc3", "39e54f0c16fe291f0a675abd8993ece3a58a535d", "3087a47c1fdb5ebb1b28f3562533e3cce782dd36" ], "paperAbstract": "Domain-Specific Modelling Languages (DSLs) allow domain experts to create models using abstractions they are most familiar with. A DSL's syntax is specified in two parts: the abstract syntax defines the language's concepts and their allowed combinations, and the concrete syntax defines how those concepts are presented to the user (typically using a graphical or textual notation). However important concrete syntax is for the usability of the language, current modelling tools offer limited possibilities for defining the mapping between abstract and concrete syntax. Often, the language designer is restricted to defining a single icon representation of each concept, which is then rendered to the user in a (fixed) graphical interface. This paper presents a framework that explicitly models the bi-directional mapping between the abstract and concrete syntax, thereby making these restrictions easy to overcome. It is more flexible and allows, amongst others, for a model to be represented in multiple front-ends, using multiple representation formats, and multiple mappings. 
Our approach is evaluated with an implementation in our prototype tool, the Modelverse, and by applying it on an example language.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136017" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ba873c4f05bd0b33d99f81417e0017ce77dd5c95", "sources": [ "DBLP" ], "title": "Concrete syntax: a multi-paradigm modelling approach", "venue": "SLE", "year": 2017 }, "bab611543f57b5026cef8cdf6415983ffd0078c3": { "authors": [ { "ids": [ "1684288" ], "name": "Leyla Bilge" }, { "ids": [ "8361539" ], "name": "Yufei Han" }, { "ids": [ "8267407" ], "name": "Matteo Dell'Amico" } ], "doi": "10.1145/3133956.3134022", "doiUrl": "https://doi.org/10.1145/3133956.3134022", "entities": [ "Binary file", "Ecosystem", "Semi-supervised learning", "Supervised learning", "The Current", "Threat (computer)" ], "id": "bab611543f57b5026cef8cdf6415983ffd0078c3", "inCitations": [], "journalName": "", "journalPages": "1299-1311", "journalVolume": "", "outCitations": [ "1e73c2fa2709d3210c09f19933e99b71905364ab", "5ce9df5ca21b0ab0af606f8b16a25d02c4c9244a", "072a2a1410d6fa0e5d46afdf7c8d46bd8877a69f", "788aecc48b5cae3fe1c6f84a04fa60e0e0e122c0", "08ae384c2c68333419f76bcb5f14dc2ba2ef8d33", "1dd11cf2579d69f14bb96a52c1fe82d9d546ede6", "2028713e597c1a4dbd385af4ab1359ac9b3289e1", "87ed0f639e6d56b2c536570406c2fe0afd9c3665", "15bc274d307ce56bbc71e1e3081f72d14f6e7e5e", "6e633b41d93051375ef9135102d54fa097dc8cf8", "b4849bb63f829ade3686b2717cc5001d29d96afc", "e35a109f9525af1595765a24ae81931c148d9fea", "006c846c72e77cb913be4b2c76664967e9e01ee0", "ae4fa49a33b8beb882096876850f5fa868f0fcff", "36050949442c4d8ce797bcc7c7d5d9112b8fadaf", "7977379a318f49a1171743d9d6745cd77283955c", "17eceec10a0f5f3a3b2ce99309009bfb2e9ef389", "19d13084ea644842e42802ec7aac2fb977ed7584", "91062b2e78d1a7c9ffe8bc5c4f3d3f9cd19631d6" ], "paperAbstract": "The current evolution of the cyber-threat ecosystem shows that no system can be considered invulnerable. 
It is therefore important to quantify the risk level within a system and devise risk prediction methods such that proactive measures can be taken to reduce the damage of cyber attacks. We present RiskTeller, a system that analyzes binary file appearance logs of machines to predict which machines are at risk of infection months in advance. Risk prediction models are built by creating, for each machine, a comprehensive profile capturing its usage patterns, and then associating each profile to a risk level through both fully and semi-supervised learning methods. We evaluate RiskTeller on a year-long dataset containing information about all the binaries appearing on machines of 18 enterprises. We show that RiskTeller can use the machine profile computed for a given machine to predict subsequent infections with the highest prediction precision achieved to date.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134022" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bab611543f57b5026cef8cdf6415983ffd0078c3", "sources": [ "DBLP" ], "title": "RiskTeller: Predicting the Risk of Cyber Incidents", "venue": "CCS", "year": 2017 }, "bad198a6b2969786ba2c3cdd6993913af779ad17": { "authors": [ { "ids": [ "3414900" ], "name": "Burak Bastem" }, { "ids": [ "2749676" ], "name": "Didem Unat" }, { "ids": [ "2360381" ], "name": "Weiqun Zhang" }, { "ids": [ "2896475" ], "name": "Ann S. 
Almgren" }, { "ids": [ "1746446" ], "name": "John Shalf" } ], "doi": "10.1109/ICPP.2017.26", "doiUrl": "https://doi.org/10.1109/ICPP.2017.26", "entities": [ "Application programming interface", "C++", "CUDA", "Computation", "Execution unit", "Graphics processing unit", "OpenACC", "OpenMP", "Pipeline (computing)", "Programmer", "Programming model", "Speedup", "Tiling window manager" ], "id": "bad198a6b2969786ba2c3cdd6993913af779ad17", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "171-180", "journalVolume": "", "outCitations": [ "9e490640f84581bd8a63b785e16ebdd2649a32be", "12a2fa42713f4094bcde7f35c05657bada75c3d0", "30267d0fb3bcdc7a39b2cc8e4ccb6383c8b2892a", "30947026ad3409cc438ccb24f6862f768670c868", "019ebe0205a759f8dab80b617f9f8ccd179c5c62", "91b16d5e7953617579de164ebaaf945e831afb97", "e1fe8bda7373de0a8bc4382de18ec086c10de3b3", "0451388ae4562c2833dd5e39f897208c9c1d7bb9", "791370da29ba96d355c2fad1ecd06b8e709f8755", "0226adea5e4f5f739633a83d159ca989045eefe5", "ea766ddb6db52c98941436b741301471e426d38b", "7c2beb7b8770b40caf55dc10437023e60a8fed35", "5267a849f5b97e4c4e9b99ad0ca1d296bfbb4589", "c156cfedd88a18734131492f54d2b7ddc0adeafb", "d827cdb49d3abb23405ee03e070c5a42c07d28ea", "2bb29fce377e1ec9024ea7c45fd40fa178922602", "15f67796899118508cc3021df0f88faf2298bd45", "f8afcda83fc23a7f75a1b0269fb458ee0182b621", "f0f050f402d0c7735e1fe97316667b779515951b", "706df053ab58c3963942337314c80ab849a45c4d", "19591064d5b9f108bff0bf67893ec6a2c2e54413", "75be9b3d5047d1c44f0a5d95d7ea9314fa0af469", "51e17f9a5bc3cd4b4fb2ed8736476ad8fcae7cf6", "e115c136d706997d8700d18b6b0ab7f95e4c5a47", "38c48d4a31ab050c6e750cdae21e00421172f694", "c9ae9df2ac7a203241cb039081720c74c88f1dee", "022eb1b666e95957c7d7607b92cd0fcc80e6b17d", "0ebb8ef3ef660ea8484202e74e2e3df7b3c59cc6" ], "paperAbstract": "GPUs are employed to accelerate scientific applications however they require much more programming effort from the programmers particularly 
because of the disjoint address spaces between the host and the device. OpenACC and OpenMP 4.0 provide directive based programming solutions to alleviate the programming burden however synchronous data movement can create a performance bottleneck in fully taking advantage of GPUs. We propose a tiling based programming model and its library that simplifies the development of GPU programs and overlaps the data movement with computation. The programming model decomposes the data and computation into tiles and treats them as the main data transfer and execution units, which enables pipelining the transfers to hide the transfer latency. Moreover, partitioning application data into tiles allows the programmer to still take advantage of GPU even though application data cannot fit into the device memory. The library leverages C++ lambda functions, OpenACC directives, CUDA streams and tiling API from TiDA to support both productivity and performance. We show the performance of the library on a data transfer-intensive and a compute-intensive kernels and compare its speedup against OpenACC and CUDA. The results indicate that the library can hide the transfer latency, handle the cases where there is no sufficient device memory, and achieves reasonable performance.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.26" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bad198a6b2969786ba2c3cdd6993913af779ad17", "sources": [ "DBLP" ], "title": "Overlapping Data Transfers with Computation on GPU with Tiles", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "bb12d6b59e4d2bd4bd5a060a8b4c5e73de9b671b": { "authors": [ { "ids": [ "10797369" ], "name": "Lucien D. J. Valstar" }, { "ids": [ "2049346" ], "name": "George H. L. 
Fletcher" }, { "ids": [ "1680651" ], "name": "Yuichi Yoshida" } ], "doi": "10.1145/3035918.3035955", "doiUrl": "https://doi.org/10.1145/3035918.3035955", "entities": [ "Application domain", "C++", "Citation network", "Directed graph", "Experiment", "Graph labeling", "Neo4j", "Open-source software", "Query language", "Reachability", "SPARQL", "Social network" ], "id": "bb12d6b59e4d2bd4bd5a060a8b4c5e73de9b671b", "inCitations": [ "9c6bddcc4c04009abdafc4b3340868975552e1d1" ], "journalName": "", "journalPages": "345-358", "journalVolume": "", "outCitations": [ "41770c3168a8918474603d96b634035677043980", "259e93de2f10d395a1bdfb2dc6da72b6a3998572", "9d4e533b6c8af347ca96c6c16c720a4145700fee", "4410f0c48f982f960a54500df7bd88e4cab88927", "3615c522a69d613491a780a47571a84c476c3712", "6384234e698f793cfba0cbf890b1c2a2209d06b7", "62cbcc203e2b2da2f281b2807c35aaa7bf5d296c", "148edd9ac0ed0485f14f470949f64a9d92cbbc10", "38b9c1d87249e9cf9bed0091d8970348aa645d35", "3be0297876fedc5ecf91a6dd6a21d73a36b532f3", "fa189694cc9b94ced0b41a072763cc6b7ba83e70", "67b40f5353731f75695d5b07c9f03e8d10bd4933", "31b63d505dbf6f2b9a60d0c45976f2cbd5cd9619", "13bda7e7a6d3ae153ff1b8c546174d2bfac6c5aa", "28eba259d5d7dc7daf95d982e626578fae790739", "c2070e3e1fce3c420d4e3c2d310fb63f5aa7ccb6", "174abff00624ac9c34ea5559e5cdbc7faf3b570c", "0c2ff25bf68074214a960666bff2a7b0d5e29fcc", "737e43bd36ac3cc785915fa2930997976137ef35", "10aa9ee7caaf9381b6a0468ae899a9729824a6b7", "a3d5e5d2fae76af242e40e35989237a8c3e6385f", "6f8a2339aabb699d8810baed2564744ca837f009", "ac34d93e9de470f5e3247a9dee327f345409b831", "eb1cbb14f09e895b1d2c36d75811dec863190376", "93b8c7b39c2e7c5d7ea3949a308a40e9eb62b83d", "26a2b94a118334585f5d717b24ef06b6f9014ba8", "c16886f0b9c785622ea321af14f378abd8bc25fd", "3da502b57284259529044fe20d824666f4268fd9", "5979f0e38db8c250f6e6ee10b6556d45eb2b7f73", "7d94976271de76e03a266305b2d05ab4352e28af", "3c0b574b98e2fae687f021d77637e24e2d2d641f", "0658fda7abd4d2bae8515179f36482f4bd7976f6", 
"c17014959370282e7c3efc96d9d831c1663c919f", "93f1c84f8bd62d78315f73dcea626e32e6a670b9" ], "paperAbstract": "Consider a directed edge-labeled graph, such as a social network or a citation network. A fundamental query on such data is to determine if there is a path in the graph from a given source vertex to a given target vertex, using only edges with labels in a restricted subset of the edge labels in the graph. Such label-constrained reachability (LCR) queries play an important role in graph analytics, for example, as a core fragment of the so-called regular path queries which are supported in practical graph query languages such as the W3C's SPARQL 1.1, Neo4j's Cypher, and Oracle's PGQL. Current solutions for LCR evaluation, however, do not scale to large graphs which are increasingly common in a broad range of application domains. In this paper we present the first practical solution for efficient LCR evaluation, leveraging landmark-based indexes for large graphs. We show through extensive experiments that our indexes are significantly smaller than state-of-the-art LCR indexing techniques, while supporting up to orders of magnitude faster query evaluation times. 
Our complete C++ codebase is available as open source for further research.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035955" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bb12d6b59e4d2bd4bd5a060a8b4c5e73de9b671b", "sources": [ "DBLP" ], "title": "Landmark Indexing for Evaluation of Label-Constrained Reachability Queries", "venue": "SIGMOD Conference", "year": 2017 }, "bb7b2725d1fb070733c340ae10e6f994e1b53255": { "authors": [ { "ids": [ "2806139" ], "name": "Debrup Banerjee" }, { "ids": [ "40204741" ], "name": "Kazi Islam" }, { "ids": [ "1993427" ], "name": "Gang Mei" }, { "ids": [ "3310095" ], "name": "Lemin Xiao" }, { "ids": [ "3088181" ], "name": "Guangfan Zhang" }, { "ids": [ "32320955" ], "name": "Roger Xu" }, { "ids": [ "1743600" ], "name": "Shuiwang Ji" }, { "ids": [ "3893083" ], "name": "Jiang Li" } ], "doi": "10.1109/ICDM.2017.10", "doiUrl": "https://doi.org/10.1109/ICDM.2017.10", "entities": [ "Bayesian network", "Causality", "Database", "Deep belief network", "Speech recognition", "Support vector machine", "TIMIT", "Video post-processing" ], "id": "bb7b2725d1fb070733c340ae10e6f994e1b53255", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "11-20", "journalVolume": "", "outCitations": [ "c7e17fde9419da340e968411f8686278dac23332", "434be28ba089e90f6188d53193dc08c5149880e2", "0d67362a5630ec3b7562327acc278c1c996454b5", "213d7af7107fa4921eb0adea82c9f711fd105232", "a8ea1555f20a0a1c3b304d8d38f6a21aaa4c9fc3", "127316fbe268c78c519ceb23d41100e86639418a", "36d01810f0bd53d712b28a43cf7166f0ba5591f1", "93f0419c2ac37af15b2c32c4e9a520984ea9bd5f", "34f25a8704614163c4095b3ee2fc969b60de4698", "a25fbcbbae1e8f79c4360d26aa11a3abf1a11972", "3bb177388eebd1440b5748d7bb11cbad3adced0f", "6fbb4fae9741077d78edf204daa0e01d3fd5c101", "4577f10f3d31a8176641131928010c975ebb5812", "6314424a44a6bb9fc6609d946581d1d9a26239b4", "77d6686fe7a8a96f820d0a7b5a6d711dbd0a72b6", 
"6772164c3dd4ff6e71ba58c5c4c22fa092b9fe55", "36643778b2a7afc2b902dd46f62e1d817c81fee4", "a128b6fa42943924a6a5c4d1437add3d03a6c0f0", "a39bd32c9cbeff1abf6419adc697b584a7f69b78", "fb5199f7f16cd69aa4dd6e05556ce23e5dd73678", "795d4acb2bf1f43c7bf952aba372730f3fed8264" ], "paperAbstract": "Post-traumatic stress disorder (PTSD) is a traumatic-stressor related disorder developed by exposure to a traumatic or adverse environmental event that caused serious harm or injury. Structured interview is the only widely accepted clinical practice for PTSD diagnosis but suffers from several limitations including the stigma associated with the disease. Diagnosis of PTSD patients by analyzing speech signals has been investigated as an alternative since recent years, where speech signals are processed to extract frequency features and these features are then fed into a classification model for PTSD diagnosis. In this paper, we developed a deep belief network (DBN) model combined with a transfer learning (TL) strategy for PTSD diagnosis. We computed three categories of speech features and utilized the DBN model to fuse these features. The TL strategy was utilized to transfer knowledge learned from a large speech recognition database, TIMIT, for PTSD detection where PTSD patient data is difficult to collect. We evaluated the proposed methods on two PTSD speech databases, each of which consists of audio recordings from 26 patients. We compared the proposed methods with other popular methods and showed that the state-of-the-art support vector machine (SVM) classifier only achieved an accuracy of 57.68%, and TL strategy boosted the performance of the DBN from 61.53% to 74.99%. 
Altogether, our method provides a pragmatic and promising tool for PTSD diagnosis.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.10" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bb7b2725d1fb070733c340ae10e6f994e1b53255", "sources": [ "DBLP" ], "title": "A Deep Transfer Learning Approach for Improved Post-Traumatic Stress Disorder Diagnosis", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "bbd9ef6c67d73e2d780d1d3c484a6ab2b44e156d": { "authors": [], "doi": "", "doiUrl": "", "entities": [ "Automaton", "Boolean satisfiability problem", "Checking (action)", "Experiment" ], "id": "bbd9ef6c67d73e2d780d1d3c484a6ab2b44e156d", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "389f432f73ccc95b5738e80781e085bb00dfd8c4", "3d933f007baa84865f609a154d2a378b3d62442f", "fbb56b289afd5edb72505188af55282ca13e3ca0", "2100d14e855e4c66e845fb4dbddf00849b1be758", "d000d98371c266e64c136155b54f7749bdcffaf0", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "f3c59aff8a7648fd29a31728f39e3aa9ee3f2528", "363ff906a42d2ac95456d7d8fb2527989a42f1ec", "088a382a5af6a44ccb69c2f49517bf8d99ab6759", "1246299e80c8a403313adf0f5c28e4127ed15001", "90c28e38506d9cb1c3e3186d7e3971805dfcc675", "231bed067708e80dae6d836f8359fb6a2766283d", "306f123c6af2577a61f67e9155cf3897ea149d1c", "e05e140132042f494e6b64f52c0654bc55d0bbae", "5518d5853b694c865a55be5628e7205007806563", "06f2bdf034e02b3735f044589dbe60b5665cca49", "1dba737ed6207fbd69afd8a5bf9938a8790c4b92", "8db84d7974e80554930576e926b9d6de9727daed", "0e4a694548b731930cefe91330ece0737c415d32", "8eb819823e254c232f2635e69c9ee54a7e2ff387", "0634ba86fab525d37981280400963541a2e433b8", "595642f2eb3ae457911e710ef5c1816e0d51fd4b", "12f000052d3e168a41b2e1968b4488a982cf9fd8", "1019341b60ccd9e2144bb1bf8cf7ab2275b12d07", "2c173140a36f44fa2767156900cc52e71f75748e", "02ea2faa6190bc14f4244386996054ef11b0d89c", 
"6a659685bff877d39cb4d5a20fcdbbf42151b84f", "341d33498388711a5303c5f51433b3d5739a21d2", "6bb40354a670b1509ee312ec045326791e197ba8", "db8fe43a20608c3da49d2edce96377b7d571436e", "decb7c40e12fb4e20f04b2b514704575e4481ff8", "144382ef2ee1d00ce3d36c61601afecca5620c7d", "c537444e60011f290c48dc768cafe3d7d5d3cd1f" ], "paperAbstract": "We describe a uniform and efficient framework for checking the satisfiability of a large class of string constraints. The framework is based on the observation that both satisfiability and unsatisfiability of common constraints can be demonstrated through witnesses with simple patterns. These patterns are captured using flat automata each of which consists of a sequence of simple loops. We build a Counter-Example Guided Abstraction Refinement (CEGAR) framework which contains both an under- and an over-approximation module. The flow of information between the modules allows to increase the precision in an automatic manner. We have implemented the framework as a tool and performed extensive experimentation that demonstrates both the generality and efficiency of our method.", "pdfUrls": [ "http://www.iis.sinica.edu.tw/~yfc/lib/exe/fetch.php?media=pldi2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/bbd9/ef6c67d73e2d780d1d3c484a6ab2b44e156d.pdf", "s2Url": "https://semanticscholar.org/paper/bbd9ef6c67d73e2d780d1d3c484a6ab2b44e156d", "sources": [], "title": "Flatten and Conquer (A Framework for Efficient Analysis of String Constraints)", "venue": "", "year": 2017 }, "bbf6028001540d27b5c59a36540a7af97ecdab3e": { "authors": [ { "ids": [ "15539522" ], "name": "Jan Wroblewski" }, { "ids": [ "34806932" ], "name": "Kazuaki Ishizaki" }, { "ids": [ "23929870" ], "name": "Hiroshi Inoue" }, { "ids": [ "3165589" ], "name": "Moriyoshi Ohara" } ], "doi": "10.1109/IPDPS.2017.111", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.111", "entities": [ "Apache Spark", "Computation", "Distributed computing", "High- and low-level", "Inline expansion", 
"Java", "MapReduce", "Programming model", "SQL", "Serialization" ], "id": "bbf6028001540d27b5c59a36540a7af97ecdab3e", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "449-458", "journalVolume": "", "outCitations": [ "87fa174d7e5f43f6a81cd3129c2834871133bf0d", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "03f69e0ddcaf066f69b75819569e8936d011eec1", "0d5ec8a177df475ceb0434c46d0df786a7a5b1e1", "7ead08393fa2000ea03bb1cc1e3e6e0ac991aac6", "0558c94a094158ecd64f0d5014d3d9668054fb97", "b7efe971a34a0f2482e0b2520ffb31062dcdde62", "080ed793c12d97436ae29851b5e34c54c07e3816", "6ddf4cd11283dddb3b8b3871b894ae6aee9597f8", "8059ddbc4ead7f6f263cfc81e9fde8e210b5464e", "2554485ffdb8473262ce0cfde401cfdc5b85f3fe" ], "paperAbstract": "Apache Spark is a framework for distributed computing that supports the map-reduce programming model. The SQL module of Spark contains Datasets, i.e., distributed collections of records stored in a serialized low-level format in a manually managed chunk of memory. However, the functions users provide to the map-reduce computations expect Java objects. Datasets perform an additional deserialization step beforehand to support the user-provided function, which increases the overhead. We tackled this problem by replacing map functions with their counterparts that accepted the serialized data. 
This allowed us to skip the unnecessary part of deserialization and achieve faster data processing speeds.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.111" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bbf6028001540d27b5c59a36540a7af97ecdab3e", "sources": [ "DBLP" ], "title": "Accelerating Spark Datasets by Inlining Deserialization", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "bbfb817bbb84644704a1be1196b520fbc704e41e": { "authors": [ { "ids": [ "17822345" ], "name": "Sangkuen Lee" }, { "ids": [ "34397071" ], "name": "Hyogi Sim" }, { "ids": [ "2379012" ], "name": "Youngjae Kim" }, { "ids": [ "1730937" ], "name": "Sudharshan S. Vazhkudai" } ], "doi": "", "doiUrl": "", "entities": [ "Attribute\u2013value pair", "Central processing unit", "Data structure", "Emulator", "Encapsulation (networking)", "Flash memory", "Locality of reference", "Programmer", "Shared memory", "Throughput" ], "id": "bbfb817bbb84644704a1be1196b520fbc704e41e", "inCitations": [ "f901f71c5ebab0a7e30e55213d13e47d0b276373" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "619-624", "journalVolume": "", "outCitations": [ "efffff9e3c96107171f442956ee38ed29a38a448", "10b014e882764f5800ecdcbaba1fa08795d0c54d", "0a12a179bebdf4bb69d692a1127795b3f536270b", "bf6f2e9548a5c36d3f41639d64c5924026039417", "16134b29695641d9c0086d246f96629ed6f70918", "0763e8bee8d59588ce35705ef3e58b5d601d2ae6", "0541d5338adc48276b3b8cd3a141d799e2d40150", "d4e58e7c95d66f810252af630e74adbdbaf38da7", "22a8454fab20b37c55c7c5e28ebc11cb3db994fd", "339632faa043d4697570fc4fe48a52d007c3cf06", "4e8505919eb22265f107ebbeeee3fa78bf6d893a", "4bbb4e2bed21980cfe9ca7a6e243737705b0fd20", "bf70d60fc8d1de5fa53e8220a014fe463de4b7e5", "fae8a785260ac5c34be82fca92a4abef4c30d655", "4678cdcf7e57c1563379ac7cc344254f01ace572", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", 
"3fc9abe2eee1c1ca28312a355cf2ba75353db75c", "34a86b7b24d4a93dbb249fc05f0b7c0f48f90aff", "500b80adc7e25dfffa9a05d25bdffce81b1b0031", "17e251dfdaed41fe3bb9b51605ead809666f2cee" ], "paperAbstract": "Processing In Memory (PIM), the concept of integrating processing directly with memory, has been attracting a lot of attention since PIM can assist in overcoming the throughput limitation caused by data movement between CPU and memory. The challenge, however, is that it requires the programmers to have a deep understanding of the PIM architecture to maximize the benefits such as data locality and parallel thread execution on multiple PIM devices. In this study, we present AnalyzeThat, a programmable shared-memory system for parallel data processing with PIM devices. Thematic to AnalyzeThat is a rich PIM-Aware Data Structure (PADS), which is an encapsulation that integrally ties together the data, the analysis tasks and the runtime needed to interface with the PIM device array. The PADS abstraction provides (i) a key-value data container that allows programmers to easily store data on multiple PIMs, (ii) a suite of parallel operations with which users can easily implement data analysis applications, and (iii) a runtime, hidden to programmers, which provides the mechanisms needed to overlay both the data and the tasks on the PIM device array in an intelligent fashion, based on PIM-specific information collected from the hardware. We have developed a PIM emulation framework called AnalyzeThat. 
Our experimental evaluation with representative data analytics applications suggests that the proposed system can significantly reduce the PIM programming effort without losing its technology benefits.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101197", "http://users.nccs.gov/~vazhkuda/analyzethat-ccgrid17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bbfb817bbb84644704a1be1196b520fbc704e41e", "sources": [ "DBLP" ], "title": "AnalyzeThat: A Programmable Shared-Memory System for an Array of Processing-In-Memory Devices", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "bc2c6f372d0966c4864462867c3eb78b94c2f5a1": { "authors": [ { "ids": [ "3436523" ], "name": "Peyman Faizian" }, { "ids": [ "2087174" ], "name": "Md Atiqul Mollah" }, { "ids": [ "34737976" ], "name": "Zhou Tong" }, { "ids": [ "1737003" ], "name": "Xin Yuan" }, { "ids": [ "2242944" ], "name": "Michael Lang" } ], "doi": "10.1145/3126908.3126959", "doiUrl": "https://doi.org/10.1145/3126908.3126959", "entities": [ "Data center", "Global network", "OpenFlow", "Routing", "Software-defined networking" ], "id": "bc2c6f372d0966c4864462867c3eb78b94c2f5a1", "inCitations": [], "journalName": "", "journalPages": "51:1-51:11", "journalVolume": "", "outCitations": [ "06845215adcdfcff316effc272ab6f7bf764f71a", "bcf4b4e4ffdc26bf1f162f2e9652a511bf7164f8", "9bb6ee03d15def91dd6d99e6cf0dfbf503964a5a", "141b27cbd99d74efaeb481f19f5e91c084e91913", "49dec06d4912acb24a2b0e28f1d55e4f225aa1cb", "a15bc58fa496b6cca937713723f19f45380fc2fe", "62e08291ba342127ef431315bb26a80055644a37", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "5f8991828def57d2f0cda942566afff56740d150", "603a54c6c715851482ebc15090ee0e0b99b8f6d9", "0dc45b3b00373dfcc2baf61a2d583cd5e882a772", "1afc2cc0b9ac3140693302704ee44ce1054b6325", "78727ebd5f891e88aa2729e213280230b0453464", "7aeb7797a2340874cf436ed6f3b95c01ef5769de", 
"33715194bf741fe17d6f6b9559af694907c26d2a", "5a2c137ad03d65cc75518732b756fcb3618b1baa", "8609d8106db4415f3e9dbe8f3a5ab54bd1fac759", "61aa09bc1a3eae17480645c90b06a18cbd62d9af", "4110d5ad162fbf43a3418f28b4d46609c2a147be" ], "paperAbstract": "The OpenFlow-style Software Defined Networking (SDN) technology has shown promising performance in data centers and campus networks; and the HPC community is significantly interested in adopting the SDN technology. However, while OpenFlow-style SDN allows dynamic per-flow resource management using a global network view, it does not support adaptive routing, which is widely used in HPC systems. This gives rise to the question whether SDN can achieve the performance that HPC systems expect with adaptive routing. In this work, we investigate possible methods to apply the SDN technology on the current generation HPC interconnects with the Dragonfly topology, and compare the performance of SDN with that of adaptive routing. Our results indicate that adaptive routing results in higher performance than SDN when both have similar resource allocation for a given traffic condition. 
However, SDN can use the global network view to compete with adaptive routing by allocating network resources more effectively.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126959" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bc2c6f372d0966c4864462867c3eb78b94c2f5a1", "sources": [ "DBLP" ], "title": "A comparative study of SDN and adaptive routing on dragonfly networks", "venue": "SC", "year": 2017 }, "bc556164bb67af25de02e602f271d88f4d7752e0": { "authors": [ { "ids": [ "1705489" ], "name": "Xin Wang" }, { "ids": [ "1822115" ], "name": "Weihua Zhang" }, { "ids": [ "8491577" ], "name": "Zhaoguo Wang" }, { "ids": [ "9555458" ], "name": "Ziyun Wei" }, { "ids": [ "1716528" ], "name": "Haibo Chen" }, { "ids": [ "2636713" ], "name": "Wenyun Zhao" } ], "doi": "10.1145/3018743.3018752", "doiUrl": "https://doi.org/10.1145/3018743.3018752", "entities": [ "Access control", "Attribute\u2013value pair", "B+ tree", "Concurrency (computer science)", "Concurrency control", "Design pattern", "HTML", "Key-value database", "Multi-core processor", "Scalability", "Search tree", "Software design pattern", "Speedup", "Transactional memory" ], "id": "bc556164bb67af25de02e602f271d88f4d7752e0", "inCitations": [ "e45dea6588d1de0a23618e019031e67eedeeee26", "f27b19c6e3586b14ebfa139281184db2cc0bf46d", "54aaa5ea1fd24aca55f11e529ef270dc6d1d44da" ], "journalName": "", "journalPages": "385-399", "journalVolume": "", "outCitations": [ "78e47b768c784fcb15004bab48e24f80fdad579e", "3593269a4bf87a7d0f7aba639a50bc74cb288fb1", "1f7e50d220f41f4fac985a991c8d5187323aab4c", "0480cf8c50849301d6d038966de7b31498a55780", "9a8ff6073b183de6940bba457fb6f996736c39a2", "d8a124aaa3602c0272d98b0a771888f7b4a0ae71", "3702d6e0c78050f3261fdbf0eb1aefbac59fb8cf", "eb03e6432794b5accf0ef4980b3998c4fb3df345", "0b19f413ffb5bc68b43f3bd05a97c282a7c6d6ab", "54a882bc5f15877097dfb1aab8c480323036e48c", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "33f7e853fe40c466a2bda159678dba6a2caff3c8", 
"7227999dfa663a2a1e0e81ee450f360e1e308ff7", "3d4af9523014bc15c4b3cb00d9398813ce461cf9", "9fa6439b42c89790d67660dace8bde71300ebb33", "03416be8097852a54dd3e309434e5a0806824646", "9aa0d7253574e50fe3a190ccd924433f048997dd", "3150af98e61952c09f70e53a3f84911291d8f440", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "56f6aec0132e56769e2036bbeff791dfa137d107", "893448e800e13eb022cc2b3c6328ff1a85bdbe87", "323292fad95a1bce506e100ac8d622019a2012d2", "8bffe87cb85903d500661e9685ce2c3d8f87b1d8", "13e6aa5f61267b2814fa9b32f47c17c0fcdef2d5", "38611b424808954be2c1375da1a873b1e2487ace", "46e61ad29ab20618fb551afbc00ebb8eb4e9be21", "06001968f5a93a1bb94f7573edffb9a7126ff009", "6478b07c772d165241c618f0c63610fb1e064762", "28ecdc50beb098d9176d992fed80eb2bac5963a4", "2ac3c4537be12b52f9e60d140ccf5621dc43cb75", "4f8dfe56510bbe6fadb425b037b205f9b861cf75", "984d45494026f7a2fc9c4193ee65b5ef35d937ad", "ab12cef09635b578d1c6479a2a693de8a75be2c7", "f465e873cb9d9e5cd74cc759c2b015da06385a86", "bcb1fc2821ee64ac750f1ac3bda4ce1a8dd31e2d" ], "paperAbstract": "While hardware transactional memory (HTM) has recently been adopted to construct efficient concurrent search tree structures, such designs fail to deliver scalable performance under contention. In this paper, we first conduct a detailed analysis on an HTM-based concurrent B+Tree, which uncovers several reasons for excessive HTM aborts induced by both false and true conflicts under contention. Based on the analysis, we advocate Eunomia, a design pattern for search trees which contains several principles to reduce HTM aborts, including splitting HTM regions with version-based concurrency control to reduce HTM working sets, partitioned data layout to reduce false conflicts, proactively detecting and avoiding true conflicts, and adaptive concurrency control. To validate their effectiveness, we apply such designs to construct a scalable concurrent B+Tree using HTM. 
Evaluation using key-value store benchmarks on a 20-core HTM-capable multi-core machine shows that Eunomia leads to 5X-11X speedup under high contention, while incurring small overhead under low contention.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018752", "http://news.cs.nyu.edu/~zhaoguo/pub/eunomia-ppopp17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bc556164bb67af25de02e602f271d88f4d7752e0", "sources": [ "DBLP" ], "title": "Eunomia: Scaling Concurrent Search Trees under Contention Using HTM", "venue": "PPOPP", "year": 2017 }, "bc5b82c41a0c8cdf9540ae2f57a351d98cb319bc": { "authors": [ { "ids": [ "37372815" ], "name": "Joe Carri\u00f3n" }, { "ids": [ "2133672" ], "name": "Daniel Franco" }, { "ids": [ "40122813" ], "name": "Veronica Gil Costa" }, { "ids": [ "1743311" ], "name": "Mauricio Mar\u00edn" }, { "ids": [ "1694881" ], "name": "Emilio Luque" } ], "doi": "10.1007/978-3-319-64203-1_46", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_46", "entities": [ "Routing", "Web search engine" ], "id": "bc5b82c41a0c8cdf9540ae2f57a351d98cb319bc", "inCitations": [], "journalName": "", "journalPages": "638-650", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_46" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bc5b82c41a0c8cdf9540ae2f57a351d98cb319bc", "sources": [ "DBLP" ], "title": "Improving the Network of Search Engine Services Through Application-Driven Routing", "venue": "Euro-Par", "year": 2017 }, "bc866300facbd7f28c294748a89d1505e9e1df1f": { "authors": [ { "ids": [ "2827531" ], "name": "Haidong Lan" }, { "ids": [ "3429925" ], "name": "Weiguo Liu" }, { "ids": [ "2916386" ], "name": "Yongchao Liu" }, { "ids": [ "38613433" ], "name": "Bertil Schmidt" } ], "doi": "10.1109/IPDPS.2017.42", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.42", "entities": [ "Algorithm", "C++", "Central processing unit", "Execution unit", "Graphics 
processing unit", "Heterogeneous computing", "Manycore processor", "Model of computation", "Multi-core processor", "Open-source software", "Parallel computing", "Performance Evaluation", "SIMD", "Sequence database", "Shattered World", "Smith\u2013Waterman algorithm" ], "id": "bc866300facbd7f28c294748a89d1505e9e1df1f", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "42-51", "journalVolume": "", "outCitations": [ "86cc3ad9ba843d729df2692d2b24038c029a1e89", "6cee78bbad9731a015d1084176add9afd3420b5c", "583a320a9c612124d62da5741fede120495126fc", "27a9635d723b08c543f4ed979b58ed56d1816965", "785ba684ef8219a51db12dce9fc587a1e619cd70", "649e858552ed8a289b94ae1f33846b1255b3f07d", "90732d2e1914688cfaa8ec6b0cca59ce5ad031a4", "1b7a288a22f580bbf5698e2ac26b289a26eaa180", "babb04bc6d22d4d9c6606d9ab02ad0bc6dc9c5f8", "ce39082a41392f1691f62ea1cc505119d6862c84", "042a1f231982b9618facdcb16bd7f91135a6fdfd", "a80b76991ea3c2bd09b63d31037f3d2c794c305e", "950bca8374bf36421957b416e4f58425e9d43095", "f9685e1cacb684ec988250924c35ca339c709948", "03a2edb24a141d529252d882dd5e15ab8351d247", "7d255de4859046e3c0ebae94d507e7670c21bf9d", "d0b9559b098b224008adc82ba2d0d86009ba807c", "668a3711456d466588ff7ca0a30d672807b0229d", "4bc3b6d26e404869a2d7db0b642423f51c8ce86a", "75c9230a3388480d146022a9ee41b8585e677a55", "139954109f31d2988cceeec1f8b382c3e6d38bbf", "230d4da76106248060201fe327eb56c8b5d52625", "28552ecf4eaedb3461edca97304b29082b02fbab", "4a875009283ad5f262eb828ab270e3af3c5b12b2", "40c5441aad96b366996e6af163ca9473a19bb9ad", "85bb2a3f3684334ba1e5ad6bc7795a0330cf5421", "1c15f69566af6198f336b961aecf418b5ccd07d3" ], "paperAbstract": "Computer architectures continue to develop rapidly towards massively parallel and heterogeneous systems. Thus, easily extensible yet highly efficient parallelization approaches for a variety of platforms are urgently needed. 
In this paper, we present SWhybrid, a hybrid computing framework for large-scale biological sequence database search on heterogeneous computing environments with multi-core or many-core processing units (PUs) based on the Smith-Waterman (SW) algorithm. To incorporate a diverse set of PUs such as combinations of CPUs, GPUs and Xeon Phis, we abstract them as SIMD vector execution units with different number of lanes. We propose a machine model, associated with a unified programming interface implemented in C++, to abstract underlying architectural differences. Performance evaluation reveals that SWhybrid (i) outperforms all other tested state-of-the-art tools on both homogeneous and heterogeneous computing platforms, (ii) achieves an efficiency of over 80% on all tested CPUs and GPUs and over 70% on Xeon Phis, and (iii) achieves utilization rates of over 80% on all tested heterogeneous platforms. Our results demonstrate that there is enough commonality between vector-like instructions across CPUs and GPUs that one can develop higher-level abstractions and still specialize with close-to-peak performance. 
SWhybrid is open-source software and freely available at https://github.com/turbo0628/swhybrid.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.42" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bc866300facbd7f28c294748a89d1505e9e1df1f", "sources": [ "DBLP" ], "title": "SWhybrid: A Hybrid-Parallel Framework for Large-Scale Protein Sequence Database Search", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "bcbfbf2777e6ee87df990b871d1cc7b44a3e998d": { "authors": [ { "ids": [ "1915826" ], "name": "Zhe Wang" }, { "ids": [ "1725501" ], "name": "Chenggang Wu" }, { "ids": [ "1777710" ], "name": "Jianjun Li" }, { "ids": [ "6793913" ], "name": "Yuanming Lai" }, { "ids": [ "1771551" ], "name": "Xiangyu Zhang" }, { "ids": [ "1741913" ], "name": "Wei-Chung Hsu" }, { "ids": [ "2342612" ], "name": "Yueqiang Cheng" } ], "doi": "10.1145/3050748.3050752", "doiUrl": "https://doi.org/10.1145/3050748.3050752", "entities": [ "Address space", "Address space layout randomization", "Adversary (cryptography)", "Binary code", "Code reuse", "Performance Evaluation", "Virtual machine", "Web server" ], "id": "bcbfbf2777e6ee87df990b871d1cc7b44a3e998d", "inCitations": [ "723931de6d91a965bc2fa24ac649291c9f1a4639", "325390173841d52f7a2791ba6b0e32ad80bf2630" ], "journalName": "", "journalPages": "143-156", "journalVolume": "", "outCitations": [ "223ebd7887c988563a916ace110481a02f71a4cd", "48a8b9bd6df63b13093cd67ee61911e7a485be57", "09faa1cc5c8784d811502c5137bf63b5f1ac2934", "0ed6a4c8fa2c74ca2e470df6e205797371523edc", "3e1ff9416ec1c2f94d0ddbeb6171eea4c336d663", "9b2585f7248c8b5a22e9c816506e01060213ca85", "6458f4c0c029b038ebd1d7f61005a010ac250892", "8c39c5d022d066e708a1eb5cd41d5db3b64bfdfe", "348b0049b0c7b3f7e74b77cca30213cb7e550360", "e23298e18aa92ac43fa941d0f5eacb339905b685", "64544d30077a54ca97752f9ffd62c80e9038ddbb", "e2b5bc4dc4ea23fa798297d67694984084b9aba2", "2ba9c60483ce810cc001aa620598cda98001af7e", 
"05c49820bb35d0b8d7a2168a9124e506a0334b57", "acf32e644db8c3ac54834d294bba4cf46551480a", "377e1ea567fa79fae02a0b38a62916520ef81e2d", "4b9426a328fb4e379e2fab57fc72a91c5618ae94", "08c3e50a2913da51ed3cdafdcfdfb488e8fa83c3", "b58a85e46d365e47ce937ccc09d60fbcd0fc22d4", "48a8e9d8a41009eb6b7733b139eb5eff30d72776", "3001d2504e1dbc547d12d05f3ed1a671d125c4ce", "5aa4d6f28c803e5bd05d39794e12c759a60aa6a2", "74572d07252e2f0b60b16abb931c46e819e2b448", "53396c842bc8a94575470fab3acb4aef91c5073d", "01b5b648af61ddb382da638a299fae2315b25192", "2c067e092c35d71d23c09d9c09376aa5b684152c", "f0ac31c2248ef8eb597448395da6f79227ffe916", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "1de15306de89ab834561ef1cd187ec607c2a1b9e", "13e83680f0bc1ffb29b620945357ec832726ae90", "3013fc25ace9eca344cb936124a42171d72b95ec", "592be7266ac5e1a423703242a5f976bdf05627af", "412c0be5520e80ab6f8c3662477a28e3b9ccb943", "70e8eb54e20c504b9a3c52f5a5ea4d8fbb3d985c", "4d75cd2764c45baf46c72fddc5c676fdfce6f60e" ], "paperAbstract": "Recent code reuse attacks are able to circumvent various address space layout randomization (ASLR) techniques by exploiting memory disclosure vulnerabilities. To mitigate sophisticated code reuse attacks, we proposed a light-weight virtual machine, ReRanz, which deployed a novel continuous binary code re-randomization to mitigate memory disclosure oriented attacks. In order to meet security and performance goals, costly code randomization operations were outsourced to a separate process, called the \"shuffling process\". The shuffling process continuously flushed the old code and replaced it with a fine-grained randomized code variant. ReRanz repeated the process each time an adversary might obtain the information and upload a payload. Our performance evaluation shows that ReRanz Virtual Machine incurs a very low performance overhead. 
The security evaluation shows that ReRanz successfully protect the Nginx web server against the Blind-ROP attack.", "pdfUrls": [ "http://doi.acm.org/10.1145/3050748.3050752" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bcbfbf2777e6ee87df990b871d1cc7b44a3e998d", "sources": [ "DBLP" ], "title": "ReRanz: A Light-Weight Virtual Machine to Mitigate Memory Disclosure Attacks", "venue": "VEE", "year": 2017 }, "bcc956ba33cd58f8256a64436f0dac646f0de3e2": { "authors": [ { "ids": [ "1780710" ], "name": "Hartwig Anzt" }, { "ids": [ "1708869" ], "name": "Jack J. Dongarra" }, { "ids": [ "8765791" ], "name": "Goran Flegar" }, { "ids": [ "1684436" ], "name": "Enrique S. Quintana-Ort\u00ed" } ], "doi": "10.1109/ICPP.2017.18", "doiUrl": "https://doi.org/10.1109/ICPP.2017.18", "entities": [ "CUDA", "Graphics processing unit", "Jacobi method", "LU decomposition", "Linear system", "Preconditioner", "Sparse matrix" ], "id": "bcc956ba33cd58f8256a64436f0dac646f0de3e2", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "91-100", "journalVolume": "", "outCitations": [ "1ef7f02bce931c8e9ef529e095b274132ce4011a", "64b3435826a94ddd269b330e6254579f3244f214", "4ad0595b7af6c4041bfd89ccf74d94f154a39762", "bb71f94e48503a0d110ccdcf6ac587b96e242dd8" ], "paperAbstract": "We present a set of new batched CUDA kernels for the LU factorization of a large collection of independent problems of different size, and the subsequent triangular solves. All kernels heavily exploit the registers of the graphics processing unit (GPU) in order to deliver high performance for small problems. 
The development of these kernels is motivated by the need for tackling this embarrassingly-parallel scenario in the context of block-Jacobi preconditioning that is relevant for the iterative solution of sparse linear systems.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.18" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bcc956ba33cd58f8256a64436f0dac646f0de3e2", "sources": [ "DBLP" ], "title": "Variable-Size Batched LU for Small Matrices and Its Integration into Block-Jacobi Preconditioning", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "bcf2d9d2a382dd782a7ec26f1656ca5327781225": { "authors": [], "doi": "", "doiUrl": "", "entities": [ "Data center", "Telematics", "explanation" ], "id": "bcf2d9d2a382dd782a7ec26f1656ca5327781225", "inCitations": [ "9d6ac16d4f2bc78648c9b66995721b04ff2e7462", "cf50d827bd05a50a3d05e8538fea6026877caf4d", "65085beceabd9d4e6d30aef58ca7812b84ad787b", "dcbcfaf9ef12c29a5120fc2eff57ab0634de4016", "4937876603808427510230d9d7bdf15dfd686ebf", "b61bd7521b65077eac8381e71facc397060c60b0", "4f12e36e79485604932de3c9a3a0e22f2bf2e201", "1e5ae8e09f00ea8f53047b7313b9dab884e96043", "cbbf860f8065a3e1bd72a07d4cbac5f798065ca1", "6315d7f2619d9ec05846765d1283dee28bce9bad", "16b2f6152bae3005aa426b3cc45766a17126bc16", "00f191227ef407a92db0581c2f39918d3726d2d6", "1cfb0e14b5db871e6f88e4096fb6751d33741b1e", "38a96a0585e6d4c5f9fe5d326fd639bb289e69f8", "7352c0c3e4217188ebc73430d12d4ce240e35c79", "d7c141c9290998c002cf5db3bb95e9ae044a1f7b", "5715e1e106dae3a49a40314e3b05c142d38da6d9", "5088b1ca95e4b3e988d30dadac20e8436f4a0c4e", "0c8b6128488719a216b95d255d65de2a85c5bb07", "fa099d45212e0b0e6486b6bd378e6f4fd06c222d", "4853a26200889f033c0f509abf0f91d8cafba55b", "2de68ac42d7b54b3ef0596c18e3d4a2b3a274f72", "213b5f30cd84c80c1f53e46553fa221fdcc226dd" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [], "paperAbstract": "MacroBase is a new kind of data 
analysis engine designed to prioritize human attention in large-scale fast data streams. Given increasing data volumes that are increasingly too large for manual inspection, MacroBase highlights interesting behaviors in streams and produces explanations for these behaviors that can be used for tasks including diagnostics, alerting, and root cause analysis. MacroBase has already found interesting, previously unknown behaviors and trends in production data in domains including mobile telematics, datacenter operations, electrical utilities, and satellite imaging. The purpose of this document is to describe the architecture, interaction model, and ongoing research in the MacroBase project.", "pdfUrls": [ "http://futuredata.stanford.edu/private/mb-onepage.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/bcf2/d9d2a382dd782a7ec26f1656ca5327781225.pdf", "s2Url": "https://semanticscholar.org/paper/bcf2d9d2a382dd782a7ec26f1656ca5327781225", "sources": [], "title": "MacroBase: Prioritizing Attention in Fast Data", "venue": "", "year": 2017 }, "bd3c556c4b3bd55387bf7d9681594da9f16202bd": { "authors": [ { "ids": [ "21746902" ], "name": "Suthee Chaidaroon" }, { "ids": [ "3047254" ], "name": "Yi Fang" } ], "doi": "10.1145/3077136.3080816", "doiUrl": "https://doi.org/10.1145/3077136.3080816", "entities": [ "Artificial neural network", "Binary code", "Calculus of variations", "Deep learning", "Encoder", "Experiment", "Generative model", "Hash function", "Information retrieval", "Machine learning", "Nonlinear system", "Similarity search", "Simulation", "Supervised learning", "Text corpus", "Unsupervised learning" ], "id": "bd3c556c4b3bd55387bf7d9681594da9f16202bd", "inCitations": [], "journalName": "", "journalPages": "75-84", "journalVolume": "", "outCitations": [ "5909654507b9567e925358eeca505656a880ffc7", "58513e5043c8a8fb61dbe83ab58225e7f60575af", "15174d135305c7865b584ca2bbb725159f6efef4", "34f25a8704614163c4095b3ee2fc969b60de4698", 
"f04cdf0867d970bbfe5c5e2e4c41d6a6c92f06ce", "0cf6d01642ae8d0b415095c4b57e400775cfbee0", "141487cd6d32f6916bdcb029ac8159eba44e23de", "245414e768c3b8c8288ac0651604a36b1a44a446", "100c730003033151c0f78ed1aab23df3e9bd5283", "ea9d2a2b4ce11aaf85136840c65f3bc9c03ab649", "0f88de2ae3dc2ec1371d1e9f675b9670902b289f", "01f3290d6f3dee5978a53d9d2362f44daebc4008", "310b203a7754959df711056a617634bc10ed1d9a", "1839830486082578d2612e46a89e0e727ea1773a", "046a1302079f56b94c81457bf7fd21c3417a9f72", "1c799eca7983c62f7815ac5f41787b3e552567b6", "0ec905e3d2751674dcfde4f8d9882c88eb07a1ff", "03fcaa855332fdd11d5b9ac8f369aa904347d577", "1a0f660f70fd179003edc271694736baaa39dec4", "613f9c438d8ef24463a987403321f808d28d89a0", "149622ff56b7810ee8f79a135ced2f1d386443f9", "4fa2b00f78b2a73b63ad014f3951ec902b8b24ae", "10eb7bfa7687f498268bdf74b2f60020a151bdc6", "452f7411af7d471dd3ba84c2b06b2aaffc38cdb9", "44c977c18752d8913746efc7ea8635b0e4be4e47", "6184ddbe780cb934f036b04dd1d28226b6bcbcce", "877d083b2a3a75cc1bb25f770a9c5684bf5f6f44", "0f56311dad9f03083a4f4e791aab0b6e0aa2ff07", "063e5be439030fd0ba54a9636d101aa6b8bc5d2a", "066b18ccd263156403b805456eb556fad89040e3", "219b7b157f2a559ecdffe21c2a0edf5285931298", "7161eb8d3b1cb01769a36528f9c6bddd663545a9", "1379ad7fe27fa07419b7f6956af754bdb6d49558", "5b9534442f91a87022427b74bca9fd95dd045383", "272216c1f097706721096669d85b2843c23fa77d", "e0d2861a9022667a93a8a0573d44f238f7c3a027", "478815622d22d85b0ade98c59b6ac78c3fb1ac21", "3f1e54ed3bd801766e1897d53a9fc962524dd3c2", "040678daf6a49a88345ee0c680fccfd134f24d4b", "6de2b1058c5b717878cce4e7e50d3a372cc4aaa6" ], "paperAbstract": "As the amount of textual data has been rapidly increasing over the past decade, efficient similarity search methods have become a crucial component of large-scale information retrieval systems. A popular strategy is to represent original data samples by compact binary codes through hashing. 
A spectrum of machine learning methods have been utilized, but they often lack expressiveness and flexibility in modeling to learn effective representations. The recent advances of deep learning in a wide range of applications have demonstrated its capability to learn robust and powerful feature representations for complex data. Especially, deep generative models naturally combine the expressiveness of probabilistic generative models with the high capacity of deep neural networks, which is very suitable for text modeling. However, little work has leveraged the recent progress in deep learning for text hashing.\n In this paper, we propose a series of novel deep document generative models for text hashing. The first proposed model is unsupervised while the second one is supervised by utilizing document labels/tags for hashing. The third model further considers document-specific factors that affect the generation of words. The probabilistic generative formulation of the proposed models provides a principled framework for model extension, uncertainty estimation, simulation, and interpretability. Based on variational inference and reparameterization, the proposed models can be interpreted as encoder-decoder deep neural networks and thus they are capable of learning complex nonlinear distributed representations of the original documents. We conduct a comprehensive set of experiments on four public testbeds. 
The experimental results have demonstrated the effectiveness of the proposed supervised learning models for text hashing.", "pdfUrls": [ "https://export.arxiv.org/pdf/1708.03436", "http://arxiv.org/abs/1708.03436", "http://doi.acm.org/10.1145/3077136.3080816", "https://arxiv.org/pdf/1708.03436v1.pdf", "http://students.engr.scu.edu/~schaidar/paper/Variational_Deep_Hashing_for_Text_Documents.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bd3c556c4b3bd55387bf7d9681594da9f16202bd", "sources": [ "DBLP" ], "title": "Variational Deep Semantic Hashing for Text Documents", "venue": "SIGIR", "year": 2017 }, "bd542ddc4b094d5cdbeea9c5ccf57b6f2d4c7dce": { "authors": [ { "ids": [ "33994059" ], "name": "Milos Nikolic" }, { "ids": [ "2241445" ], "name": "Badrish Chandramouli" }, { "ids": [ "37070730" ], "name": "Jonathan Goldstein" } ], "doi": "10.1145/3035918.3035935", "doiUrl": "https://doi.org/10.1145/3035918.3035935", "entities": [ "Central processing unit", "Computation", "Contextual Query Language", "Data model", "Dynamic problem (algorithms)", "Incremental computing", "Internet of things", "Numerical analysis", "Online and offline", "Query language", "Signal processing" ], "id": "bd542ddc4b094d5cdbeea9c5ccf57b6f2d4c7dce", "inCitations": [ "38a96a0585e6d4c5f9fe5d326fd639bb289e69f8", "2127fddaf8bc57f74efd40d71c6cc364773063f1" ], "journalName": "", "journalPages": "95-108", "journalVolume": "", "outCitations": [ "441556574aef7df8520b9040516350d09edbc7de", "ac853fee48dd655536f1fab285d95427de34de6d", "7ec028ace29244cb74c105327a7e4177a34aa6bd", "43715cdc52b75ffaaff701852deafc4736a89081", "d7a7c36223904b3030161ba04aa98ea1935b927d", "edd661ca12ef8ef988679f7b399f2f846ef01fbd", "52ef5f3c9dc4890d47af02aaa6bf43133ba5e6e1", "0558c94a094158ecd64f0d5014d3d9668054fb97", "e3482c42436fbc6c06397e53a966b4a7e344d2ba", "5208060771fd213eefd827e3e1260b939f1aed6d", "f9f2f902e2a7708e2e975cd5f53682e04e736785", "f8e9b050c93af6dea582563f61b6460b590bc3af", 
"0776c68524206020faa2651273d3ac80c6442a95", "5850ead375e41daceeca0efd7d02b7f6e70578c1", "7e7b6249b598d9a4c63394e3a2efd008268ae851", "0ef1dd03db41de69165075562a051021a186c230", "00c5d5189e29dba8f2729929476b739a5c35bc02", "e847c3ec130da57328db79a7fea794b07dbccdd9", "5ebcf0eebc36f9d355debb54816b81a9f4134673", "6da6570ee13c04c9294581c290a793290b01f5cf", "5de8c44c31696abbbda0a6664d2cf5b2586fd79a", "5c94454722d8d4fb43bfd4e8449211267ab1d086", "6df617304e9f1185694f11ca5cae5c27e868809b", "1b3301aa8df5e31fa12789684e5047e6305190e9", "439e9d67451d9d465601e704b78f159ab0fb4065", "35ffae4ccf5e7ac45162b4e50e6a7da71fc74bea", "d9c2586e599816defa98d04056fcb8e490d1eba4", "83a500fcc7cd98db063b73461277ac885c8fe7c3", "8af01e6cb7375ff671ed6efd8576253ab6e12d04" ], "paperAbstract": "Internet of Things applications analyze the data coming from large networks of sensor devices using relational and signal processing operations and running the same query logic over groups of sensor signals. To support such increasingly important scenarios, many data management systems integrate with numerical frameworks like R. Such solutions, however, incur significant performance penalties as relational data processing engines and numerical tools operate on fundamentally different data models with expensive inter-communication mechanisms. In addition, none of these solutions supports efficient real-time and incremental analysis.\n In this paper, we advocate a deep integration of signal processing operations and general-purpose query processors. We aim to reconcile the disparate data models and provide a common query language that allows users to seamlessly interleave tempo-relational and signal operations for both online and offline processing. Our approach is extensible and offers frameworks for quick and easy integration of user-defined operations while supporting incremental computation. 
Our system that deeply integrates relational and signal operations, called TRILLDSP, achieves up to two orders of magnitude better performance than popular loosely-coupled data management systems on grouped signal processing workflows.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035935", "http://www.cs.ox.ac.uk/files/9135/sigmod2017-trilldsp.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bd542ddc4b094d5cdbeea9c5ccf57b6f2d4c7dce", "sources": [ "DBLP" ], "title": "Enabling Signal Processing over Data Streams", "venue": "SIGMOD Conference", "year": 2017 }, "bd6fe117ca35a7ab144408be1771000feb57c7fb": { "authors": [ { "ids": [ "2942686" ], "name": "Ammar Ahmad Awan" }, { "ids": [ "1780048" ], "name": "Khaled Hamidouche" }, { "ids": [ "8798733" ], "name": "Jahanzeb Maqbool Hashmi" }, { "ids": [ "1731654" ], "name": "Dhabaleswar K. Panda" } ], "doi": "10.1145/3018743.3018769", "doiUrl": "https://doi.org/10.1145/3018743.3018769", "entities": [ "Algorithm", "CUDA", "Computation", "Deep learning", "Graphics processing unit", "ImageNet", "Open MPI", "Parallel computing", "Requirement", "Scalability", "Speedup", "TensorFlow", "Torch" ], "id": "bd6fe117ca35a7ab144408be1771000feb57c7fb", "inCitations": [ "01d7b1187d8593983181d18c357ffbed9c6ac8ac", "d771ce5fefb6e853ab176a09204556ae663e682f", "dfa37e1ad351ae889502fb704cc93f4a77c1c642", "066dffdeef5d7fb5f9713d3e955cee8214ae107a", "a7e9f6c55c1118c9947c6ef63bddd11764b85d33", "0d561187be02ccf7905c0d2376796b5814e96a6c", "466f9f9c4a63c0fbc337637a1619e3411ea14c59", "bfcd8113ba5cca0389d8c6590b540da23825140d", "60bcb1502fe1509f268b2d08a349088c92048147", "44e11af4647304e2a1875d804a9546cbf6ac7810", "77d50eef395cf38d067902fcf0961820d2c9cf3a", "ec2b36375829f5f1886676e914abc6c2bc5830af", "7f93f814459f18008ab8521c447d024ffab5c6a9", "cb4fe9befc3f6e48335d4983537f05a000eab611" ], "journalName": "", "journalPages": "193-205", "journalVolume": "", "outCitations": [ "0b99d677883883584d9a328f6f2d54738363997a", 
"21ba757bf394720e0b66b86e7638ae28742d6570", "464d94b3dc9a109dd64008a41a00181830f285aa", "855d0f722d75cc56a66a00ede18ace96bafee6bd", "238c07107a2a4ff72a4f225d8aad8e34decfa716", "556035beb283652f1dcfeff7ae43851cd4abc85a", "80d800dfadbe2e6c7b2367d9229cc82912d55889", "5e83ab70d0cbc003471e87ec306d27d9c80ecb16", "3f1c1427b175140e7f725a155096a4e73c1b8509", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "5d90f06bb70a0a3dced62413346235c02b1aa086", "043afbd936c95d0e33c4a391365893bd4102f1a7", "38211dc39e41273c0007889202c69f841e02248a", "5c36d8bb0815fd4ff5daa8351df4a7e2d1b32934", "60f22ad725f041fff81d6371242485bbe5c3ebb6", "99ef5dbd87c0796854e72acc9f52116cd8d79b46", "690b801233640abd014630765da76cffd9956495", "061356704ec86334dbbc073985375fe13cd39088", "73801d5bab1dd5cc2aaaf8855e4365a1a5d0d109", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "f978d481fae83e57202d26d4fbd38e330889ea75" ], "paperAbstract": "Availability of large data sets like ImageNet and massively parallel computation support in modern HPC devices like NVIDIA GPUs have fueled a renewed interest in Deep Learning (DL) algorithms. This has triggered the development of DL frameworks like Caffe, Torch, TensorFlow, and CNTK. However, most DL frameworks have been limited to a single node. In order to scale out DL frameworks and bring HPC capabilities to the DL arena, we propose, S-Caffe; a scalable and distributed Caffe adaptation for modern multi-GPU clusters. With an in-depth analysis of new requirements brought forward by the DL frameworks and limitations of current communication runtimes, we present a co-design of the Caffe framework and the MVAPICH2-GDR MPI runtime. Using the co-design methodology, we modify Caffe's workflow to maximize the overlap of computation and communication with multi-stage data propagation and gradient aggregation schemes. 
We bring DL-Awareness to the MPI runtime by proposing a hierarchical reduction design that benefits from CUDA-Aware features and provides up to a massive 133x speedup over OpenMPI and 2.6x speedup over MVAPICH2 for 160 GPUs. S-Caffe successfully scales up to 160 K-80 GPUs for GoogLeNet (ImageNet) with a speedup of 2.5x over 32 GPUs. To the best of our knowledge, this is the first framework that scales up to 160 GPUs. Furthermore, even for single node training, S-Caffe shows an improvement of 14% and 9% over Nvidia's optimized Caffe for 8 and 16 GPUs, respectively. In addition, S-Caffe achieves up to 1395 samples per second for the AlexNet model, which is comparable to the performance of Microsoft CNTK.", "pdfUrls": [ "http://nowlab.cse.ohio-state.edu/static/media/publications/slide/awan-ppopp17.pdf", "http://dl.acm.org/citation.cfm?id=3018769" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bd6fe117ca35a7ab144408be1771000feb57c7fb", "sources": [ "DBLP" ], "title": "S-Caffe: Co-designing MPI Runtimes and Caffe for Scalable Deep Learning on Modern GPU Clusters", "venue": "PPOPP", "year": 2017 }, "bd7e696aad0e73702eb69bb48c9168704ee7389d": { "authors": [ { "ids": [ "38591104" ], "name": "Yanbing Yang" }, { "ids": [ "26924116" ], "name": "Jiangtian Nie" }, { "ids": [ "34309103" ], "name": "Jun Luo" } ], "doi": "10.1145/3117811.3117836", "doiUrl": "https://doi.org/10.1145/3117811.3117836", "entities": [ "Collaborative filtering", "Flicker (screen)", "Grayscale", "Interference (communication)", "Modulation", "Throughput", "Transmitter", "Uncompressed video", "VLC media player" ], "id": "bd7e696aad0e73702eb69bb48c9168704ee7389d", "inCitations": [ "7e366a5b21eee04c521475af9baf21840a677f39", "5cc8de1a43374caec152d22428a4bc9287da14e3", "b6d29bf956bd775a435f24af99506e354d10dbcf" ], "journalName": "", "journalPages": "193-205", "journalVolume": "", "outCitations": [ "02f00b07581c316d21505bcdb1f65a8dac5a8ad8", 
"4f63d800e71820a4e49238b4a6b16b025be15604", "d49530c0ce4413051dbf4f9309f6a1afcd55123a", "ad5b50e69756f22593fcc727e9b9f2545ce36c14", "16595d321b257dd3c28bff95bdd3e42d6254aeca", "1e37384874c84acc7919176d4e9598e9116da2ee", "786b684d577ae57aa2fbc7d1fb0870ad86b998b5", "6f5292b2e53754be85467a6fb9e253bab1203cd9", "e5532c6958e3305adb328212fd2636968e6c966c", "c57a223aba6d88da95d63c9a93595a7c73256a3d", "a210f466c7e97d969401fb463307d35fa16287fb", "6d0829624336f2fc8afbb871ce41c1cfb727a674", "2518f58c9c87e02ab33992360266f89e4486ea9b", "5a735552c960badd66ba187fd392ddbb8449cf40", "b8eb869105e5ac673336ff7f604ecc7c458d2192", "59112fe91c9c0a138e7af2a8d71cb3c20b837c35", "1555d7e5b53042aa89c055447e4b93bda9c0b3ab", "11d3e3d021725b4713cc2117aa849a9a728980e2", "ae3739951d44556cb1298315f458b5e7f5610b88", "10c9be71608b774fceb6ee1be021dd675e6b054a", "70af91d46d6d0da18782a13f50504d87b51ce176", "853f8f0a643c679c6d096dd02fdb3922567abd86", "a71eb05b2470b7a44f8195bd5334ec38791cb62e", "126d5ce9dc0f1ab22e1ea3c8ee36aaa1f74d6837", "0c7c463f78fe38b3f17c1fe3c7b62f00e85381b1" ], "paperAbstract": "As a popular approach to implementing Visible Light Communication (VLC) on commercial-off-the-shelf devices, LED-Camera VLC has attracted substantial attention recently. While such systems initially used reflected light as the communication media, direct light becomes the dominant media for the purpose of combating interference. Nonetheless, the data rate achievable by direct light LED-Camera VLC systems has hit its bottleneck: the dimension of the transmitters. In order to further improve the performance, we revisit the reflected light approach and we innovate in converting the potentially destructive interferences into collaborative transmissions. Essentially, our ReflexCode system codes information by superposing light emissions from multiple transmitters. 
It combines traditional amplitude demodulation with slope detection to \"decode\" the grayscale modulated signal, and it tunes decoding thresholds dynamically depending on the spatial symbol distribution. In addition, ReflexCode re-engineers the balanced codes to avoid flicker from individual transmitters. We implement ReflexCode as two prototypes and demonstrate that it can achieve a throughput up to 3.2kb/s at a distance of 3m.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117836" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bd7e696aad0e73702eb69bb48c9168704ee7389d", "sources": [ "DBLP" ], "title": "ReflexCode: Coding with Superposed Reflection Light for LED-Camera Communication", "venue": "MobiCom", "year": 2017 }, "bdaa581a662ea2cfd497e70b13f74e6d91cfbd50": { "authors": [ { "ids": [ "1724234" ], "name": "Philipp Lengauer" }, { "ids": [ "1705571" ], "name": "Verena Bitto" }, { "ids": [ "1738841" ], "name": "Hanspeter M\u00f6ssenb\u00f6ck" }, { "ids": [ "3375002" ], "name": "Markus Weninger" } ], "doi": "10.1145/3030207.3030211", "doiUrl": "https://doi.org/10.1145/3030207.3030211", "entities": [ "Benchmark (computing)", "Compiler", "DACAPO", "Garbage collection (computer science)", "Java", "Scala", "Virtual machine" ], "id": "bdaa581a662ea2cfd497e70b13f74e6d91cfbd50", "inCitations": [ "3a97e00ac606a5da9ca78bcaf7fa9fd31685eb1c", "69e98dc73bbef9aa636078d750b7dc5237b21855" ], "journalName": "", "journalPages": "3-14", "journalVolume": "", "outCitations": [ "0f1042350e2c97117620d9f5182f94262f1f5ac0", "1eb273402e8441a8b6572bf5f15e71904584e4d4", "00a9ba0063d34ec56792849a67ef57b4601becbb", "dd9e9587977ae0453546f1583908bed2da9079be", "dfab353d2ed6d2eb07e5743a7f566ddb2b75a2f2", "08c7dac8a5e712dafa7ba325516faa4fee412791", "da37fd8cfb9ffe4044637aa59f723fa794409d27" ], "paperAbstract": "Benchmark suites are an indispensable part of scientific research to compare different approaches against one another. 
The diversity of benchmarks is an important asset to evaluate novel approaches for effectiveness and weaknesses. In this paper, we describe the memory characteristics and the GC behavior of commonly used Java benchmarks, i.e., the DaCapo benchmark suite, the DaCapo Scala benchmark suite and the SPECjvm2008 benchmark suite. The paper can serve as a useful guide to select benchmarks in accordance with desired application characteristics on modern virtual machines as well as with different compilers and garbage collectors. It also helps to put results that are based on these benchmarks into perspective. Additionally, we compare Java's current default collector to the G1 GC.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030211" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bdaa581a662ea2cfd497e70b13f74e6d91cfbd50", "sources": [ "DBLP" ], "title": "A Comprehensive Java Benchmark Study on Memory and Garbage Collection Behavior of DaCapo, DaCapo Scala, and SPECjvm2008", "venue": "ICPE", "year": 2017 }, "bdb94b816836f891495fe152de79ace1e3d14d9b": { "authors": [ { "ids": [ "16935406" ], "name": "Rafael Glater" }, { "ids": [ "1748698" ], "name": "Rodrygo L. T. 
Santos" }, { "ids": [ "1705422" ], "name": "Nivio Ziviani" } ], "doi": "10.1145/3077136.3080825", "doiUrl": "https://doi.org/10.1145/3077136.3080825", "entities": [ "Benchmark (computing)", "Entity", "Experiment", "Knowledge base", "Language Integrated Query", "Learning to rank", "Markov random field", "MinEd", "Named-entity recognition", "Natural language", "Robustness (computer science)", "Semantic query" ], "id": "bdb94b816836f891495fe152de79ace1e3d14d9b", "inCitations": [], "journalName": "", "journalPages": "485-494", "journalVolume": "", "outCitations": [ "7878cf925311291f53e45a2e0bdffe478b236682", "3eae360c6ee52950f27f577aedd5f9934a04e137", "607a834558b16c318be9c735bea048ae6638841d", "dea266419175e64b5b543bd3b9ef11b52243f4c8", "4b2db5a1a6f5289c19ab2d10a6bc787658ff893d", "6bee4f579e8bc07c00e854a04cf46e7ed7d73745", "1081a81d93c74d83c3fa0f7a78d3f987c117739f", "ed4c400e1a2a53bcbfb75ab2d07df131e67e7fdb", "4ceb34fe6d20d271048306128896eaa8256149ae", "b633205924265fa81d7d3ccabb37eee416a473a4", "e1412f4fc47fcca1d84b513d3afcbc25568fe76b", "04b35f36c2778b67f893177155c8e423419a464e", "288959d9f0aed8138541c9a66d9a41c5fc0949ad", "0f66ae3575cd750ec24fd89d5b5f6c17fcf40e22", "e6d408f935668672c64a9584fa614c24e72335a5", "00d37abb9ab0235c626d5eb1fd8bfc41092e8349", "69a613849e41c928ea4c6de046c46b413c371a7e", "021aac664fc1a2f5d3c4d833dec96289d77e2b5f", "1d81f37488a6c492d5dd2733d485c9b014e9eeb4", "31f884507b58ad4e03d96d90556f7f91bb10bd52", "6a640438a4e50fa31943462eeca716413891a773", "4752c1ee6ef02a122af7ecb60aa2f044f840e004", "3c8c375f111366a7db47df0a9faa314f98734044", "628d0fbc738d4707961426b9794f63966ddb71e0", "38cb9bc50282615e0631d47825c5e0c7f691c3f7", "9092bf4d0716ebd8c3ec931814aba2b84da9029d", "0c795bc9673a7ab9aac7717abaa4f9e58e6754ae", "695deca3e6d1bbc290a81294280e4517ef2bb4a0", "a22885072f39ad9900cbdaef6c59d15d75c3353b", "1fe3833a07b81cf262f093b65ce05e95e870ab35", "2d10744b3f6e1a8afd10abaca41c12f99cfffc6f", "0b6aab9ce7910938e0d60c0764dc1c09d3219b05", 
"021c1a63a103d320e42f7017a6f2b33abe28f4f4", "b78f9987638719b714a19202a33114bb46335a4d", "48a12a320ec4751096e9bb298bf16b4dfb29a8bf", "3a197ce85e35890c15de0bca2f1b405037129829", "8816995476050662c38a945d5fae3d67a7fc3bae" ], "paperAbstract": "Query understanding is a challenging task primarily due to the inherent ambiguity of natural language. A common strategy for improving the understanding of natural language queries is to annotate them with semantic information mined from a knowledge base. Nevertheless, queries with different intents may arguably benefit from specialized annotation strategies. For instance, some queries could be effectively annotated with a single entity or an entity attribute, others could be better represented by a list of entities of a single type or by entities of multiple distinct types, and others may be simply ambiguous. In this paper, we propose a framework for learning semantic query annotations suitable to the target intent of each individual query. Thorough experiments on a publicly available benchmark show that our proposed approach can significantly improve state-of-the-art intent-agnostic approaches based on Markov random fields and learning to rank. 
Our results further demonstrate the consistent effectiveness of our approach for queries of various target intents, lengths, and difficulty levels, as well as its robustness to noise in intent detection.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080825" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bdb94b816836f891495fe152de79ace1e3d14d9b", "sources": [ "DBLP" ], "title": "Intent-Aware Semantic Query Annotation", "venue": "SIGIR", "year": 2017 }, "be001fe8d3823490417e0866331dba500520f5eb": { "authors": [ { "ids": [ "2131120" ], "name": "Chun Yang" }, { "ids": [ "6293765" ], "name": "Xianhua Liu" }, { "ids": [ "39423013" ], "name": "Xu Cheng" } ], "doi": "10.1145/3050748.3050762", "doiUrl": "https://doi.org/10.1145/3050748.3050762", "entities": [ "Booting", "CPU cache", "Cache (computing)", "Data deduplication", "Disk image", "Fingerprint", "Hypervisor", "Online and offline", "Public key fingerprint", "Random access", "Read-write memory", "Run time (program lifecycle phase)", "Semiconductor consolidation", "Throughput", "Virtual disk", "z/VM" ], "id": "be001fe8d3823490417e0866331dba500520f5eb", "inCitations": [], "journalName": "", "journalPages": "214-227", "journalVolume": "", "outCitations": [ "26497a6800ece4c608b0fd4d42fab6ae3a9d7af6", "612a8604f26c32457f47e52aa4675fd5dab84c7c", "4e8839416133588c10cc56d6325db55a42fe2215", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "92a6961f076307d5b4778fd45d5f01f6d5d84e12", "d7df6cc3a35b4a5b9bc95aacdcad810288f11f9b", "bb90aa0bd362d615e3598f52504d06b20125512d", "0c279813f1dba545c50c237f69b89c6496117015", "7c0699937a1775a01ee8ec97ca30f5427f020b99", "86337138bb6dfabef8e1d45ec3c4e30d64c3ce36", "02cc5b5ad2d7ca9f83c9e566bcb5f9b608ab0619", "b522597f20f69526bb748a2b592d8db8e9983dac", "898b60ae12a855ac9ad91f93543d82ce00ee76ff", "67b2c85458667cb15c13beb66d1559f39637c145", "16444e411e3e33bf7cb3b813f76834fc3dd87d72", "bf6275801e4bac2918f1b8698c2892e1a375808f", 
"10d011c92b87833ac2186b963c0bf5e039cd6c7e", "0a9a3fa7b948f12555e0df34069cbd8c7e752cac", "4c664c7015285ce14063204d0790dffbb7bbf46c", "481086af0ac174dc0416bc7daf33100fab5c649b", "0ce479229630e55e732597cf9b2aeb5018aae4c2", "045729ec838ecc50be166fe4511506ac4a08226d", "3574657705475722b6c398c266805f758268778b", "18eadfc4a6bcffd6f1ca1d1534a54a3848442d46", "2f2cdd7b0c98b5e43b61272d2ac3ebb5cd29041d", "3ab540622889dec61a3f98bf9990f62b80492dc0", "3437a7e23e3f97b58f4cf73e7e5b711131e6706c", "2918c9f078aa5dfa0a8ea08ba689aa7a45d9d21c", "07042865b10297ca4fc9164829d6330db2f60b4c", "445728ecb0eabed9f7433b0c96bd36d53cb312c9", "1506e49a71ffcc4d201928dbc76a881608c9c6c4" ], "paperAbstract": "Storage consolidation in a virtualized environment introduces numerous duplications in virtual disks and imposes considerable pressure on disk I/O and caching. In this paper, we present a content look-aside buffer (CLB) approach for simultaneously providing redundancy-free virtual disk I/O and caching. CLB attaches persistent fingerprints to virtual disk blocks, which enables detection of I/O redundancy before disk access. At run time, CLB exploits content pages already present in the guest disk caches to service the redundant reads through page sharing, thus eliminating both redundant I/O requests and redundant disk cache copies. For write requests, CLB uses a group invalidating writeback protocol for updating fingerprints to support crash consistency while minimizing disk write overhead. By implementing and evaluating a CLB prototype on KVM hypervisor, we demonstrate that CLB delivers considerably improved I/O performance with realistic workloads. Our CLB prototype improves the throughput of sequential and random read on duplicate data by 4.1x and 26.2x, respectively. For typical read-intensive workloads, such as booting VM and launching application, CLB's I/O deduplication and cache deduplication eliminates 94.9%--98.5% of read requests and saves 50%--100% cache memory in each VM, respectively. 
Compared with the QEMU's raw virtual disk format, CLB improves the per-disk VM density by 8x--16x. For mixed read-write workloads, the cost of on-line fingerprint updating offsets the read benefit; nevertheless, CLB substantially improves overall performance.", "pdfUrls": [ "http://doi.acm.org/10.1145/3050748.3050762" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/be001fe8d3823490417e0866331dba500520f5eb", "sources": [ "DBLP" ], "title": "Content Look-Aside Buffer for Redundancy-Free Virtual Disk I/O and Caching", "venue": "VEE", "year": 2017 }, "be19f18007845e0b4d10a9b6f63acf67a8e7b70e": { "authors": [ { "ids": [ "5868759" ], "name": "Xueping Liang" }, { "ids": [ "1719516" ], "name": "Sachin Shetty" }, { "ids": [ "2314055" ], "name": "Deepak K. Tosh" }, { "ids": [ "1769233" ], "name": "Charles A. Kamhoua" }, { "ids": [ "1723424" ], "name": "Kevin A. Kwiat" }, { "ids": [ "8829863" ], "name": "Laurent Njilla" } ], "doi": "", "doiUrl": "", "entities": [ "Bitcoin", "Cloud computing", "Cloud storage", "Computer data storage", "Computer forensics", "Data validation", "Performance Evaluation", "Privacy", "Tamper resistance" ], "id": "be19f18007845e0b4d10a9b6f63acf67a8e7b70e", "inCitations": [ "fe21d2d731678a5c05f95c9b762336d3d47569e5", "53f8c18473cb5b091f5a591943ba10b8076dab63", "1752b34d9e8fd906052a59022e9cad5af37dee73", "5aa42fa97a4fa12e52410cdb396c48fd464cd91d", "1a0fdc54e7ac5572275eb466fdc621b0d65d9740", "48eff5ee66cf0ad553c07763192b31bb747a306f", "832de0959659a7dd6bd88ccfe074225cc1e27645", "21d9d7178d374b67cd4f605db1c8007b42a70ffc", "5cada1c7108bdf2066ec6eab30e6dd537dedd769", "11f2a7da97ddc0c45b63c2b9a5b08e287c779381", "f317a3f472c491d04e53845d43234e138450a522", "cf1dcc9d8ae1655b1717c531c7d74d4b2e853750", "5c9b76e66cca05a412e28e5b439b85963413e970", "f3548c62a1c8b39d3379e5b230c62de2740a1731", "7f6a13251f59f853d09bf2b0be2a5dfe65952e80", "e5f35fcd6fb2b8a3f1eea82163a241b02603a48e", "10b9d7206f16f27408e9e3472de576b12c6ea464" ], "journalName": 
"2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "468-477", "journalVolume": "", "outCitations": [ "d827a9dfd11722249a375aa478ef2c849079f94c", "11f2a7da97ddc0c45b63c2b9a5b08e287c779381", "9bb0aa7c062a1ac3df0a73d1e7caa88937e9716e", "0a3239ae38ff62c658a962d3a8cb9d1f76c02f6f", "26cce0e0da87774f939c541277d7c636ebc485fb", "5670d010e3dc07e67832647f6dddffa9dd294954", "a30aec693a9956652ff348e740c5f326d68c625d", "73dffadfc50ebfc3d8e426ea9c05d06708f612ff", "1f42fdecd70a7d72f0f108e80511320f7204316c", "2809d4876e34b8c64fc1783fe6a0a278770505b0", "ad4bdf06d1259f4b495825bbd1458fe8f9804254", "2edfc6f13d38f0b3a00e8ad3e166cd038f64f7ed", "3f874a8f4091da96a1509e64253fefd96c7acb00", "35339f6f2e99c04920f21883df1db8004436cdc7", "01ecd06d16b9ee6afef08aff9b0e2448222b097c", "48b1a1b0db3dafbb4a82c2cdf8e1580dc73dfa1f", "49cd5a7561acfb7807a1f962458fbada433abb4d", "9dbdbf81e86214a06643522b2ab9c3daea4fec04", "25a17e483599215949cb3961fd945f6867d3bcae", "1a466f3195490a0f5325c994f4ba14e02eae55b9", "447442bfe3854de9a2bab439caf48867f41c9adf", "55339bb4044d80b97add783a97e02ac38c149cbc" ], "paperAbstract": "Cloud data provenance is metadata that records the history of the creation and operations performed on a cloud data object. Secure data provenance is crucial for data accountability, forensics and privacy. In this paper, we propose a decentralized and trusted cloud data provenance architecture using blockchain technology. Blockchain-based data provenance can provide tamper-proof records, enable the transparency of data accountability in the cloud, and help to enhance the privacy and availability of the provenance data. We make use of the cloud storage scenario and choose the cloud file as a data unit to detect user operations for collecting provenance data. We design and implement ProvChain, an architecture to collect and verify cloud data provenance, by embedding the provenance data into blockchain transactions. 
ProvChain operates mainly in three phases: (1) provenance data collection, (2) provenance data storage, and (3) provenance data validation. Results from performance evaluation demonstrate that ProvChain provides security features including tamper-proof provenance, user privacy and reliability with low overhead for the cloud storage applications.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101176" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/be19f18007845e0b4d10a9b6f63acf67a8e7b70e", "sources": [ "DBLP" ], "title": "ProvChain: A Blockchain-Based Data Provenance Architecture in Cloud Environment with Enhanced Privacy and Availability", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "be1a2c8c588b1dd4f8ae6c6358ea4ce6a3a63bdd": { "authors": [ { "ids": [ "2617391" ], "name": "Elaine Angelino" }, { "ids": [ "10714706" ], "name": "Nicholas Larus-Stone" }, { "ids": [ "3416346" ], "name": "Daniel Alabi" }, { "ids": [ "1745942" ], "name": "Margo I. 
Seltzer" }, { "ids": [ "1756737" ], "name": "Cynthia Rudin" } ], "doi": "10.1145/3097983.3098047", "doiUrl": "https://doi.org/10.1145/3097983.3098047", "entities": [ "Algorithm", "Data structure", "Decision tree", "Decision tree learning", "Discrete optimization", "Feature vector", "Optimization problem", "Program optimization", "Speedup" ], "id": "be1a2c8c588b1dd4f8ae6c6358ea4ce6a3a63bdd", "inCitations": [ "518f6e4666a09443f3c0765e9285dd8ac77a1f7e", "f01a3e0b1926e17df8fa262e52eee6b0caa9f916", "bc373f9540eb1d1ec81092d0ff1428e837ccb6dc", "382f1ebe6009e580949d5513bc298cb253a1eeda", "d2adc4478fed3dd7dfa3b95d5e9d66452f54110a", "a9007cf4c4ec7e4fc956bead7008a3605451de49" ], "journalName": "", "journalPages": "35-44", "journalVolume": "", "outCitations": [ "2a89a39e18991ea9bd2ed064f794565aff3dbdfd", "e3ea7765c841a1f5c9856be718fb89e890ed93cf", "e96dc1d785ffd64e12ccf025de7a5e4f277d0cc9", "e59066910963137c75d0a91f32ee010e031ae073", "23d6818a67bcbfc82ff57f2b1727aa451149b212", "5931f8d15ce705b45f10e84d08f6c91584861d8f", "497449e075c9a9bd82b66c24e16cd9b0d0fddc66", "25d7d975a12fd657b4743acd262cbdfe2dc2e6e9", "c89c34b078c018fc5f1ac06169acba673a71c843", "08e9c9e2eca4fb1fb53c596a5024d6740b2ee667", "6ad20b53f699712232cf499bd6c5d9d08a933214", "c1453c1dda5764d98c089282a0d61bfe283d2b50", "bb2e1487aa5ce18a9a9327e67d91397700e59ca6", "236fd719c3e099da592590260cdb6439ea91ab8d", "e766b55ea117b080eac4d23c150e99424cea9399", "24ab513a04791708b56699f253b5d3315cc7fb4e", "399812d46345ad7d93f5510b1bbda30948e7a65c", "14d64f0b81ba78ccfaca6a6a587a5d2569d6dbd6", "4d9f2dc29c9df030faee185035e2c073ffbf5378", "917fee5ffe7957806b5e2148c08dae3641eb9003", "8a0f1757c9819dce17445320e6503f0958fe708d", "000cd39edecfc04f42ebf9ad58b6ddce8c979855", "7c12f430bc0d350e234a5f20f2815ca070913f5f", "703667f02e8c7738c383d15e2afd8ce5fc77c281", "2e1dab46b0547f4a08adf8d4dfffc9e8cd6b0054", "00e752adb3c2e3715c6f2c37756d75d1e9678877", "3c2f707710197be72738ce5353d0e2cb0d238397", "0a5cd9d48d8b98f45a0fba7ea9dfca05bf05a497", 
"321e6f0a75aa4c9713c1466440e481db84217228", "e8f0196b2b427809af36252fa183637df2903894", "ce0b8b6fca7dc089548cc2e9aaac3bae82bb19da", "38171ef0443ef60c78a861838eccd24f004c22b2" ], "paperAbstract": "We present the design and implementation of a custom discrete optimization technique for building rule lists over a categorical feature space. Our algorithm provides the optimal solution, with a certificate of optimality. By leveraging algorithmic bounds, efficient data structures, and computational reuse, we achieve several orders of magnitude speedup in time and a massive reduction of memory consumption. We demonstrate that our approach produces optimal rule lists on practical problems in seconds. This framework is a novel alternative to CART and other decision tree methods.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098047" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/be1a2c8c588b1dd4f8ae6c6358ea4ce6a3a63bdd", "sources": [ "DBLP" ], "title": "Learning Certifiably Optimal Rule Lists", "venue": "KDD", "year": 2017 }, "be6e5629fcd1cef23c5541bcb3570fde326db4bd": { "authors": [ { "ids": [ "33396960" ], "name": "Ted Kaminski" }, { "ids": [ "2777290" ], "name": "Eric Van Wyk" } ], "doi": "10.1145/3136014.3136023", "doiUrl": "https://doi.org/10.1145/3136014.3136023", "entities": [ "Attribute grammar", "Coherence (physics)", "Compiler", "Context-free grammar", "Correctness (computer science)", "Extensible programming", "Interference (communication)", "Non-interference (security)", "Parsing", "Programming language", "Terminate (software)", "Termination analysis" ], "id": "be6e5629fcd1cef23c5541bcb3570fde326db4bd", "inCitations": [ "20a71681be001a27b35389d028e0edd97007d70f" ], "journalName": "", "journalPages": "163-174", "journalVolume": "", "outCitations": [ "132055e61e9509fbd0e0d8892229c1dba38d6a84", "81ca7ac75b1ccdee0bd8a9e93b7d79bdc17a31a9", "3ed5235695f821c36d2264bb60a607cd163d2292", "45f9a0ceba5d4aaa56525fa7beb4a9537bfe011c", 
"d18e91ddfd00b2a04cdbbf800f25b3ce12e1c982", "0ba027d8f0e2ac77a57a02fb950da68795e9cf26", "60dbcfaccba3a8ae7653d6dbe8e4148fa19609cc", "12a449686a13b2b1c0ecfe4492a76b4ae0fe36ef", "45414af0f8ea5472660da0d6c65fc3a5a927b974", "ea0ae2de694ba6c7243c00c0216856936aa46d38", "457e62e93d81b1aee73e543f1bc19b5fb4ca1416", "403e17c145103919a9d8248d6bbdd8ef3516777f", "0954212d0d60a1053de84760d96df2f5dea6c208", "72e40e4bef8906055fadc245c4773d520334f7f6", "e9399861426542b884f6324aea19a8981b6f9327", "0b61a17906637ece5a9c5e7e3e6de93378209706", "7ef221d33b50067333a24076f17a3186847d97bc", "2f176f377081306c05e54db5a33270cb5cb1a1e3", "0de8645ab6f294b332afba2f8badec0f5c172f03", "8c0f865cba4d4e298c623252dad8ac9cd77e50b1", "05f0c383c785f168da8e80c903517ec5fdf71d41", "0b6ecc7aa29ec0c7439e9000cb42ba7c59a3e3a3", "79fee024198661361f92362d55b5ca58899b78ab", "032109815c2c824b2f8fa4e712c4207a610ae833" ], "paperAbstract": "Extensible language frameworks aim to allow independently-developed language extensions to be easily added to a host programming language. It should not require being a compiler expert, and the resulting compiler should \"just work\" as expected. Previous work has shown how specifications for parsing (based on context free grammars) and for semantic analysis (based on attribute grammars) can be automatically and reliably composed, ensuring that the resulting compiler does not terminate abnormally. \n However, this work does not ensure that a property proven to hold for a language (or extended language) still holds when another extension is added, a problem we call interference. We present a solution to this problem using of a logical notion of coherence. We show that a useful class of language extensions, implemented as attribute grammars, preserve all coherent properties. If we also restrict extensions to only making use of coherent properties in establishing their correctness, then the correctness properties of each extension will hold when composed with other extensions. 
As a result, there can be no interference: each extension behaves as specified.", "pdfUrls": [ "http://www-users.cs.umn.edu/~evw/pubs/kaminski17sle/kaminski17sle.pdf", "http://doi.acm.org/10.1145/3136014.3136023" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/be6e5629fcd1cef23c5541bcb3570fde326db4bd", "sources": [ "DBLP" ], "title": "Ensuring non-interference of composable language extensions", "venue": "SLE", "year": 2017 }, "be9472665dfdad9c5a499b39648ddd5a0020e4c3": { "authors": [ { "ids": [ "1736568" ], "name": "Pengfei Wang" }, { "ids": [ "2274395" ], "name": "Yanjie Fu" }, { "ids": [ "2045628" ], "name": "Guannan Liu" }, { "ids": [ "40506516" ], "name": "Wenqing Hu" }, { "ids": [ "1682418" ], "name": "Charu C. Aggarwal" } ], "doi": "10.1145/3097983.3098067", "doiUrl": "https://doi.org/10.1145/3097983.3098067", "entities": [ "Algorithm", "Electron mobility", "Experiment", "Photoelectric effect", "Synthetic data", "Time of arrival", "Topic model" ], "id": "be9472665dfdad9c5a499b39648ddd5a0020e4c3", "inCitations": [ "1d7532735001e8997149681d0212e4198e4553ce", "7cbbdc878eb1e0c82372c5a0d5659d4e1c0b7b2a" ], "journalName": "", "journalPages": "495-503", "journalVolume": "", "outCitations": [ "4fa90eb3b64c9efd0ced198add08b9dc543ddbbf", "12e04dad4c983662d21ec499ef45fc5761c4ea75", "2bbbc937de355cc2971433d5c67cd984d5472fe2", "58a8bead87c8c1e37460dce28285c053c270f6e7", "1609d8b48fb59d6d7978ee8f05f5915b027dc5c3", "6cba1fcd86225dfd1358144c9b3782a369fffa4a", "850b234faaecb8c3ffb1b37e5fdb18843183d220", "537328f97adf58eb3ca05c2ca8fea8ab5d7bc268", "5ee3a3b79e089cf007847893b3a1113fbe3cb637", "3ae0b1ce788a195ae5639360213ae3ad2d464229", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "372d1ee4d726e9b537ea55f3aa996f8f0eede409", "ac1526a259a94cd50980e4b4032f8f9877a8d090", "017372aec4b163ed6300499d40e316d2a0a7a9dd", "651ae94750c28c57ea6b3f5f2b0fedc83236f1d9", "119a67a62b0e9351e4348d234d9eae4c84f366a3", "496d47f6924382aee6658cb8260e3a4b09ae3f9e", 
"93bac5972cadef8f0cc63d40a058e029354dbc40", "01e6c986e4fa35f604b7c7b701f0bf682d4103be", "e0e97cad8336d0ef2e50a2e8cd300af6295eb83f", "261d3ea571a9114d64764e3fa1d4a4ca4df4294c", "3ec9970b395a6c4b91eefa6654bd21d22074385c", "246d1ec848c0e6879e0cdf2993eeefe48ca48029", "9fb10a3c2dcec939784ce208e0e7e7fda4be895c", "c08206b44dd1f0ea54bd073e4effaf2e4483169b", "84455d00e44b8e924327f2b15e1e41a7dcc698f0", "29028648260ded3ada220854835e215ed30a81d1", "a0e8752d5f83e721ee74ab25248b83d958c9e80c" ], "paperAbstract": "While exploring human mobility can benefit many applications such as smart transportation, city planning, and urban economics, there are two key questions that need to be answered: (i) What is the nature of the spatial diffusion of human mobility across regions with different urban functions? (ii) How to spot and trace the trip purposes of human mobility trajectories? To answer these questions, we study large-scale and city-wide taxi trajectories; and furtherly organize them as arrival sequences according to the chronological arrival time. We figure out an important property across different regions from the arrival sequences, namely human mobility synchronization effect, which can be exploited to explain the phenomenon that two regions have similar arrival patterns in particular time periods if they share similar urban functions. In addition, the arrival sequences are mixed by arrival events with distinct trip purposes, which can be revealed by the regional environment of both the origins and destinations. To that end, in this paper, we develop a joint model that integrates Mixture of Hawkes Process (MHP) with a hierarchical topic model to capture the arrival sequences with mixed trip purposes. Essentially, the human mobility synchronization effect is encoded as a synchronization rate in the MHP; while the regional environment is modeled by introducing latent Trip Purpose and POI Topic to generate the Point of Interests (POIs) in the regions. 
Moreover, we provide an effective inference algorithm for parameter learning. Finally, we conduct intensive experiments on synthetic data and real-world data, and the experimental results have demonstrated the effectiveness of the proposed model.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098067" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/be9472665dfdad9c5a499b39648ddd5a0020e4c3", "sources": [ "DBLP" ], "title": "Human Mobility Synchronization and Trip Purpose Detection with Mixture of Hawkes Processes", "venue": "KDD", "year": 2017 }, "bea3d5312f7c91cc6239c0bf169b2e59b7e7cb23": { "authors": [ { "ids": [ "38001371" ], "name": "Alejandro Villegas" }, { "ids": [ "1768998" ], "name": "Rafael Asenjo" }, { "ids": [ "1751660" ], "name": "Angeles G. Navarro" }, { "ids": [ "3190187" ], "name": "Oscar G. Plata" }, { "ids": [ "1791987" ], "name": "Rafael Ubal" }, { "ids": [ "1771736" ], "name": "David R. Kaeli" } ], "doi": "10.1007/978-3-319-64203-1_20", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_20", "entities": [ "Graphics processing unit", "Scratchpad memory" ], "id": "bea3d5312f7c91cc6239c0bf169b2e59b7e7cb23", "inCitations": [ "c9b78500940c5d80630f7299af19e602eb90e093" ], "journalName": "", "journalPages": "273-286", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_20" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bea3d5312f7c91cc6239c0bf169b2e59b7e7cb23", "sources": [ "DBLP" ], "title": "Hardware Support for Scratchpad Memory Transactions on GPU Architectures", "venue": "Euro-Par", "year": 2017 }, "beaaf1cad62e5b3b9a6692935902ee2b3004d203": { "authors": [ { "ids": [ "1734704" ], "name": "Jingjing Wang" }, { "ids": [ "32434326" ], "name": "Tobin Baker" }, { "ids": [ "1718134" ], "name": "Magdalena Balazinska" }, { "ids": [ "36823652" ], "name": "Daniel Halperin" }, { "ids": [ "40604484" ], "name": "Brandon Haynes" }, { 
"ids": [ "1686294" ], "name": "Bill Howe" }, { "ids": [ "40035295" ], "name": "Dylan Hutchison" }, { "ids": [ "2278389" ], "name": "Shrainik Jain" }, { "ids": [ "40081201" ], "name": "Ryan Maas" }, { "ids": [ "35207951" ], "name": "Parmita Mehta" }, { "ids": [ "1714104" ], "name": "Dominik Moritz" }, { "ids": [ "40281316" ], "name": "Brandon Myers" }, { "ids": [ "40382476" ], "name": "Jennifer Ortiz" }, { "ids": [ "9246931" ], "name": "Dan Suciu" }, { "ids": [ "20825910" ], "name": "Andrew Whitaker" }, { "ids": [ "1694822" ], "name": "Shengliang Xu" } ], "doi": "", "doiUrl": "", "entities": [ "Big data", "Cloud computing", "Data science" ], "id": "beaaf1cad62e5b3b9a6692935902ee2b3004d203", "inCitations": [ "1156373cdb17608780bd2c00fff26bcbeeb2189c", "75d24bcc3dc141d1b427b9f13a9bc540e83486cc", "14609528f7adb5a43bf75338308b823cc2a68335", "528bdbe171ca7ed4d0ec722a3fb773610e250788", "532012a0c0fc83b672966cc15d01e2c88113eb91", "60124fda34f3d178c23f7aee9666b9e29fe52a3c", "6e4682ba700c6926b49aa7b6215ab8d9afdd7b7f", "0c02f8b36629b43e3825ab856b0a09e188e7355a" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "cc1cabae3da49e4710f3f7e7b0f9cac778ebad4a", "00a2aeb87287d59eaf118e96f0cdbb622a7fec42", "28475bf14885d62951c060197191c0db99c8e24e", "676e50a4d2141ae66a0d2aafcf79c8c989fcce33", "7ec028ace29244cb74c105327a7e4177a34aa6bd", "52d81096f46be0e75f85e0b7eeda65640c281630", "880cbaa9efb31d6c0834fe9565b0160896c77ca2", "26bfd898fcb0cd817a178ebfa06b38abc6c0c516", "e47868841d87efe261451a43b00d6c81cf7fb7a3", "5a3cd8c65ffcc25bef346174d1f0bc3f83c5cbbb", "51afd8e73b564fafc0a593984f8bf5aad2829642", "0558c94a094158ecd64f0d5014d3d9668054fb97", "44013d764279255f9ab407d7131b555827cd156e", "5a624296884f47ccd1fabae874e3eefcb889e58c", "5f6d5608cf1b3071c938a2271637fc555bf53231", "27174cf4ffd83e2044549df0f2872608b73a6ef6", "e87cba48ff23979930a8b13ec5bedb3283d1f629", "3247ceba48dd8d8f8dc4a7d4fb5b9940db7d06af", "7072ef929fa3d7c3177ca868309523c588a67bd1", 
"218166220acbb4951c1b3e5e568ac92fb9b55b8b", "ad78c197e040e2814431c72eacc1bfe694ad84f3", "370e1fcea7074072fe5946d3e728affd582a9a44", "0c3daa226670aa66ba4816c5f51d6f58ac94c41c", "0706356c9ab6014d6b04577d38289ea8328291a5", "5b8b0ca444c9efffb82d221ac01197730ebf58e6", "1835e1b7a3a8a59a49edcd4e0144df0c5b73b812", "72ec19ead4007e130786bde139c56c980f5466c5", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "2cdcddb08ae6060e94cba6c9b2b58b87324e686f", "2cfbdfcf3f590cbd7a6c9c4299eb42569e77697c", "043cf6a9e18cb499da540ffd58b37086158ffd68", "18c021c9cce95ed5615a060f590b8388b604e7c5", "084d297399d6bd9bad1f090933f261d858a31b88", "1476456265553d489ec85c6f3c78a092ee8ee681", "2997435fe9f0e646e6a37d9783b520b9cdbdd38b", "190727a716f6529d1d651d73740555eeb19b1196", "f51e94244f9207a73df03eda0630d44ec3691216", "0e23117148029fbef47d1eed869c7952546e53aa", "49a31ab5bd237c4645401981669ec3f833f3894f", "7b67164efedc43b63f8957aaf6f573c09640ed0d", "4ca383c0ee2bc5ca13a7884faab2e9c0583e35a4", "12d355e2ba7d002c5a4c1c67af417d2428dbb547", "7a75c886b043e7c3f77829412774de27648f384a" ], "paperAbstract": "In this paper, we present an overview of the Myria stack for big data management and analytics that we developed in the database group at the University of Washington and that we have been operating as a cloud service aimed at domain scientists around the UW campus. 
We highlight Myria\u2019s key design choices and innovations and report on our experience with using Myria for various data science use-cases.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p37-wang-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/beaa/f1cad62e5b3b9a6692935902ee2b3004d203.pdf", "s2Url": "https://semanticscholar.org/paper/beaaf1cad62e5b3b9a6692935902ee2b3004d203", "sources": [ "DBLP" ], "title": "The Myria Big Data Management and Analytics System and Cloud Services", "venue": "CIDR", "year": 2017 }, "beb1f6329e6f282115b5aabc026092dd30d5a613": { "authors": [ { "ids": [ "33411132" ], "name": "Kurt Thomas" }, { "ids": [ "13154862" ], "name": "Frank Li" }, { "ids": [ "1934617" ], "name": "Ali Zand" }, { "ids": [ "38350169" ], "name": "Jacob Barrett" }, { "ids": [ "3027501" ], "name": "Juri Ranieri" }, { "ids": [ "1829941" ], "name": "Luca Invernizzi" }, { "ids": [ "6439071" ], "name": "Yarik Markov" }, { "ids": [ "3447508" ], "name": "Oxana Comanescu" }, { "ids": [ "2494273" ], "name": "Vijay Eranti" }, { "ids": [ "2950347" ], "name": "Angelique Moscicki" }, { "ids": [ "5335709" ], "name": "Dan Margolis" }, { "ids": [ "1744800" ], "name": "Vern Paxson" }, { "ids": [ "1687723" ], "name": "Elie Bursztein" } ], "doi": "10.1145/3133956.3134067", "doiUrl": "https://doi.org/10.1145/3133956.3134067", "entities": [ "Authentication", "Black hat", "Credential", "Data breach", "E-services", "Ecosystem", "Email", "Google Account", "Keystroke logging", "Malware", "Online identity", "Password", "Phishing", "Risk IT", "Trust metric", "Underground", "User (computing)" ], "id": "beb1f6329e6f282115b5aabc026092dd30d5a613", "inCitations": [ "0b196550a65c6ebb17104cdc631286bc741baf18", "35ade5c894d5c859d72736f4e88124a3946235b6", "7cfcce2a3731e6f359b67e2683b4e37572f17dbb" ], "journalName": "", "journalPages": "1421-1434", "journalVolume": "", "outCitations": [ "04645e17a1acb783a2ffb2b9b201624c76d52ae2", "254687dec6e5456fa289826da6558186ef2cc24f", 
"5d3884cc6b46f0011371a6fb5c35a370a8c713c5", "6b7dee7321564106e889e57371af620e3b5f5796", "8bba0a42134ea5df896cdde59e62bbd90e9f4dd4", "4681a0116597fd0804b07e8176b8761e4f569743", "3374241a54e2b1d4809e7957234ee22e7a112641", "a8b16f43341adc956022bca26d3727ab8148b857", "2e265d8c1e0ae2740cd7bd386d11c3d6f1ba1ca7", "418e058c0dd22b18994ebdba8bd4713bf92588f7", "2cfbb7b89a5e220b21bbf64161dc880c1b644017", "da2695f7ba0b56feccd9f4c3c2bad61c9881921a", "7a2af54b21beb30388e109974042ee10b8d0591e", "db6e3e81788352258fbecdfd0e303cada27aab08", "55cf4d3b6b05d9bc5dbbbb0bfd5072b07a697437", "247f76030645a3ae28e23e249b23d7fd08b859df", "1558b1d92f41eb01c49ba9c548a9a5adfb1aebae", "6f5d94254c255a3296c1043549c7bc96e6373540", "53852d69c008f9ebfb05939b4eb7c1f3279437e6", "d7c97b150edaf75f8ee9a20246e54eff353d9a26", "4716cca7c5c936d4db5d0faa62673997e3bdff3c", "9db1ca86b92cb5e0a21263de77e3e266b71637af", "05d142c685b545c6426a0ea36b1c23dd4a59ed1c", "b4be4404ae4a102a27ca977f1ac538feb27f375e", "0284bb540148abf8c8e0fe4a916d9b2935e0942f", "353a9b91c8d6ee3cf9056ae81875d017590cc10a" ], "paperAbstract": "In this paper, we present the first longitudinal measurement study of the underground ecosystem fueling credential theft and assess the risk it poses to millions of users. Over the course of March, 2016--March, 2017, we identify 788,000 potential victims of off-the-shelf keyloggers; 12.4 million potential victims of phishing kits; and 1.9 billion usernames and passwords exposed via data breaches and traded on blackmarket forums. Using this dataset, we explore to what degree the stolen passwords---which originate from thousands of online services---enable an attacker to obtain a victim's valid email credentials---and thus complete control of their online identity due to transitive trust. Drawing upon Google as a case study, we find 7--25% of exposed passwords match a victim's Google account. 
For these accounts, we show how hardening authentication mechanisms to include additional risk signals such as a user's historical geolocations and device profiles helps to mitigate the risk of hijacking. Beyond these risk metrics, we delve into the global reach of the miscreants involved in credential theft and the blackhat tools they rely on. We observe a remarkable lack of external pressure on bad actors, with phishing kit playbooks and keylogger capabilities remaining largely unchanged since the mid-2000s.", "pdfUrls": [ "https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46437.pdf", "https://acmccs.github.io/papers/p1421-thomasAembCC.pdf", "http://doi.acm.org/10.1145/3133956.3134067" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/beb1f6329e6f282115b5aabc026092dd30d5a613", "sources": [ "DBLP" ], "title": "Data Breaches, Phishing, or Malware?: Understanding the Risks of Stolen Credentials", "venue": "CCS", "year": 2017 }, "bf006dc952ff01f6daaa87933bb812859127ed1c": { "authors": [ { "ids": [ "3169583" ], "name": "Aurojit Panda" }, { "ids": [ "1702872" ], "name": "Shmuel Sagiv" }, { "ids": [ "1753148" ], "name": "Scott Shenker" } ], "doi": "10.1145/3102980.3102986", "doiUrl": "https://doi.org/10.1145/3102980.3102986", "entities": [ "Correctness (computer science)", "Formal methods", "Microservices", "Remote procedure call", "Run time (program lifecycle phase)", "Scalability" ], "id": "bf006dc952ff01f6daaa87933bb812859127ed1c", "inCitations": [ "8f42dedd1ae7468a7c7299cefe235a74c75f7d2c", "16090ea0f2aabd3d890e2eafaf461c39a872b766", "5ef00014862c26bada8b8a084400256d9e30f469" ], "journalName": "", "journalPages": "30-36", "journalVolume": "", "outCitations": [ "42d1b52254873ecd0f36eb7342f95dbad9c50187", "36222f8eb2ccf21ca345e15186cea64506581543", "9c21194d0b7ba09ae8adf2d66cd335ca204d5dc6", "10da8673314188dd6ab1f16f73c05358771dd8cf", "00cf4b13a1bd202ccebe1e7bd0587f11e98ec3d6", 
"5a8aafae38aeabd5533a271ab02396529b7b41c9", "d859b6698969b788bd25bc2abd96e6e57e341bea", "0ed986a645ed6448b4e360cee9e39b373a0bfef0", "883a595fd76cb4dc0509a1005040286b31610059", "78ea9dea4915d5b2a86eb1854145c91ac2c8aba5", "27f4001214ce0d449eb05d33626f444526accc7c", "79aec2093b2a1b0197e7d145b5cf86abc70fee3e", "b58e9ea3750b7e0483306e6e92d6c59a0e5a6483", "86a7d2cf1473e597b196a96d922a457ea686fa21", "0aeb77fb41dc8e863e054fcffea7b8b3011515ce", "373f25f373c9c4b769a189c84df6bd955b434dc4", "05f0c383c785f168da8e80c903517ec5fdf71d41" ], "paperAbstract": "Many large applications are now built using collections of microservices, each of which is deployed in isolated containers and which interact with each other through the use of remote procedure calls (RPCs). The use of microservices improves scalability -- each component of an application can be scaled independently -- and deployability. However, such applications are inherently distributed and current tools do not provide mechanisms to reason about and ensure their global behavior. In this paper we argue that recent advances in formal methods and software packet processing pave the path towards building mechanisms that can ensure correctness for such systems, both when they are being built and at runtime. 
These techniques impose minimal runtime overheads and are amenable to production deployments.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3102986", "https://people.eecs.berkeley.edu/~apanda/assets/papers/ucheck-hotos17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bf006dc952ff01f6daaa87933bb812859127ed1c", "sources": [ "DBLP" ], "title": "Verification in the Age of Microservices", "venue": "HotOS", "year": 2017 }, "bf1122c2881e6b48be951c930e61fb882c1cfa9d": { "authors": [ { "ids": [ "2909388" ], "name": "Mohammad Shahrad" }, { "ids": [ "2748720" ], "name": "Cristian Klein" }, { "ids": [ "14904242" ], "name": "Liang Zheng" }, { "ids": [ "1908294" ], "name": "Mung Chiang" }, { "ids": [ "1685517" ], "name": "Erik Elmroth" }, { "ids": [ "1752172" ], "name": "David Wentzlaff" } ], "doi": "10.1145/3127479.3128611", "doiUrl": "https://doi.org/10.1145/3127479.3128611", "entities": [ "Amortized analysis", "Cloud computing", "End-to-end principle", "Fault tolerance", "Frequency capping", "Hypervisor", "Self-organization", "Service-level agreement", "Simulation" ], "id": "bf1122c2881e6b48be951c930e61fb882c1cfa9d", "inCitations": [ "c64e57ee6315feca352145e389aa5dc0b06d7185" ], "journalName": "", "journalPages": "52-65", "journalVolume": "", "outCitations": [ "a29afef550bf4edbf3293a50ef3fdb785ff1e5a3", "a37dd65ee554255d496c1807fd53eb68d3b4d7ca", "b20ea5b9e2eaea4541586c84ef45fc0b02e6e627", "9e98d529d158e2230d722f497fbc36373eaa8583", "067c7857753e21e7317b556c86e30be60aa7cac0", "0f44833eb9047158221e7b3128cde1347b58ccd6", "237e8290bb2cc98675812964f2bead61b448b7c3", "85dfe3c3053506f7602c410cfa97cc1595cd6143", "3c79d18f99f49d0cd34a8985bf0d9dd07b4e3bc8", "3a043714354fe498752b45e4cf429dbae0fb2558", "5ced6a0aab1350ef1dba574e1faa05a726d9517e", "4f86fa28602d9503a8575c5b31082284abc8415c", "1e62a5ec2a9795b4d9be2e6315b72e97a0408714", "6d44790b6d952eff28f302998e8121f90786e3ff", "5075192e0e25af961420412fed1f848282ae313e", 
"7a978f2902460e732c50c36a171deb11733df1fc", "752b64b93033944bb92a7be7f2af9f3f4a358fad", "645e060b8289c7c44f5e3612dd8f09c8818caec0", "7a7face412f48f76a898de7953680c8ce4194fb7", "08f13e484e7e51831ec13076d14570ced91a50fb", "81ba6c323d2448d3c3bc96f65c57e4eaf03faf39", "2e72178091b2ca445f46200dcba71a53417b69eb", "277f20ddc0e9fa593753ef2778110508372c597f", "37a78ef5527c2bfefbb6dbbd23080320d5a0a3df", "1bb313d0d62e55f829fced2ccf2ab3857d1fd4b6", "1e484be1595f5cfa5dfcf6d319f25e0b364484e9", "0840027f7ea3e75f20071c1e00dd11711904efe2", "8b81faaffa80f3e2a6b575d0c0c0a4831801c021", "a91f4ca63d48f1ada9e35a49d933b730e7f2ee9d", "61eff4ad67da3606c8f63c3d398f06aa3e01d9f3", "075ce944583b93b2dc7c2b3bbe53485780dfc7e2", "79bc19231b448044aa91335e7804dc1401a8080c", "678a4c0e9f7696216c6f18da1769e6a1985ec5a0", "bea39088d590a8fdae108fbb0ac1b4b8a6968929", "0d2ccae6d37b9e4053fc476887d1565de58e5924", "3af1dbbb10c662368e2a35d995807b45d024a03f", "248bdda09f6c83460fd8aa2f12d5e82cc000dba4", "440f3e59fde1fde9868bc4a0e8fa9132050ce89c", "0a07a56ed17c6541e490df16f6381073494d0058", "0ec59b7fae15a7caa3256ba31b21802d455618c2", "40df5493fdbec13cc59c0ee360c57ef37bc360cb", "2237561aecf7aae479ab608ff60a08c4e6344e09", "7e4bf4bce26804987fabf9a8cca182b5dd550a7e", "42f594993d8a7b3d4743862a73d3745e7704a018", "4ba53e66768cf4bb96a38d71f71b7c3eb7c8183a", "01dca7c7612aa71e5da87087c97a8dfffe94d43b", "8ec9980161177676f92efd21ce42d61bf513c416", "f3733ef8e0050c2f28b9212e16a2080cfc2b67ba", "0066ad02d0dd798f95194922dffb7e45e393eb9a", "2c14aab292631a96631726b479d8eb996447847d", "2b42dc17726840c2827993a0fd0f659553c63dc8", "0a96ed079dfa8768c4aba0226dd3e014a4f61f2c", "55e475c58464b6572dd9e59adccf831f495e0e09", "64dc33a7f5e0f936d4cd80390eea769d12904cae", "0c1a77449f8bfcaadeb7f63a41e30c2486b30e4f", "7996c2337e5a381b10991350cfcf34797575d41d", "4e2ddd2e9ebaf293c730ef3dda22d58de5e695aa", "3d25025039d3be366202cf41d02cc87fbe38b2ed", "7f1b528d5b5bf124372832f21a369d56e8f389f7", "3ebcf69ea430e1397328ab2351cd1e85c6edd0cd", 
"08632fe2b934ed15d3499e7321282c81adc2c390", "1f8e991be11d48ca1f14df61d5e32075e2bc5fdc", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "2f8403f29dd9ee8934266406ac59979dc06935d6", "8730033f32fbcca2c82559fa0c218143c707d7f7", "783a3224871e54834ba2c8a0a5488e84deb6b80e" ], "paperAbstract": "Cloud Infrastructure as a Service (IaaS) providers continually seek higher resource utilization to better amortize capital costs. Higher utilization not only can enable higher profit for IaaS providers but also provides a mechanism to raise energy efficiency; therefore creating greener cloud services. Unfortunately, achieving high utilization is difficult mainly due to infrastructure providers needing to maintain spare capacity to service demand fluctuations.\n Graceful degradation is a self-adaptation technique originally designed for constructing robust services that survive resource shortages. Previous work has shown that graceful degradation can also be used to improve resource utilization in the cloud by absorbing demand fluctuations and reducing spare capacity. In this work, we build a system and pricing model that enables infrastructure providers to incentivize their tenants to use graceful degradation. By using graceful degradation with an appropriate pricing model, the infrastructure provider can realize higher resource utilization while simultaneously, its tenants can increase their profit. Our proposed solution is based on a hybrid model which guarantees both reserved and peak on-demand capacities over flexible periods. It also includes a global dynamic price pair for capacity which remains uniform during each tenant's Service Level Agreement (SLA) term.\n We evaluate our scheme using simulations based on real-world traces and also implement a prototype using RUBiS on the Xen hypervisor as an end-to-end demonstration. Our analysis shows that the proposed scheme never hurts a tenant's net profit, but can improve it by as much as 93%. 
Simultaneously, it can also improve the effective utilization of contracts from 42% to as high as 99%.", "pdfUrls": [ "http://parallel.princeton.edu/papers/socc17-shahrad.pdf", "http://doi.acm.org/10.1145/3127479.3128611" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bf1122c2881e6b48be951c930e61fb882c1cfa9d", "sources": [ "DBLP" ], "title": "Incentivizing self-capping to increase cloud utilization", "venue": "SoCC", "year": 2017 }, "bf26ccc92bca086195c5f250aef2e409a1c7cd85": { "authors": [ { "ids": [ "1682675" ], "name": "Yu Wang" }, { "ids": [ "1688792" ], "name": "Qinghua Hu" }, { "ids": [ "2803189" ], "name": "Yucan Zhou" }, { "ids": [ "38059533" ], "name": "Hong Zhao" }, { "ids": [ "1771193" ], "name": "Yuhua Qian" }, { "ids": [ "3300112" ], "name": "Jiye Liang" } ], "doi": "10.1109/ICDM.2017.61", "doiUrl": "https://doi.org/10.1109/ICDM.2017.61", "entities": [ "Algorithm", "Document classification", "Entropy (information theory)", "Influence diagram", "Information", "Kullback\u2013Leibler divergence", "Loss function", "Recursion", "Risk management", "Top-down and bottom-up design", "Tree (data structure)" ], "id": "bf26ccc92bca086195c5f250aef2e409a1c7cd85", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "515-524", "journalVolume": "", "outCitations": [ "0329f632629cada61c5edaaa6a1e921a34dec34d", "0b56f1f864a5949ab4ba06a6cb7ecc0c986b3f45", "0a0c8c6eab6152910da4165688b2352cc8261d19", "98f62bb803f5cf6955d91ca101b6e83bc7151bce", "c1cb5746b3bd9d1c80c02a95a64dcb6ba86e30ff", "52363b6ebfdc3668f181f17c25ad65d6ac6ecbc5", "87b1f41d5ec0143035bb713e65a71f5192c7fb5b", "5820f6062c97063bbd9a955f681600077c9eab9b", "1fee98c45f8723d1b500464a1cf2cf497ec58663", "c23de2ab0392e707044922a673f2ab3fb908a543", "bcd842c0e6e731f347523d774b089cdf21d9e8f1", "8b9b262377e4cf15c080dd490d921872f9da631d", "7e8ce229a27ac45fe58b3c1b08fa64e84bd79b56", "9b8eb2d749b5c945d55a4ded50700cd42f01b3d6", 
"364c79d2d98819b27641c651cf6553142ef747bf", "551de9b970f9a78bb0c486936a9766e0e55a77ae", "3535821a1ae6df8691aaf82e43a8b27ec955ffa2", "5968bb9da9e1b96e574697f8034683f6213596fa", "3ce3f28c7c7a8e4bfc0a6f579ac784a0160c8b6a", "814b3c4932210fff3d8864e056849dc9c871708e", "214c3883bc9300c0dc00824eabb2fbed12e7a827", "234f11713077aa09179533a1f37c075662e25b0f", "0ee1916a0cb2dc7d3add086b5f1092c3d4beb38a", "38211dc39e41273c0007889202c69f841e02248a", "4abe6da057ead8c7f2029fd1927595dd7f382758", "5d90f06bb70a0a3dced62413346235c02b1aa086", "24c9b0b05c5e957e255b854f947472f9181772a4", "69d84abcd66dd398b590d1f1b541723af6d331c1", "094d9601e6f6c45579647e20b5f7b0eeb4e2819f", "76e7484e7d9880a56f01ba28e9262397bd10eb2a", "0786d19321c380f98ade66e4c9c8c9380ac89beb", "fa44ff6ca42f8d4bd5fbf79154648c11b8a46bf9", "313c782f18bb01933668dce56003553b49d1fc44" ], "paperAbstract": "In large-scale data classification tasks, it is becoming more and more challenging in finding a true class from a huge amount of candidate categories. Fortunately, a hierarchical structure usually exists in these massive categories. The task of utilizing this structure for effective classification is called hierarchical classification. It usually follows a top-down fashion which predicts a sample from the root node with a coarse-grained category to a leaf node with a fine-grained category. However, misclassification is inevitable if the information is insufficient or large uncertainty exists in the prediction process. In this scenario, we can design a stopping strategy to stop the sample at an internal node with a coarser category, instead of predicting a wrong leaf node. Several studies address the problem by improving performance in terms of hierarchical accuracy and informative prediction. However, all of these researches ignore an important issue: when predicting a sample at the current node, the error is inclined to occur if large uncertainty exists in the next lower level children nodes. 
In this paper, we integrate this uncertainty into a risk problem: when predicting a sample at a decision node, it will take precipitance risk in predicting the sample to a children node in the next lower level on one hand, and take conservative risk in stopping at the current node on the other. We address the risk problem by designing a Local Bayes Risk Minimization (LBRM) framework, which divides the prediction process into recursively deciding to stop or to go down at each decision node by balancing these two risks in a top-down fashion. Rather than setting a global loss function in the traditional Bayes risk framework, we replace it with different uncertainty in the two risks for each decision node. The uncertainty on the precipitance risk and the conservative risk are measured by information entropy on children nodes and information gain from the current node to children nodes, respectively. We propose a Weighted Tree Induced Error (WTIE) to obtain the predictions of minimum risk with different emphasis on the two risks. 
Experimental results on various datasets show the effectiveness of the proposed LBRM algorithm.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.61" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bf26ccc92bca086195c5f250aef2e409a1c7cd85", "sources": [ "DBLP" ], "title": "Local Bayes Risk Minimization Based Stopping Strategy for Hierarchical Classification", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "bf5210ba53d10954a8fcf825d58c69256a0e0d34": { "authors": [ { "ids": [ "37285484" ], "name": "Jin He" }, { "ids": [ "2871379" ], "name": "Lei Li" }, { "ids": [ "1748808" ], "name": "Xindong Wu" } ], "doi": "10.1109/ICDM.2017.24", "doiUrl": "https://doi.org/10.1109/ICDM.2017.24", "entities": [ "Coherence (physics)", "Data model", "Relevance", "Social media", "Topic model" ], "id": "bf5210ba53d10954a8fcf825d58c69256a0e0d34", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "147-156", "journalVolume": "", "outCitations": [ "01a7995cf0b1c89ec6322cc2b734f70e6b18e222", "93f134aa1feccdcdcfa2e61142311dc648fc54c1", "4856d097aee84b02f973687f141ef895c1d02a14", "07f3d4ee8ade9f3802fb89720d68c56c945ff453", "2ee3d212ded03aaa8d1093884bcaa4f3490eb576", "d504a5bf60d33c02a63b94f511fef83ed7ce9a9e", "9303255f863b1adb45fd35f6c99c7db2ad8b93eb", "03f64a5989e4d2ecab989d9724ad4cc58f976daf", "b841a14a15f470cb1e9cdaf24f2514424dba0959", "82d5b206c77cd5f870d7def395f64bb15974fb3c", "687e2981d007ec267430fa4346841f0d443d0fb2", "1c163c3bceacfd2c16031f3924e625390886524a", "9a4fb5aa3effa2536296fa73336eda40378651d3", "6fc8f268815bd58a4f2f4d28ec844b1f032e4f4e", "0e8d3f68c0a0eb9dab241c63aab319dbf596e697", "0bdb77530ca8c09dde9c7288971535d56f4f0478", "e5442ca82d325d8224bb1d866bb2d6e593e759da", "070fa9e4ab4c10b860b02adef4f7945a04b6e452", "a1e0fe234791749d74a5db483af0f1124c9d5fdc", "3c6735d90977621c804a740dd6baeb84f453f7be", 
"2740fe4ceccaa547cbabb57edb03f8f48a10a97e", "93ee63266fbc30df73b3c935788a2dd474bdeb30", "f47a15ee1c785e851941786a1283796f7236f9a1", "684266a922c6479f09e09532110716a0abfe6e45", "bf8116e06f7b498c6abfbf97aeb67d0838c08609", "48d159cc3523f140d44d748df41745df4ac214b9", "592262352a59c75e8b33a81ac5f05693fafa9902", "07bd2985ebe29eaa182569e1fd3e3e0f9df4c14a", "aca3f89481aedc3ee096f33f02db40952329bd63", "92c604a604942fd43c8890f58831982f8f9d6b79", "f16915e4e8d0361b8e577b2123ba4a36a25032ba", "676126f2bec6a366b7b63c075f8d47de46ac7b6a", "39fa77acdd3b814f37ee074f3a8b24151f984a85", "490f1c870caf88e8465ddbb984c267734d120335", "ecc543fa7fe5d961f1dc3d15cac86a128790cd42", "2e37d6b9a02ec31ea22252c37beae410c6b609a8", "4a1206c79a8406e79f79bba94b29ee700d6ca3dc", "215aa495b4c860a1e6d87f2c36f34da464376cc4" ], "paperAbstract": "The contents generated from different data sources are usually non-uniform, such as long texts produced by news websites and short texts produced by social media. Uncovering topics over large-scale non-uniform texts becomes an important task for analyzing network data. However, the existing methods may fail to recognize the difference between long texts and short texts. To address this problem, we propose a novel topic modeling method for non-uniform text topic modeling referred to as self-adaptive sliding window based topic model (SSWTM). Specifically, in all kinds of texts, relevant words have a closer distance to each other than irrelevant words. Based on this assumption, SSWTM extracts relevant words by using a selfadaptive sliding window and models on the whole corpus. The self-adaptive sliding window can filter noisy information and change the size of a window according to different text contents. 
Experimental results on short texts from Twitter and long texts from Chinese news articles demonstrate that our method can discover more coherent topics for non-uniform texts compared with state-of-the-art methods.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.24" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bf5210ba53d10954a8fcf825d58c69256a0e0d34", "sources": [ "DBLP" ], "title": "A Self-Adaptive Sliding Window Based Topic Model for Non-uniform Texts", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "bf5fdf89e305ea7fc36efd71fbde19bb48d08d3d": { "authors": [ { "ids": [ "2778497" ], "name": "Davood Mazinanian" }, { "ids": [ "21006921" ], "name": "Ameya Ketkar" }, { "ids": [ "2101511" ], "name": "Nikolaos Tsantalis" }, { "ids": [ "1687704" ], "name": "Danny Dig" } ], "doi": "10.1145/3133909", "doiUrl": "https://doi.org/10.1145/3133909", "entities": [ "Builder pattern", "Currying", "Duplicate code", "Functional programming", "Imperative programming", "Java", "Java version history", "Lambda calculus", "Lazy evaluation", "Open-source software", "Programmer", "Programming paradigm", "Simula", "Source lines of code", "Type system", "User interface" ], "id": "bf5fdf89e305ea7fc36efd71fbde19bb48d08d3d", "inCitations": [ "61cb30a66b76418df424bbaf1d0ca9cb3147cbc4", "96ca7e01fed62772ea9b7095254c3034851cb591" ], "journalName": "PACMPL", "journalPages": "85:1-85:31", "journalVolume": "1", "outCitations": [ "8c130365cd2bb1d3ba1810a42dc53ddc26409260", "1609a2c1c8aba50887bcb60dab2493560a557038", "6133a8dfcbf242724bb5c83d15a27492250e14b1", "482d2cbe3087bd8ac7ee6080d50e0ec7a393e014", "8865aeb8efaa49a1700230e2cb1dee4c157800c8", "29443667abac29be6b37ad577885ae61183b9271", "686b5783138e9adc75aec0f5832d69703eca41d9", "b85b942784e0a6c9d84553a1ee8b7ea090aaee84", "03eaa6db5aee327db9f33c0610c80a1962839fb0", "6bdfb2da6515fb9e36553a5a3992ed87c350f606", "04b60519a11d0f6eacfe0e1e0112097c386c9102", 
"c17548f3ddd526be75694c4f76d740b33fc6a1b1", "40db955cb3294c029fcb3f66d1f00776cd8cd3df", "570d8c97859f0aed98912dcfef4f836b952440a9", "09979e763b70c5895e0908d564cd84383298bdc0", "e18c96257aa8f67424023c370342bb0348b132c8", "48285ed3efb4d2834dc61826b727465ca29c0878", "4bc80b8f39cf4f73137e8e804712ad20d7e18088", "05584ebb3a5ae4f1702819123136bce615c304cb", "1453a41861ae423b8375e29529be3ef88e6751e2", "29f9c339028e4dfd1098bb90a6045da15b7b0ba0", "25fdb83280d5e2b6b0a4ffb1edca657fe447bc66", "53135e55f1582b29ae1a9c3f4e5fe844d56c7d6e", "41774fd7fa2cc0be2020b0e3879f302b95387879", "0154282542c64bebd27667ffd1abcf3203d3a84f", "aa6753b4d244dcc3296d40ea67786661c9dfbd00", "c1c627b86b72b0e5c073f794a4aaa537c0e61792", "95635b7e70838b51b44ebe7220c6af908aeddb40", "98d68b03898789ffac5c7db985fc4274f72489fd", "168a16c36878b84c0dedd5e5449d8c118224a630" ], "paperAbstract": "Java 8 retrofitted lambda expressions, a core feature of functional programming, into a mainstream object-oriented language with an imperative paradigm. However, we do not know how Java developers have adapted to the functional style of thinking, and more importantly, what are the reasons motivating Java developers to adopt functional programming. Without such knowledge, researchers miss opportunities to improve the state of the art, tool builders use unrealistic assumptions, language designers fail to improve upon their designs, and developers are unable to explore efficient and effective use of lambdas. \n We present the first large-scale, quantitative and qualitative empirical study to shed light on how imperative programmers use lambda expressions as a gateway into functional thinking. Particularly, we statically scrutinize the source code of 241 open-source projects with 19,770 contributors, to study the characteristics of 100,540 lambda expressions. Moreover, we investigate the historical trends and adoption rates of lambdas in the studied projects. 
To get a complementary perspective, we seek the underlying reasons on why developers introduce lambda expressions, by surveying 97 developers who are introducing lambdas in their projects, using the firehouse interview method. \n Among others, our findings revealed an increasing trend in the adoption of lambdas in Java: in 2016, the ratio of lambdas introduced per added line of code increased by 54% compared to 2015. Lambdas were used for various reasons, including but not limited to (i) making existing code more succinct and readable, (ii) avoiding code duplication, and (iii) simulating lazy evaluation of functions. Interestingly, we found out that developers are using Java's built-in functional interfaces inefficiently, i.e., they prefer to use general functional interfaces over the specialized ones, overlooking the performance overheads that might be imposed. Furthermore, developers are not adopting techniques from functional programming, e.g., currying. Finally, we present the implications of our findings for researchers, tool builders, language designers, and developers.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133909", "http://dig.cs.illinois.edu/papers/Mazinanian_OOPSLA_2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bf5fdf89e305ea7fc36efd71fbde19bb48d08d3d", "sources": [ "DBLP" ], "title": "Understanding the use of lambda expressions in Java", "venue": "PACMPL", "year": 2017 }, "bf61aacc6657a3040e9545e25dcfa91a2de8d3a1": { "authors": [ { "ids": [ "2186316" ], "name": "Huixiang Chen" }, { "ids": [ "1731598" ], "name": "Meng Wang" }, { "ids": [ "38639927" ], "name": "Yang Hu" }, { "ids": [ "3102340" ], "name": "Mingcong Song" }, { "ids": [ "39429972" ], "name": "Tao Li" } ], "doi": "10.1109/ISPASS.2017.7975271", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975271", "entities": [ "Advanced Configuration and Power Interface", "Central processing unit", "Clock rate", "Cloud computing", "Cloud gaming", "Computation", 
"Data center", "Frequency scaling", "General-purpose computing on graphics processing units", "Graphics", "Graphics processing unit", "Overhead (computing)", "Real-time data", "Resource contention", "Uncore", "X86 virtualization" ], "id": "bf61aacc6657a3040e9545e25dcfa91a2de8d3a1", "inCitations": [], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "65-76", "journalVolume": "", "outCitations": [ "fed97faf74a0f335018430d8235e8bf5800b5e17", "146139716c9e8ec4f57475b9673171761ac34074", "cffc3c4e332287ba5b67a20c0bf0fbcb8a6e5725", "35c3882db9e1b2bdf838122787968679595f61de", "6b2c12c91f904781019f187681833d35f5c06e57", "59a8a5c5b08ea695f341eb0329bd27b5ff645012", "5b3aa65922bc90b6a166ec81a81c8171b47caa37", "335504b014e48069c6dcf227645ae61830d6cf27", "45472bef11491245ad51dde6963e3cc40c5f3b79", "5d8bf9a634b8e5136726ce89ded6a014b94f2299", "388587b903aaf56791e786522246883aeaf89892", "8c08d9ba15bbbbe4293ed9b900144e91ef2bf4e4", "c65afec9f5bcabbfa5b5e72e8b8d01caa2910a2f", "0be302437cec82b9200d61d13d3125e62a8ef499", "411c80ae08b38c2072fdf2f5fe5f51e6d7a63b70", "6565cc1520fcaf69205a2c5d4d9a1065e7c6bd5b", "ce32fbc93e2df73464d563df34b5d7304d0cc91b", "5cdb8b8e13f90ec7974565372a6b772faf6d611f", "00156e79606084497789662dfaf59c3b54a10722", "14f2ab7b89c9f508f9e886e4fd5bb702c867a190", "c7d09a304c5de8050c4b97566d05d50048961ef5", "28d0b30592d994a7a736f81cfb7f7237e4f364f8", "2a660e81e6501ec3489d962fe87448ecf277237f", "294ad206a120a519cfd99294c8b5e004dcc06abf" ], "paperAbstract": "Graphics-as-a-service (GaaS) is gaining popularity in cloud computing community. There is an emerging trend of running GaaS workload using virtualized GPU in current data center deployment. 
This paper provides a detailed characterization of GaaS workload under virtualized GPU NUMA environment, and found that: (1) GaaS workloads exhibit different behavior with GPGPU workloads by having more frequent real-time data exchange between CPU and GPU; (2) GaaS workloads have no NUMA overhead, whether considering the influence of remote memory access or the resource contention of CPU uncore. We also test the performance and power tradeoff among the frequency scaling of CPU clock, GPU core clock, and GPU memory clock. Characterization results show that (1) ondemand CPU frequency scaling achieves the best balance between performance and power consumption; (2) GaaS workloads are GPU-computation intensive. GPU memory frequency can be set lower to save energy with little performance sacrifice.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975271" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bf61aacc6657a3040e9545e25dcfa91a2de8d3a1", "sources": [ "DBLP" ], "title": "GaaS workload characterization under NUMA architecture for virtualized GPU", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "bfa86f9268bd7189aced3bdb7dbb8dd15f3a6f4e": { "authors": [ { "ids": [ "1731210" ], "name": "Tilmann Rabl" }, { "ids": [ "1738552" ], "name": "Hans-Arno Jacobsen" } ], "doi": "10.1145/3035918.3064052", "doiUrl": "https://doi.org/10.1145/3035918.3064052", "entities": [ "Algorithm", "Data access", "Data dependency", "Database", "Distributed computing", "Distributed database", "Fragmentation (computing)", "Inter-process communication", "Online analytical processing", "Relational database management system", "Replication (computing)", "Requirement", "Scalability", "Shard (database architecture)" ], "id": "bfa86f9268bd7189aced3bdb7dbb8dd15f3a6f4e", "inCitations": [ "2b5e804e23c3cc227eec50aaaae21ac1234fba69" ], "journalName": "", "journalPages": "315-330", "journalVolume": "", 
"outCitations": [ "08a3d212e2b6e2bc7d5166b194a16edd8c566b6e", "2a4a6cc4942ca263fa598155c707fa8809372bb5", "688c82d3a97368e0ffc9b32703adad5393e1109e", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "4f43ae162baf252ff4a868f21633db3832c5cce0", "8831310e66f79ee379ff99073a4dcd80cf51d893", "f51674bb4796dc81d5ff9ee397b22b3aa0b8609a", "0f44833eb9047158221e7b3128cde1347b58ccd6", "13d350d63b88061db3b63355e90a05ef270972d6", "8b9a748ae77f9235396e04301b82143feb1167fe", "6fce42ef86ba6fb6d8f25b39622b22746640ee3f", "1af9a3eefa1d0eafc895d571a3865a724084c7be", "0658b1922d6a30a0191bb5fe13b0a9e43e49c999", "be272d78b46895f6e9878d59238a0ccca88ea124", "4f246dd7f2ba3764245d8a16c3048adf0cc68b1d", "16cbcf9658d9d16a422b463587f6b9823d26b2ac", "f010eef368d69d6ef80c473012aa83b49e7ee0e8", "848c717ba51e48afef714dfef4bd6ab1cc050dab", "b082bca6933d56bb630e2630aa014cb0919b221d", "0acc31039de608f2ac51f59b6848a48d50c919a5", "130ccb9eff2ac055e2abe185e14672a1cdeb2677", "7709d65b7bee682f15961f05efb5c23850475e24", "9325e04589893bf821da6d8be7546c3cdf97331b", "d1c21c34936f587779c216ed79ca33883845caa1", "32865cc4368d8b267346fac308cc9c367bd68cdf", "123e38805c50b0816d4a52c3cf8d0e8ad05c5fe4", "0541d5338adc48276b3b8cd3a141d799e2d40150", "24251f02c34f32b1dd96572a1d984c4463a26a10", "a4d773c524f3fafd7413fb505ca9e98f4c1ecdb0", "53ad6a3c95c8be67bddbae3ef76c938adcd9775d", "0cd4edaa194488d50bad837e9941ce4a3a254aeb", "207def18c67fa8024741b7ae3cdc655b57f2053f", "3dc937b9c635d08d3428703f11cb0fc6270930b2", "48e992a734ef6ecbc9d5aeb3fc9135bbee531e07", "037a0b4588db8a086e4feec9562a60b00dd05f99", "74a9dece7565783b0e4d49806a131473db05555f", "03639687dca8ec3e140e2a1705b5026b8c263fb7", "0317680478b271366a9a51639253bc02b7d63924", "1cbb5a54f4b3b0e4f48c0531f2e12b44b3d8bb36", "a038197cf7d7d8cce171673aa377e491d0757d04", "1d905342c3d78dd3236863382ae7bae0482d3055", "67eb4c1794be54919266f70b5bf8ba7a6824f091", "5f077c55d598864d16245febd6b77b35c185452f", "9235602100bee501219229b91464c3a711bac9bf", "8a43d82d7796d9d062fe6e650e3b84db71347a02", 
"62d9c9469bd8f99548ba296a0db72a6752d6d181", "1e557937f418accc13f9c5edb33a3d48259d80e5", "0456f71d2dbbfd77bc933cf8b12bcfd126f6da89", "676ee37c4d2faa71aec10b38db91545f9dcb612f", "5eba0b1e946e787621b893a6ef8553e39d752eba", "0f460e5e031db63ba81f9521e3f6c27a8a39ab93", "19390e486bbf9891f9c13f4ca8b4f49cae1b1429", "0d45d3f73e706ff444bde3a504d74fc5e7868a64", "682f34e8845a5f54c20d636b3255525ded099502", "740ee3de6f8ca734797d7a808c956e303f4a5730", "c5ebebfb199fae183e0650e8af5b3ca20ad9a677", "11b8ef5da9c8df214859bb41b60001a0abd2b5b2" ], "paperAbstract": "A key feature of database systems is to provide transparent access to stored data. In distributed database systems, this includes data allocation and fragmentation. Transparent access introduces data dependencies and increases system complexity and inter-process communication. Therefore, many developers are exchanging transparency for better scalability using sharding and similar techniques. However, explicitly managing data distribution and data flow requires a deep understanding of the distributed system and the data access, and it reduces the possibilities for optimizations.\n To address this problem, we present an approach for efficient data allocation that features good scalability while keeping the data distribution transparent. We propose a workload-aware, query-centric, heterogeneity-aware analytical model. We formalize our approach and present an efficient allocation algorithm. The algorithm optimizes the partitioning and data layout for local query execution and balances the workload on homogeneous and heterogeneous systems according to the query history. 
In the evaluation, we demonstrate that our approach scales well in performance for OLTP- and OLAP-style workloads and reduces storage requirements significantly over replicated systems while guaranteeing configurable availability.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064052" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bfa86f9268bd7189aced3bdb7dbb8dd15f3a6f4e", "sources": [ "DBLP" ], "title": "Query Centric Partitioning and Allocation for Partially Replicated Database Systems", "venue": "SIGMOD Conference", "year": 2017 }, "bfac2165cc81689656cdc2e76cdf275f241ca962": { "authors": [ { "ids": [ "2464956" ], "name": "Bagus Wibowo" }, { "ids": [ "2705791" ], "name": "Abhinav Agrawal" }, { "ids": [ "1694458" ], "name": "James Tuck" } ], "doi": "10.1109/IISWC.2017.8167782", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167782", "entities": [ "Basic block", "Fault injection", "List of fields of doctoral studies in the United States", "Magnetic-core memory", "Manifest (transportation)", "Microarchitecture", "Microsoft Windows", "Multi-core processor", "Register file", "Run to completion scheduling", "Schedule (computer science)", "Simulation", "Smart Data Compression", "Soft error", "Superscalar processor", "Transistor", "Vulnerability (computing)" ], "id": "bfac2165cc81689656cdc2e76cdf275f241ca962", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "250-260", "journalVolume": "", "outCitations": [ "406fe23ae820e770b2c27890fa03379f5be45630", "025c101818da34b1b2e7e514c869724c8da81a9f", "1d55f921999b3fcc55e73d8b73f633156b11937c", "35e3643eb7060f30ef408c4910fc6448eecde6e6", "f06346a1bf4ec894c6a867922c0829615c131f34", "925a81e193c963dd781ff4e9ad562cb1487bafbd", "e61283985b604a6a907f21326a6d136e72628fe2", "738a102562a662031039df7723da16d25627f2e2", "a5718f8b338685f8dc33568e375ef4ec6a301a7c", "0f450ad16b7f09869bc456475a790bd876023173", 
"b821cd8806d8e1054dbf79ab5a62245f5196d442", "bc37cf59b9fa15f82d2889d6c7d5f9ddfef80525", "bb1885c5bf94e586d1be84f06c37998df6085be8", "694d06bb3ff03fb6ff42b7891a42f8d4f3f37f34", "c472e44bd61a1d02469c8d6f2f8f7bbb769e11e5", "30690e66ed3a2f7989a389d0f0189c49e6483888", "7569c5e5f9d8c81e4dfd6ce2c044d4ce0dab07fd", "20c146b68b6a3cd15a187a788c6ad27ba994ff79", "980ea46b9e04ab53c380b7838fa225f36878117d", "023abef0f3f56cda13bcb5adeb28dd4c7241c261", "3ed84f2fbdc4dc6450919ec5b017e66440a5833c", "5236160832766c58b1be2bf4f76f33d9d25b4600", "7848ea4660a27f6d05f8f5dfc33cfa55f999e975", "938286fa80fe31fa3e35f450989f27659296f25f", "5037ba6bcd3b391a8cad4abeae7b6a39ca850c72", "69d62d6464a5ffe31905dfe3e21fb5cdbc02755d", "0f3b99e14ad40fbe21ef25438718cb8988ff1b0e", "96617c3fdbc39564504edb3a85a9a475cdd80260", "145b32b0cd04ab85d6552f8485aa79368ca8a7b6", "0a43cabe7ca27b66cb902f16150bec9fb7e023b1", "62798412556ba470234227f1a7f61a3ee4fc9e2c" ], "paperAbstract": "The trends of transistor size and system complexity scaling continue. As a result, soft errors in the system, including the processor core, are predicted to become one of the major reliability challenges. A fraction of soft errors at the device level could become an unmasked error visible to the user. Unmasked soft errors may manifest as a detectable error, which could be recoverable (DRE) or unrecoverable (DUE), or a Silent Data Corruption (SDC). Detecting and recovering from an SDC is especially challenging since an explicit checker is needed to detect erroneous state. Predicting when SDCs are more likely could be valuable in designing resilient systems. To gain insight, we evaluate the Architectural Vulnerability Factor (AVF) of all major in-core memory structures of an out-of-order superscalar processor. 
In particular, we focus on the vulnerability factors for detectable and unrecoverable errors (DUEAVF) and silent data corruptions (SDCAVF) across windows of execution to study their characteristics, time-varying behavior, and their predictability using a linear regression trained offline. We perform more than 35 million microarchitectural fault injection simulations and, if necessary, run-to-completion using functional simulations to determine AVF, DUEAVF, and SDCAVF. Our study shows that, similar to AVF, DUEAVF and SDCAVF vary over time and across applications. We also find significant differences in DUEAVF and SDCAVF across the processor structures we studied. Furthermore, we find that DUEAVF can be predicted using a linear regression with similar accuracy as AVF estimation. However, SDCAVF could not be predicted with the same level of accuracy. As a remedy, we propose adding a software vulnerability factor, in the form of SDCPVF, to the linear regression model for estimating SDCAVF. We find that SDCPVF of the Architectural Register File explains most of the behavior of SDCAVF for the combined microarchitectural structures studied in this paper. Our evaluation shows that the addition of SDCPVF improves the accuracy by 5.19×, on average, to a level similar to DUEAVF and AVF estimates. 
We also evaluate the impact of limiting software-layer reliability information to only 5 basic blocks (16× cost reduction, on average), and observe that it increases error only by 18.7%, on average.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167782" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bfac2165cc81689656cdc2e76cdf275f241ca962", "sources": [ "DBLP" ], "title": "Characterizing the impact of soft errors across microarchitectural structures and implications for predictability", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "bfb97fee07509cb9e8b4c5809557d1d74b49d50f": { "authors": [ { "ids": [ "1950808" ], "name": "Kun Tang" }, { "ids": [ "1815384" ], "name": "Ping Huang" }, { "ids": [ "33367879" ], "name": "Xubin He" }, { "ids": [ "1792031" ], "name": "Tao Lu" }, { "ids": [ "1730937" ], "name": "Sudharshan S. Vazhkudai" }, { "ids": [ "34966505" ], "name": "Devesh Tiwari" } ], "doi": "10.1109/MASCOTS.2017.35", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.35", "entities": [ "Burst mode (computing)", "Non-volatile memory", "Non-volatile random-access memory", "Provisioning", "Random-access memory", "Supercomputer" ], "id": "bfb97fee07509cb9e8b4c5809557d1d74b49d50f", "inCitations": [], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "87-98", "journalVolume": "", "outCitations": [ "721c5be47c923d9c0303a3eefd3d42a57e0add03", "61ad98f7f693221bf2149897955aa93eac8950ba", "3b2c8bb9471bd52a40b72a61bfede076f4d414b5", "9f428e6fc51549b12d781ae709054bd64ad741d7", "884fc7d1c8353a6ca2f0830a9f0f840a985afa7e", "bc432fd1491c352413f635b4dc949f4e62f5ce53", "62086689983904e6a5ebfc6f299217bf76a00dc0", "2398278a25035cfeefa3dd4aba91b16d48f540ba", "06230d13e276bd871a378ca932a41b5cff94e29f", "c600f82b46e4dfd6cf7d164bb023a83f35d436ec", 
"20bd9e51b0a95cfe03afdb00337e1c95c290e473", "589e89d77f689ebfc3f36bc1f76fd518ae4a237c", "0377dd68b77eba56ab5bedc12db27655bde12078", "70847053d65da7666140e0de5d77e94b83835b6e", "054b0e0c107b550f99c5c23db99b64254b95bbfa", "5cde06240acc288c986a10ee39f17ea28c9ef05c", "be4c6170ee4fd72ff5c8fc92e3d6ba5cba774cf6", "45ae3be13288fbcf7ace9cf7266b45d54316a406", "1a42c298dc1a3aa33d9943255a51d35b035324d2", "20cc5fdba0915a3958c31d7b18763e82a5418856", "5121837e40f54742fbd26503c7ca76e68ced467a", "1820a34042d6371a9e20484b0c63b698eb522a6c", "429d28998216da5648f40248bf4bc9e508edd2fd", "67ddcb550dd8fe57e4567bd7ca78f6e841f25c4c", "ed1735c58eb569ba13e3fa80ead146365d0f05f2", "59ba9f62728b6231f982ea3b59f9ba7422182f28", "3c03e217aeaf6734b5471d5f8930436e009d60af", "2657302160775f8766964d013efe242836693f3e", "19a8a81b72ba9661e3588e234a4d8af7b6d737c0", "3dfb2fc722b1965762770f2add5646d7a87b5ac6", "31ceeced5d23193c369b98170c45e66bae6ff77d", "2fe5ff00fe35d404a16fc69284269f792468a5e5", "1e8fa3399883d288483c145741156c77d80a4278", "03543f75c4fe0c49f81af789a1c7293ff0e4e107", "7a2274412948765bf872b765dafd8139e51000ff" ], "paperAbstract": "HPC (high-performance computing) applications usually show bursty I/O behaviors. In order to expedite the applications, permanent storage systems are usually provisioned to serve such I/O bursts. Approaching the era of exascale computing, non-volatile RAM is introduced as burst buffers, to absorb the bursty bulk data and relax the I/O provisioning requirement of the permanent storage systems. However, without judiciously draining the burst buffers, I/O bursts are passed down to the underlying storage systems, which causes severe I/O contention issues.In order to minimize the I/O provisioning requirement and resolve the issues caused by I/O bursts, we propose a proactive draining scheme to manage the draining process of distributed node-local burst buffers. 
In addition, we develop an I/O provisioning model to predict the minimized I/O provisioning requirement for permanent storage systems. Evaluation results show that applying the proactive draining scheme largely relaxes the I/O provisioning requirement while preserving the I/O performance of underlying storage systems.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.35" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bfb97fee07509cb9e8b4c5809557d1d74b49d50f", "sources": [ "DBLP" ], "title": "Toward Managing HPC Burst Buffers Effectively: Draining Strategy to Regulate Bursty I/O Behavior", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "bfdd63cc828f28b071b00c9f442b6d1db26297e7": { "authors": [ { "ids": [ "26998399" ], "name": "Xieyang Xu" }, { "ids": [ "1930730" ], "name": "Yang Shen" }, { "ids": [ "3355540" ], "name": "Junrui Yang" }, { "ids": [ "40151687" ], "name": "Chenren Xu" }, { "ids": [ "3381568" ], "name": "Guobin Shen" }, { "ids": [ "1782923" ], "name": "Guojun Chen" }, { "ids": [ "31628550" ], "name": "Yunzhe Ni" } ], "doi": "10.1145/3117811.3117843", "doiUrl": "https://doi.org/10.1145/3117811.3117843", "entities": [ "Denial-of-service attack", "Liquid-crystal display", "Movie projector", "Telecommunications link", "Transmitter" ], "id": "bfdd63cc828f28b071b00c9f442b6d1db26297e7", "inCitations": [ "4616d04b2824d002d49164ac41ac5546121a8fb8", "fbefb1a3b2a70c27c074a6fac210b61b95b3b230", "0d023aa7b708a02ebeb7853565c9d0f607932ae7", "6eedbcf8ff4ee5f6c8a3500656f889b92c501d66" ], "journalName": "", "journalPages": "180-192", "journalVolume": "", "outCitations": [ "1ac57524ba2d2a69c1bb6defed7352a06fd7050d", "3848a79eb0332f74c35923abdc4161980ea40cde", "408979731b043f5bed8ce73ba07c7901bd1912a9", "a40d751f977eac807e02db679163b81eeb7f5005", "ce8c5e7fa14974a990f86a18d73fd8a0fc9474eb", 
"27d4dc8b28a22edd561b9fd38d481adb4e1504f4", "6a98fe74a8f047256b1d8158eb1a7c039618749e", "1e37384874c84acc7919176d4e9598e9116da2ee", "2a12a97476635f48467dfb2356e0038eae7b7e44", "cddadcb2a6e46807490a4dd81db9931dc3455777", "924ffef45df884a57b0b03ef238b2aa7ec990380", "498d2ed40427eeb78799fa96ac0f5a58c6648d05", "e659987d76bb43d5aa55776953351b437bc35ca4", "aa8161dbd8475681af18103dc25758d4268f18f4", "0d0099bd4609d73d0744505618eb902c5a35d02e", "8347fa4ad280baf119580cc680fd85ddb16d7236", "786b684d577ae57aa2fbc7d1fb0870ad86b998b5", "972e4c4ec507a621738013a3f17a980006efd3eb", "9058f1bedc6a63574b5b3dfdd158676fea2a5232", "c70e4a09a00c302f26ce60ac15e4e208af3b0621", "a7d99e695f282896ce82028eff9dbc1b623f0477", "380c8569c75b4aedb070224e39b517cbca85ad7a", "8de6b0c2d807eaf26f12a6ee555c48a903f6fb53", "a2469f37cac6f98f522a6329cf34f33ccb8cda28", "5fe77fde8f74314f530eaaceafe6bc391d51b663", "2518f58c9c87e02ab33992360266f89e4486ea9b", "a8e28313e6d4fa54f864b9108c1547912031852a", "c720bd17e837415de3e602ee844288546eb576fa", "18c55aad1423142ced6fb3725a5595e7a4758bae", "6a09dab73738ceba7078f5bb183302f5b3a219ac", "559616c85422c8b1c972e9560b2d0363ad9b5679", "880554219cf9862bf175c641bae969722018c630", "b2145536093caf72e3ce0bc89d507fd85484647c", "9d780c997a56931629cc4b342c3664c0eb5f58d2", "0c9b68449b6241478ba38c2af220b393db86e206", "175ea2f99ec3a009554c3d049f460c43a7e6a01a", "16595d321b257dd3c28bff95bdd3e42d6254aeca", "14ba7b31b92233766089dfae54b53e339822f3cc", "02c75551123cae6dfbb0c69de96a199c974bcf89", "91bdacc904edb540fa57ea9a4535a1a1d79d855b", "3608e8d69bfa03397c752ff85d7425bedfa61e4d", "02f00b07581c316d21505bcdb1f65a8dac5a8ad8" ], "paperAbstract": "This paper investigates the feasibility of practical backscatter communication using visible light for battery-free IoT applications. 
Based on the idea of modulating the light retroreflection with a commercial LCD shutter, we effectively synthesize these off-the-shelf optical components into a sub- mW low power visible light passive transmitter along with a retroreflecting uplink design dedicated for power constrained mobile/IoT devices. On top of that, we design, implement and evaluate PassiveVLC, a novel visible light backscatter communication system. PassiveVLC system enables a battery-free tag device to perform passive communication with the illuminating LEDs over the same light carrier and thus offers several favorable features including battery-free, sniff-proof, and biologically friendly for human-centric use cases. Experimental results from our prototyped system show that PassiveVLC is flexible with tag orientation, robust to ambient lighting conditions, and can achieve up to 1 kbps uplink speed. Link budget analysis and two proof-of-concept applications are developed to demonstrate PassiveVLC's efficacy and practicality.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117843", "http://ceca.pku.edu.cn/media/lw/63a22162bb427709099f765883bf243b.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/bfdd63cc828f28b071b00c9f442b6d1db26297e7", "sources": [ "DBLP" ], "title": "PassiveVLC: Enabling Practical Visible Light Backscatter Communication for Battery-free IoT Applications", "venue": "MobiCom", "year": 2017 }, "c00238dca09bac25c7d229efcf9a29aa857b92ff": { "authors": [ { "ids": [ "2636156" ], "name": "Immanuel Trummer" }, { "ids": [ "3123617" ], "name": "Christoph Koch" } ], "doi": "10.1145/3035918.3064039", "doiUrl": "https://doi.org/10.1145/3035918.3064039", "entities": [ "Algorithm", "Anytime algorithm", "Approximation algorithm", "Best, worst and average case", "Integer programming", "Linear function (calculus)", "Linear programming", "Operand", "PostgreSQL", "Program optimization", "Query optimization", "Search engine optimization" ], "id": 
"c00238dca09bac25c7d229efcf9a29aa857b92ff", "inCitations": [ "68c3433dcebdbe94f1943a955012380a59feed50", "306185d6b16f1ac9c770d2e2a80656cbdc1e9224" ], "journalName": "", "journalPages": "1025-1040", "journalVolume": "", "outCitations": [ "a25f6ee864f0c4fd95d9ceb2f4868e9e3fe51786", "1beac71beb44d2e3c512730ba5b1a071f1819cc2", "8541e44cfdc11587a04581987a03daccddd3514a", "f7991290e9555c18f5093de1d0c6c49bd1ad0bf0", "12f83c83b6d87f158a95d0796f950995a0ed0297", "e088aeeb77110d661a51cc76c884b2faf6914f6d", "3a20575b768022e3d3f20c0b8da386086cc49c57", "04e828ddaa05cccb1e5d380f4fe30b6aa36e6dfd", "88ccf51bf7dc3599e8745fb73b3a5186bfdf0dc3", "7c798b835099d95d8975e85d7fc38cc71a9ebb95", "947671144b12e7dcd8c4e5c0776e8a3bf53a49c2", "57f38687b5137c9a802b446d92059544c2eae8a0", "4bca561a9bc360135bfc364b666687b0e394361b", "04d025ea7122ee7396ad19ca94abffcc9f1b44a1", "3f5253e95f7465624bb2a4b80ccae4337a7334d4", "f5a9c3620d481236ad2ae274b0ad8672471a701c", "0a256e222bc5781f5aaf31e90061fd16b8a8295c", "4f69043e2f8e61bfa0392e718715544fdb4fe72c", "6314d537bd96114e0be251eee445488730766880", "00aa614734a26a19b09a0a3bdee2adc77bdac5e4", "ca3ad75ed6d5db22c4df82200c42322ab69a03ef", "bf6afd95ac71c3022ec9b0499528a86ecdb57de7", "0ff91cc6eeb14ceaa9a18fcc232d28d74f757a18", "0ec0ce9b04a869f7bf1a3fa5b6089da61f86f8eb", "501fd14eae100042a4ad3ed58fb8b9baebc318b4", "359760cb0df73b9d8e6501b5ab5ff93a6c12b44f", "a10782f15d05ef101e4ec38968a0df4299a02fc9", "82861f6b09ed3b91ceaed00b6d947660c9642051", "e4a689ee7388f14896240fd24800882a1cbb7357", "6ffcd9a1bb0ad5ec1c6925f52c8c1bd9d7ee6216", "52327eecb10930640e23ef36bb7845d1a090e091", "c50ca444a4aa5b701670fdee589df6668c0c10d5", "280e98c45ceb53d878adbba0f8bee688c6716f7d", "82ce0158c14708b01153ac0fe7d6dc9688dfbb18" ], "paperAbstract": "We transform join ordering into a mixed integer linear program (MILP). This allows to address query optimization by mature MILP solver implementations that have evolved over decades and steadily improved their performance. 
They offer features such as anytime optimization and parallel search that are highly relevant for query optimization.\n We present a MILP formulation for searching left-deep query plans. We use sets of binary variables to represent join operands and intermediate results, operator implementation choices or the presence of interesting orders. Linear constraints restrict value assignments to the ones representing valid query plans. We approximate the cost of scan and join operations via linear functions, allowing to increase approximation precision up to arbitrary degrees. We integrated a prototypical implementation of our approach into the Postgres optimizer and compare against the original optimizer and several variants. Our experimental results are encouraging: we are able to optimize queries joining 40 tables within less than one minute of optimization time. Such query sizes are far beyond the capabilities of traditional query optimization algorithms with worst case guarantees on plan quality. 
Furthermore, as we use an existing solver, our optimizer implementation is small and can be integrated with low overhead.", "pdfUrls": [ "https://arxiv.org/pdf/1511.02071v1.pdf", "http://doi.acm.org/10.1145/3035918.3064039", "http://arxiv.org/pdf/1511.02071v1.pdf", "http://arxiv.org/abs/1511.02071" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c00238dca09bac25c7d229efcf9a29aa857b92ff", "sources": [ "DBLP" ], "title": "Solving the Join Ordering Problem via Mixed Integer Linear Programming", "venue": "SIGMOD Conference", "year": 2017 }, "c025c97af526b4487ebb4582cb9a58c1f1ff4c94": { "authors": [ { "ids": [ "20851040" ], "name": "Elaye Karstadt" }, { "ids": [ "14411446" ], "name": "Oded Schwartz" } ], "doi": "10.1145/3087556.3087579", "doiUrl": "https://doi.org/10.1145/3087556.3087579", "entities": [ "Algorithm", "Coefficient", "Computational complexity theory", "Coppersmith\u2013Winograd algorithm", "Cubic function", "Mathematical induction", "Matrix multiplication", "Matrix multiplication algorithm", "Multiplication algorithm", "Numerical integration", "Strassen algorithm" ], "id": "c025c97af526b4487ebb4582cb9a58c1f1ff4c94", "inCitations": [ "91f8ec8cde8e5e2defec22eb29e809f1aa71034a", "95e336a9318604a2da67233ecccfa448d638c34d" ], "journalName": "", "journalPages": "101-110", "journalVolume": "", "outCitations": [ "6a2789a722740df16697ff5ee6127cb644994ef3", "e99ec9a172aa5c762e618d1f4e02a87f53d9cc7f", "3269b04305dd2bd4ecbb2daea4429eeb523cc164", "0f48b218a10dbe7f37dd70073fff3032aa15f465", "74d231ca09c7106bdbf7e1ff2852fbbc7bd96c67", "f71958a5e008bbffd78133c8f48c76695baf0cf8", "b19c657cafb57b94e447aeceb836cc7df8a019c1", "4f2f128177eaeabf570c5095ed2d598e412900e3", "ed8c1461ba32cb0a7db6d32bdb61241eed64751c", "8940eaa198141baee28f825119fb3b90a8a239a9", "e669d89ad39e5e862041ba06dff697af746e4091", "114f158f3b7b37614d5d83efe33c1e73c051c7c1", "501f439df0c59188be9acde2328764c14dd33c89", "384e61d089611b10181775756feadd3f58fd2ec5", 
"c999286a3343871e22b68edcf1c8474724a401e0", "0fbec54268f444ee7d884c09a4819a94677b2734", "af664c46d65451b97a12e2f170495428dc49c878", "40fb5ea197206082b0b77f388c57bca79536c877", "26e02fc5572fcf1e55496a2846aaa77b9b45b14d", "69c90113b1b99c663129962201ddfe67c8256d53", "34eb32537b3f9dacbbd1567e1ce620c66e51d3c6", "4b3f502897fbfe13f8f6c824a39cee404fdce1c8", "636d19ad02af6def2956466a5ba5d69e96934d33", "a9fa03cbff886b16de948bdd7f633b1c20b395a1", "8eaa45df0a85bf7fda455cf7f1699cdfe0de1288", "3030fa2aecda339d593b86a260bfab9988b42df7", "d01e00939c1773366237e744ff0047fc55a53453", "a0c749d8af116c6d56110ebd7aa1abe1a25c11a2", "41e8b3449888e59d2bd40c07a717829f54610d5e" ], "paperAbstract": "Strassen's algorithm (1969) was the first sub-cubic matrix multiplication algorithm. Winograd (1971) improved its complexity by a constant factor. Many asymptotic improvements followed. Unfortunately, most of them have done so at the cost of very large, often gigantic, hidden constants. Consequently, Strassen-Winograd's O(nlog27) algorithm often outperforms other matrix multiplication algorithms for all feasible matrix dimensions. The leading coefficient of Strassen-Winograd's algorithm was believed to be optimal for matrix multiplication algorithms with 2x2 base case, due to a lower bound of Probert (1976).\n Surprisingly, we obtain a faster matrix multiplication algorithm, with the same base case size and asymptotic complexity as Strassen-Winograd's algorithm, but with the coefficient reduced from 6 to 5. To this end, we extend Bodrato's (2010) method for matrix squaring, and transform matrices to an alternative basis. We prove a generalization of Probert's lower bound that holds under change of basis, showing that for matrix multiplication algorithms with a 2x2 base case, the leading coefficient of our algorithm cannot be further reduced, hence optimal. 
We apply our technique to other Strassen-like algorithms, improving their arithmetic and communication costs by significant constant factors.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087556.3087579" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c025c97af526b4487ebb4582cb9a58c1f1ff4c94", "sources": [ "DBLP" ], "title": "Matrix Multiplication, a Little Faster", "venue": "SPAA", "year": 2017 }, "c040b6b4c5dffb90491da263d42fc649fd4fc895": { "authors": [ { "ids": [ "3573218" ], "name": "W. Cyrus Proctor" }, { "ids": [ "39794285" ], "name": "Patrick Storm" }, { "ids": [ "2967845" ], "name": "Matthew R. Hanlon" }, { "ids": [ "2996946" ], "name": "Nathaniel Mendoza" } ], "doi": "10.1145/3126908.3126957", "doiUrl": "https://doi.org/10.1145/3126908.3126957", "entities": [ "Authentication", "Command-line interface", "Component-based software engineering", "Data curation", "Experience", "Multi-factor authentication", "Open-source software", "Production system (computer science)", "Software deployment" ], "id": "c040b6b4c5dffb90491da263d42fc649fd4fc895", "inCitations": [], "journalName": "", "journalPages": "37:1-37:11", "journalVolume": "", "outCitations": [ "e28687cc68b035c17697d0846e18708e88e3ee2b", "bee3b38355388fc4ac076e2e265c4a563bb38430", "6adacd3f01ef16bd70b179132cdf8fa5ea6e6531", "65ca6f17a7972fae19b12efdc88c9c9d6d0cf2e8", "36400061d8cf4620069eb372c82fe86d0cc56bcb", "19e5adf691d70deff696dfd27a521009cb1cf437", "fabff2ac5b3a15dcce5325e808a6671d0aafc3d1" ], "paperAbstract": "Multi-factor authentication (MFA) is rapidly becoming the de facto standard for access to all computing, whether via web, phone, or direct command-line access. 
HPC centers and other institutions supporting hundreds or thousands of users face challenging cost, licensing, user support, and infrastructure deployment decisions when considering a transition to MFA at scale.\n This paper describes our experiences and lessons learned throughout the assessment, planning, and phased deployment of MFA across production systems supporting more than 10,000 accounts. It focuses on the ultimate curation, creation, and integration of a multitude of software components, some developed in-house and built to be compatible within existing HPC environments, and all of which are freely available for open source distribution. We motivate the development of this customized infrastructure by highlighting some of the particular needs of our research community. What follows is an information resource for others when considering their own MFA deployments.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126957" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c040b6b4c5dffb90491da263d42fc649fd4fc895", "sources": [ "DBLP" ], "title": "Securing HPC: development of a low cost, open source multi-factor authentication infrastructure", "venue": "SC", "year": 2017 }, "c0806a223ba9c7ea21e3a9d850f56cb700798db6": { "authors": [ { "ids": [ "31949440" ], "name": "Elette Boyle" }, { "ids": [ "3221424" ], "name": "Geoffroy Couteau" }, { "ids": [ "21047852" ], "name": "Niv Gilboa" }, { "ids": [ "1688856" ], "name": "Yuval Ishai" }, { "ids": [ "40535555" ], "name": "Michele Orr\u00f9" } ], "doi": "10.1145/3133956.3134107", "doiUrl": "https://doi.org/10.1145/3133956.3134107", "entities": [ "Algorithm", "Binary decision diagram", "Computation", "Discrete logarithm", "Eurocrypt", "IP Multimedia Subsystem", "Phelim Boyle", "Polynomial", "Secret sharing", "Spectral leakage" ], "id": "c0806a223ba9c7ea21e3a9d850f56cb700798db6", "inCitations": [ "bf9580375a1595d77fe9630fbf68c37a67903b3c" ], "journalName": "", "journalPages": "2105-2122", 
"journalVolume": "", "outCitations": [ "3c8722737ef9f37b7a1da6ab81b54224a3c64f72", "6871b95c14dccca7636b498b5d363a743c5288e6", "6eb3f92928a3a85d8635d98d7fd62baf185469d7", "207b5ec5fa64afe8d7c95c8bf480813d024ef62a", "a211bca51d271167e4e5d2e1f648873f2913ab0e", "d6e27a555fb21373763a59b887c44e3d6a951eef", "7cd3e868dca5a7e379b7f57910b41f33b5dcc912", "50b72015f192a1c4a6422b813fd3ace29b29d634", "a2e96683bab7878302515073fe47cc97346db83c", "b6695347a10114b2697081446c7c83c2a421e33b", "330e03ec4593d936e1922536af8f06b5a2abe61a", "bcb49a06e4fb7ea831257e146073d84234f4d238", "6450e07105503653f022ad5d283e2546f427fc4e", "01a14a8c67335c1cfef255455126a85e53c0eeb6", "547a94f8b16f521ee2eac299572a5c767d628289", "46527c14457cf84d1cf26487d6b4c31f4825db71", "4c435335cb4263f9e4cc81affcfd071bcc31e572", "937cb0795bf3bfb5c9cbb7387ed68301bb6995ce", "04948723dec0e6724777ee56f0d10168cce44921", "4d8e2657d6c9032c28ac4878a442e83dd99b672a", "178ca3fd7ca90480f51f3c376e5485158763d15b", "6b3aea37625702e98e5033e1107403e319b4df01", "e7ea58866539d47d0c38021e487ae92658f3d2b0", "b89437eb0078a9942b413b9ab857abe92b2ebd2e", "11418196aa35285f7924cb7e577b8fbf104bafa8", "b6964d3aea9833e78bf1b69ac34e02fbceae1e1b", "1932ac31d167e8fe5914692c80442794b41996ba", "a9ca6a9079bcb5c513ebf63a029d7cdbb8245fa3", "f98cfc3c092d69c068054698bcb4c1b6840644c6", "6f70632a51dd43b8a37d95051cbdb5e9bb02b1ec", "abb28a53d8d425fa052ec18c11996882406ddfe6", "ad0564d120af0e7471cd32d4c0438b8c25f33a0d", "488caed5bf975458e98f420e0e046e975e23f74d", "19c3736da5116e0e80a64db35afe421663c4b4a8", "6678897873d47418299809ed6204f5fe07900cf9", "374e3e1152e4084eb2f562e738cf2d8158c135be", "a7f12c3fbb20c894898b97da49b1788719032647", "741d21d9baea72c4a930912e50e8b65e9a50489a", "1db2265e3ce510fee6d4d9b39c135bddb4040949", "6eccb6635eb1d6de72415331794e3b3530811800", "129db5ec39a453ea53c94ad529cf13dccafe4167", "35cd80ccef2e5c5e5845694a3b7f7359c609c442" ], "paperAbstract": "We continue the study of Homomorphic Secret Sharing (HSS), recently introduced by Boyle et 
al. (Crypto 2016, Eurocrypt 2017). A (2-party) HSS scheme splits an input x into shares (x0,x1) such that (1) each share computationally hides x, and (2) there exists an efficient homomorphic evaluation algorithm Eval such that for any function (or \"program\") P from a given class it holds that Eval(x0,P)+Eval(x1,P)=P(x). Boyle et al. show how to construct an HSS scheme for branching programs, with an inverse polynomial error, using discrete-log type assumptions such as DDH.\n We make two types of contributions.\n Optimizations. We introduce new optimizations that speed up the previous optimized implementation of Boyle et al. by more than a factor of 30, significantly reduce the share size, and reduce the rate of leakage induced by selective failure.\n Applications. Our optimizations are motivated by the observation that there are natural application scenarios in which HSS is useful even when applied to simple computations on short inputs. We demonstrate the practical feasibility of our HSS implementation in the context of such applications.", "pdfUrls": [ "https://acmccs.github.io/papers/p2105-boyleA.pdf", "http://doi.acm.org/10.1145/3133956.3134107" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c0806a223ba9c7ea21e3a9d850f56cb700798db6", "sources": [ "DBLP" ], "title": "Homomorphic Secret Sharing: Optimizations and Applications", "venue": "CCS", "year": 2017 }, "c08de1c411aec1129f530f609044d6fc1e2bb1f4": { "authors": [ { "ids": [ "8599669" ], "name": "Kangfei Zhao" }, { "ids": [ "1718849" ], "name": "Jeffrey Xu Yu" } ], "doi": "10.1145/3035918.3035943", "doiUrl": "https://doi.org/10.1145/3035918.3035943", "entities": [ "Algorithm", "Computer data storage", "Datalog", "Fixed point (mathematics)", "Graph (abstract data type)", "Hierarchical and recursive queries in SQL", "List of algorithms", "Multi-function printer", "Recursion", "Relational algebra", "Relational database management system", "Semantic Web", "Social network" ], "id": 
"c08de1c411aec1129f530f609044d6fc1e2bb1f4", "inCitations": [ "e5c17deddc0a69c5bd3f9d1eeb515946277f76a1" ], "journalName": "", "journalPages": "1165-1180", "journalVolume": "", "outCitations": [ "465c5a68c3c62f6fb949acfd7921a9072a29841b", "43ea93b01be7d3eed2641b9393c6438d19b825a0", "78ad867eb6176d4e2f1cec4f7517f65d90a660f8", "b149cd8a742848c3b1f86edc43590f475386445d", "4d98e7aed4fce86e0ddd92bd4455b6f41fc4c780", "ad585974e2d05758cd36e2ab2e346f38b4de4a0f", "8c4f6168509c0439cb29f40d705c42d182431e26", "7c1a0f7054d496e71cc697202b84d8a5c652328a", "19dd6efece0de4e26e400d63f52dbf5f62453272", "165d9bd7e9c4a030b09cf21e35ea0bf96090d8cb", "43ae665066bfa1c52bfc83e95dadcb5fcabcee41", "7724172425879e96bd4deee1c175ba840ebac5a6", "b7c2bdfad548cbd06b06d27be3bd226d4a3ee075", "1907bb75c65a46184871d3c582fa0fa1f4a52fda", "e69870dac824d819d7b359906beff6b43d01d1a5", "60105db6a5cce5eb7bcec56b87d95ef83a5e58e0", "26a2b94a118334585f5d717b24ef06b6f9014ba8", "ac5c1088151c8f9fbe9419415709c5c8b945a129", "148edd9ac0ed0485f14f470949f64a9d92cbbc10", "863da712ef344460cdb5b3d92ea2d4cc28d860de", "6cdda58b54d24280e144e34fcee1df5de8fa9b5d", "323db86b818e7dff6639aa0b5c6d83a275a0d5cd", "71a14c91426af347409b047a0361256c35279f55", "e741b677759b94aaa5f3162e3a3b01d396a43aaa", "2d1d0ee6e21c288d96577b24656cd3398082f857", "244f7c483b1035144f04f7c58f20c3f806d3f1b3", "17152bd58eaa134ea0b56407a942c8ff97a16237", "f26feb25eb3ff2c5016700a77104740dd65a0169", "7ec3de9aeb436f3230f4c6bae92832958ffadb28", "4e8b20a55afa5a53655a73c59cc32ccc13eea76a", "eb07721a38f6bd3a9e06f95ce65ec8ca63cadb2a", "ee8c9d3ba6446f4c2cd63ec6a2fd3c82389f43e2", "061188d388cd91a8090f3106c78621bc7cb8cb07", "6099814c55861b15467d26010631124bea5dfbda", "4949f1caaf36b540f5b65f28d787bdfdaef30bf7", "1492b478e94bfbead7ff458a60c25616e44c5eb7", "0b944c6b28d4b87d8b64767955201cadf987f558", "1359d01962b882c95607a75aeafeb532188cb159", "040678daf6a49a88345ee0c680fccfd134f24d4b", "6a80af5bf2e89907ad9e60e1b6055d4d935c86a1", "6b343dd1814738da2d01edf5d81332701f005bcb", 
"5ee35281c2c5345e13890b7dcef3d17ee0506023", "a97b77c8c3f9e8247c497d1b4c27c958a15f3a62", "243230d5b623f79c22750b42447e902ab07a2db9", "31181e73befea410e25de462eccd0e74ba8fea0b", "a000e3a2f34444ee62351ffa443f5fc65731a110", "bacd622d373745a2cc5300231f0780c4871d16ad", "63a75a7abd9a4701e3d88a7f54c2d263a0ddb0c3", "5c78f68fd6f56acf5832b095cb190b83aacf9c37", "f3a39750bc525e9a7fb42b130c2ee58f5faa188e", "18c241f9333c5b6590184b0d5f218b3ccd51a605", "009dbf3187862352aac542bf7d61e27bce6b27f5", "351cb36f5c1489358330263d224574449c2e5d83", "1b4474be934290872e9c03fc084d940e9a51a360", "2c688c44c7a102014ffac63bc2b5213ffb9ea405" ], "paperAbstract": "To support analytics on massive graphs such as online social networks, RDF, Semantic Web, etc. many new graph algorithms are designed to query graphs for a specific problem, and many distributed graph processing systems are developed to support graph querying by programming. In this paper, we focus on RDBM, which has been well studied over decades to manage large datasets, and we revisit the issue how RDBM can support graph processing at the SQL level. Our work is motivated by the fact that there are many relations stored in RDBM that are closely related to a graph in real applications and need to be used together to query the graph, and RDBM is a system that can query and manage data while data may be updated over time. To support graph processing, in this work, we propose 4 new relational algebra operations, MM-join, MV-join, anti-join, and union-by-update. Here, MM-join and MV-join are join operations between two matrices and between a matrix and a vector, respectively, followed by aggregation computing over groups, given a matrix/vector can be represented by a relation. Both deal with the semiring by which many graph algorithms can be supported. The anti-join removes nodes/edges in a graph when they are unnecessary for the following computing. The union-by-update addresses value updates to compute PageRank, for example. 
The 4 new relational algebra operations can be defined by the 6 basic relational algebra operations with group-by & aggregation. We revisit SQL recursive queries and show that the 4 operations with others are ensured to have a fixpoint, following the techniques studied in DATALOG, and enhance the recursive WITH clause in SQL'99. We conduct extensive performance studies to test 10 graph algorithms using 9 large real graphs in 3 major RDBMs. We show that RDBMs are capable of dealing with graph processing in reasonable time. The focus of this work is at SQL level. There is high potential to improve the efficiency by main-memory RDBMs, efficient join processing in parallel, and new storage management.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3035943" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c08de1c411aec1129f530f609044d6fc1e2bb1f4", "sources": [ "DBLP" ], "title": "All-in-One: Graph Processing in RDBMSs Revisited", "venue": "SIGMOD Conference", "year": 2017 }, "c0a8c0e6ccf9882969ba0eda0b898affa015437b": { "authors": [ { "ids": [ "2912732" ], "name": "Vasilis Verroios" }, { "ids": [ "1695250" ], "name": "Hector Garcia-Molina" }, { "ids": [ "1786049" ], "name": "Yannis Papakonstantinou" } ], "doi": "10.1145/3035918.3035931", "doiUrl": "https://doi.org/10.1145/3035918.3035931", "entities": [ "Algorithm", "Experiment", "Interface (Java)", "User interface" ], "id": "c0a8c0e6ccf9882969ba0eda0b898affa015437b", "inCitations": [ "b672d76d09d076ea148ea4b22e4fe907f2dc4a47", "0bcb4ab04e1ebabbf67a38bd7b20b8c660018add", "7e59154a0446de99a18400a43e65c0905f248cd4", "a18ed9b20cc855088151f6edc04bbd51edbffb00" ], "journalName": "", "journalPages": "1133-1148", "journalVolume": "", "outCitations": [ "762aa12db17ecc3ed8320e1d88bef063214b595b", "5f11f3d958581f53b20577da69c9f7bc25c2ff2b", "060bdc422872e375bbef9b1cd82fbf0f936d4691", "370b5757a5379b15e30d619e4d3fb9e8e13f3256", "632eca15ed20f87490c60a6005c4c58f06bee61b", 
"45254b2dda5bcb0ed069a58c1173cced4d776660", "00e95e8491f4c3ae1d6e7a05da1afb132f3f4e45", "28015578d74becc4e34789de6b22d2d6321547c7", "3d359fca5a3b632d890f1640d78defc09a561d8d", "445e4442ce33a04d5c4d45eb191f431e0bc2444c", "ac773c345d751659d1c249dd5f2d6fe7ab13afcb", "2dec6e69f42b22d03208ce01afc103a7d702f276", "8a0b267493ac9510e47ceb4bcebb6d202b2f89a5", "86e9134ff5e144d4f4d4243ec66351cc068ab409", "151673abe01271dc3fc37725c02e95e7970f3bed", "213cb7593934bc675c336f53dd6c61a3c799be80", "6621bbf82c49abe99bb49ea03c8789af67b58c20", "10ae68dea2ce4426eb0d87c99c1094504469752a", "08f51a9138458f667f0c00d40b6a820c451c7d36", "c921482cb0048738284aa66e531b83f64ce46bf5", "20d90871bc0dc7956bf2557d91d8d96deb0a4520", "4f819589fd2931333326ad7deec58f628f7d2644", "15904f4fb558e33fbd607c0289a2a0deb746d73d", "009b37cf7dbc9da978d4fa604257e2e6020fd478", "af1e1bee41d004a6c1fa608a9fe2a884f48c6e5f", "3d24e2327a0ba1cb2f3b5b2bf41dd450703f8e56", "708b3c509e2ffee76918faeac18692f9ca527582", "4033104e3a37324df023fec7e95d852e962617de", "04272a11aa0a8c8a9fe1d47b4e64f7578211fdde", "47203943c86e4d9355ffd99cd3d75f37211fd805", "d84af7f39033a084d9ae95fe895d39b9ec5246dd", "2a8969bf0a02a2fa28e3784fd6025828859a7f67", "2a8a3e00b978e4ae12da7fe536b7a5a719c0f0ef", "88cd4becf3587a8378e450a99ded801fbdb264e1", "8bc23235070ce181d34002e2a44e4b233beaa732", "0cdfc808cec0254f1037909e8955c07bc7755842", "b9e43395663f74c581982e9ca97a0d7057a0008c" ], "paperAbstract": "In Entity Resolution, the objective is to find which records of a dataset refer to the same real-world entity. Crowd Entity Resolution uses humans, in addition to machine algorithms, to improve the quality of the outcome. We study a hybrid approach that combines two common interfaces for human tasks in Crowd Entity Resolution, taking into account key observations about the advantages and disadvantages of the two interfaces. We give a formal definition to the problem of human task selection and we derive algorithms with strong optimality guarantees. 
Our experiments with four real-world datasets show that our hybrid approach gives an improvement of 50% to 300% in the crowd cost to resolve a dataset, compared to using a single interface.", "pdfUrls": [ "http://stanford.edu/~verroios/papers/waldo.pdf", "http://doi.acm.org/10.1145/3035918.3035931", "http://ilpubs.stanford.edu:8090/1137/1/ERMultiItemTechRep.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c0a8c0e6ccf9882969ba0eda0b898affa015437b", "sources": [ "DBLP" ], "title": "Waldo: An Adaptive Human Interface for Crowd Entity Resolution", "venue": "SIGMOD Conference", "year": 2017 }, "c0cf6192ba294ee9a7a7edda864a7d0dad5ac35d": { "authors": [ { "ids": [ "1799329" ], "name": "Jee Ho Ryoo" }, { "ids": [ "2390821" ], "name": "Nagendra Dwarakanath Gulur" }, { "ids": [ "6970155" ], "name": "Shuang Song" }, { "ids": [ "1703238" ], "name": "Lizy Kurian John" } ], "doi": "10.1145/3079856.3080210", "doiUrl": "https://doi.org/10.1145/3079856.3080210", "entities": [ "Benchmark (computing)", "Central processing unit", "Cloud computing", "Computer data storage", "Memory address", "Page table", "Parsec (parser)", "Server (computing)", "Skylake (microarchitecture)", "Software deployment", "Translation lookaside buffer", "Virtual machine", "X86" ], "id": "c0cf6192ba294ee9a7a7edda864a7d0dad5ac35d", "inCitations": [ "0231ffa4b9b095efbf0f302898cd7abd7dd0b764", "6f0c1898575d56d1c1073b1f2eb6cba5bc931005" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "469-480", "journalVolume": "", "outCitations": [ "533d720a8542b707c316d39cf5beeb58738af86d", "3b621e9a6b99f32caa518116cb400035d1deed29", "11b0e5ee27e7fdf989632f157ff204fc8962918c", "0571492ae2aa6df23ebbfc9f6e12ce6c0eb38845", "2a59eb5eacb88eb893a31fc8bdee2c4385e22d7a", "19554445f1f3ea7b54be06a74a0d0840ade02be5", "4c689148ee5e9d6d116f6babbfab21bf2116802e", "c7341dcd5e6c71b149edf808331ab1e8b37cd03b", 
"48215f3287f7d527dc63c5b504ca8397d3bdef4e", "211f2beaaf36bb6a920a63dbbef6842cb1d22468", "1154b2fd6fb913b02eb6f64f5287a6b75a506e64", "32dc6016338a2098147e5edbb72c7c5670f78133", "73dd5dde28119e41dd0f0a07275b7f722c4619d2", "69652a30c1badf6a4c21006f3ec8da3de976cbaf", "0b95931f0a32d3f7dfd92ddee84ce577e935cea9", "1bed30d161683d279780aee34619f94a860fa973", "44f779d08d629e915ee7cbe1c0f4f17e9f6a626f", "8009b1c8cc4af8d3d4b792ac32926487a428172e", "343a384d5476ead9496f96559aba5ad09e95e01e", "671958087f3c24e7b025019476be8918302270e2", "8bf5afa21a0bd74551b261a7399eac4ffe2494e5", "054be29f5016aa668fce1a3eee1be40a2c001f46", "daeff61502115efc4b9ee81607a8e5489215ea88", "8314d58a250867e083838d177a40946039903e7b", "1c32ad0a42109fab826eb3054df7cfc33b424125", "19de90c933c20849c85d5428c8a643210b97ec83", "18633256bb17ba0744518479c0752ca87f0d03c6", "8007305d525a0802f09002b7a5bca2bb3f23ed7d", "1c2aed008f7bcb769d0e6f8109434c766fb22bde", "417ab9b8b003982222017ef585e19680366609f3", "3000b16ee204ffed4c602ed6f93fc7a692850b6e", "c5a00bd4aca85ea79ee05326ec34efe5cda92510", "1b62e2f7ac7a8d3c13773dfa23d05f8ab82b1f23", "e8b55466fd0563ad80e0534dc2ddf709b7f54dd8", "1a5ce35fc5ad575c2c9e5f692bf263082d656f80", "5d3c30ae77994e16fb2ee486cff96d4c52bccfd9", "45f9391ba46daa4b119063f4d5077a1b7f7fde75" ], "paperAbstract": "With increasing deployment of virtual machines for cloud services and server applications, memory address translation overheads in virtualized environments have received great attention. In the radix-4 type of page tables used in x86 architectures, a TLB-miss necessitates up to 24 memory references for one guest to host translation. 
While dedicated page walk caches and such recent enhancements eliminate many of these memory references, our measurements on the Intel Skylake processors indicate that many programs in virtualized mode of execution still spend hundreds of cycles for translations that do not hit in the TLBs.\n This paper presents an innovative scheme to reduce the cost of address translations by using a very large Translation Lookaside Buffer that is part of memory, the POM-TLB. In the POM-TLB, only one access is required instead of up to 24 accesses required in commonly used 2D walks with radix-4 type of page tables. Even if many of the 24 accesses may hit in the page walk caches, the aggregated cost of the many hits plus the overhead of occasional misses from page walk caches still exceeds the cost of one access to the POM-TLB. Since the POM-TLB is part of the memory space, TLB entries (as opposed to multiple page table entries) can be cached in large L2 and L3 data caches, yielding significant benefits. Through detailed evaluation running SPEC, PARSEC and graph workloads, we demonstrate that the proposed POM-TLB improves performance by approximately 10% on average. The improvement is more than 16% for 5 of the benchmarks. 
It is further seen that a POM-TLB of 16MB size can eliminate nearly all TLB misses in 8-core systems.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080210" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c0cf6192ba294ee9a7a7edda864a7d0dad5ac35d", "sources": [ "DBLP" ], "title": "Rethinking TLB designs in virtualized environments: A very large part-of-memory TLB", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "c0f5e31b87541a09d192c716129b8155aabd8387": { "authors": [ { "ids": [ "3491345" ], "name": "Linghan Zhang" }, { "ids": [ "39553256" ], "name": "Sheng Tan" }, { "ids": [ "37302154" ], "name": "Jie Yang" } ], "doi": "10.1145/3133956.3133962", "doiUrl": "https://doi.org/10.1145/3133956.3133962", "entities": [ "Adversary (cryptography)", "Amiga Reflections", "Authentication", "Biometrics", "Enhanced entity\u2013relationship model", "Liveness", "Medical ultrasound", "Microphone", "Passphrase", "Password", "Replay attack", "Sampling (signal processing)", "Signal reflection", "Smartphone", "Speaker recognition" ], "id": "c0f5e31b87541a09d192c716129b8155aabd8387", "inCitations": [], "journalName": "", "journalPages": "57-71", "journalVolume": "", "outCitations": [ "16c7ae080b59625c3cdffcd0de95e588b1fbf546", "fbbfa8ba780727e16b8f2da9d3f7da7a85b2838b", "0b4d07005f9a8b406697353a29a9f7d79caf6f59", "439f7388292f5b8ab94af8653fdb2ee5972e898e", "5dcb58a65f746859e70be21c184910eacee3e827", "64a989daeb4a4ddfa712a663bbec55fab5f0d88a", "4de74a4c970a9f7492ee93e4b06a210c08d2bc08", "06580a9d9c86611f3ae6cac9cadd0389c64007c0", "465d3e6d53505a45c1ef0a7c43d939086f696801", "c4904f4882a479780018d2edafabbba56276bd52", "0eeae1d66cc0a9d34c3ff27850fe24fe94d7059f", "228f44f499ed5b1c40dde98737b969bc3e64311e", "4ff599f2f4a40f2f2504a0600d78d5ca7dc07232", "45ab0e45058a75694ae9a5a14d131d9aaf0df3d4", "adbcadc4704488a329c32e4b44cc6895da958fed", "85b56e4562f670f5e0f34b23d111095cac12e10b", 
"d82330aa6351c46ef8cb3f5946ae67c3b71adf97", "689655056c8afbc8e600ec537e696cda1e0b2a15", "e52024109bb1f31b5724b7dc35418d4e0869c780", "00ecab93857c258b58cac2950290dc1c96f69302", "3548aaf3ddd94eeec8e79e73a5d7dbea38e8b056", "c030c6d72c4bb170602120d6ef447564648f726d", "063bd27b8bc32b68e5df4765fc159fc8874975aa", "91ef663032c9045d9d5f26067919e650ab87db96", "aab604bdeb581da4f510b4465b974829c9ffd920", "e62ed9118d7858add92141d178a31a1150ab0175", "3bec80650c8580d4fe47399b896ef4c2cdb01e26", "cef35c99165528e58d69af5e935cb62d58612f22" ], "paperAbstract": "Voice biometrics is drawing increasing attention as it is a promising alternative to legacy passwords for mobile authentication. Recently, a growing body of work shows that voice biometrics is vulnerable to spoofing through replay attacks, where an adversary tries to spoof voice authentication systems by using a pre-recorded voice sample collected from a genuine user. In this work, we propose VoiceGesture, a liveness detection system for replay attack detection on smartphones. It detects a live user by leveraging both the unique articulatory gesture of the user when speaking a passphrase and the mobile audio hardware advances. Specifically, our system re-uses the smartphone as a Doppler radar, which transmits a high frequency acoustic sound from the built-in speaker and listens to the reflections at the microphone when a user speaks a passphrase. The signal reflections due to user's articulatory gesture result in Doppler shifts, which are then analyzed for live user detection. VoiceGesture is practical as it requires neither cumbersome operations nor additional hardware but a speaker and a microphone that are commonly available on smartphones. Our experimental evaluation with 21 participants and different types of phones shows that it achieves over 99% detection accuracy at around 1% Equal Error Rate (EER). 
Results also show that it is robust to different phone placements and is able to work with different sampling frequencies.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133962", "https://acmccs.github.io/papers/p57-zhangA.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c0f5e31b87541a09d192c716129b8155aabd8387", "sources": [ "DBLP" ], "title": "Hearing Your Voice is Not Enough: An Articulatory Gesture Based Liveness Detection for Voice Authentication", "venue": "CCS", "year": 2017 }, "c131f2b65169e3162e2d6430019bad81c7919ed5": { "authors": [ { "ids": [ "17804514" ], "name": "Wonbae Kim" }, { "ids": [ "1716765" ], "name": "Young-ri Choi" }, { "ids": [ "1739708" ], "name": "Beomseok Nam" } ], "doi": "", "doiUrl": "", "entities": [ "Apache Hadoop", "Authentication", "Computational complexity theory", "Curiously recurring template pattern", "Enterprise resource planning", "Initialization (programming)", "Overhead (computing)", "Overhead projector" ], "id": "c131f2b65169e3162e2d6430019bad81c7919ed5", "inCitations": [ "cf0d46fa6b061d20017554f76829b2e9e2cc883b", "17d77d5e2db5b9aaf54b8240f829b1d4f077df29" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "204-207", "journalVolume": "", "outCitations": [ "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "16139ba6fa6ad2828c20abdf5d9f34687836f932", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "47947ed7d4c12855b1b5a4c4ec3123528761d64b", "0541d5338adc48276b3b8cd3a141d799e2d40150", "036d544defb7f8e6297bd4c57a3b430d04a269e8" ], "paperAbstract": "We analyze YARN container overhead and present early results of reducing its overhead by dynamically adjusting the input split size. YARN is designed as a generic resource manager that decouples programming models from resource management infrastructures. 
We demonstrate that YARN’s generic design incurs significant overhead because each container must perform various initialization steps, including authentication. To reduce container overhead without changing the existing YARN framework significantly, we propose leveraging the input split, which is the logical representation of physical HDFS blocks. With input splits, we can combine multiple HDFS blocks and increase the input size of each container, thereby enabling a single map wave and reducing the number of containers and their initialization overhead. Experimental results shows that we can avoid recurring container overhead by selecting the right size for input splits and reducing the number of containers.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101140" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c131f2b65169e3162e2d6430019bad81c7919ed5", "sources": [ "DBLP" ], "title": "Mitigating YARN Container Overhead with Input Splits", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "c148372050e3580e4bdf9f0e13ee24c9d71112f8": { "authors": [ { "ids": [ "1728624" ], "name": "Wei Chen" }, { "ids": [ "1786877" ], "name": "Jia Rao" }, { "ids": [ "1718639" ], "name": "Xiaobo Zhou" } ], "doi": "", "doiUrl": "", "entities": [ "Apache Hadoop", "Best-effort delivery", "Big data", "Computer cluster", "Data center", "In-memory database", "Jumpstart Our Business Startups Act", "MapReduce", "Operating-system-level virtualization", "Preemption (computing)", "Quality of service", "Requirement", "Scheduling (computing)", "Synthetic data" ], "id": "c148372050e3580e4bdf9f0e13ee24c9d71112f8", "inCitations": [ "de580138291876803d259d195c4ce571f6572e3a" ], "journalName": "", "journalPages": "251-263", "journalVolume": "", "outCitations": [ "bbc2a698c2fb2b76e256cc51a9d7c37765ab51b6", "08632fe2b934ed15d3499e7321282c81adc2c390", "01b54e85d5b02ad6af205106739a409a105fee93", 
"24281c886cd9339fe2fc5881faf5ed72b731a03e", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "3000e77ed7282d9fb27216f3e862a3769119d89e", "835916e7ad1231d5aa2985340b0ee543cadbb5b6", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "47f0f577e9c2053d3dd3a101ff572fa8c6e21a69", "3a043714354fe498752b45e4cf429dbae0fb2558", "a43dfb040d60d0df3dbe66a52b920e05a1ac3083", "11b12a29a9efb60a892b48fc61e70ab63e59b37e", "1bf81b7244373f9c1aed0f25590e261087e67a85", "756d1f8f07a83f3cfc0edaa81493a9f109628e1b", "090599a2caf4591c87699ad850c75554cd712937", "0608d9937c074520cdc93cc444cc1c77039c5332", "9ee6209432316baf6776838917e06bca4d874747", "4c40ba88b4c895b5c4d94fd8024e87f3a6b2d602", "185b2081a3ff8156cc2562e6064cd1dacd593b6f", "188c0013d5f79072ee97f8a48190cbe54b2009b1", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "2988e34168fa91398fa397baf823af2063893e9c", "4bf59b4d21968de33020e78cd8f20306eac2c247", "11310368999afdce94bca4316eea38216b2446c5", "0d868efa67bf06b1f784d60769c082fd9a58893e", "293259880f015cb01a6aadd60c21e90c82eebcc0", "95e74ffd3ab865b6e4a63ee65483a657597d5457", "7a978f2902460e732c50c36a171deb11733df1fc", "9da1d06e9afe37b3692a102022f561e2b6b25eaf", "090030e0d1aa117008e9e9fa4abdee0a95455f4a", "3b7e2038ec22cf637df70c833d473b0f3b43713a", "31b27a3b4ff89993eb92e8b1353edead8d5f2520", "594710511ce2177ff7dbbc62fa75dbf14fc7ca26", "1455be30b0bb364ffc4c35d2cf1ee05cfb32595f" ], "paperAbstract": "Data centers are evolving to host heterogeneous workloads on shared clusters to reduce the operational cost and achieve higher resource utilization. However, it is challenging to schedule heterogeneous workloads with diverse resource requirements and QoS constraints. On the one hand, latency-critical jobs need to be scheduled as soon as they are submitted to avoid any queuing delays. On the other hand, best-effort long jobs should be allowed to occupy the cluster when there are idle resources to improve cluster utilization. 
The challenge lies in how to minimize the queuing delays of short jobs while maximizing cluster utilization. Existing solutions either forcibly kill long jobs to guarantee low latency for short jobs or disable preemption to optimize utilization. Hybrid approaches with resource reservations have been proposed but need to be tuned for specific workloads. In this paper, we propose and develop BIG-C, a container-based resource management framework for Big Data cluster computing. The key design is to leverage lightweight virtualization, a.k.a, containers to make tasks preemptable in cluster scheduling. We devise two types of preemption strategies: immediate and graceful preemptions and show their effectiveness and tradeoffs with loosely-coupled MapReduce workloads as well as iterative, in-memory Spark workloads. Based on the mechanisms for task preemption, we further develop a preemptive fair share cluster scheduler. We have implemented BIG-C in YARN. Our evaluation with synthetic and production workloads shows that low-latency and high utilization can be both attained when scheduling heterogeneous workloads on a contended cluster.", "pdfUrls": [ "https://www.usenix.org/sites/default/files/conference/protected-files/atc_slides_chen_wei_0.pdf", "https://www.usenix.org/system/files/conference/atc17/atc17-chen_wei.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/chen-wei" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/c42e/4f44d489b2c2e6383a9bf7d9ec9907432ad0.pdf", "s2Url": "https://semanticscholar.org/paper/c148372050e3580e4bdf9f0e13ee24c9d71112f8", "sources": [ "DBLP" ], "title": "Preemptive, Low Latency Datacenter Scheduling via Lightweight Virtualization", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "c16c4fa113cd2c93f7557e05831039ed1436735a": { "authors": [ { "ids": [ "3152086" ], "name": "Shuangchen Li" }, { "ids": [ "3310968" ], "name": "Dimin Niu" }, { "ids": [ "2807420" ], "name": "Krishna T. 
Malladi" }, { "ids": [ "2946187" ], "name": "Hongzhong Zheng" }, { "ids": [ "38805656" ], "name": "Bob Brennan" }, { "ids": [ "27905006" ], "name": "Yuan Xie" } ], "doi": "10.1145/3123939.3123977", "doiUrl": "https://doi.org/10.1145/3123939.3123977", "entities": [ "Application-specific integrated circuit", "Artificial neural network", "Bandwidth (signal processing)", "Bitwise operation", "Convolutional neural network", "Dynamic random-access memory", "Functional completeness", "Graphics processing unit", "In-memory database", "Internet bottleneck", "Parallel computing", "Random-access memory", "Speedup" ], "id": "c16c4fa113cd2c93f7557e05831039ed1436735a", "inCitations": [ "0c4fe1f1a8043e8f4175b21faca1b72bff8033e6" ], "journalName": "", "journalPages": "288-301", "journalVolume": "", "outCitations": [ "07c3c9f1be15b52e259309b2e0a9ca71fa7dc76f", "9788f37ff5ad7aee1f2f6d041f0580ad84b7f226", "5bc38d62a09e26105973662c420628810b597750", "167f78125336294e184773d1469f816944af7e11", "2394c6644efa856f0da160a0f0031d74cd3b5000", "85f5b05e7eba438f1fc4bc6e21cc1af00c424fbc", "bc3a52c29cb8755d0abab5b7d1f9c3e2dad2b38c", "0b99d677883883584d9a328f6f2d54738363997a", "03d55467b20e662fbaa8416e853f57c93834a9fb", "40870be6b7b471628dd1115e99eae48de8f6e114", "8c3b449ed5e0e32e1e1934176265cec8dbc2bb4f", "0015d8b6ec47ec2bc4bc0564a11e2f98a3971650", "55bc52bbec8972d62874bcbe169dac573b57d1df", "1f4d6c1d0a7191c677049444e05dc282d46a34e1", "0693ff4b3a8d1452b897a876d3ffe6b2074e98e4", "b08ece14885f61fac2482316b306888a3d6caed7", "9f1f065bf08cd90431cc051267a708f56436cd82", "2d1b2392585b09297dd79a14ca3fb853133d64e3", "97f37efade44dabfd25b467d594c843d56db875d", "1827de6fa9c9c1b3d647a9d707042e89cf94abf0", "318a5a4119b27ca433e037a0f1a23f609a2845d4", "41d044095628119bda85189d7b4e8acd7bd8d79e", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "09b8120cbc52e7df46122e8e608146289fddbdfa", "611cfde00067dc81bc1ce6bfe68ceef79b349e25", "4390f4a06a036b8f04cbb4fe7611fa5af9492797", "1ba6cd8053aaf95221b6c2b001a929a3665f4ea8", 
"a5bd15d203c6aa740aba16776b422db010e66b58", "4e8505919eb22265f107ebbeeee3fa78bf6d893a", "f088374812301ed93fbfe8c5b72bf1351c084c01", "0756d1e7ed9e0d20f0c6e7cfbebfc7153db8d3a1", "6435805ebe3abd7c02fae390edad37c1a5c7c5a6", "b6a8f2d4f99277f1b7bf3b7f08c61abec4687eb5", "bb117349638a1d63be1b105bba0e152bd6c031f8", "508997c8cca393a84ae490dc3d142fae15225d9b", "72be3a9006e226b6faf9161e789f4e34a974e80a", "99d80987446ecc7fb546826e7bccebb2fdc5fa12", "74624d8d89d192e89640ead9371e3a1766fd4ffe", "01299bf5dce79d85aaa0d938670a93ddeeda4d0e", "0b1c6f52c76b441cb2598f3b8ac132d921ec8274", "3ac3cf82ff8f43db2e467f792d22d57d068f7ee0", "54a15f2c25bec4274d5bb423dbc5002426a506a3", "7c6c7a97488fdbb7c06f85c345b348183bf0a704", "44e0a7d4f3039e6eeef8873362c5ab0bd6ef235c", "123ae35aa7d6838c817072032ce5615bb891652d", "1d082b779e530b0188a967ddf773697ebedf55b6", "3718b6ec16c8a5081215707d16df97f67c6301d3", "58462eed4e22d34d35d31be2b902b4eb18a231e0", "3687c3f91cb061219e817df716341a92e8bcff83", "4b5f67cba9a1f98a5390ed9cadcf018671c02c08", "63bc7beec90c98b54add6b8d5767c10e36caa667", "3364bc50921a9566d61ef8cb73baa82341725e4b", "1dec8f5106d11047aaaf126121110cbf890f17c3", "3174b10d5efe0987ba6940a4e66943c2c3cbb3eb", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "7458f8bfffecb1baf72e32590a1da5ca8ba923d5", "179f80848143cf109fa6aebae6c3844da03b062c", "6f93e0325e577f49f4bed46a2adcfee4a649dc83", "a1e0d801a0fd064a64810a0eb6291ab52e4a96c1", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "1e40d8b7ccac6afbfdf5c89f203f368735e051f9", "10b014e882764f5800ecdcbaba1fa08795d0c54d", "061356704ec86334dbbc073985375fe13cd39088", "8b053389eb8c18c61b84d7e59a95cb7e13f205b7", "f01bf24bef27ed92321860d30081eb9d08ab5c2f", "01fcae344d2edb715bcc63a40b6052c0331741bd", "37e49c57dd4d0849380d177222db53e52ff21347" ], "paperAbstract": "Data movement between the processing units and the memory in traditional von Neumann architecture is creating the \"memory wall\" problem. 
To bridge the gap, two approaches, the memory-rich processor (more on-chip memory) and the compute-capable memory (processing-in-memory) have been studied. However, the first one has strong computing capability but limited memory capacity/bandwidth, whereas the second one is the exact the opposite.\n To address the challenge, we propose DRISA, a <u>D</u>RAM-based <u>R</u>econfigurable <u>I</u>n-<u>S</u>itu <u>A</u>ccelerator architecture, to provide both powerful computing capability and large memory capacity/bandwidth. DRISA is primarily composed of DRAM memory arrays, in which every memory bitline can perform bitwise Boolean logic operations (such as NOR). DRISA can be reconfigured to compute various functions with the combination of the functionally complete Boolean logic operations and the proposed hierarchical internal data movement designs. We further optimize DRISA to achieve high performance by simultaneously activating multiple rows and sub-arrays to provide massive parallelism, unblocking the internal data movement bottlenecks, and optimizing activation latency and energy. We explore four design options and present a comprehensive case study to demonstrate significant acceleration of convolutional neural networks. The experimental results show that DRISA can achieve 8.8× speedup and 1.2× better energy efficiency compared with ASICs, and 7.7× speedup and 15× better energy efficiency over GPUs with integer operations.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123977" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c16c4fa113cd2c93f7557e05831039ed1436735a", "sources": [ "DBLP" ], "title": "DRISA: a DRAM-based reconfigurable in-situ accelerator", "venue": "MICRO", "year": 2017 }, "c23aaab5d4a9fb966703211356e8d19e9a63ad22": { "authors": [ { "ids": [ "3797471" ], "name": "William M. 
Mellette" }, { "ids": [ "39515847" ], "name": "Rob McGuinness" }, { "ids": [ "37504857" ], "name": "Arjun Roy" }, { "ids": [ "1927236" ], "name": "Alex Forencich" }, { "ids": [ "2259827" ], "name": "George Papen" }, { "ids": [ "2199298" ], "name": "Alex C. Snoeren" }, { "ids": [ "1892184" ], "name": "George Porter" } ], "doi": "10.1145/3098822.3098838", "doiUrl": "https://doi.org/10.1145/3098822.3098838", "entities": [ "Centralisation", "Control plane", "Crossbar switch", "Data center", "Distributed control system", "Fat tree", "Network planning and design", "Network switch", "Requirement", "Software deployment", "Testbed" ], "id": "c23aaab5d4a9fb966703211356e8d19e9a63ad22", "inCitations": [ "33e28ab30ce23a4abeedeae3f4213fcba80d1947", "e3bf088fa035fea66e7bf72cc3ce3afa9a5b521c" ], "journalName": "", "journalPages": "267-280", "journalVolume": "", "outCitations": [ "d1faed16b87535fe273aa0d68b537cd304d40bee", "acbea940374ec1acb2f070a14e76cd85acf313b2", "2d2d7e9c4075868151459adb9940ca455512cf03", "56abb48a526e875551b28b2e430feef241e0b437", "065c8bfcb45e8c342d26aa1855cf292f9a5cbeff", "764ace9519283e45664e490a6df581cb68b5250b", "14ad41324f149cb81f0e00db847b5f6e62da01aa", "6357bd31db46d2114ba6b4dc145e85d5a669a488", "19b304df6f13798a0745eeaf8f4573b202a43e5f", "943cf22e168a86fec0381ca380474c1da39e509c" ], "paperAbstract": "The ever-increasing bandwidth requirements of modern datacenters have led researchers to propose networks based upon optical circuit switches, but these proposals face significant deployment challenges. In particular, previous proposals dynamically configure circuit switches in response to changes in workload, requiring network-wide demand estimation, centralized circuit assignment, and tight time synchronization between various network elements--- resulting in a complex and unwieldy control plane. 
Moreover, limitations in the technologies underlying the individual circuit switches restrict both the rate at which they can be reconfigured and the scale of the network that can be constructed.\n We propose RotorNet, a circuit-based network design that addresses these two challenges. While RotorNet dynamically reconfigures its constituent circuit switches, it decouples switch configuration from traffic patterns, obviating the need for demand collection and admitting a fully decentralized control plane. At the physical layer, RotorNet relaxes the requirements on the underlying circuit switches---in particular by not requiring individual switches to implement a full crossbar---enabling them to scale to 1000s of ports. We show that RotorNet outperforms comparably priced Fat Tree topologies under a variety of workload conditions, including traces taken from two commercial datacenters. We also demonstrate a small-scale RotorNet operating in practice on an eight-node testbed.", "pdfUrls": [ "http://conferences.sigcomm.org/sigcomm/2017/files/program/ts-7-1-RotorNet.pdf", "http://cseweb.ucsd.edu/~gmporter/papers/sigcomm17-rotornet.pdf", "http://doi.acm.org/10.1145/3098822.3098838" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c23aaab5d4a9fb966703211356e8d19e9a63ad22", "sources": [ "DBLP" ], "title": "RotorNet: A Scalable, Low-complexity, Optical Datacenter Network", "venue": "SIGCOMM", "year": 2017 }, "c25e4858cda0027a5a8b5383b1fd0648a6e68119": { "authors": [ { "ids": [ "1749522" ], "name": "Gokarna Sharma" }, { "ids": [ "1725541" ], "name": "Ramachandran Vaidyanathan" }, { "ids": [ "2582238" ], "name": "Jerry L. 
Trahan" }, { "ids": [ "1932642" ], "name": "Costas Busch" }, { "ids": [ "39840790" ], "name": "Suresh Rai" } ], "doi": "10.1109/IPDPS.2017.51", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.51", "entities": [ "Algorithm", "Autonomous car", "Autonomous robot", "Latent class model", "Mobile robot", "Robot", "Visibility (geometry)" ], "id": "c25e4858cda0027a5a8b5383b1fd0648a6e68119", "inCitations": [ "4cec08b724bad0607de0903401fe640f0eef76be" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "513-522", "journalVolume": "", "outCitations": [ "2beabb0950823dbe5f9fddf546626a8c4acea2ed", "b7faa8135d1159d0356c0710ea267adf43852dab", "7ab9de59820edc16d34429ac2ec77c8ff60b1486", "09932fd31e24c867952a7ad4c0277ee9dbdc6da5", "2a62bd4483ea6249455ad969bc50b21795e0fe6d", "d349cfd12b4c108e8b40983bbe4552d875d3761a", "7670a03964a3d658b3e962fc085f5af0738d7d36", "3cf87d1cbb7dff05e0bd302bf83f7601310bcb37", "078a5c035a62f81b57105dd2e85c0a606a702709", "159e32e3bf9f0ddd5a5aa987a5d036345036556f", "75116d7c8d8c02eea1be9caf319f1e1f1ab6f715", "874381fff381d10cfbe2e795d6c543713798e228", "3db6d7915cf6487148244049b8b12c2d66fb9a27", "07065b5481b0ee7599965b03df3a4b6e4c990577", "38f69e92858fb51ece47c5a2de5390607829af79", "0b74247faf95970ad8a0614efc91ec9305a38446", "e9b1ae133ae34f8348c19c1c1066439eddbf9ab0", "3c089d795f5a8855ce187c7d151b632a663d619b", "25e33d2f1876e9a8393345b44960277acf2edee4", "5a75bdd5cea284804e220126603805a0e3a0710f" ], "paperAbstract": "We consider the distributed setting of N autonomous mobile robots that operate in Look-Compute-Move (LCM) cycles and communicate with other robots using colored lights (the robots with lights model). We study the fundamental problem of repositioning N autonomous robots on a plane so that each robot is visible to all others (the Complete Visibility problem) on this model; a robot cannot see another robot if a third robot is positioned between them on the straight line connecting them. 
There exists an O(1) time, O(1) color algorithm for this problem in the semi-synchronous setting. In this paper, we provide the first O(log N) time, O(1) color algorithm for this problem in the asynchronous setting. This is a significant improvement over an O(N)-time translation of the semi-synchronous algorithm to the asynchronous setting. The proposed algorithm is collision-free - robots do not share positions and their paths do not cross.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.51" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c25e4858cda0027a5a8b5383b1fd0648a6e68119", "sources": [ "DBLP" ], "title": "O(log N)-Time Complete Visibility for Asynchronous Robots with Lights", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "c284f581d09cfba344fc234b70e560065980a26f": { "authors": [ { "ids": [ "2873256" ], "name": "Rujia Wang" }, { "ids": [ "1686367" ], "name": "Youtao Zhang" }, { "ids": [ "40243313" ], "name": "Jun Yang" } ], "doi": "10.1109/HPCA.2017.9", "doiUrl": "https://doi.org/10.1109/HPCA.2017.9", "entities": [ "Baseline (configuration management)", "Information leakage", "Memory bandwidth", "PATH (variable)", "Random-access memory", "Scheduling (computing)", "Server (computing)" ], "id": "c284f581d09cfba344fc234b70e560065980a26f", "inCitations": [ "fcf8efb59680ef79bcca894947aa46578d2bbd8c", "2c74b71b0ef24c20fc959c7bd82fa82097187327", "a6994ee043e174871983386d6a78a3f3be6c09da" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "325-336", "journalVolume": "", "outCitations": [ "8f8de213b1318e0ef0914008010e87ab64ab94ff", "5e7a7259528f032ae282347ff43a61c82bab5db1", "3ef44315caebb6169b7ffbf9886cd782193ab40e", "548aaa1aafdf25ad10b7e8ea9dd4904803502979", "0ed6417bda30f909210a4bc65a7e100cc6dc7c8d", "283550fce0fdc0876db5df533625dffdfcd8d099", "8f1247646e29e07dddbec698f281d06cee87acbe", 
"52c2c050af5b32d4929b4b193967a3675d03aea0", "2b004e0484f4940fca341fa97ecb8ac94fe780a5", "20b63210954f7c5a70664f301dcd7196856ccfa7", "201213b124452451cd6f4f06bb94523aa861a60c", "24706cbf8c48414ed66db6fbf223c47452f7cbdc", "9837a70c231c0ef3d33c2c9f5b56afd40548acce", "076e9f5d5b3e813b0cfa5dd3e47f1b8591136bf2", "96ba6f5c06850c009e5b77094c0d4532744dedc2", "1924732c99bfdf5252b4220b7e5f98d744856661", "d2dfb3ab4c5579b398cab12932b8446bed2b8345", "434fa04db769935ae61bbcf4d9faa602b9a8c730", "06bf0862b9b3465f895ef5bf3530cfe66a428e22", "85398d5f19157c91bf00da3d36210e72d57887e4", "47a7e2ac4ad74a45c6ff47d43c47ff6126573c8c", "19218913ef99ba9acd2491d8bab1d154cb375fa3", "92eaba06af12761b5c64b84e6028d21cd05af9dd", "19f7caf88ba1e30eb85bdab58b092e46b1a054c0", "304c2b0b6e8e1cc746117a14257dbc024d5135e9", "43dcd30e653b6a66efe18b78a9eed9c3bdeaaf23" ], "paperAbstract": "Path ORAM (Oblivious RAM) is a recently proposed ORAM protocol for preventing information leakage from memory access sequences. It receives wide adoption due to its simplicity, practical efficiency and asymptotic efficiency. However, Path ORAM has extremely large memory bandwidth demand, leading to severe memory competition in server settings, e.g., a server may service one application that uses Path ORAM and one or multiple applications that do not. While Path ORAM synchronously and intensively uses all memory channels, the non-secure applications often exhibit low access intensity and large channel level imbalance. Traditional memory scheduling schemes lead to wasted memory bandwidth to the system and large performance degradation to both types of applications. In this paper, we propose CP-ORAM, a Cooperative Path ORAM design, to effectively schedule the memory requests from both types of applications. CP-ORAM consists of three schemes: P-Path, R-Path, and W-Path. P-Path assigns and enforces scheduling priority for effective memory bandwidth sharing. 
R-Path maximizes bandwidth utilization by proactively scheduling read operations from the next Path ORAM access. W-Path mitigates contention on busy memory channels with write redirection. We evaluate CP-ORAM and compare it to the state-of-the-art. Our results show that CP-ORAM helps to achieve 20% performance improvement on average over the baseline Path ORAM for the secure application in a four-channel server setting.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.9", "http://people.cs.pitt.edu/~zhangyt/research/hpca2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c284f581d09cfba344fc234b70e560065980a26f", "sources": [ "DBLP" ], "title": "Cooperative Path-ORAM for Effective Memory Bandwidth Sharing in Server Settings", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "c2a25c7c61bda924c73fc25c83c9d6f0add65859": { "authors": [ { "ids": [ "32112132" ], "name": "Abhinandan S. 
Prasad" }, { "ids": [ "2029888" ], "name": "David Koll" }, { "ids": [ "32909486" ], "name": "Jesus Omana Iglesias" }, { "ids": [ "3334155" ], "name": "Jordi Arjona Aroca" }, { "ids": [ "2809994" ], "name": "Volker Hilt" }, { "ids": [ "1799074" ], "name": "Xiaoming Fu" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Connected component (graph theory)", "OpenVMS", "Operating-system-level virtualization", "Provisioning", "Queueing theory", "Requirement", "Scalability" ], "id": "c2a25c7c61bda924c73fc25c83c9d6f0add65859", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "42-53", "journalVolume": "", "outCitations": [ "0cc547cea26938e8c4165059ed0975cabec2c660", "4c9b5b3ec35b92357936efe9401110e37e2e046c", "0b02dcb2b508664f7d3df2365da19c7f8a3c4e1b", "110b17aede6e6a4fc8aeec50a54fe4dddc2c4779", "4581948531998d5e5f23c131081ea0cdd9066bfe", "586d29fb229cdb1a50b7e71268f22cf25524f534", "265be4efcca87268845c0a2a30422d14f127b607", "2e262644b605f1dce0b5a72b83879e4a41a4e5c6", "011e2584db840ef423fe50bfb94b394412587ce6", "6168919f450a8ed906051f2562abbfe51aa4d97d", "1884fc68add9f4a30ce491261266c21b8ce6a563", "3b7c5da3a3888be5818159f31fb50d1e382efa26", "9ab8c73169846af8178afed1b339e57acb36aa90", "0f2978ce872998f10cacb807abb4bfc4980c8c92", "a818086b1d93615d1e6bac0ed69fb68c07beee1b" ], "paperAbstract": "Virtualization helps to deploy the functionality of expensive and rigid hardware appliances on scalable virtual resources running on commodity servers. However, optimal resource provisioning for non-trivial services is still an open problem. 
While there have been efforts to answer the questions of when to provision additional resources in a running service, and how many resources are needed, the question of what should be provisioned has not been investigated, in particular, for complex applications or services, which consist of a set of connected components, where each component in turn potentially consists of multiple component instances (e.g., VMs or containers). Each instance of a component can be run in different flavors (i.e., number of cores or amount of memory), while the service constructed by the combination of these component configurations must satisfy the customer Service Level Objective (SLO). In this work, we offer to service providers an answer to the what to deploy question by introducing Rconf, a system that automatically chooses the optimal combination of component instances for non-trivial network services. In particular, we propose an analytical model based on robust queuing theory that is able to accurately model arbitrary components, and develop an algorithm that finds the combination of their instances, such that the overall utilization of the running instances is maximized while meeting SLO requirements.", "pdfUrls": [ "http://www.net.informatik.uni-goettingen.de/publications/2008/Abhi_CCGrid2017.pdf", "http://dl.acm.org/citation.cfm?id=3101119", "http://user.informatik.uni-goettingen.de/~dkoll/files/pubs/CCGrid17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c2a25c7c61bda924c73fc25c83c9d6f0add65859", "sources": [ "DBLP" ], "title": "Optimal Resource Configuration of Complex Services in the Cloud", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "c2c618d731c8cc74ad7f0882d19a6188b2bafb6f": { "authors": [ { "ids": [ "2160733" ], "name": "Derek Hower" }, { "ids": [ "3100338" ], "name": "Harold W. Cain" }, { "ids": [ "2642780" ], "name": "Carl A. 
Waldspurger" } ], "doi": "10.1109/HPCA.2017.33", "doiUrl": "https://doi.org/10.1109/HPCA.2017.33", "entities": [ "CAS latency", "Catastrophic interference", "Data center", "Interference (communication)", "Jumpstart Our Business Startups Act", "Memory bandwidth", "Memory controller", "Quality of service", "Requirement prioritization", "Total cost of ownership" ], "id": "c2c618d731c8cc74ad7f0882d19a6188b2bafb6f", "inCitations": [ "dae0a4ef50b347f145ed6de8f6c7fb94d350f937" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "505-516", "journalVolume": "", "outCitations": [ "3c0bc4e9d30719269b0048d4f36752ab964145dd", "3fbba3719b3e07084cbc85daf2a1a094c9335b6d", "3ef44315caebb6169b7ffbf9886cd782193ab40e", "0b885bb186445ee0c50277d990eca18c53fef09b", "5706f5d4404fd41a2bad05eaf8962118ec928c90", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "1bf4b9197dac1a8878d8b2aaee3dbfcc9f89eea3", "26e72340c47b7348e1b1de285f89dd96cc925b27", "3000e77ed7282d9fb27216f3e862a3769119d89e", "e259a25e9b241618a55abe9962f6656c993ef094", "2960c89331eb7afa86584792e2e11dbf6a125820", "4dd69c412369b729aeb1e9aee37d3f41c5a20e14", "be4c6170ee4fd72ff5c8fc92e3d6ba5cba774cf6", "6ea670c7deabcf9f0a516a5b89049f1febfbbe38", "17e49d6850b9fd3d888720fa23bde2194a7785b9", "3cb1c133cf24aac036f08c278713290dd5f7b5f6", "60de50417a31e293540992a3a52af6a2f62de7c2", "6e4460c73472d635159b515a1117397ad1ee2bbe", "3ed84f2fbdc4dc6450919ec5b017e66440a5833c", "06545f48a6b25a3cafd76e514b2310254972888b", "5a9c6a51445a3f05ed1a204a9ff7fd366fd89c5b", "23f4f3430cd97f034563dc0a41039c5fbc58f6a3", "26fe59f82dcfe629385a1882c8ea83883aa9fcc9", "eb1005ca3e8d5935418b86c12461588538bc108f", "08632fe2b934ed15d3499e7321282c81adc2c390", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "48710c82bea8283382f81fcdba540160a0b00e16", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "15e176fc33eff28d9379a689dbd90211841eb1b0", "48534b21548e3692ad7d866387f1dc7f543109e1", 
"1d31aceaa4fbf3a8afca3139675482b1cdf84495", "31b27a3b4ff89993eb92e8b1353edead8d5f2520", "40ccd404abbc52c306442fc7c396e50021d764e7", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "6d44790b6d952eff28f302998e8121f90786e3ff", "110c6e59991e2e9abe674f24c3a19c19488f034d", "327a02b19a60319cc35be860ad0259a5c1aef920", "75fc99cbe59005d4068c3253aa6d4e472721a6f6", "0f1181e2f58395f8f6d6f14707e4e44a489aaf3f", "2eb8a42529ff20d376be980e41bfd5d032a6abd4", "110c050c6c992d2b956f7b47d717810ac5c91bdc", "635210aa01bd460f5dad80c5fffef8a0dfb4993e" ], "paperAbstract": "Higher integration lowers total cost of ownership (TCO) in the data center by reducing equipment cost and lowering energy consumption. However, higher integration also makes it difficult to achieve guaranteed quality of service (QoS) for shared resources. Unlike many other resources, memory bandwidth cannot be finely controlled by software in existing systems. As a result, many systems running critical, bandwidth-sensitive applications remain underutilized to protect against bandwidth interference. In this paper, we propose a novel hardware architecture allowing practical, software-controlled partitioning of memory bandwidth. Proportionally Allocated Bandwidth at the Source and Target (PABST) precisely controls the bandwidth of applications by throttling request rates at the source and prioritizes requests at the target. We show that PABST is work conserving, such that excess bandwidth beyond the requested allocation will not go unused. For applications sensitive to memory latency, we pair PABST with a simple priority scheme at the memory controller. 
We show that when combined, the system is able to lower TCO by providing performance isolation across a wide range of workloads, even when co-located with memory-intensive background jobs.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.33" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c2c618d731c8cc74ad7f0882d19a6188b2bafb6f", "sources": [ "DBLP" ], "title": "PABST: Proportionally Allocated Bandwidth at the Source and Target", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "c3104518ac6d598e26efd1fbb7f6fde59c9527d8": { "authors": [ { "ids": [ "4089267" ], "name": "Kwan Hui Lim" }, { "ids": [ "7525580" ], "name": "Jeffrey Chan" }, { "ids": [ "2476111" ], "name": "Shanika Karunasekera" }, { "ids": [ "1688394" ], "name": "Christopher Leckie" } ], "doi": "10.1145/3077136.3080778", "doiUrl": "https://doi.org/10.1145/3077136.3080778", "entities": [ "Algorithm", "Baseline (configuration management)", "F1 score", "Flickr", "Personalization", "Recommender system" ], "id": "c3104518ac6d598e26efd1fbb7f6fde59c9527d8", "inCitations": [ "0029a9f54c78106298cba746db4f78fca2daf6d8", "37b98cd039c65b4d9795baa4bd5e4bbbfea4a513", "b7703ea0dab21473d223f8bc909afc5ca0777aed" ], "journalName": "", "journalPages": "325-334", "journalVolume": "", "outCitations": [ "5d89b6f31b41b2c52a8fd95657721eea3f66bc2c", "308e6dbbc4436e96453110897b97a9ae79b413a6", "22438e339d3855e0ef42eaeedf47d23b3864427c", "f310643a22ec50a74a64f6203932b9407215d964", "30d427966703eeef1b9f9599fcba17954288a8e0", "723b87aca073b7b82970a4580f64ba8c6c64691b", "0e2c4ad06ec462a961f195492941bc70afd560ae", "5becff7d8db7907df2b29b3e9a9c3b8cafe2caf7", "a331e03b10a17a130ada7ed65ebd6c21874b4a10", "4d7fe7577baeee91c267ee9f52cb0c567a6e2301", "b2c20a877a891ea97179658c06a6d552b50cba6e", "e003b48f6a8663f9ebbb91c45918046f9ab98265", "7e0c9ca20027212f7d66587abeb9bef2ab00b3c7", "26861e41e5b44774a2801e1cd76fd56126bbe257", 
"a292991f2c383a2d6e2207d0e5b5cd2add3a75c8", "6a5373d1d6c4abed8cde06e3421dff572d2b908a", "bdc0c46dcf17cff22e2f60d13a7ddbf769bb2f98", "6fcf9564edffcd2f6fa96a3ab4e35641890c6a8e", "0dfd1181f8b866a7f0b3e5f63574d7998c778be8", "0ea170e6155bb2363008286684bb8a87bca2a400", "37b98cd039c65b4d9795baa4bd5e4bbbfea4a513", "2ce4e06a9fe107ff29a34ed4a8771222cbaacc9c", "1f439e03bbdf80f9993dc646b6eb3b998342c37a", "7c72393febe25ef5ce2f5614a75a69e1ed0d9857", "3ad6bd5c34b0866019b54f5976d644326069cb3d", "0a519e9c18c9f4d0dd7f278b7eace9e6a4ed5c99", "ea37a88804bab0dfdaf0b7e489bf4ec60b5ed9e6", "d2e7d37dac6b9eb313bd7918f162485111608bea", "5194f75c94585901338e588837e9c3b1cee423bb", "0e80487ac65e3e8f77794e07e5a436e83f416f6c", "ce2277b1ffc7ba25dba1e8d430a22e4b7e6411a7", "1b3c86ad6c149941750d97bd72b6b0122c1d8b5e", "54505490fe6be8ea95753e9a708b0e3d2544f7e5", "730993e8a0e4a55462ef4a1ae2fce2958c7a6f12", "d5a39a446f3e08f91c6d66c6050d457a7474c994", "68db7962b63bb775575403e650870f53e418f062", "2cde5f9a70b8c5db1bdc93c6c05e0f4172b0c212", "b42dc2424b9524d68ba18ad6cc565c285caaad6b", "2d6790440f887a7e549be8693dcbd96622a4a993", "0180af2e55dabec222e90b1b8a3fe872da16cded", "55183002153769351634745ea04fefc829840b5b", "049fd41f971298ca0babd9a0977c181cb9cd4652", "02cc6a5944d57d2353a55639c7b77336b94f29b6", "e1e99de11af88f771548f1237f72876ee1dd8653" ], "paperAbstract": "Personalized itinerary recommendation is a complex and time-consuming problem, due to the need to recommend popular attractions that are aligned to the interest preferences of a tourist, and to plan these attraction visits as an itinerary that has to be completed within a specific time limit. Furthermore, many existing itinerary recommendation systems do not automatically determine and consider queuing times at attractions in the recommended itinerary, which varies based on the time of visit to the attraction, e.g., longer queuing times at peak hours. 
To solve these challenges, we propose the PersQ algorithm for recommending personalized itineraries that take into consideration attraction popularity, user interests and queuing times. We also implement a framework that utilizes geo-tagged photos to derive attraction popularity, user interests and queuing times, which PersQ uses to recommend personalized and queue-aware itineraries. We demonstrate the effectiveness of PersQ in the context of five major theme parks, based on a Flickr dataset spanning nine years. Experimental results show that PersQ outperforms various state-of-the-art baselines, in terms of various queuing-time related metrics, itinerary popularity, user interest alignment, recall, precision and F1-score.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080778" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c3104518ac6d598e26efd1fbb7f6fde59c9527d8", "sources": [ "DBLP" ], "title": "Personalized Itinerary Recommendation with Queuing Time Awareness", "venue": "SIGIR", "year": 2017 }, "c31a9721cbf58058cac64ae261f294e6e59b1d17": { "authors": [ { "ids": [ "37361540" ], "name": "Zheng Zhang" }, { "ids": [ "1718546" ], "name": "Dan Feng" }, { "ids": [ "2204183" ], "name": "Zhipeng Tan" }, { "ids": [ "3269044" ], "name": "Jianxi Chen" }, { "ids": [ "1720145" ], "name": "Wei Zhou" }, { "ids": [ "1690341" ], "name": "Laurence T. 
Yang" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.72", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.72", "entities": [ "Algorithm", "Black box", "Byte", "Computer data storage", "Flash memory", "Non-volatile memory", "Paging", "Phase-change memory", "SWAP (instrument)", "Volatility", "Wear leveling" ], "id": "c31a9721cbf58058cac64ae261f294e6e59b1d17", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "555-562", "journalVolume": "", "outCitations": [ "16658c73d2f3afe9677d42c673796b451abbc996", "15b850c6a6f23f212a4c862c9104746782941754", "3d00b0df652bd30656c5c3031a07793bce2f3f1f", "08f2b0d5682806b83185f974a2856242ea525ae3", "3aff5fb3d1e23dfc0c45989f71b4aa99b3a5784b", "233997563379e02d37778f80c028a34209de5817", "425c117685a681c6c6de55e2928dc87066b53fbb", "034f67581b577f5c9af4bf18f044a87b5480b602", "038da34be9467d33c7def3b8a99b2e1cf0b02c80", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", "6f45e84202ee1678772899d3473a0b5d5ee4d886", "0f3c23f95d2e1319c7f701fc4e55e51dda2fe1bc", "0ffbd9cd0fe4fa005fc9b6eea24ecf9bff67c806", "2fc8a439f0f73462f875870c403c90ea1fb99e41", "ccd9685f9041a896d14dc095221b7673e6ddd121", "8c06fb59a79b3b47dff8588302d8e6514a7f7a4a", "700ceaf012da1bbe8c8a2ff96f91c98baf7f1505", "2fc6d7c5fd1e05a81cbeb666c08511e8ef327a8c", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "12a491d732add45d9cec8bc29c326d66019031f0", "03b6a916498fa8591201a2de5f22344609b1e457", "b14fd8541f13eb69ba712202bc46a67b2194cfa2", "5bb770af1973f929e8622f17ddf378d439245144", "77b4c48a619a13ab774834df2cccdf39e149717a" ], "paperAbstract": "Phase Change Memory (PCM) is considered as one of the most popular candidates to replace flash memory in mobile consumer systems. 
PCM has many superior performance characteristics, including non-volatility, byte-addressability, low access latency and power consumption. However, it also suffers from finite program counts like flash memory. Prior researches used PCM as a black box, and implemented the wear leveling schemes in device controller, which failed to utilize file attributes in host side and result in poor efficiency of wear evenness. In this paper, we propose a file aware wear leveling algorithm (called FAWL) for PCM-based storage system in mobile consumer electronics. FAWL is designed in the host side, which combines file attributes and statistical information of PCM. It exploits rich attributes of files to divide files into different categories and distribute them in suitable pages to avoid extra swap overhead. In addition, by utilizing an adjust management in FAWL, the wear imbalance can be greatly mitigated. Experimental results show that FAWL effectively improves the lifetime of PCM compared with existing wear leveling algorithms, including random swapping, start-gap and segment swapping.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.72" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c31a9721cbf58058cac64ae261f294e6e59b1d17", "sources": [ "DBLP" ], "title": "File Aware Wear Leveling for PCM-based Mobile Consumer Electronics", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "c3741b06bfbca83978507febea3d87b49bbb25a4": { "authors": [ { "ids": [ "34269118" ], "name": "Haoyu Wang" }, { "ids": [ "37217705" ], "name": "Haiying Shen" }, { "ids": [ "2656336" ], "name": "Guoxin Liu" } ], "doi": "10.1145/3087556.3087559", "doiUrl": "https://doi.org/10.1145/3087556.3087559", "entities": [ "Data center", "Ethernet hub", "Experiment", "Network 
congestion", "Server (computing)", "Simulation", "Swarm", "Swarm intelligence", "Throughput" ], "id": "c3741b06bfbca83978507febea3d87b49bbb25a4", "inCitations": [ "20b8bc79e08aa2f066c2568bf5c93600270968a1" ], "journalName": "", "journalPages": "217-226", "journalVolume": "", "outCitations": [ "0c83169bf4ebb29979bfe47708cb6b79b6e28755", "b23a55fd057103ba299a97dbdc210ef8ffa8f923", "39300a6bb64f813bd233343b840cb169d8d0527f", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "5f3f9223c5c9f896be099bc177929febad508407", "83a1a478c68e21ee83dd0225091bf9d3444a8120", "206b20f225fc655dfac733b6f0bd8077ed86215e", "a990a973f578db34a604beb494b05e650a270a85", "6de80f14aa1aa717e3eafd73c269c50b2a7de390", "335d859423b6d2ba7d2283f30a14619687a20391", "43bcabcec7c2595c620cd6fd4c96f517ede80d4e", "ff636e89ac4a2452216ca85f8820d0a834821454", "1dbbfd2fdf7121f6ffd85841031d28283c5231d7", "7b5d40c43239945c5488c7e2b38a6aa82e46d053", "9678cf3b6a338581a01e0e92e0d9ec61dd0bc681", "d40c1e04c0f4a731da009a0ce22fbc23db497389", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "1376bd56c64639af4645625fd9755c83b2bf7cda", "327a02b19a60319cc35be860ad0259a5c1aef920", "663e064469ad91e6bda345d216504b4c868f537b", "340ed44fd4efcc2cd193bbce716ffa37df07ac3c", "2fca78e74863daa9ea77e065a263b1aece8797cd", "6560af8ca02fd6187bddb9dadd38ae863993a1c2", "aaf52a116479a0d4c8e9ab612fe9c4ff5516f3be", "0f6f717d198ab1b99a63814facaf2fceace6b0fe", "3bf64462fc3558ab7e9329d084a1af4cf0c87ebf", "783f81c4988147c94b85427ba7e9e372a23bf777", "2031a6decaf94ce41ac09fc355022429eeeb0e49", "5594c2ddde27f4262a53668ca9b09ad7a9453102", "9ee6209432316baf6776838917e06bca4d874747", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "72f0a90d8e22da6106c0925ad6e62d4d6242ea55", "9c196f82ed47a605b849083d089d7606691c6cba", "30ce9b53eaa730b4161226c2c8eaf95adb46add7" ], "paperAbstract": "In Web applications served by datacenter nowadays, the incast congestion at the front-end server seriously degrades the data request latency performance due to the vast data transmissions 
from a large number data servers for a data request in a short time. Previous incast congestion control methods usually consider the direct data transmissions from data servers to the front-end server, which makes it difficult to control the sending speed or adjust workloads due to the transient transmission of only a few data objects from each data server. In this paper, we propose a Swarm-based Incast Congestion Control (SICC) system. SICC forms all target data servers of one request in the same rack into a swarm. In each swarm, a data server (called hub) is selected to forward all data objects to the front-end server, so that the number of data servers concurrently connected to the front-end server is reduced, which avoids the incast congestion. Also, the continuous data transmission from hubs to the front-end server facilitates the development of other strategies to further control the incast congestion. To fully utilize the bandwidth, SICC uses a two-level data transmission speed control method to adjust the data transmission speeds of hubs. A query redirection method further reduces the request latency by balancing the transmission remaining times between hubs. 
Our experiments in simulation and on a real cluster demonstrate that SICC outperforms other incast control methods in improving throughput and reducing the data request latency.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087556.3087559" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c3741b06bfbca83978507febea3d87b49bbb25a4", "sources": [ "DBLP" ], "title": "Swarm-based Incast Congestion Control in Datacenters Serving Web Applications", "venue": "SPAA", "year": 2017 }, "c38d720b03ff03b82d5572c398ad7f3da6df18a4": { "authors": [ { "ids": [ "31577391" ], "name": "Matin Hosseini" }, { "ids": [ "2854134" ], "name": "Mohsen Amini Salehi" }, { "ids": [ "2979244" ], "name": "Raju Gottumukkala" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.62", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.62", "entities": [ "Baseline (configuration management)", "Closed-circuit television", "Crowdsourcing", "Digital video", "Display device", "Interactivity", "Job scheduler", "Overselling", "Real-time data", "Scheduling (computing)", "Simulation", "Smartphone", "Streaming media", "Video server" ], "id": "c38d720b03ff03b82d5572c398ad7f3da6df18a4", "inCitations": [ "e3a61c4233a0b7afa829b70ce49a541f997b83da" ], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "474-481", "journalVolume": "", "outCitations": [ "1759725a47c1076b11b0f6d31e0b8af313398481", "2813a242c323716c6c3d9bbc21e9753070e0ac1d", "4890aadb9ec8ef0a880d3606198159cb44c0a3b5", "7df7c4f8ddfe916e38bb9d2ff9e22b2a24dec486", "58a8026295647209a90d2a686282ca2fa6866bb7", "0d15e3ddce5438f40df2732c836c40ffcc6d1a38", "63e19bbe7f586957100ba73f745b1da24ac5473b", "aec8b2c9ad406a871f682cdc18b7b9670ac9c8e6", "4c2e5337ccce99d921a83dc048f3d02f92d01f3d", "2b0aa05feca0f0f6beefc272bf31228c450fd881", 
"30d365e363c265072343306f6208bfda5671d704", "9b00c3fa19371e9d3572c3da54078866b0738114", "ad17876696feebc4f43bbd1f159b569e042926b3", "30a82a63a339c1e69aac36b23900544fe9ec97bb", "a14bab862fc6b3f4a4db18cb8953aa8227446142", "ef2e9b65b3014d7d64bd325e6060db28bf23ec9a", "087b01579a8278ee092dabb2f7c1335cebba9b69", "555f40653ac8428abf4208eb904299d9e2bf7613" ], "paperAbstract": "Public safety officials want to have maximum situational-awareness though real-time information, such as video content, for natural disaster management. The video content can be generated by surveillance cameras or crowd-sourced (e.g., using smart-phones) and live-streamed to the Incident Commander. Such video contents need to be processed to adapt the characteristics of the specialized multi-view display devices. When a disaster occurs, there is a surge in the number of videos streamed to the Incident Commander that oversubscribes the processing servers and the network load. Incident Commanders, however, need a smooth and uninterrupted viewing experience specifically for the important events of interest that can change over time. The challenge is how to enable the Incident Commander to interactively prioritize important video streams to receive them uninterruptedly while the system is oversubscribed. In such a system, normal video streams (i.e., non-prioritized ones) should not be interrupted at the expense of prioritization. To address this challenge, in this research, we propose a stream-priority aware resource allocation mechanism to enable interactive video prioritization without a major impact on the flow of non-prioritized video streams. The mechanism includes a method to select appropriate tasks from the arriving ones and a method to map the selected task to the appropriate video server. 
Our simulation results express that the percentage of normal and prioritized video streaming tasks that have completed on-time are improved, when compared with baseline scheduling methods.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.62" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c38d720b03ff03b82d5572c398ad7f3da6df18a4", "sources": [ "DBLP" ], "title": "Enabling Interactive Video Streaming for Public Safety Monitoring through Batch Scheduling", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "c4810a223438a7d5f6cdd915b210b44873637943": { "authors": [ { "ids": [ "3149455" ], "name": "Daniel Steinmetzer" }, { "ids": [ "1960810" ], "name": "Daniel Wegemer" }, { "ids": [ "39431178" ], "name": "Matthias Schulz" }, { "ids": [ "1712634" ], "name": "J\u00f6rg Widmer" }, { "ids": [ "1734492" ], "name": "Matthias Hollick" } ], "doi": "10.1145/3143361.3143384", "doiUrl": "https://doi.org/10.1145/3143361.3143384", "entities": [ "Algorithm", "Creational pattern", "Data rate units", "Firmware", "Pseudorandomness", "Router (computing)" ], "id": "c4810a223438a7d5f6cdd915b210b44873637943", "inCitations": [], "journalName": "", "journalPages": "414-425", "journalVolume": "", "outCitations": [ "6f07a10dfbd583fdda034c7d606e53148f162f2d", "06e8e428d6c1e36575657c6c4aeda65e4930ef4b", "58392cd42505bf2bc0675610188f6465bc20fd6f", "75c5977d20fff5a7b2aa158f0ca91e1a09a3d07a", "4e4b8c8f09e9de2c7eee2aecb748334e0dc44611", "b41f996a9680b5f86ee7a05d7180998afedcd78b", "1943466070019e48204ebbee0914d87ced4ba09a", "d010052fe2129a5c80febd560732a7119486352f", "616bcc8949f3cd70ccb929269ce2ebc3c6ca7a7c", "58fef4ffd2147ccf08ceaf423702a915f92cdb34", "1bc04cbbce54bc027b6147eb0a49189a2691a35c", "81ed14364300805954f948abd7f2df397df233bf", 
"b5b68e61a21470c2f22c58f4aa19a7bf3882079d", "050ddf2b66b806c8807c0d8e230e548588bf6c04", "de39c6deadb182cb7baf04f0d8a70ba6124726fd", "839d13983d55f3aeeb8e644447fd9a4b5665fc56", "aee5fc5c59139923eea0046d8df4474fd2ea92a6", "0fdfc3fc7d89b5acc2e8c83ee62436d394eb8040", "8dfbe2b20613e73a988bf2cf23e599f6b2dc1c87", "1ac257a741e3ddf53d20b3ff04dd01f9eb998928", "02da7de4dab6e16a2872d2c68fbc24cfa5e60a0c", "38c5687a368a79e53ce428c01be44f59609df475", "4e5d7f01f254a73a7f670d07aaed7c4f087ddb61", "69c2ef519015c8d2fe5ec49fe3ede11eff3f733e", "5eb8f31c11cdfc30eba3205c3249f64e7678979f", "9905d9e816d7106bed6496eb8a3ad90947342afe", "0b84fe423401115d747d21f32c57ef0dede1c6f0", "631ba9b7d64abf91b1c647969d9cf68681fd78a0", "12c8c3c5fa57f7d67cc536b2abfb4df5362d773d", "094030386a98cba2abbf2c34944892a63570cb8a", "56ee03a70a10d1a0dfcff621ae60aef76fc43ad7", "77de7ce124dc980994ba80dda626c814543bdd67", "d25fd08c13f9a1a991e306950e2c3ee866395a4d", "5c9f2dc4df03ced1cd41e9d342e461a2c6efd6d6" ], "paperAbstract": "Achieving data-rates of multiple Gbps in 60 GHz millimeter-wave (mm-wave) communication systems requires efficient beam-steering algorithms. To find the optimal steering direction on IEEE 802.11ad compatible devices, state-of-the-art approaches sweep through all predefined antenna sectors. Recently, much more efficient alternatives, such as compressive path tracking, have been proposed, which scale well even with arrays with thousands of antenna elements. However, such have not yet been integrated into consumer devices. In this work, we adapt compressive path tracking for sector selection in off-the-shelf IEEE 802.1 lad devices. In contrast to existing solutions, our compressive sector selection tolerates the imperfections of low-cost hardware, tracks beam directions in 3D and does not rely on pseudo-random beams. We implement our protocol on a commodity router, the TP-Link Talon AD7200, by modifying the sector sweep algorithm in the IEEE 802.11ad chip's firmware. 
In particular, we modify the firmware to obtain the signal strength of received frames and to select custom sectors. Using this extension, we precisely measure the device's sector patterns. We then select the best sector based on the measured patterns and sweep only through a subset of probing sectors. Our results demonstrate, that our protocol outperforms the existing sector sweep, increases stability, and speeds up the sector selection by factor 2.3.", "pdfUrls": [ "http://doi.acm.org/10.1145/3143361.3143384" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c4810a223438a7d5f6cdd915b210b44873637943", "sources": [ "DBLP" ], "title": "Compressive Millimeter-Wave Sector Selection in Off-the-Shelf IEEE 802.11ad Devices", "venue": "CoNEXT", "year": 2017 }, "c4c9dd4a91176aa700a4f2058d93fcc03f54ecc6": { "authors": [ { "ids": [ "38739230" ], "name": "Pooja Aggarwal" }, { "ids": [ "2550384" ], "name": "Smruti R. Sarangi" } ], "doi": "10.1109/HiPC.2017.00027", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00027", "entities": [ "64-bit computing", "Algorithm", "Blocking (computing)", "Central processing unit", "Compare-and-swap", "Concurrent data structure", "Data item", "Data structure", "Device driver", "Grams", "LL parser", "Linearizability", "Load-link/store-conditional", "Lock (computer science)", "Memory footprint", "Non-blocking algorithm", "Operating system", "Parallel computing", "Pointer (computer programming)", "Shared memory", "Warez" ], "id": "c4c9dd4a91176aa700a4f2058d93fcc03f54ecc6", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "162-171", "journalVolume": "", "outCitations": [ "319ff7eb23e4c62597eef7e9863168eac0e23c3e", "0fca220343f411c7dac67b1f5fc1bcf5790cc030", "01d6463bd6dc85f938edb6e75c96c997c8b06799", "42c7203465451ef24d160f71c47028f953bb3ce8", "82b70d5ca408734a9a05203ef6a5c67042ce7409", "057e6e1b6621e94dfa556d577e2d041046480345", 
"961eb67eb799aa73428207dfe2a888fa509c3265", "0fb344be25a51b9dfca5e5b3d918da5522f31acb", "4fcfa58bda82134cdf2982ea12e653da6b553f89", "0e422bd90c8be636358d4eb75f05276b361d19d4", "5e816f92a010b97de6f1e1c5083add4662a0613b", "a9091ef790788c5d252cad94dd6862adf457e073", "6f25f21580c8f3fbbeb260c32468c15108a0eea8", "835c0aa9082e7fd7937c802f64e0393e267e6496", "068d0b393db03678ea1d346ee01871e91e88c560", "1cdedeb9461bdebedc47c7a358769f85dd7683ea", "38611b424808954be2c1375da1a873b1e2487ace", "d4fbd23eab46426607249fd4b811e010eddb7199", "042f443418ff2ff98a1dccbf49df9fa258dab707", "ccaf130cad7d3a1727b8c4ade78af6578501ffb3", "cbe1e69cab8e56f8b93e6ff2dfbe86adea693c13", "52e3dc11bce0c4bd5697298a07e66260e0943943", "1d57967ea204defcc913a7d42999081605ef88a2", "423e09f57acbab758429841d2b12fa0682707f2b", "0ced2ecad932ec86aaa043f9b3ec0d9c6e88fbb5" ], "paperAbstract": "Parallel programming models and paradigms are increasingly becoming more expressive with a steady increase in the number of cores that can be placed on a single chip. Concurrent data structures for shared memory parallel pro- grams are now being used in operating systems, middle-ware, and device drivers. In such a shared memory model, processes communicate and synchronize by applying primitive operations on memory words. To implement concurrent data structures that are linearizable and possibly lock-free or wait-free, it is often necessary to add additional information to memory words in a data structure. This additional information can range from a single bit to multiple bits that typically represent thread ids, request ids, timestamps, and other application dependent fields. 
Since most processors can perform compare-And-Set (CAS) or load-link/store-conditional (LL/SC) operations on only 64 bits at a time, current approaches either use some bits in a memory word to pack additional information (packing), or use the bits to store a pointer to an object that contains additional information (redirection), and the original data item. The former approach restricts the number of bits for each additional field and this reduces the range of the field, and the latter approach is wasteful in terms of space. We propose a novel and universal method called a memory word expander in this paper. It caches information for a set of memory locations that need to be augmented with additional information. It supports traditional atomic get, set, and CAS operations, and tries to maintain state for a minimum number of entries. We experimentally demonstrate that it is possible to reduce the runtime memory footprint by 20-35% for algorithms that use redirection. For algorithms that use packing, the use of the EXPANDER can make them feasible. The performance overhead is within 2-13% for 32 threads. When we compare the performance of the EXPANDER based non-blocking algorithms with the version that uses locks, we have a performance gain of at least 10-100X.", "pdfUrls": [ "http://www.cse.iitd.ernet.in/~srsarangi/files/papers/expander.pdf", "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00027" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c4c9dd4a91176aa700a4f2058d93fcc03f54ecc6", "sources": [ "DBLP" ], "title": "Expander: Lock-Free Cache for a Concurrent Data Structure", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "c4fb3564ec0da0a2733183384f51d79507fa4401": { "authors": [ { "ids": [ "9278198" ], "name": "Sohan Lal" }, { "ids": [ "36602386" ], "name": "Jan Lucas" }, { "ids": [ "1717074" ], "name": "Ben H. H. 
Juurlink" } ], "doi": "10.1109/IPDPS.2017.101", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.101", "entities": [ "Central processing unit", "Computer memory", "Data compression", "Entropy encoding", "Graphics", "Graphics processing unit", "Huffman coding", "Lossy compression", "Memory bandwidth", "Shannon's source coding theorem", "Speedup" ], "id": "c4fb3564ec0da0a2733183384f51d79507fa4401", "inCitations": [ "999597f2a0a3d943f116a78505343a3b415f68d3" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1119-1128", "journalVolume": "", "outCitations": [ "527f4ab202dd87c968b0123e412f65046fdaa115", "00156e79606084497789662dfaf59c3b54a10722", "12bc20a1963859e9f76afb4b308b90ded1cff1fe", "61cd159f10181b8cbbc25750b4b718009d649982", "5ab45c82a811162dc04efc9eea60f9b22b1e5a11", "2d6f002477015469075954c6748a1a85af352c94", "53aceaf7ad3ebfcf03f2ac54778cb8ed2b9636f2", "e7c6f67a70b5cf0842a7a2fc497131a79b6ee2c5", "1f1a1f0cd075cef63083c8ec15321021dbff2cfc", "50b343dbec4c5ad3933c71bfe9f63b63db02636e", "240ffbd7eead669292472c857c822b1880463929", "309ad0357af7722a24192781340881390055a3db", "2b857a46e838855f17a9ef97d90c4e972040bb6b", "6c86a995c3454d888713e66948c0d09b1451f0c2", "4f739534a366799e170599d3ff3d65597f0118db", "7ea15c138cc72588fa376ff819f4bb8ca0b324da", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "37b5850e3e75a3462f3991491ca26674925f233b", "32c8c7949a6efa2c114e482c830321428ee58d70", "0345b41c8a708048a9f1d27cea06b867b52eead7", "2c0dc8ea2b2dba866f54ec6b42a7ecc823e33997" ], "paperAbstract": "Modern Graphics Processing Units (GPUs) provide much higher off-chip memory bandwidth than CPUs, but many GPU applications are still limited by memory bandwidth. Unfortunately, off-chip memory bandwidth is growing slower than the number of cores and has become a performance bottleneck. Thus, optimizations of effective memory bandwidth play a significant role for scaling the performance of GPUs. 
Memory compression is a promising approach for improving memory bandwidth which can translate into higher performance and energy efficiency. However, compression is not free and its challenges need to be addressed, otherwise the benefits of compression may be offset by its overhead. We propose an entropy encoding based memory compression (E2MC) technique for GPUs, which is based on the well-known Huffman encoding. We study the feasibility of entropy encoding for GPUs and show that it achieves higher compression ratios than state-of-the-art GPU compression techniques. Furthermore, we address the key challenges of probability estimation, choosing an appropriate symbol length for encoding, and decompression with low latency. The average compression ratio of E2MC is 53% higher than the state of the art. This translates into an average speedup of 20% compared to no compression and 8% higher compared to the state of the art. Energy consumption and energy-delay-product are reduced by 13% and 27%, respectively. 
Moreover, the compression ratio achieved by E2MC is close to the optimal compression ratio given by Shannon’s source coding theorem.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.101" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c4fb3564ec0da0a2733183384f51d79507fa4401", "sources": [ "DBLP" ], "title": "E^2MC: Entropy Encoding Based Memory Compression for GPUs", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "c521fc23ef1ed38cfaf6ee3d901186b13a69dfdc": { "authors": [ { "ids": [ "1950599" ], "name": "Meikel P\u00f6ss" }, { "ids": [ "1731210" ], "name": "Tilmann Rabl" }, { "ids": [ "1738552" ], "name": "Hans-Arno Jacobsen" } ], "doi": "10.1145/3127479.3128603", "doiUrl": "https://doi.org/10.1145/3127479.3128603", "entities": [ "Apache Hadoop", "Apache Hive", "Benchmark (computing)", "Big data", "Blend modes", "Computer cluster", "Database", "Decision support system", "IBM Tivoli Storage Productivity Center", "In-memory database", "MapReduce", "NoSQL", "Open-source software", "Presto", "SAP HANA", "SHARK", "SQL", "Solution stack", "Technical standard", "The Void (virtual reality)", "Vortex", "Web 2.0" ], "id": "c521fc23ef1ed38cfaf6ee3d901186b13a69dfdc", "inCitations": [ "9fa2bbd621dd086731efa7a9cf9cca327ce21048" ], "journalName": "", "journalPages": "573-585", "journalVolume": "", "outCitations": [ "a776115d6567d38ed345c8c93fb23c7ff335cb1a", "3aed29136db8f1e5c6a89fc22d3ae4b4926a3555", "06fb8f5e59bac8b014e7ab70851b3568ea5a0a46", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "7081e3c86f5ee2f38c412386993e8361d1e42c00", "660aa29ca30b1e73f7a85abd97496435b76e0e8d", "5ee696d6a98d6dcb9936f8f49f34f637080dc4cb", "0541d5338adc48276b3b8cd3a141d799e2d40150", "0d2c4723e9e5925cde74bd879611fda6f6e3980b", "133eacaf0ad25b8364cb4510007d9363298e8adf", "59ca22b032e7e1fb4467a3b2ad577cbf9d8f0ea8", "9ee6209432316baf6776838917e06bca4d874747", "1455be30b0bb364ffc4c35d2cf1ee05cfb32595f", 
"a718b85520bea702533ca9a5954c33576fd162b0", "c071ef3f2ef2976ec8b5a137f63f85db2da00774", "947c6bf534ccd620044f77c3bd6068f633b421fb", "51f8b2d9898d11a8488c185b57e447ab79e7e24e", "231b2da6a3d29f2632d3c1ad1d3ec3f2fb6737d3" ], "paperAbstract": "The advent of Web 2.0 companies, such as Facebook, Google, and Amazon with their insatiable appetite for vast amounts of structured, semi-structured, and unstructured data, triggered the development of Hadoop and related tools, e.g., YARN, MapReduce, and Pig, as well as NoSQL databases. These tools form an open source software stack to support the processing of large and diverse data sets on clustered systems to perform decision support tasks. Recently, SQL is resurrecting in many of these solutions, e.g., Hive, Stinger, Impala, Shark, and Presto. At the same time, RDBMS vendors are adding Hadoop support into their SQL engines, e.g., IBM's Big SQL, Actian's Vortex, Oracle's Big Data SQL, and SAP's HANA. Because there was no industry standard benchmark that could measure the performance of SQL-based big data solutions, marketing claims were mostly based on \"cherry picked\" subsets of the TPC-DS benchmark to suit individual companies strengths, while blending out their weaknesses. In this paper, we present and analyze our work on modifying TPC-DS to fill the void for an industry standard benchmark that is able to measure the performance of SQL-based big data solutions. The new benchmark was ratified by the TPC in early 2016. 
To show the significance of the new benchmark, we analyze performance data obtained on four different systems running big data, traditional RDBMS, and columnar in-memory architectures.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3128603" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c521fc23ef1ed38cfaf6ee3d901186b13a69dfdc", "sources": [ "DBLP" ], "title": "Analysis of TPC-DS: the first standard benchmark for SQL-based big data systems", "venue": "SoCC", "year": 2017 }, "c5399265946e67fff2e3806f4ecaf474333d7fe1": { "authors": [ { "ids": [ "1732624" ], "name": "Elena Kakoulli" }, { "ids": [ "2013116" ], "name": "Herodotos Herodotou" } ], "doi": "10.1145/3035918.3064023", "doiUrl": "https://doi.org/10.1145/3035918.3064023", "entities": [ "Apache Hadoop", "Clustered file system", "Computer cluster", "Computer data storage", "Data-intensive computing", "Distributed File System (Microsoft)", "Expectation\u2013maximization algorithm", "Fault tolerance", "Load balancing (computing)", "Multi-objective optimization", "Network-attached storage", "Program optimization", "Requirement", "SPARK", "Throughput", "Web storage" ], "id": "c5399265946e67fff2e3806f4ecaf474333d7fe1", "inCitations": [], "journalName": "", "journalPages": "65-78", "journalVolume": "", "outCitations": [ "5f3f9223c5c9f896be099bc177929febad508407", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "6cbcd4239345787caff1884bf8029acfac87354f", "2da760f90c3d2bf6598becdde9063093f488548c", "bd80556653a915f53b932ad13189b9fa10453436", "0558c94a094158ecd64f0d5014d3d9668054fb97", "313e8120c31fda6877ea426d8a3be9bcf1b6e088", "2f6af58c7905fb8367652fe62fbb1f6ec7e28be0", "332f77fd05703c1607e3b57884ad31fb1fad0104", "1455be30b0bb364ffc4c35d2cf1ee05cfb32595f", "059eb34d95c73e37dca8e35b0ac5a2fb0142f3ee", "0d2f3f6abd86368a2eb9a6d0b37d1299ec5939a6", "4124e17cf8c45c4fd9eb7d6ba3ce807f52c5645d", "3c454dd92dd3a2736f78e97e1f1cb671c472f97d", "130ee77295f5f95e9c8a45c9c56bbc650258dba0", 
"2cdcddb08ae6060e94cba6c9b2b58b87324e686f", "65ad9178eb3e3bb6ef01df6ced56e389e911376c", "f19870a1b4847ca61beed722d557a50189479d27", "0541d5338adc48276b3b8cd3a141d799e2d40150", "9ee6209432316baf6776838917e06bca4d874747", "26c4c1dd27fdb449fe0267eac595930766917878", "87b5b79b89e5d7c4ea3f7cfaffe23350dca1c891", "3033df10e73f3061d21e58de6c141383815c0420", "b0b2f180faa09e7bfcb6bb8e57288c3b61f11116", "3358850706a8ad2eb8489bb7790e8bbd3a5b6dba", "18c241f9333c5b6590184b0d5f218b3ccd51a605", "00fc19647ca1aa9a3910abe7cf3414af2f811e64", "b7014a268c35e377366634d6b8370a8a7db285a5", "0a5e20769fc8306a24d4426278493a999438dd42" ], "paperAbstract": "The ever-growing data storage and I/O demands of modern large-scale data analytics are challenging the current distributed storage systems. A promising trend is to exploit the recent improvements in memory, storage media, and networks for sustaining high performance and low cost. While past work explores using memory or SSDs as local storage or combine local with network-attached storage in cluster computing, this work focuses on managing multiple storage tiers in a distributed setting. We present OctopusFS, a novel distributed file system that is aware of heterogeneous storage media (e.g., memory, SSDs, HDDs, NAS) with different capacities and performance characteristics. The system offers a variety of pluggable policies for automating data management across the storage tiers and cluster nodes. The policies employ multi-objective optimization techniques for making intelligent data management decisions based on the requirements of fault tolerance, data and load balancing, and throughput maximization. At the same time, the storage media are explicitly exposed to users and applications, allowing them to choose the distribution and placement of replicas in the cluster based on their own performance and fault tolerance requirements. 
Our extensive evaluation shows the immediate benefits of using OctopusFS with data-intensive processing systems, such as Hadoop and Spark, in terms of both increased performance and better cluster utilization.", "pdfUrls": [ "http://doi.acm.org/10.1145/3035918.3064023" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c5399265946e67fff2e3806f4ecaf474333d7fe1", "sources": [ "DBLP" ], "title": "OctopusFS: A Distributed File System with Tiered Storage Management", "venue": "SIGMOD Conference", "year": 2017 }, "c6204b1d27738930494ec31cd8073554dac59a70": { "authors": [ { "ids": [ "12526150" ], "name": "Azeem Aqil" }, { "ids": [ "9532792" ], "name": "Karim Khalil" }, { "ids": [ "1805144" ], "name": "Ahmed Osama Fathy Atya" }, { "ids": [ "3000659" ], "name": "Evangelos E. Papalexakis" }, { "ids": [ "38774813" ], "name": "Srikanth V. Krishnamurthy" }, { "ids": [ "1699210" ], "name": "Trent Jaeger" }, { "ids": [ "1706474" ], "name": "K. K. Ramakrishnan" }, { "ids": [ "4179522" ], "name": "Paul Yu" }, { "ids": [ "1703726" ], "name": "Ananthram Swami" } ], "doi": "10.1145/3143361.3143399", "doiUrl": "https://doi.org/10.1145/3143361.3143399", "entities": [ "Data breach", "Identifier", "Inference engine", "Intrusion detection system", "Network packet", "Network performance", "Software-defined networking", "Span and div", "Storage area network", "Testbed" ], "id": "c6204b1d27738930494ec31cd8073554dac59a70", "inCitations": [], "journalName": "", "journalPages": "134-146", "journalVolume": "", "outCitations": [ "0c484c5d9e0a13c2bf195c102e2908928c94d9a3", "2b19ed550e24d44c43dee558a062d33d62a52130", "140ef45a955ed132122b07fe59d5c40026ab39eb", "191cef5d1d84b81c8eb77119c7029fa74d23d9bc", "1b2dbf8b3095d9dfda0c03db156f19ed6dbe1764", "525ac8da0afe2ba149b0e26c07bf9da1fa62650d", "49bb05461318b24279b60666f3cb9a5f8cd68e56", "7efe0dc4cde074bd87089491a6f95dde84397cca", "da01f7fcc5c7eeba75bc09a41fdd946e65210090", "45b70bd4e1321359cb2fba8e56be7e2151976362", 
"9d88a4c2a971545c3546be1b16cf030ed5781947", "9241ea3d8cb85633d314ecb74b31567b8e73f6af", "06c9511a3a29f1afa3971b1885ad56b5a890dbdc", "318608cbfc2a786f116e1c671df50603e8bdf6b9", "cd800900fbd9bc89a35ff597a7ddfe74aceefa55", "2f983533f6dd084b0851652b5991fd6464706ff4", "8f158e59875886b6aa1ca7b629c286158c055ce5", "6adc921e32dcb8d6b01f02f47e2b5e8f243c2ebb", "363d109c3f00026f9ef904dd8cc3c935ee463b65", "3591e02811f3efe3ad4a8272725993958ab6f9f6", "2167e9ef397ed5af69285a9e9018e9b373969d8d", "065ffd76b77d59b2bcbf9f71df4ff27e0adc613b", "8112c4305b88d85199267e9e03d3a0aca4432059", "271841937e574fbb4fed829ed59b0d3ed7f322ce", "0cfe1b85e5f1d56e41a95c3b2fa274e9fe8b45d0", "e7c257223f5899f11d23f283d45a702b25e4cfa1", "824bfd0c23602d0ea3618f4fbf77af1d023938ec", "02d843e3a008e76cf6a4c23bd01023d264b05686", "07769f0c29e21aac81857616435b846a669b29bf", "0f318aa5af40450af9ba2f50872bdf26741e510a", "d131c0813e4ee72e3449236ccf4faa4a014c67dc", "42b1400e01b976b8e9bfa7b772ec41338dc32ed3", "6e901357a09169cde6b865e0785c045d7298d918", "1164ec0b8d2bd8e95b9fc07e9669ff9d4d379c64" ], "paperAbstract": "We have recently seen an increasing number of attacks that are distributed, and span an entire wide area network (WAN). Today, typically, intrusion detection systems (IDSs) are deployed at enterprise scale and cannot handle attacks that cover a WAN. Moreover, such IDSs are implemented at a single entity that expects to look at all packets to determine an intrusion. Transferring copies of raw packets to centralized engines for analysis in a WAN can significantly impact both network performance and detection accuracy. In this paper, we propose Jaal, a framework for achieving accurate network intrusion detection at scale. The key idea in Jaal is to monitor traffic and construct in-network packet summaries. The summaries are then processed centrally to detect attacks with high accuracy. 
The main challenges that we address are (a) creating summaries that are concise, but sufficient to draw highly accurate inferences and (b) transforming traditional IDS rules to handle summaries instead of raw packets. We implement Jaal on a large scale SDN testbed. We show that on average Jaal yields a detection accuracy of about 98%, which is the highest reported for ISP scale network intrusion detection. At the same time, the overhead associated with transferring summaries to the central inference engine is only about 35% of what is consumed if raw packets are transferred.", "pdfUrls": [ "http://www.cs.ucr.edu/~krish/jaal.pdf", "http://doi.acm.org/10.1145/3143361.3143399" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c6204b1d27738930494ec31cd8073554dac59a70", "sources": [ "DBLP" ], "title": "Jaal: Towards Network Intrusion Detection at ISP Scale", "venue": "CoNEXT", "year": 2017 }, "c63f67249790de5c28525f335633439f98e5b707": { "authors": [ { "ids": [ "2644997" ], "name": "Yanwen Xie" }, { "ids": [ "1718546" ], "name": "Dan Feng" }, { "ids": [ "1713374" ], "name": "Fang Wang" } ], "doi": "10.1109/ICPP.2017.32", "doiUrl": "https://doi.org/10.1109/ICPP.2017.32", "entities": [ "Apache Hadoop", "Clustered file system", "Data striping", "Encoder", "Erasure code", "RAID", "Scalability", "Simulation" ], "id": "c63f67249790de5c28525f335633439f98e5b707", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "231-240", "journalVolume": "", "outCitations": [ "861b57b3f1245014970f23a36b39a64d33f96097", "2da760f90c3d2bf6598becdde9063093f488548c", "09af98e0978fddd4f69d9e9893bb6754ede9d98d", "2999a40e21f47fdf7180505c8389a0b07017b649", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "f4f37512b9786dbd9980d343e47fdc41032d0ca6", "837f1004cb74156f6ff2cea1a136d354e4c537e8", "3f4297d2e5ae196f30f9a18dcd8400dfca5d2e47", "33e512bec260b88278d05ddcb6cb8549ca82f8dc", "6ee44bf140a6d4bcccaeea0340592eae6dffc63e", "5f3f9223c5c9f896be099bc177929febad508407", "1aefeed6a487431dc1c1137d8b18ad299b328d73", "3168681722207c86827e596860115a2977ce761f", "29f3f5918946bf0a4d75bf5244f993847d03e26c", "20a44558eed182a971f7add68ecc5931fbca2a65", "3db5c29024481b22c07ca76d3493183de9865575", "c36ac2cf18d597b48df38521af19fd32da918a92", "308e3605f9b7a3a7bb6c61a8ab0b90603735d945", "b7014a268c35e377366634d6b8370a8a7db285a5" ], "paperAbstract": "Modern distributed storage systems often store redundant data in multiple replications or erasure coding according to their access frequencies. Multiple replications scheme is well-performance for hot data while erasure coding scheme is storage-efficient for warm and cold data. When hot data turn cold, an encoding procedure starts to do the conversion. However, due to sequential striping, current conversion methods do not perform well for different data layouts, and cause risky blocks and expensive network consumption. In this paper, we propose Sice, a new encoder which deploys non-sequential striping. It constructs non-sequential stripes according to the data layout, performs conversion quickly with low overheads and ends to no reduction of system reliability. The results of both simulation and evaluation show that Sice gains almost the same good performance for different data layouts and has a great scalability. Sice helps HDFS-RAID reduce network consumption by about 65% and reduce influence on concurrent I/O-intensive applications by about 60%.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.32" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c63f67249790de5c28525f335633439f98e5b707", "sources": [ "DBLP" ], "title": "Non-Sequential Striping for Distributed Storage Systems with Different Redundancy Schemes", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "c65e4da776e144df39180df3324c92163c407b15": { "authors": [ { "ids": [ "35511217" ], "name": "Sireyya Emre Kurt" }, { "ids": [ "35563079" ], "name": "Vineeth Thumma" }, { "ids": [ "39173720" ], "name": "Changwan Hong" }, { "ids": [ "2342667" ], "name": "Aravind Sukumaran-Rajam" }, { "ids": [ "1750948" ], "name": "P. Sadayappan" } ], "doi": "10.1109/HiPC.2017.00040", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00040", "entities": [ "Graphics processing unit", "Hardware performance counter", "Matrix multiplication", "Requirement", "Sparse matrix" ], "id": "c65e4da776e144df39180df3324c92163c407b15", "inCitations": [ "58d93090d4d1b7b028f29a92f01833429f9938b0" ], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "283-293", "journalVolume": "", "outCitations": [ "09b11dd581fd9d00c3a55d4a49f83660bd7c3d9a", "601911f388ba3a8b5d666b31afc61aa6dfd1d433", "65883bf4e5267467a5db1547976153f820b71fb4", "015a97b794713b9d3ecd2a419964acc7679d2c30", "d6c4c76076efecb15655274adc648af8a445ed3a", "26e95419a1c2f4953b7d1ef6f9a0fcf03fa295ec", "3900598e45f2b5fea25d10242a4e33da9696b214", "af2f3729c333e2d0571751afcced589692c167f4", "8a329ee1e058295d51da149c9e61be957dacff4b", "092217c2267f6e0673590aa151d811e579ff7760", "3e69317455f7db9b1325239c6f6f52cbe29a5491", "5f491a183c71b0322b16e4f5dc69538c50db79e0", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "cbb557ccf729e043cbb9ef6dd709bf75c9f64cdd", 
"2e0ee957a4dea94ed706028d4206e0c2fe369de7", "0987c923a5fb79934ff47ecdafdf858a51143750", "4beb4761e026ef008b6c4311fdf32a190c8d9d11", "b2da7e41dda6896444d8444b745f9c050a67dc4c", "04b46e1c535f2e3d1b743fc9571bb856be7e79af" ], "paperAbstract": "Tight data movement lower bounds are known for dense matrix-vector multiplication and dense matrix-matrix multiplication and practical implementations exist on GPUs that achieve performance quite close to the roofline bounds based on operational intensity. For large dense matrices, matrix-vector multiplication is bandwidth-limited and its performance is significantly lower than matrix-matrix multiplication. However, in contrast, the performance of sparse matrix-matrix multiplication (SpGEMM) is generally much lower than that of sparse matrix-vector multiplication (SpMV). In this paper, we use a combination of lower-bounds and upper-bounds analysis of data movement requirements, as well as hardware counter based measurements to gain insights into the performance limitations of existing implementations for SpGEMM on GPUs. The analysis motivates the development of an adaptive work distribution strategy among threads and results in performance enhancement for SpGEMM code on GPUs.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00040" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c65e4da776e144df39180df3324c92163c407b15", "sources": [ "DBLP" ], "title": "Characterization of Data Movement Requirements for Sparse Matrix Computations on GPUs", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "c6623245523f9a3809ee63eb98bbe084d987e12b": { "authors": [ { "ids": [ "2407825" ], "name": "Sreeram Potluri" }, { "ids": [ "33699159" ], "name": "Anshuman Goswami" }, { "ids": [ "2428889" ], "name": "Davide Rossetti" }, { "ids": [ "32234128" ], "name": "C. J. 
Newburn" }, { "ids": [ "3135311" ], "name": "Manjunath Gorentla Venkata" }, { "ids": [ "1794961" ], "name": "Neena Imam" } ], "doi": "10.1109/HiPC.2017.00037", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00037", "entities": [ "Application programming interface", "Bluetooth", "CUDA", "Central processing unit", "Computation", "Computer cluster", "Correctness (computer science)", "GeForce", "Graphics processing unit", "Imperative programming", "InfiniBand", "NVLink", "PCI Express", "Pascal (microarchitecture)", "Performance per watt", "Remote direct memory access", "Throughput" ], "id": "c6623245523f9a3809ee63eb98bbe084d987e12b", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "253-262", "journalVolume": "", "outCitations": [ "5048b1199db383beda869e742691c22ca15e1d56", "497fc8616563777046ecc89c85771b2ab446a518", "0451388ae4562c2833dd5e39f897208c9c1d7bb9", "49c866296354ab54c42d234645cf0700ff4a7315", "3e6f5b5e8b7cb5408da8cd10d0cc625b00910291", "6348bb3b140c47ea29621d1dc5218db52433840b", "9fb2c0811b9099829ff3ba91cb533378701139ab", "a4bb437b1452d4a2b513c288bacee071d9050c88" ], "paperAbstract": "GPUs have become an essential component for building compute clusters with high compute density and high performance per watt. As such clusters scale to have 1000s of GPUs, efficiently moving data between the GPUs becomes imperative to get maximum performance. NVSHMEM is an implementation of the OpenSHMEM standard for NVIDIA GPU clusters which allows communication to be issued from inside GPU kernels. In earlier work, we have shown how NVSHMEM can be used to achieve better application performance on GPUs connected through PCIe or NVLink. As part of this effort, we implement IB verbs for Mellanox InfiniBand adapters in CUDA. We evaluate different design alternatives, taking into consideration the relaxed memory model, automatic memory access coalescing and thread hierarchy on the GPU. 
We also consider correctness issues that arise in these designs. We take advantage of these designs transparently or through API extensions in NVSHMEM. With micro-benchmarks, we show that a Nvidia Pascal P100 GPU is able saturate the network bandwidth using only one or two of its 56 available streaming multiprocessors (SM). On a single GPU using a single IB EDR adapter, we achieve a throughput of around 90 million messages per second. In addition, we implement a 2dstencil application kernel using NVSHMEM and compare its performance with a CUDA-aware MPI-based implementation that uses GPUDirect RDMA. Speedups in the range of 23% to 42% are seen for input sizes large enough to fill the occupancy of Nvidia Pascal P100 GPUs on 2 to 4 nodes indicating that there are gains to be had by eliminating the CPU from the communication path when all computation runs on the GPU.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00037" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c6623245523f9a3809ee63eb98bbe084d987e12b", "sources": [ "DBLP" ], "title": "GPU-Centric Communication on NVIDIA GPU Clusters with InfiniBand: A Case Study with OpenSHMEM", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "c66c1e49e9dbbb52a45850d84c60168b44b4faa7": { "authors": [ { "ids": [ "2275943" ], "name": "Aravind Machiry" }, { "ids": [ "1709440" ], "name": "Eric Gustafson" }, { "ids": [ "2128455" ], "name": "Chad Spensky" }, { "ids": [ "3425110" ], "name": "Christopher Salls" }, { "ids": [ "12874147" ], "name": "Nick Stephens" }, { "ids": [ "8199136" ], "name": "Ruoyu Wang" }, { "ids": [ "1741440" ], "name": "Antonio Bianchi" }, { "ids": [ "1711909" ], "name": "Yung Ryn Choe" }, { "ids": [ "1715189" ], "name": "Christopher Kr\u00fcgel" }, { "ids": [ "1711242" ], "name": "Giovanni Vigna" } ], "doi": "", "doiUrl": "", "entities": [ "Android", "Computer", "Cryptography", "Kernel (operating system)", 
"Linux", "Linux", "Memory address", "Mobile phone", "Operating system", "Patch (computing)", "Software bug", "Trusted Execution Technology", "Trusted execution environment", "User space", "Vulnerability (computing)" ], "id": "c66c1e49e9dbbb52a45850d84c60168b44b4faa7", "inCitations": [ "a1c97189ee5ff915158f0a82d29838e55007eeaa", "6934305a246cc0b5776dcdea2030584eb7a0f274", "897c1b53f4b01c4203d336b3a981230200ea3fa2", "69ada7bb8d3284ff3eba80fcf39453633475dfee", "4a727288433c680afbfa12ac798d3c687b91501c" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "080c336698f5d7a15169e5ad98fa62a0bbf6085c", "0c97dfc049282a97222999ee4ddfa216a72c74a1", "44aaefa65d7c36cc734572f177a4dca87a5a1e0c", "a8432f8160dc899e66976b9887efa1d4a544cd56", "42837d8944952c26fc6930a5ec79219d4b408fc8", "2cef6527284b58e5820748e72191cc4b9d6e1112", "67a281a8cab0053ae725214e766fef1f01a08c03", "2fafad1553f320615034ef985bbc3378033de73c", "07da538323a72ecc2e95a86a9de72482a530f17e", "5493f512ba418c21f1ce20e20985157f7509007c", "3f4eef59703ff179e2faf08d04156d00acb1b352", "1de5ae8534fc76323e4d926e10dc0fc76a28a361", "7952392115106e04b1daea1ad8ddeae2137e95bb", "1bf9569aa108b6c19c8cc4fc15470cedddbd7ba9", "09faa1cc5c8784d811502c5137bf63b5f1ac2934", "1c126c0ddc80c1fa177adb9ef32bdf84e0306846", "8d0a150bd390ba3f9f32f7b12cda58edf436aa3f" ], "paperAbstract": "In the past decade, we have come to rely on computers for various safety and security-critical tasks, such as securing our homes, operating our vehicles, and controlling our finances. To facilitate these tasks, chip manufacturers have begun including trusted execution environments (TEEs) in their processors, which enable critical code (e.g., cryptographic functions) to run in an isolated hardware environment that is protected from the traditional operating system (OS) and its applications. 
While code in the untrusted environment (e.g., Android or Linux) is forbidden from accessing any memory or state within the TEE, the code running in the TEE, by design, has unrestricted access to the memory of the untrusted OS and its applications. However, due to the isolation between these two environments, the TEE has very limited visibility into the untrusted environment\u2019s security mechanisms (e.g., kernel vs. application memory). In this paper, we introduce BOOMERANG, a class of vulnerabilities that arises due to this semantic separation between the TEE and the untrusted environment. These vulnerabilities permit untrusted user-level applications to read and write any memory location in the untrusted environment, including security-sensitive kernel memory, by leveraging the TEE\u2019s privileged position to perform the operations on its behalf. BOOMERANG can be used to steal sensitive data from other applications, bypass security checks, or even gain full control of the untrusted OS. To quantify the extent of this vulnerability, we developed an automated framework for detecting BOOMERANG bugs within the TEEs of popular mobile phones. Using this framework, we were able to confirm the existence of BOOMERANG on four different TEE platforms, affecting hundreds of millions of devices on the market today. Moreover, we confirmed that, in at least two instances, BOOMERANG could be leveraged to completely compromise the untrusted OS (i.e., Android). While the implications of these vulnerabilities are severe, defenses can be quickly implemented by vendors, and we are currently in contact with the affected TEE vendors to deploy adequate fixes. To this end, we evaluated the two most promising defense proposals and their inherent trade-offs. This analysis led the proposal of a novel BOOMERANG defense, addressing the major shortcomings of the existing defenses with minimal performance overhead. 
Our findings have been reported to and verified by the corresponding vendors, who are currently in the process of creating security patches.", "pdfUrls": [ "http://www.cs.ucsb.edu/~vigna/publications/2017_NDSS_Boomerang.pdf", "http://www.internetsociety.org/sites/default/files/ndss2017_07-3_Machiry_paper.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/boomerang-exploiting-semantic-gap-trusted-execution-environments/", "https://seclab.cs.ucsb.edu/media/uploads/papers/ndss17-final227.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/c66c/1e49e9dbbb52a45850d84c60168b44b4faa7.pdf", "s2Url": "https://semanticscholar.org/paper/c66c1e49e9dbbb52a45850d84c60168b44b4faa7", "sources": [ "DBLP" ], "title": "BOOMERANG: Exploiting the Semantic Gap in Trusted Execution Environments", "venue": "NDSS", "year": 2017 }, "c678e962b158153924bbb24c4900b84375be7e57": { "authors": [ { "ids": [ "9767962" ], "name": "Erico Vanini" }, { "ids": [ "1710684" ], "name": "Rong Pan" }, { "ids": [ "2587719" ], "name": "Mohammad Alizadeh" }, { "ids": [ "35171501" ], "name": "Parvin Taheri" }, { "ids": [ "2112077" ], "name": "Tom Edsall" } ], "doi": "", "doiUrl": "", "entities": [ "Bisection bandwidth", "Data center", "Elasticity (cloud computing)", "Experiment", "Load balancing (computing)", "Presto", "Simulation", "Telephone exchange", "Testbed" ], "id": "c678e962b158153924bbb24c4900b84375be7e57", "inCitations": [ "caba95723cf0ac58091dbddf1edfd3fc485b0e3e", "25a2cfa6c630c96b7e1163b2fdbe7b0f37bd626c", "602fe41b74da4d92051f63b5a95831b6ad2b5552", "33e28ab30ce23a4abeedeae3f4213fcba80d1947", "959cdc393f0d9c5e04dfce997d68cbe410abce68", "2c713ea0f3dacc2ce4189891a57c69aec0707c52", "9bbd5be2829e49b1fac7f034baf7499cb069db95", "293ca58169024b0f40ae3342200737767321f6b1", "5b73b0524f4d6660acc0c9a4b50590905ed9fa7d" ], "journalName": "", "journalPages": "407-420", "journalVolume": "", "outCitations": [ "00ddc85d502aa4bdc45a3b8b9099fad75938b50a", 
"2730c3ec2908d852e73a62f93302e0e8bfe5510e", "1eddf92320697dbaae59cb84fafd5af73e0fc865", "4973d22ad92fe2999f18cc57dd4a4cad81ba2cfe", "35e7b16b618ca6bf63446372afb2a0ca071f2f13", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "07367703f587dbc3313cc613289c4330cebe5c8c", "0f8b04cb89e455ceadf0c88fd5dd9f9a7f338ba9", "5c5d03e884d4f0094b217c62267466fa11432c8e", "11a4744f7f0883c4232a9f5aaca8b9d29bfaa762", "0e65326e8938b2b5f79fb4a207b0a9d811ab095c", "bc890ed386ebd2dc3d2b6f123e1d5983d957e3ab", "022a0317d5bf2b38847b03f7c9bc3bfa35950199", "2e3cc2d55770aac26d3ce0cb6ddd96dbbcfec4cc", "14b82ab954a85cb8b336e86cf536c5701ca722e9", "058f6752d85a517aae298586fdf117acdd7560ea", "663e064469ad91e6bda345d216504b4c868f537b", "55c5a2c1f2f6f66646baf64405464357fa7df8a0", "0586f39a5280d49e62b49838c229dcb37d105994", "58f692e9b03cb973355aab46bb6f867239aeb513", "025652412d507a8cf98ecacd8a44d32ce28995e1", "9570d6075ecaf7f5dc28e99edfabc64914d44ca5", "129567778989fab23b50812b3df30e899e2d6a4e", "9a26f0832fa7508f6396cbee7d06db42e026c0c8", "00f6f16f4b76e931d3924e56674a74fca8d94df3", "8fbeb8f7c39588ed3014eb16850c4547603c906c", "42f6218131551632370e5e8f88370d04b220002a", "12d3952369fd92db86d4a3fff0a6fa8de3509095", "1376bd56c64639af4645625fd9755c83b2bf7cda", "baf4368220d59064026fc3241a51a66ec0f6fa02", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "122229239aeba1eb4f1623adb40f1845c582a520" ], "paperAbstract": "Datacenter networks require efficient multi-path load balancing to achieve high bisection bandwidth. Despite much progress in recent years towards addressing this challenge, a load balancing design that is both simple to implement and resilient to network asymmetry has remained elusive. In this paper, we show that flowlet switching, an idea first proposed more than a decade ago, is a powerful technique for resilient load balancing with asymmetry. Flowlets have a remarkable elasticity property: their size changes automatically based on traffic conditions on their path. 
We use this insight to develop LetFlow, a very simple load balancing scheme that is resilient to asymmetry. LetFlow simply picks paths at random for flowlets and lets their elasticity naturally balance the traffic on different paths. Our extensive evaluation with real hardware and packet-level simulations shows that LetFlow is very effective. Despite being much simpler, it performs significantly better than other traffic oblivious schemes like WCMP and Presto in asymmetric scenarios, while achieving average flow completions time within 10-20% of CONGA in testbed experiments and 2\u00d7 of CONGA in simulated topologies with large asymmetry and heavy traffic load.", "pdfUrls": [ "https://people.csail.mit.edu/alizadeh/papers/letflow-nsdi17.pdf", "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/vanini", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-vanini.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-vanini.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/c678/e962b158153924bbb24c4900b84375be7e57.pdf", "s2Url": "https://semanticscholar.org/paper/c678e962b158153924bbb24c4900b84375be7e57", "sources": [ "DBLP" ], "title": "Let It Flow: Resilient Asymmetric Load Balancing with Flowlet Switching", "venue": "NSDI", "year": 2017 }, "c686c5c0746938b32efcc28bcaa2054e8e65d9e8": { "authors": [ { "ids": [ "2965136" ], "name": "Sebastian Werner" }, { "ids": [ "2664393" ], "name": "Javier Navaridas" }, { "ids": [ "1706226" ], "name": "Mikel Luj\u00e1n" } ], "doi": "10.1109/HOTI.2017.18", "doiUrl": "https://doi.org/10.1109/HOTI.2017.18", "entities": [ "Algorithm", "Bus contention", "Bus mastering", "Channel I/O", "Digital subchannel", "Electrical connection", "Modular design", "Scheduling (computing)", "Throughput" ], "id": "c686c5c0746938b32efcc28bcaa2054e8e65d9e8", "inCitations": [], "journalName": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "journalPages": "49-56", 
"journalVolume": "", "outCitations": [ "0709328c35748941bae7074dc0ef010cc3d198d4", "1843d5dd795a470fd990ed680b582ac27373660b", "0c89d5d37a25f693bc31e65c0c5b9bea63148e57", "23022632ad418422d4629eb3e77996a6e4545528", "063700ef01aad15a1981553fde02e8d162a553e7", "31d0b90f43a78ccbed0133d72d7c19c4340dfea5", "0aef82b89c4d93263a2c63e3d8c95f1189f2e7eb", "10289d16e1336fe57b3b52137af4678c229eef54", "1d5d0534e6f4bd0954528af3fe663ff594ec300c", "3e53ba2fed07cc7e9c2ad13ed606a41656d8dd98", "59239b1dc602ee2a3a0dbf7d32aecb8fe5187594", "0cfca98cc8e16534668dc8e18f962dd8013ff661", "7ec2901a65ab52450f7e3321a8e415878e438f3f", "0108a3544506cc114214b2e30cb3284d2ff8d035", "a620b6093fd7e4a139d36630f5521f7ba4fcb10e", "8fce1221493272ebe1004d8e0253ea69835748ee", "89938adc3c8310d0f2ea7769dde57a5f24c5685d", "29ba9706af6a076d786e336f5aebf169aba46bcb", "3966af775134902da24da6ab125fba7c2f2429bf", "f53494818b4ba2e6f5ba9e8fbb1ebdd490db17f0", "2d87e9ac2ead16d3a59f1df3ecf3a5d095ccf3f0", "1f854aba5723b0a9e200b72a9c3a41024d52eaec", "6b7e83afb4db052ab050bcaa7af6bbfaffb80520", "0e679946477373b46fed57f69b673e5bd30950cb", "dbc627f52c9dc69a56a7b89ec020634037d4edd8", "58b5cfc1f9cd54a7cf81ce00a6c65b96f0ba0b7a", "1b67262a92357dffb869333a8c9d2bebb3137f20", "9ebbffde784b5aef699ceebe57b04f491ac0cdc2", "3581ed1ea7a9822f3a1226a5715e7f0e1e08f83e" ], "paperAbstract": "Maximizing bandwidth utilization of optical onchip interconnects is essential to compensate for static power overheads in optical networks-on-chip. Shared optical buses were shown to be a power-efficient, modular design solution with tremendous power saving potential by allowing optical bandwidth to be shared by all connected nodes. 
Previous proposals resolve bus contention by scheduling senders sequentially on the entire optical bandwidth; however, logically splitting a bus into subchannels to allow both sequential and parallel data transmission has been shown to be highly efficient in electrical interconnects and could also be applied to shared optical buses. In this paper, we propose an efficient subchannel scheduling algorithm that aims to minimize the number of bus utilization cycles by assigning sender-receiver pairs both to subchannels and time slots. We present both a distributed and a centralized bus arbitration scheme and show that both can be implemented with low overheads. Our results show that subchannel scheduling can more than double throughput on shared optical buses compared to sequential scheduling without any power overheads in most cases. Arbitration latency overheads compared to state-of-the-art sequential schemes are moderate-to-low for significant bus bandwidths and only noticeable for low injection rates.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HOTI.2017.18" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c686c5c0746938b32efcc28bcaa2054e8e65d9e8", "sources": [ "DBLP" ], "title": "Subchannel Scheduling for Shared Optical On-chip Buses", "venue": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "year": 2017 }, "c70a7fb3dd97b30289bf24a9928d9cb4803a4da5": { "authors": [ { "ids": [ "1929052" ], "name": "Erik Vermij" }, { "ids": [ "1780899" ], "name": "Leandro Fiorin" }, { "ids": [ "1798496" ], "name": "Christoph Hagleitner" }, { "ids": [ "1737836" ], "name": "Koen Bertels" } ], "doi": "10.1109/ICPP.2017.12", "doiUrl": "https://doi.org/10.1109/ICPP.2017.12", "entities": [ "Benchmark (computing)", "Central processing unit", "Graph (abstract data type)", "Graph500", "Graphics processing unit", "HPCG benchmark", "Locality of reference", "Memory bandwidth", "Shared memory", "Speedup", "Supercomputer" ], "id": 
"c70a7fb3dd97b30289bf24a9928d9cb4803a4da5", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "31-40", "journalVolume": "", "outCitations": [ "225af03e1d95524f51ec4b211c28dc3868dcb0dd", "07c3c9f1be15b52e259309b2e0a9ca71fa7dc76f", "259e93de2f10d395a1bdfb2dc6da72b6a3998572", "7fdd9e0d927f6a21d3a4e46e9833b1aaa242ec2d", "c3fbbd9c1fc5e53c6a9e3fe27e1bfce4755c8ef3", "0a791a760dd883342c8b8456a3e7cb75fb996ef4", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "38466b62bccbdacd3ef1dab4514a7c010e8f45df", "3ffe60733f14d9416ab478c1f273390601f987d8", "5370dd52730a143168d18985beb7f2a3d0e9b027", "830178951826621715a6573ce16e4f459ce7d0d8", "3bf23f74bf33ed52f7c28587fab315610b27221a", "55b6c3db20e441bb539c5fbd836eba5add2f4364", "9e3c09b071088ab75cb5f977a0965234132be707", "8171724b10dd95073060594be5e19a74f6a7a943", "6cd3eaac09ccb608e30ce1cc85fb1c2971b958f5", "b186611ae84d95c9fc34f5a6ff0c395eb9af7e95", "585b2d0971a4838bd8b38cb34a161b48b10b8375", "ba75e4f7f6356d0c7a98ae813f085ce1a7a0aeec", "31592070939c3cdd97f66bde9bbef5dd34340037", "14e1975b07234f9f968905696f68bd702a04a87f", "352a8957005dc5519b15ed1870751ec494d66395", "0b1c6f52c76b441cb2598f3b8ac132d921ec8274" ], "paperAbstract": "HPCG and Graph500 can be regarded as the two most relevant benchmarks for high-performance computing systems. Existing supercomputer designs, however, tend to focus on floating-point peak performance, a metric less relevant for these two benchmarks, leaving resources underutilized, and resulting in little performance improvements, for these benchmarks, over time. In this work, we analyze the implementation of both benchmarks on a novel shared-memory near-data processing architecture. We study a number of aspects: 1. a system parameter design exploration, 2. software optimizations, and 3. 
the exploitation of unique architectural features like user-enhanced coherence as well as the exploitation of data-locality for inter near-data processor traffic. For the HPCG benchmark, we show a factor 2.5x application level speedup with respect to a CPU, and a factor 2.5x power-efficiency improvement with respect to a GPU. For the Graph500 benchmark, we show up to a factor 3.5x speedup with respect to a CPU. Furthermore, we show that, with many of the existing data-locality optimizations for this specific graph workload applied, local memory bandwidth is not the crucial parameter, and a high-bandwidth as well as low-latency interconnect are arguably more important, shining a new light on the near-data processing characteristics most relevant for this type of heavily optimized graph processing.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.12" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c70a7fb3dd97b30289bf24a9928d9cb4803a4da5", "sources": [ "DBLP" ], "title": "Boosting the Efficiency of HPCG and Graph500 with Near-Data Processing", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "c71cb23b18f73f4a156831fd2e5e1145699ab3eb": { "authors": [ { "ids": [ "34691891" ], "name": "Shaleen Deep" }, { "ids": [ "1713920" ], "name": "Paraschos Koutris" } ], "doi": "10.1145/3035918.3064017", "doiUrl": "https://doi.org/10.1145/3035918.3064017", "entities": [ "Microsoft SQL Server", "Online marketplace", "SQL" ], "id": "c71cb23b18f73f4a156831fd2e5e1145699ab3eb", "inCitations": [ "22399eb4c26a7782c0c101e6b1bfc6ce077666b7" ], "journalName": "", "journalPages": "699-713", "journalVolume": "", "outCitations": [ "495578719ddd89eaa817a92c0988e01dc44393c8", "2c6c14f24c9ce73340554ca1a79ab60e4baef54a", "1f75eec6c4967eace44cfd0bff3d736a05a53e2a", "62e3289ff47efd3b41b0ff0d41f3556601444d10", "7a62a12e8ef38c404f471ef6aadca219e4b159bb", "1d46d08bfe56447c0f2de893ed6cea9bbfe3ee38", 
"06f7d924f1f483e69cd594a4570d3df925882097", "0cd61c4c8876e8d0c6e42ad826ee3054c3ca23f1", "01839e206ff31c8237476ae89aa2569a8ac4d7c4", "1f51620110dab23cb886a7025b30c2db9f52a089", "10723678d19bab6a52c8ee9b89f9118536044033", "176213a7906971b34c5e85fc4f5c366ecc725096", "05a26b5deeed6f6f7e9584555b73c5af3905063b", "06fa8c759a71f68a395070d8309bafb1dcd5dcef", "155581224d6aacc5676f76f479fe4e841a4ad355", "3ba66b7716d2f93ae39b2bb79427038e449f5a7c", "59485a774dfa35ebc3ceb45a87ff06fbb96f5637", "048d8d94c5581133d6f563b813da4a9ec0d10c31", "9c9cea8d717a63ed233918f7b059861fb69c943b", "a0cdfc02a2cc68a678090e285bf40619d34a3f71" ], "paperAbstract": "Users are increasingly engaging in buying and selling data over the web. Facilitated by the proliferation of online marketplaces that bring such users together, data brokers need to serve requests where they provide results for user queries over the underlying datasets, and price them fairly according to the information disclosed by the query. In this work, we present a novel pricing system, called QIRANA, that performs query-based data pricing for a large class of SQL queries (including aggregation) in real time. QIRANA provides prices with formal guarantees: for example, it avoids prices that create arbitrage opportunities. Our framework also allows flexible pricing, by allowing the data seller to choose from a variety of pricing functions, as well as specify relation and attribute-level parameters that control the price of queries and assign different value to different portions of the data. 
We test QIRANA on a variety of real-world datasets and query workloads, and we show that it can efficiently compute the prices for queries over large-scale data.", "pdfUrls": [ "http://pages.cs.wisc.edu/~paris/papers/qirana.pdf", "http://doi.acm.org/10.1145/3035918.3064017" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c71cb23b18f73f4a156831fd2e5e1145699ab3eb", "sources": [ "DBLP" ], "title": "QIRANA: A Framework for Scalable Query Pricing", "venue": "SIGMOD Conference", "year": 2017 }, "c75e0c9f473d7817a581572742746330fa5aac8d": { "authors": [ { "ids": [ "3406720" ], "name": "Adri\u00e1n Castell\u00f3" }, { "ids": [ "2632706" ], "name": "Sangmin Seo" }, { "ids": [ "1714364" ], "name": "Rafael Mayo" }, { "ids": [ "2103230" ], "name": "Pavan Balaji" }, { "ids": [ "1684436" ], "name": "Enrique S. Quintana-Ort\u00ed" }, { "ids": [ "24636606" ], "name": "Antonio J. Pe\u00f1a" } ], "doi": "10.1109/ICPP.2017.15", "doiUrl": "https://doi.org/10.1109/ICPP.2017.15", "entities": [ "Application programming interface", "Computer programming", "Light-weight process", "OpenMP", "POSIX", "POSIX Threads", "Parallel computing", "Scheduling (computing)", "Thread (computing)" ], "id": "c75e0c9f473d7817a581572742746330fa5aac8d", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "60-69", "journalVolume": "", "outCitations": [ "5b2fdfbd1723dd5b5c4624e1249dfb5b7bef2e6e", "b76269bf962989ce271bef7ea863ff4adf9c9de6", "c5d0d547b6a3fa470dcc77f558f6c7c5768edabd", "5e50ffa96fa021c85ccedb1bb8b84b59ee268de8", "d57a01535daa997083bb22d740237385f1c00cc3", "c1aaee8f479dc22854a5ca7c4f3e8a641d7b9658", "5c3b19c58e63b17ba0d161636e53f67d6f2c5aa7", "0c03099cbb0e047c7c48be7cdeb8a30cad9759b7", "0fa65b966f559b64a3fb7d788f4ecc676b272e15", "7ebc2b6569d8f8d490dbf90482b85bd2f9a37f58", "6a660a404ca15aa04a2836442f513931112f5ba6", "8201d1fd40faf7a9bcec21724a16471a7efb1d1c", "b20cdd99af5421e93c811873411b55e7c26a4c69", 
"02b9ca3909f568467b9d721b9822a4c2b836399c", "8976796bdfb5a53a7b2e13057a9aaf06030b91fa", "273fcf24c3c9c07cde1cc68b23786ff7910e0d47", "32ef8d891edde06cc01357fa5c4d1ab7fe631720", "2a88cb605d1fbc7dfa15aae9041c69bf03be85a1", "1eaa2899ee0679acf20cdb20b34bed32d01babc0" ], "paperAbstract": "OpenMP is the de facto standard application programming interface (API) for on-node parallelism. The most popular OpenMP runtimes rely on POSIX threads (pthreads) implementations that offer an excellent performance for coarse-grained parallelism and match perfectly with the current hardware. However, a recent trend in runtimes/applications points in the direction of leveraging massive on-node parallelism in conjunction with fine-grained and dynamic scheduling paradigms. It has been demonstrated that lightweight thread (LWT) solutions are more appropriate for these new parallel paradigms. We have developed GLTO, an OpenMP implementation over the recently-emerged Generic Lightweight Threads (GLT) API. GLT exports a common API for LWT libraries that offers the possibility of running the same application over different native LWT solutions. In this paper we use GLTO to analyze different scenarios where OpenMP implementations may benefit from the use of either LWT or pthreads. 
Our study reveals that none of the threading approaches obtains the best performance in all the scenarios, but that there are important gaps among them.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.15" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c75e0c9f473d7817a581572742746330fa5aac8d", "sources": [ "DBLP" ], "title": "GLTO: On the Adequacy of Lightweight Thread Approaches for OpenMP Implementations", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "c76cd74aa02f56f4e86c7e8cc36130acbd2656db": { "authors": [ { "ids": [ "2797215" ], "name": "Kazem Cheshmi" }, { "ids": [ "2853477" ], "name": "Shoaib Kamil" }, { "ids": [ "1709408" ], "name": "Michelle Mills Strout" }, { "ids": [ "2917750" ], "name": "Maryam Mehri Dehnavi" } ], "doi": "10.1145/3126908.3126936", "doiUrl": "https://doi.org/10.1145/3126908.3126936", "entities": [ "Algorithm", "Code generation (compiler)", "Compile time", "Compiler", "Computation", "Decoupling (electronics)", "Eigen (C++ library)", "High- and low-level", "Numerical analysis", "Numerical method", "Simulation", "Sparse matrix" ], "id": "c76cd74aa02f56f4e86c7e8cc36130acbd2656db", "inCitations": [], "journalName": "", "journalPages": "13:1-13:13", "journalVolume": "", "outCitations": [ "3563be7789459d88bec67844f4dcdf22703eed7b", "c1d28d2ec0416de3ffb019c5066fb81090c25827", "74935773e035032faa3d9fae93e29950ed86f042", "32568a05fdf783291f33e6a6f55d5a806819f806", "8c6e313b3418e42afe4a852116e18d0f24284f35", "db81eb62f54ff80033c6123e7d3c524574bcc458", "8171c702ae27a120606bcd9a2bf9fc8fa673bff5", "2eac47ed4e8c81b71cc196f7fc0f7a472deed933", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "c4b854d185adf0caf10d4da0a567ea728a4b7c68", "e8a732e85540102732a0e729a26009891a7548b6", "163c64f06bd921fa2809342f1ffeeec6b515b145", "43f3b8af1e5924226f30bafda5cc27a4949cb3cc", "c87c36dd92e0bf59bff57c4cfea47985f09802b9", "8041d874752213fcd201ed8488bcc3f4e3e0c281", 
"17d6f7361e5d058a4f4d530300110ab6b023a58e", "149e87e9949041f8d87bcb5a801aedf4eaa490d8", "1fe8c9894a79f22d2edfeea0020995e714f83c38", "8bc77b016e09e67ce88bc187ff1179aaa3d73ae5", "529e1deae0a67b0f8d92fbb256adddced491ea40", "63339b2a993ba79931a51ea1c0931c3b8fa77333", "18733fb43ab54028c0064d1a18e4bbf79a4468c9", "27f3bb5ef854c0b0e559fb382114ba24891514b0", "14fde290a2f08ff1fc7260f717eef16a20cdd994", "420453b449907137be7849620dc7c3ea525e1cd9", "4fd63bec7022dc25d2f8ca2ad6093c7774ef4283", "4a2d7bf9937793a648a43c93029353ade10e64da", "24b616c54919ed2e597d92c0155ff51416e07ed2", "37751bc39e6c24b717ba8dfbe2e900f52276cd6d", "d8159bc68c497d62cee3d5ba8b7f0f1b54f0e3a9", "7aabe31e6b35dc0976c08a9cb20d76da9fef3d28", "01047af8707a36c52a90e326e26692eb16a03791", "8fe1a5a48372a91a7ec692c9374010ea4a196ece", "4c725527b426b28f9024a0092a6a09f180b25628", "16e30de98b8b5947cc03927d90aa510e8a22fbe4", "3b34509c43a9fe5a8e73a5b93ffebddf03c2a886", "9afa38c3b54ec57de27440555859a57850f1c365", "2f7fa291bdc6a2f8c7994cf1896868f057a6b0ca", "60edb775ea89a73846da0723816be83d5069b0c0", "1201aa64ab230559db7753bcabf554f47656a4e5", "cac9ef09da76b32614d0f932dc46ff145bbab08f", "0f82a8d1aa4762e19059c735f7f840e7bd60ac48", "17636a044d6d8576c0b700135aa9c88d161c2fb7", "4713f0bb624ff09fdaceed2681cc08abadbee433", "1004abb8d7a7d6dcce9092f5c99c0ac17b16b3bf", "906de356847f94f7cb9f293706acd29fcf5ca7f0", "47d0ef85de646e125b41ea31f6dc55b37c608010", "af4c42009bd535d70c6597aef2afe56d10ea3d33", "0c5ae5efefb6679a5aa209dd8a4a8ce10f1a0952", "b4a59c6ec8f3b63cb14ad011ae6434b9217177b1", "3087eeb39c88b1fc9bdc72812930451fc98cedec", "cafd01767bb6e7e86bc778e12289e0081c9834d0", "0221bee380311d75ffcc11fd66be7d5cf632f636", "f5db26f3534c383c110d65f740816ad5320ce7db" ], "paperAbstract": "Sympiler is a domain-specific code generator that optimizes sparse matrix computations by decoupling the symbolic analysis phase from the numerical manipulation stage in sparse codes. 
The computation patterns in sparse numerical methods are guided by the input sparsity structure and the sparse algorithm itself. In many real-world simulations, the sparsity pattern changes little or not at all. Sympiler takes advantage of these properties to symbolically analyze sparse codes at compile time and to apply inspector-guided transformations that enable applying low-level transformations to sparse codes. As a result, the Sympiler-generated code outperforms highly-optimized matrix factorization codes from commonly-used specialized libraries, obtaining average speedups over Eigen and CHOLMOD of 3.8× and 1.5× respectively.", "pdfUrls": [ "http://arxiv.org/abs/1705.06575", "http://doi.acm.org/10.1145/3126908.3126936", "https://arxiv.org/pdf/1705.06575v1.pdf", "http://www.rci.rutgers.edu/~mm2366/papers/SC17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c76cd74aa02f56f4e86c7e8cc36130acbd2656db", "sources": [ "DBLP" ], "title": "Sympiler: transforming sparse matrix codes by decoupling symbolic analysis", "venue": "SC", "year": 2017 }, "c7bcd841c65c35e4ed90254805420733fce56241": { "authors": [ { "ids": [ "2265069" ], "name": "Guixin Ye" }, { "ids": [ "2238603" ], "name": "Zhanyong Tang" }, { "ids": [ "2068791" ], "name": "Dingyi Fang" }, { "ids": [ "2466164" ], "name": "Xiaojiang Chen" }, { "ids": [ "1808255" ], "name": "Kwang In Kim" }, { "ids": [ "28801862" ], "name": "Ben Taylor" }, { "ids": [ "40514580" ], "name": "Zheng Wang" } ], "doi": "", "doiUrl": "", "entities": [ "Adversary (cryptography)", "Algorithm", "Android", "Authentication", "Authorization", "Camera phone", "Computer vision", "Information sensitivity", "Operating system", "Password cracking", "Smartphone", "Threat model" ], "id": "c7bcd841c65c35e4ed90254805420733fce56241", "inCitations": [ "93bff7a0bf74e82afd49b2d7300e8f4c883e87ee", "e1cf9ad5614f3a24b24088e4b22e9218f0abc3a0", "613ff10386b5a874bd843cd72bbcf8f0a6b26fed", 
"312be8a03e45eebfb4a1f0077a9e4e1ee179b5f4", "110651c5c7bc9ec40e9f4ff50b61053cb0ba0398", "ac8a9fd78346460082220364bcf507db4a59e4a5", "bb199441370d052fa88b72734c8082df0ebe010b" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "f1c0a11aef64d05c07bc6627b0987aac089d40ed", "5d992d54bb27877aaf79a1d6c11ad5c858f0bb42", "13a8777503ce3fbfe3550a78fca1d7ba670c8eaf", "4430cf7514d678df33279fed6d6187e24a9a2def", "49140401a7a4290ec5da00f76601ca115c007b74", "5f4f110872661bcb10ebc03ae851ab976d64b351", "6b3abd1a6bf9c9564147cfda946c447955d01804", "8d01bf796e44d242b093de85d93ee45fb032bd00", "0132e68e6adf3d087d45f3b1c4803d4140c55e0a", "44f2bd6cd5d09e95f8a362534d27f6413f9fc1ad", "62bf472a70a99aa7af1280e23c95250132ba34e7", "1eb4545cfd9c79eb0a70f2fc24ff3c87edfe2c85", "31c6b7b6b8a4ca6de749d874b29b07fbb2290ff5", "001c353521cd29abc0cac59141ccc29d843dd9b8", "f9106e06c56d5fd216d03d5ac460674ff1479753", "a5a7ccb14e6561207a4e91aa452e9810dbdaa709", "89a4618d9af12b98af656f9d620256e7cfcd4a1c", "4516d4d6d9a6f3bbd1c72b05efa6616e7a100512", "24666f659a171fe9a15acf27d21f0eaa2b940600", "90f27eacb7f22816c5b1d4628ff0c56980d80c02", "197350a9fa144d04129b152314d746da365ed916", "99b75bfa6465aa60f5d3a6b6850d438e0919ac23", "9f9ad7715a27eb9c5d958ebc4184ce6d1635dd59", "54cb4579fe00965cc93901e00d731be377fc984d", "5c3556d4bf94ba51cea58c5d624aed19e6223e59", "09370d132a1e238a778f5e39a7a096994dc25ec1", "bf87bea0e0feee8a684b47be63cbaa34122568a1" ], "paperAbstract": "Pattern lock is widely used as a mechanism for authentication and authorization on Android devices. This paper presents a novel video-based attack to reconstruct Android lock patterns from video footage filmed using a mobile phone camera. Unlike prior attacks on pattern lock, our approach does not require the video to capture any content displayed on the screen. Instead, we employ a computer vision algorithm to track the fingertip movements to infer the pattern. 
Using the geometry information extracted from the tracked fingertip motions, our approach is able to accurately identify a small number of (often one) candidate patterns to be tested by an adversary. We thoroughly evaluated our approach using 120 unique patterns collected from 215 independent users, by applying it to reconstruct patterns from video footage filmed using smartphone cameras. Experimental results show that our approach can break over 95% of the patterns in five attempts before the device is automatically locked by the Android operating system. We discovered that, in contrast to many people\u2019s belief, complex patterns do not offer stronger protection under our attacking scenarios. This is demonstrated by the fact that we are able to break all but one complex patterns (with a 97.5% success rate) as opposed to 60% of the simple patterns in the first attempt. Since our threat model is common in day-to-day life, this paper calls for the community to revisit the risks of using Android pattern lock to protect sensitive information.", "pdfUrls": [ "http://eprints.lancs.ac.uk/82868/7/paper.pdf", "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/cracking-android-pattern-lock-five-attempts/", "https://www.lancaster.ac.uk/staff/wangz3/publications/ndss_17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/c7bc/d841c65c35e4ed90254805420733fce56241.pdf", "s2Url": "https://semanticscholar.org/paper/c7bcd841c65c35e4ed90254805420733fce56241", "sources": [ "DBLP" ], "title": "Cracking Android Pattern Lock in Five Attempts", "venue": "NDSS", "year": 2017 }, "c7d0afe7334f19dba56398f896a1a65b27732735": { "authors": [ { "ids": [ "3403985" ], "name": "Nhan Nguyen" }, { "ids": [ "2209601" ], "name": "Mohammad Maifi Hasan Khan" }, { "ids": [ "2815999" ], "name": "Yusuf Albayram" }, { "ids": [ "34241570" ], "name": "Kewen Wang" }, { "ids": [ "1724704" ], "name": "Swapna S. 
Gokhale" } ], "doi": "10.1109/CLOUD.2017.14", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.14", "entities": [ "Algorithm", "Apache Cassandra", "Data center", "Middleware", "Sampling (signal processing)", "Sensor", "Server (computing)" ], "id": "c7d0afe7334f19dba56398f896a1a65b27732735", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "34-41", "journalVolume": "", "outCitations": [ "2ed19a9dd3ea75754b42afbdc53cd1cea137492a", "ca6aa7e79798133a18795b5aaf8f51f33ae75384", "90007973141cf96c8b707140abfb502580d53b04", "30579744a261e0074af7ec59fe87254f4b990acf", "d5498c43615ea07429183fd515c22a27d785b4ab", "79d3751e1857f50eaab6afd3b7acf90858d99a62", "021bb9519c1ad72b9e110c1a215932d90b627637", "6ce9b17283a3fc4f2a3df7fa1eaed80bac69f69e", "45ef8ed95bfeeba351b79118b362e183f61cc42d", "72714925057191470ea02cdcd9fba855981aeffe", "4783c303f45d78323e1206c962ce6fcea57e724d", "5a316bec266ba4479c6a74884d7adb238525e3af", "379ef18377d803d87859314c0e110cdf64f2ea73", "4ede28dae4b8d3b6b3925094b0221c19bc571e0c" ], "paperAbstract": "In large-scale data stream management systems, sampling rate of different sensors can change quickly in response to changed execution environment. However, such changes can cause significant load imbalance on the back-end servers, leading towards performance degradation and data loss. To address this challenge, in this paper, we present a model-driven middleware service (i.e., Arion) that uses a two-step approach to minimize data loss. Specifically, Arion constructs models and algorithms for overload prediction for heterogeneous systems (where different streams can have different sampling rates and message sizes) leveraging limited execution traces from homogeneous systems (where each stream has the same sampling rate and message size). 
Subsequently, when an overload condition is predicted (or detected), Arion first leverages the a priori constructed models to identify the streams (if any) that can be split into multiple substreams to scale up the performance and minimize data loss without allocating additional servers. If the software based solution turns out to be inadequate, in the second stage, the system allocates additional servers and redirects streams to stabilize the system leveraging the models. Extensive evaluation on a 6 node cluster using Apache Cassandra for various scenarios shows that our approach can predict the potential overload condition with high accuracy (81.9%) while minimizing data loss and the number of additional servers significantly.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.14" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c7d0afe7334f19dba56398f896a1a65b27732735", "sources": [ "DBLP" ], "title": "Arion: A Model-Driven Middleware for Minimizing Data Loss in Stream Data Storage", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "c7d6ee693eb72e274aa8702ea579902996e4f3d5": { "authors": [ { "ids": [ "1709817" ], "name": "Youyou Lu" }, { "ids": [ "2684311" ], "name": "Jiwu Shu" }, { "ids": [ "20665946" ], "name": "Youmin Chen" }, { "ids": [ "1726351" ], "name": "Tao Li" } ], "doi": "", "doiUrl": "", "entities": [ "Direct memory access", "Distributed transaction", "Memory pool", "Networking hardware", "Non-volatile memory", "Overhead (computing)", "Persistent memory", "Remote direct memory access", "Remote procedure call", "Server (computing)", "Volatile memory" ], "id": "c7d6ee693eb72e274aa8702ea579902996e4f3d5", "inCitations": [ "9809bc2847bc9274564c6c3545561d920c5e44f3", "990e9e18455fbe1c887fdd90c9da74f561450830", "a6b0d5a4b19d9a8e133e2fb30b40a6b9eae7283d", "6ff7fd341b0a4ab4d919f8ce3b35d447668e80ae", "1b1dda022e899b2d922adf330c96a8c9f7ad2abe", "7206aead5a341f361e6571d607f3c032e65e2f7e" ], 
"journalName": "", "journalPages": "773-785", "journalVolume": "", "outCitations": [ "03b6a916498fa8591201a2de5f22344609b1e457", "10419c6f4aa50a36ed0b103c9ddb9aec45f133fe", "5f3f9223c5c9f896be099bc177929febad508407", "d58cc242fd70227cff98376a914e0b42b1b79db8", "9f8942efd7eb663bc176c285d230b01e48ae94dc", "05a1357946de5eca42a477b7b268db4944219a2e", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "29a1148d75878671dc3663bf480e33d7bd91597d", "885c666fbcfd1a10c613496d7a041d01b99c7a39", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "793f5e737284925a176f8ec82b3bb0d2178bb330", "65ad9178eb3e3bb6ef01df6ced56e389e911376c", "e11e0fa2343667a9fba2d188dd41598636e659fb", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "daf0cd0076b388712ea12ec4105572997fc50cdf", "54f5ef35bf00f18f742a71b49bd6831322c3a1c5", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "131e1e1d163a0f49881d7b5ac092892093391015", "408b8d34b7467c0b25b27fdafa77ee241ce7f4c4", "433207f45ac2c9dbe3876ab53af28ed569e94da9", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", "8f369c8e284e27ff73591209cc69a62e8252ff02", "ad43b820f35a18a45438a295b2c546b689a35e0c", "205cf007cf77bbf81e55b74635017087585f7b7c", "8c06fb59a79b3b47dff8588302d8e6514a7f7a4a", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "9ac43af1dac92cc2c03e3a3c46fc5892fca6e988", "475f3564a7c996df3d647734b1c671662802a572", "42c70d64890726f60556caf3eec3f06e85642dd9", "0401a8c1feeb489f3fa011fe50e00e91a8fd7903", "5e3bf131096d59e5bbc58113f3840f5a0b7a392d" ], "paperAbstract": "Non-volatile memory (NVM) and remote direct memory access (RDMA) provide extremely high performance in storage and network hardware. However, existing distributed file systems strictly isolate file system and network layers, and the heavy layered software designs leave high-speed hardware under-exploited. 
In this paper, we propose an RDMA-enabled distributed persistent memory file system, Octopus, to redesign file system internal mechanisms by closely coupling NVM and RDMA features. For data operations, Octopus directly accesses a shared persistent memory pool to reduce memory copying overhead, and actively fetches and pushes data all in clients to re-balance the load between the server and network. For metadata operations, Octopus introduces self-identified RPC for immediate notification between file systems and networking, and an efficient distributed transaction mechanism for consistency. Evaluations show that Octopus achieves nearly the raw bandwidth for large I/Os and orders of magnitude better performance than existing distributed file systems.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-lu.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_chen_0.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/lu" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/c7d6/ee693eb72e274aa8702ea579902996e4f3d5.pdf", "s2Url": "https://semanticscholar.org/paper/c7d6ee693eb72e274aa8702ea579902996e4f3d5", "sources": [ "DBLP" ], "title": "Octopus: an RDMA-enabled Distributed Persistent Memory File System", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "c7e2c4bea500ea7926a50973d861f01bb8e5e364": { "authors": [ { "ids": [ "2534789" ], "name": "Pradeep Kumar" }, { "ids": [ "1744674" ], "name": "H. 
Howie Huang" } ], "doi": "", "doiUrl": "", "entities": [ "Big data", "CPU cache", "Falcon", "Lock (computer science)", "Operating system", "Page cache", "Parallel computing", "Scalability", "Solid-state drive", "Throughput" ], "id": "c7e2c4bea500ea7926a50973d861f01bb8e5e364", "inCitations": [ "55318fe320d8217fdc0e1359f04ac79844222c8e" ], "journalName": "", "journalPages": "41-53", "journalVolume": "", "outCitations": [ "9f8942efd7eb663bc176c285d230b01e48ae94dc", "701c1b756c427f01c56e76bb250c8bbb2d4b7720", "57d2df84a585f96ddc874898977cfe2fbe02a68f", "08b5b8270713bbbc0b5b1a31c8625b0bf87d674d", "7c0699937a1775a01ee8ec97ca30f5427f020b99", "3aff5fb3d1e23dfc0c45989f71b4aa99b3a5784b", "33a62431a6cbeba146768059da3f3ebb337ca4e1", "0963f672132d42e6ebcd3637ee06b0151a6620ba", "ceb467548785dc5d0d0adc62c2f2e63ebe25eff8", "70745c6514748e8d3764d77abf410edd90a597ac", "3eb7f80bc2bef135b236ab741d1582e4d2e7a050", "151fe4cd7d0c788b3e362636d5c31a4c13f90a9a", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "30419d0e0fcaebbfbcdb88f702bd01306d14fb15", "4983002656dc35c6a1d0ce39eb56c70d3e55b7fd", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "6a888f3dd0a17b0241be61daa378ba6caffa6617", "98ae1d41deab44c3bfdbac51ccbcc5faf7cd0bcf", "2ec490f40efb00fa0a3b3a63306069d64630c340", "199ac28b6bc68bf05c77645ffae7640df114bca5", "38a9120f780602521af9744e31d80ef5cd9593a7", "2209304ccc2b0501debd5e9a90ae739f9a30cdef", "e9fa7e93d2f4e958cde16c6cf9d5bc966f8e7ffc", "e2462bde978023a9069cc08326f626135a95cb89", "fec25b72777e683e827b5ce349461456d6be02d9", "71bc31c9cd0ce90e708930c2f44ed0860453a0d3", "8d1a4ae1d3a17edd5b16653f2f582f9952b71612", "37e553645f8b81e2b0bc3aebf5e7fd3d6136e187", "af7e59894b61b61d05e39cf747ebefaa3a74906f", "31ffb232b5c1186bb90502254162ac3d99baf50b", "6c590295c72d3d9e60bf10a2480d11e73d5e03b6", "111e2d5634cb30d5d841cdb22563f9b371fb5f54", "def29d202e537d026b8d3ed91655b540ef86cceb", "2a30b4cb56853002133311372ce8313b14fba158", "0a8e0a9f0ae9910d5ecf165071559a2c4191a098", 
"b6e74306023f8b9890714d58c781ce9f5c1d3897", "222e7666dd37307b600b0a3ebc9b6d28fb51d6e9", "7ef137faca4da278382ccdcb90da8fcd19faca36", "4e8839416133588c10cc56d6325db55a42fe2215" ], "paperAbstract": "With the high throughput offered by solid-state drives (SSDs), multi-SSD volumes have become an attractive storage solution for big data applications. Unfortunately, the IO stack in current operating systems imposes a number of volume-level limitations, such as pervolume based IO processing in the block layer, single flush thread per volume for buffer cache management, locks for parallel IOs on a file, all of which lower the performance that could otherwise be achieved on multiSSD volumes. To address this problem, we propose a new design of per-drive IO processing that separates two key functionalities of IO batching and IO serving in the IO stack. Specifically, we design and develop Falcon1 that consists of two major components: Falcon IO Management Layer that batches the incoming IOs at the volume level, and Falcon Block Layer that parallelizes IO serving on the SSD level in a new block layer. Compared to the current practice, Falcon significantly speeds up direct random file read and write on an 8-SSD volume by 1.77\u00d7 and 1.59\u00d7 respectively, and also shows strong scalability across different numbers of drives and various storage controllers. 
In addition, Falcon improves the performance of a variety of applications by 1.69\u00d7.", "pdfUrls": [ "https://www.usenix.org/system/files/conference/atc17/atc17-kumar.pdf", "https://www.usenix.org/sites/default/files/conference/protected-files/atc17_slides_kumar.pdf", "https://www.usenix.org/conference/atc17/technical-sessions/presentation/kumar" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/c7e2/c4bea500ea7926a50973d861f01bb8e5e364.pdf", "s2Url": "https://semanticscholar.org/paper/c7e2c4bea500ea7926a50973d861f01bb8e5e364", "sources": [ "DBLP" ], "title": "Falcon: Scaling IO Performance in Multi-SSD Volumes", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "c7edf9ab41b71a5336a1653071d56f5a53bcc17d": { "authors": [ { "ids": [ "39685774" ], "name": "Chun-Hung Hsiao" }, { "ids": [ "1678884" ], "name": "Satish Narayanasamy" }, { "ids": [ "10018692" ], "name": "Essam Muhammad Idris Khan" }, { "ids": [ "1679345" ], "name": "Cristiano L. Pereira" }, { "ids": [ "1827185" ], "name": "Gilles A. 
Pokam" } ], "doi": "10.1145/3037697.3037712", "doiUrl": "https://doi.org/10.1145/3037697.3037712", "entities": [ "Algorithm", "Android", "Causality", "Parallel computing", "Programming model", "Race condition", "Scalability", "Web 2.0", "X86" ], "id": "c7edf9ab41b71a5336a1653071d56f5a53bcc17d", "inCitations": [ "798d00515af0355bc80e99afcd4f851bdef93f97" ], "journalName": "", "journalPages": "193-205", "journalVolume": "", "outCitations": [ "10ba04904f12e44cd0569cb86aa6e97e47939e23", "0cd328c7e014bb0326b4a7744df5d267ebf040cb", "182c0524aa353b6f4f4cb75a88ff3f5fc3bd86e0", "881b1140f002ded62808b6982504933edd337749", "a45adba59080ad625e3005c669345c3a96ad3e18", "5b9f54be658fe5e42448bbcf3a33fff9532cc0b1", "01293cc2b2bda3c38c7095d2ea1813fcb0611a3e", "47cdefebd5534d1d8c5d0f8061b482dbcd656e63", "03bb63660c3935ad2ec011a7f9e868587063f89c", "24e6f34e499634393416ea09c1aadd37ec9e8542", "bc075714de0641a625087830c816b238d2bd037d", "4753138822b71346ae29c72195a7cd24b8c18927", "1d54c70351e9ee93b87273b2e93750c89e32256f" ], "paperAbstract": "Asynchronous programming model is commonly used in mobile systems and Web 2.0 environments. Asynchronous race detectors use algorithms that are an order of magnitude performance and space inefficient compared to conventional data race detectors. We solve this problem by identifying and addressing two important problems in reasoning about causality between asynchronous events.\n Unlike conventional signal-wait operations, establishing causal order between two asynchronous events is fundamentally more challenging as there is no common handle they operate on. We propose a new primitive named AsyncClock that addresses this problem by explicitly tracking causally preceding events, and show that AsyncClock can handle a wide variety of asynchronous causality models. 
We also address the important scalability problem of efficiently identifying heirless events whose metadata can be reclaimed.\n We built the first single-pass, non-graph-based Android race detector using our algorithm and applied it to find errors in 20 popular applications. Our tool incurs about 6x performance overhead, which is several times more efficient than the state-of-the-art solution. It also scales well with the execution length. We used our tool to find 147 previously unknown harmful races.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037712" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c7edf9ab41b71a5336a1653071d56f5a53bcc17d", "sources": [ "DBLP" ], "title": "AsyncClock: Scalable Inference of Asynchronous Event Causality", "venue": "ASPLOS", "year": 2017 }, "c80f7fa9d9177fd31e4714cc45a9cd4057e534a8": { "authors": [ { "ids": [ "34745826" ], "name": "Marc S. Orr" }, { "ids": [ "11372918" ], "name": "Shuai Che" }, { "ids": [ "2825991" ], "name": "Bradford M. Beckmann" }, { "ids": [ "1723213" ], "name": "Mark Oskin" }, { "ids": [ "1783873" ], "name": "Steven K. Reinhardt" }, { "ids": [ "32548473" ], "name": "David A. 
Wood" } ], "doi": "10.1145/3126908.3126914", "doiUrl": "https://doi.org/10.1145/3126908.3126914", "entities": [ "Amortized analysis", "Coprocessor", "Graphics processing unit", "Network topology", "Parallel computing", "Partitioned global address space" ], "id": "c80f7fa9d9177fd31e4714cc45a9cd4057e534a8", "inCitations": [], "journalName": "", "journalPages": "23:1-23:12", "journalVolume": "", "outCitations": [ "43498db7de27abf14e5d2903a8318c62b3c4c0e9", "11fff0d9f39f7e3187dee5f2b9d54fec13b9c192", "43f0c099d44a68783a773f91cd03098a5252bf98", "143504cf0794163b60b93fb17cf61c885d7fd73c", "8bd6f67ef03b3c138c52f3e9b1716aebe937d244", "6335be42a352d1d4daa907533854410f57269926", "6c1f7496580d1169b232c53981f1e63e593be21f", "497fc8616563777046ecc89c85771b2ab446a518", "d1759060601d4e97e96b5349ba8faa450d18f9c6", "8141c6e62c890102b3b32c91907fe5e870d847d0", "08104146873817cc35cbd96d7ca3e5169cb72296", "3e6f5b5e8b7cb5408da8cd10d0cc625b00910291", "6348bb3b140c47ea29621d1dc5218db52433840b", "d77746dd3f1dcc2f2cfb750f9847313cd4689a6d", "5cfe2382c26abc8b2fa047cc263ce148866af2f5", "0ad8e89091eed09217e66adc98136126addc2619", "1156f60e40548096df49528b1342bb3e88b0f378", "0e191d726c2b15d54f0e4b5d50f6c0d2e426f3ad", "0270c2056eb50b5d4597afa722c50abf21e67a82", "6074c1108997e0c1f97dc3c199323a162ffe978d", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5" ], "paperAbstract": "Distributed systems incorporate GPUs because they provide massive parallelism in an energy-efficient manner. Unfortunately, existing programming models make it difficult to route a GPU-initiated network message. The traditional coprocessor model forces programmers to manually route messages through the host CPU. Other models allow GPU-initiated communication, but are inefficient for small messages.\n To enable fine-grain PGAS-style communication between threads executing on different GPUs, we introduce Gravel. 
GPU-initiated messages are offloaded through a GPU-efficient concurrent queue to an aggregator (implemented with CPU threads), which combines messages targeting to the same destination. Gravel leverages diverged work-group-level semantics to amortize synchronization across the GPU's data-parallel lanes.\n Using Gravel, we can distribute six applications, each with frequent small messages, across a cluster of eight GPU-accelerated nodes. Compared to one node, these applications run 5.3x faster, on average. Furthermore, we show Gravel is more programmable and usually performs better than prior GPU networking models.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126914" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c80f7fa9d9177fd31e4714cc45a9cd4057e534a8", "sources": [ "DBLP" ], "title": "Gravel: fine-grain GPU-initiated network messages", "venue": "SC", "year": 2017 }, "c8127e0a7cea1276a2af67ddd5e92b6748dde2dc": { "authors": [ { "ids": [ "3280686" ], "name": "Xiaowei Ren" }, { "ids": [ "11247851" ], "name": "Mieszko Lis" } ], "doi": "10.1109/HPCA.2017.40", "doiUrl": "https://doi.org/10.1109/HPCA.2017.40", "entities": [ "Atomicity (database systems)", "Cache coherence", "Core Storage", "Graphics processing unit", "Information privacy", "Oracle Coherence", "Read-only memory", "Read-write memory", "Sequential consistency" ], "id": "c8127e0a7cea1276a2af67ddd5e92b6748dde2dc", "inCitations": [ "f85ea15f5d417dc9b1cac8f58bec7157df47e6ba" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "625-636", "journalVolume": "", "outCitations": [], "paperAbstract": "Recent work has argued that sequential consistency (SC) in GPUs can perform on par with weak memory models, provided ordering stalls are made less frequent by relaxing ordering for private and read-only data. 
In this paper, we address the complementary problem of reducing stall latencies for both read-only and read-write data. We find that SC stalls are particularly problematic for workloads with inter-workgroup sharing, and occur primarily due to earlier stores in the same thread, a substantial part of the overhead comes from the need to stall until write permissions are obtained (to ensure write atomicity). To address this, we propose RCC, a GPU coherence protocol which grants write permissions without stalling but can still be used to implement SC. RCC uses logical timestamps to determine a global memory order and L1 read permissions, even though each core may see a different logical \"time,\" SC ordering can still be maintained. Unlike previous GPU SC proposals, our design does not require invasive core changes and additional per-core storage to classify read-only/private data. For workloads with inter-workgroup sharing overall performance is 29% better and energy is 25% less than in best previous GPU SC proposals, and within 7% of the best non-SC design.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.40" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c8127e0a7cea1276a2af67ddd5e92b6748dde2dc", "sources": [ "DBLP" ], "title": "Efficient Sequential Consistency in GPUs via Relativistic Cache Coherence", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "c84687848af9ad893e092fdd65de990eb3f03e3f": { "authors": [ { "ids": [ "40395087" ], "name": "Nikela Papadopoulou" }, { "ids": [ "2652675" ], "name": "Lena Oden" }, { "ids": [ "2103230" ], "name": "Pavan Balaji" } ], "doi": "", "doiUrl": "", "entities": [ "InfiniBand", "Level design", "Middleware", "Open-source software", "Scalability", "Use Case Points", "User interface" ], "id": "c84687848af9ad893e092fdd65de990eb3f03e3f", "inCitations": [ "9f07f3f93e4a6146b88327cdc3d3a140847fe5e8" ], "journalName": "2017 17th 
IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "345-354", "journalVolume": "", "outCitations": [ "587d32be5eba2baf0ce83ff69e01a13af72fdc26", "6bd411f57bb093367fda8b083b3c66f316e9c586", "61689b9e352a072f90ac0baceb5960b53338ef99", "7b4190802999d53cae8d89f492c50cc42d837973" ], "paperAbstract": "UCX is an open-source communication framework with a two-level API design targeted at addressing the needs of large supercomputing systems. The lower-level interface, UCT, adds minimal overhead to data transfer but requires considerable effort from the user. The higher-level interface, UCP, is easier to use, but adds some overhead to the communication. This work focuses on charting the performance of UCX over InfiniBand, motivated by the usage of UCX as middleware for high-level communication libraries. We analyze performance shortcomings that stem from the two-level design and the sources of these performance losses. In particular, we target basic functions of UCP, evaluate their performance over InfiniBand, and analyze sources of overheads compared with UCT and Verbs. We propose and evaluate some fixes to minimize these overheads, in order to enhance UCP performance and scalability.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101160" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c84687848af9ad893e092fdd65de990eb3f03e3f", "sources": [ "DBLP" ], "title": "A Performance Study of UCX over InfiniBand", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "c8b75340394736a2658ce08b3af9cae8672c514b": { "authors": [ { "ids": [ "3293985" ], "name": "Panagiotis Patros" }, { "ids": [ "37233910" ], "name": "Dayal Dilli" }, { "ids": [ "1737718" ], "name": "Kenneth B. 
Kent" }, { "ids": [ "31500848" ], "name": "Michael Dawson" } ], "doi": "10.1109/CLUSTER.2017.9", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.9", "entities": [ "Booting", "Cloud computing", "Compiler", "Docker", "Graphical user interface", "Interference (communication)", "Operating system", "Operating-system-level virtualization", "Platform as a service", "Scalability", "Solution stack", "Swarm", "Usability" ], "id": "c8b75340394736a2658ce08b3af9cae8672c514b", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "290-300", "journalVolume": "", "outCitations": [ "223dca51178da8c61e2428b8909443ab2945ab51", "7a1eec7fc53fa2e4de69bab6aed1b68e949331f1", "f7d59af41e7661b78fcba086019d3e12fbb283e9", "1bdc4138a8a56a331c17141df594c73e109efcc7", "dbe151dee791f2f092a75090a7d562236da47981", "534774678395c584ec75dd4857a3a2001534d242", "4355acea7b73a74a353d54156c2cdc889f0af319", "1bdf058ab0788c2c6f538b78df5570acfe065647", "82ec25dc56f95f8bc63cc4cb4d7f1b1f4f9719d8", "8e13e107e040f44ca190b6a65bae8e14596ca4f5", "2bbc14aa15c5cf0985fb28d9b15946bf4230d372", "ab1a28bb87dc9271649f1676ac08d42fb7f0d506", "2e7699f88c75d0b5b3fb2d4de2c9ba82c87292d5", "f8ac1f09e3e9e25311324a2f12c91a44eb198009", "53d8407a77b0a7bf76fc8d7e3b3fcff77e3f5e4d", "9c96514250c4a35deba5ae3ffb93e9731fe23a79", "88525e710cf0c1aef79ffad59906f43fffd8c757", "a6b4dd1c9d8ea3d696f8009dcb25c30fe1eff625", "26f3cbecbc636984d57e52191c1d87c9377aff6f" ], "paperAbstract": "Platform as a Service (PaaS) clouds provide part of the hardware/software stack and related services to tenant applications. Increased load is handled elastically by scaling, which either modifies the number of instances an application has available on the cloud or increases their available resources. However, because all these instances run inside isolated containers, experience gained by the first instance of an application cannot be easily shared with subsequent scaled instances. 
This results in both increased startup time and response timeout errors for the scaled instances as well as increased performance interference for any co-located applications; reacquiring this experience is a time-consuming and resource-intensive process. We propose a scalable and secure technique to share dynamically compiled artifacts produced by the first execution instance of an application and otherwise created for intra-OS sharing only with subsequent scaled or restarted instances as a solution to these problems. Our solution abides by the usual PaaS limitations and uses a distributed and containerized cloud service, which we experimentally show to be scalable on a Docker Swarm running on top of a 6-VM cluster; also, we discuss the results of a usability survey for the service's GUI conducted with expert subjects. The effectiveness of the DCAS technique was experimentally tested on an isolated installation of the PaaS software Cloudy Foundry; we measured significant reductions in both the startup time and response errors of scaled out instances as well as performance interference to co-located tenants during scaling.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.9" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c8b75340394736a2658ce08b3af9cae8672c514b", "sources": [ "DBLP" ], "title": "Dynamically Compiled Artifact Sharing for Clouds", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "c8b82a6791711abfab16812aa97d6b5981dbf1a9": { "authors": [ { "ids": [ "2365563" ], "name": "V\u00e9ronique Cortier" }, { "ids": [ "40175420" ], "name": "Niklas Grimm" }, { "ids": [ "30981065" ], "name": "Joseph Lallemand" }, { "ids": [ "4436634" ], "name": "Matteo Maffei" } ], "doi": "10.1145/3133956.3133998", "doiUrl": "https://doi.org/10.1145/3133956.3133998", "entities": [ "Authentication", "Helios", "Privacy", "ProVerif", "Push-button", "Tamarin", "Type system" ], "id": 
"c8b82a6791711abfab16812aa97d6b5981dbf1a9", "inCitations": [], "journalName": "", "journalPages": "409-423", "journalVolume": "", "outCitations": [ "f31411fd1fb0078f8e3e277f1df6a462ce7ce6a4", "d0859a977059a3b93a8ee19e895db7416d844403", "4956f4f5f6339c45a9e51cb906a46c060469ca4c", "22ba6ba6fa5c25fd86c3dfa169e8bc6020f93303", "de57ec6f2b5db21b51457477af94e4a12d00867b", "3bdc6e65abddc716ed1d965f4124816625219233", "a2f62cb532f0058eef8f8349ec57bb253b103c06", "d6e1f768418c52500da28506da837eed0f21d01f", "27575853a55dc23b70aace81c66d5defd4876817", "0016802f76c301fac8b24a3f0e7135b63bb37130", "1d081dbf3e9afebafac90fdeed4bfa788012142f", "02cdfa95529573d3ca7de8483f71a4066073816e", "0e6a686b2da1c2a361f9128c91b561b4fd173835", "21265f78497e031aaa118369798622e021ceac41", "e54b7d8ef93e08b55ca41f5d65812528b29b1c27", "620def2d5ca90b450ae8f35367eec14706050c92", "282534ffccd9231f678d4c937b93b05b36307903", "7b10e8a358dd2a42aef5073cf352866a5142b6f1", "4ea440d849e14ea874f0f254568e9b72494be734", "38ffd901b31c0c08b1adc8f3bf62b5af9ba66347", "081813e981b7da05c2a503ff2fdfdf6c9a72ee00", "235031e503b18aaedbbf5b778982fcd619eb9048", "12d27de40a213b6c91f3532a15baa6628a20abe2", "0b84fb0ec9739e04f9b0fcbe040718d9f735200f", "7aea958a6cf1c7d45c067b379332e84829819ca4", "189fab4c16c57882f93e7c19bad74ee992827d19", "5f61e8a60598460955264dbd68c172ac623cc24c", "b7907a55329338a3938b7d6570927a9c173c2eda", "12918e3209e46fe58a00c2e8de7325d57c81a95d", "809f5ecebc95251481965ccd08d001e80a662837", "8e208a9025a83bd9df87bc46f69f355af846f55a", "14850f6b877423bfe303a8f78cb361ab59ab2f6b", "02fd1a072a72d24c5f61d709a1b3ce863da32729", "bf9bfb0955d5071884c543e4e6c6f98147994d96", "085afdc103ff5130924177f25f0ef24a1f012720", "63c892403a20373f2f28645ede10793317161b2e", "16f13c578990b971e32e9d2cb620e312cdddf00a", "10fa8df81501c343411bf8e537d0c6e5010c55ea", "43d8300d1b587fdd7b60a722ccd649f46d34c572", "001477a33ca44ed2535c889e4fcaf34316da73a0", "8e35d6b5215e1b88c926464961a5c6be833509c8", "06362cd3fb4cf1e3a6cb53b0703b62d75ac6701c", 
"9228c83bdf58fafd05e2517e777ee7402e297178", "299c0dfd2119894ccb68e3fa0a42e529af4a402e", "71688aeb911e640154aa9cbeb45d76df9c4c2661", "a7a1a4449866f8b298210b6eec525410808738ca", "02519f3a5d2a1ffdc5b3cba834e2524aeb334665", "003fefc2efa5f819ac0631f2e58d01c947714bda", "4a21248e3575dcebe66d1de6f1fe9f008d54e02f", "738a58eff2dbc9bc76742289fbf9dc9cae3a1b1f", "154fe9fbd9ba77ab8cc7631fa15a821f70575f33", "bba9f08a2e4f53b94c0fae98d55953a16b6058cc", "25241ee7f6fec34b31dd292815655f9ce43ff501", "39befc1f478fc4d9d2e226b055e2699d6a5c9f61" ], "paperAbstract": "Mature push button tools have emerged for checking trace properties (e.g. secrecy or authentication) of security protocols. The case of indistinguishability-based privacy properties (e.g. ballot privacy or anonymity) is more complex and constitutes an active research topic with several recent propositions of techniques and tools.\n We explore a novel approach based on type systems and provide a (sound) type system for proving equivalence of protocols, for a bounded or an unbounded number of sessions. The resulting prototype implementation has been tested on various protocols of the literature. 
It provides a significant speed-up (by orders of magnitude) compared to tools for a bounded number of sessions and complements in terms of expressiveness other state-of-the-art tools, such as ProVerif and Tamarin: e.g., we show that our analysis technique is the first one to handle a faithful encoding of the Helios e-voting protocol in the context of an untrusted ballot box.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133998" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c8b82a6791711abfab16812aa97d6b5981dbf1a9", "sources": [ "DBLP" ], "title": "A Type System for Privacy Properties", "venue": "CCS", "year": 2017 }, "c8d937e3abc6c78b0d7358a1231280904428d946": { "authors": [ { "ids": [ "2547698" ], "name": "Tiancong Wang" }, { "ids": [ "34463065" ], "name": "Sakthikumaran Sambasivam" }, { "ids": [ "1717365" ], "name": "Yan Solihin" }, { "ids": [ "1694458" ], "name": "James Tuck" } ], "doi": "10.1145/3123939.3123981", "doiUrl": "https://doi.org/10.1145/3123939.3123981", "entities": [ "Address space", "B+ tree", "Baseline (configuration management)", "Byte", "Byte addressing", "Computer data storage", "IBM Tivoli Storage Productivity Center", "Identifier", "Memory address", "Microarchitecture", "Non-volatile memory", "Out-of-order execution", "Persistence (computer science)", "Persistent memory", "Programming complexity", "Speedup" ], "id": "c8d937e3abc6c78b0d7358a1231280904428d946", "inCitations": [ "911b73ae86d61d6919f16cc2f538a6548766bf64", "41ea95cc4dca373bf324555b897760054ec4a76e" ], "journalName": "", "journalPages": "800-812", "journalVolume": "", "outCitations": [ "94783d113951822195d4ba44599a8fcbdef9d4bf", "dc2e2b794a784782d7d9860f1358aa107f71c1bf", "024e39f4185e48a0a692663a0f26dc323de47fed", "0d8609d9ded6c15f627dd54250a6845a2b3a2ec8", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "3af216f371069b57c0dca5448384d052fb490fb4", "28ab79d604962031585fd149941a5c0594e3d0ed", "0204f40221260d00c5ee63646560a40dcd7d97d1", 
"fd840d5275cac98d64e7778a1b9173b937a77386", "16653666b0005f91060a3e402566659749b84313", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "42c70d64890726f60556caf3eec3f06e85642dd9", "47b851237f240831abee3971bca6bb8d2a121eb1", "2621b8f63247ea5af03f4ea0e83c3b528238c4a1", "57c823b3b07b98233394bf15cfbbaed6a84809df", "2ef08ccb970632bb8ada93ea70078eac54ce92d3", "2b625353fe50e219412e18b6d50b5d8de0538a60", "d76913152aeff892dbb028785f98ee8c84bfd8e3", "911b73ae86d61d6919f16cc2f538a6548766bf64", "7ef0940a5e093a7c8c3c7d243bbbbf513b3c3192", "1f482f44497c17be0573d9dff14a30d87b0bf0ca", "05a1357946de5eca42a477b7b268db4944219a2e" ], "paperAbstract": "Emerging non-volatile main memory technologies create a new opportunity for writing programs with a large, byte-addressable persistent storage that can be accessed through regular memory instructions. These new memory-as-storage technologies impose significant challenges to current programming models. In particular, some emerging persistent programming frameworks, like the NVM Library (NVML), implement relocatable persistent objects that can be mapped anywhere in the virtual address space. To make this work, persistent objects are referenced using object identifiers (ObjectID), rather than pointers, that need to be translated to an address before the object can be read or written. Frequent translation from ObjectID to address incurs significant overhead.\n We propose treating ObjectIDs as a new persistent memory address space and provide hardware support for efficiently translating ObjectIDs to virtual addresses. With our design, a program can use load and store instructions to directly access persistent data using ObjectIDs, and these new instructions can reduce the programming complexity of this system. We also describe several possible microarchitectural designs and evaluate them.\n We evaluate our design on Sniper modeling both in-order and out-of-order processors with 6 micro-benchmarks and the TPC-C application. 
The results show our design can give significant speedup over the baseline system using software translation. We demonstrate for the Pipelined implementation that our design has an average speedup of 1.96× and 1.58× on an in-order and out-of-order processor, respectively, over the baseline system on microbenchmarks that place persistent data randomly into persistent pools. For the same in-order and out-of-order microarchitectures, we measure a speedup of 1.17× and 1.12×, respectively, on the TPC-C application when B+Trees are put in different pools and rewritten to use our new hardware.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123981" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c8d937e3abc6c78b0d7358a1231280904428d946", "sources": [ "DBLP" ], "title": "Hardware supported persistent object address translation", "venue": "MICRO", "year": 2017 }, "c904585bab86b2ff7007afe09865a3a45c7d483b": { "authors": [ { "ids": [ "1725544" ], "name": "Tetsuya Sakai" } ], "doi": "10.1145/3077136.3080766", "doiUrl": "https://doi.org/10.1145/3077136.3080766", "entities": [ "Approximation", "Markov chain", "Markov chain Monte Carlo", "Monte Carlo" ], "id": "c904585bab86b2ff7007afe09865a3a45c7d483b", "inCitations": [ "7c4659c550ead059ed12a06068f77ddbd865d399" ], "journalName": "", "journalPages": "25-34", "journalVolume": "", "outCitations": [ "6babcc0a9cb3b23fe4877ee537f1e5dfa2829986", "d08f38f004cc02f3184a4743fdf4e5ed6669ac05", "2ae33fd2f43f1c3210fdce08968fd18f76478710", "06529efb02693595d1d455e7c6ea243c4f0c5816", "575a33aa00b7be4c63d5e6b455a6726e84d3f4d4", "35877ba6c3159c6e11ae797f364e69657d448375", "8f9ca2393da66340489ec5a909a6afebe227c16d", "6f83bcad4eea197565e7935b860b4f3b783497b3", "852a0085f35e479a793b4dcd1ffb191dcde918dc", "7cb3250a028dc7aafadeac0601d8f52e556d29a9", "1fad2ae026f545216e2a62eeaf806945067cba7c", "d00525ba5ca7563780274150e2b02fc7f3d1dfbc", "bd5d105f4fa032e58e0a112d9316bc4301976e06", 
"73f583aad5195324ee75eb981b8b5f1fed6f9d38", "5a729680c3f2c9ee40b4f5f3a20c24b768c0a1c9", "e3733b3f7cef5e9cc2a84ded4055d91863d3919e" ], "paperAbstract": "Using classical statistical significance tests, researchers can only discuss P(D+|H), the probability of observing the data D at hand or something more extreme, under the assumption that the hypothesis H is true (i.e., the p-value). But what we usually want is P(H|D), the probability that a hypothesis is true, given the data. If we use Bayesian statistics with state-of-the-art Markov Chain Monte Carlo (MCMC) methods for obtaining posterior distributions, this is no longer a problem. That is, instead of the classical p-values and 95% confidence intervals, which are often misinterpreted respectively as \"probability that the hypothesis is (in)correct\" and \"probability that the true parameter value drops within the interval is 95%,\" we can easily obtain P(H|D) and credible intervals which represent exactly the above. Moreover, with Bayesian tests, we can easily handle virtually any hypothesis, not just \"equality of means,\" and obtain an Expected A Posteriori (EAP) value of any statistic that we are interested in. We provide simple tools to encourage the IR community to take up paired and unpaired Bayesian tests for comparing two systems. Using a variety of TREC and NTCIR data, we compare P(H|D) with p-values, credible intervals with confidence intervals, and Bayesian EAP effect sizes with classical ones. Our results show that (a) p-values and confidence intervals can respectively be regarded as approximations of what we really want, namely, P(H|D) and credible intervals; and (b) sample effect sizes from classical significance tests can differ considerably from the Bayesian EAP effect sizes, which suggests that the former can be poor estimates of population effect sizes. 
For both paired and unpaired tests, we propose that the IR community report the EAP, the credible interval, and the probability of hypothesis being true, not only for the raw difference in means but also for the effect size in terms of Glass's Δ.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080766" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c904585bab86b2ff7007afe09865a3a45c7d483b", "sources": [ "DBLP" ], "title": "The Probability that Your Hypothesis Is Correct, Credible Intervals, and Effect Sizes for IR Evaluation", "venue": "SIGIR", "year": 2017 }, "c9572a162d6e1e5675e7c6eb656bee739f8beb14": { "authors": [ { "ids": [ "2122613" ], "name": "Alexander Fr\u00f6mmgen" }, { "ids": [ "2869441" ], "name": "Amr Rizk" }, { "ids": [ "30517528" ], "name": "Tobias Erbsh\u00e4u\u00dfer" }, { "ids": [ "30440290" ], "name": "Max Weller" }, { "ids": [ "1807668" ], "name": "Boris Koldehofe" }, { "ids": [ "30439760" ], "name": "Alejandro J. Buchmann" }, { "ids": [ "1725298" ], "name": "Ralf Steinmetz" } ], "doi": "10.1145/3135974.3135979", "doiUrl": "https://doi.org/10.1145/3135974.3135979", "entities": [ "Application programming interface", "Electron mobility", "High- and low-level", "High-level programming language", "Interpreter (computing)", "Linux", "Load balancing (computing)", "Multitenancy", "Program optimization", "Programming model", "Programming paradigm", "Runtime system", "Scheduling (computing)", "Software deployment", "TCP Wrapper", "Throughput", "Topological sorting" ], "id": "c9572a162d6e1e5675e7c6eb656bee739f8beb14", "inCitations": [ "5e0077867ea32cfadaa8885206635348cf2f9b64" ], "journalName": "", "journalPages": "134-146", "journalVolume": "", "outCitations": [ "1aafc7066e52f18dee78103822da24a5d85da93c", "42a0c3c651f64d61f7a8cfd3a4d2413be0713e2a", "295dfb4f77d6be0137abb03d060cd70a2c13334f", "25c8ebec4224968ebe70311159381ec05adc21c0", "834a75f99c355d1376d0eaa8c5f91f4c584a7eed", "437120941fb462977c6ce74ddb661054915df792", 
"83a1a478c68e21ee83dd0225091bf9d3444a8120", "0433bb657317ac22f7c66d71dfd14c8ead607d73", "1c856842406d11d4ed15384afa0630c5a20be1fd", "3e364e301f026a197fde0608481dfa2c09e85b7b", "3dde3fec553b8d24a85d7059a3cc629ab33f7578", "295c704eaa9056a29604cde206ed0e79cf2a147d", "0213d528ee33f348ddc3cb45cfdd9f64c8683d07", "8c11cecf1fb1127525ddf974e237406a80b87e28", "32a7818ee01bea31068a0076060c75e88283a16a", "756883a19a9ed652647d625a863e22f70e94d7c9", "1d19b3a9a53b6ca07c13b8dc9c410f98f1e33a98", "9b5fee147a7412c9ea2b15107287886605a1d9f6", "b678ef342b0bc2b0085a8ea7aed56dc2e70245d1", "58668f7720430470e285a9131fc0e054ddbcafd9", "712f1d9e3056e229f237fe50afc38f75df9f2bd1", "627c0d36688b2252ae3ca0b5f68ce97e341d338d", "7de0127d9b52b6154a829fdf8750199b73621c8f", "f76d5f3a858d878bd2a7ca49f49d3eca973e49d7", "1376bd56c64639af4645625fd9755c83b2bf7cda", "0881510b68e068f1d130dfad722b9b87b650e753", "2ae7c9a89c7f6c105b68343dda9a0159eb2f118d", "b1d64bfc6c5ce1ba4c972a00bc4dd91a1a8571d2", "09bd47a536362402a1b315ec1f34a69824bb6fe4", "39d9a304d321e25d4f58780b145dafb8bceaf557", "30e5e40cb96c1d15c80ff0aa199298675465c65c", "6707284e445e0f5cb640db1c0ebd943618189865", "0b701ba28f8c3c9aa3ac351cf60ab2c5d9bf98a7", "089b10645ee63cd9c5bb4ab661141dd813408e15", "3a6deeb7aaa1aa62b8d4044bc21a51f8ea74dd0b", "0b6ea07d2d7ea0f95969f9e223d362c2e6aa79b4", "64a6dd2a598d5f012a54fe6ca06d4f7235f66626", "ecce9ea330498f5d49506854e87667a4deb546c2", "034b937edbff280dfdd7b2e98639655fd3587402", "7803feba5bc09d8272368988461cbd02f7b99bc1", "69f731513c6933240470204e79e854548b871814", "0acfbb7219bcc4151fd752d0b2b9e043c790b997", "aebe75efbdade65e22f05b6b8c2386af8fc2b8ff", "85cacb208b2e3aa16fd39f75dc858d44092782b6", "285765d1909a815cf12b7d3f645e5ae2d92ead8f", "00cf4b13a1bd202ccebe1e7bd0587f11e98ec3d6" ], "paperAbstract": "Multipath TCP enables remarkable optimizations for throughput, load balancing, and mobility in today's networks. 
The design space of Multipath TCP scheduling, i.e., the application-aware mapping of packets to paths, is largely unexplored due to its inherent complexity. Evidence in this paper suggests that an application-aware scheduling decision, if leveraged right, pushes Multipath TCP beyond throughput optimization and thereby provides benefits for a wide range of applications.\n This paper introduces a high-level programming model that enables application-defined Multipath TCP scheduling. We provide an efficient interpreter and eBPF-based runtime environment for the Linux Kernel, enabling isolated application-defined schedulers in multi-tenancy environments. In combination with a high-level API, our work closes the gap between scheduler specification and deployment. We show the strength of our programming model by implementing seven novel schedulers tackling diverse objectives. Our real world measurements, for example, of an application- and preference-aware scheduler, show that the programming model enables timely scheduling decisions to retain fine-grained throughput objectives. 
Further measurements of a novel HTTP/2-aware scheduler show significantly improved interactions with upper-layer protocols, e.g., an optimized dependency resolution, while preserving path preferences.", "pdfUrls": [ "http://doi.acm.org/10.1145/3135974.3135979" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c9572a162d6e1e5675e7c6eb656bee739f8beb14", "sources": [ "DBLP" ], "title": "A programming model for application-defined multipath TCP scheduling", "venue": "Middleware", "year": 2017 }, "c95af727578ca263ca79f8c16f5e7ed9fec05ab7": { "authors": [ { "ids": [ "3375797" ], "name": "Rashad Eletreby" }, { "ids": [ "5719499" ], "name": "Diana Zhang" }, { "ids": [ "40578966" ], "name": "Swarun Kumar" }, { "ids": [ "3054744" ], "name": "Osman Yagan" } ], "doi": "10.1145/3098822.3098845", "doiUrl": "https://doi.org/10.1145/3098822.3098845", "entities": [ "FreeS/WAN", "Interference (communication)", "Low-power broadcasting", "System V printing system", "Throughput", "Universal Software Radio Peripheral", "WAN optimization" ], "id": "c95af727578ca263ca79f8c16f5e7ed9fec05ab7", "inCitations": [], "journalName": "", "journalPages": "309-321", "journalVolume": "", "outCitations": [ "0dd96161deab7c342998b20adcf2c33331776530", "280d59fc8558081f231e2950670f4e5b311258eb", "13cf30311af5dc134ae921357eac9790f19e2000", "8073b08b83099886e3582c9fc3f9bfaa99136763", "2262002d970c8d16a6782fe0149962ef14780adc", "ed2b7074e1d652da1034ee3d7b24aba46af83aa1", "25b72cdbba447f2a80be1c37c33205f5e52941f1", "0ddc8ee97d4ca2822fff642257e01ac10cfda8c2", "87aa666773495bee0d5e1d095f229a53e9f9204d", "076776638cd64861b46b1d237d669ab5ea650d62", "3a9d6effd369197649d2ae6e7c2737ee0b755dfb", "1883204b8dbb419fcfa8adf86d61baf62283c667", "118f5e7b4723a0ea846245e28574cf7ed715eccf", "e20adb6782c627170ede684c59cd18d31fe81dbc", "145568f9cbd9f5cea31bbd181315192e4f56f014", "01d5bf24c0b35d9a234d534bf69924fa16201dee", "2b3aabf4173e515a6e9bbc3410cd5dd9c87549ba", 
"02a8e84dee47336e08e5a5c5c902f98714232194", "d416fc67b5cecca81ef366a82b4fd156c2ed11dc", "144474ab9edd5352579c37463eb81ac1ea9fe3f6", "251932bee282d82a4088fa941bb510f3d9fe27e9", "5eb135ed334c0ccc4c18513adda154c262147523", "446d6f08859c7eb06866344f266aed3c381a6b85", "a63a9cd608efadd185d9b1faccf4baf28d205c45", "71e34371eb27da208bb2f71cb6b33e61dbd7c011", "2e8d2c19cb5b9e8a007f1a639f4dc26cce2771a8", "525d4f553530c5f8f9f30f388347003e26fc4b27", "3944192cc9e019a730d4e456427712484124a959", "015ce3f823dac9e78ab3ff1f63e67e5a00145ac6", "e70d941fe22a3a312318c05abc0f45d3aaa94d28", "50a932b19aabb1cefd2b419a73f30fa42adc0fca", "18ac74c0513e628694bc8ace53d8146bd9b1965d", "098710e29af9e905b7c3dbfa877584bfdba0824a", "7e78f4d96a9c27d0ae6f3685999c3c4470cab1f1", "026667fa041feede8987563fb0cb40014466a67b" ], "paperAbstract": "Low-Power Wide Area Networks (LP-WANs) are an attractive emerging platform to connect the Internet-of-things. LP-WANs enable low-cost devices with a 10-year battery to communicate at few kbps to a base station, kilometers away. But deploying LP-WANs in large urban environments is challenging, given the sheer density of nodes that causes interference, coupled with attenuation from buildings that limits signal range. Yet, state-of-the-art techniques to address these limitations demand inordinate hardware complexity at the base stations or clients, increasing their size and cost.\n This paper presents Choir, a system that overcomes challenges pertaining to density and range of urban LP-WANs despite the limited capabilities of base station and client hardware. First, Choir proposes a novel technique that aims to disentangle and decode large numbers of interfering transmissions at a simple, single-antenna LP-WAN base station. It does so, perhaps counter-intuitively, by taking the hardware imperfections of low-cost LP-WAN clients to its advantage. 
Second, Choir exploits the correlation of sensed data collected by LP-WAN nodes to collaboratively reach a faraway base station, even if individual clients are beyond its range. We implement and evaluate Choir on USRP N210 base stations serving a 10 square kilometer area surrounding Carnegie Mellon University campus. Our results reveal that Choir improves network throughput of commodity LP-WAN clients by 6.84 x and expands communication range by 2.65 x.", "pdfUrls": [ "http://www.andrew.cmu.edu/user/reletreb/papers/Sigcomm_17.pdf", "http://conferences.sigcomm.org/sigcomm/2017/files/program/ts-8-1-choir.pdf", "http://doi.acm.org/10.1145/3098822.3098845" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c95af727578ca263ca79f8c16f5e7ed9fec05ab7", "sources": [ "DBLP" ], "title": "Empowering Low-Power Wide Area Networks in Urban Settings", "venue": "SIGCOMM", "year": 2017 }, "c9748cdd2b0e8c4298f9bcf1caeecac150f291ac": { "authors": [ { "ids": [ "17795090" ], "name": "James Phung" }, { "ids": [ "1746054" ], "name": "Young Choon Lee" }, { "ids": [ "9392149" ], "name": "Albert Y. 
Zomaya" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Application programming interface", "Benchmark (computing)", "Central processing unit", "Distributed computing", "Hardware virtualization", "Middleware", "Operating-system-level virtualization", "Optical power meter", "Performance Application Programming Interface", "Scheduling (computing)", "Server (computing)", "Virtual machine" ], "id": "c9748cdd2b0e8c4298f9bcf1caeecac150f291ac", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "335-344", "journalVolume": "", "outCitations": [ "13ca3cb654d67afdc3ba62e47afee6a72139e622", "22289aa4d4895962bab55b646a20a9ae28a4bade", "20cca03bbdbd83d321a3939694ae9b41f6b8e66d", "78e009cc05a6a832106d5ca6802ce56bef6b247f", "43de3a783b1093e5f47ad34c8595fb5b4de5fc38", "0b5e9d1b9b812da75ffd8abf5f218d21a0101356", "07e6ca15ba66473976f6b5d462a235c92cfc82d6", "4ef89b90538c5d466462ab6401874d903617b6da", "067c7857753e21e7317b556c86e30be60aa7cac0", "e40b1960fe6826724ec1350aef08f56bec182133", "438e22ee516ecd66ade08aa6d5a9af1dd16d5716", "7e6360010d33c720d94019d7e39937a6a1e717a0", "f87751d801f6f9c2e910c299a301679a16ced77e", "79bc19231b448044aa91335e7804dc1401a8080c", "6afcd2f545695765168473926f2e328ad62f7f44", "163252e81b3a0f8269871f0845338c53494ec4f5", "a817a687600ef821d4d89384a6a0b97c749f6f69", "0b758aeb545039656408d0caa694f9e9c6abc690", "a9c1fa73f2c830f1f1d8526042a357a697dcfab4", "487dd2e2dcee2dd05f59812c86e0f0adcb447eb1", "995c6b5e9ee851f1b70ed85a00867eb79714c246", "4416052fca95270b50a29e9e3cc245cca8962861", "502caa83bdcc2352fa926a304440913f866191a9" ], "paperAbstract": "Many servers use technologies such as virtualization or containerization to improve server utilization. These technologies pose challenges for power monitoring since it is not possible to directly measure the power use of an abstraction such as a virtual machine. 
Much work has been done in modeling the power use of CPUs, virtual machines and entire servers, however, there is a scarcity of work in building lightweight power monitoring middleware that can be deployed across a range of systems. In this paper, we present cWatts+ as a prototype lightweight software-based virtual power meter. Utilizing a simple but powerful application-agnostic power model, it offers comparable performance to existing \"more complex and heavier-weight\" power models. It uses a small number of widely available CPU event counters and the Performance Application Programming Interface Library to estimate power usage on a per-thread basis. It has minimal overhead and is portable across a variety of systems. It can be used in containerized or virtualized environments. We evaluate the estimation performance of cWatts+ for a variety of real-world benchmarks that are relevant to large distributed systems. Also, we examine the importance of including CPU core temperature data in the power model. We demonstrate that our power model has an average error of less than 5%. This result compares favorably with existing state-of-the-art power models and is achieved using a relatively simple power model that exhibits minimal power consumption (overhead). 
Consequently, our power monitoring middleware is viable for use in real-world applications such as power estimation for energy-aware scheduling.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101159" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c9748cdd2b0e8c4298f9bcf1caeecac150f291ac", "sources": [ "DBLP" ], "title": "Application-Agnostic Power Monitoring in Virtualized Environments", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "c98ad6cf88e688a2b2d4e4e4b81640adb69b545b": { "authors": [ { "ids": [ "26336329" ], "name": "Yunbo Li" }, { "ids": [ "3164170" ], "name": "Anne-C\u00e9cile Orgerie" }, { "ids": [ "1709070" ], "name": "Ivan Rodero" }, { "ids": [ "1750983" ], "name": "Manish Parashar" }, { "ids": [ "2216103" ], "name": "Jean-Marc Menaud" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud computing", "Computation", "Computation offloading", "Emergence", "Glossary of computer graphics", "Internet of things", "Quality of service", "Streaming media" ], "id": "c98ad6cf88e688a2b2d4e4e4b81640adb69b545b", "inCitations": [ "4151a309b5361ae15b8408ab8c1f3d6774fec33c" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "186-195", "journalVolume": "", "outCitations": [ "959cbc814c40ec9fa8d0ab7c1244fda009a27108", "c6c492ce8227c38c6d24183a2d5c37d89bfd05dc", "48c2559822d68f935c2e006f8f28c65224c7787b", "ccc03f657a840fcf4c48e75b99201fb7aa995fb8", "de667fddee4341d60cdd9e825581a80968b1e2ef", "b59a970bc0b90de4d6c237a2dc5a9e732d751159", "6049062a3a73d22c914e7fa8951b3b0e5f09b309", "9cac99ee880b6e06a105bbe2327a34f5d2f86ad4", "8c90c095eebd5f7b49e243fedb511c09ef98b0a8", "2b694e2640e5450bc6be6e985522161b69ef4c6d", "66a938601da5a173241b29605688bc332f3eb510", "c84960dd87c67d919d73d90630494954c9f55f99", "2219893dfcd0c9a9c2769530de9898d6868ba25b", "78ce88433728d9c27828353bc9aecef6d884ec13", 
"12a376e621d690f3e94bce14cd03c2798a626a38", "56c824fdf7f8585fe5a4f6a3afafe344e3f39347", "963a273144727fb1acec3c434ee00afbca9646f4", "995c6b5e9ee851f1b70ed85a00867eb79714c246", "4cd88a9280f8e982643fc0cb9d2e518adb207cc9", "3540e345c600b2a3d40c300168182d1393cab248", "97d835f0b0dc8944f7ba5c1a0db4f08895c6b0af", "84f25ddd053e414f239b91552410dab0adbaedad" ], "paperAbstract": "The emergence of Internet of Things (IoT) is participating to the increase of data-and energy-hungry applications. As connected devices do not yet offer enough capabilities for sustaining these applications, users perform computation offloading to the cloud. To avoid network bottlenecks and reduce the costs associated to data movement, edge cloud solutions have started being deployed, thus improving the Quality of Service. In this paper, we advocate for leveraging on-site renewable energy production in the different edge cloud nodes to green IoT systems while offering improved QoS compared to core cloud solutions. We propose an analytic model to decide whether to offload computation from the objects to the edge or to the core Cloud, depending on the renewable energy availability and the desired application QoS. 
This model is validated on our application use-case that deals with video stream analysis from vehicle cameras.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101137" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c98ad6cf88e688a2b2d4e4e4b81640adb69b545b", "sources": [ "DBLP" ], "title": "Leveraging Renewable Energy in Edge Clouds for Data Stream Analysis in IoT", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "c997c870584692dcb960f140af2460c5b52ea8b2": { "authors": [ { "ids": [ "38249487" ], "name": "Lalith Suresh" }, { "ids": [ "1775084" ], "name": "Peter Bod\u00edk" }, { "ids": [ "1684547" ], "name": "Ishai Menache" }, { "ids": [ "1709876" ], "name": "Marco Canini" }, { "ids": [ "1780839" ], "name": "Florin Ciucu" } ], "doi": "10.1145/3127479.3132020", "doiUrl": "https://doi.org/10.1145/3127479.3132020", "entities": [ "Amazon Web Services", "Distributed computing", "End-to-end principle", "Fairness measure", "Job scheduler", "Multitenancy", "Rate limiting", "Scheduling (computing)", "Service-oriented architecture", "Synthetic data", "WISP" ], "id": "c997c870584692dcb960f140af2460c5b52ea8b2", "inCitations": [], "journalName": "", "journalPages": "611-623", "journalVolume": "", "outCitations": [ "3af5e48a741634d2572b839ca57b68929cd2d648", "ad8c8feae36e649d885af3df3d427a3ea40651c2", "9e98d529d158e2230d722f497fbc36373eaa8583", "132f00de21cee656d00ad6779f1926070ad59544", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "2de63b0c867b290d4f7217459c968aa98e5ad39d", "3b988049dd8f62f772281e90196bbd793700c86b", "06db78ece7ba41bccab5df77240541e32cffd623", "4e3256845c3fdbaaf99535418d90f5112348b3de", "37525b2c3cc16a2fe166708a4f7081b949b1888e", "6e669e90a34c4179f9364406d8a7a7f855745086", "b6571efa4483aa00d23bbcd36930c4877548ba38", "5c159777860921aedd1924d4d93ad5bbce096c64", "9bcc0099f0d34c391ca1a3c5220cb0b3b33c4183", "03543f75c4fe0c49f81af789a1c7293ff0e4e107", 
"f0350ae6442d2737585fdd73e97c6a1d250ac937", "20165e23266d4753183a8a584a4364ebc3de1c7f", "231ba17921ebd80e95771e28dfb5082e169d5a53", "08e07f86c660fbaa900594efa6b6ac97351edab2", "e847c3ec130da57328db79a7fea794b07dbccdd9", "238dd4c308c1ee6ef3809fdf15fdc87be74bdbc8", "661b19ff987b9ed9d9252324d4a72ab1fbd588ae", "1c5df885b27106d51fcc177ddd2d92bfcdb71b6b", "764d7de61421968d6b477f0c055d72dcb0893544", "6d83ef26b1aa268888e0787bcf30fb7b245a7f2d", "23ddae93514a47b56dcbeed80e67fab62e8b5ec9", "1d2871c56d07a35e6709d535fbbb2df6b434962a", "3bb723980b8eaee73acfe39ba74934245051575d", "151fe4cd7d0c788b3e362636d5c31a4c13f90a9a", "75032b6df2f3988d2cb6988c73cbc01c7b9e80cb", "154a1c578fe5c05e3a9bcb4f6312960b11e0f146", "177d039a925fcf384ba868d65b6449746726b127", "2e72178091b2ca445f46200dcba71a53417b69eb", "11040f24714857941c569df70b21c4c8655e074a", "47d5357957cabb610131db1b228e58b70860ee8d", "43776b15c034076a36b7143d58af8e04715e41d0", "2988e34168fa91398fa397baf823af2063893e9c", "0b95a8628f90a78909447c4cfee2dce7cb92dd52", "548651f7a20d19cf74bf8ce9d6db73a9195bc4de", "be4c6170ee4fd72ff5c8fc92e3d6ba5cba774cf6", "3a043714354fe498752b45e4cf429dbae0fb2558", "09d1a6f5a50a8c3e066fb05a8833bc00663ada0e", "52ece1e929758e9d282e818e8e9985f88570f2dd", "0b2c84be9e9f97f2464ad9d09be5f4c37edda47e", "339e48ec3ef8d548aad8ca8dab455f1c4cdb9767", "3338173866c3c85338a5ac26560d5392108c8eac", "65a2cb8a02795015b398856327bdccc36214cdc6", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "0faff4fa4347d5369956dbdbea410869fc399bfd" ], "paperAbstract": "Multi-tenant distributed systems composed of small services, such as Service-oriented Architectures (SOAs) and Micro-services, raise new challenges in attaining high performance and efficient resource utilization. In these systems, a request execution spans tens to thousands of processes, and the execution paths and resource demands on different services are generally not known when a request first enters the system. 
In this paper, we highlight the fundamental challenges of regulating load and scheduling in SOAs while meeting end-to-end performance objectives on metrics of concern to both tenants and operators. We design Wisp, a framework for building SOAs that transparently adapts rate limiters and request schedulers system-wide according to operator policies to satisfy end-to-end goals while responding to changing system conditions. In evaluations against production as well as synthetic workloads, Wisp successfully enforces a range of end-to-end performance objectives, such as reducing average latencies, meeting deadlines, providing fairness and isolation, and avoiding system overload.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3132020" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c997c870584692dcb960f140af2460c5b52ea8b2", "sources": [ "DBLP" ], "title": "Distributed resource management across process boundaries", "venue": "SoCC", "year": 2017 }, "c9aafe12f601f92b799c14b46db7a463650ece93": { "authors": [ { "ids": [ "9551151" ], "name": "Changzheng Wei" }, { "ids": [ "26846526" ], "name": "Jian Li" }, { "ids": [ "7878550" ], "name": "Weigang Li" }, { "ids": [ "39989707" ], "name": "Ping Yu" }, { "ids": [ "7203366" ], "name": "Haibing Guan" } ], "doi": "10.1145/3127479.3127482", "doiUrl": "https://doi.org/10.1145/3127479.3127482", "entities": [ "Change detection and notification", "Cloud computing", "Content delivery network", "Digital distribution", "Elasticity (cloud computing)", "HTTPS", "Intel Developer Zone", "Key distribution", "Key management", "Key server (cryptographic)", "On-premises software", "Public-key cryptography", "Scalability", "Security bug", "Server (computing)", "Software bug", "Throughput" ], "id": "c9aafe12f601f92b799c14b46db7a463650ece93", "inCitations": [ "5d5dc223949eb6a4447ca59c539e986fa48bdc6c" ], "journalName": "", "journalPages": "201-213", "journalVolume": "", "outCitations": [ 
"5b2092b54860f134f78b2ec884c910750def71e6", "6287b8609480691d473fc36933ba053890c2296f", "1c0d35e024dbb8a1db0f326fb243a67d158d5f24", "17f19d9ec093ef82a10f1276fc53c10d4667836d", "2880a228064aa337250ce657b12d53027f74a05c", "845e96c20e5a5ff3b03f4caf72c3cb817a7fa542", "201b0a185dda51629d7b6fdef3b380a0beaba455", "38ae2be71d964749e24264eca9ba1102c397fa00", "be2d4ae4b5a9624496f22c3e7aa5589ffb0dbf04", "9fc84a290eaaa54b05dac8373143e90ea01c7181", "021fe0d3dd74fdd2db57c2af510d99ddf7a59d10", "d5d1c3356e6b2dce34e5a43c881eac0279ea6588", "670cdcf84a0403cf15fc80dd042e1938847fdf29", "1f0665485f7fbc06675c981866efab2c4ccbcdd4", "298c3a477103d612860afd5a6a85d8058aa41a2e", "057beee2540c3bd9cb167185a481dcf20e3647a3", "453a8de66df5f95f7b6558f60c10bf20909dd636", "4c60ec65bd28c6637f82ee3f6ad28d6eaa9c4824", "2729cc1dc4397ba1c0431d1641c84a3d126a50c9", "4dc3387459dbe471689cc71096db79290287d299", "10f328c410d4ec36127ed7aa9fad1ba2a416e38e", "43e632540fa490c2352a03546a20d53850953626", "197f0b31f4088c7a7301e4e3079b43be2eae3dc3", "69fa620d332120263317fafc41298b2a3d9b67c7", "1ada518aaad1dd3c3894ad70a0385907a9f94657", "5f830f954b754b171a8bbc9da8b0e721ad8c719b", "fff5a3270745e678d6c0540dfb854b3814fa15f9", "e2f2766d142929359c9d41760c54765e84fe8b25", "05f70f429a7bf38efa9e457fd486cb862bd495be", "e7309fff2a052ffea83597254bc2c0e0e19372a2", "0d1141aed3b02027846bb88022212ba498a0be59", "9c058479f3d8b5304d2154ef85cbabe0b8cfab0b", "3fe977b2e79827af5928ed68d96ba2e37038665c", "ee00de37ab679bffc0c03222a100e01c59bd38b8", "01fde8698110cf46ff48a17c65f2658dab4c323c", "64bb8f898dc2097f29e3fede4255e19f609bf3c7", "34e8a2787d737b050afff384ff0befd31c95e3a9", "2044ab1ce724ec11c653ed8d642a1592542d8630", "4b318377d856bab660446e5306839d59ee434d5d" ], "paperAbstract": "Protecting the customer's SSL private key is the paramount issue to persuade the website owners to migrate their contents onto the cloud infrastructure, besides the advantages of cloud infrastructure in terms of flexibility, efficiency, scalability and 
elasticity. The emerging Keyless SSL solution retains on-premise custody of customers' SSL private keys on their own servers. However, it suffers from significant performance degradation and limited scalability, caused by the long distance connection to Key Server for each new coming end-user request. The performance improvements using persistent session and key caching onto cloud will degrade the key invulnerability and discourage the website owners because of the cloud's security bugs.\n In this paper, the challenges of secured key protection and distribution are addressed in philosophy of \"Storing the trusted DATA on untrusted platform and transmitting through untrusted channel\". To this end, a three-phase hierarchical key management scheme, called STYX1 is proposed to provide the secured key protection together with hardware assisted service acceleration for cloud-based content delivery network (CCDN) applications. The STYX is implemented based on Intel Software Guard Extensions (SGX), Intel QuickAssist Technology (QAT) and SIGMA (SIGn-and-MAc) protocol. STYX can provide the tight key security guarantee by SGX based key distribution with a light overhead, and it can further significantly enhance the system performance with QAT based acceleration. 
The comprehensive evaluations show that the STYX not only guarantees the absolute security but also outperforms the direct HTTPS server deployed CDN without QAT by up to 5x throughput with significant latency reduction at the same time.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3127482" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c9aafe12f601f92b799c14b46db7a463650ece93", "sources": [ "DBLP" ], "title": "STYX: a trusted and accelerated hierarchical SSL key management and distribution system for cloud based CDN application", "venue": "SoCC", "year": 2017 }, "c9c258f92a51c1523829c2c8834c7c4a9c573648": { "authors": [ { "ids": [ "6504842" ], "name": "Yidan Wang" }, { "ids": [ "1699399" ], "name": "Zahir Tari" }, { "ids": [ "12212141" ], "name": "MohammadReza HoseinyFarahabady" }, { "ids": [ "9392149" ], "name": "Albert Y. Zomaya" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.28", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.28", "entities": [ "Apache Storm", "Data center", "Mathematical optimization", "Programming paradigm", "Scheduling (computing)", "Stock and flow", "Stream (computing)", "Stream processing", "Throughput", "Velocity" ], "id": "c9c258f92a51c1523829c2c8834c7c4a9c573648", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "215-222", "journalVolume": "", "outCitations": [ "5e6cbb9febc0d059b8516aff7a744a741ab59470", "6f2b74e5a9d6341fc961589981c56ff887100c39", "7adbaea3f32aabd616ec897fa5981c5727f89536", "04afd5f18d3080c57d4b304dfbd1818da9a02e8e", "2f7f5d0e989c74d6279e2620e10e8d0b0c021cb7", "dc2df2048d8da76627d7c821b72d6a306037fd25", "d72515ff5c534a919bc55a1770d69b1c8298b3f7", "42490a37f9e284ba4d368cf5a41f2ea6c26b0ee1", "685f1e1a88f299704591266f2ee9abe29afdd124", 
"4efbb846b1acb832e1914d6c05dae3ef05812888", "6f5d96874b919df9e884a165a21859b860f2a5fd", "b6e2a92542b50492d4e86f90882e4070cc2824a0", "ce91b0ca3b88bd0464f5e7ca7564ee84ed7b371c", "ad62c65d2c5d626f32ae9c5214d3d4b88348950b", "b02e58b556fba27538448dc8799f248e8986a987", "e3da37a350bea543fbd3191a2a058a2c5cc27d70", "f474752bbb5c8a81dd6e5ffa9ba0c172e85c5a4c", "fbafa098c81a5c5b7f5dd5ef0d985f96009c91a0", "00c5d5189e29dba8f2729929476b739a5c35bc02", "03bb29b15323f3270e813a8c2bf902f1fdeca4b4", "0ef1dd03db41de69165075562a051021a186c230", "689daac32ba52ad5d72178fd4d5e093fb9501132", "0b56c5c990051e879d341671d85408fbf519c7c8", "abc86cef65f4a7c5f81573a04f29c0eaeefa4d77", "022e936d46bf435f73faf9ca03a5a150eb90ce9b", "c894ebf70be4bc795f3af12577d6f9d6084d7ca1", "57a1e514277feafee1df954e8fad0d3e9f1baee6", "4b8fbe5e18af87ce47b728bf7b4e644c9de0c95e", "0ec59b7fae15a7caa3256ba31b21802d455618c2", "55785c558751e5db4281ed45d6ceb6260f1d3fff", "4a520c1818fc7ca560331234f6bee68d4d8bd302" ], "paperAbstract": "Stream processing is emerging to react to the changing business situations of real-time processing. The main aim of this paradigm is to deal with the huge volume of data in the format of information flows originating from distributed devices. This consequently poses challenges to the scheduling problem in cloud data centers regarding the time-varying velocity of data ingesting and processing. In response to the uncertainties and complexities of streaming data, we propose a model-based scheduling scheme for stream processing systems, capturing the system behavior and providing an optimal allocation strategy to adapt to the changing work conditions. The proposed scheduling policy is implemented in Apache Storm, and micro-benchmarks with various shapes (e.g line, star, and diamond) were used in the evaluation. A topology that tracks trending topics on Twitter is also used, where the input is feeding with tweets in real-time. 
Experimental results show that the proposed solution can perform estimations that are well aligned with the system performance. The proposed scheduling policy achieves an improved performance with regards throughput and latency under varying ingesting rates.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.28" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c9c258f92a51c1523829c2c8834c7c4a9c573648", "sources": [ "DBLP" ], "title": "Model-Based Scheduling for Stream Processing Systems", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "c9ca1b574aab91067b6fbf8ecc5a93239d6dced5": { "authors": [ { "ids": [ "40618723" ], "name": "Dominik Mautz" }, { "ids": [ "1725395" ], "name": "Wei Ye" }, { "ids": [ "1794612" ], "name": "Claudia Plant" }, { "ids": [ "1690834" ], "name": "Christian B\u00f6hm" } ], "doi": "10.1145/3097983.3097989", "doiUrl": "https://doi.org/10.1145/3097983.3097989", "entities": [ "Algorithm", "Cluster analysis", "Dimensionality reduction", "K-means clustering" ], "id": "c9ca1b574aab91067b6fbf8ecc5a93239d6dced5", "inCitations": [], "journalName": "", "journalPages": "365-373", "journalVolume": "", "outCitations": [ "cd800900fbd9bc89a35ff597a7ddfe74aceefa55", "018d686bc93bf0c1216f1a7f8f16a2246b477829", "2ad064e72abb2cdd97178817aa21a0ff2909f014", "1ad15c08556c8f8e3739703857ea01077ce738c5", "226f54d7f33de17a9cc77803af563a7a258c8639", "0df2b754298a40ddd26351a155ea8c66b7f66513", "10e1e88f9c137d8e350bfc6c9f60242a8f3d22b4", "36513f869e5ba2928369014244dff998ab93728c", "02d843e3a008e76cf6a4c23bd01023d264b05686", "25d04eb4f4cb9d9f71ea30e832aeb05fff40894f", "9241ea3d8cb85633d314ecb74b31567b8e73f6af", "0ad612841290e6e197996f450cd0ef5fd7cb29c7", "cbb21d412be7ef87366bce6c5f75d74d6cf08658", 
"3fdb0e91027e0d3cfc39220db021b838f68b90e8", "badb2fb3c8792d5b70aa27ae1ae231208ba4253f", "286027ba1b661b4d8856b42f2eefb95fb5b94519", "0795497b74a828acc6fa9585ee00438d2c7ffdda", "548c994134aab09054ea74fa0609acf8fb411f6b", "6b3471ffbefe4d8ef53be9c1c131f02f81d72e44", "34129e89eca51bc2916e8e91cb4b81d01edb6521", "9b8d8f2fb88e03f8f3ad01efbfef52718b70d104", "f87cb262798e9d682391419c79eb444d3443ba7b", "8e158fe4fed61e9a0b16944820365d0a6ebf29a8", "99b2de508b45ecd0c3aa797cb0376b64f9665d1a", "c5faa674df8fe81d17ec2537b865045c20d79990", "4cc3d8b37c72c4ffceee58f105a4e027ca130f8b" ], "paperAbstract": "Is there an optimal dimensionality reduction for k-means, revealing the prominent cluster structure hidden in the data? We propose SUBKMEANS, which extends the classic k-means algorithm. The goal of this algorithm is twofold: find a sufficient k-means-style clustering partition and transform the clusters onto a common subspace, which is optimal for the cluster structure. Our solution is able to pursue these two goals simultaneously. The dimensionality of this subspace is found automatically and therefore the algorithm comes without the burden of additional parameters. At the same time this subspace helps to mitigate the curse of dimensionality. The SUBKMEANS optimization algorithm is intriguingly simple and efficient. It is easy to implement and can readily be adopted to the current situation. 
Furthermore, it is compatible to many existing extensions and improvements of k-means.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3097989" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c9ca1b574aab91067b6fbf8ecc5a93239d6dced5", "sources": [ "DBLP" ], "title": "Towards an Optimal Subspace for K-Means", "venue": "KDD", "year": 2017 }, "c9cc11f39638b9bcfead0192cb8e3189d3a00d6b": { "authors": [ { "ids": [ "3031038" ], "name": "Sridutt Bhalachandra" }, { "ids": [ "2119657" ], "name": "Allan Porterfield" }, { "ids": [ "2216287" ], "name": "Stephen Olivier" }, { "ids": [ "1805266" ], "name": "Jan Prins" } ], "doi": "10.1109/IPDPS.2017.114", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.114", "entities": [ "Best, worst and average case", "Computation", "Dynamic voltage scaling", "Embedded system", "Frequency scaling", "Haswell (microarchitecture)", "Modulation", "Run time (program lifecycle phase)", "Runtime system", "Speedup", "Supercomputer" ], "id": "c9cc11f39638b9bcfead0192cb8e3189d3a00d6b", "inCitations": [ "3aab1a1d15b86d8ea51467e6f1c829a65f30d168" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "947-956", "journalVolume": "", "outCitations": [ "dd97355244bb2e1b369be0b2617e8452710ca44b", "b76269bf962989ce271bef7ea863ff4adf9c9de6", "9b23b2479ac1893873e1f84426c4f3722e0b356d", "9c099f5eb32baf1504e781e902952816cdf693e1", "efee61acb1847de685817b7d9bc1b6b095ef5026", "a4cafbba0b5ca0df355ee1a63dbbb4a2b1ab9815", "ba3f47455daea57ecc1f69491e1145384e059f12", "22070b62d181dc106f66b2d5d862704bf51dd8a8", "1e8233a8c8271c3278f1b84bed368145c0034a35", "377175d109126aea51714e8ef0e4324d28eb6fcc", "d27097fe656084843aceef8d30b978ee68a8ba28", "3875d86a9dc765b5ae2e66dc46d0da58ded6d75c", "81c4e99059104b00adc14f6797758aff998c066d", "7dea9924969f6270c47527d9e21f46d1f01cf582", "244030cb8e73144251ef3701ac758168031d17f9", "2c79b0dcb3f82897d9f0628d2d5128456f277e00", 
"95948a521a1c87cba1ff15365f87abc0f115cde4", "031c7edc39fb9943fd7d825ca28a8a5f8c73ebc9", "cafc42e34406fd2e717af613b55e5e651f0240b1", "f2a00ca6c4cfb9e879c4240d43addaefba750b11", "5b89e01e94782e8338074d00f0b70c14b7f676e4", "9a23e531e95799d8161271df9edd63f149eee838", "346ee93e610a95c60394900f857d398bc2ae74df", "395231738b21878d3f8f55ad18ea828eee587c16", "1585eaffcf9c9836eb1607e279e43ce2793e59a0", "053d396415240493536e6003d789dd9c9376033d", "07736bb61274b9c2a0920010f3fff9919533aee6", "51afdc23a72d19f7e6b1d46d25f7c7bb1814e85b", "c214415231ae287d4ea21660ea64a904b9d112d5" ], "paperAbstract": "Energy efficiency in high performance computing (HPC) will be critical to limit operating costs and carbon footprints in future supercomputing centers. Energy efficiency of a computation can be improved by reducing time to completion without a substantial increase in power drawn or by reducing power with a little increase in time to completion. We present an Adaptive Core-specific Runtime (ACR) that dynamically adapts core frequencies to workload characteristics, and show examples of both reductions in power and improvement in the average performance. This improvement in energy efficiency is obtained without changes to the application. The adaptation policy embedded in the runtime uses existing core-specific power controls like software-controlled clock modulation and per-core Dynamic Voltage Frequency Scaling (DVFS) introduced in Intel Haswell. Experiments on six standard MPI benchmarks and a real world application show an overall 20% improvement in energy efficiency with less than 1% increase in execution time on 32 nodes (1024 cores) using per-core DVFS. An improvement in energy efficiency of up to 42% is obtained with the real world application ParaDis through a combination of speedup and power reduction. For one configuration, ParaDis achieves an average speedup of 11%, while the power is lowered by about 31%. 
The average improvement in the performance seen is a direct result of the reduction in run-to-run variation and running at turbo frequencies.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.114" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c9cc11f39638b9bcfead0192cb8e3189d3a00d6b", "sources": [ "DBLP" ], "title": "An Adaptive Core-Specific Runtime for Energy Efficiency", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "c9e693ea37c42de06512376a35abdb7bf1fb3fb5": { "authors": [ { "ids": [ "9582313" ], "name": "Shubin Su" }, { "ids": [ "13652536" ], "name": "Limin Xiao" }, { "ids": [ "35373648" ], "name": "Zhoujie Zhang" }, { "ids": [ "39802579" ], "name": "Fei Gu" }, { "ids": [ "1728541" ], "name": "Li Ruan" }, { "ids": [ "2369464" ], "name": "Shupan Li" }, { "ids": [ "2517257" ], "name": "Zhenxue He" }, { "ids": [ "3204038" ], "name": "Zhisheng Huo" }, { "ids": [ "35416554" ], "name": "Baicheng Yan" }, { "ids": [ "36207234" ], "name": "Haitao Wang" }, { "ids": [ "2446296" ], "name": "Shaobo Liu" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.60", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.60", "entities": [ "Anomaly detection", "Ecosystem", "Experiment", "Local outlier factor", "Time complexity" ], "id": "c9e693ea37c42de06512376a35abdb7bf1fb3fb5", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "458-465", "journalVolume": "", "outCitations": [ "0e01f193fe65fc222a41654486ff219e9cafc297", "36306a48c0098f6aafb3a20504e650b448689961", "470c4d26b155991637506023587306b13727cb97", "3b2a5a206752e03440f22a1132aec6c307e16f9e", "46363cf3eead8fb869c38f896762f3552df9cdd1", "6a2b5f45130337fe44b86bd611371ab0bb40f4bb", 
"23cdb084e4e667bcc575914dfb02db4eaa6602dd", "3942f3a1f64a100f5e2d944136d1ad8b5ed8ec6d", "741abfcb621d9aa19aa79e4aa5c5cddb1473b3da", "00b5a6a84c8d516c750706dde9330d33f56e1059", "1ffcb27536ab5436e6d753919ab27ac1a44b4b69", "2b90c939bca9e1831504fbd3c8a2ec3e19508106", "be52fc40ccb7361277b173c5078a23c3f37d87f6", "b83a6c77e61a38ada308992cc579c8cd49ee16f4", "54830db850beb382eddbc3f765316c2f2ed48329", "a2729b6ca8d24bb806c168528eb81de950871446", "5fb9176fa0213c47d1da2439a6fc58d9cac218b6", "86ab8b00ba945cf57ebfc567ff7668de94f31f8e", "be49dbac8e395dba3e8f918924ffe4a55dec34ca", "30d310bf7d38e7eb2a47ddaa5c9ec27418d465ac", "45b764d927b39261d4ea008bc183b4a6c0265688", "d491c72aa7958f09503bd470f55bfcd861be2efc", "5d1596adeac9a058462aa70016204b3dc1f19d93", "154df96b95e8b9635771442244fe48b125933bb1", "501cf08625ede44176a12ce5a6b79ae7670aad81", "4820b9b840382ed7f9609359560a2e18d28119fe" ], "paperAbstract": "Since the Local Outlier Factor (LOF) was first proposed, there is a large family of approaches that is derived from it. For the reason that the existing local outliers detection approaches only focus on the extent of overall separation between an object and its neighbors, and do not pay attention to the degree of dispersion between them, the precision of these approaches will be seriously affected in the scattered data sets for outlier detection. In this paper, we redefine the local outliers by combining the degree of dispersion of the object and its neighbors, and propose a new local outlier detection approach (N2DLOF). Compared to conventional approaches, the outliers obtained by N2DLOF are more sensitive to the degree of anomaly of the scattered data sets. Experiments show that our approach has a significant improvement on outlier detection precision in the case of scattered datasets with similar time complexity. 
In short, we extend the ecosystem of the local outlier detection approaches from a new perspective.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.60" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/c9e693ea37c42de06512376a35abdb7bf1fb3fb5", "sources": [ "DBLP" ], "title": "N2DLOF: A New Local Density-Based Outlier Detection Approach for Scattered Data", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "ca28ecbc338551c179072e0c10d96c876e520e04": { "authors": [ { "ids": [ "2216954" ], "name": "Jung Hyun Kim" }, { "ids": [ "2552158" ], "name": "Mao-Lin Li" }, { "ids": [ "1720972" ], "name": "K. Sel\u00e7uk Candan" }, { "ids": [ "1682380" ], "name": "Maria Luisa Sapino" } ], "doi": "10.1145/3077136.3080794", "doiUrl": "https://doi.org/10.1145/3077136.3080794", "entities": [ "Algorithm", "Approximation", "Data structure", "Graph embedding", "PageRank", "Personalization", "Possible world", "Run time (program lifecycle phase)", "Social network" ], "id": "ca28ecbc338551c179072e0c10d96c876e520e04", "inCitations": [], "journalName": "", "journalPages": "525-534", "journalVolume": "", "outCitations": [ "22c74d8be071084ce8812af19548e7bf2bf0c8b6", "1dd8db60043f51c04eb7200915ebd253d2fabf64", "18b86865ddb4d704d699da99e9f2d0881c4bd700", "2e8dec3b97ca60b9bfe3d469f6c882fdd7d7b084", "435add102af7176a8b55748ae4618ed4f1c58eea", "6eccbba04f448fa5bc93ed94bc63bb03d36e114c", "09b4276e4c1bd3621de3830b8ee1ebfc4876136e", "f5cd576fd7783c164a2bb895d2f9f9b92c84d471", "3e1e5a5edd5858d906b49363984a3e3659fb9478", "f4056cd39c477eae6dbada979af3acc029663570", "1403e1afa5270484b91c6d44e555b23bbb848ec8", "a45f7fcebaa1ec6003f805f50668a0ae452dc725", "880a06701e2b65802b577b676614255826171cbe", "cfee92792612d534f2c1cb375e840ef0c85a532a", 
"2ecb074cea812962a8571b50b7ab3fbba2b731fd", "b97177ff9f88a4c8f40859651c9ef639a1f1f850", "a7a1619eb979fda9053326b1e48758eaf4cc6b87", "3f46589f70ff516c4606951669fc64ffb0e08e0e", "0a888fadf7f8ac8eb0f194f7979e421ce072bcc4", "009dbf3187862352aac542bf7d61e27bce6b27f5", "5792d825d2830133e42f7c0246fd23ebbd6ea023", "cd963ea119a79b898b3eaf0ee496bf38c8d3797f", "d56064a50c53ff19204fe8eb674852e4315fa91e", "e9fa7e93d2f4e958cde16c6cf9d5bc966f8e7ffc" ], "paperAbstract": "Measures of node ranking, such as personalized PageRank, are utilized in many web and social-network based prediction and recommendation applications. Despite their effectiveness when the underlying graph is certain, however, these measures become difficult to apply in the presence of uncertainties, as they are not designed for graphs that include uncertain information, such as edges that mutually exclude each other. While there are several ways to naively extend existing techniques (such as trying to encode uncertainties as edge weights or computing all possible scenarios), as we discuss in this paper, these either lead to large degrees of errors or are very expensive to compute, as the number of possible worlds can grow exponentially with the amount of uncertainty. To tackle with this challenge, in this paper, we propose an efficient Uncertain Personalized PageRank (UPPR) algorithm to approximately compute personalized PageRank values on an uncertain graph with edge uncertainties. UPPR avoids enumeration of all possible worlds, yet it is able to achieve comparable accuracy by carefully encoding edge uncertainties in a data structure that leads to fast approximations. 
Experimental results show that UPPR is very efficient in terms of execution time and its accuracy is comparable or better than more costly alternatives.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080794" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ca28ecbc338551c179072e0c10d96c876e520e04", "sources": [ "DBLP" ], "title": "Personalized PageRank in Uncertain Graphs with Mutually Exclusive Edges", "venue": "SIGIR", "year": 2017 }, "ca4564d556b03eeee755fad7a89475072424ea56": { "authors": [ { "ids": [ "1681638" ], "name": "Hao Luo" }, { "ids": [ "40205336" ], "name": "Pengcheng Li" }, { "ids": [ "1716493" ], "name": "Chen Ding" } ], "doi": "10.1145/3018743.3018759", "doiUrl": "https://doi.org/10.1145/3018743.3018759", "entities": [ "Algorithm", "Central processing unit", "Multi-core processor", "OpenMP", "Parsec (parser)", "Program optimization", "Thread (computing)", "Time complexity", "Working set" ], "id": "ca4564d556b03eeee755fad7a89475072424ea56", "inCitations": [ "59ac30cfe1c9b7309b8698af9a4cac7541dffd4a", "970364730c038572ee9b7b69eef86d97a5488a2d", "d689969f143bbe74b0228b08df3f1a8215687148" ], "journalName": "", "journalPages": "103-115", "journalVolume": "", "outCitations": [ "6918681cd06dce0eb7fc15cbd8cf11ecf8322a94", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "226e0f47d9f05fcde2f2da2d24964090825dcb3b", "60b85b7ee655397a4d2202f9cdf6dd5e3f04f6fd", "2e8ab636f544007408884dc6fafafdb00a4cd62a", "32f6ded4e88667f34fe49a0ee80d9a9093b00547", "2ce5173c67380834849195ce468c11389f9f707b", "0563ad22510edae664f9c04386ad91ec57eb7786", "635210aa01bd460f5dad80c5fffef8a0dfb4993e", "a18e4aeca19e7044c005ad1df2d6cc668cf4282d", "35b1b5a69d7882053aa35e7463ceb903733a2cce", "68fc2f07326a0fafbda163d7e18d827038f6718d", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "0653e2ed9f683868cb4539eb8718551242834f6b", "11ebb411b138d2acdd481a6920b822fbc213cdc0", "3b903a39571836bcf080891eb79afad2fe0fb1f2", "83bbec4d4f56b5631f48607b7b6c75a505a8b448", 
"2f1993d2c6c82e6a794adf19d1b5cec9fc593602", "214539c3dc1da3eefa9cdc65079b8dea72afb07c", "47a1f146113138fa8591d5ddbde919e39c9cbbf7", "8809cf4898fb0d51d533e708411f3f1856fa99fb", "792475587d4aa1229f74295a7eca07ce21ad4330", "3767eaaa277690249aa145999cb8ccaead014c60", "57d3035be09a0703d503da8af082b128af3dfdf6", "38628d26d4f624378f4303b61ae93c5d34d007c3", "2f42558a0b49b56ef706e8435eeb7bb480f58aad", "f8aa33900f552f8112d6186d78bc845d2dfc0007", "2892b18872a42fbc6173e76263ac3e9251e2a334", "c4880d202299a883631d60ce472be436edeb0ee1", "27c7af8567915cd8f8a706357392bd1a20b3b8cf", "09056fc21d0ea8a7e24307625638589f91b42026", "0c65d05478483a294701d38c98e111d8a4b033f5", "0332013fc380ca283d3afc457c430c513d19cc51", "10ba5bd8732e8460e2876c6132129aa5f9c9b337", "685822d0bc60f288b9cd774ecd4d505b0311c3d0", "0b43a722d2ca43752750e4976f3056a006990143", "14fc1e949342af622c1a4991f0fa291170261305", "31b61e9fb535c2546087fc6afe9a5a68960f9cbb", "4f1cf2a9244816dc9ea7be304b85a45b7e0941a4", "8671317d25f917af263b457612f959823d5c86b1", "d4d2f647b8cac0cc288aa74721b8a77dda11f80d", "23a9e1f8cefc76b71f0cf5e1ccf5a6485c19cadf", "2819b4f9ccae4ba8b9b7b9cf6b81081c41d4adc4", "0c0ff71e1f225312bd24a2d78153f0b3f3816285", "0180ce1c6e98fd66362b46bd945c503bf7372aa8", "54f3331b575b2d451c2d716f86496cada23d596d", "2fd637ff36c131ad82b2fcf0b1723196ea0ce05c", "47ccfd0c9dc218f5496783310a28c581730b9ca7", "e1e71f88d56c68183bdd49d0ac1b3185cbd51eec", "93a6a32f1bbf1913e9e2232132ec4fa7a75ab152", "00d40e74ccffef2ba3e4477f48b6265dbc5e9c1f", "5b56f9508b83ae02121ef3ae9afb0dbbe7c84200", "19fd1e65138021f9406ee119bba81af564dc5edd" ], "paperAbstract": "On modern multi-core processors, independent workloads often interfere with each other by competing for shared cache space. However, for multi-threaded workloads, where a single copy of data can be accessed by multiple threads, the threads can cooperatively share cache. 
Because data sharing consolidates the collective working set of threads, the effective size of shared cache becomes larger than it would have been when data are not shared. This paper presents a new theory of data sharing. It includes (1) a new metric called the shared footprint to mathematically compute the amount of data shared by any group of threads in any size cache, and (2) a linear-time algorithm to measure shared footprint by scanning the memory trace of a multi-threaded program. The paper presents the practical implementation and evaluates the new theory using 14 PARSEC and SPEC OMP benchmarks, including an example use of shared footprint in program optimization.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018759" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ca4564d556b03eeee755fad7a89475072424ea56", "sources": [ "DBLP" ], "title": "Thread Data Sharing in Cache: Theory and Measurement", "venue": "PPOPP", "year": 2017 }, "ca616f7fb2bf65e41e96532a44997ff8e5f4741e": { "authors": [ { "ids": [ "2764399" ], "name": "Stefano Iannucci" }, { "ids": [ "2026840" ], "name": "Hisham A. 
Kholidy" }, { "ids": [ "26425345" ], "name": "Amrita Dhakal Ghimire" }, { "ids": [ "2322206" ], "name": "Rui Jia" }, { "ids": [ "1913953" ], "name": "Sherif Abdelwahed" }, { "ids": [ "3343395" ], "name": "Ioana Banicescu" } ], "doi": "10.1109/CLUSTER.2017.54", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.54", "entities": [ "Algorithm", "Benchmark (computing)", "Big data", "Graph (abstract data type)", "Intrusion detection system", "Network traffic control", "Next-generation network", "Scalability", "Synthetic data", "Veracity" ], "id": "ca616f7fb2bf65e41e96532a44997ff8e5f4741e", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "278-289", "journalVolume": "", "outCitations": [ "05d6e0185bcb48d396fe778ceedb2078e37e72ef", "1a48cfe2da6f5f0b79b332525e9df699c07a2ec4", "67d0ef219bb1dfd2deeb005cf328f5281f5a92a9", "0202d62899140f1c358ead59e92caabd4b6994d1", "38c978de58ef96b4ad66bc5ad81a01fa16fc306d", "41e8c5603c0429c5caf460b3ecf7262c0c29b270", "74ab1f58c81889deea75f87da74e3c62911ceda6", "8a2041e7c6a28fde0b773a3010ee4c58cacc5572", "5becbe1a43145a46160903a5f966570e342c68f0", "4cdcbd877edd74d96f3c900e7c60a579a140b782", "1d57d5b9832f936e6482ab4e9950e2a862262fe9", "64da5b530d72699adcb295a13064842595e22cf8", "37a50f1591d382869a3c66f66c0a5d0ae95daad9", "e9fa7e93d2f4e958cde16c6cf9d5bc966f8e7ffc", "84bb64f5b935818163c668dcb5025214e7334a73", "2a005868b79511cf8c924cd5990e2497527a0527", "92163d1bee0f7a2e10f8abefcdf2cf6b520cf836", "0cb9928daa3b90ac6959d9fb863c5f8ad6422dde", "3315793b73a971dce255eb634343c16606c62b76", "52ff64f7f26b28447af255fedeb2216a70b48d66", "f975b3a59d95b9e5d00134d52fde7bdad119f958", "40eb1f990ac292b14b56ea06e61d9aeb9bfa28c3", "141e35263ab810983c90d47ad62eb4fab5e51717", "0fb0781cac8b3caac879f19d53cce72bd3de2397", "c3cf0ab1a6d4a47dce389bfe42ad5cfc685946aa", "3aed29136db8f1e5c6a89fc22d3ae4b4926a3555", "39e7bd27ae9c20e597f522bc358f6feadc7e382e", "0f3b35cecd0ea8cbbb5a8508924880a40c117df2", 
"025e224cf0f12f772c7efba4f7c6b769a2bf298b", "0d2c4723e9e5925cde74bd879611fda6f6e3980b", "4653782d8416577d6f8f47cc4d4c7ea424536f40", "0041adeb171b36f7ef568b6934ea3f4263163c05", "9ee76efb171dbc1264ab4b22933e3deedfd7fde8", "cd642576ce8502b533e229b537f9ffbe9254aef6", "996263c3ddbb50f0198354827445abd214f83030", "1d6320a672b866444737880cee8a980f5cca6864", "1f0612de1f191abadf250b78cd78f884203cca5e", "0371f9e3efbcd4829b5ffbff585155746ef05284", "4cee38d9d088cf021bc5f5b9fda6764feeb1806a", "17ca3ac5ce530d6a5b5c70ca54e93362e34b9b82", "31f27864950a6c417cf996927b2d5558f70d2b14", "6b1146e51b2c4d5b8433d4d9c6dbf87c6c484196", "0112891050537d4f587529c396c8b9855796d182", "eb82d3035849cd23578096462ba419b53198a556", "4755952f4c96d22407a62a5937478d0979e4840b", "01c1f0e97ce5c74c714dc7aa43cb064f45cc3b04", "f4e959d8b5c09739931a2d9e4a9f27ddd1f31d60", "4ea7f0e7a13b555ee3adc404de38e5d5ae5c8a67", "9dbae30f8253791138e6c1031c5b7e4c7b321185", "6f5c6297f9c7dccddac313c8344061cfd12509f7", "06c9511a3a29f1afa3971b1885ad56b5a890dbdc", "f2f7774c9f56ad18e2bda3f1a62200e0b5e63b65", "a5aad5abb32f6b15f31b92312bb3b0f7b6470977", "dbc0e5e5cda117a89989937fc3406b7d3f1ea9b6", "01c9330af094b7fba0f2d355d043270176b97614", "7578f6ce96bcf3215928c2ec5d8666e6baa9c8d9", "846fcf30dc75f04886092891e754791e9704f69f" ], "paperAbstract": "Property-graphs are becoming popular for Intrusion Detection Systems (IDSs) because they allow to leverage distributed graph processing platforms in order to identify malicious network traffic patterns. However, a benchmark for studying their performance when operating on big data has not yet been reported. In general, benchmarking a system involves the execution of workloads on datasets, where both of them must be representative of the application of interest. However, few datasets containing real network traffic are openly available due to privacy concerns, which in turn could limit the scope and results of the benchmark. 
In this work, we build two synthetic data generators for benchmarking next generation IDSs by introducing the support for property-graphs in two well-known graph generation algorithms: Barabási-Albert and Kronecker. We run an extensive experimental evaluation using a publicly available dataset as seed for the data generation, and we show that the proposed approach is able to generate synthetic datasets with high veracity, while also exhibiting linear performance scalability.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.54" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ca616f7fb2bf65e41e96532a44997ff8e5f4741e", "sources": [ "DBLP" ], "title": "A Comparison of Graph-Based Synthetic Data Generators for Benchmarking Next-Generation Intrusion Detection Systems", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "caead4b535884d058aaff54b05d863dd0eb3dcbc": { "authors": [ { "ids": [ "39765778" ], "name": "Adam D. 
Smith" }, { "ids": [ "2623159" ], "name": "Abhradeep Thakurta" }, { "ids": [ "33447255" ], "name": "Jalaj Upadhyay" } ], "doi": "10.1109/SP.2017.35", "doiUrl": "https://doi.org/10.1109/SP.2017.35", "entities": [ "Aggregate data", "Algorithm", "Convex function", "Convex optimization", "Forth", "Interactivity", "Logistic regression", "Privacy", "Program optimization", "Randomized algorithm", "Server (computing)", "Support vector machine" ], "id": "caead4b535884d058aaff54b05d863dd0eb3dcbc", "inCitations": [ "952ae7bfc971e136137908a8c5d2697682a42ebd", "89b7ada3286ac9f8b3331d3ec457689fea61e11e", "41ccc1520327650880decc6aae46ee8211e15dce", "7b19cf033827bd3dc7eaafd906d16cbfb5f08009", "61cc940f7d5726861f033161036dd0a87a6a1a36", "b556fbc91af34f648dce200934f365064332ee56" ], "journalName": "2017 IEEE Symposium on Security and Privacy (SP)", "journalPages": "58-77", "journalVolume": "", "outCitations": [ "65926b61d0308954bd6cc4f6cbe46eef64147635", "ad2bf130fe8091307f815b9c6aee6b117a756cd0", "3671338dc8c84d51b285bee79f85e7f3937a5078", "54beb99ff242f1fbac137d10a4356e51455d4c37", "3a53c7a380ff166e7f4093d67a8ce77f06d2aa8b", "b532099ff8b67049f292cd62700dca37fc2be623", "2173406c4ca5fff0de66e8cbed4cb01ca959cb31", "0c9ffe6bfabf2c1cb013855d913b6089c4918966", "1520e9131e6d543be0449549a5f17c07740bdf6b", "034788876d741c6a6b54b4d37a037d1768b1e255", "49934d08d42ed9e279a82cbad2086377443c8a75", "0b09bb2e76738d60ecf1005e4a6a4815f51089b1", "0db4d7a384c7feb5832ff3563c24cb0f6140e0ef", "03b01daccd140e7d65358f31f8c4472d18573a5a", "2467eb946986fddbaa24ed8a06d6397950fb53a9", "17fac85921a6538161b30665f55991f7c7e0f940", "040d9acab9003b9d50b2291cc6844b66b2a85d12", "8e19f59156276a825cc4368d74473a0d39247241", "1b35d5ece19a77bad6bb50bbd83d16270d0d0d3f", "00c0815f4f18801342710436c394ba84b721e5ad", "88caa4a21da7d31a63d7cc7e7de1897ba2c211b8", "2672ba89286367fe312f167d85a75b3fbe64b2ab", "009d284fe935b5f421d24321073097a0cd34e21f", "4912c18161cf35a066a9e70b4e4ef45ff9d19035", 
"1621f05894ad5fd6a8fcb8827a8c7aca36c81775", "075f328ef87a076151feb4d5b1f97b66aa597a90", "691d77596c24d1c100a958c444eeb0ce451c302b", "64028c85cd7b7e42f208e29734028572d7735c61", "c35842884e2edc14c1c6535afd2ea28fa921484b", "1b2c76017cc6fb1fcaf01ba874e322099ee22d8f", "aeb4dd9433d19c257cda7229ccd2a20a7568ae9c", "6154ce8c02375184f7928e41c4fae532500f7175", "6227544195ed3cb30e411b31507e330ac2397398", "336237fd41c22a697fb7cab88679ac0ea0b3fa52", "547ab2443a7c9d89b044e75df62e95098a7c3da7", "4b0520b544c090e6140e6031a070c2b0166b65fe", "3bedaf6996ae29dc0c9ee9e3f309b49d81cdc1c3", "1a09a893678a6ff386fbf91d00155dad316fa05a" ], "paperAbstract": "Recent large-scale deployments of differentially private algorithms employ the local model for privacy (sometimes called PRAM or randomized response), where data are randomized on each individual's device before being sent to a server that computes approximate, aggregate statistics. The server need not be trusted for privacy, leaving data control in users' hands. For an important class of convex optimization problems (including logistic regression, support vector machines, and the Euclidean median), the best known locally differentially-private algorithms are highly interactive, requiring as many rounds of back and forth as there are users in the protocol. We ask: how much interaction is necessary to optimize convex functions in the local DP model? Existing lower bounds either do not apply to convex optimization, or say nothing about interaction. We provide new algorithms which are either noninteractive or use relatively few rounds of interaction. 
We also show lower bounds on the accuracy of an important class of noninteractive algorithms, suggesting a separation between what is possible with and without interaction.", "pdfUrls": [ "https://doi.org/10.1109/SP.2017.35", "https://www.ieee-security.org/TC/SP2017/papers/373.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/caead4b535884d058aaff54b05d863dd0eb3dcbc", "sources": [ "DBLP" ], "title": "Is Interaction Necessary for Distributed Private Learning?", "venue": "2017 IEEE Symposium on Security and Privacy (SP)", "year": 2017 }, "cb0da1ed189087c9ba716cc5c99c75b52430ec06": { "authors": [ { "ids": [ "7895033" ], "name": "Yutao Liu" }, { "ids": [ "31405940" ], "name": "Peitao Shi" }, { "ids": [ "2866634" ], "name": "Xinran Wang" }, { "ids": [ "1716528" ], "name": "Haibo Chen" }, { "ids": [ "7274044" ], "name": "Binyu Zang" }, { "ids": [ "7203366" ], "name": "Haibing Guan" } ], "doi": "10.1109/HPCA.2017.18", "doiUrl": "https://doi.org/10.1109/HPCA.2017.18", "entities": [ "Context-free grammar", "Control flow", "Control flow graph", "Debugging", "Executable", "Fast path", "Information Processes and Technology", "Instrumentation (computer programming)", "Library", "Library (computing)", "On the fly", "Skylake (microarchitecture)", "Tracing (software)" ], "id": "cb0da1ed189087c9ba716cc5c99c75b52430ec06", "inCitations": [ "7bab43eb94430943a6883acced0453a98e1e62c6", "65ea39f3cb19e446d708b639060460c580a328e4", "325390173841d52f7a2791ba6b0e32ad80bf2630", "c042d909ad8956dbdb1f5b5319d0e9360d964ccd", "4f4590962bde0c2050122f91e5978271bb24d556", "b4b92eb555dd9c672f894216c5d50bf6164df78b" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "529-540", "journalVolume": "", "outCitations": [ "cba87856ab1fee9161fc21afb64f741acf2c4bf9", "339141d89863fef3fcead17f4447c68ac1ea7e6c", "23e8236644775fd5d8ff5536ba06b960e19f904b", "3719236dda3923c8e93e2c9a88532ef494f76462", 
"25a8afdfae0d607fa991e84c20f21646bd779fe3", "255bdcb05805c97d973081b59bc61c649263ceae", "704e2027ecdaa9561b75a854b585336c16cea89f", "07f9aa35346107785ad6b6cd07537b24fa7bf0d2", "08c3e50a2913da51ed3cdafdcfdfb488e8fa83c3", "0ed6a4c8fa2c74ca2e470df6e205797371523edc", "4b9426a328fb4e379e2fab57fc72a91c5618ae94", "686150e2179840ed40a0166cba6c5d507f3aa49c", "77cab38bef9d1b8b7bcfd3bb382c9d55541465e1", "28aaf833b5dd49279ae42f517b774e2bfd1a778d", "0e039df712774fcea67f214d9b5780c1dc250747", "157153c5c5b3fb42bd4de60d2cd8380e7038a82b", "17f4d0f4150255ce9e27ee55cd73f9d3001fbf04", "642bdae15a4a3f2e580e49f9726e2eee675d5ebf", "0988a425689f6f3700e797f4a2c18f73692573c3", "6458f4c0c029b038ebd1d7f61005a010ac250892", "01b5b648af61ddb382da638a299fae2315b25192", "3e1ff9416ec1c2f94d0ddbeb6171eea4c336d663", "038343c387ed6e39c8d8eee21fee1fef8fe55f72", "4a49ea34caf1027df74125b7929e52704a8eca73", "30e76f32c323adb0ff340760380fe5a08505b641", "03f827395a17beb941241dbd72322705bdf79791", "159826aec62676cf3f24bd7b90dde2137dc92c2a", "6e40435eaf84ac3dd2c48b8e81c0dcd5f0db1a12", "6f5635fb17ff8a27b08104de6d0322c32e943a9f", "0db59f09437b7b90376f011f5150ed976ac66231", "22050b3ee9c69c64dc796358c7f0ba247d4adce3", "0c69db05df0a2c876b4ddcf292b1334f1dc0378f", "3c1b97dff8f96170f9557319e9d881286aa77c1e", "cff4ec2407bc31067d07d2ff54bdc7d75a388b43", "96ba6f5c06850c009e5b77094c0d4532744dedc2", "9b2585f7248c8b5a22e9c816506e01060213ca85", "422c2d83a959df1f7c3e99b8a2c77772d8b2e7c3", "71da01051534d46fb3becd0a7506b64db56efc7a", "969dea2db829da5a4c112945b61c899b90080dbd", "6a8f65381a627a2db6c756a7185d9106f0acefec", "360e88b37ce5a9743d363309d147f783f7c1611f", "0639adf09bea98bef72d77c3671ccd8ef32ff542", "2caf47ac0035c27019965e04b0ba2711f20d59a9", "6a5b52bbe5be23b73f3874c448de17163e09bd16", "1e37625f382709b06f72e5c3c41aba1328ff66dc" ], "paperAbstract": "Current control flow integrity (CFI) enforcement approaches either require instrumenting application executables and even shared libraries, or are unable to defend against 
sophisticated attacks due to relaxed security policies, or both, many of them also incur high runtime overhead. This paper observes that the main obstacle of providing transparent and strong defense against sophisticated adversaries is the lack of sufficient runtime control flow information. To this end, this paper describes FlowGuard, a lightweight, transparent CFI enforcement approach by a novel reuse of Intel Processor Trace (IPT), a recent hardware feature that efficiently captures the entire runtime control flow. The main challenge is that IPT is designed for offline performance analysis and software debugging such that decoding collected control flow traces is prohibitively slow on the fly. FlowGuard addresses this challenge by reconstructing applications' conservative control flow graphs (CFG) to be compatible with the compressed encoding format of IPT, and labeling the CFG edges with credits in the help of fuzzing-like dynamic training. At runtime, FlowGuard separates fast and slow paths such that the fast path compares the labeled CFGs with the IPT traces for fast filtering, while the slow path decodes necessary IPT traces for strong security. We have implemented and evaluated FlowGuard on a commodity Intel Skylake machine with IPT support. Evaluation results show that FlowGuard is effective in enforcing CFI for several applications, while introducing only small performance overhead. 
We also show that, with minor hardware extensions, the performance overhead can be further reduced.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.18" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/cb0da1ed189087c9ba716cc5c99c75b52430ec06", "sources": [ "DBLP" ], "title": "Transparent and Efficient CFI Enforcement with Intel Processor Trace", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "cb14f2138d6de2f459841430aa52a2b6f2d3fc90": { "authors": [ { "ids": [ "2309282" ], "name": "Morteza Ramezani" }, { "ids": [ "3031046" ], "name": "Nima Elyasi" }, { "ids": [ "2372241" ], "name": "Mohammad Arjomand" }, { "ids": [ "1717494" ], "name": "Mahmut T. Kandemir" }, { "ids": [ "1743609" ], "name": "Anand Sivasubramaniam" } ], "doi": "10.1109/IISWC.2017.8167774", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167774", "entities": [ "Array data structure", "Cache (computing)", "Central processing unit", "Computer data storage", "Crossbar switch", "Dynamic random-access memory", "File system permissions", "Locality of reference", "Manycore processor", "Memory footprint", "Multi-core processor", "Paging", "Principle of locality", "Resistive random-access memory", "Scalability" ], "id": "cb14f2138d6de2f459841430aa52a2b6f2d3fc90", "inCitations": [ "6e45692cd6ff88ff38c9cf51df328c5ae0d4809a" ], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "167-176", "journalVolume": "", "outCitations": [ "8fb808a890a099896e34851179daba15659df11a", "04be73237f7956ce7351788685b8b8e5366390dd", "096ee9c89d43fd03a602aed3a37fdf43dc8e60ae", "61d16d80cd5e7f79f25785a462ee752d24e3b414", "150f874fe2c2a34b0b39270ec92e160b38145dcf", "2e9d33cba9f547a2e3febe088bae443f1d74d594", "768e6587de8c7d9c87520263e0ea28befd18f4ca", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "e16be0c501c9b7ba13f2471b5d28d80d19ee1ef1", 
"a95436fb5417f16497d90cd2aeb11a0e2873f55f", "48b22e82dba733f04a5af3408ecab872ba38f0c3", "29af49e9edbf480fc3abcacc5010817fdecbbfc4", "2cc69da629e857dbd7facbcf808a64b10e9db9a7", "082573e4dc88f38628242d193c966725ab355026", "e8ce82c11cc9e54ef80cd704ff5b1d9d5fc6f5b0", "228c96635a9087214bd4c16a0a78fa1a1ee830e2", "6cabaaacd2fec7509be155b45c8e4e9352a7f442", "742c789544b60c49fc8b0f7b7f87a9dae8d949d2", "1bb29cdeab20f4f5d739aacbb403e3751ca15f3b", "1820a34042d6371a9e20484b0c63b698eb522a6c", "3ac3cf82ff8f43db2e467f792d22d57d068f7ee0", "357e97b04375f09e9f4cfd45c69ecd9d7f0a15e1", "df1ed68dba0407cf2d93736af8cfd2dc5cf86918", "3dff880fa95a4f5882a09f4e272fe62ddc5f46e0", "109f26c285d48ba8f7b5e259364fecef0b3273f6", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "37146652e00f0dcc23eb7006eac5fdb08baa2004", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "94bbbb62c512bf29e08e6481907e68c8574df087", "10146dfeffa2be4267525ae74fda5c644205f497", "53356bd1d40e9c9aeaeb352f0f74ad83bb1650eb", "5ee89012eea645b3eeac5e60cebadf7df42ac382", "2621b8f63247ea5af03f4ea0e83c3b528238c4a1", "37a3f5ad45f6ba2fc01b7479fe8740bd17d3fc99", "1600c3ed12301b06a1107a68c2de84fb3582a918", "40eb2f5a97298da40838388700b097f82adff167" ], "paperAbstract": "Owing to the advantages of low standby power and high scalability, ReRAM technology is considered as a promising replacement for conventional DRAM in future manycore systems. In order to make ReRAM highly scalable, the memory array has to have a crossbar array structure, which needs a specific access mechanism for activating a row of memory when reading/writing a data block from/to it. This type of memory access would cause Sneak Current that would lead to voltage drop on the memory cells of the activated row, i.e., the cells which are far from the write drivers experience more voltage drop compared to those close to them. This results in a nonuniform access latency for the cells of the same row. 
To address this problem, we propose and evaluate a scheme that exploits the non-uniformity of write access pattern of the workloads. More specifically, based on our extensive characterization of write patterns to the cache lines and memory pages of 20 CPU workloads, we recognized that (i) on each main memory access, just a few cache lines of the activated row need to be updated on a write-back, and more importantly, there is a temporal and spatial locality of the writes to the cache lines; and (ii) all pages of the memory footprint of an application do not see the same write counts during the execution of the workload. Motivated by these characteristics, we then evaluate different intra-page memory block permutations in order to improve the performance of a crossbar ReRAM-based main memory. Our results collectively show that, by applying some types of intra-page memory block permutation, the access latency to a ReRAM-based main memory can be reduced up to 50% when running the SPEC CPU2006 workloads.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167774" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/cb14f2138d6de2f459841430aa52a2b6f2d3fc90", "sources": [ "DBLP" ], "title": "Exploring the impact of memory block permutation on performance of a crossbar ReRAM main memory", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "cb2513c7da8dcab8b737bb84e1ab95f8dab3bdd7": { "authors": [ { "ids": [ "40017608" ], "name": "Panayiotis Petrides" }, { "ids": [ "1765948" ], "name": "Pedro Trancoso" } ], "doi": "10.1145/3078468.3078482", "doiUrl": "https://doi.org/10.1145/3078468.3078482", "entities": [ "Benchmark (computing)", "CAS latency", "Central processing unit", "Manycore processor", "Memory controller", "Online and offline", "Requirement", "Run time (program lifecycle phase)", "Scheduling (computing)", "X86 virtualization" ], "id": "cb2513c7da8dcab8b737bb84e1ab95f8dab3bdd7", 
"inCitations": [], "journalName": "", "journalPages": "2:1-2:12", "journalVolume": "", "outCitations": [ "46742c000a65f676c00ec4e33d19d535a1c29dd7", "73569727e735c4c5628b8410cf6b971a9ef07ef5", "aea43dbf1edc7ad67a3f059e4c2d777b9ec68b06", "4a43db228a08047852a72fc5f9fb12f7f85f7f57", "0cb4b930159a456cd3ab7e253e0cab5c5b28c8c4", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "33357618f9ad31c521b07c9e2c264c92763bba31", "0e2efda23894526e869e57cb81c76de22f6a8d20", "30f04cd7dfd1623e62327e207fd92af738116fdd", "ab8d97380703ae1d1c061e4ff3758ca23648c536", "2992b8985e094c3943e29dffc550862791fae147", "3640fd02d3a62c22aaee643aaa8083a3b37325b7", "550960fc696179dde6bc387ef5209c54fc327d31", "0784356b46a1345b352ab634bda835c07ff04af2", "191c14ec67c561c6a3e3ce21c0a7e59e3afe490b", "3685a773ab05b40e22bbb73b2b5e801dfd95f747", "0d868efa67bf06b1f784d60769c082fd9a58893e", "471932379ea02f9f29172dac5c991181a749287d", "fa69471b7f69aa80cef55b791cbe52b0ed238030", "143ea5a90f525b056336e7751def1a8b1c4c27e2", "46fa3ec8f2fa7d0683ffaeeb438af76c6627823d", "3e74ae88cdaa33bf89136800258bde97ab397ec9", "02534e5b43b0ec75ba3c3c0dd8aa640b7b64827f", "48b22e82dba733f04a5af3408ecab872ba38f0c3", "15e07d192d360652e68e38b4ba267e160f972390", "337c784ae4546fd212a87614814e0e0272166ea1", "3c0bc4e9d30719269b0048d4f36752ab964145dd", "d8d0dfd9f20ae7e0d28baf598f3d3a2ed7ae0fef", "07ccd8f48c5067fff6f50e6654ff410195ff0a91", "0c65d05478483a294701d38c98e111d8a4b033f5", "1fcec27437d40285684aad5c68d2db076b27a195", "06545f48a6b25a3cafd76e514b2310254972888b", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1" ], "paperAbstract": "As the number of cores increases in a single chip processor, several challenges arise: wire delays, contention for out-of-chip accesses, and core heterogeneity. In order to address these issues and the applications demands, future large-scale many-core processors are expected to be organized as a collection of NUMA clusters of heterogeneous cores. 
In this work we propose a scheduler that takes into account the non-uniform memory latency, the heterogeneity of the cores, and the contention to the memory controller to find the best matching core for the application's memory and compute requirements. Scheduler decisions are based on an on-line classification process that determines applications requirements either as memory- or compute-bound. We evaluate our proposed scheduler on the 48-core Intel SCC using applications from SPEC CPU2006 benchmark suite. Our results show that even when all cores are busy, migrating processes to cores that match better the requirements of applications results in overall performance improvement. In particular we observed a reduction of the execution time from 15% to 36% compared to a random static scheduling policy.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078468.3078482" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/cb2513c7da8dcab8b737bb84e1ab95f8dab3bdd7", "sources": [ "DBLP" ], "title": "Heterogeneous- and NUMA-aware scheduling for many-core architectures", "venue": "SYSTOR", "year": 2017 }, "cb3caf2f5bcb5be7fb4ff2ca9c49cf26de8f40b9": { "authors": [ { "ids": [ "1870315" ], "name": "Alexander Marx" }, { "ids": [ "3183025" ], "name": "Jilles Vreeken" } ], "doi": "10.1109/ICDM.2017.40", "doiUrl": "https://doi.org/10.1109/ICDM.2017.40", "entities": [ "Algorithm", "Causality", "Kolmogorov complexity", "MDL (programming language)", "Minimum description length", "Synthetic data", "Time complexity" ], "id": "cb3caf2f5bcb5be7fb4ff2ca9c49cf26de8f40b9", "inCitations": [ "a2ef8302634e72933504a8eda8a8e4c69fa8128f" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "307-316", "journalVolume": "", "outCitations": [ "0a31077795f9ff45a8c51c1efe932c5fbbc7d4b2", "76208221af9567aa729f8b2ade71e1ec7c661df0", "6fcd06963112e3589615f197904232d942d18ed2", "477e46461e3442220cd505f02b59195be016c747", 
"0dbd6a32dea579b09905723b0b5aafba02466753", "49ad7e13e5b678d4dda3f46ff539c9c1a932c5ff", "0ac10b025991cf909b95a6b34a2f056a9beb89b4", "43095860c8a8869bcda6aa6354725fd7f0bc6896", "6bf69295da02ad329fbcfe4cfa2c32244d7f978a", "52130cda9eb477415f247c451a6cfa76b6539395", "c9af8b383731af9d5c861fde365c78c48c36ea9f", "d382b9c11e5c6a8e173fbeb442545e3be8d3e3a5", "0b80598da5893966efd6a5d1bde88a397bda9759", "9512f52337338232e1ed692748d9f069398ca545", "2c264f07f4e997126ba3740f3af384e0c3397462", "2f953d8f24e5545fa0e6f6114ce8436ed11066f1", "452e16fca17f5cfefda6005ffe4914b9c1a4268e", "4e3f8b31a60cd9377bb39ef778aad5652d39c2de", "7236c114be6f24cc65694d114614ba1a3cf5f7f9", "478e3b41718d8abcd7492a0dd4d18ae63e6709ab", "03b9cf696f119281334d8b7a71d64cd52560ca5c", "2b24f1dee5954352acf83d54c843969e5a5d87d0", "439cb4aca522b408e8d63567fc9fdf510ca0548c", "64824c69c9445d7362fc1939958337e059d085ab", "068cea17678fe06a4c9fc2c773a75b0aa225735c", "2605d92f712f0304fe17899396a5804a0265645d", "2bd1ef4089dfce5bb3be4566b33eddc2ea8add75" ], "paperAbstract": "We consider the fundamental problem of inferring the causal direction between two univariate numeric random variables X and Y from observational data. The two-variable case is especially difficult to solve since it is not possible to use standard conditional independence tests between the variables. To tackle this problem, we follow an information theoretic approach based on Kolmogorov complexity and use the Minimum Description Length (MDL) principle to provide a practical solution. In particular, we propose a compression scheme to encode local and global functional relations using MDL-based regression. We infer X causes Y in case it is shorter to describe Y as a function of X than the inverse direction. 
In addition, we introduce Slope, an efficient linear-time algorithm that through thorough empirical evaluation on both synthetic and real world data we show outperforms the state of the art by a wide margin.", "pdfUrls": [ "http://eda.mmci.uni-saarland.de/pubs/2017/slope-marx,vreeken.pdf", "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.40", "http://people.mpi-inf.mpg.de/~amarx/slope-icdm-collapsed.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/cb3caf2f5bcb5be7fb4ff2ca9c49cf26de8f40b9", "sources": [ "DBLP" ], "title": "Telling Cause from Effect Using MDL-Based Local and Global Regression", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "cb4468c20b85bc3228a32987240a78b885e5528f": { "authors": [ { "ids": [ "1816474" ], "name": "Germ\u00e1n Ceballos" }, { "ids": [ "2494078" ], "name": "Andreas Sembrant" }, { "ids": [ "3083590" ], "name": "Trevor E. Carlson" }, { "ids": [ "1780873" ], "name": "David Black-Schaffer" } ], "doi": "10.1109/IISWC.2017.8167761", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167761", "entities": [ "Graphics", "Graphics processing unit", "Rendering (computer graphics)", "System on a chip", "Systems design" ], "id": "cb4468c20b85bc3228a32987240a78b885e5528f", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "108-109", "journalVolume": "", "outCitations": [ "51eeb8d4ecd8351e23cbae473a98f8f912002911", "343ead469fff618b6f8c4adba7866caec8caca81", "afa99f5e8b7d2004d902b06baeeb80df00feda15" ], "paperAbstract": "Graphics rendering is a complex, multi-step process whose data demands typically dominate memory system design in SoCs. GPUs create images by merging many, simpler scenes for each frame. For performance, scenes are tiled into parallel tasks, each of which produces different parts of the final output. 
This execution model results in complex memory behavior, whose bandwidth demands, reuse and sharing characteristics depend heavily on the structure and complexity of each application, frame, scene, and task, and vary over time. To design systems that can efficiently accommodate and schedule these workloads, we need to understand their behavior and diversity. In this work, we explore the data demands of modern graphics rendering quantitatively, using an architecturally-independent analysis that identifies the different types of data sharing present in the applications.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167761" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/cb4468c20b85bc3228a32987240a78b885e5528f", "sources": [ "DBLP" ], "title": "Analyzing graphics workloads on tile-based GPUs", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "cb4fe9befc3f6e48335d4983537f05a000eab611": { "authors": [ { "ids": [ "1938539" ], "name": "Sarunya Pumma" }, { "ids": [ "3120064" ], "name": "Min Si" }, { "ids": [ "1688860" ], "name": "Wu-chun Feng" }, { "ids": [ "2103230" ], "name": "Pavan Balaji" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.29", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.29", "entities": [ "Artificial neural network", "Computation", "Data access", "Deep learning", "Limiter", "Memory-mapped I/O", "OpenLDAP Lightning Memory-Mapped Database", "Parallel computing", "Run time (program lifecycle phase)", "Scalability", "Scheduling (computing)", "Supercomputer" ], "id": "cb4fe9befc3f6e48335d4983537f05a000eab611", "inCitations": [ "44e11af4647304e2a1875d804a9546cbf6ac7810" ], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "223-230", 
"journalVolume": "", "outCitations": [ "73801d5bab1dd5cc2aaaf8855e4365a1a5d0d109", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "1fcb1c5595b4518b0e8bab042f32605c367588c2", "5d90f06bb70a0a3dced62413346235c02b1aa086", "bd6fe117ca35a7ab144408be1771000feb57c7fb", "38211dc39e41273c0007889202c69f841e02248a", "91831bba9df741a3a945c4206a6d7211307e7686", "556035beb283652f1dcfeff7ae43851cd4abc85a", "2da4ab6c02d97fe47b589ddd450a5c41f2b47bb9", "a14e1d1d3eea6803ac34b904a4c619f8f686370c", "722fcc35def20cfcca3ada76c8dd7a585d6de386" ], "paperAbstract": "Deep learning systems have been growing in prominence as a way to automatically characterize objects, trends, and anomalies. Researchers have been investigating techniques to optimize such systems. An area of particular interest has been using supercomputing systems to quickly generate effective deep learning networks, a phase referred to as “training” of the deep neural network. As we scale deep learning frameworks-such as Caffe-on large-scale systems, we notice that parallelism can help improve the computation tremendously, leaving data I/O as the major bottleneck limiting the overall system scalability. In this paper, we present a detailed analysis of the performance bottlenecks of Caffe on large supercomputing systems. The analysis shows that Caffe's I/O subsystem-LMDB-relies on memory-mapped I/O, which can be highly inefficient on large-scale systems because of its interaction with the process-scheduling system and the network-based parallel filesystem. Based on this analysis, we present LMDBIO, an optimized I/O plugin for Caffe that takes into account the data access pattern in order to vastly improve I/O performance. 
Experimental results show that LMDBIO can improve the overall execution time of Caffe by nearly 20-fold in some cases.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.29", "http://www.mcs.anl.gov/papers/P7068-0717.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/cb4fe9befc3f6e48335d4983537f05a000eab611", "sources": [ "DBLP" ], "title": "Towards Scalable Deep Learning via I/O Analysis and Optimization", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "cbb3f31671c201a9e8f3c09c30ee8d1ee2e641af": { "authors": [ { "ids": [ "2852809" ], "name": "Prasanna Giridhar" }, { "ids": [ "2019481" ], "name": "Shiguang Wang" }, { "ids": [ "1730531" ], "name": "Tarek F. Abdelzaher" }, { "ids": [ "17050197" ], "name": "Md. Tanvir Al Amin" }, { "ids": [ "1795727" ], "name": "Lance M. 
Kaplan" } ], "doi": "10.1109/ICAC.2017.46", "doiUrl": "https://doi.org/10.1109/ICAC.2017.46", "entities": [ "Algorithm", "Baseline (configuration management)", "Instagram", "Simulation", "Social network", "Unsupervised learning" ], "id": "cbb3f31671c201a9e8f3c09c30ee8d1ee2e641af", "inCitations": [ "fdb14e9482d70b1b56cf9277b3b445fcc08ee079", "2c2e033feb4294151d91911288197b474a2812f0", "5c8262bc29ca227a34ca345c4060a3f3cbfa874a", "ddc334306f269968451ca720b3d804e9b0911765" ], "journalName": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "journalPages": "1-10", "journalVolume": "", "outCitations": [ "755b5fe168b1f3ed51c01841d1542a980461856c", "37c90fcd3016171e9f3ac86f7f88b390fe86ee99", "2c737653408c606062355b71f63919004a975ef6", "49b2a1b9606c0ccb95a36895760fc91b8b830266", "4f5ee452c65a273251a1641ab37ac5e69e0dc28b", "5481d3c7105a8cd42e3400286d3bc964ba89ae5b", "02df9340e08e1e5085f1257bb6ddda4cd225e1d3", "84bffe8afd1c16ec2dc8eab3a6b9203f2473d6bc", "04dfa9aab950d7708c396723151fb3838b8188f5", "146b157524728525a868af9038f8461bce23f30f", "823262c42414bfaba9a0cea736e1c77c7cea7837", "6ba401b333ed4ed97c78863929533acdecd1133c", "357e59e9d782b0d9db8889285535a522e2ec097d", "01ff2b834772dfc2b8b7ba00620b65abb9444a75", "5397d68972cb8b5bfe775bc25af922f31843d50e", "9977958fabf2c3ee73bba92aa416616b35e553aa", "531fb16eb14329b5e5df9c8270c713fdf8aa8631", "007e5d3db415ac833141b5c01b18c9bc9339c52c", "70e0660ff33b75b751f8635bb39f5b3299335fb7", "7b56c1cf3623b086909e126ae653b4a8f7ebbd11" ], "paperAbstract": "This paper describes the implementation of a service to identify and geo-locate real world events that may be present as social activity signals in two different social networks. Specifically, we focus on content shared by users on Twitter and Instagram in order to design a system capable of fusing data across multiple networks. Past work has demonstrated that it is indeed possible to detect physical events using various social network platforms. 
However, many of these signals need corroboration in order to handle events that lack proper support within a single network. We leverage this insight to design an unsupervised approach that can correlate event signals across multiple social networks. Our algorithm can detect events and identify the location of the event occurrence. We evaluate our algorithm using both simulations and real world datasets collected using Twitter and Instagram. The results indicate that our algorithm significantly improves false positive elimination and attains high precision compared to baseline methods on real world datasets.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICAC.2017.46" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/cbb3f31671c201a9e8f3c09c30ee8d1ee2e641af", "sources": [ "DBLP" ], "title": "Social Fusion: Integrating Twitter and Instagram for Event Monitoring", "venue": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "year": 2017 }, "cbb841fa252ad5223e1a170baaee78dce484e25c": { "authors": [ { "ids": [ "2814321" ], "name": "Bin Ren" }, { "ids": [ "1679176" ], "name": "Sriram Krishnamoorthy" }, { "ids": [ "3379439" ], "name": "Kunal Agrawal" }, { "ids": [ "1700486" ], "name": "Milind Kulkarni" } ], "doi": "10.1145/3018743.3018763", "doiUrl": "https://doi.org/10.1145/3018743.3018763", "entities": [ "Asymptotically optimal algorithm", "Baseline (configuration management)", "Computation", "Data parallelism", "Grams", "Iteration", "Multi-core processor", "Parallel computing", "Program transformation", "Recursion", "Scheduling (computing)", "Speedup", "Task parallelism", "Universal instantiation" ], "id": "cbb841fa252ad5223e1a170baaee78dce484e25c", "inCitations": [ "3fa62e121a18e6ad44bcce5f6e783f0dc4a0d52f", "17ded16813a7ef6e179252585a742e83f004c0fb" ], "journalName": "", "journalPages": "117-130", "journalVolume": "", "outCitations": [ "5834eb6b8072531e4a07ae5ce7e5c6c6f883e702", 
"0003b4b4c35f0384596b6f129f7dda8449cdc79f", "3c1c2799610b91b9ca9559678a6eeb7192c152d1", "101f10b90ce859135868668478fbde5882c87458", "410ae589668068dcc0a25b39763ff68684806433", "98bbc48dda4f68ce0edcc02b12130f21999bb2ba", "fce7fd98928ab9bf3e4e919e108c48fc1040f569", "462100939762e52953487e3da0e0c758ade282ae", "19ecf7778132143ec9c0324fae0aebf20c9a0217", "09dbf94357b21ad14d2897282703ee99ae06a35e", "67dc83a15c020b84403f1b6b52140965f11e4588", "13a94e9847ceb7c55d38bd6567a6252f23caa406", "3ee0eec0553ca719f602b777f2b600a7aa5c31aa" ], "paperAbstract": "Modern hardware contains parallel execution resources that are well-suited for data-parallelism-vector units-and task parallelism-multicores. However, most work on parallel scheduling focuses on one type of hardware or the other. In this work, we present a scheduling framework that allows for a unified treatment of task- and data-parallelism. Our key insight is an abstraction, task blocks, that uniformly handles data-parallel iterations and task-parallel tasks, allowing them to be scheduled on vector units or executed independently as multicores. Our framework allows us to define schedulers that can dynamically select between executing task- blocks on vector units or multicores. We show that these schedulers are asymptotically optimal, and deliver the maximum amount of parallelism available in computation trees. To evaluate our schedulers, we develop program transformations that can convert mixed data- and task-parallel pro- grams into task block-based programs. 
Using a prototype instantiation of our scheduling framework, we show that, on an 8-core system, we can simultaneously exploit vector and multicore parallelism to achieve 14×-108× speedup over sequential baselines.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018763", "https://engineering.purdue.edu/~milind/docs/ppopp17b.pdf", "http://www.cse.wustl.edu/~kunal/resources/Papers/vectorcilk-restart.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/cbb841fa252ad5223e1a170baaee78dce484e25c", "sources": [ "DBLP" ], "title": "Exploiting Vector and Multicore Parallelism for Recursive, Data- and Task-Parallel Programs", "venue": "PPOPP", "year": 2017 }, "cbe246e94c466d9524b2b3878b647516a342b433": { "authors": [ { "ids": [ "1698571" ], "name": "Yan Li" }, { "ids": [ "3102417" ], "name": "Kenneth Chang" }, { "ids": [ "2462506" ], "name": "Oceane Bel" }, { "ids": [ "35551113" ], "name": "Ethan L. Miller" }, { "ids": [ "1740823" ], "name": "Darrell D. E. Long" } ], "doi": "10.1145/3126908.3126951", "doiUrl": "https://doi.org/10.1145/3126908.3126951", "entities": [ "Artificial neural network", "Benchmark (computing)", "Client\u2013server model", "Data center", "Deep learning", "Lustre", "Performance tuning", "Production system (computer science)", "Program optimization", "Q-learning", "Reinforcement learning", "Server (computing)", "Test set", "Throughput" ], "id": "cbe246e94c466d9524b2b3878b647516a342b433", "inCitations": [], "journalName": "", "journalPages": "42:1-42:14", "journalVolume": "", "outCitations": [ "3237988284481bcd75894f9cb4f4d43b6aa4b561", "2dcb0ed27b6a35b1dfe97b45604302a1f3705c01", "402da07a0ac4645e26370ff5ac8ab3540257a8ab", "478f51822252e4221c920bbf9d30a0b0491045ec", "0c575d220d8bc125d6a2290984c8e2b87011631d", "02f28656a748c351bf92c76c5a0a31c2d1d9c45d", "3552fd3e49c3b8d37cbb1c6f43e800c4b7a9cefa", "bf1ffbdffc4ba38ce992b51d6c7016dd8b826291", "1fc262bbb19b5ff0ec48ec4912d9f75c15917ce7", "117d089d8eea767e72d5bb800ba4d6e0e15dad93", 
"272216c1f097706721096669d85b2843c23fa77d", "22a3f0837bd6a913f516ba497469176be641c7d4", "0c4867f11c9758014d591381d8b397a1d38b04a7", "024006d4c2a89f7acacc6e4438d156525b60a98f", "0d67362a5630ec3b7562327acc278c1c996454b5", "1766e97c83f698ce3a292bc851a3bdee8179fba6", "2d8450ec56926a34fe1c25180f0b303c5cc67f2d", "4954fa180728932959997a4768411ff9136aac81", "3fbc9316a792974ba103be76702a6ce5c8d33f2d", "1ff87e6453229a6d4858cfc301701cd3a7e57987", "7b8bd1f4aa57ab44369c9ee9f1756f9f2a0ffe52", "0e32e9a888493971d9db62058952733cdfadd3ee", "441cf0fe8091d09207374a9d96723419091345ab", "35501b12a19824cf2f4cf48eb65ceb1445b28c0e", "29be34b502086bfb36e41721a6a7d4d4b518a579", "47aa3758c0ac35bfb2a3d2bbeff1e0ac28e623c2", "88dfee10842bbfd2ebc74980ab64c1cac5753883" ], "paperAbstract": "Parameter tuning is an important task of storage performance optimization. Current practice usually involves numerous tweak-benchmark cycles that are slow and costly. To address this issue, we developed CAPES, a model-less deep reinforcement learning-based unsupervised parameter tuning system driven by a deep neural network (DNN). It is designed to find the optimal values of tunable parameters in computer systems, from a simple client-server system to a large data center, where human tuning can be costly and often cannot achieve optimal performance. CAPES takes periodic measurements of a target computer system's state, and trains a DNN which uses Q-learning to suggest changes to the system's current parameter values. CAPES is minimally intrusive, and can be deployed into a production system to collect training data and suggest tuning actions during the system's daily operation. 
Evaluation of a prototype on a Lustre file system demonstrates an increase in I/O throughput up to 45% at saturation point.", "pdfUrls": [ "http://alumni.soe.ucsc.edu/~yanli/res/li-capes-sc17.pdf", "http://doi.acm.org/10.1145/3126908.3126951" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/cbe246e94c466d9524b2b3878b647516a342b433", "sources": [ "DBLP" ], "title": "CAPES: unsupervised storage performance tuning using neural network-based deep reinforcement learning", "venue": "SC", "year": 2017 }, "cc2de9a75582e346aa3a650607c02ff81d9ac615": { "authors": [ { "ids": [ "14069534" ], "name": "Arghya Kusum Das" }, { "ids": [ "2251551" ], "name": "Jae-Ki Hong" }, { "ids": [ "3452118" ], "name": "Sayan Goswami" }, { "ids": [ "2302382" ], "name": "Richard Platania" }, { "ids": [ "1899648" ], "name": "Kisung Lee" }, { "ids": [ "2357986" ], "name": "Wooseok Chang" }, { "ids": [ "2794114" ], "name": "Seung-Jong Park" }, { "ids": [ "1685610" ], "name": "Ling Liu" } ], "doi": "10.1109/CLOUD.2017.27", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.27", "entities": [ "Amdahl's law", "Apache Hadoop", "Assembly language", "Benchmark (computing)", "Big data", "Central processing unit", "Computation", "Cyberinfrastructure", "Data center", "Dynamic random-access memory", "Hyperscale", "Operating system", "Science 2.0", "Solid-state drive" ], "id": "cc2de9a75582e346aa3a650607c02ff81d9ac615", "inCitations": [], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "147-154", "journalVolume": "", "outCitations": [ "2e8350e39d5d19b5b876f4365cc4bbef8ac5a5b1", "09cbad69073a35028379ffc3da50e129ab29a523", "b6a75410d9aed0032486664f4afa7a8eaa4c4c70", "4a829383409abdb734b29d09f041130b561c4b88", "1dc1cfe00228e1e94e1f90ff33d928403809954e", "fa7abc4d91247af9be619408438eae8bed94fb64", "378b865f0bb0c5b5476e1ac7920c49916547cb9a", "5bac410de277c4f2f62672b5b7ac32128ba4d1e5", "499d08c312fadb70ebfb91258f02bab47a10d0f2", 
"e0a2067e5f50fa697cee6a07f5f8ee130068fc04", "2d068a34e3f619e72d945fdd196414ba2dfa86ca", "c9bfcb190e47fb49a343a827ff68592b11020e27", "029a5f91d6d2abdb2c80e318c061280fbf7f9794", "036d544defb7f8e6297bd4c57a3b430d04a269e8", "0d2a822442a839229b04a437a58df7f032fc62a0", "83ef247c2281664391f5df5b161e43b37a5ec018", "82c934d30d2b3b43f598aa5c8897ad0fb7436ac5", "174562c148b8662aaa8363678b3a38796bfcb393", "2a71a0b61f78cee9a3ed67ce5b0eb401bd616ede" ], "paperAbstract": "High-performance analysis of big data demands more computing resources, forcing similar growth in computation cost. So, the challenge to the HPC system designers is providing not only high performance but also high performance at lower cost. For high performance yet cost effective cyberinfrastructure, we propose a new system model augmenting Amdahl's second law for balanced system to optimize price-performance-ratio. We express the optimal balance among CPU-speed, I/O-bandwidth and DRAM-size (i.e., Amdahl's I/O-and memory-number) in terms of application characteristics and hardware cost. Considering Xeon processor and recent hardware prices, we showed that a system needs almost 0.17GBPS I/O-bandwidth and 3GB DRAM per GHz CPU-speed to minimize the price-performance-ratio for data-and compute-intensive applications. To substantiate our claim, we evaluate three different cluster architectures: 1) SupermikeII, a traditional HPC cluster, 2) SwatIII, a regular datacenter, and 3) CeresII, a MicroBrick-based novel hyperscale system. CeresII with 6-Xeon-D1541 cores (2GHz/core), 1-NVMe SSD (2GBPS I/O-bandwidth) and 64GB DRAM per node, closely resembles the optimum produced by our model. 
Consequently, in terms of price-performance-ratio CeresII outperformed both SupermikeII (by 65-85%) and SwatIII (by 40-50%) for data-and compute-intensive Hadoop benchmarks (TeraSort and WordCount) and our own benchmark genome assembler based on Hadoop and Giraph.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.27", "http://www.ieeeconfpublishing.org/cpir/UploadedFiles/ieee-cloud-2017_AugmeningAmdahl_PdfExpressValidated.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/cc2de9a75582e346aa3a650607c02ff81d9ac615", "sources": [ "DBLP" ], "title": "Augmenting Amdahl's Second Law: A Theoretical Model to Build Cost-Effective Balanced HPC Infrastructure for Data-Driven Science", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "cc31cd9d8371add2a99b5894ebd30f28f985c821": { "authors": [ { "ids": [ "3209205" ], "name": "Xiang Ni" }, { "ids": [ "1812494" ], "name": "Nikhil Jain" }, { "ids": [ "2441027" ], "name": "Kavitha Chandrasekar" }, { "ids": [ "1731961" ], "name": "Laxmikant V. Kal\u00e9" } ], "doi": "10.1109/CLUSTER.2017.94", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.94", "entities": [ "Computer performance", "Dynamic random-access memory", "High Bandwidth Memory", "Matrix multiplication", "Memory hierarchy", "Non-volatile memory", "Supercomputer", "Volatile memory" ], "id": "cc31cd9d8371add2a99b5894ebd30f28f985c821", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "147-151", "journalVolume": "", "outCitations": [ "0c36ce0ea8ec9070edef08d833e2bd18ff919b20", "0975baea2e5a34f75c06284ac355af7f2de2499b", "c250168f0103da6ed94d20a9fb00e98cce05756d" ], "paperAbstract": "The increase in memory capacity is substantially behind the increase in computing power in today's supercomputers. 
In order to alleviate the effect of this gap, diverse options such as NVM - non-volatile memory (less expensive but slow) and HBM - high bandwidth memory (fast but expensive) are being explored. In this paper, we present a common approach using parallel runtime techniques for utilizing NVM and HBM as extensions of the existing memory hierarchy. We evaluate our approach using matrix-matrix multiplication kernel implemented in CHARM++ and show that applications with memory requirement four times the HBM/DRAM capacity can be executed efficiently using significantly less total resources.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.94" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/cc31cd9d8371add2a99b5894ebd30f28f985c821", "sources": [ "DBLP" ], "title": "Runtime Techniques for Programming with Fast and Slow Memory", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "cc3f5c2f01b3b7cfc90a9aeab42ffd1586ca9d4f": { "authors": [ { "ids": [ "1715309" ], "name": "Henry Corrigan-Gibbs" }, { "ids": [ "25629078" ], "name": "David J. 
Wu" }, { "ids": [ "1752788" ], "name": "Dan Boneh" } ], "doi": "10.1145/3102980.3102993", "doiUrl": "https://doi.org/10.1145/3102980.3102993", "entities": [ "Central processing unit", "Computer", "Operating system", "Quantum", "Quantum computing", "Quantum mechanics", "Scheduling (computing)", "Unit testing" ], "id": "cc3f5c2f01b3b7cfc90a9aeab42ffd1586ca9d4f", "inCitations": [ "fb3a6a00b92cff5498a3f58311390190541ea280" ], "journalName": "", "journalPages": "76-81", "journalVolume": "", "outCitations": [ "d27d85c0a42ec888abb27e0c91a95cdb2d452edf", "5c131bf0b2ce71cdb12803d1dda56381653f925e", "59c60fc1106235ca102473fa1fd0db46e618c6a4", "7ca97e5bf0a2235bbe7e7fba3c531bc678890caa", "73abef762dd61fdd388173f24f811e8693a79d7c", "8bba0a42134ea5df896cdde59e62bbd90e9f4dd4", "455e1168304e0eb2909093d5ab9b5ec85cda5028", "4b912eb41f90bfbe56a0a8c6b18bd4b11c874c3b", "168ec39c9eb2b4440bdf2736ee5101fafdc811c2", "0541d5338adc48276b3b8cd3a141d799e2d40150", "4c7671550671deba9ec318d867522897f20e19ba", "ec3e3eb7e5435c1b58a9e3558830f021301ea13d", "d9891de8530cde280f01a8afb735e271a355ea58", "01ddb2881ee1e912ca52c5d59248e451d9827519", "2f31c901670dcc501aa33c8efb6938110d10f4a0", "a7315f8f263567e5234dcde02899a8111876d679", "b7919f849b00f557fb18f59d6f2bb748a1644c54", "0103b4a14ab5396b75c2eec39865e0cec66a8e19", "5bfeff331c209a508adb29a122ac73fcd99b3400", "fdb552b7ee8b5858280bc33f90d75d032d27bdaf", "8fe82724de3947a18b9be8d980907ab0eb4b3aaa", "43831ced46be9d668a7e86af6e51990b08280b37", "57eaf807b2639d4c01af674ee511f8a6f7004c8b", "839366c5e0b37a1b2078f7e25f40981f369ea220", "041929bace8fae28de43317e46734ca0e35dda8e", "b95dd6bdc16bfacf543118bce70b25fc89241b35", "dfe4d76e3fc27304a984a5daf1be530401724c25", "02b4f85d7414585bd901a6aa0ac5c65727bd5062", "157c3505a181c1fc130f970ec130b13fcd1e5e7f", "d280159a45655eb0ac75d05ff462b04d64fa8432", "418e058c0dd22b18994ebdba8bd4713bf92588f7", "5b04e8071ee7cd3f13e1015eaf3bb0070b382d28", "212e4eb3ffbee7be2cc9efce95a402da10df8c60", 
"4162b084682391b0a328f470f40d0f8f4aff13fb", "1244310dc2fd3ea19b14483ea068b5dd6c295a79" ], "paperAbstract": "If large-scale quantum computers become commonplace, the operating system will have to provide novel abstractions to capture the power of this bizarre new hardware. In this paper, we consider this and other systems-level issues that quantum computers would raise, and we demonstrate that these machines would offer surprising speed-ups for a number of everyday systems tasks, such as unit testing and CPU scheduling.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3102993", "https://crypto.stanford.edu/~dwu4/papers/QuantumOS.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/cc3f5c2f01b3b7cfc90a9aeab42ffd1586ca9d4f", "sources": [ "DBLP" ], "title": "Quantum Operating Systems", "venue": "HotOS", "year": 2017 }, "cca9f7e4d9dcc0368934026f1fe2d6590870fa68": { "authors": [ { "ids": [ "34766892" ], "name": "Daniel Firestone" } ], "doi": "", "doiUrl": "", "entities": [ "ARM architecture", "Access control list", "Algorithm", "Application programming interface", "Cloud computing", "Compiler", "Experience", "Load balancing (computing)", "Microsoft Azure", "Network address translation", "Network function virtualization", "Platform as a service", "Quality of service", "Scalability", "Software-defined networking", "State (computer science)", "Tunneling protocol" ], "id": "cca9f7e4d9dcc0368934026f1fe2d6590870fa68", "inCitations": [ "27f5cea69405564206471da0a145785bde17090f", "ea5d5ad680f54c81f455a194094cf02c669452f9", "0bd2d176f44c86e23af6544d960867db99659ba7", "7f640b84dedbb95ec84d86563b3304035c9fc980", "a6472fe7fbc978de8597c2f783891aa1eb1f87a5", "4d706ee0027880679a5358aef4e8feba58a53718", "63efcd0695d3de798e2743739c8b6a32a568fb84", "1d1a33579f6baa1c96eb2ff129bc088b8161d0cf", "7e9fd8c8973d50efa1c3259d47c4e15b8a98e351" ], "journalName": "", "journalPages": "315-328", "journalVolume": "", "outCitations": [ 
"84e0660e922da41223b9723bef60f5350a98d427", "058f6752d85a517aae298586fdf117acdd7560ea", "5b999d36d5230eca01532b357c7cf338a5e0d641", "3dde3fec553b8d24a85d7059a3cc629ab33f7578", "089b10645ee63cd9c5bb4ab661141dd813408e15", "0c2e79b4e9283beb7c7270df326a85f22e460821", "35a5a870cefa0184245cf317381f3dc4092e1781", "2077579d62fc090d4ddf45f107ffae0468936165", "58ee4e33defda43292381793ed26a403d524fb73", "06c08a99344e3319cb4b2806b41b69db06e3e9f8", "4890f40f2772c1c18c66ad751d199949f178582f", "9dd79fcf6bc01db9c7bec3337db957b0a964e985", "6678b17fc8758efea8d32c2d47f9924f8a0cdc6d", "00cf4b13a1bd202ccebe1e7bd0587f11e98ec3d6", "3d95c58c595130caa94cdc1fcdb0f9b9f12cb805", "044ce2a427c65d53f3d8279339b8eb6f020121c7", "4370972a5466ece316fed0c75f1a7d47c7f40376" ], "paperAbstract": "Many modern scalable cloud networking architectures rely on host networking for implementing VM network policy e.g. tunneling for virtual networks, NAT for load balancing, stateful ACLs, QoS, and more. We present the Virtual Filtering Platform (VFP) a programmable virtual switch that powers Microsoft Azure, a large public cloud, and provides this policy. We define several major goals for a programmable virtual switch based on our operational experiences, including support for multiple independent network controllers, policy based on connections rather than only on packets, efficient caching and classification algorithms for performance, and efficient offload of flow policy to programmable NICs, and demonstrate how VFP achieves these goals. VFP has been deployed on >1M hosts running IaaS and PaaS workloads for over 4 years. 
We present the design of VFP and its API, its flow language and compiler used for flow processing, performance results, and experiences deploying and using VFP in Azure over several years.", "pdfUrls": [ "https://www.usenix.org/conference/nsdi17/technical-sessions/presentation/firestone", "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/03/vfp-nsdi-2017-final.pdf", "http://www.usenix.org./system/files/conference/nsdi17/nsdi17-firestone.pdf", "https://www.usenix.org/system/files/conference/nsdi17/nsdi17-firestone.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/fd35/6fba7f825c64271e5d22195bb61a4f0b6b71.pdf", "s2Url": "https://semanticscholar.org/paper/cca9f7e4d9dcc0368934026f1fe2d6590870fa68", "sources": [ "DBLP" ], "title": "VFP: A Virtual Switch Platform for Host SDN in the Public Cloud", "venue": "NSDI", "year": 2017 }, "ccc68bb05f3433f287de4f66fe9814760ebcafb3": { "authors": [ { "ids": [ "23492509" ], "name": "Guoliang Li" }, { "ids": [ "3416315" ], "name": "Chengliang Chai" }, { "ids": [ "3232294" ], "name": "Ju Fan" }, { "ids": [ "39163188" ], "name": "Xueping Weng" }, { "ids": [ "26846526" ], "name": "Jian Li" }, { "ids": [ "2985907" ], "name": "Yudian Zheng" }, { "ids": [ "4769653" ], "name": "Yuanbing Li" }, { "ids": [ "38438821" ], "name": "Xiang Yu" }, { "ids": [ "2285128" ], "name": "Xiaohang Zhang" }, { "ids": [ "34825854" ], "name": "Haitao Yuan" } ], "doi": "10.1145/3035918.3064036", "doiUrl": "https://doi.org/10.1145/3035918.3064036", "entities": [ "Benchmark (computing)", "Crowdsourcing", "Database", "Experiment", "Mathematical optimization", "Money", "Multi-objective optimization", "Program optimization", "Query optimization", "Relational database management system", "Simulation", "Unified Framework" ], "id": "ccc68bb05f3433f287de4f66fe9814760ebcafb3", "inCitations": [ "1dedeaa3cda8a50d6f7f6980fc573a5d4a70e265", "843b86e94ee75f40f458b73de6efb4b125e8e4ee", "60737db62fb5fab742371709485e4b2ddf64b7b2", 
"aa96a257c28225901006502266aa65877897dc5d", "e4792a28cec6ac4fcc3c4ebdd3931483f660fa79", "2b89301f592033f0001f5d79ce33163d9249caab", "4d96002263faa311fa61d7fefda7361061356f26", "f87c7779f7cf430d8f72a204bc6a39065bea3631", "31d6b211735d7e93fbc7629a731403c9e4644b7c", "b3530b8eda702a53023281a1c744c2d1f6c1b5b4" ], "journalName": "", "journalPages": "1463-1478", "journalVolume": "", "outCitations": [ "49ceba7f32b3d440f20b7b35d4c7462016666ef9", "a44b2fbd07d6d17e968afc240ac212f9070b95c3", "cd48760a142830b796b8a85a158cd469f3e5feb0", "4033104e3a37324df023fec7e95d852e962617de", "1a7651308dbbdf91ccf07886b016b5fa8678fd27", "0eb3d04dae51367b735c021054792a71e621095a", "bb0620f3e15a1c6bea44f0720ec56aa0ccd0a84c", "1b189d721adbf1d2bab93b7ed6ce826e188b0b99", "422d9b1a05bc33fcca4b9aa9381f46804c6132fd", "1c2e8d2d63312c18944c9477797e65a0975361da", "199dcbb1e5287eedb458c867b171cc83c06b0d2a", "af1e1bee41d004a6c1fa608a9fe2a884f48c6e5f", "4f739534a366799e170599d3ff3d65597f0118db", "08f51a9138458f667f0c00d40b6a820c451c7d36", "0894c223858b5b40ed6ee256d5378030a82bded2", "37c0234b8c699fcb0b9c7b5b45bdb0112514aaac", "cbd45b97b5332e4b955cd54f090baed9d2ec5a72", "0536a29e450f736fa98db5cec8b398d518f0f7d4", "0ee10b695a90d7699b0518436d9bac53410c2d0b", "5144ce3943d6dce07db0fa7469b54e1f3aea80ab", "1ab69d3c63d92460d0eb09e0cf493d4eec18209a", "35c71faec5506d8fe79bb997f6e9e3743dc436a6", "29eb9e82b7a3aa02b9af5f0f1b6d7c6a4c6e2917", "5b55010d6aa8b909ff588dccf1e4693e156cf41e", "b7e44eb637356570af6e980657d461d0d3266bfa", "7bbdb1803788a0e0cf8b814ed12a8f87e544b6ec", "66d87d19a9e7d559aedcb843127fda9cda5ce417", "7ac5425b837a5e889847afdaf04c0241b9b0b4ba", "882ea179c692c9ed7bbeee115302631dc014b199", "f5d9c0182d8578f7c0a99ad9bdd4ff62e5f7c68d", "1aecd54c380632d2d12b6bf8688f1167b61d6127", "1d8b0494623cd7d041b42e6518994b8e21367b8a", "6953420c593842697dd09bc2cf7ffbbaf67a6e8e", "16c36a0ab390553d77877ee634607899e4eececf", "0b65d7a55a1541f99e0686844e2129527e7702e0", "20d90871bc0dc7956bf2557d91d8d96deb0a4520", 
"86d2add9aa90014a6330e3eb59277562adaeeda4", "0d0f7b0456472116a3fa8cbc87a3bcb00dd432aa", "79f9e562470ea00d1bba08fd1fba8cc46d96f211", "8d4594b4d4827f44b57863376d54536112b7aaca", "8a0b267493ac9510e47ceb4bcebb6d202b2f89a5", "76db421d3ff172ca679f2c680d3b4f161718c194", "71b76a4de9b1364be2905278b69e0912c5f70b3a", "213cb7593934bc675c336f53dd6c61a3c799be80", "151673abe01271dc3fc37725c02e95e7970f3bed", "71a83e1747bfcd8022d3b8e31acb01765055af9d", "6a35c6aead0796373bbdcc6c8c3371d8c25146b0", "325413859d5bf180dd08352b07a09fd714ec3db9", "7a84a82a45c0e10e485c4fcf3afa9158caf20b1c", "2378e4de465227e44978f6259ac97f763448ea82", "64591361f2f39a1c0418141c4795e43573637739", "02df9340e08e1e5085f1257bb6ddda4cd225e1d3", "5f11f3d958581f53b20577da69c9f7bc25c2ff2b", "32be67f8fb5b21554b950fb49f3c81098d5d23c7", "24c48b97725d84246f6dbd39c055648a305e1df4", "743a4a699ef01786a7321291ff8ca94b7bde7780", "788253d23de39a81305d72d47eda33d4d27495d6", "88cd4becf3587a8378e450a99ded801fbdb264e1", "d84af7f39033a084d9ae95fe895d39b9ec5246dd", "632eca15ed20f87490c60a6005c4c58f06bee61b", "6ac7262028d905ad97bbabce37c610e5d84e4c6f" ], "paperAbstract": "Crowdsourcing database systems have been proposed to leverage crowd-powered operations to encapsulate the complexities of interacting with the crowd. Existing systems suffer from two major limitations. Firstly, in order to optimize a query, they often adopt the traditional tree model to select an optimized table-level join order. However, the tree model provides a coarse-grained optimization, which generates the same order for different joined tuples and limits the optimization potential that different joined tuples can be optimized by different orders. Secondly, they mainly focus on optimizing the monetary cost. 
In fact, there are three optimization goals (i.e., smaller monetary cost, lower latency, and higher quality) in crowdsourcing, and it calls for a system to enable multi-goal optimization.\n To address the limitations, we develop a crowd-powered database system CDB that supports crowd-based query optimizations, with focus on join and selection. CDB has fundamental differences from existing systems. First, CDB employs a graph-based query model that provides more fine-grained query optimization. Second, CDB adopts a unified framework to perform the multi-goal optimization based on the graph model. We have implemented our system and deployed it on AMT, CrowdFlower and ChinaCrowd. We have also created a benchmark for evaluating crowd-powered databases. We have conducted both simulated and real experiments, and the experimental results demonstrate the performance superiority of CDB on cost, latency and quality.", "pdfUrls": [ "http://dbgroup.cs.tsinghua.edu.cn/ligl/papers/sigmod17-cdb.pdf", "http://doi.acm.org/10.1145/3035918.3064036" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ccc68bb05f3433f287de4f66fe9814760ebcafb3", "sources": [ "DBLP" ], "title": "CDB: Optimizing Queries with Crowd-Based Selections and Joins", "venue": "SIGMOD Conference", "year": 2017 }, "ccdc491a48a067c38c2f93ba186969fa422708bb": { "authors": [ { "ids": [ "1756901" ], "name": "Shay Gueron" }, { "ids": [ "1682750" ], "name": "Yehuda Lindell" } ], "doi": "10.1145/3133956.3133992", "doiUrl": "https://doi.org/10.1145/3133956.3133992", "entities": [ "AES instruction set", "Advanced Encryption Standard process", "Authenticated encryption", "Authentication", "Block cipher", "Block cipher mode of operation", "Cipher", "Cryptographic nonce", "Encryption", "Google Cloud Messaging", "Key derivation function", "PDF/A", "Plaintext" ], "id": "ccdc491a48a067c38c2f93ba186969fa422708bb", "inCitations": [ "d7eaf4762e395a6d18a1522953ba8300c63559b1", 
"29d25e885c62d80309b45afc2d244b7c679c7e77" ], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "702", "journalVolume": "2017", "outCitations": [ "0037875e7321eb65867ff47b0e22a080b84502da", "4fde24db691e08b274158c11cd15ef392b631336", "5adb3280105aaa6fba53523af9c9c5860ab6110f", "d7eaf4762e395a6d18a1522953ba8300c63559b1", "6d50b7cc0996bdd65c465a7db8d8d80986f3ff01", "4381e5698fa6ed369ad783b250ea1f75ac3f6144", "133c7c26692de7ac40a6241752f539cefee3999d", "099a5796bc29964ffd393677142e039176c5bf45", "94f53bda667df2030cae0b9ef5682e412fa7a332", "904322bd25a69da05d4fd28145a0b3b64ece4af3", "2aa24ddd5c4eea28fc3b751fb5915c01d9337626", "43f8cab471406e8ec59be34b31c7e24730a48045", "15f53e82bdb947b38cd9b0657fe6f22a6d492dbb", "9495db8ebbf897c4beb71c5d90e6e53a3aeeb402" ], "paperAbstract": "Block cipher modes of operation provide a way to securely encrypt using a block cipher. The main factors in analyzing modes of operation are the \\emph{level of security} achieved (chosen-plaintext security, authenticated encryption, nonce-misuse resistance, and so on) and \\textit{performance}. When measuring the security level of a mode of operation, it does not suffice to consider asymptotics, and a concrete analysis is necessary. This is especially the case today, when encryption rates can be very high, and so birthday bounds may be approached or even reached. In this paper, we show that key-derivation at every encryption significantly improves the security bounds in many cases. We present a new key-derivation method that utilizes a \\emph{truncated block cipher}, and show that this is far better than standard block-cipher based key derivation. We prove that by using our key derivation method, we obtain greatly improved bounds for many modes of operation, with a result that the lifetime of a key can be significantly extended. We demonstrate this for AES-CTR (CPA-security), AES-GCM (authenticated encryption) and AES-GCM-SIV (nonce-misuse resistance). 
Finally, we demonstrate that when using modern hardware with AES instructions (AES-NI), the performance penalty of deriving keys at each encryption is insignificant for most uses.", "pdfUrls": [ "http://eprint.iacr.org/2017/702", "https://eprint.iacr.org/2017/702.pdf", "http://doi.acm.org/10.1145/3133956.3133992" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ccdc491a48a067c38c2f93ba186969fa422708bb", "sources": [ "DBLP" ], "title": "Better Bounds for Block Cipher Modes of Operation via Nonce-Based Key Derivation", "venue": "CCS", "year": 2017 }, "cd1fdbc5bbe453339d26951a79e3ca533920a854": { "authors": [ { "ids": [ "2824752" ], "name": "Chuangang Ren" }, { "ids": [ "34000778" ], "name": "Peng Liu" }, { "ids": [ "1712616" ], "name": "Sencun Zhu" } ], "doi": "", "doiUrl": "", "entities": [ "Android", "Graphical user interface" ], "id": "cd1fdbc5bbe453339d26951a79e3ca533920a854", "inCitations": [ "0339584e6c0b073e2f62383a7a76d448766143f1", "3b6d6ab93e9663940a18743f16719bd7ae505b87", "ea9db089ee9add7f14cb6d29d349f143062bb76c", "4da17fac0ef3e23e6ae9849061fa604b826d3219", "18c5049806ab27e4a2a7ba6c388309843d6f90ab", "add252c8eae6d8b62737fa02dba302ac6c7279a9", "23bbc56b6b03c80a1c15e6c8726fb237c7d9c844", "086bcf94ddbb6829983ea41aab3a4500c2fbf289" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/windowguard-systematic-protection-gui-security-android/" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/cd1fdbc5bbe453339d26951a79e3ca533920a854", "sources": [ "DBLP" ], "title": "WindowGuard: Systematic Protection of GUI Security in Android", "venue": "NDSS", "year": 2017 }, "cd6ddf13877b067e0a47c0a1663125e2a7f547d6": { "authors": [ { "ids": [ "3247627" ], "name": "Roc\u00edo Ca\u00f1amares" }, { "ids": [ "2912921" ], "name": "Pablo Castells" } ], "doi": "10.1145/3077136.3080836", "doiUrl": 
"https://doi.org/10.1145/3077136.3080836", "entities": [ "Algorithm", "Collaborative filtering", "Database normalization", "Experiment", "Heuristic", "Strategic management" ], "id": "cd6ddf13877b067e0a47c0a1663125e2a7f547d6", "inCitations": [ "17121c7b3b937dafd88314739c25bb28ab660048" ], "journalName": "", "journalPages": "215-224", "journalVolume": "", "outCitations": [ "308e6dbbc4436e96453110897b97a9ae79b413a6", "1bc108785903f5a81153ec3104b8b67db9054938", "40b5e9b2a00a4a192e5e1b6d751dcd4fcb96231d", "0b387adf32a1d978378d0e49651013624de42473", "8bdcde37ae42ff7eb54529d6a012428773b113ba", "8ac3efcccf2a92ae19830d5fc19627785a52d4bd", "2bf3fb9d50a39ad727f248ffd4b14f07a2d48e4a", "7d986dac610e20441adb9161e5466c88932626e9", "39519d1a73475502d1ccbe6c8b2ac9ad5c90412e", "12f46619885bdebc60085d4f49f7a90ef2614519", "0906cbe4c344eeb32420f994b63650b8835eeed2", "184b7281a87ee16228b24716ca02b29519d52eb5", "2c9b99c886ed0bf82b7fd3ccdce52dd9b7f10397", "5908c36f7f6a2385256e1090738923d04d407e45", "554f6cc9cb9c64a25670eeb12827b803f3db2f71", "6aa1c88b810825ee80b8ed4c27d6577429b5d3b2", "940b01d0d7931cb9d4d24f5bd50625b941b31a13", "13e9052ec45b81b262a1dd72e81368ef5ea0a1be", "d5fdc3c0b2049a025091179a73e0e4174105fcd4", "45e6f16c521e36aecfbc808c2bd4676ca466ef99", "da8b0378174bc25ed174be36a1c725787b81854d", "2c1795748e767df44d18f0a820126f1804bc2e4a", "1bc1a735bf6b3aca3b5c80912a222e4c8fc10eb7", "530d53d8a1828e73ff0d731cd2d2cf8a8f8ecae4" ], "paperAbstract": "We develop a probabilistic formulation giving rise to a formal version of heuristic k nearest-neighbor (kNN) collaborative filtering. Different independence assumptions in our scheme lead to user-based, item-based, normalized and non-normalized variants that match in structure the traditional formulations, while showing equivalent empirical effectiveness. The probabilistic formulation provides a principled explanation why kNN is an effective recommendation strategy, and identifies a key condition for this to be the case. 
Moreover, a natural explanation arises for the bias of kNN towards recommending popular items. Thereupon the kNN variants are shown to fall into two groups with similar trends in behavior, corresponding to two different notions of item popularity. We show experiments where the comparative performance of the two groups of algorithms changes substantially, which suggests that the performance measurements and comparison may heavily depend on statistical properties of the input data sample.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080836" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/cd6ddf13877b067e0a47c0a1663125e2a7f547d6", "sources": [ "DBLP" ], "title": "A Probabilistic Reformulation of Memory-Based Collaborative Filtering: Implications on Popularity Biases", "venue": "SIGIR", "year": 2017 }, "cd7efa07a77ded82bb8d25ecb472ae5d54b229a4": { "authors": [ { "ids": [ "23152078" ], "name": "Vladimir Dimic" }, { "ids": [ "2703643" ], "name": "Miquel Moret\u00f3" }, { "ids": [ "2020430" ], "name": "Marc Casas" }, { "ids": [ "1741016" ], "name": "Mateo Valero" } ], "doi": "10.1007/978-3-319-64203-1_18", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_18", "entities": [ "CPU cache", "Cache (computing)", "Computer data storage", "Data dependency", "Dataflow", "Insertion sort", "Runtime system" ], "id": "cd7efa07a77ded82bb8d25ecb472ae5d54b229a4", "inCitations": [], "journalName": "", "journalPages": "247-259", "journalVolume": "", "outCitations": [ "5d999f4a5567e6f4a54e46bbcd6006f75ab0cbac", "0228df458946aecc024540ed90840c789590a3a7", "2d22abbe32fcb60cadd958ef14b2e2672f6cc84c", "06125169a21ef17641d7199544417b21c378eede", "16f425a8c6d42a09c16fa074d3b0d7a87fd9348e", "0ea6d460c250167898b794e4373685a653602e8b", "792aa5a81ac1d344de450ec59eec339aa0e508aa", "3ac6671a0c61544b9dab543b116eccdaccc6469e", "4721ad0db596f3f78ddb31b4305ddbde35f8f181", "1ef7f02bce931c8e9ef529e095b274132ce4011a", "41965de84461726a0e12296acd667a442c8eba25", 
"ef0212e7885fd37caae2b33ace03525b128bbeee", "20338bf6e95c532cceaa57087fa6436af17dc005", "cfd34380711f505e58289a524e6d154dc44355a1", "624bd22fcd5746d84fb8e07a0f0234200726ec8d", "5e50ffa96fa021c85ccedb1bb8b84b59ee268de8", "19ecf7778132143ec9c0324fae0aebf20c9a0217", "54f3331b575b2d451c2d716f86496cada23d596d", "2dc59e60b34b3863e4eb381b17384105fe523cec", "42452be4c840abd3a4a0fa49c4b8d4aeeb3f2f6e", "6db6459a0d7cb58b688941a98f80a7e5e1bed94b", "0003b4b4c35f0384596b6f129f7dda8449cdc79f", "7779c10dfa1f84953016b6292844815c5faf84f5" ], "paperAbstract": "Processor speed is improving at a faster rate than the speed of main memory, which makes memory accesses increasingly expensive. One way to solve this problem is to reduce miss ratio of the processor\u2019s last level cache by improving its replacement policy. We approach the problem by co-designing the runtime system and hardware and exploiting the semantics of the applications written in data-flow task-based programming models to provide hardware with information about the task types and task data-dependencies. We propose the Task-Type aware Insertion Policy, TTIP, which uses the runtime system to dynamically determine the best probability per task type for bimodal insertion in the recency stack and the static Dependency-Type aware Insertion Policy, DTIP, that inserts cache lines in the optimal position taking into account the dependency types of the current task. 
TTIP and DTIP perform similarly or better than state-of-the-art replacement policies, while requiring less hardware.", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_18", "http://upcommons.upc.edu/bitstream/handle/2117/107652/Runtime-Assisted+Shared+Cache+Insertion.pdf;jsessionid=EC18689FCF2BDA1227D746629E3340AA?sequence=1" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/cd7e/fa07a77ded82bb8d25ecb472ae5d54b229a4.pdf", "s2Url": "https://semanticscholar.org/paper/cd7efa07a77ded82bb8d25ecb472ae5d54b229a4", "sources": [ "DBLP" ], "title": "Runtime-Assisted Shared Cache Insertion Policies Based on Re-reference Intervals", "venue": "Euro-Par", "year": 2017 }, "cd97e84c37f821be260f524e715d1849c8f139b7": { "authors": [ { "ids": [ "34825584" ], "name": "Venkatesh Srinivasan" }, { "ids": [ "3431372" ], "name": "Ara Vartanian" }, { "ids": [ "1703130" ], "name": "Thomas W. Reps" } ], "doi": "10.1145/3133885", "doiUrl": "https://doi.org/10.1145/3133885", "entities": [ "Best-first search", "Binary file", "Bit array", "Executable", "Experiment", "Heuristic", "IA-32", "Language model", "Linear search", "Machine code", "Machine learning", "Naivety", "Program optimization", "Rewrite (programming)", "Time complexity", "Universal quantification" ], "id": "cd97e84c37f821be260f524e715d1849c8f139b7", "inCitations": [ "3a818e346ee6d7d51d1133ac1c11eea97000d92b" ], "journalName": "PACMPL", "journalPages": "61:1-61:26", "journalVolume": "1", "outCitations": [ "b9addc8ce998f6892120c2c8b23ae183312bfa6c", "0b6e1e4d69648afb6b0691c37f92b70c42016953", "93600a2c93fccaafd2a6298ea981c249f5210fc0", "39709f584f1ca149be34d851bc6a68fe8880a5fe", "6a8f65381a627a2db6c756a7185d9106f0acefec", "2b1fb527fb0d9f028de9eee6f76bf4fbdda84567", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "0f0b255f40f103efaf28e740b897d9a4b429a0e3", "00c08861cfb438d5ff209dfadc2d839641cd3ca9", "26b9001cce4a7f2e838ef99d0e7593b18553a7e0" ], "paperAbstract": "Binary rewriters are tools that are used to modify the 
functionality of binaries lacking source code. Binary rewriters can be used to rewrite binaries for a variety of purposes including optimization, hardening, and extraction of executable components. To rewrite a binary based on semantic criteria, an essential primitive to have is a machine-code synthesizer---a tool that synthesizes an instruction sequence from a specification of the desired behavior, often given as a formula in quantifier-free bit-vector logic (QFBV). However, state-of-the-art machine-code synthesizers such as McSynth++ employ naive search strategies for synthesis: McSynth++ merely enumerates candidates of increasing length without performing any form of prioritization. This inefficient search strategy is compounded by the huge number of unique instruction schemas in instruction sets (e.g., around 43,000 in Intel's IA-32) and the exponential cost inherent in enumeration. The effect is slow synthesis: even for relatively small specifications, McSynth++ might take several minutes or a few hours to find an implementation. \n In this paper, we describe how we use machine learning to make the search in McSynth++ smarter and potentially faster. We converted the linear search in McSynth++ into a best-first search over the space of instruction sequences. The cost heuristic for the best-first search comes from two models---used together---built from a corpus of incremental ERM and private incremental regression where the general goal is to always maintain a good empirical risk minimizer for the history observed under differential privacy. Our first contribution is a generic transformation of private batch ERM mechanisms into private incremental ERM mechanisms, based on a simple idea of invoking the private batch ERM procedure at some regular time intervals. We take this construction as a baseline for comparison. We then provide two mechanisms for the private incremental regression problem. 
Our first mechanism is based on privately constructing a noisy incremental gradient function, which is then used in a modified projected gradient procedure at every timestep. This mechanism has an excess empirical risk of ≈√d where d the input and constraint set can be used to derive significantly better results for certain interesting regression problems. Our second mechanism which achieves this is based on the idea of projecting the data to a lower dimensional space using random projections, and then adding privacy noise in this low dimensional space. The mechanism overcomes the issues of adaptivity inherent with the use of random projections in online streams, and uses recent developments in high-dimensional estimation to achieve an excess empirical risk bound of ≈ T1/3 W2/3, where T is the length of the stream and W is the sum of the Gaussian widths of the input domain and the constraint set that we optimize over.", "pdfUrls": [ "https://arxiv.org/pdf/1701.01093v1.pdf", "http://arxiv.org/abs/1701.01093", "http://doi.acm.org/10.1145/3034786.3034795" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d2f3433c80ada67df36111994cdb407683e18a1f", "sources": [ "DBLP" ], "title": "Private Incremental Regression", "venue": "PODS", "year": 2017 }, "d37d40586c753bb2b8659dced2d21afb821ea18c": { "authors": [ { "ids": [ "2877698" ], "name": "Aigerim Bakatkaliyevna Altayeva" }, { "ids": [ "10693991" ], "name": "Batyrkhan Omarov" }, { "ids": [ "2776752" ], "name": "Andrey Giyenko" }, { "ids": [ "1978914" ], "name": "Young-Im Cho" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.8", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.8", "entities": [ "Agent-based model", "Control system", "Intelligent control", "Management system", "Multi-agent system", "Systems design" ], "id": "d37d40586c753bb2b8659dced2d21afb821ea18c", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 
15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "58-63", "journalVolume": "", "outCitations": [ "148626f3793459f898543e9151807fb32342fb65", "44c0296c0738ea59dd448ae8ff717d5f491442ae", "b1f400bc48fb4c5c1fee6be8863e4c7682715f99", "c4b85f1c3180bc25ce4941fe42ac90d89e4e708a", "45d118010cc340e95849b4eab18e509e4e4d4e2c", "d5f3a0d572dc7e6b345ef7479cb3e6b36d36c231", "6c30554010dec579f2d837e50a5c10ce88f110a4" ], "paperAbstract": "The methodology of system design of modern intelligent control systems in the electric power industry and their features are considered when providing the required comfort in multi-zone buildings with the use of multi-agent power consumption and comfort management systems. Such a control system covers all the monitored zones of a building and, if necessary, allows providing the greatest possible overall comfort in the building while reducing the required electric power. The purpose of this study is to develop a comfort management system in a multi-zoned building that can provide the greatest possible comfort while reducing the required electric power.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.8" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d37d40586c753bb2b8659dced2d21afb821ea18c", "sources": [ "DBLP" ], "title": "Multi-agent Based Smart Grid System Development for Building Energy and Comfort Management", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "d3b34ad601a9ff4b446b315ac627e4db7dfc7232": { "authors": [ { "ids": [ "3649880" ], "name": "Sawinder Kaur" }, { "ids": [ "36911504" ], "name": "Manojit Ghose" }, { "ids": [ "32151627" ], "name": "Aryabartta Sahu" } ], "doi": 
"10.1109/HPCC-SmartCity-DSS.2017.23", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.23", "entities": [ "Algorithm", "Approximation algorithm", "Cloud computing", "Cluster analysis", "Scheduling (computing)" ], "id": "d3b34ad601a9ff4b446b315ac627e4db7dfc7232", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "178-185", "journalVolume": "", "outCitations": [ "d8057d514036d51051af78476468fe350cb7488a", "642f72cdee8f3e9a5275e47cad844e1c54b57b83", "c4d670aa572fc072e74601a09c7a4679e5af3c21", "381c7853690a0fee6f00d2608a7779737f1365f9", "8c509bec24d666b7a1227a25aebb91f995837ea2", "d7459814fef788974755eb59fab8e343da625449", "7c315765f5af9ab9932defec346f7cdccfbc7a2d", "7ebe0dfcc5e795ed1059ba0f822d7eb510fc46c0", "8e09eb942a635260032be624f8d0e85447e74ca5", "31abcf70a3a118269d4b5707a7f06b0ef8cdaab9", "e7c144bb8720de232d84aff9884677fbb2b95fcb", "cc6a68cfa395d62d4a1d76de062e87012a03e072", "1bbf9446e4f3b6116b5073bbd8bbb2705b2db9be", "0e88af5cec2d9b26b47f541907d0f244f8cba7c5" ], "paperAbstract": "Demand for cloud computing has increased tremendously in recent time due to various benefits. Along with providing promised services, the cloud providers also need to focus on the amount of energy consumption as it has many-fold benefits. Again scheduling algorithms play an important role in minimizing the energy consumption of a system. Thus, in this paper, we have proposed approaches for scheduling real time tasks on a virtualized cloud system without missing their deadlines and minimizing the overall energy consumption of the cloud system. We have divided the problem of scheduling real-time tasks on virtualized cloud system into four sub problems, analyzed and solved them separately. 
We have provided exact solutions for scheduling of one type of real time tasks (sub problem 1), created an approximated model for scheduling of two types of real time tasks with same deadline (sub problem 2). We have also extended the same approximated model for scheduling of many real time tasks with same deadline (sub problem 3). Finally, we came up with four different approaches for scheduling of general real time tasks using deadline clustering approach (sub problem 4) and compared their performance in minimizing the overall energy consumption of the cloud system.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.23" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d3b34ad601a9ff4b446b315ac627e4db7dfc7232", "sources": [ "DBLP" ], "title": "Energy Efficient Scheduling of Real-Time Tasks in Cloud Environment", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "d41d66249c30edce902dd0ef937fdbeeab32e750": { "authors": [ { "ids": [ "1985663" ], "name": "Adrian Loch" }, { "ids": [ "2078859" ], "name": "Hany Assasa" }, { "ids": [ "39640069" ], "name": "Joan Palacios" }, { "ids": [ "1712634" ], "name": "J\u00f6rg Widmer" }, { "ids": [ "1995263" ], "name": "Hans Suys" }, { "ids": [ "3326850" ], "name": "Bj\u00f6rn Debaillie" } ], "doi": "10.1145/3143361.3143395", "doiUrl": "https://doi.org/10.1145/3143361.3143395", "entities": [ "Backward compatibility", "Mobile device", "Overhead projector", "Simulation", "Testbed", "Throughput", "Zero" ], "id": "d41d66249c30edce902dd0ef937fdbeeab32e750", "inCitations": [], "journalName": "", "journalPages": "224-237", "journalVolume": "", "outCitations": [ "616bcc8949f3cd70ccb929269ce2ebc3c6ca7a7c", "40d4907c1d4f3dedf7aa7def3f3a96d136c822b5", "8decc24664ec3aac3f2baac8cf443e7faca41f78", 
"02d843e3a008e76cf6a4c23bd01023d264b05686", "34cc15799cdef2682d5d4a75e73a966964cce05c", "4c5d4150aedca478e69eda5a20270a485f51d48d", "8fbbe5c195772a3899463876000b3a5dc7977b44", "38bcb7a4d3e6caa999f08fca1da03831f52ca21a", "1ac257a741e3ddf53d20b3ff04dd01f9eb998928", "58392cd42505bf2bc0675610188f6465bc20fd6f", "0ae6b77e569d0810253aeb9835c0b40d759491a8", "e39c17f6c5a83581890640049b075badff0cd34d", "04950bc1696629f4f9e8dfff324b018a1caba208", "00f0a8788ed43c534a8888b2b2e4edbafafe8bf3", "08e6f96da8e44d6529d29fb2087f5bbf5684404d", "05fe031e53dd8990e7076a91277cb2b74e22b811", "1943466070019e48204ebbee0914d87ced4ba09a", "81ed14364300805954f948abd7f2df397df233bf", "2d4906884bc5309f1539195ff5b181d41a15ff60", "15626a85e1700ac008064c7f29d29955839e8cef", "476650b1d8a0d43c0365513761bbadac60a051c2", "e61d16a6d5df45c59f4157f7695a33df489ce3ed", "19450f33783a17fef560b596473cc91b69e8da93", "839d13983d55f3aeeb8e644447fd9a4b5665fc56", "bf4feafbfdea362968acc1c3cb4aa4cb3facaa94", "f455e1302a696b535459c070bb7384bff6fd9e6c", "603f1965545d165fd63f3a35178aec0fe76190b8", "218b6a89433ff5e2427ee77aa59e2ea877e1e5b5", "0d4c61a14bc809531bfae53afd1c08f607584ab0", "534ee575a6b0c37e03d1dddb92493b57e9271298", "69e0e4752d166df8f31c37e65139a455ee1b446e", "6f07a10dfbd583fdda034c7d606e53148f162f2d", "98cb6b925c8c8fd0f9ebf0ed894fdb982bc0b69d", "b56c10f2781e75c9721906828c5a16ba93b3cda0", "b5f72f83ea6aa952a9dd0a9465c03537828d3dfd", "a28c490a42314b4358f954b16b089f51ec6749cd", "367ed848a7c74b1750f878b7ca8f2a156b61f01f", "9483566bb9507d3cc72973547c319f6fc9cb0dd4" ], "paperAbstract": "Millimeter-wave devices must use highly directional antennas to achieve GBit/s data rates over reasonable distances due to the high path loss. As a consequence, it is important to precisely align the antenna beams between sender and receiver. Even minor movement or rotation of a device can result in beam misalignment and thus a strong performance degradation. 
Existing work as well as standards such as IEEE 802.11ad tackle this issue by means of antenna sector probing. This comes at the expense of a significant overhead, which may significantly reduce the performance of millimeter-wave communication, particularly in mobile scenarios. In this paper, we present a mechanism that can track both movement and rotation of 60 GHz mobile devices with zero overhead. To this end, we transmit part of the preamble of each packet using a multi-lobe beampattern. Our approach does not require any additional control messages and is backward compatible with 802.11ad. We implement our scheme on a 60 GHz testbed using phased antenna arrays, and show that we reduce the angle error to less than 5° in most cases. We also perform simulations to validate our approach in a wide range of scenarios, achieving up to 2x throughput gain.", "pdfUrls": [ "http://doi.acm.org/10.1145/3143361.3143395" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d41d66249c30edce902dd0ef937fdbeeab32e750", "sources": [ "DBLP" ], "title": "Zero Overhead Device Tracking in 60 GHz Wireless Networks using Multi-Lobe Beam Patterns", "venue": "CoNEXT", "year": 2017 }, "d42b7b536bdd0db06616f3ea7ec95323a7c6d615": { "authors": [ { "ids": [ "3583740" ], "name": "Daniel Lustig" }, { "ids": [ "2969643" ], "name": "Andrew Wright" }, { "ids": [ "1773565" ], "name": "Alexandros Papakonstantinou" }, { "ids": [ "1978863" ], "name": "Olivier Giroux" } ], "doi": "10.1145/3037697.3037723", "doiUrl": "https://doi.org/10.1145/3037697.3037723", "entities": [ "Black box", "Black-box testing", "Cognitive dimensions of notations", "Compiler", "Consistency model", "Corner case", "Eisenstein's criterion", "Litmus", "Mathematical model", "Memory architecture", "Memory model (programming)", "Modeling language", "Observable", "Programming language", "Programming model", "Routh\u2013Hurwitz stability criterion", "Shared memory", "Software testing", "Test suite" ], "id": 
"d42b7b536bdd0db06616f3ea7ec95323a7c6d615", "inCitations": [ "9c79e22df657e92d6d895ac424815ea750e6dc0c", "5060b771ee52f9d6c1115b24865401a6f3df4068", "3cdb85ba7223ec85c99f7f1f4c345d0c03432b4b", "60ea4cdd9ceb049c21a26616743e07cca3ada0d3", "355734971cd9679aafd97fe4e2372fbc51260385" ], "journalName": "", "journalPages": "661-675", "journalVolume": "", "outCitations": [ "47b8b9cd7b064619e6bf25cde95e7af4bf5bc197", "5eef609f21fc9327e551ab40425f7f1715c3e200", "0c25fda6287a535c4d8dec25a3d97a5cb89fa765", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "5ae07f575fb2feb1e03d08af9fe29fafe0a306d7", "3d802a3254a1a532f080bc8e713d970ea8796db5", "3eae0271717f6b4d65024abf04e5d98aef41d748", "de47178b1458510dc12e0595cc9fda383effc998", "0249e70c7cf656852a54ef4915e75c55eb5a3abb", "0a89fafea6184b469511ba73735d451da92c18fa", "560af2405c19ec25a224a95ddb55d5e34582fff3", "f0ca54ebf208c7ef592b2ccf4e8961ec5524633c", "3c142ad4ca5ed211a606450801d54b3b30d687e9", "5d8223b9caf90736f4ca75750290a1a25f66b7a8", "7b93d3e42a7498e4de67a76b8f6861875fa74d79", "370d546ab1ce3988194cbf835ee09e73e3733b41", "e2cd72273908651ea11f9cb45d0dd5d755ca3bd0", "3a66a682ee36cde0738824b152a51df2ccbb80fd", "1aac5c5e6dda36e455bed8af80dd1fd6bb31321e", "290776c417c5737f14f348a568f1e1bee1ee2859", "3415d0e437f2ecddee7a8e2efa9010d22c211a68", "62bd72d7a4160bd1a35191c51137d11cfbe30cf7" ], "paperAbstract": "The memory consistency model is a fundamental part of any shared memory architecture or programming model. Modern weak memory models are notoriously difficult to define and to implement correctly. Most real-world programming languages, compilers, and (micro)architectures therefore rely heavily on black-box testing methodologies. The success of such techniques requires that the suite of litmus tests used to perform the testing be comprehensive--it should ideally stress all obscure corner cases of the model and of its implementation. 
Most litmus test suites today are generated from some combination of manual effort and randomization; however, the complex and subtle nature of contemporary memory models means that manual effort is both error-prone and subject to incomplete coverage.\n This paper presents a methodology for synthesizing comprehensive litmus test suites directly from a memory model specification. By construction, these suites contain all tests satisfying a minimality criterion: that no synchronization mechanism in the test can be weakened without causing new behaviors to become observable. We formalize this notion using the Alloy modeling language, and we apply it to a number of existing and newly-proposed memory models. Our results show not only that this synthesis technique can automatically reproduce all manually-generated tests from existing suites, but also that it discovers new tests that are not as well studied.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037723" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d42b7b536bdd0db06616f3ea7ec95323a7c6d615", "sources": [ "DBLP" ], "title": "Automated Synthesis of Comprehensive Memory Model Litmus Test Suites", "venue": "ASPLOS", "year": 2017 }, "d43adde9afd4f1c8da3d88dfcf3170dc520c6529": { "authors": [ { "ids": [ "2075364" ], "name": "WonJun Song" }, { "ids": [ "2649892" ], "name": "Gwangsun Kim" }, { "ids": [ "18932772" ], "name": "Hyungjoon Jung" }, { "ids": [ "10013992" ], "name": "Jongwook Chung" }, { "ids": [ "2575874" ], "name": "Jung Ho Ahn" }, { "ids": [ "3091593" ], "name": "Jae W. 
Lee" }, { "ids": [ "1747205" ], "name": "John Kim" } ], "doi": "10.1145/3037697.3037753", "doiUrl": "https://doi.org/10.1145/3037697.3037753", "entities": [ "CPU cache", "Cache (computing)", "Cloud computing", "Computer data storage", "EarthBound", "Fairness measure", "HyperTransport", "Intel QuickPath Interconnect", "Non-uniform memory access", "Router (computing)", "Server (computing)", "Simulation", "Supercomputer", "Synthetic data", "Uniform memory access", "Virtual machine" ], "id": "d43adde9afd4f1c8da3d88dfcf3170dc520c6529", "inCitations": [], "journalName": "", "journalPages": "765-777", "journalVolume": "", "outCitations": [ "36e1b02a66ed928ef13e3a2ba6852e90a8713036", "575984915952de4383679b0c54029f2a01ab002e", "2771787fa43c087058eb6b5817541d7ac61919b2", "a27377f78e195ba4b26d910b431128b556a5d3cf", "de279ffa1e749c437ecd1241433e753b45e1c154", "1d750935db43da33cd31a8911e00ea29096a03bc", "03e216973f65fcee6e4e761592c3817386faa052", "067c7857753e21e7317b556c86e30be60aa7cac0", "05c56f4abc527fbf384ad011dc9c0a613955641a", "6c61473130ccb2009717a28962096d146fbde038", "294ad206a120a519cfd99294c8b5e004dcc06abf", "fce292b76d9bd425711f7195e4a1234777db2cff", "41b24c890ae0ef99ff031c9c8549375af6025fb6", "a4f8bc8cb6de7b6afd8c7a427d041932b8a1ca32", "03771342b2ec13944bc7f34c65af9a2d843bc428", "a160dcc79380fed1ea808b11d81179dc3b85a07f", "24afeaa52e343f000e37c56b24b6da25668831ab", "5d56f125b2ff129a1113775c74bf0d7439925389", "3e7fd5ac3fc1ab19c985c97a0614e4109fa91583", "3d2860fa03061010dde95d2f5e76b04fa7bd0ea4", "434fa04db769935ae61bbcf4d9faa602b9a8c730", "96454c681c9113870f8305426b619d48b5bd7bb7", "23f4f3430cd97f034563dc0a41039c5fbc58f6a3", "2bc37c57da973aa53c41e9ebb6e1407cd8e3e13b", "373b88e34295875fdab7f6cdee1438edbd0571cb", "3ef44315caebb6169b7ffbf9886cd782193ab40e", "6813f13990e0553c7cadf2e0a3ffab217bc4e396", "3ce662e1663456ce2a5b5d240112721c0d0a4582", "3ef3a7e991ea82ae905c6336bb7d610db3a83050", "50de0f6a952131dfe562c5b3836e5d934b39b939", 
"a9d590711b56bb2e66f98814802f1429c20ee863", "bbebaeb1b00fdcde2aeb192543835d3ec1518f8d" ], "paperAbstract": "NUMA (non-uniform memory access) servers are commonly used in high-performance computing and datacenters. Within each server, a processor-interconnect (e.g., Intel QPI, AMD HyperTransport) is used to communicate between the different sockets or nodes. In this work, we explore the impact of the processor-interconnect on overall performance -- in particular, the performance un- fairness caused by processor-interconnect arbitration. It is well known that locally-fair arbitration does not guarantee globally-fair bandwidth sharing as closer nodes receive more bandwidth in a multi-hop network. However, this work demonstrates that the opposite can occur in a commodity NUMA server where remote nodes receive higher bandwidth (and perform better). We analyze this problem and iden- tify that this occurs because of external concentration used in router micro-architectures for processor-interconnects without globally-aware arbitration. While accessing remote memory can occur in any NUMA system, performance un- fairness (or performance variation) is more critical in cloud computing and virtual machines with shared resources. We demonstrate how this unfairness creates significant performance variation when a workload is executed on the Xen virtualization platform. We then provide analysis using synthetic workloads to better understand the source of unfair- ness and eliminate the impact of other shared resources, including the shared last-level cache and main memory. To provide fairness, we propose a novel, history-based arbitration that tracks the history of arbitration grants made in the previous history window. A weighted arbitration is done based on the history to provide global fairness. 
Through simulations, we show our proposed history-based arbitration can provide global fairness and minimize the processor- interconnect performance unfairness at low cost.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037753" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d43adde9afd4f1c8da3d88dfcf3170dc520c6529", "sources": [ "DBLP" ], "title": "History-Based Arbitration for Fairness in Processor-Interconnect of NUMA Servers", "venue": "ASPLOS", "year": 2017 }, "d4a715e205f608bd9689adc007c53bd749ad787e": { "authors": [ { "ids": [ "14295340" ], "name": "Francesco Di Giacomo" }, { "ids": [ "9174650" ], "name": "Mohamed Abbadi" }, { "ids": [ "1701110" ], "name": "Agostino Cortesi" }, { "ids": [ "1746390" ], "name": "Pieter Spronck" }, { "ids": [ "1801872" ], "name": "Giuseppe Maggiore" } ], "doi": "10.1145/3136014.3136015", "doiUrl": "https://doi.org/10.1145/3136014.3136015", "entities": [ "Compiler", "Data structure", "Domain-specific language", "Operational semantics", "Time complexity", "Type system" ], "id": "d4a715e205f608bd9689adc007c53bd749ad787e", "inCitations": [], "journalName": "", "journalPages": "232-243", "journalVolume": "", "outCitations": [ "f19deba5dbf9119fe58ca88ab4956f5ccfe995e8", "0d84bed55ebd61bb90e8fe752803ee9ae3d826bf", "8de0423aadeecd1d404354fcd2876431b78d8ff8", "585bf1de4e1c46b66691a29f065ee7b2425d38d0", "0d8ed24e0bc0ac0de825747cf582a0df146d05a7", "665a06f0e8dd227f1f5853f940e1aa0c5daacdf3", "ee4b62cfd6063393f944cd0d9b1137aadf9eb88d", "1da0b10ba41a613f76843e22b332fc019aa4ff9e", "410efc4370a6281c7166a99e5e6d382f09de9414", "b291f0f0be0c3048e5dc98647769d554589d617a", "43ef8552b8ef0fbc49e68dc56636cc7701b56485", "8181324107d74e57a2b93a597d8ae90003b8828b", "a55fbed8e2961649a16966ba396769056815beab", "d5b2a268d56337de54f10db0167db214debdc467", "3b10acfff57990d34b5c511fce559f7c472806b2" ], "paperAbstract": "Domain-Specific Languages (DSL's) offer language-level abstractions that General-Purpose Languages do not 
offer, thus speeding up the implementation of the solution of problems within a specific domain. Developers have the choice of developing a DSL by building an interpreter/compiler for it, which is a hard and time-consuming task, or embedding it in a host language, thus speeding up the development process but losing several advantages that having a dedicated compiler might bring. In this work we present a meta-compiler called Metacasanova, whose meta-language is based on operational semantics. Then, we propose a language extension with functors and modules that allows to embed the type system of a language definition inside the meta-type system of Metacasanova and improves the performance of manipulating data structures at run-time. Our results show that Metacasanova dramatically reduces the code lines required to develop a compiler, and that the running time of the Meta-program is improved by embedding the host language type system in the meta-type system with the use of functors in the meta-language.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136015" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d4a715e205f608bd9689adc007c53bd749ad787e", "sources": [ "DBLP" ], "title": "Metacasanova: an optimized meta-compiler for domain-specific languages", "venue": "SLE", "year": 2017 }, "d4d5a5baab3c0418447566724a4fd16c96e53517": { "authors": [ { "ids": [ "2851729" ], "name": "Yatin A. 
Manerkar" }, { "ids": [ "3583740" ], "name": "Daniel Lustig" }, { "ids": [ "1708269" ], "name": "Margaret Martonosi" }, { "ids": [ "1790200" ], "name": "Michael Pellauer" } ], "doi": "10.1145/3123939.3124536", "doiUrl": "https://doi.org/10.1145/3123939.3124536", "entities": [ "Consistency model", "High- and low-level", "Litmus", "Microarchitecture", "Multi-chip module", "Multi-core processor", "Parallel computing", "RISC-V", "Sequential consistency", "SystemVerilog", "Verification and validation" ], "id": "d4d5a5baab3c0418447566724a4fd16c96e53517", "inCitations": [ "9c79e22df657e92d6d895ac424815ea750e6dc0c" ], "journalName": "", "journalPages": "463-476", "journalVolume": "", "outCitations": [ "4292384b0b798feea238c7f0437d88476e342771", "101bcfb23ad622fa5fed78ca627f0ef3fc8e5624", "53549635de2fbfaaabf4ff24a910797cc3499d9b", "da53802ae4fc40ee354ad6fceefef6fb1eb66050", "13210f969b8b4d1d12d63a9a8361028a6be498bc", "47b8b9cd7b064619e6bf25cde95e7af4bf5bc197", "509edf1451ecd631e17d1f2ef84feea94b163ed3", "3415d0e437f2ecddee7a8e2efa9010d22c211a68", "3410896903f04ef90ed253ef98032be22de15809", "33dcafd805a3b44fd64270028633032ff0bb6fac", "00c3b08c4e1dbfa080b6d3c422fa0da0131a743c", "7b93d3e42a7498e4de67a76b8f6861875fa74d79", "5d8223b9caf90736f4ca75750290a1a25f66b7a8", "27b94d947c4b094f482e9689412e1f753b52a62f", "60ea4cdd9ceb049c21a26616743e07cca3ada0d3", "0f0046ae34181e08594ad9be7b5bfffdbaeda177", "9f7cbfb0f4f384d43220ecc2feaba1ac18030412", "26ba6ff831c6cd84c6ad380b74ffde5811e80543", "30e913c6bd6fd0bee790b6237d4c7a958a779d29", "40b4153ff9f9e8125db7e74a1cc5748ee81bd317", "78ef558e04209af5c1243c640f6aa71e5b211bf3", "34d2db88f259d69022e7492225301ffd6e0f55c0", "e2cd72273908651ea11f9cb45d0dd5d755ca3bd0", "20b704c8b63d7096691248b4bb492f4f42bed8d6", "4d1e3d20531b7118c50b137715b69926d990d7c6", "1aac5c5e6dda36e455bed8af80dd1fd6bb31321e", "1e557022b21ced558596fd37c26ce6a006e08bc8", "05a518c3b1a6f5c15d77f8829368677a263ff15d", "7785bdee6f846fac22ae39bd6255df73dae70b31", 
"1b9e9bfdc66140d2eb192e4ac8aca9281f0239b8", "de47178b1458510dc12e0595cc9fda383effc998", "15a8bdc24bee6c805d4a6017fe352ae4d99c6bd3" ], "paperAbstract": "Paramount to the viability of a parallel architecture is the correct implementation of its memory consistency model (MCM). Although tools exist for verifying consistency models at several design levels, a problematic verification gap exists between checking an abstract microarchitectural specification of a consistency model and verifying that the actual processor RTL implements it correctly.\n This paper presents RTLCheck, a methodology and tool for narrowing the microarchitecture/RTL MCM verification gap. Given a set of microarchitectural axioms about MCM behavior, an RTL design, and user-provided mappings to assist in connecting the two, RTLCheck automatically generates the SystemVerilog Assertions (SVA) needed to verify that the implementation satisfies the microarchitectural specification for a given litmus test program. When combined with existing automated MCM verification tools, RTLCheck enables test-based full-stack MCM verification from high-level languages to RTL. We evaluate RTLCheck on a multicore version of the RISC-V V-scale processor, and discover a bug in its memory implementation. Once the bug is fixed, we verify that the multicore V-scale implementation satisfies sequential consistency across 56 litmus tests. 
The JasperGold property verifier finds complete proofs for 89% of our properties, and can find bounded proofs for the remaining properties.", "pdfUrls": [ "http://www.cs.princeton.edu/~manerkar/papers/MICRO-50_Manerkar.pdf", "http://doi.acm.org/10.1145/3123939.3124536" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d4d5a5baab3c0418447566724a4fd16c96e53517", "sources": [ "DBLP" ], "title": "RTLcheck: verifying the memory consistency of RTL designs", "venue": "MICRO", "year": 2017 }, "d4ff82de141fccda4c2543917299754e9c84a71b": { "authors": [ { "ids": [ "1989470" ], "name": "Varsha Apte" }, { "ids": [ "10405465" ], "name": "T. V. S. Viswanath" }, { "ids": [ "3448900" ], "name": "Devidas Gawali" }, { "ids": [ "10444850" ], "name": "Akhilesh Kommireddy" }, { "ids": [ "1696026" ], "name": "Anshul Gupta" } ], "doi": "10.1145/3030207.3030222", "doiUrl": "https://doi.org/10.1145/3030207.3030222", "entities": [ "Hall effect", "Heuristic", "Instrumentation (computer programming)", "Internet", "Load testing", "Multitier architecture", "Nautical chart", "Profiling (information science)", "Queueing theory", "Response time (technology)", "Run time (program lifecycle phase)", "Server (computing)", "Testbed", "Throughput", "Tier 1 network", "Web application", "Web server" ], "id": "d4ff82de141fccda4c2543917299754e9c84a71b", "inCitations": [], "journalName": "", "journalPages": "115-126", "journalVolume": "", "outCitations": [ "387bb95ad5c7635f646b989fe06fcda46447d933", "81c343a4f02553094397824c715a852c75091a7e", "72c1990b2c992dfb25d48c51c44513c1f99b1baf", "d7b2d876862ad356e979b31ce9dfde4c7b340026", "e59e46303f112fb68e6bf40bfee3c4dc949515b1", "7ae1aa8edbdaad1641f7d3fb55ced8fdd578bd8f", "2498644b120efc708e253b3ae9fbb5abba062d50", "ef9d6ead4070b0aaed56958d2b5fcd07879161b0", "2ff6da15f3f9f8bc529f1e674f3c86909f60df5a", "998d7172611819ae1dddbf1256bee64d2ba5eba4", "b8220a9f9481254314569847ffb2181e5f52bdd0", "0019a2539997a8c38523268b4c03d0316457654d" ], 
"paperAbstract": "A multi-tier Internet server application needs to be analyzed for its performance before it is released. Performance analysis is usually done by (a) load testing of the application on a testbed and (b) building a performance model of the application. While there are a plethora of Web load-generator tools available, there are two problems with these tools: one, the tests have to be configured manually, which can lead to a time-consuming trial-and-error process until the desired performance charts in the appropriate load ranges are obtained; and two, the load generator tools do not produce output that is directly useful for creating a performance model of the application. In this paper, we present AutoPerf, a load generator tool designed to meet two distinct goals, named capacity analysis and profiling. The goal of capacity analysis is to run a comprehensive load test on a Web application, in an appropriately chosen range, at a minimal number of load levels, while still producing an accurate graph of throughput and response time vs load levels. The goal of profiling is to generate a detailed server resource usage profile per request type, without instrumenting the application code. This data (e.g. CPU execution time by Web server for one request) is crucial for parameterizing performance models of the application. AutoPerf intelligently plans and configures its load tests by using analytical results from queuing theory along with some heuristics. 
Results show that AutoPerf is able to run performance tests very efficiently while still producing an accurate chart of performance metrics.", "pdfUrls": [ "http://doi.acm.org/10.1145/3030207.3030222" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d4ff82de141fccda4c2543917299754e9c84a71b", "sources": [ "DBLP" ], "title": "AutoPerf: Automated Load Testing and Resource Usage Profiling of Multi-Tier Internet Applications", "venue": "ICPE", "year": 2017 }, "d53b79d49c185dafa2ec3e2b97e82d97fa259845": { "authors": [ { "ids": [ "2395051" ], "name": "Miguel Araujo" }, { "ids": [ "38208295" ], "name": "Pedro Ribeiro" }, { "ids": [ "1702392" ], "name": "Christos Faloutsos" } ], "doi": "", "doiUrl": "", "entities": [ "DBL-Browser", "Data Sources", "Evolving networks", "Hashtag", "Projections and Predictions", "Scalability", "Social network", "Time complexity" ], "id": "d53b79d49c185dafa2ec3e2b97e82d97fa259845", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "2e37d6b9a02ec31ea22252c37beae410c6b609a8", "2b1ec3fdf5b695de2d7ec17393ec0ad9445ceb61", "5c69418969d8d5a286306f1087108dcceb50d39e", "8eae36cabdce7cba7c1fc316596002cd84ed5e95", "00df20e5bf5d9d645184191a34c43a4108e92723", "0ecc96d6566880b4426e3d8a26638f423d98b8cf", "31e9390e1238de072efbd2ac9c3b44bd21a493f3", "5cde95416c54ee870f9049e2174a653efb106f26", "10ac4bc35c4fcdc09bebbd3b46e3a1223993b894", "4adffe0ebdda59d39e43d42a41e1b6f80164f07e", "53fd3689d600c6705cb843f320a22fd3b94cc46c", "c5b28cae82b14417f1250e58bb241367248e827d", "13dc7205433523de3b1baec4b063f883693d1472", "09849ca4b8159ff69721ebb2f25a81025188937e", "2275762a28582716db92df6d525ed2481c7d7f14", "8451c2812a1476d3e13f2a509139322cc0adb1a2", "125a4546d8e403b17bc29e8e9944944f8af69e72", "00d23e5c06f90bed0c9d4aec22babb2f7488817f", "1d0f25989452abbbc8feaf00a034ff110fc4b350", "1b7944d00948f4a1826763cecd3452ba2da89873", "6e9fd3b1776fdfd81b2fd7b61015edf097d2230b", 
"8a634a82681897822b14de28849c6548346206a0", "7cd4e19b3eeecf086574969a2cc9d5a4b987275b", "18c8f13b7b77bd0c67ddbdadf21cc3545b443068", "5de09330a31722c5ada4bcefd7cbba6c93bc0a13", "224ee20c67b4dccf664595dc5f9fabcad0defe9a" ], "paperAbstract": "Given an heterogeneous social network, can we forecast its future? Can we predict who will start using a given hashtag on twitter? Can we leverage side information, such as who retweets or follows whom, to improve our membership forecasts? We present TENSORCAST, a novel method that forecasts time-evolving networks more accurately than current state of the art methods by incorporating multiple data sources in coupled tensors. TENSORCAST is (a) scalable, being linearithmic on the number of connections; (b) effective, achieving over 20% improved precision on top-1000 forecasts of community members; (c) general, being applicable to data sources with different structure. We run our method on multiple real-world networks, including DBLP and a Twitter temporal network with over 310 million non-zeros, where we predict the evolution of the activity of the use of political hashtags.", "pdfUrls": [ "http://www.cs.cmu.edu/~christos/PUBLICATIONS/icdm17-tensor-cast-CR.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/d53b/79d49c185dafa2ec3e2b97e82d97fa259845.pdf", "s2Url": "https://semanticscholar.org/paper/d53b79d49c185dafa2ec3e2b97e82d97fa259845", "sources": [], "title": "TensorCast: Forecasting with Context using Coupled Tensors", "venue": "", "year": 2017 }, "d55df4e557a56ea969a99ce9f1b3164bd21c0b1d": { "authors": [ { "ids": [ "21242411" ], "name": "Victor Garcia-Flores" }, { "ids": [ "1744495" ], "name": "Eduard Ayguad\u00e9" }, { "ids": [ "24636606" ], "name": "Antonio J. 
Pe\u00f1a" } ], "doi": "10.1109/ICPP.2017.21", "doiUrl": "https://doi.org/10.1109/ICPP.2017.21", "entities": [ "Baseline (configuration management)", "Computation", "False sharing", "Garbage collection (computer science)", "Graphics processing unit", "Memory management", "Memory organisation", "Network switch", "PCI Express", "Paging", "Run time (program lifecycle phase)" ], "id": "d55df4e557a56ea969a99ce9f1b3164bd21c0b1d", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "121-130", "journalVolume": "", "outCitations": [ "25296a1ee792b8709e037aa3da7ea156c41f5ccf", "334dcae6a4a5a92942b40735aa7a4d5115a9c756", "19098c8b0c0ee1c8cc934e6cad4befb745cfc3e0", "1297d8c1a7e80969fa565e428cef370eb8d85ce0", "417ab9b8b003982222017ef585e19680366609f3", "1b62e2f7ac7a8d3c13773dfa23d05f8ab82b1f23", "8090a0702dae2a90bb614e6ef8de4f049e596233", "a4b0f11334d33de4f41bfeebfc520fb5b034a31e", "bd1e5be32f3f73538fbe3ad716faa11821e1fe42", "c7341dcd5e6c71b149edf808331ab1e8b37cd03b", "422b119f4dac6f60b4f8ba095895fbdfb49e214e", "4534f846aaa49c7026a08455a74b584c291d95a3", "5cdf290c839ba8753876bf255ed8c99fb4ba1299", "2b9c6afa4c1b4d37824ed979d2152bff05ef65f9", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "3be74a71c59c0e5e925aa84090fc1b1988ea6095", "7e6f33b71ab21b2bda3746d509b58b90bd6dfd1f", "4b7fc4ee34526cb6fb637b412980b3dd191169d3", "90fff23ed70049236d0201ea5a1de964d6779a04", "36de396ee9d1c9991e44c01be35e5206d79c3328", "1154b2fd6fb913b02eb6f64f5287a6b75a506e64", "2d6f002477015469075954c6748a1a85af352c94", "2a660e81e6501ec3489d962fe87448ecf277237f", "c251fc6c99d8b515f3f0844604a21af92cce647f", "10a0ab781e94a75fdcbde819f3f4cddcab768bbd", "35c3882db9e1b2bdf838122787968679595f61de", "7625ff9ed326373a3cf837f5ab668f36212aae01", "8009b1c8cc4af8d3d4b792ac32926487a428172e", "1671a230c17891da8d4ca023e5971cdc452cc779", "04379477b31622586b3a632a5ac528c664f88d7a" ], "paperAbstract": "General-purpose computing on GPUs has become more accessible 
due to features such as shared virtual memory and demand paging. Unfortunately it comes at a price, and that is performance. Automatic memory management is convenient but suffers from many drawbacks, preventing heterogeneous systems from achieving their full potential. In this work we analyze the challenges and inefficiencies of demand paging in GPUs, in particular on collaborative computations where data migrates multiple times between host and device. We establish that demand paging on GPUs introduces significant overheads for these kind of computations, and identify the issues of false sharing and unnecessary data transfers derived from the granularity at which data is migrated. In order to alleviate these problems we propose a memory organization and dynamic migration scheme to efficiently share data between host and device at fine granularities and without software intervention. We evaluate our design with a set of collaborative heterogeneous benchmarks and find it achieves 15% lower execution times on average with cache line-sized migrations, but severely degrading performance on benchmarks that access large blocks of contiguous memory. Page-sized migrations, although inefficient, provide on average a 47% execution time reduction with our design over a baseline system implementing demand paging. Our results suggest that cache line-sized migrations are not feasible in systems using a PCI-Express interconnect. In order to understand how future interconnect technologies will impact the feasibility of fine-grained migrations, we evaluate our scheme with various link latencies. 
We find interconnect latencies four to five times lower than PCI-Express are sufficient to effectively share data at finer granularities.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.21" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d55df4e557a56ea969a99ce9f1b3164bd21c0b1d", "sources": [ "DBLP" ], "title": "Efficient Data Sharing on Heterogeneous Systems", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "d57a01535daa997083bb22d740237385f1c00cc3": { "authors": [ { "ids": [ "3406720" ], "name": "Adri\u00e1n Castell\u00f3" }, { "ids": [ "2632706" ], "name": "Sangmin Seo" }, { "ids": [ "1714364" ], "name": "Rafael Mayo" }, { "ids": [ "2103230" ], "name": "Pavan Balaji" }, { "ids": [ "1684436" ], "name": "Enrique S. Quintana-Ort\u00ed" }, { "ids": [ "24636606" ], "name": "Antonio J. Pe\u00f1a" } ], "doi": "10.1007/978-3-319-64203-1_34", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_34", "entities": [ "Application programming interface", "Light-weight process" ], "id": "d57a01535daa997083bb22d740237385f1c00cc3", "inCitations": [ "c75e0c9f473d7817a581572742746330fa5aac8d" ], "journalName": "", "journalPages": "470-481", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://www.bsc.es/ca/printpdf/research-and-development/publications/glt-unified-api-lightweight-thread-libraries-1", "https://doi.org/10.1007/978-3-319-64203-1_34" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/d57a/01535daa997083bb22d740237385f1c00cc3.pdf", "s2Url": "https://semanticscholar.org/paper/d57a01535daa997083bb22d740237385f1c00cc3", "sources": [ "DBLP" ], "title": "GLT: A Unified API for Lightweight Thread Libraries", "venue": "Euro-Par", "year": 2017 }, "d590c5194ee0fd7be259e17b716d0e811546dac6": { "authors": [ { "ids": [ "1704327" ], "name": "Riccardo Guidotti" }, { "ids": [ "2296962" ], "name": "Anna Monreale" }, { "ids": [ "1717192" ], "name": "Mirco 
Nanni" }, { "ids": [ "1685102" ], "name": "Fosca Giannotti" }, { "ids": [ "1693341" ], "name": "Dino Pedreschi" } ], "doi": "10.1145/3097983.3098034", "doiUrl": "https://doi.org/10.1145/3097983.3098034", "entities": [ "Algorithm", "Cluster analysis", "Database transaction", "Dynamic data", "Experiment", "Personalization", "Pervasive informatics", "Synthetic data" ], "id": "d590c5194ee0fd7be259e17b716d0e811546dac6", "inCitations": [], "journalName": "", "journalPages": "195-204", "journalVolume": "", "outCitations": [ "09edbe78da07c3ca646129673e895fe19d55f4d1", "0d9ed4163b0dacc2374320c3fac075d76c8cd5d5", "089b89477bfd14f28269211fb96176000d35e4ed", "e3ea222947268b3ceefdf1dfdb7570384c97a1b5", "474f2533aab6f20c09d682767e35be06b806eba1", "35c15d7dfa1fddd2c0292146412ebbec704e8be9", "0404bd58e5f1edbd288cd69fcbc224485af415bf", "40c143569ee82fe68fbbcdc0e2f31cbfa59d6381", "a32efce7ec99dff817bd7d3b404a6ed73541dc8f", "ae94f7b286361952308c5dc686808ffb15659cc8", "354d4c776df3851829285cfdeee995f2cb84abae", "b298e06b9ee4b3056c68a023035f228527a891a2", "348e3623d609f3e575148bfba38d167592c7606b", "1f2f630d605663454a5d74d8cf6267982c547480", "1a98ee8fe0e2dda77c066c5612dddc6ae9d10ce2", "3cb884ae32eb68b93ffd98f45404c4ecb208763b", "85dec08cd4daa177a71d0dd467f38c8d5f3de306", "b66172bf5b9f187efe3ba603a7eddd366a850eee", "d2e4ce43617c41208f889fe7a06294f61e7c5cfa", "1955c6801bca5e95a44e70ce14180f00fd3e55b8", "cd2afe72319956535e3cd5147314d5a93d681a45", "1c7df8d792c662b403cac0622faa4f98e3257638", "48373cee0cad67c854b77e672a2e33ce769b336f", "ba31b62e59b0a36c24850751f146cfc4cc3fc996", "756e68ccc5071881a2675e4f7d741541590d5630" ], "paperAbstract": "Mining a large number of datasets recording human activities for making sense of individual data is the key enabler of a new wave of personalized knowledge-based services. In this paper we focus on the problem of clustering individual transactional data for a large mass of users. 
Transactional data is a very pervasive kind of information that is collected by several services, often involving huge pools of users. We propose txmeans, a parameter-free clustering algorithm able to efficiently partitioning transactional data in a completely automatic way. Txmeans is designed for the case where clustering must be applied on a massive number of different datasets, for instance when a large set of users need to be analyzed individually and each of them has generated a long history of transactions. A deep experimentation on both real and synthetic datasets shows the practical effectiveness of txmeans for the mass clustering of different personal datasets, and suggests that txmeans outperforms existing methods in terms of quality and efficiency. Finally, we present a personal cart assistant application based on txmeans", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098034" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d590c5194ee0fd7be259e17b716d0e811546dac6", "sources": [ "DBLP" ], "title": "Clustering Individual Transactional Data for Masses of Users", "venue": "KDD", "year": 2017 }, "d5974504d7dadca9aa78df800a924f2ac18f24d6": { "authors": [ { "ids": [ "26439192" ], "name": "Jaeseong Im" }, { "ids": [ "14834319" ], "name": "Jongyul Kim" }, { "ids": [ "5052343" ], "name": "Jonguk Kim" }, { "ids": [ "2789001" ], "name": "Seongwook Jin" }, { "ids": [ "1728068" ], "name": "Seung Ryoul Maeng" } ], "doi": "10.1145/3127479.3129254", "doiUrl": "https://doi.org/10.1145/3127479.3129254", "entities": [ "Ableton Live", "Adobe Creative Cloud", "Bare machine", "Cloud computing", "Hypervisor", "Vulnerability (computing)", "X86" ], "id": "d5974504d7dadca9aa78df800a924f2ac18f24d6", "inCitations": [], "journalName": "", "journalPages": "378-389", "journalVolume": "", "outCitations": [ "5af5aa924c170d30e9203801e97ebe347700c3a9", "af855ba2f503323eb58a98ef3a462dbecb036525", "0d9aea55a54ccc6ab64995d70bf6ae464af25f0d", 
"2d5f651bbc406e3af967aeb109320552d6f6071a", "0bc07d7e921b9065895fcc7c5e09983da119f3ea", "cb95bf2bce03c539c032b73d4364582e71f7f9e8", "04e9d7b1544ec76e3e5c24b46ccae5d5096b638b", "46b9d88a665a94f7bd0fd88d4d99ca71891ad182", "1e34138c955a13b3e8b369aec55e51ab4ba1ac11", "faf5511cc94530fde70eca3a70e7f795c92e4424", "924d3b9576ae2e9f51dc3ca07e2c0eb5782938d9", "07aca048b6dbc583fed7434890a213b68dd4e0f1", "b69382e95bbf9bd9f141bbb7e0d9ab2bd8353e2b", "136cf1976d2c91760e9ca766902cd1afefadcf01", "95daf16ac78c9c1d02974a57ed3941615949e528", "99948278735eeb0685c75f06d26821e1836949a1", "2593ebc83d22e846e2ba314c77f96a32bb7b2ef9" ], "paperAbstract": "The level of demand for bare-metal cloud services has increased rapidly because such services are cost-effective for several types of workloads, and some cloud clients prefer a single-tenant environment due to the lower security vulnerability of such enviornments. However, as the bare-metal cloud does not utilize a virtualization layer, it cannot use live migration. Thus, there is a lack of manageability with the bare-metal cloud. Live migration support can improve the manageability of bare-metal cloud services significantly.\n This paper suggests an on-demand virtualization technique to improve the manageability of bare-metal cloud services. A thin virtualization layer is inserted into the bare-metal cloud when live migration is requested. After the completion of the live migration process, the thin virtualization layer is removed from the host. We modified BitVisor [19] to implement on-demand virtualization and live migration on the x86 architecture.\n The elapsed time of on-demand virtualization was negligible. It takes about 20 ms to insert the virtualization layer and 30 ms to remove the one. 
After removing the virtualization layer, the host machine works with bare-metal performance.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3129254" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d5974504d7dadca9aa78df800a924f2ac18f24d6", "sources": [ "DBLP" ], "title": "On-demand virtualization for live migration in bare metal cloud", "venue": "SoCC", "year": 2017 }, "d59f05e0330879d39b285196e228af78dd94a1e9": { "authors": [ { "ids": [ "2746219" ], "name": "Anirudh Santhiar" }, { "ids": [ "2594759" ], "name": "Aditya Kanade" } ], "doi": "10.1145/3062341.3062361", "doiUrl": "https://doi.org/10.1145/3062341.3062361", "entities": [ "Asynchronous I/O", "Asynchronous serial communication", "Blocking (computing)", "Continuation", "Control flow", "Deadlock", "Library (computing)", "Non-blocking algorithm", "Parallel computing", "Scheduling (computing)", "Static program analysis" ], "id": "d59f05e0330879d39b285196e228af78dd94a1e9", "inCitations": [], "journalName": "", "journalPages": "292-305", "journalVolume": "", "outCitations": [ "e9526c64d991837c9c985c044212a5a0bc636860", "22a713d92a7f2a79f22c71e66b2511937b2a1a8f", "0900d18f28dfce5fc51667a25fd31e8a5402f862", "6f50453deeadb398c17472b000dfbf8fc57e10e0", "3a2750c84a151656ba81380a57a8c21241f4b880", "cb1c8e5ff90ecffd438c446595d0056d613669c7", "48a7dcd186ecdf7a391edf3211f202e7c5aacbbc", "ef57a11760e265d028ac095e4d32e9f66dfbbe6b", "b25836d60f8598f823dc245b65f5b8653dad81f6", "275f9372090dde36e9c7214aec71c175f31ac964", "1546de94875ecd17eb1592c5ef69a98f41835c3c", "104f72bad42832670403221625cf1c58fdc5d7a1", "11fc76a6283d2a763203cd52a6e207361f534c58", "5069f6267707df50e3578afaa8dfa9c15f3c3b07", "23051fcf6dcd6ad487351490ac18d8e500aef7a6", "197fb71fc7b78b9190c75ef017be612615522796", "3fa0947cf9f4ee071be15f8d3ea9090ea4ea45aa", "385742fffcf113656f0d3cf6c06ef95cb8439dc6", "995816c683740033b910a7cd8cc4724e8fed6eb7", "82b76ebc941a9aefb8b90a5e8ae225d9ad49c413", "68c8ae4b90e151526fe616ecdad4728c8014054e", 
"0cd328c7e014bb0326b4a7744df5d267ebf040cb", "35f53e1071db2b40146ea4bc9029dfbd82993f16", "ea94e3130aae79601c794d7ad63c15286d97db8c", "373f25f373c9c4b769a189c84df6bd955b434dc4", "df14ca40c73e4ae1e9b7142b6d770fe419afb75c", "535a96e92fa5d32ab66a23ff6aefd6f3ee40350c", "8eae091c38ee4cf75b521c0cb52f4785756ce39e", "454b06a17e2f6656e65935cb9d36dcf2b2044bf5", "0b5b22d02e6b02aa6090aef96943b2e421af30d0", "16a04050353b741974c7d0448e8b0149831bfdc0", "11e54809e8d52e3eed00bc9ef255fe6c7ecaf2f7", "4e6b47e020fe1d76288fc4821d2c735f53f7b928", "068e9f8dd77ecfcc2019fdf3123d163b159fe4eb", "64d2850e88a7bb5be698ee4e54f08f8a231edfcc", "3066557b1206eaab284dca4650d0f5fd0febac3f", "d0d5a715b58ac70c6f462eb448448fa87ed3c011", "67c2588c22bd4177f14f864a2091b6fbff34cfdd", "295449ab92654e2b51a6541522ca8b01d8a71e79", "28550d1c42aa18745b695c28a3fa360bc96e1534", "81f8dbb6af4cdb647ecf3059f55ea3ba9ea8e69e" ], "paperAbstract": "Asynchronous programming is a standard approach for designing responsive applications. Modern languages such as C# provide async/await primitives for the disciplined use of asynchrony. In spite of this, programs can deadlock because of incorrect use of blocking operations along with non-blocking (asynchronous) operations. While developers are aware of this problem, there is no automated technique to detect deadlocks in asynchronous programs. \n We present a novel representation of control flow and scheduling of asynchronous programs, called continuation scheduling graph and formulate necessary conditions for a deadlock to occur in a program. We design static analyses to construct continuation scheduling graphs of asynchronous C# programs and to identify deadlocks in them. \n We have implemented the static analyses in a tool called DeadWait. Using DeadWait, we found 43 previously unknown deadlocks in 11 asynchronous C# libraries. We reported the deadlocks to the library developers. 
They have confirmed and fixed 40 of them.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062361" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d59f05e0330879d39b285196e228af78dd94a1e9", "sources": [ "DBLP" ], "title": "Static deadlock detection for asynchronous C# programs", "venue": "PLDI", "year": 2017 }, "d5e72eb9d5eb35303b0885612f74a0b230355b35": { "authors": [ { "ids": [ "1875835" ], "name": "Fabien Hermenier" }, { "ids": [ "1727824" ], "name": "Ludovic Henrio" } ], "doi": "10.1145/3127479.3128608", "doiUrl": "https://doi.org/10.1145/3127479.3128608", "entities": [ "Cloud computing", "Digital subscriber line", "Scheduling (computing)", "Service-level agreement", "Software bug", "Test case", "Unit testing", "Virtual machine" ], "id": "d5e72eb9d5eb35303b0885612f74a0b230355b35", "inCitations": [], "journalName": "", "journalPages": "15-26", "journalVolume": "", "outCitations": [ "34c432c3a4a068e64eb34bb41c4e3e0f3762363c", "2f2cdd7b0c98b5e43b61272d2ac3ebb5cd29041d", "bf1fd1427473539242811b8641577cf792249172", "1e6cd21fee09c029447b597d6146a229a9d1377b", "7f51f3926f74b9bb6f9b69939027f339b4ecefa2", "8eaaf9e6a63b8d7562d27ec73aca36931815b83a", "2c13dcfdc5ea2d355a46fe326c371038a00ba7f5", "086820e40dc8046c30a8751394df167bec047fe1", "2a0d9931c1e794ffe96c4b6c37507e96a3c4cafc", "ab2febd96c54eebfe871891d59fdf65421170553", "10e0397e08f37ee5d3ba1f9e24ac9eb313c784f4", "725fee23093174b2d0bfd05284c015d56d1b9774", "0b93657965e506dfbd56fbc1c1d4b9666b1d01c8", "45ac0e85b3ff21bc12a7147df167be38f0d24b9c", "2859e75f421621ff776d55e533dc6ee7cb4b0a92" ], "paperAbstract": "In an Infrastructure As A Service (IaaS) cloud, the scheduler deploys VMs to servers according to service level objectives (SLOs). Clients and service providers must both trust the infrastructure. In particular they must be sure that the VM scheduler takes decisions that are consistent with its advertised behaviour. 
The difficulties to master every theoretical and practical aspects of a VM scheduler implementation leads however to faulty behaviours that break SLOs and reduce the provider revenues.\n We present SafePlace, a specification and testing framework that exhibits inconsistencies in VM schedulers. SafePlace mixes a DSL to formalise scheduling decisions with fuzz testing to generate a large spectrum of test cases and automatically report implementation faults.\n We evaluate SafePlace on the VM scheduler BtrPlace. Without any code modification, SafePlace allows to write test campaigns that are 3.83 times smaller than BtrPlace unit tests. SafePlace performs 200 tests per second, exhibited new non-trivial bugs, and outperforms the BtrPlace runtime assertion system.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3128608" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d5e72eb9d5eb35303b0885612f74a0b230355b35", "sources": [ "DBLP" ], "title": "Trustable virtual machine scheduling in a cloud", "venue": "SoCC", "year": 2017 }, "d5fef143400731d174f97052b45be908b995ee40": { "authors": [ { "ids": [ "8600116" ], "name": "Seung-Hwan Lim" }, { "ids": [ "34397071" ], "name": "Hyogi Sim" }, { "ids": [ "9441826" ], "name": "Raghul Gunasekaran" }, { "ids": [ "1730937" ], "name": "Sudharshan S. 
Vazhkudai" } ], "doi": "10.1145/3126908.3126924", "doiUrl": "https://doi.org/10.1145/3126908.3126924", "entities": [ "Petascale computing", "Requirement", "Supercomputer" ], "id": "d5fef143400731d174f97052b45be908b995ee40", "inCitations": [ "9a397280f7e809008ebe027b0d53e0a8701933d3" ], "journalName": "", "journalPages": "46:1-46:12", "journalVolume": "", "outCitations": [ "0a564c5117375287c60d3a27a96003f30396f62f", "483b2f4c7dbc72f7969b60cff0984f2062f02956", "14f03cc21d8eb6a4b6498b46e9780d60784356ee", "884fc7d1c8353a6ca2f0830a9f0f840a985afa7e", "bda863de316ddce8d36f98a4a5729a91192ef290", "3c03e217aeaf6734b5471d5f8930436e009d60af", "939b7366cee1616448ef7836da6fbcdd78644fa5", "05227501b3727de9b117907ecd77b0bff694869f", "49ccab36838d17adf8d87c157ff749da11b1d042", "e00b6d9ac717fc6539dffbb2032b7237c1e78a1e", "2fab90b0da9aaaf3825fb8ef70efff9abe7bf57f", "299506c535c7b43c2c68aa975128ac8c73557714", "3255eb881586f461e6029d1b66853d6ec6e9f862", "33ea4a2e274da902f05f89f83e1c83a77b76d141", "1cbaf27b55717e503284cfe339438c98da3a9867", "148527b97d18aa1d9e2e772f88752a616a7647ad", "0a368cdcb14046f6371f77f65d900d52c4da5486", "141e35263ab810983c90d47ad62eb4fab5e51717", "0b2838f760c139533ada9884e2cf55b431787a6a", "92e536c1789bf301f456b01590006c9a3eff6cd8", "988d1a223e2ee40f2474f729ac3ac53e012d8337", "30f2bfc8f764d0bcb65c0d22a7e206bf6d73f1d8", "7315a2c488db48da14af8de34a25ebd33d99984e", "9edab79d681bae0071aa784328b0ce134d909c10", "05dd4149b422b56cc037cd35eb041496b656d671", "c76e5f49cbe87de17a94cb0526cadf7d68fa5d3f", "002efcf9f0b58af153556b84395a37f6171195da" ], "paperAbstract": "The Oak Ridge Leadership Computing Facility (OLCF) runs the No. 4 supercomputer in the world, supported by a petascale file system, to facilitate scientific discovery. In this paper, using the daily file system metadata snapshots collected over 500 days, we have studied the behavioral trends of 1, 362 active users and 380 projects across 35 science domains. 
In particular, we have analyzed both individual and collective behavior of users and projects, highlighting needs from individual communities and the overall requirements to operate the file system. We have analyzed the metadata across three dimensions, namely (i) the projects' file generation and usage trends, using quantitative file system-centric metrics, (ii) scientific user behavior on the file system, and (iii) the data sharing trends of users and projects. To the best of our knowledge, our work is the first of its kind to provide comprehensive insights on user behavior from multiple science domains through metadata analysis of a large-scale shared file system. We envision that this OLCF case study will provide valuable insights for the design, operation, and management of storage systems at scale, and also encourage other HPC centers to undertake similar such efforts.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126924" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d5fef143400731d174f97052b45be908b995ee40", "sources": [ "DBLP" ], "title": "Scientific user behavior and data-sharing trends in a petascale file system", "venue": "SC", "year": 2017 }, "d65c2dcfcebe3388bca2539b2f0452653ef1766b": { "authors": [ { "ids": [ "40518823" ], "name": "Haoyi Xiong" }, { "ids": [ "1707763" ], "name": "Wei Cheng" }, { "ids": [ "40506516" ], "name": "Wenqing Hu" }, { "ids": [ "1798025" ], "name": "Jiang Bian" }, { "ids": [ "33389408" ], "name": "Zhishan Guo" } ], "doi": "10.1109/ICDM.2017.62", "doiUrl": "https://doi.org/10.1109/ICDM.2017.62", "entities": [ "Algorithm", "Bayesian inference in phylogeny", "Dimensionality reduction", "Discriminant", "Estimation theory", "Experiment", "Linear classifier", "Linear discriminant analysis", "Nonlinear system", "Well-posed problem" ], "id": "d65c2dcfcebe3388bca2539b2f0452653ef1766b", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": 
"525-534", "journalVolume": "", "outCitations": [ "8af33a4ec2b65b1ea70a19b2dbac991f57483918", "c4e02832c83fb2202f48bd591bedcb73c28b9f6a", "12656ee00e25be80b5aa5673a2b15c13fdc99097", "47fb1844540564131e3205b0ebc569ea657c5fe5", "9a9edf09e4151b3627f078c3cba540269c488c6b", "373825cbfd2f5ff6dafc070cd4e047a6469b8af6", "00cfa4e79925e1b85b32213e84afebb6666cbf55", "6b515eb9a47b035b83b4a639c185ef1a44daf405", "761e2d3c190682896052300fd90cc9c37909ccae", "444017cf790d05f3e7027112ab981cc299e802b3", "976e4369ef41c5206b673b2c71005389eb286c81", "32439bc74650b4915d61c94877fd70ec561173c4", "d2b2f97d26f24a7df20594925d512abfc403a80e", "793651f4cf210bd81922d173346b037d66f2b4a4", "af1ee4ef5b888ac6f22fea676c6de0bbc69f1323", "d1fa8485ad749d51e7470d801bc1931706597601", "0643f8328b63b0836204100b8b7eda1fcecbdc6e", "3236bbc0aa0eadc83dddccc99dfbc6dc407e3aee", "474d29cdaa1fac60c23fc63901c2fcfbe0b0b12c", "89cf3d45506765be4f18ea60a97116f129122fe7", "537da02c5f0054bf5770625753399f9ec9eed4c8", "75528f7fe90b380b9b883aa707bb7a248b0fe6f0", "193a03ea0f24c38e648fb8628bbd7571a908ab1c", "01837e13f650207ec8fba2f19ce0fc124404934c", "3608bf231bdcafbe20bfd5230a576884128fd685", "7cc528bda7296d89f032aa634e0aef7670928a31", "bd6099429bb7bf248b1fd6a1739e744512660d55", "37ba6685c14e60399f4f7e785191da0596486a55", "d1429e7b6440e1a40cc8e6e3ca12151053b8a4f4", "b11d4d5b2fc5aa4d09edf699684202c2d7052a7b", "49e0b651269cac6ed7693b016932f5790a595151", "4cc8b9dcaec185ebc7ff7a64e6e4cfe04ea22e97" ], "paperAbstract": "Linear Discriminant Analysis (LDA) is widely-used for supervised dimension reduction and linear classification. Classical LDA, however, suffers from the ill-posed estimation problem on data with high dimension and low sample size (HDLSS). To cope with this problem, in this paper, we propose an Adaptive Wishart Discriminant Analysis (AWDA) for classification, that makes predictions in an ensemble way. 
Comparing to existing approaches, AWDA has two advantages: 1) leveraging the Wishart distribution, AWDA ensembles multiple LDA classifiers parameterized by the sampled covariance matrices via a Bayesian Voting Scheme, which theoretically improves the robustness of classification, compared to LDA classifiers using a single (probably ill-posed) covariance matrix estimator; 2) AWDA updates the weights for voting optimally to adapt the local information of each new input data, so as to enable the nonlinear classification. Theoretical analysis indicates that AWDA guarantees a close approximation to the optimal Bayesian inference and thus achieves robust performance on high dimensional data. Extensive experiments on real-world datasets show that our approach outperforms state-of-the-art algorithms by a large margin.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.62" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d65c2dcfcebe3388bca2539b2f0452653ef1766b", "sources": [ "DBLP" ], "title": "AWDA: An Adaptive Wishart Discriminant Analysis", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "d67ac197273ef16ac74289a38698c888083846ed": { "authors": [ { "ids": [ "3074547" ], "name": "Qingsong Wei" }, { "ids": [ "40695618" ], "name": "Chundong Wang" }, { "ids": [ "1716895" ], "name": "Cheng Chen" }, { "ids": [ "28297358" ], "name": "Yechao Yang" }, { "ids": [ "1724199" ], "name": "Jun Yang" }, { "ids": [ "2744287" ], "name": "Mingdi Xue" } ], "doi": "10.1145/3126908.3126940", "doiUrl": "https://doi.org/10.1145/3126908.3126940", "entities": [ "Byte", "Byte addressing", "Durability (database systems)", "Flash memory", "Non-volatile memory", "Page cache", "Transactional memory", "Volatile memory" ], "id": "d67ac197273ef16ac74289a38698c888083846ed", "inCitations": [], "journalName": "", "journalPages": "56:1-56:12", "journalVolume": "", "outCitations": [ "24724ad8962a9e04eb496fddaefe9708f6960601", 
"10419c6f4aa50a36ed0b103c9ddb9aec45f133fe", "2f2941de162a17eda0ffbf7eef8981ee25808df5", "039124197fac7a16e36611d8beed94524dd5fed5", "2e663c1047ff14ddc2416229459922757a20edfb", "05a1357946de5eca42a477b7b268db4944219a2e", "b8ddec47f9fab1eddb5c9cacf703781dd5337b87", "4468cbc8a9ad13ebeaa210424e842f158415ab07", "64a9e074eb796cc07f97ab9be41448f338cfc555", "5c79c876d90eb55a081eba7437a7eb366e7c198f", "2e3bda19a2ed88e8a6e5cc415e27da551653ff1d", "03b6a916498fa8591201a2de5f22344609b1e457", "0c732e52164f97eba5124ca25947cb132078ce54", "0204f40221260d00c5ee63646560a40dcd7d97d1", "db97d135ddb8edec2dae6c10a830ac6e44045d94", "0d08856c7806d4693b091e358bae094e5ec6e483", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "78ec0c70c66980cc67d413004e6f88075c0201f8", "d04957ae69caf43707b13fa833e50119724688f1", "ac3b0e97a2004af682019ecc0c856114a3d63462", "aeb52e06d54017e2a09420865f1713c113b0ebdd", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "663798bc529bb73f2b3ca8640bb4fcbd83ce5c31", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "76e62e2e43e998a9ba2bb7dd07adf7f233023931", "1bf91711b94e507c62d91c79e72efcee5d21f627", "823116269044ab4c713373c66c7da3fcb495b459", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "bf5497e15f22233cbc2a4d0c3cc2c36f26738701", "922e6294fd9368684262816f64b9e13da636f3a8", "06d7e7353339136fb46318d3eb7345f894478820", "793f5e737284925a176f8ec82b3bb0d2178bb330", "1af7c3931769a270813a58b6d437f5a74752a532", "0b4ada5b8bdabf4ee378b0992b2d3b70de0c07f7", "607a678b5648121de7f0c8bfef619a60646bb8af", "47b78e7eb12859a141aed6a28a4e301eb0352629", "0903d6b3b5a26fea2cb7b4956f66365d71c78549", "2cfc8ed0f3967cea79a39e383edf310f65e2de21", "139e967aa7e44408783baecc83bee893976f7ba3", "3d00b0df652bd30656c5c3031a07793bce2f3f1f", "4228d3fee9d28b3b3a7dc8e9585d02652109029f", "265d18ced11e2e64d98afa97b0e86965e68101f7", "057919fa568a9503d97f5eed2d3aa397b7c8ef39", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "012ab4527d6aee2387c243d304c624f3b9cf03f3", "91c9224f1c1b67017632303dc4da43b6392bad4d", 
"2591895d4299497df60b8cc268a9321a121d05ea", "08d3396d3ff17e67acb0b1395312191931f0b49e", "a3de178c43b990b5755be4d640a7525f97ce2f33" ], "paperAbstract": "The byte-addressable non-volatile memory (NVM) is new promising storage medium. Compared to NAND flash memory, the next-generation NVM not only preserves the durability of stored data but has much shorter access latencies. An architect can utilize the fast and persistent NVM as an external disk cache. Regarding the system's crash consistency, a prevalent journaling file system needs to run atop an NVM disk cache. However, the performance is severely impaired by redundant efforts in achieving crash consistency in both file system and disk cache. Therefore, we propose a new mechanism called <u>t</u>ransact<u>i</u>onal <u>N</u>VM disk <u>ca</u>che (Tinca). In brief, Tinca jointly guarantees consistency of file system and disk cache and removes the performance penalty of file system journaling with a lightweight transaction scheme. Evaluations confirm that Tinca significantly outperforms state-of-the-art design by up to 2.5X in local and cluster tests without causing any inconsistency issue.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126940" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d67ac197273ef16ac74289a38698c888083846ed", "sources": [ "DBLP" ], "title": "Transactional NVM cache with high performance and crash consistency", "venue": "SC", "year": 2017 }, "d68e872c3d4c8a9c4cf0a2989e62bb0ca5ef1dd2": { "authors": [ { "ids": [ "3048886" ], "name": "Jake Wires" }, { "ids": [ "26378565" ], "name": "Pradeep Ganesan" }, { "ids": [ "1709411" ], "name": "Andrew Warfield" } ], "doi": "10.1145/3127479.3132021", "doiUrl": "https://doi.org/10.1145/3127479.3132021", "entities": [ "Approximation algorithm", "Data structure", "Snapshot (computer storage)", "Storage area network" ], "id": "d68e872c3d4c8a9c4cf0a2989e62bb0ca5ef1dd2", "inCitations": [], "journalName": "", "journalPages": 
"535-547", "journalVolume": "", "outCitations": [ "0fafd84369d00c2ec7d3261145c188811dc9e675", "088e3e939ad234b6fdd0e321290fb26937dc2553", "7b90149891786d6c34665ec2130628b16384eca7", "6d58155f4be0615ec688a6e6c21eccf12ad3a7da", "5ff311923cd8f80057b2cfc15cf7ec3ac0a6fdbc", "0f2fda69c3bd173a442009d30050c78b0df30dd9", "d0b6d2075a653d60452b6df0fced4ee0ae093dd2" ], "paperAbstract": "Efficient snapshots are an important feature of modern storage systems. However, the implicit sharing underlying most snapshot implementations makes it difficult to answer basic questions about the storage costs of individual snapshots. Traditional techniques for answering these questions incur significant performance penalties due to expensive metadata overheads. We present a novel probabilistic data structure, compatible with existing storage systems, that can provide approximate answers about snapshot costs with very low computational and storage overheads while achieving better than 95% accuracy for real-world data sets.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3132021" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d68e872c3d4c8a9c4cf0a2989e62bb0ca5ef1dd2", "sources": [ "DBLP" ], "title": "Sketches of space: ownership accounting for shared storage", "venue": "SoCC", "year": 2017 }, "d70d56ff19c4dc478f93aaf4995e94f16625e4b9": { "authors": [ { "ids": [ "21644330" ], "name": "Josep M. 
P\u00e9rez" }, { "ids": [ "2064715" ], "name": "Vicen\u00e7 Beltran" }, { "ids": [ "1699563" ], "name": "Jes\u00fas Labarta" }, { "ids": [ "1744495" ], "name": "Eduard Ayguad\u00e9" } ], "doi": "10.1109/IPDPS.2017.69", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.69", "entities": [ "Backward compatibility", "Benchmark (computing)", "Dependence analysis", "High- and low-level", "OpenMP", "Parallel computing", "Scheduling (computing)", "Side effect (computer science)", "Top-down and bottom-up design", "Universal instantiation" ], "id": "d70d56ff19c4dc478f93aaf4995e94f16625e4b9", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "809-818", "journalVolume": "", "outCitations": [ "4c9e7233cc630109709d9ccf9814140fd1d28c9f", "0836859831c6c69412ae633bcf47e96355a92d6b", "c5bacf71f301097ab8d47f158cbc3da6b3742976", "4b434f94fafc3ffc76e0c440897ccd222eaa38ac", "0606b9b2bedb67039eb1615f8ef5b13f42f8339a", "7af4113f5ae97ccf7428c495f91227439609afb1", "32ef8d891edde06cc01357fa5c4d1ab7fe631720", "471d4940e2ce38018b34175fdc4e0923085eaf28", "f609d6e3c365a428c6e348377c2e7d4933b3cc50", "3ccc1e2f8be4c99a38abd10560c606d9256d176b", "4f86cc14eb05db64d8b037833c0b416ea1b138ee" ], "paperAbstract": "The tasking model of OpenMP 4.0 supports both nesting and the definition of dependences between sibling tasks. A natural way to parallelize many codes with tasks is to first taskify the high-level functions and then to further refine these tasks with additional subtasks. However, this top-down approach has some drawbacks since combining nesting with dependencies usually requires additional measures to enforce the correct coordination of dependencies across nesting levels. For instance, most non-leaf tasks need to include a taskwait at the end of their code. While these measures enforce the correct order of execution, as a side effect, they also limit the discovery of parallelism. 
In this paper we extend the OpenMP tasking model to improve the integration of nesting and dependencies. Our proposal builds on both formulas, nesting and dependencies, and benefits from their individual strengths. On one hand, it encourages a top-down approach to parallelizing codes that also enables the parallel instantiation of tasks. On the other hand, it allows the runtime to control dependencies at a fine grain that until now was only possible using a single domain of dependencies. Our proposal is realized through additions to the OpenMP task directive that ensure backward compatibility with current codes. We have implemented a new runtime with these extensions and used it to evaluate the impact on several benchmarks. Our initial findings show that our extensions improve performance in three areas. First, they expose more parallelism. Second, they uncover dependencies across nesting levels, which allows the runtime to make better scheduling decisions. And third, they allow the parallel instantiation of tasks with dependencies between them.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.69" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d70d56ff19c4dc478f93aaf4995e94f16625e4b9", "sources": [ "DBLP" ], "title": "Improving the Integration of Task Nesting and Dependencies in OpenMP", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "d70d581545b3571b7f1c51dcf7a1178553a03fa6": { "authors": [ { "ids": [ "9223428" ], "name": "Muhammad Wajahat" }, { "ids": [ "40808784" ], "name": "Salman Masood" }, { "ids": [ "40791749" ], "name": "Abhinav Sau" }, { "ids": [ "2044504" ], "name": "Anshul Gandhi" } ], "doi": "10.1109/IGCC.2017.8323580", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323580", "entities": [ "Cache (computing)", "Cloud computing", "Elasticity (cloud computing)", "Memcached", "Multitier architecture", "Physical computing", "Software as a service", "Web server", "benefit" 
], "id": "d70d581545b3571b7f1c51dcf7a1178553a03fa6", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-6", "journalVolume": "", "outCitations": [ "1594118f2696b573f08510cf837f3b37db87face", "0d62acaefd4c4f41fd5814c8d9267d7798de9284", "39c80e494f44505c80a345c5228e6ec0e74673f6", "90b2e3db0f9a2d783382e6cfe69c927c56efc82d", "0f457b7f93736da38cdcffcbd76ae9cd7a0a6749", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "3cda09fdc91d7f85a138a4d56848a3a0708df76f", "047a8db8654292560b5d023a8ef61cd335938822", "4ab775b9811a8b9f0ff24fa06b535986149e51e3", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "5f948207acb92e6f4e09aa5f5a2cf7cdf2d80ba5" ], "paperAbstract": "Cloud computing has largely replaced dedicated and physical computing systems by providing critical features such as elasticity and on-demand access to resources. However, despite its many benefits, the cloud does have its limitations, such as limited or no control over the hardware and limited customization options. Users who deploy applications on the cloud only have control over software tuning and optimizations since the infrastructure is managed by the provider. In this paper, we analyze cloud-deployed Web applications that are multi-tiered and employ Memcached as the object caching layer. Memcached is a high performance memory caching system and, if there are no other bottlenecks in the system, the overall application performance should be dictated by Memcached. However, we show that other components of the system such as web servers, load balancers, and some underlying system configurations, severely impact application performance. 
We analyze these components and provide guidelines on their implementation and parameter tuning to minimize resource waste in the cloud.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323580", "http://www3.cs.stonybrook.edu/~anshul/igsc17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d70d581545b3571b7f1c51dcf7a1178553a03fa6", "sources": [ "DBLP" ], "title": "Lessons learnt from software tuning of a Memcached-backed, multi-tier, web cloud application", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "d72db57aeadc0ffcb3225d1711dbf9160fd92969": { "authors": [ { "ids": [ "39652968" ], "name": "Daniel A. G. de Oliveira" }, { "ids": [ "1766536" ], "name": "La\u00e9rcio Lima Pilla" }, { "ids": [ "1775855" ], "name": "Nathan DeBardeleben" }, { "ids": [ "2970218" ], "name": "Sean Blanchard" }, { "ids": [ "32735756" ], "name": "Heather M. Quinn" }, { "ids": [ "1693299" ], "name": "Israel Koren" }, { "ids": [ "1728532" ], "name": "Philippe Olivier Alexandre Navaux" }, { "ids": [ "2290186" ], "name": "Paolo Rech" } ], "doi": "10.1145/3126908.3126960", "doiUrl": "https://doi.org/10.1145/3126908.3126960", "entities": [ "Central processing unit", "Experiment", "Fault injection", "Fault model", "High- and low-level", "Java HotSpot Virtual Machine", "Parameter (computer programming)", "Self-organized criticality", "Word error rate", "Xeon Phi" ], "id": "d72db57aeadc0ffcb3225d1711dbf9160fd92969", "inCitations": [], "journalName": "", "journalPages": "28:1-28:12", "journalVolume": "", "outCitations": [ "19d686007a37f599b850bfbca391a5d7d869def8", "89d9709aea044f9ca12fd75993dc8ea3fc124db3", "2ae34c190902632d9bec7918f661426e98639256", "2b05d38d0b689da9a6edacbdc4043811944599b4", "89d4287d6231cf711ea0aab5164669ad9cf1f20f", "3b2c8bb9471bd52a40b72a61bfede076f4d414b5", "f88f0312f24fbd6a1869c8985ddc71bd2daa25b6", "2640471efddd30a2855a2a4d76fde3459d36cdf6", "5c0e8af36e20b8ea213561e8c3d706b4e2f2cc8d", 
"88e3c9faad4294d56fc20b768ceb6a25debf4ae6", "6561b4c794beef42e5acfca58ecd88e97febee3c", "48c305c1cf61ad49c6697e29f1309900bc448fc7", "1a805d51dceb525493da058092c2450472084305", "bb77e845951c6fa6ed3930dd82f05f92eb2ce33f", "0a810decea2263b0b897a6b65abe57bb051e5c43", "801dbcd2cfb996ffc55692da8bab92ce7c22128b", "9a49cab9f2c4ffa07a32fb3439eb39492b111f4c", "31f4bdde3501a9d52499668bf67f548220afbb79", "517c5cd1dbafb3cfa0eea4fc78d0b5cd085209b2", "ac9d098a0504f6d35a731d748d0d33e03085eb6a", "7f6c49645686f4814c01aca621341a0b244898b6", "7d312139c903396efeb7dd38c6ad9f0e6ff04366", "01d62cd850496455ce1616500f491690effa5c98", "18992850afed53b60ce696e20374a1e1b3d9da22", "21a550b87034075c28ca35a49327c59fe5876f9a", "28540222f0ed31ae930dc329e29eb17d280663f2", "9cfdde7811175c225c87a2ec3f67180bf53156cc", "37ed4f9684e774157f38655768b996b6b875e80a", "c8beee0f49a3e15587322ffadf5c44030703e2d9", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "fa3859818faed957591c1a34183e31ff19b8561c", "2255a818da9e190540b66161f3aceb7ac377ea08", "c755f858361e90dcfc181c3d39295a06456ef00b", "3b3d0b71196ce5dae90146473721d5863524a1c1" ], "paperAbstract": "We present an in-depth analysis of transient faults effects on HPC applications in Intel Xeon Phi processors based on radiation experiments and high-level fault injection. Besides measuring the realistic error rates of Xeon Phi, we quantify Silent Data Corruption (SDCs) by correlating the distribution of corrupted elements in the output to the application's characteristics. We evaluate the benefits of imprecise computing for reducing the programs' error rate. For example, for HotSpot a 0.5% tolerance in the output value reduces the error rate by 85%.\n We inject different fault models to analyze the sensitivity of given applications. We show that portions of applications can be graded by different criticalities. For example, faults occurring in the middle of LUD execution, or in the Sort and Tree portions of CLAMR, are more critical than the remaining portions. 
Mitigation techniques can then be relaxed or hardened based on the criticality of the particular portions.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126960" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d72db57aeadc0ffcb3225d1711dbf9160fd92969", "sources": [ "DBLP" ], "title": "Experimental and analytical study of Xeon Phi reliability", "venue": "SC", "year": 2017 }, "d7658e3c748da3668c525ece858cef8a0e2f7f6b": { "authors": [ { "ids": [ "39072666" ], "name": "Kanishka Lahiri" }, { "ids": [ "20419137" ], "name": "Subhash Kunnoth" } ], "doi": "10.1109/ISPASS.2017.7975272", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975272", "entities": [ "Decision tree", "Decision tree model", "Performance prediction", "Proxy server", "Simulation", "Stock and flow", "X86" ], "id": "d7658e3c748da3668c525ece858cef8a0e2f7f6b", "inCitations": [], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "77-86", "journalVolume": "", "outCitations": [ "62f3baa0461f185cccea92e96972a6427d066658", "4a642dc49fb43252418d138d45d31734cf99a7b0", "b8b8d587cdc6bd98515fe760ae9b34da335d94de", "6b94636097c125ec84ad7abbe063a1ea3ce825b8", "17d72a792b35fa9e65499b3e8f9c15a1f6f8b7b6", "0f2c62a6cb60699e46fe388c3f6eea83edc475b6", "39f263ae3061a83b89e1181071c19d24c8cd252b", "567d2061396061c928084ea6664f50c4200cc739", "8fc74f5708bb747ccc4a2a1dc736312baefe5684", "0d6a091c6f669862f62a18a5e6bee7da959c696d", "8681e808a9ebd7f7f155590e75fb63563a8aae6e", "3e5ef275907fc5abdaf6e9941b102cea846b5781" ], "paperAbstract": "Accurate IPC estimates are critical for generating performance projections of key workloads on future designs. However, the need to respond to projections requests in a timely manner in the face of rapidly evolving applications and software stacks and tight schedule constraints, often preclude design teams from executing detailed workload analysis, sampling and simulation flows for such purposes. 
We address this problem by taking advantage of the large amount of data that performance modeling teams commonly generate as part of architectural studies across thousands of workload scenarios. We propose two methods for exploiting these datasets: one that builds proxy suites, and another that builds decision-tree based classifiers. Both methods can generate IPC estimates for a target workload without collecting new workload samples, or running a single additional simulation. We discuss our experience using these techniques to estimate the IPC of numerous commercial workloads on four industrial x86 processor designs. The resulting IPC estimates were on average, within 2% of those obtained via measurements or detailed cycle-accurate simulations. Importantly, using these methods, we were able to generate IPC estimates for a target workload in a matter of hours to 1–2 days, compared to several weeks using conventional approaches.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975272" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d7658e3c748da3668c525ece858cef8a0e2f7f6b", "sources": [ "DBLP" ], "title": "Fast IPC estimation for performance projections using proxy suites and decision trees", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "d76c543db9e6bcacb8b915cd672da08641d0e069": { "authors": [ { "ids": [ "2732588" ], "name": "Yeon-sup Lim" }, { "ids": [ "1771229" ], "name": "Erich M. Nahum" }, { "ids": [ "1705427" ], "name": "Donald F. Towsley" }, { "ids": [ "1853504" ], "name": "Richard J. 
Gibbens" } ], "doi": "10.1145/3143361.3143376", "doiUrl": "https://doi.org/10.1145/3143361.3143376", "entities": [ "Electronic Case Filing System", "Fast path", "Scheduling (computing)", "Streaming media", "Testbed" ], "id": "d76c543db9e6bcacb8b915cd672da08641d0e069", "inCitations": [ "24b0b1ddc1a402e89804f612b66e1bbac8e3bf58", "3240bf446fb2fbdb93741ab7654614dd95645bd5" ], "journalName": "", "journalPages": "33-34", "journalVolume": "", "outCitations": [ "aebe75efbdade65e22f05b6b8c2386af8fc2b8ff", "4c711f6ff4b97b38a0322bb70adca4f79f4f547f", "58668f7720430470e285a9131fc0e054ddbcafd9", "0b701ba28f8c3c9aa3ac351cf60ab2c5d9bf98a7", "db055ee0d6d5b419e1365af40ed82f71f8e0903c", "3e364e301f026a197fde0608481dfa2c09e85b7b", "32a7818ee01bea31068a0076060c75e88283a16a", "6b9b5527cb91a6422c27a42728ac8ded7b757c7a", "b1d64bfc6c5ce1ba4c972a00bc4dd91a1a8571d2", "56893647902b4ab971fd092ce78687675b6942a7", "036277d492dd5777e87e5b33ffd809e5c617a37a" ], "paperAbstract": "Multi-Path TCP (MPTCP) is a new standardized transport protocol that enables devices to utilize multiple network interfaces. The default MPTCP path scheduler prioritizes paths with the smallest round trip time (RTT). In this work, we examine whether the default MPTCP path scheduler can provide applications the ideal aggregate bandwidth, i.e., the sum of available bandwidths of every paths. Our experimental results show that heterogeneous paths cause underutilization of the fast path, resulting in undesirable application behaviors such as lower streaming quality in a video than can be obtained using the available aggregate bandwidth. To solve this problem, we propose and implement a new MPTCP path scheduler, ECF (Earliest Completion First), that utilizes all relevant information about a path, not just RTT. We compare ECF with both the default and other MPTCP path schedulers, using both an experimental testbed and in-the-wild measurements. 
Our results show that ECF consistently utilizes all available paths more efficiently than other approaches under path heterogeneity, particularly for streaming video. In Web browsing workloads, ECF also does better in some scenarios and never does worse.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078505.3078552", "https://people.cs.umass.edu/~ylim/mptcp_ecf/ecf-techreport.pdf", "http://doi.acm.org/10.1145/3143361.3143376" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d76c543db9e6bcacb8b915cd672da08641d0e069", "sources": [ "DBLP" ], "title": "ECF: An MPTCP Path Scheduler to Manage Heterogeneous Paths", "venue": "CoNEXT", "year": 2017 }, "d82f427d3c56b879d6792c40741b5541a41244ab": { "authors": [ { "ids": [ "2351024" ], "name": "Mathieu Faverge" }, { "ids": [ "1786954" ], "name": "Julien Langou" }, { "ids": [ "1735015" ], "name": "Yves Robert" }, { "ids": [ "1708869" ], "name": "Jack J. Dongarra" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Analysis of parallel algorithms", "Bidiagonalization", "Critical path method", "Distributed shared memory", "Experiment", "Greedy algorithm", "Multi-core processor", "QR code", "QR decomposition", "R language", "Shared memory", "Tiled web map" ], "id": "d82f427d3c56b879d6792c40741b5541a41244ab", "inCitations": [ "e9687c7f101aab7488d40174a14210dc0bc70e60", "0930da5316f6f54fd61da83cb3fdc47bb1fca24f" ], "journalName": "CoRR", "journalPages": "", "journalVolume": "abs/1611.06892", "outCitations": [ "30555c7ea92f59a9b2d3455ea98b1138015dce37", "3314470a6c5e3c55391eda0e43d4e7f1adc9a89f", "8c280d3ae4c377fed1565132272c3f0a12efe5dd", "5c85d83c341357b6d17883cace475b87817aa84c", "c1d28d2ec0416de3ffb019c5066fb81090c25827", "904e2ac8131bf903f21e8c1fa9938978303db03b", "a3014174676cdbd83bce72b0e8fae5a654d68c76", "64b3435826a94ddd269b330e6254579f3244f214", "35280a0dabe96002c5626249834d89d176d11785", "0199bccf87b17291be6d8823152eabad2be4f242", "05da43f0b23936c4970d40db13a927b747241f7b", 
"0a5617cf569abe3c669a71f4c604d47ca334ae12", "0cea54c9ff794629795ee819681cb0e54dbfec84", "8112c4305b88d85199267e9e03d3a0aca4432059", "fe8ec8d8f1d43fbc3a2909ffd8a9f849facd849f", "96089f345c7436356cd2e48441183d57fe8b1ef3", "82292f38366cbe3167c9de2d71ce86c75fba78a9", "017ebdc37a3f36236fd9cf3f43b369937e4da3c0", "fec9ce47524e65b89c20d4dc1671c5b1a7a0a41d", "78c5054ea4414d4d4040c17552b8d52469ddcec2", "05064b678d3bb00397f897125da0f6168c8a5290", "0952f0c177df8a2dc8bbb3d3145c4f5f086efc1e", "f6430121b2af7d55b090a1c260570630e6cf1f41", "ebbfb679e478ed7519d1a7108d0efdc9abb99a70", "9cb5af4ec44a08510a31d5a6e4856152df89cd63", "95dbfb1ccad4fd46daf9e06153aff5a4effaa129", "825315415eba86846605512c31d8adaf173e6f8d", "72d8587018e9aee30f7656c14e8265ada24bcf83", "ad35fd818e6de7fa855f414f73888a9f4a72451b", "cc12a7f07f4755ab2ab4c538941fa696b8643837", "8c6cd84ad400a69e4e06be985e857888a9413d07", "05d31db3f6d6265a30e82b9e89435cacc7618308", "fb9416ccd43e5d3241a88c2dce9ab83fc3ab352d", "34e9fbff05b850125bc61c04cfe76110bc16c3eb", "0fc0098ffa8f513959279fe5bb74c8f450225924", "0def25a673a09c6620485c78bbb075176f31062f" ], "paperAbstract": "We consider algorithms for going from a \u201cfull\u201d matrix to a condensed \u201cband bidiagonal\u201d form using orthogonal transformations. We use the framework of \u201calgorithms by tiles\u201d. Within this framework, we study: (i) the tiled bidiagonalization algorithm BiDiag, which is a tiled version of the standard scalar bidiagonalization algorithm; and (ii) the R-bidiagonalization algorithm R-BiDiag, which is a tiled version of the algorithm which consists in first performing the QR factorization of the initial matrix, then performing the band-bidiagonalization of the R-factor. For both bidiagonalization algorithms BiDiag and R-BiDiag, we use four main types of reduction trees, namely FlatTS, FlatTT, Greedy, and a newly introduced auto-adaptive tree, Auto. 
We provide a study of critical path lengths for these tiled algorithms, which shows that (i) R-BiDiag has a shorter critical path length than BiDiag for tall and skinny matrices, and (ii) Greedy based schemes are much better than earlier proposed variants with unbounded resources. We provide experiments on a single multicore node, and on a few multicore nodes of a parallel distributed shared-memory system, to show the superiority of the new algorithms on a variety of matrix sizes, matrix shapes and core counts.", "pdfUrls": [ "https://arxiv.org/pdf/1611.06892v1.pdf", "http://arxiv.org/abs/1611.06892" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/d82f/427d3c56b879d6792c40741b5541a41244ab.pdf", "s2Url": "https://semanticscholar.org/paper/d82f427d3c56b879d6792c40741b5541a41244ab", "sources": [ "DBLP" ], "title": "Bidiagonalization with Parallel Tiled Algorithms", "venue": "ArXiv", "year": 2016 }, "d86a87c7efd2e75d256d96c9b009ac7b3249d0bc": { "authors": [ { "ids": [ "6415958" ], "name": "Dipanjan Sengupta" }, { "ids": [ "2659391" ], "name": "Yida Wang" }, { "ids": [ "1789372" ], "name": "Narayanan Sundaram" }, { "ids": [ "2999876" ], "name": "Theodore L. 
Willke" } ], "doi": "10.1007/978-3-319-58667-0_7", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_7", "entities": [], "id": "d86a87c7efd2e75d256d96c9b009ac7b3249d0bc", "inCitations": [], "journalName": "", "journalPages": "120-138", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_7" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d86a87c7efd2e75d256d96c9b009ac7b3249d0bc", "sources": [ "DBLP" ], "title": "High-Performance Incremental SVM Learning on Intel\u00ae Xeon Phi\u2122 Processors", "venue": "ISC", "year": 2017 }, "d878ecfe90c2503608b0218592c4348a924b6b29": { "authors": [ { "ids": [ "1855259" ], "name": "Luyu Li" }, { "ids": [ "24626316" ], "name": "Houxiang Ji" }, { "ids": [ "3070171" ], "name": "Chentao Wu" }, { "ids": [ "1684133" ], "name": "Jie Li" }, { "ids": [ "1697293" ], "name": "Minyi Guo" } ], "doi": "10.1109/ICPP.2017.31", "doiUrl": "https://doi.org/10.1109/ICPP.2017.31", "entities": [ "Bit error rate", "Cache (computing)", "Cloud computing", "Data striping", "Disk array", "Disk sector", "Failure rate", "Hit (Internet)", "Least frequently used", "Magnetic stripe card", "Money", "Page cache", "Partial-order planning", "Replication (computing)", "Simulation" ], "id": "d878ecfe90c2503608b0218592c4348a924b6b29", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "221-230", "journalVolume": "", "outCitations": [ "e0508499b4cf5794d5aeaf717e7ad9541e9c2bba", "229acac1bd70c57e6a17f2c24f153c06d54de252", "100bf4d517461fa01f45e31510bd007bc9030a54", "a4c529cfc2cd2ff5f3f1f018ea82fb2e22630695", "84703fbc003bfe24a7f2494a11d1b0530540f1f6", "05bb2531f030752977c2821245d7324cb0d0b654", "58b628792d3eb22a034a871ed3cf373afe591928", "32d23ce43877aa8cd385a8e01f366329dd015a5c", "a9b1069865ca751f770523e008ddd15eda878ec6", "5b9bf413df089ede568a8ccf438fe2cdae241c88", "118e5322d65a1d134e1586457cb546549d933816", 
"08111f084008cbf547f62c5ff55e27e196d49608", "0a5198973080ebbe044cfd1a07cb93c392dac003", "1354abf8baa8b6d39c65758072ae9d0b07227d5c", "1c54aa817fceb76fc2385501ee7888980586822e", "492cda7c95d4dc3dc99386cf78beb17bed9dbefa", "2d2fa890768b1299d6ed327e6e24df611158051e", "4a58e3066f12bb86d7aef2776e9d8a2a4e4daf3e", "9451f420a5d39d75d1e6c2cbbbae4544afb412a6", "268a88fedcf949ffda3bc0f5573ad5f1c8b0c29d", "3b547d706d33c110f96bf1c0e805ab8cc82afdbf", "085dbd3b78513ff5d92d7e78e6640e49d0affe5f", "4b4ee1ee9bbfd9527fba0bbd761bd61a59f96a48", "6607e684a68997ced5c021f19049e1e8c10d9f9e", "44d6041584863f6a8c192a56b0cb1c1a3a98bd59", "55abeb7684cdaddffd50cfd3ff3c1a7e8235b231", "0a121cd9c707b90ce97fb0e500ced79e8aecc34f", "1b167fff2f71d44f5fa0b34e69cda1eed638e38b", "dbb340896a55338071fe9ab1737dad53cf64e665", "0f6a32792d0882db35fe9391445d4322232b619e" ], "paperAbstract": "With the development of cloud computing, disk arrays tolerating triple disk failures (3DFTs) are receiving more attention nowadays because they can provide high data reliability with low monetary cost. However, a challenging issue in these arrays is how to efficiently reconstruct the lost data, especially for partial stripe errors (e.g., sector and chunk errors). It is one of the most significant scenarios in practice. However, existing cache strategies are not efficient for partial stripe reconstruction in 3DFTs, which is because the complex relationships among data and parities are usually ignored during the recovery process. To address this problem, in this paper, we proposed a comprehensive cache policy called Favorable Block First (FBF), which can speed up the partial stripe reconstruction of 3DFTs. FBF investigates the relationships among parity chains via allocating various priorities of shared chunks. Thus in the recovery process, by giving higher priorities to the chunks which are shared by more parities chains, FBF can dynamically hold the significant data in buffer cache for partial stripe reconstruction. 
Obviously, it increases the cache hit ratio and reduces the reconstruction time. To demonstrate the effectiveness of FBF, we conduct several simulations via Disksim. The results show that, compared to typical recovery schemes by combining with classic cache policies (e.g., LRU, LFU and ARC), FBF improves hit ratio by up to 2.47 times and accelerates the reconstruction process by 14.90%, respectively.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.31", "http://www.cs.sjtu.edu.cn/~guo-my/PDF/Conferences/C174.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d878ecfe90c2503608b0218592c4348a924b6b29", "sources": [ "DBLP" ], "title": "Favorable Block First: A Comprehensive Cache Scheme to Accelerate Partial Stripe Recovery of Triple Disk Failure Tolerant Arrays", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "d881d1e75144ab9bbe5f543dea115932f09ffe0a": { "authors": [ { "ids": [ "2498412" ], "name": "Yusuke Suzuki" }, { "ids": [ "1888960" ], "name": "Hiroshi Yamada" }, { "ids": [ "3095662" ], "name": "Shinpei Kato" }, { "ids": [ "1797612" ], "name": "Kenji Kono" } ], "doi": "10.1145/3127479.3132023", "doiUrl": "https://doi.org/10.1145/3127479.3132023", "entities": [ "Event-driven programming", "General-purpose computing on graphics processing units", "Graphics", "Graphics processing unit", "Multiplexing", "Multitenancy", "Scheduling (computing)", "Semiconductor consolidation" ], "id": "d881d1e75144ab9bbe5f543dea115932f09ffe0a", "inCitations": [], "journalName": "", "journalPages": "80-93", "journalVolume": "", "outCitations": [ "892b7a56dad27028ba2cc68a900b8ee220792e85", "25f855c968af75e4617f25c71aee3cedec1dedaf", "6bdacf836b47e40f1e8d5d8e9e1c8224d74a1cef", "2a3887bcb12ff9276ecb78874a045c387a9ac39c", "08d041581636f8eee888091b5539696d729f2bff", "c8f44e96ec16d131642aaed99b1ec7193fc2dc63", "256640c86e2f7f421e0ba7b62599453241dcbafd", "6db4621f95879008ca5ee5a3de327ac5c36f80e8", 
"7b69e7c3dd0ede0eacb2c42c82559367c8f194d4", "1883eb486e44c4a61864f538d2f0e90dca8f45f9", "43f0c099d44a68783a773f91cd03098a5252bf98", "5821447d63168b6a19ff534028a4aee8ace16747", "cab4f35deba2f6cc7c1c17ad25f0c2e73912a914", "6debd9d773c7aca19f18f3b4640c45f8ae12b254", "2900ebddc2dfb1e4bb7d7eac7384d7f4512b2b9a", "45f119a7334f482513f6d71f3fffa4e9e239622c", "466ff5d1f695c5472db9f6746ac29575f16de753", "3cacfad4443b31024fe836c06bdebb5d85c29fbb", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "0ae6c2f06cae336989cd4ff563fa7a4c0b5ebef3", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "69c801f327cca60723b870caab92114da164ac99", "39a6aa81ec3e20c1d500b99b560deb039c451b83", "40bb6830ec715b52e0fcbe7566f489dd2d0c25fe", "32d355a7a20f92ccda0608f83d7456870231c570", "5c9fa798a510b66a37c1b0852582fd7735ed088e", "271ef33248687205ebd60b738a61f308074aa0e1", "4954fa180728932959997a4768411ff9136aac81", "295521cfe1a56458d53a58613de5fb92c97c5c23", "7129b305ce45f83127e928e8510da9fae0783905", "3be74a71c59c0e5e925aa84090fc1b1988ea6095", "22fd20f23c40ecb9044cae7ee58b76d39fcf45b6", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "cfeb833da2d3ca20adfc05a762b3f68cffa13416", "988cb03fbe7169eb0a41e18e03c511b94f15d92d", "45fcaf11eaf31228a218a24663067dab509a1031", "8ff67bf3f713c1532e147a7271f9d649ca6c14c7", "71dc048c1cc564a8ae921f5e715e442f061d08af", "cffc3c4e332287ba5b67a20c0bf0fbcb8a6e5725", "00f355ce566bb51dc70925217c62e437cc7e14e2", "60e7f7f9367e952f53b8545ef441886a84e3ff58", "5c36d8bb0815fd4ff5daa8351df4a7e2d1b32934", "0be302437cec82b9200d61d13d3125e62a8ef499", "5cdb8b8e13f90ec7974565372a6b772faf6d611f", "a7a24f882aec173c01a9ed1eb52589f71d6c80f8", "0e191d726c2b15d54f0e4b5d50f6c0d2e426f3ad" ], "paperAbstract": "Graphics processing units (GPUs) have become an attractive platform for general-purpose computing (GPGPU) in various domains. Making GPUs a time-multiplexing resource is a key to consolidating GPGPU applications (apps) in multi-tenant cloud platforms. 
However, advanced GPGPU apps pose a new challenge for consolidation. Such highly functional GPGPU apps, referred to as GPU eaters, can easily monopolize a shared GPU and starve collocated GPGPU apps. This paper presents GLoop, which is a software runtime that enables us to consolidate GPGPU apps including GPU eaters. GLoop offers an event-driven programming model, which allows GLoop-based apps to inherit the GPU eaters' high functionality while proportionally scheduling them on a shared GPU in an isolated manner. We implemented a prototype of GLoop and ported eight GPU eaters on it. The experimental results demonstrate that our prototype successfully schedules the consolidated GPGPU apps on the basis of its scheduling policy and isolates resources among them.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3132023" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d881d1e75144ab9bbe5f543dea115932f09ffe0a", "sources": [ "DBLP" ], "title": "GLoop: an event-driven runtime for consolidating GPGPU applications", "venue": "SoCC", "year": 2017 }, "d8a1e28a77b1bd5c8903c94065e6dfb4d6160fb4": { "authors": [ { "ids": [ "37758552" ], "name": "Andre Pawlowski" }, { "ids": [ "3222192" ], "name": "Moritz Contag" }, { "ids": [ "3354846" ], "name": "Victor van der Veen" }, { "ids": [ "32497019" ], "name": "Chris Ouwehand" }, { "ids": [ "1713890" ], "name": "Thorsten Holz" }, { "ids": [ "3053948" ], "name": "Herbert Bos" }, { "ids": [ "1729961" ], "name": "Elias Athanasopoulos" }, { "ids": [ "1744275" ], "name": "Cristiano Giuffrida" } ], "doi": "", "doiUrl": "", "entities": [ "Application security", "C++", "Class hierarchy", "Compiler", "Executable", "High- and low-level", "Interaction", "Machine code", "MySQL", "Node.js", "Procedural programming", "Reverse engineering", "Type safety", "Virtual method table" ], "id": "d8a1e28a77b1bd5c8903c94065e6dfb4d6160fb4", "inCitations": [ "96f4ab88932315a5339e709577fbb5f998ee7cca", 
"23eda414507821cbb1503563bd9cbbe9ac2566d0", "79473986fe994d4aeb9d662e0b8e572758a4511b", "426db98a14ac8e5781921f205d5bc4097bb08ae6" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "6223d1f032077eb40a6f65d65c03cb75be29d61f", "75b8c0abfd45fd77d7a61da7d12bdf516e3139c7", "dbce3d345f3c43c51d8cc71c17d073e716a4d07c", "0db59f09437b7b90376f011f5150ed976ac66231", "39a59c25e8fcdbb6304e70c86e263539af25e62d", "52612064aa065b29930b56fbf54745883bba94dc", "4c1206d65920c8434d987e705bf21e9651fd21bb", "c36fd0fd0a15d9a2c9c111baa818de70250d345b", "6a8f65381a627a2db6c756a7185d9106f0acefec", "2ecccea4b57ae20d85174e170c0e1af49d9617d3", "30e76f32c323adb0ff340760380fe5a08505b641", "0e039df712774fcea67f214d9b5780c1dc250747", "b0ecd2efb16b91f8ff3856d719aca24626406695", "1c23c34377c76776b6130d9f18f25831d8f952f1", "6c58b6b5e6f2ef229dc80e5f85f8cd6be927ff66", "48a8b9bd6df63b13093cd67ee61911e7a485be57", "9b2585f7248c8b5a22e9c816506e01060213ca85", "196d341cdfb85f1a1d2e431fc40f34604c30bb59", "7d5e165a55d62750e9ad69bb317c764a2e4e12fc", "91607d7bc71823360de59b894ae37b4f1738bca0" ], "paperAbstract": "Reverse engineering of binary executables is a difficult task which gets more involved by the way compilers translate high-level concepts used in paradigms such as objectoriented programming into native code, as it is the case for C++. Such code is harder to grasp than, e. g., traditional procedural code, since it is generally more verbose and adds complexity through features such as polymorphism or inheritance. Hence, a deep understanding of interactions between instantiated objects, their corresponding classes, and the connection between classes would vastly reduce the time it takes an analyst to understand the application. The growth in complexity in contemporary C++ applications only amplifies the effect. In this paper, we introduce Marx, an analysis framework to reconstruct class hierarchies of C++ programs and resolve virtual callsites. 
We have evaluated the results on a diverse set of large, real-world applications. Our experimental results show that our approach achieves a high precision (93.2% of the hierarchies reconstructed accurately for Node.js, 88.4% for MySQL Server) while keeping analysis times practical. Furthermore, we show that, despite any imprecision in the analysis, the derived information can be reliably used in classic software security hardening applications without breaking programs. We showcase this property for two applications built on top of the output of our framework: vtable protection and type-safe object reuse. This demonstrates that, in addition to traditional reverse engineering applications, Marx can aid in implementing concrete, valuable tools e. g., in the domain of exploit mitigations.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/marx-uncovering-class-hierarchies-c-programs/", "http://www.cs.vu.nl/~giuffrida/papers/marx-ndss-2017.pdf", "https://www.cs.ucy.ac.cy/~eliasathan/papers/ndss17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/d8a1/e28a77b1bd5c8903c94065e6dfb4d6160fb4.pdf", "s2Url": "https://semanticscholar.org/paper/d8a1e28a77b1bd5c8903c94065e6dfb4d6160fb4", "sources": [ "DBLP" ], "title": "MARX: Uncovering Class Hierarchies in C++ Programs", "venue": "NDSS", "year": 2017 }, "d8da1fae13be213d8627e6e4f3451115db7b1e03": { "authors": [ { "ids": [ "2155642" ], "name": "Todd Warszawski" }, { "ids": [ "2740804" ], "name": "Peter Bailis" } ], "doi": "10.1145/3035918.3064037", "doiUrl": "https://doi.org/10.1145/3035918.3064037", "entities": [ "Adversary (cryptography)", "Anomaly detection", "Application programming interface", "Bitcoin", "Concurrency (computer science)", "Database transaction", "Denial-of-service attack", "E-commerce", "Isolation (database systems)", "Language-independent specification", "Programmer", "Self-hosting", "Serializability", "Web API", "Web application" ], "id": 
"d8da1fae13be213d8627e6e4f3451115db7b1e03", "inCitations": [ "8c17cb64a2153ed38d7a2517ac6b57083e0a0eff" ], "journalName": "", "journalPages": "5-20", "journalVolume": "", "outCitations": [ "0ea92d9b60792714e3a13fd5c47a19d4741a11d1", "22d3fc87f5d9ea17a3bb21f885655a1f9f2deb65", "182c0524aa353b6f4f4cb75a88ff3f5fc3bd86e0", "a62f2bcc232a295ed92cc93d9b02469f5f6e3a5c", "ab0458e4e7dcad49c9aae98dab5bd2dc8c099af4", "28573c2d17dd0fcb57c6d2171e7a2761d47c6ed0", "b1cb8339ed437ce74deaf4b080b33cf61bbebd5d", "61011eb60b242f529f58eecaf7029524920cd6cf", "4edd5bfd9d9e846ccfb1d1830fb5fdfa3ab2efce", "42cf741f1b38315c3376c301601cfee74571c6b6", "0b7c1bc9636d8cc66c36fb7e676d3badfe5df696", "861fbac82ae5ec0ea654d0d95ce4d48de62419ea", "96fc6181d8f72a1b75b6660049d6a6e8c2daa4ce", "16a04050353b741974c7d0448e8b0149831bfdc0", "32257d8d2b08c87e58c7b7f4b2430d58e4b51a81", "bca55bdc9fbc192b88848cf82d1679e3bee2f505", "033fd9ff33b69fbd8d9e24b98f77aa8adee06514", "fdb29e3a00c560d54e7994d133f93c110794612d", "c911a39f2b5d0a5d5962010685d30d7f6381a7ac", "6b5eeb5a017de5758e9773b52b0292cfc987ce3d", "c7e3b2bbdef407f64bd8f513b399837e018d0784", "00ac447d02035c26c7e2852c2457fe812e89038f", "11ef7c142295aeb1a28a0e714c91fc8d610c3047", "0c80eb8588fac0a763a15e1b7a33c6d885ce80a4", "d5229a1f0e3111bc9feaccb018eedc647e03cf5f", "6e90c995cc9caa0f7d9d68d536f5e16e9bcbbcd6", "2cdeab1558a3fe046ec7e1a36224bc46afcaff90", "454b06a17e2f6656e65935cb9d36dcf2b2044bf5", "08d8c62df23a5f6f8b79cc3639cc179938a48ba4", "47e88885a7e0ef276ba68d01febbc51a53ad1314", "02eb9ac41a6a5c18829cd7b4af7fbd753185238e", "73ca3c562121d9707daaab88afc997680b8cd010", "ab0d8f966a6fd16865b9a459ccb5383bf58e70a3", "b25e758ebb32d05683b9671d5880f4016888125f", "a1c6a7817891703ff1d103a23ab01961dda598cb", "0e227474e5e90dcdf796998a33126cbe70434ce1", "34d269619576cd827b9842581755c06dac344b16", "2346439ece014d5e3ce1564adc2a7ca098a37c8e", "635fe1706a2a719b9c7935712db6e720fd418fa9", "01cfec4ca6637fb90cec8afee2a2694aebb83a61" ], "paperAbstract": "In theory, database 
transactions protect application data from corruption and integrity violations. In practice, database transactions frequently execute under weak isolation that exposes programs to a range of concurrency anomalies, and programmers may fail to correctly employ transactions. While low transaction volumes mask many potential concurrency-related errors under normal operation, determined adversaries can exploit them programmatically for fun and profit. In this paper, we formalize a new kind of attack on database-backed applications called an ACIDRain attack, in which an adversary systematically exploits concurrency-related vulnerabilities via programmatically accessible APIs. These attacks are not theoretical: ACIDRain attacks have already occurred in a handful of applications in the wild, including one attack which bankrupted a popular Bitcoin exchange. To proactively detect the potential for ACIDRain attacks, we extend the theory of weak isolation to analyze latent potential for non-serializable behavior under concurrent web API calls. We introduce a language-agnostic method for detecting potential isolation anomalies in web applications, called Abstract Anomaly Detection (2AD), that uses dynamic traces of database accesses to efficiently reason about the space of possible concurrent interleavings. We apply a prototype 2AD analysis tool to 12 popular self-hosted eCommerce applications written in four languages and deployed on over 2M websites. 
We identify and verify 22 critical ACIDRain attacks that allow attackers to corrupt store inventory, over-spend gift cards, and steal inventory.", "pdfUrls": [ "http://www.bailis.org/papers/acidrain-sigmod2017.pdf", "http://doi.acm.org/10.1145/3035918.3064037" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d8da1fae13be213d8627e6e4f3451115db7b1e03", "sources": [ "DBLP" ], "title": "ACIDRain: Concurrency-Related Attacks on Database-Backed Web Applications", "venue": "SIGMOD Conference", "year": 2017 }, "d976bb3e73493db9b059e61601e1cfe8e48c597c": { "authors": [ { "ids": [ "2477651" ], "name": "Xiuxia Zhang" }, { "ids": [ "1930057" ], "name": "Guangming Tan" }, { "ids": [ "2044823" ], "name": "Shuangbai Xue" }, { "ids": [ "1708704" ], "name": "Jiajia Li" }, { "ids": [ "3372732" ], "name": "Ke-ren Zhou" }, { "ids": [ "2750388" ], "name": "Mingyu Chen" } ], "doi": "10.1145/3018743.3018755", "doiUrl": "https://doi.org/10.1145/3018743.3018755", "entities": [ "Assembly language", "BLAS", "Bare machine", "Benchmark (computing)", "Convolution", "Graphics processing unit", "Kepler (microarchitecture)", "Memory hierarchy", "Microarchitecture", "Performance tuning", "Reverse engineering", "Throughput", "Toolchain" ], "id": "d976bb3e73493db9b059e61601e1cfe8e48c597c", "inCitations": [ "1fb1d7d86c689a325e5367473474d64edff18dda" ], "journalName": "", "journalPages": "31-43", "journalVolume": "", "outCitations": [ "1b6a6da2da8b2d7bd574f55901237c7b8d447772", "11838f52576af94e0194b4bd7d1f2213f34213f6", "05994dcf5e185a7e433438213547a9b952e146d9", "f2c2fbc35d0541571f54790851de9fcd1adde085", "888d4ade3a7552ebafe997988a82cdd16128961e", "deaa56b294715416cc348396ad558653ba88dd07", "0d6f11c5fccf2c2ed729602aceacf548fb0d5c08", "114711c2516be1f5293f6d8d242c852b11097e9d", "b951b732c1400162ddd6da6c40d1eb1047d3b979", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "23177452df15b652dd54a59324502b92c99687a7", "27bdd0a73b3d5f2c83ac7dfae447c20653dffa2d", 
"16b5fa19661e1e26c1b967104948bf2f031a3612", "061356704ec86334dbbc073985375fe13cd39088", "8d2fb424ccd5ae011dd444ac4fa8282bad9e76ab" ], "paperAbstract": "In this paper, we present a methodology to understand GPU microarchitectural features and improve performance for compute-intensive kernels. The methodology relies on a reverse engineering approach to crack the GPU ISA encodings in order to build a GPU assembler. An assembly microbenchmark suite correlates microarchitectural features with their performance factors to uncover instruction-level and memory hierarchy preferences. We use SGEMM as a running example to show the ways to achieve bare-metal performance tuning. The performance boost is achieved by tuning FFMA throughput by activating dual-issue, eliminating register bank conflicts, adding non-FFMA instructions with little penalty, and choosing proper width of global/shared load instructions. On NVIDIA Kepler K20m, we develop a faster SGEMM with 3.1Tflop/s performance and 88% efficiency; the performance is 15% higher than cuBLAS7.0. Applying these optimizations to convolution, the implementation gains 39%-62% performance improvement compared with cuDNN4.0. 
The toolchain is an attempt to automatically crack different GPU ISA encodings and build an assembler adaptively for the purpose of performance enhancements to applications on GPUs.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018755", "http://fruitfly1026.github.io/static/files/p31-zhang.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d976bb3e73493db9b059e61601e1cfe8e48c597c", "sources": [ "DBLP" ], "title": "Understanding the GPU Microarchitecture to Achieve Bare-Metal Performance Tuning", "venue": "PPOPP", "year": 2017 }, "d977c9a1b18191c924bc9e5529eb6fb25b75489d": { "authors": [ { "ids": [ "2937015" ], "name": "Brandon Schlinker" }, { "ids": [ "1900189" ], "name": "Hyojeong Kim" }, { "ids": [ "22601737" ], "name": "Timothy Cui" }, { "ids": [ "1704220" ], "name": "Ethan Katz-Bassett" }, { "ids": [ "1764637" ], "name": "Harsha V. Madhyastha" }, { "ids": [ "2033985" ], "name": "\u00cdtalo S. Cunha" }, { "ids": [ "6991078" ], "name": "James Quinn" }, { "ids": [ "31546338" ], "name": "Saif Hasan" }, { "ids": [ "9759285" ], "name": "Petr Lapukhov" }, { "ids": [ "2407352" ], "name": "Hongyi Zeng" } ], "doi": "10.1145/3098822.3098853", "doiUrl": "https://doi.org/10.1145/3098822.3098853", "entities": [ "Border Gateway Protocol", "Egress filtering", "Peering", "Real-time computing", "Routing", "Software-defined networking" ], "id": "d977c9a1b18191c924bc9e5529eb6fb25b75489d", "inCitations": [ "00fd0d48da95aa33d361ff65c9a888ab8503b8c1", "b30584ba33c8c2b1b8c54b2421538bfd1cbb32da", "13999ef7c4ae0573e51d5f981fd03b343590ce19", "a4131cb2d0e7d7b7b696a9fc40c37d8eb7eabed3" ], "journalName": "", "journalPages": "418-431", "journalVolume": "", "outCitations": [ "52d9fbccf0a793d9e090e3b33889acc46a811fa8", "64a6dd2a598d5f012a54fe6ca06d4f7235f66626", "267aa4a091dd43f7eb4ffad4c63405229fd31f1d", "0edd07551910c48f90fa07f7c5da50c8211fb994", "9570d6075ecaf7f5dc28e99edfabc64914d44ca5", "22bd3a35b9550bc5b570a0beee5648eb9033be3b", 
"26c2fc621d06f4b85cf145b04c963d4f2d59ccc0", "1848fea8c541b7d813f2cf21f435fac894223b31", "186b61ebf4d64a1727a30b31afb969e5f2f293a5", "0b23f0ee49ed34443ca62cb085c0addd75eac6a9", "1fddc54bc1a1610a1162fde15ac6a87336bffc3e", "0b4fdb6542884d3874a29ce072a38370d0747b47", "23208b2f513af5c3d7b40f0826ed367da77396c9", "55ef72fe52990f491ab939b91d75b7899a66180f", "19e4c40941a3767afd51f200db85c4289f189e24", "8e967cb870e5da7830a8d05030e0dc1389298bbf", "13bf13f019632a4edb967635e72e3e140f89e90e", "0f1a89bd89497587049eef69534cd15feb3c620b", "7efe0dc4cde074bd87089491a6f95dde84397cca", "2a628e4a9c5f78bc6dcdf16514353336547846cc", "4f2c94a8d689863859ac849ebb83823770cf3d6a", "2c55cc95b6014bfa3f34307af141d0ddaa771c64", "2daa396ebf3a77e11f88ca82b2cddec0eec8dfd6", "0387c89a21e113eb69fcde8a11c82a072e3a1af1", "546c0cfed69f188a0ca661c8db9b099f554a63d1", "0f2f3e328608c9409adc820d82bfaf5940d3a8db", "4687fdf3c77ef00700fdf1399f7dd81bfe87ef97" ], "paperAbstract": "Large content providers build points of presence around the world, each connected to tens or hundreds of networks. Ideally, this connectivity lets providers better serve users, but providers cannot obtain enough capacity on some preferred peering paths to handle peak traffic demands. These capacity constraints, coupled with volatile traffic and performance and the limitations of the 20 year old BGP protocol, make it difficult to best use this connectivity.\n We present Edge Fabric, an SDN-based system we built and deployed to tackle these challenges for Facebook, which serves over two billion users from dozens of points of presence on six continents. We provide the first public details on the connectivity of a provider of this scale, including opportunities and challenges. We describe how Edge Fabric operates in near real-time to avoid congesting links at the edge of Facebook's network. 
Our evaluation on production traffic worldwide demonstrates that Edge Fabric efficiently uses interconnections without congesting them and degrading performance. We also present real-time performance measurements of available routes and investigate incorporating them into routing decisions. We relate challenges, solutions, and lessons from four years of operating and evolving Edge Fabric.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098853", "http://www.cs.princeton.edu/courses/archive/fall17/cos561/papers/EdgeFabric17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d977c9a1b18191c924bc9e5529eb6fb25b75489d", "sources": [ "DBLP" ], "title": "Engineering Egress with Edge Fabric: Steering Oceans of Content to the World", "venue": "SIGCOMM", "year": 2017 }, "d99df8972ed69e629cfca79261c5fb9c83bb9bb4": { "authors": [ { "ids": [ "3102340" ], "name": "Mingcong Song" }, { "ids": [ "38639927" ], "name": "Yang Hu" }, { "ids": [ "2186316" ], "name": "Huixiang Chen" }, { "ids": [ "39429972" ], "name": "Tao Li" } ], "doi": "10.1109/HPCA.2017.52", "doiUrl": "https://doi.org/10.1109/HPCA.2017.52", "entities": [ "Convolutional neural network", "Fastest", "Graphics processing unit", "Kernel (operating system)", "Microarchitecture", "Multi-Touch Collaboration Wall", "Pervasive informatics", "Requirement", "Scheduling (computing)", "Throughput" ], "id": "d99df8972ed69e629cfca79261c5fb9c83bb9bb4", "inCitations": [ "d0556be65e8564ab8bb3e26b6a0146a62027bc40", "ea47b712d835815e0457297139f072f911e181d3", "66dddfbad0e89894f904425320e970a2a0d0b1dd", "5ce80b41443518a14d800f6b93b4057bbb007432", "b4871945bc9d8e6fe69d283651ecbddc87c30a2c", "c6b9fb2b5bf87d87550e2dee0d45cda00d6a3373" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "1-12", "journalVolume": "", "outCitations": [], "paperAbstract": "Accelerating Convolutional Neural Networks (CNNs) on GPUs usually involves two stages: 
training and inference. Traditionally, this two-stage process is deployed on high-end GPU-equipped servers. Driven by the increase in compute power of desktop and mobile GPUs, there is growing interest in performing inference on various kinds of platforms. In contrast to the requirements of high throughput and accuracy during the training stage, end-users will face diverse requirements related to inference tasks. To address this emerging trend and new requirements, we propose Pervasive CNN (P-CNN), a user satisfaction-aware CNN inference framework. P-CNN is composed of two phases: cross-platform offline compilation and run-time management. Based on users' requirements, offline compilation generates the optimal kernel using architecture-independent techniques, such as adaptive batch size selection and coordinated fine-tuning. The runtime management phase consists of accuracy tuning, execution, and calibration. First, accuracy tuning dynamically identifies the fastest kernels with acceptable accuracy. Next, the run-time kernel scheduler partitions the optimal computing resource for each layer and schedules the GPU thread blocks. If its accuracy is not acceptable to the end-user, the calibration stage selects a slower but more precise kernel to improve the accuracy. Finally, we design a user satisfaction metric for CNNs to evaluate our Pervasive design. 
Our evaluation results show P-CNN can provide the best user satisfaction for different inference tasks.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.52" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d99df8972ed69e629cfca79261c5fb9c83bb9bb4", "sources": [ "DBLP" ], "title": "Towards Pervasive and User Satisfactory CNN across GPU Microarchitectures", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "d9b6188d43078848214726e482fb72d0b0fcbd29": { "authors": [ { "ids": [ "2130920" ], "name": "Reena Panda" }, { "ids": [ "2286178" ], "name": "Xinnian Zheng" }, { "ids": [ "1703238" ], "name": "Lizy Kurian John" } ], "doi": "10.1109/ISPASS.2017.7975273", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975273", "entities": [ "Address space", "DSPACE", "Data access", "Digital footprint", "Dynamic random-access memory", "Locality of reference", "Memory hierarchy", "Prefetcher", "Principle of locality", "Software transactional memory", "Working set" ], "id": "d9b6188d43078848214726e482fb72d0b0fcbd29", "inCitations": [ "a2eb4235e025c969e9d3ad2f57eac914a98e459e", "dc4a0dae3b5bfcaa39f6cff3f6d2d07c0c2b2a49" ], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "87-96", "journalVolume": "", "outCitations": [ "fef5795f555a1200b522c5eb303c7ad7e9c2beeb", "02fe9b425b78a0211ccfaa2710f949fa2a769406", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "1c32ad0a42109fab826eb3054df7cfc33b424125", "bc2bba7e1bb4e7d8307aa36bdc5ee86cdd61cc58", "3b095e04f13a487c0b8679e64098d7929c1d7db7", "b77e7ae60aed8f307075c5a261274938da41e1e8", "12df05f3e38c615fd613e211abcd24da3b269124", "554b44bc1290f16fb1d3117de078ac832f341d4e", "2790284b6a16790d03b0cb5ed46bc6b0fecde1eb", "4a58e3066f12bb86d7aef2776e9d8a2a4e4daf3e", "0f963eef62ea6bbb538f60184aa24a7405a64ae9", "199932878100e8fa2234facce3d3a2b4b24da391", 
"f51026239f5786b31ee28dbb6dee4024a2f6dcd7", "11b0e5ee27e7fdf989632f157ff204fc8962918c", "db7fd20dc31565003fb992405ba83975d7f5f681", "8c6532bf7c1dc865ee8352c608f0a234e15a7d07", "dd9cb1f18de6ac02f9bdb1272ab8f2ba115d8011", "40c1e101b18ffd33fcdfead2759c4662c6e6585c", "59680d7d7feef0286605cb81d35bc8fa2292a608", "006d9d8e348f68d2e3353981c3a770385d71858a", "c27191534f96979fada3d7c9d42055d999cb69c8", "2af32811c6bf3be891ee84b19248540dfa1aa58f", "048336a0dc1029416ce47c78b9a5cba8422e6efd", "0653e2ed9f683868cb4539eb8718551242834f6b", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "214539c3dc1da3eefa9cdc65079b8dea72afb07c", "d167b5c8b21c642662000417f313798d375ff38e", "a9322c99478309a4384f4dd1de4d764e321951db", "32287c3c741e198cd6b95c31197d7f0fac235852" ], "paperAbstract": "With increasing memory footprints and working set sizes of emerging workloads, system designers need to evaluate new memory hierarchies with large last level caches (LLCs), DRAM caches, large DRAMs, etc. to optimize performance gains. This requires a deep understanding of the memory access behavior of the target workloads. It is important to have accurate mechanisms to generate address streams to study memory access behavior at and beyond LLCs. Prior memory trace generation proposals such as WEST and STM utilize LRU stack distance to capture temporal locality in the data access streams. In addition, STM also captures spatial locality information by modeling stride-based access patterns. However, a key drawback of prior models is that the metadata that they store to capture locality is significantly high. In this paper, we propose an efficient, light-weight methodology to generate accurate traces for modeling address Streams for LLC And Beyond (SLAB). SLAB leverages the key insight that memory access patterns can be efficiently characterized by combining locality and reuse statistics captured from both instruction and address streams. 
Compared to prior studies, which capture patterns solely based on data addresses, using the additional instruction stream localized information significantly reduces the space complexity. For programs where dominant instruction-localized patterns do not exist, SLAB exploits multi-granularity data reuse distances. We evaluate SLAB using SPEC CPU2006 and Cloudsuite benchmarks. With meta-data sizes of less than 7% of the original LLC traces, SLAB demonstrates over 91% accuracy in replicating original application behavior across ∼9000 different cache, prefetcher and memory configurations.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975273" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/d9b6188d43078848214726e482fb72d0b0fcbd29", "sources": [ "DBLP" ], "title": "Accurate address streams for LLC and beyond (SLAB): A methodology to enable system exploration", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "da02334ce53512f2f501bdca5ac25d65bd395974": { "authors": [ { "ids": [ "2038993" ], "name": "Zhigang Wang" }, { "ids": [ "39388942" ], "name": "Lixin Gao" }, { "ids": [ "40168035" ], "name": "Yu Gu" }, { "ids": [ "2917422" ], "name": "Yubin Bao" }, { "ids": [ "40024209" ], "name": "Ge Yu" } ], "doi": "10.1145/3127479.3128612", "doiUrl": "https://doi.org/10.1145/3127479.3128612", "entities": [ "Algorithm", "Computation", "Estimation theory", "Expectation\u2013maximization algorithm", "Parallel computing", "Speedup", "Synchronous optical networking", "Synthetic data" ], "id": "da02334ce53512f2f501bdca5ac25d65bd395974", "inCitations": [], "journalName": "", "journalPages": "1-14", "journalVolume": "", "outCitations": [ "7db9bac1d25b11c57449c35b0ef91fc18d29f036", "5ce105070c84399dc68649aaf19fe426b27f99e9", "d641ce8fe01ba5ae0ade43feaa1e1e2a7f4839b8", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "52aad68b6a150c5db537ef64c23e07d8abd58cc1", 
"2f7678f96837afbc1f58680ad844c35ffa52b0c1", "b293405e9b3cfac8c58083b38bdc85d18dd0c187", "0c1d559b1d48fb706f8b73d69e951273fc0ed93b", "c397d62eb85c07ad41c82ce558824267f792f85e", "2b3113b7fda6414548e88fc664f3be96d5209830", "a1784eb3707171958986e0eb3cb36fc561a371ea", "076a2e7be390ae5ad4e54e934a33f002be75f578", "d319ca65f99bfc5602b15eb8fe741d1e2e03f33a", "02df9340e08e1e5085f1257bb6ddda4cd225e1d3", "631894dedf05309fde4fe443aed083803445538c", "09f72f02083830c1881b86e6016e1fe3fe41f65f", "9241ea3d8cb85633d314ecb74b31567b8e73f6af", "017a7330fe2d2b5469fcc52ca933c41c2907ecca", "a2986864eb361e4e66bd4c10f8fd3bf129408147", "51694f797b0d4a7e03b1e9a6587a9d4976e92297", "0df99c05fc4b09ebc181733008afea68c9460564", "0122e063ca5f0f9fb9d144d44d41421503252010", "1156f60e40548096df49528b1342bb3e88b0f378", "2d2fbf2c8d194b2f077763055ed7b80471e65159", "6f5c1f3c7015c0e15b28c8a2d2b8178be287fa75", "4e08017cd0da3cfec22d97987e87c8e0c7f1f5a2", "0114d6ce120359304d80a82fd5085c5d985943cb", "0608d9937c074520cdc93cc444cc1c77039c5332", "043afbd936c95d0e33c4a391365893bd4102f1a7", "03fb875d5022a5e98f19c271e2403232acc55318", "be15a650ca6d26dd1403fa317dfdc2550d08afad", "ba240fac7a9af2e5e0bf8016d7e7fd039c5207f0", "4954fa180728932959997a4768411ff9136aac81", "9fc580c7a4b9ca2b0252e6f5718269e498e8f2f5", "31bd159b02068fbe1994b7a5e9d7b91adab0d142", "a718b85520bea702533ca9a5954c33576fd162b0" ], "paperAbstract": "Myriad of parameter estimation algorithms can be performed by an Expectation-Maximization (EM) approach. Traditional synchronous frameworks can parallelize these EM algorithms on the cloud to accelerate computation while guaranteeing the convergence. However, expensive synchronization costs pose great challenges for efficiency. 
Asynchronous solutions have been recently designed to bypass high-cost synchronous barriers but at expense of potentially losing convergence guarantee.\n This paper first proposes a flexible synchronous parallel framework (FSP) that provides the capability of synchronous EM algorithms implementations, as well as significantly reduces the barrier cost. Under FSP, every distributed worker can immediately suspend local computation when necessary, to quickly synchronize with each other. That maximizes the time fast workers spend doing useful work, instead of waiting for slow, straggling workers. We then formally prove the algorithm convergence. Further, we analyze how to automatically identify a proper barrier interval to strike a nice balance between reduced synchronization costs and the convergence speed. Empirical results demonstrate that on a broad spectrum of real-world and synthetic datasets, FSP achieves as much as 3x speedup over the up-to-date synchronous solution.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3128612" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/da02334ce53512f2f501bdca5ac25d65bd395974", "sources": [ "DBLP" ], "title": "FSP: towards flexible synchronous parallel framework for expectation-maximization based algorithms on cloud", "venue": "SoCC", "year": 2017 }, "da2446c42f601b49b8dbd689a4ee9af0a28b40bc": { "authors": [ { "ids": [ "2191690" ], "name": "Ran Ben-Basat" }, { "ids": [ "2651037" ], "name": "Gil Einziger" }, { "ids": [ "1739064" ], "name": "Roy Friedman" }, { "ids": [ "3142353" ], "name": "Marcelo Caggiani Luizelli" }, { "ids": [ "2995473" ], "name": "Erez Waisbard" } ], "doi": "10.1145/3098822.3098832", "doiUrl": "https://doi.org/10.1145/3098822.3098832", "entities": [ "Algorithm", "Anomaly detection", "Classless Inter-Domain Routing", "Denial-of-service attack", "IP address spoofing", "Open vSwitch", "Randomized algorithm", "Throughput", "Time complexity" ], "id": 
"da2446c42f601b49b8dbd689a4ee9af0a28b40bc", "inCitations": [ "60128847b73d1b369f5a9c7ef900c59227367c6f", "88b46e17199bfaa4cf65498bcaeced5284279b97", "78827809f6ca4ca901e1ec9d7ee27b837feea027" ], "journalName": "", "journalPages": "127-140", "journalVolume": "", "outCitations": [ "368ec7f67afe9d5b669566dfb4e2ca69d4d003d6", "174177d1631fa92a746d514ba0210382d231e583", "3b4d5b3e93aa6d5ea413c726c17057d8d1eafb2d", "5ecbf004192788346fec1cfeff7a318454dddd98", "3967126afbca6a722d7257cd671fe5e4979358a5", "3f98f1861fa3656a5258fe4bbee21bb559c1a6ec", "4465762fac009c8620e5d2ad67e8ffab4b7dc2f5", "16769cffa858486f7642dd0bd60c5a8eb8310574", "30837a30497431487cc43bbbde80ea8b9281177b", "ab521e91a2bce4fd13326cba9a765b479feefd61", "cc4d7f3b8a516c43e9433f346516196fba8d9f4a", "07595a3a571e09ccaa7727a4659efcb9d9a4f135", "55c5a2c1f2f6f66646baf64405464357fa7df8a0", "5d458a1bff91aa598fcc47711e5cfd7a6dfa559d", "80fe1abae2594a2cb5466d3646abbc57fb13d144", "159a7a03ccb0ef751db1870be1de4d26a02470f3", "0dd16e993f715a0c8b8d992d5c6ec1fd5d54eda0", "0da3e7bf3125534a7fe08c1d630b3cf32259c5fa", "5ee3dc5f9343e41d10a092522c05072fe61b2708", "3b988049dd8f62f772281e90196bbd793700c86b", "090fedae1eb71295d5505431f4a70485f20ef94f", "7a278ee0578f194700cadc3811cdda4ec751f88a", "2c6f4d21c8fe8efba8669ac09e7a1be675882f8f", "14b47e2b70ab9746c091e9c9eb1c7626d060f6c2", "40952ef7fe2d22daec75a6ab7e0fe030ce447e0a", "24763030fb1e9813dad51d28bea9c5d1414f9cda", "04c78219af35f705fd18f8aa29bf2639712abebd", "7b4e02b27607461b7658c289814d2efc9a3c7110", "00cf4b13a1bd202ccebe1e7bd0587f11e98ec3d6", "1e509cf3720a7494005871d6dbeccc79348c81c9", "211e4b486a85f5250ef68aef8a4422811ec7a932", "2083288ffd16aea2d34d47d555a286a78c41c01b", "718492ffcd94939bb092418ea126e942d22d8ecb", "025652412d507a8cf98ecacd8a44d32ce28995e1", "7a4f098647f6ac7fd31f337d8b47c9a264cc6d7b", "69199bdf78bc1490011891bae68ec37ca5b2a0e2", "59e742875c39d1e09cfe1be7501a4048efe343de" ], "paperAbstract": "Monitoring tasks, such as anomaly and DDoS detection, require 
identifying frequent flow aggregates based on common IP prefixes. These are known as hierarchical heavy hitters (HHH), where the hierarchy is determined based on the type of prefixes of interest in a given application. The per packet complexity of existing HHH algorithms is proportional to the size of the hierarchy, imposing significant overheads.\n In this paper, we propose a randomized constant time algorithm for HHH. We prove probabilistic precision bounds backed by an empirical evaluation. Using four real Internet packet traces, we demonstrate that our algorithm indeed obtains comparable accuracy and recall as previous works, while running up to 62 times faster. Finally, we extended Open vSwitch (OVS) with our algorithm and showed it is able to handle 13.8 million packets per second. In contrast, incorporating previous works in OVS only obtained 2.5 times lower throughput.", "pdfUrls": [ "http://doi.acm.org/10.1145/3098822.3098832", "http://arxiv.org/abs/1707.06778", "https://arxiv.org/pdf/1707.06778v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/da2446c42f601b49b8dbd689a4ee9af0a28b40bc", "sources": [ "DBLP" ], "title": "Constant Time Updates in Hierarchical Heavy Hitters", "venue": "SIGCOMM", "year": 2017 }, "da8798a9af2240cf43d078b0e4692db1acbb8c51": { "authors": [ { "ids": [ "1750001" ], "name": "Zengxiang Li" }, { "ids": [ "3047890" ], "name": "Bowen Zhang" }, { "ids": [ "2913476" ], "name": "Shen Ren" }, { "ids": [ "2189281" ], "name": "Yong Liu" }, { "ids": [ "1685620" ], "name": "Zheng Qin" }, { "ids": [ "1729436" ], "name": "Rick Siow Mong Goh" }, { "ids": [ "1711377" ], "name": "Gurusamy Mohan" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Black box", "Cost efficiency", "Data dependency", "Experiment", "Graph (abstract data type)", "Machine learning", "Run time (program lifecycle phase)", "Synthetic data", "Systems architecture", "Virtual machine" ], "id": "da8798a9af2240cf43d078b0e4692db1acbb8c51", 
"inCitations": [ "459e2d2df50421fd7647d860f1a3b7fa88b417dc" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "74-83", "journalVolume": "", "outCitations": [ "0ad8e89091eed09217e66adc98136126addc2619", "06db62e428605208ad6fbf96847f1af97c06a5ec", "e9490a92af65dbf97867cdfa422aa0c10267370b", "8aa09720221bdeef43e150fc7f6896f71600fb86", "8174aac0f597e4910cf31dc10ca0de4430a71c4a", "ef9d9821df55442f039b128bb5cef2b41ab2cadc", "2e7883aa71c1030032a5e414a66e72c10851ed82", "19d900eb912a00937326546387cd09de09e5ea45", "1156f60e40548096df49528b1342bb3e88b0f378", "17a2d0feb8754ef81a945b9f5046c68605f59560", "6536b5743e53c00bb1600f954959ae00dc24da98", "87ee99d4cc4e0601cbb519f6ddbac85772bdc49e", "916ddf79bfc624503032d2e9e9219816d909ef64", "df11fa641f7dde46e8e55e1ff1f95da9b0923816", "177c41a72cd1070c39366adb0a3bd88af010be81", "c0bbb56b4428e9a83d067c07054946293b475fe9", "6de3915df2b9927a78f213629f3bcb052ec21e8b", "118def2d47645a8fb09e5432e0d4fb9c6578701c", "793bb9e73e6203c19b86b8b7a3e529ba602da0cd", "b78c04c7f29ddaeaeb208d4eae684ffccd71e04f", "31f27864950a6c417cf996927b2d5558f70d2b14", "36d15b7a20d82f82ae4bebf60579e9a96c1c659b", "48bea869164ce1539ebdb68cf3168c57b5397f9d", "1f719106e1d80d7cc7623504003c59129ffe8c5b", "461a0ccc317eca4e8eaba82a8c88063b7773ecb0", "3726c60552263e648c6856679e672de2e1c110e5", "17e1bb7fc17b45fe5ad8724a635d285ed000efa8", "4efdc5aec9f13318131bed195315d738f835f46c", "0c3b3d0a333cf7ddb1c4e26f71d6baf2e17bccc1", "070cfe6c83858b8b4ca4be0eb2cac3114d9daaae", "420a0e5fc398f197bca3dfe40291a82b2c65655a", "0ffb89f49a7152917ace399a6312c8ff6f4896b2", "a6a8313f30420c60e7eaa9f34ea5a41833695af1", "0541d5338adc48276b3b8cd3a141d799e2d40150" ], "paperAbstract": "Graph Processing has been widely used to capture complex data dependency and uncover relationship insights. Due to the ever-growing graph scale and algorithm complexity, distributed graph processing has become more and more popular. 
In this paper, we investigate how to balance performance and cost for large scale graph processing on configurable virtual machines (VMs). We analyze the system architecture and implementation details of a Pregel-like distributed graph processing framework and develop a system-aware model to predict the execution time. Consequently, cost effective execution scenarios are recommended by selecting a certain number of VMs with specified capability subject to the predefined resource price and user preference. Experiments using synthetic and real world graphs have verified that system-aware model can achieve much higher prediction accuracy than popular machine-learning models which treat graph processing framework as a black box. As a result, the recommended execution scenarios have comparable cost efficiency to the optimal scenarios.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101122" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/da8798a9af2240cf43d078b0e4692db1acbb8c51", "sources": [ "DBLP" ], "title": "Performance Modelling and Cost Effective Execution for Distributed Graph Processing on Configurable VMs", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "dae646a11a1132a87261c58a4b9fb0e6e51f9532": { "authors": [ { "ids": [ "2423039" ], "name": "Jinchun Kim" }, { "ids": [ "3381055" ], "name": "Elvira Teran" }, { "ids": [ "1756318" ], "name": "Paul Gratz" }, { "ids": [ "1755281" ], "name": "Daniel A. Jim\u00e9nez" }, { "ids": [ "2397358" ], "name": "Seth H. 
Pugsley" }, { "ids": [ "37811136" ], "name": "Chris Wilkerson" } ], "doi": "10.1145/3037697.3037701", "doiUrl": "https://doi.org/10.1145/3037697.3037701", "entities": [ "Algorithm", "Approximation theory", "CPU cache", "Compiler", "Holism", "Hysteresis", "Last mile", "Microprocessor", "Multi-core processor", "Optimizing compiler", "Prefetcher", "Program counter" ], "id": "dae646a11a1132a87261c58a4b9fb0e6e51f9532", "inCitations": [ "d1fb26e6fc2c71d984bb1213af1d5b2a57f04b6f", "27f5ed851a93ca903c3c2ce150683e213743bfe8", "f06233da50ed916579f5f536da5a66fd3c4c0ce8" ], "journalName": "", "journalPages": "737-749", "journalVolume": "", "outCitations": [ "2e2177e401a5a4b09e136469594e7966d2d3cc6b", "8ebd10979abc3a603b59fcc833b0eab38e67f3c4", "276ebc620a8976026bd2d03582b9ecfa3738d43c", "792aa5a81ac1d344de450ec59eec339aa0e508aa", "25e0dcb0e7b3446fbf16c48e9a6a4ad36f645f3b", "300130e8d199184c8c7921f44f338fae47150152", "43c07ad30e0ce39430a9339672c1f25d35a0c924", "081dec43c2dbe76ff43c810594495f11ab092a10", "1d59804e5e433d654d9005f49a010cb508cd81b7", "02ebdcf8200135ec0433e12e4ef2459ac740370b", "d51ad15fa164c1bf6b40bc183864667cc2cff7f9", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "5d999f4a5567e6f4a54e46bbcd6006f75ab0cbac", "7f5374e2e33b0d03264c0e4c12efe72293d412ff", "a776115d6567d38ed345c8c93fb23c7ff335cb1a", "77d4fb23ce0b5499016f2c162a5430d04f976542", "55043afbb87e38627778a323dfdc35a55357e47d", "09c5931307cba3f80d3ecc14d02eecfa46463cfe", "31b9bb18705dd4b4e85a17db4a186735a3969990", "36b0217d5f07cdd3d63c5656434c26bfc5b18fe7", "74bd8d777f1c461f7f206d1c103f99a6415c5b2b", "06125169a21ef17641d7199544417b21c378eede", "4408b7049f9241920ff8dcb5ad387e5358a75694", "335737f433c2425e3e9c463e89ca4c8de6bf4689", "6521c3812eb43ef082dda19cd5961a67ca608f33", "56470b57284ebc284bd20a681d2912f0dfe6bbe3", "082573e4dc88f38628242d193c966725ab355026", "1728de7b027e827dfc67ceb3b0e23b841a4b1538", "7bb72a9437a1ddb7e0eced6f243b8f9e66438f28", "565e452f12e2e81a92f0120ca903bef3d541dd22", 
"2af32811c6bf3be891ee84b19248540dfa1aa58f", "3ac6671a0c61544b9dab543b116eccdaccc6469e", "0d8dfa6ec2fdbb432e1ce76a3e7e542336b270c2", "08237b5a7862d65185977e3dac0f81e616188add" ], "paperAbstract": "Data prefetching and cache replacement algorithms have been intensively studied in the design of high performance microprocessors. Typically, the data prefetcher operates in the private caches and does not interact with the replacement policy in the shared Last-Level Cache (LLC). Similarly, most replacement policies do not consider demand and prefetch requests as different types of requests. In particular, program counter (PC)-based replacement policies cannot learn from prefetch requests since the data prefetcher does not generate a PC value. PC-based policies can also be negatively affected by compiler optimizations. In this paper, we propose a holistic cache management technique called Kill-the-PC (KPC) that overcomes the weaknesses of traditional prefetching and replacement policy algorithms. KPC cache management has three novel contributions. First, a prefetcher which approximates the future use distance of prefetch requests based on its prediction confidence. Second, a simple replacement policy provides similar or better performance than current state-of-the-art PC-based prediction using global hysteresis. Third, KPC integrates prefetching and replacement policy into a whole system which is greater than the sum of its parts. Information from the prefetcher is used to improve the performance of the replacement policy and vice-versa. Finally, KPC removes the need to propagate the PC through entire on-chip cache hierarchy while providing a holistic cache management approach with better performance than state-of-the-art PC-, and non-PC-based schemes. 
Our evaluation shows that KPC provides 8% better performance than the best combination of existing prefetcher and replacement policy for multi-core workloads.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037701" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dae646a11a1132a87261c58a4b9fb0e6e51f9532", "sources": [ "DBLP" ], "title": "Kill the Program Counter: Reconstructing Program Behavior in the Processor Cache Hierarchy", "venue": "ASPLOS", "year": 2017 }, "dafaebf5b68fc1230ff5cd9d24f84f7104f46a81": { "authors": [ { "ids": [ "2997470" ], "name": "Shashank Gugnani" }, { "ids": [ "1720335" ], "name": "Xiaoyi Lu" }, { "ids": [ "1731654" ], "name": "Dhabaleswar K. Panda" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Benchmark (computing)", "Big data", "Cloud computing", "Clustered file system", "Context switch", "Fastest", "IP address spoofing", "InfiniBand", "Network socket", "Object storage", "Proxy server", "Remote direct memory access", "Scalability", "Server (computing)", "Swift (programming language)", "Synthetic data", "Throughput" ], "id": "dafaebf5b68fc1230ff5cd9d24f84f7104f46a81", "inCitations": [ "61a1afa693442d829072114910b1775a8e4ceefa" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "238-247", "journalVolume": "", "outCitations": [ "06c6ee85e7d60cf64f687ba3fd0633ebaa28f645", "93008eb5924b63846bcb1c93a96d451068a2351c", "9583ab2af5f28da6fb08f79476d14214d20b9848", "306e2c025e9f5edfd5a5a0d0330d962de76c9b85", "073ea26402d1f96545edb60cec83fd5501a6f881", "3e200ebb970e92069ccc8302eb3af8492c291ebe", "62a7c092e607640273f69cae1372d0677bad2615", "074ff4d17b8ff9b6e60a3019c7cdc20af3fd8d56", "fd0830d57f02dff0cf17276d674e5d503d6ed22f", "4a57bbef15e2613160cdcd4a015be26ec2378f9a", "9af115f7b108c73a79ea144bc5f9539d444fe343", "6088230ede570bf552d16c8781ee7a9c77c6a1a1", "ee05b94d8af5113b3e4d51e957ca66fb7e6aea35", 
"47ef015ce9d5fb8c97d98f8a194695681eb34bf8", "0541d5338adc48276b3b8cd3a141d799e2d40150" ], "paperAbstract": "Running Big Data applications in the cloud has become extremely popular in recent times. To enable the storage of data for these applications, cloud-based distributed storage solutions are a must. OpenStack Swift is an object storage service which is widely used for such purposes. Swift is one of the main components of the OpenStack software package. Although Swift has become extremely popular in recent times, its proxy server based design limits the overall throughput and scalability of the cluster. Swift is based on the traditional TCP/IP sockets based communication which has known performance issues such as context-switch and buffer copies for each message transfer. Modern high-performance interconnects such as InfiniBand and RoCE offer advanced features such as RDMA and provide high bandwidth and low latency communication. In this paper, we propose two new designs to improve the performance and scalability of Swift. We propose changes to the Swift architecture and operation design. We propose high-performance implementations of network communication and I/O modules based on RDMA to provide the fastest possible object transfer. In addition, we use efficient hashing algorithms to accelerate object verification in Swift. Experimental evaluations with microbenchmarks, Swift stack benchmark (ssbench), and synthetic application workloads reveal up to 2x and 7.3x performance improvement with our two proposed designs for put and get operations. 
To the best of our knowledge, this is the first work towards accelerating OpenStack Swift with RDMA over high-performance interconnects in the literature.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101145" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dafaebf5b68fc1230ff5cd9d24f84f7104f46a81", "sources": [ "DBLP" ], "title": "Swift-X: Accelerating OpenStack Swift with RDMA for Building an Efficient HPC Cloud", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "db5aa66ec7e20068d4c5d26f6002838f9a49d349": { "authors": [ { "ids": [ "2909388" ], "name": "Mohammad Shahrad" }, { "ids": [ "1752172" ], "name": "David Wentzlaff" } ], "doi": "", "doiUrl": "", "entities": [ "Central processing unit", "Electronic waste", "Mobile device", "Mobile phone", "Moore's law", "Server (computing)", "Smartphone", "Total cost of ownership" ], "id": "db5aa66ec7e20068d4c5d26f6002838f9a49d349", "inCitations": [ "26b5be9d7327d095f5adff76134b0bbc914f56f2", "299f9809da6a41fbea396ab728b3302819c887e7" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "5016dedcbc51faec5f0aa0b5303a4e96c6e669de", "869b1062a3aa62292b7ed7a7718974233c105383", "08632fe2b934ed15d3499e7321282c81adc2c390", "36275d14731ab7ac192eb4af487f5d34958ad084", "50afa6f35a9be11a433c04876201f0fa209705b8", "073e26aa7192825a8d872fb0c6f25bc31aca77cf", "02b076a3f2bae519741124595244ad8a1b65cc4c", "7e4bf4bce26804987fabf9a8cca182b5dd550a7e", "0139dceb6cef21b234e454d53154f30391495862", "7996c2337e5a381b10991350cfcf34797575d41d", "93025ed9fa632e9e0fe15f5e3e9b7e2b7d04ffa1", "0f44833eb9047158221e7b3128cde1347b58ccd6", "21a0c328f428a1d4694246ed6c44ed472b74133a", "3376943e260945e7d5fb3a20841ab9559279d5f0", "0a613ed3377d76772dfeb3adc9280b91d3b87d75", "0d8524a1eca5e41ee755acd30a0c28a782d05331", "6330f075daf847554007b236b57293f8ccebca64", "4831b7d887d6c67074d9b3143b1e9b130175f163", 
"110c050c6c992d2b956f7b47d717810ac5c91bdc", "0fa5a651cd4dd1f78546b2aa840b4b44aa807649", "2c9b77a063a3459ed8f3be0c0066724a38e225e5", "5e36511b8cab586d69047adfb03971380c17d427", "5f7264e24101ac4d42d2ef9cedd5eae8e7512eec", "2ea6e3243c9aa5d9910cf44c4f0e18002bf01638" ], "paperAbstract": "The performance of mobile phone processors has been steadily increasing, causing the performance gap between server and mobile processors to narrow with mobile processors sporting superior performance per unit energy. Fueled by the slowing of Moore\u2019s Law, the overall performance of single-chip mobile and server processors have likewise plateaued. These trends and the glut of used and partially broken smartphones which become environmental e-waste motivate creating cloud servers out of decommissioned mobile phones. This work proposes creating a compute dense server built out of used and partially broken smartphones (e.g. screen can be broken). This work evaluates the total cost of ownership (TCO) benefit of using servers based on decommissioned mobile devices and analyzes some of the architectural design trade-offs in creating such servers.", "pdfUrls": [ "https://www.usenix.org/conference/hotcloud17/program/presentation/shahrad", "https://www.usenix.org/system/files/conference/hotcloud17/hotcloud17-paper-shahrad.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/db5a/a66ec7e20068d4c5d26f6002838f9a49d349.pdf", "s2Url": "https://semanticscholar.org/paper/db5aa66ec7e20068d4c5d26f6002838f9a49d349", "sources": [ "DBLP" ], "title": "Towards Deploying Decommissioned Mobile Devices as Cheap Energy-Efficient Compute Nodes", "venue": "HotCloud", "year": 2017 }, "db6d960bcdbf6c32a075c3c554681dff77b6f5fe": { "authors": [ { "ids": [ "3101979" ], "name": "Guangxu Xun" }, { "ids": [ "38906826" ], "name": "Kishlay Jha" }, { "ids": [ "2741738" ], "name": "Vishrawas Gopalakrishnan" }, { "ids": [ "2694924" ], "name": "Yaliang Li" }, { "ids": [ "1769577" ], "name": "Aidong Zhang" } ], 
"doi": "10.1109/ICDM.2017.63", "doiUrl": "https://doi.org/10.1109/ICDM.2017.63", "entities": [ "Agent-based model", "Graph theory", "Information", "Machine learning", "Scalability", "Supervised learning", "Test set" ], "id": "db6d960bcdbf6c32a075c3c554681dff77b6f5fe", "inCitations": [ "1014d71f8f2225d2ff3529b86a7e2cfd6d84c0b1" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "535-544", "journalVolume": "", "outCitations": [ "762b63d2eb86f8fd0de98a08561b77527ae8f165", "45317a4a29a55d2ed55224cc0ad4be6d0b9e77c7", "e7e5046c4ef383978fb468dcbf8c9fe80a91f42e", "3a52e9be03bb24f4f93aeb227daa5ad9cc9b7a97", "ac0fc94c8ae1805f278e1b602df8c4c7c1caca7c", "b2288996988a2e2b11ea04586714d86d89794bbd", "0825788b9b5a18e3dfea5b0af123b5e939a4f564", "485a66cceeb43312a71f54c2a7f93372c9bdc83b", "0eef0dcf7daa1df2eddb4b6f1947827f5a289132", "3145ae24017a76dd4b3fb01db0533eae88efb6c4", "33a3c942836bdcf7b55e345212d3377610d32c33", "38610421806f734cadb30580b1f3ff11aa669200", "10eb7bfa7687f498268bdf74b2f60020a151bdc6", "aa9f2c7f18b5b0f8d6c276415d5d18046fd44233", "356e7c10550ccbd7e6debdb8cb649fbf00107b89", "751336e0804aca5e1e3b3b431cb2c8a1497f8c8d", "475f3bfbe3620f041051eda6cebdef5cf01ec41d", "886de60e309486784471411905739d9b21af1bf8", "4b5655e4f0a52cbeeb299a4db55b57c8f0f53387", "7b34d372724c3c0c249d06c56ffff8ba019e2c76", "175fc7cd36a126d1de55c092d391790896060952", "eae96f401dd4fc587172a98223baa795fb484e4c", "865e14930652d657973df45115236ed6c9cef107", "2b4cee34eb7463d21062eb92d783fc2be93c1cda", "0f0cab20db501181f24edc8469ac5f8bfdbedbdf", "3b06dcfd6e93a4d23d63e078e52296058dfe64fa", "244a152b82401d1619ef4ac88c51672b1662e3a2", "fdaaa24dbf66ab4cc702d216de4bb6767c305078", "6d6a2d71d0cc718d75e017d145668c2c199556ca", "1b179d6890c3055cea12bc65337dcebc3167d436", "87f8063b9b7b8e23ed12d27e7baa2b8fd47fac18", "2e2abcd603f247ee635713b1c0e0bcedd63a2e85" ], "paperAbstract": "Literature based discovery (LBD) is a task that aims to uncover hidden associations between 
non-interacting scientific concepts by rationally connecting independent nuggets of information. Broadly, prior approaches to LBD include use of: a) distributional statistics and explicit representation, b) graph-theoretic measures, and c) supervised machine learning methods to find associations. However, purely distributional approaches may not necessarily entail semantically meaningful association and graph-theoretic approaches suffer from scalability issues. While supervised machine learning based approaches have the potential to elucidate associations, the training data required is too expensive to generate. In this paper we propose a novel dynamic Medical Subject Heading (MeSH) embedding model which is able to model the evolutionary behavior of medical concepts to uncover latent associations between them. The proposed model allows us to learn the evolutionary trajectories of MeSH embeddings and detect informative terms. Hence, based on the dynamic MeSH embeddings, meaningful medical hypotheses can be efficiently generated. To evaluate the efficacy of the proposed model, we perform both qualitative and quantitative evaluation. 
The results demonstrate that leveraging the evolutionary features of MeSH concepts is an effective way for predicting novel associations.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.63" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/db6d960bcdbf6c32a075c3c554681dff77b6f5fe", "sources": [ "DBLP" ], "title": "Generating Medical Hypotheses Based on Evolutionary Medical Concepts", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "dba23d346783e3541d90787f54ec1e5a0cf8bcae": { "authors": [ { "ids": [ "28133689" ], "name": "Hussein Elnawawy" }, { "ids": [ "28074738" ], "name": "Mohammad Alshboul" }, { "ids": [ "1694458" ], "name": "James Tuck" }, { "ids": [ "1717365" ], "name": "Yan Solihin" } ], "doi": "10.1109/PACT.2017.58", "doiUrl": "https://doi.org/10.1109/PACT.2017.58", "entities": [ "Application checkpointing", "Computation", "Computer data storage", "Matrix multiplication", "Non-volatile memory", "Overhead (computing)", "Run time (program lifecycle phase)", "Volatile memory" ], "id": "dba23d346783e3541d90787f54ec1e5a0cf8bcae", "inCitations": [ "58b8e5105391f11c3c7afd5a087b207f3c83d861" ], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "318-329", "journalVolume": "", "outCitations": [ "f6715c2d9d8a76a20f4b857f7377ce63a23f0654", "885c666fbcfd1a10c613496d7a041d01b99c7a39", "29eba6139249e2557b2210f88e31c959605d5cb7", "05a1357946de5eca42a477b7b268db4944219a2e", "3da14037fc6e2c3dee2d6808bc2d7e933325d054", "56ad278ca41d14386d558f259f6a8b98ae6e86d1", "9858251a88afc29fa9fdb8234d998dcdf182f144", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "261b6bd9fe783e7f34e4a752ce92568d513761f2", "2e663c1047ff14ddc2416229459922757a20edfb", "fd840d5275cac98d64e7778a1b9173b937a77386", "081b6ef171a7c1711686d761293f40253f2b8389", "42c70d64890726f60556caf3eec3f06e85642dd9", 
"16653666b0005f91060a3e402566659749b84313", "67ddcb550dd8fe57e4567bd7ca78f6e841f25c4c", "08b242bd5960034614cc66b786b1376d8763f390", "dc2e2b794a784782d7d9860f1358aa107f71c1bf", "aa44ae3febfcc18957024c26e7ff3177885c5e6f", "d76913152aeff892dbb028785f98ee8c84bfd8e3", "1f482f44497c17be0573d9dff14a30d87b0bf0ca", "94783d113951822195d4ba44599a8fcbdef9d4bf", "57c823b3b07b98233394bf15cfbbaed6a84809df", "39e3d058a5987cb643e000bce555676d71be1c80", "2621b8f63247ea5af03f4ea0e83c3b528238c4a1", "5c0e8af36e20b8ea213561e8c3d706b4e2f2cc8d", "896f6698a74e656174045dc20840dd7e925f18bd", "10d8afea57c8f159c4eb2664a40c8fb859acefef", "f4dff66ba8f2338d118f379f2eff1410feb57ce6", "2dac7740f1fb1add2a77d119e29c3549d04e49f8", "249107c2b695dbb2c429f261359bca11beb754f8" ], "paperAbstract": "Future main memory will likely include Non-Volatile Memory. Non-Volatile Main Memory (NVMM) provides an opportunity to rethink checkpointing strategies for providing failure safety to applications. While there are many checkpointing and logging schemes in literature, their use must be revisited as they incur high execution time overheads as well as a large number of additional writes to NVMM, which may significantly impact write endurance.In this paper, we propose a novel recompute-based failure safety approach, and demonstrate its applicability to loop-based code. Rather than keeping a fully consistent logging state, we only log enough state to enable recomputation. Upon a failure, our approach recovers to a consistent state by determining which parts of the computation were not completed and recomputing them. Effectively, our approach removes the need to keep checkpoints or logs, thus reducing execution time overheads and improving NVMM write endurance, at the expense of more complex recovery. 
We compare our new approach against logging and checkpointing on five scientific workloads, including tiled matrix multiplication, on a computer system model that was built on gem5 and supports Intel PMEM instruction extensions. For tiled matrix multiplication, our recompute approach incurs an execution time overhead of only 5%, in contrast to 8% overhead with logging and 207% overhead with checkpointing. Furthermore, recompute only adds 7% additional NVMM writes, compared to 111% with logging and 330% with checkpointing.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.58" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dba23d346783e3541d90787f54ec1e5a0cf8bcae", "sources": [ "DBLP" ], "title": "Efficient Checkpointing of Loop-Based Codes for Non-volatile Main Memory", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "dbbd74cfd4f0b7522a838532a4596780aa1bb911": { "authors": [ { "ids": [ "2395105" ], "name": "Linda Leuschner" }, { "ids": [ "7873936" ], "name": "Martin K\u00fcttler" }, { "ids": [ "34590129" ], "name": "Tobias Stumpf" }, { "ids": [ "5403194" ], "name": "Christel Baier" }, { "ids": [ "1731688" ], "name": "Hermann H\u00e4rtig" }, { "ids": [ "1840345" ], "name": "Sascha Kl\u00fcppelholz" } ], "doi": "10.1145/3102980.3102999", "doiUrl": "https://doi.org/10.1145/3102980.3102999", "entities": [ "Formal methods", "Inter-process communication", "Lua", "Markov chain", "PRISM (surveillance program)", "Requirement", "Statistical model", "System configuration" ], "id": "dbbd74cfd4f0b7522a838532a4596780aa1bb911", "inCitations": [], "journalName": "", "journalPages": "111-117", "journalVolume": "", "outCitations": [ "431c500f160b290699d786c540f4cfd395e48b4b", "25eb43ff84def341dbc0332a5e11a5d12bdbe677", "07a66e0f2777bb0005384defd228d5aa0bd7f9a4", "643434bb07c17d70b33fb4e96da1d7fc55e81b0b", "525f9aab0e46bfe4442e2b5f4d92f30f9f84bd53", 
"88e2257797e82639c57d5ab3aa09f5400db763cd", "047151f41a2ad2eaf41fadb16435fb3420d5bb9f", "02b6c3f35d4d06e1ba67ba9983e66e5fc0d97728", "f1269591359fddc20f95da10c7bd4c054080b447", "8d2902aca5dd6f3445384d0991240bdd7dd1126b", "45eb42725832f0c8c653fa644fa0b3dbb765888e", "501fef7b6a9b1aab25390e9609b268787b396994", "c7727b31990fc16000cffb85867b0411b1c7d092", "014c7570c6c30d605c2b71aba32eed5f98ab66a1", "2950fce01e2d09100b1ab14eba675690881020f5", "12b412c14084d2317f8e11d709ef6dfcd44f003c", "6f33efc19f44cfbd8c6c8172a644049ca8ca1588", "3493e0285fe329a710f54be2ef82350fdaafc991", "00eee29e698b420dc9f041c4fedba06ebc287af8", "8a5da3dd76e7621fb5a9e8e76a770d5970575cf9", "2040403199020da876aadd7d67d1b96293c3f39a", "4b24da49f292028da5430e7f655a196fac88a7bf", "043b307af412fc7f9005822e6dabbe4f9d983472" ], "paperAbstract": "The paper reports on first steps towards a systematic design process that ensures quantitative stochastic requirements like requirements on the expected energy consumption or resilience requirements by construction. The idea is to automatically extract a formal model from a configurable system and to use formal analysis techniques to automatically determine a configuration such that the system meets the quantitative requirements. As a proof of concept we present a tool that supports the automated synthesis of protocol parameters for IPC (interprocess communication). The tool takes as input a Lua script describing the communication structure of several processes. This script is annotated with quantitative information such as error probabilities and timing information. The output is a Markov chain specified in the input language of the prominent probabilistic model checker PRISM. This Markov chain yields the basis for quantitative formal analysis of failure scenarios caused by hardware faults in IPC channels. The results yield the basis for finding optimal values for protocol parameters that tune, e.g., the level of resiliency. 
As an initial demonstration of the tool, we analyze and adjust system parameters of a simple scenario with a few communicating processes and report on results. Though achieved under simplified assumptions, the results presented here are a proof-of-concept towards the vision of automated system configuration.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3102999" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dbbd74cfd4f0b7522a838532a4596780aa1bb911", "sources": [ "DBLP" ], "title": "Towards Automated Configuration of Systems with Non-Functional Constraints", "venue": "HotOS", "year": 2017 }, "dbec01bd2c27d31366d931526ca660198fc04200": { "authors": [ { "ids": [ "2123621" ], "name": "Fabien Labernia" }, { "ids": [ "2486264" ], "name": "Bruno Zanuttini" }, { "ids": [ "1682611" ], "name": "Brice Mayag" }, { "ids": [ "1783082" ], "name": "Florian Yger" }, { "ids": [ "1773774" ], "name": "Jamal Atif" } ], "doi": "10.1109/ICDM.2017.34", "doiUrl": "https://doi.org/10.1109/ICDM.2017.34", "entities": [ "Algorithm", "Asymptotically optimal algorithm", "Directed acyclic graph", "Eisenstein's criterion", "Information theory", "Synthetic data" ], "id": "dbec01bd2c27d31366d931526ca660198fc04200", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "247-256", "journalVolume": "", "outCitations": [ "7b0ecba2e729a84e17c104264e64149d8074e88d", "0b1665cbe183af36069bc06a72571aea9151b1bc", "98199e61709b3d1a17d3912ca31234e0321a1115", "b9f23680363588bef73cba11bc663bfc460dd1a4", "8bbcded0c093f62bd2a54cad58bcc8114aed5aa6", "d921036a6cb7e340b019afa557a19bc65586a1ad", "3bcd315d30c1a0b156096413f0546521475800e1", "b6b33a5c4155b2eff881fce63b5dd34d1aa38b56", "4a932af1a32eb631b3bd82a3e95a138adb33a0ef", "49db45abcc2bdee90426c16e7c2c90384b6f8959", "77147f7b8dc2d725623c84ceb5912c0c985f2c61", "06db72ce2393316afd10b0401eae8a977052e145", "32253f9e37d5f2b6e9454809a83eee8f012d660d", 
"6ff505e63ffebf419736d6c65741ee63b3ea720e", "7f9f3e2010a99222885b628dfa279d2149cba4aa", "1bb07913e346f33f5e3af105e2184d314f1a6cc1", "1c296c1f3e93b1121dacd7efb6ed9616889af5c0", "078c83c5754f88988788c5411acb27b949054790", "5de4f5b9993efb14171dfc3b5724676325c8dd9f", "10f935dfe563adefb5aaa3f369772a2cd258efd1", "1b3c86ad6c149941750d97bd72b6b0122c1d8b5e" ], "paperAbstract": "We deal with online learning of acyclic Conditional Preference networks (CP-nets) from data streams, possibly corrupted with noise. We introduce a new, efficient algorithm relying on (i) information-theoretic measures defined over the induced preference rules, which allow us to deal with corrupted data in a principled way, and on (ii) the Hoeffding bound to define an asymptotically optimal decision criterion for selecting the best conditioned variable to update the learned network. This is the first algorithm dealing with online learning of CP-nets in the presence of noise. We provide a thorough theoretical analysis of the algorithm, and demonstrate its effectiveness through an empirical evaluation on synthetic and on real datasets.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.34", "http://www.lamsade.dauphine.fr/~flabernia/Recherche/PID5012735.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dbec01bd2c27d31366d931526ca660198fc04200", "sources": [ "DBLP" ], "title": "Online Learning of Acyclic Conditional Preference Networks from Noisy Data", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "dbf651f1b83e74dbe1faca0368c3af59a63131b4": { "authors": [ { "ids": [ "35710436" ], "name": "Rubens E. A. 
Moreira" }, { "ids": [ "2990557" ], "name": "Sylvain Collange" }, { "ids": [ "39907070" ], "name": "Fernando Magno Quint\u00e3o Pereira" } ], "doi": "10.1145/3018743.3018751", "doiUrl": "https://doi.org/10.1145/3018743.3018751", "entities": [ "Algorithm", "Automatic vectorization", "Bellman\u2013Ford algorithm", "CUDA", "Compiler", "Depth-first search", "Dynamic programming", "Graphics processing unit", "High- and low-level", "Kernel (operating system)", "Parallel computing", "SIMD", "String searching algorithm" ], "id": "dbf651f1b83e74dbe1faca0368c3af59a63131b4", "inCitations": [ "b4822885fa27c019f497c9cd4848da8333f17e78" ], "journalName": "", "journalPages": "313-326", "journalVolume": "", "outCitations": [ "08104146873817cc35cbd96d7ca3e5169cb72296", "14724c356106ae50746318b1bdd27d9b684c7d11", "387d5b24317395ae7a86c8ecc9403ac62ed6febe", "c06dbf2b7ff03f422b89dcd9c28a44c279099c8f", "217beeb53274ba6972d660afff1841e890f3721e", "7bcc53f1baf3358517a602d856192faea9442c91", "0560fc4924bbbe7e920122dc25c1ecfc3e59e374", "11df018ba5452e7806dd22b6746604ca4ca45f82", "876014931b26abf9b87a911d394d25beab674bbe", "039ad1ad259a9bd98e24b0738ba048282188d184", "4305276bdf8d68743e3148dafece6ab82d0e0e54", "ba0c70aecb17dec415e115a8b7cecb9f31876cd8", "a87a1fe8b8633780901e1f2f981b09f23cd1baf0", "802a003f669f16c451bbbf6e0a1f8d447cb23442", "1d1426d5317ef014eb9d2bfa363bb8087cb7e087", "1d809d4ea4f22d9e0df6ba1549d87d8aa45512af", "e2272d2af248ca32f755f0e21278f71f03efbe77", "0d6f11c5fccf2c2ed729602aceacf548fb0d5c08", "4e256f5f4e315bf9354c66f267b7b5ea3e9d8778", "9e24f9f3516b17912c4fcdac845ef589684a4f88", "44799559a1067e06b5a6bf052f8f10637707928f", "71affe0d9489be0ecba667f568b1a0bcd9ee3af3", "84ce362d28abd42e280a1460ad53afb32ee1cd9b", "d57b42821ec782b33dd49ee0c37976bbd62d24a4", "43b699d80b379efd292668c639a24bf391ba80bc", "45962ac13ad1b07a997b73d85ce970cb48b22def", "28552ecf4eaedb3461edca97304b29082b02fbab", "5448137a55cb3c6d7a22b91f5b0d9db60c96e33b", "347a08cd9ada1cee83713d24ec84ed49ab121987", 
"a06eb2e52176a5b6b941bb8544c544b64f527e32" ], "paperAbstract": "Programming languages such as C for CUDA, OpenCL or ISPC have contributed to increase the programmability of SIMD accelerators and graphics processing units. However, these languages still lack the flexibility offered by low-level SIMD programming on explicit vectors. To close this expressiveness gap while preserving performance, this paper introduces the notion of \\ourinvention{} (CREV). CREV allows changing the dimension of vectorization during the execution of a kernel, exposing it as a nested parallel kernel call. CREV affords programmability close to dynamic parallelism, a feature that allows the invocation of kernels from inside kernels, but at much lower cost. In this paper, we present a formal semantics of CREV, and an implementation of it on the ISPC compiler. We have used CREV to implement some classic algorithms, including string matching, depth first search and Bellman-Ford, with minimum effort. These algorithms, once compiled by ISPC to Intel-based vector instructions, are as fast as state-of-the-art implementations, yet much simpler. 
Thus, CREV gives developers the elegance of dynamic programming, and the performance of explicit SIMD programming.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018751" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dbf651f1b83e74dbe1faca0368c3af59a63131b4", "sources": [ "DBLP" ], "title": "Function Call Re-Vectorization", "venue": "PPOPP", "year": 2017 }, "dc02287acce63d8b22fb7df8676b415bf0f430ca": { "authors": [ { "ids": [ "2074724" ], "name": "Rafael Keller Tesser" }, { "ids": [ "3043413" ], "name": "Lucas Mello Schnorr" }, { "ids": [ "1998583" ], "name": "Arnaud Legrand" }, { "ids": [ "2909473" ], "name": "Fabrice Dupros" }, { "ids": [ "1728532" ], "name": "Philippe Olivier Alexandre Navaux" } ], "doi": "10.1007/978-3-319-64203-1_14", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_14", "entities": [ "Load balancing (computing)", "Simulation" ], "id": "dc02287acce63d8b22fb7df8676b415bf0f430ca", "inCitations": [ "2436ae644d71c3a86ed0053fcda0a7e5957a6ac8", "fd840a2cb6fb6918689c8374c7316dbb23847c89" ], "journalName": "", "journalPages": "192-205", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_14" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dc02287acce63d8b22fb7df8676b415bf0f430ca", "sources": [ "DBLP" ], "title": "Using Simulation to Evaluate and Tune the Performance of Dynamic Load Balancing of an Over-Decomposed Geophysics Application", "venue": "Euro-Par", "year": 2017 }, "dc4a0dae3b5bfcaa39f6cff3f6d2d07c0c2b2a49": { "authors": [ { "ids": [ "2751331" ], "name": "Yipeng Wang" }, { "ids": [ "39326009" ], "name": "Amro Awad" }, { "ids": [ "1717365" ], "name": "Yan Solihin" } ], "doi": "10.1109/ISPASS.2017.7975274", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975274", "entities": [ "Benchmark (computing)", "Information management", "LINC", "Locality of reference", "Memory footprint", "Morphing", "Optimal design", 
"Principle of locality", "Simulation", "Sparse matrix", "Synthetic data" ], "id": "dc4a0dae3b5bfcaa39f6cff3f6d2d07c0c2b2a49", "inCitations": [], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "97-108", "journalVolume": "", "outCitations": [ "0259f3abf9e78d0595c7105a7ad3792da08422c7", "3758fa27f18f5ba11c2f02a71a224732d3b4f33b", "164ac9e513e0b0538f53dce51850ceb5532ff521", "f04090e6164780c2c249ae0439a69fd77d715c31", "40f85cbe67ce1ce89009985e9caed648dd08c12e", "12df05f3e38c615fd613e211abcd24da3b269124", "6760a91356a65c6ca9e442180390b3a6c1ed2a94", "07230d0d609fcaa76ccb4dc7e23794d69091b763", "3321c2459ca9388d73980ef92add5e1e6c0dc610", "3b419c4f82ae366bf7425af37667012e14a93b1f", "fef5795f555a1200b522c5eb303c7ad7e9c2beeb", "d9b6188d43078848214726e482fb72d0b0fcbd29", "c7e64ac67a5d8140c53bfe4840263d4231646a98", "a7f3103822beb0d6df835778f77bd7d429560f88", "8c6532bf7c1dc865ee8352c608f0a234e15a7d07", "db7fd20dc31565003fb992405ba83975d7f5f681", "199932878100e8fa2234facce3d3a2b4b24da391", "4732e7c17ce776be64323ca35f97dfa0507f9fa8", "103d493e5c7bd28b5a8cf604f62877a96ff25bfb", "512bdafa2df31d6ca1a29b120af0f1cfc454d14d", "40c1e101b18ffd33fcdfead2759c4662c6e6585c", "dd9cb1f18de6ac02f9bdb1272ab8f2ba115d8011", "554b44bc1290f16fb1d3117de078ac832f341d4e", "b77e7ae60aed8f307075c5a261274938da41e1e8", "43bb9c9d584e2ea8f0a3090b16383bce1cb34586", "3ba0dfabf7753f69555ae054d100c5b94fd7f218", "0653e2ed9f683868cb4539eb8718551242834f6b", "1650bdd28e7e56a4e556e60c7f0b7731f71338ba" ], "paperAbstract": "Computer system designers need a deep understanding of end users' workload in order to arrive at an optimum design. 
However, current design practices suffer from two problems: time mismatch where designers rely on workloads available today to design systems that will be produced years into the future to run future workloads, and sparse behavior where many performance behavior is not represented by the limited set of applications available today. We propose clone morphing, a systematic method for producing new synthetic workloads (morphs) with performance behavior that does not currently exist. The morphs are generated automatically without knowing or changing the original application's source code. There are three different aspects a morph can differ from the original benchmark it is built on: temporal locality, spatial locality, and memory footprint. We showed how each of these aspects can be varied largely independently of other aspects. Furthermore, we also presented a method for merging two different applications into one that has an average behavior of both applications. We evaluated the morphs by running them on simulators and collect statistics that capture their behavior, and validated that morphs can be used for projecting future workloads and for generating new behavior that fills up the behavior map densely.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975274" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dc4a0dae3b5bfcaa39f6cff3f6d2d07c0c2b2a49", "sources": [ "DBLP" ], "title": "Clone morphing: Creating new workload behavior from existing applications", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "dc588f31472281e86d86057311702517d4fb5493": { "authors": [ { "ids": [ "2960562" ], "name": "Bin Nie" }, { "ids": [ "38838946" ], "name": "Ji Xue" }, { "ids": [ "3134457" ], "name": "Saurabh Gupta" }, { "ids": [ "1686571" ], "name": "Christian Engelmann" }, { "ids": [ "1730525" ], "name": "Evgenia Smirni" }, { "ids": [ "34966505" ], "name": "Devesh Tiwari" } ], 
"doi": "10.1109/MASCOTS.2017.12", "doiUrl": "https://doi.org/10.1109/MASCOTS.2017.12", "entities": [ "Artificial neural network", "Central processing unit", "Computer cooling", "Data center", "Graphics processing unit", "Simulation", "Soft error", "Supercomputer" ], "id": "dc588f31472281e86d86057311702517d4fb5493", "inCitations": [], "journalName": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "journalPages": "22-31", "journalVolume": "", "outCitations": [ "145c1f20e612918cc7161603f94677994a95bd80", "2398278a25035cfeefa3dd4aba91b16d48f540ba", "bfcf14ae04a9a326f9263dcdd30e475334a96d39", "752db3b0507208847a4335b2fbbb3a0435235b2e", "33f73088b95e3d6eed31e8ea9048b1a373f0bd75", "e2f15cf76eed3c53cbcf21d3383085494bb6a89a", "82ce7e0db4aaf8c56a0c4c9b7ca5a6985b17601b", "517c5cd1dbafb3cfa0eea4fc78d0b5cd085209b2", "e689f0e0114eaec876e0bde1ffcbd87362239393", "d44b6c8d18bec70f12677ba8050d755aa0b0dab4", "28540222f0ed31ae930dc329e29eb17d280663f2", "8a7536f311d22bd588c5bc2306d54d13effaee82", "a9a0bc5f4aba6ca46023f0bf0b1a4c3066dc8c12", "1f6451ac0572ce0a6a383a25413dadd52cf770ca", "01a136d82c63f6e8eec7fe5cffc27e91ca5b1f84", "d777b4177034dec34616bc42293978af995b84a1", "b9217e04b6b2b6d8ba486b5e4305bc90513ea865", "3b2c8bb9471bd52a40b72a61bfede076f4d414b5", "c509cdb9c228818eaf40e52b3542b79512b48911", "fb47f358b9c002694b5645cdc9124e6c691e023c", "710b3d324b07197a705683af18fc417ef712d042", "801dbcd2cfb996ffc55692da8bab92ce7c22128b", "01b5c01835a57f63c250b4eed923b7f736707624", "f4f37512b9786dbd9980d343e47fdc41032d0ca6", "452e6c05d46e061290fefff8b46d0ff161998677", "6dd96add9e86201903e2d4febce8caee4c628f6c", "7f6c49645686f4814c01aca621341a0b244898b6", "0eff9e791ae39c367f5b468b32f0ec3bf58fd24c", "07a66e0f2777bb0005384defd228d5aa0bd7f9a4", "2a68bfb2aef3af04489070e8303df5001508fbd8", "239e046347d5075b3eeef5439050e9f2ca760b7b", "09b4b71049005a3be96722b2cae9418d9c0f2eee", "7b1d8110de1863a8758b66efc9b0d7421f4893c3" 
], "paperAbstract": "GPUs have become part of the mainstream high performance computing facilities that increasingly require more computational power to simulate physical phenomena quickly and accurately. However, GPU nodes also consume significantly more power than traditional CPU nodes, and high power consumption introduces new system operation challenges, including increased temperature, power/cooling cost, and lower system reliability. This paper explores how power consumption and temperature characteristics affect reliability, provides insights into what are the implications of such understanding, and how to exploit these insights toward predicting GPU errors using neural networks.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/MASCOTS.2017.12", "http://www.cs.wm.edu/~bnie/publications/2017MASCOTS.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dc588f31472281e86d86057311702517d4fb5493", "sources": [ "DBLP" ], "title": "Characterizing Temperature, Power, and Soft-Error Behaviors in Data Center Systems: Insights, Challenges, and Opportunities", "venue": "2017 IEEE 25th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS)", "year": 2017 }, "dc592f0e3d3a4b3c39e3a11588edffee76b974db": { "authors": [ { "ids": [ "2028147" ], "name": "Petra Berenbrink" }, { "ids": [ "1702236" ], "name": "Andrea E. F. 
Clementi" }, { "ids": [ "1751058" ], "name": "Robert Els\u00e4sser" }, { "ids": [ "1704036" ], "name": "Peter Kling" }, { "ids": [ "2385357" ], "name": "Frederik Mallmann-Trenn" }, { "ids": [ "1693545" ], "name": "Emanuele Natale" } ], "doi": "10.1145/3087801.3087817", "doiUrl": "https://doi.org/10.1145/3087801.3087817", "entities": [ "Color", "Consensus (computer science)", "With high probability" ], "id": "dc592f0e3d3a4b3c39e3a11588edffee76b974db", "inCitations": [ "1275bf8f185b7abf328efd957cf77d07bbb2536e" ], "journalName": "", "journalPages": "335-344", "journalVolume": "", "outCitations": [ "18be9f461ab087c2cdb546a62d09d93ebf0a2961", "1253e6ec21897674442808c62bc892ec850df34c", "8251f323f1c90e2428b32c397761249f78367392", "5df0fddaf332602853ff9944f069ea49355b5b01", "716a235c0095dccd5ab72f6edb65c80ac131f70f", "304a2c940e6f16acfa7e9cbe5591b1e39f76728b", "807c379142004805db66fe9130a7490ff05f3157", "3b4d5b3e93aa6d5ea413c726c17057d8d1eafb2d", "07a152ad1c17b35396d8b372cbde16e89705c7ec", "893bb2d6a150ef04c06b143ab1854af5b863a730", "b65e7a34b0bab3f293eb26985a87d81bbced3311", "10b44b914a35142eb7c1cff7a33e5527715561ee", "1b0d61aca9971768557958f083d4d741ae22414d", "83373b291736be8280caef7b5fa8ae07b6affa44", "a666369d920ccdd1d66095d5263f22b788d26ed8", "211a6f0452fa889f90b87f256d2c720e02df10e7", "8902cafc48a55f3a8da2c5494be33212e5f16d92", "0c9215e0a103dc78dd8d14337c7caf0ed6a1b395", "32928ac8acd055a8f0c49fd5e5152104123a2730", "46d2a0131339d8cabb7fc431b8717fc6dd15d672", "b06f887b880c45e9fd2cfd85a3f28cff7013f0cd", "3045ab5550d9a5d1cd30f37a0547b956f570f14c", "fa7e9af14a46e07db867d9d01cd885e02a06fd62", "9012387d96865fb6c39f8a716ac7dadd928abe79", "31eb28bf93e7e85c862475095b97f11a3f30741b", "b23a15b89cf561d8fa6110dc0339424e407bc443" ], "paperAbstract": "We study consensus processes on the complete graph of n nodes. Initially, each node supports one from opinion from a set of up to n different opinions. Nodes randomly and in parallel sample the opinions of constant many nodes. 
Based on these samples, they use an update rule to change their own opinion. The goal is to reach consensus, a configuration where all nodes support the same opinion. We compare two well-known update rules: 2-Choices and 3-Majority. In the former, each node samples two nodes and adopts their opinion if they agree. In the latter, each node samples three nodes: If an opinion is supported by at least two samples the node adopts it, otherwise it randomly adopts one of the sampled opinions. Known results for these update rules focus on initial configurations with a limited number of colors (say n 1 3 ), or typically assume a bias, where one opinion has a much larger support than any other. For such biased configurations, the time to reach consensus is roughly the same for 2-Choices and 3-Majority. Interestingly, we prove that this is no longer true for configurations with a large number of initial colors. In particular, we show that 3-Majority reaches consensus with high probability in O ( n \u00b7 log n ) rounds, while 2-Choices can need \u03a9(n/ log n) rounds. We thus get the first unconditional sublinear bound for 3-Majority and the first result separating the consensus time of these processes. Along the way, we develop a framework that allows a fine-grained comparison between consensus processes from a specific class. 
We believe that this framework might help to classify the performance of more consensus processes.", "pdfUrls": [ "https://arxiv.org/pdf/1702.04921v1.pdf", "http://arxiv.org/abs/1702.04921", "http://doi.acm.org/10.1145/3087801.3087817" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/dc59/2f0e3d3a4b3c39e3a11588edffee76b974db.pdf", "s2Url": "https://semanticscholar.org/paper/dc592f0e3d3a4b3c39e3a11588edffee76b974db", "sources": [ "DBLP" ], "title": "Ignore or Comply?: On Breaking Symmetry in Consensus", "venue": "PODC", "year": 2017 }, "dc613690d3704d374f6147d3eb1011a64b7f3e12": { "authors": [ { "ids": [ "3101741" ], "name": "Jongse Park" }, { "ids": [ "32646274" ], "name": "Hardik Sharma" }, { "ids": [ "33278013" ], "name": "Divya Mahajan" }, { "ids": [ "11165396" ], "name": "Joon Kyung Kim" }, { "ids": [ "5708196" ], "name": "Preston Olds" }, { "ids": [ "1696563" ], "name": "Hadi Esmaeilzadeh" } ], "doi": "10.1145/3123939.3123979", "doiUrl": "https://doi.org/10.1145/3123939.3123979", "entities": [ "Algorithm", "Application-specific integrated circuit", "Broadcast automation", "Compiler", "Computer program", "Digital subscriber line", "Domain-specific language", "Field-programmable gate array", "Gradient", "Gradient descent", "High- and low-level", "Machine learning", "Parallel computing", "Processor design", "Programmer", "Scalability", "Software development", "Source lines of code", "Speedup", "Thread (computing)" ], "id": "dc613690d3704d374f6147d3eb1011a64b7f3e12", "inCitations": [ "2512a6ced085503c399ee512ecaeb88606081261", "381b008b49d04c1bd5ff00649521fa028b9d3ea8", "e9908fbf95834a81b99473277b9298cc54a5c471" ], "journalName": "", "journalPages": "367-381", "journalVolume": "", "outCitations": [ "9ad2617df15aa7cd28bdff2cab35aa2b4b580e5d", "fd12d8d785de4fb1b0d6704a52161f4fa3c34088", "406a93739fb99f806bdbf4612fb4af2aa7537581", "104d31cda9a1658a0bb693e17d935c78e158c89b", "0c7465f733161ed2c9818da22d77c2cb518f8f58", 
"462d2583ebe7cdc6d2ea5fea714bac563f234140", "5c3785bc4dc07d7e77deef7e90973bdeeea760a5", "3c15c84746f4473935e4c1951294452c6a52704e", "68837728232463651283edbb7ef0c93b2f502b2b", "399acab2bee6eccbfffe4a2ce688b6b1075e9c5e", "0144941d255dad89d3d90c2d131a15cc01df9829", "269c24a4aad9be622b609a0860f5df80688c2f93", "3857cd2ee9a3f433744eac366872acf16b445b2c", "46060a2decf78b81100d94c8e53635f201c715a3", "5a1913f293facdb8c6c162d5763b8b7743c1794d", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "6e7c6d76b0eeead22851339f1e52846b8ae72674", "09f72f02083830c1881b86e6016e1fe3fe41f65f", "f288e9394261e542867b3a0eecdea706bbd7da9f", "a97c6af6016bb775cfe06b99df5b52bab847e72e", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "da3d0e745fe6e616601d779b8dbf36616c6ec32c", "9e5ab6456ce2bd6be5f63c757134b8b3720d1785", "1087108b598bb447654144849673b750b237c73f", "48b8dcc7f172b94df461f00ae8135bde17854fd2", "20b5fd451d5b32e895e52be224540a0e666f1f2e", "8d67bac352dcd43c9c08f917ba8c4bebb444b55d", "a058935fd019c2367fd32c16cd1ce6983a29aafb", "2bb28f6105ca30b2bc1ba91578234ecb12e788a6", "26f1b2bf3f13707e6be671a10c5a1f057bce2515", "29c7d959f322f7006186e3c344ddb9e41e3a8d1f", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "30ec6ebe977de36c2848da0f6e191d4fb18ccb69", "06ce77e4abea63948580340be25d7f2a80369e5a", "c430950a4d363928bbbeca5ada01f29d8407e073", "49b4094f2c313a92da4461572c0bef80b0d7d649", "be715602cca477cd4b5f08260dca6a759d213e62", "006662a19c6383e8ee15616c90be206cd08867f0", "211a125c77da70a958d1dc9f70ecc29b9a69f796", "3c029e72f5c75c8dd87a6acd43d05f23407e39cf", "8866824806018b89fe373c01474daa3744c9db7d", "48215210c7adbe39063af6b46ac116b0aa22a2b0", "14a37c208a4b35d39496867abb2a6be40808e04a", "596df6fb4d50c7886948b08f525c4e3393d05a44", "6e7bf0655ea96f00c5759901406480fe941b6f7f", "15b275f0421c606f5903532e9964b140cbb2f878", "6d1ed933b64fea2fa710ae7d6298dd8310f801ce", "7c2ce21f7541af90add6ec99164d077a8d4d284f", "44c0792bf8dbc76f8e567e284c7c6a5f26d6a97f", "b6f304c8d51df47e10f4151924e7007956eaed00", 
"56828bb7ad555eed8d43e6d3eba4ee39e862defe", "74624d8d89d192e89640ead9371e3a1766fd4ffe", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "58462eed4e22d34d35d31be2b902b4eb18a231e0", "3daf7f079546131b9616123e0e013e47344ec2e7", "1ee339ce398778d4fcd68b6346ff57b605329eca", "6f4e372cdce030f95ec8208582ea04ed41496672", "4b197d60de05e14781d67a318b29a4d4600a7460", "2ffc74bec88d8762a613256589891ff323123e99", "ad9e0505bce49b07c9f135bb071fb4285f6f6986", "17f5bbe172a783b79026ae43d2635c6817a5ba8b", "bb0a9c9a4936298a7852f0411b7684789e37c282", "64bad9c3e4d8ed38c16b0086da865ccd574e836a", "20941b4ca2fb99512bdceeec3f038921b2dff293", "b970c9d53c699a8e09f1d8dbe440b6f309712a89", "2a19d31a818be868bf39dcf258b36a86ef2ece3f", "5bfecd14937da569eabec0afea710db846d3899b", "0541d5338adc48276b3b8cd3a141d799e2d40150" ], "paperAbstract": "The growing scale and complexity of Machine Learning (ML) algorithms has resulted in prevalent use of distributed general-purpose systems. In a rather disjoint effort, the community is focusing mostly on high performance single-node accelerators for learning. This work bridges these two paradigms and offers CoSMIC, a full computing stack constituting language, compiler, system software, template architecture, and circuit generators, that enable programmable acceleration of learning at scale. CoSMIC enables programmers to exploit scale-out acceleration using FPGAs and Programmable ASICs (P-ASICs) from a high-level and mathematical Domain-Specific Language (DSL). Nonetheless, CoSMIC does not require programmers to delve into the onerous task of system software development or hardware design. CoSMIC achieves three conflicting objectives of efficiency, automation, and programmability, by integrating a novel multi-threaded template accelerator architecture and a cohesive stack that generates the hardware and software code from its high-level DSL. 
CoSMIC can accelerate a wide range of learning algorithms that are most commonly trained using parallel variants of gradient descent. The key is to distribute partial gradient calculations of the learning algorithms across the accelerator-augmented nodes of the scale-out system. Additionally, CoSMIC leverages the parallelizability of the algorithms to offer multi-threaded acceleration within each node. Multi-threading allows CoSMIC to efficiently exploit the numerous resources that are becoming available on modern FPGAs/P-ASICs by striking a balance between multi-threaded parallelism and single-threaded performance. CoSMIC takes advantage of algorithmic properties of ML to offer a specialized system software that optimizes task allocation, role-assignment, thread management, and internode communication. We evaluate the versatility and efficiency of CoSMIC for 10 different machine learning applications from various domains. On average, a 16-node CoSMIC with UltraScale+ FPGAs offers 18.8× speedup over a 16-node Spark system with Xeon processors while the programmer only writes 22--55 lines of code. CoSMIC offers higher scalability compared to the state-of-the-art Spark; scaling from 4 to 16 nodes with CoSMIC yields 2.7× improvements whereas Spark offers 1.8×. These results confirm that the full-stack approach of CoSMIC takes an effective and vital step towards enabling scale-out acceleration for machine learning.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123979" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dc613690d3704d374f6147d3eb1011a64b7f3e12", "sources": [ "DBLP" ], "title": "Scale-out acceleration for machine learning", "venue": "MICRO", "year": 2017 }, "dc9c3a7d409bfa7af5de09c6afab3a664008b1e5": { "authors": [ { "ids": [ "9528094" ], "name": "Mohammad Alian" }, { "ids": [ "10796153" ], "name": "Ahmed H. M. O. 
Abulila" }, { "ids": [ "10757538" ], "name": "Lokesh Jindal" }, { "ids": [ "1862763" ], "name": "Daehoon Kim" }, { "ids": [ "1686484" ], "name": "Nam Sung Kim" } ], "doi": "10.1109/HPCA.2017.57", "doiUrl": "https://doi.org/10.1109/HPCA.2017.57", "entities": [ "Advanced Configuration and Power Interface", "Baseline (configuration management)", "Central processing unit", "Client\u2013server model", "Data-intensive computing", "Network interface", "Network interface controller", "Network packet", "Online and offline", "Power management", "Response time (technology)", "Server (computing)", "Service-level agreement" ], "id": "dc9c3a7d409bfa7af5de09c6afab3a664008b1e5", "inCitations": [], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "25-36", "journalVolume": "", "outCitations": [ "40a01f30a6bc67175a0974617e7a42afd7ca8f39", "afe58c76edcba40d644c31d0d93da2bf167bb4a0", "7317dcd52ea4dee743ca377dc0497397b0df2d15", "15e36995ca63b271e28b8bb9c39604568d03dd49", "6994d3b13e540784cfeca050b9713478a55aa864", "0831a5baf38c9b3d43c755319a602b15fc01c52d", "08632fe2b934ed15d3499e7321282c81adc2c390", "76129c74a6a2223169d0525ca7857547eada36c8", "352a8957005dc5519b15ed1870751ec494d66395", "00919d778377ec8e4e037d8ebafc76c9de52db4b", "0d683085618e654a173b3590c4d2b431569cbfb6", "4aec1ba26ca8a42a041dc0ac6a43dc91f1b6abc1", "18e9a7eea9c714c24152b9c6dd5cd12fb2c4b495", "0623414994c29a74c06eeea0a145e9d2e72e987a", "27f8ac77b89986f7a24f929b200b6a358b8f7d01", "cae29bb8d2cab76148568ad659ba9d4f34c91014", "d01a01e0ff9f730517231f9d2aad201e14080795", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "138856ad6b8b4cca92965aacb20961aaa4e34a92", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "1d035d1d445f5ae5457db98af49ce1b87be1ebd5", "64845a653e4762461a29be0cae3c0c10a01b1d14", "8b10b13fb495101d1e4eb768907cff05e3bd9315" ], "paperAbstract": "The rate of network packets encapsulating requests from clients can significantly affect the utilization, and 
thus performance and sleep states of processors in servers deploying a power management policy. To improve energy efficiency, servers may adopt an aggressive power management policy that frequently transitions a processor to a low-performance or sleep state at a low utilization. However, such servers may not respond to a sudden increase in the rate of requests from clients early enough due to a considerable performance penalty of transitioning a processor from a sleep or low-performance state to a high-performance state. This in turn entails violations of a service level agreement (SLA), discourages server operators from deploying an aggressive power management policy, and thus wastes energy during low-utilization periods. For both fast response time and high energy-efficiency, we propose NCAP, Network-driven, packet Context-Aware Power management for client-server architecture. NCAP enhances a network interface card (NIC) and its driver such that it can examine received and transmitted network packets, determine the rate of network packets containing latency-critical requests, and proactively transition a processor to an appropriate performance or sleep state. 
To demonstrate the efficacy, we evaluate on-line data-intensive (OLDI) applications and show that a server deploying NCAP consumes 37~61% lower processor energy than a baseline server while satisfying a given SLA at various load levels.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.57" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dc9c3a7d409bfa7af5de09c6afab3a664008b1e5", "sources": [ "DBLP" ], "title": "NCAP: Network-Driven, Packet Context-Aware Power Management for Client-Server Architecture", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "dcbb7001fd6f8eab583424029e91062d1d6b6521": { "authors": [ { "ids": [ "2694667" ], "name": "Hafiz Fahad Sheikh" }, { "ids": [ "1685061" ], "name": "Ishfaq Ahmad" }, { "ids": [ "40791418" ], "name": "Sheheryar Arshad" }, { "ids": [ "1917528" ], "name": "Alex Aved" } ], "doi": "10.1109/IGCC.2017.8323586", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323586", "entities": [ "Algorithm", "Allocation", "Categorization", "Conflict (Psychology)", "Dynamic frequency scaling", "Dynamic voltage scaling", "Frequency scaling", "Heuristic", "Heuristics", "Integer programming", "Linear programming", "Pareto efficiency", "Performance Evaluation", "Polyethylene terephthalate", "Program optimization", "Radiotherapy Systems, Linear Accelerator", "Scheduling (computing)", "Solutions", "algorithm" ], "id": "dcbb7001fd6f8eab583424029e91062d1d6b6521", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-6", "journalVolume": "", "outCitations": [ "64ff16765c61fd2a8b5b01462fe7f0175847f355", "20070cc8f17d24a5c2b49c5b6eb43d39ff078e94", "35a1ae598c53785ec3957e368040563ee366ecbe", "e41add4e5c8cbabb11498507c7048bf60809f0ab", "3380d106e342348aae7500081da82fd415d06c1e", "01bc34c45f1c368b5c09ce51cfa218993761c168", "a002acdce7eacd1daab28867f41dcc7488ebdc77", 
"46123f4152499402987489a5e1a54ab5a5e55a84", "30403fbaf24c83795bb957591641f9a8940fc3af", "2612cdd58d0e148b5287818ba9f6d6fc973b11d9", "7e27803d1f657cf024fddebc284d08d4d20920d1", "5fb59094a502a6fe29fde9f4237193a1cc0258d6", "f487ecfd2fe2c0872657310cebbb1a4c50fb6c9d", "5970ed52ee13472729ee7403085274554b5cba2e", "5aaf038072a806aa0980cd91c333543571531c42", "2837d07d403e170e1daab46ea699f074764464d6", "b3de6b962a8a8cf52244722ec40fde7e48d43ac3", "3b81e4f528c585bc0e7decc856e174979690c955", "1d403b5a5bc121abb3c09d21bf56a6592df89319", "d4ed75af0af2b05fab995ff8a0c37041014acd5e", "7a5db7b2d050efc1ee9fa11b3bdad7340887ea3c", "07e6ca15ba66473976f6b5d462a235c92cfc82d6", "7d32378c8726aca8ea73b17b2deb5add9cea7f4a", "61551c028921adb1e77ddfce0b54d4a841417d54", "bf43f6ae0f4a2da331c97b79bfd634f838cf3835", "2e151b36863baaca9e9c274973842f9ce428badf", "81bff44cf5fea59db18751e1659062b513e86523", "022704eaa6f52d090bcedb58e94950e1ae5a6cb5", "1d2764fc4147834a7185f1695b971e2343dfaa6b", "8a426f487111878782c5e31b12b50601fde58989", "ac37f867cf330149bbf0866710c572eb9c941c8f" ], "paperAbstract": "The task-to-core scheduling problem using Dynamic Voltage and Frequency Scaling (DVFS) for achieving three objectives of performance, energy, and temperature (PET), poses algorithmic challenges as it involves conflicting goals and trade-offs. Some myriad static algorithms have been proposed for solving this problem which can be roughly categorized into three groups: approaches for generating optimal solutions (for smaller sizes problems), complex optimization techniques, and fast heuristics. These algorithms generate multi-dimensional results which can be hardly intelligible. The assessment of these results requires new comparison methods and concise evaluation measures. This paper proposes a set of benchmarks and evaluation procedures for carrying out methodical comparisons of various algorithms for solving the PET-aware task-to-core scheduling problem. 
The proposed performance measures assist in judiciously comparing these different algorithms and analyzing their results on a unified basis. The goal is also to seek answers as to how good the Pareto-optimal algorithms are compared to fast heuristics tackling the same problem with the same assumptions. At the same time, we are interested in knowing how good both the groups of algorithms are compared to the absolute optimal (at least for small sets of problems). In addition, the paper provides methods for evaluating trade-offs and determining which application and target parameters affect the results (performance, energy consumed and temperature achieved) of these algorithms. Extensive experimentations facilitate a comprehensive comparison of different kinds of algorithms amongst themselves as well as with optimal solutions obtained through Integer Linear Programming as a reference.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323586" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dcbb7001fd6f8eab583424029e91062d1d6b6521", "sources": [ "DBLP" ], "title": "Performance evaluation of diverse techniques for performance, energy, and temperature efficient task allocation", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "dd03ad352c47f81bf9c8b09709b8804f2f4f20e4": { "authors": [ { "ids": [ "40144368" ], "name": "Chen Li" }, { "ids": [ "2769696" ], "name": "Zhonghua Lu" }, { "ids": [ "34473258" ], "name": "Yonghong Hu" }, { "ids": [ "1713096" ], "name": "Fang Liu" }, { "ids": [ "36541522" ], "name": "Jue Wang" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.46", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.46", "entities": [ "Algorithm", "Artificial neural network", "Backpropagation", "CVAR", "Computation", "Computational complexity theory", "Experiment", "Genetic algorithm", "Numerical analysis", "Penalty method", "Scalability", "Simulated annealing", "Simulation", 
"Supercomputer" ], "id": "dd03ad352c47f81bf9c8b09709b8804f2f4f20e4", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "348-355", "journalVolume": "", "outCitations": [ "589419fa3b83c68f23da4c32e73e7285487de6f0", "42f733e7221324396580ccc1759201a496542d96", "e0200705f91e3fc54f29f721b25cc1a52d51d208", "29983858f9c5013be88c11ba2510e68546eecd4e", "eeece2dd359bbf7145c735393ea7df82739c9776", "14ae20efeb889f3c32fa1e20431c90fa8eb4e817", "9e7d2cebee4ced1fb0ffdc7a589c5013b6c4a9a3", "aac5b60d536ca01023e33673919999c957a2dd37", "146bb2ea1fbdd86f81cd0dae7d3fd63decac9f5c", "f22f6972e66bdd2e769fa64b0df0a13063c0c101", "ce55acca734e44787b72d33bceb952b148a3b3a8", "1e2815b20142064cf06e1318b8d0bb32f6e7d70c", "e5903c482d81df7f2be306006ef9ecfbe7fc7a39", "21787d94fd0307aed5902387ea30e48b6cc7a0a2", "dc3bca53d79d1ceb0c8e65800126c2da22bd824c", "5a271c187cd4733f4deff7875f6858c5ccec9923", "b8c86e9cfa2e03f37a85b13ec6a6bcb21180a4bb", "8863790b149edfb586db318363e28182a6fedc80", "86c46e818a902a9b596a10313861c30112cc94f9" ], "paperAbstract": "In recent years, fuzzy portfolio selection theory has been well developed and widely applied. Based on the credibility theory, several fuzzy portfolio selection models have been proposed. The fuzzy Mean-CVaR portfolio model is one of the state-of-the-art. However, its' fuzzy nature which increases the computational complexity makes it take a long time to solve. 
In order to solve the fuzzy Mean-CVaR portfolio model efficiently, a hybrid intelligent algorithm is designed by integrating Genetic Algorithm(GA) with adaptive penalty function, Simulated Annealing Resilient Back Propagation (SARPROP) neural network and fuzzy simulation techniques, and to accelerate the computation speed further, we parallelize the hybrid intelligent algorithm with MPI technology. In order to demonstrate its validity and efficiency, we achieve numerical experiments on the Era supercomputer, and the results are compared with the method which is obtained by integrating traditional GA and fuzzy simulation directly. The results show that hybrid intelligent algorithm can get better performance. Experiments under different processor cores also achieved on the Era supercomputer demonstrate the scalability of the parallel hybrid intelligent algorithm.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.46" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dd03ad352c47f81bf9c8b09709b8804f2f4f20e4", "sources": [ "DBLP" ], "title": "A Parallel Hybrid Intelligent Algorithm for Fuzzy Mean-CVaR Portfolio Model", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "dd2d7441c3bdf83a6fe62ea5f1241ba7e102f632": { "authors": [ { "ids": [ "3436523" ], "name": "Peyman Faizian" }, { "ids": [ "2087174" ], "name": "Md Atiqul Mollah" }, { "ids": [ "2221682" ], "name": "Md Shafayat Rahman" }, { "ids": [ "1737003" ], "name": "Xin Yuan" }, { "ids": [ "1726678" ], "name": "Scott Pakin" }, { "ids": [ "3344319" ], "name": "Mike Lang" } ], "doi": "10.1109/HOTI.2017.21", "doiUrl": "https://doi.org/10.1109/HOTI.2017.21", "entities": [ "Interconnection", "Network switch", "Throughput" ], "id": "dd2d7441c3bdf83a6fe62ea5f1241ba7e102f632", "inCitations": 
[], "journalName": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "journalPages": "33-40", "journalVolume": "", "outCitations": [ "09adae55a947e420e2d73de8d4e3f5a1cf4e483f", "0efcb98fab0c44bfc6a5403483b7cedca1278bcb", "18326cb5a02a4eafdc908cfce62ddeb93c244fd1", "33299bbc74d62c9d83f714f0753fc0f2ecadc645", "14b82ab954a85cb8b336e86cf536c5701ca722e9", "b94d69b03a7afa113300b0913f1cfbb40659f3b0", "5521b5883cf33bf88a1efd65e97aa4ad3c8a1cdb", "4675b497d84f63ca315bfd62527cd591f8737102", "5a2c137ad03d65cc75518732b756fcb3618b1baa", "3ec4cf958f6ee00dc00aa14840c96268c4c3f9c9", "2d086787132666be7d425c5534132b0956c30435" ], "paperAbstract": "Throughput performance, an important metric for interconnection networks, is often quantified by the aggregate throughput for a set of representative traffic patterns. A number of models have been developed to estimate the aggregate throughput for a given traffic pattern on an interconnection network. Since all of the models predict the same property of interconnection networks, ideally, they should give similar performance results or at least follow the same performance trend. In this work, we examine four commonly used interconnect throughput models and identify the cases when all models show similar trends, when different models yield different trends, and when different models produce contradictory results. 
Our study reveals important properties of the models and demonstrates the subtle differences among them, which are important for an interconnect designer/researcher to understand, in order to properly select a throughput model in the process of interconnect evaluation.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HOTI.2017.21" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dd2d7441c3bdf83a6fe62ea5f1241ba7e102f632", "sources": [ "DBLP" ], "title": "Throughput Models of Interconnection Networks: The Good, the Bad, and the Ugly", "venue": "2017 IEEE 25th Annual Symposium on High-Performance Interconnects (HOTI)", "year": 2017 }, "dd31f8b041a007c122bee93dbfc8d42c5f33be3e": { "authors": [ { "ids": [ "34835713" ], "name": "Kyuho Jeong" }, { "ids": [ "1680675" ], "name": "Renato J. O. Figueiredo" }, { "ids": [ "3272945" ], "name": "Kohei Ichikawa" } ], "doi": "10.1109/CLOUD.2017.11", "doiUrl": "https://doi.org/10.1109/CLOUD.2017.11", "entities": [ "Cloud computing", "Data center", "Encapsulation (networking)", "Hypervisor", "Multitenancy", "Network switch", "Private network", "Requirement", "Rewriting", "Scalability", "Software deployment", "Software-defined networking", "Testbed", "Throughput", "Tunneling protocol", "Virtual private network" ], "id": "dd31f8b041a007c122bee93dbfc8d42c5f33be3e", "inCitations": [ "e6fb9f8f9a5ab896fde4a2bffda435abd30e1a7e" ], "journalName": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "journalPages": "9-17", "journalVolume": "", "outCitations": [ "3dde3fec553b8d24a85d7059a3cc629ab33f7578", "663e064469ad91e6bda345d216504b4c868f537b", "c4f94368cab4575431ca56645ab4688bc907128b", "9d3187b944f086ebcd6011166fe925094d0f5e48", "22630a79f1c50603c1356f6ac9dc8524a18d4061", "0a72d4c1a89edcffdb3e0ae32c5ee55b2b99e40c", "27f4001214ce0d449eb05d33626f444526accc7c", "4bf97ac7427196bff2b9c689b53f34bbe98d52ce", "058f6752d85a517aae298586fdf117acdd7560ea", 
"1ff0bb113028af7bb9b0a62d966247c8ff87f811", "2648c3271b5efbef6337719d5d148f85912355d3", "07cc5617aefe4e6bd616482b75bddf2429c831cc", "52ade19511cbd572be02c44063cff980ce70d032", "68bad7754b42992c755bdb490be67d27820866b2", "e92db48bc21395bc1fb59d73047cd3431809a739", "6d6030e70859795569baceaba58abb17ec62cca8", "d1ecfb7ec04fc3b3f93d9a5bf01b12eb38238285", "84e0660e922da41223b9723bef60f5350a98d427", "697709f821248720d30267775ef4b971f17baa6d", "205079b51ff5ec7644573a0eb68860327b2d221a", "bee4b0f18cc751090d0442cc2d4c65a05ba5bf3c", "640af017aa8d11f9f31480155c8d5d1a0d8865d7" ], "paperAbstract": "Multi-tenant data centers for cloud computing require the deployment of virtual private networks for tenants in an on-demand manner, providing isolation and security between tenants. To address these requirements, network virtualization techniques such as encapsulation and tunneling have been widely used. However, these approaches inherently incur processing overhead on end-points (such as the host hypervisor), reducing the effective throughput for the tenant virtual network compared to the native network. This problem is exacerbated with increases in line rates, now exceeding 10Gbps. In this paper, we introduce PARES (PAcket REwriting on SDN), a novel technique which uses the packet rewriting feature of SDN switches to provide multi-tenancy in data center networks at edge switches, thereby reducing the load on end-point hypervisors and improving the throughput, compared to tunneling. Experiments in an SDN testbed show that our proposed data center arhictecture with PARES achieves near line-rate multi-tenancy virtualization with 10Gbps links (compared to 20% of line-rate for VXLAN tunneling), without incurring processing overhead at end-point hypervisors or guest servers. 
Additionally, the paper evaluates the scalability of PARES for ARP protocol handling and with respect to number of SDN flow entries.", "pdfUrls": [ "https://doi.org/10.1109/CLOUD.2017.11" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dd31f8b041a007c122bee93dbfc8d42c5f33be3e", "sources": [ "DBLP" ], "title": "PARES: Packet Rewriting on SDN-Enabled Edge Switches for Network Virtualization in Multi-Tenant Cloud Data Centers", "venue": "2017 IEEE 10th International Conference on Cloud Computing (CLOUD)", "year": 2017 }, "dd5a1af5f132510c69762479fa783859be38b948": { "authors": [ { "ids": [ "1760759" ], "name": "Giovanni Mariani" }, { "ids": [ "1979170" ], "name": "Andreea Anghel" }, { "ids": [ "1783257" ], "name": "Rik Jongerius" }, { "ids": [ "1784224" ], "name": "Gero Dittmann" } ], "doi": "", "doiUrl": "", "entities": [ "Approximation error", "Benchmark (computing)", "Cloud computing", "Computer performance", "Interaction", "Machine learning", "NAS Parallel Benchmarks", "Network-attached storage", "Pareto efficiency", "Profiling (computer programming)", "Supercomputer" ], "id": "dd5a1af5f132510c69762479fa783859be38b948", "inCitations": [ "e3e85bef335537753ae984abe6145c0b0a068afb", "8a03185e29cf5d2dc0121228235d84a8becb1bfc" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "524-533", "journalVolume": "", "outCitations": [ "50214f33437899a97c11d2bc838cd8c6ace34b60", "59d6d0603efcd0a7392886e4ea65deccc7726ed4", "4e8c75ea1ec2325619f0136d09957b3d30070e87", "246be658a2ce791070a440cfc965a3ddac325c18", "13d4c2f76a7c1a4d0a71204e1d5d263a3f5a7986", "54c408ee89d93ae85515aab4ddd05f1f6a902276", "f896983ab951a3dd651ba30f8b59e9275b6cdaa5", "1d6aebefbf69d6e423f75f10d35716be742e0730", "7c02eff1b79a78639747d250532651d4c92089d0", "0c590a850fc9d87ca46d82b836b2cc1093876d23", "0a992609540802ec80eadbc289da0ddf994002d4", "3a69f1592a65a85bab18a00481e98f95849d4d9d", 
"32aa639d91607c10520d163726dbafbcd79ea46e", "2549ea76f38182887f9076904d1b5b97182cc06e", "f6545677ffc4596d98d65269ab1fee464af5eab5", "04b757a576e598ceeb2c11722760efa1f6e9011d", "5740c4cfaf296b213f3752266edac93d9a10ef7b", "eefa64b35f3cb34afb783d5f9958b147be31deac", "88435a01f7623d35501769ce9614f59aeb026d4e", "0c7631b10f3e5c8d6dd083ec6ffcbfdda92d1f15", "0ce898bf3f3e4af56492e9135c7c85e3917e20e8", "54f0588b379b25ee6c22952e486d7da45bad7bab", "6b84d676c84b541d8524e2e55e8152ccd49b41d7", "f88b7bc3ba0392682a0c46f172e6fc88a14a2a71", "1237b20fa7afa553118cde32294c481db15286c6", "2c1e58b8846d912151e512ac971a20c123e52921", "f720507fc8f42f6f9257fe13e6a14f6c17ea7021", "5a709bc9f5e3e00e78fe386480b2f5bfc23a7709", "40c2cc3ff3afbd12bc8af32636ddc9e367801266", "27927355e7e2ab68726c6b1aa9d6bda98927f930", "35b22a635cf85dcb712ebee16ebb160a894c0723" ], "paperAbstract": "Cloud computing enables end users to execute high-performance computing applications by renting the required computing power. This pay-for-use approach enables small enterprises and startups to run HPC-related businesses with a significant saving in capital investment and a short time to market. When deploying an application in the cloud, the users may a) fail to understand the interactions of the application with the software layers implementing the cloud system, b) be unaware of some hardware details of the cloud system, and c) fail to understand how sharing part of the cloud system with other users might degrade application performance. These misunderstandings may lead the users to select suboptimal cloud configurations in terms of cost or performance. To aid the users in selecting the optimal cloud configuration for their applications, we suggest that the cloud provider generate a prediction model for the provided system. We propose applying machine-learning techniques to generate this prediction model. 
First, the cloud provider profiles a set of training applications by means of a hardware-independent profiler and then executes these applications on a set of training cloud configurations to collect actual performance values. The prediction model is trained to learn the dependencies of actual performance data on the application profile and cloud configuration parameters. The advantage of using a hardware-independent profiler is that the cloud users and the cloud provider can analyze applications on different machines and interface with the same prediction model. We validate the proposed methodology for a cloud system implemented with OpenStack. We apply the prediction model to the NAS parallel benchmarks. The resulting relative error is below 15% and the Pareto optimal cloud configurations finally found when maximizing application speed and minimizing execution cost on the prediction model are also at most 15% away from the actual optimal solutions.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101183" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dd5a1af5f132510c69762479fa783859be38b948", "sources": [ "DBLP" ], "title": "Predicting Cloud Performance for HPC Applications: A User-Oriented Approach", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "dd87fa4d0e3689df6b5ffb8873b71cf76458fea5": { "authors": [ { "ids": [ "2957170" ], "name": "Lijuan Jiang" }, { "ids": [ "39408447" ], "name": "Chao Yang" }, { "ids": [ "3341167" ], "name": "Yulong Ao" }, { "ids": [ "22987925" ], "name": "Wanwang Yin" }, { "ids": [ "33955807" ], "name": "Wenjing Ma" }, { "ids": [ "33288854" ], "name": "Qiao Sun" }, { "ids": [ "1702887" ], "name": "Fangfang Liu" }, { "ids": [ "2450368" ], "name": "Rongfen Lin" }, { "ids": [ "40188000" ], "name": "Peng Zhang" } ], "doi": "10.1109/ICPP.2017.51", "doiUrl": "https://doi.org/10.1109/ICPP.2017.51", "entities": [ "Algorithm", "BLAS", "Blocking 
(computing)", "Central processing unit", "Direct memory access", "Double-precision floating-point format", "Instruction scheduling", "Matrix multiplication", "Memory hierarchy", "Multiple buffering", "Parallel computing", "SW26010", "Scheduling (computing)", "Sunway", "Sunway TaihuLight", "Supercomputer", "The Matrix" ], "id": "dd87fa4d0e3689df6b5ffb8873b71cf76458fea5", "inCitations": [ "e45dea6588d1de0a23618e019031e67eedeeee26" ], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "422-431", "journalVolume": "", "outCitations": [ "b76269bf962989ce271bef7ea863ff4adf9c9de6", "004eda59c0ffceb2417bee87c95539eae4bdf0cd", "35a08e104a8ed8a6a2002dc6c5d55826f204ef18", "3bfc4ed66f9589582745ad20ce7faedbc2148ee2", "37bc39697da911ccfbe463bad6b207862e55a007", "27bdd0a73b3d5f2c83ac7dfae447c20653dffa2d", "8d2fb424ccd5ae011dd444ac4fa8282bad9e76ab", "b64c39e3ef68fa9f25ae6d7752ad4f07916a0b57", "6286a7a5d4c810d31c417d28321bd67db34cac29", "035c542402de661b544603d84b7ec45bada14e7f", "eb4f23afcc86609d9fc5fe90000d9db44cb3e575", "2cc157afda51873c30b195fff56e917b9c06b853", "3b260b6286ccac6bd3933fdefee66fd83f000768", "7fa2507aec08b080558f8a2e0971e294095756f7", "16b5fa19661e1e26c1b967104948bf2f031a3612", "4c1d73a57ea993aca7174bb9f48919b2c739bbd0", "e3891bc66c44d684a273455e06fdf9d0d45ec607", "38466b62bccbdacd3ef1dab4514a7c010e8f45df", "12f1a2a510a4e86ecd75c8081a78620c71822f99", "114711c2516be1f5293f6d8d242c852b11097e9d", "2ea6a93199c9227fa0c1c7de13725f918c9be3a4", "1cc6d4ae705dd47ec409e01e349d4a46b722ee81" ], "paperAbstract": "The matrix-matrix multiplication is an essential building block that can be found in various scientific and engineering applications. High-performance implementations of the matrix-matrix multiplication on state-of-the-art processors may be of great importance for both the vendors and the users. 
In this paper, we present a detailed methodology of implementing and optimizing the double-precision general format matrix-matrix multiplication (DGEMM) kernel on the emerging SW26010 processor, which is used to build the Sunway TaihuLight supercomputer. We propose a three level blocking algorithm to orchestrate data on the memory hierarchy and expose parallelism on different hardware levels, and design a collective data sharing scheme by using the register communication mechanism to exchange data efficiently among different cores. On top of those, further optimizations are done based on a data-thread mapping method for efficient data distribution, a double buffering scheme for asynchronous DMA data transfer, and an instruction scheduling method for maximizing the pipeline usage. Experiment results show that the proposed DGEMM implementation can fully exploit the unique hardware features provided by SW26010 and can sustain up to 95% of the peak performance.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.51" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dd87fa4d0e3689df6b5ffb8873b71cf76458fea5", "sources": [ "DBLP" ], "title": "Towards Highly Efficient DGEMM on the Emerging SW26010 Many-Core Processor", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "ddc63fc3f3ee053dc6ff68aa0e1d8c2142a707f9": { "authors": [ { "ids": [ "38818051" ], "name": "Wen Jiang" }, { "ids": [ "35143368" ], "name": "Yuhui Deng" }, { "ids": [ "2424149" ], "name": "Xiaohua Meng" }, { "ids": [ "39921348" ], "name": "Cheng Hu" }, { "ids": [ "1718061" ], "name": "Yongtao Zhou" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.13", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.13", "entities": [ "Access time", "CPU cache", "Digital footprint", "Dynamic random-access memory", "Hard disk drive performance characteristics", "Hit (Internet)", "Magnetic storage", "Mechanical calculator", "Noise 
(electronics)", "On-board data handling", "Page cache", "Random-access memory", "Response time (technology)", "Static random-access memory", "Synthetic data" ], "id": "ddc63fc3f3ee053dc6ff68aa0e1d8c2142a707f9", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "98-105", "journalVolume": "", "outCitations": [ "dada152cefa186c6a560af07aa144f1ebcf818b0", "519845b1045a44db0becb0ec68af165c8f326495", "f361020e35dd86efec3385b626e16159592281ac", "12d6da762b2a5d512d383f3b587bd30c23c3df97", "697c07f627c3da577d02ca5c2d7c03ff0fc3b677", "0c89da13d0cad7539a8f08584d329ba4400929b9", "0f03119bde621214e21491db7db9c01dc12ac5a2", "59d426dda9e2d2db7b887440a77adebb1227631b", "8558857f33b4a7df03d682f83b055890043c0fa0", "e7c6f67a70b5cf0842a7a2fc497131a79b6ee2c5", "110631e23b57e5323dad5dfbfe761906cbc23e5b", "e4c1be299c957f00c61b7a713b0401059899aa84", "31ceeced5d23193c369b98170c45e66bae6ff77d", "5ab45c82a811162dc04efc9eea60f9b22b1e5a11", "075f51b0aeaac7ebed18d5fbf67e64a14c8943f1", "d6734984bdc270cb68edc79d42a9d09160f8ee98", "acef4ee56f859b121f82b84d444e8724aaa0eaa7", "4ce2925f76b279b2b31bf303c4d8f150ab5679ec", "0f2b93100e4de27110ebb7666566a438a5df5c94", "375e2b45e0d3037c7f988692365219d93712caee", "e88c1946b63420db9a4de85539ab89b7b1647b6b", "cc33c0f452e7e0299735b181d59a4204cebeccfb", "2929cb8d528e605b338779d5b9d452b59abcc536", "dbe73781be3fcba36bb85b491789a53003e3292f", "233997563379e02d37778f80c028a34209de5817" ], "paperAbstract": "Over the last two decades, the performance gap between RAM and disk drives has been widened by about 50% per year. This is because the disk access time was improved only about 8% per year due to the mechanical delays. 
In order to improve the disk performance, each disk is equipped with an on-board disk cache to bridge the performance gap between the high-speed I/O bus and the slow magnetic media. The on-board disk cache is normally made of SRAM. This is because the SRAM is faster and relatively insensitive to disturbances such as electrical noise in contrast to DRAM. However, it is challenging to increase the capacity of a single-chip SRAM. Furthermore, SRAM is much more expensive and power consuming than DRAM. Therefore, the manufacture normally integrates a relatively small-size on-board disk cache to the disk drives. This paper proposes to compress the on-board disk cache to improve the effective cache size, thus improving the disk performance. Because the prefetching of on-board disk cache has a significant impact on the disk performance, this paper only compresses the prefetched data to minimize the complexity and side effect of compression. The compression is processed in background without affecting ongoing requests. Synthetic traces and real traces are employed to evaluate the proposed method. 
Experimental results demonstrate that the average response time can be reduced up to 21% with the increase of the compression ratio, and the hit ratio achieves up to 3x improvement.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.13" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ddc63fc3f3ee053dc6ff68aa0e1d8c2142a707f9", "sources": [ "DBLP" ], "title": "Boosting Disk Performance by Compressing On-board Disk Cache", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "ddf2190eb836c47b5bd8d8ea766639e3b212caf8": { "authors": [ { "ids": [ "1997194" ], "name": "Han Song" }, { "ids": [ "35884242" ], "name": "Yongxin Zhu" }, { "ids": [ "2959781" ], "name": "Junjie Hou" }, { "ids": [ "36752489" ], "name": "Han Wu" }, { "ids": [ "25611721" ], "name": "Long Li" }, { "ids": [ "1729612" ], "name": "Qian Wang" }, { "ids": [ "2119644" ], "name": "Meikang Qiu" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.59", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.59", "entities": [ "Algorithm", "Analysis of algorithms", "Cubes", "Federal enterprise architecture", "Finite element method", "Isosurface", "Linear system", "Lookup table", "Marching cubes", "Mesh generation", "Monte Carlo method", "Numerical analysis", "Numerical method", "Octree", "Solid-state drive", "Supercomputer", "Value (ethics)" ], "id": "ddf2190eb836c47b5bd8d8ea766639e3b212caf8", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "450-457", "journalVolume": "", "outCitations": [ "f7d7ba62df72c5ee141d16bdc51a99a75dcd384a", 
"989c334edccf3d39b02a6399405209e748f13693", "1cb7415ea0b87bbd8f8480dff4f7fbcd9d75a41a", "02a808de5aa34685955fd1473433161edd20fd80", "705b2c27c5a49d0fd944b5489a3fb736297859be", "c43863e194de5a2469087e4f7d49131b412a2470", "db95421c61b815cfbb969d20a048abd56321dc9d", "0226cf6b2fbada60cffcdb28d4819cc4559d702b", "3ac715bebaa5771e612416943863b5da66a8ee14", "0c969a110e7ea067a31e030e7350eb7f9b9b0bcb", "1b440ab940ef64d676c7a63572b6a1544f4c4f76", "abc3a025d6cf54a13bac7d96e80983efa433fcb8", "1924f8240258129c772978a01dd13f4023d45366", "50596fc2fe529ab756bf55179a15d8d36278293d", "438997f5eb454e8f55023f402ee237314f5f7dda", "259a858dd0b9c3d68b6bd021bb6e0d03f5eff5ec", "3f4e34b9693e5eb3f6e1c63aeb496e5db3cfbdea", "8f2eed5179bf6d9499a246a75e7e94af280d83f4", "0b05a0fac27887356a6c79e30778bfafa494def4", "2a30b54e04720d0c5588a2a824a48797997bfe18", "445aff9b723cad60d3a2f51057d3b63369ac3841", "685452ab3b36db6bd057d0b7397459db7241c618", "75a1343bf3d9c25ca0708cad6c9f63f4f014b540", "12b0713eb6ec54a6d8f6a40bfe512947d42962e7", "bc3ef7b79fa3693f9ffe5323b20b08b008b46965" ], "paperAbstract": "Finite Element Analysis (FEA) is a numerical method for solving engineering problem. Implementations of FEA are common high performance computing (HPC) applications. As a typical application of FEA, surface reconstruction of large model, proves to be a computationally expensive task as well. Poisson Surface Reconstruction, a cutting-edge surface reconstruction algorithm, is a commonly used method for efficiently solving linear systems and it creates watertight surfaces from oriented point sets. Poisson Surface Reconstruction leverages an octree-based Marching Cubes (MC) method for isosurface extraction. Hierarchical octree structure avoids unnecessary cells visiting and prevents cracks arising. In this paper, we integrate several quality improved methods with MC and coordinate unrelated components, obtaining a better quality mesh implementation. 
We improve mesh quality based on an extended lookup table and modify the connectivity of some fundamental patterns in MC, which effectively remove the reconstruction holes, thus improving overall surface quality. As for the relative value between each vertex and the average isovalue, the extended table explicitly differentiates between “strictly larger” and “equal to”. Newly introduced patterns in MC statistically prevent poor quality triangles production. Moreover, a decision making algorithm is proposed to eliminate ambiguity problems. We adapt SnapMC algorithm to avoid non manifold triangles to a certain extent. Comparisons with traditional Poisson algorithm and Smooth Signed Distance (SSD) highlight the capability in quality mesh generation and efficacy in handling high computational demand.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.59" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ddf2190eb836c47b5bd8d8ea766639e3b212caf8", "sources": [ "DBLP" ], "title": "Integrated Quality Mesh Generation for Poisson Surface Reconstruction in HPC Applications", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "de6a490b8380088ec9067251a5afef059e634fe0": { "authors": [ { "ids": [ "7198431" ], "name": "Miryeong Kwon" }, { "ids": [ "1698586" ], "name": "Jie Zhang" }, { "ids": [ "26394325" ], "name": "Gyuyoung Park" }, { "ids": [ "2505238" ], "name": "Wonil Choi" }, { "ids": [ "1851743" ], "name": "David Donofrio" }, { "ids": [ "1746446" ], "name": "John Shalf" }, { "ids": [ "1717494" ], "name": "Mahmut T. 
Kandemir" }, { "ids": [ "36895144" ], "name": "Myoungsoo Jung" } ], "doi": "10.1109/IISWC.2017.8167759", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167759", "entities": [ "Computer data storage", "Digital footprint", "Disk array", "Scenario (computing)", "Signal trace", "System Information (Windows)" ], "id": "de6a490b8380088ec9067251a5afef059e634fe0", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "87-96", "journalVolume": "", "outCitations": [ "df8b536d1b25501af867dafa7243f5a2f7a7f43b", "627b93073977b7b7c5ae0cf610f41ee0ed27669c", "0d25cab731e4c0509f8b6a6886be743a317ce92e", "057d21830cde5b3be2fdb3a74ee69a3c7e9109f8", "05948e66aeefea1c969fdce16edb94ae94fb651e", "31ceeced5d23193c369b98170c45e66bae6ff77d", "96afaf4d2a491dce64561d2e56478d628db0cc8a", "438c51040ee6ccf9198e52d105c47e75d615b29c", "34f6cc5a0d7656a5c1d92049eda0533ca7c07add", "394cc3b1bfc88d7bbe4b0f120004d95d0c966df1", "1846ece0c7e21449e7425f973616f76aef9b2907", "2dcb0ed27b6a35b1dfe97b45604302a1f3705c01", "4e8839416133588c10cc56d6325db55a42fe2215", "075f51b0aeaac7ebed18d5fbf67e64a14c8943f1", "29a2c28f59974786be4c045ddfd04879329f06d9", "5d29de02f83a6b7c493c6f11c5adfb8e5e725c7d", "519845b1045a44db0becb0ec68af165c8f326495", "c0aa8a876bc1774214e6d4d9168e7356ac3edf00", "4d54c8a0ed7485310e95e7d1720b38256768f78f", "988d1a223e2ee40f2474f729ac3ac53e012d8337", "48bd098e650619d8e3a8684059bf5393f1dff9d4", "7a2274412948765bf872b765dafd8139e51000ff", "76d4f2374e4f5a9dfa69df8a9a33f627fff7e861", "5ecd441bf4a54c7500cd6fc185d8dd09638e12cb" ], "paperAbstract": "Block traces are widely used for system studies, model verifications, and design analyses in both industry and academia. 
While such traces include detailed block access patterns, existing trace-driven research unfortunately often fails to find true-north due to a lack of runtime contexts such as user idle periods and system delays, which are fundamentally linked to the characteristics of target storage hardware. In this work, we propose TraceTracker, a novel hardware/software co-evaluation method that allows users to reuse a broad range of the existing block traces by keeping most their execution contexts and user scenarios while adjusting them with new system information. Specifically, our TraceTracker's software evaluation model can infer CPU burst times and user idle periods from old storage traces, whereas its hardware evaluation method remasters the storage traces by interoperating the inferred time information, and updates all inter-arrival times by making them aware of the target storage system. We apply the proposed co-evaluation model to 577 traces, which were collected by servers from different institutions and locations a decade ago, and revive the traces on a high-performance flash-based storage array. 
The evaluation results reveal that the accuracy of the execution contexts reconstructed by TraceTracker is on average 99% and 96% with regard to the frequency of idle operations and the total idle periods, respectively.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167759", "https://arxiv.org/pdf/1709.04806v1.pdf", "http://arxiv.org/abs/1709.04806" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/de6a490b8380088ec9067251a5afef059e634fe0", "sources": [ "DBLP" ], "title": "TraceTracker: Hardware/software co-evaluation for large-scale I/O workload reconstruction", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "de71e2359995087b4ce7d46e4eb718c341c70ee0": { "authors": [ { "ids": [ "29017813" ], "name": "Sanjay Rawat" }, { "ids": [ "34615195" ], "name": "Vivek Jain" }, { "ids": [ "3234268" ], "name": "Ashish Kumar" }, { "ids": [ "2244479" ], "name": "Lucian Cojocar" }, { "ids": [ "1744275" ], "name": "Cristiano Giuffrida" }, { "ids": [ "3053948" ], "name": "Herbert Bos" } ], "doi": "", "doiUrl": "", "entities": [ "Binary file", "DARPA Grand Challenge", "Dataflow", "Parsing", "Scalability", "Software bug", "Software testing" ], "id": "de71e2359995087b4ce7d46e4eb718c341c70ee0", "inCitations": [ "cdaa7f657bc568a8d75e483b8def66009a9f19d4", "629191336187398e43f1021bdcc6c293a72d1ca8", "020af9e8d35b7f6ca563397a8e82778dfa7dac7b", "2cf43b8bc82f063e257bf21c92e5b038eacd34d3", "c924eaf7ed2e119449fb57b16d2216bf93edf8e2", "f447cc0306e90850ed57c69bd81bf71e1d6fb3ab", "824c05ae636ed07823b3a71ca1b7abf32676e437", "13d6a728b2eb1b1bc59ad661b1eb75aff4376025", "67ff9c425b17b78eaf7e3be970833aef41262cc8", "c0eb6da1f416d9305ccf0e8b2a6480b71dae0379", "30023acba3ac198a7d260228dc51fda8414b8860", "b0fd7a0f70b64c06031bb915d9aedd44b6550b16", "ea5e9f3af75b24a4c3cf09e0cf7119d5e2e9cff2", "0b549912e5f111c7c60eadda634ef4484427b684", "abf35711567f35bd5d802d3b03b66e95b0b8c1d8", 
"9c1fdacfcf083a252b00e3a37e3ff7d4f9f675f6", "e83d539cbab9ae4906c49e6daae8c6f952713c17", "08864ad6fc08aa2600b6c7325027defebfb66f05", "d9b98ccc75ec1a464e84121916fb39838dc61862", "6f546215728fa94b76344c35cb32253bd9f82bd3", "b6c948b65e8a62780cb94bc11fe9c5f2151945e2", "1ac30950aaab6297d2a7cb2d7a55ba6b9b2a100f", "70d5eae0007dd8f116f53850918f3e6eca9ef339" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "2c13dcfdc5ea2d355a46fe326c371038a00ba7f5", "7ae091ea6b9221fa8e7fe4c1295557fc1749a9d2", "dbf7776a7bbdae1fdd1e1add37bbb7094cdb23ef", "2d6495c5fd084edbf2a320aaae4fe65606a9fc7c", "00808012521201ae4970fe92b86d749630c9510c", "95baae72c5fcca4038339c350556dd6143d9a263", "19985cad0eb4010b22c2ae1ef9442e036a924244", "32c9b3a4990fdb26da19a4e0817936369a5e91bc", "0ff3bc9d452f44ab93b5feb1763118837c77fec3", "1148f37a8ca0a5ca0a26178c7d85a063bd539725", "34ebfcf4c815a081dcde964f7a73ba4a5a0641dc", "18e965d40f7dacb88bca7b0a231eca5adbfb6201", "48bb5f38620529dbc0b4b34a25862ed68ff3ffc4", "55b7d2e425c5e813ed51780845398ef5d246a4d3", "0ab393affe9d674ef790be14fdfade368f3e5989", "60df0046d1292b7a518de9c3c1742bfe42894a66", "27145fe45450babe306513efb97ae0ec8590c246", "8e0ed707501bb47519e871351c7f952a35158ca4", "1aabe6009e67aa3d2a33b8de3ee8683a1d675496", "08e2d172e510ab14713b28cc71a37e7c78cc7b13", "3ab43f7930699bf9769f4001351c1d5bc5088a79", "5556995fb630c47805bbba560287ea59ce357fa1", "642fb2cdcd5b02a97d08f06a9dbff9829f63a5c0", "0f2d6f8769060937fe32207eda9e257ca95dc08c", "fcc27324d2038b3c6f181baa85630087cfb5b5d5", "479bf197a3a9c5f6f7b06e64eefa0a90fa0c8b41", "8eeff5e62ad0dd4073eb80377db29e7cd7b8a24f", "79dea2d69bc7ce05a573f6fa5c1e4e4d98f8e112", "1f7e5e582663868ed2f6763f98066ca278177a61", "6ea63d09993b9a268689790ea8d25bc36345497e", "0653e2ed9f683868cb4539eb8718551242834f6b", "11443efe465ad544f478524da6c66c085b16e28b", "0228d60b7a56a3d778e5425c41eaf72cf0b6ec55" ], "paperAbstract": "Fuzzing is an effective software testing technique to find bugs. 
Given the size and complexity of real-world applications, modern fuzzers tend to be either scalable, but not effective in exploring bugs that lie deeper in the execution, or capable of penetrating deeper in the application, but not scalable. In this paper, we present an application-aware evolutionary fuzzing strategy that does not require any prior knowledge of the application or input format. In order to maximize coverage and explore deeper paths, we leverage controland data-flow features based on static and dynamic analysis to infer fundamental properties of the application. This enables much faster generation of interesting inputs compared to an application-agnostic approach. We implement our fuzzing strategy in VUzzer and evaluate it on three different datasets: DARPA Grand Challenge binaries (CGC), a set of real-world applications (binary input parsers), and the recently released LAVA dataset. On all of these datasets, VUzzer yields significantly better results than state-of-the-art fuzzers, by quickly finding several existing and new bugs.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/vuzzer-application-aware-evolutionary-fuzzing/", "http://www.cs.vu.nl/~giuffrida/papers/vuzzer-ndss-2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/de71/e2359995087b4ce7d46e4eb718c341c70ee0.pdf", "s2Url": "https://semanticscholar.org/paper/de71e2359995087b4ce7d46e4eb718c341c70ee0", "sources": [ "DBLP" ], "title": "VUzzer: Application-aware Evolutionary Fuzzing", "venue": "NDSS", "year": 2017 }, "debd9e33f34b367008357c91c2c70cb85cfc532c": { "authors": [ { "ids": [ "2849170" ], "name": "Matthias Schlaipfer" }, { "ids": [ "37980475" ], "name": "Kaushik Rajan" }, { "ids": [ "1941367" ], "name": "Akash Lal" }, { "ids": [ "3226699" ], "name": "Malavika Samak" } ], "doi": "10.1145/3132747.3132773", "doiUrl": "https://doi.org/10.1145/3132747.3132773", "entities": [ "Big data", "Computational complexity theory", "DSPACE", "Program 
optimization", "Program synthesis", "Query language", "Query optimization", "Rewrite (programming)", "Rewriting", "SQL", "Static program analysis", "Time complexity", "Undecidable problem" ], "id": "debd9e33f34b367008357c91c2c70cb85cfc532c", "inCitations": [ "6e4682ba700c6926b49aa7b6215ab8d9afdd7b7f" ], "journalName": "", "journalPages": "631-646", "journalVolume": "", "outCitations": [ "fbd592331942b42408f364a217d748131f9fb5ba", "bde2b2c50866c95dfd420c68217e39a985d45810", "cf08482fc32e5b2e5f5bb1262d5c324a010c276b", "da7155479d31287978f8d56ceafea2f5e5d908d6", "9ee76efb171dbc1264ab4b22933e3deedfd7fde8", "30ce81b19b8779145d15fe9a71a9d9f12dd9440a", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "080ed793c12d97436ae29851b5e34c54c07e3816", "15e3d493cffef853e478d188245a2b6eff4bd6c6", "907b4128683a4e341c3bd7677723c878ebc5ae5e", "b6571efa4483aa00d23bbcd36930c4877548ba38", "3e4af9e1e3e64be2ef79bbf63daf4ef640183719", "1455be30b0bb364ffc4c35d2cf1ee05cfb32595f", "292c949b9ed32848545520263153bb417927c78d", "67d18339ed72b7fc2152cb42b63362b570c11946", "8fbaf01fbf463d5671ed5e40dd2e23df0dab4edf", "7c889b839e99316f749c4d4bff45ccdd7dbd46ef", "11256a3695e1313bc0989935a94ee80342e25cd1", "64255583e8615ab92d0d35edfaa0595b574f1b1b", "0bfae505ad588d00d4b204acf8ba4b5646eac244", "0282e990528c6a9b4aa92cc196f46257fb4ccee1", "202e33581369f6050fc800ebc31615eb65649e78", "1ef301c1b275091b6a50d620b41df4722f2108f0", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "8c7cf9c759dcca3195dea6e27c2e25ee9a05671c", "08f13e484e7e51831ec13076d14570ced91a50fb" ], "paperAbstract": "Classical query optimization relies on a predefined set of rewrite rules to re-order and substitute SQL operators at a logical level. This paper proposes Blitz, a system that can synthesize efficient query-specific operators using automated program reasoning. Blitz uses static analysis to identify sub-queries as potential targets for optimization. 
For each sub-query, it constructs a template that defines a large space of possible operator implementations, all restricted to have linear time and space complexity. Blitz then employs program synthesis to instantiate the template and obtain a data-parallel operator implementation that is functionally equivalent to the original sub-query up to a bound on the input size.\n Program synthesis is an undecidable problem in general and often difficult to scale, even for bounded inputs. Blitz therefore uses a series of analyses to judiciously use program synthesis and incrementally construct complex operators.\n We integrated Blitz with existing big-data query languages by embedding the synthesized operators back into the query as User Defined Operators. We evaluated Blitz on several production queries from Microsoft running on two state-of-the-art query engines: SparkSQL as well as Scope, the big-data engine of Microsoft. Blitz produces correct optimizations despite the synthesis being bounded. 
The resulting queries have much more succinct query plans and demonstrate significant performance improvements on both big-data systems (1.3x --- 4.7x).", "pdfUrls": [ "https://www.microsoft.com/en-us/research/wp-content/uploads/2017/11/sosp17-final219.pdf", "http://doi.acm.org/10.1145/3132747.3132773" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/debd9e33f34b367008357c91c2c70cb85cfc532c", "sources": [ "DBLP" ], "title": "Optimizing Big-Data Queries Using Program Synthesis", "venue": "SOSP", "year": 2017 }, "dee62c826374fb94902ca15407872a29198ef102": { "authors": [ { "ids": [ "1742479" ], "name": "Hao Xu" }, { "ids": [ "2989804" ], "name": "Shasha Wen" }, { "ids": [ "31074563" ], "name": "Alfredo Gim\u00e9nez" }, { "ids": [ "33289410" ], "name": "Todd Gamblin" }, { "ids": [ "1785951" ], "name": "Xu Liu" } ], "doi": "10.1109/IPDPS.2017.97", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.97", "entities": [ "Bandwidth (signal processing)", "DR-DOS", "Data structure", "Experiment", "Machine learning", "Memory bandwidth", "Non-uniform memory access", "Program optimization", "Speedup", "Supervised learning", "Uniform memory access" ], "id": "dee62c826374fb94902ca15407872a29198ef102", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "367-376", "journalVolume": "", "outCitations": [ "f58c4b789f331f78d39eabac4a646ea87ba66e0b", "03e53dddc865bf688fe313a94ad186a4d96bffe0", "36e1b02a66ed928ef13e3a2ba6852e90a8713036", "11cd854e728e96b783c507d30a50b2fae51d3e79", "2d54112a3417b6a50ca299481a0139480800a2f8", "beea031c431014a0036a27091a41b5b1545a57b8", "326d1495d5288ce7fbe548809df56a8ac11da544", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "4a5dbda0c50f8183226bdeaa017a599351d5e80e", "09856da05bf463794a35799eaaee3a14a045afd1", "398aaf00253e2c29e6238dd0499aa3a75c76914c", "a1f5d2795d577e4f59c15125954684fa20474b9a", "7c5418a6c2ccd3e8cc069389d8884c3d8a6bf3ab", 
"3486ff0f2a4ecd1b616b56129334b13afd8619d6", "1a249a77a2024638788fc512f049e558d85b3aa5", "9872bf81d8559bfb5fcf4dc65674afba98dec470", "8beda53e2b3cd04434aaaabef463ef8e18706df1", "4cbd7800701981b58b4c1f6d53c7a66a5fb15633", "1f2ff98f9413bb36c641e9edcfa79f7b33eeb80a" ], "paperAbstract": "Non-Uniform Memory Access (NUMA) architectures are widely used in mainstream multi-socket computer systems to scale memory bandwidth. Without a NUMA-aware design, programs can suffer from significant performance degradation due to inter-socket bandwidth contention. However, identifying bandwidth contention is challenging. Existing methods measure bandwidth consumption. However, consumption alone is insufficient to quantify bandwidth contention. Furthermore, existing methods diagnose bandwidth for the entire program execution, but lack the ability to associate bandwidth performance to the source code and data structures involved. To address these challenges, we propose DR-BW, a new tool based on machine learning to identify bandwidth contention in NUMA architectures and provide optimization guidance. DR-BW first trains a set of micro benchmarks and extracts useful features to identify bandwidth contention via a supervised machine learning model. Our experiments show that DR-BW achieves more than 96% accuracy. Second, DR-BW associates memory accesses that incur bandwidth contention with data objects, which provides intuitive guidance for optimization. Third, we apply DR-BW to a number of real benchmarks. 
Our optimization based on the insights obtained from DR-BW yields up to a 6.5× speedup in modern NUMA architectures.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.97" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dee62c826374fb94902ca15407872a29198ef102", "sources": [ "DBLP" ], "title": "DR-BW: Identifying Bandwidth Contention in NUMA Architectures with Supervised Learning", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "df1f0ad2b3f9e119938560045bb26629cce0006a": { "authors": [ { "ids": [ "1757387" ], "name": "Hao Pan" }, { "ids": [ "1687666" ], "name": "Yi-Chao Chen" }, { "ids": [ "1684808" ], "name": "Guangtao Xue" }, { "ids": [ "2384727" ], "name": "Xiaoyu Ji" } ], "doi": "10.1145/3117811.3117824", "doiUrl": "https://doi.org/10.1145/3117811.3117824", "entities": [ "Central processing unit", "Communications protocol", "Duplex (telecommunications)", "Laptop", "Mobile device", "Near field communication", "One-way function", "Smartphone", "Social network", "Transmitter" ], "id": "df1f0ad2b3f9e119938560045bb26629cce0006a", "inCitations": [], "journalName": "", "journalPages": "167-179", "journalVolume": "", "outCitations": [ "51cea21c5806e6ff9586c39b417746457face07d", "427899282b7da2b281fe430b7284f814ff5675d7", "118f5e7b4723a0ea846245e28574cf7ed715eccf", "3c1f11a1da88c8237842a246ed1a5dbe230737be", "e4d1e54f740ddf76d0fd8a0bb73c57d184fa8de6", "da9dd56c0700a25a22b766940c727cdd9be1ffdc", "c9125d3f9604bc87a382fa69651ea2f3e1d12d8e", "b3f3376296b46d83984de5d8a1b6087710a0da1f", "797a4178bab83be8eb6e2267cfc9fa48f56dbf51", "d8523da600783d8feec2bdb8f7ae5503b297a86f", "4998cdfc2876e4e21540051aa741ee5a82ac2682", "e97829782841f25469581b2614f2c67993a54a98", "e7e7fbfa7693d27e8f0887e2ba2fe57385e2a9d8", "0f1d34f48e3207518414f498eedf09da98b7fe0b", "a2aad247dbf030a91e23560030f944f74a71becc", "3059e1fb261236c6382e95d9ab95615a35620736", "6d0829624336f2fc8afbb871ce41c1cfb727a674", 
"860b97915bfad0cba4e2eb42f081f6cd8bc2d575", "02f00b07581c316d21505bcdb1f65a8dac5a8ad8", "c0bf505b6df9cb67fd2013f38bad5110331510d5", "119bfd8b14e3dd31e364bceea9827e8f969b0dcb", "d6fffb3c44e2ab0c5ba08788ba40e49954046096" ], "paperAbstract": "Near-field communication (NFC) plays a crucial role in the operation of mobile devices to enhance applications such as payment, social networks, private communication, gaming, and etc. Despite of the convenience, existing NFC standards like ISO-13157 require additional hardware (e.g., loop antenna and dedicated chip) and thereby hindering their wide-scale applications. In this work, we seek to propose a novel near-field communication protocol, MagneComm, which utilizes Magnetic Induction (MI) signals emitted from CPUs and captured by magnetometers on mobile devices for communication. Since CPUs and magnetometers are readily available components in mobile devices, MagneComm eliminates the requirement for special hardware and complements existing near-field communication protocols by providing additional bandwidth. We systematically analyze the characteristics of magnetic signals of CPUs and facilitate MagneComm with one-way communication, full-duplex communication, and multi-transmitter schemes in accordance with the hardware availability on devices. We prototype MagneComm on both laptops and smartphones. 
Extensive evaluation results show that MagneComm achieves up to 110bps within 10cm.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117824" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/df1f0ad2b3f9e119938560045bb26629cce0006a", "sources": [ "DBLP" ], "title": "MagneComm: Magnetometer-based Near-Field Communication", "venue": "MobiCom", "year": 2017 }, "df40a82970552cee731bf230d75220045ebdd0e3": { "authors": [ { "ids": [ "3031046" ], "name": "Nima Elyasi" }, { "ids": [ "2372241" ], "name": "Mohammad Arjomand" }, { "ids": [ "1743609" ], "name": "Anand Sivasubramaniam" }, { "ids": [ "1717494" ], "name": "Mahmut T. Kandemir" }, { "ids": [ "8948708" ], "name": "Chita R. Das" }, { "ids": [ "36895144" ], "name": "Myoungsoo Jung" } ], "doi": "10.1145/3037697.3037728", "doiUrl": "https://doi.org/10.1145/3037697.3037728", "entities": [ "Flash memory", "Fragmentation (computing)", "Parallel computing", "Read-write memory", "Response time (technology)", "Scheduling (computing)", "Slack variable", "Solid-state drive" ], "id": "df40a82970552cee731bf230d75220045ebdd0e3", "inCitations": [ "7b2be06575567a40314b377827f077ef1a7ec825", "9f6272c1ffe82f2b06cc591623db0b455592fc5a", "03d31a8e91b0f9e4ba971502e4290703aa9e8463" ], "journalName": "", "journalPages": "375-388", "journalVolume": "", "outCitations": [ "11c3525e009a096b5d82f1fb13e04f77eb10fd4f", "b45e1f16cf2b6f735013e9f279e45bf8b7a8d5db", "d15331154da6225348e3d718c9d9c1d0d9a03102", "014f2fde05dc7c6f0b71c13f98e4a0d0c4969e25", "1eb04c28403ee1143f5d75f53b68ab5ee0196e48", "004ccc26a3521cb101b7c179149227c7aa28d1db", "641aaaf84be64b61ca2f66f7f9d273c1a9bfb1f4", "068df1aa6b5dc368366e93b7094ffd1723f82e3b", "0d08856c7806d4693b091e358bae094e5ec6e483", "726099036bb32c3fbaf1650d5900eeaa2ecc8fd9", "1820a34042d6371a9e20484b0c63b698eb522a6c", "5271d6693ba950c389921ccc21110664f25a83db", "0925b8b42770e8f0f55bc355062f7265285c94fe", "45829a94cc995c116ab7cfc2b993372694d107ac", 
"7019d566d10fcdb836aa338c344de4f0ed2131b6", "2c21534c6f3ba665ec920a6293d6c4efe0a6168a", "3b1c1002d1f051fbe3212961f88809c0714f7e61", "fbfb9550099568001d9856749edd012df695e286", "615b2cab6372a8ec6715e081e6ed0c40f8c8fef1", "08e7d789b23d616c4c04432cf14b1836a73bbb6f", "7a6987f6b0b47d8c6a39cccebb2d3c9566e45054", "3685a773ab05b40e22bbb73b2b5e801dfd95f747" ], "paperAbstract": "With Solid State Disks (SSDs) offering high degrees of parallelism, SSD controllers place data and direct requests to exploit the maximum offered hardware parallelism. In the quest to maximize parallelism and utilization, sub-requests of a request that are directed to different flash chips by the scheduler can experience differential wait times since their individual queues are not coordinated and load balanced at all times. Since the macro request is considered complete only when its last sub-request completes, some of its sub-requests that complete earlier have to necessarily wait for this last sub-request. This paper opens the door to a new class of schedulers to leverage such slack between sub-requests in order to improve response times. Specifically, the paper presents the design and implementation of a slack-enabled re-ordering scheduler, called Slacker, for sub-requests issued to each flash chip. Layered under a modern SSD request scheduler, Slacker estimates the slack of each incoming sub-request to a flash chip and allows them to jump ahead of existing sub-requests with sufficient slack so as to not detrimentally impact their response times. Slacker is simple to implement and imposes only marginal additions to the hardware. 
Using a spectrum of 21 workloads with diverse read-write characteristics, we show that Slacker provides as much as 19.5%, 13% and 14.5% improvement in response times, with average improvements of 12%, 6.5% and 8.5%, for write-intensive, read-intensive and read-write balanced workloads, respectively.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037728" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/df40a82970552cee731bf230d75220045ebdd0e3", "sources": [ "DBLP" ], "title": "Exploiting Intra-Request Slack to Improve SSD Performance", "venue": "ASPLOS", "year": 2017 }, "df496b09f665f4c01e1049f2cf36b4aa6d15ea5f": { "authors": [ { "ids": [ "36162942" ], "name": "Ashlie Martinez" }, { "ids": [ "2002462" ], "name": "Vijay Chidambaram" } ], "doi": "", "doiUrl": "", "entities": [], "id": "df496b09f665f4c01e1049f2cf36b4aa6d15ea5f", "inCitations": [ "75c3f38c4268097b45212b8c67b028f6bf4ecc2d" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://www.usenix.org/conference/hotstorage17/program/presentation/martinez", "https://www.usenix.org/sites/default/files/conference/protected-files/hotstorage17_slides_martinez.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/df496b09f665f4c01e1049f2cf36b4aa6d15ea5f", "sources": [ "DBLP" ], "title": "CrashMonkey: A Framework to Automatically Test File-System Crash Consistency", "venue": "HotStorage", "year": 2017 }, "df6e2807ee46442804203e7689b4d7512ac48473": { "authors": [ { "ids": [ "2734778" ], "name": "Youshi Wang" }, { "ids": [ "30161230" ], "name": "Fa Zhang" }, { "ids": [ "1691652" ], "name": "Shaolei Ren" }, { "ids": [ "1812563" ], "name": "Fangming Liu" }, { "ids": [ "39077234" ], "name": "Rui Wang" }, { "ids": [ "39358904" ], "name": "Zhiyong Liu" } ], "doi": "10.1109/IGCC.2017.8323564", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323564", "entities": [ "Algorithm", "Approximation algorithm", 
"Bluetooth", "Colocation centre", "Data center", "Global optimization", "Optimization mechanism", "PEG10 gene", "Program optimization", "Programs - Publication Format", "Server (computing)", "Simulation", "Trimethoprim-Sulfamethoxazole Combination", "algorithm", "ergoloid mesylates, USP" ], "id": "df6e2807ee46442804203e7689b4d7512ac48473", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ "520d87e06ac5ed55ff5ca2d37430083de90606d2", "fc35556fd7a79723c241e52694116544a3a4b1d4", "0fb9337f6570bac95af021a7beada0b4deea725c", "ebffbf3ab6ac493f4229d70cba1e7ee7ef8fa008", "0dae1dc977b9943ed1216cf86853df3be2510b80", "ed6639815419d8cecfb8dab86b9377f62652e8a3", "96378814dd87b85248cf9fa77f5218afbcbc5db8", "34b765434c1ed0ce8814a4f82f144aca716223b9", "24f6cb1a80792138c856cc8f759672d29eaa4fb7", "65d49f6bebb1e3b38181ba9f5970329e2d235b58", "38a0bced15718230eeec1f5ffd29ada0f4f10a7a" ], "paperAbstract": "Colocation data centers (or colocations, for short) are important participants in emergency demand response (EDR) programs. One key challenge in colocations is that tenants control their own servers, thus, may not coordinate to reduce their power consumption. In this paper, we propose a joint truthful incentive mechanism Co-Colo to encourage tenants joining EDR programs, which includes a local optimization mechanism (LocalOpt) and a global optimization mechanism (GlobalOpt). In LocalOpt, tenants are motivated to improve the energy efficiency locally. In GlobalOpt, tenants can request some public server resources to improve the energy efficiency. By jointly considering the two mechanisms, Co-Colo effectively reduces the energy-saving cost. A (1 + ε)-approximation algorithm is proposed to obtain the asymptotic optimal energy-saving scheme. We also consider a special case when the public resources are sufficient, and design a 2-approximation algorithm. 
Furthermore, the robustness of the proposed algorithms are proved. Trace-driven simulations verify the effectiveness and feasibility of Co-Colo.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323564" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/df6e2807ee46442804203e7689b4d7512ac48473", "sources": [ "DBLP" ], "title": "Energy efficiency in colocation data centers: A joint incentive mechanism approach", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "df76dfe0724ab41d98673d13f472d9b27eada485": { "authors": [ { "ids": [ "7928720" ], "name": "Sudheer Chunduri" }, { "ids": [ "34074103" ], "name": "Kevin Harms" }, { "ids": [ "2032331" ], "name": "Scott Parker" }, { "ids": [ "34641509" ], "name": "Vitali A. Morozov" }, { "ids": [ "14954431" ], "name": "Samuel Oshin" }, { "ids": [ "2089110" ], "name": "Naveen Cherukuri" }, { "ids": [ "39745936" ], "name": "Kalyan Kumaran" } ], "doi": "10.1145/3126908.3126926", "doiUrl": "https://doi.org/10.1145/3126908.3126926", "entities": [ "Spatial variability", "Xeon Phi" ], "id": "df76dfe0724ab41d98673d13f472d9b27eada485", "inCitations": [ "9ddab4cbc6c64e33119f735986a30257c0cfe0cc" ], "journalName": "", "journalPages": "52:1-52:13", "journalVolume": "", "outCitations": [ "9b23b2479ac1893873e1f84426c4f3722e0b356d", "3c3fcd7a025f50bf598de03d41bc0fd00660f11f", "5a4c969c42ff82ad17d6adb4f9d8991def8f3517", "24bb5f66906421f42aff2d64dfa35b4beb3ead7a", "aa79058e4d129d3981c7ccd02aac7262850b4f5e", "90b757b41aac8e5d5cecbde427e894b94bd6020a", "41c6c8a812026f9c71835da29a81af3a1043d3de", "aad980f258658a5298a3af50b0b9aa833fc88c00", "a74f704ba81e89bc8780d01f8d0530ce47bb8da7", "16a4367795dc7fde9bae65de3a5fda8300f27a46", "18fbcb1de113f5d60c8e81566231a0ecea46f3fe", "ac473f1674f14253da0e50c25b8cb86f8801a808", "256774b46b3265ae950ea3717e5a2d0c51ab2b55", "b30a10f682301c03a282129ada222ed048888481", "7952cec7c8a805ad9396100c0e6075775c4f9247", 
"5adc816438843c02f71867d05f64d64100d368c0", "f3325ace129dec914966f9894d9f412e5e04bdc2" ], "paperAbstract": "The increasing complexity of HPC systems has introduced new sources of variability, which can contribute to significant differences in run-to-run performance of applications. With components at various levels of the system contributing variability, application developers and system users are now faced with the difficult task of running and tuning their applications in an environment where run-to-run performance measurements can vary by as much as a factor of two to three. In this study, we classify, quantify, and present ways to mitigate the sources of run-to-run variability on Cray XC systems with Intel Xeon Phi processors and a dragonfly interconnect. We further demonstrate that the code-tuning performance observed in a variability-mitigating environment correlates with the performance observed in production running conditions.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126926" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/df76dfe0724ab41d98673d13f472d9b27eada485", "sources": [ "DBLP" ], "title": "Run-to-run variability on Xeon Phi based cray XC systems", "venue": "SC", "year": 2017 }, "df96114c34c1cb9aa8c1237ad710adfab3c269b6": { "authors": [ { "ids": [ "2265023" ], "name": "Do Le Quoc" }, { "ids": [ "2059294" ], "name": "Martin Beck" }, { "ids": [ "3025359" ], "name": "Pramod Bhatotia" }, { "ids": [ "2042672" ], "name": "Ruichuan Chen" }, { "ids": [ "2314032" ], "name": "Christof Fetzer" }, { "ids": [ "1730027" ], "name": "Thorsten Strufe" } ], "doi": "", "doiUrl": "", "entities": [ "Computation", "Differential privacy", "Distributed computing", "Real-time computing", "Sampling (signal processing)", "Scalability", "Stream cipher", "Stream processing" ], "id": "df96114c34c1cb9aa8c1237ad710adfab3c269b6", "inCitations": [ "f3dec4cb3741bf3b88ab547e28fb8b37e371d72f", "330de12472ed98642e1ed28944ff94d3d6eee8de", 
"0a92b75415121f5f9fed192c97b48959451a9072", "bd3956f82487841ad6392d29218ce582e5aef47c", "baa3f0306e01a16dad20a45b50c5b7a656e8f14b" ], "journalName": "", "journalPages": "659-672", "journalVolume": "", "outCitations": [ "2e88ffc553c0d8ffdf7500f47520b9848fc90090", "209300e7bc7392974ae771afe6a0c3da577584da", "35516916cd8840566acc05d0226f711bee1b563b", "1e3449bc19cbd12f20b7084592ae304055248262", "4f553ee2246dd617d89c487f260d77388177e1c4", "0b2a811c6272298f34f21aa52162d8c7816f4206", "4e525abd3a0659a3607a0b3ba4232d2584a21b6d", "3af15292037d1fa634662f9acec89e89d0e21656", "2173406c4ca5fff0de66e8cbed4cb01ca959cb31", "63fee51e3bc2106b2fa42f332d2050fd5acfcf3e", "040d9acab9003b9d50b2291cc6844b66b2a85d12", "34bdd36330946cf9b377d274bdaaa7dc41888aa2", "e1b8dff6737781304c1f0504ab15711aa0beace7", "609d55884a2b3f16c309a7102541a645fe736004", "32334506f746e83367cecb91a0ab841e287cd958", "ec78f31c4d43c8de4ccd66a73778ff0913375a96", "06eaabcdf0c1f578c2442b3e7a0858a8dc5679c8", "3310cb2b0f1a473e6f98cdec6eb53ec6a962ac87", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "6a912e2c1f818a047bc620f475b6b6e3b0dbacfe", "69884f09be947c43e1029bb3ddc95db5edc2a03d", "9771e382794af067f7360f1cac7b6d2a1e6dd1c4", "3b034ee536cbf8c0152c8eae29b74a821d958976", "03e4f73474351a62abc9abf2fb17ec6277bb064e", "8bd231fd5382cf2f128314af40390b21e4b41a6d", "0820c0b45928df290e4ecb4da702daf6967743d3", "162e6c06bbe83daf74b4fb849367f123b4d65850", "f060942169f56e0aa8f3253047fac49b7c8eff2d", "0c9ffe6bfabf2c1cb013855d913b6089c4918966", "4b8f1518b21b73d30cedf31560a83a8322f8147d", "437d82f8fe2418ec0280efd363639211f1eff2e9", "472a63c41ef24257148d9cf4fd00aec70cf3add6", "34e7390fc54ba9b29ae88f7a135e2bb79b4ca714", "05dced4bdb29932cb6d7419745360a283b369c9f", "64028c85cd7b7e42f208e29734028572d7735c61", "0beca56d0260ffa0c68d17b7e90ccff42b820076", "1ced79eb5606e976ff367018c828d2ed25cc398b", "19db199fd25aa604618d13e80cf317f0858d5604", "1aeddb4b15bbb4524b42934594909a34965de1dd", "34c7a2a830ad3605fd75d11508bfa7880b7f1f92", 
"31afd0a18126720eeef5880bcaa14768c4005387", "0f622ca566380e16e1fe126d61fc72fb4b216ab0", "b532099ff8b67049f292cd62700dca37fc2be623", "45b50ed3d33633978964893b3a58ca369f35bf7e", "0456a5c3b2001465d05e84ce6786ef200184de65", "17fac85921a6538161b30665f55991f7c7e0f940", "009d284fe935b5f421d24321073097a0cd34e21f", "4a42f1599d7e2d1a5f74651f4ba21386f9afdb31", "3064117c7aafc9c1fece6ad01693a6e4c37e8912", "f3dec4cb3741bf3b88ab547e28fb8b37e371d72f", "cb8e2e279a13f0c81861cd726dbd0c7a4dbf97e8", "54d539d7558b6db3a3043fbbe4c71abe3fb629db", "9038514ebcde09eb622ae973e456741229222c82", "b5698562441fa8949047bae34cbfb9cf77b6d817", "7b2dee267cc52c7a985853382b82630c8f5876ad", "00aa614734a26a19b09a0a3bdee2adc77bdac5e4", "e9c513fe159c2b243325f90b6e9bcdb5f8d75c22", "1026527f60f4df0c523dc4b4b07a06274f1f0517", "64533dbddc95edf3dacf5de7a115bc41f858ecc4", "283ecc8622694c070fa53aee7a1c37dadc603f8d", "247fdd4425b7b29504fbed4b93ab4141828cdfd4", "3d5f3bedca7828899b81448e9c33717dd55c36c2", "22dfc02134728131f1f1bf2d8c2329727cf8d12a" ], "paperAbstract": "How to preserve users\u2019 privacy while supporting high-utility analytics for low-latency stream processing? To answer this question: we describe the design, implementation and evaluation of PRIVAPPROX, a data analytics system for privacy-preserving stream processing. PRIVAPPROX provides three important properties: (i) Privacy: zero-knowledge privacy guarantee for users, a privacy bound tighter than the state-of-the-art differential privacy; (ii) Utility: an interface for data analysts to systematically explore the trade-offs between the output accuracy (with error estimation) and the query execution budget; (iii) Latency: near real-time stream processing based on a scalable \u201csynchronization-free\u201d distributed architecture. The key idea behind our approach is to marry two techniques together, namely, sampling (used for approximate computation) and randomized response (used for privacypreserving analytics). 
The resulting marriage is complementary \u2014 it achieves stronger privacy guarantees, and also improves the performance for stream analytics.", "pdfUrls": [ "https://www.usenix.org/conference/atc17/technical-sessions/presentation/quoc", "https://www.usenix.org/system/files/conference/atc17/atc17-quoc.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/df96/114c34c1cb9aa8c1237ad710adfab3c269b6.pdf", "s2Url": "https://semanticscholar.org/paper/df96114c34c1cb9aa8c1237ad710adfab3c269b6", "sources": [ "DBLP" ], "title": "PrivApprox: Privacy-Preserving Stream Analytics", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "dfa37e1ad351ae889502fb704cc93f4a77c1c642": { "authors": [ { "ids": [ "2248558" ], "name": "Linh Nguyen" }, { "ids": [ "22742879" ], "name": "Peifeng Yu" }, { "ids": [ "2579531" ], "name": "Mosharaf Chowdhury" } ], "doi": "10.1145/3102980.3102995", "doiUrl": "https://doi.org/10.1145/3102980.3102995", "entities": [ "Confluence (abstract rewriting)", "Decoupling (electronics)", "Deep learning", "Hardware abstraction", "High- and low-level", "Machine learning" ], "id": "dfa37e1ad351ae889502fb704cc93f4a77c1c642", "inCitations": [ "081fdeea36d4b56a71e87b5b0de191aa368261c8" ], "journalName": "", "journalPages": "88-93", "journalVolume": "", "outCitations": [ "25bb88d33937e3adfd928a92271d4e85779efa5c", "a2f2662b1d3510af048a713b2690ff6d720ebdda", "153703ab30c7cb56a49718991f6bc450f0c2273f", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "6074c1108997e0c1f97dc3c199323a162ffe978d", "02f8e4a8b3f16a988233f309db548415268322c2", "590a84bb5615064c4362532017c1a02646d6eb68", "4cf0f575677146eeb002487e56abc2cf7cafe591", "39f63dbdce9207b87878290c0e3983e84cfcecd9", "32192d744d86e7cde73f0c9aa773214f88619a9e", "0ecad2b630fce029c1b7b577ed56e18fbba001ce", "1c6e067098fa86ee3f96365f28669b06f9ce0c7a", "722fcc35def20cfcca3ada76c8dd7a585d6de386", "0c23ebb3abf584fa5e0fde558584befc94fb5ea2", "12a376e621d690f3e94bce14cd03c2798a626a38", 
"01fcae344d2edb715bcc63a40b6052c0331741bd", "008e9e2d3908c964d5b1c408c478215709dbea10", "73f072ead051f3f3c764b31e88f3a3aeb0373f7b", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", "5f0ac9b48c392abff9773e36c11dd245a5e4eef9", "4416236e5ee4239e86e3cf3db6a2d1a2ff2ae720", "6f4d58486b1c6d710586b1d182ddad7d09a8da11", "d7a4fb554d070fb548a25aec8d1205f766c184a2", "48df9d31ee1ac2a3d15362914419bfb8882cac21", "bd6fe117ca35a7ab144408be1771000feb57c7fb", "a65498d008a162646b8d3b5c4ce5b73f02ff5b6a", "43ccb510cf7c7fba473f359d2558175c8813e94a", "4954fa180728932959997a4768411ff9136aac81", "596df6fb4d50c7886948b08f525c4e3393d05a44", "2cc157afda51873c30b195fff56e917b9c06b853", "15848b6b7c86a5ce8941689b262be237287b39fb", "0122e063ca5f0f9fb9d144d44d41421503252010", "37791336941a0d954e4a98c96b1a66ca7be43eb2", "0c6c30e3052fcc01aa5ee38252d77f75322d7b3f", "080aebd2cc1019f17e78496354c37195560b0697", "3efe247d4f42ff0625b7607546000c9f0d905151", "8dfddcfd67a586f6ed8957174adf1d35c4bd4584", "424561d8585ff8ebce7d5d07de8dbf7aae5e7270", "9260a65f84384b0b12b32810e171a3c1cc0c0ffa", "2ffc74bec88d8762a613256589891ff323123e99", "4a0bb4eece00f3e9445d1a0d933422aa408ce8d1", "154728875d4668065ca6ba9fa2f5d2a1bcfc4a6e", "9c5af2226a48672c9d12cbd22f41b400717f5a21", "31868290adf1c000c611dfc966b514d5a34e8d23", "563e821bb5ea825efb56b77484f5287f08cf3753", "e81ee8f90fa0e67d2e40dc794809dd1a942853aa", "a465ba2fdc04aa9c461ef6f93c1aeae37a8fad32", "8c0918f9801c7053ab7b750eefcf311488b47114", "e729739e2796348faa50c0e88e38be83b070d3fe", "7f6061c83dc36633911e4d726a497cdc1f31e58a" ], "paperAbstract": "In recent years, deep learning has pervaded many areas of computing due to the confluence of an explosive growth of large-scale computing capabilities, availability of datasets, and advances in learning techniques. While this rapid growth has resulted in diverse deep learning frameworks, it has also led to inefficiencies for both the users and developers of these frameworks. 
Specifically, adopting useful techniques across frameworks -- both to perform learning tasks and to optimize performance -- involves significant repetitions and reinventions.\n In this paper, we observe that despite their diverse origins, many of these frameworks share architectural similarities. We argue that by introducing a common representation of learning tasks and a hardware abstraction model to capture compute heterogeneity, we might be able to relieve machine learning researchers from dealing with low-level systems issues and systems researchers from being tied to any specific framework. We expect this decoupling to accelerate progress in both domains.", "pdfUrls": [ "http://doi.acm.org/10.1145/3102980.3102995" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/dfa37e1ad351ae889502fb704cc93f4a77c1c642", "sources": [ "DBLP" ], "title": "No!: Not Another Deep Learning Framework", "venue": "HotOS", "year": 2017 }, "e00ae8f4690260c2467ffc7c26cd28d1506e5555": { "authors": [ { "ids": [ "1885681" ], "name": "Reza Azimi" }, { "ids": [ "2202492" ], "name": "Masoud Badiei" }, { "ids": [ "2490490" ], "name": "Xin Zhan" }, { "ids": [ "1736069" ], "name": "Na Li" }, { "ids": [ "1721621" ], "name": "Sherief Reda" } ], "doi": "10.1109/HPCA.2017.49", "doiUrl": "https://doi.org/10.1109/HPCA.2017.49", "entities": [ "Computer cluster", "Deferred Procedure Call", "Frequency capping", "Heuristic", "Maximum throughput scheduling", "Power management", "Power supply", "Program optimization", "Response time (technology)", "Server (computing)", "Throughput" ], "id": "e00ae8f4690260c2467ffc7c26cd28d1506e5555", "inCitations": [ "25cb64521445ed514523bfa4b36ae2dc8c56a5ee" ], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "181-192", "journalVolume": "", "outCitations": [ "434bfa445da86c7be719ad7c3a3072b2ab075d03", "78e009cc05a6a832106d5ca6802ce56bef6b247f", 
"1d035d1d445f5ae5457db98af49ce1b87be1ebd5", "3525e2ddd314e7bfbb5abcf2df0def902bbad8e8", "b04391910d19d2d0c64b62d300927f527417414e", "48b22e82dba733f04a5af3408ecab872ba38f0c3", "5341bdf934b3a99af6685b5564af8e03d1b780a7", "6fd344c359faa7fcd15d1adf76da58ce328a4b45", "1e8233a8c8271c3278f1b84bed368145c0034a35", "39681c36b58c757c04aed963dc8e0411c66aff31", "54754cbd5011c059af8358b162ffd9ffbcb51f39", "7d21404a90d7bf9b75c140bc0b6546551bd91979", "3462fb38042f0bde20c758728d7c8c28a1f47e09", "11ae9c83d2851149ed5c4e2ca5d3182a23cd68d4", "ac333b591801377ea736b42c30cf253437e5ac02", "5581f869f4ffa0c83924546b6b986c4d897ecda4", "160cb79c15607467255dbafa9ab1f116aa231b36", "9829af41ab5608bcbee1b833ae4a314e2bc67616", "0371f9e3efbcd4829b5ffbff585155746ef05284", "00919d778377ec8e4e037d8ebafc76c9de52db4b", "a47b408349a8146f71cb54c38226d2f7d92700fe" ], "paperAbstract": "Power capping is a mechanism to ensure that the power consumption of clusters does not exceed the provisioned resources. A fast power capping method allows for a safe over-subscription of the rated power distribution devices, provides equipment protection, and enables large clusters to participate in demand-response programs. However, current methods have a slow response time with a large actuation latency when applied across a large number of servers as they rely on hierarchical management systems. We propose a fast decentralized power capping (DPC) technique that reduces the actuation latency by localizing power management at each server. The DPC method is based on a maximum throughput optimization formulation that takes into account the workloads priorities as well as the capacity of circuit breakers. Therefore, DPC significantly improves the cluster performance compared to alternative heuristics. We implement the proposed decentralized power management scheme on a real computing cluster. Compared to state-of-the-art hierarchical methods, DPC reduces the actuation latency by 72% up to 86% depending on the cluster size. 
In addition, DPC improves the system throughput performance by 16%, while using only 0.02% of the available network bandwidth. We describe how to minimize the overhead of each local DPC agent to a negligible amount. We also quantify the traffic and fault resilience of our decentralized power capping approach.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.49", "http://scale.engin.brown.edu/pubs/hpca2017.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e00ae8f4690260c2467ffc7c26cd28d1506e5555", "sources": [ "DBLP" ], "title": "Fast Decentralized Power Capping for Server Clusters", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "e01a9385df6deef08ec133b0fe2a440c9711ab48": { "authors": [ { "ids": [ "2857790" ], "name": "Kuangyu Zheng" }, { "ids": [ "38638723" ], "name": "Bruce Beitman" }, { "ids": [ "1690476" ], "name": "Xiaorui Wang" } ], "doi": "10.1109/ICAC.2017.14", "doiUrl": "https://doi.org/10.1109/ICAC.2017.14", "entities": [ "Algorithm", "Android", "Computation", "Desktop computer", "Desktop metaphor", "Digital footprint", "Email", "Feature phone", "Laptop", "Optimization problem", "Program optimization", "Seamless3d", "Smartphone", "Web navigation" ], "id": "e01a9385df6deef08ec133b0fe2a440c9711ab48", "inCitations": [], "journalName": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "journalPages": "167-176", "journalVolume": "", "outCitations": [ "31f87a5e05a6a3bd380bf979ec2db45d7a3ab971", "70ae2ba890bf6ce2ff645ca2fd4262480f8048db", "c2d7efb5f612c9e3fa1c4248f56ad7593675f764", "0fe7cd8d9a8d16f8ae2ccaecd0d7c30b8e0b3b30", "a54dd6428bc2200246b896655ba28822a7f82264", "0495641c590874be9e09c3743d0d15c536cd3f4e", "464549b71215802e83c4028122b2a168202d73b8", "06706cd6c46bc413bb9c272d6c1c034313ff2742", "5aaf038072a806aa0980cd91c333543571531c42", "14d644c617b1bdf57d626d4d14f7210f99d140e6", "4e1f37dbbde87067db80379a2bcec4fa9825ee5b", 
"965ef290310ebfb5e6d1758c4848286d663c2ff6", "dd5e7b04cb142a6a34ad3eff43dac7326cbe8247", "293633af81675248e9cdfda6e7445dd6da841730", "9c89b6cbf43411fb8ec4366a20af5d066dffcee9", "cd8d49e70ec797318af1302bfa25bdebaf9fc3d7", "3821cdd4dbf5cc5ef4981883611bb211bded4669", "34056f1d9582d152023a1fccb41d8e2047ebcc86", "a7b788555f2a5617fdb46f40103b3c40e93c5f16", "49c61e3e4bce017e7b3e42880fb670473a3d4556", "f7104b891859d433cedbba8670ecfefd768b256f", "7d19eb2938a538325300a5967ef0c5efe4141950", "31f3a12fb25ddb0a27ebdda7dd8d014996debd74", "5892b9314971e90e32d8bf81ca4e7dcbecb5ef8f", "177dc0912254778423fa50815ed1eaf6544d0423", "0b369ac8bd9e0c618e4ea3568ebaa944f460c454", "5ca3e775ec462e19e69ffd63e56f9f6ea263c102", "1e126cee4c1bddbfdd4e36bf91b8b1c2fe8d44c2" ], "paperAbstract": "Due to the increasing popularity of smartphones, many people are now equipped with both a smartphone and at least one desktop (or laptop) computer. Although the two computing devices are used for similar purposes (e.g., email, web browsing), they are often both kept on for the user's convenience, despite only one device is actively used at a time. Therefore, if one of the two computing devices can be put into an energy-saving mode when the other one is in use, a significant amount of energy can be saved for both the phone and the desktop.In this paper, we propose CoSmart, a light-weight solution that coordinates the smartphone with the desktop computer for joint energy savings. CoSmart dynamically degrades the smartphone to a feature phone with only basic GSM functions when the user is detected to be with the desktop, in order to save both computation and idle energy. The desktop is then put into sleep for energy savings when the user leaves it, while the phone can be turned back to a smartphone, such that the user can continue the operation with seamless task migration. 
There are several research challenges in the design of CoSmart, which include 1) predicting whether the user would stay long enough with the desktop to offset the migration overheads, and 2) determining the best time point for task migration that can result in the most energy savings. To this end, we propose a novel algorithm for dynamic idle time length prediction, and model joint energy savings as an optimization problem for the most energy savings. A prototype of CoSmart is implemented in Android and evaluated using different real user traces and popular apps. Results show CoSmart can achieve, on average, 61.3% energy savings for the smartphone and 46.7% energy savings for the desktop, which outperforms other baselines by as much as 17.2% to 19.0%.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICAC.2017.14" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e01a9385df6deef08ec133b0fe2a440c9711ab48", "sources": [ "DBLP" ], "title": "CoSmart: Coordinating Smartphone with Desktop Computer for Joint Energy Savings", "venue": "2017 IEEE International Conference on Autonomic Computing (ICAC)", "year": 2017 }, "e087ed99f88d00c32cff2f9c3b7a8788594aec0d": { "authors": [ { "ids": [ "2810606" ], "name": "Jiayang Guo" }, { "ids": [ "1736595" ], "name": "Yiming Hu" }, { "ids": [ "1782674" ], "name": "Bo Mao" }, { "ids": [ "8175008" ], "name": "Suzhen Wu" } ], "doi": "10.1109/IPDPS.2017.55", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.55", "entities": [ "Garbage collection (computer science)", "I/O scheduling", "Parallel computing", "Participatory GIS", "Response time (technology)", "Scheduling (computing)", "Solid-state drive" ], "id": "e087ed99f88d00c32cff2f9c3b7a8788594aec0d", "inCitations": [ "0590d539e980c1b9dc33abb5c97e68cb6c39c3f9", "8378e1b124fd204a9ea92c7b0e4d3caaa34720e0" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "1184-1193", "journalVolume": "", 
"outCitations": [ "3aff5fb3d1e23dfc0c45989f71b4aa99b3a5784b", "c1412129d1b91c3cbe52d55a242fd0757da6a32d", "ff86fe69a1c4ada9e1e31488afe0f2ae68d24d7d", "009d8914ca7ca1ec459f6c35a772f85c602eb052", "b220199029253cda0744b3b39a876ca007a5f12b", "08e7d789b23d616c4c04432cf14b1836a73bbb6f", "fed29a65678798b87fb4b85e95aa6e7a8eebf3d6", "4a854ab27914843ea8a1bfc83a004be952f1ef2c", "d4e5801efdfb30ac9ca93096995fcb32c06f4e29", "30419d0e0fcaebbfbcdb88f702bd01306d14fb15", "38a9120f780602521af9744e31d80ef5cd9593a7", "bf9bb3cda0e940f4eddd10cff63139658c377d2c", "7a6987f6b0b47d8c6a39cccebb2d3c9566e45054", "3a8dd763354946d6cf044a0be711052178233ffb", "9d44e8268c1d2f839221a6a58f89c37663dae2d7", "11c3525e009a096b5d82f1fb13e04f77eb10fd4f", "04f94f8d48badcdcaab93c28a60414c7b1ffd274", "acaee97ed84f0e96932a419d9cb838ea82d50b5f", "1eb04c28403ee1143f5d75f53b68ab5ee0196e48", "0b2aa4a87a6c253472e801080614da0dab47cfc2", "726099036bb32c3fbaf1650d5900eeaa2ecc8fd9", "1820a34042d6371a9e20484b0c63b698eb522a6c", "1da48d8173e34eb7825870248c4c12b6bbe7d9c1", "0590d539e980c1b9dc33abb5c97e68cb6c39c3f9", "9f83ef5f08ffcfc56ddd8ca67f7efd99aadfc94a", "7d40d2dbb78a38d8ba0489a8e14cef6b59a14b86" ], "paperAbstract": "In this paper, we propose PGIS, a parallelism and garbage collection aware I/O Scheduler, which identifies the hot data based on trace characteristics to exploit the channel level internal parallelism of flash-based storage systems. PGIS not only fully exploits abundant channel resource in the SSD, but also it introduces a hot data identification mechanism to reduce the garbage collection overhead. By dispatching hot read data to different channel, the channel level internal parallelism is fully exploited. By dispatching hot write data to the same physical block, the garbage collection overhead has been alleviated. The experiment results show that compared with existing I/O schedulers, PGIS improves the response time and garbage collection performance significantly. 
Consequently, PGIS reduces the garbage collection overhead up to 30.9%, while exploiting channel level internal parallelism.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.55" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e087ed99f88d00c32cff2f9c3b7a8788594aec0d", "sources": [ "DBLP" ], "title": "Parallelism and Garbage Collection Aware I/O Scheduler with Improved SSD Performance", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "e0f74ac8cc27da3966ac3cc8b4e0cec1df3f576d": { "authors": [ { "ids": [ "1963560" ], "name": "Christos Kotselidis" }, { "ids": [ "40844251" ], "name": "James Clarkson" }, { "ids": [ "2322572" ], "name": "Andrey Rodchenko" }, { "ids": [ "2476643" ], "name": "Andy Nisbet" }, { "ids": [ "1865829" ], "name": "John Mawer" }, { "ids": [ "1706226" ], "name": "Mikel Luj\u00e1n" } ], "doi": "10.1145/3050748.3050764", "doiUrl": "https://doi.org/10.1145/3050748.3050764", "entities": [ "CUDA", "Computer vision", "Embedded system", "Field-programmable gate array", "Floating Point Systems", "General-purpose computing on graphics processing units", "Hardware acceleration", "OpenCL API", "Parallel computing", "Programming language", "Quality of service", "Runtime system", "Software portability" ], "id": "e0f74ac8cc27da3966ac3cc8b4e0cec1df3f576d", "inCitations": [ "55c143f5b991501a09a644ab0f39c05951ae4754", "28004163c4d4646b66079d6810d3159dd1106999", "653e56eafd7001abd35f96cb317d790a835248f2", "d440bdc8a46e57c7c0922ef6dfef68dd9fb6ae65" ], "journalName": "", "journalPages": "74-82", "journalVolume": "", "outCitations": [ "a8e9034fc1828719e98e41fda539faf8d56bf47f", "6074c1108997e0c1f97dc3c199323a162ffe978d", "4a088b3ef14d19448e77008f852f2e9805ffc1ea", "907d2c011942a78bf6acff8e048f4185d53ff8f2", "0f58afdae0b5d40a599d685c81c83f33586c671a", "00a9ba0063d34ec56792849a67ef57b4601becbb", "711fbb31886c767aa7842b7779b373156259a2e7", "7bbbbd2073503720f304d031cb4641eb45a3edee", 
"0a714abfcf37a2a37f2d533563b2eed19e8b801b", "28c552da5dc505fe23644cfddf7daaf06c355e45", "d57b42821ec782b33dd49ee0c37976bbd62d24a4", "98e5f816dc8f4d8ee31824669e20586b78b9f15c", "1850ceb5376a4a14a7d77031789ef3ccb4f87e93", "0259607cc9a2ad28592e77543e5433bb3d87a70c", "a19a7c5e45125a570dcbac018184669b8cab2789", "282cf28ac9508bd66a6ddf0709c9db9dc0fdf162", "d440bdc8a46e57c7c0922ef6dfef68dd9fb6ae65", "30abed73bf3d442d4d17c0a843ad59679dc3c79e", "53dcc743e94ed14c3e94382f994bdd2e948a1b5c", "0a9c4656948693f5e11c4c092303abe54da50caf", "09ea2e8942f624a6dad4c96f3c62320fbb0db7a8", "4726ec683a7db8e97ebd845b98e294ead537888a", "236fb3e74fa802e44541edb76bee5886397acc7a", "5d5e1b35dcfbf52299c327baab696568ba0e1d15" ], "paperAbstract": "Real-time 3D space understanding is becoming prevalent across a wide range of applications and hardware platforms. To meet the desired Quality of Service (QoS), computer vision applications tend to be heavily parallelized and exploit any available hardware accelerators. Current approaches to achieving real-time computer vision, evolve around programming languages typically associated with High Performance Computing along with binding extensions for OpenCL or CUDA execution.\n Such implementations, although high performing, lack portability across the wide range of diverse hardware resources and accelerators. In this paper, we showcase how a complex computer vision application can be implemented within a managed runtime system. We discuss the complexities of achieving high-performing and portable execution across embedded and desktop configurations. 
Furthermore, we demonstrate that it is possible to achieve the QoS target of over 30 frames per second (FPS) by exploiting FPGA and GPGPU acceleration transparently through the managed runtime system.", "pdfUrls": [ "http://doi.acm.org/10.1145/3050748.3050764" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e0f74ac8cc27da3966ac3cc8b4e0cec1df3f576d", "sources": [ "DBLP" ], "title": "Heterogeneous Managed Runtime Systems: A Computer Vision Case Study", "venue": "VEE", "year": 2017 }, "e0f8f8bfbac97dec0fd609fcbfdbb83694932fee": { "authors": [ { "ids": [ "40144732" ], "name": "Viktor Rosenfeld" }, { "ids": [ "39168144" ], "name": "Ren\u00e9 M\u00fcller" }, { "ids": [ "1843945" ], "name": "Pinar T\u00f6z\u00fcn" }, { "ids": [ "3023309" ], "name": "Fatma \u00d6zcan" } ], "doi": "10.1145/3127479.3132022", "doiUrl": "https://doi.org/10.1145/3127479.3132022", "entities": [ "Big data", "C++", "Compiler", "Ecosystem", "Input/output", "Java", "Java virtual machine", "Machine code", "Naivety", "Overhead (computing)", "SPARK", "Universal Disk Format", "User-defined function", "Virtual machine" ], "id": "e0f8f8bfbac97dec0fd609fcbfdbb83694932fee", "inCitations": [ "db12b1acdf950527ee8eccbdaa99ee9dcf5c1274" ], "journalName": "", "journalPages": "419-431", "journalVolume": "", "outCitations": [ "2bcc56aa8f39ec3d5f16c0064e461e90a6a1764f", "027eb436c35c7e293e7ebc565163cb54c05fe2e9", "2194c3460ab71f3826db00b045b2ae590c753319", "313e8120c31fda6877ea426d8a3be9bcf1b6e088", "3924d28ebc7748ae9de4ab1e4672a2b9d599e01f", "2c74aeec68efd07d908d4f421a5d4afe8426a18c", "0235fb69431fa5892333eb48a06ede07df6ff4f6", "ebc52e776b09cf02b063f212a765a0952dc0eff1", "3a134bc11a5805bcf45fdcb88a91321a1b1b63c3", "e75c5d1b7ecd71cd9f1fdc3d07f56290517ef1e5", "6434aa10f3745dcf959cfca9c379aae120396724", "7ead08393fa2000ea03bb1cc1e3e6e0ac991aac6", "0558c94a094158ecd64f0d5014d3d9668054fb97", "080ed793c12d97436ae29851b5e34c54c07e3816", "74241ccbd1045cf937ad8aeb44ed4e22bcdb9ea1", 
"17d6ed86ee2fcce750d424a88fa4f6f297f42ffd", "7a75c886b043e7c3f77829412774de27648f384a" ], "paperAbstract": "Many popular big data analytics systems today make liberal use of user-defined functions (UDFs) in their programming interface and are written in languages based on the Java Virtual Machine (JVM). This combination creates a barrier when we want to integrate processing engines written in a language that compiles down to machine code with a JVM-based big data analytics ecosystem.\n In this paper, we investigate efficient ways of executing UDFs written in Java inside a data processing engine written in C++. While it is possible to call Java code from machine code via the Java Native Interface (JNI), a naive implementation that applies the UDF one row at a time incurs a significant overhead, up to an order of magnitude.\n Instead, we can significantly reduce the costs of JNI calls and data copies between Java and machine code, if we execute UDFs on batches of rows, and reuse input/output buffers when possible. Our evaluation of these techniques using different scalar UDFs, in a prototype system that combines Spark and a columnar data processing engine written in C++, shows that such a combination does not slow down the execution of SparkSQL queries containing such UDFs. In fact, we find that the execution of Java UDFs inside an embedded JVM in our C++ engine is 1.12X to 1.53X faster than executing in Spark alone. Our analysis also shows that compiling Java UDFs directly into machine code is not always beneficial over strided execution in the JVM.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3132022" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e0f8f8bfbac97dec0fd609fcbfdbb83694932fee", "sources": [ "DBLP" ], "title": "Processing Java UDFs in a C++ environment", "venue": "SoCC", "year": 2017 }, "e12966ff762a96e1b431306c1e0c4c8ee1c808e7": { "authors": [ { "ids": [ "1764938" ], "name": "Eric L. 
Seidel" }, { "ids": [ "23528421" ], "name": "Huma Sibghat" }, { "ids": [ "38120884" ], "name": "Kamalika Chaudhuri" }, { "ids": [ "1962994" ], "name": "Westley Weimer" }, { "ids": [ "1695297" ], "name": "Ranjit Jhala" } ], "doi": "10.1145/3138818", "doiUrl": "https://doi.org/10.1145/3138818", "entities": [ "OCaml", "Programmer", "Supervised learning", "Test set", "Text corpus", "Type inference", "Type system" ], "id": "e12966ff762a96e1b431306c1e0c4c8ee1c808e7", "inCitations": [ "54ff43462f9468f5ed1f0a7ee65460cd0c6b2e69" ], "journalName": "PACMPL", "journalPages": "60:1-60:27", "journalVolume": "1", "outCitations": [ "76a75933e5b13a94ddbe22d60d08b7d8940f246f", "02488fc848b324fd7305f2c79ec106daa4eb5a44", "c85086843e6c90a6f7382334a965be546b0748fe", "60f788264e6278374be9dabd8e2a644cc65129b6", "e62009e4e87c38aa62907827babd10180fb45121", "d34d46baa884e401c75b3bc875945fc48471a759", "7b743215e4819b8e2100dde9ebf4cdde8b50347c", "7e7343a5608fff1c68c5259db0c77b9193f1546d", "1b424497a13d40055ec6e0d6c1b2fdcc88a7320d", "9cb40bbd6183d6fa7aee981386c4df694ba474e1", "1224d34a3d31b80ec4df903b6885a3672afa1ab1", "6b07dd22a109afd5e8b71b17449457b38858a870", "a538b05ebb01a40323997629e171c91aa28b8e2f", "4c556f0fdb14611302d13e008c3fd3cf80f87af2", "7a758797691897ac2fe791af23572d10f4d76b49", "13bdea7a56e87b0060c5e87ccc9a6818dfc74e5b", "44e94909698d353209b058d2ea3b2a0679ce30e0", "64d3f66451646109ccb142b5580e3dc373aeebde", "9509f45ebc129bd68ea94d55d90fee410afb8143", "51c4936a7878eb2717b6451f0c26c5d6a0ed7420", "c4b7539119077ffb2d5f9e33178091f40e81d4db", "1c7e6964bec163ccd9d37cc0296532d6adb54232", "2c57d368743217e65efe05d9e9413d022245ce87", "e96dc1d785ffd64e12ccf025de7a5e4f277d0cc9", "58c4edc75e92eae076ae1ac5755f2d4017c5a08c", "0a274793ba5073c7e411fb90883def87ef682515", "18da147789c74a2633a7f7ad9d9748025ee03345", "814c164c88ba7dd22e7e501cdd1a951586a3117b", "1c2c08047cd8f7a56c707883077d6ed04b9e7bad", "01bcd4d16ab8c1afe1b77f0a99431bb1a68724e3", "a15e8a4bb92b65568e69977c5d97d1d742390ad4", 
"1173a8a2e9d75953d04ebaa792ae71fd417a6f17", "2987a79c48d5159051b102df2bead22bca5400e5", "a6526df1d9b18fd3542fad7fdd95e93a5edce909", "272216c1f097706721096669d85b2843c23fa77d", "13b446cb9bbe70fc12ffab3bb66b0b9213d1aa8a", "05783fc38071c5bc97a29cfac49595a71b79b9b3", "dcab98a689c5751c8d8716dd10a1f72751267781", "0e5d9ca8c876c6881109445d6d96010c388752db", "9c018f1b0aabe84ba61dfc969e273f2a9653c6ec", "1e688be9f4554aa981fe3db9e2a66388b05bd167", "5b4c8f105c39c033f5aad47ca2da3003ed6d9ee4", "69420bcf6dc36820df8934ffb1730bb103b25321", "c774141be4da070760416919f8e682c20a7b6c2e", "b74dc57174780cb7e73f17f45254e321706180cf" ], "paperAbstract": "Localizing type errors is challenging in languages with global type inference, as the type checker must make assumptions about what the programmer intended to do. We introduce Nate, a data-driven approach to error localization based on supervised learning. Nate analyzes a large corpus of training data -- pairs of ill-typed programs and their \"fixed\" versions -- to automatically learn a model of where the error is most likely to be found. Given a new ill-typed program, Nate executes the model to generate a list of potential blame assignments ranked by likelihood. We evaluate Nate by comparing its precision to the state of the art on a set of over 5,000 ill-typed OCaml programs drawn from two instances of an introductory programming course. We show that when the top-ranked blame assignment is considered, Nate's data-driven model is able to correctly predict the exact sub-expression that should be changed 72% of the time, 28 points higher than OCaml and 16 points higher than the state-of-the-art SHErrLoc tool. 
Furthermore, Nate's accuracy surpasses 85% when we consider the top two locations and reaches 91% if we consider the top three.", "pdfUrls": [ "http://arxiv.org/abs/1708.07583", "http://doi.acm.org/10.1145/3138818" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e12966ff762a96e1b431306c1e0c4c8ee1c808e7", "sources": [ "DBLP" ], "title": "Learning to blame: localizing novice type errors with data-driven diagnosis", "venue": "PACMPL", "year": 2017 }, "e12de2f959af3334b04d78368217bae3f2620d0b": { "authors": [ { "ids": [ "2324843" ], "name": "Kaiwen Zhang" }, { "ids": [ "1719119" ], "name": "Mohammad Sadoghi" }, { "ids": [ "2684392" ], "name": "Vinod Muthusamy" }, { "ids": [ "1738552" ], "name": "Hans-Arno Jacobsen" } ], "doi": "10.1145/3135974.3135976", "doiUrl": "https://doi.org/10.1145/3135974.3135976", "entities": [ "Algorithm", "Baseline (configuration management)", "Collaborative filtering", "Correctness (computer science)", "Fairness measure", "Naivety", "Program optimization", "Publish\u2013subscribe pattern", "Relevance", "Requirement", "Scalability", "Whole Earth 'Lectronic Link" ], "id": "e12de2f959af3334b04d78368217bae3f2620d0b", "inCitations": [], "journalName": "", "journalPages": "174-184", "journalVolume": "", "outCitations": [ "281f5be3b32f31dfe3974f645ba5c253e6f072cd", "37034d2cff124e5e9e6a6efcd51729ad8bbd3922", "0a191b2cecb32969feea6b9db5a4a58f9a0eb456", "a401596b7c337afefe0ea228ef9cd4908429b43a", "3a043714354fe498752b45e4cf429dbae0fb2558" ], "paperAbstract": "We investigate the use of content-based publish/subscribe for data dissemination in large-scale applications with expressive filtering requirements. In particular, we focus on top-k subscription filtering, where a publication is delivered only to the k best ranked subscribers, as ordered using expressive semantics such as relevance, fairness, and diversity. 
The naive approach to perform filtering early at the publisher edge works only if complete knowledge of the subscriptions is available, which is not compatible with the well-established covering optimization in scalable content-based publish/subscribe systems. We propose an efficient rank-cover technique to reconcile top-k subscription filtering with covering. We extend the covering model to support top-k and describe a novel algorithm for forwarding subscriptions to publishers while maintaining correctness. We also establish a framework for supporting different types of ranking semantics and propose an implementation to support fairness. Finally, we compare our solutions to a baseline covering system and perform sensitivity analysis to demonstrate that our optimized rank-cover algorithm retains both covering and fairness while achieving properties advantageous to our targeted workloads. In a typical setting, our optimized solution is scalable, selects fairly, and provides over 81% of the covering benefit.", "pdfUrls": [ "http://doi.acm.org/10.1145/3135974.3135976" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e12de2f959af3334b04d78368217bae3f2620d0b", "sources": [ "DBLP" ], "title": "Efficient covering for top-k filtering in content-based publish/subscribe systems", "venue": "Middleware", "year": 2017 }, "e15071ff464fccc683f2022e46834e16969386e7": { "authors": [ { "ids": [ "3282849" ], "name": "Chanyou Hwang" }, { "ids": [ "2407246" ], "name": "Saumay Pushp" }, { "ids": [ "27007715" ], "name": "Changyoung Koh" }, { "ids": [ "27058782" ], "name": "Jungpil Yoon" }, { "ids": [ "3180228" ], "name": "Yunxin Liu" }, { "ids": [ "1852544" ], "name": "Seungpyo Choi" }, { "ids": [ "1789470" ], "name": "Junehwa Song" } ], "doi": "10.1145/3117811.3117841", "doiUrl": "https://doi.org/10.1145/3117811.3117841", "entities": [ "Experience", "Floor and ceiling functions", "Graphics processing unit", "Image resolution", "Low-power broadcasting", "Mobile 
device", "Mobile game", "Smartphone" ], "id": "e15071ff464fccc683f2022e46834e16969386e7", "inCitations": [], "journalName": "", "journalPages": "422-434", "journalVolume": "", "outCitations": [ "e36ce0694ea74d4910d2c59a5c9f8004dac2dc92", "9589b8a1df5b4c8e0e81784185e31265b0d096b7", "61d7b8785959a8578af9957488995b196a9ca507", "4433c9f0899699f17f06b5046de7a6886e5dd15c", "fc40ab440e106511f8978b7fc7d44f8dd281f730", "5c77de9e4d9d06226bf449126776de109b5bfceb", "4c9cec89a2c9c8173ee53ab4cda2c021421eb7a5", "43e195e8f39f3ed4b20109d9fc4ee1eab1f2a8d2", "60a1389c827f9f706c9dc1639e2584f0f3de878e", "7ea15c138cc72588fa376ff819f4bb8ca0b324da", "35d38d979d44fac88e154ea9531fc9ffc2c4514e", "3621342126b3751f117458c9859bc21898bafc9c", "13dcb87a54110b02cc4a8a7780962cb3981bc037", "9e4d8df4887b9810795a654ff9f1043aa51ceac9", "a3a305052359f121f2761565fd31669343921dcf" ], "paperAbstract": "High-end mobile GPUs are now becoming an integral part of mobile devices. However, a mobile GPU constitutes a major portion of power consumption on the devices, and mobile games top as the most popular class of graphics applications. This paper presents the design and implementation of RAVEN, a novel, on-the-fly frame rate scaling system for mobile gaming applications. RAVEN utilizes human visual perception of graphics change to opportunistically achieve power saving without degrading user experiences. The system develops a light-weight frame comparison technique to measure and predict perception-aware frame similarity. It also builds a low resolution virtual display which clones the device screen for performing similarity measurement at a low-power cost. It is able to work on an existing commercial smartphone and support applications from app stores without any modifications. It has been implemented on Nexus 5X, and its performance has been measured with 13 games. The system effectively reduces the overall power consumption of mobile devices while maintaining satisfactory user experiences. 
The power consumption is reduced by 21.78% on aver-age and up to 34.74%.", "pdfUrls": [ "http://doi.acm.org/10.1145/3117811.3117841" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e15071ff464fccc683f2022e46834e16969386e7", "sources": [ "DBLP" ], "title": "RAVEN: Perception-aware Optimization of Power Consumption for Mobile Games", "venue": "MobiCom", "year": 2017 }, "e16224879ed38feae5729db436ad842300e1c485": { "authors": [ { "ids": [ "2134411" ], "name": "Lingguang Lei" }, { "ids": [ "1948282" ], "name": "Yi He" }, { "ids": [ "1761541" ], "name": "Kun Sun" }, { "ids": [ "1757984" ], "name": "Jiwu Jing" }, { "ids": [ "3352539" ], "name": "Yuewu Wang" }, { "ids": [ "29580875" ], "name": "Qi Li" }, { "ids": [ "1729395" ], "name": "Jian Weng" } ], "doi": "10.1145/3133956.3133975", "doiUrl": "https://doi.org/10.1145/3133956.3133975", "entities": [ "Android", "Credential", "Denial-of-service attack", "Download", "Information leakage", "Information sensitivity", "Login", "Play Store", "Static program analysis", "Virtual private network", "Web service" ], "id": "e16224879ed38feae5729db436ad842300e1c485", "inCitations": [], "journalName": "", "journalPages": "1051-1063", "journalVolume": "", "outCitations": [ "94d0e8b118efecb9f5b2056d84bf22253a2fb63c", "56a61eec079806d3eb32bb7d5c957aee3578083e", "03a613951421cf67237d5278d6bf3702a26da9aa", "3f1176bbedc54dcdb9c92cdc5797d1da9f13c802", "0bae04f86b5302345d18b99801829efc98c90874", "501b2aa2c55dedef322fffe84054c9c9678a61a4", "29898e452f80ba09357a2fb716c7b14d75eb3bd6", "45674ab89637524d50f4fd232c9c4983a91c9968", "0fa3eabd538d777556f3e87399959d05cefa1f69", "2d165ebfdab883e6f2a6f96b2e792aacc191467c", "335fa136d806115ef654ce2ae23e0e302cfa09f2", "6a11a3492244a71a8b87a14d3c42b4c02d63ed54", "17138b471f2dade960cd3969db0c08b623b33797", "708beb6b5638b4abc57082af2e58161699712323", "30bc245a8295dac571c58aec0b744e4bf217c287", "3343392fe056b45692252ad18278e10020ee3d8e", "738feaca3e125c6c6eae4c160b60fdedd12c89aa", 
"14490c37be179400c86cf89aac7c9272dddf60e7", "23fa7b866a1b1fee7bb71c8b5a9235cca7120bbc", "342fb5524fbd94ce2c566629d3cd693bf636cab0", "2166b458f5a5b7b6f5ebdc66c1d7381279f7a608", "048c6deac1c35d29dfdf2b75b15980c533f156e6", "71f8163801980fbaa494cb8c149bd7388034c2ba", "43f89337e570f36686acdda3dcd0b7885a963557", "1b12eb42a9e04af626c7ed266b2e299d7f6f96a3", "5a2ec85f87d518ff6b6f57aeac1c43bdb35729c8", "080f1f7a903ba3d77f0f21a3a89bd2db0d958e46", "8f1701f01fe27538ec8c009ee1497fa5f4fdc3ac", "32bd7b680830b3e168795ccfe650ceeb0edf7878", "3e69437c44bae0ff4421b5f408a925a340da26dd", "bb4bffbc7507b82adb0a5035e78b5639a7df1b56", "5aed9231774c7742431d79c22de749c79f7e56e2", "31478e07f1599d9f9adba8d598bcaa54455e9015", "3e9999a635934c47fb011ebc2f767526443ba3b6", "14bb333f4edb189ef62448799db90aef5e6785dd" ], "paperAbstract": "The services in Android applications can be invoked either explicitly or implicitly before Android 5.0. However, since the implicit service invocations suffer service hijacking attacks and thus lead to sensitive information leakage, they have been forbidden since Android 5.0. Thereafter since the Android system will simply throw an exception and crash the application that still invokes services implicitly, it was expected that application developers will be forced to convert the implicit service invocations to explicit ones by specifying the package name of the service to be called.\n In this paper, we revisit the service invocations by analyzing two sets of the same 1390 applications downloaded from Google Play Store before and after the the implicit service forbidden policy is enforced. We develop a static analysis framework called ISA to perform our study. Our analysis results show that the forbidden policy effectively reduces the number of vulnerable service invocations from 643 to 112, namely, 82.58% reduction. 
However, after a detailed analysis of the remaining 112 vulnerable invocations, we discover that the forbidden policy fails to resolve the service hijacking attacks. Among the 1390 applications downloaded in May 2017, we find 36 popular applications still vulnerable to service hijacking attacks, which can lead to the loss of user bank account and VPN login credentials, etc. Moreover, we find that the forbidden policy introduces a new type of denial of service attacks. Finally, we discuss the root challenges on resolving service hijacking attacks and propose countermeasures to help mitigate the service hijacking attacks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3133975" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e16224879ed38feae5729db436ad842300e1c485", "sources": [ "DBLP" ], "title": "Vulnerable Implicit Service: A Revisit", "venue": "CCS", "year": 2017 }, "e196bcb0dfea0854daa8714a377591e399fe858c": { "authors": [ { "ids": [ "38960812" ], "name": "Alexander Breuer" }, { "ids": [ "1722735" ], "name": "Alexander Heinecke" }, { "ids": [ "2305796" ], "name": "Yifeng Cui" } ], "doi": "10.1007/978-3-319-58667-0_3", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_3", "entities": [ "Discontinuous Galerkin method", "Galerkin method", "IBM WebSphere eXtreme Scale" ], "id": "e196bcb0dfea0854daa8714a377591e399fe858c", "inCitations": [ "3e78da59df3a1e72d2aa7d457d8fdfef6b6be9e4" ], "journalName": "", "journalPages": "41-60", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_3" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e196bcb0dfea0854daa8714a377591e399fe858c", "sources": [ "DBLP" ], "title": "EDGE: Extreme Scale Fused Seismic Simulations with the Discontinuous Galerkin Method", "venue": "ISC", "year": 2017 }, "e1bd4d45476525ff40268c8ef740dac3d9405098": { "authors": [ { "ids": [ "1886895" ], "name": "Philippe Clauss" }, { "ids": [ 
"19191072" ], "name": "Ervin Altintas" }, { "ids": [ "40157972" ], "name": "Matthieu Kuhn" } ], "doi": "10.1109/IPDPS.2017.34", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.34", "entities": [ "Data parallelism", "Iteration", "Iterator", "Linear function (calculus)", "Load balancing (computing)", "Loop optimization", "OpenMP", "Parallel algorithm", "Parallel computing", "Polyhedron", "Polynomial", "Program optimization", "Runtime system", "Scheduling (computing)", "X86" ], "id": "e1bd4d45476525ff40268c8ef740dac3d9405098", "inCitations": [ "56b58e9434c95a68c9486f8e0ddfd13f364e79aa" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "778-787", "journalVolume": "", "outCitations": [ "b7b7725104805e87b8656a8b456a9e1c5fd33f30", "59b474e992ac78ee9e3df3c6abc282eaea996ce0", "21b4e156771b53721dcc7311c9a820af9483161b", "1c5b15587e4034c97610b2017697ad1ea663a8fa", "56d002964786aea0fef64d6f1c81d96c22195070", "533d60a68d64caf88db21e25a3f16c9a4d0e4d92", "96b2efa9ea1ee6d071005c523ba69680eaf93da1", "936295d41f89aa0532b3bbda268b924e3dc9dc13", "33df2bddce39f19455280d9042894707b712b083", "524242205bed90261f9a70fe7c122e9226ede249", "0ecac51f093db517b11c0853fa761f2b36d17201", "4635754a1c407c58ade574cb7dcba7968626e324", "233cfa61d01fcf26effc64508250cb90396e8a78" ], "paperAbstract": "Loop collapsing is a well-known loop transformation which combines some loops that are perfectly nested into one single loop. It allows to take advantage of the whole amount of parallelism exhibited by the collapsed loops, and provides a perfect load balancing of iterations among the parallel threads. However, in the current implementations of this loop optimization, as the ones of the OpenMP language, automatic loop collapsing is limited to loops with constant loop bounds that define rectangular iteration spaces, although load imbalance is a particularly crucial issue with non-rectangular loops. 
The OpenMP language addresses load balance mostly through dynamic runtime scheduling of the parallel threads. Nevertheless, this runtime schedule introduces some unavoidable executiontime overhead, while preventing to exploit the entire parallelism of all the parallel loops. In this paper, we propose a technique to automatically collapse any perfectly nested loops defining non-rectangular iteration spaces, whose bounds are linear functions of the loop iterators. Such spaces may be triangular, tetrahedral, trapezoidal, rhomboidal or parallelepiped. Our solution is based on original mathematical results addressing the inversion of a multi-variate polynomial that defines a ranking of the integer points contained in a convex polyhedron. We show on a set of non-rectangular loop nests that our technique allows to generate parallel OpenMP codes that outperform the original parallel loop nests, parallelized either by using options “static” or “dynamic” of the OpenMPschedule clause.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.34" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e1bd4d45476525ff40268c8ef740dac3d9405098", "sources": [ "DBLP" ], "title": "Automatic Collapsing of Non-Rectangular Loops", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "e1d8247f85d632636c3bce3928c0a664c655385a": { "authors": [ { "ids": [ "2582970" ], "name": "Yajing Chen" }, { "ids": [ "3367877" ], "name": "Shengshuo Lu" }, { "ids": [ "1678428" ], "name": "Cheng Fu" }, { "ids": [ "1687117" ], "name": "David Blaauw" }, { "ids": [ "9298165" ], "name": "Ronald Dreslinski" }, { "ids": [ "1751516" ], "name": "Trevor N. 
Mudge" }, { "ids": [ "2325469" ], "name": "Hun-Seok Kim" } ], "doi": "10.1145/3079856.3080227", "doiUrl": "https://doi.org/10.1145/3079856.3080227", "entities": [ "32-bit", "8-bit", "Baseline (configuration management)", "Cryptography", "Error detection and correction", "Grammatical Framework", "Internet of things", "Irreducible polynomial", "Link distance", "Microarchitecture", "Parallel computing", "Polynomial", "Public-key cryptography", "SIMD", "Secure communication", "Speedup", "Uncompressed video" ], "id": "e1d8247f85d632636c3bce3928c0a664c655385a", "inCitations": [ "7cbd2d99742c3998123288a4ce0e7e5dd02d9b1b", "498b084f3e65c8f53983829b65e136c26f61d757" ], "journalName": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "journalPages": "55-68", "journalVolume": "", "outCitations": [ "1442dff91bf7a583cb776b024e5375f29b21b239", "f921161394f076eeee3d8e37c2742a50460ce0f0", "fc1b7d01dd7c93f4c298572dc5c178f80e33a3fc", "436982d362f4d788fabbb58f8efe938428b54945", "6ccb970de68fc2006bb2302dba8793a5016ac5d4", "68346abbb98c4bb8a7201c63c2a2cd4d441138d1", "295542f34196359b218a6acdd534858bfad3ad06", "00ba13af2660378b213f8ff734d6fab2ce6b0ec4", "43e5ee363d1be2a5c85f9e9d715192154f6dd8b2", "4b0119d3efea61e68aac12d73733e10b1597e07d", "45738e5249d7a7d2ce0f53c6ce6c8bf0d2a0f126", "7e52da57a7bbc16ef7bd2efb6fdc51b0b4c8ba37", "b5342f57d1c824bdb21c630a4dda49e9925c856e", "17a810b1dc1a103c9307d3b1ffa43ad334d6812e", "613032c25423e94cf2d787be7882c289f0be4573", "8af86b0aaa0a99c96d661ef10958e5579a43361a", "9eb52099e02206daa7500df85acc0c6a7b94bbb9", "9815a447e540220d244811f09ee5b35123e49576", "44cf97f4cc486dc040f99f1b39eb9459143a6d32", "835e870850ab0ddef6c57744c379a46b49354ba1", "00820f57912e820bd40dd24af3bededa2c7d773e", "6a1c7ea497cb6b520a65e37449787f1d7a1ddb49", "0265955b9565abd870ab9d874305a84ef397c743", "6d0ea468a737ef666d6a9da0ee1c2f1cff3f43b3", "2adf64c99546bf66fbd7fb069408d74f3fe1d7b2", "059bdf296170b030fa9cb3c80efd202472b2f350", 
"f6787f0088402d781f0c11574551590a2623dd43", "a911878cdfbb93e81a3d3490f41288182da94cf3", "ecb46f10b4363bf024d52242c995ac58edc8a137", "f0e166490adf296877562cf2d2465124768b60e6", "fc52220a8cf6228cf2c2d0a535e3b36af65373f7", "dcafa46185e904be469f777ecb5f7649c99c2c13", "fb6982ab0fda40c434511ac753c8bea04cfbd03b", "8a5c3f8cdf4ee92b4a137d898828d5bd69596ead", "d28bdb33e390bcc6b554d2ee4a60d609f14555e6", "c5a3175a4b13b6303c099d154b3624257aa8e344", "8648372a4c46b0f3fd3262d34f5d2a2c8d2c11df", "40a2f0613291067d178ac47b8ebb4f37ae5386cc", "1b2c83b4749604920053cd626c5476f859f16f1b", "4ee6af84b2953dd5e44cb15b8661b2b9cc8c425a", "2bdfd5325655ba43eac72e712009688010abaa53" ], "paperAbstract": "This paper investigates the feasibility of a unified processor architecture to enable error coding flexibility and secure communication in low power Internet of Things (IoT) wireless networks. Error coding flexibility for wireless communication allows IoT applications to exploit the large tradeoff space in data rate, link distance and energy-efficiency. As a solution, we present a light-weight Galois Field (GF) processor to enable energy-efficient block coding and symmetric/asymmetric cryptography kernel processing for a wide range of GF sizes (2m, m = 2, 3, ..., 233) and arbitrary irreducible polynomials. Program directed connections among primitive GF arithmetic units enable dynamically configured parallelism to efficiently perform either four-way SIMD 5- to 8-bit GF operations, including multiplicative inverse, or a wide bit-width (e.g., 32-bit) GF product in a single cycle. To illustrate our ideas, we synthesized our GF processor in a 28nm technology. Compared to a baseline software implementation optimized for a general purpose ARM M0+ processor, our processor exhibits a 5-20 x speedup for a range of error correction codes and symmetric/asymmetric cryptography applications. 
Additionally, our proposed GF processor consumes 431μW at 0.9V and 100MHz, and achieves 35.5pJ/b energy efficiency while executing AES operations at 12.2Mbps. We achieve this within an area of 0.01mm2.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080227" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e1d8247f85d632636c3bce3928c0a664c655385a", "sources": [ "DBLP" ], "title": "A programmable Galois Field processor for the Internet of Things", "venue": "2017 ACM/IEEE 44th Annual International Symposium on Computer Architecture (ISCA)", "year": 2017 }, "e29f3c7eb66f7f1da7c39cadd161189a131372db": { "authors": [ { "ids": [ "2691390" ], "name": "Simon Eberz" }, { "ids": [ "2165134" ], "name": "Nicola Paoletti" }, { "ids": [ "2258227" ], "name": "Marc Roeschlin" }, { "ids": [ "17537686" ], "name": "Andrea Patan\u00e9" }, { "ids": [ "1701316" ], "name": "Marta Z. Kwiatkowska" }, { "ids": [ "1697125" ], "name": "Ivan Martinovic" } ], "doi": "", "doiUrl": "", "entities": [ "Authentication", "Biometrics", "Code injection", "Denial-of-service attack", "Experiment", "Mathematical morphology", "Smartphone", "Sound card" ], "id": "e29f3c7eb66f7f1da7c39cadd161189a131372db", "inCitations": [ "cb1b5e8b35609e470ce519303915236b907b13b6", "12bae90ffde4509344b99f581d1c118ce02173bb", "856b6897fddc912be06b86fa18c8138abd6b8690", "3cb426789abda58f9d786b4255b633b63e801b27", "8e4808e71c9b9f852dc9558d7ef41566639137f3", "53695d45dd188f0040ef6cbd53662530ae0ee3c5" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "486020d807340c26d0b7155a6948383a01ffff43", "4eee3f80b4af8da12b69917f4325260d99d7fc9d", "47aacd947167ad07f367c8725750a1e87c320c26", "6ccee0299383da5e6ef08809dd0dd28b42ef224b", "5c3556d4bf94ba51cea58c5d624aed19e6223e59", "5367d509d76c2efb144a681efd442ddbf3b25f4a", "7f57e9939560562727344c1c987416285ef76cda", "492184989035226d1207801e3e5b9dff8b5f6427", "661a69178939755a87759b9b553f0cbb38c08841", 
"5d571263658144973b4f9bfa2f9ef0d0255fdf20", "8bba0a42134ea5df896cdde59e62bbd90e9f4dd4", "a035e9c454ae863112d984602238d592b15d3c48", "0d1567052ba67c72b4a6f5c9a1e775ebf16f30bb", "1feaaf07b177aefe72517f79abb32bf227a230f9", "c38c761f607ff0ad9fb2293ee594b33a9169a430", "1558b1d92f41eb01c49ba9c548a9a5adfb1aebae", "3b6d9eb9bc36446747467d54f6dad7ed8a0fc268", "71e42050840ffdbc1a56c2fed148db843a17b523", "fc15e41beae2123eafb8c7d0c4f9efc6184d4c85", "10136097932c1a6d1971a7a6a12aba8139308485" ], "paperAbstract": "In this work we present a systematic presentation attack against ECG biometrics. We demonstrate the attack\u2019s effectiveness using the Nymi Band, a wrist band that uses electrocardiography (ECG) as a biometric to authenticate the wearer. We instantiate the attack using a hardware-based Arbitrary Waveform Generator (AWG), an AWG software using a computer sound card, and the playback of ECG signals encoded as .wav files using an off-the-shelf audio player. In two sets of experiments we collect data from a total of 41 participants using a variety of ECG monitors, including a medical monitor, a smartphone-based mobile monitor and the Nymi Band itself. We use the first dataset to understand the statistical differences in biometric features that arise from using different measurement devices and modes. Such differences are addressed through the automated derivation of so-called mapping functions, whose purpose is to transform ECG signals from any device in order to resemble the morphology of the signals recorded with the Nymi Band. As part of our second dataset, we enroll users into the Nymi Band and test whether data from any of our sources can be used for a signal injection attack. Using data collected directly on the Nymi Band we achieve a success rate of 81%. When only using data gathered on other devices, this rate decreases to 43% when using raw data, and 62% after applying the mapping function. 
While we demonstrate the attack on the Nymi Band, we expect other ECG-based authentication systems to most likely suffer from the same, fundamental weaknesses.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/broken-hearted-how-attack-ecg-biometrics/", "http://qav.comlab.ox.ac.uk/papers/epr+17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/e29f/3c7eb66f7f1da7c39cadd161189a131372db.pdf", "s2Url": "https://semanticscholar.org/paper/e29f3c7eb66f7f1da7c39cadd161189a131372db", "sources": [ "DBLP" ], "title": "Broken Hearted: How To Attack ECG Biometrics", "venue": "NDSS", "year": 2017 }, "e2d2acc248915386107bb9b79daf2db1388fa220": { "authors": [ { "ids": [ "33870528" ], "name": "Kun Kuang" }, { "ids": [ "1685435" ], "name": "Peng Cui" }, { "ids": [ "2485552" ], "name": "Bo Li" }, { "ids": [ "39037246" ], "name": "Meng Jiang" }, { "ids": [ "1689674" ], "name": "Shiqiang Yang" } ], "doi": "10.1145/3097983.3098032", "doiUrl": "https://doi.org/10.1145/3097983.3098032", "entities": [ "Algorithm", "Experiment", "Load balancing (computing)", "Online advertising", "Propensity score matching", "Synergy", "Synthetic data" ], "id": "e2d2acc248915386107bb9b79daf2db1388fa220", "inCitations": [], "journalName": "", "journalPages": "265-274", "journalVolume": "", "outCitations": [ "faf481fa68a00820555fd0411f35e066d0e6b90d", "a85acbe6ff39173031d877eaf79af3ca52bbc20f", "c6dd8503ca38a31d2e25cffc76639578804b0ab4", "0bd20000c635459d0539b91b4d445d624178e927", "e2f70e7b12f05b223919cac11664ec854a15d70d", "331987182af3b8d13a56bb6331237913d3207988", "99b8f95c06669ffa4176e68f4efe85f6deaebfed", "c3cd698cab8535315bb85024c2c4a29b6744c722", "cddbf8c6a22ea61f9b7c732af8e2b8518696800f", "bbb78efa3e0005e16e32566a79a58d1b63afe4f9", "1c630b52c42c35491d7954f3b09a686c35d40a08", "caecd983da760b4a0e783fb71cd123176b0c4726", "517d6e3999bd425069e45346045adcbd2d0c9299", "86ce004214845a1683d59b64c4363a067d342cac", "15b0089106ebcbe7c602d75cd1c3b798955ea6ee", 
"e46ecb1ed1bed969c76ebe2830715bdc3963afa8", "ff33e17663fd49d00362e83a7659b640c2e1e714", "9d6139e1444a280fd961ef46390dfe0d04caeb10", "8377420714fac0b90c6181682bb7ca0d04755c96", "03f2ec7642fd23ce55d065017c605c65627a5ab4" ], "paperAbstract": "Estimating treatment effect plays an important role on decision making in many fields, such as social marketing, healthcare, and public policy. The key challenge on estimating treatment effect in the wild observational studies is to handle confounding bias induced by imbalance of the confounder distributions between treated and control units. Traditional methods remove confounding bias by re-weighting units with supposedly accurate propensity score estimation under the unconfoundedness assumption. Controlling high-dimensional variables may make the unconfoundedness assumption more plausible, but poses new challenge on accurate propensity score estimation. One strand of recent literature seeks to directly optimize weights to balance confounder distributions, bypassing propensity score estimation. But existing balancing methods fail to do selection and differentiation among the pool of a large number of potential confounders, leading to possible underperformance in many high dimensional settings. In this paper, we propose a data-driven Differentiated Confounder Balancing (DCB) algorithm to jointly select confounders, differentiate weights of confounders and balance confounder distributions for treatment effect estimation in the wild high dimensional settings. The synergistic learning algorithm we proposed is more capable of reducing the confounding bias in many observational studies. To validate the effectiveness of our DCB algorithm, we conduct extensive experiments on both synthetic and real datasets. The experimental results clearly demonstrate that our DCB algorithm outperforms the state-of-the-art methods. 
We further show that the top features ranked by our algorithm generate accurate prediction of online advertising effect.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098032" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e2d2acc248915386107bb9b79daf2db1388fa220", "sources": [ "DBLP" ], "title": "Estimating Treatment Effect in the Wild via Differentiated Confounder Balancing", "venue": "KDD", "year": 2017 }, "e310520cc72c9b445de89f85df678efad27756f8": { "authors": [ { "ids": [ "2795604" ], "name": "Thejaka Amila Kanewala" }, { "ids": [ "1843069" ], "name": "Marcin Zalewski" }, { "ids": [ "2556809" ], "name": "Andrew Lumsdaine" } ], "doi": "10.1109/HiPC.2017.00016", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00016", "entities": [ "Algorithm", "Baseline (configuration management)", "Computation", "Computer vision", "Directed acyclic graph", "Distributed memory", "Graph drawing", "Independent set (graph theory)", "Information theory", "Linear algebra", "List of algorithms", "Maximal independent set", "Michael Luby", "Scheduling (computing)" ], "id": "e310520cc72c9b445de89f85df678efad27756f8", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "52-61", "journalVolume": "", "outCitations": [ "465c5a68c3c62f6fb949acfd7921a9072a29841b", "5bbe0426067cea3efcda5b1a54e0278836eb4eb4", "2a03dfef45709f0dc6f35c4aae9eb6e01035fb28", "e4a02cc0e9e158b22b6b0d69372285b2e4d19c69", "3153364f8255458ac808a800bc54989000caa94f", "01c1f0e97ce5c74c714dc7aa43cb064f45cc3b04", "f2c4f4a13e38b906543f4a60278c9ab3641e9c02", "5a3cd8c65ffcc25bef346174d1f0bc3f83c5cbbb", "640d8b4328b87eba50c5aaf713dd511b44e4e3b0", "947c6bf534ccd620044f77c3bd6068f633b421fb", "3c375359b70cf8d96bd586e9cafcd42bd9ef8698", "37618df61fc94e8f375fde8ba222c9dc3d2f3947", "0481a2b112ade48a8b5eed438982e70d22ad9e62", "1f912ac1e1f8a9bfda1cf7664648a74b6559a407", "1b5b48e349c7a264a62a05cc1d654a8122f9133f", 
"524daf9f17f4083898d4f5019baf8264eafb08d9", "3536edb33e5a3756898be2d4cc8d4126d842f534", "88795e75f1504e81a32ab8daf236495e76f01f6f" ], "paperAbstract": "The maximal independent set (MIS) graph problem arises in many applications such as computer vision, information theory, molecular biology, and process scheduling. The growing scale of graph data suggests the use of distributed memory hardware as a cost-effective approach to providing necessary compute and memory resources. Existing distributed memory parallel MIS algorithms rely on synchronous communication and use techniques such as subgraph computations. In this paper, we present an asynchronous distributed-memory parallel graph algorithm that relies on a virtual directed acyclic graph (DAG) that is created during the algorithm execution. We introduce two additional algorithms that save computations by ordering generated work. The first algorithm applies ordering globally to reduce computations, and the second algorithm applies ordering locally at the level of threads to minimize the synchronization overhead. We use two different implementations of Luby's algorithm variants as baseline to compare the performance of the presented algorithms: (1) vertex-centric Luby A and Luby B implementations, and (2) the CombBLAS linear-algebra Luby A implementation. Results show that proposed algorithms outperform both implementations of Luby algorithms, especially in distributed execution. 
Furthermore, we show that for low- diameter graphs the algorithm that applies global ordering scales better than other algorithms and for high diameter graphs the original asynchronous algorithm and thread-level ordering algorithm show better performance.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00016" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e310520cc72c9b445de89f85df678efad27756f8", "sources": [ "DBLP" ], "title": "Parallel Asynchronous Distributed-Memory Maximal Independent Set Algorithm with Work Ordering", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "e3174b0258f983ce45c49f192f3ddb4502f3a4a1": { "authors": [ { "ids": [ "34564258" ], "name": "Yifei Yuan" }, { "ids": [ "1800137" ], "name": "Dong Lin" }, { "ids": [ "3096085" ], "name": "Ankit Mishra" }, { "ids": [ "3178911" ], "name": "Sajal Marwaha" }, { "ids": [ "1710176" ], "name": "Rajeev Alur" }, { "ids": [ "35206168" ], "name": "Boon Thau Loo" } ], "doi": "10.1145/3098822.3098830", "doiUrl": "https://doi.org/10.1145/3098822.3098830", "entities": [ "Compiler", "Finite-state machine", "High- and low-level", "Interaction", "Memory footprint", "Network traffic control", "Numerical analysis", "Pattern matching" ], "id": "e3174b0258f983ce45c49f192f3ddb4502f3a4a1", "inCitations": [ "88b46e17199bfaa4cf65498bcaeced5284279b97", "4059d74b7c3de3fa0ed5a22f55e5ac3c21f9975b", "4c27e01a4496a1af57cc8d757fc34e46cf17e5eb", "0fc010550a3f1a12d964e62d8bdf352933018837" ], "journalName": "", "journalPages": "99-112", "journalVolume": "", "outCitations": [ "24a963758371e511e3749c865b14f697358f025c", "35f97f2d9520b0fc16f2fcb2b850247df188cb6e", "34ed24e5addb3f19a0b69f78d66d2e7aa8ceda64", "88e5b8347e00e81475061182c60974930b6fb4e2", "1f0ea586a80833ee7b27ada93cc751449c4a3cdf", "050319a3aff4d911fca7e3ee63ffc1a99a0b1595", "59e742875c39d1e09cfe1be7501a4048efe343de", "247d6c0bd57493269f134109fbbb992845c62db8", 
"2e4ab1140b454fc6dacf4d23d3663aa34c741577", "06c9511a3a29f1afa3971b1885ad56b5a890dbdc", "11af8e2ac10831dd74ca3ed3a0118afceb86412f", "4616f5caa32b81723744d6d772a7617ca7d96e6e", "30a7bba8d47d7eca9f7826a721e62032a5c8e77a", "cd642576ce8502b533e229b537f9ffbe9254aef6", "0bad381b84f48b28abc1a98f05993c8eb5be747d", "405377ca200df3f7da390c37516fe13582e70776", "0f48c8c449b82647e98383d124578d95cc57e95d", "3d95c58c595130caa94cdc1fcdb0f9b9f12cb805", "5208060771fd213eefd827e3e1260b939f1aed6d", "947dbf2e58babb4dbe7c6359080152e299644372", "2baa50ceffb972260c877567a5dd513dc79fca21", "363d109c3f00026f9ef904dd8cc3c935ee463b65", "19ff9dac013d1ebca1ea1c9845325c9ddafdf93a", "0f318aa5af40450af9ba2f50872bdf26741e510a", "06beeda7be321eb0a294af55b7689d22d77a5b2b", "f4fdaaf864ca6f73ced06f937d3af978568998eb", "7e7b6249b598d9a4c63394e3a2efd008268ae851", "089b10645ee63cd9c5bb4ab661141dd813408e15" ], "paperAbstract": "In network management today, dynamic updates are required for traffic engineering and for timely response to security threats. Decisions for such updates are based on monitoring network traffic to compute numerical quantities based on a variety of network and application-level performance metrics. Today's state-of-the-art tools lack programming abstractions that capture application or session-layer semantics, and thus require network operators to specify and reason about complex state machines and interactions across layers. To address this limitation, we present the design and implementation of NetQRE, a high-level declarative toolkit that aims to simplify the specification and implementation of such quantitative network policies. NetQRE integrates regular-expression-like pattern matching at flow-level as well as application-level payloads with aggregation operations such as sum and average counts. We describe a compiler for NetQRE that automatically generates an efficient implementation with low memory footprint. 
Our evaluation results demonstrate that NetQRE allows natural specification of a wide range of quantitative network tasks ranging from detecting security attacks to enforcing application-layer network management policies. NetQRE results in high performance that is comparable with optimized manually-written low-level code and is significantly more efficient than alternative solutions, and can provide timely enforcement of network policies that require quantitative network monitoring.", "pdfUrls": [ "http://conferences.sigcomm.org/sigcomm/2017/files/program/ts-3-2-NetQRE.pdf", "http://doi.acm.org/10.1145/3098822.3098830" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e3174b0258f983ce45c49f192f3ddb4502f3a4a1", "sources": [ "DBLP" ], "title": "Quantitative Network Monitoring with NetQRE", "venue": "SIGCOMM", "year": 2017 }, "e36aac1e0662548a7ce305680b917a3737802466": { "authors": [ { "ids": [ "3129527" ], "name": "Xiaojun Xu" }, { "ids": [ "1692867" ], "name": "Chang Liu" }, { "ids": [ "2146469" ], "name": "Qian Feng" }, { "ids": [ "1975143" ], "name": "Heng Yin" }, { "ids": [ "1779453" ], "name": "Le Song" }, { "ids": [ "1723931" ], "name": "Dawn Xiaodong Song" } ], "doi": "10.1145/3133956.3134018", "doiUrl": "https://doi.org/10.1145/3133956.3134018", "entities": [ "Algorithm", "Approximation algorithm", "Artificial neural network", "Binary code", "Collision detection", "Computer security", "Control flow", "Control flow graph", "Deep learning", "Firmware", "Gemini Sound Products", "Graph embedding", "Malware", "Matching (graph theory)" ], "id": "e36aac1e0662548a7ce305680b917a3737802466", "inCitations": [], "journalName": "", "journalPages": "363-376", "journalVolume": "", "outCitations": [ "09490b8debd99f691c1a521d48e37a954c3ef722", "ffaa313b8da3695627cd9915ca46b8bed24a9f4a", "10e970f9747d98f79e2557ceba178dc4ca9ed754", "245d6368c36df54b225f5cb0975b349b2a6fa2fb", "d010eb099acb4ec86575dc518a2767b45715ed4a", 
"6ceede4549c28a42ac48f0e0f60b3c68ff3205d8", "9f5c802e44c1076c418f1bff7f266983fe1da577", "55b97032a03aeaca9fd3fdcb87baa789a1f968b6", "0acdd72b339882f7db483486a85b8f82a91e4510", "0834e74304b547c9354b6d7da6fa78ef47a48fa8", "39aadd7b9d2df7aff518ee5ce33fd16b45be8902", "8cd8298f1d91e92421c83d666669468fb9679840", "272216c1f097706721096669d85b2843c23fa77d", "7ee734cbd2ad85ab3b4d0b63ab80455cc7f6c0b0", "b00672fc5ff99434bf5347418a2d2762a3bb2639", "2059be0aa4a57d00d204c9ccdf4deeed2c984e07", "0fca9a022f4910dda7f8bdc92bbbe8a9c6e35303", "1c059493904b2244d2280b8b4c0c7d3ca115be73", "9d16c547d15a08091e68c86a99731b14366e3f0d", "0c7f1d285ce069b2f7a807a4b2750695098bffe6", "59c5017a06ea6402042b2f804452bcb1b447e67f", "3f59748d8dba7b928344d49dc8bf20e4329c1eb8", "c3a39721e079eb4baa3d286b738bf822007c20d7", "7e1569eebf13a4e906ce909a669e2a9ab1046124", "2aec4a2aa286a0093bf124482ed106f7e965ee8b", "479bf197a3a9c5f6f7b06e64eefa0a90fa0c8b41", "a8a73b74d24249d5d8c90dd8250a7bab34442d9f", "82bd162b04cef498dd2f4b6103c6e13107b7b782", "0cb4ed5d73b4885f05facfa6aee45bdcdec1847e", "6a92a251be77883b26a535c69ebbc872d0479edc", "c8d20715abce3bb53af665cbe20dd87bd20fbaef", "05357314fe2da7c2248b03d89b7ab9e358cbf01e", "0dc043122228447b6ea7218ae80aab01e21df140", "997dc5d9a058753f034422afe7bd0cc0b8ad808b", "5f2be15cdf6f5b461b9c61495eb496351d7fc91a", "46e78409aa7441262492277a0d3c63728621adf5", "0228d60b7a56a3d778e5425c41eaf72cf0b6ec55", "55b7d2e425c5e813ed51780845398ef5d246a4d3", "7e1874986cf6433fabf96fff93ef42b60bdc49f8", "158ebb18074ca6c40edfde16ee729b0970f003b5", "c271535aa3e9a5cc7839543667017cb32ec9b94c", "3efd851140aa28e95221b55fcc5659eea97b172d", "1a67622ca58aa851afe36ad6c6e78f9fb9d691d2" ], "paperAbstract": "The problem of cross-platform binary code similarity detection aims at detecting whether two binary functions coming from different platforms are similar or not. It has many security applications, including plagiarism detection, malware detection, vulnerability search, etc. 
Existing approaches rely on approximate graph-matching algorithms, which are inevitably slow and sometimes inaccurate, and hard to adapt to a new task. To address these issues, in this work, we propose a novel neural network-based approach to compute the embedding, i.e., a numeric vector, based on the control flow graph of each binary function, then the similarity detection can be done efficiently by measuring the distance between the embeddings for two functions. We implement a prototype called Gemini. Our extensive evaluation shows that Gemini outperforms the state-of-the-art approaches by large margins with respect to similarity detection accuracy. Further, Gemini can speed up prior art's embedding generation time by 3 to 4 orders of magnitude and reduce the required training time from more than 1 week down to 30 minutes to 10 hours. Our real world case studies demonstrate that Gemini can identify significantly more vulnerable firmware images than the state-of-the-art, i.e., Genius. Our research showcases a successful application of deep learning on computer security problems.", "pdfUrls": [ "http://iisp.gatech.edu/sites/default/files/images/neural_network-based_graph_embedding_for_cross-platform_binary_code_similarity_detection.pdf", "http://doi.acm.org/10.1145/3133956.3134018", "https://arxiv.org/pdf/1708.06525v1.pdf", "http://arxiv.org/abs/1708.06525", "http://www.cs.ucr.edu/~heng/pubs/gemini-ccs17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e36aac1e0662548a7ce305680b917a3737802466", "sources": [ "DBLP" ], "title": "Neural Network-based Graph Embedding for Cross-Platform Binary Code Similarity Detection", "venue": "CCS", "year": 2017 }, "e3f25f5e8c35c23ef2b5da335b046c4ca06f37df": { "authors": [ { "ids": [ "3197222" ], "name": "Hani Nemati" }, { "ids": [ "8677495" ], "name": "Suchakrapani Datt Sharma" }, { "ids": [ "2276568" ], "name": "Michel Dagenais" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Central 
processing unit", "Continuous integration", "Hypervisor", "OpenVMS", "Privacy", "Profiling (computer programming)", "Scalability", "Software deployment", "Virtual machine" ], "id": "e3f25f5e8c35c23ef2b5da335b046c4ca06f37df", "inCitations": [ "30b56233cd2a10db2f27d50b15a80911d3df9a2f" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "84-89", "journalVolume": "", "outCitations": [ "2aa01a458835656b968654991a8f67e5037f4aea", "dedfd560f76e07b64ce0b0b21b8e74ce86a08673", "ec9cdc9367b62ec5237ad9627273196d7f6f04c5", "35f7a771db093369fdf750eae8ef3cbd0b52dc01", "526ad0efab298489164587fa1ba3ba838bd38ee3", "1ecd36058e48734213c81728f42ff798a2c52833", "98b6b91d018cc4f31d77ec3a521b6043cf5ebaf2", "950909d3dbbec5be1de7fdb93bf3b4767d6afec5" ], "paperAbstract": "Nowadays, nested VMs are often being used to address compatibility issues, security concerns, software scaling and continuous integration scenarios. With the increased adoption of nested VMs, there is a need for newer techniques to troubleshoot any unexpected behavior. Because of privacy and security issues, ease of deployment and execution overhead, these investigation techniques should preferably limit their data collection in most cases to the physical host level, without internal access to the VMs. This paper introduces the Nested Virtual Machine Detection Algorithm (NDA) - a host hypervisor based analysis method which can investigate the performance of nested VMs. NDA can uncover the CPU overhead entailed by the host hypervisor and guest hypervisors, and compare it to the CPU usage of Nested VMs. We further developed several graphical views, for the TraceCompass trace visualization tool, to display the virtual CPUs of VMs and their corresponding nested VMs, along with their states. These approaches are based on host hypervisor tracing, which brings a lower overhead (around 1%) as compared to other approaches. 
Based on our analysis and the implemented graphical views, our techniques can quickly detect different problems and their root causes, such as unexpected delays inside nested VMs.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101123" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e3f25f5e8c35c23ef2b5da335b046c4ca06f37df", "sources": [ "DBLP" ], "title": "Fine-Grained Nested Virtual Machine Performance Analysis through First Level Hypervisor Tracing", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "e3fc67dfcf8e194f452fd734e4dfd99a53f2afeb": { "authors": [ { "ids": [ "5581754" ], "name": "Reza Yazdani" }, { "ids": [ "3084705" ], "name": "Jose-Maria Arnau" }, { "ids": [ "1747103" ], "name": "Antonio Gonz\u00e1lez" } ], "doi": "10.1145/3123939.3124542", "doiUrl": "https://doi.org/10.1145/3123939.3124542", "entities": [ "Acoustic model", "Central processing unit", "Computer data storage", "Finite-state machine", "Gigabyte", "Graphics processing unit", "Hardware acceleration", "Language model", "Megabyte", "Memory bandwidth", "Memory footprint", "Requirement", "Small form factor", "Speech recognition", "Transducer", "Viterbi algorithm", "Wearable technology" ], "id": "e3fc67dfcf8e194f452fd734e4dfd99a53f2afeb", "inCitations": [], "journalName": "", "journalPages": "69-81", "journalVolume": "", "outCitations": [ "5ed7b5ebb4048240d04cdac46df0481a9cf057c2", "7d060fc04306580d8693e1335caf4c37ad83357b", "7cb0713f62f23f9ef49bd681da9ac7870d059875", "15cb3031a0846d56d6da696f06a1586997415c87", "6182e4b5151aa27ceb75c94543e3f584c991e00f", "0fa553cfa0cf3cbdf7a913aa2ae789a757dfb32f", "1851d801a9aeab61b487a71687bcaff3a45318ea", "9b5814007c67fff8428585f6af72c785d796b17b", "7092441514518a7a651806ab19064435cf4968d1", "46d81d3472c1ec6c6d010f9c49d9881c121943bb", "0477c7869e914a4d99ed073652585bec254076f9", "0f415803e4b94cfbefa0bf118b04e9f7f79dcf36", 
"c00930140f49b543ae99bbdfa2bc977e60e91cdd", "f5669af64e39f5e8ec6f4d56108406071a2ed9be", "16cd50316e41cbb1d9dfeafeb524b31654cef37a", "85a7f4f0bd0ce26a6d51c1a53418dc8f53ab233a", "9f3cc03b1c9fc9c3e080e42a0ddd34cdd24a20fb", "baae4f08fa50ead7f13df8003ec6fa2db8d65106", "178631e0f0e624b1607c7a7a2507ed30d4e83a42", "21a275beb31bfa71d9884c993e161578f15caba9", "812c795ce4797b718a2947a9f9bdc5b6965c2b29", "f4697f6536ce6266ec6a49f52e3a7858158c1635", "352a8957005dc5519b15ed1870751ec494d66395", "0c7d7b4c546e38a4097a97bf1d16a60012916758", "5a016ac691ad786a82a04dc8f5688bbb0f1c1008", "1dec63e2a929bb3be57906bfef94f38e969cfbd9" ], "paperAbstract": "Accurate, real-time Automatic Speech Recognition (ASR) requires huge memory storage and computational power. The main bottleneck in state-of-the-art ASR systems is the Viterbi search on a Weighted Finite State Transducer (WFST). The WFST is a graph-based model created by composing an Acoustic Model (AM) and a Language Model (LM) offline. Offline composition simplifies the implementation of a speech recognizer as only one WFST has to be searched. However, the size of the composed WFST is huge, typically larger than a Gigabyte, resulting in a large memory footprint and memory bandwidth requirements.\n In this paper, we take a completely different approach and propose a hardware accelerator for speech recognition that composes the AM and LM graphs on-the-fly. In our ASR system, the fully-composed WFST is never generated in main memory. On the contrary, only the subset required for decoding each input speech fragment is dynamically generated from the AM and LM models. 
In addition to the direct benefits of this on-the-fly composition, the resulting approach is more amenable to further reduction in storage requirements through compression techniques.\n The resulting accelerator, called UNFOLD, performs the decoding in real-time using the compressed AM and LM models, and reduces the size of the datasets from more than one Gigabyte to less than 40 Megabytes, which can be very important in small form factor mobile and wearable devices.\n Besides, UNFOLD improves energy-efficiency by orders of magnitude with respect to CPUs and GPUs. Compared to a state-of-the-art Viterbi search accelerators, the proposed ASR system outperforms by providing 31x reduction in memory footprint and 28% energy savings on average.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3124542" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e3fc67dfcf8e194f452fd734e4dfd99a53f2afeb", "sources": [ "DBLP" ], "title": "UNFOLD: a memory-efficient speech recognizer using on-the-fly WFST composition", "venue": "MICRO", "year": 2017 }, "e42b0ce8acc0ebc8c1beec18cb10a30b52739873": { "authors": [ { "ids": [ "3426901" ], "name": "Samaneh Tajalizadehkhoob" }, { "ids": [ "2768298" ], "name": "Tom van Goethem" }, { "ids": [ "2540125" ], "name": "Maciej Korczynski" }, { "ids": [ "3094001" ], "name": "Arman Noroozian" }, { "ids": [ "1748910" ], "name": "Rainer B\u00f6hme" }, { "ids": [ "2601768" ], "name": "Tyler Moore" }, { "ids": [ "1752104" ], "name": "Wouter Joosen" }, { "ids": [ "9301670" ], "name": "Michel van Eeten" } ], "doi": "10.1145/3133956.3133971", "doiUrl": "https://doi.org/10.1145/3133956.3133971", "entities": [ "Application security", "Client-side", "Content Security Policy", "Exploratory factor analysis", "Exploratory testing", "Factor analysis", "Fixed effects model", "Generalized linear model", "Latent variable", "Malware", "Patch (computing)", "Phishing", "Shared web hosting service", "Solution stack", "Web application", "Web 
application security", "Webmaster", "World Wide Web" ], "id": "e42b0ce8acc0ebc8c1beec18cb10a30b52739873", "inCitations": [ "2272755fe95a1fc700e2a2f5998a3e7596d2f113" ], "journalName": "", "journalPages": "553-567", "journalVolume": "", "outCitations": [ "00e362100c39411f4559d6bd5ad13e4c4c582714", "b89c3f739e16ce76af8a1bcd8db509f403e1e5c3", "23017ad78de9debb8c37ec3309dc5cba9fcd12e5", "ac5b7e4a891699ea4882a55e2b91d5db919b5470", "c6b24743d3e29b2de9d146b03fdec3a18bdf6633", "03b254698d26c38f1d0ef6cad739e5d49dc31f9b", "192dc9e8618d00beb8451553d59dd391bcf53124", "729979881bc84e3c49c382ac93d3b7b61cdc529c", "0efc1c73dee7ad7ac2e4fdbfdc6e1fe46e1b1f9d", "201b0a185dda51629d7b6fdef3b380a0beaba455", "1ee96517103209cde82661f945d32195514020b5", "3e0d8e8a9b07d31b2c29138493334815422750ca", "b83b29ffa96e065ef6bf8a4bd0b1801d53afe006", "84fceec78ecd77d3134bd8839b3eb2ea1ad474a5", "0059ead43690fe62bb916d498491542b6e04a98a", "58d15aee016374a97f2d6ff00e2311f0d4c78873", "0ccd801e5a9c71cf41ed0236146d0c416202d13d", "567556797d7dd7160bedc384e1cc27573817b3b8", "1721632b420d8522b8581d44a8bdb8fa1ffaeb4a", "2afcaaf2b2f678ce8f5b8dee7d4bea408bef62a0", "49a8f9e8ed7dbd8382dbd30aa81321281cd54c07", "a17a7bc1570db0e50761383c121d4df31d9d5d2e", "752ac665cd4c55018e149cc5cd0fc3fdad2b9103", "197f0b31f4088c7a7301e4e3079b43be2eae3dc3", "07df08bab68dfca7b53ede26d265f8dbfca841e1", "2aa0e44b8529de8ee75138eade8aba0bfb9f008f", "17eceec10a0f5f3a3b2ce99309009bfb2e9ef389", "44869a0ef8ed2e584e7c2b24806e79da3339fff6", "ab865b5fb5336324fccedc4ed94959a1230abccf", "3b532950ded354ff3d657f8061aec210e9059da7", "3ba231119f9033e02034ed4a5f80ce7e2f482c61", "6e5e07b2161ba22cef8b91e0693046e7ca2f5cd6", "d78d9b34d8928c6b57dc172b05c71482bc7faf23", "54b265ac7182b33b38044939ad505c0fda8020c1", "2a79728d43895a7e16907092099446852da77310", "c38ff647b9fc57eee17980221bacd040f1668bf5", "64205427d0f900997ec0a22fdd4946a3ba16f1b9", "353bc95ea6d720d867489e3d4cf5c9427531c7c7", "4b35752ee0bcdefc745611e9942998b4c092277a" ], "paperAbstract": "Hosting 
providers play a key role in fighting web compromise, but their ability to prevent abuse is constrained by the security practices of their own customers. Shared hosting, offers a unique perspective since customers operate under restricted privileges and providers retain more control over configurations. We present the first empirical analysis of the distribution of web security features and software patching practices in shared hosting providers, the influence of providers on these security practices, and their impact on web compromise rates. We construct provider-level features on the global market for shared hosting -- containing 1,259 providers -- by gathering indicators from 442,684 domains. Exploratory factor analysis of 15 indicators identifies four main latent factors that capture security efforts: content security, webmaster security, web infrastructure security and web application security. We confirm, via a fixed-effect regression model, that providers exert significant influence over the latter two factors, which are both related to the software stack in their hosting environment. Finally, by means of GLM regression analysis of these factors on phishing and malware abuse, we show that the four security and software patching factors explain between 10% and 19% of the variance in abuse at providers, after controlling for size. For web-application security for instance, we found that when a provider moves from the bottom 10% to the best-performing 10%, it would experience 4 times fewer phishing incidents. 
We show that providers have influence over patch levels--even higher in the stack, where CMSes can run as client-side software--and that this influence is tied to a substantial reduction in abuse levels.", "pdfUrls": [ "https://tylermoore.utulsa.edu/ccs17.pdf", "http://arxiv.org/abs/1708.06693", "http://doi.acm.org/10.1145/3133956.3133971", "https://arxiv.org/pdf/1708.06693v1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e42b0ce8acc0ebc8c1beec18cb10a30b52739873", "sources": [ "DBLP" ], "title": "Herding Vulnerable Cats: A Statistical Approach to Disentangle Joint Responsibility for Web Security in Shared Hosting", "venue": "CCS", "year": 2017 }, "e4a38133a6344454677fb8ae1073b087d90acf2f": { "authors": [ { "ids": [ "3031441" ], "name": "George Giakkoupis" }, { "ids": [ "1769929" ], "name": "Philipp Woelfel" } ], "doi": "10.1145/3087801.3087837", "doiUrl": "https://doi.org/10.1145/3087801.3087837", "entities": [ "Amortized analysis", "Mutual exclusion" ], "id": "e4a38133a6344454677fb8ae1073b087d90acf2f", "inCitations": [], "journalName": "", "journalPages": "221-229", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087837" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e4a38133a6344454677fb8ae1073b087d90acf2f", "sources": [ "DBLP" ], "title": "Randomized Abortable Mutual Exclusion with Constant Amortized RMR Complexity on the CC Model", "venue": "PODC", "year": 2017 }, "e4c8c2480bdcce2abf52120d6ef552ee6bebff76": { "authors": [ { "ids": [ "3006735" ], "name": "Reza Mokhtari" }, { "ids": [ "1696433" ], "name": "Michael Stumm" } ], "doi": "10.1109/IPDPS.2017.122", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.122", "entities": [ "Attribute\u2013value pair", "Big data", "Central processing unit", "Computation", "Gigabyte", "Graphics processing unit", "Hash table", "High memory", "Kinetic Void", "Memory bandwidth", "Model of computation", "Parallel 
computing", "Performance Evaluation", "Speedup", "Thread (computing)", "Throughput" ], "id": "e4c8c2480bdcce2abf52120d6ef552ee6bebff76", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "866-875", "journalVolume": "", "outCitations": [ "b1cbfd6c1e7f8a77e6c1e6db6cd0625e3bd785ef", "25f855c968af75e4617f25c71aee3cedec1dedaf", "0cd87f8454774bf494bf62a58c137ca9b848d0b4", "2a02b8b4e62a62411d9684626508395040415e08", "62daa7691a92dad30589863b2590e6479a2474d3", "6479c756e597c38e57aa45e2eae8550fd738418b", "1087bbef784e7daecaf13b58bc1480d6dee4929b", "43bfc1771b421e0c2525a7513c60327713f4cf82", "b70296efb830d11ea047484998a8b52c20cf836b", "4d67da8eb142b60cfd644db4fd5bc99c6e59c116", "f17fb68a4c6b93167dddc8290bc76f5f90d326ab", "894ac5179446836f468c315cfa16ba843cbdb2aa", "5cdf290c839ba8753876bf255ed8c99fb4ba1299" ], "paperAbstract": "The massive parallelism and high memory bandwidth of GPU's are particularly well matched with the exigencies of Big Data analytics applications, for which many independent computations and high data throughput are prevalent. These applications often produce (intermediary or final) results in the form of key-value (KV) pairs, and hash tables are particularly well-suited for storing these KV pairs in memory. How such hash tables are implemented on GPUs, however, has a large impact on performance. Unfortunately, all hash table solutions designed for GPUs to date have limitations that prevent acceleration for Big Data analytics applications. In this paper, we present the design and implementation of a GPU-based hash table for efficiently storing the KV pairs of Big Data analytics applications. The hash table is able to grow beyond the size of available GPU memory without excessive performance penalties. 
Central to our hash table design is the SEPO model of computation, where the processing of individual tasks is selectively postponed when processing is expected to be inefficient. A performance evaluation on seven GPU-based Big Data analytics applications, each processing several Gigabytes of input data, shows that our hash table allows the applications to achieve, on average, a speedup of 3.5 over their CPU-based multi-threaded implementations. This gain is realized despite having hash tables that grow up to four times larger than the size of available GPU memory.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.122" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e4c8c2480bdcce2abf52120d6ef552ee6bebff76", "sources": [ "DBLP" ], "title": "The SEPO Model of Computation to Enable Larger-Than-Memory Hash Tables for GPU-Accelerated Big Data Analytics", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "e4e6a2b30cc6bece4d3c75532c77a6c5edc7fc79": { "authors": [ { "ids": [ "30409075" ], "name": "Yectli A. Huerta" }, { "ids": [ "40558126" ], "name": "Brent Swartz" }, { "ids": [ "1749830" ], "name": "David J. Lilja" } ], "doi": "10.1109/IISWC.2017.8167766", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167766", "entities": [ "Benchmark (computing)", "Coefficient", "Design of experiments", "Experiment", "Heterogeneous computing", "Linear model", "Linpack benchmarks" ], "id": "e4e6a2b30cc6bece4d3c75532c77a6c5edc7fc79", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "118-119", "journalVolume": "", "outCitations": [ "2436e4c6ff64e9353fd5a13f79ee4a651b0f1041" ], "paperAbstract": "In a closely coupled heterogeneous computing system the work is shared amongst all available computing resources. 
One challenge is to find an optimal division of work between the two or more very different kinds of processing units, each with their own optimal settings. We show that through the use of statistical techniques, a systematic search of the parameter space can be conducted. These techniques can be applied to variables that are categorical or continuous in nature and do not rely on the standard assumptions of linear models, mainly that the response variable can be described as a linear combination of the regression coefficients. Our search technique, when applied to the HPL benchmark, resulted in a performance gain of 14.5% over previously reported results.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167766" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e4e6a2b30cc6bece4d3c75532c77a6c5edc7fc79", "sources": [ "DBLP" ], "title": "Determining work partitioning on closely coupled heterogeneous computing systems using statistical design of experiments", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "e53f0135ec71672c78a58a8916f5b0bbcd6ec4e1": { "authors": [ { "ids": [ "33383407" ], "name": "Sudarsun Kannan" }, { "ids": [ "1691551" ], "name": "Ada Gavrilovska" }, { "ids": [ "1710803" ], "name": "Vishal Gupta" }, { "ids": [ "1718471" ], "name": "Karsten Schwan" } ], "doi": "10.1145/3079856.3080245", "doiUrl": "https://doi.org/10.1145/3079856.3080245", "entities": [ "Data center", "Heterogeneous System Architecture", "Hypervisor", "Memory management", "Operating system", "Server (computing)", "Virtual Machine Manager", "Virtual private server" ], "id": "e53f0135ec71672c78a58a8916f5b0bbcd6ec4e1", "inCitations": [ "88824f4400bf03caed2f99879e68f3543b214c92", "e40c66c3888cb24b925057e722b0281b5695a2ee", "9ab024dcc0f32c623e20c0344385e8d7d3c18150" ], "journalName": "", "journalPages": "521-534", "journalVolume": "", "outCitations": [ 
"b0cd27efc4c73578e7fbabebfca173e00ac73574", "4002b5a203c6ed8ba82ef916f6ec94f1bb1d38aa", "4d15f18a3be26bbd0e73a67550d36a95ca96c651", "69652a30c1badf6a4c21006f3ec8da3de976cbaf", "4c689148ee5e9d6d116f6babbfab21bf2116802e", "85d555f7ce19740b4fc656ff797623c6e1513018", "1b62e2f7ac7a8d3c13773dfa23d05f8ab82b1f23", "49b73fc335cb97c0b52b283ce236a4d4eb2b99c9", "5f7c6e456216a2741702ddc2e18cd7f740d5a962", "d9043a6c844905687ac72054d83d7680a82ece9d", "40c5050e470fa0890e85487e4679197e07a91c09", "69b8c7f168fdb610471c473b9c2f20daaffe052c", "158ebe313a72857c5534a313f3ec0e413593b732", "f3325ace129dec914966f9894d9f412e5e04bdc2", "484e521bcac4953b4d0ab982a7ab28e514c146e4", "242b5b545bb17879a73161134bc84d5ba3e3cf35", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "3ac6671a0c61544b9dab543b116eccdaccc6469e", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "e4aed18e1b965f90a86b57a787c52af44a8c20a4", "7fc1dddbc4b958d416e8f666737fc6a163eae2b4", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "40718dab3e261c2456c3576d15dd0105f1e2e4e2", "2fee80acb6f7b4172622e0f40d350339ca4e3dc9", "823116269044ab4c713373c66c7da3fcb495b459", "983399a958ca0e6b26886e441ab5c4ddba836fc5", "36de396ee9d1c9991e44c01be35e5206d79c3328", "a6cc2def07a1880a81003449e0f0f901da597b18", "08b6730b10503a0cf1ba95307e5e2a45b0111cf1", "19554445f1f3ea7b54be06a74a0d0840ade02be5", "2988e34168fa91398fa397baf823af2063893e9c", "24dc8d1de7e78ab100d2d83cbdf1390ddb9234c9", "31c299532c42106b71e909c2fc0fc7472c39ce90", "6f93e0325e577f49f4bed46a2adcfee4a649dc83", "1154b2fd6fb913b02eb6f64f5287a6b75a506e64", "32dc6016338a2098147e5edbb72c7c5670f78133", "24724ad8962a9e04eb496fddaefe9708f6960601", "2fc8a439f0f73462f875870c403c90ea1fb99e41", "c113513903d01df2c8aa59662f7149c3199ce59b", "3bc9781415a954b2b2c0565ee8c6ef5abe560bfa", "fc6f007082a7d15d6745687358f7e21e2ec9c57e", "5cc3780286a223c5b27df8e21b20f0c8f538f571", "0204f40221260d00c5ee63646560a40dcd7d97d1", "89b11dc5ec54d088be960e305aa442ff565fbfd9", 
"8b10b13fb495101d1e4eb768907cff05e3bd9315", "cba130014e6cc590a09aaeca0590623b496f126b", "98ab001452b8392bb0d0b2677cfb91281bad7708", "33e64874996ac6d163e4e5a97e28b617de7cc0f5" ], "paperAbstract": "Heterogeneous memory management combined with server virtualization in datacenters is expected to increase the software and OS management complexity. State-of-the-art solutions rely exclusively on the hypervisor (VMM) for expensive page hotness tracking and migrations, limiting the benefits from heterogeneity. To address this, we design HeteroOS, a novel application-transparent OS-level solution for managing memory heterogeneity in virtualized system. The HeteroOS design first makes the guest-OSes heterogeneity-aware and then extracts rich OS-level information about applications' memory usage to place data in the 'right' memory avoiding page migrations. When such pro-active placements are not possible, HeteroOS combines the power of the guest-OSes' information about applications with the VMM's hardware control to track for hotness and migrate only performance-critical pages. Finally, HeteroOS also designs an efficient heterogeneous memory sharing across multiple guest-VMs. Evaluation of HeteroOS with memory, storage, and network-intensive datacenter applications shows up to 2x performance improvement compared to the state-of-the-art VMM-exclusive approach.", "pdfUrls": [ "http://doi.acm.org/10.1145/3079856.3080245" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e53f0135ec71672c78a58a8916f5b0bbcd6ec4e1", "sources": [ "DBLP" ], "title": "HeteroOS: OS Design for Heterogeneous Memory Management in Datacenter", "venue": "ISCA", "year": 2017 }, "e559576b8ba5e9ce8e9ce5d4512bda14546e1af1": { "authors": [ { "ids": [ "31888223" ], "name": "Jaemin Yoo" }, { "ids": [ "32184052" ], "name": "Saehan Jo" }, { "ids": [ "1734930" ], "name": "U. 
Kang" } ], "doi": "10.1109/ICDM.2017.69", "doiUrl": "https://doi.org/10.1109/ICDM.2017.69", "entities": [ "Algorithm", "Belief propagation", "Casio Loopy", "Experiment", "Graph (discrete mathematics)", "Heuristic", "Local binary patterns", "Malware", "Running with Rifles", "SBP", "Scalability", "Supervised learning" ], "id": "e559576b8ba5e9ce8e9ce5d4512bda14546e1af1", "inCitations": [ "d08549351dce047f8d423bffd3b9a7d656dd37a6" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "595-604", "journalVolume": "", "outCitations": [ "87ed0f639e6d56b2c536570406c2fe0afd9c3665", "17bac2d9acd386118ac4ff41107f0608cce96150", "3baecc04e1341cbae7999e8f61a3946c76504828", "0147e9aa138fc31ec60263944a1dbad2fa7d73ec", "573a9593e2a4d5085fb0752188b73e5440bd06b6", "36b61567d354ddf281d5e0a97dbdc6c2329d3116", "39348c10c90be968357e2a6b65d5e0e479307735", "5f4c05ba08fac9cde40235ebd4eb9abc6ed2d712", "08ae384c2c68333419f76bcb5f14dc2ba2ef8d33", "7fd78d448539d94e250aa9c08fe6a2a031f44f3f", "0210e82542d73d4c8aa4c2ae26869c4618ce6ca0", "9f92972807f1e486a3fae1b75dbc0ab95966807a", "276ebc620a8976026bd2d03582b9ecfa3738d43c", "45d84d67870fc5de3b0747d05d9971d750604e7e", "c5bb186099cc2b54dbc8fec0ac13e4227a3c31a6", "ed618cb03388592219020d3726517d9610188302", "8034bff4fe646e003148d78c129385d7f8a42eb6", "873c4035268fb41f36555cd9f4b90423c174dae9", "851b81706cead83e6b06ec9f5c0b971145795719", "5be61c9e2acddd58539858b68fd71226da3e06d3", "551558995669a43113c85b638a89adb5775cc2ce", "75a6bdbeaf768e40a0e3dff8d589af7746e68f72", "5b660f6fb6b1277a5c8a311a7e688234cde909d9", "1d03f61e87d54dea4fd507208d5a15ce39dcb88f", "2e14f4ab54715b96b235762f1b8c6de47f1e9e76", "04f39720b9b20f8ab990228ae3fe4f473e750fe3", "29efbdf3f95cee97405accafdebd3bd374f1f003", "eaec9000ef12593e4489a711eed5f042d46d5ced", "b2aa234a76dc47255e0ab53f752e6944bb8fa309" ], "paperAbstract": "Given an undirected network where some of the nodes are labeled, how can we classify the unlabeled nodes with high accuracy? 
Loopy Belief Propagation (LBP) is an inference algorithm widely used for this purpose with various applications including fraud detection, malware detection, web classification, and recommendation. However, previous methods based on LBP have problems in modeling complex structures of attributed networks because they manually and heuristically select the most important parameter, the propagation strength. In this paper, we propose Supervised Belief Propagation (SBP), a scalable and novel inference algorithm which automatically learns the optimal propagation strength by supervised learning. SBP is generally applicable to attributed networks including weighted and signed networks. Through extensive experiments, we demonstrate that SBP generalizes previous LBP-based methods and outperforms previous LBP and RWR based methods in real-world networks.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.69" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e559576b8ba5e9ce8e9ce5d4512bda14546e1af1", "sources": [ "DBLP" ], "title": "Supervised Belief Propagation: Scalable Supervised Inference on Attributed Networks", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "e57bf28b32346e3e97da0bab925b5c831dcb3b30": { "authors": [ { "ids": [ "3361009" ], "name": "Zheguang Zhao" }, { "ids": [ "34627565" ], "name": "Lorenzo De Stefani" }, { "ids": [ "1945956" ], "name": "Emanuel Zgraggen" }, { "ids": [ "2691974" ], "name": "Carsten Binnig" }, { "ids": [ "1735099" ], "name": "Eli Upfal" }, { "ids": [ "1746961" ], "name": "Tim Kraska" } ], "doi": "10.1145/3035918.3064019", "doiUrl": "https://doi.org/10.1145/3035918.3064019", "entities": [ "Experiment", "Interaction", "Synthetic data" ], "id": "e57bf28b32346e3e97da0bab925b5c831dcb3b30", "inCitations": [ "88cb7136cfcd87d9b93ec039359d2fbab158f3de", "4119c775ac55f9400d5f2e183d5dd1723bb9d0f0", "a979f050861b341c3bf4eb7be12d5ee0d87b44d0", 
"7f75690619d23375011b658e67994284fa030477", "a6613fc6e0f8dd9a538ea901db2317b4ce3644bb", "ad8c152b67cdd6ad302414f1fa89824f1b92638b" ], "journalName": "", "journalPages": "527-540", "journalVolume": "", "outCitations": [ "7f0a29a89655d7998efc7bb53e695b3b950bf7fd", "72d61c7a81aed8f4aeacdf7db2d4536b4684483f", "5be592ae66953d1c96ed54f1e0036c5647c00d50", "460f2594a20ed190b9b3c1d1bcd47b08a2315433", "3ff09a021ebed8dfd23b6faeba04d901625e5fa6", "5becff7d8db7907df2b29b3e9a9c3b8cafe2caf7", "3fca4ed6ded83281a9b6ed46a0a4e6a3b1a23164", "1bbb160a886b61113f3ce494af055d1568e30594", "6920a43dc904b8e7d3868c9f906b10aaf3fd72b3", "9512f52337338232e1ed692748d9f069398ca545", "b4266adb19b472422dc722c53c76b8ce6336781e", "cccbc3da776de497ca9d0dde2d4a76dc6c1b0fc4", "4c815e21c909211d7c047a2437938b24217e0a22", "9a94e1981c4ed1429ac66e244dc0c042e31097ca", "156e7730b8ba8a08ec97eb6c2eaaf2124ed0ce6e", "dc39c68a00e38f2993b450eb01c96e1d032ab850", "131a422dcad1ba3c89fc56f4b5255aed32c65d2e", "e03fcae87b049671f979106c215bd985475e3f7e", "6bbe1d686263fee388c16ee6bb84b8f378e38e47", "27db63ab642d9c27601a9311d65b63e2d2d26744", "0546c441944e76b2ebf1da5736264cd7b27890db", "402f68ac3e61bea7b15c82d7290a57c44f4b9afa", "2ab43be33b606176d6cd40dd2993fe4d28ff2313" ], "paperAbstract": "Recent tools for interactive data exploration significantly increase the chance that users make false discoveries. They allow users to (visually) examine many hypotheses and make inference with simple interactions, and thus incur the issue commonly known in statistics as the \"multiple hypothesis testing error.\" In this work, we propose a solution to integrate the control of multiple hypothesis testing into interactive data exploration systems. A key insight is that existing methods for controlling the false discovery rate (such as FDR) are not directly applicable to interactive data exploration. We therefore discuss a set of new control procedures that are better suited for this task and integrate them in our system, QUDE. 
Via extensive experiments on both real-world and synthetic data sets we demonstrate how QUDE can help experts and novice users alike to efficiently control false discoveries.", "pdfUrls": [ "https://zheguang.github.io/research/risk-sigmod.pdf", "http://arxiv.org/abs/1612.01040", "https://arxiv.org/pdf/1612.01040v1.pdf", "http://doi.acm.org/10.1145/3035918.3064019" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e57bf28b32346e3e97da0bab925b5c831dcb3b30", "sources": [ "DBLP" ], "title": "Controlling False Discoveries During Interactive Data Exploration", "venue": "SIGMOD Conference", "year": 2017 }, "e69faf198285d2596ce26de2b1d242afa2b567b1": { "authors": [ { "ids": [ "2228059" ], "name": "Md. Vasimuddin" }, { "ids": [ "1740375" ], "name": "Srinivas Aluru" } ], "doi": "10.1109/HiPC.2017.00015", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00015", "entities": [ "Algorithm", "Bayesian network", "Central processing unit", "Computation", "Dynamic Bayesian network", "Dynamic programming", "Gene regulatory network", "Heuristic", "InfiniBand", "NP (complexity)", "Parallel algorithm", "Parallel computing", "Parallel programming model", "Programming model", "Speedup" ], "id": "e69faf198285d2596ce26de2b1d242afa2b567b1", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "42-51", "journalVolume": "", "outCitations": [ "e7acc4d625f9594d862b44e651b9be49581e2ead", "075fce8ff12053d2a61f16b40d1459942b7fd056", "ab41e8f7451f3f0afd99f6b3c974d445c4f1e173", "5c3bcb4f6ed14e94143375da9684931bc5b88032", "34e8dd9cfe652e25bd38aaf59e6147af02f2335b", "adcbcb725490d64d12b4f795e1e381ca6b8de4b4", "31dfbc7d45463f74ea88653ca88b8c91b383396a", "3f2e18cf585e5bcfcd1156437faf592d54d713d1", "3a19613b19af47305a73d35830773bef47e606ed", "4b704a990c2b89328969e74e1c40337183a8ba8b", "1dfb8b4fc86a7d8cb72012388360800a693849db", "2de3062f199c62e5a2a2810fa1c2ed914ffdeb3c", 
"69c837d14f383a9adce580e3a5dbdcf0436c4ded", "445d99689ca7b00e60815ae9502e876b26b9a722", "18d98ffdc51d47b0c5a647c14fa0f0df869027b0", "102b10b419541ba2390814076f2d1b6450b3f678", "9edea4ec7c927df2c4c3b025a3c62de8532b6df7", "4f20828f487b093898c989f1043cb0d81b583d5e", "24e1594bcdf7fdef2a62f1176eab3fcc1b500dee", "507c9b4917638d77b0bad5df47edff92f2ff229a", "03fb875d5022a5e98f19c271e2403232acc55318", "2d2b9f7a7a858b1e38b39412bc06f0f00cea1c89" ], "paperAbstract": "Learning the structure of Bayesian networks, even in the static case, is NP-hard, compelling much of the research to focus on heuristic-based approaches. However, there are instances where exact solutions are desirable especially for small network sizes. In this work, we present a dynamic programming based exact solution to learn dynamic Bayesian network structure. Our method simultaneously learns intra- as well as higher order inter-time-slice interactions in the network. For n variables, our exact solution requires O(n^2.2^n(M+1)) computations to learn M-th order network. To handle such high computational requirements, we present a parallel exact solution to push the limit on the size of the networks that can be learned. Given p=2^k processors, the parallel algorithm runs in O(n^2.2^nM.(2^n-k + k)) time and achieves optimal parallel efficiency when 2^n-k > k. Using MPI+X parallel programming model, the parallel algorithm linearly scales to 1,024 cores of a 64-node Intel Xeon InfiniBand cluster, sustaining >99% of parallel efficiency. 
We also show that the learned networks on gene network datasets are of high fidelity compared to heuristic-based techniques.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00015" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e69faf198285d2596ce26de2b1d242afa2b567b1", "sources": [ "DBLP" ], "title": "Parallel Exact Dynamic Bayesian Network Structure Learning with Application to Gene Networks", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "e6b2c40048dad9da7c6cb698a329f3ff89805813": { "authors": [ { "ids": [ "2397343" ], "name": "Rehana Begam" }, { "ids": [ "1699978" ], "name": "Wei Wang" }, { "ids": [ "1721468" ], "name": "Dakai Zhu" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.65", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.65", "entities": [ "Cloud computing", "Distributed computing", "OpenVMS", "Provisioning", "Virtual machine" ], "id": "e6b2c40048dad9da7c6cb698a329f3ff89805813", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "498-505", "journalVolume": "", "outCitations": [ "08934f43ef8c80d98230728a4aef5eea9c9f4c59", "99c84ea73b851f28e5d0072c0ada970175f9cef7", "e9d5f8a9b842ee728b410ca344da63b734d5ad0c", "36aa289e05cb0c030e2c9fc82466188891260741", "3e15a085c6fccc1f1d05f104eca7332baa5889be", "72bc89493930bc88304dbcc4f1fd28e6d283c166", "30d2f7a405af45797b071ebcabfc6cc5e57d8c8f", "b742594fdf9b257cc2dd790e7c36090f2fd8a56f", "5193600c5c9ebb7c6dc78ce08c11bc04c426fb4a", "8d8f60c7fa52e458f08e51e1d55849ef16589703", "5694149d05d91af20aade694d82fc0127182f060", "29861f641995b1d2177681ba5470bc49fdd2a549", "0144adb0abd8765fb7f7872637c04dddca9db904", "4d22a82681bd58e959ed2f3544bba7495701b7f2", 
"92f04da6a5cb2965736b6bf0802f22b4d200b453", "3110cb61c0b2c2fb3be8f05cf8922e80fb1899b9", "5df53a74c903ea5d578f4f1d86d106a0c7c96403", "1cff7e796a4bf0c22765acfefc15b17091367243", "96fc12cd2859b8bbcd5a34dc500d2a2966eee86a", "a02fafbc54e27d5dfe0e20f3505802ff1b1fc419", "b104cad3dc82e29cbc3da88de2e9cc7242633ed0", "813dec41febdb27c23b98e8a3a7e63db29f1e73b", "b4d8013c787b250e8d1018aa26366b94aaa43e68", "c83d0f154c407ad0a94385a2b664ee2378a50e40", "1d84ed02bc65400393723c37a70cc68fe39fbef4", "57c4236411c0a1ca3cb0c605d86a40f396252da8", "8260d12f5212a9a654486d86e5467d8a9e9a5bb5", "aa5ae099220737abdd2bf6cf82861b61ba2e440d", "9e1d29afa2c4e19c3a9e734ba5d8b6b4c242ab05", "3bf64462fc3558ab7e9329d084a1af4cf0c87ebf" ], "paperAbstract": "Recently, several studies have considered applications with a single time constraint (i.e., deadline) running on cloud systems. In this work, to effectively support user requests with flexible timing constraints (e.g., users may prefer expedited services and are willing to pay extra for getting their job processed at earlier times), we consider applications with multiple deadlines for being processed in resource-constrained cloud systems and investigate corresponding virtual machine (VM) provisioning schemes. Specifically, by considering the multiple deadline-bid pairs of user requests, we propose a Slope-based Time-Sensitive Resource Factor with Dominant Resource being considered to prioritize such requests. In addition, we study the mapping schemes that allocate multiple VMs of a user request to only one or multiple computing nodes, which are denoted as Bundled and Distributed mappings, respectively. 
The evaluation results show that, compared to the single deadline schemes, the proposed VM provisioning schemes that consider multiple deadlines and distributed mapping can significantly improve the achieved system benefit and resource utilization.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.65" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e6b2c40048dad9da7c6cb698a329f3ff89805813", "sources": [ "DBLP" ], "title": "Virtual Machine Provisioning for Applications with Multiple Deadlines in Resource-Constrained Clouds", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "e6bc3f22eab08e3624b6384831bd66d37575533c": { "authors": [ { "ids": [ "1793299" ], "name": "Suman Roychoudhury" }, { "ids": [ "1778382" ], "name": "Sagar Sunkle" }, { "ids": [ "1797699" ], "name": "Deepali Kholkar" }, { "ids": [ "1798720" ], "name": "Vinay Kulkarni" } ], "doi": "10.1145/3136014.3136018", "doiUrl": "https://doi.org/10.1145/3136014.3136018", "entities": [ "Domain-specific language", "Governance, risk management, and compliance" ], "id": "e6bc3f22eab08e3624b6384831bd66d37575533c", "inCitations": [], "journalName": "", "journalPages": "175-181", "journalVolume": "", "outCitations": [ "06c5289976cb07eba78d13197799a13150e55b15", "98a6b93b0ffe3ad143349ce45347ec6832981b12", "47f6e54c6fdbd8c16aa85526dbd4d8be8fc8d31c", "3fbe1be19fdd853963366be5533f26a2726176e4", "4d44ee59a991e9b645c03c01facd8e605f11f65d", "06ba50c664993a89763870a6a4a9055230297bff", "852118bbeb4aa6950cadd47975e7cc3d7011958e", "25a5df764d0e3cfb3a2d484cae405db1870eaf74", "574085fae2b1b9337b70781803221d129b8ae82a", "fc00215d0c5cddf7e4f491e9e7446d58742e6ab3", "29b84f61b12fbfc0dcb3e9655ca83a4c911d6937", "6be01c76649084ec29a18408dda208d011cc8f1b", "1d000a1570a52239fa5198b50db761f4163a044d" ], 
"paperAbstract": "Modern enterprises operate in an unprecedented regulatory environment where increasing regulation and heavy penalties on non-compliance have placed regulatory compliance among the topmost concerns of enterprises worldwide. Previous research in the field of compliance has established that the manual specification of the regulations used by GRC frameworks not only fails to ensure their proper coverage but also negatively affects the turnaround time both in proving and maintaining the compliance. Our key contribution in this paper is an implementation of a controlled natural English like (domain-specific) language that can be used by domain experts to specify regulations for automated compliance checking. We demonstrate this language using examples from industry regulations in banking and financial services domain.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136018" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e6bc3f22eab08e3624b6384831bd66d37575533c", "sources": [ "DBLP" ], "title": "A domain-specific controlled English language for automated regulatory compliance (industrial paper)", "venue": "SLE", "year": 2017 }, "e6ecab41033296323e34ef4af99806b1e2611c99": { "authors": [ { "ids": [ "23546490" ], "name": "Milinda Fernando" }, { "ids": [ "2474639" ], "name": "Dmitry Duplyakin" }, { "ids": [ "2666202" ], "name": "Hari Sundar" } ], "doi": "10.1145/3078597.3078610", "doiUrl": "https://doi.org/10.1145/3078597.3078610", "entities": [ "Adaptive mesh refinement", "Algorithm", "Computation", "Load balancing (computing)", "Scalability", "Space-filling curve", "Titan (supercomputer)" ], "id": "e6ecab41033296323e34ef4af99806b1e2611c99", "inCitations": [], "journalName": "", "journalPages": "231-242", "journalVolume": "", "outCitations": [ "e80402ac18fc2d59a9a518d1fdeeae9767ba2971", "ff71759a3efa271670c1e7820873df872b4ca3b9", "5cfeda94aaa59702e57647045de1488b8258abef", "8d950ed304e110c6594ec96e19137d1b968d76f0", 
"65b7c62555d2c3f5763651bff19ec6bf040ef5db", "354c301f19ea36c654d4d8644dec9c29bef648fc", "10777b463156ec55b4068fe9ab63aa69b54c09af", "3a978c028dc1943ecf3eecb4f1448917a237cfd8", "82c6540dbfefa440f8125176d0faddb434c91e97", "949f0d4ea6d730f29aa11d42c061f3ddbd68888d", "429d28998216da5648f40248bf4bc9e508edd2fd", "77eee519041bcb7281d8bc544ae69b8af386b6cc", "601c2301b199eee04870f5ab28b85b0617928ced", "5e556b8ece6c339e1420ed154d8d1e4563d220d3", "18597fc947b6fa6b2dcc2cc5539a9b7fb724e9df", "282f9082cd3a4565dbc5c4507c092ed18244c512", "c04ff62fd8366fa57fb9a039a52e590470066f43", "e03c406dadfbe85bcb11c8ad8baf12f5dc8c20a3", "5c0d56404b4e21d0e485c2e08abda2d12ae7b953", "e41597d140377b10bf166e148083ae0ccd8e0e66", "29d485fc57f246bbbb3ef2e9adacd199ef494d6c", "e8f58cb70b0ec8a3a14a7e0447df54dd8ff41d30", "4060cf1d57b51f3862e6d121dc97cc04751408aa", "f4a91972bf1a05b195bce06a24dc33960bff1151", "77e9cab2f965b970669052b634794eb19f377541", "566e63917526cc083b103985f96cf0c65ce7a4a5", "3fea44f0ba7481db071a24a0766b42ea274a9e31", "63ea74539eb963d0be2672dcb5c29a52987c4b2b", "af9edf9892e4d46e2cd96f8a8ad90647eccaf68d", "445c01f4ecdf8559d150bb1aa656e800306e04a3", "196fea0893bbaa9230d0d003e0b98ffe2d8d4e54" ], "paperAbstract": "Load balancing and partitioning are critical when it comes to parallel computations. Popular partitioning strategies based on space filling curves focus on equally dividing work. The partitions produced are independent of the architecture or the application. Given the ever-increasing relative cost of data movement and increasing heterogeneity of our architectures, it is no longer sufficient to only consider an equal partitioning of work. Minimizing communication costs are equally if not more important. Our hypothesis is that an unequal partitioning that minimizes communication costs significantly can scale and perform better than conventional equal-work partitioning schemes. This tradeoff is dependent on the architecture as well as the application. 
We validate our hypothesis in the context of a finite-element computation utilizing adaptive mesh-refinement. Our central contribution is a new partitioning scheme that minimizes the overall runtime of subsequent computations by performing architecture and application-aware non-uniform work assignment in order to decrease time to solution, primarily by minimizing data-movement. We evaluate our algorithm by comparing it against standard space-filling curve based partitioning algorithms and observing time-to-solution as well as energy-to-solution for solving Finite Element computations on adaptively refined meshes. We demonstrate excellent scalability of our new partition algorithm up to $262,144$ cores on ORNL's Titan and demonstrate that the proposed partitioning scheme reduces overall energy as well as time-to-solution for application codes by up to 22.0%", "pdfUrls": [ "http://doi.acm.org/10.1145/3078597.3078610" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e6ecab41033296323e34ef4af99806b1e2611c99", "sources": [ "DBLP" ], "title": "Machine and Application Aware Partitioning for Adaptive Mesh Refinement Applications", "venue": "HPDC", "year": 2017 }, "e70fed8b08a298cdc7d8bf0c928c3e1bc617987b": { "authors": [ { "ids": [ "3247453" ], "name": "Nathan R. Tallent" }, { "ids": [ "1715527" ], "name": "Darren J. 
Kerbyson" }, { "ids": [ "1753153" ], "name": "Adolfy Hoisie" } ], "doi": "10.1145/3126908.3126962", "doiUrl": "https://doi.org/10.1145/3126908.3126962", "entities": [ "Algorithm", "Critical path method", "High-resolution scheme", "Kilobyte", "Message Passing Interface", "Path analysis (statistics)", "Requirement", "Scalability" ], "id": "e70fed8b08a298cdc7d8bf0c928c3e1bc617987b", "inCitations": [], "journalName": "", "journalPages": "34:1-34:12", "journalVolume": "", "outCitations": [ "1f2ff98f9413bb36c641e9edcfa79f7b33eeb80a", "7490d3671e0f43464c963114de434945a1ae8dc7", "1824405dd805a02dccc85ce4341abfe0799282a8", "18d761a67cd3720fc4115d6c3fe6e8a1f85088c1", "aae636bd99bc4bae4cd4afcfa4621ef573a55c26", "40644088967afffe00eeb8ca3039ea6bc865d3aa", "8c778c35886b39f53edb9e42c4fbe29b6a92e5a9", "18cc11c8a41e01a27be358b119f02f7a2d8bd889", "1eac8c7fb82607a6d20187cfb29b3f9a02d578c2", "5f704543abb6c216a3a400f598b73052a34d5f4f", "05941c054493f241523dc6545c825ee38df3959a", "fc9837533e588664bef8e66cc695ea5c2b7e916c", "0aa48caa248c27ed8905a0123cd1c29ff0dc4968", "7425b4bf809bdbf5ec3a579a81843f1d2c18dd2b", "d7e4c24cc9d98b52ccf6635c1666d31bec4d77b8", "43054be153cccd589fa417751431d71e1b4d4a19", "471dbd456e37e413da3d169f8bf96d151bed60ca", "da01f7fcc5c7eeba75bc09a41fdd946e65210090", "7a4a6c94aa2edb834ef19bd4568a4e84673fd8d6", "48f59efbc20fef3e571b9fb81d039bfa9619a9bf", "04ce0bd4df15e05d376cad98de8b9a83380341d3", "69cd319c93692acc0822eeec743706515c693471", "1a879f8e77d853aa2df7ad6c35f04b98c5165041", "411714a6890392f4a1794dd862917f04af84df2e", "bf33756bc551581e0f7a1d43e148b305ff2296d7", "b9b29a8e950aa9a1931e81f262e6b88138858bac", "e9fa7e93d2f4e958cde16c6cf9d5bc966f8e7ffc", "620bb971f3b71e8cc146e5a3748f090aaa03e839" ], "paperAbstract": "Representative paths analysis generalizes and improves MPI critical path analysis. To improve diagnostic insight, we sample the distribution of program path costs and retain k representative paths. 
We describe scalable algorithms to collect representative paths and path profiles. To collect full paths efficiently, we introduce path pruning that reduces permanent space requirements from a trace (proportional to ranks and MPI events) to path length (the minimum). To make space requirements independent of ranks and events --- even a small constant in practice --- we profile program paths. Avoiding the limitations of prior path profiling approaches, we dynamically discover tasks and attribute costs in high resolution. We evaluate our algorithms on seven applications scaled up to 7000 MPI ranks. Full program paths use as little as 0.01% the permanent space of current methods; profiles require a nearly constant 100--1000 KB. Execution overhead is under 5% when synchronization intervals are sufficiently large (a few milliseconds).", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126962" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e70fed8b08a298cdc7d8bf0c928c3e1bc617987b", "sources": [ "DBLP" ], "title": "Representative paths analysis", "venue": "SC", "year": 2017 }, "e7a4c9088cfc7a5ab552728a280aadbb237e54bf": { "authors": [ { "ids": [ "2546148" ], "name": "Odorico Machado Mendizabal" }, { "ids": [ "34993374" ], "name": "Ruda S. T. 
De Moura" }, { "ids": [ "3039851" ], "name": "Fernando Lu\u00eds Dotti" }, { "ids": [ "1739265" ], "name": "Fernando Pedone" } ], "doi": "10.1109/IPDPS.2017.29", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.29", "entities": [ "Data structure", "Dependence analysis", "Fault tolerance", "Finite-state machine", "High availability", "Multi-core processor", "Scheduling (computing)", "State machine replication", "Strong consistency", "Throughput", "Web application" ], "id": "e7a4c9088cfc7a5ab552728a280aadbb237e54bf", "inCitations": [ "32d185cff26027792ce16194f97a899fd045b0d6" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "748-757", "journalVolume": "", "outCitations": [ "1d99b7749a9311d2db24a3d84728e444eff23e4b", "0a8d3569e9d3359ab9612e1e55775524242e5532", "b77bb6ff9a7018fa4f24893a38c27ac6efbfd4e1", "036ebe81fc7bd9000c3edda83fa30bee03fedc1a", "01e9ddf1062f9a7d7847bb9bcd2371ce6e0d3e29", "3a043714354fe498752b45e4cf429dbae0fb2558", "d12d1289d2384c2ce642f01855637b9f0519e189", "18a5f443299784479e78d9e77f175af57cb2fa2b", "f3018e7589af851341e6b40affb12d0ebdfa7db1", "5f3f9223c5c9f896be099bc177929febad508407", "738c71d77bf3041e4f051a87b1f314738a05a4d3", "60d1301ceaf38d5188c3a3b1b421a0eb77f81433", "42142c121b2dbe48d55e81c2ce198a5639645030", "0e6f25ca2e9dbcca8a630ac5924470aafa3fbcac", "8a0af8ae748210ef571d074362b552af571e6d33", "00c181b8b64e824fbe0172339f1e4560b557fab5", "05a618847e4f08e5bca29dff732757779722b2e0", "a1c704b281e939d343219edffbc84b379ab8a571", "00f7b192212078fc8afcbe504cc8caf57d8f73b5", "4af63ed343df388b6353b6fc77c7137d27822bf4", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "06d8aa948ed0ff654f772439c00711dfe7fa3d1a" ], "paperAbstract": "Many services used in large scale web applications should be able to tolerate faults without impacting their performance. State machine replication is a well-known approach to implementing fault-tolerant services, providing high availability and strong consistency. 
To boost the performance of state machine replication, recent proposals have introduced parallel execution of commands. In parallel state machine replication, incoming commands may or may not depend on other commands that are waiting for execution. Although dependent commands must be processed in the same relative order at every replica to avoid inconsistencies, independent commands can be executed in parallel and benefit from multi-core architectures. Since many application workloads are mostly composed of independent commands, these parallel models promise high throughput without sacrificing strong consistency. The efficient execution of commands in such environments, however, requires effective scheduling strategies. Existing approaches rely on dependency tracking based on pairwise comparison between commands, which introduces scheduling contention. In this paper, we propose a new and highly efficient scheduler for parallel state machine replication. Our scheduler considers batches of commands, instead of commands individually. Moreover, each batch of commands is augmented with a compact data structure that encodes commands information needed to the dependency analysis. We show, by means of experimental evaluation, that our technique outperforms schedulers for parallel state machine replication by a fairly large margin.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.29", "http://www.inf.usi.ch/faculty/pedone/Paper/2017/2017IPDPS.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e7a4c9088cfc7a5ab552728a280aadbb237e54bf", "sources": [ "DBLP" ], "title": "Efficient and Deterministic Scheduling for Parallel State Machine Replication", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "e809122f4988c49e9edc2fbc66755fc659ff3f36": { "authors": [ { "ids": [ "3430303" ], "name": "Nailah Alhassoun" }, { "ids": [ "2333370" ], "name": "Md. 
Yusuf Sarwar Uddin" }, { "ids": [ "1732742" ], "name": "Nalini Venkatasubramanian" } ], "doi": "10.1109/IGCC.2017.8323585", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323585", "entities": [ "Algorithm", "Gradient", "Greater Than", "Mission critical", "Power (Psychology)", "Quorum sensing", "Simulation", "algorithm" ], "id": "e809122f4988c49e9edc2fbc66755fc659ff3f36", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [], "paperAbstract": "Perpetual awareness systems are sensing systems characterized by continuous monitoring and ubiquitous sensing; they are essential to many safety and mission-critical applications, e.g. assisted living, healthcare and public safety. In this paper, we present SAFER, a perpetual heterogeneous IoT system; deployed in homes to detect critical events (injury, hazardous-environment) that must trigger immediate action and response. A key challenge here is the energy consumption associated with perpetual operations. We propose a novel energy-aware perpetual home IoT system where battery-operated and wall-powered IoT devices co-execute to ensure safety of occupants. We use a semantic approach that extracts activities-of-daily-living from device data to drive energy-optimized sensor activations. To validate our approach, we developed an elderly fall detection system using multi-personal and in-situ sensing devices. 
Using initial measurements to drive larger simulations, we show that our Cost-Function-Gradient algorithm can achieve greater than 4X reductions in energy dissipation without loss of sensing accuracy.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323585" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e809122f4988c49e9edc2fbc66755fc659ff3f36", "sources": [ "DBLP" ], "title": "SAFER: An IoT-based perpetual safe community awareness and alerting network", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "e80ccbd999f8f20ef8f06619e99073c6fca995b8": { "authors": [ { "ids": [ "2326844" ], "name": "Bingchao Li" }, { "ids": [ "35900806" ], "name": "Jizhou Sun" }, { "ids": [ "1789661" ], "name": "Murali Annavaram" }, { "ids": [ "1686484" ], "name": "Nam Sung Kim" } ], "doi": "10.1109/IPDPS.2017.81", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.81", "entities": [ "CPU cache", "Cache (computing)", "Computer data storage", "Graphics processing unit", "Shared memory" ], "id": "e80ccbd999f8f20ef8f06619e99073c6fca995b8", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "82-91", "journalVolume": "", "outCitations": [ "1087bbef784e7daecaf13b58bc1480d6dee4929b", "ed0190758c03aea4e8f3c10f05851543ade1aea9", "3364bc50921a9566d61ef8cb73baa82341725e4b", "14d98ecba21e404f80daf024a03effe259cf9b88", "0612811b3ed9fc7ef8300e65cb70360613dab01d", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "7132859e2843f7adb82ec89daf0eb2bdb1da590b", "c07ebd47e86f0ece88b28c57d79ed7544f5a30f0", "559d122ef5c04a872812f8621df8f181e527b8bb", "1a850fbc5d86a91d882eec88290425fbdff57cf6", "03d832219a7cf933db0ef1f686fec730c09acd55", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "2d6f002477015469075954c6748a1a85af352c94", "a1e4f4ae16c5a18896fe1718acfe56a26aeca620", "712c92e77a2fd2cf4efec2bd2b3daa4ac7d16283", "540a65f5e2176c4000551f1335a24e0f07500f68", 
"28cc7453c5f3f9ecb9415e631b0829ec9af8a4c3", "2dc38b527e91f8cfee6f6c7ba4d079087c293471", "0ee3a956a67b0d679bf485d60e75abdbdb5d50e7" ], "paperAbstract": "GPUs provide high-bandwidth/low-latency on-chip shared memory and L1 cache to efficiently service a large number of concurrent memory requests (to contiguous memory space). To support warp-wide accesses to L1 cache, GPU L1 cache lines are very wide. However, such L1 cache architecture cannot always be efficiently utilized when applications generate many memory requests with irregular access patterns especially due to branch and memory divergences. In this paper, we propose Elastic-Cache that can efficiently support both fine- and coarse-grained L1 cache-line management for applications with both regular and irregular memory access patterns. Specifically, it can store 32- or 64-byte words in non-contiguous memory space to a single 128-byte cache line. Furthermore, it neither requires an extra tag storage structure nor reduces the capacity of L1 cache since it stores auxiliary tags for fine-grained L1 cache-line managements in sharedmemory space that is not fully used in many applications. 
Our experiment shows that Elastic-Cache improves the geo-mean performance of applications with irregular memory access patterns by 58% without degrading performance of applications with regular memory access patterns.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.81" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e80ccbd999f8f20ef8f06619e99073c6fca995b8", "sources": [ "DBLP" ], "title": "Elastic-Cache: GPU Cache Architecture for Efficient Fine- and Coarse-Grained Cache-Line Management", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "e8270345bca1a72a1f8f0c71642d3f43ff97fbb4": { "authors": [ { "ids": [ "2624785" ], "name": "Nicolas Gast" } ], "doi": "10.1145/3084454", "doiUrl": "https://doi.org/10.1145/3084454", "entities": [ "Approximation", "Choice modelling", "Dynamical system", "Experiment", "Mean squared error", "Numerical analysis", "Perturbation theory", "Perturbation theory (quantum mechanics)", "Power of two", "Programming paradigm", "Quantum field theory", "Rate of convergence", "Stationary process", "Stochastic process" ], "id": "e8270345bca1a72a1f8f0c71642d3f43ff97fbb4", "inCitations": [ "3816d1545c75850b6bfa56b3f7ce588f76c9ee6c", "b8fb092352449a4fda8386ff0e79ce2e0154d451" ], "journalName": "POMACS", "journalPages": "17:1-17:26", "journalVolume": "1", "outCitations": [ "2d42a2b470dfc9d1ee542e7b5cfead1b171dbdd7", "ad73deea37cad9a9b945d929a86d82d781450345", "129c85f86576bab5f7dd52c54a138f6d8696d940", "51832de6efb63f50edb3a2ecc2e2b6f2d6fd8f53", "0a671fc71f8e6e66ded15370e62f849ce1469489", "81b8d02a7ff7354a33a172c163ae4da7389ddeee", "ceb89a69f5271b732f9de244c2ff0e7d52b987b1", "a1ea9a706d9123274537b09ddd34fb73da7f958b", "0d5a77a52118f67438fd07c4eebaabcded7b6fd8", "1c7949a494b5530e6abf83bf99ce2d8c7c17e38c", "4414619c00ef5886ed3845d04a5a37f97946cfb0", "23813613faf43869860de967ab26395f21f68b6b", "61d3de9ef68f4878d1a3ba48e278d067c3834d58", 
"234e6be0d4238f76b3ac038ee422be39f391c625", "9ef384d453d2f69e168ca10b86e8083a42eceb8b", "211990380338971da189de4e308167f35d60f854" ], "paperAbstract": "Mean-field approximation is a powerful tool to study large-scale stochastic systems such as data-centers -- one example being the famous power of two-choice paradigm. It is shown in the literature that under quite general conditions, the empirical measure of a system of N interacting objects converges at rate O(1√N) to a deterministic dynamical system, called its mean-field approximation.\n In this paper, we revisit the accuracy of mean-field approximation by focusing on expected values. We show that, under almost the same general conditions, the expectation of any performance functional converges at rate O(1/N) to its mean-field approximation. Our result applies for finite and infinite-dimensional mean-field models. We also develop a new perturbation theory argument that shows that the result holds for the stationary regime if the dynamical system is asymptotically exponentially stable. We provide numerical experiments that demonstrate that this rate of convergence is tight and that illustrate the necessity of our conditions. As an example, we apply our result to the classical two-choice model. By combining our theory with numerical experiments, we claim that, as the load rho goes to 1, the average queue length of a two-choice system with N servers is log2 1/(1--ρ) + 1/(2N(1-ρ) +O(1/N2).", "pdfUrls": [ "http://doi.acm.org/10.1145/3084454" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e8270345bca1a72a1f8f0c71642d3f43ff97fbb4", "sources": [ "DBLP" ], "title": "Expected Values Estimated via Mean-Field Approximation are 1/N-Accurate", "venue": "POMACS", "year": 2017 }, "e84e857c48f4c0e4438441610b08ff665d912009": { "authors": [ { "ids": [ "34972007" ], "name": "Maruf Ahmed" }, { "ids": [ "9392149" ], "name": "Albert Y. 
Zomaya" } ], "doi": "10.1109/CLUSTER.2017.20", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.20", "entities": [ "Artificial neural network", "Context switch", "Critical system", "Experiment", "Hypervisor", "Interrupt", "Memory management", "Mutual exclusion", "Performance prediction", "Run time (program lifecycle phase)", "Semiconductor consolidation", "Server (computing)", "Virtual machine" ], "id": "e84e857c48f4c0e4438441610b08ff665d912009", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "557-562", "journalVolume": "", "outCitations": [ "35e7b16b618ca6bf63446372afb2a0ca071f2f13", "58fc1c39da88a8a90e22c7b07fd2339b1a716ee4", "332f77fd05703c1607e3b57884ad31fb1fad0104", "dbcdb4c402756b2b5ac910b9eb17ddb412290d16", "32a7c5e10a09b5532d56af50ef2f60d9776cc56a", "2461db22ab3ebe9eaf8e23b42ab2449bbf06d721", "4b7b45aa74d84f5b86ef3d8bc8bf460602e97d38", "0df1f6a4bd337bda593f0a9dc120841ce933a1dc", "1f612acad3f98f4f63a01e6d8632e50a0aef4257", "3af5d2164fdbcbb47f64044e62445ed5dec0c245", "0558c94a094158ecd64f0d5014d3d9668054fb97", "51e878ed0979919041030f871f6e34531ca39750", "3c4ae51452823afafabe8d33d51218d1d95c2795", "141099ae8ea1e8c76d30e3a97df389de6a07890c", "5fe4eb1749a823469950456a123c77530e33ad73", "48ea9605ac31a13b5b9fc81b5311b51384fdb3e1", "0541d5338adc48276b3b8cd3a141d799e2d40150", "477b56dd761d802805dc984afb39363a51579975", "110c050c6c992d2b956f7b47d717810ac5c91bdc", "01dca7c7612aa71e5da87087c97a8dfffe94d43b", "14b8cbbaf08d1f00145f83c0270833a03e434af3", "0de0c3240bda7972bd0a3c8369ebc4b4f2e4f9c2", "c7bb9a172b477f180ed330619f2fab3d1d54a9ea", "aeb982a2bf63181e9f440e22bd015afd143ce9cb" ], "paperAbstract": "Virtual machine (VM) consolidation is necessary for increasing the server utilization; however, it also leads to VM performance degradation. This work presents a method to predict the consolidated VMs performance from the critical system events data. 
Experiments are designed to demonstrate the effect of system events like interrupts, page faults, mutex operations, and context switching on the consolidated VMs. Results show that the host server counters are not reliable for such predictions. On the other hand, the coupling of the task execution time with the VM system events is an effective way to predict the consolidation performance. Results further show that the VM memory allocation plays an important part in the consolidated tasks performance. The system event data is also used to train an Artificial Neural Network (ANN) for performance prediction on three hypervisors; ESXi, Xen, and XenServer and similar results are observed in all three.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.20" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e84e857c48f4c0e4438441610b08ff665d912009", "sources": [ "DBLP" ], "title": "The Effect of Resource Allocation and System Events on VM Consolidation", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "e8734b7dcc6b7dfc926ce2e30a1b5d15659d63ea": { "authors": [ { "ids": [ "3108759" ], "name": "Marcos Amaris" }, { "ids": [ "20329824" ], "name": "Giorgio Lucarelli" }, { "ids": [ "23198960" ], "name": "Cl\u00e9ment Mommessin" }, { "ids": [ "1733901" ], "name": "Denis Trystram" } ], "doi": "10.1007/978-3-319-64203-1_16", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_16", "entities": [ "Multi-core processor" ], "id": "e8734b7dcc6b7dfc926ce2e30a1b5d15659d63ea", "inCitations": [ "0d3e9810fb91d66484750c9b85834381b3be7313" ], "journalName": "", "journalPages": "220-231", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_16" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e8734b7dcc6b7dfc926ce2e30a1b5d15659d63ea", "sources": [ "DBLP" ], "title": "Generic Algorithms for Scheduling Applications 
on Hybrid Multi-core Machines", "venue": "Euro-Par", "year": 2017 }, "e8b3e7fd85d32ef67a6159f58794bbe5b748e2fc": { "authors": [ { "ids": [ "1880394" ], "name": "Neha Sengupta" }, { "ids": [ "2318532" ], "name": "Michael Hamann" }, { "ids": [ "1679611" ], "name": "Dorothea Wagner" } ], "doi": "10.1109/ICDM.2017.51", "doiUrl": "https://doi.org/10.1109/ICDM.2017.51", "entities": [ "Algorithm", "Benchmark (computing)", "Graph property", "Responsiveness" ], "id": "e8b3e7fd85d32ef67a6159f58794bbe5b748e2fc", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "415-424", "journalVolume": "", "outCitations": [ "3e656e08d2b8d1bf84db56090f4053316b01c10f", "09031aa6d6743bebebc695955cd77c032cd9192f", "2c06e769efa59f59af992df7f3f0ebb14d96a2fb", "030de9d3aa4fc4cbcfef252ce8159e0149a2bbf2", "141e35263ab810983c90d47ad62eb4fab5e51717", "24c5877251ba8b31570256f46247740e42aa59e4", "74ab1f58c81889deea75f87da74e3c62911ceda6", "7c753f6f08bce184996ae61d95b075ade3e3eb47", "45f3695afb41a63705478ec0ae281bee6f2dc6fe", "6e2e566f446dbb900d890b814e7ac2e54ced912b", "688cba6fcc776ad82bb3a3b8b2b0f427f26cb2ea", "22eda725d61fe6d2e9ee246fc716973974b9d228", "d14b118c0c0b59c1cefc8f9c9f9994164f1bfa9f", "0d2df513add257ff8edab5a5ee1db7c4f4b19004", "bc69a8de6cebaae84e6dcd0b5852f2438b9063f9", "4954617a49bdcbc5d1975bc1b55eeb6f3467ed91", "a5aad5abb32f6b15f31b92312bb3b0f7b6470977", "0af803edccda82003b909c630a074c3e1061b0ab", "2af4a96f88ec630c57a28461751af3659ec98dd4", "4fa49861404325afca8a8f0decb64a323934ec22", "3eb2f78a3127a9c5f09fe2c455e71509fab30442", "d8232a1dd6e5dea56e03c837ac9dbb2ed60ff6c6", "9df7932c7a69f4f6fad6913cdb76b070a4ddcd80", "bfa054591978685758e97a4c8ee30e84fa393d0a", "ef59df2d6800757460e463d656a63c3ff603d1f1", "29252c86843d926742017da1a8bf8f5d6805e734" ], "paperAbstract": "We describe a dynamic graph generator with overlapping communities that is capable of simulating community scale events while at the same time maintaining crucial graph 
properties. Such a benchmark generator is useful to measure and compare the responsiveness and efficiency of dynamic community detection algorithms. Since the generator allows the user to tune multiple parameters, it can also be used to test the robustness of a community detection algorithm across a spectrum of inputs. In an experimental evaluation, we demonstrate the generator's performance and show that graph properties are indeed maintained over time. Further, we show that standard community detection algorithms are able to find the generated community structure. To the best of our knowledge, this is the first time that all of the above have been combined into one benchmark generator, and this work constitutes an important building block for the development of efficient and reliable dynamic, overlapping community detection algorithms.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.51" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e8b3e7fd85d32ef67a6159f58794bbe5b748e2fc", "sources": [ "DBLP" ], "title": "Benchmark Generator for Dynamic Overlapping Communities in Networks", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "e94e047cb0045da8335ea9a7a66b9dc3537ab632": { "authors": [ { "ids": [ "9396418" ], "name": "Akshaya Mani" }, { "ids": [ "1765309" ], "name": "Micah Sherr" } ], "doi": "", "doiUrl": "", "entities": [ "Adversary (cryptography)", "Differential privacy", "Internet privacy", "Privacy", "Proxy server", "Relay", "Tor Messenger" ], "id": "e94e047cb0045da8335ea9a7a66b9dc3537ab632", "inCitations": [ "39748866fbc87e47d535e530c92744ddb545e3dc", "43ee0b576739bdd81ab097d26e1ba308aa830775", "dc9d6933657be10abb4f123734f0c3f3c225b228", "356017dbc2c9f1ef4f815dd13e2120e13fb996ed" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "16a7c31409e2a66b48dbab55cee3d717fdfcbc9e", "06eaabcdf0c1f578c2442b3e7a0858a8dc5679c8", 
"05bb5174ee7de801b6e1a55086455b0341ff5649", "1808b64aec21863489f0fe66f250890a3ac2b843", "208448ed57cb0ff70866cb3828b06610c3ff25fd", "0c9ffe6bfabf2c1cb013855d913b6089c4918966", "108747579aef6bf029623639a86070feaf5cad41", "283ecc8622694c070fa53aee7a1c37dadc603f8d", "d53c7989acf948fec62d4b1cfcb6c328df048638", "3cb56e1426d8bd33697dcb36ec9038be003c0b03", "3962fee2fcfc2437fe658cf58a189a226af7d28a", "169d768ae2827af14049f137c18fe03c05268c9c", "507e6ab013b76d8bbd6cfde4328285d6a6168152", "683c8f5c60916751bb23f159c86c1f2d4170e43f", "b532099ff8b67049f292cd62700dca37fc2be623", "040d9acab9003b9d50b2291cc6844b66b2a85d12", "34bdd36330946cf9b377d274bdaaa7dc41888aa2", "2633619177fcb13211008c6f8b952933afc01cde", "e9c513fe159c2b243325f90b6e9bcdb5f8d75c22", "14d19771bc69f1d41f63052e56e134f9ed569c1e", "19db199fd25aa604618d13e80cf317f0858d5604", "387cc19ddbcbb74da7079d4b17d7f2e7300398d3", "54d539d7558b6db3a3043fbbe4c71abe3fb629db", "05a0e62ecf23ba6cbb20b9dddac856ec2cdf255a", "0be8170df4c1ea1cf8312ae5ed326665224d5d9c" ], "paperAbstract": "A large volume of existing research attempts to understand who uses Tor and how the network is used (and misused). However, conducting measurements on the live Tor network, if done improperly, can endanger the security and anonymity of the millions of users who depend on the network to enhance their online privacy. Indeed, several existing measurement studies of Tor have been heavily criticized for unsafe research practices. Tor needs privacy-preserving methods of gathering statistics. The recently proposed PrivEx system demonstrates how data can be safely collected on Tor using techniques from differential privacy. However, as we demonstrate in this paper, the integrity of the statistics reported by PrivEx is brittle under realistic deployment conditions. An adversary who operates even a single relay in the volunteer-operated anonymity network can arbitrarily influence the result of PrivEx queries. 
We argue that a safe and useful data collection mechanism must provide both privacy and integrity protections. This paper presents HisTor , a privacy-preserving statistics collection scheme based on ( , \u03b4)-differential privacy that is robust against adversarial manipulation. We formalize the security guarantees of HisTor and show using historical data from the Tor Project that HisTor provides useful data collection and reporting with low bandwidth and processing overheads.", "pdfUrls": [ "https://www.ndss-symposium.org/ndss2017/ndss-2017-programme/histor%CE%B5-differentially-private-and-robust-statistics-collection-tor/", "https://www.freehaven.net/anonbib/cache/histore-ndss2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/e94e/047cb0045da8335ea9a7a66b9dc3537ab632.pdf", "s2Url": "https://semanticscholar.org/paper/e94e047cb0045da8335ea9a7a66b9dc3537ab632", "sources": [ "DBLP" ], "title": "HisTor\u03b5: Differentially Private and Robust Statistics Collection for Tor", "venue": "NDSS", "year": 2017 }, "e96e5521907bc2982d8f264d1a2c694332ed3e07": { "authors": [ { "ids": [ "35092930" ], "name": "Kevin Ong" }, { "ids": [ "2768186" ], "name": "Kalervo J\u00e4rvelin" }, { "ids": [ "2396539" ], "name": "Mark Sanderson" }, { "ids": [ "1732541" ], "name": "Falk Scholer" } ], "doi": "10.1145/3077136.3080817", "doiUrl": "https://doi.org/10.1145/3077136.3080817", "entities": [ "Desktop metaphor", "Global Positioning System", "Information foraging", "Web search engine" ], "id": "e96e5521907bc2982d8f264d1a2c694332ed3e07", "inCitations": [ "fbe5f1587de705be23aacbeb3f31cd73c0da7e02" ], "journalName": "", "journalPages": "295-304", "journalVolume": "", "outCitations": [ "3fec6927bc6f0a1ef40e10a9c1a297d6764ab599", "f237e72b2d5265da4c213e3c69979395728137dc", "0353d56503f0418629f8306987d7a99dead41864", "477928d562839cfbde209501a0a8ec5212b846b7", "157f923f3063c9ed132b3d51831dbba71a43d252", "34b3d1db8e1a479438d6bb3fd928d0f7441bbd18", 
"09c0211bf1458bd383cb984b577a9dce819d9bd5", "a67a2bc5e9f85fefd515c948691f5ecc03e560c1", "ab67391a1ace48cd8f46a07a02766ecff848bcef", "284664dcf8e64d7352f6cf0854a327376241c7cb", "1ee599508e9d0f8ae6dbb01eeb0e512edbe22a32", "02963d63768e7f5d9692dfe4b8b20748d693be80", "7a43cdd0e8e4b628af6619812c73b2f2d524d0bb", "4b168af7ae0e863bc2b385091f367d62dab63106", "f0b40634a83ec80ac612ce6424b23e95100895e6", "ba4ebde75c9db08e3e02b76e7c764d0c99d40a49", "1c1b9ea0dffcf963fbee14292c073972916f5a2b", "0883ff294e62403eb5c9bd8d81268ca2a0ea518c", "6cd2d88dfa41299c093b30393806b54568125532", "a36847e4a0cf4fbfa5d2cace92133508a43375e9", "38cb9bc50282615e0631d47825c5e0c7f691c3f7", "4a989ea563fea93b3c11effaeb8bee953db6abc0", "08b48058cd560bfe02809155331f05bd77502e55", "155cee79e4450298062cf26afcd9f05e0c560685", "5d389e175fdd17d514b4553e78f3246ecd0e3916", "3ce4e4df850d8aeb85d68b3a2bcf1937ec49d74b", "3453d79657e8f3e1614e73608f92465ab7f5a291" ], "paperAbstract": "This paper investigates if Information Foraging Theory can be used to understand differences in user behavior when searching on mobile and desktop web search systems. Two groups of thirty-six participants were recruited to carry out six identical web search tasks on desktop or on mobile. The search tasks were prepared with a different number and distribution of relevant documents on the first result page. Search behaviors on mobile and desktop were measurably different. Desktop participants viewed and clicked on more results but saved fewer as relevant, compared to mobile participants, when information scent level increased. Mobile participants achieved higher search accuracy than desktop participants for tasks with increasing numbers of relevant search results. Conversely, desktop participants were more accurate than mobile participants for tasks with an equal number of relevant results that were more distributed across the results page. 
Overall, both an increased number and better positioning of relevant search results improved the ability of participants to locate relevant results on both desktop and mobile. Participants spent more time and issued more queries on desktop, but abandoned less and saved more results for initial queries on mobile.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080817" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/e96e5521907bc2982d8f264d1a2c694332ed3e07", "sources": [ "DBLP" ], "title": "Using Information Scent to Understand Mobile and Desktop Web Search Behavior", "venue": "SIGIR", "year": 2017 }, "ea0ec5eca466341ecb4bf52eb1c66bd2cdee0b52": { "authors": [ { "ids": [ "1784588" ], "name": "Martin Kronbichler" }, { "ids": [ "1765698" ], "name": "Katharina Kormann" }, { "ids": [ "5436472" ], "name": "Igor Pasichnyk" }, { "ids": [ "2540483" ], "name": "Momme Allalen" } ], "doi": "10.1007/978-3-319-58667-0_13", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_13", "entities": [ "Galerkin method" ], "id": "ea0ec5eca466341ecb4bf52eb1c66bd2cdee0b52", "inCitations": [ "096bf447a521f76c2874d523be4251af995b08b8" ], "journalName": "", "journalPages": "237-255", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_13" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ea0ec5eca466341ecb4bf52eb1c66bd2cdee0b52", "sources": [ "DBLP" ], "title": "Fast Matrix-Free Discontinuous Galerkin Kernels on Modern Computer Architectures", "venue": "ISC", "year": 2017 }, "ea5cec32e04610174e53ddd02c8ea784de2c44b3": { "authors": [ { "ids": [ "23198960" ], "name": "Cl\u00e9ment Mommessin" }, { "ids": [ "40293316" ], "name": "Matthieu Dreher" }, { "ids": [ "2583571" ], "name": "Bruno Raffin" }, { "ids": [ "2284463" ], "name": "Tom Peterka" } ], "doi": "10.1109/CLUSTER.2017.35", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.35", "entities": [ "Atom", "Field (computer 
science)", "Middleware", "Molecular dynamics", "Run time (program lifecycle phase)", "Simulation", "Synthetic data" ], "id": "ea5cec32e04610174e53ddd02c8ea784de2c44b3", "inCitations": [ "3641cb70c8b14a4840c2f18fce982d00637cb6f9" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "370-378", "journalVolume": "", "outCitations": [ "36ae53af9ef18f0f66bd6ebc7bdbea7893805011", "56cc64d474dd9f3d87560a0db81d3e14e6146e54", "3a10fad57f186e8da3b912ac96e8cfa853734417", "53bcccd314c5c7483933881a6c29235407b3e1c3", "85f2aaff0e0c06bbd0cd5d52a9bfee4d8d7ab910", "701c90f0593e5675d62fc3882bd5da9b7c296394", "abc9d1c519c350845506d58bdd109c9f4d5c2492", "771156b34f7f4f539ef7289027e2205692206aed", "2073266dfb3f034d55cd5a3fca62d230832afd43", "7bb58fe298cda4983f3a51d3ad64298ce68dd136", "1a88656a4fdb9989b503a8622273b5d8b9c4b64b", "70e1cef1129793954694e0f4519441284448d938", "3641cb70c8b14a4840c2f18fce982d00637cb6f9", "6d968fd88abf401a5dfef60096acfe658fb178fc", "22461880994425508a659df74f8df6ddf2cec3da", "3197fcfe91b96162a07b9351dbdf62bb6ec98f3d", "43c67bdea30b1a94b77b93128b8d69c451350f4b", "4e63fd64ac90778b8c431e37f54b5cfd01e0f379", "c1fb67543d08642be5619f43ff5ffff62942bacf", "4fe2bf624e18d71d87ae36824606c42c64446562" ], "paperAbstract": "In situ workflows contain tasks that exchange messages composed of several data fields. However, a consumer task may not necessarily need all the data fields from its producer. For example, a molecular dynamics simulation can produce atom positions, velocities, and forces; but some analyses require only atom positions. The user should decide whether to specialize the output of a producer task for a particular consumer and get better performance or to send more data than required by the consumer. The first option limits task portability, while the second wastes resources. In this paper, we introduce contracts for in situ tasks. 
A contract specifies for a producer each data field available for output and for a consumer the data fields needed as input. Comparing a producer and consumer contract allows automatic selection of the data fields a producer has to send for that consumer. We integrated our contracts mechanism within Decaf, a middleware for building and executing in situ workflows. Contracts enable to automatically extract at the producer the data the consumer needs. We evaluate the cost and performance of message extraction at runtime with both synthetic examples and a real scientific workflow coupling a molecular dynamics simulation with three different data analytics codes. Our contract-based automatic data extraction removes the need to specialize producers while entailing small overheads.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.35" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ea5cec32e04610174e53ddd02c8ea784de2c44b3", "sources": [ "DBLP" ], "title": "Automatic Data Filtering for In Situ Workflows", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "ea5d5e8c9a73d317bcca27714b2eaaf7a0463c21": { "authors": [ { "ids": [ "40461265" ], "name": "Jin Young Kim" }, { "ids": [ "1703980" ], "name": "Nick Craswell" }, { "ids": [ "1728602" ], "name": "Susan T. 
Dumais" }, { "ids": [ "1803571" ], "name": "Filip Radlinski" }, { "ids": [ "1713096" ], "name": "Fang Liu" } ], "doi": "10.1145/3077136.3080837", "doiUrl": "https://doi.org/10.1145/3077136.3080837", "entities": [ "Baseline (configuration management)", "Client (computing)", "Email", "Generative model", "Heuristic", "Server-side", "Session (web analytics)", "Web search engine", "Web search query" ], "id": "ea5d5e8c9a73d317bcca27714b2eaaf7a0463c21", "inCitations": [ "c4b411fbf84cd1e75bff9f74ac27816090227219", "8685976d13b3b3d07ae0cbd11a387718db59a47d" ], "journalName": "", "journalPages": "265-274", "journalVolume": "", "outCitations": [ "c77be34db96695159244723fe9ffa4a88dc4a36d", "ab21e24201e6117ee6879a58624b655a52e9dd54", "17ee2113640a606072a03e548a738be5258805d2", "8a77d1025c2dc9669d04b3865a9cbd2ff8b526a6", "1ab758492347723ae8ad20257715f3fd49e75c27", "51f32b1db78ea048149de0407430e8792210fe38", "0e10dc74083d65ad8f395f9673f5bc33820b9f11", "f7fcc97be18be855f0b337972b740a036606d7c0", "6e953a5caa643ef2e310473b680ef23262bb80ce", "ab0463d8aef96697ad0e4d2c86fd1b41ffd4633d", "5a42d63d7036bc50811bada8d5d6718f85e35e43", "7005f995565aba08f6cf06256a280dcfdd7bacc8", "7cf22fc9f7bf4864e2170f0ac3ff3edc9dd7ed9c", "a0f96a954ccb478440523ef36e67f01de51e6918", "1772143a707f02685e3445485b89c888b2535e7a", "09323de10bf78ab4e6004a5bfd07b95f4e6543ba", "5251b6170ac80da4b6f6dbb3dcb542985d217e8f" ], "paperAbstract": "Email has been a dominant form of communication for many years, and email search is an important problem. In contrast to other search setting, such as web search, there have been few studies of user behavior and models of email search success. Research in email search is challenging for many reasons including the personal and private nature of the collection. 
Third party judges can not look at email search queries or email message content requiring new modeling techniques.\n In this study, we built an opt-in client application which monitors a user's email search activity and then pops up an in-situ survey when a search session is finished. We then merged the survey data with server-side behavioral logs. This approach allows us to study the relationship between session-level outcome and user behavior, and then build a model to predict success for email search based on behavioral interaction patterns.\n Our results show that generative models (MarkovChain) of success can predict the session-level success of email search better than baseline heuristics and discriminative models (RandomForest). The success model makes use of email-specific log activities such as reply, forward and move, as well as generic signals such as click with long dwell time. The learned model is highly interpretable, and reusable in that it can be applied to unlabeled interaction logs in the future.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080837" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ea5d5e8c9a73d317bcca27714b2eaaf7a0463c21", "sources": [ "DBLP" ], "title": "Understanding and Modeling Success in Email Search", "venue": "SIGIR", "year": 2017 }, "ea8bac74a46016734f9b4b3a34fc1c0e1010bd4d": { "authors": [ { "ids": [ "37267314" ], "name": "Masafumi Oyamada" }, { "ids": [ "1874194" ], "name": "Shinji Nakadai" } ], "doi": "10.1109/ICDM.2017.45", "doiUrl": "https://doi.org/10.1109/ICDM.2017.45", "entities": [ "Feature engineering", "Feature learning", "Feature vector", "Machine learning", "Singular value decomposition" ], "id": "ea8bac74a46016734f9b4b3a34fc1c0e1010bd4d", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "357-366", "journalVolume": "", "outCitations": [ "0dffe45c7df96e18d3300ce1d0f08d9debed4a38", 
"40d626ee930868b9e502e8c7d223192020b8c01d", "6dedff9791d0b30a395ab7593ae3fe76721aba6b", "9a74525c8ac6da18c56d67d0202ceb0ec7aa9812", "3605b9befd5f1b53019b8edb3b3d227901e76c89", "5a26ec6568152731ce1667a426307ebccff5a50e", "ab30b9de25048c15df0ebc353c64f4f3cf6ed52b", "276ebc620a8976026bd2d03582b9ecfa3738d43c", "1af0851efa40686b6d06e8678967d8140fd7bc68", "fbe37d8cde691a27c4afaf07c57fc387b94579ca", "29671a0b2e8ed39f11d3535340ad59089cd40757", "287ca18b486fe0575514c51c17246255b60c22c0", "8cad46c0702468843b68d6825d191719eb4e1c18", "b8f8980efb2684ba2a0b7ddf8ea88a9fe09f2ade", "4cce5b3a63427d28ca08b5c72a657f836f57ccf4", "ea50741d115b42928a85d46fd1f9954521811524", "54c13129cbbc8737dce7d14dd1c7e6462016409f", "2452d5ce9dc467f44676893a99d14ee9f8a0da84", "2b81c7b45c3a35c75d14847d0f2f337717a89eb5", "05fcf7d0f007d5beef631737a96768641b6de517", "7f25ab2701dc6716f67a914b7877a4f837ef1c0f", "14bfba0080fc82546422e497f24f32774f8c5665", "946b6fbca1ea1fb99a3c848bfdaf6d7cca03d223", "47f84928dd6e40797255fa1e1bbb3c12b2659a7c", "0d8b00bd3ab292680dc0d17f8a23487b939aef63", "87a4a6f90f33f0dfc12bd29932802cc4f386f06f", "2473ada9b23068c25855c82bb165d264e7af74e4" ], "paperAbstract": "Given a collection of basic customer demographics (e.g., age and gender) and their behavioral data (e.g., item purchase histories), how can we predict sensitive demographics (e.g., income and occupation) that not every customer makes available? This demographics prediction problem is modeled as a classification task in which a customer's sensitive demographic y is predicted from his feature vector x. So far, two lines of work have tried to produce a \"good\" feature vector x from the customer's behavioral data: (1) application-specific feature engineering using behavioral data and (2) representation learning (such as singular value decomposition or neural embedding) on behavioral data.
Although these approaches successfully improve the predictive performance, (1) designing a good feature requires domain experts to make a great effort and (2) features obtained from representation learning are hard to interpret. To overcome these problems, we present a Relational Infinite Support Vector Machine (R-iSVM), a mixture-of-experts model that can leverage behavioral data. Instead of augmenting the feature vectors of customers, R-iSVM uses behavioral data to find out behaviorally similar customer clusters and constructs a local prediction model at each customer cluster. In doing so, R-iSVM successfully improves the predictive performance without requiring application-specific feature designing and hard-to-interpret representations. Experimental results on three real-world datasets demonstrate the predictive performance and interpretability of R-iSVM. Furthermore, R-iSVM can co-exist with previous demographics prediction methods to further improve their predictive performance.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.45" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ea8bac74a46016734f9b4b3a34fc1c0e1010bd4d", "sources": [ "DBLP" ], "title": "Relational Mixture of Experts: Explainable Demographics Prediction with Behavioral Data", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "eaa43952d06b9ffeb4e7d064c02dbf7e2b0124df": { "authors": [ { "ids": [ "2653506" ], "name": "Nishant Saurabh" }, { "ids": [ "1887201" ], "name": "Dragi Kimovski" }, { "ids": [ "2859069" ], "name": "Francesco Gaetano" }, { "ids": [ "1718255" ], "name": "Radu Prodan" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Data redundancy", "Distributed computing", "Erasure code", "Evolutionary algorithm", "Fault tolerance", "Mathematical optimization", "Multi-objective optimization", "Network performance", "Overlay network", "Pareto efficiency", "Program optimization", "Software deployment", "Weight
function" ], "id": "eaa43952d06b9ffeb4e7d064c02dbf7e2b0124df", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "150-159", "journalVolume": "", "outCitations": [ "72f6c921afd9056c03dc6db0151b539e3ae699a3", "5d4e08da4b2f3b54aebd9d427b31c3e6d07cabde", "8c0c9937b52413b950e071a02d12ef6e009f5b09", "58d5296b12e7305121a392b117afbdb23643e958", "47185aacbd2ca1a842884fd4e8b87dcc49cf12c0", "bcdd54c3faa4d0fdebebba7b0815341ed45abd90", "1be4874baac9207cf1f93a351d16c0e43a2aff5c", "25e5d5a046afa5fcde7be23d087ae69f4b438e13", "1fe7ce68c58ba88478b2c28dec8eb967ae14b8fb", "6607e684a68997ced5c021f19049e1e8c10d9f9e", "0d77bb6ef2bb6d165f58bf0251bf3d7cf29f1491", "fe20c9537ea0be2f2a79388a53e1f2fb4b7fe771", "36d8d6485cc8824942c8a8c2d0f4f0795964dd2e", "0e69ee351252fd09a50e6baae53b4776009825ae" ], "paperAbstract": "In the recent years, overlay networks have emerged as a crucial platform for deployment of various distributed applications. Many of these applications rely on data redundancy techniques, such as erasure coding, to achieve higher fault tolerance. However, erasure coding applied in large scale overlay networks entails various overheads in terms of storage, latency and data rebuilding costs. These overheads are largely attributed to the selected erasure coding scheme and the encoded chunk placement in the overlay network. This paper explores a multi-objective optimization approach for identifying appropriate erasure coding schemes and encoded chunk placement in overlay networks. The uniqueness of our approach lies in the consideration of multiple erasure coding objectives such as encoding rate and redundancy factor, with overlay network performance characteristics like storage consumption, latency and system reliability. Our approach enables a variety of tradeoff solutions with respect to these objectives to be identified in the form of a Pareto front.
To solve this problem, we propose a novel two stage multi-objective evolutionary algorithm, where the first stage determines the optimal set of encoding schemes, while the second stage optimizes placement of the corresponding encoded data chunks in overlay networks of varying sizes. We study the performance of our method by generating and analyzing the Pareto optimal sets of tradeoff solutions. Experimental results demonstrate that the Pareto optimal set produced by our multi-objective approach includes and even dominates the chunk placements delivered by a related state-of-the-art weighted sum method.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101132" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/eaa43952d06b9ffeb4e7d064c02dbf7e2b0124df", "sources": [ "DBLP" ], "title": "A Two-Stage Multi-objective Optimization of Erasure Coding in Overlay Networks", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "eaf5e9044192ebad8b31da8e737c5f61c045b8e5": { "authors": [ { "ids": [ "1684176" ], "name": "Yossi Azar" }, { "ids": [ "20719254" ], "name": "Danny Vainstein" } ], "doi": "10.1145/3087556.3087570", "doiUrl": "https://doi.org/10.1145/3087556.3087570", "entities": [ "Algorithm", "Bin packing problem", "Cloud computing", "Competitive analysis (online algorithm)", "Digital back-propagation", "Loss function", "Maxima and minima", "Online algorithm", "Set packing", "Time complexity" ], "id": "eaf5e9044192ebad8b31da8e737c5f61c045b8e5", "inCitations": [ "7b8754c3437e635bb106fc1bdbb43633e32fcd07" ], "journalName": "", "journalPages": "77-86", "journalVolume": "", "outCitations": [ "4682b5ee05b14275507358f22234949110b4b786", "af8c7bf62cfbe9f8b1e614c242796ff27915eb69", "e2edb5088e82f4b55af09a03d59f1f8bc489b65e", "0603d6a8773529d5605bedbb019f0161d65cc354", "ba6d6b27bf75e8e3d4468618e4a5e15ce2f099a7", "7ba475aae0b51b43df0e088c87f1da400e4e3497", "cfbfa93c4827ed168379d3cacfb3d1d963df34da",
"27af6d971000caf538b19b6f2e815d3a8571f2a6", "1c477be8c1ebd05ed07e8c0764a295932f3733b0", "10236985b28470951de73f76d6fba5343d5f788f", "318575adfb030622ba709d257b459afa108b8526", "11eeb124cffcd749f46f8d25ede967d1a6d997f6", "29305b146f35b15b7d2a7e5bf7f7c06f7af24511", "74dedf3bab9d64648b955f3d85ea79a20ca3960b", "66bba71690719bfa1daca2a374004e6194231e4b" ], "paperAbstract": "In this paper we focus on the Clairvoyant Dynamic Bin Packing (DBP) problem, which extends the classical online bin packing problem in that items arrive and depart over time and the departure time of an item is known upon its arrival. The problem naturally arises when handling cloud-based networks. We focus specifically on the MinUsageTime cost function which aims to minimize the overall usage time of all bins that are opened during the packing process. Earlier work has shown a O(\\frac{\\log \\mu}{\\log \\log \\mu}) upper bound where \\mu is defined as the ratio between the maximal and minimal durations of all items. We improve the upper bound by giving an O(\\sqrt{\\log \\mu})-competitive algorithm. We then provide a matching lower bound of \\Omega(\\sqrt{\\log \\mu}) on the competitive ratio of any online algorithm, thus closing the gap with regards to this problem. We then focus on what we call the class of aligned inputs and give a O(\\log \\log \\mu)-competitive algorithm for this case, beating the lower bound of the general case by an exponential factor. 
Surprisingly enough, the analysis of our algorithm that we present, is closely related to various properties of binary strings.", "pdfUrls": [ "http://doi.acm.org/10.1145/3087556.3087570" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/eaf5e9044192ebad8b31da8e737c5f61c045b8e5", "sources": [ "DBLP" ], "title": "Tight Bounds for Clairvoyant Dynamic Bin Packing", "venue": "SPAA", "year": 2017 }, "eb315d845b2a95f6280544e1a9259e3729c8529f": { "authors": [ { "ids": [ "2076251" ], "name": "Aniket Chakrabarti" }, { "ids": [ "40042034" ], "name": "Srinivasan Parthasarathy" }, { "ids": [ "5561909" ], "name": "Christopher Stewart" } ], "doi": "10.1109/ICPP.2017.62", "doiUrl": "https://doi.org/10.1109/ICPP.2017.62", "entities": [ "Algorithm", "Baseline (configuration management)", "Disk partitioning", "Distributed algorithm", "Hall-effect thruster", "Heterogeneous computing", "Multi-objective optimization", "Pareto efficiency", "Program optimization", "Quality of results", "Run time (program lifecycle phase)", "Time complexity" ], "id": "eb315d845b2a95f6280544e1a9259e3729c8529f", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "533-542", "journalVolume": "", "outCitations": [ "026a63d57667f92b0f1823aff099f2dc88cf64d4", "4164348c086789850ab89f2bd1dd8be937d999ad", "09a532479a74325ab5442f7ace716def905b189b", "0c6df8790787fada031058f4d93b50fa5e6c1895", "6b263f01d239b928c3a15d5e9e05466c679453dc", "48b18c093cdb9a46887c6f94b8bd369ed0465564", "11281957a5b9d2a9d0766578bd7972130829a5d2", "536095c3b712a60e197cbe136e785a159c697dd7", "1f396431e904ead58088ef7a7dbe9706cad7315a", "1db950ae5e36215b9a6f280a627340b07c39c3bf", "157e33bc77be75f381e431c436641e8738f7bd3e", "1d22b47116fda5de3950326892949a1882ff13e3", "4a94746810d383280c6ea03edd29781b0569b3bd", "70ac255db930b424371a366f14ad8117e5580277", "3ee0eec0553ca719f602b777f2b600a7aa5c31aa", "19f3032ecf3df3e941f9aa1a3263b98b3c494553", 
"17a1bdf365d125ff6667ab4524fa620e34a13b11", "c81f5ae0bbd06040cdcda52084d8647d6b6a60f4", "4c84386f9acb554431729dedd38c6148ba00a51b", "4dc578364f357b993b5554b9181c90c84aa6b4d1", "0a12a179bebdf4bb69d692a1127795b3f536270b", "36800abb0f8f801077cad8e1abca97b3c0a1be6d", "f51026239f5786b31ee28dbb6dee4024a2f6dcd7", "729094492149b58533936d30c881f8da1e527b0a", "2645529c186e4ee7b3f87ee748812b25b80fbd4f", "334f70c5fbba2bd0a3d24e4311ca8480c78e32cc", "17bef89290a3054ecfdd68d2fe5be8066d59b856", "2abe6b9ea1b13653b7384e9c8ef14b0d87e20cfc", "28e34059176c36934de116e138dd53cf4ee1dff0", "a74b56c794fb1beb378b50a95f7db3bcd8bfdf63", "59ebcb44259cabaec52ae3ff87dfa2204a0de0d0", "d1c21c34936f587779c216ed79ca33883845caa1" ], "paperAbstract": "Distributed algorithms for data analytics partition their input data across many machines for parallel execution. At scale, it is likely that some machines will perform worse than others because they are slower, power constrained or dependent on undesirable, dirty energy sources. It is challenging to balance analytics workloads across heterogeneous machines because the algorithms are sensitive to statistical skew in data partitions. A skewed partition can slow down the whole workload or degrade the quality of results. Sizing partitions in proportion to each machine's performance may introduce or further exacerbate skew. In this paper, we propose a scheme that controls the statistical distribution of each partition and sizes partitions according to the heterogeneity of the computing environment. We model heterogeneity as a multi-objective optimization, with the objectives being functions for execution time and dirty energy consumption. We use stratification to control skew. Experiments show that our computational heterogeneity-aware (Het-Aware) partitioning strategy speeds up running time by up to 51% over the stratified partitioning scheme baseline. 
We also have a heterogeneity and energy aware (Het-Energy-Aware) partitioning scheme which is slower than the Het-Aware solution but can lower the dirty energy footprint by up to 26%. For some analytic tasks, there is also a significant qualitative benefit when using such partitioning strategies.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.62" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/eb315d845b2a95f6280544e1a9259e3729c8529f", "sources": [ "DBLP" ], "title": "A Pareto Framework for Data Analytics on Heterogeneous Systems: Implications for Green Energy Usage and Performance", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "eb4d7f3c536af50ae5d6828d9bb43138c2578dc7": { "authors": [ { "ids": [ "37258122" ], "name": "Fei Mei" }, { "ids": [ "39167624" ], "name": "Qiang Cao" }, { "ids": [ "9280383" ], "name": "Hong Jiang" }, { "ids": [ "1698650" ], "name": "Lei Tian" } ], "doi": "10.1145/3127479.3127486", "doiUrl": "https://doi.org/10.1145/3127479.3127486", "entities": [ "Attribute\u2013value pair", "B-tree", "Baseline (configuration management)", "Cluster analysis", "Copy-on-write", "Data structure", "Directory (computing)", "Key-value database", "Memory-mapped I/O", "Peripheral", "Tree structure" ], "id": "eb4d7f3c536af50ae5d6828d9bb43138c2578dc7", "inCitations": [ "8cfa25e85c2c6c9305f696819d764ed5490f3faf" ], "journalName": "", "journalPages": "142-156", "journalVolume": "", "outCitations": [ "12a0046a1197ae63c3d616c74e367dc583cef196", "b8ddec47f9fab1eddb5c9cacf703781dd5337b87", "d67adb456a315aee244babf4f20e318cc14d13f3", "06bd4d2d21624c7713d7f10ccb7df61bf6b9ee71", "139ff4bc46d145a8691435fc4ad033a5af009d59", "09c0d62190aedb53e820695ccbe98d90f877cc46", "3cb34f7a770836bcfeef28f844d670b8a014ffa8", "1d99b7749a9311d2db24a3d84728e444eff23e4b", "1bf3bad98ffedc59413c965a7b3e969eaaa7edbd", "79b3fe5a9c4860c4d1d3638de0c07d847ac8fb5c", "088e3e939ad234b6fdd0e321290fb26937dc2553", 
"010bf8e639dbdee2c31a58ca9b65e89aeac11315", "206b20f225fc655dfac733b6f0bd8077ed86215e", "3d0d6962ce58935d6f9a4bfff061804c3d1feddc", "8cded4cc565f8b7c41b40de6fe8d20231a7e8652", "175a3360ff5bb2f0777dff1e688f3f90f20e5fcf", "00a749a432197c91302fc817489903fe314b253b", "07f30e70aa30e984c4060e385e048f5f90815216", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "8542fdcb42804a31fedb86874e3c75cd03830d4d", "d4f8d2b9f7fad337d9ad22a92555e159c9543836", "1af5f199dbe6f03aef7bd404a4236e9b29ba4410", "389b618c42c0d5d32a569b9cbaa02a7ff77c6be6", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", "77ee477b0e6d0b6245eca5865ba95b55ed3db434", "ade874e837a2a6b9ce67fad0c5dce6f4e3c68d11", "18a5f443299784479e78d9e77f175af57cb2fa2b", "1e29246acdc73e27899352e3d1862e2af4b74ac2", "199ac28b6bc68bf05c77645ffae7640df114bca5", "72722e7602138e3896e5576d3f3ef730e7b7c4b4", "2be26e8aa238ac37a80e08303f128d8014bb9f3b", "aa8be317cab64446cfada579875efed5b50f4788", "1b0eace707f6b86e94793d1a7c83b7d065e604fa", "91912a461d30035639ddda2b6de97a388823fb4b", "de8f972df6b7bfd32692db268ec54bb031b1ef3c", "265d18ced11e2e64d98afa97b0e86965e68101f7", "1860428ac473bbe38da909c0ba8c882b07deb8df", "642dd27ce62d51b042e134b0d0aec2f2e7cc4d29", "184c5be1a8931fa88b7da10448b4c2fb58f4e150", "28f13ebe8e17fdb4c2500c515759a3ee0c2783ce", "5c06564087db9e53a72ef1eb5865696b0dddd8ca", "d0b6d2075a653d60452b6df0fced4ee0ae093dd2", "47b78e7eb12859a141aed6a28a4e301eb0352629", "b1ec820da48f69a4652ddf08f00e2e991126cf4b", "1ae3dce5083713d1a04b959039a94ff77b346622", "26f820aa9e782f5d6ba8bcb272a31c32094dfd59", "a04678ad8398a2579c249fff4b59bfbcfdd7e25b", "f4147b82166813bbe5dc01e9486664c273d1556c", "1f0c405f9fa2cc9de23a45710fa85b9e7330a958" ], "paperAbstract": "Key-value stores are increasingly adopting LSM-trees as their enabling data structure in the backend storage, and persisting their clustered data through a file system. 
A file system is expected to not only provide file/directory abstraction to organize data but also retain the key benefits of LSM-trees, namely, sequential and aggregated I/O patterns on the physical device. Unfortunately, our in-depth experimental analysis reveals that some of these benefits of LSM-trees can be completely negated by the underlying file level indexes from the perspectives of both data layout and I/O processing. As a result, the write performance of LSM-trees is kept at a level far below that promised by the sequential bandwidth offered by the storage devices. In this paper, we address this problem and propose LDS, an LSM-tree based Direct Storage system that manages the storage space and provides simplified consistency control by exploiting the copy-on-write nature of the LSM-tree structure, so as to fully reap the benefits of LSM-trees.\n Running LSM-trees on LDS as a baseline for comparison, we evaluate LSM-trees on three representative file systems (EXT4, F2FS, BTRFS) with HDDs and SSDs respectively, to study the performance potentials of LSM-trees. Evaluation results show that the write throughputs of LSM-trees can be improved by from 1.8× to 3× on HDDs, and from 1.3× to 2.5× on SSDs, by employing the LSM-tree friendly data layout of LDS.", "pdfUrls": [ "http://doi.acm.org/10.1145/3127479.3127486" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/eb4d7f3c536af50ae5d6828d9bb43138c2578dc7", "sources": [ "DBLP" ], "title": "LSM-tree managed storage for large-scale key-value store", "venue": "SoCC", "year": 2017 }, "eb74d7539d25e415a12e1159a7d0300e505f2b73": { "authors": [ { "ids": [ "1680126" ], "name": "Rui Yan" }, { "ids": [ "1791036" ], "name": "Dongyan Zhao" }, { "ids": [ "22226435" ], "name": "Weinan E." 
} ], "doi": "10.1145/3077136.3080843", "doiUrl": "https://doi.org/10.1145/3077136.3080843", "entities": [ "Big data", "Computer", "Deep learning", "Experiment", "Information retrieval", "Natural language", "Natural language processing", "Utility", "Web 2.0" ], "id": "eb74d7539d25e415a12e1159a7d0300e505f2b73", "inCitations": [], "journalName": "", "journalPages": "685-694", "journalVolume": "", "outCitations": [ "4adb97f155fa9ab8a1d3128db4b00a7812349e5b", "b44ff5104557fc9b900d48f02605dd42d794927d", "1b9d8e45250717b9b5a62ae92ef18e3b77d59327", "7e685545efd833b54ea7366aa4cc6d157d171d96", "49a0bc37ccdba7d7470d318ef803a1fd8001f3f8", "7477d88b225909ef645941a0142eed75dc3b2e56", "81e61aee80e0c2886ab064147fb8f09e86f22b69", "52e011e52d1a9e2c9ea12f31f5a2d3c1b459017e", "9b1ded3df966534b79e0d3c27be3b2a7ba30d6b3", "0a00df600e766775593ebad226a10bd778f33ee3", "22ae02d81c21cb90b0de071550cfb99e6a623e62", "0d67362a5630ec3b7562327acc278c1c996454b5", "e038cc2d1321c6f4de19476e03eef712626b2f4f", "d8a358fb026fda39546cf8e3cbf9e5d754d63463", "a36cba3f779e6624d1130f026174b31b0e596bdd", "57e1eeb8e4f442f4433670b50167404c14566e97", "3bff03b7b0b0c4e8f6384dbb2a95e4338d156524", "a8555994c3a639187d03712e89d609d9a06af7af", "6d18ab89181c24385f6d1f0fcccd7d2e54ed400c", "4aba54ea82bf99ed4690d45051f1b25d8b9554b5", "11c0d36b008980eb03ef0802bba305c089726cac", "5247a6e3a60ff0381355e66bfc313bf27512ae0c", "605b846751324b45205a15086b531f78f7fcd0d2", "154728875d4668065ca6ba9fa2f5d2a1bcfc4a6e", "2e0bed618d023cad81eae218e69afce8bef8e4d6", "8490234d79b47e459824dcf87c1e288211a3c964", "07f3f736d90125cb2b04e7408782af411c67dd5a", "2c0e1b5db1b6851d95a765a2264bb77f19ee04e1", "2a280a11143da9040801193040e0700f79b3bba0", "17f5c7411eeeeedf25b0db99a9130aa353aee4ba", "297e1b9c24fc6f5d481c52cdc2f305d45ecfddb2", "284d64a3426461c520baa03d61cbc957bcd02522", "12db83e66e50152e170d5009c425c925ad2e2c2a", "4afa6c2eb552ceef0e396fbfe449932492873034", "20690b7465e6fef5337f0c9be0a302d33b3c9b3a", "2cac08d4eddc03d8d38656e1e57a430afb37670b", 
"3e2529cf0a2b7bd744b3a820e0626b448f1e7836", "2ae5a5507253aa3cada113d41d35fada1e84555f", "0e65ba2fe449eae1f90d395682e7a35e12b8a259", "b023e40ef8e41d0217e9459266376359a47db5af", "6b3140f18682652791f81fc031943043b3fce48e", "03ff3f8f4d5a700fbe8f3a3e63a39523c29bb60f", "35d6117e582825dd3467c6106047eb50704e03e1" ], "paperAbstract": "Conversation systems are of growing importance since they enable an easy interaction interface between humans and computers: using natural languages. To build a conversation system with adequate intelligence is challenging, and requires abundant resources including an acquisition of big data and interdisciplinary techniques, such as information retrieval and natural language processing. Along with the prosperity of Web 2.0, the massive data available greatly facilitate data-driven methods such as deep learning for human-computer conversation systems. Owing to the diversity of Web resources, a retrieval-based conversation system will come up with at least some results from the immense repository for any user inputs. Given a human issued message, i.e., query, a traditional conversation system would provide a response after adequate training and learning of how to respond. In this paper, we propose a new task for conversation systems: joint learning of response ranking featured with next utterance suggestion. We assume that the new conversation mode is more proactive and keeps user engaging. We examine the assumption in experiments. Besides, to address the joint learning task, we propose a novel Dual-LSTM Chain Model to couple response ranking and next utterance suggestion simultaneously. 
From the experimental results, we demonstrate the usefulness of the proposed task and the effectiveness of the proposed model.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080843" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/eb74d7539d25e415a12e1159a7d0300e505f2b73", "sources": [ "DBLP" ], "title": "Joint Learning of Response Ranking and Next Utterance Suggestion in Human-Computer Conversation System", "venue": "SIGIR", "year": 2017 }, "eb8f783643a4be0d62fafb5c6236eebcc4a54a9b": { "authors": [ { "ids": [ "1937142" ], "name": "Jos\u00e9 Bacelar Almeida" }, { "ids": [ "1722312" ], "name": "Manuel Barbosa" }, { "ids": [ "1737231" ], "name": "Gilles Barthe" }, { "ids": [ "1988409" ], "name": "Fran\u00e7ois Dupressoir" }, { "ids": [ "1959667" ], "name": "Benjamin Gr\u00e9goire" }, { "ids": [ "3085897" ], "name": "Vincent Laporte" }, { "ids": [ "3336582" ], "name": "Vitor Pereira" } ], "doi": "10.1145/3133956.3134017", "doiUrl": "https://doi.org/10.1145/3133956.3134017", "entities": [ "ACM Computing Classification System", "Boolean circuit", "CompCert", "Compiler", "Computation", "Cryptography", "Formal verification", "Mihir Bellare", "Oblivious transfer", "Open-source software", "Optimizing compiler", "Secure multi-party computation", "Secure two-party computation", "Shannon\u2013Fano\u2013Elias coding", "Solution stack", "Yao graph" ], "id": "eb8f783643a4be0d62fafb5c6236eebcc4a54a9b", "inCitations": [], "journalName": "IACR Cryptology ePrint Archive", "journalPages": "821", "journalVolume": "2017", "outCitations": [ "04948723dec0e6724777ee56f0d10168cce44921", "4cebeddc0451a62aff08e6058e4a74c91f1f3cfd", "5af56b18071e7adf0d5b9a118e05bc893aace8e3", "614f3b72660eed2ce7b62970fa73ba8eae4d278b", "5c07dadf28f3cfdd67ab60a12d3ea9860bcd8b24", "57774604456cffa77fcf57087bbede72a23994b6", "6363259226226fe4bdf54691d0d0a081fff54a3d", "1844578c5f75884baa4931d2987cab10d70bd304", "0130d3428065bf7830263fdce79cc0192113af4e", 
"b273f47f97fc3f1ed922c3effda9ab88c52a1680", "61a297247f899995789dc6e32bcf3972502374b8", "bce17caeb17b88d419dbd05a50cbe02e11746855", "3369e43abcb499eea4d208f2239df00551b8d2dd", "0a36a523494c3c966f0a6e716c7ef851fcda4762", "377a034e357ee6ad4a17c83c3c0742d9a62a7a1f", "27e9745fc94ccf6039dd1804cbb99760544fc59b", "31100ccd0867d6d5338612a62b2cde11be75f1b8", "8bc6ce36585a3432682e1f3e08f166a6526145a3", "ad0564d120af0e7471cd32d4c0438b8c25f33a0d", "362246709de205ec0ac5b34e07306839c38d5a3a", "bb63c68855d42c95623ed9362d0853ea1d4cc858", "33148623fc14ea5735e73dd716d030ab17118299", "5529c33b98ef6d597d424f4c5e5d8d5a23069765", "e6d0ac36f37643ab15875c3a5a830e9e51dbf08d", "5e9244286f575e3307dac938552095d3433f332d", "430ce88f430d22d131ca3f753dd576c61f7cced3", "0435dddec1c8b1011bd5e3ea07c45d1cdf0b520b", "1d9cf87fa6d6175a2c1543afff263113657765f6", "6eaa4a716222d28de4ae6c2e17f299035b26a7eb", "490b2ab76335de294498bff727c0a25314317c63", "f7960bd2acc758e6d20c908ead40f6c89e64195c", "0a941c96beeaa5a03616955854011cce8ad94812", "0e0427aedfed65c8dd688c094b181feacf4eaab4", "5e8f9542c7ac0ef5a261e938fdf2b1c2423bd46c", "3dd7d900c870c7d71e0459d14db96725a43dc31a", "0e982be63c47a340bf3749401160ea29b9f8d10f", "9feb4b268fea8a7f9513dcc9db475f5ee9c7dfde", "094225bc5af1da42eb6cdc446489bdbeaf3edd3c", "0658394f2f6d0a4fcacdc92a33ce68c73bd4ebf3", "400251fab502adf5a8ecdf6e5ba7d522bfe5cf1a", "1db2265e3ce510fee6d4d9b39c135bddb4040949", "19a81231a5957cfd32edf1fd869c9b72b1c5e2e2", "f71db4d70d4cc9e931a63dde7a6db8dad10a61a0", "db0d1f3ed4a418e126d0281d5b3aadd1fd45c982", "09e73a08ee516df2d69ae6a6126bb05ff58e2042", "e151328269c95566227ca0c15d54e987ec397111", "1d081dbf3e9afebafac90fdeed4bfa788012142f", "15509fdb7fc7bf065fcdf776b38cf3d72d10c113", "3ff4a7bcfa42348102cd49f6bf33c8ca85c94472", "57adf20f0fa575a43609937c8f1a695a444a0ae0", "22144483d329aaaf82bc4380c8317aa3ac84234e", "e50ae4d480d84c7cbdb8edcebf13e57f5a47c8ad", "58be7ebd123827a0263b5a9e3a27e605edd92ec3" ], "paperAbstract": "We present a high-assurance software stack 
for secure function evaluation (SFE). Our stack consists of three components: i. a verified compiler (CircGen) that translates C programs into Boolean circuits; ii. a verified implementation of Yao's SFE protocol based on garbled circuits and oblivious transfer; and iii. transparent application integration and communications via FRESCO, an open-source framework for secure multiparty computation (MPC). CircGen is a general purpose tool that builds on CompCert, a verified optimizing compiler for C. It can be used in arbitrary Boolean circuit-based cryptography deployments. The security of our SFE protocol implementation is formally verified using EasyCrypt, a tool-assisted framework for building high-confidence cryptographic proofs, and it leverages a new formalization of garbled circuits based on the framework of Bellare, Hoang, and Rogaway (CCS 2012). We conduct a practical evaluation of our approach, and conclude that it is competitive with state-of-the-art (unverified) approaches. Our work provides concrete evidence of the feasibility of building efficient, verified, implementations of higher-level cryptographic systems. 
All our development is publicly available.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134017", "https://eprint.iacr.org/2017/821.pdf", "http://eprint.iacr.org/2017/821" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/eb8f783643a4be0d62fafb5c6236eebcc4a54a9b", "sources": [ "DBLP" ], "title": "A Fast and Verified Software Stack for Secure Function Evaluation", "venue": "IACR Cryptology ePrint Archive", "year": 2017 }, "ebc52e776b09cf02b063f212a765a0952dc0eff1": { "authors": [ { "ids": [ "32491666" ], "name": "Ronald Barber" }, { "ids": [ "2339810" ], "name": "Christian Garcia-Arellano" }, { "ids": [ "3403589" ], "name": "Ronen Grosman" }, { "ids": [ "39168144" ], "name": "Ren\u00e9 M\u00fcller" }, { "ids": [ "1731987" ], "name": "Vijayshankar Raman" }, { "ids": [ "1870587" ], "name": "Richard Sidle" }, { "ids": [ "7587477" ], "name": "Matt Spilchen" }, { "ids": [ "3047270" ], "name": "Adam J. Storm" }, { "ids": [ "1968180" ], "name": "Yuanyuan Tian" }, { "ids": [ "1843945" ], "name": "Pinar T\u00f6z\u00fcn" }, { "ids": [ "2460866" ], "name": "Daniel C. Zilio" }, { "ids": [ "3416113" ], "name": "Matt Huras" }, { "ids": [ "1793770" ], "name": "Guy M. 
Lohman" }, { "ids": [ "39207992" ], "name": "Chandrasekaran Mohan" }, { "ids": [ "3023309" ], "name": "Fatma \u00d6zcan" }, { "ids": [ "2886859" ], "name": "Hamid Pirahesh" } ], "doi": "", "doiUrl": "", "entities": [ "Big data", "Database", "Ecosystem", "IT risk management", "Mobile app" ], "id": "ebc52e776b09cf02b063f212a765a0952dc0eff1", "inCitations": [ "5977a741cbc79c9b72a9587d40732bf2d64ff376", "e0f8f8bfbac97dec0fd609fcbfdbb83694932fee" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "3a134bc11a5805bcf45fdcb88a91321a1b1b63c3", "cf1c70afbd942ff34595052c3438dc3f50a90167", "3492873a8bc6d1d501dcac97e891c43dfecc29c0", "307a7841caf402a6e86cd652be02439b85577a56", "07d847f310d5fa9138f461f0a25c5e0024f1c4af", "afda6470dd16dc0a865dbb6fc291e5806132379b", "313e8120c31fda6877ea426d8a3be9bcf1b6e088", "0235fb69431fa5892333eb48a06ede07df6ff4f6", "080ed793c12d97436ae29851b5e34c54c07e3816", "0cefe55f602bfaa4b2484a36360b28ce6896783b", "18a5f443299784479e78d9e77f175af57cb2fa2b", "e75c5d1b7ecd71cd9f1fdc3d07f56290517ef1e5", "4c7bfa933c11c7a802c2fa9c1dc475dba36a2bd5", "7a75c886b043e7c3f77829412774de27648f384a", "0cca5e5265f8911be227ca2faeb510066555bede", "9ce1f58ade8612656ff9278a7785f2256fb8749a" ], "paperAbstract": "The rising popularity of large-scale real-time analytics applications (real-time inventory/pricing, mobile apps that give you suggestions, fraud detection, risk analysis, etc.) emphasize the need for distributed data management systems that can handle fast transactions and analytics concurrently. Efficient processing of transactional and analytical requests, however, require different optimizations and architectural decisions in a system. This paper presents the Wildfire system, which targets Hybrid Transactional and Analytical Processing (HTAP). 
Wildfire leverages the Spark ecosystem to enable large-scale data processing with different types of complex analytical requests, and columnar data processing to enable fast transactions and analytics concurrently.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p123-barber-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/ebc5/2e776b09cf02b063f212a765a0952dc0eff1.pdf", "s2Url": "https://semanticscholar.org/paper/ebc52e776b09cf02b063f212a765a0952dc0eff1", "sources": [ "DBLP" ], "title": "Evolving Databases for New-Gen Big Data Applications", "venue": "CIDR", "year": 2017 }, "ebeacabf000952146ca88767a8e7e26ce627b488": { "authors": [ { "ids": [ "2974390" ], "name": "Ryuichi Sakamoto" }, { "ids": [ "12898292" ], "name": "Thang Cao" }, { "ids": [ "1683736" ], "name": "Masaaki Kondo" }, { "ids": [ "1779541" ], "name": "Koji Inoue" }, { "ids": [ "2885885" ], "name": "Masatsugu Ueda" }, { "ids": [ "2613767" ], "name": "Tapasya Patki" }, { "ids": [ "2519830" ], "name": "Daniel A. 
Ellsworth" }, { "ids": [ "2173458" ], "name": "Barry Rountree" }, { "ids": [ "1772965" ], "name": "Martin Schulz" } ], "doi": "10.1109/IPDPS.2017.107", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.107", "entities": [ "Computer hardware", "Network planning and design", "Power management", "Production system (computer science)", "Simulation", "Slurm", "Software deployment", "Solution stack", "Supercomputer" ], "id": "ebeacabf000952146ca88767a8e7e26ce627b488", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "957-966", "journalVolume": "", "outCitations": [ "77f826132cf09ac91ea9c859387a8d52221a019a", "14bd3627a85b658ea1b8450039df7fe0fb57379e", "5341bdf934b3a99af6685b5564af8e03d1b780a7", "32d5b17b6a2b3a4055d6e2738dbb0297cdb59402", "efee61acb1847de685817b7d9bc1b6b095ef5026", "8303554a48d900acf0a432fe06e48d48c5962601", "bea77fd8eedb63f239dc01b907e717d2f43d1709", "449359f90c1052a1247742627fc8996e2a5244e7", "02d3d91f16330740cfb104f61f9aaf5a5dd6a69e", "1e8233a8c8271c3278f1b84bed368145c0034a35", "f103c1775462f4409ae15818cfa0a761e282d324", "fd6773cf5baf9a5a2116d6eb9375da9c6526b7b3", "68eec7c5cd770a7d0af62f6856263bc675998fb0", "f6ab527a5919b48b66908954a3086947c5bffde6", "9efa7f12bfd9d8ed38c29c5e128b21b07a438cd9", "1585eaffcf9c9836eb1607e279e43ce2793e59a0", "9205bdd3000dbb67650d8402b65144df137eada6", "02475251d8e5a3102986edd2e6802136590b4a93", "0c940ccba1bd9380a0ac723d791777fc1746a060", "20d6a8a39ebecd21bc8a4df53f248356d38ea6d9", "0de303916d97c05b6d98daad5901e518dbbe7a6d", "b94d6bb4506dbb02244467f989b8aa1f06389988", "494c4c60ab265415d29fd378583e1e295f20bcfe", "22928113b4f63c326811baf36eea8392edddbb79", "59b709fe6377d2332fe396bf25e9b65a10c1062c", "7f4b805160cfcbd546c70c5e781b64b85c2b4850", "1236e7c6969275eaabd23c7764f4568b1ee58705", "d2ea4e2eeac2ce5a8345ae92a54dacce231263d6", "138e5c3e264eb9cd36c1aea66e78d141ee4f04c9" ], "paperAbstract": "Limited power budgets will be one of the biggest challenges 
for deploying future exascale supercomputers. One of the promising ways to deal with this challenge is hardware overprovisioning, that is, installingmore hardware resources than can be fully powered under a given power limit coupled with software mechanisms to steer the limited power to where it is needed most. Prior research has demonstrated the viability of this approach, but could only rely on small-scale simulations of the software stack. While such research is useful to understand the boundaries of performance benefits that can be achieved, it does not cover any deployment or operational concerns of using overprovisioning on production systems. This paper is the first to present an extensible power-aware resource management framework for production-sized overprovisioned systems based on the widely established SLURM resource manager. Our framework provides flexible plugin interfaces and APIs for power management that can be easily extended to implement site-specific strategies and for comparison of different power management techniques. We demonstrate our framework on a 965-node HA8000 production system at Kyushu University. Our results indicate that it is indeed possible to safely overprovision hardware in production. We also find that the power consumption of idle nodes, which depends on the degree of overprovisioning, can become a bottleneck. 
Using real-world data, we then draw conclusions about the impact of the total number of nodes provided in an overprovisioned environment.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.107" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ebeacabf000952146ca88767a8e7e26ce627b488", "sources": [ "DBLP" ], "title": "Production Hardware Overprovisioning: Real-World Performance Optimization Using an Extensible Power-Aware Resource Management Framework", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "ebf7cbcc225730f1d3314871f35dd6956d19e631": { "authors": [ { "ids": [ "30619696" ], "name": "Mostafa Mahmoud" }, { "ids": [ "33125356" ], "name": "Bojian Zheng" }, { "ids": [ "19217864" ], "name": "Alberto Delmas Lascorz" }, { "ids": [ "4407595" ], "name": "Felix Heide" }, { "ids": [ "2961832" ], "name": "Jonathan Assouline" }, { "ids": [ "3290596" ], "name": "Paul Boucher" }, { "ids": [ "3243429" ], "name": "Emmanuel Onzon" }, { "ids": [ "1782536" ], "name": "Andreas Moshovos" } ], "doi": "10.1145/3123939.3123941", "doiUrl": "https://doi.org/10.1145/3123939.3123941", "entities": [ "Algorithm", "Approximation", "Artificial neural network", "CPU cache", "Central processing unit", "Computation", "Graphics processing unit", "Noise reduction", "Pipeline (computing)", "Pixel", "Program optimization", "Sensor", "User interface" ], "id": "ebf7cbcc225730f1d3314871f35dd6956d19e631", "inCitations": [ "7b9339d3b359310ddbaf6caae13d3a65f657bf04" ], "journalName": "", "journalPages": "82-95", "journalVolume": "", "outCitations": [ "a95d74f73715e8c1d215fabbaa927b89226591e3", "3cb3697fe004a5e073dd859d872c1a4e1c897cb4", "cc53a721320b202503050afe38623643f0784f99", "e792ccdd678fe367b7acd09b54e65e5ffc9cde3c", "b0ec95c2c4ce77e6912942f288253583985240a4", "519eac14412a6e427352909808b49bfcc93b02a9", "15bd2bd505c15ae73e86baa69dedb3b7e1eb89d9", "2202807c6aa0a08d045db4e2ac81c2923e38ebbf", 
"1abb835694f93afe6335aa7a5fd6effe075b99d5", "9d5636f41c2831b36e7c851a4a700f3a89361673", "bd7e7b5aa3a5120f5db50037703008af3b120611", "538282dfc40850df2fbfa7a6bbe8c274751b32f1", "3c7f8a98b25bc9209e86f4e00783b8947c388abd", "68795a8105dcd1171eb83488b22b324433fcf5e8", "bd7c9bef5036121490dbd1b8531c3453bb555bc0", "3538739741d7dd27788ef35d9f0900adb070d6f8", "1dec63e2a929bb3be57906bfef94f38e969cfbd9", "306ddd8b7ea3ead125491efc3e8a9f738ce65b89", "4bbeb2fe2bea09a8bae7ffb1003be6cbcfb60dc9", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "393ddb615ffcf38e8a172d7c583fbeeb7cf4ac5c", "323f7b288a676872bc1945d1c4a01041f5bd03b5", "2be10b2d74390eb0eae13d95507a987f7c242175", "b82251422af37820036ad080b3adc0ebcff1cb26", "3ff937a69ade3c779cbe7244c4382eb5db13d4f3", "ca71db3905d3fb2d970bcdaaa79993058560f9f7", "1967c0a6f2ad61efce0ded82dff3faec253f2814", "85791491919e1f740f0e882366046acbe56fb14c", "3b237b9eb7ea4d944a82cf507a8aa9b5f743667e", "17810349765c08963af130efe28b6a6b77b7ec51", "a6574d111bfb12d6a9988bdbbf24639d3c4534ec", "1294cb51cc2e4d2745c286109e391290c2914bf7", "352a8957005dc5519b15ed1870751ec494d66395", "6dba6e15051ecc42997be8eb6dbc8dc5ad337085", "ba2057122eabf24d5453772e9b5c5f7bf16e4333", "1356609dc9a44be5b4c635b55b7c14e48132c00b", "baefec1536c41b2943b8b4bdae9f1af87fa26f68", "4fd69173cabb3d4377432d70488938ac533a5ac3", "ea6a35af49ca7564e8c6b2904e9cea0d45deee3f", "62124e3cb35d9d34159d2d4c673c0f7d04cfa533", "00ab25c6582d543932fccbb0f15fe93445f95d61" ], "paperAbstract": "Computational imaging pipelines (CIPs) convert the raw output of imaging sensors into the high-quality images that are used for further processing. This work studies how Block-Matching and 3D filtering (BM3D), a state-of-the-art denoising algorithm can be implemented to meet the demands of user-interactive (UI) applications. Denoising is the most computationally demanding stage of a CIP taking more than 95% of time on a highly-optimized software implementation [29]. 
We analyze the performance and energy consumption of optimized software implementations on three commodity platforms and find that their performance is inadequate.\n Accordingly, we consider two alternatives: a dedicated accelerator, and running recently proposed Neural Network (NN) based approximations of BM3D [9, 27] on an NN accelerator. We develop Image DEnoising AcceLerator(IDEAL), a hardware BM3D accelerator which incorporates the following techniques: 1) a novel software-hardware optimization, Matches Reuse (MR), that exploits typical image content to reduce the computations needed by BM3D, 2) prefetching and judicious use of on-chip buffering to minimize execution stalls and off-chip bandwidth consumption, 3) a careful arrangement of specialized computing blocks, and 4) data type precision tuning. Over a dataset of images with resolutions ranging from 8 megapixel (MP) and up to 42MP, IDEAL is 11, 352× and 591× faster than high-end general-purpose (CPU) and graphics processor (GPU) software implementations with orders of magnitude better energy efficiency. 
Even when the NN approximations of BM3D are run on the DaDianNao [14] high-end hardware NN accelerator, IDEAL is 5.4× faster and 3.95× more energy efficient.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123941" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ebf7cbcc225730f1d3314871f35dd6956d19e631", "sources": [ "DBLP" ], "title": "IDEAL: image denoising accelerator", "venue": "MICRO", "year": 2017 }, "ebfb98b07e4d153ccedee664d4f3dcaa0186ea16": { "authors": [ { "ids": [ "2216121" ], "name": "Sumin Hong" }, { "ids": [ "2680629" ], "name": "Woohyuk Choi" }, { "ids": [ "1718114" ], "name": "Won-Ki Jeong" } ], "doi": "", "doiUrl": "", "entities": [ "Batch processing", "Big data", "Cloud computing", "Computation", "Computational science", "Data synchronization", "Embarrassingly parallel", "Graphics processing unit", "In-memory database", "In-memory processing", "Iteration", "Java virtual machine", "Lazy evaluation", "Machine learning", "MapReduce", "Memory hierarchy", "Memory-mapped I/O", "Message Passing Interface", "Message passing", "Numerical analysis", "Scalability", "Virtual machine" ], "id": "ebfb98b07e4d153ccedee664d4f3dcaa0186ea16", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "31-41", "journalVolume": "", "outCitations": [ "50b343dbec4c5ad3933c71bfe9f63b63db02636e", "c83d1121cb66782f4884eec6a25b2ee9dab06594", "0541d5338adc48276b3b8cd3a141d799e2d40150", "249f429bf5166a98f9e405f7188aad55a65204f9", "0558c94a094158ecd64f0d5014d3d9668054fb97", "097ca69fda44a3499771bb2ece41ab5fb561cc6c", "d57b42821ec782b33dd49ee0c37976bbd62d24a4", "216d95be596d5343a27b9e1234373637c992f750", "1d9f32d54a41d0316968d5f54fbf22c2a52bc78d", "45f119a7334f482513f6d71f3fffa4e9e239622c", "3e2783b2460d2089b4f9367df4161b5314e298c2", "1b385a54f2b18a8776ebacf9b5cf2fd000dfad89", "6c72445d0ffbbc62725b49f1e970c6f083ce4a03", "17439625dcfe137b1f1457d167747134b4a1d39e", 
"24281c886cd9339fe2fc5881faf5ed72b731a03e", "24679ccb0586642553a21e9fcd8aa5a57f97cabe" ], "paperAbstract": "Due to its simplicity and scalability, MapReduce has become a de facto standard computing model for big data processing. Since the original MapReduce model was only appropriate for embarrassingly parallel batch processing, many follow-up studies have focused on improving the efficiency and performance of the model. Spark follows one of these recent trends by providing in-memory processing capability to reduce slow disk I/O for iterative computing tasks. However, the acceleration of Spark's in-memory processing using graphics processing units (GPUs) is challenging due to its deep memory hierarchy and host-to-GPU communication overhead. In this paper, we introduce a novel GPU-accelerated MapReduce framework that extends Spark's in-memory processing so that iterative computing is performed only in the GPU memory. Having discovered that the main bottleneck in the current Spark system for GPU computing is data communication on a Java virtual machine, we propose a modification of the current Spark implementation to bypass expensive data management for iterative task offloading to GPUs. We also propose a novel GPU in-memory processing and caching framework that minimizes host-to-GPU communication via lazy evaluation and reuses GPU memory over multiple mapper executions. The proposed system employs message-passing interface (MPI)-based data synchronization for inter-worker communication so that more complicated iterative computing tasks, such as iterative numerical solvers, can be efficiently handled. We demonstrate the performance of our system in terms of several iterative computing tasks in big data processing applications, including machine learning and scientific computing. 
We achieved up to 50 times speed up over conventional Spark and about 10 times speed up over GPU-accelerated Spark.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101117" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ebfb98b07e4d153ccedee664d4f3dcaa0186ea16", "sources": [ "DBLP" ], "title": "GPU in-Memory Processing Using Spark for Iterative Computation", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "ec18c6de077350ac497a5f8c80b5e163c9c82baf": { "authors": [ { "ids": [ "2817929" ], "name": "David Rohr" }, { "ids": [ "2460976" ], "name": "Volker Lindenstruth" } ], "doi": "10.1109/CLUSTER.2017.101", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.101", "entities": [ "Advanced Vector Extensions", "Algorithm", "Central processing unit", "Compiler", "Computation", "Data center", "Data store", "Erasure code", "Field-programmable gate array", "Gigabyte", "Graphics processing unit", "Just-in-time compilation", "Open-source software", "Reed\u2013Solomon error correction", "SIMD", "Skylake (microarchitecture)", "Streaming SIMD Extensions", "Supercomputer", "Throughput", "X86" ], "id": "ec18c6de077350ac497a5f8c80b5e163c9c82baf", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "451-463", "journalVolume": "", "outCitations": [ "34d7b001f1eebfed4fb2a515176070f468c54e7e", "3794583de685ac4c3dd654abd972920d98094ce9", "0ef441a673afad129a13094f6f89d4964daa8eb8", "1cc6d4ae705dd47ec409e01e349d4a46b722ee81", "eb4f23afcc86609d9fc5fe90000d9db44cb3e575", "03e9c5c4e1ae4fa725c1b10035741be4e65aac33", "73a98b30d957ebeb4e156d4d5c876772bcadf72e", "8c3aa70d98b7795fa318eb33abec52bd02b621f2", "a6065d3b4e7e41a643d053b64156a9daf5b89844", "2f984d013a6c6ceac472cf0be7aba67a63980e55", "128f7fcf5f1956f681912910ce428d0731233fa7", "c2f4ccc7feb6bd3928d14f2352d156d391eb0111", "a57b27156a0bbe21b124da055b3226c649b4e227", 
"8eaa45df0a85bf7fda455cf7f1699cdfe0de1288", "d12aee72185bdce13e3c49e0e44e10105331f4ff", "77f651d37c1d1fa7c69c8966680aec180e8f48dc" ], "paperAbstract": "Failure tolerant data encoding and storage is of paramount importance for data centers, supercomputers, data transfers, and many aspects of information technology. Reed-Solomon failure erasure codes and their variants are the basis for many applications in this field. Efficient implementation of these codes is challenging because they require computations in Galois fields, which are not supported by processors natively. Improved variants such as the Cauchy-Reed-Solomon code enable a better mapping of the required calculations to computer instructions. However, this works best when the source code of the application is tuned for fixed encoding parameters which deteriorates the flexibility. We present an approach to overcoming these limitations by just in time compiling optimized code for arbitrary encoding settings. Our open source library is optimized for x86 processors using SSE and AVX extensions and we present prototypes for GPUs and FPGAs as well. For a significant range of encoding parameters, our implementation encodes at the maximum bandwidth the processor can read the data from memory. In more complicated settings with additional redundancy data to compensate the failure of multiple data stores, the algorithm becomes compute limited. 
The optimized JIT code leverages the full potential of modern processors reaching an instruction throughput of more than three SIMD-instructions per compute cycle, and encodes up to 19 gigabytes of data per second on a Skylake system.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.101" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ec18c6de077350ac497a5f8c80b5e163c9c82baf", "sources": [ "DBLP" ], "title": "Fast Failure Erasure Encoding Using Just in Time Compilation for CPUs, GPUs, and FPGAs", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "ec61db6ae3b766ad295e187a96b9bbbcfaa40aa6": { "authors": [ { "ids": [ "34007461" ], "name": "Shigeru Iwase" }, { "ids": [ "1846210" ], "name": "Yasunori Futamura" }, { "ids": [ "2562770" ], "name": "Akira Imakura" }, { "ids": [ "34330490" ], "name": "Tetsuya Sakurai" }, { "ids": [ "6120570" ], "name": "Tomoya Ono" } ], "doi": "10.1145/3126908.3126942", "doiUrl": "https://doi.org/10.1145/3126908.3126942", "entities": [ "Domain decomposition methods", "Doping (semiconductor)", "Electronic band structure", "Kohn\u2013Sham equations", "Parallel computing", "Scalability" ], "id": "ec61db6ae3b766ad295e187a96b9bbbcfaa40aa6", "inCitations": [], "journalName": "", "journalPages": "40:1-40:12", "journalVolume": "", "outCitations": [ "cdc2d98b3e27ef96fbbb87bc5fb738f7522bf133", "55bf2114527eba008854354dd1fe886658a6c431", "4ad0595b7af6c4041bfd89ccf74d94f154a39762", "fa8a77f2cb8005f1b88b39d0d96f8a015dbc83cb", "8fcea6fd84af96c352b0223671eaee2082eb5648", "c0a2344bb2510d9f830671b6cf2782ec46c6f631", "50f5eda8d2076b21e63e96fb5999a1e52563e4db", "0481d2e82ecd5088f8773ecc9cf7b70b8944ee45", "01378887ff0f411c11187e0e901ed0eb78021860", "04a16685df6b4e743a1dd7a99efaac346f5bb3c6", "ec088915b21ca30f6866012c7d1187623e62fed3", "1b2704adec06dbfbb7e3b066ee5934424c53d771", "fe4b746b04654c5629cf6f767de0f74a2af191a0", "9d4c54a605dc0a5e926f05ef68698ee15ad0f616", 
"e26294743d44f5c23f9874fbb1a540cf385fd121", "26618871e93dfc563600329a8439d61826aab73f", "02309174c2982652a06cf5871400cf3ad49d3648", "21ec0d93cd287940d263e890aaa7480dac5e0a27", "c4bde15f96fba8c20117695dc96062b2271795e5", "34e62164b943fdd790457fd83b84ca85b7808d09", "f9e053a8c3625175080b5b63a9cccbb014b37226", "a41b809cf9a1936dce2dae48c4341f5e9078f8d7", "d51fdba81ff58c83083a06abcf09f4f090f47168", "868f0686e413c9be72f1b18c04cad7a3c2332a3a" ], "paperAbstract": "Complex band structures (CBSs) are useful to characterize the static and dynamical electronic properties of materials. Despite the intensive developments, the first-principles calculation of CBS for over several hundred atoms are still computationally demanding. We here propose an efficient and scalable computational method to calculate CBSs. The basic idea is to express the Kohn-Sham equation of the real-space grid scheme as a quadratic eigenvalue problem and compute only the solutions which are necessary to construct the CBS by Sakurai-Sugiura method. The serial performance of the proposed method shows a significant advantage in both run-time and memory usage compared to the conventional method. 
Furthermore, owing to the hierarchical parallelism in Sakurai-Sugiura method and the domain-decomposition technique for real-space grids, we can achieve an excellent scalability in the CBS calculation of a boron and nitrogen doped carbon nanotube consisting of more than 10,000 atoms using 2,048 nodes (139,264 cores) of Oakforest-PACS.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126942" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ec61db6ae3b766ad295e187a96b9bbbcfaa40aa6", "sources": [ "DBLP" ], "title": "Efficient and scalable calculation of complex band structure using Sakurai-Sugiura method", "venue": "SC", "year": 2017 }, "ec94b1a6fbda7cc1e9cd7b0c426da43879130b1f": { "authors": [ { "ids": [ "39768278" ], "name": "David Yu Cheng Chan" }, { "ids": [ "2387538" ], "name": "Vassos Hadzilacos" }, { "ids": [ "1729358" ], "name": "Sam Toueg" } ], "doi": "10.1145/3087801.3087822", "doiUrl": "https://doi.org/10.1145/3087801.3087822", "entities": [], "id": "ec94b1a6fbda7cc1e9cd7b0c426da43879130b1f", "inCitations": [], "journalName": "", "journalPages": "345-354", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087822" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ec94b1a6fbda7cc1e9cd7b0c426da43879130b1f", "sources": [ "DBLP" ], "title": "Life Beyond Set Agreement", "venue": "PODC", "year": 2017 }, "ec997fd7517daa7d68a6ef66e758c2dd8cee077b": { "authors": [ { "ids": [ "40448030" ], "name": "Scott Levy" }, { "ids": [ "1734561" ], "name": "Kurt B. Ferreira" }, { "ids": [ "23308884" ], "name": "Patrick G. 
Bridges" } ], "doi": "10.1109/CLUSTER.2017.99", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.99", "entities": [ "Backup", "Memory protection", "Smart Data Compression" ], "id": "ec997fd7517daa7d68a6ef66e758c2dd8cee077b", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "603-607", "journalVolume": "", "outCitations": [ "741a04ef3a0c3953a3d37726bf4d6170eaa68a55", "37e95e584af6ce7958d8cf3d5f96dcab0f595cba", "16abd837f7fdaa4215ff69852963fef25c0a1bad", "64d60b5ea0cb696837fc6001563d070f98f3da4b", "19d686007a37f599b850bfbca391a5d7d869def8", "0f1cb72117c29da9d1840311dee349fd88c52342", "b23f060a4574ff126e98b8fe13f8b508b9f82c1f", "4ee0564e83d0252c461087f7fd5963a01716e142", "3e99a917b9a4e89497541bbc3bb72079054644c6", "6b8b4763d1aea3f9d083f364c841737daab8db67", "694d06bb3ff03fb6ff42b7891a42f8d4f3f37f34", "14e5bbf94dba58ead368cceab1541cff7cbb0170", "dd286cdefbca8f6e435298f058ca413d131f53b0", "a19563b4014919c405964cea5271bebe918ad265", "108c840d5d1847948a2de0250490a327ae069ee6", "747ad718761b7d848a12e4f3a82aa0f46117a815", "18fe996c6f43a8f301cd842507045b679ba3506a", "34f310dffd51a8f1585b0a6a5ccaf83094d0d663", "270c88be02c3c996b652b5410a49f63a2abd7687", "b39b8b5be74498b90ae59297a6883e3fd57b1eb8", "455d253c61379bce5626fba8ef9897d3ac1307dc", "36480300b1e382c062b78c6bd610d1879efd950e" ], "paperAbstract": "Aggregating millions of hardware components to construct an exascale computing platform will pose significant resilience challenges. In addition to slowdowns associated with detected errors, silent errors are likely to further degrade application performance. Moreover, silent data corruption (SDC) has the potential to undermine the integrity of the results produced by important scientific applications.In this paper, we propose an application-independent mechanism to efficiently detect and correct SDC in read-mostly memory, where SDC may be most likely to occur. 
We use memory protection mechanisms to maintain compressed backups of application memory. We detect SDC by identifying changes in memory contents that occur without explicit write operations. We demonstrate that, for several applications, our approach can potentially protect a significant fraction of application memory pages from SDC with modest overheads. Moreover, our proposed technique can be straightforwardly combined with many other approaches to provide a significant bulwark against SDC.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.99" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ec997fd7517daa7d68a6ef66e758c2dd8cee077b", "sources": [ "DBLP" ], "title": "Evaluating the Viability of Using Compression to Mitigate Silent Corruption of Read-Mostly Application Data", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "ecee9dcb15647eb10eae4edda679112e251518d9": { "authors": [ { "ids": [ "8379563" ], "name": "Md Abdullah Shahneous Bari" }, { "ids": [ "31566659" ], "name": "Abid Muslim Malik" }, { "ids": [ "2705989" ], "name": "Ahmad Qawasmeh" }, { "ids": [ "1718452" ], "name": "Barbara M. 
Chapman" } ], "doi": "10.1109/IGCC.2017.8323575", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323575", "entities": [ "Allocation", "Anatomic Node", "Benchmark (computing)", "OpenMP", "Parallel computing", "Program optimization", "Programming model", "Run time (program lifecycle phase)", "Runtime system" ], "id": "ecee9dcb15647eb10eae4edda679112e251518d9", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ "448ec64370ec1dde86eabdb28c2c298c21cef5b8", "f4ed7fb35916bd0d36a53198384bb0ed2ff34c3f", "1e8233a8c8271c3278f1b84bed368145c0034a35", "1678c9ac3fc0b13fac8c95cb26a376ec2064310e", "1108af609469e420aeae551ba8a893c3200e07fa", "008aa3bf67964c6058df797e8cd64ae909f53984", "ba3f47455daea57ecc1f69491e1145384e059f12", "379df30439522eb6671a57c5e3d084c9d1be99a8", "025b0273eb6ccd57e6a949fe44225ca5d8041cf9", "043b307af412fc7f9005822e6dabbe4f9d983472", "7bdd1a1aeb4be253eb0226e78c2ca9930ef281e6", "027dd3662dacd3bf58162133b3b5bca9ce4c682b", "7e757fff66a63b268da83ffccf464437492ac8b6", "178d742ee02d6909f8e0018253c89f1d71ff4617", "efee61acb1847de685817b7d9bc1b6b095ef5026", "b0f9ea06d726935289456a75f739544826bb5c0d", "1585eaffcf9c9836eb1607e279e43ce2793e59a0" ], "paperAbstract": "Application level power budget allocation is one way to overcome the power constraint problem in future HPC systems. This technique mainly depends on finding an optimal number of compute nodes and power level for each node. However, utilizing that power at node level requires optimization of the underlying programming model. OpenMP is the defacto standard for intra-node parallelism. In this paper, we investigate the impact of OpenMP runtime environment on the performance of OpenMP code at the different power level. We studied 28 OpenMP parallel regions from five NAS Parallel Benchmark (NPB) applications. 
Based on the study we show that for a given power level, a suitable selection of OpenMP runtime parameters can improve the execution time and energy consumption of a parallel region up to 67% and 72%, respectively. We also show that these fine grain improvements resulted in upto 26% execution time and 38% energy consumption improvement for a given OpenMP application.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323575" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ecee9dcb15647eb10eae4edda679112e251518d9", "sources": [ "DBLP" ], "title": "A detailed analysis of OpenMP runtime configurations for power constrained systems", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "ed3f6dc10ae8bcbf1ed6e673a746344c141c7db5": { "authors": [ { "ids": [ "1680280" ], "name": "Li Li" }, { "ids": [ "8939083" ], "name": "Yunhao Bai" }, { "ids": [ "1690476" ], "name": "Xiaorui Wang" }, { "ids": [ "2441395" ], "name": "Mai Zheng" }, { "ids": [ "37110735" ], "name": "Feng Qin" } ], "doi": "10.1109/IGCC.2017.8323571", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323571", "entities": [ "Android", "Application checkpointing", "Aspartate Transaminase", "Booting", "Computer", "Computers", "Constrained optimization", "Constraint (mathematics)", "Digital footprint", "Laptop", "Mobile app", "Optimization problem", "Program optimization", "Shutdown (computing)", "Smartphone", "Smartphone" ], "id": "ed3f6dc10ae8bcbf1ed6e673a746344c141c7db5", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-8", "journalVolume": "", "outCitations": [ "2dbd0bca3fb1a57f441f1867ac0fa7dfc245ae66", "f16841e022038e94a59f7e0a82002102b78d79a4", "02d4b6a359a3c3216a4b0af0e3c4797a0601a322", "17c719e6c317efe986a3f34e1f980110baa5a0fc", "35963f0115d7a9b01d453d4ea33d42d9c26313d6", "0a49c1c4ec75e34071520abde7f15e3f7d4420ca", 
"7d19eb2938a538325300a5967ef0c5efe4141950", "0d3b2da1713f8100638ed6ea9202c33c90445fe9", "515424e9c26ed2f6150a907e9c9db085958e7350", "2825733f97124013e8841b3f8a0f5bd4ee4af88a", "5892b9314971e90e32d8bf81ca4e7dcbecb5ef8f", "932ed9910569b5ca9f6507d2536ef1608f92cff2", "c49b212dbe9a58e36ce21c0fe13c8d65ad7a2fdb", "5aa0d728699eb2dde17cd8355c2b704f196a0c43" ], "paperAbstract": "Unintended smartphone rebooting and unexpected shutdown (due to reasons like battery run outs, overheating, or automatic app upgrades) is annoying. What can be even worse is that a phone user has to restart, from the very beginning, the apps he or she was using when the phone got rebooted, because all the app states would be lost, especially when the number of apps in use is large. Hence, a recovery service is sorely needed for today's smartphones where apps are becoming increasingly complex. While checkpointing has long been used for desktop and laptop computers, such whole-system state preserving techniques cannot be applied to smartphones directly, due to the constraints of smartphones on energy, delay, and storage space. In this paper, we propose SmartCP, an intelligent checkpointing methodology, in order to reduce the energy required by a smartphone and the amount of efforts required by a user to recover the app states after the smartphone restarts. SmartCP selectively checkpoints the most important apps based on the apps' characteristics and predicted future usage, under the resource constraints of the phone. We propose a novel model that quantitatively analyzes the recovery energy and efforts of each category of smartphone apps and formulate selective checkpointing as a constrained optimization problem. We prototype SmartCP on Android and evaluate it using real-world traces as well as real user feedback. 
The results show that SmartCP outperforms two alternative app selection schemes by saving 28% more energy and 39% more recovery efforts on average.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323571" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ed3f6dc10ae8bcbf1ed6e673a746344c141c7db5", "sources": [ "DBLP" ], "title": "Selective checkpointing for minimizing recovery energy and efforts of smartphone apps", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "ed4a5d1681cc7a4175287fdc3494723b0367b7ef": { "authors": [ { "ids": [ "3171961" ], "name": "Kuat Yessenov" }, { "ids": [ "2514071" ], "name": "Ivan Kuraj" }, { "ids": [ "1745989" ], "name": "Armando Solar-Lezama" } ], "doi": "10.1145/3062341.3062386", "doiUrl": "https://doi.org/10.1145/3062341.3062386", "entities": [ "Application programming interface", "Eclipse", "Java", "Software framework", "Swing (Java)" ], "id": "ed4a5d1681cc7a4175287fdc3494723b0367b7ef", "inCitations": [], "journalName": "", "journalPages": "64-78", "journalVolume": "", "outCitations": [ "3046e5d89e390b44e05b90588ffb22cdde41d314", "72338dfa9f2f54bffce94055876beab16439202f", "16423eec2a7266111a16bc9a5df5d2b1c84ea9f0", "75b8c0abfd45fd77d7a61da7d12bdf516e3139c7", "206a64c2ab6114e4c6db7a73b2e0b548e4df11c4", "4467f59e594c9d0c2919bdac44388633bf8ba353", "382e55821fbc7285395a33267e267f705e4a8d30", "2f63cffb283166bb09076dffd77e2bb1f39a6d48", "8dee1460f96c42fe91fcf8b9684b6afba695429a", "1e688be9f4554aa981fe3db9e2a66388b05bd167", "91fdd1f00835fc93e906004ce8d51253ca1c1c77", "29f417b826c4aa0ebef45f94189534173ca033c5", "0282e990528c6a9b4aa92cc196f46257fb4ccee1", "1737bbf048409b19cfda6d0d18a4262dbb57a194", "5f4da1df76b8878bf8358ec24d6592a8008d2b0d", "18c8501ead80d99d2b0af34044515f4e96444074", "34ce7b7b03cbe75d8803e2c1cd1626cd60251758", "29c985f36e30085df0b959e7e792a3af1c8d6556", "21dbc0a2e79299c5734358cc88d0fe7c1e95787c", "0ed8e3871e427b33dcfb98974af0c76996134004", 
"2392ed364debcebdf3ab5fda1961bdd5d3ed0779", "13d4fa20983a6605fb7b13371a01bbafdbabf7d5", "0d1f5807a26286f8a486d7b535d5fc16bd37d86d", "0a2c9db80433b384af928ae947b6c4005c7c42bd", "d2b0d8933463f9a48dc01287e2a350cf9316de49", "1faabd21b3772f062e937aa44e7826f09257791f", "68022d495c81d0be421acfebb016eba724f4b77d", "5ae51fa63422b989c1ce2747ce32d442f5ba46b1", "3db4291a1a629876516bb06ae798a98475fb0148", "a6526df1d9b18fd3542fad7fdd95e93a5edce909" ], "paperAbstract": "We introduce DemoMatch, a tool for API discovery that allows the user to discover how to implement functionality using a software framework by demonstrating the functionality in existing applications built with the same framework. DemoMatch matches the demonstrations against a database of execution traces called Semeru and generates code snippets explaining how to use the functionality. We evaluated DemoMatch on several case studies involving Java Swing and Eclipse RCP.", "pdfUrls": [ "http://doi.acm.org/10.1145/3062341.3062386" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ed4a5d1681cc7a4175287fdc3494723b0367b7ef", "sources": [ "DBLP" ], "title": "DemoMatch: API discovery from demonstrations", "venue": "PLDI", "year": 2017 }, "ed61c5151108de3006ac6a1ec400713a9c697390": { "authors": [ { "ids": [ "40643787" ], "name": "Lina Jia" }, { "ids": [ "38728459" ], "name": "Min Zhu" }, { "ids": [ "1794873" ], "name": "Bibo Tu" } ], "doi": "", "doiUrl": "", "entities": [ "ARM architecture", "Correctness (computer science)", "Experiment", "Immutable object", "Introspection", "Privilege level", "Simulation", "Subversion", "Virtual Machine Manager", "Virtual machine" ], "id": "ed61c5151108de3006ac6a1ec400713a9c697390", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "478-487", "journalVolume": "", "outCitations": [ "7f7137820048e0a1611e180d483754240da588c4", "47652ff252be69026c1ff5c25480ccedcc95f72d", 
"3b3ac5d9718142e01ca0ce4dc348fd02669c57ae", "11fb0c80f13ea7ac62bf1caddda2dec510dc1570", "4ee94360be7639024a0be01a5d05c1bdc3e6cd46", "7048524562ae29c0c7e4eb3ec5be13dde4183f2c", "2e8da51c545cbe8e62a3751a5a2b9a3beca00b43", "5d43623e7301cda57fa27f4d905cbf4a51fbb7c6", "56303078dc8da6f1a9a25fce5d66001f79ca530d", "a5b42012a79664e44df65d43396d27bb22dade62", "86013daaae16572bceb755e65ee5fa2fdfb63848", "4ab4a666f5e5ed34ac219a9fdc2f70bd1cab0922", "46bc4d7c5605e8468f4355335416e15f0d7e4dcd", "0b0422b5864ca1a25d6af274bad11c1b2fef1ed5", "0d60e4db3034380df61b344e643cf312f2092385", "bfa04a51f8bd7a56b45a7ffefeda60b4270373de", "62d6d99866697d5efdbba3df89051f9c96082567", "44000ade990583c8ae23d6e41c037e55dc0d126e", "009af3a1fa932ea1a9efa8d34cb0b6e32feae15e", "0edd896bc82b7fb65ef63cb1e3512db795c7f7d4" ], "paperAbstract": "Nowadays, the vulnerability of cloud environment exposed in security places Virtual Machine Introspection(VMI) at risk: once attackers subvert any layers of cloud environment, such as host, virtual machine manager(VMM) or qemu, VMI will be exposed undoubtedly to those attackers too. Nearly all existing VMI techniques implicitly assume that both VMM by which VMI accesses specific VM data and host which VMI is running on, are nonmalicious and immutable. Unfortunately, this assumption can be potentially violated with the growing shortage of security in cloud environment. Once VMM or host is exploited, attackers can tamper the code or hijack the data of VMI, then, falsify VM information and certifications to Cloud system's administrators who try to make sure the security of specific VM in certain compute node. This paper proposes a new trusted VMI monitor frame: T-VMI, which can avoid the malicious subversion of the routine of VMI. T-VMIguarantees the integrity of VMI code using isolation and the correctness of VMI data using high privilege level instruction and appropriate trap mechanism. 
This model is evaluated on a simulation environment by using ARM Foundation Model 8.0 and has been presented on a real development ARMv8 JUNO-r0 board. We finished the comprehensive experiments including effectiveness and performance, and the result and analysis show T-VMI has achieved the aim of expected effectiveness with acceptable performance cost.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101177" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ed61c5151108de3006ac6a1ec400713a9c697390", "sources": [ "DBLP" ], "title": "T-VMI: Trusted Virtual Machine Introspection in Cloud Environments", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "edb51ab25b04f2df4fc41be30b959696fb511ceb": { "authors": [ { "ids": [ "1688846" ], "name": "Tao Sun" }, { "ids": [ "1777803" ], "name": "Daniel Sheldon" }, { "ids": [ "34379378" ], "name": "Brendan OConnor" } ], "doi": "10.1109/ICDM.2017.54", "doiUrl": "https://doi.org/10.1109/ICDM.2017.54", "entities": [ "Action potential", "Algorithm", "Binary prefix", "Ecology", "Latent variable", "Machine learning", "Message passing", "Microdata Corporation", "Second moment of area" ], "id": "edb51ab25b04f2df4fc41be30b959696fb511ceb", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "445-454", "journalVolume": "", "outCitations": [ "f20778059ed290a402256202e106a0fb78c581f9", "4915be172c5fcbc0c63ae3069873ae628c0fff70", "1c7d38f68fe1150895a186e30b60c02dd89a676a", "4bcc5cfbc1793a85d5e95307028e453ac2bfbeb2", "a3f910392b9c8a89d94b48e0035f5a2ec0718e66", "06844709444a965f975054292a23c3a6e896403f", "13112c6ea1e780ba0d9827ae85bf6a7b85603b83", "70c08f1a4861f38d444c9fe7837255d4f53985ec", "1578fefdf113e09be27cce56bb985a420b498691", "1aa39418e70fdeeb9ff3c56e62259398352f1fe5", "e4eaf3eed659a161331e455e5fe9b2e4c3224c1f", "007cfb4331faea244b11db496a5b39741a0285d3", 
"1a24ecd713b863acf593d5405add096d682d212e", "2089230d43176116b38b2ec075dce6312905292b", "18c7fb55ff796db5c5a604e0ca44b6baaeb12239", "7374acb58065f01d7c5c27f51e603dc703709d65", "6d43c41e19d994b802f5cff6fbe4e1feffd0d81f", "a3eadfc107f1be2c8e22c760f6996d5af3256841", "02df9340e08e1e5085f1257bb6ddda4cd225e1d3", "6acae3e35c1585652cbd6b00e4434475f18f4b85", "3611902b55313fb1dda824093bf88e220022e21d", "4e8cf472b2a0690444164d10099491fdcbaa5f18", "745782902e97be8fbacd1e05d283f11104e2fec6", "45856a14291733851ff4dc142d992873d5feebf3", "23a0f7ca03ab1576fd80751903f092d2372b5079" ], "paperAbstract": "Ecological inference (EI) is a classical problem from political science to model voting behavior of individuals given only aggregate election results. Flaxman et al. recently formulated EI as machine learning problem using distribution regression, and applied it to analyze US presidential elections. However, distribution regression unnecessarily aggregates individual-level covariates available from census microdata, and ignores known structure of the aggregation mechanism. We instead formulate the problem as learning with label proportions (LLP), and develop a new, probabilistic, LLP method to solve it. Our model is the straightforward one where individual votes are latent variables. We use cardinality potentials to efficiently perform exact inference over latent variables during learning, and introduce a novel message-passing algorithm to extend cardinality potentials to multivariate probability models for use within multiclass LLP problems. 
We show experimentally that LLP outperforms distribution regression for predicting individual-level attributes, and that our method is as good as or better than existing state-of-the-art LLP methods.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.54" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/edb51ab25b04f2df4fc41be30b959696fb511ceb", "sources": [ "DBLP" ], "title": "A Probabilistic Approach for Learning with Label Proportions Applied to the US Presidential Election", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "eedba2a20767747bb5fa74b8ec1c25c109a8f1b1": { "authors": [ { "ids": [ "40161995" ], "name": "Swamit S. Tannu" }, { "ids": [ "8665729" ], "name": "Zachary A. Myers" }, { "ids": [ "1936310" ], "name": "Prashant J. Nair" }, { "ids": [ "35215491" ], "name": "Douglas M. Carmean" }, { "ids": [ "1740036" ], "name": "Moinuddin K. Qureshi" } ], "doi": "10.1145/3123939.3123940", "doiUrl": "https://doi.org/10.1145/3123939.3123940", "entities": [ "Computer", "Error detection and correction", "Execution pattern", "Fault tolerance", "Microarchitecture", "Programmer", "Quantum computing", "Quantum error correction", "Quantum mechanics", "Qubit", "Scalability" ], "id": "eedba2a20767747bb5fa74b8ec1c25c109a8f1b1", "inCitations": [], "journalName": "", "journalPages": "679-691", "journalVolume": "", "outCitations": [ "1db974616dc9ce742c2f5cf2a8d8e1a25d0b9740", "0586bf547000095a0f288be2e70c94eda89c06b5", "3638336e933c3c4c56d669fa370053d12727d670", "7c4f7100c239de1a429cc23b286953aa70e6e5df", "f6b76dd978e4c623f4e65a701d2ae3d00b2393a8", "583beb5db7b7f8954fce9793202098c3e8ed31d3", "51ab7eef98f029a28330e2536938a5d494222d25", "150b07cc949afdf4be5774bf1b26c7c9b1b24366", "2954af03f0a608ee64c9086bedf4eb9ea91276a3", "a9b55406ffff7861ba089a9ea86cb0eceef237ff", "19873af56f207c1a89fac7fb667dd70b039934cb", "4852e7956c62117f6379385290fcf4673c687667", "88331df302fa2b13d6f1dc99ada50d0003b8c404", 
"ed053277500b9a6c6c347419790864118642fb3c", "b05c8d5761dab9b1ed071fed90ae0c05b41d5caa", "9c499c267c33a941dd3dfa7328bca88d631711d0", "0c79078c46dd7424f77470d34b2708abfc03bccd", "064bc7761f83cd79458330320d87a1279cb35167", "be3e7db1be0a51e6212d0a1d1b88a13756d9269b", "0200b1a2977f1dee45fd7d0a8e2e50cc46aed67c", "71a7b0c9045192051220ef0c05c7dae95327053f", "c20e0489fd3a4c3ac9ee1f769a08a7fbc99fb9f0", "26b7c4232872cc2327029b5354a41fde703f8e02", "f7f752acd1042bf6098f8ce8e90f4dd331564e4a", "57eaf807b2639d4c01af674ee511f8a6f7004c8b", "764428eb285e607dc0214d0273f1562c051a08e1", "212e4eb3ffbee7be2cc9efce95a402da10df8c60", "1dd872097353e200985a1e4ee8a29c3859520211", "067c4edcc88b700c3d17f5e8d86f257a8da1daee", "02d90ab465536454e1cb7f325da8bf23569391d0", "8fbe2bfddc0c0ef0f33bd4a0668e0fcfab0beb95", "42735d5a4140c9457fb2fbdea99a119f849dd4ce", "a5e6d42edb7bb7ff0ec2c33f3b74a6df3fe32ee3", "ddd6258a1781179fabeca3d81ad645ab883d303a", "8c1277f180879d4689cc651e8b6a97bd3882281e", "a6b75e7cbbff92124ad5a255e44b670990ce9e77", "2efe0191a006a02927040749b9ae1fdc91b836cb", "c0a961f855fdb441a003020b0c945ca50a032609" ], "paperAbstract": "A quantum computer consists of quantum bits (qubits) and a control processor that acts as an interface between the programmer and the qubits. As qubits are very sensitive to noise, they rely on continuous error correction to maintain the correct state. Current proposals rely on software-managed error correction and require large instruction bandwidth, which must scale in proportion to the number of qubits. While such a design may be reasonable for small-scale quantum computers, we show that instruction bandwidth tends to become a critical bottleneck for scaling quantum computers.\n In this paper, we show that 99.999% of the instructions in the instruction stream of a typical quantum workload stem from error correction. 
Using this observation, we propose QuEST (<u>Q</u>uantum <u>E</u>rror-Correction <u>S</u>ubs<u>t</u>rate), an architecture that delegates the task of quantum error correction to the hardware. QuEST uses a dedicated programmable micro-coded engine to continuously replay the instruction stream associated with error correction. The instruction bandwidth requirement of QuEST scales in proportion to the number of active qubits (typically << 0.1%) rather than the total number of qubits. We analyze the effectiveness of QuEST with area and thermal constraints and propose a scalable microarchitecture using typical Quantum Error Correction Code (QECC) execution patterns. Our evaluations show that QuEST reduces instruction bandwidth demand of several key workloads by five orders of magnitude while ensuring deterministic instruction delivery. Apart from error correction, we also observe a large instruction bandwidth requirement for fault tolerant quantum instructions (magic state distillation). We extend QuEST to manage these instructions in hardware and provide additional reduction in bandwidth. 
With QuEST, we reduce the total instruction bandwidth by eight orders of magnitude.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123940", "http://memlab.ece.gatech.edu/papers/MICRO_2017_1.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/eedba2a20767747bb5fa74b8ec1c25c109a8f1b1", "sources": [ "DBLP" ], "title": "Taming the instruction bandwidth of quantum computers via hardware-managed error correction", "venue": "MICRO", "year": 2017 }, "eefa86508450c7855ea299604ae35a7cf7b21496": { "authors": [ { "ids": [ "2814902" ], "name": "Shuihai Hu" }, { "ids": [ "2438403" ], "name": "Yibo Zhu" }, { "ids": [ "1714510" ], "name": "Peng Cheng" }, { "ids": [ "39152478" ], "name": "Chuanxiong Guo" }, { "ids": [ "40165896" ], "name": "Kun Tan" }, { "ids": [ "1695132" ], "name": "Jitendra Padhye" }, { "ids": [ "40611817" ], "name": "Kai Chen" } ], "doi": "10.1145/3143361.3143382", "doiUrl": "https://doi.org/10.1145/3143361.3143382", "entities": [ "Brill tagger", "Commodity computing", "Data center", "Deadlock", "Direct memory access", "Lossless compression", "Network packet", "Network switch", "PowerBuilder Foundation Classes", "Remote direct memory access", "Routing" ], "id": "eefa86508450c7855ea299604ae35a7cf7b21496", "inCitations": [], "journalName": "", "journalPages": "451-463", "journalVolume": "", "outCitations": [ "058f6752d85a517aae298586fdf117acdd7560ea", "3191f28b942bd428fd4df250afc15bf68b402362", "daf0cd0076b388712ea12ec4105572997fc50cdf", "75e9e808f3e5226b08789fb1ed0aa0bca080015c", "56bf58eb183dbe8f6d420fae194f2c2be35fc850", "08d410ea6f0c3934324467d809e2ea6ffc8a9a73", "7605fe626c3598ee68fefaf1f4e1d21fcd2cb3d4", "2538a796403ba39352e7ecd73637a0b8b768dd07", "022e4c238f9cf85b9d8142725c6a2adbdcca2094", "d1bdd3491912546cc2e7dbf6dd6b3c99baf5b365", "742c641506ac9efc3281af2effb31f2fb31b2dd4", "07cc98137c632be08ab1d1b194ffa20479858d4d", "100d5bf9d9f760eaeaa61f89e81488a7d3808383", "69dbd14c4ff684af0d3c918f040e281eaf49bd05", 
"2aa22a36b6e0beb03e478710a63a9b88e313d3b6", "b8fdf02f27d7009bb1b4b8856569f87bd030a983", "437f67ddb11022a3ccc6e4febfa5cf3aeb32ab9b", "438584c2ea63887ad6b227ad5d6743aa8ab0b443", "05be2368ad8dc210602d2cdfcb8d6c751a7602ab", "157629dc2a9d2c2c7696e021ff8c1d5a6e7b4197", "a05548af9f54a7cd57a5c3f2d51b9e76f559f04a", "ab724df417d8913f053d01aa8e10b3267f0ab7d3", "534ee575a6b0c37e03d1dddb92493b57e9271298", "8114db39b6ff4a4d7db34af2e67ceed0804ecf73", "42883511b77b4048c899f2e1c27e8f589f08530c", "05d2867228ee673e9062602a53007cd5a6ac6d8f", "0f8b04cb89e455ceadf0c88fd5dd9f9a7f338ba9", "4954fa180728932959997a4768411ff9136aac81" ], "paperAbstract": "Remote Direct Memory Access over Converged Ethernet (RoCE) deployments are vulnerable to deadlocks induced by Priority Flow Control (PFC). Prior solutions for deadlock prevention either require significant changes to routing protocols, or require excessive buffers in the switches. In this paper, we propose Tagger, a scheme for deadlock prevention. It does not require any changes to the routing protocol, and needs only modest buffers. Tagger is based on the insight that given a set of expected lossless routes, a simple tagging scheme can be developed to ensure that no deadlock will occur under any failure conditions. Packets that do not travel on these lossless routes may be dropped under extreme conditions. 
We design such a scheme, prove that it prevents deadlock and implement it efficiently on commodity hardware.", "pdfUrls": [ "http://doi.acm.org/10.1145/3143361.3143382" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/eefa86508450c7855ea299604ae35a7cf7b21496", "sources": [ "DBLP" ], "title": "Tagger: Practical PFC Deadlock Prevention in Data Center Networks", "venue": "CoNEXT", "year": 2017 }, "ef6000b00bef55c3b9fd4b830bfca963d997c457": { "authors": [ { "ids": [ "2662237" ], "name": "Yukinori Sato" }, { "ids": [ "39045319" ], "name": "Toshio Endo" } ], "doi": "10.1007/978-3-319-64203-1_9", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_9", "entities": [ "CPU cache" ], "id": "ef6000b00bef55c3b9fd4b830bfca963d997c457", "inCitations": [ "103260542f628061425a88f415200e6366e3c474" ], "journalName": "", "journalPages": "119-133", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_9" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ef6000b00bef55c3b9fd4b830bfca963d997c457", "sources": [ "DBLP" ], "title": "An Accurate Simulator of Cache-Line Conflicts to Exploit the Underlying Cache Performance", "venue": "Euro-Par", "year": 2017 }, "ef61748705e2728501a122ad779673a8bebae79a": { "authors": [ { "ids": [ "9560905" ], "name": "Nurit Moscovici" }, { "ids": [ "2898845" ], "name": "Nachshon Cohen" }, { "ids": [ "2210090" ], "name": "Erez Petrank" } ], "doi": "10.1109/PACT.2017.13", "doiUrl": "https://doi.org/10.1109/PACT.2017.13", "entities": [ "Algorithm", "Computation", "Data structure", "General-purpose computing on graphics processing units", "Graphics", "Graphics processing unit", "Skip list" ], "id": "ef61748705e2728501a122ad779673a8bebae79a", "inCitations": [ "c447884ff203380eea57c9072b36db5323b2c28e", "69485516475fcd6e78692ae3b952888c7d1443df" ], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques 
(PACT)", "journalPages": "246-259", "journalVolume": "", "outCitations": [ "0c25ac1fb86259bc91e22b51bdaa56ce1dbc50c5", "762f5a712f4d6994ead089fcc0c5db98479a2008", "4291f71707a8ac18d5bf72c2b2c52e16c208e0ba", "22a3110123362412f91ae44c2b15e2234324f6fd", "00dd1306de5e28a6cb766f409779c784db11c58d", "4c77e5650e2328390995f3219ec44a4efd803b84", "78e47b768c784fcb15004bab48e24f80fdad579e", "6479c756e597c38e57aa45e2eae8550fd738418b", "5b975248796c2ee3f65b2f4430fd3be4d7e6191e", "8085460933105498577e741a02185c0097e36711", "2916fd514c69c1b3141c377c1c97d957bdc86c5e", "63bc7beec90c98b54add6b8d5767c10e36caa667", "4ccb2d0f62dfd174f2fe9551a4d2a32c2424c618", "3168cc1e8951c2652b7878748e77228dbf4090cb", "d410f8128bd4efa6adc886259e5d9de4cd7587bc", "3c9b5b9e3e8ad647498f1650df08ac2a4fa83346", "fa15e80d71f831ed1a3f11d5b94c88b8f098a17c", "a4d431fd93d941abb8797d2a8e7333606504c7fb", "942f2a6df29234c304b69129872835d60cf5e9e9", "217d408f60f749aab6705ff3056b8e77640f2948", "b1fc033792679dc0e12ddce2aa2e4869a33ba2c2", "1ae7993c0c2d795b243354de48dab80bf2000356", "5d153a55b6f12752afc11bb96d9d72a51c990dba", "6710769f8fb90d6fe30dc8e27183f19a6cb31faa", "b1cbfd6c1e7f8a77e6c1e6db6cd0625e3bd785ef", "3a5a237a114d70291b9611675cc97ba2bf20aa87", "a877b04a01146eae9c6c7ab27e9dda97fbee7d89", "04f020a4ab2134db6f9e98eadf216d94d440414a", "072cad08a6886c1800cb6144a8cfec4bced6f7d9", "54a882bc5f15877097dfb1aab8c480323036e48c", "0a5033c0b2bb2421f8c46e196fb0fb1464a636b6", "295521cfe1a56458d53a58613de5fb92c97c5c23", "2724de31317b1b9e026b5f90251829ee02f3fa3f", "24f641c3987721be01f2c484198608b4c53f0208", "05e4baf190150bd9ef6516ee777c003431ec57dc" ], "paperAbstract": "We propose a design for a fine-grained lock-based skiplist optimized for Graphics Processing Units (GPUs). While GPUs are often used to accelerate streaming parallel computations, it remains a significant challenge to efficiently offload concurrent computations with more complicated data-irregular access and fine-grained synchronization. 
Natural building blocks for such computations would be concurrent data structures, such as skiplists, which are widely used in general purpose computations. Our design utilizes array-based nodes which are accessed and updated by warp-cooperative functions, thus taking advantage of the fact that GPUs are most efficient when memory accesses are coalesced and execution divergence is minimized. The proposed design has been implemented, and measurements demonstrate improved performance of up to 11.6x over skiplist designs for the GPU existing today.", "pdfUrls": [ "http://www.cs.technion.ac.il/~erez/Papers/GPUSkiplist.pdf", "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.13", "http://www.cs.technion.ac.il/RESEARCH_DAY_17/POSTERS/nurit_moscovici.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ef61748705e2728501a122ad779673a8bebae79a", "sources": [ "DBLP" ], "title": "A GPU-Friendly Skiplist Algorithm", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "ef96667edcfe4bf4b8f76d130f3fc48972e32c2b": { "authors": [ { "ids": [ "2843212" ], "name": "Andre Lopes" }, { "ids": [ "1803904" ], "name": "Frederico Pratas" }, { "ids": [ "1713695" ], "name": "Leonel Sousa" }, { "ids": [ "1767172" ], "name": "Aleksandar Ilic" } ], "doi": "10.1109/ISPASS.2017.7975297", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975297", "entities": [ "Benchmark (computing)", "CPU cache", "Central processing unit", "Dynamic voltage scaling", "General-purpose computing on graphics processing units", "Graphics processing unit", "Memory hierarchy", "Programmer", "Software portability" ], "id": "ef96667edcfe4bf4b8f76d130f3fc48972e32c2b", "inCitations": [ "d0f85e1f8821df9444e0549d0333c5f3bc5fd304", "82d3daba05a53421e52e0f44c8ecf17f8d28954f" ], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "259-268", "journalVolume": "", 
"outCitations": [ "00156e79606084497789662dfaf59c3b54a10722", "fb401b959710b71538a0cda8fd15c52718691e08", "5f3cce1bc739ebfc03e003010d3438bb318efc14", "0b4e82c71f3c34dd394f28cacfcf9bb2c165eea8", "c50da84ed015168bd223a3234bec6cb750ee7c71", "e94b2d7bf414a822382ac86707bbb8cd77ff0f34", "07736bb61274b9c2a0920010f3fff9919533aee6", "092217c2267f6e0673590aa151d811e579ff7760", "23177452df15b652dd54a59324502b92c99687a7", "28e34059176c36934de116e138dd53cf4ee1dff0", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "453b6caf19cc2e26cf761966534f10a737eb62b4", "f65e3133def790ebfa6e91d525e64d98fd974cda", "4f805391383b20dbc9992796d515029884ba468b", "ae0d521871320a166a91e5cfa3a1858851f50fe3", "2ad29134da93304e72dd047ca99ec6cfef2b4990", "2583d51a7aafc4e4e3c9bdcd1fa8a978f7d81bc5", "125e42dc5c18f9643e3ec4dddbd48366bc1edd7a" ], "paperAbstract": "Optimization, portability and development of GPGPU applications are not trivial tasks, since the capabilities and organization of GPU processing elements and memory subsystem greatly differ from the traditional CPU concepts, as well as among different GPU architectures. This work goes a step further in aiding this process by delivering a set of visual models that can be used by GPU programmers to analyze and improve application performance and energy-efficiency across a range of different GPU devices. For the first time in this paper, the state-of-the-art Cache-aware Roofline Modeling principles are applied for insightful modeling of GPU upper-bounds for performance, power consumption and energy-efficiency. The proposed models are developed by relying on extensive GPU micro-benchmarking aimed at fully exercising the capabilities of GPU functional units and memory hierarchy levels. The models are experimentally validated across 8 GPU devices from 3 different NVIDIA generations, and their benefits are explored when characterizing the behavior of 23 real-world applications from 5 different benchmark suites. 
Furthermore, the DVFS effects on GPU performance upper-bounds are also analyzed by scaling both core and memory frequencies.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975297" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ef96667edcfe4bf4b8f76d130f3fc48972e32c2b", "sources": [ "DBLP" ], "title": "Exploring GPU performance, power and energy-efficiency bounds with Cache-aware Roofline Modeling", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "efb351341158c8cb92ea6f479021c05e8e2e6120": { "authors": [ { "ids": [ "2248998" ], "name": "Manuel Bravo" }, { "ids": [ "1741342" ], "name": "Lu\u00eds E. T. Rodrigues" }, { "ids": [ "39874026" ], "name": "Peter Van Roy" } ], "doi": "10.1145/3064176.3064210", "doiUrl": "https://doi.org/10.1145/3064176.3064210", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "Causal consistency", "Causality", "Decoupling (electronics)", "Replay attack", "Scalability", "Throughput" ], "id": "efb351341158c8cb92ea6f479021c05e8e2e6120", "inCitations": [ "dad4634e52066f3e5e8b6222daa37d751df96a0d", "22d9bd7d4d4e071ae573ac56fca7b58824c50801", "8fe5d6bb93d046c4c1ff3a075225b8acf147584f", "c1447c4c07721e4e444aaa7ad5bb6a661c742bd2", "019327fe7166f59bdac01e143a29a66ac6b64114" ], "journalName": "", "journalPages": "111-126", "journalVolume": "", "outCitations": [ "07f82eecbe3d530e3f967342245a4d5c8e41c05b", "96d2a84e57ff1475394b7702473f3e8e868feb68", "05a618847e4f08e5bca29dff732757779722b2e0", "2891da32932a45f8b14bb95f7e26b5ae9677f430", "605277f87ee483cfd04f986780514c26160d2e87", "59ca22b032e7e1fb4467a3b2ad577cbf9d8f0ea8", "ce40e225ffa0b2c4a4a2e25d7b65d33978af1eb2", "577bc0973036dcfd666772a6fc8e2647df7a2e8e", "24cece61e2128780072bc58f90b8ba47f624bc27", "0139dceb6cef21b234e454d53154f30391495862", "200adc5e9ca486f6919bc194415cec28e986df2d", "6f164cc777efdf08748c96e5be185f69a8f01cd8", "c7ab5c0886f8a9b3570d215ab981c63c77433f1f", 
"215aad1520ec1b087ab2ba4043f5e0ecc32e7482", "223b9e0e1bf2d696458ca0fb7aabb1bb0ea0b639", "663e064469ad91e6bda345d216504b4c868f537b", "259a11bb2ccac5af9128b00c2bd0237c3f712d3c", "ed2e39973435a4b53da760ad9837237ddce2eda5", "13cbb1b747814aa6db01892861a601c6b33dd697", "20f5f8733134d87041b95b742d613051a1fb3fdb", "4687fdf3c77ef00700fdf1399f7dd81bfe87ef97", "537feab9bf0bc59e0399c75c0a0c01ecc2706566", "d605276ebdf41305ef3e4b65acd3e8f631ce6c78", "bc631e10de057f1ae6f65cb1b6f4baac1024e449", "058f6752d85a517aae298586fdf117acdd7560ea", "61011eb60b242f529f58eecaf7029524920cd6cf", "49532e318be89eed64725b32617c1fc570f824a4", "033492cf9e4fdd36380065d7e6f31817ba561e57", "6f2f219a4f6d64843efe35f868ed919ce8b3a031", "cdee1c49685a1e66b040b6c8381ce6e85f643f3a", "9cd9321b82d573447f08d84e9a8ca31c46fd6b8e", "d12d1289d2384c2ce642f01855637b9f0519e189", "30ce9b53eaa730b4161226c2c8eaf95adb46add7", "33457f49553d918e912c2d8c54b81f4fd8a4c234", "2eb05228a775e9d70975330b4cdf17ce965c8d0a", "5dd350cee6ecfd097b57772f89e6341ff05b5725", "55bef5db971deed1358bcb2b375d6832b9ba6a1b", "1664b784dd7d446ee8838e0eec5b980f61792007", "42142c121b2dbe48d55e81c2ce198a5639645030", "2888c136064ff5527a0bb370ac1d9bf71939e066", "76eea8436996c7e9c8f7ad3dac34a12865edab24", "22d9bd7d4d4e071ae573ac56fca7b58824c50801", "6fcaf13d4a3d72ea53060941efa4b5cd57de0503", "4ecb9b8ccb17098719f7532d808f4bfe86131374", "71c0dd6bd1dd57716b6797043e9f09b951c88a22", "f502a1b0fe005a263697589a290404b01fe11be5", "740ee3de6f8ca734797d7a808c956e303f4a5730", "cf1c70afbd942ff34595052c3438dc3f50a90167" ], "paperAbstract": "This paper presents the design, implementation, and evaluation of Saturn, a metadata service for geo-replicated systems. Saturn can be used in combination with several distributed and replicated data services to ensure that remote operations are made visible in an order that respects causality, a requirement central to many consistency criteria.\n Saturn addresses two key unsolved problems inherent to previous approaches. 
First, it eliminates the tradeoff between throughput and data freshness, when deciding what metadata to use for tracking causality. Second, it enables genuine partial replication, a key property to ensure scalability when the number of geo-locations increases. Saturn addresses these challenges while keeping metadata size constant, independently of the number of clients, servers, data partitions, and locations. By decoupling metadata management from data dissemination, and by using clever metadata propagation techniques, it ensures that the throughput and visibility latency of updates on a given item are (mostly) shielded from operations on other items or locations.\n We evaluate Saturn in Amazon EC2 using realistic benchmarks under both full and partial geo-replication. Results show that weakly consistent datastores can lean on Saturn to upgrade their consistency guarantees to causal consistency with a negligible penalty on performance.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064210" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/efb351341158c8cb92ea6f479021c05e8e2e6120", "sources": [ "DBLP" ], "title": "Saturn: a Distributed Metadata Service for Causal Consistency", "venue": "EuroSys", "year": 2017 }, "f033e6801f557560752ee4ade8c704bfa20fc836": { "authors": [ { "ids": [ "2889531" ], "name": "Peng Ni" }, { "ids": [ "39842396" ], "name": "Masatoshi Hanai" }, { "ids": [ "2272341" ], "name": "Wen Jun Tan" }, { "ids": [ "2405565" ], "name": "Chen Wang" }, { "ids": [ "1688786" ], "name": "Wentong Cai" } ], "doi": "10.1109/ICPP.2017.58", "doiUrl": "https://doi.org/10.1109/ICPP.2017.58", "entities": [ "Algorithm", "Central processing unit", "Centrality", "Closeness centrality", "Data structure", "Experiment", "Information theory", "Multi-core processor", "Parallel algorithm", "Run time (program lifecycle phase)", "Sequential algorithm", "Social network", "Speedup", "Synthetic data", "Telecommunications network" ], "id": 
"f033e6801f557560752ee4ade8c704bfa20fc836", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "493-502", "journalVolume": "", "outCitations": [ "7f1d7ef36f5664da349a3e7fc3276e30f6f871fd", "b9ef5daaa31ccee9ba239a3a60c6b7c552aec5dc", "b0c2e633c6be27cea1f2402e3ee5b6b0eae40e4e", "098e3c7022714b3505a80143ff61be65cd29c22e", "4410f0c48f982f960a54500df7bd88e4cab88927", "72d66b8c0b05635e59ccd78dd70092e3d984f548", "c17faf779524c3be2ef1ef1e56d000aa60625158", "40eb1f990ac292b14b56ea06e61d9aeb9bfa28c3", "13a9ef1098dfdc8a6a89f393c3daba7bb3c6c47b", "e901ca0397785fcbf3039cf1e0867ac94fa2a558", "e97dc632afdd600bb3ef7d097daa902ea34bf082", "01c1f0e97ce5c74c714dc7aa43cb064f45cc3b04", "4f1c7b1b9c8f5510baa5d9be00044d51bdba75cf", "1156f60e40548096df49528b1342bb3e88b0f378", "0be53c7b9345890cd2267059fdb259329d13bef0", "3486aeaf540c48952120fe853d672af984f40a6a", "a892a54b02ce112e1302931231141a8b676b873b", "04ff6d8d8708aef4ebc45ebee132fcc6f055bbd4", "6302fbfa2e975fdfb5d55359fd2a3da9cc1b9550", "857b926d289e76ec00a97a72b082109e1fadf2e5", "75089ac937d66503cd8442d74bbaec1b578ed5ea" ], "paperAbstract": "Many real-world networks, including online social networks and communication networks, are commonly modeled as temporal graphs. Answering earliest-arrival queries in temporal graphs is one of the most fundamental studies with numerous applications, such as information diffusion and measuring temporal closeness centrality. As graph sizes are growing rapidly, speedup of query execution time becomes even more important.In this paper, we propose a novel edge-centric parallel algorithm for solving single-source earliest-arrival problem in temporal graphs based on a new data structure named Edge-Scan-Dependency Graph (ESD-Graph). We evaluate the proposed parallel algorithm by theoretical analysis as well as by empirical experiments on real-world temporal graphs and synthetic graphs. 
Empirical results show that the new parallel algorithm outperforms the existing serial algorithm by up to 8.2 and 9.5 times on multi-core processors for real-world data and synthetic data respectively.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.58" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f033e6801f557560752ee4ade8c704bfa20fc836", "sources": [ "DBLP" ], "title": "Parallel Algorithm for Single-Source Earliest-Arrival Problem in Temporal Graphs", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "f04671d1702dc404fa6c27d2ada5c70666a9abe0": { "authors": [ { "ids": [ "1755281" ], "name": "Daniel A. Jim\u00e9nez" }, { "ids": [ "3381055" ], "name": "Elvira Teran" } ], "doi": "10.1145/3123939.3123942", "doiUrl": "https://doi.org/10.1145/3123939.3123942", "entities": [ "8b/10b encoding", "Benchmark (computing)", "CPU cache", "Perceptron", "Program optimization", "Speedup", "Thread (computing)" ], "id": "f04671d1702dc404fa6c27d2ada5c70666a9abe0", "inCitations": [ "f06233da50ed916579f5f536da5a66fd3c4c0ce8" ], "journalName": "", "journalPages": "436-448", "journalVolume": "", "outCitations": [ "06125169a21ef17641d7199544417b21c378eede", "02f3eebd4281e9a241d5790da5bb783e018c8251", "77d4fb23ce0b5499016f2c162a5430d04f976542", "0598e498f7174d2138f43ffeaf3539018b3219b3", "08237b5a7862d65185977e3dac0f81e616188add", "1d4b7f01d8d3dc2739fa880ae834aa8efc34b9d4", "057ecc6780a2b2cb533884167962654451e4960b", "294273a4a63a4d06d3dbd2880598a9cd64b3087f", "43260df86b2aaa20824d73eff48e0b49162689cb", "d33880a01318ec992071968c25059763146e6343", "7bb72a9437a1ddb7e0eced6f243b8f9e66438f28", "792aa5a81ac1d344de450ec59eec339aa0e508aa", "24e281cf2e106e35cbd3da2a122011da98056d03", "55043afbb87e38627778a323dfdc35a55357e47d", "09c5931307cba3f80d3ecc14d02eecfa46463cfe", "1728de7b027e827dfc67ceb3b0e23b841a4b1538", "a55e4ac5c453115521ee0d428948cf7c2124c220", "2804bcc9df4352c2da1367f182a54e7c67a160ec", 
"8b10b13fb495101d1e4eb768907cff05e3bd9315", "0717371b254df3e466a11d1965c2c9541a43b7a3", "7779c10dfa1f84953016b6292844815c5faf84f5", "25e0dcb0e7b3446fbf16c48e9a6a4ad36f645f3b", "ed3f72267c3756f987b63a894e825e922dbd1cd4", "0680bdfaf465947354218828a51ee5997505385b", "af80cc2a0fcd817aa5aa5c39f13e50d51e803cdd", "4408b7049f9241920ff8dcb5ad387e5358a75694", "2dc59e60b34b3863e4eb381b17384105fe523cec" ], "paperAbstract": "The disparity between last-level cache and memory latencies motivates the search for efficient cache management policies. Recent work in predicting reuse of cache blocks enables optimizations that significantly improve cache performance and efficiency. However, the accuracy of the prediction mechanisms limits the scope of optimization.\n This paper introduces multiperspective reuse prediction, a technique that predicts the future reuse of cache blocks using several different types of features. The accuracy of the multiperspective technique is superior to previous work. We demonstrate the technique using a placement, promotion, and bypass optimization that outperforms state-of-the-art policies using a low overhead. On a set of single-thread benchmarks, the technique yields a geometric mean 9.0% speedup over LRU, compared with 5.1% for Hawkeye and 6.3% for Perceptron. 
On multi-programmed workloads, the technique gives a geometric mean weighted speedup of 8.3% over LRU, compared with 5.2% for Hawkeye and 5.8% for Perceptron.", "pdfUrls": [ "http://doi.acm.org/10.1145/3123939.3123942" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f04671d1702dc404fa6c27d2ada5c70666a9abe0", "sources": [ "DBLP" ], "title": "Multiperspective reuse prediction", "venue": "MICRO", "year": 2017 }, "f07dca3331b1ee07ec5e7aad30cf67671b583984": { "authors": [ { "ids": [ "2416851" ], "name": "Hongzhi Yin" }, { "ids": [ "3082847" ], "name": "Hongxu Chen" }, { "ids": [ "1759841" ], "name": "Xiaoshuai Sun" }, { "ids": [ "39049654" ], "name": "Hao Wang" }, { "ids": [ "2295608" ], "name": "Yang Wang" }, { "ids": [ "28120037" ], "name": "Quoc Viet Hung Nguyen" } ], "doi": "10.1109/ICDM.2017.68", "doiUrl": "https://doi.org/10.1109/ICDM.2017.68", "entities": [ "Computation", "E-commerce", "Experiment", "Program optimization", "Sampling (signal processing)", "Scalability", "Social network", "Sparse matrix" ], "id": "f07dca3331b1ee07ec5e7aad30cf67671b583984", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "585-594", "journalVolume": "", "outCitations": [ "c862baf4a671acc8bcb892f59b83491a72cbca34", "8ceb2788eef2690eec623dbd85c57656dc4f9250", "994afdf0db0cb0456f4f76468380822c2f532726", "0834e74304b547c9354b6d7da6fa78ef47a48fa8", "1c2cae04e0e00dcec3fe6840b2daa00ae9e8a0a3", "087cb6fa2dffd6a38f3f9baeccff79c3d84c1e1f", "127c27f135bed13706774168bef2c87e9d0791a1", "116cbad0ad34f83023d0b231f378dae2975e3b5e", "57bf84b70746c195eaab1ffbc0102673704cc25c", "64c871cd7e0af7e1aeb94d98c6214eb8d8e8b989", "9aa88a8a354f1d322e242376d27d0474e50252f8", "d5fdc3c0b2049a025091179a73e0e4174105fcd4", "72c49124838826ae5ff09a0bd5faee5f91c6e51e", "2df4fd764e14b1c28a5541f05ecda0b1d75b139c", "39d8eef237f475648214989494ac6f89562d2826", "1908f376b73f9726325300bcd90a6780503d084c", 
"1683ffc189d16b616131c300f45af87602d211f7", "10b2bbbe320b9e4523cf94835e2cb8878541d9fd", "184b7281a87ee16228b24716ca02b29519d52eb5", "25ad8ad3d1549888a4609659bd55ad825c5df82e", "530d53d8a1828e73ff0d731cd2d2cf8a8f8ecae4", "05aba481e8a221df5d8775a3bb749001e7f2525e", "165c428fec7d3aac4ab6e2c9d285af92883b643c", "7811dff921a6d8275136530d99b80580e3adbe0b", "bf75d4e8b9a2b21653dcfbfb8183d10a77474091", "36f49b05d764bf5c10428b082c2d96c13c4203b9", "0aa2a4d259433016ebc899c496faea03c024c0bd", "04b52c8230c3f9f4f4032b06458069d81c8f07b2", "20915759c4ec4af81493b4903ade736a01001f41", "762b63d2eb86f8fd0de98a08561b77527ae8f165", "68a33a3afac65eb6e0fb3726c1f9c8b727f32a42", "0a63276066079d9a67ee7157b079d777729838f4", "094cbfa06f8374b49b84524a466a63d34c9ef34f", "498ca0a1f8c980586408addf7ab2919ecdb7dd3d", "c1375bfb196c2011309e30586d9a1ced893bfbaf", "188f4d9b9d580d0432056b760b3372ec83543d1d" ], "paperAbstract": "With the rapid rise of various e-commerce and social network platforms, users are generating large amounts of heterogeneous behavior data, such as purchasehistory, adding-to-favorite, adding-to-cart and click activities, and this kind of user behavior data is usually binary, only reflecting a user's action or inaction (i.e., implicit feedback data). Tensor factorization is a promising means of modeling heterogeneous user behaviors by distinguishing different behavior types. However, ambiguity arises in the interpretation of the unobserved user behavior records that mix both real negative examples and potential positive examples. Existing tensor factorization models either ignore unobserved examples or treat all of them as negative examples, leading to either poor prediction performance or huge computation cost. In addition, the distribution of positive examples w.r.t. behavior types is heavily skewed. Existing tensor factorization models would bias towards the type of behaviors with a large number of positive examples. 
In this paper, we propose a scalable probabilistic tensor factorization model (SPTF) for heterogeneous behavior data and develop a novel negative sampling technique to optimize SPTF by leveraging both observed and unobserved examples with much lower computational costs and higher modeling accuracy. To overcome the issue of the heavy skewness of the behavior data distribution, we propose a novel adaptive ranking-based positive sampling approach to speed up the model convergence and improve the prediction accuracy for sparse behavior types. Our proposed model optimization techniques enable SPTF to be scalable to large-scale behavior datasets. Extensive experiments have been conducted on a large-scale e-commerce dataset, and the experimental results show the superiority of our proposed SPTF model in terms of prediction accuracy and scalability.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.68" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f07dca3331b1ee07ec5e7aad30cf67671b583984", "sources": [ "DBLP" ], "title": "SPTF: A Scalable Probabilistic Tensor Factorization Model for Semantic-Aware Behavior Prediction", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "f0c27a1717c8f75d369badcf1dc537f5271f4ccb": { "authors": [ { "ids": [ "1726798" ], "name": "Fan Zhang" }, { "ids": [ "1783406" ], "name": "Yiqun Liu" }, { "ids": [ "40613610" ], "name": "Xin Li" }, { "ids": [ "38898636" ], "name": "Min Zhang" }, { "ids": [ "7869979" ], "name": "Yinghui Xu" }, { "ids": [ "8093158" ], "name": "Shaoping Ma" } ], "doi": "10.1145/3077136.3080841", "doiUrl": "https://doi.org/10.1145/3077136.3080841", "entities": [ "Beam propagation method", "Behavioral modeling", "Definite clause grammar", "Early stopping", "Eisenstein's criterion", "Exploratory search", "Information retrieval", "PC game", "Performance", "Rapid Refresh", "Relevance", "Session (web analytics)", "Simulation" ], "id": 
"f0c27a1717c8f75d369badcf1dc537f5271f4ccb", "inCitations": [ "b5108683d457ff6e0b990ed1cddef881c144c057" ], "journalName": "", "journalPages": "425-434", "journalVolume": "", "outCitations": [ "40b42731dbb4c8ed6a03fbee44b945b55c00f19a", "182f6739d31915434dc39a4f22bc55b391f225d7", "9533cce453af80638bb2914a929fc5c866b2d2b5", "ca4a7f9df973718ef415469434f702745d8be289", "5ba7592fe3310106543cffad644addeffc2ad2b3", "a0f96a954ccb478440523ef36e67f01de51e6918", "cec5beaa8147c2d4054847e458e5607367ea2c8b", "5cb6d991e08c9fce0795035e2e626222ec17bda5", "0e9f79887d29e5fe8455d7cd815f3c44a78ade5e", "7b845d042a614c15b0412258103009f8c9d042a9", "1b50c8353d9e4a241a5f4a9c0088eb9c5f593e1f", "38cb9bc50282615e0631d47825c5e0c7f691c3f7", "1f0ba28a4123f71ffa045625d29088f8979945da", "a0d2d94b212f94804ec42d9c6d3455a1b7455c02", "7e73991f0aea34871cddf149a94c4d44b280f5e2", "2de7acf5c655c2e5fdab2e373e9b2fc0f9adb469", "4047d5efd1683bbd3280500c3244149089412024", "e95a1ce95b4f0e7b542b70ef80073b9525646717", "8490234d79b47e459824dcf87c1e288211a3c964", "13d72ef522b405c18f7d228c5744687609b4c3a4", "264ef4001db964929792479367c2ba26488674b0", "bd73ec7bac51a332e5833bb1a02bcd3bf7a79474", "36ea668bb7617b9c1e6e98aebe96a0aaf90b569e", "860d592c5295888ea2a32e3df44714ec6928ebd7", "328c7b4ce5a0d81326ee2a3befa0f2dd630a48c1", "021dd452f6d40203860ac214078ce111dbb8d320", "bacff358bfd85a4673782c1f56f030283bdaa4e5", "2448fdde993e92c2af2006089f42c485080e4231", "471cb4c2e5039bdaacb0274fee70c7fe2e93493e", "4678d9bd2eb00ed7b1acbab2909feb281b4e470c" ], "paperAbstract": "The design of a Web search evaluation metric is closely related with how the user's interaction process is modeled. Each behavioral model results in a different metric used to evaluate search performance. In these models and the user behavior assumptions behind them, when a user ends a search session is one of the prime concerns because it is highly related to both benefit and cost estimation. 
Existing metric design usually adopts some simplified criteria to decide the stopping time point: (1) upper limit for benefit (e.g. RR, AP); (2) upper limit for cost (e.g. Precision@N, DCG@N). However, in many practical search sessions (e.g. exploratory search), the stopping criterion is more complex than the simplified case. Analyzing benefit and cost of actual users' search sessions, we find that the stopping criteria vary with search tasks and are usually combination effects of both benefit and cost factors. Inspired by a popular computer game named Bejeweled, we propose a Bejeweled Player Model (BPM) to simulate users' search interaction processes and evaluate their search performances. In the BPM, a user stops when he/she either has found sufficient useful information or has no more patience to continue. Given this assumption, a new evaluation framework based on upper limits (either fixed or changeable as search proceeds) for both benefit and cost is proposed. We show how to derive a new metric from the framework and demonstrate that it can be adopted to revise traditional metrics like Discounted Cumulative Gain (DCG), Expected Reciprocal Rank (ERR) and Average Precision (AP). To show effectiveness of the proposed framework, we compare it with a number of existing metrics in terms of correlation between user satisfaction and the metrics based on a dataset that collects users' explicit satisfaction feedbacks and assessors' relevance judgements. 
Experiment results show that the framework is better correlated with user satisfaction feedbacks.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080841" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f0c27a1717c8f75d369badcf1dc537f5271f4ccb", "sources": [ "DBLP" ], "title": "Evaluating Web Search with a Bejeweled Player Model", "venue": "SIGIR", "year": 2017 }, "f11b4441cd98681ee6d1f29444ca5a9d762052c6": { "authors": [ { "ids": [ "30406021" ], "name": "Sruthikesh Surineni" }, { "ids": [ "3409865" ], "name": "Ruidong Gu" }, { "ids": [ "2155469" ], "name": "Huyen Nguyen" }, { "ids": [ "2948456" ], "name": "Michela Becchi" } ], "doi": "10.1109/IISWC.2017.8167778", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167778", "entities": [ "Approximation algorithm", "Benchmark (computing)", "Central processing unit", "Computation", "Double-precision floating-point format", "GNU", "Graphics processing unit", "Multithreading (computer architecture)", "Significant figures", "Synthetic data", "Thread (computing)", "Time complexity" ], "id": "f11b4441cd98681ee6d1f29444ca5a9d762052c6", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "207-218", "journalVolume": "", "outCitations": [ "2586bee6e557fefe4153ea03c244d92102f7c118", "2f23022875abd9875d71f36b46343cc06de97173", "84a676d3a92d6252c5bc49ed64b25bb86aefa0d0", "08f98807182019196abaa0931b4c1e894fd227b1", "0d8126da975a2bbebed2e47a55706777b01b026e", "0931ec319e1365a12e7d0ff85e3140d4de7688bc", "00a267fbabeb40e8ba6d196de1f8926ff2ca6317", "e667b5c0d1409d9ed6b0ea692572bf4ddce8acc2", "54ebce0965322b29be45b3a35a63d06b962cabe3", "3532a540b6a357295fd0c91e3a3be0bf53c0ae7a", "1475555fc6fae3551c85e7cf46ba4a8b969e2e39", "4b4b0f96e9fa4ec281997f560a09f468cbe62d87", "761c777013aa16efc1ff44700f277191dc789e7c", "141d79d54b8650dc950bd05ca011ecf9fdb572bc", "60965ea779973dcb50817daa33118f5035049b56", "1155c60536516eb939ef3ce7d6052aa26d4abbe2", 
"16d8c485fcd800b873b6a46389fb857036f1bb1a", "23177452df15b652dd54a59324502b92c99687a7", "15fbace85cb327cfc040f4786fba396a0c9120dc", "35f7270966f64181ed85163817b89ccba842ef6f", "0a07584c52998959d76dc55ce021d2fc56e4e1c3", "3589d33ce4062eb40a78e920dfca7c1ffd060af9", "2ca01423a62b071328874235bdec6ef533464d44", "290a290ef4c6f8a509f77ca53ec9424d214731fa" ], "paperAbstract": "Floating-point computations produce approximate results, possibly leading to inaccuracy and reproducibility problems. Existing work addresses two issues: first, the design of high precision floating-point representations; second, the study of methods to trade off accuracy and performance of CPU applications. However, a comprehensive study of the tradeoffs between accuracy and performance on modern GPUs is missing. This study covers the use of different floating-point precisions (i.e., single and double floating-point precision in IEEE 754 standard, GNU Multiple Precision, and composite floating-point precision) on GPU using a variety of synthetic and real-world benchmark applications. First, we analyze the support for single and double precision floating-point arithmetic on different GPU architectures, and we characterize the latencies of all floating-point instructions on GPU. Second, we study the performance/accuracy tradeoffs related to the use of different arithmetic precisions on addition, multiplication, division, and natural exponential function. Third, we analyze the combined use of different arithmetic operations on three benchmark applications characterized by different instruction mixes and arithmetic intensities. 
As a result of this analysis, we provide insights to guide users to the selection of the arithmetic precision leading to a good performance/accuracy tradeoff depending on the arithmetic operations and mathematical functions used in their program and the degree of multithreading of the code.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167778" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f11b4441cd98681ee6d1f29444ca5a9d762052c6", "sources": [ "DBLP" ], "title": "Understanding the performance-accuracy tradeoffs of floating-point arithmetic on GPUs", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "f1beead9a9d624f82b0c994fca9c0ea8e1da9058": { "authors": [ { "ids": [ "5674766" ], "name": "Stephen Hamilton" }, { "ids": [ "1726784" ], "name": "Randal C. Burns" }, { "ids": [ "1704150" ], "name": "Charles Meneveau" }, { "ids": [ "37511429" ], "name": "Perry Johnson" }, { "ids": [ "1682771" ], "name": "Peter Lindstrom" }, { "ids": [ "2465886" ], "name": "John Patchett" }, { "ids": [ "7934073" ], "name": "Alexander S. 
Szalay" } ], "doi": "10.1007/978-3-319-58667-0_15", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_15", "entities": [ "Simulation" ], "id": "f1beead9a9d624f82b0c994fca9c0ea8e1da9058", "inCitations": [], "journalName": "", "journalPages": "277-293", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_15" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f1beead9a9d624f82b0c994fca9c0ea8e1da9058", "sources": [ "DBLP" ], "title": "Extreme Event Analysis in Next Generation Simulation Architectures", "venue": "ISC", "year": 2017 }, "f1edcea009e5539a2c693b5f1a9f58da0a88545c": { "authors": [ { "ids": [ "2240099" ], "name": "Lilia Zaourar" }, { "ids": [ "19177200" ], "name": "Massinissa Ait Aba" }, { "ids": [ "2426945" ], "name": "David Briand" }, { "ids": [ "2271726" ], "name": "Jean-Marc Philippe" } ], "doi": "10.1109/IPDPSW.2017.123", "doiUrl": "https://doi.org/10.1109/IPDPSW.2017.123", "entities": [ "Algorithm", "Application lifecycle management", "Central processing unit", "Computer", "Computer performance", "Computer program", "Data center", "Design rationale", "Numerical analysis", "Parallel computing", "Program optimization", "Run time (program lifecycle phase)", "Scheduling (computing)", "Web search engine", "Web search query" ], "id": "f1edcea009e5539a2c693b5f1a9f58da0a88545c", "inCitations": [], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)", "journalPages": "65-76", "journalVolume": "", "outCitations": [ "afd129f47f3e7a8b30ed45cf1b2bf62a8c6a1e64", "9b6ddeb90dac8a828225bd58c9cf2f8ddc232812", "fb409bd94413a58de4d4c97166ec8ea27622b35b", "46f861ad3d4a61f6061e90ca839eed77a4f26063", "7406496ccb973f73f31bfdb53b5fa9c6bb44e547", "4ee57f4624439e37443afc46a3f5611eefdc9c29", "2383d10614250e429303beda65f624bfd1f0d2f0", "9bcd710374505f684556a469659b21ca2907bab1", "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", 
"4d911e2f4c53e0c4c3a06cde3c242dcdad15f2c2", "6f9c4ca3f0dfa438df1c520aba11b236a2103514", "7836cb9d3fa2455bcc1045b1ade9129ed674fdef", "ec80941857ed558f2dd5a2444f0ae566d7a18a44", "39060313a87628b0c9fabb3b06abf407af112ae2", "04717b51d7c7dd598d732fb8a293d69d45c0fa9f", "b30a682f8189a1e71bd6166f2a160f5e22753691", "1639eb73331e503d6ccef4f5a4c5c2b3fc892a23", "2ce1680064b577cccc2874ebe5362fb1b7484918", "95c7e0cf597b08b36f2724c415e3e696383d1991", "e9a453a33fb3acafdf59dfd6c866b2a1b1d36ab6", "e0277877954f8edd18e8ddb39d85d34f4a9cb030", "79ed861c40557f984f06ee2bb3d0cbccf004b377", "38009c11d24f6a6131b760ece1270acc4275ed87", "0def25a673a09c6620485c78bbb075176f31062f", "167baa53ccbf0dd6698b0355b69072c7b3dc5059", "7d76ba8c4f6776c645673e2c3f6eb88b1a0ca7aa" ], "paperAbstract": "Many of todays important applications of our everyday lives, e.g. weather forecast, design of plane and car shapes, medical analysis or even search engine queries depend on massively-parallel computer programs that are executed in data centers hosting thousands of computers. A large amount of electrical energy is used to power them, and it is of primary importance to compute more efficiently to sustain the increasing demand of computing power while keeping energy consumption reasonable. One promising research path in this domain is heterogeneous systems. The rationale for that is that at least parts of applications execute more efficiently depending on the computing resource (processors, accelerators, etc.). Nevertheless, the exploitation of these heterogeneous platforms raises new challenges in terms of application management optimization on available computing resources. The aim of our work is to determine effective algorithms to exploit these heterogeneous platforms by finding the best mapping and scheduling of an application to optimize the execution time and energy consumption with respect to various constraints. 
To achieve this goal, there is a need of a detailed modeling of the applications and the underlying hardware to be able to find realistic solutions. In this paper, we propose such as model, provide two implementations with state-of-the-art tools and preliminary mapping and scheduling numerical results.", "pdfUrls": [ "https://doi.org/10.1109/IPDPSW.2017.123" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f1edcea009e5539a2c693b5f1a9f58da0a88545c", "sources": [ "DBLP" ], "title": "Modeling of Applications and Hardware to Explore Task Mapping and Scheduling Strategies on a Heterogeneous Micro-Server System", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)", "year": 2017 }, "f27be01ce79e086c02547137524db0a428239ff7": { "authors": [ { "ids": [ "40574362" ], "name": "Vineet Kumar" }, { "ids": [ "1703799" ], "name": "Sachindra Joshi" } ], "doi": "10.1145/3077136.3080801", "doiUrl": "https://doi.org/10.1145/3077136.3080801", "entities": [ "BLEU", "Language model", "Plug-in (computing)", "Question answering", "Siri" ], "id": "f27be01ce79e086c02547137524db0a428239ff7", "inCitations": [], "journalName": "", "journalPages": "705-714", "journalVolume": "", "outCitations": [ "7477d88b225909ef645941a0142eed75dc3b2e56", "dc4bed38674e876bba8507327e8bb9de6bb994a5", "adefa82601da6e5c05828acdb50b174528b0fefa", "63e9be430ae0b64401d897641aed82cb2abcd93f", "446489f50fe45bbe7af63ddc9ddab1c1e505be9c", "36c009379f804993de22e8b4bc1d35996b324f24", "7fdcc97ef728b4dd8da886b28e0637c59848bf3c", "52ae52e10eb1554ab3750ec17e431aac832b4228", "65dda50755011ab0edbed7cea20f82b882c86b01", "95a3ec4f4266ff54676b6d343b07a3cd185bb54d", "519fbc4f8cb28c6d277282d8b011653b8ad8626b", "071b16f25117fb6133480c6259227d54fc2a5ea0", "8571611db04df42d9ddcca39b1a3c23c11d51b6d", "1ec7dac73dd9f5e005e6bc62971d6479c8348827", "0a72cb974c5f0c627b87dcb8ea75337e0235873b", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", 
"b7df5359f705d59e9c4e34ff99e9198b0859b245", "47a87c2cbdd928bb081974d308b3d9cf678d257e", "30e7a3ccbe7b557fbb1582497f3eafbb58db3d0c", "5b9d9cfba7eced767e86b683ee008b117e34889a", "154728875d4668065ca6ba9fa2f5d2a1bcfc4a6e", "269ed5ba525519502123b58472e069d77c5bda14", "d05e845817b68793770784c71bce6db363808f28", "0d74913dbaed92f5518d55c382226d1e9a5cd570", "146f6f6ed688c905fb6e346ad02332efd5464616", "101c0e09d533b738d83a3740f1f6e49ab2984e55", "8e499c94171933fb71cc41203d703bba55b78fbf", "0659768ba0c5108fafc2e95583dff1c0f1d8d214", "c1c051549922acd463a267253bccb78f23766182", "a122180426e3e1b5369c761b856b9f84b7ec2278", "1cd7f2c74bd7ffb3a8b1527bec8795d0876a40b6", "02df3d50dbd1d15c38db62ff58a5601ebf815d59", "4d065aa4ea2d26d6da9d4428c09fe17ce80ed197", "505bb30bffd51902c948d2684e856d93c34bcc1a", "f0f9f10f1cc5cb85c56e0c41dfd25db756b17ef7", "6dc34bae7fb2e12499ebdff2902ccde612dbb0f1", "272216c1f097706721096669d85b2843c23fa77d", "777e180b05815e7aeb64151855059e4838c3bdd3", "e9fac1091d9a1646314b1b91e58f40dae3a750cd", "7fa51d9ebf688949571a86411c7baf13d30c74d0", "6eeeba23f210f0b9fed5be1290ed3b2a9a375411", "108961c7366e36825ffed94ac9eab603e05b6bc6", "5b1fc2f3fa2cad5cb123748e7a970e553ae25b1d", "b1200547c167f804010ca63c0ed0ab1c736d4953", "df3342d6f7182e5d7bfca9b961a61869cd5ce0e5", "0662db8ec063f14507b43e4f93884c0d0e051d68", "4bb6263d482d8f8f9fc8aa0146b70ddca971a671" ], "paperAbstract": "Intelligent personal assistants (IPAs) and interactive question answering (IQA) systems frequently encounter incomplete follow-up questions. The incomplete follow-up questions only make sense when seen in conjunction with the conversation context: the previous question and answer. Thus, IQA and IPA systems need to utilize the conversation context in order to handle the incomplete follow-up questions and generate an appropriate response. 
In this work, we present a retrieval based sequence to sequence learning system that can generate the complete (or intended) question for an incomplete follow-up question (given the conversation context). We can train our system using only a small labeled dataset (with only a few thousand conversations), by decomposing the original problem into two simpler and independent problems. The first problem focuses solely on selecting the candidate complete questions from a library of question templates (built offline using the small labeled conversations dataset). In the second problem, we re-rank the selected candidate questions using a neural language model (trained on millions of unlabelled questions independently). Our system can achieve a BLEU score of 42.91, as compared to 29.11 using an existing generation based approach. We further demonstrate the utility of our system as a plug-in module to an existing QA pipeline. Our system when added as a plug-in module, enables Siri to achieve an improvement of 131.57% in answering incomplete follow-up questions.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080801" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f27be01ce79e086c02547137524db0a428239ff7", "sources": [ "DBLP" ], "title": "Incomplete Follow-up Question Resolution using Retrieval based Sequence to Sequence Learning", "venue": "SIGIR", "year": 2017 }, "f28fc6d566006ac0433a5f002fa46f76c206329b": { "authors": [ { "ids": [ "2454800" ], "name": "Farzaneh Mahdisoltani" }, { "ids": [ "2778391" ], "name": "Ioan A. 
Stefanovici" }, { "ids": [ "40520047" ], "name": "Bianca Schroeder" } ], "doi": "", "doiUrl": "", "entities": [ "Computer data storage", "Degraded mode", "Failure rate", "Hard disk drive", "Machine learning", "RAID", "Robustness (computer science)", "Solid-state drive", "Solid-state electronics", "Test set", "USB flash drive" ], "id": "f28fc6d566006ac0433a5f002fa46f76c206329b", "inCitations": [ "cb85f14d3c9685cad65c95087f8e8f505eab24a7" ], "journalName": "", "journalPages": "391-402", "journalVolume": "", "outCitations": [ "33438b1148a84d6e5bf2cad70bf7754d546ca5d7", "05dd6cb44124b8a210ac391f15ec25e68918ef22", "08f604950b1b52014d8fa986b5aae412cc2db864", "09bcd050bb006639ae8bcacb3af149f0b6d964f3", "736290016ba0d35a57c373f5ba25ad709b3a59e1", "033103f56a29d74667c6c3e51b70022a3e2bb0da", "32d23ce43877aa8cd385a8e01f366329dd015a5c", "f4f37512b9786dbd9980d343e47fdc41032d0ca6", "562a596836f42714d81e1f861671959ba12e0246", "1886edb4e771c1c0aa7bae360d7f3de23ac4ac8e", "3a8311285e37b221d752426e3a630b3c1d9d16f2", "329da7a7c974fdd24e3db4ed9aea5f9262b6dc80", "1dec50169a6f60c8612abc2a729166f5127f1522", "ff794090fed038e01595c38a3156e6441af3fa7d", "229acac1bd70c57e6a17f2c24f153c06d54de252", "00d0f20c16ab208171d6493cd1c83a99d1056b93", "276dfdf225f462fe433a9034c7e76d45d50d5e79", "4cc32481e83d6218ef0da503bd8f0524e1906dd7", "546303ce50c1f5b04391bc3fcbbb6ab5252c30ab" ], "paperAbstract": "This paper proposes the use of machine learning techniques to make storage systems more reliable in the face of sector errors. Sector errors are partial drive failures, where individual sectors on a drive become unavailable, and occur at a high rate in both hard disk drives and solid state drives. The data in the affected sectors can only be recovered through redundancy in the system (e.g. another drive in the same RAID) and is lost if the error is encountered while the system operates in degraded mode, e.g. during RAID reconstruction. 
In this paper, we explore a range of different machine learning techniques and show that sector errors can be predicted ahead of time with high accuracy. Prediction is robust, even when only little training data or only training data for a different drive model is available. We also discuss a number of possible use cases for improving storage system reliability through the use of sector error predictors. We evaluate one such use case in detail: We show that the mean time to detecting errors (and hence the window of vulnerability to data loss) can be greatly reduced by adapting the speed of a scrubber based on error predictions.", "pdfUrls": [ "https://www.usenix.org/conference/atc17/technical-sessions/presentation/mahdisoltani", "https://www.usenix.org/system/files/conference/atc17/atc17-mahdisoltani.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/f28f/c6d566006ac0433a5f002fa46f76c206329b.pdf", "s2Url": "https://semanticscholar.org/paper/f28fc6d566006ac0433a5f002fa46f76c206329b", "sources": [ "DBLP" ], "title": "Proactive error prediction to improve storage system reliability", "venue": "USENIX Annual Technical Conference", "year": 2017 }, "f311412463c33223947df56ae04644e8c68cdd5d": { "authors": [ { "ids": [ "1801523" ], "name": "Peng Jiang" }, { "ids": [ "1742495" ], "name": "Gagan Agrawal" } ], "doi": "10.1145/3018743.3018760", "doiUrl": "https://doi.org/10.1145/3018743.3018760", "entities": [ "Finite-state machine", "HTML", "Huffman coding", "Manycore processor", "Multi-core processor", "Parallel computing", "Pushdown automaton", "Regular expression", "SIMD", "Speculative execution", "Speedup", "Tokenization (data security)", "Xeon Phi" ], "id": "f311412463c33223947df56ae04644e8c68cdd5d", "inCitations": [ "289f1567dafdadb4209e5302e31d9364e1fab46e", "071341873bf6755131dae4347a09996b29852c90", "b9cd558c4a8215a99c4fa8091140f3d412430e2c" ], "journalName": "", "journalPages": "179-191", "journalVolume": "", "outCitations": [ 
"ced67b58ff902a631add9df25cd4a09a65f9c3b7", "3710d16919bf3a0bc7c3c5647d377ab449964ff9", "a207985b7828224f5a1d3fb10ba94e8e7bdd25e5", "a62ea31fbbdb5c4031ea929e82ea086122d7833c", "45010247286ccd7df378b1ad959e08e5cbdbf5ed", "34b44a9e55184b48c94a15f29f052941b342e8bf", "1291dc27b5e569bfeae7c9d114eed350b31cb8b7", "85bb2a3f3684334ba1e5ad6bc7795a0330cf5421", "3f4b5703f44970649551c96c6891465339e78ee4", "47a6a274c648aeb5ff02eb09aff7ea310eae122e", "18f278c8317e98dc31d270577b60b0624fa3c31f", "191fd33f17c2a79b3825d4cc2105c47a8f16ba44", "500c7a8fa53b31260d2cf4a7a2e5d1e47622b414", "87de316ea08272afbda356b8d580385dd0d8382f", "3c84f22df1948dfa8b1b14bbd4c850baf9c5b632", "89a109ff0fc018f4138924b647f6bb7b7aa93785", "27b1d02ab9edf212682fdfc7f8478aab471e6183", "70b48b364b26b82e447571ac846871418e09ea2a", "80527e7595530951081494d1b98f3f13da3033a2", "3b6711bd158a375267999ac095b8c1a76d9dc464", "5412468cac5613762699d107dd519da94541017c", "577412cf4fb567fa3dfd2c8cde337590122cb34a", "716b3455c4df7b8cfaade6801adf4e8538279ebd", "7492a6938f3dc878a57a08ea411a65c997aa6b0f" ], "paperAbstract": "Finite State Machine (FSM) is the key kernel behind many popular applications, including regular expression matching, text tokenization, and Huffman decoding. Parallelizing FSMs is extremely difficult because of the strong dependencies and unpredictable memory accesses. Previous efforts have largely focused on multi-core parallelization, and used different approaches, including {\\em speculative} and {\\em enumerative} execution, both of which have been effective but also have limitations. With increasing width and improving flexibility in SIMD instruction sets, this paper focuses on combining SIMD and multi/many-core parallelism for FSMs. We have developed a novel strategy, called {\\em enumerative speculation}. 
Instead of speculating on a single state as in speculative execution or enumerating all possible states as in enumerative execution, our strategy speculates transitions from several possible states, reducing the prediction overheads of speculation approach and the large amount of redundant work in the enumerative approach. A simple lookback approach produces a set of guessed states to achieve high speculation success rates in our enumerative speculation. We evaluate our method with four popular FSM applications: Huffman decoding, regular expression matching, HTML tokenization, and Div7. We obtain up to 2.5x speedup using SIMD on one core and up to 95x combining SIMD with 60 cores of an Intel Xeon Phi. On a single core, we outperform the best single-state speculative execution version by an average of 1.6x, and in combining SIMD and many-core parallelism, outperform enumerative execution by an average of 2x.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3018760" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f311412463c33223947df56ae04644e8c68cdd5d", "sources": [ "DBLP" ], "title": "Combining SIMD and Many/Multi-core Parallelism for Finite State Machines with Enumerative Speculation", "venue": "PPOPP", "year": 2017 }, "f31796af827c0ee9326d4546121b126ec69c38a2": { "authors": [ { "ids": [ "37146060" ], "name": "Sergio M. Martin" }, { "ids": [ "34768652" ], "name": "Marsha J. Berger" }, { "ids": [ "1777004" ], "name": "Scott B. 
Baden" } ], "doi": "10.1109/IPDPS.2017.44", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.44", "entities": [ "Block (programming)", "C++", "Computation", "Inline expansion", "Intel Edison", "Message Passing Interface", "Overlap\u2013add method", "Programmer", "Relocation (computing)", "Runtime system" ], "id": "f31796af827c0ee9326d4546121b126ec69c38a2", "inCitations": [], "journalName": "", "journalPages": "998-1007", "journalVolume": "", "outCitations": [ "2043b7c6fa9bdd1174513cb0528cf8735c68e819", "1b058a94bf00727c3e86155566e1e1b1a0d1b7c5", "984250da0a78dbc6f47ce5624a98cfd6a4dc0aae", "7945a39eebec1f288264464ff70c5a9a1715f367", "383aec58bdf09e4549c4df2c984214838c5cb7f6" ], "paperAbstract": "We discuss early results with Toucan, a source-to-source translator that automatically restructures C/C++ MPI applications to overlap communication with computation. We co-designed the translator and runtime system to enable dynamic, dependence-driven execution of MPI applications, and require only a modest amount of programmer annotation. Co-design was essential to realizing overlap through dynamic code block reordering and avoiding the limitations of static code relocation and inlining. We demonstrate that Toucan hides significant communication in four representative applications running on up to 24K cores of NERSC\u2019s Edison platform. Using Toucan, we have hidden from 33% to 85% of the communication overhead, with performance meeting or exceeding that of painstakingly hand-written overlap variants. 
Keywords-Communication/Computation Overlap; Source-to-Source Translator; MPI; Data-Driven.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.44", "http://cseweb.ucsd.edu/groups/hpcl/scg/papers/2017/IPDPS17-CameraReady.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/f317/96af827c0ee9326d4546121b126ec69c38a2.pdf", "s2Url": "https://semanticscholar.org/paper/f31796af827c0ee9326d4546121b126ec69c38a2", "sources": [ "DBLP" ], "title": "Toucan - A Translator for Communication Tolerant MPI Applications", "venue": "IPDPS", "year": 2017 }, "f3318491a55590e00dfe45d68708f515822e343a": { "authors": [ { "ids": [ "1689575" ], "name": "Shumo Chu" }, { "ids": [ "1838891" ], "name": "Chenglong Wang" }, { "ids": [ "2560192" ], "name": "Konstantin Weitz" }, { "ids": [ "39866566" ], "name": "Alvin Cheung" } ], "doi": "", "doiUrl": "", "entities": [ "Automated theorem proving", "Conjunctive query", "Constraint satisfaction problem", "Correctness (computer science)", "Experiment", "Proof assistant", "Relational database management system", "Rewrite (programming)", "SQL", "SQL Server Compact", "Undecidable problem" ], "id": "f3318491a55590e00dfe45d68708f515822e343a", "inCitations": [ "1dfc6a048fa9d7d8f6c4e9928c3dfe7332963850", "6e4682ba700c6926b49aa7b6215ab8d9afdd7b7f", "da7155479d31287978f8d56ceafea2f5e5d908d6", "5a38f3a81712b748e5279e43e2fbb212cf017933", "2b740ddd333370f404a755759d38870f6fd9f7eb", "8c7cf9c759dcca3195dea6e27c2e25ee9a05671c", "71cebf34099632371f1088d43a824a2c97a28fad", "a82f6eeaa9051cab2dcc17a264a5d759b08ec461" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "02f8e4a8b3f16a988233f309db548415268322c2", "0f642a292c967ad0a251ce1b87d063775f1974fa", "82f729361d8ea2e7bb98e15f6813fd3748961211", "8ad190feef8bc7744f6b3f155661f5a1c3389ab5", "8434d69ace5d5484cbce4e590bc62d17b467d4dd", "bff784498bffa84e0600361c2e864b852d85029d", "7c889b839e99316f749c4d4bff45ccdd7dbd46ef", "51a179eec117ec2105023c3e906433a7301140fd", 
"7dbf4c5424c676f7e04010a0a6678cab40e71332", "235b9c8f10461a95398e169ecb91cf3e223d3350", "119f99dd30e725040b5e5633ece9962de71f9d84", "43faccfff448d78e108f9bb7643bd024fb61ebbb", "364fad95dc43e62639b0f41e4e24bf31004c420e", "3e68d730b678eb4994e46fb5b4edeaa2c5740ad8", "605ce5fe2f5b1e478a2528f1c514b77fc2607577", "036e20936fc1e452509c0b64196a0e937ab733be", "21fe7561d76e27c5b537ebe841c1effc1bb16b0a", "36222f8eb2ccf21ca345e15186cea64506581543", "4e60a72a0b58f62b405ab5eb43b184f5fff77710" ], "paperAbstract": "Deciding query equivalence is an important problem in data management with many practical applications. Solving the problem, however, is not an easy task. While there has been a lot of work done in the database research community in reasoning about the semantic equivalence of SQL queries, prior work mainly focuses on theoretical limitations. In this paper, we present COSETTE, a fully automated prover that can determine the equivalence of SQL queries. COSETTE leverages recent advances in both automated constraint solving and interactive theorem proving, and returns a counterexample (in terms of input relations) if two queries are not equivalent, or a proof of equivalence otherwise. Although the problem of determining equivalence for arbitrary SQL queries is undecidable, our experiments show that COSETTE can determine the equivalences of a wide range of queries that arise in practice, including conjunctive queries, correlated queries, queries with outer joins, and queries with aggregates. Using COSETTE, we have also proved the validity of magic set rewrites, and confirmed various real-world query rewrite errors, including the famous COUNT bug. 
We are unaware of any prior tool that can automatically determine the equivalences of a broad range of queries as COSETTE, and believe that our tool represents a major step towards building provably-correct query optimizers for real-world database systems.", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p51-chu-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/f331/8491a55590e00dfe45d68708f515822e343a.pdf", "s2Url": "https://semanticscholar.org/paper/f3318491a55590e00dfe45d68708f515822e343a", "sources": [ "DBLP" ], "title": "Cosette: An Automated Prover for SQL", "venue": "CIDR", "year": 2017 }, "f3563870dbfc1b10769d6640fc4e6b419b046f6b": { "authors": [ { "ids": [ "1697830" ], "name": "Ning Ding" }, { "ids": [ "2981910" ], "name": "Y. Charlie Hu" } ], "doi": "10.1145/3064176.3064206", "doiUrl": "https://doi.org/10.1145/3064176.3064206", "entities": [ "Android", "Asynchronous I/O", "Black box", "Diff utility", "Graphics", "Graphics processing unit", "Holism", "Image stitching", "Information flow (information theory)", "Interaction", "Mobile app", "OpenGL", "Rendering (computer graphics)", "Smartphone", "Software bug", "User interface", "Visual effects" ], "id": "f3563870dbfc1b10769d6640fc4e6b419b046f6b", "inCitations": [], "journalName": "", "journalPages": "359-373", "journalVolume": "", "outCitations": [ "475323cde4293723ac53fc3a8a3749bb82432268", "75ea8cef3730ee258571c7fefca4fe7036611c36", "491bbfcc4d5b8d322b312fb18bbc5d9f7bc5b2d4", "1b1ff7f94430f47d109d0deb6856c98d9df518e8", "24dcf23f4aeb146b1323b8e9f559f17f6282fdd7", "2166ed56495a8e528f891067a138f63913e9fb00", "5b9f54be658fe5e42448bbcf3a33fff9532cc0b1", "21c2189714ffc75ce5b2f2ec76616526cdff7d59", "a0b1b8ee4a9e6ae68ce6a712ad0a66ddb4a12117", "1e126cee4c1bddbfdd4e36bf91b8b1c2fe8d44c2", "265f9fe11ab32cc66b9df3823d2cd756295c5fa7", "5d323fd52e3a4cf93c5a0d951822d67cc8467fa4", "24e6f34e499634393416ea09c1aadd37ec9e8542", "47cdefebd5534d1d8c5d0f8061b482dbcd656e63", 
"84fdccb41f31247dfb86aadba6f2b4d75538767f", "0b369ac8bd9e0c618e4ea3568ebaa944f460c454", "456e30db9b437eb073064eac5b2a1962e464c2af", "5a51a18a63fc57cd9ef206bcfdb303933c2bcfb9", "8d78b035469b2c0c8238c2b4c85460b04aa6d4ef" ], "paperAbstract": "Graphics is one of the major energy drain sources in smartphone apps. To optimize the app graphics energy, however, developers face the challenge of highly complex graphics rendering process, which involves multiple system layers including the app, the framework, the GPU, and the asynchronous interactions among them. Current diagnostic tools can profile the resource usage from certain layers, but fall short in stitching together profiling information across all the layers which is needed to provide developers with the visual effect-energy tradeoff at the app source-code level.\n In this paper, we design and implement a holistic graphics energy diagnosis tool, GfxDoctor, that helps developers to systematically diagnose energy inefficiencies in app graphics at the app source-code level, by precisely quantifying (1) the visual effect of each UI update, and (2) the aggregate energy drain spent in traversing the entire frame rendering stack due to each UI update. GfxDoctor overcomes three challenges faced in deriving per-UI-update visual effect and energy accounting, asynchrony across system layers, UI update batching, and \"black-box\" GPU, with two key techniques -- lightweight view-frame-ID-based information flow tracking, and OpenGL record-and-replay plus frame diffing. We show the effectiveness of GfxDoctor by profiling a randomly sampled set of 30 popular Android apps which reveals three types of graphics energy bugs happening in 8 out of the 30 apps. 
Removing these bugs reduces the app energy drain by 46% to 90%.", "pdfUrls": [ "http://doi.acm.org/10.1145/3064176.3064206" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f3563870dbfc1b10769d6640fc4e6b419b046f6b", "sources": [ "DBLP" ], "title": "GfxDoctor: A Holistic Graphics Energy Profiler for Mobile Devices", "venue": "EuroSys", "year": 2017 }, "f3729b0f8640411d89b5f0179ea3735443739853": { "authors": [ { "ids": [ "3831282" ], "name": "Josh Tobin" }, { "ids": [ "38960812" ], "name": "Alexander Breuer" }, { "ids": [ "1722735" ], "name": "Alexander Heinecke" }, { "ids": [ "3191398" ], "name": "Charles Yount" }, { "ids": [ "2305796" ], "name": "Yifeng Cui" } ], "doi": "10.1007/978-3-319-58667-0_8", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_8", "entities": [ "Knights", "Xeon Phi" ], "id": "f3729b0f8640411d89b5f0179ea3735443739853", "inCitations": [ "0ec4d0465cc53dc705d432274a52cda3cf6e030a" ], "journalName": "", "journalPages": "139-157", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_8" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f3729b0f8640411d89b5f0179ea3735443739853", "sources": [ "DBLP" ], "title": "Accelerating Seismic Simulations Using the Intel Xeon Phi Knights Landing Processor", "venue": "ISC", "year": 2017 }, "f3b9114447a2c1e5bcadb4d0f713765bcf1834f2": { "authors": [ { "ids": [ "11833602" ], "name": "Yingchao Huang" }, { "ids": [ "1678390" ], "name": "Dong Li" } ], "doi": "10.1109/CLUSTER.2017.42", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.42", "entities": [ "Addressing mode", "Benchmark (computing)", "Computer memory", "Graphics processing unit", "Mathematical model", "Memory hierarchy", "Performance prediction", "Queuing delay", "Replay attack", "X86" ], "id": "f3b9114447a2c1e5bcadb4d0f713765bcf1834f2", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", 
"journalPages": "166-177", "journalVolume": "", "outCitations": [ "2c2fa29dfbbab106f1b94cdfdc67939d3355bd30", "1eb3992563b7b9fbf0c1da57d62f47220e6af5d5", "34e2b75fd5717029fc9da92dd6160eb6e2d19ad9", "0ef82bbfdec840663026dc2fa9e3db111add7efa", "c3c244e6a07810e738c8eb3c10d652b7da0267d6", "04ec5964a08a2ad62a30fea1fb9eff1e484a4524", "25296a1ee792b8709e037aa3da7ea156c41f5ccf", "00156e79606084497789662dfaf59c3b54a10722", "35b1b5a69d7882053aa35e7463ceb903733a2cce", "85398d5f19157c91bf00da3d36210e72d57887e4", "339b09ff3d328d5ec6542a6eaa57045d4fe61c5c", "5ab997b6ddb66fde2c9ea0cffcb6b869e2b74409", "2bedfffe72f27ab4c8ac67d2c17bda4ea23df748", "a5eb8900450908f3e245c3740420af4cb2348ef8", "75489469044a0bd8dd6a2d785873b48128bbf845", "2a660e81e6501ec3489d962fe87448ecf277237f", "2b85f7d3cc58dad3cf913e4a85b4e7108dc2ebbc", "23177452df15b652dd54a59324502b92c99687a7", "55220bc99ffe36591a4b31a2ee9e40620381e0ca", "702ad24deb683795e03ff1a79e96afb73cb4d988", "1401df37cc3fc78f26570d601fd123f17646b2d2", "2d6f002477015469075954c6748a1a85af352c94", "2954071739e1df663ee207e130465cb1789ae982", "2ad29134da93304e72dd047ca99ec6cfef2b4990", "d9b47764db442dc1bc1dad1570c85367002afe4a", "40718dab3e261c2456c3576d15dd0105f1e2e4e2", "32d40133459c318bc66aa781b6ce3c1921c0c13a", "548c6ffefd409f77fe24c3482257e03be2cb5617" ], "paperAbstract": "A heterogeneous memory system (HMS) consists of multiple memory components with different properties. GPU is a representative architecture with HMS. It is challenging to decide optimal placement of data objects on HMS because of the large exploration space and complicated memory hierarchy on HMS. In this paper, we introduce performance modeling techniques to predict performance of various data placements on GPU. In essence, our models quantify and capture implicit performance correlation between different data placements. 
Given the memory access information and performance of a sample data placement, our models predict performance for other data placements based on the quantified correlation. We reveal critical performance factors that cause performance variation across data placements. Those factors include instruction replay, addressing mode, hardware queuing delay of memory requests, off-chip memory access latency, and caching effects. Those factors, which are often not sufficiently considered in the existing performance models, can significantly impact modeling accuracy. We introduce a series of techniques to model those factors. We extensively evaluate our models with a variety of benchmarks with various data placements. Our models are able to quantitatively predict the benefit or performance loss of data placements.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.42" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f3b9114447a2c1e5bcadb4d0f713765bcf1834f2", "sources": [ "DBLP" ], "title": "Performance Modeling for Optimal Data Placement on GPU with Heterogeneous Memory Systems", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "f3c7404f534020e2b01dbfa478cff45c60618000": { "authors": [ { "ids": [ "2445900" ], "name": "Berkeley R. Churchill" }, { "ids": [ "37529085" ], "name": "Rahul Sharma" }, { "ids": [ "32389243" ], "name": "J. F. 
Bastien" }, { "ids": [ "4689402" ], "name": "Alexander Aiken" } ], "doi": "10.1145/3037697.3037754", "doiUrl": "https://doi.org/10.1145/3037697.3037754", "entities": [ "C standard library", "Comparison of programming languages (string functions)", "Correctness (computer science)", "Fault detection and isolation", "Google Chrome", "Google Native Client", "Operating system", "Sandbox (computer security)", "Speedup", "Superoptimization" ], "id": "f3c7404f534020e2b01dbfa478cff45c60618000", "inCitations": [ "5da199a2b340da3bec5aced418d7e52ccabda182", "3aae8a65228e86e907e095593cfb2dd85ea422ee" ], "journalName": "", "journalPages": "313-326", "journalVolume": "", "outCitations": [ "b9addc8ce998f6892120c2c8b23ae183312bfa6c", "d8b4164fef65ffc7082a3c95b0a706e5c3aa38f9", "49d50f1d0bb2a03388f3cb60f9f1ca01974d371c", "3542df8c401e8a1734992c95f4eede537efdc1e9", "090414388514998416f3d503fac27354a03c4738", "04ef59d39d7c47ed8d471260d0e6ae52f4e820e5", "2b976cf5ef035b653cffa127014ed62efeb47df0", "a207c57a9e96ac5ab719c9741c98b25d9788ba02", "1f404efe0953ada483a9b7f14e9c082558d196ac", "7fa71e17142563013365daa8526a1323f123961a", "1e52a2e8535509ab0111c0c5d89a88d3bb10b34c", "447adbe8ceee715c12b1e8707814b1cabcf5a21d", "97c1177a1702e97a30f860baf15023c1ce172549", "1796693cac1375a236e657e6115d5b21d84a69db", "02e5b7aa2c920d6cd251e954a3dd314a174164a2", "2c21f9488edfb2586327528bb59461a41363fc42", "00a7d4442ab22d4a24a3814367267a4f6d53529f", "02b72a79f17d7d86bb7b1d1e8ff8f659ca2bb1f0", "01334117dc8bb99b0ff884d6c2984f79c23f5deb", "252e15047f3cb2cd2c26c21cf74fe2e7038a3f40", "2fd6959aa2d2090ae14d3136a7a9499778eea69f", "238be0efe497fc297013ae16109fbbd2ee3d9733", "0641050837116522c610d7e7128ea3f2034e7531", "01ee7db67463b53143e0b2c126363ed6d3c8a532", "6d12aea56165acf3715e2c82b5f560e48359366d", "0856f6f40b889dba559f19654834114e9f469760", "134f4e5946efe8af306fda71d9d47e1e81dbc27c", "00c08861cfb438d5ff209dfadc2d839641cd3ca9", "58ee997cf82f4a7547857a74863ffdec077ee190", 
"13f63d5dc563ec42d8bfedd6b65d762e9aba68cb", "011f7da0095ac8c0d4477eeda2728e5f80a35767", "26b9001cce4a7f2e838ef99d0e7593b18553a7e0", "f8e4812bbb131c09a641e3e55c3f392e032679eb", "b00672fc5ff99434bf5347418a2d2762a3bb2639", "8a6d19bea6f04e2bf2277c7ccd61becdf2bb48e7", "50b8e5fc4f4d9f1c2f84efa04ecab75d0eeef8fb", "2c18ec63b49ba81b4eb2b67ba30a9607b0662979", "0c6d97fbc3c753f59e7fb723725639f1b18706bb", "85e7266af5751f7e1c5e26b03e71e2f92af7b8c3", "3960dda299e0f8615a7db675b8e6905b375ecf8a", "5e74f5ba5c7174e3ecf6ab2581a5e745bb69dd54", "2c20f8cf078fe1fd1543a58b3c34cc079f5252c7", "1be37ab7b64c78351e20952d4261033328ecd69c", "1a4c7185626d0f2acebf7f05a29fa2073a2fa841" ], "paperAbstract": "Software fault isolation (SFI) is an important technique for the construction of secure operating systems, web browsers, and other extensible software. We demonstrate that superoptimization can dramatically improve the performance of Google Native Client, a SFI system that ships inside the Google Chrome Browser. Key to our results are new techniques for superoptimization of loops: we propose a new architecture for superoptimization tools that incorporates both a fully sound verification technique to ensure correctness and a bounded verification technique to guide the search to optimized code. 
In our evaluation we optimize 13 libc string functions, formally verify the correctness of the optimizations and report a median and average speedup of 25% over the libraries shipped by Google.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037754", "http://theory.stanford.edu/~aiken/publications/papers/asplos17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f3c7404f534020e2b01dbfa478cff45c60618000", "sources": [ "DBLP" ], "title": "Sound Loop Superoptimization for Google Native Client", "venue": "ASPLOS", "year": 2017 }, "f3d967a7b242c4c56f7603095a89687421e4df6b": { "authors": [ { "ids": [ "2274395" ], "name": "Yanjie Fu" }, { "ids": [ "2045628" ], "name": "Guannan Liu" }, { "ids": [ "2039569" ], "name": "Mingfei Teng" }, { "ids": [ "1682418" ], "name": "Charu C. Aggarwal" } ], "doi": "10.1145/3097983.3098044", "doiUrl": "https://doi.org/10.1145/3097983.3098044", "entities": [ "Computer user satisfaction", "Crowdsourcing", "Experiment", "Feature selection", "FlipKey", "Integer programming", "Learning to rank", "Loss function", "Neural coding", "Optimization problem", "Personalization", "Program optimization", "Recommender system" ], "id": "f3d967a7b242c4c56f7603095a89687421e4df6b", "inCitations": [], "journalName": "", "journalPages": "165-173", "journalVolume": "", "outCitations": [ "4e7034ae30eae61db772d1b8f872ab6c90df1570", "34d5e5f0f1595d7e9c87952baa177c07f82b02ca", "77008e0911bd5a5ed5f51310b19254220b835dd3", "4a394e2dcef843b1392605e8f9f37617ee6f113d", "921470d44320a5dc4c144278cef1dc157b7b81f4", "03553fd8365bb28fd11e4acb5a732d19478dfddc", "75e85c2e90b0abb17ae6445516a49ac05c1dbf0f", "5c9dc54f9dba91c9cea5732b82b874be1239ec36", "3c9b3d653047e200801a47412e06ddb495cda23d", "7e13088d568cfda45b4b042d44876e03c65ab68e", "a21692737d2c6837db182491b8f9ef19516a5c39", "a47adcc6f6428a57bcbdc1d98b9a20cd7d75e0b2", "a489d95fb930401c1f4b7d92bb139d271d49abbf", "c802c7918e5a5b9cbb2bd9d397518a255ff1465e", 
"617279b61a0a2b1e60d73dd799068dfdb7de15cd", "5a86fcb2b9b204a377f9a48baf338c48f8012f83" ], "paperAbstract": "Due to the sparseness of quality rating data, unsupervised recommender systems are used in many applications in Peer to Peer (P2P) rental marketplaces such as Airbnb, FlipKey, and HomeAway. We present an integer programming based recommender systems, where both accommodation benefits and community risks of lodging places are measured and incorporated into an objective function as utility measurements. More specifically, we first present an unsupervised fused scoring method for quantifying the accommodation benefits and community risks of a lodging with crowd-sourced geo-tagged data. In order to the utility of recommendations, we formulate the unsupervised P2P rental recommendations as a constrained integer programming problem, where the accommodation benefits of recommendations are maximized and the community risks of recommendations are minimized, while maintaining constraints on personalization. Furthermore, we provide an efficient solution for the optimization problem by developing a learning-to-integer-programming method for combining aggregated listwise learning to rank into branching variable selection. We apply the proposed approach to the Airbnb data of New York City and provide lodging recommendations to travelers. 
In our empirical experiments, we demonstrate both the efficiency and effectiveness of our method in terms of striving a trade-off between the user satisfaction, time on market, and the number of reviews, and achieving a balance between positive and negative sides.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098044" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f3d967a7b242c4c56f7603095a89687421e4df6b", "sources": [ "DBLP" ], "title": "Unsupervised P2P Rental Recommendations via Integer Programming", "venue": "KDD", "year": 2017 }, "f3dec4cb3741bf3b88ab547e28fb8b37e371d72f": { "authors": [ { "ids": [ "2265023" ], "name": "Do Le Quoc" }, { "ids": [ "2042672" ], "name": "Ruichuan Chen" }, { "ids": [ "3025359" ], "name": "Pramod Bhatotia" }, { "ids": [ "2314032" ], "name": "Christof Fetzer" }, { "ids": [ "2809994" ], "name": "Volker Hilt" }, { "ids": [ "1730027" ], "name": "Thorsten Strufe" } ], "doi": "10.1145/3135974.3135989", "doiUrl": "https://doi.org/10.1145/3135974.3135989", "entities": [ "Algorithm", "Apache Flink", "Apache Spark", "Approximate computing", "Approximation algorithm", "Baseline (configuration management)", "Computation", "Machine code", "Reservoir sampling", "Secure copy", "Service control point", "Speedup", "Stream processing" ], "id": "f3dec4cb3741bf3b88ab547e28fb8b37e371d72f", "inCitations": [ "0a92b75415121f5f9fed192c97b48959451a9072", "330de12472ed98642e1ed28944ff94d3d6eee8de", "df96114c34c1cb9aa8c1237ad710adfab3c269b6", "3b55ccc328412988d6af3bc76f5070d68d72ca1a", "baa3f0306e01a16dad20a45b50c5b7a656e8f14b" ], "journalName": "", "journalPages": "185-197", "journalVolume": "", "outCitations": [ "0ef88610f4382a9c6e5c8020ab6ccf3dde0fb3eb", "df96114c34c1cb9aa8c1237ad710adfab3c269b6", "37e0d25940bd49022c41e63909532acd88eb16b9", "17a1bdf365d125ff6667ab4524fa620e34a13b11", "0b2a811c6272298f34f21aa52162d8c7816f4206", "ec78f31c4d43c8de4ccd66a73778ff0913375a96", "0456a5c3b2001465d05e84ce6786ef200184de65", 
"32334506f746e83367cecb91a0ab841e287cd958", "500e9737ddd9257ef2ec95b0e3bf798e86e60378", "9bcc0099f0d34c391ca1a3c5220cb0b3b33c4183", "4c40ba88b4c895b5c4d94fd8024e87f3a6b2d602", "9d88a4c2a971545c3546be1b16cf030ed5781947", "0541d5338adc48276b3b8cd3a141d799e2d40150", "4607d232f40da417bd852d9f17832b837ced7fbc", "45b50ed3d33633978964893b3a58ca369f35bf7e", "162e6c06bbe83daf74b4fb849367f123b4d65850", "0608d9937c074520cdc93cc444cc1c77039c5332", "cd642576ce8502b533e229b537f9ffbe9254aef6", "3b034ee536cbf8c0152c8eae29b74a821d958976", "22dfc02134728131f1f1bf2d8c2329727cf8d12a", "0558c94a094158ecd64f0d5014d3d9668054fb97", "4a42f1599d7e2d1a5f74651f4ba21386f9afdb31", "3af15292037d1fa634662f9acec89e89d0e21656", "00aa614734a26a19b09a0a3bdee2adc77bdac5e4", "8bd231fd5382cf2f128314af40390b21e4b41a6d", "da01f7fcc5c7eeba75bc09a41fdd946e65210090", "f060942169f56e0aa8f3253047fac49b7c8eff2d", "209300e7bc7392974ae771afe6a0c3da577584da", "baa3f0306e01a16dad20a45b50c5b7a656e8f14b", "1e3449bc19cbd12f20b7084592ae304055248262", "4e525abd3a0659a3607a0b3ba4232d2584a21b6d", "3d5f3bedca7828899b81448e9c33717dd55c36c2" ], "paperAbstract": "Approximate computing aims for efficient execution of workflows where an approximate output is sufficient instead of the exact output. The idea behind approximate computing is to compute over a representative sample instead of the entire input dataset. Thus, approximate computing --- based on the chosen sample size --- can make a systematic trade-off between the output accuracy and computation efficiency.\n Unfortunately, the state-of-the-art systems for approximate computing primarily target batch analytics, where the input data remains unchanged during the course of computation. Thus, they are not well-suited for stream analytics. This motivated the design of StreamApprox--- a stream analytics system for approximate computing. 
To realize this idea, we designed an online stratified reservoir sampling algorithm to produce approximate output with rigorous error bounds. Importantly, our proposed algorithm is generic and can be applied to two prominent types of stream processing systems: (1) batched stream processing such as Apache Spark Streaming, and (2) pipelined stream processing such as Apache Flink.\n To showcase the effectiveness of our algorithm, we implemented StreamApprox as a fully functional prototype based on Apache Spark Streaming and Apache Flink. We evaluated StreamApprox using a set of microbenchmarks and real-world case studies. Our results show that Spark- and Flink-based StreamApprox systems achieve a speedup of 1.15×---3× compared to the respective native Spark Streaming and Flink executions, with varying sampling fraction of 80% to 10%. Furthermore, we have also implemented an improved baseline in addition to the native execution baseline --- a Spark-based approximate computing system leveraging the existing sampling modules in Apache Spark. 
Compared to the improved baseline, our results show that StreamApprox achieves a speedup of 1.1×---2.4× while maintaining the same accuracy level.", "pdfUrls": [ "http://doi.acm.org/10.1145/3135974.3135989" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f3dec4cb3741bf3b88ab547e28fb8b37e371d72f", "sources": [ "DBLP" ], "title": "StreamApprox: approximate computing for stream analytics", "venue": "Middleware", "year": 2017 }, "f3f5c38525e58c094880db73643add9342c320f2": { "authors": [ { "ids": [ "2399462" ], "name": "Nicolas Laurent" } ], "doi": "10.1145/3136014.3136036", "doiUrl": "https://doi.org/10.1145/3136014.3136036", "entities": [ "Bottom-up parsing", "Design pattern", "Error message", "Parsing", "Parsing expression grammar", "Redshift", "Shift-reduce parser", "Software design pattern" ], "id": "f3f5c38525e58c094880db73643add9342c320f2", "inCitations": [], "journalName": "", "journalPages": "38-42", "journalVolume": "", "outCitations": [ "303b122551f37383a43acc1229f6e57dcde20f40", "4e37893e503466fe3673ae117f6d7c364c21779a", "2bed707e5a3f42294988cdcfe7ff15b839b6bcd8", "4a022cefcf1c3980c18ded9e542229565f010eed", "6669f223fba59edaeed7fabe02b667809a5744d9", "9269893f6ce2a5571c058019e95143ef85cbe1a1" ], "paperAbstract": "Red Shift is a new design pattern for implementing parsers. The pattern draws ideas from traditional shift-reduce parsing as well as procedural PEG parsers. Red Shift parsers behaves like shift-reduce parsers, but eliminate ambiguity by always prioritizing reductions over shifts. To compensate the resulting lack of expressivity, reducers are not simple reduction rules but full-blown procedures written in a general-purpose host language. I found many advantages to this style of parser. In particular, we can generate high-quality error messages more easily; and compose different style of parsers. 
I also speculate about how Red Shift parsers may improve partial compilation in the context of an IDE.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136036" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f3f5c38525e58c094880db73643add9342c320f2", "sources": [ "DBLP" ], "title": "Red Shift: procedural shift-reduce parsing (vision paper)", "venue": "SLE", "year": 2017 }, "f434db944348bbc2bffe1b44122740682163b69e": { "authors": [ { "ids": [ "2595046" ], "name": "Naama Ben-David" }, { "ids": [ "1717462" ], "name": "Guy E. Blelloch" } ], "doi": "10.1145/3087801.3087828", "doiUrl": "https://doi.org/10.1145/3087801.3087828", "entities": [ "Algorithm", "Analysis of algorithms", "Asynchronous I/O", "Backoff", "Communications protocol", "Memory address", "Shared memory", "Time complexity", "With high probability" ], "id": "f434db944348bbc2bffe1b44122740682163b69e", "inCitations": [ "f8b4eca178b95047647db1fef0af9023c0ee4bc0" ], "journalName": "", "journalPages": "53-62", "journalVolume": "", "outCitations": [ "946abaa6ceb50439db1811acf759828b59d46e6f", "866189f6ff951c0e4b2ddcdf328692a74d678ef5", "4bd009ce33cbea440b4f464a33416687de5f9573", "98ebcc174595d732ed719eb77c55bf35d7cb0637", "429e313d33a82bf086b69d47eee735450cbeb4ae", "0024f59a1fffb81c134fecf21e53261dbd29c2b7", "9a925d73aae2b8c1aa6b4eb2a90d2d93c8d5e3c4", "19b17ed55736466e0e14104372ba516049e7e7a4", "a517253fa6459d052fdaeda335e7dce2c1040a34", "0a47fae7c4571958c42b6185430beeb3309889bd", "039db2ee81b2b267b8a5366dd1bfa21650d75117", "5519d807a51f721a78b63bbb2298fb2bc4951d7c", "087a14f9f750435b3a46556c1c819ecf40a8ddac", "066907935b61c73d1e408ee01ae31a7844336353", "be9178bbe90d871962caa14fe310ffcdcee1d50b", "9038d2568350bd96cbc95889b24e9c4dd5cf6a0f", "0e41c57137ed2e59f2a11ec1b26a50f691f33310", "29a402cd8922e08072c0404a3080a0447e9a2fb2", "47b5da30deeea0bb1ecac41a5891d72264a8729a", "03193d30be0721877b5e0bee31d16d0068544c6f", "1c808644694dd6e287018491ca14a060fab1a6bd", 
"4f313902ddda555e3393eac1d64695a044b6abfd", "0157dc0404cb6b31a1beef7e6855980220849654", "1c9f351406bf057ca76d1e4b40ad7a4696d62ed4", "38256fb77eabac90741049a243bc9e2624dc35a2" ], "paperAbstract": "Randomized backo\u0082 protocols have long been used to reduce contention on shared resources. \u008cey are heavily used in communication channels and radio networks, and have also been shown to greatly improve the performance of shared memory algorithms in real systems. However, while backo\u0082 protocols are well understood in many se\u008aings, their e\u0082ect in shared memory has never been theoretically analyzed. \u008cis discrepency may be due to the di\u0081culty of modeling asynchrony without eliminating the advantage gained by local delays. In this paper, we introduce a new cost model for contention in shared memory. Our model allows for adversarial asynchrony, but also provides a clear notion of time, thus enabling easy calculation of contention costs and delays. We then consider a simple use case in which n processes try to update a single memory location. Using our model, we \u0080rst show that a na\u0131\u0308ve protocol, without any backo\u0082, requires \u03a9(n3) work until all processes successfully update that location. We then analyze the commonly used exponential delay protocol, and show that it requires \u0398(n2 logn) work with high probability. Finally, we show that the exponential delay protocol is suboptimal, by introducing a new backo\u0082 protocol based on adaptive probabilities and showing that, for the same use case, it requires only O (n2) work with high probability. ACM Reference format: Naama Ben-David and Guy E. Blelloch. 2017. Analyzing Contention and Backo\u0082 in Asynchronous Shared Memory. In Proceedings of PODC\u201917, July 25-27, 2017, Washington, DC, USA, , 10 pages. 
DOI: 10.1145/3087801.3087828", "pdfUrls": [ "http://doi.acm.org/10.1145/3087801.3087828", "http://www.cs.cmu.edu/~nbendavi/podc2017.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/f434/db944348bbc2bffe1b44122740682163b69e.pdf", "s2Url": "https://semanticscholar.org/paper/f434db944348bbc2bffe1b44122740682163b69e", "sources": [ "DBLP" ], "title": "Analyzing Contention and Backoff in Asynchronous Shared Memory", "venue": "PODC", "year": 2017 }, "f486ceb5604c25000fbbc5a37f7ce716339f3788": { "authors": [ { "ids": [ "1678662" ], "name": "Yang Li" }, { "ids": [ "1713096" ], "name": "Fang Liu" }, { "ids": [ "1730284" ], "name": "Nong Xiao" }, { "ids": [ "7216253" ], "name": "Jianqiang Zeng" }, { "ids": [ "8301799" ], "name": "Lingyu Zhu" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.12", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.12", "entities": [ "B+ tree", "B-tree", "CPU cache", "Central processing unit", "Computer data storage", "Copy-on-write", "Data structure", "Durability (database systems)", "In-place algorithm", "Log-structured file system", "Mathematical optimization", "Memory footprint", "Non-volatile memory", "Persistence (computer science)", "Persistent data structure", "Program optimization", "Run time (program lifecycle phase)", "Special number field sieve", "Throughput" ], "id": "f486ceb5604c25000fbbc5a37f7ce716339f3788", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "89-97", "journalVolume": "", "outCitations": [ "10419c6f4aa50a36ed0b103c9ddb9aec45f133fe", "1d58f59e34c022951920a97ae9733b8f3683f4d1", "a95436fb5417f16497d90cd2aeb11a0e2873f55f", "d0b6d2075a653d60452b6df0fced4ee0ae093dd2", "a84a4fd3ab2d7e75b350ed75cb9ec6d1fbacaac4", "f4ed7fb35916bd0d36a53198384bb0ed2ff34c3f", 
"088e3e939ad234b6fdd0e321290fb26937dc2553", "5bb770af1973f929e8622f17ddf378d439245144", "9183cde02e4306828089fb8adae74736a9df3ceb", "ef387050a4d0b8e870464514c2311413e6c4ac95", "05a1357946de5eca42a477b7b268db4944219a2e", "d04957ae69caf43707b13fa833e50119724688f1", "544c1ddf24b90c3dfba7b1934049911b869c99b4", "0c60a639dc9cd8014f685ec986c29bf55a10bb5a", "24724ad8962a9e04eb496fddaefe9708f6960601", "87eb6044798792bb4fffd2dcb477bc8ad0982268", "2c84daae142c5b0f4ca6a6772ca7e8cac7d7afca", "9858251a88afc29fa9fdb8234d998dcdf182f144", "b1ce55ed6cd7fb989787d016e2783e49a66ac431", "0b0a8fb95e3331cacfe58f8938c3f7134a4c70e1", "917894256919b7a85f7c4f11c096ef99abb35d13", "1cac40347773f012d908ac5ef578da940dd840ee", "ccd9685f9041a896d14dc095221b7673e6ddd121", "2621b8f63247ea5af03f4ea0e83c3b528238c4a1", "3e8e43f61b3af63c6a8bb981b5d085c8afb1b9e2", "2e595e9e72d950681be0d6d12e983cdaa0c7cb4e" ], "paperAbstract": "Emerging non-volatile main memories (NVMMs) technologies can provide both data persistence and high performance at memory level. The design of existing file systems for NVMM has to handle the data durability problem between CPU cache and NVMM. However, most NVMM-aware file systems could not meet the strong data consistency requirement of applications with data structures, e.g. B-Tree. Traditional techniques, such as copy-on-write and journaling, delivering data consistency, have defects of write amplification and data copy, respectively. In this paper, we explore SNFS, one log-structured file system with optimization of data consistency based-on non-volatile main memory, providing high performance for applications with small writes. Specifically, SNFS adopts a small data-log mechanism to journal fine-grained data writes. It also uses in-place writes to minimize memory footprint for small data updating and accelerates data block locating with hashing strategy. 
Finally, we evaluate SNFS's performance with several write-intensive workloads, and experimental results show that SNFS improves the system throughput by up to 23 times compared to state-of-the-art file systems and reduces the execution time by up to 65.5%.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.12" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f486ceb5604c25000fbbc5a37f7ce716339f3788", "sources": [ "DBLP" ], "title": "SNFS: Small Writes Optimization for Log-Structured File System Based-on Non-Volatile Main Memory", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "f4cf3c8b8bbc0e98be827b90628a7f2a9ab413dd": { "authors": [ { "ids": [ "1698586" ], "name": "Jie Zhang" }, { "ids": [ "1720335" ], "name": "Xiaoyi Lu" }, { "ids": [ "1731654" ], "name": "Dhabaleswar K. 
Panda" } ], "doi": "10.1145/3050748.3050765", "doiUrl": "https://doi.org/10.1145/3050748.3050765", "entities": [ "Amazon Elastic Compute Cloud (EC2)", "CMA-ES", "Docker", "Full virtualization", "GIOVE", "Hypervisor", "InfiniBand", "Locality of reference", "Loopback", "OpenVMS", "Operating-system-level virtualization", "Shared memory", "Supercomputer", "Virtual machine", "X86 virtualization" ], "id": "f4cf3c8b8bbc0e98be827b90628a7f2a9ab413dd", "inCitations": [ "8a702304f6964ad5abdb1c4b19e6a645738a4474", "0351f22135e61de52250654b1f8b277cd8c7a173", "01d7b1187d8593983181d18c357ffbed9c6ac8ac" ], "journalName": "", "journalPages": "187-200", "journalVolume": "", "outCitations": [ "0fc0910aba6a5690059843fd72e99c871a16a577", "3b7e2038ec22cf637df70c833d473b0f3b43713a", "3e072004786e19f4f3d8918fd7483c9e12e4d4ef", "71dec883ad8effe7d6075512138625080d32ab47", "03ed30028164bd7b5215da3fb431f4402071a49f", "0007f817593d82a859f2aa2ba1d1ee7e9199e190", "54cff2c17f8c24508ef82aa8ef2d7ed3fcc5db97", "192fec9d330de17828caba1d2a44983ca414c900", "6678b17fc8758efea8d32c2d47f9924f8a0cdc6d", "59d45d685e35f5a84768c029ea09b9c48765251e", "0abc3e83ccd6e685f8d0299f24f03ae28f4c2459", "62b757b1a924f3386c33b8a988327e3749ab8a54", "010c1b6fad2e47868b22f3787e2f355875f10cec", "3135e5342bbde77d7c734456dae974e2bb5928e3", "aaee60074480179a69f1891a02698632d788613d", "855b6f36f8b7da5451dc853f9bf0e8babcf25eb3", "0d9aea55a54ccc6ab64995d70bf6ae464af25f0d", "01da9f866f2bf8a7c9319994075c0f7ea199e1ff", "a7298ad92e7f58242f3e43007fe12389e19b29a3", "97f355e50deffa3416b34dba7f2e3ab505ac8b2d" ], "paperAbstract": "Hypervisor-based virtualization solutions reveal good security and isolation, while container-based solutions make applications and workloads more portable and distributed in an effective, standardized and repeatable way. 
Therefore, nested virtualization based computing environments (e.g., container over virtual machine), which inherit the capabilities from both solutions, are becoming more and more attractive in clouds (e.g., running Docker over Amazon EC2 VMs). Recent studies have shown that running applications in either VMs or containers still has significant overhead, especially for I/O intensive workloads. This motivates us to investigate whether the nested virtualization based solution can be adopted to build high-performance computing (HPC) clouds for running MPI applications efficiently and where the bottlenecks lie. To eliminate performance bottlenecks, we propose a high-performance two-layer locality and NUMA aware MPI library, which is able to dynamically detect co-resident containers inside one VM as well as detect co-resident VM inside one host at MPI runtime. Thus the MPI processes across different containers and VMs can communicate to each other by shared memory or Cross Memory Attach (CMA) channels instead of network channel if they are co-resident. We further propose an enhanced NUMA aware hybrid design to utilize InfiniBand loopback based channel to optimize large message transfer across containers when they are running on different sockets. Performance evaluations show that compared with the performance of the state-of-art (1Layer) design, our proposed enhance-hybrid design can bring up to 184%, 81% and 12% benefit on point-to-point, collective operations, and end applications. 
Compared with the default performance, our enhanced-hybrid design delivers up to 184%, 85% and 16% performance improvement.", "pdfUrls": [ "http://doi.acm.org/10.1145/3050748.3050765" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f4cf3c8b8bbc0e98be827b90628a7f2a9ab413dd", "sources": [ "DBLP" ], "title": "Designing Locality and NUMA Aware MPI Runtime for Nested Virtualization based HPC Cloud with SR-IOV Enabled InfiniBand", "venue": "VEE", "year": 2017 }, "f4e42c15a7a35a198a04a74cbdbe19d360a2d00c": { "authors": [ { "ids": [ "38872388" ], "name": "Wei Han" }, { "ids": [ "27998850" ], "name": "Daniel Mawhirter" }, { "ids": [ "22401706" ], "name": "Bo Wu" }, { "ids": [ "28028789" ], "name": "Matthew Buland" } ], "doi": "10.1109/PACT.2017.41", "doiUrl": "https://doi.org/10.1109/PACT.2017.41", "entities": [ "Algorithm", "Bit array", "Central processing unit", "Graph (abstract data type)", "Graphics processing unit", "Out of memory", "Overhead (computing)", "Rename (relational algebra)", "Shared memory", "TRAVERSE", "Time complexity", "Vertex (computer graphics)", "Vertex (geometry)", "X-Stream Network" ], "id": "f4e42c15a7a35a198a04a74cbdbe19d360a2d00c", "inCitations": [ "1f0572f47be66c2c0fbf3fd0f98f25e5b5f88361" ], "journalName": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "journalPages": "233-245", "journalVolume": "", "outCitations": [ "015ecc71cf4efce11dda83942f84b8e27692a402", "993e21ed73fc39048a42d06855bc85236ffd1063", "191fd33f17c2a79b3825d4cc2105c47a8f16ba44", "8db3c11cd85195f459b8ba82fe3326e8f86f1d52", "1156f60e40548096df49528b1342bb3e88b0f378", "175d795f44037ef60dd9df341701cd5fdc449f1f", "ee065876c6605908724392b0e19307598d22a8f9", "b513711621e81d0abd042e0877ca751581a993f5", "6de3915df2b9927a78f213629f3bcb052ec21e8b", "41c80483e80fab3a18280da790cec2c8d6060bdb", "93ee8e1c05d11d63aa3d61653b2c8bae75e0aecd", "0ad8e89091eed09217e66adc98136126addc2619", 
"2a17c90ed723d6a14415cc1f677a5c0aa512f501", "141e35263ab810983c90d47ad62eb4fab5e51717", "21f35a5ecc0faf0c5f760e20cb9ce9e63a30a768", "512a1ebdcaca56f3ea0c21aa2abe9a5ab7dace06", "0706356c9ab6014d6b04577d38289ea8328291a5", "7ebb9fad71ce8e08d5284b7644a5452cff6c75b3", "2b9e6181502369199bd89691a27f89bdbaac36e4", "bdbac20d53a08672c5b926ebcf84f54276b467a3", "2ae3ac3f7463f838c38e6ca250ca294e813529f2", "ce18973fb7c23cb4fc1c1a61c1c1c4333f4abad1", "6a888f3dd0a17b0241be61daa378ba6caffa6617", "3486aeaf540c48952120fe853d672af984f40a6a", "1eb3992563b7b9fbf0c1da57d62f47220e6af5d5", "586414efa54ba9f4a7def0dc5322b7723f22c552", "46f3bb6751419b87856c4db0193e7a72ef3fa17c", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "11e4d4d00c7b1e3aa9fcf3c490b635df98827dd9", "41880f9408bf4d826e4a715ee783e2d9d8666c2f", "5b75c61e3183ea6228d08b2f6c00fd2cd74baada", "3c84f22df1948dfa8b1b14bbd4c850baf9c5b632", "75089ac937d66503cd8442d74bbaec1b578ed5ea", "3bb6d5834bfb355553588e382ac5f9fa8a8d831d", "410ae589668068dcc0a25b39763ff68684806433", "a62ea31fbbdb5c4031ea929e82ea086122d7833c", "2209304ccc2b0501debd5e9a90ae739f9a30cdef", "095abbc06375f5790967c174ef58f8e677d1a21b", "3d985a05e4a49be71d497e7a2ff3fcbeb74c4bc8", "217beeb53274ba6972d660afff1841e890f3721e", "52a4130c74ad95664fbc067ef91fd75b748ac409", "080f44d89bf6f4404f476ffec8d2f8ad3f60e07d" ], "paperAbstract": "Most GPU-based graph systems cannot handle large-scale graphs that do not fit in the GPU memory. The ever-increasing graph size demands a scale-up graph system, which can run on a single GPU with optimized memory access efficiency and well-controlled data transfer overhead. However, existing systems either incur redundant data transfers or fail to use shared memory. In this paper we present Graphie, a systemto efficiently traverse large-scale graphs on a single GPU. Graphie stores the vertex attribute data in the GPU memory and streams edge data asynchronously to the GPU for processing. Graphie's high performance relies on two renaming algorithms. 
The first algorithm renames the vertices so that the source vertices can be easily loaded to the shared memory to reduce global memory accesses. The second algorithm inserts virtual vertices into the vertex set to rename real vertices, which enables the use of a small boolean array to track active partitions. The boolean array also resides in shared memory and can be updated in constant time. The renaming algorithms do not introduce any extra overhead in the GPU memory or graph storage on disk. Graphie's runtime overlaps data transfer with kernel execution and reuses transferred data in the GPU memory. The evaluation of Graphie on 7 real-world graphs with up to 1.8 billion edgesdemonstrates substantial speedups over X-Stream, a state-of-theart edge-centric graph processing framework on the CPU, and GraphReduce, an out-of-memory graph processing systems on GPUs.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/PACT.2017.41" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f4e42c15a7a35a198a04a74cbdbe19d360a2d00c", "sources": [ "DBLP" ], "title": "Graphie: Large-Scale Asynchronous Graph Traversals on Just a GPU", "venue": "2017 26th International Conference on Parallel Architectures and Compilation Techniques (PACT)", "year": 2017 }, "f5dcb92c127573347f209c1b0cd09eb4ffbfa818": { "authors": [ { "ids": [ "1702971" ], "name": "Jens Breitbart" }, { "ids": [ "2682220" ], "name": "Simon Pickartz" }, { "ids": [ "1725473" ], "name": "Stefan Lankes" }, { "ids": [ "3018445" ], "name": "Josef Weidendorfer" }, { "ids": [ "1704230" ], "name": "Antonello Monti" } ], "doi": "10.1109/CLUSTER.2017.59", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.59", "entities": [ "Computer data storage", "Job queue", "Memory bandwidth", "Memory bound function", "Run time (program lifecycle phase)", "Scheduling (computing)", "Supercomputer", "Throughput", "Virtual machine" ], "id": "f5dcb92c127573347f209c1b0cd09eb4ffbfa818", "inCitations": [ 
"ec2a447b8f3efe0df1b9e35a3c6b9869c75b5010" ], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "400-409", "journalVolume": "", "outCitations": [ "e59e81579734b7746f082a6ca4c95c216344474d", "09856da05bf463794a35799eaaee3a14a045afd1", "54f1bd14440eabb2495cc27e045af113500b72e5", "61e285cc4bd8fc9c4e157596e6833a0987e66652", "475212b08f58461e2468a0af1a247763275cadc1", "2966ce6356da4ca4ab9422d9233253dc433b2700", "76a51322bfe3fa86f2cf6a5eeafa85b0da65ca57", "4e40dfd5b6c66818249bb353c96a72dc5f9db746", "7f4b805160cfcbd546c70c5e781b64b85c2b4850", "3574657705475722b6c398c266805f758268778b", "9446946efa95c7e35fe2a5b67ac16fb7896e43f3", "3ca86aef0e2e6042a6b908645c8817676331962e", "48b18c093cdb9a46887c6f94b8bd369ed0465564", "f65dd009ce76891840f3744b15a130d8473e488b", "b738d92ae07719160bffd31719b84e670e2cefe4", "a1f34f7d700752d27d37ac149697d81c67a23f2e", "48b22e82dba733f04a5af3408ecab872ba38f0c3", "7f2863f57afb3e22f08672426495a7c53e798b60", "5633fcd0211e083e169123d7fa52e83ed4727545", "2a9620c89aa1b9c90131e24c87d7e84c88c30333", "370a605eb294e893b0e1a50c86c5546057818ec8", "1b65277f50406900a475a68856df8fe8835c19be" ], "paperAbstract": "Most applications running on supercomputers achieve only a fraction of a system's peak performance. It has been demonstrated that the co-scheduling of applications can improve the overall system utilization. However, following this approach, applications need to fulfill certain criteria such that the mutual slowdown is kept at a minimum. In this paper, we present an HPC scheduler that applies co-scheduling and utilizes virtual machine migration for a re-orchestration of applications at runtime based on their main memory bandwidth requirements. 
Given a job queue consisting of main memory-bound applications and compute-bound applications, we can see a throughput increase of up to 35% while at the same time reducing energy consumption by around 30%.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.59" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f5dcb92c127573347f209c1b0cd09eb4ffbfa818", "sources": [ "DBLP" ], "title": "Dynamic Co-Scheduling Driven by Main Memory Bandwidth Utilization", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "f6287bb33c5ed50a8b85b4a35ce22e6e6e3891f8": { "authors": [ { "ids": [ "1950808" ], "name": "Kun Tang" }, { "ids": [ "34966505" ], "name": "Devesh Tiwari" }, { "ids": [ "3134457" ], "name": "Saurabh Gupta" }, { "ids": [ "1730937" ], "name": "Sudharshan S. Vazhkudai" }, { "ids": [ "33367879" ], "name": "Xubin He" } ], "doi": "10.1109/CLUSTER.2017.22", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.22", "entities": [ "End-to-end principle", "Frequency capping", "Simulation", "Supercomputer" ], "id": "f6287bb33c5ed50a8b85b4a35ce22e6e6e3891f8", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "344-348", "journalVolume": "", "outCitations": [ "7f6c49645686f4814c01aca621341a0b244898b6", "1645af0ebea3336d50e7140000c5adbc9bc24833", "5e3fb6a4514550dbdb1bfeb4e5705e4a7ffcc84f", "009342aa77a56c46a475fa85e66506219f271526", "429d28998216da5648f40248bf4bc9e508edd2fd", "4908fe53a91465eaf95b21c4ca4f05378b90dcc4", "21a0c328f428a1d4694246ed6c44ed472b74133a", "5cf4b2a8552277b75f6329ef162891dd4d890830", "d32d4ff33b1b2665d6081194eb6acdc3c7dd6891", "18fe996c6f43a8f301cd842507045b679ba3506a", "81a3ea77ab4f2944cfd5d507dcf87f0599c079a1", "3b5657c7ed08519bba5903af01ba6d42dfcc8848", "36275d14731ab7ac192eb4af487f5d34958ad084", "3218bbfd89deae4134d6c6d7f8f3ceb5c3a361f7", "0eacd1b47786f740b723d906d46e160f143c0378", 
"6a74067e23bb3a0c85f2db3b2c1cd198cebc32d7", "5ee6d6523a8e7b0fae7539503854a8d3659f126c", "07a66e0f2777bb0005384defd228d5aa0bd7f9a4", "1585eaffcf9c9836eb1607e279e43ce2793e59a0" ], "paperAbstract": "In high-performance computing (HPC), end-to-end workflows are typically utilized to gain insights from scientific simulations. An end-to-end workflow consists of scientific simulation and data analysis, and can be executed in-situ, in-transit, and offline. Existing studies on end-to-end workflows have largely focused on the high-performance execution approaches. However, the emerging heterogeneous architectures and energy concerns lead to the rethinking of workflow execution approaches. As a guide to the rethinking, this paper evaluates how to run end-to-end HPC workflows efficiently in terms of performance, energy, and error resilience. The evaluation covers emerging heterogeneous processor architectures, processor power capping techniques, and heterogeneous-reliability memory.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.22" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f6287bb33c5ed50a8b85b4a35ce22e6e6e3891f8", "sources": [ "DBLP" ], "title": "Effective Running of End-to-End HPC Workflows on Emerging Heterogeneous Architectures", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "f6944bffd4e30f6b406d273d6a84a34a0e834bc2": { "authors": [ { "ids": [ "2716195" ], "name": "An Huynh" }, { "ids": [ "1724468" ], "name": "Kenjiro Taura" } ], "doi": "10.1109/CLUSTER.2017.82", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.82", "entities": [ "Benchmark (computing)", "Directed acyclic graph", "Multi-core processor", "Parallel computing", "Parallel programming model", "Programmer", "Run time (program lifecycle phase)", "Runtime system", "Scheduling (computing)", "Software suite", "Time series", "Timeline", "Utility" ], "id": "f6944bffd4e30f6b406d273d6a84a34a0e834bc2", "inCitations": 
[], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "114-125", "journalVolume": "", "outCitations": [ "0606b9b2bedb67039eb1615f8ef5b13f42f8339a", "5e28ff23f2804b5581e000cbed3c58b5ed163854", "d2378cbfe444ca619aaf1de6e6240df5b2667912", "b3f8d6e69302b0ee1b40e01bf65da138f9d0f281", "8fc0623a28cc193927cd012bd8daac5e6cad75d3", "053955b8003d82ef26c06ee3a5ffcc49459c4b23", "83f2087f3c602d043277927380e35885879210f5", "cf7486af6017a124c5070cd1021ed54a52861a72", "b67c85e514a4aee75a9348c9c6cae1566e1b3216", "ac7e5716b47cc2678b70dadd34d27648ceecfb0c", "0fb659af82f2277c8a62ac888f4bfd01570e5470", "b20cdd99af5421e93c811873411b55e7c26a4c69", "c5d0d547b6a3fa470dcc77f558f6c7c5768edabd", "1914348544b6145be77945fce14e2c68b56dd17e", "8f26c867d791b619c0867c90b3171d8bf9ed8dc0", "1eac8c7fb82607a6d20187cfb29b3f9a02d578c2", "1134aaa6a93f502cac9ce551b13c00b10ff34feb", "1f2ff98f9413bb36c641e9edcfa79f7b33eeb80a", "47fea97038923902a502403219fc44fd22b5d19f", "0b7373f2588fda6732aec095f3d98be8b6621124" ], "paperAbstract": "Modern task parallel programming models provide sophisticated runtime task schedulers for handling the scheduling of logical tasks on a large and varying number of hardware parallel resources at runtime. The performance of these programming models increasingly rely on how fast their runtime schedulers do their job. The more delay a scheduler incurs in matching a ready task to a free processor core at any point in time, the more impact it causes to the program's parallel execution. We have developed a tool that is able to detect these delayed intervals caused by the scheduler in a parallel execution, and spot them specifically on two kinds of visualizations: the logical task graph captured at runtime (DAG visualizations) and time-series visualizations of threads (timelines). 
By further analyzing positions of these delays on those visualizations the tool could identify possible scheduling issues in the scheduler that causes these delays, yielding improvement insights for the development of task parallel programming models. From an application programmer's perspective, our tool is useful by being able to contrast differences of various task parallel programming models executing the same program, helping users choose the right model for their application. We demonstrate that usefulness by using the tool to analyze 10 applications in BOTS benchmark suite in our case studies.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.82" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f6944bffd4e30f6b406d273d6a84a34a0e834bc2", "sources": [ "DBLP" ], "title": "Delay Spotter: A Tool for Spotting Scheduler-Caused Delays in Task Parallel Runtime Systems", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "f69cf829b22d1c4727f7ea629360636014c9e56d": { "authors": [ { "ids": [ "3327677" ], "name": "Ed Seidewitz" } ], "doi": "10.1145/3136014.3136028", "doiUrl": "https://doi.org/10.1145/3136014.3136028", "entities": [ "Compiler", "Computer architecture simulator", "Executable", "Executable UML", "Integrated development environment", "MagicDraw", "Modeling language", "Simulation", "System Simulation", "UML tool", "Unified Modeling Language" ], "id": "f69cf829b22d1c4727f7ea629360636014c9e56d", "inCitations": [], "journalName": "", "journalPages": "217-220", "journalVolume": "", "outCitations": [ "cb34dc991d14d82e93e5e066c23ab91b6d54520e", "8e74175db325f7de39ebc6354ecbc27362580ce9", "fba39d39f3764d7ad99a78ff490d7cf1299c123d" ], "paperAbstract": "Alf is an action language designed as a textual notation for specifying detailed behaviors within an executable UML model. 
The Alf implementation in MagicDraw, a leading commercial tool for modeling using the Unified Modeling Language (UML) from No Magic, Inc., aims to support the practical application of Alf in real-world uses of executable UML modeling. It includes syntax-aware editing and checking of Alf code, with valid code automatically and transparently compiled into UML activity models. The resulting models are fully integrated within the wider UML modeling context, and they can then be executed as part of full system simulation scenarios. The Alf compiler also tracks the dependencies of all Alf text on other UML model elements, allowing for automatic re-checking and re-building the Alf code as necessitated by changes in referenced elements. The goal is to provide an IDE-level experience for the easy entry and maintenance of Alf code within an overall executable UML model.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136028" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f69cf829b22d1c4727f7ea629360636014c9e56d", "sources": [ "DBLP" ], "title": "A development environment for the Alf language within the MagicDraw UML tool (tool demo)", "venue": "SLE", "year": 2017 }, "f6bdb554e5cebd9f862d294a435c2a187f81fa5e": { "authors": [ { "ids": [ "3244811" ], "name": "Shuo Zhou" }, { "ids": [ "2766863" ], "name": "Sarah M. 
Erfani" }, { "ids": [ "2051675" ], "name": "James Bailey" } ], "doi": "10.1109/ICDM.2017.77", "doiUrl": "https://doi.org/10.1109/ICDM.2017.77", "entities": [ "Algorithm", "Collaborative product development", "Dynamic data", "Experiment", "Negativity (quantum mechanics)", "Neural coding", "Scalability", "Sparse matrix", "Synthetic data", "Velocity" ], "id": "f6bdb554e5cebd9f862d294a435c2a187f81fa5e", "inCitations": [], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "675-684", "journalVolume": "", "outCitations": [ "4299ba257957b77eec8e063bd133e9c44c1c2a8c", "1a8b60db7595d60495530a59a609294fbf2870ba", "2e37d6b9a02ec31ea22252c37beae410c6b609a8", "11e90719acf63e74d20e487ce5742079860d5cd6", "446094674059088e9876e006a5cfc1984f6393c8", "1d0f25989452abbbc8feaf00a034ff110fc4b350", "a54b2b2b9b19c1a726ca82343270f2b88516d2a5", "437111b1534cce8d96a1f6b42223b9356c6d33bd", "28e23424e5143fa016268dc7530433a9b27fb959", "96d122a51c124bc9b013a4504cef15d820cbf02a", "276ebc620a8976026bd2d03582b9ecfa3738d43c", "e0336c0d72b3f6a4b7adbc0eb40da95cffda4544", "70560383cbf7c0dc5e9be1f2fd9efba905377095", "3a5a0af8dc50ecbfadab13bd6947dab2c7116dd5", "07ed71b436b9adf23f0f93c8e4533461b82e769a", "8f78f6148f6a7294a3f212c65c9e362c69ba5edb", "1d414b4f13694cc5aac244fa3ebc9ea738063a38", "184b7281a87ee16228b24716ca02b29519d52eb5", "0e601d009fd118cc165bfa2825c70b01940bdd9c", "94c95e1c69a65b6752a4ebeb6ef34f645cf7bebb", "0072eb224991ada6fc8a4e2d3465e4a51c0b26bc", "18c8f13b7b77bd0c67ddbdadf21cc3545b443068", "18aae0f20fdc6aab093c72c81005247d2cbc8512", "dbf27e801a0e5281125b149ab5cc3c47382e567f", "2e8ab628bc9f256c11c898aa44f049143c74d05d", "e23dd37582dfc31c25a5df644e3d08986c650182", "5ccd14d553d547384d7a848a6a532e65f1fd6746", "17a41aa18b8987ed87d6fc19b87d36faf8a4240c", "36f168eda4c6090de1e309b0935bc441cc032a60", "4229f467b059188fc7a1234016a3c80557fa7df0" ], "paperAbstract": "CANDECOMP/PARAFAC Decomposition (CPD) is one of the most popular tensor decomposition methods 
that has been extensively studied and widely applied. In recent years, sparse tensors that contain a huge portion of zeros but a limited number of non-zeros have attracted increasing interest. Existing techniques are not directly applicable to sparse tensors, since they mainly target dense ones and usually have poor efficiency. Additionally, specific issues also arise for sparse tensors, depending on different data sources and applications: the role of zero entries can be different; incorporating constraints like non-negativity and sparseness might be necessary; the ability to learn on-the-fly is a must for dynamic scenarios in which new data keeps arriving at high velocity. However, state-of-the-art algorithms only partially address the above issues. To fill this gap, we propose a general framework for finding the CPD of sparse tensors. Modeling the sparse tensor decomposition problem by a generalized weighted CPD formulation and solving it efficiently, our proposed method is also flexible enough to handle constraints and dynamic data streams. Through experiments on both synthetic and real-world datasets, for the static case, our method demonstrates significant improvements in terms of effectiveness, efficiency and scalability. 
Moreover, under the dynamic setting, our method speeds up current technology by hundreds to thousands times, without sacrificing decomposition quality.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.77" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f6bdb554e5cebd9f862d294a435c2a187f81fa5e", "sources": [ "DBLP" ], "title": "SCED: A General Framework for Sparse Tensor Decomposition with Constraints and Elementwise Dynamic Learning", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "f7533b520c2aafe739da4958b2092bf52b3952ae": { "authors": [ { "ids": [ "2331162" ], "name": "Costas Iordanou" }, { "ids": [ "2678175" ], "name": "Claudio Soriente" }, { "ids": [ "2698864" ], "name": "Michael Sirivianos" }, { "ids": [ "1683137" ], "name": "Nikolaos Laoutaris" } ], "doi": "10.1145/3098822.3098850", "doiUrl": "https://doi.org/10.1145/3098822.3098850", "entities": [ "A/B testing", "Comparison shopping website", "Computation", "Cross-site scripting", "Distributed computing", "E-commerce", "Peer-to-peer", "Sandbox (computer security)", "Secure multi-party computation", "Server (computing)", "Server-side", "Software deployment", "Watchdog timer" ], "id": "f7533b520c2aafe739da4958b2092bf52b3952ae", "inCitations": [ "17061f61a89ca329a658e76b1c6a329757bfea47", "78f8dde18d6e2f3788da6b80c0cc2e6d7eb218f3" ], "journalName": "", "journalPages": "376-389", "journalVolume": "", "outCitations": [ "1844578c5f75884baa4931d2987cab10d70bd304", "370e5b4ec00e883c294ff0628002dae57e206423", "01dbc5466cce6abd567cc5b34a481f5c438fb15a", "f16841e022038e94a59f7e0a82002102b78d79a4", "598848aaa4aa40bb6b7ab51490821a173cf18800", "4cf4ea6c801d3dad696464b198e7c51f1a77b302", "378646bd5f089a464015676e3743fc6a0762121c", "06a1d8fe505a4ee460e24ae3cf2e279e905cc9b0", "d9e43b5772f35cd98cf833324c85f1a2541438bb", "2e04e5c381edaf6334e43343aa8f0f79835dd1db", "651b7744bebfeb6c6e42aa6b6d566e507fe319a5", 
"abbb235fcf3b163afd74e1967f7d3784252b44fa", "528cee5472f0a98fc295b4d8caf2e66ca1544d54", "3f33c777387742f1713d98ba1a8817aac0598e19", "1d89a12092d6323b9d3b1a5bd4e6790897e2a2be", "bb63c68855d42c95623ed9362d0853ea1d4cc858" ], "paperAbstract": "We present the design, implementation, validation, and deployment of the Price Sheriff, a highly distributed system for detecting various types of online price discrimination in e-commerce. The Price Sheriff uses a peer-to-peer architecture, sandboxing, and secure multiparty computation to allow users to tunnel price check requests through the browsers of other peers without tainting their local or server-side browsing history and state. Having operated the Price Sheriff for several months with approximately one thousand real users, we identify several instances of cross-border price discrimination based on the country of origin. Even within national borders, we identify several retailers that return different prices for the same product to different users. We examine whether the observed differences are due to personal-data-induced discrimination or A/B testing, and conclude that it is the latter.", "pdfUrls": [ "http://conferences.sigcomm.org/sigcomm/2017/files/program/ts-9-2-Sheriff.pdf", "http://doi.acm.org/10.1145/3098822.3098850" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f7533b520c2aafe739da4958b2092bf52b3952ae", "sources": [ "DBLP" ], "title": "Who is Fiddling with Prices?: Building and Deploying a Watchdog Service for E-commerce", "venue": "SIGCOMM", "year": 2017 }, "f7577e4c6c2aca3fb1c556423c270717e7c83ead": { "authors": [ { "ids": [ "7726937" ], "name": "Mohammadreza Bayatpour" }, { "ids": [ "29719971" ], "name": "Sourav Chakraborty" }, { "ids": [ "1802958" ], "name": "Hari Subramoni" }, { "ids": [ "1720335" ], "name": "Xiaoyi Lu" }, { "ids": [ "1731654" ], "name": "Dhabaleswar K. 
Panda" } ], "doi": "10.1145/3126908.3126954", "doiUrl": "https://doi.org/10.1145/3126908.3126954", "entities": [ "Benchmark (computing)", "Central processing unit", "HPCG benchmark", "InfiniBand", "Omni-Path", "Parallel computing", "Scalability", "Throughput" ], "id": "f7577e4c6c2aca3fb1c556423c270717e7c83ead", "inCitations": [], "journalName": "", "journalPages": "64:1-64:11", "journalVolume": "", "outCitations": [ "a5895946af933fc3fb32f7e975a35ded0b63d619", "5f5c349fe0a2c49268820e1db32bce1588ee3b45", "10ca6fc3a9adf282073defda372355bfd668b31e", "2a0c9f8248aad793810dfc2ec2bc21a8ebdca6f4", "7605fe626c3598ee68fefaf1f4e1d21fcd2cb3d4", "a8c71a6f0cc2deff80c1647c35de31b5bb1a655f", "7536a348dacf1de7cc921214c07a87b8345d9996", "f4c217923ceebd709e8eb106b1f7d25fd5d088c2", "02de9d7b2c76a11896902c79b329a3034fc572b6", "3ddea5bd91789f6f912ac7a334d4b35120e777d6", "d83fe4a8e4e00f7c2b1020526cc83cf827c65fbe", "3f30b25fce16664accab00f54a27d4e8a6d09b01", "62b757b1a924f3386c33b8a988327e3749ab8a54", "71dec883ad8effe7d6075512138625080d32ab47", "c0c352b314e0d972e7eabd35e435789791d407cc", "0ca1e465dd85b8254bcdd7053032d7eab6e2d4b4", "3f750233c3e20da134b4427eb6645f877ac0a503", "0818fbaab77edfc91cb54c4cc1a1eab19676bc17" ], "paperAbstract": "Existing designs for MPI_Allreduce do not take advantage of the vast parallelism available in modern multi-/many-core processors like Intel Xeon/Xeon Phis or the increases in communication throughput and recent advances in high-end features seen with modern interconnects like InfiniBand and Omni-Path. In this paper, we propose a high-performance and scalable <u>D</u>ata <u>P</u>artitioning-based <u>M</u>ulti-<u>L</u>eader (DPML) solution for MPI_Allreduce that can take advantage of the parallelism offered by multi-/many-core architectures in conjunction with the high throughput and high-end features offered by InfiniBand and Omni-Path to significantly enhance the performance of MPI_Allreduce on modern HPC systems. 
We also model DPML-based designs to analyze the communication costs theoretically. Microbenchmark level evaluations show that the proposed DPML-based designs are able to deliver up to 3.5 times performance improvement for MPI_Allreduce for multiple HPC systems at scale. At the application-level, up to 35% and 60% improvement is seen in communication for HPCG and miniAMR respectively.", "pdfUrls": [ "http://doi.acm.org/10.1145/3126908.3126954" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f7577e4c6c2aca3fb1c556423c270717e7c83ead", "sources": [ "DBLP" ], "title": "Scalable reduction collectives with data partitioning-based multi-leader design", "venue": "SC", "year": 2017 }, "f785c442f9522c49884e49aa34e9daf6da32c3a2": { "authors": [ { "ids": [ "39856387" ], "name": "Laleh Aghababaie Beni" }, { "ids": [ "3054091" ], "name": "Aparna Chandramowlishwaran" } ], "doi": "10.1007/978-3-319-64203-1_35", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_35", "entities": [], "id": "f785c442f9522c49884e49aa34e9daf6da32c3a2", "inCitations": [], "journalName": "", "journalPages": "482-496", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_35", "http://newport.eecs.uci.edu/~amowli/hpcfactory/pdf/EuroPar-17.pdf" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f785c442f9522c49884e49aa34e9daf6da32c3a2", "sources": [ "DBLP" ], "title": "PASCAL: A Parallel Algorithmic SCALable Framework for N-body Problems", "venue": "Euro-Par", "year": 2017 }, "f7c0973089783b51ebf4fb385dfbacbbcb3d5099": { "authors": [ { "ids": [ "2952910" ], "name": "Jin-Min Yang" }, { "ids": [ "35685541" ], "name": "Enquan Yan" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.70", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.70", "entities": [ "Application checkpointing", "Diskless node", "Distributed computing", "Failure rate", "Fault tolerance", "Overhead projector" ], "id": 
"f7c0973089783b51ebf4fb385dfbacbbcb3d5099", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "539-546", "journalVolume": "", "outCitations": [ "03e9c5c4e1ae4fa725c1b10035741be4e65aac33", "18baeaa09ce028fb3044a89430a4939f270bc480", "0f55217987ec25afa0f815e0aa3957e669b0280e", "3a7417fd3711d6607b16b839fd768021eb09e629", "3e5387055046f17b1cf05f33c0e7f884e30a4fcb", "599018231e4c0e29c1caa58e6e47e626c5eb12f8", "5568df48a03cd16e286025c812f1912a7d1c1766", "0029d10f3f95f490de62ebf392b82ed9bba65f80", "6f6529af6a68d802254ff5ad9e952744d507fbac", "4962ba0d0af23e039cfb36a39571b72d5344cd94", "77f651d37c1d1fa7c69c8966680aec180e8f48dc", "896f6698a74e656174045dc20840dd7e925f18bd", "07d36b216c807c542ef7bd51f391a5a441c713dd", "653aca2181dcbea18fe9c0c121bbb6c61d3b8405", "2b9089329e1c8dd68d9416b1d71dba817ee16800", "67ddcb550dd8fe57e4567bd7ca78f6e841f25c4c", "168b8cbbbacd234f23b70b952ef58b5b56e67529", "8b3235bbd59d3b85081d9c22cf1af494e2d1159a" ], "paperAbstract": "Diskless checkpointing is an effective solution to avoid the I/O bottleneck in disk-based checkpointing for tolerating a small number of node failures in large distributed systems. However, the existing encoding schemes used by diskless checkpointing lead to high communication overheads because of cross node encoding. This negates the advantages of diskless checkpointing, especially in scenarios with a limited network bandwidth. This paper proposes a diskless checkpointing scheme with vertical encoding to address this problem. Vertical encoding eliminates the dependency among nodes and also facilitates a balanced communication. Moreover, an analysis model is developed to obtain an optimal configuration parameter. 
Experimental results show that the proposed scheme reduces significantly the communication overhead of both checkpointing and fault recovery, with no encoding overhead introduced.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.70" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f7c0973089783b51ebf4fb385dfbacbbcb3d5099", "sources": [ "DBLP" ], "title": "A Diskless Checkpointing Scheme Based on Vertical Encoding to Lower Fault Tolerance Overhead", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "f7e8cecd621a2ab009cb022eecdc0940502c6ebb": { "authors": [ { "ids": [ "38849152" ], "name": "Li-Pin Chang" }, { "ids": [ "2175178" ], "name": "Sheng-Min Huang" }, { "ids": [ "11031560" ], "name": "Kun-Lin Chou" } ], "doi": "10.1145/3078468.3078476", "doiUrl": "https://doi.org/10.1145/3078468.3078476", "entities": [ "Interrupt storm", "Solid-state drive", "Wear leveling" ], "id": "f7e8cecd621a2ab009cb022eecdc0940502c6ebb", "inCitations": [], "journalName": "", "journalPages": "5:1-5:7", "journalVolume": "", "outCitations": [ "2ee9054f414f74c31f030aa0a2fc88be0341a52e", "58d6f9e9f3aab0dc4089fc78e0f833c343a0044b", "2273211a5126a55b2d8dae10768dad222bcbe8d3", "c05ee7dee7d474424f3d403bb143ba93868e6925", "28177e71022e8a5610bad58c074945c9823911af", "05eb82aef29b2185517b63e8c51a8ac488382ef8", "66fb412a9481d0ef7582aec85241633cbba017c8", "719aeeaff7353058a152b4eb3ff77a193624a481", "64279c1d8efdc3565157de3edbc39003fe06d193", "0caaa3f8b9aac28bf8d957076e2ed72ab19b9363", "461ad971a16ab7cedcdd2e4ba45778585774856d", "5e08f66d0bca260458accdad741f6237199da091", "34f6cc5a0d7656a5c1d92049eda0533ca7c07add", "d36e9fbd0f6c34e60292534ee1b0d43575128f23", "2ac379d756ab039452f6bb6589ab73fb35de92fd", "9954b59cf94cbd84f592eb1d25ec2f518ba639ea" ], "paperAbstract": 
"Building self-healing SSDs is proven feasible by recent studies. When the stress of a block becomes critical, it can be healed to remove part of the stress. However, with wear leveling, all blocks are evenly worn and have similar stress, and all blocks could undergo the healing process within a short period of time. The intensive heal operations, called heal storms, cause highly unpredictable I/O performance and storage reliability. Inspired by the even distribution of erase counts under wear leveling, we propose to operate wear leveling on virtual erase counts instead of real erase counts. When the balance among virtual erase counts is achieved through wear leveling, all real erase counts become evenly dispersed in a controlled interval. In this way, blocks will undergo healing at different times. Virtual erase counts are progressively adjusted such that all blocks reach their endurance limit when the SSD permanently retires. Our results show that our approach successfully resolved the heal storm problem without impacting on the SSD lifespan.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078468.3078476" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f7e8cecd621a2ab009cb022eecdc0940502c6ebb", "sources": [ "DBLP" ], "title": "Relieving self-healing SSDs of heal storms", "venue": "SYSTOR", "year": 2017 }, "f8076a5e9acc718e22994adf2d653f33fcba327f": { "authors": [ { "ids": [ "3421852" ], "name": "Rafael K. V. 
Maeda" }, { "ids": [ "38183424" ], "name": "Qiong Cai" }, { "ids": [ "40011763" ], "name": "Jiang Xu" }, { "ids": [ "1915826" ], "name": "Zhe Wang" }, { "ids": [ "7483417" ], "name": "Zhongyuan Tian" } ], "doi": "10.1109/HPCA.2017.11", "doiUrl": "https://doi.org/10.1109/HPCA.2017.11", "entities": [ "CPU cache", "Cache (computing)", "Locality of reference", "Memory hierarchy", "Principle of locality", "Profiling (information science)", "Synthetic data" ], "id": "f8076a5e9acc718e22994adf2d653f33fcba327f", "inCitations": [], "journalName": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "journalPages": "145-156", "journalVolume": "", "outCitations": [ "2d51d48a33e9231c388ebffea44f12746e415ada", "6d5099039729d930841c21893c5585a194d90a79", "0455a164fdd31c24c37104853544a66191660659", "0cbc3b849eb23d23654c882c70cb65b19f99c011", "b77e7ae60aed8f307075c5a261274938da41e1e8", "27c7af8567915cd8f8a706357392bd1a20b3b8cf", "e7d152af63a9eb3d9750d9f3cd218322c94cb0cb", "08cfe650fdfa907764423958b1923e42ba945b7e", "db7fd20dc31565003fb992405ba83975d7f5f681", "1339efd08ff6249a1f819ea7e02ccf349ac62ab2", "35b1b5a69d7882053aa35e7463ceb903733a2cce", "a7f3103822beb0d6df835778f77bd7d429560f88", "93a6a32f1bbf1913e9e2232132ec4fa7a75ab152", "57d3035be09a0703d503da8af082b128af3dfdf6", "84b37617f3e71bd35d033721c09809ca5cca6f7e", "0332013fc380ca283d3afc457c430c513d19cc51", "199932878100e8fa2234facce3d3a2b4b24da391", "0b424dc16a57d2c380cb0aa39dabcb3411e80350", "dd9cb1f18de6ac02f9bdb1272ab8f2ba115d8011", "2fd637ff36c131ad82b2fcf0b1723196ea0ce05c", "4a58e3066f12bb86d7aef2776e9d8a2a4e4daf3e", "8009b1c8cc4af8d3d4b792ac32926487a428172e", "cd3de3003ed1dae5862a687ee5bfe7aea5a8e9c8", "30eedee57b75c230a4205fbaf30f37a751ef7070", "6f45e84202ee1678772899d3473a0b5d5ee4d886", "3380929dba1d81232b9b2b85a7616f17ab6dbfb5", "73c91c7297f21818a80550a2062124fd5d639078", "32f6ded4e88667f34fe49a0ee80d9a9093b00547", "1a5ce35fc5ad575c2c9e5f692bf263082d656f80", 
"4209919a9b9618d69a145b15927b5c455f9d05d4", "47ccfd0c9dc218f5496783310a28c581730b9ca7", "b3c4075941bdf06f9c4aa3cf6209a1ec99bdc786", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "214539c3dc1da3eefa9cdc65079b8dea72afb07c", "6760a91356a65c6ca9e442180390b3a6c1ed2a94", "37b5850e3e75a3462f3991491ca26674925f233b", "12df05f3e38c615fd613e211abcd24da3b269124", "3e74ae88cdaa33bf89136800258bde97ab397ec9" ], "paperAbstract": "Exploring the design space of the memory hierarchy requires the use of effective methodologies, tools, and models to evaluate different parameter values. Reuse distance is one of the locality models used in the design exploration and permits analytical cache miss estimation, program characterization, and synthetic trace generation. Unfortunately, the reuse distance is limited to a single locality granularity. Hence, it is not a suitable model for caches with hybrid line sizes, such as sectored caches, an increasingly popular choice for large caches. In this work, we introduce a generalization to the reuse distance, which is able to capture locality seen at multiple granularities. We refer to it as Hierarchical Reuse Distance (HRD). The proposed model has the same profiling and synthesis complexity as the traditional reuse distance, and our results show that HRD reduces the average miss rate error on sectored caches by more than three times. In addition, it has superior characteristics in exploring multi-level caches with conventional single line size. 
For instance, our method increases the accuracy on L2 and L3 by a factor of 4 and converges three orders of magnitude faster.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.11" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f8076a5e9acc718e22994adf2d653f33fcba327f", "sources": [ "DBLP" ], "title": "Fast and Accurate Exploration of Multi-level Caches Using Hierarchical Reuse Distance", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "f87652bad3bc1623da538fa4e098dbf01bd6fa1a": { "authors": [ { "ids": [ "40698778" ], "name": "M. Meraj Ahmed" }, { "ids": [ "2227074" ], "name": "Md Shahriar Shamim" }, { "ids": [ "29886952" ], "name": "Naseef Mansoor" }, { "ids": [ "35827107" ], "name": "Sayed Ashraf Mamun" }, { "ids": [ "40413136" ], "name": "Amlan Ganguly" } ], "doi": "10.1109/IGCC.2017.8323583", "doiUrl": "https://doi.org/10.1109/IGCC.2017.8323583", "entities": [ "2.5D", "ChIP-on-chip", "Computation", "Computation (action)", "DNA Integration", "Interconnection", "Interposer", "Multi-core processor", "Network on a chip", "Routing", "Scalability", "Silicon", "Simulation", "anatomical layer" ], "id": "f87652bad3bc1623da538fa4e098dbf01bd6fa1a", "inCitations": [], "journalName": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "journalPages": "1-6", "journalVolume": "", "outCitations": [ "c22cd78260126ea8e0183c23aeb9a2ec928658e3", "0a7929d3f54d7d0d6fe42f6378c0dc8f27c843f2", "1b348a82450968c0d34348b3c64524ca36c5836a", "17f07fe6da6a72be211a66c68bc46b3ee05e7583", "69e13716985b2e86b6e8e61335a325c776ae0045", "935ae58c43fc7d785e044a596b83b1ba73c6fbb1", "03262c10910fed123e11cb1b143beff1eff9d7c3", "7812b44e9f8ca754e934092b27d6b84d8f7b803b", "f253b90255bcbfb08d5467e0cfde383a2455c4b5", "cbbb8186de93d9e79d20e2122b9a7903d6b08cd1", "818d3154455a372598d5b56eb6d76cebdc22c611", "190ca682f7b22fb81f2e506354c93170e9721e0c", 
"8de20d9e01b189c02f5e68ae3720965bed48c82c", "43a82ee0a78aeafc9452053428c7caacc1cc152a", "8d9b0e2cc4b01d9e0cd5a7ad746961f0be071599", "b872e246d77ec5692a05a5ca0aa35168e202b3e8", "30ee81b5d67c98c7c42c2adfacf400f4ffbb9470", "9f93788eea83a284ad07141a78479f493a9c1e98" ], "paperAbstract": "With the increase in number of processing chips in platform based computation intensive systems such as servers, a seamless, scalable, energy efficient and high bandwidth interconnection network is required. Newly envisioned silicon interposers with Network-on-Chip (NoC) interconnection framework have emerged as an energy efficient technology for 2.5D integration of multiple processor and memory chips, where multiple chips are mounted on another die called the interposer and are interconnected using the metal layers of the interposer die. However, conventional interposer based multichip integration is limited to edge-to-edge connections between the adjacent dies leaving the interposer's routing resources underutilized. In this paper, we propose large scale utilization of the available abundant interposer resources for multichip integration by implementing a hypercube interconnection architecture in an interposer for chip-to-chip communication. 
Through system level simulations, we demonstrate that such multichip system integrated with interposer can provide high bandwidth and energy-efficient communication under various traffic patterns.", "pdfUrls": [ "https://doi.org/10.1109/IGCC.2017.8323583" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f87652bad3bc1623da538fa4e098dbf01bd6fa1a", "sources": [ "DBLP" ], "title": "Increasing interposer utilization: A scalable, energy efficient and high bandwidth multicore-multichip integration solution", "venue": "2017 Eighth International Green and Sustainable Computing Conference (IGSC)", "year": 2017 }, "f87e8a2a349a916ede5518acba68ca9d668b4c3a": { "authors": [ { "ids": [ "1704036" ], "name": "Peter Kling" }, { "ids": [ "2319507" ], "name": "Alexander M\u00e4cker" }, { "ids": [ "2608167" ], "name": "S\u00f6ren Riechers" }, { "ids": [ "1800890" ], "name": "Alexander Skopalik" } ], "doi": "10.1145/3087556.3087578", "doiUrl": "https://doi.org/10.1145/3087556.3087578", "entities": [ "Algorithm", "Approximation", "Approximation algorithm", "Bin packing problem", "Central processing unit", "Job shop scheduling", "Jumpstart Our Business Startups Act", "Makespan", "Multiprocessor scheduling", "Scheduling (computing)", "Set packing" ], "id": "f87e8a2a349a916ede5518acba68ca9d668b4c3a", "inCitations": [], "journalName": "", "journalPages": "123-132", "journalVolume": "", "outCitations": [ "84eacac35f81b97ed4e64fb99d3c2acfe9582658", "f307ff6fb520eb9b1a0094e9959008115fabd42e", "37e38261bcb2b24c3bc3610ce4e1f347df59af32", "0ed63db7c74ee626c067130e4c28bca79f829243", "e317273d1276071012a6ab6923eafcb1bc784581", "7ebaeaa1088b79c6515b37eba37a669e69a3c407", "286f9afa3f8217b16452b97148072966e73a0835", "0c4c1a7339a8298e0984dfb8fec6d606900d100c", "06d27a317e87a97245454540273f2307702e874b", "d4b0b8e6063ee53098d5677a5c5daa46de1a478e", "8c9f526a3ed84699473d48ae3a77c10f89e3d438", "5bef9e1dd6ef8fb2f6035d1624291ba0fce9594b", "2188ff6c93a8ba3912d324ff823ff5707debe888" 
], "paperAbstract": "We consider a scheduling problem on m identical processors sharing an arbitrarily divisible resource. In addition to assigning jobs to processors, the scheduler must distribute the resource among the processors (e.g., for three processors in shares of 20%, 15%, and 65%) and adjust this distribution over time. Each job j comes with a size pj ∈ R and a resource requirement rj > 0. Jobs do not benefit when receiving a share larger than rj of the resource. But providing them with a fraction of the resource requirement causes a linear decrease in the processing efficiency. We seek a (non-preemptive) job and resource assignment minimizing the makespan. Our main result is an efficient approximation algorithm which achieves an approximation ratio of 2 + 1/(m-2). It can be improved to an (asymptotic) ratio of 1 + 1/(m-1) if all jobs have unit size. Our algorithms also imply new results for a well-known bin packing problem with splittable items and a restricted number of allowed item parts per bin.\n Based upon the above solution, we also derive an approximation algorithm with similar guarantees for a setting in which we introduce so-called tasks each containing several jobs and where we are interested in the average completion time of tasks (a task is completed when all its jobs are completed).", "pdfUrls": [ "http://doi.acm.org/10.1145/3087556.3087578" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f87e8a2a349a916ede5518acba68ca9d668b4c3a", "sources": [ "DBLP" ], "title": "Sharing is Caring: Multiprocessor Scheduling with a Sharable Resource", "venue": "SPAA", "year": 2017 }, "f8a7eaa92426dbe7d549956aca87ffab6ec6b52f": { "authors": [ { "ids": [ "3039850" ], "name": "Dharanidhar Dang" }, { "ids": [ "40436575" ], "name": "Jyotikrishna Dass" }, { "ids": [ "38933347" ], "name": "Rabi N. 
Mahapatra" } ], "doi": "10.1109/HiPC.2017.00022", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00022", "entities": [ "Algorithm", "Analog computer", "Artificial neural network", "Artificial neuron", "Big data", "Cloud computing", "Convolutional neural network", "Deep learning", "Machine learning", "Memristor", "Neuromorphic engineering", "Optical computing", "Parallel computing", "Pipeline (computing)", "Reservoir computing", "Simulation", "Throughput" ], "id": "f8a7eaa92426dbe7d549956aca87ffab6ec6b52f", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "114-123", "journalVolume": "", "outCitations": [ "9952d4d5717afd4a27157ed8b98b0ee3dcb70d6c", "32a01a917bc310388002e7c7231ba2c07416bed6", "0b99d677883883584d9a328f6f2d54738363997a", "42dec19543930bffec09ab74441440fdec4c94b2", "061356704ec86334dbbc073985375fe13cd39088", "3888ee5d6308e1bffa1b0c922a4f200d083a628e", "0c89d5d37a25f693bc31e65c0c5b9bea63148e57", "1fdbe96edd8aa814f5c4b7f155736f5a6ad9795f", "23d14ab0f18fa881a2ac8ae027be6b9f2c91d74d", "0404d8f580496852b7bd9ff65e30fded0c7f797f", "357e97b04375f09e9f4cfd45c69ecd9d7f0a15e1" ], "paperAbstract": "Neuromorphic computing is a promising candidate to accelerate big data processing. Recently, several attempts have been made to design neuromorphic accelerators for popular machine learning algorithms, such as reservoir computing, deep learning, spiking neurons etc. Deep learning accelerator which involves convolutional neural networks (CNNs) have received widespread attention for their accuracy and efficiency. This paper proposes ConvLight, a novel deep learning accelerator based on memristor integrated photonic computing framework. While the use of on-chip photonic circuits for analog computing is well known, no prior work has demonstrated a full-fledged accelerator based on photonic components. 
In particular, this paper makes the following novel contributions: (i) A multilayer deep learning architecture design is proposed using compute efficient memristors and photonic components for the first time. (ii) A pipelined design for each CNN layer is presented for maximizing throughput and enabling parallelism across the layers. (iii) Simulation of ConvLight architecture with standard photonic tools for demonstrating the execution of DNN and CNN workloads yielding 25X, 60X, and 40X improvements in computational efficiency, throughput, and energy efficiency (respectively) compared to state-of-the-art design.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00022" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f8a7eaa92426dbe7d549956aca87ffab6ec6b52f", "sources": [ "DBLP" ], "title": "ConvLight: A Convolutional Accelerator with Memristor Integrated Photonic Computing", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "f912e1c1ded4faaa8576fc942a8931740d43664b": { "authors": [ { "ids": [ "2775778" ], "name": "Rogerio Pontes" }, { "ids": [ "3404728" ], "name": "Dorian Burihabwa" }, { "ids": [ "2887942" ], "name": "Francisco Maia" }, { "ids": [ "2911639" ], "name": "Jo\u00e3o Paulo" }, { "ids": [ "2106027" ], "name": "Valerio Schiavoni" }, { "ids": [ "1743906" ], "name": "Pascal Felber" }, { "ids": [ "2783810" ], "name": "Hugues Mercier" }, { "ids": [ "1679234" ], "name": "Rui Oliveira" } ], "doi": "10.1145/3078468.3078480", "doiUrl": "https://doi.org/10.1145/3078468.3078480", "entities": [ "Clustered file system", "Confidentiality", "Data store", "Distributed File System (Microsoft)", "Encryption", "FITS", "Outsourcing", "Privacy", "Protection mechanism", "Replication (computing)", "Service control point", "Software deployment", "Software-defined storage", "Stackable switch", "Time complexity", "User space" ], "id": "f912e1c1ded4faaa8576fc942a8931740d43664b", 
"inCitations": [], "journalName": "", "journalPages": "9:1-9:12", "journalVolume": "", "outCitations": [ "5f3f9223c5c9f896be099bc177929febad508407", "0f55217987ec25afa0f815e0aa3957e669b0280e", "111e2d5634cb30d5d841cdb22563f9b371fb5f54", "b4b26e52580d7eeb0ccbc8e5529e34a831bc4e65", "12a0046a1197ae63c3d616c74e367dc583cef196", "28022c2d8b0b2e7d70c138638472c525058c33be", "807df0de011be333fc1dd06ac58c426e8b3437ef", "165a115fe85185e84f2d073c619fd4a787b03201", "bef33ef2eb5f17824ae0f708b5815ac42f6612d7", "1cfee3e6bad11c5c92cd06065064c474a00e2412", "74ebec1d4d8f195af756e10cb818f58e32270b2e", "418e5e5e58cd9cafe802d8b679651f66160d3728", "7c4cf4515091593106242f169dac0dd2208f9d8b", "a1c96d8046181a0340f6d57d139527b4b23a63da", "1ab7c8129751d27f1f4ed0afb2a57bcc29005b47", "03e255b248ce618f8891484cb747b2ef4bb75448", "73ce1e41c2e0a97a929b9b9999c5daabd7a037b4", "288c5179ca4b9c6849ec99fafdd9f85593fc6416", "762e677fd7a1d7ee74da005cce138d72a07fb452", "0eb6c7b613f86f6c72e05f08f4dbe37dd9290cfa", "65a2cb8a02795015b398856327bdccc36214cdc6", "d4dd35bf5deaa8986f8b9b025a0dcbef88d6a8b5", "535ffd4979373706dc7d4cc6ca670f518fce3f2d", "28e0b55b96bcab20c0f914d4c2d023c361c1b3c7", "d1776dfb8f66cb40cadcac9bb66760ec9b7b3920", "4f91a5354dae88cdf38c54c658ed634580cae96a", "10425a28c7f053d0939e8c74ad81f59d7008138c", "25a331da05db524f46797327adecac83e33581a9", "b46cb54a87a448212af37f2594a512fec39a059e", "257c1c169dd0ae98e273efd0d0948f2a028d4c3f" ], "paperAbstract": "The exponential growth of data produced, the ever faster and ubiquitous connectivity, and the collaborative processing tools lead to a clear shift of data stores from local servers to the cloud. This migration occurring across different application domains and types of users---individual or corporate---raises two immediate challenges. First, out-sourcing data introduces security risks, hence protection mechanisms must be put in place to provide guarantees such as privacy, confidentiality and integrity. 
Second, there is no \"one-size-fits-all\" solution that would provide the right level of safety or performance for all applications and users, and it is therefore necessary to provide mechanisms that can be tailored to the various deployment scenarios.\n In this paper, we address both challenges by introducing SafeFS, a modular architecture based on software-defined storage principles featuring stackable building blocks that can be combined to construct a secure distributed file system. SafeFS allows users to specialize their data store to their specific needs by choosing the combination of blocks that provide the best safety and performance tradeoffs. The file system is implemented in user space using FUSE and can access remote data stores. The provided building blocks notably include mechanisms based on encryption, replication, and coding. We implemented SafeFS and performed in-depth evaluation across a range of workloads. Results reveal that while each layer has a cost, one can build safe yet efficient storage architectures. Furthermore, the different combinations of blocks sometimes yield surprising tradeoffs.", "pdfUrls": [ "http://doi.acm.org/10.1145/3078468.3078480" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f912e1c1ded4faaa8576fc942a8931740d43664b", "sources": [ "DBLP" ], "title": "SafeFS: a modular architecture for secure user-space file systems: one FUSE to rule them all", "venue": "SYSTOR", "year": 2017 }, "f91a49f552b2503e2d66990cfce00feff1332ec9": { "authors": [ { "ids": [ "1802958" ], "name": "Hari Subramoni" }, { "ids": [ "29719971" ], "name": "Sourav Chakraborty" }, { "ids": [ "1731654" ], "name": "Dhabaleswar K. 
Panda" } ], "doi": "10.1007/978-3-319-58667-0_18", "doiUrl": "https://doi.org/10.1007/978-3-319-58667-0_18", "entities": [ "Computation", "Overlap\u2013add method" ], "id": "f91a49f552b2503e2d66990cfce00feff1332ec9", "inCitations": [], "journalName": "", "journalPages": "334-354", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-58667-0_18" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f91a49f552b2503e2d66990cfce00feff1332ec9", "sources": [ "DBLP" ], "title": "Designing Dynamic and Adaptive MPI Point-to-Point Communication Protocols for Efficient Overlap of Computation and Communication", "venue": "ISC", "year": 2017 }, "f931e3ae19c536e99cdf8276c1f20cbc55a24ac6": { "authors": [ { "ids": [ "15490871" ], "name": "Xiaokang Hu" }, { "ids": [ "1904438" ], "name": "Wang Zhang" }, { "ids": [ "26846526" ], "name": "Jian Li" }, { "ids": [ "1840514" ], "name": "Ruhui Ma" }, { "ids": [ "1697194" ], "name": "Feng Wu" }, { "ids": [ "7203366" ], "name": "Haibing Guan" } ], "doi": "10.1109/ICPP.2017.23", "doiUrl": "https://doi.org/10.1109/ICPP.2017.23", "entities": [ "Content delivery network", "Data center", "Hardware virtualization", "High- and low-level", "Hypervisor", "Interconnection", "Memcached", "Operating system", "Responsiveness", "Storage virtualization", "Throughput", "Virtual machine" ], "id": "f931e3ae19c536e99cdf8276c1f20cbc55a24ac6", "inCitations": [], "journalName": "2017 46th International Conference on Parallel Processing (ICPP)", "journalPages": "141-150", "journalVolume": "", "outCitations": [ "1d99df42756144c0684de61dc11c8172a8826256", "4c2be7d70e8e521e6e845dfe1a4dfc22f60af7b7", "96b7cacf2c8a35d296a2d906550a567813cacf80", "2636930ca4f2005371b903fc7670c2ca2d3c01b3", "46a38fb1e26c9957111d154c6fc2c39101ff0ba1", "0852a44c86db434e9b51c67704636791e9940487", "4e0efb24823d75537a038fcd21a4e9b3c9ae7450", "17f1ff82aca7a592a8815e8169b6e2210bf6ae7a", 
"ca6e70cca64c928872a8cd137515d72708b58a69", "8f6e0bb0f41f94b18066d055d6bbc0d7790bbcc2", "6678b17fc8758efea8d32c2d47f9924f8a0cdc6d", "71a2d8c473f13d0c664f751db97e81128281b1eb", "3c4ae51452823afafabe8d33d51218d1d95c2795", "3574657705475722b6c398c266805f758268778b", "73bded14fb8c3b4bd5c2ae554d704d3ad3ff907e", "1b2933b19f5c989c4855d4923000f89216abea5b", "59ed3b2cdc038521e06cf10ca098805e1c08fcad", "92568208c84b1e5300fbde33c9e7309abaa06424", "87064d58ef49df1b47c4ac74258fda1aecab2b68", "e417900118b4467625da2c53c7cea404a167663c", "3e239cdc9eb2d33f3eabb01f552a12aa0bf98537" ], "paperAbstract": "Improving the performance of I/O virtualization is a key issue for cloud and datacenter infrastructures, especially with the rapid increase of network interconnection speeds. Previous efforts have made the performance overhead associated with the virtual I/O data path largely negligible. The remaining bottlenecks mainly lie in the event path: hypervisor interventions trigger costly virtual machine (VM) exits and lead to dramatical performance degradation. Aiming at an optimal virtual I/O event path, we propose ES2, a comprehensive scheme that simultaneously improves bidirectional I/O event delivery between guest VMs and their devices. ES2 can provide efficient I/O request delivery, non-exit interrupt delivery and enhanced I/O responsiveness. Moreover, it does not require any modification to guest operating system (OS) or compromise any virtualization benefit. 
We demonstrate that ES2 greatly reduces VM exit rate with the time in guest (TIG) for I/O processing above 96% for TCP streams and 99% for UDP streams, increases guest throughput by 1.8x for Memcached and 2x for Apache, and keeps guest latency at a very low level.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICPP.2017.23" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f931e3ae19c536e99cdf8276c1f20cbc55a24ac6", "sources": [ "DBLP" ], "title": "ES2: Aiming at an Optimal Virtual I/O Event Path", "venue": "2017 46th International Conference on Parallel Processing (ICPP)", "year": 2017 }, "f951b7dd1845a10c91099f8ec28df72d23d2554d": { "authors": [ { "ids": [ "22222274" ], "name": "Sosuke Shiga" }, { "ids": [ "1798501" ], "name": "Hideo Joho" }, { "ids": [ "1864328" ], "name": "Roi Blanco" }, { "ids": [ "2528063" ], "name": "Johanne R. Trippas" }, { "ids": [ "2396539" ], "name": "Mark Sanderson" } ], "doi": "10.1145/3077136.3080787", "doiUrl": "https://doi.org/10.1145/3077136.3080787", "entities": [ "Behavioral pattern", "Information needs", "Information retrieval", "Information seeking", "Theory" ], "id": "f951b7dd1845a10c91099f8ec28df72d23d2554d", "inCitations": [ "6c6c61e99d6c868c666a10d31d90b3c370cc09fe" ], "journalName": "", "journalPages": "715-724", "journalVolume": "", "outCitations": [ "762b63d2eb86f8fd0de98a08561b77527ae8f165", "fea75f39a5e22ca23834e6bc575d0f32a2f36d75", "32961dfdb85d83e1b33b957f92f1776a149fb837", "1662e121c558faba468bf17d43399b5792c27e13", "957f8aef106f3ca1b90d127811636a92d27d3aaa", "7d046e73f7e70935c33af9e32ea72905ed959d2e", "874fc50f48fb9f2937621728b9710aeccf611681", "041589bb599b475fdffd5a87086856efba86b19c", "e8d5076ec7c2d2400a5399922b6add5032f3c9b9", "e6d408f935668672c64a9584fa614c24e72335a5", "71b47e9864ee7d31ed888a5ed6b334df3d24b689", "11db6d4baab84c5a91b0305cbe02c1a1705eab34", "1aae05759f085792596312eda89315145aea794b", "31f884507b58ad4e03d96d90556f7f91bb10bd52", 
"cc1d71e15167deac835d9ca55dee73455f156387", "5628627881cc1a7361dc70e1f6c8608cce1104ac", "214adc2dfdc2160cdf5be54001daf2b2304a03b3", "9f60c5654ea4dbca5238aca4c8224d5c789d2943", "97d980b8759cc0fbeb07ce593b12360f1d1e84ad", "356aff2cb18131ad93f9ed3c422e72173cac0df3", "7e921f84c01c7bfbc2dc588eb8dcc489eff6b85d", "9e209fbe4d667e07ca10c142520bcada61f806dd", "73c78e134a3f9b185eb825e8ae12d8da75f3ab93", "22bf4783b460047711a1bde26ea68d7392960711", "498ff5aebb7f5091893e6f651801b9d7901b8017", "e57e944de3f29b3e55396d38ff0817829044345e", "6335a1a0cf63b6411ab37eeafd0dcf920772bb0b", "c402314407034f3670ce1db0a41d07cee0349a1e", "555fba5e92000fbfd9526da15da8c7333f636f1a", "02b287d0f3d4089d9b0707a440367e3e403f131e", "1919e7c0f4c1b5252219f49f3c263db58768b1b2" ], "paperAbstract": "The increase of voice-based interaction has changed the way people seek information, making search more conversational. Development of effective conversational approaches to search requires better understanding of how people express information needs in dialogue. This paper describes the creation and examination of over 32K spoken utterances collected during 34 hours of collaborative search tasks. The contribution of this work is three-fold. First, we propose a model of conversational information needs (CINs) based on a synthesis of relevant theories in Information Seeking and Retrieval. Second, we show several behavioural patterns of CINs based on the proposed model. Third, we identify effective feature groups that may be useful for detecting CINs categories from conversations. 
This paper concludes with a discussion of how these findings can facilitate advance of conversational search applications.", "pdfUrls": [ "http://doi.acm.org/10.1145/3077136.3080787" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/f951b7dd1845a10c91099f8ec28df72d23d2554d", "sources": [ "DBLP" ], "title": "Modelling Information Needs in Collaborative Search Conversations", "venue": "SIGIR", "year": 2017 }, "f96ca8f9a2aaa341d11445d16ea4e7d7f710752a": { "authors": [ { "ids": [ "20431790" ], "name": "Yoohyuk Lim" }, { "ids": [ "39603009" ], "name": "Jaemin Lee" }, { "ids": [ "20690915" ], "name": "Cassiano Campes" }, { "ids": [ "1920052" ], "name": "Euiseong Seo" } ], "doi": "", "doiUrl": "", "entities": [ "Data striping", "Flash memory", "Garbage collection (computer science)", "Multi-level cell", "Parity bit", "Solid-state drive", "Span and div" ], "id": "f96ca8f9a2aaa341d11445d16ea4e7d7f710752a", "inCitations": [], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "087111744619e9060b250abc2de6d0bc488c2e1f", "56e2223ef6feed712a0765dd451f363fe7554a95", "ab01a73db0f97a860c014ac25c3ff885741a666a", "0eb006817f850971e48eb1763bdda1adf10f7244", "fe56236034188b6d14b94bef4464715e8543865b", "38896dca5d9101a8c79185f487c1f8f6fc557057", "3569c46ce9e62936cbca126aad9fe3f4a0b2007f", "d69ed6f47e86c3d161b0886bcaf8aee184d8cf15", "469dcf17b17deaddcc5b260490a191c4bae3efe1", "303f71ad0e145415aba9efe9ba96a1f734c63391", "33af96d3f5ff238792089e641e1059dc39f86f23", "22e048f862a41f2a350b4ae6e5f6068448cbae7f", "24693bbc5bd27d89ebb57a24c27582ec291c3a02", "9aa0d7253574e50fe3a190ccd924433f048997dd", "05961fc1d02ca30653dd0b4c906113db796df941", "2163f9cc1b9ce52d331cb551e34aafa970e08f16", "2e7f2e84cccca89fbe7b654928029d7dd64fb384" ], "paperAbstract": "To reduce the performance and lifespan loss caused by the partial-stripe writes in SSD RAIDs, we propose two schemes: parity-stream separation and SLC/MLC convertible programming. 
Parity-stream separation splits the parity block stream from the data block stream to decrease valid page copy during garbage collection. In the convertible programming scheme, the flash memory blocks that are allocated for parity data are programmed in SLC mode to reduce the wear caused by programming stress, while the other flash memory blocks are written in MLC mode as usual. Evaluation shows that our scheme decreased garbage collection overhead by up to 58% and improved lifespan by up to 54%, assuming that the MLC write stress was 3.5 times that of the SLC.", "pdfUrls": [ "https://www.usenix.org/conference/hotstorage17/program/presentation/lim", "https://www.usenix.org/system/files/conference/hotstorage17/hotstorage17-paper-lim.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/f96c/a8f9a2aaa341d11445d16ea4e7d7f710752a.pdf", "s2Url": "https://semanticscholar.org/paper/f96ca8f9a2aaa341d11445d16ea4e7d7f710752a", "sources": [ "DBLP" ], "title": "Parity-Stream Separation and SLC/MLC Convertible Programming for Life Span and Performance Improvement of SSD RAIDs", "venue": "HotStorage", "year": 2017 }, "fa3a979d51c270ba5a228589457a34469643d380": { "authors": [ { "ids": [ "39224389" ], "name": "Anand Padmanabha Iyer" }, { "ids": [ "34928179" ], "name": "Erran L. 
Li" }, { "ids": [ "1716557" ], "name": "Ion Stoica" } ], "doi": "10.1145/3117811.3117813", "doiUrl": "https://doi.org/10.1145/3117811.3117813", "entities": [ "Access network", "Display resolution", "Imperative programming", "Mobile app", "Mobile phone", "Radio access network", "User experience" ], "id": "fa3a979d51c270ba5a228589457a34469643d380", "inCitations": [], "journalName": "", "journalPages": "79-87", "journalVolume": "", "outCitations": [ "23fc06fcd5f99449209bdcd2c3e4fb20a195f4dd", "5174a1e57243013d90041ed9b559fddfd3248dbc", "151ad4d750b910adb63cdbdb794a79ed2daa2b87", "b00a5a83a549050f5a7b15a88c8354ce92be204d", "63b8f3f94c217aa486ecbfb78a0fb7270f226179", "a0130cc381a4fb0fa8aab93997327c7182a5a990", "42d445a8386319bd1d0a49e859f5d43bd0e86637", "1cadb267720b8723fa417840003ac51ec56d7aa5", "78c1c3f34d9db0bfbdde678c20206ec1d1e0e2f0", "20a1c84e6296737d24f36f026774ff4bdf492c17", "2e9fb3bf5b95e9ce722bb32d39d8e93508c39309", "e9adb0fe0021662fd112d5c4bad13d160a659145", "1f79775b58072a2ab484aad798aec0c9c7fa8605", "073daaf4f6fa972d3bdee3c4e4510d21dc934dfb", "735872df0d071b92987fa553908f9997db4fc46a", "7e7b6249b598d9a4c63394e3a2efd008268ae851", "4fba6cf1fec9888feb4477da6d2985194a188d9c", "062bd67c240a7710225fcaf2e236eebafa94eecb" ], "paperAbstract": "In an increasingly mobile connected world, our user experience of mobile applications more and more depends on the performance of cellular radio access networks (RAN). To achieve high quality of experience for the user, it is imperative that operators identify and diagnose performance problems quickly. In this paper, we describe our experience in understanding the challenges in automating the diagnosis of RAN performance problems. Working with a major cellular network operator on a part of their RAN that services more than 2 million users, we demonstrate that fine-grained modeling and analysis could be the key towards this goal. 
We describe our methodology in analyzing RAN problems, and highlight a few of our findings, some previously unknown. We also discuss lessons from our attempt at building automated diagnosis solutions.", "pdfUrls": [ "http://www.cs.columbia.edu/~lierranli/publications/cellscope_mobicom17.pdf", "http://doi.acm.org/10.1145/3117811.3117813" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fa3a979d51c270ba5a228589457a34469643d380", "sources": [ "DBLP" ], "title": "Automating Diagnosis of Cellular Radio Access Network Problems", "venue": "MobiCom", "year": 2017 }, "fa8e5d22530efa15aa652e6fe67ac4223f1c2602": { "authors": [ { "ids": [ "14339879" ], "name": "Simon Schauss" }, { "ids": [ "1738018" ], "name": "Ralf L\u00e4mmel" }, { "ids": [ "35008169" ], "name": "Johannes H\u00e4rtel" }, { "ids": [ "2006212" ], "name": "Marcel Heinz" }, { "ids": [ "6361725" ], "name": "Kevin Klein" }, { "ids": [ "2402700" ], "name": "Lukas H\u00e4rtel" }, { "ids": [ "39565422" ], "name": "Thorsten Berger" } ], "doi": "10.1145/3136014.3136038", "doiUrl": "https://doi.org/10.1145/3136014.3136038", "entities": [ "Digital subscriber line", "Feature model", "Metaprogramming", "Software documentation", "Web application" ], "id": "fa8e5d22530efa15aa652e6fe67ac4223f1c2602", "inCitations": [], "journalName": "", "journalPages": "103-114", "journalVolume": "", "outCitations": [ "1abc7b550de3fe6eef96a6bfdf4cd74b1bfc2b08", "012089d194955f546353cc87a76948b26aca1ab6", "34ac77d1b2646c9a48c519d91e90b584296f833c", "17e4853a28b630e8ceaa619f9e42712c52a506dc", "854d434e09a02d7f9fa6258a98eab5d4910832b0", "1e477aa7eb007c493fa92b8450a7f85eb14ccf0c", "19a07c578c10dacc87b701542ac1fed62da4ef46", "7ecaca8db190608dc4482999e19b1593cc6ad4e5", "585bf1de4e1c46b66691a29f065ee7b2425d38d0", "19cd1b9989380a43bb3d88f7a4c4426afe379d84", "541f9b104d070ea246485029fbcd8d7c5420fb6f", "1d17b9b3c4067b4660e923a2041af0955b7af18d", "62851d668dd137703053a8576ee764c01d4d92f1", 
"e26ab8f3677a1914882bef2a59e0e70a6a86ce47", "5a1e17dca7a82ee0207dc17a4766a652d18ceaf9", "e95040410b1f1fe2cbd8af4ed6529166f955dcec", "76eb5ea4f0525e2da87efc4723566abf91fd007e", "a14f69985e19456681bc874310e7166528637bed", "1684672cd8a88254da2acfdcdbffd24ced2c090a", "aa3cd4233f7c0db95e5c38d5b8fc1d199df21857", "85fa54a41978892aa97b28962c365bfe1e03ab63", "1da0b10ba41a613f76843e22b332fc019aa4ff9e", "3af4b8a933b055325a539f15b97b4b66a705f405", "8a9b80e1759989684ea43ddefec7fd76413ab03a", "251dee257b99640608987a9ad85c6d0ddd18507a", "86a74b76f237ff694856307f2c6034bc8754f2a6", "10d8c0c468487e7da9b6044e19de372bd4bf1d37", "1dfb76b9279b658deda4dc70a5a879a1122a33a8", "837a3a54d579f0722321e3eb98b9615bb5bdd5c8", "bb351539afbc1d9a49de3f87021fe0c9132a4805" ], "paperAbstract": "Selecting and properly using approaches for DSL implementation can be challenging, given their variety and complexity. To support developers, we present the software chrestomathy MetaLib, a well-organized and well-documented collection of DSL implementations useful for learning. We focus on basic metaprogramming techniques for implementing DSL syntax and semantics. The DSL implementations are organized and enhanced by feature modeling, semantic annotation, and model-based documentation. The chrestomathy enables side-by-side exploration of different implementation approaches for DSLs. 
Source code, feature model, feature configurations, semantic annotations, and documentation are publicly available online, explorable through a web application, and maintained by a collaborative process.", "pdfUrls": [ "http://doi.acm.org/10.1145/3136014.3136038" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fa8e5d22530efa15aa652e6fe67ac4223f1c2602", "sources": [ "DBLP" ], "title": "A chrestomathy of DSL implementations", "venue": "SLE", "year": 2017 }, "fa9b3aa58e84f42c20beabf782ffb9af96dfb9f9": { "authors": [ { "ids": [ "1679226" ], "name": "Benny Kimelfeld" }, { "ids": [ "1803218" ], "name": "Christopher R\u00e9" } ], "doi": "10.1145/3034786.3034797", "doiUrl": "https://doi.org/10.1145/3034786.3034797", "entities": [ "Computational complexity theory", "Conjunctive grammar", "Conjunctive query", "Database", "Feature engineering", "Linear separability", "Machine learning", "Relational database", "Utility", "VC dimension", "Whole Earth 'Lectronic Link" ], "id": "fa9b3aa58e84f42c20beabf782ffb9af96dfb9f9", "inCitations": [ "34f515d64fdae5f8dd4b036034889c8dd7376590" ], "journalName": "", "journalPages": "5-20", "journalVolume": "", "outCitations": [ "525e23ff9fa2b442366effd00d0d3e4132d75d47", "25c51377a34aeb18b51ea7de1f9632eaa09f591e", "61b0d9887dbdf4645e310be77e9e01bcad1344cc", "a83bddb34618cc68f1014ca12eef7f537825d104", "17e1bb7fc17b45fe5ad8724a635d285ed000efa8", "008abebf4a9404db9050c9d2fbca769f4faf3ca6", "2749cb94f92170f79d0e8ad266605a871767f38a", "5264ae4ea4411426ddd91dc780c2892c3ff933d3", "214c966d1f9c2a4b66f4535d9a0d4078e63a5867", "45e54d2d660188c4d8be81c48d712c964b542519", "3d7713750d447d3170a0b7bebe048d069e4cb467", "3bbfc62fc13ca27c6e58e42167a6aef593a1365e", "a36b028d024bf358c4af1a5e1dc3ca0aed23b553", "092097335465a8047e1fd500bef4b8fd8d45310b", "4609f6bdc3beab00c9beceaa12dd8101fefe6f1c", "47de0569259e6a420c3eda69cdebf01bf85a1acd", "22a860b53d07902ae6161b9e70676147f0dc3d58", "37c3303d173c055592ef923235837e1cbc6bd986", 
"03f34688ef4ee4239464633784235387e9bff4bb", "fac319a34a9a1b93cb772d4cdb42cdb8741f2edc", "014bb6c3d56ce4734f5e08b52129b1c7eb6758d7", "9c018f1b0aabe84ba61dfc969e273f2a9653c6ec", "0f5c9968fe2cdb0f52c55b2d5b3dec7accf91306", "53f292d4c0d97fa6622838cc4ea070e65942287c", "4d7f9b2ad8cc25a6c5f67d3611e8bc8a1b85ae67", "cd48cacdc0c195be713a2232fe0f53cb3f30a1e5", "0787399ccba7f7e73cb2794503be461945b4d881", "327309ad4b74ab09f4a199c1384eb34fb6dcefd9", "293af2dc96ffed5435051e0622d6991411690da9" ], "paperAbstract": "In the design of analytical procedures and machine-learning solutions, a critical and time-consuming task is that of feature engineering, for which various recipes and tooling approaches have been developed. In this framework paper, we embark on the establishment of database foundations for feature engineering. We propose a formal framework for classification in the context of a relational database. The goal of this framework is to open the way to research and techniques to assist developers with the task of feature engineering by utilizing the database's modeling and understanding of data and queries, and by deploying the well studied principles of database management. As a first step, we demonstrate the usefulness of this framework by formally defining three key algorithmic challenges. The first challenge is that of separability, which is the problem of determining the existence of feature queries that agree with the training examples. The second is that of evaluating the VC dimension of the model class with respect to a given sequence of feature queries. The third challenge is identifiability, which is the task of testing for a property of independence among features that are represented as database queries. 
We give preliminary results on these challenges for the case where features are defined by means of conjunctive queries, and in particular we study the implication of various traditional syntactic restrictions on the inherent computational complexity.", "pdfUrls": [ "http://doi.acm.org/10.1145/3034786.3034797" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fa9b3aa58e84f42c20beabf782ffb9af96dfb9f9", "sources": [ "DBLP" ], "title": "A Relational Framework for Classifier Engineering", "venue": "PODS", "year": 2017 }, "faa42ad71f07ab8c96d4c2426f8af5137c19f674": { "authors": [ { "ids": [ "23209529" ], "name": "Roland Math\u00e1" }, { "ids": [ "1746860" ], "name": "Sasko Ristov" }, { "ids": [ "1718255" ], "name": "Radu Prodan" } ], "doi": "10.1007/978-3-319-64203-1_23", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_23", "entities": [], "id": "faa42ad71f07ab8c96d4c2426f8af5137c19f674", "inCitations": [], "journalName": "", "journalPages": "319-331", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_23" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/faa42ad71f07ab8c96d4c2426f8af5137c19f674", "sources": [ "DBLP" ], "title": "A Simplified Model for Simulating the Execution of a Workflow in Cloud", "venue": "Euro-Par", "year": 2017 }, "fad6c07cb914becb0905d63362e8bdd900eed3f3": { "authors": [ { "ids": [ "8521279" ], "name": "Elena Agostini" }, { "ids": [ "2428889" ], "name": "Davide Rossetti" }, { "ids": [ "2407825" ], "name": "Sreeram Potluri" } ], "doi": "", "doiUrl": "", "entities": [ "Benchmark (computing)", "CUDA", "Critical path method", "FarmVille", "Graphics processing unit", "InfiniBand", "Molecular dynamics", "Network interface controller", "Remote direct memory access", "Run time (program lifecycle phase)", "Synthetic data" ], "id": "fad6c07cb914becb0905d63362e8bdd900eed3f3", "inCitations": [ "8225fec5d29815399796dfc8117b7a677cbde9c2" 
], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "248-257", "journalVolume": "", "outCitations": [ "46249511a2eccfd8e29e8446d8b895040caab0e9", "43f0c099d44a68783a773f91cd03098a5252bf98" ], "paperAbstract": "NVIDIA GPUDirect is a family of technologies aimed at optimizing data movement among GPUs (P2P) or between GPUs and third-party devices (RDMA). GPUDirect Async, introduced in CUDA 8.0, is a new addition which allows direct synchronization between GPU and third party devices. For example, Async allows an NVIDIA GPU to directly trigger and poll for completion of communication operations queued to an InfiniBand Connect-IB network adapter, removing CPU involvement from the critical path in GPU accelerated applications. In this paper, we present the building blocks of GPUDirect Async and explain the supported usage models of this new technology. We also present a performance evaluation using a micro-benchmark and a synthetic stencil benchmark. 
Finally, we demonstrate the use of Async in a few multi-GPU MPI applications: HPGMG-FV (geometric multi-grid), achieving up to 25% improvement in total execution time, CoMD-CUDA (classical molecular dynamics), reducing communications times up to 30%, LULESH2-CUDA, achieving an average performance improvement of 13% in the total execution time.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101146" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fad6c07cb914becb0905d63362e8bdd900eed3f3", "sources": [ "DBLP" ], "title": "Offloading Communication Control Logic in GPU Accelerated Applications", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "fb843a3e0b80007eb18acab60d68e8ac5d79bdbf": { "authors": [ { "ids": [ "20366751" ], "name": "Alireza Ranjbar" }, { "ids": [ "34790381" ], "name": "Miika Komu" }, { "ids": [ "35034594" ], "name": "Patrik Salmela" }, { "ids": [ "1710566" ], "name": "Tuomas Aura" } ], "doi": "", "doiUrl": "", "entities": [ "Cloud computing", "Electron mobility", "Host Identity Protocol", "Multitenancy", "Persistence (computer science)", "Scalability", "Software-defined networking", "Synaptic Package Manager", "Web service", "X86 virtualization" ], "id": "fb843a3e0b80007eb18acab60d68e8ac5d79bdbf", "inCitations": [ "5ef00014862c26bada8b8a084400256d9e30f469", "d1b0759bf54c081dcd83de57efa2015919932a6a" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "262-267", "journalVolume": "", "outCitations": [ "38a3f86300a1e76734acae5cbb806fce86de4869", "27f4001214ce0d449eb05d33626f444526accc7c", "07af8dcfdb0811233aa20919c0011685acc5bfa6", "656f7ec6badf49ac54c8d61e0f60445464a1a0ca", "ff5e8277cd21be96b7d60547e24cd74ec4732daa", "147f99ff0e44bd36f69acf5a95d37f0129da4b37", "382591224df8b8b2eb39712f282860424575754e", "d1ecfb7ec04fc3b3f93d9a5bf01b12eb38238285", 
"832ea400fb32aa499183bef221319397132d11f7", "73966d417bdfe0fd2f1bfd82e7dddf51ccbda961", "84e0660e922da41223b9723bef60f5350a98d427" ], "paperAbstract": "Cloud virtualization technology is shifting towards light-weight containers, which provide isolated environments for running cloud-based services. The emerging trends such as container-based micro-service architectures and hybrid cloud deployments result in increased traffic volumes between the micro-services, mobility of the communication endpoints, and some of the communication taking place over untrusted networks. Yet, the services are typically designed with the assumption of scalable, persistent and secure connectivity. In this paper, we present the SynAPTIC architecture, which enables secure and persistent connectivity between mobile containers, especially in the hybrid cloud and in multi-tenant cloud networks. The solution is based on the standardized Host Identity Protocol (HIP) that tenants can deploy on top of existing cloud infrastructure independently of their cloud provider. Optional cloud-provider extensions based on Software-Defined Networking (SDN) further optimize the networking architecture. 
Our qualitative and quantitative evaluation shows that SynAPTIC performs better than some of the existing solutions.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101148" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fb843a3e0b80007eb18acab60d68e8ac5d79bdbf", "sources": [ "DBLP" ], "title": "SynAPTIC: Secure and Persistent Connectivity for Containers", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "fb973cd5467c9ce056cb37083efc87c74bdae3d0": { "authors": [ { "ids": [ "32909486" ], "name": "Jesus Omana Iglesias" }, { "ids": [ "3334155" ], "name": "Jordi Arjona Aroca" }, { "ids": [ "2809994" ], "name": "Volker Hilt" }, { "ids": [ "1910886" ], "name": "Diego Lugones" } ], "doi": "10.1145/3135974.3135982", "doiUrl": "https://doi.org/10.1145/3135974.3135982", "entities": [ "Automaton", "Central processing unit", "End-to-end encryption", "End-to-end principle", "Holism", "ORCA", "Provisioning", "Real-time computing", "Requirement", "Server (computing)", "Virtual machine", "Virtual reality" ], "id": "fb973cd5467c9ce056cb37083efc87c74bdae3d0", "inCitations": [], "journalName": "", "journalPages": "81-94", "journalVolume": "", "outCitations": [ "4c9b5b3ec35b92357936efe9401110e37e2e046c", "0cc547cea26938e8c4165059ed0975cabec2c660", "5ae87dedd95dd5dda85012e1a8f6ebdfb7e575d0", "c1c71d0b6c0f2705e0e407f6823c928f83d67f73", "bb908983189f58ea62e33c469089448ab8c37973", "3fbc9316a792974ba103be76702a6ce5c8d33f2d", "110b17aede6e6a4fc8aeec50a54fe4dddc2c4779", "25d28bfbfd9067d9cb5a85f4af0af3a57013baf4", "3e257f01e3ee71545d824a1615c35659525b856a", "2c7e254cfea97f0b10320d723aa906f3f5c9bed6", "2e72178091b2ca445f46200dcba71a53417b69eb", "41f0fd1f3d9e72a6dc070920f412653e93be8144", "6168919f450a8ed906051f2562abbfe51aa4d97d", "1d4ac1fd1706a1ab3d93d8fa481d12332068fc30", "7d4960df4f413ab228da2b528986bd2f2ca784ad", "78f853271fe69da617d5a14a1e54cbae6a982a50", 
"942ecc61675d81724823b893df0f1c9418b52d90", "a818086b1d93615d1e6bac0ed69fb68c07beee1b", "39bfff0c76441e0af69f3d61915f2ad930bf6b27", "0b56c5c990051e879d341671d85408fbf519c7c8", "441cf0fe8091d09207374a9d96723419091345ab", "685a52364afb300d38f46c4cbc96e125b3a9cff2", "817f2d1e63771c8f8b5316d0edde45de22d6024b", "490d862480cf30949dce90e832aa292c498ac768", "1884fc68add9f4a30ce491261266c21b8ce6a563", "0ec59b7fae15a7caa3256ba31b21802d455618c2", "3b7c5da3a3888be5818159f31fb50d1e382efa26", "265be4efcca87268845c0a2a30422d14f127b607", "30922a3953ff740486bfd01461cc1f0c5185c39c", "7cb48cf1a7386cdab13cbf32109589a0ddd53f30", "1833dee660500dd104ca84d99600b70c2479ba3c", "34fc1f084ada119eec30411e413b21dd84dda914" ], "paperAbstract": "Onboarding network functions onto current clouds requires labor-intensive configuration of the virtual environment. Developers need to dimension the resources available to each virtual machine such as CPU and memory, define thresholds for scaling dynamically and create configuration files that operators can use to execute the network services. This process is time consuming and dependent on the server architecture. As resources are managed on an individual virtual machine basis, services cannot be orchestrated end to end without significant expertise. In this paper, we argue that much of the manual configuration needed for onboarding services onto a cloud can be automated. Moreover, we can automatically generate abstractions that consider services end-to-end and enable their holistic orchestration. We propose a framework that benchmarks network services during the onboarding process and generates an elastic model which relates workload mixes to resource requirements, identifies component dependencies and automates service operation on heterogeneous stacks. We have evaluated our framework using a real-time communication service that handles multiple classes of workloads. 
Results show that underprovisioning can be eliminated for regular daily traffic, reducing resource provisioning time by at least 5X for the most stressing traffic surges, while improving key performance indicators by at least 40%.", "pdfUrls": [ "http://doi.acm.org/10.1145/3135974.3135982" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fb973cd5467c9ce056cb37083efc87c74bdae3d0", "sources": [ "DBLP" ], "title": "ORCA: an ORChestration automata for configuring VNFs", "venue": "Middleware", "year": 2017 }, "fbc04b44ae88bd285072a266aa26f8b5f7b9c158": { "authors": [ { "ids": [ "3758909" ], "name": "Seth Gilbert" }, { "ids": [ "1738955" ], "name": "Fabian Kuhn" }, { "ids": [ "3311176" ], "name": "Chaodong Zheng" } ], "doi": "10.1145/3087801.3087805", "doiUrl": "https://doi.org/10.1145/3087801.3087805", "entities": [ "Algorithm", "Augmented reality", "Cognitive network", "Cognitive radio", "Distributed algorithm", "Edge coloring", "Graph coloring", "Randomized algorithm", "Regular expression", "Time complexity" ], "id": "fbc04b44ae88bd285072a266aa26f8b5f7b9c158", "inCitations": [], "journalName": "", "journalPages": "23-32", "journalVolume": "", "outCitations": [ "3b4d5b3e93aa6d5ea413c726c17057d8d1eafb2d", "763b5c4f4c896c0a9b8c6b1249856870f3c21154", "48bee338ee3a84df22bbede1b34bec32cca8aa59", "1088c2ce0726948b70e95c64797f8686ef9cd22c", "987ae46631f21d9e0ee9733c1db25cfc660b672f", "4816d2c6a85d15501a9c8b2fb047d0149096026f", "2dd0f428820dda8b104df449da98cb7f523dd0fc", "1a208b40fde95c8033cbd3f01196c6336f0442cd", "273f33867a183df04be2117b02f056ab77f8fa45", "22b8f65fc497337b0de16d645f8a5ee072ecced6", "d97ccbe86ca124e5dcbc90f1084cee30894e811d", "200c9b5f2c0ba9b785364e46b5ef06c79f625349", "5934a20f2ca954252a378e828a81533a7de5a0fe", "78dd00247a09da7568294387d87cdc0799020202", "d0f757c2736b6e3bafa1f3770fa0e48e20d34f6e", "1429f8ca736bdbb827809b058aa57e6c3574940e", "3519e60b90ad8d5ce3c6fe90741848fd74e59209", "9f5c704e6d6e67327953855cba1206211bf53ba2", 
"5ad64730780c05dc89261ed19fd7731be7543071", "2183f5374712456b7d27365c78011467bf08d6f2", "acb3db58c4460a4284f37469530580028ea2897d", "f9f2371f16c0435dc73767008eb9e42250773c9a" ], "paperAbstract": "Cognitive radio networks are a new type of multi-channel wireless network in which different nodes can have access to different sets of channels. By providing multiple channels, they improve the efficiency and reliability of wireless communication. However, the heterogeneous nature of cognitive radio networks also brings new challenges to the design and analysis of distributed algorithms. In this paper, we focus on two fundamental problems in cognitive radio networks: neighbor discovery, and global broadcast. We consider a network containing n nodes, each of which has access to c channels. We assume the network has diameter D, and each pair of neighbors have at least k \u2265 1, and at most kmax \u2264 c, shared channels. We also assume each node has at most \u2206 neighbors. For the neighbor discovery problem, we design a randomized algorithm CSEEK which has time complexity \u00d5((c/k) + (kmax/k) \u00b7 \u2206). CSEEK is flexible and robust, which allows us to use it as a generic \u201cfilter\u201d to find \u201cwell-connected\u201d neighbors with an even shorter running time. We then move on to the global broadcast problem, and propose CGCAST, a randomized algorithm which takes \u00d5((c/k) + (kmax/k) \u00b7 \u2206 + D \u00b7 \u2206) time. CGCAST uses CSEEK to achieve communication among neighbors, and uses edge coloring to establish an efficient schedule for fast message dissemination. Towards the end of the paper, we give lower bounds for solving the two problems. These lower bounds demonstrate that in many situations, CSEEK and CGCAST are near optimal. \u2217Part of this research was done when Chaodong Zheng was working as a postdoctoral researcher at University of Freiburg. arXiv:1703.06130v1 [cs.DC] 
17 Mar 2017", "pdfUrls": [ "https://arxiv.org/pdf/1703.06130v1.pdf", "http://arxiv.org/abs/1703.06130", "http://doi.acm.org/10.1145/3087801.3087805" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/fbc0/4b44ae88bd285072a266aa26f8b5f7b9c158.pdf", "s2Url": "https://semanticscholar.org/paper/fbc04b44ae88bd285072a266aa26f8b5f7b9c158", "sources": [ "DBLP" ], "title": "Communication Primitives in Cognitive Radio Networks", "venue": "PODC", "year": 2017 }, "fc25cfe166dc88aafb223aa9bb4d548d80a40f43": { "authors": [ { "ids": [ "31344273" ], "name": "Hiroshi Nakashima" }, { "ids": [ "19312747" ], "name": "Yoshiki Summura" }, { "ids": [ "19218543" ], "name": "Keisuke Kikura" }, { "ids": [ "1891277" ], "name": "Yohei Miyake" } ], "doi": "10.1109/IPDPS.2017.65", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.65", "entities": [ "Central processing unit", "Compiler", "Cray XC30", "Knights", "Load balancing (computing)", "Manycore processor", "Multi-core processor", "Particle-in-cell", "Performance Evaluation", "Product binning", "SIMD", "Simulation", "Sorting", "Supercomputer", "Thread (computing)", "Voxel", "Xeon Phi" ], "id": "fc25cfe166dc88aafb223aa9bb4d548d80a40f43", "inCitations": [ "3da3a2f4679c4b77d24d0c22c0f311ca49fc23e2" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "202-212", "journalVolume": "", "outCitations": [ "277123d2a8b1a97e97d07330dccf03f50e261dcb", "0c01348dae1ba59ab5f2ddd802ff9fa1342743be", "52c2a2ed3be3690326ca368691f43be7a6292949", "0b56c7d137b6cde03cc4657f8608a39837c49e6a", "e601fe611210e2afb0d63040ec991004a1c01458", "34468f409ff913f03eb8f3705ea6d042c0500832", "6ed87b9a87f700b480db236672336d808c1957b6", "95b8b77b02cace4f49db3020306224f224c4d04d", "b7455effa0ef5f47eb1ed671a122266c1ba0325d", "ac9bcf102038159809981e54ba6dadc4f7050db1", "fc1cb36e2f054c7ca734f388955a8394e17f81ee", "01fded66bcc34d93cba72b3490cabb87268099ba" ], "paperAbstract": "We are now 
developing a manycore-aware implementation of multiprocessed PIC (particle-in-cell) simulation code with automatic load balancing. A key issue of the implementation is how to exploit the wide SIMD mechanism of manycore processors such as Intel Xeon Phi. Our solution is \"particle binning\" to rank all particles in a cell (voxel) in a chunk of SOA (structure-of-arrays) type one-dimensional arrays so that \"particle-push\" and \"current-scatter\" operations on them are efficiently SIMD-vectorized by our compiler. In addition, our sophisticated binning mechanism performs sorting of particles according to their positions \"on-the-fly\", efficiently coping with occasional \"bin overflow\" in a fully multithreaded manner. Our performance evaluation with up to 64 nodes of Cray XC30 and XC40 supercomputers, equipped with Xeon Phi 5120D (Knights Corner) and 7250 (Knights Landing) respectively, not only exhibited good parallel performance, but also proved the effectiveness of our binning mechanism.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.65" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fc25cfe166dc88aafb223aa9bb4d548d80a40f43", "sources": [ "DBLP" ], "title": "Large Scale Manycore-Aware PIC Simulation with Efficient Particle Binning", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "fc26b3804f27cd2e2d79f3f5db95ecf5c4370792": { "authors": [ { "ids": [ "2642333" ], "name": "Minghui Qiu" }, { "ids": [ "1714894" ], "name": "Peilin Zhao" }, { "ids": [ "1678292" ], "name": "Ke Zhang" }, { "ids": [ "1691901" ], "name": "Jun Huang" }, { "ids": [ "1994590" ], "name": "Xing Shi" }, { "ids": [ "2836617" ], "name": "Xiaoguang Wang" }, { "ids": [ "2015610" ], "name": "Wei Chu" } ], "doi": "10.1109/ICDM.2017.49", "doiUrl": "https://doi.org/10.1109/ICDM.2017.49", "entities": [ "Artificial neural network", "Baseline (configuration management)", "Computer multitasking", "Convolutional neural network", 
"Deep learning", "End-to-end principle", "Experiment", "Multi-task learning", "Network model", "Neural Networks", "Radar", "Sensor", "Time series" ], "id": "fc26b3804f27cd2e2d79f3f5db95ecf5c4370792", "inCitations": [ "28195a9cda82f2baae5d55e16877ce01e09ae292" ], "journalName": "2017 IEEE International Conference on Data Mining (ICDM)", "journalPages": "395-404", "journalVolume": "", "outCitations": [ "2f6ed2ddcba7f0110867e9e5c705b17c8a8bc9c6", "563e821bb5ea825efb56b77484f5287f08cf3753", "b669388eddc2248e612cf24ec12cb373f42febde", "443044b5873706aadaeeb87c8594528d83687462", "1aec04aa64f165bb075cc4ce6ad79d36c89d62b6", "272216c1f097706721096669d85b2843c23fa77d", "4a861d29f36d2e4f03477c5df2730c579d8394d3", "0321393874b2c50d54e95be3d5b00d1c88468f5b", "c993431e61e524565cd2e86435978e1b47067949", "fc550414bb4992e80a8750f32e8d376db05d8e29", "a538b05ebb01a40323997629e171c91aa28b8e2f", "046a1302079f56b94c81457bf7fd21c3417a9f72", "07d1db388cd489420d40d0edb13e074d86c77dbd", "154728875d4668065ca6ba9fa2f5d2a1bcfc4a6e", "40927c5d81988a1151639fad150cbc74f64e0d68", "3acf801e35ddb12001033390a7bbe27d28922e15", "ad296d573b4f1b12c6149f96dcde488f10592416", "161ffb54a3fdf0715b198bb57bd22f910242eb49", "3e5cc4a3ef37f9206fc0cf0fe24f75cbe72e6063", "3a2f8879c4623f1d17d57e47a5516e13ef857e7a", "9222c931086c48a59c83c7594ac5f80f7cbfc48a", "052b1d8ce63b07fec3de9dbb583772d860b7c769", "8215eed5098c6f0615351afe0d60710d30e59a3b", "0740014dc7aa6dd46cd2ee64360eef49b8d6210b", "1d96d00037b9656325dfeee04b67913ff89da802" ], "paperAbstract": "Precipitation prediction, such as short-term rainfall prediction, is a very important problem in the field of meteorological service. In practice, most of recent studies focus on leveraging radar data or satellite images to make predictions. However, there is another scenario where a set of weather features are collected by various sensors at multiple observation sites. 
The observations of a site are sometimes incomplete but provide important clues for weather prediction at nearby sites, which are not fully exploited in existing work yet. To solve this problem, we propose a multi-task convolutional neural network model to automatically extract features from the time series measured at observation sites and leverage the correlation between the multiple sites for weather prediction via multi-tasking. To the best of our knowledge, this is the first attempt to use multi-task learning and deep learning techniques to predict short-term rainfall amount based on multi-site features. Specifically, we formulate the learning task as an end-to-end multi-site neural network model which allows to leverage the learned knowledge from one site to other correlated sites, and model the correlations between different sites. Extensive experiments show that the learned site correlations are insightful and the proposed model significantly outperforms a broad set of baseline models including the European Centre for Medium-range Weather Forecasts system (ECMWF).", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/ICDM.2017.49" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fc26b3804f27cd2e2d79f3f5db95ecf5c4370792", "sources": [ "DBLP" ], "title": "A Short-Term Rainfall Prediction Model Using Multi-task Convolutional Neural Networks", "venue": "2017 IEEE International Conference on Data Mining (ICDM)", "year": 2017 }, "fc7577d182417e2013b702cc962b9f06a3115a20": { "authors": [ { "ids": [ "9996721" ], "name": "Kirshanthan Sundararajah" }, { "ids": [ "34746935" ], "name": "Laith Sakka" }, { "ids": [ "1700486" ], "name": "Milind Kulkarni" } ], "doi": "10.1145/3037697.3037720", "doiUrl": "https://doi.org/10.1145/3037697.3037720", "entities": [ "Iteration", "Locality of reference", "Memory hierarchy", "Recursion", "Scheduling (computing)", "Tiling window manager" ], "id": "fc7577d182417e2013b702cc962b9f06a3115a20", 
"inCitations": [ "6b63b0bd3471d04afb88333d638736a120ce32b0", "84c9d00dcc9025e7903fae13d307c385a1ab2447" ], "journalName": "", "journalPages": "281-295", "journalVolume": "", "outCitations": [ "3e2480d7136fe5c6fa7213ea834566b93570c3ca", "8287d98ffc3ac0471327147c7ea1942051f09fba", "0ad64e710d615794af918ad5e037f4d909f97a28", "3e69317455f7db9b1325239c6f6f52cbe29a5491", "df03f67af1febf8540cb0744a624300bff2edd9a", "7ab35eef1e54c7c8392d3d886d4b9e22e3592f4c", "3f3670b4cd960367a6eaa6c1d25859d4950a67bd", "2b3bbf8a8397d41dd2d7f19bd685ca000ee038f7", "bb0dc7f89a8e64aa537e2e2d26e8c44e30bead86", "f0f4757aa2f923a349e8357e73850a78e9b80fee", "fd68bcc41917ed0a72bbe1947bca91fe269cfe04", "03880f1d3faedb37aa51deab3b70a98b939dba28", "2d3d10ed67d91b5e4042979b39283ec52f183ded", "625eeb98b22b02baddb699460d47a2f76bde52b5", "22e8cad9f175827fd9bcd9665f2131bf4553e3e7", "462100939762e52953487e3da0e0c758ade282ae", "e5d0a599b9b7c4345ae051dd3281e84d930edffe", "9eb4268f46059d73ff3b247439c81264f2425a9a", "0fb659af82f2277c8a62ac888f4bfd01570e5470", "13a94e9847ceb7c55d38bd6567a6252f23caa406", "207e063a1f6fe0422076161786f32aedff7e3049", "f2f47ecaddc7280104d72c5d10717c682a097c81", "04518da0c6ba3b5ef2538b69d13bb5bdf1b446c4", "e543c2c0a3d898ba48ba0f0d6930a242e2444e54", "17907e18f11b5ab7ae266e87008acead6d1943d8", "16de6f9e2bf6ee1068dbca8c9e5446295c904315", "4a58e3066f12bb86d7aef2776e9d8a2a4e4daf3e", "67dc83a15c020b84403f1b6b52140965f11e4588", "9fe9e5fec3dcf749a913c1c8c1208a372861d582" ], "paperAbstract": "There has been a significant amount of effort invested in designing scheduling transformations such as loop tiling and loop fusion that rearrange the execution of dynamic instances of loop nests to place operations that access the same data close together temporally. 
In recent years, there has been interest in designing similar transformations that operate on recursive programs, but until now these transformations have only considered simple scenarios: multiple recursions to be fused, or a recursion nested inside a simple loop. This paper develops the first set of scheduling transformations for nested recursions: recursive methods that call other recursive methods. These are the recursive analog to nested loops. We present a transformation called recursion twisting that automatically improves locality at all levels of the memory hierarchy, and show that this transformation can yield substantial performance improvements across several benchmarks that exhibit nested recursion.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037720" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fc7577d182417e2013b702cc962b9f06a3115a20", "sources": [ "DBLP" ], "title": "Locality Transformations for Nested Recursive Iteration Spaces", "venue": "ASPLOS", "year": 2017 }, "fc85750dae7a2c2415991968238ea1e6dbada345": { "authors": [ { "ids": [ "9528094" ], "name": "Mohammad Alian" }, { "ids": [ "20700548" ], "name": "Umur Darbaz" }, { "ids": [ "1700342" ], "name": "G\u00e1bor D\u00f3zsa" }, { "ids": [ "2298231" ], "name": "Stephan Diestelhorst" }, { "ids": [ "1862763" ], "name": "Daehoon Kim" }, { "ids": [ "1686484" ], "name": "Nam Sung Kim" } ], "doi": "10.1109/ISPASS.2017.7975287", "doiUrl": "https://doi.org/10.1109/ISPASS.2017.7975287", "entities": [ "Component-based software engineering", "Computer architecture simulator", "Computer cluster", "Distributed computing", "Open-source software", "Performance per watt", "Simulation", "System Simulation", "Thread (computing)" ], "id": "fc85750dae7a2c2415991968238ea1e6dbada345", "inCitations": [ "5340e44c9c96ea3220ee12f91cef1c45c75acedf" ], "journalName": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "journalPages": "153-162", 
"journalVolume": "", "outCitations": [ "613758b794fe3dd970fc79314b0a1affef67a145", "01ac84ffb4b7f575ea0705181795f4fd2368f519", "cae29bb8d2cab76148568ad659ba9d4f34c91014", "cfeac65a550b5c3b0fd8dfb373b53ed3784ae30f", "7631275e3266f627df6cc29441f69ab9f5f2b1c6", "6d44790b6d952eff28f302998e8121f90786e3ff", "0b11a95e88cd46218f5ea8056b51e11894ee505b", "8b10b13fb495101d1e4eb768907cff05e3bd9315", "48b22e82dba733f04a5af3408ecab872ba38f0c3", "287634bfbcc597ce27632f4045c8a5a563d2a086", "1dc18f4ffa5d0e7c49e57e3d02adfd90901d6c44", "0884c53895371aaf2d3f5e5decb150323cbe3f9d", "3bf23f74bf33ed52f7c28587fab315610b27221a", "64845a653e4762461a29be0cae3c0c10a01b1d14", "2ecc0b759ef1de7b883e316b1e5d49673a5f7728", "14edac4c80e8037ddb59db33fa86ec0ee846d7c1" ], "paperAbstract": "When analyzing a distributed computer system, we often observe that the complex interplay among processor, node, and network sub-systems can profoundly affect the performance and power efficiency of the distributed computer system. Therefore, to effectively cross-optimize hardware and software components of a distributed computer system, we need a full-system simulation infrastructure that can precisely capture the complex interplay. Responding to the aforementioned need, we present dist-gem5, a flexible, detailed, and open-source full-system simulation infrastructure that can model and simulate a distributed computer system using multiple simulation hosts. Then we validate dist-gem5 against a physical cluster and show that the latency and bandwidth of the simulated network sub-system are within 18% of the physical one. 
Compared with the single threaded and parallel versions of gem5, dist-gem5 speeds up the simulation of a 63-node computer cluster by 83.1x and 12.8x, respectively.", "pdfUrls": [ "https://doi.org/10.1109/ISPASS.2017.7975287" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fc85750dae7a2c2415991968238ea1e6dbada345", "sources": [ "DBLP" ], "title": "dist-gem5: Distributed simulation of computer clusters", "venue": "2017 IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)", "year": 2017 }, "fc92a3d4bc27ef30b9073450d52a2521edf5d33d": { "authors": [ { "ids": [ "1760656" ], "name": "Zhenping Lu" }, { "ids": [ "9661972" ], "name": "Fucai Chen" }, { "ids": [ "2402259" ], "name": "Guozhen Cheng" }, { "ids": [ "1936617" ], "name": "Shuxin Li" } ], "doi": "10.1109/HPCC-SmartCity-DSS.2017.55", "doiUrl": "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.55", "entities": [ "Algorithm", "Computer security", "Information leakage", "Session hijacking", "Software-defined networking" ], "id": "fc92a3d4bc27ef30b9073450d52a2521edf5d33d", "inCitations": [], "journalName": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "journalPages": "419-426", "journalVolume": "", "outCitations": [ "c15f22153572da2884f56d01c1dce8ef43e2f9cd", "80358e5612f4120555b2567cb06be78f5a7d34d8", "78d449fef69cac4c5ebaf338d3f8eee5a7dec186", "1a3ee7c6fa8105dad1bc7cc5c5a95e16871e8f50", "07cb92a76a349b7664a44c01927d62d6ab3f2c16", "cbce531d3b7b4889a39dde9ae728aa9d0148822d", "7822c4b1269c0f52e38485684abfa3c6b137902b", "0f1eaf649048c0c502b8bf9fc1f4a3d0e7f937aa", "bef4fac84835617bfeb5f861842a9c30329bac92", "c61cf11ac7083b6ffc49bbbefa30b636eeb08117", "5c574a84f160f13a2424c8f709cc9d2770be2810", "d368439524e50d8db0834590eb517049881fa2ad", "be091b4ad05fda9d978abc37b16ad81223e9e224", 
"75af3327cebff429dc254c78a82349b60c778d92", "ecb30ef07c20091cb1f6aa486a0f35072f620d1e", "5737d42a92d90f04d1aff1f7c974ceec51bb588c" ], "paperAbstract": "Session hijacking of controller attack is one of the most common ways of information leakage Software-Defined Networking is facing, which brings a serious threat to cyber security. However, the existing defense technologies mainly focus on how to detect attacks and reduce the attack success rate. The paper proposes a method from another perspective to minimize the cost that the network undertakes and find an optimal defender's strategy when an attack is unavoidable in some case. The main work is as follows. First of all, we models the scenario of attack and defense as a Stackelberg Games, and prove the optimal strategy is equal to the SSE (Strong Stackelberg Equilibrium). Furthermore, we design an algorithm to solve the equilibrium of the game in the case of infinite strategic space. Finally, the experimental results show that the proposed algorithm is feasible, and then the optimal defense strategy we obtained is significantly superior to other general strategies and cut down the total cost of the whole network in practical.", "pdfUrls": [ "https://doi.org/10.1109/HPCC-SmartCity-DSS.2017.55" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fc92a3d4bc27ef30b9073450d52a2521edf5d33d", "sources": [ "DBLP" ], "title": "The Best Defense Strategy against Session Hijacking Using Security Game in SDN", "venue": "2017 IEEE 19th International Conference on High Performance Computing and Communications; IEEE 15th International Conference on Smart City; IEEE 3rd International Conference on Data Science and Systems (HPCC/SmartCity/DSS)", "year": 2017 }, "fcc5b7de7fa08d287fa6c511133fdad03096234b": { "authors": [ { "ids": [ "1771311" ], "name": "Eduardo Roloff" }, { "ids": [ "1734736" ], "name": "Matthias Diener" }, { "ids": [ "34361236" ], "name": "Emmanuell Diaz Carre\u00f1o" }, { "ids": [ "2401224" ], 
"name": "Luciano Paschoal Gaspary" }, { "ids": [ "1728532" ], "name": "Philippe Olivier Alexandre Navaux" } ], "doi": "10.1007/978-3-319-64203-1_29", "doiUrl": "https://doi.org/10.1007/978-3-319-64203-1_29", "entities": [], "id": "fcc5b7de7fa08d287fa6c511133fdad03096234b", "inCitations": [ "7904da9fd28ef689e75644f85805d0db8b3b0ca7" ], "journalName": "", "journalPages": "399-411", "journalVolume": "", "outCitations": [], "paperAbstract": "", "pdfUrls": [ "https://doi.org/10.1007/978-3-319-64203-1_29" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fcc5b7de7fa08d287fa6c511133fdad03096234b", "sources": [ "DBLP" ], "title": "Leveraging Cloud Heterogeneity for Cost-Efficient Execution of Parallel Applications", "venue": "Euro-Par", "year": 2017 }, "fce46b792412c5a6b909af613a84f138489d6b96": { "authors": [ { "ids": [ "1784610" ], "name": "Vincenzo Bonifaci" }, { "ids": [ "7387533" ], "name": "Gianlorenzo D'Angelo" }, { "ids": [ "1809037" ], "name": "Alberto Marchetti-Spaccamela" } ], "doi": "10.1109/IPDPS.2017.22", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.22", "entities": [ "Algorithm", "Integer programming", "Job shop scheduling", "Jumpstart Our Business Startups Act", "Linear programming", "Linear programming formulation", "Linear programming relaxation", "Makespan", "Parallel computing", "Polynomial", "Relaxation (approximation)", "Scheduling (computing)", "Time complexity" ], "id": "fce46b792412c5a6b909af613a84f138489d6b96", "inCitations": [ "c1aa60e73f280517009b526a405b708a13dc588b" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "738-747", "journalVolume": "", "outCitations": [ "1b2e779f52b56075f1b335feb966e0145007e7c2", "8b5be3d0cd2af1dd12d789fe0184ca40a0ab52aa", "346c5896ff2032d7c7a8400cbbd3bd2f61c72f1a", "1d5e81244451dc58a6e6d4c9d2b8fbff6f55e10b", "617c9bb17977d6b3c5ddb9b840335ab0f7d06286", "423015df3d9e07b096df1af1b9866e82f06d8a30", 
"91c8d42a946110db6ba10587c1d40d10c12661f9", "41a0038093964547b276c9024d0e4303047b49ee", "647521689cb5707f1ef04a62a8b6968bf607d66d", "f70e59a27b140f71c80bc847e5c6145bc59dcf12", "1324538f0a99bb894f9696b49f6de4b558a55e2a", "4eef23f103ae360f42c40a89c61866eb98170678", "7dad526fc681a0808b0163c29d8e4bd92785b952", "9e45d6794e7bad6aed0976bcf82c6bc82222e494", "97df40ca2bd0d3cc9e69da748ea2ef1ebcae632b", "b07fd11d982244b9202226b3c8536d2f9a95cbda", "33e69f20c924f78722b783daebdd248c5f05e41e", "585ee575fe2411b6bfe444e5096b53f96aecd9f4", "1de4b557f3178c98366afc2b18fc2ec6dce2d5ac", "d2b1288cf26f523b89db5d04f390aacb1ccfa00e", "25c4dcffc6bc69b0885587aff9acb9f2dd949c07", "008b490697d36e43dc2df656efff524bedcf076f" ], "paperAbstract": "We propose a model for scheduling jobs in a parallel machine setting that takes into account the cost of migrations by assuming that the processing time of a job may depend on the specific set of machines among which the job is migrated. For the makespan minimization objective, the model generalizes classical scheduling problems such as unrelated parallel machine scheduling, as well as novel ones such as semi-partitioned and clustered scheduling. In the case of a hierarchical family of machines, we derive a compact integer linear programming formulation of the problem and leverage its fractional relaxation to obtain a polynomial-time 2-approximation algorithm. 
Extensions that incorporate memory capacity constraints are also discussed.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.22" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fce46b792412c5a6b909af613a84f138489d6b96", "sources": [ "DBLP" ], "title": "Algorithms for Hierarchical and Semi-Partitioned Parallel Scheduling", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "fd46807bebde44d6011f3cf92a8d19108b2bab8b": { "authors": [ { "ids": [ "3412543" ], "name": "Siddharth Rai" }, { "ids": [ "6226754" ], "name": "Mainak Chaudhuri" } ], "doi": "10.1109/IPDPSW.2017.37", "doiUrl": "https://doi.org/10.1109/IPDPSW.2017.37", "entities": [ "3D computer graphics", "Algorithm", "Central processing unit", "Deferred shading", "DirectX", "Graphics processing unit", "OpenGL", "Quality of service", "Simulation", "Systems management", "Tiled rendering" ], "id": "fd46807bebde44d6011f3cf92a8d19108b2bab8b", "inCitations": [ "37320aaa5cc1331224250fb9d0fb253c9c3844c8", "b3cf5c1a5032d0268c412bb4d9e45ebbbcc69fbe" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)", "journalPages": "18-29", "journalVolume": "", "outCitations": [ "12203385fbe8e26aefa1d82c9effaacb44f27a98", "0b885bb186445ee0c50277d990eca18c53fef09b", "c3da4e02b8a474d6f094f1e5ac435049e0fe3e49", "8cfa975a656838356dc4b211b6c2186bc2601a05", "0036adadc90e4826b2f7fc157752eea459070c32", "34b44a9e55184b48c94a15f29f052941b342e8bf", "2ae2d80ffb19521bcd7fdbf26e9ed2a5d9641bb0", "540a65f5e2176c4000551f1335a24e0f07500f68", "bf277908733127ada3acf7028947a5eb0e9be38b", "3ed84f2fbdc4dc6450919ec5b017e66440a5833c", "2362d702b64b2f6a549155fe34a542524693d938", "3370784dacf9df1e54384190dad40b817520ba3a", "556ae96ff77bd7a8f8365f9a0e3e34aa9a55206d", "8bd32accc1244ba9add521ca5605f397374de518", "0ca2b92a4f992b35683c7fffcd49b4c883772a29", "8383b7f6f4f9556e522f735a0fd7b8c9e11e613b", 
"26e72340c47b7348e1b1de285f89dd96cc925b27", "0d075dae4e4ca9cabef40f9bec4c953ccfc31113", "26512755e7f78e10390b409ed4de3378aba2bac8", "02ebdcf8200135ec0433e12e4ef2459ac740370b", "242cbdc5966fd14ba4a00815ac301fb278d8f544", "7ad74b0ae0271a79d81265afc6c8a93e5bb95701", "013b529f4ec9c1d9ddcef88a1a1f4b0efcc0c9c5", "67bf737ceccf387cdd05c379487da8301f55e93d", "792aa5a81ac1d344de450ec59eec339aa0e508aa", "b6674e59a8075a9352797f6a003017e638ab19f1", "471838a43e0e31df1c0a6572dc50d1326adfc4c3", "1eeb50d5f7937f65a910203ae61430ff8b969012", "627f7a67e64e19bb30de1e83059a190b9de1f8d0", "00ab25c6582d543932fccbb0f15fe93445f95d61" ], "paperAbstract": "Heterogeneous chip-multiprocessors with integrated CPU and GPU cores on the same die allow sharing of critical memory system resources among the applications executing on the two types of cores. In this paper, we explore memory system management driven by the quality of service (QoS) requirement of the GPU applications executing simultaneously with CPU applications in such heterogeneous platforms. Our proposal dynamically estimates the level of QoS (e.g., frame rate in 3D scene rendering) of the GPU application. Unlike the prior proposals, our algorithm does not require any profile information and does not assume tile-based deferred rendering. If the estimated quality of service meets the minimum acceptable QoS level, our proposal employs a light-weight mechanism to dynamically adjust the GPU memory access rate so that the GPU is able to just meet the required QoS level. This frees up memory system resources which can be shifted to the co-running CPU applications. 
Detailed simulations done on a heterogeneous chip-multiprocessor with one GPU and four CPU cores running heterogeneous mixes of DirectX, OpenGL, and CPU applications show that our proposal improves the CPU performance by 18% on average.", "pdfUrls": [ "https://doi.org/10.1109/IPDPSW.2017.37" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fd46807bebde44d6011f3cf92a8d19108b2bab8b", "sources": [ "DBLP" ], "title": "Improving CPU Performance Through Dynamic GPU Access Throttling in CPU-GPU Heterogeneous Processors", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)", "year": 2017 }, "fd51bef5d4c12bb1fa49457e5a44fef0b8bc1295": { "authors": [ { "ids": [ "1695576" ], "name": "Joseph M. Hellerstein" }, { "ids": [ "7905466" ], "name": "Vikram Sreekanti" }, { "ids": [ "30503077" ], "name": "Joseph Gonzalez" }, { "ids": [ "39817819" ], "name": "James Dalton" }, { "ids": [ "37033419" ], "name": "Akon Dey" }, { "ids": [ "7529854" ], "name": "Sreyashi Nag" }, { "ids": [ "17341438" ], "name": "Krishna Ramachandran" }, { "ids": [ "32577542" ], "name": "Sudhanshu Arora" }, { "ids": [ "29361335" ], "name": "Arka Bhattacharyya" }, { "ids": [ "2640806" ], "name": "Shirshanka Das" }, { "ids": [ "7717979" ], "name": "Mark Donsky" }, { "ids": [ "39394560" ], "name": "Gabriel Fierro" }, { "ids": [ "1889439" ], "name": "Chang She" }, { "ids": [ "37841313" ], "name": "Carl Steinbach" }, { "ids": [ "38394852" ], "name": "Venkat Subramanian" }, { "ids": [ "2592571" ], "name": "Eric Sun" } ], "doi": "", "doiUrl": "", "entities": [ "Agile software development", "Apache Hive", "Application programming interface", "Best practice", "Big data", "Business informatics", "Classless Inter-Domain Routing", "Clean Slate Program", "Column (database)", "Column-oriented DBMS", "Compiler", "Computation", "Confidentiality", "Context (computing)", "Counterfactual definiteness", "Daily build", "Data dictionary", "Data governance", 
"Data lineage", "Data science", "Data system", "Database", "Database engine", "Dataflow programming", "Debugging", "Decoupling (electronics)", "Dictionary", "Download", "Downstream (software development)", "Email", "Experience", "Exploratory testing", "Graphical user interface", "Institute for Operations Research and the Management Sciences", "Interaction", "Issue tracking system", "JSON", "Janet basis", "Legacy system", "Machine learning", "Master data management", "Metamodeling", "Nonlinear gameplay", "Numerical analysis", "Ontology (information science)", "Open-source software", "Out-of-order execution", "Pipeline (computing)", "Python", "RSS", "Recommender system", "Requirement", "Risk aversion", "SARA (computer)", "SQL", "Scheduling (computing)", "Sentiment analysis", "Service layer", "Social media", "Social network", "Software release life cycle", "Software versioning", "Source data", "Statistical model", "Stemming", "Table (database)", "Test set", "Upstream (software development)", "Usage data", "User interface", "Value (ethics)", "Velocity", "X86" ], "id": "fd51bef5d4c12bb1fa49457e5a44fef0b8bc1295", "inCitations": [ "b904c2cbe34598bf52f82a8da8b2b02fefd791c5", "9dce39920dd4d6d62bff9e8632751f8e2d39eb20", "cd2d36ce0b17776fc34baa1eb3787004816ce352", "bde4461479169263e4e6123909c382cbfeb58d95", "49452c42619790d44f87897c6f853df769ed4967", "350203890dc2ea7535e876666e33eb7ec9323bef", "7831bc987940bbf060ca4ea18cecb5ae5ed21186", "678f12849b2b0ce95a1a01961e73a8397e03c025", "093c3b389384812ea16f1ad18ce6c5f43c4f7106", "2274f61d00020b0e596b61e113ed16f23f8c0403", "01c71603c2e9c42eb3141dd32d0ee564fb5274b4", "45cc47c1beaad4e08a85c7dfc69cb10913f824ed", "ee507c14856a8c94e140455b444cbce7a6b65779", "2803444c220b98233655724c1214a4c75f3d6523" ], "journalName": "", "journalPages": "", "journalVolume": "", "outCitations": [ "24d8a2c600076c878d05930b8501cf63d2100387", "a8df910c209b06f88e80f227fd59e52127e1a59d", "0fb2ab7176f91e34061b128c86ef100401a1b037", 
"fbefba9de2a6b8a0756d6adda4b0b69ab3126469", "3beeb3ef6d9efa6a5cd51205f5e9b1bae6f20673", "31a816f4fef768f29772a003e534b1378611bfe6", "00a3f6924f90fcd77e6e7e6534b957a75d0ced07", "26deee037b221bd05ed34461819f5c067b745445", "6ecae892fef17f11a67db5a0e9cd2834e3f3aa73", "5bbfe121191dc91970aa24245992deb72f5ccdbe", "3176be871d8199bd9bbbfd2a4064d195cc334ca8", "24632708434a9a26abcb84a8464da7c13547ca85", "9a641e3730fab824e5ff988107794cb0b54943fc", "0e33c7bb8b1626d00483fd34aab16403bf1a0e9c", "6bbaf76d82968a4349f7f043ece649c8ac1fbc0c", "03ad81f6276792a78312471429fc9495b89a1ffc", "ddd526d70a299c782d31d8625d789b083c32dd8b", "515b9903cb55e548b6732e953a1bd51f457c6353", "c0a265fb60dffc3c27da8815fa8ab7f46a2652c8", "6733d561eb0ffa6b69c305a4a657ba0aa8d0039e", "8d319e51946e5f45ed3500a8e04527cec9c21dcc", "09cacb2d068d605e6f8148b173524094a41670d5", "6f54a7933235ced5684e3bff18f7e5dc40510018" ], "paperAbstract": "Ground is an open-source data context service, a system to manage all the information that informs the use of data. Data usage has changed both philosophically and practically in the last decade, creating an opportunity for new data context services to foster further innovation. In this paper we frame the challenges of managing data context with basic ABCs: Applications, Behavior, and Change. We provide motivation and design guidelines, present our initial design of a common metamodel and API, and explore the current state of the storage solutions that could serve the needs of a data context service. Along the way we highlight opportunities for new research and engineering solutions. 1. FROM CRISIS TO OPPORTUNITY Traditional database management systems were developed in an era of risk-averse design. The technology itself was expensive, as was the on-site cost of managing it. Expertise was scarce and concentrated in a handful of computing and consulting firms. Two conservative design patterns emerged that lasted many decades. 
First, the accepted best practices for deploying databases revolved around tight control of schemas and data ingest in support of general-purpose accounting and compliance use cases. Typical advice from data warehousing leaders held that \u201cThere is no point in bringing data . . . into the data warehouse environment without integrating it\u201d [15]. Second, the data management systems designed for these users were often built by a single vendor and deployed as a monolithic stack. A traditional DBMS included a consistent storage engine, a dataflow engine, a language compiler and optimizer, a runtime scheduler, a metadata catalog, and facilities for data ingest and queueing\u2014all designed to work closely together. As computing and data have become orders of magnitude more efficient, changes have emerged for both of these patterns. Usage is changing profoundly, as expertise and control shifts from the central accountancy of an IT department to the domain expertise of \u201cbusiness units\u201d tasked with extracting value from data [12]. The changes in economics and usage brought on the \u201cthree Vs\u201d of Big Data: Volume, Velocity and Variety. Resulting best practices focus on open-ended schema-on-use data \u201clakes\u201d and agile development, This article is published under a Creative Commons Attribution License (http://creativecommons.org/licenses/by/3.0/), which permits distribution and reproduction in any medium as well allowing derivative works, provided that you attribute the original work to the author(s) and CIDR 2017. CIDR \u201917 January 8-11, 2017, Chaminade, CA, USA in support of exploratory analytics and innovative application intelligence [26]. Second, while many pieces of systems software that have emerged in this space are familiar, the overriding architecture is profoundly different. 
In today\u2019s leading open source data management stacks, nearly all of the components of a traditional DBMS are explicitly independent and interchangeable. This architectural decoupling is a critical and under-appreciated aspect of the Big Data movement, enabling more rapid innovation and specialization. 1.1 Crisis: Big Metadata An unfortunate consequence of the disaggregated nature of contemporary data systems is the lack of a standard mechanism to assemble a collective understanding of the origin, scope, and usage of the data they manage. In the absence of a better solution to this pressing need, the Hive Metastore is sometimes used, but it only serves simple relational schemas\u2014a dead end for representing a Variety of data. As a result, data lake projects typically lack even the most rudimentary information about the data they contain or how it is being used. For emerging Big Data customers and vendors, this Big Metadata problem is hitting a crisis point. Two significant classes of end-user problems follow directly from the absence of shared metadata services. The first is poor productivity. Analysts are often unable to discover what data exists, much less how it has been previously used by peers. Valuable data is left unused and human effort is routinely duplicated\u2014particularly in a schema-on-use world with raw data that requires preparation. \u201cTribal knowledge\u201d is a common description for how organizations manage this productivity problem. This is clearly not a systematic solution, and scales very poorly as organizations grow. The second problem stemming from the absence of a system to track metadata is governance risk. Data management necessarily entails tracking or controlling who accesses data, what they do with it, where they put it, and how it gets consumed downstream. In the absence of a standard place to store metadata and answer these questions, it is impossible to enforce policies and/or audit behavior. 
As a result, many administrators marginalize their Big Data stack as a playpen for non-critical data, and thereby inhibit both the adoption and the potential of new technologies. In our experiences deploying and managing systems in production, we have seen the need for a common service layer to support the capture, publishing and sharing of metadata information in a flexible way. The effort in this paper began by addressing that need. 1.2 Opportunity: Data Context The lack of metadata services in the Big Data stack can be viewed as an opportunity: a clean slate to rethink how we track and leverage modern usage of data. Storage economics and schema-on-use agility suggest that the Data Lake movement could go much farther than Data Warehousing in enabling diverse, widely-used central repositories of data that can adapt to new data formats and rapidly changing organizations. In that spirit, we advocate rethinking traditional metadata in a far more comprehensive sense. More generally, what we should strive to capture is the full context of data. To emphasize the conceptual shifts of this data context, and as a complement to the \u201cthree Vs\u201d of Big Data, we introduce three key sources of information\u2014the ABCs of Data Context. Each represents a major change from the simple metadata of traditional enterprise data management. Applications: Application context is the core information that describes how raw bits get interpreted for use. In modern agile scenarios, application context is often relativistic (many schemas for the same data) and complex (with custom code for data interpretation). Application context ranges from basic data descriptions (encodings, schemas, ontologies, tags), to statistical models and parameters, to user annotations. All of the artifacts involved\u2014wrangling scripts, view definitions, model parameters, training sets, etc.\u2014are critical aspects of application context. 
Behavior: This is information about how data was created and used over time. In decoupled systems, behavioral context spans multiple services, applications and formats and often originates from high-volume sources (e.g., machine-generated usage logs). Not only must we track upstream lineage\u2014the data sets and code that led to the creation of a data object\u2014we must also track the downstream lineage, including data products derived from this data object. Aside from data lineage, behavioral context includes logs of usage: the \u201cdigital exhaust\u201d left behind by computations on the data. As a result, behavioral context metadata can often be larger than the data itself. Change: This is information about the version history of data, code and associated information, including changes over time to both structure and content. Traditional metadata focused on the present, but historical context is increasingly useful in agile organizations. This context can be a linear sequence of versions, or it can encompass branching and concurrent evolution, along with interactions between co-evolving versions. By tracking the version history of all objects spanning code, data, and entire analytics pipelines, we can simplify debugging and enable auditing and counterfactual analysis. Data context services represent an opportunity for database technology innovation, and an urgent requirement for the field. We are building an open-source data context service we call Ground, to serve as a central model, API and repository for capturing the broad context in which data gets used. Our goal is to address practical problems for the Big Data community in the short term and to open up opportunities for long-term research and innovation. In the remainder of the paper we illustrate the opportunities in this space, design requirements for solutions, and our initial efforts to tackle these challenges in open source. 2. 
DIVERSE USE CASES To illustrate the potential of the Ground data context service, we describe two concrete scenarios in which Ground can aid in data discovery, facilitate better collaboration, protect confidentiality, help diagnose problems, and ultimately enable new value to be captured from existing data. After presenting these scenarios, we explore the design requirements for a data context service. 2.1 Scenario: Context-Enabled Analytics This scenario represents the kind of usage we see in relatively technical organizations making aggressive use of data for machine-learning driven applications like customer targeting. In these organizations, data analysts make extensive use of flexible tools for data preparation and visualization and often have some SQL skills, while data scientists actively prototype and develop custom software for machine learning applications. Janet is an analyst in the Customer Satisfaction department at a large bank. She suspects that the social network behavior of customers can predict if they are likely to close their accounts (customer churn). Janet has access to a rich context-service-enabled data lake and a wide range of tools that she can use to assess her hypothesis. Janet begins by downloading a free sample of a social media feed. She uses an advanced data catalog application (we\u2019ll call it \u201cCatly\u201d) which connects to Ground, recognizes the content of her sample, and notifies her that the bank\u2019s data lake has a complete feed from the previous month. She then begins using Catly to search the lake for data on customer retention: what is available, and who has access to it? As Janet explores candidate schemas and data samples, Catly retrieves usage data from Ground and notifies her that Sue, from the data-science team, had previously used a database table called cust_roster as input to a Python library called cust_churn. 
Examining a sample from cust_roster and knowing of Sue\u2019s domain expertise, Janet decides to work with that table in her own churn analysis. Having collected the necessary data, Janet turns to a data preparation application (\u201cPreply\u201d) to clean and transform the data. The social media data is a JSON document; Preply searches Ground for relevant wrangling scripts and suggests unnesting attributes and pivoting them into tables. Based on security information in Ground, Preply warns Janet that certain customer attributes in her table are protected and may not be used for customer retention analysis. Finally, to join the social media names against the customer names, Preply uses previous wrangling scripts registered with Ground by other analysts to extract standardized keys and suggest join conditions to Janet. Having prepared the data, Janet loads it into her BI charting tool and discovers a strong correlation between customer churn and social sentiment. Janet uses the \u201cshare\u201d feature of the BI tool to send it to Sue; the tool records the share in Ground. Sue has been working on a machine learning pipeline for automated discount targeting. Janet\u2019s chart has useful features, so Sue consults Ground to find the input data. Sue joins Janet\u2019s dataset into her existing training data but discovers that her pipeline\u2019s prediction accuracy decreases. Examining Ground\u2019s schema for Janet\u2019s dataset, Sue realizes that the sentiment column is categorical and needs to be pivoted into indicator columns isPositive, isNegative, and isNeutral. Sue writes a Python script to transform Janet\u2019s data into a new file in the required format. She trains a new version of the targeting model and deploys it to send discount offers to customers at risk of leaving. Sue registers her training pipeline including Janet\u2019s social media feeds in the daily build; Ground is informed of the new code versions and service registration. 
After several weeks of improved predictions, Sue receives an alert from Ground about changes in Janet\u2019s script; she also sees a notable drop in prediction accuracy of her pipeline. Sue discovers that some of the new social media messages are missing sentiment scores. She queries Ground for the version of the data and pipeline code when sentiment scores first went missing. Upon examination, she sees that the upgrade to the sentiment analysis code produced new categories for which she doesn\u2019t have columns (e.g., isAngry, isSad, . . . ). Sue uses Ground to roll back the sentiment analysis code in Janet\u2019s pipeline and re-run her pipeline for the past month. This fixes Sue\u2019s problem, but Sue wonders if she can simply roll back Janet\u2019s scripts in production. Consulting Ground, Sue discovers that other pipelines now depend upon the new version of Janet\u2019s scripts. Sue calls a meeting with the relevant stakeholders to untangle the situation. Throughout our scenario, the users and their applications benefited from global data context. Applications like Catly and Preply were able to provide innovative features by mining the \u201ctribal knowledge\u201d captured in Ground: recommending datasets and code, identifying experts, flagging security concerns, notifying developers of changes, etc. The users were provided contextual awareness of both technical and organizational issues and able to interrogate global context to understand root causes. Many of these features exist in isolated applications today, but would work far better with global context. Data context services make this possible, opening up opportunities for innovation, efficiency and better governance. 2.2 Scenario: Big Data in Enterprise IT Many organizations are not as technical as the one in our previous scenario. 
We received feedback on an early draft of this paper from an IT executive at a global financial services firm (not affiliated with the authors), who characterized both Janet and Sue as \u201cdevelopers\u201d not analysts. (\u201cIf she knows what JSON is, she\u2019s a developer!\u201d) In his organization, such developers represent less than 10% of the data users. The remaining 90% interact solely with graphical interfaces. However, he sees data context offering enormous benefits to his organization. Here we present an illustrative enterprise IT scenario. Mark is a Data Governance manager working in the IT department of a global bank. He is responsible for a central data warehouse, and the legacy systems that support it, including Extract-Transform-Load (ETL) mappings for loading operational databases into the warehouse, and Master Data Management (MDM) systems for governing the \u201cgolden master\u201d of various reference data sets (customers, partner organizations, and so on.) Recently, the bank decided to migrate off of these systems and onto a Big Data stack, to accommodate larger data volumes and greater variety of data. In so doing, they rewrote many of their workflows; the new workflows register their context in Ground. Sara is an analyst in the bank\u2019s European Compliance office; she uses Preply to prepare monthly reports for various national governments demonstrating the firm\u2019s compliance with regulations like Basel III [33]. As Sara runs this month\u2019s AssetAllocation report, she sees that a field called IPRE_AUSNZ came back with a very small value relative to other fields prefixed with IPRE. She submits a request to the IT department\u2019s trouble ticket system (\u201cHelply\u201d) referencing the report she ran, asking \u201cWhat is this field? What are the standard values? 
If it is unusual, can you help me understand why?\u201d Mark receives the ticket in his email, and Helply stores an association in Ground between Sara and AssetAllocation. Mark looks in Ground at summary statistics for the report fields over time, and confirms that the value in that field is historically low by an order of magnitude. Mark then looks at a \u201cdata dictionary\u201d of reference data in Ground and sees that IPRE was documented as \u201cIncome-Producing Real Estate\u201d. He looks at lineage data in Ground and finds that the IPRE_AUSNZ field in the report is calculated by a SQL view aggregating data from both Australia and New Zealand. He also looks at version information for the view behind AssetAllocation, and finds that the view was modified on the second day of the month to compute two new fields, IPRE_AUS and IPRE_NZ that separate the reporting across those geographies. Mark submits a response in Helply that explains this to Sara. Armed with that information, Sara uses the Preply UI to sum all three fields into a single cell representing the IPRE calculation for the pair of countries over the course of the full month. Based on the Helply association, Sara is subscribed automatically to an RSS feed associated with AssetAllocation. In future, Sara will automatically learn about changes that affect the report, thanks to the new workloads from Mark\u2019s team that autogenerate data lineage in Ground. Mark\u2019s team takes responsibility for upstream reporting of version changes to data sources (e.g. reference data) and code (ETL scripts, warehouse queries, etc), as well as the data lineage implicit in that code. Using that data lineage, a script written by Mark\u2019s team auto-computes downstream Helply alerts for all data products that depend transitively on a change to upstream data and scripts. 
In this scenario, both the IT and business users benefit from various kinds of context stored in Ground, including statistical data profiles, data dictionaries, field-level data lineage, code version history, and (transitive) associations between people, data, code and their versions. Our previous data science use cases largely exploited statistical and probabilistic aspects of context (correlations, recommendations); in this scenario, the initial motivation was quantitative, but the context was largely used in more deterministic and discrete ways (dependencies, definitions, alerts). Over time, we believe organizations will leverage data context using both deterministic and probabilistic approaches. 3. DESIGN AND ARCHITECTURE In a decoupled architecture of multiple applications and backend services, context serves as a \u201cnarrow waist\u201d\u2014a single point of access for the basic information about data and its usage. It is hard to anticipate the breadth of applications that could emerge. Hence we were keen in designing Ground to focus on initial decisions that could enable new services and applications in future. 
3.1 Design Requirements In our design, we were guided by Postel\u2019s Law of Robustness from Internet architecture: \u201cBe conservative in what you do, be liberal in what you accept from others.\u201d Guided by this philosophy, we identified four central design requirements for a successful data", "pdfUrls": [ "http://cidrdb.org/cidr2017/papers/p111-hellerstein-cidr17.pdf" ], "pmid": "", "s2PdfUrl": "http://pdfs.semanticscholar.org/fd51/bef5d4c12bb1fa49457e5a44fef0b8bc1295.pdf", "s2Url": "https://semanticscholar.org/paper/fd51bef5d4c12bb1fa49457e5a44fef0b8bc1295", "sources": [ "DBLP" ], "title": "Ground: A Data Context Service", "venue": "CIDR", "year": 2017 }, "fd6bb13962dcef8fd92ede6ed2be8f474eacda7c": { "authors": [ { "ids": [ "2144577" ], "name": "Lifeng Nai" }, { "ids": [ "32052175" ], "name": "Ramyad Hadidi" }, { "ids": [ "33597715" ], "name": "Jaewoong Sim" }, { "ids": [ "3194681" ], "name": "Hyojong Kim" }, { "ids": [ "39708396" ], "name": "Pranith Kumar" }, { "ids": [ "8187053" ], "name": "Hyesoon Kim" } ], "doi": "10.1109/HPCA.2017.54", "doiUrl": "https://doi.org/10.1109/HPCA.2017.54", "entities": [ "Cube", "Data science", "Emergence", "Hybrid Memory Cube", "In-memory database", "Instruction-level parallelism", "Linearizability", "Overhead (computing)", "Speedup" ], "id": "fd6bb13962dcef8fd92ede6ed2be8f474eacda7c", "inCitations": [ "04984fc1683186a526917575c435733e9311ff6e", "0c4fe1f1a8043e8f4175b21faca1b72bff8033e6", "5fa68bb091d5a46c67c341d9c3d3b37442431abb", "6b6a5f2127b5ffbccd54d4823a9ca3a73969f3d1", "82d9d4cae6cb82ad516371414cc53da6ec9708b0", "8c78f219d47da47671fd5cd4c002011ab53ac539", "348119d77d127dba6058802c12f98f06c8849f3d", "651ae380b5d500c613770dbf55c175c52576d7da", "0231ffa4b9b095efbf0f302898cd7abd7dd0b764", "64cc548a10a175e2dfb72df3457b56b8c2499925", "1832398bc25d1bcf9ebf1fc385574ce40c823ac8", "9b3664ba2ddaa276f3f2b1212a44dc0b33735841", "69066cab334c397ffab6a84bad581e007d021afc" ], "journalName": "2017 IEEE International Symposium on High 
Performance Computer Architecture (HPCA)", "journalPages": "457-468", "journalVolume": "", "outCitations": [ "6f090d59bde17b7604985acf38e26785e794bcc0", "dd5f64f4b0a83ce5655d64f5955c7deec91073c8", "1156f60e40548096df49528b1342bb3e88b0f378", "045a975c1753724b3a0780673ee92b37b9827be6", "417ab9b8b003982222017ef585e19680366609f3", "bf70d60fc8d1de5fa53e8220a014fe463de4b7e5", "01e6176d319e3bfecc3667447c5bbaf2d00b9c7c", "4339f17c10b91d2def6e16ab981d7b5428e6d82c", "3230c6025956c2d3fd11971e0d30b690e3078a1e", "1603a6c37d0ad975b59868fc821dd4a03f4152cf", "352a8957005dc5519b15ed1870751ec494d66395", "1ddcc37ae33b4dea4fe74a0b83f48809f2ea01d8", "da8b0378174bc25ed174be36a1c725787b81854d", "07c3c9f1be15b52e259309b2e0a9ca71fa7dc76f", "8b04ea524cb6ced72868c120a00c4679d84be006", "0693ff4b3a8d1452b897a876d3ffe6b2074e98e4", "091aded505b84cf87c197875ccfde24d98a300c9", "5c71f2e8ab879bf508002d8f2e29c0f21317f3e9", "00ab25c6582d543932fccbb0f15fe93445f95d61", "2d680892a7318ab7eff879054ea7ab6aeeb51fe9", "a8fb0fb53ea915b9f0d979cbab72f2f2b04aa4f3", "17810349765c08963af130efe28b6a6b77b7ec51", "2b0cc03aa4625a09958c20dc721f4e0a52c13fd0", "62bfdedb87d1fed25eb5aa1bc6ff546c70a0ba6a", "3b621e9a6b99f32caa518116cb400035d1deed29", "54a15f2c25bec4274d5bb423dbc5002426a506a3", "793066b8dec9ad2780f08345ad7522d0bbe206ce", "72dfed87d3549369ae93dbbbfd371f88c4e344c8", "8e6681c2307b9b875ea580b89b94b405aa63e78e", "9448d2f1f8260da11a0b5d9b27ee6c8eb916d8ee", "9d30381c49afa033eacc04fb68975762eb7bafab", "0020b1fa0957c6cf5e6358c94c3a72d96af33390", "c7341dcd5e6c71b149edf808331ab1e8b37cd03b", "023ba3dff9e17a15ae8448ec6cacc3e9a5ff116a", "968ab6ac401321fb403d746168a53f6dc96ccaf0", "254ded254065f2d26ca24ec024cefd7604bd74e7", "24e8be45a2b2a30a01b7e9f1502e7bd6a7870e7a", "48a7323c4894de3afb90ef2135160205ebb55011", "0763e8bee8d59588ce35705ef3e58b5d601d2ae6", "93bae7155092c8ba1ae1c4ad9f30ae1b7c829dd7", "4e8505919eb22265f107ebbeeee3fa78bf6d893a", "069eafae5ee9df25ff5c457bb636f73b98d8f6e9", 
"0fc3098d4413dd75ef750c8dddf6cbe87ea9d8d7", "0fca220343f411c7dac67b1f5fc1bcf5790cc030" ], "paperAbstract": "With the emergence of data science, graph computing has become increasingly important these days. Unfortunately, graph computing typically suffers from poor performance when mapped to modern computing systems because of the overhead of executing atomic operations and inefficient utilization of the memory subsystem. Meanwhile, emerging technologies, such as Hybrid Memory Cube (HMC), enable the processing-in-memory (PIM) functionality with offloading operations at an instruction level. Instruction offloading to the PIM side has considerable potentials to overcome the performance bottleneck of graph computing. Nevertheless, this functionality for graph workloads has not been fully explored, and its applications and shortcomings have not been well identified thus far. In this paper, we present GraphPIM, a full-stack solution for graph computing that achieves higher performance using PIM functionality. We perform an analysis on modern graph workloads to assess the applicability of PIM offloading and present hardware and software mechanisms to efficiently make use of the PIM functionality. Following the real-world HMC 2.0 specification, GraphPIM provides performance benefits for graph applications without any user code modification or ISA changes. In addition, we propose an extension to PIM operations that can further bring performance benefits for more graph applications. 
The evaluation results show that GraphPIM achieves up to a 2.4X speedup with a 37% reduction in energy consumption.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HPCA.2017.54" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fd6bb13962dcef8fd92ede6ed2be8f474eacda7c", "sources": [ "DBLP" ], "title": "GraphPIM: Enabling Instruction-Level PIM Offloading in Graph Computing Frameworks", "venue": "2017 IEEE International Symposium on High Performance Computer Architecture (HPCA)", "year": 2017 }, "fd794d5785fae299e0916daea42cb277875780f0": { "authors": [ { "ids": [ "40147091" ], "name": "Dongyao Wu" }, { "ids": [ "1783693" ], "name": "Sherif Sakr" }, { "ids": [ "8301531" ], "name": "Liming Zhu" }, { "ids": [ "5279904" ], "name": "Huijun Wu" } ], "doi": "", "doiUrl": "", "entities": [ "Apache Hadoop", "Big data", "Centralisation", "Cloud computing", "Data center", "Jumpstart Our Business Startups Act", "Mobile computing", "Requirement", "SPARK", "Scheduling (computing)" ], "id": "fd794d5785fae299e0916daea42cb277875780f0", "inCitations": [], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "218-227", "journalVolume": "", "outCitations": [ "3a043714354fe498752b45e4cf429dbae0fb2558", "c67898e699c9750e2c84ae46dec4253ea05c3aaa", "3bd6bc388dea99b023c6695bd287eac8f5d28c0a", "2000c8bc2f5bbf1f2a579726d84368d911a20bb0", "96cc70a28d0d0c6cc1f6f94c12a45f2d91f16c88", "187d3d8109e51b5e2c4941048b0fd6cf1d464370", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "0a68c6226e04180671a474c73fa0a2b4a154d129", "582bde977340390b95b1b97427f3e0d2bc4c02d7", "9771e382794af067f7360f1cac7b6d2a1e6dd1c4", "9a16a82bd087a90fb20b317f433258eb5eee9569", "4fd3aeb7d84eac614c9c7ccce00e1c76ce20aa52", "59227b361bd81c0581358b2fdfe38426df3feb68", "11aad3a9bd17be8bd73dbde5f084ca7b623096f3", "6365282c7b66a58f87796c801e5874f157ab82f9", "73f512de77dad7d0abe8076a856727021b9493d3", 
"51e29f5e4ef30088afaed0a8dbe3c1fa61d0c057" ], "paperAbstract": "Big data are increasingly collected and stored in a highly distributed infrastructures due to the development of sensor network, cloud computing, IoT and mobile computing among many other emerging technologies. In practice, the majority of existing big-data-processing frameworks (e.g., Hadoop and Spark) are designed based on the single-cluster setup with the assumptions of centralized management and homogeneous connectivity which makes them sub-optimal and sometimes infeasible to apply for scenarios that require implementing data analytics jobs on highly distributed data sets (across racks, data centers or multi-organizations). In order to tackle this challenge, we present HDM-MC, a multi-cluster big data processing framework which is designed to enable the capability of performing large scale data analytics across multi-clusters with minimum extra overhead due to additional scheduling requirements. In this paper, we present the architecture and realization of the system. 
In addition, we evaluate the performance of our framework in comparison to other state-of-art single cluster big data processing frameworks.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101142" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fd794d5785fae299e0916daea42cb277875780f0", "sources": [ "DBLP" ], "title": "Towards Big Data Analytics across Multiple Clusters", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "fd840a2cb6fb6918689c8374c7316dbb23847c89": { "authors": [ { "ids": [ "26323673" ], "name": "Franz Christian Heinrich" }, { "ids": [ "1877260" ], "name": "Tom Cornebize" }, { "ids": [ "2376907" ], "name": "Augustin Degomme" }, { "ids": [ "1998583" ], "name": "Arnaud Legrand" }, { "ids": [ "2573681" ], "name": "Alexandra Carpen-Amarie" }, { "ids": [ "1719340" ], "name": "Sascha Hunold" }, { "ids": [ "3164170" ], "name": "Anne-C\u00e9cile Orgerie" }, { "ids": [ "1756289" ], "name": "Martin Quinson" } ], "doi": "10.1109/CLUSTER.2017.66", "doiUrl": "https://doi.org/10.1109/CLUSTER.2017.66", "entities": [ "Benchmark (computing)", "Computation", "Cost efficiency", "Data center", "Experiment", "Message Passing Interface", "Model of computation", "Multi-core processor", "Run time (program lifecycle phase)", "SimGrid", "Simulation", "Supercomputer" ], "id": "fd840a2cb6fb6918689c8374c7316dbb23847c89", "inCitations": [], "journalName": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "journalPages": "92-102", "journalVolume": "", "outCitations": [ "14bd3627a85b658ea1b8450039df7fe0fb57379e", "75af374478be81cea1c0c15332c71f3379860691", "381c7853690a0fee6f00d2608a7779737f1365f9", "4535a96bbd868cbc578da3c512f0db2c3e4ccc2a", "dc02287acce63d8b22fb7df8676b415bf0f430ca", "f2c6a0039f99bf33fd8eccc1cc16a01c5fe0ffdd", "59c6c3ce034e5dadbc378604c7294eccec4ec47f", "995c6b5e9ee851f1b70ed85a00867eb79714c246", "c39c26d510c1a965c5f132edc989a598ca92b700", 
"705c20122d0f139e747c14a9879f9bb5ae65387a", "0f0a5c1844ca682979c6d2d0e9374e331cb9fd99", "ed476d78f163b569ed3f95fa7c9628cf95c28799", "30a82a63a339c1e69aac36b23900544fe9ec97bb", "b2b9114be56ff4b02877a98ac6383b174cb49143", "74911278247239eef9e591575822ab2fbd78ad8c", "4e23533b91d5ee16674ee632fcbfa5126c8ad125", "6049062a3a73d22c914e7fa8951b3b0e5f09b309", "306c1c1c05e9fb8db5ad4d0b4e715073f54de6fc", "38a0bced15718230eeec1f5ffd29ada0f4f10a7a", "42ba158ffa52f859e6849628c550fc65cd936cb1", "c4cbcaecad03438bc0639cb382997857a98e8b3d", "9bb6ee03d15def91dd6d99e6cf0dfbf503964a5a", "3ed3f14d93eb31ff003e5e799e1be811793c8834", "35a1ae598c53785ec3957e368040563ee366ecbe", "1661baf451086d8a33cc11ae390fd1c5cdd8dc40", "abff053bf48012569ef5b858fcf88ee49504dde6", "010a2d16eef8be8773ee2a73600f685ec0b2e371", "37525b2c3cc16a2fe166708a4f7081b949b1888e", "50b8906280a9e8834e9d4a6df3f9d8d069c5e2ff" ], "paperAbstract": "Monitoring and assessing the energy efficiency of supercomputers and data centers is crucial in order to limit and reduce their energy consumption. Applications from the domain of High Performance Computing (HPC), such as MPI applications, account for a significant fraction of the overall energy consumed by HPC centers. Simulation is a popular approach for studying the behavior of these applications in a variety of scenarios, and it is therefore advantageous to be able to study their energy consumption in a cost-efficient, controllable, and also reproducible simulation environment. Alas, simulators supporting HPC applications commonly lack the capability of predicting the energy consumption, particularly when target platforms consist of multi-core nodes. In this work, we aim to accurately predict the energy consumption of MPI applications via simulation. Firstly, we introduce the models required for meaningful simulations: The computation model, the communication model, and the energy model of the target platform. 
Secondly, we demonstrate that by carefully calibrating these models on a single node, the predicted energy consumption of HPC applications at a larger scale is very close (within a few percent) to real experiments. We further show how to integrate such models into the SimGrid simulation toolkit. In order to obtain good execution time predictions on multi-core architectures, we also establish that it is vital to correctly account for memory effects in simulation. The proposed simulator is validated through an extensive set of experiments with well-known HPC benchmarks. Lastly, we show the simulator can be used to study applications at scale, which allows researchers to save both time and resources compared to real experiments.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/CLUSTER.2017.66" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fd840a2cb6fb6918689c8374c7316dbb23847c89", "sources": [ "DBLP" ], "title": "Predicting the Energy-Consumption of MPI Applications at Scale Using Only a Single Node", "venue": "2017 IEEE International Conference on Cluster Computing (CLUSTER)", "year": 2017 }, "fdc4ea002a98ec514ac746831ba766a9cd4be3d7": { "authors": [ { "ids": [ "11665967" ], "name": "Junming Liu" }, { "ids": [ "2274395" ], "name": "Yanjie Fu" }, { "ids": [ "2201634" ], "name": "Jingci Ming" }, { "ids": [ "1682980" ], "name": "Yong Ren" }, { "ids": [ "2640306" ], "name": "Leilei Sun" }, { "ids": [ "1707713" ], "name": "Hui Xiong" } ], "doi": "10.1145/3097983.3098049", "doiUrl": "https://doi.org/10.1145/3097983.3098049", "entities": [ "Cluster analysis", "Emergence", "Encryption", "Experiment", "Feature vector", "K-means clustering", "MIMD", "Microsoft Windows", "Random forest", "Scott continuity", "WhatsApp Messenger" ], "id": "fdc4ea002a98ec514ac746831ba766a9cd4be3d7", "inCitations": [ "06fe99766e446e59631bf908ae1a9b2118d92857", "c57368db04c60f492106b1311b92a7449ecc55f4" ], "journalName": "", "journalPages": "335-344", 
"journalVolume": "", "outCitations": [ "0479b7e8c433e3f18a2b6c5dedd328f0229c1566", "61e888668c571002cb8f04305f4ebababb772cfb", "20faa2ef4bb4e84b1d68750cda28d0a45fb16075", "1375c722eee6e58041f9e295042d42e43ac3428c", "5af944cd89e246fb1b062c965391763da824745f", "0ffdca083d43af01584ebf41b22dbb6edcc52313", "0d02effadc783768899038f1021460e628537537", "7a43f90cd60a5ff62572f04a2797d93d63af2c98", "1f876ecccc1cb44de2234e105fd58da05092877a", "03c88d1f3fb8963c52bef1376b64a2c882be8747", "a22623ba411377f219d6018a78aa375a2110293e", "3126ba3bed82bcfe3d19edfc9f3a7576264a2113", "0e28a308465b5c29875912fe72497491b947c774", "0d5017f05a9b4db635a013dab8dec06855a96dee", "f504e1c8fd0fb7c19c73a5759a5fa369131c573e", "edad544325d6bb8d84e205259df58c1bb2118a0f", "14e86f39831e30b4037ab99b5de5e5d86608ea16", "3f62fe7de3bf15af1e5871dd8f623db29d8f0c35", "39fba5209682efa186ff60f3313573f07bc954b5", "2ef606258486d6c32fd0b9ca54244273c21331b9", "dbd3e25f41498e1d41322cd0c87935404f7c3478", "33b152d08e6766fdaef82d03a7d999f08e28294b", "573c6cc9ad23fabd855e2a3e50842e3acf18a160", "7bc22b51b220da08eccc01c58630e79036aa9d49", "0736d0c3b5314b7bc611c692c309ac36c146df7d", "1cadb267720b8723fa417840003ac51ec56d7aa5" ], "paperAbstract": "The mobile in-App service analysis, aiming at classifying mobile internet traffic into different types of service usages, has become a challenging and emergent task for mobile service providers due to the increasing adoption of secure protocols for in-App services. While some efforts have been made for the classification of mobile internet traffic, existing methods rely on complex feature construction and large storage cache, which lead to low processing speed, and thus not practical for online real-time scenarios. To this end, we develop an iterative analyzer for classifying encrypted mobile traffic in a real-time way. 
Specifically, we first select an optimal set of most discriminative features from raw features extracted from traffic packet sequences by a novel Maximizing Inner activity similarity and Minimizing Different activity similarity (MIMD) measurement. To develop the online analyzer, we first represent a traffic flow with a series of time windows, which are described by the optimal feature vector and are updated iteratively at the packet level. Instead of extracting feature elements from a series of raw traffic packets, our feature elements are updated when a new traffic packet is observed and the storage of raw traffic packets is not required. The time windows generated from the same service usage activity are grouped by our proposed method, namely, recursive time continuity constrained KMeans clustering (rCKC). The feature vectors of cluster centers are then fed into a random forest classifier to identify corresponding service usages. Finally, we provide extensive experiments on real-world Internet traffic data from Wechat, Whatsapp, and Facebook to demonstrate the effectiveness and efficiency of our approach. 
The results show that the proposed analyzer provides high accuracy in real-world scenarios, and has low storage cache requirement as well as fast processing speed.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098049" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fdc4ea002a98ec514ac746831ba766a9cd4be3d7", "sources": [ "DBLP" ], "title": "Effective and Real-time In-App Activity Analysis in Encrypted Internet Traffic Streams", "venue": "KDD", "year": 2017 }, "fdd4cf09259974aa26a40be24cfbda792cf438c3": { "authors": [ { "ids": [ "1708641" ], "name": "Quan Chen" }, { "ids": [ "1976501" ], "name": "Hailong Yang" }, { "ids": [ "1697293" ], "name": "Minyi Guo" }, { "ids": [ "39620203" ], "name": "Ram Srivatsa Kannan" }, { "ids": [ "3348715" ], "name": "Jason Mars" }, { "ids": [ "2235128" ], "name": "Lingjia Tang" } ], "doi": "10.1145/3037697.3037700", "doiUrl": "https://doi.org/10.1145/3037697.3037700", "entities": [ "Bandwidth (signal processing)", "Cache (computing)", "Computer data storage", "Graphics processing unit", "Interference (communication)", "Memory bandwidth", "PCI Express", "Quality of service", "Resource contention", "Server (computing)", "Solo" ], "id": "fdd4cf09259974aa26a40be24cfbda792cf438c3", "inCitations": [ "ec7cd4b8be631f2000cf4a4bb059186b6fff85c6" ], "journalName": "", "journalPages": "17-32", "journalVolume": "", "outCitations": [ "2c2fa29dfbbab106f1b94cdfdc67939d3355bd30", "2c62673138ce576b53259f53a10a00e1b0b1ef66", "24bec342a0e677d9be167f24d2a824e1120b3e7a", "39a6aa81ec3e20c1d500b99b560deb039c451b83", "a5a95ad4b217cf5b2f1038753ba76fae94da1bec", "3000e77ed7282d9fb27216f3e862a3769119d89e", "02b141ddc423469afde9c99cf76028095ef28127", "092a1cf971fb8359d3293004c6f1de82f05f3afb", "76d791a34301b60f4c6c081b091fb7bdc2971435", "0f44833eb9047158221e7b3128cde1347b58ccd6", "5fe43f4d79d2829ba062fe9cf1711ee60903dd24", "251ea4c57e71bb951ff6f9fe0ff63897a298402f", "3c0bc4e9d30719269b0048d4f36752ab964145dd", 
"4aec1ba26ca8a42a041dc0ac6a43dc91f1b6abc1", "3ce662e1663456ce2a5b5d240112721c0d0a4582", "9f66b44a04ea83433ceeb71c75eba71245e2df80", "40bb6830ec715b52e0fcbe7566f489dd2d0c25fe", "269c24a4aad9be622b609a0860f5df80688c2f93", "26512755e7f78e10390b409ed4de3378aba2bac8", "a3d4cb8407bbb8abc9b668ceb356b9cd41f49268", "0612811b3ed9fc7ef8300e65cb70360613dab01d", "23f4f3430cd97f034563dc0a41039c5fbc58f6a3", "6d44790b6d952eff28f302998e8121f90786e3ff", "c3c244e6a07810e738c8eb3c10d652b7da0267d6", "1d286a264b233125b681e522e8f5fed596a8608c", "956886e5d439f35864bb9ea0ea89e29932330b2d", "1f1a1f0cd075cef63083c8ec15321021dbff2cfc", "54a9b924b0c951fa444c600df2f5bd41d96932f2", "4cc504da30fd273e12f28bc0cf573ff37f829f89", "1a4f15385f40d8ae503a29c4d70c5a908cf492d8", "0ca2b92a4f992b35683c7fffcd49b4c883772a29", "c81e776d24fa2dcb497db553fd9625aa644c009e", "7a978f2902460e732c50c36a171deb11733df1fc", "2451dc6bb08d2668f4a876ce94d0c15227ccab7a", "0e7148699994155cf8afae0ed943812fbb4f4b7f", "0c6c30e3052fcc01aa5ee38252d77f75322d7b3f", "c5b3f0caeba42a532a48adc80e6932c35bb26ac4", "093f488e41a142e981c395f69f4946ed2b1983a7", "04ba23e362786deee7af52b1987d956bb764ca7e", "14505c2bdd3822d7a62385121d28ba3eb36fea1d", "636c6bee41498659f8b264479f5013f7c7bc207e", "b04c9e851ae605592d693aa65f0d753b8af08feb", "362d884ff43d8c7cd6bce184944cfc04cdd57c18", "3e57c6706759e1821cde8fda933750e73aed6896", "7f01ded4bc1d3e658e7969a4ba7d262a6f7d2ed9", "fe93a312039f2f95cf54834a454013be22b597fe", "0652168cd1dfe44892ef6c42004b5dec267ac254", "109b416bdbf1739373638eb7e5b37f5d475fd40e", "590bd345ef4b8f274af3363a52b7d8f518cdc08a" ], "paperAbstract": "Guaranteeing Quality-of-Service (QoS) of latency-sensitive applications while improving server utilization through application co-location is important yet challenging in modern datacenters. The key challenge is that when applications are co-located on a server, performance interference due to resource contention can be detrimental to the application QoS. 
Although prior work has proposed techniques to identify \"safe\" co-locations where application QoS is satisfied by predicting the performance interference on multicores, no such prediction technique on accelerators such as GPUs.\n In this work, we present Prophet, an approach to precisely predict the performance degradation of latency-sensitive applications on accelerators due to application co-location. We analyzed the performance interference on accelerators through a real system investigation and found that unlike on multicores where the key contentious resources are shared caches and main memory bandwidth, the key contentious resources on accelerators are instead processing elements, accelerator memory bandwidth and PCIe bandwidth. Based on this observation, we designed interference models that enable the precise prediction for processing element, accelerator memory bandwidth and PCIe bandwidth contention on real hardware. By using a novel technique to forecast solo-run execution traces of the co-located applications using interference models, Prophet can accurately predict the performance degradation of latency-sensitive applications on non-preemptive accelerators. Using Prophet, we can identify \"safe\" co-locations on accelerators to improve utilization without violating the QoS target. Our evaluation shows that Prophet can predict the performance degradation with an average prediction error 5.47% on real systems. 
Meanwhile, based on the prediction, Prophet achieves accelerator utilization improvements of 49.9% on average while maintaining the QoS target of latency-sensitive applications.", "pdfUrls": [ "http://doi.acm.org/10.1145/3037697.3037700" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fdd4cf09259974aa26a40be24cfbda792cf438c3", "sources": [ "DBLP" ], "title": "Prophet: Precise QoS Prediction on Non-Preemptive Accelerators to Improve Utilization in Warehouse-Scale Computers", "venue": "ASPLOS", "year": 2017 }, "fe7aa435d3cd3a667b8cf31f5e327a2c4b479286": { "authors": [ { "ids": [ "38728459" ], "name": "Min Zhu" }, { "ids": [ "1794873" ], "name": "Bibo Tu" }, { "ids": [ "1725923" ], "name": "Wei Wei" }, { "ids": [ "1693956" ], "name": "Dan Meng" } ], "doi": "10.1145/3050748.3050767", "doiUrl": "https://doi.org/10.1145/3050748.3050767", "entities": [ "ARM architecture", "Commodity computing", "Decoupling (electronics)", "Hypervisor", "Juno (company)", "KVM switch", "Linux", "Linux", "Memory protection", "Page table", "Performance Evaluation", "Task Control Block", "Virtual machine", "X86" ], "id": "fe7aa435d3cd3a667b8cf31f5e327a2c4b479286", "inCitations": [], "journalName": "", "journalPages": "242-256", "journalVolume": "", "outCitations": [ "2f484f6f079faefef1a8acf26383ecdb019fd380", "39040e2b60fcb01dfed8d638f2cb66218cfdb144", "79e14bf42535966db5e056af6d8a0e0ec1f522b2", "0829638686dfef02a9ded604952173f06b1ab1aa", "56777045bdfcf386c037e1884660180dc3c255ae", "565a174a24e7f47dcd7a21f57cabc252b5692a0f", "3c4e907c07944cd55e800b4e55918adf8cb2a683", "a5fa4c155172dc544dedd0dd2cfb67d52156e906", "2b3cdf37bff57e29fb5aecc136603f16c855366b", "44a2ee04d8b939978bd892249c459aec5672412e", "0dac671dae4192cfe96d290b50cc3f1105798825", "080c336698f5d7a15169e5ad98fa62a0bbf6085c", "0b0422b5864ca1a25d6af274bad11c1b2fef1ed5", "1fb49ae43195232f0b3d1c9d534a5aa03bdd8f26", "4ce02fb69245a84d3ffceae20e596dcf0497508d", "765c5d29bce0617e78b2ec3e918e31f6e543645e", 
"008ff29ee4dbee79028e1017d6459347ad8f45d6", "05f70f429a7bf38efa9e457fd486cb862bd495be", "0d844dec1c0e7b56c178fbb09945001ba00e0d05", "6c0562ffc00ba7a4d2734ac039ffd181afe2008d", "089895ef5f96bdb7eed9dd54f482c22350c2f30d", "2fcdec58c1c0028e07c4823cf082fd6d3abc05dc", "36b1e5a0e61fdaf6ca5c56015db2376d87db376b", "90df476a4070cd797ef682f30a408086899ad16b", "173bd678095821c34781c6649ccc7206d346f219", "78ff3d73248e53e3abab2c3bf6e09730f04d1415", "10e0397e08f37ee5d3ba1f9e24ac9eb313c784f4", "5693c2a2c52f4905638559b2fc2b76c975806175", "24657cf3a48c7fde2f9611ebee271fecc8be9952", "ec79422e0bfdb61d8b6d2a6ec5b2dfbcab970852", "30f52a79ff53f8969ffcba19013b4a43e629875f", "2e8da51c545cbe8e62a3751a5a2b9a3beca00b43", "e1d9c149fa0d3f3014db2cd554a5602dced982cd", "8f7b97fa428c0156a8220df5034650c4c77933d2", "83029e6400054bb286c3188aee4434923f6dc9ea", "6d6bd93c620885cb5ddd5abfac19efffac132cd5", "5bddb52a9def1c1330e8139b8496fbb8bb8c5937", "85d555f7ce19740b4fc656ff797623c6e1513018", "3b06edbeb2adf0de12a6ddbec073fd96e82617c6", "5af5aa924c170d30e9203801e97ebe347700c3a9", "505ee623397666c0ce158e103ffac0c62dbcf2fa", "46bc4d7c5605e8468f4355335416e15f0d7e4dcd", "1251fe24e96d5c12f868bf4584351c0ee03d55ec", "b1b3c8f907db6748c373bf1d15ec0c15bb2307dc", "22a5eeb8608b35e371b7544a54fabeadca8866e3" ], "paperAbstract": "Once compromising the hypervisor, remote or local adversaries can easily access other customers' sensitive data in the memory and context of guest virtual machines (VMs). VM isolation is an efficient mechanism for protecting the memory of guest VMs from unauthorized access. However, previous VM isolation systems either modify hardware architecture or introduce a software module without being protected, and most of them focus on the x86 architecture.\n This paper proposes HA-VMSI, a lightweight hardware-assisted VM isolation approach for ARM, to provide runtime protection of guest VMs, even with a compromised hypervisor. 
In the ARM TrustZone secure world, a thin security monitor is introduced as HA-VMSI's entire TCB. Hence, the security monitor is much less vulnerable and safe from attacks that can compromise the hypervisor. The key of HA-VMSI is decoupling the functions of memory isolation among VMs from the hypervisor into the security monitor. As a result, the hypervisor can only update the Stage-2 page tables of VMs via the security monitor, which inspects and approves each new mapping. It is worth noting that HA-VMSI is more secure and effective than current software approaches, and more flexible and compatible than hardware approaches. We have implemented a prototype for KVM hypervisor with multiple Linux as guest OSes on Juno board. The security assessment and performance evaluation show that HA-VMSI is effective, efficient and practical.", "pdfUrls": [ "http://doi.acm.org/10.1145/3050748.3050767" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fe7aa435d3cd3a667b8cf31f5e327a2c4b479286", "sources": [ "DBLP" ], "title": "HA-VMSI: A Lightweight Virtual Machine Isolation Approach with Commodity Hardware for ARM", "venue": "VEE", "year": 2017 }, "fef1056a69be6f597b4866bc3ee306bf01a4df0d": { "authors": [ { "ids": [ "2309393" ], "name": "D. A. 
Beckingsale" }, { "ids": [ "2217777" ], "name": "Olga Pearce" }, { "ids": [ "1696759" ], "name": "Ignacio Laguna" }, { "ids": [ "33289410" ], "name": "Todd Gamblin" } ], "doi": "10.1109/IPDPS.2017.38", "doiUrl": "https://doi.org/10.1109/IPDPS.2017.38", "entities": [ "Compile time", "Compiler", "Kernel (operating system)", "Overhead code", "Parallel computing", "Run time (program lifecycle phase)", "Self-tuning", "Simulation" ], "id": "fef1056a69be6f597b4866bc3ee306bf01a4df0d", "inCitations": [ "a6a906ae9727a33409eaa207253a6ad32a871c11" ], "journalName": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "journalPages": "307-316", "journalVolume": "", "outCitations": [ "0ce898bf3f3e4af56492e9135c7c85e3917e20e8", "f8e9b050c93af6dea582563f61b6460b590bc3af", "85b5604ec9f9a33ce4220c3e861de274d4a14a03", "e65012425ff445a11728cc4922cfc09a4dfd6fd1", "8a2cfe57dadd3a541154c38de897221945ffc808", "41575c810be20f4bd7f0c33b7b3fa81d42ac137a", "0c76a904b28c775eb5f33cd982f0bfeddab353e3", "645d9e7e5e3c5496f11e0e303dc4cc1395109773", "b81fb53cc1dff847804279275ce1e3238ffe8766", "9a9f8973003098ad6065a7848a78cc9bf60926a9", "09ff457e09ae277e52441e228c0ed7a8921d67a7", "24da69e7b54dcea2873bc3b6289c36e5ca465b7e", "2dbc0a3ade971381152c5521d3a8576f1f8632e0", "a44e9366b28688fceea7d6363ff5b1aeba695e78", "a5a95ad4b217cf5b2f1038753ba76fae94da1bec", "5672ce28f2927b81b01303e4926643c55a4c8133", "6472cab2678c39e2273673968c6d7d3cfe2a62c9", "b8d5cc73054874b49c4ee1033717678719440145", "04b67bda49edadf2dc618004582a10d2dc66b6a3", "8dc2184214ee39b31e2c0d623842b66c0141984b", "1ac19f434c742202451da7c44591c52ad3f9e9fd", "2190f5b82326f4d61312aaa6e6226f1ae618fb0d", "220f5b0e74c7f1e71d6e23da672dcffbc9e6520a", "0a361ac9c017eaa73d39af7bb8f11a9fb8a5fc14", "1be16d8c557b15cdf2db9e7eb4453f2274fd60af", "1c4026d2fe957c4d5a29f2e091b9ee6609882c00", "075d460a4737d7c0b3fd4b7aa03e315f7256b1af", "0524b5c458a3eeda6b3e70fb26ac8f9431de5f93" ], "paperAbstract": "Increasing architectural diversity makes 
performance portability extremely important for parallel simulation codes. Emerging on-node parallelization frameworks such as Kokkos and RAJA decouple the work done in kernels from the parallelization mechanism, allowing for a single source kernel to be tuned for different architectures at compile time. However, computational demands in production applications change at runtime, and performance depends both on the architecture and the input problem, and tuning a kernel for one set of inputs may not improve its performance on another. The statically optimized versions need to be chosen dynamically to obtain the best performance. Existing auto-tuning approaches can handle slowly evolving applications effectively, but are too slow to tune highly input-dependent kernels. We developed Apollo, an auto-tuning extension for RAJA that uses pre-trained, reusable models to tune input-dependent code at runtime. Apollo is designed for highly dynamic applications; it generates sufficiently low-overhead code to tune parameters each time a kernel runs, making fast decisions. 
We apply Apollo to two hydrodynamics benchmarks and to a production multi-physics code, and show that it can achieve speedups from 1.2x to 4.8x.", "pdfUrls": [ "https://doi.org/10.1109/IPDPS.2017.38" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fef1056a69be6f597b4866bc3ee306bf01a4df0d", "sources": [ "DBLP" ], "title": "Apollo: Reusable Models for Fast, Dynamic Tuning of Input-Dependent Code", "venue": "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)", "year": 2017 }, "ff2a25ccf50d7f4b3b4deec0781ba3d8ce4b2dc5": { "authors": [ { "ids": [ "3018450" ], "name": "Ayan Palchaudhuri" }, { "ids": [ "3061534" ], "name": "Anindya Sundar Dhar" } ], "doi": "10.1109/HiPC.2017.00021", "doiUrl": "https://doi.org/10.1109/HiPC.2017.00021", "entities": [ "Adder (electronics)", "Error detection and correction", "Field-programmable gate array", "Programming paradigm", "Propagation delay", "Universal instantiation" ], "id": "ff2a25ccf50d7f4b3b4deec0781ba3d8ce4b2dc5", "inCitations": [], "journalName": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "journalPages": "104-113", "journalVolume": "", "outCitations": [ "5a9c9864f2c7e22b80168b4083451f630b23b4e8", "c53f04f54e0267bcb5f82c9f38ff956983793b3f", "51786d12bb803d96e80297ccb9699352c4d3e56a", "4ce255a4405ef5d4bda2c9eff01d31f8060912ee", "a34ad7594f99152c2cdea0b895adad2aea510bbd", "0eaf9d3f75ada1a36573fee52fd871dbe4fe8834", "1e7e95ebc7fd652af66790f83aa8d036552c5475", "5132fbe6ab39bec1282a920993ff481af4b827ad", "5c5a45cea278d0875ef4ef092d584371a702e367", "278902b43e2e97585e5374d9dda36462835fd5cc", "753a2adfb9cddd4ba96d13d4ae396647add4e295", "e677af9b717a2d428e1cda14aa7c41a3eccd1f0f", "4c94a4305885a75250d569e7affd3dec3d7c9b42", "63f9e5f6a853c7dd5bdf459e285d056388264c42", "909d9ee18699d8ccdbef8448df46a8d8afd15165", "235ec57bded938e7ca064e7bf5671e0072545d59", "59db30da930327212fff2e59ec5759337c988056", "61c4140a0ec66baa608c0d908ad4a6d0e4730f79", 
"24b3ed5ce3fec029c931d36891cc9209a351c359", "2aa9fcc11302468da1e75aa0b9cbe00236ab88e1", "24b763ffa1e82bac18622c809c39da5aba0a09ab" ], "paperAbstract": "Scan based error detection architectures for hybrid, carry-free radix-2 and radix-4 addition operations using redundant arithmetic are presented in this paper. Such addition operations have been chosen as representative examples as they are free from carry propagation delay and are ideal from the viewpoint of technology mapping of the logic elements onto the FPGA slices. The architectures have been conceived following the design paradigm of target FPGA specific primitive instantiation coupled with location constraints, without any degradation in the speed of circuit operation as compared to the original circuit implementation without the scan operation. Our architectures also comfortably outperform the existing state-of-the-art error detection architectures in terms of speed and consumes less area.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/HiPC.2017.00021" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ff2a25ccf50d7f4b3b4deec0781ba3d8ce4b2dc5", "sources": [ "DBLP" ], "title": "Redundant Arithmetic Based High Speed Carry Free Hybrid Adders with Built-In Scan Chain on FPGAs", "venue": "2017 IEEE 24th International Conference on High Performance Computing (HiPC)", "year": 2017 }, "ff488e2cd0e9471ab1205bffe7435222cf27e48e": { "authors": [ { "ids": [ "37167451" ], "name": "Li-Yung Ho" }, { "ids": [ "1726584" ], "name": "Jan-Jan Wu" }, { "ids": [ "38244340" ], "name": "Pangfeng Liu" }, { "ids": [ "1757417" ], "name": "Chia Chun Shih" }, { "ids": [ "2479713" ], "name": "Chi-Chang Huang" }, { "ids": [ "1848873" ], "name": "Chao-Wen Huang" } ], "doi": "", "doiUrl": "", "entities": [ "Algorithm", "Computer cluster", "Dynamic programming", "Electronic billing", "IBM Tivoli Storage Productivity Center", "Immutable object", "Nonlinear programming", "Open-source software", "Partition 
problem", "Rubber duck debugging", "Scalability", "Speedup", "Throughput" ], "id": "ff488e2cd0e9471ab1205bffe7435222cf27e48e", "inCitations": [ "f1973ab4391ba1feef30b661f2e477665baeee68" ], "journalName": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "journalPages": "21-30", "journalVolume": "", "outCitations": [ "e0508499b4cf5794d5aeaf717e7ad9541e9c2bba", "b9d5572f70e5b3c4287b17ba23c223e9515d3714", "57cbf8073681910b1516c74fb7714eedb839303c", "24281c886cd9339fe2fc5881faf5ed72b731a03e", "f28f59b8fe5f7057018a29a097a6fb909284101f", "7fd2f7bd015cc46f0d6ab3b48e6397cf10230817", "91f2bca4c91a980cc3e65764617292746f63e2e9", "1f7e50d220f41f4fac985a991c8d5187323aab4c", "0558c94a094158ecd64f0d5014d3d9668054fb97", "080ed793c12d97436ae29851b5e34c54c07e3816", "efcc08658d67a60655c5f3afd09bd8ba383b320c", "29ac94ebbadf6d6a74c61f9981d7eaba9c94f299", "baeb34720e124137bc86f3ecd895e601165eb572", "692c8932d060db8c51a54f84ff97067117ff54d5" ], "paperAbstract": "This paper proposes a scalable and efficient cache update technique to improve the performance of in-memory cluster computing in Spark, a popular open-source system for big data computing. Although the memory cache speeds up data processing in Spark, its data immutability constraint requires reloading the whole RDD when part of its data is updated. Such constraint makes the RDD update inefficient. To address this problem, we divide an RDD into partitions, and propose the partial-update RDD (PRDD) method to enable users to replace individual partition(s) of an RDD. We devise two solutions to the RDD partition problem – a dynamic programming algorithm and a nonlinear programming method. Experiment results suggest that, PRDD achieves 4.32x speedup when compared with the original RDD in Spark. We apply PRDD to a billing system for Chunghwa Telecomm, the largest telecommunication company in Taiwan. 
Our result shows that the PRDD based billing system outperforms the original billing system in CHT by a factor of 24x in throughput. We also evaluate PRDD using the TPC-H benchmark, which also yields promising result.", "pdfUrls": [ "http://dl.acm.org/citation.cfm?id=3101116" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ff488e2cd0e9471ab1205bffe7435222cf27e48e", "sources": [ "DBLP" ], "title": "Efficient Cache Update for In-Memory Cluster Computing with Spark", "venue": "2017 17th IEEE/ACM International Symposium on Cluster, Cloud and Grid Computing (CCGRID)", "year": 2017 }, "ff731dd16909619cfc7b3ccbe7b0936e844a0a02": { "authors": [ { "ids": [ "1703441" ], "name": "Mihir Bellare" }, { "ids": [ "40505481" ], "name": "Joseph Jaeger" }, { "ids": [ "38950355" ], "name": "Julia Len" } ], "doi": "10.1145/3133956.3134087", "doiUrl": "https://doi.org/10.1145/3133956.3134087", "entities": [ "Circular definition", "Collision resistance", "Cryptographic hash function", "Hash function", "Merkle\u2013Damg\u00e5rd construction", "One-way compression function" ], "id": "ff731dd16909619cfc7b3ccbe7b0936e844a0a02", "inCitations": [], "journalName": "", "journalPages": "891-906", "journalVolume": "", "outCitations": [ "400251fab502adf5a8ecdf6e5ba7d522bfe5cf1a", "2eb1b3a952e7154a51fc6fe957fe931ea295261c", "94df1fcf795c979f989e9f796b383ee4098990bf", "1e486d1df47fa6cad646de0ab921c566aab9e9c8", "8c304f9fecad52c6f2f183c8579002989d9b6b0f", "104a4089527fbc30bf92cd974b7f42870ea0a983", "7e7f830993041da1a2f32961e549628e091c1876", "24f22993cf1b9269ffad0cdfbe06095ffc39baea", "836ecc90c9aa5ac3c7ce73d6be63948c76a0185c", "9682341a91f0ea73f3dd9b3548c1e113d7a7f61d", "3577d41b0dcbaa7a604c6ed0ed6aa6828758687e", "3c71fdbb2e5fcf36508eac2c471c33580f8adfe3", "2f806940a478cf39915f407f7a2d9eecae7f3e7c", "2afb54393570efde9c402c2c41ac0f5495fb7622", "03e39f9cd8de9dd2b5af323f1a13d1fba5e2f396", "c330f20d553d27749626352e187df45c1b32686c", "e3b90431d985bd510f76d24b2c19eed6a563290d", 
"b253600873d92d1cbac878a7c5fde03c7b958733", "5c64bc6de2cc68f0f6c8e612faf14abbe3c2d19d", "d2712ce067a604c61a28778babebeced19b6bf8e", "16c15dd84e52de5bdfda58468c2581c52b136b07", "74addb84074068bb4ac4b21abbf178e4dc2b83ea", "4804e8e04327e4d16c155856a73caafca7f82656", "6512f02799cb38fea3659e5c68c1eed1434979ee", "2638a939cd8f4bbdd927dbe8a277569c0d202e93", "064107ad9e03c104a6260cd2a560fbf5c5eeed0a", "e193e44cff6f93d8d0b345fd441012f44dfcac0d", "1f33f92f5998afcba7e78c0d1076611e4ea0f0bf", "e8783ef34f73773fb679aef58d987839313e1b3c", "1a438164c1ca074c40baa6c3279cb5e0c573313e" ], "paperAbstract": "The MD transform that underlies the MD and SHA families iterates a compression function h to get a hash function H. The question we ask is, what property X of h guarantees collision resistance (CR) of H? The classical answer is that X itself be CR. We show that weaker conditions X, in particular forms of what we call constrained-CR, suffice. This reduces demands on compression functions, to the benefit of security, and also, forensically, explains why collision-finding attacks on compression functions have not, historically, lead to immediate breaks of the corresponding hash functions. 
We obtain our results via a definitional framework called RS security, and a parameterized treatment of MD, that also serve to unify prior work and variants of the transform.", "pdfUrls": [ "http://doi.acm.org/10.1145/3133956.3134087" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ff731dd16909619cfc7b3ccbe7b0936e844a0a02", "sources": [ "DBLP" ], "title": "Better Than Advertised: Improved Collision-Resistance Guarantees for MD-Based Hash Functions", "venue": "CCS", "year": 2017 }, "ffbaf45cb4123a76542e008875f14ed32c9b502e": { "authors": [ { "ids": [ "7266659" ], "name": "Eojin Lee" }, { "ids": [ "10013992" ], "name": "Jongwook Chung" }, { "ids": [ "2550204" ], "name": "Daejin Jung" }, { "ids": [ "4239456" ], "name": "Sukhan Lee" }, { "ids": [ "40305784" ], "name": "Sheng Li" }, { "ids": [ "2575874" ], "name": "Jung Ho Ahn" } ], "doi": "10.1109/IISWC.2017.8167773", "doiUrl": "https://doi.org/10.1109/IISWC.2017.8167773", "entities": [ "Computer data storage", "Dynamic random-access memory", "Memory architecture", "Multi-core processor", "Network switch", "Parallel computing", "Partition (database)", "Read-write memory", "Shared memory", "Thread (computing)" ], "id": "ffbaf45cb4123a76542e008875f14ed32c9b502e", "inCitations": [], "journalName": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "journalPages": "156-166", "journalVolume": "", "outCitations": [ "002e5d1003a5d8192f43419350cee2c94562478f", "4215fbbff39a0213888718549f215b124bd2e611", "54a15f2c25bec4274d5bb423dbc5002426a506a3", "1de7a8de961624bfd482744c6be24fb15ae14776", "8f126319c2c52347f3d32e5daf25bbccf759c761", "1401df37cc3fc78f26570d601fd123f17646b2d2", "026615150a7db9012ea247d3576957ca214258c6", "5293158ac8e5e2b89b96b86a064aeb0086b7dac1", "c797c15492e635ce850158dbe01f402c0f8e78cd", "5b93477e6d7d0c6701052791905300cfd887b5c2", "c7ae87b4e5952560362e24274a3e9f4e78a666f6", "9f6fb3b7ded93757952ba129fccc211dea02aab0", "4390f4a06a036b8f04cbb4fe7611fa5af9492797", 
"8b4682a90b39d0b95d92098be48f05687cb23086", "3ed84f2fbdc4dc6450919ec5b017e66440a5833c", "0e6b0665e0fc3c0c152885869f6c0d339aba06a1", "b34823a63f1cd1d870c7af7179c8d08b603ec791", "41b24c890ae0ef99ff031c9c8549375af6025fb6", "f3325ace129dec914966f9894d9f412e5e04bdc2", "747ad718761b7d848a12e4f3a82aa0f46117a815", "26e72340c47b7348e1b1de285f89dd96cc925b27", "434fa04db769935ae61bbcf4d9faa602b9a8c730", "08af8e9674bd5460e477b3372c0a3ebc97fe518e", "1dec8f5106d11047aaaf126121110cbf890f17c3", "094b881edab3f5833c4ff2f38d4ed207af141bcd", "464af3debb8434807ab04eb749d63594e78ee786", "0b885bb186445ee0c50277d990eca18c53fef09b", "9cce6a43d542afbea7a6cff1b4d05a5c5700ddc8", "1ab74d44982409beeca21efb2dbcb97a5c7de4b2", "9c7e0c435a94c17c16853937f60edd8c9b3a3a4a", "1dd353938063795c06ef21d8b0b3ef3b45a2fdc1", "115713b2175047e746c8e7cd22ee1b8255866d0f", "52cf88a3f3510c91d3952acabc447bdb2eb2e224", "0e44228b12df76587803f3f7a8e49fe3a0aee45f", "242cbdc5966fd14ba4a00815ac301fb278d8f544", "2608db8056e1598cf0b0bce8c2e305c3735a7bbe", "1641068a497e6c810e2bc5446c68c4728bbd5ae0", "675e82b6d0d2257c6aab0965238b0c97928b9f78", "0e28f43f2e7d093b52010378d50c780f94b7abed", "705a129de84bcf24b4039150c2fc2be1c24cc24a", "1bed30d161683d279780aee34619f94a860fa973" ], "paperAbstract": "Modern multi-core systems employ shared memory architecture, entailing problems related to the main memory such as row-buffer conflicts, time-varying hot-spots across memory channels, and superfluous switches between reads and writes originating from different cores. There have been proposals to solve these problems by partitioning main memory across banks and/or channels such that a DRAM bank is dedicated to a single core, being free from inter-thread row-buffer conflicts. However, those studies either focused on only multi-programmed workloads on which cores operate independently, not cooperatively, or specific hardware configurations with a limited number of degrees of freedom in the number of main memory banks, ranks, and channels. 
We analyze the influence of memory partitioning on systems with various degrees of banks, ranks, and channels using multi-threaded and multi-programmed workloads, making the following key observations. Bank partitioning is beneficial when memory-intensive applications in a multi-programmed workload have similar characteristics in bank-level parallelism, bandwidth, and capacity demands. Any diversity in these demands with a limited memory capacity greatly diminishes the bank partitioning benefits. As memory access/usage patterns across cores are more easily manageable on multi-threaded workloads, bank partitioning is more often effective with memory intensive multithreaded applications. Channel partitioning becomes effective when the reduction of the negative impacts of time-varying hotspots across memory channels outweighs the load imbalance due to partitioning. We also demonstrate the benefits of rank partitioning with regard to minimizing read-write switches on multi-threaded applications where cores can coordinate memory accesses.", "pdfUrls": [ "http://doi.ieeecomputersociety.org/10.1109/IISWC.2017.8167773" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/ffbaf45cb4123a76542e008875f14ed32c9b502e", "sources": [ "DBLP" ], "title": "Work as a team or individual: Characterizing the system-level impacts of main memory partitioning", "venue": "2017 IEEE International Symposium on Workload Characterization (IISWC)", "year": 2017 }, "fff96f5b6d6f2c8140e62a65aabfc7144abd3536": { "authors": [ { "ids": [ "2595397" ], "name": "Zilong Bai" }, { "ids": [ "2536734" ], "name": "Peter B. Walker" }, { "ids": [ "6606049" ], "name": "Anna E. 
Tschiffely" }, { "ids": [ "34410258" ], "name": "Fei Wang" }, { "ids": [ "38673135" ], "name": "Ian Davidson" } ], "doi": "10.1145/3097983.3098023", "doiUrl": "https://doi.org/10.1145/3097983.3098023", "entities": [ "Experiment", "Ground truth", "Matrix regularization", "Synthetic data" ], "id": "fff96f5b6d6f2c8140e62a65aabfc7144abd3536", "inCitations": [ "36b36f97a83b574dd242bb6363778c1b00fd9ac0" ], "journalName": "", "journalPages": "55-64", "journalVolume": "", "outCitations": [ "7876d48ba506b013fa8eea0b7f7e74b0e4994cc8", "96544d4857777682129280b9d934d6ff2f221f40", "300066a416806663cb0ae7eb454817320e3c2d58", "48f81ae96e3039efb1514a1fe0138be8692a0f76", "37ede09745b602b89d6d6a5248356e15aadb4cde", "59425f586011b2ccd6bc0fe8a6c089403e3798cb", "d0aaebca518ebbce7763395c700114c3bb02ed15", "18d08ce2a26cfd5219e47388334b61aa7065202d", "5c5f69361f322e83888dbfa2b92b7f5892ada9e0", "31afbc342f550ef2db3715c0b5dc5f634d5d2977", "e0336c0d72b3f6a4b7adbc0eb40da95cffda4544", "03d61a33796234b8bae5ac38de9b26c1c5ed9e2f", "8297ad4b56a76be0e82cf8a68a31668fd3a8ece0", "4e573552f9687d823120b767dc6aec2684c6cb96", "3e0aafbc3cdd4d29c1e6e5f99073c398dd597531", "e74521fddd01b675d296dd1f855a8eb90f91d065" ], "paperAbstract": "A common problem with spatiotemporal data is how to simplify the data to discover an underlying network that consists of cohesive spatial regions (nodes) and relationships between those regions (edges). This network discovery problem naturally exists in a multitude of domains including climate data (dipoles), astronomical data (gravitational lensing) and the focus of this paper, fMRI scans of human subjects. Whereas previous work requires strong supervision, we propose an unsupervised matrix tri-factorization formulation with complex constraints and spatial regularization. We show that this formulation works well in controlled experiments with synthetic networks and is able to recover the underlying ground-truth network. 
We then show that for real fMRI data our approach can reproduce well known results in neurology regarding the default mode network in resting-state healthy and Alzheimer affected individuals.", "pdfUrls": [ "http://doi.acm.org/10.1145/3097983.3098023" ], "pmid": "", "s2PdfUrl": "", "s2Url": "https://semanticscholar.org/paper/fff96f5b6d6f2c8140e62a65aabfc7144abd3536", "sources": [ "DBLP" ], "title": "Unsupervised Network Discovery for Brain Imaging Data", "venue": "KDD", "year": 2017 } }